summaryrefslogtreecommitdiffstats
path: root/contrib/llvm/lib/Target
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/lib/Target')
-rw-r--r--contrib/llvm/lib/Target/ARM/ARM.h53
-rw-r--r--contrib/llvm/lib/Target/ARM/ARM.td260
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp1938
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h127
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp2830
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h384
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp1208
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h201
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBuildAttrs.h131
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMCallingConv.h160
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMCallingConv.td164
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMCodeEmitter.cpp1903
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp1919
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp319
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h233
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMELFWriterInfo.cpp83
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMELFWriterInfo.h58
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp1317
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMFastISel.cpp2120
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp1087
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMFrameLowering.h76
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMGlobalMerge.cpp219
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp120
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.h54
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp3211
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp8835
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMISelLowering.h540
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrFormats.td1965
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp68
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrInfo.h44
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrInfo.td5004
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrNEON.td5030
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrThumb.td1436
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td4022
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrVFP.td1165
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMJITInfo.cpp336
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMJITInfo.h183
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp1850
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp125
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h250
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMPerfectShuffle.h6586
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMRegisterInfo.cpp22
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMRegisterInfo.h33
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td352
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMRelocations.h62
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMSchedule.td261
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMScheduleA8.td1034
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMScheduleA9.td1827
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMScheduleV6.td294
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp197
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h68
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp219
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMSubtarget.h270
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp160
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMTargetMachine.h145
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp47
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.h38
-rw-r--r--contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp143
-rw-r--r--contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp4615
-rw-r--r--contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp4082
-rw-r--r--contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp992
-rw-r--r--contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h136
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h667
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp531
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h449
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h97
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp83
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h31
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp1468
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp73
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h76
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp263
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h71
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp388
-rw-r--r--contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp331
-rw-r--r--contrib/llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp23
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp366
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h52
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp100
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.h59
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.cpp706
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.h69
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp273
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp611
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.h78
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.cpp52
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.h43
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp890
-rw-r--r--contrib/llvm/lib/Target/Alpha/Alpha.h43
-rw-r--r--contrib/llvm/lib/Target/Alpha/Alpha.td68
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaAsmPrinter.cpp166
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaBranchSelector.cpp66
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaCallingConv.td38
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaFrameLowering.cpp143
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaFrameLowering.h43
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaISelDAGToDAG.cpp425
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaISelLowering.cpp962
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaISelLowering.h142
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaInstrFormats.td268
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaInstrInfo.cpp382
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaInstrInfo.h85
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaInstrInfo.td1159
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaLLRP.cpp158
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaMachineFunctionInfo.h62
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaRegisterInfo.cpp199
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaRegisterInfo.h56
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaRegisterInfo.td133
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaRelocations.h31
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaSchedule.td85
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaSelectionDAGInfo.cpp23
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaSelectionDAGInfo.h31
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaSubtarget.cpp35
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaSubtarget.h49
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaTargetMachine.cpp51
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaTargetMachine.h66
-rw-r--r--contrib/llvm/lib/Target/Alpha/MCTargetDesc/AlphaMCAsmInfo.cpp23
-rw-r--r--contrib/llvm/lib/Target/Alpha/MCTargetDesc/AlphaMCAsmInfo.h29
-rw-r--r--contrib/llvm/lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.cpp78
-rw-r--r--contrib/llvm/lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.h40
-rw-r--r--contrib/llvm/lib/Target/Alpha/TargetInfo/AlphaTargetInfo.cpp20
-rw-r--r--contrib/llvm/lib/Target/Blackfin/Blackfin.h31
-rw-r--r--contrib/llvm/lib/Target/Blackfin/Blackfin.td202
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinAsmPrinter.cpp156
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinCallingConv.td30
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinFrameLowering.cpp130
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinFrameLowering.h47
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp180
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinISelLowering.cpp645
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinISelLowering.h83
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinInstrFormats.td34
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinInstrInfo.cpp256
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinInstrInfo.h81
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinInstrInfo.td862
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp104
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinIntrinsicInfo.h32
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinIntrinsics.td34
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinRegisterInfo.cpp344
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinRegisterInfo.h77
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinRegisterInfo.td277
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinSelectionDAGInfo.cpp24
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinSelectionDAGInfo.h31
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinSubtarget.cpp44
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinSubtarget.h49
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinTargetMachine.cpp43
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinTargetMachine.h68
-rw-r--r--contrib/llvm/lib/Target/Blackfin/MCTargetDesc/BlackfinMCAsmInfo.cpp22
-rw-r--r--contrib/llvm/lib/Target/Blackfin/MCTargetDesc/BlackfinMCAsmInfo.h29
-rw-r--r--contrib/llvm/lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.cpp81
-rw-r--r--contrib/llvm/lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.h38
-rw-r--r--contrib/llvm/lib/Target/Blackfin/TargetInfo/BlackfinTargetInfo.cpp21
-rw-r--r--contrib/llvm/lib/Target/CBackend/CBackend.cpp3617
-rw-r--r--contrib/llvm/lib/Target/CBackend/CTargetMachine.h42
-rw-r--r--contrib/llvm/lib/Target/CBackend/TargetInfo/CBackendTargetInfo.cpp21
-rw-r--r--contrib/llvm/lib/Target/CellSPU/CellSDKIntrinsics.td449
-rw-r--r--contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.cpp41
-rw-r--r--contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.h28
-rw-r--r--contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp92
-rw-r--r--contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h40
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPU.h31
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPU.td66
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPU128InstrInfo.td41
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPU64InstrInfo.td408
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUAsmPrinter.cpp334
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUCallingConv.td57
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUFrameLowering.cpp255
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUFrameLowering.h80
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUHazardRecognizers.cpp141
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUHazardRecognizers.h41
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUISelDAGToDAG.cpp1203
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUISelLowering.cpp3263
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUISelLowering.h186
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUInstrBuilder.h43
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUInstrFormats.td320
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.cpp453
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.h84
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.td4484
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUMachineFunction.h49
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUMathInstr.td97
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUNodes.td159
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUNopFiller.cpp153
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUOperands.td664
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.cpp356
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.h101
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.td183
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPURegisterNames.h19
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUSchedule.td59
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUSelectionDAGInfo.cpp23
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUSelectionDAGInfo.h31
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUSubtarget.cpp66
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUSubtarget.h97
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.cpp74
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.h91
-rw-r--r--contrib/llvm/lib/Target/CellSPU/TargetInfo/CellSPUTargetInfo.cpp20
-rw-r--r--contrib/llvm/lib/Target/CppBackend/CPPBackend.cpp1978
-rw-r--r--contrib/llvm/lib/Target/CppBackend/CPPTargetMachine.h44
-rw-r--r--contrib/llvm/lib/Target/CppBackend/TargetInfo/CppBackendTargetInfo.cpp28
-rw-r--r--contrib/llvm/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp123
-rw-r--r--contrib/llvm/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp565
-rw-r--r--contrib/llvm/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp709
-rw-r--r--contrib/llvm/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h56
-rw-r--r--contrib/llvm/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.cpp71
-rw-r--r--contrib/llvm/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h43
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlaze.h32
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlaze.td73
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeAsmPrinter.cpp322
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeCallingConv.td28
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp258
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeELFWriterInfo.cpp111
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeELFWriterInfo.h58
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeFrameLowering.cpp450
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeFrameLowering.h53
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp277
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeISelLowering.cpp1148
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeISelLowering.h185
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeInstrFPU.td219
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeInstrFSL.td229
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeInstrFormats.td204
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.cpp297
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.h240
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.td889
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp113
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsicInfo.h33
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsics.td131
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeMCInstLower.cpp166
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeMCInstLower.h50
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeMachineFunction.h170
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.cpp193
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.h73
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.td148
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeRelocations.h47
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeSchedule.td55
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeSchedule3.td236
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeSchedule5.td267
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeSelectionDAGInfo.cpp23
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeSelectionDAGInfo.h31
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeSubtarget.cpp63
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeSubtarget.h75
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeTargetMachine.cpp62
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeTargetMachine.h85
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp90
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeTargetObjectFile.h40
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp159
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeBaseInfo.h240
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.cpp24
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.h30
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCCodeEmitter.cpp224
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp140
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.h49
-rw-r--r--contrib/llvm/lib/Target/MBlaze/TargetInfo/MBlazeTargetInfo.cpp19
-rw-r--r--contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp115
-rw-r--r--contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h43
-rw-r--r--contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp28
-rw-r--r--contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h29
-rw-r--r--contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp91
-rw-r--r--contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h38
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430.h47
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430.td66
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp167
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430BranchSelector.cpp180
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430CallingConv.td37
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430FrameLowering.cpp223
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430FrameLowering.h53
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp492
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp1198
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.h182
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430InstrFormats.td211
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp336
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.h92
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.td1211
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430MCInstLower.cpp150
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430MCInstLower.h50
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430MachineFunctionInfo.h46
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.cpp240
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.h66
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.td85
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp23
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430SelectionDAGInfo.h31
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430Subtarget.cpp32
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430Subtarget.h42
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp52
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.h70
-rw-r--r--contrib/llvm/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp20
-rw-r--r--contrib/llvm/lib/Target/Mangler.cpp235
-rw-r--r--contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp129
-rw-r--r--contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.h100
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp117
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h113
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h90
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp39
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h30
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp52
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp162
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h50
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips.h37
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips.td97
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips64InstrInfo.td214
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp458
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsAsmPrinter.h71
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsCallingConv.td139
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsCodeEmitter.cpp245
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp256
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsEmitGPRestore.cpp94
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsExpandPseudo.cpp117
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsFrameLowering.cpp330
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsFrameLowering.h49
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp349
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp2581
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsISelLowering.h201
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsInstrFPU.td362
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsInstrFormats.td267
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsInstrInfo.cpp459
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsInstrInfo.h195
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsInstrInfo.td1017
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsJITInfo.cpp230
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsJITInfo.h70
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsMCInstLower.cpp126
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsMCInstLower.h44
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsMCSymbolRefExpr.cpp70
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsMCSymbolRefExpr.h67
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsMachineFunction.h107
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsRegisterInfo.cpp351
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsRegisterInfo.h69
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsRegisterInfo.td303
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsRelocations.h41
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSchedule.td63
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSelectionDAGInfo.cpp23
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSelectionDAGInfo.h31
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSubtarget.cpp54
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSubtarget.h133
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp119
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsTargetMachine.h120
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp102
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.h41
-rw-r--r--contrib/llvm/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp31
-rw-r--r--contrib/llvm/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp192
-rw-r--r--contrib/llvm/lib/Target/PTX/InstPrinter/PTXInstPrinter.h47
-rw-r--r--contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h63
-rw-r--r--contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.cpp35
-rw-r--r--contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.h28
-rw-r--r--contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp95
-rw-r--r--contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.h38
-rw-r--r--contrib/llvm/lib/Target/PTX/PTX.h44
-rw-r--r--contrib/llvm/lib/Target/PTX/PTX.td141
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXAsmPrinter.cpp597
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXAsmPrinter.h57
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXFPRoundingModePass.cpp179
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXFrameLowering.cpp24
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXFrameLowering.h44
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXISelDAGToDAG.cpp356
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXISelLowering.cpp422
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXISelLowering.h85
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXInstrFormats.td51
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXInstrInfo.cpp362
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXInstrInfo.h133
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXInstrInfo.td1013
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXInstrLoadStore.td278
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXIntrinsicInstrInfo.td110
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXMCAsmStreamer.cpp544
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXMCInstLower.cpp32
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXMFInfoExtract.cpp68
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXMachineFunctionInfo.h164
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXParamManager.cpp73
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXParamManager.h86
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXRegAlloc.cpp58
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXRegisterInfo.cpp74
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXRegisterInfo.h57
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXRegisterInfo.td37
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXSelectionDAGInfo.cpp149
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXSelectionDAGInfo.h53
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXSubtarget.cpp66
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXSubtarget.h130
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXTargetMachine.cpp373
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXTargetMachine.h115
-rw-r--r--contrib/llvm/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp25
-rw-r--r--contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp302
-rw-r--r--contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h71
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp191
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCBaseInfo.h70
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h45
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp66
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h31
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp193
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp151
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h51
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp31
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h37
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPC.h76
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPC.td112
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp686
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp174
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td132
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCCodeEmitter.cpp261
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp971
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h321
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp308
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.h73
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp1087
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp5793
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h493
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td749
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td695
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrBuilder.h43
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td907
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp655
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h149
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td1482
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCJITInfo.cpp468
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCJITInfo.h49
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp173
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h132
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCPerfectShuffle.h6586
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp645
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h70
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td326
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCRelocations.h56
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCSchedule.td505
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCScheduleG3.td64
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCScheduleG4.td74
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td80
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td84
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp23
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.h31
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp141
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h151
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp94
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h96
-rw-r--r--contrib/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp23
-rw-r--r--contrib/llvm/lib/Target/Sparc/DelaySlotFiller.cpp323
-rw-r--r--contrib/llvm/lib/Target/Sparc/FPMover.cpp141
-rw-r--r--contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp41
-rw-r--r--contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h29
-rw-r--r--contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp79
-rw-r--r--contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h41
-rw-r--r--contrib/llvm/lib/Target/Sparc/Sparc.h109
-rw-r--r--contrib/llvm/lib/Target/Sparc/Sparc.td72
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp251
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcCallingConv.td36
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcFrameLowering.cpp80
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcFrameLowering.h41
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp212
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp1275
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcISelLowering.h103
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcInstrFormats.td114
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp346
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcInstrInfo.h100
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td825
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcMachineFunctionInfo.h47
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp127
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.h58
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.td159
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcSelectionDAGInfo.cpp23
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcSelectionDAGInfo.h31
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcSubtarget.cpp44
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcSubtarget.h58
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp66
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcTargetMachine.h81
-rw-r--r--contrib/llvm/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp21
-rw-r--r--contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp32
-rw-r--r--contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h30
-rw-r--r--contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp81
-rw-r--r--contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h38
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZ.h52
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZ.td61
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp221
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.td46
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp386
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.h57
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp779
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp868
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h145
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h128
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td340
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td133
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp439
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.h113
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td1147
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h51
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZOperands.td325
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp143
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h60
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td205
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp23
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h31
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp54
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h48
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp40
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h68
-rw-r--r--contrib/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp19
-rw-r--r--contrib/llvm/lib/Target/Target.cpp108
-rw-r--r--contrib/llvm/lib/Target/TargetData.cpp598
-rw-r--r--contrib/llvm/lib/Target/TargetELFWriterInfo.cpp25
-rw-r--r--contrib/llvm/lib/Target/TargetFrameLowering.cpp45
-rw-r--r--contrib/llvm/lib/Target/TargetInstrInfo.cpp174
-rw-r--r--contrib/llvm/lib/Target/TargetIntrinsicInfo.cpp30
-rw-r--r--contrib/llvm/lib/Target/TargetLibraryInfo.cpp74
-rw-r--r--contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp325
-rw-r--r--contrib/llvm/lib/Target/TargetMachine.cpp284
-rw-r--r--contrib/llvm/lib/Target/TargetRegisterInfo.cpp122
-rw-r--r--contrib/llvm/lib/Target/TargetSubtargetInfo.cpp33
-rw-r--r--contrib/llvm/lib/Target/X86/AsmParser/X86AsmLexer.cpp165
-rw-r--r--contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp1189
-rw-r--r--contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp607
-rw-r--r--contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.h157
-rw-r--r--contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c1625
-rw-r--r--contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h575
-rw-r--r--contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h385
-rw-r--r--contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp144
-rw-r--r--contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h86
-rw-r--r--contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp287
-rw-r--r--contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.h25
-rw-r--r--contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp146
-rw-r--r--contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h96
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp458
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h548
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86FixupKinds.h33
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp138
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h46
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp1074
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp463
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h103
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp554
-rw-r--r--contrib/llvm/lib/Target/X86/TargetInfo/X86TargetInfo.cpp23
-rw-r--r--contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp243
-rw-r--r--contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h107
-rw-r--r--contrib/llvm/lib/Target/X86/X86.h71
-rw-r--r--contrib/llvm/lib/Target/X86/X86.td268
-rw-r--r--contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp715
-rw-r--r--contrib/llvm/lib/Target/X86/X86AsmPrinter.h87
-rw-r--r--contrib/llvm/lib/Target/X86/X86COFFMachineModuleInfo.cpp20
-rw-r--r--contrib/llvm/lib/Target/X86/X86COFFMachineModuleInfo.h46
-rw-r--r--contrib/llvm/lib/Target/X86/X86CallingConv.td401
-rw-r--r--contrib/llvm/lib/Target/X86/X86CodeEmitter.cpp999
-rw-r--r--contrib/llvm/lib/Target/X86/X86ELFWriterInfo.cpp153
-rw-r--r--contrib/llvm/lib/Target/X86/X86ELFWriterInfo.h59
-rw-r--r--contrib/llvm/lib/Target/X86/X86FastISel.cpp2162
-rw-r--r--contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp1743
-rw-r--r--contrib/llvm/lib/Target/X86/X86FrameLowering.cpp1447
-rw-r--r--contrib/llvm/lib/Target/X86/X86FrameLowering.h68
-rw-r--r--contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp2259
-rw-r--r--contrib/llvm/lib/Target/X86/X86ISelLowering.cpp14965
-rw-r--r--contrib/llvm/lib/Target/X86/X86ISelLowering.h979
-rw-r--r--contrib/llvm/lib/Target/X86/X86Instr3DNow.td102
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrArithmetic.td1173
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrBuilder.h184
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrCMovSetCC.td104
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrCompiler.td1737
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrControl.td304
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrExtension.td151
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrFMA.td60
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrFPStack.td648
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrFormats.td541
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td467
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrInfo.cpp3532
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrInfo.h375
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrInfo.td1799
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrMMX.td454
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrSSE.td6799
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td746
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrSystem.td467
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrVMX.td60
-rw-r--r--contrib/llvm/lib/Target/X86/X86JITInfo.cpp574
-rw-r--r--contrib/llvm/lib/Target/X86/X86JITInfo.h81
-rw-r--r--contrib/llvm/lib/Target/X86/X86MCInstLower.cpp701
-rw-r--r--contrib/llvm/lib/Target/X86/X86MCInstLower.h52
-rw-r--r--contrib/llvm/lib/Target/X86/X86MachineFunctionInfo.h135
-rw-r--r--contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp870
-rw-r--r--contrib/llvm/lib/Target/X86/X86RegisterInfo.h138
-rw-r--r--contrib/llvm/lib/Target/X86/X86RegisterInfo.td474
-rw-r--r--contrib/llvm/lib/Target/X86/X86Relocations.h52
-rw-r--r--contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp259
-rw-r--r--contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.h56
-rw-r--r--contrib/llvm/lib/Target/X86/X86Subtarget.cpp373
-rw-r--r--contrib/llvm/lib/Target/X86/X86Subtarget.h293
-rw-r--r--contrib/llvm/lib/Target/X86/X86TargetMachine.cpp158
-rw-r--r--contrib/llvm/lib/Target/X86/X86TargetMachine.h133
-rw-r--r--contrib/llvm/lib/Target/X86/X86TargetObjectFile.cpp45
-rw-r--r--contrib/llvm/lib/Target/X86/X86TargetObjectFile.h38
-rw-r--r--contrib/llvm/lib/Target/X86/X86VZeroUpper.cpp105
-rw-r--r--contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp29
-rw-r--r--contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h30
-rw-r--r--contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp88
-rw-r--r--contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h40
-rw-r--r--contrib/llvm/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp19
-rw-r--r--contrib/llvm/lib/Target/XCore/XCore.h31
-rw-r--r--contrib/llvm/lib/Target/XCore/XCore.td46
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp324
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreCallingConv.td36
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreFrameLowering.cpp380
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreFrameLowering.h57
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp300
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp1615
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreISelLowering.h202
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreInstrFormats.td120
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreInstrInfo.cpp406
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreInstrInfo.h93
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreInstrInfo.td1251
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.h69
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp327
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.h80
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.td56
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreSelectionDAGInfo.cpp23
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreSelectionDAGInfo.h31
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreSubtarget.cpp28
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreSubtarget.h42
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp44
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreTargetMachine.h63
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.cpp65
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.h25
608 files changed, 247627 insertions, 0 deletions
diff --git a/contrib/llvm/lib/Target/ARM/ARM.h b/contrib/llvm/lib/Target/ARM/ARM.h
new file mode 100644
index 0000000..16d0da3
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARM.h
@@ -0,0 +1,53 @@
+//===-- ARM.h - Top-level interface for ARM representation---- --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// ARM back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TARGET_ARM_H
+#define TARGET_ARM_H
+
+#include "MCTargetDesc/ARMBaseInfo.h"
+#include "MCTargetDesc/ARMMCTargetDesc.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetMachine.h"
+#include <cassert>
+
+namespace llvm {
+
+class ARMAsmPrinter;
+class ARMBaseTargetMachine;
+class FunctionPass;
+class JITCodeEmitter;
+class MachineInstr;
+class MCInst;
+
+FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM,
+ CodeGenOpt::Level OptLevel);
+
+FunctionPass *createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM,
+ JITCodeEmitter &JCE);
+
+FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false);
+FunctionPass *createARMExpandPseudoPass();
+FunctionPass *createARMGlobalMergePass(const TargetLowering* tli);
+FunctionPass *createARMConstantIslandPass();
+FunctionPass *createMLxExpansionPass();
+FunctionPass *createThumb2ITBlockPass();
+FunctionPass *createThumb2SizeReductionPass();
+
+void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
+ ARMAsmPrinter &AP);
+
+} // end namespace llvm;
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARM.td b/contrib/llvm/lib/Target/ARM/ARM.td
new file mode 100644
index 0000000..5c727ad
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARM.td
@@ -0,0 +1,260 @@
+//===- ARM.td - Describe the ARM Target Machine ------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces which we are implementing
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// ARM Subtarget state.
+//
+
+def ModeThumb : SubtargetFeature<"thumb-mode", "InThumbMode", "true",
+ "Thumb mode">;
+
+def ModeNaCl : SubtargetFeature<"nacl-mode", "InNaClMode", "true",
+ "Native client mode">;
+
+//===----------------------------------------------------------------------===//
+// ARM Subtarget features.
+//
+
+def FeatureVFP2 : SubtargetFeature<"vfp2", "HasVFPv2", "true",
+ "Enable VFP2 instructions">;
+def FeatureVFP3 : SubtargetFeature<"vfp3", "HasVFPv3", "true",
+ "Enable VFP3 instructions",
+ [FeatureVFP2]>;
+def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true",
+ "Enable NEON instructions",
+ [FeatureVFP3]>;
+def FeatureThumb2 : SubtargetFeature<"thumb2", "HasThumb2", "true",
+ "Enable Thumb2 instructions">;
+def FeatureNoARM : SubtargetFeature<"noarm", "NoARM", "true",
+ "Does not support ARM mode execution">;
+def FeatureFP16 : SubtargetFeature<"fp16", "HasFP16", "true",
+ "Enable half-precision floating point">;
+def FeatureD16 : SubtargetFeature<"d16", "HasD16", "true",
+ "Restrict VFP3 to 16 double registers">;
+def FeatureHWDiv : SubtargetFeature<"hwdiv", "HasHardwareDivide", "true",
+ "Enable divide instructions">;
+def FeatureT2XtPk : SubtargetFeature<"t2xtpk", "HasT2ExtractPack", "true",
+ "Enable Thumb2 extract and pack instructions">;
+def FeatureDB : SubtargetFeature<"db", "HasDataBarrier", "true",
+ "Has data barrier (dmb / dsb) instructions">;
+def FeatureSlowFPBrcc : SubtargetFeature<"slow-fp-brcc", "SlowFPBrcc", "true",
+ "FP compare + branch is slow">;
+def FeatureVFPOnlySP : SubtargetFeature<"fp-only-sp", "FPOnlySP", "true",
+ "Floating point unit supports single precision only">;
+
+// Some processors have FP multiply-accumulate instructions that don't
+// play nicely with other VFP / NEON instructions, and it's generally better
+// to just not use them.
+def FeatureHasSlowFPVMLx : SubtargetFeature<"slowfpvmlx", "SlowFPVMLx", "true",
+ "Disable VFP / NEON MAC instructions">;
+
+// Cortex-A8 / A9 Advanced SIMD has multiplier accumulator forwarding.
+def FeatureVMLxForwarding : SubtargetFeature<"vmlx-forwarding",
+ "HasVMLxForwarding", "true",
+ "Has multiplier accumulator forwarding">;
+
+// Some processors benefit from using NEON instructions for scalar
+// single-precision FP operations.
+def FeatureNEONForFP : SubtargetFeature<"neonfp", "UseNEONForSinglePrecisionFP",
+ "true",
+ "Use NEON for single precision FP">;
+
+// Disable 32-bit to 16-bit narrowing for experimentation.
+def FeaturePref32BitThumb : SubtargetFeature<"32bit", "Pref32BitThumb", "true",
+ "Prefer 32-bit Thumb instrs">;
+
+/// Some instructions update CPSR partially, which can add false dependency for
+/// out-of-order implementation, e.g. Cortex-A9, unless each individual bit is
+/// mapped to a separate physical register. Avoid partial CPSR update for these
+/// processors.
+def FeatureAvoidPartialCPSR : SubtargetFeature<"avoid-partial-cpsr",
+ "AvoidCPSRPartialUpdate", "true",
+ "Avoid CPSR partial update for OOO execution">;
+
+/// Some M architectures don't have the DSP extension (v7E-M vs. v7M)
+def FeatureDSPThumb2 : SubtargetFeature<"t2dsp", "Thumb2DSP", "true",
+ "Supports v7 DSP instructions in Thumb2">;
+
+// Multiprocessing extension.
+def FeatureMP : SubtargetFeature<"mp", "HasMPExtension", "true",
+ "Supports Multiprocessing extension">;
+
+// M-series ISA?
+def FeatureMClass : SubtargetFeature<"mclass", "IsMClass", "true",
+ "Is microcontroller profile ('M' series)">;
+
+// ARM ISAs.
+def HasV4TOps : SubtargetFeature<"v4t", "HasV4TOps", "true",
+ "Support ARM v4T instructions">;
+def HasV5TOps : SubtargetFeature<"v5t", "HasV5TOps", "true",
+ "Support ARM v5T instructions",
+ [HasV4TOps]>;
+def HasV5TEOps : SubtargetFeature<"v5te", "HasV5TEOps", "true",
+ "Support ARM v5TE, v5TEj, and v5TExp instructions",
+ [HasV5TOps]>;
+def HasV6Ops : SubtargetFeature<"v6", "HasV6Ops", "true",
+ "Support ARM v6 instructions",
+ [HasV5TEOps]>;
+def HasV6T2Ops : SubtargetFeature<"v6t2", "HasV6T2Ops", "true",
+ "Support ARM v6t2 instructions",
+ [HasV6Ops, FeatureThumb2]>;
+def HasV7Ops : SubtargetFeature<"v7", "HasV7Ops", "true",
+ "Support ARM v7 instructions",
+ [HasV6T2Ops]>;
+
+//===----------------------------------------------------------------------===//
+// ARM Processors supported.
+//
+
+include "ARMSchedule.td"
+
+// ARM processor families.
+def ProcA8 : SubtargetFeature<"a8", "ARMProcFamily", "CortexA8",
+ "Cortex-A8 ARM processors",
+ [FeatureSlowFPBrcc, FeatureNEONForFP,
+ FeatureHasSlowFPVMLx, FeatureVMLxForwarding,
+ FeatureT2XtPk]>;
+def ProcA9 : SubtargetFeature<"a9", "ARMProcFamily", "CortexA9",
+ "Cortex-A9 ARM processors",
+ [FeatureVMLxForwarding,
+ FeatureT2XtPk, FeatureFP16,
+ FeatureAvoidPartialCPSR]>;
+
+class ProcNoItin<string Name, list<SubtargetFeature> Features>
+ : Processor<Name, GenericItineraries, Features>;
+
+// V4 Processors.
+def : ProcNoItin<"generic", []>;
+def : ProcNoItin<"arm8", []>;
+def : ProcNoItin<"arm810", []>;
+def : ProcNoItin<"strongarm", []>;
+def : ProcNoItin<"strongarm110", []>;
+def : ProcNoItin<"strongarm1100", []>;
+def : ProcNoItin<"strongarm1110", []>;
+
+// V4T Processors.
+def : ProcNoItin<"arm7tdmi", [HasV4TOps]>;
+def : ProcNoItin<"arm7tdmi-s", [HasV4TOps]>;
+def : ProcNoItin<"arm710t", [HasV4TOps]>;
+def : ProcNoItin<"arm720t", [HasV4TOps]>;
+def : ProcNoItin<"arm9", [HasV4TOps]>;
+def : ProcNoItin<"arm9tdmi", [HasV4TOps]>;
+def : ProcNoItin<"arm920", [HasV4TOps]>;
+def : ProcNoItin<"arm920t", [HasV4TOps]>;
+def : ProcNoItin<"arm922t", [HasV4TOps]>;
+def : ProcNoItin<"arm940t", [HasV4TOps]>;
+def : ProcNoItin<"ep9312", [HasV4TOps]>;
+
+// V5T Processors.
+def : ProcNoItin<"arm10tdmi", [HasV5TOps]>;
+def : ProcNoItin<"arm1020t", [HasV5TOps]>;
+
+// V5TE Processors.
+def : ProcNoItin<"arm9e", [HasV5TEOps]>;
+def : ProcNoItin<"arm926ej-s", [HasV5TEOps]>;
+def : ProcNoItin<"arm946e-s", [HasV5TEOps]>;
+def : ProcNoItin<"arm966e-s", [HasV5TEOps]>;
+def : ProcNoItin<"arm968e-s", [HasV5TEOps]>;
+def : ProcNoItin<"arm10e", [HasV5TEOps]>;
+def : ProcNoItin<"arm1020e", [HasV5TEOps]>;
+def : ProcNoItin<"arm1022e", [HasV5TEOps]>;
+def : ProcNoItin<"xscale", [HasV5TEOps]>;
+def : ProcNoItin<"iwmmxt", [HasV5TEOps]>;
+
+// V6 Processors.
+def : Processor<"arm1136j-s", ARMV6Itineraries, [HasV6Ops]>;
+def : Processor<"arm1136jf-s", ARMV6Itineraries, [HasV6Ops, FeatureVFP2,
+ FeatureHasSlowFPVMLx]>;
+def : Processor<"arm1176jz-s", ARMV6Itineraries, [HasV6Ops]>;
+def : Processor<"arm1176jzf-s", ARMV6Itineraries, [HasV6Ops, FeatureVFP2,
+ FeatureHasSlowFPVMLx]>;
+def : Processor<"mpcorenovfp", ARMV6Itineraries, [HasV6Ops]>;
+def : Processor<"mpcore", ARMV6Itineraries, [HasV6Ops, FeatureVFP2,
+ FeatureHasSlowFPVMLx]>;
+
+// V6M Processors.
+def : Processor<"cortex-m0", ARMV6Itineraries, [HasV6Ops, FeatureNoARM,
+ FeatureDB, FeatureMClass]>;
+
+// V6T2 Processors.
+def : Processor<"arm1156t2-s", ARMV6Itineraries, [HasV6T2Ops,
+ FeatureDSPThumb2]>;
+def : Processor<"arm1156t2f-s", ARMV6Itineraries, [HasV6T2Ops, FeatureVFP2,
+ FeatureHasSlowFPVMLx,
+ FeatureDSPThumb2]>;
+
+// V7a Processors.
+def : Processor<"cortex-a8", CortexA8Itineraries,
+ [ProcA8, HasV7Ops, FeatureNEON, FeatureDB,
+ FeatureDSPThumb2]>;
+def : Processor<"cortex-a9", CortexA9Itineraries,
+ [ProcA9, HasV7Ops, FeatureNEON, FeatureDB,
+ FeatureDSPThumb2]>;
+def : Processor<"cortex-a9-mp", CortexA9Itineraries,
+ [ProcA9, HasV7Ops, FeatureNEON, FeatureDB,
+ FeatureDSPThumb2, FeatureMP]>;
+
+// V7M Processors.
+def : ProcNoItin<"cortex-m3", [HasV7Ops,
+ FeatureThumb2, FeatureNoARM, FeatureDB,
+ FeatureHWDiv, FeatureMClass]>;
+
+// V7EM Processors.
+def : ProcNoItin<"cortex-m4", [HasV7Ops,
+ FeatureThumb2, FeatureNoARM, FeatureDB,
+ FeatureHWDiv, FeatureDSPThumb2,
+ FeatureT2XtPk, FeatureVFP2,
+ FeatureVFPOnlySP, FeatureMClass]>;
+
+//===----------------------------------------------------------------------===//
+// Register File Description
+//===----------------------------------------------------------------------===//
+
+include "ARMRegisterInfo.td"
+
+include "ARMCallingConv.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "ARMInstrInfo.td"
+
+def ARMInstrInfo : InstrInfo;
+
+
+//===----------------------------------------------------------------------===//
+// Assembly printer
+//===----------------------------------------------------------------------===//
+// ARM Uses the MC printer for asm output, so make sure the TableGen
+// AsmWriter bits get associated with the correct class.
+def ARMAsmWriter : AsmWriter {
+ string AsmWriterClassName = "InstPrinter";
+ bit isMCAsmWriter = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Declare the target which we are implementing
+//===----------------------------------------------------------------------===//
+
+def ARM : Target {
+ // Pull in Instruction Info:
+ let InstructionSet = ARMInstrInfo;
+
+ let AssemblyWriters = [ARMAsmWriter];
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
new file mode 100644
index 0000000..ea3319f
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -0,0 +1,1938 @@
+//===-- ARMAsmPrinter.cpp - Print machine code to an ARM .s file ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to GAS-format ARM assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "ARM.h"
+#include "ARMAsmPrinter.h"
+#include "ARMBuildAttrs.h"
+#include "ARMBaseRegisterInfo.h"
+#include "ARMConstantPoolValue.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMTargetMachine.h"
+#include "ARMTargetObjectFile.h"
+#include "InstPrinter/ARMInstPrinter.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
+#include "MCTargetDesc/ARMMCExpr.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/Module.h"
+#include "llvm/Type.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCObjectStreamer.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cctype>
+using namespace llvm;
+
+namespace {
+
+ // Per section and per symbol attributes are not supported.
+ // To implement them we would need the ability to delay this emission
+ // until the assembly file is fully parsed/generated as only then do we
+ // know the symbol and section numbers.
+ class AttributeEmitter {
+ public:
+ virtual void MaybeSwitchVendor(StringRef Vendor) = 0;
+ virtual void EmitAttribute(unsigned Attribute, unsigned Value) = 0;
+ virtual void EmitTextAttribute(unsigned Attribute, StringRef String) = 0;
+ virtual void Finish() = 0;
+ virtual ~AttributeEmitter() {}
+ };
+
+ class AsmAttributeEmitter : public AttributeEmitter {
+ MCStreamer &Streamer;
+
+ public:
+ AsmAttributeEmitter(MCStreamer &Streamer_) : Streamer(Streamer_) {}
+ void MaybeSwitchVendor(StringRef Vendor) { }
+
+ void EmitAttribute(unsigned Attribute, unsigned Value) {
+ Streamer.EmitRawText("\t.eabi_attribute " +
+ Twine(Attribute) + ", " + Twine(Value));
+ }
+
+ void EmitTextAttribute(unsigned Attribute, StringRef String) {
+ switch (Attribute) {
+ case ARMBuildAttrs::CPU_name:
+ Streamer.EmitRawText(StringRef("\t.cpu ") + LowercaseString(String));
+ break;
+ /* GAS requires .fpu to be emitted regardless of EABI attribute */
+ case ARMBuildAttrs::Advanced_SIMD_arch:
+ case ARMBuildAttrs::VFP_arch:
+ Streamer.EmitRawText(StringRef("\t.fpu ") + LowercaseString(String));
+ break;
+ default: assert(0 && "Unsupported Text attribute in ASM Mode"); break;
+ }
+ }
+ void Finish() { }
+ };
+
+ class ObjectAttributeEmitter : public AttributeEmitter {
+ // This structure holds all attributes, accounting for
+ // their string/numeric value, so we can later emmit them
+ // in declaration order, keeping all in the same vector
+ struct AttributeItemType {
+ enum {
+ HiddenAttribute = 0,
+ NumericAttribute,
+ TextAttribute
+ } Type;
+ unsigned Tag;
+ unsigned IntValue;
+ StringRef StringValue;
+ } AttributeItem;
+
+ MCObjectStreamer &Streamer;
+ StringRef CurrentVendor;
+ SmallVector<AttributeItemType, 64> Contents;
+
+ // Account for the ULEB/String size of each item,
+ // not just the number of items
+ size_t ContentsSize;
+ // FIXME: this should be in a more generic place, but
+ // getULEBSize() is in MCAsmInfo and will be moved to MCDwarf
+ size_t getULEBSize(int Value) {
+ size_t Size = 0;
+ do {
+ Value >>= 7;
+ Size += sizeof(int8_t); // Is this really necessary?
+ } while (Value);
+ return Size;
+ }
+
+ public:
+ ObjectAttributeEmitter(MCObjectStreamer &Streamer_) :
+ Streamer(Streamer_), CurrentVendor(""), ContentsSize(0) { }
+
+ void MaybeSwitchVendor(StringRef Vendor) {
+ assert(!Vendor.empty() && "Vendor cannot be empty.");
+
+ if (CurrentVendor.empty())
+ CurrentVendor = Vendor;
+ else if (CurrentVendor == Vendor)
+ return;
+ else
+ Finish();
+
+ CurrentVendor = Vendor;
+
+ assert(Contents.size() == 0);
+ }
+
+ void EmitAttribute(unsigned Attribute, unsigned Value) {
+ AttributeItemType attr = {
+ AttributeItemType::NumericAttribute,
+ Attribute,
+ Value,
+ StringRef("")
+ };
+ ContentsSize += getULEBSize(Attribute);
+ ContentsSize += getULEBSize(Value);
+ Contents.push_back(attr);
+ }
+
+ void EmitTextAttribute(unsigned Attribute, StringRef String) {
+ AttributeItemType attr = {
+ AttributeItemType::TextAttribute,
+ Attribute,
+ 0,
+ String
+ };
+ ContentsSize += getULEBSize(Attribute);
+ // String + \0
+ ContentsSize += String.size()+1;
+
+ Contents.push_back(attr);
+ }
+
+ void Finish() {
+ // Vendor size + Vendor name + '\0'
+ const size_t VendorHeaderSize = 4 + CurrentVendor.size() + 1;
+
+ // Tag + Tag Size
+ const size_t TagHeaderSize = 1 + 4;
+
+ Streamer.EmitIntValue(VendorHeaderSize + TagHeaderSize + ContentsSize, 4);
+ Streamer.EmitBytes(CurrentVendor, 0);
+ Streamer.EmitIntValue(0, 1); // '\0'
+
+ Streamer.EmitIntValue(ARMBuildAttrs::File, 1);
+ Streamer.EmitIntValue(TagHeaderSize + ContentsSize, 4);
+
+ // Size should have been accounted for already, now
+ // emit each field as its type (ULEB or String)
+ for (unsigned int i=0; i<Contents.size(); ++i) {
+ AttributeItemType item = Contents[i];
+ Streamer.EmitULEB128IntValue(item.Tag, 0);
+ switch (item.Type) {
+ case AttributeItemType::NumericAttribute:
+ Streamer.EmitULEB128IntValue(item.IntValue, 0);
+ break;
+ case AttributeItemType::TextAttribute:
+ Streamer.EmitBytes(UppercaseString(item.StringValue), 0);
+ Streamer.EmitIntValue(0, 1); // '\0'
+ break;
+ default:
+ assert(0 && "Invalid attribute type");
+ }
+ }
+
+ Contents.clear();
+ }
+ };
+
+} // end of anonymous namespace
+
+MachineLocation ARMAsmPrinter::
+getDebugValueLocation(const MachineInstr *MI) const {
+ MachineLocation Location;
+ assert(MI->getNumOperands() == 4 && "Invalid no. of machine operands!");
+ // Frame address. Currently handles register +- offset only.
+ if (MI->getOperand(0).isReg() && MI->getOperand(1).isImm())
+ Location.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm());
+ else {
+ DEBUG(dbgs() << "DBG_VALUE instruction ignored! " << *MI << "\n");
+ }
+ return Location;
+}
+
+/// EmitDwarfRegOp - Emit dwarf register operation.
+void ARMAsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const {
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ if (RI->getDwarfRegNum(MLoc.getReg(), false) != -1)
+ AsmPrinter::EmitDwarfRegOp(MLoc);
+ else {
+ unsigned Reg = MLoc.getReg();
+ if (Reg >= ARM::S0 && Reg <= ARM::S31) {
+ assert(ARM::S0 + 31 == ARM::S31 && "Unexpected ARM S register numbering");
+ // S registers are described as bit-pieces of a register
+ // S[2x] = DW_OP_regx(256 + (x>>1)) DW_OP_bit_piece(32, 0)
+ // S[2x+1] = DW_OP_regx(256 + (x>>1)) DW_OP_bit_piece(32, 32)
+
+ unsigned SReg = Reg - ARM::S0;
+ bool odd = SReg & 0x1;
+ unsigned Rx = 256 + (SReg >> 1);
+
+ OutStreamer.AddComment("DW_OP_regx for S register");
+ EmitInt8(dwarf::DW_OP_regx);
+
+ OutStreamer.AddComment(Twine(SReg));
+ EmitULEB128(Rx);
+
+ if (odd) {
+ OutStreamer.AddComment("DW_OP_bit_piece 32 32");
+ EmitInt8(dwarf::DW_OP_bit_piece);
+ EmitULEB128(32);
+ EmitULEB128(32);
+ } else {
+ OutStreamer.AddComment("DW_OP_bit_piece 32 0");
+ EmitInt8(dwarf::DW_OP_bit_piece);
+ EmitULEB128(32);
+ EmitULEB128(0);
+ }
+ } else if (Reg >= ARM::Q0 && Reg <= ARM::Q15) {
+ assert(ARM::Q0 + 15 == ARM::Q15 && "Unexpected ARM Q register numbering");
+ // Q registers Q0-Q15 are described by composing two D registers together.
+ // Qx = DW_OP_regx(256+2x) DW_OP_piece(8) DW_OP_regx(256+2x+1)
+ // DW_OP_piece(8)
+
+ unsigned QReg = Reg - ARM::Q0;
+ unsigned D1 = 256 + 2 * QReg;
+ unsigned D2 = D1 + 1;
+
+ OutStreamer.AddComment("DW_OP_regx for Q register: D1");
+ EmitInt8(dwarf::DW_OP_regx);
+ EmitULEB128(D1);
+ OutStreamer.AddComment("DW_OP_piece 8");
+ EmitInt8(dwarf::DW_OP_piece);
+ EmitULEB128(8);
+
+ OutStreamer.AddComment("DW_OP_regx for Q register: D2");
+ EmitInt8(dwarf::DW_OP_regx);
+ EmitULEB128(D2);
+ OutStreamer.AddComment("DW_OP_piece 8");
+ EmitInt8(dwarf::DW_OP_piece);
+ EmitULEB128(8);
+ }
+ }
+}
+
+void ARMAsmPrinter::EmitFunctionEntryLabel() {
+ OutStreamer.ForceCodeRegion();
+
+ if (AFI->isThumbFunction()) {
+ OutStreamer.EmitAssemblerFlag(MCAF_Code16);
+ OutStreamer.EmitThumbFunc(CurrentFnSym);
+ }
+
+ OutStreamer.EmitLabel(CurrentFnSym);
+}
+
+/// runOnMachineFunction - This uses the EmitInstruction()
+/// method to print assembly for each instruction.
+///
+bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+ AFI = MF.getInfo<ARMFunctionInfo>();
+ MCP = MF.getConstantPool();
+
+ return AsmPrinter::runOnMachineFunction(MF);
+}
+
+void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
+ raw_ostream &O, const char *Modifier) {
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ unsigned TF = MO.getTargetFlags();
+
+ switch (MO.getType()) {
+ default:
+ assert(0 && "<unknown operand type>");
+ case MachineOperand::MO_Register: {
+ unsigned Reg = MO.getReg();
+ assert(TargetRegisterInfo::isPhysicalRegister(Reg));
+ assert(!MO.getSubReg() && "Subregs should be eliminated!");
+ O << ARMInstPrinter::getRegisterName(Reg);
+ break;
+ }
+ case MachineOperand::MO_Immediate: {
+ int64_t Imm = MO.getImm();
+ O << '#';
+ if ((Modifier && strcmp(Modifier, "lo16") == 0) ||
+ (TF == ARMII::MO_LO16))
+ O << ":lower16:";
+ else if ((Modifier && strcmp(Modifier, "hi16") == 0) ||
+ (TF == ARMII::MO_HI16))
+ O << ":upper16:";
+ O << Imm;
+ break;
+ }
+ case MachineOperand::MO_MachineBasicBlock:
+ O << *MO.getMBB()->getSymbol();
+ return;
+ case MachineOperand::MO_GlobalAddress: {
+ const GlobalValue *GV = MO.getGlobal();
+ if ((Modifier && strcmp(Modifier, "lo16") == 0) ||
+ (TF & ARMII::MO_LO16))
+ O << ":lower16:";
+ else if ((Modifier && strcmp(Modifier, "hi16") == 0) ||
+ (TF & ARMII::MO_HI16))
+ O << ":upper16:";
+ O << *Mang->getSymbol(GV);
+
+ printOffset(MO.getOffset(), O);
+ if (TF == ARMII::MO_PLT)
+ O << "(PLT)";
+ break;
+ }
+ case MachineOperand::MO_ExternalSymbol: {
+ O << *GetExternalSymbolSymbol(MO.getSymbolName());
+ if (TF == ARMII::MO_PLT)
+ O << "(PLT)";
+ break;
+ }
+ case MachineOperand::MO_ConstantPoolIndex:
+ O << *GetCPISymbol(MO.getIndex());
+ break;
+ case MachineOperand::MO_JumpTableIndex:
+ O << *GetJTISymbol(MO.getIndex());
+ break;
+ }
+}
+
+//===--------------------------------------------------------------------===//
+
+MCSymbol *ARMAsmPrinter::
+GetARMSetPICJumpTableLabel2(unsigned uid, unsigned uid2,
+ const MachineBasicBlock *MBB) const {
+ SmallString<60> Name;
+ raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix()
+ << getFunctionNumber() << '_' << uid << '_' << uid2
+ << "_set_" << MBB->getNumber();
+ return OutContext.GetOrCreateSymbol(Name.str());
+}
+
+MCSymbol *ARMAsmPrinter::
+GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const {
+ SmallString<60> Name;
+ raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix() << "JTI"
+ << getFunctionNumber() << '_' << uid << '_' << uid2;
+ return OutContext.GetOrCreateSymbol(Name.str());
+}
+
+
+MCSymbol *ARMAsmPrinter::GetARMSJLJEHLabel(void) const {
+ SmallString<60> Name;
+ raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix() << "SJLJEH"
+ << getFunctionNumber();
+ return OutContext.GetOrCreateSymbol(Name.str());
+}
+
+bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O) {
+ // Does this asm operand have a single letter operand modifier?
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ default: return true; // Unknown modifier.
+ case 'a': // Print as a memory address.
+ if (MI->getOperand(OpNum).isReg()) {
+ O << "["
+ << ARMInstPrinter::getRegisterName(MI->getOperand(OpNum).getReg())
+ << "]";
+ return false;
+ }
+ // Fallthrough
+ case 'c': // Don't print "#" before an immediate operand.
+ if (!MI->getOperand(OpNum).isImm())
+ return true;
+ O << MI->getOperand(OpNum).getImm();
+ return false;
+ case 'P': // Print a VFP double precision register.
+ case 'q': // Print a NEON quad precision register.
+ printOperand(MI, OpNum, O);
+ return false;
+ case 'y': // Print a VFP single precision register as indexed double.
+ // This uses the ordering of the alias table to get the first 'd' register
+ // that overlaps the 's' register. Also, s0 is an odd register, hence the
+ // odd modulus check below.
+ if (MI->getOperand(OpNum).isReg()) {
+ unsigned Reg = MI->getOperand(OpNum).getReg();
+ const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
+ O << ARMInstPrinter::getRegisterName(TRI->getAliasSet(Reg)[0]) <<
+ (((Reg % 2) == 1) ? "[0]" : "[1]");
+ return false;
+ }
+ return true;
+ case 'B': // Bitwise inverse of integer or symbol without a preceding #.
+ if (!MI->getOperand(OpNum).isImm())
+ return true;
+ O << ~(MI->getOperand(OpNum).getImm());
+ return false;
+ case 'L': // The low 16 bits of an immediate constant.
+ if (!MI->getOperand(OpNum).isImm())
+ return true;
+ O << (MI->getOperand(OpNum).getImm() & 0xffff);
+ return false;
+ case 'M': { // A register range suitable for LDM/STM.
+ if (!MI->getOperand(OpNum).isReg())
+ return true;
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ unsigned RegBegin = MO.getReg();
+ // This takes advantage of the 2 operand-ness of ldm/stm and that we've
+ // already got the operands in registers that are operands to the
+ // inline asm statement.
+
+ O << "{" << ARMInstPrinter::getRegisterName(RegBegin);
+
+ // FIXME: The register allocator not only may not have given us the
+ // registers in sequence, but may not be in ascending registers. This
+ // will require changes in the register allocator that'll need to be
+ // propagated down here if the operands change.
+ unsigned RegOps = OpNum + 1;
+ while (MI->getOperand(RegOps).isReg()) {
+ O << ", "
+ << ARMInstPrinter::getRegisterName(MI->getOperand(RegOps).getReg());
+ RegOps++;
+ }
+
+ O << "}";
+
+ return false;
+ }
+ case 'R': // The most significant register of a pair.
+ case 'Q': { // The least significant register of a pair.
+ if (OpNum == 0)
+ return true;
+ const MachineOperand &FlagsOP = MI->getOperand(OpNum - 1);
+ if (!FlagsOP.isImm())
+ return true;
+ unsigned Flags = FlagsOP.getImm();
+ unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
+ if (NumVals != 2)
+ return true;
+ unsigned RegOp = ExtraCode[0] == 'Q' ? OpNum : OpNum + 1;
+ if (RegOp >= MI->getNumOperands())
+ return true;
+ const MachineOperand &MO = MI->getOperand(RegOp);
+ if (!MO.isReg())
+ return true;
+ unsigned Reg = MO.getReg();
+ O << ARMInstPrinter::getRegisterName(Reg);
+ return false;
+ }
+
+ // These modifiers are not yet supported.
+ case 'p': // The high single-precision register of a VFP double-precision
+ // register.
+ case 'e': // The low doubleword register of a NEON quad register.
+ case 'f': // The high doubleword register of a NEON quad register.
+ case 'h': // A range of VFP/NEON registers suitable for VLD1/VST1.
+ case 'H': // The highest-numbered register of a pair.
+ return true;
+ }
+ }
+
+ printOperand(MI, OpNum, O);
+ return false;
+}
+
+bool ARMAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNum, unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &O) {
+ // Does this asm operand have a single letter operand modifier?
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ case 'A': // A memory operand for a VLD1/VST1 instruction.
+ default: return true; // Unknown modifier.
+ case 'm': // The base register of a memory operand.
+ if (!MI->getOperand(OpNum).isReg())
+ return true;
+ O << ARMInstPrinter::getRegisterName(MI->getOperand(OpNum).getReg());
+ return false;
+ }
+ }
+
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ assert(MO.isReg() && "unexpected inline asm memory operand");
+ O << "[" << ARMInstPrinter::getRegisterName(MO.getReg()) << "]";
+ return false;
+}
+
+void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) {
+ if (Subtarget->isTargetDarwin()) {
+ Reloc::Model RelocM = TM.getRelocationModel();
+ if (RelocM == Reloc::PIC_ || RelocM == Reloc::DynamicNoPIC) {
+ // Declare all the text sections up front (before the DWARF sections
+ // emitted by AsmPrinter::doInitialization) so the assembler will keep
+ // them together at the beginning of the object file. This helps
+ // avoid out-of-range branches that are due a fundamental limitation of
+ // the way symbol offsets are encoded with the current Darwin ARM
+ // relocations.
+ const TargetLoweringObjectFileMachO &TLOFMacho =
+ static_cast<const TargetLoweringObjectFileMachO &>(
+ getObjFileLowering());
+ OutStreamer.SwitchSection(TLOFMacho.getTextSection());
+ OutStreamer.SwitchSection(TLOFMacho.getTextCoalSection());
+ OutStreamer.SwitchSection(TLOFMacho.getConstTextCoalSection());
+ if (RelocM == Reloc::DynamicNoPIC) {
+ const MCSection *sect =
+ OutContext.getMachOSection("__TEXT", "__symbol_stub4",
+ MCSectionMachO::S_SYMBOL_STUBS,
+ 12, SectionKind::getText());
+ OutStreamer.SwitchSection(sect);
+ } else {
+ const MCSection *sect =
+ OutContext.getMachOSection("__TEXT", "__picsymbolstub4",
+ MCSectionMachO::S_SYMBOL_STUBS,
+ 16, SectionKind::getText());
+ OutStreamer.SwitchSection(sect);
+ }
+ const MCSection *StaticInitSect =
+ OutContext.getMachOSection("__TEXT", "__StaticInit",
+ MCSectionMachO::S_REGULAR |
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ SectionKind::getText());
+ OutStreamer.SwitchSection(StaticInitSect);
+ }
+ }
+
+ // Use unified assembler syntax.
+ OutStreamer.EmitAssemblerFlag(MCAF_SyntaxUnified);
+
+ // Emit ARM Build Attributes
+ if (Subtarget->isTargetELF()) {
+
+ emitAttributes();
+ }
+}
+
+
+void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) {
+ if (Subtarget->isTargetDarwin()) {
+ // All darwin targets use mach-o.
+ const TargetLoweringObjectFileMachO &TLOFMacho =
+ static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering());
+ MachineModuleInfoMachO &MMIMacho =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>();
+
+ // Output non-lazy-pointers for external and common global variables.
+ MachineModuleInfoMachO::SymbolListTy Stubs = MMIMacho.GetGVStubList();
+
+ if (!Stubs.empty()) {
+ // Switch with ".non_lazy_symbol_pointer" directive.
+ OutStreamer.SwitchSection(TLOFMacho.getNonLazySymbolPointerSection());
+ EmitAlignment(2);
+ for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
+ // L_foo$stub:
+ OutStreamer.EmitLabel(Stubs[i].first);
+ // .indirect_symbol _foo
+ MachineModuleInfoImpl::StubValueTy &MCSym = Stubs[i].second;
+ OutStreamer.EmitSymbolAttribute(MCSym.getPointer(),MCSA_IndirectSymbol);
+
+ if (MCSym.getInt())
+ // External to current translation unit.
+ OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/);
+ else
+ // Internal to current translation unit.
+ //
+ // When we place the LSDA into the TEXT section, the type info
+ // pointers need to be indirect and pc-rel. We accomplish this by
+ // using NLPs; however, sometimes the types are local to the file.
+ // We need to fill in the value for the NLP in those cases.
+ OutStreamer.EmitValue(MCSymbolRefExpr::Create(MCSym.getPointer(),
+ OutContext),
+ 4/*size*/, 0/*addrspace*/);
+ }
+
+ Stubs.clear();
+ OutStreamer.AddBlankLine();
+ }
+
+ Stubs = MMIMacho.GetHiddenGVStubList();
+ if (!Stubs.empty()) {
+ OutStreamer.SwitchSection(getObjFileLowering().getDataSection());
+ EmitAlignment(2);
+ for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
+ // L_foo$stub:
+ OutStreamer.EmitLabel(Stubs[i].first);
+ // .long _foo
+ OutStreamer.EmitValue(MCSymbolRefExpr::
+ Create(Stubs[i].second.getPointer(),
+ OutContext),
+ 4/*size*/, 0/*addrspace*/);
+ }
+
+ Stubs.clear();
+ OutStreamer.AddBlankLine();
+ }
+
+ // Funny Darwin hack: This flag tells the linker that no global symbols
+ // contain code that falls through to other global symbols (e.g. the obvious
+ // implementation of multiple entry points). If this doesn't occur, the
+ // linker can safely perform dead code stripping. Since LLVM never
+ // generates code that does this, it is always safe to set.
+ OutStreamer.EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Helper routines for EmitStartOfAsmFile() and EmitEndOfAsmFile()
+// FIXME:
+// The following seem like one-off assembler flags, but they actually need
+// to appear in the .ARM.attributes section in ELF.
+// Instead of subclassing the MCELFStreamer, we do the work here.
+
+void ARMAsmPrinter::emitAttributes() {
+
+ emitARMAttributeSection();
+
+ /* GAS expect .fpu to be emitted, regardless of VFP build attribute */
+ bool emitFPU = false;
+ AttributeEmitter *AttrEmitter;
+ if (OutStreamer.hasRawTextSupport()) {
+ AttrEmitter = new AsmAttributeEmitter(OutStreamer);
+ emitFPU = true;
+ } else {
+ MCObjectStreamer &O = static_cast<MCObjectStreamer&>(OutStreamer);
+ AttrEmitter = new ObjectAttributeEmitter(O);
+ }
+
+ AttrEmitter->MaybeSwitchVendor("aeabi");
+
+ std::string CPUString = Subtarget->getCPUString();
+
+ if (CPUString == "cortex-a8" ||
+ Subtarget->isCortexA8()) {
+ AttrEmitter->EmitTextAttribute(ARMBuildAttrs::CPU_name, "cortex-a8");
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v7);
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch_profile,
+ ARMBuildAttrs::ApplicationProfile);
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::ARM_ISA_use,
+ ARMBuildAttrs::Allowed);
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::THUMB_ISA_use,
+ ARMBuildAttrs::AllowThumb32);
+ // Fixme: figure out when this is emitted.
+ //AttrEmitter->EmitAttribute(ARMBuildAttrs::WMMX_arch,
+ // ARMBuildAttrs::AllowWMMXv1);
+ //
+
+ /// ADD additional Else-cases here!
+ } else if (CPUString == "xscale") {
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v5TEJ);
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::ARM_ISA_use,
+ ARMBuildAttrs::Allowed);
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::THUMB_ISA_use,
+ ARMBuildAttrs::Allowed);
+ } else if (CPUString == "generic") {
+ // FIXME: Why these defaults?
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v4T);
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::ARM_ISA_use,
+ ARMBuildAttrs::Allowed);
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::THUMB_ISA_use,
+ ARMBuildAttrs::Allowed);
+ }
+
+ if (Subtarget->hasNEON() && emitFPU) {
+ /* NEON is not exactly a VFP architecture, but GAS emit one of
+ * neon/vfpv3/vfpv2 for .fpu parameters */
+ AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch, "neon");
+ /* If emitted for NEON, omit from VFP below, since you can have both
+ * NEON and VFP in build attributes but only one .fpu */
+ emitFPU = false;
+ }
+
+ /* VFPv3 + .fpu */
+ if (Subtarget->hasVFP3()) {
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::VFP_arch,
+ ARMBuildAttrs::AllowFPv3A);
+ if (emitFPU)
+ AttrEmitter->EmitTextAttribute(ARMBuildAttrs::VFP_arch, "vfpv3");
+
+ /* VFPv2 + .fpu */
+ } else if (Subtarget->hasVFP2()) {
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::VFP_arch,
+ ARMBuildAttrs::AllowFPv2);
+ if (emitFPU)
+ AttrEmitter->EmitTextAttribute(ARMBuildAttrs::VFP_arch, "vfpv2");
+ }
+
+ /* TODO: ARMBuildAttrs::Allowed is not completely accurate,
+ * since NEON can have 1 (allowed) or 2 (MAC operations) */
+ if (Subtarget->hasNEON()) {
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::Advanced_SIMD_arch,
+ ARMBuildAttrs::Allowed);
+ }
+
+ // Signal various FP modes.
+ if (!UnsafeFPMath) {
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_denormal,
+ ARMBuildAttrs::Allowed);
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_exceptions,
+ ARMBuildAttrs::Allowed);
+ }
+
+ if (NoInfsFPMath && NoNaNsFPMath)
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_number_model,
+ ARMBuildAttrs::Allowed);
+ else
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_number_model,
+ ARMBuildAttrs::AllowIEE754);
+
+ // FIXME: add more flags to ARMBuildAttrs.h
+ // 8-bytes alignment stuff.
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_align8_needed, 1);
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_align8_preserved, 1);
+
+ // Hard float. Use both S and D registers and conform to AAPCS-VFP.
+ if (Subtarget->isAAPCS_ABI() && FloatABIType == FloatABI::Hard) {
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_HardFP_use, 3);
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_VFP_args, 1);
+ }
+ // FIXME: Should we signal R9 usage?
+
+ if (Subtarget->hasDivide())
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::DIV_use, 1);
+
+ AttrEmitter->Finish();
+ delete AttrEmitter;
+}
+
+void ARMAsmPrinter::emitARMAttributeSection() {
+ // <format-version>
+ // [ <section-length> "vendor-name"
+ // [ <file-tag> <size> <attribute>*
+ // | <section-tag> <size> <section-number>* 0 <attribute>*
+ // | <symbol-tag> <size> <symbol-number>* 0 <attribute>*
+ // ]+
+ // ]*
+
+ if (OutStreamer.hasRawTextSupport())
+ return;
+
+ const ARMElfTargetObjectFile &TLOFELF =
+ static_cast<const ARMElfTargetObjectFile &>
+ (getObjFileLowering());
+
+ OutStreamer.SwitchSection(TLOFELF.getAttributesSection());
+
+ // Format version
+ OutStreamer.EmitIntValue(0x41, 1);
+}
+
+//===----------------------------------------------------------------------===//
+
+static MCSymbol *getPICLabel(const char *Prefix, unsigned FunctionNumber,
+ unsigned LabelId, MCContext &Ctx) {
+
+ MCSymbol *Label = Ctx.GetOrCreateSymbol(Twine(Prefix)
+ + "PC" + Twine(FunctionNumber) + "_" + Twine(LabelId));
+ return Label;
+}
+
+static MCSymbolRefExpr::VariantKind
+getModifierVariantKind(ARMCP::ARMCPModifier Modifier) {
+ switch (Modifier) {
+ default: llvm_unreachable("Unknown modifier!");
+ case ARMCP::no_modifier: return MCSymbolRefExpr::VK_None;
+ case ARMCP::TLSGD: return MCSymbolRefExpr::VK_ARM_TLSGD;
+ case ARMCP::TPOFF: return MCSymbolRefExpr::VK_ARM_TPOFF;
+ case ARMCP::GOTTPOFF: return MCSymbolRefExpr::VK_ARM_GOTTPOFF;
+ case ARMCP::GOT: return MCSymbolRefExpr::VK_ARM_GOT;
+ case ARMCP::GOTOFF: return MCSymbolRefExpr::VK_ARM_GOTOFF;
+ }
+ return MCSymbolRefExpr::VK_None;
+}
+
+MCSymbol *ARMAsmPrinter::GetARMGVSymbol(const GlobalValue *GV) {
+ bool isIndirect = Subtarget->isTargetDarwin() &&
+ Subtarget->GVIsIndirectSymbol(GV, TM.getRelocationModel());
+ if (!isIndirect)
+ return Mang->getSymbol(GV);
+
+ // FIXME: Remove this when Darwin transition to @GOT like syntax.
+ MCSymbol *MCSym = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
+ MachineModuleInfoMachO &MMIMachO =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>();
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ GV->hasHiddenVisibility() ? MMIMachO.getHiddenGVStubEntry(MCSym) :
+ MMIMachO.getGVStubEntry(MCSym);
+ if (StubSym.getPointer() == 0)
+ StubSym = MachineModuleInfoImpl::
+ StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage());
+ return MCSym;
+}
+
+void ARMAsmPrinter::
+EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
+ int Size = TM.getTargetData()->getTypeAllocSize(MCPV->getType());
+
+ ARMConstantPoolValue *ACPV = static_cast<ARMConstantPoolValue*>(MCPV);
+
+ MCSymbol *MCSym;
+ if (ACPV->isLSDA()) {
+ SmallString<128> Str;
+ raw_svector_ostream OS(Str);
+ OS << MAI->getPrivateGlobalPrefix() << "_LSDA_" << getFunctionNumber();
+ MCSym = OutContext.GetOrCreateSymbol(OS.str());
+ } else if (ACPV->isBlockAddress()) {
+ const BlockAddress *BA =
+ cast<ARMConstantPoolConstant>(ACPV)->getBlockAddress();
+ MCSym = GetBlockAddressSymbol(BA);
+ } else if (ACPV->isGlobalValue()) {
+ const GlobalValue *GV = cast<ARMConstantPoolConstant>(ACPV)->getGV();
+ MCSym = GetARMGVSymbol(GV);
+ } else if (ACPV->isMachineBasicBlock()) {
+ const MachineBasicBlock *MBB = cast<ARMConstantPoolMBB>(ACPV)->getMBB();
+ MCSym = MBB->getSymbol();
+ } else {
+ assert(ACPV->isExtSymbol() && "unrecognized constant pool value");
+ const char *Sym = cast<ARMConstantPoolSymbol>(ACPV)->getSymbol();
+ MCSym = GetExternalSymbolSymbol(Sym);
+ }
+
+ // Create an MCSymbol for the reference.
+ const MCExpr *Expr =
+ MCSymbolRefExpr::Create(MCSym, getModifierVariantKind(ACPV->getModifier()),
+ OutContext);
+
+ if (ACPV->getPCAdjustment()) {
+ MCSymbol *PCLabel = getPICLabel(MAI->getPrivateGlobalPrefix(),
+ getFunctionNumber(),
+ ACPV->getLabelId(),
+ OutContext);
+ const MCExpr *PCRelExpr = MCSymbolRefExpr::Create(PCLabel, OutContext);
+ PCRelExpr =
+ MCBinaryExpr::CreateAdd(PCRelExpr,
+ MCConstantExpr::Create(ACPV->getPCAdjustment(),
+ OutContext),
+ OutContext);
+ if (ACPV->mustAddCurrentAddress()) {
+ // We want "(<expr> - .)", but MC doesn't have a concept of the '.'
+ // label, so just emit a local label end reference that instead.
+ MCSymbol *DotSym = OutContext.CreateTempSymbol();
+ OutStreamer.EmitLabel(DotSym);
+ const MCExpr *DotExpr = MCSymbolRefExpr::Create(DotSym, OutContext);
+ PCRelExpr = MCBinaryExpr::CreateSub(PCRelExpr, DotExpr, OutContext);
+ }
+ Expr = MCBinaryExpr::CreateSub(Expr, PCRelExpr, OutContext);
+ }
+ OutStreamer.EmitValue(Expr, Size);
+}
+
+void ARMAsmPrinter::EmitJumpTable(const MachineInstr *MI) {
+ unsigned Opcode = MI->getOpcode();
+ int OpNum = 1;
+ if (Opcode == ARM::BR_JTadd)
+ OpNum = 2;
+ else if (Opcode == ARM::BR_JTm)
+ OpNum = 3;
+
+ const MachineOperand &MO1 = MI->getOperand(OpNum);
+ const MachineOperand &MO2 = MI->getOperand(OpNum+1); // Unique Id
+ unsigned JTI = MO1.getIndex();
+
+ // Tag the jump table appropriately for precise disassembly.
+ OutStreamer.EmitJumpTable32Region();
+
+ // Emit a label for the jump table.
+ MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel2(JTI, MO2.getImm());
+ OutStreamer.EmitLabel(JTISymbol);
+
+ // Emit each entry of the table.
+ const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
+
+ for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) {
+ MachineBasicBlock *MBB = JTBBs[i];
+ // Construct an MCExpr for the entry. We want a value of the form:
+ // (BasicBlockAddr - TableBeginAddr)
+ //
+ // For example, a table with entries jumping to basic blocks BB0 and BB1
+ // would look like:
+ // LJTI_0_0:
+ // .word (LBB0 - LJTI_0_0)
+ // .word (LBB1 - LJTI_0_0)
+ const MCExpr *Expr = MCSymbolRefExpr::Create(MBB->getSymbol(), OutContext);
+
+ if (TM.getRelocationModel() == Reloc::PIC_)
+ Expr = MCBinaryExpr::CreateSub(Expr, MCSymbolRefExpr::Create(JTISymbol,
+ OutContext),
+ OutContext);
+ // If we're generating a table of Thumb addresses in static relocation
+ // model, we need to add one to keep interworking correctly.
+ else if (AFI->isThumbFunction())
+ Expr = MCBinaryExpr::CreateAdd(Expr, MCConstantExpr::Create(1,OutContext),
+ OutContext);
+ OutStreamer.EmitValue(Expr, 4);
+ }
+}
+
+void ARMAsmPrinter::EmitJump2Table(const MachineInstr *MI) {
+ unsigned Opcode = MI->getOpcode();
+ int OpNum = (Opcode == ARM::t2BR_JT) ? 2 : 1;
+ const MachineOperand &MO1 = MI->getOperand(OpNum);
+ const MachineOperand &MO2 = MI->getOperand(OpNum+1); // Unique Id
+ unsigned JTI = MO1.getIndex();
+
+ // Emit a label for the jump table.
+ if (MI->getOpcode() == ARM::t2TBB_JT) {
+ OutStreamer.EmitJumpTable8Region();
+ } else if (MI->getOpcode() == ARM::t2TBH_JT) {
+ OutStreamer.EmitJumpTable16Region();
+ } else {
+ OutStreamer.EmitJumpTable32Region();
+ }
+
+ MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel2(JTI, MO2.getImm());
+ OutStreamer.EmitLabel(JTISymbol);
+
+ // Emit each entry of the table.
+ const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
+ unsigned OffsetWidth = 4;
+ if (MI->getOpcode() == ARM::t2TBB_JT)
+ OffsetWidth = 1;
+ else if (MI->getOpcode() == ARM::t2TBH_JT)
+ OffsetWidth = 2;
+
+ for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) {
+ MachineBasicBlock *MBB = JTBBs[i];
+ const MCExpr *MBBSymbolExpr = MCSymbolRefExpr::Create(MBB->getSymbol(),
+ OutContext);
+ // If this isn't a TBB or TBH, the entries are direct branch instructions.
+ if (OffsetWidth == 4) {
+ MCInst BrInst;
+ BrInst.setOpcode(ARM::t2B);
+ BrInst.addOperand(MCOperand::CreateExpr(MBBSymbolExpr));
+ BrInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ BrInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(BrInst);
+ continue;
+ }
+ // Otherwise it's an offset from the dispatch instruction. Construct an
+ // MCExpr for the entry. We want a value of the form:
+ // (BasicBlockAddr - TableBeginAddr) / 2
+ //
+ // For example, a TBB table with entries jumping to basic blocks BB0 and BB1
+ // would look like:
+ // LJTI_0_0:
+ // .byte (LBB0 - LJTI_0_0) / 2
+ // .byte (LBB1 - LJTI_0_0) / 2
+ const MCExpr *Expr =
+ MCBinaryExpr::CreateSub(MBBSymbolExpr,
+ MCSymbolRefExpr::Create(JTISymbol, OutContext),
+ OutContext);
+ Expr = MCBinaryExpr::CreateDiv(Expr, MCConstantExpr::Create(2, OutContext),
+ OutContext);
+ OutStreamer.EmitValue(Expr, OffsetWidth);
+ }
+}
+
+void ARMAsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
+ raw_ostream &OS) {
+ unsigned NOps = MI->getNumOperands();
+ assert(NOps==4);
+ OS << '\t' << MAI->getCommentString() << "DEBUG_VALUE: ";
+ // cast away const; DIetc do not take const operands for some reason.
+ DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps-1).getMetadata()));
+ OS << V.getName();
+ OS << " <- ";
+ // Frame address. Currently handles register +- offset only.
+ assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm());
+ OS << '['; printOperand(MI, 0, OS); OS << '+'; printOperand(MI, 1, OS);
+ OS << ']';
+ OS << "+";
+ printOperand(MI, NOps-2, OS);
+}
+
+static void populateADROperands(MCInst &Inst, unsigned Dest,
+ const MCSymbol *Label,
+ unsigned pred, unsigned ccreg,
+ MCContext &Ctx) {
+ const MCExpr *SymbolExpr = MCSymbolRefExpr::Create(Label, Ctx);
+ Inst.addOperand(MCOperand::CreateReg(Dest));
+ Inst.addOperand(MCOperand::CreateExpr(SymbolExpr));
+ // Add predicate operands.
+ Inst.addOperand(MCOperand::CreateImm(pred));
+ Inst.addOperand(MCOperand::CreateReg(ccreg));
+}
+
+void ARMAsmPrinter::EmitPatchedInstruction(const MachineInstr *MI,
+ unsigned Opcode) {
+ MCInst TmpInst;
+
+ // Emit the instruction as usual, just patch the opcode.
+ LowerARMMachineInstrToMCInst(MI, TmpInst, *this);
+ TmpInst.setOpcode(Opcode);
+ OutStreamer.EmitInstruction(TmpInst);
+}
+
+void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
+ assert(MI->getFlag(MachineInstr::FrameSetup) &&
+ "Only instruction which are involved into frame setup code are allowed");
+
+ const MachineFunction &MF = *MI->getParent()->getParent();
+ const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
+ const ARMFunctionInfo &AFI = *MF.getInfo<ARMFunctionInfo>();
+
+ unsigned FramePtr = RegInfo->getFrameRegister(MF);
+ unsigned Opc = MI->getOpcode();
+ unsigned SrcReg, DstReg;
+
+ if (Opc == ARM::tPUSH || Opc == ARM::tLDRpci) {
+ // Two special cases:
+ // 1) tPUSH does not have src/dst regs.
+ // 2) for Thumb1 code we sometimes materialize the constant via constpool
+ // load. Yes, this is pretty fragile, but for now I don't see better
+ // way... :(
+ SrcReg = DstReg = ARM::SP;
+ } else {
+ SrcReg = MI->getOperand(1).getReg();
+ DstReg = MI->getOperand(0).getReg();
+ }
+
+ // Try to figure out the unwinding opcode out of src / dst regs.
+ if (MI->getDesc().mayStore()) {
+ // Register saves.
+ assert(DstReg == ARM::SP &&
+ "Only stack pointer as a destination reg is supported");
+
+ SmallVector<unsigned, 4> RegList;
+ // Skip src & dst reg, and pred ops.
+ unsigned StartOp = 2 + 2;
+ // Use all the operands.
+ unsigned NumOffset = 0;
+
+ switch (Opc) {
+ default:
+ MI->dump();
+ assert(0 && "Unsupported opcode for unwinding information");
+ case ARM::tPUSH:
+ // Special case here: no src & dst reg, but two extra imp ops.
+ StartOp = 2; NumOffset = 2;
+ case ARM::STMDB_UPD:
+ case ARM::t2STMDB_UPD:
+ case ARM::VSTMDDB_UPD:
+ assert(SrcReg == ARM::SP &&
+ "Only stack pointer as a source reg is supported");
+ for (unsigned i = StartOp, NumOps = MI->getNumOperands() - NumOffset;
+ i != NumOps; ++i)
+ RegList.push_back(MI->getOperand(i).getReg());
+ break;
+ case ARM::STR_PRE_IMM:
+ case ARM::STR_PRE_REG:
+ assert(MI->getOperand(2).getReg() == ARM::SP &&
+ "Only stack pointer as a source reg is supported");
+ RegList.push_back(SrcReg);
+ break;
+ }
+ OutStreamer.EmitRegSave(RegList, Opc == ARM::VSTMDDB_UPD);
+ } else {
+ // Changes of stack / frame pointer.
+ if (SrcReg == ARM::SP) {
+ int64_t Offset = 0;
+ switch (Opc) {
+ default:
+ MI->dump();
+ assert(0 && "Unsupported opcode for unwinding information");
+ case ARM::MOVr:
+ Offset = 0;
+ break;
+ case ARM::ADDri:
+ Offset = -MI->getOperand(2).getImm();
+ break;
+ case ARM::SUBri:
+ Offset = MI->getOperand(2).getImm();
+ break;
+ case ARM::tSUBspi:
+ Offset = MI->getOperand(2).getImm()*4;
+ break;
+ case ARM::tADDspi:
+ case ARM::tADDrSPi:
+ Offset = -MI->getOperand(2).getImm()*4;
+ break;
+ case ARM::tLDRpci: {
+ // Grab the constpool index and check, whether it corresponds to
+ // original or cloned constpool entry.
+ unsigned CPI = MI->getOperand(1).getIndex();
+ const MachineConstantPool *MCP = MF.getConstantPool();
+ if (CPI >= MCP->getConstants().size())
+ CPI = AFI.getOriginalCPIdx(CPI);
+ assert(CPI != -1U && "Invalid constpool index");
+
+ // Derive the actual offset.
+ const MachineConstantPoolEntry &CPE = MCP->getConstants()[CPI];
+ assert(!CPE.isMachineConstantPoolEntry() && "Invalid constpool entry");
+ // FIXME: Check for user, it should be "add" instruction!
+ Offset = -cast<ConstantInt>(CPE.Val.ConstVal)->getSExtValue();
+ break;
+ }
+ }
+
+ if (DstReg == FramePtr && FramePtr != ARM::SP)
+ // Set-up of the frame pointer. Positive values correspond to "add"
+ // instruction.
+ OutStreamer.EmitSetFP(FramePtr, ARM::SP, -Offset);
+ else if (DstReg == ARM::SP) {
+ // Change of SP by an offset. Positive values correspond to "sub"
+ // instruction.
+ OutStreamer.EmitPad(Offset);
+ } else {
+ MI->dump();
+ assert(0 && "Unsupported opcode for unwinding information");
+ }
+ } else if (DstReg == ARM::SP) {
+ // FIXME: .movsp goes here
+ MI->dump();
+ assert(0 && "Unsupported opcode for unwinding information");
+ }
+ else {
+ MI->dump();
+ assert(0 && "Unsupported opcode for unwinding information");
+ }
+ }
+}
+
+extern cl::opt<bool> EnableARMEHABI;
+
+// Simple pseudo-instructions have their lowering (with expansion to real
+// instructions) auto-generated.
+#include "ARMGenMCPseudoLowering.inc"
+
+void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ if (MI->getOpcode() != ARM::CONSTPOOL_ENTRY)
+ OutStreamer.EmitCodeRegion();
+
+ // Emit unwinding stuff for frame-related instructions
+ if (EnableARMEHABI && MI->getFlag(MachineInstr::FrameSetup))
+ EmitUnwindingInstruction(MI);
+
+ // Do any auto-generated pseudo lowerings.
+ if (emitPseudoExpansionLowering(OutStreamer, MI))
+ return;
+
+ assert(!convertAddSubFlagsOpcode(MI->getOpcode()) &&
+ "Pseudo flag setting opcode should be expanded early");
+
+ // Check for manual lowerings.
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ case ARM::t2MOVi32imm: assert(0 && "Should be lowered by thumb2it pass");
+ case ARM::DBG_VALUE: {
+ if (isVerbose() && OutStreamer.hasRawTextSupport()) {
+ SmallString<128> TmpStr;
+ raw_svector_ostream OS(TmpStr);
+ PrintDebugValueComment(MI, OS);
+ OutStreamer.EmitRawText(StringRef(OS.str()));
+ }
+ return;
+ }
+ case ARM::LEApcrel:
+ case ARM::tLEApcrel:
+ case ARM::t2LEApcrel: {
+ // FIXME: Need to also handle globals and externals
+ MCInst TmpInst;
+ TmpInst.setOpcode(MI->getOpcode() == ARM::t2LEApcrel ? ARM::t2ADR
+ : (MI->getOpcode() == ARM::tLEApcrel ? ARM::tADR
+ : ARM::ADR));
+ populateADROperands(TmpInst, MI->getOperand(0).getReg(),
+ GetCPISymbol(MI->getOperand(1).getIndex()),
+ MI->getOperand(2).getImm(), MI->getOperand(3).getReg(),
+ OutContext);
+ OutStreamer.EmitInstruction(TmpInst);
+ return;
+ }
+ case ARM::LEApcrelJT:
+ case ARM::tLEApcrelJT:
+ case ARM::t2LEApcrelJT: {
+ MCInst TmpInst;
+ TmpInst.setOpcode(MI->getOpcode() == ARM::t2LEApcrelJT ? ARM::t2ADR
+ : (MI->getOpcode() == ARM::tLEApcrelJT ? ARM::tADR
+ : ARM::ADR));
+ populateADROperands(TmpInst, MI->getOperand(0).getReg(),
+ GetARMJTIPICJumpTableLabel2(MI->getOperand(1).getIndex(),
+ MI->getOperand(2).getImm()),
+ MI->getOperand(3).getImm(), MI->getOperand(4).getReg(),
+ OutContext);
+ OutStreamer.EmitInstruction(TmpInst);
+ return;
+ }
+ // Darwin call instructions are just normal call instructions with different
+ // clobber semantics (they clobber R9).
+ case ARM::BXr9_CALL:
+ case ARM::BX_CALL: {
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::MOVr);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::LR));
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ // Add 's' bit operand (always reg0 for this)
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::BX);
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ return;
+ }
+ case ARM::tBXr9_CALL:
+ case ARM::tBX_CALL: {
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tMOVr);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::LR));
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tBX);
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ return;
+ }
+ case ARM::BMOVPCRXr9_CALL:
+ case ARM::BMOVPCRX_CALL: {
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::MOVr);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::LR));
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ // Add 's' bit operand (always reg0 for this)
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::MOVr);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ // Add 's' bit operand (always reg0 for this)
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ return;
+ }
+ case ARM::MOVi16_ga_pcrel:
+ case ARM::t2MOVi16_ga_pcrel: {
+ MCInst TmpInst;
+ TmpInst.setOpcode(Opc == ARM::MOVi16_ga_pcrel? ARM::MOVi16 : ARM::t2MOVi16);
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+
+ unsigned TF = MI->getOperand(1).getTargetFlags();
+ bool isPIC = TF == ARMII::MO_LO16_NONLAZY_PIC;
+ const GlobalValue *GV = MI->getOperand(1).getGlobal();
+ MCSymbol *GVSym = GetARMGVSymbol(GV);
+ const MCExpr *GVSymExpr = MCSymbolRefExpr::Create(GVSym, OutContext);
+ if (isPIC) {
+ MCSymbol *LabelSym = getPICLabel(MAI->getPrivateGlobalPrefix(),
+ getFunctionNumber(),
+ MI->getOperand(2).getImm(), OutContext);
+ const MCExpr *LabelSymExpr= MCSymbolRefExpr::Create(LabelSym, OutContext);
+ unsigned PCAdj = (Opc == ARM::MOVi16_ga_pcrel) ? 8 : 4;
+ const MCExpr *PCRelExpr =
+ ARMMCExpr::CreateLower16(MCBinaryExpr::CreateSub(GVSymExpr,
+ MCBinaryExpr::CreateAdd(LabelSymExpr,
+ MCConstantExpr::Create(PCAdj, OutContext),
+ OutContext), OutContext), OutContext);
+ TmpInst.addOperand(MCOperand::CreateExpr(PCRelExpr));
+ } else {
+ const MCExpr *RefExpr= ARMMCExpr::CreateLower16(GVSymExpr, OutContext);
+ TmpInst.addOperand(MCOperand::CreateExpr(RefExpr));
+ }
+
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ // Add 's' bit operand (always reg0 for this)
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ return;
+ }
+ case ARM::MOVTi16_ga_pcrel:
+ case ARM::t2MOVTi16_ga_pcrel: {
+ MCInst TmpInst;
+ TmpInst.setOpcode(Opc == ARM::MOVTi16_ga_pcrel
+ ? ARM::MOVTi16 : ARM::t2MOVTi16);
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg()));
+
+ unsigned TF = MI->getOperand(2).getTargetFlags();
+ bool isPIC = TF == ARMII::MO_HI16_NONLAZY_PIC;
+ const GlobalValue *GV = MI->getOperand(2).getGlobal();
+ MCSymbol *GVSym = GetARMGVSymbol(GV);
+ const MCExpr *GVSymExpr = MCSymbolRefExpr::Create(GVSym, OutContext);
+ if (isPIC) {
+ MCSymbol *LabelSym = getPICLabel(MAI->getPrivateGlobalPrefix(),
+ getFunctionNumber(),
+ MI->getOperand(3).getImm(), OutContext);
+ const MCExpr *LabelSymExpr= MCSymbolRefExpr::Create(LabelSym, OutContext);
+ unsigned PCAdj = (Opc == ARM::MOVTi16_ga_pcrel) ? 8 : 4;
+ const MCExpr *PCRelExpr =
+ ARMMCExpr::CreateUpper16(MCBinaryExpr::CreateSub(GVSymExpr,
+ MCBinaryExpr::CreateAdd(LabelSymExpr,
+ MCConstantExpr::Create(PCAdj, OutContext),
+ OutContext), OutContext), OutContext);
+ TmpInst.addOperand(MCOperand::CreateExpr(PCRelExpr));
+ } else {
+ const MCExpr *RefExpr= ARMMCExpr::CreateUpper16(GVSymExpr, OutContext);
+ TmpInst.addOperand(MCOperand::CreateExpr(RefExpr));
+ }
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ // Add 's' bit operand (always reg0 for this)
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ return;
+ }
+ case ARM::tPICADD: {
+ // This is a pseudo op for a label + instruction sequence, which looks like:
+ // LPC0:
+ // add r0, pc
+ // This adds the address of LPC0 to r0.
+
+ // Emit the label.
+ OutStreamer.EmitLabel(getPICLabel(MAI->getPrivateGlobalPrefix(),
+ getFunctionNumber(), MI->getOperand(2).getImm(),
+ OutContext));
+
+ // Form and emit the add.
+ MCInst AddInst;
+ AddInst.setOpcode(ARM::tADDhirr);
+ AddInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ AddInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ AddInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ // Add predicate operands.
+ AddInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ AddInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(AddInst);
+ return;
+ }
+ case ARM::PICADD: {
+ // This is a pseudo op for a label + instruction sequence, which looks like:
+ // LPC0:
+ // add r0, pc, r0
+ // This adds the address of LPC0 to r0.
+
+ // Emit the label.
+ OutStreamer.EmitLabel(getPICLabel(MAI->getPrivateGlobalPrefix(),
+ getFunctionNumber(), MI->getOperand(2).getImm(),
+ OutContext));
+
+ // Form and emit the add.
+ MCInst AddInst;
+ AddInst.setOpcode(ARM::ADDrr);
+ AddInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ AddInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ AddInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg()));
+ // Add predicate operands.
+ AddInst.addOperand(MCOperand::CreateImm(MI->getOperand(3).getImm()));
+ AddInst.addOperand(MCOperand::CreateReg(MI->getOperand(4).getReg()));
+ // Add 's' bit operand (always reg0 for this)
+ AddInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(AddInst);
+ return;
+ }
+ case ARM::PICSTR:
+ case ARM::PICSTRB:
+ case ARM::PICSTRH:
+ case ARM::PICLDR:
+ case ARM::PICLDRB:
+ case ARM::PICLDRH:
+ case ARM::PICLDRSB:
+ case ARM::PICLDRSH: {
+ // This is a pseudo op for a label + instruction sequence, which looks like:
+ // LPC0:
+ // OP r0, [pc, r0]
+ // The LCP0 label is referenced by a constant pool entry in order to get
+ // a PC-relative address at the ldr instruction.
+
+ // Emit the label.
+ OutStreamer.EmitLabel(getPICLabel(MAI->getPrivateGlobalPrefix(),
+ getFunctionNumber(), MI->getOperand(2).getImm(),
+ OutContext));
+
+ // Form and emit the load
+ unsigned Opcode;
+ switch (MI->getOpcode()) {
+ default:
+ llvm_unreachable("Unexpected opcode!");
+ case ARM::PICSTR: Opcode = ARM::STRrs; break;
+ case ARM::PICSTRB: Opcode = ARM::STRBrs; break;
+ case ARM::PICSTRH: Opcode = ARM::STRH; break;
+ case ARM::PICLDR: Opcode = ARM::LDRrs; break;
+ case ARM::PICLDRB: Opcode = ARM::LDRBrs; break;
+ case ARM::PICLDRH: Opcode = ARM::LDRH; break;
+ case ARM::PICLDRSB: Opcode = ARM::LDRSB; break;
+ case ARM::PICLDRSH: Opcode = ARM::LDRSH; break;
+ }
+ MCInst LdStInst;
+ LdStInst.setOpcode(Opcode);
+ LdStInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ LdStInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ LdStInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg()));
+ LdStInst.addOperand(MCOperand::CreateImm(0));
+ // Add predicate operands.
+ LdStInst.addOperand(MCOperand::CreateImm(MI->getOperand(3).getImm()));
+ LdStInst.addOperand(MCOperand::CreateReg(MI->getOperand(4).getReg()));
+ OutStreamer.EmitInstruction(LdStInst);
+
+ return;
+ }
+ case ARM::CONSTPOOL_ENTRY: {
+ /// CONSTPOOL_ENTRY - This instruction represents a floating constant pool
+ /// in the function. The first operand is the ID# for this instruction, the
+ /// second is the index into the MachineConstantPool that this is, the third
+ /// is the size in bytes of this constant pool entry.
+ unsigned LabelId = (unsigned)MI->getOperand(0).getImm();
+ unsigned CPIdx = (unsigned)MI->getOperand(1).getIndex();
+
+ EmitAlignment(2);
+
+ // Mark the constant pool entry as data if we're not already in a data
+ // region.
+ OutStreamer.EmitDataRegion();
+ OutStreamer.EmitLabel(GetCPISymbol(LabelId));
+
+ const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPIdx];
+ if (MCPE.isMachineConstantPoolEntry())
+ EmitMachineConstantPoolValue(MCPE.Val.MachineCPVal);
+ else
+ EmitGlobalConstant(MCPE.Val.ConstVal);
+ return;
+ }
+ case ARM::t2BR_JT: {
+ // Lower and emit the instruction itself, then the jump table following it.
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tMOVr);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ // Output the data for the jump table itself
+ EmitJump2Table(MI);
+ return;
+ }
+ case ARM::t2TBB_JT: {
+ // Lower and emit the instruction itself, then the jump table following it.
+ MCInst TmpInst;
+
+ TmpInst.setOpcode(ARM::t2TBB);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ // Output the data for the jump table itself
+ EmitJump2Table(MI);
+ // Make sure the next instruction is 2-byte aligned.
+ EmitAlignment(1);
+ return;
+ }
+ case ARM::t2TBH_JT: {
+ // Lower and emit the instruction itself, then the jump table following it.
+ MCInst TmpInst;
+
+ TmpInst.setOpcode(ARM::t2TBH);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ // Output the data for the jump table itself
+ EmitJump2Table(MI);
+ return;
+ }
+ case ARM::tBR_JTr:
+ case ARM::BR_JTr: {
+ // Lower and emit the instruction itself, then the jump table following it.
+ // mov pc, target
+ MCInst TmpInst;
+ unsigned Opc = MI->getOpcode() == ARM::BR_JTr ?
+ ARM::MOVr : ARM::tMOVr;
+ TmpInst.setOpcode(Opc);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ // Add 's' bit operand (always reg0 for this)
+ if (Opc == ARM::MOVr)
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+
+ // Make sure the Thumb jump table is 4-byte aligned.
+ if (Opc == ARM::tMOVr)
+ EmitAlignment(2);
+
+ // Output the data for the jump table itself
+ EmitJumpTable(MI);
+ return;
+ }
+ case ARM::BR_JTm: {
+ // Lower and emit the instruction itself, then the jump table following it.
+ // ldr pc, target
+ MCInst TmpInst;
+ if (MI->getOperand(1).getReg() == 0) {
+ // literal offset
+ TmpInst.setOpcode(ARM::LDRi12);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(2).getImm()));
+ } else {
+ TmpInst.setOpcode(ARM::LDRrs);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg()));
+ TmpInst.addOperand(MCOperand::CreateImm(0));
+ }
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+
+ // Output the data for the jump table itself
+ EmitJumpTable(MI);
+ return;
+ }
+ case ARM::BR_JTadd: {
+ // Lower and emit the instruction itself, then the jump table following it.
+ // add pc, target, idx
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::ADDrr);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg()));
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ // Add 's' bit operand (always reg0 for this)
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+
+ // Output the data for the jump table itself
+ EmitJumpTable(MI);
+ return;
+ }
+ case ARM::TRAP: {
+ // Non-Darwin binutils don't yet support the "trap" mnemonic.
+ // FIXME: Remove this special case when they do.
+ if (!Subtarget->isTargetDarwin()) {
+ //.long 0xe7ffdefe @ trap
+ uint32_t Val = 0xe7ffdefeUL;
+ OutStreamer.AddComment("trap");
+ OutStreamer.EmitIntValue(Val, 4);
+ return;
+ }
+ break;
+ }
+ case ARM::tTRAP: {
+ // Non-Darwin binutils don't yet support the "trap" mnemonic.
+ // FIXME: Remove this special case when they do.
+ if (!Subtarget->isTargetDarwin()) {
+ //.short 57086 @ trap
+ uint16_t Val = 0xdefe;
+ OutStreamer.AddComment("trap");
+ OutStreamer.EmitIntValue(Val, 2);
+ return;
+ }
+ break;
+ }
+ case ARM::t2Int_eh_sjlj_setjmp:
+ case ARM::t2Int_eh_sjlj_setjmp_nofp:
+ case ARM::tInt_eh_sjlj_setjmp: {
+ // Two incoming args: GPR:$src, GPR:$val
+ // mov $val, pc
+ // adds $val, #7
+ // str $val, [$src, #4]
+ // movs r0, #0
+ // b 1f
+ // movs r0, #1
+ // 1:
+ unsigned SrcReg = MI->getOperand(0).getReg();
+ unsigned ValReg = MI->getOperand(1).getReg();
+ MCSymbol *Label = GetARMSJLJEHLabel();
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tMOVr);
+ TmpInst.addOperand(MCOperand::CreateReg(ValReg));
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.AddComment("eh_setjmp begin");
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tADDi3);
+ TmpInst.addOperand(MCOperand::CreateReg(ValReg));
+ // 's' bit operand
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::CPSR));
+ TmpInst.addOperand(MCOperand::CreateReg(ValReg));
+ TmpInst.addOperand(MCOperand::CreateImm(7));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tSTRi);
+ TmpInst.addOperand(MCOperand::CreateReg(ValReg));
+ TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
+ // The offset immediate is #4. The operand value is scaled by 4 for the
+ // tSTR instruction.
+ TmpInst.addOperand(MCOperand::CreateImm(1));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tMOVi8);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::R0));
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::CPSR));
+ TmpInst.addOperand(MCOperand::CreateImm(0));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ const MCExpr *SymbolExpr = MCSymbolRefExpr::Create(Label, OutContext);
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tB);
+ TmpInst.addOperand(MCOperand::CreateExpr(SymbolExpr));
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tMOVi8);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::R0));
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::CPSR));
+ TmpInst.addOperand(MCOperand::CreateImm(1));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.AddComment("eh_setjmp end");
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ OutStreamer.EmitLabel(Label);
+ return;
+ }
+
+ case ARM::Int_eh_sjlj_setjmp_nofp:
+ case ARM::Int_eh_sjlj_setjmp: {
+ // Two incoming args: GPR:$src, GPR:$val
+ // add $val, pc, #8
+ // str $val, [$src, #+4]
+ // mov r0, #0
+ // add pc, pc, #0
+ // mov r0, #1
+ unsigned SrcReg = MI->getOperand(0).getReg();
+ unsigned ValReg = MI->getOperand(1).getReg();
+
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::ADDri);
+ TmpInst.addOperand(MCOperand::CreateReg(ValReg));
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ TmpInst.addOperand(MCOperand::CreateImm(8));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ // 's' bit operand (always reg0 for this).
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.AddComment("eh_setjmp begin");
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::STRi12);
+ TmpInst.addOperand(MCOperand::CreateReg(ValReg));
+ TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
+ TmpInst.addOperand(MCOperand::CreateImm(4));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::MOVi);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::R0));
+ TmpInst.addOperand(MCOperand::CreateImm(0));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ // 's' bit operand (always reg0 for this).
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::ADDri);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ TmpInst.addOperand(MCOperand::CreateImm(0));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ // 's' bit operand (always reg0 for this).
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::MOVi);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::R0));
+ TmpInst.addOperand(MCOperand::CreateImm(1));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ // 's' bit operand (always reg0 for this).
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.AddComment("eh_setjmp end");
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ return;
+ }
+ case ARM::Int_eh_sjlj_longjmp: {
+ // ldr sp, [$src, #8]
+ // ldr $scratch, [$src, #4]
+ // ldr r7, [$src]
+ // bx $scratch
+ unsigned SrcReg = MI->getOperand(0).getReg();
+ unsigned ScratchReg = MI->getOperand(1).getReg();
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::LDRi12);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::SP));
+ TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
+ TmpInst.addOperand(MCOperand::CreateImm(8));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::LDRi12);
+ TmpInst.addOperand(MCOperand::CreateReg(ScratchReg));
+ TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
+ TmpInst.addOperand(MCOperand::CreateImm(4));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::LDRi12);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::R7));
+ TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
+ TmpInst.addOperand(MCOperand::CreateImm(0));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::BX);
+ TmpInst.addOperand(MCOperand::CreateReg(ScratchReg));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ return;
+ }
+ case ARM::tInt_eh_sjlj_longjmp: {
+ // ldr $scratch, [$src, #8]
+ // mov sp, $scratch
+ // ldr $scratch, [$src, #4]
+ // ldr r7, [$src]
+ // bx $scratch
+ unsigned SrcReg = MI->getOperand(0).getReg();
+ unsigned ScratchReg = MI->getOperand(1).getReg();
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tLDRi);
+ TmpInst.addOperand(MCOperand::CreateReg(ScratchReg));
+ TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
+ // The offset immediate is #8. The operand value is scaled by 4 for the
+ // tLDR instruction.
+ TmpInst.addOperand(MCOperand::CreateImm(2));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tMOVr);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::SP));
+ TmpInst.addOperand(MCOperand::CreateReg(ScratchReg));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tLDRi);
+ TmpInst.addOperand(MCOperand::CreateReg(ScratchReg));
+ TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
+ TmpInst.addOperand(MCOperand::CreateImm(1));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tLDRr);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::R7));
+ TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tBX);
+ TmpInst.addOperand(MCOperand::CreateReg(ScratchReg));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ return;
+ }
+ }
+
+ MCInst TmpInst;
+ LowerARMMachineInstrToMCInst(MI, TmpInst, *this);
+
+ OutStreamer.EmitInstruction(TmpInst);
+}
+
+//===----------------------------------------------------------------------===//
+// Target Registry Stuff
+//===----------------------------------------------------------------------===//
+
+// Force static initialization.
+extern "C" void LLVMInitializeARMAsmPrinter() {
+ RegisterAsmPrinter<ARMAsmPrinter> X(TheARMTarget);
+ RegisterAsmPrinter<ARMAsmPrinter> Y(TheThumbTarget);
+}
+
diff --git a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h
new file mode 100644
index 0000000..7741fc4
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h
@@ -0,0 +1,127 @@
+//===-- ARMAsmPrinter.h - Print machine code to an ARM .s file ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// ARM Assembly printer class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMASMPRINTER_H
+#define ARMASMPRINTER_H
+
+#include "ARM.h"
+#include "ARMTargetMachine.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+
+class MCOperand;
+
+namespace ARM {
+ enum DW_ISA {
+ DW_ISA_ARM_thumb = 1,
+ DW_ISA_ARM_arm = 2
+ };
+}
+
+class LLVM_LIBRARY_VISIBILITY ARMAsmPrinter : public AsmPrinter {
+
+ /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
+ /// make the right decision when printing asm code for different targets.
+ const ARMSubtarget *Subtarget;
+
+ /// AFI - Keep a pointer to ARMFunctionInfo for the current
+ /// MachineFunction.
+ ARMFunctionInfo *AFI;
+
+ /// MCP - Keep a pointer to constantpool entries of the current
+ /// MachineFunction.
+ const MachineConstantPool *MCP;
+
+public:
+ explicit ARMAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : AsmPrinter(TM, Streamer), AFI(NULL), MCP(NULL) {
+ Subtarget = &TM.getSubtarget<ARMSubtarget>();
+ }
+
+ virtual const char *getPassName() const {
+ return "ARM Assembly Printer";
+ }
+
+ void printOperand(const MachineInstr *MI, int OpNum, raw_ostream &O,
+ const char *Modifier = 0);
+
+ virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O);
+ virtual bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum,
+ unsigned AsmVariant,
+ const char *ExtraCode, raw_ostream &O);
+
+ void EmitJumpTable(const MachineInstr *MI);
+ void EmitJump2Table(const MachineInstr *MI);
+ virtual void EmitInstruction(const MachineInstr *MI);
+ bool runOnMachineFunction(MachineFunction &F);
+
+ virtual void EmitConstantPool() {} // we emit constant pools customly!
+ virtual void EmitFunctionEntryLabel();
+ void EmitStartOfAsmFile(Module &M);
+ void EmitEndOfAsmFile(Module &M);
+
+ // lowerOperand - Convert a MachineOperand into the equivalent MCOperand.
+ bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp);
+
+private:
+ // Helpers for EmitStartOfAsmFile() and EmitEndOfAsmFile()
+ void emitAttributes();
+
+ // Helper for ELF .o only
+ void emitARMAttributeSection();
+
+ // Generic helper used to emit e.g. ARMv5 mul pseudos
+ void EmitPatchedInstruction(const MachineInstr *MI, unsigned TargetOpc);
+
+ void EmitUnwindingInstruction(const MachineInstr *MI);
+
+ // emitPseudoExpansionLowering - tblgen'erated.
+ bool emitPseudoExpansionLowering(MCStreamer &OutStreamer,
+ const MachineInstr *MI);
+
+public:
+ void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS);
+
+ MachineLocation getDebugValueLocation(const MachineInstr *MI) const;
+
+ /// EmitDwarfRegOp - Emit dwarf register operation.
+ virtual void EmitDwarfRegOp(const MachineLocation &MLoc) const;
+
+ virtual unsigned getISAEncoding() {
+ // ARM/Darwin adds ISA to the DWARF info for each function.
+ if (!Subtarget->isTargetDarwin())
+ return 0;
+ return Subtarget->isThumb() ?
+ llvm::ARM::DW_ISA_ARM_thumb : llvm::ARM::DW_ISA_ARM_arm;
+ }
+
+ MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol);
+ MCSymbol *GetARMSetPICJumpTableLabel2(unsigned uid, unsigned uid2,
+ const MachineBasicBlock *MBB) const;
+ MCSymbol *GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const;
+
+ MCSymbol *GetARMSJLJEHLabel(void) const;
+
+ MCSymbol *GetARMGVSymbol(const GlobalValue *GV);
+
+ /// EmitMachineConstantPoolValue - Print a machine constantpool value to
+ /// the .s file.
+ virtual void EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV);
+};
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
new file mode 100644
index 0000000..408edfc
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -0,0 +1,2830 @@
+//===- ARMBaseInstrInfo.cpp - ARM Instruction Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Base ARM implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMBaseInstrInfo.h"
+#include "ARM.h"
+#include "ARMConstantPoolValue.h"
+#include "ARMHazardRecognizer.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMRegisterInfo.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/ADT/STLExtras.h"
+
+#define GET_INSTRINFO_CTOR
+#include "ARMGenInstrInfo.inc"
+
+using namespace llvm;
+
+static cl::opt<bool>
+EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
+ cl::desc("Enable ARM 2-addr to 3-addr conv"));
+
+static cl::opt<bool>
+WidenVMOVS("widen-vmovs", cl::Hidden,
+ cl::desc("Widen ARM vmovs to vmovd when possible"));
+
+/// ARM_MLxEntry - Record information about MLA / MLS instructions.
+struct ARM_MLxEntry {
+ unsigned MLxOpc; // MLA / MLS opcode
+ unsigned MulOpc; // Expanded multiplication opcode
+ unsigned AddSubOpc; // Expanded add / sub opcode
+ bool NegAcc; // True if the acc is negated before the add / sub.
+ bool HasLane; // True if instruction has an extra "lane" operand.
+};
+
+static const ARM_MLxEntry ARM_MLxTable[] = {
+ // MLxOpc, MulOpc, AddSubOpc, NegAcc, HasLane
+ // fp scalar ops
+ { ARM::VMLAS, ARM::VMULS, ARM::VADDS, false, false },
+ { ARM::VMLSS, ARM::VMULS, ARM::VSUBS, false, false },
+ { ARM::VMLAD, ARM::VMULD, ARM::VADDD, false, false },
+ { ARM::VMLSD, ARM::VMULD, ARM::VSUBD, false, false },
+ { ARM::VNMLAS, ARM::VNMULS, ARM::VSUBS, true, false },
+ { ARM::VNMLSS, ARM::VMULS, ARM::VSUBS, true, false },
+ { ARM::VNMLAD, ARM::VNMULD, ARM::VSUBD, true, false },
+ { ARM::VNMLSD, ARM::VMULD, ARM::VSUBD, true, false },
+
+ // fp SIMD ops
+ { ARM::VMLAfd, ARM::VMULfd, ARM::VADDfd, false, false },
+ { ARM::VMLSfd, ARM::VMULfd, ARM::VSUBfd, false, false },
+ { ARM::VMLAfq, ARM::VMULfq, ARM::VADDfq, false, false },
+ { ARM::VMLSfq, ARM::VMULfq, ARM::VSUBfq, false, false },
+ { ARM::VMLAslfd, ARM::VMULslfd, ARM::VADDfd, false, true },
+ { ARM::VMLSslfd, ARM::VMULslfd, ARM::VSUBfd, false, true },
+ { ARM::VMLAslfq, ARM::VMULslfq, ARM::VADDfq, false, true },
+ { ARM::VMLSslfq, ARM::VMULslfq, ARM::VSUBfq, false, true },
+};
+
+ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI)
+ : ARMGenInstrInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP),
+ Subtarget(STI) {
+ for (unsigned i = 0, e = array_lengthof(ARM_MLxTable); i != e; ++i) {
+ if (!MLxEntryMap.insert(std::make_pair(ARM_MLxTable[i].MLxOpc, i)).second)
+ assert(false && "Duplicated entries?");
+ MLxHazardOpcodes.insert(ARM_MLxTable[i].AddSubOpc);
+ MLxHazardOpcodes.insert(ARM_MLxTable[i].MulOpc);
+ }
+}
+
+// Use a ScoreboardHazardRecognizer for prepass ARM scheduling. TargetInstrImpl
+// currently defaults to no prepass hazard recognizer.
+ScheduleHazardRecognizer *ARMBaseInstrInfo::
+CreateTargetHazardRecognizer(const TargetMachine *TM,
+ const ScheduleDAG *DAG) const {
+ if (usePreRAHazardRecognizer()) {
+ const InstrItineraryData *II = TM->getInstrItineraryData();
+ return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched");
+ }
+ return TargetInstrInfoImpl::CreateTargetHazardRecognizer(TM, DAG);
+}
+
+ScheduleHazardRecognizer *ARMBaseInstrInfo::
+CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
+ const ScheduleDAG *DAG) const {
+ if (Subtarget.isThumb2() || Subtarget.hasVFP2())
+ return (ScheduleHazardRecognizer *)
+ new ARMHazardRecognizer(II, *this, getRegisterInfo(), Subtarget, DAG);
+ return TargetInstrInfoImpl::CreateTargetPostRAHazardRecognizer(II, DAG);
+}
+
+MachineInstr *
+ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
+ MachineBasicBlock::iterator &MBBI,
+ LiveVariables *LV) const {
+ // FIXME: Thumb2 support.
+
+ if (!EnableARM3Addr)
+ return NULL;
+
+ MachineInstr *MI = MBBI;
+ MachineFunction &MF = *MI->getParent()->getParent();
+ uint64_t TSFlags = MI->getDesc().TSFlags;
+ bool isPre = false;
+ switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) {
+ default: return NULL;
+ case ARMII::IndexModePre:
+ isPre = true;
+ break;
+ case ARMII::IndexModePost:
+ break;
+ }
+
+ // Try splitting an indexed load/store to an un-indexed one plus an add/sub
+ // operation.
+ unsigned MemOpc = getUnindexedOpcode(MI->getOpcode());
+ if (MemOpc == 0)
+ return NULL;
+
+ MachineInstr *UpdateMI = NULL;
+ MachineInstr *MemMI = NULL;
+ unsigned AddrMode = (TSFlags & ARMII::AddrModeMask);
+ const MCInstrDesc &MCID = MI->getDesc();
+ unsigned NumOps = MCID.getNumOperands();
+ bool isLoad = !MCID.mayStore();
+ const MachineOperand &WB = isLoad ? MI->getOperand(1) : MI->getOperand(0);
+ const MachineOperand &Base = MI->getOperand(2);
+ const MachineOperand &Offset = MI->getOperand(NumOps-3);
+ unsigned WBReg = WB.getReg();
+ unsigned BaseReg = Base.getReg();
+ unsigned OffReg = Offset.getReg();
+ unsigned OffImm = MI->getOperand(NumOps-2).getImm();
+ ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI->getOperand(NumOps-1).getImm();
+ switch (AddrMode) {
+ default:
+ assert(false && "Unknown indexed op!");
+ return NULL;
+ case ARMII::AddrMode2: {
+ bool isSub = ARM_AM::getAM2Op(OffImm) == ARM_AM::sub;
+ unsigned Amt = ARM_AM::getAM2Offset(OffImm);
+ if (OffReg == 0) {
+ if (ARM_AM::getSOImmVal(Amt) == -1)
+ // Can't encode it in a so_imm operand. This transformation will
+ // add more than 1 instruction. Abandon!
+ return NULL;
+ UpdateMI = BuildMI(MF, MI->getDebugLoc(),
+ get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
+ .addReg(BaseReg).addImm(Amt)
+ .addImm(Pred).addReg(0).addReg(0);
+ } else if (Amt != 0) {
+ ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm);
+ unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt);
+ UpdateMI = BuildMI(MF, MI->getDebugLoc(),
+ get(isSub ? ARM::SUBrsi : ARM::ADDrsi), WBReg)
+ .addReg(BaseReg).addReg(OffReg).addReg(0).addImm(SOOpc)
+ .addImm(Pred).addReg(0).addReg(0);
+ } else
+ UpdateMI = BuildMI(MF, MI->getDebugLoc(),
+ get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
+ .addReg(BaseReg).addReg(OffReg)
+ .addImm(Pred).addReg(0).addReg(0);
+ break;
+ }
+ case ARMII::AddrMode3 : {
+ bool isSub = ARM_AM::getAM3Op(OffImm) == ARM_AM::sub;
+ unsigned Amt = ARM_AM::getAM3Offset(OffImm);
+ if (OffReg == 0)
+ // Immediate is 8-bits. It's guaranteed to fit in a so_imm operand.
+ UpdateMI = BuildMI(MF, MI->getDebugLoc(),
+ get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
+ .addReg(BaseReg).addImm(Amt)
+ .addImm(Pred).addReg(0).addReg(0);
+ else
+ UpdateMI = BuildMI(MF, MI->getDebugLoc(),
+ get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
+ .addReg(BaseReg).addReg(OffReg)
+ .addImm(Pred).addReg(0).addReg(0);
+ break;
+ }
+ }
+
+ std::vector<MachineInstr*> NewMIs;
+ if (isPre) {
+ if (isLoad)
+ MemMI = BuildMI(MF, MI->getDebugLoc(),
+ get(MemOpc), MI->getOperand(0).getReg())
+ .addReg(WBReg).addImm(0).addImm(Pred);
+ else
+ MemMI = BuildMI(MF, MI->getDebugLoc(),
+ get(MemOpc)).addReg(MI->getOperand(1).getReg())
+ .addReg(WBReg).addReg(0).addImm(0).addImm(Pred);
+ NewMIs.push_back(MemMI);
+ NewMIs.push_back(UpdateMI);
+ } else {
+ if (isLoad)
+ MemMI = BuildMI(MF, MI->getDebugLoc(),
+ get(MemOpc), MI->getOperand(0).getReg())
+ .addReg(BaseReg).addImm(0).addImm(Pred);
+ else
+ MemMI = BuildMI(MF, MI->getDebugLoc(),
+ get(MemOpc)).addReg(MI->getOperand(1).getReg())
+ .addReg(BaseReg).addReg(0).addImm(0).addImm(Pred);
+ if (WB.isDead())
+ UpdateMI->getOperand(0).setIsDead();
+ NewMIs.push_back(UpdateMI);
+ NewMIs.push_back(MemMI);
+ }
+
+ // Transfer LiveVariables states, kill / dead info.
+ if (LV) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+ unsigned Reg = MO.getReg();
+
+ LiveVariables::VarInfo &VI = LV->getVarInfo(Reg);
+ if (MO.isDef()) {
+ MachineInstr *NewMI = (Reg == WBReg) ? UpdateMI : MemMI;
+ if (MO.isDead())
+ LV->addVirtualRegisterDead(Reg, NewMI);
+ }
+ if (MO.isUse() && MO.isKill()) {
+ for (unsigned j = 0; j < 2; ++j) {
+ // Look at the two new MI's in reverse order.
+ MachineInstr *NewMI = NewMIs[j];
+ if (!NewMI->readsRegister(Reg))
+ continue;
+ LV->addVirtualRegisterKilled(Reg, NewMI);
+ if (VI.removeKill(MI))
+ VI.Kills.push_back(NewMI);
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ MFI->insert(MBBI, NewMIs[1]);
+ MFI->insert(MBBI, NewMIs[0]);
+ return NewMIs[0];
+}
+
+// Branch analysis.
+bool
+ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ // If the block has no terminators, it just falls into the block after it.
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin())
+ return false;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return false;
+ --I;
+ }
+ if (!isUnpredicatedTerminator(I))
+ return false;
+
+ // Get the last instruction in the block.
+ MachineInstr *LastInst = I;
+
+ // If there is only one terminator instruction, process it.
+ unsigned LastOpc = LastInst->getOpcode();
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+ if (isUncondBranchOpcode(LastOpc)) {
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+ if (isCondBranchOpcode(LastOpc)) {
+ // Block ends with fall-through condbranch.
+ TBB = LastInst->getOperand(0).getMBB();
+ Cond.push_back(LastInst->getOperand(1));
+ Cond.push_back(LastInst->getOperand(2));
+ return false;
+ }
+ return true; // Can't handle indirect branch.
+ }
+
+ // Get the instruction before it if it is a terminator.
+ MachineInstr *SecondLastInst = I;
+ unsigned SecondLastOpc = SecondLastInst->getOpcode();
+
+ // If AllowModify is true and the block ends with two or more unconditional
+ // branches, delete all but the first unconditional branch.
+ if (AllowModify && isUncondBranchOpcode(LastOpc)) {
+ while (isUncondBranchOpcode(SecondLastOpc)) {
+ LastInst->eraseFromParent();
+ LastInst = SecondLastInst;
+ LastOpc = LastInst->getOpcode();
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+ // Return now the only terminator is an unconditional branch.
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ } else {
+ SecondLastInst = I;
+ SecondLastOpc = SecondLastInst->getOpcode();
+ }
+ }
+ }
+
+ // If there are three terminators, we don't know what sort of block this is.
+ if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))
+ return true;
+
+ // If the block ends with a B and a Bcc, handle it.
+ if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ Cond.push_back(SecondLastInst->getOperand(1));
+ Cond.push_back(SecondLastInst->getOperand(2));
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+
+ // If the block ends with two unconditional branches, handle it. The second
+ // one is not executed, so remove it.
+ if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ I = LastInst;
+ if (AllowModify)
+ I->eraseFromParent();
+ return false;
+ }
+
+ // ...likewise if it ends with a branch table followed by an unconditional
+ // branch. The branch folder can create these, and we must get rid of them for
+ // correctness of Thumb constant islands.
+ if ((isJumpTableBranchOpcode(SecondLastOpc) ||
+ isIndirectBranchOpcode(SecondLastOpc)) &&
+ isUncondBranchOpcode(LastOpc)) {
+ I = LastInst;
+ if (AllowModify)
+ I->eraseFromParent();
+ return true;
+ }
+
+ // Otherwise, can't handle this.
+ return true;
+}
+
+
+unsigned ARMBaseInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin()) return 0;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return 0;
+ --I;
+ }
+ if (!isUncondBranchOpcode(I->getOpcode()) &&
+ !isCondBranchOpcode(I->getOpcode()))
+ return 0;
+
+ // Remove the branch.
+ I->eraseFromParent();
+
+ I = MBB.end();
+
+ if (I == MBB.begin()) return 1;
+ --I;
+ if (!isCondBranchOpcode(I->getOpcode()))
+ return 1;
+
+ // Remove the branch.
+ I->eraseFromParent();
+ return 2;
+}
+
+unsigned
+ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
+ ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>();
+ int BOpc = !AFI->isThumbFunction()
+ ? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB);
+ int BccOpc = !AFI->isThumbFunction()
+ ? ARM::Bcc : (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc);
+ bool isThumb = AFI->isThumbFunction() || AFI->isThumb2Function();
+
+ // Shouldn't be a fall through.
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert((Cond.size() == 2 || Cond.size() == 0) &&
+ "ARM branch conditions have two components!");
+
+ if (FBB == 0) {
+ if (Cond.empty()) { // Unconditional branch?
+ if (isThumb)
+ BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB).addImm(ARMCC::AL).addReg(0);
+ else
+ BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
+ } else
+ BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB)
+ .addImm(Cond[0].getImm()).addReg(Cond[1].getReg());
+ return 1;
+ }
+
+ // Two-way conditional branch.
+ BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB)
+ .addImm(Cond[0].getImm()).addReg(Cond[1].getReg());
+ if (isThumb)
+ BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB).addImm(ARMCC::AL).addReg(0);
+ else
+ BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
+ return 2;
+}
+
+bool ARMBaseInstrInfo::
+ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
+ ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm();
+ Cond[0].setImm(ARMCC::getOppositeCondition(CC));
+ return false;
+}
+
+bool ARMBaseInstrInfo::
+PredicateInstruction(MachineInstr *MI,
+ const SmallVectorImpl<MachineOperand> &Pred) const {
+ unsigned Opc = MI->getOpcode();
+ if (isUncondBranchOpcode(Opc)) {
+ MI->setDesc(get(getMatchingCondBranchOpcode(Opc)));
+ MI->addOperand(MachineOperand::CreateImm(Pred[0].getImm()));
+ MI->addOperand(MachineOperand::CreateReg(Pred[1].getReg(), false));
+ return true;
+ }
+
+ int PIdx = MI->findFirstPredOperandIdx();
+ if (PIdx != -1) {
+ MachineOperand &PMO = MI->getOperand(PIdx);
+ PMO.setImm(Pred[0].getImm());
+ MI->getOperand(PIdx+1).setReg(Pred[1].getReg());
+ return true;
+ }
+ return false;
+}
+
+bool ARMBaseInstrInfo::
+SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+ const SmallVectorImpl<MachineOperand> &Pred2) const {
+ if (Pred1.size() > 2 || Pred2.size() > 2)
+ return false;
+
+ ARMCC::CondCodes CC1 = (ARMCC::CondCodes)Pred1[0].getImm();
+ ARMCC::CondCodes CC2 = (ARMCC::CondCodes)Pred2[0].getImm();
+ if (CC1 == CC2)
+ return true;
+
+ switch (CC1) {
+ default:
+ return false;
+ case ARMCC::AL:
+ return true;
+ case ARMCC::HS:
+ return CC2 == ARMCC::HI;
+ case ARMCC::LS:
+ return CC2 == ARMCC::LO || CC2 == ARMCC::EQ;
+ case ARMCC::GE:
+ return CC2 == ARMCC::GT;
+ case ARMCC::LE:
+ return CC2 == ARMCC::LT;
+ }
+}
+
+bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const {
+ // FIXME: This confuses implicit_def with optional CPSR def.
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (!MCID.getImplicitDefs() && !MCID.hasOptionalDef())
+ return false;
+
+ bool Found = false;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.getReg() == ARM::CPSR) {
+ Pred.push_back(MO);
+ Found = true;
+ }
+ }
+
+ return Found;
+}
+
+/// isPredicable - Return true if the specified instruction can be predicated.
+/// By default, this returns true for every instruction with a
+/// PredicateOperand.
+bool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const {
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (!MCID.isPredicable())
+ return false;
+
+ if ((MCID.TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) {
+ ARMFunctionInfo *AFI =
+ MI->getParent()->getParent()->getInfo<ARMFunctionInfo>();
+ return AFI->isThumb2Function();
+ }
+ return true;
+}
+
+/// FIXME: Works around a gcc miscompilation with -fstrict-aliasing.
+LLVM_ATTRIBUTE_NOINLINE
+static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
+ unsigned JTI);
+static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
+ unsigned JTI) {
+ assert(JTI < JT.size());
+ return JT[JTI].MBBs.size();
+}
+
+/// GetInstSize - Return the size of the specified MachineInstr.
+///
+unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
+ const MachineBasicBlock &MBB = *MI->getParent();
+ const MachineFunction *MF = MBB.getParent();
+ const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
+
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (MCID.getSize())
+ return MCID.getSize();
+
+ // If this machine instr is an inline asm, measure it.
+ if (MI->getOpcode() == ARM::INLINEASM)
+ return getInlineAsmLength(MI->getOperand(0).getSymbolName(), *MAI);
+ if (MI->isLabel())
+ return 0;
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ case TargetOpcode::IMPLICIT_DEF:
+ case TargetOpcode::KILL:
+ case TargetOpcode::PROLOG_LABEL:
+ case TargetOpcode::EH_LABEL:
+ case TargetOpcode::DBG_VALUE:
+ return 0;
+ case ARM::MOVi16_ga_pcrel:
+ case ARM::MOVTi16_ga_pcrel:
+ case ARM::t2MOVi16_ga_pcrel:
+ case ARM::t2MOVTi16_ga_pcrel:
+ return 4;
+ case ARM::MOVi32imm:
+ case ARM::t2MOVi32imm:
+ return 8;
+ case ARM::CONSTPOOL_ENTRY:
+ // If this machine instr is a constant pool entry, its size is recorded as
+ // operand #2.
+ return MI->getOperand(2).getImm();
+ case ARM::Int_eh_sjlj_longjmp:
+ return 16;
+ case ARM::tInt_eh_sjlj_longjmp:
+ return 10;
+ case ARM::Int_eh_sjlj_setjmp:
+ case ARM::Int_eh_sjlj_setjmp_nofp:
+ return 20;
+ case ARM::tInt_eh_sjlj_setjmp:
+ case ARM::t2Int_eh_sjlj_setjmp:
+ case ARM::t2Int_eh_sjlj_setjmp_nofp:
+ return 12;
+ case ARM::BR_JTr:
+ case ARM::BR_JTm:
+ case ARM::BR_JTadd:
+ case ARM::tBR_JTr:
+ case ARM::t2BR_JT:
+ case ARM::t2TBB_JT:
+ case ARM::t2TBH_JT: {
+ // These are jumptable branches, i.e. a branch followed by an inlined
+ // jumptable. The size is 4 + 4 * number of entries. For TBB, each
+ // entry is one byte; TBH two byte each.
+ unsigned EntrySize = (Opc == ARM::t2TBB_JT)
+ ? 1 : ((Opc == ARM::t2TBH_JT) ? 2 : 4);
+ unsigned NumOps = MCID.getNumOperands();
+ MachineOperand JTOP =
+ MI->getOperand(NumOps - (MCID.isPredicable() ? 3 : 2));
+ unsigned JTI = JTOP.getIndex();
+ const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+ assert(MJTI != 0);
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ assert(JTI < JT.size());
+ // Thumb instructions are 2 byte aligned, but JT entries are 4 byte
+ // 4 aligned. The assembler / linker may add 2 byte padding just before
+ // the JT entries. The size does not include this padding; the
+ // constant islands pass does separate bookkeeping for it.
+ // FIXME: If we know the size of the function is less than (1 << 16) *2
+ // bytes, we can use 16-bit entries instead. Then there won't be an
+ // alignment issue.
+ unsigned InstSize = (Opc == ARM::tBR_JTr || Opc == ARM::t2BR_JT) ? 2 : 4;
+ unsigned NumEntries = getNumJTEntries(JT, JTI);
+ if (Opc == ARM::t2TBB_JT && (NumEntries & 1))
+ // Make sure the instruction that follows TBB is 2-byte aligned.
+ // FIXME: Constant island pass should insert an "ALIGN" instruction
+ // instead.
+ ++NumEntries;
+ return NumEntries * EntrySize + InstSize;
+ }
+ default:
+ // Otherwise, pseudo-instruction sizes are zero.
+ return 0;
+ }
+ return 0; // Not reached
+}
+
+void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ bool GPRDest = ARM::GPRRegClass.contains(DestReg);
+ bool GPRSrc = ARM::GPRRegClass.contains(SrcReg);
+
+ if (GPRDest && GPRSrc) {
+ AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc))));
+ return;
+ }
+
+ bool SPRDest = ARM::SPRRegClass.contains(DestReg);
+ bool SPRSrc = ARM::SPRRegClass.contains(SrcReg);
+
+ unsigned Opc = 0;
+ if (SPRDest && SPRSrc)
+ Opc = ARM::VMOVS;
+ else if (GPRDest && SPRSrc)
+ Opc = ARM::VMOVRS;
+ else if (SPRDest && GPRSrc)
+ Opc = ARM::VMOVSR;
+ else if (ARM::DPRRegClass.contains(DestReg, SrcReg))
+ Opc = ARM::VMOVD;
+ else if (ARM::QPRRegClass.contains(DestReg, SrcReg))
+ Opc = ARM::VORRq;
+
+ if (Opc) {
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg);
+ MIB.addReg(SrcReg, getKillRegState(KillSrc));
+ if (Opc == ARM::VORRq)
+ MIB.addReg(SrcReg, getKillRegState(KillSrc));
+ AddDefaultPred(MIB);
+ return;
+ }
+
+ // Generate instructions for VMOVQQ and VMOVQQQQ pseudos in place.
+ if (ARM::QQPRRegClass.contains(DestReg, SrcReg) ||
+ ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) {
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ assert(ARM::qsub_0 + 3 == ARM::qsub_3 && "Expected contiguous enum.");
+ unsigned EndSubReg = ARM::QQPRRegClass.contains(DestReg, SrcReg) ?
+ ARM::qsub_1 : ARM::qsub_3;
+ for (unsigned i = ARM::qsub_0, e = EndSubReg + 1; i != e; ++i) {
+ unsigned Dst = TRI->getSubReg(DestReg, i);
+ unsigned Src = TRI->getSubReg(SrcReg, i);
+ MachineInstrBuilder Mov =
+ AddDefaultPred(BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VORRq))
+ .addReg(Dst, RegState::Define)
+ .addReg(Src, getKillRegState(KillSrc))
+ .addReg(Src, getKillRegState(KillSrc)));
+ if (i == EndSubReg) {
+ Mov->addRegisterDefined(DestReg, TRI);
+ if (KillSrc)
+ Mov->addRegisterKilled(SrcReg, TRI);
+ }
+ }
+ return;
+ }
+ llvm_unreachable("Impossible reg-to-reg copy");
+}
+
+static const
+MachineInstrBuilder &AddDReg(MachineInstrBuilder &MIB,
+ unsigned Reg, unsigned SubIdx, unsigned State,
+ const TargetRegisterInfo *TRI) {
+ if (!SubIdx)
+ return MIB.addReg(Reg, State);
+
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
+ return MIB.addReg(Reg, State, SubIdx);
+}
+
+void ARMBaseInstrInfo::
+storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned SrcReg, bool isKill, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ unsigned Align = MFI.getObjectAlignment(FI);
+
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo(
+ PseudoSourceValue::getFixedStack(FI)),
+ MachineMemOperand::MOStore,
+ MFI.getObjectSize(FI),
+ Align);
+
+ switch (RC->getSize()) {
+ case 4:
+ if (ARM::GPRRegClass.hasSubClassEq(RC)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STRi12))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRS))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ } else
+ llvm_unreachable("Unknown reg class!");
+ break;
+ case 8:
+ if (ARM::DPRRegClass.hasSubClassEq(RC)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRD))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ } else
+ llvm_unreachable("Unknown reg class!");
+ break;
+ case 16:
+ if (ARM::QPRRegClass.hasSubClassEq(RC)) {
+ if (Align >= 16 && getRegisterInfo().needsStackRealignment(MF)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q64Pseudo))
+ .addFrameIndex(FI).addImm(16)
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addMemOperand(MMO));
+ } else {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMQIA))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI)
+ .addMemOperand(MMO));
+ }
+ } else
+ llvm_unreachable("Unknown reg class!");
+ break;
+ case 32:
+ if (ARM::QQPRRegClass.hasSubClassEq(RC)) {
+ if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
+ // FIXME: It's possible to only store part of the QQ register if the
+ // spilled def has a sub-register index.
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1d64QPseudo))
+ .addFrameIndex(FI).addImm(16)
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addMemOperand(MMO));
+ } else {
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
+ .addFrameIndex(FI))
+ .addMemOperand(MMO);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
+ AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
+ }
+ } else
+ llvm_unreachable("Unknown reg class!");
+ break;
+ case 64:
+ if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
+ .addFrameIndex(FI))
+ .addMemOperand(MMO);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, 0, TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI);
+ AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI);
+ } else
+ llvm_unreachable("Unknown reg class!");
+ break;
+ default:
+ llvm_unreachable("Unknown reg class!");
+ }
+}
+
+unsigned
+ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case ARM::STRrs:
+ case ARM::t2STRs: // FIXME: don't use t2STRs to access frame.
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(2).isReg() &&
+ MI->getOperand(3).isImm() &&
+ MI->getOperand(2).getReg() == 0 &&
+ MI->getOperand(3).getImm() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ case ARM::STRi12:
+ case ARM::t2STRi12:
+ case ARM::tSTRspi:
+ case ARM::VSTRD:
+ case ARM::VSTRS:
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(2).isImm() &&
+ MI->getOperand(2).getImm() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ case ARM::VST1q64Pseudo:
+ if (MI->getOperand(0).isFI() &&
+ MI->getOperand(2).getSubReg() == 0) {
+ FrameIndex = MI->getOperand(0).getIndex();
+ return MI->getOperand(2).getReg();
+ }
+ break;
+ case ARM::VSTMQIA:
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(0).getSubReg() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+
+ return 0;
+}
+
+unsigned ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const {
+ const MachineMemOperand *Dummy;
+ return MI->getDesc().mayStore() && hasStoreToStackSlot(MI, Dummy, FrameIndex);
+}
+
+void ARMBaseInstrInfo::
+loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DestReg, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ unsigned Align = MFI.getObjectAlignment(FI);
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(
+ MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+ MachineMemOperand::MOLoad,
+ MFI.getObjectSize(FI),
+ Align);
+
+ switch (RC->getSize()) {
+ case 4:
+ if (ARM::GPRRegClass.hasSubClassEq(RC)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg)
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+
+ } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg)
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ } else
+ llvm_unreachable("Unknown reg class!");
+ break;
+ case 8:
+ if (ARM::DPRRegClass.hasSubClassEq(RC)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ } else
+ llvm_unreachable("Unknown reg class!");
+ break;
+ case 16:
+ if (ARM::QPRRegClass.hasSubClassEq(RC)) {
+ if (Align >= 16 && getRegisterInfo().needsStackRealignment(MF)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64Pseudo), DestReg)
+ .addFrameIndex(FI).addImm(16)
+ .addMemOperand(MMO));
+ } else {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMQIA), DestReg)
+ .addFrameIndex(FI)
+ .addMemOperand(MMO));
+ }
+ } else
+ llvm_unreachable("Unknown reg class!");
+ break;
+ case 32:
+ if (ARM::QQPRRegClass.hasSubClassEq(RC)) {
+ if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg)
+ .addFrameIndex(FI).addImm(16)
+ .addMemOperand(MMO));
+ } else {
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
+ .addFrameIndex(FI))
+ .addMemOperand(MMO);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI);
+ MIB.addReg(DestReg, RegState::Define | RegState::Implicit);
+ }
+ } else
+ llvm_unreachable("Unknown reg class!");
+ break;
+ case 64:
+ if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
+ .addFrameIndex(FI))
+ .addMemOperand(MMO);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::Define, TRI);
+ MIB.addReg(DestReg, RegState::Define | RegState::Implicit);
+ } else
+ llvm_unreachable("Unknown reg class!");
+ break;
+ default:
+ llvm_unreachable("Unknown regclass!");
+ }
+}
+
+unsigned
+ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case ARM::LDRrs:
+ case ARM::t2LDRs: // FIXME: don't use t2LDRs to access frame.
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(2).isReg() &&
+ MI->getOperand(3).isImm() &&
+ MI->getOperand(2).getReg() == 0 &&
+ MI->getOperand(3).getImm() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ case ARM::LDRi12:
+ case ARM::t2LDRi12:
+ case ARM::tLDRspi:
+ case ARM::VLDRD:
+ case ARM::VLDRS:
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(2).isImm() &&
+ MI->getOperand(2).getImm() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ case ARM::VLD1q64Pseudo:
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(0).getSubReg() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ case ARM::VLDMQIA:
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(0).getSubReg() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+
+ return 0;
+}
+
+unsigned ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const {
+ const MachineMemOperand *Dummy;
+ return MI->getDesc().mayLoad() && hasLoadFromStackSlot(MI, Dummy, FrameIndex);
+}
+
+bool ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const{
+ // This hook gets to expand COPY instructions before they become
+ // copyPhysReg() calls. Look for VMOVS instructions that can legally be
+ // widened to VMOVD. We prefer the VMOVD when possible because it may be
+ // changed into a VORR that can go down the NEON pipeline.
+ if (!WidenVMOVS || !MI->isCopy())
+ return false;
+
+ // Look for a copy between even S-registers. That is where we keep floats
+ // when using NEON v2f32 instructions for f32 arithmetic.
+ unsigned DstRegS = MI->getOperand(0).getReg();
+ unsigned SrcRegS = MI->getOperand(1).getReg();
+ if (!ARM::SPRRegClass.contains(DstRegS, SrcRegS))
+ return false;
+
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ unsigned DstRegD = TRI->getMatchingSuperReg(DstRegS, ARM::ssub_0,
+ &ARM::DPRRegClass);
+ unsigned SrcRegD = TRI->getMatchingSuperReg(SrcRegS, ARM::ssub_0,
+ &ARM::DPRRegClass);
+ if (!DstRegD || !SrcRegD)
+ return false;
+
+ // We want to widen this into a DstRegD = VMOVD SrcRegD copy. This is only
+ // legal if the COPY already defines the full DstRegD, and it isn't a
+ // sub-register insertion.
+ if (!MI->definesRegister(DstRegD, TRI) || MI->readsRegister(DstRegD, TRI))
+ return false;
+
+ // A dead copy shouldn't show up here, but reject it just in case.
+ if (MI->getOperand(0).isDead())
+ return false;
+
+ // All clear, widen the COPY.
+ DEBUG(dbgs() << "widening: " << *MI);
+
+ // Get rid of the old <imp-def> of DstRegD. Leave it if it defines a Q-reg
+ // or some other super-register.
+ int ImpDefIdx = MI->findRegisterDefOperandIdx(DstRegD);
+ if (ImpDefIdx != -1)
+ MI->RemoveOperand(ImpDefIdx);
+
+ // Change the opcode and operands.
+ MI->setDesc(get(ARM::VMOVD));
+ MI->getOperand(0).setReg(DstRegD);
+ MI->getOperand(1).setReg(SrcRegD);
+ AddDefaultPred(MachineInstrBuilder(MI));
+
+ // We are now reading SrcRegD instead of SrcRegS. This may upset the
+ // register scavenger and machine verifier, so we need to indicate that we
+ // are reading an undefined value from SrcRegD, but a proper value from
+ // SrcRegS.
+ MI->getOperand(1).setIsUndef();
+ MachineInstrBuilder(MI).addReg(SrcRegS, RegState::Implicit);
+
+ // SrcRegD may actually contain an unrelated value in the ssub_1
+ // sub-register. Don't kill it. Only kill the ssub_0 sub-register.
+ if (MI->getOperand(1).isKill()) {
+ MI->getOperand(1).setIsKill(false);
+ MI->addRegisterKilled(SrcRegS, TRI, true);
+ }
+
+ DEBUG(dbgs() << "replaced by: " << *MI);
+ return true;
+}
+
+MachineInstr*
+ARMBaseInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF,
+ int FrameIx, uint64_t Offset,
+ const MDNode *MDPtr,
+ DebugLoc DL) const {
+ MachineInstrBuilder MIB = BuildMI(MF, DL, get(ARM::DBG_VALUE))
+ .addFrameIndex(FrameIx).addImm(0).addImm(Offset).addMetadata(MDPtr);
+ return &*MIB;
+}
+
+/// Create a copy of a const pool value. Update CPI to the new index and return
+/// the label UID.
+static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
+ MachineConstantPool *MCP = MF.getConstantPool();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+
+ const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI];
+ assert(MCPE.isMachineConstantPoolEntry() &&
+ "Expecting a machine constantpool entry!");
+ ARMConstantPoolValue *ACPV =
+ static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal);
+
+ unsigned PCLabelId = AFI->createPICLabelUId();
+ ARMConstantPoolValue *NewCPV = 0;
+ // FIXME: The below assumes PIC relocation model and that the function
+ // is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and
+ // zero for non-PIC in ARM or Thumb. The callers are all of thumb LDR
+ // instructions, so that's probably OK, but is PIC always correct when
+ // we get here?
+ if (ACPV->isGlobalValue())
+ NewCPV = ARMConstantPoolConstant::
+ Create(cast<ARMConstantPoolConstant>(ACPV)->getGV(), PCLabelId,
+ ARMCP::CPValue, 4);
+ else if (ACPV->isExtSymbol())
+ NewCPV = ARMConstantPoolSymbol::
+ Create(MF.getFunction()->getContext(),
+ cast<ARMConstantPoolSymbol>(ACPV)->getSymbol(), PCLabelId, 4);
+ else if (ACPV->isBlockAddress())
+ NewCPV = ARMConstantPoolConstant::
+ Create(cast<ARMConstantPoolConstant>(ACPV)->getBlockAddress(), PCLabelId,
+ ARMCP::CPBlockAddress, 4);
+ else if (ACPV->isLSDA())
+ NewCPV = ARMConstantPoolConstant::Create(MF.getFunction(), PCLabelId,
+ ARMCP::CPLSDA, 4);
+ else if (ACPV->isMachineBasicBlock())
+ NewCPV = ARMConstantPoolMBB::
+ Create(MF.getFunction()->getContext(),
+ cast<ARMConstantPoolMBB>(ACPV)->getMBB(), PCLabelId, 4);
+ else
+ llvm_unreachable("Unexpected ARM constantpool value type!!");
+ CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlignment());
+ return PCLabelId;
+}
+
+void ARMBaseInstrInfo::
+reMaterialize(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg, unsigned SubIdx,
+ const MachineInstr *Orig,
+ const TargetRegisterInfo &TRI) const {
+ unsigned Opcode = Orig->getOpcode();
+ switch (Opcode) {
+ default: {
+ MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
+ MI->substituteRegister(Orig->getOperand(0).getReg(), DestReg, SubIdx, TRI);
+ MBB.insert(I, MI);
+ break;
+ }
+ case ARM::tLDRpci_pic:
+ case ARM::t2LDRpci_pic: {
+ MachineFunction &MF = *MBB.getParent();
+ unsigned CPI = Orig->getOperand(1).getIndex();
+ unsigned PCLabelId = duplicateCPV(MF, CPI);
+ MachineInstrBuilder MIB = BuildMI(MBB, I, Orig->getDebugLoc(), get(Opcode),
+ DestReg)
+ .addConstantPoolIndex(CPI).addImm(PCLabelId);
+ MIB->setMemRefs(Orig->memoperands_begin(), Orig->memoperands_end());
+ break;
+ }
+ }
+}
+
+MachineInstr *
+ARMBaseInstrInfo::duplicate(MachineInstr *Orig, MachineFunction &MF) const {
+ MachineInstr *MI = TargetInstrInfoImpl::duplicate(Orig, MF);
+ switch(Orig->getOpcode()) {
+ case ARM::tLDRpci_pic:
+ case ARM::t2LDRpci_pic: {
+ unsigned CPI = Orig->getOperand(1).getIndex();
+ unsigned PCLabelId = duplicateCPV(MF, CPI);
+ Orig->getOperand(1).setIndex(CPI);
+ Orig->getOperand(2).setImm(PCLabelId);
+ break;
+ }
+ }
+ return MI;
+}
+
+bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0,
+ const MachineInstr *MI1,
+ const MachineRegisterInfo *MRI) const {
+ int Opcode = MI0->getOpcode();
+ if (Opcode == ARM::t2LDRpci ||
+ Opcode == ARM::t2LDRpci_pic ||
+ Opcode == ARM::tLDRpci ||
+ Opcode == ARM::tLDRpci_pic ||
+ Opcode == ARM::MOV_ga_dyn ||
+ Opcode == ARM::MOV_ga_pcrel ||
+ Opcode == ARM::MOV_ga_pcrel_ldr ||
+ Opcode == ARM::t2MOV_ga_dyn ||
+ Opcode == ARM::t2MOV_ga_pcrel) {
+ if (MI1->getOpcode() != Opcode)
+ return false;
+ if (MI0->getNumOperands() != MI1->getNumOperands())
+ return false;
+
+ const MachineOperand &MO0 = MI0->getOperand(1);
+ const MachineOperand &MO1 = MI1->getOperand(1);
+ if (MO0.getOffset() != MO1.getOffset())
+ return false;
+
+ if (Opcode == ARM::MOV_ga_dyn ||
+ Opcode == ARM::MOV_ga_pcrel ||
+ Opcode == ARM::MOV_ga_pcrel_ldr ||
+ Opcode == ARM::t2MOV_ga_dyn ||
+ Opcode == ARM::t2MOV_ga_pcrel)
+ // Ignore the PC labels.
+ return MO0.getGlobal() == MO1.getGlobal();
+
+ const MachineFunction *MF = MI0->getParent()->getParent();
+ const MachineConstantPool *MCP = MF->getConstantPool();
+ int CPI0 = MO0.getIndex();
+ int CPI1 = MO1.getIndex();
+ const MachineConstantPoolEntry &MCPE0 = MCP->getConstants()[CPI0];
+ const MachineConstantPoolEntry &MCPE1 = MCP->getConstants()[CPI1];
+ bool isARMCP0 = MCPE0.isMachineConstantPoolEntry();
+ bool isARMCP1 = MCPE1.isMachineConstantPoolEntry();
+ if (isARMCP0 && isARMCP1) {
+ ARMConstantPoolValue *ACPV0 =
+ static_cast<ARMConstantPoolValue*>(MCPE0.Val.MachineCPVal);
+ ARMConstantPoolValue *ACPV1 =
+ static_cast<ARMConstantPoolValue*>(MCPE1.Val.MachineCPVal);
+ return ACPV0->hasSameValue(ACPV1);
+ } else if (!isARMCP0 && !isARMCP1) {
+ return MCPE0.Val.ConstVal == MCPE1.Val.ConstVal;
+ }
+ return false;
+ } else if (Opcode == ARM::PICLDR) {
+ if (MI1->getOpcode() != Opcode)
+ return false;
+ if (MI0->getNumOperands() != MI1->getNumOperands())
+ return false;
+
+ unsigned Addr0 = MI0->getOperand(1).getReg();
+ unsigned Addr1 = MI1->getOperand(1).getReg();
+ if (Addr0 != Addr1) {
+ if (!MRI ||
+ !TargetRegisterInfo::isVirtualRegister(Addr0) ||
+ !TargetRegisterInfo::isVirtualRegister(Addr1))
+ return false;
+
+ // This assumes SSA form.
+ MachineInstr *Def0 = MRI->getVRegDef(Addr0);
+ MachineInstr *Def1 = MRI->getVRegDef(Addr1);
+ // Check if the loaded value, e.g. a constantpool of a global address, are
+ // the same.
+ if (!produceSameValue(Def0, Def1, MRI))
+ return false;
+ }
+
+ for (unsigned i = 3, e = MI0->getNumOperands(); i != e; ++i) {
+ // %vreg12<def> = PICLDR %vreg11, 0, pred:14, pred:%noreg
+ const MachineOperand &MO0 = MI0->getOperand(i);
+ const MachineOperand &MO1 = MI1->getOperand(i);
+ if (!MO0.isIdenticalTo(MO1))
+ return false;
+ }
+ return true;
+ }
+
+ return MI0->isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
+}
+
+/// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to
+/// determine if two loads are loading from the same base address. It should
+/// only return true if the base pointers are the same and the only differences
+/// between the two addresses is the offset. It also returns the offsets by
+/// reference.
+bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
+ int64_t &Offset1,
+ int64_t &Offset2) const {
+ // Don't worry about Thumb: just ARM and Thumb2.
+ if (Subtarget.isThumb1Only()) return false;
+
+ if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
+ return false;
+
+ switch (Load1->getMachineOpcode()) {
+ default:
+ return false;
+ case ARM::LDRi12:
+ case ARM::LDRBi12:
+ case ARM::LDRD:
+ case ARM::LDRH:
+ case ARM::LDRSB:
+ case ARM::LDRSH:
+ case ARM::VLDRD:
+ case ARM::VLDRS:
+ case ARM::t2LDRi8:
+ case ARM::t2LDRDi8:
+ case ARM::t2LDRSHi8:
+ case ARM::t2LDRi12:
+ case ARM::t2LDRSHi12:
+ break;
+ }
+
+ switch (Load2->getMachineOpcode()) {
+ default:
+ return false;
+ case ARM::LDRi12:
+ case ARM::LDRBi12:
+ case ARM::LDRD:
+ case ARM::LDRH:
+ case ARM::LDRSB:
+ case ARM::LDRSH:
+ case ARM::VLDRD:
+ case ARM::VLDRS:
+ case ARM::t2LDRi8:
+ case ARM::t2LDRDi8:
+ case ARM::t2LDRSHi8:
+ case ARM::t2LDRi12:
+ case ARM::t2LDRSHi12:
+ break;
+ }
+
+ // Check if base addresses and chain operands match.
+ if (Load1->getOperand(0) != Load2->getOperand(0) ||
+ Load1->getOperand(4) != Load2->getOperand(4))
+ return false;
+
+ // Index should be Reg0.
+ if (Load1->getOperand(3) != Load2->getOperand(3))
+ return false;
+
+ // Determine the offsets.
+ if (isa<ConstantSDNode>(Load1->getOperand(1)) &&
+ isa<ConstantSDNode>(Load2->getOperand(1))) {
+ Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getSExtValue();
+ Offset2 = cast<ConstantSDNode>(Load2->getOperand(1))->getSExtValue();
+ return true;
+ }
+
+ return false;
+}
+
+/// shouldScheduleLoadsNear - This is a used by the pre-regalloc scheduler to
+/// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should
+/// be scheduled togther. On some targets if two loads are loading from
+/// addresses in the same cache line, it's better if they are scheduled
+/// together. This function takes two integers that represent the load offsets
+/// from the common base address. It returns true if it decides it's desirable
+/// to schedule the two loads together. "NumLoads" is the number of loads that
+/// have already been scheduled after Load1.
+bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
+ int64_t Offset1, int64_t Offset2,
+ unsigned NumLoads) const {
+ // Don't worry about Thumb: just ARM and Thumb2.
+ if (Subtarget.isThumb1Only()) return false;
+
+ assert(Offset2 > Offset1);
+
+ if ((Offset2 - Offset1) / 8 > 64)
+ return false;
+
+ if (Load1->getMachineOpcode() != Load2->getMachineOpcode())
+ return false; // FIXME: overly conservative?
+
+ // Four loads in a row should be sufficient.
+ if (NumLoads >= 3)
+ return false;
+
+ return true;
+}
+
+bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
+ const MachineBasicBlock *MBB,
+ const MachineFunction &MF) const {
+ // Debug info is never a scheduling boundary. It's necessary to be explicit
+ // due to the special treatment of IT instructions below, otherwise a
+ // dbg_value followed by an IT will result in the IT instruction being
+ // considered a scheduling hazard, which is wrong. It should be the actual
+ // instruction preceding the dbg_value instruction(s), just like it is
+ // when debug info is not present.
+ if (MI->isDebugValue())
+ return false;
+
+ // Terminators and labels can't be scheduled around.
+ if (MI->getDesc().isTerminator() || MI->isLabel())
+ return true;
+
+ // Treat the start of the IT block as a scheduling boundary, but schedule
+ // t2IT along with all instructions following it.
+ // FIXME: This is a big hammer. But the alternative is to add all potential
+ // true and anti dependencies to IT block instructions as implicit operands
+ // to the t2IT instruction. The added compile time and complexity does not
+ // seem worth it.
+ MachineBasicBlock::const_iterator I = MI;
+ // Make sure to skip any dbg_value instructions
+ while (++I != MBB->end() && I->isDebugValue())
+ ;
+ if (I != MBB->end() && I->getOpcode() == ARM::t2IT)
+ return true;
+
+ // Don't attempt to schedule around any instruction that defines
+ // a stack-oriented pointer, as it's unlikely to be profitable. This
+ // saves compile time, because it doesn't require every single
+ // stack slot reference to depend on the instruction that does the
+ // modification.
+ if (MI->definesRegister(ARM::SP))
+ return true;
+
+ return false;
+}
+
+bool ARMBaseInstrInfo::
+isProfitableToIfCvt(MachineBasicBlock &MBB,
+ unsigned NumCycles, unsigned ExtraPredCycles,
+ const BranchProbability &Probability) const {
+ if (!NumCycles)
+ return false;
+
+ // Attempt to estimate the relative costs of predication versus branching.
+ unsigned UnpredCost = Probability.getNumerator() * NumCycles;
+ UnpredCost /= Probability.getDenominator();
+ UnpredCost += 1; // The branch itself
+ UnpredCost += Subtarget.getMispredictionPenalty() / 10;
+
+ return (NumCycles + ExtraPredCycles) <= UnpredCost;
+}
+
+bool ARMBaseInstrInfo::
+isProfitableToIfCvt(MachineBasicBlock &TMBB,
+ unsigned TCycles, unsigned TExtra,
+ MachineBasicBlock &FMBB,
+ unsigned FCycles, unsigned FExtra,
+ const BranchProbability &Probability) const {
+ if (!TCycles || !FCycles)
+ return false;
+
+ // Attempt to estimate the relative costs of predication versus branching.
+ unsigned TUnpredCost = Probability.getNumerator() * TCycles;
+ TUnpredCost /= Probability.getDenominator();
+
+ uint32_t Comp = Probability.getDenominator() - Probability.getNumerator();
+ unsigned FUnpredCost = Comp * FCycles;
+ FUnpredCost /= Probability.getDenominator();
+
+ unsigned UnpredCost = TUnpredCost + FUnpredCost;
+ UnpredCost += 1; // The branch itself
+ UnpredCost += Subtarget.getMispredictionPenalty() / 10;
+
+ return (TCycles + FCycles + TExtra + FExtra) <= UnpredCost;
+}
+
+/// getInstrPredicate - If instruction is predicated, returns its predicate
+/// condition, otherwise returns AL. It also returns the condition code
+/// register by reference.
+ARMCC::CondCodes
+llvm::getInstrPredicate(const MachineInstr *MI, unsigned &PredReg) {
+ int PIdx = MI->findFirstPredOperandIdx();
+ if (PIdx == -1) {
+ PredReg = 0;
+ return ARMCC::AL;
+ }
+
+ PredReg = MI->getOperand(PIdx+1).getReg();
+ return (ARMCC::CondCodes)MI->getOperand(PIdx).getImm();
+}
+
+
+int llvm::getMatchingCondBranchOpcode(int Opc) {
+ if (Opc == ARM::B)
+ return ARM::Bcc;
+ else if (Opc == ARM::tB)
+ return ARM::tBcc;
+ else if (Opc == ARM::t2B)
+ return ARM::t2Bcc;
+
+ llvm_unreachable("Unknown unconditional branch opcode!");
+ return 0;
+}
+
+
+/// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether the
+/// instruction is encoded with an 'S' bit is determined by the optional CPSR
+/// def operand.
+///
+/// This will go away once we can teach tblgen how to set the optional CPSR def
+/// operand itself.
+struct AddSubFlagsOpcodePair {
+ unsigned PseudoOpc;
+ unsigned MachineOpc;
+};
+
+static AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = {
+ {ARM::ADDSri, ARM::ADDri},
+ {ARM::ADDSrr, ARM::ADDrr},
+ {ARM::ADDSrsi, ARM::ADDrsi},
+ {ARM::ADDSrsr, ARM::ADDrsr},
+
+ {ARM::SUBSri, ARM::SUBri},
+ {ARM::SUBSrr, ARM::SUBrr},
+ {ARM::SUBSrsi, ARM::SUBrsi},
+ {ARM::SUBSrsr, ARM::SUBrsr},
+
+ {ARM::RSBSri, ARM::RSBri},
+ {ARM::RSBSrr, ARM::RSBrr},
+ {ARM::RSBSrsi, ARM::RSBrsi},
+ {ARM::RSBSrsr, ARM::RSBrsr},
+
+ {ARM::t2ADDSri, ARM::t2ADDri},
+ {ARM::t2ADDSrr, ARM::t2ADDrr},
+ {ARM::t2ADDSrs, ARM::t2ADDrs},
+
+ {ARM::t2SUBSri, ARM::t2SUBri},
+ {ARM::t2SUBSrr, ARM::t2SUBrr},
+ {ARM::t2SUBSrs, ARM::t2SUBrs},
+
+ {ARM::t2RSBSri, ARM::t2RSBri},
+ {ARM::t2RSBSrs, ARM::t2RSBrs},
+};
+
+unsigned llvm::convertAddSubFlagsOpcode(unsigned OldOpc) {
+ static const int NPairs =
+ sizeof(AddSubFlagsOpcodeMap) / sizeof(AddSubFlagsOpcodePair);
+ for (AddSubFlagsOpcodePair *OpcPair = &AddSubFlagsOpcodeMap[0],
+ *End = &AddSubFlagsOpcodeMap[NPairs]; OpcPair != End; ++OpcPair) {
+ if (OldOpc == OpcPair->PseudoOpc) {
+ return OpcPair->MachineOpc;
+ }
+ }
+ return 0;
+}
+
+void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI, DebugLoc dl,
+ unsigned DestReg, unsigned BaseReg, int NumBytes,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ const ARMBaseInstrInfo &TII, unsigned MIFlags) {
+ bool isSub = NumBytes < 0;
+ if (isSub) NumBytes = -NumBytes;
+
+ while (NumBytes) {
+ unsigned RotAmt = ARM_AM::getSOImmValRotate(NumBytes);
+ unsigned ThisVal = NumBytes & ARM_AM::rotr32(0xFF, RotAmt);
+ assert(ThisVal && "Didn't extract field correctly");
+
+ // We will handle these bits from offset, clear them.
+ NumBytes &= ~ThisVal;
+
+ assert(ARM_AM::getSOImmVal(ThisVal) != -1 && "Bit extraction didn't work?");
+
+ // Build the new ADD / SUB.
+ unsigned Opc = isSub ? ARM::SUBri : ARM::ADDri;
+ BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
+ .addReg(BaseReg, RegState::Kill).addImm(ThisVal)
+ .addImm((unsigned)Pred).addReg(PredReg).addReg(0)
+ .setMIFlags(MIFlags);
+ BaseReg = DestReg;
+ }
+}
+
+bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
+ unsigned FrameReg, int &Offset,
+ const ARMBaseInstrInfo &TII) {
+ unsigned Opcode = MI.getOpcode();
+ const MCInstrDesc &Desc = MI.getDesc();
+ unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
+ bool isSub = false;
+
+ // Memory operands in inline assembly always use AddrMode2.
+ if (Opcode == ARM::INLINEASM)
+ AddrMode = ARMII::AddrMode2;
+
+ if (Opcode == ARM::ADDri) {
+ Offset += MI.getOperand(FrameRegIdx+1).getImm();
+ if (Offset == 0) {
+ // Turn it into a move.
+ MI.setDesc(TII.get(ARM::MOVr));
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ MI.RemoveOperand(FrameRegIdx+1);
+ Offset = 0;
+ return true;
+ } else if (Offset < 0) {
+ Offset = -Offset;
+ isSub = true;
+ MI.setDesc(TII.get(ARM::SUBri));
+ }
+
+ // Common case: small offset, fits into instruction.
+ if (ARM_AM::getSOImmVal(Offset) != -1) {
+ // Replace the FrameIndex with sp / fp
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
+ Offset = 0;
+ return true;
+ }
+
+ // Otherwise, pull as much of the immedidate into this ADDri/SUBri
+ // as possible.
+ unsigned RotAmt = ARM_AM::getSOImmValRotate(Offset);
+ unsigned ThisImmVal = Offset & ARM_AM::rotr32(0xFF, RotAmt);
+
+ // We will handle these bits from offset, clear them.
+ Offset &= ~ThisImmVal;
+
+ // Get the properly encoded SOImmVal field.
+ assert(ARM_AM::getSOImmVal(ThisImmVal) != -1 &&
+ "Bit extraction didn't work?");
+ MI.getOperand(FrameRegIdx+1).ChangeToImmediate(ThisImmVal);
+ } else {
+ unsigned ImmIdx = 0;
+ int InstrOffs = 0;
+ unsigned NumBits = 0;
+ unsigned Scale = 1;
+ switch (AddrMode) {
+ case ARMII::AddrMode_i12: {
+ ImmIdx = FrameRegIdx + 1;
+ InstrOffs = MI.getOperand(ImmIdx).getImm();
+ NumBits = 12;
+ break;
+ }
+ case ARMII::AddrMode2: {
+ ImmIdx = FrameRegIdx+2;
+ InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm());
+ if (ARM_AM::getAM2Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
+ InstrOffs *= -1;
+ NumBits = 12;
+ break;
+ }
+ case ARMII::AddrMode3: {
+ ImmIdx = FrameRegIdx+2;
+ InstrOffs = ARM_AM::getAM3Offset(MI.getOperand(ImmIdx).getImm());
+ if (ARM_AM::getAM3Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
+ InstrOffs *= -1;
+ NumBits = 8;
+ break;
+ }
+ case ARMII::AddrMode4:
+ case ARMII::AddrMode6:
+ // Can't fold any offset even if it's zero.
+ return false;
+ case ARMII::AddrMode5: {
+ ImmIdx = FrameRegIdx+1;
+ InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
+ if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
+ InstrOffs *= -1;
+ NumBits = 8;
+ Scale = 4;
+ break;
+ }
+ default:
+ llvm_unreachable("Unsupported addressing mode!");
+ break;
+ }
+
+ Offset += InstrOffs * Scale;
+ assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!");
+ if (Offset < 0) {
+ Offset = -Offset;
+ isSub = true;
+ }
+
+ // Attempt to fold address comp. if opcode has offset bits
+ if (NumBits > 0) {
+ // Common case: small offset, fits into instruction.
+ MachineOperand &ImmOp = MI.getOperand(ImmIdx);
+ int ImmedOffset = Offset / Scale;
+ unsigned Mask = (1 << NumBits) - 1;
+ if ((unsigned)Offset <= Mask * Scale) {
+ // Replace the FrameIndex with sp
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ // FIXME: When addrmode2 goes away, this will simplify (like the
+ // T2 version), as the LDR.i12 versions don't need the encoding
+ // tricks for the offset value.
+ if (isSub) {
+ if (AddrMode == ARMII::AddrMode_i12)
+ ImmedOffset = -ImmedOffset;
+ else
+ ImmedOffset |= 1 << NumBits;
+ }
+ ImmOp.ChangeToImmediate(ImmedOffset);
+ Offset = 0;
+ return true;
+ }
+
+ // Otherwise, it didn't fit. Pull in what we can to simplify the immed.
+ ImmedOffset = ImmedOffset & Mask;
+ if (isSub) {
+ if (AddrMode == ARMII::AddrMode_i12)
+ ImmedOffset = -ImmedOffset;
+ else
+ ImmedOffset |= 1 << NumBits;
+ }
+ ImmOp.ChangeToImmediate(ImmedOffset);
+ Offset &= ~(Mask*Scale);
+ }
+ }
+
+ Offset = (isSub) ? -Offset : Offset;
+ return Offset == 0;
+}
+
+bool ARMBaseInstrInfo::
+AnalyzeCompare(const MachineInstr *MI, unsigned &SrcReg, int &CmpMask,
+ int &CmpValue) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case ARM::CMPri:
+ case ARM::t2CMPri:
+ SrcReg = MI->getOperand(0).getReg();
+ CmpMask = ~0;
+ CmpValue = MI->getOperand(1).getImm();
+ return true;
+ case ARM::TSTri:
+ case ARM::t2TSTri:
+ SrcReg = MI->getOperand(0).getReg();
+ CmpMask = MI->getOperand(1).getImm();
+ CmpValue = 0;
+ return true;
+ }
+
+ return false;
+}
+
+/// isSuitableForMask - Identify a suitable 'and' instruction that
+/// operates on the given source register and applies the same mask
+/// as a 'tst' instruction. Provide a limited look-through for copies.
+/// When successful, MI will hold the found instruction.
+static bool isSuitableForMask(MachineInstr *&MI, unsigned SrcReg,
+ int CmpMask, bool CommonUse) {
+ switch (MI->getOpcode()) {
+ case ARM::ANDri:
+ case ARM::t2ANDri:
+ if (CmpMask != MI->getOperand(2).getImm())
+ return false;
+ if (SrcReg == MI->getOperand(CommonUse ? 1 : 0).getReg())
+ return true;
+ break;
+ case ARM::COPY: {
+ // Walk down one instruction which is potentially an 'and'.
+ const MachineInstr &Copy = *MI;
+ MachineBasicBlock::iterator AND(
+ llvm::next(MachineBasicBlock::iterator(MI)));
+ if (AND == MI->getParent()->end()) return false;
+ MI = AND;
+ return isSuitableForMask(MI, Copy.getOperand(0).getReg(),
+ CmpMask, true);
+ }
+ }
+
+ return false;
+}
+
+/// OptimizeCompareInstr - Convert the instruction supplying the argument to the
+/// comparison into one that sets the zero bit in the flags register.
+bool ARMBaseInstrInfo::
+OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask,
+ int CmpValue, const MachineRegisterInfo *MRI) const {
+ if (CmpValue != 0)
+ return false;
+
+ MachineRegisterInfo::def_iterator DI = MRI->def_begin(SrcReg);
+ if (llvm::next(DI) != MRI->def_end())
+ // Only support one definition.
+ return false;
+
+ MachineInstr *MI = &*DI;
+
+ // Masked compares sometimes use the same register as the corresponding 'and'.
+ if (CmpMask != ~0) {
+ if (!isSuitableForMask(MI, SrcReg, CmpMask, false)) {
+ MI = 0;
+ for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(SrcReg),
+ UE = MRI->use_end(); UI != UE; ++UI) {
+ if (UI->getParent() != CmpInstr->getParent()) continue;
+ MachineInstr *PotentialAND = &*UI;
+ if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true))
+ continue;
+ MI = PotentialAND;
+ break;
+ }
+ if (!MI) return false;
+ }
+ }
+
+ // Conservatively refuse to convert an instruction which isn't in the same BB
+ // as the comparison.
+ if (MI->getParent() != CmpInstr->getParent())
+ return false;
+
+ // Check that CPSR isn't set between the comparison instruction and the one we
+ // want to change.
+ MachineBasicBlock::const_iterator I = CmpInstr, E = MI,
+ B = MI->getParent()->begin();
+
+ // Early exit if CmpInstr is at the beginning of the BB.
+ if (I == B) return false;
+
+ --I;
+ for (; I != E; --I) {
+ const MachineInstr &Instr = *I;
+
+ for (unsigned IO = 0, EO = Instr.getNumOperands(); IO != EO; ++IO) {
+ const MachineOperand &MO = Instr.getOperand(IO);
+ if (!MO.isReg()) continue;
+
+ // This instruction modifies or uses CPSR after the one we want to
+ // change. We can't do this transformation.
+ if (MO.getReg() == ARM::CPSR)
+ return false;
+ }
+
+ if (I == B)
+ // The 'and' is below the comparison instruction.
+ return false;
+ }
+
+ // Set the "zero" bit in CPSR.
+ switch (MI->getOpcode()) {
+ default: break;
+ case ARM::RSBrr:
+ case ARM::RSBri:
+ case ARM::RSCrr:
+ case ARM::RSCri:
+ case ARM::ADDrr:
+ case ARM::ADDri:
+ case ARM::ADCrr:
+ case ARM::ADCri:
+ case ARM::SUBrr:
+ case ARM::SUBri:
+ case ARM::SBCrr:
+ case ARM::SBCri:
+ case ARM::t2RSBri:
+ case ARM::t2ADDrr:
+ case ARM::t2ADDri:
+ case ARM::t2ADCrr:
+ case ARM::t2ADCri:
+ case ARM::t2SUBrr:
+ case ARM::t2SUBri:
+ case ARM::t2SBCrr:
+ case ARM::t2SBCri:
+ case ARM::ANDrr:
+ case ARM::ANDri:
+ case ARM::t2ANDrr:
+ case ARM::t2ANDri:
+ case ARM::ORRrr:
+ case ARM::ORRri:
+ case ARM::t2ORRrr:
+ case ARM::t2ORRri:
+ case ARM::EORrr:
+ case ARM::EORri:
+ case ARM::t2EORrr:
+ case ARM::t2EORri: {
+ // Scan forward for the use of CPSR, if it's a conditional code requires
+ // checking of V bit, then this is not safe to do. If we can't find the
+ // CPSR use (i.e. used in another block), then it's not safe to perform
+ // the optimization.
+ bool isSafe = false;
+ I = CmpInstr;
+ E = MI->getParent()->end();
+ while (!isSafe && ++I != E) {
+ const MachineInstr &Instr = *I;
+ for (unsigned IO = 0, EO = Instr.getNumOperands();
+ !isSafe && IO != EO; ++IO) {
+ const MachineOperand &MO = Instr.getOperand(IO);
+ if (!MO.isReg() || MO.getReg() != ARM::CPSR)
+ continue;
+ if (MO.isDef()) {
+ isSafe = true;
+ break;
+ }
+ // Condition code is after the operand before CPSR.
+ ARMCC::CondCodes CC = (ARMCC::CondCodes)Instr.getOperand(IO-1).getImm();
+ switch (CC) {
+ default:
+ isSafe = true;
+ break;
+ case ARMCC::VS:
+ case ARMCC::VC:
+ case ARMCC::GE:
+ case ARMCC::LT:
+ case ARMCC::GT:
+ case ARMCC::LE:
+ return false;
+ }
+ }
+ }
+
+ if (!isSafe)
+ return false;
+
+ // Toggle the optional operand to CPSR.
+ MI->getOperand(5).setReg(ARM::CPSR);
+ MI->getOperand(5).setIsDef(true);
+ CmpInstr->eraseFromParent();
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool ARMBaseInstrInfo::FoldImmediate(MachineInstr *UseMI,
+ MachineInstr *DefMI, unsigned Reg,
+ MachineRegisterInfo *MRI) const {
+ // Fold large immediates into add, sub, or, xor.
+ unsigned DefOpc = DefMI->getOpcode();
+ if (DefOpc != ARM::t2MOVi32imm && DefOpc != ARM::MOVi32imm)
+ return false;
+ if (!DefMI->getOperand(1).isImm())
+ // Could be t2MOVi32imm <ga:xx>
+ return false;
+
+ if (!MRI->hasOneNonDBGUse(Reg))
+ return false;
+
+ unsigned UseOpc = UseMI->getOpcode();
+ unsigned NewUseOpc = 0;
+ uint32_t ImmVal = (uint32_t)DefMI->getOperand(1).getImm();
+ uint32_t SOImmValV1 = 0, SOImmValV2 = 0;
+ bool Commute = false;
+ switch (UseOpc) {
+ default: return false;
+ case ARM::SUBrr:
+ case ARM::ADDrr:
+ case ARM::ORRrr:
+ case ARM::EORrr:
+ case ARM::t2SUBrr:
+ case ARM::t2ADDrr:
+ case ARM::t2ORRrr:
+ case ARM::t2EORrr: {
+ Commute = UseMI->getOperand(2).getReg() != Reg;
+ switch (UseOpc) {
+ default: break;
+ case ARM::SUBrr: {
+ if (Commute)
+ return false;
+ ImmVal = -ImmVal;
+ NewUseOpc = ARM::SUBri;
+ // Fallthrough
+ }
+ case ARM::ADDrr:
+ case ARM::ORRrr:
+ case ARM::EORrr: {
+ if (!ARM_AM::isSOImmTwoPartVal(ImmVal))
+ return false;
+ SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
+ SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
+ switch (UseOpc) {
+ default: break;
+ case ARM::ADDrr: NewUseOpc = ARM::ADDri; break;
+ case ARM::ORRrr: NewUseOpc = ARM::ORRri; break;
+ case ARM::EORrr: NewUseOpc = ARM::EORri; break;
+ }
+ break;
+ }
+ case ARM::t2SUBrr: {
+ if (Commute)
+ return false;
+ ImmVal = -ImmVal;
+ NewUseOpc = ARM::t2SUBri;
+ // Fallthrough
+ }
+ case ARM::t2ADDrr:
+ case ARM::t2ORRrr:
+ case ARM::t2EORrr: {
+ if (!ARM_AM::isT2SOImmTwoPartVal(ImmVal))
+ return false;
+ SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
+ SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
+ switch (UseOpc) {
+ default: break;
+ case ARM::t2ADDrr: NewUseOpc = ARM::t2ADDri; break;
+ case ARM::t2ORRrr: NewUseOpc = ARM::t2ORRri; break;
+ case ARM::t2EORrr: NewUseOpc = ARM::t2EORri; break;
+ }
+ break;
+ }
+ }
+ }
+ }
+
+ unsigned OpIdx = Commute ? 2 : 1;
+ unsigned Reg1 = UseMI->getOperand(OpIdx).getReg();
+ bool isKill = UseMI->getOperand(OpIdx).isKill();
+ unsigned NewReg = MRI->createVirtualRegister(MRI->getRegClass(Reg));
+ AddDefaultCC(AddDefaultPred(BuildMI(*UseMI->getParent(),
+ *UseMI, UseMI->getDebugLoc(),
+ get(NewUseOpc), NewReg)
+ .addReg(Reg1, getKillRegState(isKill))
+ .addImm(SOImmValV1)));
+ UseMI->setDesc(get(NewUseOpc));
+ UseMI->getOperand(1).setReg(NewReg);
+ UseMI->getOperand(1).setIsKill();
+ UseMI->getOperand(2).ChangeToImmediate(SOImmValV2);
+ DefMI->eraseFromParent();
+ return true;
+}
+
+unsigned
+ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
+ const MachineInstr *MI) const {
+ if (!ItinData || ItinData->isEmpty())
+ return 1;
+
+ const MCInstrDesc &Desc = MI->getDesc();
+ unsigned Class = Desc.getSchedClass();
+ unsigned UOps = ItinData->Itineraries[Class].NumMicroOps;
+ if (UOps)
+ return UOps;
+
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ default:
+ llvm_unreachable("Unexpected multi-uops instruction!");
+ break;
+ case ARM::VLDMQIA:
+ case ARM::VSTMQIA:
+ return 2;
+
+ // The number of uOps for load / store multiple are determined by the number
+ // registers.
+ //
+ // On Cortex-A8, each pair of register loads / stores can be scheduled on the
+ // same cycle. The scheduling for the first load / store must be done
+ // separately by assuming the the address is not 64-bit aligned.
+ //
+ // On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). If the address
+ // is not 64-bit aligned, then AGU would take an extra cycle. For VFP / NEON
+ // load / store multiple, the formula is (#reg / 2) + (#reg % 2) + 1.
+ case ARM::VLDMDIA:
+ case ARM::VLDMDIA_UPD:
+ case ARM::VLDMDDB_UPD:
+ case ARM::VLDMSIA:
+ case ARM::VLDMSIA_UPD:
+ case ARM::VLDMSDB_UPD:
+ case ARM::VSTMDIA:
+ case ARM::VSTMDIA_UPD:
+ case ARM::VSTMDDB_UPD:
+ case ARM::VSTMSIA:
+ case ARM::VSTMSIA_UPD:
+ case ARM::VSTMSDB_UPD: {
+ unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands();
+ return (NumRegs / 2) + (NumRegs % 2) + 1;
+ }
+
+ case ARM::LDMIA_RET:
+ case ARM::LDMIA:
+ case ARM::LDMDA:
+ case ARM::LDMDB:
+ case ARM::LDMIB:
+ case ARM::LDMIA_UPD:
+ case ARM::LDMDA_UPD:
+ case ARM::LDMDB_UPD:
+ case ARM::LDMIB_UPD:
+ case ARM::STMIA:
+ case ARM::STMDA:
+ case ARM::STMDB:
+ case ARM::STMIB:
+ case ARM::STMIA_UPD:
+ case ARM::STMDA_UPD:
+ case ARM::STMDB_UPD:
+ case ARM::STMIB_UPD:
+ case ARM::tLDMIA:
+ case ARM::tLDMIA_UPD:
+ case ARM::tSTMIA_UPD:
+ case ARM::tPOP_RET:
+ case ARM::tPOP:
+ case ARM::tPUSH:
+ case ARM::t2LDMIA_RET:
+ case ARM::t2LDMIA:
+ case ARM::t2LDMDB:
+ case ARM::t2LDMIA_UPD:
+ case ARM::t2LDMDB_UPD:
+ case ARM::t2STMIA:
+ case ARM::t2STMDB:
+ case ARM::t2STMIA_UPD:
+ case ARM::t2STMDB_UPD: {
+ unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands() + 1;
+ if (Subtarget.isCortexA8()) {
+ if (NumRegs < 4)
+ return 2;
+ // 4 registers would be issued: 2, 2.
+ // 5 registers would be issued: 2, 2, 1.
+ UOps = (NumRegs / 2);
+ if (NumRegs % 2)
+ ++UOps;
+ return UOps;
+ } else if (Subtarget.isCortexA9()) {
+ UOps = (NumRegs / 2);
+ // If there are odd number of registers or if it's not 64-bit aligned,
+ // then it takes an extra AGU (Address Generation Unit) cycle.
+ if ((NumRegs % 2) ||
+ !MI->hasOneMemOperand() ||
+ (*MI->memoperands_begin())->getAlignment() < 8)
+ ++UOps;
+ return UOps;
+ } else {
+ // Assume the worst.
+ return NumRegs;
+ }
+ }
+ }
+}
+
+int
+ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData,
+ const MCInstrDesc &DefMCID,
+ unsigned DefClass,
+ unsigned DefIdx, unsigned DefAlign) const {
+ int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
+ if (RegNo <= 0)
+ // Def is the address writeback.
+ return ItinData->getOperandCycle(DefClass, DefIdx);
+
+ int DefCycle;
+ if (Subtarget.isCortexA8()) {
+ // (regno / 2) + (regno % 2) + 1
+ DefCycle = RegNo / 2 + 1;
+ if (RegNo % 2)
+ ++DefCycle;
+ } else if (Subtarget.isCortexA9()) {
+ DefCycle = RegNo;
+ bool isSLoad = false;
+
+ switch (DefMCID.getOpcode()) {
+ default: break;
+ case ARM::VLDMSIA:
+ case ARM::VLDMSIA_UPD:
+ case ARM::VLDMSDB_UPD:
+ isSLoad = true;
+ break;
+ }
+
+ // If there are odd number of 'S' registers or if it's not 64-bit aligned,
+ // then it takes an extra cycle.
+ if ((isSLoad && (RegNo % 2)) || DefAlign < 8)
+ ++DefCycle;
+ } else {
+ // Assume the worst.
+ DefCycle = RegNo + 2;
+ }
+
+ return DefCycle;
+}
+
+int
+ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData,
+ const MCInstrDesc &DefMCID,
+ unsigned DefClass,
+ unsigned DefIdx, unsigned DefAlign) const {
+ int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
+ if (RegNo <= 0)
+ // Def is the address writeback.
+ return ItinData->getOperandCycle(DefClass, DefIdx);
+
+ int DefCycle;
+ if (Subtarget.isCortexA8()) {
+ // 4 registers would be issued: 1, 2, 1.
+ // 5 registers would be issued: 1, 2, 2.
+ DefCycle = RegNo / 2;
+ if (DefCycle < 1)
+ DefCycle = 1;
+ // Result latency is issue cycle + 2: E2.
+ DefCycle += 2;
+ } else if (Subtarget.isCortexA9()) {
+ DefCycle = (RegNo / 2);
+ // If there are odd number of registers or if it's not 64-bit aligned,
+ // then it takes an extra AGU (Address Generation Unit) cycle.
+ if ((RegNo % 2) || DefAlign < 8)
+ ++DefCycle;
+ // Result latency is AGU cycles + 2.
+ DefCycle += 2;
+ } else {
+ // Assume the worst.
+ DefCycle = RegNo + 2;
+ }
+
+ return DefCycle;
+}
+
+int
+ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData,
+ const MCInstrDesc &UseMCID,
+ unsigned UseClass,
+ unsigned UseIdx, unsigned UseAlign) const {
+ int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
+ if (RegNo <= 0)
+ return ItinData->getOperandCycle(UseClass, UseIdx);
+
+ int UseCycle;
+ if (Subtarget.isCortexA8()) {
+ // (regno / 2) + (regno % 2) + 1
+ UseCycle = RegNo / 2 + 1;
+ if (RegNo % 2)
+ ++UseCycle;
+ } else if (Subtarget.isCortexA9()) {
+ UseCycle = RegNo;
+ bool isSStore = false;
+
+ switch (UseMCID.getOpcode()) {
+ default: break;
+ case ARM::VSTMSIA:
+ case ARM::VSTMSIA_UPD:
+ case ARM::VSTMSDB_UPD:
+ isSStore = true;
+ break;
+ }
+
+ // If there are odd number of 'S' registers or if it's not 64-bit aligned,
+ // then it takes an extra cycle.
+ if ((isSStore && (RegNo % 2)) || UseAlign < 8)
+ ++UseCycle;
+ } else {
+ // Assume the worst.
+ UseCycle = RegNo + 2;
+ }
+
+ return UseCycle;
+}
+
+int
+ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData,
+ const MCInstrDesc &UseMCID,
+ unsigned UseClass,
+ unsigned UseIdx, unsigned UseAlign) const {
+ int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
+ if (RegNo <= 0)
+ return ItinData->getOperandCycle(UseClass, UseIdx);
+
+ int UseCycle;
+ if (Subtarget.isCortexA8()) {
+ UseCycle = RegNo / 2;
+ if (UseCycle < 2)
+ UseCycle = 2;
+ // Read in E3.
+ UseCycle += 2;
+ } else if (Subtarget.isCortexA9()) {
+ UseCycle = (RegNo / 2);
+ // If there are odd number of registers or if it's not 64-bit aligned,
+ // then it takes an extra AGU (Address Generation Unit) cycle.
+ if ((RegNo % 2) || UseAlign < 8)
+ ++UseCycle;
+ } else {
+ // Assume the worst.
+ UseCycle = 1;
+ }
+ return UseCycle;
+}
+
+int
+ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
+ const MCInstrDesc &DefMCID,
+ unsigned DefIdx, unsigned DefAlign,
+ const MCInstrDesc &UseMCID,
+ unsigned UseIdx, unsigned UseAlign) const {
+ unsigned DefClass = DefMCID.getSchedClass();
+ unsigned UseClass = UseMCID.getSchedClass();
+
+ if (DefIdx < DefMCID.getNumDefs() && UseIdx < UseMCID.getNumOperands())
+ return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
+
+ // This may be a def / use of a variable_ops instruction, the operand
+ // latency might be determinable dynamically. Let the target try to
+ // figure it out.
+ int DefCycle = -1;
+ bool LdmBypass = false;
+ switch (DefMCID.getOpcode()) {
+ default:
+ DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
+ break;
+
+ case ARM::VLDMDIA:
+ case ARM::VLDMDIA_UPD:
+ case ARM::VLDMDDB_UPD:
+ case ARM::VLDMSIA:
+ case ARM::VLDMSIA_UPD:
+ case ARM::VLDMSDB_UPD:
+ DefCycle = getVLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
+ break;
+
+ case ARM::LDMIA_RET:
+ case ARM::LDMIA:
+ case ARM::LDMDA:
+ case ARM::LDMDB:
+ case ARM::LDMIB:
+ case ARM::LDMIA_UPD:
+ case ARM::LDMDA_UPD:
+ case ARM::LDMDB_UPD:
+ case ARM::LDMIB_UPD:
+ case ARM::tLDMIA:
+ case ARM::tLDMIA_UPD:
+ case ARM::tPUSH:
+ case ARM::t2LDMIA_RET:
+ case ARM::t2LDMIA:
+ case ARM::t2LDMDB:
+ case ARM::t2LDMIA_UPD:
+ case ARM::t2LDMDB_UPD:
+ LdmBypass = 1;
+ DefCycle = getLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
+ break;
+ }
+
+ if (DefCycle == -1)
+ // We can't seem to determine the result latency of the def, assume it's 2.
+ DefCycle = 2;
+
+ int UseCycle = -1;
+ switch (UseMCID.getOpcode()) {
+ default:
+ UseCycle = ItinData->getOperandCycle(UseClass, UseIdx);
+ break;
+
+ case ARM::VSTMDIA:
+ case ARM::VSTMDIA_UPD:
+ case ARM::VSTMDDB_UPD:
+ case ARM::VSTMSIA:
+ case ARM::VSTMSIA_UPD:
+ case ARM::VSTMSDB_UPD:
+ UseCycle = getVSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
+ break;
+
+ case ARM::STMIA:
+ case ARM::STMDA:
+ case ARM::STMDB:
+ case ARM::STMIB:
+ case ARM::STMIA_UPD:
+ case ARM::STMDA_UPD:
+ case ARM::STMDB_UPD:
+ case ARM::STMIB_UPD:
+ case ARM::tSTMIA_UPD:
+ case ARM::tPOP_RET:
+ case ARM::tPOP:
+ case ARM::t2STMIA:
+ case ARM::t2STMDB:
+ case ARM::t2STMIA_UPD:
+ case ARM::t2STMDB_UPD:
+ UseCycle = getSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
+ break;
+ }
+
+ if (UseCycle == -1)
+ // Assume it's read in the first stage.
+ UseCycle = 1;
+
+ UseCycle = DefCycle - UseCycle + 1;
+ if (UseCycle > 0) {
+ if (LdmBypass) {
+ // It's a variable_ops instruction so we can't use DefIdx here. Just use
+ // first def operand.
+ if (ItinData->hasPipelineForwarding(DefClass, DefMCID.getNumOperands()-1,
+ UseClass, UseIdx))
+ --UseCycle;
+ } else if (ItinData->hasPipelineForwarding(DefClass, DefIdx,
+ UseClass, UseIdx)) {
+ --UseCycle;
+ }
+ }
+
+ return UseCycle;
+}
+
+int
+ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI, unsigned UseIdx) const {
+ if (DefMI->isCopyLike() || DefMI->isInsertSubreg() ||
+ DefMI->isRegSequence() || DefMI->isImplicitDef())
+ return 1;
+
+ const MCInstrDesc &DefMCID = DefMI->getDesc();
+ if (!ItinData || ItinData->isEmpty())
+ return DefMCID.mayLoad() ? 3 : 1;
+
+ const MCInstrDesc &UseMCID = UseMI->getDesc();
+ const MachineOperand &DefMO = DefMI->getOperand(DefIdx);
+ if (DefMO.getReg() == ARM::CPSR) {
+ if (DefMI->getOpcode() == ARM::FMSTAT) {
+ // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?)
+ return Subtarget.isCortexA9() ? 1 : 20;
+ }
+
+ // CPSR set and branch can be paired in the same cycle.
+ if (UseMCID.isBranch())
+ return 0;
+ }
+
+ unsigned DefAlign = DefMI->hasOneMemOperand()
+ ? (*DefMI->memoperands_begin())->getAlignment() : 0;
+ unsigned UseAlign = UseMI->hasOneMemOperand()
+ ? (*UseMI->memoperands_begin())->getAlignment() : 0;
+ int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign,
+ UseMCID, UseIdx, UseAlign);
+
+ if (Latency > 1 &&
+ (Subtarget.isCortexA8() || Subtarget.isCortexA9())) {
+ // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
+ // variants are one cycle cheaper.
+ switch (DefMCID.getOpcode()) {
+ default: break;
+ case ARM::LDRrs:
+ case ARM::LDRBrs: {
+ unsigned ShOpVal = DefMI->getOperand(3).getImm();
+ unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
+ if (ShImm == 0 ||
+ (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
+ --Latency;
+ break;
+ }
+ case ARM::t2LDRs:
+ case ARM::t2LDRBs:
+ case ARM::t2LDRHs:
+ case ARM::t2LDRSHs: {
+ // Thumb2 mode: lsl only.
+ unsigned ShAmt = DefMI->getOperand(3).getImm();
+ if (ShAmt == 0 || ShAmt == 2)
+ --Latency;
+ break;
+ }
+ }
+ }
+
+ if (DefAlign < 8 && Subtarget.isCortexA9())
+ switch (DefMCID.getOpcode()) {
+ default: break;
+ case ARM::VLD1q8:
+ case ARM::VLD1q16:
+ case ARM::VLD1q32:
+ case ARM::VLD1q64:
+ case ARM::VLD1q8_UPD:
+ case ARM::VLD1q16_UPD:
+ case ARM::VLD1q32_UPD:
+ case ARM::VLD1q64_UPD:
+ case ARM::VLD2d8:
+ case ARM::VLD2d16:
+ case ARM::VLD2d32:
+ case ARM::VLD2q8:
+ case ARM::VLD2q16:
+ case ARM::VLD2q32:
+ case ARM::VLD2d8_UPD:
+ case ARM::VLD2d16_UPD:
+ case ARM::VLD2d32_UPD:
+ case ARM::VLD2q8_UPD:
+ case ARM::VLD2q16_UPD:
+ case ARM::VLD2q32_UPD:
+ case ARM::VLD3d8:
+ case ARM::VLD3d16:
+ case ARM::VLD3d32:
+ case ARM::VLD1d64T:
+ case ARM::VLD3d8_UPD:
+ case ARM::VLD3d16_UPD:
+ case ARM::VLD3d32_UPD:
+ case ARM::VLD1d64T_UPD:
+ case ARM::VLD3q8_UPD:
+ case ARM::VLD3q16_UPD:
+ case ARM::VLD3q32_UPD:
+ case ARM::VLD4d8:
+ case ARM::VLD4d16:
+ case ARM::VLD4d32:
+ case ARM::VLD1d64Q:
+ case ARM::VLD4d8_UPD:
+ case ARM::VLD4d16_UPD:
+ case ARM::VLD4d32_UPD:
+ case ARM::VLD1d64Q_UPD:
+ case ARM::VLD4q8_UPD:
+ case ARM::VLD4q16_UPD:
+ case ARM::VLD4q32_UPD:
+ case ARM::VLD1DUPq8:
+ case ARM::VLD1DUPq16:
+ case ARM::VLD1DUPq32:
+ case ARM::VLD1DUPq8_UPD:
+ case ARM::VLD1DUPq16_UPD:
+ case ARM::VLD1DUPq32_UPD:
+ case ARM::VLD2DUPd8:
+ case ARM::VLD2DUPd16:
+ case ARM::VLD2DUPd32:
+ case ARM::VLD2DUPd8_UPD:
+ case ARM::VLD2DUPd16_UPD:
+ case ARM::VLD2DUPd32_UPD:
+ case ARM::VLD4DUPd8:
+ case ARM::VLD4DUPd16:
+ case ARM::VLD4DUPd32:
+ case ARM::VLD4DUPd8_UPD:
+ case ARM::VLD4DUPd16_UPD:
+ case ARM::VLD4DUPd32_UPD:
+ case ARM::VLD1LNd8:
+ case ARM::VLD1LNd16:
+ case ARM::VLD1LNd32:
+ case ARM::VLD1LNd8_UPD:
+ case ARM::VLD1LNd16_UPD:
+ case ARM::VLD1LNd32_UPD:
+ case ARM::VLD2LNd8:
+ case ARM::VLD2LNd16:
+ case ARM::VLD2LNd32:
+ case ARM::VLD2LNq16:
+ case ARM::VLD2LNq32:
+ case ARM::VLD2LNd8_UPD:
+ case ARM::VLD2LNd16_UPD:
+ case ARM::VLD2LNd32_UPD:
+ case ARM::VLD2LNq16_UPD:
+ case ARM::VLD2LNq32_UPD:
+ case ARM::VLD4LNd8:
+ case ARM::VLD4LNd16:
+ case ARM::VLD4LNd32:
+ case ARM::VLD4LNq16:
+ case ARM::VLD4LNq32:
+ case ARM::VLD4LNd8_UPD:
+ case ARM::VLD4LNd16_UPD:
+ case ARM::VLD4LNd32_UPD:
+ case ARM::VLD4LNq16_UPD:
+ case ARM::VLD4LNq32_UPD:
+ // If the address is not 64-bit aligned, the latencies of these
+ // instructions increases by one.
+ ++Latency;
+ break;
+ }
+
+ return Latency;
+}
+
+int
+ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
+ SDNode *DefNode, unsigned DefIdx,
+ SDNode *UseNode, unsigned UseIdx) const {
+ if (!DefNode->isMachineOpcode())
+ return 1;
+
+ const MCInstrDesc &DefMCID = get(DefNode->getMachineOpcode());
+
+ if (isZeroCost(DefMCID.Opcode))
+ return 0;
+
+ if (!ItinData || ItinData->isEmpty())
+ return DefMCID.mayLoad() ? 3 : 1;
+
+ if (!UseNode->isMachineOpcode()) {
+ int Latency = ItinData->getOperandCycle(DefMCID.getSchedClass(), DefIdx);
+ if (Subtarget.isCortexA9())
+ return Latency <= 2 ? 1 : Latency - 1;
+ else
+ return Latency <= 3 ? 1 : Latency - 2;
+ }
+
+ const MCInstrDesc &UseMCID = get(UseNode->getMachineOpcode());
+ const MachineSDNode *DefMN = dyn_cast<MachineSDNode>(DefNode);
+ unsigned DefAlign = !DefMN->memoperands_empty()
+ ? (*DefMN->memoperands_begin())->getAlignment() : 0;
+ const MachineSDNode *UseMN = dyn_cast<MachineSDNode>(UseNode);
+ unsigned UseAlign = !UseMN->memoperands_empty()
+ ? (*UseMN->memoperands_begin())->getAlignment() : 0;
+ int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign,
+ UseMCID, UseIdx, UseAlign);
+
+ if (Latency > 1 &&
+ (Subtarget.isCortexA8() || Subtarget.isCortexA9())) {
+ // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
+ // variants are one cycle cheaper.
+ switch (DefMCID.getOpcode()) {
+ default: break;
+ case ARM::LDRrs:
+ case ARM::LDRBrs: {
+ unsigned ShOpVal =
+ cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
+ unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
+ if (ShImm == 0 ||
+ (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
+ --Latency;
+ break;
+ }
+ case ARM::t2LDRs:
+ case ARM::t2LDRBs:
+ case ARM::t2LDRHs:
+ case ARM::t2LDRSHs: {
+ // Thumb2 mode: lsl only.
+ unsigned ShAmt =
+ cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
+ if (ShAmt == 0 || ShAmt == 2)
+ --Latency;
+ break;
+ }
+ }
+ }
+
+ if (DefAlign < 8 && Subtarget.isCortexA9())
+ switch (DefMCID.getOpcode()) {
+ default: break;
+ case ARM::VLD1q8Pseudo:
+ case ARM::VLD1q16Pseudo:
+ case ARM::VLD1q32Pseudo:
+ case ARM::VLD1q64Pseudo:
+ case ARM::VLD1q8Pseudo_UPD:
+ case ARM::VLD1q16Pseudo_UPD:
+ case ARM::VLD1q32Pseudo_UPD:
+ case ARM::VLD1q64Pseudo_UPD:
+ case ARM::VLD2d8Pseudo:
+ case ARM::VLD2d16Pseudo:
+ case ARM::VLD2d32Pseudo:
+ case ARM::VLD2q8Pseudo:
+ case ARM::VLD2q16Pseudo:
+ case ARM::VLD2q32Pseudo:
+ case ARM::VLD2d8Pseudo_UPD:
+ case ARM::VLD2d16Pseudo_UPD:
+ case ARM::VLD2d32Pseudo_UPD:
+ case ARM::VLD2q8Pseudo_UPD:
+ case ARM::VLD2q16Pseudo_UPD:
+ case ARM::VLD2q32Pseudo_UPD:
+ case ARM::VLD3d8Pseudo:
+ case ARM::VLD3d16Pseudo:
+ case ARM::VLD3d32Pseudo:
+ case ARM::VLD1d64TPseudo:
+ case ARM::VLD3d8Pseudo_UPD:
+ case ARM::VLD3d16Pseudo_UPD:
+ case ARM::VLD3d32Pseudo_UPD:
+ case ARM::VLD1d64TPseudo_UPD:
+ case ARM::VLD3q8Pseudo_UPD:
+ case ARM::VLD3q16Pseudo_UPD:
+ case ARM::VLD3q32Pseudo_UPD:
+ case ARM::VLD3q8oddPseudo:
+ case ARM::VLD3q16oddPseudo:
+ case ARM::VLD3q32oddPseudo:
+ case ARM::VLD3q8oddPseudo_UPD:
+ case ARM::VLD3q16oddPseudo_UPD:
+ case ARM::VLD3q32oddPseudo_UPD:
+ case ARM::VLD4d8Pseudo:
+ case ARM::VLD4d16Pseudo:
+ case ARM::VLD4d32Pseudo:
+ case ARM::VLD1d64QPseudo:
+ case ARM::VLD4d8Pseudo_UPD:
+ case ARM::VLD4d16Pseudo_UPD:
+ case ARM::VLD4d32Pseudo_UPD:
+ case ARM::VLD1d64QPseudo_UPD:
+ case ARM::VLD4q8Pseudo_UPD:
+ case ARM::VLD4q16Pseudo_UPD:
+ case ARM::VLD4q32Pseudo_UPD:
+ case ARM::VLD4q8oddPseudo:
+ case ARM::VLD4q16oddPseudo:
+ case ARM::VLD4q32oddPseudo:
+ case ARM::VLD4q8oddPseudo_UPD:
+ case ARM::VLD4q16oddPseudo_UPD:
+ case ARM::VLD4q32oddPseudo_UPD:
+ case ARM::VLD1DUPq8Pseudo:
+ case ARM::VLD1DUPq16Pseudo:
+ case ARM::VLD1DUPq32Pseudo:
+ case ARM::VLD1DUPq8Pseudo_UPD:
+ case ARM::VLD1DUPq16Pseudo_UPD:
+ case ARM::VLD1DUPq32Pseudo_UPD:
+ case ARM::VLD2DUPd8Pseudo:
+ case ARM::VLD2DUPd16Pseudo:
+ case ARM::VLD2DUPd32Pseudo:
+ case ARM::VLD2DUPd8Pseudo_UPD:
+ case ARM::VLD2DUPd16Pseudo_UPD:
+ case ARM::VLD2DUPd32Pseudo_UPD:
+ case ARM::VLD4DUPd8Pseudo:
+ case ARM::VLD4DUPd16Pseudo:
+ case ARM::VLD4DUPd32Pseudo:
+ case ARM::VLD4DUPd8Pseudo_UPD:
+ case ARM::VLD4DUPd16Pseudo_UPD:
+ case ARM::VLD4DUPd32Pseudo_UPD:
+ case ARM::VLD1LNq8Pseudo:
+ case ARM::VLD1LNq16Pseudo:
+ case ARM::VLD1LNq32Pseudo:
+ case ARM::VLD1LNq8Pseudo_UPD:
+ case ARM::VLD1LNq16Pseudo_UPD:
+ case ARM::VLD1LNq32Pseudo_UPD:
+ case ARM::VLD2LNd8Pseudo:
+ case ARM::VLD2LNd16Pseudo:
+ case ARM::VLD2LNd32Pseudo:
+ case ARM::VLD2LNq16Pseudo:
+ case ARM::VLD2LNq32Pseudo:
+ case ARM::VLD2LNd8Pseudo_UPD:
+ case ARM::VLD2LNd16Pseudo_UPD:
+ case ARM::VLD2LNd32Pseudo_UPD:
+ case ARM::VLD2LNq16Pseudo_UPD:
+ case ARM::VLD2LNq32Pseudo_UPD:
+ case ARM::VLD4LNd8Pseudo:
+ case ARM::VLD4LNd16Pseudo:
+ case ARM::VLD4LNd32Pseudo:
+ case ARM::VLD4LNq16Pseudo:
+ case ARM::VLD4LNq32Pseudo:
+ case ARM::VLD4LNd8Pseudo_UPD:
+ case ARM::VLD4LNd16Pseudo_UPD:
+ case ARM::VLD4LNd32Pseudo_UPD:
+ case ARM::VLD4LNq16Pseudo_UPD:
+ case ARM::VLD4LNq32Pseudo_UPD:
+ // If the address is not 64-bit aligned, the latencies of these
+ // instructions increases by one.
+ ++Latency;
+ break;
+ }
+
+ return Latency;
+}
+
+int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *MI,
+ unsigned *PredCost) const {
+ if (MI->isCopyLike() || MI->isInsertSubreg() ||
+ MI->isRegSequence() || MI->isImplicitDef())
+ return 1;
+
+ if (!ItinData || ItinData->isEmpty())
+ return 1;
+
+ const MCInstrDesc &MCID = MI->getDesc();
+ unsigned Class = MCID.getSchedClass();
+ unsigned UOps = ItinData->Itineraries[Class].NumMicroOps;
+ if (PredCost && MCID.hasImplicitDefOfPhysReg(ARM::CPSR))
+ // When predicated, CPSR is an additional source operand for CPSR updating
+ // instructions, this apparently increases their latencies.
+ *PredCost = 1;
+ if (UOps)
+ return ItinData->getStageLatency(Class);
+ return getNumMicroOps(ItinData, MI);
+}
+
+int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+ SDNode *Node) const {
+ if (!Node->isMachineOpcode())
+ return 1;
+
+ if (!ItinData || ItinData->isEmpty())
+ return 1;
+
+ unsigned Opcode = Node->getMachineOpcode();
+ switch (Opcode) {
+ default:
+ return ItinData->getStageLatency(get(Opcode).getSchedClass());
+ case ARM::VLDMQIA:
+ case ARM::VSTMQIA:
+ return 2;
+ }
+}
+
+bool ARMBaseInstrInfo::
+hasHighOperandLatency(const InstrItineraryData *ItinData,
+ const MachineRegisterInfo *MRI,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI, unsigned UseIdx) const {
+ unsigned DDomain = DefMI->getDesc().TSFlags & ARMII::DomainMask;
+ unsigned UDomain = UseMI->getDesc().TSFlags & ARMII::DomainMask;
+ if (Subtarget.isCortexA8() &&
+ (DDomain == ARMII::DomainVFP || UDomain == ARMII::DomainVFP))
+ // CortexA8 VFP instructions are not pipelined.
+ return true;
+
+ // Hoist VFP / NEON instructions with 4 or higher latency.
+ int Latency = getOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx);
+ if (Latency <= 3)
+ return false;
+ return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON ||
+ UDomain == ARMII::DomainVFP || UDomain == ARMII::DomainNEON;
+}
+
+bool ARMBaseInstrInfo::
+hasLowDefLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx) const {
+ if (!ItinData || ItinData->isEmpty())
+ return false;
+
+ unsigned DDomain = DefMI->getDesc().TSFlags & ARMII::DomainMask;
+ if (DDomain == ARMII::DomainGeneral) {
+ unsigned DefClass = DefMI->getDesc().getSchedClass();
+ int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
+ return (DefCycle != -1 && DefCycle <= 2);
+ }
+ return false;
+}
+
+bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr *MI,
+ StringRef &ErrInfo) const {
+ if (convertAddSubFlagsOpcode(MI->getOpcode())) {
+ ErrInfo = "Pseudo flag setting opcodes only exist in Selection DAG";
+ return false;
+ }
+ return true;
+}
+
+bool
+ARMBaseInstrInfo::isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc,
+ unsigned &AddSubOpc,
+ bool &NegAcc, bool &HasLane) const {
+ DenseMap<unsigned, unsigned>::const_iterator I = MLxEntryMap.find(Opcode);
+ if (I == MLxEntryMap.end())
+ return false;
+
+ const ARM_MLxEntry &Entry = ARM_MLxTable[I->second];
+ MulOpc = Entry.MulOpc;
+ AddSubOpc = Entry.AddSubOpc;
+ NegAcc = Entry.NegAcc;
+ HasLane = Entry.HasLane;
+ return true;
+}
+
+//===----------------------------------------------------------------------===//
+// Execution domains.
+//===----------------------------------------------------------------------===//
+//
+// Some instructions go down the NEON pipeline, some go down the VFP pipeline,
+// and some can go down both. The vmov instructions go down the VFP pipeline,
+// but they can be changed to vorr equivalents that are executed by the NEON
+// pipeline.
+//
+// We use the following execution domain numbering:
+//
+enum ARMExeDomain {
+ ExeGeneric = 0,
+ ExeVFP = 1,
+ ExeNEON = 2
+};
+//
+// Also see ARMInstrFormats.td and Domain* enums in ARMBaseInfo.h
+//
+std::pair<uint16_t, uint16_t>
+ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const {
+ // VMOVD is a VFP instruction, but can be changed to NEON if it isn't
+ // predicated.
+ if (MI->getOpcode() == ARM::VMOVD && !isPredicated(MI))
+ return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON));
+
+ // No other instructions can be swizzled, so just determine their domain.
+ unsigned Domain = MI->getDesc().TSFlags & ARMII::DomainMask;
+
+ if (Domain & ARMII::DomainNEON)
+ return std::make_pair(ExeNEON, 0);
+
+ // Certain instructions can go either way on Cortex-A8.
+ // Treat them as NEON instructions.
+ if ((Domain & ARMII::DomainNEONA8) && Subtarget.isCortexA8())
+ return std::make_pair(ExeNEON, 0);
+
+ if (Domain & ARMII::DomainVFP)
+ return std::make_pair(ExeVFP, 0);
+
+ return std::make_pair(ExeGeneric, 0);
+}
+
+void
+ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
+ // We only know how to change VMOVD into VORR.
+ assert(MI->getOpcode() == ARM::VMOVD && "Can only swizzle VMOVD");
+ if (Domain != ExeNEON)
+ return;
+
+ // Zap the predicate operands.
+ assert(!isPredicated(MI) && "Cannot predicate a VORRd");
+ MI->RemoveOperand(3);
+ MI->RemoveOperand(2);
+
+ // Change to a VORRd which requires two identical use operands.
+ MI->setDesc(get(ARM::VORRd));
+
+ // Add the extra source operand and new predicates.
+ // This will go before any implicit ops.
+ AddDefaultPred(MachineInstrBuilder(MI).addOperand(MI->getOperand(1)));
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
new file mode 100644
index 0000000..0f9f321
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -0,0 +1,384 @@
+//===- ARMBaseInstrInfo.h - ARM Base Instruction Information ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Base ARM implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMBASEINSTRUCTIONINFO_H
+#define ARMBASEINSTRUCTIONINFO_H
+
+#include "ARM.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallSet.h"
+
+#define GET_INSTRINFO_HEADER
+#include "ARMGenInstrInfo.inc"
+
+namespace llvm {
+ class ARMSubtarget;
+ class ARMBaseRegisterInfo;
+
+class ARMBaseInstrInfo : public ARMGenInstrInfo {
+ const ARMSubtarget &Subtarget;
+
+protected:
+ // Can be only subclassed.
+ explicit ARMBaseInstrInfo(const ARMSubtarget &STI);
+
+public:
+ // Return the non-pre/post incrementing version of 'Opc'. Return 0
+ // if there is not such an opcode.
+ virtual unsigned getUnindexedOpcode(unsigned Opc) const =0;
+
+ virtual MachineInstr *convertToThreeAddress(MachineFunction::iterator &MFI,
+ MachineBasicBlock::iterator &MBBI,
+ LiveVariables *LV) const;
+
+ virtual const ARMBaseRegisterInfo &getRegisterInfo() const =0;
+ const ARMSubtarget &getSubtarget() const { return Subtarget; }
+
+ ScheduleHazardRecognizer *
+ CreateTargetHazardRecognizer(const TargetMachine *TM,
+ const ScheduleDAG *DAG) const;
+
+ ScheduleHazardRecognizer *
+ CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
+ const ScheduleDAG *DAG) const;
+
+ // Branch analysis.
+ virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify = false) const;
+ virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
+
+ virtual
+ bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+
+ // Predication support.
+ bool isPredicated(const MachineInstr *MI) const {
+ int PIdx = MI->findFirstPredOperandIdx();
+ return PIdx != -1 && MI->getOperand(PIdx).getImm() != ARMCC::AL;
+ }
+
+ ARMCC::CondCodes getPredicate(const MachineInstr *MI) const {
+ int PIdx = MI->findFirstPredOperandIdx();
+ return PIdx != -1 ? (ARMCC::CondCodes)MI->getOperand(PIdx).getImm()
+ : ARMCC::AL;
+ }
+
+ virtual
+ bool PredicateInstruction(MachineInstr *MI,
+ const SmallVectorImpl<MachineOperand> &Pred) const;
+
+ virtual
+ bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+ const SmallVectorImpl<MachineOperand> &Pred2) const;
+
+ virtual bool DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const;
+
+ virtual bool isPredicable(MachineInstr *MI) const;
+
+ /// GetInstSize - Returns the size of the specified MachineInstr.
+ ///
+ virtual unsigned GetInstSizeInBytes(const MachineInstr* MI) const;
+
+ virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+ virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+ virtual unsigned isLoadFromStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const;
+ virtual unsigned isStoreToStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const;
+
+ virtual MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF,
+ int FrameIx,
+ uint64_t Offset,
+ const MDNode *MDPtr,
+ DebugLoc DL) const;
+
+ virtual void reMaterialize(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, unsigned SubIdx,
+ const MachineInstr *Orig,
+ const TargetRegisterInfo &TRI) const;
+
+ MachineInstr *duplicate(MachineInstr *Orig, MachineFunction &MF) const;
+
+ virtual bool produceSameValue(const MachineInstr *MI0,
+ const MachineInstr *MI1,
+ const MachineRegisterInfo *MRI) const;
+
+ /// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to
+ /// determine if two loads are loading from the same base address. It should
+ /// only return true if the base pointers are the same and the only
+ /// differences between the two addresses is the offset. It also returns the
+ /// offsets by reference.
+ virtual bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
+ int64_t &Offset1, int64_t &Offset2)const;
+
+ /// shouldScheduleLoadsNear - This is a used by the pre-regalloc scheduler to
+ /// determine (in conjunction with areLoadsFromSameBasePtr) if two loads
+ /// should be scheduled togther. On some targets if two loads are loading from
+ /// addresses in the same cache line, it's better if they are scheduled
+ /// together. This function takes two integers that represent the load offsets
+ /// from the common base address. It returns true if it decides it's desirable
+ /// to schedule the two loads together. "NumLoads" is the number of loads that
+ /// have already been scheduled after Load1.
+ virtual bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
+ int64_t Offset1, int64_t Offset2,
+ unsigned NumLoads) const;
+
+ virtual bool isSchedulingBoundary(const MachineInstr *MI,
+ const MachineBasicBlock *MBB,
+ const MachineFunction &MF) const;
+
+ virtual bool isProfitableToIfCvt(MachineBasicBlock &MBB,
+ unsigned NumCycles, unsigned ExtraPredCycles,
+ const BranchProbability &Probability) const;
+
+ virtual bool isProfitableToIfCvt(MachineBasicBlock &TMBB,
+ unsigned NumT, unsigned ExtraT,
+ MachineBasicBlock &FMBB,
+ unsigned NumF, unsigned ExtraF,
+ const BranchProbability &Probability) const;
+
+ virtual bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
+ unsigned NumCycles,
+ const BranchProbability
+ &Probability) const {
+ return NumCycles == 1;
+ }
+
+ /// AnalyzeCompare - For a comparison instruction, return the source register
+ /// in SrcReg and the value it compares against in CmpValue. Return true if
+ /// the comparison instruction can be analyzed.
+ virtual bool AnalyzeCompare(const MachineInstr *MI, unsigned &SrcReg,
+ int &CmpMask, int &CmpValue) const;
+
+ /// OptimizeCompareInstr - Convert the instruction to set the zero flag so
+ /// that we can remove a "comparison with zero".
+ virtual bool OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg,
+ int CmpMask, int CmpValue,
+ const MachineRegisterInfo *MRI) const;
+
+ /// FoldImmediate - 'Reg' is known to be defined by a move immediate
+ /// instruction, try to fold the immediate into the use instruction.
+ virtual bool FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
+ unsigned Reg, MachineRegisterInfo *MRI) const;
+
+ virtual unsigned getNumMicroOps(const InstrItineraryData *ItinData,
+ const MachineInstr *MI) const;
+
+ virtual
+ int getOperandLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI, unsigned UseIdx) const;
+ virtual
+ int getOperandLatency(const InstrItineraryData *ItinData,
+ SDNode *DefNode, unsigned DefIdx,
+ SDNode *UseNode, unsigned UseIdx) const;
+
+ /// VFP/NEON execution domains.
+ std::pair<uint16_t, uint16_t>
+ getExecutionDomain(const MachineInstr *MI) const;
+ void setExecutionDomain(MachineInstr *MI, unsigned Domain) const;
+
+private:
+ int getVLDMDefCycle(const InstrItineraryData *ItinData,
+ const MCInstrDesc &DefMCID,
+ unsigned DefClass,
+ unsigned DefIdx, unsigned DefAlign) const;
+ int getLDMDefCycle(const InstrItineraryData *ItinData,
+ const MCInstrDesc &DefMCID,
+ unsigned DefClass,
+ unsigned DefIdx, unsigned DefAlign) const;
+ int getVSTMUseCycle(const InstrItineraryData *ItinData,
+ const MCInstrDesc &UseMCID,
+ unsigned UseClass,
+ unsigned UseIdx, unsigned UseAlign) const;
+ int getSTMUseCycle(const InstrItineraryData *ItinData,
+ const MCInstrDesc &UseMCID,
+ unsigned UseClass,
+ unsigned UseIdx, unsigned UseAlign) const;
+ int getOperandLatency(const InstrItineraryData *ItinData,
+ const MCInstrDesc &DefMCID,
+ unsigned DefIdx, unsigned DefAlign,
+ const MCInstrDesc &UseMCID,
+ unsigned UseIdx, unsigned UseAlign) const;
+
+ int getInstrLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *MI, unsigned *PredCost = 0) const;
+
+ int getInstrLatency(const InstrItineraryData *ItinData,
+ SDNode *Node) const;
+
+ bool hasHighOperandLatency(const InstrItineraryData *ItinData,
+ const MachineRegisterInfo *MRI,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI, unsigned UseIdx) const;
+ bool hasLowDefLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx) const;
+
+ /// verifyInstruction - Perform target specific instruction verification.
+ bool verifyInstruction(const MachineInstr *MI, StringRef &ErrInfo) const;
+
+private:
+ /// Modeling special VFP / NEON fp MLA / MLS hazards.
+
+ /// MLxEntryMap - Map fp MLA / MLS to the corresponding entry in the internal
+ /// MLx table.
+ DenseMap<unsigned, unsigned> MLxEntryMap;
+
+ /// MLxHazardOpcodes - Set of add / sub and multiply opcodes that would cause
+ /// stalls when scheduled together with fp MLA / MLS opcodes.
+ SmallSet<unsigned, 16> MLxHazardOpcodes;
+
+public:
+ /// isFpMLxInstruction - Return true if the specified opcode is a fp MLA / MLS
+ /// instruction.
+ bool isFpMLxInstruction(unsigned Opcode) const {
+ return MLxEntryMap.count(Opcode);
+ }
+
+ /// isFpMLxInstruction - This version also returns the multiply opcode and the
+ /// addition / subtraction opcode to expand to. Return true for 'HasLane' for
+ /// the MLX instructions with an extra lane operand.
+ bool isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc,
+ unsigned &AddSubOpc, bool &NegAcc,
+ bool &HasLane) const;
+
+ /// canCauseFpMLxStall - Return true if an instruction of the specified opcode
+ /// will cause stalls when scheduled after (within 4-cycle window) a fp
+ /// MLA / MLS instruction.
+ bool canCauseFpMLxStall(unsigned Opcode) const {
+ return MLxHazardOpcodes.count(Opcode);
+ }
+};
+
+static inline
+const MachineInstrBuilder &AddDefaultPred(const MachineInstrBuilder &MIB) {
+ return MIB.addImm((int64_t)ARMCC::AL).addReg(0);
+}
+
+static inline
+const MachineInstrBuilder &AddDefaultCC(const MachineInstrBuilder &MIB) {
+ return MIB.addReg(0);
+}
+
+static inline
+const MachineInstrBuilder &AddDefaultT1CC(const MachineInstrBuilder &MIB,
+ bool isDead = false) {
+ return MIB.addReg(ARM::CPSR, getDefRegState(true) | getDeadRegState(isDead));
+}
+
+static inline
+const MachineInstrBuilder &AddNoT1CC(const MachineInstrBuilder &MIB) {
+ return MIB.addReg(0);
+}
+
+static inline
+bool isUncondBranchOpcode(int Opc) {
+ return Opc == ARM::B || Opc == ARM::tB || Opc == ARM::t2B;
+}
+
+static inline
+bool isCondBranchOpcode(int Opc) {
+ return Opc == ARM::Bcc || Opc == ARM::tBcc || Opc == ARM::t2Bcc;
+}
+
+static inline
+bool isJumpTableBranchOpcode(int Opc) {
+ return Opc == ARM::BR_JTr || Opc == ARM::BR_JTm || Opc == ARM::BR_JTadd ||
+ Opc == ARM::tBR_JTr || Opc == ARM::t2BR_JT;
+}
+
+static inline
+bool isIndirectBranchOpcode(int Opc) {
+ return Opc == ARM::BX || Opc == ARM::MOVPCRX || Opc == ARM::tBRIND;
+}
+
+/// getInstrPredicate - If instruction is predicated, returns its predicate
+/// condition, otherwise returns AL. It also returns the condition code
+/// register by reference.
+ARMCC::CondCodes getInstrPredicate(const MachineInstr *MI, unsigned &PredReg);
+
+int getMatchingCondBranchOpcode(int Opc);
+
+
+/// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether
+/// the instruction is encoded with an 'S' bit is determined by the optional
+/// CPSR def operand.
+unsigned convertAddSubFlagsOpcode(unsigned OldOpc);
+
+/// emitARMRegPlusImmediate / emitT2RegPlusImmediate - Emits a series of
+/// instructions to materializea destreg = basereg + immediate in ARM / Thumb2
+/// code.
+void emitARMRegPlusImmediate(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI, DebugLoc dl,
+ unsigned DestReg, unsigned BaseReg, int NumBytes,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ const ARMBaseInstrInfo &TII, unsigned MIFlags = 0);
+
+void emitT2RegPlusImmediate(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI, DebugLoc dl,
+ unsigned DestReg, unsigned BaseReg, int NumBytes,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ const ARMBaseInstrInfo &TII, unsigned MIFlags = 0);
+void emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI, DebugLoc dl,
+ unsigned DestReg, unsigned BaseReg,
+ int NumBytes, const TargetInstrInfo &TII,
+ const ARMBaseRegisterInfo& MRI,
+ unsigned MIFlags = 0);
+
+
+/// rewriteARMFrameIndex / rewriteT2FrameIndex -
+/// Rewrite MI to access 'Offset' bytes from the FP. Return false if the
+/// offset could not be handled directly in MI, and return the left-over
+/// portion by reference.
+bool rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
+ unsigned FrameReg, int &Offset,
+ const ARMBaseInstrInfo &TII);
+
+bool rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
+ unsigned FrameReg, int &Offset,
+ const ARMBaseInstrInfo &TII);
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
new file mode 100644
index 0000000..7c42342
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -0,0 +1,1208 @@
+//===- ARMBaseRegisterInfo.cpp - ARM Register Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the base ARM implementation of TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMBaseRegisterInfo.h"
+#include "ARMFrameLowering.h"
+#include "ARMInstrInfo.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMSubtarget.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/CommandLine.h"
+
+#define GET_REGINFO_TARGET_DESC
+#include "ARMGenRegisterInfo.inc"
+
+using namespace llvm;
+
+static cl::opt<bool>
+ForceAllBaseRegAlloc("arm-force-base-reg-alloc", cl::Hidden, cl::init(false),
+ cl::desc("Force use of virtual base registers for stack load/store"));
+static cl::opt<bool>
+EnableLocalStackAlloc("enable-local-stack-alloc", cl::init(true), cl::Hidden,
+ cl::desc("Enable pre-regalloc stack frame index allocation"));
+static cl::opt<bool>
+EnableBasePointer("arm-use-base-pointer", cl::Hidden, cl::init(true),
+ cl::desc("Enable use of a base pointer for complex stack frames"));
+
+ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMBaseInstrInfo &tii,
+ const ARMSubtarget &sti)
+ : ARMGenRegisterInfo(ARM::LR), TII(tii), STI(sti),
+ FramePtr((STI.isTargetDarwin() || STI.isThumb()) ? ARM::R7 : ARM::R11),
+ BasePtr(ARM::R6) {
+}
+
+const unsigned*
+ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+ static const unsigned CalleeSavedRegs[] = {
+ ARM::LR, ARM::R11, ARM::R10, ARM::R9, ARM::R8,
+ ARM::R7, ARM::R6, ARM::R5, ARM::R4,
+
+ ARM::D15, ARM::D14, ARM::D13, ARM::D12,
+ ARM::D11, ARM::D10, ARM::D9, ARM::D8,
+ 0
+ };
+
+ static const unsigned DarwinCalleeSavedRegs[] = {
+ // Darwin ABI deviates from ARM standard ABI. R9 is not a callee-saved
+ // register.
+ ARM::LR, ARM::R7, ARM::R6, ARM::R5, ARM::R4,
+ ARM::R11, ARM::R10, ARM::R8,
+
+ ARM::D15, ARM::D14, ARM::D13, ARM::D12,
+ ARM::D11, ARM::D10, ARM::D9, ARM::D8,
+ 0
+ };
+ return STI.isTargetDarwin() ? DarwinCalleeSavedRegs : CalleeSavedRegs;
+}
+
+BitVector ARMBaseRegisterInfo::
+getReservedRegs(const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ // FIXME: avoid re-calculating this every time.
+ BitVector Reserved(getNumRegs());
+ Reserved.set(ARM::SP);
+ Reserved.set(ARM::PC);
+ Reserved.set(ARM::FPSCR);
+ if (TFI->hasFP(MF))
+ Reserved.set(FramePtr);
+ if (hasBasePointer(MF))
+ Reserved.set(BasePtr);
+ // Some targets reserve R9.
+ if (STI.isR9Reserved())
+ Reserved.set(ARM::R9);
+ // Reserve D16-D31 if the subtarget doesn't support them.
+ if (!STI.hasVFP3() || STI.hasD16()) {
+ assert(ARM::D31 == ARM::D16 + 15);
+ for (unsigned i = 0; i != 16; ++i)
+ Reserved.set(ARM::D16 + i);
+ }
+ return Reserved;
+}
+
+bool ARMBaseRegisterInfo::isReservedReg(const MachineFunction &MF,
+ unsigned Reg) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ switch (Reg) {
+ default: break;
+ case ARM::SP:
+ case ARM::PC:
+ return true;
+ case ARM::R6:
+ if (hasBasePointer(MF))
+ return true;
+ break;
+ case ARM::R7:
+ case ARM::R11:
+ if (FramePtr == Reg && TFI->hasFP(MF))
+ return true;
+ break;
+ case ARM::R9:
+ return STI.isR9Reserved();
+ }
+
+ return false;
+}
+
+const TargetRegisterClass *
+ARMBaseRegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
+ const TargetRegisterClass *B,
+ unsigned SubIdx) const {
+ switch (SubIdx) {
+ default: return 0;
+ case ARM::ssub_0:
+ case ARM::ssub_1:
+ case ARM::ssub_2:
+ case ARM::ssub_3: {
+ // S sub-registers.
+ if (A->getSize() == 8) {
+ if (B == &ARM::SPR_8RegClass)
+ return &ARM::DPR_8RegClass;
+ assert(B == &ARM::SPRRegClass && "Expecting SPR register class!");
+ if (A == &ARM::DPR_8RegClass)
+ return A;
+ return &ARM::DPR_VFP2RegClass;
+ }
+
+ if (A->getSize() == 16) {
+ if (B == &ARM::SPR_8RegClass)
+ return &ARM::QPR_8RegClass;
+ return &ARM::QPR_VFP2RegClass;
+ }
+
+ if (A->getSize() == 32) {
+ if (B == &ARM::SPR_8RegClass)
+ return 0; // Do not allow coalescing!
+ return &ARM::QQPR_VFP2RegClass;
+ }
+
+ assert(A->getSize() == 64 && "Expecting a QQQQ register class!");
+ return 0; // Do not allow coalescing!
+ }
+ case ARM::dsub_0:
+ case ARM::dsub_1:
+ case ARM::dsub_2:
+ case ARM::dsub_3: {
+ // D sub-registers.
+ if (A->getSize() == 16) {
+ if (B == &ARM::DPR_VFP2RegClass)
+ return &ARM::QPR_VFP2RegClass;
+ if (B == &ARM::DPR_8RegClass)
+ return 0; // Do not allow coalescing!
+ return A;
+ }
+
+ if (A->getSize() == 32) {
+ if (B == &ARM::DPR_VFP2RegClass)
+ return &ARM::QQPR_VFP2RegClass;
+ if (B == &ARM::DPR_8RegClass)
+ return 0; // Do not allow coalescing!
+ return A;
+ }
+
+ assert(A->getSize() == 64 && "Expecting a QQQQ register class!");
+ if (B != &ARM::DPRRegClass)
+ return 0; // Do not allow coalescing!
+ return A;
+ }
+ case ARM::dsub_4:
+ case ARM::dsub_5:
+ case ARM::dsub_6:
+ case ARM::dsub_7: {
+ // D sub-registers of QQQQ registers.
+ if (A->getSize() == 64 && B == &ARM::DPRRegClass)
+ return A;
+ return 0; // Do not allow coalescing!
+ }
+
+ case ARM::qsub_0:
+ case ARM::qsub_1: {
+ // Q sub-registers.
+ if (A->getSize() == 32) {
+ if (B == &ARM::QPR_VFP2RegClass)
+ return &ARM::QQPR_VFP2RegClass;
+ if (B == &ARM::QPR_8RegClass)
+ return 0; // Do not allow coalescing!
+ return A;
+ }
+
+ assert(A->getSize() == 64 && "Expecting a QQQQ register class!");
+ if (B == &ARM::QPRRegClass)
+ return A;
+ return 0; // Do not allow coalescing!
+ }
+ case ARM::qsub_2:
+ case ARM::qsub_3: {
+ // Q sub-registers of QQQQ registers.
+ if (A->getSize() == 64 && B == &ARM::QPRRegClass)
+ return A;
+ return 0; // Do not allow coalescing!
+ }
+ }
+ return 0;
+}
+
+bool
+ARMBaseRegisterInfo::canCombineSubRegIndices(const TargetRegisterClass *RC,
+ SmallVectorImpl<unsigned> &SubIndices,
+ unsigned &NewSubIdx) const {
+
+ unsigned Size = RC->getSize() * 8;
+ if (Size < 6)
+ return 0;
+
+ NewSubIdx = 0; // Whole register.
+ unsigned NumRegs = SubIndices.size();
+ if (NumRegs == 8) {
+ // 8 D registers -> 1 QQQQ register.
+ return (Size == 512 &&
+ SubIndices[0] == ARM::dsub_0 &&
+ SubIndices[1] == ARM::dsub_1 &&
+ SubIndices[2] == ARM::dsub_2 &&
+ SubIndices[3] == ARM::dsub_3 &&
+ SubIndices[4] == ARM::dsub_4 &&
+ SubIndices[5] == ARM::dsub_5 &&
+ SubIndices[6] == ARM::dsub_6 &&
+ SubIndices[7] == ARM::dsub_7);
+ } else if (NumRegs == 4) {
+ if (SubIndices[0] == ARM::qsub_0) {
+ // 4 Q registers -> 1 QQQQ register.
+ return (Size == 512 &&
+ SubIndices[1] == ARM::qsub_1 &&
+ SubIndices[2] == ARM::qsub_2 &&
+ SubIndices[3] == ARM::qsub_3);
+ } else if (SubIndices[0] == ARM::dsub_0) {
+ // 4 D registers -> 1 QQ register.
+ if (Size >= 256 &&
+ SubIndices[1] == ARM::dsub_1 &&
+ SubIndices[2] == ARM::dsub_2 &&
+ SubIndices[3] == ARM::dsub_3) {
+ if (Size == 512)
+ NewSubIdx = ARM::qqsub_0;
+ return true;
+ }
+ } else if (SubIndices[0] == ARM::dsub_4) {
+ // 4 D registers -> 1 QQ register (2nd).
+ if (Size == 512 &&
+ SubIndices[1] == ARM::dsub_5 &&
+ SubIndices[2] == ARM::dsub_6 &&
+ SubIndices[3] == ARM::dsub_7) {
+ NewSubIdx = ARM::qqsub_1;
+ return true;
+ }
+ } else if (SubIndices[0] == ARM::ssub_0) {
+ // 4 S registers -> 1 Q register.
+ if (Size >= 128 &&
+ SubIndices[1] == ARM::ssub_1 &&
+ SubIndices[2] == ARM::ssub_2 &&
+ SubIndices[3] == ARM::ssub_3) {
+ if (Size >= 256)
+ NewSubIdx = ARM::qsub_0;
+ return true;
+ }
+ }
+ } else if (NumRegs == 2) {
+ if (SubIndices[0] == ARM::qsub_0) {
+ // 2 Q registers -> 1 QQ register.
+ if (Size >= 256 && SubIndices[1] == ARM::qsub_1) {
+ if (Size == 512)
+ NewSubIdx = ARM::qqsub_0;
+ return true;
+ }
+ } else if (SubIndices[0] == ARM::qsub_2) {
+ // 2 Q registers -> 1 QQ register (2nd).
+ if (Size == 512 && SubIndices[1] == ARM::qsub_3) {
+ NewSubIdx = ARM::qqsub_1;
+ return true;
+ }
+ } else if (SubIndices[0] == ARM::dsub_0) {
+ // 2 D registers -> 1 Q register.
+ if (Size >= 128 && SubIndices[1] == ARM::dsub_1) {
+ if (Size >= 256)
+ NewSubIdx = ARM::qsub_0;
+ return true;
+ }
+ } else if (SubIndices[0] == ARM::dsub_2) {
+ // 2 D registers -> 1 Q register (2nd).
+ if (Size >= 256 && SubIndices[1] == ARM::dsub_3) {
+ NewSubIdx = ARM::qsub_1;
+ return true;
+ }
+ } else if (SubIndices[0] == ARM::dsub_4) {
+ // 2 D registers -> 1 Q register (3rd).
+ if (Size == 512 && SubIndices[1] == ARM::dsub_5) {
+ NewSubIdx = ARM::qsub_2;
+ return true;
+ }
+ } else if (SubIndices[0] == ARM::dsub_6) {
+ // 2 D registers -> 1 Q register (3rd).
+ if (Size == 512 && SubIndices[1] == ARM::dsub_7) {
+ NewSubIdx = ARM::qsub_3;
+ return true;
+ }
+ } else if (SubIndices[0] == ARM::ssub_0) {
+ // 2 S registers -> 1 D register.
+ if (SubIndices[1] == ARM::ssub_1) {
+ if (Size >= 128)
+ NewSubIdx = ARM::dsub_0;
+ return true;
+ }
+ } else if (SubIndices[0] == ARM::ssub_2) {
+ // 2 S registers -> 1 D register (2nd).
+ if (Size >= 128 && SubIndices[1] == ARM::ssub_3) {
+ NewSubIdx = ARM::dsub_1;
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+const TargetRegisterClass*
+ARMBaseRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC)
+ const {
+ const TargetRegisterClass *Super = RC;
+ TargetRegisterClass::sc_iterator I = RC->getSuperClasses();
+ do {
+ switch (Super->getID()) {
+ case ARM::GPRRegClassID:
+ case ARM::SPRRegClassID:
+ case ARM::DPRRegClassID:
+ case ARM::QPRRegClassID:
+ case ARM::QQPRRegClassID:
+ case ARM::QQQQPRRegClassID:
+ return Super;
+ }
+ Super = *I++;
+ } while (Super);
+ return RC;
+}
+
+const TargetRegisterClass *
+ARMBaseRegisterInfo::getPointerRegClass(unsigned Kind) const {
+ return ARM::GPRRegisterClass;
+}
+
+const TargetRegisterClass *
+ARMBaseRegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
+ if (RC == &ARM::CCRRegClass)
+ return 0; // Can't copy CCR registers.
+ return RC;
+}
+
+unsigned
+ARMBaseRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
+ MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ switch (RC->getID()) {
+ default:
+ return 0;
+ case ARM::tGPRRegClassID:
+ return TFI->hasFP(MF) ? 4 : 5;
+ case ARM::GPRRegClassID: {
+ unsigned FP = TFI->hasFP(MF) ? 1 : 0;
+ return 10 - FP - (STI.isR9Reserved() ? 1 : 0);
+ }
+ case ARM::SPRRegClassID: // Currently not used as 'rep' register class.
+ case ARM::DPRRegClassID:
+ return 32 - 10;
+ }
+}
+
+/// getRawAllocationOrder - Returns the register allocation order for a
+/// specified register class with a target-dependent hint.
+ArrayRef<unsigned>
+ARMBaseRegisterInfo::getRawAllocationOrder(const TargetRegisterClass *RC,
+ unsigned HintType, unsigned HintReg,
+ const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ // Alternative register allocation orders when favoring even / odd registers
+ // of register pairs.
+
+ // No FP, R9 is available.
+ static const unsigned GPREven1[] = {
+ ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R8, ARM::R10,
+ ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R7,
+ ARM::R9, ARM::R11
+ };
+ static const unsigned GPROdd1[] = {
+ ARM::R1, ARM::R3, ARM::R5, ARM::R7, ARM::R9, ARM::R11,
+ ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6,
+ ARM::R8, ARM::R10
+ };
+
+ // FP is R7, R9 is available.
+ static const unsigned GPREven2[] = {
+ ARM::R0, ARM::R2, ARM::R4, ARM::R8, ARM::R10,
+ ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R6,
+ ARM::R9, ARM::R11
+ };
+ static const unsigned GPROdd2[] = {
+ ARM::R1, ARM::R3, ARM::R5, ARM::R9, ARM::R11,
+ ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6,
+ ARM::R8, ARM::R10
+ };
+
+ // FP is R11, R9 is available.
+ static const unsigned GPREven3[] = {
+ ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R8,
+ ARM::R1, ARM::R3, ARM::R10,ARM::R12,ARM::LR, ARM::R5, ARM::R7,
+ ARM::R9
+ };
+ static const unsigned GPROdd3[] = {
+ ARM::R1, ARM::R3, ARM::R5, ARM::R6, ARM::R9,
+ ARM::R0, ARM::R2, ARM::R10,ARM::R12,ARM::LR, ARM::R4, ARM::R7,
+ ARM::R8
+ };
+
+ // No FP, R9 is not available.
+ static const unsigned GPREven4[] = {
+ ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R10,
+ ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R7, ARM::R8,
+ ARM::R11
+ };
+ static const unsigned GPROdd4[] = {
+ ARM::R1, ARM::R3, ARM::R5, ARM::R7, ARM::R11,
+ ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8,
+ ARM::R10
+ };
+
+ // FP is R7, R9 is not available.
+ static const unsigned GPREven5[] = {
+ ARM::R0, ARM::R2, ARM::R4, ARM::R10,
+ ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R6, ARM::R8,
+ ARM::R11
+ };
+ static const unsigned GPROdd5[] = {
+ ARM::R1, ARM::R3, ARM::R5, ARM::R11,
+ ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8,
+ ARM::R10
+ };
+
+ // FP is R11, R9 is not available.
+ static const unsigned GPREven6[] = {
+ ARM::R0, ARM::R2, ARM::R4, ARM::R6,
+ ARM::R1, ARM::R3, ARM::R10,ARM::R12,ARM::LR, ARM::R5, ARM::R7, ARM::R8
+ };
+ static const unsigned GPROdd6[] = {
+ ARM::R1, ARM::R3, ARM::R5, ARM::R7,
+ ARM::R0, ARM::R2, ARM::R10,ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8
+ };
+
+ // We only support even/odd hints for GPR and rGPR.
+ if (RC != ARM::GPRRegisterClass && RC != ARM::rGPRRegisterClass)
+ return RC->getRawAllocationOrder(MF);
+
+ if (HintType == ARMRI::RegPairEven) {
+ if (isPhysicalRegister(HintReg) && getRegisterPairEven(HintReg, MF) == 0)
+ // It's no longer possible to fulfill this hint. Return the default
+ // allocation order.
+ return RC->getRawAllocationOrder(MF);
+
+ if (!TFI->hasFP(MF)) {
+ if (!STI.isR9Reserved())
+ return makeArrayRef(GPREven1);
+ else
+ return makeArrayRef(GPREven4);
+ } else if (FramePtr == ARM::R7) {
+ if (!STI.isR9Reserved())
+ return makeArrayRef(GPREven2);
+ else
+ return makeArrayRef(GPREven5);
+ } else { // FramePtr == ARM::R11
+ if (!STI.isR9Reserved())
+ return makeArrayRef(GPREven3);
+ else
+ return makeArrayRef(GPREven6);
+ }
+ } else if (HintType == ARMRI::RegPairOdd) {
+ if (isPhysicalRegister(HintReg) && getRegisterPairOdd(HintReg, MF) == 0)
+ // It's no longer possible to fulfill this hint. Return the default
+ // allocation order.
+ return RC->getRawAllocationOrder(MF);
+
+ if (!TFI->hasFP(MF)) {
+ if (!STI.isR9Reserved())
+ return makeArrayRef(GPROdd1);
+ else
+ return makeArrayRef(GPROdd4);
+ } else if (FramePtr == ARM::R7) {
+ if (!STI.isR9Reserved())
+ return makeArrayRef(GPROdd2);
+ else
+ return makeArrayRef(GPROdd5);
+ } else { // FramePtr == ARM::R11
+ if (!STI.isR9Reserved())
+ return makeArrayRef(GPROdd3);
+ else
+ return makeArrayRef(GPROdd6);
+ }
+ }
+ return RC->getRawAllocationOrder(MF);
+}
+
+/// ResolveRegAllocHint - Resolves the specified register allocation hint
+/// to a physical register. Returns the physical register if it is successful.
+unsigned
+ARMBaseRegisterInfo::ResolveRegAllocHint(unsigned Type, unsigned Reg,
+ const MachineFunction &MF) const {
+ if (Reg == 0 || !isPhysicalRegister(Reg))
+ return 0;
+ if (Type == 0)
+ return Reg;
+ else if (Type == (unsigned)ARMRI::RegPairOdd)
+ // Odd register.
+ return getRegisterPairOdd(Reg, MF);
+ else if (Type == (unsigned)ARMRI::RegPairEven)
+ // Even register.
+ return getRegisterPairEven(Reg, MF);
+ return 0;
+}
+
+void
+ARMBaseRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg,
+ MachineFunction &MF) const {
+ MachineRegisterInfo *MRI = &MF.getRegInfo();
+ std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(Reg);
+ if ((Hint.first == (unsigned)ARMRI::RegPairOdd ||
+ Hint.first == (unsigned)ARMRI::RegPairEven) &&
+ TargetRegisterInfo::isVirtualRegister(Hint.second)) {
+ // If 'Reg' is one of the even / odd register pair and it's now changed
+ // (e.g. coalesced) into a different register. The other register of the
+ // pair allocation hint must be updated to reflect the relationship
+ // change.
+ unsigned OtherReg = Hint.second;
+ Hint = MRI->getRegAllocationHint(OtherReg);
+ if (Hint.second == Reg)
+ // Make sure the pair has not already divorced.
+ MRI->setRegAllocationHint(OtherReg, Hint.first, NewReg);
+ }
+}
+
+bool
+ARMBaseRegisterInfo::avoidWriteAfterWrite(const TargetRegisterClass *RC) const {
+ // CortexA9 has a Write-after-write hazard for NEON registers.
+ if (!STI.isCortexA9())
+ return false;
+
+ switch (RC->getID()) {
+ case ARM::DPRRegClassID:
+ case ARM::DPR_8RegClassID:
+ case ARM::DPR_VFP2RegClassID:
+ case ARM::QPRRegClassID:
+ case ARM::QPR_8RegClassID:
+ case ARM::QPR_VFP2RegClassID:
+ case ARM::SPRRegClassID:
+ case ARM::SPR_8RegClassID:
+ // Avoid reusing S, D, and Q registers.
+ // Don't increase register pressure for QQ and QQQQ.
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+
+ if (!EnableBasePointer)
+ return false;
+
+ if (needsStackRealignment(MF) && MFI->hasVarSizedObjects())
+ return true;
+
+ // Thumb has trouble with negative offsets from the FP. Thumb2 has a limited
+ // negative range for ldr/str (255), and thumb1 is positive offsets only.
+ // It's going to be better to use the SP or Base Pointer instead. When there
+ // are variable sized objects, we can't reference off of the SP, so we
+ // reserve a Base Pointer.
+ if (AFI->isThumbFunction() && MFI->hasVarSizedObjects()) {
+ // Conservatively estimate whether the negative offset from the frame
+ // pointer will be sufficient to reach. If a function has a smallish
+ // frame, it's less likely to have lots of spills and callee saved
+ // space, so it's all more likely to be within range of the frame pointer.
+ // If it's wrong, the scavenger will still enable access to work, it just
+ // won't be optimal.
+ if (AFI->isThumb2Function() && MFI->getLocalFrameSize() < 128)
+ return false;
+ return true;
+ }
+
+ return false;
+}
+
+bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ // We can't realign the stack if:
+ // 1. Dynamic stack realignment is explicitly disabled,
+ // 2. This is a Thumb1 function (it's not useful, so we don't bother), or
+ // 3. There are VLAs in the function and the base pointer is disabled.
+ return (RealignStack && !AFI->isThumb1OnlyFunction() &&
+ (!MFI->hasVarSizedObjects() || EnableBasePointer));
+}
+
+bool ARMBaseRegisterInfo::
+needsStackRealignment(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const Function *F = MF.getFunction();
+ unsigned StackAlign = MF.getTarget().getFrameLowering()->getStackAlignment();
+ bool requiresRealignment = ((MFI->getLocalFrameMaxAlign() > StackAlign) ||
+ F->hasFnAttr(Attribute::StackAlignment));
+
+ return requiresRealignment && canRealignStack(MF);
+}
+
+bool ARMBaseRegisterInfo::
+cannotEliminateFrame(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ if (DisableFramePointerElim(MF) && MFI->adjustsStack())
+ return true;
+ return MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken()
+ || needsStackRealignment(MF);
+}
+
+unsigned
+ARMBaseRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ if (TFI->hasFP(MF))
+ return FramePtr;
+ return ARM::SP;
+}
+
+unsigned ARMBaseRegisterInfo::getEHExceptionRegister() const {
+ llvm_unreachable("What is the exception register");
+ return 0;
+}
+
+unsigned ARMBaseRegisterInfo::getEHHandlerRegister() const {
+ llvm_unreachable("What is the exception handler register");
+ return 0;
+}
+
+unsigned ARMBaseRegisterInfo::getRegisterPairEven(unsigned Reg,
+ const MachineFunction &MF) const {
+ switch (Reg) {
+ default: break;
+ // Return 0 if either register of the pair is a special register.
+ // So no R12, etc.
+ case ARM::R1: return ARM::R0;
+ case ARM::R3: return ARM::R2;
+ case ARM::R5: return ARM::R4;
+ case ARM::R7:
+ return (isReservedReg(MF, ARM::R7) || isReservedReg(MF, ARM::R6))
+ ? 0 : ARM::R6;
+ case ARM::R9: return isReservedReg(MF, ARM::R9) ? 0 :ARM::R8;
+ case ARM::R11: return isReservedReg(MF, ARM::R11) ? 0 : ARM::R10;
+
+ case ARM::S1: return ARM::S0;
+ case ARM::S3: return ARM::S2;
+ case ARM::S5: return ARM::S4;
+ case ARM::S7: return ARM::S6;
+ case ARM::S9: return ARM::S8;
+ case ARM::S11: return ARM::S10;
+ case ARM::S13: return ARM::S12;
+ case ARM::S15: return ARM::S14;
+ case ARM::S17: return ARM::S16;
+ case ARM::S19: return ARM::S18;
+ case ARM::S21: return ARM::S20;
+ case ARM::S23: return ARM::S22;
+ case ARM::S25: return ARM::S24;
+ case ARM::S27: return ARM::S26;
+ case ARM::S29: return ARM::S28;
+ case ARM::S31: return ARM::S30;
+
+ case ARM::D1: return ARM::D0;
+ case ARM::D3: return ARM::D2;
+ case ARM::D5: return ARM::D4;
+ case ARM::D7: return ARM::D6;
+ case ARM::D9: return ARM::D8;
+ case ARM::D11: return ARM::D10;
+ case ARM::D13: return ARM::D12;
+ case ARM::D15: return ARM::D14;
+ case ARM::D17: return ARM::D16;
+ case ARM::D19: return ARM::D18;
+ case ARM::D21: return ARM::D20;
+ case ARM::D23: return ARM::D22;
+ case ARM::D25: return ARM::D24;
+ case ARM::D27: return ARM::D26;
+ case ARM::D29: return ARM::D28;
+ case ARM::D31: return ARM::D30;
+ }
+
+ return 0;
+}
+
+unsigned ARMBaseRegisterInfo::getRegisterPairOdd(unsigned Reg,
+ const MachineFunction &MF) const {
+ switch (Reg) {
+ default: break;
+ // Return 0 if either register of the pair is a special register.
+ // So no R12, etc.
+ case ARM::R0: return ARM::R1;
+ case ARM::R2: return ARM::R3;
+ case ARM::R4: return ARM::R5;
+ case ARM::R6:
+ return (isReservedReg(MF, ARM::R7) || isReservedReg(MF, ARM::R6))
+ ? 0 : ARM::R7;
+ case ARM::R8: return isReservedReg(MF, ARM::R9) ? 0 :ARM::R9;
+ case ARM::R10: return isReservedReg(MF, ARM::R11) ? 0 : ARM::R11;
+
+ case ARM::S0: return ARM::S1;
+ case ARM::S2: return ARM::S3;
+ case ARM::S4: return ARM::S5;
+ case ARM::S6: return ARM::S7;
+ case ARM::S8: return ARM::S9;
+ case ARM::S10: return ARM::S11;
+ case ARM::S12: return ARM::S13;
+ case ARM::S14: return ARM::S15;
+ case ARM::S16: return ARM::S17;
+ case ARM::S18: return ARM::S19;
+ case ARM::S20: return ARM::S21;
+ case ARM::S22: return ARM::S23;
+ case ARM::S24: return ARM::S25;
+ case ARM::S26: return ARM::S27;
+ case ARM::S28: return ARM::S29;
+ case ARM::S30: return ARM::S31;
+
+ case ARM::D0: return ARM::D1;
+ case ARM::D2: return ARM::D3;
+ case ARM::D4: return ARM::D5;
+ case ARM::D6: return ARM::D7;
+ case ARM::D8: return ARM::D9;
+ case ARM::D10: return ARM::D11;
+ case ARM::D12: return ARM::D13;
+ case ARM::D14: return ARM::D15;
+ case ARM::D16: return ARM::D17;
+ case ARM::D18: return ARM::D19;
+ case ARM::D20: return ARM::D21;
+ case ARM::D22: return ARM::D23;
+ case ARM::D24: return ARM::D25;
+ case ARM::D26: return ARM::D27;
+ case ARM::D28: return ARM::D29;
+ case ARM::D30: return ARM::D31;
+ }
+
+ return 0;
+}
+
+/// emitLoadConstPool - Emits a load from constpool to materialize the
+/// specified immediate.
+void ARMBaseRegisterInfo::
+emitLoadConstPool(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ DebugLoc dl,
+ unsigned DestReg, unsigned SubIdx, int Val,
+ ARMCC::CondCodes Pred,
+ unsigned PredReg, unsigned MIFlags) const {
+ MachineFunction &MF = *MBB.getParent();
+ MachineConstantPool *ConstantPool = MF.getConstantPool();
+ const Constant *C =
+ ConstantInt::get(Type::getInt32Ty(MF.getFunction()->getContext()), Val);
+ unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4);
+
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::LDRcp))
+ .addReg(DestReg, getDefRegState(true), SubIdx)
+ .addConstantPoolIndex(Idx)
+ .addImm(0).addImm(Pred).addReg(PredReg)
+ .setMIFlags(MIFlags);
+}
+
+bool ARMBaseRegisterInfo::
+requiresRegisterScavenging(const MachineFunction &MF) const {
+ return true;
+}
+
+bool ARMBaseRegisterInfo::
+requiresFrameIndexScavenging(const MachineFunction &MF) const {
+ return true;
+}
+
+bool ARMBaseRegisterInfo::
+requiresVirtualBaseRegisters(const MachineFunction &MF) const {
+ return EnableLocalStackAlloc;
+}
+
+static void
+emitSPUpdate(bool isARM,
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
+ DebugLoc dl, const ARMBaseInstrInfo &TII,
+ int NumBytes,
+ ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) {
+ if (isARM)
+ emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes,
+ Pred, PredReg, TII);
+ else
+ emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes,
+ Pred, PredReg, TII);
+}
+
+
+void ARMBaseRegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ if (!TFI->hasReservedCallFrame(MF)) {
+ // If we have alloca, convert as follows:
+ // ADJCALLSTACKDOWN -> sub, sp, sp, amount
+ // ADJCALLSTACKUP -> add, sp, sp, amount
+ MachineInstr *Old = I;
+ DebugLoc dl = Old->getDebugLoc();
+ unsigned Amount = Old->getOperand(0).getImm();
+ if (Amount != 0) {
+ // We need to keep the stack aligned properly. To do this, we round the
+ // amount of space needed for the outgoing arguments up to the next
+ // alignment boundary.
+ unsigned Align = TFI->getStackAlignment();
+ Amount = (Amount+Align-1)/Align*Align;
+
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ assert(!AFI->isThumb1OnlyFunction() &&
+ "This eliminateCallFramePseudoInstr does not support Thumb1!");
+ bool isARM = !AFI->isThumbFunction();
+
+ // Replace the pseudo instruction with a new instruction...
+ unsigned Opc = Old->getOpcode();
+ int PIdx = Old->findFirstPredOperandIdx();
+ ARMCC::CondCodes Pred = (PIdx == -1)
+ ? ARMCC::AL : (ARMCC::CondCodes)Old->getOperand(PIdx).getImm();
+ if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
+ // Note: PredReg is operand 2 for ADJCALLSTACKDOWN.
+ unsigned PredReg = Old->getOperand(2).getReg();
+ emitSPUpdate(isARM, MBB, I, dl, TII, -Amount, Pred, PredReg);
+ } else {
+ // Note: PredReg is operand 3 for ADJCALLSTACKUP.
+ unsigned PredReg = Old->getOperand(3).getReg();
+ assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
+ emitSPUpdate(isARM, MBB, I, dl, TII, Amount, Pred, PredReg);
+ }
+ }
+ }
+ MBB.erase(I);
+}
+
+int64_t ARMBaseRegisterInfo::
+getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const {
+ const MCInstrDesc &Desc = MI->getDesc();
+ unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
+ int64_t InstrOffs = 0;;
+ int Scale = 1;
+ unsigned ImmIdx = 0;
+ switch (AddrMode) {
+ case ARMII::AddrModeT2_i8:
+ case ARMII::AddrModeT2_i12:
+ case ARMII::AddrMode_i12:
+ InstrOffs = MI->getOperand(Idx+1).getImm();
+ Scale = 1;
+ break;
+ case ARMII::AddrMode5: {
+ // VFP address mode.
+ const MachineOperand &OffOp = MI->getOperand(Idx+1);
+ InstrOffs = ARM_AM::getAM5Offset(OffOp.getImm());
+ if (ARM_AM::getAM5Op(OffOp.getImm()) == ARM_AM::sub)
+ InstrOffs = -InstrOffs;
+ Scale = 4;
+ break;
+ }
+ case ARMII::AddrMode2: {
+ ImmIdx = Idx+2;
+ InstrOffs = ARM_AM::getAM2Offset(MI->getOperand(ImmIdx).getImm());
+ if (ARM_AM::getAM2Op(MI->getOperand(ImmIdx).getImm()) == ARM_AM::sub)
+ InstrOffs = -InstrOffs;
+ break;
+ }
+ case ARMII::AddrMode3: {
+ ImmIdx = Idx+2;
+ InstrOffs = ARM_AM::getAM3Offset(MI->getOperand(ImmIdx).getImm());
+ if (ARM_AM::getAM3Op(MI->getOperand(ImmIdx).getImm()) == ARM_AM::sub)
+ InstrOffs = -InstrOffs;
+ break;
+ }
+ case ARMII::AddrModeT1_s: {
+ ImmIdx = Idx+1;
+ InstrOffs = MI->getOperand(ImmIdx).getImm();
+ Scale = 4;
+ break;
+ }
+ default:
+ llvm_unreachable("Unsupported addressing mode!");
+ break;
+ }
+
+ return InstrOffs * Scale;
+}
+
+/// needsFrameBaseReg - Returns true if the instruction's frame index
+/// reference would be better served by a base register other than FP
+/// or SP. Used by LocalStackFrameAllocation to determine which frame index
+/// references it should create new base registers for.
+bool ARMBaseRegisterInfo::
+needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
+ for (unsigned i = 0; !MI->getOperand(i).isFI(); ++i) {
+ assert(i < MI->getNumOperands() &&"Instr doesn't have FrameIndex operand!");
+ }
+
+ // It's the load/store FI references that cause issues, as it can be difficult
+ // to materialize the offset if it won't fit in the literal field. Estimate
+ // based on the size of the local frame and some conservative assumptions
+ // about the rest of the stack frame (note, this is pre-regalloc, so
+ // we don't know everything for certain yet) whether this offset is likely
+ // to be out of range of the immediate. Return true if so.
+
+ // We only generate virtual base registers for loads and stores, so
+ // return false for everything else.
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ case ARM::LDRi12: case ARM::LDRH: case ARM::LDRBi12:
+ case ARM::STRi12: case ARM::STRH: case ARM::STRBi12:
+ case ARM::t2LDRi12: case ARM::t2LDRi8:
+ case ARM::t2STRi12: case ARM::t2STRi8:
+ case ARM::VLDRS: case ARM::VLDRD:
+ case ARM::VSTRS: case ARM::VSTRD:
+ case ARM::tSTRspi: case ARM::tLDRspi:
+ if (ForceAllBaseRegAlloc)
+ return true;
+ break;
+ default:
+ return false;
+ }
+
+ // Without a virtual base register, if the function has variable sized
+ // objects, all fixed-size local references will be via the frame pointer,
+ // Approximate the offset and see if it's legal for the instruction.
+ // Note that the incoming offset is based on the SP value at function entry,
+ // so it'll be negative.
+ MachineFunction &MF = *MI->getParent()->getParent();
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+
+ // Estimate an offset from the frame pointer.
+ // Conservatively assume all callee-saved registers get pushed. R4-R6
+ // will be earlier than the FP, so we ignore those.
+ // R7, LR
+ int64_t FPOffset = Offset - 8;
+ // ARM and Thumb2 functions also need to consider R8-R11 and D8-D15
+ if (!AFI->isThumbFunction() || !AFI->isThumb1OnlyFunction())
+ FPOffset -= 80;
+ // Estimate an offset from the stack pointer.
+ // The incoming offset is relating to the SP at the start of the function,
+ // but when we access the local it'll be relative to the SP after local
+ // allocation, so adjust our SP-relative offset by that allocation size.
+ Offset = -Offset;
+ Offset += MFI->getLocalFrameSize();
+ // Assume that we'll have at least some spill slots allocated.
+ // FIXME: This is a total SWAG number. We should run some statistics
+ // and pick a real one.
+ Offset += 128; // 128 bytes of spill slots
+
+ // If there is a frame pointer, try using it.
+ // The FP is only available if there is no dynamic realignment. We
+ // don't know for sure yet whether we'll need that, so we guess based
+ // on whether there are any local variables that would trigger it.
+ unsigned StackAlign = TFI->getStackAlignment();
+ if (TFI->hasFP(MF) &&
+ !((MFI->getLocalFrameMaxAlign() > StackAlign) && canRealignStack(MF))) {
+ if (isFrameOffsetLegal(MI, FPOffset))
+ return false;
+ }
+ // If we can reference via the stack pointer, try that.
+ // FIXME: This (and the code that resolves the references) can be improved
+ // to only disallow SP relative references in the live range of
+ // the VLA(s). In practice, it's unclear how much difference that
+ // would make, but it may be worth doing.
+ if (!MFI->hasVarSizedObjects() && isFrameOffsetLegal(MI, Offset))
+ return false;
+
+ // The offset likely isn't legal, we want to allocate a virtual base register.
+ return true;
+}
+
+/// materializeFrameBaseRegister - Insert defining instruction(s) for BaseReg to
+/// be a pointer to FrameIdx at the beginning of the basic block.
+void ARMBaseRegisterInfo::
+materializeFrameBaseRegister(MachineBasicBlock *MBB,
+ unsigned BaseReg, int FrameIdx,
+ int64_t Offset) const {
+ ARMFunctionInfo *AFI = MBB->getParent()->getInfo<ARMFunctionInfo>();
+ unsigned ADDriOpc = !AFI->isThumbFunction() ? ARM::ADDri :
+ (AFI->isThumb1OnlyFunction() ? ARM::tADDrSPi : ARM::t2ADDri);
+
+ MachineBasicBlock::iterator Ins = MBB->begin();
+ DebugLoc DL; // Defaults to "unknown"
+ if (Ins != MBB->end())
+ DL = Ins->getDebugLoc();
+
+ const MCInstrDesc &MCID = TII.get(ADDriOpc);
+ MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+ MRI.constrainRegClass(BaseReg, TII.getRegClass(MCID, 0, this));
+
+ MachineInstrBuilder MIB = AddDefaultPred(BuildMI(*MBB, Ins, DL, MCID, BaseReg)
+ .addFrameIndex(FrameIdx).addImm(Offset));
+
+ if (!AFI->isThumb1OnlyFunction())
+ AddDefaultCC(MIB);
+}
+
+void
+ARMBaseRegisterInfo::resolveFrameIndex(MachineBasicBlock::iterator I,
+ unsigned BaseReg, int64_t Offset) const {
+ MachineInstr &MI = *I;
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ int Off = Offset; // ARM doesn't need the general 64-bit offsets
+ unsigned i = 0;
+
+ assert(!AFI->isThumb1OnlyFunction() &&
+ "This resolveFrameIndex does not support Thumb1!");
+
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+ bool Done = false;
+ if (!AFI->isThumbFunction())
+ Done = rewriteARMFrameIndex(MI, i, BaseReg, Off, TII);
+ else {
+ assert(AFI->isThumb2Function());
+ Done = rewriteT2FrameIndex(MI, i, BaseReg, Off, TII);
+ }
+ assert (Done && "Unable to resolve frame index!");
+ (void)Done;
+}
+
+bool ARMBaseRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
+ int64_t Offset) const {
+ const MCInstrDesc &Desc = MI->getDesc();
+ unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
+ unsigned i = 0;
+
+ while (!MI->getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI->getNumOperands() &&"Instr doesn't have FrameIndex operand!");
+ }
+
+ // AddrMode4 and AddrMode6 cannot handle any offset.
+ if (AddrMode == ARMII::AddrMode4 || AddrMode == ARMII::AddrMode6)
+ return Offset == 0;
+
+ unsigned NumBits = 0;
+ unsigned Scale = 1;
+ bool isSigned = true;
+ switch (AddrMode) {
+ case ARMII::AddrModeT2_i8:
+ case ARMII::AddrModeT2_i12:
+ // i8 supports only negative, and i12 supports only positive, so
+ // based on Offset sign, consider the appropriate instruction
+ Scale = 1;
+ if (Offset < 0) {
+ NumBits = 8;
+ Offset = -Offset;
+ } else {
+ NumBits = 12;
+ }
+ break;
+ case ARMII::AddrMode5:
+ // VFP address mode.
+ NumBits = 8;
+ Scale = 4;
+ break;
+ case ARMII::AddrMode_i12:
+ case ARMII::AddrMode2:
+ NumBits = 12;
+ break;
+ case ARMII::AddrMode3:
+ NumBits = 8;
+ break;
+ case ARMII::AddrModeT1_s:
+ NumBits = 5;
+ Scale = 4;
+ isSigned = false;
+ break;
+ default:
+ llvm_unreachable("Unsupported addressing mode!");
+ break;
+ }
+
+ Offset += getFrameIndexInstrOffset(MI, i);
+ // Make sure the offset is encodable for instructions that scale the
+ // immediate.
+ if ((Offset & (Scale-1)) != 0)
+ return false;
+
+ if (isSigned && Offset < 0)
+ Offset = -Offset;
+
+ unsigned Mask = (1 << NumBits) - 1;
+ if ((unsigned)Offset <= Mask * Scale)
+ return true;
+
+ return false;
+}
+
+void
+ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS) const {
+ unsigned i = 0;
+ MachineInstr &MI = *II;
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ const ARMFrameLowering *TFI =
+ static_cast<const ARMFrameLowering*>(MF.getTarget().getFrameLowering());
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ assert(!AFI->isThumb1OnlyFunction() &&
+ "This eliminateFrameIndex does not support Thumb1!");
+
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+
+ int FrameIndex = MI.getOperand(i).getIndex();
+ unsigned FrameReg;
+
+ int Offset = TFI->ResolveFrameIndexReference(MF, FrameIndex, FrameReg, SPAdj);
+
+ // Special handling of dbg_value instructions.
+ if (MI.isDebugValue()) {
+ MI.getOperand(i). ChangeToRegister(FrameReg, false /*isDef*/);
+ MI.getOperand(i+1).ChangeToImmediate(Offset);
+ return;
+ }
+
+ // Modify MI as necessary to handle as much of 'Offset' as possible
+ bool Done = false;
+ if (!AFI->isThumbFunction())
+ Done = rewriteARMFrameIndex(MI, i, FrameReg, Offset, TII);
+ else {
+ assert(AFI->isThumb2Function());
+ Done = rewriteT2FrameIndex(MI, i, FrameReg, Offset, TII);
+ }
+ if (Done)
+ return;
+
+ // If we get here, the immediate doesn't fit into the instruction. We folded
+ // as much as possible above, handle the rest, providing a register that is
+ // SP+LargeImm.
+ assert((Offset ||
+ (MI.getDesc().TSFlags & ARMII::AddrModeMask) == ARMII::AddrMode4 ||
+ (MI.getDesc().TSFlags & ARMII::AddrModeMask) == ARMII::AddrMode6) &&
+ "This code isn't needed if offset already handled!");
+
+ unsigned ScratchReg = 0;
+ int PIdx = MI.findFirstPredOperandIdx();
+ ARMCC::CondCodes Pred = (PIdx == -1)
+ ? ARMCC::AL : (ARMCC::CondCodes)MI.getOperand(PIdx).getImm();
+ unsigned PredReg = (PIdx == -1) ? 0 : MI.getOperand(PIdx+1).getReg();
+ if (Offset == 0)
+ // Must be addrmode4/6.
+ MI.getOperand(i).ChangeToRegister(FrameReg, false, false, false);
+ else {
+ ScratchReg = MF.getRegInfo().createVirtualRegister(ARM::GPRRegisterClass);
+ if (!AFI->isThumbFunction())
+ emitARMRegPlusImmediate(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg,
+ Offset, Pred, PredReg, TII);
+ else {
+ assert(AFI->isThumb2Function());
+ emitT2RegPlusImmediate(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg,
+ Offset, Pred, PredReg, TII);
+ }
+ // Update the original instruction to use the scratch register.
+ MI.getOperand(i).ChangeToRegister(ScratchReg, false, false, true);
+ }
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
new file mode 100644
index 0000000..fee17ff
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -0,0 +1,201 @@
+//===- ARMBaseRegisterInfo.h - ARM Register Information Impl ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the base ARM implementation of TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMBASEREGISTERINFO_H
+#define ARMBASEREGISTERINFO_H
+
+#include "ARM.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#define GET_REGINFO_HEADER
+#include "ARMGenRegisterInfo.inc"
+
+namespace llvm {
+ class ARMSubtarget;
+ class ARMBaseInstrInfo;
+ class Type;
+
+/// Register allocation hints.
+namespace ARMRI {
+ enum {
+ RegPairOdd = 1,
+ RegPairEven = 2
+ };
+}
+
+/// isARMArea1Register - Returns true if the register is a low register (r0-r7)
+/// or a stack/pc register that we should push/pop.
+static inline bool isARMArea1Register(unsigned Reg, bool isDarwin) {
+ using namespace ARM;
+ switch (Reg) {
+ case R0: case R1: case R2: case R3:
+ case R4: case R5: case R6: case R7:
+ case LR: case SP: case PC:
+ return true;
+ case R8: case R9: case R10: case R11:
+ // For darwin we want r7 and lr to be next to each other.
+ return !isDarwin;
+ default:
+ return false;
+ }
+}
+
+static inline bool isARMArea2Register(unsigned Reg, bool isDarwin) {
+ using namespace ARM;
+ switch (Reg) {
+ case R8: case R9: case R10: case R11:
+ // Darwin has this second area.
+ return isDarwin;
+ default:
+ return false;
+ }
+}
+
+static inline bool isARMArea3Register(unsigned Reg, bool isDarwin) {
+ using namespace ARM;
+ switch (Reg) {
+ case D15: case D14: case D13: case D12:
+ case D11: case D10: case D9: case D8:
+ return true;
+ default:
+ return false;
+ }
+}
+
+class ARMBaseRegisterInfo : public ARMGenRegisterInfo {
+protected:
+ const ARMBaseInstrInfo &TII;
+ const ARMSubtarget &STI;
+
+ /// FramePtr - ARM physical register used as frame ptr.
+ unsigned FramePtr;
+
+ /// BasePtr - ARM physical register used as a base ptr in complex stack
+ /// frames. I.e., when we need a 3rd base, not just SP and FP, due to
+ /// variable size stack objects.
+ unsigned BasePtr;
+
+ // Can be only subclassed.
+ explicit ARMBaseRegisterInfo(const ARMBaseInstrInfo &tii,
+ const ARMSubtarget &STI);
+
+ // Return the opcode that implements 'Op', or 0 if no opcode
+ unsigned getOpcode(int Op) const;
+
+public:
+ /// Code Generation virtual methods...
+ const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ /// getMatchingSuperRegClass - Return a subclass of the specified register
+ /// class A so that each register in it has a sub-register of the
+ /// specified sub-register index which is in the specified register class B.
+ virtual const TargetRegisterClass *
+ getMatchingSuperRegClass(const TargetRegisterClass *A,
+ const TargetRegisterClass *B, unsigned Idx) const;
+
+ /// canCombineSubRegIndices - Given a register class and a list of
+ /// subregister indices, return true if it's possible to combine the
+ /// subregister indices into one that corresponds to a larger
+ /// subregister. Return the new subregister index by reference. Note the
+ /// new index may be zero if the given subregisters can be combined to
+ /// form the whole register.
+ virtual bool canCombineSubRegIndices(const TargetRegisterClass *RC,
+ SmallVectorImpl<unsigned> &SubIndices,
+ unsigned &NewSubIdx) const;
+
+ const TargetRegisterClass *getPointerRegClass(unsigned Kind = 0) const;
+ const TargetRegisterClass*
+ getCrossCopyRegClass(const TargetRegisterClass *RC) const;
+
+ const TargetRegisterClass*
+ getLargestLegalSuperClass(const TargetRegisterClass *RC) const;
+
+ unsigned getRegPressureLimit(const TargetRegisterClass *RC,
+ MachineFunction &MF) const;
+
+ ArrayRef<unsigned> getRawAllocationOrder(const TargetRegisterClass *RC,
+ unsigned HintType, unsigned HintReg,
+ const MachineFunction &MF) const;
+
+ unsigned ResolveRegAllocHint(unsigned Type, unsigned Reg,
+ const MachineFunction &MF) const;
+
+ void UpdateRegAllocHint(unsigned Reg, unsigned NewReg,
+ MachineFunction &MF) const;
+
+ virtual bool avoidWriteAfterWrite(const TargetRegisterClass *RC) const;
+
+ bool hasBasePointer(const MachineFunction &MF) const;
+
+ bool canRealignStack(const MachineFunction &MF) const;
+ bool needsStackRealignment(const MachineFunction &MF) const;
+ int64_t getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const;
+ bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const;
+ void materializeFrameBaseRegister(MachineBasicBlock *MBB,
+ unsigned BaseReg, int FrameIdx,
+ int64_t Offset) const;
+ void resolveFrameIndex(MachineBasicBlock::iterator I,
+ unsigned BaseReg, int64_t Offset) const;
+ bool isFrameOffsetLegal(const MachineInstr *MI, int64_t Offset) const;
+
+ bool cannotEliminateFrame(const MachineFunction &MF) const;
+
+ // Debug information queries.
+ unsigned getFrameRegister(const MachineFunction &MF) const;
+ unsigned getBaseRegister() const { return BasePtr; }
+
+ // Exception handling queries.
+ unsigned getEHExceptionRegister() const;
+ unsigned getEHHandlerRegister() const;
+
+ bool isLowRegister(unsigned Reg) const;
+
+
+ /// emitLoadConstPool - Emits a load from constpool to materialize the
+ /// specified immediate.
+ virtual void emitLoadConstPool(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ DebugLoc dl,
+ unsigned DestReg, unsigned SubIdx,
+ int Val,
+ ARMCC::CondCodes Pred = ARMCC::AL,
+ unsigned PredReg = 0,
+ unsigned MIFlags = MachineInstr::NoFlags)const;
+
+ /// Code Generation virtual methods...
+ virtual bool isReservedReg(const MachineFunction &MF, unsigned Reg) const;
+
+ virtual bool requiresRegisterScavenging(const MachineFunction &MF) const;
+
+ virtual bool requiresFrameIndexScavenging(const MachineFunction &MF) const;
+
+ virtual bool requiresVirtualBaseRegisters(const MachineFunction &MF) const;
+
+ virtual void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ virtual void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
+
+private:
+ unsigned getRegisterPairEven(unsigned Reg, const MachineFunction &MF) const;
+
+ unsigned getRegisterPairOdd(unsigned Reg, const MachineFunction &MF) const;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMBuildAttrs.h b/contrib/llvm/lib/Target/ARM/ARMBuildAttrs.h
new file mode 100644
index 0000000..69eddf0
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMBuildAttrs.h
@@ -0,0 +1,131 @@
+//===-------- ARMBuildAttrs.h - ARM Build Attributes ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains enumerations and support routines for ARM build attributes
+// as defined in ARM ABI addenda document (ABI release 2.08).
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __TARGET_ARMBUILDATTRS_H__
+#define __TARGET_ARMBUILDATTRS_H__
+
+namespace ARMBuildAttrs {
+ enum SpecialAttr {
+ // This is for the .cpu asm attr. It translates into one or more
+ // AttrType (below) entries in the .ARM.attributes section in the ELF.
+ SEL_CPU
+ };
+
+ enum AttrType {
+ // Rest correspond to ELF/.ARM.attributes
+ File = 1,
+ Section = 2,
+ Symbol = 3,
+ CPU_raw_name = 4,
+ CPU_name = 5,
+ CPU_arch = 6,
+ CPU_arch_profile = 7,
+ ARM_ISA_use = 8,
+ THUMB_ISA_use = 9,
+ VFP_arch = 10,
+ WMMX_arch = 11,
+ Advanced_SIMD_arch = 12,
+ PCS_config = 13,
+ ABI_PCS_R9_use = 14,
+ ABI_PCS_RW_data = 15,
+ ABI_PCS_RO_data = 16,
+ ABI_PCS_GOT_use = 17,
+ ABI_PCS_wchar_t = 18,
+ ABI_FP_rounding = 19,
+ ABI_FP_denormal = 20,
+ ABI_FP_exceptions = 21,
+ ABI_FP_user_exceptions = 22,
+ ABI_FP_number_model = 23,
+ ABI_align8_needed = 24,
+ ABI_align8_preserved = 25,
+ ABI_enum_size = 26,
+ ABI_HardFP_use = 27,
+ ABI_VFP_args = 28,
+ ABI_WMMX_args = 29,
+ ABI_optimization_goals = 30,
+ ABI_FP_optimization_goals = 31,
+ compatibility = 32,
+ CPU_unaligned_access = 34,
+ VFP_HP_extension = 36,
+ ABI_FP_16bit_format = 38,
+ MPextension_use = 42, // was 70, 2.08 ABI
+ DIV_use = 44,
+ nodefaults = 64,
+ also_compatible_with = 65,
+ T2EE_use = 66,
+ conformance = 67,
+ Virtualization_use = 68,
+ MPextension_use_old = 70
+ };
+
+ // Magic numbers for .ARM.attributes
+ enum AttrMagic {
+ Format_Version = 0x41
+ };
+
+ // Legal Values for CPU_arch, (=6), uleb128
+ enum CPUArch {
+ Pre_v4 = 0,
+ v4 = 1, // e.g. SA110
+ v4T = 2, // e.g. ARM7TDMI
+ v5T = 3, // e.g. ARM9TDMI
+ v5TE = 4, // e.g. ARM946E_S
+ v5TEJ = 5, // e.g. ARM926EJ_S
+ v6 = 6, // e.g. ARM1136J_S
+ v6KZ = 7, // e.g. ARM1176JZ_S
+ v6T2 = 8, // e.g. ARM1156T2F_S
+ v6K = 9, // e.g. ARM1136J_S
+ v7 = 10, // e.g. Cortex A8, Cortex M3
+ v6_M = 11, // e.g. Cortex M1
+ v6S_M = 12, // v6_M with the System extensions
+ v7E_M = 13 // v7_M with DSP extensions
+ };
+
+ enum CPUArchProfile { // (=7), uleb128
+ Not_Applicable = 0, // pre v7, or cross-profile code
+ ApplicationProfile = (0x41), // 'A' (e.g. for Cortex A8)
+ RealTimeProfile = (0x52), // 'R' (e.g. for Cortex R4)
+ MicroControllerProfile = (0x4D), // 'M' (e.g. for Cortex M3)
+ SystemProfile = (0x53) // 'S' Application or real-time profile
+ };
+
+ // The following have a lot of common use cases
+ enum {
+ //ARMISAUse (=8), uleb128 and THUMBISAUse (=9), uleb128
+ Not_Allowed = 0,
+ Allowed = 1,
+
+ // FP_arch (=10), uleb128 (formerly Tag_VFP_arch = 10)
+ AllowFPv2 = 2, // v2 FP ISA permitted (implies use of the v1 FP ISA)
+ AllowFPv3A = 3, // v3 FP ISA permitted (implies use of the v2 FP ISA)
+ AllowFPv3B = 4, // v3 FP ISA permitted, but only D0-D15, S0-S31
+ AllowFPv4A = 5, // v4 FP ISA permitted (implies use of v3 FP ISA)
+ AllowFPv4B = 6, // v4 FP ISA was permitted, but only D0-D15, S0-S31
+
+ // Tag_WMMX_arch, (=11), uleb128
+ AllowThumb32 = 2, // 32-bit Thumb (implies 16-bit instructions)
+
+ // Tag_WMMX_arch, (=11), uleb128
+ AllowWMMXv1 = 2, // The user permitted this entity to use WMMX v2
+
+ // Tag_ABI_FP_denormal, (=20), uleb128
+ PreserveFPSign = 2, // sign when flushed-to-zero is preserved
+
+ // Tag_ABI_FP_number_model, (=23), uleb128
+ AllowRTABI = 2, // numbers, infinities, and one quiet NaN (see [RTABI])
+ AllowIEE754 = 3 // this code to use all the IEEE 754-defined FP encodings
+ };
+}
+
+#endif // __TARGET_ARMBUILDATTRS_H__
diff --git a/contrib/llvm/lib/Target/ARM/ARMCallingConv.h b/contrib/llvm/lib/Target/ARM/ARMCallingConv.h
new file mode 100644
index 0000000..ff7db1f
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMCallingConv.h
@@ -0,0 +1,160 @@
+//===-- ARMCallingConv.h - ARM Custom Calling Convention Routines ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the custom routines for the ARM Calling Convention that
+// aren't done by tablegen.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMCALLINGCONV_H
+#define ARMCALLINGCONV_H
+
+#include "llvm/CallingConv.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMRegisterInfo.h"
+#include "ARMSubtarget.h"
+#include "ARM.h"
+
+namespace llvm {
+
+// APCS f64 is in register pairs, possibly split to stack
+static bool f64AssignAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ CCState &State, bool CanFail) {
+ static const unsigned RegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
+
+ // Try to get the first register.
+ if (unsigned Reg = State.AllocateReg(RegList, 4))
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ else {
+ // For the 2nd half of a v2f64, do not fail.
+ if (CanFail)
+ return false;
+
+ // Put the whole thing on the stack.
+ State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
+ State.AllocateStack(8, 4),
+ LocVT, LocInfo));
+ return true;
+ }
+
+ // Try to get the second register.
+ if (unsigned Reg = State.AllocateReg(RegList, 4))
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ else
+ State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
+ State.AllocateStack(4, 4),
+ LocVT, LocInfo));
+ return true;
+}
+
+static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ if (!f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, true))
+ return false;
+ if (LocVT == MVT::v2f64 &&
+ !f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, false))
+ return false;
+ return true; // we handled it
+}
+
+// AAPCS f64 is in aligned register pairs
+static bool f64AssignAAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ CCState &State, bool CanFail) {
+ static const unsigned HiRegList[] = { ARM::R0, ARM::R2 };
+ static const unsigned LoRegList[] = { ARM::R1, ARM::R3 };
+ static const unsigned ShadowRegList[] = { ARM::R0, ARM::R1 };
+
+ unsigned Reg = State.AllocateReg(HiRegList, ShadowRegList, 2);
+ if (Reg == 0) {
+ // For the 2nd half of a v2f64, do not just fail.
+ if (CanFail)
+ return false;
+
+ // Put the whole thing on the stack.
+ State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
+ State.AllocateStack(8, 8),
+ LocVT, LocInfo));
+ return true;
+ }
+
+ unsigned i;
+ for (i = 0; i < 2; ++i)
+ if (HiRegList[i] == Reg)
+ break;
+
+ unsigned T = State.AllocateReg(LoRegList[i]);
+ (void)T;
+ assert(T == LoRegList[i] && "Could not allocate register");
+
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
+ LocVT, LocInfo));
+ return true;
+}
+
+static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ if (!f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, true))
+ return false;
+ if (LocVT == MVT::v2f64 &&
+ !f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, false))
+ return false;
+ return true; // we handled it
+}
+
+static bool f64RetAssign(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo, CCState &State) {
+ static const unsigned HiRegList[] = { ARM::R0, ARM::R2 };
+ static const unsigned LoRegList[] = { ARM::R1, ARM::R3 };
+
+ unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 2);
+ if (Reg == 0)
+ return false; // we didn't handle it
+
+ unsigned i;
+ for (i = 0; i < 2; ++i)
+ if (HiRegList[i] == Reg)
+ break;
+
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
+ LocVT, LocInfo));
+ return true;
+}
+
+static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ if (!f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State))
+ return false;
+ if (LocVT == MVT::v2f64 && !f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State))
+ return false;
+ return true; // we handled it
+}
+
+static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ return RetCC_ARM_APCS_Custom_f64(ValNo, ValVT, LocVT, LocInfo, ArgFlags,
+ State);
+}
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMCallingConv.td b/contrib/llvm/lib/Target/ARM/ARMCallingConv.td
new file mode 100644
index 0000000..d2981c0
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMCallingConv.td
@@ -0,0 +1,164 @@
+//===- ARMCallingConv.td - Calling Conventions for ARM -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This describes the calling conventions for ARM architecture.
+//===----------------------------------------------------------------------===//
+
+/// CCIfSubtarget - Match if the current subtarget has a feature F.
+class CCIfSubtarget<string F, CCAction A>:
+ CCIf<!strconcat("State.getTarget().getSubtarget<ARMSubtarget>().", F), A>;
+
+/// CCIfAlign - Match of the original alignment of the arg
+class CCIfAlign<string Align, CCAction A>:
+ CCIf<!strconcat("ArgFlags.getOrigAlign() == ", Align), A>;
+
+//===----------------------------------------------------------------------===//
+// ARM APCS Calling Convention
+//===----------------------------------------------------------------------===//
+def CC_ARM_APCS : CallingConv<[
+
+ // Handles byval parameters.
+ CCIfByVal<CCPassByVal<4, 4>>,
+
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // Handle all vector types as either f64 or v2f64.
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+
+ // f64 and v2f64 are passed in adjacent GPRs, possibly split onto the stack
+ CCIfType<[f64, v2f64], CCCustom<"CC_ARM_APCS_Custom_f64">>,
+
+ CCIfType<[f32], CCBitConvertToType<i32>>,
+ CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>,
+
+ CCIfType<[i32], CCAssignToStack<4, 4>>,
+ CCIfType<[f64], CCAssignToStack<8, 4>>,
+ CCIfType<[v2f64], CCAssignToStack<16, 4>>
+]>;
+
+def RetCC_ARM_APCS : CallingConv<[
+ CCIfType<[f32], CCBitConvertToType<i32>>,
+
+ // Handle all vector types as either f64 or v2f64.
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+
+ CCIfType<[f64, v2f64], CCCustom<"RetCC_ARM_APCS_Custom_f64">>,
+
+ CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>,
+ CCIfType<[i64], CCAssignToRegWithShadow<[R0, R2], [R1, R3]>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// ARM APCS Calling Convention for FastCC (when VFP2 or later is available)
+//===----------------------------------------------------------------------===//
+def FastCC_ARM_APCS : CallingConv<[
+ // Handle all vector types as either f64 or v2f64.
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+
+ CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>,
+ CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
+ CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8,
+ S9, S10, S11, S12, S13, S14, S15]>>,
+ CCDelegateTo<CC_ARM_APCS>
+]>;
+
+def RetFastCC_ARM_APCS : CallingConv<[
+ // Handle all vector types as either f64 or v2f64.
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+
+ CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>,
+ CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
+ CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8,
+ S9, S10, S11, S12, S13, S14, S15]>>,
+ CCDelegateTo<RetCC_ARM_APCS>
+]>;
+
+
+//===----------------------------------------------------------------------===//
+// ARM AAPCS (EABI) Calling Convention, common parts
+//===----------------------------------------------------------------------===//
+
+def CC_ARM_AAPCS_Common : CallingConv<[
+
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // i64/f64 is passed in even pairs of GPRs
+ // i64 is 8-aligned i32 here, so we may need to eat R1 as a pad register
+ // (and the same is true for f64 if VFP is not enabled)
+ CCIfType<[i32], CCIfAlign<"8", CCAssignToRegWithShadow<[R0, R2], [R0, R1]>>>,
+ CCIfType<[i32], CCIf<"State.getNextStackOffset() == 0 &&"
+ "ArgFlags.getOrigAlign() != 8",
+ CCAssignToReg<[R0, R1, R2, R3]>>>,
+
+ CCIfType<[i32], CCIfAlign<"8", CCAssignToStackWithShadow<4, 8, R3>>>,
+ CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
+ CCIfType<[f64], CCAssignToStack<8, 8>>,
+ CCIfType<[v2f64], CCAssignToStack<16, 8>>
+]>;
+
+def RetCC_ARM_AAPCS_Common : CallingConv<[
+ CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>,
+ CCIfType<[i64], CCAssignToRegWithShadow<[R0, R2], [R1, R3]>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// ARM AAPCS (EABI) Calling Convention
+//===----------------------------------------------------------------------===//
+
+def CC_ARM_AAPCS : CallingConv<[
+ // Handle all vector types as either f64 or v2f64.
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+
+ CCIfType<[f64, v2f64], CCCustom<"CC_ARM_AAPCS_Custom_f64">>,
+ CCIfType<[f32], CCBitConvertToType<i32>>,
+ CCDelegateTo<CC_ARM_AAPCS_Common>
+]>;
+
+def RetCC_ARM_AAPCS : CallingConv<[
+ // Handle all vector types as either f64 or v2f64.
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+
+ CCIfType<[f64, v2f64], CCCustom<"RetCC_ARM_AAPCS_Custom_f64">>,
+ CCIfType<[f32], CCBitConvertToType<i32>>,
+ CCDelegateTo<RetCC_ARM_AAPCS_Common>
+]>;
+
+//===----------------------------------------------------------------------===//
+// ARM AAPCS-VFP (EABI) Calling Convention
+// Also used for FastCC (when VFP2 or later is available)
+//===----------------------------------------------------------------------===//
+
+def CC_ARM_AAPCS_VFP : CallingConv<[
+ // Handle all vector types as either f64 or v2f64.
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+
+ CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>,
+ CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
+ CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8,
+ S9, S10, S11, S12, S13, S14, S15]>>,
+ CCDelegateTo<CC_ARM_AAPCS_Common>
+]>;
+
+def RetCC_ARM_AAPCS_VFP : CallingConv<[
+ // Handle all vector types as either f64 or v2f64.
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+
+ CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>,
+ CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
+ CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8,
+ S9, S10, S11, S12, S13, S14, S15]>>,
+ CCDelegateTo<RetCC_ARM_AAPCS_Common>
+]>;
diff --git a/contrib/llvm/lib/Target/ARM/ARMCodeEmitter.cpp b/contrib/llvm/lib/Target/ARM/ARMCodeEmitter.cpp
new file mode 100644
index 0000000..4148d4a
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -0,0 +1,1903 @@
+//===-- ARM/ARMCodeEmitter.cpp - Convert ARM code to machine code ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the pass that transforms the ARM machine instructions into
+// relocatable machine code.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jit"
+#include "ARM.h"
+#include "ARMConstantPoolValue.h"
+#include "ARMInstrInfo.h"
+#include "ARMRelocations.h"
+#include "ARMSubtarget.h"
+#include "ARMTargetMachine.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/PassManager.h"
+#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#ifndef NDEBUG
+#include <iomanip>
+#endif
+using namespace llvm;
+
+STATISTIC(NumEmitted, "Number of machine instructions emitted");
+
+namespace {
+
+ class ARMCodeEmitter : public MachineFunctionPass {
+ ARMJITInfo *JTI;
+ const ARMInstrInfo *II;
+ const TargetData *TD;
+ const ARMSubtarget *Subtarget;
+ TargetMachine &TM;
+ JITCodeEmitter &MCE;
+ MachineModuleInfo *MMI;
+ const std::vector<MachineConstantPoolEntry> *MCPEs;
+ const std::vector<MachineJumpTableEntry> *MJTEs;
+ bool IsPIC;
+ bool IsThumb;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineModuleInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ static char ID;
+ public:
+ ARMCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce)
+ : MachineFunctionPass(ID), JTI(0),
+ II((const ARMInstrInfo *)tm.getInstrInfo()),
+ TD(tm.getTargetData()), TM(tm),
+ MCE(mce), MCPEs(0), MJTEs(0),
+ IsPIC(TM.getRelocationModel() == Reloc::PIC_), IsThumb(false) {}
+
+ /// getBinaryCodeForInstr - This function, generated by the
+ /// CodeEmitterGenerator using TableGen, produces the binary encoding for
+ /// machine instructions.
+ unsigned getBinaryCodeForInstr(const MachineInstr &MI) const;
+
+ bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const {
+ return "ARM Machine Code Emitter";
+ }
+
+ void emitInstruction(const MachineInstr &MI);
+
+ private:
+
+ void emitWordLE(unsigned Binary);
+ void emitDWordLE(uint64_t Binary);
+ void emitConstPoolInstruction(const MachineInstr &MI);
+ void emitMOVi32immInstruction(const MachineInstr &MI);
+ void emitMOVi2piecesInstruction(const MachineInstr &MI);
+ void emitLEApcrelJTInstruction(const MachineInstr &MI);
+ void emitPseudoMoveInstruction(const MachineInstr &MI);
+ void addPCLabel(unsigned LabelID);
+ void emitPseudoInstruction(const MachineInstr &MI);
+ unsigned getMachineSoRegOpValue(const MachineInstr &MI,
+ const MCInstrDesc &MCID,
+ const MachineOperand &MO,
+ unsigned OpIdx);
+
+ unsigned getMachineSoImmOpValue(unsigned SoImm);
+ unsigned getAddrModeSBit(const MachineInstr &MI,
+ const MCInstrDesc &MCID) const;
+
+ void emitDataProcessingInstruction(const MachineInstr &MI,
+ unsigned ImplicitRd = 0,
+ unsigned ImplicitRn = 0);
+
+ void emitLoadStoreInstruction(const MachineInstr &MI,
+ unsigned ImplicitRd = 0,
+ unsigned ImplicitRn = 0);
+
+ void emitMiscLoadStoreInstruction(const MachineInstr &MI,
+ unsigned ImplicitRn = 0);
+
+ void emitLoadStoreMultipleInstruction(const MachineInstr &MI);
+
+ void emitMulFrmInstruction(const MachineInstr &MI);
+
+ void emitExtendInstruction(const MachineInstr &MI);
+
+ void emitMiscArithInstruction(const MachineInstr &MI);
+
+ void emitSaturateInstruction(const MachineInstr &MI);
+
+ void emitBranchInstruction(const MachineInstr &MI);
+
+ void emitInlineJumpTable(unsigned JTIndex);
+
+ void emitMiscBranchInstruction(const MachineInstr &MI);
+
+ void emitVFPArithInstruction(const MachineInstr &MI);
+
+ void emitVFPConversionInstruction(const MachineInstr &MI);
+
+ void emitVFPLoadStoreInstruction(const MachineInstr &MI);
+
+ void emitVFPLoadStoreMultipleInstruction(const MachineInstr &MI);
+
+ void emitNEONLaneInstruction(const MachineInstr &MI);
+ void emitNEONDupInstruction(const MachineInstr &MI);
+ void emitNEON1RegModImmInstruction(const MachineInstr &MI);
+ void emitNEON2RegInstruction(const MachineInstr &MI);
+ void emitNEON3RegInstruction(const MachineInstr &MI);
+
+ /// getMachineOpValue - Return binary encoding of operand. If the machine
+ /// operand requires relocation, record the relocation and return zero.
+ unsigned getMachineOpValue(const MachineInstr &MI,
+ const MachineOperand &MO) const;
+ unsigned getMachineOpValue(const MachineInstr &MI, unsigned OpIdx) const {
+ return getMachineOpValue(MI, MI.getOperand(OpIdx));
+ }
+
+ // FIXME: The legacy JIT ARMCodeEmitter doesn't rely on the the
+ // TableGen'erated getBinaryCodeForInstr() function to encode any
+ // operand values, instead querying getMachineOpValue() directly for
+ // each operand it needs to encode. Thus, any of the new encoder
+ // helper functions can simply return 0 as the values the return
+ // are already handled elsewhere. They are placeholders to allow this
+ // encoder to continue to function until the MC encoder is sufficiently
+ // far along that this one can be eliminated entirely.
+ unsigned NEONThumb2DataIPostEncoder(const MachineInstr &MI, unsigned Val)
+ const { return 0; }
+ unsigned NEONThumb2LoadStorePostEncoder(const MachineInstr &MI,unsigned Val)
+ const { return 0; }
+ unsigned NEONThumb2DupPostEncoder(const MachineInstr &MI,unsigned Val)
+ const { return 0; }
+ unsigned VFPThumb2PostEncoder(const MachineInstr&MI, unsigned Val)
+ const { return 0; }
+ unsigned getAdrLabelOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getThumbAdrLabelOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getThumbBLTargetOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getThumbBLXTargetOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getThumbBRTargetOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getThumbBCCTargetOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getThumbCBTargetOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getBranchTargetOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getUnconditionalBranchTargetOpValue(const MachineInstr &MI,
+ unsigned Op) const { return 0; }
+ unsigned getARMBranchTargetOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getARMBLXTargetOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getCCOutOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getSOImmOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getT2SOImmOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getSORegRegOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getSORegImmOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getThumbAddrModeRegRegOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getT2AddrModeImm12OpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getT2AddrModeImm8OpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getT2Imm8s4OpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getT2AddrModeImm8s4OpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getT2AddrModeImm0_1020s4OpValue(const MachineInstr &MI,unsigned Op)
+ const { return 0; }
+ unsigned getT2AddrModeImm8OffsetOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getT2AddrModeImm12OffsetOpValue(const MachineInstr &MI,unsigned Op)
+ const { return 0; }
+ unsigned getT2AddrModeSORegOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getT2SORegOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getT2AdrLabelOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getAddrMode6AddressOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getAddrMode6OneLane32AddressOpValue(const MachineInstr &MI,
+ unsigned Op)
+ const { return 0; }
+ unsigned getAddrMode6DupAddressOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getAddrMode6OffsetOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getBitfieldInvertedMaskOpValue(const MachineInstr &MI,
+ unsigned Op) const { return 0; }
+ unsigned getSsatBitPosValue(const MachineInstr &MI,
+ unsigned Op) const { return 0; }
+ uint32_t getLdStmModeOpValue(const MachineInstr &MI, unsigned OpIdx)
+ const {return 0; }
+ uint32_t getLdStSORegOpValue(const MachineInstr &MI, unsigned OpIdx)
+ const { return 0; }
+
+ unsigned getAddrModeImm12OpValue(const MachineInstr &MI, unsigned Op)
+ const {
+ // {17-13} = reg
+ // {12} = (U)nsigned (add == '1', sub == '0')
+ // {11-0} = imm12
+ const MachineOperand &MO = MI.getOperand(Op);
+ const MachineOperand &MO1 = MI.getOperand(Op + 1);
+ if (!MO.isReg()) {
+ emitConstPoolAddress(MO.getIndex(), ARM::reloc_arm_cp_entry);
+ return 0;
+ }
+ unsigned Reg = getARMRegisterNumbering(MO.getReg());
+ int32_t Imm12 = MO1.getImm();
+ uint32_t Binary;
+ Binary = Imm12 & 0xfff;
+ if (Imm12 >= 0)
+ Binary |= (1 << 12);
+ Binary |= (Reg << 13);
+ return Binary;
+ }
+
+ unsigned getHiLo16ImmOpValue(const MachineInstr &MI, unsigned Op) const {
+ return 0;
+ }
+
+ uint32_t getAddrMode2OpValue(const MachineInstr &MI, unsigned OpIdx)
+ const { return 0;}
+ uint32_t getAddrMode2OffsetOpValue(const MachineInstr &MI, unsigned OpIdx)
+ const { return 0;}
+ uint32_t getPostIdxRegOpValue(const MachineInstr &MI, unsigned OpIdx)
+ const { return 0;}
+ uint32_t getAddrMode3OffsetOpValue(const MachineInstr &MI, unsigned OpIdx)
+ const { return 0;}
+ uint32_t getAddrMode3OpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ uint32_t getAddrModeThumbSPOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ uint32_t getAddrModeSOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ uint32_t getAddrModeISOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ uint32_t getAddrModePCOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ uint32_t getAddrMode5OpValue(const MachineInstr &MI, unsigned Op) const {
+ // {17-13} = reg
+ // {12} = (U)nsigned (add == '1', sub == '0')
+ // {11-0} = imm12
+ const MachineOperand &MO = MI.getOperand(Op);
+ const MachineOperand &MO1 = MI.getOperand(Op + 1);
+ if (!MO.isReg()) {
+ emitConstPoolAddress(MO.getIndex(), ARM::reloc_arm_cp_entry);
+ return 0;
+ }
+ unsigned Reg = getARMRegisterNumbering(MO.getReg());
+ int32_t Imm12 = MO1.getImm();
+
+ // Special value for #-0
+ if (Imm12 == INT32_MIN)
+ Imm12 = 0;
+
+ // Immediate is always encoded as positive. The 'U' bit controls add vs
+ // sub.
+ bool isAdd = true;
+ if (Imm12 < 0) {
+ Imm12 = -Imm12;
+ isAdd = false;
+ }
+
+ uint32_t Binary = Imm12 & 0xfff;
+ if (isAdd)
+ Binary |= (1 << 12);
+ Binary |= (Reg << 13);
+ return Binary;
+ }
+ unsigned getNEONVcvtImm32OpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+
+ unsigned getRegisterListOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+
+ unsigned getShiftRight8Imm(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getShiftRight16Imm(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getShiftRight32Imm(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getShiftRight64Imm(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+
+ /// getMovi32Value - Return binary encoding of operand for movw/movt. If the
+ /// machine operand requires relocation, record the relocation and return
+ /// zero.
+ unsigned getMovi32Value(const MachineInstr &MI,const MachineOperand &MO,
+ unsigned Reloc);
+
+ /// getShiftOp - Return the shift opcode (bit[6:5]) of the immediate value.
+ ///
+ unsigned getShiftOp(unsigned Imm) const ;
+
+ /// Routines that handle operands which add machine relocations which are
+ /// fixed up by the relocation stage.
+ void emitGlobalAddress(const GlobalValue *GV, unsigned Reloc,
+ bool MayNeedFarStub, bool Indirect,
+ intptr_t ACPV = 0) const;
+ void emitExternalSymbolAddress(const char *ES, unsigned Reloc) const;
+ void emitConstPoolAddress(unsigned CPI, unsigned Reloc) const;
+ void emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) const;
+ void emitMachineBasicBlock(MachineBasicBlock *BB, unsigned Reloc,
+ intptr_t JTBase = 0) const;
+ };
+}
+
+char ARMCodeEmitter::ID = 0;
+
+/// createARMJITCodeEmitterPass - Return a pass that emits the collected ARM
+/// code to the specified MCE object.
+FunctionPass *llvm::createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM,
+ JITCodeEmitter &JCE) {
+ return new ARMCodeEmitter(TM, JCE);
+}
+
+bool ARMCodeEmitter::runOnMachineFunction(MachineFunction &MF) {
+ assert((MF.getTarget().getRelocationModel() != Reloc::Default ||
+ MF.getTarget().getRelocationModel() != Reloc::Static) &&
+ "JIT relocation model must be set to static or default!");
+ JTI = ((ARMTargetMachine &)MF.getTarget()).getJITInfo();
+ II = ((const ARMTargetMachine &)MF.getTarget()).getInstrInfo();
+ TD = ((const ARMTargetMachine &)MF.getTarget()).getTargetData();
+ Subtarget = &TM.getSubtarget<ARMSubtarget>();
+ MCPEs = &MF.getConstantPool()->getConstants();
+ MJTEs = 0;
+ if (MF.getJumpTableInfo()) MJTEs = &MF.getJumpTableInfo()->getJumpTables();
+ IsPIC = TM.getRelocationModel() == Reloc::PIC_;
+ IsThumb = MF.getInfo<ARMFunctionInfo>()->isThumbFunction();
+ JTI->Initialize(MF, IsPIC);
+ MMI = &getAnalysis<MachineModuleInfo>();
+ MCE.setModuleInfo(MMI);
+
+ do {
+ DEBUG(errs() << "JITTing function '"
+ << MF.getFunction()->getName() << "'\n");
+ MCE.startFunction(MF);
+ for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
+ MBB != E; ++MBB) {
+ MCE.StartMachineBasicBlock(MBB);
+ for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I)
+ emitInstruction(*I);
+ }
+ } while (MCE.finishFunction(MF));
+
+ return false;
+}
+
+/// getShiftOp - Return the shift opcode (bit[6:5]) of the immediate value.
+///
+unsigned ARMCodeEmitter::getShiftOp(unsigned Imm) const {
+ switch (ARM_AM::getAM2ShiftOpc(Imm)) {
+ default: llvm_unreachable("Unknown shift opc!");
+ case ARM_AM::asr: return 2;
+ case ARM_AM::lsl: return 0;
+ case ARM_AM::lsr: return 1;
+ case ARM_AM::ror:
+ case ARM_AM::rrx: return 3;
+ }
+ return 0;
+}
+
+/// getMovi32Value - Return binary encoding of operand for movw/movt. If the
+/// machine operand requires relocation, record the relocation and return zero.
+unsigned ARMCodeEmitter::getMovi32Value(const MachineInstr &MI,
+ const MachineOperand &MO,
+ unsigned Reloc) {
+ assert(((Reloc == ARM::reloc_arm_movt) || (Reloc == ARM::reloc_arm_movw))
+ && "Relocation to this function should be for movt or movw");
+
+ if (MO.isImm())
+ return static_cast<unsigned>(MO.getImm());
+ else if (MO.isGlobal())
+ emitGlobalAddress(MO.getGlobal(), Reloc, true, false);
+ else if (MO.isSymbol())
+ emitExternalSymbolAddress(MO.getSymbolName(), Reloc);
+ else if (MO.isMBB())
+ emitMachineBasicBlock(MO.getMBB(), Reloc);
+ else {
+#ifndef NDEBUG
+ errs() << MO;
+#endif
+ llvm_unreachable("Unsupported operand type for movw/movt");
+ }
+ return 0;
+}
+
+/// getMachineOpValue - Return binary encoding of operand. If the machine
+/// operand requires relocation, record the relocation and return zero.
+unsigned ARMCodeEmitter::getMachineOpValue(const MachineInstr &MI,
+ const MachineOperand &MO) const {
+ if (MO.isReg())
+ return getARMRegisterNumbering(MO.getReg());
+ else if (MO.isImm())
+ return static_cast<unsigned>(MO.getImm());
+ else if (MO.isGlobal())
+ emitGlobalAddress(MO.getGlobal(), ARM::reloc_arm_branch, true, false);
+ else if (MO.isSymbol())
+ emitExternalSymbolAddress(MO.getSymbolName(), ARM::reloc_arm_branch);
+ else if (MO.isCPI()) {
+ const MCInstrDesc &MCID = MI.getDesc();
+ // For VFP load, the immediate offset is multiplied by 4.
+ unsigned Reloc = ((MCID.TSFlags & ARMII::FormMask) == ARMII::VFPLdStFrm)
+ ? ARM::reloc_arm_vfp_cp_entry : ARM::reloc_arm_cp_entry;
+ emitConstPoolAddress(MO.getIndex(), Reloc);
+ } else if (MO.isJTI())
+ emitJumpTableAddress(MO.getIndex(), ARM::reloc_arm_relative);
+ else if (MO.isMBB())
+ emitMachineBasicBlock(MO.getMBB(), ARM::reloc_arm_branch);
+ else
+ llvm_unreachable("Unable to encode MachineOperand!");
+ return 0;
+}
+
+/// emitGlobalAddress - Emit the specified address to the code stream.
+///
+void ARMCodeEmitter::emitGlobalAddress(const GlobalValue *GV, unsigned Reloc,
+ bool MayNeedFarStub, bool Indirect,
+ intptr_t ACPV) const {
+ MachineRelocation MR = Indirect
+ ? MachineRelocation::getIndirectSymbol(MCE.getCurrentPCOffset(), Reloc,
+ const_cast<GlobalValue *>(GV),
+ ACPV, MayNeedFarStub)
+ : MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc,
+ const_cast<GlobalValue *>(GV), ACPV,
+ MayNeedFarStub);
+ MCE.addRelocation(MR);
+}
+
+/// emitExternalSymbolAddress - Arrange for the address of an external symbol to
+/// be emitted to the current location in the function, and allow it to be PC
+/// relative.
+void ARMCodeEmitter::
+emitExternalSymbolAddress(const char *ES, unsigned Reloc) const {
+ MCE.addRelocation(MachineRelocation::getExtSym(MCE.getCurrentPCOffset(),
+ Reloc, ES));
+}
+
+/// emitConstPoolAddress - Arrange for the address of an constant pool
+/// to be emitted to the current location in the function, and allow it to be PC
+/// relative.
+void ARMCodeEmitter::emitConstPoolAddress(unsigned CPI, unsigned Reloc) const {
+ // Tell JIT emitter we'll resolve the address.
+ MCE.addRelocation(MachineRelocation::getConstPool(MCE.getCurrentPCOffset(),
+ Reloc, CPI, 0, true));
+}
+
+/// emitJumpTableAddress - Arrange for the address of a jump table to
+/// be emitted to the current location in the function, and allow it to be PC
+/// relative.
+void ARMCodeEmitter::
+emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) const {
+ MCE.addRelocation(MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(),
+ Reloc, JTIndex, 0, true));
+}
+
+/// emitMachineBasicBlock - Emit the specified address basic block.
+void ARMCodeEmitter::emitMachineBasicBlock(MachineBasicBlock *BB,
+ unsigned Reloc,
+ intptr_t JTBase) const {
+ MCE.addRelocation(MachineRelocation::getBB(MCE.getCurrentPCOffset(),
+ Reloc, BB, JTBase));
+}
+
+void ARMCodeEmitter::emitWordLE(unsigned Binary) {
+ DEBUG(errs() << " 0x";
+ errs().write_hex(Binary) << "\n");
+ MCE.emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitDWordLE(uint64_t Binary) {
+ DEBUG(errs() << " 0x";
+ errs().write_hex(Binary) << "\n");
+ MCE.emitDWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitInstruction(const MachineInstr &MI) {
+ DEBUG(errs() << "JIT: " << (void*)MCE.getCurrentPCValue() << ":\t" << MI);
+
+ MCE.processDebugLoc(MI.getDebugLoc(), true);
+
+ ++NumEmitted; // Keep track of the # of mi's emitted
+ switch (MI.getDesc().TSFlags & ARMII::FormMask) {
+ default: {
+ llvm_unreachable("Unhandled instruction encoding format!");
+ break;
+ }
+ case ARMII::MiscFrm:
+ if (MI.getOpcode() == ARM::LEApcrelJT) {
+ // Materialize jumptable address.
+ emitLEApcrelJTInstruction(MI);
+ break;
+ }
+ llvm_unreachable("Unhandled instruction encoding!");
+ break;
+ case ARMII::Pseudo:
+ emitPseudoInstruction(MI);
+ break;
+ case ARMII::DPFrm:
+ case ARMII::DPSoRegFrm:
+ emitDataProcessingInstruction(MI);
+ break;
+ case ARMII::LdFrm:
+ case ARMII::StFrm:
+ emitLoadStoreInstruction(MI);
+ break;
+ case ARMII::LdMiscFrm:
+ case ARMII::StMiscFrm:
+ emitMiscLoadStoreInstruction(MI);
+ break;
+ case ARMII::LdStMulFrm:
+ emitLoadStoreMultipleInstruction(MI);
+ break;
+ case ARMII::MulFrm:
+ emitMulFrmInstruction(MI);
+ break;
+ case ARMII::ExtFrm:
+ emitExtendInstruction(MI);
+ break;
+ case ARMII::ArithMiscFrm:
+ emitMiscArithInstruction(MI);
+ break;
+ case ARMII::SatFrm:
+ emitSaturateInstruction(MI);
+ break;
+ case ARMII::BrFrm:
+ emitBranchInstruction(MI);
+ break;
+ case ARMII::BrMiscFrm:
+ emitMiscBranchInstruction(MI);
+ break;
+ // VFP instructions.
+ case ARMII::VFPUnaryFrm:
+ case ARMII::VFPBinaryFrm:
+ emitVFPArithInstruction(MI);
+ break;
+ case ARMII::VFPConv1Frm:
+ case ARMII::VFPConv2Frm:
+ case ARMII::VFPConv3Frm:
+ case ARMII::VFPConv4Frm:
+ case ARMII::VFPConv5Frm:
+ emitVFPConversionInstruction(MI);
+ break;
+ case ARMII::VFPLdStFrm:
+ emitVFPLoadStoreInstruction(MI);
+ break;
+ case ARMII::VFPLdStMulFrm:
+ emitVFPLoadStoreMultipleInstruction(MI);
+ break;
+
+ // NEON instructions.
+ case ARMII::NGetLnFrm:
+ case ARMII::NSetLnFrm:
+ emitNEONLaneInstruction(MI);
+ break;
+ case ARMII::NDupFrm:
+ emitNEONDupInstruction(MI);
+ break;
+ case ARMII::N1RegModImmFrm:
+ emitNEON1RegModImmInstruction(MI);
+ break;
+ case ARMII::N2RegFrm:
+ emitNEON2RegInstruction(MI);
+ break;
+ case ARMII::N3RegFrm:
+ emitNEON3RegInstruction(MI);
+ break;
+ }
+ MCE.processDebugLoc(MI.getDebugLoc(), false);
+}
+
+void ARMCodeEmitter::emitConstPoolInstruction(const MachineInstr &MI) {
+ unsigned CPI = MI.getOperand(0).getImm(); // CP instruction index.
+ unsigned CPIndex = MI.getOperand(1).getIndex(); // Actual cp entry index.
+ const MachineConstantPoolEntry &MCPE = (*MCPEs)[CPIndex];
+
+ // Remember the CONSTPOOL_ENTRY address for later relocation.
+ JTI->addConstantPoolEntryAddr(CPI, MCE.getCurrentPCValue());
+
+ // Emit constpool island entry. In most cases, the actual values will be
+ // resolved and relocated after code emission.
+ if (MCPE.isMachineConstantPoolEntry()) {
+ ARMConstantPoolValue *ACPV =
+ static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal);
+
+ DEBUG(errs() << " ** ARM constant pool #" << CPI << " @ "
+ << (void*)MCE.getCurrentPCValue() << " " << *ACPV << '\n');
+
+ assert(ACPV->isGlobalValue() && "unsupported constant pool value");
+ const GlobalValue *GV = cast<ARMConstantPoolConstant>(ACPV)->getGV();
+ if (GV) {
+ Reloc::Model RelocM = TM.getRelocationModel();
+ emitGlobalAddress(GV, ARM::reloc_arm_machine_cp_entry,
+ isa<Function>(GV),
+ Subtarget->GVIsIndirectSymbol(GV, RelocM),
+ (intptr_t)ACPV);
+ } else {
+ const char *Sym = cast<ARMConstantPoolSymbol>(ACPV)->getSymbol();
+ emitExternalSymbolAddress(Sym, ARM::reloc_arm_absolute);
+ }
+ emitWordLE(0);
+ } else {
+ const Constant *CV = MCPE.Val.ConstVal;
+
+ DEBUG({
+ errs() << " ** Constant pool #" << CPI << " @ "
+ << (void*)MCE.getCurrentPCValue() << " ";
+ if (const Function *F = dyn_cast<Function>(CV))
+ errs() << F->getName();
+ else
+ errs() << *CV;
+ errs() << '\n';
+ });
+
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) {
+ emitGlobalAddress(GV, ARM::reloc_arm_absolute, isa<Function>(GV), false);
+ emitWordLE(0);
+ } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
+ uint32_t Val = uint32_t(*CI->getValue().getRawData());
+ emitWordLE(Val);
+ } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) {
+ if (CFP->getType()->isFloatTy())
+ emitWordLE(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
+ else if (CFP->getType()->isDoubleTy())
+ emitDWordLE(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
+ else {
+ llvm_unreachable("Unable to handle this constantpool entry!");
+ }
+ } else {
+ llvm_unreachable("Unable to handle this constantpool entry!");
+ }
+ }
+}
+
+void ARMCodeEmitter::emitMOVi32immInstruction(const MachineInstr &MI) {
+ const MachineOperand &MO0 = MI.getOperand(0);
+ const MachineOperand &MO1 = MI.getOperand(1);
+
+ // Emit the 'movw' instruction.
+ unsigned Binary = 0x30 << 20; // mov: Insts{27-20} = 0b00110000
+
+ unsigned Lo16 = getMovi32Value(MI, MO1, ARM::reloc_arm_movw) & 0xFFFF;
+
+ // Set the conditional execution predicate.
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Encode Rd.
+ Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift;
+
+ // Encode imm16 as imm4:imm12
+ Binary |= Lo16 & 0xFFF; // Insts{11-0} = imm12
+ Binary |= ((Lo16 >> 12) & 0xF) << 16; // Insts{19-16} = imm4
+ emitWordLE(Binary);
+
+ unsigned Hi16 = getMovi32Value(MI, MO1, ARM::reloc_arm_movt) >> 16;
+ // Emit the 'movt' instruction.
+ Binary = 0x34 << 20; // movt: Insts{27-20} = 0b00110100
+
+ // Set the conditional execution predicate.
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Encode Rd.
+ Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift;
+
+ // Encode imm16 as imm4:imm1, same as movw above.
+ Binary |= Hi16 & 0xFFF;
+ Binary |= ((Hi16 >> 12) & 0xF) << 16;
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitMOVi2piecesInstruction(const MachineInstr &MI) {
+ const MachineOperand &MO0 = MI.getOperand(0);
+ const MachineOperand &MO1 = MI.getOperand(1);
+ assert(MO1.isImm() && ARM_AM::isSOImmTwoPartVal(MO1.getImm()) &&
+ "Not a valid so_imm value!");
+ unsigned V1 = ARM_AM::getSOImmTwoPartFirst(MO1.getImm());
+ unsigned V2 = ARM_AM::getSOImmTwoPartSecond(MO1.getImm());
+
+ // Emit the 'mov' instruction.
+ unsigned Binary = 0xd << 21; // mov: Insts{24-21} = 0b1101
+
+ // Set the conditional execution predicate.
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Encode Rd.
+ Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift;
+
+ // Encode so_imm.
+ // Set bit I(25) to identify this is the immediate form of <shifter_op>
+ Binary |= 1 << ARMII::I_BitShift;
+ Binary |= getMachineSoImmOpValue(V1);
+ emitWordLE(Binary);
+
+ // Now the 'orr' instruction.
+ Binary = 0xc << 21; // orr: Insts{24-21} = 0b1100
+
+ // Set the conditional execution predicate.
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Encode Rd.
+ Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift;
+
+ // Encode Rn.
+ Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRnShift;
+
+ // Encode so_imm.
+ // Set bit I(25) to identify this is the immediate form of <shifter_op>
+ Binary |= 1 << ARMII::I_BitShift;
+ Binary |= getMachineSoImmOpValue(V2);
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitLEApcrelJTInstruction(const MachineInstr &MI) {
+ // It's basically add r, pc, (LJTI - $+8)
+
+ const MCInstrDesc &MCID = MI.getDesc();
+
+ // Emit the 'add' instruction.
+ unsigned Binary = 0x4 << 21; // add: Insts{24-21} = 0b0100
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Encode S bit if MI modifies CPSR.
+ Binary |= getAddrModeSBit(MI, MCID);
+
+ // Encode Rd.
+ Binary |= getMachineOpValue(MI, 0) << ARMII::RegRdShift;
+
+ // Encode Rn which is PC.
+ Binary |= getARMRegisterNumbering(ARM::PC) << ARMII::RegRnShift;
+
+ // Encode the displacement.
+ Binary |= 1 << ARMII::I_BitShift;
+ emitJumpTableAddress(MI.getOperand(1).getIndex(), ARM::reloc_arm_jt_base);
+
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitPseudoMoveInstruction(const MachineInstr &MI) {
+ unsigned Opcode = MI.getDesc().Opcode;
+
+ // Part of binary is determined by TableGn.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Encode S bit if MI modifies CPSR.
+ if (Opcode == ARM::MOVsrl_flag || Opcode == ARM::MOVsra_flag)
+ Binary |= 1 << ARMII::S_BitShift;
+
+ // Encode register def if there is one.
+ Binary |= getMachineOpValue(MI, 0) << ARMII::RegRdShift;
+
+ // Encode the shift operation.
+ switch (Opcode) {
+ default: break;
+ case ARM::RRX:
+ // rrx
+ Binary |= 0x6 << 4;
+ break;
+ case ARM::MOVsrl_flag:
+ // lsr #1
+ Binary |= (0x2 << 4) | (1 << 7);
+ break;
+ case ARM::MOVsra_flag:
+ // asr #1
+ Binary |= (0x4 << 4) | (1 << 7);
+ break;
+ }
+
+ // Encode register Rm.
+ Binary |= getMachineOpValue(MI, 1);
+
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::addPCLabel(unsigned LabelID) {
+ DEBUG(errs() << " ** LPC" << LabelID << " @ "
+ << (void*)MCE.getCurrentPCValue() << '\n');
+ JTI->addPCLabelAddr(LabelID, MCE.getCurrentPCValue());
+}
+
+void ARMCodeEmitter::emitPseudoInstruction(const MachineInstr &MI) {
+ unsigned Opcode = MI.getDesc().Opcode;
+ switch (Opcode) {
+ default:
+ llvm_unreachable("ARMCodeEmitter::emitPseudoInstruction");
+ case ARM::BX_CALL:
+ case ARM::BMOVPCRX_CALL:
+ case ARM::BXr9_CALL:
+ case ARM::BMOVPCRXr9_CALL: {
+ // First emit mov lr, pc
+ unsigned Binary = 0x01a0e00f;
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+ emitWordLE(Binary);
+
+ // and then emit the branch.
+ emitMiscBranchInstruction(MI);
+ break;
+ }
+ case TargetOpcode::INLINEASM: {
+ // We allow inline assembler nodes with empty bodies - they can
+ // implicitly define registers, which is ok for JIT.
+ if (MI.getOperand(0).getSymbolName()[0]) {
+ report_fatal_error("JIT does not support inline asm!");
+ }
+ break;
+ }
+ case TargetOpcode::PROLOG_LABEL:
+ case TargetOpcode::EH_LABEL:
+ MCE.emitLabel(MI.getOperand(0).getMCSymbol());
+ break;
+ case TargetOpcode::IMPLICIT_DEF:
+ case TargetOpcode::KILL:
+ // Do nothing.
+ break;
+ case ARM::CONSTPOOL_ENTRY:
+ emitConstPoolInstruction(MI);
+ break;
+ case ARM::PICADD: {
+ // Remember of the address of the PC label for relocation later.
+ addPCLabel(MI.getOperand(2).getImm());
+ // PICADD is just an add instruction that implicitly read pc.
+ emitDataProcessingInstruction(MI, 0, ARM::PC);
+ break;
+ }
+ case ARM::PICLDR:
+ case ARM::PICLDRB:
+ case ARM::PICSTR:
+ case ARM::PICSTRB: {
+ // Remember of the address of the PC label for relocation later.
+ addPCLabel(MI.getOperand(2).getImm());
+ // These are just load / store instructions that implicitly read pc.
+ emitLoadStoreInstruction(MI, 0, ARM::PC);
+ break;
+ }
+ case ARM::PICLDRH:
+ case ARM::PICLDRSH:
+ case ARM::PICLDRSB:
+ case ARM::PICSTRH: {
+ // Remember of the address of the PC label for relocation later.
+ addPCLabel(MI.getOperand(2).getImm());
+ // These are just load / store instructions that implicitly read pc.
+ emitMiscLoadStoreInstruction(MI, ARM::PC);
+ break;
+ }
+
+ case ARM::MOVi32imm:
+ // Two instructions to materialize a constant.
+ if (Subtarget->hasV6T2Ops())
+ emitMOVi32immInstruction(MI);
+ else
+ emitMOVi2piecesInstruction(MI);
+ break;
+
+ case ARM::LEApcrelJT:
+ // Materialize jumptable address.
+ emitLEApcrelJTInstruction(MI);
+ break;
+ case ARM::RRX:
+ case ARM::MOVsrl_flag:
+ case ARM::MOVsra_flag:
+ emitPseudoMoveInstruction(MI);
+ break;
+ }
+}
+
+unsigned ARMCodeEmitter::getMachineSoRegOpValue(const MachineInstr &MI,
+ const MCInstrDesc &MCID,
+ const MachineOperand &MO,
+ unsigned OpIdx) {
+ unsigned Binary = getMachineOpValue(MI, MO);
+
+ const MachineOperand &MO1 = MI.getOperand(OpIdx + 1);
+ const MachineOperand &MO2 = MI.getOperand(OpIdx + 2);
+ ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(MO2.getImm());
+
+ // Encode the shift opcode.
+ unsigned SBits = 0;
+ unsigned Rs = MO1.getReg();
+ if (Rs) {
+ // Set shift operand (bit[7:4]).
+ // LSL - 0001
+ // LSR - 0011
+ // ASR - 0101
+ // ROR - 0111
+ // RRX - 0110 and bit[11:8] clear.
+ switch (SOpc) {
+ default: llvm_unreachable("Unknown shift opc!");
+ case ARM_AM::lsl: SBits = 0x1; break;
+ case ARM_AM::lsr: SBits = 0x3; break;
+ case ARM_AM::asr: SBits = 0x5; break;
+ case ARM_AM::ror: SBits = 0x7; break;
+ case ARM_AM::rrx: SBits = 0x6; break;
+ }
+ } else {
+ // Set shift operand (bit[6:4]).
+ // LSL - 000
+ // LSR - 010
+ // ASR - 100
+ // ROR - 110
+ switch (SOpc) {
+ default: llvm_unreachable("Unknown shift opc!");
+ case ARM_AM::lsl: SBits = 0x0; break;
+ case ARM_AM::lsr: SBits = 0x2; break;
+ case ARM_AM::asr: SBits = 0x4; break;
+ case ARM_AM::ror: SBits = 0x6; break;
+ }
+ }
+ Binary |= SBits << 4;
+ if (SOpc == ARM_AM::rrx)
+ return Binary;
+
+ // Encode the shift operation Rs or shift_imm (except rrx).
+ if (Rs) {
+ // Encode Rs bit[11:8].
+ assert(ARM_AM::getSORegOffset(MO2.getImm()) == 0);
+ return Binary | (getARMRegisterNumbering(Rs) << ARMII::RegRsShift);
+ }
+
+ // Encode shift_imm bit[11:7].
+ return Binary | ARM_AM::getSORegOffset(MO2.getImm()) << 7;
+}
+
+unsigned ARMCodeEmitter::getMachineSoImmOpValue(unsigned SoImm) {
+ int SoImmVal = ARM_AM::getSOImmVal(SoImm);
+ assert(SoImmVal != -1 && "Not a valid so_imm value!");
+
+ // Encode rotate_imm.
+ unsigned Binary = (ARM_AM::getSOImmValRot((unsigned)SoImmVal) >> 1)
+ << ARMII::SoRotImmShift;
+
+ // Encode immed_8.
+ Binary |= ARM_AM::getSOImmValImm((unsigned)SoImmVal);
+ return Binary;
+}
+
+unsigned ARMCodeEmitter::getAddrModeSBit(const MachineInstr &MI,
+ const MCInstrDesc &MCID) const {
+ for (unsigned i = MI.getNumOperands(), e = MCID.getNumOperands(); i >= e;--i){
+ const MachineOperand &MO = MI.getOperand(i-1);
+ if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR)
+ return 1 << ARMII::S_BitShift;
+ }
+ return 0;
+}
+
+void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI,
+ unsigned ImplicitRd,
+ unsigned ImplicitRn) {
+ const MCInstrDesc &MCID = MI.getDesc();
+
+ // Part of binary is determined by TableGn.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Encode S bit if MI modifies CPSR.
+ Binary |= getAddrModeSBit(MI, MCID);
+
+ // Encode register def if there is one.
+ unsigned NumDefs = MCID.getNumDefs();
+ unsigned OpIdx = 0;
+ if (NumDefs)
+ Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift;
+ else if (ImplicitRd)
+ // Special handling for implicit use (e.g. PC).
+ Binary |= (getARMRegisterNumbering(ImplicitRd) << ARMII::RegRdShift);
+
+ if (MCID.Opcode == ARM::MOVi16) {
+ // Get immediate from MI.
+ unsigned Lo16 = getMovi32Value(MI, MI.getOperand(OpIdx),
+ ARM::reloc_arm_movw);
+ // Encode imm which is the same as in emitMOVi32immInstruction().
+ Binary |= Lo16 & 0xFFF;
+ Binary |= ((Lo16 >> 12) & 0xF) << 16;
+ emitWordLE(Binary);
+ return;
+ } else if(MCID.Opcode == ARM::MOVTi16) {
+ unsigned Hi16 = (getMovi32Value(MI, MI.getOperand(OpIdx),
+ ARM::reloc_arm_movt) >> 16);
+ Binary |= Hi16 & 0xFFF;
+ Binary |= ((Hi16 >> 12) & 0xF) << 16;
+ emitWordLE(Binary);
+ return;
+ } else if ((MCID.Opcode == ARM::BFC) || (MCID.Opcode == ARM::BFI)) {
+ uint32_t v = ~MI.getOperand(2).getImm();
+ int32_t lsb = CountTrailingZeros_32(v);
+ int32_t msb = (32 - CountLeadingZeros_32(v)) - 1;
+ // Instr{20-16} = msb, Instr{11-7} = lsb
+ Binary |= (msb & 0x1F) << 16;
+ Binary |= (lsb & 0x1F) << 7;
+ emitWordLE(Binary);
+ return;
+ } else if ((MCID.Opcode == ARM::UBFX) || (MCID.Opcode == ARM::SBFX)) {
+ // Encode Rn in Instr{0-3}
+ Binary |= getMachineOpValue(MI, OpIdx++);
+
+ uint32_t lsb = MI.getOperand(OpIdx++).getImm();
+ uint32_t widthm1 = MI.getOperand(OpIdx++).getImm() - 1;
+
+ // Instr{20-16} = widthm1, Instr{11-7} = lsb
+ Binary |= (widthm1 & 0x1F) << 16;
+ Binary |= (lsb & 0x1F) << 7;
+ emitWordLE(Binary);
+ return;
+ }
+
+ // If this is a two-address operand, skip it. e.g. MOVCCr operand 1.
+ if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)
+ ++OpIdx;
+
+ // Encode first non-shifter register operand if there is one.
+ bool isUnary = MCID.TSFlags & ARMII::UnaryDP;
+ if (!isUnary) {
+ if (ImplicitRn)
+ // Special handling for implicit use (e.g. PC).
+ Binary |= (getARMRegisterNumbering(ImplicitRn) << ARMII::RegRnShift);
+ else {
+ Binary |= getMachineOpValue(MI, OpIdx) << ARMII::RegRnShift;
+ ++OpIdx;
+ }
+ }
+
+ // Encode shifter operand.
+ const MachineOperand &MO = MI.getOperand(OpIdx);
+ if ((MCID.TSFlags & ARMII::FormMask) == ARMII::DPSoRegFrm) {
+ // Encode SoReg.
+ emitWordLE(Binary | getMachineSoRegOpValue(MI, MCID, MO, OpIdx));
+ return;
+ }
+
+ if (MO.isReg()) {
+ // Encode register Rm.
+ emitWordLE(Binary | getARMRegisterNumbering(MO.getReg()));
+ return;
+ }
+
+ // Encode so_imm.
+ Binary |= getMachineSoImmOpValue((unsigned)MO.getImm());
+
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitLoadStoreInstruction(const MachineInstr &MI,
+ unsigned ImplicitRd,
+ unsigned ImplicitRn) {
+ const MCInstrDesc &MCID = MI.getDesc();
+ unsigned Form = MCID.TSFlags & ARMII::FormMask;
+ bool IsPrePost = (MCID.TSFlags & ARMII::IndexModeMask) != 0;
+
+ // Part of binary is determined by TableGn.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // If this is an LDRi12, STRi12 or LDRcp, nothing more needs be done.
+ if (MI.getOpcode() == ARM::LDRi12 || MI.getOpcode() == ARM::LDRcp ||
+ MI.getOpcode() == ARM::STRi12) {
+ emitWordLE(Binary);
+ return;
+ }
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ unsigned OpIdx = 0;
+
+ // Operand 0 of a pre- and post-indexed store is the address base
+ // writeback. Skip it.
+ bool Skipped = false;
+ if (IsPrePost && Form == ARMII::StFrm) {
+ ++OpIdx;
+ Skipped = true;
+ }
+
+ // Set first operand
+ if (ImplicitRd)
+ // Special handling for implicit use (e.g. PC).
+ Binary |= (getARMRegisterNumbering(ImplicitRd) << ARMII::RegRdShift);
+ else
+ Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift;
+
+ // Set second operand
+ if (ImplicitRn)
+ // Special handling for implicit use (e.g. PC).
+ Binary |= (getARMRegisterNumbering(ImplicitRn) << ARMII::RegRnShift);
+ else
+ Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift;
+
+ // If this is a two-address operand, skip it. e.g. LDR_PRE.
+ if (!Skipped && MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)
+ ++OpIdx;
+
+ const MachineOperand &MO2 = MI.getOperand(OpIdx);
+ unsigned AM2Opc = (ImplicitRn == ARM::PC)
+ ? 0 : MI.getOperand(OpIdx+1).getImm();
+
+ // Set bit U(23) according to sign of immed value (positive or negative).
+ Binary |= ((ARM_AM::getAM2Op(AM2Opc) == ARM_AM::add ? 1 : 0) <<
+ ARMII::U_BitShift);
+ if (!MO2.getReg()) { // is immediate
+ if (ARM_AM::getAM2Offset(AM2Opc))
+ // Set the value of offset_12 field
+ Binary |= ARM_AM::getAM2Offset(AM2Opc);
+ emitWordLE(Binary);
+ return;
+ }
+
+ // Set bit I(25), because this is not in immediate encoding.
+ Binary |= 1 << ARMII::I_BitShift;
+ assert(TargetRegisterInfo::isPhysicalRegister(MO2.getReg()));
+ // Set bit[3:0] to the corresponding Rm register
+ Binary |= getARMRegisterNumbering(MO2.getReg());
+
+ // If this instr is in scaled register offset/index instruction, set
+ // shift_immed(bit[11:7]) and shift(bit[6:5]) fields.
+ if (unsigned ShImm = ARM_AM::getAM2Offset(AM2Opc)) {
+ Binary |= getShiftOp(AM2Opc) << ARMII::ShiftImmShift; // shift
+ Binary |= ShImm << ARMII::ShiftShift; // shift_immed
+ }
+
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitMiscLoadStoreInstruction(const MachineInstr &MI,
+ unsigned ImplicitRn) {
+ const MCInstrDesc &MCID = MI.getDesc();
+ unsigned Form = MCID.TSFlags & ARMII::FormMask;
+ bool IsPrePost = (MCID.TSFlags & ARMII::IndexModeMask) != 0;
+
+ // Part of binary is determined by TableGn.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ unsigned OpIdx = 0;
+
+ // Operand 0 of a pre- and post-indexed store is the address base
+ // writeback. Skip it.
+ bool Skipped = false;
+ if (IsPrePost && Form == ARMII::StMiscFrm) {
+ ++OpIdx;
+ Skipped = true;
+ }
+
+ // Set first operand
+ Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift;
+
+ // Skip LDRD and STRD's second operand.
+ if (MCID.Opcode == ARM::LDRD || MCID.Opcode == ARM::STRD)
+ ++OpIdx;
+
+ // Set second operand
+ if (ImplicitRn)
+ // Special handling for implicit use (e.g. PC).
+ Binary |= (getARMRegisterNumbering(ImplicitRn) << ARMII::RegRnShift);
+ else
+ Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift;
+
+ // If this is a two-address operand, skip it. e.g. LDRH_POST.
+ if (!Skipped && MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)
+ ++OpIdx;
+
+ const MachineOperand &MO2 = MI.getOperand(OpIdx);
+ unsigned AM3Opc = (ImplicitRn == ARM::PC)
+ ? 0 : MI.getOperand(OpIdx+1).getImm();
+
+ // Set bit U(23) according to sign of immed value (positive or negative)
+ Binary |= ((ARM_AM::getAM3Op(AM3Opc) == ARM_AM::add ? 1 : 0) <<
+ ARMII::U_BitShift);
+
+ // If this instr is in register offset/index encoding, set bit[3:0]
+ // to the corresponding Rm register.
+ if (MO2.getReg()) {
+ Binary |= getARMRegisterNumbering(MO2.getReg());
+ emitWordLE(Binary);
+ return;
+ }
+
+ // This instr is in immediate offset/index encoding, set bit 22 to 1.
+ Binary |= 1 << ARMII::AM3_I_BitShift;
+ if (unsigned ImmOffs = ARM_AM::getAM3Offset(AM3Opc)) {
+ // Set operands
+ Binary |= (ImmOffs >> 4) << ARMII::ImmHiShift; // immedH
+ Binary |= (ImmOffs & 0xF); // immedL
+ }
+
+ emitWordLE(Binary);
+}
+
+static unsigned getAddrModeUPBits(unsigned Mode) {
+ unsigned Binary = 0;
+
+ // Set addressing mode by modifying bits U(23) and P(24)
+ // IA - Increment after - bit U = 1 and bit P = 0
+ // IB - Increment before - bit U = 1 and bit P = 1
+ // DA - Decrement after - bit U = 0 and bit P = 0
+ // DB - Decrement before - bit U = 0 and bit P = 1
+ switch (Mode) {
+ default: llvm_unreachable("Unknown addressing sub-mode!");
+ case ARM_AM::da: break;
+ case ARM_AM::db: Binary |= 0x1 << ARMII::P_BitShift; break;
+ case ARM_AM::ia: Binary |= 0x1 << ARMII::U_BitShift; break;
+ case ARM_AM::ib: Binary |= 0x3 << ARMII::U_BitShift; break;
+ }
+
+ return Binary;
+}
+
+void ARMCodeEmitter::emitLoadStoreMultipleInstruction(const MachineInstr &MI) {
+ const MCInstrDesc &MCID = MI.getDesc();
+ bool IsUpdating = (MCID.TSFlags & ARMII::IndexModeMask) != 0;
+
+ // Part of binary is determined by TableGn.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Skip operand 0 of an instruction with base register update.
+ unsigned OpIdx = 0;
+ if (IsUpdating)
+ ++OpIdx;
+
+ // Set base address operand
+ Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift;
+
+ // Set addressing mode by modifying bits U(23) and P(24)
+ ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(MI.getOpcode());
+ Binary |= getAddrModeUPBits(ARM_AM::getAM4SubMode(Mode));
+
+ // Set bit W(21)
+ if (IsUpdating)
+ Binary |= 0x1 << ARMII::W_BitShift;
+
+ // Set registers
+ for (unsigned i = OpIdx+2, e = MI.getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || MO.isImplicit())
+ break;
+ unsigned RegNum = getARMRegisterNumbering(MO.getReg());
+ assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) &&
+ RegNum < 16);
+ Binary |= 0x1 << RegNum;
+ }
+
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitMulFrmInstruction(const MachineInstr &MI) {
+ const MCInstrDesc &MCID = MI.getDesc();
+
+ // Part of binary is determined by TableGn.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Encode S bit if MI modifies CPSR.
+ Binary |= getAddrModeSBit(MI, MCID);
+
+ // 32x32->64bit operations have two destination registers. The number
+ // of register definitions will tell us if that's what we're dealing with.
+ unsigned OpIdx = 0;
+ if (MCID.getNumDefs() == 2)
+ Binary |= getMachineOpValue (MI, OpIdx++) << ARMII::RegRdLoShift;
+
+ // Encode Rd
+ Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdHiShift;
+
+ // Encode Rm
+ Binary |= getMachineOpValue(MI, OpIdx++);
+
+ // Encode Rs
+ Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRsShift;
+
+ // Many multiple instructions (e.g. MLA) have three src operands. Encode
+ // it as Rn (for multiply, that's in the same offset as RdLo.
+ if (MCID.getNumOperands() > OpIdx &&
+ !MCID.OpInfo[OpIdx].isPredicate() &&
+ !MCID.OpInfo[OpIdx].isOptionalDef())
+ Binary |= getMachineOpValue(MI, OpIdx) << ARMII::RegRdLoShift;
+
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitExtendInstruction(const MachineInstr &MI) {
+ const MCInstrDesc &MCID = MI.getDesc();
+
+ // Part of binary is determined by TableGn.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ unsigned OpIdx = 0;
+
+ // Encode Rd
+ Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift;
+
+ const MachineOperand &MO1 = MI.getOperand(OpIdx++);
+ const MachineOperand &MO2 = MI.getOperand(OpIdx);
+ if (MO2.isReg()) {
+ // Two register operand form.
+ // Encode Rn.
+ Binary |= getMachineOpValue(MI, MO1) << ARMII::RegRnShift;
+
+ // Encode Rm.
+ Binary |= getMachineOpValue(MI, MO2);
+ ++OpIdx;
+ } else {
+ Binary |= getMachineOpValue(MI, MO1);
+ }
+
+ // Encode rot imm (0, 8, 16, or 24) if it has a rotate immediate operand.
+ if (MI.getOperand(OpIdx).isImm() &&
+ !MCID.OpInfo[OpIdx].isPredicate() &&
+ !MCID.OpInfo[OpIdx].isOptionalDef())
+ Binary |= (getMachineOpValue(MI, OpIdx) / 8) << ARMII::ExtRotImmShift;
+
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitMiscArithInstruction(const MachineInstr &MI) {
+ const MCInstrDesc &MCID = MI.getDesc();
+
+ // Part of binary is determined by TableGn.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // PKH instructions are finished at this point
+ if (MCID.Opcode == ARM::PKHBT || MCID.Opcode == ARM::PKHTB) {
+ emitWordLE(Binary);
+ return;
+ }
+
+ unsigned OpIdx = 0;
+
+ // Encode Rd
+ Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift;
+
+ const MachineOperand &MO = MI.getOperand(OpIdx++);
+ if (OpIdx == MCID.getNumOperands() ||
+ MCID.OpInfo[OpIdx].isPredicate() ||
+ MCID.OpInfo[OpIdx].isOptionalDef()) {
+ // Encode Rm and it's done.
+ Binary |= getMachineOpValue(MI, MO);
+ emitWordLE(Binary);
+ return;
+ }
+
+ // Encode Rn.
+ Binary |= getMachineOpValue(MI, MO) << ARMII::RegRnShift;
+
+ // Encode Rm.
+ Binary |= getMachineOpValue(MI, OpIdx++);
+
+ // Encode shift_imm.
+ unsigned ShiftAmt = MI.getOperand(OpIdx).getImm();
+ if (MCID.Opcode == ARM::PKHTB) {
+ assert(ShiftAmt != 0 && "PKHTB shift_imm is 0!");
+ if (ShiftAmt == 32)
+ ShiftAmt = 0;
+ }
+ assert(ShiftAmt < 32 && "shift_imm range is 0 to 31!");
+ Binary |= ShiftAmt << ARMII::ShiftShift;
+
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitSaturateInstruction(const MachineInstr &MI) {
+ const MCInstrDesc &MCID = MI.getDesc();
+
+ // Part of binary is determined by TableGen.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Encode Rd
+ Binary |= getMachineOpValue(MI, 0) << ARMII::RegRdShift;
+
+ // Encode saturate bit position.
+ unsigned Pos = MI.getOperand(1).getImm();
+ if (MCID.Opcode == ARM::SSAT || MCID.Opcode == ARM::SSAT16)
+ Pos -= 1;
+ assert((Pos < 16 || (Pos < 32 &&
+ MCID.Opcode != ARM::SSAT16 &&
+ MCID.Opcode != ARM::USAT16)) &&
+ "saturate bit position out of range");
+ Binary |= Pos << 16;
+
+ // Encode Rm
+ Binary |= getMachineOpValue(MI, 2);
+
+ // Encode shift_imm.
+ if (MCID.getNumOperands() == 4) {
+ unsigned ShiftOp = MI.getOperand(3).getImm();
+ ARM_AM::ShiftOpc Opc = ARM_AM::getSORegShOp(ShiftOp);
+ if (Opc == ARM_AM::asr)
+ Binary |= (1 << 6);
+ unsigned ShiftAmt = MI.getOperand(3).getImm();
+ if (ShiftAmt == 32 && Opc == ARM_AM::asr)
+ ShiftAmt = 0;
+ assert(ShiftAmt < 32 && "shift_imm range is 0 to 31!");
+ Binary |= ShiftAmt << ARMII::ShiftShift;
+ }
+
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitBranchInstruction(const MachineInstr &MI) {
+ const MCInstrDesc &MCID = MI.getDesc();
+
+ if (MCID.Opcode == ARM::TPsoft) {
+ llvm_unreachable("ARM::TPsoft FIXME"); // FIXME
+ }
+
+ // Part of binary is determined by TableGn.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Set signed_immed_24 field
+ Binary |= getMachineOpValue(MI, 0);
+
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitInlineJumpTable(unsigned JTIndex) {
+ // Remember the base address of the inline jump table.
+ uintptr_t JTBase = MCE.getCurrentPCValue();
+ JTI->addJumpTableBaseAddr(JTIndex, JTBase);
+ DEBUG(errs() << " ** Jump Table #" << JTIndex << " @ " << (void*)JTBase
+ << '\n');
+
+ // Now emit the jump table entries.
+ const std::vector<MachineBasicBlock*> &MBBs = (*MJTEs)[JTIndex].MBBs;
+ for (unsigned i = 0, e = MBBs.size(); i != e; ++i) {
+ if (IsPIC)
+ // DestBB address - JT base.
+ emitMachineBasicBlock(MBBs[i], ARM::reloc_arm_pic_jt, JTBase);
+ else
+ // Absolute DestBB address.
+ emitMachineBasicBlock(MBBs[i], ARM::reloc_arm_absolute);
+ emitWordLE(0);
+ }
+}
+
+void ARMCodeEmitter::emitMiscBranchInstruction(const MachineInstr &MI) {
+ const MCInstrDesc &MCID = MI.getDesc();
+
+ // Handle jump tables.
+ if (MCID.Opcode == ARM::BR_JTr || MCID.Opcode == ARM::BR_JTadd) {
+ // First emit a ldr pc, [] instruction.
+ emitDataProcessingInstruction(MI, ARM::PC);
+
+ // Then emit the inline jump table.
+ unsigned JTIndex =
+ (MCID.Opcode == ARM::BR_JTr)
+ ? MI.getOperand(1).getIndex() : MI.getOperand(2).getIndex();
+ emitInlineJumpTable(JTIndex);
+ return;
+ } else if (MCID.Opcode == ARM::BR_JTm) {
+ // First emit a ldr pc, [] instruction.
+ emitLoadStoreInstruction(MI, ARM::PC);
+
+ // Then emit the inline jump table.
+ emitInlineJumpTable(MI.getOperand(3).getIndex());
+ return;
+ }
+
+ // Part of binary is determined by TableGn.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ if (MCID.Opcode == ARM::BX_RET || MCID.Opcode == ARM::MOVPCLR)
+ // The return register is LR.
+ Binary |= getARMRegisterNumbering(ARM::LR);
+ else
+ // otherwise, set the return register
+ Binary |= getMachineOpValue(MI, 0);
+
+ emitWordLE(Binary);
+}
+
+static unsigned encodeVFPRd(const MachineInstr &MI, unsigned OpIdx) {
+ unsigned RegD = MI.getOperand(OpIdx).getReg();
+ unsigned Binary = 0;
+ bool isSPVFP = ARM::SPRRegisterClass->contains(RegD);
+ RegD = getARMRegisterNumbering(RegD);
+ if (!isSPVFP)
+ Binary |= RegD << ARMII::RegRdShift;
+ else {
+ Binary |= ((RegD & 0x1E) >> 1) << ARMII::RegRdShift;
+ Binary |= (RegD & 0x01) << ARMII::D_BitShift;
+ }
+ return Binary;
+}
+
+static unsigned encodeVFPRn(const MachineInstr &MI, unsigned OpIdx) {
+ unsigned RegN = MI.getOperand(OpIdx).getReg();
+ unsigned Binary = 0;
+ bool isSPVFP = ARM::SPRRegisterClass->contains(RegN);
+ RegN = getARMRegisterNumbering(RegN);
+ if (!isSPVFP)
+ Binary |= RegN << ARMII::RegRnShift;
+ else {
+ Binary |= ((RegN & 0x1E) >> 1) << ARMII::RegRnShift;
+ Binary |= (RegN & 0x01) << ARMII::N_BitShift;
+ }
+ return Binary;
+}
+
+static unsigned encodeVFPRm(const MachineInstr &MI, unsigned OpIdx) {
+ unsigned RegM = MI.getOperand(OpIdx).getReg();
+ unsigned Binary = 0;
+ bool isSPVFP = ARM::SPRRegisterClass->contains(RegM);
+ RegM = getARMRegisterNumbering(RegM);
+ if (!isSPVFP)
+ Binary |= RegM;
+ else {
+ Binary |= ((RegM & 0x1E) >> 1);
+ Binary |= (RegM & 0x01) << ARMII::M_BitShift;
+ }
+ return Binary;
+}
+
+void ARMCodeEmitter::emitVFPArithInstruction(const MachineInstr &MI) {
+ const MCInstrDesc &MCID = MI.getDesc();
+
+ // Part of binary is determined by TableGn.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ unsigned OpIdx = 0;
+ assert((Binary & ARMII::D_BitShift) == 0 &&
+ (Binary & ARMII::N_BitShift) == 0 &&
+ (Binary & ARMII::M_BitShift) == 0 && "VFP encoding bug!");
+
+ // Encode Dd / Sd.
+ Binary |= encodeVFPRd(MI, OpIdx++);
+
+ // If this is a two-address operand, skip it, e.g. FMACD.
+ if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)
+ ++OpIdx;
+
+ // Encode Dn / Sn.
+ if ((MCID.TSFlags & ARMII::FormMask) == ARMII::VFPBinaryFrm)
+ Binary |= encodeVFPRn(MI, OpIdx++);
+
+ if (OpIdx == MCID.getNumOperands() ||
+ MCID.OpInfo[OpIdx].isPredicate() ||
+ MCID.OpInfo[OpIdx].isOptionalDef()) {
+ // FCMPEZD etc. has only one operand.
+ emitWordLE(Binary);
+ return;
+ }
+
+ // Encode Dm / Sm.
+ Binary |= encodeVFPRm(MI, OpIdx);
+
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitVFPConversionInstruction(const MachineInstr &MI) {
+ const MCInstrDesc &MCID = MI.getDesc();
+ unsigned Form = MCID.TSFlags & ARMII::FormMask;
+
+ // Part of binary is determined by TableGn.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ switch (Form) {
+ default: break;
+ case ARMII::VFPConv1Frm:
+ case ARMII::VFPConv2Frm:
+ case ARMII::VFPConv3Frm:
+ // Encode Dd / Sd.
+ Binary |= encodeVFPRd(MI, 0);
+ break;
+ case ARMII::VFPConv4Frm:
+ // Encode Dn / Sn.
+ Binary |= encodeVFPRn(MI, 0);
+ break;
+ case ARMII::VFPConv5Frm:
+ // Encode Dm / Sm.
+ Binary |= encodeVFPRm(MI, 0);
+ break;
+ }
+
+ switch (Form) {
+ default: break;
+ case ARMII::VFPConv1Frm:
+ // Encode Dm / Sm.
+ Binary |= encodeVFPRm(MI, 1);
+ break;
+ case ARMII::VFPConv2Frm:
+ case ARMII::VFPConv3Frm:
+ // Encode Dn / Sn.
+ Binary |= encodeVFPRn(MI, 1);
+ break;
+ case ARMII::VFPConv4Frm:
+ case ARMII::VFPConv5Frm:
+ // Encode Dd / Sd.
+ Binary |= encodeVFPRd(MI, 1);
+ break;
+ }
+
+ if (Form == ARMII::VFPConv5Frm)
+ // Encode Dn / Sn.
+ Binary |= encodeVFPRn(MI, 2);
+ else if (Form == ARMII::VFPConv3Frm)
+ // Encode Dm / Sm.
+ Binary |= encodeVFPRm(MI, 2);
+
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitVFPLoadStoreInstruction(const MachineInstr &MI) {
+ // Part of binary is determined by TableGn.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ unsigned OpIdx = 0;
+
+ // Encode Dd / Sd.
+ Binary |= encodeVFPRd(MI, OpIdx++);
+
+ // Encode address base.
+ const MachineOperand &Base = MI.getOperand(OpIdx++);
+ Binary |= getMachineOpValue(MI, Base) << ARMII::RegRnShift;
+
+ // If there is a non-zero immediate offset, encode it.
+ if (Base.isReg()) {
+ const MachineOperand &Offset = MI.getOperand(OpIdx);
+ if (unsigned ImmOffs = ARM_AM::getAM5Offset(Offset.getImm())) {
+ if (ARM_AM::getAM5Op(Offset.getImm()) == ARM_AM::add)
+ Binary |= 1 << ARMII::U_BitShift;
+ Binary |= ImmOffs;
+ emitWordLE(Binary);
+ return;
+ }
+ }
+
+ // If immediate offset is omitted, default to +0.
+ Binary |= 1 << ARMII::U_BitShift;
+
+ emitWordLE(Binary);
+}
+
+void
+ARMCodeEmitter::emitVFPLoadStoreMultipleInstruction(const MachineInstr &MI) {
+ const MCInstrDesc &MCID = MI.getDesc();
+ bool IsUpdating = (MCID.TSFlags & ARMII::IndexModeMask) != 0;
+
+ // Part of binary is determined by TableGn.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Skip operand 0 of an instruction with base register update.
+ unsigned OpIdx = 0;
+ if (IsUpdating)
+ ++OpIdx;
+
+ // Set base address operand
+ Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift;
+
+ // Set addressing mode by modifying bits U(23) and P(24)
+ ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(MI.getOpcode());
+ Binary |= getAddrModeUPBits(ARM_AM::getAM4SubMode(Mode));
+
+ // Set bit W(21)
+ if (IsUpdating)
+ Binary |= 0x1 << ARMII::W_BitShift;
+
+ // First register is encoded in Dd.
+ Binary |= encodeVFPRd(MI, OpIdx+2);
+
+ // Count the number of registers.
+ unsigned NumRegs = 1;
+ for (unsigned i = OpIdx+3, e = MI.getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || MO.isImplicit())
+ break;
+ ++NumRegs;
+ }
+ // Bit 8 will be set if <list> is consecutive 64-bit registers (e.g., D0)
+ // Otherwise, it will be 0, in the case of 32-bit registers.
+ if(Binary & 0x100)
+ Binary |= NumRegs * 2;
+ else
+ Binary |= NumRegs;
+
+ emitWordLE(Binary);
+}
+
+static unsigned encodeNEONRd(const MachineInstr &MI, unsigned OpIdx) {
+ unsigned RegD = MI.getOperand(OpIdx).getReg();
+ unsigned Binary = 0;
+ RegD = getARMRegisterNumbering(RegD);
+ Binary |= (RegD & 0xf) << ARMII::RegRdShift;
+ Binary |= ((RegD >> 4) & 1) << ARMII::D_BitShift;
+ return Binary;
+}
+
+static unsigned encodeNEONRn(const MachineInstr &MI, unsigned OpIdx) {
+ unsigned RegN = MI.getOperand(OpIdx).getReg();
+ unsigned Binary = 0;
+ RegN = getARMRegisterNumbering(RegN);
+ Binary |= (RegN & 0xf) << ARMII::RegRnShift;
+ Binary |= ((RegN >> 4) & 1) << ARMII::N_BitShift;
+ return Binary;
+}
+
+static unsigned encodeNEONRm(const MachineInstr &MI, unsigned OpIdx) {
+ unsigned RegM = MI.getOperand(OpIdx).getReg();
+ unsigned Binary = 0;
+ RegM = getARMRegisterNumbering(RegM);
+ Binary |= (RegM & 0xf);
+ Binary |= ((RegM >> 4) & 1) << ARMII::M_BitShift;
+ return Binary;
+}
+
+/// convertNEONDataProcToThumb - Convert the ARM mode encoding for a NEON
+/// data-processing instruction to the corresponding Thumb encoding.
+static unsigned convertNEONDataProcToThumb(unsigned Binary) {
+ assert((Binary & 0xfe000000) == 0xf2000000 &&
+ "not an ARM NEON data-processing instruction");
+ unsigned UBit = (Binary >> 24) & 1;
+ return 0xef000000 | (UBit << 28) | (Binary & 0xffffff);
+}
+
+void ARMCodeEmitter::emitNEONLaneInstruction(const MachineInstr &MI) {
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ unsigned RegTOpIdx, RegNOpIdx, LnOpIdx;
+ const MCInstrDesc &MCID = MI.getDesc();
+ if ((MCID.TSFlags & ARMII::FormMask) == ARMII::NGetLnFrm) {
+ RegTOpIdx = 0;
+ RegNOpIdx = 1;
+ LnOpIdx = 2;
+ } else { // ARMII::NSetLnFrm
+ RegTOpIdx = 2;
+ RegNOpIdx = 0;
+ LnOpIdx = 3;
+ }
+
+ // Set the conditional execution predicate
+ Binary |= (IsThumb ? ARMCC::AL : II->getPredicate(&MI)) << ARMII::CondShift;
+
+ unsigned RegT = MI.getOperand(RegTOpIdx).getReg();
+ RegT = getARMRegisterNumbering(RegT);
+ Binary |= (RegT << ARMII::RegRdShift);
+ Binary |= encodeNEONRn(MI, RegNOpIdx);
+
+ unsigned LaneShift;
+ if ((Binary & (1 << 22)) != 0)
+ LaneShift = 0; // 8-bit elements
+ else if ((Binary & (1 << 5)) != 0)
+ LaneShift = 1; // 16-bit elements
+ else
+ LaneShift = 2; // 32-bit elements
+
+ unsigned Lane = MI.getOperand(LnOpIdx).getImm() << LaneShift;
+ unsigned Opc1 = Lane >> 2;
+ unsigned Opc2 = Lane & 3;
+ assert((Opc1 & 3) == 0 && "out-of-range lane number operand");
+ Binary |= (Opc1 << 21);
+ Binary |= (Opc2 << 5);
+
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitNEONDupInstruction(const MachineInstr &MI) {
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= (IsThumb ? ARMCC::AL : II->getPredicate(&MI)) << ARMII::CondShift;
+
+ unsigned RegT = MI.getOperand(1).getReg();
+ RegT = getARMRegisterNumbering(RegT);
+ Binary |= (RegT << ARMII::RegRdShift);
+ Binary |= encodeNEONRn(MI, 0);
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitNEON1RegModImmInstruction(const MachineInstr &MI) {
+ unsigned Binary = getBinaryCodeForInstr(MI);
+ // Destination register is encoded in Dd.
+ Binary |= encodeNEONRd(MI, 0);
+ // Immediate fields: Op, Cmode, I, Imm3, Imm4
+ unsigned Imm = MI.getOperand(1).getImm();
+ unsigned Op = (Imm >> 12) & 1;
+ unsigned Cmode = (Imm >> 8) & 0xf;
+ unsigned I = (Imm >> 7) & 1;
+ unsigned Imm3 = (Imm >> 4) & 0x7;
+ unsigned Imm4 = Imm & 0xf;
+ Binary |= (I << 24) | (Imm3 << 16) | (Cmode << 8) | (Op << 5) | Imm4;
+ if (IsThumb)
+ Binary = convertNEONDataProcToThumb(Binary);
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitNEON2RegInstruction(const MachineInstr &MI) {
+ const MCInstrDesc &MCID = MI.getDesc();
+ unsigned Binary = getBinaryCodeForInstr(MI);
+ // Destination register is encoded in Dd; source register in Dm.
+ unsigned OpIdx = 0;
+ Binary |= encodeNEONRd(MI, OpIdx++);
+ if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)
+ ++OpIdx;
+ Binary |= encodeNEONRm(MI, OpIdx);
+ if (IsThumb)
+ Binary = convertNEONDataProcToThumb(Binary);
+ // FIXME: This does not handle VDUPfdf or VDUPfqf.
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitNEON3RegInstruction(const MachineInstr &MI) {
+ const MCInstrDesc &MCID = MI.getDesc();
+ unsigned Binary = getBinaryCodeForInstr(MI);
+ // Destination register is encoded in Dd; source registers in Dn and Dm.
+ unsigned OpIdx = 0;
+ Binary |= encodeNEONRd(MI, OpIdx++);
+ if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)
+ ++OpIdx;
+ Binary |= encodeNEONRn(MI, OpIdx++);
+ if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)
+ ++OpIdx;
+ Binary |= encodeNEONRm(MI, OpIdx);
+ if (IsThumb)
+ Binary = convertNEONDataProcToThumb(Binary);
+ // FIXME: This does not handle VMOVDneon or VMOVQ.
+ emitWordLE(Binary);
+}
+
+#include "ARMGenCodeEmitter.inc"
diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
new file mode 100644
index 0000000..3e3a413
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -0,0 +1,1919 @@
+//===-- ARMConstantIslandPass.cpp - ARM constant islands ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that splits the constant pool up into 'islands'
+// which are scattered through-out the function. This is required due to the
+// limited pc-relative displacements that ARM has.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm-cp-islands"
+#include "ARM.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMInstrInfo.h"
+#include "Thumb2InstrInfo.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumCPEs, "Number of constpool entries");
+STATISTIC(NumSplit, "Number of uncond branches inserted");
+STATISTIC(NumCBrFixed, "Number of cond branches fixed");
+STATISTIC(NumUBrFixed, "Number of uncond branches fixed");
+STATISTIC(NumTBs, "Number of table branches generated");
+STATISTIC(NumT2CPShrunk, "Number of Thumb2 constantpool instructions shrunk");
+STATISTIC(NumT2BrShrunk, "Number of Thumb2 immediate branches shrunk");
+STATISTIC(NumCBZ, "Number of CBZ / CBNZ formed");
+STATISTIC(NumJTMoved, "Number of jump table destination blocks moved");
+STATISTIC(NumJTInserted, "Number of jump table intermediate blocks inserted");
+
+
+static cl::opt<bool>
+AdjustJumpTableBlocks("arm-adjust-jump-tables", cl::Hidden, cl::init(true),
+ cl::desc("Adjust basic block layout to better use TB[BH]"));
+
+namespace {
+ /// ARMConstantIslands - Due to limited PC-relative displacements, ARM
+ /// requires constant pool entries to be scattered among the instructions
+ /// inside a function. To do this, it completely ignores the normal LLVM
+ /// constant pool; instead, it places constants wherever it feels like with
+ /// special instructions.
+ ///
+ /// The terminology used in this pass includes:
+ /// Islands - Clumps of constants placed in the function.
+ /// Water - Potential places where an island could be formed.
+ /// CPE - A constant pool entry that has been placed somewhere, which
+ /// tracks a list of users.
+ class ARMConstantIslands : public MachineFunctionPass {
+ /// BBSizes - The size of each MachineBasicBlock in bytes of code, indexed
+ /// by MBB Number. The two-byte pads required for Thumb alignment are
+ /// counted as part of the following block (i.e., the offset and size for
+ /// a padded block will both be ==2 mod 4).
+ std::vector<unsigned> BBSizes;
+
+ /// BBOffsets - the offset of each MBB in bytes, starting from 0.
+ /// The two-byte pads required for Thumb alignment are counted as part of
+ /// the following block.
+ std::vector<unsigned> BBOffsets;
+
+ /// WaterList - A sorted list of basic blocks where islands could be placed
+ /// (i.e. blocks that don't fall through to the following block, due
+ /// to a return, unreachable, or unconditional branch).
+ std::vector<MachineBasicBlock*> WaterList;
+
+ /// NewWaterList - The subset of WaterList that was created since the
+ /// previous iteration by inserting unconditional branches.
+ SmallSet<MachineBasicBlock*, 4> NewWaterList;
+
+ typedef std::vector<MachineBasicBlock*>::iterator water_iterator;
+
+ /// CPUser - One user of a constant pool, keeping the machine instruction
+ /// pointer, the constant pool being referenced, and the max displacement
+ /// allowed from the instruction to the CP. The HighWaterMark records the
+ /// highest basic block where a new CPEntry can be placed. To ensure this
+ /// pass terminates, the CP entries are initially placed at the end of the
+ /// function and then move monotonically to lower addresses. The
+ /// exception to this rule is when the current CP entry for a particular
+ /// CPUser is out of range, but there is another CP entry for the same
+ /// constant value in range. We want to use the existing in-range CP
+ /// entry, but if it later moves out of range, the search for new water
+ /// should resume where it left off. The HighWaterMark is used to record
+ /// that point.
+ struct CPUser {
+ MachineInstr *MI;
+ MachineInstr *CPEMI;
+ MachineBasicBlock *HighWaterMark;
+ unsigned MaxDisp;
+ bool NegOk;
+ bool IsSoImm;
+ CPUser(MachineInstr *mi, MachineInstr *cpemi, unsigned maxdisp,
+ bool neg, bool soimm)
+ : MI(mi), CPEMI(cpemi), MaxDisp(maxdisp), NegOk(neg), IsSoImm(soimm) {
+ HighWaterMark = CPEMI->getParent();
+ }
+ };
+
+ /// CPUsers - Keep track of all of the machine instructions that use various
+ /// constant pools and their max displacement.
+ std::vector<CPUser> CPUsers;
+
+ /// CPEntry - One per constant pool entry, keeping the machine instruction
+ /// pointer, the constpool index, and the number of CPUser's which
+ /// reference this entry.
+ struct CPEntry {
+ MachineInstr *CPEMI;
+ unsigned CPI;
+ unsigned RefCount;
+ CPEntry(MachineInstr *cpemi, unsigned cpi, unsigned rc = 0)
+ : CPEMI(cpemi), CPI(cpi), RefCount(rc) {}
+ };
+
+ /// CPEntries - Keep track of all of the constant pool entry machine
+ /// instructions. For each original constpool index (i.e. those that
+ /// existed upon entry to this pass), it keeps a vector of entries.
+ /// Original elements are cloned as we go along; the clones are
+ /// put in the vector of the original element, but have distinct CPIs.
+ std::vector<std::vector<CPEntry> > CPEntries;
+
+ /// ImmBranch - One per immediate branch, keeping the machine instruction
+ /// pointer, conditional or unconditional, the max displacement,
+ /// and (if isCond is true) the corresponding unconditional branch
+ /// opcode.
+ struct ImmBranch {
+ MachineInstr *MI;
+ unsigned MaxDisp : 31;
+ bool isCond : 1;
+ int UncondBr;
+ ImmBranch(MachineInstr *mi, unsigned maxdisp, bool cond, int ubr)
+ : MI(mi), MaxDisp(maxdisp), isCond(cond), UncondBr(ubr) {}
+ };
+
+ /// ImmBranches - Keep track of all the immediate branch instructions.
+ ///
+ std::vector<ImmBranch> ImmBranches;
+
+ /// PushPopMIs - Keep track of all the Thumb push / pop instructions.
+ ///
+ SmallVector<MachineInstr*, 4> PushPopMIs;
+
+ /// T2JumpTables - Keep track of all the Thumb2 jumptable instructions.
+ SmallVector<MachineInstr*, 4> T2JumpTables;
+
+ /// HasFarJump - True if any far jump instruction has been emitted during
+ /// the branch fix up pass.
+ bool HasFarJump;
+
+ /// HasInlineAsm - True if the function contains inline assembly.
+ bool HasInlineAsm;
+
+ const ARMInstrInfo *TII;
+ const ARMSubtarget *STI;
+ ARMFunctionInfo *AFI;
+ bool isThumb;
+ bool isThumb1;
+ bool isThumb2;
+ public:
+ static char ID;
+ ARMConstantIslands() : MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const {
+ return "ARM constant island placement and branch shortening pass";
+ }
+
+ private:
+ void DoInitialPlacement(MachineFunction &MF,
+ std::vector<MachineInstr*> &CPEMIs);
+ CPEntry *findConstPoolEntry(unsigned CPI, const MachineInstr *CPEMI);
+ void JumpTableFunctionScan(MachineFunction &MF);
+ void InitialFunctionScan(MachineFunction &MF,
+ const std::vector<MachineInstr*> &CPEMIs);
+ MachineBasicBlock *SplitBlockBeforeInstr(MachineInstr *MI);
+ void UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB);
+ void AdjustBBOffsetsAfter(MachineBasicBlock *BB, int delta);
+ bool DecrementOldEntry(unsigned CPI, MachineInstr* CPEMI);
+ int LookForExistingCPEntry(CPUser& U, unsigned UserOffset);
+ bool LookForWater(CPUser&U, unsigned UserOffset, water_iterator &WaterIter);
+ void CreateNewWater(unsigned CPUserIndex, unsigned UserOffset,
+ MachineBasicBlock *&NewMBB);
+ bool HandleConstantPoolUser(MachineFunction &MF, unsigned CPUserIndex);
+ void RemoveDeadCPEMI(MachineInstr *CPEMI);
+ bool RemoveUnusedCPEntries();
+ bool CPEIsInRange(MachineInstr *MI, unsigned UserOffset,
+ MachineInstr *CPEMI, unsigned Disp, bool NegOk,
+ bool DoDump = false);
+ bool WaterIsInRange(unsigned UserOffset, MachineBasicBlock *Water,
+ CPUser &U);
+ bool OffsetIsInRange(unsigned UserOffset, unsigned TrialOffset,
+ unsigned Disp, bool NegativeOK, bool IsSoImm = false);
+ bool BBIsInRange(MachineInstr *MI, MachineBasicBlock *BB, unsigned Disp);
+ bool FixUpImmediateBr(MachineFunction &MF, ImmBranch &Br);
+ bool FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br);
+ bool FixUpUnconditionalBr(MachineFunction &MF, ImmBranch &Br);
+ bool UndoLRSpillRestore();
+ bool OptimizeThumb2Instructions(MachineFunction &MF);
+ bool OptimizeThumb2Branches(MachineFunction &MF);
+ bool ReorderThumb2JumpTables(MachineFunction &MF);
+ bool OptimizeThumb2JumpTables(MachineFunction &MF);
+ MachineBasicBlock *AdjustJTTargetBlockForward(MachineBasicBlock *BB,
+ MachineBasicBlock *JTBB);
+
+ unsigned GetOffsetOf(MachineInstr *MI) const;
+ void dumpBBs();
+ void verify(MachineFunction &MF);
+ };
+ char ARMConstantIslands::ID = 0;
+}
+
+/// verify - check BBOffsets, BBSizes, alignment of islands
+void ARMConstantIslands::verify(MachineFunction &MF) {
+ assert(BBOffsets.size() == BBSizes.size());
+ for (unsigned i = 1, e = BBOffsets.size(); i != e; ++i)
+ assert(BBOffsets[i-1]+BBSizes[i-1] == BBOffsets[i]);
+ if (!isThumb)
+ return;
+#ifndef NDEBUG
+ for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
+ MBBI != E; ++MBBI) {
+ MachineBasicBlock *MBB = MBBI;
+ if (!MBB->empty() &&
+ MBB->begin()->getOpcode() == ARM::CONSTPOOL_ENTRY) {
+ unsigned MBBId = MBB->getNumber();
+ assert(HasInlineAsm ||
+ (BBOffsets[MBBId]%4 == 0 && BBSizes[MBBId]%4 == 0) ||
+ (BBOffsets[MBBId]%4 != 0 && BBSizes[MBBId]%4 != 0));
+ }
+ }
+ for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) {
+ CPUser &U = CPUsers[i];
+ unsigned UserOffset = GetOffsetOf(U.MI) + (isThumb ? 4 : 8);
+ unsigned CPEOffset = GetOffsetOf(U.CPEMI);
+ unsigned Disp = UserOffset < CPEOffset ? CPEOffset - UserOffset :
+ UserOffset - CPEOffset;
+ assert(Disp <= U.MaxDisp || "Constant pool entry out of range!");
+ }
+#endif
+}
+
+/// print block size and offset information - debugging
+void ARMConstantIslands::dumpBBs() {
+ for (unsigned J = 0, E = BBOffsets.size(); J !=E; ++J) {
+ DEBUG(errs() << "block " << J << " offset " << BBOffsets[J]
+ << " size " << BBSizes[J] << "\n");
+ }
+}
+
+/// createARMConstantIslandPass - returns an instance of the constpool
+/// island pass.
+FunctionPass *llvm::createARMConstantIslandPass() {
+ return new ARMConstantIslands();
+}
+
+bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
+ MachineConstantPool &MCP = *MF.getConstantPool();
+
+ TII = (const ARMInstrInfo*)MF.getTarget().getInstrInfo();
+ AFI = MF.getInfo<ARMFunctionInfo>();
+ STI = &MF.getTarget().getSubtarget<ARMSubtarget>();
+
+ isThumb = AFI->isThumbFunction();
+ isThumb1 = AFI->isThumb1OnlyFunction();
+ isThumb2 = AFI->isThumb2Function();
+
+ HasFarJump = false;
+ HasInlineAsm = false;
+
+ // Renumber all of the machine basic blocks in the function, guaranteeing that
+ // the numbers agree with the position of the block in the function.
+ MF.RenumberBlocks();
+
+ // Try to reorder and otherwise adjust the block layout to make good use
+ // of the TB[BH] instructions.
+ bool MadeChange = false;
+ if (isThumb2 && AdjustJumpTableBlocks) {
+ JumpTableFunctionScan(MF);
+ MadeChange |= ReorderThumb2JumpTables(MF);
+ // Data is out of date, so clear it. It'll be re-computed later.
+ T2JumpTables.clear();
+ // Blocks may have shifted around. Keep the numbering up to date.
+ MF.RenumberBlocks();
+ }
+
+ // Thumb1 functions containing constant pools get 4-byte alignment.
+ // This is so we can keep exact track of where the alignment padding goes.
+
+ // ARM and Thumb2 functions need to be 4-byte aligned.
+ if (!isThumb1)
+ MF.EnsureAlignment(2); // 2 = log2(4)
+
+ // Perform the initial placement of the constant pool entries. To start with,
+ // we put them all at the end of the function.
+ std::vector<MachineInstr*> CPEMIs;
+ if (!MCP.isEmpty()) {
+ DoInitialPlacement(MF, CPEMIs);
+ if (isThumb1)
+ MF.EnsureAlignment(2); // 2 = log2(4)
+ }
+
+ /// The next UID to take is the first unused one.
+ AFI->initPICLabelUId(CPEMIs.size());
+
+ // Do the initial scan of the function, building up information about the
+ // sizes of each block, the location of all the water, and finding all of the
+ // constant pool users.
+ InitialFunctionScan(MF, CPEMIs);
+ CPEMIs.clear();
+ DEBUG(dumpBBs());
+
+
+ /// Remove dead constant pool entries.
+ MadeChange |= RemoveUnusedCPEntries();
+
+ // Iteratively place constant pool entries and fix up branches until there
+ // is no change.
+ unsigned NoCPIters = 0, NoBRIters = 0;
+ while (true) {
+ bool CPChange = false;
+ for (unsigned i = 0, e = CPUsers.size(); i != e; ++i)
+ CPChange |= HandleConstantPoolUser(MF, i);
+ if (CPChange && ++NoCPIters > 30)
+ llvm_unreachable("Constant Island pass failed to converge!");
+ DEBUG(dumpBBs());
+
+ // Clear NewWaterList now. If we split a block for branches, it should
+ // appear as "new water" for the next iteration of constant pool placement.
+ NewWaterList.clear();
+
+ bool BRChange = false;
+ for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i)
+ BRChange |= FixUpImmediateBr(MF, ImmBranches[i]);
+ if (BRChange && ++NoBRIters > 30)
+ llvm_unreachable("Branch Fix Up pass failed to converge!");
+ DEBUG(dumpBBs());
+
+ if (!CPChange && !BRChange)
+ break;
+ MadeChange = true;
+ }
+
+ // Shrink 32-bit Thumb2 branch, load, and store instructions.
+ if (isThumb2 && !STI->prefers32BitThumb())
+ MadeChange |= OptimizeThumb2Instructions(MF);
+
+ // After a while, this might be made debug-only, but it is not expensive.
+ verify(MF);
+
+ // If LR has been forced spilled and no far jump (i.e. BL) has been issued,
+ // undo the spill / restore of LR if possible.
+ if (isThumb && !HasFarJump && AFI->isLRSpilledForFarJump())
+ MadeChange |= UndoLRSpillRestore();
+
+ // Save the mapping between original and cloned constpool entries.
+ for (unsigned i = 0, e = CPEntries.size(); i != e; ++i) {
+ for (unsigned j = 0, je = CPEntries[i].size(); j != je; ++j) {
+ const CPEntry & CPE = CPEntries[i][j];
+ AFI->recordCPEClone(i, CPE.CPI);
+ }
+ }
+
+ DEBUG(errs() << '\n'; dumpBBs());
+
+ BBSizes.clear();
+ BBOffsets.clear();
+ WaterList.clear();
+ CPUsers.clear();
+ CPEntries.clear();
+ ImmBranches.clear();
+ PushPopMIs.clear();
+ T2JumpTables.clear();
+
+ return MadeChange;
+}
+
+/// DoInitialPlacement - Perform the initial placement of the constant pool
+/// entries. To start with, we put them all at the end of the function.
+void ARMConstantIslands::DoInitialPlacement(MachineFunction &MF,
+ std::vector<MachineInstr*> &CPEMIs) {
+ // Create the basic block to hold the CPE's.
+ MachineBasicBlock *BB = MF.CreateMachineBasicBlock();
+ MF.push_back(BB);
+
+ // Add all of the constants from the constant pool to the end block, use an
+ // identity mapping of CPI's to CPE's.
+ const std::vector<MachineConstantPoolEntry> &CPs =
+ MF.getConstantPool()->getConstants();
+
+ const TargetData &TD = *MF.getTarget().getTargetData();
+ for (unsigned i = 0, e = CPs.size(); i != e; ++i) {
+ unsigned Size = TD.getTypeAllocSize(CPs[i].getType());
+ // Verify that all constant pool entries are a multiple of 4 bytes. If not,
+ // we would have to pad them out or something so that instructions stay
+ // aligned.
+ assert((Size & 3) == 0 && "CP Entry not multiple of 4 bytes!");
+ MachineInstr *CPEMI =
+ BuildMI(BB, DebugLoc(), TII->get(ARM::CONSTPOOL_ENTRY))
+ .addImm(i).addConstantPoolIndex(i).addImm(Size);
+ CPEMIs.push_back(CPEMI);
+
+ // Add a new CPEntry, but no corresponding CPUser yet.
+ std::vector<CPEntry> CPEs;
+ CPEs.push_back(CPEntry(CPEMI, i));
+ CPEntries.push_back(CPEs);
+ ++NumCPEs;
+ DEBUG(errs() << "Moved CPI#" << i << " to end of function as #" << i
+ << "\n");
+ }
+}
+
+/// BBHasFallthrough - Return true if the specified basic block can fallthrough
+/// into the block immediately after it.
+static bool BBHasFallthrough(MachineBasicBlock *MBB) {
+ // Get the next machine basic block in the function.
+ MachineFunction::iterator MBBI = MBB;
+ // Can't fall off end of function.
+ if (llvm::next(MBBI) == MBB->getParent()->end())
+ return false;
+
+ MachineBasicBlock *NextBB = llvm::next(MBBI);
+ for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
+ E = MBB->succ_end(); I != E; ++I)
+ if (*I == NextBB)
+ return true;
+
+ return false;
+}
+
+/// findConstPoolEntry - Given the constpool index and CONSTPOOL_ENTRY MI,
+/// look up the corresponding CPEntry.
+ARMConstantIslands::CPEntry
+*ARMConstantIslands::findConstPoolEntry(unsigned CPI,
+ const MachineInstr *CPEMI) {
+ std::vector<CPEntry> &CPEs = CPEntries[CPI];
+ // Number of entries per constpool index should be small, just do a
+ // linear search.
+ for (unsigned i = 0, e = CPEs.size(); i != e; ++i) {
+ if (CPEs[i].CPEMI == CPEMI)
+ return &CPEs[i];
+ }
+ return NULL;
+}
+
+/// JumpTableFunctionScan - Do a scan of the function, building up
+/// information about the sizes of each block and the locations of all
+/// the jump tables.
+void ARMConstantIslands::JumpTableFunctionScan(MachineFunction &MF) {
+ for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
+ MBBI != E; ++MBBI) {
+ MachineBasicBlock &MBB = *MBBI;
+
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I)
+ if (I->getDesc().isBranch() && I->getOpcode() == ARM::t2BR_JT)
+ T2JumpTables.push_back(I);
+ }
+}
+
+/// InitialFunctionScan - Do the initial scan of the function, building up
+/// information about the sizes of each block, the location of all the water,
+/// and finding all of the constant pool users.
+void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF,
+ const std::vector<MachineInstr*> &CPEMIs) {
+ // First thing, see if the function has any inline assembly in it. If so,
+ // we have to be conservative about alignment assumptions, as we don't
+ // know for sure the size of any instructions in the inline assembly.
+ for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
+ MBBI != E; ++MBBI) {
+ MachineBasicBlock &MBB = *MBBI;
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I)
+ if (I->getOpcode() == ARM::INLINEASM)
+ HasInlineAsm = true;
+ }
+
+ // Now go back through the instructions and build up our data structures.
+ unsigned Offset = 0;
+ for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
+ MBBI != E; ++MBBI) {
+ MachineBasicBlock &MBB = *MBBI;
+
+ // If this block doesn't fall through into the next MBB, then this is
+ // 'water' that a constant pool island could be placed.
+ if (!BBHasFallthrough(&MBB))
+ WaterList.push_back(&MBB);
+
+ unsigned MBBSize = 0;
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I) {
+ if (I->isDebugValue())
+ continue;
+ // Add instruction size to MBBSize.
+ MBBSize += TII->GetInstSizeInBytes(I);
+
+ int Opc = I->getOpcode();
+ if (I->getDesc().isBranch()) {
+ bool isCond = false;
+ unsigned Bits = 0;
+ unsigned Scale = 1;
+ int UOpc = Opc;
+ switch (Opc) {
+ default:
+ continue; // Ignore other JT branches
+ case ARM::tBR_JTr:
+ // A Thumb1 table jump may involve padding; for the offsets to
+ // be right, functions containing these must be 4-byte aligned.
+ // tBR_JTr expands to a mov pc followed by .align 2 and then the jump
+ // table entries. So this code checks whether offset of tBR_JTr + 2
+ // is aligned. That is held in Offset+MBBSize, which already has
+ // 2 added in for the size of the mov pc instruction.
+ MF.EnsureAlignment(2U);
+ if ((Offset+MBBSize)%4 != 0 || HasInlineAsm)
+ // FIXME: Add a pseudo ALIGN instruction instead.
+ MBBSize += 2; // padding
+ continue; // Does not get an entry in ImmBranches
+ case ARM::t2BR_JT:
+ T2JumpTables.push_back(I);
+ continue; // Does not get an entry in ImmBranches
+ case ARM::Bcc:
+ isCond = true;
+ UOpc = ARM::B;
+ // Fallthrough
+ case ARM::B:
+ Bits = 24;
+ Scale = 4;
+ break;
+ case ARM::tBcc:
+ isCond = true;
+ UOpc = ARM::tB;
+ Bits = 8;
+ Scale = 2;
+ break;
+ case ARM::tB:
+ Bits = 11;
+ Scale = 2;
+ break;
+ case ARM::t2Bcc:
+ isCond = true;
+ UOpc = ARM::t2B;
+ Bits = 20;
+ Scale = 2;
+ break;
+ case ARM::t2B:
+ Bits = 24;
+ Scale = 2;
+ break;
+ }
+
+ // Record this immediate branch.
+ unsigned MaxOffs = ((1 << (Bits-1))-1) * Scale;
+ ImmBranches.push_back(ImmBranch(I, MaxOffs, isCond, UOpc));
+ }
+
+ if (Opc == ARM::tPUSH || Opc == ARM::tPOP_RET)
+ PushPopMIs.push_back(I);
+
+ if (Opc == ARM::CONSTPOOL_ENTRY)
+ continue;
+
+ // Scan the instructions for constant pool operands.
+ for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op)
+ if (I->getOperand(op).isCPI()) {
+ // We found one. The addressing mode tells us the max displacement
+ // from the PC that this instruction permits.
+
+ // Basic size info comes from the TSFlags field.
+ unsigned Bits = 0;
+ unsigned Scale = 1;
+ bool NegOk = false;
+ bool IsSoImm = false;
+
+ switch (Opc) {
+ default:
+ llvm_unreachable("Unknown addressing mode for CP reference!");
+ break;
+
+ // Taking the address of a CP entry.
+ case ARM::LEApcrel:
+ // This takes a SoImm, which is 8 bit immediate rotated. We'll
+ // pretend the maximum offset is 255 * 4. Since each instruction
+ // 4 byte wide, this is always correct. We'll check for other
+ // displacements that fits in a SoImm as well.
+ Bits = 8;
+ Scale = 4;
+ NegOk = true;
+ IsSoImm = true;
+ break;
+ case ARM::t2LEApcrel:
+ Bits = 12;
+ NegOk = true;
+ break;
+ case ARM::tLEApcrel:
+ Bits = 8;
+ Scale = 4;
+ break;
+
+ case ARM::LDRi12:
+ case ARM::LDRcp:
+ case ARM::t2LDRpci:
+ Bits = 12; // +-offset_12
+ NegOk = true;
+ break;
+
+ case ARM::tLDRpci:
+ Bits = 8;
+ Scale = 4; // +(offset_8*4)
+ break;
+
+ case ARM::VLDRD:
+ case ARM::VLDRS:
+ Bits = 8;
+ Scale = 4; // +-(offset_8*4)
+ NegOk = true;
+ break;
+ }
+
+ // Remember that this is a user of a CP entry.
+ unsigned CPI = I->getOperand(op).getIndex();
+ MachineInstr *CPEMI = CPEMIs[CPI];
+ unsigned MaxOffs = ((1 << Bits)-1) * Scale;
+ CPUsers.push_back(CPUser(I, CPEMI, MaxOffs, NegOk, IsSoImm));
+
+ // Increment corresponding CPEntry reference count.
+ CPEntry *CPE = findConstPoolEntry(CPI, CPEMI);
+ assert(CPE && "Cannot find a corresponding CPEntry!");
+ CPE->RefCount++;
+
+ // Instructions can only use one CP entry, don't bother scanning the
+ // rest of the operands.
+ break;
+ }
+ }
+
+ // In thumb mode, if this block is a constpool island, we may need padding
+ // so it's aligned on 4 byte boundary.
+ if (isThumb &&
+ !MBB.empty() &&
+ MBB.begin()->getOpcode() == ARM::CONSTPOOL_ENTRY &&
+ ((Offset%4) != 0 || HasInlineAsm))
+ MBBSize += 2;
+
+ BBSizes.push_back(MBBSize);
+ BBOffsets.push_back(Offset);
+ Offset += MBBSize;
+ }
+}
+
+/// GetOffsetOf - Return the current offset of the specified machine instruction
+/// from the start of the function. This offset changes as stuff is moved
+/// around inside the function.
+unsigned ARMConstantIslands::GetOffsetOf(MachineInstr *MI) const {
+ MachineBasicBlock *MBB = MI->getParent();
+
+ // The offset is composed of two things: the sum of the sizes of all MBB's
+ // before this instruction's block, and the offset from the start of the block
+ // it is in.
+ unsigned Offset = BBOffsets[MBB->getNumber()];
+
+ // If we're looking for a CONSTPOOL_ENTRY in Thumb, see if this block has
+ // alignment padding, and compensate if so.
+ if (isThumb &&
+ MI->getOpcode() == ARM::CONSTPOOL_ENTRY &&
+ (Offset%4 != 0 || HasInlineAsm))
+ Offset += 2;
+
+ // Sum instructions before MI in MBB.
+ for (MachineBasicBlock::iterator I = MBB->begin(); ; ++I) {
+ assert(I != MBB->end() && "Didn't find MI in its own basic block?");
+ if (&*I == MI) return Offset;
+ Offset += TII->GetInstSizeInBytes(I);
+ }
+}
+
+/// CompareMBBNumbers - Little predicate function to sort the WaterList by MBB
+/// ID.
+static bool CompareMBBNumbers(const MachineBasicBlock *LHS,
+ const MachineBasicBlock *RHS) {
+ return LHS->getNumber() < RHS->getNumber();
+}
+
+/// UpdateForInsertedWaterBlock - When a block is newly inserted into the
+/// machine function, it upsets all of the block numbers. Renumber the blocks
+/// and update the arrays that parallel this numbering.
+void ARMConstantIslands::UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB) {
+ // Renumber the MBB's to keep them consecutive.
+ NewBB->getParent()->RenumberBlocks(NewBB);
+
+ // Insert a size into BBSizes to align it properly with the (newly
+ // renumbered) block numbers.
+ BBSizes.insert(BBSizes.begin()+NewBB->getNumber(), 0);
+
+ // Likewise for BBOffsets.
+ BBOffsets.insert(BBOffsets.begin()+NewBB->getNumber(), 0);
+
+ // Next, update WaterList. Specifically, we need to add NewMBB as having
+ // available water after it.
+ water_iterator IP =
+ std::lower_bound(WaterList.begin(), WaterList.end(), NewBB,
+ CompareMBBNumbers);
+ WaterList.insert(IP, NewBB);
+}
+
+
+/// Split the basic block containing MI into two blocks, which are joined by
+/// an unconditional branch. Update data structures and renumber blocks to
+/// account for this change and returns the newly created block.
+MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) {
+ MachineBasicBlock *OrigBB = MI->getParent();
+ MachineFunction &MF = *OrigBB->getParent();
+
+ // Create a new MBB for the code after the OrigBB.
+ MachineBasicBlock *NewBB =
+ MF.CreateMachineBasicBlock(OrigBB->getBasicBlock());
+ MachineFunction::iterator MBBI = OrigBB; ++MBBI;
+ MF.insert(MBBI, NewBB);
+
+ // Splice the instructions starting with MI over to NewBB.
+ NewBB->splice(NewBB->end(), OrigBB, MI, OrigBB->end());
+
+ // Add an unconditional branch from OrigBB to NewBB.
+ // Note the new unconditional branch is not being recorded.
+ // There doesn't seem to be meaningful DebugInfo available; this doesn't
+ // correspond to anything in the source.
+ unsigned Opc = isThumb ? (isThumb2 ? ARM::t2B : ARM::tB) : ARM::B;
+ if (!isThumb)
+ BuildMI(OrigBB, DebugLoc(), TII->get(Opc)).addMBB(NewBB);
+ else
+ BuildMI(OrigBB, DebugLoc(), TII->get(Opc)).addMBB(NewBB)
+ .addImm(ARMCC::AL).addReg(0);
+ ++NumSplit;
+
+ // Update the CFG. All succs of OrigBB are now succs of NewBB.
+ while (!OrigBB->succ_empty()) {
+ MachineBasicBlock *Succ = *OrigBB->succ_begin();
+ OrigBB->removeSuccessor(Succ);
+ NewBB->addSuccessor(Succ);
+
+ // This pass should be run after register allocation, so there should be no
+ // PHI nodes to update.
+ assert((Succ->empty() || !Succ->begin()->isPHI())
+ && "PHI nodes should be eliminated by now!");
+ }
+
+ // OrigBB branches to NewBB.
+ OrigBB->addSuccessor(NewBB);
+
+ // Update internal data structures to account for the newly inserted MBB.
+ // This is almost the same as UpdateForInsertedWaterBlock, except that
+ // the Water goes after OrigBB, not NewBB.
+ MF.RenumberBlocks(NewBB);
+
+ // Insert a size into BBSizes to align it properly with the (newly
+ // renumbered) block numbers.
+ BBSizes.insert(BBSizes.begin()+NewBB->getNumber(), 0);
+
+ // Likewise for BBOffsets.
+ BBOffsets.insert(BBOffsets.begin()+NewBB->getNumber(), 0);
+
+ // Next, update WaterList. Specifically, we need to add OrigMBB as having
+ // available water after it (but not if it's already there, which happens
+ // when splitting before a conditional branch that is followed by an
+ // unconditional branch - in that case we want to insert NewBB).
+ water_iterator IP =
+ std::lower_bound(WaterList.begin(), WaterList.end(), OrigBB,
+ CompareMBBNumbers);
+ MachineBasicBlock* WaterBB = *IP;
+ if (WaterBB == OrigBB)
+ WaterList.insert(llvm::next(IP), NewBB);
+ else
+ WaterList.insert(IP, OrigBB);
+ NewWaterList.insert(OrigBB);
+
+ unsigned OrigBBI = OrigBB->getNumber();
+ unsigned NewBBI = NewBB->getNumber();
+
+ int delta = isThumb1 ? 2 : 4;
+
+ // Figure out how large the OrigBB is. As the first half of the original
+ // block, it cannot contain a tablejump. The size includes
+ // the new jump we added. (It should be possible to do this without
+ // recounting everything, but it's very confusing, and this is rarely
+ // executed.)
+ unsigned OrigBBSize = 0;
+ for (MachineBasicBlock::iterator I = OrigBB->begin(), E = OrigBB->end();
+ I != E; ++I)
+ OrigBBSize += TII->GetInstSizeInBytes(I);
+ BBSizes[OrigBBI] = OrigBBSize;
+
+ // ...and adjust BBOffsets for NewBB accordingly.
+ BBOffsets[NewBBI] = BBOffsets[OrigBBI] + BBSizes[OrigBBI];
+
+ // Figure out how large the NewMBB is. As the second half of the original
+ // block, it may contain a tablejump.
+ unsigned NewBBSize = 0;
+ for (MachineBasicBlock::iterator I = NewBB->begin(), E = NewBB->end();
+ I != E; ++I)
+ NewBBSize += TII->GetInstSizeInBytes(I);
+ // Set the size of NewBB in BBSizes. It does not include any padding now.
+ BBSizes[NewBBI] = NewBBSize;
+
+ MachineInstr* ThumbJTMI = prior(NewBB->end());
+ if (ThumbJTMI->getOpcode() == ARM::tBR_JTr) {
+ // We've added another 2-byte instruction before this tablejump, which
+ // means we will always need padding if we didn't before, and vice versa.
+
+ // The original offset of the jump instruction was:
+ unsigned OrigOffset = BBOffsets[OrigBBI] + BBSizes[OrigBBI] - delta;
+ if (OrigOffset%4 == 0) {
+ // We had padding before and now we don't. No net change in code size.
+ delta = 0;
+ } else {
+ // We didn't have padding before and now we do.
+ BBSizes[NewBBI] += 2;
+ delta = 4;
+ }
+ }
+
+ // All BBOffsets following these blocks must be modified.
+ if (delta)
+ AdjustBBOffsetsAfter(NewBB, delta);
+
+ return NewBB;
+}
+
+/// OffsetIsInRange - Checks whether UserOffset (the location of a constant pool
+/// reference) is within MaxDisp of TrialOffset (a proposed location of a
+/// constant pool entry).
+bool ARMConstantIslands::OffsetIsInRange(unsigned UserOffset,
+ unsigned TrialOffset, unsigned MaxDisp,
+ bool NegativeOK, bool IsSoImm) {
+ // On Thumb offsets==2 mod 4 are rounded down by the hardware for
+ // purposes of the displacement computation; compensate for that here.
+ // Effectively, the valid range of displacements is 2 bytes smaller for such
+ // references.
+ unsigned TotalAdj = 0;
+ if (isThumb && UserOffset%4 !=0) {
+ UserOffset -= 2;
+ TotalAdj = 2;
+ }
+ // CPEs will be rounded up to a multiple of 4.
+ if (isThumb && TrialOffset%4 != 0) {
+ TrialOffset += 2;
+ TotalAdj += 2;
+ }
+
+ // In Thumb2 mode, later branch adjustments can shift instructions up and
+ // cause alignment change. In the worst case scenario this can cause the
+ // user's effective address to be subtracted by 2 and the CPE's address to
+ // be plus 2.
+ if (isThumb2 && TotalAdj != 4)
+ MaxDisp -= (4 - TotalAdj);
+
+ if (UserOffset <= TrialOffset) {
+ // User before the Trial.
+ if (TrialOffset - UserOffset <= MaxDisp)
+ return true;
+ // FIXME: Make use full range of soimm values.
+ } else if (NegativeOK) {
+ if (UserOffset - TrialOffset <= MaxDisp)
+ return true;
+ // FIXME: Make use full range of soimm values.
+ }
+ return false;
+}
+
+/// WaterIsInRange - Returns true if a CPE placed after the specified
+/// Water (a basic block) will be in range for the specific MI.
+
+bool ARMConstantIslands::WaterIsInRange(unsigned UserOffset,
+ MachineBasicBlock* Water, CPUser &U) {
+ unsigned MaxDisp = U.MaxDisp;
+ unsigned CPEOffset = BBOffsets[Water->getNumber()] +
+ BBSizes[Water->getNumber()];
+
+ // If the CPE is to be inserted before the instruction, that will raise
+ // the offset of the instruction.
+ if (CPEOffset < UserOffset)
+ UserOffset += U.CPEMI->getOperand(2).getImm();
+
+ return OffsetIsInRange(UserOffset, CPEOffset, MaxDisp, U.NegOk, U.IsSoImm);
+}
+
+/// CPEIsInRange - Returns true if the distance between specific MI and
+/// specific ConstPool entry instruction can fit in MI's displacement field.
+bool ARMConstantIslands::CPEIsInRange(MachineInstr *MI, unsigned UserOffset,
+ MachineInstr *CPEMI, unsigned MaxDisp,
+ bool NegOk, bool DoDump) {
+ unsigned CPEOffset = GetOffsetOf(CPEMI);
+ assert((CPEOffset%4 == 0 || HasInlineAsm) && "Misaligned CPE");
+
+ if (DoDump) {
+ DEBUG(errs() << "User of CPE#" << CPEMI->getOperand(0).getImm()
+ << " max delta=" << MaxDisp
+ << " insn address=" << UserOffset
+ << " CPE address=" << CPEOffset
+ << " offset=" << int(CPEOffset-UserOffset) << "\t" << *MI);
+ }
+
+ return OffsetIsInRange(UserOffset, CPEOffset, MaxDisp, NegOk);
+}
+
+#ifndef NDEBUG
+/// BBIsJumpedOver - Return true of the specified basic block's only predecessor
+/// unconditionally branches to its only successor.
+static bool BBIsJumpedOver(MachineBasicBlock *MBB) {
+ if (MBB->pred_size() != 1 || MBB->succ_size() != 1)
+ return false;
+
+ MachineBasicBlock *Succ = *MBB->succ_begin();
+ MachineBasicBlock *Pred = *MBB->pred_begin();
+ MachineInstr *PredMI = &Pred->back();
+ if (PredMI->getOpcode() == ARM::B || PredMI->getOpcode() == ARM::tB
+ || PredMI->getOpcode() == ARM::t2B)
+ return PredMI->getOperand(0).getMBB() == Succ;
+ return false;
+}
+#endif // NDEBUG
+
+void ARMConstantIslands::AdjustBBOffsetsAfter(MachineBasicBlock *BB,
+ int delta) {
+ MachineFunction::iterator MBBI = BB; MBBI = llvm::next(MBBI);
+ for(unsigned i = BB->getNumber()+1, e = BB->getParent()->getNumBlockIDs();
+ i < e; ++i) {
+ BBOffsets[i] += delta;
+ // If some existing blocks have padding, adjust the padding as needed, a
+ // bit tricky. delta can be negative so don't use % on that.
+ if (!isThumb)
+ continue;
+ MachineBasicBlock *MBB = MBBI;
+ if (!MBB->empty() && !HasInlineAsm) {
+ // Constant pool entries require padding.
+ if (MBB->begin()->getOpcode() == ARM::CONSTPOOL_ENTRY) {
+ unsigned OldOffset = BBOffsets[i] - delta;
+ if ((OldOffset%4) == 0 && (BBOffsets[i]%4) != 0) {
+ // add new padding
+ BBSizes[i] += 2;
+ delta += 2;
+ } else if ((OldOffset%4) != 0 && (BBOffsets[i]%4) == 0) {
+ // remove existing padding
+ BBSizes[i] -= 2;
+ delta -= 2;
+ }
+ }
+ // Thumb1 jump tables require padding. They should be at the end;
+ // following unconditional branches are removed by AnalyzeBranch.
+ // tBR_JTr expands to a mov pc followed by .align 2 and then the jump
+ // table entries. So this code checks whether offset of tBR_JTr
+ // is aligned; if it is, the offset of the jump table following the
+ // instruction will not be aligned, and we need padding.
+ MachineInstr *ThumbJTMI = prior(MBB->end());
+ if (ThumbJTMI->getOpcode() == ARM::tBR_JTr) {
+ unsigned NewMIOffset = GetOffsetOf(ThumbJTMI);
+ unsigned OldMIOffset = NewMIOffset - delta;
+ if ((OldMIOffset%4) == 0 && (NewMIOffset%4) != 0) {
+ // remove existing padding
+ BBSizes[i] -= 2;
+ delta -= 2;
+ } else if ((OldMIOffset%4) != 0 && (NewMIOffset%4) == 0) {
+ // add new padding
+ BBSizes[i] += 2;
+ delta += 2;
+ }
+ }
+ if (delta==0)
+ return;
+ }
+ MBBI = llvm::next(MBBI);
+ }
+}
+
+/// DecrementOldEntry - find the constant pool entry with index CPI
+/// and instruction CPEMI, and decrement its refcount. If the refcount
+/// becomes 0 remove the entry and instruction. Returns true if we removed
+/// the entry, false if we didn't.
+
+bool ARMConstantIslands::DecrementOldEntry(unsigned CPI, MachineInstr *CPEMI) {
+ // Find the old entry. Eliminate it if it is no longer used.
+ CPEntry *CPE = findConstPoolEntry(CPI, CPEMI);
+ assert(CPE && "Unexpected!");
+ if (--CPE->RefCount == 0) {
+ RemoveDeadCPEMI(CPEMI);
+ CPE->CPEMI = NULL;
+ --NumCPEs;
+ return true;
+ }
+ return false;
+}
+
+/// LookForCPEntryInRange - see if the currently referenced CPE is in range;
+/// if not, see if an in-range clone of the CPE is in range, and if so,
+/// change the data structures so the user references the clone. Returns:
+/// 0 = no existing entry found
+/// 1 = entry found, and there were no code insertions or deletions
+/// 2 = entry found, and there were code insertions or deletions
+int ARMConstantIslands::LookForExistingCPEntry(CPUser& U, unsigned UserOffset)
+{
+ MachineInstr *UserMI = U.MI;
+ MachineInstr *CPEMI = U.CPEMI;
+
+ // Check to see if the CPE is already in-range.
+ if (CPEIsInRange(UserMI, UserOffset, CPEMI, U.MaxDisp, U.NegOk, true)) {
+ DEBUG(errs() << "In range\n");
+ return 1;
+ }
+
+ // No. Look for previously created clones of the CPE that are in range.
+ unsigned CPI = CPEMI->getOperand(1).getIndex();
+ std::vector<CPEntry> &CPEs = CPEntries[CPI];
+ for (unsigned i = 0, e = CPEs.size(); i != e; ++i) {
+ // We already tried this one
+ if (CPEs[i].CPEMI == CPEMI)
+ continue;
+ // Removing CPEs can leave empty entries, skip
+ if (CPEs[i].CPEMI == NULL)
+ continue;
+ if (CPEIsInRange(UserMI, UserOffset, CPEs[i].CPEMI, U.MaxDisp, U.NegOk)) {
+ DEBUG(errs() << "Replacing CPE#" << CPI << " with CPE#"
+ << CPEs[i].CPI << "\n");
+ // Point the CPUser node to the replacement
+ U.CPEMI = CPEs[i].CPEMI;
+ // Change the CPI in the instruction operand to refer to the clone.
+ for (unsigned j = 0, e = UserMI->getNumOperands(); j != e; ++j)
+ if (UserMI->getOperand(j).isCPI()) {
+ UserMI->getOperand(j).setIndex(CPEs[i].CPI);
+ break;
+ }
+ // Adjust the refcount of the clone...
+ CPEs[i].RefCount++;
+ // ...and the original. If we didn't remove the old entry, none of the
+ // addresses changed, so we don't need another pass.
+ return DecrementOldEntry(CPI, CPEMI) ? 2 : 1;
+ }
+ }
+ return 0;
+}
+
+/// getUnconditionalBrDisp - Returns the maximum displacement that can fit in
+/// the specific unconditional branch instruction.
+static inline unsigned getUnconditionalBrDisp(int Opc) {
+ switch (Opc) {
+ case ARM::tB:
+ return ((1<<10)-1)*2;
+ case ARM::t2B:
+ return ((1<<23)-1)*2;
+ default:
+ break;
+ }
+
+ return ((1<<23)-1)*4;
+}
+
+/// LookForWater - Look for an existing entry in the WaterList in which
+/// we can place the CPE referenced from U so it's within range of U's MI.
+/// Returns true if found, false if not. If it returns true, WaterIter
+/// is set to the WaterList entry. For Thumb, prefer water that will not
+/// introduce padding to water that will. To ensure that this pass
+/// terminates, the CPE location for a particular CPUser is only allowed to
+/// move to a lower address, so search backward from the end of the list and
+/// prefer the first water that is in range.
+bool ARMConstantIslands::LookForWater(CPUser &U, unsigned UserOffset,
+ water_iterator &WaterIter) {
+ if (WaterList.empty())
+ return false;
+
+ bool FoundWaterThatWouldPad = false;
+ water_iterator IPThatWouldPad;
+ for (water_iterator IP = prior(WaterList.end()),
+ B = WaterList.begin();; --IP) {
+ MachineBasicBlock* WaterBB = *IP;
+ // Check if water is in range and is either at a lower address than the
+ // current "high water mark" or a new water block that was created since
+ // the previous iteration by inserting an unconditional branch. In the
+ // latter case, we want to allow resetting the high water mark back to
+ // this new water since we haven't seen it before. Inserting branches
+ // should be relatively uncommon and when it does happen, we want to be
+ // sure to take advantage of it for all the CPEs near that block, so that
+ // we don't insert more branches than necessary.
+ if (WaterIsInRange(UserOffset, WaterBB, U) &&
+ (WaterBB->getNumber() < U.HighWaterMark->getNumber() ||
+ NewWaterList.count(WaterBB))) {
+ unsigned WBBId = WaterBB->getNumber();
+ if (isThumb &&
+ (BBOffsets[WBBId] + BBSizes[WBBId])%4 != 0) {
+ // This is valid Water, but would introduce padding. Remember
+ // it in case we don't find any Water that doesn't do this.
+ if (!FoundWaterThatWouldPad) {
+ FoundWaterThatWouldPad = true;
+ IPThatWouldPad = IP;
+ }
+ } else {
+ WaterIter = IP;
+ return true;
+ }
+ }
+ if (IP == B)
+ break;
+ }
+ if (FoundWaterThatWouldPad) {
+ WaterIter = IPThatWouldPad;
+ return true;
+ }
+ return false;
+}
+
+/// CreateNewWater - No existing WaterList entry will work for
+/// CPUsers[CPUserIndex], so create a place to put the CPE. The end of the
+/// block is used if in range, and the conditional branch munged so control
+/// flow is correct. Otherwise the block is split to create a hole with an
+/// unconditional branch around it. In either case NewMBB is set to a
+/// block following which the new island can be inserted (the WaterList
+/// is not adjusted).
+void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex,
+ unsigned UserOffset,
+ MachineBasicBlock *&NewMBB) {
+ CPUser &U = CPUsers[CPUserIndex];
+ MachineInstr *UserMI = U.MI;
+ MachineInstr *CPEMI = U.CPEMI;
+ MachineBasicBlock *UserMBB = UserMI->getParent();
+ unsigned OffsetOfNextBlock = BBOffsets[UserMBB->getNumber()] +
+ BBSizes[UserMBB->getNumber()];
+ assert(OffsetOfNextBlock== BBOffsets[UserMBB->getNumber()+1]);
+
+ // If the block does not end in an unconditional branch already, and if the
+ // end of the block is within range, make new water there. (The addition
+ // below is for the unconditional branch we will be adding: 4 bytes on ARM +
+ // Thumb2, 2 on Thumb1. Possible Thumb1 alignment padding is allowed for
+ // inside OffsetIsInRange.
+ if (BBHasFallthrough(UserMBB) &&
+ OffsetIsInRange(UserOffset, OffsetOfNextBlock + (isThumb1 ? 2: 4),
+ U.MaxDisp, U.NegOk, U.IsSoImm)) {
+ DEBUG(errs() << "Split at end of block\n");
+ if (&UserMBB->back() == UserMI)
+ assert(BBHasFallthrough(UserMBB) && "Expected a fallthrough BB!");
+ NewMBB = llvm::next(MachineFunction::iterator(UserMBB));
+ // Add an unconditional branch from UserMBB to fallthrough block.
+ // Record it for branch lengthening; this new branch will not get out of
+ // range, but if the preceding conditional branch is out of range, the
+ // targets will be exchanged, and the altered branch may be out of
+ // range, so the machinery has to know about it.
+ int UncondBr = isThumb ? ((isThumb2) ? ARM::t2B : ARM::tB) : ARM::B;
+ if (!isThumb)
+ BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB);
+ else
+ BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB)
+ .addImm(ARMCC::AL).addReg(0);
+ unsigned MaxDisp = getUnconditionalBrDisp(UncondBr);
+ ImmBranches.push_back(ImmBranch(&UserMBB->back(),
+ MaxDisp, false, UncondBr));
+ int delta = isThumb1 ? 2 : 4;
+ BBSizes[UserMBB->getNumber()] += delta;
+ AdjustBBOffsetsAfter(UserMBB, delta);
+ } else {
+ // What a big block. Find a place within the block to split it.
+ // This is a little tricky on Thumb1 since instructions are 2 bytes
+ // and constant pool entries are 4 bytes: if instruction I references
+ // island CPE, and instruction I+1 references CPE', it will
+ // not work well to put CPE as far forward as possible, since then
+ // CPE' cannot immediately follow it (that location is 2 bytes
+ // farther away from I+1 than CPE was from I) and we'd need to create
+ // a new island. So, we make a first guess, then walk through the
+ // instructions between the one currently being looked at and the
+ // possible insertion point, and make sure any other instructions
+ // that reference CPEs will be able to use the same island area;
+ // if not, we back up the insertion point.
+
+ // The 4 in the following is for the unconditional branch we'll be
+ // inserting (allows for long branch on Thumb1). Alignment of the
+ // island is handled inside OffsetIsInRange.
+ unsigned BaseInsertOffset = UserOffset + U.MaxDisp -4;
+ // This could point off the end of the block if we've already got
+ // constant pool entries following this block; only the last one is
+ // in the water list. Back past any possible branches (allow for a
+ // conditional and a maximally long unconditional).
+ if (BaseInsertOffset >= BBOffsets[UserMBB->getNumber()+1])
+ BaseInsertOffset = BBOffsets[UserMBB->getNumber()+1] -
+ (isThumb1 ? 6 : 8);
+ unsigned EndInsertOffset = BaseInsertOffset +
+ CPEMI->getOperand(2).getImm();
+ MachineBasicBlock::iterator MI = UserMI;
+ ++MI;
+ unsigned CPUIndex = CPUserIndex+1;
+ unsigned NumCPUsers = CPUsers.size();
+ MachineInstr *LastIT = 0;
+ for (unsigned Offset = UserOffset+TII->GetInstSizeInBytes(UserMI);
+ Offset < BaseInsertOffset;
+ Offset += TII->GetInstSizeInBytes(MI),
+ MI = llvm::next(MI)) {
+ if (CPUIndex < NumCPUsers && CPUsers[CPUIndex].MI == MI) {
+ CPUser &U = CPUsers[CPUIndex];
+ if (!OffsetIsInRange(Offset, EndInsertOffset,
+ U.MaxDisp, U.NegOk, U.IsSoImm)) {
+ BaseInsertOffset -= (isThumb1 ? 2 : 4);
+ EndInsertOffset -= (isThumb1 ? 2 : 4);
+ }
+ // This is overly conservative, as we don't account for CPEMIs
+ // being reused within the block, but it doesn't matter much.
+ EndInsertOffset += CPUsers[CPUIndex].CPEMI->getOperand(2).getImm();
+ CPUIndex++;
+ }
+
+ // Remember the last IT instruction.
+ if (MI->getOpcode() == ARM::t2IT)
+ LastIT = MI;
+ }
+
+ DEBUG(errs() << "Split in middle of big block\n");
+ --MI;
+
+ // Avoid splitting an IT block.
+ if (LastIT) {
+ unsigned PredReg = 0;
+ ARMCC::CondCodes CC = llvm::getITInstrPredicate(MI, PredReg);
+ if (CC != ARMCC::AL)
+ MI = LastIT;
+ }
+ NewMBB = SplitBlockBeforeInstr(MI);
+ }
+}
+
+/// HandleConstantPoolUser - Analyze the specified user, checking to see if it
+/// is out-of-range. If so, pick up the constant pool value and move it some
+/// place in-range. Return true if we changed any addresses (thus must run
+/// another pass of branch lengthening), false otherwise.
+bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF,
+ unsigned CPUserIndex) {
+ CPUser &U = CPUsers[CPUserIndex];
+ MachineInstr *UserMI = U.MI;
+ MachineInstr *CPEMI = U.CPEMI;
+ unsigned CPI = CPEMI->getOperand(1).getIndex();
+ unsigned Size = CPEMI->getOperand(2).getImm();
+ // Compute this only once, it's expensive. The 4 or 8 is the value the
+ // hardware keeps in the PC.
+ unsigned UserOffset = GetOffsetOf(UserMI) + (isThumb ? 4 : 8);
+
+ // See if the current entry is within range, or there is a clone of it
+ // in range.
+ int result = LookForExistingCPEntry(U, UserOffset);
+ if (result==1) return false;
+ else if (result==2) return true;
+
+ // No existing clone of this CPE is within range.
+ // We will be generating a new clone. Get a UID for it.
+ unsigned ID = AFI->createPICLabelUId();
+
+ // Look for water where we can place this CPE.
+ MachineBasicBlock *NewIsland = MF.CreateMachineBasicBlock();
+ MachineBasicBlock *NewMBB;
+ water_iterator IP;
+ if (LookForWater(U, UserOffset, IP)) {
+ DEBUG(errs() << "found water in range\n");
+ MachineBasicBlock *WaterBB = *IP;
+
+ // If the original WaterList entry was "new water" on this iteration,
+ // propagate that to the new island. This is just keeping NewWaterList
+ // updated to match the WaterList, which will be updated below.
+ if (NewWaterList.count(WaterBB)) {
+ NewWaterList.erase(WaterBB);
+ NewWaterList.insert(NewIsland);
+ }
+ // The new CPE goes before the following block (NewMBB).
+ NewMBB = llvm::next(MachineFunction::iterator(WaterBB));
+
+ } else {
+ // No water found.
+ DEBUG(errs() << "No water found\n");
+ CreateNewWater(CPUserIndex, UserOffset, NewMBB);
+
+ // SplitBlockBeforeInstr adds to WaterList, which is important when it is
+ // called while handling branches so that the water will be seen on the
+ // next iteration for constant pools, but in this context, we don't want
+ // it. Check for this so it will be removed from the WaterList.
+ // Also remove any entry from NewWaterList.
+ MachineBasicBlock *WaterBB = prior(MachineFunction::iterator(NewMBB));
+ IP = std::find(WaterList.begin(), WaterList.end(), WaterBB);
+ if (IP != WaterList.end())
+ NewWaterList.erase(WaterBB);
+
+ // We are adding new water. Update NewWaterList.
+ NewWaterList.insert(NewIsland);
+ }
+
+ // Remove the original WaterList entry; we want subsequent insertions in
+ // this vicinity to go after the one we're about to insert. This
+ // considerably reduces the number of times we have to move the same CPE
+ // more than once and is also important to ensure the algorithm terminates.
+ if (IP != WaterList.end())
+ WaterList.erase(IP);
+
+ // Okay, we know we can put an island before NewMBB now, do it!
+ MF.insert(NewMBB, NewIsland);
+
+ // Update internal data structures to account for the newly inserted MBB.
+ UpdateForInsertedWaterBlock(NewIsland);
+
+ // Decrement the old entry, and remove it if refcount becomes 0.
+ DecrementOldEntry(CPI, CPEMI);
+
+ // Now that we have an island to add the CPE to, clone the original CPE and
+ // add it to the island.
+ U.HighWaterMark = NewIsland;
+ U.CPEMI = BuildMI(NewIsland, DebugLoc(), TII->get(ARM::CONSTPOOL_ENTRY))
+ .addImm(ID).addConstantPoolIndex(CPI).addImm(Size);
+ CPEntries[CPI].push_back(CPEntry(U.CPEMI, ID, 1));
+ ++NumCPEs;
+
+ BBOffsets[NewIsland->getNumber()] = BBOffsets[NewMBB->getNumber()];
+ // Compensate for .align 2 in thumb mode.
+ if (isThumb && (BBOffsets[NewIsland->getNumber()]%4 != 0 || HasInlineAsm))
+ Size += 2;
+ // Increase the size of the island block to account for the new entry.
+ BBSizes[NewIsland->getNumber()] += Size;
+ AdjustBBOffsetsAfter(NewIsland, Size);
+
+ // Finally, change the CPI in the instruction operand to be ID.
+ for (unsigned i = 0, e = UserMI->getNumOperands(); i != e; ++i)
+ if (UserMI->getOperand(i).isCPI()) {
+ UserMI->getOperand(i).setIndex(ID);
+ break;
+ }
+
+ DEBUG(errs() << " Moved CPE to #" << ID << " CPI=" << CPI
+ << '\t' << *UserMI);
+
+ return true;
+}
+
+/// RemoveDeadCPEMI - Remove a dead constant pool entry instruction. Update
+/// sizes and offsets of impacted basic blocks.
+void ARMConstantIslands::RemoveDeadCPEMI(MachineInstr *CPEMI) {
+ MachineBasicBlock *CPEBB = CPEMI->getParent();
+ unsigned Size = CPEMI->getOperand(2).getImm();
+ CPEMI->eraseFromParent();
+ BBSizes[CPEBB->getNumber()] -= Size;
+ // All succeeding offsets have the current size value added in, fix this.
+ if (CPEBB->empty()) {
+ // In thumb1 mode, the size of island may be padded by two to compensate for
+ // the alignment requirement. Then it will now be 2 when the block is
+ // empty, so fix this.
+ // All succeeding offsets have the current size value added in, fix this.
+ if (BBSizes[CPEBB->getNumber()] != 0) {
+ Size += BBSizes[CPEBB->getNumber()];
+ BBSizes[CPEBB->getNumber()] = 0;
+ }
+ }
+ AdjustBBOffsetsAfter(CPEBB, -Size);
+ // An island has only one predecessor BB and one successor BB. Check if
+ // this BB's predecessor jumps directly to this BB's successor. This
+ // shouldn't happen currently.
+ assert(!BBIsJumpedOver(CPEBB) && "How did this happen?");
+ // FIXME: remove the empty blocks after all the work is done?
+}
+
+/// RemoveUnusedCPEntries - Remove constant pool entries whose refcounts
+/// are zero.
+bool ARMConstantIslands::RemoveUnusedCPEntries() {
+ unsigned MadeChange = false;
+ for (unsigned i = 0, e = CPEntries.size(); i != e; ++i) {
+ std::vector<CPEntry> &CPEs = CPEntries[i];
+ for (unsigned j = 0, ee = CPEs.size(); j != ee; ++j) {
+ if (CPEs[j].RefCount == 0 && CPEs[j].CPEMI) {
+ RemoveDeadCPEMI(CPEs[j].CPEMI);
+ CPEs[j].CPEMI = NULL;
+ MadeChange = true;
+ }
+ }
+ }
+ return MadeChange;
+}
+
+/// BBIsInRange - Returns true if the distance between specific MI and
+/// specific BB can fit in MI's displacement field.
+bool ARMConstantIslands::BBIsInRange(MachineInstr *MI,MachineBasicBlock *DestBB,
+ unsigned MaxDisp) {
+ unsigned PCAdj = isThumb ? 4 : 8;
+ unsigned BrOffset = GetOffsetOf(MI) + PCAdj;
+ unsigned DestOffset = BBOffsets[DestBB->getNumber()];
+
+ DEBUG(errs() << "Branch of destination BB#" << DestBB->getNumber()
+ << " from BB#" << MI->getParent()->getNumber()
+ << " max delta=" << MaxDisp
+ << " from " << GetOffsetOf(MI) << " to " << DestOffset
+ << " offset " << int(DestOffset-BrOffset) << "\t" << *MI);
+
+ if (BrOffset <= DestOffset) {
+ // Branch before the Dest.
+ if (DestOffset-BrOffset <= MaxDisp)
+ return true;
+ } else {
+ if (BrOffset-DestOffset <= MaxDisp)
+ return true;
+ }
+ return false;
+}
+
+/// FixUpImmediateBr - Fix up an immediate branch whose destination is too far
+/// away to fit in its displacement field.
+bool ARMConstantIslands::FixUpImmediateBr(MachineFunction &MF, ImmBranch &Br) {
+ MachineInstr *MI = Br.MI;
+ MachineBasicBlock *DestBB = MI->getOperand(0).getMBB();
+
+ // Check to see if the DestBB is already in-range.
+ if (BBIsInRange(MI, DestBB, Br.MaxDisp))
+ return false;
+
+ if (!Br.isCond)
+ return FixUpUnconditionalBr(MF, Br);
+ return FixUpConditionalBr(MF, Br);
+}
+
+/// FixUpUnconditionalBr - Fix up an unconditional branch whose destination is
+/// too far away to fit in its displacement field. If the LR register has been
+/// spilled in the epilogue, then we can use BL to implement a far jump.
+/// Otherwise, add an intermediate branch instruction to a branch.
+bool
+ARMConstantIslands::FixUpUnconditionalBr(MachineFunction &MF, ImmBranch &Br) {
+ MachineInstr *MI = Br.MI;
+ MachineBasicBlock *MBB = MI->getParent();
+ if (!isThumb1)
+ llvm_unreachable("FixUpUnconditionalBr is Thumb1 only!");
+
+ // Use BL to implement far jump.
+ Br.MaxDisp = (1 << 21) * 2;
+ MI->setDesc(TII->get(ARM::tBfar));
+ BBSizes[MBB->getNumber()] += 2;
+ AdjustBBOffsetsAfter(MBB, 2);
+ HasFarJump = true;
+ ++NumUBrFixed;
+
+ DEBUG(errs() << " Changed B to long jump " << *MI);
+
+ return true;
+}
+
+/// FixUpConditionalBr - Fix up a conditional branch whose destination is too
+/// far away to fit in its displacement field. It is converted to an inverse
+/// conditional branch + an unconditional branch to the destination.
+bool
+ARMConstantIslands::FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br) {
+ MachineInstr *MI = Br.MI;
+ MachineBasicBlock *DestBB = MI->getOperand(0).getMBB();
+
+ // Add an unconditional branch to the destination and invert the branch
+ // condition to jump over it:
+ // blt L1
+ // =>
+ // bge L2
+ // b L1
+ // L2:
+ ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(1).getImm();
+ CC = ARMCC::getOppositeCondition(CC);
+ unsigned CCReg = MI->getOperand(2).getReg();
+
+ // If the branch is at the end of its MBB and that has a fall-through block,
+ // direct the updated conditional branch to the fall-through block. Otherwise,
+ // split the MBB before the next instruction.
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineInstr *BMI = &MBB->back();
+ bool NeedSplit = (BMI != MI) || !BBHasFallthrough(MBB);
+
+ ++NumCBrFixed;
+ if (BMI != MI) {
+ if (llvm::next(MachineBasicBlock::iterator(MI)) == prior(MBB->end()) &&
+ BMI->getOpcode() == Br.UncondBr) {
+ // Last MI in the BB is an unconditional branch. Can we simply invert the
+ // condition and swap destinations:
+ // beq L1
+ // b L2
+ // =>
+ // bne L2
+ // b L1
+ MachineBasicBlock *NewDest = BMI->getOperand(0).getMBB();
+ if (BBIsInRange(MI, NewDest, Br.MaxDisp)) {
+ DEBUG(errs() << " Invert Bcc condition and swap its destination with "
+ << *BMI);
+ BMI->getOperand(0).setMBB(DestBB);
+ MI->getOperand(0).setMBB(NewDest);
+ MI->getOperand(1).setImm(CC);
+ return true;
+ }
+ }
+ }
+
+ if (NeedSplit) {
+ SplitBlockBeforeInstr(MI);
+ // No need for the branch to the next block. We're adding an unconditional
+ // branch to the destination.
+ int delta = TII->GetInstSizeInBytes(&MBB->back());
+ BBSizes[MBB->getNumber()] -= delta;
+ MachineBasicBlock* SplitBB = llvm::next(MachineFunction::iterator(MBB));
+ AdjustBBOffsetsAfter(SplitBB, -delta);
+ MBB->back().eraseFromParent();
+ // BBOffsets[SplitBB] is wrong temporarily, fixed below
+ }
+ MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(MBB));
+
+ DEBUG(errs() << " Insert B to BB#" << DestBB->getNumber()
+ << " also invert condition and change dest. to BB#"
+ << NextBB->getNumber() << "\n");
+
+ // Insert a new conditional branch and a new unconditional branch.
+ // Also update the ImmBranch as well as adding a new entry for the new branch.
+ BuildMI(MBB, DebugLoc(), TII->get(MI->getOpcode()))
+ .addMBB(NextBB).addImm(CC).addReg(CCReg);
+ Br.MI = &MBB->back();
+ BBSizes[MBB->getNumber()] += TII->GetInstSizeInBytes(&MBB->back());
+ if (isThumb)
+ BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB)
+ .addImm(ARMCC::AL).addReg(0);
+ else
+ BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB);
+ BBSizes[MBB->getNumber()] += TII->GetInstSizeInBytes(&MBB->back());
+ unsigned MaxDisp = getUnconditionalBrDisp(Br.UncondBr);
+ ImmBranches.push_back(ImmBranch(&MBB->back(), MaxDisp, false, Br.UncondBr));
+
+ // Remove the old conditional branch. It may or may not still be in MBB.
+ BBSizes[MI->getParent()->getNumber()] -= TII->GetInstSizeInBytes(MI);
+ MI->eraseFromParent();
+
+ // The net size change is an addition of one unconditional branch.
+ int delta = TII->GetInstSizeInBytes(&MBB->back());
+ AdjustBBOffsetsAfter(MBB, delta);
+ return true;
+}
+
+/// UndoLRSpillRestore - Remove Thumb push / pop instructions that only spills
+/// LR / restores LR to pc. FIXME: This is done here because it's only possible
+/// to do this if tBfar is not used.
+bool ARMConstantIslands::UndoLRSpillRestore() {
+ bool MadeChange = false;
+ for (unsigned i = 0, e = PushPopMIs.size(); i != e; ++i) {
+ MachineInstr *MI = PushPopMIs[i];
+ // First two operands are predicates.
+ if (MI->getOpcode() == ARM::tPOP_RET &&
+ MI->getOperand(2).getReg() == ARM::PC &&
+ MI->getNumExplicitOperands() == 3) {
+ // Create the new insn and copy the predicate from the old.
+ BuildMI(MI->getParent(), MI->getDebugLoc(), TII->get(ARM::tBX_RET))
+ .addOperand(MI->getOperand(0))
+ .addOperand(MI->getOperand(1));
+ MI->eraseFromParent();
+ MadeChange = true;
+ }
+ }
+ return MadeChange;
+}
+
+bool ARMConstantIslands::OptimizeThumb2Instructions(MachineFunction &MF) {
+ bool MadeChange = false;
+
+ // Shrink ADR and LDR from constantpool.
+ for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) {
+ CPUser &U = CPUsers[i];
+ unsigned Opcode = U.MI->getOpcode();
+ unsigned NewOpc = 0;
+ unsigned Scale = 1;
+ unsigned Bits = 0;
+ switch (Opcode) {
+ default: break;
+ case ARM::t2LEApcrel:
+ if (isARMLowRegister(U.MI->getOperand(0).getReg())) {
+ NewOpc = ARM::tLEApcrel;
+ Bits = 8;
+ Scale = 4;
+ }
+ break;
+ case ARM::t2LDRpci:
+ if (isARMLowRegister(U.MI->getOperand(0).getReg())) {
+ NewOpc = ARM::tLDRpci;
+ Bits = 8;
+ Scale = 4;
+ }
+ break;
+ }
+
+ if (!NewOpc)
+ continue;
+
+ unsigned UserOffset = GetOffsetOf(U.MI) + 4;
+ unsigned MaxOffs = ((1 << Bits) - 1) * Scale;
+ // FIXME: Check if offset is multiple of scale if scale is not 4.
+ if (CPEIsInRange(U.MI, UserOffset, U.CPEMI, MaxOffs, false, true)) {
+ U.MI->setDesc(TII->get(NewOpc));
+ MachineBasicBlock *MBB = U.MI->getParent();
+ BBSizes[MBB->getNumber()] -= 2;
+ AdjustBBOffsetsAfter(MBB, -2);
+ ++NumT2CPShrunk;
+ MadeChange = true;
+ }
+ }
+
+ MadeChange |= OptimizeThumb2Branches(MF);
+ MadeChange |= OptimizeThumb2JumpTables(MF);
+ return MadeChange;
+}
+
+bool ARMConstantIslands::OptimizeThumb2Branches(MachineFunction &MF) {
+ bool MadeChange = false;
+
+ for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i) {
+ ImmBranch &Br = ImmBranches[i];
+ unsigned Opcode = Br.MI->getOpcode();
+ unsigned NewOpc = 0;
+ unsigned Scale = 1;
+ unsigned Bits = 0;
+ switch (Opcode) {
+ default: break;
+ case ARM::t2B:
+ NewOpc = ARM::tB;
+ Bits = 11;
+ Scale = 2;
+ break;
+ case ARM::t2Bcc: {
+ NewOpc = ARM::tBcc;
+ Bits = 8;
+ Scale = 2;
+ break;
+ }
+ }
+ if (NewOpc) {
+ unsigned MaxOffs = ((1 << (Bits-1))-1) * Scale;
+ MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB();
+ if (BBIsInRange(Br.MI, DestBB, MaxOffs)) {
+ Br.MI->setDesc(TII->get(NewOpc));
+ MachineBasicBlock *MBB = Br.MI->getParent();
+ BBSizes[MBB->getNumber()] -= 2;
+ AdjustBBOffsetsAfter(MBB, -2);
+ ++NumT2BrShrunk;
+ MadeChange = true;
+ }
+ }
+
+ Opcode = Br.MI->getOpcode();
+ if (Opcode != ARM::tBcc)
+ continue;
+
+ NewOpc = 0;
+ unsigned PredReg = 0;
+ ARMCC::CondCodes Pred = llvm::getInstrPredicate(Br.MI, PredReg);
+ if (Pred == ARMCC::EQ)
+ NewOpc = ARM::tCBZ;
+ else if (Pred == ARMCC::NE)
+ NewOpc = ARM::tCBNZ;
+ if (!NewOpc)
+ continue;
+ MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB();
+ // Check if the distance is within 126. Subtract starting offset by 2
+ // because the cmp will be eliminated.
+ unsigned BrOffset = GetOffsetOf(Br.MI) + 4 - 2;
+ unsigned DestOffset = BBOffsets[DestBB->getNumber()];
+ if (BrOffset < DestOffset && (DestOffset - BrOffset) <= 126) {
+ MachineBasicBlock::iterator CmpMI = Br.MI;
+ if (CmpMI != Br.MI->getParent()->begin()) {
+ --CmpMI;
+ if (CmpMI->getOpcode() == ARM::tCMPi8) {
+ unsigned Reg = CmpMI->getOperand(0).getReg();
+ Pred = llvm::getInstrPredicate(CmpMI, PredReg);
+ if (Pred == ARMCC::AL &&
+ CmpMI->getOperand(1).getImm() == 0 &&
+ isARMLowRegister(Reg)) {
+ MachineBasicBlock *MBB = Br.MI->getParent();
+ MachineInstr *NewBR =
+ BuildMI(*MBB, CmpMI, Br.MI->getDebugLoc(), TII->get(NewOpc))
+ .addReg(Reg).addMBB(DestBB,Br.MI->getOperand(0).getTargetFlags());
+ CmpMI->eraseFromParent();
+ Br.MI->eraseFromParent();
+ Br.MI = NewBR;
+ BBSizes[MBB->getNumber()] -= 2;
+ AdjustBBOffsetsAfter(MBB, -2);
+ ++NumCBZ;
+ MadeChange = true;
+ }
+ }
+ }
+ }
+ }
+
+ return MadeChange;
+}
+
+/// OptimizeThumb2JumpTables - Use tbb / tbh instructions to generate smaller
+/// jumptables when it's possible.
+bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) {
+ bool MadeChange = false;
+
+ // FIXME: After the tables are shrunk, can we get rid some of the
+ // constantpool tables?
+ MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
+ if (MJTI == 0) return false;
+
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ for (unsigned i = 0, e = T2JumpTables.size(); i != e; ++i) {
+ MachineInstr *MI = T2JumpTables[i];
+ const MCInstrDesc &MCID = MI->getDesc();
+ unsigned NumOps = MCID.getNumOperands();
+ unsigned JTOpIdx = NumOps - (MCID.isPredicable() ? 3 : 2);
+ MachineOperand JTOP = MI->getOperand(JTOpIdx);
+ unsigned JTI = JTOP.getIndex();
+ assert(JTI < JT.size());
+
+ bool ByteOk = true;
+ bool HalfWordOk = true;
+ unsigned JTOffset = GetOffsetOf(MI) + 4;
+ const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
+ for (unsigned j = 0, ee = JTBBs.size(); j != ee; ++j) {
+ MachineBasicBlock *MBB = JTBBs[j];
+ unsigned DstOffset = BBOffsets[MBB->getNumber()];
+ // Negative offset is not ok. FIXME: We should change BB layout to make
+ // sure all the branches are forward.
+ if (ByteOk && (DstOffset - JTOffset) > ((1<<8)-1)*2)
+ ByteOk = false;
+ unsigned TBHLimit = ((1<<16)-1)*2;
+ if (HalfWordOk && (DstOffset - JTOffset) > TBHLimit)
+ HalfWordOk = false;
+ if (!ByteOk && !HalfWordOk)
+ break;
+ }
+
+ if (ByteOk || HalfWordOk) {
+ MachineBasicBlock *MBB = MI->getParent();
+ unsigned BaseReg = MI->getOperand(0).getReg();
+ bool BaseRegKill = MI->getOperand(0).isKill();
+ if (!BaseRegKill)
+ continue;
+ unsigned IdxReg = MI->getOperand(1).getReg();
+ bool IdxRegKill = MI->getOperand(1).isKill();
+
+ // Scan backwards to find the instruction that defines the base
+ // register. Due to post-RA scheduling, we can't count on it
+ // immediately preceding the branch instruction.
+ MachineBasicBlock::iterator PrevI = MI;
+ MachineBasicBlock::iterator B = MBB->begin();
+ while (PrevI != B && !PrevI->definesRegister(BaseReg))
+ --PrevI;
+
+ // If for some reason we didn't find it, we can't do anything, so
+ // just skip this one.
+ if (!PrevI->definesRegister(BaseReg))
+ continue;
+
+ MachineInstr *AddrMI = PrevI;
+ bool OptOk = true;
+ // Examine the instruction that calculates the jumptable entry address.
+ // Make sure it only defines the base register and kills any uses
+ // other than the index register.
+ for (unsigned k = 0, eee = AddrMI->getNumOperands(); k != eee; ++k) {
+ const MachineOperand &MO = AddrMI->getOperand(k);
+ if (!MO.isReg() || !MO.getReg())
+ continue;
+ if (MO.isDef() && MO.getReg() != BaseReg) {
+ OptOk = false;
+ break;
+ }
+ if (MO.isUse() && !MO.isKill() && MO.getReg() != IdxReg) {
+ OptOk = false;
+ break;
+ }
+ }
+ if (!OptOk)
+ continue;
+
+ // Now scan back again to find the tLEApcrel or t2LEApcrelJT instruction
+ // that gave us the initial base register definition.
+ for (--PrevI; PrevI != B && !PrevI->definesRegister(BaseReg); --PrevI)
+ ;
+
+ // The instruction should be a tLEApcrel or t2LEApcrelJT; we want
+ // to delete it as well.
+ MachineInstr *LeaMI = PrevI;
+ if ((LeaMI->getOpcode() != ARM::tLEApcrelJT &&
+ LeaMI->getOpcode() != ARM::t2LEApcrelJT) ||
+ LeaMI->getOperand(0).getReg() != BaseReg)
+ OptOk = false;
+
+ if (!OptOk)
+ continue;
+
+ unsigned Opc = ByteOk ? ARM::t2TBB_JT : ARM::t2TBH_JT;
+ MachineInstr *NewJTMI = BuildMI(MBB, MI->getDebugLoc(), TII->get(Opc))
+ .addReg(IdxReg, getKillRegState(IdxRegKill))
+ .addJumpTableIndex(JTI, JTOP.getTargetFlags())
+ .addImm(MI->getOperand(JTOpIdx+1).getImm());
+ // FIXME: Insert an "ALIGN" instruction to ensure the next instruction
+ // is 2-byte aligned. For now, asm printer will fix it up.
+ unsigned NewSize = TII->GetInstSizeInBytes(NewJTMI);
+ unsigned OrigSize = TII->GetInstSizeInBytes(AddrMI);
+ OrigSize += TII->GetInstSizeInBytes(LeaMI);
+ OrigSize += TII->GetInstSizeInBytes(MI);
+
+ AddrMI->eraseFromParent();
+ LeaMI->eraseFromParent();
+ MI->eraseFromParent();
+
+ int delta = OrigSize - NewSize;
+ BBSizes[MBB->getNumber()] -= delta;
+ AdjustBBOffsetsAfter(MBB, -delta);
+
+ ++NumTBs;
+ MadeChange = true;
+ }
+ }
+
+ return MadeChange;
+}
+
+/// ReorderThumb2JumpTables - Adjust the function's block layout to ensure that
+/// jump tables always branch forwards, since that's what tbb and tbh need.
+bool ARMConstantIslands::ReorderThumb2JumpTables(MachineFunction &MF) {
+ bool MadeChange = false;
+
+ MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
+ if (MJTI == 0) return false;
+
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ for (unsigned i = 0, e = T2JumpTables.size(); i != e; ++i) {
+ MachineInstr *MI = T2JumpTables[i];
+ const MCInstrDesc &MCID = MI->getDesc();
+ unsigned NumOps = MCID.getNumOperands();
+ unsigned JTOpIdx = NumOps - (MCID.isPredicable() ? 3 : 2);
+ MachineOperand JTOP = MI->getOperand(JTOpIdx);
+ unsigned JTI = JTOP.getIndex();
+ assert(JTI < JT.size());
+
+ // We prefer if target blocks for the jump table come after the jump
+ // instruction so we can use TB[BH]. Loop through the target blocks
+ // and try to adjust them such that that's true.
+ int JTNumber = MI->getParent()->getNumber();
+ const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
+ for (unsigned j = 0, ee = JTBBs.size(); j != ee; ++j) {
+ MachineBasicBlock *MBB = JTBBs[j];
+ int DTNumber = MBB->getNumber();
+
+ if (DTNumber < JTNumber) {
+ // The destination precedes the switch. Try to move the block forward
+ // so we have a positive offset.
+ MachineBasicBlock *NewBB =
+ AdjustJTTargetBlockForward(MBB, MI->getParent());
+ if (NewBB)
+ MJTI->ReplaceMBBInJumpTable(JTI, JTBBs[j], NewBB);
+ MadeChange = true;
+ }
+ }
+ }
+
+ return MadeChange;
+}
+
+MachineBasicBlock *ARMConstantIslands::
+AdjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB)
+{
+ MachineFunction &MF = *BB->getParent();
+
+ // If the destination block is terminated by an unconditional branch,
+ // try to move it; otherwise, create a new block following the jump
+ // table that branches back to the actual target. This is a very simple
+ // heuristic. FIXME: We can definitely improve it.
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ SmallVector<MachineOperand, 4> CondPrior;
+ MachineFunction::iterator BBi = BB;
+ MachineFunction::iterator OldPrior = prior(BBi);
+
+ // If the block terminator isn't analyzable, don't try to move the block
+ bool B = TII->AnalyzeBranch(*BB, TBB, FBB, Cond);
+
+ // If the block ends in an unconditional branch, move it. The prior block
+ // has to have an analyzable terminator for us to move this one. Be paranoid
+ // and make sure we're not trying to move the entry block of the function.
+ if (!B && Cond.empty() && BB != MF.begin() &&
+ !TII->AnalyzeBranch(*OldPrior, TBB, FBB, CondPrior)) {
+ BB->moveAfter(JTBB);
+ OldPrior->updateTerminator();
+ BB->updateTerminator();
+ // Update numbering to account for the block being moved.
+ MF.RenumberBlocks();
+ ++NumJTMoved;
+ return NULL;
+ }
+
+ // Create a new MBB for the code after the jump BB.
+ MachineBasicBlock *NewBB =
+ MF.CreateMachineBasicBlock(JTBB->getBasicBlock());
+ MachineFunction::iterator MBBI = JTBB; ++MBBI;
+ MF.insert(MBBI, NewBB);
+
+ // Add an unconditional branch from NewBB to BB.
+ // There doesn't seem to be meaningful DebugInfo available; this doesn't
+ // correspond directly to anything in the source.
+ assert (isThumb2 && "Adjusting for TB[BH] but not in Thumb2?");
+ BuildMI(NewBB, DebugLoc(), TII->get(ARM::t2B)).addMBB(BB)
+ .addImm(ARMCC::AL).addReg(0);
+
+ // Update internal data structures to account for the newly inserted MBB.
+ MF.RenumberBlocks(NewBB);
+
+ // Update the CFG.
+ NewBB->addSuccessor(BB);
+ JTBB->removeSuccessor(BB);
+ JTBB->addSuccessor(NewBB);
+
+ ++NumJTInserted;
+ return NewBB;
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp
new file mode 100644
index 0000000..aadfd47
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp
@@ -0,0 +1,319 @@
+//===- ARMConstantPoolValue.cpp - ARM constantpool value --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ARM specific constantpool value class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMConstantPoolValue.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/Constant.h"
+#include "llvm/Constants.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cstdlib>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// ARMConstantPoolValue
+//===----------------------------------------------------------------------===//
+
+ARMConstantPoolValue::ARMConstantPoolValue(Type *Ty, unsigned id,
+ ARMCP::ARMCPKind kind,
+ unsigned char PCAdj,
+ ARMCP::ARMCPModifier modifier,
+ bool addCurrentAddress)
+ : MachineConstantPoolValue(Ty), LabelId(id), Kind(kind),
+ PCAdjust(PCAdj), Modifier(modifier),
+ AddCurrentAddress(addCurrentAddress) {}
+
+ARMConstantPoolValue::ARMConstantPoolValue(LLVMContext &C, unsigned id,
+ ARMCP::ARMCPKind kind,
+ unsigned char PCAdj,
+ ARMCP::ARMCPModifier modifier,
+ bool addCurrentAddress)
+ : MachineConstantPoolValue((Type*)Type::getInt32Ty(C)),
+ LabelId(id), Kind(kind), PCAdjust(PCAdj), Modifier(modifier),
+ AddCurrentAddress(addCurrentAddress) {}
+
+ARMConstantPoolValue::~ARMConstantPoolValue() {}
+
+const char *ARMConstantPoolValue::getModifierText() const {
+ switch (Modifier) {
+ default: llvm_unreachable("Unknown modifier!");
+ // FIXME: Are these case sensitive? It'd be nice to lower-case all the
+ // strings if that's legal.
+ case ARMCP::no_modifier: return "none";
+ case ARMCP::TLSGD: return "tlsgd";
+ case ARMCP::GOT: return "GOT";
+ case ARMCP::GOTOFF: return "GOTOFF";
+ case ARMCP::GOTTPOFF: return "gottpoff";
+ case ARMCP::TPOFF: return "tpoff";
+ }
+}
+
+int ARMConstantPoolValue::getExistingMachineCPValue(MachineConstantPool *CP,
+ unsigned Alignment) {
+ assert(false && "Shouldn't be calling this directly!");
+ return -1;
+}
+
+void
+ARMConstantPoolValue::addSelectionDAGCSEId(FoldingSetNodeID &ID) {
+ ID.AddInteger(LabelId);
+ ID.AddInteger(PCAdjust);
+}
+
+bool
+ARMConstantPoolValue::hasSameValue(ARMConstantPoolValue *ACPV) {
+ if (ACPV->Kind == Kind &&
+ ACPV->PCAdjust == PCAdjust &&
+ ACPV->Modifier == Modifier) {
+ if (ACPV->LabelId == LabelId)
+ return true;
+ // Two PC relative constpool entries containing the same GV address or
+ // external symbols. FIXME: What about blockaddress?
+ if (Kind == ARMCP::CPValue || Kind == ARMCP::CPExtSymbol)
+ return true;
+ }
+ return false;
+}
+
+void ARMConstantPoolValue::dump() const {
+ errs() << " " << *this;
+}
+
+void ARMConstantPoolValue::print(raw_ostream &O) const {
+ if (Modifier) O << "(" << getModifierText() << ")";
+ if (PCAdjust != 0) {
+ O << "-(LPC" << LabelId << "+" << (unsigned)PCAdjust;
+ if (AddCurrentAddress) O << "-.";
+ O << ")";
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// ARMConstantPoolConstant
+//===----------------------------------------------------------------------===//
+
+ARMConstantPoolConstant::ARMConstantPoolConstant(Type *Ty,
+ const Constant *C,
+ unsigned ID,
+ ARMCP::ARMCPKind Kind,
+ unsigned char PCAdj,
+ ARMCP::ARMCPModifier Modifier,
+ bool AddCurrentAddress)
+ : ARMConstantPoolValue(Ty, ID, Kind, PCAdj, Modifier, AddCurrentAddress),
+ CVal(C) {}
+
+ARMConstantPoolConstant::ARMConstantPoolConstant(const Constant *C,
+ unsigned ID,
+ ARMCP::ARMCPKind Kind,
+ unsigned char PCAdj,
+ ARMCP::ARMCPModifier Modifier,
+ bool AddCurrentAddress)
+ : ARMConstantPoolValue((Type*)C->getType(), ID, Kind, PCAdj, Modifier,
+ AddCurrentAddress),
+ CVal(C) {}
+
+ARMConstantPoolConstant *
+ARMConstantPoolConstant::Create(const Constant *C, unsigned ID) {
+ return new ARMConstantPoolConstant(C, ID, ARMCP::CPValue, 0,
+ ARMCP::no_modifier, false);
+}
+
+ARMConstantPoolConstant *
+ARMConstantPoolConstant::Create(const GlobalValue *GV,
+ ARMCP::ARMCPModifier Modifier) {
+ return new ARMConstantPoolConstant((Type*)Type::getInt32Ty(GV->getContext()),
+ GV, 0, ARMCP::CPValue, 0,
+ Modifier, false);
+}
+
+ARMConstantPoolConstant *
+ARMConstantPoolConstant::Create(const Constant *C, unsigned ID,
+ ARMCP::ARMCPKind Kind, unsigned char PCAdj) {
+ return new ARMConstantPoolConstant(C, ID, Kind, PCAdj,
+ ARMCP::no_modifier, false);
+}
+
+ARMConstantPoolConstant *
+ARMConstantPoolConstant::Create(const Constant *C, unsigned ID,
+ ARMCP::ARMCPKind Kind, unsigned char PCAdj,
+ ARMCP::ARMCPModifier Modifier,
+ bool AddCurrentAddress) {
+ return new ARMConstantPoolConstant(C, ID, Kind, PCAdj, Modifier,
+ AddCurrentAddress);
+}
+
+const GlobalValue *ARMConstantPoolConstant::getGV() const {
+ return dyn_cast_or_null<GlobalValue>(CVal);
+}
+
+const BlockAddress *ARMConstantPoolConstant::getBlockAddress() const {
+ return dyn_cast_or_null<BlockAddress>(CVal);
+}
+
+int ARMConstantPoolConstant::getExistingMachineCPValue(MachineConstantPool *CP,
+ unsigned Alignment) {
+ unsigned AlignMask = Alignment - 1;
+ const std::vector<MachineConstantPoolEntry> Constants = CP->getConstants();
+ for (unsigned i = 0, e = Constants.size(); i != e; ++i) {
+ if (Constants[i].isMachineConstantPoolEntry() &&
+ (Constants[i].getAlignment() & AlignMask) == 0) {
+ ARMConstantPoolValue *CPV =
+ (ARMConstantPoolValue *)Constants[i].Val.MachineCPVal;
+ ARMConstantPoolConstant *APC = dyn_cast<ARMConstantPoolConstant>(CPV);
+ if (!APC) continue;
+ if (APC->CVal == CVal && equals(APC))
+ return i;
+ }
+ }
+
+ return -1;
+}
+
+bool ARMConstantPoolConstant::hasSameValue(ARMConstantPoolValue *ACPV) {
+ const ARMConstantPoolConstant *ACPC = dyn_cast<ARMConstantPoolConstant>(ACPV);
+ return ACPC && ACPC->CVal == CVal && ARMConstantPoolValue::hasSameValue(ACPV);
+}
+
+void ARMConstantPoolConstant::addSelectionDAGCSEId(FoldingSetNodeID &ID) {
+ ID.AddPointer(CVal);
+ ARMConstantPoolValue::addSelectionDAGCSEId(ID);
+}
+
+void ARMConstantPoolConstant::print(raw_ostream &O) const {
+ O << CVal->getName();
+ ARMConstantPoolValue::print(O);
+}
+
+//===----------------------------------------------------------------------===//
+// ARMConstantPoolSymbol
+//===----------------------------------------------------------------------===//
+
+ARMConstantPoolSymbol::ARMConstantPoolSymbol(LLVMContext &C, const char *s,
+ unsigned id,
+ unsigned char PCAdj,
+ ARMCP::ARMCPModifier Modifier,
+ bool AddCurrentAddress)
+ : ARMConstantPoolValue(C, id, ARMCP::CPExtSymbol, PCAdj, Modifier,
+ AddCurrentAddress),
+ S(strdup(s)) {}
+
+ARMConstantPoolSymbol::~ARMConstantPoolSymbol() {
+ free((void*)S);
+}
+
+ARMConstantPoolSymbol *
+ARMConstantPoolSymbol::Create(LLVMContext &C, const char *s,
+ unsigned ID, unsigned char PCAdj) {
+ return new ARMConstantPoolSymbol(C, s, ID, PCAdj, ARMCP::no_modifier, false);
+}
+
+static bool CPV_streq(const char *S1, const char *S2) {
+ if (S1 == S2)
+ return true;
+ if (S1 && S2 && strcmp(S1, S2) == 0)
+ return true;
+ return false;
+}
+
+int ARMConstantPoolSymbol::getExistingMachineCPValue(MachineConstantPool *CP,
+ unsigned Alignment) {
+ unsigned AlignMask = Alignment - 1;
+ const std::vector<MachineConstantPoolEntry> Constants = CP->getConstants();
+ for (unsigned i = 0, e = Constants.size(); i != e; ++i) {
+ if (Constants[i].isMachineConstantPoolEntry() &&
+ (Constants[i].getAlignment() & AlignMask) == 0) {
+ ARMConstantPoolValue *CPV =
+ (ARMConstantPoolValue *)Constants[i].Val.MachineCPVal;
+ ARMConstantPoolSymbol *APS = dyn_cast<ARMConstantPoolSymbol>(CPV);
+ if (!APS) continue;
+
+ if (CPV_streq(APS->S, S) && equals(APS))
+ return i;
+ }
+ }
+
+ return -1;
+}
+
+bool ARMConstantPoolSymbol::hasSameValue(ARMConstantPoolValue *ACPV) {
+ const ARMConstantPoolSymbol *ACPS = dyn_cast<ARMConstantPoolSymbol>(ACPV);
+ return ACPS && CPV_streq(ACPS->S, S) &&
+ ARMConstantPoolValue::hasSameValue(ACPV);
+}
+
+void ARMConstantPoolSymbol::addSelectionDAGCSEId(FoldingSetNodeID &ID) {
+ ID.AddPointer(S);
+ ARMConstantPoolValue::addSelectionDAGCSEId(ID);
+}
+
+void ARMConstantPoolSymbol::print(raw_ostream &O) const {
+ O << S;
+ ARMConstantPoolValue::print(O);
+}
+
+//===----------------------------------------------------------------------===//
+// ARMConstantPoolMBB
+//===----------------------------------------------------------------------===//
+
+ARMConstantPoolMBB::ARMConstantPoolMBB(LLVMContext &C,
+ const MachineBasicBlock *mbb,
+ unsigned id, unsigned char PCAdj,
+ ARMCP::ARMCPModifier Modifier,
+ bool AddCurrentAddress)
+ : ARMConstantPoolValue(C, id, ARMCP::CPMachineBasicBlock, PCAdj,
+ Modifier, AddCurrentAddress),
+ MBB(mbb) {}
+
+ARMConstantPoolMBB *ARMConstantPoolMBB::Create(LLVMContext &C,
+ const MachineBasicBlock *mbb,
+ unsigned ID,
+ unsigned char PCAdj) {
+ return new ARMConstantPoolMBB(C, mbb, ID, PCAdj, ARMCP::no_modifier, false);
+}
+
+int ARMConstantPoolMBB::getExistingMachineCPValue(MachineConstantPool *CP,
+ unsigned Alignment) {
+ unsigned AlignMask = Alignment - 1;
+ const std::vector<MachineConstantPoolEntry> Constants = CP->getConstants();
+ for (unsigned i = 0, e = Constants.size(); i != e; ++i) {
+ if (Constants[i].isMachineConstantPoolEntry() &&
+ (Constants[i].getAlignment() & AlignMask) == 0) {
+ ARMConstantPoolValue *CPV =
+ (ARMConstantPoolValue *)Constants[i].Val.MachineCPVal;
+ ARMConstantPoolMBB *APMBB = dyn_cast<ARMConstantPoolMBB>(CPV);
+ if (!APMBB) continue;
+
+ if (APMBB->MBB == MBB && equals(APMBB))
+ return i;
+ }
+ }
+
+ return -1;
+}
+
+bool ARMConstantPoolMBB::hasSameValue(ARMConstantPoolValue *ACPV) {
+ const ARMConstantPoolMBB *ACPMBB = dyn_cast<ARMConstantPoolMBB>(ACPV);
+ return ACPMBB && ACPMBB->MBB == MBB &&
+ ARMConstantPoolValue::hasSameValue(ACPV);
+}
+
+void ARMConstantPoolMBB::addSelectionDAGCSEId(FoldingSetNodeID &ID) {
+ ID.AddPointer(MBB);
+ ARMConstantPoolValue::addSelectionDAGCSEId(ID);
+}
+
+void ARMConstantPoolMBB::print(raw_ostream &O) const {
+ ARMConstantPoolValue::print(O);
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h
new file mode 100644
index 0000000..0d0def3
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h
@@ -0,0 +1,233 @@
+//===- ARMConstantPoolValue.h - ARM constantpool value ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ARM specific constantpool value class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_ARM_CONSTANTPOOLVALUE_H
+#define LLVM_TARGET_ARM_CONSTANTPOOLVALUE_H
+
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cstddef>
+
+namespace llvm {
+
+class BlockAddress;
+class Constant;
+class GlobalValue;
+class LLVMContext;
+class MachineBasicBlock;
+
+namespace ARMCP {
+ enum ARMCPKind {
+ CPValue,
+ CPExtSymbol,
+ CPBlockAddress,
+ CPLSDA,
+ CPMachineBasicBlock
+ };
+
+ enum ARMCPModifier {
+ no_modifier,
+ TLSGD,
+ GOT,
+ GOTOFF,
+ GOTTPOFF,
+ TPOFF
+ };
+}
+
+/// ARMConstantPoolValue - ARM specific constantpool value. This is used to
+/// represent PC-relative displacement between the address of the load
+/// instruction and the constant being loaded, i.e. (&GV-(LPIC+8)).
+class ARMConstantPoolValue : public MachineConstantPoolValue {
+ unsigned LabelId; // Label id of the load.
+ ARMCP::ARMCPKind Kind; // Kind of constant.
+ unsigned char PCAdjust; // Extra adjustment if constantpool is pc-relative.
+ // 8 for ARM, 4 for Thumb.
+ ARMCP::ARMCPModifier Modifier; // GV modifier i.e. (&GV(modifier)-(LPIC+8))
+ bool AddCurrentAddress;
+
+protected:
+ ARMConstantPoolValue(Type *Ty, unsigned id, ARMCP::ARMCPKind Kind,
+ unsigned char PCAdj, ARMCP::ARMCPModifier Modifier,
+ bool AddCurrentAddress);
+
+ ARMConstantPoolValue(LLVMContext &C, unsigned id, ARMCP::ARMCPKind Kind,
+ unsigned char PCAdj, ARMCP::ARMCPModifier Modifier,
+ bool AddCurrentAddress);
+public:
+ virtual ~ARMConstantPoolValue();
+
+ ARMCP::ARMCPModifier getModifier() const { return Modifier; }
+ const char *getModifierText() const;
+ bool hasModifier() const { return Modifier != ARMCP::no_modifier; }
+
+ bool mustAddCurrentAddress() const { return AddCurrentAddress; }
+
+ unsigned getLabelId() const { return LabelId; }
+ unsigned char getPCAdjustment() const { return PCAdjust; }
+
+ bool isGlobalValue() const { return Kind == ARMCP::CPValue; }
+ bool isExtSymbol() const { return Kind == ARMCP::CPExtSymbol; }
+ bool isBlockAddress() const { return Kind == ARMCP::CPBlockAddress; }
+ bool isLSDA() const { return Kind == ARMCP::CPLSDA; }
+ bool isMachineBasicBlock() const{ return Kind == ARMCP::CPMachineBasicBlock; }
+
+ virtual unsigned getRelocationInfo() const { return 2; }
+
+ virtual int getExistingMachineCPValue(MachineConstantPool *CP,
+ unsigned Alignment);
+
+ virtual void addSelectionDAGCSEId(FoldingSetNodeID &ID);
+
+ /// hasSameValue - Return true if this ARM constpool value can share the same
+ /// constantpool entry as another ARM constpool value.
+ virtual bool hasSameValue(ARMConstantPoolValue *ACPV);
+
+ bool equals(const ARMConstantPoolValue *A) const {
+ return this->LabelId == A->LabelId &&
+ this->PCAdjust == A->PCAdjust &&
+ this->Modifier == A->Modifier;
+ }
+
+ virtual void print(raw_ostream &O) const;
+ void print(raw_ostream *O) const { if (O) print(*O); }
+ void dump() const;
+
+ static bool classof(const ARMConstantPoolValue *) { return true; }
+};
+
+inline raw_ostream &operator<<(raw_ostream &O, const ARMConstantPoolValue &V) {
+ V.print(O);
+ return O;
+}
+
+/// ARMConstantPoolConstant - ARM-specific constant pool values for Constants,
+/// Functions, and BlockAddresses.
+class ARMConstantPoolConstant : public ARMConstantPoolValue {
+ const Constant *CVal; // Constant being loaded.
+
+ ARMConstantPoolConstant(const Constant *C,
+ unsigned ID,
+ ARMCP::ARMCPKind Kind,
+ unsigned char PCAdj,
+ ARMCP::ARMCPModifier Modifier,
+ bool AddCurrentAddress);
+ ARMConstantPoolConstant(Type *Ty, const Constant *C,
+ unsigned ID,
+ ARMCP::ARMCPKind Kind,
+ unsigned char PCAdj,
+ ARMCP::ARMCPModifier Modifier,
+ bool AddCurrentAddress);
+
+public:
+ static ARMConstantPoolConstant *Create(const Constant *C, unsigned ID);
+ static ARMConstantPoolConstant *Create(const GlobalValue *GV,
+ ARMCP::ARMCPModifier Modifier);
+ static ARMConstantPoolConstant *Create(const Constant *C, unsigned ID,
+ ARMCP::ARMCPKind Kind,
+ unsigned char PCAdj);
+ static ARMConstantPoolConstant *Create(const Constant *C, unsigned ID,
+ ARMCP::ARMCPKind Kind,
+ unsigned char PCAdj,
+ ARMCP::ARMCPModifier Modifier,
+ bool AddCurrentAddress);
+
+ const GlobalValue *getGV() const;
+ const BlockAddress *getBlockAddress() const;
+
+ virtual int getExistingMachineCPValue(MachineConstantPool *CP,
+ unsigned Alignment);
+
+ /// hasSameValue - Return true if this ARM constpool value can share the same
+ /// constantpool entry as another ARM constpool value.
+ virtual bool hasSameValue(ARMConstantPoolValue *ACPV);
+
+ virtual void addSelectionDAGCSEId(FoldingSetNodeID &ID);
+
+ virtual void print(raw_ostream &O) const;
+ static bool classof(const ARMConstantPoolValue *APV) {
+ return APV->isGlobalValue() || APV->isBlockAddress() || APV->isLSDA();
+ }
+ static bool classof(const ARMConstantPoolConstant *) { return true; }
+};
+
+/// ARMConstantPoolSymbol - ARM-specific constantpool values for external
+/// symbols.
+class ARMConstantPoolSymbol : public ARMConstantPoolValue {
+ const char *S; // ExtSymbol being loaded.
+
+ ARMConstantPoolSymbol(LLVMContext &C, const char *s, unsigned id,
+ unsigned char PCAdj, ARMCP::ARMCPModifier Modifier,
+ bool AddCurrentAddress);
+
+public:
+ ~ARMConstantPoolSymbol();
+
+ static ARMConstantPoolSymbol *Create(LLVMContext &C, const char *s,
+ unsigned ID, unsigned char PCAdj);
+
+ const char *getSymbol() const { return S; }
+
+ virtual int getExistingMachineCPValue(MachineConstantPool *CP,
+ unsigned Alignment);
+
+ virtual void addSelectionDAGCSEId(FoldingSetNodeID &ID);
+
+ /// hasSameValue - Return true if this ARM constpool value can share the same
+ /// constantpool entry as another ARM constpool value.
+ virtual bool hasSameValue(ARMConstantPoolValue *ACPV);
+
+ virtual void print(raw_ostream &O) const;
+
+ static bool classof(const ARMConstantPoolValue *ACPV) {
+ return ACPV->isExtSymbol();
+ }
+ static bool classof(const ARMConstantPoolSymbol *) { return true; }
+};
+
+/// ARMConstantPoolMBB - ARM-specific constantpool value of a machine basic
+/// block.
+class ARMConstantPoolMBB : public ARMConstantPoolValue {
+ const MachineBasicBlock *MBB; // Machine basic block.
+
+ ARMConstantPoolMBB(LLVMContext &C, const MachineBasicBlock *mbb, unsigned id,
+ unsigned char PCAdj, ARMCP::ARMCPModifier Modifier,
+ bool AddCurrentAddress);
+
+public:
+ static ARMConstantPoolMBB *Create(LLVMContext &C,
+ const MachineBasicBlock *mbb,
+ unsigned ID, unsigned char PCAdj);
+
+ const MachineBasicBlock *getMBB() const { return MBB; }
+
+ virtual int getExistingMachineCPValue(MachineConstantPool *CP,
+ unsigned Alignment);
+
+ virtual void addSelectionDAGCSEId(FoldingSetNodeID &ID);
+
+ /// hasSameValue - Return true if this ARM constpool value can share the same
+ /// constantpool entry as another ARM constpool value.
+ virtual bool hasSameValue(ARMConstantPoolValue *ACPV);
+
+ virtual void print(raw_ostream &O) const;
+
+ static bool classof(const ARMConstantPoolValue *ACPV) {
+ return ACPV->isMachineBasicBlock();
+ }
+ static bool classof(const ARMConstantPoolMBB *) { return true; }
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMELFWriterInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMELFWriterInfo.cpp
new file mode 100644
index 0000000..51e68b4
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMELFWriterInfo.cpp
@@ -0,0 +1,83 @@
+//===-- ARMELFWriterInfo.cpp - ELF Writer Info for the ARM backend --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements ELF writer information for the ARM backend.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMELFWriterInfo.h"
+#include "ARMRelocations.h"
+#include "llvm/Function.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/ELF.h"
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Implementation of the ARMELFWriterInfo class
+//===----------------------------------------------------------------------===//
+
+ARMELFWriterInfo::ARMELFWriterInfo(TargetMachine &TM)
+ : TargetELFWriterInfo(TM.getTargetData()->getPointerSizeInBits() == 64,
+ TM.getTargetData()->isLittleEndian()) {
+}
+
+ARMELFWriterInfo::~ARMELFWriterInfo() {}
+
+unsigned ARMELFWriterInfo::getRelocationType(unsigned MachineRelTy) const {
+ switch (MachineRelTy) {
+ case ARM::reloc_arm_absolute:
+ case ARM::reloc_arm_relative:
+ case ARM::reloc_arm_cp_entry:
+ case ARM::reloc_arm_vfp_cp_entry:
+ case ARM::reloc_arm_machine_cp_entry:
+ case ARM::reloc_arm_jt_base:
+ case ARM::reloc_arm_pic_jt:
+ assert(0 && "unsupported ARM relocation type"); break;
+
+ case ARM::reloc_arm_branch: return ELF::R_ARM_CALL; break;
+ case ARM::reloc_arm_movt: return ELF::R_ARM_MOVT_ABS; break;
+ case ARM::reloc_arm_movw: return ELF::R_ARM_MOVW_ABS_NC; break;
+ default:
+ llvm_unreachable("unknown ARM relocation type"); break;
+ }
+ return 0;
+}
+
+long int ARMELFWriterInfo::getDefaultAddendForRelTy(unsigned RelTy,
+ long int Modifier) const {
+ assert(0 && "ARMELFWriterInfo::getDefaultAddendForRelTy() not implemented");
+ return 0;
+}
+
+unsigned ARMELFWriterInfo::getRelocationTySize(unsigned RelTy) const {
+ assert(0 && "ARMELFWriterInfo::getRelocationTySize() not implemented");
+ return 0;
+}
+
+bool ARMELFWriterInfo::isPCRelativeRel(unsigned RelTy) const {
+ assert(0 && "ARMELFWriterInfo::isPCRelativeRel() not implemented");
+ return 1;
+}
+
+unsigned ARMELFWriterInfo::getAbsoluteLabelMachineRelTy() const {
+ assert(0 &&
+ "ARMELFWriterInfo::getAbsoluteLabelMachineRelTy() not implemented");
+ return 0;
+}
+
+long int ARMELFWriterInfo::computeRelocation(unsigned SymOffset,
+ unsigned RelOffset,
+ unsigned RelTy) const {
+ assert(0 &&
+ "ARMELFWriterInfo::getAbsoluteLabelMachineRelTy() not implemented");
+ return 0;
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMELFWriterInfo.h b/contrib/llvm/lib/Target/ARM/ARMELFWriterInfo.h
new file mode 100644
index 0000000..1c4e532
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMELFWriterInfo.h
@@ -0,0 +1,58 @@
+//===-- ARMELFWriterInfo.h - ELF Writer Info for ARM ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements ELF writer information for the ARM backend.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARM_ELF_WRITER_INFO_H
+#define ARM_ELF_WRITER_INFO_H
+
+#include "llvm/Target/TargetELFWriterInfo.h"
+
+namespace llvm {
+
+ class ARMELFWriterInfo : public TargetELFWriterInfo {
+ public:
+ ARMELFWriterInfo(TargetMachine &TM);
+ virtual ~ARMELFWriterInfo();
+
+ /// getRelocationType - Returns the target specific ELF Relocation type.
+ /// 'MachineRelTy' contains the object code independent relocation type
+ virtual unsigned getRelocationType(unsigned MachineRelTy) const;
+
+ /// hasRelocationAddend - True if the target uses an addend in the
+ /// ELF relocation entry.
+ virtual bool hasRelocationAddend() const { return false; }
+
+ /// getDefaultAddendForRelTy - Gets the default addend value for a
+ /// relocation entry based on the target ELF relocation type.
+ virtual long int getDefaultAddendForRelTy(unsigned RelTy,
+ long int Modifier = 0) const;
+
+ /// getRelTySize - Returns the size of relocatable field in bits
+ virtual unsigned getRelocationTySize(unsigned RelTy) const;
+
+ /// isPCRelativeRel - True if the relocation type is pc relative
+ virtual bool isPCRelativeRel(unsigned RelTy) const;
+
+ /// getJumpTableRelocationTy - Returns the machine relocation type used
+ /// to reference a jumptable.
+ virtual unsigned getAbsoluteLabelMachineRelTy() const;
+
+ /// computeRelocation - Some relocatable fields could be relocated
+ /// directly, avoiding the relocation symbol emission, compute the
+ /// final relocation value for this symbol.
+ virtual long int computeRelocation(unsigned SymOffset, unsigned RelOffset,
+ unsigned RelTy) const;
+ };
+
+} // end llvm namespace
+
+#endif // ARM_ELF_WRITER_INFO_H
diff --git a/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
new file mode 100644
index 0000000..7872cb9
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -0,0 +1,1317 @@
+//===-- ARMExpandPseudoInsts.cpp - Expand pseudo instructions -----*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that expands pseudo instructions into target
+// instructions to allow proper scheduling, if-conversion, and other late
+// optimizations. This pass should be run after register allocation but before
+// the post-regalloc scheduling pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm-pseudo"
+#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMBaseRegisterInfo.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMRegisterInfo.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h" // FIXME: for debug only. remove!
+using namespace llvm;
+
+static cl::opt<bool>
+VerifyARMPseudo("verify-arm-pseudo-expand", cl::Hidden,
+ cl::desc("Verify machine code after expanding ARM pseudos"));
+
+namespace {
+ class ARMExpandPseudo : public MachineFunctionPass {
+ public:
+ static char ID;
+ ARMExpandPseudo() : MachineFunctionPass(ID) {}
+
+ const ARMBaseInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ const ARMSubtarget *STI;
+ ARMFunctionInfo *AFI;
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual const char *getPassName() const {
+ return "ARM pseudo instruction expansion pass";
+ }
+
+ private:
+ void TransferImpOps(MachineInstr &OldMI,
+ MachineInstrBuilder &UseMI, MachineInstrBuilder &DefMI);
+ bool ExpandMI(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI);
+ bool ExpandMBB(MachineBasicBlock &MBB);
+ void ExpandVLD(MachineBasicBlock::iterator &MBBI);
+ void ExpandVST(MachineBasicBlock::iterator &MBBI);
+ void ExpandLaneOp(MachineBasicBlock::iterator &MBBI);
+ void ExpandVTBL(MachineBasicBlock::iterator &MBBI,
+ unsigned Opc, bool IsExt, unsigned NumRegs);
+ void ExpandMOV32BitImm(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI);
+ };
+ char ARMExpandPseudo::ID = 0;
+}
+
+/// TransferImpOps - Transfer implicit operands on the pseudo instruction to
+/// the instructions created from the expansion.
+void ARMExpandPseudo::TransferImpOps(MachineInstr &OldMI,
+ MachineInstrBuilder &UseMI,
+ MachineInstrBuilder &DefMI) {
+ const MCInstrDesc &Desc = OldMI.getDesc();
+ for (unsigned i = Desc.getNumOperands(), e = OldMI.getNumOperands();
+ i != e; ++i) {
+ const MachineOperand &MO = OldMI.getOperand(i);
+ assert(MO.isReg() && MO.getReg());
+ if (MO.isUse())
+ UseMI.addOperand(MO);
+ else
+ DefMI.addOperand(MO);
+ }
+}
+
+namespace {
+ // Constants for register spacing in NEON load/store instructions.
+ // For quad-register load-lane and store-lane pseudo instructors, the
+ // spacing is initially assumed to be EvenDblSpc, and that is changed to
+ // OddDblSpc depending on the lane number operand.
+ enum NEONRegSpacing {
+ SingleSpc,
+ EvenDblSpc,
+ OddDblSpc
+ };
+
+ // Entries for NEON load/store information table. The table is sorted by
+ // PseudoOpc for fast binary-search lookups.
+ struct NEONLdStTableEntry {
+ unsigned PseudoOpc;
+ unsigned RealOpc;
+ bool IsLoad;
+ bool HasWriteBack;
+ NEONRegSpacing RegSpacing;
+ unsigned char NumRegs; // D registers loaded or stored
+ unsigned char RegElts; // elements per D register; used for lane ops
+
+ // Comparison methods for binary search of the table.
+ bool operator<(const NEONLdStTableEntry &TE) const {
+ return PseudoOpc < TE.PseudoOpc;
+ }
+ friend bool operator<(const NEONLdStTableEntry &TE, unsigned PseudoOpc) {
+ return TE.PseudoOpc < PseudoOpc;
+ }
+ friend bool LLVM_ATTRIBUTE_UNUSED operator<(unsigned PseudoOpc,
+ const NEONLdStTableEntry &TE) {
+ return PseudoOpc < TE.PseudoOpc;
+ }
+ };
+}
+
+static const NEONLdStTableEntry NEONLdStTable[] = {
+{ ARM::VLD1DUPq16Pseudo, ARM::VLD1DUPq16, true, false, SingleSpc, 2, 4},
+{ ARM::VLD1DUPq16Pseudo_UPD, ARM::VLD1DUPq16_UPD, true, true, SingleSpc, 2, 4},
+{ ARM::VLD1DUPq32Pseudo, ARM::VLD1DUPq32, true, false, SingleSpc, 2, 2},
+{ ARM::VLD1DUPq32Pseudo_UPD, ARM::VLD1DUPq32_UPD, true, true, SingleSpc, 2, 2},
+{ ARM::VLD1DUPq8Pseudo, ARM::VLD1DUPq8, true, false, SingleSpc, 2, 8},
+{ ARM::VLD1DUPq8Pseudo_UPD, ARM::VLD1DUPq8_UPD, true, true, SingleSpc, 2, 8},
+
+{ ARM::VLD1LNq16Pseudo, ARM::VLD1LNd16, true, false, EvenDblSpc, 1, 4 },
+{ ARM::VLD1LNq16Pseudo_UPD, ARM::VLD1LNd16_UPD, true, true, EvenDblSpc, 1, 4 },
+{ ARM::VLD1LNq32Pseudo, ARM::VLD1LNd32, true, false, EvenDblSpc, 1, 2 },
+{ ARM::VLD1LNq32Pseudo_UPD, ARM::VLD1LNd32_UPD, true, true, EvenDblSpc, 1, 2 },
+{ ARM::VLD1LNq8Pseudo, ARM::VLD1LNd8, true, false, EvenDblSpc, 1, 8 },
+{ ARM::VLD1LNq8Pseudo_UPD, ARM::VLD1LNd8_UPD, true, true, EvenDblSpc, 1, 8 },
+
+{ ARM::VLD1d64QPseudo, ARM::VLD1d64Q, true, false, SingleSpc, 4, 1 },
+{ ARM::VLD1d64QPseudo_UPD, ARM::VLD1d64Q_UPD, true, true, SingleSpc, 4, 1 },
+{ ARM::VLD1d64TPseudo, ARM::VLD1d64T, true, false, SingleSpc, 3, 1 },
+{ ARM::VLD1d64TPseudo_UPD, ARM::VLD1d64T_UPD, true, true, SingleSpc, 3, 1 },
+
+{ ARM::VLD1q16Pseudo, ARM::VLD1q16, true, false, SingleSpc, 2, 4 },
+{ ARM::VLD1q16Pseudo_UPD, ARM::VLD1q16_UPD, true, true, SingleSpc, 2, 4 },
+{ ARM::VLD1q32Pseudo, ARM::VLD1q32, true, false, SingleSpc, 2, 2 },
+{ ARM::VLD1q32Pseudo_UPD, ARM::VLD1q32_UPD, true, true, SingleSpc, 2, 2 },
+{ ARM::VLD1q64Pseudo, ARM::VLD1q64, true, false, SingleSpc, 2, 1 },
+{ ARM::VLD1q64Pseudo_UPD, ARM::VLD1q64_UPD, true, true, SingleSpc, 2, 1 },
+{ ARM::VLD1q8Pseudo, ARM::VLD1q8, true, false, SingleSpc, 2, 8 },
+{ ARM::VLD1q8Pseudo_UPD, ARM::VLD1q8_UPD, true, true, SingleSpc, 2, 8 },
+
+{ ARM::VLD2DUPd16Pseudo, ARM::VLD2DUPd16, true, false, SingleSpc, 2, 4},
+{ ARM::VLD2DUPd16Pseudo_UPD, ARM::VLD2DUPd16_UPD, true, true, SingleSpc, 2, 4},
+{ ARM::VLD2DUPd32Pseudo, ARM::VLD2DUPd32, true, false, SingleSpc, 2, 2},
+{ ARM::VLD2DUPd32Pseudo_UPD, ARM::VLD2DUPd32_UPD, true, true, SingleSpc, 2, 2},
+{ ARM::VLD2DUPd8Pseudo, ARM::VLD2DUPd8, true, false, SingleSpc, 2, 8},
+{ ARM::VLD2DUPd8Pseudo_UPD, ARM::VLD2DUPd8_UPD, true, true, SingleSpc, 2, 8},
+
+{ ARM::VLD2LNd16Pseudo, ARM::VLD2LNd16, true, false, SingleSpc, 2, 4 },
+{ ARM::VLD2LNd16Pseudo_UPD, ARM::VLD2LNd16_UPD, true, true, SingleSpc, 2, 4 },
+{ ARM::VLD2LNd32Pseudo, ARM::VLD2LNd32, true, false, SingleSpc, 2, 2 },
+{ ARM::VLD2LNd32Pseudo_UPD, ARM::VLD2LNd32_UPD, true, true, SingleSpc, 2, 2 },
+{ ARM::VLD2LNd8Pseudo, ARM::VLD2LNd8, true, false, SingleSpc, 2, 8 },
+{ ARM::VLD2LNd8Pseudo_UPD, ARM::VLD2LNd8_UPD, true, true, SingleSpc, 2, 8 },
+{ ARM::VLD2LNq16Pseudo, ARM::VLD2LNq16, true, false, EvenDblSpc, 2, 4 },
+{ ARM::VLD2LNq16Pseudo_UPD, ARM::VLD2LNq16_UPD, true, true, EvenDblSpc, 2, 4 },
+{ ARM::VLD2LNq32Pseudo, ARM::VLD2LNq32, true, false, EvenDblSpc, 2, 2 },
+{ ARM::VLD2LNq32Pseudo_UPD, ARM::VLD2LNq32_UPD, true, true, EvenDblSpc, 2, 2 },
+
+{ ARM::VLD2d16Pseudo, ARM::VLD2d16, true, false, SingleSpc, 2, 4 },
+{ ARM::VLD2d16Pseudo_UPD, ARM::VLD2d16_UPD, true, true, SingleSpc, 2, 4 },
+{ ARM::VLD2d32Pseudo, ARM::VLD2d32, true, false, SingleSpc, 2, 2 },
+{ ARM::VLD2d32Pseudo_UPD, ARM::VLD2d32_UPD, true, true, SingleSpc, 2, 2 },
+{ ARM::VLD2d8Pseudo, ARM::VLD2d8, true, false, SingleSpc, 2, 8 },
+{ ARM::VLD2d8Pseudo_UPD, ARM::VLD2d8_UPD, true, true, SingleSpc, 2, 8 },
+
+{ ARM::VLD2q16Pseudo, ARM::VLD2q16, true, false, SingleSpc, 4, 4 },
+{ ARM::VLD2q16Pseudo_UPD, ARM::VLD2q16_UPD, true, true, SingleSpc, 4, 4 },
+{ ARM::VLD2q32Pseudo, ARM::VLD2q32, true, false, SingleSpc, 4, 2 },
+{ ARM::VLD2q32Pseudo_UPD, ARM::VLD2q32_UPD, true, true, SingleSpc, 4, 2 },
+{ ARM::VLD2q8Pseudo, ARM::VLD2q8, true, false, SingleSpc, 4, 8 },
+{ ARM::VLD2q8Pseudo_UPD, ARM::VLD2q8_UPD, true, true, SingleSpc, 4, 8 },
+
+{ ARM::VLD3DUPd16Pseudo, ARM::VLD3DUPd16, true, false, SingleSpc, 3, 4},
+{ ARM::VLD3DUPd16Pseudo_UPD, ARM::VLD3DUPd16_UPD, true, true, SingleSpc, 3, 4},
+{ ARM::VLD3DUPd32Pseudo, ARM::VLD3DUPd32, true, false, SingleSpc, 3, 2},
+{ ARM::VLD3DUPd32Pseudo_UPD, ARM::VLD3DUPd32_UPD, true, true, SingleSpc, 3, 2},
+{ ARM::VLD3DUPd8Pseudo, ARM::VLD3DUPd8, true, false, SingleSpc, 3, 8},
+{ ARM::VLD3DUPd8Pseudo_UPD, ARM::VLD3DUPd8_UPD, true, true, SingleSpc, 3, 8},
+
+{ ARM::VLD3LNd16Pseudo, ARM::VLD3LNd16, true, false, SingleSpc, 3, 4 },
+{ ARM::VLD3LNd16Pseudo_UPD, ARM::VLD3LNd16_UPD, true, true, SingleSpc, 3, 4 },
+{ ARM::VLD3LNd32Pseudo, ARM::VLD3LNd32, true, false, SingleSpc, 3, 2 },
+{ ARM::VLD3LNd32Pseudo_UPD, ARM::VLD3LNd32_UPD, true, true, SingleSpc, 3, 2 },
+{ ARM::VLD3LNd8Pseudo, ARM::VLD3LNd8, true, false, SingleSpc, 3, 8 },
+{ ARM::VLD3LNd8Pseudo_UPD, ARM::VLD3LNd8_UPD, true, true, SingleSpc, 3, 8 },
+{ ARM::VLD3LNq16Pseudo, ARM::VLD3LNq16, true, false, EvenDblSpc, 3, 4 },
+{ ARM::VLD3LNq16Pseudo_UPD, ARM::VLD3LNq16_UPD, true, true, EvenDblSpc, 3, 4 },
+{ ARM::VLD3LNq32Pseudo, ARM::VLD3LNq32, true, false, EvenDblSpc, 3, 2 },
+{ ARM::VLD3LNq32Pseudo_UPD, ARM::VLD3LNq32_UPD, true, true, EvenDblSpc, 3, 2 },
+
+{ ARM::VLD3d16Pseudo, ARM::VLD3d16, true, false, SingleSpc, 3, 4 },
+{ ARM::VLD3d16Pseudo_UPD, ARM::VLD3d16_UPD, true, true, SingleSpc, 3, 4 },
+{ ARM::VLD3d32Pseudo, ARM::VLD3d32, true, false, SingleSpc, 3, 2 },
+{ ARM::VLD3d32Pseudo_UPD, ARM::VLD3d32_UPD, true, true, SingleSpc, 3, 2 },
+{ ARM::VLD3d8Pseudo, ARM::VLD3d8, true, false, SingleSpc, 3, 8 },
+{ ARM::VLD3d8Pseudo_UPD, ARM::VLD3d8_UPD, true, true, SingleSpc, 3, 8 },
+
+{ ARM::VLD3q16Pseudo_UPD, ARM::VLD3q16_UPD, true, true, EvenDblSpc, 3, 4 },
+{ ARM::VLD3q16oddPseudo, ARM::VLD3q16, true, false, OddDblSpc, 3, 4 },
+{ ARM::VLD3q16oddPseudo_UPD, ARM::VLD3q16_UPD, true, true, OddDblSpc, 3, 4 },
+{ ARM::VLD3q32Pseudo_UPD, ARM::VLD3q32_UPD, true, true, EvenDblSpc, 3, 2 },
+{ ARM::VLD3q32oddPseudo, ARM::VLD3q32, true, false, OddDblSpc, 3, 2 },
+{ ARM::VLD3q32oddPseudo_UPD, ARM::VLD3q32_UPD, true, true, OddDblSpc, 3, 2 },
+{ ARM::VLD3q8Pseudo_UPD, ARM::VLD3q8_UPD, true, true, EvenDblSpc, 3, 8 },
+{ ARM::VLD3q8oddPseudo, ARM::VLD3q8, true, false, OddDblSpc, 3, 8 },
+{ ARM::VLD3q8oddPseudo_UPD, ARM::VLD3q8_UPD, true, true, OddDblSpc, 3, 8 },
+
+{ ARM::VLD4DUPd16Pseudo, ARM::VLD4DUPd16, true, false, SingleSpc, 4, 4},
+{ ARM::VLD4DUPd16Pseudo_UPD, ARM::VLD4DUPd16_UPD, true, true, SingleSpc, 4, 4},
+{ ARM::VLD4DUPd32Pseudo, ARM::VLD4DUPd32, true, false, SingleSpc, 4, 2},
+{ ARM::VLD4DUPd32Pseudo_UPD, ARM::VLD4DUPd32_UPD, true, true, SingleSpc, 4, 2},
+{ ARM::VLD4DUPd8Pseudo, ARM::VLD4DUPd8, true, false, SingleSpc, 4, 8},
+{ ARM::VLD4DUPd8Pseudo_UPD, ARM::VLD4DUPd8_UPD, true, true, SingleSpc, 4, 8},
+
+{ ARM::VLD4LNd16Pseudo, ARM::VLD4LNd16, true, false, SingleSpc, 4, 4 },
+{ ARM::VLD4LNd16Pseudo_UPD, ARM::VLD4LNd16_UPD, true, true, SingleSpc, 4, 4 },
+{ ARM::VLD4LNd32Pseudo, ARM::VLD4LNd32, true, false, SingleSpc, 4, 2 },
+{ ARM::VLD4LNd32Pseudo_UPD, ARM::VLD4LNd32_UPD, true, true, SingleSpc, 4, 2 },
+{ ARM::VLD4LNd8Pseudo, ARM::VLD4LNd8, true, false, SingleSpc, 4, 8 },
+{ ARM::VLD4LNd8Pseudo_UPD, ARM::VLD4LNd8_UPD, true, true, SingleSpc, 4, 8 },
+{ ARM::VLD4LNq16Pseudo, ARM::VLD4LNq16, true, false, EvenDblSpc, 4, 4 },
+{ ARM::VLD4LNq16Pseudo_UPD, ARM::VLD4LNq16_UPD, true, true, EvenDblSpc, 4, 4 },
+{ ARM::VLD4LNq32Pseudo, ARM::VLD4LNq32, true, false, EvenDblSpc, 4, 2 },
+{ ARM::VLD4LNq32Pseudo_UPD, ARM::VLD4LNq32_UPD, true, true, EvenDblSpc, 4, 2 },
+
+{ ARM::VLD4d16Pseudo, ARM::VLD4d16, true, false, SingleSpc, 4, 4 },
+{ ARM::VLD4d16Pseudo_UPD, ARM::VLD4d16_UPD, true, true, SingleSpc, 4, 4 },
+{ ARM::VLD4d32Pseudo, ARM::VLD4d32, true, false, SingleSpc, 4, 2 },
+{ ARM::VLD4d32Pseudo_UPD, ARM::VLD4d32_UPD, true, true, SingleSpc, 4, 2 },
+{ ARM::VLD4d8Pseudo, ARM::VLD4d8, true, false, SingleSpc, 4, 8 },
+{ ARM::VLD4d8Pseudo_UPD, ARM::VLD4d8_UPD, true, true, SingleSpc, 4, 8 },
+
+{ ARM::VLD4q16Pseudo_UPD, ARM::VLD4q16_UPD, true, true, EvenDblSpc, 4, 4 },
+{ ARM::VLD4q16oddPseudo, ARM::VLD4q16, true, false, OddDblSpc, 4, 4 },
+{ ARM::VLD4q16oddPseudo_UPD, ARM::VLD4q16_UPD, true, true, OddDblSpc, 4, 4 },
+{ ARM::VLD4q32Pseudo_UPD, ARM::VLD4q32_UPD, true, true, EvenDblSpc, 4, 2 },
+{ ARM::VLD4q32oddPseudo, ARM::VLD4q32, true, false, OddDblSpc, 4, 2 },
+{ ARM::VLD4q32oddPseudo_UPD, ARM::VLD4q32_UPD, true, true, OddDblSpc, 4, 2 },
+{ ARM::VLD4q8Pseudo_UPD, ARM::VLD4q8_UPD, true, true, EvenDblSpc, 4, 8 },
+{ ARM::VLD4q8oddPseudo, ARM::VLD4q8, true, false, OddDblSpc, 4, 8 },
+{ ARM::VLD4q8oddPseudo_UPD, ARM::VLD4q8_UPD, true, true, OddDblSpc, 4, 8 },
+
+{ ARM::VST1LNq16Pseudo, ARM::VST1LNd16, false, false, EvenDblSpc, 1, 4 },
+{ ARM::VST1LNq16Pseudo_UPD, ARM::VST1LNd16_UPD,false, true, EvenDblSpc, 1, 4 },
+{ ARM::VST1LNq32Pseudo, ARM::VST1LNd32, false, false, EvenDblSpc, 1, 2 },
+{ ARM::VST1LNq32Pseudo_UPD, ARM::VST1LNd32_UPD,false, true, EvenDblSpc, 1, 2 },
+{ ARM::VST1LNq8Pseudo, ARM::VST1LNd8, false, false, EvenDblSpc, 1, 8 },
+{ ARM::VST1LNq8Pseudo_UPD, ARM::VST1LNd8_UPD, false, true, EvenDblSpc, 1, 8 },
+
+{ ARM::VST1d64QPseudo, ARM::VST1d64Q, false, false, SingleSpc, 4, 1 },
+{ ARM::VST1d64QPseudo_UPD, ARM::VST1d64Q_UPD, false, true, SingleSpc, 4, 1 },
+{ ARM::VST1d64TPseudo, ARM::VST1d64T, false, false, SingleSpc, 3, 1 },
+{ ARM::VST1d64TPseudo_UPD, ARM::VST1d64T_UPD, false, true, SingleSpc, 3, 1 },
+
+{ ARM::VST1q16Pseudo, ARM::VST1q16, false, false, SingleSpc, 2, 4 },
+{ ARM::VST1q16Pseudo_UPD, ARM::VST1q16_UPD, false, true, SingleSpc, 2, 4 },
+{ ARM::VST1q32Pseudo, ARM::VST1q32, false, false, SingleSpc, 2, 2 },
+{ ARM::VST1q32Pseudo_UPD, ARM::VST1q32_UPD, false, true, SingleSpc, 2, 2 },
+{ ARM::VST1q64Pseudo, ARM::VST1q64, false, false, SingleSpc, 2, 1 },
+{ ARM::VST1q64Pseudo_UPD, ARM::VST1q64_UPD, false, true, SingleSpc, 2, 1 },
+{ ARM::VST1q8Pseudo, ARM::VST1q8, false, false, SingleSpc, 2, 8 },
+{ ARM::VST1q8Pseudo_UPD, ARM::VST1q8_UPD, false, true, SingleSpc, 2, 8 },
+
+{ ARM::VST2LNd16Pseudo, ARM::VST2LNd16, false, false, SingleSpc, 2, 4 },
+{ ARM::VST2LNd16Pseudo_UPD, ARM::VST2LNd16_UPD, false, true, SingleSpc, 2, 4 },
+{ ARM::VST2LNd32Pseudo, ARM::VST2LNd32, false, false, SingleSpc, 2, 2 },
+{ ARM::VST2LNd32Pseudo_UPD, ARM::VST2LNd32_UPD, false, true, SingleSpc, 2, 2 },
+{ ARM::VST2LNd8Pseudo, ARM::VST2LNd8, false, false, SingleSpc, 2, 8 },
+{ ARM::VST2LNd8Pseudo_UPD, ARM::VST2LNd8_UPD, false, true, SingleSpc, 2, 8 },
+{ ARM::VST2LNq16Pseudo, ARM::VST2LNq16, false, false, EvenDblSpc, 2, 4},
+{ ARM::VST2LNq16Pseudo_UPD, ARM::VST2LNq16_UPD, false, true, EvenDblSpc, 2, 4},
+{ ARM::VST2LNq32Pseudo, ARM::VST2LNq32, false, false, EvenDblSpc, 2, 2},
+{ ARM::VST2LNq32Pseudo_UPD, ARM::VST2LNq32_UPD, false, true, EvenDblSpc, 2, 2},
+
+{ ARM::VST2d16Pseudo, ARM::VST2d16, false, false, SingleSpc, 2, 4 },
+{ ARM::VST2d16Pseudo_UPD, ARM::VST2d16_UPD, false, true, SingleSpc, 2, 4 },
+{ ARM::VST2d32Pseudo, ARM::VST2d32, false, false, SingleSpc, 2, 2 },
+{ ARM::VST2d32Pseudo_UPD, ARM::VST2d32_UPD, false, true, SingleSpc, 2, 2 },
+{ ARM::VST2d8Pseudo, ARM::VST2d8, false, false, SingleSpc, 2, 8 },
+{ ARM::VST2d8Pseudo_UPD, ARM::VST2d8_UPD, false, true, SingleSpc, 2, 8 },
+
+{ ARM::VST2q16Pseudo, ARM::VST2q16, false, false, SingleSpc, 4, 4 },
+{ ARM::VST2q16Pseudo_UPD, ARM::VST2q16_UPD, false, true, SingleSpc, 4, 4 },
+{ ARM::VST2q32Pseudo, ARM::VST2q32, false, false, SingleSpc, 4, 2 },
+{ ARM::VST2q32Pseudo_UPD, ARM::VST2q32_UPD, false, true, SingleSpc, 4, 2 },
+{ ARM::VST2q8Pseudo, ARM::VST2q8, false, false, SingleSpc, 4, 8 },
+{ ARM::VST2q8Pseudo_UPD, ARM::VST2q8_UPD, false, true, SingleSpc, 4, 8 },
+
+{ ARM::VST3LNd16Pseudo, ARM::VST3LNd16, false, false, SingleSpc, 3, 4 },
+{ ARM::VST3LNd16Pseudo_UPD, ARM::VST3LNd16_UPD, false, true, SingleSpc, 3, 4 },
+{ ARM::VST3LNd32Pseudo, ARM::VST3LNd32, false, false, SingleSpc, 3, 2 },
+{ ARM::VST3LNd32Pseudo_UPD, ARM::VST3LNd32_UPD, false, true, SingleSpc, 3, 2 },
+{ ARM::VST3LNd8Pseudo, ARM::VST3LNd8, false, false, SingleSpc, 3, 8 },
+{ ARM::VST3LNd8Pseudo_UPD, ARM::VST3LNd8_UPD, false, true, SingleSpc, 3, 8 },
+{ ARM::VST3LNq16Pseudo, ARM::VST3LNq16, false, false, EvenDblSpc, 3, 4},
+{ ARM::VST3LNq16Pseudo_UPD, ARM::VST3LNq16_UPD, false, true, EvenDblSpc, 3, 4},
+{ ARM::VST3LNq32Pseudo, ARM::VST3LNq32, false, false, EvenDblSpc, 3, 2},
+{ ARM::VST3LNq32Pseudo_UPD, ARM::VST3LNq32_UPD, false, true, EvenDblSpc, 3, 2},
+
+{ ARM::VST3d16Pseudo, ARM::VST3d16, false, false, SingleSpc, 3, 4 },
+{ ARM::VST3d16Pseudo_UPD, ARM::VST3d16_UPD, false, true, SingleSpc, 3, 4 },
+{ ARM::VST3d32Pseudo, ARM::VST3d32, false, false, SingleSpc, 3, 2 },
+{ ARM::VST3d32Pseudo_UPD, ARM::VST3d32_UPD, false, true, SingleSpc, 3, 2 },
+{ ARM::VST3d8Pseudo, ARM::VST3d8, false, false, SingleSpc, 3, 8 },
+{ ARM::VST3d8Pseudo_UPD, ARM::VST3d8_UPD, false, true, SingleSpc, 3, 8 },
+
+{ ARM::VST3q16Pseudo_UPD, ARM::VST3q16_UPD, false, true, EvenDblSpc, 3, 4 },
+{ ARM::VST3q16oddPseudo, ARM::VST3q16, false, false, OddDblSpc, 3, 4 },
+{ ARM::VST3q16oddPseudo_UPD, ARM::VST3q16_UPD, false, true, OddDblSpc, 3, 4 },
+{ ARM::VST3q32Pseudo_UPD, ARM::VST3q32_UPD, false, true, EvenDblSpc, 3, 2 },
+{ ARM::VST3q32oddPseudo, ARM::VST3q32, false, false, OddDblSpc, 3, 2 },
+{ ARM::VST3q32oddPseudo_UPD, ARM::VST3q32_UPD, false, true, OddDblSpc, 3, 2 },
+{ ARM::VST3q8Pseudo_UPD, ARM::VST3q8_UPD, false, true, EvenDblSpc, 3, 8 },
+{ ARM::VST3q8oddPseudo, ARM::VST3q8, false, false, OddDblSpc, 3, 8 },
+{ ARM::VST3q8oddPseudo_UPD, ARM::VST3q8_UPD, false, true, OddDblSpc, 3, 8 },
+
+{ ARM::VST4LNd16Pseudo, ARM::VST4LNd16, false, false, SingleSpc, 4, 4 },
+{ ARM::VST4LNd16Pseudo_UPD, ARM::VST4LNd16_UPD, false, true, SingleSpc, 4, 4 },
+{ ARM::VST4LNd32Pseudo, ARM::VST4LNd32, false, false, SingleSpc, 4, 2 },
+{ ARM::VST4LNd32Pseudo_UPD, ARM::VST4LNd32_UPD, false, true, SingleSpc, 4, 2 },
+{ ARM::VST4LNd8Pseudo, ARM::VST4LNd8, false, false, SingleSpc, 4, 8 },
+{ ARM::VST4LNd8Pseudo_UPD, ARM::VST4LNd8_UPD, false, true, SingleSpc, 4, 8 },
+{ ARM::VST4LNq16Pseudo, ARM::VST4LNq16, false, false, EvenDblSpc, 4, 4},
+{ ARM::VST4LNq16Pseudo_UPD, ARM::VST4LNq16_UPD, false, true, EvenDblSpc, 4, 4},
+{ ARM::VST4LNq32Pseudo, ARM::VST4LNq32, false, false, EvenDblSpc, 4, 2},
+{ ARM::VST4LNq32Pseudo_UPD, ARM::VST4LNq32_UPD, false, true, EvenDblSpc, 4, 2},
+
+{ ARM::VST4d16Pseudo, ARM::VST4d16, false, false, SingleSpc, 4, 4 },
+{ ARM::VST4d16Pseudo_UPD, ARM::VST4d16_UPD, false, true, SingleSpc, 4, 4 },
+{ ARM::VST4d32Pseudo, ARM::VST4d32, false, false, SingleSpc, 4, 2 },
+{ ARM::VST4d32Pseudo_UPD, ARM::VST4d32_UPD, false, true, SingleSpc, 4, 2 },
+{ ARM::VST4d8Pseudo, ARM::VST4d8, false, false, SingleSpc, 4, 8 },
+{ ARM::VST4d8Pseudo_UPD, ARM::VST4d8_UPD, false, true, SingleSpc, 4, 8 },
+
+{ ARM::VST4q16Pseudo_UPD, ARM::VST4q16_UPD, false, true, EvenDblSpc, 4, 4 },
+{ ARM::VST4q16oddPseudo, ARM::VST4q16, false, false, OddDblSpc, 4, 4 },
+{ ARM::VST4q16oddPseudo_UPD, ARM::VST4q16_UPD, false, true, OddDblSpc, 4, 4 },
+{ ARM::VST4q32Pseudo_UPD, ARM::VST4q32_UPD, false, true, EvenDblSpc, 4, 2 },
+{ ARM::VST4q32oddPseudo, ARM::VST4q32, false, false, OddDblSpc, 4, 2 },
+{ ARM::VST4q32oddPseudo_UPD, ARM::VST4q32_UPD, false, true, OddDblSpc, 4, 2 },
+{ ARM::VST4q8Pseudo_UPD, ARM::VST4q8_UPD, false, true, EvenDblSpc, 4, 8 },
+{ ARM::VST4q8oddPseudo, ARM::VST4q8, false, false, OddDblSpc, 4, 8 },
+{ ARM::VST4q8oddPseudo_UPD, ARM::VST4q8_UPD, false, true, OddDblSpc, 4, 8 }
+};
+
+/// LookupNEONLdSt - Search the NEONLdStTable for information about a NEON
+/// load or store pseudo instruction.
+static const NEONLdStTableEntry *LookupNEONLdSt(unsigned Opcode) {
+ unsigned NumEntries = array_lengthof(NEONLdStTable);
+
+#ifndef NDEBUG
+ // Make sure the table is sorted.
+ static bool TableChecked = false;
+ if (!TableChecked) {
+ for (unsigned i = 0; i != NumEntries-1; ++i)
+ assert(NEONLdStTable[i] < NEONLdStTable[i+1] &&
+ "NEONLdStTable is not sorted!");
+ TableChecked = true;
+ }
+#endif
+
+ const NEONLdStTableEntry *I =
+ std::lower_bound(NEONLdStTable, NEONLdStTable + NumEntries, Opcode);
+ if (I != NEONLdStTable + NumEntries && I->PseudoOpc == Opcode)
+ return I;
+ return NULL;
+}
+
+/// GetDSubRegs - Get 4 D subregisters of a Q, QQ, or QQQQ register,
+/// corresponding to the specified register spacing. Not all of the results
+/// are necessarily valid, e.g., a Q register only has 2 D subregisters.
+static void GetDSubRegs(unsigned Reg, NEONRegSpacing RegSpc,
+ const TargetRegisterInfo *TRI, unsigned &D0,
+ unsigned &D1, unsigned &D2, unsigned &D3) {
+ if (RegSpc == SingleSpc) {
+ D0 = TRI->getSubReg(Reg, ARM::dsub_0);
+ D1 = TRI->getSubReg(Reg, ARM::dsub_1);
+ D2 = TRI->getSubReg(Reg, ARM::dsub_2);
+ D3 = TRI->getSubReg(Reg, ARM::dsub_3);
+ } else if (RegSpc == EvenDblSpc) {
+ D0 = TRI->getSubReg(Reg, ARM::dsub_0);
+ D1 = TRI->getSubReg(Reg, ARM::dsub_2);
+ D2 = TRI->getSubReg(Reg, ARM::dsub_4);
+ D3 = TRI->getSubReg(Reg, ARM::dsub_6);
+ } else {
+ assert(RegSpc == OddDblSpc && "unknown register spacing");
+ D0 = TRI->getSubReg(Reg, ARM::dsub_1);
+ D1 = TRI->getSubReg(Reg, ARM::dsub_3);
+ D2 = TRI->getSubReg(Reg, ARM::dsub_5);
+ D3 = TRI->getSubReg(Reg, ARM::dsub_7);
+ }
+}
+
+/// ExpandVLD - Translate VLD pseudo instructions with Q, QQ or QQQQ register
+/// operands to real VLD instructions with D register operands.
+void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI) {
+ MachineInstr &MI = *MBBI;
+ MachineBasicBlock &MBB = *MI.getParent();
+
+ const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode());
+ assert(TableEntry && TableEntry->IsLoad && "NEONLdStTable lookup failed");
+ NEONRegSpacing RegSpc = TableEntry->RegSpacing;
+ unsigned NumRegs = TableEntry->NumRegs;
+
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(TableEntry->RealOpc));
+ unsigned OpIdx = 0;
+
+ bool DstIsDead = MI.getOperand(OpIdx).isDead();
+ unsigned DstReg = MI.getOperand(OpIdx++).getReg();
+ unsigned D0, D1, D2, D3;
+ GetDSubRegs(DstReg, RegSpc, TRI, D0, D1, D2, D3);
+ MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(D1, RegState::Define | getDeadRegState(DstIsDead));
+ if (NumRegs > 2)
+ MIB.addReg(D2, RegState::Define | getDeadRegState(DstIsDead));
+ if (NumRegs > 3)
+ MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead));
+
+ if (TableEntry->HasWriteBack)
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // Copy the addrmode6 operands.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ // Copy the am6offset operand.
+ if (TableEntry->HasWriteBack)
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // For an instruction writing double-spaced subregs, the pseudo instruction
+ // has an extra operand that is a use of the super-register. Record the
+ // operand index and skip over it.
+ unsigned SrcOpIdx = 0;
+ if (RegSpc == EvenDblSpc || RegSpc == OddDblSpc)
+ SrcOpIdx = OpIdx++;
+
+ // Copy the predicate operands.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // Copy the super-register source operand used for double-spaced subregs over
+ // to the new instruction as an implicit operand.
+ if (SrcOpIdx != 0) {
+ MachineOperand MO = MI.getOperand(SrcOpIdx);
+ MO.setImplicit(true);
+ MIB.addOperand(MO);
+ }
+ // Add an implicit def for the super-register.
+ MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
+ TransferImpOps(MI, MIB, MIB);
+
+ // Transfer memoperands.
+ MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+
+ MI.eraseFromParent();
+}
+
+/// ExpandVST - Translate VST pseudo instructions with Q, QQ or QQQQ register
+/// operands to real VST instructions with D register operands.
+void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) {
+ MachineInstr &MI = *MBBI;
+ MachineBasicBlock &MBB = *MI.getParent();
+
+ const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode());
+ assert(TableEntry && !TableEntry->IsLoad && "NEONLdStTable lookup failed");
+ NEONRegSpacing RegSpc = TableEntry->RegSpacing;
+ unsigned NumRegs = TableEntry->NumRegs;
+
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(TableEntry->RealOpc));
+ unsigned OpIdx = 0;
+ if (TableEntry->HasWriteBack)
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // Copy the addrmode6 operands.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ // Copy the am6offset operand.
+ if (TableEntry->HasWriteBack)
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ bool SrcIsKill = MI.getOperand(OpIdx).isKill();
+ unsigned SrcReg = MI.getOperand(OpIdx++).getReg();
+ unsigned D0, D1, D2, D3;
+ GetDSubRegs(SrcReg, RegSpc, TRI, D0, D1, D2, D3);
+ MIB.addReg(D0).addReg(D1);
+ if (NumRegs > 2)
+ MIB.addReg(D2);
+ if (NumRegs > 3)
+ MIB.addReg(D3);
+
+ // Copy the predicate operands.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ if (SrcIsKill) // Add an implicit kill for the super-reg.
+ MIB->addRegisterKilled(SrcReg, TRI, true);
+ TransferImpOps(MI, MIB, MIB);
+
+ // Transfer memoperands.
+ MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+
+ MI.eraseFromParent();
+}
+
+/// ExpandLaneOp - Translate VLD*LN and VST*LN instructions with Q, QQ or QQQQ
+/// register operands to real instructions with D register operands.
+void ARMExpandPseudo::ExpandLaneOp(MachineBasicBlock::iterator &MBBI) {
+ MachineInstr &MI = *MBBI;
+ MachineBasicBlock &MBB = *MI.getParent();
+
+ const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode());
+ assert(TableEntry && "NEONLdStTable lookup failed");
+ NEONRegSpacing RegSpc = TableEntry->RegSpacing;
+ unsigned NumRegs = TableEntry->NumRegs;
+ unsigned RegElts = TableEntry->RegElts;
+
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(TableEntry->RealOpc));
+ unsigned OpIdx = 0;
+ // The lane operand is always the 3rd from last operand, before the 2
+ // predicate operands.
+ unsigned Lane = MI.getOperand(MI.getDesc().getNumOperands() - 3).getImm();
+
+ // Adjust the lane and spacing as needed for Q registers.
+ assert(RegSpc != OddDblSpc && "unexpected register spacing for VLD/VST-lane");
+ if (RegSpc == EvenDblSpc && Lane >= RegElts) {
+ RegSpc = OddDblSpc;
+ Lane -= RegElts;
+ }
+ assert(Lane < RegElts && "out of range lane for VLD/VST-lane");
+
+ unsigned D0 = 0, D1 = 0, D2 = 0, D3 = 0;
+ unsigned DstReg = 0;
+ bool DstIsDead = false;
+ if (TableEntry->IsLoad) {
+ DstIsDead = MI.getOperand(OpIdx).isDead();
+ DstReg = MI.getOperand(OpIdx++).getReg();
+ GetDSubRegs(DstReg, RegSpc, TRI, D0, D1, D2, D3);
+ MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead));
+ if (NumRegs > 1)
+ MIB.addReg(D1, RegState::Define | getDeadRegState(DstIsDead));
+ if (NumRegs > 2)
+ MIB.addReg(D2, RegState::Define | getDeadRegState(DstIsDead));
+ if (NumRegs > 3)
+ MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead));
+ }
+
+ if (TableEntry->HasWriteBack)
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // Copy the addrmode6 operands.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ // Copy the am6offset operand.
+ if (TableEntry->HasWriteBack)
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // Grab the super-register source.
+ MachineOperand MO = MI.getOperand(OpIdx++);
+ if (!TableEntry->IsLoad)
+ GetDSubRegs(MO.getReg(), RegSpc, TRI, D0, D1, D2, D3);
+
+ // Add the subregs as sources of the new instruction.
+ unsigned SrcFlags = (getUndefRegState(MO.isUndef()) |
+ getKillRegState(MO.isKill()));
+ MIB.addReg(D0, SrcFlags);
+ if (NumRegs > 1)
+ MIB.addReg(D1, SrcFlags);
+ if (NumRegs > 2)
+ MIB.addReg(D2, SrcFlags);
+ if (NumRegs > 3)
+ MIB.addReg(D3, SrcFlags);
+
+ // Add the lane number operand.
+ MIB.addImm(Lane);
+ OpIdx += 1;
+
+ // Copy the predicate operands.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // Copy the super-register source to be an implicit source.
+ MO.setImplicit(true);
+ MIB.addOperand(MO);
+ if (TableEntry->IsLoad)
+ // Add an implicit def for the super-register.
+ MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
+ TransferImpOps(MI, MIB, MIB);
+ MI.eraseFromParent();
+}
+
+/// ExpandVTBL - Translate VTBL and VTBX pseudo instructions with Q or QQ
+/// register operands to real instructions with D register operands.
+void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI,
+ unsigned Opc, bool IsExt, unsigned NumRegs) {
+ MachineInstr &MI = *MBBI;
+ MachineBasicBlock &MBB = *MI.getParent();
+
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc));
+ unsigned OpIdx = 0;
+
+ // Transfer the destination register operand.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ if (IsExt)
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ bool SrcIsKill = MI.getOperand(OpIdx).isKill();
+ unsigned SrcReg = MI.getOperand(OpIdx++).getReg();
+ unsigned D0, D1, D2, D3;
+ GetDSubRegs(SrcReg, SingleSpc, TRI, D0, D1, D2, D3);
+ MIB.addReg(D0).addReg(D1);
+ if (NumRegs > 2)
+ MIB.addReg(D2);
+ if (NumRegs > 3)
+ MIB.addReg(D3);
+
+ // Copy the other source register operand.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // Copy the predicate operands.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ if (SrcIsKill) // Add an implicit kill for the super-reg.
+ MIB->addRegisterKilled(SrcReg, TRI, true);
+ TransferImpOps(MI, MIB, MIB);
+ MI.eraseFromParent();
+}
+
+void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI) {
+ MachineInstr &MI = *MBBI;
+ unsigned Opcode = MI.getOpcode();
+ unsigned PredReg = 0;
+ ARMCC::CondCodes Pred = llvm::getInstrPredicate(&MI, PredReg);
+ unsigned DstReg = MI.getOperand(0).getReg();
+ bool DstIsDead = MI.getOperand(0).isDead();
+ bool isCC = Opcode == ARM::MOVCCi32imm || Opcode == ARM::t2MOVCCi32imm;
+ const MachineOperand &MO = MI.getOperand(isCC ? 2 : 1);
+ MachineInstrBuilder LO16, HI16;
+
+ if (!STI->hasV6T2Ops() &&
+ (Opcode == ARM::MOVi32imm || Opcode == ARM::MOVCCi32imm)) {
+ // Expand into a movi + orr.
+ LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVi), DstReg);
+ HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::ORRri))
+ .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstReg);
+
+ assert (MO.isImm() && "MOVi32imm w/ non-immediate source operand!");
+ unsigned ImmVal = (unsigned)MO.getImm();
+ unsigned SOImmValV1 = ARM_AM::getSOImmTwoPartFirst(ImmVal);
+ unsigned SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(ImmVal);
+ LO16 = LO16.addImm(SOImmValV1);
+ HI16 = HI16.addImm(SOImmValV2);
+ LO16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ HI16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ LO16.addImm(Pred).addReg(PredReg).addReg(0);
+ HI16.addImm(Pred).addReg(PredReg).addReg(0);
+ TransferImpOps(MI, LO16, HI16);
+ MI.eraseFromParent();
+ return;
+ }
+
+ unsigned LO16Opc = 0;
+ unsigned HI16Opc = 0;
+ if (Opcode == ARM::t2MOVi32imm || Opcode == ARM::t2MOVCCi32imm) {
+ LO16Opc = ARM::t2MOVi16;
+ HI16Opc = ARM::t2MOVTi16;
+ } else {
+ LO16Opc = ARM::MOVi16;
+ HI16Opc = ARM::MOVTi16;
+ }
+
+ LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LO16Opc), DstReg);
+ HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(HI16Opc))
+ .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstReg);
+
+ if (MO.isImm()) {
+ unsigned Imm = MO.getImm();
+ unsigned Lo16 = Imm & 0xffff;
+ unsigned Hi16 = (Imm >> 16) & 0xffff;
+ LO16 = LO16.addImm(Lo16);
+ HI16 = HI16.addImm(Hi16);
+ } else {
+ const GlobalValue *GV = MO.getGlobal();
+ unsigned TF = MO.getTargetFlags();
+ LO16 = LO16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_LO16);
+ HI16 = HI16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_HI16);
+ }
+
+ LO16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ HI16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ LO16.addImm(Pred).addReg(PredReg);
+ HI16.addImm(Pred).addReg(PredReg);
+
+ TransferImpOps(MI, LO16, HI16);
+ MI.eraseFromParent();
+}
+
+bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) {
+ MachineInstr &MI = *MBBI;
+ unsigned Opcode = MI.getOpcode();
+ switch (Opcode) {
+ default:
+ return false;
+ case ARM::VMOVScc:
+ case ARM::VMOVDcc: {
+ unsigned newOpc = Opcode == ARM::VMOVScc ? ARM::VMOVS : ARM::VMOVD;
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(newOpc),
+ MI.getOperand(1).getReg())
+ .addReg(MI.getOperand(2).getReg(),
+ getKillRegState(MI.getOperand(2).isKill()))
+ .addImm(MI.getOperand(3).getImm()) // 'pred'
+ .addReg(MI.getOperand(4).getReg());
+
+ MI.eraseFromParent();
+ return true;
+ }
+ case ARM::t2MOVCCr:
+ case ARM::MOVCCr: {
+ unsigned Opc = AFI->isThumbFunction() ? ARM::t2MOVr : ARM::MOVr;
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc),
+ MI.getOperand(1).getReg())
+ .addReg(MI.getOperand(2).getReg(),
+ getKillRegState(MI.getOperand(2).isKill()))
+ .addImm(MI.getOperand(3).getImm()) // 'pred'
+ .addReg(MI.getOperand(4).getReg())
+ .addReg(0); // 's' bit
+
+ MI.eraseFromParent();
+ return true;
+ }
+ case ARM::MOVCCsi: {
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi),
+ (MI.getOperand(1).getReg()))
+ .addReg(MI.getOperand(2).getReg(),
+ getKillRegState(MI.getOperand(2).isKill()))
+ .addImm(MI.getOperand(3).getImm())
+ .addImm(MI.getOperand(4).getImm()) // 'pred'
+ .addReg(MI.getOperand(5).getReg())
+ .addReg(0); // 's' bit
+
+ MI.eraseFromParent();
+ return true;
+ }
+
+ case ARM::MOVCCsr: {
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsr),
+ (MI.getOperand(1).getReg()))
+ .addReg(MI.getOperand(2).getReg(),
+ getKillRegState(MI.getOperand(2).isKill()))
+ .addReg(MI.getOperand(3).getReg(),
+ getKillRegState(MI.getOperand(3).isKill()))
+ .addImm(MI.getOperand(4).getImm())
+ .addImm(MI.getOperand(5).getImm()) // 'pred'
+ .addReg(MI.getOperand(6).getReg())
+ .addReg(0); // 's' bit
+
+ MI.eraseFromParent();
+ return true;
+ }
+ case ARM::MOVCCi16: {
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVi16),
+ MI.getOperand(1).getReg())
+ .addImm(MI.getOperand(2).getImm())
+ .addImm(MI.getOperand(3).getImm()) // 'pred'
+ .addReg(MI.getOperand(4).getReg());
+
+ MI.eraseFromParent();
+ return true;
+ }
+ case ARM::t2MOVCCi:
+ case ARM::MOVCCi: {
+ unsigned Opc = AFI->isThumbFunction() ? ARM::t2MOVi : ARM::MOVi;
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc),
+ MI.getOperand(1).getReg())
+ .addImm(MI.getOperand(2).getImm())
+ .addImm(MI.getOperand(3).getImm()) // 'pred'
+ .addReg(MI.getOperand(4).getReg())
+ .addReg(0); // 's' bit
+
+ MI.eraseFromParent();
+ return true;
+ }
+ case ARM::MVNCCi: {
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MVNi),
+ MI.getOperand(1).getReg())
+ .addImm(MI.getOperand(2).getImm())
+ .addImm(MI.getOperand(3).getImm()) // 'pred'
+ .addReg(MI.getOperand(4).getReg())
+ .addReg(0); // 's' bit
+
+ MI.eraseFromParent();
+ return true;
+ }
+ case ARM::Int_eh_sjlj_dispatchsetup: {
+ MachineFunction &MF = *MI.getParent()->getParent();
+ const ARMBaseInstrInfo *AII =
+ static_cast<const ARMBaseInstrInfo*>(TII);
+ const ARMBaseRegisterInfo &RI = AII->getRegisterInfo();
+ // For functions using a base pointer, we rematerialize it (via the frame
+ // pointer) here since eh.sjlj.setjmp and eh.sjlj.longjmp don't do it
+ // for us. Otherwise, expand to nothing.
+ if (RI.hasBasePointer(MF)) {
+ int32_t NumBytes = AFI->getFramePtrSpillOffset();
+ unsigned FramePtr = RI.getFrameRegister(MF);
+ assert(MF.getTarget().getFrameLowering()->hasFP(MF) &&
+ "base pointer without frame pointer?");
+
+ if (AFI->isThumb2Function()) {
+ llvm::emitT2RegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6,
+ FramePtr, -NumBytes, ARMCC::AL, 0, *TII);
+ } else if (AFI->isThumbFunction()) {
+ llvm::emitThumbRegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6,
+ FramePtr, -NumBytes, *TII, RI);
+ } else {
+ llvm::emitARMRegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6,
+ FramePtr, -NumBytes, ARMCC::AL, 0,
+ *TII);
+ }
+ // If there's dynamic realignment, adjust for it.
+ if (RI.needsStackRealignment(MF)) {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ unsigned MaxAlign = MFI->getMaxAlignment();
+ assert (!AFI->isThumb1OnlyFunction());
+ // Emit bic r6, r6, MaxAlign
+ unsigned bicOpc = AFI->isThumbFunction() ?
+ ARM::t2BICri : ARM::BICri;
+ AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(bicOpc), ARM::R6)
+ .addReg(ARM::R6, RegState::Kill)
+ .addImm(MaxAlign-1)));
+ }
+
+ }
+ MI.eraseFromParent();
+ return true;
+ }
+
+ case ARM::MOVsrl_flag:
+ case ARM::MOVsra_flag: {
+ // These are just fancy MOVs insructions.
+ AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi),
+ MI.getOperand(0).getReg())
+ .addOperand(MI.getOperand(1))
+ .addImm(ARM_AM::getSORegOpc((Opcode == ARM::MOVsrl_flag ?
+ ARM_AM::lsr : ARM_AM::asr),
+ 1)))
+ .addReg(ARM::CPSR, RegState::Define);
+ MI.eraseFromParent();
+ return true;
+ }
+ case ARM::RRX: {
+ // This encodes as "MOVs Rd, Rm, rrx
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),TII->get(ARM::MOVsi),
+ MI.getOperand(0).getReg())
+ .addOperand(MI.getOperand(1))
+ .addImm(ARM_AM::getSORegOpc(ARM_AM::rrx, 0)))
+ .addReg(0);
+ TransferImpOps(MI, MIB, MIB);
+ MI.eraseFromParent();
+ return true;
+ }
+ case ARM::tTPsoft:
+ case ARM::TPsoft: {
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(Opcode == ARM::tTPsoft ? ARM::tBL : ARM::BL))
+ .addExternalSymbol("__aeabi_read_tp", 0);
+
+ MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ TransferImpOps(MI, MIB, MIB);
+ MI.eraseFromParent();
+ return true;
+ }
+ case ARM::tLDRpci_pic:
+ case ARM::t2LDRpci_pic: {
+ unsigned NewLdOpc = (Opcode == ARM::tLDRpci_pic)
+ ? ARM::tLDRpci : ARM::t2LDRpci;
+ unsigned DstReg = MI.getOperand(0).getReg();
+ bool DstIsDead = MI.getOperand(0).isDead();
+ MachineInstrBuilder MIB1 =
+ AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(NewLdOpc), DstReg)
+ .addOperand(MI.getOperand(1)));
+ MIB1->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MachineInstrBuilder MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(ARM::tPICADD))
+ .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstReg)
+ .addOperand(MI.getOperand(2));
+ TransferImpOps(MI, MIB1, MIB2);
+ MI.eraseFromParent();
+ return true;
+ }
+
+ case ARM::MOV_ga_dyn:
+ case ARM::MOV_ga_pcrel:
+ case ARM::MOV_ga_pcrel_ldr:
+ case ARM::t2MOV_ga_dyn:
+ case ARM::t2MOV_ga_pcrel: {
+ // Expand into movw + movw. Also "add pc" / ldr [pc] in PIC mode.
+ unsigned LabelId = AFI->createPICLabelUId();
+ unsigned DstReg = MI.getOperand(0).getReg();
+ bool DstIsDead = MI.getOperand(0).isDead();
+ const MachineOperand &MO1 = MI.getOperand(1);
+ const GlobalValue *GV = MO1.getGlobal();
+ unsigned TF = MO1.getTargetFlags();
+ bool isARM = (Opcode != ARM::t2MOV_ga_pcrel && Opcode!=ARM::t2MOV_ga_dyn);
+ bool isPIC = (Opcode != ARM::MOV_ga_dyn && Opcode != ARM::t2MOV_ga_dyn);
+ unsigned LO16Opc = isARM ? ARM::MOVi16_ga_pcrel : ARM::t2MOVi16_ga_pcrel;
+ unsigned HI16Opc = isARM ? ARM::MOVTi16_ga_pcrel :ARM::t2MOVTi16_ga_pcrel;
+ unsigned LO16TF = isPIC
+ ? ARMII::MO_LO16_NONLAZY_PIC : ARMII::MO_LO16_NONLAZY;
+ unsigned HI16TF = isPIC
+ ? ARMII::MO_HI16_NONLAZY_PIC : ARMII::MO_HI16_NONLAZY;
+ unsigned PICAddOpc = isARM
+ ? (Opcode == ARM::MOV_ga_pcrel_ldr ? ARM::PICLDR : ARM::PICADD)
+ : ARM::tPICADD;
+ MachineInstrBuilder MIB1 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(LO16Opc), DstReg)
+ .addGlobalAddress(GV, MO1.getOffset(), TF | LO16TF)
+ .addImm(LabelId);
+ MachineInstrBuilder MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(HI16Opc), DstReg)
+ .addReg(DstReg)
+ .addGlobalAddress(GV, MO1.getOffset(), TF | HI16TF)
+ .addImm(LabelId);
+ if (!isPIC) {
+ TransferImpOps(MI, MIB1, MIB2);
+ MI.eraseFromParent();
+ return true;
+ }
+
+ MachineInstrBuilder MIB3 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(PICAddOpc))
+ .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstReg).addImm(LabelId);
+ if (isARM) {
+ AddDefaultPred(MIB3);
+ if (Opcode == ARM::MOV_ga_pcrel_ldr)
+ MIB2->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ }
+ TransferImpOps(MI, MIB1, MIB3);
+ MI.eraseFromParent();
+ return true;
+ }
+
+ case ARM::MOVi32imm:
+ case ARM::MOVCCi32imm:
+ case ARM::t2MOVi32imm:
+ case ARM::t2MOVCCi32imm:
+ ExpandMOV32BitImm(MBB, MBBI);
+ return true;
+
+ case ARM::VLDMQIA: {
+ unsigned NewOpc = ARM::VLDMDIA;
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc));
+ unsigned OpIdx = 0;
+
+ // Grab the Q register destination.
+ bool DstIsDead = MI.getOperand(OpIdx).isDead();
+ unsigned DstReg = MI.getOperand(OpIdx++).getReg();
+
+ // Copy the source register.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // Copy the predicate operands.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // Add the destination operands (D subregs).
+ unsigned D0 = TRI->getSubReg(DstReg, ARM::dsub_0);
+ unsigned D1 = TRI->getSubReg(DstReg, ARM::dsub_1);
+ MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(D1, RegState::Define | getDeadRegState(DstIsDead));
+
+ // Add an implicit def for the super-register.
+ MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
+ TransferImpOps(MI, MIB, MIB);
+ MI.eraseFromParent();
+ return true;
+ }
+
+ case ARM::VSTMQIA: {
+ unsigned NewOpc = ARM::VSTMDIA;
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc));
+ unsigned OpIdx = 0;
+
+ // Grab the Q register source.
+ bool SrcIsKill = MI.getOperand(OpIdx).isKill();
+ unsigned SrcReg = MI.getOperand(OpIdx++).getReg();
+
+ // Copy the destination register.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // Copy the predicate operands.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // Add the source operands (D subregs).
+ unsigned D0 = TRI->getSubReg(SrcReg, ARM::dsub_0);
+ unsigned D1 = TRI->getSubReg(SrcReg, ARM::dsub_1);
+ MIB.addReg(D0).addReg(D1);
+
+ if (SrcIsKill) // Add an implicit kill for the Q register.
+ MIB->addRegisterKilled(SrcReg, TRI, true);
+
+ TransferImpOps(MI, MIB, MIB);
+ MI.eraseFromParent();
+ return true;
+ }
+ case ARM::VDUPfqf:
+ case ARM::VDUPfdf:{
+ unsigned NewOpc = Opcode == ARM::VDUPfqf ? ARM::VDUPLN32q :
+ ARM::VDUPLN32d;
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc));
+ unsigned OpIdx = 0;
+ unsigned SrcReg = MI.getOperand(1).getReg();
+ unsigned Lane = getARMRegisterNumbering(SrcReg) & 1;
+ unsigned DReg = TRI->getMatchingSuperReg(SrcReg,
+ Lane & 1 ? ARM::ssub_1 : ARM::ssub_0,
+ &ARM::DPR_VFP2RegClass);
+ // The lane is [0,1] for the containing DReg superregister.
+ // Copy the dst/src register operands.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.addReg(DReg);
+ ++OpIdx;
+ // Add the lane select operand.
+ MIB.addImm(Lane);
+ // Add the predicate operands.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ TransferImpOps(MI, MIB, MIB);
+ MI.eraseFromParent();
+ return true;
+ }
+
+ case ARM::VLD1q8Pseudo:
+ case ARM::VLD1q16Pseudo:
+ case ARM::VLD1q32Pseudo:
+ case ARM::VLD1q64Pseudo:
+ case ARM::VLD1q8Pseudo_UPD:
+ case ARM::VLD1q16Pseudo_UPD:
+ case ARM::VLD1q32Pseudo_UPD:
+ case ARM::VLD1q64Pseudo_UPD:
+ case ARM::VLD2d8Pseudo:
+ case ARM::VLD2d16Pseudo:
+ case ARM::VLD2d32Pseudo:
+ case ARM::VLD2q8Pseudo:
+ case ARM::VLD2q16Pseudo:
+ case ARM::VLD2q32Pseudo:
+ case ARM::VLD2d8Pseudo_UPD:
+ case ARM::VLD2d16Pseudo_UPD:
+ case ARM::VLD2d32Pseudo_UPD:
+ case ARM::VLD2q8Pseudo_UPD:
+ case ARM::VLD2q16Pseudo_UPD:
+ case ARM::VLD2q32Pseudo_UPD:
+ case ARM::VLD3d8Pseudo:
+ case ARM::VLD3d16Pseudo:
+ case ARM::VLD3d32Pseudo:
+ case ARM::VLD1d64TPseudo:
+ case ARM::VLD3d8Pseudo_UPD:
+ case ARM::VLD3d16Pseudo_UPD:
+ case ARM::VLD3d32Pseudo_UPD:
+ case ARM::VLD1d64TPseudo_UPD:
+ case ARM::VLD3q8Pseudo_UPD:
+ case ARM::VLD3q16Pseudo_UPD:
+ case ARM::VLD3q32Pseudo_UPD:
+ case ARM::VLD3q8oddPseudo:
+ case ARM::VLD3q16oddPseudo:
+ case ARM::VLD3q32oddPseudo:
+ case ARM::VLD3q8oddPseudo_UPD:
+ case ARM::VLD3q16oddPseudo_UPD:
+ case ARM::VLD3q32oddPseudo_UPD:
+ case ARM::VLD4d8Pseudo:
+ case ARM::VLD4d16Pseudo:
+ case ARM::VLD4d32Pseudo:
+ case ARM::VLD1d64QPseudo:
+ case ARM::VLD4d8Pseudo_UPD:
+ case ARM::VLD4d16Pseudo_UPD:
+ case ARM::VLD4d32Pseudo_UPD:
+ case ARM::VLD1d64QPseudo_UPD:
+ case ARM::VLD4q8Pseudo_UPD:
+ case ARM::VLD4q16Pseudo_UPD:
+ case ARM::VLD4q32Pseudo_UPD:
+ case ARM::VLD4q8oddPseudo:
+ case ARM::VLD4q16oddPseudo:
+ case ARM::VLD4q32oddPseudo:
+ case ARM::VLD4q8oddPseudo_UPD:
+ case ARM::VLD4q16oddPseudo_UPD:
+ case ARM::VLD4q32oddPseudo_UPD:
+ case ARM::VLD1DUPq8Pseudo:
+ case ARM::VLD1DUPq16Pseudo:
+ case ARM::VLD1DUPq32Pseudo:
+ case ARM::VLD1DUPq8Pseudo_UPD:
+ case ARM::VLD1DUPq16Pseudo_UPD:
+ case ARM::VLD1DUPq32Pseudo_UPD:
+ case ARM::VLD2DUPd8Pseudo:
+ case ARM::VLD2DUPd16Pseudo:
+ case ARM::VLD2DUPd32Pseudo:
+ case ARM::VLD2DUPd8Pseudo_UPD:
+ case ARM::VLD2DUPd16Pseudo_UPD:
+ case ARM::VLD2DUPd32Pseudo_UPD:
+ case ARM::VLD3DUPd8Pseudo:
+ case ARM::VLD3DUPd16Pseudo:
+ case ARM::VLD3DUPd32Pseudo:
+ case ARM::VLD3DUPd8Pseudo_UPD:
+ case ARM::VLD3DUPd16Pseudo_UPD:
+ case ARM::VLD3DUPd32Pseudo_UPD:
+ case ARM::VLD4DUPd8Pseudo:
+ case ARM::VLD4DUPd16Pseudo:
+ case ARM::VLD4DUPd32Pseudo:
+ case ARM::VLD4DUPd8Pseudo_UPD:
+ case ARM::VLD4DUPd16Pseudo_UPD:
+ case ARM::VLD4DUPd32Pseudo_UPD:
+ ExpandVLD(MBBI);
+ return true;
+
+ case ARM::VST1q8Pseudo:
+ case ARM::VST1q16Pseudo:
+ case ARM::VST1q32Pseudo:
+ case ARM::VST1q64Pseudo:
+ case ARM::VST1q8Pseudo_UPD:
+ case ARM::VST1q16Pseudo_UPD:
+ case ARM::VST1q32Pseudo_UPD:
+ case ARM::VST1q64Pseudo_UPD:
+ case ARM::VST2d8Pseudo:
+ case ARM::VST2d16Pseudo:
+ case ARM::VST2d32Pseudo:
+ case ARM::VST2q8Pseudo:
+ case ARM::VST2q16Pseudo:
+ case ARM::VST2q32Pseudo:
+ case ARM::VST2d8Pseudo_UPD:
+ case ARM::VST2d16Pseudo_UPD:
+ case ARM::VST2d32Pseudo_UPD:
+ case ARM::VST2q8Pseudo_UPD:
+ case ARM::VST2q16Pseudo_UPD:
+ case ARM::VST2q32Pseudo_UPD:
+ case ARM::VST3d8Pseudo:
+ case ARM::VST3d16Pseudo:
+ case ARM::VST3d32Pseudo:
+ case ARM::VST1d64TPseudo:
+ case ARM::VST3d8Pseudo_UPD:
+ case ARM::VST3d16Pseudo_UPD:
+ case ARM::VST3d32Pseudo_UPD:
+ case ARM::VST1d64TPseudo_UPD:
+ case ARM::VST3q8Pseudo_UPD:
+ case ARM::VST3q16Pseudo_UPD:
+ case ARM::VST3q32Pseudo_UPD:
+ case ARM::VST3q8oddPseudo:
+ case ARM::VST3q16oddPseudo:
+ case ARM::VST3q32oddPseudo:
+ case ARM::VST3q8oddPseudo_UPD:
+ case ARM::VST3q16oddPseudo_UPD:
+ case ARM::VST3q32oddPseudo_UPD:
+ case ARM::VST4d8Pseudo:
+ case ARM::VST4d16Pseudo:
+ case ARM::VST4d32Pseudo:
+ case ARM::VST1d64QPseudo:
+ case ARM::VST4d8Pseudo_UPD:
+ case ARM::VST4d16Pseudo_UPD:
+ case ARM::VST4d32Pseudo_UPD:
+ case ARM::VST1d64QPseudo_UPD:
+ case ARM::VST4q8Pseudo_UPD:
+ case ARM::VST4q16Pseudo_UPD:
+ case ARM::VST4q32Pseudo_UPD:
+ case ARM::VST4q8oddPseudo:
+ case ARM::VST4q16oddPseudo:
+ case ARM::VST4q32oddPseudo:
+ case ARM::VST4q8oddPseudo_UPD:
+ case ARM::VST4q16oddPseudo_UPD:
+ case ARM::VST4q32oddPseudo_UPD:
+ ExpandVST(MBBI);
+ return true;
+
+ case ARM::VLD1LNq8Pseudo:
+ case ARM::VLD1LNq16Pseudo:
+ case ARM::VLD1LNq32Pseudo:
+ case ARM::VLD1LNq8Pseudo_UPD:
+ case ARM::VLD1LNq16Pseudo_UPD:
+ case ARM::VLD1LNq32Pseudo_UPD:
+ case ARM::VLD2LNd8Pseudo:
+ case ARM::VLD2LNd16Pseudo:
+ case ARM::VLD2LNd32Pseudo:
+ case ARM::VLD2LNq16Pseudo:
+ case ARM::VLD2LNq32Pseudo:
+ case ARM::VLD2LNd8Pseudo_UPD:
+ case ARM::VLD2LNd16Pseudo_UPD:
+ case ARM::VLD2LNd32Pseudo_UPD:
+ case ARM::VLD2LNq16Pseudo_UPD:
+ case ARM::VLD2LNq32Pseudo_UPD:
+ case ARM::VLD3LNd8Pseudo:
+ case ARM::VLD3LNd16Pseudo:
+ case ARM::VLD3LNd32Pseudo:
+ case ARM::VLD3LNq16Pseudo:
+ case ARM::VLD3LNq32Pseudo:
+ case ARM::VLD3LNd8Pseudo_UPD:
+ case ARM::VLD3LNd16Pseudo_UPD:
+ case ARM::VLD3LNd32Pseudo_UPD:
+ case ARM::VLD3LNq16Pseudo_UPD:
+ case ARM::VLD3LNq32Pseudo_UPD:
+ case ARM::VLD4LNd8Pseudo:
+ case ARM::VLD4LNd16Pseudo:
+ case ARM::VLD4LNd32Pseudo:
+ case ARM::VLD4LNq16Pseudo:
+ case ARM::VLD4LNq32Pseudo:
+ case ARM::VLD4LNd8Pseudo_UPD:
+ case ARM::VLD4LNd16Pseudo_UPD:
+ case ARM::VLD4LNd32Pseudo_UPD:
+ case ARM::VLD4LNq16Pseudo_UPD:
+ case ARM::VLD4LNq32Pseudo_UPD:
+ case ARM::VST1LNq8Pseudo:
+ case ARM::VST1LNq16Pseudo:
+ case ARM::VST1LNq32Pseudo:
+ case ARM::VST1LNq8Pseudo_UPD:
+ case ARM::VST1LNq16Pseudo_UPD:
+ case ARM::VST1LNq32Pseudo_UPD:
+ case ARM::VST2LNd8Pseudo:
+ case ARM::VST2LNd16Pseudo:
+ case ARM::VST2LNd32Pseudo:
+ case ARM::VST2LNq16Pseudo:
+ case ARM::VST2LNq32Pseudo:
+ case ARM::VST2LNd8Pseudo_UPD:
+ case ARM::VST2LNd16Pseudo_UPD:
+ case ARM::VST2LNd32Pseudo_UPD:
+ case ARM::VST2LNq16Pseudo_UPD:
+ case ARM::VST2LNq32Pseudo_UPD:
+ case ARM::VST3LNd8Pseudo:
+ case ARM::VST3LNd16Pseudo:
+ case ARM::VST3LNd32Pseudo:
+ case ARM::VST3LNq16Pseudo:
+ case ARM::VST3LNq32Pseudo:
+ case ARM::VST3LNd8Pseudo_UPD:
+ case ARM::VST3LNd16Pseudo_UPD:
+ case ARM::VST3LNd32Pseudo_UPD:
+ case ARM::VST3LNq16Pseudo_UPD:
+ case ARM::VST3LNq32Pseudo_UPD:
+ case ARM::VST4LNd8Pseudo:
+ case ARM::VST4LNd16Pseudo:
+ case ARM::VST4LNd32Pseudo:
+ case ARM::VST4LNq16Pseudo:
+ case ARM::VST4LNq32Pseudo:
+ case ARM::VST4LNd8Pseudo_UPD:
+ case ARM::VST4LNd16Pseudo_UPD:
+ case ARM::VST4LNd32Pseudo_UPD:
+ case ARM::VST4LNq16Pseudo_UPD:
+ case ARM::VST4LNq32Pseudo_UPD:
+ ExpandLaneOp(MBBI);
+ return true;
+
+ case ARM::VTBL2Pseudo: ExpandVTBL(MBBI, ARM::VTBL2, false, 2); return true;
+ case ARM::VTBL3Pseudo: ExpandVTBL(MBBI, ARM::VTBL3, false, 3); return true;
+ case ARM::VTBL4Pseudo: ExpandVTBL(MBBI, ARM::VTBL4, false, 4); return true;
+ case ARM::VTBX2Pseudo: ExpandVTBL(MBBI, ARM::VTBX2, true, 2); return true;
+ case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true, 3); return true;
+ case ARM::VTBX4Pseudo: ExpandVTBL(MBBI, ARM::VTBX4, true, 4); return true;
+ }
+
+ return false;
+}
+
+bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
+ bool Modified = false;
+
+ MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ while (MBBI != E) {
+ MachineBasicBlock::iterator NMBBI = llvm::next(MBBI);
+ Modified |= ExpandMI(MBB, MBBI);
+ MBBI = NMBBI;
+ }
+
+ return Modified;
+}
+
+bool ARMExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
+ const TargetMachine &TM = MF.getTarget();
+ TII = static_cast<const ARMBaseInstrInfo*>(TM.getInstrInfo());
+ TRI = TM.getRegisterInfo();
+ STI = &TM.getSubtarget<ARMSubtarget>();
+ AFI = MF.getInfo<ARMFunctionInfo>();
+
+ bool Modified = false;
+ for (MachineFunction::iterator MFI = MF.begin(), E = MF.end(); MFI != E;
+ ++MFI)
+ Modified |= ExpandMBB(*MFI);
+ if (VerifyARMPseudo)
+ MF.verify(this, "After expanding ARM pseudo instructions.");
+ return Modified;
+}
+
+/// createARMExpandPseudoPass - returns an instance of the pseudo instruction
+/// expansion pass.
+FunctionPass *llvm::createARMExpandPseudoPass() {
+ return new ARMExpandPseudo();
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
new file mode 100644
index 0000000..9bc7ef2
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
@@ -0,0 +1,2120 @@
+//===-- ARMFastISel.cpp - ARM FastISel implementation ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the ARM-specific support for the FastISel class. Some
+// of the target-specific code is generated by tablegen in the file
+// ARMGenFastISel.inc, which is #included here.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMCallingConv.h"
+#include "ARMRegisterInfo.h"
+#include "ARMTargetMachine.h"
+#include "ARMSubtarget.h"
+#include "ARMConstantPoolValue.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
+#include "llvm/CallingConv.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
+#include "llvm/Operator.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+static cl::opt<bool>
+DisableARMFastISel("disable-arm-fast-isel",
+ cl::desc("Turn off experimental ARM fast-isel support"),
+ cl::init(false), cl::Hidden);
+
+extern cl::opt<bool> EnableARMLongCalls;
+
+namespace {
+
+ // All possible address modes, plus some.
+ typedef struct Address {
+ enum {
+ RegBase,
+ FrameIndexBase
+ } BaseType;
+
+ union {
+ unsigned Reg;
+ int FI;
+ } Base;
+
+ int Offset;
+
+ // Innocuous defaults for our address.
+ Address()
+ : BaseType(RegBase), Offset(0) {
+ Base.Reg = 0;
+ }
+ } Address;
+
+class ARMFastISel : public FastISel {
+
+ /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const ARMSubtarget *Subtarget;
+ const TargetMachine &TM;
+ const TargetInstrInfo &TII;
+ const TargetLowering &TLI;
+ ARMFunctionInfo *AFI;
+
+ // Convenience variables to avoid some queries.
+ bool isThumb;
+ LLVMContext *Context;
+
+ public:
+ explicit ARMFastISel(FunctionLoweringInfo &funcInfo)
+ : FastISel(funcInfo),
+ TM(funcInfo.MF->getTarget()),
+ TII(*TM.getInstrInfo()),
+ TLI(*TM.getTargetLowering()) {
+ Subtarget = &TM.getSubtarget<ARMSubtarget>();
+ AFI = funcInfo.MF->getInfo<ARMFunctionInfo>();
+ isThumb = AFI->isThumbFunction();
+ Context = &funcInfo.Fn->getContext();
+ }
+
+ // Code from FastISel.cpp.
+ virtual unsigned FastEmitInst_(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC);
+ virtual unsigned FastEmitInst_r(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill);
+ virtual unsigned FastEmitInst_rr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill);
+ virtual unsigned FastEmitInst_rrr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill,
+ unsigned Op2, bool Op2IsKill);
+ virtual unsigned FastEmitInst_ri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ uint64_t Imm);
+ virtual unsigned FastEmitInst_rf(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ const ConstantFP *FPImm);
+ virtual unsigned FastEmitInst_rri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill,
+ uint64_t Imm);
+ virtual unsigned FastEmitInst_i(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ uint64_t Imm);
+ virtual unsigned FastEmitInst_ii(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ uint64_t Imm1, uint64_t Imm2);
+
+ virtual unsigned FastEmitInst_extractsubreg(MVT RetVT,
+ unsigned Op0, bool Op0IsKill,
+ uint32_t Idx);
+
+ // Backend specific FastISel code.
+ virtual bool TargetSelectInstruction(const Instruction *I);
+ virtual unsigned TargetMaterializeConstant(const Constant *C);
+ virtual unsigned TargetMaterializeAlloca(const AllocaInst *AI);
+
+ #include "ARMGenFastISel.inc"
+
+ // Instruction selection routines.
+ private:
+ bool SelectLoad(const Instruction *I);
+ bool SelectStore(const Instruction *I);
+ bool SelectBranch(const Instruction *I);
+ bool SelectCmp(const Instruction *I);
+ bool SelectFPExt(const Instruction *I);
+ bool SelectFPTrunc(const Instruction *I);
+ bool SelectBinaryOp(const Instruction *I, unsigned ISDOpcode);
+ bool SelectSIToFP(const Instruction *I);
+ bool SelectFPToSI(const Instruction *I);
+ bool SelectSDiv(const Instruction *I);
+ bool SelectSRem(const Instruction *I);
+ bool SelectCall(const Instruction *I);
+ bool SelectSelect(const Instruction *I);
+ bool SelectRet(const Instruction *I);
+ bool SelectIntCast(const Instruction *I);
+
+ // Utility routines.
+ private:
+ bool isTypeLegal(Type *Ty, MVT &VT);
+ bool isLoadTypeLegal(Type *Ty, MVT &VT);
+ bool ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr);
+ bool ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr);
+ bool ARMComputeAddress(const Value *Obj, Address &Addr);
+ void ARMSimplifyAddress(Address &Addr, EVT VT);
+ unsigned ARMMaterializeFP(const ConstantFP *CFP, EVT VT);
+ unsigned ARMMaterializeInt(const Constant *C, EVT VT);
+ unsigned ARMMaterializeGV(const GlobalValue *GV, EVT VT);
+ unsigned ARMMoveToFPReg(EVT VT, unsigned SrcReg);
+ unsigned ARMMoveToIntReg(EVT VT, unsigned SrcReg);
+ unsigned ARMSelectCallOp(const GlobalValue *GV);
+
+ // Call handling routines.
+ private:
+ bool FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
+ unsigned &ResultReg);
+ CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool Return);
+ bool ProcessCallArgs(SmallVectorImpl<Value*> &Args,
+ SmallVectorImpl<unsigned> &ArgRegs,
+ SmallVectorImpl<MVT> &ArgVTs,
+ SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
+ SmallVectorImpl<unsigned> &RegArgs,
+ CallingConv::ID CC,
+ unsigned &NumBytes);
+ bool FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
+ const Instruction *I, CallingConv::ID CC,
+ unsigned &NumBytes);
+ bool ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call);
+
+ // OptionalDef handling routines.
+ private:
+ bool isARMNEONPred(const MachineInstr *MI);
+ bool DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR);
+ const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB);
+ void AddLoadStoreOperands(EVT VT, Address &Addr,
+ const MachineInstrBuilder &MIB,
+ unsigned Flags);
+};
+
+} // end anonymous namespace
+
+#include "ARMGenCallingConv.inc"
+
+// DefinesOptionalPredicate - This is different from DefinesPredicate in that
+// we don't care about implicit defs here, just places we'll need to add a
+// default CCReg argument. Sets CPSR if we're setting CPSR instead of CCR.
+bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) {
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (!MCID.hasOptionalDef())
+ return false;
+
+ // Look to see if our OptionalDef is defining CPSR or CCR.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef()) continue;
+ if (MO.getReg() == ARM::CPSR)
+ *CPSR = true;
+ }
+ return true;
+}
+
+bool ARMFastISel::isARMNEONPred(const MachineInstr *MI) {
+ const MCInstrDesc &MCID = MI->getDesc();
+
+ // If we're a thumb2 or not NEON function we were handled via isPredicable.
+ if ((MCID.TSFlags & ARMII::DomainMask) != ARMII::DomainNEON ||
+ AFI->isThumb2Function())
+ return false;
+
+ for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i)
+ if (MCID.OpInfo[i].isPredicate())
+ return true;
+
+ return false;
+}
+
+// If the machine is predicable go ahead and add the predicate operands, if
+// it needs default CC operands add those.
+// TODO: If we want to support thumb1 then we'll need to deal with optional
+// CPSR defs that need to be added before the remaining operands. See s_cc_out
+// for descriptions why.
+const MachineInstrBuilder &
+ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) {
+ MachineInstr *MI = &*MIB;
+
+ // Do we use a predicate? or...
+ // Are we NEON in ARM mode and have a predicate operand? If so, I know
+ // we're not predicable but add it anyways.
+ if (TII.isPredicable(MI) || isARMNEONPred(MI))
+ AddDefaultPred(MIB);
+
+ // Do we optionally set a predicate? Preds is size > 0 iff the predicate
+ // defines CPSR. All other OptionalDefines in ARM are the CCR register.
+ bool CPSR = false;
+ if (DefinesOptionalPredicate(MI, &CPSR)) {
+ if (CPSR)
+ AddDefaultT1CC(MIB);
+ else
+ AddDefaultCC(MIB);
+ }
+ return MIB;
+}
+
+unsigned ARMFastISel::FastEmitInst_(unsigned MachineInstOpcode,
+ const TargetRegisterClass* RC) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg));
+ return ResultReg;
+}
+
+unsigned ARMFastISel::FastEmitInst_r(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill));
+ else {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(II.ImplicitDefs[0]));
+ }
+ return ResultReg;
+}
+
+unsigned ARMFastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill));
+ else {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(II.ImplicitDefs[0]));
+ }
+ return ResultReg;
+}
+
+unsigned ARMFastISel::FastEmitInst_rrr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill,
+ unsigned Op2, bool Op2IsKill) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill)
+ .addReg(Op2, Op2IsKill * RegState::Kill));
+ else {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill)
+ .addReg(Op2, Op2IsKill * RegState::Kill));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(II.ImplicitDefs[0]));
+ }
+ return ResultReg;
+}
+
+unsigned ARMFastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ uint64_t Imm) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addImm(Imm));
+ else {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addImm(Imm));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(II.ImplicitDefs[0]));
+ }
+ return ResultReg;
+}
+
+unsigned ARMFastISel::FastEmitInst_rf(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ const ConstantFP *FPImm) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addFPImm(FPImm));
+ else {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addFPImm(FPImm));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(II.ImplicitDefs[0]));
+ }
+ return ResultReg;
+}
+
+unsigned ARMFastISel::FastEmitInst_rri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill,
+ uint64_t Imm) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill)
+ .addImm(Imm));
+ else {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill)
+ .addImm(Imm));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(II.ImplicitDefs[0]));
+ }
+ return ResultReg;
+}
+
+unsigned ARMFastISel::FastEmitInst_i(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ uint64_t Imm) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addImm(Imm));
+ else {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addImm(Imm));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(II.ImplicitDefs[0]));
+ }
+ return ResultReg;
+}
+
+unsigned ARMFastISel::FastEmitInst_ii(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ uint64_t Imm1, uint64_t Imm2) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addImm(Imm1).addImm(Imm2));
+ else {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addImm(Imm1).addImm(Imm2));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY),
+ ResultReg)
+ .addReg(II.ImplicitDefs[0]));
+ }
+ return ResultReg;
+}
+
+unsigned ARMFastISel::FastEmitInst_extractsubreg(MVT RetVT,
+ unsigned Op0, bool Op0IsKill,
+ uint32_t Idx) {
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
+ assert(TargetRegisterInfo::isVirtualRegister(Op0) &&
+ "Cannot yet extract from physregs");
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
+ DL, TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(Op0, getKillRegState(Op0IsKill), Idx));
+ return ResultReg;
+}
+
+// TODO: Don't worry about 64-bit now, but when this is fixed remove the
+// checks from the various callers.
+unsigned ARMFastISel::ARMMoveToFPReg(EVT VT, unsigned SrcReg) {
+ if (VT == MVT::f64) return 0;
+
+ unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::VMOVRS), MoveReg)
+ .addReg(SrcReg));
+ return MoveReg;
+}
+
+unsigned ARMFastISel::ARMMoveToIntReg(EVT VT, unsigned SrcReg) {
+ if (VT == MVT::i64) return 0;
+
+ unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::VMOVSR), MoveReg)
+ .addReg(SrcReg));
+ return MoveReg;
+}
+
+// For double width floating point we need to materialize two constants
+// (the high and the low) into integer registers then use a move to get
+// the combined constant into an FP reg.
+unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, EVT VT) {
+ const APFloat Val = CFP->getValueAPF();
+ bool is64bit = VT == MVT::f64;
+
+ // This checks to see if we can use VFP3 instructions to materialize
+ // a constant, otherwise we have to go through the constant pool.
+ if (TLI.isFPImmLegal(Val, VT)) {
+ int Imm;
+ unsigned Opc;
+ if (is64bit) {
+ Imm = ARM_AM::getFP64Imm(Val);
+ Opc = ARM::FCONSTD;
+ } else {
+ Imm = ARM_AM::getFP32Imm(Val);
+ Opc = ARM::FCONSTS;
+ }
+ unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
+ DestReg)
+ .addImm(Imm));
+ return DestReg;
+ }
+
+ // Require VFP2 for loading fp constants.
+ if (!Subtarget->hasVFP2()) return false;
+
+ // MachineConstantPool wants an explicit alignment.
+ unsigned Align = TD.getPrefTypeAlignment(CFP->getType());
+ if (Align == 0) {
+ // TODO: Figure out if this is correct.
+ Align = TD.getTypeAllocSize(CFP->getType());
+ }
+ unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
+ unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
+ unsigned Opc = is64bit ? ARM::VLDRD : ARM::VLDRS;
+
+ // The extra reg is for addrmode5.
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
+ DestReg)
+ .addConstantPoolIndex(Idx)
+ .addReg(0));
+ return DestReg;
+}
+
+unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, EVT VT) {
+
+ // For now 32-bit only.
+ if (VT != MVT::i32) return false;
+
+ unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
+
+ // If we can do this in a single instruction without a constant pool entry
+ // do so now.
+ const ConstantInt *CI = cast<ConstantInt>(C);
+ if (Subtarget->hasV6T2Ops() && isUInt<16>(CI->getSExtValue())) {
+ unsigned Opc = isThumb ? ARM::t2MOVi16 : ARM::MOVi16;
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc), DestReg)
+ .addImm(CI->getSExtValue()));
+ return DestReg;
+ }
+
+ // MachineConstantPool wants an explicit alignment.
+ unsigned Align = TD.getPrefTypeAlignment(C->getType());
+ if (Align == 0) {
+ // TODO: Figure out if this is correct.
+ Align = TD.getTypeAllocSize(C->getType());
+ }
+ unsigned Idx = MCP.getConstantPoolIndex(C, Align);
+
+ if (isThumb)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::t2LDRpci), DestReg)
+ .addConstantPoolIndex(Idx));
+ else
+ // The extra immediate is for addrmode2.
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::LDRcp), DestReg)
+ .addConstantPoolIndex(Idx)
+ .addImm(0));
+
+ return DestReg;
+}
+
+unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, EVT VT) {
+ // For now 32-bit only.
+ if (VT != MVT::i32) return 0;
+
+ Reloc::Model RelocM = TM.getRelocationModel();
+
+ // TODO: Need more magic for ARM PIC.
+ if (!isThumb && (RelocM == Reloc::PIC_)) return 0;
+
+ // MachineConstantPool wants an explicit alignment.
+ unsigned Align = TD.getPrefTypeAlignment(GV->getType());
+ if (Align == 0) {
+ // TODO: Figure out if this is correct.
+ Align = TD.getTypeAllocSize(GV->getType());
+ }
+
+ // Grab index.
+ unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb() ? 4 : 8);
+ unsigned Id = AFI->createPICLabelUId();
+ ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(GV, Id,
+ ARMCP::CPValue,
+ PCAdj);
+ unsigned Idx = MCP.getConstantPoolIndex(CPV, Align);
+
+ // Load value.
+ MachineInstrBuilder MIB;
+ unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
+ if (isThumb) {
+ unsigned Opc = (RelocM != Reloc::PIC_) ? ARM::t2LDRpci : ARM::t2LDRpci_pic;
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
+ .addConstantPoolIndex(Idx);
+ if (RelocM == Reloc::PIC_)
+ MIB.addImm(Id);
+ } else {
+ // The extra immediate is for addrmode2.
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::LDRcp),
+ DestReg)
+ .addConstantPoolIndex(Idx)
+ .addImm(0);
+ }
+ AddOptionalDefs(MIB);
+
+ if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) {
+ unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT));
+ if (isThumb)
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::t2LDRi12), NewDestReg)
+ .addReg(DestReg)
+ .addImm(0);
+ else
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::LDRi12),
+ NewDestReg)
+ .addReg(DestReg)
+ .addImm(0);
+ DestReg = NewDestReg;
+ AddOptionalDefs(MIB);
+ }
+
+ return DestReg;
+}
+
+unsigned ARMFastISel::TargetMaterializeConstant(const Constant *C) {
+ EVT VT = TLI.getValueType(C->getType(), true);
+
+ // Only handle simple types.
+ if (!VT.isSimple()) return 0;
+
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
+ return ARMMaterializeFP(CFP, VT);
+ else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
+ return ARMMaterializeGV(GV, VT);
+ else if (isa<ConstantInt>(C))
+ return ARMMaterializeInt(C, VT);
+
+ return 0;
+}
+
+unsigned ARMFastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
+ // Don't handle dynamic allocas.
+ if (!FuncInfo.StaticAllocaMap.count(AI)) return 0;
+
+ MVT VT;
+ if (!isLoadTypeLegal(AI->getType(), VT)) return false;
+
+ DenseMap<const AllocaInst*, int>::iterator SI =
+ FuncInfo.StaticAllocaMap.find(AI);
+
+ // This will get lowered later into the correct offsets and registers
+ // via rewriteXFrameIndex.
+ if (SI != FuncInfo.StaticAllocaMap.end()) {
+ TargetRegisterClass* RC = TLI.getRegClassFor(VT);
+ unsigned ResultReg = createResultReg(RC);
+ unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, *FuncInfo.InsertPt, DL,
+ TII.get(Opc), ResultReg)
+ .addFrameIndex(SI->second)
+ .addImm(0));
+ return ResultReg;
+ }
+
+ return 0;
+}
+
+bool ARMFastISel::isTypeLegal(Type *Ty, MVT &VT) {
+ EVT evt = TLI.getValueType(Ty, true);
+
+ // Only handle simple types.
+ if (evt == MVT::Other || !evt.isSimple()) return false;
+ VT = evt.getSimpleVT();
+
+ // Handle all legal types, i.e. a register that will directly hold this
+ // value.
+ return TLI.isTypeLegal(VT);
+}
+
+bool ARMFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) {
+ if (isTypeLegal(Ty, VT)) return true;
+
+ // If this is a type than can be sign or zero-extended to a basic operation
+ // go ahead and accept it now.
+ if (VT == MVT::i8 || VT == MVT::i16)
+ return true;
+
+ return false;
+}
+
+// Computes the address to get to an object.
+bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) {
+ // Some boilerplate from the X86 FastISel.
+ const User *U = NULL;
+ unsigned Opcode = Instruction::UserOp1;
+ if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
+ // Don't walk into other basic blocks unless the object is an alloca from
+ // another block, otherwise it may not have a virtual register assigned.
+ if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
+ FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
+ Opcode = I->getOpcode();
+ U = I;
+ }
+ } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
+ Opcode = C->getOpcode();
+ U = C;
+ }
+
+ if (PointerType *Ty = dyn_cast<PointerType>(Obj->getType()))
+ if (Ty->getAddressSpace() > 255)
+ // Fast instruction selection doesn't support the special
+ // address spaces.
+ return false;
+
+ switch (Opcode) {
+ default:
+ break;
+ case Instruction::BitCast: {
+ // Look through bitcasts.
+ return ARMComputeAddress(U->getOperand(0), Addr);
+ }
+ case Instruction::IntToPtr: {
+ // Look past no-op inttoptrs.
+ if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
+ return ARMComputeAddress(U->getOperand(0), Addr);
+ break;
+ }
+ case Instruction::PtrToInt: {
+ // Look past no-op ptrtoints.
+ if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
+ return ARMComputeAddress(U->getOperand(0), Addr);
+ break;
+ }
+ case Instruction::GetElementPtr: {
+ Address SavedAddr = Addr;
+ int TmpOffset = Addr.Offset;
+
+ // Iterate through the GEP folding the constants into offsets where
+ // we can.
+ gep_type_iterator GTI = gep_type_begin(U);
+ for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
+ i != e; ++i, ++GTI) {
+ const Value *Op = *i;
+ if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+ const StructLayout *SL = TD.getStructLayout(STy);
+ unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
+ TmpOffset += SL->getElementOffset(Idx);
+ } else {
+ uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType());
+ for (;;) {
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
+ // Constant-offset addressing.
+ TmpOffset += CI->getSExtValue() * S;
+ break;
+ }
+ if (isa<AddOperator>(Op) &&
+ (!isa<Instruction>(Op) ||
+ FuncInfo.MBBMap[cast<Instruction>(Op)->getParent()]
+ == FuncInfo.MBB) &&
+ isa<ConstantInt>(cast<AddOperator>(Op)->getOperand(1))) {
+ // An add (in the same block) with a constant operand. Fold the
+ // constant.
+ ConstantInt *CI =
+ cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
+ TmpOffset += CI->getSExtValue() * S;
+ // Iterate on the other operand.
+ Op = cast<AddOperator>(Op)->getOperand(0);
+ continue;
+ }
+ // Unsupported
+ goto unsupported_gep;
+ }
+ }
+ }
+
+ // Try to grab the base operand now.
+ Addr.Offset = TmpOffset;
+ if (ARMComputeAddress(U->getOperand(0), Addr)) return true;
+
+ // We failed, restore everything and try the other options.
+ Addr = SavedAddr;
+
+ unsupported_gep:
+ break;
+ }
+ case Instruction::Alloca: {
+ const AllocaInst *AI = cast<AllocaInst>(Obj);
+ DenseMap<const AllocaInst*, int>::iterator SI =
+ FuncInfo.StaticAllocaMap.find(AI);
+ if (SI != FuncInfo.StaticAllocaMap.end()) {
+ Addr.BaseType = Address::FrameIndexBase;
+ Addr.Base.FI = SI->second;
+ return true;
+ }
+ break;
+ }
+ }
+
+ // Materialize the global variable's address into a reg which can
+ // then be used later to load the variable.
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(Obj)) {
+ unsigned Tmp = ARMMaterializeGV(GV, TLI.getValueType(Obj->getType()));
+ if (Tmp == 0) return false;
+
+ Addr.Base.Reg = Tmp;
+ return true;
+ }
+
+ // Try to get this in a register if nothing else has worked.
+ if (Addr.Base.Reg == 0) Addr.Base.Reg = getRegForValue(Obj);
+ return Addr.Base.Reg != 0;
+}
+
+void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT) {
+
+ assert(VT.isSimple() && "Non-simple types are invalid here!");
+
+ bool needsLowering = false;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default:
+ assert(false && "Unhandled load/store type!");
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ // Integer loads/stores handle 12-bit offsets.
+ needsLowering = ((Addr.Offset & 0xfff) != Addr.Offset);
+ break;
+ case MVT::f32:
+ case MVT::f64:
+ // Floating point operands handle 8-bit offsets.
+ needsLowering = ((Addr.Offset & 0xff) != Addr.Offset);
+ break;
+ }
+
+ // If this is a stack pointer and the offset needs to be simplified then
+ // put the alloca address into a register, set the base type back to
+ // register and continue. This should almost never happen.
+ if (needsLowering && Addr.BaseType == Address::FrameIndexBase) {
+ TargetRegisterClass *RC = isThumb ? ARM::tGPRRegisterClass :
+ ARM::GPRRegisterClass;
+ unsigned ResultReg = createResultReg(RC);
+ unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, *FuncInfo.InsertPt, DL,
+ TII.get(Opc), ResultReg)
+ .addFrameIndex(Addr.Base.FI)
+ .addImm(0));
+ Addr.Base.Reg = ResultReg;
+ Addr.BaseType = Address::RegBase;
+ }
+
+ // Since the offset is too large for the load/store instruction
+ // get the reg+offset into a register.
+ if (needsLowering) {
+ Addr.Base.Reg = FastEmit_ri_(MVT::i32, ISD::ADD, Addr.Base.Reg,
+ /*Op0IsKill*/false, Addr.Offset, MVT::i32);
+ Addr.Offset = 0;
+ }
+}
+
+void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr,
+ const MachineInstrBuilder &MIB,
+ unsigned Flags) {
+ // addrmode5 output depends on the selection dag addressing dividing the
+ // offset by 4 that it then later multiplies. Do this here as well.
+ if (VT.getSimpleVT().SimpleTy == MVT::f32 ||
+ VT.getSimpleVT().SimpleTy == MVT::f64)
+ Addr.Offset /= 4;
+
+ // Frame base works a bit differently. Handle it separately.
+ if (Addr.BaseType == Address::FrameIndexBase) {
+ int FI = Addr.Base.FI;
+ int Offset = Addr.Offset;
+ MachineMemOperand *MMO =
+ FuncInfo.MF->getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(FI, Offset),
+ Flags,
+ MFI.getObjectSize(FI),
+ MFI.getObjectAlignment(FI));
+ // Now add the rest of the operands.
+ MIB.addFrameIndex(FI);
+
+ // ARM halfword load/stores need an additional operand.
+ if (!isThumb && VT.getSimpleVT().SimpleTy == MVT::i16) MIB.addReg(0);
+
+ MIB.addImm(Addr.Offset);
+ MIB.addMemOperand(MMO);
+ } else {
+ // Now add the rest of the operands.
+ MIB.addReg(Addr.Base.Reg);
+
+ // ARM halfword load/stores need an additional operand.
+ if (!isThumb && VT.getSimpleVT().SimpleTy == MVT::i16) MIB.addReg(0);
+
+ MIB.addImm(Addr.Offset);
+ }
+ AddOptionalDefs(MIB);
+}
+
+bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr) {
+
+ assert(VT.isSimple() && "Non-simple types are invalid here!");
+ unsigned Opc;
+ TargetRegisterClass *RC;
+ switch (VT.getSimpleVT().SimpleTy) {
+ // This is mostly going to be Neon/vector support.
+ default: return false;
+ case MVT::i16:
+ Opc = isThumb ? ARM::t2LDRHi12 : ARM::LDRH;
+ RC = ARM::GPRRegisterClass;
+ break;
+ case MVT::i8:
+ Opc = isThumb ? ARM::t2LDRBi12 : ARM::LDRBi12;
+ RC = ARM::GPRRegisterClass;
+ break;
+ case MVT::i32:
+ Opc = isThumb ? ARM::t2LDRi12 : ARM::LDRi12;
+ RC = ARM::GPRRegisterClass;
+ break;
+ case MVT::f32:
+ Opc = ARM::VLDRS;
+ RC = TLI.getRegClassFor(VT);
+ break;
+ case MVT::f64:
+ Opc = ARM::VLDRD;
+ RC = TLI.getRegClassFor(VT);
+ break;
+ }
+ // Simplify this down to something we can handle.
+ ARMSimplifyAddress(Addr, VT);
+
+ // Create the base instruction, then add the operands.
+ ResultReg = createResultReg(RC);
+ MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc), ResultReg);
+ AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOLoad);
+ return true;
+}
+
+bool ARMFastISel::SelectLoad(const Instruction *I) {
+ // Atomic loads need special handling.
+ if (cast<LoadInst>(I)->isAtomic())
+ return false;
+
+ // Verify we have a legal type before going any further.
+ MVT VT;
+ if (!isLoadTypeLegal(I->getType(), VT))
+ return false;
+
+ // See if we can handle this address.
+ Address Addr;
+ if (!ARMComputeAddress(I->getOperand(0), Addr)) return false;
+
+ unsigned ResultReg;
+ if (!ARMEmitLoad(VT, ResultReg, Addr)) return false;
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr) {
+ unsigned StrOpc;
+ switch (VT.getSimpleVT().SimpleTy) {
+ // This is mostly going to be Neon/vector support.
+ default: return false;
+ case MVT::i1: {
+ unsigned Res = createResultReg(isThumb ? ARM::tGPRRegisterClass :
+ ARM::GPRRegisterClass);
+ unsigned Opc = isThumb ? ARM::t2ANDri : ARM::ANDri;
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc), Res)
+ .addReg(SrcReg).addImm(1));
+ SrcReg = Res;
+ } // Fallthrough here.
+ case MVT::i8:
+ StrOpc = isThumb ? ARM::t2STRBi12 : ARM::STRBi12;
+ break;
+ case MVT::i16:
+ StrOpc = isThumb ? ARM::t2STRHi12 : ARM::STRH;
+ break;
+ case MVT::i32:
+ StrOpc = isThumb ? ARM::t2STRi12 : ARM::STRi12;
+ break;
+ case MVT::f32:
+ if (!Subtarget->hasVFP2()) return false;
+ StrOpc = ARM::VSTRS;
+ break;
+ case MVT::f64:
+ if (!Subtarget->hasVFP2()) return false;
+ StrOpc = ARM::VSTRD;
+ break;
+ }
+ // Simplify this down to something we can handle.
+ ARMSimplifyAddress(Addr, VT);
+
+ // Create the base instruction, then add the operands.
+ MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(StrOpc))
+ .addReg(SrcReg, getKillRegState(true));
+ AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOStore);
+ return true;
+}
+
+bool ARMFastISel::SelectStore(const Instruction *I) {
+ Value *Op0 = I->getOperand(0);
+ unsigned SrcReg = 0;
+
+ // Atomic stores need special handling.
+ if (cast<StoreInst>(I)->isAtomic())
+ return false;
+
+ // Verify we have a legal type before going any further.
+ MVT VT;
+ if (!isLoadTypeLegal(I->getOperand(0)->getType(), VT))
+ return false;
+
+ // Get the value to be stored into a register.
+ SrcReg = getRegForValue(Op0);
+ if (SrcReg == 0) return false;
+
+ // See if we can handle this address.
+ Address Addr;
+ if (!ARMComputeAddress(I->getOperand(1), Addr))
+ return false;
+
+ if (!ARMEmitStore(VT, SrcReg, Addr)) return false;
+ return true;
+}
+
+static ARMCC::CondCodes getComparePred(CmpInst::Predicate Pred) {
+ switch (Pred) {
+ // Needs two compares...
+ case CmpInst::FCMP_ONE:
+ case CmpInst::FCMP_UEQ:
+ default:
+ // AL is our "false" for now. The other two need more compares.
+ return ARMCC::AL;
+ case CmpInst::ICMP_EQ:
+ case CmpInst::FCMP_OEQ:
+ return ARMCC::EQ;
+ case CmpInst::ICMP_SGT:
+ case CmpInst::FCMP_OGT:
+ return ARMCC::GT;
+ case CmpInst::ICMP_SGE:
+ case CmpInst::FCMP_OGE:
+ return ARMCC::GE;
+ case CmpInst::ICMP_UGT:
+ case CmpInst::FCMP_UGT:
+ return ARMCC::HI;
+ case CmpInst::FCMP_OLT:
+ return ARMCC::MI;
+ case CmpInst::ICMP_ULE:
+ case CmpInst::FCMP_OLE:
+ return ARMCC::LS;
+ case CmpInst::FCMP_ORD:
+ return ARMCC::VC;
+ case CmpInst::FCMP_UNO:
+ return ARMCC::VS;
+ case CmpInst::FCMP_UGE:
+ return ARMCC::PL;
+ case CmpInst::ICMP_SLT:
+ case CmpInst::FCMP_ULT:
+ return ARMCC::LT;
+ case CmpInst::ICMP_SLE:
+ case CmpInst::FCMP_ULE:
+ return ARMCC::LE;
+ case CmpInst::FCMP_UNE:
+ case CmpInst::ICMP_NE:
+ return ARMCC::NE;
+ case CmpInst::ICMP_UGE:
+ return ARMCC::HS;
+ case CmpInst::ICMP_ULT:
+ return ARMCC::LO;
+ }
+}
+
+bool ARMFastISel::SelectBranch(const Instruction *I) {
+ const BranchInst *BI = cast<BranchInst>(I);
+ MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
+ MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
+
+ // Simple branch support.
+
+ // If we can, avoid recomputing the compare - redoing it could lead to wonky
+ // behavior.
+ // TODO: Factor this out.
+ if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
+ MVT SourceVT;
+ Type *Ty = CI->getOperand(0)->getType();
+ if (CI->hasOneUse() && (CI->getParent() == I->getParent())
+ && isTypeLegal(Ty, SourceVT)) {
+ bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy());
+ if (isFloat && !Subtarget->hasVFP2())
+ return false;
+
+ unsigned CmpOpc;
+ switch (SourceVT.SimpleTy) {
+ default: return false;
+ // TODO: Verify compares.
+ case MVT::f32:
+ CmpOpc = ARM::VCMPES;
+ break;
+ case MVT::f64:
+ CmpOpc = ARM::VCMPED;
+ break;
+ case MVT::i32:
+ CmpOpc = isThumb ? ARM::t2CMPrr : ARM::CMPrr;
+ break;
+ }
+
+ // Get the compare predicate.
+ // Try to take advantage of fallthrough opportunities.
+ CmpInst::Predicate Predicate = CI->getPredicate();
+ if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
+ std::swap(TBB, FBB);
+ Predicate = CmpInst::getInversePredicate(Predicate);
+ }
+
+ ARMCC::CondCodes ARMPred = getComparePred(Predicate);
+
+ // We may not handle every CC for now.
+ if (ARMPred == ARMCC::AL) return false;
+
+ unsigned Arg1 = getRegForValue(CI->getOperand(0));
+ if (Arg1 == 0) return false;
+
+ unsigned Arg2 = getRegForValue(CI->getOperand(1));
+ if (Arg2 == 0) return false;
+
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(CmpOpc))
+ .addReg(Arg1).addReg(Arg2));
+
+ // For floating point we need to move the result to a comparison register
+ // that we can then use for branches.
+ if (isFloat)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::FMSTAT)));
+
+ unsigned BrOpc = isThumb ? ARM::t2Bcc : ARM::Bcc;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc))
+ .addMBB(TBB).addImm(ARMPred).addReg(ARM::CPSR);
+ FastEmitBranch(FBB, DL);
+ FuncInfo.MBB->addSuccessor(TBB);
+ return true;
+ }
+ } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
+ MVT SourceVT;
+ if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
+ (isLoadTypeLegal(TI->getOperand(0)->getType(), SourceVT))) {
+ unsigned TstOpc = isThumb ? ARM::t2TSTri : ARM::TSTri;
+ unsigned OpReg = getRegForValue(TI->getOperand(0));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TstOpc))
+ .addReg(OpReg).addImm(1));
+
+ unsigned CCMode = ARMCC::NE;
+ if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
+ std::swap(TBB, FBB);
+ CCMode = ARMCC::EQ;
+ }
+
+ unsigned BrOpc = isThumb ? ARM::t2Bcc : ARM::Bcc;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc))
+ .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR);
+
+ FastEmitBranch(FBB, DL);
+ FuncInfo.MBB->addSuccessor(TBB);
+ return true;
+ }
+ }
+
+ unsigned CmpReg = getRegForValue(BI->getCondition());
+ if (CmpReg == 0) return false;
+
+ // We've been divorced from our compare! Our block was split, and
+ // now our compare lives in a predecessor block. We musn't
+ // re-compare here, as the children of the compare aren't guaranteed
+ // live across the block boundary (we *could* check for this).
+ // Regardless, the compare has been done in the predecessor block,
+ // and it left a value for us in a virtual register. Ergo, we test
+ // the one-bit value left in the virtual register.
+ unsigned TstOpc = isThumb ? ARM::t2TSTri : ARM::TSTri;
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TstOpc))
+ .addReg(CmpReg).addImm(1));
+
+ unsigned CCMode = ARMCC::NE;
+ if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
+ std::swap(TBB, FBB);
+ CCMode = ARMCC::EQ;
+ }
+
+ unsigned BrOpc = isThumb ? ARM::t2Bcc : ARM::Bcc;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc))
+ .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR);
+ FastEmitBranch(FBB, DL);
+ FuncInfo.MBB->addSuccessor(TBB);
+ return true;
+}
+
+bool ARMFastISel::SelectCmp(const Instruction *I) {
+ const CmpInst *CI = cast<CmpInst>(I);
+
+ MVT VT;
+ Type *Ty = CI->getOperand(0)->getType();
+ if (!isTypeLegal(Ty, VT))
+ return false;
+
+ bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy());
+ if (isFloat && !Subtarget->hasVFP2())
+ return false;
+
+ unsigned CmpOpc;
+ unsigned CondReg;
+ switch (VT.SimpleTy) {
+ default: return false;
+ // TODO: Verify compares.
+ case MVT::f32:
+ CmpOpc = ARM::VCMPES;
+ CondReg = ARM::FPSCR;
+ break;
+ case MVT::f64:
+ CmpOpc = ARM::VCMPED;
+ CondReg = ARM::FPSCR;
+ break;
+ case MVT::i32:
+ CmpOpc = isThumb ? ARM::t2CMPrr : ARM::CMPrr;
+ CondReg = ARM::CPSR;
+ break;
+ }
+
+ // Get the compare predicate.
+ ARMCC::CondCodes ARMPred = getComparePred(CI->getPredicate());
+
+ // We may not handle every CC for now.
+ if (ARMPred == ARMCC::AL) return false;
+
+ unsigned Arg1 = getRegForValue(CI->getOperand(0));
+ if (Arg1 == 0) return false;
+
+ unsigned Arg2 = getRegForValue(CI->getOperand(1));
+ if (Arg2 == 0) return false;
+
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc))
+ .addReg(Arg1).addReg(Arg2));
+
+ // For floating point we need to move the result to a comparison register
+ // that we can then use for branches.
+ if (isFloat)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::FMSTAT)));
+
+ // Now set a register based on the comparison. Explicitly set the predicates
+ // here.
+ unsigned MovCCOpc = isThumb ? ARM::t2MOVCCi : ARM::MOVCCi;
+ TargetRegisterClass *RC = isThumb ? ARM::rGPRRegisterClass
+ : ARM::GPRRegisterClass;
+ unsigned DestReg = createResultReg(RC);
+ Constant *Zero
+ = ConstantInt::get(Type::getInt32Ty(*Context), 0);
+ unsigned ZeroReg = TargetMaterializeConstant(Zero);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), DestReg)
+ .addReg(ZeroReg).addImm(1)
+ .addImm(ARMPred).addReg(CondReg);
+
+ UpdateValueMap(I, DestReg);
+ return true;
+}
+
+bool ARMFastISel::SelectFPExt(const Instruction *I) {
+ // Make sure we have VFP and that we're extending float to double.
+ if (!Subtarget->hasVFP2()) return false;
+
+ Value *V = I->getOperand(0);
+ if (!I->getType()->isDoubleTy() ||
+ !V->getType()->isFloatTy()) return false;
+
+ unsigned Op = getRegForValue(V);
+ if (Op == 0) return false;
+
+ unsigned Result = createResultReg(ARM::DPRRegisterClass);
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::VCVTDS), Result)
+ .addReg(Op));
+ UpdateValueMap(I, Result);
+ return true;
+}
+
+bool ARMFastISel::SelectFPTrunc(const Instruction *I) {
+ // Make sure we have VFP and that we're truncating double to float.
+ if (!Subtarget->hasVFP2()) return false;
+
+ Value *V = I->getOperand(0);
+ if (!(I->getType()->isFloatTy() &&
+ V->getType()->isDoubleTy())) return false;
+
+ unsigned Op = getRegForValue(V);
+ if (Op == 0) return false;
+
+ unsigned Result = createResultReg(ARM::SPRRegisterClass);
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::VCVTSD), Result)
+ .addReg(Op));
+ UpdateValueMap(I, Result);
+ return true;
+}
+
+bool ARMFastISel::SelectSIToFP(const Instruction *I) {
+ // Make sure we have VFP.
+ if (!Subtarget->hasVFP2()) return false;
+
+ MVT DstVT;
+ Type *Ty = I->getType();
+ if (!isTypeLegal(Ty, DstVT))
+ return false;
+
+ // FIXME: Handle sign-extension where necessary.
+ if (!I->getOperand(0)->getType()->isIntegerTy(32))
+ return false;
+
+ unsigned Op = getRegForValue(I->getOperand(0));
+ if (Op == 0) return false;
+
+ // The conversion routine works on fp-reg to fp-reg and the operand above
+ // was an integer, move it to the fp registers if possible.
+ unsigned FP = ARMMoveToFPReg(MVT::f32, Op);
+ if (FP == 0) return false;
+
+ unsigned Opc;
+ if (Ty->isFloatTy()) Opc = ARM::VSITOS;
+ else if (Ty->isDoubleTy()) Opc = ARM::VSITOD;
+ else return false;
+
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(DstVT));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
+ ResultReg)
+ .addReg(FP));
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool ARMFastISel::SelectFPToSI(const Instruction *I) {
+ // Make sure we have VFP.
+ if (!Subtarget->hasVFP2()) return false;
+
+ MVT DstVT;
+ Type *RetTy = I->getType();
+ if (!isTypeLegal(RetTy, DstVT))
+ return false;
+
+ unsigned Op = getRegForValue(I->getOperand(0));
+ if (Op == 0) return false;
+
+ unsigned Opc;
+ Type *OpTy = I->getOperand(0)->getType();
+ if (OpTy->isFloatTy()) Opc = ARM::VTOSIZS;
+ else if (OpTy->isDoubleTy()) Opc = ARM::VTOSIZD;
+ else return false;
+
+ // f64->s32 or f32->s32 both need an intermediate f32 reg.
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::f32));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
+ ResultReg)
+ .addReg(Op));
+
+ // This result needs to be in an integer register, but the conversion only
+ // takes place in fp-regs.
+ unsigned IntReg = ARMMoveToIntReg(DstVT, ResultReg);
+ if (IntReg == 0) return false;
+
+ UpdateValueMap(I, IntReg);
+ return true;
+}
+
+bool ARMFastISel::SelectSelect(const Instruction *I) {
+ MVT VT;
+ if (!isTypeLegal(I->getType(), VT))
+ return false;
+
+ // Things need to be register sized for register moves.
+ if (VT != MVT::i32) return false;
+ const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
+
+ unsigned CondReg = getRegForValue(I->getOperand(0));
+ if (CondReg == 0) return false;
+ unsigned Op1Reg = getRegForValue(I->getOperand(1));
+ if (Op1Reg == 0) return false;
+ unsigned Op2Reg = getRegForValue(I->getOperand(2));
+ if (Op2Reg == 0) return false;
+
+ unsigned CmpOpc = isThumb ? ARM::t2TSTri : ARM::TSTri;
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc))
+ .addReg(CondReg).addImm(1));
+ unsigned ResultReg = createResultReg(RC);
+ unsigned MovCCOpc = isThumb ? ARM::t2MOVCCr : ARM::MOVCCr;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), ResultReg)
+ .addReg(Op1Reg).addReg(Op2Reg)
+ .addImm(ARMCC::EQ).addReg(ARM::CPSR);
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool ARMFastISel::SelectSDiv(const Instruction *I) {
+ MVT VT;
+ Type *Ty = I->getType();
+ if (!isTypeLegal(Ty, VT))
+ return false;
+
+ // If we have integer div support we should have selected this automagically.
+ // In case we have a real miss go ahead and return false and we'll pick
+ // it up later.
+ if (Subtarget->hasDivide()) return false;
+
+ // Otherwise emit a libcall.
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (VT == MVT::i8)
+ LC = RTLIB::SDIV_I8;
+ else if (VT == MVT::i16)
+ LC = RTLIB::SDIV_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::SDIV_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::SDIV_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::SDIV_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!");
+
+ return ARMEmitLibcall(I, LC);
+}
+
+bool ARMFastISel::SelectSRem(const Instruction *I) {
+ MVT VT;
+ Type *Ty = I->getType();
+ if (!isTypeLegal(Ty, VT))
+ return false;
+
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (VT == MVT::i8)
+ LC = RTLIB::SREM_I8;
+ else if (VT == MVT::i16)
+ LC = RTLIB::SREM_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::SREM_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::SREM_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::SREM_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!");
+
+ return ARMEmitLibcall(I, LC);
+}
+
+bool ARMFastISel::SelectBinaryOp(const Instruction *I, unsigned ISDOpcode) {
+ EVT VT = TLI.getValueType(I->getType(), true);
+
+ // We can get here in the case when we want to use NEON for our fp
+ // operations, but can't figure out how to. Just use the vfp instructions
+ // if we have them.
+ // FIXME: It'd be nice to use NEON instructions.
+ Type *Ty = I->getType();
+ bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy());
+ if (isFloat && !Subtarget->hasVFP2())
+ return false;
+
+ unsigned Op1 = getRegForValue(I->getOperand(0));
+ if (Op1 == 0) return false;
+
+ unsigned Op2 = getRegForValue(I->getOperand(1));
+ if (Op2 == 0) return false;
+
+ unsigned Opc;
+ bool is64bit = VT == MVT::f64 || VT == MVT::i64;
+ switch (ISDOpcode) {
+ default: return false;
+ case ISD::FADD:
+ Opc = is64bit ? ARM::VADDD : ARM::VADDS;
+ break;
+ case ISD::FSUB:
+ Opc = is64bit ? ARM::VSUBD : ARM::VSUBS;
+ break;
+ case ISD::FMUL:
+ Opc = is64bit ? ARM::VMULD : ARM::VMULS;
+ break;
+ }
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc), ResultReg)
+ .addReg(Op1).addReg(Op2));
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+// Call Handling Code
+
+bool ARMFastISel::FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src,
+ EVT SrcVT, unsigned &ResultReg) {
+ unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc,
+ Src, /*TODO: Kill=*/false);
+
+ if (RR != 0) {
+ ResultReg = RR;
+ return true;
+ } else
+ return false;
+}
+
+// This is largely taken directly from CCAssignFnForNode - we don't support
+// varargs in FastISel so that part has been removed.
+// TODO: We may not support all of this.
+CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC, bool Return) {
+ switch (CC) {
+ default:
+ llvm_unreachable("Unsupported calling convention");
+ case CallingConv::Fast:
+ // Ignore fastcc. Silence compiler warnings.
+ (void)RetFastCC_ARM_APCS;
+ (void)FastCC_ARM_APCS;
+ // Fallthrough
+ case CallingConv::C:
+ // Use target triple & subtarget features to do actual dispatch.
+ if (Subtarget->isAAPCS_ABI()) {
+ if (Subtarget->hasVFP2() &&
+ FloatABIType == FloatABI::Hard)
+ return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
+ else
+ return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
+ } else
+ return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
+ case CallingConv::ARM_AAPCS_VFP:
+ return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
+ case CallingConv::ARM_AAPCS:
+ return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
+ case CallingConv::ARM_APCS:
+ return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
+ }
+}
+
+bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
+ SmallVectorImpl<unsigned> &ArgRegs,
+ SmallVectorImpl<MVT> &ArgVTs,
+ SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
+ SmallVectorImpl<unsigned> &RegArgs,
+ CallingConv::ID CC,
+ unsigned &NumBytes) {
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CC, false, *FuncInfo.MF, TM, ArgLocs, *Context);
+ CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC, false));
+
+ // Get a count of how many bytes are to be pushed on the stack.
+ NumBytes = CCInfo.getNextStackOffset();
+
+ // Issue CALLSEQ_START
+ unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(AdjStackDown))
+ .addImm(NumBytes));
+
+ // Process the args.
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ unsigned Arg = ArgRegs[VA.getValNo()];
+ MVT ArgVT = ArgVTs[VA.getValNo()];
+
+ // We don't handle NEON/vector parameters yet.
+ if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64)
+ return false;
+
+ // Handle arg promotion, etc.
+ switch (VA.getLocInfo()) {
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt: {
+ bool Emitted = FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
+ Arg, ArgVT, Arg);
+ assert(Emitted && "Failed to emit a sext!"); (void)Emitted;
+ Emitted = true;
+ ArgVT = VA.getLocVT();
+ break;
+ }
+ case CCValAssign::ZExt: {
+ bool Emitted = FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
+ Arg, ArgVT, Arg);
+ assert(Emitted && "Failed to emit a zext!"); (void)Emitted;
+ Emitted = true;
+ ArgVT = VA.getLocVT();
+ break;
+ }
+ case CCValAssign::AExt: {
+ bool Emitted = FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(),
+ Arg, ArgVT, Arg);
+ if (!Emitted)
+ Emitted = FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
+ Arg, ArgVT, Arg);
+ if (!Emitted)
+ Emitted = FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
+ Arg, ArgVT, Arg);
+
+ assert(Emitted && "Failed to emit a aext!"); (void)Emitted;
+ ArgVT = VA.getLocVT();
+ break;
+ }
+ case CCValAssign::BCvt: {
+ unsigned BC = FastEmit_r(ArgVT, VA.getLocVT(), ISD::BITCAST, Arg,
+ /*TODO: Kill=*/false);
+ assert(BC != 0 && "Failed to emit a bitcast!");
+ Arg = BC;
+ ArgVT = VA.getLocVT();
+ break;
+ }
+ default: llvm_unreachable("Unknown arg promotion!");
+ }
+
+ // Now copy/store arg to correct locations.
+ if (VA.isRegLoc() && !VA.needsCustom()) {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ VA.getLocReg())
+ .addReg(Arg);
+ RegArgs.push_back(VA.getLocReg());
+ } else if (VA.needsCustom()) {
+ // TODO: We need custom lowering for vector (v2f64) args.
+ if (VA.getLocVT() != MVT::f64) return false;
+
+ CCValAssign &NextVA = ArgLocs[++i];
+
+ // TODO: Only handle register args for now.
+ if(!(VA.isRegLoc() && NextVA.isRegLoc())) return false;
+
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::VMOVRRD), VA.getLocReg())
+ .addReg(NextVA.getLocReg(), RegState::Define)
+ .addReg(Arg));
+ RegArgs.push_back(VA.getLocReg());
+ RegArgs.push_back(NextVA.getLocReg());
+ } else {
+ assert(VA.isMemLoc());
+ // Need to store on the stack.
+ Address Addr;
+ Addr.BaseType = Address::RegBase;
+ Addr.Base.Reg = ARM::SP;
+ Addr.Offset = VA.getLocMemOffset();
+
+ if (!ARMEmitStore(ArgVT, Arg, Addr)) return false;
+ }
+ }
+ return true;
+}
+
+bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
+ const Instruction *I, CallingConv::ID CC,
+ unsigned &NumBytes) {
+ // Issue CALLSEQ_END
+ unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(AdjStackUp))
+ .addImm(NumBytes).addImm(0));
+
+ // Now the return value.
+ if (RetVT != MVT::isVoid) {
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CC, false, *FuncInfo.MF, TM, RVLocs, *Context);
+ CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true));
+
+ // Copy all of the result registers out of their specified physreg.
+ if (RVLocs.size() == 2 && RetVT == MVT::f64) {
+ // For this move we copy into two registers and then move into the
+ // double fp reg we want.
+ EVT DestVT = RVLocs[0].getValVT();
+ TargetRegisterClass* DstRC = TLI.getRegClassFor(DestVT);
+ unsigned ResultReg = createResultReg(DstRC);
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::VMOVDRR), ResultReg)
+ .addReg(RVLocs[0].getLocReg())
+ .addReg(RVLocs[1].getLocReg()));
+
+ UsedRegs.push_back(RVLocs[0].getLocReg());
+ UsedRegs.push_back(RVLocs[1].getLocReg());
+
+ // Finally update the result.
+ UpdateValueMap(I, ResultReg);
+ } else {
+ assert(RVLocs.size() == 1 &&"Can't handle non-double multi-reg retvals!");
+ EVT CopyVT = RVLocs[0].getValVT();
+ TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT);
+
+ unsigned ResultReg = createResultReg(DstRC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(RVLocs[0].getLocReg());
+ UsedRegs.push_back(RVLocs[0].getLocReg());
+
+ // Finally update the result.
+ UpdateValueMap(I, ResultReg);
+ }
+ }
+
+ return true;
+}
+
+bool ARMFastISel::SelectRet(const Instruction *I) {
+ const ReturnInst *Ret = cast<ReturnInst>(I);
+ const Function &F = *I->getParent()->getParent();
+
+ if (!FuncInfo.CanLowerReturn)
+ return false;
+
+ if (F.isVarArg())
+ return false;
+
+ CallingConv::ID CC = F.getCallingConv();
+ if (Ret->getNumOperands() > 0) {
+ SmallVector<ISD::OutputArg, 4> Outs;
+ GetReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(),
+ Outs, TLI);
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ValLocs;
+ CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs,I->getContext());
+ CCInfo.AnalyzeReturn(Outs, CCAssignFnForCall(CC, true /* is Ret */));
+
+ const Value *RV = Ret->getOperand(0);
+ unsigned Reg = getRegForValue(RV);
+ if (Reg == 0)
+ return false;
+
+ // Only handle a single return value for now.
+ if (ValLocs.size() != 1)
+ return false;
+
+ CCValAssign &VA = ValLocs[0];
+
+ // Don't bother handling odd stuff for now.
+ if (VA.getLocInfo() != CCValAssign::Full)
+ return false;
+ // Only handle register returns for now.
+ if (!VA.isRegLoc())
+ return false;
+ // TODO: For now, don't try to handle cases where getLocInfo()
+ // says Full but the types don't match.
+ if (TLI.getValueType(RV->getType()) != VA.getValVT())
+ return false;
+
+ // Make the copy.
+ unsigned SrcReg = Reg + VA.getValNo();
+ unsigned DstReg = VA.getLocReg();
+ const TargetRegisterClass* SrcRC = MRI.getRegClass(SrcReg);
+ // Avoid a cross-class copy. This is very unlikely.
+ if (!SrcRC->contains(DstReg))
+ return false;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ DstReg).addReg(SrcReg);
+
+ // Mark the register as live out of the function.
+ MRI.addLiveOut(VA.getLocReg());
+ }
+
+ unsigned RetOpc = isThumb ? ARM::tBX_RET : ARM::BX_RET;
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(RetOpc)));
+ return true;
+}
+
+unsigned ARMFastISel::ARMSelectCallOp(const GlobalValue *GV) {
+
+ // Darwin needs the r9 versions of the opcodes.
+ bool isDarwin = Subtarget->isTargetDarwin();
+ if (isThumb) {
+ return isDarwin ? ARM::tBLr9 : ARM::tBL;
+ } else {
+ return isDarwin ? ARM::BLr9 : ARM::BL;
+ }
+}
+
+// A quick function that will emit a call for a named libcall in F with the
+// vector of passed arguments for the Instruction in I. We can assume that we
+// can emit a call for any libcall we can produce. This is an abridged version
+// of the full call infrastructure since we won't need to worry about things
+// like computed function pointers or strange arguments at call sites.
+// TODO: Try to unify this and the normal call bits for ARM, then try to unify
+// with X86.
+bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
+ CallingConv::ID CC = TLI.getLibcallCallingConv(Call);
+
+ // Handle *simple* calls for now.
+ Type *RetTy = I->getType();
+ MVT RetVT;
+ if (RetTy->isVoidTy())
+ RetVT = MVT::isVoid;
+ else if (!isTypeLegal(RetTy, RetVT))
+ return false;
+
+ // TODO: For now if we have long calls specified we don't handle the call.
+ if (EnableARMLongCalls) return false;
+
+ // Set up the argument vectors.
+ SmallVector<Value*, 8> Args;
+ SmallVector<unsigned, 8> ArgRegs;
+ SmallVector<MVT, 8> ArgVTs;
+ SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
+ Args.reserve(I->getNumOperands());
+ ArgRegs.reserve(I->getNumOperands());
+ ArgVTs.reserve(I->getNumOperands());
+ ArgFlags.reserve(I->getNumOperands());
+ for (unsigned i = 0; i < I->getNumOperands(); ++i) {
+ Value *Op = I->getOperand(i);
+ unsigned Arg = getRegForValue(Op);
+ if (Arg == 0) return false;
+
+ Type *ArgTy = Op->getType();
+ MVT ArgVT;
+ if (!isTypeLegal(ArgTy, ArgVT)) return false;
+
+ ISD::ArgFlagsTy Flags;
+ unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
+ Flags.setOrigAlign(OriginalAlignment);
+
+ Args.push_back(Op);
+ ArgRegs.push_back(Arg);
+ ArgVTs.push_back(ArgVT);
+ ArgFlags.push_back(Flags);
+ }
+
+ // Handle the arguments now that we've gotten them.
+ SmallVector<unsigned, 4> RegArgs;
+ unsigned NumBytes;
+ if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes))
+ return false;
+
+ // Issue the call, BLr9 for darwin, BL otherwise.
+ // TODO: Turn this into the table of arm call ops.
+ MachineInstrBuilder MIB;
+ unsigned CallOpc = ARMSelectCallOp(NULL);
+ if(isThumb)
+ // Explicitly adding the predicate here.
+ MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(CallOpc)))
+ .addExternalSymbol(TLI.getLibcallName(Call));
+ else
+ // Explicitly adding the predicate here.
+ MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(CallOpc))
+ .addExternalSymbol(TLI.getLibcallName(Call)));
+
+ // Add implicit physical register uses to the call.
+ for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
+ MIB.addReg(RegArgs[i]);
+
+ // Finish off the call including any return values.
+ SmallVector<unsigned, 4> UsedRegs;
+ if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes)) return false;
+
+ // Set all unused physreg defs as dead.
+ static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
+
+ return true;
+}
+
+bool ARMFastISel::SelectCall(const Instruction *I) {
+ const CallInst *CI = cast<CallInst>(I);
+ const Value *Callee = CI->getCalledValue();
+
+ // Can't handle inline asm or worry about intrinsics yet.
+ if (isa<InlineAsm>(Callee) || isa<IntrinsicInst>(CI)) return false;
+
+ // Only handle global variable Callees.
+ const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
+ if (!GV)
+ return false;
+
+ // Check the calling convention.
+ ImmutableCallSite CS(CI);
+ CallingConv::ID CC = CS.getCallingConv();
+
+ // TODO: Avoid some calling conventions?
+
+ // Let SDISel handle vararg functions.
+ PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
+ FunctionType *FTy = cast<FunctionType>(PT->getElementType());
+ if (FTy->isVarArg())
+ return false;
+
+ // Handle *simple* calls for now.
+ Type *RetTy = I->getType();
+ MVT RetVT;
+ if (RetTy->isVoidTy())
+ RetVT = MVT::isVoid;
+ else if (!isTypeLegal(RetTy, RetVT))
+ return false;
+
+ // TODO: For now if we have long calls specified we don't handle the call.
+ if (EnableARMLongCalls) return false;
+
+ // Set up the argument vectors.
+ SmallVector<Value*, 8> Args;
+ SmallVector<unsigned, 8> ArgRegs;
+ SmallVector<MVT, 8> ArgVTs;
+ SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
+ Args.reserve(CS.arg_size());
+ ArgRegs.reserve(CS.arg_size());
+ ArgVTs.reserve(CS.arg_size());
+ ArgFlags.reserve(CS.arg_size());
+ for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
+ i != e; ++i) {
+ unsigned Arg = getRegForValue(*i);
+
+ if (Arg == 0)
+ return false;
+ ISD::ArgFlagsTy Flags;
+ unsigned AttrInd = i - CS.arg_begin() + 1;
+ if (CS.paramHasAttr(AttrInd, Attribute::SExt))
+ Flags.setSExt();
+ if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
+ Flags.setZExt();
+
+ // FIXME: Only handle *easy* calls for now.
+ if (CS.paramHasAttr(AttrInd, Attribute::InReg) ||
+ CS.paramHasAttr(AttrInd, Attribute::StructRet) ||
+ CS.paramHasAttr(AttrInd, Attribute::Nest) ||
+ CS.paramHasAttr(AttrInd, Attribute::ByVal))
+ return false;
+
+ Type *ArgTy = (*i)->getType();
+ MVT ArgVT;
+ if (!isTypeLegal(ArgTy, ArgVT))
+ return false;
+ unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
+ Flags.setOrigAlign(OriginalAlignment);
+
+ Args.push_back(*i);
+ ArgRegs.push_back(Arg);
+ ArgVTs.push_back(ArgVT);
+ ArgFlags.push_back(Flags);
+ }
+
+ // Handle the arguments now that we've gotten them.
+ SmallVector<unsigned, 4> RegArgs;
+ unsigned NumBytes;
+ if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes))
+ return false;
+
+ // Issue the call, BLr9 for darwin, BL otherwise.
+ // TODO: Turn this into the table of arm call ops.
+ MachineInstrBuilder MIB;
+ unsigned CallOpc = ARMSelectCallOp(GV);
+ // Explicitly adding the predicate here.
+ if(isThumb)
+ // Explicitly adding the predicate here.
+ MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(CallOpc)))
+ .addGlobalAddress(GV, 0, 0);
+ else
+ // Explicitly adding the predicate here.
+ MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(CallOpc))
+ .addGlobalAddress(GV, 0, 0));
+
+ // Add implicit physical register uses to the call.
+ for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
+ MIB.addReg(RegArgs[i]);
+
+ // Finish off the call including any return values.
+ SmallVector<unsigned, 4> UsedRegs;
+ if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes)) return false;
+
+ // Set all unused physreg defs as dead.
+ static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
+
+ return true;
+
+}
+
+bool ARMFastISel::SelectIntCast(const Instruction *I) {
+ // On ARM, in general, integer casts don't involve legal types; this code
+ // handles promotable integers. The high bits for a type smaller than
+ // the register size are assumed to be undefined.
+ Type *DestTy = I->getType();
+ Value *Op = I->getOperand(0);
+ Type *SrcTy = Op->getType();
+
+ EVT SrcVT, DestVT;
+ SrcVT = TLI.getValueType(SrcTy, true);
+ DestVT = TLI.getValueType(DestTy, true);
+
+ if (isa<TruncInst>(I)) {
+ if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8)
+ return false;
+ if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1)
+ return false;
+
+ unsigned SrcReg = getRegForValue(Op);
+ if (!SrcReg) return false;
+
+ // Because the high bits are undefined, a truncate doesn't generate
+ // any code.
+ UpdateValueMap(I, SrcReg);
+ return true;
+ }
+ if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8)
+ return false;
+
+ unsigned Opc;
+ bool isZext = isa<ZExtInst>(I);
+ bool isBoolZext = false;
+ if (!SrcVT.isSimple())
+ return false;
+ switch (SrcVT.getSimpleVT().SimpleTy) {
+ default: return false;
+ case MVT::i16:
+ if (!Subtarget->hasV6Ops()) return false;
+ if (isZext)
+ Opc = isThumb ? ARM::t2UXTH : ARM::UXTH;
+ else
+ Opc = isThumb ? ARM::t2SXTH : ARM::SXTH;
+ break;
+ case MVT::i8:
+ if (!Subtarget->hasV6Ops()) return false;
+ if (isZext)
+ Opc = isThumb ? ARM::t2UXTB : ARM::UXTB;
+ else
+ Opc = isThumb ? ARM::t2SXTB : ARM::SXTB;
+ break;
+ case MVT::i1:
+ if (isZext) {
+ Opc = isThumb ? ARM::t2ANDri : ARM::ANDri;
+ isBoolZext = true;
+ break;
+ }
+ return false;
+ }
+
+ // FIXME: We could save an instruction in many cases by special-casing
+ // load instructions.
+ unsigned SrcReg = getRegForValue(Op);
+ if (!SrcReg) return false;
+
+ unsigned DestReg = createResultReg(TLI.getRegClassFor(MVT::i32));
+ MachineInstrBuilder MIB;
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
+ .addReg(SrcReg);
+ if (isBoolZext)
+ MIB.addImm(1);
+ else
+ MIB.addImm(0);
+ AddOptionalDefs(MIB);
+ UpdateValueMap(I, DestReg);
+ return true;
+}
+
+// TODO: SoftFP support.
+bool ARMFastISel::TargetSelectInstruction(const Instruction *I) {
+
+ switch (I->getOpcode()) {
+ case Instruction::Load:
+ return SelectLoad(I);
+ case Instruction::Store:
+ return SelectStore(I);
+ case Instruction::Br:
+ return SelectBranch(I);
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ return SelectCmp(I);
+ case Instruction::FPExt:
+ return SelectFPExt(I);
+ case Instruction::FPTrunc:
+ return SelectFPTrunc(I);
+ case Instruction::SIToFP:
+ return SelectSIToFP(I);
+ case Instruction::FPToSI:
+ return SelectFPToSI(I);
+ case Instruction::FAdd:
+ return SelectBinaryOp(I, ISD::FADD);
+ case Instruction::FSub:
+ return SelectBinaryOp(I, ISD::FSUB);
+ case Instruction::FMul:
+ return SelectBinaryOp(I, ISD::FMUL);
+ case Instruction::SDiv:
+ return SelectSDiv(I);
+ case Instruction::SRem:
+ return SelectSRem(I);
+ case Instruction::Call:
+ return SelectCall(I);
+ case Instruction::Select:
+ return SelectSelect(I);
+ case Instruction::Ret:
+ return SelectRet(I);
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ return SelectIntCast(I);
+ default: break;
+ }
+ return false;
+}
+
+namespace llvm {
+ llvm::FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo) {
+ // Completely untested on non-darwin.
+ const TargetMachine &TM = funcInfo.MF->getTarget();
+
+ // Darwin and thumb1 only for now.
+ const ARMSubtarget *Subtarget = &TM.getSubtarget<ARMSubtarget>();
+ if (Subtarget->isTargetDarwin() && !Subtarget->isThumb1Only() &&
+ !DisableARMFastISel)
+ return new ARMFastISel(funcInfo);
+ return 0;
+ }
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp
new file mode 100644
index 0000000..2d1de6f
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp
@@ -0,0 +1,1087 @@
+//=======- ARMFrameLowering.cpp - ARM Frame Information --------*- C++ -*-====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the ARM implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMFrameLowering.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMBaseRegisterInfo.h"
+#include "ARMMachineFunctionInfo.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Target/TargetOptions.h"
+
+using namespace llvm;
+
+/// hasFP - Return true if the specified function should have a dedicated frame
+/// pointer register. This is true if the function has variable sized allocas
+/// or if frame pointer elimination is disabled.
+bool ARMFrameLowering::hasFP(const MachineFunction &MF) const {
+ const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
+
+ // Mac OS X requires FP not to be clobbered for backtracing purpose.
+ if (STI.isTargetDarwin())
+ return true;
+
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ // Always eliminate non-leaf frame pointers.
+ return ((DisableFramePointerElim(MF) && MFI->hasCalls()) ||
+ RegInfo->needsStackRealignment(MF) ||
+ MFI->hasVarSizedObjects() ||
+ MFI->isFrameAddressTaken());
+}
+
+/// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
+/// not required, we reserve argument space for call sites in the function
+/// immediately on entry to the current function. This eliminates the need for
+/// add/sub sp brackets around call sites. Returns true if the call frame is
+/// included as part of the stack frame.
+bool ARMFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
+ const MachineFrameInfo *FFI = MF.getFrameInfo();
+ unsigned CFSize = FFI->getMaxCallFrameSize();
+ // It's not always a good idea to include the call frame as part of the
+ // stack frame. ARM (especially Thumb) has small immediate offset to
+ // address the stack frame. So a large call frame can cause poor codegen
+ // and may even makes it impossible to scavenge a register.
+ if (CFSize >= ((1 << 12) - 1) / 2) // Half of imm12
+ return false;
+
+ return !MF.getFrameInfo()->hasVarSizedObjects();
+}
+
+/// canSimplifyCallFramePseudos - If there is a reserved call frame, the
+/// call frame pseudos can be simplified. Unlike most targets, having a FP
+/// is not sufficient here since we still may reference some objects via SP
+/// even when FP is available in Thumb2 mode.
+bool
+ARMFrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
+ return hasReservedCallFrame(MF) || MF.getFrameInfo()->hasVarSizedObjects();
+}
+
+static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) {
+ for (unsigned i = 0; CSRegs[i]; ++i)
+ if (Reg == CSRegs[i])
+ return true;
+ return false;
+}
+
+static bool isCSRestore(MachineInstr *MI,
+ const ARMBaseInstrInfo &TII,
+ const unsigned *CSRegs) {
+ // Integer spill area is handled with "pop".
+ if (MI->getOpcode() == ARM::LDMIA_RET ||
+ MI->getOpcode() == ARM::t2LDMIA_RET ||
+ MI->getOpcode() == ARM::LDMIA_UPD ||
+ MI->getOpcode() == ARM::t2LDMIA_UPD ||
+ MI->getOpcode() == ARM::VLDMDIA_UPD) {
+ // The first two operands are predicates. The last two are
+ // imp-def and imp-use of SP. Check everything in between.
+ for (int i = 5, e = MI->getNumOperands(); i != e; ++i)
+ if (!isCalleeSavedRegister(MI->getOperand(i).getReg(), CSRegs))
+ return false;
+ return true;
+ }
+ if ((MI->getOpcode() == ARM::LDR_POST_IMM ||
+ MI->getOpcode() == ARM::LDR_POST_REG ||
+ MI->getOpcode() == ARM::t2LDR_POST) &&
+ isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs) &&
+ MI->getOperand(1).getReg() == ARM::SP)
+ return true;
+
+ return false;
+}
+
+static void
+emitSPUpdate(bool isARM,
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
+ DebugLoc dl, const ARMBaseInstrInfo &TII,
+ int NumBytes, unsigned MIFlags = MachineInstr::NoFlags) {
+ if (isARM)
+ emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes,
+ ARMCC::AL, 0, TII, MIFlags);
+ else
+ emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes,
+ ARMCC::AL, 0, TII, MIFlags);
+}
+
+void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front();
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ const ARMBaseRegisterInfo *RegInfo =
+ static_cast<const ARMBaseRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ const ARMBaseInstrInfo &TII =
+ *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo());
+ assert(!AFI->isThumb1OnlyFunction() &&
+ "This emitPrologue does not support Thumb1!");
+ bool isARM = !AFI->isThumbFunction();
+ unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
+ unsigned NumBytes = MFI->getStackSize();
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+ unsigned FramePtr = RegInfo->getFrameRegister(MF);
+
+ // Determine the sizes of each callee-save spill areas and record which frame
+ // belongs to which callee-save spill areas.
+ unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
+ int FramePtrSpillFI = 0;
+
+ // Allocate the vararg register save area. This is not counted in NumBytes.
+ if (VARegSaveSize)
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, -VARegSaveSize,
+ MachineInstr::FrameSetup);
+
+ if (!AFI->hasStackFrame()) {
+ if (NumBytes != 0)
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
+ MachineInstr::FrameSetup);
+ return;
+ }
+
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ int FI = CSI[i].getFrameIdx();
+ switch (Reg) {
+ case ARM::R4:
+ case ARM::R5:
+ case ARM::R6:
+ case ARM::R7:
+ case ARM::LR:
+ if (Reg == FramePtr)
+ FramePtrSpillFI = FI;
+ AFI->addGPRCalleeSavedArea1Frame(FI);
+ GPRCS1Size += 4;
+ break;
+ case ARM::R8:
+ case ARM::R9:
+ case ARM::R10:
+ case ARM::R11:
+ if (Reg == FramePtr)
+ FramePtrSpillFI = FI;
+ if (STI.isTargetDarwin()) {
+ AFI->addGPRCalleeSavedArea2Frame(FI);
+ GPRCS2Size += 4;
+ } else {
+ AFI->addGPRCalleeSavedArea1Frame(FI);
+ GPRCS1Size += 4;
+ }
+ break;
+ default:
+ AFI->addDPRCalleeSavedAreaFrame(FI);
+ DPRCSSize += 8;
+ }
+ }
+
+ // Move past area 1.
+ if (GPRCS1Size > 0) MBBI++;
+
+ // Set FP to point to the stack slot that contains the previous FP.
+ // For Darwin, FP is R7, which has now been stored in spill area 1.
+ // Otherwise, if this is not Darwin, all the callee-saved registers go
+ // into spill area 1, including the FP in R11. In either case, it is
+ // now safe to emit this assignment.
+ bool HasFP = hasFP(MF);
+ if (HasFP) {
+ unsigned ADDriOpc = !AFI->isThumbFunction() ? ARM::ADDri : ARM::t2ADDri;
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, dl, TII.get(ADDriOpc), FramePtr)
+ .addFrameIndex(FramePtrSpillFI).addImm(0)
+ .setMIFlag(MachineInstr::FrameSetup);
+ AddDefaultCC(AddDefaultPred(MIB));
+ }
+
+ // Move past area 2.
+ if (GPRCS2Size > 0) MBBI++;
+
+ // Determine starting offsets of spill areas.
+ unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize);
+ unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize;
+ unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size;
+ if (HasFP)
+ AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) +
+ NumBytes);
+ AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
+ AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
+ AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
+
+ // Move past area 3.
+ if (DPRCSSize > 0) {
+ MBBI++;
+ // Since vpush register list cannot have gaps, there may be multiple vpush
+ // instructions in the prologue.
+ while (MBBI->getOpcode() == ARM::VSTMDDB_UPD)
+ MBBI++;
+ }
+
+ NumBytes = DPRCSOffset;
+ if (NumBytes) {
+ // Adjust SP after all the callee-save spills.
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
+ MachineInstr::FrameSetup);
+ if (HasFP && isARM)
+ // Restore from fp only in ARM mode: e.g. sub sp, r7, #24
+ // Note it's not safe to do this in Thumb2 mode because it would have
+ // taken two instructions:
+ // mov sp, r7
+ // sub sp, #24
+ // If an interrupt is taken between the two instructions, then sp is in
+ // an inconsistent state (pointing to the middle of callee-saved area).
+ // The interrupt handler can end up clobbering the registers.
+ AFI->setShouldRestoreSPFromFP(true);
+ }
+
+ if (STI.isTargetELF() && hasFP(MF))
+ MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() -
+ AFI->getFramePtrSpillOffset());
+
+ AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
+ AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
+ AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
+
+ // If we need dynamic stack realignment, do it here. Be paranoid and make
+ // sure if we also have VLAs, we have a base pointer for frame access.
+ if (RegInfo->needsStackRealignment(MF)) {
+ unsigned MaxAlign = MFI->getMaxAlignment();
+ assert (!AFI->isThumb1OnlyFunction());
+ if (!AFI->isThumbFunction()) {
+ // Emit bic sp, sp, MaxAlign
+ AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl,
+ TII.get(ARM::BICri), ARM::SP)
+ .addReg(ARM::SP, RegState::Kill)
+ .addImm(MaxAlign-1)));
+ } else {
+ // We cannot use sp as source/dest register here, thus we're emitting the
+ // following sequence:
+ // mov r4, sp
+ // bic r4, r4, MaxAlign
+ // mov sp, r4
+ // FIXME: It will be better just to find spare register here.
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4)
+ .addReg(ARM::SP, RegState::Kill));
+ AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl,
+ TII.get(ARM::t2BICri), ARM::R4)
+ .addReg(ARM::R4, RegState::Kill)
+ .addImm(MaxAlign-1)));
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
+ .addReg(ARM::R4, RegState::Kill));
+ }
+
+ AFI->setShouldRestoreSPFromFP(true);
+ }
+
+ // If we need a base pointer, set it up here. It's whatever the value
+ // of the stack pointer is at this point. Any variable size objects
+ // will be allocated after this, so we can still use the base pointer
+ // to reference locals.
+ // FIXME: Clarify FrameSetup flags here.
+ if (RegInfo->hasBasePointer(MF)) {
+ if (isARM)
+ BuildMI(MBB, MBBI, dl,
+ TII.get(ARM::MOVr), RegInfo->getBaseRegister())
+ .addReg(ARM::SP)
+ .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
+ else
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),
+ RegInfo->getBaseRegister())
+ .addReg(ARM::SP));
+ }
+
+ // If the frame has variable sized objects then the epilogue must restore
+ // the sp from fp. We can assume there's an FP here since hasFP already
+ // checks for hasVarSizedObjects.
+ if (MFI->hasVarSizedObjects())
+ AFI->setShouldRestoreSPFromFP(true);
+}
+
+void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ assert(MBBI->getDesc().isReturn() &&
+ "Can only insert epilog into returning blocks");
+ unsigned RetOpcode = MBBI->getOpcode();
+ DebugLoc dl = MBBI->getDebugLoc();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
+ const ARMBaseInstrInfo &TII =
+ *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo());
+ assert(!AFI->isThumb1OnlyFunction() &&
+ "This emitEpilogue does not support Thumb1!");
+ bool isARM = !AFI->isThumbFunction();
+
+ unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
+ int NumBytes = (int)MFI->getStackSize();
+ unsigned FramePtr = RegInfo->getFrameRegister(MF);
+
+ if (!AFI->hasStackFrame()) {
+ if (NumBytes != 0)
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
+ } else {
+ // Unwind MBBI to point to first LDR / VLDRD.
+ const unsigned *CSRegs = RegInfo->getCalleeSavedRegs();
+ if (MBBI != MBB.begin()) {
+ do
+ --MBBI;
+ while (MBBI != MBB.begin() && isCSRestore(MBBI, TII, CSRegs));
+ if (!isCSRestore(MBBI, TII, CSRegs))
+ ++MBBI;
+ }
+
+ // Move SP to start of FP callee save spill area.
+ NumBytes -= (AFI->getGPRCalleeSavedArea1Size() +
+ AFI->getGPRCalleeSavedArea2Size() +
+ AFI->getDPRCalleeSavedAreaSize());
+
+ // Reset SP based on frame pointer only if the stack frame extends beyond
+ // frame pointer stack slot or target is ELF and the function has FP.
+ if (AFI->shouldRestoreSPFromFP()) {
+ NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
+ if (NumBytes) {
+ if (isARM)
+ emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
+ ARMCC::AL, 0, TII);
+ else {
+ // It's not possible to restore SP from FP in a single instruction.
+ // For Darwin, this looks like:
+ // mov sp, r7
+ // sub sp, #24
+ // This is bad, if an interrupt is taken after the mov, sp is in an
+ // inconsistent state.
+ // Use the first callee-saved register as a scratch register.
+ assert(MF.getRegInfo().isPhysRegUsed(ARM::R4) &&
+ "No scratch register to restore SP from FP!");
+ emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
+ ARMCC::AL, 0, TII);
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),
+ ARM::SP)
+ .addReg(ARM::R4));
+ }
+ } else {
+ // Thumb2 or ARM.
+ if (isARM)
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP)
+ .addReg(FramePtr).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
+ else
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),
+ ARM::SP)
+ .addReg(FramePtr));
+ }
+ } else if (NumBytes)
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
+
+ // Increment past our save areas.
+ if (AFI->getDPRCalleeSavedAreaSize()) {
+ MBBI++;
+ // Since vpop register list cannot have gaps, there may be multiple vpop
+ // instructions in the epilogue.
+ while (MBBI->getOpcode() == ARM::VLDMDIA_UPD)
+ MBBI++;
+ }
+ if (AFI->getGPRCalleeSavedArea2Size()) MBBI++;
+ if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;
+ }
+
+ if (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNdiND ||
+ RetOpcode == ARM::TCRETURNri || RetOpcode == ARM::TCRETURNriND) {
+ // Tail call return: adjust the stack pointer and jump to callee.
+ MBBI = MBB.getLastNonDebugInstr();
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+
+ // Jump to label or value in register.
+ if (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNdiND) {
+ unsigned TCOpcode = (RetOpcode == ARM::TCRETURNdi)
+ ? (STI.isThumb() ? ARM::tTAILJMPd : ARM::TAILJMPd)
+ : (STI.isThumb() ? ARM::tTAILJMPdND : ARM::TAILJMPdND);
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode));
+ if (JumpTarget.isGlobal())
+ MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
+ JumpTarget.getTargetFlags());
+ else {
+ assert(JumpTarget.isSymbol());
+ MIB.addExternalSymbol(JumpTarget.getSymbolName(),
+ JumpTarget.getTargetFlags());
+ }
+
+ // Add the default predicate in Thumb mode.
+ if (STI.isThumb()) MIB.addImm(ARMCC::AL).addReg(0);
+ } else if (RetOpcode == ARM::TCRETURNri) {
+ BuildMI(MBB, MBBI, dl,
+ TII.get(STI.isThumb() ? ARM::tTAILJMPr : ARM::TAILJMPr)).
+ addReg(JumpTarget.getReg(), RegState::Kill);
+ } else if (RetOpcode == ARM::TCRETURNriND) {
+ BuildMI(MBB, MBBI, dl,
+ TII.get(STI.isThumb() ? ARM::tTAILJMPrND : ARM::TAILJMPrND)).
+ addReg(JumpTarget.getReg(), RegState::Kill);
+ }
+
+ MachineInstr *NewMI = prior(MBBI);
+ for (unsigned i = 1, e = MBBI->getNumOperands(); i != e; ++i)
+ NewMI->addOperand(MBBI->getOperand(i));
+
+ // Delete the pseudo instruction TCRETURN.
+ MBB.erase(MBBI);
+ MBBI = NewMI;
+ }
+
+ if (VARegSaveSize)
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, VARegSaveSize);
+}
+
+/// getFrameIndexReference - Provide a base+offset reference to an FI slot for
+/// debug info. It's the same as what we use for resolving the code-gen
+/// references for now. FIXME: This can go wrong when references are
+/// SP-relative and simple call frames aren't used.
+int
+ARMFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
+ unsigned &FrameReg) const {
+ return ResolveFrameIndexReference(MF, FI, FrameReg, 0);
+}
+
+int
+ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
+ int FI, unsigned &FrameReg,
+ int SPAdj) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const ARMBaseRegisterInfo *RegInfo =
+ static_cast<const ARMBaseRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ int Offset = MFI->getObjectOffset(FI) + MFI->getStackSize();
+ int FPOffset = Offset - AFI->getFramePtrSpillOffset();
+ bool isFixed = MFI->isFixedObjectIndex(FI);
+
+ FrameReg = ARM::SP;
+ Offset += SPAdj;
+ if (AFI->isGPRCalleeSavedArea1Frame(FI))
+ return Offset - AFI->getGPRCalleeSavedArea1Offset();
+ else if (AFI->isGPRCalleeSavedArea2Frame(FI))
+ return Offset - AFI->getGPRCalleeSavedArea2Offset();
+ else if (AFI->isDPRCalleeSavedAreaFrame(FI))
+ return Offset - AFI->getDPRCalleeSavedAreaOffset();
+
+ // When dynamically realigning the stack, use the frame pointer for
+ // parameters, and the stack/base pointer for locals.
+ if (RegInfo->needsStackRealignment(MF)) {
+ assert (hasFP(MF) && "dynamic stack realignment without a FP!");
+ if (isFixed) {
+ FrameReg = RegInfo->getFrameRegister(MF);
+ Offset = FPOffset;
+ } else if (MFI->hasVarSizedObjects()) {
+ assert(RegInfo->hasBasePointer(MF) &&
+ "VLAs and dynamic stack alignment, but missing base pointer!");
+ FrameReg = RegInfo->getBaseRegister();
+ }
+ return Offset;
+ }
+
+ // If there is a frame pointer, use it when we can.
+ if (hasFP(MF) && AFI->hasStackFrame()) {
+ // Use frame pointer to reference fixed objects. Use it for locals if
+ // there are VLAs (and thus the SP isn't reliable as a base).
+ if (isFixed || (MFI->hasVarSizedObjects() &&
+ !RegInfo->hasBasePointer(MF))) {
+ FrameReg = RegInfo->getFrameRegister(MF);
+ return FPOffset;
+ } else if (MFI->hasVarSizedObjects()) {
+ assert(RegInfo->hasBasePointer(MF) && "missing base pointer!");
+ if (AFI->isThumb2Function()) {
+ // Try to use the frame pointer if we can, else use the base pointer
+ // since it's available. This is handy for the emergency spill slot, in
+ // particular.
+ if (FPOffset >= -255 && FPOffset < 0) {
+ FrameReg = RegInfo->getFrameRegister(MF);
+ return FPOffset;
+ }
+ }
+ } else if (AFI->isThumb2Function()) {
+ // Use add <rd>, sp, #<imm8>
+ // ldr <rd>, [sp, #<imm8>]
+ // if at all possible to save space.
+ if (Offset >= 0 && (Offset & 3) == 0 && Offset <= 1020)
+ return Offset;
+ // In Thumb2 mode, the negative offset is very limited. Try to avoid
+ // out of range references. ldr <rt>,[<rn>, #-<imm8>]
+ if (FPOffset >= -255 && FPOffset < 0) {
+ FrameReg = RegInfo->getFrameRegister(MF);
+ return FPOffset;
+ }
+ } else if (Offset > (FPOffset < 0 ? -FPOffset : FPOffset)) {
+ // Otherwise, use SP or FP, whichever is closer to the stack slot.
+ FrameReg = RegInfo->getFrameRegister(MF);
+ return FPOffset;
+ }
+ }
+ // Use the base pointer if we have one.
+ if (RegInfo->hasBasePointer(MF))
+ FrameReg = RegInfo->getBaseRegister();
+ return Offset;
+}
+
+int ARMFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
+ int FI) const {
+ unsigned FrameReg;
+ return getFrameIndexReference(MF, FI, FrameReg);
+}
+
+void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ unsigned StmOpc, unsigned StrOpc,
+ bool NoGap,
+ bool(*Func)(unsigned, bool),
+ unsigned MIFlags) const {
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+ DebugLoc DL;
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ SmallVector<std::pair<unsigned,bool>, 4> Regs;
+ unsigned i = CSI.size();
+ while (i != 0) {
+ unsigned LastReg = 0;
+ for (; i != 0; --i) {
+ unsigned Reg = CSI[i-1].getReg();
+ if (!(Func)(Reg, STI.isTargetDarwin())) continue;
+
+ // Add the callee-saved register as live-in unless it's LR and
+ // @llvm.returnaddress is called. If LR is returned for
+ // @llvm.returnaddress then it's already added to the function and
+ // entry block live-in sets.
+ bool isKill = true;
+ if (Reg == ARM::LR) {
+ if (MF.getFrameInfo()->isReturnAddressTaken() &&
+ MF.getRegInfo().isLiveIn(Reg))
+ isKill = false;
+ }
+
+ if (isKill)
+ MBB.addLiveIn(Reg);
+
+ // If NoGap is true, push consecutive registers and then leave the rest
+ // for other instructions. e.g.
+ // vpush {d8, d10, d11} -> vpush {d8}, vpush {d10, d11}
+ if (NoGap && LastReg && LastReg != Reg-1)
+ break;
+ LastReg = Reg;
+ Regs.push_back(std::make_pair(Reg, isKill));
+ }
+
+ if (Regs.empty())
+ continue;
+ if (Regs.size() > 1 || StrOpc== 0) {
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(StmOpc), ARM::SP)
+ .addReg(ARM::SP).setMIFlags(MIFlags));
+ for (unsigned i = 0, e = Regs.size(); i < e; ++i)
+ MIB.addReg(Regs[i].first, getKillRegState(Regs[i].second));
+ } else if (Regs.size() == 1) {
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc),
+ ARM::SP)
+ .addReg(Regs[0].first, getKillRegState(Regs[0].second))
+ .addReg(ARM::SP).setMIFlags(MIFlags)
+ .addImm(-4);
+ AddDefaultPred(MIB);
+ }
+ Regs.clear();
+ }
+}
+
+void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ unsigned LdmOpc, unsigned LdrOpc,
+ bool isVarArg, bool NoGap,
+ bool(*Func)(unsigned, bool)) const {
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ DebugLoc DL = MI->getDebugLoc();
+ unsigned RetOpcode = MI->getOpcode();
+ bool isTailCall = (RetOpcode == ARM::TCRETURNdi ||
+ RetOpcode == ARM::TCRETURNdiND ||
+ RetOpcode == ARM::TCRETURNri ||
+ RetOpcode == ARM::TCRETURNriND);
+
+ SmallVector<unsigned, 4> Regs;
+ unsigned i = CSI.size();
+ while (i != 0) {
+ unsigned LastReg = 0;
+ bool DeleteRet = false;
+ for (; i != 0; --i) {
+ unsigned Reg = CSI[i-1].getReg();
+ if (!(Func)(Reg, STI.isTargetDarwin())) continue;
+
+ if (Reg == ARM::LR && !isTailCall && !isVarArg && STI.hasV5TOps()) {
+ Reg = ARM::PC;
+ LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET;
+ // Fold the return instruction into the LDM.
+ DeleteRet = true;
+ }
+
+ // If NoGap is true, pop consecutive registers and then leave the rest
+ // for other instructions. e.g.
+ // vpop {d8, d10, d11} -> vpop {d8}, vpop {d10, d11}
+ if (NoGap && LastReg && LastReg != Reg-1)
+ break;
+
+ LastReg = Reg;
+ Regs.push_back(Reg);
+ }
+
+ if (Regs.empty())
+ continue;
+ if (Regs.size() > 1 || LdrOpc == 0) {
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(LdmOpc), ARM::SP)
+ .addReg(ARM::SP));
+ for (unsigned i = 0, e = Regs.size(); i < e; ++i)
+ MIB.addReg(Regs[i], getDefRegState(true));
+ if (DeleteRet) {
+ MIB->copyImplicitOps(&*MI);
+ MI->eraseFromParent();
+ }
+ MI = MIB;
+ } else if (Regs.size() == 1) {
+ // If we adjusted the reg to PC from LR above, switch it back here. We
+ // only do that for LDM.
+ if (Regs[0] == ARM::PC)
+ Regs[0] = ARM::LR;
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MI, DL, TII.get(LdrOpc), Regs[0])
+ .addReg(ARM::SP, RegState::Define)
+ .addReg(ARM::SP);
+ // ARM mode needs an extra reg0 here due to addrmode2. Will go away once
+ // that refactoring is complete (eventually).
+ if (LdrOpc == ARM::LDR_POST_REG || LdrOpc == ARM::LDR_POST_IMM) {
+ MIB.addReg(0);
+ MIB.addImm(ARM_AM::getAM2Opc(ARM_AM::add, 4, ARM_AM::no_shift));
+ } else
+ MIB.addImm(4);
+ AddDefaultPred(MIB);
+ }
+ Regs.clear();
+ }
+}
+
+bool ARMFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return false;
+
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+
+ unsigned PushOpc = AFI->isThumbFunction() ? ARM::t2STMDB_UPD : ARM::STMDB_UPD;
+ unsigned PushOneOpc = AFI->isThumbFunction() ?
+ ARM::t2STR_PRE : ARM::STR_PRE_IMM;
+ unsigned FltOpc = ARM::VSTMDDB_UPD;
+ emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register,
+ MachineInstr::FrameSetup);
+ emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register,
+ MachineInstr::FrameSetup);
+ emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register,
+ MachineInstr::FrameSetup);
+
+ return true;
+}
+
+bool ARMFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return false;
+
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ bool isVarArg = AFI->getVarArgsRegSaveSize() > 0;
+
+ unsigned PopOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
+ unsigned LdrOpc = AFI->isThumbFunction() ? ARM::t2LDR_POST :ARM::LDR_POST_IMM;
+ unsigned FltOpc = ARM::VLDMDIA_UPD;
+ emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register);
+ emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
+ &isARMArea2Register);
+ emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
+ &isARMArea1Register);
+
+ return true;
+}
+
+// FIXME: Make generic?
+static unsigned GetFunctionSizeInBytes(const MachineFunction &MF,
+ const ARMBaseInstrInfo &TII) {
+ unsigned FnSize = 0;
+ for (MachineFunction::const_iterator MBBI = MF.begin(), E = MF.end();
+ MBBI != E; ++MBBI) {
+ const MachineBasicBlock &MBB = *MBBI;
+ for (MachineBasicBlock::const_iterator I = MBB.begin(),E = MBB.end();
+ I != E; ++I)
+ FnSize += TII.GetInstSizeInBytes(I);
+ }
+ return FnSize;
+}
+
+/// estimateStackSize - Estimate and return the size of the frame.
+/// FIXME: Make generic?
+static unsigned estimateStackSize(MachineFunction &MF) {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
+ unsigned MaxAlign = MFI->getMaxAlignment();
+ int Offset = 0;
+
+ // This code is very, very similar to PEI::calculateFrameObjectOffsets().
+ // It really should be refactored to share code. Until then, changes
+ // should keep in mind that there's tight coupling between the two.
+
+ for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) {
+ int FixedOff = -MFI->getObjectOffset(i);
+ if (FixedOff > Offset) Offset = FixedOff;
+ }
+ for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
+ if (MFI->isDeadObjectIndex(i))
+ continue;
+ Offset += MFI->getObjectSize(i);
+ unsigned Align = MFI->getObjectAlignment(i);
+ // Adjust to alignment boundary
+ Offset = (Offset+Align-1)/Align*Align;
+
+ MaxAlign = std::max(Align, MaxAlign);
+ }
+
+ if (MFI->adjustsStack() && TFI->hasReservedCallFrame(MF))
+ Offset += MFI->getMaxCallFrameSize();
+
+ // Round up the size to a multiple of the alignment. If the function has
+ // any calls or alloca's, align to the target's StackAlignment value to
+ // ensure that the callee's frame or the alloca data is suitably aligned;
+ // otherwise, for leaf functions, align to the TransientStackAlignment
+ // value.
+ unsigned StackAlign;
+ if (MFI->adjustsStack() || MFI->hasVarSizedObjects() ||
+ (RegInfo->needsStackRealignment(MF) && MFI->getObjectIndexEnd() != 0))
+ StackAlign = TFI->getStackAlignment();
+ else
+ StackAlign = TFI->getTransientStackAlignment();
+
+ // If the frame pointer is eliminated, all frame offsets will be relative to
+ // SP not FP. Align to MaxAlign so this works.
+ StackAlign = std::max(StackAlign, MaxAlign);
+ unsigned AlignMask = StackAlign - 1;
+ Offset = (Offset + AlignMask) & ~uint64_t(AlignMask);
+
+ return (unsigned)Offset;
+}
+
+/// estimateRSStackSizeLimit - Look at each instruction that references stack
+/// frames and return the stack size limit beyond which some of these
+/// instructions will require a scratch register during their expansion later.
+// FIXME: Move to TII?
+static unsigned estimateRSStackSizeLimit(MachineFunction &MF,
+ const TargetFrameLowering *TFI) {
+ const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ unsigned Limit = (1 << 12) - 1;
+ for (MachineFunction::iterator BB = MF.begin(),E = MF.end(); BB != E; ++BB) {
+ for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end();
+ I != E; ++I) {
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ if (!I->getOperand(i).isFI()) continue;
+
+ // When using ADDri to get the address of a stack object, 255 is the
+ // largest offset guaranteed to fit in the immediate offset.
+ if (I->getOpcode() == ARM::ADDri) {
+ Limit = std::min(Limit, (1U << 8) - 1);
+ break;
+ }
+
+ // Otherwise check the addressing mode.
+ switch (I->getDesc().TSFlags & ARMII::AddrModeMask) {
+ case ARMII::AddrMode3:
+ case ARMII::AddrModeT2_i8:
+ Limit = std::min(Limit, (1U << 8) - 1);
+ break;
+ case ARMII::AddrMode5:
+ case ARMII::AddrModeT2_i8s4:
+ Limit = std::min(Limit, ((1U << 8) - 1) * 4);
+ break;
+ case ARMII::AddrModeT2_i12:
+ // i12 supports only positive offset so these will be converted to
+ // i8 opcodes. See llvm::rewriteT2FrameIndex.
+ if (TFI->hasFP(MF) && AFI->hasStackFrame())
+ Limit = std::min(Limit, (1U << 8) - 1);
+ break;
+ case ARMII::AddrMode4:
+ case ARMII::AddrMode6:
+ // Addressing modes 4 & 6 (load/store) instructions can't encode an
+ // immediate offset for stack references.
+ return 0;
+ default:
+ break;
+ }
+ break; // At most one FI per instruction
+ }
+ }
+ }
+
+ return Limit;
+}
+
+void
+ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const {
+ // This tells PEI to spill the FP as if it is any other callee-save register
+ // to take advantage the eliminateFrameIndex machinery. This also ensures it
+ // is spilled in the order specified by getCalleeSavedRegs() to make it easier
+ // to combine multiple loads / stores.
+ bool CanEliminateFrame = true;
+ bool CS1Spilled = false;
+ bool LRSpilled = false;
+ unsigned NumGPRSpills = 0;
+ SmallVector<unsigned, 4> UnspilledCS1GPRs;
+ SmallVector<unsigned, 4> UnspilledCS2GPRs;
+ const ARMBaseRegisterInfo *RegInfo =
+ static_cast<const ARMBaseRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ const ARMBaseInstrInfo &TII =
+ *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo());
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ unsigned FramePtr = RegInfo->getFrameRegister(MF);
+
+ // Spill R4 if Thumb2 function requires stack realignment - it will be used as
+ // scratch register. Also spill R4 if Thumb2 function has varsized objects,
+ // since it's not always possible to restore sp from fp in a single
+ // instruction.
+ // FIXME: It will be better just to find spare register here.
+ if (AFI->isThumb2Function() &&
+ (MFI->hasVarSizedObjects() || RegInfo->needsStackRealignment(MF)))
+ MF.getRegInfo().setPhysRegUsed(ARM::R4);
+
+ if (AFI->isThumb1OnlyFunction()) {
+ // Spill LR if Thumb1 function uses variable length argument lists.
+ if (AFI->getVarArgsRegSaveSize() > 0)
+ MF.getRegInfo().setPhysRegUsed(ARM::LR);
+
+ // Spill R4 if Thumb1 epilogue has to restore SP from FP. We don't know
+ // for sure what the stack size will be, but for this, an estimate is good
+ // enough. If there anything changes it, it'll be a spill, which implies
+ // we've used all the registers and so R4 is already used, so not marking
+ // it here will be OK.
+ // FIXME: It will be better just to find spare register here.
+ unsigned StackSize = estimateStackSize(MF);
+ if (MFI->hasVarSizedObjects() || StackSize > 508)
+ MF.getRegInfo().setPhysRegUsed(ARM::R4);
+ }
+
+ // Spill the BasePtr if it's used.
+ if (RegInfo->hasBasePointer(MF))
+ MF.getRegInfo().setPhysRegUsed(RegInfo->getBaseRegister());
+
+ // Don't spill FP if the frame can be eliminated. This is determined
+ // by scanning the callee-save registers to see if any is used.
+ const unsigned *CSRegs = RegInfo->getCalleeSavedRegs();
+ for (unsigned i = 0; CSRegs[i]; ++i) {
+ unsigned Reg = CSRegs[i];
+ bool Spilled = false;
+ if (MF.getRegInfo().isPhysRegUsed(Reg)) {
+ Spilled = true;
+ CanEliminateFrame = false;
+ } else {
+ // Check alias registers too.
+ for (const unsigned *Aliases =
+ RegInfo->getAliasSet(Reg); *Aliases; ++Aliases) {
+ if (MF.getRegInfo().isPhysRegUsed(*Aliases)) {
+ Spilled = true;
+ CanEliminateFrame = false;
+ }
+ }
+ }
+
+ if (!ARM::GPRRegisterClass->contains(Reg))
+ continue;
+
+ if (Spilled) {
+ NumGPRSpills++;
+
+ if (!STI.isTargetDarwin()) {
+ if (Reg == ARM::LR)
+ LRSpilled = true;
+ CS1Spilled = true;
+ continue;
+ }
+
+ // Keep track if LR and any of R4, R5, R6, and R7 is spilled.
+ switch (Reg) {
+ case ARM::LR:
+ LRSpilled = true;
+ // Fallthrough
+ case ARM::R4: case ARM::R5:
+ case ARM::R6: case ARM::R7:
+ CS1Spilled = true;
+ break;
+ default:
+ break;
+ }
+ } else {
+ if (!STI.isTargetDarwin()) {
+ UnspilledCS1GPRs.push_back(Reg);
+ continue;
+ }
+
+ switch (Reg) {
+ case ARM::R4: case ARM::R5:
+ case ARM::R6: case ARM::R7:
+ case ARM::LR:
+ UnspilledCS1GPRs.push_back(Reg);
+ break;
+ default:
+ UnspilledCS2GPRs.push_back(Reg);
+ break;
+ }
+ }
+ }
+
+ bool ForceLRSpill = false;
+ if (!LRSpilled && AFI->isThumb1OnlyFunction()) {
+ unsigned FnSize = GetFunctionSizeInBytes(MF, TII);
+ // Force LR to be spilled if the Thumb function size is > 2048. This enables
+ // use of BL to implement far jump. If it turns out that it's not needed
+ // then the branch fix up path will undo it.
+ if (FnSize >= (1 << 11)) {
+ CanEliminateFrame = false;
+ ForceLRSpill = true;
+ }
+ }
+
+ // If any of the stack slot references may be out of range of an immediate
+ // offset, make sure a register (or a spill slot) is available for the
+ // register scavenger. Note that if we're indexing off the frame pointer, the
+ // effective stack size is 4 bytes larger since the FP points to the stack
+ // slot of the previous FP. Also, if we have variable sized objects in the
+ // function, stack slot references will often be negative, and some of
+ // our instructions are positive-offset only, so conservatively consider
+ // that case to want a spill slot (or register) as well. Similarly, if
+ // the function adjusts the stack pointer during execution and the
+ // adjustments aren't already part of our stack size estimate, our offset
+ // calculations may be off, so be conservative.
+ // FIXME: We could add logic to be more precise about negative offsets
+ // and which instructions will need a scratch register for them. Is it
+ // worth the effort and added fragility?
+ bool BigStack =
+ (RS &&
+ (estimateStackSize(MF) + ((hasFP(MF) && AFI->hasStackFrame()) ? 4:0) >=
+ estimateRSStackSizeLimit(MF, this)))
+ || MFI->hasVarSizedObjects()
+ || (MFI->adjustsStack() && !canSimplifyCallFramePseudos(MF));
+
+ bool ExtraCSSpill = false;
+ if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) {
+ AFI->setHasStackFrame(true);
+
+ // If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled.
+ // Spill LR as well so we can fold BX_RET to the registers restore (LDM).
+ if (!LRSpilled && CS1Spilled) {
+ MF.getRegInfo().setPhysRegUsed(ARM::LR);
+ NumGPRSpills++;
+ UnspilledCS1GPRs.erase(std::find(UnspilledCS1GPRs.begin(),
+ UnspilledCS1GPRs.end(), (unsigned)ARM::LR));
+ ForceLRSpill = false;
+ ExtraCSSpill = true;
+ }
+
+ if (hasFP(MF)) {
+ MF.getRegInfo().setPhysRegUsed(FramePtr);
+ NumGPRSpills++;
+ }
+
+ // If stack and double are 8-byte aligned and we are spilling an odd number
+ // of GPRs, spill one extra callee save GPR so we won't have to pad between
+ // the integer and double callee save areas.
+ unsigned TargetAlign = getStackAlignment();
+ if (TargetAlign == 8 && (NumGPRSpills & 1)) {
+ if (CS1Spilled && !UnspilledCS1GPRs.empty()) {
+ for (unsigned i = 0, e = UnspilledCS1GPRs.size(); i != e; ++i) {
+ unsigned Reg = UnspilledCS1GPRs[i];
+ // Don't spill high register if the function is thumb1
+ if (!AFI->isThumb1OnlyFunction() ||
+ isARMLowRegister(Reg) || Reg == ARM::LR) {
+ MF.getRegInfo().setPhysRegUsed(Reg);
+ if (!RegInfo->isReservedReg(MF, Reg))
+ ExtraCSSpill = true;
+ break;
+ }
+ }
+ } else if (!UnspilledCS2GPRs.empty() && !AFI->isThumb1OnlyFunction()) {
+ unsigned Reg = UnspilledCS2GPRs.front();
+ MF.getRegInfo().setPhysRegUsed(Reg);
+ if (!RegInfo->isReservedReg(MF, Reg))
+ ExtraCSSpill = true;
+ }
+ }
+
+ // Estimate if we might need to scavenge a register at some point in order
+ // to materialize a stack offset. If so, either spill one additional
+ // callee-saved register or reserve a special spill slot to facilitate
+ // register scavenging. Thumb1 needs a spill slot for stack pointer
+ // adjustments also, even when the frame itself is small.
+ if (BigStack && !ExtraCSSpill) {
+ // If any non-reserved CS register isn't spilled, just spill one or two
+ // extra. That should take care of it!
+ unsigned NumExtras = TargetAlign / 4;
+ SmallVector<unsigned, 2> Extras;
+ while (NumExtras && !UnspilledCS1GPRs.empty()) {
+ unsigned Reg = UnspilledCS1GPRs.back();
+ UnspilledCS1GPRs.pop_back();
+ if (!RegInfo->isReservedReg(MF, Reg) &&
+ (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg) ||
+ Reg == ARM::LR)) {
+ Extras.push_back(Reg);
+ NumExtras--;
+ }
+ }
+ // For non-Thumb1 functions, also check for hi-reg CS registers
+ if (!AFI->isThumb1OnlyFunction()) {
+ while (NumExtras && !UnspilledCS2GPRs.empty()) {
+ unsigned Reg = UnspilledCS2GPRs.back();
+ UnspilledCS2GPRs.pop_back();
+ if (!RegInfo->isReservedReg(MF, Reg)) {
+ Extras.push_back(Reg);
+ NumExtras--;
+ }
+ }
+ }
+ if (Extras.size() && NumExtras == 0) {
+ for (unsigned i = 0, e = Extras.size(); i != e; ++i) {
+ MF.getRegInfo().setPhysRegUsed(Extras[i]);
+ }
+ } else if (!AFI->isThumb1OnlyFunction()) {
+ // note: Thumb1 functions spill to R12, not the stack. Reserve a slot
+ // closest to SP or frame pointer.
+ const TargetRegisterClass *RC = ARM::GPRRegisterClass;
+ RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+ RC->getAlignment(),
+ false));
+ }
+ }
+ }
+
+ if (ForceLRSpill) {
+ MF.getRegInfo().setPhysRegUsed(ARM::LR);
+ AFI->setLRIsSpilledForFarJump(true);
+ }
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.h b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.h
new file mode 100644
index 0000000..61bb8af
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.h
@@ -0,0 +1,76 @@
+//==-- ARMTargetFrameLowering.h - Define frame lowering for ARM --*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARM_FRAMEINFO_H
+#define ARM_FRAMEINFO_H
+
+#include "ARM.h"
+#include "ARMSubtarget.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+ class ARMSubtarget;
+
+class ARMFrameLowering : public TargetFrameLowering {
+protected:
+ const ARMSubtarget &STI;
+
+public:
+ explicit ARMFrameLowering(const ARMSubtarget &sti)
+ : TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, 4),
+ STI(sti) {
+ }
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+
+ bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+
+ bool hasFP(const MachineFunction &MF) const;
+ bool hasReservedCallFrame(const MachineFunction &MF) const;
+ bool canSimplifyCallFramePseudos(const MachineFunction &MF) const;
+ int getFrameIndexReference(const MachineFunction &MF, int FI,
+ unsigned &FrameReg) const;
+ int ResolveFrameIndexReference(const MachineFunction &MF,
+ int FI,
+ unsigned &FrameReg, int SPAdj) const;
+ int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
+
+ void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const;
+
+ private:
+ void emitPushInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI, unsigned StmOpc,
+ unsigned StrOpc, bool NoGap,
+ bool(*Func)(unsigned, bool),
+ unsigned MIFlags = 0) const;
+ void emitPopInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI, unsigned LdmOpc,
+ unsigned LdrOpc, bool isVarArg, bool NoGap,
+ bool(*Func)(unsigned, bool)) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMGlobalMerge.cpp b/contrib/llvm/lib/Target/ARM/ARMGlobalMerge.cpp
new file mode 100644
index 0000000..5f863ea
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMGlobalMerge.cpp
@@ -0,0 +1,219 @@
+//===-- ARMGlobalMerge.cpp - Internal globals merging --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This pass merges globals with internal linkage into one. This way all the
+// globals which were merged into a biggest one can be addressed using offsets
+// from the same base pointer (no need for separate base pointer for each of the
+// global). Such a transformation can significantly reduce the register pressure
+// when many globals are involved.
+//
+// For example, consider the code which touches several global variables at
+// once:
+//
+// static int foo[N], bar[N], baz[N];
+//
+// for (i = 0; i < N; ++i) {
+// foo[i] = bar[i] * baz[i];
+// }
+//
+// On ARM the addresses of 3 arrays should be kept in the registers, thus
+// this code has quite large register pressure (loop body):
+//
+// ldr r1, [r5], #4
+// ldr r2, [r6], #4
+// mul r1, r2, r1
+// str r1, [r0], #4
+//
+// Pass converts the code to something like:
+//
+// static struct {
+// int foo[N];
+// int bar[N];
+// int baz[N];
+// } merged;
+//
+// for (i = 0; i < N; ++i) {
+// merged.foo[i] = merged.bar[i] * merged.baz[i];
+// }
+//
+// and in ARM code this becomes:
+//
+// ldr r0, [r5, #40]
+// ldr r1, [r5, #80]
+// mul r0, r1, r0
+// str r0, [r5], #4
+//
+// note that we saved 2 registers here almostly "for free".
+// ===---------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm-global-merge"
+#include "ARM.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Attributes.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+using namespace llvm;
+
+namespace {
+ class ARMGlobalMerge : public FunctionPass {
+ /// TLI - Keep a pointer of a TargetLowering to consult for determining
+ /// target type sizes.
+ const TargetLowering *TLI;
+
+ bool doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
+ Module &M, bool isConst) const;
+
+ public:
+ static char ID; // Pass identification, replacement for typeid.
+ explicit ARMGlobalMerge(const TargetLowering *tli)
+ : FunctionPass(ID), TLI(tli) {}
+
+ virtual bool doInitialization(Module &M);
+ virtual bool runOnFunction(Function &F);
+
+ const char *getPassName() const {
+ return "Merge internal globals";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ FunctionPass::getAnalysisUsage(AU);
+ }
+
+ struct GlobalCmp {
+ const TargetData *TD;
+
+ GlobalCmp(const TargetData *td) : TD(td) { }
+
+ bool operator()(const GlobalVariable *GV1, const GlobalVariable *GV2) {
+ Type *Ty1 = cast<PointerType>(GV1->getType())->getElementType();
+ Type *Ty2 = cast<PointerType>(GV2->getType())->getElementType();
+
+ return (TD->getTypeAllocSize(Ty1) < TD->getTypeAllocSize(Ty2));
+ }
+ };
+ };
+} // end anonymous namespace
+
+char ARMGlobalMerge::ID = 0;
+
+bool ARMGlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
+ Module &M, bool isConst) const {
+ const TargetData *TD = TLI->getTargetData();
+
+ // FIXME: Infer the maximum possible offset depending on the actual users
+ // (these max offsets are different for the users inside Thumb or ARM
+ // functions)
+ unsigned MaxOffset = TLI->getMaximalGlobalOffset();
+
+ // FIXME: Find better heuristics
+ std::stable_sort(Globals.begin(), Globals.end(), GlobalCmp(TD));
+
+ Type *Int32Ty = Type::getInt32Ty(M.getContext());
+
+ for (size_t i = 0, e = Globals.size(); i != e; ) {
+ size_t j = 0;
+ uint64_t MergedSize = 0;
+ std::vector<Type*> Tys;
+ std::vector<Constant*> Inits;
+ for (j = i; j != e; ++j) {
+ Type *Ty = Globals[j]->getType()->getElementType();
+ MergedSize += TD->getTypeAllocSize(Ty);
+ if (MergedSize > MaxOffset) {
+ break;
+ }
+ Tys.push_back(Ty);
+ Inits.push_back(Globals[j]->getInitializer());
+ }
+
+ StructType *MergedTy = StructType::get(M.getContext(), Tys);
+ Constant *MergedInit = ConstantStruct::get(MergedTy, Inits);
+ GlobalVariable *MergedGV = new GlobalVariable(M, MergedTy, isConst,
+ GlobalValue::InternalLinkage,
+ MergedInit, "_MergedGlobals");
+ for (size_t k = i; k < j; ++k) {
+ Constant *Idx[2] = {
+ ConstantInt::get(Int32Ty, 0),
+ ConstantInt::get(Int32Ty, k-i)
+ };
+ Constant *GEP = ConstantExpr::getInBoundsGetElementPtr(MergedGV, Idx);
+ Globals[k]->replaceAllUsesWith(GEP);
+ Globals[k]->eraseFromParent();
+ }
+ i = j;
+ }
+
+ return true;
+}
+
+
+bool ARMGlobalMerge::doInitialization(Module &M) {
+ SmallVector<GlobalVariable*, 16> Globals, ConstGlobals, BSSGlobals;
+ const TargetData *TD = TLI->getTargetData();
+ unsigned MaxOffset = TLI->getMaximalGlobalOffset();
+ bool Changed = false;
+
+ // Grab all non-const globals.
+ for (Module::global_iterator I = M.global_begin(),
+ E = M.global_end(); I != E; ++I) {
+ // Merge is safe for "normal" internal globals only
+ if (!I->hasLocalLinkage() || I->isThreadLocal() || I->hasSection())
+ continue;
+
+ // Ignore fancy-aligned globals for now.
+ unsigned Alignment = I->getAlignment();
+ Type *Ty = I->getType()->getElementType();
+ if (Alignment > TD->getABITypeAlignment(Ty))
+ continue;
+
+ // Ignore all 'special' globals.
+ if (I->getName().startswith("llvm.") ||
+ I->getName().startswith(".llvm."))
+ continue;
+
+ if (TD->getTypeAllocSize(Ty) < MaxOffset) {
+ const TargetLoweringObjectFile &TLOF = TLI->getObjFileLowering();
+ if (TLOF.getKindForGlobal(I, TLI->getTargetMachine()).isBSSLocal())
+ BSSGlobals.push_back(I);
+ else if (I->isConstant())
+ ConstGlobals.push_back(I);
+ else
+ Globals.push_back(I);
+ }
+ }
+
+ if (Globals.size() > 1)
+ Changed |= doMerge(Globals, M, false);
+ if (BSSGlobals.size() > 1)
+ Changed |= doMerge(BSSGlobals, M, false);
+
+ // FIXME: This currently breaks the EH processing due to way how the
+ // typeinfo detection works. We might want to detect the TIs and ignore
+ // them in the future.
+ // if (ConstGlobals.size() > 1)
+ // Changed |= doMerge(ConstGlobals, M, true);
+
+ return Changed;
+}
+
+bool ARMGlobalMerge::runOnFunction(Function &F) {
+ return false;
+}
+
+FunctionPass *llvm::createARMGlobalMergePass(const TargetLowering *tli) {
+ return new ARMGlobalMerge(tli);
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp b/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp
new file mode 100644
index 0000000..787f6a2
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp
@@ -0,0 +1,120 @@
+//===-- ARMHazardRecognizer.cpp - ARM postra hazard recognizer ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMHazardRecognizer.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMBaseRegisterInfo.h"
+#include "ARMSubtarget.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+static bool hasRAWHazard(MachineInstr *DefMI, MachineInstr *MI,
+ const TargetRegisterInfo &TRI) {
+ // FIXME: Detect integer instructions properly.
+ const MCInstrDesc &MCID = MI->getDesc();
+ unsigned Domain = MCID.TSFlags & ARMII::DomainMask;
+ if (MCID.mayStore())
+ return false;
+ unsigned Opcode = MCID.getOpcode();
+ if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
+ return false;
+ if ((Domain & ARMII::DomainVFP) || (Domain & ARMII::DomainNEON))
+ return MI->readsRegister(DefMI->getOperand(0).getReg(), &TRI);
+ return false;
+}
+
+ScheduleHazardRecognizer::HazardType
+ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
+ assert(Stalls == 0 && "ARM hazards don't support scoreboard lookahead");
+
+ MachineInstr *MI = SU->getInstr();
+
+ if (!MI->isDebugValue()) {
+ if (ITBlockSize && MI != ITBlockMIs[ITBlockSize-1])
+ return Hazard;
+
+ // Look for special VMLA / VMLS hazards. A VMUL / VADD / VSUB following
+ // a VMLA / VMLS will cause 4 cycle stall.
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (LastMI && (MCID.TSFlags & ARMII::DomainMask) != ARMII::DomainGeneral) {
+ MachineInstr *DefMI = LastMI;
+ const MCInstrDesc &LastMCID = LastMI->getDesc();
+ // Skip over one non-VFP / NEON instruction.
+ if (!LastMCID.isBarrier() &&
+ // On A9, AGU and NEON/FPU are muxed.
+ !(STI.isCortexA9() && (LastMCID.mayLoad() || LastMCID.mayStore())) &&
+ (LastMCID.TSFlags & ARMII::DomainMask) == ARMII::DomainGeneral) {
+ MachineBasicBlock::iterator I = LastMI;
+ if (I != LastMI->getParent()->begin()) {
+ I = llvm::prior(I);
+ DefMI = &*I;
+ }
+ }
+
+ if (TII.isFpMLxInstruction(DefMI->getOpcode()) &&
+ (TII.canCauseFpMLxStall(MI->getOpcode()) ||
+ hasRAWHazard(DefMI, MI, TRI))) {
+ // Try to schedule another instruction for the next 4 cycles.
+ if (FpMLxStalls == 0)
+ FpMLxStalls = 4;
+ return Hazard;
+ }
+ }
+ }
+
+ return ScoreboardHazardRecognizer::getHazardType(SU, Stalls);
+}
+
+void ARMHazardRecognizer::Reset() {
+ LastMI = 0;
+ FpMLxStalls = 0;
+ ITBlockSize = 0;
+ ScoreboardHazardRecognizer::Reset();
+}
+
+void ARMHazardRecognizer::EmitInstruction(SUnit *SU) {
+ MachineInstr *MI = SU->getInstr();
+ unsigned Opcode = MI->getOpcode();
+ if (ITBlockSize) {
+ --ITBlockSize;
+ } else if (Opcode == ARM::t2IT) {
+ unsigned Mask = MI->getOperand(1).getImm();
+ unsigned NumTZ = CountTrailingZeros_32(Mask);
+ assert(NumTZ <= 3 && "Invalid IT mask!");
+ ITBlockSize = 4 - NumTZ;
+ MachineBasicBlock::iterator I = MI;
+ for (unsigned i = 0; i < ITBlockSize; ++i) {
+ // Advance to the next instruction, skipping any dbg_value instructions.
+ do {
+ ++I;
+ } while (I->isDebugValue());
+ ITBlockMIs[ITBlockSize-1-i] = &*I;
+ }
+ }
+
+ if (!MI->isDebugValue()) {
+ LastMI = MI;
+ FpMLxStalls = 0;
+ }
+
+ ScoreboardHazardRecognizer::EmitInstruction(SU);
+}
+
+void ARMHazardRecognizer::AdvanceCycle() {
+ if (FpMLxStalls && --FpMLxStalls == 0)
+ // Stalled for 4 cycles but still can't schedule any other instructions.
+ LastMI = 0;
+ ScoreboardHazardRecognizer::AdvanceCycle();
+}
+
+void ARMHazardRecognizer::RecedeCycle() {
+ llvm_unreachable("reverse ARM hazard checking unsupported");
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.h b/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.h
new file mode 100644
index 0000000..2bc218d
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.h
@@ -0,0 +1,54 @@
+//===-- ARMHazardRecognizer.h - ARM Hazard Recognizers ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines hazard recognizers for scheduling ARM functions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMHAZARDRECOGNIZER_H
+#define ARMHAZARDRECOGNIZER_H
+
+#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
+
+namespace llvm {
+
+class ARMBaseInstrInfo;
+class ARMBaseRegisterInfo;
+class ARMSubtarget;
+class MachineInstr;
+
+class ARMHazardRecognizer : public ScoreboardHazardRecognizer {
+ const ARMBaseInstrInfo &TII;
+ const ARMBaseRegisterInfo &TRI;
+ const ARMSubtarget &STI;
+
+ MachineInstr *LastMI;
+ unsigned FpMLxStalls;
+ unsigned ITBlockSize; // No. of MIs in current IT block yet to be scheduled.
+ MachineInstr *ITBlockMIs[4];
+
+public:
+ ARMHazardRecognizer(const InstrItineraryData *ItinData,
+ const ARMBaseInstrInfo &tii,
+ const ARMBaseRegisterInfo &tri,
+ const ARMSubtarget &sti,
+ const ScheduleDAG *DAG) :
+ ScoreboardHazardRecognizer(ItinData, DAG, "post-RA-sched"), TII(tii),
+ TRI(tri), STI(sti), LastMI(0), ITBlockSize(0) {}
+
+ virtual HazardType getHazardType(SUnit *SU, int Stalls);
+ virtual void Reset();
+ virtual void EmitInstruction(SUnit *SU);
+ virtual void AdvanceCycle();
+ virtual void RecedeCycle();
+};
+
+} // end namespace llvm
+
+#endif // ARMHAZARDRECOGNIZER_H
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
new file mode 100644
index 0000000..5ee009c
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -0,0 +1,3211 @@
+//===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the ARM target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm-isel"
+#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMTargetMachine.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+static cl::opt<bool>
+DisableShifterOp("disable-shifter-op", cl::Hidden,
+ cl::desc("Disable isel of shifter-op"),
+ cl::init(false));
+
+static cl::opt<bool>
+CheckVMLxHazard("check-vmlx-hazard", cl::Hidden,
+ cl::desc("Check fp vmla / vmls hazard at isel time"),
+ cl::init(true));
+
+static cl::opt<bool>
+DisableARMIntABS("disable-arm-int-abs", cl::Hidden,
+ cl::desc("Enable / disable ARM integer abs transform"),
+ cl::init(false));
+
+//===--------------------------------------------------------------------===//
+/// ARMDAGToDAGISel - ARM specific code to select ARM machine
+/// instructions for SelectionDAG operations.
+///
+namespace {
+
+enum AddrMode2Type {
+ AM2_BASE, // Simple AM2 (+-imm12)
+ AM2_SHOP // Shifter-op AM2
+};
+
+class ARMDAGToDAGISel : public SelectionDAGISel {
+ ARMBaseTargetMachine &TM;
+ const ARMBaseInstrInfo *TII;
+
+ /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const ARMSubtarget *Subtarget;
+
+public:
+ explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm,
+ CodeGenOpt::Level OptLevel)
+ : SelectionDAGISel(tm, OptLevel), TM(tm),
+ TII(static_cast<const ARMBaseInstrInfo*>(TM.getInstrInfo())),
+ Subtarget(&TM.getSubtarget<ARMSubtarget>()) {
+ }
+
+ virtual const char *getPassName() const {
+ return "ARM Instruction Selection";
+ }
+
+ /// getI32Imm - Return a target constant of type i32 with the specified
+ /// value.
+ inline SDValue getI32Imm(unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i32);
+ }
+
+ SDNode *Select(SDNode *N);
+
+
+ bool hasNoVMLxHazardUse(SDNode *N) const;
+ bool isShifterOpProfitable(const SDValue &Shift,
+ ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
+ bool SelectRegShifterOperand(SDValue N, SDValue &A,
+ SDValue &B, SDValue &C,
+ bool CheckProfitability = true);
+ bool SelectImmShifterOperand(SDValue N, SDValue &A,
+ SDValue &B, bool CheckProfitability = true);
+ bool SelectShiftRegShifterOperand(SDValue N, SDValue &A,
+ SDValue &B, SDValue &C) {
+ // Don't apply the profitability check
+ return SelectRegShifterOperand(N, A, B, C, false);
+ }
+ bool SelectShiftImmShifterOperand(SDValue N, SDValue &A,
+ SDValue &B) {
+ // Don't apply the profitability check
+ return SelectImmShifterOperand(N, A, B, false);
+ }
+
+ bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
+ bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);
+
+ AddrMode2Type SelectAddrMode2Worker(SDValue N, SDValue &Base,
+ SDValue &Offset, SDValue &Opc);
+ bool SelectAddrMode2Base(SDValue N, SDValue &Base, SDValue &Offset,
+ SDValue &Opc) {
+ return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_BASE;
+ }
+
+ bool SelectAddrMode2ShOp(SDValue N, SDValue &Base, SDValue &Offset,
+ SDValue &Opc) {
+ return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_SHOP;
+ }
+
+ bool SelectAddrMode2(SDValue N, SDValue &Base, SDValue &Offset,
+ SDValue &Opc) {
+ SelectAddrMode2Worker(N, Base, Offset, Opc);
+// return SelectAddrMode2ShOp(N, Base, Offset, Opc);
+ // This always matches one way or another.
+ return true;
+ }
+
+ bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
+ SDValue &Offset, SDValue &Opc);
+ bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
+ SDValue &Offset, SDValue &Opc);
+ bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
+ SDValue &Offset, SDValue &Opc);
+ bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
+ bool SelectAddrMode3(SDValue N, SDValue &Base,
+ SDValue &Offset, SDValue &Opc);
+ bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
+ SDValue &Offset, SDValue &Opc);
+ bool SelectAddrMode5(SDValue N, SDValue &Base,
+ SDValue &Offset);
+ bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
+ bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);
+
+ bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);
+
+ // Thumb Addressing Modes:
+ bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
+ bool SelectThumbAddrModeRI(SDValue N, SDValue &Base, SDValue &Offset,
+ unsigned Scale);
+ bool SelectThumbAddrModeRI5S1(SDValue N, SDValue &Base, SDValue &Offset);
+ bool SelectThumbAddrModeRI5S2(SDValue N, SDValue &Base, SDValue &Offset);
+ bool SelectThumbAddrModeRI5S4(SDValue N, SDValue &Base, SDValue &Offset);
+ bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
+ SDValue &OffImm);
+ bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
+ SDValue &OffImm);
+ bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
+ SDValue &OffImm);
+ bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
+ SDValue &OffImm);
+ bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
+
+ // Thumb 2 Addressing Modes:
+ bool SelectT2ShifterOperandReg(SDValue N,
+ SDValue &BaseReg, SDValue &Opc);
+ bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
+ bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
+ SDValue &OffImm);
+ bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
+ SDValue &OffImm);
+ bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
+ SDValue &OffReg, SDValue &ShImm);
+
+ inline bool is_so_imm(unsigned Imm) const {
+ return ARM_AM::getSOImmVal(Imm) != -1;
+ }
+
+ inline bool is_so_imm_not(unsigned Imm) const {
+ return ARM_AM::getSOImmVal(~Imm) != -1;
+ }
+
+ inline bool is_t2_so_imm(unsigned Imm) const {
+ return ARM_AM::getT2SOImmVal(Imm) != -1;
+ }
+
+ inline bool is_t2_so_imm_not(unsigned Imm) const {
+ return ARM_AM::getT2SOImmVal(~Imm) != -1;
+ }
+
+ // Include the pieces autogenerated from the target description.
+#include "ARMGenDAGISel.inc"
+
+private:
+ /// SelectARMIndexedLoad - Indexed (pre/post inc/dec) load matching code for
+ /// ARM.
+ SDNode *SelectARMIndexedLoad(SDNode *N);
+ SDNode *SelectT2IndexedLoad(SDNode *N);
+
+ /// SelectVLD - Select NEON load intrinsics. NumVecs should be
+ /// 1, 2, 3 or 4. The opcode arrays specify the instructions used for
+ /// loads of D registers and even subregs and odd subregs of Q registers.
+ /// For NumVecs <= 2, QOpcodes1 is not used.
+ SDNode *SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
+ unsigned *DOpcodes,
+ unsigned *QOpcodes0, unsigned *QOpcodes1);
+
+ /// SelectVST - Select NEON store intrinsics. NumVecs should
+ /// be 1, 2, 3 or 4. The opcode arrays specify the instructions used for
+ /// stores of D registers and even subregs and odd subregs of Q registers.
+ /// For NumVecs <= 2, QOpcodes1 is not used.
+ SDNode *SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
+ unsigned *DOpcodes,
+ unsigned *QOpcodes0, unsigned *QOpcodes1);
+
+ /// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should
+ /// be 2, 3 or 4. The opcode arrays specify the instructions used for
+ /// load/store of D registers and Q registers.
+ SDNode *SelectVLDSTLane(SDNode *N, bool IsLoad,
+ bool isUpdating, unsigned NumVecs,
+ unsigned *DOpcodes, unsigned *QOpcodes);
+
+ /// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs
+ /// should be 2, 3 or 4. The opcode array specifies the instructions used
+ /// for loading D registers. (Q registers are not supported.)
+ SDNode *SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
+ unsigned *Opcodes);
+
+ /// SelectVTBL - Select NEON VTBL and VTBX intrinsics. NumVecs should be 2,
+ /// 3 or 4. These are custom-selected so that a REG_SEQUENCE can be
+ /// generated to force the table registers to be consecutive.
+ SDNode *SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, unsigned Opc);
+
+ /// SelectV6T2BitfieldExtractOp - Select SBFX/UBFX instructions for ARM.
+ SDNode *SelectV6T2BitfieldExtractOp(SDNode *N, bool isSigned);
+
+ /// SelectCMOVOp - Select CMOV instructions for ARM.
+ SDNode *SelectCMOVOp(SDNode *N);
+ SDNode *SelectT2CMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
+ ARMCC::CondCodes CCVal, SDValue CCR,
+ SDValue InFlag);
+ SDNode *SelectARMCMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
+ ARMCC::CondCodes CCVal, SDValue CCR,
+ SDValue InFlag);
+ SDNode *SelectT2CMOVImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
+ ARMCC::CondCodes CCVal, SDValue CCR,
+ SDValue InFlag);
+ SDNode *SelectARMCMOVImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
+ ARMCC::CondCodes CCVal, SDValue CCR,
+ SDValue InFlag);
+
+ // Select special operations if node forms integer ABS pattern
+ SDNode *SelectABSOp(SDNode *N);
+
+ SDNode *SelectConcatVector(SDNode *N);
+
+ SDNode *SelectAtomic64(SDNode *Node, unsigned Opc);
+
+ /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
+ /// inline asm expressions.
+ virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps);
+
+ // Form pairs of consecutive S, D, or Q registers.
+ SDNode *PairSRegs(EVT VT, SDValue V0, SDValue V1);
+ SDNode *PairDRegs(EVT VT, SDValue V0, SDValue V1);
+ SDNode *PairQRegs(EVT VT, SDValue V0, SDValue V1);
+
+ // Form sequences of 4 consecutive S, D, or Q registers.
+ SDNode *QuadSRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
+ SDNode *QuadDRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
+ SDNode *QuadQRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
+
+ // Get the alignment operand for a NEON VLD or VST instruction.
+ SDValue GetVLDSTAlign(SDValue Align, unsigned NumVecs, bool is64BitVector);
+};
+}
+
+/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
+/// operand. If so Imm will receive the 32-bit value.
+static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
+ if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
+ Imm = cast<ConstantSDNode>(N)->getZExtValue();
+ return true;
+ }
+ return false;
+}
+
+// isInt32Immediate - This method tests to see if a constant operand.
+// If so Imm will receive the 32 bit value.
+static bool isInt32Immediate(SDValue N, unsigned &Imm) {
+ return isInt32Immediate(N.getNode(), Imm);
+}
+
+// isOpcWithIntImmediate - This method tests to see if the node is a specific
+// opcode and that it has a immediate integer right operand.
+// If so Imm will receive the 32 bit value.
+static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
+ return N->getOpcode() == Opc &&
+ isInt32Immediate(N->getOperand(1).getNode(), Imm);
+}
+
+/// \brief Check whether a particular node is a constant value representable as
+/// (N * Scale) where (N in [\arg RangeMin, \arg RangeMax).
+///
+/// \param ScaledConstant [out] - On success, the pre-scaled constant value.
+static bool isScaledConstantInRange(SDValue Node, int Scale,
+ int RangeMin, int RangeMax,
+ int &ScaledConstant) {
+ assert(Scale > 0 && "Invalid scale!");
+
+ // Check that this is a constant.
+ const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
+ if (!C)
+ return false;
+
+ ScaledConstant = (int) C->getZExtValue();
+ if ((ScaledConstant % Scale) != 0)
+ return false;
+
+ ScaledConstant /= Scale;
+ return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
+}
+
+/// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
+/// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
+/// least on current ARM implementations) which should be avoidded.
+bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
+ if (OptLevel == CodeGenOpt::None)
+ return true;
+
+ if (!CheckVMLxHazard)
+ return true;
+
+ if (!Subtarget->isCortexA8() && !Subtarget->isCortexA9())
+ return true;
+
+ if (!N->hasOneUse())
+ return false;
+
+ SDNode *Use = *N->use_begin();
+ if (Use->getOpcode() == ISD::CopyToReg)
+ return true;
+ if (Use->isMachineOpcode()) {
+ const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
+ if (MCID.mayStore())
+ return true;
+ unsigned Opcode = MCID.getOpcode();
+ if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
+ return true;
+ // vmlx feeding into another vmlx. We actually want to unfold
+ // the use later in the MLxExpansion pass. e.g.
+ // vmla
+ // vmla (stall 8 cycles)
+ //
+ // vmul (5 cycles)
+ // vadd (5 cycles)
+ // vmla
+ // This adds up to about 18 - 19 cycles.
+ //
+ // vmla
+ // vmul (stall 4 cycles)
+ // vadd adds up to about 14 cycles.
+ return TII->isFpMLxInstruction(Opcode);
+ }
+
+ return false;
+}
+
+bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
+ ARM_AM::ShiftOpc ShOpcVal,
+ unsigned ShAmt) {
+ if (!Subtarget->isCortexA9())
+ return true;
+ if (Shift.hasOneUse())
+ return true;
+ // R << 2 is free.
+ return ShOpcVal == ARM_AM::lsl && ShAmt == 2;
+}
+
+bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
+ SDValue &BaseReg,
+ SDValue &Opc,
+ bool CheckProfitability) {
+ if (DisableShifterOp)
+ return false;
+
+ ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
+
+ // Don't match base register only case. That is matched to a separate
+ // lower complexity pattern with explicit register operand.
+ if (ShOpcVal == ARM_AM::no_shift) return false;
+
+ BaseReg = N.getOperand(0);
+ unsigned ShImmVal = 0;
+ ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
+ if (!RHS) return false;
+ ShImmVal = RHS->getZExtValue() & 31;
+ Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
+ MVT::i32);
+ return true;
+}
+
+bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
+ SDValue &BaseReg,
+ SDValue &ShReg,
+ SDValue &Opc,
+ bool CheckProfitability) {
+ if (DisableShifterOp)
+ return false;
+
+ ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
+
+ // Don't match base register only case. That is matched to a separate
+ // lower complexity pattern with explicit register operand.
+ if (ShOpcVal == ARM_AM::no_shift) return false;
+
+ BaseReg = N.getOperand(0);
+ unsigned ShImmVal = 0;
+ ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
+ if (RHS) return false;
+
+ ShReg = N.getOperand(1);
+ if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
+ return false;
+ Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
+ MVT::i32);
+ return true;
+}
+
+
+bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
+ SDValue &Base,
+ SDValue &OffImm) {
+ // Match simple R + imm12 operands.
+
+ // Base only.
+ if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
+ !CurDAG->isBaseWithConstantOffset(N)) {
+ if (N.getOpcode() == ISD::FrameIndex) {
+ // Match frame index.
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+
+ if (N.getOpcode() == ARMISD::Wrapper &&
+ !(Subtarget->useMovt() &&
+ N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
+ Base = N.getOperand(0);
+ } else
+ Base = N;
+ OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ int RHSC = (int)RHS->getZExtValue();
+ if (N.getOpcode() == ISD::SUB)
+ RHSC = -RHSC;
+
+ if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ }
+ OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
+ return true;
+ }
+ }
+
+ // Base only.
+ Base = N;
+ OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+}
+
+
+
+bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
+ SDValue &Opc) {
+ if (N.getOpcode() == ISD::MUL &&
+ (!Subtarget->isCortexA9() || N.hasOneUse())) {
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ // X * [3,5,9] -> X + X * [2,4,8] etc.
+ int RHSC = (int)RHS->getZExtValue();
+ if (RHSC & 1) {
+ RHSC = RHSC & ~1;
+ ARM_AM::AddrOpc AddSub = ARM_AM::add;
+ if (RHSC < 0) {
+ AddSub = ARM_AM::sub;
+ RHSC = - RHSC;
+ }
+ if (isPowerOf2_32(RHSC)) {
+ unsigned ShAmt = Log2_32(RHSC);
+ Base = Offset = N.getOperand(0);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
+ ARM_AM::lsl),
+ MVT::i32);
+ return true;
+ }
+ }
+ }
+ }
+
+ if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
+ // ISD::OR that is equivalent to an ISD::ADD.
+ !CurDAG->isBaseWithConstantOffset(N))
+ return false;
+
+ // Leave simple R +/- imm12 operands for LDRi12
+ if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
+ int RHSC;
+ if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
+ -0x1000+1, 0x1000, RHSC)) // 12 bits.
+ return false;
+ }
+
+ // Otherwise this is R +/- [possibly shifted] R.
+ ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add;
+ ARM_AM::ShiftOpc ShOpcVal =
+ ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
+ unsigned ShAmt = 0;
+
+ Base = N.getOperand(0);
+ Offset = N.getOperand(1);
+
+ if (ShOpcVal != ARM_AM::no_shift) {
+ // Check to see if the RHS of the shift is a constant, if not, we can't fold
+ // it.
+ if (ConstantSDNode *Sh =
+ dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
+ ShAmt = Sh->getZExtValue();
+ if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
+ Offset = N.getOperand(1).getOperand(0);
+ else {
+ ShAmt = 0;
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ } else {
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ }
+
+ // Try matching (R shl C) + (R).
+ if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
+ !(Subtarget->isCortexA9() || N.getOperand(0).hasOneUse())) {
+ ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
+ if (ShOpcVal != ARM_AM::no_shift) {
+ // Check to see if the RHS of the shift is a constant, if not, we can't
+ // fold it.
+ if (ConstantSDNode *Sh =
+ dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
+ ShAmt = Sh->getZExtValue();
+ if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
+ Offset = N.getOperand(0).getOperand(0);
+ Base = N.getOperand(1);
+ } else {
+ ShAmt = 0;
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ } else {
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ }
+ }
+
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
+ MVT::i32);
+ return true;
+}
+
+
+
+
+//-----
+
+AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
+ SDValue &Base,
+ SDValue &Offset,
+ SDValue &Opc) {
+ if (N.getOpcode() == ISD::MUL &&
+ (!Subtarget->isCortexA9() || N.hasOneUse())) {
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ // X * [3,5,9] -> X + X * [2,4,8] etc.
+ int RHSC = (int)RHS->getZExtValue();
+ if (RHSC & 1) {
+ RHSC = RHSC & ~1;
+ ARM_AM::AddrOpc AddSub = ARM_AM::add;
+ if (RHSC < 0) {
+ AddSub = ARM_AM::sub;
+ RHSC = - RHSC;
+ }
+ if (isPowerOf2_32(RHSC)) {
+ unsigned ShAmt = Log2_32(RHSC);
+ Base = Offset = N.getOperand(0);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
+ ARM_AM::lsl),
+ MVT::i32);
+ return AM2_SHOP;
+ }
+ }
+ }
+ }
+
+ if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
+ // ISD::OR that is equivalent to an ADD.
+ !CurDAG->isBaseWithConstantOffset(N)) {
+ Base = N;
+ if (N.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ } else if (N.getOpcode() == ARMISD::Wrapper &&
+ !(Subtarget->useMovt() &&
+ N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
+ Base = N.getOperand(0);
+ }
+ Offset = CurDAG->getRegister(0, MVT::i32);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
+ ARM_AM::no_shift),
+ MVT::i32);
+ return AM2_BASE;
+ }
+
+ // Match simple R +/- imm12 operands.
+ if (N.getOpcode() != ISD::SUB) {
+ int RHSC;
+ if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
+ -0x1000+1, 0x1000, RHSC)) { // 12 bits.
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ }
+ Offset = CurDAG->getRegister(0, MVT::i32);
+
+ ARM_AM::AddrOpc AddSub = ARM_AM::add;
+ if (RHSC < 0) {
+ AddSub = ARM_AM::sub;
+ RHSC = - RHSC;
+ }
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, RHSC,
+ ARM_AM::no_shift),
+ MVT::i32);
+ return AM2_BASE;
+ }
+ }
+
+ if (Subtarget->isCortexA9() && !N.hasOneUse()) {
+ // Compute R +/- (R << N) and reuse it.
+ Base = N;
+ Offset = CurDAG->getRegister(0, MVT::i32);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
+ ARM_AM::no_shift),
+ MVT::i32);
+ return AM2_BASE;
+ }
+
+ // Otherwise this is R +/- [possibly shifted] R.
+ ARM_AM::AddrOpc AddSub = N.getOpcode() != ISD::SUB ? ARM_AM::add:ARM_AM::sub;
+ ARM_AM::ShiftOpc ShOpcVal =
+ ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
+ unsigned ShAmt = 0;
+
+ Base = N.getOperand(0);
+ Offset = N.getOperand(1);
+
+ if (ShOpcVal != ARM_AM::no_shift) {
+ // Check to see if the RHS of the shift is a constant, if not, we can't fold
+ // it.
+ if (ConstantSDNode *Sh =
+ dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
+ ShAmt = Sh->getZExtValue();
+ if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
+ Offset = N.getOperand(1).getOperand(0);
+ else {
+ ShAmt = 0;
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ } else {
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ }
+
+ // Try matching (R shl C) + (R).
+ if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
+ !(Subtarget->isCortexA9() || N.getOperand(0).hasOneUse())) {
+ ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
+ if (ShOpcVal != ARM_AM::no_shift) {
+ // Check to see if the RHS of the shift is a constant, if not, we can't
+ // fold it.
+ if (ConstantSDNode *Sh =
+ dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
+ ShAmt = Sh->getZExtValue();
+ if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
+ Offset = N.getOperand(0).getOperand(0);
+ Base = N.getOperand(1);
+ } else {
+ ShAmt = 0;
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ } else {
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ }
+ }
+
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
+ MVT::i32);
+ return AM2_SHOP;
+}
+
+bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
+ SDValue &Offset, SDValue &Opc) {
+ unsigned Opcode = Op->getOpcode();
+ ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
+ ? cast<LoadSDNode>(Op)->getAddressingMode()
+ : cast<StoreSDNode>(Op)->getAddressingMode();
+ ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
+ ? ARM_AM::add : ARM_AM::sub;
+ int Val;
+ if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
+ return false;
+
+ Offset = N;
+ ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
+ unsigned ShAmt = 0;
+ if (ShOpcVal != ARM_AM::no_shift) {
+ // Check to see if the RHS of the shift is a constant, if not, we can't fold
+ // it.
+ if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ ShAmt = Sh->getZExtValue();
+ if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
+ Offset = N.getOperand(0);
+ else {
+ ShAmt = 0;
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ } else {
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ }
+
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
+ MVT::i32);
+ return true;
+}
+
+bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
+ SDValue &Offset, SDValue &Opc) {
+ unsigned Opcode = Op->getOpcode();
+ ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
+ ? cast<LoadSDNode>(Op)->getAddressingMode()
+ : cast<StoreSDNode>(Op)->getAddressingMode();
+ ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
+ ? ARM_AM::add : ARM_AM::sub;
+ int Val;
+ if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
+ if (AddSub == ARM_AM::sub) Val *= -1;
+ Offset = CurDAG->getRegister(0, MVT::i32);
+ Opc = CurDAG->getTargetConstant(Val, MVT::i32);
+ return true;
+ }
+
+ return false;
+}
+
+
+bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
+ SDValue &Offset, SDValue &Opc) {
+ unsigned Opcode = Op->getOpcode();
+ ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
+ ? cast<LoadSDNode>(Op)->getAddressingMode()
+ : cast<StoreSDNode>(Op)->getAddressingMode();
+ ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
+ ? ARM_AM::add : ARM_AM::sub;
+ int Val;
+ if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
+ Offset = CurDAG->getRegister(0, MVT::i32);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
+ ARM_AM::no_shift),
+ MVT::i32);
+ return true;
+ }
+
+ return false;
+}
+
+bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
+ Base = N;
+ return true;
+}
+
+bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
+ SDValue &Base, SDValue &Offset,
+ SDValue &Opc) {
+ if (N.getOpcode() == ISD::SUB) {
+ // X - C is canonicalize to X + -C, no need to handle it here.
+ Base = N.getOperand(0);
+ Offset = N.getOperand(1);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0),MVT::i32);
+ return true;
+ }
+
+ if (!CurDAG->isBaseWithConstantOffset(N)) {
+ Base = N;
+ if (N.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ }
+ Offset = CurDAG->getRegister(0, MVT::i32);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0),MVT::i32);
+ return true;
+ }
+
+ // If the RHS is +/- imm8, fold into addr mode.
+ int RHSC;
+ if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
+ -256 + 1, 256, RHSC)) { // 8 bits.
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ }
+ Offset = CurDAG->getRegister(0, MVT::i32);
+
+ ARM_AM::AddrOpc AddSub = ARM_AM::add;
+ if (RHSC < 0) {
+ AddSub = ARM_AM::sub;
+ RHSC = -RHSC;
+ }
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC),MVT::i32);
+ return true;
+ }
+
+ Base = N.getOperand(0);
+ Offset = N.getOperand(1);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), MVT::i32);
+ return true;
+}
+
+bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
+ SDValue &Offset, SDValue &Opc) {
+ unsigned Opcode = Op->getOpcode();
+ ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
+ ? cast<LoadSDNode>(Op)->getAddressingMode()
+ : cast<StoreSDNode>(Op)->getAddressingMode();
+ ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
+ ? ARM_AM::add : ARM_AM::sub;
+ int Val;
+ if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 12 bits.
+ Offset = CurDAG->getRegister(0, MVT::i32);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), MVT::i32);
+ return true;
+ }
+
+ Offset = N;
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), MVT::i32);
+ return true;
+}
+
+bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
+ SDValue &Base, SDValue &Offset) {
+ if (!CurDAG->isBaseWithConstantOffset(N)) {
+ Base = N;
+ if (N.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ } else if (N.getOpcode() == ARMISD::Wrapper &&
+ !(Subtarget->useMovt() &&
+ N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
+ Base = N.getOperand(0);
+ }
+ Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
+ MVT::i32);
+ return true;
+ }
+
+ // If the RHS is +/- imm8, fold into addr mode.
+ int RHSC;
+ if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4,
+ -256 + 1, 256, RHSC)) {
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ }
+
+ ARM_AM::AddrOpc AddSub = ARM_AM::add;
+ if (RHSC < 0) {
+ AddSub = ARM_AM::sub;
+ RHSC = -RHSC;
+ }
+ Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
+ MVT::i32);
+ return true;
+ }
+
+ Base = N;
+ Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
+ MVT::i32);
+ return true;
+}
+
+bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
+ SDValue &Align) {
+ Addr = N;
+
+ unsigned Alignment = 0;
+ if (LSBaseSDNode *LSN = dyn_cast<LSBaseSDNode>(Parent)) {
+ // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
+ // The maximum alignment is equal to the memory size being referenced.
+ unsigned LSNAlign = LSN->getAlignment();
+ unsigned MemSize = LSN->getMemoryVT().getSizeInBits() / 8;
+ if (LSNAlign > MemSize && MemSize > 1)
+ Alignment = MemSize;
+ } else {
+ // All other uses of addrmode6 are for intrinsics. For now just record
+ // the raw alignment value; it will be refined later based on the legal
+ // alignment operands for the intrinsic.
+ Alignment = cast<MemIntrinsicSDNode>(Parent)->getAlignment();
+ }
+
+ Align = CurDAG->getTargetConstant(Alignment, MVT::i32);
+ return true;
+}
+
+bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
+ SDValue &Offset) {
+ LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
+ ISD::MemIndexedMode AM = LdSt->getAddressingMode();
+ if (AM != ISD::POST_INC)
+ return false;
+ Offset = N;
+ if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
+ if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
+ Offset = CurDAG->getRegister(0, MVT::i32);
+ }
+ return true;
+}
+
+bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
+ SDValue &Offset, SDValue &Label) {
+ if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
+ Offset = N.getOperand(0);
+ SDValue N1 = N.getOperand(1);
+ Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
+ MVT::i32);
+ return true;
+ }
+
+ return false;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Thumb Addressing Modes
+//===----------------------------------------------------------------------===//
+
+bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N,
+ SDValue &Base, SDValue &Offset){
+ if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
+ ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
+ if (!NC || !NC->isNullValue())
+ return false;
+
+ Base = Offset = N;
+ return true;
+ }
+
+ Base = N.getOperand(0);
+ Offset = N.getOperand(1);
+ return true;
+}
+
+bool
+ARMDAGToDAGISel::SelectThumbAddrModeRI(SDValue N, SDValue &Base,
+ SDValue &Offset, unsigned Scale) {
+ if (Scale == 4) {
+ SDValue TmpBase, TmpOffImm;
+ if (SelectThumbAddrModeSP(N, TmpBase, TmpOffImm))
+ return false; // We want to select tLDRspi / tSTRspi instead.
+
+ if (N.getOpcode() == ARMISD::Wrapper &&
+ N.getOperand(0).getOpcode() == ISD::TargetConstantPool)
+ return false; // We want to select tLDRpci instead.
+ }
+
+ if (!CurDAG->isBaseWithConstantOffset(N))
+ return false;
+
+ // Thumb does not have [sp, r] address mode.
+ RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
+ RegisterSDNode *RHSR = dyn_cast<RegisterSDNode>(N.getOperand(1));
+ if ((LHSR && LHSR->getReg() == ARM::SP) ||
+ (RHSR && RHSR->getReg() == ARM::SP))
+ return false;
+
+ // FIXME: Why do we explicitly check for a match here and then return false?
+ // Presumably to allow something else to match, but shouldn't this be
+ // documented?
+ int RHSC;
+ if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC))
+ return false;
+
+ Base = N.getOperand(0);
+ Offset = N.getOperand(1);
+ return true;
+}
+
+bool
+ARMDAGToDAGISel::SelectThumbAddrModeRI5S1(SDValue N,
+ SDValue &Base,
+ SDValue &Offset) {
+ return SelectThumbAddrModeRI(N, Base, Offset, 1);
+}
+
+bool
+ARMDAGToDAGISel::SelectThumbAddrModeRI5S2(SDValue N,
+ SDValue &Base,
+ SDValue &Offset) {
+ return SelectThumbAddrModeRI(N, Base, Offset, 2);
+}
+
+bool
+ARMDAGToDAGISel::SelectThumbAddrModeRI5S4(SDValue N,
+ SDValue &Base,
+ SDValue &Offset) {
+ return SelectThumbAddrModeRI(N, Base, Offset, 4);
+}
+
+bool
+ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
+ SDValue &Base, SDValue &OffImm) {
+ if (Scale == 4) {
+ SDValue TmpBase, TmpOffImm;
+ if (SelectThumbAddrModeSP(N, TmpBase, TmpOffImm))
+ return false; // We want to select tLDRspi / tSTRspi instead.
+
+ if (N.getOpcode() == ARMISD::Wrapper &&
+ N.getOperand(0).getOpcode() == ISD::TargetConstantPool)
+ return false; // We want to select tLDRpci instead.
+ }
+
+ if (!CurDAG->isBaseWithConstantOffset(N)) {
+ if (N.getOpcode() == ARMISD::Wrapper &&
+ !(Subtarget->useMovt() &&
+ N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
+ Base = N.getOperand(0);
+ } else {
+ Base = N;
+ }
+
+ OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+
+ RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
+ RegisterSDNode *RHSR = dyn_cast<RegisterSDNode>(N.getOperand(1));
+ if ((LHSR && LHSR->getReg() == ARM::SP) ||
+ (RHSR && RHSR->getReg() == ARM::SP)) {
+ ConstantSDNode *LHS = dyn_cast<ConstantSDNode>(N.getOperand(0));
+ ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
+ unsigned LHSC = LHS ? LHS->getZExtValue() : 0;
+ unsigned RHSC = RHS ? RHS->getZExtValue() : 0;
+
+ // Thumb does not have [sp, #imm5] address mode for non-zero imm5.
+ if (LHSC != 0 || RHSC != 0) return false;
+
+ Base = N;
+ OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+
+ // If the RHS is + imm5 * scale, fold into addr mode.
+ int RHSC;
+ if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
+ Base = N.getOperand(0);
+ OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
+ return true;
+ }
+
+ Base = N.getOperand(0);
+ OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+}
+
+bool
+ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
+ SDValue &OffImm) {
+ return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
+}
+
+bool
+ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
+ SDValue &OffImm) {
+ return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
+}
+
+bool
+ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
+ SDValue &OffImm) {
+ return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
+}
+
+bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
+ SDValue &Base, SDValue &OffImm) {
+ if (N.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+
+ if (!CurDAG->isBaseWithConstantOffset(N))
+ return false;
+
+ RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
+ if (N.getOperand(0).getOpcode() == ISD::FrameIndex ||
+ (LHSR && LHSR->getReg() == ARM::SP)) {
+ // If the RHS is + imm8 * scale, fold into addr mode.
+ int RHSC;
+ if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ }
+ OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Thumb 2 Addressing Modes
+//===----------------------------------------------------------------------===//
+
+
+bool ARMDAGToDAGISel::SelectT2ShifterOperandReg(SDValue N, SDValue &BaseReg,
+ SDValue &Opc) {
+ if (DisableShifterOp)
+ return false;
+
+ ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
+
+ // Don't match base register only case. That is matched to a separate
+ // lower complexity pattern with explicit register operand.
+ if (ShOpcVal == ARM_AM::no_shift) return false;
+
+ BaseReg = N.getOperand(0);
+ unsigned ShImmVal = 0;
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ ShImmVal = RHS->getZExtValue() & 31;
+ Opc = getI32Imm(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal));
+ return true;
+ }
+
+ return false;
+}
+
+bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
+ SDValue &Base, SDValue &OffImm) {
+ // Match simple R + imm12 operands.
+
+ // Base only.
+ if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
+ !CurDAG->isBaseWithConstantOffset(N)) {
+ if (N.getOpcode() == ISD::FrameIndex) {
+ // Match frame index.
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+
+ if (N.getOpcode() == ARMISD::Wrapper &&
+ !(Subtarget->useMovt() &&
+ N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::TargetConstantPool)
+ return false; // We want to select t2LDRpci instead.
+ } else
+ Base = N;
+ OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ if (SelectT2AddrModeImm8(N, Base, OffImm))
+ // Let t2LDRi8 handle (R - imm8).
+ return false;
+
+ int RHSC = (int)RHS->getZExtValue();
+ if (N.getOpcode() == ISD::SUB)
+ RHSC = -RHSC;
+
+ if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ }
+ OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
+ return true;
+ }
+ }
+
+ // Base only.
+ Base = N;
+ OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+}
+
+bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
+ SDValue &Base, SDValue &OffImm) {
+ // Match simple R - imm8 operands.
+ if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
+ !CurDAG->isBaseWithConstantOffset(N))
+ return false;
+
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ int RHSC = (int)RHS->getSExtValue();
+ if (N.getOpcode() == ISD::SUB)
+ RHSC = -RHSC;
+
+ if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ }
+ OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
+ SDValue &OffImm){
+ unsigned Opcode = Op->getOpcode();
+ ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
+ ? cast<LoadSDNode>(Op)->getAddressingMode()
+ : cast<StoreSDNode>(Op)->getAddressingMode();
+ int RHSC;
+ if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
+ OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
+ ? CurDAG->getTargetConstant(RHSC, MVT::i32)
+ : CurDAG->getTargetConstant(-RHSC, MVT::i32);
+ return true;
+ }
+
+ return false;
+}
+
+bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
+ SDValue &Base,
+ SDValue &OffReg, SDValue &ShImm) {
+ // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
+ if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
+ return false;
+
+ // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ int RHSC = (int)RHS->getZExtValue();
+ if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
+ return false;
+ else if (RHSC < 0 && RHSC >= -255) // 8 bits
+ return false;
+ }
+
+ // Look for (R + R) or (R + (R << [1,2,3])).
+ unsigned ShAmt = 0;
+ Base = N.getOperand(0);
+ OffReg = N.getOperand(1);
+
+ // Swap if it is ((R << c) + R).
+ ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode());
+ if (ShOpcVal != ARM_AM::lsl) {
+ ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
+ if (ShOpcVal == ARM_AM::lsl)
+ std::swap(Base, OffReg);
+ }
+
+ if (ShOpcVal == ARM_AM::lsl) {
+ // Check to see if the RHS of the shift is a constant, if not, we can't fold
+ // it.
+ if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
+ ShAmt = Sh->getZExtValue();
+ if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
+ OffReg = OffReg.getOperand(0);
+ else {
+ ShAmt = 0;
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ } else {
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ }
+
+ ShImm = CurDAG->getTargetConstant(ShAmt, MVT::i32);
+
+ return true;
+}
+
+//===--------------------------------------------------------------------===//
+
+/// getAL - Returns a ARMCC::AL immediate node.
+static inline SDValue getAL(SelectionDAG *CurDAG) {
+ return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, MVT::i32);
+}
+
+SDNode *ARMDAGToDAGISel::SelectARMIndexedLoad(SDNode *N) {
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ ISD::MemIndexedMode AM = LD->getAddressingMode();
+ if (AM == ISD::UNINDEXED)
+ return NULL;
+
+ EVT LoadedVT = LD->getMemoryVT();
+ SDValue Offset, AMOpc;
+ bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
+ unsigned Opcode = 0;
+ bool Match = false;
+ if (LoadedVT == MVT::i32 && isPre &&
+ SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
+ Opcode = ARM::LDR_PRE_IMM;
+ Match = true;
+ } else if (LoadedVT == MVT::i32 && !isPre &&
+ SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
+ Opcode = ARM::LDR_POST_IMM;
+ Match = true;
+ } else if (LoadedVT == MVT::i32 &&
+ SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
+ Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
+ Match = true;
+
+ } else if (LoadedVT == MVT::i16 &&
+ SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
+ Match = true;
+ Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
+ ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
+ : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
+ } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
+ if (LD->getExtensionType() == ISD::SEXTLOAD) {
+ if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
+ Match = true;
+ Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
+ }
+ } else {
+ if (isPre &&
+ SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
+ Match = true;
+ Opcode = ARM::LDRB_PRE_IMM;
+ } else if (!isPre &&
+ SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
+ Match = true;
+ Opcode = ARM::LDRB_POST_IMM;
+ } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
+ Match = true;
+ Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
+ }
+ }
+ }
+
+ if (Match) {
+ if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
+ SDValue Chain = LD->getChain();
+ SDValue Base = LD->getBasePtr();
+ SDValue Ops[]= { Base, AMOpc, getAL(CurDAG),
+ CurDAG->getRegister(0, MVT::i32), Chain };
+ return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), MVT::i32,
+ MVT::i32, MVT::Other, Ops, 5);
+ } else {
+ SDValue Chain = LD->getChain();
+ SDValue Base = LD->getBasePtr();
+ SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG),
+ CurDAG->getRegister(0, MVT::i32), Chain };
+ return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), MVT::i32,
+ MVT::i32, MVT::Other, Ops, 6);
+ }
+ }
+
+ return NULL;
+}
+
+SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) {
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ ISD::MemIndexedMode AM = LD->getAddressingMode();
+ if (AM == ISD::UNINDEXED)
+ return NULL;
+
+ EVT LoadedVT = LD->getMemoryVT();
+ bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
+ SDValue Offset;
+ bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
+ unsigned Opcode = 0;
+ bool Match = false;
+ if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
+ switch (LoadedVT.getSimpleVT().SimpleTy) {
+ case MVT::i32:
+ Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
+ break;
+ case MVT::i16:
+ if (isSExtLd)
+ Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
+ else
+ Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
+ break;
+ case MVT::i8:
+ case MVT::i1:
+ if (isSExtLd)
+ Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
+ else
+ Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
+ break;
+ default:
+ return NULL;
+ }
+ Match = true;
+ }
+
+ if (Match) {
+ SDValue Chain = LD->getChain();
+ SDValue Base = LD->getBasePtr();
+ SDValue Ops[]= { Base, Offset, getAL(CurDAG),
+ CurDAG->getRegister(0, MVT::i32), Chain };
+ return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), MVT::i32, MVT::i32,
+ MVT::Other, Ops, 5);
+ }
+
+ return NULL;
+}
+
+/// PairSRegs - Form a D register from a pair of S registers.
+///
+SDNode *ARMDAGToDAGISel::PairSRegs(EVT VT, SDValue V0, SDValue V1) {
+ DebugLoc dl = V0.getNode()->getDebugLoc();
+ SDValue RegClass =
+ CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, MVT::i32);
+ SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, MVT::i32);
+ const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5);
+}
+
+/// PairDRegs - Form a quad register from a pair of D registers.
+///
+SDNode *ARMDAGToDAGISel::PairDRegs(EVT VT, SDValue V0, SDValue V1) {
+ DebugLoc dl = V0.getNode()->getDebugLoc();
+ SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, MVT::i32);
+ SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32);
+ const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5);
+}
+
+/// PairQRegs - Form 4 consecutive D registers from a pair of Q registers.
+///
+SDNode *ARMDAGToDAGISel::PairQRegs(EVT VT, SDValue V0, SDValue V1) {
+ DebugLoc dl = V0.getNode()->getDebugLoc();
+ SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, MVT::i32);
+ SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, MVT::i32);
+ const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5);
+}
+
+/// QuadSRegs - Form 4 consecutive S registers.
+///
+SDNode *ARMDAGToDAGISel::QuadSRegs(EVT VT, SDValue V0, SDValue V1,
+ SDValue V2, SDValue V3) {
+ DebugLoc dl = V0.getNode()->getDebugLoc();
+ SDValue RegClass =
+ CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, MVT::i32);
+ SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, MVT::i32);
+ SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, MVT::i32);
+ SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, MVT::i32);
+ const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
+ V2, SubReg2, V3, SubReg3 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 9);
+}
+
+/// QuadDRegs - Form 4 consecutive D registers.
+///
+SDNode *ARMDAGToDAGISel::QuadDRegs(EVT VT, SDValue V0, SDValue V1,
+ SDValue V2, SDValue V3) {
+ DebugLoc dl = V0.getNode()->getDebugLoc();
+ SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, MVT::i32);
+ SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32);
+ SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, MVT::i32);
+ SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, MVT::i32);
+ const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
+ V2, SubReg2, V3, SubReg3 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 9);
+}
+
+/// QuadQRegs - Form 4 consecutive Q registers.
+///
+SDNode *ARMDAGToDAGISel::QuadQRegs(EVT VT, SDValue V0, SDValue V1,
+ SDValue V2, SDValue V3) {
+ DebugLoc dl = V0.getNode()->getDebugLoc();
+ SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, MVT::i32);
+ SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, MVT::i32);
+ SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, MVT::i32);
+ SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, MVT::i32);
+ const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
+ V2, SubReg2, V3, SubReg3 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 9);
+}
+
+/// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
+/// of a NEON VLD or VST instruction. The supported values depend on the
+/// number of registers being loaded.
+SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, unsigned NumVecs,
+ bool is64BitVector) {
+ unsigned NumRegs = NumVecs;
+ if (!is64BitVector && NumVecs < 3)
+ NumRegs *= 2;
+
+ unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
+ if (Alignment >= 32 && NumRegs == 4)
+ Alignment = 32;
+ else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
+ Alignment = 16;
+ else if (Alignment >= 8)
+ Alignment = 8;
+ else
+ Alignment = 0;
+
+ return CurDAG->getTargetConstant(Alignment, MVT::i32);
+}
+
+SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
+ unsigned *DOpcodes, unsigned *QOpcodes0,
+ unsigned *QOpcodes1) {
+ assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
+ DebugLoc dl = N->getDebugLoc();
+
+ SDValue MemAddr, Align;
+ unsigned AddrOpIdx = isUpdating ? 1 : 2;
+ if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
+ return NULL;
+
+ SDValue Chain = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ bool is64BitVector = VT.is64BitVector();
+ Align = GetVLDSTAlign(Align, NumVecs, is64BitVector);
+
+ unsigned OpcodeIndex;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("unhandled vld type");
+ // Double-register operations:
+ case MVT::v8i8: OpcodeIndex = 0; break;
+ case MVT::v4i16: OpcodeIndex = 1; break;
+ case MVT::v2f32:
+ case MVT::v2i32: OpcodeIndex = 2; break;
+ case MVT::v1i64: OpcodeIndex = 3; break;
+ // Quad-register operations:
+ case MVT::v16i8: OpcodeIndex = 0; break;
+ case MVT::v8i16: OpcodeIndex = 1; break;
+ case MVT::v4f32:
+ case MVT::v4i32: OpcodeIndex = 2; break;
+ case MVT::v2i64: OpcodeIndex = 3;
+ assert(NumVecs == 1 && "v2i64 type only supported for VLD1");
+ break;
+ }
+
+ EVT ResTy;
+ if (NumVecs == 1)
+ ResTy = VT;
+ else {
+ unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
+ if (!is64BitVector)
+ ResTyElts *= 2;
+ ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
+ }
+ std::vector<EVT> ResTys;
+ ResTys.push_back(ResTy);
+ if (isUpdating)
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::Other);
+
+ SDValue Pred = getAL(CurDAG);
+ SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+ SDNode *VLd;
+ SmallVector<SDValue, 7> Ops;
+
+ // Double registers and VLD1/VLD2 quad registers are directly supported.
+ if (is64BitVector || NumVecs <= 2) {
+ unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
+ QOpcodes0[OpcodeIndex]);
+ Ops.push_back(MemAddr);
+ Ops.push_back(Align);
+ if (isUpdating) {
+ SDValue Inc = N->getOperand(AddrOpIdx + 1);
+ Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
+ }
+ Ops.push_back(Pred);
+ Ops.push_back(Reg0);
+ Ops.push_back(Chain);
+ VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size());
+
+ } else {
+ // Otherwise, quad registers are loaded with two separate instructions,
+ // where one loads the even registers and the other loads the odd registers.
+ EVT AddrTy = MemAddr.getValueType();
+
+ // Load the even subregs. This is always an updating load, so that it
+ // provides the address to the second load for the odd subregs.
+ SDValue ImplDef =
+ SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
+ const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
+ SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
+ ResTy, AddrTy, MVT::Other, OpsA, 7);
+ Chain = SDValue(VLdA, 2);
+
+ // Load the odd subregs.
+ Ops.push_back(SDValue(VLdA, 1));
+ Ops.push_back(Align);
+ if (isUpdating) {
+ SDValue Inc = N->getOperand(AddrOpIdx + 1);
+ assert(isa<ConstantSDNode>(Inc.getNode()) &&
+ "only constant post-increment update allowed for VLD3/4");
+ (void)Inc;
+ Ops.push_back(Reg0);
+ }
+ Ops.push_back(SDValue(VLdA, 0));
+ Ops.push_back(Pred);
+ Ops.push_back(Reg0);
+ Ops.push_back(Chain);
+ VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
+ Ops.data(), Ops.size());
+ }
+
+ // Transfer memoperands.
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+ cast<MachineSDNode>(VLd)->setMemRefs(MemOp, MemOp + 1);
+
+ if (NumVecs == 1)
+ return VLd;
+
+ // Extract out the subregisters.
+ SDValue SuperReg = SDValue(VLd, 0);
+ assert(ARM::dsub_7 == ARM::dsub_0+7 &&
+ ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
+ unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+ ReplaceUses(SDValue(N, Vec),
+ CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
+ ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
+ if (isUpdating)
+ ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
+ return NULL;
+}
+
+SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
+ unsigned *DOpcodes, unsigned *QOpcodes0,
+ unsigned *QOpcodes1) {
+ assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
+ DebugLoc dl = N->getDebugLoc();
+
+ SDValue MemAddr, Align;
+ unsigned AddrOpIdx = isUpdating ? 1 : 2;
+ unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
+ if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
+ return NULL;
+
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+
+ SDValue Chain = N->getOperand(0);
+ EVT VT = N->getOperand(Vec0Idx).getValueType();
+ bool is64BitVector = VT.is64BitVector();
+ Align = GetVLDSTAlign(Align, NumVecs, is64BitVector);
+
+ unsigned OpcodeIndex;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("unhandled vst type");
+ // Double-register operations:
+ case MVT::v8i8: OpcodeIndex = 0; break;
+ case MVT::v4i16: OpcodeIndex = 1; break;
+ case MVT::v2f32:
+ case MVT::v2i32: OpcodeIndex = 2; break;
+ case MVT::v1i64: OpcodeIndex = 3; break;
+ // Quad-register operations:
+ case MVT::v16i8: OpcodeIndex = 0; break;
+ case MVT::v8i16: OpcodeIndex = 1; break;
+ case MVT::v4f32:
+ case MVT::v4i32: OpcodeIndex = 2; break;
+ case MVT::v2i64: OpcodeIndex = 3;
+ assert(NumVecs == 1 && "v2i64 type only supported for VST1");
+ break;
+ }
+
+ std::vector<EVT> ResTys;
+ if (isUpdating)
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::Other);
+
+ SDValue Pred = getAL(CurDAG);
+ SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+ SmallVector<SDValue, 7> Ops;
+
+ // Double registers and VST1/VST2 quad registers are directly supported.
+ if (is64BitVector || NumVecs <= 2) {
+ SDValue SrcReg;
+ if (NumVecs == 1) {
+ SrcReg = N->getOperand(Vec0Idx);
+ } else if (is64BitVector) {
+ // Form a REG_SEQUENCE to force register allocation.
+ SDValue V0 = N->getOperand(Vec0Idx + 0);
+ SDValue V1 = N->getOperand(Vec0Idx + 1);
+ if (NumVecs == 2)
+ SrcReg = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
+ else {
+ SDValue V2 = N->getOperand(Vec0Idx + 2);
+ // If it's a vst3, form a quad D-register and leave the last part as
+ // an undef.
+ SDValue V3 = (NumVecs == 3)
+ ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
+ : N->getOperand(Vec0Idx + 3);
+ SrcReg = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
+ }
+ } else {
+ // Form a QQ register.
+ SDValue Q0 = N->getOperand(Vec0Idx);
+ SDValue Q1 = N->getOperand(Vec0Idx + 1);
+ SrcReg = SDValue(PairQRegs(MVT::v4i64, Q0, Q1), 0);
+ }
+
+ unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
+ QOpcodes0[OpcodeIndex]);
+ Ops.push_back(MemAddr);
+ Ops.push_back(Align);
+ if (isUpdating) {
+ SDValue Inc = N->getOperand(AddrOpIdx + 1);
+ Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
+ }
+ Ops.push_back(SrcReg);
+ Ops.push_back(Pred);
+ Ops.push_back(Reg0);
+ Ops.push_back(Chain);
+ SDNode *VSt =
+ CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size());
+
+ // Transfer memoperands.
+ cast<MachineSDNode>(VSt)->setMemRefs(MemOp, MemOp + 1);
+
+ return VSt;
+ }
+
+ // Otherwise, quad registers are stored with two separate instructions,
+ // where one stores the even registers and the other stores the odd registers.
+
+ // Form the QQQQ REG_SEQUENCE.
+ SDValue V0 = N->getOperand(Vec0Idx + 0);
+ SDValue V1 = N->getOperand(Vec0Idx + 1);
+ SDValue V2 = N->getOperand(Vec0Idx + 2);
+ SDValue V3 = (NumVecs == 3)
+ ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
+ : N->getOperand(Vec0Idx + 3);
+ SDValue RegSeq = SDValue(QuadQRegs(MVT::v8i64, V0, V1, V2, V3), 0);
+
+ // Store the even D registers. This is always an updating store, so that it
+ // provides the address to the second store for the odd subregs.
+ const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
+ SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
+ MemAddr.getValueType(),
+ MVT::Other, OpsA, 7);
+ cast<MachineSDNode>(VStA)->setMemRefs(MemOp, MemOp + 1);
+ Chain = SDValue(VStA, 1);
+
+ // Store the odd D registers.
+ Ops.push_back(SDValue(VStA, 0));
+ Ops.push_back(Align);
+ if (isUpdating) {
+ SDValue Inc = N->getOperand(AddrOpIdx + 1);
+ assert(isa<ConstantSDNode>(Inc.getNode()) &&
+ "only constant post-increment update allowed for VST3/4");
+ (void)Inc;
+ Ops.push_back(Reg0);
+ }
+ Ops.push_back(RegSeq);
+ Ops.push_back(Pred);
+ Ops.push_back(Reg0);
+ Ops.push_back(Chain);
+ SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
+ Ops.data(), Ops.size());
+ cast<MachineSDNode>(VStB)->setMemRefs(MemOp, MemOp + 1);
+ return VStB;
+}
+
+SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
+ bool isUpdating, unsigned NumVecs,
+ unsigned *DOpcodes,
+ unsigned *QOpcodes) {
+ assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
+ DebugLoc dl = N->getDebugLoc();
+
+ SDValue MemAddr, Align;
+ unsigned AddrOpIdx = isUpdating ? 1 : 2;
+ unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
+ if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
+ return NULL;
+
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+
+ SDValue Chain = N->getOperand(0);
+ unsigned Lane =
+ cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
+ EVT VT = N->getOperand(Vec0Idx).getValueType();
+ bool is64BitVector = VT.is64BitVector();
+
+ unsigned Alignment = 0;
+ if (NumVecs != 3) {
+ Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
+ unsigned NumBytes = NumVecs * VT.getVectorElementType().getSizeInBits()/8;
+ if (Alignment > NumBytes)
+ Alignment = NumBytes;
+ if (Alignment < 8 && Alignment < NumBytes)
+ Alignment = 0;
+ // Alignment must be a power of two; make sure of that.
+ Alignment = (Alignment & -Alignment);
+ if (Alignment == 1)
+ Alignment = 0;
+ }
+ Align = CurDAG->getTargetConstant(Alignment, MVT::i32);
+
+ unsigned OpcodeIndex;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("unhandled vld/vst lane type");
+ // Double-register operations:
+ case MVT::v8i8: OpcodeIndex = 0; break;
+ case MVT::v4i16: OpcodeIndex = 1; break;
+ case MVT::v2f32:
+ case MVT::v2i32: OpcodeIndex = 2; break;
+ // Quad-register operations:
+ case MVT::v8i16: OpcodeIndex = 0; break;
+ case MVT::v4f32:
+ case MVT::v4i32: OpcodeIndex = 1; break;
+ }
+
+ std::vector<EVT> ResTys;
+ if (IsLoad) {
+ unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
+ if (!is64BitVector)
+ ResTyElts *= 2;
+ ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
+ MVT::i64, ResTyElts));
+ }
+ if (isUpdating)
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::Other);
+
+ SDValue Pred = getAL(CurDAG);
+ SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(MemAddr);
+ Ops.push_back(Align);
+ if (isUpdating) {
+ SDValue Inc = N->getOperand(AddrOpIdx + 1);
+ Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
+ }
+
+ SDValue SuperReg;
+ SDValue V0 = N->getOperand(Vec0Idx + 0);
+ SDValue V1 = N->getOperand(Vec0Idx + 1);
+ if (NumVecs == 2) {
+ if (is64BitVector)
+ SuperReg = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
+ else
+ SuperReg = SDValue(PairQRegs(MVT::v4i64, V0, V1), 0);
+ } else {
+ SDValue V2 = N->getOperand(Vec0Idx + 2);
+ SDValue V3 = (NumVecs == 3)
+ ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
+ : N->getOperand(Vec0Idx + 3);
+ if (is64BitVector)
+ SuperReg = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
+ else
+ SuperReg = SDValue(QuadQRegs(MVT::v8i64, V0, V1, V2, V3), 0);
+ }
+ Ops.push_back(SuperReg);
+ Ops.push_back(getI32Imm(Lane));
+ Ops.push_back(Pred);
+ Ops.push_back(Reg0);
+ Ops.push_back(Chain);
+
+ unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
+ QOpcodes[OpcodeIndex]);
+ SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys,
+ Ops.data(), Ops.size());
+ cast<MachineSDNode>(VLdLn)->setMemRefs(MemOp, MemOp + 1);
+ if (!IsLoad)
+ return VLdLn;
+
+ // Extract the subregisters.
+ SuperReg = SDValue(VLdLn, 0);
+ assert(ARM::dsub_7 == ARM::dsub_0+7 &&
+ ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
+ unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+ ReplaceUses(SDValue(N, Vec),
+ CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
+ ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
+ if (isUpdating)
+ ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
+ return NULL;
+}
+
+SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating,
+ unsigned NumVecs, unsigned *Opcodes) {
+ assert(NumVecs >=2 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
+ DebugLoc dl = N->getDebugLoc();
+
+ SDValue MemAddr, Align;
+ if (!SelectAddrMode6(N, N->getOperand(1), MemAddr, Align))
+ return NULL;
+
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+
+ SDValue Chain = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ unsigned Alignment = 0;
+ if (NumVecs != 3) {
+ Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
+ unsigned NumBytes = NumVecs * VT.getVectorElementType().getSizeInBits()/8;
+ if (Alignment > NumBytes)
+ Alignment = NumBytes;
+ if (Alignment < 8 && Alignment < NumBytes)
+ Alignment = 0;
+ // Alignment must be a power of two; make sure of that.
+ Alignment = (Alignment & -Alignment);
+ if (Alignment == 1)
+ Alignment = 0;
+ }
+ Align = CurDAG->getTargetConstant(Alignment, MVT::i32);
+
+ unsigned OpcodeIndex;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("unhandled vld-dup type");
+ case MVT::v8i8: OpcodeIndex = 0; break;
+ case MVT::v4i16: OpcodeIndex = 1; break;
+ case MVT::v2f32:
+ case MVT::v2i32: OpcodeIndex = 2; break;
+ }
+
+ SDValue Pred = getAL(CurDAG);
+ SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+ SDValue SuperReg;
+ unsigned Opc = Opcodes[OpcodeIndex];
+ SmallVector<SDValue, 6> Ops;
+ Ops.push_back(MemAddr);
+ Ops.push_back(Align);
+ if (isUpdating) {
+ SDValue Inc = N->getOperand(2);
+ Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
+ }
+ Ops.push_back(Pred);
+ Ops.push_back(Reg0);
+ Ops.push_back(Chain);
+
+ unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
+ std::vector<EVT> ResTys;
+ ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(), MVT::i64,ResTyElts));
+ if (isUpdating)
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::Other);
+ SDNode *VLdDup =
+ CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size());
+ cast<MachineSDNode>(VLdDup)->setMemRefs(MemOp, MemOp + 1);
+ SuperReg = SDValue(VLdDup, 0);
+
+ // Extract the subregisters.
+ assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
+ unsigned SubIdx = ARM::dsub_0;
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+ ReplaceUses(SDValue(N, Vec),
+ CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
+ ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
+ if (isUpdating)
+ ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
+ return NULL;
+}
+
+SDNode *ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs,
+ unsigned Opc) {
+ assert(NumVecs >= 2 && NumVecs <= 4 && "VTBL NumVecs out-of-range");
+ DebugLoc dl = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+ unsigned FirstTblReg = IsExt ? 2 : 1;
+
+ // Form a REG_SEQUENCE to force register allocation.
+ SDValue RegSeq;
+ SDValue V0 = N->getOperand(FirstTblReg + 0);
+ SDValue V1 = N->getOperand(FirstTblReg + 1);
+ if (NumVecs == 2)
+ RegSeq = SDValue(PairDRegs(MVT::v16i8, V0, V1), 0);
+ else {
+ SDValue V2 = N->getOperand(FirstTblReg + 2);
+ // If it's a vtbl3, form a quad D-register and leave the last part as
+ // an undef.
+ SDValue V3 = (NumVecs == 3)
+ ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
+ : N->getOperand(FirstTblReg + 3);
+ RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
+ }
+
+ SmallVector<SDValue, 6> Ops;
+ if (IsExt)
+ Ops.push_back(N->getOperand(1));
+ Ops.push_back(RegSeq);
+ Ops.push_back(N->getOperand(FirstTblReg + NumVecs));
+ Ops.push_back(getAL(CurDAG)); // predicate
+ Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // predicate register
+ return CurDAG->getMachineNode(Opc, dl, VT, Ops.data(), Ops.size());
+}
+
+SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N,
+ bool isSigned) {
+ if (!Subtarget->hasV6T2Ops())
+ return NULL;
+
+ unsigned Opc = isSigned ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
+ : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
+
+
+ // For unsigned extracts, check for a shift right and mask
+ unsigned And_imm = 0;
+ if (N->getOpcode() == ISD::AND) {
+ if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {
+
+ // The immediate is a mask of the low bits iff imm & (imm+1) == 0
+ if (And_imm & (And_imm + 1))
+ return NULL;
+
+ unsigned Srl_imm = 0;
+ if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
+ Srl_imm)) {
+ assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
+
+ // Note: The width operand is encoded as width-1.
+ unsigned Width = CountTrailingOnes_32(And_imm) - 1;
+ unsigned LSB = Srl_imm;
+ SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+ SDValue Ops[] = { N->getOperand(0).getOperand(0),
+ CurDAG->getTargetConstant(LSB, MVT::i32),
+ CurDAG->getTargetConstant(Width, MVT::i32),
+ getAL(CurDAG), Reg0 };
+ return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5);
+ }
+ }
+ return NULL;
+ }
+
+ // Otherwise, we're looking for a shift of a shift
+ unsigned Shl_imm = 0;
+ if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
+ assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
+ unsigned Srl_imm = 0;
+ if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
+ assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
+ // Note: The width operand is encoded as width-1.
+ unsigned Width = 32 - Srl_imm - 1;
+ int LSB = Srl_imm - Shl_imm;
+ if (LSB < 0)
+ return NULL;
+ SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+ SDValue Ops[] = { N->getOperand(0).getOperand(0),
+ CurDAG->getTargetConstant(LSB, MVT::i32),
+ CurDAG->getTargetConstant(Width, MVT::i32),
+ getAL(CurDAG), Reg0 };
+ return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5);
+ }
+ }
+ return NULL;
+}
+
+SDNode *ARMDAGToDAGISel::
+SelectT2CMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
+ ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) {
+ SDValue CPTmp0;
+ SDValue CPTmp1;
+ if (SelectT2ShifterOperandReg(TrueVal, CPTmp0, CPTmp1)) {
+ unsigned SOVal = cast<ConstantSDNode>(CPTmp1)->getZExtValue();
+ unsigned SOShOp = ARM_AM::getSORegShOp(SOVal);
+ unsigned Opc = 0;
+ switch (SOShOp) {
+ case ARM_AM::lsl: Opc = ARM::t2MOVCClsl; break;
+ case ARM_AM::lsr: Opc = ARM::t2MOVCClsr; break;
+ case ARM_AM::asr: Opc = ARM::t2MOVCCasr; break;
+ case ARM_AM::ror: Opc = ARM::t2MOVCCror; break;
+ default:
+ llvm_unreachable("Unknown so_reg opcode!");
+ break;
+ }
+ SDValue SOShImm =
+ CurDAG->getTargetConstant(ARM_AM::getSORegOffset(SOVal), MVT::i32);
+ SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
+ SDValue Ops[] = { FalseVal, CPTmp0, SOShImm, CC, CCR, InFlag };
+ return CurDAG->SelectNodeTo(N, Opc, MVT::i32,Ops, 6);
+ }
+ return 0;
+}
+
+SDNode *ARMDAGToDAGISel::
+SelectARMCMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
+ ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) {
+ SDValue CPTmp0;
+ SDValue CPTmp1;
+ SDValue CPTmp2;
+ if (SelectImmShifterOperand(TrueVal, CPTmp0, CPTmp2)) {
+ SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
+ SDValue Ops[] = { FalseVal, CPTmp0, CPTmp2, CC, CCR, InFlag };
+ return CurDAG->SelectNodeTo(N, ARM::MOVCCsi, MVT::i32, Ops, 6);
+ }
+
+ if (SelectRegShifterOperand(TrueVal, CPTmp0, CPTmp1, CPTmp2)) {
+ SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
+ SDValue Ops[] = { FalseVal, CPTmp0, CPTmp1, CPTmp2, CC, CCR, InFlag };
+ return CurDAG->SelectNodeTo(N, ARM::MOVCCsr, MVT::i32, Ops, 7);
+ }
+ return 0;
+}
+
+SDNode *ARMDAGToDAGISel::
+SelectT2CMOVImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
+ ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) {
+ ConstantSDNode *T = dyn_cast<ConstantSDNode>(TrueVal);
+ if (!T)
+ return 0;
+
+ unsigned Opc = 0;
+ unsigned TrueImm = T->getZExtValue();
+ if (is_t2_so_imm(TrueImm)) {
+ Opc = ARM::t2MOVCCi;
+ } else if (TrueImm <= 0xffff) {
+ Opc = ARM::t2MOVCCi16;
+ } else if (is_t2_so_imm_not(TrueImm)) {
+ TrueImm = ~TrueImm;
+ Opc = ARM::t2MVNCCi;
+ } else if (TrueVal.getNode()->hasOneUse() && Subtarget->hasV6T2Ops()) {
+ // Large immediate.
+ Opc = ARM::t2MOVCCi32imm;
+ }
+
+ if (Opc) {
+ SDValue True = CurDAG->getTargetConstant(TrueImm, MVT::i32);
+ SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
+ SDValue Ops[] = { FalseVal, True, CC, CCR, InFlag };
+ return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5);
+ }
+
+ return 0;
+}
+
+SDNode *ARMDAGToDAGISel::
+SelectARMCMOVImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
+ ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) {
+ ConstantSDNode *T = dyn_cast<ConstantSDNode>(TrueVal);
+ if (!T)
+ return 0;
+
+ unsigned Opc = 0;
+ unsigned TrueImm = T->getZExtValue();
+ bool isSoImm = is_so_imm(TrueImm);
+ if (isSoImm) {
+ Opc = ARM::MOVCCi;
+ } else if (Subtarget->hasV6T2Ops() && TrueImm <= 0xffff) {
+ Opc = ARM::MOVCCi16;
+ } else if (is_so_imm_not(TrueImm)) {
+ TrueImm = ~TrueImm;
+ Opc = ARM::MVNCCi;
+ } else if (TrueVal.getNode()->hasOneUse() &&
+ (Subtarget->hasV6T2Ops() || ARM_AM::isSOImmTwoPartVal(TrueImm))) {
+ // Large immediate.
+ Opc = ARM::MOVCCi32imm;
+ }
+
+ if (Opc) {
+ SDValue True = CurDAG->getTargetConstant(TrueImm, MVT::i32);
+ SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
+ SDValue Ops[] = { FalseVal, True, CC, CCR, InFlag };
+ return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5);
+ }
+
+ return 0;
+}
+
+SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ SDValue FalseVal = N->getOperand(0);
+ SDValue TrueVal = N->getOperand(1);
+ SDValue CC = N->getOperand(2);
+ SDValue CCR = N->getOperand(3);
+ SDValue InFlag = N->getOperand(4);
+ assert(CC.getOpcode() == ISD::Constant);
+ assert(CCR.getOpcode() == ISD::Register);
+ ARMCC::CondCodes CCVal =
+ (ARMCC::CondCodes)cast<ConstantSDNode>(CC)->getZExtValue();
+
+ if (!Subtarget->isThumb1Only() && VT == MVT::i32) {
+ // Pattern: (ARMcmov:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc)
+ // Emits: (MOVCCs:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc)
+ // Pattern complexity = 18 cost = 1 size = 0
+ SDValue CPTmp0;
+ SDValue CPTmp1;
+ SDValue CPTmp2;
+ if (Subtarget->isThumb()) {
+ SDNode *Res = SelectT2CMOVShiftOp(N, FalseVal, TrueVal,
+ CCVal, CCR, InFlag);
+ if (!Res)
+ Res = SelectT2CMOVShiftOp(N, TrueVal, FalseVal,
+ ARMCC::getOppositeCondition(CCVal), CCR, InFlag);
+ if (Res)
+ return Res;
+ } else {
+ SDNode *Res = SelectARMCMOVShiftOp(N, FalseVal, TrueVal,
+ CCVal, CCR, InFlag);
+ if (!Res)
+ Res = SelectARMCMOVShiftOp(N, TrueVal, FalseVal,
+ ARMCC::getOppositeCondition(CCVal), CCR, InFlag);
+ if (Res)
+ return Res;
+ }
+
+ // Pattern: (ARMcmov:i32 GPR:i32:$false,
+ // (imm:i32)<<P:Pred_so_imm>>:$true,
+ // (imm:i32):$cc)
+ // Emits: (MOVCCi:i32 GPR:i32:$false,
+ // (so_imm:i32 (imm:i32):$true), (imm:i32):$cc)
+ // Pattern complexity = 10 cost = 1 size = 0
+ if (Subtarget->isThumb()) {
+ SDNode *Res = SelectT2CMOVImmOp(N, FalseVal, TrueVal,
+ CCVal, CCR, InFlag);
+ if (!Res)
+ Res = SelectT2CMOVImmOp(N, TrueVal, FalseVal,
+ ARMCC::getOppositeCondition(CCVal), CCR, InFlag);
+ if (Res)
+ return Res;
+ } else {
+ SDNode *Res = SelectARMCMOVImmOp(N, FalseVal, TrueVal,
+ CCVal, CCR, InFlag);
+ if (!Res)
+ Res = SelectARMCMOVImmOp(N, TrueVal, FalseVal,
+ ARMCC::getOppositeCondition(CCVal), CCR, InFlag);
+ if (Res)
+ return Res;
+ }
+ }
+
+ // Pattern: (ARMcmov:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc)
+ // Emits: (MOVCCr:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc)
+ // Pattern complexity = 6 cost = 1 size = 0
+ //
+ // Pattern: (ARMcmov:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc)
+ // Emits: (tMOVCCr:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc)
+ // Pattern complexity = 6 cost = 11 size = 0
+ //
+ // Also VMOVScc and VMOVDcc.
+ SDValue Tmp2 = CurDAG->getTargetConstant(CCVal, MVT::i32);
+ SDValue Ops[] = { FalseVal, TrueVal, Tmp2, CCR, InFlag };
+ unsigned Opc = 0;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: assert(false && "Illegal conditional move type!");
+ break;
+ case MVT::i32:
+ Opc = Subtarget->isThumb()
+ ? (Subtarget->hasThumb2() ? ARM::t2MOVCCr : ARM::tMOVCCr_pseudo)
+ : ARM::MOVCCr;
+ break;
+ case MVT::f32:
+ Opc = ARM::VMOVScc;
+ break;
+ case MVT::f64:
+ Opc = ARM::VMOVDcc;
+ break;
+ }
+ return CurDAG->SelectNodeTo(N, Opc, VT, Ops, 5);
+}
+
+/// Target-specific DAG combining for ISD::XOR.
+/// Target-independent combining lowers SELECT_CC nodes of the form
+/// select_cc setg[ge] X, 0, X, -X
+/// select_cc setgt X, -1, X, -X
+/// select_cc setl[te] X, 0, -X, X
+/// select_cc setlt X, 1, -X, X
+/// which represent Integer ABS into:
+/// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
+/// ARM instruction selection detects the latter and matches it to
+/// ARM::ABS or ARM::t2ABS machine node.
+SDNode *ARMDAGToDAGISel::SelectABSOp(SDNode *N){
+ SDValue XORSrc0 = N->getOperand(0);
+ SDValue XORSrc1 = N->getOperand(1);
+ DebugLoc DL = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+
+ if (DisableARMIntABS)
+ return NULL;
+
+ if (Subtarget->isThumb1Only())
+ return NULL;
+
+ if (XORSrc0.getOpcode() != ISD::ADD ||
+ XORSrc1.getOpcode() != ISD::SRA)
+ return NULL;
+
+ SDValue ADDSrc0 = XORSrc0.getOperand(0);
+ SDValue ADDSrc1 = XORSrc0.getOperand(1);
+ SDValue SRASrc0 = XORSrc1.getOperand(0);
+ SDValue SRASrc1 = XORSrc1.getOperand(1);
+ ConstantSDNode *SRAConstant = dyn_cast<ConstantSDNode>(SRASrc1);
+ EVT XType = SRASrc0.getValueType();
+ unsigned Size = XType.getSizeInBits() - 1;
+
+ if (ADDSrc1 == XORSrc1 &&
+ ADDSrc0 == SRASrc0 &&
+ XType.isInteger() &&
+ SRAConstant != NULL &&
+ Size == SRAConstant->getZExtValue()) {
+
+ unsigned Opcode = ARM::ABS;
+ if (Subtarget->isThumb2())
+ Opcode = ARM::t2ABS;
+
+ return CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
+ }
+
+ return NULL;
+}
+
+SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) {
+ // The only time a CONCAT_VECTORS operation can have legal types is when
+ // two 64-bit vectors are concatenated to a 128-bit vector.
+ EVT VT = N->getValueType(0);
+ if (!VT.is128BitVector() || N->getNumOperands() != 2)
+ llvm_unreachable("unexpected CONCAT_VECTORS");
+ return PairDRegs(VT, N->getOperand(0), N->getOperand(1));
+}
+
+SDNode *ARMDAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) {
+ SmallVector<SDValue, 6> Ops;
+ Ops.push_back(Node->getOperand(1)); // Ptr
+ Ops.push_back(Node->getOperand(2)); // Low part of Val1
+ Ops.push_back(Node->getOperand(3)); // High part of Val1
+ if (Opc == ARM::ATOMCMPXCHG6432) {
+ Ops.push_back(Node->getOperand(4)); // Low part of Val2
+ Ops.push_back(Node->getOperand(5)); // High part of Val2
+ }
+ Ops.push_back(Node->getOperand(0)); // Chain
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
+ SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(),
+ MVT::i32, MVT::i32, MVT::Other,
+ Ops.data() ,Ops.size());
+ cast<MachineSDNode>(ResNode)->setMemRefs(MemOp, MemOp + 1);
+ return ResNode;
+}
+
+SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+
+ if (N->isMachineOpcode())
+ return NULL; // Already selected.
+
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::XOR: {
+ // Select special operations if XOR node forms integer ABS pattern
+ SDNode *ResNode = SelectABSOp(N);
+ if (ResNode)
+ return ResNode;
+ // Other cases are autogenerated.
+ break;
+ }
+ case ISD::Constant: {
+ unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
+ bool UseCP = true;
+ if (Subtarget->hasThumb2())
+ // Thumb2-aware targets have the MOVT instruction, so all immediates can
+ // be done with MOV + MOVT, at worst.
+ UseCP = 0;
+ else {
+ if (Subtarget->isThumb()) {
+ UseCP = (Val > 255 && // MOV
+ ~Val > 255 && // MOV + MVN
+ !ARM_AM::isThumbImmShiftedVal(Val)); // MOV + LSL
+ } else
+ UseCP = (ARM_AM::getSOImmVal(Val) == -1 && // MOV
+ ARM_AM::getSOImmVal(~Val) == -1 && // MVN
+ !ARM_AM::isSOImmTwoPartVal(Val)); // two instrs.
+ }
+
+ if (UseCP) {
+ SDValue CPIdx =
+ CurDAG->getTargetConstantPool(ConstantInt::get(
+ Type::getInt32Ty(*CurDAG->getContext()), Val),
+ TLI.getPointerTy());
+
+ SDNode *ResNode;
+ if (Subtarget->isThumb1Only()) {
+ SDValue Pred = getAL(CurDAG);
+ SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
+ SDValue Ops[] = { CPIdx, Pred, PredReg, CurDAG->getEntryNode() };
+ ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
+ Ops, 4);
+ } else {
+ SDValue Ops[] = {
+ CPIdx,
+ CurDAG->getTargetConstant(0, MVT::i32),
+ getAL(CurDAG),
+ CurDAG->getRegister(0, MVT::i32),
+ CurDAG->getEntryNode()
+ };
+ ResNode=CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
+ Ops, 5);
+ }
+ ReplaceUses(SDValue(N, 0), SDValue(ResNode, 0));
+ return NULL;
+ }
+
+ // Other cases are autogenerated.
+ break;
+ }
+ case ISD::FrameIndex: {
+ // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ SDValue TFI = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ if (Subtarget->isThumb1Only()) {
+ SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, MVT::i32),
+ getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) };
+ return CurDAG->SelectNodeTo(N, ARM::tADDrSPi, MVT::i32, Ops, 4);
+ } else {
+ unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
+ ARM::t2ADDri : ARM::ADDri);
+ SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, MVT::i32),
+ getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
+ CurDAG->getRegister(0, MVT::i32) };
+ return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5);
+ }
+ }
+ case ISD::SRL:
+ if (SDNode *I = SelectV6T2BitfieldExtractOp(N, false))
+ return I;
+ break;
+ case ISD::SRA:
+ if (SDNode *I = SelectV6T2BitfieldExtractOp(N, true))
+ return I;
+ break;
+ case ISD::MUL:
+ if (Subtarget->isThumb1Only())
+ break;
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
+ unsigned RHSV = C->getZExtValue();
+ if (!RHSV) break;
+ if (isPowerOf2_32(RHSV-1)) { // 2^n+1?
+ unsigned ShImm = Log2_32(RHSV-1);
+ if (ShImm >= 32)
+ break;
+ SDValue V = N->getOperand(0);
+ ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
+ SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, MVT::i32);
+ SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+ if (Subtarget->isThumb()) {
+ SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG), Reg0, Reg0 };
+ return CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops, 6);
+ } else {
+ SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG), Reg0, Reg0 };
+ return CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops, 7);
+ }
+ }
+ if (isPowerOf2_32(RHSV+1)) { // 2^n-1?
+ unsigned ShImm = Log2_32(RHSV+1);
+ if (ShImm >= 32)
+ break;
+ SDValue V = N->getOperand(0);
+ ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
+ SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, MVT::i32);
+ SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+ if (Subtarget->isThumb()) {
+ SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG), Reg0, Reg0 };
+ return CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops, 6);
+ } else {
+ SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG), Reg0, Reg0 };
+ return CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops, 7);
+ }
+ }
+ }
+ break;
+ case ISD::AND: {
+ // Check for unsigned bitfield extract
+ if (SDNode *I = SelectV6T2BitfieldExtractOp(N, false))
+ return I;
+
+ // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits
+ // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits
+ // are entirely contributed by c2 and lower 16-bits are entirely contributed
+ // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)).
+ // Select it to: "movt x, ((c1 & 0xffff) >> 16)
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::i32)
+ break;
+ unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
+ ? ARM::t2MOVTi16
+ : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
+ if (!Opc)
+ break;
+ SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ if (!N1C)
+ break;
+ if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
+ SDValue N2 = N0.getOperand(1);
+ ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
+ if (!N2C)
+ break;
+ unsigned N1CVal = N1C->getZExtValue();
+ unsigned N2CVal = N2C->getZExtValue();
+ if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
+ (N1CVal & 0xffffU) == 0xffffU &&
+ (N2CVal & 0xffffU) == 0x0U) {
+ SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,
+ MVT::i32);
+ SDValue Ops[] = { N0.getOperand(0), Imm16,
+ getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) };
+ return CurDAG->getMachineNode(Opc, dl, VT, Ops, 4);
+ }
+ }
+ break;
+ }
+ case ARMISD::VMOVRRD:
+ return CurDAG->getMachineNode(ARM::VMOVRRD, dl, MVT::i32, MVT::i32,
+ N->getOperand(0), getAL(CurDAG),
+ CurDAG->getRegister(0, MVT::i32));
+ case ISD::UMUL_LOHI: {
+ if (Subtarget->isThumb1Only())
+ break;
+ if (Subtarget->isThumb()) {
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
+ getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
+ CurDAG->getRegister(0, MVT::i32) };
+ return CurDAG->getMachineNode(ARM::t2UMULL, dl, MVT::i32, MVT::i32,Ops,4);
+ } else {
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
+ getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
+ CurDAG->getRegister(0, MVT::i32) };
+ return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
+ ARM::UMULL : ARM::UMULLv5,
+ dl, MVT::i32, MVT::i32, Ops, 5);
+ }
+ }
+ case ISD::SMUL_LOHI: {
+ if (Subtarget->isThumb1Only())
+ break;
+ if (Subtarget->isThumb()) {
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
+ getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) };
+ return CurDAG->getMachineNode(ARM::t2SMULL, dl, MVT::i32, MVT::i32,Ops,4);
+ } else {
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
+ getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
+ CurDAG->getRegister(0, MVT::i32) };
+ return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
+ ARM::SMULL : ARM::SMULLv5,
+ dl, MVT::i32, MVT::i32, Ops, 5);
+ }
+ }
+ case ISD::LOAD: {
+ SDNode *ResNode = 0;
+ if (Subtarget->isThumb() && Subtarget->hasThumb2())
+ ResNode = SelectT2IndexedLoad(N);
+ else
+ ResNode = SelectARMIndexedLoad(N);
+ if (ResNode)
+ return ResNode;
+ // Other cases are autogenerated.
+ break;
+ }
+ case ARMISD::BRCOND: {
+ // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
+ // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
+ // Pattern complexity = 6 cost = 1 size = 0
+
+ // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
+ // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
+ // Pattern complexity = 6 cost = 1 size = 0
+
+ // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
+ // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
+ // Pattern complexity = 6 cost = 1 size = 0
+
+ unsigned Opc = Subtarget->isThumb() ?
+ ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
+ SDValue Chain = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ SDValue N3 = N->getOperand(3);
+ SDValue InFlag = N->getOperand(4);
+ assert(N1.getOpcode() == ISD::BasicBlock);
+ assert(N2.getOpcode() == ISD::Constant);
+ assert(N3.getOpcode() == ISD::Register);
+
+ SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned)
+ cast<ConstantSDNode>(N2)->getZExtValue()),
+ MVT::i32);
+ SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag };
+ SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
+ MVT::Glue, Ops, 5);
+ Chain = SDValue(ResNode, 0);
+ if (N->getNumValues() == 2) {
+ InFlag = SDValue(ResNode, 1);
+ ReplaceUses(SDValue(N, 1), InFlag);
+ }
+ ReplaceUses(SDValue(N, 0),
+ SDValue(Chain.getNode(), Chain.getResNo()));
+ return NULL;
+ }
+ case ARMISD::CMOV:
+ return SelectCMOVOp(N);
+ case ARMISD::VZIP: {
+ unsigned Opc = 0;
+ EVT VT = N->getValueType(0);
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::v8i8: Opc = ARM::VZIPd8; break;
+ case MVT::v4i16: Opc = ARM::VZIPd16; break;
+ case MVT::v2f32:
+ case MVT::v2i32: Opc = ARM::VZIPd32; break;
+ case MVT::v16i8: Opc = ARM::VZIPq8; break;
+ case MVT::v8i16: Opc = ARM::VZIPq16; break;
+ case MVT::v4f32:
+ case MVT::v4i32: Opc = ARM::VZIPq32; break;
+ }
+ SDValue Pred = getAL(CurDAG);
+ SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
+ return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4);
+ }
+ case ARMISD::VUZP: {
+ unsigned Opc = 0;
+ EVT VT = N->getValueType(0);
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::v8i8: Opc = ARM::VUZPd8; break;
+ case MVT::v4i16: Opc = ARM::VUZPd16; break;
+ case MVT::v2f32:
+ case MVT::v2i32: Opc = ARM::VUZPd32; break;
+ case MVT::v16i8: Opc = ARM::VUZPq8; break;
+ case MVT::v8i16: Opc = ARM::VUZPq16; break;
+ case MVT::v4f32:
+ case MVT::v4i32: Opc = ARM::VUZPq32; break;
+ }
+ SDValue Pred = getAL(CurDAG);
+ SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
+ return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4);
+ }
+ case ARMISD::VTRN: {
+ unsigned Opc = 0;
+ EVT VT = N->getValueType(0);
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::v8i8: Opc = ARM::VTRNd8; break;
+ case MVT::v4i16: Opc = ARM::VTRNd16; break;
+ case MVT::v2f32:
+ case MVT::v2i32: Opc = ARM::VTRNd32; break;
+ case MVT::v16i8: Opc = ARM::VTRNq8; break;
+ case MVT::v8i16: Opc = ARM::VTRNq16; break;
+ case MVT::v4f32:
+ case MVT::v4i32: Opc = ARM::VTRNq32; break;
+ }
+ SDValue Pred = getAL(CurDAG);
+ SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
+ return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4);
+ }
+ case ARMISD::BUILD_VECTOR: {
+ EVT VecVT = N->getValueType(0);
+ EVT EltVT = VecVT.getVectorElementType();
+ unsigned NumElts = VecVT.getVectorNumElements();
+ if (EltVT == MVT::f64) {
+ assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
+ return PairDRegs(VecVT, N->getOperand(0), N->getOperand(1));
+ }
+ assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
+ if (NumElts == 2)
+ return PairSRegs(VecVT, N->getOperand(0), N->getOperand(1));
+ assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
+ return QuadSRegs(VecVT, N->getOperand(0), N->getOperand(1),
+ N->getOperand(2), N->getOperand(3));
+ }
+
+ case ARMISD::VLD2DUP: {
+ unsigned Opcodes[] = { ARM::VLD2DUPd8Pseudo, ARM::VLD2DUPd16Pseudo,
+ ARM::VLD2DUPd32Pseudo };
+ return SelectVLDDup(N, false, 2, Opcodes);
+ }
+
+ case ARMISD::VLD3DUP: {
+ unsigned Opcodes[] = { ARM::VLD3DUPd8Pseudo, ARM::VLD3DUPd16Pseudo,
+ ARM::VLD3DUPd32Pseudo };
+ return SelectVLDDup(N, false, 3, Opcodes);
+ }
+
+ case ARMISD::VLD4DUP: {
+ unsigned Opcodes[] = { ARM::VLD4DUPd8Pseudo, ARM::VLD4DUPd16Pseudo,
+ ARM::VLD4DUPd32Pseudo };
+ return SelectVLDDup(N, false, 4, Opcodes);
+ }
+
+ case ARMISD::VLD2DUP_UPD: {
+ unsigned Opcodes[] = { ARM::VLD2DUPd8Pseudo_UPD, ARM::VLD2DUPd16Pseudo_UPD,
+ ARM::VLD2DUPd32Pseudo_UPD };
+ return SelectVLDDup(N, true, 2, Opcodes);
+ }
+
+ case ARMISD::VLD3DUP_UPD: {
+ unsigned Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD, ARM::VLD3DUPd16Pseudo_UPD,
+ ARM::VLD3DUPd32Pseudo_UPD };
+ return SelectVLDDup(N, true, 3, Opcodes);
+ }
+
+ case ARMISD::VLD4DUP_UPD: {
+ unsigned Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD, ARM::VLD4DUPd16Pseudo_UPD,
+ ARM::VLD4DUPd32Pseudo_UPD };
+ return SelectVLDDup(N, true, 4, Opcodes);
+ }
+
+ case ARMISD::VLD1_UPD: {
+ unsigned DOpcodes[] = { ARM::VLD1d8_UPD, ARM::VLD1d16_UPD,
+ ARM::VLD1d32_UPD, ARM::VLD1d64_UPD };
+ unsigned QOpcodes[] = { ARM::VLD1q8Pseudo_UPD, ARM::VLD1q16Pseudo_UPD,
+ ARM::VLD1q32Pseudo_UPD, ARM::VLD1q64Pseudo_UPD };
+ return SelectVLD(N, true, 1, DOpcodes, QOpcodes, 0);
+ }
+
+ case ARMISD::VLD2_UPD: {
+ unsigned DOpcodes[] = { ARM::VLD2d8Pseudo_UPD, ARM::VLD2d16Pseudo_UPD,
+ ARM::VLD2d32Pseudo_UPD, ARM::VLD1q64Pseudo_UPD };
+ unsigned QOpcodes[] = { ARM::VLD2q8Pseudo_UPD, ARM::VLD2q16Pseudo_UPD,
+ ARM::VLD2q32Pseudo_UPD };
+ return SelectVLD(N, true, 2, DOpcodes, QOpcodes, 0);
+ }
+
+ case ARMISD::VLD3_UPD: {
+ unsigned DOpcodes[] = { ARM::VLD3d8Pseudo_UPD, ARM::VLD3d16Pseudo_UPD,
+ ARM::VLD3d32Pseudo_UPD, ARM::VLD1d64TPseudo_UPD };
+ unsigned QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
+ ARM::VLD3q16Pseudo_UPD,
+ ARM::VLD3q32Pseudo_UPD };
+ unsigned QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
+ ARM::VLD3q16oddPseudo_UPD,
+ ARM::VLD3q32oddPseudo_UPD };
+ return SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
+ }
+
+ case ARMISD::VLD4_UPD: {
+ unsigned DOpcodes[] = { ARM::VLD4d8Pseudo_UPD, ARM::VLD4d16Pseudo_UPD,
+ ARM::VLD4d32Pseudo_UPD, ARM::VLD1d64QPseudo_UPD };
+ unsigned QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
+ ARM::VLD4q16Pseudo_UPD,
+ ARM::VLD4q32Pseudo_UPD };
+ unsigned QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD,
+ ARM::VLD4q16oddPseudo_UPD,
+ ARM::VLD4q32oddPseudo_UPD };
+ return SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
+ }
+
+ case ARMISD::VLD2LN_UPD: {
+ unsigned DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD, ARM::VLD2LNd16Pseudo_UPD,
+ ARM::VLD2LNd32Pseudo_UPD };
+ unsigned QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
+ ARM::VLD2LNq32Pseudo_UPD };
+ return SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
+ }
+
+ case ARMISD::VLD3LN_UPD: {
+ unsigned DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD, ARM::VLD3LNd16Pseudo_UPD,
+ ARM::VLD3LNd32Pseudo_UPD };
+ unsigned QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
+ ARM::VLD3LNq32Pseudo_UPD };
+ return SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
+ }
+
+ case ARMISD::VLD4LN_UPD: {
+ unsigned DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD, ARM::VLD4LNd16Pseudo_UPD,
+ ARM::VLD4LNd32Pseudo_UPD };
+ unsigned QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
+ ARM::VLD4LNq32Pseudo_UPD };
+ return SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
+ }
+
+ case ARMISD::VST1_UPD: {
+ unsigned DOpcodes[] = { ARM::VST1d8_UPD, ARM::VST1d16_UPD,
+ ARM::VST1d32_UPD, ARM::VST1d64_UPD };
+ unsigned QOpcodes[] = { ARM::VST1q8Pseudo_UPD, ARM::VST1q16Pseudo_UPD,
+ ARM::VST1q32Pseudo_UPD, ARM::VST1q64Pseudo_UPD };
+ return SelectVST(N, true, 1, DOpcodes, QOpcodes, 0);
+ }
+
+ case ARMISD::VST2_UPD: {
+ unsigned DOpcodes[] = { ARM::VST2d8Pseudo_UPD, ARM::VST2d16Pseudo_UPD,
+ ARM::VST2d32Pseudo_UPD, ARM::VST1q64Pseudo_UPD };
+ unsigned QOpcodes[] = { ARM::VST2q8Pseudo_UPD, ARM::VST2q16Pseudo_UPD,
+ ARM::VST2q32Pseudo_UPD };
+ return SelectVST(N, true, 2, DOpcodes, QOpcodes, 0);
+ }
+
+ case ARMISD::VST3_UPD: {
+ unsigned DOpcodes[] = { ARM::VST3d8Pseudo_UPD, ARM::VST3d16Pseudo_UPD,
+ ARM::VST3d32Pseudo_UPD, ARM::VST1d64TPseudo_UPD };
+ unsigned QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
+ ARM::VST3q16Pseudo_UPD,
+ ARM::VST3q32Pseudo_UPD };
+ unsigned QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
+ ARM::VST3q16oddPseudo_UPD,
+ ARM::VST3q32oddPseudo_UPD };
+ return SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
+ }
+
+ case ARMISD::VST4_UPD: {
+ unsigned DOpcodes[] = { ARM::VST4d8Pseudo_UPD, ARM::VST4d16Pseudo_UPD,
+ ARM::VST4d32Pseudo_UPD, ARM::VST1d64QPseudo_UPD };
+ unsigned QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
+ ARM::VST4q16Pseudo_UPD,
+ ARM::VST4q32Pseudo_UPD };
+ unsigned QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD,
+ ARM::VST4q16oddPseudo_UPD,
+ ARM::VST4q32oddPseudo_UPD };
+ return SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
+ }
+
+ case ARMISD::VST2LN_UPD: {
+ unsigned DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD, ARM::VST2LNd16Pseudo_UPD,
+ ARM::VST2LNd32Pseudo_UPD };
+ unsigned QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
+ ARM::VST2LNq32Pseudo_UPD };
+ return SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
+ }
+
+ case ARMISD::VST3LN_UPD: {
+ unsigned DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD, ARM::VST3LNd16Pseudo_UPD,
+ ARM::VST3LNd32Pseudo_UPD };
+ unsigned QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
+ ARM::VST3LNq32Pseudo_UPD };
+ return SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
+ }
+
+ case ARMISD::VST4LN_UPD: {
+ unsigned DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD, ARM::VST4LNd16Pseudo_UPD,
+ ARM::VST4LNd32Pseudo_UPD };
+ unsigned QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
+ ARM::VST4LNq32Pseudo_UPD };
+ return SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
+ }
+
+ case ISD::INTRINSIC_VOID:
+ case ISD::INTRINSIC_W_CHAIN: {
+ unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ switch (IntNo) {
+ default:
+ break;
+
+ case Intrinsic::arm_ldrexd: {
+ SDValue MemAddr = N->getOperand(2);
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Chain = N->getOperand(0);
+
+ unsigned NewOpc = ARM::LDREXD;
+ if (Subtarget->isThumb() && Subtarget->hasThumb2())
+ NewOpc = ARM::t2LDREXD;
+
+ // arm_ldrexd returns a i64 value in {i32, i32}
+ std::vector<EVT> ResTys;
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::Other);
+
+ // place arguments in the right order
+ SmallVector<SDValue, 7> Ops;
+ Ops.push_back(MemAddr);
+ Ops.push_back(getAL(CurDAG));
+ Ops.push_back(CurDAG->getRegister(0, MVT::i32));
+ Ops.push_back(Chain);
+ SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops.data(),
+ Ops.size());
+ // Transfer memoperands.
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+ cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1);
+
+ // Until there's support for specifing explicit register constraints
+ // like the use of even/odd register pair, hardcode ldrexd to always
+ // use the pair [R0, R1] to hold the load result.
+ Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ARM::R0,
+ SDValue(Ld, 0), SDValue(0,0));
+ Chain = CurDAG->getCopyToReg(Chain, dl, ARM::R1,
+ SDValue(Ld, 1), Chain.getValue(1));
+
+ // Remap uses.
+ SDValue Glue = Chain.getValue(1);
+ if (!SDValue(N, 0).use_empty()) {
+ SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ ARM::R0, MVT::i32, Glue);
+ Glue = Result.getValue(2);
+ ReplaceUses(SDValue(N, 0), Result);
+ }
+ if (!SDValue(N, 1).use_empty()) {
+ SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ ARM::R1, MVT::i32, Glue);
+ Glue = Result.getValue(2);
+ ReplaceUses(SDValue(N, 1), Result);
+ }
+
+ ReplaceUses(SDValue(N, 2), SDValue(Ld, 2));
+ return NULL;
+ }
+
+ case Intrinsic::arm_strexd: {
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Chain = N->getOperand(0);
+ SDValue Val0 = N->getOperand(2);
+ SDValue Val1 = N->getOperand(3);
+ SDValue MemAddr = N->getOperand(4);
+
+ // Until there's support for specifing explicit register constraints
+ // like the use of even/odd register pair, hardcode strexd to always
+ // use the pair [R2, R3] to hold the i64 (i32, i32) value to be stored.
+ Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ARM::R2, Val0,
+ SDValue(0, 0));
+ Chain = CurDAG->getCopyToReg(Chain, dl, ARM::R3, Val1, Chain.getValue(1));
+
+ SDValue Glue = Chain.getValue(1);
+ Val0 = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ ARM::R2, MVT::i32, Glue);
+ Glue = Val0.getValue(1);
+ Val1 = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ ARM::R3, MVT::i32, Glue);
+
+ // Store exclusive double return a i32 value which is the return status
+ // of the issued store.
+ std::vector<EVT> ResTys;
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::Other);
+
+ // place arguments in the right order
+ SmallVector<SDValue, 7> Ops;
+ Ops.push_back(Val0);
+ Ops.push_back(Val1);
+ Ops.push_back(MemAddr);
+ Ops.push_back(getAL(CurDAG));
+ Ops.push_back(CurDAG->getRegister(0, MVT::i32));
+ Ops.push_back(Chain);
+
+ unsigned NewOpc = ARM::STREXD;
+ if (Subtarget->isThumb() && Subtarget->hasThumb2())
+ NewOpc = ARM::t2STREXD;
+
+ SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops.data(),
+ Ops.size());
+ // Transfer memoperands.
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+ cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
+
+ return St;
+ }
+
+ case Intrinsic::arm_neon_vld1: {
+ unsigned DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
+ ARM::VLD1d32, ARM::VLD1d64 };
+ unsigned QOpcodes[] = { ARM::VLD1q8Pseudo, ARM::VLD1q16Pseudo,
+ ARM::VLD1q32Pseudo, ARM::VLD1q64Pseudo };
+ return SelectVLD(N, false, 1, DOpcodes, QOpcodes, 0);
+ }
+
+ case Intrinsic::arm_neon_vld2: {
+ unsigned DOpcodes[] = { ARM::VLD2d8Pseudo, ARM::VLD2d16Pseudo,
+ ARM::VLD2d32Pseudo, ARM::VLD1q64Pseudo };
+ unsigned QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
+ ARM::VLD2q32Pseudo };
+ return SelectVLD(N, false, 2, DOpcodes, QOpcodes, 0);
+ }
+
+ case Intrinsic::arm_neon_vld3: {
+ unsigned DOpcodes[] = { ARM::VLD3d8Pseudo, ARM::VLD3d16Pseudo,
+ ARM::VLD3d32Pseudo, ARM::VLD1d64TPseudo };
+ unsigned QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
+ ARM::VLD3q16Pseudo_UPD,
+ ARM::VLD3q32Pseudo_UPD };
+ unsigned QOpcodes1[] = { ARM::VLD3q8oddPseudo,
+ ARM::VLD3q16oddPseudo,
+ ARM::VLD3q32oddPseudo };
+ return SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
+ }
+
+ case Intrinsic::arm_neon_vld4: {
+ unsigned DOpcodes[] = { ARM::VLD4d8Pseudo, ARM::VLD4d16Pseudo,
+ ARM::VLD4d32Pseudo, ARM::VLD1d64QPseudo };
+ unsigned QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
+ ARM::VLD4q16Pseudo_UPD,
+ ARM::VLD4q32Pseudo_UPD };
+ unsigned QOpcodes1[] = { ARM::VLD4q8oddPseudo,
+ ARM::VLD4q16oddPseudo,
+ ARM::VLD4q32oddPseudo };
+ return SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
+ }
+
+ case Intrinsic::arm_neon_vld2lane: {
+ unsigned DOpcodes[] = { ARM::VLD2LNd8Pseudo, ARM::VLD2LNd16Pseudo,
+ ARM::VLD2LNd32Pseudo };
+ unsigned QOpcodes[] = { ARM::VLD2LNq16Pseudo, ARM::VLD2LNq32Pseudo };
+ return SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
+ }
+
+ case Intrinsic::arm_neon_vld3lane: {
+ unsigned DOpcodes[] = { ARM::VLD3LNd8Pseudo, ARM::VLD3LNd16Pseudo,
+ ARM::VLD3LNd32Pseudo };
+ unsigned QOpcodes[] = { ARM::VLD3LNq16Pseudo, ARM::VLD3LNq32Pseudo };
+ return SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
+ }
+
+ case Intrinsic::arm_neon_vld4lane: {
+ unsigned DOpcodes[] = { ARM::VLD4LNd8Pseudo, ARM::VLD4LNd16Pseudo,
+ ARM::VLD4LNd32Pseudo };
+ unsigned QOpcodes[] = { ARM::VLD4LNq16Pseudo, ARM::VLD4LNq32Pseudo };
+ return SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
+ }
+
+ case Intrinsic::arm_neon_vst1: {
+ unsigned DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
+ ARM::VST1d32, ARM::VST1d64 };
+ unsigned QOpcodes[] = { ARM::VST1q8Pseudo, ARM::VST1q16Pseudo,
+ ARM::VST1q32Pseudo, ARM::VST1q64Pseudo };
+ return SelectVST(N, false, 1, DOpcodes, QOpcodes, 0);
+ }
+
+ case Intrinsic::arm_neon_vst2: {
+ unsigned DOpcodes[] = { ARM::VST2d8Pseudo, ARM::VST2d16Pseudo,
+ ARM::VST2d32Pseudo, ARM::VST1q64Pseudo };
+ unsigned QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
+ ARM::VST2q32Pseudo };
+ return SelectVST(N, false, 2, DOpcodes, QOpcodes, 0);
+ }
+
+ case Intrinsic::arm_neon_vst3: {
+ unsigned DOpcodes[] = { ARM::VST3d8Pseudo, ARM::VST3d16Pseudo,
+ ARM::VST3d32Pseudo, ARM::VST1d64TPseudo };
+ unsigned QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
+ ARM::VST3q16Pseudo_UPD,
+ ARM::VST3q32Pseudo_UPD };
+ unsigned QOpcodes1[] = { ARM::VST3q8oddPseudo,
+ ARM::VST3q16oddPseudo,
+ ARM::VST3q32oddPseudo };
+ return SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
+ }
+
+ case Intrinsic::arm_neon_vst4: {
+ unsigned DOpcodes[] = { ARM::VST4d8Pseudo, ARM::VST4d16Pseudo,
+ ARM::VST4d32Pseudo, ARM::VST1d64QPseudo };
+ unsigned QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
+ ARM::VST4q16Pseudo_UPD,
+ ARM::VST4q32Pseudo_UPD };
+ unsigned QOpcodes1[] = { ARM::VST4q8oddPseudo,
+ ARM::VST4q16oddPseudo,
+ ARM::VST4q32oddPseudo };
+ return SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
+ }
+
+ case Intrinsic::arm_neon_vst2lane: {
+ unsigned DOpcodes[] = { ARM::VST2LNd8Pseudo, ARM::VST2LNd16Pseudo,
+ ARM::VST2LNd32Pseudo };
+ unsigned QOpcodes[] = { ARM::VST2LNq16Pseudo, ARM::VST2LNq32Pseudo };
+ return SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
+ }
+
+ case Intrinsic::arm_neon_vst3lane: {
+ unsigned DOpcodes[] = { ARM::VST3LNd8Pseudo, ARM::VST3LNd16Pseudo,
+ ARM::VST3LNd32Pseudo };
+ unsigned QOpcodes[] = { ARM::VST3LNq16Pseudo, ARM::VST3LNq32Pseudo };
+ return SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
+ }
+
+ case Intrinsic::arm_neon_vst4lane: {
+ unsigned DOpcodes[] = { ARM::VST4LNd8Pseudo, ARM::VST4LNd16Pseudo,
+ ARM::VST4LNd32Pseudo };
+ unsigned QOpcodes[] = { ARM::VST4LNq16Pseudo, ARM::VST4LNq32Pseudo };
+ return SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
+ }
+ }
+ break;
+ }
+
+ case ISD::INTRINSIC_WO_CHAIN: {
+ unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+ switch (IntNo) {
+ default:
+ break;
+
+ case Intrinsic::arm_neon_vtbl2:
+ return SelectVTBL(N, false, 2, ARM::VTBL2Pseudo);
+ case Intrinsic::arm_neon_vtbl3:
+ return SelectVTBL(N, false, 3, ARM::VTBL3Pseudo);
+ case Intrinsic::arm_neon_vtbl4:
+ return SelectVTBL(N, false, 4, ARM::VTBL4Pseudo);
+
+ case Intrinsic::arm_neon_vtbx2:
+ return SelectVTBL(N, true, 2, ARM::VTBX2Pseudo);
+ case Intrinsic::arm_neon_vtbx3:
+ return SelectVTBL(N, true, 3, ARM::VTBX3Pseudo);
+ case Intrinsic::arm_neon_vtbx4:
+ return SelectVTBL(N, true, 4, ARM::VTBX4Pseudo);
+ }
+ break;
+ }
+
+ case ARMISD::VTBL1: {
+ DebugLoc dl = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+ SmallVector<SDValue, 6> Ops;
+
+ Ops.push_back(N->getOperand(0));
+ Ops.push_back(N->getOperand(1));
+ Ops.push_back(getAL(CurDAG)); // Predicate
+ Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register
+ return CurDAG->getMachineNode(ARM::VTBL1, dl, VT, Ops.data(), Ops.size());
+ }
+ case ARMISD::VTBL2: {
+ DebugLoc dl = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+
+ // Form a REG_SEQUENCE to force register allocation.
+ SDValue V0 = N->getOperand(0);
+ SDValue V1 = N->getOperand(1);
+ SDValue RegSeq = SDValue(PairDRegs(MVT::v16i8, V0, V1), 0);
+
+ SmallVector<SDValue, 6> Ops;
+ Ops.push_back(RegSeq);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(getAL(CurDAG)); // Predicate
+ Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register
+ return CurDAG->getMachineNode(ARM::VTBL2Pseudo, dl, VT,
+ Ops.data(), Ops.size());
+ }
+
+ case ISD::CONCAT_VECTORS:
+ return SelectConcatVector(N);
+
+ case ARMISD::ATOMOR64_DAG:
+ return SelectAtomic64(N, ARM::ATOMOR6432);
+ case ARMISD::ATOMXOR64_DAG:
+ return SelectAtomic64(N, ARM::ATOMXOR6432);
+ case ARMISD::ATOMADD64_DAG:
+ return SelectAtomic64(N, ARM::ATOMADD6432);
+ case ARMISD::ATOMSUB64_DAG:
+ return SelectAtomic64(N, ARM::ATOMSUB6432);
+ case ARMISD::ATOMNAND64_DAG:
+ return SelectAtomic64(N, ARM::ATOMNAND6432);
+ case ARMISD::ATOMAND64_DAG:
+ return SelectAtomic64(N, ARM::ATOMAND6432);
+ case ARMISD::ATOMSWAP64_DAG:
+ return SelectAtomic64(N, ARM::ATOMSWAP6432);
+ case ARMISD::ATOMCMPXCHG64_DAG:
+ return SelectAtomic64(N, ARM::ATOMCMPXCHG6432);
+ }
+
+ return SelectCode(N);
+}
+
+bool ARMDAGToDAGISel::
+SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
+ std::vector<SDValue> &OutOps) {
+ assert(ConstraintCode == 'm' && "unexpected asm memory constraint");
+ // Require the address to be in a register. That is safe for all ARM
+ // variants and it is hard to do anything much smarter without knowing
+ // how the operand is used.
+ OutOps.push_back(Op);
+ return false;
+}
+
+/// createARMISelDag - This pass converts a legalized DAG into a
+/// ARM-specific DAG, ready for instruction scheduling.
+///
+FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
+ CodeGenOpt::Level OptLevel) {
+ return new ARMDAGToDAGISel(TM, OptLevel);
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
new file mode 100644
index 0000000..e44e356
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -0,0 +1,8835 @@
+//===-- ARMISelLowering.cpp - ARM DAG Lowering Implementation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that ARM uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm-isel"
+#include "ARM.h"
+#include "ARMCallingConv.h"
+#include "ARMConstantPoolValue.h"
+#include "ARMISelLowering.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMPerfectShuffle.h"
+#include "ARMRegisterInfo.h"
+#include "ARMSubtarget.h"
+#include "ARMTargetMachine.h"
+#include "ARMTargetObjectFile.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Instruction.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/IntrinsicLowering.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/VectorExtras.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <sstream>
+using namespace llvm;
+
+STATISTIC(NumTailCalls, "Number of tail calls");
+STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
+
+// This option should go away when tail calls fully work.
+static cl::opt<bool>
+EnableARMTailCalls("arm-tail-calls", cl::Hidden,
+ cl::desc("Generate tail calls (TEMPORARY OPTION)."),
+ cl::init(false));
+
+cl::opt<bool>
+EnableARMLongCalls("arm-long-calls", cl::Hidden,
+ cl::desc("Generate calls via indirect call instructions"),
+ cl::init(false));
+
+static cl::opt<bool>
+ARMInterworking("arm-interworking", cl::Hidden,
+ cl::desc("Enable / disable ARM interworking (for debugging only)"),
+ cl::init(true));
+
+namespace llvm {
+ class ARMCCState : public CCState {
+ public:
+ ARMCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF,
+ const TargetMachine &TM, SmallVector<CCValAssign, 16> &locs,
+ LLVMContext &C, ParmContext PC)
+ : CCState(CC, isVarArg, MF, TM, locs, C) {
+ assert(((PC == Call) || (PC == Prologue)) &&
+ "ARMCCState users must specify whether their context is call"
+ "or prologue generation.");
+ CallOrPrologue = PC;
+ }
+ };
+}
+
+// The APCS parameter registers.
+static const unsigned GPRArgRegs[] = {
+ ARM::R0, ARM::R1, ARM::R2, ARM::R3
+};
+
+void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT,
+ EVT PromotedBitwiseVT) {
+ if (VT != PromotedLdStVT) {
+ setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote);
+ AddPromotedToType (ISD::LOAD, VT.getSimpleVT(),
+ PromotedLdStVT.getSimpleVT());
+
+ setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote);
+ AddPromotedToType (ISD::STORE, VT.getSimpleVT(),
+ PromotedLdStVT.getSimpleVT());
+ }
+
+ EVT ElemTy = VT.getVectorElementType();
+ if (ElemTy != MVT::i64 && ElemTy != MVT::f64)
+ setOperationAction(ISD::SETCC, VT.getSimpleVT(), Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom);
+ if (ElemTy != MVT::i32) {
+ setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Expand);
+ setOperationAction(ISD::UINT_TO_FP, VT.getSimpleVT(), Expand);
+ setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Expand);
+ setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Expand);
+ }
+ setOperationAction(ISD::BUILD_VECTOR, VT.getSimpleVT(), Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT.getSimpleVT(), Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Legal);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Legal);
+ setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand);
+ setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand);
+ if (VT.isInteger()) {
+ setOperationAction(ISD::SHL, VT.getSimpleVT(), Custom);
+ setOperationAction(ISD::SRA, VT.getSimpleVT(), Custom);
+ setOperationAction(ISD::SRL, VT.getSimpleVT(), Custom);
+ setLoadExtAction(ISD::SEXTLOAD, VT.getSimpleVT(), Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, VT.getSimpleVT(), Expand);
+ for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+ InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
+ setTruncStoreAction(VT.getSimpleVT(),
+ (MVT::SimpleValueType)InnerVT, Expand);
+ }
+ setLoadExtAction(ISD::EXTLOAD, VT.getSimpleVT(), Expand);
+
+ // Promote all bit-wise operations.
+ if (VT.isInteger() && VT != PromotedBitwiseVT) {
+ setOperationAction(ISD::AND, VT.getSimpleVT(), Promote);
+ AddPromotedToType (ISD::AND, VT.getSimpleVT(),
+ PromotedBitwiseVT.getSimpleVT());
+ setOperationAction(ISD::OR, VT.getSimpleVT(), Promote);
+ AddPromotedToType (ISD::OR, VT.getSimpleVT(),
+ PromotedBitwiseVT.getSimpleVT());
+ setOperationAction(ISD::XOR, VT.getSimpleVT(), Promote);
+ AddPromotedToType (ISD::XOR, VT.getSimpleVT(),
+ PromotedBitwiseVT.getSimpleVT());
+ }
+
+ // Neon does not support vector divide/remainder operations.
+ setOperationAction(ISD::SDIV, VT.getSimpleVT(), Expand);
+ setOperationAction(ISD::UDIV, VT.getSimpleVT(), Expand);
+ setOperationAction(ISD::FDIV, VT.getSimpleVT(), Expand);
+ setOperationAction(ISD::SREM, VT.getSimpleVT(), Expand);
+ setOperationAction(ISD::UREM, VT.getSimpleVT(), Expand);
+ setOperationAction(ISD::FREM, VT.getSimpleVT(), Expand);
+}
+
+void ARMTargetLowering::addDRTypeForNEON(EVT VT) {
+ addRegisterClass(VT, ARM::DPRRegisterClass);
+ addTypeForNEON(VT, MVT::f64, MVT::v2i32);
+}
+
+void ARMTargetLowering::addQRTypeForNEON(EVT VT) {
+ addRegisterClass(VT, ARM::QPRRegisterClass);
+ addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
+}
+
+static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) {
+ if (TM.getSubtarget<ARMSubtarget>().isTargetDarwin())
+ return new TargetLoweringObjectFileMachO();
+
+ return new ARMElfTargetObjectFile();
+}
+
+ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
+ : TargetLowering(TM, createTLOF(TM)) {
+ Subtarget = &TM.getSubtarget<ARMSubtarget>();
+ RegInfo = TM.getRegisterInfo();
+ Itins = TM.getInstrItineraryData();
+
+ setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
+
+ if (Subtarget->isTargetDarwin()) {
+ // Uses VFP for Thumb libfuncs if available.
+ if (Subtarget->isThumb() && Subtarget->hasVFP2()) {
+ // Single-precision floating-point arithmetic.
+ setLibcallName(RTLIB::ADD_F32, "__addsf3vfp");
+ setLibcallName(RTLIB::SUB_F32, "__subsf3vfp");
+ setLibcallName(RTLIB::MUL_F32, "__mulsf3vfp");
+ setLibcallName(RTLIB::DIV_F32, "__divsf3vfp");
+
+ // Double-precision floating-point arithmetic.
+ setLibcallName(RTLIB::ADD_F64, "__adddf3vfp");
+ setLibcallName(RTLIB::SUB_F64, "__subdf3vfp");
+ setLibcallName(RTLIB::MUL_F64, "__muldf3vfp");
+ setLibcallName(RTLIB::DIV_F64, "__divdf3vfp");
+
+ // Single-precision comparisons.
+ setLibcallName(RTLIB::OEQ_F32, "__eqsf2vfp");
+ setLibcallName(RTLIB::UNE_F32, "__nesf2vfp");
+ setLibcallName(RTLIB::OLT_F32, "__ltsf2vfp");
+ setLibcallName(RTLIB::OLE_F32, "__lesf2vfp");
+ setLibcallName(RTLIB::OGE_F32, "__gesf2vfp");
+ setLibcallName(RTLIB::OGT_F32, "__gtsf2vfp");
+ setLibcallName(RTLIB::UO_F32, "__unordsf2vfp");
+ setLibcallName(RTLIB::O_F32, "__unordsf2vfp");
+
+ setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::UO_F32, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::O_F32, ISD::SETEQ);
+
+ // Double-precision comparisons.
+ setLibcallName(RTLIB::OEQ_F64, "__eqdf2vfp");
+ setLibcallName(RTLIB::UNE_F64, "__nedf2vfp");
+ setLibcallName(RTLIB::OLT_F64, "__ltdf2vfp");
+ setLibcallName(RTLIB::OLE_F64, "__ledf2vfp");
+ setLibcallName(RTLIB::OGE_F64, "__gedf2vfp");
+ setLibcallName(RTLIB::OGT_F64, "__gtdf2vfp");
+ setLibcallName(RTLIB::UO_F64, "__unorddf2vfp");
+ setLibcallName(RTLIB::O_F64, "__unorddf2vfp");
+
+ setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::UO_F64, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::O_F64, ISD::SETEQ);
+
+ // Floating-point to integer conversions.
+ // i64 conversions are done via library routines even when generating VFP
+ // instructions, so use the same ones.
+ setLibcallName(RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp");
+ setLibcallName(RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp");
+ setLibcallName(RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp");
+ setLibcallName(RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp");
+
+ // Conversions between floating types.
+ setLibcallName(RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp");
+ setLibcallName(RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp");
+
+ // Integer to floating-point conversions.
+ // i64 conversions are done via library routines even when generating VFP
+ // instructions, so use the same ones.
+ // FIXME: There appears to be some naming inconsistency in ARM libgcc:
+ // e.g., __floatunsidf vs. __floatunssidfvfp.
+ setLibcallName(RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp");
+ setLibcallName(RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp");
+ setLibcallName(RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp");
+ setLibcallName(RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp");
+ }
+ }
+
+ // These libcalls are not available in 32-bit.
+ setLibcallName(RTLIB::SHL_I128, 0);
+ setLibcallName(RTLIB::SRL_I128, 0);
+ setLibcallName(RTLIB::SRA_I128, 0);
+
+ if (Subtarget->isAAPCS_ABI()) {
+ // Double-precision floating-point arithmetic helper functions
+ // RTABI chapter 4.1.2, Table 2
+ setLibcallName(RTLIB::ADD_F64, "__aeabi_dadd");
+ setLibcallName(RTLIB::DIV_F64, "__aeabi_ddiv");
+ setLibcallName(RTLIB::MUL_F64, "__aeabi_dmul");
+ setLibcallName(RTLIB::SUB_F64, "__aeabi_dsub");
+ setLibcallCallingConv(RTLIB::ADD_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::DIV_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::MUL_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SUB_F64, CallingConv::ARM_AAPCS);
+
+ // Double-precision floating-point comparison helper functions
+ // RTABI chapter 4.1.2, Table 3
+ setLibcallName(RTLIB::OEQ_F64, "__aeabi_dcmpeq");
+ setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
+ setLibcallName(RTLIB::UNE_F64, "__aeabi_dcmpeq");
+ setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETEQ);
+ setLibcallName(RTLIB::OLT_F64, "__aeabi_dcmplt");
+ setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
+ setLibcallName(RTLIB::OLE_F64, "__aeabi_dcmple");
+ setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
+ setLibcallName(RTLIB::OGE_F64, "__aeabi_dcmpge");
+ setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
+ setLibcallName(RTLIB::OGT_F64, "__aeabi_dcmpgt");
+ setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
+ setLibcallName(RTLIB::UO_F64, "__aeabi_dcmpun");
+ setCmpLibcallCC(RTLIB::UO_F64, ISD::SETNE);
+ setLibcallName(RTLIB::O_F64, "__aeabi_dcmpun");
+ setCmpLibcallCC(RTLIB::O_F64, ISD::SETEQ);
+ setLibcallCallingConv(RTLIB::OEQ_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UNE_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::OLT_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::OLE_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::OGE_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::OGT_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UO_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::O_F64, CallingConv::ARM_AAPCS);
+
+ // Single-precision floating-point arithmetic helper functions
+ // RTABI chapter 4.1.2, Table 4
+ setLibcallName(RTLIB::ADD_F32, "__aeabi_fadd");
+ setLibcallName(RTLIB::DIV_F32, "__aeabi_fdiv");
+ setLibcallName(RTLIB::MUL_F32, "__aeabi_fmul");
+ setLibcallName(RTLIB::SUB_F32, "__aeabi_fsub");
+ setLibcallCallingConv(RTLIB::ADD_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::DIV_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::MUL_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SUB_F32, CallingConv::ARM_AAPCS);
+
+ // Single-precision floating-point comparison helper functions
+ // RTABI chapter 4.1.2, Table 5
+ setLibcallName(RTLIB::OEQ_F32, "__aeabi_fcmpeq");
+ setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
+ setLibcallName(RTLIB::UNE_F32, "__aeabi_fcmpeq");
+ setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETEQ);
+ setLibcallName(RTLIB::OLT_F32, "__aeabi_fcmplt");
+ setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
+ setLibcallName(RTLIB::OLE_F32, "__aeabi_fcmple");
+ setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
+ setLibcallName(RTLIB::OGE_F32, "__aeabi_fcmpge");
+ setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
+ setLibcallName(RTLIB::OGT_F32, "__aeabi_fcmpgt");
+ setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
+ setLibcallName(RTLIB::UO_F32, "__aeabi_fcmpun");
+ setCmpLibcallCC(RTLIB::UO_F32, ISD::SETNE);
+ setLibcallName(RTLIB::O_F32, "__aeabi_fcmpun");
+ setCmpLibcallCC(RTLIB::O_F32, ISD::SETEQ);
+ setLibcallCallingConv(RTLIB::OEQ_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UNE_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::OLT_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::OLE_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::OGE_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::OGT_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UO_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::O_F32, CallingConv::ARM_AAPCS);
+
+ // Floating-point to integer conversions.
+ // RTABI chapter 4.1.2, Table 6
+ setLibcallName(RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz");
+ setLibcallName(RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz");
+ setLibcallName(RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz");
+ setLibcallName(RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz");
+ setLibcallName(RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz");
+ setLibcallName(RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz");
+ setLibcallName(RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz");
+ setLibcallName(RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz");
+ setLibcallCallingConv(RTLIB::FPTOSINT_F64_I32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::FPTOUINT_F64_I32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::FPTOSINT_F64_I64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::FPTOUINT_F64_I64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::FPTOSINT_F32_I32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::FPTOUINT_F32_I32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::FPTOSINT_F32_I64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::FPTOUINT_F32_I64, CallingConv::ARM_AAPCS);
+
+ // Conversions between floating types.
+ // RTABI chapter 4.1.2, Table 7
+ setLibcallName(RTLIB::FPROUND_F64_F32, "__aeabi_d2f");
+ setLibcallName(RTLIB::FPEXT_F32_F64, "__aeabi_f2d");
+ setLibcallCallingConv(RTLIB::FPROUND_F64_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::FPEXT_F32_F64, CallingConv::ARM_AAPCS);
+
+ // Integer to floating-point conversions.
+ // RTABI chapter 4.1.2, Table 8
+ setLibcallName(RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d");
+ setLibcallName(RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d");
+ setLibcallName(RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d");
+ setLibcallName(RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d");
+ setLibcallName(RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f");
+ setLibcallName(RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f");
+ setLibcallName(RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f");
+ setLibcallName(RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f");
+ setLibcallCallingConv(RTLIB::SINTTOFP_I32_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UINTTOFP_I32_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SINTTOFP_I64_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UINTTOFP_I64_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SINTTOFP_I32_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UINTTOFP_I32_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SINTTOFP_I64_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UINTTOFP_I64_F32, CallingConv::ARM_AAPCS);
+
+ // Long long helper functions
+ // RTABI chapter 4.2, Table 9
+ setLibcallName(RTLIB::MUL_I64, "__aeabi_lmul");
+ setLibcallName(RTLIB::SDIV_I64, "__aeabi_ldivmod");
+ setLibcallName(RTLIB::UDIV_I64, "__aeabi_uldivmod");
+ setLibcallName(RTLIB::SHL_I64, "__aeabi_llsl");
+ setLibcallName(RTLIB::SRL_I64, "__aeabi_llsr");
+ setLibcallName(RTLIB::SRA_I64, "__aeabi_lasr");
+ setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SHL_I64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SRL_I64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SRA_I64, CallingConv::ARM_AAPCS);
+
+ // Integer division functions
+ // RTABI chapter 4.3.1
+ setLibcallName(RTLIB::SDIV_I8, "__aeabi_idiv");
+ setLibcallName(RTLIB::SDIV_I16, "__aeabi_idiv");
+ setLibcallName(RTLIB::SDIV_I32, "__aeabi_idiv");
+ setLibcallName(RTLIB::UDIV_I8, "__aeabi_uidiv");
+ setLibcallName(RTLIB::UDIV_I16, "__aeabi_uidiv");
+ setLibcallName(RTLIB::UDIV_I32, "__aeabi_uidiv");
+ setLibcallCallingConv(RTLIB::SDIV_I8, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SDIV_I16, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SDIV_I32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UDIV_I8, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UDIV_I16, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UDIV_I32, CallingConv::ARM_AAPCS);
+
+ // Memory operations
+ // RTABI chapter 4.3.4
+ setLibcallName(RTLIB::MEMCPY, "__aeabi_memcpy");
+ setLibcallName(RTLIB::MEMMOVE, "__aeabi_memmove");
+ setLibcallName(RTLIB::MEMSET, "__aeabi_memset");
+ }
+
+ // Use divmod compiler-rt calls for iOS 5.0 and later.
+ if (Subtarget->getTargetTriple().getOS() == Triple::IOS &&
+ !Subtarget->getTargetTriple().isOSVersionLT(5, 0)) {
+ setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
+ setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
+ }
+
+ if (Subtarget->isThumb1Only())
+ addRegisterClass(MVT::i32, ARM::tGPRRegisterClass);
+ else
+ addRegisterClass(MVT::i32, ARM::GPRRegisterClass);
+ if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
+ addRegisterClass(MVT::f32, ARM::SPRRegisterClass);
+ if (!Subtarget->isFPOnlySP())
+ addRegisterClass(MVT::f64, ARM::DPRRegisterClass);
+
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+ }
+
+ if (Subtarget->hasNEON()) {
+ addDRTypeForNEON(MVT::v2f32);
+ addDRTypeForNEON(MVT::v8i8);
+ addDRTypeForNEON(MVT::v4i16);
+ addDRTypeForNEON(MVT::v2i32);
+ addDRTypeForNEON(MVT::v1i64);
+
+ addQRTypeForNEON(MVT::v4f32);
+ addQRTypeForNEON(MVT::v2f64);
+ addQRTypeForNEON(MVT::v16i8);
+ addQRTypeForNEON(MVT::v8i16);
+ addQRTypeForNEON(MVT::v4i32);
+ addQRTypeForNEON(MVT::v2i64);
+
+ // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
+ // neither Neon nor VFP support any arithmetic operations on it.
+ setOperationAction(ISD::FADD, MVT::v2f64, Expand);
+ setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
+ setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
+ setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
+ setOperationAction(ISD::FREM, MVT::v2f64, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);
+ setOperationAction(ISD::SETCC, MVT::v2f64, Expand);
+ setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
+ setOperationAction(ISD::FABS, MVT::v2f64, Expand);
+ setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
+ setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
+ setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
+ setOperationAction(ISD::FPOWI, MVT::v2f64, Expand);
+ setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
+ setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
+ setOperationAction(ISD::FLOG2, MVT::v2f64, Expand);
+ setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);
+ setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
+ setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);
+ setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);
+ setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
+ setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);
+
+ setTruncStoreAction(MVT::v2f64, MVT::v2f32, Expand);
+
+ // Neon does not support some operations on v1i64 and v2i64 types.
+ setOperationAction(ISD::MUL, MVT::v1i64, Expand);
+ // Custom handling for some quad-vector types to detect VMULL.
+ setOperationAction(ISD::MUL, MVT::v8i16, Custom);
+ setOperationAction(ISD::MUL, MVT::v4i32, Custom);
+ setOperationAction(ISD::MUL, MVT::v2i64, Custom);
+ // Custom handling for some vector types to avoid expensive expansions
+ setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
+ setOperationAction(ISD::SDIV, MVT::v8i8, Custom);
+ setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
+ setOperationAction(ISD::UDIV, MVT::v8i8, Custom);
+ setOperationAction(ISD::SETCC, MVT::v1i64, Expand);
+ setOperationAction(ISD::SETCC, MVT::v2i64, Expand);
+ // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with
+ // a destination type that is wider than the source.
+ setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
+
+ setTargetDAGCombine(ISD::INTRINSIC_VOID);
+ setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
+ setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
+ setTargetDAGCombine(ISD::SHL);
+ setTargetDAGCombine(ISD::SRL);
+ setTargetDAGCombine(ISD::SRA);
+ setTargetDAGCombine(ISD::SIGN_EXTEND);
+ setTargetDAGCombine(ISD::ZERO_EXTEND);
+ setTargetDAGCombine(ISD::ANY_EXTEND);
+ setTargetDAGCombine(ISD::SELECT_CC);
+ setTargetDAGCombine(ISD::BUILD_VECTOR);
+ setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
+ setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
+ setTargetDAGCombine(ISD::STORE);
+ setTargetDAGCombine(ISD::FP_TO_SINT);
+ setTargetDAGCombine(ISD::FP_TO_UINT);
+ setTargetDAGCombine(ISD::FDIV);
+ }
+
+ computeRegisterProperties();
+
+ // ARM does not have f32 extending load.
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+
+ // ARM does not have i1 sign extending load.
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+
+ // ARM supports all 4 flavors of integer indexed load / store.
+ if (!Subtarget->isThumb1Only()) {
+ for (unsigned im = (unsigned)ISD::PRE_INC;
+ im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
+ setIndexedLoadAction(im, MVT::i1, Legal);
+ setIndexedLoadAction(im, MVT::i8, Legal);
+ setIndexedLoadAction(im, MVT::i16, Legal);
+ setIndexedLoadAction(im, MVT::i32, Legal);
+ setIndexedStoreAction(im, MVT::i1, Legal);
+ setIndexedStoreAction(im, MVT::i8, Legal);
+ setIndexedStoreAction(im, MVT::i16, Legal);
+ setIndexedStoreAction(im, MVT::i32, Legal);
+ }
+ }
+
+ // i64 operation support.
+ setOperationAction(ISD::MUL, MVT::i64, Expand);
+ setOperationAction(ISD::MULHU, MVT::i32, Expand);
+ if (Subtarget->isThumb1Only()) {
+ setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
+ }
+ if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
+ || (Subtarget->isThumb2() && !Subtarget->hasThumb2DSP()))
+ setOperationAction(ISD::MULHS, MVT::i32, Expand);
+
+ setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
+ setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
+ setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
+ setOperationAction(ISD::SRL, MVT::i64, Custom);
+ setOperationAction(ISD::SRA, MVT::i64, Custom);
+
+ if (!Subtarget->isThumb1Only()) {
+ // FIXME: We should do this for Thumb1 as well.
+ setOperationAction(ISD::ADDC, MVT::i32, Custom);
+ setOperationAction(ISD::ADDE, MVT::i32, Custom);
+ setOperationAction(ISD::SUBC, MVT::i32, Custom);
+ setOperationAction(ISD::SUBE, MVT::i32, Custom);
+ }
+
+ // ARM does not have ROTL.
+ setOperationAction(ISD::ROTL, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ, MVT::i32, Custom);
+ setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+ if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
+ setOperationAction(ISD::CTLZ, MVT::i32, Expand);
+
+ // Only ARMv6 has BSWAP.
+ if (!Subtarget->hasV6Ops())
+ setOperationAction(ISD::BSWAP, MVT::i32, Expand);
+
+ // These are expanded into libcalls.
+ if (!Subtarget->hasDivide() || !Subtarget->isThumb2()) {
+ // v7M has a hardware divider
+ setOperationAction(ISD::SDIV, MVT::i32, Expand);
+ setOperationAction(ISD::UDIV, MVT::i32, Expand);
+ }
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
+
+ setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
+ setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
+ setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
+
+ setOperationAction(ISD::TRAP, MVT::Other, Legal);
+
+ // Use the default implementation.
+ setOperationAction(ISD::VASTART, MVT::Other, Custom);
+ setOperationAction(ISD::VAARG, MVT::Other, Expand);
+ setOperationAction(ISD::VACOPY, MVT::Other, Expand);
+ setOperationAction(ISD::VAEND, MVT::Other, Expand);
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+ setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
+ setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
+ setExceptionPointerRegister(ARM::R0);
+ setExceptionSelectorRegister(ARM::R1);
+
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
+ // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
+ // the default expansion.
+ // FIXME: This should be checking for v6k, not just v6.
+ if (Subtarget->hasDataBarrier() ||
+ (Subtarget->hasV6Ops() && !Subtarget->isThumb())) {
+ // membarrier needs custom lowering; the rest are legal and handled
+ // normally.
+ setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
+ // Custom lowering for 64-bit ops
+ setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom);
+ // Automatically insert fences (dmb ist) around ATOMIC_SWAP etc.
+ setInsertFencesForAtomic(true);
+ } else {
+ // Set them all for expansion, which will force libcalls.
+ setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
+ // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
+ // Unordered/Monotonic case.
+ setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom);
+ setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom);
+ // Since the libcalls include locking, fold in the fences
+ setShouldFoldAtomicFences(true);
+ }
+
+ setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
+
+ // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
+ if (!Subtarget->hasV6Ops()) {
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
+ }
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+
+ if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
+ // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
+ // iff target supports vfp2.
+ setOperationAction(ISD::BITCAST, MVT::i64, Custom);
+ setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
+ }
+
+ // We want to custom lower some of our intrinsics.
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+ if (Subtarget->isTargetDarwin()) {
+ setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
+ setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
+ setOperationAction(ISD::EH_SJLJ_DISPATCHSETUP, MVT::Other, Custom);
+ setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
+ }
+
+ setOperationAction(ISD::SETCC, MVT::i32, Expand);
+ setOperationAction(ISD::SETCC, MVT::f32, Expand);
+ setOperationAction(ISD::SETCC, MVT::f64, Expand);
+ setOperationAction(ISD::SELECT, MVT::i32, Custom);
+ setOperationAction(ISD::SELECT, MVT::f32, Custom);
+ setOperationAction(ISD::SELECT, MVT::f64, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
+
+ setOperationAction(ISD::BRCOND, MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i32, Custom);
+ setOperationAction(ISD::BR_CC, MVT::f32, Custom);
+ setOperationAction(ISD::BR_CC, MVT::f64, Custom);
+ setOperationAction(ISD::BR_JT, MVT::Other, Custom);
+
+ // We don't support sin/cos/fmod/copysign/pow
+ setOperationAction(ISD::FSIN, MVT::f64, Expand);
+ setOperationAction(ISD::FSIN, MVT::f32, Expand);
+ setOperationAction(ISD::FCOS, MVT::f32, Expand);
+ setOperationAction(ISD::FCOS, MVT::f64, Expand);
+ setOperationAction(ISD::FREM, MVT::f64, Expand);
+ setOperationAction(ISD::FREM, MVT::f32, Expand);
+ if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
+ }
+ setOperationAction(ISD::FPOW, MVT::f64, Expand);
+ setOperationAction(ISD::FPOW, MVT::f32, Expand);
+
+ setOperationAction(ISD::FMA, MVT::f64, Expand);
+ setOperationAction(ISD::FMA, MVT::f32, Expand);
+
+ // Various VFP goodness
+ if (!UseSoftFloat && !Subtarget->isThumb1Only()) {
+ // int <-> fp are custom expanded into bit_convert + ARMISD ops.
+ if (Subtarget->hasVFP2()) {
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
+ }
+ // Special handling for half-precision FP.
+ if (!Subtarget->hasFP16()) {
+ setOperationAction(ISD::FP16_TO_FP32, MVT::f32, Expand);
+ setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Expand);
+ }
+ }
+
+ // We have target-specific dag combine patterns for the following nodes:
+ // ARMISD::VMOVRRD - No need to call setTargetDAGCombine
+ setTargetDAGCombine(ISD::ADD);
+ setTargetDAGCombine(ISD::SUB);
+ setTargetDAGCombine(ISD::MUL);
+
+ if (Subtarget->hasV6T2Ops() || Subtarget->hasNEON())
+ setTargetDAGCombine(ISD::OR);
+ if (Subtarget->hasNEON())
+ setTargetDAGCombine(ISD::AND);
+
+ setStackPointerRegisterToSaveRestore(ARM::SP);
+
+ if (UseSoftFloat || Subtarget->isThumb1Only() || !Subtarget->hasVFP2())
+ setSchedulingPreference(Sched::RegPressure);
+ else
+ setSchedulingPreference(Sched::Hybrid);
+
+ //// temporary - rewrite interface to use type
+ maxStoresPerMemcpy = maxStoresPerMemcpyOptSize = 1;
+
+ // On ARM arguments smaller than 4 bytes are extended, so all arguments
+ // are at least 4 bytes aligned.
+ setMinStackArgumentAlignment(4);
+
+ benefitFromCodePlacementOpt = true;
+
+ setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
+}
+
+// FIXME: It might make sense to define the representative register class as the
+// nearest super-register that has a non-null superset. For example, DPR_VFP2 is
+// a super-register of SPR, and DPR is a superset if DPR_VFP2. Consequently,
+// SPR's representative would be DPR_VFP2. This should work well if register
+// pressure tracking were modified such that a register use would increment the
+// pressure of the register class's representative and all of it's super
+// classes' representatives transitively. We have not implemented this because
+// of the difficulty prior to coalescing of modeling operand register classes
+// due to the common occurrence of cross class copies and subregister insertions
+// and extractions.
+std::pair<const TargetRegisterClass*, uint8_t>
+ARMTargetLowering::findRepresentativeClass(EVT VT) const{
+ const TargetRegisterClass *RRC = 0;
+ uint8_t Cost = 1;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default:
+ return TargetLowering::findRepresentativeClass(VT);
+ // Use DPR as representative register class for all floating point
+ // and vector types. Since there are 32 SPR registers and 32 DPR registers so
+ // the cost is 1 for both f32 and f64.
+ case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
+ case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
+ RRC = ARM::DPRRegisterClass;
+ // When NEON is used for SP, only half of the register file is available
+ // because operations that define both SP and DP results will be constrained
+ // to the VFP2 class (D0-D15). We currently model this constraint prior to
+ // coalescing by double-counting the SP regs. See the FIXME above.
+ if (Subtarget->useNEONForSinglePrecisionFP())
+ Cost = 2;
+ break;
+ case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
+ case MVT::v4f32: case MVT::v2f64:
+ RRC = ARM::DPRRegisterClass;
+ Cost = 2;
+ break;
+ case MVT::v4i64:
+ RRC = ARM::DPRRegisterClass;
+ Cost = 4;
+ break;
+ case MVT::v8i64:
+ RRC = ARM::DPRRegisterClass;
+ Cost = 8;
+ break;
+ }
+ return std::make_pair(RRC, Cost);
+}
+
+const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ default: return 0;
+ case ARMISD::Wrapper: return "ARMISD::Wrapper";
+ case ARMISD::WrapperDYN: return "ARMISD::WrapperDYN";
+ case ARMISD::WrapperPIC: return "ARMISD::WrapperPIC";
+ case ARMISD::WrapperJT: return "ARMISD::WrapperJT";
+ case ARMISD::CALL: return "ARMISD::CALL";
+ case ARMISD::CALL_PRED: return "ARMISD::CALL_PRED";
+ case ARMISD::CALL_NOLINK: return "ARMISD::CALL_NOLINK";
+ case ARMISD::tCALL: return "ARMISD::tCALL";
+ case ARMISD::BRCOND: return "ARMISD::BRCOND";
+ case ARMISD::BR_JT: return "ARMISD::BR_JT";
+ case ARMISD::BR2_JT: return "ARMISD::BR2_JT";
+ case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG";
+ case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD";
+ case ARMISD::CMP: return "ARMISD::CMP";
+ case ARMISD::CMPZ: return "ARMISD::CMPZ";
+ case ARMISD::CMPFP: return "ARMISD::CMPFP";
+ case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0";
+ case ARMISD::BCC_i64: return "ARMISD::BCC_i64";
+ case ARMISD::FMSTAT: return "ARMISD::FMSTAT";
+ case ARMISD::CMOV: return "ARMISD::CMOV";
+
+ case ARMISD::RBIT: return "ARMISD::RBIT";
+
+ case ARMISD::FTOSI: return "ARMISD::FTOSI";
+ case ARMISD::FTOUI: return "ARMISD::FTOUI";
+ case ARMISD::SITOF: return "ARMISD::SITOF";
+ case ARMISD::UITOF: return "ARMISD::UITOF";
+
+ case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG";
+ case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG";
+ case ARMISD::RRX: return "ARMISD::RRX";
+
+ case ARMISD::ADDC: return "ARMISD::ADDC";
+ case ARMISD::ADDE: return "ARMISD::ADDE";
+ case ARMISD::SUBC: return "ARMISD::SUBC";
+ case ARMISD::SUBE: return "ARMISD::SUBE";
+
+ case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD";
+ case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR";
+
+ case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
+ case ARMISD::EH_SJLJ_LONGJMP:return "ARMISD::EH_SJLJ_LONGJMP";
+ case ARMISD::EH_SJLJ_DISPATCHSETUP:return "ARMISD::EH_SJLJ_DISPATCHSETUP";
+
+ case ARMISD::TC_RETURN: return "ARMISD::TC_RETURN";
+
+ case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER";
+
+ case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC";
+
+ case ARMISD::MEMBARRIER: return "ARMISD::MEMBARRIER";
+ case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR";
+
+ case ARMISD::PRELOAD: return "ARMISD::PRELOAD";
+
+ case ARMISD::VCEQ: return "ARMISD::VCEQ";
+ case ARMISD::VCEQZ: return "ARMISD::VCEQZ";
+ case ARMISD::VCGE: return "ARMISD::VCGE";
+ case ARMISD::VCGEZ: return "ARMISD::VCGEZ";
+ case ARMISD::VCLEZ: return "ARMISD::VCLEZ";
+ case ARMISD::VCGEU: return "ARMISD::VCGEU";
+ case ARMISD::VCGT: return "ARMISD::VCGT";
+ case ARMISD::VCGTZ: return "ARMISD::VCGTZ";
+ case ARMISD::VCLTZ: return "ARMISD::VCLTZ";
+ case ARMISD::VCGTU: return "ARMISD::VCGTU";
+ case ARMISD::VTST: return "ARMISD::VTST";
+
+ case ARMISD::VSHL: return "ARMISD::VSHL";
+ case ARMISD::VSHRs: return "ARMISD::VSHRs";
+ case ARMISD::VSHRu: return "ARMISD::VSHRu";
+ case ARMISD::VSHLLs: return "ARMISD::VSHLLs";
+ case ARMISD::VSHLLu: return "ARMISD::VSHLLu";
+ case ARMISD::VSHLLi: return "ARMISD::VSHLLi";
+ case ARMISD::VSHRN: return "ARMISD::VSHRN";
+ case ARMISD::VRSHRs: return "ARMISD::VRSHRs";
+ case ARMISD::VRSHRu: return "ARMISD::VRSHRu";
+ case ARMISD::VRSHRN: return "ARMISD::VRSHRN";
+ case ARMISD::VQSHLs: return "ARMISD::VQSHLs";
+ case ARMISD::VQSHLu: return "ARMISD::VQSHLu";
+ case ARMISD::VQSHLsu: return "ARMISD::VQSHLsu";
+ case ARMISD::VQSHRNs: return "ARMISD::VQSHRNs";
+ case ARMISD::VQSHRNu: return "ARMISD::VQSHRNu";
+ case ARMISD::VQSHRNsu: return "ARMISD::VQSHRNsu";
+ case ARMISD::VQRSHRNs: return "ARMISD::VQRSHRNs";
+ case ARMISD::VQRSHRNu: return "ARMISD::VQRSHRNu";
+ case ARMISD::VQRSHRNsu: return "ARMISD::VQRSHRNsu";
+ case ARMISD::VGETLANEu: return "ARMISD::VGETLANEu";
+ case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs";
+ case ARMISD::VMOVIMM: return "ARMISD::VMOVIMM";
+ case ARMISD::VMVNIMM: return "ARMISD::VMVNIMM";
+ case ARMISD::VDUP: return "ARMISD::VDUP";
+ case ARMISD::VDUPLANE: return "ARMISD::VDUPLANE";
+ case ARMISD::VEXT: return "ARMISD::VEXT";
+ case ARMISD::VREV64: return "ARMISD::VREV64";
+ case ARMISD::VREV32: return "ARMISD::VREV32";
+ case ARMISD::VREV16: return "ARMISD::VREV16";
+ case ARMISD::VZIP: return "ARMISD::VZIP";
+ case ARMISD::VUZP: return "ARMISD::VUZP";
+ case ARMISD::VTRN: return "ARMISD::VTRN";
+ case ARMISD::VTBL1: return "ARMISD::VTBL1";
+ case ARMISD::VTBL2: return "ARMISD::VTBL2";
+ case ARMISD::VMULLs: return "ARMISD::VMULLs";
+ case ARMISD::VMULLu: return "ARMISD::VMULLu";
+ case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
+ case ARMISD::FMAX: return "ARMISD::FMAX";
+ case ARMISD::FMIN: return "ARMISD::FMIN";
+ case ARMISD::BFI: return "ARMISD::BFI";
+ case ARMISD::VORRIMM: return "ARMISD::VORRIMM";
+ case ARMISD::VBICIMM: return "ARMISD::VBICIMM";
+ case ARMISD::VBSL: return "ARMISD::VBSL";
+ case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP";
+ case ARMISD::VLD3DUP: return "ARMISD::VLD3DUP";
+ case ARMISD::VLD4DUP: return "ARMISD::VLD4DUP";
+ case ARMISD::VLD1_UPD: return "ARMISD::VLD1_UPD";
+ case ARMISD::VLD2_UPD: return "ARMISD::VLD2_UPD";
+ case ARMISD::VLD3_UPD: return "ARMISD::VLD3_UPD";
+ case ARMISD::VLD4_UPD: return "ARMISD::VLD4_UPD";
+ case ARMISD::VLD2LN_UPD: return "ARMISD::VLD2LN_UPD";
+ case ARMISD::VLD3LN_UPD: return "ARMISD::VLD3LN_UPD";
+ case ARMISD::VLD4LN_UPD: return "ARMISD::VLD4LN_UPD";
+ case ARMISD::VLD2DUP_UPD: return "ARMISD::VLD2DUP_UPD";
+ case ARMISD::VLD3DUP_UPD: return "ARMISD::VLD3DUP_UPD";
+ case ARMISD::VLD4DUP_UPD: return "ARMISD::VLD4DUP_UPD";
+ case ARMISD::VST1_UPD: return "ARMISD::VST1_UPD";
+ case ARMISD::VST2_UPD: return "ARMISD::VST2_UPD";
+ case ARMISD::VST3_UPD: return "ARMISD::VST3_UPD";
+ case ARMISD::VST4_UPD: return "ARMISD::VST4_UPD";
+ case ARMISD::VST2LN_UPD: return "ARMISD::VST2LN_UPD";
+ case ARMISD::VST3LN_UPD: return "ARMISD::VST3LN_UPD";
+ case ARMISD::VST4LN_UPD: return "ARMISD::VST4LN_UPD";
+ }
+}
+
+EVT ARMTargetLowering::getSetCCResultType(EVT VT) const {
+ if (!VT.isVector()) return getPointerTy();
+ return VT.changeVectorElementTypeToInteger();
+}
+
+/// getRegClassFor - Return the register class that should be used for the
+/// specified value type.
+TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const {
+ // Map v4i64 to QQ registers but do not make the type legal. Similarly map
+ // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
+ // load / store 4 to 8 consecutive D registers.
+ if (Subtarget->hasNEON()) {
+ if (VT == MVT::v4i64)
+ return ARM::QQPRRegisterClass;
+ else if (VT == MVT::v8i64)
+ return ARM::QQQQPRRegisterClass;
+ }
+ return TargetLowering::getRegClassFor(VT);
+}
+
+// Create a fast isel object.
+FastISel *
+ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo) const {
+ return ARM::createFastISel(funcInfo);
+}
+
+/// getMaximalGlobalOffset - Returns the maximal possible offset which can
+/// be used for loads / stores from the global.
+unsigned ARMTargetLowering::getMaximalGlobalOffset() const {
+ return (Subtarget->isThumb1Only() ? 127 : 4095);
+}
+
+Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
+ unsigned NumVals = N->getNumValues();
+ if (!NumVals)
+ return Sched::RegPressure;
+
+ for (unsigned i = 0; i != NumVals; ++i) {
+ EVT VT = N->getValueType(i);
+ if (VT == MVT::Glue || VT == MVT::Other)
+ continue;
+ if (VT.isFloatingPoint() || VT.isVector())
+ return Sched::Latency;
+ }
+
+ if (!N->isMachineOpcode())
+ return Sched::RegPressure;
+
+ // Load are scheduled for latency even if there instruction itinerary
+ // is not available.
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
+
+ if (MCID.getNumDefs() == 0)
+ return Sched::RegPressure;
+ if (!Itins->isEmpty() &&
+ Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2)
+ return Sched::Latency;
+
+ return Sched::RegPressure;
+}
+
+//===----------------------------------------------------------------------===//
+// Lowering Code
+//===----------------------------------------------------------------------===//
+
+/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
+static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
+ switch (CC) {
+ default: llvm_unreachable("Unknown condition code!");
+ case ISD::SETNE: return ARMCC::NE;
+ case ISD::SETEQ: return ARMCC::EQ;
+ case ISD::SETGT: return ARMCC::GT;
+ case ISD::SETGE: return ARMCC::GE;
+ case ISD::SETLT: return ARMCC::LT;
+ case ISD::SETLE: return ARMCC::LE;
+ case ISD::SETUGT: return ARMCC::HI;
+ case ISD::SETUGE: return ARMCC::HS;
+ case ISD::SETULT: return ARMCC::LO;
+ case ISD::SETULE: return ARMCC::LS;
+ }
+}
+
+/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
+static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
+ ARMCC::CondCodes &CondCode2) {
+ CondCode2 = ARMCC::AL;
+ switch (CC) {
+ default: llvm_unreachable("Unknown FP condition!");
+ case ISD::SETEQ:
+ case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
+ case ISD::SETGT:
+ case ISD::SETOGT: CondCode = ARMCC::GT; break;
+ case ISD::SETGE:
+ case ISD::SETOGE: CondCode = ARMCC::GE; break;
+ case ISD::SETOLT: CondCode = ARMCC::MI; break;
+ case ISD::SETOLE: CondCode = ARMCC::LS; break;
+ case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
+ case ISD::SETO: CondCode = ARMCC::VC; break;
+ case ISD::SETUO: CondCode = ARMCC::VS; break;
+ case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
+ case ISD::SETUGT: CondCode = ARMCC::HI; break;
+ case ISD::SETUGE: CondCode = ARMCC::PL; break;
+ case ISD::SETLT:
+ case ISD::SETULT: CondCode = ARMCC::LT; break;
+ case ISD::SETLE:
+ case ISD::SETULE: CondCode = ARMCC::LE; break;
+ case ISD::SETNE:
+ case ISD::SETUNE: CondCode = ARMCC::NE; break;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+#include "ARMGenCallingConv.inc"
+
+/// CCAssignFnForNode - Selects the correct CCAssignFn for a the
+/// given CallingConvention value.
+CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
+ bool Return,
+ bool isVarArg) const {
+ switch (CC) {
+ default:
+ llvm_unreachable("Unsupported calling convention");
+ case CallingConv::Fast:
+ if (Subtarget->hasVFP2() && !isVarArg) {
+ if (!Subtarget->isAAPCS_ABI())
+ return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
+ // For AAPCS ABI targets, just use VFP variant of the calling convention.
+ return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
+ }
+ // Fallthrough
+ case CallingConv::C: {
+ // Use target triple & subtarget features to do actual dispatch.
+ if (!Subtarget->isAAPCS_ABI())
+ return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
+ else if (Subtarget->hasVFP2() &&
+ FloatABIType == FloatABI::Hard && !isVarArg)
+ return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
+ return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
+ }
+ case CallingConv::ARM_AAPCS_VFP:
+ return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
+ case CallingConv::ARM_AAPCS:
+ return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
+ case CallingConv::ARM_APCS:
+ return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
+ }
+}
+
+/// LowerCallResult - Lower the result values of a call into the
+/// appropriate copies out of appropriate physical registers.
+SDValue
+ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RVLocs;
+ ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext(), Call);
+ CCInfo.AnalyzeCallResult(Ins,
+ CCAssignFnForNode(CallConv, /* Return*/ true,
+ isVarArg));
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign VA = RVLocs[i];
+
+ SDValue Val;
+ if (VA.needsCustom()) {
+ // Handle f64 or half of a v2f64.
+ SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
+ InFlag);
+ Chain = Lo.getValue(1);
+ InFlag = Lo.getValue(2);
+ VA = RVLocs[++i]; // skip ahead to next loc
+ SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
+ InFlag);
+ Chain = Hi.getValue(1);
+ InFlag = Hi.getValue(2);
+ Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
+
+ if (VA.getLocVT() == MVT::v2f64) {
+ SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
+ Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
+ DAG.getConstant(0, MVT::i32));
+
+ VA = RVLocs[++i]; // skip ahead to next loc
+ Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
+ Chain = Lo.getValue(1);
+ InFlag = Lo.getValue(2);
+ VA = RVLocs[++i]; // skip ahead to next loc
+ Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
+ Chain = Hi.getValue(1);
+ InFlag = Hi.getValue(2);
+ Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
+ Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
+ DAG.getConstant(1, MVT::i32));
+ }
+ } else {
+ Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
+ InFlag);
+ Chain = Val.getValue(1);
+ InFlag = Val.getValue(2);
+ }
+
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::BCvt:
+ Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
+ break;
+ }
+
+ InVals.push_back(Val);
+ }
+
+ return Chain;
+}
+
+/// LowerMemOpCallTo - Store the argument to the stack.
+SDValue
+ARMTargetLowering::LowerMemOpCallTo(SDValue Chain,
+ SDValue StackPtr, SDValue Arg,
+ DebugLoc dl, SelectionDAG &DAG,
+ const CCValAssign &VA,
+ ISD::ArgFlagsTy Flags) const {
+ unsigned LocMemOffset = VA.getLocMemOffset();
+ SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
+ PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
+ return DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo::getStack(LocMemOffset),
+ false, false, 0);
+}
+
+void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG,
+ SDValue Chain, SDValue &Arg,
+ RegsToPassVector &RegsToPass,
+ CCValAssign &VA, CCValAssign &NextVA,
+ SDValue &StackPtr,
+ SmallVector<SDValue, 8> &MemOpChains,
+ ISD::ArgFlagsTy Flags) const {
+
+ SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
+ DAG.getVTList(MVT::i32, MVT::i32), Arg);
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd));
+
+ if (NextVA.isRegLoc())
+ RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1)));
+ else {
+ assert(NextVA.isMemLoc());
+ if (StackPtr.getNode() == 0)
+ StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
+
+ MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1),
+ dl, DAG, NextVA,
+ Flags));
+ }
+}
+
+/// LowerCall - Lowering a call into a callseq_start <-
+/// ARMISD:CALL <- callseq_end chain. Also add input and output parameter
+/// nodes.
+SDValue
+ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ bool IsStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
+ bool IsSibCall = false;
+ // Disable tail calls if they're not supported.
+ if (!EnableARMTailCalls && !Subtarget->supportsTailCall())
+ isTailCall = false;
+ if (isTailCall) {
+ // Check if it's really possible to do a tail call.
+ isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
+ isVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(),
+ Outs, OutVals, Ins, DAG);
+ // We don't support GuaranteedTailCallOpt for ARM, only automatically
+ // detected sibcalls.
+ if (isTailCall) {
+ ++NumTailCalls;
+ IsSibCall = true;
+ }
+ }
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext(), Call);
+ CCInfo.AnalyzeCallOperands(Outs,
+ CCAssignFnForNode(CallConv, /* Return*/ false,
+ isVarArg));
+
+ // Get a count of how many bytes are to be pushed on the stack.
+ unsigned NumBytes = CCInfo.getNextStackOffset();
+
+ // For tail calls, memory operands are available in our caller's stack.
+ if (IsSibCall)
+ NumBytes = 0;
+
+ // Adjust the stack pointer for the new arguments...
+ // These operations are automatically eliminated by the prolog/epilog pass
+ if (!IsSibCall)
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+
+ SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
+
+ RegsToPassVector RegsToPass;
+ SmallVector<SDValue, 8> MemOpChains;
+
+ // Walk the register/memloc assignments, inserting copies/loads. In the case
+ // of tail call optimization, arguments are handled later.
+ for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
+ i != e;
+ ++i, ++realArgIdx) {
+ CCValAssign &VA = ArgLocs[i];
+ SDValue Arg = OutVals[realArgIdx];
+ ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
+ bool isByVal = Flags.isByVal();
+
+ // Promote the value if needed.
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::AExt:
+ Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::BCvt:
+ Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
+ break;
+ }
+
+ // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
+ if (VA.needsCustom()) {
+ if (VA.getLocVT() == MVT::v2f64) {
+ SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
+ DAG.getConstant(0, MVT::i32));
+ SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
+ DAG.getConstant(1, MVT::i32));
+
+ PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass,
+ VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
+
+ VA = ArgLocs[++i]; // skip ahead to next loc
+ if (VA.isRegLoc()) {
+ PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass,
+ VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
+ } else {
+ assert(VA.isMemLoc());
+
+ MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1,
+ dl, DAG, VA, Flags));
+ }
+ } else {
+ PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
+ StackPtr, MemOpChains, Flags);
+ }
+ } else if (VA.isRegLoc()) {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ } else if (isByVal) {
+ assert(VA.isMemLoc());
+ unsigned offset = 0;
+
+ // True if this byval aggregate will be split between registers
+ // and memory.
+ if (CCInfo.isFirstByValRegValid()) {
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ unsigned int i, j;
+ for (i = 0, j = CCInfo.getFirstByValReg(); j < ARM::R4; i++, j++) {
+ SDValue Const = DAG.getConstant(4*i, MVT::i32);
+ SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
+ SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
+ MachinePointerInfo(),
+ false, false, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(j, Load));
+ }
+ offset = ARM::R4 - CCInfo.getFirstByValReg();
+ CCInfo.clearFirstByValReg();
+ }
+
+ unsigned LocMemOffset = VA.getLocMemOffset();
+ SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset);
+ SDValue Dst = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr,
+ StkPtrOff);
+ SDValue SrcOffset = DAG.getIntPtrConstant(4*offset);
+ SDValue Src = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, SrcOffset);
+ SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset,
+ MVT::i32);
+ // TODO: Disable AlwaysInline when it becomes possible
+ // to emit a nested call sequence.
+ MemOpChains.push_back(DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode,
+ Flags.getByValAlign(),
+ /*isVolatile=*/false,
+ /*AlwaysInline=*/true,
+ MachinePointerInfo(0),
+ MachinePointerInfo(0)));
+
+ } else if (!IsSibCall) {
+ assert(VA.isMemLoc());
+
+ MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
+ dl, DAG, VA, Flags));
+ }
+ }
+
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+ // Build a sequence of copy-to-reg nodes chained together with token chain
+ // and flag operands which copy the outgoing args into the appropriate regs.
+ SDValue InFlag;
+ // Tail call byval lowering might overwrite argument registers so in case of
+ // tail call optimization the copies to registers are lowered later.
+ if (!isTailCall)
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // For tail calls lower the arguments to the 'real' stack slot.
+ if (isTailCall) {
+ // Force all the incoming stack arguments to be loaded from the stack
+ // before any new outgoing arguments are stored to the stack, because the
+ // outgoing stack slots may alias the incoming argument stack slots, and
+ // the alias isn't otherwise explicit. This is slightly more conservative
+ // than necessary, because it means that each store effectively depends
+ // on every argument instead of just those arguments it would clobber.
+
+ // Do not flag preceding copytoreg stuff together with the following stuff.
+ InFlag = SDValue();
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+ InFlag =SDValue();
+ }
+
+ // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
+ // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
+ // node so that legalize doesn't hack it.
+ bool isDirect = false;
+ bool isARMFunc = false;
+ bool isLocalARMFunc = false;
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+
+ if (EnableARMLongCalls) {
+ assert (getTargetMachine().getRelocationModel() == Reloc::Static
+ && "long-calls with non-static relocation model!");
+ // Handle a global address or an external symbol. If it's not one of
+ // those, the target's already in a register, so we don't need to do
+ // anything extra.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ const GlobalValue *GV = G->getGlobal();
+ // Create a constant pool entry for the callee address
+ unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
+ ARMConstantPoolValue *CPV =
+ ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 0);
+
+ // Get the address of the callee into a register
+ SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
+ CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+ Callee = DAG.getLoad(getPointerTy(), dl,
+ DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 0);
+ } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ const char *Sym = S->getSymbol();
+
+ // Create a constant pool entry for the callee address
+ unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
+ ARMConstantPoolValue *CPV =
+ ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
+ ARMPCLabelIndex, 0);
+ // Get the address of the callee into a register
+ SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
+ CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+ Callee = DAG.getLoad(getPointerTy(), dl,
+ DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 0);
+ }
+ } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ const GlobalValue *GV = G->getGlobal();
+ isDirect = true;
+ bool isExt = GV->isDeclaration() || GV->isWeakForLinker();
+ bool isStub = (isExt && Subtarget->isTargetDarwin()) &&
+ getTargetMachine().getRelocationModel() != Reloc::Static;
+ isARMFunc = !Subtarget->isThumb() || isStub;
+ // ARM call to a local ARM function is predicable.
+ isLocalARMFunc = !Subtarget->isThumb() && (!isExt || !ARMInterworking);
+ // tBX takes a register source operand.
+ if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
+ unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
+ ARMConstantPoolValue *CPV =
+ ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 4);
+ SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
+ CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+ Callee = DAG.getLoad(getPointerTy(), dl,
+ DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 0);
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
+ Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
+ getPointerTy(), Callee, PICLabel);
+ } else {
+ // On ELF targets for PIC code, direct calls should go through the PLT
+ unsigned OpFlags = 0;
+ if (Subtarget->isTargetELF() &&
+ getTargetMachine().getRelocationModel() == Reloc::PIC_)
+ OpFlags = ARMII::MO_PLT;
+ Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags);
+ }
+ } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ isDirect = true;
+ bool isStub = Subtarget->isTargetDarwin() &&
+ getTargetMachine().getRelocationModel() != Reloc::Static;
+ isARMFunc = !Subtarget->isThumb() || isStub;
+ // tBX takes a register source operand.
+ const char *Sym = S->getSymbol();
+ if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
+ unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
+ ARMConstantPoolValue *CPV =
+ ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
+ ARMPCLabelIndex, 4);
+ SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
+ CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+ Callee = DAG.getLoad(getPointerTy(), dl,
+ DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 0);
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
+ Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
+ getPointerTy(), Callee, PICLabel);
+ } else {
+ unsigned OpFlags = 0;
+ // On ELF targets for PIC code, direct calls should go through the PLT
+ if (Subtarget->isTargetELF() &&
+ getTargetMachine().getRelocationModel() == Reloc::PIC_)
+ OpFlags = ARMII::MO_PLT;
+ Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy(), OpFlags);
+ }
+ }
+
+ // FIXME: handle tail calls differently.
+ unsigned CallOpc;
+ if (Subtarget->isThumb()) {
+ if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
+ CallOpc = ARMISD::CALL_NOLINK;
+ else
+ CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL;
+ } else {
+ CallOpc = (isDirect || Subtarget->hasV5TOps())
+ ? (isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL)
+ : ARMISD::CALL_NOLINK;
+ }
+
+ std::vector<SDValue> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ // Add argument registers to the end of the list so that they are known live
+ // into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+ if (InFlag.getNode())
+ Ops.push_back(InFlag);
+
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ if (isTailCall)
+ return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size());
+
+ // Returns a chain and a flag for retval copy to use.
+ Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
+ DAG.getIntPtrConstant(0, true), InFlag);
+ if (!Ins.empty())
+ InFlag = Chain.getValue(1);
+
+ // Handle result values, copying them out of physregs into vregs that we
+ // return.
+ return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins,
+ dl, DAG, InVals);
+}
+
+/// HandleByVal - Every parameter *after* a byval parameter is passed
+/// on the stack. Remember the next parameter register to allocate,
+/// and then confiscate the rest of the parameter registers to insure
+/// this.
+void
+llvm::ARMTargetLowering::HandleByVal(CCState *State, unsigned &size) const {
+ unsigned reg = State->AllocateReg(GPRArgRegs, 4);
+ assert((State->getCallOrPrologue() == Prologue ||
+ State->getCallOrPrologue() == Call) &&
+ "unhandled ParmContext");
+ if ((!State->isFirstByValRegValid()) &&
+ (ARM::R0 <= reg) && (reg <= ARM::R3)) {
+ State->setFirstByValReg(reg);
+ // At a call site, a byval parameter that is split between
+ // registers and memory needs its size truncated here. In a
+ // function prologue, such byval parameters are reassembled in
+ // memory, and are not truncated.
+ if (State->getCallOrPrologue() == Call) {
+ unsigned excess = 4 * (ARM::R4 - reg);
+ assert(size >= excess && "expected larger existing stack allocation");
+ size -= excess;
+ }
+ }
+ // Confiscate any remaining parameter registers to preclude their
+ // assignment to subsequent parameters.
+ while (State->AllocateReg(GPRArgRegs, 4))
+ ;
+}
+
+/// MatchingStackOffset - Return true if the given stack call argument is
+/// already available in the same position (relatively) of the caller's
+/// incoming argument stack.
+static
+bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
+ MachineFrameInfo *MFI, const MachineRegisterInfo *MRI,
+ const ARMInstrInfo *TII) {
+ unsigned Bytes = Arg.getValueType().getSizeInBits() / 8;
+ int FI = INT_MAX;
+ if (Arg.getOpcode() == ISD::CopyFromReg) {
+ unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(VR))
+ return false;
+ MachineInstr *Def = MRI->getVRegDef(VR);
+ if (!Def)
+ return false;
+ if (!Flags.isByVal()) {
+ if (!TII->isLoadFromStackSlot(Def, FI))
+ return false;
+ } else {
+ return false;
+ }
+ } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
+ if (Flags.isByVal())
+ // ByVal argument is passed in as a pointer but it's now being
+ // dereferenced. e.g.
+ // define @foo(%struct.X* %A) {
+ // tail call @bar(%struct.X* byval %A)
+ // }
+ return false;
+ SDValue Ptr = Ld->getBasePtr();
+ FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
+ if (!FINode)
+ return false;
+ FI = FINode->getIndex();
+ } else
+ return false;
+
+ assert(FI != INT_MAX);
+ if (!MFI->isFixedObjectIndex(FI))
+ return false;
+ return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI);
+}
+
+/// IsEligibleForTailCallOptimization - Check whether the call is eligible
+/// for tail call optimization. Targets which want to do tail call
+/// optimization should implement this function.
+bool
+ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
+ CallingConv::ID CalleeCC,
+ bool isVarArg,
+ bool isCalleeStructRet,
+ bool isCallerStructRet,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SelectionDAG& DAG) const {
+ const Function *CallerF = DAG.getMachineFunction().getFunction();
+ CallingConv::ID CallerCC = CallerF->getCallingConv();
+ bool CCMatch = CallerCC == CalleeCC;
+
+ // Look for obvious safe cases to perform tail call optimization that do not
+ // require ABI changes. This is what gcc calls sibcall.
+
+ // Do not sibcall optimize vararg calls unless the call site is not passing
+ // any arguments.
+ if (isVarArg && !Outs.empty())
+ return false;
+
+ // Also avoid sibcall optimization if either caller or callee uses struct
+ // return semantics.
+ if (isCalleeStructRet || isCallerStructRet)
+ return false;
+
+ // FIXME: Completely disable sibcall for Thumb1 since Thumb1RegisterInfo::
+ // emitEpilogue is not ready for them. Thumb tail calls also use t2B, as
+ // the Thumb1 16-bit unconditional branch doesn't have sufficient relocation
+ // support in the assembler and linker to be used. This would need to be
+ // fixed to fully support tail calls in Thumb1.
+ //
+ // Doing this is tricky, since the LDM/POP instruction on Thumb doesn't take
+ // LR. This means if we need to reload LR, it takes an extra instructions,
+ // which outweighs the value of the tail call; but here we don't know yet
+ // whether LR is going to be used. Probably the right approach is to
+ // generate the tail call here and turn it back into CALL/RET in
+ // emitEpilogue if LR is used.
+
+ // Thumb1 PIC calls to external symbols use BX, so they can be tail calls,
+ // but we need to make sure there are enough registers; the only valid
+ // registers are the 4 used for parameters. We don't currently do this
+ // case.
+ if (Subtarget->isThumb1Only())
+ return false;
+
+ // If the calling conventions do not match, then we'd better make sure the
+ // results are returned in the same way as what the caller expects.
+ if (!CCMatch) {
+ SmallVector<CCValAssign, 16> RVLocs1;
+ ARMCCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs1, *DAG.getContext(), Call);
+ CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC, true, isVarArg));
+
+ SmallVector<CCValAssign, 16> RVLocs2;
+ ARMCCState CCInfo2(CallerCC, false, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs2, *DAG.getContext(), Call);
+ CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC, true, isVarArg));
+
+ if (RVLocs1.size() != RVLocs2.size())
+ return false;
+ for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) {
+ if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc())
+ return false;
+ if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo())
+ return false;
+ if (RVLocs1[i].isRegLoc()) {
+ if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg())
+ return false;
+ } else {
+ if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset())
+ return false;
+ }
+ }
+ }
+
+ // If the callee takes no arguments then go on to check the results of the
+ // call.
+ if (!Outs.empty()) {
+ // Check if stack adjustment is needed. For now, do not do this if any
+ // argument is passed on the stack.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ ARMCCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext(), Call);
+ CCInfo.AnalyzeCallOperands(Outs,
+ CCAssignFnForNode(CalleeCC, false, isVarArg));
+ if (CCInfo.getNextStackOffset()) {
+ MachineFunction &MF = DAG.getMachineFunction();
+
+ // Check if the arguments are already laid out in the right way as
+ // the caller's fixed stack objects.
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MachineRegisterInfo *MRI = &MF.getRegInfo();
+ const ARMInstrInfo *TII =
+ ((ARMTargetMachine&)getTargetMachine()).getInstrInfo();
+ for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
+ i != e;
+ ++i, ++realArgIdx) {
+ CCValAssign &VA = ArgLocs[i];
+ EVT RegVT = VA.getLocVT();
+ SDValue Arg = OutVals[realArgIdx];
+ ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
+ if (VA.getLocInfo() == CCValAssign::Indirect)
+ return false;
+ if (VA.needsCustom()) {
+ // f64 and vector types are split into multiple registers or
+ // register/stack-slot combinations. The types will not match
+ // the registers; give up on memory f64 refs until we figure
+ // out what to do about this.
+ if (!VA.isRegLoc())
+ return false;
+ if (!ArgLocs[++i].isRegLoc())
+ return false;
+ if (RegVT == MVT::v2f64) {
+ if (!ArgLocs[++i].isRegLoc())
+ return false;
+ if (!ArgLocs[++i].isRegLoc())
+ return false;
+ }
+ } else if (!VA.isRegLoc()) {
+ if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
+ MFI, MRI, TII))
+ return false;
+ }
+ }
+ }
+ }
+
+ return true;
+}
+
+SDValue
+ARMTargetLowering::LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const {
+
+ // CCValAssign - represent the assignment of the return value to a location.
+ SmallVector<CCValAssign, 16> RVLocs;
+
+ // CCState - Info about the registers and stack slots.
+ ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext(), Call);
+
+ // Analyze outgoing return values.
+ CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true,
+ isVarArg));
+
+ // If this is the first return lowered for this function, add
+ // the regs to the liveout set for the function.
+ if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
+ for (unsigned i = 0; i != RVLocs.size(); ++i)
+ if (RVLocs[i].isRegLoc())
+ DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
+ }
+
+ SDValue Flag;
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0, realRVLocIdx = 0;
+ i != RVLocs.size();
+ ++i, ++realRVLocIdx) {
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+
+ SDValue Arg = OutVals[realRVLocIdx];
+
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::BCvt:
+ Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
+ break;
+ }
+
+ if (VA.needsCustom()) {
+ if (VA.getLocVT() == MVT::v2f64) {
+ // Extract the first half and return it in two registers.
+ SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
+ DAG.getConstant(0, MVT::i32));
+ SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
+ DAG.getVTList(MVT::i32, MVT::i32), Half);
+
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs, Flag);
+ Flag = Chain.getValue(1);
+ VA = RVLocs[++i]; // skip ahead to next loc
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
+ HalfGPRs.getValue(1), Flag);
+ Flag = Chain.getValue(1);
+ VA = RVLocs[++i]; // skip ahead to next loc
+
+ // Extract the 2nd half and fall through to handle it as an f64 value.
+ Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
+ DAG.getConstant(1, MVT::i32));
+ }
+ // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is
+ // available.
+ SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
+ DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1);
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag);
+ Flag = Chain.getValue(1);
+ VA = RVLocs[++i]; // skip ahead to next loc
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd.getValue(1),
+ Flag);
+ } else
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
+
+ // Guarantee that all emitted copies are
+ // stuck together, avoiding something bad.
+ Flag = Chain.getValue(1);
+ }
+
+ SDValue result;
+ if (Flag.getNode())
+ result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
+ else // Return Void
+ result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain);
+
+ return result;
+}
+
+bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N) const {
+ if (N->getNumValues() != 1)
+ return false;
+ if (!N->hasNUsesOfValue(1, 0))
+ return false;
+
+ unsigned NumCopies = 0;
+ SDNode* Copies[2];
+ SDNode *Use = *N->use_begin();
+ if (Use->getOpcode() == ISD::CopyToReg) {
+ Copies[NumCopies++] = Use;
+ } else if (Use->getOpcode() == ARMISD::VMOVRRD) {
+ // f64 returned in a pair of GPRs.
+ for (SDNode::use_iterator UI = Use->use_begin(), UE = Use->use_end();
+ UI != UE; ++UI) {
+ if (UI->getOpcode() != ISD::CopyToReg)
+ return false;
+ Copies[UI.getUse().getResNo()] = *UI;
+ ++NumCopies;
+ }
+ } else if (Use->getOpcode() == ISD::BITCAST) {
+ // f32 returned in a single GPR.
+ if (!Use->hasNUsesOfValue(1, 0))
+ return false;
+ Use = *Use->use_begin();
+ if (Use->getOpcode() != ISD::CopyToReg || !Use->hasNUsesOfValue(1, 0))
+ return false;
+ Copies[NumCopies++] = Use;
+ } else {
+ return false;
+ }
+
+ if (NumCopies != 1 && NumCopies != 2)
+ return false;
+
+ bool HasRet = false;
+ for (unsigned i = 0; i < NumCopies; ++i) {
+ SDNode *Copy = Copies[i];
+ for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
+ UI != UE; ++UI) {
+ if (UI->getOpcode() == ISD::CopyToReg) {
+ SDNode *Use = *UI;
+ if (Use == Copies[0] || Use == Copies[1])
+ continue;
+ return false;
+ }
+ if (UI->getOpcode() != ARMISD::RET_FLAG)
+ return false;
+ HasRet = true;
+ }
+ }
+
+ return HasRet;
+}
+
+bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
+ if (!EnableARMTailCalls)
+ return false;
+
+ if (!CI->isTailCall())
+ return false;
+
+ return !Subtarget->isThumb1Only();
+}
+
+// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
+// their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
+// one of the above mentioned nodes. It has to be wrapped because otherwise
+// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
+// be used to form addressing mode. These wrapped nodes will be selected
+// into MOVi.
+static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
+ EVT PtrVT = Op.getValueType();
+ // FIXME there is no actual debug info here
+ DebugLoc dl = Op.getDebugLoc();
+ ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
+ SDValue Res;
+ if (CP->isMachineConstantPoolEntry())
+ Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
+ CP->getAlignment());
+ else
+ Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
+ CP->getAlignment());
+ return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
+}
+
+unsigned ARMTargetLowering::getJumpTableEncoding() const {
+ return MachineJumpTableInfo::EK_Inline;
+}
+
+SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ unsigned ARMPCLabelIndex = 0;
+ DebugLoc DL = Op.getDebugLoc();
+ EVT PtrVT = getPointerTy();
+ const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
+ Reloc::Model RelocM = getTargetMachine().getRelocationModel();
+ SDValue CPAddr;
+ if (RelocM == Reloc::Static) {
+ CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4);
+ } else {
+ unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
+ ARMPCLabelIndex = AFI->createPICLabelUId();
+ ARMConstantPoolValue *CPV =
+ ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex,
+ ARMCP::CPBlockAddress, PCAdj);
+ CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
+ }
+ CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
+ SDValue Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 0);
+ if (RelocM == Reloc::Static)
+ return Result;
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
+ return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
+}
+
+// Lower ISD::GlobalTLSAddress using the "general dynamic" model
+SDValue
+ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = GA->getDebugLoc();
+ EVT PtrVT = getPointerTy();
+ unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
+ ARMConstantPoolValue *CPV =
+ ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
+ ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
+ SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
+ Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
+ Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 0);
+ SDValue Chain = Argument.getValue(1);
+
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
+ Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
+
+ // call __tls_get_addr.
+ ArgListTy Args;
+ ArgListEntry Entry;
+ Entry.Node = Argument;
+ Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
+ Args.push_back(Entry);
+ // FIXME: is there useful debug info available here?
+ std::pair<SDValue, SDValue> CallResult =
+ LowerCallTo(Chain, (Type *) Type::getInt32Ty(*DAG.getContext()),
+ false, false, false, false,
+ 0, CallingConv::C, false, /*isReturnValueUsed=*/true,
+ DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl);
+ return CallResult.first;
+}
+
+// Lower ISD::GlobalTLSAddress using the "initial exec" or
+// "local exec" model.
+SDValue
+ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
+ SelectionDAG &DAG) const {
+ const GlobalValue *GV = GA->getGlobal();
+ DebugLoc dl = GA->getDebugLoc();
+ SDValue Offset;
+ SDValue Chain = DAG.getEntryNode();
+ EVT PtrVT = getPointerTy();
+ // Get the Thread Pointer
+ SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
+
+ if (GV->isDeclaration()) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
+ // Initial exec model.
+ unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
+ ARMConstantPoolValue *CPV =
+ ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
+ ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF,
+ true);
+ Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
+ Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
+ Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 0);
+ Chain = Offset.getValue(1);
+
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
+ Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
+
+ Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 0);
+ } else {
+ // local exec model
+ ARMConstantPoolValue *CPV =
+ ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF);
+ Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
+ Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
+ Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 0);
+ }
+
+ // The address of the thread local variable is the add of the thread
+ // pointer with the offset of the variable.
+ return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
+}
+
+SDValue
+ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
+ // TODO: implement the "local dynamic" model
+ assert(Subtarget->isTargetELF() &&
+ "TLS not implemented for non-ELF targets");
+ GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
+ // If the relocation model is PIC, use the "General Dynamic" TLS Model,
+ // otherwise use the "Local Exec" TLS Model
+ if (getTargetMachine().getRelocationModel() == Reloc::PIC_)
+ return LowerToTLSGeneralDynamicModel(GA, DAG);
+ else
+ return LowerToTLSExecModels(GA, DAG);
+}
+
+SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT PtrVT = getPointerTy();
+ DebugLoc dl = Op.getDebugLoc();
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ Reloc::Model RelocM = getTargetMachine().getRelocationModel();
+ if (RelocM == Reloc::PIC_) {
+ bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility();
+ ARMConstantPoolValue *CPV =
+ ARMConstantPoolConstant::Create(GV,
+ UseGOTOFF ? ARMCP::GOTOFF : ARMCP::GOT);
+ SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
+ CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+ SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
+ CPAddr,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 0);
+ SDValue Chain = Result.getValue(1);
+ SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
+ Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT);
+ if (!UseGOTOFF)
+ Result = DAG.getLoad(PtrVT, dl, Chain, Result,
+ MachinePointerInfo::getGOT(), false, false, 0);
+ return Result;
+ }
+
+ // If we have T2 ops, we can materialize the address directly via movt/movw
+ // pair. This is always cheaper.
+ if (Subtarget->useMovt()) {
+ ++NumMovwMovt;
+ // FIXME: Once remat is capable of dealing with instructions with register
+ // operands, expand this into two nodes.
+ return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
+ DAG.getTargetGlobalAddress(GV, dl, PtrVT));
+ } else {
+ SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
+ CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+ return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 0);
+ }
+}
+
+SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT PtrVT = getPointerTy();
+ DebugLoc dl = Op.getDebugLoc();
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ Reloc::Model RelocM = getTargetMachine().getRelocationModel();
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+
+ // FIXME: Enable this for static codegen when tool issues are fixed.
+ if (Subtarget->useMovt() && RelocM != Reloc::Static) {
+ ++NumMovwMovt;
+ // FIXME: Once remat is capable of dealing with instructions with register
+ // operands, expand this into two nodes.
+ if (RelocM == Reloc::Static)
+ return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
+ DAG.getTargetGlobalAddress(GV, dl, PtrVT));
+
+ unsigned Wrapper = (RelocM == Reloc::PIC_)
+ ? ARMISD::WrapperPIC : ARMISD::WrapperDYN;
+ SDValue Result = DAG.getNode(Wrapper, dl, PtrVT,
+ DAG.getTargetGlobalAddress(GV, dl, PtrVT));
+ if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
+ Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
+ MachinePointerInfo::getGOT(), false, false, 0);
+ return Result;
+ }
+
+ unsigned ARMPCLabelIndex = 0;
+ SDValue CPAddr;
+ if (RelocM == Reloc::Static) {
+ CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
+ } else {
+ ARMPCLabelIndex = AFI->createPICLabelUId();
+ unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb()?4:8);
+ ARMConstantPoolValue *CPV =
+ ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue,
+ PCAdj);
+ CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
+ }
+ CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+
+ SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 0);
+ SDValue Chain = Result.getValue(1);
+
+ if (RelocM == Reloc::PIC_) {
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
+ Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
+ }
+
+ if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
+ Result = DAG.getLoad(PtrVT, dl, Chain, Result, MachinePointerInfo::getGOT(),
+ false, false, 0);
+
+ return Result;
+}
+
+SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
+ SelectionDAG &DAG) const {
+ assert(Subtarget->isTargetELF() &&
+ "GLOBAL OFFSET TABLE not implemented for non-ELF targets");
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
+ EVT PtrVT = getPointerTy();
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
+ ARMConstantPoolValue *CPV =
+ ARMConstantPoolSymbol::Create(*DAG.getContext(), "_GLOBAL_OFFSET_TABLE_",
+ ARMPCLabelIndex, PCAdj);
+ SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
+ CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+ SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 0);
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
+ return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
+}
+
+SDValue
+ARMTargetLowering::LowerEH_SJLJ_DISPATCHSETUP(SDValue Op, SelectionDAG &DAG)
+ const {
+ DebugLoc dl = Op.getDebugLoc();
+ return DAG.getNode(ARMISD::EH_SJLJ_DISPATCHSETUP, dl, MVT::Other,
+ Op.getOperand(0), Op.getOperand(1));
+}
+
+SDValue
+ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue Val = DAG.getConstant(0, MVT::i32);
+ return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl,
+ DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
+ Op.getOperand(1), Val);
+}
+
+SDValue
+ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
+ Op.getOperand(1), DAG.getConstant(0, MVT::i32));
+}
+
+SDValue
+ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *Subtarget) const {
+ unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ DebugLoc dl = Op.getDebugLoc();
+ switch (IntNo) {
+ default: return SDValue(); // Don't custom lower most intrinsics.
+ case Intrinsic::arm_thread_pointer: {
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
+ }
+ case Intrinsic::eh_sjlj_lsda: {
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
+ EVT PtrVT = getPointerTy();
+ DebugLoc dl = Op.getDebugLoc();
+ Reloc::Model RelocM = getTargetMachine().getRelocationModel();
+ SDValue CPAddr;
+ unsigned PCAdj = (RelocM != Reloc::PIC_)
+ ? 0 : (Subtarget->isThumb() ? 4 : 8);
+ ARMConstantPoolValue *CPV =
+ ARMConstantPoolConstant::Create(MF.getFunction(), ARMPCLabelIndex,
+ ARMCP::CPLSDA, PCAdj);
+ CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
+ CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+ SDValue Result =
+ DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 0);
+
+ if (RelocM == Reloc::PIC_) {
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
+ Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
+ }
+ return Result;
+ }
+ case Intrinsic::arm_neon_vmulls:
+ case Intrinsic::arm_neon_vmullu: {
+ unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)
+ ? ARMISD::VMULLs : ARMISD::VMULLu;
+ return DAG.getNode(NewOpc, Op.getDebugLoc(), Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ }
+ }
+}
+
+static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *Subtarget) {
+ DebugLoc dl = Op.getDebugLoc();
+ if (!Subtarget->hasDataBarrier()) {
+ // Some ARMv6 cpus can support data barriers with an mcr instruction.
+ // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
+ // here.
+ assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
+ "Unexpected ISD::MEMBARRIER encountered. Should be libcall!");
+ return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
+ DAG.getConstant(0, MVT::i32));
+ }
+
+ SDValue Op5 = Op.getOperand(5);
+ bool isDeviceBarrier = cast<ConstantSDNode>(Op5)->getZExtValue() != 0;
+ unsigned isLL = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ unsigned isLS = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
+ bool isOnlyStoreBarrier = (isLL == 0 && isLS == 0);
+
+ ARM_MB::MemBOpt DMBOpt;
+ if (isDeviceBarrier)
+ DMBOpt = isOnlyStoreBarrier ? ARM_MB::ST : ARM_MB::SY;
+ else
+ DMBOpt = isOnlyStoreBarrier ? ARM_MB::ISHST : ARM_MB::ISH;
+ return DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0),
+ DAG.getConstant(DMBOpt, MVT::i32));
+}
+
+
+static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *Subtarget) {
+ // FIXME: handle "fence singlethread" more efficiently.
+ DebugLoc dl = Op.getDebugLoc();
+ if (!Subtarget->hasDataBarrier()) {
+ // Some ARMv6 cpus can support data barriers with an mcr instruction.
+ // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
+ // here.
+ assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
+ "Unexpected ISD::MEMBARRIER encountered. Should be libcall!");
+ return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
+ DAG.getConstant(0, MVT::i32));
+ }
+
+ return DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0),
+ DAG.getConstant(ARM_MB::ISH, MVT::i32));
+}
+
+static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *Subtarget) {
+ // ARM pre v5TE and Thumb1 does not have preload instructions.
+ if (!(Subtarget->isThumb2() ||
+ (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())))
+ // Just preserve the chain.
+ return Op.getOperand(0);
+
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1;
+ if (!isRead &&
+ (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
+ // ARMv7 with MP extension has PLDW.
+ return Op.getOperand(0);
+
+ unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
+ if (Subtarget->isThumb()) {
+ // Invert the bits.
+ isRead = ~isRead & 1;
+ isData = ~isData & 1;
+ }
+
+ return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),
+ Op.getOperand(1), DAG.getConstant(isRead, MVT::i32),
+ DAG.getConstant(isData, MVT::i32));
+}
+
+static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();
+
+ // vastart just stores the address of the VarArgsFrameIndex slot into the
+ // memory location argument.
+ DebugLoc dl = Op.getDebugLoc();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+ return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
+ MachinePointerInfo(SV), false, false, 0);
+}
+
+SDValue
+ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
+ SDValue &Root, SelectionDAG &DAG,
+ DebugLoc dl) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+
+ TargetRegisterClass *RC;
+ if (AFI->isThumb1OnlyFunction())
+ RC = ARM::tGPRRegisterClass;
+ else
+ RC = ARM::GPRRegisterClass;
+
+ // Transform the arguments stored in physical registers into virtual ones.
+ unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
+ SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
+
+ SDValue ArgValue2;
+ if (NextVA.isMemLoc()) {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ int FI = MFI->CreateFixedObject(4, NextVA.getLocMemOffset(), true);
+
+ // Create load node to retrieve arguments from the stack.
+ SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+ ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN,
+ MachinePointerInfo::getFixedStack(FI),
+ false, false, 0);
+ } else {
+ Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
+ ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
+ }
+
+ return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
+}
+
+void
+ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF,
+ unsigned &VARegSize, unsigned &VARegSaveSize)
+ const {
+ unsigned NumGPRs;
+ if (CCInfo.isFirstByValRegValid())
+ NumGPRs = ARM::R4 - CCInfo.getFirstByValReg();
+ else {
+ unsigned int firstUnalloced;
+ firstUnalloced = CCInfo.getFirstUnallocated(GPRArgRegs,
+ sizeof(GPRArgRegs) /
+ sizeof(GPRArgRegs[0]));
+ NumGPRs = (firstUnalloced <= 3) ? (4 - firstUnalloced) : 0;
+ }
+
+ unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment();
+ VARegSize = NumGPRs * 4;
+ VARegSaveSize = (VARegSize + Align - 1) & ~(Align - 1);
+}
+
+// The remaining GPRs hold either the beginning of variable-argument
+// data, or the beginning of an aggregate passed by value (usuall
+// byval). Either way, we allocate stack slots adjacent to the data
+// provided by our caller, and store the unallocated registers there.
+// If this is a variadic function, the va_list pointer will begin with
+// these values; otherwise, this reassembles a (byval) structure that
+// was split between registers and memory.
+void
+ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
+ DebugLoc dl, SDValue &Chain,
+ unsigned ArgOffset) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ unsigned firstRegToSaveIndex;
+ if (CCInfo.isFirstByValRegValid())
+ firstRegToSaveIndex = CCInfo.getFirstByValReg() - ARM::R0;
+ else {
+ firstRegToSaveIndex = CCInfo.getFirstUnallocated
+ (GPRArgRegs, sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0]));
+ }
+
+ unsigned VARegSize, VARegSaveSize;
+ computeRegArea(CCInfo, MF, VARegSize, VARegSaveSize);
+ if (VARegSaveSize) {
+ // If this function is vararg, store any remaining integer argument regs
+ // to their spots on the stack so that they may be loaded by deferencing
+ // the result of va_next.
+ AFI->setVarArgsRegSaveSize(VARegSaveSize);
+ AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(VARegSaveSize,
+ ArgOffset + VARegSaveSize
+ - VARegSize,
+ false));
+ SDValue FIN = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(),
+ getPointerTy());
+
+ SmallVector<SDValue, 4> MemOps;
+ for (; firstRegToSaveIndex < 4; ++firstRegToSaveIndex) {
+ TargetRegisterClass *RC;
+ if (AFI->isThumb1OnlyFunction())
+ RC = ARM::tGPRRegisterClass;
+ else
+ RC = ARM::GPRRegisterClass;
+
+ unsigned VReg = MF.addLiveIn(GPRArgRegs[firstRegToSaveIndex], RC);
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
+ SDValue Store =
+ DAG.getStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo::getFixedStack(AFI->getVarArgsFrameIndex()),
+ false, false, 0);
+ MemOps.push_back(Store);
+ FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
+ DAG.getConstant(4, getPointerTy()));
+ }
+ if (!MemOps.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOps[0], MemOps.size());
+ } else
+ // This will point to the next argument passed via stack.
+ AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(4, ArgOffset, true));
+}
+
+SDValue
+ARMTargetLowering::LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals)
+ const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+
+ // Assign locations to all of the incoming arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext(), Prologue);
+ CCInfo.AnalyzeFormalArguments(Ins,
+ CCAssignFnForNode(CallConv, /* Return*/ false,
+ isVarArg));
+
+ SmallVector<SDValue, 16> ArgValues;
+ int lastInsIndex = -1;
+
+ SDValue ArgValue;
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+
+ // Arguments stored in registers.
+ if (VA.isRegLoc()) {
+ EVT RegVT = VA.getLocVT();
+
+ if (VA.needsCustom()) {
+ // f64 and vector types are split up into multiple registers or
+ // combinations of registers and stack slots.
+ if (VA.getLocVT() == MVT::v2f64) {
+ SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i],
+ Chain, DAG, dl);
+ VA = ArgLocs[++i]; // skip ahead to next loc
+ SDValue ArgValue2;
+ if (VA.isMemLoc()) {
+ int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), true);
+ SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+ ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
+ MachinePointerInfo::getFixedStack(FI),
+ false, false, 0);
+ } else {
+ ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i],
+ Chain, DAG, dl);
+ }
+ ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
+ ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
+ ArgValue, ArgValue1, DAG.getIntPtrConstant(0));
+ ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
+ ArgValue, ArgValue2, DAG.getIntPtrConstant(1));
+ } else
+ ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
+
+ } else {
+ TargetRegisterClass *RC;
+
+ if (RegVT == MVT::f32)
+ RC = ARM::SPRRegisterClass;
+ else if (RegVT == MVT::f64)
+ RC = ARM::DPRRegisterClass;
+ else if (RegVT == MVT::v2f64)
+ RC = ARM::QPRRegisterClass;
+ else if (RegVT == MVT::i32)
+ RC = (AFI->isThumb1OnlyFunction() ?
+ ARM::tGPRRegisterClass : ARM::GPRRegisterClass);
+ else
+ llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
+
+ // Transform the arguments in physical registers into virtual ones.
+ unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
+ ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
+ }
+
+ // If this is an 8 or 16-bit value, it is really passed promoted
+ // to 32 bits. Insert an assert[sz]ext to capture this, then
+ // truncate to the right size.
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::BCvt:
+ ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
+ break;
+ case CCValAssign::SExt:
+ ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+ ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
+ break;
+ case CCValAssign::ZExt:
+ ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+ ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
+ break;
+ }
+
+ InVals.push_back(ArgValue);
+
+ } else { // VA.isRegLoc()
+
+ // sanity check
+ assert(VA.isMemLoc());
+ assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
+
+ int index = ArgLocs[i].getValNo();
+
+ // Some Ins[] entries become multiple ArgLoc[] entries.
+ // Process them only once.
+ if (index != lastInsIndex)
+ {
+ ISD::ArgFlagsTy Flags = Ins[index].Flags;
+ // FIXME: For now, all byval parameter objects are marked mutable.
+ // This can be changed with more analysis.
+ // In case of tail call optimization mark all arguments mutable.
+ // Since they could be overwritten by lowering of arguments in case of
+ // a tail call.
+ if (Flags.isByVal()) {
+ unsigned VARegSize, VARegSaveSize;
+ computeRegArea(CCInfo, MF, VARegSize, VARegSaveSize);
+ VarArgStyleRegisters(CCInfo, DAG, dl, Chain, 0);
+ unsigned Bytes = Flags.getByValSize() - VARegSize;
+ if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
+ int FI = MFI->CreateFixedObject(Bytes,
+ VA.getLocMemOffset(), false);
+ InVals.push_back(DAG.getFrameIndex(FI, getPointerTy()));
+ } else {
+ int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
+ VA.getLocMemOffset(), true);
+
+ // Create load nodes to retrieve arguments from the stack.
+ SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+ InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
+ MachinePointerInfo::getFixedStack(FI),
+ false, false, 0));
+ }
+ lastInsIndex = index;
+ }
+ }
+ }
+
+ // varargs
+ if (isVarArg)
+ VarArgStyleRegisters(CCInfo, DAG, dl, Chain, CCInfo.getNextStackOffset());
+
+ return Chain;
+}
+
+/// isFloatingPointZero - Return true if this is +0.0.
+static bool isFloatingPointZero(SDValue Op) {
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
+ return CFP->getValueAPF().isPosZero();
+ else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
+ // Maybe this has already been legalized into the constant pool?
+ if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
+ SDValue WrapperOp = Op.getOperand(1).getOperand(0);
+ if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
+ return CFP->getValueAPF().isPosZero();
+ }
+ }
+ return false;
+}
+
+/// Returns appropriate ARM CMP (cmp) and corresponding condition code for
+/// the given operands.
+SDValue
+ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
+ SDValue &ARMcc, SelectionDAG &DAG,
+ DebugLoc dl) const {
+ if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
+ unsigned C = RHSC->getZExtValue();
+ if (!isLegalICmpImmediate(C)) {
+ // Constant does not fit, try adjusting it by one?
+ switch (CC) {
+ default: break;
+ case ISD::SETLT:
+ case ISD::SETGE:
+ if (C != 0x80000000 && isLegalICmpImmediate(C-1)) {
+ CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
+ RHS = DAG.getConstant(C-1, MVT::i32);
+ }
+ break;
+ case ISD::SETULT:
+ case ISD::SETUGE:
+ if (C != 0 && isLegalICmpImmediate(C-1)) {
+ CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
+ RHS = DAG.getConstant(C-1, MVT::i32);
+ }
+ break;
+ case ISD::SETLE:
+ case ISD::SETGT:
+ if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) {
+ CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
+ RHS = DAG.getConstant(C+1, MVT::i32);
+ }
+ break;
+ case ISD::SETULE:
+ case ISD::SETUGT:
+ if (C != 0xffffffff && isLegalICmpImmediate(C+1)) {
+ CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
+ RHS = DAG.getConstant(C+1, MVT::i32);
+ }
+ break;
+ }
+ }
+ }
+
+ ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
+ ARMISD::NodeType CompareType;
+ switch (CondCode) {
+ default:
+ CompareType = ARMISD::CMP;
+ break;
+ case ARMCC::EQ:
+ case ARMCC::NE:
+ // Uses only Z Flag
+ CompareType = ARMISD::CMPZ;
+ break;
+ }
+ ARMcc = DAG.getConstant(CondCode, MVT::i32);
+ return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS);
+}
+
+/// Returns a appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
+SDValue
+ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
+ DebugLoc dl) const {
+ SDValue Cmp;
+ if (!isFloatingPointZero(RHS))
+ Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS);
+ else
+ Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS);
+ return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp);
+}
+
+/// duplicateCmp - Glue values can have only one use, so this function
+/// duplicates a comparison node.
+SDValue
+ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {
+ unsigned Opc = Cmp.getOpcode();
+ DebugLoc DL = Cmp.getDebugLoc();
+ if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ)
+ return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
+
+ assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation");
+ Cmp = Cmp.getOperand(0);
+ Opc = Cmp.getOpcode();
+ if (Opc == ARMISD::CMPFP)
+ Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
+ else {
+ assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT");
+ Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0));
+ }
+ return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp);
+}
+
+SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Cond = Op.getOperand(0);
+ SDValue SelectTrue = Op.getOperand(1);
+ SDValue SelectFalse = Op.getOperand(2);
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Convert:
+ //
+ // (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond)
+ // (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond)
+ //
+ if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) {
+ const ConstantSDNode *CMOVTrue =
+ dyn_cast<ConstantSDNode>(Cond.getOperand(0));
+ const ConstantSDNode *CMOVFalse =
+ dyn_cast<ConstantSDNode>(Cond.getOperand(1));
+
+ if (CMOVTrue && CMOVFalse) {
+ unsigned CMOVTrueVal = CMOVTrue->getZExtValue();
+ unsigned CMOVFalseVal = CMOVFalse->getZExtValue();
+
+ SDValue True;
+ SDValue False;
+ if (CMOVTrueVal == 1 && CMOVFalseVal == 0) {
+ True = SelectTrue;
+ False = SelectFalse;
+ } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) {
+ True = SelectFalse;
+ False = SelectTrue;
+ }
+
+ if (True.getNode() && False.getNode()) {
+ EVT VT = Op.getValueType();
+ SDValue ARMcc = Cond.getOperand(2);
+ SDValue CCR = Cond.getOperand(3);
+ SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG);
+ assert(True.getValueType() == VT);
+ return DAG.getNode(ARMISD::CMOV, dl, VT, True, False, ARMcc, CCR, Cmp);
+ }
+ }
+ }
+
+ return DAG.getSelectCC(dl, Cond,
+ DAG.getConstant(0, Cond.getValueType()),
+ SelectTrue, SelectFalse, ISD::SETNE);
+}
+
+SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+ SDValue TrueVal = Op.getOperand(2);
+ SDValue FalseVal = Op.getOperand(3);
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (LHS.getValueType() == MVT::i32) {
+ SDValue ARMcc;
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+ SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
+ return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,Cmp);
+ }
+
+ ARMCC::CondCodes CondCode, CondCode2;
+ FPCCToARMCC(CC, CondCode, CondCode2);
+
+ SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
+ SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+ SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
+ ARMcc, CCR, Cmp);
+ if (CondCode2 != ARMCC::AL) {
+ SDValue ARMcc2 = DAG.getConstant(CondCode2, MVT::i32);
+ // FIXME: Needs another CMP because flag can have but one use.
+ SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
+ Result = DAG.getNode(ARMISD::CMOV, dl, VT,
+ Result, TrueVal, ARMcc2, CCR, Cmp2);
+ }
+ return Result;
+}
+
+/// canChangeToInt - Given the fp compare operand, return true if it is suitable
+/// to morph to an integer compare sequence.
+static bool canChangeToInt(SDValue Op, bool &SeenZero,
+ const ARMSubtarget *Subtarget) {
+ SDNode *N = Op.getNode();
+ if (!N->hasOneUse())
+ // Otherwise it requires moving the value from fp to integer registers.
+ return false;
+ if (!N->getNumValues())
+ return false;
+ EVT VT = Op.getValueType();
+ if (VT != MVT::f32 && !Subtarget->isFPBrccSlow())
+ // f32 case is generally profitable. f64 case only makes sense when vcmpe +
+ // vmrs are very slow, e.g. cortex-a8.
+ return false;
+
+ if (isFloatingPointZero(Op)) {
+ SeenZero = true;
+ return true;
+ }
+ return ISD::isNormalLoad(N);
+}
+
+static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) {
+ if (isFloatingPointZero(Op))
+ return DAG.getConstant(0, MVT::i32);
+
+ if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
+ return DAG.getLoad(MVT::i32, Op.getDebugLoc(),
+ Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(),
+ Ld->isVolatile(), Ld->isNonTemporal(),
+ Ld->getAlignment());
+
+ llvm_unreachable("Unknown VFP cmp argument!");
+}
+
+static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
+ SDValue &RetVal1, SDValue &RetVal2) {
+ if (isFloatingPointZero(Op)) {
+ RetVal1 = DAG.getConstant(0, MVT::i32);
+ RetVal2 = DAG.getConstant(0, MVT::i32);
+ return;
+ }
+
+ if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) {
+ SDValue Ptr = Ld->getBasePtr();
+ RetVal1 = DAG.getLoad(MVT::i32, Op.getDebugLoc(),
+ Ld->getChain(), Ptr,
+ Ld->getPointerInfo(),
+ Ld->isVolatile(), Ld->isNonTemporal(),
+ Ld->getAlignment());
+
+ EVT PtrType = Ptr.getValueType();
+ unsigned NewAlign = MinAlign(Ld->getAlignment(), 4);
+ SDValue NewPtr = DAG.getNode(ISD::ADD, Op.getDebugLoc(),
+ PtrType, Ptr, DAG.getConstant(4, PtrType));
+ RetVal2 = DAG.getLoad(MVT::i32, Op.getDebugLoc(),
+ Ld->getChain(), NewPtr,
+ Ld->getPointerInfo().getWithOffset(4),
+ Ld->isVolatile(), Ld->isNonTemporal(),
+ NewAlign);
+ return;
+ }
+
+ llvm_unreachable("Unknown VFP cmp argument!");
+}
+
+/// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some
+/// f32 and even f64 comparisons to integer ones.
+SDValue
+ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Chain = Op.getOperand(0);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
+ SDValue LHS = Op.getOperand(2);
+ SDValue RHS = Op.getOperand(3);
+ SDValue Dest = Op.getOperand(4);
+ DebugLoc dl = Op.getDebugLoc();
+
+ bool SeenZero = false;
+ if (canChangeToInt(LHS, SeenZero, Subtarget) &&
+ canChangeToInt(RHS, SeenZero, Subtarget) &&
+ // If one of the operand is zero, it's safe to ignore the NaN case since
+ // we only care about equality comparisons.
+ (SeenZero || (DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS)))) {
+ // If unsafe fp math optimization is enabled and there are no other uses of
+ // the CMP operands, and the condition code is EQ or NE, we can optimize it
+ // to an integer comparison.
+ if (CC == ISD::SETOEQ)
+ CC = ISD::SETEQ;
+ else if (CC == ISD::SETUNE)
+ CC = ISD::SETNE;
+
+ SDValue ARMcc;
+ if (LHS.getValueType() == MVT::f32) {
+ LHS = bitcastf32Toi32(LHS, DAG);
+ RHS = bitcastf32Toi32(RHS, DAG);
+ SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+ return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
+ Chain, Dest, ARMcc, CCR, Cmp);
+ }
+
+ SDValue LHS1, LHS2;
+ SDValue RHS1, RHS2;
+ expandf64Toi32(LHS, DAG, LHS1, LHS2);
+ expandf64Toi32(RHS, DAG, RHS1, RHS2);
+ ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
+ ARMcc = DAG.getConstant(CondCode, MVT::i32);
+ SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
+ return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops, 7);
+ }
+
+ return SDValue();
+}
+
+SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Chain = Op.getOperand(0);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
+ SDValue LHS = Op.getOperand(2);
+ SDValue RHS = Op.getOperand(3);
+ SDValue Dest = Op.getOperand(4);
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (LHS.getValueType() == MVT::i32) {
+ SDValue ARMcc;
+ SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+ return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
+ Chain, Dest, ARMcc, CCR, Cmp);
+ }
+
+ assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
+
+ if (UnsafeFPMath &&
+ (CC == ISD::SETEQ || CC == ISD::SETOEQ ||
+ CC == ISD::SETNE || CC == ISD::SETUNE)) {
+ SDValue Result = OptimizeVFPBrcond(Op, DAG);
+ if (Result.getNode())
+ return Result;
+ }
+
+ ARMCC::CondCodes CondCode, CondCode2;
+ FPCCToARMCC(CC, CondCode, CondCode2);
+
+ SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
+ SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+ SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp };
+ SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
+ if (CondCode2 != ARMCC::AL) {
+ ARMcc = DAG.getConstant(CondCode2, MVT::i32);
+ SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) };
+ Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
+ }
+ return Res;
+}
+
+SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Chain = Op.getOperand(0);
+ SDValue Table = Op.getOperand(1);
+ SDValue Index = Op.getOperand(2);
+ DebugLoc dl = Op.getDebugLoc();
+
+ EVT PTy = getPointerTy();
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
+ ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
+ SDValue UId = DAG.getConstant(AFI->createJumpTableUId(), PTy);
+ SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
+ Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI, UId);
+ Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, PTy));
+ SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table);
+ if (Subtarget->isThumb2()) {
+ // Thumb2 uses a two-level jump. That is, it jumps into the jump table
+ // which does another jump to the destination. This also makes it easier
+ // to translate it to TBB / TBH later.
+ // FIXME: This might not work if the function is extremely large.
+ return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain,
+ Addr, Op.getOperand(2), JTI, UId);
+ }
+ if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
+ Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
+ MachinePointerInfo::getJumpTable(),
+ false, false, 0);
+ Chain = Addr.getValue(1);
+ Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table);
+ return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
+ } else {
+ Addr = DAG.getLoad(PTy, dl, Chain, Addr,
+ MachinePointerInfo::getJumpTable(), false, false, 0);
+ Chain = Addr.getValue(1);
+ return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
+ }
+}
+
+static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned Opc;
+
+ switch (Op.getOpcode()) {
+ default:
+ assert(0 && "Invalid opcode!");
+ case ISD::FP_TO_SINT:
+ Opc = ARMISD::FTOSI;
+ break;
+ case ISD::FP_TO_UINT:
+ Opc = ARMISD::FTOUI;
+ break;
+ }
+ Op = DAG.getNode(Opc, dl, MVT::f32, Op.getOperand(0));
+ return DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
+}
+
+static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+
+ assert(Op.getOperand(0).getValueType() == MVT::v4i16 &&
+ "Invalid type for custom lowering!");
+ if (VT != MVT::v4f32)
+ return DAG.UnrollVectorOp(Op.getNode());
+
+ unsigned CastOpc;
+ unsigned Opc;
+ switch (Op.getOpcode()) {
+ default:
+ assert(0 && "Invalid opcode!");
+ case ISD::SINT_TO_FP:
+ CastOpc = ISD::SIGN_EXTEND;
+ Opc = ISD::SINT_TO_FP;
+ break;
+ case ISD::UINT_TO_FP:
+ CastOpc = ISD::ZERO_EXTEND;
+ Opc = ISD::UINT_TO_FP;
+ break;
+ }
+
+ Op = DAG.getNode(CastOpc, dl, MVT::v4i32, Op.getOperand(0));
+ return DAG.getNode(Opc, dl, VT, Op);
+}
+
+static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
+ if (VT.isVector())
+ return LowerVectorINT_TO_FP(Op, DAG);
+
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned Opc;
+
+ switch (Op.getOpcode()) {
+ default:
+ assert(0 && "Invalid opcode!");
+ case ISD::SINT_TO_FP:
+ Opc = ARMISD::SITOF;
+ break;
+ case ISD::UINT_TO_FP:
+ Opc = ARMISD::UITOF;
+ break;
+ }
+
+ Op = DAG.getNode(ISD::BITCAST, dl, MVT::f32, Op.getOperand(0));
+ return DAG.getNode(Opc, dl, VT, Op);
+}
+
+SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
+ // Implement fcopysign with a fabs and a conditional fneg.
+ SDValue Tmp0 = Op.getOperand(0);
+ SDValue Tmp1 = Op.getOperand(1);
+ DebugLoc dl = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+ EVT SrcVT = Tmp1.getValueType();
+ bool InGPR = Tmp0.getOpcode() == ISD::BITCAST ||
+ Tmp0.getOpcode() == ARMISD::VMOVDRR;
+ bool UseNEON = !InGPR && Subtarget->hasNEON();
+
+ if (UseNEON) {
+ // Use VBSL to copy the sign bit.
+ unsigned EncodedVal = ARM_AM::createNEONModImm(0x6, 0x80);
+ SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32,
+ DAG.getTargetConstant(EncodedVal, MVT::i32));
+ EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64;
+ if (VT == MVT::f64)
+ Mask = DAG.getNode(ARMISD::VSHL, dl, OpVT,
+ DAG.getNode(ISD::BITCAST, dl, OpVT, Mask),
+ DAG.getConstant(32, MVT::i32));
+ else /*if (VT == MVT::f32)*/
+ Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0);
+ if (SrcVT == MVT::f32) {
+ Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1);
+ if (VT == MVT::f64)
+ Tmp1 = DAG.getNode(ARMISD::VSHL, dl, OpVT,
+ DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1),
+ DAG.getConstant(32, MVT::i32));
+ } else if (VT == MVT::f32)
+ Tmp1 = DAG.getNode(ARMISD::VSHRu, dl, MVT::v1i64,
+ DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1),
+ DAG.getConstant(32, MVT::i32));
+ Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0);
+ Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1);
+
+ SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createNEONModImm(0xe, 0xff),
+ MVT::i32);
+ AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes);
+ SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask,
+ DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes));
+
+ SDValue Res = DAG.getNode(ISD::OR, dl, OpVT,
+ DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask),
+ DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot));
+ if (VT == MVT::f32) {
+ Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res);
+ Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res,
+ DAG.getConstant(0, MVT::i32));
+ } else {
+ Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res);
+ }
+
+ return Res;
+ }
+
+ // Bitcast operand 1 to i32.
+ if (SrcVT == MVT::f64)
+ Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
+ &Tmp1, 1).getValue(1);
+ Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1);
+
+ // Or in the signbit with integer operations.
+ SDValue Mask1 = DAG.getConstant(0x80000000, MVT::i32);
+ SDValue Mask2 = DAG.getConstant(0x7fffffff, MVT::i32);
+ Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1);
+ if (VT == MVT::f32) {
+ Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32,
+ DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
+ DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1));
+ }
+
+ // f64: Or the high part with signbit and then combine two parts.
+ Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
+ &Tmp0, 1);
+ SDValue Lo = Tmp0.getValue(0);
+ SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2);
+ Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1);
+ return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
+}
+
+SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MFI->setReturnAddressIsTaken(true);
+
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ if (Depth) {
+ SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
+ SDValue Offset = DAG.getConstant(4, MVT::i32);
+ return DAG.getLoad(VT, dl, DAG.getEntryNode(),
+ DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
+ MachinePointerInfo(), false, false, 0);
+ }
+
+ // Return LR, which contains the return address. Mark it an implicit live-in.
+ unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
+ return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
+}
+
+SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI->setFrameAddressIsTaken(true);
+
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned FrameReg = (Subtarget->isThumb() || Subtarget->isTargetDarwin())
+ ? ARM::R7 : ARM::R11;
+ SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
+ while (Depth--)
+ FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
+ MachinePointerInfo(),
+ false, false, 0);
+ return FrameAddr;
+}
+
+/// ExpandBITCAST - If the target supports VFP, this function is called to
+/// expand a bit convert where either the source or destination type is i64 to
+/// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64
+/// operand type is illegal (e.g., v2f32 for a target that doesn't support
+/// vectors), since the legalizer won't know what to do with that.
+static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Op = N->getOperand(0);
+
+ // This function is only supposed to be called for i64 types, either as the
+ // source or destination of the bit convert.
+ EVT SrcVT = Op.getValueType();
+ EVT DstVT = N->getValueType(0);
+ assert((SrcVT == MVT::i64 || DstVT == MVT::i64) &&
+ "ExpandBITCAST called for non-i64 type");
+
+ // Turn i64->f64 into VMOVDRR.
+ if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) {
+ SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
+ DAG.getConstant(0, MVT::i32));
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
+ DAG.getConstant(1, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, dl, DstVT,
+ DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi));
+ }
+
+ // Turn f64->i64 into VMOVRRD.
+ if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) {
+ SDValue Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
+ DAG.getVTList(MVT::i32, MVT::i32), &Op, 1);
+ // Merge the pieces into a single i64 value.
+ return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1));
+ }
+
+ return SDValue();
+}
+
+/// getZeroVector - Returns a vector of specified type with all zero elements.
+/// Zero vectors are used to represent vector negation and in those cases
+/// will be implemented with the NEON VNEG instruction. However, VNEG does
+/// not support i64 elements, so sometimes the zero vectors will need to be
+/// explicitly constructed. Regardless, use a canonical VMOV to create the
+/// zero vector.
+static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
+ assert(VT.isVector() && "Expected a vector type");
+ // The canonical modified immediate encoding of a zero vector is....0!
+ SDValue EncodedVal = DAG.getTargetConstant(0, MVT::i32);
+ EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
+ SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
+}
+
+/// LowerShiftRightParts - Lower SRA_PARTS, which returns two
+/// i32 values and take a 2 x i32 value to shift plus a shift amount.
+SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
+ SelectionDAG &DAG) const {
+ assert(Op.getNumOperands() == 3 && "Not a double-shift!");
+ EVT VT = Op.getValueType();
+ unsigned VTBits = VT.getSizeInBits();
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue ShOpLo = Op.getOperand(0);
+ SDValue ShOpHi = Op.getOperand(1);
+ SDValue ShAmt = Op.getOperand(2);
+ SDValue ARMcc;
+ unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
+
+ assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
+
+ SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
+ DAG.getConstant(VTBits, MVT::i32), ShAmt);
+ SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
+ SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
+ DAG.getConstant(VTBits, MVT::i32));
+ SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
+ SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
+ SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
+
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+ SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
+ ARMcc, DAG, dl);
+ SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
+ SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc,
+ CCR, Cmp);
+
+ SDValue Ops[2] = { Lo, Hi };
+ return DAG.getMergeValues(Ops, 2, dl);
+}
+
+/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
+/// i32 values and take a 2 x i32 value to shift plus a shift amount.
+SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
+ SelectionDAG &DAG) const {
+ assert(Op.getNumOperands() == 3 && "Not a double-shift!");
+ EVT VT = Op.getValueType();
+ unsigned VTBits = VT.getSizeInBits();
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue ShOpLo = Op.getOperand(0);
+ SDValue ShOpHi = Op.getOperand(1);
+ SDValue ShAmt = Op.getOperand(2);
+ SDValue ARMcc;
+
+ assert(Op.getOpcode() == ISD::SHL_PARTS);
+ SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
+ DAG.getConstant(VTBits, MVT::i32), ShAmt);
+ SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
+ SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
+ DAG.getConstant(VTBits, MVT::i32));
+ SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
+ SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
+
+ SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+ SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
+ ARMcc, DAG, dl);
+ SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
+ SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMcc,
+ CCR, Cmp);
+
+ SDValue Ops[2] = { Lo, Hi };
+ return DAG.getMergeValues(Ops, 2, dl);
+}
+
+SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
+ SelectionDAG &DAG) const {
+ // The rounding mode is in bits 23:22 of the FPSCR.
+ // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0
+ // The formula we use to implement this is (((FPSCR + 1 << 22) >> 22) & 3)
+ // so that the shift + and get folded into a bitfield extract.
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32,
+ DAG.getConstant(Intrinsic::arm_get_fpscr,
+ MVT::i32));
+ SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR,
+ DAG.getConstant(1U << 22, MVT::i32));
+ SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
+ DAG.getConstant(22, MVT::i32));
+ return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
+ DAG.getConstant(3, MVT::i32));
+}
+
+static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ if (!ST->hasV6T2Ops())
+ return SDValue();
+
+ SDValue rbit = DAG.getNode(ARMISD::RBIT, dl, VT, N->getOperand(0));
+ return DAG.getNode(ISD::CTLZ, dl, VT, rbit);
+}
+
+static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ if (!VT.isVector())
+ return SDValue();
+
+ // Lower vector shifts on NEON to use VSHL.
+ assert(ST->hasNEON() && "unexpected vector shift");
+
+ // Left shifts translate directly to the vshiftu intrinsic.
+ if (N->getOpcode() == ISD::SHL)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::arm_neon_vshiftu, MVT::i32),
+ N->getOperand(0), N->getOperand(1));
+
+ assert((N->getOpcode() == ISD::SRA ||
+ N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode");
+
+ // NEON uses the same intrinsics for both left and right shifts. For
+ // right shifts, the shift amounts are negative, so negate the vector of
+ // shift amounts.
+ EVT ShiftVT = N->getOperand(1).getValueType();
+ SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT,
+ getZeroVector(ShiftVT, DAG, dl),
+ N->getOperand(1));
+ Intrinsic::ID vshiftInt = (N->getOpcode() == ISD::SRA ?
+ Intrinsic::arm_neon_vshifts :
+ Intrinsic::arm_neon_vshiftu);
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(vshiftInt, MVT::i32),
+ N->getOperand(0), NegatedCount);
+}
+
+static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ // We can get here for a node like i32 = ISD::SHL i32, i64
+ if (VT != MVT::i64)
+ return SDValue();
+
+ assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
+ "Unknown shift to lower!");
+
+ // We only lower SRA, SRL of 1 here, all others use generic lowering.
+ if (!isa<ConstantSDNode>(N->getOperand(1)) ||
+ cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 1)
+ return SDValue();
+
+ // If we are in thumb mode, we don't have RRX.
+ if (ST->isThumb1Only()) return SDValue();
+
+ // Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr.
+ SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
+ DAG.getConstant(0, MVT::i32));
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
+ DAG.getConstant(1, MVT::i32));
+
+ // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and
+ // captures the result into a carry flag.
+ unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG;
+ Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), &Hi, 1);
+
+ // The low part is an ARMISD::RRX operand, which shifts the carry in.
+ Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1));
+
+ // Merge the pieces into a single i64 value.
+ return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
+}
+
+static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
+ SDValue TmpOp0, TmpOp1;
+ bool Invert = false;
+ bool Swap = false;
+ unsigned Opc = 0;
+
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ SDValue CC = Op.getOperand(2);
+ EVT VT = Op.getValueType();
+ ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (Op.getOperand(1).getValueType().isFloatingPoint()) {
+ switch (SetCCOpcode) {
+ default: llvm_unreachable("Illegal FP comparison"); break;
+ case ISD::SETUNE:
+ case ISD::SETNE: Invert = true; // Fallthrough
+ case ISD::SETOEQ:
+ case ISD::SETEQ: Opc = ARMISD::VCEQ; break;
+ case ISD::SETOLT:
+ case ISD::SETLT: Swap = true; // Fallthrough
+ case ISD::SETOGT:
+ case ISD::SETGT: Opc = ARMISD::VCGT; break;
+ case ISD::SETOLE:
+ case ISD::SETLE: Swap = true; // Fallthrough
+ case ISD::SETOGE:
+ case ISD::SETGE: Opc = ARMISD::VCGE; break;
+ case ISD::SETUGE: Swap = true; // Fallthrough
+ case ISD::SETULE: Invert = true; Opc = ARMISD::VCGT; break;
+ case ISD::SETUGT: Swap = true; // Fallthrough
+ case ISD::SETULT: Invert = true; Opc = ARMISD::VCGE; break;
+ case ISD::SETUEQ: Invert = true; // Fallthrough
+ case ISD::SETONE:
+ // Expand this to (OLT | OGT).
+ TmpOp0 = Op0;
+ TmpOp1 = Op1;
+ Opc = ISD::OR;
+ Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0);
+ Op1 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp0, TmpOp1);
+ break;
+ case ISD::SETUO: Invert = true; // Fallthrough
+ case ISD::SETO:
+ // Expand this to (OLT | OGE).
+ TmpOp0 = Op0;
+ TmpOp1 = Op1;
+ Opc = ISD::OR;
+ Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0);
+ Op1 = DAG.getNode(ARMISD::VCGE, dl, VT, TmpOp0, TmpOp1);
+ break;
+ }
+ } else {
+ // Integer comparisons.
+ switch (SetCCOpcode) {
+ default: llvm_unreachable("Illegal integer comparison"); break;
+ case ISD::SETNE: Invert = true;
+ case ISD::SETEQ: Opc = ARMISD::VCEQ; break;
+ case ISD::SETLT: Swap = true;
+ case ISD::SETGT: Opc = ARMISD::VCGT; break;
+ case ISD::SETLE: Swap = true;
+ case ISD::SETGE: Opc = ARMISD::VCGE; break;
+ case ISD::SETULT: Swap = true;
+ case ISD::SETUGT: Opc = ARMISD::VCGTU; break;
+ case ISD::SETULE: Swap = true;
+ case ISD::SETUGE: Opc = ARMISD::VCGEU; break;
+ }
+
+ // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero).
+ if (Opc == ARMISD::VCEQ) {
+
+ SDValue AndOp;
+ if (ISD::isBuildVectorAllZeros(Op1.getNode()))
+ AndOp = Op0;
+ else if (ISD::isBuildVectorAllZeros(Op0.getNode()))
+ AndOp = Op1;
+
+ // Ignore bitconvert.
+ if (AndOp.getNode() && AndOp.getOpcode() == ISD::BITCAST)
+ AndOp = AndOp.getOperand(0);
+
+ if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) {
+ Opc = ARMISD::VTST;
+ Op0 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(0));
+ Op1 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(1));
+ Invert = !Invert;
+ }
+ }
+ }
+
+ if (Swap)
+ std::swap(Op0, Op1);
+
+ // If one of the operands is a constant vector zero, attempt to fold the
+ // comparison to a specialized compare-against-zero form.
+ SDValue SingleOp;
+ if (ISD::isBuildVectorAllZeros(Op1.getNode()))
+ SingleOp = Op0;
+ else if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
+ if (Opc == ARMISD::VCGE)
+ Opc = ARMISD::VCLEZ;
+ else if (Opc == ARMISD::VCGT)
+ Opc = ARMISD::VCLTZ;
+ SingleOp = Op1;
+ }
+
+ SDValue Result;
+ if (SingleOp.getNode()) {
+ switch (Opc) {
+ case ARMISD::VCEQ:
+ Result = DAG.getNode(ARMISD::VCEQZ, dl, VT, SingleOp); break;
+ case ARMISD::VCGE:
+ Result = DAG.getNode(ARMISD::VCGEZ, dl, VT, SingleOp); break;
+ case ARMISD::VCLEZ:
+ Result = DAG.getNode(ARMISD::VCLEZ, dl, VT, SingleOp); break;
+ case ARMISD::VCGT:
+ Result = DAG.getNode(ARMISD::VCGTZ, dl, VT, SingleOp); break;
+ case ARMISD::VCLTZ:
+ Result = DAG.getNode(ARMISD::VCLTZ, dl, VT, SingleOp); break;
+ default:
+ Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
+ }
+ } else {
+ Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
+ }
+
+ if (Invert)
+ Result = DAG.getNOT(dl, Result, VT);
+
+ return Result;
+}
+
+/// isNEONModifiedImm - Check if the specified splat value corresponds to a
+/// valid vector constant for a NEON instruction with a "modified immediate"
+/// operand (e.g., VMOV). If so, return the encoded value.
+static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
+ unsigned SplatBitSize, SelectionDAG &DAG,
+ EVT &VT, bool is128Bits, NEONModImmType type) {
+ unsigned OpCmode, Imm;
+
+ // SplatBitSize is set to the smallest size that splats the vector, so a
+ // zero vector will always have SplatBitSize == 8. However, NEON modified
+ // immediate instructions others than VMOV do not support the 8-bit encoding
+ // of a zero vector, and the default encoding of zero is supposed to be the
+ // 32-bit version.
+ if (SplatBits == 0)
+ SplatBitSize = 32;
+
+ switch (SplatBitSize) {
+ case 8:
+ if (type != VMOVModImm)
+ return SDValue();
+ // Any 1-byte value is OK. Op=0, Cmode=1110.
+ assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
+ OpCmode = 0xe;
+ Imm = SplatBits;
+ VT = is128Bits ? MVT::v16i8 : MVT::v8i8;
+ break;
+
+ case 16:
+ // NEON's 16-bit VMOV supports splat values where only one byte is nonzero.
+ VT = is128Bits ? MVT::v8i16 : MVT::v4i16;
+ if ((SplatBits & ~0xff) == 0) {
+ // Value = 0x00nn: Op=x, Cmode=100x.
+ OpCmode = 0x8;
+ Imm = SplatBits;
+ break;
+ }
+ if ((SplatBits & ~0xff00) == 0) {
+ // Value = 0xnn00: Op=x, Cmode=101x.
+ OpCmode = 0xa;
+ Imm = SplatBits >> 8;
+ break;
+ }
+ return SDValue();
+
+ case 32:
+ // NEON's 32-bit VMOV supports splat values where:
+ // * only one byte is nonzero, or
+ // * the least significant byte is 0xff and the second byte is nonzero, or
+ // * the least significant 2 bytes are 0xff and the third is nonzero.
+ VT = is128Bits ? MVT::v4i32 : MVT::v2i32;
+ if ((SplatBits & ~0xff) == 0) {
+ // Value = 0x000000nn: Op=x, Cmode=000x.
+ OpCmode = 0;
+ Imm = SplatBits;
+ break;
+ }
+ if ((SplatBits & ~0xff00) == 0) {
+ // Value = 0x0000nn00: Op=x, Cmode=001x.
+ OpCmode = 0x2;
+ Imm = SplatBits >> 8;
+ break;
+ }
+ if ((SplatBits & ~0xff0000) == 0) {
+ // Value = 0x00nn0000: Op=x, Cmode=010x.
+ OpCmode = 0x4;
+ Imm = SplatBits >> 16;
+ break;
+ }
+ if ((SplatBits & ~0xff000000) == 0) {
+ // Value = 0xnn000000: Op=x, Cmode=011x.
+ OpCmode = 0x6;
+ Imm = SplatBits >> 24;
+ break;
+ }
+
+ // cmode == 0b1100 and cmode == 0b1101 are not supported for VORR or VBIC
+ if (type == OtherModImm) return SDValue();
+
+ if ((SplatBits & ~0xffff) == 0 &&
+ ((SplatBits | SplatUndef) & 0xff) == 0xff) {
+ // Value = 0x0000nnff: Op=x, Cmode=1100.
+ OpCmode = 0xc;
+ Imm = SplatBits >> 8;
+ SplatBits |= 0xff;
+ break;
+ }
+
+ if ((SplatBits & ~0xffffff) == 0 &&
+ ((SplatBits | SplatUndef) & 0xffff) == 0xffff) {
+ // Value = 0x00nnffff: Op=x, Cmode=1101.
+ OpCmode = 0xd;
+ Imm = SplatBits >> 16;
+ SplatBits |= 0xffff;
+ break;
+ }
+
+ // Note: there are a few 32-bit splat values (specifically: 00ffff00,
+ // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not
+ // VMOV.I32. A (very) minor optimization would be to replicate the value
+ // and fall through here to test for a valid 64-bit splat. But, then the
+ // caller would also need to check and handle the change in size.
+ return SDValue();
+
+ case 64: {
+ if (type != VMOVModImm)
+ return SDValue();
+ // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff.
+ uint64_t BitMask = 0xff;
+ uint64_t Val = 0;
+ unsigned ImmMask = 1;
+ Imm = 0;
+ for (int ByteNum = 0; ByteNum < 8; ++ByteNum) {
+ if (((SplatBits | SplatUndef) & BitMask) == BitMask) {
+ Val |= BitMask;
+ Imm |= ImmMask;
+ } else if ((SplatBits & BitMask) != 0) {
+ return SDValue();
+ }
+ BitMask <<= 8;
+ ImmMask <<= 1;
+ }
+ // Op=1, Cmode=1110.
+ OpCmode = 0x1e;
+ SplatBits = Val;
+ VT = is128Bits ? MVT::v2i64 : MVT::v1i64;
+ break;
+ }
+
+ default:
+ llvm_unreachable("unexpected size for isNEONModifiedImm");
+ return SDValue();
+ }
+
+ unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm);
+ return DAG.getTargetConstant(EncodedVal, MVT::i32);
+}
+
+static bool isVEXTMask(const SmallVectorImpl<int> &M, EVT VT,
+ bool &ReverseVEXT, unsigned &Imm) {
+ unsigned NumElts = VT.getVectorNumElements();
+ ReverseVEXT = false;
+
+ // Assume that the first shuffle index is not UNDEF. Fail if it is.
+ if (M[0] < 0)
+ return false;
+
+ Imm = M[0];
+
+ // If this is a VEXT shuffle, the immediate value is the index of the first
+ // element. The other shuffle indices must be the successive elements after
+ // the first one.
+ unsigned ExpectedElt = Imm;
+ for (unsigned i = 1; i < NumElts; ++i) {
+ // Increment the expected index. If it wraps around, it may still be
+ // a VEXT but the source vectors must be swapped.
+ ExpectedElt += 1;
+ if (ExpectedElt == NumElts * 2) {
+ ExpectedElt = 0;
+ ReverseVEXT = true;
+ }
+
+ if (M[i] < 0) continue; // ignore UNDEF indices
+ if (ExpectedElt != static_cast<unsigned>(M[i]))
+ return false;
+ }
+
+ // Adjust the index value if the source operands will be swapped.
+ if (ReverseVEXT)
+ Imm -= NumElts;
+
+ return true;
+}
+
+/// isVREVMask - Check if a vector shuffle corresponds to a VREV
+/// instruction with the specified blocksize. (The order of the elements
+/// within each block of the vector is reversed.)
+static bool isVREVMask(const SmallVectorImpl<int> &M, EVT VT,
+ unsigned BlockSize) {
+ assert((BlockSize==16 || BlockSize==32 || BlockSize==64) &&
+ "Only possible block sizes for VREV are: 16, 32, 64");
+
+ unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+ if (EltSz == 64)
+ return false;
+
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned BlockElts = M[0] + 1;
+ // If the first shuffle index is UNDEF, be optimistic.
+ if (M[0] < 0)
+ BlockElts = BlockSize / EltSz;
+
+ if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
+ return false;
+
+ for (unsigned i = 0; i < NumElts; ++i) {
+ if (M[i] < 0) continue; // ignore UNDEF indices
+ if ((unsigned) M[i] != (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts))
+ return false;
+ }
+
+ return true;
+}
+
+static bool isVTBLMask(const SmallVectorImpl<int> &M, EVT VT) {
+ // We can handle <8 x i8> vector shuffles. If the index in the mask is out of
+ // range, then 0 is placed into the resulting vector. So pretty much any mask
+ // of 8 elements can work here.
+ return VT == MVT::v8i8 && M.size() == 8;
+}
+
+static bool isVTRNMask(const SmallVectorImpl<int> &M, EVT VT,
+ unsigned &WhichResult) {
+ unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+ if (EltSz == 64)
+ return false;
+
+ unsigned NumElts = VT.getVectorNumElements();
+ WhichResult = (M[0] == 0 ? 0 : 1);
+ for (unsigned i = 0; i < NumElts; i += 2) {
+ if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) ||
+ (M[i+1] >= 0 && (unsigned) M[i+1] != i + NumElts + WhichResult))
+ return false;
+ }
+ return true;
+}
+
+/// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of
+/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
+/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
+static bool isVTRN_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
+ unsigned &WhichResult) {
+ unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+ if (EltSz == 64)
+ return false;
+
+ unsigned NumElts = VT.getVectorNumElements();
+ WhichResult = (M[0] == 0 ? 0 : 1);
+ for (unsigned i = 0; i < NumElts; i += 2) {
+ if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) ||
+ (M[i+1] >= 0 && (unsigned) M[i+1] != i + WhichResult))
+ return false;
+ }
+ return true;
+}
+
+static bool isVUZPMask(const SmallVectorImpl<int> &M, EVT VT,
+ unsigned &WhichResult) {
+ unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+ if (EltSz == 64)
+ return false;
+
+ unsigned NumElts = VT.getVectorNumElements();
+ WhichResult = (M[0] == 0 ? 0 : 1);
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (M[i] < 0) continue; // ignore UNDEF indices
+ if ((unsigned) M[i] != 2 * i + WhichResult)
+ return false;
+ }
+
+ // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
+ if (VT.is64BitVector() && EltSz == 32)
+ return false;
+
+ return true;
+}
+
+/// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of
+/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
+/// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>,
+static bool isVUZP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
+ unsigned &WhichResult) {
+ unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+ if (EltSz == 64)
+ return false;
+
+ unsigned Half = VT.getVectorNumElements() / 2;
+ WhichResult = (M[0] == 0 ? 0 : 1);
+ for (unsigned j = 0; j != 2; ++j) {
+ unsigned Idx = WhichResult;
+ for (unsigned i = 0; i != Half; ++i) {
+ int MIdx = M[i + j * Half];
+ if (MIdx >= 0 && (unsigned) MIdx != Idx)
+ return false;
+ Idx += 2;
+ }
+ }
+
+ // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
+ if (VT.is64BitVector() && EltSz == 32)
+ return false;
+
+ return true;
+}
+
+static bool isVZIPMask(const SmallVectorImpl<int> &M, EVT VT,
+ unsigned &WhichResult) {
+ unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+ if (EltSz == 64)
+ return false;
+
+ unsigned NumElts = VT.getVectorNumElements();
+ WhichResult = (M[0] == 0 ? 0 : 1);
+ unsigned Idx = WhichResult * NumElts / 2;
+ for (unsigned i = 0; i != NumElts; i += 2) {
+ if ((M[i] >= 0 && (unsigned) M[i] != Idx) ||
+ (M[i+1] >= 0 && (unsigned) M[i+1] != Idx + NumElts))
+ return false;
+ Idx += 1;
+ }
+
+ // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
+ if (VT.is64BitVector() && EltSz == 32)
+ return false;
+
+ return true;
+}
+
+/// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of
+/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
+/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
+static bool isVZIP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
+ unsigned &WhichResult) {
+ unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+ if (EltSz == 64)
+ return false;
+
+ unsigned NumElts = VT.getVectorNumElements();
+ WhichResult = (M[0] == 0 ? 0 : 1);
+ unsigned Idx = WhichResult * NumElts / 2;
+ for (unsigned i = 0; i != NumElts; i += 2) {
+ if ((M[i] >= 0 && (unsigned) M[i] != Idx) ||
+ (M[i+1] >= 0 && (unsigned) M[i+1] != Idx))
+ return false;
+ Idx += 1;
+ }
+
+ // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
+ if (VT.is64BitVector() && EltSz == 32)
+ return false;
+
+ return true;
+}
+
+// If N is an integer constant that can be moved into a register in one
+// instruction, return an SDValue of such a constant (will become a MOV
+// instruction). Otherwise return null.
+static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG,
+ const ARMSubtarget *ST, DebugLoc dl) {
+ uint64_t Val;
+ if (!isa<ConstantSDNode>(N))
+ return SDValue();
+ Val = cast<ConstantSDNode>(N)->getZExtValue();
+
+ if (ST->isThumb1Only()) {
+ if (Val <= 255 || ~Val <= 255)
+ return DAG.getConstant(Val, MVT::i32);
+ } else {
+ if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1)
+ return DAG.getConstant(Val, MVT::i32);
+ }
+ return SDValue();
+}
+
+// If this is a case we can't handle, return null and let the default
+// expansion code take care of it.
+SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *ST) const {
+ BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
+ DebugLoc dl = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+
+ APInt SplatBits, SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
+ if (SplatBitSize <= 64) {
+ // Check if an immediate VMOV works.
+ EVT VmovVT;
+ SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),
+ SplatUndef.getZExtValue(), SplatBitSize,
+ DAG, VmovVT, VT.is128BitVector(),
+ VMOVModImm);
+ if (Val.getNode()) {
+ SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
+ }
+
+ // Try an immediate VMVN.
+ uint64_t NegatedImm = (~SplatBits).getZExtValue();
+ Val = isNEONModifiedImm(NegatedImm,
+ SplatUndef.getZExtValue(), SplatBitSize,
+ DAG, VmovVT, VT.is128BitVector(),
+ VMVNModImm);
+ if (Val.getNode()) {
+ SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
+ }
+ }
+ }
+
+ // Scan through the operands to see if only one value is used.
+ unsigned NumElts = VT.getVectorNumElements();
+ bool isOnlyLowElement = true;
+ bool usesOnlyOneValue = true;
+ bool isConstant = true;
+ SDValue Value;
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue V = Op.getOperand(i);
+ if (V.getOpcode() == ISD::UNDEF)
+ continue;
+ if (i > 0)
+ isOnlyLowElement = false;
+ if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
+ isConstant = false;
+
+ if (!Value.getNode())
+ Value = V;
+ else if (V != Value)
+ usesOnlyOneValue = false;
+ }
+
+ if (!Value.getNode())
+ return DAG.getUNDEF(VT);
+
+ if (isOnlyLowElement)
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
+
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+
+ // Use VDUP for non-constant splats. For f32 constant splats, reduce to
+ // i32 and try again.
+ if (usesOnlyOneValue && EltSize <= 32) {
+ if (!isConstant)
+ return DAG.getNode(ARMISD::VDUP, dl, VT, Value);
+ if (VT.getVectorElementType().isFloatingPoint()) {
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0; i < NumElts; ++i)
+ Ops.push_back(DAG.getNode(ISD::BITCAST, dl, MVT::i32,
+ Op.getOperand(i)));
+ EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);
+ SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, &Ops[0], NumElts);
+ Val = LowerBUILD_VECTOR(Val, DAG, ST);
+ if (Val.getNode())
+ return DAG.getNode(ISD::BITCAST, dl, VT, Val);
+ }
+ SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);
+ if (Val.getNode())
+ return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
+ }
+
+ // If all elements are constants and the case above didn't get hit, fall back
+ // to the default expansion, which will generate a load from the constant
+ // pool.
+ if (isConstant)
+ return SDValue();
+
+ // Empirical tests suggest this is rarely worth it for vectors of length <= 2.
+ if (NumElts >= 4) {
+ SDValue shuffle = ReconstructShuffle(Op, DAG);
+ if (shuffle != SDValue())
+ return shuffle;
+ }
+
+ // Vectors with 32- or 64-bit elements can be built by directly assigning
+ // the subregisters. Lower it to an ARMISD::BUILD_VECTOR so the operands
+ // will be legalized.
+ if (EltSize >= 32) {
+ // Do the expansion with floating-point types, since that is what the VFP
+ // registers are defined to use, and since i64 is not legal.
+ EVT EltVT = EVT::getFloatingPointVT(EltSize);
+ EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0; i < NumElts; ++i)
+ Ops.push_back(DAG.getNode(ISD::BITCAST, dl, EltVT, Op.getOperand(i)));
+ SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Val);
+ }
+
+ return SDValue();
+}
+
+// Gather data to see if the operation can be modelled as a
+// shuffle in combination with VEXTs.
+SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+ unsigned NumElts = VT.getVectorNumElements();
+
+ SmallVector<SDValue, 2> SourceVecs;
+ SmallVector<unsigned, 2> MinElts;
+ SmallVector<unsigned, 2> MaxElts;
+
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue V = Op.getOperand(i);
+ if (V.getOpcode() == ISD::UNDEF)
+ continue;
+ else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) {
+ // A shuffle can only come from building a vector from various
+ // elements of other vectors.
+ return SDValue();
+ } else if (V.getOperand(0).getValueType().getVectorElementType() !=
+ VT.getVectorElementType()) {
+ // This code doesn't know how to handle shuffles where the vector
+ // element types do not match (this happens because type legalization
+ // promotes the return type of EXTRACT_VECTOR_ELT).
+ // FIXME: It might be appropriate to extend this code to handle
+ // mismatched types.
+ return SDValue();
+ }
+
+ // Record this extraction against the appropriate vector if possible...
+ SDValue SourceVec = V.getOperand(0);
+ unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
+ bool FoundSource = false;
+ for (unsigned j = 0; j < SourceVecs.size(); ++j) {
+ if (SourceVecs[j] == SourceVec) {
+ if (MinElts[j] > EltNo)
+ MinElts[j] = EltNo;
+ if (MaxElts[j] < EltNo)
+ MaxElts[j] = EltNo;
+ FoundSource = true;
+ break;
+ }
+ }
+
+ // Or record a new source if not...
+ if (!FoundSource) {
+ SourceVecs.push_back(SourceVec);
+ MinElts.push_back(EltNo);
+ MaxElts.push_back(EltNo);
+ }
+ }
+
+ // Currently only do something sane when at most two source vectors
+ // involved.
+ if (SourceVecs.size() > 2)
+ return SDValue();
+
+ SDValue ShuffleSrcs[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT) };
+ int VEXTOffsets[2] = {0, 0};
+
+ // This loop extracts the usage patterns of the source vectors
+ // and prepares appropriate SDValues for a shuffle if possible.
+ for (unsigned i = 0; i < SourceVecs.size(); ++i) {
+ if (SourceVecs[i].getValueType() == VT) {
+ // No VEXT necessary
+ ShuffleSrcs[i] = SourceVecs[i];
+ VEXTOffsets[i] = 0;
+ continue;
+ } else if (SourceVecs[i].getValueType().getVectorNumElements() < NumElts) {
+ // It probably isn't worth padding out a smaller vector just to
+ // break it down again in a shuffle.
+ return SDValue();
+ }
+
+ // Since only 64-bit and 128-bit vectors are legal on ARM and
+ // we've eliminated the other cases...
+ assert(SourceVecs[i].getValueType().getVectorNumElements() == 2*NumElts &&
+ "unexpected vector sizes in ReconstructShuffle");
+
+ if (MaxElts[i] - MinElts[i] >= NumElts) {
+ // Span too large for a VEXT to cope
+ return SDValue();
+ }
+
+ if (MinElts[i] >= NumElts) {
+ // The extraction can just take the second half
+ VEXTOffsets[i] = NumElts;
+ ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
+ SourceVecs[i],
+ DAG.getIntPtrConstant(NumElts));
+ } else if (MaxElts[i] < NumElts) {
+ // The extraction can just take the first half
+ VEXTOffsets[i] = 0;
+ ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
+ SourceVecs[i],
+ DAG.getIntPtrConstant(0));
+ } else {
+ // An actual VEXT is needed
+ VEXTOffsets[i] = MinElts[i];
+ SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
+ SourceVecs[i],
+ DAG.getIntPtrConstant(0));
+ SDValue VEXTSrc2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
+ SourceVecs[i],
+ DAG.getIntPtrConstant(NumElts));
+ ShuffleSrcs[i] = DAG.getNode(ARMISD::VEXT, dl, VT, VEXTSrc1, VEXTSrc2,
+ DAG.getConstant(VEXTOffsets[i], MVT::i32));
+ }
+ }
+
+ SmallVector<int, 8> Mask;
+
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue Entry = Op.getOperand(i);
+ if (Entry.getOpcode() == ISD::UNDEF) {
+ Mask.push_back(-1);
+ continue;
+ }
+
+ SDValue ExtractVec = Entry.getOperand(0);
+ int ExtractElt = cast<ConstantSDNode>(Op.getOperand(i)
+ .getOperand(1))->getSExtValue();
+ if (ExtractVec == SourceVecs[0]) {
+ Mask.push_back(ExtractElt - VEXTOffsets[0]);
+ } else {
+ Mask.push_back(ExtractElt + NumElts - VEXTOffsets[1]);
+ }
+ }
+
+ // Final check before we try to produce nonsense...
+ if (isShuffleMaskLegal(Mask, VT))
+ return DAG.getVectorShuffle(VT, dl, ShuffleSrcs[0], ShuffleSrcs[1],
+ &Mask[0]);
+
+ return SDValue();
+}
+
+/// isShuffleMaskLegal - Targets can use this to indicate that they only
+/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
+/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
+/// are assumed to be legal.
+bool
+ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
+ EVT VT) const {
+ if (VT.getVectorNumElements() == 4 &&
+ (VT.is128BitVector() || VT.is64BitVector())) {
+ unsigned PFIndexes[4];
+ for (unsigned i = 0; i != 4; ++i) {
+ if (M[i] < 0)
+ PFIndexes[i] = 8;
+ else
+ PFIndexes[i] = M[i];
+ }
+
+ // Compute the index in the perfect shuffle table.
+ unsigned PFTableIndex =
+ PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
+ unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
+ unsigned Cost = (PFEntry >> 30);
+
+ if (Cost <= 4)
+ return true;
+ }
+
+ bool ReverseVEXT;
+ unsigned Imm, WhichResult;
+
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+ return (EltSize >= 32 ||
+ ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
+ isVREVMask(M, VT, 64) ||
+ isVREVMask(M, VT, 32) ||
+ isVREVMask(M, VT, 16) ||
+ isVEXTMask(M, VT, ReverseVEXT, Imm) ||
+ isVTBLMask(M, VT) ||
+ isVTRNMask(M, VT, WhichResult) ||
+ isVUZPMask(M, VT, WhichResult) ||
+ isVZIPMask(M, VT, WhichResult) ||
+ isVTRN_v_undef_Mask(M, VT, WhichResult) ||
+ isVUZP_v_undef_Mask(M, VT, WhichResult) ||
+ isVZIP_v_undef_Mask(M, VT, WhichResult));
+}
+
+/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
+/// the specified operations to build the shuffle.
+static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
+ SDValue RHS, SelectionDAG &DAG,
+ DebugLoc dl) {
+ unsigned OpNum = (PFEntry >> 26) & 0x0F;
+ unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
+ unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
+
+ enum {
+ OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
+ OP_VREV,
+ OP_VDUP0,
+ OP_VDUP1,
+ OP_VDUP2,
+ OP_VDUP3,
+ OP_VEXT1,
+ OP_VEXT2,
+ OP_VEXT3,
+ OP_VUZPL, // VUZP, left result
+ OP_VUZPR, // VUZP, right result
+ OP_VZIPL, // VZIP, left result
+ OP_VZIPR, // VZIP, right result
+ OP_VTRNL, // VTRN, left result
+ OP_VTRNR // VTRN, right result
+ };
+
+ if (OpNum == OP_COPY) {
+ if (LHSID == (1*9+2)*9+3) return LHS;
+ assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
+ return RHS;
+ }
+
+ SDValue OpLHS, OpRHS;
+ OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
+ OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
+ EVT VT = OpLHS.getValueType();
+
+ switch (OpNum) {
+ default: llvm_unreachable("Unknown shuffle opcode!");
+ case OP_VREV:
+ // VREV divides the vector in half and swaps within the half.
+ if (VT.getVectorElementType() == MVT::i32 ||
+ VT.getVectorElementType() == MVT::f32)
+ return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS);
+ // vrev <4 x i16> -> VREV32
+ if (VT.getVectorElementType() == MVT::i16)
+ return DAG.getNode(ARMISD::VREV32, dl, VT, OpLHS);
+ // vrev <4 x i8> -> VREV16
+ assert(VT.getVectorElementType() == MVT::i8);
+ return DAG.getNode(ARMISD::VREV16, dl, VT, OpLHS);
+ case OP_VDUP0:
+ case OP_VDUP1:
+ case OP_VDUP2:
+ case OP_VDUP3:
+ return DAG.getNode(ARMISD::VDUPLANE, dl, VT,
+ OpLHS, DAG.getConstant(OpNum-OP_VDUP0, MVT::i32));
+ case OP_VEXT1:
+ case OP_VEXT2:
+ case OP_VEXT3:
+ return DAG.getNode(ARMISD::VEXT, dl, VT,
+ OpLHS, OpRHS,
+ DAG.getConstant(OpNum-OP_VEXT1+1, MVT::i32));
+ case OP_VUZPL:
+ case OP_VUZPR:
+ return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
+ OpLHS, OpRHS).getValue(OpNum-OP_VUZPL);
+ case OP_VZIPL:
+ case OP_VZIPR:
+ return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
+ OpLHS, OpRHS).getValue(OpNum-OP_VZIPL);
+ case OP_VTRNL:
+ case OP_VTRNR:
+ return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
+ OpLHS, OpRHS).getValue(OpNum-OP_VTRNL);
+ }
+}
+
+static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op,
+ SmallVectorImpl<int> &ShuffleMask,
+ SelectionDAG &DAG) {
+ // Check to see if we can use the VTBL instruction.
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ DebugLoc DL = Op.getDebugLoc();
+
+ SmallVector<SDValue, 8> VTBLMask;
+ for (SmallVectorImpl<int>::iterator
+ I = ShuffleMask.begin(), E = ShuffleMask.end(); I != E; ++I)
+ VTBLMask.push_back(DAG.getConstant(*I, MVT::i32));
+
+ if (V2.getNode()->getOpcode() == ISD::UNDEF)
+ return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1,
+ DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8,
+ &VTBLMask[0], 8));
+
+ return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2,
+ DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8,
+ &VTBLMask[0], 8));
+}
+
+static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ DebugLoc dl = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+ ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
+ SmallVector<int, 8> ShuffleMask;
+
+ // Convert shuffles that are directly supported on NEON to target-specific
+ // DAG nodes, instead of keeping them as shuffles and matching them again
+ // during code selection. This is more efficient and avoids the possibility
+ // of inconsistencies between legalization and selection.
+ // FIXME: floating-point vectors should be canonicalized to integer vectors
+ // of the same time so that they get CSEd properly.
+ SVN->getMask(ShuffleMask);
+
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+ if (EltSize <= 32) {
+ if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) {
+ int Lane = SVN->getSplatIndex();
+ // If this is undef splat, generate it via "just" vdup, if possible.
+ if (Lane == -1) Lane = 0;
+
+ if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+ return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
+ }
+ return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1,
+ DAG.getConstant(Lane, MVT::i32));
+ }
+
+ bool ReverseVEXT;
+ unsigned Imm;
+ if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) {
+ if (ReverseVEXT)
+ std::swap(V1, V2);
+ return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2,
+ DAG.getConstant(Imm, MVT::i32));
+ }
+
+ if (isVREVMask(ShuffleMask, VT, 64))
+ return DAG.getNode(ARMISD::VREV64, dl, VT, V1);
+ if (isVREVMask(ShuffleMask, VT, 32))
+ return DAG.getNode(ARMISD::VREV32, dl, VT, V1);
+ if (isVREVMask(ShuffleMask, VT, 16))
+ return DAG.getNode(ARMISD::VREV16, dl, VT, V1);
+
+ // Check for Neon shuffles that modify both input vectors in place.
+ // If both results are used, i.e., if there are two shuffles with the same
+ // source operands and with masks corresponding to both results of one of
+ // these operations, DAG memoization will ensure that a single node is
+ // used for both shuffles.
+ unsigned WhichResult;
+ if (isVTRNMask(ShuffleMask, VT, WhichResult))
+ return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
+ V1, V2).getValue(WhichResult);
+ if (isVUZPMask(ShuffleMask, VT, WhichResult))
+ return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
+ V1, V2).getValue(WhichResult);
+ if (isVZIPMask(ShuffleMask, VT, WhichResult))
+ return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
+ V1, V2).getValue(WhichResult);
+
+ if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult))
+ return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
+ V1, V1).getValue(WhichResult);
+ if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult))
+ return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
+ V1, V1).getValue(WhichResult);
+ if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult))
+ return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
+ V1, V1).getValue(WhichResult);
+ }
+
+ // If the shuffle is not directly supported and it has 4 elements, use
+ // the PerfectShuffle-generated table to synthesize it from other shuffles.
+ unsigned NumElts = VT.getVectorNumElements();
+ if (NumElts == 4) {
+ unsigned PFIndexes[4];
+ for (unsigned i = 0; i != 4; ++i) {
+ if (ShuffleMask[i] < 0)
+ PFIndexes[i] = 8;
+ else
+ PFIndexes[i] = ShuffleMask[i];
+ }
+
+ // Compute the index in the perfect shuffle table.
+ unsigned PFTableIndex =
+ PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
+ unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
+ unsigned Cost = (PFEntry >> 30);
+
+ if (Cost <= 4)
+ return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
+ }
+
+ // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs.
+ if (EltSize >= 32) {
+ // Do the expansion with floating-point types, since that is what the VFP
+ // registers are defined to use, and since i64 is not legal.
+ EVT EltVT = EVT::getFloatingPointVT(EltSize);
+ EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
+ V1 = DAG.getNode(ISD::BITCAST, dl, VecVT, V1);
+ V2 = DAG.getNode(ISD::BITCAST, dl, VecVT, V2);
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0; i < NumElts; ++i) {
+ if (ShuffleMask[i] < 0)
+ Ops.push_back(DAG.getUNDEF(EltVT));
+ else
+ Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
+ ShuffleMask[i] < (int)NumElts ? V1 : V2,
+ DAG.getConstant(ShuffleMask[i] & (NumElts-1),
+ MVT::i32)));
+ }
+ SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Val);
+ }
+
+ if (VT == MVT::v8i8) {
+ SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG);
+ if (NewOp.getNode())
+ return NewOp;
+ }
+
+ return SDValue();
+}
+
+static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
+ // EXTRACT_VECTOR_ELT is legal only for immediate indexes.
+ SDValue Lane = Op.getOperand(1);
+ if (!isa<ConstantSDNode>(Lane))
+ return SDValue();
+
+ SDValue Vec = Op.getOperand(0);
+ if (Op.getValueType() == MVT::i32 &&
+ Vec.getValueType().getVectorElementType().getSizeInBits() < 32) {
+ DebugLoc dl = Op.getDebugLoc();
+ return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane);
+ }
+
+ return Op;
+}
+
+static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
+ // The only time a CONCAT_VECTORS operation can have legal types is when
+ // two 64-bit vectors are concatenated to a 128-bit vector.
+ assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 &&
+ "unexpected CONCAT_VECTORS");
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue Val = DAG.getUNDEF(MVT::v2f64);
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ if (Op0.getOpcode() != ISD::UNDEF)
+ Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
+ DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op0),
+ DAG.getIntPtrConstant(0));
+ if (Op1.getOpcode() != ISD::UNDEF)
+ Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
+ DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op1),
+ DAG.getIntPtrConstant(1));
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Val);
+}
+
+/// isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each
+/// element has been zero/sign-extended, depending on the isSigned parameter,
+/// from an integer type half its size.
+static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
+ bool isSigned) {
+ // A v2i64 BUILD_VECTOR will have been legalized to a BITCAST from v4i32.
+ EVT VT = N->getValueType(0);
+ if (VT == MVT::v2i64 && N->getOpcode() == ISD::BITCAST) {
+ SDNode *BVN = N->getOperand(0).getNode();
+ if (BVN->getValueType(0) != MVT::v4i32 ||
+ BVN->getOpcode() != ISD::BUILD_VECTOR)
+ return false;
+ unsigned LoElt = DAG.getTargetLoweringInfo().isBigEndian() ? 1 : 0;
+ unsigned HiElt = 1 - LoElt;
+ ConstantSDNode *Lo0 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt));
+ ConstantSDNode *Hi0 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt));
+ ConstantSDNode *Lo1 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt+2));
+ ConstantSDNode *Hi1 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt+2));
+ if (!Lo0 || !Hi0 || !Lo1 || !Hi1)
+ return false;
+ if (isSigned) {
+ if (Hi0->getSExtValue() == Lo0->getSExtValue() >> 32 &&
+ Hi1->getSExtValue() == Lo1->getSExtValue() >> 32)
+ return true;
+ } else {
+ if (Hi0->isNullValue() && Hi1->isNullValue())
+ return true;
+ }
+ return false;
+ }
+
+ if (N->getOpcode() != ISD::BUILD_VECTOR)
+ return false;
+
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ SDNode *Elt = N->getOperand(i).getNode();
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+ unsigned HalfSize = EltSize / 2;
+ if (isSigned) {
+ int64_t SExtVal = C->getSExtValue();
+ if ((SExtVal >> HalfSize) != (SExtVal >> EltSize))
+ return false;
+ } else {
+ if ((C->getZExtValue() >> HalfSize) != 0)
+ return false;
+ }
+ continue;
+ }
+ return false;
+ }
+
+ return true;
+}
+
+/// isSignExtended - Check if a node is a vector value that is sign-extended
+/// or a constant BUILD_VECTOR with sign-extended elements.
+static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
+ if (N->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N))
+ return true;
+ if (isExtendedBUILD_VECTOR(N, DAG, true))
+ return true;
+ return false;
+}
+
+/// isZeroExtended - Check if a node is a vector value that is zero-extended
+/// or a constant BUILD_VECTOR with zero-extended elements.
+static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
+ if (N->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N))
+ return true;
+ if (isExtendedBUILD_VECTOR(N, DAG, false))
+ return true;
+ return false;
+}
+
+/// SkipExtension - For a node that is a SIGN_EXTEND, ZERO_EXTEND, extending
+/// load, or BUILD_VECTOR with extended elements, return the unextended value.
+static SDValue SkipExtension(SDNode *N, SelectionDAG &DAG) {
+ if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND)
+ return N->getOperand(0);
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N))
+ return DAG.getLoad(LD->getMemoryVT(), N->getDebugLoc(), LD->getChain(),
+ LD->getBasePtr(), LD->getPointerInfo(), LD->isVolatile(),
+ LD->isNonTemporal(), LD->getAlignment());
+ // Otherwise, the value must be a BUILD_VECTOR. For v2i64, it will
+ // have been legalized as a BITCAST from v4i32.
+ if (N->getOpcode() == ISD::BITCAST) {
+ SDNode *BVN = N->getOperand(0).getNode();
+ assert(BVN->getOpcode() == ISD::BUILD_VECTOR &&
+ BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR");
+ unsigned LowElt = DAG.getTargetLoweringInfo().isBigEndian() ? 1 : 0;
+ return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), MVT::v2i32,
+ BVN->getOperand(LowElt), BVN->getOperand(LowElt+2));
+ }
+ // Construct a new BUILD_VECTOR with elements truncated to half the size.
+ assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
+ EVT VT = N->getValueType(0);
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits() / 2;
+ unsigned NumElts = VT.getVectorNumElements();
+ MVT TruncVT = MVT::getIntegerVT(EltSize);
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
+ const APInt &CInt = C->getAPIntValue();
+ Ops.push_back(DAG.getConstant(CInt.trunc(EltSize), TruncVT));
+ }
+ return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
+ MVT::getVectorVT(TruncVT, NumElts), Ops.data(), NumElts);
+}
+
+static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
+ unsigned Opcode = N->getOpcode();
+ if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
+ SDNode *N0 = N->getOperand(0).getNode();
+ SDNode *N1 = N->getOperand(1).getNode();
+ return N0->hasOneUse() && N1->hasOneUse() &&
+ isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
+ }
+ return false;
+}
+
+static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
+ unsigned Opcode = N->getOpcode();
+ if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
+ SDNode *N0 = N->getOperand(0).getNode();
+ SDNode *N1 = N->getOperand(1).getNode();
+ return N0->hasOneUse() && N1->hasOneUse() &&
+ isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
+ }
+ return false;
+}
+
+static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
+ // Multiplications are only custom-lowered for 128-bit vectors so that
+ // VMULL can be detected. Otherwise v2i64 multiplications are not legal.
+ EVT VT = Op.getValueType();
+ assert(VT.is128BitVector() && "unexpected type for custom-lowering ISD::MUL");
+ SDNode *N0 = Op.getOperand(0).getNode();
+ SDNode *N1 = Op.getOperand(1).getNode();
+ unsigned NewOpc = 0;
+ bool isMLA = false;
+ bool isN0SExt = isSignExtended(N0, DAG);
+ bool isN1SExt = isSignExtended(N1, DAG);
+ if (isN0SExt && isN1SExt)
+ NewOpc = ARMISD::VMULLs;
+ else {
+ bool isN0ZExt = isZeroExtended(N0, DAG);
+ bool isN1ZExt = isZeroExtended(N1, DAG);
+ if (isN0ZExt && isN1ZExt)
+ NewOpc = ARMISD::VMULLu;
+ else if (isN1SExt || isN1ZExt) {
+ // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
+ // into (s/zext A * s/zext C) + (s/zext B * s/zext C)
+ if (isN1SExt && isAddSubSExt(N0, DAG)) {
+ NewOpc = ARMISD::VMULLs;
+ isMLA = true;
+ } else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
+ NewOpc = ARMISD::VMULLu;
+ isMLA = true;
+ } else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
+ std::swap(N0, N1);
+ NewOpc = ARMISD::VMULLu;
+ isMLA = true;
+ }
+ }
+
+ if (!NewOpc) {
+ if (VT == MVT::v2i64)
+ // Fall through to expand this. It is not legal.
+ return SDValue();
+ else
+ // Other vector multiplications are legal.
+ return Op;
+ }
+ }
+
+ // Legalize to a VMULL instruction.
+ DebugLoc DL = Op.getDebugLoc();
+ SDValue Op0;
+ SDValue Op1 = SkipExtension(N1, DAG);
+ if (!isMLA) {
+ Op0 = SkipExtension(N0, DAG);
+ assert(Op0.getValueType().is64BitVector() &&
+ Op1.getValueType().is64BitVector() &&
+ "unexpected types for extended operands to VMULL");
+ return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
+ }
+
+ // Optimizing (zext A + zext B) * C, to (VMULL A, C) + (VMULL B, C) during
+ // isel lowering to take advantage of no-stall back to back vmul + vmla.
+ // vmull q0, d4, d6
+ // vmlal q0, d5, d6
+ // is faster than
+ // vaddl q0, d4, d5
+ // vmovl q1, d6
+ // vmul q0, q0, q1
+ SDValue N00 = SkipExtension(N0->getOperand(0).getNode(), DAG);
+ SDValue N01 = SkipExtension(N0->getOperand(1).getNode(), DAG);
+ EVT Op1VT = Op1.getValueType();
+ return DAG.getNode(N0->getOpcode(), DL, VT,
+ DAG.getNode(NewOpc, DL, VT,
+ DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
+ DAG.getNode(NewOpc, DL, VT,
+ DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
+}
+
+static SDValue
+LowerSDIV_v4i8(SDValue X, SDValue Y, DebugLoc dl, SelectionDAG &DAG) {
+ // Convert to float
+ // float4 xf = vcvt_f32_s32(vmovl_s16(a.lo));
+ // float4 yf = vcvt_f32_s32(vmovl_s16(b.lo));
+ X = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, X);
+ Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Y);
+ X = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, X);
+ Y = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, Y);
+ // Get reciprocal estimate.
+ // float4 recip = vrecpeq_f32(yf);
+ Y = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
+ DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), Y);
+ // Because char has a smaller range than uchar, we can actually get away
+ // without any newton steps. This requires that we use a weird bias
+ // of 0xb000, however (again, this has been exhaustively tested).
+ // float4 result = as_float4(as_int4(xf*recip) + 0xb000);
+ X = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, X, Y);
+ X = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, X);
+ Y = DAG.getConstant(0xb000, MVT::i32);
+ Y = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Y, Y, Y, Y);
+ X = DAG.getNode(ISD::ADD, dl, MVT::v4i32, X, Y);
+ X = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, X);
+ // Convert back to short.
+ X = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, X);
+ X = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, X);
+ return X;
+}
+
+static SDValue
+LowerSDIV_v4i16(SDValue N0, SDValue N1, DebugLoc dl, SelectionDAG &DAG) {
+ SDValue N2;
+ // Convert to float.
+ // float4 yf = vcvt_f32_s32(vmovl_s16(y));
+ // float4 xf = vcvt_f32_s32(vmovl_s16(x));
+ N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N0);
+ N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N1);
+ N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
+ N1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
+
+ // Use reciprocal estimate and one refinement step.
+ // float4 recip = vrecpeq_f32(yf);
+ // recip *= vrecpsq_f32(yf, recip);
+ N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
+ DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), N1);
+ N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
+ DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32),
+ N1, N2);
+ N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
+ // Because short has a smaller range than ushort, we can actually get away
+ // with only a single newton step. This requires that we use a weird bias
+ // of 89, however (again, this has been exhaustively tested).
+ // float4 result = as_float4(as_int4(xf*recip) + 0x89);
+ N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
+ N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
+ N1 = DAG.getConstant(0x89, MVT::i32);
+ N1 = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, N1, N1, N1, N1);
+ N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
+ N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
+ // Convert back to integer and return.
+ // return vmovn_s32(vcvt_s32_f32(result));
+ N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
+ N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
+ return N0;
+}
+
+static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
+ assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
+ "unexpected type for custom-lowering ISD::SDIV");
+
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue N0 = Op.getOperand(0);
+ SDValue N1 = Op.getOperand(1);
+ SDValue N2, N3;
+
+ if (VT == MVT::v8i8) {
+ N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N0);
+ N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N1);
+
+ N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
+ DAG.getIntPtrConstant(4));
+ N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
+ DAG.getIntPtrConstant(4));
+ N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
+ DAG.getIntPtrConstant(0));
+ N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
+ DAG.getIntPtrConstant(0));
+
+ N0 = LowerSDIV_v4i8(N0, N1, dl, DAG); // v4i16
+ N2 = LowerSDIV_v4i8(N2, N3, dl, DAG); // v4i16
+
+ N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
+ N0 = LowerCONCAT_VECTORS(N0, DAG);
+
+ N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i8, N0);
+ return N0;
+ }
+ return LowerSDIV_v4i16(N0, N1, dl, DAG);
+}
+
+static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
+ assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
+ "unexpected type for custom-lowering ISD::UDIV");
+
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue N0 = Op.getOperand(0);
+ SDValue N1 = Op.getOperand(1);
+ SDValue N2, N3;
+
+ if (VT == MVT::v8i8) {
+ N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N0);
+ N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N1);
+
+ N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
+ DAG.getIntPtrConstant(4));
+ N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
+ DAG.getIntPtrConstant(4));
+ N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
+ DAG.getIntPtrConstant(0));
+ N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
+ DAG.getIntPtrConstant(0));
+
+ N0 = LowerSDIV_v4i16(N0, N1, dl, DAG); // v4i16
+ N2 = LowerSDIV_v4i16(N2, N3, dl, DAG); // v4i16
+
+ N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
+ N0 = LowerCONCAT_VECTORS(N0, DAG);
+
+ N0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v8i8,
+ DAG.getConstant(Intrinsic::arm_neon_vqmovnsu, MVT::i32),
+ N0);
+ return N0;
+ }
+
+ // v4i16 sdiv ... Convert to float.
+ // float4 yf = vcvt_f32_s32(vmovl_u16(y));
+ // float4 xf = vcvt_f32_s32(vmovl_u16(x));
+ N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N0);
+ N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N1);
+ N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
+ SDValue BN1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
+
+ // Use reciprocal estimate and two refinement steps.
+ // float4 recip = vrecpeq_f32(yf);
+ // recip *= vrecpsq_f32(yf, recip);
+ // recip *= vrecpsq_f32(yf, recip);
+ N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
+ DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), BN1);
+ N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
+ DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32),
+ BN1, N2);
+ N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
+ N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
+ DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32),
+ BN1, N2);
+ N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
+ // Simply multiplying by the reciprocal estimate can leave us a few ulps
+ // too low, so we add 2 ulps (exhaustive testing shows that this is enough,
+ // and that it will never cause us to return an answer too large).
+ // float4 result = as_float4(as_int4(xf*recip) + 2);
+ N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
+ N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
+ N1 = DAG.getConstant(2, MVT::i32);
+ N1 = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, N1, N1, N1, N1);
+ N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
+ N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
+ // Convert back to integer and return.
+ // return vmovn_u32(vcvt_s32_f32(result));
+ N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
+ N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
+ return N0;
+}
+
+static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getNode()->getValueType(0);
+ SDVTList VTs = DAG.getVTList(VT, MVT::i32);
+
+ unsigned Opc;
+ bool ExtraOp = false;
+ switch (Op.getOpcode()) {
+ default: assert(0 && "Invalid code");
+ case ISD::ADDC: Opc = ARMISD::ADDC; break;
+ case ISD::ADDE: Opc = ARMISD::ADDE; ExtraOp = true; break;
+ case ISD::SUBC: Opc = ARMISD::SUBC; break;
+ case ISD::SUBE: Opc = ARMISD::SUBE; ExtraOp = true; break;
+ }
+
+ if (!ExtraOp)
+ return DAG.getNode(Opc, Op->getDebugLoc(), VTs, Op.getOperand(0),
+ Op.getOperand(1));
+ return DAG.getNode(Opc, Op->getDebugLoc(), VTs, Op.getOperand(0),
+ Op.getOperand(1), Op.getOperand(2));
+}
+
+static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) {
+ // Monotonic load/store is legal for all targets
+ if (cast<AtomicSDNode>(Op)->getOrdering() <= Monotonic)
+ return Op;
+
+ // Aquire/Release load/store is not legal for targets without a
+ // dmb or equivalent available.
+ return SDValue();
+}
+
+
+static void
+ReplaceATOMIC_OP_64(SDNode *Node, SmallVectorImpl<SDValue>& Results,
+ SelectionDAG &DAG, unsigned NewOp) {
+ DebugLoc dl = Node->getDebugLoc();
+ assert (Node->getValueType(0) == MVT::i64 &&
+ "Only know how to expand i64 atomics");
+
+ SmallVector<SDValue, 6> Ops;
+ Ops.push_back(Node->getOperand(0)); // Chain
+ Ops.push_back(Node->getOperand(1)); // Ptr
+ // Low part of Val1
+ Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Node->getOperand(2), DAG.getIntPtrConstant(0)));
+ // High part of Val1
+ Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Node->getOperand(2), DAG.getIntPtrConstant(1)));
+ if (NewOp == ARMISD::ATOMCMPXCHG64_DAG) {
+ // High part of Val1
+ Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Node->getOperand(3), DAG.getIntPtrConstant(0)));
+ // High part of Val2
+ Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Node->getOperand(3), DAG.getIntPtrConstant(1)));
+ }
+ SDVTList Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
+ SDValue Result =
+ DAG.getMemIntrinsicNode(NewOp, dl, Tys, Ops.data(), Ops.size(), MVT::i64,
+ cast<MemSDNode>(Node)->getMemOperand());
+ SDValue OpsF[] = { Result.getValue(0), Result.getValue(1) };
+ Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF, 2));
+ Results.push_back(Result.getValue(2));
+}
+
+SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+ switch (Op.getOpcode()) {
+ default: llvm_unreachable("Don't know how to custom lower this!");
+ case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
+ case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
+ case ISD::GlobalAddress:
+ return Subtarget->isTargetDarwin() ? LowerGlobalAddressDarwin(Op, DAG) :
+ LowerGlobalAddressELF(Op, DAG);
+ case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
+ case ISD::SELECT: return LowerSELECT(Op, DAG);
+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ case ISD::BR_CC: return LowerBR_CC(Op, DAG);
+ case ISD::BR_JT: return LowerBR_JT(Op, DAG);
+ case ISD::VASTART: return LowerVASTART(Op, DAG);
+ case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG, Subtarget);
+ case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG, Subtarget);
+ case ISD::PREFETCH: return LowerPREFETCH(Op, DAG, Subtarget);
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG);
+ case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
+ case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
+ case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
+ case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
+ case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG);
+ case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG);
+ case ISD::EH_SJLJ_DISPATCHSETUP: return LowerEH_SJLJ_DISPATCHSETUP(Op, DAG);
+ case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG,
+ Subtarget);
+ case ISD::BITCAST: return ExpandBITCAST(Op.getNode(), DAG);
+ case ISD::SHL:
+ case ISD::SRL:
+ case ISD::SRA: return LowerShift(Op.getNode(), DAG, Subtarget);
+ case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG);
+ case ISD::SRL_PARTS:
+ case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG);
+ case ISD::CTTZ: return LowerCTTZ(Op.getNode(), DAG, Subtarget);
+ case ISD::SETCC: return LowerVSETCC(Op, DAG);
+ case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget);
+ case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
+ case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
+ case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
+ case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
+ case ISD::MUL: return LowerMUL(Op, DAG);
+ case ISD::SDIV: return LowerSDIV(Op, DAG);
+ case ISD::UDIV: return LowerUDIV(Op, DAG);
+ case ISD::ADDC:
+ case ISD::ADDE:
+ case ISD::SUBC:
+ case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
+ case ISD::ATOMIC_LOAD:
+ case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG);
+ }
+ return SDValue();
+}
+
+/// ReplaceNodeResults - Replace the results of node with an illegal result
+/// type with new values built out of custom code.
+void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG) const {
+ SDValue Res;
+ switch (N->getOpcode()) {
+ default:
+ llvm_unreachable("Don't know how to custom expand this!");
+ break;
+ case ISD::BITCAST:
+ Res = ExpandBITCAST(N, DAG);
+ break;
+ case ISD::SRL:
+ case ISD::SRA:
+ Res = Expand64BitShift(N, DAG, Subtarget);
+ break;
+ case ISD::ATOMIC_LOAD_ADD:
+ ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMADD64_DAG);
+ return;
+ case ISD::ATOMIC_LOAD_AND:
+ ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMAND64_DAG);
+ return;
+ case ISD::ATOMIC_LOAD_NAND:
+ ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMNAND64_DAG);
+ return;
+ case ISD::ATOMIC_LOAD_OR:
+ ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMOR64_DAG);
+ return;
+ case ISD::ATOMIC_LOAD_SUB:
+ ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMSUB64_DAG);
+ return;
+ case ISD::ATOMIC_LOAD_XOR:
+ ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMXOR64_DAG);
+ return;
+ case ISD::ATOMIC_SWAP:
+ ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMSWAP64_DAG);
+ return;
+ case ISD::ATOMIC_CMP_SWAP:
+ ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMCMPXCHG64_DAG);
+ return;
+ }
+ if (Res.getNode())
+ Results.push_back(Res);
+}
+
+//===----------------------------------------------------------------------===//
+// ARM Scheduler Hooks
+//===----------------------------------------------------------------------===//
+
+MachineBasicBlock *
+ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ unsigned Size) const {
+ unsigned dest = MI->getOperand(0).getReg();
+ unsigned ptr = MI->getOperand(1).getReg();
+ unsigned oldval = MI->getOperand(2).getReg();
+ unsigned newval = MI->getOperand(3).getReg();
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+ bool isThumb2 = Subtarget->isThumb2();
+
+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+ unsigned scratch =
+ MRI.createVirtualRegister(isThumb2 ? ARM::rGPRRegisterClass
+ : ARM::GPRRegisterClass);
+
+ if (isThumb2) {
+ MRI.constrainRegClass(dest, ARM::rGPRRegisterClass);
+ MRI.constrainRegClass(oldval, ARM::rGPRRegisterClass);
+ MRI.constrainRegClass(newval, ARM::rGPRRegisterClass);
+ }
+
+ unsigned ldrOpc, strOpc;
+ switch (Size) {
+ default: llvm_unreachable("unsupported size for AtomicCmpSwap!");
+ case 1:
+ ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
+ strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
+ break;
+ case 2:
+ ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
+ strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
+ break;
+ case 4:
+ ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
+ strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
+ break;
+ }
+
+ MachineFunction *MF = BB->getParent();
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It; // insert the new blocks after the current block
+
+ MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MF->insert(It, loop1MBB);
+ MF->insert(It, loop2MBB);
+ MF->insert(It, exitMBB);
+
+ // Transfer the remainder of BB and its successor edges to exitMBB.
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // thisMBB:
+ // ...
+ // fallthrough --> loop1MBB
+ BB->addSuccessor(loop1MBB);
+
+ // loop1MBB:
+ // ldrex dest, [ptr]
+ // cmp dest, oldval
+ // bne exitMBB
+ BB = loop1MBB;
+ MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
+ if (ldrOpc == ARM::t2LDREX)
+ MIB.addImm(0);
+ AddDefaultPred(MIB);
+ AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
+ .addReg(dest).addReg(oldval));
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
+ .addMBB(exitMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
+ BB->addSuccessor(loop2MBB);
+ BB->addSuccessor(exitMBB);
+
+ // loop2MBB:
+ // strex scratch, newval, [ptr]
+ // cmp scratch, #0
+ // bne loop1MBB
+ BB = loop2MBB;
+ MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(newval).addReg(ptr);
+ if (strOpc == ARM::t2STREX)
+ MIB.addImm(0);
+ AddDefaultPred(MIB);
+ AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
+ .addReg(scratch).addImm(0));
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
+ .addMBB(loop1MBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
+ BB->addSuccessor(loop1MBB);
+ BB->addSuccessor(exitMBB);
+
+ // exitMBB:
+ // ...
+ BB = exitMBB;
+
+ MI->eraseFromParent(); // The instruction is gone now.
+
+ return BB;
+}
+
+MachineBasicBlock *
+ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
+ unsigned Size, unsigned BinOpcode) const {
+ // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction *MF = BB->getParent();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ unsigned dest = MI->getOperand(0).getReg();
+ unsigned ptr = MI->getOperand(1).getReg();
+ unsigned incr = MI->getOperand(2).getReg();
+ DebugLoc dl = MI->getDebugLoc();
+ bool isThumb2 = Subtarget->isThumb2();
+
+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+ if (isThumb2) {
+ MRI.constrainRegClass(dest, ARM::rGPRRegisterClass);
+ MRI.constrainRegClass(ptr, ARM::rGPRRegisterClass);
+ }
+
+ unsigned ldrOpc, strOpc;
+ switch (Size) {
+ default: llvm_unreachable("unsupported size for AtomicCmpSwap!");
+ case 1:
+ ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
+ strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
+ break;
+ case 2:
+ ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
+ strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
+ break;
+ case 4:
+ ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
+ strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
+ break;
+ }
+
+ MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MF->insert(It, loopMBB);
+ MF->insert(It, exitMBB);
+
+ // Transfer the remainder of BB and its successor edges to exitMBB.
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ TargetRegisterClass *TRC =
+ isThumb2 ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass;
+ unsigned scratch = MRI.createVirtualRegister(TRC);
+ unsigned scratch2 = (!BinOpcode) ? incr : MRI.createVirtualRegister(TRC);
+
+ // thisMBB:
+ // ...
+ // fallthrough --> loopMBB
+ BB->addSuccessor(loopMBB);
+
+ // loopMBB:
+ // ldrex dest, ptr
+ // <binop> scratch2, dest, incr
+ // strex scratch, scratch2, ptr
+ // cmp scratch, #0
+ // bne- loopMBB
+ // fallthrough --> exitMBB
+ BB = loopMBB;
+ MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
+ if (ldrOpc == ARM::t2LDREX)
+ MIB.addImm(0);
+ AddDefaultPred(MIB);
+ if (BinOpcode) {
+ // operand order needs to go the other way for NAND
+ if (BinOpcode == ARM::BICrr || BinOpcode == ARM::t2BICrr)
+ AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2).
+ addReg(incr).addReg(dest)).addReg(0);
+ else
+ AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2).
+ addReg(dest).addReg(incr)).addReg(0);
+ }
+
+ MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2).addReg(ptr);
+ if (strOpc == ARM::t2STREX)
+ MIB.addImm(0);
+ AddDefaultPred(MIB);
+ AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
+ .addReg(scratch).addImm(0));
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
+ .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
+
+ BB->addSuccessor(loopMBB);
+ BB->addSuccessor(exitMBB);
+
+ // exitMBB:
+ // ...
+ BB = exitMBB;
+
+ MI->eraseFromParent(); // The instruction is gone now.
+
+ return BB;
+}
+
+MachineBasicBlock *
+ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ unsigned Size,
+ bool signExtend,
+ ARMCC::CondCodes Cond) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction *MF = BB->getParent();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ unsigned dest = MI->getOperand(0).getReg();
+ unsigned ptr = MI->getOperand(1).getReg();
+ unsigned incr = MI->getOperand(2).getReg();
+ unsigned oldval = dest;
+ DebugLoc dl = MI->getDebugLoc();
+ bool isThumb2 = Subtarget->isThumb2();
+
+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+ if (isThumb2) {
+ MRI.constrainRegClass(dest, ARM::rGPRRegisterClass);
+ MRI.constrainRegClass(ptr, ARM::rGPRRegisterClass);
+ }
+
+ unsigned ldrOpc, strOpc, extendOpc;
+ switch (Size) {
+ default: llvm_unreachable("unsupported size for AtomicCmpSwap!");
+ case 1:
+ ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
+ strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
+ extendOpc = isThumb2 ? ARM::t2SXTB : ARM::SXTB;
+ break;
+ case 2:
+ ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
+ strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
+ extendOpc = isThumb2 ? ARM::t2SXTH : ARM::SXTH;
+ break;
+ case 4:
+ ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
+ strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
+ extendOpc = 0;
+ break;
+ }
+
+ MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MF->insert(It, loopMBB);
+ MF->insert(It, exitMBB);
+
+ // Transfer the remainder of BB and its successor edges to exitMBB.
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ TargetRegisterClass *TRC =
+ isThumb2 ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass;
+ unsigned scratch = MRI.createVirtualRegister(TRC);
+ unsigned scratch2 = MRI.createVirtualRegister(TRC);
+
+ // thisMBB:
+ // ...
+ // fallthrough --> loopMBB
+ BB->addSuccessor(loopMBB);
+
+ // loopMBB:
+ // ldrex dest, ptr
+ // (sign extend dest, if required)
+ // cmp dest, incr
+ // cmov.cond scratch2, dest, incr
+ // strex scratch, scratch2, ptr
+ // cmp scratch, #0
+ // bne- loopMBB
+ // fallthrough --> exitMBB
+ BB = loopMBB;
+ MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
+ if (ldrOpc == ARM::t2LDREX)
+ MIB.addImm(0);
+ AddDefaultPred(MIB);
+
+ // Sign extend the value, if necessary.
+ if (signExtend && extendOpc) {
+ oldval = MRI.createVirtualRegister(ARM::GPRRegisterClass);
+ AddDefaultPred(BuildMI(BB, dl, TII->get(extendOpc), oldval)
+ .addReg(dest)
+ .addImm(0));
+ }
+
+ // Build compare and cmov instructions.
+ AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
+ .addReg(oldval).addReg(incr));
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr), scratch2)
+ .addReg(oldval).addReg(incr).addImm(Cond).addReg(ARM::CPSR);
+
+ MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2).addReg(ptr);
+ if (strOpc == ARM::t2STREX)
+ MIB.addImm(0);
+ AddDefaultPred(MIB);
+ AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
+ .addReg(scratch).addImm(0));
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
+ .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
+
+ BB->addSuccessor(loopMBB);
+ BB->addSuccessor(exitMBB);
+
+ // exitMBB:
+ // ...
+ BB = exitMBB;
+
+ MI->eraseFromParent(); // The instruction is gone now.
+
+ return BB;
+}
+
+MachineBasicBlock *
+ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
+ unsigned Op1, unsigned Op2,
+ bool NeedsCarry, bool IsCmpxchg) const {
+ // This also handles ATOMIC_SWAP, indicated by Op1==0.
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction *MF = BB->getParent();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ unsigned destlo = MI->getOperand(0).getReg();
+ unsigned desthi = MI->getOperand(1).getReg();
+ unsigned ptr = MI->getOperand(2).getReg();
+ unsigned vallo = MI->getOperand(3).getReg();
+ unsigned valhi = MI->getOperand(4).getReg();
+ DebugLoc dl = MI->getDebugLoc();
+ bool isThumb2 = Subtarget->isThumb2();
+
+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+ if (isThumb2) {
+ MRI.constrainRegClass(destlo, ARM::rGPRRegisterClass);
+ MRI.constrainRegClass(desthi, ARM::rGPRRegisterClass);
+ MRI.constrainRegClass(ptr, ARM::rGPRRegisterClass);
+ }
+
+ unsigned ldrOpc = isThumb2 ? ARM::t2LDREXD : ARM::LDREXD;
+ unsigned strOpc = isThumb2 ? ARM::t2STREXD : ARM::STREXD;
+
+ MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *contBB = 0, *cont2BB = 0;
+ if (IsCmpxchg) {
+ contBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ cont2BB = MF->CreateMachineBasicBlock(LLVM_BB);
+ }
+ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MF->insert(It, loopMBB);
+ if (IsCmpxchg) {
+ MF->insert(It, contBB);
+ MF->insert(It, cont2BB);
+ }
+ MF->insert(It, exitMBB);
+
+ // Transfer the remainder of BB and its successor edges to exitMBB.
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ TargetRegisterClass *TRC =
+ isThumb2 ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass;
+ unsigned storesuccess = MRI.createVirtualRegister(TRC);
+
+ // thisMBB:
+ // ...
+ // fallthrough --> loopMBB
+ BB->addSuccessor(loopMBB);
+
+ // loopMBB:
+ // ldrexd r2, r3, ptr
+ // <binopa> r0, r2, incr
+ // <binopb> r1, r3, incr
+ // strexd storesuccess, r0, r1, ptr
+ // cmp storesuccess, #0
+ // bne- loopMBB
+ // fallthrough --> exitMBB
+ //
+ // Note that the registers are explicitly specified because there is not any
+ // way to force the register allocator to allocate a register pair.
+ //
+ // FIXME: The hardcoded registers are not necessary for Thumb2, but we
+ // need to properly enforce the restriction that the two output registers
+ // for ldrexd must be different.
+ BB = loopMBB;
+ // Load
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc))
+ .addReg(ARM::R2, RegState::Define)
+ .addReg(ARM::R3, RegState::Define).addReg(ptr));
+ // Copy r2/r3 into dest. (This copy will normally be coalesced.)
+ BuildMI(BB, dl, TII->get(TargetOpcode::COPY), destlo).addReg(ARM::R2);
+ BuildMI(BB, dl, TII->get(TargetOpcode::COPY), desthi).addReg(ARM::R3);
+
+ if (IsCmpxchg) {
+ // Add early exit
+ for (unsigned i = 0; i < 2; i++) {
+ AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr :
+ ARM::CMPrr))
+ .addReg(i == 0 ? destlo : desthi)
+ .addReg(i == 0 ? vallo : valhi));
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
+ .addMBB(exitMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
+ BB->addSuccessor(exitMBB);
+ BB->addSuccessor(i == 0 ? contBB : cont2BB);
+ BB = (i == 0 ? contBB : cont2BB);
+ }
+
+ // Copy to physregs for strexd
+ unsigned setlo = MI->getOperand(5).getReg();
+ unsigned sethi = MI->getOperand(6).getReg();
+ BuildMI(BB, dl, TII->get(TargetOpcode::COPY), ARM::R0).addReg(setlo);
+ BuildMI(BB, dl, TII->get(TargetOpcode::COPY), ARM::R1).addReg(sethi);
+ } else if (Op1) {
+ // Perform binary operation
+ AddDefaultPred(BuildMI(BB, dl, TII->get(Op1), ARM::R0)
+ .addReg(destlo).addReg(vallo))
+ .addReg(NeedsCarry ? ARM::CPSR : 0, getDefRegState(NeedsCarry));
+ AddDefaultPred(BuildMI(BB, dl, TII->get(Op2), ARM::R1)
+ .addReg(desthi).addReg(valhi)).addReg(0);
+ } else {
+ // Copy to physregs for strexd
+ BuildMI(BB, dl, TII->get(TargetOpcode::COPY), ARM::R0).addReg(vallo);
+ BuildMI(BB, dl, TII->get(TargetOpcode::COPY), ARM::R1).addReg(valhi);
+ }
+
+ // Store
+ AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), storesuccess)
+ .addReg(ARM::R0).addReg(ARM::R1).addReg(ptr));
+ // Cmp+jump
+ AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
+ .addReg(storesuccess).addImm(0));
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
+ .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
+
+ BB->addSuccessor(loopMBB);
+ BB->addSuccessor(exitMBB);
+
+ // exitMBB:
+ // ...
+ BB = exitMBB;
+
+ MI->eraseFromParent(); // The instruction is gone now.
+
+ return BB;
+}
+
+/// EmitBasePointerRecalculation - For functions using a base pointer, we
+/// rematerialize it (via the frame pointer).
+void ARMTargetLowering::
+EmitBasePointerRecalculation(MachineInstr *MI, MachineBasicBlock *MBB,
+ MachineBasicBlock *DispatchBB) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII);
+ MachineFunction &MF = *MI->getParent()->getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ const ARMBaseRegisterInfo &RI = AII->getRegisterInfo();
+
+ if (!RI.hasBasePointer(MF)) return;
+
+ MachineBasicBlock::iterator MBBI = MI;
+
+ int32_t NumBytes = AFI->getFramePtrSpillOffset();
+ unsigned FramePtr = RI.getFrameRegister(MF);
+ assert(MF.getTarget().getFrameLowering()->hasFP(MF) &&
+ "Base pointer without frame pointer?");
+
+ if (AFI->isThumb2Function())
+ llvm::emitT2RegPlusImmediate(*MBB, MBBI, MI->getDebugLoc(), ARM::R6,
+ FramePtr, -NumBytes, ARMCC::AL, 0, *AII);
+ else if (AFI->isThumbFunction())
+ llvm::emitThumbRegPlusImmediate(*MBB, MBBI, MI->getDebugLoc(), ARM::R6,
+ FramePtr, -NumBytes, *AII, RI);
+ else
+ llvm::emitARMRegPlusImmediate(*MBB, MBBI, MI->getDebugLoc(), ARM::R6,
+ FramePtr, -NumBytes, ARMCC::AL, 0, *AII);
+
+ if (!RI.needsStackRealignment(MF)) return;
+
+ // If there's dynamic realignment, adjust for it.
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ unsigned MaxAlign = MFI->getMaxAlignment();
+ assert(!AFI->isThumb1OnlyFunction());
+
+ // Emit bic r6, r6, MaxAlign
+ unsigned bicOpc = AFI->isThumbFunction() ? ARM::t2BICri : ARM::BICri;
+ AddDefaultCC(
+ AddDefaultPred(
+ BuildMI(*MBB, MBBI, MI->getDebugLoc(), TII->get(bicOpc), ARM::R6)
+ .addReg(ARM::R6, RegState::Kill)
+ .addImm(MaxAlign - 1)));
+}
+
+/// SetupEntryBlockForSjLj - Insert code into the entry block that creates and
+/// registers the function context.
+void ARMTargetLowering::
+SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB,
+ MachineBasicBlock *DispatchBB, int FI) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+ MachineFunction *MF = MBB->getParent();
+ MachineRegisterInfo *MRI = &MF->getRegInfo();
+ MachineConstantPool *MCP = MF->getConstantPool();
+ ARMFunctionInfo *AFI = MF->getInfo<ARMFunctionInfo>();
+ const Function *F = MF->getFunction();
+
+ bool isThumb = Subtarget->isThumb();
+ bool isThumb2 = Subtarget->isThumb2();
+
+ unsigned PCLabelId = AFI->createPICLabelUId();
+ unsigned PCAdj = (isThumb || isThumb2) ? 4 : 8;
+ ARMConstantPoolValue *CPV =
+ ARMConstantPoolMBB::Create(F->getContext(), DispatchBB, PCLabelId, PCAdj);
+ unsigned CPI = MCP->getConstantPoolIndex(CPV, 4);
+
+ const TargetRegisterClass *TRC =
+ isThumb ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass;
+
+ // Grab constant pool and fixed stack memory operands.
+ MachineMemOperand *CPMMO =
+ MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(),
+ MachineMemOperand::MOLoad, 4, 4);
+
+ MachineMemOperand *FIMMOSt =
+ MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
+ MachineMemOperand::MOStore, 4, 4);
+
+ EmitBasePointerRecalculation(MI, MBB, DispatchBB);
+
+ // Load the address of the dispatch MBB into the jump buffer.
+ if (isThumb2) {
+ // Incoming value: jbuf
+ // ldr.n r5, LCPI1_1
+ // orr r5, r5, #1
+ // add r5, pc
+ // str r5, [$jbuf, #+4] ; &jbuf[1]
+ unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2LDRpci), NewVReg1)
+ .addConstantPoolIndex(CPI)
+ .addMemOperand(CPMMO));
+ // Set the low bit because of thumb mode.
+ unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
+ AddDefaultCC(
+ AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2ORRri), NewVReg2)
+ .addReg(NewVReg1, RegState::Kill)
+ .addImm(0x01)));
+ unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
+ BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg3)
+ .addReg(NewVReg2, RegState::Kill)
+ .addImm(PCLabelId);
+ AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2STRi12))
+ .addReg(NewVReg3, RegState::Kill)
+ .addFrameIndex(FI)
+ .addImm(36) // &jbuf[1] :: pc
+ .addMemOperand(FIMMOSt));
+ } else if (isThumb) {
+ // Incoming value: jbuf
+ // ldr.n r1, LCPI1_4
+ // add r1, pc
+ // mov r2, #1
+ // orrs r1, r2
+ // add r2, $jbuf, #+4 ; &jbuf[1]
+ // str r1, [r2]
+ unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tLDRpci), NewVReg1)
+ .addConstantPoolIndex(CPI)
+ .addMemOperand(CPMMO));
+ unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
+ BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg2)
+ .addReg(NewVReg1, RegState::Kill)
+ .addImm(PCLabelId);
+ // Set the low bit because of thumb mode.
+ unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tMOVi8), NewVReg3)
+ .addReg(ARM::CPSR, RegState::Define)
+ .addImm(1));
+ unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tORR), NewVReg4)
+ .addReg(ARM::CPSR, RegState::Define)
+ .addReg(NewVReg2, RegState::Kill)
+ .addReg(NewVReg3, RegState::Kill));
+ unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tADDrSPi), NewVReg5)
+ .addFrameIndex(FI)
+ .addImm(36)); // &jbuf[1] :: pc
+ AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tSTRi))
+ .addReg(NewVReg4, RegState::Kill)
+ .addReg(NewVReg5, RegState::Kill)
+ .addImm(0)
+ .addMemOperand(FIMMOSt));
+ } else {
+ // Incoming value: jbuf
+ // ldr r1, LCPI1_1
+ // add r1, pc, r1
+ // str r1, [$jbuf, #+4] ; &jbuf[1]
+ unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::LDRi12), NewVReg1)
+ .addConstantPoolIndex(CPI)
+ .addImm(0)
+ .addMemOperand(CPMMO));
+ unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::PICADD), NewVReg2)
+ .addReg(NewVReg1, RegState::Kill)
+ .addImm(PCLabelId));
+ AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::STRi12))
+ .addReg(NewVReg2, RegState::Kill)
+ .addFrameIndex(FI)
+ .addImm(36) // &jbuf[1] :: pc
+ .addMemOperand(FIMMOSt));
+ }
+}
+
+MachineBasicBlock *ARMTargetLowering::
+EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+ MachineFunction *MF = MBB->getParent();
+ MachineRegisterInfo *MRI = &MF->getRegInfo();
+ ARMFunctionInfo *AFI = MF->getInfo<ARMFunctionInfo>();
+ MachineFrameInfo *MFI = MF->getFrameInfo();
+ int FI = MFI->getFunctionContextIndex();
+
+ const TargetRegisterClass *TRC =
+ Subtarget->isThumb() ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass;
+
+ // Get a mapping of the call site numbers to all of the landing pads they're
+ // associated with.
+ DenseMap<unsigned, SmallVector<MachineBasicBlock*, 2> > CallSiteNumToLPad;
+ unsigned MaxCSNum = 0;
+ MachineModuleInfo &MMI = MF->getMMI();
+ for (MachineFunction::iterator BB = MF->begin(), E = MF->end(); BB != E; ++BB) {
+ if (!BB->isLandingPad()) continue;
+
+ // FIXME: We should assert that the EH_LABEL is the first MI in the landing
+ // pad.
+ for (MachineBasicBlock::iterator
+ II = BB->begin(), IE = BB->end(); II != IE; ++II) {
+ if (!II->isEHLabel()) continue;
+
+ MCSymbol *Sym = II->getOperand(0).getMCSymbol();
+ if (!MMI.hasCallSiteLandingPad(Sym)) continue;
+
+ SmallVectorImpl<unsigned> &CallSiteIdxs = MMI.getCallSiteLandingPad(Sym);
+ for (SmallVectorImpl<unsigned>::iterator
+ CSI = CallSiteIdxs.begin(), CSE = CallSiteIdxs.end();
+ CSI != CSE; ++CSI) {
+ CallSiteNumToLPad[*CSI].push_back(BB);
+ MaxCSNum = std::max(MaxCSNum, *CSI);
+ }
+ break;
+ }
+ }
+
+ // Get an ordered list of the machine basic blocks for the jump table.
+ std::vector<MachineBasicBlock*> LPadList;
+ SmallPtrSet<MachineBasicBlock*, 64> InvokeBBs;
+ LPadList.reserve(CallSiteNumToLPad.size());
+ for (unsigned I = 1; I <= MaxCSNum; ++I) {
+ SmallVectorImpl<MachineBasicBlock*> &MBBList = CallSiteNumToLPad[I];
+ for (SmallVectorImpl<MachineBasicBlock*>::iterator
+ II = MBBList.begin(), IE = MBBList.end(); II != IE; ++II) {
+ LPadList.push_back(*II);
+ InvokeBBs.insert((*II)->pred_begin(), (*II)->pred_end());
+ }
+ }
+
+ assert(!LPadList.empty() &&
+ "No landing pad destinations for the dispatch jump table!");
+
+ // Create the jump table and associated information.
+ MachineJumpTableInfo *JTI =
+ MF->getOrCreateJumpTableInfo(MachineJumpTableInfo::EK_Inline);
+ unsigned MJTI = JTI->createJumpTableIndex(LPadList);
+ unsigned UId = AFI->createJumpTableUId();
+
+ // Create the MBBs for the dispatch code.
+
+ // Shove the dispatch's address into the return slot in the function context.
+ MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();
+ DispatchBB->setIsLandingPad();
+
+ MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
+ BuildMI(TrapBB, dl, TII->get(Subtarget->isThumb() ? ARM::tTRAP : ARM::TRAP));
+ DispatchBB->addSuccessor(TrapBB);
+
+ MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();
+ DispatchBB->addSuccessor(DispContBB);
+
+ // Insert and renumber MBBs.
+ MachineBasicBlock *Last = &MF->back();
+ MF->insert(MF->end(), DispatchBB);
+ MF->insert(MF->end(), DispContBB);
+ MF->insert(MF->end(), TrapBB);
+ MF->RenumberBlocks(Last);
+
+ // Insert code into the entry block that creates and registers the function
+ // context.
+ SetupEntryBlockForSjLj(MI, MBB, DispatchBB, FI);
+
+ MachineMemOperand *FIMMOLd =
+ MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
+ MachineMemOperand::MOLoad |
+ MachineMemOperand::MOVolatile, 4, 4);
+
+ if (Subtarget->isThumb2()) {
+ unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2LDRi12), NewVReg1)
+ .addFrameIndex(FI)
+ .addImm(4)
+ .addMemOperand(FIMMOLd));
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPri))
+ .addReg(NewVReg1)
+ .addImm(LPadList.size()));
+ BuildMI(DispatchBB, dl, TII->get(ARM::t2Bcc))
+ .addMBB(TrapBB)
+ .addImm(ARMCC::HI)
+ .addReg(ARM::CPSR);
+
+ unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::t2LEApcrelJT),NewVReg2)
+ .addJumpTableIndex(MJTI)
+ .addImm(UId));
+
+ unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
+ AddDefaultCC(
+ AddDefaultPred(
+ BuildMI(DispContBB, dl, TII->get(ARM::t2ADDrs), NewVReg3)
+ .addReg(NewVReg2, RegState::Kill)
+ .addReg(NewVReg1)
+ .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2))));
+
+ BuildMI(DispContBB, dl, TII->get(ARM::t2BR_JT))
+ .addReg(NewVReg3, RegState::Kill)
+ .addReg(NewVReg1)
+ .addJumpTableIndex(MJTI)
+ .addImm(UId);
+ } else if (Subtarget->isThumb()) {
+ unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tLDRspi), NewVReg1)
+ .addFrameIndex(FI)
+ .addImm(1)
+ .addMemOperand(FIMMOLd));
+
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tCMPi8))
+ .addReg(NewVReg1)
+ .addImm(LPadList.size()));
+ BuildMI(DispatchBB, dl, TII->get(ARM::tBcc))
+ .addMBB(TrapBB)
+ .addImm(ARMCC::HI)
+ .addReg(ARM::CPSR);
+
+ unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLSLri), NewVReg2)
+ .addReg(ARM::CPSR, RegState::Define)
+ .addReg(NewVReg1)
+ .addImm(2));
+
+ unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLEApcrelJT), NewVReg3)
+ .addJumpTableIndex(MJTI)
+ .addImm(UId));
+
+ unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg4)
+ .addReg(ARM::CPSR, RegState::Define)
+ .addReg(NewVReg2, RegState::Kill)
+ .addReg(NewVReg3));
+
+ MachineMemOperand *JTMMOLd =
+ MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(),
+ MachineMemOperand::MOLoad, 4, 4);
+
+ unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLDRi), NewVReg5)
+ .addReg(NewVReg4, RegState::Kill)
+ .addImm(0)
+ .addMemOperand(JTMMOLd));
+
+ unsigned NewVReg6 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg6)
+ .addReg(ARM::CPSR, RegState::Define)
+ .addReg(NewVReg5, RegState::Kill)
+ .addReg(NewVReg3));
+
+ BuildMI(DispContBB, dl, TII->get(ARM::tBR_JTr))
+ .addReg(NewVReg6, RegState::Kill)
+ .addJumpTableIndex(MJTI)
+ .addImm(UId);
+ } else {
+ unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::LDRi12), NewVReg1)
+ .addFrameIndex(FI)
+ .addImm(4)
+ .addMemOperand(FIMMOLd));
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPri))
+ .addReg(NewVReg1)
+ .addImm(LPadList.size()));
+ BuildMI(DispatchBB, dl, TII->get(ARM::Bcc))
+ .addMBB(TrapBB)
+ .addImm(ARMCC::HI)
+ .addReg(ARM::CPSR);
+
+ unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
+ AddDefaultCC(
+ AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::MOVsi), NewVReg2)
+ .addReg(NewVReg1)
+ .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2))));
+ unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg3)
+ .addJumpTableIndex(MJTI)
+ .addImm(UId));
+
+ MachineMemOperand *JTMMOLd =
+ MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(),
+ MachineMemOperand::MOLoad, 4, 4);
+ unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(
+ BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg4)
+ .addReg(NewVReg2, RegState::Kill)
+ .addReg(NewVReg3)
+ .addImm(0)
+ .addMemOperand(JTMMOLd));
+
+ BuildMI(DispContBB, dl, TII->get(ARM::BR_JTadd))
+ .addReg(NewVReg4, RegState::Kill)
+ .addReg(NewVReg3)
+ .addJumpTableIndex(MJTI)
+ .addImm(UId);
+ }
+
+ // Add the jump table entries as successors to the MBB.
+ MachineBasicBlock *PrevMBB = 0;
+ for (std::vector<MachineBasicBlock*>::iterator
+ I = LPadList.begin(), E = LPadList.end(); I != E; ++I) {
+ MachineBasicBlock *CurMBB = *I;
+ if (PrevMBB != CurMBB)
+ DispContBB->addSuccessor(CurMBB);
+ PrevMBB = CurMBB;
+ }
+
+ const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII);
+ const ARMBaseRegisterInfo &RI = AII->getRegisterInfo();
+ const unsigned *SavedRegs = RI.getCalleeSavedRegs(MF);
+ for (SmallPtrSet<MachineBasicBlock*, 64>::iterator
+ I = InvokeBBs.begin(), E = InvokeBBs.end(); I != E; ++I) {
+ MachineBasicBlock *BB = *I;
+
+ // Remove the landing pad successor from the invoke block and replace it
+ // with the new dispatch block.
+ for (MachineBasicBlock::succ_iterator
+ SI = BB->succ_begin(), SE = BB->succ_end(); SI != SE; ++SI) {
+ MachineBasicBlock *SMBB = *SI;
+ if (SMBB->isLandingPad()) {
+ BB->removeSuccessor(SMBB);
+ SMBB->setIsLandingPad(false);
+ }
+ }
+
+ BB->addSuccessor(DispatchBB);
+
+ // Find the invoke call and mark all of the callee-saved registers as
+ // 'implicit defined' so that they're spilled. This prevents code from
+ // moving instructions to before the EH block, where they will never be
+ // executed.
+ for (MachineBasicBlock::reverse_iterator
+ II = BB->rbegin(), IE = BB->rend(); II != IE; ++II) {
+ if (!II->getDesc().isCall()) continue;
+
+ DenseMap<unsigned, bool> DefRegs;
+ for (MachineInstr::mop_iterator
+ OI = II->operands_begin(), OE = II->operands_end();
+ OI != OE; ++OI) {
+ if (!OI->isReg()) continue;
+ DefRegs[OI->getReg()] = true;
+ }
+
+ MachineInstrBuilder MIB(&*II);
+
+ for (unsigned i = 0; SavedRegs[i] != 0; ++i) {
+ if (!TRC->contains(SavedRegs[i])) continue;
+ if (!DefRegs[SavedRegs[i]])
+ MIB.addReg(SavedRegs[i], RegState::ImplicitDefine | RegState::Dead);
+ }
+
+ break;
+ }
+ }
+
+ // The instruction is gone now.
+ MI->eraseFromParent();
+
+ return MBB;
+}
+
+static
+MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
+ for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
+ E = MBB->succ_end(); I != E; ++I)
+ if (*I != Succ)
+ return *I;
+ llvm_unreachable("Expecting a BB with two successors!");
+}
+
+MachineBasicBlock *
+ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+ bool isThumb2 = Subtarget->isThumb2();
+ switch (MI->getOpcode()) {
+ default: {
+ MI->dump();
+ llvm_unreachable("Unexpected instr type to insert");
+ }
+ // The Thumb2 pre-indexed stores have the same MI operands, they just
+ // define them differently in the .td files from the isel patterns, so
+ // they need pseudos.
+ case ARM::t2STR_preidx:
+ MI->setDesc(TII->get(ARM::t2STR_PRE));
+ return BB;
+ case ARM::t2STRB_preidx:
+ MI->setDesc(TII->get(ARM::t2STRB_PRE));
+ return BB;
+ case ARM::t2STRH_preidx:
+ MI->setDesc(TII->get(ARM::t2STRH_PRE));
+ return BB;
+
+ case ARM::STRi_preidx:
+ case ARM::STRBi_preidx: {
+ unsigned NewOpc = MI->getOpcode() == ARM::STRi_preidx ?
+ ARM::STR_PRE_IMM : ARM::STRB_PRE_IMM;
+ // Decode the offset.
+ unsigned Offset = MI->getOperand(4).getImm();
+ bool isSub = ARM_AM::getAM2Op(Offset) == ARM_AM::sub;
+ Offset = ARM_AM::getAM2Offset(Offset);
+ if (isSub)
+ Offset = -Offset;
+
+ MachineMemOperand *MMO = *MI->memoperands_begin();
+ BuildMI(*BB, MI, dl, TII->get(NewOpc))
+ .addOperand(MI->getOperand(0)) // Rn_wb
+ .addOperand(MI->getOperand(1)) // Rt
+ .addOperand(MI->getOperand(2)) // Rn
+ .addImm(Offset) // offset (skip GPR==zero_reg)
+ .addOperand(MI->getOperand(5)) // pred
+ .addOperand(MI->getOperand(6))
+ .addMemOperand(MMO);
+ MI->eraseFromParent();
+ return BB;
+ }
+ case ARM::STRr_preidx:
+ case ARM::STRBr_preidx:
+ case ARM::STRH_preidx: {
+ unsigned NewOpc;
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("unexpected opcode!");
+ case ARM::STRr_preidx: NewOpc = ARM::STR_PRE_REG; break;
+ case ARM::STRBr_preidx: NewOpc = ARM::STRB_PRE_REG; break;
+ case ARM::STRH_preidx: NewOpc = ARM::STRH_PRE; break;
+ }
+ MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc));
+ for (unsigned i = 0; i < MI->getNumOperands(); ++i)
+ MIB.addOperand(MI->getOperand(i));
+ MI->eraseFromParent();
+ return BB;
+ }
+ case ARM::ATOMIC_LOAD_ADD_I8:
+ return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
+ case ARM::ATOMIC_LOAD_ADD_I16:
+ return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
+ case ARM::ATOMIC_LOAD_ADD_I32:
+ return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
+
+ case ARM::ATOMIC_LOAD_AND_I8:
+ return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
+ case ARM::ATOMIC_LOAD_AND_I16:
+ return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
+ case ARM::ATOMIC_LOAD_AND_I32:
+ return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
+
+ case ARM::ATOMIC_LOAD_OR_I8:
+ return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
+ case ARM::ATOMIC_LOAD_OR_I16:
+ return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
+ case ARM::ATOMIC_LOAD_OR_I32:
+ return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
+
+ case ARM::ATOMIC_LOAD_XOR_I8:
+ return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2EORrr : ARM::EORrr);
+ case ARM::ATOMIC_LOAD_XOR_I16:
+ return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2EORrr : ARM::EORrr);
+ case ARM::ATOMIC_LOAD_XOR_I32:
+ return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2EORrr : ARM::EORrr);
+
+ case ARM::ATOMIC_LOAD_NAND_I8:
+ return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2BICrr : ARM::BICrr);
+ case ARM::ATOMIC_LOAD_NAND_I16:
+ return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2BICrr : ARM::BICrr);
+ case ARM::ATOMIC_LOAD_NAND_I32:
+ return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2BICrr : ARM::BICrr);
+
+ case ARM::ATOMIC_LOAD_SUB_I8:
+ return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
+ case ARM::ATOMIC_LOAD_SUB_I16:
+ return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
+ case ARM::ATOMIC_LOAD_SUB_I32:
+ return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
+
+ case ARM::ATOMIC_LOAD_MIN_I8:
+ return EmitAtomicBinaryMinMax(MI, BB, 1, true, ARMCC::LT);
+ case ARM::ATOMIC_LOAD_MIN_I16:
+ return EmitAtomicBinaryMinMax(MI, BB, 2, true, ARMCC::LT);
+ case ARM::ATOMIC_LOAD_MIN_I32:
+ return EmitAtomicBinaryMinMax(MI, BB, 4, true, ARMCC::LT);
+
+ case ARM::ATOMIC_LOAD_MAX_I8:
+ return EmitAtomicBinaryMinMax(MI, BB, 1, true, ARMCC::GT);
+ case ARM::ATOMIC_LOAD_MAX_I16:
+ return EmitAtomicBinaryMinMax(MI, BB, 2, true, ARMCC::GT);
+ case ARM::ATOMIC_LOAD_MAX_I32:
+ return EmitAtomicBinaryMinMax(MI, BB, 4, true, ARMCC::GT);
+
+ case ARM::ATOMIC_LOAD_UMIN_I8:
+ return EmitAtomicBinaryMinMax(MI, BB, 1, false, ARMCC::LO);
+ case ARM::ATOMIC_LOAD_UMIN_I16:
+ return EmitAtomicBinaryMinMax(MI, BB, 2, false, ARMCC::LO);
+ case ARM::ATOMIC_LOAD_UMIN_I32:
+ return EmitAtomicBinaryMinMax(MI, BB, 4, false, ARMCC::LO);
+
+ case ARM::ATOMIC_LOAD_UMAX_I8:
+ return EmitAtomicBinaryMinMax(MI, BB, 1, false, ARMCC::HI);
+ case ARM::ATOMIC_LOAD_UMAX_I16:
+ return EmitAtomicBinaryMinMax(MI, BB, 2, false, ARMCC::HI);
+ case ARM::ATOMIC_LOAD_UMAX_I32:
+ return EmitAtomicBinaryMinMax(MI, BB, 4, false, ARMCC::HI);
+
+ case ARM::ATOMIC_SWAP_I8: return EmitAtomicBinary(MI, BB, 1, 0);
+ case ARM::ATOMIC_SWAP_I16: return EmitAtomicBinary(MI, BB, 2, 0);
+ case ARM::ATOMIC_SWAP_I32: return EmitAtomicBinary(MI, BB, 4, 0);
+
+ case ARM::ATOMIC_CMP_SWAP_I8: return EmitAtomicCmpSwap(MI, BB, 1);
+ case ARM::ATOMIC_CMP_SWAP_I16: return EmitAtomicCmpSwap(MI, BB, 2);
+ case ARM::ATOMIC_CMP_SWAP_I32: return EmitAtomicCmpSwap(MI, BB, 4);
+
+
+ case ARM::ATOMADD6432:
+ return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr,
+ isThumb2 ? ARM::t2ADCrr : ARM::ADCrr,
+ /*NeedsCarry*/ true);
+ case ARM::ATOMSUB6432:
+ return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
+ isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
+ /*NeedsCarry*/ true);
+ case ARM::ATOMOR6432:
+ return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr,
+ isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
+ case ARM::ATOMXOR6432:
+ return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2EORrr : ARM::EORrr,
+ isThumb2 ? ARM::t2EORrr : ARM::EORrr);
+ case ARM::ATOMAND6432:
+ return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr,
+ isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
+ case ARM::ATOMSWAP6432:
+ return EmitAtomicBinary64(MI, BB, 0, 0, false);
+ case ARM::ATOMCMPXCHG6432:
+ return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
+ isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
+ /*NeedsCarry*/ false, /*IsCmpxchg*/true);
+
+ case ARM::tMOVCCr_pseudo: {
+ // To "insert" a SELECT_CC instruction, we actually have to insert the
+ // diamond control-flow pattern. The incoming instruction knows the
+ // destination vreg to set, the condition code register to branch on, the
+ // true/false values to select between, and a branch opcode to use.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // cmpTY ccX, r1, r2
+ // bCC copy1MBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, copy0MBB);
+ F->insert(It, sinkMBB);
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
+ BuildMI(BB, dl, TII->get(ARM::tBcc)).addMBB(sinkMBB)
+ .addImm(MI->getOperand(3).getImm()).addReg(MI->getOperand(4).getReg());
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to sinkMBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
+ // ...
+ BB = sinkMBB;
+ BuildMI(*BB, BB->begin(), dl,
+ TII->get(ARM::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
+ .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+ }
+
+ case ARM::BCCi64:
+ case ARM::BCCZi64: {
+ // If there is an unconditional branch to the other successor, remove it.
+ BB->erase(llvm::next(MachineBasicBlock::iterator(MI)), BB->end());
+
+ // Compare both parts that make up the double comparison separately for
+ // equality.
+ bool RHSisZero = MI->getOpcode() == ARM::BCCZi64;
+
+ unsigned LHS1 = MI->getOperand(1).getReg();
+ unsigned LHS2 = MI->getOperand(2).getReg();
+ if (RHSisZero) {
+ AddDefaultPred(BuildMI(BB, dl,
+ TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
+ .addReg(LHS1).addImm(0));
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
+ .addReg(LHS2).addImm(0)
+ .addImm(ARMCC::EQ).addReg(ARM::CPSR);
+ } else {
+ unsigned RHS1 = MI->getOperand(3).getReg();
+ unsigned RHS2 = MI->getOperand(4).getReg();
+ AddDefaultPred(BuildMI(BB, dl,
+ TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
+ .addReg(LHS1).addReg(RHS1));
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
+ .addReg(LHS2).addReg(RHS2)
+ .addImm(ARMCC::EQ).addReg(ARM::CPSR);
+ }
+
+ MachineBasicBlock *destMBB = MI->getOperand(RHSisZero ? 3 : 5).getMBB();
+ MachineBasicBlock *exitMBB = OtherSucc(BB, destMBB);
+ if (MI->getOperand(0).getImm() == ARMCC::NE)
+ std::swap(destMBB, exitMBB);
+
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
+ .addMBB(destMBB).addImm(ARMCC::EQ).addReg(ARM::CPSR);
+ if (isThumb2)
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2B)).addMBB(exitMBB));
+ else
+ BuildMI(BB, dl, TII->get(ARM::B)) .addMBB(exitMBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+ }
+
+ case ARM::ABS:
+ case ARM::t2ABS: {
+ // To insert an ABS instruction, we have to insert the
+ // diamond control-flow pattern. The incoming instruction knows the
+ // source vreg to test against 0, the destination vreg to set,
+ // the condition code register to branch on, the
+ // true/false values to select between, and a branch opcode to use.
+ // It transforms
+ // V1 = ABS V0
+ // into
+ // V2 = MOVS V0
+ // BCC (branch to SinkBB if V0 >= 0)
+ // RSBBB: V3 = RSBri V2, 0 (compute ABS if V2 < 0)
+ // SinkBB: V1 = PHI(V2, V3)
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator BBI = BB;
+ ++BBI;
+ MachineFunction *Fn = BB->getParent();
+ MachineBasicBlock *RSBBB = Fn->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *SinkBB = Fn->CreateMachineBasicBlock(LLVM_BB);
+ Fn->insert(BBI, RSBBB);
+ Fn->insert(BBI, SinkBB);
+
+ unsigned int ABSSrcReg = MI->getOperand(1).getReg();
+ unsigned int ABSDstReg = MI->getOperand(0).getReg();
+ bool isThumb2 = Subtarget->isThumb2();
+ MachineRegisterInfo &MRI = Fn->getRegInfo();
+ // In Thumb mode S must not be specified if source register is the SP or
+ // PC and if destination register is the SP, so restrict register class
+ unsigned NewMovDstReg = MRI.createVirtualRegister(
+ isThumb2 ? ARM::rGPRRegisterClass : ARM::GPRRegisterClass);
+ unsigned NewRsbDstReg = MRI.createVirtualRegister(
+ isThumb2 ? ARM::rGPRRegisterClass : ARM::GPRRegisterClass);
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ SinkBB->splice(SinkBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ SinkBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ BB->addSuccessor(RSBBB);
+ BB->addSuccessor(SinkBB);
+
+ // fall through to SinkMBB
+ RSBBB->addSuccessor(SinkBB);
+
+ // insert a movs at the end of BB
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2MOVr : ARM::MOVr),
+ NewMovDstReg)
+ .addReg(ABSSrcReg, RegState::Kill)
+ .addImm((unsigned)ARMCC::AL).addReg(0)
+ .addReg(ARM::CPSR, RegState::Define);
+
+ // insert a bcc with opposite CC to ARMCC::MI at the end of BB
+ BuildMI(BB, dl,
+ TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)).addMBB(SinkBB)
+ .addImm(ARMCC::getOppositeCondition(ARMCC::MI)).addReg(ARM::CPSR);
+
+ // insert rsbri in RSBBB
+ // Note: BCC and rsbri will be converted into predicated rsbmi
+ // by if-conversion pass
+ BuildMI(*RSBBB, RSBBB->begin(), dl,
+ TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg)
+ .addReg(NewMovDstReg, RegState::Kill)
+ .addImm(0).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
+
+ // insert PHI in SinkBB,
+ // reuse ABSDstReg to not change uses of ABS instruction
+ BuildMI(*SinkBB, SinkBB->begin(), dl,
+ TII->get(ARM::PHI), ABSDstReg)
+ .addReg(NewRsbDstReg).addMBB(RSBBB)
+ .addReg(NewMovDstReg).addMBB(BB);
+
+ // remove ABS instruction
+ MI->eraseFromParent();
+
+ // return last added BB
+ return SinkBB;
+ }
+ }
+}
+
+void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
+ SDNode *Node) const {
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (!MCID.hasPostISelHook()) {
+ assert(!convertAddSubFlagsOpcode(MI->getOpcode()) &&
+ "Pseudo flag-setting opcodes must be marked with 'hasPostISelHook'");
+ return;
+ }
+
+ // Adjust potentially 's' setting instructions after isel, i.e. ADC, SBC, RSB,
+ // RSC. Coming out of isel, they have an implicit CPSR def, but the optional
+ // operand is still set to noreg. If needed, set the optional operand's
+ // register to CPSR, and remove the redundant implicit def.
+ //
+ // e.g. ADCS (...opt:%noreg, CPSR<imp-def>) -> ADC (... opt:CPSR<def>).
+
+ // Rename pseudo opcodes.
+ unsigned NewOpc = convertAddSubFlagsOpcode(MI->getOpcode());
+ if (NewOpc) {
+ const ARMBaseInstrInfo *TII =
+ static_cast<const ARMBaseInstrInfo*>(getTargetMachine().getInstrInfo());
+ MI->setDesc(TII->get(NewOpc));
+ }
+ unsigned ccOutIdx = MCID.getNumOperands() - 1;
+
+ // Any ARM instruction that sets the 's' bit should specify an optional
+ // "cc_out" operand in the last operand position.
+ if (!MCID.hasOptionalDef() || !MCID.OpInfo[ccOutIdx].isOptionalDef()) {
+ assert(!NewOpc && "Optional cc_out operand required");
+ return;
+ }
+ // Look for an implicit def of CPSR added by MachineInstr ctor. Remove it
+ // since we already have an optional CPSR def.
+ bool definesCPSR = false;
+ bool deadCPSR = false;
+ for (unsigned i = MCID.getNumOperands(), e = MI->getNumOperands();
+ i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR) {
+ definesCPSR = true;
+ if (MO.isDead())
+ deadCPSR = true;
+ MI->RemoveOperand(i);
+ break;
+ }
+ }
+ if (!definesCPSR) {
+ assert(!NewOpc && "Optional cc_out operand required");
+ return;
+ }
+ assert(deadCPSR == !Node->hasAnyUseOfValue(1) && "inconsistent dead flag");
+ if (deadCPSR) {
+ assert(!MI->getOperand(ccOutIdx).getReg() &&
+ "expect uninitialized optional cc_out operand");
+ return;
+ }
+
+ // If this instruction was defined with an optional CPSR def and its dag node
+ // had a live implicit CPSR def, then activate the optional CPSR def.
+ MachineOperand &MO = MI->getOperand(ccOutIdx);
+ MO.setReg(ARM::CPSR);
+ MO.setIsDef(true);
+}
+
+//===----------------------------------------------------------------------===//
+// ARM Optimization Hooks
+//===----------------------------------------------------------------------===//
+
+static
+SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ SelectionDAG &DAG = DCI.DAG;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT VT = N->getValueType(0);
+ unsigned Opc = N->getOpcode();
+ bool isSlctCC = Slct.getOpcode() == ISD::SELECT_CC;
+ SDValue LHS = isSlctCC ? Slct.getOperand(2) : Slct.getOperand(1);
+ SDValue RHS = isSlctCC ? Slct.getOperand(3) : Slct.getOperand(2);
+ ISD::CondCode CC = ISD::SETCC_INVALID;
+
+ if (isSlctCC) {
+ CC = cast<CondCodeSDNode>(Slct.getOperand(4))->get();
+ } else {
+ SDValue CCOp = Slct.getOperand(0);
+ if (CCOp.getOpcode() == ISD::SETCC)
+ CC = cast<CondCodeSDNode>(CCOp.getOperand(2))->get();
+ }
+
+ bool DoXform = false;
+ bool InvCC = false;
+ assert ((Opc == ISD::ADD || (Opc == ISD::SUB && Slct == N->getOperand(1))) &&
+ "Bad input!");
+
+ if (LHS.getOpcode() == ISD::Constant &&
+ cast<ConstantSDNode>(LHS)->isNullValue()) {
+ DoXform = true;
+ } else if (CC != ISD::SETCC_INVALID &&
+ RHS.getOpcode() == ISD::Constant &&
+ cast<ConstantSDNode>(RHS)->isNullValue()) {
+ std::swap(LHS, RHS);
+ SDValue Op0 = Slct.getOperand(0);
+ EVT OpVT = isSlctCC ? Op0.getValueType() :
+ Op0.getOperand(0).getValueType();
+ bool isInt = OpVT.isInteger();
+ CC = ISD::getSetCCInverse(CC, isInt);
+
+ if (!TLI.isCondCodeLegal(CC, OpVT))
+ return SDValue(); // Inverse operator isn't legal.
+
+ DoXform = true;
+ InvCC = true;
+ }
+
+ if (DoXform) {
+ SDValue Result = DAG.getNode(Opc, RHS.getDebugLoc(), VT, OtherOp, RHS);
+ if (isSlctCC)
+ return DAG.getSelectCC(N->getDebugLoc(), OtherOp, Result,
+ Slct.getOperand(0), Slct.getOperand(1), CC);
+ SDValue CCOp = Slct.getOperand(0);
+ if (InvCC)
+ CCOp = DAG.getSetCC(Slct.getDebugLoc(), CCOp.getValueType(),
+ CCOp.getOperand(0), CCOp.getOperand(1), CC);
+ return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT,
+ CCOp, OtherOp, Result);
+ }
+ return SDValue();
+}
+
+// AddCombineToVPADDL- For pair-wise add on neon, use the vpaddl instruction
+// (only after legalization).
+static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+
+ // Only perform optimization if after legalize, and if NEON is available. We
+ // also expected both operands to be BUILD_VECTORs.
+ if (DCI.isBeforeLegalize() || !Subtarget->hasNEON()
+ || N0.getOpcode() != ISD::BUILD_VECTOR
+ || N1.getOpcode() != ISD::BUILD_VECTOR)
+ return SDValue();
+
+ // Check output type since VPADDL operand elements can only be 8, 16, or 32.
+ EVT VT = N->getValueType(0);
+ if (!VT.isInteger() || VT.getVectorElementType() == MVT::i64)
+ return SDValue();
+
+ // Check that the vector operands are of the right form.
+ // N0 and N1 are BUILD_VECTOR nodes with N number of EXTRACT_VECTOR
+ // operands, where N is the size of the formed vector.
+ // Each EXTRACT_VECTOR should have the same input vector and odd or even
+ // index such that we have a pair wise add pattern.
+
+ // Grab the vector that all EXTRACT_VECTOR nodes should be referencing.
+ if (N0->getOperand(0)->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+ return SDValue();
+ SDValue Vec = N0->getOperand(0)->getOperand(0);
+ SDNode *V = Vec.getNode();
+ unsigned nextIndex = 0;
+
+ // For each operands to the ADD which are BUILD_VECTORs,
+ // check to see if each of their operands are an EXTRACT_VECTOR with
+ // the same vector and appropriate index.
+ for (unsigned i = 0, e = N0->getNumOperands(); i != e; ++i) {
+ if (N0->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT
+ && N1->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
+
+ SDValue ExtVec0 = N0->getOperand(i);
+ SDValue ExtVec1 = N1->getOperand(i);
+
+ // First operand is the vector, verify its the same.
+ if (V != ExtVec0->getOperand(0).getNode() ||
+ V != ExtVec1->getOperand(0).getNode())
+ return SDValue();
+
+ // Second is the constant, verify its correct.
+ ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(ExtVec0->getOperand(1));
+ ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(ExtVec1->getOperand(1));
+
+ // For the constant, we want to see all the even or all the odd.
+ if (!C0 || !C1 || C0->getZExtValue() != nextIndex
+ || C1->getZExtValue() != nextIndex+1)
+ return SDValue();
+
+ // Increment index.
+ nextIndex+=2;
+ } else
+ return SDValue();
+ }
+
+ // Create VPADDL node.
+ SelectionDAG &DAG = DCI.DAG;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ // Build operand list.
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddls,
+ TLI.getPointerTy()));
+
+ // Input is the vector.
+ Ops.push_back(Vec);
+
+ // Get widened type and narrowed type.
+ MVT widenType;
+ unsigned numElem = VT.getVectorNumElements();
+ switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
+ case MVT::i8: widenType = MVT::getVectorVT(MVT::i16, numElem); break;
+ case MVT::i16: widenType = MVT::getVectorVT(MVT::i32, numElem); break;
+ case MVT::i32: widenType = MVT::getVectorVT(MVT::i64, numElem); break;
+ default:
+ assert(0 && "Invalid vector element type for padd optimization.");
+ }
+
+ SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(),
+ widenType, &Ops[0], Ops.size());
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, tmp);
+}
+
+/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
+/// operands N0 and N1. This is a helper for PerformADDCombine that is
+/// called with the default operands, and if that fails, with commuted
+/// operands.
+static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget){
+
+ // Attempt to create vpaddl for this add.
+ SDValue Result = AddCombineToVPADDL(N, N0, N1, DCI, Subtarget);
+ if (Result.getNode())
+ return Result;
+
+ // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
+ if (N0.getOpcode() == ISD::SELECT && N0.getNode()->hasOneUse()) {
+ SDValue Result = combineSelectAndUse(N, N0, N1, DCI);
+ if (Result.getNode()) return Result;
+ }
+ return SDValue();
+}
+
+/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
+///
+static SDValue PerformADDCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ // First try with the default operand order.
+ SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget);
+ if (Result.getNode())
+ return Result;
+
+ // If that didn't work, try again with the operands commuted.
+ return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget);
+}
+
+/// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
+///
+static SDValue PerformSUBCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))
+ if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) {
+ SDValue Result = combineSelectAndUse(N, N1, N0, DCI);
+ if (Result.getNode()) return Result;
+ }
+
+ return SDValue();
+}
+
+/// PerformVMULCombine
+/// Distribute (A + B) * C to (A * C) + (B * C) to take advantage of the
+/// special multiplier accumulator forwarding.
+/// vmul d3, d0, d2
+/// vmla d3, d1, d2
+/// is faster than
+/// vadd d3, d0, d1
+/// vmul d3, d3, d2
+static SDValue PerformVMULCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+ if (!Subtarget->hasVMLxForwarding())
+ return SDValue();
+
+ SelectionDAG &DAG = DCI.DAG;
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ unsigned Opcode = N0.getOpcode();
+ if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
+ Opcode != ISD::FADD && Opcode != ISD::FSUB) {
+ Opcode = N1.getOpcode();
+ if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
+ Opcode != ISD::FADD && Opcode != ISD::FSUB)
+ return SDValue();
+ std::swap(N0, N1);
+ }
+
+ EVT VT = N->getValueType(0);
+ DebugLoc DL = N->getDebugLoc();
+ SDValue N00 = N0->getOperand(0);
+ SDValue N01 = N0->getOperand(1);
+ return DAG.getNode(Opcode, DL, VT,
+ DAG.getNode(ISD::MUL, DL, VT, N00, N1),
+ DAG.getNode(ISD::MUL, DL, VT, N01, N1));
+}
+
+static SDValue PerformMULCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+ SelectionDAG &DAG = DCI.DAG;
+
+ if (Subtarget->isThumb1Only())
+ return SDValue();
+
+ if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ if (VT.is64BitVector() || VT.is128BitVector())
+ return PerformVMULCombine(N, DCI, Subtarget);
+ if (VT != MVT::i32)
+ return SDValue();
+
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (!C)
+ return SDValue();
+
+ uint64_t MulAmt = C->getZExtValue();
+ unsigned ShiftAmt = CountTrailingZeros_64(MulAmt);
+ ShiftAmt = ShiftAmt & (32 - 1);
+ SDValue V = N->getOperand(0);
+ DebugLoc DL = N->getDebugLoc();
+
+ SDValue Res;
+ MulAmt >>= ShiftAmt;
+ if (isPowerOf2_32(MulAmt - 1)) {
+ // (mul x, 2^N + 1) => (add (shl x, N), x)
+ Res = DAG.getNode(ISD::ADD, DL, VT,
+ V, DAG.getNode(ISD::SHL, DL, VT,
+ V, DAG.getConstant(Log2_32(MulAmt-1),
+ MVT::i32)));
+ } else if (isPowerOf2_32(MulAmt + 1)) {
+ // (mul x, 2^N - 1) => (sub (shl x, N), x)
+ Res = DAG.getNode(ISD::SUB, DL, VT,
+ DAG.getNode(ISD::SHL, DL, VT,
+ V, DAG.getConstant(Log2_32(MulAmt+1),
+ MVT::i32)),
+ V);
+ } else
+ return SDValue();
+
+ if (ShiftAmt != 0)
+ Res = DAG.getNode(ISD::SHL, DL, VT, Res,
+ DAG.getConstant(ShiftAmt, MVT::i32));
+
+ // Do not add new nodes to DAG combiner worklist.
+ DCI.CombineTo(N, Res, false);
+ return SDValue();
+}
+
+static SDValue PerformANDCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+
+ // Attempt to use immediate-form VBIC
+ BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
+ DebugLoc dl = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+ SelectionDAG &DAG = DCI.DAG;
+
+ if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
+ return SDValue();
+
+ APInt SplatBits, SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ if (BVN &&
+ BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
+ if (SplatBitSize <= 64) {
+ EVT VbicVT;
+ SDValue Val = isNEONModifiedImm((~SplatBits).getZExtValue(),
+ SplatUndef.getZExtValue(), SplatBitSize,
+ DAG, VbicVT, VT.is128BitVector(),
+ OtherModImm);
+ if (Val.getNode()) {
+ SDValue Input =
+ DAG.getNode(ISD::BITCAST, dl, VbicVT, N->getOperand(0));
+ SDValue Vbic = DAG.getNode(ARMISD::VBICIMM, dl, VbicVT, Input, Val);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Vbic);
+ }
+ }
+ }
+
+ return SDValue();
+}
+
+/// PerformORCombine - Target-specific dag combine xforms for ISD::OR
+static SDValue PerformORCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+ // Attempt to use immediate-form VORR
+ BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
+ DebugLoc dl = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+ SelectionDAG &DAG = DCI.DAG;
+
+ if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
+ return SDValue();
+
+ APInt SplatBits, SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ if (BVN && Subtarget->hasNEON() &&
+ BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
+ if (SplatBitSize <= 64) {
+ EVT VorrVT;
+ SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),
+ SplatUndef.getZExtValue(), SplatBitSize,
+ DAG, VorrVT, VT.is128BitVector(),
+ OtherModImm);
+ if (Val.getNode()) {
+ SDValue Input =
+ DAG.getNode(ISD::BITCAST, dl, VorrVT, N->getOperand(0));
+ SDValue Vorr = DAG.getNode(ARMISD::VORRIMM, dl, VorrVT, Input, Val);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Vorr);
+ }
+ }
+ }
+
+ SDValue N0 = N->getOperand(0);
+ if (N0.getOpcode() != ISD::AND)
+ return SDValue();
+ SDValue N1 = N->getOperand(1);
+
+ // (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant.
+ if (Subtarget->hasNEON() && N1.getOpcode() == ISD::AND && VT.isVector() &&
+ DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
+ APInt SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+
+ BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1));
+ APInt SplatBits0;
+ if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize,
+ HasAnyUndefs) && !HasAnyUndefs) {
+ BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1));
+ APInt SplatBits1;
+ if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize,
+ HasAnyUndefs) && !HasAnyUndefs &&
+ SplatBits0 == ~SplatBits1) {
+ // Canonicalize the vector type to make instruction selection simpler.
+ EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
+ SDValue Result = DAG.getNode(ARMISD::VBSL, dl, CanonicalVT,
+ N0->getOperand(1), N0->getOperand(0),
+ N1->getOperand(0));
+ return DAG.getNode(ISD::BITCAST, dl, VT, Result);
+ }
+ }
+ }
+
+ // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when
+ // reasonable.
+
+ // BFI is only available on V6T2+
+ if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops())
+ return SDValue();
+
+ DebugLoc DL = N->getDebugLoc();
+ // 1) or (and A, mask), val => ARMbfi A, val, mask
+ // iff (val & mask) == val
+ //
+ // 2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
+ // 2a) iff isBitFieldInvertedMask(mask) && isBitFieldInvertedMask(~mask2)
+ // && mask == ~mask2
+ // 2b) iff isBitFieldInvertedMask(~mask) && isBitFieldInvertedMask(mask2)
+ // && ~mask == mask2
+ // (i.e., copy a bitfield value into another bitfield of the same width)
+
+ if (VT != MVT::i32)
+ return SDValue();
+
+ SDValue N00 = N0.getOperand(0);
+
+ // The value and the mask need to be constants so we can verify this is
+ // actually a bitfield set. If the mask is 0xffff, we can do better
+ // via a movt instruction, so don't use BFI in that case.
+ SDValue MaskOp = N0.getOperand(1);
+ ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(MaskOp);
+ if (!MaskC)
+ return SDValue();
+ unsigned Mask = MaskC->getZExtValue();
+ if (Mask == 0xffff)
+ return SDValue();
+ SDValue Res;
+ // Case (1): or (and A, mask), val => ARMbfi A, val, mask
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ if (N1C) {
+ unsigned Val = N1C->getZExtValue();
+ if ((Val & ~Mask) != Val)
+ return SDValue();
+
+ if (ARM::isBitFieldInvertedMask(Mask)) {
+ Val >>= CountTrailingZeros_32(~Mask);
+
+ Res = DAG.getNode(ARMISD::BFI, DL, VT, N00,
+ DAG.getConstant(Val, MVT::i32),
+ DAG.getConstant(Mask, MVT::i32));
+
+ // Do not add new nodes to DAG combiner worklist.
+ DCI.CombineTo(N, Res, false);
+ return SDValue();
+ }
+ } else if (N1.getOpcode() == ISD::AND) {
+ // case (2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
+ ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
+ if (!N11C)
+ return SDValue();
+ unsigned Mask2 = N11C->getZExtValue();
+
+ // Mask and ~Mask2 (or reverse) must be equivalent for the BFI pattern
+ // as is to match.
+ if (ARM::isBitFieldInvertedMask(Mask) &&
+ (Mask == ~Mask2)) {
+ // The pack halfword instruction works better for masks that fit it,
+ // so use that when it's available.
+ if (Subtarget->hasT2ExtractPack() &&
+ (Mask == 0xffff || Mask == 0xffff0000))
+ return SDValue();
+ // 2a
+ unsigned amt = CountTrailingZeros_32(Mask2);
+ Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0),
+ DAG.getConstant(amt, MVT::i32));
+ Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, Res,
+ DAG.getConstant(Mask, MVT::i32));
+ // Do not add new nodes to DAG combiner worklist.
+ DCI.CombineTo(N, Res, false);
+ return SDValue();
+ } else if (ARM::isBitFieldInvertedMask(~Mask) &&
+ (~Mask == Mask2)) {
+ // The pack halfword instruction works better for masks that fit it,
+ // so use that when it's available.
+ if (Subtarget->hasT2ExtractPack() &&
+ (Mask2 == 0xffff || Mask2 == 0xffff0000))
+ return SDValue();
+ // 2b
+ unsigned lsb = CountTrailingZeros_32(Mask);
+ Res = DAG.getNode(ISD::SRL, DL, VT, N00,
+ DAG.getConstant(lsb, MVT::i32));
+ Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res,
+ DAG.getConstant(Mask2, MVT::i32));
+ // Do not add new nodes to DAG combiner worklist.
+ DCI.CombineTo(N, Res, false);
+ return SDValue();
+ }
+ }
+
+ if (DAG.MaskedValueIsZero(N1, MaskC->getAPIntValue()) &&
+ N00.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N00.getOperand(1)) &&
+ ARM::isBitFieldInvertedMask(~Mask)) {
+ // Case (3): or (and (shl A, #shamt), mask), B => ARMbfi B, A, ~mask
+ // where lsb(mask) == #shamt and masked bits of B are known zero.
+ SDValue ShAmt = N00.getOperand(1);
+ unsigned ShAmtC = cast<ConstantSDNode>(ShAmt)->getZExtValue();
+ unsigned LSB = CountTrailingZeros_32(Mask);
+ if (ShAmtC != LSB)
+ return SDValue();
+
+ Res = DAG.getNode(ARMISD::BFI, DL, VT, N1, N00.getOperand(0),
+ DAG.getConstant(~Mask, MVT::i32));
+
+ // Do not add new nodes to DAG combiner worklist.
+ DCI.CombineTo(N, Res, false);
+ }
+
+ return SDValue();
+}
+
+/// PerformBFICombine - (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff
+/// the bits being cleared by the AND are not demanded by the BFI.
+static SDValue PerformBFICombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ SDValue N1 = N->getOperand(1);
+ if (N1.getOpcode() == ISD::AND) {
+ ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
+ if (!N11C)
+ return SDValue();
+ unsigned InvMask = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
+ unsigned LSB = CountTrailingZeros_32(~InvMask);
+ unsigned Width = (32 - CountLeadingZeros_32(~InvMask)) - LSB;
+ unsigned Mask = (1 << Width)-1;
+ unsigned Mask2 = N11C->getZExtValue();
+ if ((Mask & (~Mask2)) == 0)
+ return DCI.DAG.getNode(ARMISD::BFI, N->getDebugLoc(), N->getValueType(0),
+ N->getOperand(0), N1.getOperand(0),
+ N->getOperand(2));
+ }
+ return SDValue();
+}
+
+/// PerformVMOVRRDCombine - Target-specific dag combine xforms for
+/// ARMISD::VMOVRRD.
+static SDValue PerformVMOVRRDCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ // vmovrrd(vmovdrr x, y) -> x,y
+ SDValue InDouble = N->getOperand(0);
+ if (InDouble.getOpcode() == ARMISD::VMOVDRR)
+ return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1));
+
+ // vmovrrd(load f64) -> (load i32), (load i32)
+ SDNode *InNode = InDouble.getNode();
+ if (ISD::isNormalLoad(InNode) && InNode->hasOneUse() &&
+ InNode->getValueType(0) == MVT::f64 &&
+ InNode->getOperand(1).getOpcode() == ISD::FrameIndex &&
+ !cast<LoadSDNode>(InNode)->isVolatile()) {
+ // TODO: Should this be done for non-FrameIndex operands?
+ LoadSDNode *LD = cast<LoadSDNode>(InNode);
+
+ SelectionDAG &DAG = DCI.DAG;
+ DebugLoc DL = LD->getDebugLoc();
+ SDValue BasePtr = LD->getBasePtr();
+ SDValue NewLD1 = DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr,
+ LD->getPointerInfo(), LD->isVolatile(),
+ LD->isNonTemporal(), LD->getAlignment());
+
+ SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
+ DAG.getConstant(4, MVT::i32));
+ SDValue NewLD2 = DAG.getLoad(MVT::i32, DL, NewLD1.getValue(1), OffsetPtr,
+ LD->getPointerInfo(), LD->isVolatile(),
+ LD->isNonTemporal(),
+ std::min(4U, LD->getAlignment() / 2));
+
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1));
+ SDValue Result = DCI.CombineTo(N, NewLD1, NewLD2);
+ DCI.RemoveFromWorklist(LD);
+ DAG.DeleteNode(LD);
+ return Result;
+ }
+
+ return SDValue();
+}
+
+/// PerformVMOVDRRCombine - Target-specific dag combine xforms for
+/// ARMISD::VMOVDRR. This is also used for BUILD_VECTORs with 2 operands.
+static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG) {
+ // N=vmovrrd(X); vmovdrr(N:0, N:1) -> bit_convert(X)
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ if (Op0.getOpcode() == ISD::BITCAST)
+ Op0 = Op0.getOperand(0);
+ if (Op1.getOpcode() == ISD::BITCAST)
+ Op1 = Op1.getOperand(0);
+ if (Op0.getOpcode() == ARMISD::VMOVRRD &&
+ Op0.getNode() == Op1.getNode() &&
+ Op0.getResNo() == 0 && Op1.getResNo() == 1)
+ return DAG.getNode(ISD::BITCAST, N->getDebugLoc(),
+ N->getValueType(0), Op0.getOperand(0));
+ return SDValue();
+}
+
+/// PerformSTORECombine - Target-specific dag combine xforms for
+/// ISD::STORE.
+static SDValue PerformSTORECombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ // Bitcast an i64 store extracted from a vector to f64.
+ // Otherwise, the i64 value will be legalized to a pair of i32 values.
+ StoreSDNode *St = cast<StoreSDNode>(N);
+ SDValue StVal = St->getValue();
+ if (!ISD::isNormalStore(St) || St->isVolatile())
+ return SDValue();
+
+ if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR &&
+ StVal.getNode()->hasOneUse() && !St->isVolatile()) {
+ SelectionDAG &DAG = DCI.DAG;
+ DebugLoc DL = St->getDebugLoc();
+ SDValue BasePtr = St->getBasePtr();
+ SDValue NewST1 = DAG.getStore(St->getChain(), DL,
+ StVal.getNode()->getOperand(0), BasePtr,
+ St->getPointerInfo(), St->isVolatile(),
+ St->isNonTemporal(), St->getAlignment());
+
+ SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
+ DAG.getConstant(4, MVT::i32));
+ return DAG.getStore(NewST1.getValue(0), DL, StVal.getNode()->getOperand(1),
+ OffsetPtr, St->getPointerInfo(), St->isVolatile(),
+ St->isNonTemporal(),
+ std::min(4U, St->getAlignment() / 2));
+ }
+
+ if (StVal.getValueType() != MVT::i64 ||
+ StVal.getNode()->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+ return SDValue();
+
+ SelectionDAG &DAG = DCI.DAG;
+ DebugLoc dl = StVal.getDebugLoc();
+ SDValue IntVec = StVal.getOperand(0);
+ EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
+ IntVec.getValueType().getVectorNumElements());
+ SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, IntVec);
+ SDValue ExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
+ Vec, StVal.getOperand(1));
+ dl = N->getDebugLoc();
+ SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ExtElt);
+ // Make the DAGCombiner fold the bitcasts.
+ DCI.AddToWorklist(Vec.getNode());
+ DCI.AddToWorklist(ExtElt.getNode());
+ DCI.AddToWorklist(V.getNode());
+ return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(),
+ St->getPointerInfo(), St->isVolatile(),
+ St->isNonTemporal(), St->getAlignment(),
+ St->getTBAAInfo());
+}
+
+/// hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node
+/// are normal, non-volatile loads. If so, it is profitable to bitcast an
+/// i64 vector to have f64 elements, since the value can then be loaded
+/// directly into a VFP register.
+static bool hasNormalLoadOperand(SDNode *N) {
+ unsigned NumElts = N->getValueType(0).getVectorNumElements();
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDNode *Elt = N->getOperand(i).getNode();
+ if (ISD::isNormalLoad(Elt) && !cast<LoadSDNode>(Elt)->isVolatile())
+ return true;
+ }
+ return false;
+}
+
+/// PerformBUILD_VECTORCombine - Target-specific dag combine xforms for
+/// ISD::BUILD_VECTOR.
+static SDValue PerformBUILD_VECTORCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI){
+ // build_vector(N=ARMISD::VMOVRRD(X), N:1) -> bit_convert(X):
+ // VMOVRRD is introduced when legalizing i64 types. It forces the i64 value
+ // into a pair of GPRs, which is fine when the value is used as a scalar,
+ // but if the i64 value is converted to a vector, we need to undo the VMOVRRD.
+ SelectionDAG &DAG = DCI.DAG;
+ if (N->getNumOperands() == 2) {
+ SDValue RV = PerformVMOVDRRCombine(N, DAG);
+ if (RV.getNode())
+ return RV;
+ }
+
+ // Load i64 elements as f64 values so that type legalization does not split
+ // them up into i32 values.
+ EVT VT = N->getValueType(0);
+ if (VT.getVectorElementType() != MVT::i64 || !hasNormalLoadOperand(N))
+ return SDValue();
+ DebugLoc dl = N->getDebugLoc();
+ SmallVector<SDValue, 8> Ops;
+ unsigned NumElts = VT.getVectorNumElements();
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(i));
+ Ops.push_back(V);
+ // Make the DAGCombiner fold the bitcast.
+ DCI.AddToWorklist(V.getNode());
+ }
+ EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, NumElts);
+ SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, FloatVT, Ops.data(), NumElts);
+ return DAG.getNode(ISD::BITCAST, dl, VT, BV);
+}
+
+/// PerformInsertEltCombine - Target-specific dag combine xforms for
+/// ISD::INSERT_VECTOR_ELT.
+static SDValue PerformInsertEltCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ // Bitcast an i64 load inserted into a vector to f64.
+ // Otherwise, the i64 value will be legalized to a pair of i32 values.
+ EVT VT = N->getValueType(0);
+ SDNode *Elt = N->getOperand(1).getNode();
+ if (VT.getVectorElementType() != MVT::i64 ||
+ !ISD::isNormalLoad(Elt) || cast<LoadSDNode>(Elt)->isVolatile())
+ return SDValue();
+
+ SelectionDAG &DAG = DCI.DAG;
+ DebugLoc dl = N->getDebugLoc();
+ EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
+ VT.getVectorNumElements());
+ SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, N->getOperand(0));
+ SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(1));
+ // Make the DAGCombiner fold the bitcasts.
+ DCI.AddToWorklist(Vec.getNode());
+ DCI.AddToWorklist(V.getNode());
+ SDValue InsElt = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, FloatVT,
+ Vec, V, N->getOperand(2));
+ return DAG.getNode(ISD::BITCAST, dl, VT, InsElt);
+}
+
+/// PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for
+/// ISD::VECTOR_SHUFFLE.
+static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) {
+ // The LLVM shufflevector instruction does not require the shuffle mask
+ // length to match the operand vector length, but ISD::VECTOR_SHUFFLE does
+ // have that requirement. When translating to ISD::VECTOR_SHUFFLE, if the
+ // operands do not match the mask length, they are extended by concatenating
+ // them with undef vectors. That is probably the right thing for other
+ // targets, but for NEON it is better to concatenate two double-register
+ // size vector operands into a single quad-register size vector. Do that
+ // transformation here:
+ // shuffle(concat(v1, undef), concat(v2, undef)) ->
+ // shuffle(concat(v1, v2), undef)
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ if (Op0.getOpcode() != ISD::CONCAT_VECTORS ||
+ Op1.getOpcode() != ISD::CONCAT_VECTORS ||
+ Op0.getNumOperands() != 2 ||
+ Op1.getNumOperands() != 2)
+ return SDValue();
+ SDValue Concat0Op1 = Op0.getOperand(1);
+ SDValue Concat1Op1 = Op1.getOperand(1);
+ if (Concat0Op1.getOpcode() != ISD::UNDEF ||
+ Concat1Op1.getOpcode() != ISD::UNDEF)
+ return SDValue();
+ // Skip the transformation if any of the types are illegal.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT VT = N->getValueType(0);
+ if (!TLI.isTypeLegal(VT) ||
+ !TLI.isTypeLegal(Concat0Op1.getValueType()) ||
+ !TLI.isTypeLegal(Concat1Op1.getValueType()))
+ return SDValue();
+
+ SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT,
+ Op0.getOperand(0), Op1.getOperand(0));
+ // Translate the shuffle mask.
+ SmallVector<int, 16> NewMask;
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned HalfElts = NumElts/2;
+ ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
+ for (unsigned n = 0; n < NumElts; ++n) {
+ int MaskElt = SVN->getMaskElt(n);
+ int NewElt = -1;
+ if (MaskElt < (int)HalfElts)
+ NewElt = MaskElt;
+ else if (MaskElt >= (int)NumElts && MaskElt < (int)(NumElts + HalfElts))
+ NewElt = HalfElts + MaskElt - NumElts;
+ NewMask.push_back(NewElt);
+ }
+ return DAG.getVectorShuffle(VT, N->getDebugLoc(), NewConcat,
+ DAG.getUNDEF(VT), NewMask.data());
+}
+
+/// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP and
+/// NEON load/store intrinsics to merge base address updates.
+static SDValue CombineBaseUpdate(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
+ return SDValue();
+
+ SelectionDAG &DAG = DCI.DAG;
+ bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID ||
+ N->getOpcode() == ISD::INTRINSIC_W_CHAIN);
+ unsigned AddrOpIdx = (isIntrinsic ? 2 : 1);
+ SDValue Addr = N->getOperand(AddrOpIdx);
+
+ // Search for a use of the address operand that is an increment.
+ for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
+ UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
+ SDNode *User = *UI;
+ if (User->getOpcode() != ISD::ADD ||
+ UI.getUse().getResNo() != Addr.getResNo())
+ continue;
+
+ // Check that the add is independent of the load/store. Otherwise, folding
+ // it would create a cycle.
+ if (User->isPredecessorOf(N) || N->isPredecessorOf(User))
+ continue;
+
+ // Find the new opcode for the updating load/store.
+ bool isLoad = true;
+ bool isLaneOp = false;
+ unsigned NewOpc = 0;
+ unsigned NumVecs = 0;
+ if (isIntrinsic) {
+ unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ switch (IntNo) {
+ default: assert(0 && "unexpected intrinsic for Neon base update");
+ case Intrinsic::arm_neon_vld1: NewOpc = ARMISD::VLD1_UPD;
+ NumVecs = 1; break;
+ case Intrinsic::arm_neon_vld2: NewOpc = ARMISD::VLD2_UPD;
+ NumVecs = 2; break;
+ case Intrinsic::arm_neon_vld3: NewOpc = ARMISD::VLD3_UPD;
+ NumVecs = 3; break;
+ case Intrinsic::arm_neon_vld4: NewOpc = ARMISD::VLD4_UPD;
+ NumVecs = 4; break;
+ case Intrinsic::arm_neon_vld2lane: NewOpc = ARMISD::VLD2LN_UPD;
+ NumVecs = 2; isLaneOp = true; break;
+ case Intrinsic::arm_neon_vld3lane: NewOpc = ARMISD::VLD3LN_UPD;
+ NumVecs = 3; isLaneOp = true; break;
+ case Intrinsic::arm_neon_vld4lane: NewOpc = ARMISD::VLD4LN_UPD;
+ NumVecs = 4; isLaneOp = true; break;
+ case Intrinsic::arm_neon_vst1: NewOpc = ARMISD::VST1_UPD;
+ NumVecs = 1; isLoad = false; break;
+ case Intrinsic::arm_neon_vst2: NewOpc = ARMISD::VST2_UPD;
+ NumVecs = 2; isLoad = false; break;
+ case Intrinsic::arm_neon_vst3: NewOpc = ARMISD::VST3_UPD;
+ NumVecs = 3; isLoad = false; break;
+ case Intrinsic::arm_neon_vst4: NewOpc = ARMISD::VST4_UPD;
+ NumVecs = 4; isLoad = false; break;
+ case Intrinsic::arm_neon_vst2lane: NewOpc = ARMISD::VST2LN_UPD;
+ NumVecs = 2; isLoad = false; isLaneOp = true; break;
+ case Intrinsic::arm_neon_vst3lane: NewOpc = ARMISD::VST3LN_UPD;
+ NumVecs = 3; isLoad = false; isLaneOp = true; break;
+ case Intrinsic::arm_neon_vst4lane: NewOpc = ARMISD::VST4LN_UPD;
+ NumVecs = 4; isLoad = false; isLaneOp = true; break;
+ }
+ } else {
+ isLaneOp = true;
+ switch (N->getOpcode()) {
+ default: assert(0 && "unexpected opcode for Neon base update");
+ case ARMISD::VLD2DUP: NewOpc = ARMISD::VLD2DUP_UPD; NumVecs = 2; break;
+ case ARMISD::VLD3DUP: NewOpc = ARMISD::VLD3DUP_UPD; NumVecs = 3; break;
+ case ARMISD::VLD4DUP: NewOpc = ARMISD::VLD4DUP_UPD; NumVecs = 4; break;
+ }
+ }
+
+ // Find the size of memory referenced by the load/store.
+ EVT VecTy;
+ if (isLoad)
+ VecTy = N->getValueType(0);
+ else
+ VecTy = N->getOperand(AddrOpIdx+1).getValueType();
+ unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
+ if (isLaneOp)
+ NumBytes /= VecTy.getVectorNumElements();
+
+ // If the increment is a constant, it must match the memory ref size.
+ SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
+ if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) {
+ uint64_t IncVal = CInc->getZExtValue();
+ if (IncVal != NumBytes)
+ continue;
+ } else if (NumBytes >= 3 * 16) {
+ // VLD3/4 and VST3/4 for 128-bit vectors are implemented with two
+ // separate instructions that make it harder to use a non-constant update.
+ continue;
+ }
+
+ // Create the new updating load/store node.
+ EVT Tys[6];
+ unsigned NumResultVecs = (isLoad ? NumVecs : 0);
+ unsigned n;
+ for (n = 0; n < NumResultVecs; ++n)
+ Tys[n] = VecTy;
+ Tys[n++] = MVT::i32;
+ Tys[n] = MVT::Other;
+ SDVTList SDTys = DAG.getVTList(Tys, NumResultVecs+2);
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(N->getOperand(0)); // incoming chain
+ Ops.push_back(N->getOperand(AddrOpIdx));
+ Ops.push_back(Inc);
+ for (unsigned i = AddrOpIdx + 1; i < N->getNumOperands(); ++i) {
+ Ops.push_back(N->getOperand(i));
+ }
+ MemIntrinsicSDNode *MemInt = cast<MemIntrinsicSDNode>(N);
+ SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, N->getDebugLoc(), SDTys,
+ Ops.data(), Ops.size(),
+ MemInt->getMemoryVT(),
+ MemInt->getMemOperand());
+
+ // Update the uses.
+ std::vector<SDValue> NewResults;
+ for (unsigned i = 0; i < NumResultVecs; ++i) {
+ NewResults.push_back(SDValue(UpdN.getNode(), i));
+ }
+ NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs+1)); // chain
+ DCI.CombineTo(N, NewResults);
+ DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));
+
+ break;
+ }
+ return SDValue();
+}
+
+/// CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a
+/// vldN-lane (N > 1) intrinsic, and if all the other uses of that intrinsic
+/// are also VDUPLANEs. If so, combine them to a vldN-dup operation and
+/// return true.
+static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
+ SelectionDAG &DAG = DCI.DAG;
+ EVT VT = N->getValueType(0);
+ // vldN-dup instructions only support 64-bit vectors for N > 1.
+ if (!VT.is64BitVector())
+ return false;
+
+ // Check if the VDUPLANE operand is a vldN-dup intrinsic.
+ SDNode *VLD = N->getOperand(0).getNode();
+ if (VLD->getOpcode() != ISD::INTRINSIC_W_CHAIN)
+ return false;
+ unsigned NumVecs = 0;
+ unsigned NewOpc = 0;
+ unsigned IntNo = cast<ConstantSDNode>(VLD->getOperand(1))->getZExtValue();
+ if (IntNo == Intrinsic::arm_neon_vld2lane) {
+ NumVecs = 2;
+ NewOpc = ARMISD::VLD2DUP;
+ } else if (IntNo == Intrinsic::arm_neon_vld3lane) {
+ NumVecs = 3;
+ NewOpc = ARMISD::VLD3DUP;
+ } else if (IntNo == Intrinsic::arm_neon_vld4lane) {
+ NumVecs = 4;
+ NewOpc = ARMISD::VLD4DUP;
+ } else {
+ return false;
+ }
+
+ // First check that all the vldN-lane uses are VDUPLANEs and that the lane
+ // numbers match the load.
+ unsigned VLDLaneNo =
+ cast<ConstantSDNode>(VLD->getOperand(NumVecs+3))->getZExtValue();
+ for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
+ UI != UE; ++UI) {
+ // Ignore uses of the chain result.
+ if (UI.getUse().getResNo() == NumVecs)
+ continue;
+ SDNode *User = *UI;
+ if (User->getOpcode() != ARMISD::VDUPLANE ||
+ VLDLaneNo != cast<ConstantSDNode>(User->getOperand(1))->getZExtValue())
+ return false;
+ }
+
+ // Create the vldN-dup node.
+ EVT Tys[5];
+ unsigned n;
+ for (n = 0; n < NumVecs; ++n)
+ Tys[n] = VT;
+ Tys[n] = MVT::Other;
+ SDVTList SDTys = DAG.getVTList(Tys, NumVecs+1);
+ SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) };
+ MemIntrinsicSDNode *VLDMemInt = cast<MemIntrinsicSDNode>(VLD);
+ SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, VLD->getDebugLoc(), SDTys,
+ Ops, 2, VLDMemInt->getMemoryVT(),
+ VLDMemInt->getMemOperand());
+
+ // Update the uses.
+ for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
+ UI != UE; ++UI) {
+ unsigned ResNo = UI.getUse().getResNo();
+ // Ignore uses of the chain result.
+ if (ResNo == NumVecs)
+ continue;
+ SDNode *User = *UI;
+ DCI.CombineTo(User, SDValue(VLDDup.getNode(), ResNo));
+ }
+
+ // Now the vldN-lane intrinsic is dead except for its chain result.
+ // Update uses of the chain.
+ std::vector<SDValue> VLDDupResults;
+ for (unsigned n = 0; n < NumVecs; ++n)
+ VLDDupResults.push_back(SDValue(VLDDup.getNode(), n));
+ VLDDupResults.push_back(SDValue(VLDDup.getNode(), NumVecs));
+ DCI.CombineTo(VLD, VLDDupResults);
+
+ return true;
+}
+
+/// PerformVDUPLANECombine - Target-specific dag combine xforms for
+/// ARMISD::VDUPLANE.
+static SDValue PerformVDUPLANECombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ SDValue Op = N->getOperand(0);
+
+ // If the source is a vldN-lane (N > 1) intrinsic, and all the other uses
+ // of that intrinsic are also VDUPLANEs, combine them to a vldN-dup operation.
+ if (CombineVLDDUP(N, DCI))
+ return SDValue(N, 0);
+
+ // If the source is already a VMOVIMM or VMVNIMM splat, the VDUPLANE is
+ // redundant. Ignore bit_converts for now; element sizes are checked below.
+ while (Op.getOpcode() == ISD::BITCAST)
+ Op = Op.getOperand(0);
+ if (Op.getOpcode() != ARMISD::VMOVIMM && Op.getOpcode() != ARMISD::VMVNIMM)
+ return SDValue();
+
+ // Make sure the VMOV element size is not bigger than the VDUPLANE elements.
+ unsigned EltSize = Op.getValueType().getVectorElementType().getSizeInBits();
+ // The canonical VMOV for a zero vector uses a 32-bit element size.
+ unsigned Imm = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned EltBits;
+ if (ARM_AM::decodeNEONModImm(Imm, EltBits) == 0)
+ EltSize = 8;
+ EVT VT = N->getValueType(0);
+ if (EltSize > VT.getVectorElementType().getSizeInBits())
+ return SDValue();
+
+ return DCI.DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, Op);
+}
+
+// isConstVecPow2 - Return true if each vector element is a power of 2, all
+// elements are the same constant, C, and Log2(C) ranges from 1 to 32.
+static bool isConstVecPow2(SDValue ConstVec, bool isSigned, uint64_t &C)
+{
+ integerPart cN;
+ integerPart c0 = 0;
+ for (unsigned I = 0, E = ConstVec.getValueType().getVectorNumElements();
+ I != E; I++) {
+ ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(ConstVec.getOperand(I));
+ if (!C)
+ return false;
+
+ bool isExact;
+ APFloat APF = C->getValueAPF();
+ if (APF.convertToInteger(&cN, 64, isSigned, APFloat::rmTowardZero, &isExact)
+ != APFloat::opOK || !isExact)
+ return false;
+
+ c0 = (I == 0) ? cN : c0;
+ if (!isPowerOf2_64(cN) || c0 != cN || Log2_64(c0) < 1 || Log2_64(c0) > 32)
+ return false;
+ }
+ C = c0;
+ return true;
+}
+
+/// PerformVCVTCombine - VCVT (floating-point to fixed-point, Advanced SIMD)
+/// can replace combinations of VMUL and VCVT (floating-point to integer)
+/// when the VMUL has a constant operand that is a power of 2.
+///
+/// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>):
+/// vmul.f32 d16, d17, d16
+/// vcvt.s32.f32 d16, d16
+/// becomes:
+/// vcvt.s32.f32 d16, d16, #3
+static SDValue PerformVCVTCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+ SelectionDAG &DAG = DCI.DAG;
+ SDValue Op = N->getOperand(0);
+
+ if (!Subtarget->hasNEON() || !Op.getValueType().isVector() ||
+ Op.getOpcode() != ISD::FMUL)
+ return SDValue();
+
+ uint64_t C;
+ SDValue N0 = Op->getOperand(0);
+ SDValue ConstVec = Op->getOperand(1);
+ bool isSigned = N->getOpcode() == ISD::FP_TO_SINT;
+
+ if (ConstVec.getOpcode() != ISD::BUILD_VECTOR ||
+ !isConstVecPow2(ConstVec, isSigned, C))
+ return SDValue();
+
+ unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfp2fxs :
+ Intrinsic::arm_neon_vcvtfp2fxu;
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(),
+ N->getValueType(0),
+ DAG.getConstant(IntrinsicOpcode, MVT::i32), N0,
+ DAG.getConstant(Log2_64(C), MVT::i32));
+}
+
+/// PerformVDIVCombine - VCVT (fixed-point to floating-point, Advanced SIMD)
+/// can replace combinations of VCVT (integer to floating-point) and VDIV
+/// when the VDIV has a constant operand that is a power of 2.
+///
+/// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>):
+/// vcvt.f32.s32 d16, d16
+/// vdiv.f32 d16, d17, d16
+/// becomes:
+/// vcvt.f32.s32 d16, d16, #3
+static SDValue PerformVDIVCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+ SelectionDAG &DAG = DCI.DAG;
+ SDValue Op = N->getOperand(0);
+ unsigned OpOpcode = Op.getNode()->getOpcode();
+
+ if (!Subtarget->hasNEON() || !N->getValueType(0).isVector() ||
+ (OpOpcode != ISD::SINT_TO_FP && OpOpcode != ISD::UINT_TO_FP))
+ return SDValue();
+
+ uint64_t C;
+ SDValue ConstVec = N->getOperand(1);
+ bool isSigned = OpOpcode == ISD::SINT_TO_FP;
+
+ if (ConstVec.getOpcode() != ISD::BUILD_VECTOR ||
+ !isConstVecPow2(ConstVec, isSigned, C))
+ return SDValue();
+
+ unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfxs2fp :
+ Intrinsic::arm_neon_vcvtfxu2fp;
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(),
+ Op.getValueType(),
+ DAG.getConstant(IntrinsicOpcode, MVT::i32),
+ Op.getOperand(0), DAG.getConstant(Log2_64(C), MVT::i32));
+}
+
+/// Getvshiftimm - Check if this is a valid build_vector for the immediate
+/// operand of a vector shift operation, where all the elements of the
+/// build_vector must have the same constant integer value.
+static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
+ // Ignore bit_converts.
+ while (Op.getOpcode() == ISD::BITCAST)
+ Op = Op.getOperand(0);
+ BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
+ APInt SplatBits, SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ if (! BVN || ! BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
+ HasAnyUndefs, ElementBits) ||
+ SplatBitSize > ElementBits)
+ return false;
+ Cnt = SplatBits.getSExtValue();
+ return true;
+}
+
+/// isVShiftLImm - Check if this is a valid build_vector for the immediate
+/// operand of a vector shift left operation. That value must be in the range:
+/// 0 <= Value < ElementBits for a left shift; or
+/// 0 <= Value <= ElementBits for a long left shift.
+static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
+ assert(VT.isVector() && "vector shift count is not a vector type");
+ unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
+ if (! getVShiftImm(Op, ElementBits, Cnt))
+ return false;
+ return (Cnt >= 0 && (isLong ? Cnt-1 : Cnt) < ElementBits);
+}
+
+/// isVShiftRImm - Check if this is a valid build_vector for the immediate
+/// operand of a vector shift right operation. For a shift opcode, the value
+/// is positive, but for an intrinsic the value count must be negative. The
+/// absolute value must be in the range:
+/// 1 <= |Value| <= ElementBits for a right shift; or
+/// 1 <= |Value| <= ElementBits/2 for a narrow right shift.
+static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic,
+ int64_t &Cnt) {
+ assert(VT.isVector() && "vector shift count is not a vector type");
+ unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
+ if (! getVShiftImm(Op, ElementBits, Cnt))
+ return false;
+ if (isIntrinsic)
+ Cnt = -Cnt;
+ return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits/2 : ElementBits));
+}
+
+/// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
+static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
+ unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+ switch (IntNo) {
+ default:
+ // Don't do anything for most intrinsics.
+ break;
+
+ // Vector shifts: check for immediate versions and lower them.
+ // Note: This is done during DAG combining instead of DAG legalizing because
+ // the build_vectors for 64-bit vector element shift counts are generally
+ // not legal, and it is hard to see their values after they get legalized to
+ // loads from a constant pool.
+ case Intrinsic::arm_neon_vshifts:
+ case Intrinsic::arm_neon_vshiftu:
+ case Intrinsic::arm_neon_vshiftls:
+ case Intrinsic::arm_neon_vshiftlu:
+ case Intrinsic::arm_neon_vshiftn:
+ case Intrinsic::arm_neon_vrshifts:
+ case Intrinsic::arm_neon_vrshiftu:
+ case Intrinsic::arm_neon_vrshiftn:
+ case Intrinsic::arm_neon_vqshifts:
+ case Intrinsic::arm_neon_vqshiftu:
+ case Intrinsic::arm_neon_vqshiftsu:
+ case Intrinsic::arm_neon_vqshiftns:
+ case Intrinsic::arm_neon_vqshiftnu:
+ case Intrinsic::arm_neon_vqshiftnsu:
+ case Intrinsic::arm_neon_vqrshiftns:
+ case Intrinsic::arm_neon_vqrshiftnu:
+ case Intrinsic::arm_neon_vqrshiftnsu: {
+ EVT VT = N->getOperand(1).getValueType();
+ int64_t Cnt;
+ unsigned VShiftOpc = 0;
+
+ switch (IntNo) {
+ case Intrinsic::arm_neon_vshifts:
+ case Intrinsic::arm_neon_vshiftu:
+ if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) {
+ VShiftOpc = ARMISD::VSHL;
+ break;
+ }
+ if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) {
+ VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ?
+ ARMISD::VSHRs : ARMISD::VSHRu);
+ break;
+ }
+ return SDValue();
+
+ case Intrinsic::arm_neon_vshiftls:
+ case Intrinsic::arm_neon_vshiftlu:
+ if (isVShiftLImm(N->getOperand(2), VT, true, Cnt))
+ break;
+ llvm_unreachable("invalid shift count for vshll intrinsic");
+
+ case Intrinsic::arm_neon_vrshifts:
+ case Intrinsic::arm_neon_vrshiftu:
+ if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt))
+ break;
+ return SDValue();
+
+ case Intrinsic::arm_neon_vqshifts:
+ case Intrinsic::arm_neon_vqshiftu:
+ if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
+ break;
+ return SDValue();
+
+ case Intrinsic::arm_neon_vqshiftsu:
+ if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
+ break;
+ llvm_unreachable("invalid shift count for vqshlu intrinsic");
+
+ case Intrinsic::arm_neon_vshiftn:
+ case Intrinsic::arm_neon_vrshiftn:
+ case Intrinsic::arm_neon_vqshiftns:
+ case Intrinsic::arm_neon_vqshiftnu:
+ case Intrinsic::arm_neon_vqshiftnsu:
+ case Intrinsic::arm_neon_vqrshiftns:
+ case Intrinsic::arm_neon_vqrshiftnu:
+ case Intrinsic::arm_neon_vqrshiftnsu:
+ // Narrowing shifts require an immediate right shift.
+ if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt))
+ break;
+ llvm_unreachable("invalid shift count for narrowing vector shift "
+ "intrinsic");
+
+ default:
+ llvm_unreachable("unhandled vector shift");
+ }
+
+ switch (IntNo) {
+ case Intrinsic::arm_neon_vshifts:
+ case Intrinsic::arm_neon_vshiftu:
+ // Opcode already set above.
+ break;
+ case Intrinsic::arm_neon_vshiftls:
+ case Intrinsic::arm_neon_vshiftlu:
+ if (Cnt == VT.getVectorElementType().getSizeInBits())
+ VShiftOpc = ARMISD::VSHLLi;
+ else
+ VShiftOpc = (IntNo == Intrinsic::arm_neon_vshiftls ?
+ ARMISD::VSHLLs : ARMISD::VSHLLu);
+ break;
+ case Intrinsic::arm_neon_vshiftn:
+ VShiftOpc = ARMISD::VSHRN; break;
+ case Intrinsic::arm_neon_vrshifts:
+ VShiftOpc = ARMISD::VRSHRs; break;
+ case Intrinsic::arm_neon_vrshiftu:
+ VShiftOpc = ARMISD::VRSHRu; break;
+ case Intrinsic::arm_neon_vrshiftn:
+ VShiftOpc = ARMISD::VRSHRN; break;
+ case Intrinsic::arm_neon_vqshifts:
+ VShiftOpc = ARMISD::VQSHLs; break;
+ case Intrinsic::arm_neon_vqshiftu:
+ VShiftOpc = ARMISD::VQSHLu; break;
+ case Intrinsic::arm_neon_vqshiftsu:
+ VShiftOpc = ARMISD::VQSHLsu; break;
+ case Intrinsic::arm_neon_vqshiftns:
+ VShiftOpc = ARMISD::VQSHRNs; break;
+ case Intrinsic::arm_neon_vqshiftnu:
+ VShiftOpc = ARMISD::VQSHRNu; break;
+ case Intrinsic::arm_neon_vqshiftnsu:
+ VShiftOpc = ARMISD::VQSHRNsu; break;
+ case Intrinsic::arm_neon_vqrshiftns:
+ VShiftOpc = ARMISD::VQRSHRNs; break;
+ case Intrinsic::arm_neon_vqrshiftnu:
+ VShiftOpc = ARMISD::VQRSHRNu; break;
+ case Intrinsic::arm_neon_vqrshiftnsu:
+ VShiftOpc = ARMISD::VQRSHRNsu; break;
+ }
+
+ return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0),
+ N->getOperand(1), DAG.getConstant(Cnt, MVT::i32));
+ }
+
+ case Intrinsic::arm_neon_vshiftins: {
+ EVT VT = N->getOperand(1).getValueType();
+ int64_t Cnt;
+ unsigned VShiftOpc = 0;
+
+ if (isVShiftLImm(N->getOperand(3), VT, false, Cnt))
+ VShiftOpc = ARMISD::VSLI;
+ else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt))
+ VShiftOpc = ARMISD::VSRI;
+ else {
+ llvm_unreachable("invalid shift count for vsli/vsri intrinsic");
+ }
+
+ return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0),
+ N->getOperand(1), N->getOperand(2),
+ DAG.getConstant(Cnt, MVT::i32));
+ }
+
+ case Intrinsic::arm_neon_vqrshifts:
+ case Intrinsic::arm_neon_vqrshiftu:
+ // No immediate versions of these to check for.
+ break;
+ }
+
+ return SDValue();
+}
+
+/// PerformShiftCombine - Checks for immediate versions of vector shifts and
+/// lowers them. As with the vector shift intrinsics, this is done during DAG
+/// combining instead of DAG legalizing because the build_vectors for 64-bit
+/// vector element shift counts are generally not legal, and it is hard to see
+/// their values after they get legalized to loads from a constant pool.
+static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
+ EVT VT = N->getValueType(0);
+
+ // Nothing to be done for scalar shifts.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (!VT.isVector() || !TLI.isTypeLegal(VT))
+ return SDValue();
+
+ assert(ST->hasNEON() && "unexpected vector shift");
+ int64_t Cnt;
+
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("unexpected shift opcode");
+
+ case ISD::SHL:
+ if (isVShiftLImm(N->getOperand(1), VT, false, Cnt))
+ return DAG.getNode(ARMISD::VSHL, N->getDebugLoc(), VT, N->getOperand(0),
+ DAG.getConstant(Cnt, MVT::i32));
+ break;
+
+ case ISD::SRA:
+ case ISD::SRL:
+ if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
+ unsigned VShiftOpc = (N->getOpcode() == ISD::SRA ?
+ ARMISD::VSHRs : ARMISD::VSHRu);
+ return DAG.getNode(VShiftOpc, N->getDebugLoc(), VT, N->getOperand(0),
+ DAG.getConstant(Cnt, MVT::i32));
+ }
+ }
+ return SDValue();
+}
+
+/// PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND,
+/// ISD::ZERO_EXTEND, and ISD::ANY_EXTEND.
+static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
+ SDValue N0 = N->getOperand(0);
+
+ // Check for sign- and zero-extensions of vector extract operations of 8-
+ // and 16-bit vector elements. NEON supports these directly. They are
+ // handled during DAG combining because type legalization will promote them
+ // to 32-bit types and it is messy to recognize the operations after that.
+ if (ST->hasNEON() && N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
+ SDValue Vec = N0.getOperand(0);
+ SDValue Lane = N0.getOperand(1);
+ EVT VT = N->getValueType(0);
+ EVT EltVT = N0.getValueType();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ if (VT == MVT::i32 &&
+ (EltVT == MVT::i8 || EltVT == MVT::i16) &&
+ TLI.isTypeLegal(Vec.getValueType()) &&
+ isa<ConstantSDNode>(Lane)) {
+
+ unsigned Opc = 0;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("unexpected opcode");
+ case ISD::SIGN_EXTEND:
+ Opc = ARMISD::VGETLANEs;
+ break;
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND:
+ Opc = ARMISD::VGETLANEu;
+ break;
+ }
+ return DAG.getNode(Opc, N->getDebugLoc(), VT, Vec, Lane);
+ }
+ }
+
+ return SDValue();
+}
+
+/// PerformSELECT_CCCombine - Target-specific DAG combining for ISD::SELECT_CC
+/// to match f32 max/min patterns to use NEON vmax/vmin instructions.
+static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
+ // If the target supports NEON, try to use vmax/vmin instructions for f32
+ // selects like "x < y ? x : y". Unless the NoNaNsFPMath option is set,
+ // be careful about NaNs: NEON's vmax/vmin return NaN if either operand is
+ // a NaN; only do the transformation when it matches that behavior.
+
+ // For now only do this when using NEON for FP operations; if using VFP, it
+ // is not obvious that the benefit outweighs the cost of switching to the
+ // NEON pipeline.
+ if (!ST->hasNEON() || !ST->useNEONForSinglePrecisionFP() ||
+ N->getValueType(0) != MVT::f32)
+ return SDValue();
+
+ SDValue CondLHS = N->getOperand(0);
+ SDValue CondRHS = N->getOperand(1);
+ SDValue LHS = N->getOperand(2);
+ SDValue RHS = N->getOperand(3);
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
+
+ unsigned Opcode = 0;
+ bool IsReversed;
+ if (DAG.isEqualTo(LHS, CondLHS) && DAG.isEqualTo(RHS, CondRHS)) {
+ IsReversed = false; // x CC y ? x : y
+ } else if (DAG.isEqualTo(LHS, CondRHS) && DAG.isEqualTo(RHS, CondLHS)) {
+ IsReversed = true ; // x CC y ? y : x
+ } else {
+ return SDValue();
+ }
+
+ bool IsUnordered;
+ switch (CC) {
+ default: break;
+ case ISD::SETOLT:
+ case ISD::SETOLE:
+ case ISD::SETLT:
+ case ISD::SETLE:
+ case ISD::SETULT:
+ case ISD::SETULE:
+ // If LHS is NaN, an ordered comparison will be false and the result will
+ // be the RHS, but vmin(NaN, RHS) = NaN. Avoid this by checking that LHS
+ // != NaN. Likewise, for unordered comparisons, check for RHS != NaN.
+ IsUnordered = (CC == ISD::SETULT || CC == ISD::SETULE);
+ if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS))
+ break;
+ // For less-than-or-equal comparisons, "+0 <= -0" will be true but vmin
+ // will return -0, so vmin can only be used for unsafe math or if one of
+ // the operands is known to be nonzero.
+ if ((CC == ISD::SETLE || CC == ISD::SETOLE || CC == ISD::SETULE) &&
+ !UnsafeFPMath &&
+ !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
+ break;
+ Opcode = IsReversed ? ARMISD::FMAX : ARMISD::FMIN;
+ break;
+
+ case ISD::SETOGT:
+ case ISD::SETOGE:
+ case ISD::SETGT:
+ case ISD::SETGE:
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ // If LHS is NaN, an ordered comparison will be false and the result will
+ // be the RHS, but vmax(NaN, RHS) = NaN. Avoid this by checking that LHS
+ // != NaN. Likewise, for unordered comparisons, check for RHS != NaN.
+ IsUnordered = (CC == ISD::SETUGT || CC == ISD::SETUGE);
+ if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS))
+ break;
+ // For greater-than-or-equal comparisons, "-0 >= +0" will be true but vmax
+ // will return +0, so vmax can only be used for unsafe math or if one of
+ // the operands is known to be nonzero.
+ if ((CC == ISD::SETGE || CC == ISD::SETOGE || CC == ISD::SETUGE) &&
+ !UnsafeFPMath &&
+ !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
+ break;
+ Opcode = IsReversed ? ARMISD::FMIN : ARMISD::FMAX;
+ break;
+ }
+
+ if (!Opcode)
+ return SDValue();
+ return DAG.getNode(Opcode, N->getDebugLoc(), N->getValueType(0), LHS, RHS);
+}
+
+/// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
+SDValue
+ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
+ SDValue Cmp = N->getOperand(4);
+ if (Cmp.getOpcode() != ARMISD::CMPZ)
+ // Only looking at EQ and NE cases.
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+ SDValue LHS = Cmp.getOperand(0);
+ SDValue RHS = Cmp.getOperand(1);
+ SDValue FalseVal = N->getOperand(0);
+ SDValue TrueVal = N->getOperand(1);
+ SDValue ARMcc = N->getOperand(2);
+ ARMCC::CondCodes CC =
+ (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
+
+ // Simplify
+ // mov r1, r0
+ // cmp r1, x
+ // mov r0, y
+ // moveq r0, x
+ // to
+ // cmp r0, x
+ // movne r0, y
+ //
+ // mov r1, r0
+ // cmp r1, x
+ // mov r0, x
+ // movne r0, y
+ // to
+ // cmp r0, x
+ // movne r0, y
+ /// FIXME: Turn this into a target neutral optimization?
+ SDValue Res;
+ if (CC == ARMCC::NE && FalseVal == RHS && FalseVal != LHS) {
+ Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, TrueVal, ARMcc,
+ N->getOperand(3), Cmp);
+ } else if (CC == ARMCC::EQ && TrueVal == RHS) {
+ SDValue ARMcc;
+ SDValue NewCmp = getARMCmp(LHS, RHS, ISD::SETNE, ARMcc, DAG, dl);
+ Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, FalseVal, ARMcc,
+ N->getOperand(3), NewCmp);
+ }
+
+ if (Res.getNode()) {
+ APInt KnownZero, KnownOne;
+ APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits());
+ DAG.ComputeMaskedBits(SDValue(N,0), Mask, KnownZero, KnownOne);
+ // Capture demanded bits information that would be otherwise lost.
+ if (KnownZero == 0xfffffffe)
+ Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
+ DAG.getValueType(MVT::i1));
+ else if (KnownZero == 0xffffff00)
+ Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
+ DAG.getValueType(MVT::i8));
+ else if (KnownZero == 0xffff0000)
+ Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
+ DAG.getValueType(MVT::i16));
+ }
+
+ return Res;
+}
+
+SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::ADD: return PerformADDCombine(N, DCI, Subtarget);
+ case ISD::SUB: return PerformSUBCombine(N, DCI);
+ case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget);
+ case ISD::OR: return PerformORCombine(N, DCI, Subtarget);
+ case ISD::AND: return PerformANDCombine(N, DCI);
+ case ARMISD::BFI: return PerformBFICombine(N, DCI);
+ case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI);
+ case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG);
+ case ISD::STORE: return PerformSTORECombine(N, DCI);
+ case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI);
+ case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI);
+ case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG);
+ case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI);
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT: return PerformVCVTCombine(N, DCI, Subtarget);
+ case ISD::FDIV: return PerformVDIVCombine(N, DCI, Subtarget);
+ case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG);
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL: return PerformShiftCombine(N, DCI.DAG, Subtarget);
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget);
+ case ISD::SELECT_CC: return PerformSELECT_CCCombine(N, DCI.DAG, Subtarget);
+ case ARMISD::CMOV: return PerformCMOVCombine(N, DCI.DAG);
+ case ARMISD::VLD2DUP:
+ case ARMISD::VLD3DUP:
+ case ARMISD::VLD4DUP:
+ return CombineBaseUpdate(N, DCI);
+ case ISD::INTRINSIC_VOID:
+ case ISD::INTRINSIC_W_CHAIN:
+ switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
+ case Intrinsic::arm_neon_vld1:
+ case Intrinsic::arm_neon_vld2:
+ case Intrinsic::arm_neon_vld3:
+ case Intrinsic::arm_neon_vld4:
+ case Intrinsic::arm_neon_vld2lane:
+ case Intrinsic::arm_neon_vld3lane:
+ case Intrinsic::arm_neon_vld4lane:
+ case Intrinsic::arm_neon_vst1:
+ case Intrinsic::arm_neon_vst2:
+ case Intrinsic::arm_neon_vst3:
+ case Intrinsic::arm_neon_vst4:
+ case Intrinsic::arm_neon_vst2lane:
+ case Intrinsic::arm_neon_vst3lane:
+ case Intrinsic::arm_neon_vst4lane:
+ return CombineBaseUpdate(N, DCI);
+ default: break;
+ }
+ break;
+ }
+ return SDValue();
+}
+
+bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc,
+ EVT VT) const {
+ return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE);
+}
+
+bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
+ if (!Subtarget->allowsUnalignedMem())
+ return false;
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ default:
+ return false;
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ return true;
+ // FIXME: VLD1 etc with standard alignment is legal.
+ }
+}
+
+static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
+ if (V < 0)
+ return false;
+
+ unsigned Scale = 1;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return false;
+ case MVT::i1:
+ case MVT::i8:
+ // Scale == 1;
+ break;
+ case MVT::i16:
+ // Scale == 2;
+ Scale = 2;
+ break;
+ case MVT::i32:
+ // Scale == 4;
+ Scale = 4;
+ break;
+ }
+
+ if ((V & (Scale - 1)) != 0)
+ return false;
+ V /= Scale;
+ return V == (V & ((1LL << 5) - 1));
+}
+
+static bool isLegalT2AddressImmediate(int64_t V, EVT VT,
+ const ARMSubtarget *Subtarget) {
+ bool isNeg = false;
+ if (V < 0) {
+ isNeg = true;
+ V = - V;
+ }
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return false;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ // + imm12 or - imm8
+ if (isNeg)
+ return V == (V & ((1LL << 8) - 1));
+ return V == (V & ((1LL << 12) - 1));
+ case MVT::f32:
+ case MVT::f64:
+ // Same as ARM mode. FIXME: NEON?
+ if (!Subtarget->hasVFP2())
+ return false;
+ if ((V & 3) != 0)
+ return false;
+ V >>= 2;
+ return V == (V & ((1LL << 8) - 1));
+ }
+}
+
+/// isLegalAddressImmediate - Return true if the integer value can be used
+/// as the offset of the target addressing mode for load / store of the
+/// given type.
+static bool isLegalAddressImmediate(int64_t V, EVT VT,
+ const ARMSubtarget *Subtarget) {
+ if (V == 0)
+ return true;
+
+ if (!VT.isSimple())
+ return false;
+
+ if (Subtarget->isThumb1Only())
+ return isLegalT1AddressImmediate(V, VT);
+ else if (Subtarget->isThumb2())
+ return isLegalT2AddressImmediate(V, VT, Subtarget);
+
+ // ARM mode.
+ if (V < 0)
+ V = - V;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return false;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i32:
+ // +- imm12
+ return V == (V & ((1LL << 12) - 1));
+ case MVT::i16:
+ // +- imm8
+ return V == (V & ((1LL << 8) - 1));
+ case MVT::f32:
+ case MVT::f64:
+ if (!Subtarget->hasVFP2()) // FIXME: NEON?
+ return false;
+ if ((V & 3) != 0)
+ return false;
+ V >>= 2;
+ return V == (V & ((1LL << 8) - 1));
+ }
+}
+
+bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM,
+ EVT VT) const {
+ int Scale = AM.Scale;
+ if (Scale < 0)
+ return false;
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return false;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ if (Scale == 1)
+ return true;
+ // r + r << imm
+ Scale = Scale & ~1;
+ return Scale == 2 || Scale == 4 || Scale == 8;
+ case MVT::i64:
+ // r + r
+ if (((unsigned)AM.HasBaseReg + Scale) <= 2)
+ return true;
+ return false;
+ case MVT::isVoid:
+ // Note, we allow "void" uses (basically, uses that aren't loads or
+ // stores), because arm allows folding a scale into many arithmetic
+ // operations. This should be made more precise and revisited later.
+
+ // Allow r << imm, but the imm has to be a multiple of two.
+ if (Scale & 1) return false;
+ return isPowerOf2_32(Scale);
+ }
+}
+
+/// isLegalAddressingMode - Return true if the addressing mode represented
+/// by AM is legal for this target, for a load/store of the specified type.
+bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM,
+ Type *Ty) const {
+ EVT VT = getValueType(Ty, true);
+ if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget))
+ return false;
+
+ // Can never fold addr of global into load/store.
+ if (AM.BaseGV)
+ return false;
+
+ switch (AM.Scale) {
+ case 0: // no scale reg, must be "r+i" or "r", or "i".
+ break;
+ case 1:
+ if (Subtarget->isThumb1Only())
+ return false;
+ // FALL THROUGH.
+ default:
+ // ARM doesn't support any R+R*scale+imm addr modes.
+ if (AM.BaseOffs)
+ return false;
+
+ if (!VT.isSimple())
+ return false;
+
+ if (Subtarget->isThumb2())
+ return isLegalT2ScaledAddressingMode(AM, VT);
+
+ int Scale = AM.Scale;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return false;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i32:
+ if (Scale < 0) Scale = -Scale;
+ if (Scale == 1)
+ return true;
+ // r + r << imm
+ return isPowerOf2_32(Scale & ~1);
+ case MVT::i16:
+ case MVT::i64:
+ // r + r
+ if (((unsigned)AM.HasBaseReg + Scale) <= 2)
+ return true;
+ return false;
+
+ case MVT::isVoid:
+ // Note, we allow "void" uses (basically, uses that aren't loads or
+ // stores), because arm allows folding a scale into many arithmetic
+ // operations. This should be made more precise and revisited later.
+
+ // Allow r << imm, but the imm has to be a multiple of two.
+ if (Scale & 1) return false;
+ return isPowerOf2_32(Scale);
+ }
+ break;
+ }
+ return true;
+}
+
+/// isLegalICmpImmediate - Return true if the specified immediate is legal
+/// icmp immediate, that is the target has icmp instructions which can compare
+/// a register against the immediate without having to materialize the
+/// immediate into a register.
+bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
+ if (!Subtarget->isThumb())
+ return ARM_AM::getSOImmVal(Imm) != -1;
+ if (Subtarget->isThumb2())
+ return ARM_AM::getT2SOImmVal(Imm) != -1;
+ return Imm >= 0 && Imm <= 255;
+}
+
+/// isLegalAddImmediate - Return true if the specified immediate is legal
+/// add immediate, that is the target has add instructions which can add
+/// a register with the immediate without having to materialize the
+/// immediate into a register.
+bool ARMTargetLowering::isLegalAddImmediate(int64_t Imm) const {
+ return ARM_AM::getSOImmVal(Imm) != -1;
+}
+
+static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT,
+ bool isSEXTLoad, SDValue &Base,
+ SDValue &Offset, bool &isInc,
+ SelectionDAG &DAG) {
+ if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
+ return false;
+
+ if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) {
+ // AddressingMode 3
+ Base = Ptr->getOperand(0);
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
+ int RHSC = (int)RHS->getZExtValue();
+ if (RHSC < 0 && RHSC > -256) {
+ assert(Ptr->getOpcode() == ISD::ADD);
+ isInc = false;
+ Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
+ return true;
+ }
+ }
+ isInc = (Ptr->getOpcode() == ISD::ADD);
+ Offset = Ptr->getOperand(1);
+ return true;
+ } else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) {
+ // AddressingMode 2
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
+ int RHSC = (int)RHS->getZExtValue();
+ if (RHSC < 0 && RHSC > -0x1000) {
+ assert(Ptr->getOpcode() == ISD::ADD);
+ isInc = false;
+ Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
+ Base = Ptr->getOperand(0);
+ return true;
+ }
+ }
+
+ if (Ptr->getOpcode() == ISD::ADD) {
+ isInc = true;
+ ARM_AM::ShiftOpc ShOpcVal=
+ ARM_AM::getShiftOpcForNode(Ptr->getOperand(0).getOpcode());
+ if (ShOpcVal != ARM_AM::no_shift) {
+ Base = Ptr->getOperand(1);
+ Offset = Ptr->getOperand(0);
+ } else {
+ Base = Ptr->getOperand(0);
+ Offset = Ptr->getOperand(1);
+ }
+ return true;
+ }
+
+ isInc = (Ptr->getOpcode() == ISD::ADD);
+ Base = Ptr->getOperand(0);
+ Offset = Ptr->getOperand(1);
+ return true;
+ }
+
+ // FIXME: Use VLDM / VSTM to emulate indexed FP load / store.
+ return false;
+}
+
+static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT,
+ bool isSEXTLoad, SDValue &Base,
+ SDValue &Offset, bool &isInc,
+ SelectionDAG &DAG) {
+ if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
+ return false;
+
+ Base = Ptr->getOperand(0);
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
+ int RHSC = (int)RHS->getZExtValue();
+ if (RHSC < 0 && RHSC > -0x100) { // 8 bits.
+ assert(Ptr->getOpcode() == ISD::ADD);
+ isInc = false;
+ Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
+ return true;
+ } else if (RHSC > 0 && RHSC < 0x100) { // 8 bit, no zero.
+ isInc = Ptr->getOpcode() == ISD::ADD;
+ Offset = DAG.getConstant(RHSC, RHS->getValueType(0));
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/// getPreIndexedAddressParts - returns true by value, base pointer and
+/// offset pointer and addressing mode by reference if the node's address
+/// can be legally represented as pre-indexed load / store address.
+bool
+ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
+ SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const {
+ if (Subtarget->isThumb1Only())
+ return false;
+
+ EVT VT;
+ SDValue Ptr;
+ bool isSEXTLoad = false;
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ Ptr = LD->getBasePtr();
+ VT = LD->getMemoryVT();
+ isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ Ptr = ST->getBasePtr();
+ VT = ST->getMemoryVT();
+ } else
+ return false;
+
+ bool isInc;
+ bool isLegal = false;
+ if (Subtarget->isThumb2())
+ isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
+ Offset, isInc, DAG);
+ else
+ isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
+ Offset, isInc, DAG);
+ if (!isLegal)
+ return false;
+
+ AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC;
+ return true;
+}
+
+/// getPostIndexedAddressParts - returns true by value, base pointer and
+/// offset pointer and addressing mode by reference if this node can be
+/// combined with a load / store to form a post-indexed load / store.
+bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
+ SDValue &Base,
+ SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const {
+ if (Subtarget->isThumb1Only())
+ return false;
+
+ EVT VT;
+ SDValue Ptr;
+ bool isSEXTLoad = false;
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ VT = LD->getMemoryVT();
+ Ptr = LD->getBasePtr();
+ isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ VT = ST->getMemoryVT();
+ Ptr = ST->getBasePtr();
+ } else
+ return false;
+
+ bool isInc;
+ bool isLegal = false;
+ if (Subtarget->isThumb2())
+ isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
+ isInc, DAG);
+ else
+ isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
+ isInc, DAG);
+ if (!isLegal)
+ return false;
+
+ if (Ptr != Base) {
+ // Swap base ptr and offset to catch more post-index load / store when
+ // it's legal. In Thumb2 mode, offset must be an immediate.
+ if (Ptr == Offset && Op->getOpcode() == ISD::ADD &&
+ !Subtarget->isThumb2())
+ std::swap(Base, Offset);
+
+ // Post-indexed load / store update the base pointer.
+ if (Ptr != Base)
+ return false;
+ }
+
+ AM = isInc ? ISD::POST_INC : ISD::POST_DEC;
+ return true;
+}
+
+void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
+ const APInt &Mask,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth) const {
+ KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
+ switch (Op.getOpcode()) {
+ default: break;
+ case ARMISD::CMOV: {
+ // Bits are known zero/one if known on the LHS and RHS.
+ DAG.ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1);
+ if (KnownZero == 0 && KnownOne == 0) return;
+
+ APInt KnownZeroRHS, KnownOneRHS;
+ DAG.ComputeMaskedBits(Op.getOperand(1), Mask,
+ KnownZeroRHS, KnownOneRHS, Depth+1);
+ KnownZero &= KnownZeroRHS;
+ KnownOne &= KnownOneRHS;
+ return;
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// ARM Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+bool ARMTargetLowering::ExpandInlineAsm(CallInst *CI) const {
+ // Looking for "rev" which is V6+.
+ if (!Subtarget->hasV6Ops())
+ return false;
+
+ InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue());
+ std::string AsmStr = IA->getAsmString();
+ SmallVector<StringRef, 4> AsmPieces;
+ SplitString(AsmStr, AsmPieces, ";\n");
+
+ switch (AsmPieces.size()) {
+ default: return false;
+ case 1:
+ AsmStr = AsmPieces[0];
+ AsmPieces.clear();
+ SplitString(AsmStr, AsmPieces, " \t,");
+
+ // rev $0, $1
+ if (AsmPieces.size() == 3 &&
+ AsmPieces[0] == "rev" && AsmPieces[1] == "$0" && AsmPieces[2] == "$1" &&
+ IA->getConstraintString().compare(0, 4, "=l,l") == 0) {
+ IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
+ if (Ty && Ty->getBitWidth() == 32)
+ return IntrinsicLowering::LowerToByteSwap(CI);
+ }
+ break;
+ }
+
+ return false;
+}
+
+/// getConstraintType - Given a constraint letter, return the type of
+/// constraint it is for this target.
+ARMTargetLowering::ConstraintType
+ARMTargetLowering::getConstraintType(const std::string &Constraint) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ default: break;
+ case 'l': return C_RegisterClass;
+ case 'w': return C_RegisterClass;
+ case 'h': return C_RegisterClass;
+ case 'x': return C_RegisterClass;
+ case 't': return C_RegisterClass;
+ case 'j': return C_Other; // Constant for movw.
+ // An address with a single base register. Due to the way we
+ // currently handle addresses it is the same as an 'r' memory constraint.
+ case 'Q': return C_Memory;
+ }
+ } else if (Constraint.size() == 2) {
+ switch (Constraint[0]) {
+ default: break;
+ // All 'U+' constraints are addresses.
+ case 'U': return C_Memory;
+ }
+ }
+ return TargetLowering::getConstraintType(Constraint);
+}
+
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+ARMTargetLowering::getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const {
+ ConstraintWeight weight = CW_Invalid;
+ Value *CallOperandVal = info.CallOperandVal;
+ // If we don't have a value, we can't do a match,
+ // but allow it at the lowest weight.
+ if (CallOperandVal == NULL)
+ return CW_Default;
+ Type *type = CallOperandVal->getType();
+ // Look at the constraint type.
+ switch (*constraint) {
+ default:
+ weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
+ break;
+ case 'l':
+ if (type->isIntegerTy()) {
+ if (Subtarget->isThumb())
+ weight = CW_SpecificReg;
+ else
+ weight = CW_Register;
+ }
+ break;
+ case 'w':
+ if (type->isFloatingPointTy())
+ weight = CW_Register;
+ break;
+ }
+ return weight;
+}
+
+typedef std::pair<unsigned, const TargetRegisterClass*> RCPair;
+RCPair
+ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const {
+ if (Constraint.size() == 1) {
+ // GCC ARM Constraint Letters
+ switch (Constraint[0]) {
+ case 'l': // Low regs or general regs.
+ if (Subtarget->isThumb())
+ return RCPair(0U, ARM::tGPRRegisterClass);
+ else
+ return RCPair(0U, ARM::GPRRegisterClass);
+ case 'h': // High regs or no regs.
+ if (Subtarget->isThumb())
+ return RCPair(0U, ARM::hGPRRegisterClass);
+ break;
+ case 'r':
+ return RCPair(0U, ARM::GPRRegisterClass);
+ case 'w':
+ if (VT == MVT::f32)
+ return RCPair(0U, ARM::SPRRegisterClass);
+ if (VT.getSizeInBits() == 64)
+ return RCPair(0U, ARM::DPRRegisterClass);
+ if (VT.getSizeInBits() == 128)
+ return RCPair(0U, ARM::QPRRegisterClass);
+ break;
+ case 'x':
+ if (VT == MVT::f32)
+ return RCPair(0U, ARM::SPR_8RegisterClass);
+ if (VT.getSizeInBits() == 64)
+ return RCPair(0U, ARM::DPR_8RegisterClass);
+ if (VT.getSizeInBits() == 128)
+ return RCPair(0U, ARM::QPR_8RegisterClass);
+ break;
+ case 't':
+ if (VT == MVT::f32)
+ return RCPair(0U, ARM::SPRRegisterClass);
+ break;
+ }
+ }
+ if (StringRef("{cc}").equals_lower(Constraint))
+ return std::make_pair(unsigned(ARM::CPSR), ARM::CCRRegisterClass);
+
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
+
+/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
+/// vector. If it is invalid, don't add anything to Ops.
+void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
+ std::string &Constraint,
+ std::vector<SDValue>&Ops,
+ SelectionDAG &DAG) const {
+ SDValue Result(0, 0);
+
+ // Currently only support length 1 constraints.
+ if (Constraint.length() != 1) return;
+
+ char ConstraintLetter = Constraint[0];
+ switch (ConstraintLetter) {
+ default: break;
+ case 'j':
+ case 'I': case 'J': case 'K': case 'L':
+ case 'M': case 'N': case 'O':
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
+ if (!C)
+ return;
+
+ int64_t CVal64 = C->getSExtValue();
+ int CVal = (int) CVal64;
+ // None of these constraints allow values larger than 32 bits. Check
+ // that the value fits in an int.
+ if (CVal != CVal64)
+ return;
+
+ switch (ConstraintLetter) {
+ case 'j':
+ // Constant suitable for movw, must be between 0 and
+ // 65535.
+ if (Subtarget->hasV6T2Ops())
+ if (CVal >= 0 && CVal <= 65535)
+ break;
+ return;
+ case 'I':
+ if (Subtarget->isThumb1Only()) {
+ // This must be a constant between 0 and 255, for ADD
+ // immediates.
+ if (CVal >= 0 && CVal <= 255)
+ break;
+ } else if (Subtarget->isThumb2()) {
+ // A constant that can be used as an immediate value in a
+ // data-processing instruction.
+ if (ARM_AM::getT2SOImmVal(CVal) != -1)
+ break;
+ } else {
+ // A constant that can be used as an immediate value in a
+ // data-processing instruction.
+ if (ARM_AM::getSOImmVal(CVal) != -1)
+ break;
+ }
+ return;
+
+ case 'J':
+ if (Subtarget->isThumb()) { // FIXME thumb2
+ // This must be a constant between -255 and -1, for negated ADD
+ // immediates. This can be used in GCC with an "n" modifier that
+ // prints the negated value, for use with SUB instructions. It is
+ // not useful otherwise but is implemented for compatibility.
+ if (CVal >= -255 && CVal <= -1)
+ break;
+ } else {
+ // This must be a constant between -4095 and 4095. It is not clear
+ // what this constraint is intended for. Implemented for
+ // compatibility with GCC.
+ if (CVal >= -4095 && CVal <= 4095)
+ break;
+ }
+ return;
+
+ case 'K':
+ if (Subtarget->isThumb1Only()) {
+ // A 32-bit value where only one byte has a nonzero value. Exclude
+ // zero to match GCC. This constraint is used by GCC internally for
+ // constants that can be loaded with a move/shift combination.
+ // It is not useful otherwise but is implemented for compatibility.
+ if (CVal != 0 && ARM_AM::isThumbImmShiftedVal(CVal))
+ break;
+ } else if (Subtarget->isThumb2()) {
+ // A constant whose bitwise inverse can be used as an immediate
+ // value in a data-processing instruction. This can be used in GCC
+ // with a "B" modifier that prints the inverted value, for use with
+ // BIC and MVN instructions. It is not useful otherwise but is
+ // implemented for compatibility.
+ if (ARM_AM::getT2SOImmVal(~CVal) != -1)
+ break;
+ } else {
+ // A constant whose bitwise inverse can be used as an immediate
+ // value in a data-processing instruction. This can be used in GCC
+ // with a "B" modifier that prints the inverted value, for use with
+ // BIC and MVN instructions. It is not useful otherwise but is
+ // implemented for compatibility.
+ if (ARM_AM::getSOImmVal(~CVal) != -1)
+ break;
+ }
+ return;
+
+ case 'L':
+ if (Subtarget->isThumb1Only()) {
+ // This must be a constant between -7 and 7,
+ // for 3-operand ADD/SUB immediate instructions.
+ if (CVal >= -7 && CVal < 7)
+ break;
+ } else if (Subtarget->isThumb2()) {
+ // A constant whose negation can be used as an immediate value in a
+ // data-processing instruction. This can be used in GCC with an "n"
+ // modifier that prints the negated value, for use with SUB
+ // instructions. It is not useful otherwise but is implemented for
+ // compatibility.
+ if (ARM_AM::getT2SOImmVal(-CVal) != -1)
+ break;
+ } else {
+ // A constant whose negation can be used as an immediate value in a
+ // data-processing instruction. This can be used in GCC with an "n"
+ // modifier that prints the negated value, for use with SUB
+ // instructions. It is not useful otherwise but is implemented for
+ // compatibility.
+ if (ARM_AM::getSOImmVal(-CVal) != -1)
+ break;
+ }
+ return;
+
+ case 'M':
+ if (Subtarget->isThumb()) { // FIXME thumb2
+ // This must be a multiple of 4 between 0 and 1020, for
+ // ADD sp + immediate.
+ if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0))
+ break;
+ } else {
+ // A power of two or a constant between 0 and 32. This is used in
+ // GCC for the shift amount on shifted register operands, but it is
+ // useful in general for any shift amounts.
+ if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0))
+ break;
+ }
+ return;
+
+ case 'N':
+ if (Subtarget->isThumb()) { // FIXME thumb2
+ // This must be a constant between 0 and 31, for shift amounts.
+ if (CVal >= 0 && CVal <= 31)
+ break;
+ }
+ return;
+
+ case 'O':
+ if (Subtarget->isThumb()) { // FIXME thumb2
+ // This must be a multiple of 4 between -508 and 508, for
+ // ADD/SUB sp = sp + immediate.
+ if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0))
+ break;
+ }
+ return;
+ }
+ Result = DAG.getTargetConstant(CVal, Op.getValueType());
+ break;
+ }
+
+ if (Result.getNode()) {
+ Ops.push_back(Result);
+ return;
+ }
+ return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
+}
+
+bool
+ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+ // The ARM target isn't yet aware of offsets.
+ return false;
+}
+
+bool ARM::isBitFieldInvertedMask(unsigned v) {
+ if (v == 0xffffffff)
+ return 0;
+ // there can be 1's on either or both "outsides", all the "inside"
+ // bits must be 0's
+ unsigned int lsb = 0, msb = 31;
+ while (v & (1 << msb)) --msb;
+ while (v & (1 << lsb)) ++lsb;
+ for (unsigned int i = lsb; i <= msb; ++i) {
+ if (v & (1 << i))
+ return 0;
+ }
+ return 1;
+}
+
+/// isFPImmLegal - Returns true if the target can instruction select the
+/// specified FP immediate natively. If false, the legalizer will
+/// materialize the FP immediate as a load from a constant pool.
+bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
+ if (!Subtarget->hasVFP3())
+ return false;
+ if (VT == MVT::f32)
+ return ARM_AM::getFP32Imm(Imm) != -1;
+ if (VT == MVT::f64)
+ return ARM_AM::getFP64Imm(Imm) != -1;
+ return false;
+}
+
+/// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
+/// MemIntrinsicNodes. The associated MachineMemOperands record the alignment
+/// specified in the intrinsic calls.
+bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
+ const CallInst &I,
+ unsigned Intrinsic) const {
+ switch (Intrinsic) {
+ case Intrinsic::arm_neon_vld1:
+ case Intrinsic::arm_neon_vld2:
+ case Intrinsic::arm_neon_vld3:
+ case Intrinsic::arm_neon_vld4:
+ case Intrinsic::arm_neon_vld2lane:
+ case Intrinsic::arm_neon_vld3lane:
+ case Intrinsic::arm_neon_vld4lane: {
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ // Conservatively set memVT to the entire set of vectors loaded.
+ uint64_t NumElts = getTargetData()->getTypeAllocSize(I.getType()) / 8;
+ Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = 0;
+ Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
+ Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
+ Info.vol = false; // volatile loads with NEON intrinsics not supported
+ Info.readMem = true;
+ Info.writeMem = false;
+ return true;
+ }
+ case Intrinsic::arm_neon_vst1:
+ case Intrinsic::arm_neon_vst2:
+ case Intrinsic::arm_neon_vst3:
+ case Intrinsic::arm_neon_vst4:
+ case Intrinsic::arm_neon_vst2lane:
+ case Intrinsic::arm_neon_vst3lane:
+ case Intrinsic::arm_neon_vst4lane: {
+ Info.opc = ISD::INTRINSIC_VOID;
+ // Conservatively set memVT to the entire set of vectors stored.
+ unsigned NumElts = 0;
+ for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
+ Type *ArgTy = I.getArgOperand(ArgI)->getType();
+ if (!ArgTy->isVectorTy())
+ break;
+ NumElts += getTargetData()->getTypeAllocSize(ArgTy) / 8;
+ }
+ Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = 0;
+ Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
+ Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
+ Info.vol = false; // volatile stores with NEON intrinsics not supported
+ Info.readMem = false;
+ Info.writeMem = true;
+ return true;
+ }
+ case Intrinsic::arm_strexd: {
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = MVT::i64;
+ Info.ptrVal = I.getArgOperand(2);
+ Info.offset = 0;
+ Info.align = 8;
+ Info.vol = true;
+ Info.readMem = false;
+ Info.writeMem = true;
+ return true;
+ }
+ case Intrinsic::arm_ldrexd: {
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = MVT::i64;
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = 0;
+ Info.align = 8;
+ Info.vol = true;
+ Info.readMem = true;
+ Info.writeMem = false;
+ return true;
+ }
+ default:
+ break;
+ }
+
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h
new file mode 100644
index 0000000..5da9b27
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -0,0 +1,540 @@
+//===-- ARMISelLowering.h - ARM DAG Lowering Interface ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that ARM uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMISELLOWERING_H
+#define ARMISELLOWERING_H
+
+#include "ARMSubtarget.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include <vector>
+
+namespace llvm {
+ class ARMConstantPoolValue;
+
+ namespace ARMISD {
+ // ARM Specific DAG Nodes
+ enum NodeType {
+ // Start the numbering where the builtin ops and target ops leave off.
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+ Wrapper, // Wrapper - A wrapper node for TargetConstantPool,
+ // TargetExternalSymbol, and TargetGlobalAddress.
+ WrapperDYN, // WrapperDYN - A wrapper node for TargetGlobalAddress in
+ // DYN mode.
+ WrapperPIC, // WrapperPIC - A wrapper node for TargetGlobalAddress in
+ // PIC mode.
+ WrapperJT, // WrapperJT - A wrapper node for TargetJumpTable
+
+ CALL, // Function call.
+ CALL_PRED, // Function call that's predicable.
+ CALL_NOLINK, // Function call with branch not branch-and-link.
+ tCALL, // Thumb function call.
+ BRCOND, // Conditional branch.
+ BR_JT, // Jumptable branch.
+ BR2_JT, // Jumptable branch (2 level - jumptable entry is a jump).
+ RET_FLAG, // Return with a flag operand.
+
+ PIC_ADD, // Add with a PC operand and a PIC label.
+
+ CMP, // ARM compare instructions.
+ CMPZ, // ARM compare that sets only Z flag.
+ CMPFP, // ARM VFP compare instruction, sets FPSCR.
+ CMPFPw0, // ARM VFP compare against zero instruction, sets FPSCR.
+ FMSTAT, // ARM fmstat instruction.
+ CMOV, // ARM conditional move instructions.
+
+ BCC_i64,
+
+ RBIT, // ARM bitreverse instruction
+
+ FTOSI, // FP to sint within a FP register.
+ FTOUI, // FP to uint within a FP register.
+ SITOF, // sint to FP within a FP register.
+ UITOF, // uint to FP within a FP register.
+
+ SRL_FLAG, // V,Flag = srl_flag X -> srl X, 1 + save carry out.
+ SRA_FLAG, // V,Flag = sra_flag X -> sra X, 1 + save carry out.
+ RRX, // V = RRX X, Flag -> srl X, 1 + shift in carry flag.
+
+ ADDC, // Add with carry
+ ADDE, // Add using carry
+ SUBC, // Sub with carry
+ SUBE, // Sub using carry
+
+ VMOVRRD, // double to two gprs.
+ VMOVDRR, // Two gprs to double.
+
+ EH_SJLJ_SETJMP, // SjLj exception handling setjmp.
+ EH_SJLJ_LONGJMP, // SjLj exception handling longjmp.
+ EH_SJLJ_DISPATCHSETUP, // SjLj exception handling dispatch setup.
+
+ TC_RETURN, // Tail call return pseudo.
+
+ THREAD_POINTER,
+
+ DYN_ALLOC, // Dynamic allocation on the stack.
+
+ MEMBARRIER, // Memory barrier (DMB)
+ MEMBARRIER_MCR, // Memory barrier (MCR)
+
+ PRELOAD, // Preload
+
+ VCEQ, // Vector compare equal.
+ VCEQZ, // Vector compare equal to zero.
+ VCGE, // Vector compare greater than or equal.
+ VCGEZ, // Vector compare greater than or equal to zero.
+ VCLEZ, // Vector compare less than or equal to zero.
+ VCGEU, // Vector compare unsigned greater than or equal.
+ VCGT, // Vector compare greater than.
+ VCGTZ, // Vector compare greater than zero.
+ VCLTZ, // Vector compare less than zero.
+ VCGTU, // Vector compare unsigned greater than.
+ VTST, // Vector test bits.
+
+ // Vector shift by immediate:
+ VSHL, // ...left
+ VSHRs, // ...right (signed)
+ VSHRu, // ...right (unsigned)
+ VSHLLs, // ...left long (signed)
+ VSHLLu, // ...left long (unsigned)
+ VSHLLi, // ...left long (with maximum shift count)
+ VSHRN, // ...right narrow
+
+ // Vector rounding shift by immediate:
+ VRSHRs, // ...right (signed)
+ VRSHRu, // ...right (unsigned)
+ VRSHRN, // ...right narrow
+
+ // Vector saturating shift by immediate:
+ VQSHLs, // ...left (signed)
+ VQSHLu, // ...left (unsigned)
+ VQSHLsu, // ...left (signed to unsigned)
+ VQSHRNs, // ...right narrow (signed)
+ VQSHRNu, // ...right narrow (unsigned)
+ VQSHRNsu, // ...right narrow (signed to unsigned)
+
+ // Vector saturating rounding shift by immediate:
+ VQRSHRNs, // ...right narrow (signed)
+ VQRSHRNu, // ...right narrow (unsigned)
+ VQRSHRNsu, // ...right narrow (signed to unsigned)
+
+ // Vector shift and insert:
+ VSLI, // ...left
+ VSRI, // ...right
+
+ // Vector get lane (VMOV scalar to ARM core register)
+ // (These are used for 8- and 16-bit element types only.)
+ VGETLANEu, // zero-extend vector extract element
+ VGETLANEs, // sign-extend vector extract element
+
+ // Vector move immediate and move negated immediate:
+ VMOVIMM,
+ VMVNIMM,
+
+ // Vector duplicate:
+ VDUP,
+ VDUPLANE,
+
+ // Vector shuffles:
+ VEXT, // extract
+ VREV64, // reverse elements within 64-bit doublewords
+ VREV32, // reverse elements within 32-bit words
+ VREV16, // reverse elements within 16-bit halfwords
+ VZIP, // zip (interleave)
+ VUZP, // unzip (deinterleave)
+ VTRN, // transpose
+ VTBL1, // 1-register shuffle with mask
+ VTBL2, // 2-register shuffle with mask
+
+ // Vector multiply long:
+ VMULLs, // ...signed
+ VMULLu, // ...unsigned
+
+ // Operands of the standard BUILD_VECTOR node are not legalized, which
+ // is fine if BUILD_VECTORs are always lowered to shuffles or other
+ // operations, but for ARM some BUILD_VECTORs are legal as-is and their
+ // operands need to be legalized. Define an ARM-specific version of
+ // BUILD_VECTOR for this purpose.
+ BUILD_VECTOR,
+
+ // Floating-point max and min:
+ FMAX,
+ FMIN,
+
+ // Bit-field insert
+ BFI,
+
+ // Vector OR with immediate
+ VORRIMM,
+ // Vector AND with NOT of immediate
+ VBICIMM,
+
+ // Vector bitwise select
+ VBSL,
+
+ // Vector load N-element structure to all lanes:
+ VLD2DUP = ISD::FIRST_TARGET_MEMORY_OPCODE,
+ VLD3DUP,
+ VLD4DUP,
+
+ // NEON loads with post-increment base updates:
+ VLD1_UPD,
+ VLD2_UPD,
+ VLD3_UPD,
+ VLD4_UPD,
+ VLD2LN_UPD,
+ VLD3LN_UPD,
+ VLD4LN_UPD,
+ VLD2DUP_UPD,
+ VLD3DUP_UPD,
+ VLD4DUP_UPD,
+
+ // NEON stores with post-increment base updates:
+ VST1_UPD,
+ VST2_UPD,
+ VST3_UPD,
+ VST4_UPD,
+ VST2LN_UPD,
+ VST3LN_UPD,
+ VST4LN_UPD,
+
+ // 64-bit atomic ops (value split into two registers)
+ ATOMADD64_DAG,
+ ATOMSUB64_DAG,
+ ATOMOR64_DAG,
+ ATOMXOR64_DAG,
+ ATOMAND64_DAG,
+ ATOMNAND64_DAG,
+ ATOMSWAP64_DAG,
+ ATOMCMPXCHG64_DAG
+ };
+ }
+
+ /// Define some predicates that are used for node matching.
+ namespace ARM {
+ bool isBitFieldInvertedMask(unsigned v);
+ }
+
+ //===--------------------------------------------------------------------===//
+ // ARMTargetLowering - ARM Implementation of the TargetLowering interface
+
+ class ARMTargetLowering : public TargetLowering {
+ public:
+ explicit ARMTargetLowering(TargetMachine &TM);
+
+ virtual unsigned getJumpTableEncoding(void) const;
+
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+
+ /// ReplaceNodeResults - Replace the results of node with an illegal result
+ /// type with new values built out of custom code.
+ ///
+ virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG) const;
+
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+ /// getSetCCResultType - Return the value type to use for ISD::SETCC.
+ virtual EVT getSetCCResultType(EVT VT) const;
+
+ virtual MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+
+ virtual void
+ AdjustInstrPostInstrSelection(MachineInstr *MI, SDNode *Node) const;
+
+ SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const;
+ virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+
+ bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const;
+
+ /// allowsUnalignedMemoryAccesses - Returns true if the target allows
+ /// unaligned memory accesses. of the specified type.
+ /// FIXME: Add getOptimalMemOpType to implement memcpy with NEON?
+ virtual bool allowsUnalignedMemoryAccesses(EVT VT) const;
+
+ /// isLegalAddressingMode - Return true if the addressing mode represented
+ /// by AM is legal for this target, for a load/store of the specified type.
+ virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty)const;
+ bool isLegalT2ScaledAddressingMode(const AddrMode &AM, EVT VT) const;
+
+ /// isLegalICmpImmediate - Return true if the specified immediate is legal
+ /// icmp immediate, that is the target has icmp instructions which can
+ /// compare a register against the immediate without having to materialize
+ /// the immediate into a register.
+ virtual bool isLegalICmpImmediate(int64_t Imm) const;
+
+ /// isLegalAddImmediate - Return true if the specified immediate is legal
+ /// add immediate, that is the target has add instructions which can
+ /// add a register and the immediate without having to materialize
+ /// the immediate into a register.
+ virtual bool isLegalAddImmediate(int64_t Imm) const;
+
+ /// getPreIndexedAddressParts - returns true by value, base pointer and
+ /// offset pointer and addressing mode by reference if the node's address
+ /// can be legally represented as pre-indexed load / store address.
+ virtual bool getPreIndexedAddressParts(SDNode *N, SDValue &Base,
+ SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const;
+
+ /// getPostIndexedAddressParts - returns true by value, base pointer and
+ /// offset pointer and addressing mode by reference if this node can be
+ /// combined with a load / store to form a post-indexed load / store.
+ virtual bool getPostIndexedAddressParts(SDNode *N, SDNode *Op,
+ SDValue &Base, SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const;
+
+ virtual void computeMaskedBitsForTargetNode(const SDValue Op,
+ const APInt &Mask,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth) const;
+
+
+ virtual bool ExpandInlineAsm(CallInst *CI) const;
+
+ ConstraintType getConstraintType(const std::string &Constraint) const;
+
+ /// Examine constraint string and operand type and determine a weight value.
+ /// The operand object must already have been set up with the operand type.
+ ConstraintWeight getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const;
+
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const;
+
+ /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
+ /// vector. If it is invalid, don't add anything to Ops. If hasMemory is
+ /// true it means one of the asm constraint of the inline asm instruction
+ /// being processed is 'm'.
+ virtual void LowerAsmOperandForConstraint(SDValue Op,
+ std::string &Constraint,
+ std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const;
+
+ const ARMSubtarget* getSubtarget() const {
+ return Subtarget;
+ }
+
+ /// getRegClassFor - Return the register class that should be used for the
+ /// specified value type.
+ virtual TargetRegisterClass *getRegClassFor(EVT VT) const;
+
+ /// getMaximalGlobalOffset - Returns the maximal possible offset which can
+ /// be used for loads / stores from the global.
+ virtual unsigned getMaximalGlobalOffset() const;
+
+ /// createFastISel - This method returns a target specific FastISel object,
+ /// or null if the target does not support "fast" ISel.
+ virtual FastISel *createFastISel(FunctionLoweringInfo &funcInfo) const;
+
+ Sched::Preference getSchedulingPreference(SDNode *N) const;
+
+ bool isShuffleMaskLegal(const SmallVectorImpl<int> &M, EVT VT) const;
+ bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
+
+ /// isFPImmLegal - Returns true if the target can instruction select the
+ /// specified FP immediate natively. If false, the legalizer will
+ /// materialize the FP immediate as a load from a constant pool.
+ virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
+
+ virtual bool getTgtMemIntrinsic(IntrinsicInfo &Info,
+ const CallInst &I,
+ unsigned Intrinsic) const;
+ protected:
+ std::pair<const TargetRegisterClass*, uint8_t>
+ findRepresentativeClass(EVT VT) const;
+
+ private:
+ /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const ARMSubtarget *Subtarget;
+
+ const TargetRegisterInfo *RegInfo;
+
+ const InstrItineraryData *Itins;
+
+ /// ARMPCLabelIndex - Keep track of the number of ARM PC labels created.
+ ///
+ unsigned ARMPCLabelIndex;
+
+ void addTypeForNEON(EVT VT, EVT PromotedLdStVT, EVT PromotedBitwiseVT);
+ void addDRTypeForNEON(EVT VT);
+ void addQRTypeForNEON(EVT VT);
+
+ typedef SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPassVector;
+ void PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG,
+ SDValue Chain, SDValue &Arg,
+ RegsToPassVector &RegsToPass,
+ CCValAssign &VA, CCValAssign &NextVA,
+ SDValue &StackPtr,
+ SmallVector<SDValue, 8> &MemOpChains,
+ ISD::ArgFlagsTy Flags) const;
+ SDValue GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
+ SDValue &Root, SelectionDAG &DAG,
+ DebugLoc dl) const;
+
+ CCAssignFn *CCAssignFnForNode(CallingConv::ID CC, bool Return,
+ bool isVarArg) const;
+ SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
+ DebugLoc dl, SelectionDAG &DAG,
+ const CCValAssign &VA,
+ ISD::ArgFlagsTy Flags) const;
+ SDValue LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerEH_SJLJ_DISPATCHSETUP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *Subtarget) const;
+ SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalAddressDarwin(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
+ SelectionDAG &DAG) const;
+ SDValue LowerToTLSExecModels(GlobalAddressSDNode *GA,
+ SelectionDAG &DAG) const;
+ SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *ST) const;
+
+ SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ void VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
+ DebugLoc dl, SDValue &Chain, unsigned ArgOffset)
+ const;
+
+ void computeRegArea(CCState &CCInfo, MachineFunction &MF,
+ unsigned &VARegSize, unsigned &VARegSaveSize) const;
+
+ virtual SDValue
+ LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ /// HandleByVal - Target-specific cleanup for ByVal support.
+ virtual void HandleByVal(CCState *, unsigned &) const;
+
+ /// IsEligibleForTailCallOptimization - Check whether the call is eligible
+ /// for tail call optimization. Targets which want to do tail call
+ /// optimization should implement this function.
+ bool IsEligibleForTailCallOptimization(SDValue Callee,
+ CallingConv::ID CalleeCC,
+ bool isVarArg,
+ bool isCalleeStructRet,
+ bool isCallerStructRet,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SelectionDAG& DAG) const;
+ virtual SDValue
+ LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const;
+
+ virtual bool isUsedByReturnOnly(SDNode *N) const;
+
+ virtual bool mayBeEmittedAsTailCall(CallInst *CI) const;
+
+ SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
+ SDValue &ARMcc, SelectionDAG &DAG, DebugLoc dl) const;
+ SDValue getVFPCmp(SDValue LHS, SDValue RHS,
+ SelectionDAG &DAG, DebugLoc dl) const;
+ SDValue duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const;
+
+ SDValue OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const;
+
+ MachineBasicBlock *EmitAtomicCmpSwap(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ unsigned Size) const;
+ MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ unsigned Size,
+ unsigned BinOpcode) const;
+ MachineBasicBlock *EmitAtomicBinary64(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ unsigned Op1,
+ unsigned Op2,
+ bool NeedsCarry = false,
+ bool IsCmpxchg = false) const;
+ MachineBasicBlock * EmitAtomicBinaryMinMax(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ unsigned Size,
+ bool signExtend,
+ ARMCC::CondCodes Cond) const;
+
+ void EmitBasePointerRecalculation(MachineInstr *MI, MachineBasicBlock *MBB,
+ MachineBasicBlock *DispatchBB) const;
+
+ void SetupEntryBlockForSjLj(MachineInstr *MI,
+ MachineBasicBlock *MBB,
+ MachineBasicBlock *DispatchBB, int FI) const;
+
+ MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+
+ bool RemapAddSubWithFlags(MachineInstr *MI, MachineBasicBlock *BB) const;
+ };
+
+ enum NEONModImmType {
+ VMOVModImm,
+ VMVNModImm,
+ OtherModImm
+ };
+
+
+ namespace ARM {
+ FastISel *createFastISel(FunctionLoweringInfo &funcInfo);
+ }
+}
+
+#endif // ARMISELLOWERING_H
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td b/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td
new file mode 100644
index 0000000..7cbc911
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td
@@ -0,0 +1,1965 @@
+//===- ARMInstrFormats.td - ARM Instruction Formats ----------*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//
+// ARM Instruction Format Definitions.
+//
+
+// Format specifies the encoding used by the instruction. This is part of the
+// ad-hoc solution used to emit machine instruction encodings by our machine
+// code emitter.
+class Format<bits<6> val> {
+ bits<6> Value = val;
+}
+
+def Pseudo : Format<0>;
+def MulFrm : Format<1>;
+def BrFrm : Format<2>;
+def BrMiscFrm : Format<3>;
+
+def DPFrm : Format<4>;
+def DPSoRegRegFrm : Format<5>;
+
+def LdFrm : Format<6>;
+def StFrm : Format<7>;
+def LdMiscFrm : Format<8>;
+def StMiscFrm : Format<9>;
+def LdStMulFrm : Format<10>;
+
+def LdStExFrm : Format<11>;
+
+def ArithMiscFrm : Format<12>;
+def SatFrm : Format<13>;
+def ExtFrm : Format<14>;
+
+def VFPUnaryFrm : Format<15>;
+def VFPBinaryFrm : Format<16>;
+def VFPConv1Frm : Format<17>;
+def VFPConv2Frm : Format<18>;
+def VFPConv3Frm : Format<19>;
+def VFPConv4Frm : Format<20>;
+def VFPConv5Frm : Format<21>;
+def VFPLdStFrm : Format<22>;
+def VFPLdStMulFrm : Format<23>;
+def VFPMiscFrm : Format<24>;
+
+def ThumbFrm : Format<25>;
+def MiscFrm : Format<26>;
+
+def NGetLnFrm : Format<27>;
+def NSetLnFrm : Format<28>;
+def NDupFrm : Format<29>;
+def NLdStFrm : Format<30>;
+def N1RegModImmFrm: Format<31>;
+def N2RegFrm : Format<32>;
+def NVCVTFrm : Format<33>;
+def NVDupLnFrm : Format<34>;
+def N2RegVShLFrm : Format<35>;
+def N2RegVShRFrm : Format<36>;
+def N3RegFrm : Format<37>;
+def N3RegVShFrm : Format<38>;
+def NVExtFrm : Format<39>;
+def NVMulSLFrm : Format<40>;
+def NVTBLFrm : Format<41>;
+def DPSoRegImmFrm : Format<42>;
+
+// Misc flags.
+
+// The instruction has an Rn register operand.
+// UnaryDP - Indicates this is a unary data processing instruction, i.e.
+// it doesn't have a Rn operand.
+class UnaryDP { bit isUnaryDataProc = 1; }
+
+// Xform16Bit - Indicates this Thumb2 instruction may be transformed into
+// a 16-bit Thumb instruction if certain conditions are met.
+class Xform16Bit { bit canXformTo16Bit = 1; }
+
+//===----------------------------------------------------------------------===//
+// ARM Instruction flags. These need to match ARMBaseInstrInfo.h.
+//
+
+// FIXME: Once the JIT is MC-ized, these can go away.
+// Addressing mode.
+class AddrMode<bits<5> val> {
+ bits<5> Value = val;
+}
+def AddrModeNone : AddrMode<0>;
+def AddrMode1 : AddrMode<1>;
+def AddrMode2 : AddrMode<2>;
+def AddrMode3 : AddrMode<3>;
+def AddrMode4 : AddrMode<4>;
+def AddrMode5 : AddrMode<5>;
+def AddrMode6 : AddrMode<6>;
+def AddrModeT1_1 : AddrMode<7>;
+def AddrModeT1_2 : AddrMode<8>;
+def AddrModeT1_4 : AddrMode<9>;
+def AddrModeT1_s : AddrMode<10>;
+def AddrModeT2_i12 : AddrMode<11>;
+def AddrModeT2_i8 : AddrMode<12>;
+def AddrModeT2_so : AddrMode<13>;
+def AddrModeT2_pc : AddrMode<14>;
+def AddrModeT2_i8s4 : AddrMode<15>;
+def AddrMode_i12 : AddrMode<16>;
+
+// Load / store index mode.
+class IndexMode<bits<2> val> {
+ bits<2> Value = val;
+}
+def IndexModeNone : IndexMode<0>;
+def IndexModePre : IndexMode<1>;
+def IndexModePost : IndexMode<2>;
+def IndexModeUpd : IndexMode<3>;
+
+// Instruction execution domain.
+class Domain<bits<3> val> {
+ bits<3> Value = val;
+}
+def GenericDomain : Domain<0>;
+def VFPDomain : Domain<1>; // Instructions in VFP domain only
+def NeonDomain : Domain<2>; // Instructions in Neon domain only
+def VFPNeonDomain : Domain<3>; // Instructions in both VFP & Neon domains
+def VFPNeonA8Domain : Domain<5>; // Instructions in VFP & Neon under A8
+
+//===----------------------------------------------------------------------===//
+// ARM special operands.
+//
+
+// ARM imod and iflag operands, used only by the CPS instruction.
+def imod_op : Operand<i32> {
+ let PrintMethod = "printCPSIMod";
+}
+
+def ProcIFlagsOperand : AsmOperandClass {
+ let Name = "ProcIFlags";
+ let ParserMethod = "parseProcIFlagsOperand";
+}
+def iflags_op : Operand<i32> {
+ let PrintMethod = "printCPSIFlag";
+ let ParserMatchClass = ProcIFlagsOperand;
+}
+
+// ARM Predicate operand. Default to 14 = always (AL). Second part is CC
+// register whose default is 0 (no register).
+def CondCodeOperand : AsmOperandClass { let Name = "CondCode"; }
+def pred : PredicateOperand<OtherVT, (ops i32imm, i32imm),
+ (ops (i32 14), (i32 zero_reg))> {
+ let PrintMethod = "printPredicateOperand";
+ let ParserMatchClass = CondCodeOperand;
+ let DecoderMethod = "DecodePredicateOperand";
+}
+
+// Conditional code result for instructions whose 's' bit is set, e.g. subs.
+def CCOutOperand : AsmOperandClass { let Name = "CCOut"; }
+def cc_out : OptionalDefOperand<OtherVT, (ops CCR), (ops (i32 zero_reg))> {
+ let EncoderMethod = "getCCOutOpValue";
+ let PrintMethod = "printSBitModifierOperand";
+ let ParserMatchClass = CCOutOperand;
+ let DecoderMethod = "DecodeCCOutOperand";
+}
+
+// Same as cc_out except it defaults to setting CPSR.
+def s_cc_out : OptionalDefOperand<OtherVT, (ops CCR), (ops (i32 CPSR))> {
+ let EncoderMethod = "getCCOutOpValue";
+ let PrintMethod = "printSBitModifierOperand";
+ let ParserMatchClass = CCOutOperand;
+ let DecoderMethod = "DecodeCCOutOperand";
+}
+
+// ARM special operands for disassembly only.
+//
+def SetEndAsmOperand : AsmOperandClass {
+ let Name = "SetEndImm";
+ let ParserMethod = "parseSetEndImm";
+}
+def setend_op : Operand<i32> {
+ let PrintMethod = "printSetendOperand";
+ let ParserMatchClass = SetEndAsmOperand;
+}
+
+def MSRMaskOperand : AsmOperandClass {
+ let Name = "MSRMask";
+ let ParserMethod = "parseMSRMaskOperand";
+}
+def msr_mask : Operand<i32> {
+ let PrintMethod = "printMSRMaskOperand";
+ let DecoderMethod = "DecodeMSRMask";
+ let ParserMatchClass = MSRMaskOperand;
+}
+
+// Shift Right Immediate - A shift right immediate is encoded differently from
+// other shift immediates. The imm6 field is encoded like so:
+//
+// Offset Encoding
+// 8 imm6<5:3> = '001', 8 - <imm> is encoded in imm6<2:0>
+// 16 imm6<5:4> = '01', 16 - <imm> is encoded in imm6<3:0>
+// 32 imm6<5> = '1', 32 - <imm> is encoded in imm6<4:0>
+// 64 64 - <imm> is encoded in imm6<5:0>
+def shr_imm8 : Operand<i32> {
+ let EncoderMethod = "getShiftRight8Imm";
+ let DecoderMethod = "DecodeShiftRight8Imm";
+}
+def shr_imm16 : Operand<i32> {
+ let EncoderMethod = "getShiftRight16Imm";
+ let DecoderMethod = "DecodeShiftRight16Imm";
+}
+def shr_imm32 : Operand<i32> {
+ let EncoderMethod = "getShiftRight32Imm";
+ let DecoderMethod = "DecodeShiftRight32Imm";
+}
+def shr_imm64 : Operand<i32> {
+ let EncoderMethod = "getShiftRight64Imm";
+ let DecoderMethod = "DecodeShiftRight64Imm";
+}
+
+//===----------------------------------------------------------------------===//
+// ARM Assembler alias templates.
+//
+class ARMInstAlias<string Asm, dag Result, bit Emit = 0b1>
+ : InstAlias<Asm, Result, Emit>, Requires<[IsARM]>;
+class tInstAlias<string Asm, dag Result, bit Emit = 0b1>
+ : InstAlias<Asm, Result, Emit>, Requires<[IsThumb]>;
+class t2InstAlias<string Asm, dag Result, bit Emit = 0b1>
+ : InstAlias<Asm, Result, Emit>, Requires<[IsThumb2]>;
+class VFP2InstAlias<string Asm, dag Result, bit Emit = 0b1>
+ : InstAlias<Asm, Result, Emit>, Requires<[HasVFP2]>;
+class VFP3InstAlias<string Asm, dag Result, bit Emit = 0b1>
+ : InstAlias<Asm, Result, Emit>, Requires<[HasVFP3]>;
+
+//===----------------------------------------------------------------------===//
+// ARM Instruction templates.
+//
+
+
+class InstTemplate<AddrMode am, int sz, IndexMode im,
+ Format f, Domain d, string cstr, InstrItinClass itin>
+ : Instruction {
+ let Namespace = "ARM";
+
+ AddrMode AM = am;
+ int Size = sz;
+ IndexMode IM = im;
+ bits<2> IndexModeBits = IM.Value;
+ Format F = f;
+ bits<6> Form = F.Value;
+ Domain D = d;
+ bit isUnaryDataProc = 0;
+ bit canXformTo16Bit = 0;
+ // The instruction is a 16-bit flag setting Thumb instruction. Used
+ // by the parser to determine whether to require the 'S' suffix on the
+ // mnemonic (when not in an IT block) or preclude it (when in an IT block).
+ bit thumbArithFlagSetting = 0;
+
+ // If this is a pseudo instruction, mark it isCodeGenOnly.
+ let isCodeGenOnly = !eq(!cast<string>(f), "Pseudo");
+
+ // The layout of TSFlags should be kept in sync with ARMBaseInfo.h.
+ let TSFlags{4-0} = AM.Value;
+ let TSFlags{6-5} = IndexModeBits;
+ let TSFlags{12-7} = Form;
+ let TSFlags{13} = isUnaryDataProc;
+ let TSFlags{14} = canXformTo16Bit;
+ let TSFlags{17-15} = D.Value;
+ let TSFlags{18} = thumbArithFlagSetting;
+
+ let Constraints = cstr;
+ let Itinerary = itin;
+}
+
+class Encoding {
+ field bits<32> Inst;
+}
+
+class InstARM<AddrMode am, int sz, IndexMode im,
+ Format f, Domain d, string cstr, InstrItinClass itin>
+ : InstTemplate<am, sz, im, f, d, cstr, itin>, Encoding {
+ let DecoderNamespace = "ARM";
+}
+
+// This Encoding-less class is used by Thumb1 to specify the encoding bits later
+// on by adding flavors to specific instructions.
+class InstThumb<AddrMode am, int sz, IndexMode im,
+ Format f, Domain d, string cstr, InstrItinClass itin>
+ : InstTemplate<am, sz, im, f, d, cstr, itin> {
+ let DecoderNamespace = "Thumb";
+}
+
+class PseudoInst<dag oops, dag iops, InstrItinClass itin, list<dag> pattern>
+ : InstTemplate<AddrModeNone, 0, IndexModeNone, Pseudo,
+ GenericDomain, "", itin> {
+ let OutOperandList = oops;
+ let InOperandList = iops;
+ let Pattern = pattern;
+ let isCodeGenOnly = 1;
+ let isPseudo = 1;
+}
+
+// PseudoInst that's ARM-mode only.
+class ARMPseudoInst<dag oops, dag iops, int sz, InstrItinClass itin,
+ list<dag> pattern>
+ : PseudoInst<oops, iops, itin, pattern> {
+ let Size = sz;
+ list<Predicate> Predicates = [IsARM];
+}
+
+// PseudoInst that's Thumb-mode only.
+class tPseudoInst<dag oops, dag iops, int sz, InstrItinClass itin,
+ list<dag> pattern>
+ : PseudoInst<oops, iops, itin, pattern> {
+ let Size = sz;
+ list<Predicate> Predicates = [IsThumb];
+}
+
+// PseudoInst that's Thumb2-mode only.
+class t2PseudoInst<dag oops, dag iops, int sz, InstrItinClass itin,
+ list<dag> pattern>
+ : PseudoInst<oops, iops, itin, pattern> {
+ let Size = sz;
+ list<Predicate> Predicates = [IsThumb2];
+}
+
+class ARMPseudoExpand<dag oops, dag iops, int sz,
+ InstrItinClass itin, list<dag> pattern,
+ dag Result>
+ : ARMPseudoInst<oops, iops, sz, itin, pattern>,
+ PseudoInstExpansion<Result>;
+
+class tPseudoExpand<dag oops, dag iops, int sz,
+ InstrItinClass itin, list<dag> pattern,
+ dag Result>
+ : tPseudoInst<oops, iops, sz, itin, pattern>,
+ PseudoInstExpansion<Result>;
+
+class t2PseudoExpand<dag oops, dag iops, int sz,
+ InstrItinClass itin, list<dag> pattern,
+ dag Result>
+ : t2PseudoInst<oops, iops, sz, itin, pattern>,
+ PseudoInstExpansion<Result>;
+
+// Almost all ARM instructions are predicable.
+class I<dag oops, dag iops, AddrMode am, int sz,
+ IndexMode im, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr,
+ list<dag> pattern>
+ : InstARM<am, sz, im, f, GenericDomain, cstr, itin> {
+ bits<4> p;
+ let Inst{31-28} = p;
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p));
+ let AsmString = !strconcat(opc, "${p}", asm);
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsARM];
+}
+
+// A few are not predicable
+class InoP<dag oops, dag iops, AddrMode am, int sz,
+ IndexMode im, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr,
+ list<dag> pattern>
+ : InstARM<am, sz, im, f, GenericDomain, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = iops;
+ let AsmString = !strconcat(opc, asm);
+ let Pattern = pattern;
+ let isPredicable = 0;
+ list<Predicate> Predicates = [IsARM];
+}
+
+// Same as I except it can optionally modify CPSR. Note it's modeled as an input
+// operand since by default it's a zero register. It will become an implicit def
+// once it's "flipped".
+class sI<dag oops, dag iops, AddrMode am, int sz,
+ IndexMode im, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr,
+ list<dag> pattern>
+ : InstARM<am, sz, im, f, GenericDomain, cstr, itin> {
+ bits<4> p; // Predicate operand
+ bits<1> s; // condition-code set flag ('1' if the insn should set the flags)
+ let Inst{31-28} = p;
+ let Inst{20} = s;
+
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p, cc_out:$s));
+ let AsmString = !strconcat(opc, "${s}${p}", asm);
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsARM];
+}
+
+// Special cases
+class XI<dag oops, dag iops, AddrMode am, int sz,
+ IndexMode im, Format f, InstrItinClass itin,
+ string asm, string cstr, list<dag> pattern>
+ : InstARM<am, sz, im, f, GenericDomain, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = iops;
+ let AsmString = asm;
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsARM];
+}
+
+class AI<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, 4, IndexModeNone, f, itin,
+ opc, asm, "", pattern>;
+class AsI<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : sI<oops, iops, AddrModeNone, 4, IndexModeNone, f, itin,
+ opc, asm, "", pattern>;
+class AXI<dag oops, dag iops, Format f, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : XI<oops, iops, AddrModeNone, 4, IndexModeNone, f, itin,
+ asm, "", pattern>;
+class AInoP<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : InoP<oops, iops, AddrModeNone, 4, IndexModeNone, f, itin,
+ opc, asm, "", pattern>;
+
+// Ctrl flow instructions
+class ABI<bits<4> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, 4, IndexModeNone, BrFrm, itin,
+ opc, asm, "", pattern> {
+ let Inst{27-24} = opcod;
+}
+class ABXI<bits<4> opcod, dag oops, dag iops, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : XI<oops, iops, AddrModeNone, 4, IndexModeNone, BrFrm, itin,
+ asm, "", pattern> {
+ let Inst{27-24} = opcod;
+}
+
+// BR_JT instructions
+class JTI<dag oops, dag iops, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : XI<oops, iops, AddrModeNone, 0, IndexModeNone, BrMiscFrm, itin,
+ asm, "", pattern>;
+
+// Atomic load/store instructions
+class AIldrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, 4, IndexModeNone, LdStExFrm, itin,
+ opc, asm, "", pattern> {
+ bits<4> Rt;
+ bits<4> addr;
+ let Inst{27-23} = 0b00011;
+ let Inst{22-21} = opcod;
+ let Inst{20} = 1;
+ let Inst{19-16} = addr;
+ let Inst{15-12} = Rt;
+ let Inst{11-0} = 0b111110011111;
+}
+class AIstrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, 4, IndexModeNone, LdStExFrm, itin,
+ opc, asm, "", pattern> {
+ bits<4> Rd;
+ bits<4> Rt;
+ bits<4> addr;
+ let Inst{27-23} = 0b00011;
+ let Inst{22-21} = opcod;
+ let Inst{20} = 0;
+ let Inst{19-16} = addr;
+ let Inst{15-12} = Rd;
+ let Inst{11-4} = 0b11111001;
+ let Inst{3-0} = Rt;
+}
+class AIswp<bit b, dag oops, dag iops, string opc, list<dag> pattern>
+ : AI<oops, iops, MiscFrm, NoItinerary, opc, "\t$Rt, $Rt2, $addr", pattern> {
+ bits<4> Rt;
+ bits<4> Rt2;
+ bits<4> addr;
+ let Inst{27-23} = 0b00010;
+ let Inst{22} = b;
+ let Inst{21-20} = 0b00;
+ let Inst{19-16} = addr;
+ let Inst{15-12} = Rt;
+ let Inst{11-4} = 0b00001001;
+ let Inst{3-0} = Rt2;
+}
+
+// addrmode1 instructions
+class AI1<bits<4> opcod, dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrMode1, 4, IndexModeNone, f, itin,
+ opc, asm, "", pattern> {
+ let Inst{24-21} = opcod;
+ let Inst{27-26} = 0b00;
+}
+class AsI1<bits<4> opcod, dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : sI<oops, iops, AddrMode1, 4, IndexModeNone, f, itin,
+ opc, asm, "", pattern> {
+ let Inst{24-21} = opcod;
+ let Inst{27-26} = 0b00;
+}
+class AXI1<bits<4> opcod, dag oops, dag iops, Format f, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : XI<oops, iops, AddrMode1, 4, IndexModeNone, f, itin,
+ asm, "", pattern> {
+ let Inst{24-21} = opcod;
+ let Inst{27-26} = 0b00;
+}
+
+// loads
+
+// LDR/LDRB/STR/STRB/...
+class AI2ldst<bits<3> op, bit isLd, bit isByte, dag oops, dag iops, AddrMode am,
+ Format f, InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : I<oops, iops, am, 4, IndexModeNone, f, itin, opc, asm,
+ "", pattern> {
+ let Inst{27-25} = op;
+ let Inst{24} = 1; // 24 == P
+ // 23 == U
+ let Inst{22} = isByte;
+ let Inst{21} = 0; // 21 == W
+ let Inst{20} = isLd;
+}
+// Indexed load/stores
+class AI2ldstidx<bit isLd, bit isByte, bit isPre, dag oops, dag iops,
+ IndexMode im, Format f, InstrItinClass itin, string opc,
+ string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode2, 4, im, f, itin,
+ opc, asm, cstr, pattern> {
+ bits<4> Rt;
+ let Inst{27-26} = 0b01;
+ let Inst{24} = isPre; // P bit
+ let Inst{22} = isByte; // B bit
+ let Inst{21} = isPre; // W bit
+ let Inst{20} = isLd; // L bit
+ let Inst{15-12} = Rt;
+}
+class AI2stridx_reg<bit isByte, bit isPre, dag oops, dag iops,
+ IndexMode im, Format f, InstrItinClass itin, string opc,
+ string asm, string cstr, list<dag> pattern>
+ : AI2ldstidx<0, isByte, isPre, oops, iops, im, f, itin, opc, asm, cstr,
+ pattern> {
+ // AM2 store w/ two operands: (GPR, am2offset)
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<14> offset;
+ bits<4> Rn;
+ let Inst{25} = 1;
+ let Inst{23} = offset{12};
+ let Inst{19-16} = Rn;
+ let Inst{11-5} = offset{11-5};
+ let Inst{4} = 0;
+ let Inst{3-0} = offset{3-0};
+}
+
+class AI2stridx_imm<bit isByte, bit isPre, dag oops, dag iops,
+ IndexMode im, Format f, InstrItinClass itin, string opc,
+ string asm, string cstr, list<dag> pattern>
+ : AI2ldstidx<0, isByte, isPre, oops, iops, im, f, itin, opc, asm, cstr,
+ pattern> {
+ // AM2 store w/ two operands: (GPR, am2offset)
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<14> offset;
+ bits<4> Rn;
+ let Inst{25} = 0;
+ let Inst{23} = offset{12};
+ let Inst{19-16} = Rn;
+ let Inst{11-0} = offset{11-0};
+}
+
+
+// FIXME: Merge with the above class when addrmode2 gets used for STR, STRB
+// but for now use this class for STRT and STRBT.
+class AI2stridxT<bit isByte, bit isPre, dag oops, dag iops,
+ IndexMode im, Format f, InstrItinClass itin, string opc,
+ string asm, string cstr, list<dag> pattern>
+ : AI2ldstidx<0, isByte, isPre, oops, iops, im, f, itin, opc, asm, cstr,
+ pattern> {
+ // AM2 store w/ two operands: (GPR, am2offset)
+ // {17-14} Rn
+ // {13} 1 == Rm, 0 == imm12
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<18> addr;
+ let Inst{25} = addr{13};
+ let Inst{23} = addr{12};
+ let Inst{19-16} = addr{17-14};
+ let Inst{11-0} = addr{11-0};
+}
+
+// addrmode3 instructions
+class AI3ld<bits<4> op, bit op20, dag oops, dag iops, Format f,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrMode3, 4, IndexModeNone, f, itin,
+ opc, asm, "", pattern> {
+ bits<14> addr;
+ bits<4> Rt;
+ let Inst{27-25} = 0b000;
+ let Inst{24} = 1; // P bit
+ let Inst{23} = addr{8}; // U bit
+ let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm
+ let Inst{21} = 0; // W bit
+ let Inst{20} = op20; // L bit
+ let Inst{19-16} = addr{12-9}; // Rn
+ let Inst{15-12} = Rt; // Rt
+ let Inst{11-8} = addr{7-4}; // imm7_4/zero
+ let Inst{7-4} = op;
+ let Inst{3-0} = addr{3-0}; // imm3_0/Rm
+
+ let DecoderMethod = "DecodeAddrMode3Instruction";
+}
+
+class AI3ldstidx<bits<4> op, bit op20, bit isPre, dag oops, dag iops,
+ IndexMode im, Format f, InstrItinClass itin, string opc,
+ string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode3, 4, im, f, itin,
+ opc, asm, cstr, pattern> {
+ bits<4> Rt;
+ let Inst{27-25} = 0b000;
+ let Inst{24} = isPre; // P bit
+ let Inst{21} = isPre; // W bit
+ let Inst{20} = op20; // L bit
+ let Inst{15-12} = Rt; // Rt
+ let Inst{7-4} = op;
+}
+
+// FIXME: Merge with the above class when addrmode2 gets used for LDR, LDRB
+// but for now use this class for LDRSBT, LDRHT, LDSHT.
+class AI3ldstidxT<bits<4> op, bit isLoad, dag oops, dag iops,
+ IndexMode im, Format f, InstrItinClass itin, string opc,
+ string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode3, 4, im, f, itin, opc, asm, cstr, pattern> {
+ // {13} 1 == imm8, 0 == Rm
+ // {12-9} Rn
+ // {8} isAdd
+ // {7-4} imm7_4/zero
+ // {3-0} imm3_0/Rm
+ bits<4> addr;
+ bits<4> Rt;
+ let Inst{27-25} = 0b000;
+ let Inst{24} = 0; // P bit
+ let Inst{21} = 1;
+ let Inst{20} = isLoad; // L bit
+ let Inst{19-16} = addr; // Rn
+ let Inst{15-12} = Rt; // Rt
+ let Inst{7-4} = op;
+}
+
+// stores
+class AI3str<bits<4> op, dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrMode3, 4, IndexModeNone, f, itin,
+ opc, asm, "", pattern> {
+ bits<14> addr;
+ bits<4> Rt;
+ let Inst{27-25} = 0b000;
+ let Inst{24} = 1; // P bit
+ let Inst{23} = addr{8}; // U bit
+ let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm
+ let Inst{21} = 0; // W bit
+ let Inst{20} = 0; // L bit
+ let Inst{19-16} = addr{12-9}; // Rn
+ let Inst{15-12} = Rt; // Rt
+ let Inst{11-8} = addr{7-4}; // imm7_4/zero
+ let Inst{7-4} = op;
+ let Inst{3-0} = addr{3-0}; // imm3_0/Rm
+ let DecoderMethod = "DecodeAddrMode3Instruction";
+}
+
+// addrmode4 instructions
+class AXI4<dag oops, dag iops, IndexMode im, Format f, InstrItinClass itin,
+ string asm, string cstr, list<dag> pattern>
+ : XI<oops, iops, AddrMode4, 4, im, f, itin, asm, cstr, pattern> {
+ bits<4> p;
+ bits<16> regs;
+ bits<4> Rn;
+ let Inst{31-28} = p;
+ let Inst{27-25} = 0b100;
+ let Inst{22} = 0; // S bit
+ let Inst{19-16} = Rn;
+ let Inst{15-0} = regs;
+}
+
+// Unsigned multiply, multiply-accumulate instructions.
+class AMul1I<bits<7> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, 4, IndexModeNone, MulFrm, itin,
+ opc, asm, "", pattern> {
+ let Inst{7-4} = 0b1001;
+ let Inst{20} = 0; // S bit
+ let Inst{27-21} = opcod;
+}
+class AsMul1I<bits<7> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : sI<oops, iops, AddrModeNone, 4, IndexModeNone, MulFrm, itin,
+ opc, asm, "", pattern> {
+ let Inst{7-4} = 0b1001;
+ let Inst{27-21} = opcod;
+}
+
+// Most significant word multiply
+class AMul2I<bits<7> opcod, bits<4> opc7_4, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, 4, IndexModeNone, MulFrm, itin,
+ opc, asm, "", pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{7-4} = opc7_4;
+ let Inst{20} = 1;
+ let Inst{27-21} = opcod;
+ let Inst{19-16} = Rd;
+ let Inst{11-8} = Rm;
+ let Inst{3-0} = Rn;
+}
+// MSW multiple w/ Ra operand
+class AMul2Ia<bits<7> opcod, bits<4> opc7_4, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : AMul2I<opcod, opc7_4, oops, iops, itin, opc, asm, pattern> {
+ bits<4> Ra;
+ let Inst{15-12} = Ra;
+}
+
+// SMUL<x><y> / SMULW<y> / SMLA<x><y> / SMLAW<x><y>
+class AMulxyIbase<bits<7> opcod, bits<2> bit6_5, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, 4, IndexModeNone, MulFrm, itin,
+ opc, asm, "", pattern> {
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{4} = 0;
+ let Inst{7} = 1;
+ let Inst{20} = 0;
+ let Inst{27-21} = opcod;
+ let Inst{6-5} = bit6_5;
+ let Inst{11-8} = Rm;
+ let Inst{3-0} = Rn;
+}
+class AMulxyI<bits<7> opcod, bits<2> bit6_5, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : AMulxyIbase<opcod, bit6_5, oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ let Inst{19-16} = Rd;
+}
+
+// AMulxyI with Ra operand
+class AMulxyIa<bits<7> opcod, bits<2> bit6_5, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : AMulxyI<opcod, bit6_5, oops, iops, itin, opc, asm, pattern> {
+ bits<4> Ra;
+ let Inst{15-12} = Ra;
+}
+// SMLAL*
+class AMulxyI64<bits<7> opcod, bits<2> bit6_5, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : AMulxyIbase<opcod, bit6_5, oops, iops, itin, opc, asm, pattern> {
+ bits<4> RdLo;
+ bits<4> RdHi;
+ let Inst{19-16} = RdHi;
+ let Inst{15-12} = RdLo;
+}
+
+// Extend instructions.
+class AExtI<bits<8> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, 4, IndexModeNone, ExtFrm, itin,
+ opc, asm, "", pattern> {
+ // All AExtI instructions have Rd and Rm register operands.
+ bits<4> Rd;
+ bits<4> Rm;
+ let Inst{15-12} = Rd;
+ let Inst{3-0} = Rm;
+ let Inst{7-4} = 0b0111;
+ let Inst{9-8} = 0b00;
+ let Inst{27-20} = opcod;
+}
+
+// Misc Arithmetic instructions.
+class AMiscA1I<bits<8> opcod, bits<4> opc7_4, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, 4, IndexModeNone, ArithMiscFrm, itin,
+ opc, asm, "", pattern> {
+ bits<4> Rd;
+ bits<4> Rm;
+ let Inst{27-20} = opcod;
+ let Inst{19-16} = 0b1111;
+ let Inst{15-12} = Rd;
+ let Inst{11-8} = 0b1111;
+ let Inst{7-4} = opc7_4;
+ let Inst{3-0} = Rm;
+}
+
+// PKH instructions
+def PKHLSLAsmOperand : AsmOperandClass {
+ let Name = "PKHLSLImm";
+ let ParserMethod = "parsePKHLSLImm";
+}
+def pkh_lsl_amt: Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 32; }]>{
+ let PrintMethod = "printPKHLSLShiftImm";
+ let ParserMatchClass = PKHLSLAsmOperand;
+}
+def PKHASRAsmOperand : AsmOperandClass {
+ let Name = "PKHASRImm";
+ let ParserMethod = "parsePKHASRImm";
+}
+def pkh_asr_amt: Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm <= 32; }]>{
+ let PrintMethod = "printPKHASRShiftImm";
+ let ParserMatchClass = PKHASRAsmOperand;
+}
+
+class APKHI<bits<8> opcod, bit tb, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, 4, IndexModeNone, ArithMiscFrm, itin,
+ opc, asm, "", pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ bits<5> sh;
+ let Inst{27-20} = opcod;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-7} = sh;
+ let Inst{6} = tb;
+ let Inst{5-4} = 0b01;
+ let Inst{3-0} = Rm;
+}
+
+//===----------------------------------------------------------------------===//
+
+// ARMPat - Same as Pat<>, but requires that the compiler be in ARM mode.
+class ARMPat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsARM];
+}
+class ARMV5TPat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsARM, HasV5T];
+}
+class ARMV5TEPat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsARM, HasV5TE];
+}
+class ARMV6Pat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsARM, HasV6];
+}
+
+//===----------------------------------------------------------------------===//
+// Thumb Instruction Format Definitions.
+//
+
+class ThumbI<dag oops, dag iops, AddrMode am, int sz,
+ InstrItinClass itin, string asm, string cstr, list<dag> pattern>
+ : InstThumb<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = iops;
+ let AsmString = asm;
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsThumb];
+}
+
+// TI - Thumb instruction.
+class TI<dag oops, dag iops, InstrItinClass itin, string asm, list<dag> pattern>
+ : ThumbI<oops, iops, AddrModeNone, 2, itin, asm, "", pattern>;
+
+// Two-address instructions
+class TIt<dag oops, dag iops, InstrItinClass itin, string asm,
+ list<dag> pattern>
+ : ThumbI<oops, iops, AddrModeNone, 2, itin, asm, "$lhs = $dst",
+ pattern>;
+
+// tBL, tBX 32-bit instructions
+class TIx2<bits<5> opcod1, bits<2> opcod2, bit opcod3,
+ dag oops, dag iops, InstrItinClass itin, string asm,
+ list<dag> pattern>
+ : ThumbI<oops, iops, AddrModeNone, 4, itin, asm, "", pattern>,
+ Encoding {
+ let Inst{31-27} = opcod1;
+ let Inst{15-14} = opcod2;
+ let Inst{12} = opcod3;
+}
+
+// BR_JT instructions
+class TJTI<dag oops, dag iops, InstrItinClass itin, string asm,
+ list<dag> pattern>
+ : ThumbI<oops, iops, AddrModeNone, 0, itin, asm, "", pattern>;
+
+// Thumb1 only
+class Thumb1I<dag oops, dag iops, AddrMode am, int sz,
+ InstrItinClass itin, string asm, string cstr, list<dag> pattern>
+ : InstThumb<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = iops;
+ let AsmString = asm;
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsThumb, IsThumb1Only];
+}
+
+class T1I<dag oops, dag iops, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : Thumb1I<oops, iops, AddrModeNone, 2, itin, asm, "", pattern>;
+class T1Ix2<dag oops, dag iops, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : Thumb1I<oops, iops, AddrModeNone, 4, itin, asm, "", pattern>;
+
+// Two-address instructions
+class T1It<dag oops, dag iops, InstrItinClass itin,
+ string asm, string cstr, list<dag> pattern>
+ : Thumb1I<oops, iops, AddrModeNone, 2, itin,
+ asm, cstr, pattern>;
+
+// Thumb1 instruction that can either be predicated or set CPSR.
+class Thumb1sI<dag oops, dag iops, AddrMode am, int sz,
+ InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : InstThumb<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
+ let OutOperandList = !con(oops, (outs s_cc_out:$s));
+ let InOperandList = !con(iops, (ins pred:$p));
+ let AsmString = !strconcat(opc, "${s}${p}", asm);
+ let Pattern = pattern;
+ let thumbArithFlagSetting = 1;
+ list<Predicate> Predicates = [IsThumb, IsThumb1Only];
+ let DecoderNamespace = "ThumbSBit";
+}
+
+class T1sI<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb1sI<oops, iops, AddrModeNone, 2, itin, opc, asm, "", pattern>;
+
+// Two-address instructions
+class T1sIt<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb1sI<oops, iops, AddrModeNone, 2, itin, opc, asm,
+ "$Rn = $Rdn", pattern>;
+
+// Thumb1 instruction that can be predicated.
+class Thumb1pI<dag oops, dag iops, AddrMode am, int sz,
+ InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : InstThumb<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p));
+ let AsmString = !strconcat(opc, "${p}", asm);
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsThumb, IsThumb1Only];
+}
+
+class T1pI<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb1pI<oops, iops, AddrModeNone, 2, itin, opc, asm, "", pattern>;
+
+// Two-address instructions
+class T1pIt<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb1pI<oops, iops, AddrModeNone, 2, itin, opc, asm,
+ "$Rn = $Rdn", pattern>;
+
+class T1pIs<dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : Thumb1pI<oops, iops, AddrModeT1_s, 2, itin, opc, asm, "", pattern>;
+
+class Encoding16 : Encoding {
+ let Inst{31-16} = 0x0000;
+}
+
+// A6.2 16-bit Thumb instruction encoding
+class T1Encoding<bits<6> opcode> : Encoding16 {
+ let Inst{15-10} = opcode;
+}
+
+// A6.2.1 Shift (immediate), add, subtract, move, and compare encoding.
+class T1General<bits<5> opcode> : Encoding16 {
+ let Inst{15-14} = 0b00;
+ let Inst{13-9} = opcode;
+}
+
+// A6.2.2 Data-processing encoding.
+class T1DataProcessing<bits<4> opcode> : Encoding16 {
+ let Inst{15-10} = 0b010000;
+ let Inst{9-6} = opcode;
+}
+
+// A6.2.3 Special data instructions and branch and exchange encoding.
+class T1Special<bits<4> opcode> : Encoding16 {
+ let Inst{15-10} = 0b010001;
+ let Inst{9-6} = opcode;
+}
+
+// A6.2.4 Load/store single data item encoding.
+class T1LoadStore<bits<4> opA, bits<3> opB> : Encoding16 {
+ let Inst{15-12} = opA;
+ let Inst{11-9} = opB;
+}
+class T1LdStSP<bits<3> opB> : T1LoadStore<0b1001, opB>; // SP relative
+
+class T1BranchCond<bits<4> opcode> : Encoding16 {
+ let Inst{15-12} = opcode;
+}
+
+// Helper classes to encode Thumb1 loads and stores. For immediates, the
+// following bits are used for "opA" (see A6.2.4):
+//
+// 0b0110 => Immediate, 4 bytes
+// 0b1000 => Immediate, 2 bytes
+// 0b0111 => Immediate, 1 byte
+class T1pILdStEncode<bits<3> opcode, dag oops, dag iops, AddrMode am,
+ InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : Thumb1pI<oops, iops, am, 2, itin, opc, asm, "", pattern>,
+ T1LoadStore<0b0101, opcode> {
+ bits<3> Rt;
+ bits<8> addr;
+ let Inst{8-6} = addr{5-3}; // Rm
+ let Inst{5-3} = addr{2-0}; // Rn
+ let Inst{2-0} = Rt;
+}
+class T1pILdStEncodeImm<bits<4> opA, bit opB, dag oops, dag iops, AddrMode am,
+ InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : Thumb1pI<oops, iops, am, 2, itin, opc, asm, "", pattern>,
+ T1LoadStore<opA, {opB,?,?}> {
+ bits<3> Rt;
+ bits<8> addr;
+ let Inst{10-6} = addr{7-3}; // imm5
+ let Inst{5-3} = addr{2-0}; // Rn
+ let Inst{2-0} = Rt;
+}
+
+// A6.2.5 Miscellaneous 16-bit instructions encoding.
+class T1Misc<bits<7> opcode> : Encoding16 {
+ let Inst{15-12} = 0b1011;
+ let Inst{11-5} = opcode;
+}
+
+// Thumb2I - Thumb2 instruction. Almost all Thumb2 instructions are predicable.
+class Thumb2I<dag oops, dag iops, AddrMode am, int sz,
+ InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p));
+ let AsmString = !strconcat(opc, "${p}", asm);
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsThumb2];
+ let DecoderNamespace = "Thumb2";
+}
+
+// Same as Thumb2I except it can optionally modify CPSR. Note it's modeled as an
+// input operand since by default it's a zero register. It will become an
+// implicit def once it's "flipped".
+//
+// FIXME: This uses unified syntax so {s} comes before {p}. We should make it
+// more consistent.
+class Thumb2sI<dag oops, dag iops, AddrMode am, int sz,
+ InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
+ bits<1> s; // condition-code set flag ('1' if the insn should set the flags)
+ let Inst{20} = s;
+
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p, cc_out:$s));
+ let AsmString = !strconcat(opc, "${s}${p}", asm);
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsThumb2];
+ let DecoderNamespace = "Thumb2";
+}
+
+// Special cases
+class Thumb2XI<dag oops, dag iops, AddrMode am, int sz,
+ InstrItinClass itin,
+ string asm, string cstr, list<dag> pattern>
+ : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = iops;
+ let AsmString = asm;
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsThumb2];
+ let DecoderNamespace = "Thumb2";
+}
+
+class ThumbXI<dag oops, dag iops, AddrMode am, int sz,
+ InstrItinClass itin,
+ string asm, string cstr, list<dag> pattern>
+ : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = iops;
+ let AsmString = asm;
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsThumb, IsThumb1Only];
+ let DecoderNamespace = "Thumb";
+}
+
+class T2I<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb2I<oops, iops, AddrModeNone, 4, itin, opc, asm, "", pattern>;
+class T2Ii12<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb2I<oops, iops, AddrModeT2_i12, 4, itin, opc, asm, "",pattern>;
+class T2Ii8<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb2I<oops, iops, AddrModeT2_i8, 4, itin, opc, asm, "", pattern>;
+class T2Iso<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb2I<oops, iops, AddrModeT2_so, 4, itin, opc, asm, "", pattern>;
+class T2Ipc<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb2I<oops, iops, AddrModeT2_pc, 4, itin, opc, asm, "", pattern>;
+class T2Ii8s4<bit P, bit W, bit isLoad, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : Thumb2I<oops, iops, AddrModeT2_i8s4, 4, itin, opc, asm, cstr,
+ pattern> {
+ bits<4> Rt;
+ bits<4> Rt2;
+ bits<13> addr;
+ let Inst{31-25} = 0b1110100;
+ let Inst{24} = P;
+ let Inst{23} = addr{8};
+ let Inst{22} = 1;
+ let Inst{21} = W;
+ let Inst{20} = isLoad;
+ let Inst{19-16} = addr{12-9};
+ let Inst{15-12} = Rt{3-0};
+ let Inst{11-8} = Rt2{3-0};
+ let Inst{7-0} = addr{7-0};
+}
+class T2Ii8s4post<bit P, bit W, bit isLoad, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, string cstr,
+ list<dag> pattern>
+ : Thumb2I<oops, iops, AddrModeT2_i8s4, 4, itin, opc, asm, cstr,
+ pattern> {
+ bits<4> Rt;
+ bits<4> Rt2;
+ bits<4> addr;
+ bits<9> imm;
+ let Inst{31-25} = 0b1110100;
+ let Inst{24} = P;
+ let Inst{23} = imm{8};
+ let Inst{22} = 1;
+ let Inst{21} = W;
+ let Inst{20} = isLoad;
+ let Inst{19-16} = addr;
+ let Inst{15-12} = Rt{3-0};
+ let Inst{11-8} = Rt2{3-0};
+ let Inst{7-0} = imm{7-0};
+}
+
+class T2sI<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb2sI<oops, iops, AddrModeNone, 4, itin, opc, asm, "", pattern>;
+
+class T2XI<dag oops, dag iops, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : Thumb2XI<oops, iops, AddrModeNone, 4, itin, asm, "", pattern>;
+class T2JTI<dag oops, dag iops, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : Thumb2XI<oops, iops, AddrModeNone, 0, itin, asm, "", pattern>;
+
+// Move to/from coprocessor instructions
+class T2Cop<bits<4> opc, dag oops, dag iops, string asm, list<dag> pattern>
+ : T2XI <oops, iops, NoItinerary, asm, pattern>, Requires<[IsThumb2]> {
+ let Inst{31-28} = opc;
+}
+
+// Two-address instructions
+class T2XIt<dag oops, dag iops, InstrItinClass itin,
+ string asm, string cstr, list<dag> pattern>
+ : Thumb2XI<oops, iops, AddrModeNone, 4, itin, asm, cstr, pattern>;
+
+// T2Ipreldst - Thumb2 pre-indexed load / store instructions.
+class T2Ipreldst<bit signed, bits<2> opcod, bit load, bit pre,
+ dag oops, dag iops,
+ AddrMode am, IndexMode im, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : InstARM<am, 4, im, ThumbFrm, GenericDomain, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p));
+ let AsmString = !strconcat(opc, "${p}", asm);
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsThumb2];
+ let DecoderNamespace = "Thumb2";
+
+ bits<4> Rt;
+ bits<13> addr;
+ let Inst{31-27} = 0b11111;
+ let Inst{26-25} = 0b00;
+ let Inst{24} = signed;
+ let Inst{23} = 0;
+ let Inst{22-21} = opcod;
+ let Inst{20} = load;
+ let Inst{19-16} = addr{12-9};
+ let Inst{15-12} = Rt{3-0};
+ let Inst{11} = 1;
+ // (P, W) = (1, 1) Pre-indexed or (0, 1) Post-indexed
+ let Inst{10} = pre; // The P bit.
+ let Inst{9} = addr{8}; // Sign bit
+ let Inst{8} = 1; // The W bit.
+ let Inst{7-0} = addr{7-0};
+
+ let DecoderMethod = "DecodeT2LdStPre";
+}
+
+// T2Ipostldst - Thumb2 post-indexed load / store instructions.
+class T2Ipostldst<bit signed, bits<2> opcod, bit load, bit pre,
+ dag oops, dag iops,
+ AddrMode am, IndexMode im, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : InstARM<am, 4, im, ThumbFrm, GenericDomain, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p));
+ let AsmString = !strconcat(opc, "${p}", asm);
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsThumb2];
+ let DecoderNamespace = "Thumb2";
+
+ bits<4> Rt;
+ bits<4> Rn;
+ bits<9> offset;
+ let Inst{31-27} = 0b11111;
+ let Inst{26-25} = 0b00;
+ let Inst{24} = signed;
+ let Inst{23} = 0;
+ let Inst{22-21} = opcod;
+ let Inst{20} = load;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rt{3-0};
+ let Inst{11} = 1;
+ // (P, W) = (1, 1) Pre-indexed or (0, 1) Post-indexed
+ let Inst{10} = pre; // The P bit.
+ let Inst{9} = offset{8}; // Sign bit
+ let Inst{8} = 1; // The W bit.
+ let Inst{7-0} = offset{7-0};
+
+ let DecoderMethod = "DecodeT2LdStPre";
+}
+
+// Tv5Pat - Same as Pat<>, but requires V5T Thumb mode.
+class Tv5Pat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsThumb, IsThumb1Only, HasV5T];
+}
+
+// T1Pat - Same as Pat<>, but requires that the compiler be in Thumb1 mode.
+class T1Pat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsThumb, IsThumb1Only];
+}
+
+// T2v6Pat - Same as Pat<>, but requires V6T2 Thumb2 mode.
+class T2v6Pat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsThumb2, HasV6T2];
+}
+
+// T2Pat - Same as Pat<>, but requires that the compiler be in Thumb2 mode.
+class T2Pat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsThumb2];
+}
+
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ARM VFP Instruction templates.
+//
+
+// Almost all VFP instructions are predicable.
+class VFPI<dag oops, dag iops, AddrMode am, int sz,
+ IndexMode im, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : InstARM<am, sz, im, f, VFPDomain, cstr, itin> {
+ bits<4> p;
+ let Inst{31-28} = p;
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p));
+ let AsmString = !strconcat(opc, "${p}", asm);
+ let Pattern = pattern;
+ let PostEncoderMethod = "VFPThumb2PostEncoder";
+ let DecoderNamespace = "VFP";
+ list<Predicate> Predicates = [HasVFP2];
+}
+
+// Special cases
+class VFPXI<dag oops, dag iops, AddrMode am, int sz,
+ IndexMode im, Format f, InstrItinClass itin,
+ string asm, string cstr, list<dag> pattern>
+ : InstARM<am, sz, im, f, VFPDomain, cstr, itin> {
+ bits<4> p;
+ let Inst{31-28} = p;
+ let OutOperandList = oops;
+ let InOperandList = iops;
+ let AsmString = asm;
+ let Pattern = pattern;
+ let PostEncoderMethod = "VFPThumb2PostEncoder";
+ let DecoderNamespace = "VFP";
+ list<Predicate> Predicates = [HasVFP2];
+}
+
+class VFPAI<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : VFPI<oops, iops, AddrModeNone, 4, IndexModeNone, f, itin,
+ opc, asm, "", pattern> {
+ let PostEncoderMethod = "VFPThumb2PostEncoder";
+}
+
+// ARM VFP addrmode5 loads and stores
+class ADI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops,
+ InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : VFPI<oops, iops, AddrMode5, 4, IndexModeNone,
+ VFPLdStFrm, itin, opc, asm, "", pattern> {
+ // Instruction operands.
+ bits<5> Dd;
+ bits<13> addr;
+
+ // Encode instruction operands.
+ let Inst{23} = addr{8}; // U (add = (U == '1'))
+ let Inst{22} = Dd{4};
+ let Inst{19-16} = addr{12-9}; // Rn
+ let Inst{15-12} = Dd{3-0};
+ let Inst{7-0} = addr{7-0}; // imm8
+
+ // TODO: Mark the instructions with the appropriate subtarget info.
+ let Inst{27-24} = opcod1;
+ let Inst{21-20} = opcod2;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 1; // Double precision
+
+ // Loads & stores operate on both NEON and VFP pipelines.
+ let D = VFPNeonDomain;
+}
+
+class ASI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops,
+ InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : VFPI<oops, iops, AddrMode5, 4, IndexModeNone,
+ VFPLdStFrm, itin, opc, asm, "", pattern> {
+ // Instruction operands.
+ bits<5> Sd;
+ bits<13> addr;
+
+ // Encode instruction operands.
+ let Inst{23} = addr{8}; // U (add = (U == '1'))
+ let Inst{22} = Sd{0};
+ let Inst{19-16} = addr{12-9}; // Rn
+ let Inst{15-12} = Sd{4-1};
+ let Inst{7-0} = addr{7-0}; // imm8
+
+ // TODO: Mark the instructions with the appropriate subtarget info.
+ let Inst{27-24} = opcod1;
+ let Inst{21-20} = opcod2;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 0; // Single precision
+
+ // Loads & stores operate on both NEON and VFP pipelines.
+ let D = VFPNeonDomain;
+}
+
+// VFP Load / store multiple pseudo instructions.
+class PseudoVFPLdStM<dag oops, dag iops, InstrItinClass itin, string cstr,
+ list<dag> pattern>
+ : InstARM<AddrMode4, 4, IndexModeNone, Pseudo, VFPNeonDomain,
+ cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p));
+ let Pattern = pattern;
+ list<Predicate> Predicates = [HasVFP2];
+}
+
+// Load / store multiple
+class AXDI4<dag oops, dag iops, IndexMode im, InstrItinClass itin,
+ string asm, string cstr, list<dag> pattern>
+ : VFPXI<oops, iops, AddrMode4, 4, im,
+ VFPLdStMulFrm, itin, asm, cstr, pattern> {
+ // Instruction operands.
+ bits<4> Rn;
+ bits<13> regs;
+
+ // Encode instruction operands.
+ let Inst{19-16} = Rn;
+ let Inst{22} = regs{12};
+ let Inst{15-12} = regs{11-8};
+ let Inst{7-0} = regs{7-0};
+
+ // TODO: Mark the instructions with the appropriate subtarget info.
+ let Inst{27-25} = 0b110;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 1; // Double precision
+}
+
+class AXSI4<dag oops, dag iops, IndexMode im, InstrItinClass itin,
+ string asm, string cstr, list<dag> pattern>
+ : VFPXI<oops, iops, AddrMode4, 4, im,
+ VFPLdStMulFrm, itin, asm, cstr, pattern> {
+ // Instruction operands.
+ bits<4> Rn;
+ bits<13> regs;
+
+ // Encode instruction operands.
+ let Inst{19-16} = Rn;
+ let Inst{22} = regs{8};
+ let Inst{15-12} = regs{12-9};
+ let Inst{7-0} = regs{7-0};
+
+ // TODO: Mark the instructions with the appropriate subtarget info.
+ let Inst{27-25} = 0b110;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 0; // Single precision
+}
+
+// Double precision, unary
+class ADuI<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
+ bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc,
+ string asm, list<dag> pattern>
+ : VFPAI<oops, iops, VFPUnaryFrm, itin, opc, asm, pattern> {
+ // Instruction operands.
+ bits<5> Dd;
+ bits<5> Dm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Dm{3-0};
+ let Inst{5} = Dm{4};
+ let Inst{15-12} = Dd{3-0};
+ let Inst{22} = Dd{4};
+
+ let Inst{27-23} = opcod1;
+ let Inst{21-20} = opcod2;
+ let Inst{19-16} = opcod3;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 1; // Double precision
+ let Inst{7-6} = opcod4;
+ let Inst{4} = opcod5;
+}
+
+// Double precision, binary
+class ADbI<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops,
+ dag iops, InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : VFPAI<oops, iops, VFPBinaryFrm, itin, opc, asm, pattern> {
+ // Instruction operands.
+ bits<5> Dd;
+ bits<5> Dn;
+ bits<5> Dm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Dm{3-0};
+ let Inst{5} = Dm{4};
+ let Inst{19-16} = Dn{3-0};
+ let Inst{7} = Dn{4};
+ let Inst{15-12} = Dd{3-0};
+ let Inst{22} = Dd{4};
+
+ let Inst{27-23} = opcod1;
+ let Inst{21-20} = opcod2;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 1; // Double precision
+ let Inst{6} = op6;
+ let Inst{4} = op4;
+}
+
+// Single precision, unary
+class ASuI<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
+ bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc,
+ string asm, list<dag> pattern>
+ : VFPAI<oops, iops, VFPUnaryFrm, itin, opc, asm, pattern> {
+ // Instruction operands.
+ bits<5> Sd;
+ bits<5> Sm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Sm{4-1};
+ let Inst{5} = Sm{0};
+ let Inst{15-12} = Sd{4-1};
+ let Inst{22} = Sd{0};
+
+ let Inst{27-23} = opcod1;
+ let Inst{21-20} = opcod2;
+ let Inst{19-16} = opcod3;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 0; // Single precision
+ let Inst{7-6} = opcod4;
+ let Inst{4} = opcod5;
+}
+
+// Single precision unary, if no NEON. Same as ASuI except not available if
+// NEON is enabled.
+class ASuIn<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
+ bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc,
+ string asm, list<dag> pattern>
+ : ASuI<opcod1, opcod2, opcod3, opcod4, opcod5, oops, iops, itin, opc, asm,
+ pattern> {
+ list<Predicate> Predicates = [HasVFP2,DontUseNEONForFP];
+}
+
+// Single precision, binary
+class ASbI<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : VFPAI<oops, iops, VFPBinaryFrm, itin, opc, asm, pattern> {
+ // Instruction operands.
+ bits<5> Sd;
+ bits<5> Sn;
+ bits<5> Sm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Sm{4-1};
+ let Inst{5} = Sm{0};
+ let Inst{19-16} = Sn{4-1};
+ let Inst{7} = Sn{0};
+ let Inst{15-12} = Sd{4-1};
+ let Inst{22} = Sd{0};
+
+ let Inst{27-23} = opcod1;
+ let Inst{21-20} = opcod2;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 0; // Single precision
+ let Inst{6} = op6;
+ let Inst{4} = op4;
+}
+
+// Single precision binary, if no NEON. Same as ASbI except not available if
+// NEON is enabled.
+class ASbIn<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops,
+ dag iops, InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : ASbI<opcod1, opcod2, op6, op4, oops, iops, itin, opc, asm, pattern> {
+ list<Predicate> Predicates = [HasVFP2,DontUseNEONForFP];
+
+ // Instruction operands.
+ bits<5> Sd;
+ bits<5> Sn;
+ bits<5> Sm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Sm{4-1};
+ let Inst{5} = Sm{0};
+ let Inst{19-16} = Sn{4-1};
+ let Inst{7} = Sn{0};
+ let Inst{15-12} = Sd{4-1};
+ let Inst{22} = Sd{0};
+}
+
+// VFP conversion instructions
+class AVConv1I<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<4> opcod4,
+ dag oops, dag iops, InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : VFPAI<oops, iops, VFPConv1Frm, itin, opc, asm, pattern> {
+ let Inst{27-23} = opcod1;
+ let Inst{21-20} = opcod2;
+ let Inst{19-16} = opcod3;
+ let Inst{11-8} = opcod4;
+ let Inst{6} = 1;
+ let Inst{4} = 0;
+}
+
+// VFP conversion between floating-point and fixed-point
+class AVConv1XI<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4, bit op5,
+ dag oops, dag iops, InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : AVConv1I<op1, op2, op3, op4, oops, iops, itin, opc, asm, pattern> {
+ // size (fixed-point number): sx == 0 ? 16 : 32
+ let Inst{7} = op5; // sx
+}
+
+// VFP conversion instructions, if no NEON
+class AVConv1In<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<4> opcod4,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : AVConv1I<opcod1, opcod2, opcod3, opcod4, oops, iops, itin, opc, asm,
+ pattern> {
+ list<Predicate> Predicates = [HasVFP2,DontUseNEONForFP];
+}
+
+class AVConvXI<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops, Format f,
+ InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : VFPAI<oops, iops, f, itin, opc, asm, pattern> {
+ let Inst{27-20} = opcod1;
+ let Inst{11-8} = opcod2;
+ let Inst{4} = 1;
+}
+
+class AVConv2I<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : AVConvXI<opcod1, opcod2, oops, iops, VFPConv2Frm, itin, opc, asm, pattern>;
+
+class AVConv3I<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : AVConvXI<opcod1, opcod2, oops, iops, VFPConv3Frm, itin, opc, asm, pattern>;
+
+class AVConv4I<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : AVConvXI<opcod1, opcod2, oops, iops, VFPConv4Frm, itin, opc, asm, pattern>;
+
+class AVConv5I<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : AVConvXI<opcod1, opcod2, oops, iops, VFPConv5Frm, itin, opc, asm, pattern>;
+
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ARM NEON Instruction templates.
+//
+
+class NeonI<dag oops, dag iops, AddrMode am, IndexMode im, Format f,
+ InstrItinClass itin, string opc, string dt, string asm, string cstr,
+ list<dag> pattern>
+ : InstARM<am, 4, im, f, NeonDomain, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p));
+ let AsmString = !strconcat(opc, "${p}", ".", dt, "\t", asm);
+ let Pattern = pattern;
+ list<Predicate> Predicates = [HasNEON];
+ let DecoderNamespace = "NEON";
+}
+
+// Same as NeonI except it does not have a "data type" specifier.
+class NeonXI<dag oops, dag iops, AddrMode am, IndexMode im, Format f,
+ InstrItinClass itin, string opc, string asm, string cstr,
+ list<dag> pattern>
+ : InstARM<am, 4, im, f, NeonDomain, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p));
+ let AsmString = !strconcat(opc, "${p}", "\t", asm);
+ let Pattern = pattern;
+ list<Predicate> Predicates = [HasNEON];
+ let DecoderNamespace = "NEON";
+}
+
+class NLdSt<bit op23, bits<2> op21_20, bits<4> op11_8, bits<4> op7_4,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string dt, string asm, string cstr, list<dag> pattern>
+ : NeonI<oops, iops, AddrMode6, IndexModeNone, NLdStFrm, itin, opc, dt, asm,
+ cstr, pattern> {
+ let Inst{31-24} = 0b11110100;
+ let Inst{23} = op23;
+ let Inst{21-20} = op21_20;
+ let Inst{11-8} = op11_8;
+ let Inst{7-4} = op7_4;
+
+ let PostEncoderMethod = "NEONThumb2LoadStorePostEncoder";
+ let DecoderNamespace = "NEONLoadStore";
+
+ bits<5> Vd;
+ bits<6> Rn;
+ bits<4> Rm;
+
+ let Inst{22} = Vd{4};
+ let Inst{15-12} = Vd{3-0};
+ let Inst{19-16} = Rn{3-0};
+ let Inst{3-0} = Rm{3-0};
+}
+
+class NLdStLn<bit op23, bits<2> op21_20, bits<4> op11_8, bits<4> op7_4,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string dt, string asm, string cstr, list<dag> pattern>
+ : NLdSt<op23, op21_20, op11_8, op7_4, oops, iops, itin, opc,
+ dt, asm, cstr, pattern> {
+ bits<3> lane;
+}
+
+class PseudoNLdSt<dag oops, dag iops, InstrItinClass itin, string cstr>
+ : InstARM<AddrMode6, 4, IndexModeNone, Pseudo, NeonDomain, cstr,
+ itin> {
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p));
+ list<Predicate> Predicates = [HasNEON];
+}
+
+class PseudoNeonI<dag oops, dag iops, InstrItinClass itin, string cstr,
+ list<dag> pattern>
+ : InstARM<AddrModeNone, 4, IndexModeNone, Pseudo, NeonDomain, cstr,
+ itin> {
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p));
+ let Pattern = pattern;
+ list<Predicate> Predicates = [HasNEON];
+}
+
+class NDataI<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string dt, string asm, string cstr, list<dag> pattern>
+ : NeonI<oops, iops, AddrModeNone, IndexModeNone, f, itin, opc, dt, asm, cstr,
+ pattern> {
+ let Inst{31-25} = 0b1111001;
+ let PostEncoderMethod = "NEONThumb2DataIPostEncoder";
+ let DecoderNamespace = "NEONData";
+}
+
+class NDataXI<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : NeonXI<oops, iops, AddrModeNone, IndexModeNone, f, itin, opc, asm,
+ cstr, pattern> {
+ let Inst{31-25} = 0b1111001;
+ let PostEncoderMethod = "NEONThumb2DataIPostEncoder";
+ let DecoderNamespace = "NEONData";
+}
+
+// NEON "one register and a modified immediate" format.
+class N1ModImm<bit op23, bits<3> op21_19, bits<4> op11_8, bit op7, bit op6,
+ bit op5, bit op4,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string dt, string asm, string cstr,
+ list<dag> pattern>
+ : NDataI<oops, iops, N1RegModImmFrm, itin, opc, dt, asm, cstr, pattern> {
+ let Inst{23} = op23;
+ let Inst{21-19} = op21_19;
+ let Inst{11-8} = op11_8;
+ let Inst{7} = op7;
+ let Inst{6} = op6;
+ let Inst{5} = op5;
+ let Inst{4} = op4;
+
+ // Instruction operands.
+ bits<5> Vd;
+ bits<13> SIMM;
+
+ let Inst{15-12} = Vd{3-0};
+ let Inst{22} = Vd{4};
+ let Inst{24} = SIMM{7};
+ let Inst{18-16} = SIMM{6-4};
+ let Inst{3-0} = SIMM{3-0};
+ let DecoderMethod = "DecodeNEONModImmInstruction";
+}
+
+// NEON 2 vector register format.
+class N2V<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16,
+ bits<5> op11_7, bit op6, bit op4,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string dt, string asm, string cstr, list<dag> pattern>
+ : NDataI<oops, iops, N2RegFrm, itin, opc, dt, asm, cstr, pattern> {
+ let Inst{24-23} = op24_23;
+ let Inst{21-20} = op21_20;
+ let Inst{19-18} = op19_18;
+ let Inst{17-16} = op17_16;
+ let Inst{11-7} = op11_7;
+ let Inst{6} = op6;
+ let Inst{4} = op4;
+
+ // Instruction operands.
+ bits<5> Vd;
+ bits<5> Vm;
+
+ let Inst{15-12} = Vd{3-0};
+ let Inst{22} = Vd{4};
+ let Inst{3-0} = Vm{3-0};
+ let Inst{5} = Vm{4};
+}
+
+// Same as N2V except it doesn't have a datatype suffix.
+class N2VX<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16,
+ bits<5> op11_7, bit op6, bit op4,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : NDataXI<oops, iops, N2RegFrm, itin, opc, asm, cstr, pattern> {
+ let Inst{24-23} = op24_23;
+ let Inst{21-20} = op21_20;
+ let Inst{19-18} = op19_18;
+ let Inst{17-16} = op17_16;
+ let Inst{11-7} = op11_7;
+ let Inst{6} = op6;
+ let Inst{4} = op4;
+
+ // Instruction operands.
+ bits<5> Vd;
+ bits<5> Vm;
+
+ let Inst{15-12} = Vd{3-0};
+ let Inst{22} = Vd{4};
+ let Inst{3-0} = Vm{3-0};
+ let Inst{5} = Vm{4};
+}
+
+// NEON 2 vector register with immediate.
+class N2VImm<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
+ dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string dt, string asm, string cstr, list<dag> pattern>
+ : NDataI<oops, iops, f, itin, opc, dt, asm, cstr, pattern> {
+ let Inst{24} = op24;
+ let Inst{23} = op23;
+ let Inst{11-8} = op11_8;
+ let Inst{7} = op7;
+ let Inst{6} = op6;
+ let Inst{4} = op4;
+
+ // Instruction operands.
+ bits<5> Vd;
+ bits<5> Vm;
+ bits<6> SIMM;
+
+ let Inst{15-12} = Vd{3-0};
+ let Inst{22} = Vd{4};
+ let Inst{3-0} = Vm{3-0};
+ let Inst{5} = Vm{4};
+ let Inst{21-16} = SIMM{5-0};
+}
+
+// NEON 3 vector register format.
+
+class N3VCommon<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6,
+ bit op4, dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string dt, string asm, string cstr,
+ list<dag> pattern>
+ : NDataI<oops, iops, f, itin, opc, dt, asm, cstr, pattern> {
+ let Inst{24} = op24;
+ let Inst{23} = op23;
+ let Inst{21-20} = op21_20;
+ let Inst{11-8} = op11_8;
+ let Inst{6} = op6;
+ let Inst{4} = op4;
+}
+
+class N3V<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4,
+ dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string dt, string asm, string cstr, list<dag> pattern>
+ : N3VCommon<op24, op23, op21_20, op11_8, op6, op4,
+ oops, iops, f, itin, opc, dt, asm, cstr, pattern> {
+
+ // Instruction operands.
+ bits<5> Vd;
+ bits<5> Vn;
+ bits<5> Vm;
+
+ let Inst{15-12} = Vd{3-0};
+ let Inst{22} = Vd{4};
+ let Inst{19-16} = Vn{3-0};
+ let Inst{7} = Vn{4};
+ let Inst{3-0} = Vm{3-0};
+ let Inst{5} = Vm{4};
+}
+
+class N3VLane32<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6,
+ bit op4, dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string dt, string asm, string cstr,
+ list<dag> pattern>
+ : N3VCommon<op24, op23, op21_20, op11_8, op6, op4,
+ oops, iops, f, itin, opc, dt, asm, cstr, pattern> {
+
+ // Instruction operands.
+ bits<5> Vd;
+ bits<5> Vn;
+ bits<5> Vm;
+ bit lane;
+
+ let Inst{15-12} = Vd{3-0};
+ let Inst{22} = Vd{4};
+ let Inst{19-16} = Vn{3-0};
+ let Inst{7} = Vn{4};
+ let Inst{3-0} = Vm{3-0};
+ let Inst{5} = lane;
+}
+
+class N3VLane16<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6,
+ bit op4, dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string dt, string asm, string cstr,
+ list<dag> pattern>
+ : N3VCommon<op24, op23, op21_20, op11_8, op6, op4,
+ oops, iops, f, itin, opc, dt, asm, cstr, pattern> {
+
+ // Instruction operands.
+ bits<5> Vd;
+ bits<5> Vn;
+ bits<5> Vm;
+ bits<2> lane;
+
+ let Inst{15-12} = Vd{3-0};
+ let Inst{22} = Vd{4};
+ let Inst{19-16} = Vn{3-0};
+ let Inst{7} = Vn{4};
+ let Inst{2-0} = Vm{2-0};
+ let Inst{5} = lane{1};
+ let Inst{3} = lane{0};
+}
+
+// Same as N3V except it doesn't have a data type suffix.
+class N3VX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6,
+ bit op4,
+ dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : NDataXI<oops, iops, f, itin, opc, asm, cstr, pattern> {
+ let Inst{24} = op24;
+ let Inst{23} = op23;
+ let Inst{21-20} = op21_20;
+ let Inst{11-8} = op11_8;
+ let Inst{6} = op6;
+ let Inst{4} = op4;
+
+ // Instruction operands.
+ bits<5> Vd;
+ bits<5> Vn;
+ bits<5> Vm;
+
+ let Inst{15-12} = Vd{3-0};
+ let Inst{22} = Vd{4};
+ let Inst{19-16} = Vn{3-0};
+ let Inst{7} = Vn{4};
+ let Inst{3-0} = Vm{3-0};
+ let Inst{5} = Vm{4};
+}
+
+// NEON VMOVs between scalar and core registers.
+class NVLaneOp<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
+ dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string dt, string asm, list<dag> pattern>
+ : InstARM<AddrModeNone, 4, IndexModeNone, f, NeonDomain,
+ "", itin> {
+ let Inst{27-20} = opcod1;
+ let Inst{11-8} = opcod2;
+ let Inst{6-5} = opcod3;
+ let Inst{4} = 1;
+ // A8.6.303, A8.6.328, A8.6.329
+ let Inst{3-0} = 0b0000;
+
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p));
+ let AsmString = !strconcat(opc, "${p}", ".", dt, "\t", asm);
+ let Pattern = pattern;
+ list<Predicate> Predicates = [HasNEON];
+
+ let PostEncoderMethod = "NEONThumb2DupPostEncoder";
+ let DecoderNamespace = "NEONDup";
+
+ bits<5> V;
+ bits<4> R;
+ bits<4> p;
+ bits<4> lane;
+
+ let Inst{31-28} = p{3-0};
+ let Inst{7} = V{4};
+ let Inst{19-16} = V{3-0};
+ let Inst{15-12} = R{3-0};
+}
+class NVGetLane<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string dt, string asm, list<dag> pattern>
+ : NVLaneOp<opcod1, opcod2, opcod3, oops, iops, NGetLnFrm, itin,
+ opc, dt, asm, pattern>;
+class NVSetLane<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string dt, string asm, list<dag> pattern>
+ : NVLaneOp<opcod1, opcod2, opcod3, oops, iops, NSetLnFrm, itin,
+ opc, dt, asm, pattern>;
+class NVDup<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string dt, string asm, list<dag> pattern>
+ : NVLaneOp<opcod1, opcod2, opcod3, oops, iops, NDupFrm, itin,
+ opc, dt, asm, pattern>;
+
+// Vector Duplicate Lane (from scalar to all elements)
+class NVDupLane<bits<4> op19_16, bit op6, dag oops, dag iops,
+ InstrItinClass itin, string opc, string dt, string asm,
+ list<dag> pattern>
+ : NDataI<oops, iops, NVDupLnFrm, itin, opc, dt, asm, "", pattern> {
+ let Inst{24-23} = 0b11;
+ let Inst{21-20} = 0b11;
+ let Inst{19-16} = op19_16;
+ let Inst{11-7} = 0b11000;
+ let Inst{6} = op6;
+ let Inst{4} = 0;
+
+ bits<5> Vd;
+ bits<5> Vm;
+
+ let Inst{22} = Vd{4};
+ let Inst{15-12} = Vd{3-0};
+ let Inst{5} = Vm{4};
+ let Inst{3-0} = Vm{3-0};
+}
+
+// NEONFPPat - Same as Pat<>, but requires that the compiler be using NEON
+// for single-precision FP.
+class NEONFPPat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [HasNEON,UseNEONForFP];
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp
new file mode 100644
index 0000000..48da03f
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp
@@ -0,0 +1,68 @@
+//===- ARMInstrInfo.cpp - ARM Instruction Information -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the ARM implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMInstrInfo.h"
+#include "ARM.h"
+#include "ARMMachineFunctionInfo.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
+using namespace llvm;
+
+ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI)
+ : ARMBaseInstrInfo(STI), RI(*this, STI) {
+}
+
+unsigned ARMInstrInfo::getUnindexedOpcode(unsigned Opc) const {
+ switch (Opc) {
+ default: break;
+ case ARM::LDR_PRE_IMM:
+ case ARM::LDR_PRE_REG:
+ case ARM::LDR_POST_IMM:
+ case ARM::LDR_POST_REG:
+ return ARM::LDRi12;
+ case ARM::LDRH_PRE:
+ case ARM::LDRH_POST:
+ return ARM::LDRH;
+ case ARM::LDRB_PRE_IMM:
+ case ARM::LDRB_PRE_REG:
+ case ARM::LDRB_POST_IMM:
+ case ARM::LDRB_POST_REG:
+ return ARM::LDRBi12;
+ case ARM::LDRSH_PRE:
+ case ARM::LDRSH_POST:
+ return ARM::LDRSH;
+ case ARM::LDRSB_PRE:
+ case ARM::LDRSB_POST:
+ return ARM::LDRSB;
+ case ARM::STR_PRE_IMM:
+ case ARM::STR_PRE_REG:
+ case ARM::STR_POST_IMM:
+ case ARM::STR_POST_REG:
+ return ARM::STRi12;
+ case ARM::STRH_PRE:
+ case ARM::STRH_POST:
+ return ARM::STRH;
+ case ARM::STRB_PRE_IMM:
+ case ARM::STRB_PRE_REG:
+ case ARM::STRB_POST_IMM:
+ case ARM::STRB_POST_REG:
+ return ARM::STRBi12;
+ }
+
+ return 0;
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.h b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.h
new file mode 100644
index 0000000..f2c7bdc
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.h
@@ -0,0 +1,44 @@
+//===- ARMInstrInfo.h - ARM Instruction Information -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the ARM implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMINSTRUCTIONINFO_H
+#define ARMINSTRUCTIONINFO_H
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMRegisterInfo.h"
+#include "ARMSubtarget.h"
+#include "ARM.h"
+
+namespace llvm {
+ class ARMSubtarget;
+
+class ARMInstrInfo : public ARMBaseInstrInfo {
+ ARMRegisterInfo RI;
+public:
+ explicit ARMInstrInfo(const ARMSubtarget &STI);
+
+ // Return the non-pre/post incrementing version of 'Opc'. Return 0
+ // if there is not such an opcode.
+ unsigned getUnindexedOpcode(unsigned Opc) const;
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ const ARMRegisterInfo &getRegisterInfo() const { return RI; }
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td
new file mode 100644
index 0000000..2cf0f09
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -0,0 +1,5004 @@
+//===- ARMInstrInfo.td - Target Description for ARM Target -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the ARM instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ARM specific DAG Nodes.
+//
+
+// Type profiles.
+def SDT_ARMCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
+def SDT_ARMCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>, SDTCisVT<1, i32> ]>;
+
+def SDT_ARMSaveCallPC : SDTypeProfile<0, 1, []>;
+
+def SDT_ARMcall : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>;
+
+def SDT_ARMCMov : SDTypeProfile<1, 3,
+ [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
+ SDTCisVT<3, i32>]>;
+
+def SDT_ARMBrcond : SDTypeProfile<0, 2,
+ [SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>]>;
+
+def SDT_ARMBrJT : SDTypeProfile<0, 3,
+ [SDTCisPtrTy<0>, SDTCisVT<1, i32>,
+ SDTCisVT<2, i32>]>;
+
+def SDT_ARMBr2JT : SDTypeProfile<0, 4,
+ [SDTCisPtrTy<0>, SDTCisVT<1, i32>,
+ SDTCisVT<2, i32>, SDTCisVT<3, i32>]>;
+
+def SDT_ARMBCC_i64 : SDTypeProfile<0, 6,
+ [SDTCisVT<0, i32>,
+ SDTCisVT<1, i32>, SDTCisVT<2, i32>,
+ SDTCisVT<3, i32>, SDTCisVT<4, i32>,
+ SDTCisVT<5, OtherVT>]>;
+
+def SDT_ARMAnd : SDTypeProfile<1, 2,
+ [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
+ SDTCisVT<2, i32>]>;
+
+def SDT_ARMCmp : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>;
+
+def SDT_ARMPICAdd : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>,
+ SDTCisPtrTy<1>, SDTCisVT<2, i32>]>;
+
+def SDT_ARMThreadPointer : SDTypeProfile<1, 0, [SDTCisPtrTy<0>]>;
+def SDT_ARMEH_SJLJ_Setjmp : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisPtrTy<1>,
+ SDTCisInt<2>]>;
+def SDT_ARMEH_SJLJ_Longjmp: SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisInt<1>]>;
+
+def SDT_ARMEH_SJLJ_DispatchSetup: SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+
+def SDT_ARMMEMBARRIER : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+
+def SDT_ARMPREFETCH : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisSameAs<1, 2>,
+ SDTCisInt<1>]>;
+
+def SDT_ARMTCRET : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
+
+def SDT_ARMBFI : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
+ SDTCisVT<2, i32>, SDTCisVT<3, i32>]>;
+
+def SDTBinaryArithWithFlags : SDTypeProfile<2, 2,
+ [SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>,
+ SDTCisInt<0>, SDTCisVT<1, i32>]>;
+
+// SDTBinaryArithWithFlagsInOut - RES1, CPSR = op LHS, RHS, CPSR
+def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3,
+ [SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>,
+ SDTCisInt<0>,
+ SDTCisVT<1, i32>,
+ SDTCisVT<4, i32>]>;
+// Node definitions.
+def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>;
+def ARMWrapperDYN : SDNode<"ARMISD::WrapperDYN", SDTIntUnaryOp>;
+def ARMWrapperPIC : SDNode<"ARMISD::WrapperPIC", SDTIntUnaryOp>;
+def ARMWrapperJT : SDNode<"ARMISD::WrapperJT", SDTIntBinOp>;
+
+def ARMcallseq_start : SDNode<"ISD::CALLSEQ_START", SDT_ARMCallSeqStart,
+ [SDNPHasChain, SDNPOutGlue]>;
+def ARMcallseq_end : SDNode<"ISD::CALLSEQ_END", SDT_ARMCallSeqEnd,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+def ARMcall : SDNode<"ARMISD::CALL", SDT_ARMcall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+def ARMcall_pred : SDNode<"ARMISD::CALL_PRED", SDT_ARMcall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+def ARMcall_nolink : SDNode<"ARMISD::CALL_NOLINK", SDT_ARMcall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+
+def ARMretflag : SDNode<"ARMISD::RET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue]>;
+
+def ARMcmov : SDNode<"ARMISD::CMOV", SDT_ARMCMov,
+ [SDNPInGlue]>;
+
+def ARMbrcond : SDNode<"ARMISD::BRCOND", SDT_ARMBrcond,
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>;
+
+def ARMbrjt : SDNode<"ARMISD::BR_JT", SDT_ARMBrJT,
+ [SDNPHasChain]>;
+def ARMbr2jt : SDNode<"ARMISD::BR2_JT", SDT_ARMBr2JT,
+ [SDNPHasChain]>;
+
+def ARMBcci64 : SDNode<"ARMISD::BCC_i64", SDT_ARMBCC_i64,
+ [SDNPHasChain]>;
+
+def ARMcmp : SDNode<"ARMISD::CMP", SDT_ARMCmp,
+ [SDNPOutGlue]>;
+
+def ARMcmpZ : SDNode<"ARMISD::CMPZ", SDT_ARMCmp,
+ [SDNPOutGlue, SDNPCommutative]>;
+
+def ARMpic_add : SDNode<"ARMISD::PIC_ADD", SDT_ARMPICAdd>;
+
+def ARMsrl_flag : SDNode<"ARMISD::SRL_FLAG", SDTIntUnaryOp, [SDNPOutGlue]>;
+def ARMsra_flag : SDNode<"ARMISD::SRA_FLAG", SDTIntUnaryOp, [SDNPOutGlue]>;
+def ARMrrx : SDNode<"ARMISD::RRX" , SDTIntUnaryOp, [SDNPInGlue ]>;
+
+def ARMaddc : SDNode<"ARMISD::ADDC", SDTBinaryArithWithFlags,
+ [SDNPCommutative]>;
+def ARMsubc : SDNode<"ARMISD::SUBC", SDTBinaryArithWithFlags>;
+def ARMadde : SDNode<"ARMISD::ADDE", SDTBinaryArithWithFlagsInOut>;
+def ARMsube : SDNode<"ARMISD::SUBE", SDTBinaryArithWithFlagsInOut>;
+
+def ARMthread_pointer: SDNode<"ARMISD::THREAD_POINTER", SDT_ARMThreadPointer>;
+def ARMeh_sjlj_setjmp: SDNode<"ARMISD::EH_SJLJ_SETJMP",
+ SDT_ARMEH_SJLJ_Setjmp, [SDNPHasChain]>;
+def ARMeh_sjlj_longjmp: SDNode<"ARMISD::EH_SJLJ_LONGJMP",
+ SDT_ARMEH_SJLJ_Longjmp, [SDNPHasChain]>;
+def ARMeh_sjlj_dispatchsetup: SDNode<"ARMISD::EH_SJLJ_DISPATCHSETUP",
+ SDT_ARMEH_SJLJ_DispatchSetup, [SDNPHasChain]>;
+
+
+def ARMMemBarrier : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIER,
+ [SDNPHasChain]>;
+def ARMMemBarrierMCR : SDNode<"ARMISD::MEMBARRIER_MCR", SDT_ARMMEMBARRIER,
+ [SDNPHasChain]>;
+def ARMPreload : SDNode<"ARMISD::PRELOAD", SDT_ARMPREFETCH,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore]>;
+
+def ARMrbit : SDNode<"ARMISD::RBIT", SDTIntUnaryOp>;
+
+def ARMtcret : SDNode<"ARMISD::TC_RETURN", SDT_ARMTCRET,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+
+
+def ARMbfi : SDNode<"ARMISD::BFI", SDT_ARMBFI>;
+
+//===----------------------------------------------------------------------===//
+// ARM Instruction Predicate Definitions.
+//
+def HasV4T : Predicate<"Subtarget->hasV4TOps()">,
+ AssemblerPredicate<"HasV4TOps">;
+def NoV4T : Predicate<"!Subtarget->hasV4TOps()">;
+def HasV5T : Predicate<"Subtarget->hasV5TOps()">;
+def HasV5TE : Predicate<"Subtarget->hasV5TEOps()">,
+ AssemblerPredicate<"HasV5TEOps">;
+def HasV6 : Predicate<"Subtarget->hasV6Ops()">,
+ AssemblerPredicate<"HasV6Ops">;
+def NoV6 : Predicate<"!Subtarget->hasV6Ops()">;
+def HasV6T2 : Predicate<"Subtarget->hasV6T2Ops()">,
+ AssemblerPredicate<"HasV6T2Ops">;
+def NoV6T2 : Predicate<"!Subtarget->hasV6T2Ops()">;
+def HasV7 : Predicate<"Subtarget->hasV7Ops()">,
+ AssemblerPredicate<"HasV7Ops">;
+def NoVFP : Predicate<"!Subtarget->hasVFP2()">;
+def HasVFP2 : Predicate<"Subtarget->hasVFP2()">,
+ AssemblerPredicate<"FeatureVFP2">;
+def HasVFP3 : Predicate<"Subtarget->hasVFP3()">,
+ AssemblerPredicate<"FeatureVFP3">;
+def HasNEON : Predicate<"Subtarget->hasNEON()">,
+ AssemblerPredicate<"FeatureNEON">;
+def HasFP16 : Predicate<"Subtarget->hasFP16()">,
+ AssemblerPredicate<"FeatureFP16">;
+def HasDivide : Predicate<"Subtarget->hasDivide()">,
+ AssemblerPredicate<"FeatureHWDiv">;
+def HasT2ExtractPack : Predicate<"Subtarget->hasT2ExtractPack()">,
+ AssemblerPredicate<"FeatureT2XtPk">;
+def HasThumb2DSP : Predicate<"Subtarget->hasThumb2DSP()">,
+ AssemblerPredicate<"FeatureDSPThumb2">;
+def HasDB : Predicate<"Subtarget->hasDataBarrier()">,
+ AssemblerPredicate<"FeatureDB">;
+def HasMP : Predicate<"Subtarget->hasMPExtension()">,
+ AssemblerPredicate<"FeatureMP">;
+def UseNEONForFP : Predicate<"Subtarget->useNEONForSinglePrecisionFP()">;
+def DontUseNEONForFP : Predicate<"!Subtarget->useNEONForSinglePrecisionFP()">;
+def IsThumb : Predicate<"Subtarget->isThumb()">,
+ AssemblerPredicate<"ModeThumb">;
+def IsThumb1Only : Predicate<"Subtarget->isThumb1Only()">;
+def IsThumb2 : Predicate<"Subtarget->isThumb2()">,
+ AssemblerPredicate<"ModeThumb,FeatureThumb2">;
+def IsMClass : Predicate<"Subtarget->isMClass()">,
+ AssemblerPredicate<"FeatureMClass">;
+def IsARClass : Predicate<"!Subtarget->isMClass()">,
+ AssemblerPredicate<"!FeatureMClass">;
+def IsARM : Predicate<"!Subtarget->isThumb()">,
+ AssemblerPredicate<"!ModeThumb">;
+def IsDarwin : Predicate<"Subtarget->isTargetDarwin()">;
+def IsNotDarwin : Predicate<"!Subtarget->isTargetDarwin()">;
+def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">,
+ AssemblerPredicate<"ModeNaCl">;
+
+// FIXME: Eventually this will be just "hasV6T2Ops".
+def UseMovt : Predicate<"Subtarget->useMovt()">;
+def DontUseMovt : Predicate<"!Subtarget->useMovt()">;
+def UseFPVMLx : Predicate<"Subtarget->useFPVMLx()">;
+
+//===----------------------------------------------------------------------===//
+// ARM Flag Definitions.
+
+class RegConstraint<string C> {
+ string Constraints = C;
+}
+
+//===----------------------------------------------------------------------===//
+// ARM specific transformation functions and pattern fragments.
+//
+
+// so_imm_neg_XFORM - Return a so_imm value packed into the format described for
+// so_imm_neg def below.
+def so_imm_neg_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(-(int)N->getZExtValue(), MVT::i32);
+}]>;
+
+// so_imm_not_XFORM - Return a so_imm value packed into the format described for
+// so_imm_not def below.
+def so_imm_not_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(~(int)N->getZExtValue(), MVT::i32);
+}]>;
+
+/// imm1_15 predicate - True if the 32-bit immediate is in the range [1,15].
+def imm1_15 : ImmLeaf<i32, [{
+ return (int32_t)Imm >= 1 && (int32_t)Imm < 16;
+}]>;
+
+/// imm16_31 predicate - True if the 32-bit immediate is in the range [16,31].
+def imm16_31 : ImmLeaf<i32, [{
+ return (int32_t)Imm >= 16 && (int32_t)Imm < 32;
+}]>;
+
+def so_imm_neg :
+ PatLeaf<(imm), [{
+ return ARM_AM::getSOImmVal(-(uint32_t)N->getZExtValue()) != -1;
+ }], so_imm_neg_XFORM>;
+
+def so_imm_not :
+ PatLeaf<(imm), [{
+ return ARM_AM::getSOImmVal(~(uint32_t)N->getZExtValue()) != -1;
+ }], so_imm_not_XFORM>;
+
+// sext_16_node predicate - True if the SDNode is sign-extended 16 or more bits.
+def sext_16_node : PatLeaf<(i32 GPR:$a), [{
+ return CurDAG->ComputeNumSignBits(SDValue(N,0)) >= 17;
+}]>;
+
+/// Split a 32-bit immediate into two 16 bit parts.
+def hi16 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant((uint32_t)N->getZExtValue() >> 16, MVT::i32);
+}]>;
+
+def lo16AllZero : PatLeaf<(i32 imm), [{
+ // Returns true if all low 16-bits are 0.
+ return (((uint32_t)N->getZExtValue()) & 0xFFFFUL) == 0;
+}], hi16>;
+
+/// imm0_65535 - An immediate is in the range [0.65535].
+def Imm0_65535AsmOperand: AsmOperandClass { let Name = "Imm0_65535"; }
+def imm0_65535 : Operand<i32>, ImmLeaf<i32, [{
+ return Imm >= 0 && Imm < 65536;
+}]> {
+ let ParserMatchClass = Imm0_65535AsmOperand;
+}
+
+class BinOpWithFlagFrag<dag res> :
+ PatFrag<(ops node:$LHS, node:$RHS, node:$FLAG), res>;
+class BinOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$RHS), res>;
+class UnOpFrag <dag res> : PatFrag<(ops node:$Src), res>;
+
+// An 'and' node with a single use.
+def and_su : PatFrag<(ops node:$lhs, node:$rhs), (and node:$lhs, node:$rhs), [{
+ return N->hasOneUse();
+}]>;
+
+// An 'xor' node with a single use.
+def xor_su : PatFrag<(ops node:$lhs, node:$rhs), (xor node:$lhs, node:$rhs), [{
+ return N->hasOneUse();
+}]>;
+
+// An 'fmul' node with a single use.
+def fmul_su : PatFrag<(ops node:$lhs, node:$rhs), (fmul node:$lhs, node:$rhs),[{
+ return N->hasOneUse();
+}]>;
+
+// An 'fadd' node which checks for single non-hazardous use.
+def fadd_mlx : PatFrag<(ops node:$lhs, node:$rhs),(fadd node:$lhs, node:$rhs),[{
+ return hasNoVMLxHazardUse(N);
+}]>;
+
+// An 'fsub' node which checks for single non-hazardous use.
+def fsub_mlx : PatFrag<(ops node:$lhs, node:$rhs),(fsub node:$lhs, node:$rhs),[{
+ return hasNoVMLxHazardUse(N);
+}]>;
+
+//===----------------------------------------------------------------------===//
+// Operand Definitions.
+//
+
+// Branch target.
+// FIXME: rename brtarget to t2_brtarget
+def brtarget : Operand<OtherVT> {
+ let EncoderMethod = "getBranchTargetOpValue";
+ let OperandType = "OPERAND_PCREL";
+ let DecoderMethod = "DecodeT2BROperand";
+}
+
+// FIXME: get rid of this one?
+def uncondbrtarget : Operand<OtherVT> {
+ let EncoderMethod = "getUnconditionalBranchTargetOpValue";
+ let OperandType = "OPERAND_PCREL";
+}
+
+// Branch target for ARM. Handles conditional/unconditional
+def br_target : Operand<OtherVT> {
+ let EncoderMethod = "getARMBranchTargetOpValue";
+ let OperandType = "OPERAND_PCREL";
+}
+
+// Call target.
+// FIXME: rename bltarget to t2_bl_target?
+def bltarget : Operand<i32> {
+ // Encoded the same as branch targets.
+ let EncoderMethod = "getBranchTargetOpValue";
+ let OperandType = "OPERAND_PCREL";
+}
+
+// Call target for ARM. Handles conditional/unconditional
+// FIXME: rename bl_target to t2_bltarget?
+def bl_target : Operand<i32> {
+ // Encoded the same as branch targets.
+ let EncoderMethod = "getARMBranchTargetOpValue";
+ let OperandType = "OPERAND_PCREL";
+}
+
+def blx_target : Operand<i32> {
+ // Encoded the same as branch targets.
+ let EncoderMethod = "getARMBLXTargetOpValue";
+ let OperandType = "OPERAND_PCREL";
+}
+
+// A list of registers separated by comma. Used by load/store multiple.
+def RegListAsmOperand : AsmOperandClass { let Name = "RegList"; }
+def reglist : Operand<i32> {
+ let EncoderMethod = "getRegisterListOpValue";
+ let ParserMatchClass = RegListAsmOperand;
+ let PrintMethod = "printRegisterList";
+ let DecoderMethod = "DecodeRegListOperand";
+}
+
+def DPRRegListAsmOperand : AsmOperandClass { let Name = "DPRRegList"; }
+def dpr_reglist : Operand<i32> {
+ let EncoderMethod = "getRegisterListOpValue";
+ let ParserMatchClass = DPRRegListAsmOperand;
+ let PrintMethod = "printRegisterList";
+ let DecoderMethod = "DecodeDPRRegListOperand";
+}
+
+def SPRRegListAsmOperand : AsmOperandClass { let Name = "SPRRegList"; }
+def spr_reglist : Operand<i32> {
+ let EncoderMethod = "getRegisterListOpValue";
+ let ParserMatchClass = SPRRegListAsmOperand;
+ let PrintMethod = "printRegisterList";
+ let DecoderMethod = "DecodeSPRRegListOperand";
+}
+
+// An operand for the CONSTPOOL_ENTRY pseudo-instruction.
+def cpinst_operand : Operand<i32> {
+ let PrintMethod = "printCPInstOperand";
+}
+
+// Local PC labels.
+def pclabel : Operand<i32> {
+ let PrintMethod = "printPCLabel";
+}
+
+// ADR instruction labels.
+def adrlabel : Operand<i32> {
+ let EncoderMethod = "getAdrLabelOpValue";
+}
+
+def neon_vcvt_imm32 : Operand<i32> {
+ let EncoderMethod = "getNEONVcvtImm32OpValue";
+ let DecoderMethod = "DecodeVCVTImmOperand";
+}
+
+// rot_imm: An integer that encodes a rotate amount. Must be 8, 16, or 24.
+def rot_imm_XFORM: SDNodeXForm<imm, [{
+ switch (N->getZExtValue()){
+ default: assert(0);
+ case 0: return CurDAG->getTargetConstant(0, MVT::i32);
+ case 8: return CurDAG->getTargetConstant(1, MVT::i32);
+ case 16: return CurDAG->getTargetConstant(2, MVT::i32);
+ case 24: return CurDAG->getTargetConstant(3, MVT::i32);
+ }
+}]>;
+def RotImmAsmOperand : AsmOperandClass {
+ let Name = "RotImm";
+ let ParserMethod = "parseRotImm";
+}
+def rot_imm : Operand<i32>, PatLeaf<(i32 imm), [{
+ int32_t v = N->getZExtValue();
+ return v == 8 || v == 16 || v == 24; }],
+ rot_imm_XFORM> {
+ let PrintMethod = "printRotImmOperand";
+ let ParserMatchClass = RotImmAsmOperand;
+}
+
+// shift_imm: An integer that encodes a shift amount and the type of shift
+// (asr or lsl). The 6-bit immediate encodes as:
+// {5} 0 ==> lsl
+// 1 asr
+// {4-0} imm5 shift amount.
+// asr #32 encoded as imm5 == 0.
+def ShifterImmAsmOperand : AsmOperandClass {
+ let Name = "ShifterImm";
+ let ParserMethod = "parseShifterImm";
+}
+def shift_imm : Operand<i32> {
+ let PrintMethod = "printShiftImmOperand";
+ let ParserMatchClass = ShifterImmAsmOperand;
+}
+
+// shifter_operand operands: so_reg_reg, so_reg_imm, and so_imm.
+def ShiftedRegAsmOperand : AsmOperandClass { let Name = "RegShiftedReg"; }
+def so_reg_reg : Operand<i32>, // reg reg imm
+ ComplexPattern<i32, 3, "SelectRegShifterOperand",
+ [shl, srl, sra, rotr]> {
+ let EncoderMethod = "getSORegRegOpValue";
+ let PrintMethod = "printSORegRegOperand";
+ let DecoderMethod = "DecodeSORegRegOperand";
+ let ParserMatchClass = ShiftedRegAsmOperand;
+ let MIOperandInfo = (ops GPRnopc, GPRnopc, i32imm);
+}
+
+def ShiftedImmAsmOperand : AsmOperandClass { let Name = "RegShiftedImm"; }
+def so_reg_imm : Operand<i32>, // reg imm
+ ComplexPattern<i32, 2, "SelectImmShifterOperand",
+ [shl, srl, sra, rotr]> {
+ let EncoderMethod = "getSORegImmOpValue";
+ let PrintMethod = "printSORegImmOperand";
+ let DecoderMethod = "DecodeSORegImmOperand";
+ let ParserMatchClass = ShiftedImmAsmOperand;
+ let MIOperandInfo = (ops GPR, i32imm);
+}
+
+// FIXME: Does this need to be distinct from so_reg?
+def shift_so_reg_reg : Operand<i32>, // reg reg imm
+ ComplexPattern<i32, 3, "SelectShiftRegShifterOperand",
+ [shl,srl,sra,rotr]> {
+ let EncoderMethod = "getSORegRegOpValue";
+ let PrintMethod = "printSORegRegOperand";
+ let DecoderMethod = "DecodeSORegRegOperand";
+ let MIOperandInfo = (ops GPR, GPR, i32imm);
+}
+
+// FIXME: Does this need to be distinct from so_reg?
+def shift_so_reg_imm : Operand<i32>, // reg reg imm
+ ComplexPattern<i32, 2, "SelectShiftImmShifterOperand",
+ [shl,srl,sra,rotr]> {
+ let EncoderMethod = "getSORegImmOpValue";
+ let PrintMethod = "printSORegImmOperand";
+ let DecoderMethod = "DecodeSORegImmOperand";
+ let MIOperandInfo = (ops GPR, i32imm);
+}
+
+
+// so_imm - Match a 32-bit shifter_operand immediate operand, which is an
+// 8-bit immediate rotated by an arbitrary number of bits.
+def SOImmAsmOperand: AsmOperandClass { let Name = "ARMSOImm"; }
+def so_imm : Operand<i32>, ImmLeaf<i32, [{
+ return ARM_AM::getSOImmVal(Imm) != -1;
+ }]> {
+ let EncoderMethod = "getSOImmOpValue";
+ let ParserMatchClass = SOImmAsmOperand;
+ let DecoderMethod = "DecodeSOImmOperand";
+}
+
+// Break so_imm's up into two pieces. This handles immediates with up to 16
+// bits set in them. This uses so_imm2part to match and so_imm2part_[12] to
+// get the first/second pieces.
+def so_imm2part : PatLeaf<(imm), [{
+ return ARM_AM::isSOImmTwoPartVal((unsigned)N->getZExtValue());
+}]>;
+
+/// arm_i32imm - True for +V6T2, or true only if so_imm2part is true.
+///
+def arm_i32imm : PatLeaf<(imm), [{
+ if (Subtarget->hasV6T2Ops())
+ return true;
+ return ARM_AM::isSOImmTwoPartVal((unsigned)N->getZExtValue());
+}]>;
+
+/// imm0_7 predicate - Immediate in the range [0,7].
+def Imm0_7AsmOperand: AsmOperandClass { let Name = "Imm0_7"; }
+def imm0_7 : Operand<i32>, ImmLeaf<i32, [{
+ return Imm >= 0 && Imm < 8;
+}]> {
+ let ParserMatchClass = Imm0_7AsmOperand;
+}
+
+/// imm0_15 predicate - Immediate in the range [0,15].
+def Imm0_15AsmOperand: AsmOperandClass { let Name = "Imm0_15"; }
+def imm0_15 : Operand<i32>, ImmLeaf<i32, [{
+ return Imm >= 0 && Imm < 16;
+}]> {
+ let ParserMatchClass = Imm0_15AsmOperand;
+}
+
+/// imm0_31 predicate - True if the 32-bit immediate is in the range [0,31].
+def Imm0_31AsmOperand: AsmOperandClass { let Name = "Imm0_31"; }
+def imm0_31 : Operand<i32>, ImmLeaf<i32, [{
+ return Imm >= 0 && Imm < 32;
+}]> {
+ let ParserMatchClass = Imm0_31AsmOperand;
+}
+
+/// imm0_255 predicate - Immediate in the range [0,255].
+def Imm0_255AsmOperand : AsmOperandClass { let Name = "Imm0_255"; }
+def imm0_255 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 256; }]> {
+ let ParserMatchClass = Imm0_255AsmOperand;
+}
+
+// imm0_65535_expr - For movt/movw - 16-bit immediate that can also reference
+// a relocatable expression.
+//
+// FIXME: This really needs a Thumb version separate from the ARM version.
+// While the range is the same, and can thus use the same match class,
+// the encoding is different so it should have a different encoder method.
+def Imm0_65535ExprAsmOperand: AsmOperandClass { let Name = "Imm0_65535Expr"; }
+def imm0_65535_expr : Operand<i32> {
+ let EncoderMethod = "getHiLo16ImmOpValue";
+ let ParserMatchClass = Imm0_65535ExprAsmOperand;
+}
+
+/// imm24b - True if the 32-bit immediate is encodable in 24 bits.
+def Imm24bitAsmOperand: AsmOperandClass { let Name = "Imm24bit"; }
+def imm24b : Operand<i32>, ImmLeaf<i32, [{
+ return Imm >= 0 && Imm <= 0xffffff;
+}]> {
+ let ParserMatchClass = Imm24bitAsmOperand;
+}
+
+
+/// bf_inv_mask_imm predicate - An AND mask to clear an arbitrary width bitfield
+/// e.g., 0xf000ffff
+def BitfieldAsmOperand : AsmOperandClass {
+ let Name = "Bitfield";
+ let ParserMethod = "parseBitfield";
+}
+def bf_inv_mask_imm : Operand<i32>,
+ PatLeaf<(imm), [{
+ return ARM::isBitFieldInvertedMask(N->getZExtValue());
+}] > {
+ let EncoderMethod = "getBitfieldInvertedMaskOpValue";
+ let PrintMethod = "printBitfieldInvMaskImmOperand";
+ let DecoderMethod = "DecodeBitfieldMaskOperand";
+ let ParserMatchClass = BitfieldAsmOperand;
+}
+
+def imm1_32_XFORM: SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant((int)N->getZExtValue() - 1, MVT::i32);
+}]>;
+def Imm1_32AsmOperand: AsmOperandClass { let Name = "Imm1_32"; }
+def imm1_32 : Operand<i32>, PatLeaf<(imm), [{
+ uint64_t Imm = N->getZExtValue();
+ return Imm > 0 && Imm <= 32;
+ }],
+ imm1_32_XFORM> {
+ let PrintMethod = "printImmPlusOneOperand";
+ let ParserMatchClass = Imm1_32AsmOperand;
+}
+
+def imm1_16_XFORM: SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant((int)N->getZExtValue() - 1, MVT::i32);
+}]>;
+def Imm1_16AsmOperand: AsmOperandClass { let Name = "Imm1_16"; }
+def imm1_16 : Operand<i32>, PatLeaf<(imm), [{ return Imm > 0 && Imm <= 16; }],
+ imm1_16_XFORM> {
+ let PrintMethod = "printImmPlusOneOperand";
+ let ParserMatchClass = Imm1_16AsmOperand;
+}
+
+// Define ARM specific addressing modes.
+// addrmode_imm12 := reg +/- imm12
+//
+def MemImm12OffsetAsmOperand : AsmOperandClass { let Name = "MemImm12Offset"; }
+def addrmode_imm12 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectAddrModeImm12", []> {
+ // 12-bit immediate operand. Note that instructions using this encode
+ // #0 and #-0 differently. We flag #-0 as the magic value INT32_MIN. All other
+ // immediate values are as normal.
+
+ let EncoderMethod = "getAddrModeImm12OpValue";
+ let PrintMethod = "printAddrModeImm12Operand";
+ let DecoderMethod = "DecodeAddrModeImm12Operand";
+ let ParserMatchClass = MemImm12OffsetAsmOperand;
+ let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
+}
+// ldst_so_reg := reg +/- reg shop imm
+//
+def MemRegOffsetAsmOperand : AsmOperandClass { let Name = "MemRegOffset"; }
+def ldst_so_reg : Operand<i32>,
+ ComplexPattern<i32, 3, "SelectLdStSOReg", []> {
+ let EncoderMethod = "getLdStSORegOpValue";
+ // FIXME: Simplify the printer
+ let PrintMethod = "printAddrMode2Operand";
+ let DecoderMethod = "DecodeSORegMemOperand";
+ let ParserMatchClass = MemRegOffsetAsmOperand;
+ let MIOperandInfo = (ops GPR:$base, GPRnopc:$offsreg, i32imm:$shift);
+}
+
+// postidx_imm8 := +/- [0,255]
+//
+// 9 bit value:
+// {8} 1 is imm8 is non-negative. 0 otherwise.
+// {7-0} [0,255] imm8 value.
+def PostIdxImm8AsmOperand : AsmOperandClass { let Name = "PostIdxImm8"; }
+def postidx_imm8 : Operand<i32> {
+ let PrintMethod = "printPostIdxImm8Operand";
+ let ParserMatchClass = PostIdxImm8AsmOperand;
+ let MIOperandInfo = (ops i32imm);
+}
+
+// postidx_imm8s4 := +/- [0,1020]
+//
+// 9 bit value:
+// {8} 1 is imm8 is non-negative. 0 otherwise.
+// {7-0} [0,255] imm8 value, scaled by 4.
+def PostIdxImm8s4AsmOperand : AsmOperandClass { let Name = "PostIdxImm8s4"; }
+def postidx_imm8s4 : Operand<i32> {
+ let PrintMethod = "printPostIdxImm8s4Operand";
+ let ParserMatchClass = PostIdxImm8s4AsmOperand;
+ let MIOperandInfo = (ops i32imm);
+}
+
+
+// postidx_reg := +/- reg
+//
+def PostIdxRegAsmOperand : AsmOperandClass {
+ let Name = "PostIdxReg";
+ let ParserMethod = "parsePostIdxReg";
+}
+def postidx_reg : Operand<i32> {
+ let EncoderMethod = "getPostIdxRegOpValue";
+ let DecoderMethod = "DecodePostIdxReg";
+ let PrintMethod = "printPostIdxRegOperand";
+ let ParserMatchClass = PostIdxRegAsmOperand;
+ let MIOperandInfo = (ops GPR, i32imm);
+}
+
+
+// addrmode2 := reg +/- imm12
+// := reg +/- reg shop imm
+//
+// FIXME: addrmode2 should be refactored the rest of the way to always
+// use explicit imm vs. reg versions above (addrmode_imm12 and ldst_so_reg).
+def AddrMode2AsmOperand : AsmOperandClass { let Name = "AddrMode2"; }
+def addrmode2 : Operand<i32>,
+ ComplexPattern<i32, 3, "SelectAddrMode2", []> {
+ let EncoderMethod = "getAddrMode2OpValue";
+ let PrintMethod = "printAddrMode2Operand";
+ let ParserMatchClass = AddrMode2AsmOperand;
+ let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm);
+}
+
+def PostIdxRegShiftedAsmOperand : AsmOperandClass {
+ let Name = "PostIdxRegShifted";
+ let ParserMethod = "parsePostIdxReg";
+}
+def am2offset_reg : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectAddrMode2OffsetReg",
+ [], [SDNPWantRoot]> {
+ let EncoderMethod = "getAddrMode2OffsetOpValue";
+ let PrintMethod = "printAddrMode2OffsetOperand";
+ // When using this for assembly, it's always as a post-index offset.
+ let ParserMatchClass = PostIdxRegShiftedAsmOperand;
+ let MIOperandInfo = (ops GPR, i32imm);
+}
+
+// FIXME: am2offset_imm should only need the immediate, not the GPR. Having
+// the GPR is purely vestigal at this point.
+def AM2OffsetImmAsmOperand : AsmOperandClass { let Name = "AM2OffsetImm"; }
+def am2offset_imm : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectAddrMode2OffsetImm",
+ [], [SDNPWantRoot]> {
+ let EncoderMethod = "getAddrMode2OffsetOpValue";
+ let PrintMethod = "printAddrMode2OffsetOperand";
+ let ParserMatchClass = AM2OffsetImmAsmOperand;
+ let MIOperandInfo = (ops GPR, i32imm);
+}
+
+
+// addrmode3 := reg +/- reg
+// addrmode3 := reg +/- imm8
+//
+// FIXME: split into imm vs. reg versions.
+def AddrMode3AsmOperand : AsmOperandClass { let Name = "AddrMode3"; }
+def addrmode3 : Operand<i32>,
+ ComplexPattern<i32, 3, "SelectAddrMode3", []> {
+ let EncoderMethod = "getAddrMode3OpValue";
+ let PrintMethod = "printAddrMode3Operand";
+ let ParserMatchClass = AddrMode3AsmOperand;
+ let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm);
+}
+
+// FIXME: split into imm vs. reg versions.
+// FIXME: parser method to handle +/- register.
+def AM3OffsetAsmOperand : AsmOperandClass {
+ let Name = "AM3Offset";
+ let ParserMethod = "parseAM3Offset";
+}
+def am3offset : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectAddrMode3Offset",
+ [], [SDNPWantRoot]> {
+ let EncoderMethod = "getAddrMode3OffsetOpValue";
+ let PrintMethod = "printAddrMode3OffsetOperand";
+ let ParserMatchClass = AM3OffsetAsmOperand;
+ let MIOperandInfo = (ops GPR, i32imm);
+}
+
+// ldstm_mode := {ia, ib, da, db}
+//
+def ldstm_mode : OptionalDefOperand<OtherVT, (ops i32), (ops (i32 1))> {
+ let EncoderMethod = "getLdStmModeOpValue";
+ let PrintMethod = "printLdStmModeOperand";
+}
+
+// addrmode5 := reg +/- imm8*4
+//
+def AddrMode5AsmOperand : AsmOperandClass { let Name = "AddrMode5"; }
+def addrmode5 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectAddrMode5", []> {
+ let PrintMethod = "printAddrMode5Operand";
+ let EncoderMethod = "getAddrMode5OpValue";
+ let DecoderMethod = "DecodeAddrMode5Operand";
+ let ParserMatchClass = AddrMode5AsmOperand;
+ let MIOperandInfo = (ops GPR:$base, i32imm);
+}
+
+// addrmode6 := reg with optional alignment
+//
+def AddrMode6AsmOperand : AsmOperandClass { let Name = "AlignedMemory"; }
+def addrmode6 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectAddrMode6", [], [SDNPWantParent]>{
+ let PrintMethod = "printAddrMode6Operand";
+ let MIOperandInfo = (ops GPR:$addr, i32imm:$align);
+ let EncoderMethod = "getAddrMode6AddressOpValue";
+ let DecoderMethod = "DecodeAddrMode6Operand";
+ let ParserMatchClass = AddrMode6AsmOperand;
+}
+
+def am6offset : Operand<i32>,
+ ComplexPattern<i32, 1, "SelectAddrMode6Offset",
+ [], [SDNPWantRoot]> {
+ let PrintMethod = "printAddrMode6OffsetOperand";
+ let MIOperandInfo = (ops GPR);
+ let EncoderMethod = "getAddrMode6OffsetOpValue";
+ let DecoderMethod = "DecodeGPRRegisterClass";
+}
+
+// Special version of addrmode6 to handle alignment encoding for VST1/VLD1
+// (single element from one lane) for size 32.
+def addrmode6oneL32 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectAddrMode6", [], [SDNPWantParent]>{
+ let PrintMethod = "printAddrMode6Operand";
+ let MIOperandInfo = (ops GPR:$addr, i32imm);
+ let EncoderMethod = "getAddrMode6OneLane32AddressOpValue";
+}
+
+// Special version of addrmode6 to handle alignment encoding for VLD-dup
+// instructions, specifically VLD4-dup.
+def addrmode6dup : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectAddrMode6", [], [SDNPWantParent]>{
+ let PrintMethod = "printAddrMode6Operand";
+ let MIOperandInfo = (ops GPR:$addr, i32imm);
+ let EncoderMethod = "getAddrMode6DupAddressOpValue";
+}
+
+// addrmodepc := pc + reg
+//
+def addrmodepc : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectAddrModePC", []> {
+ let PrintMethod = "printAddrModePCOperand";
+ let MIOperandInfo = (ops GPR, i32imm);
+}
+
+// addr_offset_none := reg
+//
+def MemNoOffsetAsmOperand : AsmOperandClass { let Name = "MemNoOffset"; }
+def addr_offset_none : Operand<i32>,
+ ComplexPattern<i32, 1, "SelectAddrOffsetNone", []> {
+ let PrintMethod = "printAddrMode7Operand";
+ let DecoderMethod = "DecodeAddrMode7Operand";
+ let ParserMatchClass = MemNoOffsetAsmOperand;
+ let MIOperandInfo = (ops GPR:$base);
+}
+
+def nohash_imm : Operand<i32> {
+ let PrintMethod = "printNoHashImmediate";
+}
+
+def CoprocNumAsmOperand : AsmOperandClass {
+ let Name = "CoprocNum";
+ let ParserMethod = "parseCoprocNumOperand";
+}
+def p_imm : Operand<i32> {
+ let PrintMethod = "printPImmediate";
+ let ParserMatchClass = CoprocNumAsmOperand;
+ let DecoderMethod = "DecodeCoprocessor";
+}
+
+def CoprocRegAsmOperand : AsmOperandClass {
+ let Name = "CoprocReg";
+ let ParserMethod = "parseCoprocRegOperand";
+}
+def c_imm : Operand<i32> {
+ let PrintMethod = "printCImmediate";
+ let ParserMatchClass = CoprocRegAsmOperand;
+}
+def CoprocOptionAsmOperand : AsmOperandClass {
+ let Name = "CoprocOption";
+ let ParserMethod = "parseCoprocOptionOperand";
+}
+def coproc_option_imm : Operand<i32> {
+ let PrintMethod = "printCoprocOptionImm";
+ let ParserMatchClass = CoprocOptionAsmOperand;
+}
+
+//===----------------------------------------------------------------------===//
+
+include "ARMInstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// Multiclass helpers...
+//
+
+/// AsI1_bin_irs - Defines a set of (op r, {so_imm|r|so_reg}) patterns for a
+/// binop that produces a value.
+multiclass AsI1_bin_irs<bits<4> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+ PatFrag opnode, string baseOpc, bit Commutable = 0> {
+ // The register-immediate version is re-materializable. This is useful
+ // in particular for taking the address of a local.
+ let isReMaterializable = 1 in {
+ def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm,
+ iii, opc, "\t$Rd, $Rn, $imm",
+ [(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> imm;
+ let Inst{25} = 1;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-0} = imm;
+ }
+ }
+ def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm,
+ iir, opc, "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{25} = 0;
+ let isCommutable = Commutable;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-4} = 0b00000000;
+ let Inst{3-0} = Rm;
+ }
+
+ def rsi : AsI1<opcod, (outs GPR:$Rd),
+ (ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm,
+ iis, opc, "\t$Rd, $Rn, $shift",
+ [(set GPR:$Rd, (opnode GPR:$Rn, so_reg_imm:$shift))]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-5} = shift{11-5};
+ let Inst{4} = 0;
+ let Inst{3-0} = shift{3-0};
+ }
+
+ def rsr : AsI1<opcod, (outs GPR:$Rd),
+ (ins GPR:$Rn, so_reg_reg:$shift), DPSoRegRegFrm,
+ iis, opc, "\t$Rd, $Rn, $shift",
+ [(set GPR:$Rd, (opnode GPR:$Rn, so_reg_reg:$shift))]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-8} = shift{11-8};
+ let Inst{7} = 0;
+ let Inst{6-5} = shift{6-5};
+ let Inst{4} = 1;
+ let Inst{3-0} = shift{3-0};
+ }
+
+ // Assembly aliases for optional destination operand when it's the same
+ // as the source operand.
+ def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $imm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "ri")) GPR:$Rdn, GPR:$Rdn,
+ so_imm:$imm, pred:$p,
+ cc_out:$s)>,
+ Requires<[IsARM]>;
+ def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $Rm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "rr")) GPR:$Rdn, GPR:$Rdn,
+ GPR:$Rm, pred:$p,
+ cc_out:$s)>,
+ Requires<[IsARM]>;
+ def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $shift"),
+ (!cast<Instruction>(!strconcat(baseOpc, "rsi")) GPR:$Rdn, GPR:$Rdn,
+ so_reg_imm:$shift, pred:$p,
+ cc_out:$s)>,
+ Requires<[IsARM]>;
+ def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $shift"),
+ (!cast<Instruction>(!strconcat(baseOpc, "rsr")) GPR:$Rdn, GPR:$Rdn,
+ so_reg_reg:$shift, pred:$p,
+ cc_out:$s)>,
+ Requires<[IsARM]>;
+
+}
+
+/// AsI1_rbin_irs - Same as AsI1_bin_irs except the order of operands are
+/// reversed. The 'rr' form is only defined for the disassembler; for codegen
+/// it is equivalent to the AsI1_bin_irs counterpart.
+multiclass AsI1_rbin_irs<bits<4> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+ PatFrag opnode, string baseOpc, bit Commutable = 0> {
+ // The register-immediate version is re-materializable. This is useful
+ // in particular for taking the address of a local.
+ let isReMaterializable = 1 in {
+ def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm,
+ iii, opc, "\t$Rd, $Rn, $imm",
+ [(set GPR:$Rd, (opnode so_imm:$imm, GPR:$Rn))]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> imm;
+ let Inst{25} = 1;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-0} = imm;
+ }
+ }
+ def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm,
+ iir, opc, "\t$Rd, $Rn, $Rm",
+ [/* pattern left blank */]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{11-4} = 0b00000000;
+ let Inst{25} = 0;
+ let Inst{3-0} = Rm;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
+ }
+
+ def rsi : AsI1<opcod, (outs GPR:$Rd),
+ (ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm,
+ iis, opc, "\t$Rd, $Rn, $shift",
+ [(set GPR:$Rd, (opnode so_reg_imm:$shift, GPR:$Rn))]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-5} = shift{11-5};
+ let Inst{4} = 0;
+ let Inst{3-0} = shift{3-0};
+ }
+
+ def rsr : AsI1<opcod, (outs GPR:$Rd),
+ (ins GPR:$Rn, so_reg_reg:$shift), DPSoRegRegFrm,
+ iis, opc, "\t$Rd, $Rn, $shift",
+ [(set GPR:$Rd, (opnode so_reg_reg:$shift, GPR:$Rn))]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-8} = shift{11-8};
+ let Inst{7} = 0;
+ let Inst{6-5} = shift{6-5};
+ let Inst{4} = 1;
+ let Inst{3-0} = shift{3-0};
+ }
+
+ // Assembly aliases for optional destination operand when it's the same
+ // as the source operand.
+ def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $imm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "ri")) GPR:$Rdn, GPR:$Rdn,
+ so_imm:$imm, pred:$p,
+ cc_out:$s)>,
+ Requires<[IsARM]>;
+ def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $Rm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "rr")) GPR:$Rdn, GPR:$Rdn,
+ GPR:$Rm, pred:$p,
+ cc_out:$s)>,
+ Requires<[IsARM]>;
+ def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $shift"),
+ (!cast<Instruction>(!strconcat(baseOpc, "rsi")) GPR:$Rdn, GPR:$Rdn,
+ so_reg_imm:$shift, pred:$p,
+ cc_out:$s)>,
+ Requires<[IsARM]>;
+ def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $shift"),
+ (!cast<Instruction>(!strconcat(baseOpc, "rsr")) GPR:$Rdn, GPR:$Rdn,
+ so_reg_reg:$shift, pred:$p,
+ cc_out:$s)>,
+ Requires<[IsARM]>;
+
+}
+
+/// AsI1_rbin_s_is - Same as AsI1_rbin_s_is except it sets 's' bit by default.
+///
+/// These opcodes will be converted to the real non-S opcodes by
+/// AdjustInstrPostInstrSelection after giving then an optional CPSR operand.
+let hasPostISelHook = 1, isCodeGenOnly = 1, isPseudo = 1, Defs = [CPSR] in {
+multiclass AsI1_rbin_s_is<bits<4> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+ PatFrag opnode, bit Commutable = 0> {
+ def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm,
+ iii, opc, "\t$Rd, $Rn, $imm",
+ [(set GPR:$Rd, CPSR, (opnode so_imm:$imm, GPR:$Rn))]>;
+
+ def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm,
+ iir, opc, "\t$Rd, $Rn, $Rm",
+ [/* pattern left blank */]>;
+
+ def rsi : AsI1<opcod, (outs GPR:$Rd),
+ (ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm,
+ iis, opc, "\t$Rd, $Rn, $shift",
+ [(set GPR:$Rd, CPSR, (opnode so_reg_imm:$shift, GPR:$Rn))]>;
+
+ def rsr : AsI1<opcod, (outs GPR:$Rd),
+ (ins GPR:$Rn, so_reg_reg:$shift), DPSoRegRegFrm,
+ iis, opc, "\t$Rd, $Rn, $shift",
+ [(set GPR:$Rd, CPSR, (opnode so_reg_reg:$shift, GPR:$Rn))]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-8} = shift{11-8};
+ let Inst{7} = 0;
+ let Inst{6-5} = shift{6-5};
+ let Inst{4} = 1;
+ let Inst{3-0} = shift{3-0};
+ }
+}
+}
+
+/// AsI1_bin_s_irs - Same as AsI1_bin_irs except it sets the 's' bit by default.
+///
+/// These opcodes will be converted to the real non-S opcodes by
+/// AdjustInstrPostInstrSelection after giving then an optional CPSR operand.
+let hasPostISelHook = 1, isCodeGenOnly = 1, isPseudo = 1, Defs = [CPSR] in {
+multiclass AsI1_bin_s_irs<bits<4> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+ PatFrag opnode, bit Commutable = 0> {
+ def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm,
+ iii, opc, "\t$Rd, $Rn, $imm",
+ [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_imm:$imm))]>;
+ def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm,
+ iir, opc, "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, GPR:$Rm))]>;
+ def rsi : AsI1<opcod, (outs GPR:$Rd),
+ (ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm,
+ iis, opc, "\t$Rd, $Rn, $shift",
+ [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_reg_imm:$shift))]>;
+
+ def rsr : AsI1<opcod, (outs GPR:$Rd),
+ (ins GPR:$Rn, so_reg_reg:$shift), DPSoRegRegFrm,
+ iis, opc, "\t$Rd, $Rn, $shift",
+ [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_reg_reg:$shift))]>;
+}
+}
+
+/// AI1_cmp_irs - Defines a set of (op r, {so_imm|r|so_reg}) cmp / test
+/// patterns. Similar to AsI1_bin_irs except the instruction does not produce
+/// a explicit result, only implicitly set CPSR.
+let isCompare = 1, Defs = [CPSR] in {
+multiclass AI1_cmp_irs<bits<4> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+ PatFrag opnode, bit Commutable = 0> {
+ def ri : AI1<opcod, (outs), (ins GPR:$Rn, so_imm:$imm), DPFrm, iii,
+ opc, "\t$Rn, $imm",
+ [(opnode GPR:$Rn, so_imm:$imm)]> {
+ bits<4> Rn;
+ bits<12> imm;
+ let Inst{25} = 1;
+ let Inst{20} = 1;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = 0b0000;
+ let Inst{11-0} = imm;
+ }
+ def rr : AI1<opcod, (outs), (ins GPR:$Rn, GPR:$Rm), DPFrm, iir,
+ opc, "\t$Rn, $Rm",
+ [(opnode GPR:$Rn, GPR:$Rm)]> {
+ bits<4> Rn;
+ bits<4> Rm;
+ let isCommutable = Commutable;
+ let Inst{25} = 0;
+ let Inst{20} = 1;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = 0b0000;
+ let Inst{11-4} = 0b00000000;
+ let Inst{3-0} = Rm;
+ }
+ def rsi : AI1<opcod, (outs),
+ (ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm, iis,
+ opc, "\t$Rn, $shift",
+ [(opnode GPR:$Rn, so_reg_imm:$shift)]> {
+ bits<4> Rn;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{20} = 1;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = 0b0000;
+ let Inst{11-5} = shift{11-5};
+ let Inst{4} = 0;
+ let Inst{3-0} = shift{3-0};
+ }
+ def rsr : AI1<opcod, (outs),
+ (ins GPR:$Rn, so_reg_reg:$shift), DPSoRegRegFrm, iis,
+ opc, "\t$Rn, $shift",
+ [(opnode GPR:$Rn, so_reg_reg:$shift)]> {
+ bits<4> Rn;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{20} = 1;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = 0b0000;
+ let Inst{11-8} = shift{11-8};
+ let Inst{7} = 0;
+ let Inst{6-5} = shift{6-5};
+ let Inst{4} = 1;
+ let Inst{3-0} = shift{3-0};
+ }
+
+}
+}
+
+/// AI_ext_rrot - A unary operation with two forms: one whose operand is a
+/// register and one whose operand is a register rotated by 8/16/24.
+/// FIXME: Remove the 'r' variant. Its rot_imm is zero.
+class AI_ext_rrot<bits<8> opcod, string opc, PatFrag opnode>
+ : AExtI<opcod, (outs GPRnopc:$Rd), (ins GPRnopc:$Rm, rot_imm:$rot),
+ IIC_iEXTr, opc, "\t$Rd, $Rm$rot",
+ [(set GPRnopc:$Rd, (opnode (rotr GPRnopc:$Rm, rot_imm:$rot)))]>,
+ Requires<[IsARM, HasV6]> {
+ bits<4> Rd;
+ bits<4> Rm;
+ bits<2> rot;
+ let Inst{19-16} = 0b1111;
+ let Inst{15-12} = Rd;
+ let Inst{11-10} = rot;
+ let Inst{3-0} = Rm;
+}
+
+class AI_ext_rrot_np<bits<8> opcod, string opc>
+ : AExtI<opcod, (outs GPRnopc:$Rd), (ins GPRnopc:$Rm, rot_imm:$rot),
+ IIC_iEXTr, opc, "\t$Rd, $Rm$rot", []>,
+ Requires<[IsARM, HasV6]> {
+ bits<2> rot;
+ let Inst{19-16} = 0b1111;
+ let Inst{11-10} = rot;
+}
+
+/// AI_exta_rrot - A binary operation with two forms: one whose operand is a
+/// register and one whose operand is a register rotated by 8/16/24.
+class AI_exta_rrot<bits<8> opcod, string opc, PatFrag opnode>
+ : AExtI<opcod, (outs GPRnopc:$Rd), (ins GPR:$Rn, GPRnopc:$Rm, rot_imm:$rot),
+ IIC_iEXTAr, opc, "\t$Rd, $Rn, $Rm$rot",
+ [(set GPRnopc:$Rd, (opnode GPR:$Rn,
+ (rotr GPRnopc:$Rm, rot_imm:$rot)))]>,
+ Requires<[IsARM, HasV6]> {
+ bits<4> Rd;
+ bits<4> Rm;
+ bits<4> Rn;
+ bits<2> rot;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-10} = rot;
+ let Inst{9-4} = 0b000111;
+ let Inst{3-0} = Rm;
+}
+
+class AI_exta_rrot_np<bits<8> opcod, string opc>
+ : AExtI<opcod, (outs GPRnopc:$Rd), (ins GPR:$Rn, GPRnopc:$Rm, rot_imm:$rot),
+ IIC_iEXTAr, opc, "\t$Rd, $Rn, $Rm$rot", []>,
+ Requires<[IsARM, HasV6]> {
+ bits<4> Rn;
+ bits<2> rot;
+ let Inst{19-16} = Rn;
+ let Inst{11-10} = rot;
+}
+
+/// AI1_adde_sube_irs - Define instructions and patterns for adde and sube.
+multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
+ string baseOpc, bit Commutable = 0> {
+ let hasPostISelHook = 1, Defs = [CPSR], Uses = [CPSR] in {
+ def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
+ DPFrm, IIC_iALUi, opc, "\t$Rd, $Rn, $imm",
+ [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_imm:$imm, CPSR))]>,
+ Requires<[IsARM]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> imm;
+ let Inst{25} = 1;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{11-0} = imm;
+ }
+ def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ DPFrm, IIC_iALUr, opc, "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, GPR:$Rm, CPSR))]>,
+ Requires<[IsARM]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{11-4} = 0b00000000;
+ let Inst{25} = 0;
+ let isCommutable = Commutable;
+ let Inst{3-0} = Rm;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
+ }
+ def rsi : AsI1<opcod, (outs GPR:$Rd),
+ (ins GPR:$Rn, so_reg_imm:$shift),
+ DPSoRegImmFrm, IIC_iALUsr, opc, "\t$Rd, $Rn, $shift",
+ [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_reg_imm:$shift, CPSR))]>,
+ Requires<[IsARM]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-5} = shift{11-5};
+ let Inst{4} = 0;
+ let Inst{3-0} = shift{3-0};
+ }
+ def rsr : AsI1<opcod, (outs GPR:$Rd),
+ (ins GPR:$Rn, so_reg_reg:$shift),
+ DPSoRegRegFrm, IIC_iALUsr, opc, "\t$Rd, $Rn, $shift",
+ [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_reg_reg:$shift, CPSR))]>,
+ Requires<[IsARM]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-8} = shift{11-8};
+ let Inst{7} = 0;
+ let Inst{6-5} = shift{6-5};
+ let Inst{4} = 1;
+ let Inst{3-0} = shift{3-0};
+ }
+ }
+
+ // Assembly aliases for optional destination operand when it's the same
+ // as the source operand.
+ def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $imm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "ri")) GPR:$Rdn, GPR:$Rdn,
+ so_imm:$imm, pred:$p,
+ cc_out:$s)>,
+ Requires<[IsARM]>;
+ def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $Rm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "rr")) GPR:$Rdn, GPR:$Rdn,
+ GPR:$Rm, pred:$p,
+ cc_out:$s)>,
+ Requires<[IsARM]>;
+ def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $shift"),
+ (!cast<Instruction>(!strconcat(baseOpc, "rsi")) GPR:$Rdn, GPR:$Rdn,
+ so_reg_imm:$shift, pred:$p,
+ cc_out:$s)>,
+ Requires<[IsARM]>;
+ def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $shift"),
+ (!cast<Instruction>(!strconcat(baseOpc, "rsr")) GPR:$Rdn, GPR:$Rdn,
+ so_reg_reg:$shift, pred:$p,
+ cc_out:$s)>,
+ Requires<[IsARM]>;
+}
+
+/// AI1_rsc_irs - Define instructions and patterns for rsc
+multiclass AI1_rsc_irs<bits<4> opcod, string opc, PatFrag opnode,
+ string baseOpc> {
+ let hasPostISelHook = 1, Defs = [CPSR], Uses = [CPSR] in {
+ def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
+ DPFrm, IIC_iALUi, opc, "\t$Rd, $Rn, $imm",
+ [(set GPR:$Rd, CPSR, (opnode so_imm:$imm, GPR:$Rn, CPSR))]>,
+ Requires<[IsARM]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> imm;
+ let Inst{25} = 1;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{11-0} = imm;
+ }
+ def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ DPFrm, IIC_iALUr, opc, "\t$Rd, $Rn, $Rm",
+ [/* pattern left blank */]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{11-4} = 0b00000000;
+ let Inst{25} = 0;
+ let Inst{3-0} = Rm;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
+ }
+ def rsi : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_reg_imm:$shift),
+ DPSoRegImmFrm, IIC_iALUsr, opc, "\t$Rd, $Rn, $shift",
+ [(set GPR:$Rd, CPSR, (opnode so_reg_imm:$shift, GPR:$Rn, CPSR))]>,
+ Requires<[IsARM]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-5} = shift{11-5};
+ let Inst{4} = 0;
+ let Inst{3-0} = shift{3-0};
+ }
+ def rsr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_reg_reg:$shift),
+ DPSoRegRegFrm, IIC_iALUsr, opc, "\t$Rd, $Rn, $shift",
+ [(set GPR:$Rd, CPSR, (opnode so_reg_reg:$shift, GPR:$Rn, CPSR))]>,
+ Requires<[IsARM]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-8} = shift{11-8};
+ let Inst{7} = 0;
+ let Inst{6-5} = shift{6-5};
+ let Inst{4} = 1;
+ let Inst{3-0} = shift{3-0};
+ }
+ }
+
+ // Assembly aliases for optional destination operand when it's the same
+ // as the source operand.
+ def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $imm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "ri")) GPR:$Rdn, GPR:$Rdn,
+ so_imm:$imm, pred:$p,
+ cc_out:$s)>,
+ Requires<[IsARM]>;
+ def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $Rm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "rr")) GPR:$Rdn, GPR:$Rdn,
+ GPR:$Rm, pred:$p,
+ cc_out:$s)>,
+ Requires<[IsARM]>;
+ def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $shift"),
+ (!cast<Instruction>(!strconcat(baseOpc, "rsi")) GPR:$Rdn, GPR:$Rdn,
+ so_reg_imm:$shift, pred:$p,
+ cc_out:$s)>,
+ Requires<[IsARM]>;
+ def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $shift"),
+ (!cast<Instruction>(!strconcat(baseOpc, "rsr")) GPR:$Rdn, GPR:$Rdn,
+ so_reg_reg:$shift, pred:$p,
+ cc_out:$s)>,
+ Requires<[IsARM]>;
+}
+
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
+multiclass AI_ldr1<bit isByte, string opc, InstrItinClass iii,
+ InstrItinClass iir, PatFrag opnode> {
+ // Note: We use the complex addrmode_imm12 rather than just an input
+ // GPR and a constrained immediate so that we can use this to match
+ // frame index references and avoid matching constant pool references.
+ def i12: AI2ldst<0b010, 1, isByte, (outs GPR:$Rt), (ins addrmode_imm12:$addr),
+ AddrMode_i12, LdFrm, iii, opc, "\t$Rt, $addr",
+ [(set GPR:$Rt, (opnode addrmode_imm12:$addr))]> {
+ bits<4> Rt;
+ bits<17> addr;
+ let Inst{23} = addr{12}; // U (add = ('U' == 1))
+ let Inst{19-16} = addr{16-13}; // Rn
+ let Inst{15-12} = Rt;
+ let Inst{11-0} = addr{11-0}; // imm12
+ }
+ def rs : AI2ldst<0b011, 1, isByte, (outs GPR:$Rt), (ins ldst_so_reg:$shift),
+ AddrModeNone, LdFrm, iir, opc, "\t$Rt, $shift",
+ [(set GPR:$Rt, (opnode ldst_so_reg:$shift))]> {
+ bits<4> Rt;
+ bits<17> shift;
+ let shift{4} = 0; // Inst{4} = 0
+ let Inst{23} = shift{12}; // U (add = ('U' == 1))
+ let Inst{19-16} = shift{16-13}; // Rn
+ let Inst{15-12} = Rt;
+ let Inst{11-0} = shift{11-0};
+ }
+}
+}
+
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
+multiclass AI_ldr1nopc<bit isByte, string opc, InstrItinClass iii,
+ InstrItinClass iir, PatFrag opnode> {
+ // Note: We use the complex addrmode_imm12 rather than just an input
+ // GPR and a constrained immediate so that we can use this to match
+ // frame index references and avoid matching constant pool references.
+ def i12: AI2ldst<0b010, 1, isByte, (outs GPRnopc:$Rt), (ins addrmode_imm12:$addr),
+ AddrMode_i12, LdFrm, iii, opc, "\t$Rt, $addr",
+ [(set GPRnopc:$Rt, (opnode addrmode_imm12:$addr))]> {
+ bits<4> Rt;
+ bits<17> addr;
+ let Inst{23} = addr{12}; // U (add = ('U' == 1))
+ let Inst{19-16} = addr{16-13}; // Rn
+ let Inst{15-12} = Rt;
+ let Inst{11-0} = addr{11-0}; // imm12
+ }
+ def rs : AI2ldst<0b011, 1, isByte, (outs GPRnopc:$Rt), (ins ldst_so_reg:$shift),
+ AddrModeNone, LdFrm, iir, opc, "\t$Rt, $shift",
+ [(set GPRnopc:$Rt, (opnode ldst_so_reg:$shift))]> {
+ bits<4> Rt;
+ bits<17> shift;
+ let shift{4} = 0; // Inst{4} = 0
+ let Inst{23} = shift{12}; // U (add = ('U' == 1))
+ let Inst{19-16} = shift{16-13}; // Rn
+ let Inst{15-12} = Rt;
+ let Inst{11-0} = shift{11-0};
+ }
+}
+}
+
+
+multiclass AI_str1<bit isByte, string opc, InstrItinClass iii,
+ InstrItinClass iir, PatFrag opnode> {
+ // Note: We use the complex addrmode_imm12 rather than just an input
+ // GPR and a constrained immediate so that we can use this to match
+ // frame index references and avoid matching constant pool references.
+ def i12 : AI2ldst<0b010, 0, isByte, (outs),
+ (ins GPR:$Rt, addrmode_imm12:$addr),
+ AddrMode_i12, StFrm, iii, opc, "\t$Rt, $addr",
+ [(opnode GPR:$Rt, addrmode_imm12:$addr)]> {
+ bits<4> Rt;
+ bits<17> addr;
+ let Inst{23} = addr{12}; // U (add = ('U' == 1))
+ let Inst{19-16} = addr{16-13}; // Rn
+ let Inst{15-12} = Rt;
+ let Inst{11-0} = addr{11-0}; // imm12
+ }
+ def rs : AI2ldst<0b011, 0, isByte, (outs), (ins GPR:$Rt, ldst_so_reg:$shift),
+ AddrModeNone, StFrm, iir, opc, "\t$Rt, $shift",
+ [(opnode GPR:$Rt, ldst_so_reg:$shift)]> {
+ bits<4> Rt;
+ bits<17> shift;
+ let shift{4} = 0; // Inst{4} = 0
+ let Inst{23} = shift{12}; // U (add = ('U' == 1))
+ let Inst{19-16} = shift{16-13}; // Rn
+ let Inst{15-12} = Rt;
+ let Inst{11-0} = shift{11-0};
+ }
+}
+
+multiclass AI_str1nopc<bit isByte, string opc, InstrItinClass iii,
+ InstrItinClass iir, PatFrag opnode> {
+ // Note: We use the complex addrmode_imm12 rather than just an input
+ // GPR and a constrained immediate so that we can use this to match
+ // frame index references and avoid matching constant pool references.
+ def i12 : AI2ldst<0b010, 0, isByte, (outs),
+ (ins GPRnopc:$Rt, addrmode_imm12:$addr),
+ AddrMode_i12, StFrm, iii, opc, "\t$Rt, $addr",
+ [(opnode GPRnopc:$Rt, addrmode_imm12:$addr)]> {
+ bits<4> Rt;
+ bits<17> addr;
+ let Inst{23} = addr{12}; // U (add = ('U' == 1))
+ let Inst{19-16} = addr{16-13}; // Rn
+ let Inst{15-12} = Rt;
+ let Inst{11-0} = addr{11-0}; // imm12
+ }
+ def rs : AI2ldst<0b011, 0, isByte, (outs), (ins GPRnopc:$Rt, ldst_so_reg:$shift),
+ AddrModeNone, StFrm, iir, opc, "\t$Rt, $shift",
+ [(opnode GPRnopc:$Rt, ldst_so_reg:$shift)]> {
+ bits<4> Rt;
+ bits<17> shift;
+ let shift{4} = 0; // Inst{4} = 0
+ let Inst{23} = shift{12}; // U (add = ('U' == 1))
+ let Inst{19-16} = shift{16-13}; // Rn
+ let Inst{15-12} = Rt;
+ let Inst{11-0} = shift{11-0};
+ }
+}
+
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous Instructions.
+//
+
+/// CONSTPOOL_ENTRY - This instruction represents a floating constant pool in
+/// the function. The first operand is the ID# for this instruction, the second
+/// is the index into the MachineConstantPool that this is, the third is the
+/// size in bytes of this constant pool entry.
+let neverHasSideEffects = 1, isNotDuplicable = 1 in
+def CONSTPOOL_ENTRY :
+PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx,
+ i32imm:$size), NoItinerary, []>;
+
+// FIXME: Marking these as hasSideEffects is necessary to prevent machine DCE
+// from removing one half of the matched pairs. That breaks PEI, which assumes
+// these will always be in pairs, and asserts if it finds otherwise. Better way?
+let Defs = [SP], Uses = [SP], hasSideEffects = 1 in {
+def ADJCALLSTACKUP :
+PseudoInst<(outs), (ins i32imm:$amt1, i32imm:$amt2, pred:$p), NoItinerary,
+ [(ARMcallseq_end timm:$amt1, timm:$amt2)]>;
+
+def ADJCALLSTACKDOWN :
+PseudoInst<(outs), (ins i32imm:$amt, pred:$p), NoItinerary,
+ [(ARMcallseq_start timm:$amt)]>;
+}
+
+// Atomic pseudo-insts which will be lowered to ldrexd/strexd loops.
+// (These psuedos use a hand-written selection code).
+let usesCustomInserter = 1, Defs = [CPSR], mayLoad = 1, mayStore = 1 in {
+def ATOMOR6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2),
+ NoItinerary, []>;
+def ATOMXOR6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2),
+ NoItinerary, []>;
+def ATOMADD6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2),
+ NoItinerary, []>;
+def ATOMSUB6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2),
+ NoItinerary, []>;
+def ATOMNAND6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2),
+ NoItinerary, []>;
+def ATOMAND6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2),
+ NoItinerary, []>;
+def ATOMSWAP6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2),
+ NoItinerary, []>;
+def ATOMCMPXCHG6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$cmp1, GPR:$cmp2,
+ GPR:$set1, GPR:$set2),
+ NoItinerary, []>;
+}
+
+def NOP : AI<(outs), (ins), MiscFrm, NoItinerary, "nop", "", []>,
+ Requires<[IsARM, HasV6T2]> {
+ let Inst{27-16} = 0b001100100000;
+ let Inst{15-8} = 0b11110000;
+ let Inst{7-0} = 0b00000000;
+}
+
+def YIELD : AI<(outs), (ins), MiscFrm, NoItinerary, "yield", "", []>,
+ Requires<[IsARM, HasV6T2]> {
+ let Inst{27-16} = 0b001100100000;
+ let Inst{15-8} = 0b11110000;
+ let Inst{7-0} = 0b00000001;
+}
+
+def WFE : AI<(outs), (ins), MiscFrm, NoItinerary, "wfe", "", []>,
+ Requires<[IsARM, HasV6T2]> {
+ let Inst{27-16} = 0b001100100000;
+ let Inst{15-8} = 0b11110000;
+ let Inst{7-0} = 0b00000010;
+}
+
+def WFI : AI<(outs), (ins), MiscFrm, NoItinerary, "wfi", "", []>,
+ Requires<[IsARM, HasV6T2]> {
+ let Inst{27-16} = 0b001100100000;
+ let Inst{15-8} = 0b11110000;
+ let Inst{7-0} = 0b00000011;
+}
+
+def SEL : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm, NoItinerary, "sel",
+ "\t$Rd, $Rn, $Rm", []>, Requires<[IsARM, HasV6]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{3-0} = Rm;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{27-20} = 0b01101000;
+ let Inst{7-4} = 0b1011;
+ let Inst{11-8} = 0b1111;
+}
+
+def SEV : AI<(outs), (ins), MiscFrm, NoItinerary, "sev", "",
+ []>, Requires<[IsARM, HasV6T2]> {
+ let Inst{27-16} = 0b001100100000;
+ let Inst{15-8} = 0b11110000;
+ let Inst{7-0} = 0b00000100;
+}
+
+// The i32imm operand $val can be used by a debugger to store more information
+// about the breakpoint.
+def BKPT : AI<(outs), (ins imm0_65535:$val), MiscFrm, NoItinerary,
+ "bkpt", "\t$val", []>, Requires<[IsARM]> {
+ bits<16> val;
+ let Inst{3-0} = val{3-0};
+ let Inst{19-8} = val{15-4};
+ let Inst{27-20} = 0b00010010;
+ let Inst{7-4} = 0b0111;
+}
+
+// Change Processor State
+// FIXME: We should use InstAlias to handle the optional operands.
+class CPS<dag iops, string asm_ops>
+ : AXI<(outs), iops, MiscFrm, NoItinerary, !strconcat("cps", asm_ops),
+ []>, Requires<[IsARM]> {
+ bits<2> imod;
+ bits<3> iflags;
+ bits<5> mode;
+ bit M;
+
+ let Inst{31-28} = 0b1111;
+ let Inst{27-20} = 0b00010000;
+ let Inst{19-18} = imod;
+ let Inst{17} = M; // Enabled if mode is set;
+ let Inst{16} = 0;
+ let Inst{8-6} = iflags;
+ let Inst{5} = 0;
+ let Inst{4-0} = mode;
+}
+
+let DecoderMethod = "DecodeCPSInstruction" in {
+let M = 1 in
+ def CPS3p : CPS<(ins imod_op:$imod, iflags_op:$iflags, imm0_31:$mode),
+ "$imod\t$iflags, $mode">;
+let mode = 0, M = 0 in
+ def CPS2p : CPS<(ins imod_op:$imod, iflags_op:$iflags), "$imod\t$iflags">;
+
+let imod = 0, iflags = 0, M = 1 in
+ def CPS1p : CPS<(ins imm0_31:$mode), "\t$mode">;
+}
+
+// Preload signals the memory system of possible future data/instruction access.
+multiclass APreLoad<bits<1> read, bits<1> data, string opc> {
+
+ def i12 : AXI<(outs), (ins addrmode_imm12:$addr), MiscFrm, IIC_Preload,
+ !strconcat(opc, "\t$addr"),
+ [(ARMPreload addrmode_imm12:$addr, (i32 read), (i32 data))]> {
+ bits<4> Rt;
+ bits<17> addr;
+ let Inst{31-26} = 0b111101;
+ let Inst{25} = 0; // 0 for immediate form
+ let Inst{24} = data;
+ let Inst{23} = addr{12}; // U (add = ('U' == 1))
+ let Inst{22} = read;
+ let Inst{21-20} = 0b01;
+ let Inst{19-16} = addr{16-13}; // Rn
+ let Inst{15-12} = 0b1111;
+ let Inst{11-0} = addr{11-0}; // imm12
+ }
+
+ def rs : AXI<(outs), (ins ldst_so_reg:$shift), MiscFrm, IIC_Preload,
+ !strconcat(opc, "\t$shift"),
+ [(ARMPreload ldst_so_reg:$shift, (i32 read), (i32 data))]> {
+ bits<17> shift;
+ let Inst{31-26} = 0b111101;
+ let Inst{25} = 1; // 1 for register form
+ let Inst{24} = data;
+ let Inst{23} = shift{12}; // U (add = ('U' == 1))
+ let Inst{22} = read;
+ let Inst{21-20} = 0b01;
+ let Inst{19-16} = shift{16-13}; // Rn
+ let Inst{15-12} = 0b1111;
+ let Inst{11-0} = shift{11-0};
+ let Inst{4} = 0;
+ }
+}
+
+defm PLD : APreLoad<1, 1, "pld">, Requires<[IsARM]>;
+defm PLDW : APreLoad<0, 1, "pldw">, Requires<[IsARM,HasV7,HasMP]>;
+defm PLI : APreLoad<1, 0, "pli">, Requires<[IsARM,HasV7]>;
+
+def SETEND : AXI<(outs), (ins setend_op:$end), MiscFrm, NoItinerary,
+ "setend\t$end", []>, Requires<[IsARM]> {
+ bits<1> end;
+ let Inst{31-10} = 0b1111000100000001000000;
+ let Inst{9} = end;
+ let Inst{8-0} = 0;
+}
+
+def DBG : AI<(outs), (ins imm0_15:$opt), MiscFrm, NoItinerary, "dbg", "\t$opt",
+ []>, Requires<[IsARM, HasV7]> {
+ bits<4> opt;
+ let Inst{27-4} = 0b001100100000111100001111;
+ let Inst{3-0} = opt;
+}
+
+// A5.4 Permanently UNDEFINED instructions.
+let isBarrier = 1, isTerminator = 1 in
+def TRAP : AXI<(outs), (ins), MiscFrm, NoItinerary,
+ "trap", [(trap)]>,
+ Requires<[IsARM]> {
+ let Inst = 0xe7ffdefe;
+}
+
+// Address computation and loads and stores in PIC mode.
+let isNotDuplicable = 1 in {
+def PICADD : ARMPseudoInst<(outs GPR:$dst), (ins GPR:$a, pclabel:$cp, pred:$p),
+ 4, IIC_iALUr,
+ [(set GPR:$dst, (ARMpic_add GPR:$a, imm:$cp))]>;
+
+let AddedComplexity = 10 in {
+def PICLDR : ARMPseudoInst<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p),
+ 4, IIC_iLoad_r,
+ [(set GPR:$dst, (load addrmodepc:$addr))]>;
+
+def PICLDRH : ARMPseudoInst<(outs GPR:$Rt), (ins addrmodepc:$addr, pred:$p),
+ 4, IIC_iLoad_bh_r,
+ [(set GPR:$Rt, (zextloadi16 addrmodepc:$addr))]>;
+
+def PICLDRB : ARMPseudoInst<(outs GPR:$Rt), (ins addrmodepc:$addr, pred:$p),
+ 4, IIC_iLoad_bh_r,
+ [(set GPR:$Rt, (zextloadi8 addrmodepc:$addr))]>;
+
+def PICLDRSH : ARMPseudoInst<(outs GPR:$Rt), (ins addrmodepc:$addr, pred:$p),
+ 4, IIC_iLoad_bh_r,
+ [(set GPR:$Rt, (sextloadi16 addrmodepc:$addr))]>;
+
+def PICLDRSB : ARMPseudoInst<(outs GPR:$Rt), (ins addrmodepc:$addr, pred:$p),
+ 4, IIC_iLoad_bh_r,
+ [(set GPR:$Rt, (sextloadi8 addrmodepc:$addr))]>;
+}
+let AddedComplexity = 10 in {
+def PICSTR : ARMPseudoInst<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p),
+ 4, IIC_iStore_r, [(store GPR:$src, addrmodepc:$addr)]>;
+
+def PICSTRH : ARMPseudoInst<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p),
+ 4, IIC_iStore_bh_r, [(truncstorei16 GPR:$src,
+ addrmodepc:$addr)]>;
+
+def PICSTRB : ARMPseudoInst<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p),
+ 4, IIC_iStore_bh_r, [(truncstorei8 GPR:$src, addrmodepc:$addr)]>;
+}
+} // isNotDuplicable = 1
+
+
+// LEApcrel - Load a pc-relative address into a register without offending the
+// assembler.
+let neverHasSideEffects = 1, isReMaterializable = 1 in
+// The 'adr' mnemonic encodes differently if the label is before or after
+// the instruction. The {24-21} opcode bits are set by the fixup, as we don't
+// know until then which form of the instruction will be used.
+def ADR : AI1<{0,?,?,0}, (outs GPR:$Rd), (ins adrlabel:$label),
+ MiscFrm, IIC_iALUi, "adr", "\t$Rd, $label", []> {
+ bits<4> Rd;
+ bits<14> label;
+ let Inst{27-25} = 0b001;
+ let Inst{24} = 0;
+ let Inst{23-22} = label{13-12};
+ let Inst{21} = 0;
+ let Inst{20} = 0;
+ let Inst{19-16} = 0b1111;
+ let Inst{15-12} = Rd;
+ let Inst{11-0} = label{11-0};
+}
+def LEApcrel : ARMPseudoInst<(outs GPR:$Rd), (ins i32imm:$label, pred:$p),
+ 4, IIC_iALUi, []>;
+
+def LEApcrelJT : ARMPseudoInst<(outs GPR:$Rd),
+ (ins i32imm:$label, nohash_imm:$id, pred:$p),
+ 4, IIC_iALUi, []>;
+
+//===----------------------------------------------------------------------===//
+// Control Flow Instructions.
+//
+
+let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
+ // ARMV4T and above
+ def BX_RET : AI<(outs), (ins), BrMiscFrm, IIC_Br,
+ "bx", "\tlr", [(ARMretflag)]>,
+ Requires<[IsARM, HasV4T]> {
+ let Inst{27-0} = 0b0001001011111111111100011110;
+ }
+
+ // ARMV4 only
+ def MOVPCLR : AI<(outs), (ins), BrMiscFrm, IIC_Br,
+ "mov", "\tpc, lr", [(ARMretflag)]>,
+ Requires<[IsARM, NoV4T]> {
+ let Inst{27-0} = 0b0001101000001111000000001110;
+ }
+}
+
+// Indirect branches
+let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
+ // ARMV4T and above
+ def BX : AXI<(outs), (ins GPR:$dst), BrMiscFrm, IIC_Br, "bx\t$dst",
+ [(brind GPR:$dst)]>,
+ Requires<[IsARM, HasV4T]> {
+ bits<4> dst;
+ let Inst{31-4} = 0b1110000100101111111111110001;
+ let Inst{3-0} = dst;
+ }
+
+ def BX_pred : AI<(outs), (ins GPR:$dst), BrMiscFrm, IIC_Br,
+ "bx", "\t$dst", [/* pattern left blank */]>,
+ Requires<[IsARM, HasV4T]> {
+ bits<4> dst;
+ let Inst{27-4} = 0b000100101111111111110001;
+ let Inst{3-0} = dst;
+ }
+}
+
+// All calls clobber the non-callee saved registers. SP is marked as
+// a use to prevent stack-pointer assignments that appear immediately
+// before calls from potentially appearing dead.
+let isCall = 1,
+ // On non-Darwin platforms R9 is callee-saved.
+ // FIXME: Do we really need a non-predicated version? If so, it should
+ // at least be a pseudo instruction expanding to the predicated version
+ // at MC lowering time.
+ Defs = [R0, R1, R2, R3, R12, LR, QQQQ0, QQQQ2, QQQQ3, CPSR, FPSCR],
+ Uses = [SP] in {
+ def BL : ABXI<0b1011, (outs), (ins bl_target:$func, variable_ops),
+ IIC_Br, "bl\t$func",
+ [(ARMcall tglobaladdr:$func)]>,
+ Requires<[IsARM, IsNotDarwin]> {
+ let Inst{31-28} = 0b1110;
+ bits<24> func;
+ let Inst{23-0} = func;
+ let DecoderMethod = "DecodeBranchImmInstruction";
+ }
+
+ def BL_pred : ABI<0b1011, (outs), (ins bl_target:$func, variable_ops),
+ IIC_Br, "bl", "\t$func",
+ [(ARMcall_pred tglobaladdr:$func)]>,
+ Requires<[IsARM, IsNotDarwin]> {
+ bits<24> func;
+ let Inst{23-0} = func;
+ let DecoderMethod = "DecodeBranchImmInstruction";
+ }
+
+ // ARMv5T and above
+ def BLX : AXI<(outs), (ins GPR:$func, variable_ops), BrMiscFrm,
+ IIC_Br, "blx\t$func",
+ [(ARMcall GPR:$func)]>,
+ Requires<[IsARM, HasV5T, IsNotDarwin]> {
+ bits<4> func;
+ let Inst{31-4} = 0b1110000100101111111111110011;
+ let Inst{3-0} = func;
+ }
+
+ def BLX_pred : AI<(outs), (ins GPR:$func, variable_ops), BrMiscFrm,
+ IIC_Br, "blx", "\t$func",
+ [(ARMcall_pred GPR:$func)]>,
+ Requires<[IsARM, HasV5T, IsNotDarwin]> {
+ bits<4> func;
+ let Inst{27-4} = 0b000100101111111111110011;
+ let Inst{3-0} = func;
+ }
+
+ // ARMv4T
+ // Note: Restrict $func to the tGPR regclass to prevent it being in LR.
+ def BX_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops),
+ 8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>,
+ Requires<[IsARM, HasV4T, IsNotDarwin]>;
+
+ // ARMv4
+ def BMOVPCRX_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops),
+ 8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>,
+ Requires<[IsARM, NoV4T, IsNotDarwin]>;
+}
+
+let isCall = 1,
+ // On Darwin R9 is call-clobbered.
+ // R7 is marked as a use to prevent frame-pointer assignments from being
+ // moved above / below calls.
+ Defs = [R0, R1, R2, R3, R9, R12, LR, QQQQ0, QQQQ2, QQQQ3, CPSR, FPSCR],
+ Uses = [R7, SP] in {
+ def BLr9 : ARMPseudoExpand<(outs), (ins bl_target:$func, variable_ops),
+ 4, IIC_Br,
+ [(ARMcall tglobaladdr:$func)], (BL bl_target:$func)>,
+ Requires<[IsARM, IsDarwin]>;
+
+ def BLr9_pred : ARMPseudoExpand<(outs),
+ (ins bl_target:$func, pred:$p, variable_ops),
+ 4, IIC_Br,
+ [(ARMcall_pred tglobaladdr:$func)],
+ (BL_pred bl_target:$func, pred:$p)>,
+ Requires<[IsARM, IsDarwin]>;
+
+ // ARMv5T and above
+ def BLXr9 : ARMPseudoExpand<(outs), (ins GPR:$func, variable_ops),
+ 4, IIC_Br,
+ [(ARMcall GPR:$func)],
+ (BLX GPR:$func)>,
+ Requires<[IsARM, HasV5T, IsDarwin]>;
+
+ def BLXr9_pred: ARMPseudoExpand<(outs), (ins GPR:$func, pred:$p,variable_ops),
+ 4, IIC_Br,
+ [(ARMcall_pred GPR:$func)],
+ (BLX_pred GPR:$func, pred:$p)>,
+ Requires<[IsARM, HasV5T, IsDarwin]>;
+
+ // ARMv4T
+ // Note: Restrict $func to the tGPR regclass to prevent it being in LR.
+ def BXr9_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops),
+ 8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>,
+ Requires<[IsARM, HasV4T, IsDarwin]>;
+
+ // ARMv4
+ def BMOVPCRXr9_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops),
+ 8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>,
+ Requires<[IsARM, NoV4T, IsDarwin]>;
+}
+
+let isBranch = 1, isTerminator = 1 in {
+ // FIXME: should be able to write a pattern for ARMBrcond, but can't use
+ // a two-value operand where a dag node expects two operands. :(
+ def Bcc : ABI<0b1010, (outs), (ins br_target:$target),
+ IIC_Br, "b", "\t$target",
+ [/*(ARMbrcond bb:$target, imm:$cc, CCR:$ccr)*/]> {
+ bits<24> target;
+ let Inst{23-0} = target;
+ let DecoderMethod = "DecodeBranchImmInstruction";
+ }
+
+ let isBarrier = 1 in {
+ // B is "predicable" since it's just a Bcc with an 'always' condition.
+ let isPredicable = 1 in
+ // FIXME: We shouldn't need this pseudo at all. Just using Bcc directly
+ // should be sufficient.
+ // FIXME: Is B really a Barrier? That doesn't seem right.
+ def B : ARMPseudoExpand<(outs), (ins br_target:$target), 4, IIC_Br,
+ [(br bb:$target)], (Bcc br_target:$target, (ops 14, zero_reg))>;
+
+ let isNotDuplicable = 1, isIndirectBranch = 1 in {
+ def BR_JTr : ARMPseudoInst<(outs),
+ (ins GPR:$target, i32imm:$jt, i32imm:$id),
+ 0, IIC_Br,
+ [(ARMbrjt GPR:$target, tjumptable:$jt, imm:$id)]>;
+ // FIXME: This shouldn't use the generic "addrmode2," but rather be split
+ // into i12 and rs suffixed versions.
+ def BR_JTm : ARMPseudoInst<(outs),
+ (ins addrmode2:$target, i32imm:$jt, i32imm:$id),
+ 0, IIC_Br,
+ [(ARMbrjt (i32 (load addrmode2:$target)), tjumptable:$jt,
+ imm:$id)]>;
+ def BR_JTadd : ARMPseudoInst<(outs),
+ (ins GPR:$target, GPR:$idx, i32imm:$jt, i32imm:$id),
+ 0, IIC_Br,
+ [(ARMbrjt (add GPR:$target, GPR:$idx), tjumptable:$jt,
+ imm:$id)]>;
+ } // isNotDuplicable = 1, isIndirectBranch = 1
+ } // isBarrier = 1
+
+}
+
+// BLX (immediate)
+def BLXi : AXI<(outs), (ins blx_target:$target), BrMiscFrm, NoItinerary,
+ "blx\t$target", []>,
+ Requires<[IsARM, HasV5T]> {
+ let Inst{31-25} = 0b1111101;
+ bits<25> target;
+ let Inst{23-0} = target{24-1};
+ let Inst{24} = target{0};
+}
+
+// Branch and Exchange Jazelle
+def BXJ : ABI<0b0001, (outs), (ins GPR:$func), NoItinerary, "bxj", "\t$func",
+ [/* pattern left blank */]> {
+ bits<4> func;
+ let Inst{23-20} = 0b0010;
+ let Inst{19-8} = 0xfff;
+ let Inst{7-4} = 0b0010;
+ let Inst{3-0} = func;
+}
+
+// Tail calls.
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
+ // Darwin versions.
+ let Defs = [R0, R1, R2, R3, R9, R12, QQQQ0, QQQQ2, QQQQ3, PC],
+ Uses = [SP] in {
+ def TCRETURNdi : PseudoInst<(outs), (ins i32imm:$dst, variable_ops),
+ IIC_Br, []>, Requires<[IsDarwin]>;
+
+ def TCRETURNri : PseudoInst<(outs), (ins tcGPR:$dst, variable_ops),
+ IIC_Br, []>, Requires<[IsDarwin]>;
+
+ def TAILJMPd : ARMPseudoExpand<(outs), (ins br_target:$dst, variable_ops),
+ 4, IIC_Br, [],
+ (Bcc br_target:$dst, (ops 14, zero_reg))>,
+ Requires<[IsARM, IsDarwin]>;
+
+ def TAILJMPr : ARMPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops),
+ 4, IIC_Br, [],
+ (BX GPR:$dst)>,
+ Requires<[IsARM, IsDarwin]>;
+
+ }
+
+ // Non-Darwin versions (the difference is R9).
+ let Defs = [R0, R1, R2, R3, R12, QQQQ0, QQQQ2, QQQQ3, PC],
+ Uses = [SP] in {
+ def TCRETURNdiND : PseudoInst<(outs), (ins i32imm:$dst, variable_ops),
+ IIC_Br, []>, Requires<[IsNotDarwin]>;
+
+ def TCRETURNriND : PseudoInst<(outs), (ins tcGPR:$dst, variable_ops),
+ IIC_Br, []>, Requires<[IsNotDarwin]>;
+
+ def TAILJMPdND : ARMPseudoExpand<(outs), (ins brtarget:$dst, variable_ops),
+ 4, IIC_Br, [],
+ (Bcc br_target:$dst, (ops 14, zero_reg))>,
+ Requires<[IsARM, IsNotDarwin]>;
+
+ def TAILJMPrND : ARMPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops),
+ 4, IIC_Br, [],
+ (BX GPR:$dst)>,
+ Requires<[IsARM, IsNotDarwin]>;
+ }
+}
+
+// Secure Monitor Call is a system instruction.
+def SMC : ABI<0b0001, (outs), (ins imm0_15:$opt), NoItinerary, "smc", "\t$opt",
+ []> {
+ bits<4> opt;
+ let Inst{23-4} = 0b01100000000000000111;
+ let Inst{3-0} = opt;
+}
+
+// Supervisor Call (Software Interrupt)
+let isCall = 1, Uses = [SP] in {
+def SVC : ABI<0b1111, (outs), (ins imm24b:$svc), IIC_Br, "svc", "\t$svc", []> {
+ bits<24> svc;
+ let Inst{23-0} = svc;
+}
+}
+
+// Store Return State
+class SRSI<bit wb, string asm>
+ : XI<(outs), (ins imm0_31:$mode), AddrModeNone, 4, IndexModeNone, BrFrm,
+ NoItinerary, asm, "", []> {
+ bits<5> mode;
+ let Inst{31-28} = 0b1111;
+ let Inst{27-25} = 0b100;
+ let Inst{22} = 1;
+ let Inst{21} = wb;
+ let Inst{20} = 0;
+ let Inst{19-16} = 0b1101; // SP
+ let Inst{15-5} = 0b00000101000;
+ let Inst{4-0} = mode;
+}
+
+def SRSDA : SRSI<0, "srsda\tsp, $mode"> {
+ let Inst{24-23} = 0;
+}
+def SRSDA_UPD : SRSI<1, "srsda\tsp!, $mode"> {
+ let Inst{24-23} = 0;
+}
+def SRSDB : SRSI<0, "srsdb\tsp, $mode"> {
+ let Inst{24-23} = 0b10;
+}
+def SRSDB_UPD : SRSI<1, "srsdb\tsp!, $mode"> {
+ let Inst{24-23} = 0b10;
+}
+def SRSIA : SRSI<0, "srsia\tsp, $mode"> {
+ let Inst{24-23} = 0b01;
+}
+def SRSIA_UPD : SRSI<1, "srsia\tsp!, $mode"> {
+ let Inst{24-23} = 0b01;
+}
+def SRSIB : SRSI<0, "srsib\tsp, $mode"> {
+ let Inst{24-23} = 0b11;
+}
+def SRSIB_UPD : SRSI<1, "srsib\tsp!, $mode"> {
+ let Inst{24-23} = 0b11;
+}
+
+// Return From Exception
+class RFEI<bit wb, string asm>
+ : XI<(outs), (ins GPR:$Rn), AddrModeNone, 4, IndexModeNone, BrFrm,
+ NoItinerary, asm, "", []> {
+ bits<4> Rn;
+ let Inst{31-28} = 0b1111;
+ let Inst{27-25} = 0b100;
+ let Inst{22} = 0;
+ let Inst{21} = wb;
+ let Inst{20} = 1;
+ let Inst{19-16} = Rn;
+ let Inst{15-0} = 0xa00;
+}
+
+def RFEDA : RFEI<0, "rfeda\t$Rn"> {
+ let Inst{24-23} = 0;
+}
+def RFEDA_UPD : RFEI<1, "rfeda\t$Rn!"> {
+ let Inst{24-23} = 0;
+}
+def RFEDB : RFEI<0, "rfedb\t$Rn"> {
+ let Inst{24-23} = 0b10;
+}
+def RFEDB_UPD : RFEI<1, "rfedb\t$Rn!"> {
+ let Inst{24-23} = 0b10;
+}
+def RFEIA : RFEI<0, "rfeia\t$Rn"> {
+ let Inst{24-23} = 0b01;
+}
+def RFEIA_UPD : RFEI<1, "rfeia\t$Rn!"> {
+ let Inst{24-23} = 0b01;
+}
+def RFEIB : RFEI<0, "rfeib\t$Rn"> {
+ let Inst{24-23} = 0b11;
+}
+def RFEIB_UPD : RFEI<1, "rfeib\t$Rn!"> {
+ let Inst{24-23} = 0b11;
+}
+
+//===----------------------------------------------------------------------===//
+// Load / store Instructions.
+//
+
+// Load
+
+
+defm LDR : AI_ldr1<0, "ldr", IIC_iLoad_r, IIC_iLoad_si,
+ UnOpFrag<(load node:$Src)>>;
+defm LDRB : AI_ldr1nopc<1, "ldrb", IIC_iLoad_bh_r, IIC_iLoad_bh_si,
+ UnOpFrag<(zextloadi8 node:$Src)>>;
+defm STR : AI_str1<0, "str", IIC_iStore_r, IIC_iStore_si,
+ BinOpFrag<(store node:$LHS, node:$RHS)>>;
+defm STRB : AI_str1nopc<1, "strb", IIC_iStore_bh_r, IIC_iStore_bh_si,
+ BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>;
+
+// Special LDR for loads from non-pc-relative constpools.
+let canFoldAsLoad = 1, mayLoad = 1, neverHasSideEffects = 1,
+ isReMaterializable = 1, isCodeGenOnly = 1 in
+def LDRcp : AI2ldst<0b010, 1, 0, (outs GPR:$Rt), (ins addrmode_imm12:$addr),
+ AddrMode_i12, LdFrm, IIC_iLoad_r, "ldr", "\t$Rt, $addr",
+ []> {
+ bits<4> Rt;
+ bits<17> addr;
+ let Inst{23} = addr{12}; // U (add = ('U' == 1))
+ let Inst{19-16} = 0b1111;
+ let Inst{15-12} = Rt;
+ let Inst{11-0} = addr{11-0}; // imm12
+}
+
+// Loads with zero extension
+def LDRH : AI3ld<0b1011, 1, (outs GPR:$Rt), (ins addrmode3:$addr), LdMiscFrm,
+ IIC_iLoad_bh_r, "ldrh", "\t$Rt, $addr",
+ [(set GPR:$Rt, (zextloadi16 addrmode3:$addr))]>;
+
+// Loads with sign extension
+def LDRSH : AI3ld<0b1111, 1, (outs GPR:$Rt), (ins addrmode3:$addr), LdMiscFrm,
+ IIC_iLoad_bh_r, "ldrsh", "\t$Rt, $addr",
+ [(set GPR:$Rt, (sextloadi16 addrmode3:$addr))]>;
+
+def LDRSB : AI3ld<0b1101, 1, (outs GPR:$Rt), (ins addrmode3:$addr), LdMiscFrm,
+ IIC_iLoad_bh_r, "ldrsb", "\t$Rt, $addr",
+ [(set GPR:$Rt, (sextloadi8 addrmode3:$addr))]>;
+
+let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
+// Load doubleword
+def LDRD : AI3ld<0b1101, 0, (outs GPR:$Rd, GPR:$dst2),
+ (ins addrmode3:$addr), LdMiscFrm,
+ IIC_iLoad_d_r, "ldrd", "\t$Rd, $dst2, $addr",
+ []>, Requires<[IsARM, HasV5TE]>;
+}
+
+// Indexed loads
+multiclass AI2_ldridx<bit isByte, string opc, InstrItinClass itin> {
+ def _PRE_IMM : AI2ldstidx<1, isByte, 1, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins addrmode_imm12:$addr), IndexModePre, LdFrm, itin,
+ opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
+ bits<17> addr;
+ let Inst{25} = 0;
+ let Inst{23} = addr{12};
+ let Inst{19-16} = addr{16-13};
+ let Inst{11-0} = addr{11-0};
+ let DecoderMethod = "DecodeLDRPreImm";
+ let AsmMatchConverter = "cvtLdWriteBackRegAddrModeImm12";
+ }
+
+ def _PRE_REG : AI2ldstidx<1, isByte, 1, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins ldst_so_reg:$addr), IndexModePre, LdFrm, itin,
+ opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
+ bits<17> addr;
+ let Inst{25} = 1;
+ let Inst{23} = addr{12};
+ let Inst{19-16} = addr{16-13};
+ let Inst{11-0} = addr{11-0};
+ let Inst{4} = 0;
+ let DecoderMethod = "DecodeLDRPreReg";
+ let AsmMatchConverter = "cvtLdWriteBackRegAddrMode2";
+ }
+
+ def _POST_REG : AI2ldstidx<1, isByte, 0, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins addr_offset_none:$addr, am2offset_reg:$offset),
+ IndexModePost, LdFrm, itin,
+ opc, "\t$Rt, $addr, $offset",
+ "$addr.base = $Rn_wb", []> {
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<14> offset;
+ bits<4> addr;
+ let Inst{25} = 1;
+ let Inst{23} = offset{12};
+ let Inst{19-16} = addr;
+ let Inst{11-0} = offset{11-0};
+
+ let DecoderMethod = "DecodeAddrMode2IdxInstruction";
+ }
+
+ def _POST_IMM : AI2ldstidx<1, isByte, 0, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins addr_offset_none:$addr, am2offset_imm:$offset),
+ IndexModePost, LdFrm, itin,
+ opc, "\t$Rt, $addr, $offset",
+ "$addr.base = $Rn_wb", []> {
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<14> offset;
+ bits<4> addr;
+ let Inst{25} = 0;
+ let Inst{23} = offset{12};
+ let Inst{19-16} = addr;
+ let Inst{11-0} = offset{11-0};
+
+ let DecoderMethod = "DecodeAddrMode2IdxInstruction";
+ }
+
+}
+
+let mayLoad = 1, neverHasSideEffects = 1 in {
+defm LDR : AI2_ldridx<0, "ldr", IIC_iLoad_ru>;
+defm LDRB : AI2_ldridx<1, "ldrb", IIC_iLoad_bh_ru>;
+}
+
+multiclass AI3_ldridx<bits<4> op, string opc, InstrItinClass itin> {
+ def _PRE : AI3ldstidx<op, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins addrmode3:$addr), IndexModePre,
+ LdMiscFrm, itin,
+ opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
+ bits<14> addr;
+ let Inst{23} = addr{8}; // U bit
+ let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm
+ let Inst{19-16} = addr{12-9}; // Rn
+ let Inst{11-8} = addr{7-4}; // imm7_4/zero
+ let Inst{3-0} = addr{3-0}; // imm3_0/Rm
+ let AsmMatchConverter = "cvtLdWriteBackRegAddrMode3";
+ let DecoderMethod = "DecodeAddrMode3Instruction";
+ }
+ def _POST : AI3ldstidx<op, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins addr_offset_none:$addr, am3offset:$offset),
+ IndexModePost, LdMiscFrm, itin,
+ opc, "\t$Rt, $addr, $offset", "$addr.base = $Rn_wb",
+ []> {
+ bits<10> offset;
+ bits<4> addr;
+ let Inst{23} = offset{8}; // U bit
+ let Inst{22} = offset{9}; // 1 == imm8, 0 == Rm
+ let Inst{19-16} = addr;
+ let Inst{11-8} = offset{7-4}; // imm7_4/zero
+ let Inst{3-0} = offset{3-0}; // imm3_0/Rm
+ let DecoderMethod = "DecodeAddrMode3Instruction";
+ }
+}
+
+let mayLoad = 1, neverHasSideEffects = 1 in {
+defm LDRH : AI3_ldridx<0b1011, "ldrh", IIC_iLoad_bh_ru>;
+defm LDRSH : AI3_ldridx<0b1111, "ldrsh", IIC_iLoad_bh_ru>;
+defm LDRSB : AI3_ldridx<0b1101, "ldrsb", IIC_iLoad_bh_ru>;
+let hasExtraDefRegAllocReq = 1 in {
+def LDRD_PRE : AI3ldstidx<0b1101, 0, 1, (outs GPR:$Rt, GPR:$Rt2, GPR:$Rn_wb),
+ (ins addrmode3:$addr), IndexModePre,
+ LdMiscFrm, IIC_iLoad_d_ru,
+ "ldrd", "\t$Rt, $Rt2, $addr!",
+ "$addr.base = $Rn_wb", []> {
+ bits<14> addr;
+ let Inst{23} = addr{8}; // U bit
+ let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm
+ let Inst{19-16} = addr{12-9}; // Rn
+ let Inst{11-8} = addr{7-4}; // imm7_4/zero
+ let Inst{3-0} = addr{3-0}; // imm3_0/Rm
+ let DecoderMethod = "DecodeAddrMode3Instruction";
+ let AsmMatchConverter = "cvtLdrdPre";
+}
+def LDRD_POST: AI3ldstidx<0b1101, 0, 0, (outs GPR:$Rt, GPR:$Rt2, GPR:$Rn_wb),
+ (ins addr_offset_none:$addr, am3offset:$offset),
+ IndexModePost, LdMiscFrm, IIC_iLoad_d_ru,
+ "ldrd", "\t$Rt, $Rt2, $addr, $offset",
+ "$addr.base = $Rn_wb", []> {
+ bits<10> offset;
+ bits<4> addr;
+ let Inst{23} = offset{8}; // U bit
+ let Inst{22} = offset{9}; // 1 == imm8, 0 == Rm
+ let Inst{19-16} = addr;
+ let Inst{11-8} = offset{7-4}; // imm7_4/zero
+ let Inst{3-0} = offset{3-0}; // imm3_0/Rm
+ let DecoderMethod = "DecodeAddrMode3Instruction";
+}
+} // hasExtraDefRegAllocReq = 1
+} // mayLoad = 1, neverHasSideEffects = 1
+
+// LDRT, LDRBT, LDRSBT, LDRHT, LDRSHT.
+let mayLoad = 1, neverHasSideEffects = 1 in {
+def LDRT_POST_REG : AI2ldstidx<1, 0, 0, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins addr_offset_none:$addr, am2offset_reg:$offset),
+ IndexModePost, LdFrm, IIC_iLoad_ru,
+ "ldrt", "\t$Rt, $addr, $offset",
+ "$addr.base = $Rn_wb", []> {
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<14> offset;
+ bits<4> addr;
+ let Inst{25} = 1;
+ let Inst{23} = offset{12};
+ let Inst{21} = 1; // overwrite
+ let Inst{19-16} = addr;
+ let Inst{11-5} = offset{11-5};
+ let Inst{4} = 0;
+ let Inst{3-0} = offset{3-0};
+ let DecoderMethod = "DecodeAddrMode2IdxInstruction";
+}
+
+def LDRT_POST_IMM : AI2ldstidx<1, 0, 0, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins addr_offset_none:$addr, am2offset_imm:$offset),
+ IndexModePost, LdFrm, IIC_iLoad_ru,
+ "ldrt", "\t$Rt, $addr, $offset",
+ "$addr.base = $Rn_wb", []> {
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<14> offset;
+ bits<4> addr;
+ let Inst{25} = 0;
+ let Inst{23} = offset{12};
+ let Inst{21} = 1; // overwrite
+ let Inst{19-16} = addr;
+ let Inst{11-0} = offset{11-0};
+ let DecoderMethod = "DecodeAddrMode2IdxInstruction";
+}
+
+def LDRBT_POST_REG : AI2ldstidx<1, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins addr_offset_none:$addr, am2offset_reg:$offset),
+ IndexModePost, LdFrm, IIC_iLoad_bh_ru,
+ "ldrbt", "\t$Rt, $addr, $offset",
+ "$addr.base = $Rn_wb", []> {
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<14> offset;
+ bits<4> addr;
+ let Inst{25} = 1;
+ let Inst{23} = offset{12};
+ let Inst{21} = 1; // overwrite
+ let Inst{19-16} = addr;
+ let Inst{11-5} = offset{11-5};
+ let Inst{4} = 0;
+ let Inst{3-0} = offset{3-0};
+ let DecoderMethod = "DecodeAddrMode2IdxInstruction";
+}
+
+def LDRBT_POST_IMM : AI2ldstidx<1, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins addr_offset_none:$addr, am2offset_imm:$offset),
+ IndexModePost, LdFrm, IIC_iLoad_bh_ru,
+ "ldrbt", "\t$Rt, $addr, $offset",
+ "$addr.base = $Rn_wb", []> {
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<14> offset;
+ bits<4> addr;
+ let Inst{25} = 0;
+ let Inst{23} = offset{12};
+ let Inst{21} = 1; // overwrite
+ let Inst{19-16} = addr;
+ let Inst{11-0} = offset{11-0};
+ let DecoderMethod = "DecodeAddrMode2IdxInstruction";
+}
+
+multiclass AI3ldrT<bits<4> op, string opc> {
+ def i : AI3ldstidxT<op, 1, (outs GPR:$Rt, GPR:$base_wb),
+ (ins addr_offset_none:$addr, postidx_imm8:$offset),
+ IndexModePost, LdMiscFrm, IIC_iLoad_bh_ru, opc,
+ "\t$Rt, $addr, $offset", "$addr.base = $base_wb", []> {
+ bits<9> offset;
+ let Inst{23} = offset{8};
+ let Inst{22} = 1;
+ let Inst{11-8} = offset{7-4};
+ let Inst{3-0} = offset{3-0};
+ let AsmMatchConverter = "cvtLdExtTWriteBackImm";
+ }
+ def r : AI3ldstidxT<op, 1, (outs GPR:$Rt, GPR:$base_wb),
+ (ins addr_offset_none:$addr, postidx_reg:$Rm),
+ IndexModePost, LdMiscFrm, IIC_iLoad_bh_ru, opc,
+ "\t$Rt, $addr, $Rm", "$addr.base = $base_wb", []> {
+ bits<5> Rm;
+ let Inst{23} = Rm{4};
+ let Inst{22} = 0;
+ let Inst{11-8} = 0;
+ let Inst{3-0} = Rm{3-0};
+ let AsmMatchConverter = "cvtLdExtTWriteBackReg";
+ }
+}
+
+defm LDRSBT : AI3ldrT<0b1101, "ldrsbt">;
+defm LDRHT : AI3ldrT<0b1011, "ldrht">;
+defm LDRSHT : AI3ldrT<0b1111, "ldrsht">;
+}
+
+// Store
+
+// Stores with truncate
+def STRH : AI3str<0b1011, (outs), (ins GPR:$Rt, addrmode3:$addr), StMiscFrm,
+ IIC_iStore_bh_r, "strh", "\t$Rt, $addr",
+ [(truncstorei16 GPR:$Rt, addrmode3:$addr)]>;
+
+// Store doubleword
+let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in
+def STRD : AI3str<0b1111, (outs), (ins GPR:$Rt, GPR:$src2, addrmode3:$addr),
+ StMiscFrm, IIC_iStore_d_r,
+ "strd", "\t$Rt, $src2, $addr", []>,
+ Requires<[IsARM, HasV5TE]> {
+ let Inst{21} = 0;
+}
+
+// Indexed stores
+multiclass AI2_stridx<bit isByte, string opc, InstrItinClass itin> {
+ def _PRE_IMM : AI2ldstidx<0, isByte, 1, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, addrmode_imm12:$addr), IndexModePre,
+ StFrm, itin,
+ opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
+ bits<17> addr;
+ let Inst{25} = 0;
+ let Inst{23} = addr{12}; // U (add = ('U' == 1))
+ let Inst{19-16} = addr{16-13}; // Rn
+ let Inst{11-0} = addr{11-0}; // imm12
+ let AsmMatchConverter = "cvtStWriteBackRegAddrModeImm12";
+ let DecoderMethod = "DecodeSTRPreImm";
+ }
+
+ def _PRE_REG : AI2ldstidx<0, isByte, 1, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, ldst_so_reg:$addr),
+ IndexModePre, StFrm, itin,
+ opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
+ bits<17> addr;
+ let Inst{25} = 1;
+ let Inst{23} = addr{12}; // U (add = ('U' == 1))
+ let Inst{19-16} = addr{16-13}; // Rn
+ let Inst{11-0} = addr{11-0};
+ let Inst{4} = 0; // Inst{4} = 0
+ let AsmMatchConverter = "cvtStWriteBackRegAddrMode2";
+ let DecoderMethod = "DecodeSTRPreReg";
+ }
+ def _POST_REG : AI2ldstidx<0, isByte, 0, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, addr_offset_none:$addr, am2offset_reg:$offset),
+ IndexModePost, StFrm, itin,
+ opc, "\t$Rt, $addr, $offset",
+ "$addr.base = $Rn_wb", []> {
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<14> offset;
+ bits<4> addr;
+ let Inst{25} = 1;
+ let Inst{23} = offset{12};
+ let Inst{19-16} = addr;
+ let Inst{11-0} = offset{11-0};
+
+ let DecoderMethod = "DecodeAddrMode2IdxInstruction";
+ }
+
+ def _POST_IMM : AI2ldstidx<0, isByte, 0, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, addr_offset_none:$addr, am2offset_imm:$offset),
+ IndexModePost, StFrm, itin,
+ opc, "\t$Rt, $addr, $offset",
+ "$addr.base = $Rn_wb", []> {
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<14> offset;
+ bits<4> addr;
+ let Inst{25} = 0;
+ let Inst{23} = offset{12};
+ let Inst{19-16} = addr;
+ let Inst{11-0} = offset{11-0};
+
+ let DecoderMethod = "DecodeAddrMode2IdxInstruction";
+ }
+}
+
+let mayStore = 1, neverHasSideEffects = 1 in {
+defm STR : AI2_stridx<0, "str", IIC_iStore_ru>;
+defm STRB : AI2_stridx<1, "strb", IIC_iStore_bh_ru>;
+}
+
+def : ARMPat<(post_store GPR:$Rt, addr_offset_none:$addr,
+ am2offset_reg:$offset),
+ (STR_POST_REG GPR:$Rt, addr_offset_none:$addr,
+ am2offset_reg:$offset)>;
+def : ARMPat<(post_store GPR:$Rt, addr_offset_none:$addr,
+ am2offset_imm:$offset),
+ (STR_POST_IMM GPR:$Rt, addr_offset_none:$addr,
+ am2offset_imm:$offset)>;
+def : ARMPat<(post_truncsti8 GPR:$Rt, addr_offset_none:$addr,
+ am2offset_reg:$offset),
+ (STRB_POST_REG GPR:$Rt, addr_offset_none:$addr,
+ am2offset_reg:$offset)>;
+def : ARMPat<(post_truncsti8 GPR:$Rt, addr_offset_none:$addr,
+ am2offset_imm:$offset),
+ (STRB_POST_IMM GPR:$Rt, addr_offset_none:$addr,
+ am2offset_imm:$offset)>;
+
+// Pseudo-instructions for pattern matching the pre-indexed stores. We can't
+// put the patterns on the instruction definitions directly as ISel wants
+// the address base and offset to be separate operands, not a single
+// complex operand like we represent the instructions themselves. The
+// pseudos map between the two.
+let usesCustomInserter = 1,
+ Constraints = "$Rn = $Rn_wb,@earlyclobber $Rn_wb" in {
+def STRi_preidx: ARMPseudoInst<(outs GPR:$Rn_wb),
+ (ins GPR:$Rt, GPR:$Rn, am2offset_imm:$offset, pred:$p),
+ 4, IIC_iStore_ru,
+ [(set GPR:$Rn_wb,
+ (pre_store GPR:$Rt, GPR:$Rn, am2offset_imm:$offset))]>;
+def STRr_preidx: ARMPseudoInst<(outs GPR:$Rn_wb),
+ (ins GPR:$Rt, GPR:$Rn, am2offset_reg:$offset, pred:$p),
+ 4, IIC_iStore_ru,
+ [(set GPR:$Rn_wb,
+ (pre_store GPR:$Rt, GPR:$Rn, am2offset_reg:$offset))]>;
+def STRBi_preidx: ARMPseudoInst<(outs GPR:$Rn_wb),
+ (ins GPR:$Rt, GPR:$Rn, am2offset_imm:$offset, pred:$p),
+ 4, IIC_iStore_ru,
+ [(set GPR:$Rn_wb,
+ (pre_truncsti8 GPR:$Rt, GPR:$Rn, am2offset_imm:$offset))]>;
+def STRBr_preidx: ARMPseudoInst<(outs GPR:$Rn_wb),
+ (ins GPR:$Rt, GPR:$Rn, am2offset_reg:$offset, pred:$p),
+ 4, IIC_iStore_ru,
+ [(set GPR:$Rn_wb,
+ (pre_truncsti8 GPR:$Rt, GPR:$Rn, am2offset_reg:$offset))]>;
+def STRH_preidx: ARMPseudoInst<(outs GPR:$Rn_wb),
+ (ins GPR:$Rt, GPR:$Rn, am3offset:$offset, pred:$p),
+ 4, IIC_iStore_ru,
+ [(set GPR:$Rn_wb,
+ (pre_truncsti16 GPR:$Rt, GPR:$Rn, am3offset:$offset))]>;
+}
+
+
+
+def STRH_PRE : AI3ldstidx<0b1011, 0, 1, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, addrmode3:$addr), IndexModePre,
+ StMiscFrm, IIC_iStore_bh_ru,
+ "strh", "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
+ bits<14> addr;
+ let Inst{23} = addr{8}; // U bit
+ let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm
+ let Inst{19-16} = addr{12-9}; // Rn
+ let Inst{11-8} = addr{7-4}; // imm7_4/zero
+ let Inst{3-0} = addr{3-0}; // imm3_0/Rm
+ let AsmMatchConverter = "cvtStWriteBackRegAddrMode3";
+ let DecoderMethod = "DecodeAddrMode3Instruction";
+}
+
+def STRH_POST : AI3ldstidx<0b1011, 0, 0, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, addr_offset_none:$addr, am3offset:$offset),
+ IndexModePost, StMiscFrm, IIC_iStore_bh_ru,
+ "strh", "\t$Rt, $addr, $offset", "$addr.base = $Rn_wb",
+ [(set GPR:$Rn_wb, (post_truncsti16 GPR:$Rt,
+ addr_offset_none:$addr,
+ am3offset:$offset))]> {
+ bits<10> offset;
+ bits<4> addr;
+ let Inst{23} = offset{8}; // U bit
+ let Inst{22} = offset{9}; // 1 == imm8, 0 == Rm
+ let Inst{19-16} = addr;
+ let Inst{11-8} = offset{7-4}; // imm7_4/zero
+ let Inst{3-0} = offset{3-0}; // imm3_0/Rm
+ let DecoderMethod = "DecodeAddrMode3Instruction";
+}
+
+let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
+def STRD_PRE : AI3ldstidx<0b1111, 0, 1, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, GPR:$Rt2, addrmode3:$addr),
+ IndexModePre, StMiscFrm, IIC_iStore_d_ru,
+ "strd", "\t$Rt, $Rt2, $addr!",
+ "$addr.base = $Rn_wb", []> {
+ bits<14> addr;
+ let Inst{23} = addr{8}; // U bit
+ let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm
+ let Inst{19-16} = addr{12-9}; // Rn
+ let Inst{11-8} = addr{7-4}; // imm7_4/zero
+ let Inst{3-0} = addr{3-0}; // imm3_0/Rm
+ let DecoderMethod = "DecodeAddrMode3Instruction";
+ let AsmMatchConverter = "cvtStrdPre";
+}
+
+def STRD_POST: AI3ldstidx<0b1111, 0, 0, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, GPR:$Rt2, addr_offset_none:$addr,
+ am3offset:$offset),
+ IndexModePost, StMiscFrm, IIC_iStore_d_ru,
+ "strd", "\t$Rt, $Rt2, $addr, $offset",
+ "$addr.base = $Rn_wb", []> {
+ bits<10> offset;
+ bits<4> addr;
+ let Inst{23} = offset{8}; // U bit
+ let Inst{22} = offset{9}; // 1 == imm8, 0 == Rm
+ let Inst{19-16} = addr;
+ let Inst{11-8} = offset{7-4}; // imm7_4/zero
+ let Inst{3-0} = offset{3-0}; // imm3_0/Rm
+ let DecoderMethod = "DecodeAddrMode3Instruction";
+}
+} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1
+
+// STRT, STRBT, and STRHT
+
+def STRBT_POST_REG : AI2ldstidx<0, 1, 0, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, addr_offset_none:$addr, am2offset_reg:$offset),
+ IndexModePost, StFrm, IIC_iStore_bh_ru,
+ "strbt", "\t$Rt, $addr, $offset",
+ "$addr.base = $Rn_wb", []> {
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<14> offset;
+ bits<4> addr;
+ let Inst{25} = 1;
+ let Inst{23} = offset{12};
+ let Inst{21} = 1; // overwrite
+ let Inst{19-16} = addr;
+ let Inst{11-5} = offset{11-5};
+ let Inst{4} = 0;
+ let Inst{3-0} = offset{3-0};
+ let DecoderMethod = "DecodeAddrMode2IdxInstruction";
+}
+
+def STRBT_POST_IMM : AI2ldstidx<0, 1, 0, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, addr_offset_none:$addr, am2offset_imm:$offset),
+ IndexModePost, StFrm, IIC_iStore_bh_ru,
+ "strbt", "\t$Rt, $addr, $offset",
+ "$addr.base = $Rn_wb", []> {
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<14> offset;
+ bits<4> addr;
+ let Inst{25} = 0;
+ let Inst{23} = offset{12};
+ let Inst{21} = 1; // overwrite
+ let Inst{19-16} = addr;
+ let Inst{11-0} = offset{11-0};
+ let DecoderMethod = "DecodeAddrMode2IdxInstruction";
+}
+
+let mayStore = 1, neverHasSideEffects = 1 in {
+def STRT_POST_REG : AI2ldstidx<0, 0, 0, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, addr_offset_none:$addr, am2offset_reg:$offset),
+ IndexModePost, StFrm, IIC_iStore_ru,
+ "strt", "\t$Rt, $addr, $offset",
+ "$addr.base = $Rn_wb", []> {
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<14> offset;
+ bits<4> addr;
+ let Inst{25} = 1;
+ let Inst{23} = offset{12};
+ let Inst{21} = 1; // overwrite
+ let Inst{19-16} = addr;
+ let Inst{11-5} = offset{11-5};
+ let Inst{4} = 0;
+ let Inst{3-0} = offset{3-0};
+ let DecoderMethod = "DecodeAddrMode2IdxInstruction";
+}
+
+def STRT_POST_IMM : AI2ldstidx<0, 0, 0, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, addr_offset_none:$addr, am2offset_imm:$offset),
+ IndexModePost, StFrm, IIC_iStore_ru,
+ "strt", "\t$Rt, $addr, $offset",
+ "$addr.base = $Rn_wb", []> {
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<14> offset;
+ bits<4> addr;
+ let Inst{25} = 0;
+ let Inst{23} = offset{12};
+ let Inst{21} = 1; // overwrite
+ let Inst{19-16} = addr;
+ let Inst{11-0} = offset{11-0};
+ let DecoderMethod = "DecodeAddrMode2IdxInstruction";
+}
+}
+
+
+multiclass AI3strT<bits<4> op, string opc> {
+ def i : AI3ldstidxT<op, 0, (outs GPR:$base_wb),
+ (ins GPR:$Rt, addr_offset_none:$addr, postidx_imm8:$offset),
+ IndexModePost, StMiscFrm, IIC_iStore_bh_ru, opc,
+ "\t$Rt, $addr, $offset", "$addr.base = $base_wb", []> {
+ bits<9> offset;
+ let Inst{23} = offset{8};
+ let Inst{22} = 1;
+ let Inst{11-8} = offset{7-4};
+ let Inst{3-0} = offset{3-0};
+ let AsmMatchConverter = "cvtStExtTWriteBackImm";
+ }
+ def r : AI3ldstidxT<op, 0, (outs GPR:$base_wb),
+ (ins GPR:$Rt, addr_offset_none:$addr, postidx_reg:$Rm),
+ IndexModePost, StMiscFrm, IIC_iStore_bh_ru, opc,
+ "\t$Rt, $addr, $Rm", "$addr.base = $base_wb", []> {
+ bits<5> Rm;
+ let Inst{23} = Rm{4};
+ let Inst{22} = 0;
+ let Inst{11-8} = 0;
+ let Inst{3-0} = Rm{3-0};
+ let AsmMatchConverter = "cvtStExtTWriteBackReg";
+ }
+}
+
+
+defm STRHT : AI3strT<0b1011, "strht">;
+
+
+//===----------------------------------------------------------------------===//
+// Load / store multiple Instructions.
+//
+
+multiclass arm_ldst_mult<string asm, bit L_bit, Format f,
+ InstrItinClass itin, InstrItinClass itin_upd> {
+ // IA is the default, so no need for an explicit suffix on the
+ // mnemonic here. Without it is the cannonical spelling.
+ def IA :
+ AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ IndexModeNone, f, itin,
+ !strconcat(asm, "${p}\t$Rn, $regs"), "", []> {
+ let Inst{24-23} = 0b01; // Increment After
+ let Inst{21} = 0; // No writeback
+ let Inst{20} = L_bit;
+ }
+ def IA_UPD :
+ AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ IndexModeUpd, f, itin_upd,
+ !strconcat(asm, "${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ let Inst{24-23} = 0b01; // Increment After
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+
+ let DecoderMethod = "DecodeMemMultipleWritebackInstruction";
+ }
+ def DA :
+ AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ IndexModeNone, f, itin,
+ !strconcat(asm, "da${p}\t$Rn, $regs"), "", []> {
+ let Inst{24-23} = 0b00; // Decrement After
+ let Inst{21} = 0; // No writeback
+ let Inst{20} = L_bit;
+ }
+ def DA_UPD :
+ AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ IndexModeUpd, f, itin_upd,
+ !strconcat(asm, "da${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ let Inst{24-23} = 0b00; // Decrement After
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+
+ let DecoderMethod = "DecodeMemMultipleWritebackInstruction";
+ }
+ def DB :
+ AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ IndexModeNone, f, itin,
+ !strconcat(asm, "db${p}\t$Rn, $regs"), "", []> {
+ let Inst{24-23} = 0b10; // Decrement Before
+ let Inst{21} = 0; // No writeback
+ let Inst{20} = L_bit;
+ }
+ def DB_UPD :
+ AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ IndexModeUpd, f, itin_upd,
+ !strconcat(asm, "db${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ let Inst{24-23} = 0b10; // Decrement Before
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+
+ let DecoderMethod = "DecodeMemMultipleWritebackInstruction";
+ }
+ def IB :
+ AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ IndexModeNone, f, itin,
+ !strconcat(asm, "ib${p}\t$Rn, $regs"), "", []> {
+ let Inst{24-23} = 0b11; // Increment Before
+ let Inst{21} = 0; // No writeback
+ let Inst{20} = L_bit;
+ }
+ def IB_UPD :
+ AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ IndexModeUpd, f, itin_upd,
+ !strconcat(asm, "ib${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ let Inst{24-23} = 0b11; // Increment Before
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+
+ let DecoderMethod = "DecodeMemMultipleWritebackInstruction";
+ }
+}
+
+let neverHasSideEffects = 1 in {
+
+let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
+defm LDM : arm_ldst_mult<"ldm", 1, LdStMulFrm, IIC_iLoad_m, IIC_iLoad_mu>;
+
+let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
+defm STM : arm_ldst_mult<"stm", 0, LdStMulFrm, IIC_iStore_m, IIC_iStore_mu>;
+
+} // neverHasSideEffects
+
+// FIXME: remove when we have a way to marking a MI with these properties.
+// FIXME: Should pc be an implicit operand like PICADD, etc?
+let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1,
+ hasExtraDefRegAllocReq = 1, isCodeGenOnly = 1 in
+def LDMIA_RET : ARMPseudoExpand<(outs GPR:$wb), (ins GPR:$Rn, pred:$p,
+ reglist:$regs, variable_ops),
+ 4, IIC_iLoad_mBr, [],
+ (LDMIA_UPD GPR:$wb, GPR:$Rn, pred:$p, reglist:$regs)>,
+ RegConstraint<"$Rn = $wb">;
+
+//===----------------------------------------------------------------------===//
+// Move Instructions.
+//
+
+let neverHasSideEffects = 1 in
+def MOVr : AsI1<0b1101, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMOVr,
+ "mov", "\t$Rd, $Rm", []>, UnaryDP {
+ bits<4> Rd;
+ bits<4> Rm;
+
+ let Inst{19-16} = 0b0000;
+ let Inst{11-4} = 0b00000000;
+ let Inst{25} = 0;
+ let Inst{3-0} = Rm;
+ let Inst{15-12} = Rd;
+}
+
+def : ARMInstAlias<"movs${p} $Rd, $Rm",
+ (MOVr GPR:$Rd, GPR:$Rm, pred:$p, CPSR)>;
+
+// A version for the smaller set of tail call registers.
+let neverHasSideEffects = 1 in
+def MOVr_TC : AsI1<0b1101, (outs tcGPR:$Rd), (ins tcGPR:$Rm), DPFrm,
+ IIC_iMOVr, "mov", "\t$Rd, $Rm", []>, UnaryDP {
+ bits<4> Rd;
+ bits<4> Rm;
+
+ let Inst{11-4} = 0b00000000;
+ let Inst{25} = 0;
+ let Inst{3-0} = Rm;
+ let Inst{15-12} = Rd;
+}
+
+def MOVsr : AsI1<0b1101, (outs GPRnopc:$Rd), (ins shift_so_reg_reg:$src),
+ DPSoRegRegFrm, IIC_iMOVsr,
+ "mov", "\t$Rd, $src",
+ [(set GPRnopc:$Rd, shift_so_reg_reg:$src)]>, UnaryDP {
+ bits<4> Rd;
+ bits<12> src;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = 0b0000;
+ let Inst{11-8} = src{11-8};
+ let Inst{7} = 0;
+ let Inst{6-5} = src{6-5};
+ let Inst{4} = 1;
+ let Inst{3-0} = src{3-0};
+ let Inst{25} = 0;
+}
+
+def MOVsi : AsI1<0b1101, (outs GPR:$Rd), (ins shift_so_reg_imm:$src),
+ DPSoRegImmFrm, IIC_iMOVsr,
+ "mov", "\t$Rd, $src", [(set GPR:$Rd, shift_so_reg_imm:$src)]>,
+ UnaryDP {
+ bits<4> Rd;
+ bits<12> src;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = 0b0000;
+ let Inst{11-5} = src{11-5};
+ let Inst{4} = 0;
+ let Inst{3-0} = src{3-0};
+ let Inst{25} = 0;
+}
+
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in
+def MOVi : AsI1<0b1101, (outs GPR:$Rd), (ins so_imm:$imm), DPFrm, IIC_iMOVi,
+ "mov", "\t$Rd, $imm", [(set GPR:$Rd, so_imm:$imm)]>, UnaryDP {
+ bits<4> Rd;
+ bits<12> imm;
+ let Inst{25} = 1;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = 0b0000;
+ let Inst{11-0} = imm;
+}
+
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in
+def MOVi16 : AI1<0b1000, (outs GPR:$Rd), (ins imm0_65535_expr:$imm),
+ DPFrm, IIC_iMOVi,
+ "movw", "\t$Rd, $imm",
+ [(set GPR:$Rd, imm0_65535:$imm)]>,
+ Requires<[IsARM, HasV6T2]>, UnaryDP {
+ bits<4> Rd;
+ bits<16> imm;
+ let Inst{15-12} = Rd;
+ let Inst{11-0} = imm{11-0};
+ let Inst{19-16} = imm{15-12};
+ let Inst{20} = 0;
+ let Inst{25} = 1;
+ let DecoderMethod = "DecodeArmMOVTWInstruction";
+}
+
+def : InstAlias<"mov${p} $Rd, $imm",
+ (MOVi16 GPR:$Rd, imm0_65535_expr:$imm, pred:$p)>,
+ Requires<[IsARM]>;
+
+def MOVi16_ga_pcrel : PseudoInst<(outs GPR:$Rd),
+ (ins i32imm:$addr, pclabel:$id), IIC_iMOVi, []>;
+
+let Constraints = "$src = $Rd" in {
+def MOVTi16 : AI1<0b1010, (outs GPRnopc:$Rd),
+ (ins GPR:$src, imm0_65535_expr:$imm),
+ DPFrm, IIC_iMOVi,
+ "movt", "\t$Rd, $imm",
+ [(set GPRnopc:$Rd,
+ (or (and GPR:$src, 0xffff),
+ lo16AllZero:$imm))]>, UnaryDP,
+ Requires<[IsARM, HasV6T2]> {
+ bits<4> Rd;
+ bits<16> imm;
+ let Inst{15-12} = Rd;
+ let Inst{11-0} = imm{11-0};
+ let Inst{19-16} = imm{15-12};
+ let Inst{20} = 0;
+ let Inst{25} = 1;
+ let DecoderMethod = "DecodeArmMOVTWInstruction";
+}
+
+def MOVTi16_ga_pcrel : PseudoInst<(outs GPR:$Rd),
+ (ins GPR:$src, i32imm:$addr, pclabel:$id), IIC_iMOVi, []>;
+
+} // Constraints
+
+def : ARMPat<(or GPR:$src, 0xffff0000), (MOVTi16 GPR:$src, 0xffff)>,
+ Requires<[IsARM, HasV6T2]>;
+
+let Uses = [CPSR] in
+def RRX: PseudoInst<(outs GPR:$Rd), (ins GPR:$Rm), IIC_iMOVsi,
+ [(set GPR:$Rd, (ARMrrx GPR:$Rm))]>, UnaryDP,
+ Requires<[IsARM]>;
+
+// These aren't really mov instructions, but we have to define them this way
+// due to flag operands.
+
+let Defs = [CPSR] in {
+def MOVsrl_flag : PseudoInst<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
+ [(set GPR:$dst, (ARMsrl_flag GPR:$src))]>, UnaryDP,
+ Requires<[IsARM]>;
+def MOVsra_flag : PseudoInst<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
+ [(set GPR:$dst, (ARMsra_flag GPR:$src))]>, UnaryDP,
+ Requires<[IsARM]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Extend Instructions.
+//
+
+// Sign extenders
+
+def SXTB : AI_ext_rrot<0b01101010,
+ "sxtb", UnOpFrag<(sext_inreg node:$Src, i8)>>;
+def SXTH : AI_ext_rrot<0b01101011,
+ "sxth", UnOpFrag<(sext_inreg node:$Src, i16)>>;
+
+def SXTAB : AI_exta_rrot<0b01101010,
+ "sxtab", BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS, i8))>>;
+def SXTAH : AI_exta_rrot<0b01101011,
+ "sxtah", BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS,i16))>>;
+
+def SXTB16 : AI_ext_rrot_np<0b01101000, "sxtb16">;
+
+def SXTAB16 : AI_exta_rrot_np<0b01101000, "sxtab16">;
+
+// Zero extenders
+
+let AddedComplexity = 16 in {
+def UXTB : AI_ext_rrot<0b01101110,
+ "uxtb" , UnOpFrag<(and node:$Src, 0x000000FF)>>;
+def UXTH : AI_ext_rrot<0b01101111,
+ "uxth" , UnOpFrag<(and node:$Src, 0x0000FFFF)>>;
+def UXTB16 : AI_ext_rrot<0b01101100,
+ "uxtb16", UnOpFrag<(and node:$Src, 0x00FF00FF)>>;
+
+// FIXME: This pattern incorrectly assumes the shl operator is a rotate.
+// The transformation should probably be done as a combiner action
+// instead so we can include a check for masking back in the upper
+// eight bits of the source into the lower eight bits of the result.
+//def : ARMV6Pat<(and (shl GPR:$Src, (i32 8)), 0xFF00FF),
+// (UXTB16r_rot GPR:$Src, 3)>;
+def : ARMV6Pat<(and (srl GPR:$Src, (i32 8)), 0xFF00FF),
+ (UXTB16 GPR:$Src, 1)>;
+
+def UXTAB : AI_exta_rrot<0b01101110, "uxtab",
+ BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>;
+def UXTAH : AI_exta_rrot<0b01101111, "uxtah",
+ BinOpFrag<(add node:$LHS, (and node:$RHS, 0xFFFF))>>;
+}
+
+// This isn't safe in general, the add is two 16-bit units, not a 32-bit add.
+def UXTAB16 : AI_exta_rrot_np<0b01101100, "uxtab16">;
+
+
+def SBFX : I<(outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rn, imm0_31:$lsb, imm1_32:$width),
+ AddrMode1, 4, IndexModeNone, DPFrm, IIC_iUNAsi,
+ "sbfx", "\t$Rd, $Rn, $lsb, $width", "", []>,
+ Requires<[IsARM, HasV6T2]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<5> lsb;
+ bits<5> width;
+ let Inst{27-21} = 0b0111101;
+ let Inst{6-4} = 0b101;
+ let Inst{20-16} = width;
+ let Inst{15-12} = Rd;
+ let Inst{11-7} = lsb;
+ let Inst{3-0} = Rn;
+}
+
+def UBFX : I<(outs GPR:$Rd),
+ (ins GPR:$Rn, imm0_31:$lsb, imm1_32:$width),
+ AddrMode1, 4, IndexModeNone, DPFrm, IIC_iUNAsi,
+ "ubfx", "\t$Rd, $Rn, $lsb, $width", "", []>,
+ Requires<[IsARM, HasV6T2]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<5> lsb;
+ bits<5> width;
+ let Inst{27-21} = 0b0111111;
+ let Inst{6-4} = 0b101;
+ let Inst{20-16} = width;
+ let Inst{15-12} = Rd;
+ let Inst{11-7} = lsb;
+ let Inst{3-0} = Rn;
+}
+
+//===----------------------------------------------------------------------===//
+// Arithmetic Instructions.
+//
+
+defm ADD : AsI1_bin_irs<0b0100, "add",
+ IIC_iALUi, IIC_iALUr, IIC_iALUsr,
+ BinOpFrag<(add node:$LHS, node:$RHS)>, "ADD", 1>;
+defm SUB : AsI1_bin_irs<0b0010, "sub",
+ IIC_iALUi, IIC_iALUr, IIC_iALUsr,
+ BinOpFrag<(sub node:$LHS, node:$RHS)>, "SUB">;
+
+// ADD and SUB with 's' bit set.
+//
+// Currently, t2ADDS/t2SUBS are pseudo opcodes that exist only in the
+// selection DAG. They are "lowered" to real t2ADD/t2SUB opcodes by
+// AdjustInstrPostInstrSelection where we determine whether or not to
+// set the "s" bit based on CPSR liveness.
+//
+// FIXME: Eliminate t2ADDS/t2SUBS pseudo opcodes after adding tablegen
+// support for an optional CPSR definition that corresponds to the DAG
+// node's second value. We can then eliminate the implicit def of CPSR.
+defm ADDS : AsI1_bin_s_irs<0b0100, "add",
+ IIC_iALUi, IIC_iALUr, IIC_iALUsr,
+ BinOpFrag<(ARMaddc node:$LHS, node:$RHS)>, 1>;
+defm SUBS : AsI1_bin_s_irs<0b0010, "sub",
+ IIC_iALUi, IIC_iALUr, IIC_iALUsr,
+ BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>;
+
+defm ADC : AI1_adde_sube_irs<0b0101, "adc",
+ BinOpWithFlagFrag<(ARMadde node:$LHS, node:$RHS, node:$FLAG)>,
+ "ADC", 1>;
+defm SBC : AI1_adde_sube_irs<0b0110, "sbc",
+ BinOpWithFlagFrag<(ARMsube node:$LHS, node:$RHS, node:$FLAG)>,
+ "SBC">;
+
+defm RSB : AsI1_rbin_irs <0b0011, "rsb",
+ IIC_iALUi, IIC_iALUr, IIC_iALUsr,
+ BinOpFrag<(sub node:$LHS, node:$RHS)>, "RSB">;
+
+// FIXME: Eliminate them if we can write def : Pat patterns which defines
+// CPSR and the implicit def of CPSR is not needed.
+defm RSBS : AsI1_rbin_s_is<0b0011, "rsb",
+ IIC_iALUi, IIC_iALUr, IIC_iALUsr,
+ BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>;
+
+defm RSC : AI1_rsc_irs<0b0111, "rsc",
+ BinOpWithFlagFrag<(ARMsube node:$LHS, node:$RHS, node:$FLAG)>,
+ "RSC">;
+
+// (sub X, imm) gets canonicalized to (add X, -imm). Match this form.
+// The assume-no-carry-in form uses the negation of the input since add/sub
+// assume opposite meanings of the carry flag (i.e., carry == !borrow).
+// See the definition of AddWithCarry() in the ARM ARM A2.2.1 for the gory
+// details.
+def : ARMPat<(add GPR:$src, so_imm_neg:$imm),
+ (SUBri GPR:$src, so_imm_neg:$imm)>;
+def : ARMPat<(ARMaddc GPR:$src, so_imm_neg:$imm),
+ (SUBSri GPR:$src, so_imm_neg:$imm)>;
+
+// The with-carry-in form matches bitwise not instead of the negation.
+// Effectively, the inverse interpretation of the carry flag already accounts
+// for part of the negation.
+def : ARMPat<(ARMadde GPR:$src, so_imm_not:$imm, CPSR),
+ (SBCri GPR:$src, so_imm_not:$imm)>;
+
+// Note: These are implemented in C++ code, because they have to generate
+// ADD/SUBrs instructions, which use a complex pattern that a xform function
+// cannot produce.
+// (mul X, 2^n+1) -> (add (X << n), X)
+// (mul X, 2^n-1) -> (rsb X, (X << n))
+
+// ARM Arithmetic Instruction
+// GPR:$dst = GPR:$a op GPR:$b
+class AAI<bits<8> op27_20, bits<8> op11_4, string opc,
+ list<dag> pattern = [],
+ dag iops = (ins GPRnopc:$Rn, GPRnopc:$Rm),
+ string asm = "\t$Rd, $Rn, $Rm">
+ : AI<(outs GPRnopc:$Rd), iops, DPFrm, IIC_iALUr, opc, asm, pattern> {
+ bits<4> Rn;
+ bits<4> Rd;
+ bits<4> Rm;
+ let Inst{27-20} = op27_20;
+ let Inst{11-4} = op11_4;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{3-0} = Rm;
+}
+
+// Saturating add/subtract
+
+def QADD : AAI<0b00010000, 0b00000101, "qadd",
+ [(set GPRnopc:$Rd, (int_arm_qadd GPRnopc:$Rm, GPRnopc:$Rn))],
+ (ins GPRnopc:$Rm, GPRnopc:$Rn), "\t$Rd, $Rm, $Rn">;
+def QSUB : AAI<0b00010010, 0b00000101, "qsub",
+ [(set GPRnopc:$Rd, (int_arm_qsub GPRnopc:$Rm, GPRnopc:$Rn))],
+ (ins GPRnopc:$Rm, GPRnopc:$Rn), "\t$Rd, $Rm, $Rn">;
+def QDADD : AAI<0b00010100, 0b00000101, "qdadd", [],
+ (ins GPRnopc:$Rm, GPRnopc:$Rn),
+ "\t$Rd, $Rm, $Rn">;
+def QDSUB : AAI<0b00010110, 0b00000101, "qdsub", [],
+ (ins GPRnopc:$Rm, GPRnopc:$Rn),
+ "\t$Rd, $Rm, $Rn">;
+
+def QADD16 : AAI<0b01100010, 0b11110001, "qadd16">;
+def QADD8 : AAI<0b01100010, 0b11111001, "qadd8">;
+def QASX : AAI<0b01100010, 0b11110011, "qasx">;
+def QSAX : AAI<0b01100010, 0b11110101, "qsax">;
+def QSUB16 : AAI<0b01100010, 0b11110111, "qsub16">;
+def QSUB8 : AAI<0b01100010, 0b11111111, "qsub8">;
+def UQADD16 : AAI<0b01100110, 0b11110001, "uqadd16">;
+def UQADD8 : AAI<0b01100110, 0b11111001, "uqadd8">;
+def UQASX : AAI<0b01100110, 0b11110011, "uqasx">;
+def UQSAX : AAI<0b01100110, 0b11110101, "uqsax">;
+def UQSUB16 : AAI<0b01100110, 0b11110111, "uqsub16">;
+def UQSUB8 : AAI<0b01100110, 0b11111111, "uqsub8">;
+
+// Signed/Unsigned add/subtract
+
+def SASX : AAI<0b01100001, 0b11110011, "sasx">;
+def SADD16 : AAI<0b01100001, 0b11110001, "sadd16">;
+def SADD8 : AAI<0b01100001, 0b11111001, "sadd8">;
+def SSAX : AAI<0b01100001, 0b11110101, "ssax">;
+def SSUB16 : AAI<0b01100001, 0b11110111, "ssub16">;
+def SSUB8 : AAI<0b01100001, 0b11111111, "ssub8">;
+def UASX : AAI<0b01100101, 0b11110011, "uasx">;
+def UADD16 : AAI<0b01100101, 0b11110001, "uadd16">;
+def UADD8 : AAI<0b01100101, 0b11111001, "uadd8">;
+def USAX : AAI<0b01100101, 0b11110101, "usax">;
+def USUB16 : AAI<0b01100101, 0b11110111, "usub16">;
+def USUB8 : AAI<0b01100101, 0b11111111, "usub8">;
+
+// Signed/Unsigned halving add/subtract
+
+def SHASX : AAI<0b01100011, 0b11110011, "shasx">;
+def SHADD16 : AAI<0b01100011, 0b11110001, "shadd16">;
+def SHADD8 : AAI<0b01100011, 0b11111001, "shadd8">;
+def SHSAX : AAI<0b01100011, 0b11110101, "shsax">;
+def SHSUB16 : AAI<0b01100011, 0b11110111, "shsub16">;
+def SHSUB8 : AAI<0b01100011, 0b11111111, "shsub8">;
+def UHASX : AAI<0b01100111, 0b11110011, "uhasx">;
+def UHADD16 : AAI<0b01100111, 0b11110001, "uhadd16">;
+def UHADD8 : AAI<0b01100111, 0b11111001, "uhadd8">;
+def UHSAX : AAI<0b01100111, 0b11110101, "uhsax">;
+def UHSUB16 : AAI<0b01100111, 0b11110111, "uhsub16">;
+def UHSUB8 : AAI<0b01100111, 0b11111111, "uhsub8">;
+
+// Unsigned Sum of Absolute Differences [and Accumulate].
+
+def USAD8 : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ MulFrm /* for convenience */, NoItinerary, "usad8",
+ "\t$Rd, $Rn, $Rm", []>,
+ Requires<[IsARM, HasV6]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{27-20} = 0b01111000;
+ let Inst{15-12} = 0b1111;
+ let Inst{7-4} = 0b0001;
+ let Inst{19-16} = Rd;
+ let Inst{11-8} = Rm;
+ let Inst{3-0} = Rn;
+}
+def USADA8 : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ MulFrm /* for convenience */, NoItinerary, "usada8",
+ "\t$Rd, $Rn, $Rm, $Ra", []>,
+ Requires<[IsARM, HasV6]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ bits<4> Ra;
+ let Inst{27-20} = 0b01111000;
+ let Inst{7-4} = 0b0001;
+ let Inst{19-16} = Rd;
+ let Inst{15-12} = Ra;
+ let Inst{11-8} = Rm;
+ let Inst{3-0} = Rn;
+}
+
+// Signed/Unsigned saturate
+
+def SSAT : AI<(outs GPRnopc:$Rd),
+ (ins imm1_32:$sat_imm, GPRnopc:$Rn, shift_imm:$sh),
+ SatFrm, NoItinerary, "ssat", "\t$Rd, $sat_imm, $Rn$sh", []> {
+ bits<4> Rd;
+ bits<5> sat_imm;
+ bits<4> Rn;
+ bits<8> sh;
+ let Inst{27-21} = 0b0110101;
+ let Inst{5-4} = 0b01;
+ let Inst{20-16} = sat_imm;
+ let Inst{15-12} = Rd;
+ let Inst{11-7} = sh{4-0};
+ let Inst{6} = sh{5};
+ let Inst{3-0} = Rn;
+}
+
+def SSAT16 : AI<(outs GPRnopc:$Rd),
+ (ins imm1_16:$sat_imm, GPRnopc:$Rn), SatFrm,
+ NoItinerary, "ssat16", "\t$Rd, $sat_imm, $Rn", []> {
+ bits<4> Rd;
+ bits<4> sat_imm;
+ bits<4> Rn;
+ let Inst{27-20} = 0b01101010;
+ let Inst{11-4} = 0b11110011;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = sat_imm;
+ let Inst{3-0} = Rn;
+}
+
+def USAT : AI<(outs GPRnopc:$Rd),
+ (ins imm0_31:$sat_imm, GPRnopc:$Rn, shift_imm:$sh),
+ SatFrm, NoItinerary, "usat", "\t$Rd, $sat_imm, $Rn$sh", []> {
+ bits<4> Rd;
+ bits<5> sat_imm;
+ bits<4> Rn;
+ bits<8> sh;
+ let Inst{27-21} = 0b0110111;
+ let Inst{5-4} = 0b01;
+ let Inst{15-12} = Rd;
+ let Inst{11-7} = sh{4-0};
+ let Inst{6} = sh{5};
+ let Inst{20-16} = sat_imm;
+ let Inst{3-0} = Rn;
+}
+
+def USAT16 : AI<(outs GPRnopc:$Rd),
+ (ins imm0_15:$sat_imm, GPRnopc:$Rn), SatFrm,
+ NoItinerary, "usat16", "\t$Rd, $sat_imm, $Rn", []> {
+ bits<4> Rd;
+ bits<4> sat_imm;
+ bits<4> Rn;
+ let Inst{27-20} = 0b01101110;
+ let Inst{11-4} = 0b11110011;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = sat_imm;
+ let Inst{3-0} = Rn;
+}
+
+def : ARMV6Pat<(int_arm_ssat GPRnopc:$a, imm:$pos),
+ (SSAT imm:$pos, GPRnopc:$a, 0)>;
+def : ARMV6Pat<(int_arm_usat GPRnopc:$a, imm:$pos),
+ (USAT imm:$pos, GPRnopc:$a, 0)>;
+
+//===----------------------------------------------------------------------===//
+// Bitwise Instructions.
+//
+
+defm AND : AsI1_bin_irs<0b0000, "and",
+ IIC_iBITi, IIC_iBITr, IIC_iBITsr,
+ BinOpFrag<(and node:$LHS, node:$RHS)>, "AND", 1>;
+defm ORR : AsI1_bin_irs<0b1100, "orr",
+ IIC_iBITi, IIC_iBITr, IIC_iBITsr,
+ BinOpFrag<(or node:$LHS, node:$RHS)>, "ORR", 1>;
+defm EOR : AsI1_bin_irs<0b0001, "eor",
+ IIC_iBITi, IIC_iBITr, IIC_iBITsr,
+ BinOpFrag<(xor node:$LHS, node:$RHS)>, "EOR", 1>;
+defm BIC : AsI1_bin_irs<0b1110, "bic",
+ IIC_iBITi, IIC_iBITr, IIC_iBITsr,
+ BinOpFrag<(and node:$LHS, (not node:$RHS))>, "BIC">;
+
+// FIXME: bf_inv_mask_imm should be two operands, the lsb and the msb, just
+// like in the actual instruction encoding. The complexity of mapping the mask
+// to the lsb/msb pair should be handled by ISel, not encapsulated in the
+// instruction description.
+def BFC : I<(outs GPR:$Rd), (ins GPR:$src, bf_inv_mask_imm:$imm),
+ AddrMode1, 4, IndexModeNone, DPFrm, IIC_iUNAsi,
+ "bfc", "\t$Rd, $imm", "$src = $Rd",
+ [(set GPR:$Rd, (and GPR:$src, bf_inv_mask_imm:$imm))]>,
+ Requires<[IsARM, HasV6T2]> {
+ bits<4> Rd;
+ bits<10> imm;
+ let Inst{27-21} = 0b0111110;
+ let Inst{6-0} = 0b0011111;
+ let Inst{15-12} = Rd;
+ let Inst{11-7} = imm{4-0}; // lsb
+ let Inst{20-16} = imm{9-5}; // msb
+}
+
+// A8.6.18 BFI - Bitfield insert (Encoding A1)
+def BFI:I<(outs GPRnopc:$Rd), (ins GPRnopc:$src, GPR:$Rn, bf_inv_mask_imm:$imm),
+ AddrMode1, 4, IndexModeNone, DPFrm, IIC_iUNAsi,
+ "bfi", "\t$Rd, $Rn, $imm", "$src = $Rd",
+ [(set GPRnopc:$Rd, (ARMbfi GPRnopc:$src, GPR:$Rn,
+ bf_inv_mask_imm:$imm))]>,
+ Requires<[IsARM, HasV6T2]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<10> imm;
+ let Inst{27-21} = 0b0111110;
+ let Inst{6-4} = 0b001; // Rn: Inst{3-0} != 15
+ let Inst{15-12} = Rd;
+ let Inst{11-7} = imm{4-0}; // lsb
+ let Inst{20-16} = imm{9-5}; // width
+ let Inst{3-0} = Rn;
+}
+
+def MVNr : AsI1<0b1111, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMVNr,
+ "mvn", "\t$Rd, $Rm",
+ [(set GPR:$Rd, (not GPR:$Rm))]>, UnaryDP {
+ bits<4> Rd;
+ bits<4> Rm;
+ let Inst{25} = 0;
+ let Inst{19-16} = 0b0000;
+ let Inst{11-4} = 0b00000000;
+ let Inst{15-12} = Rd;
+ let Inst{3-0} = Rm;
+}
+def MVNsi : AsI1<0b1111, (outs GPR:$Rd), (ins so_reg_imm:$shift),
+ DPSoRegImmFrm, IIC_iMVNsr, "mvn", "\t$Rd, $shift",
+ [(set GPR:$Rd, (not so_reg_imm:$shift))]>, UnaryDP {
+ bits<4> Rd;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{19-16} = 0b0000;
+ let Inst{15-12} = Rd;
+ let Inst{11-5} = shift{11-5};
+ let Inst{4} = 0;
+ let Inst{3-0} = shift{3-0};
+}
+def MVNsr : AsI1<0b1111, (outs GPR:$Rd), (ins so_reg_reg:$shift),
+ DPSoRegRegFrm, IIC_iMVNsr, "mvn", "\t$Rd, $shift",
+ [(set GPR:$Rd, (not so_reg_reg:$shift))]>, UnaryDP {
+ bits<4> Rd;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{19-16} = 0b0000;
+ let Inst{15-12} = Rd;
+ let Inst{11-8} = shift{11-8};
+ let Inst{7} = 0;
+ let Inst{6-5} = shift{6-5};
+ let Inst{4} = 1;
+ let Inst{3-0} = shift{3-0};
+}
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in
+def MVNi : AsI1<0b1111, (outs GPR:$Rd), (ins so_imm:$imm), DPFrm,
+ IIC_iMVNi, "mvn", "\t$Rd, $imm",
+ [(set GPR:$Rd, so_imm_not:$imm)]>,UnaryDP {
+ bits<4> Rd;
+ bits<12> imm;
+ let Inst{25} = 1;
+ let Inst{19-16} = 0b0000;
+ let Inst{15-12} = Rd;
+ let Inst{11-0} = imm;
+}
+
+def : ARMPat<(and GPR:$src, so_imm_not:$imm),
+ (BICri GPR:$src, so_imm_not:$imm)>;
+
+//===----------------------------------------------------------------------===//
+// Multiply Instructions.
+//
+class AsMul1I32<bits<7> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : AsMul1I<opcod, oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rm;
+ bits<4> Rn;
+ let Inst{19-16} = Rd;
+ let Inst{11-8} = Rm;
+ let Inst{3-0} = Rn;
+}
+class AsMul1I64<bits<7> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : AsMul1I<opcod, oops, iops, itin, opc, asm, pattern> {
+ bits<4> RdLo;
+ bits<4> RdHi;
+ bits<4> Rm;
+ bits<4> Rn;
+ let Inst{19-16} = RdHi;
+ let Inst{15-12} = RdLo;
+ let Inst{11-8} = Rm;
+ let Inst{3-0} = Rn;
+}
+
+// FIXME: The v5 pseudos are only necessary for the additional Constraint
+// property. Remove them when it's possible to add those properties
+// on an individual MachineInstr, not just an instuction description.
+let isCommutable = 1 in {
+def MUL : AsMul1I32<0b0000000, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMUL32, "mul", "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (mul GPR:$Rn, GPR:$Rm))]>,
+ Requires<[IsARM, HasV6]> {
+ let Inst{15-12} = 0b0000;
+}
+
+let Constraints = "@earlyclobber $Rd" in
+def MULv5: ARMPseudoExpand<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm,
+ pred:$p, cc_out:$s),
+ 4, IIC_iMUL32,
+ [(set GPR:$Rd, (mul GPR:$Rn, GPR:$Rm))],
+ (MUL GPR:$Rd, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
+ Requires<[IsARM, NoV6]>;
+}
+
+def MLA : AsMul1I32<0b0000001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ IIC_iMAC32, "mla", "\t$Rd, $Rn, $Rm, $Ra",
+ [(set GPR:$Rd, (add (mul GPR:$Rn, GPR:$Rm), GPR:$Ra))]>,
+ Requires<[IsARM, HasV6]> {
+ bits<4> Ra;
+ let Inst{15-12} = Ra;
+}
+
+let Constraints = "@earlyclobber $Rd" in
+def MLAv5: ARMPseudoExpand<(outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$Ra, pred:$p, cc_out:$s),
+ 4, IIC_iMAC32,
+ [(set GPR:$Rd, (add (mul GPR:$Rn, GPR:$Rm), GPR:$Ra))],
+ (MLA GPR:$Rd, GPR:$Rn, GPR:$Rm, GPR:$Ra, pred:$p, cc_out:$s)>,
+ Requires<[IsARM, NoV6]>;
+
+def MLS : AMul1I<0b0000011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ IIC_iMAC32, "mls", "\t$Rd, $Rn, $Rm, $Ra",
+ [(set GPR:$Rd, (sub GPR:$Ra, (mul GPR:$Rn, GPR:$Rm)))]>,
+ Requires<[IsARM, HasV6T2]> {
+ bits<4> Rd;
+ bits<4> Rm;
+ bits<4> Rn;
+ bits<4> Ra;
+ let Inst{19-16} = Rd;
+ let Inst{15-12} = Ra;
+ let Inst{11-8} = Rm;
+ let Inst{3-0} = Rn;
+}
+
+// Extra precision multiplies with low / high results
+let neverHasSideEffects = 1 in {
+let isCommutable = 1 in {
+def SMULL : AsMul1I64<0b0000110, (outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm), IIC_iMUL64,
+ "smull", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+ Requires<[IsARM, HasV6]>;
+
+def UMULL : AsMul1I64<0b0000100, (outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm), IIC_iMUL64,
+ "umull", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+ Requires<[IsARM, HasV6]>;
+
+let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi" in {
+def SMULLv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
+ 4, IIC_iMUL64, [],
+ (SMULL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
+ Requires<[IsARM, NoV6]>;
+
+def UMULLv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
+ 4, IIC_iMUL64, [],
+ (UMULL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
+ Requires<[IsARM, NoV6]>;
+}
+}
+
+// Multiply + accumulate
+def SMLAL : AsMul1I64<0b0000111, (outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm), IIC_iMAC64,
+ "smlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+ Requires<[IsARM, HasV6]>;
+def UMLAL : AsMul1I64<0b0000101, (outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm), IIC_iMAC64,
+ "umlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+ Requires<[IsARM, HasV6]>;
+
+def UMAAL : AMul1I <0b0000010, (outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm), IIC_iMAC64,
+ "umaal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+ Requires<[IsARM, HasV6]> {
+ bits<4> RdLo;
+ bits<4> RdHi;
+ bits<4> Rm;
+ bits<4> Rn;
+ let Inst{19-16} = RdHi;
+ let Inst{15-12} = RdLo;
+ let Inst{11-8} = Rm;
+ let Inst{3-0} = Rn;
+}
+
+let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi" in {
+def SMLALv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
+ 4, IIC_iMAC64, [],
+ (SMLAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
+ Requires<[IsARM, NoV6]>;
+def UMLALv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
+ 4, IIC_iMAC64, [],
+ (UMLAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
+ Requires<[IsARM, NoV6]>;
+def UMAALv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm, pred:$p),
+ 4, IIC_iMAC64, [],
+ (UMAAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p)>,
+ Requires<[IsARM, NoV6]>;
+}
+
+} // neverHasSideEffects
+
+// Most significant word multiply
+def SMMUL : AMul2I <0b0111010, 0b0001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMUL32, "smmul", "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (mulhs GPR:$Rn, GPR:$Rm))]>,
+ Requires<[IsARM, HasV6]> {
+ let Inst{15-12} = 0b1111;
+}
+
+def SMMULR : AMul2I <0b0111010, 0b0011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMUL32, "smmulr", "\t$Rd, $Rn, $Rm", []>,
+ Requires<[IsARM, HasV6]> {
+ let Inst{15-12} = 0b1111;
+}
+
+def SMMLA : AMul2Ia <0b0111010, 0b0001, (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ IIC_iMAC32, "smmla", "\t$Rd, $Rn, $Rm, $Ra",
+ [(set GPR:$Rd, (add (mulhs GPR:$Rn, GPR:$Rm), GPR:$Ra))]>,
+ Requires<[IsARM, HasV6]>;
+
+def SMMLAR : AMul2Ia <0b0111010, 0b0011, (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ IIC_iMAC32, "smmlar", "\t$Rd, $Rn, $Rm, $Ra", []>,
+ Requires<[IsARM, HasV6]>;
+
+def SMMLS : AMul2Ia <0b0111010, 0b1101, (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ IIC_iMAC32, "smmls", "\t$Rd, $Rn, $Rm, $Ra",
+ [(set GPR:$Rd, (sub GPR:$Ra, (mulhs GPR:$Rn, GPR:$Rm)))]>,
+ Requires<[IsARM, HasV6]>;
+
+def SMMLSR : AMul2Ia <0b0111010, 0b1111, (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ IIC_iMAC32, "smmlsr", "\t$Rd, $Rn, $Rm, $Ra", []>,
+ Requires<[IsARM, HasV6]>;
+
+multiclass AI_smul<string opc, PatFrag opnode> {
+ def BB : AMulxyI<0b0001011, 0b00, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMUL16, !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (opnode (sext_inreg GPR:$Rn, i16),
+ (sext_inreg GPR:$Rm, i16)))]>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def BT : AMulxyI<0b0001011, 0b10, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMUL16, !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (opnode (sext_inreg GPR:$Rn, i16),
+ (sra GPR:$Rm, (i32 16))))]>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def TB : AMulxyI<0b0001011, 0b01, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMUL16, !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (opnode (sra GPR:$Rn, (i32 16)),
+ (sext_inreg GPR:$Rm, i16)))]>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def TT : AMulxyI<0b0001011, 0b11, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMUL16, !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (opnode (sra GPR:$Rn, (i32 16)),
+ (sra GPR:$Rm, (i32 16))))]>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def WB : AMulxyI<0b0001001, 0b01, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMUL16, !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (sra (opnode GPR:$Rn,
+ (sext_inreg GPR:$Rm, i16)), (i32 16)))]>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def WT : AMulxyI<0b0001001, 0b11, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMUL16, !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (sra (opnode GPR:$Rn,
+ (sra GPR:$Rm, (i32 16))), (i32 16)))]>,
+ Requires<[IsARM, HasV5TE]>;
+}
+
+
+multiclass AI_smla<string opc, PatFrag opnode> {
+ let DecoderMethod = "DecodeSMLAInstruction" in {
+ def BB : AMulxyIa<0b0001000, 0b00, (outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
+ IIC_iMAC16, !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set GPRnopc:$Rd, (add GPR:$Ra,
+ (opnode (sext_inreg GPRnopc:$Rn, i16),
+ (sext_inreg GPRnopc:$Rm, i16))))]>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def BT : AMulxyIa<0b0001000, 0b10, (outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
+ IIC_iMAC16, !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set GPRnopc:$Rd,
+ (add GPR:$Ra, (opnode (sext_inreg GPRnopc:$Rn, i16),
+ (sra GPRnopc:$Rm, (i32 16)))))]>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def TB : AMulxyIa<0b0001000, 0b01, (outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
+ IIC_iMAC16, !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set GPRnopc:$Rd,
+ (add GPR:$Ra, (opnode (sra GPRnopc:$Rn, (i32 16)),
+ (sext_inreg GPRnopc:$Rm, i16))))]>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def TT : AMulxyIa<0b0001000, 0b11, (outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
+ IIC_iMAC16, !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set GPRnopc:$Rd,
+ (add GPR:$Ra, (opnode (sra GPRnopc:$Rn, (i32 16)),
+ (sra GPRnopc:$Rm, (i32 16)))))]>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def WB : AMulxyIa<0b0001001, 0b00, (outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
+ IIC_iMAC16, !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set GPRnopc:$Rd,
+ (add GPR:$Ra, (sra (opnode GPRnopc:$Rn,
+ (sext_inreg GPRnopc:$Rm, i16)), (i32 16))))]>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def WT : AMulxyIa<0b0001001, 0b10, (outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
+ IIC_iMAC16, !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set GPRnopc:$Rd,
+ (add GPR:$Ra, (sra (opnode GPRnopc:$Rn,
+ (sra GPRnopc:$Rm, (i32 16))), (i32 16))))]>,
+ Requires<[IsARM, HasV5TE]>;
+ }
+}
+
+defm SMUL : AI_smul<"smul", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
+defm SMLA : AI_smla<"smla", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
+
+// Halfword multiply accumulate long: SMLAL<x><y>.
+def SMLALBB : AMulxyI64<0b0001010, 0b00, (outs GPRnopc:$RdLo, GPRnopc:$RdHi),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm),
+ IIC_iMAC64, "smlalbb", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+ Requires<[IsARM, HasV5TE]>;
+
+def SMLALBT : AMulxyI64<0b0001010, 0b10, (outs GPRnopc:$RdLo, GPRnopc:$RdHi),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm),
+ IIC_iMAC64, "smlalbt", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+ Requires<[IsARM, HasV5TE]>;
+
+def SMLALTB : AMulxyI64<0b0001010, 0b01, (outs GPRnopc:$RdLo, GPRnopc:$RdHi),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm),
+ IIC_iMAC64, "smlaltb", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+ Requires<[IsARM, HasV5TE]>;
+
+def SMLALTT : AMulxyI64<0b0001010, 0b11, (outs GPRnopc:$RdLo, GPRnopc:$RdHi),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm),
+ IIC_iMAC64, "smlaltt", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+ Requires<[IsARM, HasV5TE]>;
+
+// Helper class for AI_smld.
+class AMulDualIbase<bit long, bit sub, bit swap, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm>
+ : AI<oops, iops, MulFrm, itin, opc, asm, []>, Requires<[IsARM, HasV6]> {
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{27-23} = 0b01110;
+ let Inst{22} = long;
+ let Inst{21-20} = 0b00;
+ let Inst{11-8} = Rm;
+ let Inst{7} = 0;
+ let Inst{6} = sub;
+ let Inst{5} = swap;
+ let Inst{4} = 1;
+ let Inst{3-0} = Rn;
+}
+class AMulDualI<bit long, bit sub, bit swap, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm>
+ : AMulDualIbase<long, sub, swap, oops, iops, itin, opc, asm> {
+ bits<4> Rd;
+ let Inst{15-12} = 0b1111;
+ let Inst{19-16} = Rd;
+}
+class AMulDualIa<bit long, bit sub, bit swap, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm>
+ : AMulDualIbase<long, sub, swap, oops, iops, itin, opc, asm> {
+ bits<4> Ra;
+ bits<4> Rd;
+ let Inst{19-16} = Rd;
+ let Inst{15-12} = Ra;
+}
+class AMulDualI64<bit long, bit sub, bit swap, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm>
+ : AMulDualIbase<long, sub, swap, oops, iops, itin, opc, asm> {
+ bits<4> RdLo;
+ bits<4> RdHi;
+ let Inst{19-16} = RdHi;
+ let Inst{15-12} = RdLo;
+}
+
+multiclass AI_smld<bit sub, string opc> {
+
+ def D : AMulDualIa<0, sub, 0, (outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
+ NoItinerary, !strconcat(opc, "d"), "\t$Rd, $Rn, $Rm, $Ra">;
+
+ def DX: AMulDualIa<0, sub, 1, (outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
+ NoItinerary, !strconcat(opc, "dx"), "\t$Rd, $Rn, $Rm, $Ra">;
+
+ def LD: AMulDualI64<1, sub, 0, (outs GPRnopc:$RdLo, GPRnopc:$RdHi),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm), NoItinerary,
+ !strconcat(opc, "ld"), "\t$RdLo, $RdHi, $Rn, $Rm">;
+
+ def LDX : AMulDualI64<1, sub, 1, (outs GPRnopc:$RdLo, GPRnopc:$RdHi),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm), NoItinerary,
+ !strconcat(opc, "ldx"),"\t$RdLo, $RdHi, $Rn, $Rm">;
+
+}
+
+defm SMLA : AI_smld<0, "smla">;
+defm SMLS : AI_smld<1, "smls">;
+
+multiclass AI_sdml<bit sub, string opc> {
+
+ def D:AMulDualI<0, sub, 0, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm),
+ NoItinerary, !strconcat(opc, "d"), "\t$Rd, $Rn, $Rm">;
+ def DX:AMulDualI<0, sub, 1, (outs GPRnopc:$Rd),(ins GPRnopc:$Rn, GPRnopc:$Rm),
+ NoItinerary, !strconcat(opc, "dx"), "\t$Rd, $Rn, $Rm">;
+}
+
+defm SMUA : AI_sdml<0, "smua">;
+defm SMUS : AI_sdml<1, "smus">;
+
+//===----------------------------------------------------------------------===//
+// Misc. Arithmetic Instructions.
+//
+
+def CLZ : AMiscA1I<0b000010110, 0b0001, (outs GPR:$Rd), (ins GPR:$Rm),
+ IIC_iUNAr, "clz", "\t$Rd, $Rm",
+ [(set GPR:$Rd, (ctlz GPR:$Rm))]>, Requires<[IsARM, HasV5T]>;
+
+def RBIT : AMiscA1I<0b01101111, 0b0011, (outs GPR:$Rd), (ins GPR:$Rm),
+ IIC_iUNAr, "rbit", "\t$Rd, $Rm",
+ [(set GPR:$Rd, (ARMrbit GPR:$Rm))]>,
+ Requires<[IsARM, HasV6T2]>;
+
+def REV : AMiscA1I<0b01101011, 0b0011, (outs GPR:$Rd), (ins GPR:$Rm),
+ IIC_iUNAr, "rev", "\t$Rd, $Rm",
+ [(set GPR:$Rd, (bswap GPR:$Rm))]>, Requires<[IsARM, HasV6]>;
+
+let AddedComplexity = 5 in
+def REV16 : AMiscA1I<0b01101011, 0b1011, (outs GPR:$Rd), (ins GPR:$Rm),
+ IIC_iUNAr, "rev16", "\t$Rd, $Rm",
+ [(set GPR:$Rd, (rotr (bswap GPR:$Rm), (i32 16)))]>,
+ Requires<[IsARM, HasV6]>;
+
+let AddedComplexity = 5 in
+def REVSH : AMiscA1I<0b01101111, 0b1011, (outs GPR:$Rd), (ins GPR:$Rm),
+ IIC_iUNAr, "revsh", "\t$Rd, $Rm",
+ [(set GPR:$Rd, (sra (bswap GPR:$Rm), (i32 16)))]>,
+ Requires<[IsARM, HasV6]>;
+
+def : ARMV6Pat<(or (sra (shl GPR:$Rm, (i32 24)), (i32 16)),
+ (and (srl GPR:$Rm, (i32 8)), 0xFF)),
+ (REVSH GPR:$Rm)>;
+
+def PKHBT : APKHI<0b01101000, 0, (outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm, pkh_lsl_amt:$sh),
+ IIC_iALUsi, "pkhbt", "\t$Rd, $Rn, $Rm$sh",
+ [(set GPRnopc:$Rd, (or (and GPRnopc:$Rn, 0xFFFF),
+ (and (shl GPRnopc:$Rm, pkh_lsl_amt:$sh),
+ 0xFFFF0000)))]>,
+ Requires<[IsARM, HasV6]>;
+
+// Alternate cases for PKHBT where identities eliminate some nodes.
+def : ARMV6Pat<(or (and GPRnopc:$Rn, 0xFFFF), (and GPRnopc:$Rm, 0xFFFF0000)),
+ (PKHBT GPRnopc:$Rn, GPRnopc:$Rm, 0)>;
+def : ARMV6Pat<(or (and GPRnopc:$Rn, 0xFFFF), (shl GPRnopc:$Rm, imm16_31:$sh)),
+ (PKHBT GPRnopc:$Rn, GPRnopc:$Rm, imm16_31:$sh)>;
+
+// Note: Shifts of 1-15 bits will be transformed to srl instead of sra and
+// will match the pattern below.
+def PKHTB : APKHI<0b01101000, 1, (outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm, pkh_asr_amt:$sh),
+ IIC_iBITsi, "pkhtb", "\t$Rd, $Rn, $Rm$sh",
+ [(set GPRnopc:$Rd, (or (and GPRnopc:$Rn, 0xFFFF0000),
+ (and (sra GPRnopc:$Rm, pkh_asr_amt:$sh),
+ 0xFFFF)))]>,
+ Requires<[IsARM, HasV6]>;
+
+// Alternate cases for PKHTB where identities eliminate some nodes. Note that
+// a shift amount of 0 is *not legal* here, it is PKHBT instead.
+def : ARMV6Pat<(or (and GPRnopc:$src1, 0xFFFF0000),
+ (srl GPRnopc:$src2, imm16_31:$sh)),
+ (PKHTB GPRnopc:$src1, GPRnopc:$src2, imm16_31:$sh)>;
+def : ARMV6Pat<(or (and GPRnopc:$src1, 0xFFFF0000),
+ (and (srl GPRnopc:$src2, imm1_15:$sh), 0xFFFF)),
+ (PKHTB GPRnopc:$src1, GPRnopc:$src2, imm1_15:$sh)>;
+
+//===----------------------------------------------------------------------===//
+// Comparison Instructions...
+//
+
+defm CMP : AI1_cmp_irs<0b1010, "cmp",
+ IIC_iCMPi, IIC_iCMPr, IIC_iCMPsr,
+ BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>>;
+
+// ARMcmpZ can re-use the above instruction definitions.
+def : ARMPat<(ARMcmpZ GPR:$src, so_imm:$imm),
+ (CMPri GPR:$src, so_imm:$imm)>;
+def : ARMPat<(ARMcmpZ GPR:$src, GPR:$rhs),
+ (CMPrr GPR:$src, GPR:$rhs)>;
+def : ARMPat<(ARMcmpZ GPR:$src, so_reg_imm:$rhs),
+ (CMPrsi GPR:$src, so_reg_imm:$rhs)>;
+def : ARMPat<(ARMcmpZ GPR:$src, so_reg_reg:$rhs),
+ (CMPrsr GPR:$src, so_reg_reg:$rhs)>;
+
+// FIXME: We have to be careful when using the CMN instruction and comparison
+// with 0. One would expect these two pieces of code should give identical
+// results:
+//
+// rsbs r1, r1, 0
+// cmp r0, r1
+// mov r0, #0
+// it ls
+// mov r0, #1
+//
+// and:
+//
+// cmn r0, r1
+// mov r0, #0
+// it ls
+// mov r0, #1
+//
+// However, the CMN gives the *opposite* result when r1 is 0. This is because
+// the carry flag is set in the CMP case but not in the CMN case. In short, the
+// CMP instruction doesn't perform a truncate of the (logical) NOT of 0 plus the
+// value of r0 and the carry bit (because the "carry bit" parameter to
+// AddWithCarry is defined as 1 in this case, the carry flag will always be set
+// when r0 >= 0). The CMN instruction doesn't perform a NOT of 0 so there is
+// never a "carry" when this AddWithCarry is performed (because the "carry bit"
+// parameter to AddWithCarry is defined as 0).
+//
+// When x is 0 and unsigned:
+//
+// x = 0
+// ~x = 0xFFFF FFFF
+// ~x + 1 = 0x1 0000 0000
+// (-x = 0) != (0x1 0000 0000 = ~x + 1)
+//
+// Therefore, we should disable CMN when comparing against zero, until we can
+// limit when the CMN instruction is used (when we know that the RHS is not 0 or
+// when it's a comparison which doesn't look at the 'carry' flag).
+//
+// (See the ARM docs for the "AddWithCarry" pseudo-code.)
+//
+// This is related to <rdar://problem/7569620>.
+//
+//defm CMN : AI1_cmp_irs<0b1011, "cmn",
+// BinOpFrag<(ARMcmp node:$LHS,(ineg node:$RHS))>>;
+
+// Note that TST/TEQ don't set all the same flags that CMP does!
+defm TST : AI1_cmp_irs<0b1000, "tst",
+ IIC_iTSTi, IIC_iTSTr, IIC_iTSTsr,
+ BinOpFrag<(ARMcmpZ (and_su node:$LHS, node:$RHS), 0)>, 1>;
+defm TEQ : AI1_cmp_irs<0b1001, "teq",
+ IIC_iTSTi, IIC_iTSTr, IIC_iTSTsr,
+ BinOpFrag<(ARMcmpZ (xor_su node:$LHS, node:$RHS), 0)>, 1>;
+
+defm CMNz : AI1_cmp_irs<0b1011, "cmn",
+ IIC_iCMPi, IIC_iCMPr, IIC_iCMPsr,
+ BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>>;
+
+//def : ARMPat<(ARMcmp GPR:$src, so_imm_neg:$imm),
+// (CMNri GPR:$src, so_imm_neg:$imm)>;
+
+def : ARMPat<(ARMcmpZ GPR:$src, so_imm_neg:$imm),
+ (CMNzri GPR:$src, so_imm_neg:$imm)>;
+
+// Pseudo i64 compares for some floating point compares.
+let usesCustomInserter = 1, isBranch = 1, isTerminator = 1,
+ Defs = [CPSR] in {
+def BCCi64 : PseudoInst<(outs),
+ (ins i32imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, brtarget:$dst),
+ IIC_Br,
+ [(ARMBcci64 imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, bb:$dst)]>;
+
+def BCCZi64 : PseudoInst<(outs),
+ (ins i32imm:$cc, GPR:$lhs1, GPR:$lhs2, brtarget:$dst), IIC_Br,
+ [(ARMBcci64 imm:$cc, GPR:$lhs1, GPR:$lhs2, 0, 0, bb:$dst)]>;
+} // usesCustomInserter
+
+
+// Conditional moves
+// FIXME: should be able to write a pattern for ARMcmov, but can't use
+// a two-value operand where a dag node expects two operands. :(
+let neverHasSideEffects = 1 in {
+def MOVCCr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$false, GPR:$Rm, pred:$p),
+ 4, IIC_iCMOVr,
+ [/*(set GPR:$Rd, (ARMcmov GPR:$false, GPR:$Rm, imm:$cc, CCR:$ccr))*/]>,
+ RegConstraint<"$false = $Rd">;
+def MOVCCsi : ARMPseudoInst<(outs GPR:$Rd),
+ (ins GPR:$false, so_reg_imm:$shift, pred:$p),
+ 4, IIC_iCMOVsr,
+ [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_reg_imm:$shift,
+ imm:$cc, CCR:$ccr))*/]>,
+ RegConstraint<"$false = $Rd">;
+def MOVCCsr : ARMPseudoInst<(outs GPR:$Rd),
+ (ins GPR:$false, so_reg_reg:$shift, pred:$p),
+ 4, IIC_iCMOVsr,
+ [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_reg_reg:$shift,
+ imm:$cc, CCR:$ccr))*/]>,
+ RegConstraint<"$false = $Rd">;
+
+
+let isMoveImm = 1 in
+def MOVCCi16 : ARMPseudoInst<(outs GPR:$Rd),
+ (ins GPR:$false, imm0_65535_expr:$imm, pred:$p),
+ 4, IIC_iMOVi,
+ []>,
+ RegConstraint<"$false = $Rd">, Requires<[IsARM, HasV6T2]>;
+
+let isMoveImm = 1 in
+def MOVCCi : ARMPseudoInst<(outs GPR:$Rd),
+ (ins GPR:$false, so_imm:$imm, pred:$p),
+ 4, IIC_iCMOVi,
+ [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_imm:$imm, imm:$cc, CCR:$ccr))*/]>,
+ RegConstraint<"$false = $Rd">;
+
+// Two instruction predicate mov immediate.
+let isMoveImm = 1 in
+def MOVCCi32imm : ARMPseudoInst<(outs GPR:$Rd),
+ (ins GPR:$false, i32imm:$src, pred:$p),
+ 8, IIC_iCMOVix2, []>, RegConstraint<"$false = $Rd">;
+
+let isMoveImm = 1 in
+def MVNCCi : ARMPseudoInst<(outs GPR:$Rd),
+ (ins GPR:$false, so_imm:$imm, pred:$p),
+ 4, IIC_iCMOVi,
+ [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_imm_not:$imm, imm:$cc, CCR:$ccr))*/]>,
+ RegConstraint<"$false = $Rd">;
+} // neverHasSideEffects
+
+//===----------------------------------------------------------------------===//
+// Atomic operations intrinsics
+//
+
+def MemBarrierOptOperand : AsmOperandClass {
+ let Name = "MemBarrierOpt";
+ let ParserMethod = "parseMemBarrierOptOperand";
+}
+def memb_opt : Operand<i32> {
+ let PrintMethod = "printMemBOption";
+ let ParserMatchClass = MemBarrierOptOperand;
+ let DecoderMethod = "DecodeMemBarrierOption";
+}
+
+// memory barriers protect the atomic sequences
+let hasSideEffects = 1 in {
+def DMB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary,
+ "dmb", "\t$opt", [(ARMMemBarrier (i32 imm:$opt))]>,
+ Requires<[IsARM, HasDB]> {
+ bits<4> opt;
+ let Inst{31-4} = 0xf57ff05;
+ let Inst{3-0} = opt;
+}
+}
+
+def DSB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary,
+ "dsb", "\t$opt", []>,
+ Requires<[IsARM, HasDB]> {
+ bits<4> opt;
+ let Inst{31-4} = 0xf57ff04;
+ let Inst{3-0} = opt;
+}
+
+// ISB has only full system option
+def ISB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary,
+ "isb", "\t$opt", []>,
+ Requires<[IsARM, HasDB]> {
+ bits<4> opt;
+ let Inst{31-4} = 0xf57ff06;
+ let Inst{3-0} = opt;
+}
+
+// Pseudo isntruction that combines movs + predicated rsbmi
+// to implement integer ABS
+let usesCustomInserter = 1, Defs = [CPSR] in {
+def ABS : ARMPseudoInst<
+ (outs GPR:$dst), (ins GPR:$src),
+ 8, NoItinerary, []>;
+}
+
+let usesCustomInserter = 1 in {
+ let Defs = [CPSR] in {
+ def ATOMIC_LOAD_ADD_I8 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_add_8 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_SUB_I8 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_sub_8 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_AND_I8 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_and_8 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_OR_I8 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_or_8 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_XOR_I8 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_xor_8 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_NAND_I8 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_nand_8 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_MIN_I8 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+ [(set GPR:$dst, (atomic_load_min_8 GPR:$ptr, GPR:$val))]>;
+ def ATOMIC_LOAD_MAX_I8 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+ [(set GPR:$dst, (atomic_load_max_8 GPR:$ptr, GPR:$val))]>;
+ def ATOMIC_LOAD_UMIN_I8 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+ [(set GPR:$dst, (atomic_load_min_8 GPR:$ptr, GPR:$val))]>;
+ def ATOMIC_LOAD_UMAX_I8 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+ [(set GPR:$dst, (atomic_load_max_8 GPR:$ptr, GPR:$val))]>;
+ def ATOMIC_LOAD_ADD_I16 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_add_16 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_SUB_I16 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_sub_16 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_AND_I16 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_and_16 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_OR_I16 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_or_16 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_XOR_I16 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_xor_16 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_NAND_I16 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_nand_16 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_MIN_I16 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+ [(set GPR:$dst, (atomic_load_min_16 GPR:$ptr, GPR:$val))]>;
+ def ATOMIC_LOAD_MAX_I16 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+ [(set GPR:$dst, (atomic_load_max_16 GPR:$ptr, GPR:$val))]>;
+ def ATOMIC_LOAD_UMIN_I16 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+ [(set GPR:$dst, (atomic_load_min_16 GPR:$ptr, GPR:$val))]>;
+ def ATOMIC_LOAD_UMAX_I16 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+ [(set GPR:$dst, (atomic_load_max_16 GPR:$ptr, GPR:$val))]>;
+ def ATOMIC_LOAD_ADD_I32 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_add_32 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_SUB_I32 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_sub_32 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_AND_I32 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_and_32 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_OR_I32 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_or_32 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_XOR_I32 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_xor_32 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_NAND_I32 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_nand_32 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_MIN_I32 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+ [(set GPR:$dst, (atomic_load_min_32 GPR:$ptr, GPR:$val))]>;
+ def ATOMIC_LOAD_MAX_I32 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+ [(set GPR:$dst, (atomic_load_max_32 GPR:$ptr, GPR:$val))]>;
+ def ATOMIC_LOAD_UMIN_I32 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+ [(set GPR:$dst, (atomic_load_min_32 GPR:$ptr, GPR:$val))]>;
+ def ATOMIC_LOAD_UMAX_I32 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+ [(set GPR:$dst, (atomic_load_max_32 GPR:$ptr, GPR:$val))]>;
+
+ def ATOMIC_SWAP_I8 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary,
+ [(set GPR:$dst, (atomic_swap_8 GPR:$ptr, GPR:$new))]>;
+ def ATOMIC_SWAP_I16 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary,
+ [(set GPR:$dst, (atomic_swap_16 GPR:$ptr, GPR:$new))]>;
+ def ATOMIC_SWAP_I32 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary,
+ [(set GPR:$dst, (atomic_swap_32 GPR:$ptr, GPR:$new))]>;
+
+ def ATOMIC_CMP_SWAP_I8 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$old, GPR:$new), NoItinerary,
+ [(set GPR:$dst, (atomic_cmp_swap_8 GPR:$ptr, GPR:$old, GPR:$new))]>;
+ def ATOMIC_CMP_SWAP_I16 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$old, GPR:$new), NoItinerary,
+ [(set GPR:$dst, (atomic_cmp_swap_16 GPR:$ptr, GPR:$old, GPR:$new))]>;
+ def ATOMIC_CMP_SWAP_I32 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$old, GPR:$new), NoItinerary,
+ [(set GPR:$dst, (atomic_cmp_swap_32 GPR:$ptr, GPR:$old, GPR:$new))]>;
+}
+}
+
+let mayLoad = 1 in {
+def LDREXB : AIldrex<0b10, (outs GPR:$Rt), (ins addr_offset_none:$addr),
+ NoItinerary,
+ "ldrexb", "\t$Rt, $addr", []>;
+def LDREXH : AIldrex<0b11, (outs GPR:$Rt), (ins addr_offset_none:$addr),
+ NoItinerary, "ldrexh", "\t$Rt, $addr", []>;
+def LDREX : AIldrex<0b00, (outs GPR:$Rt), (ins addr_offset_none:$addr),
+ NoItinerary, "ldrex", "\t$Rt, $addr", []>;
+let hasExtraDefRegAllocReq = 1 in
+def LDREXD: AIldrex<0b01, (outs GPR:$Rt, GPR:$Rt2),(ins addr_offset_none:$addr),
+ NoItinerary, "ldrexd", "\t$Rt, $Rt2, $addr", []> {
+ let DecoderMethod = "DecodeDoubleRegLoad";
+}
+}
+
+let mayStore = 1, Constraints = "@earlyclobber $Rd" in {
+def STREXB: AIstrex<0b10, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr),
+ NoItinerary, "strexb", "\t$Rd, $Rt, $addr", []>;
+def STREXH: AIstrex<0b11, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr),
+ NoItinerary, "strexh", "\t$Rd, $Rt, $addr", []>;
+def STREX : AIstrex<0b00, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr),
+ NoItinerary, "strex", "\t$Rd, $Rt, $addr", []>;
+}
+
+let hasExtraSrcRegAllocReq = 1, Constraints = "@earlyclobber $Rd" in
+def STREXD : AIstrex<0b01, (outs GPR:$Rd),
+ (ins GPR:$Rt, GPR:$Rt2, addr_offset_none:$addr),
+ NoItinerary, "strexd", "\t$Rd, $Rt, $Rt2, $addr", []> {
+ let DecoderMethod = "DecodeDoubleRegStore";
+}
+
+def CLREX : AXI<(outs), (ins), MiscFrm, NoItinerary, "clrex", []>,
+ Requires<[IsARM, HasV7]> {
+ let Inst{31-0} = 0b11110101011111111111000000011111;
+}
+
+// SWP/SWPB are deprecated in V6/V7.
+let mayLoad = 1, mayStore = 1 in {
+def SWP : AIswp<0, (outs GPR:$Rt), (ins GPR:$Rt2, addr_offset_none:$addr),
+ "swp", []>;
+def SWPB: AIswp<1, (outs GPR:$Rt), (ins GPR:$Rt2, addr_offset_none:$addr),
+ "swpb", []>;
+}
+
+//===----------------------------------------------------------------------===//
+// Coprocessor Instructions.
+//
+
+def CDP : ABI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1,
+ c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2),
+ NoItinerary, "cdp", "\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
+ [(int_arm_cdp imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn,
+ imm:$CRm, imm:$opc2)]> {
+ bits<4> opc1;
+ bits<4> CRn;
+ bits<4> CRd;
+ bits<4> cop;
+ bits<3> opc2;
+ bits<4> CRm;
+
+ let Inst{3-0} = CRm;
+ let Inst{4} = 0;
+ let Inst{7-5} = opc2;
+ let Inst{11-8} = cop;
+ let Inst{15-12} = CRd;
+ let Inst{19-16} = CRn;
+ let Inst{23-20} = opc1;
+}
+
+def CDP2 : ABXI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1,
+ c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2),
+ NoItinerary, "cdp2\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
+ [(int_arm_cdp2 imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn,
+ imm:$CRm, imm:$opc2)]> {
+ let Inst{31-28} = 0b1111;
+ bits<4> opc1;
+ bits<4> CRn;
+ bits<4> CRd;
+ bits<4> cop;
+ bits<3> opc2;
+ bits<4> CRm;
+
+ let Inst{3-0} = CRm;
+ let Inst{4} = 0;
+ let Inst{7-5} = opc2;
+ let Inst{11-8} = cop;
+ let Inst{15-12} = CRd;
+ let Inst{19-16} = CRn;
+ let Inst{23-20} = opc1;
+}
+
+class ACI<dag oops, dag iops, string opc, string asm,
+ IndexMode im = IndexModeNone>
+ : I<oops, iops, AddrModeNone, 4, im, BrFrm, NoItinerary,
+ opc, asm, "", []> {
+ let Inst{27-25} = 0b110;
+}
+class ACInoP<dag oops, dag iops, string opc, string asm,
+ IndexMode im = IndexModeNone>
+ : InoP<oops, iops, AddrModeNone, 4, im, BrFrm, NoItinerary,
+ opc, asm, "", []> {
+ let Inst{31-28} = 0b1111;
+ let Inst{27-25} = 0b110;
+}
+multiclass LdStCop<bit load, bit Dbit, string asm> {
+ def _OFFSET : ACI<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5:$addr),
+ asm, "\t$cop, $CRd, $addr"> {
+ bits<13> addr;
+ bits<4> cop;
+ bits<4> CRd;
+ let Inst{24} = 1; // P = 1
+ let Inst{23} = addr{8};
+ let Inst{22} = Dbit;
+ let Inst{21} = 0; // W = 0
+ let Inst{20} = load;
+ let Inst{19-16} = addr{12-9};
+ let Inst{15-12} = CRd;
+ let Inst{11-8} = cop;
+ let Inst{7-0} = addr{7-0};
+ let DecoderMethod = "DecodeCopMemInstruction";
+ }
+ def _PRE : ACI<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5:$addr),
+ asm, "\t$cop, $CRd, $addr!", IndexModePre> {
+ bits<13> addr;
+ bits<4> cop;
+ bits<4> CRd;
+ let Inst{24} = 1; // P = 1
+ let Inst{23} = addr{8};
+ let Inst{22} = Dbit;
+ let Inst{21} = 1; // W = 1
+ let Inst{20} = load;
+ let Inst{19-16} = addr{12-9};
+ let Inst{15-12} = CRd;
+ let Inst{11-8} = cop;
+ let Inst{7-0} = addr{7-0};
+ let DecoderMethod = "DecodeCopMemInstruction";
+ }
+ def _POST: ACI<(outs), (ins p_imm:$cop, c_imm:$CRd, addr_offset_none:$addr,
+ postidx_imm8s4:$offset),
+ asm, "\t$cop, $CRd, $addr, $offset", IndexModePost> {
+ bits<9> offset;
+ bits<4> addr;
+ bits<4> cop;
+ bits<4> CRd;
+ let Inst{24} = 0; // P = 0
+ let Inst{23} = offset{8};
+ let Inst{22} = Dbit;
+ let Inst{21} = 1; // W = 1
+ let Inst{20} = load;
+ let Inst{19-16} = addr;
+ let Inst{15-12} = CRd;
+ let Inst{11-8} = cop;
+ let Inst{7-0} = offset{7-0};
+ let DecoderMethod = "DecodeCopMemInstruction";
+ }
+ def _OPTION : ACI<(outs),
+ (ins p_imm:$cop, c_imm:$CRd, addr_offset_none:$addr,
+ coproc_option_imm:$option),
+ asm, "\t$cop, $CRd, $addr, $option"> {
+ bits<8> option;
+ bits<4> addr;
+ bits<4> cop;
+ bits<4> CRd;
+ let Inst{24} = 0; // P = 0
+ let Inst{23} = 1; // U = 1
+ let Inst{22} = Dbit;
+ let Inst{21} = 0; // W = 0
+ let Inst{20} = load;
+ let Inst{19-16} = addr;
+ let Inst{15-12} = CRd;
+ let Inst{11-8} = cop;
+ let Inst{7-0} = option;
+ let DecoderMethod = "DecodeCopMemInstruction";
+ }
+}
+multiclass LdSt2Cop<bit load, bit Dbit, string asm> {
+ def _OFFSET : ACInoP<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5:$addr),
+ asm, "\t$cop, $CRd, $addr"> {
+ bits<13> addr;
+ bits<4> cop;
+ bits<4> CRd;
+ let Inst{24} = 1; // P = 1
+ let Inst{23} = addr{8};
+ let Inst{22} = Dbit;
+ let Inst{21} = 0; // W = 0
+ let Inst{20} = load;
+ let Inst{19-16} = addr{12-9};
+ let Inst{15-12} = CRd;
+ let Inst{11-8} = cop;
+ let Inst{7-0} = addr{7-0};
+ let DecoderMethod = "DecodeCopMemInstruction";
+ }
+ def _PRE : ACInoP<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5:$addr),
+ asm, "\t$cop, $CRd, $addr!", IndexModePre> {
+ bits<13> addr;
+ bits<4> cop;
+ bits<4> CRd;
+ let Inst{24} = 1; // P = 1
+ let Inst{23} = addr{8};
+ let Inst{22} = Dbit;
+ let Inst{21} = 1; // W = 1
+ let Inst{20} = load;
+ let Inst{19-16} = addr{12-9};
+ let Inst{15-12} = CRd;
+ let Inst{11-8} = cop;
+ let Inst{7-0} = addr{7-0};
+ let DecoderMethod = "DecodeCopMemInstruction";
+ }
+ def _POST: ACInoP<(outs), (ins p_imm:$cop, c_imm:$CRd, addr_offset_none:$addr,
+ postidx_imm8s4:$offset),
+ asm, "\t$cop, $CRd, $addr, $offset", IndexModePost> {
+ bits<9> offset;
+ bits<4> addr;
+ bits<4> cop;
+ bits<4> CRd;
+ let Inst{24} = 0; // P = 0
+ let Inst{23} = offset{8};
+ let Inst{22} = Dbit;
+ let Inst{21} = 1; // W = 1
+ let Inst{20} = load;
+ let Inst{19-16} = addr;
+ let Inst{15-12} = CRd;
+ let Inst{11-8} = cop;
+ let Inst{7-0} = offset{7-0};
+ let DecoderMethod = "DecodeCopMemInstruction";
+ }
+ def _OPTION : ACInoP<(outs),
+ (ins p_imm:$cop, c_imm:$CRd, addr_offset_none:$addr,
+ coproc_option_imm:$option),
+ asm, "\t$cop, $CRd, $addr, $option"> {
+ bits<8> option;
+ bits<4> addr;
+ bits<4> cop;
+ bits<4> CRd;
+ let Inst{24} = 0; // P = 0
+ let Inst{23} = 1; // U = 1
+ let Inst{22} = Dbit;
+ let Inst{21} = 0; // W = 0
+ let Inst{20} = load;
+ let Inst{19-16} = addr;
+ let Inst{15-12} = CRd;
+ let Inst{11-8} = cop;
+ let Inst{7-0} = option;
+ let DecoderMethod = "DecodeCopMemInstruction";
+ }
+}
+
+defm LDC : LdStCop <1, 0, "ldc">;
+defm LDCL : LdStCop <1, 1, "ldcl">;
+defm STC : LdStCop <0, 0, "stc">;
+defm STCL : LdStCop <0, 1, "stcl">;
+defm LDC2 : LdSt2Cop<1, 0, "ldc2">;
+defm LDC2L : LdSt2Cop<1, 1, "ldc2l">;
+defm STC2 : LdSt2Cop<0, 0, "stc2">;
+defm STC2L : LdSt2Cop<0, 1, "stc2l">;
+
+//===----------------------------------------------------------------------===//
+// Move between coprocessor and ARM core register.
+//
+
+class MovRCopro<string opc, bit direction, dag oops, dag iops,
+ list<dag> pattern>
+ : ABI<0b1110, oops, iops, NoItinerary, opc,
+ "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2", pattern> {
+ let Inst{20} = direction;
+ let Inst{4} = 1;
+
+ bits<4> Rt;
+ bits<4> cop;
+ bits<3> opc1;
+ bits<3> opc2;
+ bits<4> CRm;
+ bits<4> CRn;
+
+ let Inst{15-12} = Rt;
+ let Inst{11-8} = cop;
+ let Inst{23-21} = opc1;
+ let Inst{7-5} = opc2;
+ let Inst{3-0} = CRm;
+ let Inst{19-16} = CRn;
+}
+
+def MCR : MovRCopro<"mcr", 0 /* from ARM core register to coprocessor */,
+ (outs),
+ (ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
+ c_imm:$CRm, imm0_7:$opc2),
+ [(int_arm_mcr imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
+ imm:$CRm, imm:$opc2)]>;
+def MRC : MovRCopro<"mrc", 1 /* from coprocessor to ARM core register */,
+ (outs GPR:$Rt),
+ (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm,
+ imm0_7:$opc2), []>;
+
+def : ARMPat<(int_arm_mrc imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2),
+ (MRC imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>;
+
+class MovRCopro2<string opc, bit direction, dag oops, dag iops,
+ list<dag> pattern>
+ : ABXI<0b1110, oops, iops, NoItinerary,
+ !strconcat(opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2"), pattern> {
+ let Inst{31-28} = 0b1111;
+ let Inst{20} = direction;
+ let Inst{4} = 1;
+
+ bits<4> Rt;
+ bits<4> cop;
+ bits<3> opc1;
+ bits<3> opc2;
+ bits<4> CRm;
+ bits<4> CRn;
+
+ let Inst{15-12} = Rt;
+ let Inst{11-8} = cop;
+ let Inst{23-21} = opc1;
+ let Inst{7-5} = opc2;
+ let Inst{3-0} = CRm;
+ let Inst{19-16} = CRn;
+}
+
+def MCR2 : MovRCopro2<"mcr2", 0 /* from ARM core register to coprocessor */,
+ (outs),
+ (ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
+ c_imm:$CRm, imm0_7:$opc2),
+ [(int_arm_mcr2 imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
+ imm:$CRm, imm:$opc2)]>;
+def MRC2 : MovRCopro2<"mrc2", 1 /* from coprocessor to ARM core register */,
+ (outs GPR:$Rt),
+ (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm,
+ imm0_7:$opc2), []>;
+
+def : ARMV5TPat<(int_arm_mrc2 imm:$cop, imm:$opc1, imm:$CRn,
+ imm:$CRm, imm:$opc2),
+ (MRC2 imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>;
+
+class MovRRCopro<string opc, bit direction, list<dag> pattern = []>
+ : ABI<0b1100, (outs), (ins p_imm:$cop, imm0_15:$opc1,
+ GPR:$Rt, GPR:$Rt2, c_imm:$CRm),
+ NoItinerary, opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm", pattern> {
+ let Inst{23-21} = 0b010;
+ let Inst{20} = direction;
+
+ bits<4> Rt;
+ bits<4> Rt2;
+ bits<4> cop;
+ bits<4> opc1;
+ bits<4> CRm;
+
+ let Inst{15-12} = Rt;
+ let Inst{19-16} = Rt2;
+ let Inst{11-8} = cop;
+ let Inst{7-4} = opc1;
+ let Inst{3-0} = CRm;
+}
+
+def MCRR : MovRRCopro<"mcrr", 0 /* from ARM core register to coprocessor */,
+ [(int_arm_mcrr imm:$cop, imm:$opc1, GPR:$Rt, GPR:$Rt2,
+ imm:$CRm)]>;
+def MRRC : MovRRCopro<"mrrc", 1 /* from coprocessor to ARM core register */>;
+
+class MovRRCopro2<string opc, bit direction, list<dag> pattern = []>
+ : ABXI<0b1100, (outs), (ins p_imm:$cop, imm0_15:$opc1,
+ GPR:$Rt, GPR:$Rt2, c_imm:$CRm), NoItinerary,
+ !strconcat(opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm"), pattern> {
+ let Inst{31-28} = 0b1111;
+ let Inst{23-21} = 0b010;
+ let Inst{20} = direction;
+
+ bits<4> Rt;
+ bits<4> Rt2;
+ bits<4> cop;
+ bits<4> opc1;
+ bits<4> CRm;
+
+ let Inst{15-12} = Rt;
+ let Inst{19-16} = Rt2;
+ let Inst{11-8} = cop;
+ let Inst{7-4} = opc1;
+ let Inst{3-0} = CRm;
+}
+
+def MCRR2 : MovRRCopro2<"mcrr2", 0 /* from ARM core register to coprocessor */,
+ [(int_arm_mcrr2 imm:$cop, imm:$opc1, GPR:$Rt, GPR:$Rt2,
+ imm:$CRm)]>;
+def MRRC2 : MovRRCopro2<"mrrc2", 1 /* from coprocessor to ARM core register */>;
+
+//===----------------------------------------------------------------------===//
+// Move between special register and ARM core register
+//
+
+// Move to ARM core register from Special Register
+def MRS : ABI<0b0001, (outs GPR:$Rd), (ins), NoItinerary,
+ "mrs", "\t$Rd, apsr", []> {
+ bits<4> Rd;
+ let Inst{23-16} = 0b00001111;
+ let Inst{15-12} = Rd;
+ let Inst{7-4} = 0b0000;
+}
+
+def : InstAlias<"mrs${p} $Rd, cpsr", (MRS GPR:$Rd, pred:$p)>, Requires<[IsARM]>;
+
+def MRSsys : ABI<0b0001, (outs GPR:$Rd), (ins), NoItinerary,
+ "mrs", "\t$Rd, spsr", []> {
+ bits<4> Rd;
+ let Inst{23-16} = 0b01001111;
+ let Inst{15-12} = Rd;
+ let Inst{7-4} = 0b0000;
+}
+
+// Move from ARM core register to Special Register
+//
+// No need to have both system and application versions, the encodings are the
+// same and the assembly parser has no way to distinguish between them. The mask
+// operand contains the special register (R Bit) in bit 4 and bits 3-0 contains
+// the mask with the fields to be accessed in the special register.
+def MSR : ABI<0b0001, (outs), (ins msr_mask:$mask, GPR:$Rn), NoItinerary,
+ "msr", "\t$mask, $Rn", []> {
+ bits<5> mask;
+ bits<4> Rn;
+
+ let Inst{23} = 0;
+ let Inst{22} = mask{4}; // R bit
+ let Inst{21-20} = 0b10;
+ let Inst{19-16} = mask{3-0};
+ let Inst{15-12} = 0b1111;
+ let Inst{11-4} = 0b00000000;
+ let Inst{3-0} = Rn;
+}
+
+def MSRi : ABI<0b0011, (outs), (ins msr_mask:$mask, so_imm:$a), NoItinerary,
+ "msr", "\t$mask, $a", []> {
+ bits<5> mask;
+ bits<12> a;
+
+ let Inst{23} = 0;
+ let Inst{22} = mask{4}; // R bit
+ let Inst{21-20} = 0b10;
+ let Inst{19-16} = mask{3-0};
+ let Inst{15-12} = 0b1111;
+ let Inst{11-0} = a;
+}
+
+//===----------------------------------------------------------------------===//
+// TLS Instructions
+//
+
+// __aeabi_read_tp preserves the registers r1-r3.
+// This is a pseudo inst so that we can get the encoding right,
+// complete with fixup for the aeabi_read_tp function.
+let isCall = 1,
+ Defs = [R0, R12, LR, CPSR], Uses = [SP] in {
+ def TPsoft : PseudoInst<(outs), (ins), IIC_Br,
+ [(set R0, ARMthread_pointer)]>;
+}
+
+//===----------------------------------------------------------------------===//
+// SJLJ Exception handling intrinsics
+// eh_sjlj_setjmp() is an instruction sequence to store the return
+// address and save #0 in R0 for the non-longjmp case.
+// Since by its nature we may be coming from some other function to get
+// here, and we're using the stack frame for the containing function to
+// save/restore registers, we can't keep anything live in regs across
+// the eh_sjlj_setjmp(), else it will almost certainly have been tromped upon
+// when we get here from a longjmp(). We force everything out of registers
+// except for our own input by listing the relevant registers in Defs. By
+// doing so, we also cause the prologue/epilogue code to actively preserve
+// all of the callee-saved resgisters, which is exactly what we want.
+// A constant value is passed in $val, and we use the location as a scratch.
+//
+// These are pseudo-instructions and are lowered to individual MC-insts, so
+// no encoding information is necessary.
+let Defs =
+ [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR,
+ QQQQ0, QQQQ1, QQQQ2, QQQQ3 ], hasSideEffects = 1, isBarrier = 1 in {
+ def Int_eh_sjlj_setjmp : PseudoInst<(outs), (ins GPR:$src, GPR:$val),
+ NoItinerary,
+ [(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>,
+ Requires<[IsARM, HasVFP2]>;
+}
+
+let Defs =
+ [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR ],
+ hasSideEffects = 1, isBarrier = 1 in {
+ def Int_eh_sjlj_setjmp_nofp : PseudoInst<(outs), (ins GPR:$src, GPR:$val),
+ NoItinerary,
+ [(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>,
+ Requires<[IsARM, NoVFP]>;
+}
+
+// FIXME: Non-Darwin version(s)
+let isBarrier = 1, hasSideEffects = 1, isTerminator = 1,
+ Defs = [ R7, LR, SP ] in {
+def Int_eh_sjlj_longjmp : PseudoInst<(outs), (ins GPR:$src, GPR:$scratch),
+ NoItinerary,
+ [(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>,
+ Requires<[IsARM, IsDarwin]>;
+}
+
+// eh.sjlj.dispatchsetup pseudo-instruction.
+// This pseudo is used for ARM, Thumb1 and Thumb2. Any differences are
+// handled when the pseudo is expanded (which happens before any passes
+// that need the instruction size).
+let isBarrier = 1, hasSideEffects = 1 in
+def Int_eh_sjlj_dispatchsetup :
+ PseudoInst<(outs), (ins GPR:$src), NoItinerary,
+ [(ARMeh_sjlj_dispatchsetup GPR:$src)]>,
+ Requires<[IsDarwin]>;
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//
+
+// ARMv4 indirect branch using (MOVr PC, dst)
+let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in
+ def MOVPCRX : ARMPseudoExpand<(outs), (ins GPR:$dst),
+ 4, IIC_Br, [(brind GPR:$dst)],
+ (MOVr PC, GPR:$dst, (ops 14, zero_reg), zero_reg)>,
+ Requires<[IsARM, NoV4T]>;
+
+// Large immediate handling.
+
+// 32-bit immediate using two piece so_imms or movw + movt.
+// This is a single pseudo instruction, the benefit is that it can be remat'd
+// as a single unit instead of having to handle reg inputs.
+// FIXME: Remove this when we can do generalized remat.
+let isReMaterializable = 1, isMoveImm = 1 in
+def MOVi32imm : PseudoInst<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVix2,
+ [(set GPR:$dst, (arm_i32imm:$src))]>,
+ Requires<[IsARM]>;
+
+// Pseudo instruction that combines movw + movt + add pc (if PIC).
+// It also makes it possible to rematerialize the instructions.
+// FIXME: Remove this when we can do generalized remat and when machine licm
+// can properly the instructions.
+let isReMaterializable = 1 in {
+def MOV_ga_pcrel : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr),
+ IIC_iMOVix2addpc,
+ [(set GPR:$dst, (ARMWrapperPIC tglobaladdr:$addr))]>,
+ Requires<[IsARM, UseMovt]>;
+
+def MOV_ga_dyn : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr),
+ IIC_iMOVix2,
+ [(set GPR:$dst, (ARMWrapperDYN tglobaladdr:$addr))]>,
+ Requires<[IsARM, UseMovt]>;
+
+let AddedComplexity = 10 in
+def MOV_ga_pcrel_ldr : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr),
+ IIC_iMOVix2ld,
+ [(set GPR:$dst, (load (ARMWrapperPIC tglobaladdr:$addr)))]>,
+ Requires<[IsARM, UseMovt]>;
+} // isReMaterializable
+
+// ConstantPool, GlobalAddress, and JumpTable
+def : ARMPat<(ARMWrapper tglobaladdr :$dst), (LEApcrel tglobaladdr :$dst)>,
+ Requires<[IsARM, DontUseMovt]>;
+def : ARMPat<(ARMWrapper tconstpool :$dst), (LEApcrel tconstpool :$dst)>;
+def : ARMPat<(ARMWrapper tglobaladdr :$dst), (MOVi32imm tglobaladdr :$dst)>,
+ Requires<[IsARM, UseMovt]>;
+def : ARMPat<(ARMWrapperJT tjumptable:$dst, imm:$id),
+ (LEApcrelJT tjumptable:$dst, imm:$id)>;
+
+// TODO: add,sub,and, 3-instr forms?
+
+// Tail calls
+def : ARMPat<(ARMtcret tcGPR:$dst),
+ (TCRETURNri tcGPR:$dst)>, Requires<[IsDarwin]>;
+
+def : ARMPat<(ARMtcret (i32 tglobaladdr:$dst)),
+ (TCRETURNdi texternalsym:$dst)>, Requires<[IsDarwin]>;
+
+def : ARMPat<(ARMtcret (i32 texternalsym:$dst)),
+ (TCRETURNdi texternalsym:$dst)>, Requires<[IsDarwin]>;
+
+def : ARMPat<(ARMtcret tcGPR:$dst),
+ (TCRETURNriND tcGPR:$dst)>, Requires<[IsNotDarwin]>;
+
+def : ARMPat<(ARMtcret (i32 tglobaladdr:$dst)),
+ (TCRETURNdiND texternalsym:$dst)>, Requires<[IsNotDarwin]>;
+
+def : ARMPat<(ARMtcret (i32 texternalsym:$dst)),
+ (TCRETURNdiND texternalsym:$dst)>, Requires<[IsNotDarwin]>;
+
+// Direct calls
+def : ARMPat<(ARMcall texternalsym:$func), (BL texternalsym:$func)>,
+ Requires<[IsARM, IsNotDarwin]>;
+def : ARMPat<(ARMcall texternalsym:$func), (BLr9 texternalsym:$func)>,
+ Requires<[IsARM, IsDarwin]>;
+
+// zextload i1 -> zextload i8
+def : ARMPat<(zextloadi1 addrmode_imm12:$addr), (LDRBi12 addrmode_imm12:$addr)>;
+def : ARMPat<(zextloadi1 ldst_so_reg:$addr), (LDRBrs ldst_so_reg:$addr)>;
+
+// extload -> zextload
+def : ARMPat<(extloadi1 addrmode_imm12:$addr), (LDRBi12 addrmode_imm12:$addr)>;
+def : ARMPat<(extloadi1 ldst_so_reg:$addr), (LDRBrs ldst_so_reg:$addr)>;
+def : ARMPat<(extloadi8 addrmode_imm12:$addr), (LDRBi12 addrmode_imm12:$addr)>;
+def : ARMPat<(extloadi8 ldst_so_reg:$addr), (LDRBrs ldst_so_reg:$addr)>;
+
+def : ARMPat<(extloadi16 addrmode3:$addr), (LDRH addrmode3:$addr)>;
+
+def : ARMPat<(extloadi8 addrmodepc:$addr), (PICLDRB addrmodepc:$addr)>;
+def : ARMPat<(extloadi16 addrmodepc:$addr), (PICLDRH addrmodepc:$addr)>;
+
+// smul* and smla*
+def : ARMV5TEPat<(mul (sra (shl GPR:$a, (i32 16)), (i32 16)),
+ (sra (shl GPR:$b, (i32 16)), (i32 16))),
+ (SMULBB GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(mul sext_16_node:$a, sext_16_node:$b),
+ (SMULBB GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(mul (sra (shl GPR:$a, (i32 16)), (i32 16)),
+ (sra GPR:$b, (i32 16))),
+ (SMULBT GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(mul sext_16_node:$a, (sra GPR:$b, (i32 16))),
+ (SMULBT GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(mul (sra GPR:$a, (i32 16)),
+ (sra (shl GPR:$b, (i32 16)), (i32 16))),
+ (SMULTB GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(mul (sra GPR:$a, (i32 16)), sext_16_node:$b),
+ (SMULTB GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(sra (mul GPR:$a, (sra (shl GPR:$b, (i32 16)), (i32 16))),
+ (i32 16)),
+ (SMULWB GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(sra (mul GPR:$a, sext_16_node:$b), (i32 16)),
+ (SMULWB GPR:$a, GPR:$b)>;
+
+def : ARMV5TEPat<(add GPR:$acc,
+ (mul (sra (shl GPR:$a, (i32 16)), (i32 16)),
+ (sra (shl GPR:$b, (i32 16)), (i32 16)))),
+ (SMLABB GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5TEPat<(add GPR:$acc,
+ (mul sext_16_node:$a, sext_16_node:$b)),
+ (SMLABB GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5TEPat<(add GPR:$acc,
+ (mul (sra (shl GPR:$a, (i32 16)), (i32 16)),
+ (sra GPR:$b, (i32 16)))),
+ (SMLABT GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5TEPat<(add GPR:$acc,
+ (mul sext_16_node:$a, (sra GPR:$b, (i32 16)))),
+ (SMLABT GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5TEPat<(add GPR:$acc,
+ (mul (sra GPR:$a, (i32 16)),
+ (sra (shl GPR:$b, (i32 16)), (i32 16)))),
+ (SMLATB GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5TEPat<(add GPR:$acc,
+ (mul (sra GPR:$a, (i32 16)), sext_16_node:$b)),
+ (SMLATB GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5TEPat<(add GPR:$acc,
+ (sra (mul GPR:$a, (sra (shl GPR:$b, (i32 16)), (i32 16))),
+ (i32 16))),
+ (SMLAWB GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5TEPat<(add GPR:$acc,
+ (sra (mul GPR:$a, sext_16_node:$b), (i32 16))),
+ (SMLAWB GPR:$a, GPR:$b, GPR:$acc)>;
+
+
+// Pre-v7 uses MCR for synchronization barriers.
+def : ARMPat<(ARMMemBarrierMCR GPR:$zero), (MCR 15, 0, GPR:$zero, 7, 10, 5)>,
+ Requires<[IsARM, HasV6]>;
+
+// SXT/UXT with no rotate
+let AddedComplexity = 16 in {
+def : ARMV6Pat<(and GPR:$Src, 0x000000FF), (UXTB GPR:$Src, 0)>;
+def : ARMV6Pat<(and GPR:$Src, 0x0000FFFF), (UXTH GPR:$Src, 0)>;
+def : ARMV6Pat<(and GPR:$Src, 0x00FF00FF), (UXTB16 GPR:$Src, 0)>;
+def : ARMV6Pat<(add GPR:$Rn, (and GPR:$Rm, 0x00FF)),
+ (UXTAB GPR:$Rn, GPR:$Rm, 0)>;
+def : ARMV6Pat<(add GPR:$Rn, (and GPR:$Rm, 0xFFFF)),
+ (UXTAH GPR:$Rn, GPR:$Rm, 0)>;
+}
+
+def : ARMV6Pat<(sext_inreg GPR:$Src, i8), (SXTB GPR:$Src, 0)>;
+def : ARMV6Pat<(sext_inreg GPR:$Src, i16), (SXTH GPR:$Src, 0)>;
+
+def : ARMV6Pat<(add GPR:$Rn, (sext_inreg GPRnopc:$Rm, i8)),
+ (SXTAB GPR:$Rn, GPRnopc:$Rm, 0)>;
+def : ARMV6Pat<(add GPR:$Rn, (sext_inreg GPRnopc:$Rm, i16)),
+ (SXTAH GPR:$Rn, GPRnopc:$Rm, 0)>;
+
+// Atomic load/store patterns
+def : ARMPat<(atomic_load_8 ldst_so_reg:$src),
+ (LDRBrs ldst_so_reg:$src)>;
+def : ARMPat<(atomic_load_8 addrmode_imm12:$src),
+ (LDRBi12 addrmode_imm12:$src)>;
+def : ARMPat<(atomic_load_16 addrmode3:$src),
+ (LDRH addrmode3:$src)>;
+def : ARMPat<(atomic_load_32 ldst_so_reg:$src),
+ (LDRrs ldst_so_reg:$src)>;
+def : ARMPat<(atomic_load_32 addrmode_imm12:$src),
+ (LDRi12 addrmode_imm12:$src)>;
+def : ARMPat<(atomic_store_8 ldst_so_reg:$ptr, GPR:$val),
+ (STRBrs GPR:$val, ldst_so_reg:$ptr)>;
+def : ARMPat<(atomic_store_8 addrmode_imm12:$ptr, GPR:$val),
+ (STRBi12 GPR:$val, addrmode_imm12:$ptr)>;
+def : ARMPat<(atomic_store_16 addrmode3:$ptr, GPR:$val),
+ (STRH GPR:$val, addrmode3:$ptr)>;
+def : ARMPat<(atomic_store_32 ldst_so_reg:$ptr, GPR:$val),
+ (STRrs GPR:$val, ldst_so_reg:$ptr)>;
+def : ARMPat<(atomic_store_32 addrmode_imm12:$ptr, GPR:$val),
+ (STRi12 GPR:$val, addrmode_imm12:$ptr)>;
+
+
+//===----------------------------------------------------------------------===//
+// Thumb Support
+//
+
+include "ARMInstrThumb.td"
+
+//===----------------------------------------------------------------------===//
+// Thumb2 Support
+//
+
+include "ARMInstrThumb2.td"
+
+//===----------------------------------------------------------------------===//
+// Floating Point Support
+//
+
+include "ARMInstrVFP.td"
+
+//===----------------------------------------------------------------------===//
+// Advanced SIMD (NEON) Support
+//
+
+include "ARMInstrNEON.td"
+
+//===----------------------------------------------------------------------===//
+// Assembler aliases
+//
+
+// Memory barriers
+def : InstAlias<"dmb", (DMB 0xf)>, Requires<[IsARM, HasDB]>;
+def : InstAlias<"dsb", (DSB 0xf)>, Requires<[IsARM, HasDB]>;
+def : InstAlias<"isb", (ISB 0xf)>, Requires<[IsARM, HasDB]>;
+
+// System instructions
+def : MnemonicAlias<"swi", "svc">;
+
+// Load / Store Multiple
+def : MnemonicAlias<"ldmfd", "ldm">;
+def : MnemonicAlias<"ldmia", "ldm">;
+def : MnemonicAlias<"ldmea", "ldmdb">;
+def : MnemonicAlias<"stmfd", "stmdb">;
+def : MnemonicAlias<"stmia", "stm">;
+def : MnemonicAlias<"stmea", "stm">;
+
+// PKHBT/PKHTB with default shift amount. PKHTB is equivalent to PKHBT when the
+// shift amount is zero (i.e., unspecified).
+def : InstAlias<"pkhbt${p} $Rd, $Rn, $Rm",
+ (PKHBT GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, 0, pred:$p)>,
+ Requires<[IsARM, HasV6]>;
+def : InstAlias<"pkhtb${p} $Rd, $Rn, $Rm",
+ (PKHBT GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, 0, pred:$p)>,
+ Requires<[IsARM, HasV6]>;
+
+// PUSH/POP aliases for STM/LDM
+def : ARMInstAlias<"push${p} $regs", (STMDB_UPD SP, pred:$p, reglist:$regs)>;
+def : ARMInstAlias<"pop${p} $regs", (LDMIA_UPD SP, pred:$p, reglist:$regs)>;
+
+// SSAT/USAT optional shift operand.
+def : ARMInstAlias<"ssat${p} $Rd, $sat_imm, $Rn",
+ (SSAT GPRnopc:$Rd, imm1_32:$sat_imm, GPRnopc:$Rn, 0, pred:$p)>;
+def : ARMInstAlias<"usat${p} $Rd, $sat_imm, $Rn",
+ (USAT GPRnopc:$Rd, imm0_31:$sat_imm, GPRnopc:$Rn, 0, pred:$p)>;
+
+
+// Extend instruction optional rotate operand.
+def : ARMInstAlias<"sxtab${p} $Rd, $Rn, $Rm",
+ (SXTAB GPRnopc:$Rd, GPR:$Rn, GPRnopc:$Rm, 0, pred:$p)>;
+def : ARMInstAlias<"sxtah${p} $Rd, $Rn, $Rm",
+ (SXTAH GPRnopc:$Rd, GPR:$Rn, GPRnopc:$Rm, 0, pred:$p)>;
+def : ARMInstAlias<"sxtab16${p} $Rd, $Rn, $Rm",
+ (SXTAB16 GPRnopc:$Rd, GPR:$Rn, GPRnopc:$Rm, 0, pred:$p)>;
+def : ARMInstAlias<"sxtb${p} $Rd, $Rm",
+ (SXTB GPRnopc:$Rd, GPRnopc:$Rm, 0, pred:$p)>;
+def : ARMInstAlias<"sxtb16${p} $Rd, $Rm",
+ (SXTB16 GPRnopc:$Rd, GPRnopc:$Rm, 0, pred:$p)>;
+def : ARMInstAlias<"sxth${p} $Rd, $Rm",
+ (SXTH GPRnopc:$Rd, GPRnopc:$Rm, 0, pred:$p)>;
+
+def : ARMInstAlias<"uxtab${p} $Rd, $Rn, $Rm",
+ (UXTAB GPRnopc:$Rd, GPR:$Rn, GPRnopc:$Rm, 0, pred:$p)>;
+def : ARMInstAlias<"uxtah${p} $Rd, $Rn, $Rm",
+ (UXTAH GPRnopc:$Rd, GPR:$Rn, GPRnopc:$Rm, 0, pred:$p)>;
+def : ARMInstAlias<"uxtab16${p} $Rd, $Rn, $Rm",
+ (UXTAB16 GPRnopc:$Rd, GPR:$Rn, GPRnopc:$Rm, 0, pred:$p)>;
+def : ARMInstAlias<"uxtb${p} $Rd, $Rm",
+ (UXTB GPRnopc:$Rd, GPRnopc:$Rm, 0, pred:$p)>;
+def : ARMInstAlias<"uxtb16${p} $Rd, $Rm",
+ (UXTB16 GPRnopc:$Rd, GPRnopc:$Rm, 0, pred:$p)>;
+def : ARMInstAlias<"uxth${p} $Rd, $Rm",
+ (UXTH GPRnopc:$Rd, GPRnopc:$Rm, 0, pred:$p)>;
+
+
+// RFE aliases
+def : MnemonicAlias<"rfefa", "rfeda">;
+def : MnemonicAlias<"rfeea", "rfedb">;
+def : MnemonicAlias<"rfefd", "rfeia">;
+def : MnemonicAlias<"rfeed", "rfeib">;
+def : MnemonicAlias<"rfe", "rfeia">;
+
+// SRS aliases
+def : MnemonicAlias<"srsfa", "srsda">;
+def : MnemonicAlias<"srsea", "srsdb">;
+def : MnemonicAlias<"srsfd", "srsia">;
+def : MnemonicAlias<"srsed", "srsib">;
+def : MnemonicAlias<"srs", "srsia">;
+
+// QSAX == QSUBADDX
+def : MnemonicAlias<"qsubaddx", "qsax">;
+// SASX == SADDSUBX
+def : MnemonicAlias<"saddsubx", "sasx">;
+// SHASX == SHADDSUBX
+def : MnemonicAlias<"shaddsubx", "shasx">;
+// SHSAX == SHSUBADDX
+def : MnemonicAlias<"shsubaddx", "shsax">;
+// SSAX == SSUBADDX
+def : MnemonicAlias<"ssubaddx", "ssax">;
+// UASX == UADDSUBX
+def : MnemonicAlias<"uaddsubx", "uasx">;
+// UHASX == UHADDSUBX
+def : MnemonicAlias<"uhaddsubx", "uhasx">;
+// UHSAX == UHSUBADDX
+def : MnemonicAlias<"uhsubaddx", "uhsax">;
+// UQASX == UQADDSUBX
+def : MnemonicAlias<"uqaddsubx", "uqasx">;
+// UQSAX == UQSUBADDX
+def : MnemonicAlias<"uqsubaddx", "uqsax">;
+// USAX == USUBADDX
+def : MnemonicAlias<"usubaddx", "usax">;
+
+// LDRSBT/LDRHT/LDRSHT post-index offset if optional.
+// Note that the write-back output register is a dummy operand for MC (it's
+// only meaningful for codegen), so we just pass zero here.
+// FIXME: tblgen not cooperating with argument conversions.
+//def : InstAlias<"ldrsbt${p} $Rt, $addr",
+// (LDRSBTi GPR:$Rt, GPR:$Rt, addr_offset_none:$addr, 0,pred:$p)>;
+//def : InstAlias<"ldrht${p} $Rt, $addr",
+// (LDRHTi GPR:$Rt, GPR:$Rt, addr_offset_none:$addr, 0, pred:$p)>;
+//def : InstAlias<"ldrsht${p} $Rt, $addr",
+// (LDRSHTi GPR:$Rt, GPR:$Rt, addr_offset_none:$addr, 0, pred:$p)>;
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
new file mode 100644
index 0000000..7aad186
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
@@ -0,0 +1,5030 @@
+//===- ARMInstrNEON.td - NEON support for ARM -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the ARM NEON instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// NEON-specific Operands.
+//===----------------------------------------------------------------------===//
+def VectorIndex8Operand : AsmOperandClass { let Name = "VectorIndex8"; }
+def VectorIndex16Operand : AsmOperandClass { let Name = "VectorIndex16"; }
+def VectorIndex32Operand : AsmOperandClass { let Name = "VectorIndex32"; }
+def VectorIndex8 : Operand<i32>, ImmLeaf<i32, [{
+ return ((uint64_t)Imm) < 8;
+}]> {
+ let ParserMatchClass = VectorIndex8Operand;
+ let PrintMethod = "printVectorIndex";
+ let MIOperandInfo = (ops i32imm);
+}
+def VectorIndex16 : Operand<i32>, ImmLeaf<i32, [{
+ return ((uint64_t)Imm) < 4;
+}]> {
+ let ParserMatchClass = VectorIndex16Operand;
+ let PrintMethod = "printVectorIndex";
+ let MIOperandInfo = (ops i32imm);
+}
+def VectorIndex32 : Operand<i32>, ImmLeaf<i32, [{
+ return ((uint64_t)Imm) < 2;
+}]> {
+ let ParserMatchClass = VectorIndex32Operand;
+ let PrintMethod = "printVectorIndex";
+ let MIOperandInfo = (ops i32imm);
+}
+
+//===----------------------------------------------------------------------===//
+// NEON-specific DAG Nodes.
+//===----------------------------------------------------------------------===//
+
+def SDTARMVCMP : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>;
+def SDTARMVCMPZ : SDTypeProfile<1, 1, []>;
+
+def NEONvceq : SDNode<"ARMISD::VCEQ", SDTARMVCMP>;
+def NEONvceqz : SDNode<"ARMISD::VCEQZ", SDTARMVCMPZ>;
+def NEONvcge : SDNode<"ARMISD::VCGE", SDTARMVCMP>;
+def NEONvcgez : SDNode<"ARMISD::VCGEZ", SDTARMVCMPZ>;
+def NEONvclez : SDNode<"ARMISD::VCLEZ", SDTARMVCMPZ>;
+def NEONvcgeu : SDNode<"ARMISD::VCGEU", SDTARMVCMP>;
+def NEONvcgt : SDNode<"ARMISD::VCGT", SDTARMVCMP>;
+def NEONvcgtz : SDNode<"ARMISD::VCGTZ", SDTARMVCMPZ>;
+def NEONvcltz : SDNode<"ARMISD::VCLTZ", SDTARMVCMPZ>;
+def NEONvcgtu : SDNode<"ARMISD::VCGTU", SDTARMVCMP>;
+def NEONvtst : SDNode<"ARMISD::VTST", SDTARMVCMP>;
+
+// Types for vector shift by immediates. The "SHX" version is for long and
+// narrow operations where the source and destination vectors have different
+// types. The "SHINS" version is for shift and insert operations.
+def SDTARMVSH : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
+ SDTCisVT<2, i32>]>;
+def SDTARMVSHX : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
+ SDTCisVT<2, i32>]>;
+def SDTARMVSHINS : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
+
+def NEONvshl : SDNode<"ARMISD::VSHL", SDTARMVSH>;
+def NEONvshrs : SDNode<"ARMISD::VSHRs", SDTARMVSH>;
+def NEONvshru : SDNode<"ARMISD::VSHRu", SDTARMVSH>;
+def NEONvshlls : SDNode<"ARMISD::VSHLLs", SDTARMVSHX>;
+def NEONvshllu : SDNode<"ARMISD::VSHLLu", SDTARMVSHX>;
+def NEONvshlli : SDNode<"ARMISD::VSHLLi", SDTARMVSHX>;
+def NEONvshrn : SDNode<"ARMISD::VSHRN", SDTARMVSHX>;
+
+def NEONvrshrs : SDNode<"ARMISD::VRSHRs", SDTARMVSH>;
+def NEONvrshru : SDNode<"ARMISD::VRSHRu", SDTARMVSH>;
+def NEONvrshrn : SDNode<"ARMISD::VRSHRN", SDTARMVSHX>;
+
+def NEONvqshls : SDNode<"ARMISD::VQSHLs", SDTARMVSH>;
+def NEONvqshlu : SDNode<"ARMISD::VQSHLu", SDTARMVSH>;
+def NEONvqshlsu : SDNode<"ARMISD::VQSHLsu", SDTARMVSH>;
+def NEONvqshrns : SDNode<"ARMISD::VQSHRNs", SDTARMVSHX>;
+def NEONvqshrnu : SDNode<"ARMISD::VQSHRNu", SDTARMVSHX>;
+def NEONvqshrnsu : SDNode<"ARMISD::VQSHRNsu", SDTARMVSHX>;
+
+def NEONvqrshrns : SDNode<"ARMISD::VQRSHRNs", SDTARMVSHX>;
+def NEONvqrshrnu : SDNode<"ARMISD::VQRSHRNu", SDTARMVSHX>;
+def NEONvqrshrnsu : SDNode<"ARMISD::VQRSHRNsu", SDTARMVSHX>;
+
+def NEONvsli : SDNode<"ARMISD::VSLI", SDTARMVSHINS>;
+def NEONvsri : SDNode<"ARMISD::VSRI", SDTARMVSHINS>;
+
+def SDTARMVGETLN : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisInt<1>,
+ SDTCisVT<2, i32>]>;
+def NEONvgetlaneu : SDNode<"ARMISD::VGETLANEu", SDTARMVGETLN>;
+def NEONvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>;
+
+def SDTARMVMOVIMM : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>;
+def NEONvmovImm : SDNode<"ARMISD::VMOVIMM", SDTARMVMOVIMM>;
+def NEONvmvnImm : SDNode<"ARMISD::VMVNIMM", SDTARMVMOVIMM>;
+
+def SDTARMVORRIMM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
+ SDTCisVT<2, i32>]>;
+def NEONvorrImm : SDNode<"ARMISD::VORRIMM", SDTARMVORRIMM>;
+def NEONvbicImm : SDNode<"ARMISD::VBICIMM", SDTARMVORRIMM>;
+
+def NEONvbsl : SDNode<"ARMISD::VBSL",
+ SDTypeProfile<1, 3, [SDTCisVec<0>,
+ SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>]>>;
+
+def NEONvdup : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>;
+
+// VDUPLANE can produce a quad-register result from a double-register source,
+// so the result is not constrained to match the source.
+def NEONvduplane : SDNode<"ARMISD::VDUPLANE",
+ SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTCisVT<2, i32>]>>;
+
+def SDTARMVEXT : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
+def NEONvext : SDNode<"ARMISD::VEXT", SDTARMVEXT>;
+
+def SDTARMVSHUF : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
+def NEONvrev64 : SDNode<"ARMISD::VREV64", SDTARMVSHUF>;
+def NEONvrev32 : SDNode<"ARMISD::VREV32", SDTARMVSHUF>;
+def NEONvrev16 : SDNode<"ARMISD::VREV16", SDTARMVSHUF>;
+
+def SDTARMVSHUF2 : SDTypeProfile<2, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>]>;
+def NEONzip : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>;
+def NEONuzp : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>;
+def NEONtrn : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>;
+
+def SDTARMVMULL : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
+ SDTCisSameAs<1, 2>]>;
+def NEONvmulls : SDNode<"ARMISD::VMULLs", SDTARMVMULL>;
+def NEONvmullu : SDNode<"ARMISD::VMULLu", SDTARMVMULL>;
+
+def SDTARMFMAX : SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>]>;
+def NEONfmax : SDNode<"ARMISD::FMAX", SDTARMFMAX>;
+def NEONfmin : SDNode<"ARMISD::FMIN", SDTARMFMAX>;
+
+def NEONimmAllZerosV: PatLeaf<(NEONvmovImm (i32 timm)), [{
+ ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
+ unsigned EltBits = 0;
+ uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
+ return (EltBits == 32 && EltVal == 0);
+}]>;
+
+def NEONimmAllOnesV: PatLeaf<(NEONvmovImm (i32 timm)), [{
+ ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
+ unsigned EltBits = 0;
+ uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
+ return (EltBits == 8 && EltVal == 0xff);
+}]>;
+
+//===----------------------------------------------------------------------===//
+// NEON operand definitions
+//===----------------------------------------------------------------------===//
+
+def nModImm : Operand<i32> {
+ let PrintMethod = "printNEONModImmOperand";
+}
+
+//===----------------------------------------------------------------------===//
+// NEON load / store instructions
+//===----------------------------------------------------------------------===//
+
+// Use VLDM to load a Q register as a D register pair.
+// This is a pseudo instruction that is expanded to VLDMD after reg alloc.
+def VLDMQIA
+ : PseudoVFPLdStM<(outs QPR:$dst), (ins GPR:$Rn),
+ IIC_fpLoad_m, "",
+ [(set QPR:$dst, (v2f64 (load GPR:$Rn)))]>;
+
+// Use VSTM to store a Q register as a D register pair.
+// This is a pseudo instruction that is expanded to VSTMD after reg alloc.
+def VSTMQIA
+ : PseudoVFPLdStM<(outs), (ins QPR:$src, GPR:$Rn),
+ IIC_fpStore_m, "",
+ [(store (v2f64 QPR:$src), GPR:$Rn)]>;
+
+// Classes for VLD* pseudo-instructions with multi-register operands.
+// These are expanded to real instructions after register allocation.
+class VLDQPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QPR:$dst), (ins addrmode6:$addr), itin, "">;
+class VLDQWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset), itin,
+ "$addr.addr = $wb">;
+class VLDQQPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr), itin, "">;
+class VLDQQWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset), itin,
+ "$addr.addr = $wb">;
+class VLDQQQQPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQQQPR:$dst), (ins addrmode6:$addr, QQQQPR:$src),itin,
+ "$src = $dst">;
+class VLDQQQQWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
+ "$addr.addr = $wb, $src = $dst">;
+
+let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
+
+// VLD1 : Vector Load (multiple single elements)
+class VLD1D<bits<4> op7_4, string Dt>
+ : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$Vd),
+ (ins addrmode6:$Rn), IIC_VLD1,
+ "vld1", Dt, "\\{$Vd\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLDInstruction";
+}
+class VLD1Q<bits<4> op7_4, string Dt>
+ : NLdSt<0,0b10,0b1010,op7_4, (outs DPR:$Vd, DPR:$dst2),
+ (ins addrmode6:$Rn), IIC_VLD1x2,
+ "vld1", Dt, "\\{$Vd, $dst2\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDInstruction";
+}
+
+def VLD1d8 : VLD1D<{0,0,0,?}, "8">;
+def VLD1d16 : VLD1D<{0,1,0,?}, "16">;
+def VLD1d32 : VLD1D<{1,0,0,?}, "32">;
+def VLD1d64 : VLD1D<{1,1,0,?}, "64">;
+
+def VLD1q8 : VLD1Q<{0,0,?,?}, "8">;
+def VLD1q16 : VLD1Q<{0,1,?,?}, "16">;
+def VLD1q32 : VLD1Q<{1,0,?,?}, "32">;
+def VLD1q64 : VLD1Q<{1,1,?,?}, "64">;
+
+def VLD1q8Pseudo : VLDQPseudo<IIC_VLD1x2>;
+def VLD1q16Pseudo : VLDQPseudo<IIC_VLD1x2>;
+def VLD1q32Pseudo : VLDQPseudo<IIC_VLD1x2>;
+def VLD1q64Pseudo : VLDQPseudo<IIC_VLD1x2>;
+
+// ...with address register writeback:
+class VLD1DWB<bits<4> op7_4, string Dt>
+ : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$Vd, GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD1u,
+ "vld1", Dt, "\\{$Vd\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLDInstruction";
+}
+class VLD1QWB<bits<4> op7_4, string Dt>
+ : NLdSt<0,0b10,0b1010,op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD1x2u,
+ "vld1", Dt, "\\{$Vd, $dst2\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDInstruction";
+}
+
+def VLD1d8_UPD : VLD1DWB<{0,0,0,?}, "8">;
+def VLD1d16_UPD : VLD1DWB<{0,1,0,?}, "16">;
+def VLD1d32_UPD : VLD1DWB<{1,0,0,?}, "32">;
+def VLD1d64_UPD : VLD1DWB<{1,1,0,?}, "64">;
+
+def VLD1q8_UPD : VLD1QWB<{0,0,?,?}, "8">;
+def VLD1q16_UPD : VLD1QWB<{0,1,?,?}, "16">;
+def VLD1q32_UPD : VLD1QWB<{1,0,?,?}, "32">;
+def VLD1q64_UPD : VLD1QWB<{1,1,?,?}, "64">;
+
+def VLD1q8Pseudo_UPD : VLDQWBPseudo<IIC_VLD1x2u>;
+def VLD1q16Pseudo_UPD : VLDQWBPseudo<IIC_VLD1x2u>;
+def VLD1q32Pseudo_UPD : VLDQWBPseudo<IIC_VLD1x2u>;
+def VLD1q64Pseudo_UPD : VLDQWBPseudo<IIC_VLD1x2u>;
+
+// ...with 3 registers (some of these are only for the disassembler):
+class VLD1D3<bits<4> op7_4, string Dt>
+ : NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
+ (ins addrmode6:$Rn), IIC_VLD1x3, "vld1", Dt,
+ "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLDInstruction";
+}
+class VLD1D3WB<bits<4> op7_4, string Dt>
+ : NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD1x3u, "vld1", Dt,
+ "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm", "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLDInstruction";
+}
+
+def VLD1d8T : VLD1D3<{0,0,0,?}, "8">;
+def VLD1d16T : VLD1D3<{0,1,0,?}, "16">;
+def VLD1d32T : VLD1D3<{1,0,0,?}, "32">;
+def VLD1d64T : VLD1D3<{1,1,0,?}, "64">;
+
+def VLD1d8T_UPD : VLD1D3WB<{0,0,0,?}, "8">;
+def VLD1d16T_UPD : VLD1D3WB<{0,1,0,?}, "16">;
+def VLD1d32T_UPD : VLD1D3WB<{1,0,0,?}, "32">;
+def VLD1d64T_UPD : VLD1D3WB<{1,1,0,?}, "64">;
+
+def VLD1d64TPseudo : VLDQQPseudo<IIC_VLD1x3>;
+def VLD1d64TPseudo_UPD : VLDQQWBPseudo<IIC_VLD1x3u>;
+
+// ...with 4 registers (some of these are only for the disassembler):
+class VLD1D4<bits<4> op7_4, string Dt>
+ : NLdSt<0,0b10,0b0010,op7_4,(outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
+ (ins addrmode6:$Rn), IIC_VLD1x4, "vld1", Dt,
+ "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDInstruction";
+}
+class VLD1D4WB<bits<4> op7_4, string Dt>
+ : NLdSt<0,0b10,0b0010,op7_4,
+ (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD1x4u, "vld1", Dt,
+ "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm", "$Rn.addr = $wb",
+ []> {
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDInstruction";
+}
+
+def VLD1d8Q : VLD1D4<{0,0,?,?}, "8">;
+def VLD1d16Q : VLD1D4<{0,1,?,?}, "16">;
+def VLD1d32Q : VLD1D4<{1,0,?,?}, "32">;
+def VLD1d64Q : VLD1D4<{1,1,?,?}, "64">;
+
+def VLD1d8Q_UPD : VLD1D4WB<{0,0,?,?}, "8">;
+def VLD1d16Q_UPD : VLD1D4WB<{0,1,?,?}, "16">;
+def VLD1d32Q_UPD : VLD1D4WB<{1,0,?,?}, "32">;
+def VLD1d64Q_UPD : VLD1D4WB<{1,1,?,?}, "64">;
+
+def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>;
+def VLD1d64QPseudo_UPD : VLDQQWBPseudo<IIC_VLD1x4u>;
+
+// VLD2 : Vector Load (multiple 2-element structures)
+class VLD2D<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2),
+ (ins addrmode6:$Rn), IIC_VLD2,
+ "vld2", Dt, "\\{$Vd, $dst2\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDInstruction";
+}
+class VLD2Q<bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b10, 0b0011, op7_4,
+ (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
+ (ins addrmode6:$Rn), IIC_VLD2x2,
+ "vld2", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDInstruction";
+}
+
+def VLD2d8 : VLD2D<0b1000, {0,0,?,?}, "8">;
+def VLD2d16 : VLD2D<0b1000, {0,1,?,?}, "16">;
+def VLD2d32 : VLD2D<0b1000, {1,0,?,?}, "32">;
+
+def VLD2q8 : VLD2Q<{0,0,?,?}, "8">;
+def VLD2q16 : VLD2Q<{0,1,?,?}, "16">;
+def VLD2q32 : VLD2Q<{1,0,?,?}, "32">;
+
+def VLD2d8Pseudo : VLDQPseudo<IIC_VLD2>;
+def VLD2d16Pseudo : VLDQPseudo<IIC_VLD2>;
+def VLD2d32Pseudo : VLDQPseudo<IIC_VLD2>;
+
+def VLD2q8Pseudo : VLDQQPseudo<IIC_VLD2x2>;
+def VLD2q16Pseudo : VLDQQPseudo<IIC_VLD2x2>;
+def VLD2q32Pseudo : VLDQQPseudo<IIC_VLD2x2>;
+
+// ...with address register writeback:
+class VLD2DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD2u,
+ "vld2", Dt, "\\{$Vd, $dst2\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDInstruction";
+}
+class VLD2QWB<bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b10, 0b0011, op7_4,
+ (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD2x2u,
+ "vld2", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDInstruction";
+}
+
+def VLD2d8_UPD : VLD2DWB<0b1000, {0,0,?,?}, "8">;
+def VLD2d16_UPD : VLD2DWB<0b1000, {0,1,?,?}, "16">;
+def VLD2d32_UPD : VLD2DWB<0b1000, {1,0,?,?}, "32">;
+
+def VLD2q8_UPD : VLD2QWB<{0,0,?,?}, "8">;
+def VLD2q16_UPD : VLD2QWB<{0,1,?,?}, "16">;
+def VLD2q32_UPD : VLD2QWB<{1,0,?,?}, "32">;
+
+def VLD2d8Pseudo_UPD : VLDQWBPseudo<IIC_VLD2u>;
+def VLD2d16Pseudo_UPD : VLDQWBPseudo<IIC_VLD2u>;
+def VLD2d32Pseudo_UPD : VLDQWBPseudo<IIC_VLD2u>;
+
+def VLD2q8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD2x2u>;
+def VLD2q16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD2x2u>;
+def VLD2q32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD2x2u>;
+
+// ...with double-spaced registers (for disassembly only):
+def VLD2b8 : VLD2D<0b1001, {0,0,?,?}, "8">;
+def VLD2b16 : VLD2D<0b1001, {0,1,?,?}, "16">;
+def VLD2b32 : VLD2D<0b1001, {1,0,?,?}, "32">;
+def VLD2b8_UPD : VLD2DWB<0b1001, {0,0,?,?}, "8">;
+def VLD2b16_UPD : VLD2DWB<0b1001, {0,1,?,?}, "16">;
+def VLD2b32_UPD : VLD2DWB<0b1001, {1,0,?,?}, "32">;
+
+// VLD3 : Vector Load (multiple 3-element structures)
+class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
+ (ins addrmode6:$Rn), IIC_VLD3,
+ "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLDInstruction";
+}
+
+def VLD3d8 : VLD3D<0b0100, {0,0,0,?}, "8">;
+def VLD3d16 : VLD3D<0b0100, {0,1,0,?}, "16">;
+def VLD3d32 : VLD3D<0b0100, {1,0,0,?}, "32">;
+
+def VLD3d8Pseudo : VLDQQPseudo<IIC_VLD3>;
+def VLD3d16Pseudo : VLDQQPseudo<IIC_VLD3>;
+def VLD3d32Pseudo : VLDQQPseudo<IIC_VLD3>;
+
+// ...with address register writeback:
+class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b10, op11_8, op7_4,
+ (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD3u,
+ "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLDInstruction";
+}
+
+def VLD3d8_UPD : VLD3DWB<0b0100, {0,0,0,?}, "8">;
+def VLD3d16_UPD : VLD3DWB<0b0100, {0,1,0,?}, "16">;
+def VLD3d32_UPD : VLD3DWB<0b0100, {1,0,0,?}, "32">;
+
+def VLD3d8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>;
+def VLD3d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>;
+def VLD3d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>;
+
+// ...with double-spaced registers:
+def VLD3q8 : VLD3D<0b0101, {0,0,0,?}, "8">;
+def VLD3q16 : VLD3D<0b0101, {0,1,0,?}, "16">;
+def VLD3q32 : VLD3D<0b0101, {1,0,0,?}, "32">;
+def VLD3q8_UPD : VLD3DWB<0b0101, {0,0,0,?}, "8">;
+def VLD3q16_UPD : VLD3DWB<0b0101, {0,1,0,?}, "16">;
+def VLD3q32_UPD : VLD3DWB<0b0101, {1,0,0,?}, "32">;
+
+def VLD3q8Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
+def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
+def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
+
+// ...alternate versions to be allocated odd register numbers:
+def VLD3q8oddPseudo : VLDQQQQPseudo<IIC_VLD3>;
+def VLD3q16oddPseudo : VLDQQQQPseudo<IIC_VLD3>;
+def VLD3q32oddPseudo : VLDQQQQPseudo<IIC_VLD3>;
+
+def VLD3q8oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
+def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
+def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
+
+// VLD4 : Vector Load (multiple 4-element structures)
+class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b10, op11_8, op7_4,
+ (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
+ (ins addrmode6:$Rn), IIC_VLD4,
+ "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDInstruction";
+}
+
+def VLD4d8 : VLD4D<0b0000, {0,0,?,?}, "8">;
+def VLD4d16 : VLD4D<0b0000, {0,1,?,?}, "16">;
+def VLD4d32 : VLD4D<0b0000, {1,0,?,?}, "32">;
+
+def VLD4d8Pseudo : VLDQQPseudo<IIC_VLD4>;
+def VLD4d16Pseudo : VLDQQPseudo<IIC_VLD4>;
+def VLD4d32Pseudo : VLDQQPseudo<IIC_VLD4>;
+
+// ...with address register writeback:
+class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b10, op11_8, op7_4,
+ (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD4u,
+ "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDInstruction";
+}
+
+def VLD4d8_UPD : VLD4DWB<0b0000, {0,0,?,?}, "8">;
+def VLD4d16_UPD : VLD4DWB<0b0000, {0,1,?,?}, "16">;
+def VLD4d32_UPD : VLD4DWB<0b0000, {1,0,?,?}, "32">;
+
+def VLD4d8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>;
+def VLD4d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>;
+def VLD4d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>;
+
+// ...with double-spaced registers:
+def VLD4q8 : VLD4D<0b0001, {0,0,?,?}, "8">;
+def VLD4q16 : VLD4D<0b0001, {0,1,?,?}, "16">;
+def VLD4q32 : VLD4D<0b0001, {1,0,?,?}, "32">;
+def VLD4q8_UPD : VLD4DWB<0b0001, {0,0,?,?}, "8">;
+def VLD4q16_UPD : VLD4DWB<0b0001, {0,1,?,?}, "16">;
+def VLD4q32_UPD : VLD4DWB<0b0001, {1,0,?,?}, "32">;
+
+def VLD4q8Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
+def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
+def VLD4q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
+
+// ...alternate versions to be allocated odd register numbers:
+def VLD4q8oddPseudo : VLDQQQQPseudo<IIC_VLD4>;
+def VLD4q16oddPseudo : VLDQQQQPseudo<IIC_VLD4>;
+def VLD4q32oddPseudo : VLDQQQQPseudo<IIC_VLD4>;
+
+def VLD4q8oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
+def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
+def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
+
+} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1
+
+// Classes for VLD*LN pseudo-instructions with multi-register operands.
+// These are expanded to real instructions after register allocation.
+class VLDQLNPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QPR:$dst),
+ (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
+ itin, "$src = $dst">;
+class VLDQLNWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QPR:$src,
+ nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
+class VLDQQLNPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQPR:$dst),
+ (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
+ itin, "$src = $dst">;
+class VLDQQLNWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
+ nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
+class VLDQQQQLNPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQQQPR:$dst),
+ (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
+ itin, "$src = $dst">;
+class VLDQQQQLNWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
+ nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
+
+// VLD1LN : Vector Load (single element to one lane)
+class VLD1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
+ PatFrag LoadOp>
+ : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd),
+ (ins addrmode6:$Rn, DPR:$src, nohash_imm:$lane),
+ IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn",
+ "$src = $Vd",
+ [(set DPR:$Vd, (vector_insert (Ty DPR:$src),
+ (i32 (LoadOp addrmode6:$Rn)),
+ imm:$lane))]> {
+ let Rm = 0b1111;
+ let DecoderMethod = "DecodeVLD1LN";
+}
+class VLD1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
+ PatFrag LoadOp>
+ : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd),
+ (ins addrmode6oneL32:$Rn, DPR:$src, nohash_imm:$lane),
+ IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn",
+ "$src = $Vd",
+ [(set DPR:$Vd, (vector_insert (Ty DPR:$src),
+ (i32 (LoadOp addrmode6oneL32:$Rn)),
+ imm:$lane))]> {
+ let Rm = 0b1111;
+ let DecoderMethod = "DecodeVLD1LN";
+}
+class VLD1QLNPseudo<ValueType Ty, PatFrag LoadOp> : VLDQLNPseudo<IIC_VLD1ln> {
+ let Pattern = [(set QPR:$dst, (vector_insert (Ty QPR:$src),
+ (i32 (LoadOp addrmode6:$addr)),
+ imm:$lane))];
+}
+
+def VLD1LNd8 : VLD1LN<0b0000, {?,?,?,0}, "8", v8i8, extloadi8> {
+ let Inst{7-5} = lane{2-0};
+}
+def VLD1LNd16 : VLD1LN<0b0100, {?,?,0,?}, "16", v4i16, extloadi16> {
+ let Inst{7-6} = lane{1-0};
+ let Inst{4} = Rn{4};
+}
+def VLD1LNd32 : VLD1LN32<0b1000, {?,0,?,?}, "32", v2i32, load> {
+ let Inst{7} = lane{0};
+ let Inst{5} = Rn{4};
+ let Inst{4} = Rn{4};
+}
+
+def VLD1LNq8Pseudo : VLD1QLNPseudo<v16i8, extloadi8>;
+def VLD1LNq16Pseudo : VLD1QLNPseudo<v8i16, extloadi16>;
+def VLD1LNq32Pseudo : VLD1QLNPseudo<v4i32, load>;
+
+def : Pat<(vector_insert (v2f32 DPR:$src),
+ (f32 (load addrmode6:$addr)), imm:$lane),
+ (VLD1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
+def : Pat<(vector_insert (v4f32 QPR:$src),
+ (f32 (load addrmode6:$addr)), imm:$lane),
+ (VLD1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
+
+let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
+
+// ...with address register writeback:
+class VLD1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$src, nohash_imm:$lane), IIC_VLD1lnu, "vld1", Dt,
+ "\\{$Vd[$lane]\\}, $Rn$Rm",
+ "$src = $Vd, $Rn.addr = $wb", []> {
+ let DecoderMethod = "DecodeVLD1LN";
+}
+
+def VLD1LNd8_UPD : VLD1LNWB<0b0000, {?,?,?,0}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VLD1LNd16_UPD : VLD1LNWB<0b0100, {?,?,0,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+ let Inst{4} = Rn{4};
+}
+def VLD1LNd32_UPD : VLD1LNWB<0b1000, {?,0,?,?}, "32"> {
+ let Inst{7} = lane{0};
+ let Inst{5} = Rn{4};
+ let Inst{4} = Rn{4};
+}
+
+def VLD1LNq8Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>;
+def VLD1LNq16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>;
+def VLD1LNq32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>;
+
+// VLD2LN : Vector Load (single 2-element structure to one lane)
+class VLD2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2),
+ (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, nohash_imm:$lane),
+ IIC_VLD2ln, "vld2", Dt, "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn",
+ "$src1 = $Vd, $src2 = $dst2", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD2LN";
+}
+
+def VLD2LNd8 : VLD2LN<0b0001, {?,?,?,?}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VLD2LNd16 : VLD2LN<0b0101, {?,?,0,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD2LNd32 : VLD2LN<0b1001, {?,0,0,?}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VLD2LNd8Pseudo : VLDQLNPseudo<IIC_VLD2ln>;
+def VLD2LNd16Pseudo : VLDQLNPseudo<IIC_VLD2ln>;
+def VLD2LNd32Pseudo : VLDQLNPseudo<IIC_VLD2ln>;
+
+// ...with double-spaced registers:
+def VLD2LNq16 : VLD2LN<0b0101, {?,?,1,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD2LNq32 : VLD2LN<0b1001, {?,1,0,?}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VLD2LNq16Pseudo : VLDQQLNPseudo<IIC_VLD2ln>;
+def VLD2LNq32Pseudo : VLDQQLNPseudo<IIC_VLD2ln>;
+
+// ...with address register writeback:
+class VLD2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VLD2lnu, "vld2", Dt,
+ "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn$Rm",
+ "$src1 = $Vd, $src2 = $dst2, $Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD2LN";
+}
+
+def VLD2LNd8_UPD : VLD2LNWB<0b0001, {?,?,?,?}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VLD2LNd16_UPD : VLD2LNWB<0b0101, {?,?,0,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD2LNd32_UPD : VLD2LNWB<0b1001, {?,0,0,?}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VLD2LNd8Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>;
+def VLD2LNd16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>;
+def VLD2LNd32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>;
+
+def VLD2LNq16_UPD : VLD2LNWB<0b0101, {?,?,1,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD2LNq32_UPD : VLD2LNWB<0b1001, {?,1,0,?}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VLD2LNq16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>;
+def VLD2LNq32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>;
+
+// VLD3LN : Vector Load (single 3-element structure to one lane)
+class VLD3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
+ (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3,
+ nohash_imm:$lane), IIC_VLD3ln, "vld3", Dt,
+ "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn",
+ "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3", []> {
+ let Rm = 0b1111;
+ let DecoderMethod = "DecodeVLD3LN";
+}
+
+def VLD3LNd8 : VLD3LN<0b0010, {?,?,?,0}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VLD3LNd16 : VLD3LN<0b0110, {?,?,0,0}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD3LNd32 : VLD3LN<0b1010, {?,0,0,0}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VLD3LNd8Pseudo : VLDQQLNPseudo<IIC_VLD3ln>;
+def VLD3LNd16Pseudo : VLDQQLNPseudo<IIC_VLD3ln>;
+def VLD3LNd32Pseudo : VLDQQLNPseudo<IIC_VLD3ln>;
+
+// ...with double-spaced registers:
+def VLD3LNq16 : VLD3LN<0b0110, {?,?,1,0}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD3LNq32 : VLD3LN<0b1010, {?,1,0,0}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VLD3LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>;
+def VLD3LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>;
+
+// ...with address register writeback:
+class VLD3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b10, op11_8, op7_4,
+ (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane),
+ IIC_VLD3lnu, "vld3", Dt,
+ "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn$Rm",
+ "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $Rn.addr = $wb",
+ []> {
+ let DecoderMethod = "DecodeVLD3LN";
+}
+
+def VLD3LNd8_UPD : VLD3LNWB<0b0010, {?,?,?,0}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VLD3LNd16_UPD : VLD3LNWB<0b0110, {?,?,0,0}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VLD3LNd8Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>;
+def VLD3LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>;
+def VLD3LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>;
+
+def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VLD3LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>;
+def VLD3LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>;
+
+// VLD4LN : Vector Load (single 4-element structure to one lane)
+class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b10, op11_8, op7_4,
+ (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
+ (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
+ nohash_imm:$lane), IIC_VLD4ln, "vld4", Dt,
+ "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn",
+ "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD4LN";
+}
+
+def VLD4LNd8 : VLD4LN<0b0011, {?,?,?,?}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VLD4LNd16 : VLD4LN<0b0111, {?,?,0,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32"> {
+ let Inst{7} = lane{0};
+ let Inst{5} = Rn{5};
+}
+
+def VLD4LNd8Pseudo : VLDQQLNPseudo<IIC_VLD4ln>;
+def VLD4LNd16Pseudo : VLDQQLNPseudo<IIC_VLD4ln>;
+def VLD4LNd32Pseudo : VLDQQLNPseudo<IIC_VLD4ln>;
+
+// ...with double-spaced registers:
+def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32"> {
+ let Inst{7} = lane{0};
+ let Inst{5} = Rn{5};
+}
+
+def VLD4LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>;
+def VLD4LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>;
+
+// ...with address register writeback:
+class VLD4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b10, op11_8, op7_4,
+ (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
+ IIC_VLD4lnu, "vld4", Dt,
+"\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn$Rm",
+"$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $Rn.addr = $wb",
+ []> {
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD4LN" ;
+}
+
+def VLD4LNd8_UPD : VLD4LNWB<0b0011, {?,?,?,?}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VLD4LNd16_UPD : VLD4LNWB<0b0111, {?,?,0,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32"> {
+ let Inst{7} = lane{0};
+ let Inst{5} = Rn{5};
+}
+
+def VLD4LNd8Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>;
+def VLD4LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>;
+def VLD4LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>;
+
+def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32"> {
+ let Inst{7} = lane{0};
+ let Inst{5} = Rn{5};
+}
+
+def VLD4LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>;
+def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>;
+
+} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1
+
+// VLD1DUP : Vector Load (single element to all lanes)
+class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp>
+ : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd), (ins addrmode6dup:$Rn),
+ IIC_VLD1dup, "vld1", Dt, "\\{$Vd[]\\}, $Rn", "",
+ [(set DPR:$Vd, (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD1DupInstruction";
+}
+class VLD1QDUPPseudo<ValueType Ty, PatFrag LoadOp> : VLDQPseudo<IIC_VLD1dup> {
+ let Pattern = [(set QPR:$dst,
+ (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$addr)))))];
+}
+
+def VLD1DUPd8 : VLD1DUP<{0,0,0,?}, "8", v8i8, extloadi8>;
+def VLD1DUPd16 : VLD1DUP<{0,1,0,?}, "16", v4i16, extloadi16>;
+def VLD1DUPd32 : VLD1DUP<{1,0,0,?}, "32", v2i32, load>;
+
+def VLD1DUPq8Pseudo : VLD1QDUPPseudo<v16i8, extloadi8>;
+def VLD1DUPq16Pseudo : VLD1QDUPPseudo<v8i16, extloadi16>;
+def VLD1DUPq32Pseudo : VLD1QDUPPseudo<v4i32, load>;
+
+def : Pat<(v2f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
+ (VLD1DUPd32 addrmode6:$addr)>;
+def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
+ (VLD1DUPq32Pseudo addrmode6:$addr)>;
+
+let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
+
+class VLD1QDUP<bits<4> op7_4, string Dt>
+ : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd, DPR:$dst2),
+ (ins addrmode6dup:$Rn), IIC_VLD1dup,
+ "vld1", Dt, "\\{$Vd[], $dst2[]\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD1DupInstruction";
+}
+
+def VLD1DUPq8 : VLD1QDUP<{0,0,1,0}, "8">;
+def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16">;
+def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32">;
+
+// ...with address register writeback:
+class VLD1DUPWB<bits<4> op7_4, string Dt>
+ : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd, GPR:$wb),
+ (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD1dupu,
+ "vld1", Dt, "\\{$Vd[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD1DupInstruction";
+}
+class VLD1QDUPWB<bits<4> op7_4, string Dt>
+ : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
+ (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD1dupu,
+ "vld1", Dt, "\\{$Vd[], $dst2[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD1DupInstruction";
+}
+
+def VLD1DUPd8_UPD : VLD1DUPWB<{0,0,0,0}, "8">;
+def VLD1DUPd16_UPD : VLD1DUPWB<{0,1,0,?}, "16">;
+def VLD1DUPd32_UPD : VLD1DUPWB<{1,0,0,?}, "32">;
+
+def VLD1DUPq8_UPD : VLD1QDUPWB<{0,0,1,0}, "8">;
+def VLD1DUPq16_UPD : VLD1QDUPWB<{0,1,1,?}, "16">;
+def VLD1DUPq32_UPD : VLD1QDUPWB<{1,0,1,?}, "32">;
+
+def VLD1DUPq8Pseudo_UPD : VLDQWBPseudo<IIC_VLD1dupu>;
+def VLD1DUPq16Pseudo_UPD : VLDQWBPseudo<IIC_VLD1dupu>;
+def VLD1DUPq32Pseudo_UPD : VLDQWBPseudo<IIC_VLD1dupu>;
+
+// VLD2DUP : Vector Load (single 2-element structure to all lanes)
+class VLD2DUP<bits<4> op7_4, string Dt>
+ : NLdSt<1, 0b10, 0b1101, op7_4, (outs DPR:$Vd, DPR:$dst2),
+ (ins addrmode6dup:$Rn), IIC_VLD2dup,
+ "vld2", Dt, "\\{$Vd[], $dst2[]\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD2DupInstruction";
+}
+
+def VLD2DUPd8 : VLD2DUP<{0,0,0,?}, "8">;
+def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16">;
+def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32">;
+
+def VLD2DUPd8Pseudo : VLDQPseudo<IIC_VLD2dup>;
+def VLD2DUPd16Pseudo : VLDQPseudo<IIC_VLD2dup>;
+def VLD2DUPd32Pseudo : VLDQPseudo<IIC_VLD2dup>;
+
+// ...with double-spaced registers (not used for codegen):
+def VLD2DUPd8x2 : VLD2DUP<{0,0,1,?}, "8">;
+def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16">;
+def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32">;
+
+// ...with address register writeback:
+class VLD2DUPWB<bits<4> op7_4, string Dt>
+ : NLdSt<1, 0b10, 0b1101, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
+ (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD2dupu,
+ "vld2", Dt, "\\{$Vd[], $dst2[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD2DupInstruction";
+}
+
+def VLD2DUPd8_UPD : VLD2DUPWB<{0,0,0,0}, "8">;
+def VLD2DUPd16_UPD : VLD2DUPWB<{0,1,0,?}, "16">;
+def VLD2DUPd32_UPD : VLD2DUPWB<{1,0,0,?}, "32">;
+
+def VLD2DUPd8x2_UPD : VLD2DUPWB<{0,0,1,0}, "8">;
+def VLD2DUPd16x2_UPD : VLD2DUPWB<{0,1,1,?}, "16">;
+def VLD2DUPd32x2_UPD : VLD2DUPWB<{1,0,1,?}, "32">;
+
+def VLD2DUPd8Pseudo_UPD : VLDQWBPseudo<IIC_VLD2dupu>;
+def VLD2DUPd16Pseudo_UPD : VLDQWBPseudo<IIC_VLD2dupu>;
+def VLD2DUPd32Pseudo_UPD : VLDQWBPseudo<IIC_VLD2dupu>;
+
+// VLD3DUP : Vector Load (single 3-element structure to all lanes)
+class VLD3DUP<bits<4> op7_4, string Dt>
+ : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
+ (ins addrmode6dup:$Rn), IIC_VLD3dup,
+ "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = 0;
+ let DecoderMethod = "DecodeVLD3DupInstruction";
+}
+
+def VLD3DUPd8 : VLD3DUP<{0,0,0,?}, "8">;
+def VLD3DUPd16 : VLD3DUP<{0,1,0,?}, "16">;
+def VLD3DUPd32 : VLD3DUP<{1,0,0,?}, "32">;
+
+def VLD3DUPd8Pseudo : VLDQQPseudo<IIC_VLD3dup>;
+def VLD3DUPd16Pseudo : VLDQQPseudo<IIC_VLD3dup>;
+def VLD3DUPd32Pseudo : VLDQQPseudo<IIC_VLD3dup>;
+
+// ...with double-spaced registers (not used for codegen):
+def VLD3DUPd8x2 : VLD3DUP<{0,0,1,?}, "8">;
+def VLD3DUPd16x2 : VLD3DUP<{0,1,1,?}, "16">;
+def VLD3DUPd32x2 : VLD3DUP<{1,0,1,?}, "32">;
+
+// ...with address register writeback:
+class VLD3DUPWB<bits<4> op7_4, string Dt>
+ : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
+ (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD3dupu,
+ "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = 0;
+ let DecoderMethod = "DecodeVLD3DupInstruction";
+}
+
+def VLD3DUPd8_UPD : VLD3DUPWB<{0,0,0,0}, "8">;
+def VLD3DUPd16_UPD : VLD3DUPWB<{0,1,0,?}, "16">;
+def VLD3DUPd32_UPD : VLD3DUPWB<{1,0,0,?}, "32">;
+
+def VLD3DUPd8x2_UPD : VLD3DUPWB<{0,0,1,0}, "8">;
+def VLD3DUPd16x2_UPD : VLD3DUPWB<{0,1,1,?}, "16">;
+def VLD3DUPd32x2_UPD : VLD3DUPWB<{1,0,1,?}, "32">;
+
+def VLD3DUPd8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
+def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
+def VLD3DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
+
+// VLD4DUP : Vector Load (single 4-element structure to all lanes)
+class VLD4DUP<bits<4> op7_4, string Dt>
+ : NLdSt<1, 0b10, 0b1111, op7_4,
+ (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
+ (ins addrmode6dup:$Rn), IIC_VLD4dup,
+ "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD4DupInstruction";
+}
+
+def VLD4DUPd8 : VLD4DUP<{0,0,0,?}, "8">;
+def VLD4DUPd16 : VLD4DUP<{0,1,0,?}, "16">;
+def VLD4DUPd32 : VLD4DUP<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }
+
+def VLD4DUPd8Pseudo : VLDQQPseudo<IIC_VLD4dup>;
+def VLD4DUPd16Pseudo : VLDQQPseudo<IIC_VLD4dup>;
+def VLD4DUPd32Pseudo : VLDQQPseudo<IIC_VLD4dup>;
+
+// ...with double-spaced registers (not used for codegen):
+def VLD4DUPd8x2 : VLD4DUP<{0,0,1,?}, "8">;
+def VLD4DUPd16x2 : VLD4DUP<{0,1,1,?}, "16">;
+def VLD4DUPd32x2 : VLD4DUP<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }
+
+// ...with address register writeback:
+class VLD4DUPWB<bits<4> op7_4, string Dt>
+ : NLdSt<1, 0b10, 0b1111, op7_4,
+ (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
+ (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD4dupu,
+ "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD4DupInstruction";
+}
+
+def VLD4DUPd8_UPD : VLD4DUPWB<{0,0,0,0}, "8">;
+def VLD4DUPd16_UPD : VLD4DUPWB<{0,1,0,?}, "16">;
+def VLD4DUPd32_UPD : VLD4DUPWB<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }
+
+def VLD4DUPd8x2_UPD : VLD4DUPWB<{0,0,1,0}, "8">;
+def VLD4DUPd16x2_UPD : VLD4DUPWB<{0,1,1,?}, "16">;
+def VLD4DUPd32x2_UPD : VLD4DUPWB<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }
+
+def VLD4DUPd8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;
+def VLD4DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;
+def VLD4DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;
+
+} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1
+
+let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
+
+// Classes for VST* pseudo-instructions with multi-register operands.
+// These are expanded to real instructions after register allocation.
+class VSTQPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src), itin, "">;
+class VSTQWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QPR:$src), itin,
+ "$addr.addr = $wb">;
+class VSTQQPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src), itin, "">;
+class VSTQQWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QQPR:$src), itin,
+ "$addr.addr = $wb">;
+class VSTQQQQPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src), itin, "">;
+class VSTQQQQWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
+ "$addr.addr = $wb">;
+
+// VST1 : Vector Store (multiple single elements)
+class VST1D<bits<4> op7_4, string Dt>
+ : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$Rn, DPR:$Vd),
+ IIC_VST1, "vst1", Dt, "\\{$Vd\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVSTInstruction";
+}
+class VST1Q<bits<4> op7_4, string Dt>
+ : NLdSt<0,0b00,0b1010,op7_4, (outs),
+ (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2), IIC_VST1x2,
+ "vst1", Dt, "\\{$Vd, $src2\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+}
+
+def VST1d8 : VST1D<{0,0,0,?}, "8">;
+def VST1d16 : VST1D<{0,1,0,?}, "16">;
+def VST1d32 : VST1D<{1,0,0,?}, "32">;
+def VST1d64 : VST1D<{1,1,0,?}, "64">;
+
+def VST1q8 : VST1Q<{0,0,?,?}, "8">;
+def VST1q16 : VST1Q<{0,1,?,?}, "16">;
+def VST1q32 : VST1Q<{1,0,?,?}, "32">;
+def VST1q64 : VST1Q<{1,1,?,?}, "64">;
+
+def VST1q8Pseudo : VSTQPseudo<IIC_VST1x2>;
+def VST1q16Pseudo : VSTQPseudo<IIC_VST1x2>;
+def VST1q32Pseudo : VSTQPseudo<IIC_VST1x2>;
+def VST1q64Pseudo : VSTQPseudo<IIC_VST1x2>;
+
+// ...with address register writeback:
+class VST1DWB<bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b00, 0b0111, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm, DPR:$Vd), IIC_VST1u,
+ "vst1", Dt, "\\{$Vd\\}, $Rn$Rm", "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVSTInstruction";
+}
+class VST1QWB<bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b00, 0b1010, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm, DPR:$Vd, DPR:$src2),
+ IIC_VST1x2u, "vst1", Dt, "\\{$Vd, $src2\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+}
+
+def VST1d8_UPD : VST1DWB<{0,0,0,?}, "8">;
+def VST1d16_UPD : VST1DWB<{0,1,0,?}, "16">;
+def VST1d32_UPD : VST1DWB<{1,0,0,?}, "32">;
+def VST1d64_UPD : VST1DWB<{1,1,0,?}, "64">;
+
+def VST1q8_UPD : VST1QWB<{0,0,?,?}, "8">;
+def VST1q16_UPD : VST1QWB<{0,1,?,?}, "16">;
+def VST1q32_UPD : VST1QWB<{1,0,?,?}, "32">;
+def VST1q64_UPD : VST1QWB<{1,1,?,?}, "64">;
+
+def VST1q8Pseudo_UPD : VSTQWBPseudo<IIC_VST1x2u>;
+def VST1q16Pseudo_UPD : VSTQWBPseudo<IIC_VST1x2u>;
+def VST1q32Pseudo_UPD : VSTQWBPseudo<IIC_VST1x2u>;
+def VST1q64Pseudo_UPD : VSTQWBPseudo<IIC_VST1x2u>;
+
+// ...with 3 registers (some of these are only for the disassembler):
+class VST1D3<bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b00, 0b0110, op7_4, (outs),
+ (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3),
+ IIC_VST1x3, "vst1", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVSTInstruction";
+}
+class VST1D3WB<bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b00, 0b0110, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$Vd, DPR:$src2, DPR:$src3),
+ IIC_VST1x3u, "vst1", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVSTInstruction";
+}
+
+def VST1d8T : VST1D3<{0,0,0,?}, "8">;
+def VST1d16T : VST1D3<{0,1,0,?}, "16">;
+def VST1d32T : VST1D3<{1,0,0,?}, "32">;
+def VST1d64T : VST1D3<{1,1,0,?}, "64">;
+
+def VST1d8T_UPD : VST1D3WB<{0,0,0,?}, "8">;
+def VST1d16T_UPD : VST1D3WB<{0,1,0,?}, "16">;
+def VST1d32T_UPD : VST1D3WB<{1,0,0,?}, "32">;
+def VST1d64T_UPD : VST1D3WB<{1,1,0,?}, "64">;
+
+def VST1d64TPseudo : VSTQQPseudo<IIC_VST1x3>;
+def VST1d64TPseudo_UPD : VSTQQWBPseudo<IIC_VST1x3u>;
+
+// ...with 4 registers (some of these are only for the disassembler):
+class VST1D4<bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b00, 0b0010, op7_4, (outs),
+ (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
+ IIC_VST1x4, "vst1", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn", "",
+ []> {
+ let Rm = 0b1111;
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+}
+class VST1D4WB<bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b00, 0b0010, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST1x4u,
+ "vst1", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+}
+
+def VST1d8Q : VST1D4<{0,0,?,?}, "8">;
+def VST1d16Q : VST1D4<{0,1,?,?}, "16">;
+def VST1d32Q : VST1D4<{1,0,?,?}, "32">;
+def VST1d64Q : VST1D4<{1,1,?,?}, "64">;
+
+def VST1d8Q_UPD : VST1D4WB<{0,0,?,?}, "8">;
+def VST1d16Q_UPD : VST1D4WB<{0,1,?,?}, "16">;
+def VST1d32Q_UPD : VST1D4WB<{1,0,?,?}, "32">;
+def VST1d64Q_UPD : VST1D4WB<{1,1,?,?}, "64">;
+
+def VST1d64QPseudo : VSTQQPseudo<IIC_VST1x4>;
+def VST1d64QPseudo_UPD : VSTQQWBPseudo<IIC_VST1x4u>;
+
+// VST2 : Vector Store (multiple 2-element structures)
+class VST2D<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b00, op11_8, op7_4, (outs),
+ (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2),
+ IIC_VST2, "vst2", Dt, "\\{$Vd, $src2\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+}
+class VST2Q<bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b00, 0b0011, op7_4, (outs),
+ (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
+ IIC_VST2x2, "vst2", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn",
+ "", []> {
+ let Rm = 0b1111;
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+}
+
+def VST2d8 : VST2D<0b1000, {0,0,?,?}, "8">;
+def VST2d16 : VST2D<0b1000, {0,1,?,?}, "16">;
+def VST2d32 : VST2D<0b1000, {1,0,?,?}, "32">;
+
+def VST2q8 : VST2Q<{0,0,?,?}, "8">;
+def VST2q16 : VST2Q<{0,1,?,?}, "16">;
+def VST2q32 : VST2Q<{1,0,?,?}, "32">;
+
+def VST2d8Pseudo : VSTQPseudo<IIC_VST2>;
+def VST2d16Pseudo : VSTQPseudo<IIC_VST2>;
+def VST2d32Pseudo : VSTQPseudo<IIC_VST2>;
+
+def VST2q8Pseudo : VSTQQPseudo<IIC_VST2x2>;
+def VST2q16Pseudo : VSTQQPseudo<IIC_VST2x2>;
+def VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>;
+
+// ...with address register writeback:
+class VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm, DPR:$Vd, DPR:$src2),
+ IIC_VST2u, "vst2", Dt, "\\{$Vd, $src2\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+}
+class VST2QWB<bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST2x2u,
+ "vst2", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+}
+
+def VST2d8_UPD : VST2DWB<0b1000, {0,0,?,?}, "8">;
+def VST2d16_UPD : VST2DWB<0b1000, {0,1,?,?}, "16">;
+def VST2d32_UPD : VST2DWB<0b1000, {1,0,?,?}, "32">;
+
+def VST2q8_UPD : VST2QWB<{0,0,?,?}, "8">;
+def VST2q16_UPD : VST2QWB<{0,1,?,?}, "16">;
+def VST2q32_UPD : VST2QWB<{1,0,?,?}, "32">;
+
+def VST2d8Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>;
+def VST2d16Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>;
+def VST2d32Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>;
+
+def VST2q8Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>;
+def VST2q16Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>;
+def VST2q32Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>;
+
+// ...with double-spaced registers (for disassembly only):
+def VST2b8 : VST2D<0b1001, {0,0,?,?}, "8">;
+def VST2b16 : VST2D<0b1001, {0,1,?,?}, "16">;
+def VST2b32 : VST2D<0b1001, {1,0,?,?}, "32">;
+def VST2b8_UPD : VST2DWB<0b1001, {0,0,?,?}, "8">;
+def VST2b16_UPD : VST2DWB<0b1001, {0,1,?,?}, "16">;
+def VST2b32_UPD : VST2DWB<0b1001, {1,0,?,?}, "32">;
+
+// VST3 : Vector Store (multiple 3-element structures)
+class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b00, op11_8, op7_4, (outs),
+ (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3,
+ "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVSTInstruction";
+}
+
+def VST3d8 : VST3D<0b0100, {0,0,0,?}, "8">;
+def VST3d16 : VST3D<0b0100, {0,1,0,?}, "16">;
+def VST3d32 : VST3D<0b0100, {1,0,0,?}, "32">;
+
+def VST3d8Pseudo : VSTQQPseudo<IIC_VST3>;
+def VST3d16Pseudo : VSTQQPseudo<IIC_VST3>;
+def VST3d32Pseudo : VSTQQPseudo<IIC_VST3>;
+
+// ...with address register writeback:
+class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3u,
+ "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVSTInstruction";
+}
+
+def VST3d8_UPD : VST3DWB<0b0100, {0,0,0,?}, "8">;
+def VST3d16_UPD : VST3DWB<0b0100, {0,1,0,?}, "16">;
+def VST3d32_UPD : VST3DWB<0b0100, {1,0,0,?}, "32">;
+
+def VST3d8Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>;
+def VST3d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>;
+def VST3d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>;
+
+// ...with double-spaced registers:
+def VST3q8 : VST3D<0b0101, {0,0,0,?}, "8">;
+def VST3q16 : VST3D<0b0101, {0,1,0,?}, "16">;
+def VST3q32 : VST3D<0b0101, {1,0,0,?}, "32">;
+def VST3q8_UPD : VST3DWB<0b0101, {0,0,0,?}, "8">;
+def VST3q16_UPD : VST3DWB<0b0101, {0,1,0,?}, "16">;
+def VST3q32_UPD : VST3DWB<0b0101, {1,0,0,?}, "32">;
+
+def VST3q8Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
+def VST3q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
+def VST3q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
+
+// ...alternate versions to be allocated odd register numbers:
+def VST3q8oddPseudo : VSTQQQQPseudo<IIC_VST3>;
+def VST3q16oddPseudo : VSTQQQQPseudo<IIC_VST3>;
+def VST3q32oddPseudo : VSTQQQQPseudo<IIC_VST3>;
+
+def VST3q8oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
+def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
+def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
+
+// VST4 : Vector Store (multiple 4-element structures)
+class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b00, op11_8, op7_4, (outs),
+ (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
+ IIC_VST4, "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn",
+ "", []> {
+ let Rm = 0b1111;
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+}
+
+def VST4d8 : VST4D<0b0000, {0,0,?,?}, "8">;
+def VST4d16 : VST4D<0b0000, {0,1,?,?}, "16">;
+def VST4d32 : VST4D<0b0000, {1,0,?,?}, "32">;
+
+def VST4d8Pseudo : VSTQQPseudo<IIC_VST4>;
+def VST4d16Pseudo : VSTQQPseudo<IIC_VST4>;
+def VST4d32Pseudo : VSTQQPseudo<IIC_VST4>;
+
+// ...with address register writeback:
+class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST4u,
+ "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+}
+
+def VST4d8_UPD : VST4DWB<0b0000, {0,0,?,?}, "8">;
+def VST4d16_UPD : VST4DWB<0b0000, {0,1,?,?}, "16">;
+def VST4d32_UPD : VST4DWB<0b0000, {1,0,?,?}, "32">;
+
+def VST4d8Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>;
+def VST4d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>;
+def VST4d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>;
+
+// ...with double-spaced registers:
+def VST4q8 : VST4D<0b0001, {0,0,?,?}, "8">;
+def VST4q16 : VST4D<0b0001, {0,1,?,?}, "16">;
+def VST4q32 : VST4D<0b0001, {1,0,?,?}, "32">;
+def VST4q8_UPD : VST4DWB<0b0001, {0,0,?,?}, "8">;
+def VST4q16_UPD : VST4DWB<0b0001, {0,1,?,?}, "16">;
+def VST4q32_UPD : VST4DWB<0b0001, {1,0,?,?}, "32">;
+
+def VST4q8Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
+def VST4q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
+def VST4q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
+
+// ...alternate versions to be allocated odd register numbers:
+def VST4q8oddPseudo : VSTQQQQPseudo<IIC_VST4>;
+def VST4q16oddPseudo : VSTQQQQPseudo<IIC_VST4>;
+def VST4q32oddPseudo : VSTQQQQPseudo<IIC_VST4>;
+
+def VST4q8oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
+def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
+def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
+
+} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1
+
+// Classes for VST*LN pseudo-instructions with multi-register operands.
+// These are expanded to real instructions after register allocation.
+class VSTQLNPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
+ itin, "">;
+class VSTQLNWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QPR:$src,
+ nohash_imm:$lane), itin, "$addr.addr = $wb">;
+class VSTQQLNPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
+ itin, "">;
+class VSTQQLNWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
+ nohash_imm:$lane), itin, "$addr.addr = $wb">;
+class VSTQQQQLNPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
+ itin, "">;
+class VSTQQQQLNWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
+ nohash_imm:$lane), itin, "$addr.addr = $wb">;
+
+// VST1LN : Vector Store (single element from one lane)
+class VST1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
+ PatFrag StoreOp, SDNode ExtractOp>
+ : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
+ (ins addrmode6:$Rn, DPR:$Vd, nohash_imm:$lane),
+ IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "",
+ [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), addrmode6:$Rn)]> {
+ let Rm = 0b1111;
+ let DecoderMethod = "DecodeVST1LN";
+}
+class VST1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
+ PatFrag StoreOp, SDNode ExtractOp>
+ : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
+ (ins addrmode6oneL32:$Rn, DPR:$Vd, nohash_imm:$lane),
+ IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "",
+ [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), addrmode6oneL32:$Rn)]>{
+ let Rm = 0b1111;
+ let DecoderMethod = "DecodeVST1LN";
+}
+class VST1QLNPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
+ : VSTQLNPseudo<IIC_VST1ln> {
+ let Pattern = [(StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
+ addrmode6:$addr)];
+}
+
+def VST1LNd8 : VST1LN<0b0000, {?,?,?,0}, "8", v8i8, truncstorei8,
+ NEONvgetlaneu> {
+ let Inst{7-5} = lane{2-0};
+}
+def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16", v4i16, truncstorei16,
+ NEONvgetlaneu> {
+ let Inst{7-6} = lane{1-0};
+ let Inst{4} = Rn{5};
+}
+
+def VST1LNd32 : VST1LN32<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt> {
+ let Inst{7} = lane{0};
+ let Inst{5-4} = Rn{5-4};
+}
+
+def VST1LNq8Pseudo : VST1QLNPseudo<v16i8, truncstorei8, NEONvgetlaneu>;
+def VST1LNq16Pseudo : VST1QLNPseudo<v8i16, truncstorei16, NEONvgetlaneu>;
+def VST1LNq32Pseudo : VST1QLNPseudo<v4i32, store, extractelt>;
+
+def : Pat<(store (extractelt (v2f32 DPR:$src), imm:$lane), addrmode6:$addr),
+ (VST1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
+def : Pat<(store (extractelt (v4f32 QPR:$src), imm:$lane), addrmode6:$addr),
+ (VST1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
+
+// ...with address register writeback:
+class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
+ PatFrag StoreOp, SDNode ExtractOp>
+ : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$Vd, nohash_imm:$lane), IIC_VST1lnu, "vst1", Dt,
+ "\\{$Vd[$lane]\\}, $Rn$Rm",
+ "$Rn.addr = $wb",
+ [(set GPR:$wb, (StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane),
+ addrmode6:$Rn, am6offset:$Rm))]> {
+ let DecoderMethod = "DecodeVST1LN";
+}
+class VST1QLNWBPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
+ : VSTQLNWBPseudo<IIC_VST1lnu> {
+ let Pattern = [(set GPR:$wb, (StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
+ addrmode6:$addr, am6offset:$offset))];
+}
+
+def VST1LNd8_UPD : VST1LNWB<0b0000, {?,?,?,0}, "8", v8i8, post_truncsti8,
+ NEONvgetlaneu> {
+ let Inst{7-5} = lane{2-0};
+}
+def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16", v4i16, post_truncsti16,
+ NEONvgetlaneu> {
+ let Inst{7-6} = lane{1-0};
+ let Inst{4} = Rn{5};
+}
+def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32", v2i32, post_store,
+ extractelt> {
+ let Inst{7} = lane{0};
+ let Inst{5-4} = Rn{5-4};
+}
+
+def VST1LNq8Pseudo_UPD : VST1QLNWBPseudo<v16i8, post_truncsti8, NEONvgetlaneu>;
+def VST1LNq16Pseudo_UPD : VST1QLNWBPseudo<v8i16, post_truncsti16,NEONvgetlaneu>;
+def VST1LNq32Pseudo_UPD : VST1QLNWBPseudo<v4i32, post_store, extractelt>;
+
+let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
+
+// VST2LN : Vector Store (single 2-element structure from one lane)
+class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
+ (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, nohash_imm:$lane),
+ IIC_VST2ln, "vst2", Dt, "\\{$Vd[$lane], $src2[$lane]\\}, $Rn",
+ "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVST2LN";
+}
+
+def VST2LNd8 : VST2LN<0b0001, {?,?,?,?}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VST2LNd16 : VST2LN<0b0101, {?,?,0,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VST2LNd32 : VST2LN<0b1001, {?,0,0,?}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VST2LNd8Pseudo : VSTQLNPseudo<IIC_VST2ln>;
+def VST2LNd16Pseudo : VSTQLNPseudo<IIC_VST2ln>;
+def VST2LNd32Pseudo : VSTQLNPseudo<IIC_VST2ln>;
+
+// ...with double-spaced registers:
+def VST2LNq16 : VST2LN<0b0101, {?,?,1,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+ let Inst{4} = Rn{4};
+}
+def VST2LNq32 : VST2LN<0b1001, {?,1,0,?}, "32"> {
+ let Inst{7} = lane{0};
+ let Inst{4} = Rn{4};
+}
+
+def VST2LNq16Pseudo : VSTQQLNPseudo<IIC_VST2ln>;
+def VST2LNq32Pseudo : VSTQQLNPseudo<IIC_VST2ln>;
+
+// ...with address register writeback:
+class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset,
+ DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VST2lnu, "vst2", Dt,
+ "\\{$src1[$lane], $src2[$lane]\\}, $addr$offset",
+ "$addr.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVST2LN";
+}
+
+def VST2LNd8_UPD : VST2LNWB<0b0001, {?,?,?,?}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VST2LNd16_UPD : VST2LNWB<0b0101, {?,?,0,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VST2LNd32_UPD : VST2LNWB<0b1001, {?,0,0,?}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VST2LNd8Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>;
+def VST2LNd16Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>;
+def VST2LNd32Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>;
+
+def VST2LNq16_UPD : VST2LNWB<0b0101, {?,?,1,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VST2LNq32_UPD : VST2LNWB<0b1001, {?,1,0,?}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VST2LNq16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>;
+def VST2LNq32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>;
+
+// VST3LN : Vector Store (single 3-element structure from one lane)
+class VST3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
+ (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3,
+ nohash_imm:$lane), IIC_VST3ln, "vst3", Dt,
+ "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let DecoderMethod = "DecodeVST3LN";
+}
+
+def VST3LNd8 : VST3LN<0b0010, {?,?,?,0}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VST3LNd16 : VST3LN<0b0110, {?,?,0,0}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VST3LNd32 : VST3LN<0b1010, {?,0,0,0}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VST3LNd8Pseudo : VSTQQLNPseudo<IIC_VST3ln>;
+def VST3LNd16Pseudo : VSTQQLNPseudo<IIC_VST3ln>;
+def VST3LNd32Pseudo : VSTQQLNPseudo<IIC_VST3ln>;
+
+// ...with double-spaced registers:
+def VST3LNq16 : VST3LN<0b0110, {?,?,1,0}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VST3LNq32 : VST3LN<0b1010, {?,1,0,0}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VST3LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>;
+def VST3LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>;
+
+// ...with address register writeback:
+class VST3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$Vd, DPR:$src2, DPR:$src3, nohash_imm:$lane),
+ IIC_VST3lnu, "vst3", Dt,
+ "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let DecoderMethod = "DecodeVST3LN";
+}
+
+def VST3LNd8_UPD : VST3LNWB<0b0010, {?,?,?,0}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VST3LNd16_UPD : VST3LNWB<0b0110, {?,?,0,0}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VST3LNd32_UPD : VST3LNWB<0b1010, {?,0,0,0}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VST3LNd8Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>;
+def VST3LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>;
+def VST3LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>;
+
+def VST3LNq16_UPD : VST3LNWB<0b0110, {?,?,1,0}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VST3LNq32_UPD : VST3LNWB<0b1010, {?,1,0,0}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VST3LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>;
+def VST3LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>;
+
+// VST4LN : Vector Store (single 4-element structure from one lane)
+class VST4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
+ (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4,
+ nohash_imm:$lane), IIC_VST4ln, "vst4", Dt,
+ "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn",
+ "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVST4LN";
+}
+
+def VST4LNd8 : VST4LN<0b0011, {?,?,?,?}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VST4LNd16 : VST4LN<0b0111, {?,?,0,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VST4LNd32 : VST4LN<0b1011, {?,0,?,?}, "32"> {
+ let Inst{7} = lane{0};
+ let Inst{5} = Rn{5};
+}
+
+def VST4LNd8Pseudo : VSTQQLNPseudo<IIC_VST4ln>;
+def VST4LNd16Pseudo : VSTQQLNPseudo<IIC_VST4ln>;
+def VST4LNd32Pseudo : VSTQQLNPseudo<IIC_VST4ln>;
+
+// ...with double-spaced registers:
+def VST4LNq16 : VST4LN<0b0111, {?,?,1,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VST4LNq32 : VST4LN<0b1011, {?,1,?,?}, "32"> {
+ let Inst{7} = lane{0};
+ let Inst{5} = Rn{5};
+}
+
+def VST4LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>;
+def VST4LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>;
+
+// ...with address register writeback:
+class VST4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
+ IIC_VST4lnu, "vst4", Dt,
+ "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVST4LN";
+}
+
+def VST4LNd8_UPD : VST4LNWB<0b0011, {?,?,?,?}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VST4LNd16_UPD : VST4LNWB<0b0111, {?,?,0,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32"> {
+ let Inst{7} = lane{0};
+ let Inst{5} = Rn{5};
+}
+
+def VST4LNd8Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>;
+def VST4LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>;
+def VST4LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>;
+
+def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32"> {
+ let Inst{7} = lane{0};
+ let Inst{5} = Rn{5};
+}
+
+def VST4LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>;
+def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>;
+
+} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1
+
+
+//===----------------------------------------------------------------------===//
+// NEON pattern fragments
+//===----------------------------------------------------------------------===//
+
+// Extract D sub-registers of Q registers.
+def DSubReg_i8_reg : SDNodeXForm<imm, [{
+ assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
+ return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/8, MVT::i32);
+}]>;
+def DSubReg_i16_reg : SDNodeXForm<imm, [{
+ assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
+ return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/4, MVT::i32);
+}]>;
+def DSubReg_i32_reg : SDNodeXForm<imm, [{
+ assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
+ return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/2, MVT::i32);
+}]>;
+def DSubReg_f64_reg : SDNodeXForm<imm, [{
+ assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
+ return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue(), MVT::i32);
+}]>;
+
+// Extract S sub-registers of Q/D registers.
+def SSubReg_f32_reg : SDNodeXForm<imm, [{
+ assert(ARM::ssub_3 == ARM::ssub_0+3 && "Unexpected subreg numbering");
+ return CurDAG->getTargetConstant(ARM::ssub_0 + N->getZExtValue(), MVT::i32);
+}]>;
+
+// Translate lane numbers from Q registers to D subregs.
+def SubReg_i8_lane : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue() & 7, MVT::i32);
+}]>;
+def SubReg_i16_lane : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue() & 3, MVT::i32);
+}]>;
+def SubReg_i32_lane : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue() & 1, MVT::i32);
+}]>;
+
+//===----------------------------------------------------------------------===//
+// Instruction Classes
+//===----------------------------------------------------------------------===//
+
+// Basic 2-register operations: double- and quad-register.
+class N2VD<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
+ string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
+ (ins DPR:$Vm), IIC_VUNAD, OpcodeStr, Dt,"$Vd, $Vm", "",
+ [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm))))]>;
+class N2VQ<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
+ string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
+ (ins QPR:$Vm), IIC_VUNAQ, OpcodeStr, Dt,"$Vd, $Vm", "",
+ [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm))))]>;
+
+// Basic 2-register intrinsics, both double- and quad-register.
+class N2VDInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
+ (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
+class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
+ (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
+
+// Narrow 2-register operations.
+class N2VN<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyD, ValueType TyQ, SDNode OpNode>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$Vd),
+ (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set DPR:$Vd, (TyD (OpNode (TyQ QPR:$Vm))))]>;
+
+// Narrow 2-register intrinsics.
+class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyD, ValueType TyQ, Intrinsic IntOp>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$Vd),
+ (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vm))))]>;
+
+// Long 2-register operations (currently only used for VMOVL).
+class N2VL<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode OpNode>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$Vd),
+ (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vm))))]>;
+
+// Long 2-register intrinsics.
+class N2VLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, Intrinsic IntOp>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$Vd),
+ (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vm))))]>;
+
+// 2-register shuffles (VTRN/VZIP/VUZP), both double- and quad-register.
+class N2VDShuffle<bits<2> op19_18, bits<5> op11_7, string OpcodeStr, string Dt>
+ : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 0, 0, (outs DPR:$Vd, DPR:$Vm),
+ (ins DPR:$src1, DPR:$src2), IIC_VPERMD,
+ OpcodeStr, Dt, "$Vd, $Vm",
+ "$src1 = $Vd, $src2 = $Vm", []>;
+class N2VQShuffle<bits<2> op19_18, bits<5> op11_7,
+ InstrItinClass itin, string OpcodeStr, string Dt>
+ : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 1, 0, (outs QPR:$Vd, QPR:$Vm),
+ (ins QPR:$src1, QPR:$src2), itin, OpcodeStr, Dt, "$Vd, $Vm",
+ "$src1 = $Vd, $src2 = $Vm", []>;
+
+// Basic 3-register operations: double- and quad-register.
+class N3VD<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+ [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
+ let isCommutable = Commutable;
+}
+// Same as N3VD but no data type.
+class N3VDX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr,
+ ValueType ResTy, ValueType OpTy,
+ SDNode OpNode, bit Commutable>
+ : N3VX<op24, op23, op21_20, op11_8, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, "$Vd, $Vn, $Vm", "",
+ [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>{
+ let isCommutable = Commutable;
+}
+
+class N3VDSL<bits<2> op21_20, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType Ty, SDNode ShOp>
+ : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
+ (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+ [(set (Ty DPR:$Vd),
+ (Ty (ShOp (Ty DPR:$Vn),
+ (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),imm:$lane)))))]> {
+ let isCommutable = 0;
+}
+class N3VDSL16<bits<2> op21_20, bits<4> op11_8,
+ string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
+ : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
+ (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, IIC_VMULi16D, OpcodeStr, Dt,"$Vd, $Vn, $Vm[$lane]","",
+ [(set (Ty DPR:$Vd),
+ (Ty (ShOp (Ty DPR:$Vn),
+ (Ty (NEONvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
+ let isCommutable = 0;
+}
+
+class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+ [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
+ let isCommutable = Commutable;
+}
+class N3VQX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr,
+ ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
+ : N3VX<op24, op23, op21_20, op11_8, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, "$Vd, $Vn, $Vm", "",
+ [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>{
+ let isCommutable = Commutable;
+}
+class N3VQSL<bits<2> op21_20, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDNode ShOp>
+ : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+ [(set (ResTy QPR:$Vd),
+ (ResTy (ShOp (ResTy QPR:$Vn),
+ (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
+ imm:$lane)))))]> {
+ let isCommutable = 0;
+}
+class N3VQSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDNode ShOp>
+ : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, IIC_VMULi16Q, OpcodeStr, Dt,"$Vd, $Vn, $Vm[$lane]","",
+ [(set (ResTy QPR:$Vd),
+ (ResTy (ShOp (ResTy QPR:$Vn),
+ (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
+ imm:$lane)))))]> {
+ let isCommutable = 0;
+}
+
+// Basic 3-register intrinsics, both double- and quad-register.
+class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ Format f, InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp, bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), f, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+ [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
+ let isCommutable = Commutable;
+}
+class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+ string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp>
+ : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
+ (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+ [(set (Ty DPR:$Vd),
+ (Ty (IntOp (Ty DPR:$Vn),
+ (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),
+ imm:$lane)))))]> {
+ let isCommutable = 0;
+}
+class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+ string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp>
+ : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
+ (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+ [(set (Ty DPR:$Vd),
+ (Ty (IntOp (Ty DPR:$Vn),
+ (Ty (NEONvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
+ let isCommutable = 0;
+}
+class N3VDIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ Format f, InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$Vm, DPR:$Vn), f, itin,
+ OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
+ [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (OpTy DPR:$Vn))))]> {
+ let isCommutable = 0;
+}
+
+class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ Format f, InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp, bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+ [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
+ let isCommutable = Commutable;
+}
+class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+ string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+ [(set (ResTy QPR:$Vd),
+ (ResTy (IntOp (ResTy QPR:$Vn),
+ (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
+ imm:$lane)))))]> {
+ let isCommutable = 0;
+}
+class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+ string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+ [(set (ResTy QPR:$Vd),
+ (ResTy (IntOp (ResTy QPR:$Vn),
+ (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
+ imm:$lane)))))]> {
+ let isCommutable = 0;
+}
+class N3VQIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ Format f, InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ : N3V<op24, op23, op21_20, op11_8, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$Vm, QPR:$Vn), f, itin,
+ OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
+ [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (OpTy QPR:$Vn))))]> {
+ let isCommutable = 0;
+}
+
+// Multiply-Add/Sub operations: double- and quad-register.
+class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType Ty, SDPatternOperator MulOp, SDPatternOperator OpNode>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set DPR:$Vd, (Ty (OpNode DPR:$src1,
+ (Ty (MulOp DPR:$Vn, DPR:$Vm)))))]>;
+
+class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+ string OpcodeStr, string Dt,
+ ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp>
+ : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
+ (outs DPR:$Vd),
+ (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+ [(set (Ty DPR:$Vd),
+ (Ty (ShOp (Ty DPR:$src1),
+ (Ty (MulOp DPR:$Vn,
+ (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),
+ imm:$lane)))))))]>;
+class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+ string OpcodeStr, string Dt,
+ ValueType Ty, SDNode MulOp, SDNode ShOp>
+ : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
+ (outs DPR:$Vd),
+ (ins DPR:$src1, DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+ [(set (Ty DPR:$Vd),
+ (Ty (ShOp (Ty DPR:$src1),
+ (Ty (MulOp DPR:$Vn,
+ (Ty (NEONvduplane (Ty DPR_8:$Vm),
+ imm:$lane)))))))]>;
+
+class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty,
+ SDPatternOperator MulOp, SDPatternOperator OpNode>
+ : N3V<op24, op23, op21_20, op11_8, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set QPR:$Vd, (Ty (OpNode QPR:$src1,
+ (Ty (MulOp QPR:$Vn, QPR:$Vm)))))]>;
+class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+ string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
+ SDPatternOperator MulOp, SDPatternOperator ShOp>
+ : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$Vd),
+ (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+ [(set (ResTy QPR:$Vd),
+ (ResTy (ShOp (ResTy QPR:$src1),
+ (ResTy (MulOp QPR:$Vn,
+ (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
+ imm:$lane)))))))]>;
+class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+ string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy,
+ SDNode MulOp, SDNode ShOp>
+ : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$Vd),
+ (ins QPR:$src1, QPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+ [(set (ResTy QPR:$Vd),
+ (ResTy (ShOp (ResTy QPR:$src1),
+ (ResTy (MulOp QPR:$Vn,
+ (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
+ imm:$lane)))))))]>;
+
+// Neon Intrinsic-Op instructions (VABA): double- and quad-register.
+class N3VDIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType Ty, Intrinsic IntOp, SDNode OpNode>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set DPR:$Vd, (Ty (OpNode DPR:$src1,
+ (Ty (IntOp (Ty DPR:$Vn), (Ty DPR:$Vm))))))]>;
+class N3VQIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType Ty, Intrinsic IntOp, SDNode OpNode>
+ : N3V<op24, op23, op21_20, op11_8, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set QPR:$Vd, (Ty (OpNode QPR:$src1,
+ (Ty (IntOp (Ty QPR:$Vn), (Ty QPR:$Vm))))))]>;
+
+// Neon 3-argument intrinsics, both double- and quad-register.
+// The destination register is also used as the first source operand register.
+class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$src1),
+ (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;
+class N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ : N3V<op24, op23, op21_20, op11_8, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src1),
+ (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>;
+
+// Long Multiply-Add/Sub operations.
+class N3VLMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set QPR:$Vd, (OpNode (TyQ QPR:$src1),
+ (TyQ (MulOp (TyD DPR:$Vn),
+ (TyD DPR:$Vm)))))]>;
+class N3VLMulOpSL<bit op24, bits<2> op21_20, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
+ : N3VLane32<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
+ (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+ [(set QPR:$Vd,
+ (OpNode (TyQ QPR:$src1),
+ (TyQ (MulOp (TyD DPR:$Vn),
+ (TyD (NEONvduplane (TyD DPR_VFP2:$Vm),
+ imm:$lane))))))]>;
+class N3VLMulOpSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
+ : N3VLane16<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
+ (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+ [(set QPR:$Vd,
+ (OpNode (TyQ QPR:$src1),
+ (TyQ (MulOp (TyD DPR:$Vn),
+ (TyD (NEONvduplane (TyD DPR_8:$Vm),
+ imm:$lane))))))]>;
+
+// Long Intrinsic-Op vector operations with explicit extend (VABAL).
+class N3VLIntExtOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, Intrinsic IntOp, SDNode ExtOp,
+ SDNode OpNode>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set QPR:$Vd, (OpNode (TyQ QPR:$src1),
+ (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
+ (TyD DPR:$Vm)))))))]>;
+
+// Neon Long 3-argument intrinsic. The destination register is
+// a quad-register and is also used as the first source operand register.
+class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, Intrinsic IntOp>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set QPR:$Vd,
+ (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$Vn), (TyD DPR:$Vm))))]>;
+class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+ string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$Vd),
+ (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+ [(set (ResTy QPR:$Vd),
+ (ResTy (IntOp (ResTy QPR:$src1),
+ (OpTy DPR:$Vn),
+ (OpTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
+ imm:$lane)))))]>;
+class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$Vd),
+ (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+ [(set (ResTy QPR:$Vd),
+ (ResTy (IntOp (ResTy QPR:$src1),
+ (OpTy DPR:$Vn),
+ (OpTy (NEONvduplane (OpTy DPR_8:$Vm),
+ imm:$lane)))))]>;
+
+// Narrowing 3-register intrinsics.
+class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ string OpcodeStr, string Dt, ValueType TyD, ValueType TyQ,
+ Intrinsic IntOp, bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs DPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINi4D,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+ [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vn), (TyQ QPR:$Vm))))]> {
+ let isCommutable = Commutable;
+}
+
+// Long 3-register operations.
+class N3VL<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode OpNode, bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+ [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
+ let isCommutable = Commutable;
+}
+class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode OpNode>
+ : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+ [(set QPR:$Vd,
+ (TyQ (OpNode (TyD DPR:$Vn),
+ (TyD (NEONvduplane (TyD DPR_VFP2:$Vm),imm:$lane)))))]>;
+class N3VLSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode OpNode>
+ : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+ [(set QPR:$Vd,
+ (TyQ (OpNode (TyD DPR:$Vn),
+ (TyD (NEONvduplane (TyD DPR_8:$Vm), imm:$lane)))))]>;
+
+// Long 3-register operations with explicitly extended operands.
+class N3VLExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode OpNode, SDNode ExtOp,
+ bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+ [(set QPR:$Vd, (OpNode (TyQ (ExtOp (TyD DPR:$Vn))),
+ (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
+ let isCommutable = Commutable;
+}
+
+// Long 3-register intrinsics with explicit extend (VABDL).
+class N3VLIntExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, Intrinsic IntOp, SDNode ExtOp,
+ bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+ [(set QPR:$Vd, (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
+ (TyD DPR:$Vm))))))]> {
+ let isCommutable = Commutable;
+}
+
+// Long 3-register intrinsics.
+class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, Intrinsic IntOp, bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+ [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
+ let isCommutable = Commutable;
+}
+class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+ string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+ [(set (ResTy QPR:$Vd),
+ (ResTy (IntOp (OpTy DPR:$Vn),
+ (OpTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
+ imm:$lane)))))]>;
+class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+ [(set (ResTy QPR:$Vd),
+ (ResTy (IntOp (OpTy DPR:$Vn),
+ (OpTy (NEONvduplane (OpTy DPR_8:$Vm),
+ imm:$lane)))))]>;
+
+// Wide 3-register operations.
+class N3VW<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD,
+ SDNode OpNode, SDNode ExtOp, bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs QPR:$Vd), (ins QPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VSUBiD,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+ [(set QPR:$Vd, (OpNode (TyQ QPR:$Vn),
+ (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
+ let isCommutable = Commutable;
+}
+
+// Pairwise long 2-register intrinsics, both double- and quad-register.
+class N2VDPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op4,
+ string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
+ (ins DPR:$Vm), IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
+class N2VQPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op4,
+ string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
+ (ins QPR:$Vm), IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
+
+// Pairwise long 2-register accumulate intrinsics,
+// both double- and quad-register.
+// The destination register is also used as the first source operand register.
+class N2VDPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op4,
+ string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vm), IIC_VPALiD,
+ OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
+ [(set DPR:$Vd, (ResTy (IntOp (ResTy DPR:$src1), (OpTy DPR:$Vm))))]>;
+class N2VQPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op4,
+ string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vm), IIC_VPALiQ,
+ OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
+ [(set QPR:$Vd, (ResTy (IntOp (ResTy QPR:$src1), (OpTy QPR:$Vm))))]>;
+
+// Shift by immediate,
+// both double- and quad-register.
+class N2VDSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
+ Format f, InstrItinClass itin, Operand ImmTy,
+ string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode>
+ : N2VImm<op24, op23, op11_8, op7, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), f, itin,
+ OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
+ [(set DPR:$Vd, (Ty (OpNode (Ty DPR:$Vm), (i32 imm:$SIMM))))]>;
+class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
+ Format f, InstrItinClass itin, Operand ImmTy,
+ string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode>
+ : N2VImm<op24, op23, op11_8, op7, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), f, itin,
+ OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
+ [(set QPR:$Vd, (Ty (OpNode (Ty QPR:$Vm), (i32 imm:$SIMM))))]>;
+
+// Long shift by immediate.
+class N2VLSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
+ string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDNode OpNode>
+ : N2VImm<op24, op23, op11_8, op7, op6, op4,
+ (outs QPR:$Vd), (ins DPR:$Vm, i32imm:$SIMM), N2RegVShLFrm,
+ IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
+ [(set QPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm),
+ (i32 imm:$SIMM))))]>;
+
+// Narrow shift by immediate.
+class N2VNSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Operand ImmTy, SDNode OpNode>
+ : N2VImm<op24, op23, op11_8, op7, op6, op4,
+ (outs DPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
+ [(set DPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm),
+ (i32 imm:$SIMM))))]>;
+
+// Shift right by immediate and accumulate,
+// both double- and quad-register.
+class N2VDShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
+ Operand ImmTy, string OpcodeStr, string Dt,
+ ValueType Ty, SDNode ShOp>
+ : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd),
+ (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, IIC_VPALiD,
+ OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
+ [(set DPR:$Vd, (Ty (add DPR:$src1,
+ (Ty (ShOp DPR:$Vm, (i32 imm:$SIMM))))))]>;
+class N2VQShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
+ Operand ImmTy, string OpcodeStr, string Dt,
+ ValueType Ty, SDNode ShOp>
+ : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd),
+ (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, IIC_VPALiD,
+ OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
+ [(set QPR:$Vd, (Ty (add QPR:$src1,
+ (Ty (ShOp QPR:$Vm, (i32 imm:$SIMM))))))]>;
+
+// Shift by immediate and insert,
+// both double- and quad-register.
+class N2VDShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
+ Operand ImmTy, Format f, string OpcodeStr, string Dt,
+ ValueType Ty,SDNode ShOp>
+ : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd),
+ (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiD,
+ OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
+ [(set DPR:$Vd, (Ty (ShOp DPR:$src1, DPR:$Vm, (i32 imm:$SIMM))))]>;
+class N2VQShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
+ Operand ImmTy, Format f, string OpcodeStr, string Dt,
+ ValueType Ty,SDNode ShOp>
+ : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd),
+ (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiQ,
+ OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
+ [(set QPR:$Vd, (Ty (ShOp QPR:$src1, QPR:$Vm, (i32 imm:$SIMM))))]>;
+
+// Convert, with fractional bits immediate,
+// both double- and quad-register.
+class N2VCvtD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
+ string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
+ Intrinsic IntOp>
+ : N2VImm<op24, op23, op11_8, op7, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm,
+ IIC_VUNAD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
+ [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (i32 imm:$SIMM))))]>;
+class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
+ string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
+ Intrinsic IntOp>
+ : N2VImm<op24, op23, op11_8, op7, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm,
+ IIC_VUNAQ, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
+ [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (i32 imm:$SIMM))))]>;
+
+//===----------------------------------------------------------------------===//
+// Multiclasses
+//===----------------------------------------------------------------------===//
+
+// Abbreviations used in multiclass suffixes:
+// Q = quarter int (8 bit) elements
+// H = half int (16 bit) elements
+// S = single int (32 bit) elements
+// D = double int (64 bit) elements
+
+// Neon 2-register vector operations and intrinsics.
+
+// Neon 2-register comparisons.
+// source operand element sizes of 8, 16 and 32 bits:
+multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
+ bits<5> op11_7, bit op4, string opc, string Dt,
+ string asm, SDNode OpNode> {
+ // 64-bit vector types.
+ def v8i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
+ opc, !strconcat(Dt, "8"), asm, "",
+ [(set DPR:$Vd, (v8i8 (OpNode (v8i8 DPR:$Vm))))]>;
+ def v4i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
+ opc, !strconcat(Dt, "16"), asm, "",
+ [(set DPR:$Vd, (v4i16 (OpNode (v4i16 DPR:$Vm))))]>;
+ def v2i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
+ opc, !strconcat(Dt, "32"), asm, "",
+ [(set DPR:$Vd, (v2i32 (OpNode (v2i32 DPR:$Vm))))]>;
+ def v2f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
+ opc, "f32", asm, "",
+ [(set DPR:$Vd, (v2i32 (OpNode (v2f32 DPR:$Vm))))]> {
+ let Inst{10} = 1; // overwrite F = 1
+ }
+
+ // 128-bit vector types.
+ def v16i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
+ opc, !strconcat(Dt, "8"), asm, "",
+ [(set QPR:$Vd, (v16i8 (OpNode (v16i8 QPR:$Vm))))]>;
+ def v8i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
+ opc, !strconcat(Dt, "16"), asm, "",
+ [(set QPR:$Vd, (v8i16 (OpNode (v8i16 QPR:$Vm))))]>;
+ def v4i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
+ opc, !strconcat(Dt, "32"), asm, "",
+ [(set QPR:$Vd, (v4i32 (OpNode (v4i32 QPR:$Vm))))]>;
+ def v4f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
+ opc, "f32", asm, "",
+ [(set QPR:$Vd, (v4i32 (OpNode (v4f32 QPR:$Vm))))]> {
+ let Inst{10} = 1; // overwrite F = 1
+ }
+}
+
+
+// Neon 2-register vector intrinsics,
+// element sizes of 8, 16 and 32 bits:
+multiclass N2VInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
+ bits<5> op11_7, bit op4,
+ InstrItinClass itinD, InstrItinClass itinQ,
+ string OpcodeStr, string Dt, Intrinsic IntOp> {
+ // 64-bit vector types.
+ def v8i8 : N2VDInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
+ itinD, OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
+ def v4i16 : N2VDInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
+ itinD, OpcodeStr, !strconcat(Dt, "16"),v4i16,v4i16,IntOp>;
+ def v2i32 : N2VDInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
+ itinD, OpcodeStr, !strconcat(Dt, "32"),v2i32,v2i32,IntOp>;
+
+ // 128-bit vector types.
+ def v16i8 : N2VQInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
+ itinQ, OpcodeStr, !strconcat(Dt, "8"), v16i8,v16i8,IntOp>;
+ def v8i16 : N2VQInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
+ itinQ, OpcodeStr, !strconcat(Dt, "16"),v8i16,v8i16,IntOp>;
+ def v4i32 : N2VQInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
+ itinQ, OpcodeStr, !strconcat(Dt, "32"),v4i32,v4i32,IntOp>;
+}
+
+
+// Neon Narrowing 2-register vector operations,
+// source operand element sizes of 16, 32 and 64 bits:
+multiclass N2VN_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
+ bits<5> op11_7, bit op6, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ SDNode OpNode> {
+ def v8i8 : N2VN<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
+ itin, OpcodeStr, !strconcat(Dt, "16"),
+ v8i8, v8i16, OpNode>;
+ def v4i16 : N2VN<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
+ itin, OpcodeStr, !strconcat(Dt, "32"),
+ v4i16, v4i32, OpNode>;
+ def v2i32 : N2VN<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
+ itin, OpcodeStr, !strconcat(Dt, "64"),
+ v2i32, v2i64, OpNode>;
+}
+
+// Neon Narrowing 2-register vector intrinsics,
+// source operand element sizes of 16, 32 and 64 bits:
+multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
+ bits<5> op11_7, bit op6, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ Intrinsic IntOp> {
+ def v8i8 : N2VNInt<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
+ itin, OpcodeStr, !strconcat(Dt, "16"),
+ v8i8, v8i16, IntOp>;
+ def v4i16 : N2VNInt<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
+ itin, OpcodeStr, !strconcat(Dt, "32"),
+ v4i16, v4i32, IntOp>;
+ def v2i32 : N2VNInt<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
+ itin, OpcodeStr, !strconcat(Dt, "64"),
+ v2i32, v2i64, IntOp>;
+}
+
+
+// Neon Lengthening 2-register vector intrinsic (currently specific to VMOVL).
+// source operand element sizes of 16, 32 and 64 bits:
+multiclass N2VL_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4,
+ string OpcodeStr, string Dt, SDNode OpNode> {
+ def v8i16 : N2VL<op24_23, 0b00, 0b10, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
+ OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode>;
+ def v4i32 : N2VL<op24_23, 0b01, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
+ OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
+ def v2i64 : N2VL<op24_23, 0b10, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
+ OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
+}
+
+
+// Neon 3-register vector operations.
+
+// First with only element sizes of 8, 16 and 32 bits:
+multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
+ string OpcodeStr, string Dt,
+ SDNode OpNode, bit Commutable = 0> {
+ // 64-bit vector types.
+ def v8i8 : N3VD<op24, op23, 0b00, op11_8, op4, itinD16,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v8i8, v8i8, OpNode, Commutable>;
+ def v4i16 : N3VD<op24, op23, 0b01, op11_8, op4, itinD16,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v4i16, v4i16, OpNode, Commutable>;
+ def v2i32 : N3VD<op24, op23, 0b10, op11_8, op4, itinD32,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v2i32, v2i32, OpNode, Commutable>;
+
+ // 128-bit vector types.
+ def v16i8 : N3VQ<op24, op23, 0b00, op11_8, op4, itinQ16,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v16i8, v16i8, OpNode, Commutable>;
+ def v8i16 : N3VQ<op24, op23, 0b01, op11_8, op4, itinQ16,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v8i16, v8i16, OpNode, Commutable>;
+ def v4i32 : N3VQ<op24, op23, 0b10, op11_8, op4, itinQ32,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v4i32, v4i32, OpNode, Commutable>;
+}
+
+multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, string Dt, SDNode ShOp> {
+ def v4i16 : N3VDSL16<0b01, op11_8, OpcodeStr, !strconcat(Dt, "16"),
+ v4i16, ShOp>;
+ def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, OpcodeStr, !strconcat(Dt,"32"),
+ v2i32, ShOp>;
+ def v8i16 : N3VQSL16<0b01, op11_8, OpcodeStr, !strconcat(Dt, "16"),
+ v8i16, v4i16, ShOp>;
+ def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, OpcodeStr, !strconcat(Dt,"32"),
+ v4i32, v2i32, ShOp>;
+}
+
+// ....then also with element size 64 bits:
+multiclass N3V_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itinD, InstrItinClass itinQ,
+ string OpcodeStr, string Dt,
+ SDNode OpNode, bit Commutable = 0>
+ : N3V_QHS<op24, op23, op11_8, op4, itinD, itinD, itinQ, itinQ,
+ OpcodeStr, Dt, OpNode, Commutable> {
+ def v1i64 : N3VD<op24, op23, 0b11, op11_8, op4, itinD,
+ OpcodeStr, !strconcat(Dt, "64"),
+ v1i64, v1i64, OpNode, Commutable>;
+ def v2i64 : N3VQ<op24, op23, 0b11, op11_8, op4, itinQ,
+ OpcodeStr, !strconcat(Dt, "64"),
+ v2i64, v2i64, OpNode, Commutable>;
+}
+
+
+// Neon 3-register vector intrinsics.
+
+// First with only element sizes of 16 and 32 bits:
+multiclass N3VInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
+ string OpcodeStr, string Dt,
+ Intrinsic IntOp, bit Commutable = 0> {
+ // 64-bit vector types.
+ def v4i16 : N3VDInt<op24, op23, 0b01, op11_8, op4, f, itinD16,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v4i16, v4i16, IntOp, Commutable>;
+ def v2i32 : N3VDInt<op24, op23, 0b10, op11_8, op4, f, itinD32,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v2i32, v2i32, IntOp, Commutable>;
+
+ // 128-bit vector types.
+ def v8i16 : N3VQInt<op24, op23, 0b01, op11_8, op4, f, itinQ16,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v8i16, v8i16, IntOp, Commutable>;
+ def v4i32 : N3VQInt<op24, op23, 0b10, op11_8, op4, f, itinQ32,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v4i32, v4i32, IntOp, Commutable>;
+}
+multiclass N3VInt_HSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
+ string OpcodeStr, string Dt,
+ Intrinsic IntOp> {
+ // 64-bit vector types.
+ def v4i16 : N3VDIntSh<op24, op23, 0b01, op11_8, op4, f, itinD16,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v4i16, v4i16, IntOp>;
+ def v2i32 : N3VDIntSh<op24, op23, 0b10, op11_8, op4, f, itinD32,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v2i32, v2i32, IntOp>;
+
+ // 128-bit vector types.
+ def v8i16 : N3VQIntSh<op24, op23, 0b01, op11_8, op4, f, itinQ16,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v8i16, v8i16, IntOp>;
+ def v4i32 : N3VQIntSh<op24, op23, 0b10, op11_8, op4, f, itinQ32,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v4i32, v4i32, IntOp>;
+}
+
+multiclass N3VIntSL_HS<bits<4> op11_8,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
+ string OpcodeStr, string Dt, Intrinsic IntOp> {
+ def v4i16 : N3VDIntSL16<0b01, op11_8, itinD16,
+ OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp>;
+ def v2i32 : N3VDIntSL<0b10, op11_8, itinD32,
+ OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp>;
+ def v8i16 : N3VQIntSL16<0b01, op11_8, itinQ16,
+ OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16, IntOp>;
+ def v4i32 : N3VQIntSL<0b10, op11_8, itinQ32,
+ OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32, IntOp>;
+}
+
+// ....then also with element size of 8 bits:
+multiclass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
+ string OpcodeStr, string Dt,
+ Intrinsic IntOp, bit Commutable = 0>
+ : N3VInt_HS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
+ OpcodeStr, Dt, IntOp, Commutable> {
+ def v8i8 : N3VDInt<op24, op23, 0b00, op11_8, op4, f, itinD16,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v8i8, v8i8, IntOp, Commutable>;
+ def v16i8 : N3VQInt<op24, op23, 0b00, op11_8, op4, f, itinQ16,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v16i8, v16i8, IntOp, Commutable>;
+}
+multiclass N3VInt_QHSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
+ string OpcodeStr, string Dt,
+ Intrinsic IntOp>
+ : N3VInt_HSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
+ OpcodeStr, Dt, IntOp> {
+ def v8i8 : N3VDIntSh<op24, op23, 0b00, op11_8, op4, f, itinD16,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v8i8, v8i8, IntOp>;
+ def v16i8 : N3VQIntSh<op24, op23, 0b00, op11_8, op4, f, itinQ16,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v16i8, v16i8, IntOp>;
+}
+
+
+// ....then also with element size of 64 bits:
+multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
+ string OpcodeStr, string Dt,
+ Intrinsic IntOp, bit Commutable = 0>
+ : N3VInt_QHS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
+ OpcodeStr, Dt, IntOp, Commutable> {
+ def v1i64 : N3VDInt<op24, op23, 0b11, op11_8, op4, f, itinD32,
+ OpcodeStr, !strconcat(Dt, "64"),
+ v1i64, v1i64, IntOp, Commutable>;
+ def v2i64 : N3VQInt<op24, op23, 0b11, op11_8, op4, f, itinQ32,
+ OpcodeStr, !strconcat(Dt, "64"),
+ v2i64, v2i64, IntOp, Commutable>;
+}
+multiclass N3VInt_QHSDSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
+ string OpcodeStr, string Dt,
+ Intrinsic IntOp>
+ : N3VInt_QHSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
+ OpcodeStr, Dt, IntOp> {
+ def v1i64 : N3VDIntSh<op24, op23, 0b11, op11_8, op4, f, itinD32,
+ OpcodeStr, !strconcat(Dt, "64"),
+ v1i64, v1i64, IntOp>;
+ def v2i64 : N3VQIntSh<op24, op23, 0b11, op11_8, op4, f, itinQ32,
+ OpcodeStr, !strconcat(Dt, "64"),
+ v2i64, v2i64, IntOp>;
+}
+
+// Neon Narrowing 3-register vector intrinsics,
+// source operand element sizes of 16, 32 and 64 bits:
+multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4,
+ string OpcodeStr, string Dt,
+ Intrinsic IntOp, bit Commutable = 0> {
+ def v8i8 : N3VNInt<op24, op23, 0b00, op11_8, op4,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v8i8, v8i16, IntOp, Commutable>;
+ def v4i16 : N3VNInt<op24, op23, 0b01, op11_8, op4,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v4i16, v4i32, IntOp, Commutable>;
+ def v2i32 : N3VNInt<op24, op23, 0b10, op11_8, op4,
+ OpcodeStr, !strconcat(Dt, "64"),
+ v2i32, v2i64, IntOp, Commutable>;
+}
+
+
+// Neon Long 3-register vector operations.
+
+multiclass N3VL_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin16, InstrItinClass itin32,
+ string OpcodeStr, string Dt,
+ SDNode OpNode, bit Commutable = 0> {
+ def v8i16 : N3VL<op24, op23, 0b00, op11_8, op4, itin16,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v8i16, v8i8, OpNode, Commutable>;
+ def v4i32 : N3VL<op24, op23, 0b01, op11_8, op4, itin16,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v4i32, v4i16, OpNode, Commutable>;
+ def v2i64 : N3VL<op24, op23, 0b10, op11_8, op4, itin32,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v2i64, v2i32, OpNode, Commutable>;
+}
+
+multiclass N3VLSL_HS<bit op24, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ SDNode OpNode> {
+ def v4i16 : N3VLSL16<op24, 0b01, op11_8, itin, OpcodeStr,
+ !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
+ def v2i32 : N3VLSL<op24, 0b10, op11_8, itin, OpcodeStr,
+ !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
+}
+
+multiclass N3VLExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin16, InstrItinClass itin32,
+ string OpcodeStr, string Dt,
+ SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
+ def v8i16 : N3VLExt<op24, op23, 0b00, op11_8, op4, itin16,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v8i16, v8i8, OpNode, ExtOp, Commutable>;
+ def v4i32 : N3VLExt<op24, op23, 0b01, op11_8, op4, itin16,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v4i32, v4i16, OpNode, ExtOp, Commutable>;
+ def v2i64 : N3VLExt<op24, op23, 0b10, op11_8, op4, itin32,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v2i64, v2i32, OpNode, ExtOp, Commutable>;
+}
+
+// Neon Long 3-register vector intrinsics.
+
+// First with only element sizes of 16 and 32 bits:
+multiclass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin16, InstrItinClass itin32,
+ string OpcodeStr, string Dt,
+ Intrinsic IntOp, bit Commutable = 0> {
+ def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4, itin16,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v4i32, v4i16, IntOp, Commutable>;
+ def v2i64 : N3VLInt<op24, op23, 0b10, op11_8, op4, itin32,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v2i64, v2i32, IntOp, Commutable>;
+}
+
+multiclass N3VLIntSL_HS<bit op24, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ Intrinsic IntOp> {
+ def v4i16 : N3VLIntSL16<op24, 0b01, op11_8, itin,
+ OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
+ def v2i32 : N3VLIntSL<op24, 0b10, op11_8, itin,
+ OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
+}
+
+// ....then also with element size of 8 bits:
+multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin16, InstrItinClass itin32,
+ string OpcodeStr, string Dt,
+ Intrinsic IntOp, bit Commutable = 0>
+ : N3VLInt_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt,
+ IntOp, Commutable> {
+ def v8i16 : N3VLInt<op24, op23, 0b00, op11_8, op4, itin16,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v8i16, v8i8, IntOp, Commutable>;
+}
+
+// ....with explicit extend (VABDL).
+multiclass N3VLIntExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ Intrinsic IntOp, SDNode ExtOp, bit Commutable = 0> {
+ def v8i16 : N3VLIntExt<op24, op23, 0b00, op11_8, op4, itin,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v8i16, v8i8, IntOp, ExtOp, Commutable>;
+ def v4i32 : N3VLIntExt<op24, op23, 0b01, op11_8, op4, itin,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v4i32, v4i16, IntOp, ExtOp, Commutable>;
+ def v2i64 : N3VLIntExt<op24, op23, 0b10, op11_8, op4, itin,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v2i64, v2i32, IntOp, ExtOp, Commutable>;
+}
+
+
+// Neon Wide 3-register vector intrinsics,
+// source operand element sizes of 8, 16 and 32 bits:
+multiclass N3VW_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ string OpcodeStr, string Dt,
+ SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
+ def v8i16 : N3VW<op24, op23, 0b00, op11_8, op4,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v8i16, v8i8, OpNode, ExtOp, Commutable>;
+ def v4i32 : N3VW<op24, op23, 0b01, op11_8, op4,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v4i32, v4i16, OpNode, ExtOp, Commutable>;
+ def v2i64 : N3VW<op24, op23, 0b10, op11_8, op4,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v2i64, v2i32, OpNode, ExtOp, Commutable>;
+}
+
+
+// Neon Multiply-Op vector operations,
+// element sizes of 8, 16 and 32 bits:
+multiclass N3VMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
+ string OpcodeStr, string Dt, SDNode OpNode> {
+ // 64-bit vector types.
+ def v8i8 : N3VDMulOp<op24, op23, 0b00, op11_8, op4, itinD16,
+ OpcodeStr, !strconcat(Dt, "8"), v8i8, mul, OpNode>;
+ def v4i16 : N3VDMulOp<op24, op23, 0b01, op11_8, op4, itinD16,
+ OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, OpNode>;
+ def v2i32 : N3VDMulOp<op24, op23, 0b10, op11_8, op4, itinD32,
+ OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, OpNode>;
+
+ // 128-bit vector types.
+ def v16i8 : N3VQMulOp<op24, op23, 0b00, op11_8, op4, itinQ16,
+ OpcodeStr, !strconcat(Dt, "8"), v16i8, mul, OpNode>;
+ def v8i16 : N3VQMulOp<op24, op23, 0b01, op11_8, op4, itinQ16,
+ OpcodeStr, !strconcat(Dt, "16"), v8i16, mul, OpNode>;
+ def v4i32 : N3VQMulOp<op24, op23, 0b10, op11_8, op4, itinQ32,
+ OpcodeStr, !strconcat(Dt, "32"), v4i32, mul, OpNode>;
+}
+
+multiclass N3VMulOpSL_HS<bits<4> op11_8,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
+ string OpcodeStr, string Dt, SDNode ShOp> {
+ def v4i16 : N3VDMulOpSL16<0b01, op11_8, itinD16,
+ OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, ShOp>;
+ def v2i32 : N3VDMulOpSL<0b10, op11_8, itinD32,
+ OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, ShOp>;
+ def v8i16 : N3VQMulOpSL16<0b01, op11_8, itinQ16,
+ OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16,
+ mul, ShOp>;
+ def v4i32 : N3VQMulOpSL<0b10, op11_8, itinQ32,
+ OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32,
+ mul, ShOp>;
+}
+
+// Neon Intrinsic-Op vector operations,
+// element sizes of 8, 16 and 32 bits:
+multiclass N3VIntOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itinD, InstrItinClass itinQ,
+ string OpcodeStr, string Dt, Intrinsic IntOp,
+ SDNode OpNode> {
+ // 64-bit vector types.
+ def v8i8 : N3VDIntOp<op24, op23, 0b00, op11_8, op4, itinD,
+ OpcodeStr, !strconcat(Dt, "8"), v8i8, IntOp, OpNode>;
+ def v4i16 : N3VDIntOp<op24, op23, 0b01, op11_8, op4, itinD,
+ OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp, OpNode>;
+ def v2i32 : N3VDIntOp<op24, op23, 0b10, op11_8, op4, itinD,
+ OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp, OpNode>;
+
+ // 128-bit vector types.
+ def v16i8 : N3VQIntOp<op24, op23, 0b00, op11_8, op4, itinQ,
+ OpcodeStr, !strconcat(Dt, "8"), v16i8, IntOp, OpNode>;
+ def v8i16 : N3VQIntOp<op24, op23, 0b01, op11_8, op4, itinQ,
+ OpcodeStr, !strconcat(Dt, "16"), v8i16, IntOp, OpNode>;
+ def v4i32 : N3VQIntOp<op24, op23, 0b10, op11_8, op4, itinQ,
+ OpcodeStr, !strconcat(Dt, "32"), v4i32, IntOp, OpNode>;
+}
+
+// Neon 3-argument intrinsics,
+// element sizes of 8, 16 and 32 bits:
+multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itinD, InstrItinClass itinQ,
+ string OpcodeStr, string Dt, Intrinsic IntOp> {
+ // 64-bit vector types.
+ def v8i8 : N3VDInt3<op24, op23, 0b00, op11_8, op4, itinD,
+ OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
+ def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, itinD,
+ OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>;
+ def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, itinD,
+ OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>;
+
+ // 128-bit vector types.
+ def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, itinQ,
+ OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>;
+ def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, itinQ,
+ OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>;
+ def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, itinQ,
+ OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>;
+}
+
+
+// Neon Long Multiply-Op vector operations,
+// element sizes of 8, 16 and 32 bits:
+multiclass N3VLMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin16, InstrItinClass itin32,
+ string OpcodeStr, string Dt, SDNode MulOp,
+ SDNode OpNode> {
+ def v8i16 : N3VLMulOp<op24, op23, 0b00, op11_8, op4, itin16, OpcodeStr,
+ !strconcat(Dt, "8"), v8i16, v8i8, MulOp, OpNode>;
+ def v4i32 : N3VLMulOp<op24, op23, 0b01, op11_8, op4, itin16, OpcodeStr,
+ !strconcat(Dt, "16"), v4i32, v4i16, MulOp, OpNode>;
+ def v2i64 : N3VLMulOp<op24, op23, 0b10, op11_8, op4, itin32, OpcodeStr,
+ !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
+}
+
+multiclass N3VLMulOpSL_HS<bit op24, bits<4> op11_8, string OpcodeStr,
+ string Dt, SDNode MulOp, SDNode OpNode> {
+ def v4i16 : N3VLMulOpSL16<op24, 0b01, op11_8, IIC_VMACi16D, OpcodeStr,
+ !strconcat(Dt,"16"), v4i32, v4i16, MulOp, OpNode>;
+ def v2i32 : N3VLMulOpSL<op24, 0b10, op11_8, IIC_VMACi32D, OpcodeStr,
+ !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
+}
+
+
+// Neon Long 3-argument intrinsics.
+
+// First with only element sizes of 16 and 32 bits:
+multiclass N3VLInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin16, InstrItinClass itin32,
+ string OpcodeStr, string Dt, Intrinsic IntOp> {
+ def v4i32 : N3VLInt3<op24, op23, 0b01, op11_8, op4, itin16,
+ OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
+ def v2i64 : N3VLInt3<op24, op23, 0b10, op11_8, op4, itin32,
+ OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
+}
+
+multiclass N3VLInt3SL_HS<bit op24, bits<4> op11_8,
+ string OpcodeStr, string Dt, Intrinsic IntOp> {
+ def v4i16 : N3VLInt3SL16<op24, 0b01, op11_8, IIC_VMACi16D,
+ OpcodeStr, !strconcat(Dt,"16"), v4i32, v4i16, IntOp>;
+ def v2i32 : N3VLInt3SL<op24, 0b10, op11_8, IIC_VMACi32D,
+ OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
+}
+
+// ....then also with element size of 8 bits:
+multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin16, InstrItinClass itin32,
+ string OpcodeStr, string Dt, Intrinsic IntOp>
+ : N3VLInt3_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt, IntOp> {
+ def v8i16 : N3VLInt3<op24, op23, 0b00, op11_8, op4, itin16,
+ OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>;
+}
+
+// ....with explicit extend (VABAL).
+multiclass N3VLIntExtOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ Intrinsic IntOp, SDNode ExtOp, SDNode OpNode> {
+ def v8i16 : N3VLIntExtOp<op24, op23, 0b00, op11_8, op4, itin,
+ OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8,
+ IntOp, ExtOp, OpNode>;
+ def v4i32 : N3VLIntExtOp<op24, op23, 0b01, op11_8, op4, itin,
+ OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16,
+ IntOp, ExtOp, OpNode>;
+ def v2i64 : N3VLIntExtOp<op24, op23, 0b10, op11_8, op4, itin,
+ OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32,
+ IntOp, ExtOp, OpNode>;
+}
+
+
+// Neon Pairwise long 2-register intrinsics,
+// element sizes of 8, 16 and 32 bits:
+multiclass N2VPLInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
+ bits<5> op11_7, bit op4,
+ string OpcodeStr, string Dt, Intrinsic IntOp> {
+ // 64-bit vector types.
+ def v8i8 : N2VDPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
+ OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
+ def v4i16 : N2VDPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
+ OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
+ def v2i32 : N2VDPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
+ OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;
+
+ // 128-bit vector types.
+ def v16i8 : N2VQPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
+ OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
+ def v8i16 : N2VQPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
+ OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
+ def v4i32 : N2VQPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
+ OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
+}
+
+
+// Neon Pairwise long 2-register accumulate intrinsics,
+// element sizes of 8, 16 and 32 bits:
+multiclass N2VPLInt2_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
+ bits<5> op11_7, bit op4,
+ string OpcodeStr, string Dt, Intrinsic IntOp> {
+ // 64-bit vector types.
+ def v8i8 : N2VDPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
+ OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
+ def v4i16 : N2VDPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
+ OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
+ def v2i32 : N2VDPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
+ OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;
+
+ // 128-bit vector types.
+ def v16i8 : N2VQPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
+ OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
+ def v8i16 : N2VQPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
+ OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
+ def v4i32 : N2VQPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
+ OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
+}
+
+
+// Neon 2-register vector shift by immediate,
+// with f of either N2RegVShLFrm or N2RegVShRFrm
+// element sizes of 8, 16, 32 and 64 bits:
+multiclass N2VShL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ SDNode OpNode> {
+ // 64-bit vector types.
+ def v8i8 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
+ OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
+ let Inst{21-19} = 0b001; // imm6 = 001xxx
+ }
+ def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
+ OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
+ let Inst{21-20} = 0b01; // imm6 = 01xxxx
+ }
+ def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
+ OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
+ let Inst{21} = 0b1; // imm6 = 1xxxxx
+ }
+ def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
+ OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
+ // imm6 = xxxxxx
+
+ // 128-bit vector types.
+ def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
+ OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
+ let Inst{21-19} = 0b001; // imm6 = 001xxx
+ }
+ def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
+ OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
+ let Inst{21-20} = 0b01; // imm6 = 01xxxx
+ }
+ def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
+ OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
+ let Inst{21} = 0b1; // imm6 = 1xxxxx
+ }
+ def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
+ OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
+ // imm6 = xxxxxx
+}
+multiclass N2VShR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ SDNode OpNode> {
+ // 64-bit vector types.
+ def v8i8 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
+ OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
+ let Inst{21-19} = 0b001; // imm6 = 001xxx
+ }
+ def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16,
+ OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
+ let Inst{21-20} = 0b01; // imm6 = 01xxxx
+ }
+ def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32,
+ OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
+ let Inst{21} = 0b1; // imm6 = 1xxxxx
+ }
+ def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
+ OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
+ // imm6 = xxxxxx
+
+ // 128-bit vector types.
+ def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
+ OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
+ let Inst{21-19} = 0b001; // imm6 = 001xxx
+ }
+ def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16,
+ OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
+ let Inst{21-20} = 0b01; // imm6 = 01xxxx
+ }
+ def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32,
+ OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
+ let Inst{21} = 0b1; // imm6 = 1xxxxx
+ }
+ def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
+ OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
+ // imm6 = xxxxxx
+}
+
+// Neon Shift-Accumulate vector operations,
+// element sizes of 8, 16, 32 and 64 bits:
+multiclass N2VShAdd_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
+ string OpcodeStr, string Dt, SDNode ShOp> {
+ // 64-bit vector types.
+ def v8i8 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm8,
+ OpcodeStr, !strconcat(Dt, "8"), v8i8, ShOp> {
+ let Inst{21-19} = 0b001; // imm6 = 001xxx
+ }
+ def v4i16 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm16,
+ OpcodeStr, !strconcat(Dt, "16"), v4i16, ShOp> {
+ let Inst{21-20} = 0b01; // imm6 = 01xxxx
+ }
+ def v2i32 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm32,
+ OpcodeStr, !strconcat(Dt, "32"), v2i32, ShOp> {
+ let Inst{21} = 0b1; // imm6 = 1xxxxx
+ }
+ def v1i64 : N2VDShAdd<op24, op23, op11_8, 1, op4, shr_imm64,
+ OpcodeStr, !strconcat(Dt, "64"), v1i64, ShOp>;
+ // imm6 = xxxxxx
+
+ // 128-bit vector types.
+ def v16i8 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm8,
+ OpcodeStr, !strconcat(Dt, "8"), v16i8, ShOp> {
+ let Inst{21-19} = 0b001; // imm6 = 001xxx
+ }
+ def v8i16 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm16,
+ OpcodeStr, !strconcat(Dt, "16"), v8i16, ShOp> {
+ let Inst{21-20} = 0b01; // imm6 = 01xxxx
+ }
+ def v4i32 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm32,
+ OpcodeStr, !strconcat(Dt, "32"), v4i32, ShOp> {
+ let Inst{21} = 0b1; // imm6 = 1xxxxx
+ }
+ def v2i64 : N2VQShAdd<op24, op23, op11_8, 1, op4, shr_imm64,
+ OpcodeStr, !strconcat(Dt, "64"), v2i64, ShOp>;
+ // imm6 = xxxxxx
+}
+
+// Neon Shift-Insert vector operations,
+// with f of either N2RegVShLFrm or N2RegVShRFrm
+// element sizes of 8, 16, 32 and 64 bits:
+multiclass N2VShInsL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
+ string OpcodeStr> {
+ // 64-bit vector types.
+ def v8i8 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
+ N2RegVShLFrm, OpcodeStr, "8", v8i8, NEONvsli> {
+ let Inst{21-19} = 0b001; // imm6 = 001xxx
+ }
+ def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
+ N2RegVShLFrm, OpcodeStr, "16", v4i16, NEONvsli> {
+ let Inst{21-20} = 0b01; // imm6 = 01xxxx
+ }
+ def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
+ N2RegVShLFrm, OpcodeStr, "32", v2i32, NEONvsli> {
+ let Inst{21} = 0b1; // imm6 = 1xxxxx
+ }
+ def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, i32imm,
+ N2RegVShLFrm, OpcodeStr, "64", v1i64, NEONvsli>;
+ // imm6 = xxxxxx
+
+ // 128-bit vector types.
+ def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
+ N2RegVShLFrm, OpcodeStr, "8", v16i8, NEONvsli> {
+ let Inst{21-19} = 0b001; // imm6 = 001xxx
+ }
+ def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
+ N2RegVShLFrm, OpcodeStr, "16", v8i16, NEONvsli> {
+ let Inst{21-20} = 0b01; // imm6 = 01xxxx
+ }
+ def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
+ N2RegVShLFrm, OpcodeStr, "32", v4i32, NEONvsli> {
+ let Inst{21} = 0b1; // imm6 = 1xxxxx
+ }
+ def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, i32imm,
+ N2RegVShLFrm, OpcodeStr, "64", v2i64, NEONvsli>;
+ // imm6 = xxxxxx
+}
+multiclass N2VShInsR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
+ string OpcodeStr> {
+ // 64-bit vector types.
+ def v8i8 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm8,
+ N2RegVShRFrm, OpcodeStr, "8", v8i8, NEONvsri> {
+ let Inst{21-19} = 0b001; // imm6 = 001xxx
+ }
+ def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm16,
+ N2RegVShRFrm, OpcodeStr, "16", v4i16, NEONvsri> {
+ let Inst{21-20} = 0b01; // imm6 = 01xxxx
+ }
+ def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm32,
+ N2RegVShRFrm, OpcodeStr, "32", v2i32, NEONvsri> {
+ let Inst{21} = 0b1; // imm6 = 1xxxxx
+ }
+ def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, shr_imm64,
+ N2RegVShRFrm, OpcodeStr, "64", v1i64, NEONvsri>;
+ // imm6 = xxxxxx
+
+ // 128-bit vector types.
+ def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm8,
+ N2RegVShRFrm, OpcodeStr, "8", v16i8, NEONvsri> {
+ let Inst{21-19} = 0b001; // imm6 = 001xxx
+ }
+ def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm16,
+ N2RegVShRFrm, OpcodeStr, "16", v8i16, NEONvsri> {
+ let Inst{21-20} = 0b01; // imm6 = 01xxxx
+ }
+ def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm32,
+ N2RegVShRFrm, OpcodeStr, "32", v4i32, NEONvsri> {
+ let Inst{21} = 0b1; // imm6 = 1xxxxx
+ }
+ def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, shr_imm64,
+ N2RegVShRFrm, OpcodeStr, "64", v2i64, NEONvsri>;
+ // imm6 = xxxxxx
+}
+
+// Neon Shift Long operations,
+// element sizes of 8, 16, 32 bits:
+multiclass N2VLSh_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
+ bit op4, string OpcodeStr, string Dt, SDNode OpNode> {
+ def v8i16 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
+ OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode> {
+ let Inst{21-19} = 0b001; // imm6 = 001xxx
+ }
+ def v4i32 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
+ OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, OpNode> {
+ let Inst{21-20} = 0b01; // imm6 = 01xxxx
+ }
+ def v2i64 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
+ OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, OpNode> {
+ let Inst{21} = 0b1; // imm6 = 1xxxxx
+ }
+}
+
+// Neon Shift Narrow operations,
+// element sizes of 16, 32, 64 bits:
+multiclass N2VNSh_HSD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
+ bit op4, InstrItinClass itin, string OpcodeStr, string Dt,
+ SDNode OpNode> {
+ def v8i8 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v8i8, v8i16, shr_imm8, OpNode> {
+ let Inst{21-19} = 0b001; // imm6 = 001xxx
+ }
+ def v4i16 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v4i16, v4i32, shr_imm16, OpNode> {
+ let Inst{21-20} = 0b01; // imm6 = 01xxxx
+ }
+ def v2i32 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
+ OpcodeStr, !strconcat(Dt, "64"),
+ v2i32, v2i64, shr_imm32, OpNode> {
+ let Inst{21} = 0b1; // imm6 = 1xxxxx
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Instruction Definitions.
+//===----------------------------------------------------------------------===//
+
+// Vector Add Operations.
+
+// VADD : Vector Add (integer and floating-point)
+defm VADD : N3V_QHSD<0, 0, 0b1000, 0, IIC_VBINiD, IIC_VBINiQ, "vadd", "i",
+ add, 1>;
+def VADDfd : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd", "f32",
+ v2f32, v2f32, fadd, 1>;
+def VADDfq : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd", "f32",
+ v4f32, v4f32, fadd, 1>;
+// VADDL : Vector Add Long (Q = D + D)
+defm VADDLs : N3VLExt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
+ "vaddl", "s", add, sext, 1>;
+defm VADDLu : N3VLExt_QHS<1,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
+ "vaddl", "u", add, zext, 1>;
+// VADDW : Vector Add Wide (Q = Q + D)
+defm VADDWs : N3VW_QHS<0,1,0b0001,0, "vaddw", "s", add, sext, 0>;
+defm VADDWu : N3VW_QHS<1,1,0b0001,0, "vaddw", "u", add, zext, 0>;
+// VHADD : Vector Halving Add
+defm VHADDs : N3VInt_QHS<0, 0, 0b0000, 0, N3RegFrm,
+ IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
+ "vhadd", "s", int_arm_neon_vhadds, 1>;
+defm VHADDu : N3VInt_QHS<1, 0, 0b0000, 0, N3RegFrm,
+ IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
+ "vhadd", "u", int_arm_neon_vhaddu, 1>;
+// VRHADD : Vector Rounding Halving Add
+defm VRHADDs : N3VInt_QHS<0, 0, 0b0001, 0, N3RegFrm,
+ IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
+ "vrhadd", "s", int_arm_neon_vrhadds, 1>;
+defm VRHADDu : N3VInt_QHS<1, 0, 0b0001, 0, N3RegFrm,
+ IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
+ "vrhadd", "u", int_arm_neon_vrhaddu, 1>;
+// VQADD : Vector Saturating Add
+defm VQADDs : N3VInt_QHSD<0, 0, 0b0000, 1, N3RegFrm,
+ IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
+ "vqadd", "s", int_arm_neon_vqadds, 1>;
+defm VQADDu : N3VInt_QHSD<1, 0, 0b0000, 1, N3RegFrm,
+ IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
+ "vqadd", "u", int_arm_neon_vqaddu, 1>;
+// VADDHN : Vector Add and Narrow Returning High Half (D = Q + Q)
+defm VADDHN : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i",
+ int_arm_neon_vaddhn, 1>;
+// VRADDHN : Vector Rounding Add and Narrow Returning High Half (D = Q + Q)
+defm VRADDHN : N3VNInt_HSD<1,1,0b0100,0, "vraddhn", "i",
+ int_arm_neon_vraddhn, 1>;
+
+// Vector Multiply Operations.
+
+// VMUL : Vector Multiply (integer, polynomial and floating-point)
+defm VMUL : N3V_QHS<0, 0, 0b1001, 1, IIC_VMULi16D, IIC_VMULi32D,
+ IIC_VMULi16Q, IIC_VMULi32Q, "vmul", "i", mul, 1>;
+def VMULpd : N3VDInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16D, "vmul",
+ "p8", v8i8, v8i8, int_arm_neon_vmulp, 1>;
+def VMULpq : N3VQInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16Q, "vmul",
+ "p8", v16i8, v16i8, int_arm_neon_vmulp, 1>;
+def VMULfd : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VFMULD, "vmul", "f32",
+ v2f32, v2f32, fmul, 1>;
+def VMULfq : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VFMULQ, "vmul", "f32",
+ v4f32, v4f32, fmul, 1>;
+defm VMULsl : N3VSL_HS<0b1000, "vmul", "i", mul>;
+def VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>;
+def VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32,
+ v2f32, fmul>;
+
+def : Pat<(v8i16 (mul (v8i16 QPR:$src1),
+ (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))),
+ (v8i16 (VMULslv8i16 (v8i16 QPR:$src1),
+ (v4i16 (EXTRACT_SUBREG QPR:$src2,
+ (DSubReg_i16_reg imm:$lane))),
+ (SubReg_i16_lane imm:$lane)))>;
+def : Pat<(v4i32 (mul (v4i32 QPR:$src1),
+ (v4i32 (NEONvduplane (v4i32 QPR:$src2), imm:$lane)))),
+ (v4i32 (VMULslv4i32 (v4i32 QPR:$src1),
+ (v2i32 (EXTRACT_SUBREG QPR:$src2,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane)))>;
+def : Pat<(v4f32 (fmul (v4f32 QPR:$src1),
+ (v4f32 (NEONvduplane (v4f32 QPR:$src2), imm:$lane)))),
+ (v4f32 (VMULslfq (v4f32 QPR:$src1),
+ (v2f32 (EXTRACT_SUBREG QPR:$src2,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane)))>;
+
+// VQDMULH : Vector Saturating Doubling Multiply Returning High Half
+defm VQDMULH : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm, IIC_VMULi16D, IIC_VMULi32D,
+ IIC_VMULi16Q, IIC_VMULi32Q,
+ "vqdmulh", "s", int_arm_neon_vqdmulh, 1>;
+defm VQDMULHsl: N3VIntSL_HS<0b1100, IIC_VMULi16D, IIC_VMULi32D,
+ IIC_VMULi16Q, IIC_VMULi32Q,
+ "vqdmulh", "s", int_arm_neon_vqdmulh>;
+def : Pat<(v8i16 (int_arm_neon_vqdmulh (v8i16 QPR:$src1),
+ (v8i16 (NEONvduplane (v8i16 QPR:$src2),
+ imm:$lane)))),
+ (v8i16 (VQDMULHslv8i16 (v8i16 QPR:$src1),
+ (v4i16 (EXTRACT_SUBREG QPR:$src2,
+ (DSubReg_i16_reg imm:$lane))),
+ (SubReg_i16_lane imm:$lane)))>;
+def : Pat<(v4i32 (int_arm_neon_vqdmulh (v4i32 QPR:$src1),
+ (v4i32 (NEONvduplane (v4i32 QPR:$src2),
+ imm:$lane)))),
+ (v4i32 (VQDMULHslv4i32 (v4i32 QPR:$src1),
+ (v2i32 (EXTRACT_SUBREG QPR:$src2,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane)))>;
+
+// VQRDMULH : Vector Rounding Saturating Doubling Multiply Returning High Half
+defm VQRDMULH : N3VInt_HS<1, 0, 0b1011, 0, N3RegFrm,
+ IIC_VMULi16D,IIC_VMULi32D,IIC_VMULi16Q,IIC_VMULi32Q,
+ "vqrdmulh", "s", int_arm_neon_vqrdmulh, 1>;
+defm VQRDMULHsl : N3VIntSL_HS<0b1101, IIC_VMULi16D, IIC_VMULi32D,
+ IIC_VMULi16Q, IIC_VMULi32Q,
+ "vqrdmulh", "s", int_arm_neon_vqrdmulh>;
+def : Pat<(v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$src1),
+ (v8i16 (NEONvduplane (v8i16 QPR:$src2),
+ imm:$lane)))),
+ (v8i16 (VQRDMULHslv8i16 (v8i16 QPR:$src1),
+ (v4i16 (EXTRACT_SUBREG QPR:$src2,
+ (DSubReg_i16_reg imm:$lane))),
+ (SubReg_i16_lane imm:$lane)))>;
+def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1),
+ (v4i32 (NEONvduplane (v4i32 QPR:$src2),
+ imm:$lane)))),
+ (v4i32 (VQRDMULHslv4i32 (v4i32 QPR:$src1),
+ (v2i32 (EXTRACT_SUBREG QPR:$src2,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane)))>;
+
+// VMULL : Vector Multiply Long (integer and polynomial) (Q = D * D)
+defm VMULLs : N3VL_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
+ "vmull", "s", NEONvmulls, 1>;
+defm VMULLu : N3VL_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
+ "vmull", "u", NEONvmullu, 1>;
+def VMULLp : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8",
+ v8i16, v8i8, int_arm_neon_vmullp, 1>;
+defm VMULLsls : N3VLSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s", NEONvmulls>;
+defm VMULLslu : N3VLSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u", NEONvmullu>;
+
+// VQDMULL : Vector Saturating Doubling Multiply Long (Q = D * D)
+defm VQDMULL : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, IIC_VMULi32D,
+ "vqdmull", "s", int_arm_neon_vqdmull, 1>;
+defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D,
+ "vqdmull", "s", int_arm_neon_vqdmull>;
+
+// Vector Multiply-Accumulate and Multiply-Subtract Operations.
+
+// VMLA : Vector Multiply Accumulate (integer and floating-point)
+defm VMLA : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
+ IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
+def VMLAfd : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32",
+ v2f32, fmul_su, fadd_mlx>,
+ Requires<[HasNEON, UseFPVMLx]>;
+def VMLAfq : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32",
+ v4f32, fmul_su, fadd_mlx>,
+ Requires<[HasNEON, UseFPVMLx]>;
+defm VMLAsl : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D,
+ IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
+def VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32",
+ v2f32, fmul_su, fadd_mlx>,
+ Requires<[HasNEON, UseFPVMLx]>;
+def VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ, "vmla", "f32",
+ v4f32, v2f32, fmul_su, fadd_mlx>,
+ Requires<[HasNEON, UseFPVMLx]>;
+
+def : Pat<(v8i16 (add (v8i16 QPR:$src1),
+ (mul (v8i16 QPR:$src2),
+ (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))),
+ (v8i16 (VMLAslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
+ (v4i16 (EXTRACT_SUBREG QPR:$src3,
+ (DSubReg_i16_reg imm:$lane))),
+ (SubReg_i16_lane imm:$lane)))>;
+
+def : Pat<(v4i32 (add (v4i32 QPR:$src1),
+ (mul (v4i32 QPR:$src2),
+ (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))),
+ (v4i32 (VMLAslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
+ (v2i32 (EXTRACT_SUBREG QPR:$src3,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane)))>;
+
+def : Pat<(v4f32 (fadd_mlx (v4f32 QPR:$src1),
+ (fmul_su (v4f32 QPR:$src2),
+ (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))),
+ (v4f32 (VMLAslfq (v4f32 QPR:$src1),
+ (v4f32 QPR:$src2),
+ (v2f32 (EXTRACT_SUBREG QPR:$src3,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane)))>,
+ Requires<[HasNEON, UseFPVMLx]>;
+
+// VMLAL : Vector Multiply Accumulate Long (Q += D * D)
+defm VMLALs : N3VLMulOp_QHS<0,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
+ "vmlal", "s", NEONvmulls, add>;
+defm VMLALu : N3VLMulOp_QHS<1,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
+ "vmlal", "u", NEONvmullu, add>;
+
+defm VMLALsls : N3VLMulOpSL_HS<0, 0b0010, "vmlal", "s", NEONvmulls, add>;
+defm VMLALslu : N3VLMulOpSL_HS<1, 0b0010, "vmlal", "u", NEONvmullu, add>;
+
+// VQDMLAL : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D)
+defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
+ "vqdmlal", "s", int_arm_neon_vqdmlal>;
+defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", int_arm_neon_vqdmlal>;
+
+// VMLS : Vector Multiply Subtract (integer and floating-point)
+defm VMLS : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
+ IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
+def VMLSfd : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32",
+ v2f32, fmul_su, fsub_mlx>,
+ Requires<[HasNEON, UseFPVMLx]>;
+def VMLSfq : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32",
+ v4f32, fmul_su, fsub_mlx>,
+ Requires<[HasNEON, UseFPVMLx]>;
+defm VMLSsl : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D,
+ IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
+def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32",
+ v2f32, fmul_su, fsub_mlx>,
+ Requires<[HasNEON, UseFPVMLx]>;
+def VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ, "vmls", "f32",
+ v4f32, v2f32, fmul_su, fsub_mlx>,
+ Requires<[HasNEON, UseFPVMLx]>;
+
+def : Pat<(v8i16 (sub (v8i16 QPR:$src1),
+ (mul (v8i16 QPR:$src2),
+ (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))),
+ (v8i16 (VMLSslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
+ (v4i16 (EXTRACT_SUBREG QPR:$src3,
+ (DSubReg_i16_reg imm:$lane))),
+ (SubReg_i16_lane imm:$lane)))>;
+
+def : Pat<(v4i32 (sub (v4i32 QPR:$src1),
+ (mul (v4i32 QPR:$src2),
+ (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))),
+ (v4i32 (VMLSslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
+ (v2i32 (EXTRACT_SUBREG QPR:$src3,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane)))>;
+
+def : Pat<(v4f32 (fsub_mlx (v4f32 QPR:$src1),
+ (fmul_su (v4f32 QPR:$src2),
+ (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))),
+ (v4f32 (VMLSslfq (v4f32 QPR:$src1), (v4f32 QPR:$src2),
+ (v2f32 (EXTRACT_SUBREG QPR:$src3,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane)))>,
+ Requires<[HasNEON, UseFPVMLx]>;
+
+// VMLSL : Vector Multiply Subtract Long (Q -= D * D)
+defm VMLSLs : N3VLMulOp_QHS<0,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
+ "vmlsl", "s", NEONvmulls, sub>;
+defm VMLSLu : N3VLMulOp_QHS<1,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
+ "vmlsl", "u", NEONvmullu, sub>;
+
+defm VMLSLsls : N3VLMulOpSL_HS<0, 0b0110, "vmlsl", "s", NEONvmulls, sub>;
+defm VMLSLslu : N3VLMulOpSL_HS<1, 0b0110, "vmlsl", "u", NEONvmullu, sub>;
+
+// VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D)
+defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D,
+ "vqdmlsl", "s", int_arm_neon_vqdmlsl>;
+defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b111, "vqdmlsl", "s", int_arm_neon_vqdmlsl>;
+
+// Vector Subtract Operations.
+
+// VSUB : Vector Subtract (integer and floating-point)
+defm VSUB : N3V_QHSD<1, 0, 0b1000, 0, IIC_VSUBiD, IIC_VSUBiQ,
+ "vsub", "i", sub, 0>;
+def VSUBfd : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub", "f32",
+ v2f32, v2f32, fsub, 0>;
+def VSUBfq : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub", "f32",
+ v4f32, v4f32, fsub, 0>;
+// VSUBL : Vector Subtract Long (Q = D - D)
+defm VSUBLs : N3VLExt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
+ "vsubl", "s", sub, sext, 0>;
+defm VSUBLu : N3VLExt_QHS<1,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
+ "vsubl", "u", sub, zext, 0>;
+// VSUBW : Vector Subtract Wide (Q = Q - D)
+defm VSUBWs : N3VW_QHS<0,1,0b0011,0, "vsubw", "s", sub, sext, 0>;
+defm VSUBWu : N3VW_QHS<1,1,0b0011,0, "vsubw", "u", sub, zext, 0>;
+// VHSUB : Vector Halving Subtract
+defm VHSUBs : N3VInt_QHS<0, 0, 0b0010, 0, N3RegFrm,
+ IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
+ "vhsub", "s", int_arm_neon_vhsubs, 0>;
+defm VHSUBu : N3VInt_QHS<1, 0, 0b0010, 0, N3RegFrm,
+ IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
+ "vhsub", "u", int_arm_neon_vhsubu, 0>;
+// VQSUB : Vector Saturing Subtract
+defm VQSUBs : N3VInt_QHSD<0, 0, 0b0010, 1, N3RegFrm,
+ IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
+ "vqsub", "s", int_arm_neon_vqsubs, 0>;
+defm VQSUBu : N3VInt_QHSD<1, 0, 0b0010, 1, N3RegFrm,
+ IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
+ "vqsub", "u", int_arm_neon_vqsubu, 0>;
+// VSUBHN : Vector Subtract and Narrow Returning High Half (D = Q - Q)
+defm VSUBHN : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i",
+ int_arm_neon_vsubhn, 0>;
+// VRSUBHN : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q)
+defm VRSUBHN : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn", "i",
+ int_arm_neon_vrsubhn, 0>;
+
+// Vector Comparisons.
+
+// VCEQ : Vector Compare Equal
+defm VCEQ : N3V_QHS<1, 0, 0b1000, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
+ IIC_VSUBi4Q, "vceq", "i", NEONvceq, 1>;
+def VCEQfd : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32,
+ NEONvceq, 1>;
+def VCEQfq : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32,
+ NEONvceq, 1>;
+
+defm VCEQz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i",
+ "$Vd, $Vm, #0", NEONvceqz>;
+
+// VCGE : Vector Compare Greater Than or Equal
+defm VCGEs : N3V_QHS<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
+ IIC_VSUBi4Q, "vcge", "s", NEONvcge, 0>;
+defm VCGEu : N3V_QHS<1, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
+ IIC_VSUBi4Q, "vcge", "u", NEONvcgeu, 0>;
+def VCGEfd : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32,
+ NEONvcge, 0>;
+def VCGEfq : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32,
+ NEONvcge, 0>;
+
+defm VCGEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s",
+ "$Vd, $Vm, #0", NEONvcgez>;
+defm VCLEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s",
+ "$Vd, $Vm, #0", NEONvclez>;
+
+// VCGT : Vector Compare Greater Than
+defm VCGTs : N3V_QHS<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
+ IIC_VSUBi4Q, "vcgt", "s", NEONvcgt, 0>;
+defm VCGTu : N3V_QHS<1, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
+ IIC_VSUBi4Q, "vcgt", "u", NEONvcgtu, 0>;
+def VCGTfd : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32,
+ NEONvcgt, 0>;
+def VCGTfq : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32,
+ NEONvcgt, 0>;
+
+defm VCGTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s",
+ "$Vd, $Vm, #0", NEONvcgtz>;
+defm VCLTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s",
+ "$Vd, $Vm, #0", NEONvcltz>;
+
+// VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE)
+def VACGEd : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
+ "f32", v2i32, v2f32, int_arm_neon_vacged, 0>;
+def VACGEq : N3VQInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge",
+ "f32", v4i32, v4f32, int_arm_neon_vacgeq, 0>;
+// VACGT : Vector Absolute Compare Greater Than (aka VCAGT)
+def VACGTd : N3VDInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
+ "f32", v2i32, v2f32, int_arm_neon_vacgtd, 0>;
+def VACGTq : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
+ "f32", v4i32, v4f32, int_arm_neon_vacgtq, 0>;
+// VTST : Vector Test Bits
+defm VTST : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
+ IIC_VBINi4Q, "vtst", "", NEONvtst, 1>;
+
+// Vector Bitwise Operations.
+
+def vnotd : PatFrag<(ops node:$in),
+ (xor node:$in, (bitconvert (v8i8 NEONimmAllOnesV)))>;
+def vnotq : PatFrag<(ops node:$in),
+ (xor node:$in, (bitconvert (v16i8 NEONimmAllOnesV)))>;
+
+
+// VAND : Vector Bitwise AND
+def VANDd : N3VDX<0, 0, 0b00, 0b0001, 1, IIC_VBINiD, "vand",
+ v2i32, v2i32, and, 1>;
+def VANDq : N3VQX<0, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "vand",
+ v4i32, v4i32, and, 1>;
+
+// VEOR : Vector Bitwise Exclusive OR
+def VEORd : N3VDX<1, 0, 0b00, 0b0001, 1, IIC_VBINiD, "veor",
+ v2i32, v2i32, xor, 1>;
+def VEORq : N3VQX<1, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "veor",
+ v4i32, v4i32, xor, 1>;
+
+// VORR : Vector Bitwise OR
+def VORRd : N3VDX<0, 0, 0b10, 0b0001, 1, IIC_VBINiD, "vorr",
+ v2i32, v2i32, or, 1>;
+def VORRq : N3VQX<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ, "vorr",
+ v4i32, v4i32, or, 1>;
+
+def VORRiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 0, 1,
+ (outs DPR:$Vd), (ins nModImm:$SIMM, DPR:$src),
+ IIC_VMOVImm,
+ "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
+ [(set DPR:$Vd,
+ (v4i16 (NEONvorrImm DPR:$src, timm:$SIMM)))]> {
+ let Inst{9} = SIMM{9};
+}
+
+def VORRiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 0, 1,
+ (outs DPR:$Vd), (ins nModImm:$SIMM, DPR:$src),
+ IIC_VMOVImm,
+ "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
+ [(set DPR:$Vd,
+ (v2i32 (NEONvorrImm DPR:$src, timm:$SIMM)))]> {
+ let Inst{10-9} = SIMM{10-9};
+}
+
+def VORRiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 0, 1,
+ (outs QPR:$Vd), (ins nModImm:$SIMM, QPR:$src),
+ IIC_VMOVImm,
+ "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
+ [(set QPR:$Vd,
+ (v8i16 (NEONvorrImm QPR:$src, timm:$SIMM)))]> {
+ let Inst{9} = SIMM{9};
+}
+
+def VORRiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 0, 1,
+ (outs QPR:$Vd), (ins nModImm:$SIMM, QPR:$src),
+ IIC_VMOVImm,
+ "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
+ [(set QPR:$Vd,
+ (v4i32 (NEONvorrImm QPR:$src, timm:$SIMM)))]> {
+ let Inst{10-9} = SIMM{10-9};
+}
+
+
+// VBIC : Vector Bitwise Bit Clear (AND NOT)
+def VBICd : N3VX<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
+ (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD,
+ "vbic", "$Vd, $Vn, $Vm", "",
+ [(set DPR:$Vd, (v2i32 (and DPR:$Vn,
+ (vnotd DPR:$Vm))))]>;
+def VBICq : N3VX<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
+ (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ,
+ "vbic", "$Vd, $Vn, $Vm", "",
+ [(set QPR:$Vd, (v4i32 (and QPR:$Vn,
+ (vnotq QPR:$Vm))))]>;
+
+def VBICiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 1, 1,
+ (outs DPR:$Vd), (ins nModImm:$SIMM, DPR:$src),
+ IIC_VMOVImm,
+ "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
+ [(set DPR:$Vd,
+ (v4i16 (NEONvbicImm DPR:$src, timm:$SIMM)))]> {
+ let Inst{9} = SIMM{9};
+}
+
+def VBICiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 1, 1,
+ (outs DPR:$Vd), (ins nModImm:$SIMM, DPR:$src),
+ IIC_VMOVImm,
+ "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
+ [(set DPR:$Vd,
+ (v2i32 (NEONvbicImm DPR:$src, timm:$SIMM)))]> {
+ let Inst{10-9} = SIMM{10-9};
+}
+
+def VBICiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 1, 1,
+ (outs QPR:$Vd), (ins nModImm:$SIMM, QPR:$src),
+ IIC_VMOVImm,
+ "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
+ [(set QPR:$Vd,
+ (v8i16 (NEONvbicImm QPR:$src, timm:$SIMM)))]> {
+ let Inst{9} = SIMM{9};
+}
+
+def VBICiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 1, 1,
+ (outs QPR:$Vd), (ins nModImm:$SIMM, QPR:$src),
+ IIC_VMOVImm,
+ "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
+ [(set QPR:$Vd,
+ (v4i32 (NEONvbicImm QPR:$src, timm:$SIMM)))]> {
+ let Inst{10-9} = SIMM{10-9};
+}
+
+// VORN : Vector Bitwise OR NOT
+def VORNd : N3VX<0, 0, 0b11, 0b0001, 0, 1, (outs DPR:$Vd),
+ (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD,
+ "vorn", "$Vd, $Vn, $Vm", "",
+ [(set DPR:$Vd, (v2i32 (or DPR:$Vn,
+ (vnotd DPR:$Vm))))]>;
+def VORNq : N3VX<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$Vd),
+ (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ,
+ "vorn", "$Vd, $Vn, $Vm", "",
+ [(set QPR:$Vd, (v4i32 (or QPR:$Vn,
+ (vnotq QPR:$Vm))))]>;
+
+// VMVN : Vector Bitwise NOT (Immediate)
+
+let isReMaterializable = 1 in {
+
+def VMVNv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 1, 1, (outs DPR:$Vd),
+ (ins nModImm:$SIMM), IIC_VMOVImm,
+ "vmvn", "i16", "$Vd, $SIMM", "",
+ [(set DPR:$Vd, (v4i16 (NEONvmvnImm timm:$SIMM)))]> {
+ let Inst{9} = SIMM{9};
+}
+
+def VMVNv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 1, 1, (outs QPR:$Vd),
+ (ins nModImm:$SIMM), IIC_VMOVImm,
+ "vmvn", "i16", "$Vd, $SIMM", "",
+ [(set QPR:$Vd, (v8i16 (NEONvmvnImm timm:$SIMM)))]> {
+ let Inst{9} = SIMM{9};
+}
+
+def VMVNv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 1, 1, (outs DPR:$Vd),
+ (ins nModImm:$SIMM), IIC_VMOVImm,
+ "vmvn", "i32", "$Vd, $SIMM", "",
+ [(set DPR:$Vd, (v2i32 (NEONvmvnImm timm:$SIMM)))]> {
+ let Inst{11-8} = SIMM{11-8};
+}
+
+def VMVNv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 1, 1, (outs QPR:$Vd),
+ (ins nModImm:$SIMM), IIC_VMOVImm,
+ "vmvn", "i32", "$Vd, $SIMM", "",
+ [(set QPR:$Vd, (v4i32 (NEONvmvnImm timm:$SIMM)))]> {
+ let Inst{11-8} = SIMM{11-8};
+}
+}
+
+// VMVN : Vector Bitwise NOT
+def VMVNd : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0,
+ (outs DPR:$Vd), (ins DPR:$Vm), IIC_VSUBiD,
+ "vmvn", "$Vd, $Vm", "",
+ [(set DPR:$Vd, (v2i32 (vnotd DPR:$Vm)))]>;
+def VMVNq : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0,
+ (outs QPR:$Vd), (ins QPR:$Vm), IIC_VSUBiD,
+ "vmvn", "$Vd, $Vm", "",
+ [(set QPR:$Vd, (v4i32 (vnotq QPR:$Vm)))]>;
+def : Pat<(v2i32 (vnotd DPR:$src)), (VMVNd DPR:$src)>;
+def : Pat<(v4i32 (vnotq QPR:$src)), (VMVNq QPR:$src)>;
+
+// VBSL : Vector Bitwise Select
+def VBSLd : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
+ (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
+ N3RegFrm, IIC_VCNTiD,
+ "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set DPR:$Vd,
+ (v2i32 (NEONvbsl DPR:$src1, DPR:$Vn, DPR:$Vm)))]>;
+
+def : Pat<(v2i32 (or (and DPR:$Vn, DPR:$Vd),
+ (and DPR:$Vm, (vnotd DPR:$Vd)))),
+ (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>;
+
+def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
+ (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
+ N3RegFrm, IIC_VCNTiQ,
+ "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set QPR:$Vd,
+ (v4i32 (NEONvbsl QPR:$src1, QPR:$Vn, QPR:$Vm)))]>;
+
+def : Pat<(v4i32 (or (and QPR:$Vn, QPR:$Vd),
+ (and QPR:$Vm, (vnotq QPR:$Vd)))),
+ (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>;
+
+// VBIF : Vector Bitwise Insert if False
+// like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst",
+// FIXME: This instruction's encoding MAY NOT BE correct.
+def VBIFd : N3VX<1, 0, 0b11, 0b0001, 0, 1,
+ (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
+ N3RegFrm, IIC_VBINiD,
+ "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [/* For disassembly only; pattern left blank */]>;
+def VBIFq : N3VX<1, 0, 0b11, 0b0001, 1, 1,
+ (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
+ N3RegFrm, IIC_VBINiQ,
+ "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [/* For disassembly only; pattern left blank */]>;
+
+// VBIT : Vector Bitwise Insert if True
+// like VBSL but with: "vbit $dst, $src2, $src1", "$src3 = $dst",
+// FIXME: This instruction's encoding MAY NOT BE correct.
+def VBITd : N3VX<1, 0, 0b10, 0b0001, 0, 1,
+ (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
+ N3RegFrm, IIC_VBINiD,
+ "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [/* For disassembly only; pattern left blank */]>;
+def VBITq : N3VX<1, 0, 0b10, 0b0001, 1, 1,
+ (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
+ N3RegFrm, IIC_VBINiQ,
+ "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [/* For disassembly only; pattern left blank */]>;
+
+// VBIT/VBIF are not yet implemented. The TwoAddress pass will not go looking
+// for equivalent operations with different register constraints; it just
+// inserts copies.
+
+// Vector Absolute Differences.
+
+// VABD : Vector Absolute Difference
+defm VABDs : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm,
+ IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
+ "vabd", "s", int_arm_neon_vabds, 1>;
+defm VABDu : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm,
+ IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
+ "vabd", "u", int_arm_neon_vabdu, 1>;
+def VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND,
+ "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 1>;
+def VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ,
+ "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 1>;
+
+// VABDL : Vector Absolute Difference Long (Q = | D - D |)
+defm VABDLs : N3VLIntExt_QHS<0,1,0b0111,0, IIC_VSUBi4Q,
+ "vabdl", "s", int_arm_neon_vabds, zext, 1>;
+defm VABDLu : N3VLIntExt_QHS<1,1,0b0111,0, IIC_VSUBi4Q,
+ "vabdl", "u", int_arm_neon_vabdu, zext, 1>;
+
+// VABA : Vector Absolute Difference and Accumulate
+defm VABAs : N3VIntOp_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
+ "vaba", "s", int_arm_neon_vabds, add>;
+defm VABAu : N3VIntOp_QHS<1,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
+ "vaba", "u", int_arm_neon_vabdu, add>;
+
+// VABAL : Vector Absolute Difference and Accumulate Long (Q += | D - D |)
+defm VABALs : N3VLIntExtOp_QHS<0,1,0b0101,0, IIC_VABAD,
+ "vabal", "s", int_arm_neon_vabds, zext, add>;
+defm VABALu : N3VLIntExtOp_QHS<1,1,0b0101,0, IIC_VABAD,
+ "vabal", "u", int_arm_neon_vabdu, zext, add>;
+
+// Vector Maximum and Minimum.
+
+// VMAX : Vector Maximum
+defm VMAXs : N3VInt_QHS<0, 0, 0b0110, 0, N3RegFrm,
+ IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
+ "vmax", "s", int_arm_neon_vmaxs, 1>;
+defm VMAXu : N3VInt_QHS<1, 0, 0b0110, 0, N3RegFrm,
+ IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
+ "vmax", "u", int_arm_neon_vmaxu, 1>;
+def VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBIND,
+ "vmax", "f32",
+ v2f32, v2f32, int_arm_neon_vmaxs, 1>;
+def VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ,
+ "vmax", "f32",
+ v4f32, v4f32, int_arm_neon_vmaxs, 1>;
+
+// VMIN : Vector Minimum
+defm VMINs : N3VInt_QHS<0, 0, 0b0110, 1, N3RegFrm,
+ IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
+ "vmin", "s", int_arm_neon_vmins, 1>;
+defm VMINu : N3VInt_QHS<1, 0, 0b0110, 1, N3RegFrm,
+ IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
+ "vmin", "u", int_arm_neon_vminu, 1>;
+def VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBIND,
+ "vmin", "f32",
+ v2f32, v2f32, int_arm_neon_vmins, 1>;
+def VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ,
+ "vmin", "f32",
+ v4f32, v4f32, int_arm_neon_vmins, 1>;
+
+// Vector Pairwise Operations.
+
+// VPADD : Vector Pairwise Add
+def VPADDi8 : N3VDInt<0, 0, 0b00, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
+ "vpadd", "i8",
+ v8i8, v8i8, int_arm_neon_vpadd, 0>;
+def VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
+ "vpadd", "i16",
+ v4i16, v4i16, int_arm_neon_vpadd, 0>;
+def VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
+ "vpadd", "i32",
+ v2i32, v2i32, int_arm_neon_vpadd, 0>;
+def VPADDf : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm,
+ IIC_VPBIND, "vpadd", "f32",
+ v2f32, v2f32, int_arm_neon_vpadd, 0>;
+
+// VPADDL : Vector Pairwise Add Long
+defm VPADDLs : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl", "s",
+ int_arm_neon_vpaddls>;
+defm VPADDLu : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00101, 0, "vpaddl", "u",
+ int_arm_neon_vpaddlu>;
+
+// VPADAL : Vector Pairwise Add and Accumulate Long
+defm VPADALs : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01100, 0, "vpadal", "s",
+ int_arm_neon_vpadals>;
+defm VPADALu : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01101, 0, "vpadal", "u",
+ int_arm_neon_vpadalu>;
+
+// VPMAX : Vector Pairwise Maximum
+def VPMAXs8 : N3VDInt<0, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
+ "s8", v8i8, v8i8, int_arm_neon_vpmaxs, 0>;
+def VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
+ "s16", v4i16, v4i16, int_arm_neon_vpmaxs, 0>;
+def VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
+ "s32", v2i32, v2i32, int_arm_neon_vpmaxs, 0>;
+def VPMAXu8 : N3VDInt<1, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
+ "u8", v8i8, v8i8, int_arm_neon_vpmaxu, 0>;
+def VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
+ "u16", v4i16, v4i16, int_arm_neon_vpmaxu, 0>;
+def VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
+ "u32", v2i32, v2i32, int_arm_neon_vpmaxu, 0>;
+def VPMAXf : N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmax",
+ "f32", v2f32, v2f32, int_arm_neon_vpmaxs, 0>;
+
+// VPMIN : Vector Pairwise Minimum
+def VPMINs8 : N3VDInt<0, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
+ "s8", v8i8, v8i8, int_arm_neon_vpmins, 0>;
+def VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
+ "s16", v4i16, v4i16, int_arm_neon_vpmins, 0>;
+def VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
+ "s32", v2i32, v2i32, int_arm_neon_vpmins, 0>;
+def VPMINu8 : N3VDInt<1, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
+ "u8", v8i8, v8i8, int_arm_neon_vpminu, 0>;
+def VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
+ "u16", v4i16, v4i16, int_arm_neon_vpminu, 0>;
+def VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
+ "u32", v2i32, v2i32, int_arm_neon_vpminu, 0>;
+def VPMINf : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmin",
+ "f32", v2f32, v2f32, int_arm_neon_vpmins, 0>;
+
+// Vector Reciprocal and Reciprocal Square Root Estimate and Step.
+
+// VRECPE : Vector Reciprocal Estimate
+def VRECPEd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
+ IIC_VUNAD, "vrecpe", "u32",
+ v2i32, v2i32, int_arm_neon_vrecpe>;
+def VRECPEq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
+ IIC_VUNAQ, "vrecpe", "u32",
+ v4i32, v4i32, int_arm_neon_vrecpe>;
+def VRECPEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
+ IIC_VUNAD, "vrecpe", "f32",
+ v2f32, v2f32, int_arm_neon_vrecpe>;
+def VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
+ IIC_VUNAQ, "vrecpe", "f32",
+ v4f32, v4f32, int_arm_neon_vrecpe>;
+
+// VRECPS : Vector Reciprocal Step
+def VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
+ IIC_VRECSD, "vrecps", "f32",
+ v2f32, v2f32, int_arm_neon_vrecps, 1>;
+def VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
+ IIC_VRECSQ, "vrecps", "f32",
+ v4f32, v4f32, int_arm_neon_vrecps, 1>;
+
+// VRSQRTE : Vector Reciprocal Square Root Estimate
+def VRSQRTEd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
+ IIC_VUNAD, "vrsqrte", "u32",
+ v2i32, v2i32, int_arm_neon_vrsqrte>;
+def VRSQRTEq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
+ IIC_VUNAQ, "vrsqrte", "u32",
+ v4i32, v4i32, int_arm_neon_vrsqrte>;
+def VRSQRTEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
+ IIC_VUNAD, "vrsqrte", "f32",
+ v2f32, v2f32, int_arm_neon_vrsqrte>;
+def VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
+ IIC_VUNAQ, "vrsqrte", "f32",
+ v4f32, v4f32, int_arm_neon_vrsqrte>;
+
+// VRSQRTS : Vector Reciprocal Square Root Step
+def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
+ IIC_VRECSD, "vrsqrts", "f32",
+ v2f32, v2f32, int_arm_neon_vrsqrts, 1>;
+def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
+ IIC_VRECSQ, "vrsqrts", "f32",
+ v4f32, v4f32, int_arm_neon_vrsqrts, 1>;
+
+// Vector Shifts.
+
+// VSHL : Vector Shift
+defm VSHLs : N3VInt_QHSDSh<0, 0, 0b0100, 0, N3RegVShFrm,
+ IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
+ "vshl", "s", int_arm_neon_vshifts>;
+defm VSHLu : N3VInt_QHSDSh<1, 0, 0b0100, 0, N3RegVShFrm,
+ IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
+ "vshl", "u", int_arm_neon_vshiftu>;
+
+// VSHL : Vector Shift Left (Immediate)
+defm VSHLi : N2VShL_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", NEONvshl>;
+
+// VSHR : Vector Shift Right (Immediate)
+defm VSHRs : N2VShR_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s",NEONvshrs>;
+defm VSHRu : N2VShR_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u",NEONvshru>;
+
+// VSHLL : Vector Shift Left Long
+defm VSHLLs : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s", NEONvshlls>;
+defm VSHLLu : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll", "u", NEONvshllu>;
+
+// VSHLL : Vector Shift Left Long (with maximum shift count)
+class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
+ bit op6, bit op4, string OpcodeStr, string Dt, ValueType ResTy,
+ ValueType OpTy, SDNode OpNode>
+ : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr, Dt,
+ ResTy, OpTy, OpNode> {
+ let Inst{21-16} = op21_16;
+ let DecoderMethod = "DecodeVSHLMaxInstruction";
+}
+def VSHLLi8 : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll", "i8",
+ v8i16, v8i8, NEONvshlli>;
+def VSHLLi16 : N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll", "i16",
+ v4i32, v4i16, NEONvshlli>;
+def VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32",
+ v2i64, v2i32, NEONvshlli>;
+
+// VSHRN : Vector Shift Right and Narrow
+defm VSHRN : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i",
+ NEONvshrn>;
+
+// VRSHL : Vector Rounding Shift
+defm VRSHLs : N3VInt_QHSDSh<0, 0, 0b0101, 0, N3RegVShFrm,
+ IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
+ "vrshl", "s", int_arm_neon_vrshifts>;
+defm VRSHLu : N3VInt_QHSDSh<1, 0, 0b0101, 0, N3RegVShFrm,
+ IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
+ "vrshl", "u", int_arm_neon_vrshiftu>;
+// VRSHR : Vector Rounding Shift Right
+defm VRSHRs : N2VShR_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s",NEONvrshrs>;
+defm VRSHRu : N2VShR_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u",NEONvrshru>;
+
+// VRSHRN : Vector Rounding Shift Right and Narrow
+defm VRSHRN : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i",
+ NEONvrshrn>;
+
+// VQSHL : Vector Saturating Shift
+defm VQSHLs : N3VInt_QHSDSh<0, 0, 0b0100, 1, N3RegVShFrm,
+ IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
+ "vqshl", "s", int_arm_neon_vqshifts>;
+defm VQSHLu : N3VInt_QHSDSh<1, 0, 0b0100, 1, N3RegVShFrm,
+ IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
+ "vqshl", "u", int_arm_neon_vqshiftu>;
+// VQSHL : Vector Saturating Shift Left (Immediate)
+defm VQSHLsi : N2VShL_QHSD<0,1,0b0111,1, IIC_VSHLi4D, "vqshl", "s",NEONvqshls>;
+defm VQSHLui : N2VShL_QHSD<1,1,0b0111,1, IIC_VSHLi4D, "vqshl", "u",NEONvqshlu>;
+
+// VQSHLU : Vector Saturating Shift Left (Immediate, Unsigned)
+defm VQSHLsu : N2VShL_QHSD<1,1,0b0110,1, IIC_VSHLi4D,"vqshlu","s",NEONvqshlsu>;
+
+// VQSHRN : Vector Saturating Shift Right and Narrow
+defm VQSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "s",
+ NEONvqshrns>;
+defm VQSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "u",
+ NEONvqshrnu>;
+
+// VQSHRUN : Vector Saturating Shift Right and Narrow (Unsigned)
+defm VQSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D, "vqshrun", "s",
+ NEONvqshrnsu>;
+
+// VQRSHL : Vector Saturating Rounding Shift
+defm VQRSHLs : N3VInt_QHSDSh<0, 0, 0b0101, 1, N3RegVShFrm,
+ IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
+ "vqrshl", "s", int_arm_neon_vqrshifts>;
+defm VQRSHLu : N3VInt_QHSDSh<1, 0, 0b0101, 1, N3RegVShFrm,
+ IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
+ "vqrshl", "u", int_arm_neon_vqrshiftu>;
+
+// VQRSHRN : Vector Saturating Rounding Shift Right and Narrow
+defm VQRSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "s",
+ NEONvqrshrns>;
+defm VQRSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "u",
+ NEONvqrshrnu>;
+
+// VQRSHRUN : Vector Saturating Rounding Shift Right and Narrow (Unsigned)
+defm VQRSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vqrshrun", "s",
+ NEONvqrshrnsu>;
+
+// VSRA : Vector Shift Right and Accumulate
+defm VSRAs : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra", "s", NEONvshrs>;
+defm VSRAu : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra", "u", NEONvshru>;
+// VRSRA : Vector Rounding Shift Right and Accumulate
+defm VRSRAs : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra", "s", NEONvrshrs>;
+defm VRSRAu : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra", "u", NEONvrshru>;
+
+// VSLI : Vector Shift Left and Insert
+defm VSLI : N2VShInsL_QHSD<1, 1, 0b0101, 1, "vsli">;
+
+// VSRI : Vector Shift Right and Insert
+defm VSRI : N2VShInsR_QHSD<1, 1, 0b0100, 1, "vsri">;
+
+// Vector Absolute and Saturating Absolute.
+
+// VABS : Vector Absolute Value
+defm VABS : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0,
+ IIC_VUNAiD, IIC_VUNAiQ, "vabs", "s",
+ int_arm_neon_vabs>;
+def VABSfd : N2VDInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
+ IIC_VUNAD, "vabs", "f32",
+ v2f32, v2f32, int_arm_neon_vabs>;
+def VABSfq : N2VQInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
+ IIC_VUNAQ, "vabs", "f32",
+ v4f32, v4f32, int_arm_neon_vabs>;
+
+// VQABS : Vector Saturating Absolute Value
+defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0,
+ IIC_VQUNAiD, IIC_VQUNAiQ, "vqabs", "s",
+ int_arm_neon_vqabs>;
+
+// Vector Negate.
+
+def vnegd : PatFrag<(ops node:$in),
+ (sub (bitconvert (v2i32 NEONimmAllZerosV)), node:$in)>;
+def vnegq : PatFrag<(ops node:$in),
+ (sub (bitconvert (v4i32 NEONimmAllZerosV)), node:$in)>;
+
+class VNEGD<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
+ : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0, (outs DPR:$Vd), (ins DPR:$Vm),
+ IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set DPR:$Vd, (Ty (vnegd DPR:$Vm)))]>;
+class VNEGQ<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
+ : N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0, (outs QPR:$Vd), (ins QPR:$Vm),
+ IIC_VSHLiQ, OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set QPR:$Vd, (Ty (vnegq QPR:$Vm)))]>;
+
+// VNEG : Vector Negate (integer)
+def VNEGs8d : VNEGD<0b00, "vneg", "s8", v8i8>;
+def VNEGs16d : VNEGD<0b01, "vneg", "s16", v4i16>;
+def VNEGs32d : VNEGD<0b10, "vneg", "s32", v2i32>;
+def VNEGs8q : VNEGQ<0b00, "vneg", "s8", v16i8>;
+def VNEGs16q : VNEGQ<0b01, "vneg", "s16", v8i16>;
+def VNEGs32q : VNEGQ<0b10, "vneg", "s32", v4i32>;
+
+// VNEG : Vector Negate (floating-point)
+def VNEGfd : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
+ (outs DPR:$Vd), (ins DPR:$Vm), IIC_VUNAD,
+ "vneg", "f32", "$Vd, $Vm", "",
+ [(set DPR:$Vd, (v2f32 (fneg DPR:$Vm)))]>;
+def VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0,
+ (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ,
+ "vneg", "f32", "$Vd, $Vm", "",
+ [(set QPR:$Vd, (v4f32 (fneg QPR:$Vm)))]>;
+
+def : Pat<(v8i8 (vnegd DPR:$src)), (VNEGs8d DPR:$src)>;
+def : Pat<(v4i16 (vnegd DPR:$src)), (VNEGs16d DPR:$src)>;
+def : Pat<(v2i32 (vnegd DPR:$src)), (VNEGs32d DPR:$src)>;
+def : Pat<(v16i8 (vnegq QPR:$src)), (VNEGs8q QPR:$src)>;
+def : Pat<(v8i16 (vnegq QPR:$src)), (VNEGs16q QPR:$src)>;
+def : Pat<(v4i32 (vnegq QPR:$src)), (VNEGs32q QPR:$src)>;
+
+// VQNEG : Vector Saturating Negate
+defm VQNEG : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0,
+ IIC_VQUNAiD, IIC_VQUNAiQ, "vqneg", "s",
+ int_arm_neon_vqneg>;
+
+// Vector Bit Counting Operations.
+
+// VCLS : Vector Count Leading Sign Bits
+defm VCLS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01000, 0,
+ IIC_VCNTiD, IIC_VCNTiQ, "vcls", "s",
+ int_arm_neon_vcls>;
+// VCLZ : Vector Count Leading Zeros
+defm VCLZ : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0,
+ IIC_VCNTiD, IIC_VCNTiQ, "vclz", "i",
+ int_arm_neon_vclz>;
+// VCNT : Vector Count One Bits
+def VCNTd : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
+ IIC_VCNTiD, "vcnt", "8",
+ v8i8, v8i8, int_arm_neon_vcnt>;
+def VCNTq : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
+ IIC_VCNTiQ, "vcnt", "8",
+ v16i8, v16i8, int_arm_neon_vcnt>;
+
+// Vector Swap -- for disassembly only.
+def VSWPd : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0,
+ (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
+ "vswp", "$Vd, $Vm", "", []>;
+def VSWPq : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0,
+ (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
+ "vswp", "$Vd, $Vm", "", []>;
+
+// Vector Move Operations.
+
+// VMOV : Vector Move (Register)
+def : InstAlias<"vmov${p} $Vd, $Vm",
+ (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
+def : InstAlias<"vmov${p} $Vd, $Vm",
+ (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;
+
+// VMOV : Vector Move (Immediate)
+
+let isReMaterializable = 1 in {
+def VMOVv8i8 : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$Vd),
+ (ins nModImm:$SIMM), IIC_VMOVImm,
+ "vmov", "i8", "$Vd, $SIMM", "",
+ [(set DPR:$Vd, (v8i8 (NEONvmovImm timm:$SIMM)))]>;
+def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$Vd),
+ (ins nModImm:$SIMM), IIC_VMOVImm,
+ "vmov", "i8", "$Vd, $SIMM", "",
+ [(set QPR:$Vd, (v16i8 (NEONvmovImm timm:$SIMM)))]>;
+
+def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 0, 1, (outs DPR:$Vd),
+ (ins nModImm:$SIMM), IIC_VMOVImm,
+ "vmov", "i16", "$Vd, $SIMM", "",
+ [(set DPR:$Vd, (v4i16 (NEONvmovImm timm:$SIMM)))]> {
+ let Inst{9} = SIMM{9};
+}
+
+def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 0, 1, (outs QPR:$Vd),
+ (ins nModImm:$SIMM), IIC_VMOVImm,
+ "vmov", "i16", "$Vd, $SIMM", "",
+ [(set QPR:$Vd, (v8i16 (NEONvmovImm timm:$SIMM)))]> {
+ let Inst{9} = SIMM{9};
+}
+
+def VMOVv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 0, 1, (outs DPR:$Vd),
+ (ins nModImm:$SIMM), IIC_VMOVImm,
+ "vmov", "i32", "$Vd, $SIMM", "",
+ [(set DPR:$Vd, (v2i32 (NEONvmovImm timm:$SIMM)))]> {
+ let Inst{11-8} = SIMM{11-8};
+}
+
+def VMOVv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 0, 1, (outs QPR:$Vd),
+ (ins nModImm:$SIMM), IIC_VMOVImm,
+ "vmov", "i32", "$Vd, $SIMM", "",
+ [(set QPR:$Vd, (v4i32 (NEONvmovImm timm:$SIMM)))]> {
+ let Inst{11-8} = SIMM{11-8};
+}
+
+def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$Vd),
+ (ins nModImm:$SIMM), IIC_VMOVImm,
+ "vmov", "i64", "$Vd, $SIMM", "",
+ [(set DPR:$Vd, (v1i64 (NEONvmovImm timm:$SIMM)))]>;
+def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$Vd),
+ (ins nModImm:$SIMM), IIC_VMOVImm,
+ "vmov", "i64", "$Vd, $SIMM", "",
+ [(set QPR:$Vd, (v2i64 (NEONvmovImm timm:$SIMM)))]>;
+} // isReMaterializable
+
+// VMOV : Vector Get Lane (move scalar to ARM core register)
+
+def VGETLNs8 : NVGetLane<{1,1,1,0,0,1,?,1}, 0b1011, {?,?},
+ (outs GPR:$R), (ins DPR:$V, nohash_imm:$lane),
+ IIC_VMOVSI, "vmov", "s8", "$R, $V[$lane]",
+ [(set GPR:$R, (NEONvgetlanes (v8i8 DPR:$V),
+ imm:$lane))]> {
+ let Inst{21} = lane{2};
+ let Inst{6-5} = lane{1-0};
+}
+def VGETLNs16 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, {?,1},
+ (outs GPR:$R), (ins DPR:$V, nohash_imm:$lane),
+ IIC_VMOVSI, "vmov", "s16", "$R, $V[$lane]",
+ [(set GPR:$R, (NEONvgetlanes (v4i16 DPR:$V),
+ imm:$lane))]> {
+ let Inst{21} = lane{1};
+ let Inst{6} = lane{0};
+}
+def VGETLNu8 : NVGetLane<{1,1,1,0,1,1,?,1}, 0b1011, {?,?},
+ (outs GPR:$R), (ins DPR:$V, nohash_imm:$lane),
+ IIC_VMOVSI, "vmov", "u8", "$R, $V[$lane]",
+ [(set GPR:$R, (NEONvgetlaneu (v8i8 DPR:$V),
+ imm:$lane))]> {
+ let Inst{21} = lane{2};
+ let Inst{6-5} = lane{1-0};
+}
+def VGETLNu16 : NVGetLane<{1,1,1,0,1,0,?,1}, 0b1011, {?,1},
+ (outs GPR:$R), (ins DPR:$V, nohash_imm:$lane),
+ IIC_VMOVSI, "vmov", "u16", "$R, $V[$lane]",
+ [(set GPR:$R, (NEONvgetlaneu (v4i16 DPR:$V),
+ imm:$lane))]> {
+ let Inst{21} = lane{1};
+ let Inst{6} = lane{0};
+}
+def VGETLNi32 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00,
+ (outs GPR:$R), (ins DPR:$V, nohash_imm:$lane),
+ IIC_VMOVSI, "vmov", "32", "$R, $V[$lane]",
+ [(set GPR:$R, (extractelt (v2i32 DPR:$V),
+ imm:$lane))]> {
+ let Inst{21} = lane{0};
+}
+// def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td
+def : Pat<(NEONvgetlanes (v16i8 QPR:$src), imm:$lane),
+ (VGETLNs8 (v8i8 (EXTRACT_SUBREG QPR:$src,
+ (DSubReg_i8_reg imm:$lane))),
+ (SubReg_i8_lane imm:$lane))>;
+def : Pat<(NEONvgetlanes (v8i16 QPR:$src), imm:$lane),
+ (VGETLNs16 (v4i16 (EXTRACT_SUBREG QPR:$src,
+ (DSubReg_i16_reg imm:$lane))),
+ (SubReg_i16_lane imm:$lane))>;
+def : Pat<(NEONvgetlaneu (v16i8 QPR:$src), imm:$lane),
+ (VGETLNu8 (v8i8 (EXTRACT_SUBREG QPR:$src,
+ (DSubReg_i8_reg imm:$lane))),
+ (SubReg_i8_lane imm:$lane))>;
+def : Pat<(NEONvgetlaneu (v8i16 QPR:$src), imm:$lane),
+ (VGETLNu16 (v4i16 (EXTRACT_SUBREG QPR:$src,
+ (DSubReg_i16_reg imm:$lane))),
+ (SubReg_i16_lane imm:$lane))>;
+def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
+ (VGETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane))>;
+def : Pat<(extractelt (v2f32 DPR:$src1), imm:$src2),
+ (EXTRACT_SUBREG (v2f32 (COPY_TO_REGCLASS (v2f32 DPR:$src1),DPR_VFP2)),
+ (SSubReg_f32_reg imm:$src2))>;
+def : Pat<(extractelt (v4f32 QPR:$src1), imm:$src2),
+ (EXTRACT_SUBREG (v4f32 (COPY_TO_REGCLASS (v4f32 QPR:$src1),QPR_VFP2)),
+ (SSubReg_f32_reg imm:$src2))>;
+//def : Pat<(extractelt (v2i64 QPR:$src1), imm:$src2),
+// (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
+def : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2),
+ (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
+
+
+// VMOV : Vector Set Lane (move ARM core register to scalar)
+
+let Constraints = "$src1 = $V" in {
+def VSETLNi8 : NVSetLane<{1,1,1,0,0,1,?,0}, 0b1011, {?,?}, (outs DPR:$V),
+ (ins DPR:$src1, GPR:$R, nohash_imm:$lane),
+ IIC_VMOVISL, "vmov", "8", "$V[$lane], $R",
+ [(set DPR:$V, (vector_insert (v8i8 DPR:$src1),
+ GPR:$R, imm:$lane))]> {
+ let Inst{21} = lane{2};
+ let Inst{6-5} = lane{1-0};
+}
+def VSETLNi16 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, {?,1}, (outs DPR:$V),
+ (ins DPR:$src1, GPR:$R, nohash_imm:$lane),
+ IIC_VMOVISL, "vmov", "16", "$V[$lane], $R",
+ [(set DPR:$V, (vector_insert (v4i16 DPR:$src1),
+ GPR:$R, imm:$lane))]> {
+ let Inst{21} = lane{1};
+ let Inst{6} = lane{0};
+}
+def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$V),
+ (ins DPR:$src1, GPR:$R, nohash_imm:$lane),
+ IIC_VMOVISL, "vmov", "32", "$V[$lane], $R",
+ [(set DPR:$V, (insertelt (v2i32 DPR:$src1),
+ GPR:$R, imm:$lane))]> {
+ let Inst{21} = lane{0};
+}
+}
+def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane),
+ (v16i8 (INSERT_SUBREG QPR:$src1,
+ (v8i8 (VSETLNi8 (v8i8 (EXTRACT_SUBREG QPR:$src1,
+ (DSubReg_i8_reg imm:$lane))),
+ GPR:$src2, (SubReg_i8_lane imm:$lane))),
+ (DSubReg_i8_reg imm:$lane)))>;
+def : Pat<(vector_insert (v8i16 QPR:$src1), GPR:$src2, imm:$lane),
+ (v8i16 (INSERT_SUBREG QPR:$src1,
+ (v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1,
+ (DSubReg_i16_reg imm:$lane))),
+ GPR:$src2, (SubReg_i16_lane imm:$lane))),
+ (DSubReg_i16_reg imm:$lane)))>;
+def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane),
+ (v4i32 (INSERT_SUBREG QPR:$src1,
+ (v2i32 (VSETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src1,
+ (DSubReg_i32_reg imm:$lane))),
+ GPR:$src2, (SubReg_i32_lane imm:$lane))),
+ (DSubReg_i32_reg imm:$lane)))>;
+
+def : Pat<(v2f32 (insertelt DPR:$src1, SPR:$src2, imm:$src3)),
+ (INSERT_SUBREG (v2f32 (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2)),
+ SPR:$src2, (SSubReg_f32_reg imm:$src3))>;
+def : Pat<(v4f32 (insertelt QPR:$src1, SPR:$src2, imm:$src3)),
+ (INSERT_SUBREG (v4f32 (COPY_TO_REGCLASS QPR:$src1, QPR_VFP2)),
+ SPR:$src2, (SSubReg_f32_reg imm:$src3))>;
+
+//def : Pat<(v2i64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
+// (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;
+def : Pat<(v2f64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
+ (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;
+
+def : Pat<(v2f32 (scalar_to_vector SPR:$src)),
+ (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
+def : Pat<(v2f64 (scalar_to_vector (f64 DPR:$src))),
+ (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
+def : Pat<(v4f32 (scalar_to_vector SPR:$src)),
+ (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
+
+def : Pat<(v8i8 (scalar_to_vector GPR:$src)),
+ (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
+def : Pat<(v4i16 (scalar_to_vector GPR:$src)),
+ (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
+def : Pat<(v2i32 (scalar_to_vector GPR:$src)),
+ (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
+
+def : Pat<(v16i8 (scalar_to_vector GPR:$src)),
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
+ dsub_0)>;
+def : Pat<(v8i16 (scalar_to_vector GPR:$src)),
+ (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
+ (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
+ dsub_0)>;
+def : Pat<(v4i32 (scalar_to_vector GPR:$src)),
+ (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
+ (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
+ dsub_0)>;
+
+// VDUP : Vector Duplicate (from ARM core register to all elements)
+
+class VDUPD<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
+ : NVDup<opcod1, 0b1011, opcod3, (outs DPR:$V), (ins GPR:$R),
+ IIC_VMOVIS, "vdup", Dt, "$V, $R",
+ [(set DPR:$V, (Ty (NEONvdup (i32 GPR:$R))))]>;
+class VDUPQ<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
+ : NVDup<opcod1, 0b1011, opcod3, (outs QPR:$V), (ins GPR:$R),
+ IIC_VMOVIS, "vdup", Dt, "$V, $R",
+ [(set QPR:$V, (Ty (NEONvdup (i32 GPR:$R))))]>;
+
+def VDUP8d : VDUPD<0b11101100, 0b00, "8", v8i8>;
+def VDUP16d : VDUPD<0b11101000, 0b01, "16", v4i16>;
+def VDUP32d : VDUPD<0b11101000, 0b00, "32", v2i32>;
+def VDUP8q : VDUPQ<0b11101110, 0b00, "8", v16i8>;
+def VDUP16q : VDUPQ<0b11101010, 0b01, "16", v8i16>;
+def VDUP32q : VDUPQ<0b11101010, 0b00, "32", v4i32>;
+
+def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32d GPR:$R)>;
+def : Pat<(v4f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32q GPR:$R)>;
+
+// VDUP : Vector Duplicate Lane (from scalar to all elements)
+
+class VDUPLND<bits<4> op19_16, string OpcodeStr, string Dt,
+ ValueType Ty, Operand IdxTy>
+ : NVDupLane<op19_16, 0, (outs DPR:$Vd), (ins DPR:$Vm, IdxTy:$lane),
+ IIC_VMOVD, OpcodeStr, Dt, "$Vd, $Vm$lane",
+ [(set DPR:$Vd, (Ty (NEONvduplane (Ty DPR:$Vm), imm:$lane)))]>;
+
+class VDUPLNQ<bits<4> op19_16, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Operand IdxTy>
+ : NVDupLane<op19_16, 1, (outs QPR:$Vd), (ins DPR:$Vm, IdxTy:$lane),
+ IIC_VMOVQ, OpcodeStr, Dt, "$Vd, $Vm$lane",
+ [(set QPR:$Vd, (ResTy (NEONvduplane (OpTy DPR:$Vm),
+ VectorIndex32:$lane)))]>;
+
+// Inst{19-16} is partially specified depending on the element size.
+
+def VDUPLN8d : VDUPLND<{?,?,?,1}, "vdup", "8", v8i8, VectorIndex8> {
+ bits<3> lane;
+ let Inst{19-17} = lane{2-0};
+}
+def VDUPLN16d : VDUPLND<{?,?,1,0}, "vdup", "16", v4i16, VectorIndex16> {
+ bits<2> lane;
+ let Inst{19-18} = lane{1-0};
+}
+def VDUPLN32d : VDUPLND<{?,1,0,0}, "vdup", "32", v2i32, VectorIndex32> {
+ bits<1> lane;
+ let Inst{19} = lane{0};
+}
+def VDUPLN8q : VDUPLNQ<{?,?,?,1}, "vdup", "8", v16i8, v8i8, VectorIndex8> {
+ bits<3> lane;
+ let Inst{19-17} = lane{2-0};
+}
+def VDUPLN16q : VDUPLNQ<{?,?,1,0}, "vdup", "16", v8i16, v4i16, VectorIndex16> {
+ bits<2> lane;
+ let Inst{19-18} = lane{1-0};
+}
+def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32, VectorIndex32> {
+ bits<1> lane;
+ let Inst{19} = lane{0};
+}
+
+def : Pat<(v2f32 (NEONvduplane (v2f32 DPR:$Vm), imm:$lane)),
+ (VDUPLN32d DPR:$Vm, imm:$lane)>;
+
+def : Pat<(v4f32 (NEONvduplane (v2f32 DPR:$Vm), imm:$lane)),
+ (VDUPLN32q DPR:$Vm, imm:$lane)>;
+
+def : Pat<(v16i8 (NEONvduplane (v16i8 QPR:$src), imm:$lane)),
+ (v16i8 (VDUPLN8q (v8i8 (EXTRACT_SUBREG QPR:$src,
+ (DSubReg_i8_reg imm:$lane))),
+ (SubReg_i8_lane imm:$lane)))>;
+def : Pat<(v8i16 (NEONvduplane (v8i16 QPR:$src), imm:$lane)),
+ (v8i16 (VDUPLN16q (v4i16 (EXTRACT_SUBREG QPR:$src,
+ (DSubReg_i16_reg imm:$lane))),
+ (SubReg_i16_lane imm:$lane)))>;
+def : Pat<(v4i32 (NEONvduplane (v4i32 QPR:$src), imm:$lane)),
+ (v4i32 (VDUPLN32q (v2i32 (EXTRACT_SUBREG QPR:$src,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane)))>;
+def : Pat<(v4f32 (NEONvduplane (v4f32 QPR:$src), imm:$lane)),
+ (v4f32 (VDUPLN32q (v2f32 (EXTRACT_SUBREG QPR:$src,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane)))>;
+
+def VDUPfdf : PseudoNeonI<(outs DPR:$dst), (ins SPR:$src), IIC_VMOVD, "",
+ [(set DPR:$dst, (v2f32 (NEONvdup (f32 SPR:$src))))]>;
+def VDUPfqf : PseudoNeonI<(outs QPR:$dst), (ins SPR:$src), IIC_VMOVD, "",
+ [(set QPR:$dst, (v4f32 (NEONvdup (f32 SPR:$src))))]>;
+
+// VMOVN : Vector Narrowing Move
+defm VMOVN : N2VN_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVN,
+ "vmovn", "i", trunc>;
+// VQMOVN : Vector Saturating Narrowing Move
+defm VQMOVNs : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, IIC_VQUNAiD,
+ "vqmovn", "s", int_arm_neon_vqmovns>;
+defm VQMOVNu : N2VNInt_HSD<0b11,0b11,0b10,0b00101,1,0, IIC_VQUNAiD,
+ "vqmovn", "u", int_arm_neon_vqmovnu>;
+defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD,
+ "vqmovun", "s", int_arm_neon_vqmovnsu>;
+// VMOVL : Vector Lengthening Move
+defm VMOVLs : N2VL_QHS<0b01,0b10100,0,1, "vmovl", "s", sext>;
+defm VMOVLu : N2VL_QHS<0b11,0b10100,0,1, "vmovl", "u", zext>;
+
+// Vector Conversions.
+
+// VCVT : Vector Convert Between Floating-Point and Integers
+def VCVTf2sd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
+ v2i32, v2f32, fp_to_sint>;
+def VCVTf2ud : N2VD<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
+ v2i32, v2f32, fp_to_uint>;
+def VCVTs2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
+ v2f32, v2i32, sint_to_fp>;
+def VCVTu2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
+ v2f32, v2i32, uint_to_fp>;
+
+def VCVTf2sq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
+ v4i32, v4f32, fp_to_sint>;
+def VCVTf2uq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
+ v4i32, v4f32, fp_to_uint>;
+def VCVTs2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
+ v4f32, v4i32, sint_to_fp>;
+def VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
+ v4f32, v4i32, uint_to_fp>;
+
+// VCVT : Vector Convert Between Floating-Point and Fixed-Point.
+def VCVTf2xsd : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
+ v2i32, v2f32, int_arm_neon_vcvtfp2fxs>;
+def VCVTf2xud : N2VCvtD<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
+ v2i32, v2f32, int_arm_neon_vcvtfp2fxu>;
+def VCVTxs2fd : N2VCvtD<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
+ v2f32, v2i32, int_arm_neon_vcvtfxs2fp>;
+def VCVTxu2fd : N2VCvtD<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
+ v2f32, v2i32, int_arm_neon_vcvtfxu2fp>;
+
+def VCVTf2xsq : N2VCvtQ<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
+ v4i32, v4f32, int_arm_neon_vcvtfp2fxs>;
+def VCVTf2xuq : N2VCvtQ<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
+ v4i32, v4f32, int_arm_neon_vcvtfp2fxu>;
+def VCVTxs2fq : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
+ v4f32, v4i32, int_arm_neon_vcvtfxs2fp>;
+def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
+ v4f32, v4i32, int_arm_neon_vcvtfxu2fp>;
+
+// VCVT : Vector Convert Between Half-Precision and Single-Precision.
+def VCVTf2h : N2VNInt<0b11, 0b11, 0b01, 0b10, 0b01100, 0, 0,
+ IIC_VUNAQ, "vcvt", "f16.f32",
+ v4i16, v4f32, int_arm_neon_vcvtfp2hf>,
+ Requires<[HasNEON, HasFP16]>;
+def VCVTh2f : N2VLInt<0b11, 0b11, 0b01, 0b10, 0b01110, 0, 0,
+ IIC_VUNAQ, "vcvt", "f32.f16",
+ v4f32, v4i16, int_arm_neon_vcvthf2fp>,
+ Requires<[HasNEON, HasFP16]>;
+
+// Vector Reverse.
+
+// VREV64 : Vector Reverse elements within 64-bit doublewords
+
+class VREV64D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
+ : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 0, 0, (outs DPR:$Vd),
+ (ins DPR:$Vm), IIC_VMOVD,
+ OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set DPR:$Vd, (Ty (NEONvrev64 (Ty DPR:$Vm))))]>;
+class VREV64Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
+ : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 1, 0, (outs QPR:$Vd),
+ (ins QPR:$Vm), IIC_VMOVQ,
+ OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set QPR:$Vd, (Ty (NEONvrev64 (Ty QPR:$Vm))))]>;
+
+def VREV64d8 : VREV64D<0b00, "vrev64", "8", v8i8>;
+def VREV64d16 : VREV64D<0b01, "vrev64", "16", v4i16>;
+def VREV64d32 : VREV64D<0b10, "vrev64", "32", v2i32>;
+def : Pat<(v2f32 (NEONvrev64 (v2f32 DPR:$Vm))), (VREV64d32 DPR:$Vm)>;
+
+def VREV64q8 : VREV64Q<0b00, "vrev64", "8", v16i8>;
+def VREV64q16 : VREV64Q<0b01, "vrev64", "16", v8i16>;
+def VREV64q32 : VREV64Q<0b10, "vrev64", "32", v4i32>;
+def : Pat<(v4f32 (NEONvrev64 (v4f32 QPR:$Vm))), (VREV64q32 QPR:$Vm)>;
+
+// VREV32 : Vector Reverse elements within 32-bit words
+
+class VREV32D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
+ : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 0, 0, (outs DPR:$Vd),
+ (ins DPR:$Vm), IIC_VMOVD,
+ OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set DPR:$Vd, (Ty (NEONvrev32 (Ty DPR:$Vm))))]>;
+class VREV32Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
+ : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 1, 0, (outs QPR:$Vd),
+ (ins QPR:$Vm), IIC_VMOVQ,
+ OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set QPR:$Vd, (Ty (NEONvrev32 (Ty QPR:$Vm))))]>;
+
+def VREV32d8 : VREV32D<0b00, "vrev32", "8", v8i8>;
+def VREV32d16 : VREV32D<0b01, "vrev32", "16", v4i16>;
+
+def VREV32q8 : VREV32Q<0b00, "vrev32", "8", v16i8>;
+def VREV32q16 : VREV32Q<0b01, "vrev32", "16", v8i16>;
+
+// VREV16 : Vector Reverse elements within 16-bit halfwords
+
+class VREV16D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
+ : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 0, 0, (outs DPR:$Vd),
+ (ins DPR:$Vm), IIC_VMOVD,
+ OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set DPR:$Vd, (Ty (NEONvrev16 (Ty DPR:$Vm))))]>;
+class VREV16Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
+ : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 1, 0, (outs QPR:$Vd),
+ (ins QPR:$Vm), IIC_VMOVQ,
+ OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set QPR:$Vd, (Ty (NEONvrev16 (Ty QPR:$Vm))))]>;
+
+def VREV16d8 : VREV16D<0b00, "vrev16", "8", v8i8>;
+def VREV16q8 : VREV16Q<0b00, "vrev16", "8", v16i8>;
+
+// Other Vector Shuffles.
+
+// Aligned extractions: really just dropping registers
+
+class AlignedVEXTq<ValueType DestTy, ValueType SrcTy, SDNodeXForm LaneCVT>
+ : Pat<(DestTy (vector_extract_subvec (SrcTy QPR:$src), (i32 imm:$start))),
+ (EXTRACT_SUBREG (SrcTy QPR:$src), (LaneCVT imm:$start))>;
+
+def : AlignedVEXTq<v8i8, v16i8, DSubReg_i8_reg>;
+
+def : AlignedVEXTq<v4i16, v8i16, DSubReg_i16_reg>;
+
+def : AlignedVEXTq<v2i32, v4i32, DSubReg_i32_reg>;
+
+def : AlignedVEXTq<v1i64, v2i64, DSubReg_f64_reg>;
+
+def : AlignedVEXTq<v2f32, v4f32, DSubReg_i32_reg>;
+
+
+// VEXT : Vector Extract
+
+class VEXTd<string OpcodeStr, string Dt, ValueType Ty>
+ : N3V<0,1,0b11,{?,?,?,?},0,0, (outs DPR:$Vd),
+ (ins DPR:$Vn, DPR:$Vm, i32imm:$index), NVExtFrm,
+ IIC_VEXTD, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
+ [(set DPR:$Vd, (Ty (NEONvext (Ty DPR:$Vn),
+ (Ty DPR:$Vm), imm:$index)))]> {
+ bits<4> index;
+ let Inst{11-8} = index{3-0};
+}
+
+class VEXTq<string OpcodeStr, string Dt, ValueType Ty>
+ : N3V<0,1,0b11,{?,?,?,?},1,0, (outs QPR:$Vd),
+ (ins QPR:$Vn, QPR:$Vm, i32imm:$index), NVExtFrm,
+ IIC_VEXTQ, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
+ [(set QPR:$Vd, (Ty (NEONvext (Ty QPR:$Vn),
+ (Ty QPR:$Vm), imm:$index)))]> {
+ bits<4> index;
+ let Inst{11-8} = index{3-0};
+}
+
+def VEXTd8 : VEXTd<"vext", "8", v8i8> {
+ let Inst{11-8} = index{3-0};
+}
+def VEXTd16 : VEXTd<"vext", "16", v4i16> {
+ let Inst{11-9} = index{2-0};
+ let Inst{8} = 0b0;
+}
+def VEXTd32 : VEXTd<"vext", "32", v2i32> {
+ let Inst{11-10} = index{1-0};
+ let Inst{9-8} = 0b00;
+}
+def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn),
+ (v2f32 DPR:$Vm),
+ (i32 imm:$index))),
+ (VEXTd32 DPR:$Vn, DPR:$Vm, imm:$index)>;
+
+def VEXTq8 : VEXTq<"vext", "8", v16i8> {
+ let Inst{11-8} = index{3-0};
+}
+def VEXTq16 : VEXTq<"vext", "16", v8i16> {
+ let Inst{11-9} = index{2-0};
+ let Inst{8} = 0b0;
+}
+def VEXTq32 : VEXTq<"vext", "32", v4i32> {
+ let Inst{11-10} = index{1-0};
+ let Inst{9-8} = 0b00;
+}
+def : Pat<(v4f32 (NEONvext (v4f32 QPR:$Vn),
+ (v4f32 QPR:$Vm),
+ (i32 imm:$index))),
+ (VEXTq32 QPR:$Vn, QPR:$Vm, imm:$index)>;
+
+// VTRN : Vector Transpose
+
+def VTRNd8 : N2VDShuffle<0b00, 0b00001, "vtrn", "8">;
+def VTRNd16 : N2VDShuffle<0b01, 0b00001, "vtrn", "16">;
+def VTRNd32 : N2VDShuffle<0b10, 0b00001, "vtrn", "32">;
+
+def VTRNq8 : N2VQShuffle<0b00, 0b00001, IIC_VPERMQ, "vtrn", "8">;
+def VTRNq16 : N2VQShuffle<0b01, 0b00001, IIC_VPERMQ, "vtrn", "16">;
+def VTRNq32 : N2VQShuffle<0b10, 0b00001, IIC_VPERMQ, "vtrn", "32">;
+
+// VUZP : Vector Unzip (Deinterleave)
+
+def VUZPd8 : N2VDShuffle<0b00, 0b00010, "vuzp", "8">;
+def VUZPd16 : N2VDShuffle<0b01, 0b00010, "vuzp", "16">;
+def VUZPd32 : N2VDShuffle<0b10, 0b00010, "vuzp", "32">;
+
+def VUZPq8 : N2VQShuffle<0b00, 0b00010, IIC_VPERMQ3, "vuzp", "8">;
+def VUZPq16 : N2VQShuffle<0b01, 0b00010, IIC_VPERMQ3, "vuzp", "16">;
+def VUZPq32 : N2VQShuffle<0b10, 0b00010, IIC_VPERMQ3, "vuzp", "32">;
+
+// VZIP : Vector Zip (Interleave)
+
+def VZIPd8 : N2VDShuffle<0b00, 0b00011, "vzip", "8">;
+def VZIPd16 : N2VDShuffle<0b01, 0b00011, "vzip", "16">;
+def VZIPd32 : N2VDShuffle<0b10, 0b00011, "vzip", "32">;
+
+def VZIPq8 : N2VQShuffle<0b00, 0b00011, IIC_VPERMQ3, "vzip", "8">;
+def VZIPq16 : N2VQShuffle<0b01, 0b00011, IIC_VPERMQ3, "vzip", "16">;
+def VZIPq32 : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip", "32">;
+
+// Vector Table Lookup and Table Extension.
+
+// VTBL : Vector Table Lookup
+let DecoderMethod = "DecodeTBLInstruction" in {
+def VTBL1
+ : N3V<1,1,0b11,0b1000,0,0, (outs DPR:$Vd),
+ (ins DPR:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB1,
+ "vtbl", "8", "$Vd, \\{$Vn\\}, $Vm", "",
+ [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbl1 DPR:$Vn, DPR:$Vm)))]>;
+let hasExtraSrcRegAllocReq = 1 in {
+def VTBL2
+ : N3V<1,1,0b11,0b1001,0,0, (outs DPR:$Vd),
+ (ins DPR:$Vn, DPR:$tbl2, DPR:$Vm), NVTBLFrm, IIC_VTB2,
+ "vtbl", "8", "$Vd, \\{$Vn, $tbl2\\}, $Vm", "", []>;
+def VTBL3
+ : N3V<1,1,0b11,0b1010,0,0, (outs DPR:$Vd),
+ (ins DPR:$Vn, DPR:$tbl2, DPR:$tbl3, DPR:$Vm), NVTBLFrm, IIC_VTB3,
+ "vtbl", "8", "$Vd, \\{$Vn, $tbl2, $tbl3\\}, $Vm", "", []>;
+def VTBL4
+ : N3V<1,1,0b11,0b1011,0,0, (outs DPR:$Vd),
+ (ins DPR:$Vn, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$Vm),
+ NVTBLFrm, IIC_VTB4,
+ "vtbl", "8", "$Vd, \\{$Vn, $tbl2, $tbl3, $tbl4\\}, $Vm", "", []>;
+} // hasExtraSrcRegAllocReq = 1
+
+def VTBL2Pseudo
+ : PseudoNeonI<(outs DPR:$dst), (ins QPR:$tbl, DPR:$src), IIC_VTB2, "", []>;
+def VTBL3Pseudo
+ : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB3, "", []>;
+def VTBL4Pseudo
+ : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB4, "", []>;
+
+// VTBX : Vector Table Extension
+def VTBX1
+ : N3V<1,1,0b11,0b1000,1,0, (outs DPR:$Vd),
+ (ins DPR:$orig, DPR:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX1,
+ "vtbx", "8", "$Vd, \\{$Vn\\}, $Vm", "$orig = $Vd",
+ [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbx1
+ DPR:$orig, DPR:$Vn, DPR:$Vm)))]>;
+let hasExtraSrcRegAllocReq = 1 in {
+def VTBX2
+ : N3V<1,1,0b11,0b1001,1,0, (outs DPR:$Vd),
+ (ins DPR:$orig, DPR:$Vn, DPR:$tbl2, DPR:$Vm), NVTBLFrm, IIC_VTBX2,
+ "vtbx", "8", "$Vd, \\{$Vn, $tbl2\\}, $Vm", "$orig = $Vd", []>;
+def VTBX3
+ : N3V<1,1,0b11,0b1010,1,0, (outs DPR:$Vd),
+ (ins DPR:$orig, DPR:$Vn, DPR:$tbl2, DPR:$tbl3, DPR:$Vm),
+ NVTBLFrm, IIC_VTBX3,
+ "vtbx", "8", "$Vd, \\{$Vn, $tbl2, $tbl3\\}, $Vm",
+ "$orig = $Vd", []>;
+def VTBX4
+ : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$Vd), (ins DPR:$orig, DPR:$Vn,
+ DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$Vm), NVTBLFrm, IIC_VTBX4,
+ "vtbx", "8", "$Vd, \\{$Vn, $tbl2, $tbl3, $tbl4\\}, $Vm",
+ "$orig = $Vd", []>;
+} // hasExtraSrcRegAllocReq = 1
+
+def VTBX2Pseudo
+ : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QPR:$tbl, DPR:$src),
+ IIC_VTBX2, "$orig = $dst", []>;
+def VTBX3Pseudo
+ : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
+ IIC_VTBX3, "$orig = $dst", []>;
+def VTBX4Pseudo
+ : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
+ IIC_VTBX4, "$orig = $dst", []>;
+} // DecoderMethod = "DecodeTBLInstruction"
+
+//===----------------------------------------------------------------------===//
+// NEON instructions for single-precision FP math
+//===----------------------------------------------------------------------===//
+
+class N2VSPat<SDNode OpNode, NeonI Inst>
+ : NEONFPPat<(f32 (OpNode SPR:$a)),
+ (EXTRACT_SUBREG
+ (v2f32 (COPY_TO_REGCLASS (Inst
+ (INSERT_SUBREG
+ (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
+ SPR:$a, ssub_0)), DPR_VFP2)), ssub_0)>;
+
+class N3VSPat<SDNode OpNode, NeonI Inst>
+ : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)),
+ (EXTRACT_SUBREG
+ (v2f32 (COPY_TO_REGCLASS (Inst
+ (INSERT_SUBREG
+ (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
+ SPR:$a, ssub_0),
+ (INSERT_SUBREG
+ (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
+ SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;
+
+class N3VSMulOpPat<SDNode MulNode, SDNode OpNode, NeonI Inst>
+ : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))),
+ (EXTRACT_SUBREG
+ (v2f32 (COPY_TO_REGCLASS (Inst
+ (INSERT_SUBREG
+ (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
+ SPR:$acc, ssub_0),
+ (INSERT_SUBREG
+ (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
+ SPR:$a, ssub_0),
+ (INSERT_SUBREG
+ (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
+ SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;
+
+def : N3VSPat<fadd, VADDfd>;
+def : N3VSPat<fsub, VSUBfd>;
+def : N3VSPat<fmul, VMULfd>;
+def : N3VSMulOpPat<fmul, fadd, VMLAfd>,
+ Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;
+def : N3VSMulOpPat<fmul, fsub, VMLSfd>,
+ Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;
+def : N2VSPat<fabs, VABSfd>;
+def : N2VSPat<fneg, VNEGfd>;
+def : N3VSPat<NEONfmax, VMAXfd>;
+def : N3VSPat<NEONfmin, VMINfd>;
+def : N2VSPat<arm_ftosi, VCVTf2sd>;
+def : N2VSPat<arm_ftoui, VCVTf2ud>;
+def : N2VSPat<arm_sitof, VCVTs2fd>;
+def : N2VSPat<arm_uitof, VCVTu2fd>;
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//===----------------------------------------------------------------------===//
+
+// bit_convert
+def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (v1i64 DPR:$src)>;
+def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (v1i64 DPR:$src)>;
+def : Pat<(v1i64 (bitconvert (v8i8 DPR:$src))), (v1i64 DPR:$src)>;
+def : Pat<(v1i64 (bitconvert (f64 DPR:$src))), (v1i64 DPR:$src)>;
+def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (v1i64 DPR:$src)>;
+def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (v2i32 DPR:$src)>;
+def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (v2i32 DPR:$src)>;
+def : Pat<(v2i32 (bitconvert (v8i8 DPR:$src))), (v2i32 DPR:$src)>;
+def : Pat<(v2i32 (bitconvert (f64 DPR:$src))), (v2i32 DPR:$src)>;
+def : Pat<(v2i32 (bitconvert (v2f32 DPR:$src))), (v2i32 DPR:$src)>;
+def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (v4i16 DPR:$src)>;
+def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (v4i16 DPR:$src)>;
+def : Pat<(v4i16 (bitconvert (v8i8 DPR:$src))), (v4i16 DPR:$src)>;
+def : Pat<(v4i16 (bitconvert (f64 DPR:$src))), (v4i16 DPR:$src)>;
+def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (v4i16 DPR:$src)>;
+def : Pat<(v8i8 (bitconvert (v1i64 DPR:$src))), (v8i8 DPR:$src)>;
+def : Pat<(v8i8 (bitconvert (v2i32 DPR:$src))), (v8i8 DPR:$src)>;
+def : Pat<(v8i8 (bitconvert (v4i16 DPR:$src))), (v8i8 DPR:$src)>;
+def : Pat<(v8i8 (bitconvert (f64 DPR:$src))), (v8i8 DPR:$src)>;
+def : Pat<(v8i8 (bitconvert (v2f32 DPR:$src))), (v8i8 DPR:$src)>;
+def : Pat<(f64 (bitconvert (v1i64 DPR:$src))), (f64 DPR:$src)>;
+def : Pat<(f64 (bitconvert (v2i32 DPR:$src))), (f64 DPR:$src)>;
+def : Pat<(f64 (bitconvert (v4i16 DPR:$src))), (f64 DPR:$src)>;
+def : Pat<(f64 (bitconvert (v8i8 DPR:$src))), (f64 DPR:$src)>;
+def : Pat<(f64 (bitconvert (v2f32 DPR:$src))), (f64 DPR:$src)>;
+def : Pat<(v2f32 (bitconvert (f64 DPR:$src))), (v2f32 DPR:$src)>;
+def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (v2f32 DPR:$src)>;
+def : Pat<(v2f32 (bitconvert (v2i32 DPR:$src))), (v2f32 DPR:$src)>;
+def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (v2f32 DPR:$src)>;
+def : Pat<(v2f32 (bitconvert (v8i8 DPR:$src))), (v2f32 DPR:$src)>;
+
+def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>;
+def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>;
+def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>;
+def : Pat<(v2i64 (bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>;
+def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>;
+def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>;
+def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>;
+def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>;
+def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>;
+def : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>;
+def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>;
+def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>;
+def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>;
+def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>;
+def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>;
+def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>;
+def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>;
+def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>;
+def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>;
+def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>;
+def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>;
+def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>;
+def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>;
+def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>;
+def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>;
+def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>;
+def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>;
+def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>;
+def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>;
+def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td b/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td
new file mode 100644
index 0000000..cedb547
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td
@@ -0,0 +1,1436 @@
+//===- ARMInstrThumb.td - Thumb support for ARM ------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Thumb instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Thumb specific DAG Nodes.
+//
+
+def ARMtcall : SDNode<"ARMISD::tCALL", SDT_ARMcall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+
+def imm_sr_XFORM: SDNodeXForm<imm, [{
+ unsigned Imm = N->getZExtValue();
+ return CurDAG->getTargetConstant((Imm == 32 ? 0 : Imm), MVT::i32);
+}]>;
+def ThumbSRImmAsmOperand: AsmOperandClass { let Name = "ImmThumbSR"; }
+def imm_sr : Operand<i32>, PatLeaf<(imm), [{
+ uint64_t Imm = N->getZExtValue();
+ return Imm > 0 && Imm <= 32;
+}], imm_sr_XFORM> {
+ let PrintMethod = "printThumbSRImm";
+ let ParserMatchClass = ThumbSRImmAsmOperand;
+}
+
+def imm_neg_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(-(int)N->getZExtValue(), MVT::i32);
+}]>;
+def imm_comp_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(~((uint32_t)N->getZExtValue()), MVT::i32);
+}]>;
+
+def imm0_7_neg : PatLeaf<(i32 imm), [{
+ return (uint32_t)-N->getZExtValue() < 8;
+}], imm_neg_XFORM>;
+
+def imm0_255_comp : PatLeaf<(i32 imm), [{
+ return ~((uint32_t)N->getZExtValue()) < 256;
+}]>;
+
+def imm8_255 : ImmLeaf<i32, [{
+ return Imm >= 8 && Imm < 256;
+}]>;
+def imm8_255_neg : PatLeaf<(i32 imm), [{
+ unsigned Val = -N->getZExtValue();
+ return Val >= 8 && Val < 256;
+}], imm_neg_XFORM>;
+
+// Break imm's up into two pieces: an immediate + a left shift. This uses
+// thumb_immshifted to match and thumb_immshifted_val and thumb_immshifted_shamt
+// to get the val/shift pieces.
+def thumb_immshifted : PatLeaf<(imm), [{
+ return ARM_AM::isThumbImmShiftedVal((unsigned)N->getZExtValue());
+}]>;
+
+def thumb_immshifted_val : SDNodeXForm<imm, [{
+ unsigned V = ARM_AM::getThumbImmNonShiftedVal((unsigned)N->getZExtValue());
+ return CurDAG->getTargetConstant(V, MVT::i32);
+}]>;
+
+def thumb_immshifted_shamt : SDNodeXForm<imm, [{
+ unsigned V = ARM_AM::getThumbImmValShift((unsigned)N->getZExtValue());
+ return CurDAG->getTargetConstant(V, MVT::i32);
+}]>;
+
+// ADR instruction labels.
+def t_adrlabel : Operand<i32> {
+ let EncoderMethod = "getThumbAdrLabelOpValue";
+}
+
+// Scaled 4 immediate.
+def t_imm0_1020s4_asmoperand: AsmOperandClass { let Name = "Imm0_1020s4"; }
+def t_imm0_1020s4 : Operand<i32> {
+ let PrintMethod = "printThumbS4ImmOperand";
+ let ParserMatchClass = t_imm0_1020s4_asmoperand;
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+
+def t_imm0_508s4_asmoperand: AsmOperandClass { let Name = "Imm0_508s4"; }
+def t_imm0_508s4 : Operand<i32> {
+ let PrintMethod = "printThumbS4ImmOperand";
+ let ParserMatchClass = t_imm0_508s4_asmoperand;
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+
+// Define Thumb specific addressing modes.
+
+let OperandType = "OPERAND_PCREL" in {
+def t_brtarget : Operand<OtherVT> {
+ let EncoderMethod = "getThumbBRTargetOpValue";
+ let DecoderMethod = "DecodeThumbBROperand";
+}
+
+def t_bcctarget : Operand<i32> {
+ let EncoderMethod = "getThumbBCCTargetOpValue";
+ let DecoderMethod = "DecodeThumbBCCTargetOperand";
+}
+
+def t_cbtarget : Operand<i32> {
+ let EncoderMethod = "getThumbCBTargetOpValue";
+ let DecoderMethod = "DecodeThumbCmpBROperand";
+}
+
+def t_bltarget : Operand<i32> {
+ let EncoderMethod = "getThumbBLTargetOpValue";
+ let DecoderMethod = "DecodeThumbBLTargetOperand";
+}
+
+def t_blxtarget : Operand<i32> {
+ let EncoderMethod = "getThumbBLXTargetOpValue";
+ let DecoderMethod = "DecodeThumbBLXOffset";
+}
+}
+
+// t_addrmode_rr := reg + reg
+//
+def t_addrmode_rr_asm_operand : AsmOperandClass { let Name = "MemThumbRR"; }
+def t_addrmode_rr : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectThumbAddrModeRR", []> {
+ let EncoderMethod = "getThumbAddrModeRegRegOpValue";
+ let PrintMethod = "printThumbAddrModeRROperand";
+ let DecoderMethod = "DecodeThumbAddrModeRR";
+ let ParserMatchClass = t_addrmode_rr_asm_operand;
+ let MIOperandInfo = (ops tGPR:$base, tGPR:$offsreg);
+}
+
+// t_addrmode_rrs := reg + reg
+//
+// We use separate scaled versions because the Select* functions need
+// to explicitly check for a matching constant and return false here so that
+// the reg+imm forms will match instead. This is a horrible way to do that,
+// as it forces tight coupling between the methods, but it's how selectiondag
+// currently works.
+def t_addrmode_rrs1 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectThumbAddrModeRI5S1", []> {
+ let EncoderMethod = "getThumbAddrModeRegRegOpValue";
+ let PrintMethod = "printThumbAddrModeRROperand";
+ let DecoderMethod = "DecodeThumbAddrModeRR";
+ let ParserMatchClass = t_addrmode_rr_asm_operand;
+ let MIOperandInfo = (ops tGPR:$base, tGPR:$offsreg);
+}
+def t_addrmode_rrs2 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectThumbAddrModeRI5S2", []> {
+ let EncoderMethod = "getThumbAddrModeRegRegOpValue";
+ let DecoderMethod = "DecodeThumbAddrModeRR";
+ let PrintMethod = "printThumbAddrModeRROperand";
+ let ParserMatchClass = t_addrmode_rr_asm_operand;
+ let MIOperandInfo = (ops tGPR:$base, tGPR:$offsreg);
+}
+def t_addrmode_rrs4 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectThumbAddrModeRI5S4", []> {
+ let EncoderMethod = "getThumbAddrModeRegRegOpValue";
+ let DecoderMethod = "DecodeThumbAddrModeRR";
+ let PrintMethod = "printThumbAddrModeRROperand";
+ let ParserMatchClass = t_addrmode_rr_asm_operand;
+ let MIOperandInfo = (ops tGPR:$base, tGPR:$offsreg);
+}
+
+// t_addrmode_is4 := reg + imm5 * 4
+//
+def t_addrmode_is4_asm_operand : AsmOperandClass { let Name = "MemThumbRIs4"; }
+def t_addrmode_is4 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectThumbAddrModeImm5S4", []> {
+ let EncoderMethod = "getAddrModeISOpValue";
+ let DecoderMethod = "DecodeThumbAddrModeIS";
+ let PrintMethod = "printThumbAddrModeImm5S4Operand";
+ let ParserMatchClass = t_addrmode_is4_asm_operand;
+ let MIOperandInfo = (ops tGPR:$base, i32imm:$offsimm);
+}
+
+// t_addrmode_is2 := reg + imm5 * 2
+//
+def t_addrmode_is2_asm_operand : AsmOperandClass { let Name = "MemThumbRIs2"; }
+def t_addrmode_is2 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectThumbAddrModeImm5S2", []> {
+ let EncoderMethod = "getAddrModeISOpValue";
+ let DecoderMethod = "DecodeThumbAddrModeIS";
+ let PrintMethod = "printThumbAddrModeImm5S2Operand";
+ let ParserMatchClass = t_addrmode_is2_asm_operand;
+ let MIOperandInfo = (ops tGPR:$base, i32imm:$offsimm);
+}
+
+// t_addrmode_is1 := reg + imm5
+//
+def t_addrmode_is1_asm_operand : AsmOperandClass { let Name = "MemThumbRIs1"; }
+def t_addrmode_is1 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectThumbAddrModeImm5S1", []> {
+ let EncoderMethod = "getAddrModeISOpValue";
+ let DecoderMethod = "DecodeThumbAddrModeIS";
+ let PrintMethod = "printThumbAddrModeImm5S1Operand";
+ let ParserMatchClass = t_addrmode_is1_asm_operand;
+ let MIOperandInfo = (ops tGPR:$base, i32imm:$offsimm);
+}
+
+// t_addrmode_sp := sp + imm8 * 4
+//
+// FIXME: This really shouldn't have an explicit SP operand at all. It should
+// be implicit, just like in the instruction encoding itself.
+def t_addrmode_sp_asm_operand : AsmOperandClass { let Name = "MemThumbSPI"; }
+def t_addrmode_sp : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectThumbAddrModeSP", []> {
+ let EncoderMethod = "getAddrModeThumbSPOpValue";
+ let DecoderMethod = "DecodeThumbAddrModeSP";
+ let PrintMethod = "printThumbAddrModeSPOperand";
+ let ParserMatchClass = t_addrmode_sp_asm_operand;
+ let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
+}
+
+// t_addrmode_pc := <label> => pc + imm8 * 4
+//
+def t_addrmode_pc : Operand<i32> {
+ let EncoderMethod = "getAddrModePCOpValue";
+ let DecoderMethod = "DecodeThumbAddrModePC";
+}
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous Instructions.
+//
+
+// FIXME: Marking these as hasSideEffects is necessary to prevent machine DCE
+// from removing one half of the matched pairs. That breaks PEI, which assumes
+// these will always be in pairs, and asserts if it finds otherwise. Better way?
+let Defs = [SP], Uses = [SP], hasSideEffects = 1 in {
+def tADJCALLSTACKUP :
+ PseudoInst<(outs), (ins i32imm:$amt1, i32imm:$amt2), NoItinerary,
+ [(ARMcallseq_end imm:$amt1, imm:$amt2)]>,
+ Requires<[IsThumb, IsThumb1Only]>;
+
+def tADJCALLSTACKDOWN :
+ PseudoInst<(outs), (ins i32imm:$amt), NoItinerary,
+ [(ARMcallseq_start imm:$amt)]>,
+ Requires<[IsThumb, IsThumb1Only]>;
+}
+
+class T1SystemEncoding<bits<8> opc>
+ : T1Encoding<0b101111> {
+ let Inst{9-8} = 0b11;
+ let Inst{7-0} = opc;
+}
+
+def tNOP : T1pI<(outs), (ins), NoItinerary, "nop", "", []>,
+ T1SystemEncoding<0x00>, // A8.6.110
+ Requires<[IsThumb2]>;
+
+def tYIELD : T1pI<(outs), (ins), NoItinerary, "yield", "", []>,
+ T1SystemEncoding<0x10>; // A8.6.410
+
+def tWFE : T1pI<(outs), (ins), NoItinerary, "wfe", "", []>,
+ T1SystemEncoding<0x20>; // A8.6.408
+
+def tWFI : T1pI<(outs), (ins), NoItinerary, "wfi", "", []>,
+ T1SystemEncoding<0x30>; // A8.6.409
+
+def tSEV : T1pI<(outs), (ins), NoItinerary, "sev", "", []>,
+ T1SystemEncoding<0x40>; // A8.6.157
+
+// The imm operand $val can be used by a debugger to store more information
+// about the breakpoint.
+def tBKPT : T1I<(outs), (ins imm0_255:$val), NoItinerary, "bkpt\t$val",
+ []>,
+ T1Encoding<0b101111> {
+ let Inst{9-8} = 0b10;
+ // A8.6.22
+ bits<8> val;
+ let Inst{7-0} = val;
+}
+
+def tSETEND : T1I<(outs), (ins setend_op:$end), NoItinerary, "setend\t$end",
+ []>, T1Encoding<0b101101> {
+ bits<1> end;
+ // A8.6.156
+ let Inst{9-5} = 0b10010;
+ let Inst{4} = 1;
+ let Inst{3} = end;
+ let Inst{2-0} = 0b000;
+}
+
+// Change Processor State is a system instruction -- for disassembly only.
+def tCPS : T1I<(outs), (ins imod_op:$imod, iflags_op:$iflags),
+ NoItinerary, "cps$imod $iflags", []>,
+ T1Misc<0b0110011> {
+ // A8.6.38 & B6.1.1
+ bit imod;
+ bits<3> iflags;
+
+ let Inst{4} = imod;
+ let Inst{3} = 0;
+ let Inst{2-0} = iflags;
+ let DecoderMethod = "DecodeThumbCPS";
+}
+
+// For both thumb1 and thumb2.
+let isNotDuplicable = 1, isCodeGenOnly = 1 in
+def tPICADD : TIt<(outs GPR:$dst), (ins GPR:$lhs, pclabel:$cp), IIC_iALUr, "",
+ [(set GPR:$dst, (ARMpic_add GPR:$lhs, imm:$cp))]>,
+ T1Special<{0,0,?,?}> {
+ // A8.6.6
+ bits<3> dst;
+ let Inst{6-3} = 0b1111; // Rm = pc
+ let Inst{2-0} = dst;
+}
+
+// ADD <Rd>, sp, #<imm8>
+// FIXME: This should not be marked as having side effects, and it should be
+// rematerializable. Clearing the side effect bit causes miscompilations,
+// probably because the instruction can be moved around.
+def tADDrSPi : T1pI<(outs tGPR:$dst), (ins GPRsp:$sp, t_imm0_1020s4:$imm),
+ IIC_iALUi, "add", "\t$dst, $sp, $imm", []>,
+ T1Encoding<{1,0,1,0,1,?}> {
+ // A6.2 & A8.6.8
+ bits<3> dst;
+ bits<8> imm;
+ let Inst{10-8} = dst;
+ let Inst{7-0} = imm;
+ let DecoderMethod = "DecodeThumbAddSpecialReg";
+}
+
+// ADD sp, sp, #<imm7>
+def tADDspi : T1pIt<(outs GPRsp:$Rdn), (ins GPRsp:$Rn, t_imm0_508s4:$imm),
+ IIC_iALUi, "add", "\t$Rdn, $imm", []>,
+ T1Misc<{0,0,0,0,0,?,?}> {
+ // A6.2.5 & A8.6.8
+ bits<7> imm;
+ let Inst{6-0} = imm;
+ let DecoderMethod = "DecodeThumbAddSPImm";
+}
+
+// SUB sp, sp, #<imm7>
+// FIXME: The encoding and the ASM string don't match up.
+def tSUBspi : T1pIt<(outs GPRsp:$Rdn), (ins GPRsp:$Rn, t_imm0_508s4:$imm),
+ IIC_iALUi, "sub", "\t$Rdn, $imm", []>,
+ T1Misc<{0,0,0,0,1,?,?}> {
+ // A6.2.5 & A8.6.214
+ bits<7> imm;
+ let Inst{6-0} = imm;
+ let DecoderMethod = "DecodeThumbAddSPImm";
+}
+
+// Can optionally specify SP as a three operand instruction.
+def : tInstAlias<"add${p} sp, sp, $imm",
+ (tADDspi SP, t_imm0_508s4:$imm, pred:$p)>;
+def : tInstAlias<"sub${p} sp, sp, $imm",
+ (tSUBspi SP, t_imm0_508s4:$imm, pred:$p)>;
+
+// ADD <Rm>, sp
+def tADDrSP : T1pIt<(outs GPR:$Rdn), (ins GPR:$Rn, GPRsp:$sp), IIC_iALUr,
+ "add", "\t$Rdn, $sp, $Rn", []>,
+ T1Special<{0,0,?,?}> {
+ // A8.6.9 Encoding T1
+ bits<4> Rdn;
+ let Inst{7} = Rdn{3};
+ let Inst{6-3} = 0b1101;
+ let Inst{2-0} = Rdn{2-0};
+ let DecoderMethod = "DecodeThumbAddSPReg";
+}
+
+// ADD sp, <Rm>
+def tADDspr : T1pIt<(outs GPRsp:$Rdn), (ins GPRsp:$Rn, GPR:$Rm), IIC_iALUr,
+ "add", "\t$Rdn, $Rm", []>,
+ T1Special<{0,0,?,?}> {
+ // A8.6.9 Encoding T2
+ bits<4> Rm;
+ let Inst{7} = 1;
+ let Inst{6-3} = Rm;
+ let Inst{2-0} = 0b101;
+ let DecoderMethod = "DecodeThumbAddSPReg";
+}
+
+//===----------------------------------------------------------------------===//
+// Control Flow Instructions.
+//
+
+// Indirect branches
+let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
+ def tBX : TI<(outs), (ins GPR:$Rm, pred:$p), IIC_Br, "bx${p}\t$Rm", []>,
+ T1Special<{1,1,0,?}> {
+ // A6.2.3 & A8.6.25
+ bits<4> Rm;
+ let Inst{6-3} = Rm;
+ let Inst{2-0} = 0b000;
+ }
+}
+
+let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
+ def tBX_RET : tPseudoExpand<(outs), (ins pred:$p), 2, IIC_Br,
+ [(ARMretflag)], (tBX LR, pred:$p)>;
+
+ // Alternative return instruction used by vararg functions.
+ def tBX_RET_vararg : tPseudoExpand<(outs), (ins tGPR:$Rm, pred:$p),
+ 2, IIC_Br, [],
+ (tBX GPR:$Rm, pred:$p)>;
+}
+
+// All calls clobber the non-callee saved registers. SP is marked as a use to
+// prevent stack-pointer assignments that appear immediately before calls from
+// potentially appearing dead.
+let isCall = 1,
+ // On non-Darwin platforms R9 is callee-saved.
+ Defs = [R0, R1, R2, R3, R12, LR, QQQQ0, QQQQ2, QQQQ3, CPSR, FPSCR],
+ Uses = [SP] in {
+ // Also used for Thumb2
+ def tBL : TIx2<0b11110, 0b11, 1,
+ (outs), (ins pred:$p, t_bltarget:$func, variable_ops), IIC_Br,
+ "bl${p}\t$func",
+ [(ARMtcall tglobaladdr:$func)]>,
+ Requires<[IsThumb, IsNotDarwin]> {
+ bits<22> func;
+ let Inst{26} = func{21};
+ let Inst{25-16} = func{20-11};
+ let Inst{13} = 1;
+ let Inst{11} = 1;
+ let Inst{10-0} = func{10-0};
+ }
+
+ // ARMv5T and above, also used for Thumb2
+ def tBLXi : TIx2<0b11110, 0b11, 0,
+ (outs), (ins pred:$p, t_blxtarget:$func, variable_ops), IIC_Br,
+ "blx${p}\t$func",
+ [(ARMcall tglobaladdr:$func)]>,
+ Requires<[IsThumb, HasV5T, IsNotDarwin]> {
+ bits<21> func;
+ let Inst{25-16} = func{20-11};
+ let Inst{13} = 1;
+ let Inst{11} = 1;
+ let Inst{10-1} = func{10-1};
+ let Inst{0} = 0; // func{0} is assumed zero
+ }
+
+ // Also used for Thumb2
+ def tBLXr : TI<(outs), (ins pred:$p, GPR:$func, variable_ops), IIC_Br,
+ "blx${p}\t$func",
+ [(ARMtcall GPR:$func)]>,
+ Requires<[IsThumb, HasV5T, IsNotDarwin]>,
+ T1Special<{1,1,1,?}> { // A6.2.3 & A8.6.24;
+ bits<4> func;
+ let Inst{6-3} = func;
+ let Inst{2-0} = 0b000;
+ }
+
+ // ARMv4T
+ def tBX_CALL : tPseudoInst<(outs), (ins tGPR:$func, variable_ops),
+ 4, IIC_Br,
+ [(ARMcall_nolink tGPR:$func)]>,
+ Requires<[IsThumb, IsThumb1Only, IsNotDarwin]>;
+}
+
+let isCall = 1,
+ // On Darwin R9 is call-clobbered.
+ // R7 is marked as a use to prevent frame-pointer assignments from being
+ // moved above / below calls.
+ Defs = [R0, R1, R2, R3, R9, R12, LR, QQQQ0, QQQQ2, QQQQ3, CPSR, FPSCR],
+ Uses = [R7, SP] in {
+ // Also used for Thumb2
+ def tBLr9 : tPseudoExpand<(outs), (ins pred:$p, t_bltarget:$func, variable_ops),
+ 4, IIC_Br, [(ARMtcall tglobaladdr:$func)],
+ (tBL pred:$p, t_bltarget:$func)>,
+ Requires<[IsThumb, IsDarwin]>;
+
+ // ARMv5T and above, also used for Thumb2
+ def tBLXi_r9 : tPseudoExpand<(outs), (ins pred:$p, t_blxtarget:$func, variable_ops),
+ 4, IIC_Br, [(ARMcall tglobaladdr:$func)],
+ (tBLXi pred:$p, t_blxtarget:$func)>,
+ Requires<[IsThumb, HasV5T, IsDarwin]>;
+
+ // Also used for Thumb2
+ def tBLXr_r9 : tPseudoExpand<(outs), (ins pred:$p, GPR:$func, variable_ops),
+ 2, IIC_Br, [(ARMtcall GPR:$func)],
+ (tBLXr pred:$p, GPR:$func)>,
+ Requires<[IsThumb, HasV5T, IsDarwin]>;
+
+ // ARMv4T
+ def tBXr9_CALL : tPseudoInst<(outs), (ins tGPR:$func, variable_ops),
+ 4, IIC_Br,
+ [(ARMcall_nolink tGPR:$func)]>,
+ Requires<[IsThumb, IsThumb1Only, IsDarwin]>;
+}
+
+let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
+ let isPredicable = 1 in
+ def tB : T1pI<(outs), (ins t_brtarget:$target), IIC_Br,
+ "b", "\t$target", [(br bb:$target)]>,
+ T1Encoding<{1,1,1,0,0,?}> {
+ bits<11> target;
+ let Inst{10-0} = target;
+ }
+
+ // Far jump
+ // Just a pseudo for a tBL instruction. Needed to let regalloc know about
+ // the clobber of LR.
+ let Defs = [LR] in
+ def tBfar : tPseudoExpand<(outs), (ins t_bltarget:$target, pred:$p),
+ 4, IIC_Br, [], (tBL pred:$p, t_bltarget:$target)>;
+
+ def tBR_JTr : tPseudoInst<(outs),
+ (ins tGPR:$target, i32imm:$jt, i32imm:$id),
+ 0, IIC_Br,
+ [(ARMbrjt tGPR:$target, tjumptable:$jt, imm:$id)]> {
+ list<Predicate> Predicates = [IsThumb, IsThumb1Only];
+ }
+}
+
+// FIXME: should be able to write a pattern for ARMBrcond, but can't use
+// a two-value operand where a dag node expects two operands. :(
+let isBranch = 1, isTerminator = 1 in
+ def tBcc : T1I<(outs), (ins t_bcctarget:$target, pred:$p), IIC_Br,
+ "b${p}\t$target",
+ [/*(ARMbrcond bb:$target, imm:$cc)*/]>,
+ T1BranchCond<{1,1,0,1}> {
+ bits<4> p;
+ bits<8> target;
+ let Inst{11-8} = p;
+ let Inst{7-0} = target;
+}
+
+// Tail calls
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
+ // Darwin versions.
+ let Defs = [R0, R1, R2, R3, R9, R12, QQQQ0, QQQQ2, QQQQ3, PC],
+ Uses = [SP] in {
+ // tTAILJMPd: Darwin version uses a Thumb2 branch (no Thumb1 tail calls
+ // on Darwin), so it's in ARMInstrThumb2.td.
+ def tTAILJMPr : tPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops),
+ 4, IIC_Br, [],
+ (tBX GPR:$dst, (ops 14, zero_reg))>,
+ Requires<[IsThumb, IsDarwin]>;
+ }
+ // Non-Darwin versions (the difference is R9).
+ let Defs = [R0, R1, R2, R3, R12, QQQQ0, QQQQ2, QQQQ3, PC],
+ Uses = [SP] in {
+ def tTAILJMPdND : tPseudoExpand<(outs),
+ (ins t_brtarget:$dst, pred:$p, variable_ops),
+ 4, IIC_Br, [],
+ (tB t_brtarget:$dst, pred:$p)>,
+ Requires<[IsThumb, IsNotDarwin]>;
+ def tTAILJMPrND : tPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops),
+ 4, IIC_Br, [],
+ (tBX GPR:$dst, (ops 14, zero_reg))>,
+ Requires<[IsThumb, IsNotDarwin]>;
+ }
+}
+
+
+// A8.6.218 Supervisor Call (Software Interrupt)
+// A8.6.16 B: Encoding T1
+// If Inst{11-8} == 0b1111 then SEE SVC
+let isCall = 1, Uses = [SP] in
+def tSVC : T1pI<(outs), (ins imm0_255:$imm), IIC_Br,
+ "svc", "\t$imm", []>, Encoding16 {
+ bits<8> imm;
+ let Inst{15-12} = 0b1101;
+ let Inst{11-8} = 0b1111;
+ let Inst{7-0} = imm;
+}
+
+// The assembler uses 0xDEFE for a trap instruction.
+let isBarrier = 1, isTerminator = 1 in
+def tTRAP : TI<(outs), (ins), IIC_Br,
+ "trap", [(trap)]>, Encoding16 {
+ let Inst = 0xdefe;
+}
+
+//===----------------------------------------------------------------------===//
+// Load Store Instructions.
+//
+
+// Loads: reg/reg and reg/imm5
+let canFoldAsLoad = 1, isReMaterializable = 1 in
+multiclass thumb_ld_rr_ri_enc<bits<3> reg_opc, bits<4> imm_opc,
+ Operand AddrMode_r, Operand AddrMode_i,
+ AddrMode am, InstrItinClass itin_r,
+ InstrItinClass itin_i, string asm,
+ PatFrag opnode> {
+ def r : // reg/reg
+ T1pILdStEncode<reg_opc,
+ (outs tGPR:$Rt), (ins AddrMode_r:$addr),
+ am, itin_r, asm, "\t$Rt, $addr",
+ [(set tGPR:$Rt, (opnode AddrMode_r:$addr))]>;
+ def i : // reg/imm5
+ T1pILdStEncodeImm<imm_opc, 1 /* Load */,
+ (outs tGPR:$Rt), (ins AddrMode_i:$addr),
+ am, itin_i, asm, "\t$Rt, $addr",
+ [(set tGPR:$Rt, (opnode AddrMode_i:$addr))]>;
+}
+// Stores: reg/reg and reg/imm5
+multiclass thumb_st_rr_ri_enc<bits<3> reg_opc, bits<4> imm_opc,
+ Operand AddrMode_r, Operand AddrMode_i,
+ AddrMode am, InstrItinClass itin_r,
+ InstrItinClass itin_i, string asm,
+ PatFrag opnode> {
+ def r : // reg/reg
+ T1pILdStEncode<reg_opc,
+ (outs), (ins tGPR:$Rt, AddrMode_r:$addr),
+ am, itin_r, asm, "\t$Rt, $addr",
+ [(opnode tGPR:$Rt, AddrMode_r:$addr)]>;
+ def i : // reg/imm5
+ T1pILdStEncodeImm<imm_opc, 0 /* Store */,
+ (outs), (ins tGPR:$Rt, AddrMode_i:$addr),
+ am, itin_i, asm, "\t$Rt, $addr",
+ [(opnode tGPR:$Rt, AddrMode_i:$addr)]>;
+}
+
+// A8.6.57 & A8.6.60
+defm tLDR : thumb_ld_rr_ri_enc<0b100, 0b0110, t_addrmode_rrs4,
+ t_addrmode_is4, AddrModeT1_4,
+ IIC_iLoad_r, IIC_iLoad_i, "ldr",
+ UnOpFrag<(load node:$Src)>>;
+
+// A8.6.64 & A8.6.61
+defm tLDRB : thumb_ld_rr_ri_enc<0b110, 0b0111, t_addrmode_rrs1,
+ t_addrmode_is1, AddrModeT1_1,
+ IIC_iLoad_bh_r, IIC_iLoad_bh_i, "ldrb",
+ UnOpFrag<(zextloadi8 node:$Src)>>;
+
+// A8.6.76 & A8.6.73
+defm tLDRH : thumb_ld_rr_ri_enc<0b101, 0b1000, t_addrmode_rrs2,
+ t_addrmode_is2, AddrModeT1_2,
+ IIC_iLoad_bh_r, IIC_iLoad_bh_i, "ldrh",
+ UnOpFrag<(zextloadi16 node:$Src)>>;
+
+let AddedComplexity = 10 in
+def tLDRSB : // A8.6.80
+ T1pILdStEncode<0b011, (outs tGPR:$Rt), (ins t_addrmode_rr:$addr),
+ AddrModeT1_1, IIC_iLoad_bh_r,
+ "ldrsb", "\t$Rt, $addr",
+ [(set tGPR:$Rt, (sextloadi8 t_addrmode_rr:$addr))]>;
+
+let AddedComplexity = 10 in
+def tLDRSH : // A8.6.84
+ T1pILdStEncode<0b111, (outs tGPR:$Rt), (ins t_addrmode_rr:$addr),
+ AddrModeT1_2, IIC_iLoad_bh_r,
+ "ldrsh", "\t$Rt, $addr",
+ [(set tGPR:$Rt, (sextloadi16 t_addrmode_rr:$addr))]>;
+
+let canFoldAsLoad = 1 in
+def tLDRspi : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_sp:$addr), IIC_iLoad_i,
+ "ldr", "\t$Rt, $addr",
+ [(set tGPR:$Rt, (load t_addrmode_sp:$addr))]>,
+ T1LdStSP<{1,?,?}> {
+ bits<3> Rt;
+ bits<8> addr;
+ let Inst{10-8} = Rt;
+ let Inst{7-0} = addr;
+}
+
+// Load tconstpool
+// FIXME: Use ldr.n to work around a Darwin assembler bug.
+let canFoldAsLoad = 1, isReMaterializable = 1, isCodeGenOnly = 1 in
+def tLDRpci : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i,
+ "ldr", ".n\t$Rt, $addr",
+ [(set tGPR:$Rt, (load (ARMWrapper tconstpool:$addr)))]>,
+ T1Encoding<{0,1,0,0,1,?}> {
+ // A6.2 & A8.6.59
+ bits<3> Rt;
+ bits<8> addr;
+ let Inst{10-8} = Rt;
+ let Inst{7-0} = addr;
+}
+
+// FIXME: Remove this entry when the above ldr.n workaround is fixed.
+// For disassembly use only.
+def tLDRpciDIS : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i,
+ "ldr", "\t$Rt, $addr",
+ [/* disassembly only */]>,
+ T1Encoding<{0,1,0,0,1,?}> {
+ // A6.2 & A8.6.59
+ bits<3> Rt;
+ bits<8> addr;
+ let Inst{10-8} = Rt;
+ let Inst{7-0} = addr;
+}
+
+// A8.6.194 & A8.6.192
+defm tSTR : thumb_st_rr_ri_enc<0b000, 0b0110, t_addrmode_rrs4,
+ t_addrmode_is4, AddrModeT1_4,
+ IIC_iStore_r, IIC_iStore_i, "str",
+ BinOpFrag<(store node:$LHS, node:$RHS)>>;
+
+// A8.6.197 & A8.6.195
+defm tSTRB : thumb_st_rr_ri_enc<0b010, 0b0111, t_addrmode_rrs1,
+ t_addrmode_is1, AddrModeT1_1,
+ IIC_iStore_bh_r, IIC_iStore_bh_i, "strb",
+ BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>;
+
+// A8.6.207 & A8.6.205
+defm tSTRH : thumb_st_rr_ri_enc<0b001, 0b1000, t_addrmode_rrs2,
+ t_addrmode_is2, AddrModeT1_2,
+ IIC_iStore_bh_r, IIC_iStore_bh_i, "strh",
+ BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>;
+
+
+def tSTRspi : T1pIs<(outs), (ins tGPR:$Rt, t_addrmode_sp:$addr), IIC_iStore_i,
+ "str", "\t$Rt, $addr",
+ [(store tGPR:$Rt, t_addrmode_sp:$addr)]>,
+ T1LdStSP<{0,?,?}> {
+ bits<3> Rt;
+ bits<8> addr;
+ let Inst{10-8} = Rt;
+ let Inst{7-0} = addr;
+}
+
+//===----------------------------------------------------------------------===//
+// Load / store multiple Instructions.
+//
+
+// These require base address to be written back or one of the loaded regs.
+let neverHasSideEffects = 1 in {
+
+let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
+def tLDMIA : T1I<(outs), (ins tGPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ IIC_iLoad_m, "ldm${p}\t$Rn, $regs", []>, T1Encoding<{1,1,0,0,1,?}> {
+ bits<3> Rn;
+ bits<8> regs;
+ let Inst{10-8} = Rn;
+ let Inst{7-0} = regs;
+}
+
+// Writeback version is just a pseudo, as there's no encoding difference.
+// Writeback happens iff the base register is not in the destination register
+// list.
+def tLDMIA_UPD :
+ InstTemplate<AddrModeNone, 0, IndexModeNone, Pseudo, GenericDomain,
+ "$Rn = $wb", IIC_iLoad_mu>,
+ PseudoInstExpansion<(tLDMIA tGPR:$Rn, pred:$p, reglist:$regs)> {
+ let Size = 2;
+ let OutOperandList = (outs GPR:$wb);
+ let InOperandList = (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops);
+ let Pattern = [];
+ let isCodeGenOnly = 1;
+ let isPseudo = 1;
+ list<Predicate> Predicates = [IsThumb];
+}
+
+// There is no non-writeback version of STM for Thumb.
+let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
+def tSTMIA_UPD : Thumb1I<(outs GPR:$wb),
+ (ins tGPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ AddrModeNone, 2, IIC_iStore_mu,
+ "stm${p}\t$Rn!, $regs", "$Rn = $wb", []>,
+ T1Encoding<{1,1,0,0,0,?}> {
+ bits<3> Rn;
+ bits<8> regs;
+ let Inst{10-8} = Rn;
+ let Inst{7-0} = regs;
+}
+
+} // neverHasSideEffects
+
+def : InstAlias<"ldm${p} $Rn!, $regs",
+ (tLDMIA tGPR:$Rn, pred:$p, reglist:$regs)>,
+ Requires<[IsThumb, IsThumb1Only]>;
+
+let mayLoad = 1, Uses = [SP], Defs = [SP], hasExtraDefRegAllocReq = 1 in
+def tPOP : T1I<(outs), (ins pred:$p, reglist:$regs, variable_ops),
+ IIC_iPop,
+ "pop${p}\t$regs", []>,
+ T1Misc<{1,1,0,?,?,?,?}> {
+ bits<16> regs;
+ let Inst{8} = regs{15};
+ let Inst{7-0} = regs{7-0};
+}
+
+let mayStore = 1, Uses = [SP], Defs = [SP], hasExtraSrcRegAllocReq = 1 in
+def tPUSH : T1I<(outs), (ins pred:$p, reglist:$regs, variable_ops),
+ IIC_iStore_m,
+ "push${p}\t$regs", []>,
+ T1Misc<{0,1,0,?,?,?,?}> {
+ bits<16> regs;
+ let Inst{8} = regs{14};
+ let Inst{7-0} = regs{7-0};
+}
+
+//===----------------------------------------------------------------------===//
+// Arithmetic Instructions.
+//
+
+// Helper classes for encoding T1pI patterns:
+class T1pIDPEncode<bits<4> opA, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T1pI<oops, iops, itin, opc, asm, pattern>,
+ T1DataProcessing<opA> {
+ bits<3> Rm;
+ bits<3> Rn;
+ let Inst{5-3} = Rm;
+ let Inst{2-0} = Rn;
+}
+class T1pIMiscEncode<bits<7> opA, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T1pI<oops, iops, itin, opc, asm, pattern>,
+ T1Misc<opA> {
+ bits<3> Rm;
+ bits<3> Rd;
+ let Inst{5-3} = Rm;
+ let Inst{2-0} = Rd;
+}
+
+// Helper classes for encoding T1sI patterns:
+class T1sIDPEncode<bits<4> opA, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T1sI<oops, iops, itin, opc, asm, pattern>,
+ T1DataProcessing<opA> {
+ bits<3> Rd;
+ bits<3> Rn;
+ let Inst{5-3} = Rn;
+ let Inst{2-0} = Rd;
+}
+class T1sIGenEncode<bits<5> opA, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T1sI<oops, iops, itin, opc, asm, pattern>,
+ T1General<opA> {
+ bits<3> Rm;
+ bits<3> Rn;
+ bits<3> Rd;
+ let Inst{8-6} = Rm;
+ let Inst{5-3} = Rn;
+ let Inst{2-0} = Rd;
+}
+class T1sIGenEncodeImm<bits<5> opA, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T1sI<oops, iops, itin, opc, asm, pattern>,
+ T1General<opA> {
+ bits<3> Rd;
+ bits<3> Rm;
+ let Inst{5-3} = Rm;
+ let Inst{2-0} = Rd;
+}
+
+// Helper classes for encoding T1sIt patterns:
+class T1sItDPEncode<bits<4> opA, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T1sIt<oops, iops, itin, opc, asm, pattern>,
+ T1DataProcessing<opA> {
+ bits<3> Rdn;
+ bits<3> Rm;
+ let Inst{5-3} = Rm;
+ let Inst{2-0} = Rdn;
+}
+class T1sItGenEncodeImm<bits<5> opA, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T1sIt<oops, iops, itin, opc, asm, pattern>,
+ T1General<opA> {
+ bits<3> Rdn;
+ bits<8> imm8;
+ let Inst{10-8} = Rdn;
+ let Inst{7-0} = imm8;
+}
+
+// Add with carry register
+let isCommutable = 1, Uses = [CPSR] in
+def tADC : // A8.6.2
+ T1sItDPEncode<0b0101, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm), IIC_iALUr,
+ "adc", "\t$Rdn, $Rm",
+ [(set tGPR:$Rdn, (adde tGPR:$Rn, tGPR:$Rm))]>;
+
+// Add immediate
+def tADDi3 : // A8.6.4 T1
+ T1sIGenEncodeImm<0b01110, (outs tGPR:$Rd), (ins tGPR:$Rm, imm0_7:$imm3),
+ IIC_iALUi,
+ "add", "\t$Rd, $Rm, $imm3",
+ [(set tGPR:$Rd, (add tGPR:$Rm, imm0_7:$imm3))]> {
+ bits<3> imm3;
+ let Inst{8-6} = imm3;
+}
+
+def tADDi8 : // A8.6.4 T2
+ T1sItGenEncodeImm<{1,1,0,?,?}, (outs tGPR:$Rdn),
+ (ins tGPR:$Rn, imm0_255:$imm8), IIC_iALUi,
+ "add", "\t$Rdn, $imm8",
+ [(set tGPR:$Rdn, (add tGPR:$Rn, imm8_255:$imm8))]>;
+
+// Add register
+let isCommutable = 1 in
+def tADDrr : // A8.6.6 T1
+ T1sIGenEncode<0b01100, (outs tGPR:$Rd), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iALUr,
+ "add", "\t$Rd, $Rn, $Rm",
+ [(set tGPR:$Rd, (add tGPR:$Rn, tGPR:$Rm))]>;
+
+let neverHasSideEffects = 1 in
+def tADDhirr : T1pIt<(outs GPR:$Rdn), (ins GPR:$Rn, GPR:$Rm), IIC_iALUr,
+ "add", "\t$Rdn, $Rm", []>,
+ T1Special<{0,0,?,?}> {
+ // A8.6.6 T2
+ bits<4> Rdn;
+ bits<4> Rm;
+ let Inst{7} = Rdn{3};
+ let Inst{6-3} = Rm;
+ let Inst{2-0} = Rdn{2-0};
+}
+
+// AND register
+let isCommutable = 1 in
+def tAND : // A8.6.12
+ T1sItDPEncode<0b0000, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iBITr,
+ "and", "\t$Rdn, $Rm",
+ [(set tGPR:$Rdn, (and tGPR:$Rn, tGPR:$Rm))]>;
+
+// ASR immediate
+def tASRri : // A8.6.14
+ T1sIGenEncodeImm<{0,1,0,?,?}, (outs tGPR:$Rd), (ins tGPR:$Rm, imm_sr:$imm5),
+ IIC_iMOVsi,
+ "asr", "\t$Rd, $Rm, $imm5",
+ [(set tGPR:$Rd, (sra tGPR:$Rm, (i32 imm_sr:$imm5)))]> {
+ bits<5> imm5;
+ let Inst{10-6} = imm5;
+}
+
+// ASR register
+def tASRrr : // A8.6.15
+ T1sItDPEncode<0b0100, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iMOVsr,
+ "asr", "\t$Rdn, $Rm",
+ [(set tGPR:$Rdn, (sra tGPR:$Rn, tGPR:$Rm))]>;
+
+// BIC register
+def tBIC : // A8.6.20
+ T1sItDPEncode<0b1110, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iBITr,
+ "bic", "\t$Rdn, $Rm",
+ [(set tGPR:$Rdn, (and tGPR:$Rn, (not tGPR:$Rm)))]>;
+
+// CMN register
+let isCompare = 1, Defs = [CPSR] in {
+//FIXME: Disable CMN, as CCodes are backwards from compare expectations
+// Compare-to-zero still works out, just not the relationals
+//def tCMN : // A8.6.33
+// T1pIDPEncode<0b1011, (outs), (ins tGPR:$lhs, tGPR:$rhs),
+// IIC_iCMPr,
+// "cmn", "\t$lhs, $rhs",
+// [(ARMcmp tGPR:$lhs, (ineg tGPR:$rhs))]>;
+
+def tCMNz : // A8.6.33
+ T1pIDPEncode<0b1011, (outs), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iCMPr,
+ "cmn", "\t$Rn, $Rm",
+ [(ARMcmpZ tGPR:$Rn, (ineg tGPR:$Rm))]>;
+
+} // isCompare = 1, Defs = [CPSR]
+
+// CMP immediate
+let isCompare = 1, Defs = [CPSR] in {
+def tCMPi8 : T1pI<(outs), (ins tGPR:$Rn, imm0_255:$imm8), IIC_iCMPi,
+ "cmp", "\t$Rn, $imm8",
+ [(ARMcmp tGPR:$Rn, imm0_255:$imm8)]>,
+ T1General<{1,0,1,?,?}> {
+ // A8.6.35
+ bits<3> Rn;
+ bits<8> imm8;
+ let Inst{10-8} = Rn;
+ let Inst{7-0} = imm8;
+}
+
+// CMP register
+def tCMPr : // A8.6.36 T1
+ T1pIDPEncode<0b1010, (outs), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iCMPr,
+ "cmp", "\t$Rn, $Rm",
+ [(ARMcmp tGPR:$Rn, tGPR:$Rm)]>;
+
+def tCMPhir : T1pI<(outs), (ins GPR:$Rn, GPR:$Rm), IIC_iCMPr,
+ "cmp", "\t$Rn, $Rm", []>,
+ T1Special<{0,1,?,?}> {
+ // A8.6.36 T2
+ bits<4> Rm;
+ bits<4> Rn;
+ let Inst{7} = Rn{3};
+ let Inst{6-3} = Rm;
+ let Inst{2-0} = Rn{2-0};
+}
+} // isCompare = 1, Defs = [CPSR]
+
+
+// XOR register
+let isCommutable = 1 in
+def tEOR : // A8.6.45
+ T1sItDPEncode<0b0001, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iBITr,
+ "eor", "\t$Rdn, $Rm",
+ [(set tGPR:$Rdn, (xor tGPR:$Rn, tGPR:$Rm))]>;
+
+// LSL immediate
+def tLSLri : // A8.6.88
+ T1sIGenEncodeImm<{0,0,0,?,?}, (outs tGPR:$Rd), (ins tGPR:$Rm, imm0_31:$imm5),
+ IIC_iMOVsi,
+ "lsl", "\t$Rd, $Rm, $imm5",
+ [(set tGPR:$Rd, (shl tGPR:$Rm, (i32 imm:$imm5)))]> {
+ bits<5> imm5;
+ let Inst{10-6} = imm5;
+}
+
+// LSL register
+def tLSLrr : // A8.6.89
+ T1sItDPEncode<0b0010, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iMOVsr,
+ "lsl", "\t$Rdn, $Rm",
+ [(set tGPR:$Rdn, (shl tGPR:$Rn, tGPR:$Rm))]>;
+
+// LSR immediate
+def tLSRri : // A8.6.90
+ T1sIGenEncodeImm<{0,0,1,?,?}, (outs tGPR:$Rd), (ins tGPR:$Rm, imm_sr:$imm5),
+ IIC_iMOVsi,
+ "lsr", "\t$Rd, $Rm, $imm5",
+ [(set tGPR:$Rd, (srl tGPR:$Rm, (i32 imm_sr:$imm5)))]> {
+ bits<5> imm5;
+ let Inst{10-6} = imm5;
+}
+
+// LSR register
+def tLSRrr : // A8.6.91
+ T1sItDPEncode<0b0011, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iMOVsr,
+ "lsr", "\t$Rdn, $Rm",
+ [(set tGPR:$Rdn, (srl tGPR:$Rn, tGPR:$Rm))]>;
+
+// Move register
+let isMoveImm = 1 in
+def tMOVi8 : T1sI<(outs tGPR:$Rd), (ins imm0_255:$imm8), IIC_iMOVi,
+ "mov", "\t$Rd, $imm8",
+ [(set tGPR:$Rd, imm0_255:$imm8)]>,
+ T1General<{1,0,0,?,?}> {
+ // A8.6.96
+ bits<3> Rd;
+ bits<8> imm8;
+ let Inst{10-8} = Rd;
+ let Inst{7-0} = imm8;
+}
+// Because we have an explicit tMOVSr below, we need an alias to handle
+// the immediate "movs" form here. Blech.
+def : tInstAlias <"movs $Rdn, $imm",
+ (tMOVi8 tGPR:$Rdn, CPSR, imm0_255:$imm, 14, 0)>;
+
+// A7-73: MOV(2) - mov setting flag.
+
+let neverHasSideEffects = 1 in {
+def tMOVr : Thumb1pI<(outs GPR:$Rd), (ins GPR:$Rm), AddrModeNone,
+ 2, IIC_iMOVr,
+ "mov", "\t$Rd, $Rm", "", []>,
+ T1Special<{1,0,?,?}> {
+ // A8.6.97
+ bits<4> Rd;
+ bits<4> Rm;
+ let Inst{7} = Rd{3};
+ let Inst{6-3} = Rm;
+ let Inst{2-0} = Rd{2-0};
+}
+let Defs = [CPSR] in
+def tMOVSr : T1I<(outs tGPR:$Rd), (ins tGPR:$Rm), IIC_iMOVr,
+ "movs\t$Rd, $Rm", []>, Encoding16 {
+ // A8.6.97
+ bits<3> Rd;
+ bits<3> Rm;
+ let Inst{15-6} = 0b0000000000;
+ let Inst{5-3} = Rm;
+ let Inst{2-0} = Rd;
+}
+} // neverHasSideEffects
+
+// Multiply register
+let isCommutable = 1 in
+def tMUL : // A8.6.105 T1
+ Thumb1sI<(outs tGPR:$Rd), (ins tGPR:$Rn, tGPR:$Rm), AddrModeNone, 2,
+ IIC_iMUL32, "mul", "\t$Rd, $Rn, $Rm", "$Rm = $Rd",
+ [(set tGPR:$Rd, (mul tGPR:$Rn, tGPR:$Rm))]>,
+ T1DataProcessing<0b1101> {
+ bits<3> Rd;
+ bits<3> Rn;
+ let Inst{5-3} = Rn;
+ let Inst{2-0} = Rd;
+ let AsmMatchConverter = "cvtThumbMultiply";
+}
+
+def :tInstAlias<"mul${s}${p} $Rdm, $Rn", (tMUL tGPR:$Rdm, s_cc_out:$s, tGPR:$Rn,
+ pred:$p)>;
+
+// Move inverse register
+def tMVN : // A8.6.107
+ T1sIDPEncode<0b1111, (outs tGPR:$Rd), (ins tGPR:$Rn), IIC_iMVNr,
+ "mvn", "\t$Rd, $Rn",
+ [(set tGPR:$Rd, (not tGPR:$Rn))]>;
+
+// Bitwise or register
+let isCommutable = 1 in
+def tORR : // A8.6.114
+ T1sItDPEncode<0b1100, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iBITr,
+ "orr", "\t$Rdn, $Rm",
+ [(set tGPR:$Rdn, (or tGPR:$Rn, tGPR:$Rm))]>;
+
+// Swaps
+def tREV : // A8.6.134
+ T1pIMiscEncode<{1,0,1,0,0,0,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
+ IIC_iUNAr,
+ "rev", "\t$Rd, $Rm",
+ [(set tGPR:$Rd, (bswap tGPR:$Rm))]>,
+ Requires<[IsThumb, IsThumb1Only, HasV6]>;
+
+def tREV16 : // A8.6.135
+ T1pIMiscEncode<{1,0,1,0,0,1,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
+ IIC_iUNAr,
+ "rev16", "\t$Rd, $Rm",
+ [(set tGPR:$Rd, (rotr (bswap tGPR:$Rm), (i32 16)))]>,
+ Requires<[IsThumb, IsThumb1Only, HasV6]>;
+
+def tREVSH : // A8.6.136
+ T1pIMiscEncode<{1,0,1,0,1,1,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
+ IIC_iUNAr,
+ "revsh", "\t$Rd, $Rm",
+ [(set tGPR:$Rd, (sra (bswap tGPR:$Rm), (i32 16)))]>,
+ Requires<[IsThumb, IsThumb1Only, HasV6]>;
+
+// Rotate right register
+def tROR : // A8.6.139
+ T1sItDPEncode<0b0111, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iMOVsr,
+ "ror", "\t$Rdn, $Rm",
+ [(set tGPR:$Rdn, (rotr tGPR:$Rn, tGPR:$Rm))]>;
+
+// Negate register
+def tRSB : // A8.6.141
+ T1sIDPEncode<0b1001, (outs tGPR:$Rd), (ins tGPR:$Rn),
+ IIC_iALUi,
+ "rsb", "\t$Rd, $Rn, #0",
+ [(set tGPR:$Rd, (ineg tGPR:$Rn))]>;
+
+def : tInstAlias<"neg${s}${p} $Rd, $Rm",
+ (tRSB tGPR:$Rd, s_cc_out:$s, tGPR:$Rm, pred:$p)>;
+
+// Subtract with carry register
+let Uses = [CPSR] in
+def tSBC : // A8.6.151
+ T1sItDPEncode<0b0110, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iALUr,
+ "sbc", "\t$Rdn, $Rm",
+ [(set tGPR:$Rdn, (sube tGPR:$Rn, tGPR:$Rm))]>;
+
+// Subtract immediate
+def tSUBi3 : // A8.6.210 T1
+ T1sIGenEncodeImm<0b01111, (outs tGPR:$Rd), (ins tGPR:$Rm, imm0_7:$imm3),
+ IIC_iALUi,
+ "sub", "\t$Rd, $Rm, $imm3",
+ [(set tGPR:$Rd, (add tGPR:$Rm, imm0_7_neg:$imm3))]> {
+ bits<3> imm3;
+ let Inst{8-6} = imm3;
+}
+
+def tSUBi8 : // A8.6.210 T2
+ T1sItGenEncodeImm<{1,1,1,?,?}, (outs tGPR:$Rdn),
+ (ins tGPR:$Rn, imm0_255:$imm8), IIC_iALUi,
+ "sub", "\t$Rdn, $imm8",
+ [(set tGPR:$Rdn, (add tGPR:$Rn, imm8_255_neg:$imm8))]>;
+
+// Subtract register
+def tSUBrr : // A8.6.212
+ T1sIGenEncode<0b01101, (outs tGPR:$Rd), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iALUr,
+ "sub", "\t$Rd, $Rn, $Rm",
+ [(set tGPR:$Rd, (sub tGPR:$Rn, tGPR:$Rm))]>;
+
+// Sign-extend byte
+def tSXTB : // A8.6.222
+ T1pIMiscEncode<{0,0,1,0,0,1,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
+ IIC_iUNAr,
+ "sxtb", "\t$Rd, $Rm",
+ [(set tGPR:$Rd, (sext_inreg tGPR:$Rm, i8))]>,
+ Requires<[IsThumb, IsThumb1Only, HasV6]>;
+
+// Sign-extend short
+def tSXTH : // A8.6.224
+ T1pIMiscEncode<{0,0,1,0,0,0,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
+ IIC_iUNAr,
+ "sxth", "\t$Rd, $Rm",
+ [(set tGPR:$Rd, (sext_inreg tGPR:$Rm, i16))]>,
+ Requires<[IsThumb, IsThumb1Only, HasV6]>;
+
+// Test
+let isCompare = 1, isCommutable = 1, Defs = [CPSR] in
+def tTST : // A8.6.230
+ T1pIDPEncode<0b1000, (outs), (ins tGPR:$Rn, tGPR:$Rm), IIC_iTSTr,
+ "tst", "\t$Rn, $Rm",
+ [(ARMcmpZ (and_su tGPR:$Rn, tGPR:$Rm), 0)]>;
+
+// Zero-extend byte
+def tUXTB : // A8.6.262
+ T1pIMiscEncode<{0,0,1,0,1,1,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
+ IIC_iUNAr,
+ "uxtb", "\t$Rd, $Rm",
+ [(set tGPR:$Rd, (and tGPR:$Rm, 0xFF))]>,
+ Requires<[IsThumb, IsThumb1Only, HasV6]>;
+
+// Zero-extend short
+def tUXTH : // A8.6.264
+ T1pIMiscEncode<{0,0,1,0,1,0,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
+ IIC_iUNAr,
+ "uxth", "\t$Rd, $Rm",
+ [(set tGPR:$Rd, (and tGPR:$Rm, 0xFFFF))]>,
+ Requires<[IsThumb, IsThumb1Only, HasV6]>;
+
+// Conditional move tMOVCCr - Used to implement the Thumb SELECT_CC operation.
+// Expanded after instruction selection into a branch sequence.
+let usesCustomInserter = 1 in // Expanded after instruction selection.
+ def tMOVCCr_pseudo :
+ PseudoInst<(outs tGPR:$dst), (ins tGPR:$false, tGPR:$true, pred:$cc),
+ NoItinerary,
+ [/*(set tGPR:$dst, (ARMcmov tGPR:$false, tGPR:$true, imm:$cc))*/]>;
+
+// tLEApcrel - Load a pc-relative address into a register without offending the
+// assembler.
+
+def tADR : T1I<(outs tGPR:$Rd), (ins t_adrlabel:$addr, pred:$p),
+ IIC_iALUi, "adr{$p}\t$Rd, $addr", []>,
+ T1Encoding<{1,0,1,0,0,?}> {
+ bits<3> Rd;
+ bits<8> addr;
+ let Inst{10-8} = Rd;
+ let Inst{7-0} = addr;
+ let DecoderMethod = "DecodeThumbAddSpecialReg";
+}
+
+let neverHasSideEffects = 1, isReMaterializable = 1 in
+def tLEApcrel : tPseudoInst<(outs tGPR:$Rd), (ins i32imm:$label, pred:$p),
+ 2, IIC_iALUi, []>;
+
+def tLEApcrelJT : tPseudoInst<(outs tGPR:$Rd),
+ (ins i32imm:$label, nohash_imm:$id, pred:$p),
+ 2, IIC_iALUi, []>;
+
+//===----------------------------------------------------------------------===//
+// TLS Instructions
+//
+
+// __aeabi_read_tp preserves the registers r1-r3.
+// This is a pseudo inst so that we can get the encoding right,
+// complete with fixup for the aeabi_read_tp function.
+let isCall = 1, Defs = [R0, R12, LR, CPSR], Uses = [SP] in
+def tTPsoft : tPseudoInst<(outs), (ins), 4, IIC_Br,
+ [(set R0, ARMthread_pointer)]>;
+
+//===----------------------------------------------------------------------===//
+// SJLJ Exception handling intrinsics
+//
+
+// eh_sjlj_setjmp() is an instruction sequence to store the return address and
+// save #0 in R0 for the non-longjmp case. Since by its nature we may be coming
+// from some other function to get here, and we're using the stack frame for the
+// containing function to save/restore registers, we can't keep anything live in
+// regs across the eh_sjlj_setjmp(), else it will almost certainly have been
+// tromped upon when we get here from a longjmp(). We force everything out of
+// registers except for our own input by listing the relevant registers in
+// Defs. By doing so, we also cause the prologue/epilogue code to actively
+// preserve all of the callee-saved resgisters, which is exactly what we want.
+// $val is a scratch register for our use.
+let Defs = [ R0, R1, R2, R3, R4, R5, R6, R7, R12, CPSR ],
+ hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1 in
+def tInt_eh_sjlj_setjmp : ThumbXI<(outs),(ins tGPR:$src, tGPR:$val),
+ AddrModeNone, 0, NoItinerary, "","",
+ [(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>;
+
+// FIXME: Non-Darwin version(s)
+let isBarrier = 1, hasSideEffects = 1, isTerminator = 1, isCodeGenOnly = 1,
+ Defs = [ R7, LR, SP ] in
+def tInt_eh_sjlj_longjmp : XI<(outs), (ins GPR:$src, GPR:$scratch),
+ AddrModeNone, 0, IndexModeNone,
+ Pseudo, NoItinerary, "", "",
+ [(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>,
+ Requires<[IsThumb, IsDarwin]>;
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//
+
+// Comparisons
+def : T1Pat<(ARMcmpZ tGPR:$Rn, imm0_255:$imm8),
+ (tCMPi8 tGPR:$Rn, imm0_255:$imm8)>;
+def : T1Pat<(ARMcmpZ tGPR:$Rn, tGPR:$Rm),
+ (tCMPr tGPR:$Rn, tGPR:$Rm)>;
+
+// Add with carry
+def : T1Pat<(addc tGPR:$lhs, imm0_7:$rhs),
+ (tADDi3 tGPR:$lhs, imm0_7:$rhs)>;
+def : T1Pat<(addc tGPR:$lhs, imm8_255:$rhs),
+ (tADDi8 tGPR:$lhs, imm8_255:$rhs)>;
+def : T1Pat<(addc tGPR:$lhs, tGPR:$rhs),
+ (tADDrr tGPR:$lhs, tGPR:$rhs)>;
+
+// Subtract with carry
+def : T1Pat<(addc tGPR:$lhs, imm0_7_neg:$rhs),
+ (tSUBi3 tGPR:$lhs, imm0_7_neg:$rhs)>;
+def : T1Pat<(addc tGPR:$lhs, imm8_255_neg:$rhs),
+ (tSUBi8 tGPR:$lhs, imm8_255_neg:$rhs)>;
+def : T1Pat<(subc tGPR:$lhs, tGPR:$rhs),
+ (tSUBrr tGPR:$lhs, tGPR:$rhs)>;
+
+// ConstantPool, GlobalAddress
+def : T1Pat<(ARMWrapper tglobaladdr :$dst), (tLEApcrel tglobaladdr :$dst)>;
+def : T1Pat<(ARMWrapper tconstpool :$dst), (tLEApcrel tconstpool :$dst)>;
+
+// JumpTable
+def : T1Pat<(ARMWrapperJT tjumptable:$dst, imm:$id),
+ (tLEApcrelJT tjumptable:$dst, imm:$id)>;
+
+// Direct calls
+def : T1Pat<(ARMtcall texternalsym:$func), (tBL texternalsym:$func)>,
+ Requires<[IsThumb, IsNotDarwin]>;
+def : T1Pat<(ARMtcall texternalsym:$func), (tBLr9 texternalsym:$func)>,
+ Requires<[IsThumb, IsDarwin]>;
+
+def : Tv5Pat<(ARMcall texternalsym:$func), (tBLXi texternalsym:$func)>,
+ Requires<[IsThumb, HasV5T, IsNotDarwin]>;
+def : Tv5Pat<(ARMcall texternalsym:$func), (tBLXi_r9 texternalsym:$func)>,
+ Requires<[IsThumb, HasV5T, IsDarwin]>;
+
+// Indirect calls to ARM routines
+def : Tv5Pat<(ARMcall GPR:$dst), (tBLXr GPR:$dst)>,
+ Requires<[IsThumb, HasV5T, IsNotDarwin]>;
+def : Tv5Pat<(ARMcall GPR:$dst), (tBLXr_r9 GPR:$dst)>,
+ Requires<[IsThumb, HasV5T, IsDarwin]>;
+
+// zextload i1 -> zextload i8
+def : T1Pat<(zextloadi1 t_addrmode_rrs1:$addr),
+ (tLDRBr t_addrmode_rrs1:$addr)>;
+def : T1Pat<(zextloadi1 t_addrmode_is1:$addr),
+ (tLDRBi t_addrmode_is1:$addr)>;
+
+// extload -> zextload
+def : T1Pat<(extloadi1 t_addrmode_rrs1:$addr), (tLDRBr t_addrmode_rrs1:$addr)>;
+def : T1Pat<(extloadi1 t_addrmode_is1:$addr), (tLDRBi t_addrmode_is1:$addr)>;
+def : T1Pat<(extloadi8 t_addrmode_rrs1:$addr), (tLDRBr t_addrmode_rrs1:$addr)>;
+def : T1Pat<(extloadi8 t_addrmode_is1:$addr), (tLDRBi t_addrmode_is1:$addr)>;
+def : T1Pat<(extloadi16 t_addrmode_rrs2:$addr), (tLDRHr t_addrmode_rrs2:$addr)>;
+def : T1Pat<(extloadi16 t_addrmode_is2:$addr), (tLDRHi t_addrmode_is2:$addr)>;
+
+// If it's impossible to use [r,r] address mode for sextload, select to
+// ldr{b|h} + sxt{b|h} instead.
+def : T1Pat<(sextloadi8 t_addrmode_is1:$addr),
+ (tSXTB (tLDRBi t_addrmode_is1:$addr))>,
+ Requires<[IsThumb, IsThumb1Only, HasV6]>;
+def : T1Pat<(sextloadi8 t_addrmode_rrs1:$addr),
+ (tSXTB (tLDRBr t_addrmode_rrs1:$addr))>,
+ Requires<[IsThumb, IsThumb1Only, HasV6]>;
+def : T1Pat<(sextloadi16 t_addrmode_is2:$addr),
+ (tSXTH (tLDRHi t_addrmode_is2:$addr))>,
+ Requires<[IsThumb, IsThumb1Only, HasV6]>;
+def : T1Pat<(sextloadi16 t_addrmode_rrs2:$addr),
+ (tSXTH (tLDRHr t_addrmode_rrs2:$addr))>,
+ Requires<[IsThumb, IsThumb1Only, HasV6]>;
+
+def : T1Pat<(sextloadi8 t_addrmode_rrs1:$addr),
+ (tASRri (tLSLri (tLDRBr t_addrmode_rrs1:$addr), 24), 24)>;
+def : T1Pat<(sextloadi8 t_addrmode_is1:$addr),
+ (tASRri (tLSLri (tLDRBi t_addrmode_is1:$addr), 24), 24)>;
+def : T1Pat<(sextloadi16 t_addrmode_rrs2:$addr),
+ (tASRri (tLSLri (tLDRHr t_addrmode_rrs2:$addr), 16), 16)>;
+def : T1Pat<(sextloadi16 t_addrmode_is2:$addr),
+ (tASRri (tLSLri (tLDRHi t_addrmode_is2:$addr), 16), 16)>;
+
+def : T1Pat<(atomic_load_8 t_addrmode_is1:$src),
+ (tLDRBi t_addrmode_is1:$src)>;
+def : T1Pat<(atomic_load_8 t_addrmode_rrs1:$src),
+ (tLDRBr t_addrmode_rrs1:$src)>;
+def : T1Pat<(atomic_load_16 t_addrmode_is2:$src),
+ (tLDRHi t_addrmode_is2:$src)>;
+def : T1Pat<(atomic_load_16 t_addrmode_rrs2:$src),
+ (tLDRHr t_addrmode_rrs2:$src)>;
+def : T1Pat<(atomic_load_32 t_addrmode_is4:$src),
+ (tLDRi t_addrmode_is4:$src)>;
+def : T1Pat<(atomic_load_32 t_addrmode_rrs4:$src),
+ (tLDRr t_addrmode_rrs4:$src)>;
+def : T1Pat<(atomic_store_8 t_addrmode_is1:$ptr, tGPR:$val),
+ (tSTRBi tGPR:$val, t_addrmode_is1:$ptr)>;
+def : T1Pat<(atomic_store_8 t_addrmode_rrs1:$ptr, tGPR:$val),
+ (tSTRBr tGPR:$val, t_addrmode_rrs1:$ptr)>;
+def : T1Pat<(atomic_store_16 t_addrmode_is2:$ptr, tGPR:$val),
+ (tSTRHi tGPR:$val, t_addrmode_is2:$ptr)>;
+def : T1Pat<(atomic_store_16 t_addrmode_rrs2:$ptr, tGPR:$val),
+ (tSTRHr tGPR:$val, t_addrmode_rrs2:$ptr)>;
+def : T1Pat<(atomic_store_32 t_addrmode_is4:$ptr, tGPR:$val),
+ (tSTRi tGPR:$val, t_addrmode_is4:$ptr)>;
+def : T1Pat<(atomic_store_32 t_addrmode_rrs4:$ptr, tGPR:$val),
+ (tSTRr tGPR:$val, t_addrmode_rrs4:$ptr)>;
+
+// Large immediate handling.
+
+// Two piece imms.
+def : T1Pat<(i32 thumb_immshifted:$src),
+ (tLSLri (tMOVi8 (thumb_immshifted_val imm:$src)),
+ (thumb_immshifted_shamt imm:$src))>;
+
+def : T1Pat<(i32 imm0_255_comp:$src),
+ (tMVN (tMOVi8 (imm_comp_XFORM imm:$src)))>;
+
+// Pseudo instruction that combines ldr from constpool and add pc. This should
+// be expanded into two instructions late to allow if-conversion and
+// scheduling.
+let isReMaterializable = 1 in
+def tLDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp),
+ NoItinerary,
+ [(set GPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)),
+ imm:$cp))]>,
+ Requires<[IsThumb, IsThumb1Only]>;
+
+// Pseudo-instruction for merged POP and return.
+// FIXME: remove when we have a way to marking a MI with these properties.
+let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1,
+ hasExtraDefRegAllocReq = 1 in
+def tPOP_RET : tPseudoExpand<(outs), (ins pred:$p, reglist:$regs, variable_ops),
+ 2, IIC_iPop_Br, [],
+ (tPOP pred:$p, reglist:$regs)>;
+
+// Indirect branch using "mov pc, $Rm"
+let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
+ def tBRIND : tPseudoExpand<(outs), (ins GPR:$Rm, pred:$p),
+ 2, IIC_Br, [(brind GPR:$Rm)],
+ (tMOVr PC, GPR:$Rm, pred:$p)>;
+}
+
+
+// In Thumb1, "nop" is encoded as a "mov r8, r8". Technically, the bf00
+// encoding is available on ARMv6K, but we don't differentiate that finely.
+def : InstAlias<"nop", (tMOVr R8, R8, 14, 0)>,Requires<[IsThumb, IsThumb1Only]>;
+
+
+// For round-trip assembly/disassembly, we have to handle a CPS instruction
+// without any iflags. That's not, strictly speaking, valid syntax, but it's
+// a useful extention and assembles to defined behaviour (the insn does
+// nothing).
+def : tInstAlias<"cps$imod", (tCPS imod_op:$imod, 0)>;
+def : tInstAlias<"cps$imod", (tCPS imod_op:$imod, 0)>;
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td
new file mode 100644
index 0000000..471ec29
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td
@@ -0,0 +1,4022 @@
+//===- ARMInstrThumb2.td - Thumb2 support for ARM -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Thumb2 instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+// IT block predicate field
+def it_pred_asmoperand : AsmOperandClass {
+ let Name = "ITCondCode";
+ let ParserMethod = "parseITCondCode";
+}
+def it_pred : Operand<i32> {
+ let PrintMethod = "printMandatoryPredicateOperand";
+ let ParserMatchClass = it_pred_asmoperand;
+}
+
+// IT block condition mask
+def it_mask_asmoperand : AsmOperandClass { let Name = "ITMask"; }
+def it_mask : Operand<i32> {
+ let PrintMethod = "printThumbITMask";
+ let ParserMatchClass = it_mask_asmoperand;
+}
+
+// t2_shift_imm: An integer that encodes a shift amount and the type of shift
+// (asr or lsl). The 6-bit immediate encodes as:
+// {5} 0 ==> lsl
+// 1 asr
+// {4-0} imm5 shift amount.
+// asr #32 not allowed
+def t2_shift_imm : Operand<i32> {
+ let PrintMethod = "printShiftImmOperand";
+ let ParserMatchClass = ShifterImmAsmOperand;
+ let DecoderMethod = "DecodeT2ShifterImmOperand";
+}
+
+// Shifted operands. No register controlled shifts for Thumb2.
+// Note: We do not support rrx shifted operands yet.
+def t2_so_reg : Operand<i32>, // reg imm
+ ComplexPattern<i32, 2, "SelectT2ShifterOperandReg",
+ [shl,srl,sra,rotr]> {
+ let EncoderMethod = "getT2SORegOpValue";
+ let PrintMethod = "printT2SOOperand";
+ let DecoderMethod = "DecodeSORegImmOperand";
+ let ParserMatchClass = ShiftedImmAsmOperand;
+ let MIOperandInfo = (ops rGPR, i32imm);
+}
+
+// t2_so_imm_not_XFORM - Return the complement of a t2_so_imm value
+def t2_so_imm_not_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(~((uint32_t)N->getZExtValue()), MVT::i32);
+}]>;
+
+// t2_so_imm_neg_XFORM - Return the negation of a t2_so_imm value
+def t2_so_imm_neg_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(-((int)N->getZExtValue()), MVT::i32);
+}]>;
+
+// t2_so_imm - Match a 32-bit immediate operand, which is an
+// 8-bit immediate rotated by an arbitrary number of bits, or an 8-bit
+// immediate splatted into multiple bytes of the word.
+def t2_so_imm_asmoperand : AsmOperandClass { let Name = "T2SOImm"; }
+def t2_so_imm : Operand<i32>, ImmLeaf<i32, [{
+ return ARM_AM::getT2SOImmVal(Imm) != -1;
+ }]> {
+ let ParserMatchClass = t2_so_imm_asmoperand;
+ let EncoderMethod = "getT2SOImmOpValue";
+ let DecoderMethod = "DecodeT2SOImm";
+}
+
+// t2_so_imm_not - Match an immediate that is a complement
+// of a t2_so_imm.
+def t2_so_imm_not : Operand<i32>,
+ PatLeaf<(imm), [{
+ return ARM_AM::getT2SOImmVal(~((uint32_t)N->getZExtValue())) != -1;
+}], t2_so_imm_not_XFORM>;
+
+// t2_so_imm_neg - Match an immediate that is a negation of a t2_so_imm.
+def t2_so_imm_neg : Operand<i32>,
+ PatLeaf<(imm), [{
+ return ARM_AM::getT2SOImmVal(-((uint32_t)N->getZExtValue())) != -1;
+}], t2_so_imm_neg_XFORM>;
+
+/// imm0_4095 predicate - True if the 32-bit immediate is in the range [0.4095].
+def imm0_4095 : Operand<i32>,
+ ImmLeaf<i32, [{
+ return Imm >= 0 && Imm < 4096;
+}]>;
+
+def imm0_4095_neg : PatLeaf<(i32 imm), [{
+ return (uint32_t)(-N->getZExtValue()) < 4096;
+}], imm_neg_XFORM>;
+
+def imm0_255_neg : PatLeaf<(i32 imm), [{
+ return (uint32_t)(-N->getZExtValue()) < 255;
+}], imm_neg_XFORM>;
+
+def imm0_255_not : PatLeaf<(i32 imm), [{
+ return (uint32_t)(~N->getZExtValue()) < 255;
+}], imm_comp_XFORM>;
+
+def lo5AllOne : PatLeaf<(i32 imm), [{
+ // Returns true if all low 5-bits are 1.
+ return (((uint32_t)N->getZExtValue()) & 0x1FUL) == 0x1FUL;
+}]>;
+
+// Define Thumb2 specific addressing modes.
+
+// t2addrmode_imm12 := reg + imm12
+def t2addrmode_imm12_asmoperand : AsmOperandClass {let Name="MemUImm12Offset";}
+def t2addrmode_imm12 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectT2AddrModeImm12", []> {
+ let PrintMethod = "printAddrModeImm12Operand";
+ let EncoderMethod = "getAddrModeImm12OpValue";
+ let DecoderMethod = "DecodeT2AddrModeImm12";
+ let ParserMatchClass = t2addrmode_imm12_asmoperand;
+ let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
+}
+
+// t2ldrlabel := imm12
+def t2ldrlabel : Operand<i32> {
+ let EncoderMethod = "getAddrModeImm12OpValue";
+ let PrintMethod = "printT2LdrLabelOperand";
+}
+
+
+// ADR instruction labels.
+def t2adrlabel : Operand<i32> {
+ let EncoderMethod = "getT2AdrLabelOpValue";
+}
+
+
+// t2addrmode_posimm8 := reg + imm8
+def MemPosImm8OffsetAsmOperand : AsmOperandClass {let Name="MemPosImm8Offset";}
+def t2addrmode_posimm8 : Operand<i32> {
+ let PrintMethod = "printT2AddrModeImm8Operand";
+ let EncoderMethod = "getT2AddrModeImm8OpValue";
+ let DecoderMethod = "DecodeT2AddrModeImm8";
+ let ParserMatchClass = MemPosImm8OffsetAsmOperand;
+ let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
+}
+
+// t2addrmode_negimm8 := reg - imm8
+def MemNegImm8OffsetAsmOperand : AsmOperandClass {let Name="MemNegImm8Offset";}
+def t2addrmode_negimm8 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectT2AddrModeImm8", []> {
+ let PrintMethod = "printT2AddrModeImm8Operand";
+ let EncoderMethod = "getT2AddrModeImm8OpValue";
+ let DecoderMethod = "DecodeT2AddrModeImm8";
+ let ParserMatchClass = MemNegImm8OffsetAsmOperand;
+ let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
+}
+
+// t2addrmode_imm8 := reg +/- imm8
+def MemImm8OffsetAsmOperand : AsmOperandClass { let Name = "MemImm8Offset"; }
+def t2addrmode_imm8 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectT2AddrModeImm8", []> {
+ let PrintMethod = "printT2AddrModeImm8Operand";
+ let EncoderMethod = "getT2AddrModeImm8OpValue";
+ let DecoderMethod = "DecodeT2AddrModeImm8";
+ let ParserMatchClass = MemImm8OffsetAsmOperand;
+ let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
+}
+
+def t2am_imm8_offset : Operand<i32>,
+ ComplexPattern<i32, 1, "SelectT2AddrModeImm8Offset",
+ [], [SDNPWantRoot]> {
+ let PrintMethod = "printT2AddrModeImm8OffsetOperand";
+ let EncoderMethod = "getT2AddrModeImm8OffsetOpValue";
+ let DecoderMethod = "DecodeT2Imm8";
+}
+
+// t2addrmode_imm8s4 := reg +/- (imm8 << 2)
+def MemImm8s4OffsetAsmOperand : AsmOperandClass {let Name = "MemImm8s4Offset";}
+def t2addrmode_imm8s4 : Operand<i32> {
+ let PrintMethod = "printT2AddrModeImm8s4Operand";
+ let EncoderMethod = "getT2AddrModeImm8s4OpValue";
+ let DecoderMethod = "DecodeT2AddrModeImm8s4";
+ let ParserMatchClass = MemImm8s4OffsetAsmOperand;
+ let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
+}
+
+def t2am_imm8s4_offset_asmoperand : AsmOperandClass { let Name = "Imm8s4"; }
+def t2am_imm8s4_offset : Operand<i32> {
+ let PrintMethod = "printT2AddrModeImm8s4OffsetOperand";
+ let EncoderMethod = "getT2Imm8s4OpValue";
+ let DecoderMethod = "DecodeT2Imm8S4";
+}
+
+// t2addrmode_imm0_1020s4 := reg + (imm8 << 2)
+def MemImm0_1020s4OffsetAsmOperand : AsmOperandClass {
+ let Name = "MemImm0_1020s4Offset";
+}
+def t2addrmode_imm0_1020s4 : Operand<i32> {
+ let PrintMethod = "printT2AddrModeImm0_1020s4Operand";
+ let EncoderMethod = "getT2AddrModeImm0_1020s4OpValue";
+ let DecoderMethod = "DecodeT2AddrModeImm0_1020s4";
+ let ParserMatchClass = MemImm0_1020s4OffsetAsmOperand;
+ let MIOperandInfo = (ops GPRnopc:$base, i32imm:$offsimm);
+}
+
+// t2addrmode_so_reg := reg + (reg << imm2)
+def t2addrmode_so_reg_asmoperand : AsmOperandClass {let Name="T2MemRegOffset";}
+def t2addrmode_so_reg : Operand<i32>,
+ ComplexPattern<i32, 3, "SelectT2AddrModeSoReg", []> {
+ let PrintMethod = "printT2AddrModeSoRegOperand";
+ let EncoderMethod = "getT2AddrModeSORegOpValue";
+ let DecoderMethod = "DecodeT2AddrModeSOReg";
+ let ParserMatchClass = t2addrmode_so_reg_asmoperand;
+ let MIOperandInfo = (ops GPR:$base, rGPR:$offsreg, i32imm:$offsimm);
+}
+
+// Addresses for the TBB/TBH instructions.
+def addrmode_tbb_asmoperand : AsmOperandClass { let Name = "MemTBB"; }
+def addrmode_tbb : Operand<i32> {
+ let PrintMethod = "printAddrModeTBB";
+ let ParserMatchClass = addrmode_tbb_asmoperand;
+ let MIOperandInfo = (ops GPR:$Rn, rGPR:$Rm);
+}
+def addrmode_tbh_asmoperand : AsmOperandClass { let Name = "MemTBH"; }
+def addrmode_tbh : Operand<i32> {
+ let PrintMethod = "printAddrModeTBH";
+ let ParserMatchClass = addrmode_tbh_asmoperand;
+ let MIOperandInfo = (ops GPR:$Rn, rGPR:$Rm);
+}
+
+//===----------------------------------------------------------------------===//
+// Multiclass helpers...
+//
+
+
+class T2OneRegImm<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<12> imm;
+
+ let Inst{11-8} = Rd;
+ let Inst{26} = imm{11};
+ let Inst{14-12} = imm{10-8};
+ let Inst{7-0} = imm{7-0};
+}
+
+
+class T2sOneRegImm<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2sI<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> imm;
+
+ let Inst{11-8} = Rd;
+ let Inst{26} = imm{11};
+ let Inst{14-12} = imm{10-8};
+ let Inst{7-0} = imm{7-0};
+}
+
+class T2OneRegCmpImm<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rn;
+ bits<12> imm;
+
+ let Inst{19-16} = Rn;
+ let Inst{26} = imm{11};
+ let Inst{14-12} = imm{10-8};
+ let Inst{7-0} = imm{7-0};
+}
+
+
+class T2OneRegShiftedReg<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<12> ShiftedRm;
+
+ let Inst{11-8} = Rd;
+ let Inst{3-0} = ShiftedRm{3-0};
+ let Inst{5-4} = ShiftedRm{6-5};
+ let Inst{14-12} = ShiftedRm{11-9};
+ let Inst{7-6} = ShiftedRm{8-7};
+}
+
+class T2sOneRegShiftedReg<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2sI<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<12> ShiftedRm;
+
+ let Inst{11-8} = Rd;
+ let Inst{3-0} = ShiftedRm{3-0};
+ let Inst{5-4} = ShiftedRm{6-5};
+ let Inst{14-12} = ShiftedRm{11-9};
+ let Inst{7-6} = ShiftedRm{8-7};
+}
+
+class T2OneRegCmpShiftedReg<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rn;
+ bits<12> ShiftedRm;
+
+ let Inst{19-16} = Rn;
+ let Inst{3-0} = ShiftedRm{3-0};
+ let Inst{5-4} = ShiftedRm{6-5};
+ let Inst{14-12} = ShiftedRm{11-9};
+ let Inst{7-6} = ShiftedRm{8-7};
+}
+
+class T2TwoReg<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rm;
+
+ let Inst{11-8} = Rd;
+ let Inst{3-0} = Rm;
+}
+
+class T2sTwoReg<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2sI<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rm;
+
+ let Inst{11-8} = Rd;
+ let Inst{3-0} = Rm;
+}
+
+class T2TwoRegCmp<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rn;
+ bits<4> Rm;
+
+ let Inst{19-16} = Rn;
+ let Inst{3-0} = Rm;
+}
+
+
+class T2TwoRegImm<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> imm;
+
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{26} = imm{11};
+ let Inst{14-12} = imm{10-8};
+ let Inst{7-0} = imm{7-0};
+}
+
+class T2sTwoRegImm<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2sI<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> imm;
+
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{26} = imm{11};
+ let Inst{14-12} = imm{10-8};
+ let Inst{7-0} = imm{7-0};
+}
+
+class T2TwoRegShiftImm<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rm;
+ bits<5> imm;
+
+ let Inst{11-8} = Rd;
+ let Inst{3-0} = Rm;
+ let Inst{14-12} = imm{4-2};
+ let Inst{7-6} = imm{1-0};
+}
+
+class T2sTwoRegShiftImm<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2sI<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rm;
+ bits<5> imm;
+
+ let Inst{11-8} = Rd;
+ let Inst{3-0} = Rm;
+ let Inst{14-12} = imm{4-2};
+ let Inst{7-6} = imm{1-0};
+}
+
+class T2ThreeReg<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{3-0} = Rm;
+}
+
+class T2sThreeReg<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2sI<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{3-0} = Rm;
+}
+
+class T2TwoRegShiftedReg<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> ShiftedRm;
+
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{3-0} = ShiftedRm{3-0};
+ let Inst{5-4} = ShiftedRm{6-5};
+ let Inst{14-12} = ShiftedRm{11-9};
+ let Inst{7-6} = ShiftedRm{8-7};
+}
+
+class T2sTwoRegShiftedReg<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2sI<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> ShiftedRm;
+
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{3-0} = ShiftedRm{3-0};
+ let Inst{5-4} = ShiftedRm{6-5};
+ let Inst{14-12} = ShiftedRm{11-9};
+ let Inst{7-6} = ShiftedRm{8-7};
+}
+
+class T2FourReg<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ bits<4> Ra;
+
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Ra;
+ let Inst{11-8} = Rd;
+ let Inst{3-0} = Rm;
+}
+
+class T2MulLong<bits<3> opc22_20, bits<4> opc7_4,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> RdLo;
+ bits<4> RdHi;
+ bits<4> Rn;
+ bits<4> Rm;
+
+ let Inst{31-23} = 0b111110111;
+ let Inst{22-20} = opc22_20;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = RdLo;
+ let Inst{11-8} = RdHi;
+ let Inst{7-4} = opc7_4;
+ let Inst{3-0} = Rm;
+}
+
+
+/// T2I_bin_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns for a
+/// binary operation that produces a value. These are predicable and can be
+/// changed to modify CPSR.
+multiclass T2I_bin_irs<bits<4> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+ PatFrag opnode, string baseOpc, bit Commutable = 0,
+ string wide = ""> {
+ // shifted imm
+ def ri : T2sTwoRegImm<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm), iii,
+ opc, "\t$Rd, $Rn, $imm",
+ [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_imm:$imm))]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 0;
+ let Inst{24-21} = opcod;
+ let Inst{15} = 0;
+ }
+ // register
+ def rr : T2sThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), iir,
+ opc, !strconcat(wide, "\t$Rd, $Rn, $Rm"),
+ [(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]> {
+ let isCommutable = Commutable;
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ let Inst{14-12} = 0b000; // imm3
+ let Inst{7-6} = 0b00; // imm2
+ let Inst{5-4} = 0b00; // type
+ }
+ // shifted register
+ def rs : T2sTwoRegShiftedReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm), iis,
+ opc, !strconcat(wide, "\t$Rd, $Rn, $ShiftedRm"),
+ [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_reg:$ShiftedRm))]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ }
+ // Assembly aliases for optional destination operand when it's the same
+ // as the source operand.
+ def : t2InstAlias<!strconcat(opc, "${s}${p} $Rdn, $imm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rdn, rGPR:$Rdn,
+ t2_so_imm:$imm, pred:$p,
+ cc_out:$s)>;
+ def : t2InstAlias<!strconcat(opc, "${s}${p}", wide, " $Rdn, $Rm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rdn, rGPR:$Rdn,
+ rGPR:$Rm, pred:$p,
+ cc_out:$s)>;
+ def : t2InstAlias<!strconcat(opc, "${s}${p}", wide, " $Rdn, $shift"),
+ (!cast<Instruction>(!strconcat(baseOpc, "rs")) rGPR:$Rdn, rGPR:$Rdn,
+ t2_so_reg:$shift, pred:$p,
+ cc_out:$s)>;
+}
+
+/// T2I_bin_w_irs - Same as T2I_bin_irs except these operations need
+// the ".w" suffix to indicate that they are wide.
+multiclass T2I_bin_w_irs<bits<4> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+ PatFrag opnode, string baseOpc, bit Commutable = 0> :
+ T2I_bin_irs<opcod, opc, iii, iir, iis, opnode, baseOpc, Commutable, ".w"> {
+ // Assembler aliases w/o the ".w" suffix.
+ def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rd, $Rn, $Rm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rd, rGPR:$Rn,
+ rGPR:$Rm, pred:$p,
+ cc_out:$s)>;
+ def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rd, $Rn, $shift"),
+ (!cast<Instruction>(!strconcat(baseOpc, "rs")) rGPR:$Rd, rGPR:$Rn,
+ t2_so_reg:$shift, pred:$p,
+ cc_out:$s)>;
+
+ // and with the optional destination operand, too.
+ def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rdn, $Rm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rdn, rGPR:$Rdn,
+ rGPR:$Rm, pred:$p,
+ cc_out:$s)>;
+ def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rdn, $shift"),
+ (!cast<Instruction>(!strconcat(baseOpc, "rs")) rGPR:$Rdn, rGPR:$Rdn,
+ t2_so_reg:$shift, pred:$p,
+ cc_out:$s)>;
+}
+
+/// T2I_rbin_is - Same as T2I_bin_irs except the order of operands are
+/// reversed. The 'rr' form is only defined for the disassembler; for codegen
+/// it is equivalent to the T2I_bin_irs counterpart.
+multiclass T2I_rbin_irs<bits<4> opcod, string opc, PatFrag opnode> {
+ // shifted imm
+ def ri : T2sTwoRegImm<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm), IIC_iALUi,
+ opc, ".w\t$Rd, $Rn, $imm",
+ [(set rGPR:$Rd, (opnode t2_so_imm:$imm, rGPR:$Rn))]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 0;
+ let Inst{24-21} = opcod;
+ let Inst{15} = 0;
+ }
+ // register
+ def rr : T2sThreeReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUr,
+ opc, "\t$Rd, $Rn, $Rm",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ let Inst{14-12} = 0b000; // imm3
+ let Inst{7-6} = 0b00; // imm2
+ let Inst{5-4} = 0b00; // type
+ }
+ // shifted register
+ def rs : T2sTwoRegShiftedReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm),
+ IIC_iALUsir, opc, "\t$Rd, $Rn, $ShiftedRm",
+ [(set rGPR:$Rd, (opnode t2_so_reg:$ShiftedRm, rGPR:$Rn))]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ }
+}
+
+/// T2I_bin_s_irs - Similar to T2I_bin_irs except it sets the 's' bit so the
+/// instruction modifies the CPSR register.
+///
+/// These opcodes will be converted to the real non-S opcodes by
+/// AdjustInstrPostInstrSelection after giving then an optional CPSR operand.
+let hasPostISelHook = 1, isCodeGenOnly = 1, isPseudo = 1, Defs = [CPSR] in {
+multiclass T2I_bin_s_irs<bits<4> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+ PatFrag opnode, bit Commutable = 0> {
+ // shifted imm
+ def ri : T2sTwoRegImm<
+ (outs rGPR:$Rd), (ins GPR:$Rn, t2_so_imm:$imm), iii,
+ opc, ".w\t$Rd, $Rn, $imm",
+ [(set rGPR:$Rd, CPSR, (opnode GPR:$Rn, t2_so_imm:$imm))]>;
+ // register
+ def rr : T2sThreeReg<
+ (outs rGPR:$Rd), (ins GPR:$Rn, rGPR:$Rm), iir,
+ opc, ".w\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, CPSR, (opnode GPR:$Rn, rGPR:$Rm))]>;
+ // shifted register
+ def rs : T2sTwoRegShiftedReg<
+ (outs rGPR:$Rd), (ins GPR:$Rn, t2_so_reg:$ShiftedRm), iis,
+ opc, ".w\t$Rd, $Rn, $ShiftedRm",
+ [(set rGPR:$Rd, CPSR, (opnode GPR:$Rn, t2_so_reg:$ShiftedRm))]>;
+}
+}
+
+/// T2I_bin_ii12rs - Defines a set of (op reg, {so_imm|imm0_4095|r|so_reg})
+/// patterns for a binary operation that produces a value.
+multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode,
+ bit Commutable = 0> {
+ // shifted imm
+ // The register-immediate version is re-materializable. This is useful
+ // in particular for taking the address of a local.
+ let isReMaterializable = 1 in {
+ def ri : T2sTwoRegImm<
+ (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, t2_so_imm:$imm), IIC_iALUi,
+ opc, ".w\t$Rd, $Rn, $imm",
+ [(set GPRnopc:$Rd, (opnode GPRnopc:$Rn, t2_so_imm:$imm))]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 0;
+ let Inst{24} = 1;
+ let Inst{23-21} = op23_21;
+ let Inst{15} = 0;
+ }
+ }
+ // 12-bit imm
+ def ri12 : T2I<
+ (outs GPRnopc:$Rd), (ins GPR:$Rn, imm0_4095:$imm), IIC_iALUi,
+ !strconcat(opc, "w"), "\t$Rd, $Rn, $imm",
+ [(set GPRnopc:$Rd, (opnode GPR:$Rn, imm0_4095:$imm))]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> imm;
+ let Inst{31-27} = 0b11110;
+ let Inst{26} = imm{11};
+ let Inst{25-24} = 0b10;
+ let Inst{23-21} = op23_21;
+ let Inst{20} = 0; // The S bit.
+ let Inst{19-16} = Rn;
+ let Inst{15} = 0;
+ let Inst{14-12} = imm{10-8};
+ let Inst{11-8} = Rd;
+ let Inst{7-0} = imm{7-0};
+ }
+ // register
+ def rr : T2sThreeReg<(outs GPRnopc:$Rd), (ins GPRnopc:$Rn, rGPR:$Rm),
+ IIC_iALUr, opc, ".w\t$Rd, $Rn, $Rm",
+ [(set GPRnopc:$Rd, (opnode GPRnopc:$Rn, rGPR:$Rm))]> {
+ let isCommutable = Commutable;
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24} = 1;
+ let Inst{23-21} = op23_21;
+ let Inst{14-12} = 0b000; // imm3
+ let Inst{7-6} = 0b00; // imm2
+ let Inst{5-4} = 0b00; // type
+ }
+ // shifted register
+ def rs : T2sTwoRegShiftedReg<
+ (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, t2_so_reg:$ShiftedRm),
+ IIC_iALUsi, opc, ".w\t$Rd, $Rn, $ShiftedRm",
+ [(set GPRnopc:$Rd, (opnode GPRnopc:$Rn, t2_so_reg:$ShiftedRm))]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24} = 1;
+ let Inst{23-21} = op23_21;
+ }
+}
+
+/// T2I_adde_sube_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns
+/// for a binary operation that produces a value and use the carry
+/// bit. It's not predicable.
+let Defs = [CPSR], Uses = [CPSR] in {
+multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
+ bit Commutable = 0> {
+ // shifted imm
+ def ri : T2sTwoRegImm<(outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm),
+ IIC_iALUi, opc, "\t$Rd, $Rn, $imm",
+ [(set rGPR:$Rd, CPSR, (opnode rGPR:$Rn, t2_so_imm:$imm, CPSR))]>,
+ Requires<[IsThumb2]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 0;
+ let Inst{24-21} = opcod;
+ let Inst{15} = 0;
+ }
+ // register
+ def rr : T2sThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUr,
+ opc, ".w\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, CPSR, (opnode rGPR:$Rn, rGPR:$Rm, CPSR))]>,
+ Requires<[IsThumb2]> {
+ let isCommutable = Commutable;
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ let Inst{14-12} = 0b000; // imm3
+ let Inst{7-6} = 0b00; // imm2
+ let Inst{5-4} = 0b00; // type
+ }
+ // shifted register
+ def rs : T2sTwoRegShiftedReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm),
+ IIC_iALUsi, opc, ".w\t$Rd, $Rn, $ShiftedRm",
+ [(set rGPR:$Rd, CPSR, (opnode rGPR:$Rn, t2_so_reg:$ShiftedRm, CPSR))]>,
+ Requires<[IsThumb2]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ }
+}
+}
+
+/// T2I_rbin_s_is - Same as T2I_rbin_irs except sets 's' bit and the register
+/// version is not needed since this is only for codegen.
+///
+/// These opcodes will be converted to the real non-S opcodes by
+/// AdjustInstrPostInstrSelection after giving then an optional CPSR operand.
+let hasPostISelHook = 1, isCodeGenOnly = 1, isPseudo = 1, Defs = [CPSR] in {
+multiclass T2I_rbin_s_is<bits<4> opcod, string opc, PatFrag opnode> {
+ // shifted imm
+ def ri : T2sTwoRegImm<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm), IIC_iALUi,
+ opc, ".w\t$Rd, $Rn, $imm",
+ [(set rGPR:$Rd, CPSR, (opnode t2_so_imm:$imm, rGPR:$Rn))]>;
+ // shifted register
+ def rs : T2sTwoRegShiftedReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm),
+ IIC_iALUsi, opc, "\t$Rd, $Rn, $ShiftedRm",
+ [(set rGPR:$Rd, CPSR, (opnode t2_so_reg:$ShiftedRm, rGPR:$Rn))]>;
+}
+}
+
+/// T2I_sh_ir - Defines a set of (op reg, {so_imm|r}) patterns for a shift /
+// rotate operation that produces a value.
+multiclass T2I_sh_ir<bits<2> opcod, string opc, Operand ty, PatFrag opnode,
+ string baseOpc> {
+ // 5-bit imm
+ def ri : T2sTwoRegShiftImm<
+ (outs rGPR:$Rd), (ins rGPR:$Rm, ty:$imm), IIC_iMOVsi,
+ opc, ".w\t$Rd, $Rm, $imm",
+ [(set rGPR:$Rd, (opnode rGPR:$Rm, (i32 ty:$imm)))]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-21} = 0b010010;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{5-4} = opcod;
+ }
+ // register
+ def rr : T2sThreeReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMOVsr,
+ opc, ".w\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0100;
+ let Inst{22-21} = opcod;
+ let Inst{15-12} = 0b1111;
+ let Inst{7-4} = 0b0000;
+ }
+
+ // Optional destination register
+ def : t2InstAlias<!strconcat(opc, "${s}${p}", ".w $Rdn, $imm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rdn, rGPR:$Rdn,
+ ty:$imm, pred:$p,
+ cc_out:$s)>;
+ def : t2InstAlias<!strconcat(opc, "${s}${p}", ".w $Rdn, $Rm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rdn, rGPR:$Rdn,
+ rGPR:$Rm, pred:$p,
+ cc_out:$s)>;
+
+ // Assembler aliases w/o the ".w" suffix.
+ def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rd, $Rn, $imm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rd, rGPR:$Rn,
+ ty:$imm, pred:$p,
+ cc_out:$s)>;
+ def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rd, $Rn, $Rm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rd, rGPR:$Rn,
+ rGPR:$Rm, pred:$p,
+ cc_out:$s)>;
+
+ // and with the optional destination operand, too.
+ def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rdn, $imm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rdn, rGPR:$Rdn,
+ ty:$imm, pred:$p,
+ cc_out:$s)>;
+ def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rdn, $Rm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rdn, rGPR:$Rdn,
+ rGPR:$Rm, pred:$p,
+ cc_out:$s)>;
+}
+
+/// T2I_cmp_irs - Defines a set of (op r, {so_imm|r|so_reg}) cmp / test
+/// patterns. Similar to T2I_bin_irs except the instruction does not produce
+/// a explicit result, only implicitly set CPSR.
+multiclass T2I_cmp_irs<bits<4> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+ PatFrag opnode, string baseOpc> {
+let isCompare = 1, Defs = [CPSR] in {
+ // shifted imm
+ def ri : T2OneRegCmpImm<
+ (outs), (ins GPRnopc:$Rn, t2_so_imm:$imm), iii,
+ opc, ".w\t$Rn, $imm",
+ [(opnode GPRnopc:$Rn, t2_so_imm:$imm)]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 0;
+ let Inst{24-21} = opcod;
+ let Inst{20} = 1; // The S bit.
+ let Inst{15} = 0;
+ let Inst{11-8} = 0b1111; // Rd
+ }
+ // register
+ def rr : T2TwoRegCmp<
+ (outs), (ins GPRnopc:$Rn, rGPR:$Rm), iir,
+ opc, ".w\t$Rn, $Rm",
+ [(opnode GPRnopc:$Rn, rGPR:$Rm)]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ let Inst{20} = 1; // The S bit.
+ let Inst{14-12} = 0b000; // imm3
+ let Inst{11-8} = 0b1111; // Rd
+ let Inst{7-6} = 0b00; // imm2
+ let Inst{5-4} = 0b00; // type
+ }
+ // shifted register
+ def rs : T2OneRegCmpShiftedReg<
+ (outs), (ins GPRnopc:$Rn, t2_so_reg:$ShiftedRm), iis,
+ opc, ".w\t$Rn, $ShiftedRm",
+ [(opnode GPRnopc:$Rn, t2_so_reg:$ShiftedRm)]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ let Inst{20} = 1; // The S bit.
+ let Inst{11-8} = 0b1111; // Rd
+ }
+}
+
+ // Assembler aliases w/o the ".w" suffix.
+ // No alias here for 'rr' version as not all instantiations of this
+ // multiclass want one (CMP in particular, does not).
+ def : t2InstAlias<!strconcat(opc, "${p}", " $Rn, $imm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "ri")) GPRnopc:$Rn,
+ t2_so_imm:$imm, pred:$p)>;
+ def : t2InstAlias<!strconcat(opc, "${p}", " $Rn, $shift"),
+ (!cast<Instruction>(!strconcat(baseOpc, "rs")) GPRnopc:$Rn,
+ t2_so_reg:$shift,
+ pred:$p)>;
+}
+
+/// T2I_ld - Defines a set of (op r, {imm12|imm8|so_reg}) load patterns.
+multiclass T2I_ld<bit signed, bits<2> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iis, RegisterClass target,
+ PatFrag opnode> {
+ def i12 : T2Ii12<(outs target:$Rt), (ins t2addrmode_imm12:$addr), iii,
+ opc, ".w\t$Rt, $addr",
+ [(set target:$Rt, (opnode t2addrmode_imm12:$addr))]> {
+ bits<4> Rt;
+ bits<17> addr;
+ let Inst{31-25} = 0b1111100;
+ let Inst{24} = signed;
+ let Inst{23} = 1;
+ let Inst{22-21} = opcod;
+ let Inst{20} = 1; // load
+ let Inst{19-16} = addr{16-13}; // Rn
+ let Inst{15-12} = Rt;
+ let Inst{11-0} = addr{11-0}; // imm
+ }
+ def i8 : T2Ii8 <(outs target:$Rt), (ins t2addrmode_negimm8:$addr), iii,
+ opc, "\t$Rt, $addr",
+ [(set target:$Rt, (opnode t2addrmode_negimm8:$addr))]> {
+ bits<4> Rt;
+ bits<13> addr;
+ let Inst{31-27} = 0b11111;
+ let Inst{26-25} = 0b00;
+ let Inst{24} = signed;
+ let Inst{23} = 0;
+ let Inst{22-21} = opcod;
+ let Inst{20} = 1; // load
+ let Inst{19-16} = addr{12-9}; // Rn
+ let Inst{15-12} = Rt;
+ let Inst{11} = 1;
+ // Offset: index==TRUE, wback==FALSE
+ let Inst{10} = 1; // The P bit.
+ let Inst{9} = addr{8}; // U
+ let Inst{8} = 0; // The W bit.
+ let Inst{7-0} = addr{7-0}; // imm
+ }
+ def s : T2Iso <(outs target:$Rt), (ins t2addrmode_so_reg:$addr), iis,
+ opc, ".w\t$Rt, $addr",
+ [(set target:$Rt, (opnode t2addrmode_so_reg:$addr))]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-25} = 0b00;
+ let Inst{24} = signed;
+ let Inst{23} = 0;
+ let Inst{22-21} = opcod;
+ let Inst{20} = 1; // load
+ let Inst{11-6} = 0b000000;
+
+ bits<4> Rt;
+ let Inst{15-12} = Rt;
+
+ bits<10> addr;
+ let Inst{19-16} = addr{9-6}; // Rn
+ let Inst{3-0} = addr{5-2}; // Rm
+ let Inst{5-4} = addr{1-0}; // imm
+
+ let DecoderMethod = "DecodeT2LoadShift";
+ }
+
+ // FIXME: Is the pci variant actually needed?
+ def pci : T2Ipc <(outs target:$Rt), (ins t2ldrlabel:$addr), iii,
+ opc, ".w\t$Rt, $addr",
+ [(set target:$Rt, (opnode (ARMWrapper tconstpool:$addr)))]> {
+ let isReMaterializable = 1;
+ let Inst{31-27} = 0b11111;
+ let Inst{26-25} = 0b00;
+ let Inst{24} = signed;
+ let Inst{23} = ?; // add = (U == '1')
+ let Inst{22-21} = opcod;
+ let Inst{20} = 1; // load
+ let Inst{19-16} = 0b1111; // Rn
+ bits<4> Rt;
+ bits<12> addr;
+ let Inst{15-12} = Rt{3-0};
+ let Inst{11-0} = addr{11-0};
+ }
+}
+
+/// T2I_st - Defines a set of (op r, {imm12|imm8|so_reg}) store patterns.
+multiclass T2I_st<bits<2> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iis, RegisterClass target,
+ PatFrag opnode> {
+ def i12 : T2Ii12<(outs), (ins target:$Rt, t2addrmode_imm12:$addr), iii,
+ opc, ".w\t$Rt, $addr",
+ [(opnode target:$Rt, t2addrmode_imm12:$addr)]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0001;
+ let Inst{22-21} = opcod;
+ let Inst{20} = 0; // !load
+
+ bits<4> Rt;
+ let Inst{15-12} = Rt;
+
+ bits<17> addr;
+ let addr{12} = 1; // add = TRUE
+ let Inst{19-16} = addr{16-13}; // Rn
+ let Inst{23} = addr{12}; // U
+ let Inst{11-0} = addr{11-0}; // imm
+ }
+ def i8 : T2Ii8 <(outs), (ins target:$Rt, t2addrmode_negimm8:$addr), iii,
+ opc, "\t$Rt, $addr",
+ [(opnode target:$Rt, t2addrmode_negimm8:$addr)]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0000;
+ let Inst{22-21} = opcod;
+ let Inst{20} = 0; // !load
+ let Inst{11} = 1;
+ // Offset: index==TRUE, wback==FALSE
+ let Inst{10} = 1; // The P bit.
+ let Inst{8} = 0; // The W bit.
+
+ bits<4> Rt;
+ let Inst{15-12} = Rt;
+
+ bits<13> addr;
+ let Inst{19-16} = addr{12-9}; // Rn
+ let Inst{9} = addr{8}; // U
+ let Inst{7-0} = addr{7-0}; // imm
+ }
+ def s : T2Iso <(outs), (ins target:$Rt, t2addrmode_so_reg:$addr), iis,
+ opc, ".w\t$Rt, $addr",
+ [(opnode target:$Rt, t2addrmode_so_reg:$addr)]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0000;
+ let Inst{22-21} = opcod;
+ let Inst{20} = 0; // !load
+ let Inst{11-6} = 0b000000;
+
+ bits<4> Rt;
+ let Inst{15-12} = Rt;
+
+ bits<10> addr;
+ let Inst{19-16} = addr{9-6}; // Rn
+ let Inst{3-0} = addr{5-2}; // Rm
+ let Inst{5-4} = addr{1-0}; // imm
+ }
+}
+
+/// T2I_ext_rrot - A unary operation with two forms: one whose operand is a
+/// register and one whose operand is a register rotated by 8/16/24.
+class T2I_ext_rrot<bits<3> opcod, string opc, PatFrag opnode>
+ : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm, rot_imm:$rot), IIC_iEXTr,
+ opc, ".w\t$Rd, $Rm$rot",
+ [(set rGPR:$Rd, (opnode (rotr rGPR:$Rm, rot_imm:$rot)))]>,
+ Requires<[IsThumb2]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0100;
+ let Inst{22-20} = opcod;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{15-12} = 0b1111;
+ let Inst{7} = 1;
+
+ bits<2> rot;
+ let Inst{5-4} = rot{1-0}; // rotate
+}
+
+// UXTB16 - Requres T2ExtractPack, does not need the .w qualifier.
+class T2I_ext_rrot_uxtb16<bits<3> opcod, string opc, PatFrag opnode>
+ : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm, rot_imm:$rot),
+ IIC_iEXTr, opc, "\t$Rd, $Rm$rot",
+ [(set rGPR:$Rd, (opnode (rotr rGPR:$Rm, rot_imm:$rot)))]>,
+ Requires<[HasT2ExtractPack, IsThumb2]> {
+ bits<2> rot;
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0100;
+ let Inst{22-20} = opcod;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{15-12} = 0b1111;
+ let Inst{7} = 1;
+ let Inst{5-4} = rot;
+}
+
+// SXTB16 - Requres T2ExtractPack, does not need the .w qualifier, no pattern
+// supported yet.
+class T2I_ext_rrot_sxtb16<bits<3> opcod, string opc>
+ : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm, rot_imm:$rot), IIC_iEXTr,
+ opc, "\t$Rd, $Rm$rot", []>,
+ Requires<[IsThumb2, HasT2ExtractPack]> {
+ bits<2> rot;
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0100;
+ let Inst{22-20} = opcod;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{15-12} = 0b1111;
+ let Inst{7} = 1;
+ let Inst{5-4} = rot;
+}
+
+/// T2I_exta_rrot - A binary operation with two forms: one whose operand is a
+/// register and one whose operand is a register rotated by 8/16/24.
+class T2I_exta_rrot<bits<3> opcod, string opc, PatFrag opnode>
+ : T2ThreeReg<(outs rGPR:$Rd),
+ (ins rGPR:$Rn, rGPR:$Rm, rot_imm:$rot),
+ IIC_iEXTAsr, opc, "\t$Rd, $Rn, $Rm$rot",
+ [(set rGPR:$Rd, (opnode rGPR:$Rn, (rotr rGPR:$Rm,rot_imm:$rot)))]>,
+ Requires<[HasT2ExtractPack, IsThumb2]> {
+ bits<2> rot;
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0100;
+ let Inst{22-20} = opcod;
+ let Inst{15-12} = 0b1111;
+ let Inst{7} = 1;
+ let Inst{5-4} = rot;
+}
+
+class T2I_exta_rrot_np<bits<3> opcod, string opc>
+ : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm,rot_imm:$rot),
+ IIC_iEXTAsr, opc, "\t$Rd, $Rn, $Rm$rot", []> {
+ bits<2> rot;
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0100;
+ let Inst{22-20} = opcod;
+ let Inst{15-12} = 0b1111;
+ let Inst{7} = 1;
+ let Inst{5-4} = rot;
+}
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous Instructions.
+//
+
+class T2PCOneRegImm<dag oops, dag iops, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : T2XI<oops, iops, itin, asm, pattern> {
+ bits<4> Rd;
+ bits<12> label;
+
+ let Inst{11-8} = Rd;
+ let Inst{26} = label{11};
+ let Inst{14-12} = label{10-8};
+ let Inst{7-0} = label{7-0};
+}
+
+// LEApcrel - Load a pc-relative address into a register without offending the
+// assembler.
+def t2ADR : T2PCOneRegImm<(outs rGPR:$Rd),
+ (ins t2adrlabel:$addr, pred:$p),
+ IIC_iALUi, "adr{$p}.w\t$Rd, $addr", []> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25-24} = 0b10;
+ // Inst{23:21} = '11' (add = FALSE) or '00' (add = TRUE)
+ let Inst{22} = 0;
+ let Inst{20} = 0;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{15} = 0;
+
+ bits<4> Rd;
+ bits<13> addr;
+ let Inst{11-8} = Rd;
+ let Inst{23} = addr{12};
+ let Inst{21} = addr{12};
+ let Inst{26} = addr{11};
+ let Inst{14-12} = addr{10-8};
+ let Inst{7-0} = addr{7-0};
+
+ let DecoderMethod = "DecodeT2Adr";
+}
+
+let neverHasSideEffects = 1, isReMaterializable = 1 in
+def t2LEApcrel : t2PseudoInst<(outs rGPR:$Rd), (ins i32imm:$label, pred:$p),
+ 4, IIC_iALUi, []>;
+def t2LEApcrelJT : t2PseudoInst<(outs rGPR:$Rd),
+ (ins i32imm:$label, nohash_imm:$id, pred:$p),
+ 4, IIC_iALUi,
+ []>;
+
+
+//===----------------------------------------------------------------------===//
+// Load / store Instructions.
+//
+
+// Load
+let canFoldAsLoad = 1, isReMaterializable = 1 in
+defm t2LDR : T2I_ld<0, 0b10, "ldr", IIC_iLoad_i, IIC_iLoad_si, GPR,
+ UnOpFrag<(load node:$Src)>>;
+
+// Loads with zero extension
+defm t2LDRH : T2I_ld<0, 0b01, "ldrh", IIC_iLoad_bh_i, IIC_iLoad_bh_si,
+ rGPR, UnOpFrag<(zextloadi16 node:$Src)>>;
+defm t2LDRB : T2I_ld<0, 0b00, "ldrb", IIC_iLoad_bh_i, IIC_iLoad_bh_si,
+ rGPR, UnOpFrag<(zextloadi8 node:$Src)>>;
+
+// Loads with sign extension
+defm t2LDRSH : T2I_ld<1, 0b01, "ldrsh", IIC_iLoad_bh_i, IIC_iLoad_bh_si,
+ rGPR, UnOpFrag<(sextloadi16 node:$Src)>>;
+defm t2LDRSB : T2I_ld<1, 0b00, "ldrsb", IIC_iLoad_bh_i, IIC_iLoad_bh_si,
+ rGPR, UnOpFrag<(sextloadi8 node:$Src)>>;
+
+let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
+// Load doubleword
+def t2LDRDi8 : T2Ii8s4<1, 0, 1, (outs rGPR:$Rt, rGPR:$Rt2),
+ (ins t2addrmode_imm8s4:$addr),
+ IIC_iLoad_d_i, "ldrd", "\t$Rt, $Rt2, $addr", "", []>;
+} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1
+
+// zextload i1 -> zextload i8
+def : T2Pat<(zextloadi1 t2addrmode_imm12:$addr),
+ (t2LDRBi12 t2addrmode_imm12:$addr)>;
+def : T2Pat<(zextloadi1 t2addrmode_negimm8:$addr),
+ (t2LDRBi8 t2addrmode_negimm8:$addr)>;
+def : T2Pat<(zextloadi1 t2addrmode_so_reg:$addr),
+ (t2LDRBs t2addrmode_so_reg:$addr)>;
+def : T2Pat<(zextloadi1 (ARMWrapper tconstpool:$addr)),
+ (t2LDRBpci tconstpool:$addr)>;
+
+// extload -> zextload
+// FIXME: Reduce the number of patterns by legalizing extload to zextload
+// earlier?
+def : T2Pat<(extloadi1 t2addrmode_imm12:$addr),
+ (t2LDRBi12 t2addrmode_imm12:$addr)>;
+def : T2Pat<(extloadi1 t2addrmode_negimm8:$addr),
+ (t2LDRBi8 t2addrmode_negimm8:$addr)>;
+def : T2Pat<(extloadi1 t2addrmode_so_reg:$addr),
+ (t2LDRBs t2addrmode_so_reg:$addr)>;
+def : T2Pat<(extloadi1 (ARMWrapper tconstpool:$addr)),
+ (t2LDRBpci tconstpool:$addr)>;
+
+def : T2Pat<(extloadi8 t2addrmode_imm12:$addr),
+ (t2LDRBi12 t2addrmode_imm12:$addr)>;
+def : T2Pat<(extloadi8 t2addrmode_negimm8:$addr),
+ (t2LDRBi8 t2addrmode_negimm8:$addr)>;
+def : T2Pat<(extloadi8 t2addrmode_so_reg:$addr),
+ (t2LDRBs t2addrmode_so_reg:$addr)>;
+def : T2Pat<(extloadi8 (ARMWrapper tconstpool:$addr)),
+ (t2LDRBpci tconstpool:$addr)>;
+
+def : T2Pat<(extloadi16 t2addrmode_imm12:$addr),
+ (t2LDRHi12 t2addrmode_imm12:$addr)>;
+def : T2Pat<(extloadi16 t2addrmode_negimm8:$addr),
+ (t2LDRHi8 t2addrmode_negimm8:$addr)>;
+def : T2Pat<(extloadi16 t2addrmode_so_reg:$addr),
+ (t2LDRHs t2addrmode_so_reg:$addr)>;
+def : T2Pat<(extloadi16 (ARMWrapper tconstpool:$addr)),
+ (t2LDRHpci tconstpool:$addr)>;
+
+// FIXME: The destination register of the loads and stores can't be PC, but
+// can be SP. We need another regclass (similar to rGPR) to represent
+// that. Not a pressing issue since these are selected manually,
+// not via pattern.
+
+// Indexed loads
+
+let mayLoad = 1, neverHasSideEffects = 1 in {
+def t2LDR_PRE : T2Ipreldst<0, 0b10, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins t2addrmode_imm8:$addr),
+ AddrModeT2_i8, IndexModePre, IIC_iLoad_iu,
+ "ldr", "\t$Rt, $addr!", "$addr.base = $Rn_wb",
+ []> {
+ let AsmMatchConverter = "cvtLdWriteBackRegT2AddrModeImm8";
+}
+
+def t2LDR_POST : T2Ipostldst<0, 0b10, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins addr_offset_none:$Rn, t2am_imm8_offset:$offset),
+ AddrModeT2_i8, IndexModePost, IIC_iLoad_iu,
+ "ldr", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>;
+
+def t2LDRB_PRE : T2Ipreldst<0, 0b00, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins t2addrmode_imm8:$addr),
+ AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu,
+ "ldrb", "\t$Rt, $addr!", "$addr.base = $Rn_wb",
+ []> {
+ let AsmMatchConverter = "cvtLdWriteBackRegT2AddrModeImm8";
+}
+def t2LDRB_POST : T2Ipostldst<0, 0b00, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins addr_offset_none:$Rn, t2am_imm8_offset:$offset),
+ AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu,
+ "ldrb", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>;
+
+def t2LDRH_PRE : T2Ipreldst<0, 0b01, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins t2addrmode_imm8:$addr),
+ AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu,
+ "ldrh", "\t$Rt, $addr!", "$addr.base = $Rn_wb",
+ []> {
+ let AsmMatchConverter = "cvtLdWriteBackRegT2AddrModeImm8";
+}
+def t2LDRH_POST : T2Ipostldst<0, 0b01, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins addr_offset_none:$Rn, t2am_imm8_offset:$offset),
+ AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu,
+ "ldrh", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>;
+
+def t2LDRSB_PRE : T2Ipreldst<1, 0b00, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins t2addrmode_imm8:$addr),
+ AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu,
+ "ldrsb", "\t$Rt, $addr!", "$addr.base = $Rn_wb",
+ []> {
+ let AsmMatchConverter = "cvtLdWriteBackRegT2AddrModeImm8";
+}
+def t2LDRSB_POST : T2Ipostldst<1, 0b00, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins addr_offset_none:$Rn, t2am_imm8_offset:$offset),
+ AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu,
+ "ldrsb", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>;
+
+def t2LDRSH_PRE : T2Ipreldst<1, 0b01, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins t2addrmode_imm8:$addr),
+ AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu,
+ "ldrsh", "\t$Rt, $addr!", "$addr.base = $Rn_wb",
+ []> {
+ let AsmMatchConverter = "cvtLdWriteBackRegT2AddrModeImm8";
+}
+def t2LDRSH_POST : T2Ipostldst<1, 0b01, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins addr_offset_none:$Rn, t2am_imm8_offset:$offset),
+ AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu,
+ "ldrsh", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>;
+} // mayLoad = 1, neverHasSideEffects = 1
+
+// LDRT, LDRBT, LDRHT, LDRSBT, LDRSHT all have offset mode (PUW=0b110).
+// Ref: A8.6.57 LDR (immediate, Thumb) Encoding T4
+class T2IldT<bit signed, bits<2> type, string opc, InstrItinClass ii>
+ : T2Ii8<(outs rGPR:$Rt), (ins t2addrmode_posimm8:$addr), ii, opc,
+ "\t$Rt, $addr", []> {
+ bits<4> Rt;
+ bits<13> addr;
+ let Inst{31-27} = 0b11111;
+ let Inst{26-25} = 0b00;
+ let Inst{24} = signed;
+ let Inst{23} = 0;
+ let Inst{22-21} = type;
+ let Inst{20} = 1; // load
+ let Inst{19-16} = addr{12-9};
+ let Inst{15-12} = Rt;
+ let Inst{11} = 1;
+ let Inst{10-8} = 0b110; // PUW.
+ let Inst{7-0} = addr{7-0};
+}
+
+def t2LDRT : T2IldT<0, 0b10, "ldrt", IIC_iLoad_i>;
+def t2LDRBT : T2IldT<0, 0b00, "ldrbt", IIC_iLoad_bh_i>;
+def t2LDRHT : T2IldT<0, 0b01, "ldrht", IIC_iLoad_bh_i>;
+def t2LDRSBT : T2IldT<1, 0b00, "ldrsbt", IIC_iLoad_bh_i>;
+def t2LDRSHT : T2IldT<1, 0b01, "ldrsht", IIC_iLoad_bh_i>;
+
+// Store
+defm t2STR :T2I_st<0b10,"str", IIC_iStore_i, IIC_iStore_si, GPR,
+ BinOpFrag<(store node:$LHS, node:$RHS)>>;
+defm t2STRB:T2I_st<0b00,"strb", IIC_iStore_bh_i, IIC_iStore_bh_si,
+ rGPR, BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>;
+defm t2STRH:T2I_st<0b01,"strh", IIC_iStore_bh_i, IIC_iStore_bh_si,
+ rGPR, BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>;
+
+// Store doubleword
+let mayLoad = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in
+def t2STRDi8 : T2Ii8s4<1, 0, 0, (outs),
+ (ins GPR:$Rt, GPR:$Rt2, t2addrmode_imm8s4:$addr),
+ IIC_iStore_d_r, "strd", "\t$Rt, $Rt2, $addr", "", []>;
+
+// Indexed stores
+def t2STR_PRE : T2Ipreldst<0, 0b10, 0, 1, (outs GPRnopc:$Rn_wb),
+ (ins rGPR:$Rt, t2addrmode_imm8:$addr),
+ AddrModeT2_i8, IndexModePre, IIC_iStore_iu,
+ "str", "\t$Rt, $addr!",
+ "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> {
+ let AsmMatchConverter = "cvtStWriteBackRegT2AddrModeImm8";
+}
+def t2STRH_PRE : T2Ipreldst<0, 0b01, 0, 1, (outs GPRnopc:$Rn_wb),
+ (ins rGPR:$Rt, t2addrmode_imm8:$addr),
+ AddrModeT2_i8, IndexModePre, IIC_iStore_iu,
+ "strh", "\t$Rt, $addr!",
+ "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> {
+ let AsmMatchConverter = "cvtStWriteBackRegT2AddrModeImm8";
+}
+
+def t2STRB_PRE : T2Ipreldst<0, 0b00, 0, 1, (outs GPRnopc:$Rn_wb),
+ (ins rGPR:$Rt, t2addrmode_imm8:$addr),
+ AddrModeT2_i8, IndexModePre, IIC_iStore_bh_iu,
+ "strb", "\t$Rt, $addr!",
+ "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> {
+ let AsmMatchConverter = "cvtStWriteBackRegT2AddrModeImm8";
+}
+
+def t2STR_POST : T2Ipostldst<0, 0b10, 0, 0, (outs GPRnopc:$Rn_wb),
+ (ins rGPR:$Rt, addr_offset_none:$Rn,
+ t2am_imm8_offset:$offset),
+ AddrModeT2_i8, IndexModePost, IIC_iStore_iu,
+ "str", "\t$Rt, $Rn$offset",
+ "$Rn = $Rn_wb,@earlyclobber $Rn_wb",
+ [(set GPRnopc:$Rn_wb,
+ (post_store rGPR:$Rt, addr_offset_none:$Rn,
+ t2am_imm8_offset:$offset))]>;
+
+def t2STRH_POST : T2Ipostldst<0, 0b01, 0, 0, (outs GPRnopc:$Rn_wb),
+ (ins rGPR:$Rt, addr_offset_none:$Rn,
+ t2am_imm8_offset:$offset),
+ AddrModeT2_i8, IndexModePost, IIC_iStore_bh_iu,
+ "strh", "\t$Rt, $Rn$offset",
+ "$Rn = $Rn_wb,@earlyclobber $Rn_wb",
+ [(set GPRnopc:$Rn_wb,
+ (post_truncsti16 rGPR:$Rt, addr_offset_none:$Rn,
+ t2am_imm8_offset:$offset))]>;
+
+def t2STRB_POST : T2Ipostldst<0, 0b00, 0, 0, (outs GPRnopc:$Rn_wb),
+ (ins rGPR:$Rt, addr_offset_none:$Rn,
+ t2am_imm8_offset:$offset),
+ AddrModeT2_i8, IndexModePost, IIC_iStore_bh_iu,
+ "strb", "\t$Rt, $Rn$offset",
+ "$Rn = $Rn_wb,@earlyclobber $Rn_wb",
+ [(set GPRnopc:$Rn_wb,
+ (post_truncsti8 rGPR:$Rt, addr_offset_none:$Rn,
+ t2am_imm8_offset:$offset))]>;
+
+// Pseudo-instructions for pattern matching the pre-indexed stores. We can't
+// put the patterns on the instruction definitions directly as ISel wants
+// the address base and offset to be separate operands, not a single
+// complex operand like we represent the instructions themselves. The
+// pseudos map between the two.
+let usesCustomInserter = 1,
+ Constraints = "$Rn = $Rn_wb,@earlyclobber $Rn_wb" in {
+def t2STR_preidx: t2PseudoInst<(outs GPRnopc:$Rn_wb),
+ (ins rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset, pred:$p),
+ 4, IIC_iStore_ru,
+ [(set GPRnopc:$Rn_wb,
+ (pre_store rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset))]>;
+def t2STRB_preidx: t2PseudoInst<(outs GPRnopc:$Rn_wb),
+ (ins rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset, pred:$p),
+ 4, IIC_iStore_ru,
+ [(set GPRnopc:$Rn_wb,
+ (pre_truncsti8 rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset))]>;
+def t2STRH_preidx: t2PseudoInst<(outs GPRnopc:$Rn_wb),
+ (ins rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset, pred:$p),
+ 4, IIC_iStore_ru,
+ [(set GPRnopc:$Rn_wb,
+ (pre_truncsti16 rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset))]>;
+}
+
+
+// STRT, STRBT, STRHT all have offset mode (PUW=0b110) and are for disassembly
+// only.
+// Ref: A8.6.193 STR (immediate, Thumb) Encoding T4
+class T2IstT<bits<2> type, string opc, InstrItinClass ii>
+ : T2Ii8<(outs rGPR:$Rt), (ins t2addrmode_imm8:$addr), ii, opc,
+ "\t$Rt, $addr", []> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-25} = 0b00;
+ let Inst{24} = 0; // not signed
+ let Inst{23} = 0;
+ let Inst{22-21} = type;
+ let Inst{20} = 0; // store
+ let Inst{11} = 1;
+ let Inst{10-8} = 0b110; // PUW
+
+ bits<4> Rt;
+ bits<13> addr;
+ let Inst{15-12} = Rt;
+ let Inst{19-16} = addr{12-9};
+ let Inst{7-0} = addr{7-0};
+}
+
+def t2STRT : T2IstT<0b10, "strt", IIC_iStore_i>;
+def t2STRBT : T2IstT<0b00, "strbt", IIC_iStore_bh_i>;
+def t2STRHT : T2IstT<0b01, "strht", IIC_iStore_bh_i>;
+
+// ldrd / strd pre / post variants
+// For disassembly only.
+
+def t2LDRD_PRE : T2Ii8s4<1, 1, 1, (outs rGPR:$Rt, rGPR:$Rt2, GPR:$wb),
+ (ins t2addrmode_imm8s4:$addr), IIC_iLoad_d_ru,
+ "ldrd", "\t$Rt, $Rt2, $addr!", "$addr.base = $wb", []> {
+ let AsmMatchConverter = "cvtT2LdrdPre";
+ let DecoderMethod = "DecodeT2LDRDPreInstruction";
+}
+
+def t2LDRD_POST : T2Ii8s4post<0, 1, 1, (outs rGPR:$Rt, rGPR:$Rt2, GPR:$wb),
+ (ins addr_offset_none:$addr, t2am_imm8s4_offset:$imm),
+ IIC_iLoad_d_ru, "ldrd", "\t$Rt, $Rt2, $addr$imm",
+ "$addr.base = $wb", []>;
+
+def t2STRD_PRE : T2Ii8s4<1, 1, 0, (outs GPR:$wb),
+ (ins rGPR:$Rt, rGPR:$Rt2, t2addrmode_imm8s4:$addr),
+ IIC_iStore_d_ru, "strd", "\t$Rt, $Rt2, $addr!",
+ "$addr.base = $wb", []> {
+ let AsmMatchConverter = "cvtT2StrdPre";
+ let DecoderMethod = "DecodeT2STRDPreInstruction";
+}
+
+def t2STRD_POST : T2Ii8s4post<0, 1, 0, (outs GPR:$wb),
+ (ins rGPR:$Rt, rGPR:$Rt2, addr_offset_none:$addr,
+ t2am_imm8s4_offset:$imm),
+ IIC_iStore_d_ru, "strd", "\t$Rt, $Rt2, $addr$imm",
+ "$addr.base = $wb", []>;
+
+// T2Ipl (Preload Data/Instruction) signals the memory system of possible future
+// data/instruction access. These are for disassembly only.
+// instr_write is inverted for Thumb mode: (prefetch 3) -> (preload 0),
+// (prefetch 1) -> (preload 2), (prefetch 2) -> (preload 1).
+multiclass T2Ipl<bits<1> write, bits<1> instr, string opc> {
+
+ def i12 : T2Ii12<(outs), (ins t2addrmode_imm12:$addr), IIC_Preload, opc,
+ "\t$addr",
+ [(ARMPreload t2addrmode_imm12:$addr, (i32 write), (i32 instr))]> {
+ let Inst{31-25} = 0b1111100;
+ let Inst{24} = instr;
+ let Inst{22} = 0;
+ let Inst{21} = write;
+ let Inst{20} = 1;
+ let Inst{15-12} = 0b1111;
+
+ bits<17> addr;
+ let addr{12} = 1; // add = TRUE
+ let Inst{19-16} = addr{16-13}; // Rn
+ let Inst{23} = addr{12}; // U
+ let Inst{11-0} = addr{11-0}; // imm12
+ }
+
+ def i8 : T2Ii8<(outs), (ins t2addrmode_negimm8:$addr), IIC_Preload, opc,
+ "\t$addr",
+ [(ARMPreload t2addrmode_negimm8:$addr, (i32 write), (i32 instr))]> {
+ let Inst{31-25} = 0b1111100;
+ let Inst{24} = instr;
+ let Inst{23} = 0; // U = 0
+ let Inst{22} = 0;
+ let Inst{21} = write;
+ let Inst{20} = 1;
+ let Inst{15-12} = 0b1111;
+ let Inst{11-8} = 0b1100;
+
+ bits<13> addr;
+ let Inst{19-16} = addr{12-9}; // Rn
+ let Inst{7-0} = addr{7-0}; // imm8
+ }
+
+ def s : T2Iso<(outs), (ins t2addrmode_so_reg:$addr), IIC_Preload, opc,
+ "\t$addr",
+ [(ARMPreload t2addrmode_so_reg:$addr, (i32 write), (i32 instr))]> {
+ let Inst{31-25} = 0b1111100;
+ let Inst{24} = instr;
+ let Inst{23} = 0; // add = TRUE for T1
+ let Inst{22} = 0;
+ let Inst{21} = write;
+ let Inst{20} = 1;
+ let Inst{15-12} = 0b1111;
+ let Inst{11-6} = 0000000;
+
+ bits<10> addr;
+ let Inst{19-16} = addr{9-6}; // Rn
+ let Inst{3-0} = addr{5-2}; // Rm
+ let Inst{5-4} = addr{1-0}; // imm2
+
+ let DecoderMethod = "DecodeT2LoadShift";
+ }
+}
+
+defm t2PLD : T2Ipl<0, 0, "pld">, Requires<[IsThumb2]>;
+defm t2PLDW : T2Ipl<1, 0, "pldw">, Requires<[IsThumb2,HasV7,HasMP]>;
+defm t2PLI : T2Ipl<0, 1, "pli">, Requires<[IsThumb2,HasV7]>;
+
+//===----------------------------------------------------------------------===//
+// Load / store multiple Instructions.
+//
+
+multiclass thumb2_ld_mult<string asm, InstrItinClass itin,
+ InstrItinClass itin_upd, bit L_bit> {
+ def IA :
+ T2XI<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ itin, !strconcat(asm, "${p}.w\t$Rn, $regs"), []> {
+ bits<4> Rn;
+ bits<16> regs;
+
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b00;
+ let Inst{24-23} = 0b01; // Increment After
+ let Inst{22} = 0;
+ let Inst{21} = 0; // No writeback
+ let Inst{20} = L_bit;
+ let Inst{19-16} = Rn;
+ let Inst{15} = 0;
+ let Inst{14-0} = regs{14-0};
+ }
+ def IA_UPD :
+ T2XIt<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ itin_upd, !strconcat(asm, "${p}.w\t$Rn!, $regs"), "$Rn = $wb", []> {
+ bits<4> Rn;
+ bits<16> regs;
+
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b00;
+ let Inst{24-23} = 0b01; // Increment After
+ let Inst{22} = 0;
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+ let Inst{19-16} = Rn;
+ let Inst{15} = 0;
+ let Inst{14-0} = regs{14-0};
+ }
+ def DB :
+ T2XI<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ itin, !strconcat(asm, "db${p}\t$Rn, $regs"), []> {
+ bits<4> Rn;
+ bits<16> regs;
+
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b00;
+ let Inst{24-23} = 0b10; // Decrement Before
+ let Inst{22} = 0;
+ let Inst{21} = 0; // No writeback
+ let Inst{20} = L_bit;
+ let Inst{19-16} = Rn;
+ let Inst{15} = 0;
+ let Inst{14-0} = regs{14-0};
+ }
+ def DB_UPD :
+ T2XIt<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ itin_upd, !strconcat(asm, "db${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ bits<4> Rn;
+ bits<16> regs;
+
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b00;
+ let Inst{24-23} = 0b10; // Decrement Before
+ let Inst{22} = 0;
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+ let Inst{19-16} = Rn;
+ let Inst{15} = 0;
+ let Inst{14-0} = regs{14-0};
+ }
+}
+
+let neverHasSideEffects = 1 in {
+
+let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
+defm t2LDM : thumb2_ld_mult<"ldm", IIC_iLoad_m, IIC_iLoad_mu, 1>;
+
+multiclass thumb2_st_mult<string asm, InstrItinClass itin,
+ InstrItinClass itin_upd, bit L_bit> {
+ def IA :
+ T2XI<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ itin, !strconcat(asm, "${p}.w\t$Rn, $regs"), []> {
+ bits<4> Rn;
+ bits<16> regs;
+
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b00;
+ let Inst{24-23} = 0b01; // Increment After
+ let Inst{22} = 0;
+ let Inst{21} = 0; // No writeback
+ let Inst{20} = L_bit;
+ let Inst{19-16} = Rn;
+ let Inst{15} = 0;
+ let Inst{14} = regs{14};
+ let Inst{13} = 0;
+ let Inst{12-0} = regs{12-0};
+ }
+ def IA_UPD :
+ T2XIt<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ itin_upd, !strconcat(asm, "${p}.w\t$Rn!, $regs"), "$Rn = $wb", []> {
+ bits<4> Rn;
+ bits<16> regs;
+
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b00;
+ let Inst{24-23} = 0b01; // Increment After
+ let Inst{22} = 0;
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+ let Inst{19-16} = Rn;
+ let Inst{15} = 0;
+ let Inst{14} = regs{14};
+ let Inst{13} = 0;
+ let Inst{12-0} = regs{12-0};
+ }
+ def DB :
+ T2XI<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ itin, !strconcat(asm, "db${p}\t$Rn, $regs"), []> {
+ bits<4> Rn;
+ bits<16> regs;
+
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b00;
+ let Inst{24-23} = 0b10; // Decrement Before
+ let Inst{22} = 0;
+ let Inst{21} = 0; // No writeback
+ let Inst{20} = L_bit;
+ let Inst{19-16} = Rn;
+ let Inst{15} = 0;
+ let Inst{14} = regs{14};
+ let Inst{13} = 0;
+ let Inst{12-0} = regs{12-0};
+ }
+ def DB_UPD :
+ T2XIt<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ itin_upd, !strconcat(asm, "db${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ bits<4> Rn;
+ bits<16> regs;
+
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b00;
+ let Inst{24-23} = 0b10; // Decrement Before
+ let Inst{22} = 0;
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+ let Inst{19-16} = Rn;
+ let Inst{15} = 0;
+ let Inst{14} = regs{14};
+ let Inst{13} = 0;
+ let Inst{12-0} = regs{12-0};
+ }
+}
+
+
+let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
+defm t2STM : thumb2_st_mult<"stm", IIC_iStore_m, IIC_iStore_mu, 0>;
+
+} // neverHasSideEffects
+
+
+//===----------------------------------------------------------------------===//
+// Move Instructions.
+//
+
+let neverHasSideEffects = 1 in
+def t2MOVr : T2sTwoReg<(outs GPRnopc:$Rd), (ins GPR:$Rm), IIC_iMOVr,
+ "mov", ".w\t$Rd, $Rm", []> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = 0b0010;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{14-12} = 0b000;
+ let Inst{7-4} = 0b0000;
+}
+def : t2InstAlias<"movs${p}.w $Rd, $Rm", (t2MOVr GPRnopc:$Rd, GPR:$Rm,
+ pred:$p, CPSR)>;
+def : t2InstAlias<"movs${p} $Rd, $Rm", (t2MOVr GPRnopc:$Rd, GPR:$Rm,
+ pred:$p, CPSR)>;
+
+// AddedComplexity to ensure isel tries t2MOVi before t2MOVi16.
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1,
+ AddedComplexity = 1 in
+def t2MOVi : T2sOneRegImm<(outs rGPR:$Rd), (ins t2_so_imm:$imm), IIC_iMOVi,
+ "mov", ".w\t$Rd, $imm",
+ [(set rGPR:$Rd, t2_so_imm:$imm)]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 0;
+ let Inst{24-21} = 0b0010;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{15} = 0;
+}
+
+// cc_out is handled as part of the explicit mnemonic in the parser for 'mov'.
+// Use aliases to get that to play nice here.
+def : t2InstAlias<"movs${p}.w $Rd, $imm", (t2MOVi rGPR:$Rd, t2_so_imm:$imm,
+ pred:$p, CPSR)>;
+def : t2InstAlias<"movs${p} $Rd, $imm", (t2MOVi rGPR:$Rd, t2_so_imm:$imm,
+ pred:$p, CPSR)>;
+
+def : t2InstAlias<"mov${p}.w $Rd, $imm", (t2MOVi rGPR:$Rd, t2_so_imm:$imm,
+ pred:$p, zero_reg)>;
+def : t2InstAlias<"mov${p} $Rd, $imm", (t2MOVi rGPR:$Rd, t2_so_imm:$imm,
+ pred:$p, zero_reg)>;
+
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in
+def t2MOVi16 : T2I<(outs rGPR:$Rd), (ins imm0_65535_expr:$imm), IIC_iMOVi,
+ "movw", "\t$Rd, $imm",
+ [(set rGPR:$Rd, imm0_65535:$imm)]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 1;
+ let Inst{24-21} = 0b0010;
+ let Inst{20} = 0; // The S bit.
+ let Inst{15} = 0;
+
+ bits<4> Rd;
+ bits<16> imm;
+
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = imm{15-12};
+ let Inst{26} = imm{11};
+ let Inst{14-12} = imm{10-8};
+ let Inst{7-0} = imm{7-0};
+ let DecoderMethod = "DecodeT2MOVTWInstruction";
+}
+
+def t2MOVi16_ga_pcrel : PseudoInst<(outs rGPR:$Rd),
+ (ins i32imm:$addr, pclabel:$id), IIC_iMOVi, []>;
+
+let Constraints = "$src = $Rd" in {
+def t2MOVTi16 : T2I<(outs rGPR:$Rd),
+ (ins rGPR:$src, imm0_65535_expr:$imm), IIC_iMOVi,
+ "movt", "\t$Rd, $imm",
+ [(set rGPR:$Rd,
+ (or (and rGPR:$src, 0xffff), lo16AllZero:$imm))]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 1;
+ let Inst{24-21} = 0b0110;
+ let Inst{20} = 0; // The S bit.
+ let Inst{15} = 0;
+
+ bits<4> Rd;
+ bits<16> imm;
+
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = imm{15-12};
+ let Inst{26} = imm{11};
+ let Inst{14-12} = imm{10-8};
+ let Inst{7-0} = imm{7-0};
+ let DecoderMethod = "DecodeT2MOVTWInstruction";
+}
+
+def t2MOVTi16_ga_pcrel : PseudoInst<(outs rGPR:$Rd),
+ (ins rGPR:$src, i32imm:$addr, pclabel:$id), IIC_iMOVi, []>;
+} // Constraints
+
+def : T2Pat<(or rGPR:$src, 0xffff0000), (t2MOVTi16 rGPR:$src, 0xffff)>;
+
+//===----------------------------------------------------------------------===//
+// Extend Instructions.
+//
+
+// Sign extenders
+
+def t2SXTB : T2I_ext_rrot<0b100, "sxtb",
+ UnOpFrag<(sext_inreg node:$Src, i8)>>;
+def t2SXTH : T2I_ext_rrot<0b000, "sxth",
+ UnOpFrag<(sext_inreg node:$Src, i16)>>;
+def t2SXTB16 : T2I_ext_rrot_sxtb16<0b010, "sxtb16">;
+
+def t2SXTAB : T2I_exta_rrot<0b100, "sxtab",
+ BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS, i8))>>;
+def t2SXTAH : T2I_exta_rrot<0b000, "sxtah",
+ BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS,i16))>>;
+def t2SXTAB16 : T2I_exta_rrot_np<0b010, "sxtab16">;
+
+// Zero extenders
+
+let AddedComplexity = 16 in {
+def t2UXTB : T2I_ext_rrot<0b101, "uxtb",
+ UnOpFrag<(and node:$Src, 0x000000FF)>>;
+def t2UXTH : T2I_ext_rrot<0b001, "uxth",
+ UnOpFrag<(and node:$Src, 0x0000FFFF)>>;
+def t2UXTB16 : T2I_ext_rrot_uxtb16<0b011, "uxtb16",
+ UnOpFrag<(and node:$Src, 0x00FF00FF)>>;
+
+// FIXME: This pattern incorrectly assumes the shl operator is a rotate.
+// The transformation should probably be done as a combiner action
+// instead so we can include a check for masking back in the upper
+// eight bits of the source into the lower eight bits of the result.
+//def : T2Pat<(and (shl rGPR:$Src, (i32 8)), 0xFF00FF),
+// (t2UXTB16 rGPR:$Src, 3)>,
+// Requires<[HasT2ExtractPack, IsThumb2]>;
+def : T2Pat<(and (srl rGPR:$Src, (i32 8)), 0xFF00FF),
+ (t2UXTB16 rGPR:$Src, 1)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+
+def t2UXTAB : T2I_exta_rrot<0b101, "uxtab",
+ BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>;
+def t2UXTAH : T2I_exta_rrot<0b001, "uxtah",
+ BinOpFrag<(add node:$LHS, (and node:$RHS, 0xFFFF))>>;
+def t2UXTAB16 : T2I_exta_rrot_np<0b011, "uxtab16">;
+}
+
+//===----------------------------------------------------------------------===//
+// Arithmetic Instructions.
+//
+
+defm t2ADD : T2I_bin_ii12rs<0b000, "add",
+ BinOpFrag<(add node:$LHS, node:$RHS)>, 1>;
+defm t2SUB : T2I_bin_ii12rs<0b101, "sub",
+ BinOpFrag<(sub node:$LHS, node:$RHS)>>;
+
+// ADD and SUB with 's' bit set. No 12-bit immediate (T4) variants.
+//
+// Currently, t2ADDS/t2SUBS are pseudo opcodes that exist only in the
+// selection DAG. They are "lowered" to real t2ADD/t2SUB opcodes by
+// AdjustInstrPostInstrSelection where we determine whether or not to
+// set the "s" bit based on CPSR liveness.
+//
+// FIXME: Eliminate t2ADDS/t2SUBS pseudo opcodes after adding tablegen
+// support for an optional CPSR definition that corresponds to the DAG
+// node's second value. We can then eliminate the implicit def of CPSR.
+defm t2ADDS : T2I_bin_s_irs <0b1000, "add",
+ IIC_iALUi, IIC_iALUr, IIC_iALUsi,
+ BinOpFrag<(ARMaddc node:$LHS, node:$RHS)>, 1>;
+defm t2SUBS : T2I_bin_s_irs <0b1101, "sub",
+ IIC_iALUi, IIC_iALUr, IIC_iALUsi,
+ BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>;
+
+let hasPostISelHook = 1 in {
+defm t2ADC : T2I_adde_sube_irs<0b1010, "adc",
+ BinOpWithFlagFrag<(ARMadde node:$LHS, node:$RHS, node:$FLAG)>, 1>;
+defm t2SBC : T2I_adde_sube_irs<0b1011, "sbc",
+ BinOpWithFlagFrag<(ARMsube node:$LHS, node:$RHS, node:$FLAG)>>;
+}
+
+// RSB
+defm t2RSB : T2I_rbin_irs <0b1110, "rsb",
+ BinOpFrag<(sub node:$LHS, node:$RHS)>>;
+
+// FIXME: Eliminate them if we can write def : Pat patterns which defines
+// CPSR and the implicit def of CPSR is not needed.
+defm t2RSBS : T2I_rbin_s_is <0b1110, "rsb",
+ BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>;
+
+// (sub X, imm) gets canonicalized to (add X, -imm). Match this form.
+// The assume-no-carry-in form uses the negation of the input since add/sub
+// assume opposite meanings of the carry flag (i.e., carry == !borrow).
+// See the definition of AddWithCarry() in the ARM ARM A2.2.1 for the gory
+// details.
+// The AddedComplexity preferences the first variant over the others since
+// it can be shrunk to a 16-bit wide encoding, while the others cannot.
+let AddedComplexity = 1 in
+def : T2Pat<(add GPR:$src, imm0_255_neg:$imm),
+ (t2SUBri GPR:$src, imm0_255_neg:$imm)>;
+def : T2Pat<(add GPR:$src, t2_so_imm_neg:$imm),
+ (t2SUBri GPR:$src, t2_so_imm_neg:$imm)>;
+def : T2Pat<(add GPR:$src, imm0_4095_neg:$imm),
+ (t2SUBri12 GPR:$src, imm0_4095_neg:$imm)>;
+let AddedComplexity = 1 in
+def : T2Pat<(ARMaddc rGPR:$src, imm0_255_neg:$imm),
+ (t2SUBSri rGPR:$src, imm0_255_neg:$imm)>;
+def : T2Pat<(ARMaddc rGPR:$src, t2_so_imm_neg:$imm),
+ (t2SUBSri rGPR:$src, t2_so_imm_neg:$imm)>;
+// The with-carry-in form matches bitwise not instead of the negation.
+// Effectively, the inverse interpretation of the carry flag already accounts
+// for part of the negation.
+let AddedComplexity = 1 in
+def : T2Pat<(ARMadde rGPR:$src, imm0_255_not:$imm, CPSR),
+ (t2SBCri rGPR:$src, imm0_255_not:$imm)>;
+def : T2Pat<(ARMadde rGPR:$src, t2_so_imm_not:$imm, CPSR),
+ (t2SBCri rGPR:$src, t2_so_imm_not:$imm)>;
+
+// Select Bytes -- for disassembly only
+
+def t2SEL : T2ThreeReg<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ NoItinerary, "sel", "\t$Rd, $Rn, $Rm", []>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-24} = 0b010;
+ let Inst{23} = 0b1;
+ let Inst{22-20} = 0b010;
+ let Inst{15-12} = 0b1111;
+ let Inst{7} = 0b1;
+ let Inst{6-4} = 0b000;
+}
+
+// A6.3.13, A6.3.14, A6.3.15 Parallel addition and subtraction (signed/unsigned)
+// And Miscellaneous operations -- for disassembly only
+class T2I_pam<bits<3> op22_20, bits<4> op7_4, string opc,
+ list<dag> pat = [/* For disassembly only; pattern left blank */],
+ dag iops = (ins rGPR:$Rn, rGPR:$Rm),
+ string asm = "\t$Rd, $Rn, $Rm">
+ : T2I<(outs rGPR:$Rd), iops, NoItinerary, opc, asm, pat>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0101;
+ let Inst{22-20} = op22_20;
+ let Inst{15-12} = 0b1111;
+ let Inst{7-4} = op7_4;
+
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{3-0} = Rm;
+}
+
+// Saturating add/subtract -- for disassembly only
+
+def t2QADD : T2I_pam<0b000, 0b1000, "qadd",
+ [(set rGPR:$Rd, (int_arm_qadd rGPR:$Rn, rGPR:$Rm))],
+ (ins rGPR:$Rm, rGPR:$Rn), "\t$Rd, $Rm, $Rn">;
+def t2QADD16 : T2I_pam<0b001, 0b0001, "qadd16">;
+def t2QADD8 : T2I_pam<0b000, 0b0001, "qadd8">;
+def t2QASX : T2I_pam<0b010, 0b0001, "qasx">;
+def t2QDADD : T2I_pam<0b000, 0b1001, "qdadd", [],
+ (ins rGPR:$Rm, rGPR:$Rn), "\t$Rd, $Rm, $Rn">;
+def t2QDSUB : T2I_pam<0b000, 0b1011, "qdsub", [],
+ (ins rGPR:$Rm, rGPR:$Rn), "\t$Rd, $Rm, $Rn">;
+def t2QSAX : T2I_pam<0b110, 0b0001, "qsax">;
+def t2QSUB : T2I_pam<0b000, 0b1010, "qsub",
+ [(set rGPR:$Rd, (int_arm_qsub rGPR:$Rn, rGPR:$Rm))],
+ (ins rGPR:$Rm, rGPR:$Rn), "\t$Rd, $Rm, $Rn">;
+def t2QSUB16 : T2I_pam<0b101, 0b0001, "qsub16">;
+def t2QSUB8 : T2I_pam<0b100, 0b0001, "qsub8">;
+def t2UQADD16 : T2I_pam<0b001, 0b0101, "uqadd16">;
+def t2UQADD8 : T2I_pam<0b000, 0b0101, "uqadd8">;
+def t2UQASX : T2I_pam<0b010, 0b0101, "uqasx">;
+def t2UQSAX : T2I_pam<0b110, 0b0101, "uqsax">;
+def t2UQSUB16 : T2I_pam<0b101, 0b0101, "uqsub16">;
+def t2UQSUB8 : T2I_pam<0b100, 0b0101, "uqsub8">;
+
+// Signed/Unsigned add/subtract -- for disassembly only
+
+def t2SASX : T2I_pam<0b010, 0b0000, "sasx">;
+def t2SADD16 : T2I_pam<0b001, 0b0000, "sadd16">;
+def t2SADD8 : T2I_pam<0b000, 0b0000, "sadd8">;
+def t2SSAX : T2I_pam<0b110, 0b0000, "ssax">;
+def t2SSUB16 : T2I_pam<0b101, 0b0000, "ssub16">;
+def t2SSUB8 : T2I_pam<0b100, 0b0000, "ssub8">;
+def t2UASX : T2I_pam<0b010, 0b0100, "uasx">;
+def t2UADD16 : T2I_pam<0b001, 0b0100, "uadd16">;
+def t2UADD8 : T2I_pam<0b000, 0b0100, "uadd8">;
+def t2USAX : T2I_pam<0b110, 0b0100, "usax">;
+def t2USUB16 : T2I_pam<0b101, 0b0100, "usub16">;
+def t2USUB8 : T2I_pam<0b100, 0b0100, "usub8">;
+
+// Signed/Unsigned halving add/subtract -- for disassembly only
+
+def t2SHASX : T2I_pam<0b010, 0b0010, "shasx">;
+def t2SHADD16 : T2I_pam<0b001, 0b0010, "shadd16">;
+def t2SHADD8 : T2I_pam<0b000, 0b0010, "shadd8">;
+def t2SHSAX : T2I_pam<0b110, 0b0010, "shsax">;
+def t2SHSUB16 : T2I_pam<0b101, 0b0010, "shsub16">;
+def t2SHSUB8 : T2I_pam<0b100, 0b0010, "shsub8">;
+def t2UHASX : T2I_pam<0b010, 0b0110, "uhasx">;
+def t2UHADD16 : T2I_pam<0b001, 0b0110, "uhadd16">;
+def t2UHADD8 : T2I_pam<0b000, 0b0110, "uhadd8">;
+def t2UHSAX : T2I_pam<0b110, 0b0110, "uhsax">;
+def t2UHSUB16 : T2I_pam<0b101, 0b0110, "uhsub16">;
+def t2UHSUB8 : T2I_pam<0b100, 0b0110, "uhsub8">;
+
+// Helper class for disassembly only
+// A6.3.16 & A6.3.17
+// T2Imac - Thumb2 multiply [accumulate, and absolute difference] instructions.
+class T2ThreeReg_mac<bit long, bits<3> op22_20, bits<4> op7_4, dag oops,
+ dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : T2ThreeReg<oops, iops, itin, opc, asm, pattern> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-24} = 0b011;
+ let Inst{23} = long;
+ let Inst{22-20} = op22_20;
+ let Inst{7-4} = op7_4;
+}
+
+class T2FourReg_mac<bit long, bits<3> op22_20, bits<4> op7_4, dag oops,
+ dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : T2FourReg<oops, iops, itin, opc, asm, pattern> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-24} = 0b011;
+ let Inst{23} = long;
+ let Inst{22-20} = op22_20;
+ let Inst{7-4} = op7_4;
+}
+
+// Unsigned Sum of Absolute Differences [and Accumulate].
+def t2USAD8 : T2ThreeReg_mac<0, 0b111, 0b0000, (outs rGPR:$Rd),
+ (ins rGPR:$Rn, rGPR:$Rm),
+ NoItinerary, "usad8", "\t$Rd, $Rn, $Rm", []>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{15-12} = 0b1111;
+}
+def t2USADA8 : T2FourReg_mac<0, 0b111, 0b0000, (outs rGPR:$Rd),
+ (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), NoItinerary,
+ "usada8", "\t$Rd, $Rn, $Rm, $Ra", []>,
+ Requires<[IsThumb2, HasThumb2DSP]>;
+
+// Signed/Unsigned saturate.
+class T2SatI<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<5> sat_imm;
+ bits<7> sh;
+
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{4-0} = sat_imm;
+ let Inst{21} = sh{5};
+ let Inst{14-12} = sh{4-2};
+ let Inst{7-6} = sh{1-0};
+}
+
+def t2SSAT: T2SatI<
+ (outs rGPR:$Rd),
+ (ins imm1_32:$sat_imm, rGPR:$Rn, t2_shift_imm:$sh),
+ NoItinerary, "ssat", "\t$Rd, $sat_imm, $Rn$sh", []> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25-22} = 0b1100;
+ let Inst{20} = 0;
+ let Inst{15} = 0;
+ let Inst{5} = 0;
+}
+
+def t2SSAT16: T2SatI<
+ (outs rGPR:$Rd), (ins imm1_16:$sat_imm, rGPR:$Rn), NoItinerary,
+ "ssat16", "\t$Rd, $sat_imm, $Rn", []>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25-22} = 0b1100;
+ let Inst{20} = 0;
+ let Inst{15} = 0;
+ let Inst{21} = 1; // sh = '1'
+ let Inst{14-12} = 0b000; // imm3 = '000'
+ let Inst{7-6} = 0b00; // imm2 = '00'
+ let Inst{5-4} = 0b00;
+}
+
+def t2USAT: T2SatI<
+ (outs rGPR:$Rd),
+ (ins imm0_31:$sat_imm, rGPR:$Rn, t2_shift_imm:$sh),
+ NoItinerary, "usat", "\t$Rd, $sat_imm, $Rn$sh", []> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25-22} = 0b1110;
+ let Inst{20} = 0;
+ let Inst{15} = 0;
+}
+
+def t2USAT16: T2SatI<(outs rGPR:$Rd), (ins imm0_15:$sat_imm, rGPR:$Rn),
+ NoItinerary,
+ "usat16", "\t$Rd, $sat_imm, $Rn", []>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{31-22} = 0b1111001110;
+ let Inst{20} = 0;
+ let Inst{15} = 0;
+ let Inst{21} = 1; // sh = '1'
+ let Inst{14-12} = 0b000; // imm3 = '000'
+ let Inst{7-6} = 0b00; // imm2 = '00'
+ let Inst{5-4} = 0b00;
+}
+
+def : T2Pat<(int_arm_ssat GPR:$a, imm:$pos), (t2SSAT imm:$pos, GPR:$a, 0)>;
+def : T2Pat<(int_arm_usat GPR:$a, imm:$pos), (t2USAT imm:$pos, GPR:$a, 0)>;
+
+//===----------------------------------------------------------------------===//
+// Shift and rotate Instructions.
+//
+
+defm t2LSL : T2I_sh_ir<0b00, "lsl", imm0_31,
+ BinOpFrag<(shl node:$LHS, node:$RHS)>, "t2LSL">;
+defm t2LSR : T2I_sh_ir<0b01, "lsr", imm_sr,
+ BinOpFrag<(srl node:$LHS, node:$RHS)>, "t2LSR">;
+defm t2ASR : T2I_sh_ir<0b10, "asr", imm_sr,
+ BinOpFrag<(sra node:$LHS, node:$RHS)>, "t2ASR">;
+defm t2ROR : T2I_sh_ir<0b11, "ror", imm0_31,
+ BinOpFrag<(rotr node:$LHS, node:$RHS)>, "t2ROR">;
+
+// (rotr x, (and y, 0x...1f)) ==> (ROR x, y)
+def : Pat<(rotr rGPR:$lhs, (and rGPR:$rhs, lo5AllOne)),
+ (t2RORrr rGPR:$lhs, rGPR:$rhs)>;
+
+let Uses = [CPSR] in {
+def t2RRX : T2sTwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
+ "rrx", "\t$Rd, $Rm",
+ [(set rGPR:$Rd, (ARMrrx rGPR:$Rm))]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = 0b0010;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{14-12} = 0b000;
+ let Inst{7-4} = 0b0011;
+}
+}
+
+let isCodeGenOnly = 1, Defs = [CPSR] in {
+def t2MOVsrl_flag : T2TwoRegShiftImm<
+ (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
+ "lsrs", ".w\t$Rd, $Rm, #1",
+ [(set rGPR:$Rd, (ARMsrl_flag rGPR:$Rm))]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = 0b0010;
+ let Inst{20} = 1; // The S bit.
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{5-4} = 0b01; // Shift type.
+ // Shift amount = Inst{14-12:7-6} = 1.
+ let Inst{14-12} = 0b000;
+ let Inst{7-6} = 0b01;
+}
+def t2MOVsra_flag : T2TwoRegShiftImm<
+ (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
+ "asrs", ".w\t$Rd, $Rm, #1",
+ [(set rGPR:$Rd, (ARMsra_flag rGPR:$Rm))]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = 0b0010;
+ let Inst{20} = 1; // The S bit.
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{5-4} = 0b10; // Shift type.
+ // Shift amount = Inst{14-12:7-6} = 1.
+ let Inst{14-12} = 0b000;
+ let Inst{7-6} = 0b01;
+}
+}
+
+//===----------------------------------------------------------------------===//
+// Bitwise Instructions.
+//
+
+defm t2AND : T2I_bin_w_irs<0b0000, "and",
+ IIC_iBITi, IIC_iBITr, IIC_iBITsi,
+ BinOpFrag<(and node:$LHS, node:$RHS)>, "t2AND", 1>;
+defm t2ORR : T2I_bin_w_irs<0b0010, "orr",
+ IIC_iBITi, IIC_iBITr, IIC_iBITsi,
+ BinOpFrag<(or node:$LHS, node:$RHS)>, "t2ORR", 1>;
+defm t2EOR : T2I_bin_w_irs<0b0100, "eor",
+ IIC_iBITi, IIC_iBITr, IIC_iBITsi,
+ BinOpFrag<(xor node:$LHS, node:$RHS)>, "t2EOR", 1>;
+
+defm t2BIC : T2I_bin_w_irs<0b0001, "bic",
+ IIC_iBITi, IIC_iBITr, IIC_iBITsi,
+ BinOpFrag<(and node:$LHS, (not node:$RHS))>,
+ "t2BIC">;
+
+class T2BitFI<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<5> msb;
+ bits<5> lsb;
+
+ let Inst{11-8} = Rd;
+ let Inst{4-0} = msb{4-0};
+ let Inst{14-12} = lsb{4-2};
+ let Inst{7-6} = lsb{1-0};
+}
+
+class T2TwoRegBitFI<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2BitFI<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rn;
+
+ let Inst{19-16} = Rn;
+}
+
+let Constraints = "$src = $Rd" in
+def t2BFC : T2BitFI<(outs rGPR:$Rd), (ins rGPR:$src, bf_inv_mask_imm:$imm),
+ IIC_iUNAsi, "bfc", "\t$Rd, $imm",
+ [(set rGPR:$Rd, (and rGPR:$src, bf_inv_mask_imm:$imm))]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{26} = 0; // should be 0.
+ let Inst{25} = 1;
+ let Inst{24-20} = 0b10110;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{15} = 0;
+ let Inst{5} = 0; // should be 0.
+
+ bits<10> imm;
+ let msb{4-0} = imm{9-5};
+ let lsb{4-0} = imm{4-0};
+}
+
+def t2SBFX: T2TwoRegBitFI<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, imm0_31:$lsb, imm1_32:$msb),
+ IIC_iUNAsi, "sbfx", "\t$Rd, $Rn, $lsb, $msb", []> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 1;
+ let Inst{24-20} = 0b10100;
+ let Inst{15} = 0;
+}
+
+def t2UBFX: T2TwoRegBitFI<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, imm0_31:$lsb, imm1_32:$msb),
+ IIC_iUNAsi, "ubfx", "\t$Rd, $Rn, $lsb, $msb", []> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 1;
+ let Inst{24-20} = 0b11100;
+ let Inst{15} = 0;
+}
+
+// A8.6.18 BFI - Bitfield insert (Encoding T1)
+let Constraints = "$src = $Rd" in {
+ def t2BFI : T2TwoRegBitFI<(outs rGPR:$Rd),
+ (ins rGPR:$src, rGPR:$Rn, bf_inv_mask_imm:$imm),
+ IIC_iBITi, "bfi", "\t$Rd, $Rn, $imm",
+ [(set rGPR:$Rd, (ARMbfi rGPR:$src, rGPR:$Rn,
+ bf_inv_mask_imm:$imm))]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{26} = 0; // should be 0.
+ let Inst{25} = 1;
+ let Inst{24-20} = 0b10110;
+ let Inst{15} = 0;
+ let Inst{5} = 0; // should be 0.
+
+ bits<10> imm;
+ let msb{4-0} = imm{9-5};
+ let lsb{4-0} = imm{4-0};
+ }
+}
+
+defm t2ORN : T2I_bin_irs<0b0011, "orn",
+ IIC_iBITi, IIC_iBITr, IIC_iBITsi,
+ BinOpFrag<(or node:$LHS, (not node:$RHS))>,
+ "t2ORN", 0, "">;
+
+/// T2I_un_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns for a
+/// unary operation that produces a value. These are predicable and can be
+/// changed to modify CPSR.
+multiclass T2I_un_irs<bits<4> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+ PatFrag opnode, bit Cheap = 0, bit ReMat = 0> {
+ // shifted imm
+ def i : T2sOneRegImm<(outs rGPR:$Rd), (ins t2_so_imm:$imm), iii,
+ opc, "\t$Rd, $imm",
+ [(set rGPR:$Rd, (opnode t2_so_imm:$imm))]> {
+ let isAsCheapAsAMove = Cheap;
+ let isReMaterializable = ReMat;
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 0;
+ let Inst{24-21} = opcod;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{15} = 0;
+ }
+ // register
+ def r : T2sTwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), iir,
+ opc, ".w\t$Rd, $Rm",
+ [(set rGPR:$Rd, (opnode rGPR:$Rm))]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{14-12} = 0b000; // imm3
+ let Inst{7-6} = 0b00; // imm2
+ let Inst{5-4} = 0b00; // type
+ }
+ // shifted register
+ def s : T2sOneRegShiftedReg<(outs rGPR:$Rd), (ins t2_so_reg:$ShiftedRm), iis,
+ opc, ".w\t$Rd, $ShiftedRm",
+ [(set rGPR:$Rd, (opnode t2_so_reg:$ShiftedRm))]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ let Inst{19-16} = 0b1111; // Rn
+ }
+}
+
+// Prefer over of t2EORri ra, rb, -1 because mvn has 16-bit version
+let AddedComplexity = 1 in
+defm t2MVN : T2I_un_irs <0b0011, "mvn",
+ IIC_iMVNi, IIC_iMVNr, IIC_iMVNsi,
+ UnOpFrag<(not node:$Src)>, 1, 1>;
+
+let AddedComplexity = 1 in
+def : T2Pat<(and rGPR:$src, t2_so_imm_not:$imm),
+ (t2BICri rGPR:$src, t2_so_imm_not:$imm)>;
+
+// FIXME: Disable this pattern on Darwin to workaround an assembler bug.
+def : T2Pat<(or rGPR:$src, t2_so_imm_not:$imm),
+ (t2ORNri rGPR:$src, t2_so_imm_not:$imm)>,
+ Requires<[IsThumb2]>;
+
+def : T2Pat<(t2_so_imm_not:$src),
+ (t2MVNi t2_so_imm_not:$src)>;
+
+//===----------------------------------------------------------------------===//
+// Multiply Instructions.
+//
+let isCommutable = 1 in
+def t2MUL: T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32,
+ "mul", "\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (mul rGPR:$Rn, rGPR:$Rm))]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b000;
+ let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate)
+ let Inst{7-4} = 0b0000; // Multiply
+}
+
+def t2MLA: T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
+ "mla", "\t$Rd, $Rn, $Rm, $Ra",
+ [(set rGPR:$Rd, (add (mul rGPR:$Rn, rGPR:$Rm), rGPR:$Ra))]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b000;
+ let Inst{7-4} = 0b0000; // Multiply
+}
+
+def t2MLS: T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
+ "mls", "\t$Rd, $Rn, $Rm, $Ra",
+ [(set rGPR:$Rd, (sub rGPR:$Ra, (mul rGPR:$Rn, rGPR:$Rm)))]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b000;
+ let Inst{7-4} = 0b0001; // Multiply and Subtract
+}
+
+// Extra precision multiplies with low / high results
+let neverHasSideEffects = 1 in {
+let isCommutable = 1 in {
+def t2SMULL : T2MulLong<0b000, 0b0000,
+ (outs rGPR:$RdLo, rGPR:$RdHi),
+ (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL64,
+ "smull", "\t$RdLo, $RdHi, $Rn, $Rm", []>;
+
+def t2UMULL : T2MulLong<0b010, 0b0000,
+ (outs rGPR:$RdLo, rGPR:$RdHi),
+ (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL64,
+ "umull", "\t$RdLo, $RdHi, $Rn, $Rm", []>;
+} // isCommutable
+
+// Multiply + accumulate
+def t2SMLAL : T2MulLong<0b100, 0b0000,
+ (outs rGPR:$RdLo, rGPR:$RdHi),
+ (ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC64,
+ "smlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>;
+
+def t2UMLAL : T2MulLong<0b110, 0b0000,
+ (outs rGPR:$RdLo, rGPR:$RdHi),
+ (ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC64,
+ "umlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>;
+
+def t2UMAAL : T2MulLong<0b110, 0b0110,
+ (outs rGPR:$RdLo, rGPR:$RdHi),
+ (ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC64,
+ "umaal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+ Requires<[IsThumb2, HasThumb2DSP]>;
+} // neverHasSideEffects
+
+// Rounding variants of the below included for disassembly only
+
+// Most significant word multiply
+def t2SMMUL : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32,
+ "smmul", "\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (mulhs rGPR:$Rn, rGPR:$Rm))]>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b101;
+ let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate)
+ let Inst{7-4} = 0b0000; // No Rounding (Inst{4} = 0)
+}
+
+def t2SMMULR : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32,
+ "smmulr", "\t$Rd, $Rn, $Rm", []>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b101;
+ let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate)
+ let Inst{7-4} = 0b0001; // Rounding (Inst{4} = 1)
+}
+
+def t2SMMLA : T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
+ "smmla", "\t$Rd, $Rn, $Rm, $Ra",
+ [(set rGPR:$Rd, (add (mulhs rGPR:$Rm, rGPR:$Rn), rGPR:$Ra))]>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b101;
+ let Inst{7-4} = 0b0000; // No Rounding (Inst{4} = 0)
+}
+
+def t2SMMLAR: T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
+ "smmlar", "\t$Rd, $Rn, $Rm, $Ra", []>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b101;
+ let Inst{7-4} = 0b0001; // Rounding (Inst{4} = 1)
+}
+
+def t2SMMLS: T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
+ "smmls", "\t$Rd, $Rn, $Rm, $Ra",
+ [(set rGPR:$Rd, (sub rGPR:$Ra, (mulhs rGPR:$Rn, rGPR:$Rm)))]>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b110;
+ let Inst{7-4} = 0b0000; // No Rounding (Inst{4} = 0)
+}
+
+def t2SMMLSR:T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
+ "smmlsr", "\t$Rd, $Rn, $Rm, $Ra", []>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b110;
+ let Inst{7-4} = 0b0001; // Rounding (Inst{4} = 1)
+}
+
+multiclass T2I_smul<string opc, PatFrag opnode> {
+ def BB : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
+ !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (opnode (sext_inreg rGPR:$Rn, i16),
+ (sext_inreg rGPR:$Rm, i16)))]>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b001;
+ let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate)
+ let Inst{7-6} = 0b00;
+ let Inst{5-4} = 0b00;
+ }
+
+ def BT : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
+ !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (opnode (sext_inreg rGPR:$Rn, i16),
+ (sra rGPR:$Rm, (i32 16))))]>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b001;
+ let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate)
+ let Inst{7-6} = 0b00;
+ let Inst{5-4} = 0b01;
+ }
+
+ def TB : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
+ !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (opnode (sra rGPR:$Rn, (i32 16)),
+ (sext_inreg rGPR:$Rm, i16)))]>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b001;
+ let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate)
+ let Inst{7-6} = 0b00;
+ let Inst{5-4} = 0b10;
+ }
+
+ def TT : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
+ !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (opnode (sra rGPR:$Rn, (i32 16)),
+ (sra rGPR:$Rm, (i32 16))))]>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b001;
+ let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate)
+ let Inst{7-6} = 0b00;
+ let Inst{5-4} = 0b11;
+ }
+
+ def WB : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
+ !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (sra (opnode rGPR:$Rn,
+ (sext_inreg rGPR:$Rm, i16)), (i32 16)))]>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b011;
+ let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate)
+ let Inst{7-6} = 0b00;
+ let Inst{5-4} = 0b00;
+ }
+
+ def WT : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
+ !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (sra (opnode rGPR:$Rn,
+ (sra rGPR:$Rm, (i32 16))), (i32 16)))]>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b011;
+ let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate)
+ let Inst{7-6} = 0b00;
+ let Inst{5-4} = 0b01;
+ }
+}
+
+
+multiclass T2I_smla<string opc, PatFrag opnode> {
+ def BB : T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
+ !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set rGPR:$Rd, (add rGPR:$Ra,
+ (opnode (sext_inreg rGPR:$Rn, i16),
+ (sext_inreg rGPR:$Rm, i16))))]>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b001;
+ let Inst{7-6} = 0b00;
+ let Inst{5-4} = 0b00;
+ }
+
+ def BT : T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
+ !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sext_inreg rGPR:$Rn, i16),
+ (sra rGPR:$Rm, (i32 16)))))]>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b001;
+ let Inst{7-6} = 0b00;
+ let Inst{5-4} = 0b01;
+ }
+
+ def TB : T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
+ !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sra rGPR:$Rn, (i32 16)),
+ (sext_inreg rGPR:$Rm, i16))))]>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b001;
+ let Inst{7-6} = 0b00;
+ let Inst{5-4} = 0b10;
+ }
+
+ def TT : T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
+ !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sra rGPR:$Rn, (i32 16)),
+ (sra rGPR:$Rm, (i32 16)))))]>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b001;
+ let Inst{7-6} = 0b00;
+ let Inst{5-4} = 0b11;
+ }
+
+ def WB : T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
+ !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set rGPR:$Rd, (add rGPR:$Ra, (sra (opnode rGPR:$Rn,
+ (sext_inreg rGPR:$Rm, i16)), (i32 16))))]>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b011;
+ let Inst{7-6} = 0b00;
+ let Inst{5-4} = 0b00;
+ }
+
+ def WT : T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
+ !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set rGPR:$Rd, (add rGPR:$Ra, (sra (opnode rGPR:$Rn,
+ (sra rGPR:$Rm, (i32 16))), (i32 16))))]>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b011;
+ let Inst{7-6} = 0b00;
+ let Inst{5-4} = 0b01;
+ }
+}
+
+defm t2SMUL : T2I_smul<"smul", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
+defm t2SMLA : T2I_smla<"smla", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
+
+// Halfword multiple accumulate long: SMLAL<x><y>
+def t2SMLALBB : T2FourReg_mac<1, 0b100, 0b1000, (outs rGPR:$Ra,rGPR:$Rd),
+ (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlalbb", "\t$Ra, $Rd, $Rn, $Rm",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsThumb2, HasThumb2DSP]>;
+def t2SMLALBT : T2FourReg_mac<1, 0b100, 0b1001, (outs rGPR:$Ra,rGPR:$Rd),
+ (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlalbt", "\t$Ra, $Rd, $Rn, $Rm",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsThumb2, HasThumb2DSP]>;
+def t2SMLALTB : T2FourReg_mac<1, 0b100, 0b1010, (outs rGPR:$Ra,rGPR:$Rd),
+ (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlaltb", "\t$Ra, $Rd, $Rn, $Rm",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsThumb2, HasThumb2DSP]>;
+def t2SMLALTT : T2FourReg_mac<1, 0b100, 0b1011, (outs rGPR:$Ra,rGPR:$Rd),
+ (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlaltt", "\t$Ra, $Rd, $Rn, $Rm",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsThumb2, HasThumb2DSP]>;
+
+// Dual halfword multiple: SMUAD, SMUSD, SMLAD, SMLSD, SMLALD, SMLSLD
+def t2SMUAD: T2ThreeReg_mac<
+ 0, 0b010, 0b0000, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm),
+ IIC_iMAC32, "smuad", "\t$Rd, $Rn, $Rm", []>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{15-12} = 0b1111;
+}
+def t2SMUADX:T2ThreeReg_mac<
+ 0, 0b010, 0b0001, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm),
+ IIC_iMAC32, "smuadx", "\t$Rd, $Rn, $Rm", []>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{15-12} = 0b1111;
+}
+def t2SMUSD: T2ThreeReg_mac<
+ 0, 0b100, 0b0000, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm),
+ IIC_iMAC32, "smusd", "\t$Rd, $Rn, $Rm", []>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{15-12} = 0b1111;
+}
+def t2SMUSDX:T2ThreeReg_mac<
+ 0, 0b100, 0b0001, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm),
+ IIC_iMAC32, "smusdx", "\t$Rd, $Rn, $Rm", []>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{15-12} = 0b1111;
+}
+def t2SMLAD : T2FourReg_mac<
+ 0, 0b010, 0b0000, (outs rGPR:$Rd),
+ (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smlad",
+ "\t$Rd, $Rn, $Rm, $Ra", []>,
+ Requires<[IsThumb2, HasThumb2DSP]>;
+def t2SMLADX : T2FourReg_mac<
+ 0, 0b010, 0b0001, (outs rGPR:$Rd),
+ (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smladx",
+ "\t$Rd, $Rn, $Rm, $Ra", []>,
+ Requires<[IsThumb2, HasThumb2DSP]>;
+def t2SMLSD : T2FourReg_mac<0, 0b100, 0b0000, (outs rGPR:$Rd),
+ (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smlsd",
+ "\t$Rd, $Rn, $Rm, $Ra", []>,
+ Requires<[IsThumb2, HasThumb2DSP]>;
+def t2SMLSDX : T2FourReg_mac<0, 0b100, 0b0001, (outs rGPR:$Rd),
+ (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smlsdx",
+ "\t$Rd, $Rn, $Rm, $Ra", []>,
+ Requires<[IsThumb2, HasThumb2DSP]>;
+def t2SMLALD : T2FourReg_mac<1, 0b100, 0b1100, (outs rGPR:$Ra,rGPR:$Rd),
+ (ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC64, "smlald",
+ "\t$Ra, $Rd, $Rn, $Rm", []>,
+ Requires<[IsThumb2, HasThumb2DSP]>;
+def t2SMLALDX : T2FourReg_mac<1, 0b100, 0b1101, (outs rGPR:$Ra,rGPR:$Rd),
+ (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlaldx",
+ "\t$Ra, $Rd, $Rn, $Rm", []>,
+ Requires<[IsThumb2, HasThumb2DSP]>;
+def t2SMLSLD : T2FourReg_mac<1, 0b101, 0b1100, (outs rGPR:$Ra,rGPR:$Rd),
+ (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlsld",
+ "\t$Ra, $Rd, $Rn, $Rm", []>,
+ Requires<[IsThumb2, HasThumb2DSP]>;
+def t2SMLSLDX : T2FourReg_mac<1, 0b101, 0b1101, (outs rGPR:$Ra,rGPR:$Rd),
+ (ins rGPR:$Rm,rGPR:$Rn), IIC_iMAC64, "smlsldx",
+ "\t$Ra, $Rd, $Rn, $Rm", []>,
+ Requires<[IsThumb2, HasThumb2DSP]>;
+
+//===----------------------------------------------------------------------===//
+// Division Instructions.
+// Signed and unsigned division on v7-M
+//
+def t2SDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUi,
+ "sdiv", "\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (sdiv rGPR:$Rn, rGPR:$Rm))]>,
+ Requires<[HasDivide, IsThumb2]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-21} = 0b011100;
+ let Inst{20} = 0b1;
+ let Inst{15-12} = 0b1111;
+ let Inst{7-4} = 0b1111;
+}
+
+def t2UDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUi,
+ "udiv", "\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (udiv rGPR:$Rn, rGPR:$Rm))]>,
+ Requires<[HasDivide, IsThumb2]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-21} = 0b011101;
+ let Inst{20} = 0b1;
+ let Inst{15-12} = 0b1111;
+ let Inst{7-4} = 0b1111;
+}
+
+//===----------------------------------------------------------------------===//
+// Misc. Arithmetic Instructions.
+//
+
+class T2I_misc<bits<2> op1, bits<2> op2, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : T2ThreeReg<oops, iops, itin, opc, asm, pattern> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-22} = 0b01010;
+ let Inst{21-20} = op1;
+ let Inst{15-12} = 0b1111;
+ let Inst{7-6} = 0b10;
+ let Inst{5-4} = op2;
+ let Rn{3-0} = Rm;
+}
+
+def t2CLZ : T2I_misc<0b11, 0b00, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr,
+ "clz", "\t$Rd, $Rm", [(set rGPR:$Rd, (ctlz rGPR:$Rm))]>;
+
+def t2RBIT : T2I_misc<0b01, 0b10, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr,
+ "rbit", "\t$Rd, $Rm",
+ [(set rGPR:$Rd, (ARMrbit rGPR:$Rm))]>;
+
+def t2REV : T2I_misc<0b01, 0b00, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr,
+ "rev", ".w\t$Rd, $Rm", [(set rGPR:$Rd, (bswap rGPR:$Rm))]>;
+
+def t2REV16 : T2I_misc<0b01, 0b01, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr,
+ "rev16", ".w\t$Rd, $Rm",
+ [(set rGPR:$Rd, (rotr (bswap rGPR:$Rm), (i32 16)))]>;
+
+def t2REVSH : T2I_misc<0b01, 0b11, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr,
+ "revsh", ".w\t$Rd, $Rm",
+ [(set rGPR:$Rd, (sra (bswap rGPR:$Rm), (i32 16)))]>;
+
+def : T2Pat<(or (sra (shl rGPR:$Rm, (i32 24)), (i32 16)),
+ (and (srl rGPR:$Rm, (i32 8)), 0xFF)),
+ (t2REVSH rGPR:$Rm)>;
+
+def t2PKHBT : T2ThreeReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, pkh_lsl_amt:$sh),
+ IIC_iBITsi, "pkhbt", "\t$Rd, $Rn, $Rm$sh",
+ [(set rGPR:$Rd, (or (and rGPR:$Rn, 0xFFFF),
+ (and (shl rGPR:$Rm, pkh_lsl_amt:$sh),
+ 0xFFFF0000)))]>,
+ Requires<[HasT2ExtractPack, IsThumb2]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-20} = 0b01100;
+ let Inst{5} = 0; // BT form
+ let Inst{4} = 0;
+
+ bits<5> sh;
+ let Inst{14-12} = sh{4-2};
+ let Inst{7-6} = sh{1-0};
+}
+
+// Alternate cases for PKHBT where identities eliminate some nodes.
+def : T2Pat<(or (and rGPR:$src1, 0xFFFF), (and rGPR:$src2, 0xFFFF0000)),
+ (t2PKHBT rGPR:$src1, rGPR:$src2, 0)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+def : T2Pat<(or (and rGPR:$src1, 0xFFFF), (shl rGPR:$src2, imm16_31:$sh)),
+ (t2PKHBT rGPR:$src1, rGPR:$src2, imm16_31:$sh)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+
+// Note: Shifts of 1-15 bits will be transformed to srl instead of sra and
+// will match the pattern below.
+def t2PKHTB : T2ThreeReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, pkh_asr_amt:$sh),
+ IIC_iBITsi, "pkhtb", "\t$Rd, $Rn, $Rm$sh",
+ [(set rGPR:$Rd, (or (and rGPR:$Rn, 0xFFFF0000),
+ (and (sra rGPR:$Rm, pkh_asr_amt:$sh),
+ 0xFFFF)))]>,
+ Requires<[HasT2ExtractPack, IsThumb2]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-20} = 0b01100;
+ let Inst{5} = 1; // TB form
+ let Inst{4} = 0;
+
+ bits<5> sh;
+ let Inst{14-12} = sh{4-2};
+ let Inst{7-6} = sh{1-0};
+}
+
+// Alternate cases for PKHTB where identities eliminate some nodes. Note that
+// a shift amount of 0 is *not legal* here, it is PKHBT instead.
+def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000), (srl rGPR:$src2, imm16_31:$sh)),
+ (t2PKHTB rGPR:$src1, rGPR:$src2, imm16_31:$sh)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000),
+ (and (srl rGPR:$src2, imm1_15:$sh), 0xFFFF)),
+ (t2PKHTB rGPR:$src1, rGPR:$src2, imm1_15:$sh)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+
+//===----------------------------------------------------------------------===//
+// Comparison Instructions...
+//
+defm t2CMP : T2I_cmp_irs<0b1101, "cmp",
+ IIC_iCMPi, IIC_iCMPr, IIC_iCMPsi,
+ BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>, "t2CMP">;
+
+def : T2Pat<(ARMcmpZ GPRnopc:$lhs, t2_so_imm:$imm),
+ (t2CMPri GPRnopc:$lhs, t2_so_imm:$imm)>;
+def : T2Pat<(ARMcmpZ GPRnopc:$lhs, rGPR:$rhs),
+ (t2CMPrr GPRnopc:$lhs, rGPR:$rhs)>;
+def : T2Pat<(ARMcmpZ GPRnopc:$lhs, t2_so_reg:$rhs),
+ (t2CMPrs GPRnopc:$lhs, t2_so_reg:$rhs)>;
+
+//FIXME: Disable CMN, as CCodes are backwards from compare expectations
+// Compare-to-zero still works out, just not the relationals
+//defm t2CMN : T2I_cmp_irs<0b1000, "cmn",
+// BinOpFrag<(ARMcmp node:$LHS,(ineg node:$RHS))>>;
+defm t2CMNz : T2I_cmp_irs<0b1000, "cmn",
+ IIC_iCMPi, IIC_iCMPr, IIC_iCMPsi,
+ BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>,
+ "t2CMNz">;
+
+//def : T2Pat<(ARMcmp GPR:$src, t2_so_imm_neg:$imm),
+// (t2CMNri GPR:$src, t2_so_imm_neg:$imm)>;
+
+def : T2Pat<(ARMcmpZ GPRnopc:$src, t2_so_imm_neg:$imm),
+ (t2CMNzri GPRnopc:$src, t2_so_imm_neg:$imm)>;
+
+defm t2TST : T2I_cmp_irs<0b0000, "tst",
+ IIC_iTSTi, IIC_iTSTr, IIC_iTSTsi,
+ BinOpFrag<(ARMcmpZ (and_su node:$LHS, node:$RHS), 0)>,
+ "t2TST">;
+defm t2TEQ : T2I_cmp_irs<0b0100, "teq",
+ IIC_iTSTi, IIC_iTSTr, IIC_iTSTsi,
+ BinOpFrag<(ARMcmpZ (xor_su node:$LHS, node:$RHS), 0)>,
+ "t2TEQ">;
+
+// Conditional moves
+// FIXME: should be able to write a pattern for ARMcmov, but can't use
+// a two-value operand where a dag node expects two operands. :(
+let neverHasSideEffects = 1 in {
+def t2MOVCCr : t2PseudoInst<(outs rGPR:$Rd),
+ (ins rGPR:$false, rGPR:$Rm, pred:$p),
+ 4, IIC_iCMOVr,
+ [/*(set rGPR:$Rd, (ARMcmov rGPR:$false, rGPR:$Rm, imm:$cc, CCR:$ccr))*/]>,
+ RegConstraint<"$false = $Rd">;
+
+let isMoveImm = 1 in
+def t2MOVCCi : t2PseudoInst<(outs rGPR:$Rd),
+ (ins rGPR:$false, t2_so_imm:$imm, pred:$p),
+ 4, IIC_iCMOVi,
+[/*(set rGPR:$Rd,(ARMcmov rGPR:$false,t2_so_imm:$imm, imm:$cc, CCR:$ccr))*/]>,
+ RegConstraint<"$false = $Rd">;
+
+// FIXME: Pseudo-ize these. For now, just mark codegen only.
+let isCodeGenOnly = 1 in {
+let isMoveImm = 1 in
+def t2MOVCCi16 : T2I<(outs rGPR:$Rd), (ins rGPR:$false, imm0_65535_expr:$imm),
+ IIC_iCMOVi,
+ "movw", "\t$Rd, $imm", []>,
+ RegConstraint<"$false = $Rd"> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 1;
+ let Inst{24-21} = 0b0010;
+ let Inst{20} = 0; // The S bit.
+ let Inst{15} = 0;
+
+ bits<4> Rd;
+ bits<16> imm;
+
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = imm{15-12};
+ let Inst{26} = imm{11};
+ let Inst{14-12} = imm{10-8};
+ let Inst{7-0} = imm{7-0};
+}
+
+let isMoveImm = 1 in
+def t2MOVCCi32imm : PseudoInst<(outs rGPR:$dst),
+ (ins rGPR:$false, i32imm:$src, pred:$p),
+ IIC_iCMOVix2, []>, RegConstraint<"$false = $dst">;
+
+let isMoveImm = 1 in
+def t2MVNCCi : T2OneRegImm<(outs rGPR:$Rd), (ins rGPR:$false, t2_so_imm:$imm),
+ IIC_iCMOVi, "mvn", ".w\t$Rd, $imm",
+[/*(set rGPR:$Rd,(ARMcmov rGPR:$false,t2_so_imm_not:$imm,
+ imm:$cc, CCR:$ccr))*/]>,
+ RegConstraint<"$false = $Rd"> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 0;
+ let Inst{24-21} = 0b0011;
+ let Inst{20} = 0; // The S bit.
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{15} = 0;
+}
+
+class T2I_movcc_sh<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2TwoRegShiftImm<oops, iops, itin, opc, asm, pattern> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = 0b0010;
+ let Inst{20} = 0; // The S bit.
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{5-4} = opcod; // Shift type.
+}
+def t2MOVCClsl : T2I_movcc_sh<0b00, (outs rGPR:$Rd),
+ (ins rGPR:$false, rGPR:$Rm, i32imm:$imm),
+ IIC_iCMOVsi, "lsl", ".w\t$Rd, $Rm, $imm", []>,
+ RegConstraint<"$false = $Rd">;
+def t2MOVCClsr : T2I_movcc_sh<0b01, (outs rGPR:$Rd),
+ (ins rGPR:$false, rGPR:$Rm, i32imm:$imm),
+ IIC_iCMOVsi, "lsr", ".w\t$Rd, $Rm, $imm", []>,
+ RegConstraint<"$false = $Rd">;
+def t2MOVCCasr : T2I_movcc_sh<0b10, (outs rGPR:$Rd),
+ (ins rGPR:$false, rGPR:$Rm, i32imm:$imm),
+ IIC_iCMOVsi, "asr", ".w\t$Rd, $Rm, $imm", []>,
+ RegConstraint<"$false = $Rd">;
+def t2MOVCCror : T2I_movcc_sh<0b11, (outs rGPR:$Rd),
+ (ins rGPR:$false, rGPR:$Rm, i32imm:$imm),
+ IIC_iCMOVsi, "ror", ".w\t$Rd, $Rm, $imm", []>,
+ RegConstraint<"$false = $Rd">;
+} // isCodeGenOnly = 1
+} // neverHasSideEffects
+
+//===----------------------------------------------------------------------===//
+// Atomic operations intrinsics
+//
+
+// memory barriers protect the atomic sequences
+let hasSideEffects = 1 in {
+def t2DMB : AInoP<(outs), (ins memb_opt:$opt), ThumbFrm, NoItinerary,
+ "dmb", "\t$opt", [(ARMMemBarrier (i32 imm:$opt))]>,
+ Requires<[IsThumb, HasDB]> {
+ bits<4> opt;
+ let Inst{31-4} = 0xf3bf8f5;
+ let Inst{3-0} = opt;
+}
+}
+
+def t2DSB : AInoP<(outs), (ins memb_opt:$opt), ThumbFrm, NoItinerary,
+ "dsb", "\t$opt", []>,
+ Requires<[IsThumb, HasDB]> {
+ bits<4> opt;
+ let Inst{31-4} = 0xf3bf8f4;
+ let Inst{3-0} = opt;
+}
+
+def t2ISB : AInoP<(outs), (ins memb_opt:$opt), ThumbFrm, NoItinerary,
+ "isb", "\t$opt",
+ []>, Requires<[IsThumb2, HasDB]> {
+ bits<4> opt;
+ let Inst{31-4} = 0xf3bf8f6;
+ let Inst{3-0} = opt;
+}
+
+class T2I_ldrex<bits<2> opcod, dag oops, dag iops, AddrMode am, int sz,
+ InstrItinClass itin, string opc, string asm, string cstr,
+ list<dag> pattern, bits<4> rt2 = 0b1111>
+ : Thumb2I<oops, iops, am, sz, itin, opc, asm, cstr, pattern> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-20} = 0b0001101;
+ let Inst{11-8} = rt2;
+ let Inst{7-6} = 0b01;
+ let Inst{5-4} = opcod;
+ let Inst{3-0} = 0b1111;
+
+ bits<4> addr;
+ bits<4> Rt;
+ let Inst{19-16} = addr;
+ let Inst{15-12} = Rt;
+}
+class T2I_strex<bits<2> opcod, dag oops, dag iops, AddrMode am, int sz,
+ InstrItinClass itin, string opc, string asm, string cstr,
+ list<dag> pattern, bits<4> rt2 = 0b1111>
+ : Thumb2I<oops, iops, am, sz, itin, opc, asm, cstr, pattern> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-20} = 0b0001100;
+ let Inst{11-8} = rt2;
+ let Inst{7-6} = 0b01;
+ let Inst{5-4} = opcod;
+
+ bits<4> Rd;
+ bits<4> addr;
+ bits<4> Rt;
+ let Inst{3-0} = Rd;
+ let Inst{19-16} = addr;
+ let Inst{15-12} = Rt;
+}
+
+let mayLoad = 1 in {
+def t2LDREXB : T2I_ldrex<0b00, (outs rGPR:$Rt), (ins addr_offset_none:$addr),
+ AddrModeNone, 4, NoItinerary,
+ "ldrexb", "\t$Rt, $addr", "", []>;
+def t2LDREXH : T2I_ldrex<0b01, (outs rGPR:$Rt), (ins addr_offset_none:$addr),
+ AddrModeNone, 4, NoItinerary,
+ "ldrexh", "\t$Rt, $addr", "", []>;
+def t2LDREX : Thumb2I<(outs rGPR:$Rt), (ins t2addrmode_imm0_1020s4:$addr),
+ AddrModeNone, 4, NoItinerary,
+ "ldrex", "\t$Rt, $addr", "", []> {
+ bits<4> Rt;
+ bits<12> addr;
+ let Inst{31-27} = 0b11101;
+ let Inst{26-20} = 0b0000101;
+ let Inst{19-16} = addr{11-8};
+ let Inst{15-12} = Rt;
+ let Inst{11-8} = 0b1111;
+ let Inst{7-0} = addr{7-0};
+}
+let hasExtraDefRegAllocReq = 1 in
+def t2LDREXD : T2I_ldrex<0b11, (outs rGPR:$Rt, rGPR:$Rt2),
+ (ins addr_offset_none:$addr),
+ AddrModeNone, 4, NoItinerary,
+ "ldrexd", "\t$Rt, $Rt2, $addr", "",
+ [], {?, ?, ?, ?}> {
+ bits<4> Rt2;
+ let Inst{11-8} = Rt2;
+}
+}
+
+let mayStore = 1, Constraints = "@earlyclobber $Rd" in {
+def t2STREXB : T2I_strex<0b00, (outs rGPR:$Rd),
+ (ins rGPR:$Rt, addr_offset_none:$addr),
+ AddrModeNone, 4, NoItinerary,
+ "strexb", "\t$Rd, $Rt, $addr", "", []>;
+def t2STREXH : T2I_strex<0b01, (outs rGPR:$Rd),
+ (ins rGPR:$Rt, addr_offset_none:$addr),
+ AddrModeNone, 4, NoItinerary,
+ "strexh", "\t$Rd, $Rt, $addr", "", []>;
+def t2STREX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt,
+ t2addrmode_imm0_1020s4:$addr),
+ AddrModeNone, 4, NoItinerary,
+ "strex", "\t$Rd, $Rt, $addr", "",
+ []> {
+ bits<4> Rd;
+ bits<4> Rt;
+ bits<12> addr;
+ let Inst{31-27} = 0b11101;
+ let Inst{26-20} = 0b0000100;
+ let Inst{19-16} = addr{11-8};
+ let Inst{15-12} = Rt;
+ let Inst{11-8} = Rd;
+ let Inst{7-0} = addr{7-0};
+}
+}
+
+let hasExtraSrcRegAllocReq = 1, Constraints = "@earlyclobber $Rd" in
+def t2STREXD : T2I_strex<0b11, (outs rGPR:$Rd),
+ (ins rGPR:$Rt, rGPR:$Rt2, addr_offset_none:$addr),
+ AddrModeNone, 4, NoItinerary,
+ "strexd", "\t$Rd, $Rt, $Rt2, $addr", "", [],
+ {?, ?, ?, ?}> {
+ bits<4> Rt2;
+ let Inst{11-8} = Rt2;
+}
+
+def t2CLREX : T2I<(outs), (ins), NoItinerary, "clrex", "", []>,
+ Requires<[IsThumb2, HasV7]> {
+ let Inst{31-16} = 0xf3bf;
+ let Inst{15-14} = 0b10;
+ let Inst{13} = 0;
+ let Inst{12} = 0;
+ let Inst{11-8} = 0b1111;
+ let Inst{7-4} = 0b0010;
+ let Inst{3-0} = 0b1111;
+}
+
+//===----------------------------------------------------------------------===//
+// SJLJ Exception handling intrinsics
+// eh_sjlj_setjmp() is an instruction sequence to store the return
+// address and save #0 in R0 for the non-longjmp case.
+// Since by its nature we may be coming from some other function to get
+// here, and we're using the stack frame for the containing function to
+// save/restore registers, we can't keep anything live in regs across
+// the eh_sjlj_setjmp(), else it will almost certainly have been tromped upon
+// when we get here from a longjmp(). We force everything out of registers
+// except for our own input by listing the relevant registers in Defs. By
+// doing so, we also cause the prologue/epilogue code to actively preserve
+// all of the callee-saved resgisters, which is exactly what we want.
+// $val is a scratch register for our use.
+let Defs =
+ [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR,
+ QQQQ0, QQQQ1, QQQQ2, QQQQ3 ],
+ hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1 in {
+ def t2Int_eh_sjlj_setjmp : Thumb2XI<(outs), (ins tGPR:$src, tGPR:$val),
+ AddrModeNone, 0, NoItinerary, "", "",
+ [(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>,
+ Requires<[IsThumb2, HasVFP2]>;
+}
+
+let Defs =
+ [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR ],
+ hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1 in {
+ def t2Int_eh_sjlj_setjmp_nofp : Thumb2XI<(outs), (ins tGPR:$src, tGPR:$val),
+ AddrModeNone, 0, NoItinerary, "", "",
+ [(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>,
+ Requires<[IsThumb2, NoVFP]>;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Control-Flow Instructions
+//
+
+// FIXME: remove when we have a way to marking a MI with these properties.
+// FIXME: Should pc be an implicit operand like PICADD, etc?
+let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1,
+ hasExtraDefRegAllocReq = 1, isCodeGenOnly = 1 in
+def t2LDMIA_RET: t2PseudoExpand<(outs GPR:$wb), (ins GPR:$Rn, pred:$p,
+ reglist:$regs, variable_ops),
+ 4, IIC_iLoad_mBr, [],
+ (t2LDMIA_UPD GPR:$wb, GPR:$Rn, pred:$p, reglist:$regs)>,
+ RegConstraint<"$Rn = $wb">;
+
+let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
+let isPredicable = 1 in
+def t2B : T2I<(outs), (ins uncondbrtarget:$target), IIC_Br,
+ "b", ".w\t$target",
+ [(br bb:$target)]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{15-14} = 0b10;
+ let Inst{12} = 1;
+
+ bits<20> target;
+ let Inst{26} = target{19};
+ let Inst{11} = target{18};
+ let Inst{13} = target{17};
+ let Inst{21-16} = target{16-11};
+ let Inst{10-0} = target{10-0};
+}
+
+let isNotDuplicable = 1, isIndirectBranch = 1 in {
+def t2BR_JT : t2PseudoInst<(outs),
+ (ins GPR:$target, GPR:$index, i32imm:$jt, i32imm:$id),
+ 0, IIC_Br,
+ [(ARMbr2jt GPR:$target, GPR:$index, tjumptable:$jt, imm:$id)]>;
+
+// FIXME: Add a non-pc based case that can be predicated.
+def t2TBB_JT : t2PseudoInst<(outs),
+ (ins GPR:$index, i32imm:$jt, i32imm:$id), 0, IIC_Br, []>;
+
+def t2TBH_JT : t2PseudoInst<(outs),
+ (ins GPR:$index, i32imm:$jt, i32imm:$id), 0, IIC_Br, []>;
+
+def t2TBB : T2I<(outs), (ins addrmode_tbb:$addr), IIC_Br,
+ "tbb", "\t$addr", []> {
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{31-20} = 0b111010001101;
+ let Inst{19-16} = Rn;
+ let Inst{15-5} = 0b11110000000;
+ let Inst{4} = 0; // B form
+ let Inst{3-0} = Rm;
+
+ let DecoderMethod = "DecodeThumbTableBranch";
+}
+
+def t2TBH : T2I<(outs), (ins addrmode_tbh:$addr), IIC_Br,
+ "tbh", "\t$addr", []> {
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{31-20} = 0b111010001101;
+ let Inst{19-16} = Rn;
+ let Inst{15-5} = 0b11110000000;
+ let Inst{4} = 1; // H form
+ let Inst{3-0} = Rm;
+
+ let DecoderMethod = "DecodeThumbTableBranch";
+}
+} // isNotDuplicable, isIndirectBranch
+
+} // isBranch, isTerminator, isBarrier
+
+// FIXME: should be able to write a pattern for ARMBrcond, but can't use
+// a two-value operand where a dag node expects ", "two operands. :(
+let isBranch = 1, isTerminator = 1 in
+def t2Bcc : T2I<(outs), (ins brtarget:$target), IIC_Br,
+ "b", ".w\t$target",
+ [/*(ARMbrcond bb:$target, imm:$cc)*/]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{15-14} = 0b10;
+ let Inst{12} = 0;
+
+ bits<4> p;
+ let Inst{25-22} = p;
+
+ bits<21> target;
+ let Inst{26} = target{20};
+ let Inst{11} = target{19};
+ let Inst{13} = target{18};
+ let Inst{21-16} = target{17-12};
+ let Inst{10-0} = target{11-1};
+
+ let DecoderMethod = "DecodeThumb2BCCInstruction";
+}
+
+// Tail calls. The Darwin version of thumb tail calls uses a t2 branch, so
+// it goes here.
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
+ // Darwin version.
+ let Defs = [R0, R1, R2, R3, R9, R12, QQQQ0, QQQQ2, QQQQ3, PC],
+ Uses = [SP] in
+ def tTAILJMPd: tPseudoExpand<(outs),
+ (ins uncondbrtarget:$dst, pred:$p, variable_ops),
+ 4, IIC_Br, [],
+ (t2B uncondbrtarget:$dst, pred:$p)>,
+ Requires<[IsThumb2, IsDarwin]>;
+}
+
+// IT block
+let Defs = [ITSTATE] in
+def t2IT : Thumb2XI<(outs), (ins it_pred:$cc, it_mask:$mask),
+ AddrModeNone, 2, IIC_iALUx,
+ "it$mask\t$cc", "", []> {
+ // 16-bit instruction.
+ let Inst{31-16} = 0x0000;
+ let Inst{15-8} = 0b10111111;
+
+ bits<4> cc;
+ bits<4> mask;
+ let Inst{7-4} = cc;
+ let Inst{3-0} = mask;
+
+ let DecoderMethod = "DecodeIT";
+}
+
+// Branch and Exchange Jazelle -- for disassembly only
+// Rm = Inst{19-16}
+def t2BXJ : T2I<(outs), (ins rGPR:$func), NoItinerary, "bxj", "\t$func", []> {
+ bits<4> func;
+ let Inst{31-27} = 0b11110;
+ let Inst{26} = 0;
+ let Inst{25-20} = 0b111100;
+ let Inst{19-16} = func;
+ let Inst{15-0} = 0b1000111100000000;
+}
+
+// Compare and branch on zero / non-zero
+let isBranch = 1, isTerminator = 1 in {
+ def tCBZ : T1I<(outs), (ins tGPR:$Rn, t_cbtarget:$target), IIC_Br,
+ "cbz\t$Rn, $target", []>,
+ T1Misc<{0,0,?,1,?,?,?}>,
+ Requires<[IsThumb2]> {
+ // A8.6.27
+ bits<6> target;
+ bits<3> Rn;
+ let Inst{9} = target{5};
+ let Inst{7-3} = target{4-0};
+ let Inst{2-0} = Rn;
+ }
+
+ def tCBNZ : T1I<(outs), (ins tGPR:$Rn, t_cbtarget:$target), IIC_Br,
+ "cbnz\t$Rn, $target", []>,
+ T1Misc<{1,0,?,1,?,?,?}>,
+ Requires<[IsThumb2]> {
+ // A8.6.27
+ bits<6> target;
+ bits<3> Rn;
+ let Inst{9} = target{5};
+ let Inst{7-3} = target{4-0};
+ let Inst{2-0} = Rn;
+ }
+}
+
+
+// Change Processor State is a system instruction.
+// FIXME: Since the asm parser has currently no clean way to handle optional
+// operands, create 3 versions of the same instruction. Once there's a clean
+// framework to represent optional operands, change this behavior.
+class t2CPS<dag iops, string asm_op> : T2XI<(outs), iops, NoItinerary,
+ !strconcat("cps", asm_op), []> {
+ bits<2> imod;
+ bits<3> iflags;
+ bits<5> mode;
+ bit M;
+
+ let Inst{31-27} = 0b11110;
+ let Inst{26} = 0;
+ let Inst{25-20} = 0b111010;
+ let Inst{19-16} = 0b1111;
+ let Inst{15-14} = 0b10;
+ let Inst{12} = 0;
+ let Inst{10-9} = imod;
+ let Inst{8} = M;
+ let Inst{7-5} = iflags;
+ let Inst{4-0} = mode;
+ let DecoderMethod = "DecodeT2CPSInstruction";
+}
+
+let M = 1 in
+ def t2CPS3p : t2CPS<(ins imod_op:$imod, iflags_op:$iflags, i32imm:$mode),
+ "$imod.w\t$iflags, $mode">;
+let mode = 0, M = 0 in
+ def t2CPS2p : t2CPS<(ins imod_op:$imod, iflags_op:$iflags),
+ "$imod.w\t$iflags">;
+let imod = 0, iflags = 0, M = 1 in
+ def t2CPS1p : t2CPS<(ins imm0_31:$mode), "\t$mode">;
+
+// A6.3.4 Branches and miscellaneous control
+// Table A6-14 Change Processor State, and hint instructions
+class T2I_hint<bits<8> op7_0, string opc, string asm>
+ : T2I<(outs), (ins), NoItinerary, opc, asm, []> {
+ let Inst{31-20} = 0xf3a;
+ let Inst{19-16} = 0b1111;
+ let Inst{15-14} = 0b10;
+ let Inst{12} = 0;
+ let Inst{10-8} = 0b000;
+ let Inst{7-0} = op7_0;
+}
+
+def t2NOP : T2I_hint<0b00000000, "nop", ".w">;
+def t2YIELD : T2I_hint<0b00000001, "yield", ".w">;
+def t2WFE : T2I_hint<0b00000010, "wfe", ".w">;
+def t2WFI : T2I_hint<0b00000011, "wfi", ".w">;
+def t2SEV : T2I_hint<0b00000100, "sev", ".w">;
+
+def t2DBG : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "dbg", "\t$opt", []> {
+ bits<4> opt;
+ let Inst{31-20} = 0b111100111010;
+ let Inst{19-16} = 0b1111;
+ let Inst{15-8} = 0b10000000;
+ let Inst{7-4} = 0b1111;
+ let Inst{3-0} = opt;
+}
+
+// Secure Monitor Call is a system instruction.
+// Option = Inst{19-16}
+def t2SMC : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "smc", "\t$opt", []> {
+ let Inst{31-27} = 0b11110;
+ let Inst{26-20} = 0b1111111;
+ let Inst{15-12} = 0b1000;
+
+ bits<4> opt;
+ let Inst{19-16} = opt;
+}
+
+class T2SRS<bits<2> Op, bit W, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<5> mode;
+ let Inst{31-25} = 0b1110100;
+ let Inst{24-23} = Op;
+ let Inst{22} = 0;
+ let Inst{21} = W;
+ let Inst{20-16} = 0b01101;
+ let Inst{15-5} = 0b11000000000;
+ let Inst{4-0} = mode{4-0};
+}
+
+// Store Return State is a system instruction.
+def t2SRSDB_UPD : T2SRS<0b00, 1, (outs), (ins imm0_31:$mode), NoItinerary,
+ "srsdb", "\tsp!, $mode", []>;
+def t2SRSDB : T2SRS<0b00, 0, (outs), (ins imm0_31:$mode), NoItinerary,
+ "srsdb","\tsp, $mode", []>;
+def t2SRSIA_UPD : T2SRS<0b11, 1, (outs), (ins imm0_31:$mode), NoItinerary,
+ "srsia","\tsp!, $mode", []>;
+def t2SRSIA : T2SRS<0b11, 0, (outs), (ins imm0_31:$mode), NoItinerary,
+ "srsia","\tsp, $mode", []>;
+
+// Return From Exception is a system instruction.
+class T2RFE<bits<12> op31_20, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ let Inst{31-20} = op31_20{11-0};
+
+ bits<4> Rn;
+ let Inst{19-16} = Rn;
+ let Inst{15-0} = 0xc000;
+}
+
+def t2RFEDBW : T2RFE<0b111010000011,
+ (outs), (ins GPR:$Rn), NoItinerary, "rfedb", "\t$Rn!",
+ [/* For disassembly only; pattern left blank */]>;
+def t2RFEDB : T2RFE<0b111010000001,
+ (outs), (ins GPR:$Rn), NoItinerary, "rfedb", "\t$Rn",
+ [/* For disassembly only; pattern left blank */]>;
+def t2RFEIAW : T2RFE<0b111010011011,
+ (outs), (ins GPR:$Rn), NoItinerary, "rfeia", "\t$Rn!",
+ [/* For disassembly only; pattern left blank */]>;
+def t2RFEIA : T2RFE<0b111010011001,
+ (outs), (ins GPR:$Rn), NoItinerary, "rfeia", "\t$Rn",
+ [/* For disassembly only; pattern left blank */]>;
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//
+
+// 32-bit immediate using movw + movt.
+// This is a single pseudo instruction to make it re-materializable.
+// FIXME: Remove this when we can do generalized remat.
+let isReMaterializable = 1, isMoveImm = 1 in
+def t2MOVi32imm : PseudoInst<(outs rGPR:$dst), (ins i32imm:$src), IIC_iMOVix2,
+ [(set rGPR:$dst, (i32 imm:$src))]>,
+ Requires<[IsThumb, HasV6T2]>;
+
+// Pseudo instruction that combines movw + movt + add pc (if pic).
+// It also makes it possible to rematerialize the instructions.
+// FIXME: Remove this when we can do generalized remat and when machine licm
+// can properly the instructions.
+let isReMaterializable = 1 in {
+def t2MOV_ga_pcrel : PseudoInst<(outs rGPR:$dst), (ins i32imm:$addr),
+ IIC_iMOVix2addpc,
+ [(set rGPR:$dst, (ARMWrapperPIC tglobaladdr:$addr))]>,
+ Requires<[IsThumb2, UseMovt]>;
+
+def t2MOV_ga_dyn : PseudoInst<(outs rGPR:$dst), (ins i32imm:$addr),
+ IIC_iMOVix2,
+ [(set rGPR:$dst, (ARMWrapperDYN tglobaladdr:$addr))]>,
+ Requires<[IsThumb2, UseMovt]>;
+}
+
+// ConstantPool, GlobalAddress, and JumpTable
+def : T2Pat<(ARMWrapper tglobaladdr :$dst), (t2LEApcrel tglobaladdr :$dst)>,
+ Requires<[IsThumb2, DontUseMovt]>;
+def : T2Pat<(ARMWrapper tconstpool :$dst), (t2LEApcrel tconstpool :$dst)>;
+def : T2Pat<(ARMWrapper tglobaladdr :$dst), (t2MOVi32imm tglobaladdr :$dst)>,
+ Requires<[IsThumb2, UseMovt]>;
+
+def : T2Pat<(ARMWrapperJT tjumptable:$dst, imm:$id),
+ (t2LEApcrelJT tjumptable:$dst, imm:$id)>;
+
+// Pseudo instruction that combines ldr from constpool and add pc. This should
+// be expanded into two instructions late to allow if-conversion and
+// scheduling.
+let canFoldAsLoad = 1, isReMaterializable = 1 in
+def t2LDRpci_pic : PseudoInst<(outs rGPR:$dst), (ins i32imm:$addr, pclabel:$cp),
+ IIC_iLoadiALU,
+ [(set rGPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)),
+ imm:$cp))]>,
+ Requires<[IsThumb2]>;
+
+// Pseudo isntruction that combines movs + predicated rsbmi
+// to implement integer ABS
+let usesCustomInserter = 1, Defs = [CPSR] in {
+def t2ABS : PseudoInst<(outs rGPR:$dst), (ins rGPR:$src),
+ NoItinerary, []>, Requires<[IsThumb2]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Coprocessor load/store -- for disassembly only
+//
+class T2CI<bits<4> op31_28, dag oops, dag iops, string opc, string asm>
+ : T2I<oops, iops, NoItinerary, opc, asm, []> {
+ let Inst{31-28} = op31_28;
+ let Inst{27-25} = 0b110;
+}
+
+multiclass t2LdStCop<bits<4> op31_28, bit load, bit Dbit, string asm> {
+ def _OFFSET : T2CI<op31_28,
+ (outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5:$addr),
+ asm, "\t$cop, $CRd, $addr"> {
+ bits<13> addr;
+ bits<4> cop;
+ bits<4> CRd;
+ let Inst{24} = 1; // P = 1
+ let Inst{23} = addr{8};
+ let Inst{22} = Dbit;
+ let Inst{21} = 0; // W = 0
+ let Inst{20} = load;
+ let Inst{19-16} = addr{12-9};
+ let Inst{15-12} = CRd;
+ let Inst{11-8} = cop;
+ let Inst{7-0} = addr{7-0};
+ let DecoderMethod = "DecodeCopMemInstruction";
+ }
+ def _PRE : T2CI<op31_28,
+ (outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5:$addr),
+ asm, "\t$cop, $CRd, $addr!"> {
+ bits<13> addr;
+ bits<4> cop;
+ bits<4> CRd;
+ let Inst{24} = 1; // P = 1
+ let Inst{23} = addr{8};
+ let Inst{22} = Dbit;
+ let Inst{21} = 1; // W = 1
+ let Inst{20} = load;
+ let Inst{19-16} = addr{12-9};
+ let Inst{15-12} = CRd;
+ let Inst{11-8} = cop;
+ let Inst{7-0} = addr{7-0};
+ let DecoderMethod = "DecodeCopMemInstruction";
+ }
+ def _POST: T2CI<op31_28,
+ (outs), (ins p_imm:$cop, c_imm:$CRd, addr_offset_none:$addr,
+ postidx_imm8s4:$offset),
+ asm, "\t$cop, $CRd, $addr, $offset"> {
+ bits<9> offset;
+ bits<4> addr;
+ bits<4> cop;
+ bits<4> CRd;
+ let Inst{24} = 0; // P = 0
+ let Inst{23} = offset{8};
+ let Inst{22} = Dbit;
+ let Inst{21} = 1; // W = 1
+ let Inst{20} = load;
+ let Inst{19-16} = addr;
+ let Inst{15-12} = CRd;
+ let Inst{11-8} = cop;
+ let Inst{7-0} = offset{7-0};
+ let DecoderMethod = "DecodeCopMemInstruction";
+ }
+ def _OPTION : T2CI<op31_28, (outs),
+ (ins p_imm:$cop, c_imm:$CRd, addr_offset_none:$addr,
+ coproc_option_imm:$option),
+ asm, "\t$cop, $CRd, $addr, $option"> {
+ bits<8> option;
+ bits<4> addr;
+ bits<4> cop;
+ bits<4> CRd;
+ let Inst{24} = 0; // P = 0
+ let Inst{23} = 1; // U = 1
+ let Inst{22} = Dbit;
+ let Inst{21} = 0; // W = 0
+ let Inst{20} = load;
+ let Inst{19-16} = addr;
+ let Inst{15-12} = CRd;
+ let Inst{11-8} = cop;
+ let Inst{7-0} = option;
+ let DecoderMethod = "DecodeCopMemInstruction";
+ }
+}
+
+defm t2LDC : t2LdStCop<0b1110, 1, 0, "ldc">;
+defm t2LDCL : t2LdStCop<0b1110, 1, 1, "ldcl">;
+defm t2STC : t2LdStCop<0b1110, 0, 0, "stc">;
+defm t2STCL : t2LdStCop<0b1110, 0, 1, "stcl">;
+defm t2LDC2 : t2LdStCop<0b1111, 1, 0, "ldc2">;
+defm t2LDC2L : t2LdStCop<0b1111, 1, 1, "ldc2l">;
+defm t2STC2 : t2LdStCop<0b1111, 0, 0, "stc2">;
+defm t2STC2L : t2LdStCop<0b1111, 0, 1, "stc2l">;
+
+
+//===----------------------------------------------------------------------===//
+// Move between special register and ARM core register -- for disassembly only
+//
+// Move to ARM core register from Special Register
+
+// A/R class MRS.
+//
+// A/R class can only move from CPSR or SPSR.
+def t2MRS_AR : T2I<(outs GPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, apsr", []>,
+ Requires<[IsThumb2,IsARClass]> {
+ bits<4> Rd;
+ let Inst{31-12} = 0b11110011111011111000;
+ let Inst{11-8} = Rd;
+ let Inst{7-0} = 0b0000;
+}
+
+def : t2InstAlias<"mrs${p} $Rd, cpsr", (t2MRS_AR GPR:$Rd, pred:$p)>;
+
+def t2MRSsys_AR: T2I<(outs GPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, spsr", []>,
+ Requires<[IsThumb2,IsARClass]> {
+ bits<4> Rd;
+ let Inst{31-12} = 0b11110011111111111000;
+ let Inst{11-8} = Rd;
+ let Inst{7-0} = 0b0000;
+}
+
+// M class MRS.
+//
+// This MRS has a mask field in bits 7-0 and can take more values than
+// the A/R class (a full msr_mask).
+def t2MRS_M : T2I<(outs rGPR:$Rd), (ins msr_mask:$mask), NoItinerary,
+ "mrs", "\t$Rd, $mask", []>,
+ Requires<[IsThumb2,IsMClass]> {
+ bits<4> Rd;
+ bits<8> mask;
+ let Inst{31-12} = 0b11110011111011111000;
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = 0b1111;
+ let Inst{7-0} = mask;
+}
+
+
+// Move from ARM core register to Special Register
+//
+// A/R class MSR.
+//
+// No need to have both system and application versions, the encodings are the
+// same and the assembly parser has no way to distinguish between them. The mask
+// operand contains the special register (R Bit) in bit 4 and bits 3-0 contains
+// the mask with the fields to be accessed in the special register.
+def t2MSR_AR : T2I<(outs), (ins msr_mask:$mask, rGPR:$Rn),
+ NoItinerary, "msr", "\t$mask, $Rn", []>,
+ Requires<[IsThumb2,IsARClass]> {
+ bits<5> mask;
+ bits<4> Rn;
+ let Inst{31-21} = 0b11110011100;
+ let Inst{20} = mask{4}; // R Bit
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = 0b1000;
+ let Inst{11-8} = mask{3-0};
+ let Inst{7-0} = 0;
+}
+
+// M class MSR.
+//
+// Move from ARM core register to Special Register
+def t2MSR_M : T2I<(outs), (ins msr_mask:$SYSm, rGPR:$Rn),
+ NoItinerary, "msr", "\t$SYSm, $Rn", []>,
+ Requires<[IsThumb2,IsMClass]> {
+ bits<8> SYSm;
+ bits<4> Rn;
+ let Inst{31-21} = 0b11110011100;
+ let Inst{20} = 0b0;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = 0b1000;
+ let Inst{7-0} = SYSm;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Move between coprocessor and ARM core register
+//
+
+class t2MovRCopro<bits<4> Op, string opc, bit direction, dag oops, dag iops,
+ list<dag> pattern>
+ : T2Cop<Op, oops, iops,
+ !strconcat(opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2"),
+ pattern> {
+ let Inst{27-24} = 0b1110;
+ let Inst{20} = direction;
+ let Inst{4} = 1;
+
+ bits<4> Rt;
+ bits<4> cop;
+ bits<3> opc1;
+ bits<3> opc2;
+ bits<4> CRm;
+ bits<4> CRn;
+
+ let Inst{15-12} = Rt;
+ let Inst{11-8} = cop;
+ let Inst{23-21} = opc1;
+ let Inst{7-5} = opc2;
+ let Inst{3-0} = CRm;
+ let Inst{19-16} = CRn;
+}
+
+class t2MovRRCopro<bits<4> Op, string opc, bit direction,
+ list<dag> pattern = []>
+ : T2Cop<Op, (outs),
+ (ins p_imm:$cop, imm0_15:$opc1, GPR:$Rt, GPR:$Rt2, c_imm:$CRm),
+ !strconcat(opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm"), pattern> {
+ let Inst{27-24} = 0b1100;
+ let Inst{23-21} = 0b010;
+ let Inst{20} = direction;
+
+ bits<4> Rt;
+ bits<4> Rt2;
+ bits<4> cop;
+ bits<4> opc1;
+ bits<4> CRm;
+
+ let Inst{15-12} = Rt;
+ let Inst{19-16} = Rt2;
+ let Inst{11-8} = cop;
+ let Inst{7-4} = opc1;
+ let Inst{3-0} = CRm;
+}
+
+/* from ARM core register to coprocessor */
+def t2MCR : t2MovRCopro<0b1110, "mcr", 0,
+ (outs),
+ (ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
+ c_imm:$CRm, imm0_7:$opc2),
+ [(int_arm_mcr imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
+ imm:$CRm, imm:$opc2)]>;
+def t2MCR2 : t2MovRCopro<0b1111, "mcr2", 0,
+ (outs), (ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
+ c_imm:$CRm, imm0_7:$opc2),
+ [(int_arm_mcr2 imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
+ imm:$CRm, imm:$opc2)]>;
+
+/* from coprocessor to ARM core register */
+def t2MRC : t2MovRCopro<0b1110, "mrc", 1,
+ (outs GPR:$Rt), (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
+ c_imm:$CRm, imm0_7:$opc2), []>;
+
+def t2MRC2 : t2MovRCopro<0b1111, "mrc2", 1,
+ (outs GPR:$Rt), (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
+ c_imm:$CRm, imm0_7:$opc2), []>;
+
+def : T2v6Pat<(int_arm_mrc imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2),
+ (t2MRC imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>;
+
+def : T2v6Pat<(int_arm_mrc2 imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2),
+ (t2MRC2 imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>;
+
+
+/* from ARM core register to coprocessor */
+def t2MCRR : t2MovRRCopro<0b1110, "mcrr", 0,
+ [(int_arm_mcrr imm:$cop, imm:$opc1, GPR:$Rt, GPR:$Rt2,
+ imm:$CRm)]>;
+def t2MCRR2 : t2MovRRCopro<0b1111, "mcrr2", 0,
+ [(int_arm_mcrr2 imm:$cop, imm:$opc1, GPR:$Rt,
+ GPR:$Rt2, imm:$CRm)]>;
+/* from coprocessor to ARM core register */
+def t2MRRC : t2MovRRCopro<0b1110, "mrrc", 1>;
+
+def t2MRRC2 : t2MovRRCopro<0b1111, "mrrc2", 1>;
+
+//===----------------------------------------------------------------------===//
+// Other Coprocessor Instructions.
+//
+
+def tCDP : T2Cop<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1,
+ c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2),
+ "cdp\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
+ [(int_arm_cdp imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn,
+ imm:$CRm, imm:$opc2)]> {
+ let Inst{27-24} = 0b1110;
+
+ bits<4> opc1;
+ bits<4> CRn;
+ bits<4> CRd;
+ bits<4> cop;
+ bits<3> opc2;
+ bits<4> CRm;
+
+ let Inst{3-0} = CRm;
+ let Inst{4} = 0;
+ let Inst{7-5} = opc2;
+ let Inst{11-8} = cop;
+ let Inst{15-12} = CRd;
+ let Inst{19-16} = CRn;
+ let Inst{23-20} = opc1;
+}
+
+def t2CDP2 : T2Cop<0b1111, (outs), (ins p_imm:$cop, imm0_15:$opc1,
+ c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2),
+ "cdp2\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
+ [(int_arm_cdp2 imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn,
+ imm:$CRm, imm:$opc2)]> {
+ let Inst{27-24} = 0b1110;
+
+ bits<4> opc1;
+ bits<4> CRn;
+ bits<4> CRd;
+ bits<4> cop;
+ bits<3> opc2;
+ bits<4> CRm;
+
+ let Inst{3-0} = CRm;
+ let Inst{4} = 0;
+ let Inst{7-5} = opc2;
+ let Inst{11-8} = cop;
+ let Inst{15-12} = CRd;
+ let Inst{19-16} = CRn;
+ let Inst{23-20} = opc1;
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//
+
+// SXT/UXT with no rotate
+let AddedComplexity = 16 in {
+def : T2Pat<(and rGPR:$Rm, 0x000000FF), (t2UXTB rGPR:$Rm, 0)>,
+ Requires<[IsThumb2]>;
+def : T2Pat<(and rGPR:$Rm, 0x0000FFFF), (t2UXTH rGPR:$Rm, 0)>,
+ Requires<[IsThumb2]>;
+def : T2Pat<(and rGPR:$Rm, 0x00FF00FF), (t2UXTB16 rGPR:$Rm, 0)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+def : T2Pat<(add rGPR:$Rn, (and rGPR:$Rm, 0x00FF)),
+ (t2UXTAB rGPR:$Rn, rGPR:$Rm, 0)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+def : T2Pat<(add rGPR:$Rn, (and rGPR:$Rm, 0xFFFF)),
+ (t2UXTAH rGPR:$Rn, rGPR:$Rm, 0)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+}
+
+def : T2Pat<(sext_inreg rGPR:$Src, i8), (t2SXTB rGPR:$Src, 0)>,
+ Requires<[IsThumb2]>;
+def : T2Pat<(sext_inreg rGPR:$Src, i16), (t2SXTH rGPR:$Src, 0)>,
+ Requires<[IsThumb2]>;
+def : T2Pat<(add rGPR:$Rn, (sext_inreg rGPR:$Rm, i8)),
+ (t2SXTAB rGPR:$Rn, rGPR:$Rm, 0)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+def : T2Pat<(add rGPR:$Rn, (sext_inreg rGPR:$Rm, i16)),
+ (t2SXTAH rGPR:$Rn, rGPR:$Rm, 0)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+
+// Atomic load/store patterns
+def : T2Pat<(atomic_load_8 t2addrmode_imm12:$addr),
+ (t2LDRBi12 t2addrmode_imm12:$addr)>;
+def : T2Pat<(atomic_load_8 t2addrmode_negimm8:$addr),
+ (t2LDRBi8 t2addrmode_negimm8:$addr)>;
+def : T2Pat<(atomic_load_8 t2addrmode_so_reg:$addr),
+ (t2LDRBs t2addrmode_so_reg:$addr)>;
+def : T2Pat<(atomic_load_16 t2addrmode_imm12:$addr),
+ (t2LDRHi12 t2addrmode_imm12:$addr)>;
+def : T2Pat<(atomic_load_16 t2addrmode_negimm8:$addr),
+ (t2LDRHi8 t2addrmode_negimm8:$addr)>;
+def : T2Pat<(atomic_load_16 t2addrmode_so_reg:$addr),
+ (t2LDRHs t2addrmode_so_reg:$addr)>;
+def : T2Pat<(atomic_load_32 t2addrmode_imm12:$addr),
+ (t2LDRi12 t2addrmode_imm12:$addr)>;
+def : T2Pat<(atomic_load_32 t2addrmode_negimm8:$addr),
+ (t2LDRi8 t2addrmode_negimm8:$addr)>;
+def : T2Pat<(atomic_load_32 t2addrmode_so_reg:$addr),
+ (t2LDRs t2addrmode_so_reg:$addr)>;
+def : T2Pat<(atomic_store_8 t2addrmode_imm12:$addr, GPR:$val),
+ (t2STRBi12 GPR:$val, t2addrmode_imm12:$addr)>;
+def : T2Pat<(atomic_store_8 t2addrmode_negimm8:$addr, GPR:$val),
+ (t2STRBi8 GPR:$val, t2addrmode_negimm8:$addr)>;
+def : T2Pat<(atomic_store_8 t2addrmode_so_reg:$addr, GPR:$val),
+ (t2STRBs GPR:$val, t2addrmode_so_reg:$addr)>;
+def : T2Pat<(atomic_store_16 t2addrmode_imm12:$addr, GPR:$val),
+ (t2STRHi12 GPR:$val, t2addrmode_imm12:$addr)>;
+def : T2Pat<(atomic_store_16 t2addrmode_negimm8:$addr, GPR:$val),
+ (t2STRHi8 GPR:$val, t2addrmode_negimm8:$addr)>;
+def : T2Pat<(atomic_store_16 t2addrmode_so_reg:$addr, GPR:$val),
+ (t2STRHs GPR:$val, t2addrmode_so_reg:$addr)>;
+def : T2Pat<(atomic_store_32 t2addrmode_imm12:$addr, GPR:$val),
+ (t2STRi12 GPR:$val, t2addrmode_imm12:$addr)>;
+def : T2Pat<(atomic_store_32 t2addrmode_negimm8:$addr, GPR:$val),
+ (t2STRi8 GPR:$val, t2addrmode_negimm8:$addr)>;
+def : T2Pat<(atomic_store_32 t2addrmode_so_reg:$addr, GPR:$val),
+ (t2STRs GPR:$val, t2addrmode_so_reg:$addr)>;
+
+
+//===----------------------------------------------------------------------===//
+// Assembler aliases
+//
+
+// Aliases for ADC without the ".w" optional width specifier.
+def : t2InstAlias<"adc${s}${p} $Rd, $Rn, $Rm",
+ (t2ADCrr rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"adc${s}${p} $Rd, $Rn, $ShiftedRm",
+ (t2ADCrs rGPR:$Rd, rGPR:$Rn, t2_so_reg:$ShiftedRm,
+ pred:$p, cc_out:$s)>;
+
+// Aliases for SBC without the ".w" optional width specifier.
+def : t2InstAlias<"sbc${s}${p} $Rd, $Rn, $Rm",
+ (t2SBCrr rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"sbc${s}${p} $Rd, $Rn, $ShiftedRm",
+ (t2SBCrs rGPR:$Rd, rGPR:$Rn, t2_so_reg:$ShiftedRm,
+ pred:$p, cc_out:$s)>;
+
+// Aliases for ADD without the ".w" optional width specifier.
+def : t2InstAlias<"add${s}${p} $Rd, $Rn, $imm",
+ (t2ADDri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"add${p} $Rd, $Rn, $imm",
+ (t2ADDri12 GPRnopc:$Rd, GPR:$Rn, imm0_4095:$imm, pred:$p)>;
+def : t2InstAlias<"add${s}${p} $Rd, $Rn, $Rm",
+ (t2ADDrr GPRnopc:$Rd, GPRnopc:$Rn, rGPR:$Rm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"add${s}${p} $Rd, $Rn, $ShiftedRm",
+ (t2ADDrs GPRnopc:$Rd, GPRnopc:$Rn, t2_so_reg:$ShiftedRm,
+ pred:$p, cc_out:$s)>;
+
+// Aliases for SUB without the ".w" optional width specifier.
+def : t2InstAlias<"sub${s}${p} $Rd, $Rn, $imm",
+ (t2SUBri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"sub${p} $Rd, $Rn, $imm",
+ (t2SUBri12 GPRnopc:$Rd, GPR:$Rn, imm0_4095:$imm, pred:$p)>;
+def : t2InstAlias<"sub${s}${p} $Rd, $Rn, $Rm",
+ (t2SUBrr GPRnopc:$Rd, GPRnopc:$Rn, rGPR:$Rm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"sub${s}${p} $Rd, $Rn, $ShiftedRm",
+ (t2SUBrs GPRnopc:$Rd, GPRnopc:$Rn, t2_so_reg:$ShiftedRm,
+ pred:$p, cc_out:$s)>;
+
+// Alias for compares without the ".w" optional width specifier.
+def : t2InstAlias<"cmn${p} $Rn, $Rm",
+ (t2CMNzrr GPRnopc:$Rn, rGPR:$Rm, pred:$p)>;
+def : t2InstAlias<"teq${p} $Rn, $Rm",
+ (t2TEQrr GPRnopc:$Rn, rGPR:$Rm, pred:$p)>;
+def : t2InstAlias<"tst${p} $Rn, $Rm",
+ (t2TSTrr GPRnopc:$Rn, rGPR:$Rm, pred:$p)>;
+
+// Memory barriers
+def : InstAlias<"dmb", (t2DMB 0xf)>, Requires<[IsThumb2, HasDB]>;
+def : InstAlias<"dsb", (t2DSB 0xf)>, Requires<[IsThumb2, HasDB]>;
+def : InstAlias<"isb", (t2ISB 0xf)>, Requires<[IsThumb2, HasDB]>;
+
+// Alias for LDR, LDRB, LDRH, LDRSB, and LDRSH without the ".w" optional
+// width specifier.
+def : t2InstAlias<"ldr${p} $Rt, $addr",
+ (t2LDRi12 GPR:$Rt, t2addrmode_imm12:$addr, pred:$p)>;
+def : t2InstAlias<"ldrb${p} $Rt, $addr",
+ (t2LDRBi12 rGPR:$Rt, t2addrmode_imm12:$addr, pred:$p)>;
+def : t2InstAlias<"ldrh${p} $Rt, $addr",
+ (t2LDRHi12 rGPR:$Rt, t2addrmode_imm12:$addr, pred:$p)>;
+def : t2InstAlias<"ldrsb${p} $Rt, $addr",
+ (t2LDRSBi12 rGPR:$Rt, t2addrmode_imm12:$addr, pred:$p)>;
+def : t2InstAlias<"ldrsh${p} $Rt, $addr",
+ (t2LDRSHi12 rGPR:$Rt, t2addrmode_imm12:$addr, pred:$p)>;
+
+def : t2InstAlias<"ldr${p} $Rt, $addr",
+ (t2LDRs GPR:$Rt, t2addrmode_so_reg:$addr, pred:$p)>;
+def : t2InstAlias<"ldrb${p} $Rt, $addr",
+ (t2LDRBs rGPR:$Rt, t2addrmode_so_reg:$addr, pred:$p)>;
+def : t2InstAlias<"ldrh${p} $Rt, $addr",
+ (t2LDRHs rGPR:$Rt, t2addrmode_so_reg:$addr, pred:$p)>;
+def : t2InstAlias<"ldrsb${p} $Rt, $addr",
+ (t2LDRSBs rGPR:$Rt, t2addrmode_so_reg:$addr, pred:$p)>;
+def : t2InstAlias<"ldrsh${p} $Rt, $addr",
+ (t2LDRSHs rGPR:$Rt, t2addrmode_so_reg:$addr, pred:$p)>;
+
+// Alias for MVN without the ".w" optional width specifier.
+def : t2InstAlias<"mvn${s}${p} $Rd, $Rm",
+ (t2MVNr rGPR:$Rd, rGPR:$Rm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"mvn${s}${p} $Rd, $ShiftedRm",
+ (t2MVNs rGPR:$Rd, t2_so_reg:$ShiftedRm, pred:$p, cc_out:$s)>;
+
+// PKHBT/PKHTB with default shift amount. PKHTB is equivalent to PKHBT when the
+// shift amount is zero (i.e., unspecified).
+def : InstAlias<"pkhbt${p} $Rd, $Rn, $Rm",
+ (t2PKHBT rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+def : InstAlias<"pkhtb${p} $Rd, $Rn, $Rm",
+ (t2PKHBT rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+
+// PUSH/POP aliases for STM/LDM
+def : t2InstAlias<"push${p}.w $regs", (t2STMDB_UPD SP, pred:$p, reglist:$regs)>;
+def : t2InstAlias<"push${p} $regs", (t2STMDB_UPD SP, pred:$p, reglist:$regs)>;
+def : t2InstAlias<"pop${p}.w $regs", (t2LDMIA_UPD SP, pred:$p, reglist:$regs)>;
+def : t2InstAlias<"pop${p} $regs", (t2LDMIA_UPD SP, pred:$p, reglist:$regs)>;
+
+// Alias for REV/REV16/REVSH without the ".w" optional width specifier.
+def : t2InstAlias<"rev${p} $Rd, $Rm", (t2REV rGPR:$Rd, rGPR:$Rm, pred:$p)>;
+def : t2InstAlias<"rev16${p} $Rd, $Rm", (t2REV16 rGPR:$Rd, rGPR:$Rm, pred:$p)>;
+def : t2InstAlias<"revsh${p} $Rd, $Rm", (t2REVSH rGPR:$Rd, rGPR:$Rm, pred:$p)>;
+
+
+// Alias for RSB without the ".w" optional width specifier, and with optional
+// implied destination register.
+def : t2InstAlias<"rsb${s}${p} $Rd, $Rn, $imm",
+ (t2RSBri rGPR:$Rd, rGPR:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"rsb${s}${p} $Rdn, $imm",
+ (t2RSBri rGPR:$Rdn, rGPR:$Rdn, t2_so_imm:$imm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"rsb${s}${p} $Rdn, $Rm",
+ (t2RSBrr rGPR:$Rdn, rGPR:$Rdn, rGPR:$Rm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"rsb${s}${p} $Rdn, $ShiftedRm",
+ (t2RSBrs rGPR:$Rdn, rGPR:$Rdn, t2_so_reg:$ShiftedRm, pred:$p,
+ cc_out:$s)>;
+
+// SSAT/USAT optional shift operand.
+def : t2InstAlias<"ssat${p} $Rd, $sat_imm, $Rn",
+ (t2SSAT rGPR:$Rd, imm1_32:$sat_imm, rGPR:$Rn, 0, pred:$p)>;
+def : t2InstAlias<"usat${p} $Rd, $sat_imm, $Rn",
+ (t2USAT rGPR:$Rd, imm0_31:$sat_imm, rGPR:$Rn, 0, pred:$p)>;
+
+// STM w/o the .w suffix.
+def : t2InstAlias<"stm${p} $Rn, $regs",
+ (t2STMIA GPR:$Rn, pred:$p, reglist:$regs)>;
+
+// Alias for STR, STRB, and STRH without the ".w" optional
+// width specifier.
+def : t2InstAlias<"str${p} $Rt, $addr",
+ (t2STRi12 GPR:$Rt, t2addrmode_imm12:$addr, pred:$p)>;
+def : t2InstAlias<"strb${p} $Rt, $addr",
+ (t2STRBi12 rGPR:$Rt, t2addrmode_imm12:$addr, pred:$p)>;
+def : t2InstAlias<"strh${p} $Rt, $addr",
+ (t2STRHi12 rGPR:$Rt, t2addrmode_imm12:$addr, pred:$p)>;
+
+def : t2InstAlias<"str${p} $Rt, $addr",
+ (t2STRs GPR:$Rt, t2addrmode_so_reg:$addr, pred:$p)>;
+def : t2InstAlias<"strb${p} $Rt, $addr",
+ (t2STRBs rGPR:$Rt, t2addrmode_so_reg:$addr, pred:$p)>;
+def : t2InstAlias<"strh${p} $Rt, $addr",
+ (t2STRHs rGPR:$Rt, t2addrmode_so_reg:$addr, pred:$p)>;
+
+// Extend instruction optional rotate operand.
+def : t2InstAlias<"sxtab${p} $Rd, $Rn, $Rm",
+ (t2SXTAB rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>;
+def : t2InstAlias<"sxtah${p} $Rd, $Rn, $Rm",
+ (t2SXTAH rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>;
+def : t2InstAlias<"sxtab16${p} $Rd, $Rn, $Rm",
+ (t2SXTAB16 rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>;
+
+def : t2InstAlias<"sxtb${p} $Rd, $Rm",
+ (t2SXTB rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>;
+def : t2InstAlias<"sxtb16${p} $Rd, $Rm",
+ (t2SXTB16 rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>;
+def : t2InstAlias<"sxth${p} $Rd, $Rm",
+ (t2SXTH rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>;
+def : t2InstAlias<"sxtb${p}.w $Rd, $Rm",
+ (t2SXTB rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>;
+def : t2InstAlias<"sxth${p}.w $Rd, $Rm",
+ (t2SXTH rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>;
+
+def : t2InstAlias<"uxtab${p} $Rd, $Rn, $Rm",
+ (t2UXTAB rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>;
+def : t2InstAlias<"uxtah${p} $Rd, $Rn, $Rm",
+ (t2UXTAH rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>;
+def : t2InstAlias<"uxtab16${p} $Rd, $Rn, $Rm",
+ (t2UXTAB16 rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>;
+def : t2InstAlias<"uxtb${p} $Rd, $Rm",
+ (t2UXTB rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>;
+def : t2InstAlias<"uxtb16${p} $Rd, $Rm",
+ (t2UXTB16 rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>;
+def : t2InstAlias<"uxth${p} $Rd, $Rm",
+ (t2UXTH rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>;
+
+def : t2InstAlias<"uxtb${p}.w $Rd, $Rm",
+ (t2UXTB rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>;
+def : t2InstAlias<"uxth${p}.w $Rd, $Rm",
+ (t2UXTH rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>;
+
+// Extend instruction w/o the ".w" optional width specifier.
+def : t2InstAlias<"uxtb${p} $Rd, $Rm$rot",
+ (t2UXTB rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>;
+def : t2InstAlias<"uxtb16${p} $Rd, $Rm$rot",
+ (t2UXTB16 rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>;
+def : t2InstAlias<"uxth${p} $Rd, $Rm$rot",
+ (t2UXTH rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>;
+
+def : t2InstAlias<"sxtb${p} $Rd, $Rm$rot",
+ (t2SXTB rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>;
+def : t2InstAlias<"sxtb16${p} $Rd, $Rm$rot",
+ (t2SXTB16 rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>;
+def : t2InstAlias<"sxth${p} $Rd, $Rm$rot",
+ (t2SXTH rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>;
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td b/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td
new file mode 100644
index 0000000..e746cf2
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td
@@ -0,0 +1,1165 @@
+//===- ARMInstrVFP.td - VFP support for ARM ----------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the ARM VFP instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+def SDT_FTOI : SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisFP<1>]>;
+def SDT_ITOF : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVT<1, f32>]>;
+def SDT_CMPFP0 : SDTypeProfile<0, 1, [SDTCisFP<0>]>;
+def SDT_VMOVDRR : SDTypeProfile<1, 2, [SDTCisVT<0, f64>, SDTCisVT<1, i32>,
+ SDTCisSameAs<1, 2>]>;
+
+def arm_ftoui : SDNode<"ARMISD::FTOUI", SDT_FTOI>;
+def arm_ftosi : SDNode<"ARMISD::FTOSI", SDT_FTOI>;
+def arm_sitof : SDNode<"ARMISD::SITOF", SDT_ITOF>;
+def arm_uitof : SDNode<"ARMISD::UITOF", SDT_ITOF>;
+def arm_fmstat : SDNode<"ARMISD::FMSTAT", SDTNone, [SDNPInGlue, SDNPOutGlue]>;
+def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_ARMCmp, [SDNPOutGlue]>;
+def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0", SDT_CMPFP0, [SDNPOutGlue]>;
+def arm_fmdrr : SDNode<"ARMISD::VMOVDRR", SDT_VMOVDRR>;
+
+
+//===----------------------------------------------------------------------===//
+// Operand Definitions.
+//
+
+// 8-bit floating-point immediate encodings.
+def FPImmOperand : AsmOperandClass {
+ let Name = "FPImm";
+ let ParserMethod = "parseFPImm";
+}
+
+def vfp_f32imm : Operand<f32>,
+ PatLeaf<(f32 fpimm), [{
+ return ARM_AM::getFP32Imm(N->getValueAPF()) != -1;
+ }], SDNodeXForm<fpimm, [{
+ APFloat InVal = N->getValueAPF();
+ uint32_t enc = ARM_AM::getFP32Imm(InVal);
+ return CurDAG->getTargetConstant(enc, MVT::i32);
+ }]>> {
+ let PrintMethod = "printFPImmOperand";
+ let ParserMatchClass = FPImmOperand;
+}
+
+def vfp_f64imm : Operand<f64>,
+ PatLeaf<(f64 fpimm), [{
+ return ARM_AM::getFP64Imm(N->getValueAPF()) != -1;
+ }], SDNodeXForm<fpimm, [{
+ APFloat InVal = N->getValueAPF();
+ uint32_t enc = ARM_AM::getFP64Imm(InVal);
+ return CurDAG->getTargetConstant(enc, MVT::i32);
+ }]>> {
+ let PrintMethod = "printFPImmOperand";
+ let ParserMatchClass = FPImmOperand;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Load / store Instructions.
+//
+
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
+
+def VLDRD : ADI5<0b1101, 0b01, (outs DPR:$Dd), (ins addrmode5:$addr),
+ IIC_fpLoad64, "vldr", ".64\t$Dd, $addr",
+ [(set DPR:$Dd, (f64 (load addrmode5:$addr)))]>;
+
+def VLDRS : ASI5<0b1101, 0b01, (outs SPR:$Sd), (ins addrmode5:$addr),
+ IIC_fpLoad32, "vldr", ".32\t$Sd, $addr",
+ [(set SPR:$Sd, (load addrmode5:$addr))]> {
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
+
+} // End of 'let canFoldAsLoad = 1, isReMaterializable = 1 in'
+
+def VSTRD : ADI5<0b1101, 0b00, (outs), (ins DPR:$Dd, addrmode5:$addr),
+ IIC_fpStore64, "vstr", ".64\t$Dd, $addr",
+ [(store (f64 DPR:$Dd), addrmode5:$addr)]>;
+
+def VSTRS : ASI5<0b1101, 0b00, (outs), (ins SPR:$Sd, addrmode5:$addr),
+ IIC_fpStore32, "vstr", ".32\t$Sd, $addr",
+ [(store SPR:$Sd, addrmode5:$addr)]> {
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
+
+//===----------------------------------------------------------------------===//
+// Load / store multiple Instructions.
+//
+
+multiclass vfp_ldst_mult<string asm, bit L_bit,
+ InstrItinClass itin, InstrItinClass itin_upd> {
+ // Double Precision
+ def DIA :
+ AXDI4<(outs), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops),
+ IndexModeNone, itin,
+ !strconcat(asm, "ia${p}\t$Rn, $regs"), "", []> {
+ let Inst{24-23} = 0b01; // Increment After
+ let Inst{21} = 0; // No writeback
+ let Inst{20} = L_bit;
+ }
+ def DIA_UPD :
+ AXDI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs,
+ variable_ops),
+ IndexModeUpd, itin_upd,
+ !strconcat(asm, "ia${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ let Inst{24-23} = 0b01; // Increment After
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+ }
+ def DDB_UPD :
+ AXDI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs,
+ variable_ops),
+ IndexModeUpd, itin_upd,
+ !strconcat(asm, "db${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ let Inst{24-23} = 0b10; // Decrement Before
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+ }
+
+ // Single Precision
+ def SIA :
+ AXSI4<(outs), (ins GPR:$Rn, pred:$p, spr_reglist:$regs, variable_ops),
+ IndexModeNone, itin,
+ !strconcat(asm, "ia${p}\t$Rn, $regs"), "", []> {
+ let Inst{24-23} = 0b01; // Increment After
+ let Inst{21} = 0; // No writeback
+ let Inst{20} = L_bit;
+
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines.
+ let D = VFPNeonDomain;
+ }
+ def SIA_UPD :
+ AXSI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, spr_reglist:$regs,
+ variable_ops),
+ IndexModeUpd, itin_upd,
+ !strconcat(asm, "ia${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ let Inst{24-23} = 0b01; // Increment After
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines.
+ let D = VFPNeonDomain;
+ }
+ def SDB_UPD :
+ AXSI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, spr_reglist:$regs,
+ variable_ops),
+ IndexModeUpd, itin_upd,
+ !strconcat(asm, "db${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ let Inst{24-23} = 0b10; // Decrement Before
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines.
+ let D = VFPNeonDomain;
+ }
+}
+
+let neverHasSideEffects = 1 in {
+
+let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
+defm VLDM : vfp_ldst_mult<"vldm", 1, IIC_fpLoad_m, IIC_fpLoad_mu>;
+
+let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
+defm VSTM : vfp_ldst_mult<"vstm", 0, IIC_fpLoad_m, IIC_fpLoad_mu>;
+
+} // neverHasSideEffects
+
+def : MnemonicAlias<"vldm", "vldmia">;
+def : MnemonicAlias<"vstm", "vstmia">;
+
+def : InstAlias<"vpush${p} $r", (VSTMDDB_UPD SP, pred:$p, dpr_reglist:$r)>,
+ Requires<[HasVFP2]>;
+def : InstAlias<"vpush${p} $r", (VSTMSDB_UPD SP, pred:$p, spr_reglist:$r)>,
+ Requires<[HasVFP2]>;
+def : InstAlias<"vpop${p} $r", (VLDMDIA_UPD SP, pred:$p, dpr_reglist:$r)>,
+ Requires<[HasVFP2]>;
+def : InstAlias<"vpop${p} $r", (VLDMSIA_UPD SP, pred:$p, spr_reglist:$r)>,
+ Requires<[HasVFP2]>;
+
+// FLDMX, FSTMX - mixing S/D registers for pre-armv6 cores
+
+//===----------------------------------------------------------------------===//
+// FP Binary Operations.
+//
+
+def VADDD : ADbI<0b11100, 0b11, 0, 0,
+ (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
+ IIC_fpALU64, "vadd", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fadd DPR:$Dn, (f64 DPR:$Dm)))]>;
+
+def VADDS : ASbIn<0b11100, 0b11, 0, 0,
+ (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
+ IIC_fpALU32, "vadd", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fadd SPR:$Sn, SPR:$Sm))]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def VSUBD : ADbI<0b11100, 0b11, 1, 0,
+ (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
+ IIC_fpALU64, "vsub", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fsub DPR:$Dn, (f64 DPR:$Dm)))]>;
+
+def VSUBS : ASbIn<0b11100, 0b11, 1, 0,
+ (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
+ IIC_fpALU32, "vsub", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fsub SPR:$Sn, SPR:$Sm))]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def VDIVD : ADbI<0b11101, 0b00, 0, 0,
+ (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
+ IIC_fpDIV64, "vdiv", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fdiv DPR:$Dn, (f64 DPR:$Dm)))]>;
+
+def VDIVS : ASbI<0b11101, 0b00, 0, 0,
+ (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
+ IIC_fpDIV32, "vdiv", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fdiv SPR:$Sn, SPR:$Sm))]>;
+
+def VMULD : ADbI<0b11100, 0b10, 0, 0,
+ (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
+ IIC_fpMUL64, "vmul", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fmul DPR:$Dn, (f64 DPR:$Dm)))]>;
+
+def VMULS : ASbIn<0b11100, 0b10, 0, 0,
+ (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
+ IIC_fpMUL32, "vmul", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fmul SPR:$Sn, SPR:$Sm))]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def VNMULD : ADbI<0b11100, 0b10, 1, 0,
+ (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
+ IIC_fpMUL64, "vnmul", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fneg (fmul DPR:$Dn, (f64 DPR:$Dm))))]>;
+
+def VNMULS : ASbI<0b11100, 0b10, 1, 0,
+ (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
+ IIC_fpMUL32, "vnmul", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fneg (fmul SPR:$Sn, SPR:$Sm)))]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+// Match reassociated forms only if not sign dependent rounding.
+def : Pat<(fmul (fneg DPR:$a), (f64 DPR:$b)),
+ (VNMULD DPR:$a, DPR:$b)>, Requires<[NoHonorSignDependentRounding]>;
+def : Pat<(fmul (fneg SPR:$a), SPR:$b),
+ (VNMULS SPR:$a, SPR:$b)>, Requires<[NoHonorSignDependentRounding]>;
+
+// These are encoded as unary instructions.
+let Defs = [FPSCR] in {
+def VCMPED : ADuI<0b11101, 0b11, 0b0100, 0b11, 0,
+ (outs), (ins DPR:$Dd, DPR:$Dm),
+ IIC_fpCMP64, "vcmpe", ".f64\t$Dd, $Dm",
+ [(arm_cmpfp DPR:$Dd, (f64 DPR:$Dm))]>;
+
+def VCMPES : ASuI<0b11101, 0b11, 0b0100, 0b11, 0,
+ (outs), (ins SPR:$Sd, SPR:$Sm),
+ IIC_fpCMP32, "vcmpe", ".f32\t$Sd, $Sm",
+ [(arm_cmpfp SPR:$Sd, SPR:$Sm)]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+// FIXME: Verify encoding after integrated assembler is working.
+def VCMPD : ADuI<0b11101, 0b11, 0b0100, 0b01, 0,
+ (outs), (ins DPR:$Dd, DPR:$Dm),
+ IIC_fpCMP64, "vcmp", ".f64\t$Dd, $Dm",
+ [/* For disassembly only; pattern left blank */]>;
+
+def VCMPS : ASuI<0b11101, 0b11, 0b0100, 0b01, 0,
+ (outs), (ins SPR:$Sd, SPR:$Sm),
+ IIC_fpCMP32, "vcmp", ".f32\t$Sd, $Sm",
+ [/* For disassembly only; pattern left blank */]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+} // Defs = [FPSCR]
+
+//===----------------------------------------------------------------------===//
+// FP Unary Operations.
+//
+
+def VABSD : ADuI<0b11101, 0b11, 0b0000, 0b11, 0,
+ (outs DPR:$Dd), (ins DPR:$Dm),
+ IIC_fpUNA64, "vabs", ".f64\t$Dd, $Dm",
+ [(set DPR:$Dd, (fabs (f64 DPR:$Dm)))]>;
+
+def VABSS : ASuIn<0b11101, 0b11, 0b0000, 0b11, 0,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpUNA32, "vabs", ".f32\t$Sd, $Sm",
+ [(set SPR:$Sd, (fabs SPR:$Sm))]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+let Defs = [FPSCR] in {
+def VCMPEZD : ADuI<0b11101, 0b11, 0b0101, 0b11, 0,
+ (outs), (ins DPR:$Dd),
+ IIC_fpCMP64, "vcmpe", ".f64\t$Dd, #0",
+ [(arm_cmpfp0 (f64 DPR:$Dd))]> {
+ let Inst{3-0} = 0b0000;
+ let Inst{5} = 0;
+}
+
+def VCMPEZS : ASuI<0b11101, 0b11, 0b0101, 0b11, 0,
+ (outs), (ins SPR:$Sd),
+ IIC_fpCMP32, "vcmpe", ".f32\t$Sd, #0",
+ [(arm_cmpfp0 SPR:$Sd)]> {
+ let Inst{3-0} = 0b0000;
+ let Inst{5} = 0;
+
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+// FIXME: Verify encoding after integrated assembler is working.
+def VCMPZD : ADuI<0b11101, 0b11, 0b0101, 0b01, 0,
+ (outs), (ins DPR:$Dd),
+ IIC_fpCMP64, "vcmp", ".f64\t$Dd, #0",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{3-0} = 0b0000;
+ let Inst{5} = 0;
+}
+
+def VCMPZS : ASuI<0b11101, 0b11, 0b0101, 0b01, 0,
+ (outs), (ins SPR:$Sd),
+ IIC_fpCMP32, "vcmp", ".f32\t$Sd, #0",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{3-0} = 0b0000;
+ let Inst{5} = 0;
+
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+} // Defs = [FPSCR]
+
+def VCVTDS : ASuI<0b11101, 0b11, 0b0111, 0b11, 0,
+ (outs DPR:$Dd), (ins SPR:$Sm),
+ IIC_fpCVTDS, "vcvt", ".f64.f32\t$Dd, $Sm",
+ [(set DPR:$Dd, (fextend SPR:$Sm))]> {
+ // Instruction operands.
+ bits<5> Dd;
+ bits<5> Sm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Sm{4-1};
+ let Inst{5} = Sm{0};
+ let Inst{15-12} = Dd{3-0};
+ let Inst{22} = Dd{4};
+}
+
+// Special case encoding: bits 11-8 is 0b1011.
+def VCVTSD : VFPAI<(outs SPR:$Sd), (ins DPR:$Dm), VFPUnaryFrm,
+ IIC_fpCVTSD, "vcvt", ".f32.f64\t$Sd, $Dm",
+ [(set SPR:$Sd, (fround DPR:$Dm))]> {
+ // Instruction operands.
+ bits<5> Sd;
+ bits<5> Dm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Dm{3-0};
+ let Inst{5} = Dm{4};
+ let Inst{15-12} = Sd{4-1};
+ let Inst{22} = Sd{0};
+
+ let Inst{27-23} = 0b11101;
+ let Inst{21-16} = 0b110111;
+ let Inst{11-8} = 0b1011;
+ let Inst{7-6} = 0b11;
+ let Inst{4} = 0;
+}
+
+// Between half-precision and single-precision. For disassembly only.
+
+// FIXME: Verify encoding after integrated assembler is working.
+def VCVTBSH: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
+ /* FIXME */ IIC_fpCVTSH, "vcvtb", ".f32.f16\t$Sd, $Sm",
+ [/* For disassembly only; pattern left blank */]>;
+
+def : ARMPat<(f32_to_f16 SPR:$a),
+ (i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>;
+
+def VCVTBHS: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
+ /* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$Sd, $Sm",
+ [/* For disassembly only; pattern left blank */]>;
+
+def : ARMPat<(f16_to_f32 GPR:$a),
+ (VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>;
+
+def VCVTTSH: ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm),
+ /* FIXME */ IIC_fpCVTSH, "vcvtt", ".f32.f16\t$Sd, $Sm",
+ [/* For disassembly only; pattern left blank */]>;
+
+def VCVTTHS: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm),
+ /* FIXME */ IIC_fpCVTHS, "vcvtt", ".f16.f32\t$Sd, $Sm",
+ [/* For disassembly only; pattern left blank */]>;
+
+def VNEGD : ADuI<0b11101, 0b11, 0b0001, 0b01, 0,
+ (outs DPR:$Dd), (ins DPR:$Dm),
+ IIC_fpUNA64, "vneg", ".f64\t$Dd, $Dm",
+ [(set DPR:$Dd, (fneg (f64 DPR:$Dm)))]>;
+
+def VNEGS : ASuIn<0b11101, 0b11, 0b0001, 0b01, 0,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpUNA32, "vneg", ".f32\t$Sd, $Sm",
+ [(set SPR:$Sd, (fneg SPR:$Sm))]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def VSQRTD : ADuI<0b11101, 0b11, 0b0001, 0b11, 0,
+ (outs DPR:$Dd), (ins DPR:$Dm),
+ IIC_fpSQRT64, "vsqrt", ".f64\t$Dd, $Dm",
+ [(set DPR:$Dd, (fsqrt (f64 DPR:$Dm)))]>;
+
+def VSQRTS : ASuI<0b11101, 0b11, 0b0001, 0b11, 0,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpSQRT32, "vsqrt", ".f32\t$Sd, $Sm",
+ [(set SPR:$Sd, (fsqrt SPR:$Sm))]>;
+
+let neverHasSideEffects = 1 in {
+def VMOVD : ADuI<0b11101, 0b11, 0b0000, 0b01, 0,
+ (outs DPR:$Dd), (ins DPR:$Dm),
+ IIC_fpUNA64, "vmov", ".f64\t$Dd, $Dm", []>;
+
+def VMOVS : ASuI<0b11101, 0b11, 0b0000, 0b01, 0,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpUNA32, "vmov", ".f32\t$Sd, $Sm", []>;
+} // neverHasSideEffects
+
+//===----------------------------------------------------------------------===//
+// FP <-> GPR Copies. Int <-> FP Conversions.
+//
+
+def VMOVRS : AVConv2I<0b11100001, 0b1010,
+ (outs GPR:$Rt), (ins SPR:$Sn),
+ IIC_fpMOVSI, "vmov", "\t$Rt, $Sn",
+ [(set GPR:$Rt, (bitconvert SPR:$Sn))]> {
+ // Instruction operands.
+ bits<4> Rt;
+ bits<5> Sn;
+
+ // Encode instruction operands.
+ let Inst{19-16} = Sn{4-1};
+ let Inst{7} = Sn{0};
+ let Inst{15-12} = Rt;
+
+ let Inst{6-5} = 0b00;
+ let Inst{3-0} = 0b0000;
+
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
+
+def VMOVSR : AVConv4I<0b11100000, 0b1010,
+ (outs SPR:$Sn), (ins GPR:$Rt),
+ IIC_fpMOVIS, "vmov", "\t$Sn, $Rt",
+ [(set SPR:$Sn, (bitconvert GPR:$Rt))]> {
+ // Instruction operands.
+ bits<5> Sn;
+ bits<4> Rt;
+
+ // Encode instruction operands.
+ let Inst{19-16} = Sn{4-1};
+ let Inst{7} = Sn{0};
+ let Inst{15-12} = Rt;
+
+ let Inst{6-5} = 0b00;
+ let Inst{3-0} = 0b0000;
+
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
+
+let neverHasSideEffects = 1 in {
+def VMOVRRD : AVConv3I<0b11000101, 0b1011,
+ (outs GPR:$Rt, GPR:$Rt2), (ins DPR:$Dm),
+ IIC_fpMOVDI, "vmov", "\t$Rt, $Rt2, $Dm",
+ [/* FIXME: Can't write pattern for multiple result instr*/]> {
+ // Instruction operands.
+ bits<5> Dm;
+ bits<4> Rt;
+ bits<4> Rt2;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Dm{3-0};
+ let Inst{5} = Dm{4};
+ let Inst{15-12} = Rt;
+ let Inst{19-16} = Rt2;
+
+ let Inst{7-6} = 0b00;
+
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
+
+def VMOVRRS : AVConv3I<0b11000101, 0b1010,
+ (outs GPR:$Rt, GPR:$Rt2), (ins SPR:$src1, SPR:$src2),
+ IIC_fpMOVDI, "vmov", "\t$Rt, $Rt2, $src1, $src2",
+ [/* For disassembly only; pattern left blank */]> {
+ bits<5> src1;
+ bits<4> Rt;
+ bits<4> Rt2;
+
+ // Encode instruction operands.
+ let Inst{3-0} = src1{3-0};
+ let Inst{5} = src1{4};
+ let Inst{15-12} = Rt;
+ let Inst{19-16} = Rt2;
+
+ let Inst{7-6} = 0b00;
+
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+ let DecoderMethod = "DecodeVMOVRRS";
+}
+} // neverHasSideEffects
+
+// FMDHR: GPR -> SPR
+// FMDLR: GPR -> SPR
+
+def VMOVDRR : AVConv5I<0b11000100, 0b1011,
+ (outs DPR:$Dm), (ins GPR:$Rt, GPR:$Rt2),
+ IIC_fpMOVID, "vmov", "\t$Dm, $Rt, $Rt2",
+ [(set DPR:$Dm, (arm_fmdrr GPR:$Rt, GPR:$Rt2))]> {
+ // Instruction operands.
+ bits<5> Dm;
+ bits<4> Rt;
+ bits<4> Rt2;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Dm{3-0};
+ let Inst{5} = Dm{4};
+ let Inst{15-12} = Rt;
+ let Inst{19-16} = Rt2;
+
+ let Inst{7-6} = 0b00;
+
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
+
+let neverHasSideEffects = 1 in
+def VMOVSRR : AVConv5I<0b11000100, 0b1010,
+ (outs SPR:$dst1, SPR:$dst2), (ins GPR:$src1, GPR:$src2),
+ IIC_fpMOVID, "vmov", "\t$dst1, $dst2, $src1, $src2",
+ [/* For disassembly only; pattern left blank */]> {
+ // Instruction operands.
+ bits<5> dst1;
+ bits<4> src1;
+ bits<4> src2;
+
+ // Encode instruction operands.
+ let Inst{3-0} = dst1{3-0};
+ let Inst{5} = dst1{4};
+ let Inst{15-12} = src1;
+ let Inst{19-16} = src2;
+
+ let Inst{7-6} = 0b00;
+
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+
+ let DecoderMethod = "DecodeVMOVSRR";
+}
+
+// FMRDH: SPR -> GPR
+// FMRDL: SPR -> GPR
+// FMRRS: SPR -> GPR
+// FMRX: SPR system reg -> GPR
+// FMSRR: GPR -> SPR
+// FMXR: GPR -> VFP system reg
+
+
+// Int -> FP:
+
+class AVConv1IDs_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
+ bits<4> opcod4, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : AVConv1I<opcod1, opcod2, opcod3, opcod4, oops, iops, itin, opc, asm,
+ pattern> {
+ // Instruction operands.
+ bits<5> Dd;
+ bits<5> Sm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Sm{4-1};
+ let Inst{5} = Sm{0};
+ let Inst{15-12} = Dd{3-0};
+ let Inst{22} = Dd{4};
+}
+
+class AVConv1InSs_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
+ bits<4> opcod4, dag oops, dag iops,InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : AVConv1In<opcod1, opcod2, opcod3, opcod4, oops, iops, itin, opc, asm,
+ pattern> {
+ // Instruction operands.
+ bits<5> Sd;
+ bits<5> Sm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Sm{4-1};
+ let Inst{5} = Sm{0};
+ let Inst{15-12} = Sd{4-1};
+ let Inst{22} = Sd{0};
+}
+
+def VSITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011,
+ (outs DPR:$Dd), (ins SPR:$Sm),
+ IIC_fpCVTID, "vcvt", ".f64.s32\t$Dd, $Sm",
+ [(set DPR:$Dd, (f64 (arm_sitof SPR:$Sm)))]> {
+ let Inst{7} = 1; // s32
+}
+
+def VSITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010,
+ (outs SPR:$Sd),(ins SPR:$Sm),
+ IIC_fpCVTIS, "vcvt", ".f32.s32\t$Sd, $Sm",
+ [(set SPR:$Sd, (arm_sitof SPR:$Sm))]> {
+ let Inst{7} = 1; // s32
+
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def VUITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011,
+ (outs DPR:$Dd), (ins SPR:$Sm),
+ IIC_fpCVTID, "vcvt", ".f64.u32\t$Dd, $Sm",
+ [(set DPR:$Dd, (f64 (arm_uitof SPR:$Sm)))]> {
+ let Inst{7} = 0; // u32
+}
+
+def VUITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpCVTIS, "vcvt", ".f32.u32\t$Sd, $Sm",
+ [(set SPR:$Sd, (arm_uitof SPR:$Sm))]> {
+ let Inst{7} = 0; // u32
+
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+// FP -> Int:
+
+class AVConv1IsD_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
+ bits<4> opcod4, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : AVConv1I<opcod1, opcod2, opcod3, opcod4, oops, iops, itin, opc, asm,
+ pattern> {
+ // Instruction operands.
+ bits<5> Sd;
+ bits<5> Dm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Dm{3-0};
+ let Inst{5} = Dm{4};
+ let Inst{15-12} = Sd{4-1};
+ let Inst{22} = Sd{0};
+}
+
+class AVConv1InsS_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
+ bits<4> opcod4, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : AVConv1In<opcod1, opcod2, opcod3, opcod4, oops, iops, itin, opc, asm,
+ pattern> {
+ // Instruction operands.
+ bits<5> Sd;
+ bits<5> Sm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Sm{4-1};
+ let Inst{5} = Sm{0};
+ let Inst{15-12} = Sd{4-1};
+ let Inst{22} = Sd{0};
+}
+
+// Always set Z bit in the instruction, i.e. "round towards zero" variants.
+def VTOSIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1101, 0b1011,
+ (outs SPR:$Sd), (ins DPR:$Dm),
+ IIC_fpCVTDI, "vcvt", ".s32.f64\t$Sd, $Dm",
+ [(set SPR:$Sd, (arm_ftosi (f64 DPR:$Dm)))]> {
+ let Inst{7} = 1; // Z bit
+}
+
+def VTOSIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpCVTSI, "vcvt", ".s32.f32\t$Sd, $Sm",
+ [(set SPR:$Sd, (arm_ftosi SPR:$Sm))]> {
+ let Inst{7} = 1; // Z bit
+
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def VTOUIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011,
+ (outs SPR:$Sd), (ins DPR:$Dm),
+ IIC_fpCVTDI, "vcvt", ".u32.f64\t$Sd, $Dm",
+ [(set SPR:$Sd, (arm_ftoui (f64 DPR:$Dm)))]> {
+ let Inst{7} = 1; // Z bit
+}
+
+def VTOUIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpCVTSI, "vcvt", ".u32.f32\t$Sd, $Sm",
+ [(set SPR:$Sd, (arm_ftoui SPR:$Sm))]> {
+ let Inst{7} = 1; // Z bit
+
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+// And the Z bit '0' variants, i.e. use the rounding mode specified by FPSCR.
+let Uses = [FPSCR] in {
+// FIXME: Verify encoding after integrated assembler is working.
+def VTOSIRD : AVConv1IsD_Encode<0b11101, 0b11, 0b1101, 0b1011,
+ (outs SPR:$Sd), (ins DPR:$Dm),
+ IIC_fpCVTDI, "vcvtr", ".s32.f64\t$Sd, $Dm",
+ [(set SPR:$Sd, (int_arm_vcvtr (f64 DPR:$Dm)))]>{
+ let Inst{7} = 0; // Z bit
+}
+
+def VTOSIRS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpCVTSI, "vcvtr", ".s32.f32\t$Sd, $Sm",
+ [(set SPR:$Sd, (int_arm_vcvtr SPR:$Sm))]> {
+ let Inst{7} = 0; // Z bit
+}
+
+def VTOUIRD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011,
+ (outs SPR:$Sd), (ins DPR:$Dm),
+ IIC_fpCVTDI, "vcvtr", ".u32.f64\t$Sd, $Dm",
+ [(set SPR:$Sd, (int_arm_vcvtru(f64 DPR:$Dm)))]>{
+ let Inst{7} = 0; // Z bit
+}
+
+def VTOUIRS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpCVTSI, "vcvtr", ".u32.f32\t$Sd, $Sm",
+ [(set SPR:$Sd, (int_arm_vcvtru SPR:$Sm))]> {
+ let Inst{7} = 0; // Z bit
+}
+}
+
+// Convert between floating-point and fixed-point
+// Data type for fixed-point naming convention:
+// S16 (U=0, sx=0) -> SH
+// U16 (U=1, sx=0) -> UH
+// S32 (U=0, sx=1) -> SL
+// U32 (U=1, sx=1) -> UL
+
+// FIXME: Marking these as codegen only seems wrong. They are real
+// instructions(?)
+let Constraints = "$a = $dst", isCodeGenOnly = 1 in {
+
+// FP to Fixed-Point:
+
+def VTOSHS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 0,
+ (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
+ IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits",
+ [/* For disassembly only; pattern left blank */]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def VTOUHS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 0,
+ (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
+ IIC_fpCVTSI, "vcvt", ".u16.f32\t$dst, $a, $fbits",
+ [/* For disassembly only; pattern left blank */]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def VTOSLS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 1,
+ (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
+ IIC_fpCVTSI, "vcvt", ".s32.f32\t$dst, $a, $fbits",
+ [/* For disassembly only; pattern left blank */]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def VTOULS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 1,
+ (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
+ IIC_fpCVTSI, "vcvt", ".u32.f32\t$dst, $a, $fbits",
+ [/* For disassembly only; pattern left blank */]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def VTOSHD : AVConv1XI<0b11101, 0b11, 0b1110, 0b1011, 0,
+ (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
+ IIC_fpCVTDI, "vcvt", ".s16.f64\t$dst, $a, $fbits",
+ [/* For disassembly only; pattern left blank */]>;
+
+def VTOUHD : AVConv1XI<0b11101, 0b11, 0b1111, 0b1011, 0,
+ (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
+ IIC_fpCVTDI, "vcvt", ".u16.f64\t$dst, $a, $fbits",
+ [/* For disassembly only; pattern left blank */]>;
+
+def VTOSLD : AVConv1XI<0b11101, 0b11, 0b1110, 0b1011, 1,
+ (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
+ IIC_fpCVTDI, "vcvt", ".s32.f64\t$dst, $a, $fbits",
+ [/* For disassembly only; pattern left blank */]>;
+
+def VTOULD : AVConv1XI<0b11101, 0b11, 0b1111, 0b1011, 1,
+ (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
+ IIC_fpCVTDI, "vcvt", ".u32.f64\t$dst, $a, $fbits",
+ [/* For disassembly only; pattern left blank */]>;
+
+// Fixed-Point to FP:
+
+def VSHTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 0,
+ (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
+ IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits",
+ [/* For disassembly only; pattern left blank */]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def VUHTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 0,
+ (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
+ IIC_fpCVTIS, "vcvt", ".f32.u16\t$dst, $a, $fbits",
+ [/* For disassembly only; pattern left blank */]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def VSLTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 1,
+ (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
+ IIC_fpCVTIS, "vcvt", ".f32.s32\t$dst, $a, $fbits",
+ [/* For disassembly only; pattern left blank */]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def VULTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 1,
+ (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
+ IIC_fpCVTIS, "vcvt", ".f32.u32\t$dst, $a, $fbits",
+ [/* For disassembly only; pattern left blank */]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def VSHTOD : AVConv1XI<0b11101, 0b11, 0b1010, 0b1011, 0,
+ (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
+ IIC_fpCVTID, "vcvt", ".f64.s16\t$dst, $a, $fbits",
+ [/* For disassembly only; pattern left blank */]>;
+
+def VUHTOD : AVConv1XI<0b11101, 0b11, 0b1011, 0b1011, 0,
+ (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
+ IIC_fpCVTID, "vcvt", ".f64.u16\t$dst, $a, $fbits",
+ [/* For disassembly only; pattern left blank */]>;
+
+def VSLTOD : AVConv1XI<0b11101, 0b11, 0b1010, 0b1011, 1,
+ (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
+ IIC_fpCVTID, "vcvt", ".f64.s32\t$dst, $a, $fbits",
+ [/* For disassembly only; pattern left blank */]>;
+
+def VULTOD : AVConv1XI<0b11101, 0b11, 0b1011, 0b1011, 1,
+ (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
+ IIC_fpCVTID, "vcvt", ".f64.u32\t$dst, $a, $fbits",
+ [/* For disassembly only; pattern left blank */]>;
+
+} // End of 'let Constraints = "$a = $dst", isCodeGenOnly = 1 in'
+
+//===----------------------------------------------------------------------===//
+// FP Multiply-Accumulate Operations.
+//
+
+def VMLAD : ADbI<0b11100, 0b00, 0, 0,
+ (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
+ IIC_fpMAC64, "vmla", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm),
+ (f64 DPR:$Ddin)))]>,
+ RegConstraint<"$Ddin = $Dd">,
+ Requires<[HasVFP2,UseFPVMLx]>;
+
+def VMLAS : ASbIn<0b11100, 0b00, 0, 0,
+ (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
+ IIC_fpMAC32, "vmla", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fadd_mlx (fmul_su SPR:$Sn, SPR:$Sm),
+ SPR:$Sdin))]>,
+ RegConstraint<"$Sdin = $Sd">,
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
+ (VMLAD DPR:$dstin, DPR:$a, DPR:$b)>,
+ Requires<[HasVFP2,UseFPVMLx]>;
+def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
+ (VMLAS SPR:$dstin, SPR:$a, SPR:$b)>,
+ Requires<[HasVFP2,DontUseNEONForFP, UseFPVMLx]>;
+
+def VMLSD : ADbI<0b11100, 0b00, 1, 0,
+ (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
+ IIC_fpMAC64, "vmls", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
+ (f64 DPR:$Ddin)))]>,
+ RegConstraint<"$Ddin = $Dd">,
+ Requires<[HasVFP2,UseFPVMLx]>;
+
+def VMLSS : ASbIn<0b11100, 0b00, 1, 0,
+ (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
+ IIC_fpMAC32, "vmls", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fadd_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
+ SPR:$Sdin))]>,
+ RegConstraint<"$Sdin = $Sd">,
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
+ (VMLSD DPR:$dstin, DPR:$a, DPR:$b)>,
+ Requires<[HasVFP2,UseFPVMLx]>;
+def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
+ (VMLSS SPR:$dstin, SPR:$a, SPR:$b)>,
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>;
+
+def VNMLAD : ADbI<0b11100, 0b01, 1, 0,
+ (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
+ IIC_fpMAC64, "vnmla", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
+ (f64 DPR:$Ddin)))]>,
+ RegConstraint<"$Ddin = $Dd">,
+ Requires<[HasVFP2,UseFPVMLx]>;
+
+def VNMLAS : ASbI<0b11100, 0b01, 1, 0,
+ (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
+ IIC_fpMAC32, "vnmla", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fsub_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
+ SPR:$Sdin))]>,
+ RegConstraint<"$Sdin = $Sd">,
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin),
+ (VNMLAD DPR:$dstin, DPR:$a, DPR:$b)>,
+ Requires<[HasVFP2,UseFPVMLx]>;
+def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin),
+ (VNMLAS SPR:$dstin, SPR:$a, SPR:$b)>,
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>;
+
+def VNMLSD : ADbI<0b11100, 0b01, 0, 0,
+ (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
+ IIC_fpMAC64, "vnmls", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm),
+ (f64 DPR:$Ddin)))]>,
+ RegConstraint<"$Ddin = $Dd">,
+ Requires<[HasVFP2,UseFPVMLx]>;
+
+def VNMLSS : ASbI<0b11100, 0b01, 0, 0,
+ (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
+ IIC_fpMAC32, "vnmls", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>,
+ RegConstraint<"$Sdin = $Sd">,
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin),
+ (VNMLSD DPR:$dstin, DPR:$a, DPR:$b)>,
+ Requires<[HasVFP2,UseFPVMLx]>;
+def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin),
+ (VNMLSS SPR:$dstin, SPR:$a, SPR:$b)>,
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>;
+
+
+//===----------------------------------------------------------------------===//
+// FP Conditional moves.
+//
+
+let neverHasSideEffects = 1 in {
+def VMOVDcc : ARMPseudoInst<(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm, pred:$p),
+ 4, IIC_fpUNA64,
+ [/*(set DPR:$Dd, (ARMcmov DPR:$Dn, DPR:$Dm, imm:$cc))*/]>,
+ RegConstraint<"$Dn = $Dd">;
+
+def VMOVScc : ARMPseudoInst<(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm, pred:$p),
+ 4, IIC_fpUNA32,
+ [/*(set SPR:$Sd, (ARMcmov SPR:$Sn, SPR:$Sm, imm:$cc))*/]>,
+ RegConstraint<"$Sn = $Sd">;
+} // neverHasSideEffects
+
+//===----------------------------------------------------------------------===//
+// Move from VFP System Register to ARM core register.
+//
+
+class MovFromVFP<bits<4> opc19_16, dag oops, dag iops, string opc, string asm,
+ list<dag> pattern>:
+ VFPAI<oops, iops, VFPMiscFrm, IIC_fpSTAT, opc, asm, pattern> {
+
+ // Instruction operand.
+ bits<4> Rt;
+
+ let Inst{27-20} = 0b11101111;
+ let Inst{19-16} = opc19_16;
+ let Inst{15-12} = Rt;
+ let Inst{11-8} = 0b1010;
+ let Inst{7} = 0;
+ let Inst{6-5} = 0b00;
+ let Inst{4} = 1;
+ let Inst{3-0} = 0b0000;
+}
+
+// APSR is the application level alias of CPSR. This FPSCR N, Z, C, V flags
+// to APSR.
+let Defs = [CPSR], Uses = [FPSCR], Rt = 0b1111 /* apsr_nzcv */ in
+def FMSTAT : MovFromVFP<0b0001 /* fpscr */, (outs), (ins),
+ "vmrs", "\tapsr_nzcv, fpscr", [(arm_fmstat)]>;
+
+// Application level FPSCR -> GPR
+let hasSideEffects = 1, Uses = [FPSCR] in
+def VMRS : MovFromVFP<0b0001 /* fpscr */, (outs GPR:$Rt), (ins),
+ "vmrs", "\t$Rt, fpscr",
+ [(set GPR:$Rt, (int_arm_get_fpscr))]>;
+
+// System level FPEXC, FPSID -> GPR
+let Uses = [FPSCR] in {
+ def VMRS_FPEXC : MovFromVFP<0b1000 /* fpexc */, (outs GPR:$Rt), (ins),
+ "vmrs", "\t$Rt, fpexc", []>;
+ def VMRS_FPSID : MovFromVFP<0b0000 /* fpsid */, (outs GPR:$Rt), (ins),
+ "vmrs", "\t$Rt, fpsid", []>;
+}
+
+//===----------------------------------------------------------------------===//
+// Move from ARM core register to VFP System Register.
+//
+
+class MovToVFP<bits<4> opc19_16, dag oops, dag iops, string opc, string asm,
+ list<dag> pattern>:
+ VFPAI<oops, iops, VFPMiscFrm, IIC_fpSTAT, opc, asm, pattern> {
+
+ // Instruction operand.
+ bits<4> src;
+
+ // Encode instruction operand.
+ let Inst{15-12} = src;
+
+ let Inst{27-20} = 0b11101110;
+ let Inst{19-16} = opc19_16;
+ let Inst{11-8} = 0b1010;
+ let Inst{7} = 0;
+ let Inst{4} = 1;
+}
+
+let Defs = [FPSCR] in {
+ // Application level GPR -> FPSCR
+ def VMSR : MovToVFP<0b0001 /* fpscr */, (outs), (ins GPR:$src),
+ "vmsr", "\tfpscr, $src", [(int_arm_set_fpscr GPR:$src)]>;
+ // System level GPR -> FPEXC
+ def VMSR_FPEXC : MovToVFP<0b1000 /* fpexc */, (outs), (ins GPR:$src),
+ "vmsr", "\tfpexc, $src", []>;
+ // System level GPR -> FPSID
+ def VMSR_FPSID : MovToVFP<0b0000 /* fpsid */, (outs), (ins GPR:$src),
+ "vmsr", "\tfpsid, $src", []>;
+}
+
+//===----------------------------------------------------------------------===//
+// Misc.
+//
+
+// Materialize FP immediates. VFP3 only.
+let isReMaterializable = 1 in {
+def FCONSTD : VFPAI<(outs DPR:$Dd), (ins vfp_f64imm:$imm),
+ VFPMiscFrm, IIC_fpUNA64,
+ "vmov", ".f64\t$Dd, $imm",
+ [(set DPR:$Dd, vfp_f64imm:$imm)]>, Requires<[HasVFP3]> {
+ bits<5> Dd;
+ bits<8> imm;
+
+ let Inst{27-23} = 0b11101;
+ let Inst{22} = Dd{4};
+ let Inst{21-20} = 0b11;
+ let Inst{19-16} = imm{7-4};
+ let Inst{15-12} = Dd{3-0};
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 1; // Double precision.
+ let Inst{7-4} = 0b0000;
+ let Inst{3-0} = imm{3-0};
+}
+
+def FCONSTS : VFPAI<(outs SPR:$Sd), (ins vfp_f32imm:$imm),
+ VFPMiscFrm, IIC_fpUNA32,
+ "vmov", ".f32\t$Sd, $imm",
+ [(set SPR:$Sd, vfp_f32imm:$imm)]>, Requires<[HasVFP3]> {
+ bits<5> Sd;
+ bits<8> imm;
+
+ let Inst{27-23} = 0b11101;
+ let Inst{22} = Sd{0};
+ let Inst{21-20} = 0b11;
+ let Inst{19-16} = imm{7-4};
+ let Inst{15-12} = Sd{4-1};
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 0; // Single precision.
+ let Inst{7-4} = 0b0000;
+ let Inst{3-0} = imm{3-0};
+}
+}
+
+//===----------------------------------------------------------------------===//
+// Assembler aliases.
+//
+
+def : VFP2InstAlias<"fmstat${p}", (FMSTAT pred:$p)>;
+
diff --git a/contrib/llvm/lib/Target/ARM/ARMJITInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMJITInfo.cpp
new file mode 100644
index 0000000..45b7e48
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMJITInfo.cpp
@@ -0,0 +1,336 @@
+//===-- ARMJITInfo.cpp - Implement the JIT interfaces for the ARM target --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the JIT interfaces for the ARM target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jit"
+#include "ARMJITInfo.h"
+#include "ARMInstrInfo.h"
+#include "ARMConstantPoolValue.h"
+#include "ARMRelocations.h"
+#include "ARMSubtarget.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Memory.h"
+#include <cstdlib>
+using namespace llvm;
+
+void ARMJITInfo::replaceMachineCodeForFunction(void *Old, void *New) {
+ report_fatal_error("ARMJITInfo::replaceMachineCodeForFunction");
+}
+
+/// JITCompilerFunction - This contains the address of the JIT function used to
+/// compile a function lazily.
+static TargetJITInfo::JITCompilerFn JITCompilerFunction;
+
+// Get the ASMPREFIX for the current host. This is often '_'.
+#ifndef __USER_LABEL_PREFIX__
+#define __USER_LABEL_PREFIX__
+#endif
+#define GETASMPREFIX2(X) #X
+#define GETASMPREFIX(X) GETASMPREFIX2(X)
+#define ASMPREFIX GETASMPREFIX(__USER_LABEL_PREFIX__)
+
+// CompilationCallback stub - We can't use a C function with inline assembly in
+// it, because the prolog/epilog inserted by GCC won't work for us. (We need
+// to preserve more context and manipulate the stack directly). Instead,
+// write our own wrapper, which does things our way, so we have complete
+// control over register saving and restoring.
+extern "C" {
+#if defined(__arm__)
+ void ARMCompilationCallback();
+ asm(
+ ".text\n"
+ ".align 2\n"
+ ".globl " ASMPREFIX "ARMCompilationCallback\n"
+ ASMPREFIX "ARMCompilationCallback:\n"
+ // Save caller saved registers since they may contain stuff
+ // for the real target function right now. We have to act as if this
+ // whole compilation callback doesn't exist as far as the caller is
+ // concerned, so we can't just preserve the callee saved regs.
+ "stmdb sp!, {r0, r1, r2, r3, lr}\n"
+#if (defined(__VFP_FP__) && !defined(__SOFTFP__))
+ "fstmfdd sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n"
+#endif
+ // The LR contains the address of the stub function on entry.
+ // pass it as the argument to the C part of the callback
+ "mov r0, lr\n"
+ "sub sp, sp, #4\n"
+ // Call the C portion of the callback
+ "bl " ASMPREFIX "ARMCompilationCallbackC\n"
+ "add sp, sp, #4\n"
+ // Restoring the LR to the return address of the function that invoked
+ // the stub and de-allocating the stack space for it requires us to
+ // swap the two saved LR values on the stack, as they're backwards
+ // for what we need since the pop instruction has a pre-determined
+ // order for the registers.
+ // +--------+
+ // 0 | LR | Original return address
+ // +--------+
+ // 1 | LR | Stub address (start of stub)
+ // 2-5 | R3..R0 | Saved registers (we need to preserve all regs)
+ // 6-20 | D0..D7 | Saved VFP registers
+ // +--------+
+ //
+#if (defined(__VFP_FP__) && !defined(__SOFTFP__))
+ // Restore VFP caller-saved registers.
+ "fldmfdd sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n"
+#endif
+ //
+ // We need to exchange the values in slots 0 and 1 so we can
+ // return to the address in slot 1 with the address in slot 0
+ // restored to the LR.
+ "ldr r0, [sp,#20]\n"
+ "ldr r1, [sp,#16]\n"
+ "str r1, [sp,#20]\n"
+ "str r0, [sp,#16]\n"
+ // Return to the (newly modified) stub to invoke the real function.
+ // The above twiddling of the saved return addresses allows us to
+ // deallocate everything, including the LR the stub saved, with two
+ // updating load instructions.
+ "ldmia sp!, {r0, r1, r2, r3, lr}\n"
+ "ldr pc, [sp], #4\n"
+ );
+#else // Not an ARM host
+ void ARMCompilationCallback() {
+ llvm_unreachable("Cannot call ARMCompilationCallback() on a non-ARM arch!");
+ }
+#endif
+}
+
+/// ARMCompilationCallbackC - This is the target-specific function invoked
+/// by the function stub when we did not know the real target of a call.
+/// This function must locate the start of the stub or call site and pass
+/// it into the JIT compiler function.
+extern "C" void ARMCompilationCallbackC(intptr_t StubAddr) {
+ // Get the address of the compiled code for this function.
+ intptr_t NewVal = (intptr_t)JITCompilerFunction((void*)StubAddr);
+
+ // Rewrite the call target... so that we don't end up here every time we
+ // execute the call. We're replacing the first two instructions of the
+ // stub with:
+ // ldr pc, [pc,#-4]
+ // <addr>
+ if (!sys::Memory::setRangeWritable((void*)StubAddr, 8)) {
+ llvm_unreachable("ERROR: Unable to mark stub writable");
+ }
+ *(intptr_t *)StubAddr = 0xe51ff004; // ldr pc, [pc, #-4]
+ *(intptr_t *)(StubAddr+4) = NewVal;
+ if (!sys::Memory::setRangeExecutable((void*)StubAddr, 8)) {
+ llvm_unreachable("ERROR: Unable to mark stub executable");
+ }
+}
+
+TargetJITInfo::LazyResolverFn
+ARMJITInfo::getLazyResolverFunction(JITCompilerFn F) {
+ JITCompilerFunction = F;
+ return ARMCompilationCallback;
+}
+
+void *ARMJITInfo::emitGlobalValueIndirectSym(const GlobalValue *GV, void *Ptr,
+ JITCodeEmitter &JCE) {
+ uint8_t Buffer[4];
+ uint8_t *Cur = Buffer;
+ MachineCodeEmitter::emitWordLEInto(Cur, (intptr_t)Ptr);
+ void *PtrAddr = JCE.allocIndirectGV(
+ GV, Buffer, sizeof(Buffer), /*Alignment=*/4);
+ addIndirectSymAddr(Ptr, (intptr_t)PtrAddr);
+ return PtrAddr;
+}
+
+TargetJITInfo::StubLayout ARMJITInfo::getStubLayout() {
+ // The stub contains up to 3 4-byte instructions, aligned at 4 bytes, and a
+ // 4-byte address. See emitFunctionStub for details.
+ StubLayout Result = {16, 4};
+ return Result;
+}
+
+void *ARMJITInfo::emitFunctionStub(const Function* F, void *Fn,
+ JITCodeEmitter &JCE) {
+ void *Addr;
+ // If this is just a call to an external function, emit a branch instead of a
+ // call. The code is the same except for one bit of the last instruction.
+ if (Fn != (void*)(intptr_t)ARMCompilationCallback) {
+ // Branch to the corresponding function addr.
+ if (IsPIC) {
+ // The stub is 16-byte size and 4-aligned.
+ intptr_t LazyPtr = getIndirectSymAddr(Fn);
+ if (!LazyPtr) {
+ // In PIC mode, the function stub is loading a lazy-ptr.
+ LazyPtr= (intptr_t)emitGlobalValueIndirectSym((GlobalValue*)F, Fn, JCE);
+ DEBUG(if (F)
+ errs() << "JIT: Indirect symbol emitted at [" << LazyPtr
+ << "] for GV '" << F->getName() << "'\n";
+ else
+ errs() << "JIT: Stub emitted at [" << LazyPtr
+ << "] for external function at '" << Fn << "'\n");
+ }
+ JCE.emitAlignment(4);
+ Addr = (void*)JCE.getCurrentPCValue();
+ if (!sys::Memory::setRangeWritable(Addr, 16)) {
+ llvm_unreachable("ERROR: Unable to mark stub writable");
+ }
+ JCE.emitWordLE(0xe59fc004); // ldr ip, [pc, #+4]
+ JCE.emitWordLE(0xe08fc00c); // L_func$scv: add ip, pc, ip
+ JCE.emitWordLE(0xe59cf000); // ldr pc, [ip]
+ JCE.emitWordLE(LazyPtr - (intptr_t(Addr)+4+8)); // func - (L_func$scv+8)
+ sys::Memory::InvalidateInstructionCache(Addr, 16);
+ if (!sys::Memory::setRangeExecutable(Addr, 16)) {
+ llvm_unreachable("ERROR: Unable to mark stub executable");
+ }
+ } else {
+ // The stub is 8-byte size and 4-aligned.
+ JCE.emitAlignment(4);
+ Addr = (void*)JCE.getCurrentPCValue();
+ if (!sys::Memory::setRangeWritable(Addr, 8)) {
+ llvm_unreachable("ERROR: Unable to mark stub writable");
+ }
+ JCE.emitWordLE(0xe51ff004); // ldr pc, [pc, #-4]
+ JCE.emitWordLE((intptr_t)Fn); // addr of function
+ sys::Memory::InvalidateInstructionCache(Addr, 8);
+ if (!sys::Memory::setRangeExecutable(Addr, 8)) {
+ llvm_unreachable("ERROR: Unable to mark stub executable");
+ }
+ }
+ } else {
+ // The compilation callback will overwrite the first two words of this
+ // stub with indirect branch instructions targeting the compiled code.
+ // This stub sets the return address to restart the stub, so that
+ // the new branch will be invoked when we come back.
+ //
+ // Branch and link to the compilation callback.
+ // The stub is 16-byte size and 4-byte aligned.
+ JCE.emitAlignment(4);
+ Addr = (void*)JCE.getCurrentPCValue();
+ if (!sys::Memory::setRangeWritable(Addr, 16)) {
+ llvm_unreachable("ERROR: Unable to mark stub writable");
+ }
+ // Save LR so the callback can determine which stub called it.
+ // The compilation callback is responsible for popping this prior
+ // to returning.
+ JCE.emitWordLE(0xe92d4000); // push {lr}
+ // Set the return address to go back to the start of this stub.
+ JCE.emitWordLE(0xe24fe00c); // sub lr, pc, #12
+ // Invoke the compilation callback.
+ JCE.emitWordLE(0xe51ff004); // ldr pc, [pc, #-4]
+ // The address of the compilation callback.
+ JCE.emitWordLE((intptr_t)ARMCompilationCallback);
+ sys::Memory::InvalidateInstructionCache(Addr, 16);
+ if (!sys::Memory::setRangeExecutable(Addr, 16)) {
+ llvm_unreachable("ERROR: Unable to mark stub executable");
+ }
+ }
+
+ return Addr;
+}
+
+intptr_t ARMJITInfo::resolveRelocDestAddr(MachineRelocation *MR) const {
+ ARM::RelocationType RT = (ARM::RelocationType)MR->getRelocationType();
+ switch (RT) {
+ default:
+ return (intptr_t)(MR->getResultPointer());
+ case ARM::reloc_arm_pic_jt:
+ // Destination address - jump table base.
+ return (intptr_t)(MR->getResultPointer()) - MR->getConstantVal();
+ case ARM::reloc_arm_jt_base:
+ // Jump table base address.
+ return getJumpTableBaseAddr(MR->getJumpTableIndex());
+ case ARM::reloc_arm_cp_entry:
+ case ARM::reloc_arm_vfp_cp_entry:
+ // Constant pool entry address.
+ return getConstantPoolEntryAddr(MR->getConstantPoolIndex());
+ case ARM::reloc_arm_machine_cp_entry: {
+ ARMConstantPoolValue *ACPV = (ARMConstantPoolValue*)MR->getConstantVal();
+ assert((!ACPV->hasModifier() && !ACPV->mustAddCurrentAddress()) &&
+ "Can't handle this machine constant pool entry yet!");
+ intptr_t Addr = (intptr_t)(MR->getResultPointer());
+ Addr -= getPCLabelAddr(ACPV->getLabelId()) + ACPV->getPCAdjustment();
+ return Addr;
+ }
+ }
+}
+
+/// relocate - Before the JIT can run a block of code that has been emitted,
+/// it must rewrite the code to contain the actual addresses of any
+/// referenced global symbols.
+void ARMJITInfo::relocate(void *Function, MachineRelocation *MR,
+ unsigned NumRelocs, unsigned char* GOTBase) {
+ for (unsigned i = 0; i != NumRelocs; ++i, ++MR) {
+ void *RelocPos = (char*)Function + MR->getMachineCodeOffset();
+ intptr_t ResultPtr = resolveRelocDestAddr(MR);
+ switch ((ARM::RelocationType)MR->getRelocationType()) {
+ case ARM::reloc_arm_cp_entry:
+ case ARM::reloc_arm_vfp_cp_entry:
+ case ARM::reloc_arm_relative: {
+ // It is necessary to calculate the correct PC relative value. We
+ // subtract the base addr from the target addr to form a byte offset.
+ ResultPtr = ResultPtr - (intptr_t)RelocPos - 8;
+ // If the result is positive, set bit U(23) to 1.
+ if (ResultPtr >= 0)
+ *((intptr_t*)RelocPos) |= 1 << ARMII::U_BitShift;
+ else {
+ // Otherwise, obtain the absolute value and set bit U(23) to 0.
+ *((intptr_t*)RelocPos) &= ~(1 << ARMII::U_BitShift);
+ ResultPtr = - ResultPtr;
+ }
+ // Set the immed value calculated.
+ // VFP immediate offset is multiplied by 4.
+ if (MR->getRelocationType() == ARM::reloc_arm_vfp_cp_entry)
+ ResultPtr = ResultPtr >> 2;
+ *((intptr_t*)RelocPos) |= ResultPtr;
+ // Set register Rn to PC.
+ *((intptr_t*)RelocPos) |=
+ getARMRegisterNumbering(ARM::PC) << ARMII::RegRnShift;
+ break;
+ }
+ case ARM::reloc_arm_pic_jt:
+ case ARM::reloc_arm_machine_cp_entry:
+ case ARM::reloc_arm_absolute: {
+ // These addresses have already been resolved.
+ *((intptr_t*)RelocPos) |= (intptr_t)ResultPtr;
+ break;
+ }
+ case ARM::reloc_arm_branch: {
+ // It is necessary to calculate the correct value of signed_immed_24
+ // field. We subtract the base addr from the target addr to form a
+ // byte offset, which must be inside the range -33554432 and +33554428.
+ // Then, we set the signed_immed_24 field of the instruction to bits
+ // [25:2] of the byte offset. More details ARM-ARM p. A4-11.
+ ResultPtr = ResultPtr - (intptr_t)RelocPos - 8;
+ ResultPtr = (ResultPtr & 0x03FFFFFC) >> 2;
+ assert(ResultPtr >= -33554432 && ResultPtr <= 33554428);
+ *((intptr_t*)RelocPos) |= ResultPtr;
+ break;
+ }
+ case ARM::reloc_arm_jt_base: {
+ // JT base - (instruction addr + 8)
+ ResultPtr = ResultPtr - (intptr_t)RelocPos - 8;
+ *((intptr_t*)RelocPos) |= ResultPtr;
+ break;
+ }
+ case ARM::reloc_arm_movw: {
+ ResultPtr = ResultPtr & 0xFFFF;
+ *((intptr_t*)RelocPos) |= ResultPtr & 0xFFF;
+ *((intptr_t*)RelocPos) |= ((ResultPtr >> 12) & 0xF) << 16;
+ break;
+ }
+ case ARM::reloc_arm_movt: {
+ ResultPtr = (ResultPtr >> 16) & 0xFFFF;
+ *((intptr_t*)RelocPos) |= ResultPtr & 0xFFF;
+ *((intptr_t*)RelocPos) |= ((ResultPtr >> 12) & 0xF) << 16;
+ break;
+ }
+ }
+ }
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMJITInfo.h b/contrib/llvm/lib/Target/ARM/ARMJITInfo.h
new file mode 100644
index 0000000..2f97928
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMJITInfo.h
@@ -0,0 +1,183 @@
+//===- ARMJITInfo.h - ARM implementation of the JIT interface --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the ARMJITInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMJITINFO_H
+#define ARMJITINFO_H
+
+#include "ARMMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/Target/TargetJITInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+
+namespace llvm {
+ class ARMTargetMachine;
+
+ class ARMJITInfo : public TargetJITInfo {
+ // ConstPoolId2AddrMap - A map from constant pool ids to the corresponding
+ // CONSTPOOL_ENTRY addresses.
+ SmallVector<intptr_t, 16> ConstPoolId2AddrMap;
+
+ // JumpTableId2AddrMap - A map from inline jumptable ids to the
+ // corresponding inline jump table bases.
+ SmallVector<intptr_t, 16> JumpTableId2AddrMap;
+
+ // PCLabelMap - A map from PC labels to addresses.
+ DenseMap<unsigned, intptr_t> PCLabelMap;
+
+ // Sym2IndirectSymMap - A map from symbol (GlobalValue and ExternalSymbol)
+ // addresses to their indirect symbol addresses.
+ DenseMap<void*, intptr_t> Sym2IndirectSymMap;
+
+ // IsPIC - True if the relocation model is PIC. This is used to determine
+ // how to codegen function stubs.
+ bool IsPIC;
+
+ public:
+ explicit ARMJITInfo() : IsPIC(false) { useGOT = false; }
+
+ /// replaceMachineCodeForFunction - Make it so that calling the function
+ /// whose machine code is at OLD turns into a call to NEW, perhaps by
+ /// overwriting OLD with a branch to NEW. This is used for self-modifying
+ /// code.
+ ///
+ virtual void replaceMachineCodeForFunction(void *Old, void *New);
+
+ /// emitGlobalValueIndirectSym - Use the specified JITCodeEmitter object
+ /// to emit an indirect symbol which contains the address of the specified
+ /// ptr.
+ virtual void *emitGlobalValueIndirectSym(const GlobalValue* GV, void *ptr,
+ JITCodeEmitter &JCE);
+
+ // getStubLayout - Returns the size and alignment of the largest call stub
+ // on ARM.
+ virtual StubLayout getStubLayout();
+
+ /// emitFunctionStub - Use the specified JITCodeEmitter object to emit a
+ /// small native function that simply calls the function at the specified
+ /// address.
+ virtual void *emitFunctionStub(const Function* F, void *Fn,
+ JITCodeEmitter &JCE);
+
+ /// getLazyResolverFunction - Expose the lazy resolver to the JIT.
+ virtual LazyResolverFn getLazyResolverFunction(JITCompilerFn);
+
+ /// relocate - Before the JIT can run a block of code that has been emitted,
+ /// it must rewrite the code to contain the actual addresses of any
+ /// referenced global symbols.
+ virtual void relocate(void *Function, MachineRelocation *MR,
+ unsigned NumRelocs, unsigned char* GOTBase);
+
+ /// hasCustomConstantPool - Allows a target to specify that constant
+ /// pool address resolution is handled by the target.
+ virtual bool hasCustomConstantPool() const { return true; }
+
+ /// hasCustomJumpTables - Allows a target to specify that jumptables
+ /// are emitted by the target.
+ virtual bool hasCustomJumpTables() const { return true; }
+
+ /// allocateSeparateGVMemory - If true, globals should be placed in
+ /// separately allocated heap memory rather than in the same
+ /// code memory allocated by JITCodeEmitter.
+ virtual bool allocateSeparateGVMemory() const {
+#ifdef __APPLE__
+ return true;
+#else
+ return false;
+#endif
+ }
+
+ /// Initialize - Initialize internal stage for the function being JITted.
+ /// Resize constant pool ids to CONSTPOOL_ENTRY addresses map; resize
+ /// jump table ids to jump table bases map; remember if codegen relocation
+ /// model is PIC.
+ void Initialize(const MachineFunction &MF, bool isPIC) {
+ const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ ConstPoolId2AddrMap.resize(AFI->getNumPICLabels());
+ JumpTableId2AddrMap.resize(AFI->getNumJumpTables());
+ IsPIC = isPIC;
+ }
+
+ /// getConstantPoolEntryAddr - The ARM target puts all constant
+ /// pool entries into constant islands. This returns the address of the
+ /// constant pool entry of the specified index.
+ intptr_t getConstantPoolEntryAddr(unsigned CPI) const {
+ assert(CPI < ConstPoolId2AddrMap.size());
+ return ConstPoolId2AddrMap[CPI];
+ }
+
+ /// addConstantPoolEntryAddr - Map a Constant Pool Index to the address
+ /// where its associated value is stored. When relocations are processed,
+ /// this value will be used to resolve references to the constant.
+ void addConstantPoolEntryAddr(unsigned CPI, intptr_t Addr) {
+ assert(CPI < ConstPoolId2AddrMap.size());
+ ConstPoolId2AddrMap[CPI] = Addr;
+ }
+
+ /// getJumpTableBaseAddr - The ARM target inline all jump tables within
+ /// text section of the function. This returns the address of the base of
+ /// the jump table of the specified index.
+ intptr_t getJumpTableBaseAddr(unsigned JTI) const {
+ assert(JTI < JumpTableId2AddrMap.size());
+ return JumpTableId2AddrMap[JTI];
+ }
+
+ /// addJumpTableBaseAddr - Map a jump table index to the address where
+ /// the corresponding inline jump table is emitted. When relocations are
+ /// processed, this value will be used to resolve references to the
+ /// jump table.
+ void addJumpTableBaseAddr(unsigned JTI, intptr_t Addr) {
+ assert(JTI < JumpTableId2AddrMap.size());
+ JumpTableId2AddrMap[JTI] = Addr;
+ }
+
+ /// getPCLabelAddr - Retrieve the address of the PC label of the
+ /// specified id.
+ intptr_t getPCLabelAddr(unsigned Id) const {
+ DenseMap<unsigned, intptr_t>::const_iterator I = PCLabelMap.find(Id);
+ assert(I != PCLabelMap.end());
+ return I->second;
+ }
+
+ /// addPCLabelAddr - Remember the address of the specified PC label.
+ void addPCLabelAddr(unsigned Id, intptr_t Addr) {
+ PCLabelMap.insert(std::make_pair(Id, Addr));
+ }
+
+ /// getIndirectSymAddr - Retrieve the address of the indirect symbol of the
+ /// specified symbol located at address. Returns 0 if the indirect symbol
+ /// has not been emitted.
+ intptr_t getIndirectSymAddr(void *Addr) const {
+ DenseMap<void*,intptr_t>::const_iterator I= Sym2IndirectSymMap.find(Addr);
+ if (I != Sym2IndirectSymMap.end())
+ return I->second;
+ return 0;
+ }
+
+ /// addIndirectSymAddr - Add a mapping from address of an emitted symbol to
+ /// its indirect symbol address.
+ void addIndirectSymAddr(void *SymAddr, intptr_t IndSymAddr) {
+ Sym2IndirectSymMap.insert(std::make_pair(SymAddr, IndSymAddr));
+ }
+
+ private:
+ /// resolveRelocDestAddr - Resolve the resulting address of the relocation
+ /// if it's not already solved. Constantpool entries must be resolved by
+ /// ARM target.
+ intptr_t resolveRelocDestAddr(MachineRelocation *MR) const;
+ };
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
new file mode 100644
index 0000000..faa8ba7
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -0,0 +1,1850 @@
+//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that performs load / store related peephole
+// optimizations. This pass should be run after register allocation.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm-ldst-opt"
+#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMRegisterInfo.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumLDMGened , "Number of ldm instructions generated");
+STATISTIC(NumSTMGened , "Number of stm instructions generated");
+STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
+STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
+STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
+STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
+STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
+STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
+STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
+STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
+STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
+
+/// ARMAllocLoadStoreOpt - Post- register allocation pass the combine
+/// load / store instructions to form ldm / stm instructions.
+
+namespace {
+ struct ARMLoadStoreOpt : public MachineFunctionPass {
+ static char ID;
+ ARMLoadStoreOpt() : MachineFunctionPass(ID) {}
+
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ ARMFunctionInfo *AFI;
+ RegScavenger *RS;
+ bool isThumb2;
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual const char *getPassName() const {
+ return "ARM load / store optimization pass";
+ }
+
+ private:
+ struct MemOpQueueEntry {
+ int Offset;
+ unsigned Reg;
+ bool isKill;
+ unsigned Position;
+ MachineBasicBlock::iterator MBBI;
+ bool Merged;
+ MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
+ MachineBasicBlock::iterator i)
+ : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
+ };
+ typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
+ typedef MemOpQueue::iterator MemOpQueueIter;
+
+ bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ int Offset, unsigned Base, bool BaseKill, int Opcode,
+ ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
+ DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
+ void MergeOpsUpdate(MachineBasicBlock &MBB,
+ MemOpQueue &MemOps,
+ unsigned memOpsBegin,
+ unsigned memOpsEnd,
+ unsigned insertAfter,
+ int Offset,
+ unsigned Base,
+ bool BaseKill,
+ int Opcode,
+ ARMCC::CondCodes Pred,
+ unsigned PredReg,
+ unsigned Scratch,
+ DebugLoc dl,
+ SmallVector<MachineBasicBlock::iterator, 4> &Merges);
+ void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
+ int Opcode, unsigned Size,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ unsigned Scratch, MemOpQueue &MemOps,
+ SmallVector<MachineBasicBlock::iterator, 4> &Merges);
+
+ void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
+ bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI);
+ bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const TargetInstrInfo *TII,
+ bool &Advance,
+ MachineBasicBlock::iterator &I);
+ bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ bool &Advance,
+ MachineBasicBlock::iterator &I);
+ bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
+ bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
+ };
+ char ARMLoadStoreOpt::ID = 0;
+}
+
+static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
+ switch (Opcode) {
+ default: llvm_unreachable("Unhandled opcode!");
+ case ARM::LDRi12:
+ ++NumLDMGened;
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::LDMIA;
+ case ARM_AM::da: return ARM::LDMDA;
+ case ARM_AM::db: return ARM::LDMDB;
+ case ARM_AM::ib: return ARM::LDMIB;
+ }
+ break;
+ case ARM::STRi12:
+ ++NumSTMGened;
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::STMIA;
+ case ARM_AM::da: return ARM::STMDA;
+ case ARM_AM::db: return ARM::STMDB;
+ case ARM_AM::ib: return ARM::STMIB;
+ }
+ break;
+ case ARM::t2LDRi8:
+ case ARM::t2LDRi12:
+ ++NumLDMGened;
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::t2LDMIA;
+ case ARM_AM::db: return ARM::t2LDMDB;
+ }
+ break;
+ case ARM::t2STRi8:
+ case ARM::t2STRi12:
+ ++NumSTMGened;
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::t2STMIA;
+ case ARM_AM::db: return ARM::t2STMDB;
+ }
+ break;
+ case ARM::VLDRS:
+ ++NumVLDMGened;
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::VLDMSIA;
+ case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
+ }
+ break;
+ case ARM::VSTRS:
+ ++NumVSTMGened;
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::VSTMSIA;
+ case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
+ }
+ break;
+ case ARM::VLDRD:
+ ++NumVLDMGened;
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::VLDMDIA;
+ case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
+ }
+ break;
+ case ARM::VSTRD:
+ ++NumVSTMGened;
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::VSTMDIA;
+ case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
+ }
+ break;
+ }
+
+ return 0;
+}
+
+namespace llvm {
+ namespace ARM_AM {
+
+AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
+ switch (Opcode) {
+ default: llvm_unreachable("Unhandled opcode!");
+ case ARM::LDMIA_RET:
+ case ARM::LDMIA:
+ case ARM::LDMIA_UPD:
+ case ARM::STMIA:
+ case ARM::STMIA_UPD:
+ case ARM::t2LDMIA_RET:
+ case ARM::t2LDMIA:
+ case ARM::t2LDMIA_UPD:
+ case ARM::t2STMIA:
+ case ARM::t2STMIA_UPD:
+ case ARM::VLDMSIA:
+ case ARM::VLDMSIA_UPD:
+ case ARM::VSTMSIA:
+ case ARM::VSTMSIA_UPD:
+ case ARM::VLDMDIA:
+ case ARM::VLDMDIA_UPD:
+ case ARM::VSTMDIA:
+ case ARM::VSTMDIA_UPD:
+ return ARM_AM::ia;
+
+ case ARM::LDMDA:
+ case ARM::LDMDA_UPD:
+ case ARM::STMDA:
+ case ARM::STMDA_UPD:
+ return ARM_AM::da;
+
+ case ARM::LDMDB:
+ case ARM::LDMDB_UPD:
+ case ARM::STMDB:
+ case ARM::STMDB_UPD:
+ case ARM::t2LDMDB:
+ case ARM::t2LDMDB_UPD:
+ case ARM::t2STMDB:
+ case ARM::t2STMDB_UPD:
+ case ARM::VLDMSDB_UPD:
+ case ARM::VSTMSDB_UPD:
+ case ARM::VLDMDDB_UPD:
+ case ARM::VSTMDDB_UPD:
+ return ARM_AM::db;
+
+ case ARM::LDMIB:
+ case ARM::LDMIB_UPD:
+ case ARM::STMIB:
+ case ARM::STMIB_UPD:
+ return ARM_AM::ib;
+ }
+
+ return ARM_AM::bad_am_submode;
+}
+
+ } // end namespace ARM_AM
+} // end namespace llvm
+
+static bool isT2i32Load(unsigned Opc) {
+ return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
+}
+
+static bool isi32Load(unsigned Opc) {
+ return Opc == ARM::LDRi12 || isT2i32Load(Opc);
+}
+
+static bool isT2i32Store(unsigned Opc) {
+ return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
+}
+
+static bool isi32Store(unsigned Opc) {
+ return Opc == ARM::STRi12 || isT2i32Store(Opc);
+}
+
+/// MergeOps - Create and insert a LDM or STM with Base as base register and
+/// registers in Regs as the register operands that would be loaded / stored.
+/// It returns true if the transformation is done.
+bool
+ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ int Offset, unsigned Base, bool BaseKill,
+ int Opcode, ARMCC::CondCodes Pred,
+ unsigned PredReg, unsigned Scratch, DebugLoc dl,
+ SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
+ // Only a single register to load / store. Don't bother.
+ unsigned NumRegs = Regs.size();
+ if (NumRegs <= 1)
+ return false;
+
+ ARM_AM::AMSubMode Mode = ARM_AM::ia;
+ // VFP and Thumb2 do not support IB or DA modes.
+ bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
+ bool haveIBAndDA = isNotVFP && !isThumb2;
+ if (Offset == 4 && haveIBAndDA)
+ Mode = ARM_AM::ib;
+ else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA)
+ Mode = ARM_AM::da;
+ else if (Offset == -4 * (int)NumRegs && isNotVFP)
+ // VLDM/VSTM do not support DB mode without also updating the base reg.
+ Mode = ARM_AM::db;
+ else if (Offset != 0) {
+ // Check if this is a supported opcode before we insert instructions to
+ // calculate a new base register.
+ if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;
+
+ // If starting offset isn't zero, insert a MI to materialize a new base.
+ // But only do so if it is cost effective, i.e. merging more than two
+ // loads / stores.
+ if (NumRegs <= 2)
+ return false;
+
+ unsigned NewBase;
+ if (isi32Load(Opcode))
+ // If it is a load, then just use one of the destination register to
+ // use as the new base.
+ NewBase = Regs[NumRegs-1].first;
+ else {
+ // Use the scratch register to use as a new base.
+ NewBase = Scratch;
+ if (NewBase == 0)
+ return false;
+ }
+ int BaseOpc = !isThumb2 ? ARM::ADDri : ARM::t2ADDri;
+ if (Offset < 0) {
+ BaseOpc = !isThumb2 ? ARM::SUBri : ARM::t2SUBri;
+ Offset = - Offset;
+ }
+ int ImmedOffset = isThumb2
+ ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
+ if (ImmedOffset == -1)
+ // FIXME: Try t2ADDri12 or t2SUBri12?
+ return false; // Probably not worth it then.
+
+ BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
+ .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
+ .addImm(Pred).addReg(PredReg).addReg(0);
+ Base = NewBase;
+ BaseKill = true; // New base is always killed right its use.
+ }
+
+ bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
+ Opcode == ARM::VLDRD);
+ Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
+ if (!Opcode) return false;
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
+ .addReg(Base, getKillRegState(BaseKill))
+ .addImm(Pred).addReg(PredReg);
+ for (unsigned i = 0; i != NumRegs; ++i)
+ MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
+ | getKillRegState(Regs[i].second));
+
+ return true;
+}
+
+// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
+// success.
+void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
+ MemOpQueue &memOps,
+ unsigned memOpsBegin, unsigned memOpsEnd,
+ unsigned insertAfter, int Offset,
+ unsigned Base, bool BaseKill,
+ int Opcode,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ unsigned Scratch,
+ DebugLoc dl,
+ SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
+ // First calculate which of the registers should be killed by the merged
+ // instruction.
+ const unsigned insertPos = memOps[insertAfter].Position;
+ SmallSet<unsigned, 4> KilledRegs;
+ DenseMap<unsigned, unsigned> Killer;
+ for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
+ if (i == memOpsBegin) {
+ i = memOpsEnd;
+ if (i == e)
+ break;
+ }
+ if (memOps[i].Position < insertPos && memOps[i].isKill) {
+ unsigned Reg = memOps[i].Reg;
+ KilledRegs.insert(Reg);
+ Killer[Reg] = i;
+ }
+ }
+
+ SmallVector<std::pair<unsigned, bool>, 8> Regs;
+ for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
+ unsigned Reg = memOps[i].Reg;
+ // If we are inserting the merged operation after an operation that
+ // uses the same register, make sure to transfer any kill flag.
+ bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
+ Regs.push_back(std::make_pair(Reg, isKill));
+ }
+
+ // Try to do the merge.
+ MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
+ ++Loc;
+ if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
+ Pred, PredReg, Scratch, dl, Regs))
+ return;
+
+ // Merge succeeded, update records.
+ Merges.push_back(prior(Loc));
+ for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
+ // Remove kill flags from any memops that come before insertPos.
+ if (Regs[i-memOpsBegin].second) {
+ unsigned Reg = Regs[i-memOpsBegin].first;
+ if (KilledRegs.count(Reg)) {
+ unsigned j = Killer[Reg];
+ int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
+ assert(Idx >= 0 && "Cannot find killing operand");
+ memOps[j].MBBI->getOperand(Idx).setIsKill(false);
+ memOps[j].isKill = false;
+ }
+ memOps[i].isKill = true;
+ }
+ MBB.erase(memOps[i].MBBI);
+ // Update this memop to refer to the merged instruction.
+ // We may need to move kill flags again.
+ memOps[i].Merged = true;
+ memOps[i].MBBI = Merges.back();
+ memOps[i].Position = insertPos;
+ }
+}
+
+/// MergeLDR_STR - Merge a number of load / store instructions into one or more
+/// load / store multiple instructions.
+void
+ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
+ unsigned Base, int Opcode, unsigned Size,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ unsigned Scratch, MemOpQueue &MemOps,
+ SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
+ bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
+ int Offset = MemOps[SIndex].Offset;
+ int SOffset = Offset;
+ unsigned insertAfter = SIndex;
+ MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
+ DebugLoc dl = Loc->getDebugLoc();
+ const MachineOperand &PMO = Loc->getOperand(0);
+ unsigned PReg = PMO.getReg();
+ unsigned PRegNum = PMO.isUndef() ? UINT_MAX
+ : getARMRegisterNumbering(PReg);
+ unsigned Count = 1;
+ unsigned Limit = ~0U;
+
+ // vldm / vstm limit are 32 for S variants, 16 for D variants.
+
+ switch (Opcode) {
+ default: break;
+ case ARM::VSTRS:
+ Limit = 32;
+ break;
+ case ARM::VSTRD:
+ Limit = 16;
+ break;
+ case ARM::VLDRD:
+ Limit = 16;
+ break;
+ case ARM::VLDRS:
+ Limit = 32;
+ break;
+ }
+
+ for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
+ int NewOffset = MemOps[i].Offset;
+ const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
+ unsigned Reg = MO.getReg();
+ unsigned RegNum = MO.isUndef() ? UINT_MAX
+ : getARMRegisterNumbering(Reg);
+ // Register numbers must be in ascending order. For VFP / NEON load and
+ // store multiples, the registers must also be consecutive and within the
+ // limit on the number of registers per instruction.
+ if (Reg != ARM::SP &&
+ NewOffset == Offset + (int)Size &&
+ ((isNotVFP && RegNum > PRegNum) ||
+ ((Count < Limit) && RegNum == PRegNum+1))) {
+ Offset += Size;
+ PRegNum = RegNum;
+ ++Count;
+ } else {
+ // Can't merge this in. Try merge the earlier ones first.
+ MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
+ Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
+ MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
+ MemOps, Merges);
+ return;
+ }
+
+ if (MemOps[i].Position > MemOps[insertAfter].Position)
+ insertAfter = i;
+ }
+
+ bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
+ MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
+ Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
+ return;
+}
+
+static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
+ unsigned Bytes, unsigned Limit,
+ ARMCC::CondCodes Pred, unsigned PredReg){
+ unsigned MyPredReg = 0;
+ if (!MI)
+ return false;
+ if (MI->getOpcode() != ARM::t2SUBri &&
+ MI->getOpcode() != ARM::tSUBspi &&
+ MI->getOpcode() != ARM::SUBri)
+ return false;
+
+ // Make sure the offset fits in 8 bits.
+ if (Bytes == 0 || (Limit && Bytes >= Limit))
+ return false;
+
+ unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
+ return (MI->getOperand(0).getReg() == Base &&
+ MI->getOperand(1).getReg() == Base &&
+ (MI->getOperand(2).getImm()*Scale) == Bytes &&
+ llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
+ MyPredReg == PredReg);
+}
+
+static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
+ unsigned Bytes, unsigned Limit,
+ ARMCC::CondCodes Pred, unsigned PredReg){
+ unsigned MyPredReg = 0;
+ if (!MI)
+ return false;
+ if (MI->getOpcode() != ARM::t2ADDri &&
+ MI->getOpcode() != ARM::tADDspi &&
+ MI->getOpcode() != ARM::ADDri)
+ return false;
+
+ if (Bytes == 0 || (Limit && Bytes >= Limit))
+ // Make sure the offset fits in 8 bits.
+ return false;
+
+ unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
+ return (MI->getOperand(0).getReg() == Base &&
+ MI->getOperand(1).getReg() == Base &&
+ (MI->getOperand(2).getImm()*Scale) == Bytes &&
+ llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
+ MyPredReg == PredReg);
+}
+
+static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ default: return 0;
+ case ARM::LDRi12:
+ case ARM::STRi12:
+ case ARM::t2LDRi8:
+ case ARM::t2LDRi12:
+ case ARM::t2STRi8:
+ case ARM::t2STRi12:
+ case ARM::VLDRS:
+ case ARM::VSTRS:
+ return 4;
+ case ARM::VLDRD:
+ case ARM::VSTRD:
+ return 8;
+ case ARM::LDMIA:
+ case ARM::LDMDA:
+ case ARM::LDMDB:
+ case ARM::LDMIB:
+ case ARM::STMIA:
+ case ARM::STMDA:
+ case ARM::STMDB:
+ case ARM::STMIB:
+ case ARM::t2LDMIA:
+ case ARM::t2LDMDB:
+ case ARM::t2STMIA:
+ case ARM::t2STMDB:
+ case ARM::VLDMSIA:
+ case ARM::VSTMSIA:
+ return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
+ case ARM::VLDMDIA:
+ case ARM::VSTMDIA:
+ return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
+ }
+}
+
+static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
+ ARM_AM::AMSubMode Mode) {
+ switch (Opc) {
+ default: llvm_unreachable("Unhandled opcode!");
+ case ARM::LDMIA:
+ case ARM::LDMDA:
+ case ARM::LDMDB:
+ case ARM::LDMIB:
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::LDMIA_UPD;
+ case ARM_AM::ib: return ARM::LDMIB_UPD;
+ case ARM_AM::da: return ARM::LDMDA_UPD;
+ case ARM_AM::db: return ARM::LDMDB_UPD;
+ }
+ break;
+ case ARM::STMIA:
+ case ARM::STMDA:
+ case ARM::STMDB:
+ case ARM::STMIB:
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::STMIA_UPD;
+ case ARM_AM::ib: return ARM::STMIB_UPD;
+ case ARM_AM::da: return ARM::STMDA_UPD;
+ case ARM_AM::db: return ARM::STMDB_UPD;
+ }
+ break;
+ case ARM::t2LDMIA:
+ case ARM::t2LDMDB:
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::t2LDMIA_UPD;
+ case ARM_AM::db: return ARM::t2LDMDB_UPD;
+ }
+ break;
+ case ARM::t2STMIA:
+ case ARM::t2STMDB:
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::t2STMIA_UPD;
+ case ARM_AM::db: return ARM::t2STMDB_UPD;
+ }
+ break;
+ case ARM::VLDMSIA:
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::VLDMSIA_UPD;
+ case ARM_AM::db: return ARM::VLDMSDB_UPD;
+ }
+ break;
+ case ARM::VLDMDIA:
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::VLDMDIA_UPD;
+ case ARM_AM::db: return ARM::VLDMDDB_UPD;
+ }
+ break;
+ case ARM::VSTMSIA:
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::VSTMSIA_UPD;
+ case ARM_AM::db: return ARM::VSTMSDB_UPD;
+ }
+ break;
+ case ARM::VSTMDIA:
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::VSTMDIA_UPD;
+ case ARM_AM::db: return ARM::VSTMDDB_UPD;
+ }
+ break;
+ }
+
+ return 0;
+}
+
+/// MergeBaseUpdateLSMultiple - Fold proceeding/trailing inc/dec of base
+/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
+///
+/// stmia rn, <ra, rb, rc>
+/// rn := rn + 4 * 3;
+/// =>
+/// stmia rn!, <ra, rb, rc>
+///
+/// rn := rn - 4 * 3;
+/// ldmia rn, <ra, rb, rc>
+/// =>
+/// ldmdb rn!, <ra, rb, rc>
+bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ bool &Advance,
+ MachineBasicBlock::iterator &I) {
+ MachineInstr *MI = MBBI;
+ unsigned Base = MI->getOperand(0).getReg();
+ bool BaseKill = MI->getOperand(0).isKill();
+ unsigned Bytes = getLSMultipleTransferSize(MI);
+ unsigned PredReg = 0;
+ ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
+ int Opcode = MI->getOpcode();
+ DebugLoc dl = MI->getDebugLoc();
+
+ // Can't use an updating ld/st if the base register is also a dest
+ // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
+ for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
+ if (MI->getOperand(i).getReg() == Base)
+ return false;
+
+ bool DoMerge = false;
+ ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);
+
+ // Try merging with the previous instruction.
+ MachineBasicBlock::iterator BeginMBBI = MBB.begin();
+ if (MBBI != BeginMBBI) {
+ MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
+ while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
+ --PrevMBBI;
+ if (Mode == ARM_AM::ia &&
+ isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
+ Mode = ARM_AM::db;
+ DoMerge = true;
+ } else if (Mode == ARM_AM::ib &&
+ isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
+ Mode = ARM_AM::da;
+ DoMerge = true;
+ }
+ if (DoMerge)
+ MBB.erase(PrevMBBI);
+ }
+
+ // Try merging with the next instruction.
+ MachineBasicBlock::iterator EndMBBI = MBB.end();
+ if (!DoMerge && MBBI != EndMBBI) {
+ MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
+ while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
+ ++NextMBBI;
+ if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
+ isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
+ DoMerge = true;
+ } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
+ isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
+ DoMerge = true;
+ }
+ if (DoMerge) {
+ if (NextMBBI == I) {
+ Advance = true;
+ ++I;
+ }
+ MBB.erase(NextMBBI);
+ }
+ }
+
+ if (!DoMerge)
+ return false;
+
+ unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
+ .addReg(Base, getDefRegState(true)) // WB base register
+ .addReg(Base, getKillRegState(BaseKill))
+ .addImm(Pred).addReg(PredReg);
+
+ // Transfer the rest of operands.
+ for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
+ MIB.addOperand(MI->getOperand(OpNum));
+
+ // Transfer memoperands.
+ MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+
+ MBB.erase(MBBI);
+ return true;
+}
+
+static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
+ ARM_AM::AddrOpc Mode) {
+ switch (Opc) {
+ case ARM::LDRi12:
+ return ARM::LDR_PRE_IMM;
+ case ARM::STRi12:
+ return ARM::STR_PRE_IMM;
+ case ARM::VLDRS:
+ return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
+ case ARM::VLDRD:
+ return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
+ case ARM::VSTRS:
+ return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
+ case ARM::VSTRD:
+ return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
+ case ARM::t2LDRi8:
+ case ARM::t2LDRi12:
+ return ARM::t2LDR_PRE;
+ case ARM::t2STRi8:
+ case ARM::t2STRi12:
+ return ARM::t2STR_PRE;
+ default: llvm_unreachable("Unhandled opcode!");
+ }
+ return 0;
+}
+
+static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
+ ARM_AM::AddrOpc Mode) {
+ switch (Opc) {
+ case ARM::LDRi12:
+ return ARM::LDR_POST_IMM;
+ case ARM::STRi12:
+ return ARM::STR_POST_IMM;
+ case ARM::VLDRS:
+ return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
+ case ARM::VLDRD:
+ return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
+ case ARM::VSTRS:
+ return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
+ case ARM::VSTRD:
+ return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
+ case ARM::t2LDRi8:
+ case ARM::t2LDRi12:
+ return ARM::t2LDR_POST;
+ case ARM::t2STRi8:
+ case ARM::t2STRi12:
+ return ARM::t2STR_POST;
+ default: llvm_unreachable("Unhandled opcode!");
+ }
+ return 0;
+}
+
+/// MergeBaseUpdateLoadStore - Fold proceeding/trailing inc/dec of base
+/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
+bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const TargetInstrInfo *TII,
+ bool &Advance,
+ MachineBasicBlock::iterator &I) {
+ MachineInstr *MI = MBBI;
+ unsigned Base = MI->getOperand(1).getReg();
+ bool BaseKill = MI->getOperand(1).isKill();
+ unsigned Bytes = getLSMultipleTransferSize(MI);
+ int Opcode = MI->getOpcode();
+ DebugLoc dl = MI->getDebugLoc();
+ bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
+ Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
+ bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
+ if (isi32Load(Opcode) || isi32Store(Opcode))
+ if (MI->getOperand(2).getImm() != 0)
+ return false;
+ if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
+ return false;
+
+ bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
+ // Can't do the merge if the destination register is the same as the would-be
+ // writeback register.
+ if (isLd && MI->getOperand(0).getReg() == Base)
+ return false;
+
+ unsigned PredReg = 0;
+ ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
+ bool DoMerge = false;
+ ARM_AM::AddrOpc AddSub = ARM_AM::add;
+ unsigned NewOpc = 0;
+ // AM2 - 12 bits, thumb2 - 8 bits.
+ unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
+
+ // Try merging with the previous instruction.
+ MachineBasicBlock::iterator BeginMBBI = MBB.begin();
+ if (MBBI != BeginMBBI) {
+ MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
+ while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
+ --PrevMBBI;
+ if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
+ DoMerge = true;
+ AddSub = ARM_AM::sub;
+ } else if (!isAM5 &&
+ isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
+ DoMerge = true;
+ }
+ if (DoMerge) {
+ NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
+ MBB.erase(PrevMBBI);
+ }
+ }
+
+ // Try merging with the next instruction.
+ MachineBasicBlock::iterator EndMBBI = MBB.end();
+ if (!DoMerge && MBBI != EndMBBI) {
+ MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
+ while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
+ ++NextMBBI;
+ if (!isAM5 &&
+ isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
+ DoMerge = true;
+ AddSub = ARM_AM::sub;
+ } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
+ DoMerge = true;
+ }
+ if (DoMerge) {
+ NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
+ if (NextMBBI == I) {
+ Advance = true;
+ ++I;
+ }
+ MBB.erase(NextMBBI);
+ }
+ }
+
+ if (!DoMerge)
+ return false;
+
+ if (isAM5) {
+ // VLDM[SD}_UPD, VSTM[SD]_UPD
+ // (There are no base-updating versions of VLDR/VSTR instructions, but the
+ // updating load/store-multiple instructions can be used with only one
+ // register.)
+ MachineOperand &MO = MI->getOperand(0);
+ BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
+ .addReg(Base, getDefRegState(true)) // WB base register
+ .addReg(Base, getKillRegState(isLd ? BaseKill : false))
+ .addImm(Pred).addReg(PredReg)
+ .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
+ getKillRegState(MO.isKill())));
+ } else if (isLd) {
+ if (isAM2) {
+ // LDR_PRE, LDR_POST
+ if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
+ int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
+ BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
+ .addReg(Base, RegState::Define)
+ .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
+ } else {
+ int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
+ BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
+ .addReg(Base, RegState::Define)
+ .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
+ }
+ } else {
+ int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
+ // t2LDR_PRE, t2LDR_POST
+ BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
+ .addReg(Base, RegState::Define)
+ .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
+ }
+ } else {
+ MachineOperand &MO = MI->getOperand(0);
+ // FIXME: post-indexed stores use am2offset_imm, which still encodes
+ // the vestigal zero-reg offset register. When that's fixed, this clause
+ // can be removed entirely.
+ if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
+ int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
+ // STR_PRE, STR_POST
+ BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
+ .addReg(MO.getReg(), getKillRegState(MO.isKill()))
+ .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
+ } else {
+ int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
+ // t2STR_PRE, t2STR_POST
+ BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
+ .addReg(MO.getReg(), getKillRegState(MO.isKill()))
+ .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
+ }
+ }
+ MBB.erase(MBBI);
+
+ return true;
+}
+
+/// isMemoryOp - Returns true if instruction is a memory operation that this
+/// pass is capable of operating on.
+static bool isMemoryOp(const MachineInstr *MI) {
+ // When no memory operands are present, conservatively assume unaligned,
+ // volatile, unfoldable.
+ if (!MI->hasOneMemOperand())
+ return false;
+
+ const MachineMemOperand *MMO = *MI->memoperands_begin();
+
+ // Don't touch volatile memory accesses - we may be changing their order.
+ if (MMO->isVolatile())
+ return false;
+
+ // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
+ // not.
+ if (MMO->getAlignment() < 4)
+ return false;
+
+ // str <undef> could probably be eliminated entirely, but for now we just want
+ // to avoid making a mess of it.
+ // FIXME: Use str <undef> as a wildcard to enable better stm folding.
+ if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
+ MI->getOperand(0).isUndef())
+ return false;
+
+ // Likewise don't mess with references to undefined addresses.
+ if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
+ MI->getOperand(1).isUndef())
+ return false;
+
+ int Opcode = MI->getOpcode();
+ switch (Opcode) {
+ default: break;
+ case ARM::VLDRS:
+ case ARM::VSTRS:
+ return MI->getOperand(1).isReg();
+ case ARM::VLDRD:
+ case ARM::VSTRD:
+ return MI->getOperand(1).isReg();
+ case ARM::LDRi12:
+ case ARM::STRi12:
+ case ARM::t2LDRi8:
+ case ARM::t2LDRi12:
+ case ARM::t2STRi8:
+ case ARM::t2STRi12:
+ return MI->getOperand(1).isReg();
+ }
+ return false;
+}
+
+/// AdvanceRS - Advance register scavenger to just before the earliest memory
+/// op that is being merged.
+void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
+ MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
+ unsigned Position = MemOps[0].Position;
+ for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
+ if (MemOps[i].Position < Position) {
+ Position = MemOps[i].Position;
+ Loc = MemOps[i].MBBI;
+ }
+ }
+
+ if (Loc != MBB.begin())
+ RS->forward(prior(Loc));
+}
+
+static int getMemoryOpOffset(const MachineInstr *MI) {
+ int Opcode = MI->getOpcode();
+ bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
+ unsigned NumOperands = MI->getDesc().getNumOperands();
+ unsigned OffField = MI->getOperand(NumOperands-3).getImm();
+
+ if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
+ Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
+ Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
+ Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
+ return OffField;
+
+ int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
+ : ARM_AM::getAM5Offset(OffField) * 4;
+ if (isAM3) {
+ if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
+ Offset = -Offset;
+ } else {
+ if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
+ Offset = -Offset;
+ }
+ return Offset;
+}
+
+static void InsertLDR_STR(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ int Offset, bool isDef,
+ DebugLoc dl, unsigned NewOpc,
+ unsigned Reg, bool RegDeadKill, bool RegUndef,
+ unsigned BaseReg, bool BaseKill, bool BaseUndef,
+ bool OffKill, bool OffUndef,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ const TargetInstrInfo *TII, bool isT2) {
+ if (isDef) {
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
+ TII->get(NewOpc))
+ .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
+ .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
+ MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
+ } else {
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
+ TII->get(NewOpc))
+ .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
+ .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
+ MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
+ }
+}
+
+bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI) {
+ MachineInstr *MI = &*MBBI;
+ unsigned Opcode = MI->getOpcode();
+ if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
+ Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
+ unsigned EvenReg = MI->getOperand(0).getReg();
+ unsigned OddReg = MI->getOperand(1).getReg();
+ unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
+ unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
+ if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
+ return false;
+
+ MachineBasicBlock::iterator NewBBI = MBBI;
+ bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
+ bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
+ bool EvenDeadKill = isLd ?
+ MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
+ bool EvenUndef = MI->getOperand(0).isUndef();
+ bool OddDeadKill = isLd ?
+ MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
+ bool OddUndef = MI->getOperand(1).isUndef();
+ const MachineOperand &BaseOp = MI->getOperand(2);
+ unsigned BaseReg = BaseOp.getReg();
+ bool BaseKill = BaseOp.isKill();
+ bool BaseUndef = BaseOp.isUndef();
+ bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
+ bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
+ int OffImm = getMemoryOpOffset(MI);
+ unsigned PredReg = 0;
+ ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
+
+ if (OddRegNum > EvenRegNum && OffImm == 0) {
+ // Ascending register numbers and no offset. It's safe to change it to a
+ // ldm or stm.
+ unsigned NewOpc = (isLd)
+ ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
+ : (isT2 ? ARM::t2STMIA : ARM::STMIA);
+ if (isLd) {
+ BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
+ .addReg(BaseReg, getKillRegState(BaseKill))
+ .addImm(Pred).addReg(PredReg)
+ .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
+ .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
+ ++NumLDRD2LDM;
+ } else {
+ BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
+ .addReg(BaseReg, getKillRegState(BaseKill))
+ .addImm(Pred).addReg(PredReg)
+ .addReg(EvenReg,
+ getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
+ .addReg(OddReg,
+ getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
+ ++NumSTRD2STM;
+ }
+ NewBBI = llvm::prior(MBBI);
+ } else {
+ // Split into two instructions.
+ unsigned NewOpc = (isLd)
+ ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
+ : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
+ DebugLoc dl = MBBI->getDebugLoc();
+ // If this is a load and base register is killed, it may have been
+ // re-defed by the load, make sure the first load does not clobber it.
+ if (isLd &&
+ (BaseKill || OffKill) &&
+ (TRI->regsOverlap(EvenReg, BaseReg))) {
+ assert(!TRI->regsOverlap(OddReg, BaseReg));
+ InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
+ OddReg, OddDeadKill, false,
+ BaseReg, false, BaseUndef, false, OffUndef,
+ Pred, PredReg, TII, isT2);
+ NewBBI = llvm::prior(MBBI);
+ InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
+ EvenReg, EvenDeadKill, false,
+ BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
+ Pred, PredReg, TII, isT2);
+ } else {
+ if (OddReg == EvenReg && EvenDeadKill) {
+ // If the two source operands are the same, the kill marker is
+ // probably on the first one. e.g.
+ // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
+ EvenDeadKill = false;
+ OddDeadKill = true;
+ }
+ InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
+ EvenReg, EvenDeadKill, EvenUndef,
+ BaseReg, false, BaseUndef, false, OffUndef,
+ Pred, PredReg, TII, isT2);
+ NewBBI = llvm::prior(MBBI);
+ InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
+ OddReg, OddDeadKill, OddUndef,
+ BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
+ Pred, PredReg, TII, isT2);
+ }
+ if (isLd)
+ ++NumLDRD2LDR;
+ else
+ ++NumSTRD2STR;
+ }
+
+ MBB.erase(MI);
+ MBBI = NewBBI;
+ return true;
+ }
+ return false;
+}
+
+/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
+/// ops of the same base and incrementing offset into LDM / STM ops.
+bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
+ unsigned NumMerges = 0;
+ unsigned NumMemOps = 0;
+ MemOpQueue MemOps;
+ unsigned CurrBase = 0;
+ int CurrOpc = -1;
+ unsigned CurrSize = 0;
+ ARMCC::CondCodes CurrPred = ARMCC::AL;
+ unsigned CurrPredReg = 0;
+ unsigned Position = 0;
+ SmallVector<MachineBasicBlock::iterator,4> Merges;
+
+ RS->enterBasicBlock(&MBB);
+ MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ while (MBBI != E) {
+ if (FixInvalidRegPairOp(MBB, MBBI))
+ continue;
+
+ bool Advance = false;
+ bool TryMerge = false;
+ bool Clobber = false;
+
+ bool isMemOp = isMemoryOp(MBBI);
+ if (isMemOp) {
+ int Opcode = MBBI->getOpcode();
+ unsigned Size = getLSMultipleTransferSize(MBBI);
+ const MachineOperand &MO = MBBI->getOperand(0);
+ unsigned Reg = MO.getReg();
+ bool isKill = MO.isDef() ? false : MO.isKill();
+ unsigned Base = MBBI->getOperand(1).getReg();
+ unsigned PredReg = 0;
+ ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
+ int Offset = getMemoryOpOffset(MBBI);
+ // Watch out for:
+ // r4 := ldr [r5]
+ // r5 := ldr [r5, #4]
+ // r6 := ldr [r5, #8]
+ //
+ // The second ldr has effectively broken the chain even though it
+ // looks like the later ldr(s) use the same base register. Try to
+ // merge the ldr's so far, including this one. But don't try to
+ // combine the following ldr(s).
+ Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
+ if (CurrBase == 0 && !Clobber) {
+ // Start of a new chain.
+ CurrBase = Base;
+ CurrOpc = Opcode;
+ CurrSize = Size;
+ CurrPred = Pred;
+ CurrPredReg = PredReg;
+ MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
+ ++NumMemOps;
+ Advance = true;
+ } else {
+ if (Clobber) {
+ TryMerge = true;
+ Advance = true;
+ }
+
+ if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
+ // No need to match PredReg.
+ // Continue adding to the queue.
+ if (Offset > MemOps.back().Offset) {
+ MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
+ Position, MBBI));
+ ++NumMemOps;
+ Advance = true;
+ } else {
+ for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
+ I != E; ++I) {
+ if (Offset < I->Offset) {
+ MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
+ Position, MBBI));
+ ++NumMemOps;
+ Advance = true;
+ break;
+ } else if (Offset == I->Offset) {
+ // Collision! This can't be merged!
+ break;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ if (MBBI->isDebugValue()) {
+ ++MBBI;
+ if (MBBI == E)
+ // Reach the end of the block, try merging the memory instructions.
+ TryMerge = true;
+ } else if (Advance) {
+ ++Position;
+ ++MBBI;
+ if (MBBI == E)
+ // Reach the end of the block, try merging the memory instructions.
+ TryMerge = true;
+ } else
+ TryMerge = true;
+
+ if (TryMerge) {
+ if (NumMemOps > 1) {
+ // Try to find a free register to use as a new base in case it's needed.
+ // First advance to the instruction just before the start of the chain.
+ AdvanceRS(MBB, MemOps);
+ // Find a scratch register.
+ unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
+ // Process the load / store instructions.
+ RS->forward(prior(MBBI));
+
+ // Merge ops.
+ Merges.clear();
+ MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
+ CurrPred, CurrPredReg, Scratch, MemOps, Merges);
+
+ // Try folding preceding/trailing base inc/dec into the generated
+ // LDM/STM ops.
+ for (unsigned i = 0, e = Merges.size(); i < e; ++i)
+ if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
+ ++NumMerges;
+ NumMerges += Merges.size();
+
+ // Try folding preceding/trailing base inc/dec into those load/store
+ // that were not merged to form LDM/STM ops.
+ for (unsigned i = 0; i != NumMemOps; ++i)
+ if (!MemOps[i].Merged)
+ if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
+ ++NumMerges;
+
+ // RS may be pointing to an instruction that's deleted.
+ RS->skipTo(prior(MBBI));
+ } else if (NumMemOps == 1) {
+ // Try folding preceding/trailing base inc/dec into the single
+ // load/store.
+ if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
+ ++NumMerges;
+ RS->forward(prior(MBBI));
+ }
+ }
+
+ CurrBase = 0;
+ CurrOpc = -1;
+ CurrSize = 0;
+ CurrPred = ARMCC::AL;
+ CurrPredReg = 0;
+ if (NumMemOps) {
+ MemOps.clear();
+ NumMemOps = 0;
+ }
+
+ // If iterator hasn't been advanced and this is not a memory op, skip it.
+ // It can't start a new chain anyway.
+ if (!Advance && !isMemOp && MBBI != E) {
+ ++Position;
+ ++MBBI;
+ }
+ }
+ }
+ return NumMerges > 0;
+}
+
+/// MergeReturnIntoLDM - If this is a exit BB, try merging the return ops
+/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
+/// directly restore the value of LR into pc.
+/// ldmfd sp!, {..., lr}
+/// bx lr
+/// or
+/// ldmfd sp!, {..., lr}
+/// mov pc, lr
+/// =>
+/// ldmfd sp!, {..., pc}
+bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
+ if (MBB.empty()) return false;
+
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ if (MBBI != MBB.begin() &&
+ (MBBI->getOpcode() == ARM::BX_RET ||
+ MBBI->getOpcode() == ARM::tBX_RET ||
+ MBBI->getOpcode() == ARM::MOVPCLR)) {
+ MachineInstr *PrevMI = prior(MBBI);
+ unsigned Opcode = PrevMI->getOpcode();
+ if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
+ Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
+ Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
+ MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
+ if (MO.getReg() != ARM::LR)
+ return false;
+ unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
+ assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
+ Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
+ PrevMI->setDesc(TII->get(NewOpc));
+ MO.setReg(ARM::PC);
+ PrevMI->copyImplicitOps(&*MBBI);
+ MBB.erase(MBBI);
+ return true;
+ }
+ }
+ return false;
+}
+
+bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
+ const TargetMachine &TM = Fn.getTarget();
+ AFI = Fn.getInfo<ARMFunctionInfo>();
+ TII = TM.getInstrInfo();
+ TRI = TM.getRegisterInfo();
+ RS = new RegScavenger();
+ isThumb2 = AFI->isThumb2Function();
+
+ bool Modified = false;
+ for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
+ ++MFI) {
+ MachineBasicBlock &MBB = *MFI;
+ Modified |= LoadStoreMultipleOpti(MBB);
+ if (TM.getSubtarget<ARMSubtarget>().hasV5TOps())
+ Modified |= MergeReturnIntoLDM(MBB);
+ }
+
+ delete RS;
+ return Modified;
+}
+
+
+/// ARMPreAllocLoadStoreOpt - Pre- register allocation pass that move
+/// load / stores from consecutive locations close to make it more
+/// likely they will be combined later.
+
+namespace {
+ struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
+ static char ID;
+ ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
+
+ const TargetData *TD;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ const ARMSubtarget *STI;
+ MachineRegisterInfo *MRI;
+ MachineFunction *MF;
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual const char *getPassName() const {
+ return "ARM pre- register allocation load / store optimization pass";
+ }
+
+ private:
+ bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
+ unsigned &NewOpc, unsigned &EvenReg,
+ unsigned &OddReg, unsigned &BaseReg,
+ int &Offset,
+ unsigned &PredReg, ARMCC::CondCodes &Pred,
+ bool &isT2);
+ bool RescheduleOps(MachineBasicBlock *MBB,
+ SmallVector<MachineInstr*, 4> &Ops,
+ unsigned Base, bool isLd,
+ DenseMap<MachineInstr*, unsigned> &MI2LocMap);
+ bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
+ };
+ char ARMPreAllocLoadStoreOpt::ID = 0;
+}
+
+bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
+ TD = Fn.getTarget().getTargetData();
+ TII = Fn.getTarget().getInstrInfo();
+ TRI = Fn.getTarget().getRegisterInfo();
+ STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
+ MRI = &Fn.getRegInfo();
+ MF = &Fn;
+
+ bool Modified = false;
+ for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
+ ++MFI)
+ Modified |= RescheduleLoadStoreInstrs(MFI);
+
+ return Modified;
+}
+
+static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
+ MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator E,
+ SmallPtrSet<MachineInstr*, 4> &MemOps,
+ SmallSet<unsigned, 4> &MemRegs,
+ const TargetRegisterInfo *TRI) {
+ // Are there stores / loads / calls between them?
+ // FIXME: This is overly conservative. We should make use of alias information
+ // some day.
+ SmallSet<unsigned, 4> AddedRegPressure;
+ while (++I != E) {
+ if (I->isDebugValue() || MemOps.count(&*I))
+ continue;
+ const MCInstrDesc &MCID = I->getDesc();
+ if (MCID.isCall() || MCID.isTerminator() || I->hasUnmodeledSideEffects())
+ return false;
+ if (isLd && MCID.mayStore())
+ return false;
+ if (!isLd) {
+ if (MCID.mayLoad())
+ return false;
+ // It's not safe to move the first 'str' down.
+ // str r1, [r0]
+ // strh r5, [r0]
+ // str r4, [r0, #+4]
+ if (MCID.mayStore())
+ return false;
+ }
+ for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
+ MachineOperand &MO = I->getOperand(j);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (MO.isDef() && TRI->regsOverlap(Reg, Base))
+ return false;
+ if (Reg != Base && !MemRegs.count(Reg))
+ AddedRegPressure.insert(Reg);
+ }
+ }
+
+ // Estimate register pressure increase due to the transformation.
+ if (MemRegs.size() <= 4)
+ // Ok if we are moving small number of instructions.
+ return true;
+ return AddedRegPressure.size() <= MemRegs.size() * 2;
+}
+
+bool
+ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
+ DebugLoc &dl,
+ unsigned &NewOpc, unsigned &EvenReg,
+ unsigned &OddReg, unsigned &BaseReg,
+ int &Offset, unsigned &PredReg,
+ ARMCC::CondCodes &Pred,
+ bool &isT2) {
+ // Make sure we're allowed to generate LDRD/STRD.
+ if (!STI->hasV5TEOps())
+ return false;
+
+ // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
+ unsigned Scale = 1;
+ unsigned Opcode = Op0->getOpcode();
+ if (Opcode == ARM::LDRi12)
+ NewOpc = ARM::LDRD;
+ else if (Opcode == ARM::STRi12)
+ NewOpc = ARM::STRD;
+ else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
+ NewOpc = ARM::t2LDRDi8;
+ Scale = 4;
+ isT2 = true;
+ } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
+ NewOpc = ARM::t2STRDi8;
+ Scale = 4;
+ isT2 = true;
+ } else
+ return false;
+
+ // Make sure the base address satisfies i64 ld / st alignment requirement.
+ if (!Op0->hasOneMemOperand() ||
+ !(*Op0->memoperands_begin())->getValue() ||
+ (*Op0->memoperands_begin())->isVolatile())
+ return false;
+
+ unsigned Align = (*Op0->memoperands_begin())->getAlignment();
+ const Function *Func = MF->getFunction();
+ unsigned ReqAlign = STI->hasV6Ops()
+ ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
+ : 8; // Pre-v6 need 8-byte align
+ if (Align < ReqAlign)
+ return false;
+
+ // Then make sure the immediate offset fits.
+ int OffImm = getMemoryOpOffset(Op0);
+ if (isT2) {
+ int Limit = (1 << 8) * Scale;
+ if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
+ return false;
+ Offset = OffImm;
+ } else {
+ ARM_AM::AddrOpc AddSub = ARM_AM::add;
+ if (OffImm < 0) {
+ AddSub = ARM_AM::sub;
+ OffImm = - OffImm;
+ }
+ int Limit = (1 << 8) * Scale;
+ if (OffImm >= Limit || (OffImm & (Scale-1)))
+ return false;
+ Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
+ }
+ EvenReg = Op0->getOperand(0).getReg();
+ OddReg = Op1->getOperand(0).getReg();
+ if (EvenReg == OddReg)
+ return false;
+ BaseReg = Op0->getOperand(1).getReg();
+ Pred = llvm::getInstrPredicate(Op0, PredReg);
+ dl = Op0->getDebugLoc();
+ return true;
+}
+
+namespace {
+ struct OffsetCompare {
+ bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
+ int LOffset = getMemoryOpOffset(LHS);
+ int ROffset = getMemoryOpOffset(RHS);
+ assert(LHS == RHS || LOffset != ROffset);
+ return LOffset > ROffset;
+ }
+ };
+}
+
+bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
+ SmallVector<MachineInstr*, 4> &Ops,
+ unsigned Base, bool isLd,
+ DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
+ bool RetVal = false;
+
+ // Sort by offset (in reverse order).
+ std::sort(Ops.begin(), Ops.end(), OffsetCompare());
+
+ // The loads / stores of the same base are in order. Scan them from first to
+ // last and check for the following:
+ // 1. Any def of base.
+ // 2. Any gaps.
+ while (Ops.size() > 1) {
+ unsigned FirstLoc = ~0U;
+ unsigned LastLoc = 0;
+ MachineInstr *FirstOp = 0;
+ MachineInstr *LastOp = 0;
+ int LastOffset = 0;
+ unsigned LastOpcode = 0;
+ unsigned LastBytes = 0;
+ unsigned NumMove = 0;
+ for (int i = Ops.size() - 1; i >= 0; --i) {
+ MachineInstr *Op = Ops[i];
+ unsigned Loc = MI2LocMap[Op];
+ if (Loc <= FirstLoc) {
+ FirstLoc = Loc;
+ FirstOp = Op;
+ }
+ if (Loc >= LastLoc) {
+ LastLoc = Loc;
+ LastOp = Op;
+ }
+
+ unsigned Opcode = Op->getOpcode();
+ if (LastOpcode && Opcode != LastOpcode)
+ break;
+
+ int Offset = getMemoryOpOffset(Op);
+ unsigned Bytes = getLSMultipleTransferSize(Op);
+ if (LastBytes) {
+ if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
+ break;
+ }
+ LastOffset = Offset;
+ LastBytes = Bytes;
+ LastOpcode = Opcode;
+ if (++NumMove == 8) // FIXME: Tune this limit.
+ break;
+ }
+
+ if (NumMove <= 1)
+ Ops.pop_back();
+ else {
+ SmallPtrSet<MachineInstr*, 4> MemOps;
+ SmallSet<unsigned, 4> MemRegs;
+ for (int i = NumMove-1; i >= 0; --i) {
+ MemOps.insert(Ops[i]);
+ MemRegs.insert(Ops[i]->getOperand(0).getReg());
+ }
+
+ // Be conservative, if the instructions are too far apart, don't
+ // move them. We want to limit the increase of register pressure.
+ bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
+ if (DoMove)
+ DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
+ MemOps, MemRegs, TRI);
+ if (!DoMove) {
+ for (unsigned i = 0; i != NumMove; ++i)
+ Ops.pop_back();
+ } else {
+ // This is the new location for the loads / stores.
+ MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
+ while (InsertPos != MBB->end()
+ && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
+ ++InsertPos;
+
+ // If we are moving a pair of loads / stores, see if it makes sense
+ // to try to allocate a pair of registers that can form register pairs.
+ MachineInstr *Op0 = Ops.back();
+ MachineInstr *Op1 = Ops[Ops.size()-2];
+ unsigned EvenReg = 0, OddReg = 0;
+ unsigned BaseReg = 0, PredReg = 0;
+ ARMCC::CondCodes Pred = ARMCC::AL;
+ bool isT2 = false;
+ unsigned NewOpc = 0;
+ int Offset = 0;
+ DebugLoc dl;
+ if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
+ EvenReg, OddReg, BaseReg,
+ Offset, PredReg, Pred, isT2)) {
+ Ops.pop_back();
+ Ops.pop_back();
+
+ const MCInstrDesc &MCID = TII->get(NewOpc);
+ const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI);
+ MRI->constrainRegClass(EvenReg, TRC);
+ MRI->constrainRegClass(OddReg, TRC);
+
+ // Form the pair instruction.
+ if (isLd) {
+ MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
+ .addReg(EvenReg, RegState::Define)
+ .addReg(OddReg, RegState::Define)
+ .addReg(BaseReg);
+ // FIXME: We're converting from LDRi12 to an insn that still
+ // uses addrmode2, so we need an explicit offset reg. It should
+ // always by reg0 since we're transforming LDRi12s.
+ if (!isT2)
+ MIB.addReg(0);
+ MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
+ ++NumLDRDFormed;
+ } else {
+ MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
+ .addReg(EvenReg)
+ .addReg(OddReg)
+ .addReg(BaseReg);
+ // FIXME: We're converting from LDRi12 to an insn that still
+ // uses addrmode2, so we need an explicit offset reg. It should
+ // always by reg0 since we're transforming STRi12s.
+ if (!isT2)
+ MIB.addReg(0);
+ MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
+ ++NumSTRDFormed;
+ }
+ MBB->erase(Op0);
+ MBB->erase(Op1);
+
+ // Add register allocation hints to form register pairs.
+ MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
+ MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg);
+ } else {
+ for (unsigned i = 0; i != NumMove; ++i) {
+ MachineInstr *Op = Ops.back();
+ Ops.pop_back();
+ MBB->splice(InsertPos, MBB, Op);
+ }
+ }
+
+ NumLdStMoved += NumMove;
+ RetVal = true;
+ }
+ }
+ }
+
+ return RetVal;
+}
+
+bool
+ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
+ bool RetVal = false;
+
+ DenseMap<MachineInstr*, unsigned> MI2LocMap;
+ DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
+ DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
+ SmallVector<unsigned, 4> LdBases;
+ SmallVector<unsigned, 4> StBases;
+
+ unsigned Loc = 0;
+ MachineBasicBlock::iterator MBBI = MBB->begin();
+ MachineBasicBlock::iterator E = MBB->end();
+ while (MBBI != E) {
+ for (; MBBI != E; ++MBBI) {
+ MachineInstr *MI = MBBI;
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (MCID.isCall() || MCID.isTerminator()) {
+ // Stop at barriers.
+ ++MBBI;
+ break;
+ }
+
+ if (!MI->isDebugValue())
+ MI2LocMap[MI] = ++Loc;
+
+ if (!isMemoryOp(MI))
+ continue;
+ unsigned PredReg = 0;
+ if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
+ continue;
+
+ int Opc = MI->getOpcode();
+ bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
+ unsigned Base = MI->getOperand(1).getReg();
+ int Offset = getMemoryOpOffset(MI);
+
+ bool StopHere = false;
+ if (isLd) {
+ DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
+ Base2LdsMap.find(Base);
+ if (BI != Base2LdsMap.end()) {
+ for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
+ if (Offset == getMemoryOpOffset(BI->second[i])) {
+ StopHere = true;
+ break;
+ }
+ }
+ if (!StopHere)
+ BI->second.push_back(MI);
+ } else {
+ SmallVector<MachineInstr*, 4> MIs;
+ MIs.push_back(MI);
+ Base2LdsMap[Base] = MIs;
+ LdBases.push_back(Base);
+ }
+ } else {
+ DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
+ Base2StsMap.find(Base);
+ if (BI != Base2StsMap.end()) {
+ for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
+ if (Offset == getMemoryOpOffset(BI->second[i])) {
+ StopHere = true;
+ break;
+ }
+ }
+ if (!StopHere)
+ BI->second.push_back(MI);
+ } else {
+ SmallVector<MachineInstr*, 4> MIs;
+ MIs.push_back(MI);
+ Base2StsMap[Base] = MIs;
+ StBases.push_back(Base);
+ }
+ }
+
+ if (StopHere) {
+ // Found a duplicate (a base+offset combination that's seen earlier).
+ // Backtrack.
+ --Loc;
+ break;
+ }
+ }
+
+ // Re-schedule loads.
+ for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
+ unsigned Base = LdBases[i];
+ SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
+ if (Lds.size() > 1)
+ RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
+ }
+
+ // Re-schedule stores.
+ for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
+ unsigned Base = StBases[i];
+ SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
+ if (Sts.size() > 1)
+ RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
+ }
+
+ if (MBBI != E) {
+ Base2LdsMap.clear();
+ Base2StsMap.clear();
+ LdBases.clear();
+ StBases.clear();
+ }
+ }
+
+ return RetVal;
+}
+
+
+/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
+/// optimization pass.
+FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
+ if (PreAlloc)
+ return new ARMPreAllocLoadStoreOpt();
+ return new ARMLoadStoreOpt();
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp b/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp
new file mode 100644
index 0000000..daa126d
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp
@@ -0,0 +1,125 @@
+//===-- ARMMCInstLower.cpp - Convert ARM MachineInstr to an MCInst --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains code to lower ARM MachineInstrs to their corresponding
+// MCInst records.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMAsmPrinter.h"
+#include "MCTargetDesc/ARMMCExpr.h"
+#include "llvm/Constants.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Target/Mangler.h"
+using namespace llvm;
+
+
+MCOperand ARMAsmPrinter::GetSymbolRef(const MachineOperand &MO,
+ const MCSymbol *Symbol) {
+ const MCExpr *Expr;
+ switch (MO.getTargetFlags()) {
+ default: {
+ Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None,
+ OutContext);
+ switch (MO.getTargetFlags()) {
+ default:
+ assert(0 && "Unknown target flag on symbol operand");
+ case 0:
+ break;
+ case ARMII::MO_LO16:
+ Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None,
+ OutContext);
+ Expr = ARMMCExpr::CreateLower16(Expr, OutContext);
+ break;
+ case ARMII::MO_HI16:
+ Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None,
+ OutContext);
+ Expr = ARMMCExpr::CreateUpper16(Expr, OutContext);
+ break;
+ }
+ break;
+ }
+
+ case ARMII::MO_PLT:
+ Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_ARM_PLT,
+ OutContext);
+ break;
+ }
+
+ if (!MO.isJTI() && MO.getOffset())
+ Expr = MCBinaryExpr::CreateAdd(Expr,
+ MCConstantExpr::Create(MO.getOffset(),
+ OutContext),
+ OutContext);
+ return MCOperand::CreateExpr(Expr);
+
+}
+
+bool ARMAsmPrinter::lowerOperand(const MachineOperand &MO,
+ MCOperand &MCOp) {
+ switch (MO.getType()) {
+ default:
+ assert(0 && "unknown operand type");
+ return false;
+ case MachineOperand::MO_Register:
+ // Ignore all non-CPSR implicit register operands.
+ if (MO.isImplicit() && MO.getReg() != ARM::CPSR)
+ return false;
+ assert(!MO.getSubReg() && "Subregs should be eliminated!");
+ MCOp = MCOperand::CreateReg(MO.getReg());
+ break;
+ case MachineOperand::MO_Immediate:
+ MCOp = MCOperand::CreateImm(MO.getImm());
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
+ MO.getMBB()->getSymbol(), OutContext));
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ MCOp = GetSymbolRef(MO, Mang->getSymbol(MO.getGlobal()));
+ break;
+ case MachineOperand::MO_ExternalSymbol:
+ MCOp = GetSymbolRef(MO,
+ GetExternalSymbolSymbol(MO.getSymbolName()));
+ break;
+ case MachineOperand::MO_JumpTableIndex:
+ MCOp = GetSymbolRef(MO, GetJTISymbol(MO.getIndex()));
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ MCOp = GetSymbolRef(MO, GetCPISymbol(MO.getIndex()));
+ break;
+ case MachineOperand::MO_BlockAddress:
+ MCOp = GetSymbolRef(MO, GetBlockAddressSymbol(MO.getBlockAddress()));
+ break;
+ case MachineOperand::MO_FPImmediate: {
+ APFloat Val = MO.getFPImm()->getValueAPF();
+ bool ignored;
+ Val.convert(APFloat::IEEEdouble, APFloat::rmTowardZero, &ignored);
+ MCOp = MCOperand::CreateFPImm(Val.convertToDouble());
+ break;
+ }
+ }
+ return true;
+}
+
+void llvm::LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
+ ARMAsmPrinter &AP) {
+ OutMI.setOpcode(MI->getOpcode());
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+
+ MCOperand MCOp;
+ if (AP.lowerOperand(MO, MCOp))
+ OutMI.addOperand(MCOp);
+ }
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
new file mode 100644
index 0000000..138f0c2
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -0,0 +1,250 @@
+//====- ARMMachineFuctionInfo.h - ARM machine function info -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares ARM-specific per-machine-function information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMMACHINEFUNCTIONINFO_H
+#define ARMMACHINEFUNCTIONINFO_H
+
+#include "ARMSubtarget.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/BitVector.h"
+
+namespace llvm {
+
+/// ARMFunctionInfo - This class is derived from MachineFunctionInfo and
+/// contains private ARM-specific information for each MachineFunction.
+class ARMFunctionInfo : public MachineFunctionInfo {
+
+ /// isThumb - True if this function is compiled under Thumb mode.
+ /// Used to initialized Align, so must precede it.
+ bool isThumb;
+
+ /// hasThumb2 - True if the target architecture supports Thumb2. Do not use
+ /// to determine if function is compiled under Thumb mode, for that use
+ /// 'isThumb'.
+ bool hasThumb2;
+
+ /// VarArgsRegSaveSize - Size of the register save area for vararg functions.
+ ///
+ unsigned VarArgsRegSaveSize;
+
+ /// HasStackFrame - True if this function has a stack frame. Set by
+ /// processFunctionBeforeCalleeSavedScan().
+ bool HasStackFrame;
+
+ /// RestoreSPFromFP - True if epilogue should restore SP from FP. Set by
+ /// emitPrologue.
+ bool RestoreSPFromFP;
+
+ /// LRSpilledForFarJump - True if the LR register has been for spilled to
+ /// enable far jump.
+ bool LRSpilledForFarJump;
+
+ /// FramePtrSpillOffset - If HasStackFrame, this records the frame pointer
+ /// spill stack offset.
+ unsigned FramePtrSpillOffset;
+
+ /// GPRCS1Offset, GPRCS2Offset, DPRCSOffset - Starting offset of callee saved
+ /// register spills areas. For Mac OS X:
+ ///
+ /// GPR callee-saved (1) : r4, r5, r6, r7, lr
+ /// --------------------------------------------
+ /// GPR callee-saved (2) : r8, r10, r11
+ /// --------------------------------------------
+ /// DPR callee-saved : d8 - d15
+ unsigned GPRCS1Offset;
+ unsigned GPRCS2Offset;
+ unsigned DPRCSOffset;
+
+ /// GPRCS1Size, GPRCS2Size, DPRCSSize - Sizes of callee saved register spills
+ /// areas.
+ unsigned GPRCS1Size;
+ unsigned GPRCS2Size;
+ unsigned DPRCSSize;
+
+ /// GPRCS1Frames, GPRCS2Frames, DPRCSFrames - Keeps track of frame indices
+ /// which belong to these spill areas.
+ BitVector GPRCS1Frames;
+ BitVector GPRCS2Frames;
+ BitVector DPRCSFrames;
+
+ /// JumpTableUId - Unique id for jumptables.
+ ///
+ unsigned JumpTableUId;
+
+ unsigned PICLabelUId;
+
+ /// VarArgsFrameIndex - FrameIndex for start of varargs area.
+ int VarArgsFrameIndex;
+
+ /// HasITBlocks - True if IT blocks have been inserted.
+ bool HasITBlocks;
+
+ /// CPEClones - Track constant pool entries clones created by Constant Island
+ /// pass.
+ DenseMap<unsigned, unsigned> CPEClones;
+
+public:
+ ARMFunctionInfo() :
+ isThumb(false),
+ hasThumb2(false),
+ VarArgsRegSaveSize(0), HasStackFrame(false), RestoreSPFromFP(false),
+ LRSpilledForFarJump(false),
+ FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
+ GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
+ GPRCS1Frames(0), GPRCS2Frames(0), DPRCSFrames(0),
+ JumpTableUId(0), PICLabelUId(0),
+ VarArgsFrameIndex(0), HasITBlocks(false) {}
+
+ explicit ARMFunctionInfo(MachineFunction &MF) :
+ isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()),
+ hasThumb2(MF.getTarget().getSubtarget<ARMSubtarget>().hasThumb2()),
+ VarArgsRegSaveSize(0), HasStackFrame(false), RestoreSPFromFP(false),
+ LRSpilledForFarJump(false),
+ FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
+ GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
+ GPRCS1Frames(32), GPRCS2Frames(32), DPRCSFrames(32),
+ JumpTableUId(0), PICLabelUId(0),
+ VarArgsFrameIndex(0), HasITBlocks(false) {}
+
+ bool isThumbFunction() const { return isThumb; }
+ bool isThumb1OnlyFunction() const { return isThumb && !hasThumb2; }
+ bool isThumb2Function() const { return isThumb && hasThumb2; }
+
+ unsigned getVarArgsRegSaveSize() const { return VarArgsRegSaveSize; }
+ void setVarArgsRegSaveSize(unsigned s) { VarArgsRegSaveSize = s; }
+
+ bool hasStackFrame() const { return HasStackFrame; }
+ void setHasStackFrame(bool s) { HasStackFrame = s; }
+
+ bool shouldRestoreSPFromFP() const { return RestoreSPFromFP; }
+ void setShouldRestoreSPFromFP(bool s) { RestoreSPFromFP = s; }
+
+ bool isLRSpilledForFarJump() const { return LRSpilledForFarJump; }
+ void setLRIsSpilledForFarJump(bool s) { LRSpilledForFarJump = s; }
+
+ unsigned getFramePtrSpillOffset() const { return FramePtrSpillOffset; }
+ void setFramePtrSpillOffset(unsigned o) { FramePtrSpillOffset = o; }
+
+ unsigned getGPRCalleeSavedArea1Offset() const { return GPRCS1Offset; }
+ unsigned getGPRCalleeSavedArea2Offset() const { return GPRCS2Offset; }
+ unsigned getDPRCalleeSavedAreaOffset() const { return DPRCSOffset; }
+
+ void setGPRCalleeSavedArea1Offset(unsigned o) { GPRCS1Offset = o; }
+ void setGPRCalleeSavedArea2Offset(unsigned o) { GPRCS2Offset = o; }
+ void setDPRCalleeSavedAreaOffset(unsigned o) { DPRCSOffset = o; }
+
+ unsigned getGPRCalleeSavedArea1Size() const { return GPRCS1Size; }
+ unsigned getGPRCalleeSavedArea2Size() const { return GPRCS2Size; }
+ unsigned getDPRCalleeSavedAreaSize() const { return DPRCSSize; }
+
+ void setGPRCalleeSavedArea1Size(unsigned s) { GPRCS1Size = s; }
+ void setGPRCalleeSavedArea2Size(unsigned s) { GPRCS2Size = s; }
+ void setDPRCalleeSavedAreaSize(unsigned s) { DPRCSSize = s; }
+
+ bool isGPRCalleeSavedArea1Frame(int fi) const {
+ if (fi < 0 || fi >= (int)GPRCS1Frames.size())
+ return false;
+ return GPRCS1Frames[fi];
+ }
+ bool isGPRCalleeSavedArea2Frame(int fi) const {
+ if (fi < 0 || fi >= (int)GPRCS2Frames.size())
+ return false;
+ return GPRCS2Frames[fi];
+ }
+ bool isDPRCalleeSavedAreaFrame(int fi) const {
+ if (fi < 0 || fi >= (int)DPRCSFrames.size())
+ return false;
+ return DPRCSFrames[fi];
+ }
+
+ void addGPRCalleeSavedArea1Frame(int fi) {
+ if (fi >= 0) {
+ int Size = GPRCS1Frames.size();
+ if (fi >= Size) {
+ Size *= 2;
+ if (fi >= Size)
+ Size = fi+1;
+ GPRCS1Frames.resize(Size);
+ }
+ GPRCS1Frames[fi] = true;
+ }
+ }
+ void addGPRCalleeSavedArea2Frame(int fi) {
+ if (fi >= 0) {
+ int Size = GPRCS2Frames.size();
+ if (fi >= Size) {
+ Size *= 2;
+ if (fi >= Size)
+ Size = fi+1;
+ GPRCS2Frames.resize(Size);
+ }
+ GPRCS2Frames[fi] = true;
+ }
+ }
+ void addDPRCalleeSavedAreaFrame(int fi) {
+ if (fi >= 0) {
+ int Size = DPRCSFrames.size();
+ if (fi >= Size) {
+ Size *= 2;
+ if (fi >= Size)
+ Size = fi+1;
+ DPRCSFrames.resize(Size);
+ }
+ DPRCSFrames[fi] = true;
+ }
+ }
+
+ unsigned createJumpTableUId() {
+ return JumpTableUId++;
+ }
+
+ unsigned getNumJumpTables() const {
+ return JumpTableUId;
+ }
+
+ void initPICLabelUId(unsigned UId) {
+ PICLabelUId = UId;
+ }
+
+ unsigned getNumPICLabels() const {
+ return PICLabelUId;
+ }
+
+ unsigned createPICLabelUId() {
+ return PICLabelUId++;
+ }
+
+ int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
+ void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; }
+
+ bool hasITBlocks() const { return HasITBlocks; }
+ void setHasITBlocks(bool h) { HasITBlocks = h; }
+
+ void recordCPEClone(unsigned CPIdx, unsigned CPCloneIdx) {
+ if (!CPEClones.insert(std::make_pair(CPCloneIdx, CPIdx)).second)
+ assert(0 && "Duplicate entries!");
+ }
+
+ unsigned getOriginalCPIdx(unsigned CloneIdx) const {
+ DenseMap<unsigned, unsigned>::const_iterator I = CPEClones.find(CloneIdx);
+ if (I != CPEClones.end())
+ return I->second;
+ else
+ return -1U;
+ }
+};
+} // End llvm namespace
+
+#endif // ARMMACHINEFUNCTIONINFO_H
diff --git a/contrib/llvm/lib/Target/ARM/ARMPerfectShuffle.h b/contrib/llvm/lib/Target/ARM/ARMPerfectShuffle.h
new file mode 100644
index 0000000..18e1620
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMPerfectShuffle.h
@@ -0,0 +1,6586 @@
+//===-- ARMPerfectShuffle.h - NEON Perfect Shuffle Table ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file, which was autogenerated by llvm-PerfectShuffle, contains data
+// for the optimal way to build a perfect shuffle using neon instructions.
+//
+//===----------------------------------------------------------------------===//
+
+// 31 entries have cost 0
+// 242 entries have cost 1
+// 1447 entries have cost 2
+// 3602 entries have cost 3
+// 1237 entries have cost 4
+// 2 entries have cost 5
+
+// This table is 6561*4 = 26244 bytes in size.
+static const unsigned PerfectShuffleTable[6561+1] = {
+ 135053414U, // <0,0,0,0>: Cost 1 vdup0 LHS
+ 1543503974U, // <0,0,0,1>: Cost 2 vext2 <0,0,0,0>, LHS
+ 2618572962U, // <0,0,0,2>: Cost 3 vext2 <0,2,0,0>, <0,2,0,0>
+ 2568054923U, // <0,0,0,3>: Cost 3 vext1 <3,0,0,0>, <3,0,0,0>
+ 1476398390U, // <0,0,0,4>: Cost 2 vext1 <0,0,0,0>, RHS
+ 2550140624U, // <0,0,0,5>: Cost 3 vext1 <0,0,0,0>, <5,1,7,3>
+ 2550141434U, // <0,0,0,6>: Cost 3 vext1 <0,0,0,0>, <6,2,7,3>
+ 2591945711U, // <0,0,0,7>: Cost 3 vext1 <7,0,0,0>, <7,0,0,0>
+ 135053414U, // <0,0,0,u>: Cost 1 vdup0 LHS
+ 2886516736U, // <0,0,1,0>: Cost 3 vzipl LHS, <0,0,0,0>
+ 1812775014U, // <0,0,1,1>: Cost 2 vzipl LHS, LHS
+ 1618133094U, // <0,0,1,2>: Cost 2 vext3 <1,2,3,0>, LHS
+ 2625209292U, // <0,0,1,3>: Cost 3 vext2 <1,3,0,0>, <1,3,0,0>
+ 2886558034U, // <0,0,1,4>: Cost 3 vzipl LHS, <0,4,1,5>
+ 2617246864U, // <0,0,1,5>: Cost 3 vext2 <0,0,0,0>, <1,5,3,7>
+ 3659723031U, // <0,0,1,6>: Cost 4 vext1 <6,0,0,1>, <6,0,0,1>
+ 2591953904U, // <0,0,1,7>: Cost 3 vext1 <7,0,0,1>, <7,0,0,1>
+ 1812775581U, // <0,0,1,u>: Cost 2 vzipl LHS, LHS
+ 3020734464U, // <0,0,2,0>: Cost 3 vtrnl LHS, <0,0,0,0>
+ 3020734474U, // <0,0,2,1>: Cost 3 vtrnl LHS, <0,0,1,1>
+ 1946992742U, // <0,0,2,2>: Cost 2 vtrnl LHS, LHS
+ 2631181989U, // <0,0,2,3>: Cost 3 vext2 <2,3,0,0>, <2,3,0,0>
+ 3020734668U, // <0,0,2,4>: Cost 3 vtrnl LHS, <0,2,4,6>
+ 3826550569U, // <0,0,2,5>: Cost 4 vuzpl <0,2,0,2>, <2,4,5,6>
+ 2617247674U, // <0,0,2,6>: Cost 3 vext2 <0,0,0,0>, <2,6,3,7>
+ 2591962097U, // <0,0,2,7>: Cost 3 vext1 <7,0,0,2>, <7,0,0,2>
+ 1946992796U, // <0,0,2,u>: Cost 2 vtrnl LHS, LHS
+ 2635163787U, // <0,0,3,0>: Cost 3 vext2 <3,0,0,0>, <3,0,0,0>
+ 2686419196U, // <0,0,3,1>: Cost 3 vext3 <0,3,1,0>, <0,3,1,0>
+ 2686492933U, // <0,0,3,2>: Cost 3 vext3 <0,3,2,0>, <0,3,2,0>
+ 2617248156U, // <0,0,3,3>: Cost 3 vext2 <0,0,0,0>, <3,3,3,3>
+ 2617248258U, // <0,0,3,4>: Cost 3 vext2 <0,0,0,0>, <3,4,5,6>
+ 3826551298U, // <0,0,3,5>: Cost 4 vuzpl <0,2,0,2>, <3,4,5,6>
+ 3690990200U, // <0,0,3,6>: Cost 4 vext2 <0,0,0,0>, <3,6,0,7>
+ 3713551042U, // <0,0,3,7>: Cost 4 vext2 <3,7,0,0>, <3,7,0,0>
+ 2635163787U, // <0,0,3,u>: Cost 3 vext2 <3,0,0,0>, <3,0,0,0>
+ 2617248658U, // <0,0,4,0>: Cost 3 vext2 <0,0,0,0>, <4,0,5,1>
+ 2888450150U, // <0,0,4,1>: Cost 3 vzipl <0,4,1,5>, LHS
+ 3021570150U, // <0,0,4,2>: Cost 3 vtrnl <0,2,4,6>, LHS
+ 3641829519U, // <0,0,4,3>: Cost 4 vext1 <3,0,0,4>, <3,0,0,4>
+ 3021570252U, // <0,0,4,4>: Cost 3 vtrnl <0,2,4,6>, <0,2,4,6>
+ 1543507254U, // <0,0,4,5>: Cost 2 vext2 <0,0,0,0>, RHS
+ 2752810294U, // <0,0,4,6>: Cost 3 vuzpl <0,2,0,2>, RHS
+ 3786998152U, // <0,0,4,7>: Cost 4 vext3 <4,7,5,0>, <0,4,7,5>
+ 1543507497U, // <0,0,4,u>: Cost 2 vext2 <0,0,0,0>, RHS
+ 2684354972U, // <0,0,5,0>: Cost 3 vext3 <0,0,0,0>, <0,5,0,7>
+ 2617249488U, // <0,0,5,1>: Cost 3 vext2 <0,0,0,0>, <5,1,7,3>
+ 3765617070U, // <0,0,5,2>: Cost 4 vext3 <1,2,3,0>, <0,5,2,7>
+ 3635865780U, // <0,0,5,3>: Cost 4 vext1 <2,0,0,5>, <3,0,4,5>
+ 2617249734U, // <0,0,5,4>: Cost 3 vext2 <0,0,0,0>, <5,4,7,6>
+ 2617249796U, // <0,0,5,5>: Cost 3 vext2 <0,0,0,0>, <5,5,5,5>
+ 2718712274U, // <0,0,5,6>: Cost 3 vext3 <5,6,7,0>, <0,5,6,7>
+ 2617249960U, // <0,0,5,7>: Cost 3 vext2 <0,0,0,0>, <5,7,5,7>
+ 2720039396U, // <0,0,5,u>: Cost 3 vext3 <5,u,7,0>, <0,5,u,7>
+ 2684355053U, // <0,0,6,0>: Cost 3 vext3 <0,0,0,0>, <0,6,0,7>
+ 3963609190U, // <0,0,6,1>: Cost 4 vzipl <0,6,2,7>, LHS
+ 2617250298U, // <0,0,6,2>: Cost 3 vext2 <0,0,0,0>, <6,2,7,3>
+ 3796435464U, // <0,0,6,3>: Cost 4 vext3 <6,3,7,0>, <0,6,3,7>
+ 3659762998U, // <0,0,6,4>: Cost 4 vext1 <6,0,0,6>, RHS
+ 3659763810U, // <0,0,6,5>: Cost 4 vext1 <6,0,0,6>, <5,6,7,0>
+ 2617250616U, // <0,0,6,6>: Cost 3 vext2 <0,0,0,0>, <6,6,6,6>
+ 2657727309U, // <0,0,6,7>: Cost 3 vext2 <6,7,0,0>, <6,7,0,0>
+ 2658390942U, // <0,0,6,u>: Cost 3 vext2 <6,u,0,0>, <6,u,0,0>
+ 2659054575U, // <0,0,7,0>: Cost 3 vext2 <7,0,0,0>, <7,0,0,0>
+ 3635880854U, // <0,0,7,1>: Cost 4 vext1 <2,0,0,7>, <1,2,3,0>
+ 3635881401U, // <0,0,7,2>: Cost 4 vext1 <2,0,0,7>, <2,0,0,7>
+ 3734787298U, // <0,0,7,3>: Cost 4 vext2 <7,3,0,0>, <7,3,0,0>
+ 2617251174U, // <0,0,7,4>: Cost 3 vext2 <0,0,0,0>, <7,4,5,6>
+ 3659772002U, // <0,0,7,5>: Cost 4 vext1 <6,0,0,7>, <5,6,7,0>
+ 3659772189U, // <0,0,7,6>: Cost 4 vext1 <6,0,0,7>, <6,0,0,7>
+ 2617251436U, // <0,0,7,7>: Cost 3 vext2 <0,0,0,0>, <7,7,7,7>
+ 2659054575U, // <0,0,7,u>: Cost 3 vext2 <7,0,0,0>, <7,0,0,0>
+ 135053414U, // <0,0,u,0>: Cost 1 vdup0 LHS
+ 1817419878U, // <0,0,u,1>: Cost 2 vzipl LHS, LHS
+ 1947435110U, // <0,0,u,2>: Cost 2 vtrnl LHS, LHS
+ 2568120467U, // <0,0,u,3>: Cost 3 vext1 <3,0,0,u>, <3,0,0,u>
+ 1476463926U, // <0,0,u,4>: Cost 2 vext1 <0,0,0,u>, RHS
+ 1543510170U, // <0,0,u,5>: Cost 2 vext2 <0,0,0,0>, RHS
+ 2752813210U, // <0,0,u,6>: Cost 3 vuzpl <0,2,0,2>, RHS
+ 2592011255U, // <0,0,u,7>: Cost 3 vext1 <7,0,0,u>, <7,0,0,u>
+ 135053414U, // <0,0,u,u>: Cost 1 vdup0 LHS
+ 2618581002U, // <0,1,0,0>: Cost 3 vext2 <0,2,0,1>, <0,0,1,1>
+ 1557446758U, // <0,1,0,1>: Cost 2 vext2 <2,3,0,1>, LHS
+ 2618581155U, // <0,1,0,2>: Cost 3 vext2 <0,2,0,1>, <0,2,0,1>
+ 2690548468U, // <0,1,0,3>: Cost 3 vext3 <1,0,3,0>, <1,0,3,0>
+ 2626543954U, // <0,1,0,4>: Cost 3 vext2 <1,5,0,1>, <0,4,1,5>
+ 4094985216U, // <0,1,0,5>: Cost 4 vtrnl <0,2,0,2>, <1,3,5,7>
+ 2592019278U, // <0,1,0,6>: Cost 3 vext1 <7,0,1,0>, <6,7,0,1>
+ 2592019448U, // <0,1,0,7>: Cost 3 vext1 <7,0,1,0>, <7,0,1,0>
+ 1557447325U, // <0,1,0,u>: Cost 2 vext2 <2,3,0,1>, LHS
+ 1476476938U, // <0,1,1,0>: Cost 2 vext1 <0,0,1,1>, <0,0,1,1>
+ 2886517556U, // <0,1,1,1>: Cost 3 vzipl LHS, <1,1,1,1>
+ 2886517654U, // <0,1,1,2>: Cost 3 vzipl LHS, <1,2,3,0>
+ 2886517720U, // <0,1,1,3>: Cost 3 vzipl LHS, <1,3,1,3>
+ 1476480310U, // <0,1,1,4>: Cost 2 vext1 <0,0,1,1>, RHS
+ 2886558864U, // <0,1,1,5>: Cost 3 vzipl LHS, <1,5,3,7>
+ 2550223354U, // <0,1,1,6>: Cost 3 vext1 <0,0,1,1>, <6,2,7,3>
+ 2550223856U, // <0,1,1,7>: Cost 3 vext1 <0,0,1,1>, <7,0,0,1>
+ 1476482862U, // <0,1,1,u>: Cost 2 vext1 <0,0,1,1>, LHS
+ 1494401126U, // <0,1,2,0>: Cost 2 vext1 <3,0,1,2>, LHS
+ 3020735284U, // <0,1,2,1>: Cost 3 vtrnl LHS, <1,1,1,1>
+ 2562172349U, // <0,1,2,2>: Cost 3 vext1 <2,0,1,2>, <2,0,1,2>
+ 835584U, // <0,1,2,3>: Cost 0 copy LHS
+ 1494404406U, // <0,1,2,4>: Cost 2 vext1 <3,0,1,2>, RHS
+ 3020735488U, // <0,1,2,5>: Cost 3 vtrnl LHS, <1,3,5,7>
+ 2631190458U, // <0,1,2,6>: Cost 3 vext2 <2,3,0,1>, <2,6,3,7>
+ 1518294010U, // <0,1,2,7>: Cost 2 vext1 <7,0,1,2>, <7,0,1,2>
+ 835584U, // <0,1,2,u>: Cost 0 copy LHS
+ 2692318156U, // <0,1,3,0>: Cost 3 vext3 <1,3,0,0>, <1,3,0,0>
+ 2691875800U, // <0,1,3,1>: Cost 3 vext3 <1,2,3,0>, <1,3,1,3>
+ 2691875806U, // <0,1,3,2>: Cost 3 vext3 <1,2,3,0>, <1,3,2,0>
+ 2692539367U, // <0,1,3,3>: Cost 3 vext3 <1,3,3,0>, <1,3,3,0>
+ 2562182454U, // <0,1,3,4>: Cost 3 vext1 <2,0,1,3>, RHS
+ 2691875840U, // <0,1,3,5>: Cost 3 vext3 <1,2,3,0>, <1,3,5,7>
+ 2692760578U, // <0,1,3,6>: Cost 3 vext3 <1,3,6,0>, <1,3,6,0>
+ 2639817411U, // <0,1,3,7>: Cost 3 vext2 <3,7,0,1>, <3,7,0,1>
+ 2691875863U, // <0,1,3,u>: Cost 3 vext3 <1,2,3,0>, <1,3,u,3>
+ 2568159334U, // <0,1,4,0>: Cost 3 vext1 <3,0,1,4>, LHS
+ 4095312692U, // <0,1,4,1>: Cost 4 vtrnl <0,2,4,6>, <1,1,1,1>
+ 2568160934U, // <0,1,4,2>: Cost 3 vext1 <3,0,1,4>, <2,3,0,1>
+ 2568161432U, // <0,1,4,3>: Cost 3 vext1 <3,0,1,4>, <3,0,1,4>
+ 2568162614U, // <0,1,4,4>: Cost 3 vext1 <3,0,1,4>, RHS
+ 1557450038U, // <0,1,4,5>: Cost 2 vext2 <2,3,0,1>, RHS
+ 2754235702U, // <0,1,4,6>: Cost 3 vuzpl <0,4,1,5>, RHS
+ 2592052220U, // <0,1,4,7>: Cost 3 vext1 <7,0,1,4>, <7,0,1,4>
+ 1557450281U, // <0,1,4,u>: Cost 2 vext2 <2,3,0,1>, RHS
+ 3765617775U, // <0,1,5,0>: Cost 4 vext3 <1,2,3,0>, <1,5,0,1>
+ 2647781007U, // <0,1,5,1>: Cost 3 vext2 <5,1,0,1>, <5,1,0,1>
+ 3704934138U, // <0,1,5,2>: Cost 4 vext2 <2,3,0,1>, <5,2,3,0>
+ 2691875984U, // <0,1,5,3>: Cost 3 vext3 <1,2,3,0>, <1,5,3,7>
+ 2657734598U, // <0,1,5,4>: Cost 3 vext2 <6,7,0,1>, <5,4,7,6>
+ 2650435539U, // <0,1,5,5>: Cost 3 vext2 <5,5,0,1>, <5,5,0,1>
+ 2651099172U, // <0,1,5,6>: Cost 3 vext2 <5,6,0,1>, <5,6,0,1>
+ 2651762805U, // <0,1,5,7>: Cost 3 vext2 <5,7,0,1>, <5,7,0,1>
+ 2691876029U, // <0,1,5,u>: Cost 3 vext3 <1,2,3,0>, <1,5,u,7>
+ 2592063590U, // <0,1,6,0>: Cost 3 vext1 <7,0,1,6>, LHS
+ 3765617871U, // <0,1,6,1>: Cost 4 vext3 <1,2,3,0>, <1,6,1,7>
+ 2654417337U, // <0,1,6,2>: Cost 3 vext2 <6,2,0,1>, <6,2,0,1>
+ 3765617889U, // <0,1,6,3>: Cost 4 vext3 <1,2,3,0>, <1,6,3,7>
+ 2592066870U, // <0,1,6,4>: Cost 3 vext1 <7,0,1,6>, RHS
+ 3765617907U, // <0,1,6,5>: Cost 4 vext3 <1,2,3,0>, <1,6,5,7>
+ 2657071869U, // <0,1,6,6>: Cost 3 vext2 <6,6,0,1>, <6,6,0,1>
+ 1583993678U, // <0,1,6,7>: Cost 2 vext2 <6,7,0,1>, <6,7,0,1>
+ 1584657311U, // <0,1,6,u>: Cost 2 vext2 <6,u,0,1>, <6,u,0,1>
+ 2657735672U, // <0,1,7,0>: Cost 3 vext2 <6,7,0,1>, <7,0,1,0>
+ 2657735808U, // <0,1,7,1>: Cost 3 vext2 <6,7,0,1>, <7,1,7,1>
+ 2631193772U, // <0,1,7,2>: Cost 3 vext2 <2,3,0,1>, <7,2,3,0>
+ 2661053667U, // <0,1,7,3>: Cost 3 vext2 <7,3,0,1>, <7,3,0,1>
+ 2657736038U, // <0,1,7,4>: Cost 3 vext2 <6,7,0,1>, <7,4,5,6>
+ 3721524621U, // <0,1,7,5>: Cost 4 vext2 <5,1,0,1>, <7,5,1,0>
+ 2657736158U, // <0,1,7,6>: Cost 3 vext2 <6,7,0,1>, <7,6,1,0>
+ 2657736300U, // <0,1,7,7>: Cost 3 vext2 <6,7,0,1>, <7,7,7,7>
+ 2657736322U, // <0,1,7,u>: Cost 3 vext2 <6,7,0,1>, <7,u,1,2>
+ 1494450278U, // <0,1,u,0>: Cost 2 vext1 <3,0,1,u>, LHS
+ 1557452590U, // <0,1,u,1>: Cost 2 vext2 <2,3,0,1>, LHS
+ 2754238254U, // <0,1,u,2>: Cost 3 vuzpl <0,4,1,5>, LHS
+ 835584U, // <0,1,u,3>: Cost 0 copy LHS
+ 1494453558U, // <0,1,u,4>: Cost 2 vext1 <3,0,1,u>, RHS
+ 1557452954U, // <0,1,u,5>: Cost 2 vext2 <2,3,0,1>, RHS
+ 2754238618U, // <0,1,u,6>: Cost 3 vuzpl <0,4,1,5>, RHS
+ 1518343168U, // <0,1,u,7>: Cost 2 vext1 <7,0,1,u>, <7,0,1,u>
+ 835584U, // <0,1,u,u>: Cost 0 copy LHS
+ 2752299008U, // <0,2,0,0>: Cost 3 vuzpl LHS, <0,0,0,0>
+ 1544847462U, // <0,2,0,1>: Cost 2 vext2 <0,2,0,2>, LHS
+ 1678557286U, // <0,2,0,2>: Cost 2 vuzpl LHS, LHS
+ 2696521165U, // <0,2,0,3>: Cost 3 vext3 <2,0,3,0>, <2,0,3,0>
+ 2752340172U, // <0,2,0,4>: Cost 3 vuzpl LHS, <0,2,4,6>
+ 2691876326U, // <0,2,0,5>: Cost 3 vext3 <1,2,3,0>, <2,0,5,7>
+ 2618589695U, // <0,2,0,6>: Cost 3 vext2 <0,2,0,2>, <0,6,2,7>
+ 2592093185U, // <0,2,0,7>: Cost 3 vext1 <7,0,2,0>, <7,0,2,0>
+ 1678557340U, // <0,2,0,u>: Cost 2 vuzpl LHS, LHS
+ 2618589942U, // <0,2,1,0>: Cost 3 vext2 <0,2,0,2>, <1,0,3,2>
+ 2752299828U, // <0,2,1,1>: Cost 3 vuzpl LHS, <1,1,1,1>
+ 2886518376U, // <0,2,1,2>: Cost 3 vzipl LHS, <2,2,2,2>
+ 2752299766U, // <0,2,1,3>: Cost 3 vuzpl LHS, <1,0,3,2>
+ 2550295862U, // <0,2,1,4>: Cost 3 vext1 <0,0,2,1>, RHS
+ 2752340992U, // <0,2,1,5>: Cost 3 vuzpl LHS, <1,3,5,7>
+ 2886559674U, // <0,2,1,6>: Cost 3 vzipl LHS, <2,6,3,7>
+ 3934208106U, // <0,2,1,7>: Cost 4 vuzpr <7,0,1,2>, <0,1,2,7>
+ 2752340771U, // <0,2,1,u>: Cost 3 vuzpl LHS, <1,0,u,2>
+ 1476558868U, // <0,2,2,0>: Cost 2 vext1 <0,0,2,2>, <0,0,2,2>
+ 2226628029U, // <0,2,2,1>: Cost 3 vrev <2,0,1,2>
+ 2752300648U, // <0,2,2,2>: Cost 3 vuzpl LHS, <2,2,2,2>
+ 3020736114U, // <0,2,2,3>: Cost 3 vtrnl LHS, <2,2,3,3>
+ 1476562230U, // <0,2,2,4>: Cost 2 vext1 <0,0,2,2>, RHS
+ 2550304464U, // <0,2,2,5>: Cost 3 vext1 <0,0,2,2>, <5,1,7,3>
+ 2618591162U, // <0,2,2,6>: Cost 3 vext2 <0,2,0,2>, <2,6,3,7>
+ 2550305777U, // <0,2,2,7>: Cost 3 vext1 <0,0,2,2>, <7,0,0,2>
+ 1476564782U, // <0,2,2,u>: Cost 2 vext1 <0,0,2,2>, LHS
+ 2618591382U, // <0,2,3,0>: Cost 3 vext2 <0,2,0,2>, <3,0,1,2>
+ 2752301206U, // <0,2,3,1>: Cost 3 vuzpl LHS, <3,0,1,2>
+ 3826043121U, // <0,2,3,2>: Cost 4 vuzpl LHS, <3,1,2,3>
+ 2752301468U, // <0,2,3,3>: Cost 3 vuzpl LHS, <3,3,3,3>
+ 2618591746U, // <0,2,3,4>: Cost 3 vext2 <0,2,0,2>, <3,4,5,6>
+ 2752301570U, // <0,2,3,5>: Cost 3 vuzpl LHS, <3,4,5,6>
+ 3830688102U, // <0,2,3,6>: Cost 4 vuzpl LHS, <3,2,6,3>
+ 2698807012U, // <0,2,3,7>: Cost 3 vext3 <2,3,7,0>, <2,3,7,0>
+ 2752301269U, // <0,2,3,u>: Cost 3 vuzpl LHS, <3,0,u,2>
+ 2562261094U, // <0,2,4,0>: Cost 3 vext1 <2,0,2,4>, LHS
+ 4095313828U, // <0,2,4,1>: Cost 4 vtrnl <0,2,4,6>, <2,6,1,3>
+ 2226718152U, // <0,2,4,2>: Cost 3 vrev <2,0,2,4>
+ 2568235169U, // <0,2,4,3>: Cost 3 vext1 <3,0,2,4>, <3,0,2,4>
+ 2562264374U, // <0,2,4,4>: Cost 3 vext1 <2,0,2,4>, RHS
+ 1544850742U, // <0,2,4,5>: Cost 2 vext2 <0,2,0,2>, RHS
+ 1678560566U, // <0,2,4,6>: Cost 2 vuzpl LHS, RHS
+ 2592125957U, // <0,2,4,7>: Cost 3 vext1 <7,0,2,4>, <7,0,2,4>
+ 1678560584U, // <0,2,4,u>: Cost 2 vuzpl LHS, RHS
+ 2691876686U, // <0,2,5,0>: Cost 3 vext3 <1,2,3,0>, <2,5,0,7>
+ 2618592976U, // <0,2,5,1>: Cost 3 vext2 <0,2,0,2>, <5,1,7,3>
+ 3765618528U, // <0,2,5,2>: Cost 4 vext3 <1,2,3,0>, <2,5,2,7>
+ 3765618536U, // <0,2,5,3>: Cost 4 vext3 <1,2,3,0>, <2,5,3,6>
+ 2618593222U, // <0,2,5,4>: Cost 3 vext2 <0,2,0,2>, <5,4,7,6>
+ 2752303108U, // <0,2,5,5>: Cost 3 vuzpl LHS, <5,5,5,5>
+ 2618593378U, // <0,2,5,6>: Cost 3 vext2 <0,2,0,2>, <5,6,7,0>
+ 2824785206U, // <0,2,5,7>: Cost 3 vuzpr <1,0,3,2>, RHS
+ 2824785207U, // <0,2,5,u>: Cost 3 vuzpr <1,0,3,2>, RHS
+ 2752303950U, // <0,2,6,0>: Cost 3 vuzpl LHS, <6,7,0,1>
+ 3830690081U, // <0,2,6,1>: Cost 4 vuzpl LHS, <6,0,1,2>
+ 2618593786U, // <0,2,6,2>: Cost 3 vext2 <0,2,0,2>, <6,2,7,3>
+ 2691876794U, // <0,2,6,3>: Cost 3 vext3 <1,2,3,0>, <2,6,3,7>
+ 2752303990U, // <0,2,6,4>: Cost 3 vuzpl LHS, <6,7,4,5>
+ 3830690445U, // <0,2,6,5>: Cost 4 vuzpl LHS, <6,4,5,6>
+ 2752303928U, // <0,2,6,6>: Cost 3 vuzpl LHS, <6,6,6,6>
+ 2657743695U, // <0,2,6,7>: Cost 3 vext2 <6,7,0,2>, <6,7,0,2>
+ 2691876839U, // <0,2,6,u>: Cost 3 vext3 <1,2,3,0>, <2,6,u,7>
+ 2659070961U, // <0,2,7,0>: Cost 3 vext2 <7,0,0,2>, <7,0,0,2>
+ 2659734594U, // <0,2,7,1>: Cost 3 vext2 <7,1,0,2>, <7,1,0,2>
+ 3734140051U, // <0,2,7,2>: Cost 4 vext2 <7,2,0,2>, <7,2,0,2>
+ 2701166596U, // <0,2,7,3>: Cost 3 vext3 <2,7,3,0>, <2,7,3,0>
+ 2662389094U, // <0,2,7,4>: Cost 3 vext2 <7,5,0,2>, <7,4,5,6>
+ 2662389126U, // <0,2,7,5>: Cost 3 vext2 <7,5,0,2>, <7,5,0,2>
+ 3736794583U, // <0,2,7,6>: Cost 4 vext2 <7,6,0,2>, <7,6,0,2>
+ 2752304748U, // <0,2,7,7>: Cost 3 vuzpl LHS, <7,7,7,7>
+ 2659070961U, // <0,2,7,u>: Cost 3 vext2 <7,0,0,2>, <7,0,0,2>
+ 1476608026U, // <0,2,u,0>: Cost 2 vext1 <0,0,2,u>, <0,0,2,u>
+ 1544853294U, // <0,2,u,1>: Cost 2 vext2 <0,2,0,2>, LHS
+ 1678563118U, // <0,2,u,2>: Cost 2 vuzpl LHS, LHS
+ 3021178482U, // <0,2,u,3>: Cost 3 vtrnl LHS, <2,2,3,3>
+ 1476611382U, // <0,2,u,4>: Cost 2 vext1 <0,0,2,u>, RHS
+ 1544853658U, // <0,2,u,5>: Cost 2 vext2 <0,2,0,2>, RHS
+ 1678563482U, // <0,2,u,6>: Cost 2 vuzpl LHS, RHS
+ 2824785449U, // <0,2,u,7>: Cost 3 vuzpr <1,0,3,2>, RHS
+ 1678563172U, // <0,2,u,u>: Cost 2 vuzpl LHS, LHS
+ 2556329984U, // <0,3,0,0>: Cost 3 vext1 <1,0,3,0>, <0,0,0,0>
+ 2686421142U, // <0,3,0,1>: Cost 3 vext3 <0,3,1,0>, <3,0,1,2>
+ 2562303437U, // <0,3,0,2>: Cost 3 vext1 <2,0,3,0>, <2,0,3,0>
+ 4094986652U, // <0,3,0,3>: Cost 4 vtrnl <0,2,0,2>, <3,3,3,3>
+ 2556333366U, // <0,3,0,4>: Cost 3 vext1 <1,0,3,0>, RHS
+ 4094986754U, // <0,3,0,5>: Cost 4 vtrnl <0,2,0,2>, <3,4,5,6>
+ 3798796488U, // <0,3,0,6>: Cost 4 vext3 <6,7,3,0>, <3,0,6,7>
+ 3776530634U, // <0,3,0,7>: Cost 4 vext3 <3,0,7,0>, <3,0,7,0>
+ 2556335918U, // <0,3,0,u>: Cost 3 vext1 <1,0,3,0>, LHS
+ 2886518934U, // <0,3,1,0>: Cost 3 vzipl LHS, <3,0,1,2>
+ 2556338933U, // <0,3,1,1>: Cost 3 vext1 <1,0,3,1>, <1,0,3,1>
+ 2691877105U, // <0,3,1,2>: Cost 3 vext3 <1,2,3,0>, <3,1,2,3>
+ 2886519196U, // <0,3,1,3>: Cost 3 vzipl LHS, <3,3,3,3>
+ 2886519298U, // <0,3,1,4>: Cost 3 vzipl LHS, <3,4,5,6>
+ 4095740418U, // <0,3,1,5>: Cost 4 vtrnl <0,3,1,4>, <3,4,5,6>
+ 3659944242U, // <0,3,1,6>: Cost 4 vext1 <6,0,3,1>, <6,0,3,1>
+ 3769600286U, // <0,3,1,7>: Cost 4 vext3 <1,u,3,0>, <3,1,7,3>
+ 2886519582U, // <0,3,1,u>: Cost 3 vzipl LHS, <3,u,1,2>
+ 1482604646U, // <0,3,2,0>: Cost 2 vext1 <1,0,3,2>, LHS
+ 1482605302U, // <0,3,2,1>: Cost 2 vext1 <1,0,3,2>, <1,0,3,2>
+ 2556348008U, // <0,3,2,2>: Cost 3 vext1 <1,0,3,2>, <2,2,2,2>
+ 3020736924U, // <0,3,2,3>: Cost 3 vtrnl LHS, <3,3,3,3>
+ 1482607926U, // <0,3,2,4>: Cost 2 vext1 <1,0,3,2>, RHS
+ 3020737026U, // <0,3,2,5>: Cost 3 vtrnl LHS, <3,4,5,6>
+ 2598154746U, // <0,3,2,6>: Cost 3 vext1 <u,0,3,2>, <6,2,7,3>
+ 2598155258U, // <0,3,2,7>: Cost 3 vext1 <u,0,3,2>, <7,0,1,2>
+ 1482610478U, // <0,3,2,u>: Cost 2 vext1 <1,0,3,2>, LHS
+ 3692341398U, // <0,3,3,0>: Cost 4 vext2 <0,2,0,3>, <3,0,1,2>
+ 2635851999U, // <0,3,3,1>: Cost 3 vext2 <3,1,0,3>, <3,1,0,3>
+ 3636069840U, // <0,3,3,2>: Cost 4 vext1 <2,0,3,3>, <2,0,3,3>
+ 2691877276U, // <0,3,3,3>: Cost 3 vext3 <1,2,3,0>, <3,3,3,3>
+ 3961522690U, // <0,3,3,4>: Cost 4 vzipl <0,3,1,4>, <3,4,5,6>
+ 3826797058U, // <0,3,3,5>: Cost 4 vuzpl <0,2,3,5>, <3,4,5,6>
+ 3703622282U, // <0,3,3,6>: Cost 4 vext2 <2,1,0,3>, <3,6,2,7>
+ 3769600452U, // <0,3,3,7>: Cost 4 vext3 <1,u,3,0>, <3,3,7,7>
+ 2640497430U, // <0,3,3,u>: Cost 3 vext2 <3,u,0,3>, <3,u,0,3>
+ 3962194070U, // <0,3,4,0>: Cost 4 vzipl <0,4,1,5>, <3,0,1,2>
+ 2232617112U, // <0,3,4,1>: Cost 3 vrev <3,0,1,4>
+ 2232690849U, // <0,3,4,2>: Cost 3 vrev <3,0,2,4>
+ 4095314332U, // <0,3,4,3>: Cost 4 vtrnl <0,2,4,6>, <3,3,3,3>
+ 3962194434U, // <0,3,4,4>: Cost 4 vzipl <0,4,1,5>, <3,4,5,6>
+ 2691877378U, // <0,3,4,5>: Cost 3 vext3 <1,2,3,0>, <3,4,5,6>
+ 3826765110U, // <0,3,4,6>: Cost 4 vuzpl <0,2,3,1>, RHS
+ 3665941518U, // <0,3,4,7>: Cost 4 vext1 <7,0,3,4>, <7,0,3,4>
+ 2691877405U, // <0,3,4,u>: Cost 3 vext3 <1,2,3,0>, <3,4,u,6>
+ 3630112870U, // <0,3,5,0>: Cost 4 vext1 <1,0,3,5>, LHS
+ 3630113526U, // <0,3,5,1>: Cost 4 vext1 <1,0,3,5>, <1,0,3,2>
+ 4035199734U, // <0,3,5,2>: Cost 4 vzipr <1,4,0,5>, <1,0,3,2>
+ 3769600578U, // <0,3,5,3>: Cost 4 vext3 <1,u,3,0>, <3,5,3,7>
+ 2232846516U, // <0,3,5,4>: Cost 3 vrev <3,0,4,5>
+ 3779037780U, // <0,3,5,5>: Cost 4 vext3 <3,4,5,0>, <3,5,5,7>
+ 2718714461U, // <0,3,5,6>: Cost 3 vext3 <5,6,7,0>, <3,5,6,7>
+ 2706106975U, // <0,3,5,7>: Cost 3 vext3 <3,5,7,0>, <3,5,7,0>
+ 2233141464U, // <0,3,5,u>: Cost 3 vrev <3,0,u,5>
+ 2691877496U, // <0,3,6,0>: Cost 3 vext3 <1,2,3,0>, <3,6,0,7>
+ 3727511914U, // <0,3,6,1>: Cost 4 vext2 <6,1,0,3>, <6,1,0,3>
+ 3765619338U, // <0,3,6,2>: Cost 4 vext3 <1,2,3,0>, <3,6,2,7>
+ 3765619347U, // <0,3,6,3>: Cost 4 vext3 <1,2,3,0>, <3,6,3,7>
+ 3765987996U, // <0,3,6,4>: Cost 4 vext3 <1,2,u,0>, <3,6,4,7>
+ 3306670270U, // <0,3,6,5>: Cost 4 vrev <3,0,5,6>
+ 3792456365U, // <0,3,6,6>: Cost 4 vext3 <5,6,7,0>, <3,6,6,6>
+ 2706770608U, // <0,3,6,7>: Cost 3 vext3 <3,6,7,0>, <3,6,7,0>
+ 2706844345U, // <0,3,6,u>: Cost 3 vext3 <3,6,u,0>, <3,6,u,0>
+ 3769600707U, // <0,3,7,0>: Cost 4 vext3 <1,u,3,0>, <3,7,0,1>
+ 2659742787U, // <0,3,7,1>: Cost 3 vext2 <7,1,0,3>, <7,1,0,3>
+ 3636102612U, // <0,3,7,2>: Cost 4 vext1 <2,0,3,7>, <2,0,3,7>
+ 3769600740U, // <0,3,7,3>: Cost 4 vext3 <1,u,3,0>, <3,7,3,7>
+ 3769600747U, // <0,3,7,4>: Cost 4 vext3 <1,u,3,0>, <3,7,4,5>
+ 3769600758U, // <0,3,7,5>: Cost 4 vext3 <1,u,3,0>, <3,7,5,7>
+ 3659993400U, // <0,3,7,6>: Cost 4 vext1 <6,0,3,7>, <6,0,3,7>
+ 3781176065U, // <0,3,7,7>: Cost 4 vext3 <3,7,7,0>, <3,7,7,0>
+ 2664388218U, // <0,3,7,u>: Cost 3 vext2 <7,u,0,3>, <7,u,0,3>
+ 1482653798U, // <0,3,u,0>: Cost 2 vext1 <1,0,3,u>, LHS
+ 1482654460U, // <0,3,u,1>: Cost 2 vext1 <1,0,3,u>, <1,0,3,u>
+ 2556397160U, // <0,3,u,2>: Cost 3 vext1 <1,0,3,u>, <2,2,2,2>
+ 3021179292U, // <0,3,u,3>: Cost 3 vtrnl LHS, <3,3,3,3>
+ 1482657078U, // <0,3,u,4>: Cost 2 vext1 <1,0,3,u>, RHS
+ 3021179394U, // <0,3,u,5>: Cost 3 vtrnl LHS, <3,4,5,6>
+ 2598203898U, // <0,3,u,6>: Cost 3 vext1 <u,0,3,u>, <6,2,7,3>
+ 2708097874U, // <0,3,u,7>: Cost 3 vext3 <3,u,7,0>, <3,u,7,0>
+ 1482659630U, // <0,3,u,u>: Cost 2 vext1 <1,0,3,u>, LHS
+ 2617278468U, // <0,4,0,0>: Cost 3 vext2 <0,0,0,4>, <0,0,0,4>
+ 2618605670U, // <0,4,0,1>: Cost 3 vext2 <0,2,0,4>, LHS
+ 2618605734U, // <0,4,0,2>: Cost 3 vext2 <0,2,0,4>, <0,2,0,4>
+ 3642091695U, // <0,4,0,3>: Cost 4 vext1 <3,0,4,0>, <3,0,4,0>
+ 2753134796U, // <0,4,0,4>: Cost 3 vuzpl <0,2,4,6>, <0,2,4,6>
+ 2718714770U, // <0,4,0,5>: Cost 3 vext3 <5,6,7,0>, <4,0,5,1>
+ 3021245750U, // <0,4,0,6>: Cost 3 vtrnl <0,2,0,2>, RHS
+ 3665982483U, // <0,4,0,7>: Cost 4 vext1 <7,0,4,0>, <7,0,4,0>
+ 3021245768U, // <0,4,0,u>: Cost 3 vtrnl <0,2,0,2>, RHS
+ 2568355942U, // <0,4,1,0>: Cost 3 vext1 <3,0,4,1>, LHS
+ 3692348212U, // <0,4,1,1>: Cost 4 vext2 <0,2,0,4>, <1,1,1,1>
+ 3692348310U, // <0,4,1,2>: Cost 4 vext2 <0,2,0,4>, <1,2,3,0>
+ 2568358064U, // <0,4,1,3>: Cost 3 vext1 <3,0,4,1>, <3,0,4,1>
+ 2568359222U, // <0,4,1,4>: Cost 3 vext1 <3,0,4,1>, RHS
+ 1812778294U, // <0,4,1,5>: Cost 2 vzipl LHS, RHS
+ 3022671158U, // <0,4,1,6>: Cost 3 vtrnl <0,4,1,5>, RHS
+ 2592248852U, // <0,4,1,7>: Cost 3 vext1 <7,0,4,1>, <7,0,4,1>
+ 1812778537U, // <0,4,1,u>: Cost 2 vzipl LHS, RHS
+ 2568364134U, // <0,4,2,0>: Cost 3 vext1 <3,0,4,2>, LHS
+ 2238573423U, // <0,4,2,1>: Cost 3 vrev <4,0,1,2>
+ 3692349032U, // <0,4,2,2>: Cost 4 vext2 <0,2,0,4>, <2,2,2,2>
+ 2631214761U, // <0,4,2,3>: Cost 3 vext2 <2,3,0,4>, <2,3,0,4>
+ 2568367414U, // <0,4,2,4>: Cost 3 vext1 <3,0,4,2>, RHS
+ 2887028022U, // <0,4,2,5>: Cost 3 vzipl <0,2,0,2>, RHS
+ 1946996022U, // <0,4,2,6>: Cost 2 vtrnl LHS, RHS
+ 2592257045U, // <0,4,2,7>: Cost 3 vext1 <7,0,4,2>, <7,0,4,2>
+ 1946996040U, // <0,4,2,u>: Cost 2 vtrnl LHS, RHS
+ 3692349590U, // <0,4,3,0>: Cost 4 vext2 <0,2,0,4>, <3,0,1,2>
+ 3826878614U, // <0,4,3,1>: Cost 4 vuzpl <0,2,4,6>, <3,0,1,2>
+ 3826878625U, // <0,4,3,2>: Cost 4 vuzpl <0,2,4,6>, <3,0,2,4>
+ 3692349852U, // <0,4,3,3>: Cost 4 vext2 <0,2,0,4>, <3,3,3,3>
+ 3692349954U, // <0,4,3,4>: Cost 4 vext2 <0,2,0,4>, <3,4,5,6>
+ 3826878978U, // <0,4,3,5>: Cost 4 vuzpl <0,2,4,6>, <3,4,5,6>
+ 4095200566U, // <0,4,3,6>: Cost 4 vtrnl <0,2,3,1>, RHS
+ 3713583814U, // <0,4,3,7>: Cost 4 vext2 <3,7,0,4>, <3,7,0,4>
+ 3692350238U, // <0,4,3,u>: Cost 4 vext2 <0,2,0,4>, <3,u,1,2>
+ 2550464552U, // <0,4,4,0>: Cost 3 vext1 <0,0,4,4>, <0,0,4,4>
+ 3962194914U, // <0,4,4,1>: Cost 4 vzipl <0,4,1,5>, <4,1,5,0>
+ 3693677631U, // <0,4,4,2>: Cost 4 vext2 <0,4,0,4>, <4,2,6,3>
+ 3642124467U, // <0,4,4,3>: Cost 4 vext1 <3,0,4,4>, <3,0,4,4>
+ 2718715088U, // <0,4,4,4>: Cost 3 vext3 <5,6,7,0>, <4,4,4,4>
+ 2618608950U, // <0,4,4,5>: Cost 3 vext2 <0,2,0,4>, RHS
+ 2753137974U, // <0,4,4,6>: Cost 3 vuzpl <0,2,4,6>, RHS
+ 3666015255U, // <0,4,4,7>: Cost 4 vext1 <7,0,4,4>, <7,0,4,4>
+ 2618609193U, // <0,4,4,u>: Cost 3 vext2 <0,2,0,4>, RHS
+ 2568388710U, // <0,4,5,0>: Cost 3 vext1 <3,0,4,5>, LHS
+ 2568389526U, // <0,4,5,1>: Cost 3 vext1 <3,0,4,5>, <1,2,3,0>
+ 3636159963U, // <0,4,5,2>: Cost 4 vext1 <2,0,4,5>, <2,0,4,5>
+ 2568390836U, // <0,4,5,3>: Cost 3 vext1 <3,0,4,5>, <3,0,4,5>
+ 2568391990U, // <0,4,5,4>: Cost 3 vext1 <3,0,4,5>, RHS
+ 2718715180U, // <0,4,5,5>: Cost 3 vext3 <5,6,7,0>, <4,5,5,6>
+ 1618136374U, // <0,4,5,6>: Cost 2 vext3 <1,2,3,0>, RHS
+ 2592281624U, // <0,4,5,7>: Cost 3 vext1 <7,0,4,5>, <7,0,4,5>
+ 1618136392U, // <0,4,5,u>: Cost 2 vext3 <1,2,3,0>, RHS
+ 2550480938U, // <0,4,6,0>: Cost 3 vext1 <0,0,4,6>, <0,0,4,6>
+ 3826880801U, // <0,4,6,1>: Cost 4 vuzpl <0,2,4,6>, <6,0,1,2>
+ 2562426332U, // <0,4,6,2>: Cost 3 vext1 <2,0,4,6>, <2,0,4,6>
+ 3786190181U, // <0,4,6,3>: Cost 4 vext3 <4,6,3,0>, <4,6,3,0>
+ 2718715252U, // <0,4,6,4>: Cost 3 vext3 <5,6,7,0>, <4,6,4,6>
+ 3826881165U, // <0,4,6,5>: Cost 4 vuzpl <0,2,4,6>, <6,4,5,6>
+ 2712669568U, // <0,4,6,6>: Cost 3 vext3 <4,6,6,0>, <4,6,6,0>
+ 2657760081U, // <0,4,6,7>: Cost 3 vext2 <6,7,0,4>, <6,7,0,4>
+ 2718715284U, // <0,4,6,u>: Cost 3 vext3 <5,6,7,0>, <4,6,u,2>
+ 3654090854U, // <0,4,7,0>: Cost 4 vext1 <5,0,4,7>, LHS
+ 3934229326U, // <0,4,7,1>: Cost 4 vuzpr <7,0,1,4>, <6,7,0,1>
+ 3734156437U, // <0,4,7,2>: Cost 4 vext2 <7,2,0,4>, <7,2,0,4>
+ 3734820070U, // <0,4,7,3>: Cost 4 vext2 <7,3,0,4>, <7,3,0,4>
+ 3654094134U, // <0,4,7,4>: Cost 4 vext1 <5,0,4,7>, RHS
+ 2713259464U, // <0,4,7,5>: Cost 3 vext3 <4,7,5,0>, <4,7,5,0>
+ 2713333201U, // <0,4,7,6>: Cost 3 vext3 <4,7,6,0>, <4,7,6,0>
+ 3654095866U, // <0,4,7,7>: Cost 4 vext1 <5,0,4,7>, <7,0,1,2>
+ 2713259464U, // <0,4,7,u>: Cost 3 vext3 <4,7,5,0>, <4,7,5,0>
+ 2568413286U, // <0,4,u,0>: Cost 3 vext1 <3,0,4,u>, LHS
+ 2618611502U, // <0,4,u,1>: Cost 3 vext2 <0,2,0,4>, LHS
+ 2753140526U, // <0,4,u,2>: Cost 3 vuzpl <0,2,4,6>, LHS
+ 2568415415U, // <0,4,u,3>: Cost 3 vext1 <3,0,4,u>, <3,0,4,u>
+ 2568416566U, // <0,4,u,4>: Cost 3 vext1 <3,0,4,u>, RHS
+ 1817423158U, // <0,4,u,5>: Cost 2 vzipl LHS, RHS
+ 1947438390U, // <0,4,u,6>: Cost 2 vtrnl LHS, RHS
+ 2592306203U, // <0,4,u,7>: Cost 3 vext1 <7,0,4,u>, <7,0,4,u>
+ 1947438408U, // <0,4,u,u>: Cost 2 vtrnl LHS, RHS
+ 3630219264U, // <0,5,0,0>: Cost 4 vext1 <1,0,5,0>, <0,0,0,0>
+ 2625912934U, // <0,5,0,1>: Cost 3 vext2 <1,4,0,5>, LHS
+ 3692355748U, // <0,5,0,2>: Cost 4 vext2 <0,2,0,5>, <0,2,0,2>
+ 3693019384U, // <0,5,0,3>: Cost 4 vext2 <0,3,0,5>, <0,3,0,5>
+ 3630222646U, // <0,5,0,4>: Cost 4 vext1 <1,0,5,0>, RHS
+ 3699655062U, // <0,5,0,5>: Cost 4 vext2 <1,4,0,5>, <0,5,0,1>
+ 2718715508U, // <0,5,0,6>: Cost 3 vext3 <5,6,7,0>, <5,0,6,1>
+ 3087011126U, // <0,5,0,7>: Cost 3 vtrnr <0,0,0,0>, RHS
+ 2625913501U, // <0,5,0,u>: Cost 3 vext2 <1,4,0,5>, LHS
+ 1500659814U, // <0,5,1,0>: Cost 2 vext1 <4,0,5,1>, LHS
+ 2886520528U, // <0,5,1,1>: Cost 3 vzipl LHS, <5,1,7,3>
+ 2574403176U, // <0,5,1,2>: Cost 3 vext1 <4,0,5,1>, <2,2,2,2>
+ 2574403734U, // <0,5,1,3>: Cost 3 vext1 <4,0,5,1>, <3,0,1,2>
+ 1500662674U, // <0,5,1,4>: Cost 2 vext1 <4,0,5,1>, <4,0,5,1>
+ 2886520836U, // <0,5,1,5>: Cost 3 vzipl LHS, <5,5,5,5>
+ 2886520930U, // <0,5,1,6>: Cost 3 vzipl LHS, <5,6,7,0>
+ 2718715600U, // <0,5,1,7>: Cost 3 vext3 <5,6,7,0>, <5,1,7,3>
+ 1500665646U, // <0,5,1,u>: Cost 2 vext1 <4,0,5,1>, LHS
+ 2556493926U, // <0,5,2,0>: Cost 3 vext1 <1,0,5,2>, LHS
+ 2244546120U, // <0,5,2,1>: Cost 3 vrev <5,0,1,2>
+ 3692357256U, // <0,5,2,2>: Cost 4 vext2 <0,2,0,5>, <2,2,5,7>
+ 2568439994U, // <0,5,2,3>: Cost 3 vext1 <3,0,5,2>, <3,0,5,2>
+ 2556497206U, // <0,5,2,4>: Cost 3 vext1 <1,0,5,2>, RHS
+ 3020738564U, // <0,5,2,5>: Cost 3 vtrnl LHS, <5,5,5,5>
+ 4027877161U, // <0,5,2,6>: Cost 4 vzipr <0,2,0,2>, <2,4,5,6>
+ 3093220662U, // <0,5,2,7>: Cost 3 vtrnr <1,0,3,2>, RHS
+ 3093220663U, // <0,5,2,u>: Cost 3 vtrnr <1,0,3,2>, RHS
+ 3699656854U, // <0,5,3,0>: Cost 4 vext2 <1,4,0,5>, <3,0,1,2>
+ 3699656927U, // <0,5,3,1>: Cost 4 vext2 <1,4,0,5>, <3,1,0,3>
+ 3699657006U, // <0,5,3,2>: Cost 4 vext2 <1,4,0,5>, <3,2,0,1>
+ 3699657116U, // <0,5,3,3>: Cost 4 vext2 <1,4,0,5>, <3,3,3,3>
+ 2637859284U, // <0,5,3,4>: Cost 3 vext2 <3,4,0,5>, <3,4,0,5>
+ 3790319453U, // <0,5,3,5>: Cost 4 vext3 <5,3,5,0>, <5,3,5,0>
+ 3699657354U, // <0,5,3,6>: Cost 4 vext2 <1,4,0,5>, <3,6,2,7>
+ 2716725103U, // <0,5,3,7>: Cost 3 vext3 <5,3,7,0>, <5,3,7,0>
+ 2716798840U, // <0,5,3,u>: Cost 3 vext3 <5,3,u,0>, <5,3,u,0>
+ 2661747602U, // <0,5,4,0>: Cost 3 vext2 <7,4,0,5>, <4,0,5,1>
+ 3630252810U, // <0,5,4,1>: Cost 4 vext1 <1,0,5,4>, <1,0,5,4>
+ 3636225507U, // <0,5,4,2>: Cost 4 vext1 <2,0,5,4>, <2,0,5,4>
+ 3716910172U, // <0,5,4,3>: Cost 4 vext2 <4,3,0,5>, <4,3,0,5>
+ 3962195892U, // <0,5,4,4>: Cost 4 vzipl <0,4,1,5>, <5,4,5,6>
+ 2625916214U, // <0,5,4,5>: Cost 3 vext2 <1,4,0,5>, RHS
+ 3718901071U, // <0,5,4,6>: Cost 4 vext2 <4,6,0,5>, <4,6,0,5>
+ 2718715846U, // <0,5,4,7>: Cost 3 vext3 <5,6,7,0>, <5,4,7,6>
+ 2625916457U, // <0,5,4,u>: Cost 3 vext2 <1,4,0,5>, RHS
+ 3791278034U, // <0,5,5,0>: Cost 4 vext3 <5,5,0,0>, <5,5,0,0>
+ 3791351771U, // <0,5,5,1>: Cost 4 vext3 <5,5,1,0>, <5,5,1,0>
+ 3318386260U, // <0,5,5,2>: Cost 4 vrev <5,0,2,5>
+ 3791499245U, // <0,5,5,3>: Cost 4 vext3 <5,5,3,0>, <5,5,3,0>
+ 3318533734U, // <0,5,5,4>: Cost 4 vrev <5,0,4,5>
+ 2718715908U, // <0,5,5,5>: Cost 3 vext3 <5,6,7,0>, <5,5,5,5>
+ 2657767522U, // <0,5,5,6>: Cost 3 vext2 <6,7,0,5>, <5,6,7,0>
+ 2718715928U, // <0,5,5,7>: Cost 3 vext3 <5,6,7,0>, <5,5,7,7>
+ 2718715937U, // <0,5,5,u>: Cost 3 vext3 <5,6,7,0>, <5,5,u,7>
+ 2592358502U, // <0,5,6,0>: Cost 3 vext1 <7,0,5,6>, LHS
+ 3792015404U, // <0,5,6,1>: Cost 4 vext3 <5,6,1,0>, <5,6,1,0>
+ 3731509754U, // <0,5,6,2>: Cost 4 vext2 <6,7,0,5>, <6,2,7,3>
+ 3785748546U, // <0,5,6,3>: Cost 4 vext3 <4,5,6,0>, <5,6,3,4>
+ 2592361782U, // <0,5,6,4>: Cost 3 vext1 <7,0,5,6>, RHS
+ 2592362594U, // <0,5,6,5>: Cost 3 vext1 <7,0,5,6>, <5,6,7,0>
+ 3785748576U, // <0,5,6,6>: Cost 4 vext3 <4,5,6,0>, <5,6,6,7>
+ 1644974178U, // <0,5,6,7>: Cost 2 vext3 <5,6,7,0>, <5,6,7,0>
+ 1645047915U, // <0,5,6,u>: Cost 2 vext3 <5,6,u,0>, <5,6,u,0>
+ 2562506854U, // <0,5,7,0>: Cost 3 vext1 <2,0,5,7>, LHS
+ 2562507670U, // <0,5,7,1>: Cost 3 vext1 <2,0,5,7>, <1,2,3,0>
+ 2562508262U, // <0,5,7,2>: Cost 3 vext1 <2,0,5,7>, <2,0,5,7>
+ 3636250774U, // <0,5,7,3>: Cost 4 vext1 <2,0,5,7>, <3,0,1,2>
+ 2562510134U, // <0,5,7,4>: Cost 3 vext1 <2,0,5,7>, RHS
+ 2718716072U, // <0,5,7,5>: Cost 3 vext3 <5,6,7,0>, <5,7,5,7>
+ 2718716074U, // <0,5,7,6>: Cost 3 vext3 <5,6,7,0>, <5,7,6,0>
+ 2719379635U, // <0,5,7,7>: Cost 3 vext3 <5,7,7,0>, <5,7,7,0>
+ 2562512686U, // <0,5,7,u>: Cost 3 vext1 <2,0,5,7>, LHS
+ 1500717158U, // <0,5,u,0>: Cost 2 vext1 <4,0,5,u>, LHS
+ 2625918766U, // <0,5,u,1>: Cost 3 vext2 <1,4,0,5>, LHS
+ 2719674583U, // <0,5,u,2>: Cost 3 vext3 <5,u,2,0>, <5,u,2,0>
+ 2568489152U, // <0,5,u,3>: Cost 3 vext1 <3,0,5,u>, <3,0,5,u>
+ 1500720025U, // <0,5,u,4>: Cost 2 vext1 <4,0,5,u>, <4,0,5,u>
+ 2625919130U, // <0,5,u,5>: Cost 3 vext2 <1,4,0,5>, RHS
+ 2586407243U, // <0,5,u,6>: Cost 3 vext1 <6,0,5,u>, <6,0,5,u>
+ 1646301444U, // <0,5,u,7>: Cost 2 vext3 <5,u,7,0>, <5,u,7,0>
+ 1646375181U, // <0,5,u,u>: Cost 2 vext3 <5,u,u,0>, <5,u,u,0>
+ 2586411110U, // <0,6,0,0>: Cost 3 vext1 <6,0,6,0>, LHS
+ 2619949158U, // <0,6,0,1>: Cost 3 vext2 <0,4,0,6>, LHS
+ 2619949220U, // <0,6,0,2>: Cost 3 vext2 <0,4,0,6>, <0,2,0,2>
+ 3785748789U, // <0,6,0,3>: Cost 4 vext3 <4,5,6,0>, <6,0,3,4>
+ 2619949386U, // <0,6,0,4>: Cost 3 vext2 <0,4,0,6>, <0,4,0,6>
+ 2586415202U, // <0,6,0,5>: Cost 3 vext1 <6,0,6,0>, <5,6,7,0>
+ 2586415436U, // <0,6,0,6>: Cost 3 vext1 <6,0,6,0>, <6,0,6,0>
+ 2952793398U, // <0,6,0,7>: Cost 3 vzipr <0,0,0,0>, RHS
+ 2619949725U, // <0,6,0,u>: Cost 3 vext2 <0,4,0,6>, LHS
+ 2562531430U, // <0,6,1,0>: Cost 3 vext1 <2,0,6,1>, LHS
+ 3693691700U, // <0,6,1,1>: Cost 4 vext2 <0,4,0,6>, <1,1,1,1>
+ 2886521338U, // <0,6,1,2>: Cost 3 vzipl LHS, <6,2,7,3>
+ 3693691864U, // <0,6,1,3>: Cost 4 vext2 <0,4,0,6>, <1,3,1,3>
+ 2562534710U, // <0,6,1,4>: Cost 3 vext1 <2,0,6,1>, RHS
+ 2580450932U, // <0,6,1,5>: Cost 3 vext1 <5,0,6,1>, <5,0,6,1>
+ 2886521656U, // <0,6,1,6>: Cost 3 vzipl LHS, <6,6,6,6>
+ 2966736182U, // <0,6,1,7>: Cost 3 vzipr <2,3,0,1>, RHS
+ 2966736183U, // <0,6,1,u>: Cost 3 vzipr <2,3,0,1>, RHS
+ 1500741734U, // <0,6,2,0>: Cost 2 vext1 <4,0,6,2>, LHS
+ 2250518817U, // <0,6,2,1>: Cost 3 vrev <6,0,1,2>
+ 2574485096U, // <0,6,2,2>: Cost 3 vext1 <4,0,6,2>, <2,2,2,2>
+ 2631894694U, // <0,6,2,3>: Cost 3 vext2 <2,4,0,6>, <2,3,0,1>
+ 1500744604U, // <0,6,2,4>: Cost 2 vext1 <4,0,6,2>, <4,0,6,2>
+ 2574487248U, // <0,6,2,5>: Cost 3 vext1 <4,0,6,2>, <5,1,7,3>
+ 3020739384U, // <0,6,2,6>: Cost 3 vtrnl LHS, <6,6,6,6>
+ 2954136886U, // <0,6,2,7>: Cost 3 vzipr <0,2,0,2>, RHS
+ 1500747566U, // <0,6,2,u>: Cost 2 vext1 <4,0,6,2>, LHS
+ 3693693078U, // <0,6,3,0>: Cost 4 vext2 <0,4,0,6>, <3,0,1,2>
+ 3705637136U, // <0,6,3,1>: Cost 4 vext2 <2,4,0,6>, <3,1,5,7>
+ 3705637192U, // <0,6,3,2>: Cost 4 vext2 <2,4,0,6>, <3,2,3,0>
+ 3693693340U, // <0,6,3,3>: Cost 4 vext2 <0,4,0,6>, <3,3,3,3>
+ 2637867477U, // <0,6,3,4>: Cost 3 vext2 <3,4,0,6>, <3,4,0,6>
+ 3705637424U, // <0,6,3,5>: Cost 4 vext2 <2,4,0,6>, <3,5,1,7>
+ 3666154056U, // <0,6,3,6>: Cost 4 vext1 <7,0,6,3>, <6,3,7,0>
+ 2722697800U, // <0,6,3,7>: Cost 3 vext3 <6,3,7,0>, <6,3,7,0>
+ 2722771537U, // <0,6,3,u>: Cost 3 vext3 <6,3,u,0>, <6,3,u,0>
+ 2562556006U, // <0,6,4,0>: Cost 3 vext1 <2,0,6,4>, LHS
+ 4095316257U, // <0,6,4,1>: Cost 4 vtrnl <0,2,4,6>, <6,0,1,2>
+ 2562557420U, // <0,6,4,2>: Cost 3 vext1 <2,0,6,4>, <2,0,6,4>
+ 3636299926U, // <0,6,4,3>: Cost 4 vext1 <2,0,6,4>, <3,0,1,2>
+ 2562559286U, // <0,6,4,4>: Cost 3 vext1 <2,0,6,4>, RHS
+ 2619952438U, // <0,6,4,5>: Cost 3 vext2 <0,4,0,6>, RHS
+ 2723287696U, // <0,6,4,6>: Cost 3 vext3 <6,4,6,0>, <6,4,6,0>
+ 4027895094U, // <0,6,4,7>: Cost 4 vzipr <0,2,0,4>, RHS
+ 2619952681U, // <0,6,4,u>: Cost 3 vext2 <0,4,0,6>, RHS
+ 2718716594U, // <0,6,5,0>: Cost 3 vext3 <5,6,7,0>, <6,5,0,7>
+ 3648250774U, // <0,6,5,1>: Cost 4 vext1 <4,0,6,5>, <1,2,3,0>
+ 3792458436U, // <0,6,5,2>: Cost 4 vext3 <5,6,7,0>, <6,5,2,7>
+ 3705638767U, // <0,6,5,3>: Cost 5 vext2 <2,4,0,6>, <5,3,7,0>
+ 3648252831U, // <0,6,5,4>: Cost 4 vext1 <4,0,6,5>, <4,0,6,5>
+ 3797619416U, // <0,6,5,5>: Cost 4 vext3 <6,5,5,0>, <6,5,5,0>
+ 3792458472U, // <0,6,5,6>: Cost 4 vext3 <5,6,7,0>, <6,5,6,7>
+ 4035202358U, // <0,6,5,7>: Cost 4 vzipr <1,4,0,5>, RHS
+ 2718716594U, // <0,6,5,u>: Cost 3 vext3 <5,6,7,0>, <6,5,0,7>
+ 3786412796U, // <0,6,6,0>: Cost 4 vext3 <4,6,6,0>, <6,6,0,0>
+ 3792458504U, // <0,6,6,1>: Cost 4 vext3 <5,6,7,0>, <6,6,1,3>
+ 3728200126U, // <0,6,6,2>: Cost 4 vext2 <6,2,0,6>, <6,2,0,6>
+ 3798135575U, // <0,6,6,3>: Cost 4 vext3 <6,6,3,0>, <6,6,3,0>
+ 3786412836U, // <0,6,6,4>: Cost 4 vext3 <4,6,6,0>, <6,6,4,4>
+ 3792458543U, // <0,6,6,5>: Cost 4 vext3 <5,6,7,0>, <6,6,5,6>
+ 2718716728U, // <0,6,6,6>: Cost 3 vext3 <5,6,7,0>, <6,6,6,6>
+ 2718716738U, // <0,6,6,7>: Cost 3 vext3 <5,6,7,0>, <6,6,7,7>
+ 2718716747U, // <0,6,6,u>: Cost 3 vext3 <5,6,7,0>, <6,6,u,7>
+ 2718716750U, // <0,6,7,0>: Cost 3 vext3 <5,6,7,0>, <6,7,0,1>
+ 2724909910U, // <0,6,7,1>: Cost 3 vext3 <6,7,1,0>, <6,7,1,0>
+ 3636323823U, // <0,6,7,2>: Cost 4 vext1 <2,0,6,7>, <2,0,6,7>
+ 2725057384U, // <0,6,7,3>: Cost 3 vext3 <6,7,3,0>, <6,7,3,0>
+ 2718716790U, // <0,6,7,4>: Cost 3 vext3 <5,6,7,0>, <6,7,4,5>
+ 2718716800U, // <0,6,7,5>: Cost 3 vext3 <5,6,7,0>, <6,7,5,6>
+ 3792458629U, // <0,6,7,6>: Cost 4 vext3 <5,6,7,0>, <6,7,6,2>
+ 2725352332U, // <0,6,7,7>: Cost 3 vext3 <6,7,7,0>, <6,7,7,0>
+ 2718716822U, // <0,6,7,u>: Cost 3 vext3 <5,6,7,0>, <6,7,u,1>
+ 1500790886U, // <0,6,u,0>: Cost 2 vext1 <4,0,6,u>, LHS
+ 2619954990U, // <0,6,u,1>: Cost 3 vext2 <0,4,0,6>, LHS
+ 2562590192U, // <0,6,u,2>: Cost 3 vext1 <2,0,6,u>, <2,0,6,u>
+ 2725721017U, // <0,6,u,3>: Cost 3 vext3 <6,u,3,0>, <6,u,3,0>
+ 1500793762U, // <0,6,u,4>: Cost 2 vext1 <4,0,6,u>, <4,0,6,u>
+ 2619955354U, // <0,6,u,5>: Cost 3 vext2 <0,4,0,6>, RHS
+ 2725942228U, // <0,6,u,6>: Cost 3 vext3 <6,u,6,0>, <6,u,6,0>
+ 2954186038U, // <0,6,u,7>: Cost 3 vzipr <0,2,0,u>, RHS
+ 1500796718U, // <0,6,u,u>: Cost 2 vext1 <4,0,6,u>, LHS
+ 2256401391U, // <0,7,0,0>: Cost 3 vrev <7,0,0,0>
+ 2632564838U, // <0,7,0,1>: Cost 3 vext2 <2,5,0,7>, LHS
+ 2256548865U, // <0,7,0,2>: Cost 3 vrev <7,0,2,0>
+ 3700998396U, // <0,7,0,3>: Cost 4 vext2 <1,6,0,7>, <0,3,1,0>
+ 2718716952U, // <0,7,0,4>: Cost 3 vext3 <5,6,7,0>, <7,0,4,5>
+ 2718716962U, // <0,7,0,5>: Cost 3 vext3 <5,6,7,0>, <7,0,5,6>
+ 2621284845U, // <0,7,0,6>: Cost 3 vext2 <0,6,0,7>, <0,6,0,7>
+ 3904685542U, // <0,7,0,7>: Cost 4 vuzpr <2,0,5,7>, <2,0,5,7>
+ 2632565405U, // <0,7,0,u>: Cost 3 vext2 <2,5,0,7>, LHS
+ 2256409584U, // <0,7,1,0>: Cost 3 vrev <7,0,0,1>
+ 3706307380U, // <0,7,1,1>: Cost 4 vext2 <2,5,0,7>, <1,1,1,1>
+ 2632565654U, // <0,7,1,2>: Cost 3 vext2 <2,5,0,7>, <1,2,3,0>
+ 3769603168U, // <0,7,1,3>: Cost 4 vext3 <1,u,3,0>, <7,1,3,5>
+ 2256704532U, // <0,7,1,4>: Cost 3 vrev <7,0,4,1>
+ 3769603184U, // <0,7,1,5>: Cost 4 vext3 <1,u,3,0>, <7,1,5,3>
+ 3700999366U, // <0,7,1,6>: Cost 4 vext2 <1,6,0,7>, <1,6,0,7>
+ 2886522476U, // <0,7,1,7>: Cost 3 vzipl LHS, <7,7,7,7>
+ 2256999480U, // <0,7,1,u>: Cost 3 vrev <7,0,u,1>
+ 2586501222U, // <0,7,2,0>: Cost 3 vext1 <6,0,7,2>, LHS
+ 1182749690U, // <0,7,2,1>: Cost 2 vrev <7,0,1,2>
+ 3636356595U, // <0,7,2,2>: Cost 4 vext1 <2,0,7,2>, <2,0,7,2>
+ 2727711916U, // <0,7,2,3>: Cost 3 vext3 <7,2,3,0>, <7,2,3,0>
+ 2586504502U, // <0,7,2,4>: Cost 3 vext1 <6,0,7,2>, RHS
+ 2632566606U, // <0,7,2,5>: Cost 3 vext2 <2,5,0,7>, <2,5,0,7>
+ 2586505559U, // <0,7,2,6>: Cost 3 vext1 <6,0,7,2>, <6,0,7,2>
+ 3020740204U, // <0,7,2,7>: Cost 3 vtrnl LHS, <7,7,7,7>
+ 1183265849U, // <0,7,2,u>: Cost 2 vrev <7,0,u,2>
+ 3701000342U, // <0,7,3,0>: Cost 4 vext2 <1,6,0,7>, <3,0,1,2>
+ 3706308849U, // <0,7,3,1>: Cost 4 vext2 <2,5,0,7>, <3,1,2,3>
+ 3330315268U, // <0,7,3,2>: Cost 4 vrev <7,0,2,3>
+ 3706309020U, // <0,7,3,3>: Cost 4 vext2 <2,5,0,7>, <3,3,3,3>
+ 3706309122U, // <0,7,3,4>: Cost 4 vext2 <2,5,0,7>, <3,4,5,6>
+ 3712281127U, // <0,7,3,5>: Cost 4 vext2 <3,5,0,7>, <3,5,0,7>
+ 2639202936U, // <0,7,3,6>: Cost 3 vext2 <3,6,0,7>, <3,6,0,7>
+ 3802412321U, // <0,7,3,7>: Cost 4 vext3 <7,3,7,0>, <7,3,7,0>
+ 2640530202U, // <0,7,3,u>: Cost 3 vext2 <3,u,0,7>, <3,u,0,7>
+ 3654287462U, // <0,7,4,0>: Cost 4 vext1 <5,0,7,4>, LHS
+ 2256507900U, // <0,7,4,1>: Cost 3 vrev <7,0,1,4>
+ 2256581637U, // <0,7,4,2>: Cost 3 vrev <7,0,2,4>
+ 3660262008U, // <0,7,4,3>: Cost 4 vext1 <6,0,7,4>, <3,6,0,7>
+ 3786413405U, // <0,7,4,4>: Cost 4 vext3 <4,6,6,0>, <7,4,4,6>
+ 2632568118U, // <0,7,4,5>: Cost 3 vext2 <2,5,0,7>, RHS
+ 3718917457U, // <0,7,4,6>: Cost 4 vext2 <4,6,0,7>, <4,6,0,7>
+ 3787003255U, // <0,7,4,7>: Cost 4 vext3 <4,7,5,0>, <7,4,7,5>
+ 2632568361U, // <0,7,4,u>: Cost 3 vext2 <2,5,0,7>, RHS
+ 3706310268U, // <0,7,5,0>: Cost 4 vext2 <2,5,0,7>, <5,0,7,0>
+ 3792459156U, // <0,7,5,1>: Cost 4 vext3 <5,6,7,0>, <7,5,1,7>
+ 3330331654U, // <0,7,5,2>: Cost 4 vrev <7,0,2,5>
+ 3722899255U, // <0,7,5,3>: Cost 4 vext2 <5,3,0,7>, <5,3,0,7>
+ 2256737304U, // <0,7,5,4>: Cost 3 vrev <7,0,4,5>
+ 3724226521U, // <0,7,5,5>: Cost 4 vext2 <5,5,0,7>, <5,5,0,7>
+ 2718717377U, // <0,7,5,6>: Cost 3 vext3 <5,6,7,0>, <7,5,6,7>
+ 2729997763U, // <0,7,5,7>: Cost 3 vext3 <7,5,7,0>, <7,5,7,0>
+ 2720044499U, // <0,7,5,u>: Cost 3 vext3 <5,u,7,0>, <7,5,u,7>
+ 3712946517U, // <0,7,6,0>: Cost 4 vext2 <3,6,0,7>, <6,0,7,0>
+ 2256524286U, // <0,7,6,1>: Cost 3 vrev <7,0,1,6>
+ 3792459246U, // <0,7,6,2>: Cost 4 vext3 <5,6,7,0>, <7,6,2,7>
+ 3796440567U, // <0,7,6,3>: Cost 4 vext3 <6,3,7,0>, <7,6,3,7>
+ 3654307126U, // <0,7,6,4>: Cost 4 vext1 <5,0,7,6>, RHS
+ 2656457394U, // <0,7,6,5>: Cost 3 vext2 <6,5,0,7>, <6,5,0,7>
+ 3792459281U, // <0,7,6,6>: Cost 4 vext3 <5,6,7,0>, <7,6,6,6>
+ 2730661396U, // <0,7,6,7>: Cost 3 vext3 <7,6,7,0>, <7,6,7,0>
+ 2658448293U, // <0,7,6,u>: Cost 3 vext2 <6,u,0,7>, <6,u,0,7>
+ 3787003431U, // <0,7,7,0>: Cost 4 vext3 <4,7,5,0>, <7,7,0,1>
+ 3654312854U, // <0,7,7,1>: Cost 4 vext1 <5,0,7,7>, <1,2,3,0>
+ 3654313446U, // <0,7,7,2>: Cost 4 vext1 <5,0,7,7>, <2,0,5,7>
+ 3804771905U, // <0,7,7,3>: Cost 4 vext3 <7,7,3,0>, <7,7,3,0>
+ 3654315318U, // <0,7,7,4>: Cost 4 vext1 <5,0,7,7>, RHS
+ 3654315651U, // <0,7,7,5>: Cost 4 vext1 <5,0,7,7>, <5,0,7,7>
+ 3660288348U, // <0,7,7,6>: Cost 4 vext1 <6,0,7,7>, <6,0,7,7>
+ 2718717548U, // <0,7,7,7>: Cost 3 vext3 <5,6,7,0>, <7,7,7,7>
+ 2664420990U, // <0,7,7,u>: Cost 3 vext2 <7,u,0,7>, <7,u,0,7>
+ 2256466935U, // <0,7,u,0>: Cost 3 vrev <7,0,0,u>
+ 1182798848U, // <0,7,u,1>: Cost 2 vrev <7,0,1,u>
+ 2256614409U, // <0,7,u,2>: Cost 3 vrev <7,0,2,u>
+ 2731693714U, // <0,7,u,3>: Cost 3 vext3 <7,u,3,0>, <7,u,3,0>
+ 2256761883U, // <0,7,u,4>: Cost 3 vrev <7,0,4,u>
+ 2632571034U, // <0,7,u,5>: Cost 3 vext2 <2,5,0,7>, RHS
+ 2669066421U, // <0,7,u,6>: Cost 3 vext2 <u,6,0,7>, <u,6,0,7>
+ 2731988662U, // <0,7,u,7>: Cost 3 vext3 <7,u,7,0>, <7,u,7,0>
+ 1183315007U, // <0,7,u,u>: Cost 2 vrev <7,0,u,u>
+ 135053414U, // <0,u,0,0>: Cost 1 vdup0 LHS
+ 1544896614U, // <0,u,0,1>: Cost 2 vext2 <0,2,0,u>, LHS
+ 1678999654U, // <0,u,0,2>: Cost 2 vuzpl LHS, LHS
+ 2691880677U, // <0,u,0,3>: Cost 3 vext3 <1,2,3,0>, <u,0,3,2>
+ 1476988214U, // <0,u,0,4>: Cost 2 vext1 <0,0,u,0>, RHS
+ 2718791419U, // <0,u,0,5>: Cost 3 vext3 <5,6,u,0>, <u,0,5,6>
+ 3021248666U, // <0,u,0,6>: Cost 3 vtrnl <0,2,0,2>, RHS
+ 2592535607U, // <0,u,0,7>: Cost 3 vext1 <7,0,u,0>, <7,0,u,0>
+ 135053414U, // <0,u,0,u>: Cost 1 vdup0 LHS
+ 1476993097U, // <0,u,1,0>: Cost 2 vext1 <0,0,u,1>, <0,0,u,1>
+ 1812780846U, // <0,u,1,1>: Cost 2 vzipl LHS, LHS
+ 1618138926U, // <0,u,1,2>: Cost 2 vext3 <1,2,3,0>, LHS
+ 2752742134U, // <0,u,1,3>: Cost 3 vuzpl LHS, <1,0,3,2>
+ 1476996406U, // <0,u,1,4>: Cost 2 vext1 <0,0,u,1>, RHS
+ 1812781210U, // <0,u,1,5>: Cost 2 vzipl LHS, RHS
+ 2887006416U, // <0,u,1,6>: Cost 3 vzipl LHS, <u,6,3,7>
+ 2966736200U, // <0,u,1,7>: Cost 3 vzipr <2,3,0,1>, RHS
+ 1812781413U, // <0,u,1,u>: Cost 2 vzipl LHS, LHS
+ 1482973286U, // <0,u,2,0>: Cost 2 vext1 <1,0,u,2>, LHS
+ 1482973987U, // <0,u,2,1>: Cost 2 vext1 <1,0,u,2>, <1,0,u,2>
+ 1946998574U, // <0,u,2,2>: Cost 2 vtrnl LHS, LHS
+ 835584U, // <0,u,2,3>: Cost 0 copy LHS
+ 1482976566U, // <0,u,2,4>: Cost 2 vext1 <1,0,u,2>, RHS
+ 3020781631U, // <0,u,2,5>: Cost 3 vtrnl LHS, <u,4,5,6>
+ 1946998938U, // <0,u,2,6>: Cost 2 vtrnl LHS, RHS
+ 1518810169U, // <0,u,2,7>: Cost 2 vext1 <7,0,u,2>, <7,0,u,2>
+ 835584U, // <0,u,2,u>: Cost 0 copy LHS
+ 2618640534U, // <0,u,3,0>: Cost 3 vext2 <0,2,0,u>, <3,0,1,2>
+ 2752743574U, // <0,u,3,1>: Cost 3 vuzpl LHS, <3,0,1,2>
+ 2636556597U, // <0,u,3,2>: Cost 3 vext2 <3,2,0,u>, <3,2,0,u>
+ 2752743836U, // <0,u,3,3>: Cost 3 vuzpl LHS, <3,3,3,3>
+ 2618640898U, // <0,u,3,4>: Cost 3 vext2 <0,2,0,u>, <3,4,5,6>
+ 2752743938U, // <0,u,3,5>: Cost 3 vuzpl LHS, <3,4,5,6>
+ 2639202936U, // <0,u,3,6>: Cost 3 vext2 <3,6,0,7>, <3,6,0,7>
+ 2639874762U, // <0,u,3,7>: Cost 3 vext2 <3,7,0,u>, <3,7,0,u>
+ 2752743637U, // <0,u,3,u>: Cost 3 vuzpl LHS, <3,0,u,2>
+ 2562703462U, // <0,u,4,0>: Cost 3 vext1 <2,0,u,4>, LHS
+ 2888455982U, // <0,u,4,1>: Cost 3 vzipl <0,4,1,5>, LHS
+ 3021575982U, // <0,u,4,2>: Cost 3 vtrnl <0,2,4,6>, LHS
+ 2568677591U, // <0,u,4,3>: Cost 3 vext1 <3,0,u,4>, <3,0,u,4>
+ 2562706742U, // <0,u,4,4>: Cost 3 vext1 <2,0,u,4>, RHS
+ 1544899894U, // <0,u,4,5>: Cost 2 vext2 <0,2,0,u>, RHS
+ 1679002934U, // <0,u,4,6>: Cost 2 vuzpl LHS, RHS
+ 2718718033U, // <0,u,4,7>: Cost 3 vext3 <5,6,7,0>, <u,4,7,6>
+ 1679002952U, // <0,u,4,u>: Cost 2 vuzpl LHS, RHS
+ 2568683622U, // <0,u,5,0>: Cost 3 vext1 <3,0,u,5>, LHS
+ 2568684438U, // <0,u,5,1>: Cost 3 vext1 <3,0,u,5>, <1,2,3,0>
+ 3765622902U, // <0,u,5,2>: Cost 4 vext3 <1,2,3,0>, <u,5,2,7>
+ 2691881087U, // <0,u,5,3>: Cost 3 vext3 <1,2,3,0>, <u,5,3,7>
+ 2568686902U, // <0,u,5,4>: Cost 3 vext1 <3,0,u,5>, RHS
+ 2650492890U, // <0,u,5,5>: Cost 3 vext2 <5,5,0,u>, <5,5,0,u>
+ 1618139290U, // <0,u,5,6>: Cost 2 vext3 <1,2,3,0>, RHS
+ 2824834358U, // <0,u,5,7>: Cost 3 vuzpr <1,0,3,u>, RHS
+ 1618139308U, // <0,u,5,u>: Cost 2 vext3 <1,2,3,0>, RHS
+ 2592579686U, // <0,u,6,0>: Cost 3 vext1 <7,0,u,6>, LHS
+ 2262496983U, // <0,u,6,1>: Cost 3 vrev <u,0,1,6>
+ 2654474688U, // <0,u,6,2>: Cost 3 vext2 <6,2,0,u>, <6,2,0,u>
+ 2691881168U, // <0,u,6,3>: Cost 3 vext3 <1,2,3,0>, <u,6,3,7>
+ 2592582966U, // <0,u,6,4>: Cost 3 vext1 <7,0,u,6>, RHS
+ 2656465587U, // <0,u,6,5>: Cost 3 vext2 <6,5,0,u>, <6,5,0,u>
+ 2657129220U, // <0,u,6,6>: Cost 3 vext2 <6,6,0,u>, <6,6,0,u>
+ 1584051029U, // <0,u,6,7>: Cost 2 vext2 <6,7,0,u>, <6,7,0,u>
+ 1584714662U, // <0,u,6,u>: Cost 2 vext2 <6,u,0,u>, <6,u,0,u>
+ 2562728038U, // <0,u,7,0>: Cost 3 vext1 <2,0,u,7>, LHS
+ 2562728854U, // <0,u,7,1>: Cost 3 vext1 <2,0,u,7>, <1,2,3,0>
+ 2562729473U, // <0,u,7,2>: Cost 3 vext1 <2,0,u,7>, <2,0,u,7>
+ 2661111018U, // <0,u,7,3>: Cost 3 vext2 <7,3,0,u>, <7,3,0,u>
+ 2562731318U, // <0,u,7,4>: Cost 3 vext1 <2,0,u,7>, RHS
+ 2718718258U, // <0,u,7,5>: Cost 3 vext3 <5,6,7,0>, <u,7,5,6>
+ 2586620261U, // <0,u,7,6>: Cost 3 vext1 <6,0,u,7>, <6,0,u,7>
+ 2657793644U, // <0,u,7,7>: Cost 3 vext2 <6,7,0,u>, <7,7,7,7>
+ 2562733870U, // <0,u,7,u>: Cost 3 vext1 <2,0,u,7>, LHS
+ 135053414U, // <0,u,u,0>: Cost 1 vdup0 LHS
+ 1544902446U, // <0,u,u,1>: Cost 2 vext2 <0,2,0,u>, LHS
+ 1679005486U, // <0,u,u,2>: Cost 2 vuzpl LHS, LHS
+ 835584U, // <0,u,u,3>: Cost 0 copy LHS
+ 1483025718U, // <0,u,u,4>: Cost 2 vext1 <1,0,u,u>, RHS
+ 1544902810U, // <0,u,u,5>: Cost 2 vext2 <0,2,0,u>, RHS
+ 1679005850U, // <0,u,u,6>: Cost 2 vuzpl LHS, RHS
+ 1518859327U, // <0,u,u,7>: Cost 2 vext1 <7,0,u,u>, <7,0,u,u>
+ 835584U, // <0,u,u,u>: Cost 0 copy LHS
+ 2689744896U, // <1,0,0,0>: Cost 3 vext3 <0,u,1,1>, <0,0,0,0>
+ 1610694666U, // <1,0,0,1>: Cost 2 vext3 <0,0,1,1>, <0,0,1,1>
+ 2689744916U, // <1,0,0,2>: Cost 3 vext3 <0,u,1,1>, <0,0,2,2>
+ 2619310332U, // <1,0,0,3>: Cost 3 vext2 <0,3,1,0>, <0,3,1,0>
+ 2684657701U, // <1,0,0,4>: Cost 3 vext3 <0,0,4,1>, <0,0,4,1>
+ 2620637598U, // <1,0,0,5>: Cost 3 vext2 <0,5,1,0>, <0,5,1,0>
+ 3708977654U, // <1,0,0,6>: Cost 4 vext2 <3,0,1,0>, <0,6,1,7>
+ 3666351168U, // <1,0,0,7>: Cost 4 vext1 <7,1,0,0>, <7,1,0,0>
+ 1611210825U, // <1,0,0,u>: Cost 2 vext3 <0,0,u,1>, <0,0,u,1>
+ 2556780646U, // <1,0,1,0>: Cost 3 vext1 <1,1,0,1>, LHS
+ 2556781355U, // <1,0,1,1>: Cost 3 vext1 <1,1,0,1>, <1,1,0,1>
+ 1616003174U, // <1,0,1,2>: Cost 2 vext3 <0,u,1,1>, LHS
+ 3693052888U, // <1,0,1,3>: Cost 4 vext2 <0,3,1,0>, <1,3,1,3>
+ 2556783926U, // <1,0,1,4>: Cost 3 vext1 <1,1,0,1>, RHS
+ 2580672143U, // <1,0,1,5>: Cost 3 vext1 <5,1,0,1>, <5,1,0,1>
+ 2724839566U, // <1,0,1,6>: Cost 3 vext3 <6,7,0,1>, <0,1,6,7>
+ 3654415354U, // <1,0,1,7>: Cost 4 vext1 <5,1,0,1>, <7,0,1,2>
+ 1616003228U, // <1,0,1,u>: Cost 2 vext3 <0,u,1,1>, LHS
+ 2685690019U, // <1,0,2,0>: Cost 3 vext3 <0,2,0,1>, <0,2,0,1>
+ 2685763756U, // <1,0,2,1>: Cost 3 vext3 <0,2,1,1>, <0,2,1,1>
+ 2698297524U, // <1,0,2,2>: Cost 3 vext3 <2,3,0,1>, <0,2,2,0>
+ 2685911230U, // <1,0,2,3>: Cost 3 vext3 <0,2,3,1>, <0,2,3,1>
+ 2689745100U, // <1,0,2,4>: Cost 3 vext3 <0,u,1,1>, <0,2,4,6>
+ 3764814038U, // <1,0,2,5>: Cost 4 vext3 <1,1,1,1>, <0,2,5,7>
+ 2724839640U, // <1,0,2,6>: Cost 3 vext3 <6,7,0,1>, <0,2,6,0>
+ 2592625658U, // <1,0,2,7>: Cost 3 vext1 <7,1,0,2>, <7,0,1,2>
+ 2686279915U, // <1,0,2,u>: Cost 3 vext3 <0,2,u,1>, <0,2,u,1>
+ 3087843328U, // <1,0,3,0>: Cost 3 vtrnr LHS, <0,0,0,0>
+ 3087843338U, // <1,0,3,1>: Cost 3 vtrnr LHS, <0,0,1,1>
+ 67944550U, // <1,0,3,2>: Cost 1 vrev LHS
+ 2568743135U, // <1,0,3,3>: Cost 3 vext1 <3,1,0,3>, <3,1,0,3>
+ 2562772278U, // <1,0,3,4>: Cost 3 vext1 <2,1,0,3>, RHS
+ 4099850454U, // <1,0,3,5>: Cost 4 vtrnl <1,0,3,2>, <0,2,5,7>
+ 3704998538U, // <1,0,3,6>: Cost 4 vext2 <2,3,1,0>, <3,6,2,7>
+ 2592633923U, // <1,0,3,7>: Cost 3 vext1 <7,1,0,3>, <7,1,0,3>
+ 68386972U, // <1,0,3,u>: Cost 1 vrev LHS
+ 2620640146U, // <1,0,4,0>: Cost 3 vext2 <0,5,1,0>, <4,0,5,1>
+ 2689745234U, // <1,0,4,1>: Cost 3 vext3 <0,u,1,1>, <0,4,1,5>
+ 2689745244U, // <1,0,4,2>: Cost 3 vext3 <0,u,1,1>, <0,4,2,6>
+ 3760980320U, // <1,0,4,3>: Cost 4 vext3 <0,4,3,1>, <0,4,3,1>
+ 3761054057U, // <1,0,4,4>: Cost 4 vext3 <0,4,4,1>, <0,4,4,1>
+ 2619313462U, // <1,0,4,5>: Cost 3 vext2 <0,3,1,0>, RHS
+ 3761201531U, // <1,0,4,6>: Cost 4 vext3 <0,4,6,1>, <0,4,6,1>
+ 3666383940U, // <1,0,4,7>: Cost 4 vext1 <7,1,0,4>, <7,1,0,4>
+ 2619313705U, // <1,0,4,u>: Cost 3 vext2 <0,3,1,0>, RHS
+ 4029300736U, // <1,0,5,0>: Cost 4 vzipr <0,4,1,5>, <0,0,0,0>
+ 2895249510U, // <1,0,5,1>: Cost 3 vzipl <1,5,3,7>, LHS
+ 3028287590U, // <1,0,5,2>: Cost 3 vtrnl <1,3,5,7>, LHS
+ 3642501345U, // <1,0,5,3>: Cost 4 vext1 <3,1,0,5>, <3,1,0,5>
+ 2215592058U, // <1,0,5,4>: Cost 3 vrev <0,1,4,5>
+ 3724242907U, // <1,0,5,5>: Cost 4 vext2 <5,5,1,0>, <5,5,1,0>
+ 3724906540U, // <1,0,5,6>: Cost 4 vext2 <5,6,1,0>, <5,6,1,0>
+ 3911118134U, // <1,0,5,7>: Cost 4 vuzpr <3,1,3,0>, RHS
+ 3028287644U, // <1,0,5,u>: Cost 3 vtrnl <1,3,5,7>, LHS
+ 3762086375U, // <1,0,6,0>: Cost 4 vext3 <0,6,0,1>, <0,6,0,1>
+ 2698297846U, // <1,0,6,1>: Cost 3 vext3 <2,3,0,1>, <0,6,1,7>
+ 3760022015U, // <1,0,6,2>: Cost 4 vext3 <0,2,u,1>, <0,6,2,7>
+ 3642509538U, // <1,0,6,3>: Cost 4 vext1 <3,1,0,6>, <3,1,0,6>
+ 3762381323U, // <1,0,6,4>: Cost 4 vext3 <0,6,4,1>, <0,6,4,1>
+ 3730215604U, // <1,0,6,5>: Cost 4 vext2 <6,5,1,0>, <6,5,1,0>
+ 3730879237U, // <1,0,6,6>: Cost 4 vext2 <6,6,1,0>, <6,6,1,0>
+ 2657801046U, // <1,0,6,7>: Cost 3 vext2 <6,7,1,0>, <6,7,1,0>
+ 2658464679U, // <1,0,6,u>: Cost 3 vext2 <6,u,1,0>, <6,u,1,0>
+ 2659128312U, // <1,0,7,0>: Cost 3 vext2 <7,0,1,0>, <7,0,1,0>
+ 4047898278U, // <1,0,7,1>: Cost 4 vzipr <3,5,1,7>, <2,3,0,1>
+ 2215460970U, // <1,0,7,2>: Cost 3 vrev <0,1,2,7>
+ 3734861035U, // <1,0,7,3>: Cost 4 vext2 <7,3,1,0>, <7,3,1,0>
+ 3731543398U, // <1,0,7,4>: Cost 4 vext2 <6,7,1,0>, <7,4,5,6>
+ 3736188301U, // <1,0,7,5>: Cost 4 vext2 <7,5,1,0>, <7,5,1,0>
+ 2663110110U, // <1,0,7,6>: Cost 3 vext2 <7,6,1,0>, <7,6,1,0>
+ 3731543660U, // <1,0,7,7>: Cost 4 vext2 <6,7,1,0>, <7,7,7,7>
+ 2664437376U, // <1,0,7,u>: Cost 3 vext2 <7,u,1,0>, <7,u,1,0>
+ 3087884288U, // <1,0,u,0>: Cost 3 vtrnr LHS, <0,0,0,0>
+ 1616003730U, // <1,0,u,1>: Cost 2 vext3 <0,u,1,1>, <0,u,1,1>
+ 67985515U, // <1,0,u,2>: Cost 1 vrev LHS
+ 2689893028U, // <1,0,u,3>: Cost 3 vext3 <0,u,3,1>, <0,u,3,1>
+ 2689745586U, // <1,0,u,4>: Cost 3 vext3 <0,u,1,1>, <0,u,4,6>
+ 2619316378U, // <1,0,u,5>: Cost 3 vext2 <0,3,1,0>, RHS
+ 2669082807U, // <1,0,u,6>: Cost 3 vext2 <u,6,1,0>, <u,6,1,0>
+ 2592674888U, // <1,0,u,7>: Cost 3 vext1 <7,1,0,u>, <7,1,0,u>
+ 68427937U, // <1,0,u,u>: Cost 1 vrev LHS
+ 1543585802U, // <1,1,0,0>: Cost 2 vext2 <0,0,1,1>, <0,0,1,1>
+ 1548894310U, // <1,1,0,1>: Cost 2 vext2 <0,u,1,1>, LHS
+ 2618654892U, // <1,1,0,2>: Cost 3 vext2 <0,2,1,1>, <0,2,1,1>
+ 2689745654U, // <1,1,0,3>: Cost 3 vext3 <0,u,1,1>, <1,0,3,2>
+ 2622636370U, // <1,1,0,4>: Cost 3 vext2 <0,u,1,1>, <0,4,1,5>
+ 2620645791U, // <1,1,0,5>: Cost 3 vext2 <0,5,1,1>, <0,5,1,1>
+ 3696378367U, // <1,1,0,6>: Cost 4 vext2 <0,u,1,1>, <0,6,2,7>
+ 3666424905U, // <1,1,0,7>: Cost 4 vext1 <7,1,1,0>, <7,1,1,0>
+ 1548894866U, // <1,1,0,u>: Cost 2 vext2 <0,u,1,1>, <0,u,1,1>
+ 1483112550U, // <1,1,1,0>: Cost 2 vext1 <1,1,1,1>, LHS
+ 202162278U, // <1,1,1,1>: Cost 1 vdup1 LHS
+ 2622636950U, // <1,1,1,2>: Cost 3 vext2 <0,u,1,1>, <1,2,3,0>
+ 2622637016U, // <1,1,1,3>: Cost 3 vext2 <0,u,1,1>, <1,3,1,3>
+ 1483115830U, // <1,1,1,4>: Cost 2 vext1 <1,1,1,1>, RHS
+ 2622637200U, // <1,1,1,5>: Cost 3 vext2 <0,u,1,1>, <1,5,3,7>
+ 2622637263U, // <1,1,1,6>: Cost 3 vext2 <0,u,1,1>, <1,6,1,7>
+ 2592691274U, // <1,1,1,7>: Cost 3 vext1 <7,1,1,1>, <7,1,1,1>
+ 202162278U, // <1,1,1,u>: Cost 1 vdup1 LHS
+ 2550890588U, // <1,1,2,0>: Cost 3 vext1 <0,1,1,2>, <0,1,1,2>
+ 2617329183U, // <1,1,2,1>: Cost 3 vext2 <0,0,1,1>, <2,1,3,1>
+ 2622637672U, // <1,1,2,2>: Cost 3 vext2 <0,u,1,1>, <2,2,2,2>
+ 2622637734U, // <1,1,2,3>: Cost 3 vext2 <0,u,1,1>, <2,3,0,1>
+ 2550893878U, // <1,1,2,4>: Cost 3 vext1 <0,1,1,2>, RHS
+ 3696379744U, // <1,1,2,5>: Cost 4 vext2 <0,u,1,1>, <2,5,2,7>
+ 2622638010U, // <1,1,2,6>: Cost 3 vext2 <0,u,1,1>, <2,6,3,7>
+ 3804554170U, // <1,1,2,7>: Cost 4 vext3 <7,7,0,1>, <1,2,7,0>
+ 2622638139U, // <1,1,2,u>: Cost 3 vext2 <0,u,1,1>, <2,u,0,1>
+ 2622638230U, // <1,1,3,0>: Cost 3 vext2 <0,u,1,1>, <3,0,1,2>
+ 3087844148U, // <1,1,3,1>: Cost 3 vtrnr LHS, <1,1,1,1>
+ 4161585244U, // <1,1,3,2>: Cost 4 vtrnr LHS, <0,1,1,2>
+ 2014101606U, // <1,1,3,3>: Cost 2 vtrnr LHS, LHS
+ 2622638594U, // <1,1,3,4>: Cost 3 vext2 <0,u,1,1>, <3,4,5,6>
+ 2689745920U, // <1,1,3,5>: Cost 3 vext3 <0,u,1,1>, <1,3,5,7>
+ 3763487753U, // <1,1,3,6>: Cost 4 vext3 <0,u,1,1>, <1,3,6,7>
+ 2592707660U, // <1,1,3,7>: Cost 3 vext1 <7,1,1,3>, <7,1,1,3>
+ 2014101611U, // <1,1,3,u>: Cost 2 vtrnr LHS, LHS
+ 2556878950U, // <1,1,4,0>: Cost 3 vext1 <1,1,1,4>, LHS
+ 2221335351U, // <1,1,4,1>: Cost 3 vrev <1,1,1,4>
+ 3696380988U, // <1,1,4,2>: Cost 4 vext2 <0,u,1,1>, <4,2,6,0>
+ 3763487805U, // <1,1,4,3>: Cost 4 vext3 <0,u,1,1>, <1,4,3,5>
+ 2556882230U, // <1,1,4,4>: Cost 3 vext1 <1,1,1,4>, RHS
+ 1548897590U, // <1,1,4,5>: Cost 2 vext2 <0,u,1,1>, RHS
+ 2758184246U, // <1,1,4,6>: Cost 3 vuzpl <1,1,1,1>, RHS
+ 3666457677U, // <1,1,4,7>: Cost 4 vext1 <7,1,1,4>, <7,1,1,4>
+ 1548897833U, // <1,1,4,u>: Cost 2 vext2 <0,u,1,1>, RHS
+ 2693653615U, // <1,1,5,0>: Cost 3 vext3 <1,5,0,1>, <1,5,0,1>
+ 2617331408U, // <1,1,5,1>: Cost 3 vext2 <0,0,1,1>, <5,1,7,3>
+ 4029302934U, // <1,1,5,2>: Cost 4 vzipr <0,4,1,5>, <3,0,1,2>
+ 2689746064U, // <1,1,5,3>: Cost 3 vext3 <0,u,1,1>, <1,5,3,7>
+ 2221564755U, // <1,1,5,4>: Cost 3 vrev <1,1,4,5>
+ 2955559250U, // <1,1,5,5>: Cost 3 vzipr <0,4,1,5>, <0,4,1,5>
+ 2617331810U, // <1,1,5,6>: Cost 3 vext2 <0,0,1,1>, <5,6,7,0>
+ 2825293110U, // <1,1,5,7>: Cost 3 vuzpr <1,1,1,1>, RHS
+ 2689746109U, // <1,1,5,u>: Cost 3 vext3 <0,u,1,1>, <1,5,u,7>
+ 3696382241U, // <1,1,6,0>: Cost 4 vext2 <0,u,1,1>, <6,0,1,2>
+ 2689746127U, // <1,1,6,1>: Cost 3 vext3 <0,u,1,1>, <1,6,1,7>
+ 2617332218U, // <1,1,6,2>: Cost 3 vext2 <0,0,1,1>, <6,2,7,3>
+ 3763487969U, // <1,1,6,3>: Cost 4 vext3 <0,u,1,1>, <1,6,3,7>
+ 3696382605U, // <1,1,6,4>: Cost 4 vext2 <0,u,1,1>, <6,4,5,6>
+ 4029309266U, // <1,1,6,5>: Cost 4 vzipr <0,4,1,6>, <0,4,1,5>
+ 2617332536U, // <1,1,6,6>: Cost 3 vext2 <0,0,1,1>, <6,6,6,6>
+ 2724840702U, // <1,1,6,7>: Cost 3 vext3 <6,7,0,1>, <1,6,7,0>
+ 2725504263U, // <1,1,6,u>: Cost 3 vext3 <6,u,0,1>, <1,6,u,0>
+ 2617332720U, // <1,1,7,0>: Cost 3 vext2 <0,0,1,1>, <7,0,0,1>
+ 2659800138U, // <1,1,7,1>: Cost 3 vext2 <7,1,1,1>, <7,1,1,1>
+ 3691074717U, // <1,1,7,2>: Cost 4 vext2 <0,0,1,1>, <7,2,1,3>
+ 4167811174U, // <1,1,7,3>: Cost 4 vtrnr <1,1,5,7>, LHS
+ 2617333094U, // <1,1,7,4>: Cost 3 vext2 <0,0,1,1>, <7,4,5,6>
+ 3295396702U, // <1,1,7,5>: Cost 4 vrev <1,1,5,7>
+ 3803891014U, // <1,1,7,6>: Cost 4 vext3 <7,6,0,1>, <1,7,6,0>
+ 2617333356U, // <1,1,7,7>: Cost 3 vext2 <0,0,1,1>, <7,7,7,7>
+ 2659800138U, // <1,1,7,u>: Cost 3 vext2 <7,1,1,1>, <7,1,1,1>
+ 1483112550U, // <1,1,u,0>: Cost 2 vext1 <1,1,1,1>, LHS
+ 202162278U, // <1,1,u,1>: Cost 1 vdup1 LHS
+ 2622642056U, // <1,1,u,2>: Cost 3 vext2 <0,u,1,1>, <u,2,3,3>
+ 2014142566U, // <1,1,u,3>: Cost 2 vtrnr LHS, LHS
+ 1483115830U, // <1,1,u,4>: Cost 2 vext1 <1,1,1,1>, RHS
+ 1548900506U, // <1,1,u,5>: Cost 2 vext2 <0,u,1,1>, RHS
+ 2622642384U, // <1,1,u,6>: Cost 3 vext2 <0,u,1,1>, <u,6,3,7>
+ 2825293353U, // <1,1,u,7>: Cost 3 vuzpr <1,1,1,1>, RHS
+ 202162278U, // <1,1,u,u>: Cost 1 vdup1 LHS
+ 2635251712U, // <1,2,0,0>: Cost 3 vext2 <3,0,1,2>, <0,0,0,0>
+ 1561509990U, // <1,2,0,1>: Cost 2 vext2 <3,0,1,2>, LHS
+ 2618663085U, // <1,2,0,2>: Cost 3 vext2 <0,2,1,2>, <0,2,1,2>
+ 2696529358U, // <1,2,0,3>: Cost 3 vext3 <2,0,3,1>, <2,0,3,1>
+ 2635252050U, // <1,2,0,4>: Cost 3 vext2 <3,0,1,2>, <0,4,1,5>
+ 3769533926U, // <1,2,0,5>: Cost 4 vext3 <1,u,2,1>, <2,0,5,7>
+ 2621317617U, // <1,2,0,6>: Cost 3 vext2 <0,6,1,2>, <0,6,1,2>
+ 2659140170U, // <1,2,0,7>: Cost 3 vext2 <7,0,1,2>, <0,7,2,1>
+ 1561510557U, // <1,2,0,u>: Cost 2 vext2 <3,0,1,2>, LHS
+ 2623308516U, // <1,2,1,0>: Cost 3 vext2 <1,0,1,2>, <1,0,1,2>
+ 2635252532U, // <1,2,1,1>: Cost 3 vext2 <3,0,1,2>, <1,1,1,1>
+ 2631271318U, // <1,2,1,2>: Cost 3 vext2 <2,3,1,2>, <1,2,3,0>
+ 2958180454U, // <1,2,1,3>: Cost 3 vzipr <0,u,1,1>, LHS
+ 2550959414U, // <1,2,1,4>: Cost 3 vext1 <0,1,2,1>, RHS
+ 2635252880U, // <1,2,1,5>: Cost 3 vext2 <3,0,1,2>, <1,5,3,7>
+ 2635252952U, // <1,2,1,6>: Cost 3 vext2 <3,0,1,2>, <1,6,2,7>
+ 3732882731U, // <1,2,1,7>: Cost 4 vext2 <7,0,1,2>, <1,7,3,0>
+ 2958180459U, // <1,2,1,u>: Cost 3 vzipr <0,u,1,1>, LHS
+ 2629281213U, // <1,2,2,0>: Cost 3 vext2 <2,0,1,2>, <2,0,1,2>
+ 2635253280U, // <1,2,2,1>: Cost 3 vext2 <3,0,1,2>, <2,1,3,2>
+ 2618664552U, // <1,2,2,2>: Cost 3 vext2 <0,2,1,2>, <2,2,2,2>
+ 2689746546U, // <1,2,2,3>: Cost 3 vext3 <0,u,1,1>, <2,2,3,3>
+ 3764815485U, // <1,2,2,4>: Cost 4 vext3 <1,1,1,1>, <2,2,4,5>
+ 3760023176U, // <1,2,2,5>: Cost 4 vext3 <0,2,u,1>, <2,2,5,7>
+ 2635253690U, // <1,2,2,6>: Cost 3 vext2 <3,0,1,2>, <2,6,3,7>
+ 2659141610U, // <1,2,2,7>: Cost 3 vext2 <7,0,1,2>, <2,7,0,1>
+ 2689746591U, // <1,2,2,u>: Cost 3 vext3 <0,u,1,1>, <2,2,u,3>
+ 403488870U, // <1,2,3,0>: Cost 1 vext1 LHS, LHS
+ 1477231350U, // <1,2,3,1>: Cost 2 vext1 LHS, <1,0,3,2>
+ 1477232232U, // <1,2,3,2>: Cost 2 vext1 LHS, <2,2,2,2>
+ 1477233052U, // <1,2,3,3>: Cost 2 vext1 LHS, <3,3,3,3>
+ 403492150U, // <1,2,3,4>: Cost 1 vext1 LHS, RHS
+ 1525010128U, // <1,2,3,5>: Cost 2 vext1 LHS, <5,1,7,3>
+ 1525010938U, // <1,2,3,6>: Cost 2 vext1 LHS, <6,2,7,3>
+ 1525011450U, // <1,2,3,7>: Cost 2 vext1 LHS, <7,0,1,2>
+ 403494702U, // <1,2,3,u>: Cost 1 vext1 LHS, LHS
+ 2641226607U, // <1,2,4,0>: Cost 3 vext2 <4,0,1,2>, <4,0,1,2>
+ 3624723446U, // <1,2,4,1>: Cost 4 vext1 <0,1,2,4>, <1,3,4,6>
+ 3301123609U, // <1,2,4,2>: Cost 4 vrev <2,1,2,4>
+ 2598759198U, // <1,2,4,3>: Cost 3 vext1 <u,1,2,4>, <3,u,1,2>
+ 2659142864U, // <1,2,4,4>: Cost 3 vext2 <7,0,1,2>, <4,4,4,4>
+ 1561513270U, // <1,2,4,5>: Cost 2 vext2 <3,0,1,2>, RHS
+ 2659143028U, // <1,2,4,6>: Cost 3 vext2 <7,0,1,2>, <4,6,4,6>
+ 2659143112U, // <1,2,4,7>: Cost 3 vext2 <7,0,1,2>, <4,7,5,0>
+ 1561513513U, // <1,2,4,u>: Cost 2 vext2 <3,0,1,2>, RHS
+ 2550988902U, // <1,2,5,0>: Cost 3 vext1 <0,1,2,5>, LHS
+ 2550989824U, // <1,2,5,1>: Cost 3 vext1 <0,1,2,5>, <1,3,5,7>
+ 3624732264U, // <1,2,5,2>: Cost 4 vext1 <0,1,2,5>, <2,2,2,2>
+ 2955559014U, // <1,2,5,3>: Cost 3 vzipr <0,4,1,5>, LHS
+ 2550992182U, // <1,2,5,4>: Cost 3 vext1 <0,1,2,5>, RHS
+ 2659143684U, // <1,2,5,5>: Cost 3 vext2 <7,0,1,2>, <5,5,5,5>
+ 2659143778U, // <1,2,5,6>: Cost 3 vext2 <7,0,1,2>, <5,6,7,0>
+ 2659143848U, // <1,2,5,7>: Cost 3 vext2 <7,0,1,2>, <5,7,5,7>
+ 2550994734U, // <1,2,5,u>: Cost 3 vext1 <0,1,2,5>, LHS
+ 2700289945U, // <1,2,6,0>: Cost 3 vext3 <2,6,0,1>, <2,6,0,1>
+ 2635256232U, // <1,2,6,1>: Cost 3 vext2 <3,0,1,2>, <6,1,7,2>
+ 2659144186U, // <1,2,6,2>: Cost 3 vext2 <7,0,1,2>, <6,2,7,3>
+ 2689746874U, // <1,2,6,3>: Cost 3 vext3 <0,u,1,1>, <2,6,3,7>
+ 3763488705U, // <1,2,6,4>: Cost 4 vext3 <0,u,1,1>, <2,6,4,5>
+ 3763488716U, // <1,2,6,5>: Cost 4 vext3 <0,u,1,1>, <2,6,5,7>
+ 2659144504U, // <1,2,6,6>: Cost 3 vext2 <7,0,1,2>, <6,6,6,6>
+ 2657817432U, // <1,2,6,7>: Cost 3 vext2 <6,7,1,2>, <6,7,1,2>
+ 2689746919U, // <1,2,6,u>: Cost 3 vext3 <0,u,1,1>, <2,6,u,7>
+ 1585402874U, // <1,2,7,0>: Cost 2 vext2 <7,0,1,2>, <7,0,1,2>
+ 2659144770U, // <1,2,7,1>: Cost 3 vext2 <7,0,1,2>, <7,1,0,2>
+ 3708998858U, // <1,2,7,2>: Cost 4 vext2 <3,0,1,2>, <7,2,6,3>
+ 2635257059U, // <1,2,7,3>: Cost 3 vext2 <3,0,1,2>, <7,3,0,1>
+ 2659145062U, // <1,2,7,4>: Cost 3 vext2 <7,0,1,2>, <7,4,5,6>
+ 3732886916U, // <1,2,7,5>: Cost 4 vext2 <7,0,1,2>, <7,5,0,0>
+ 3732886998U, // <1,2,7,6>: Cost 4 vext2 <7,0,1,2>, <7,6,0,1>
+ 2659145255U, // <1,2,7,7>: Cost 3 vext2 <7,0,1,2>, <7,7,0,1>
+ 1590711938U, // <1,2,7,u>: Cost 2 vext2 <7,u,1,2>, <7,u,1,2>
+ 403529835U, // <1,2,u,0>: Cost 1 vext1 LHS, LHS
+ 1477272310U, // <1,2,u,1>: Cost 2 vext1 LHS, <1,0,3,2>
+ 1477273192U, // <1,2,u,2>: Cost 2 vext1 LHS, <2,2,2,2>
+ 1477273750U, // <1,2,u,3>: Cost 2 vext1 LHS, <3,0,1,2>
+ 403533110U, // <1,2,u,4>: Cost 1 vext1 LHS, RHS
+ 1561516186U, // <1,2,u,5>: Cost 2 vext2 <3,0,1,2>, RHS
+ 1525051898U, // <1,2,u,6>: Cost 2 vext1 LHS, <6,2,7,3>
+ 1525052410U, // <1,2,u,7>: Cost 2 vext1 LHS, <7,0,1,2>
+ 403535662U, // <1,2,u,u>: Cost 1 vext1 LHS, LHS
+ 2819407872U, // <1,3,0,0>: Cost 3 vuzpr LHS, <0,0,0,0>
+ 1551564902U, // <1,3,0,1>: Cost 2 vext2 <1,3,1,3>, LHS
+ 2819408630U, // <1,3,0,2>: Cost 3 vuzpr LHS, <1,0,3,2>
+ 2619334911U, // <1,3,0,3>: Cost 3 vext2 <0,3,1,3>, <0,3,1,3>
+ 2625306962U, // <1,3,0,4>: Cost 3 vext2 <1,3,1,3>, <0,4,1,5>
+ 3832725879U, // <1,3,0,5>: Cost 4 vuzpl <1,2,3,0>, <0,4,5,6>
+ 3699048959U, // <1,3,0,6>: Cost 4 vext2 <1,3,1,3>, <0,6,2,7>
+ 3776538827U, // <1,3,0,7>: Cost 4 vext3 <3,0,7,1>, <3,0,7,1>
+ 1551565469U, // <1,3,0,u>: Cost 2 vext2 <1,3,1,3>, LHS
+ 2618671862U, // <1,3,1,0>: Cost 3 vext2 <0,2,1,3>, <1,0,3,2>
+ 2819408692U, // <1,3,1,1>: Cost 3 vuzpr LHS, <1,1,1,1>
+ 2624643975U, // <1,3,1,2>: Cost 3 vext2 <1,2,1,3>, <1,2,1,3>
+ 1745666150U, // <1,3,1,3>: Cost 2 vuzpr LHS, LHS
+ 2557005110U, // <1,3,1,4>: Cost 3 vext1 <1,1,3,1>, RHS
+ 2625307792U, // <1,3,1,5>: Cost 3 vext2 <1,3,1,3>, <1,5,3,7>
+ 3698386127U, // <1,3,1,6>: Cost 4 vext2 <1,2,1,3>, <1,6,1,7>
+ 2592838748U, // <1,3,1,7>: Cost 3 vext1 <7,1,3,1>, <7,1,3,1>
+ 1745666155U, // <1,3,1,u>: Cost 2 vuzpr LHS, LHS
+ 2819408790U, // <1,3,2,0>: Cost 3 vuzpr LHS, <1,2,3,0>
+ 2625308193U, // <1,3,2,1>: Cost 3 vext2 <1,3,1,3>, <2,1,3,3>
+ 2819408036U, // <1,3,2,2>: Cost 3 vuzpr LHS, <0,2,0,2>
+ 2819851890U, // <1,3,2,3>: Cost 3 vuzpr LHS, <2,2,3,3>
+ 2819408794U, // <1,3,2,4>: Cost 3 vuzpr LHS, <1,2,3,4>
+ 3893149890U, // <1,3,2,5>: Cost 4 vuzpr LHS, <0,2,3,5>
+ 2819408076U, // <1,3,2,6>: Cost 3 vuzpr LHS, <0,2,4,6>
+ 3772041583U, // <1,3,2,7>: Cost 4 vext3 <2,3,0,1>, <3,2,7,3>
+ 2819408042U, // <1,3,2,u>: Cost 3 vuzpr LHS, <0,2,0,u>
+ 1483276390U, // <1,3,3,0>: Cost 2 vext1 <1,1,3,3>, LHS
+ 1483277128U, // <1,3,3,1>: Cost 2 vext1 <1,1,3,3>, <1,1,3,3>
+ 2557019752U, // <1,3,3,2>: Cost 3 vext1 <1,1,3,3>, <2,2,2,2>
+ 2819408856U, // <1,3,3,3>: Cost 3 vuzpr LHS, <1,3,1,3>
+ 1483279670U, // <1,3,3,4>: Cost 2 vext1 <1,1,3,3>, RHS
+ 2819409614U, // <1,3,3,5>: Cost 3 vuzpr LHS, <2,3,4,5>
+ 2598826490U, // <1,3,3,6>: Cost 3 vext1 <u,1,3,3>, <6,2,7,3>
+ 3087844352U, // <1,3,3,7>: Cost 3 vtrnr LHS, <1,3,5,7>
+ 1483282222U, // <1,3,3,u>: Cost 2 vext1 <1,1,3,3>, LHS
+ 2568970342U, // <1,3,4,0>: Cost 3 vext1 <3,1,3,4>, LHS
+ 2568971224U, // <1,3,4,1>: Cost 3 vext1 <3,1,3,4>, <1,3,1,3>
+ 3832761290U, // <1,3,4,2>: Cost 4 vuzpl <1,2,3,4>, <4,1,2,3>
+ 2233428219U, // <1,3,4,3>: Cost 3 vrev <3,1,3,4>
+ 2568973622U, // <1,3,4,4>: Cost 3 vext1 <3,1,3,4>, RHS
+ 1551568182U, // <1,3,4,5>: Cost 2 vext2 <1,3,1,3>, RHS
+ 2819410434U, // <1,3,4,6>: Cost 3 vuzpr LHS, <3,4,5,6>
+ 3666605151U, // <1,3,4,7>: Cost 4 vext1 <7,1,3,4>, <7,1,3,4>
+ 1551568425U, // <1,3,4,u>: Cost 2 vext2 <1,3,1,3>, RHS
+ 2563006566U, // <1,3,5,0>: Cost 3 vext1 <2,1,3,5>, LHS
+ 2568979456U, // <1,3,5,1>: Cost 3 vext1 <3,1,3,5>, <1,3,5,7>
+ 2563008035U, // <1,3,5,2>: Cost 3 vext1 <2,1,3,5>, <2,1,3,5>
+ 2233436412U, // <1,3,5,3>: Cost 3 vrev <3,1,3,5>
+ 2563009846U, // <1,3,5,4>: Cost 3 vext1 <2,1,3,5>, RHS
+ 2867187716U, // <1,3,5,5>: Cost 3 vuzpr LHS, <5,5,5,5>
+ 2655834214U, // <1,3,5,6>: Cost 3 vext2 <6,4,1,3>, <5,6,7,4>
+ 1745669430U, // <1,3,5,7>: Cost 2 vuzpr LHS, RHS
+ 1745669431U, // <1,3,5,u>: Cost 2 vuzpr LHS, RHS
+ 2867187810U, // <1,3,6,0>: Cost 3 vuzpr LHS, <5,6,7,0>
+ 3699052931U, // <1,3,6,1>: Cost 4 vext2 <1,3,1,3>, <6,1,3,1>
+ 2654507460U, // <1,3,6,2>: Cost 3 vext2 <6,2,1,3>, <6,2,1,3>
+ 3766291091U, // <1,3,6,3>: Cost 4 vext3 <1,3,3,1>, <3,6,3,7>
+ 2655834726U, // <1,3,6,4>: Cost 3 vext2 <6,4,1,3>, <6,4,1,3>
+ 3923384562U, // <1,3,6,5>: Cost 4 vuzpr <5,1,7,3>, <u,6,7,5>
+ 2657161992U, // <1,3,6,6>: Cost 3 vext2 <6,6,1,3>, <6,6,1,3>
+ 2819852218U, // <1,3,6,7>: Cost 3 vuzpr LHS, <2,6,3,7>
+ 2819852219U, // <1,3,6,u>: Cost 3 vuzpr LHS, <2,6,3,u>
+ 2706926275U, // <1,3,7,0>: Cost 3 vext3 <3,7,0,1>, <3,7,0,1>
+ 2659816524U, // <1,3,7,1>: Cost 3 vext2 <7,1,1,3>, <7,1,1,3>
+ 3636766245U, // <1,3,7,2>: Cost 4 vext1 <2,1,3,7>, <2,1,3,7>
+ 2867187903U, // <1,3,7,3>: Cost 3 vuzpr LHS, <5,7,u,3>
+ 2625312102U, // <1,3,7,4>: Cost 3 vext2 <1,3,1,3>, <7,4,5,6>
+ 2867188598U, // <1,3,7,5>: Cost 3 vuzpr LHS, <6,7,4,5>
+ 3728250344U, // <1,3,7,6>: Cost 4 vext2 <6,2,1,3>, <7,6,2,1>
+ 2867187880U, // <1,3,7,7>: Cost 3 vuzpr LHS, <5,7,5,7>
+ 2707516171U, // <1,3,7,u>: Cost 3 vext3 <3,7,u,1>, <3,7,u,1>
+ 1483317350U, // <1,3,u,0>: Cost 2 vext1 <1,1,3,u>, LHS
+ 1483318093U, // <1,3,u,1>: Cost 2 vext1 <1,1,3,u>, <1,1,3,u>
+ 2819410718U, // <1,3,u,2>: Cost 3 vuzpr LHS, <3,u,1,2>
+ 1745666717U, // <1,3,u,3>: Cost 2 vuzpr LHS, LHS
+ 1483320630U, // <1,3,u,4>: Cost 2 vext1 <1,1,3,u>, RHS
+ 1551571098U, // <1,3,u,5>: Cost 2 vext2 <1,3,1,3>, RHS
+ 2819410758U, // <1,3,u,6>: Cost 3 vuzpr LHS, <3,u,5,6>
+ 1745669673U, // <1,3,u,7>: Cost 2 vuzpr LHS, RHS
+ 1745666722U, // <1,3,u,u>: Cost 2 vuzpr LHS, LHS
+ 2617352205U, // <1,4,0,0>: Cost 3 vext2 <0,0,1,4>, <0,0,1,4>
+ 2619342950U, // <1,4,0,1>: Cost 3 vext2 <0,3,1,4>, LHS
+ 3692421295U, // <1,4,0,2>: Cost 4 vext2 <0,2,1,4>, <0,2,1,4>
+ 2619343104U, // <1,4,0,3>: Cost 3 vext2 <0,3,1,4>, <0,3,1,4>
+ 2617352530U, // <1,4,0,4>: Cost 3 vext2 <0,0,1,4>, <0,4,1,5>
+ 1634880402U, // <1,4,0,5>: Cost 2 vext3 <4,0,5,1>, <4,0,5,1>
+ 2713930652U, // <1,4,0,6>: Cost 3 vext3 <4,u,5,1>, <4,0,6,2>
+ 3732898396U, // <1,4,0,7>: Cost 4 vext2 <7,0,1,4>, <0,7,4,1>
+ 1635101613U, // <1,4,0,u>: Cost 2 vext3 <4,0,u,1>, <4,0,u,1>
+ 3693085430U, // <1,4,1,0>: Cost 4 vext2 <0,3,1,4>, <1,0,3,2>
+ 2623988535U, // <1,4,1,1>: Cost 3 vext2 <1,1,1,4>, <1,1,1,4>
+ 3693085590U, // <1,4,1,2>: Cost 4 vext2 <0,3,1,4>, <1,2,3,0>
+ 3692422134U, // <1,4,1,3>: Cost 4 vext2 <0,2,1,4>, <1,3,4,6>
+ 3693085726U, // <1,4,1,4>: Cost 4 vext2 <0,3,1,4>, <1,4,0,1>
+ 2892401974U, // <1,4,1,5>: Cost 3 vzipl <1,1,1,1>, RHS
+ 3026619702U, // <1,4,1,6>: Cost 3 vtrnl <1,1,1,1>, RHS
+ 3800206324U, // <1,4,1,7>: Cost 4 vext3 <7,0,4,1>, <4,1,7,0>
+ 2892402217U, // <1,4,1,u>: Cost 3 vzipl <1,1,1,1>, RHS
+ 3966978927U, // <1,4,2,0>: Cost 4 vzipl <1,2,3,4>, <4,0,1,2>
+ 3966979018U, // <1,4,2,1>: Cost 4 vzipl <1,2,3,4>, <4,1,2,3>
+ 3693086312U, // <1,4,2,2>: Cost 4 vext2 <0,3,1,4>, <2,2,2,2>
+ 2635269798U, // <1,4,2,3>: Cost 3 vext2 <3,0,1,4>, <2,3,0,1>
+ 3966979280U, // <1,4,2,4>: Cost 4 vzipl <1,2,3,4>, <4,4,4,4>
+ 2893204790U, // <1,4,2,5>: Cost 3 vzipl <1,2,3,0>, RHS
+ 3693086650U, // <1,4,2,6>: Cost 4 vext2 <0,3,1,4>, <2,6,3,7>
+ 3666662502U, // <1,4,2,7>: Cost 4 vext1 <7,1,4,2>, <7,1,4,2>
+ 2893205033U, // <1,4,2,u>: Cost 3 vzipl <1,2,3,0>, RHS
+ 2563063910U, // <1,4,3,0>: Cost 3 vext1 <2,1,4,3>, LHS
+ 2563064730U, // <1,4,3,1>: Cost 3 vext1 <2,1,4,3>, <1,2,3,4>
+ 2563065386U, // <1,4,3,2>: Cost 3 vext1 <2,1,4,3>, <2,1,4,3>
+ 3693087132U, // <1,4,3,3>: Cost 4 vext2 <0,3,1,4>, <3,3,3,3>
+ 2619345410U, // <1,4,3,4>: Cost 3 vext2 <0,3,1,4>, <3,4,5,6>
+ 3087843666U, // <1,4,3,5>: Cost 3 vtrnr LHS, <0,4,1,5>
+ 3087843676U, // <1,4,3,6>: Cost 3 vtrnr LHS, <0,4,2,6>
+ 3666670695U, // <1,4,3,7>: Cost 4 vext1 <7,1,4,3>, <7,1,4,3>
+ 3087843669U, // <1,4,3,u>: Cost 3 vtrnr LHS, <0,4,1,u>
+ 2620672914U, // <1,4,4,0>: Cost 3 vext2 <0,5,1,4>, <4,0,5,1>
+ 3630842706U, // <1,4,4,1>: Cost 4 vext1 <1,1,4,4>, <1,1,4,4>
+ 3313069003U, // <1,4,4,2>: Cost 4 vrev <4,1,2,4>
+ 3642788100U, // <1,4,4,3>: Cost 4 vext1 <3,1,4,4>, <3,1,4,4>
+ 2713930960U, // <1,4,4,4>: Cost 3 vext3 <4,u,5,1>, <4,4,4,4>
+ 2619346230U, // <1,4,4,5>: Cost 3 vext2 <0,3,1,4>, RHS
+ 2713930980U, // <1,4,4,6>: Cost 3 vext3 <4,u,5,1>, <4,4,6,6>
+ 3736882642U, // <1,4,4,7>: Cost 4 vext2 <7,6,1,4>, <4,7,6,1>
+ 2619346473U, // <1,4,4,u>: Cost 3 vext2 <0,3,1,4>, RHS
+ 2557108326U, // <1,4,5,0>: Cost 3 vext1 <1,1,4,5>, LHS
+ 2557109075U, // <1,4,5,1>: Cost 3 vext1 <1,1,4,5>, <1,1,4,5>
+ 2598913774U, // <1,4,5,2>: Cost 3 vext1 <u,1,4,5>, <2,3,u,1>
+ 3630852246U, // <1,4,5,3>: Cost 4 vext1 <1,1,4,5>, <3,0,1,2>
+ 2557111606U, // <1,4,5,4>: Cost 3 vext1 <1,1,4,5>, RHS
+ 2895252790U, // <1,4,5,5>: Cost 3 vzipl <1,5,3,7>, RHS
+ 1616006454U, // <1,4,5,6>: Cost 2 vext3 <0,u,1,1>, RHS
+ 3899059510U, // <1,4,5,7>: Cost 4 vuzpr <1,1,1,4>, RHS
+ 1616006472U, // <1,4,5,u>: Cost 2 vext3 <0,u,1,1>, RHS
+ 2557116518U, // <1,4,6,0>: Cost 3 vext1 <1,1,4,6>, LHS
+ 2557117236U, // <1,4,6,1>: Cost 3 vext1 <1,1,4,6>, <1,1,1,1>
+ 3630859880U, // <1,4,6,2>: Cost 4 vext1 <1,1,4,6>, <2,2,2,2>
+ 2569062550U, // <1,4,6,3>: Cost 3 vext1 <3,1,4,6>, <3,0,1,2>
+ 2557119798U, // <1,4,6,4>: Cost 3 vext1 <1,1,4,6>, RHS
+ 3763490174U, // <1,4,6,5>: Cost 4 vext3 <0,u,1,1>, <4,6,5,7>
+ 3763490183U, // <1,4,6,6>: Cost 4 vext3 <0,u,1,1>, <4,6,6,7>
+ 2712751498U, // <1,4,6,7>: Cost 3 vext3 <4,6,7,1>, <4,6,7,1>
+ 2557122350U, // <1,4,6,u>: Cost 3 vext1 <1,1,4,6>, LHS
+ 2659161084U, // <1,4,7,0>: Cost 3 vext2 <7,0,1,4>, <7,0,1,4>
+ 3732903040U, // <1,4,7,1>: Cost 4 vext2 <7,0,1,4>, <7,1,7,1>
+ 3734230174U, // <1,4,7,2>: Cost 4 vext2 <7,2,1,4>, <7,2,1,4>
+ 3734893807U, // <1,4,7,3>: Cost 4 vext2 <7,3,1,4>, <7,3,1,4>
+ 3660729654U, // <1,4,7,4>: Cost 4 vext1 <6,1,4,7>, RHS
+ 3786493384U, // <1,4,7,5>: Cost 4 vext3 <4,6,7,1>, <4,7,5,0>
+ 2713341394U, // <1,4,7,6>: Cost 3 vext3 <4,7,6,1>, <4,7,6,1>
+ 3660731386U, // <1,4,7,7>: Cost 4 vext1 <6,1,4,7>, <7,0,1,2>
+ 2664470148U, // <1,4,7,u>: Cost 3 vext2 <7,u,1,4>, <7,u,1,4>
+ 2557132902U, // <1,4,u,0>: Cost 3 vext1 <1,1,4,u>, LHS
+ 2619348782U, // <1,4,u,1>: Cost 3 vext2 <0,3,1,4>, LHS
+ 2563106351U, // <1,4,u,2>: Cost 3 vext1 <2,1,4,u>, <2,1,4,u>
+ 2713783816U, // <1,4,u,3>: Cost 3 vext3 <4,u,3,1>, <4,u,3,1>
+ 2622666815U, // <1,4,u,4>: Cost 3 vext2 <0,u,1,4>, <u,4,5,6>
+ 1640189466U, // <1,4,u,5>: Cost 2 vext3 <4,u,5,1>, <4,u,5,1>
+ 1616006697U, // <1,4,u,6>: Cost 2 vext3 <0,u,1,1>, RHS
+ 2712751498U, // <1,4,u,7>: Cost 3 vext3 <4,6,7,1>, <4,6,7,1>
+ 1616006715U, // <1,4,u,u>: Cost 2 vext3 <0,u,1,1>, RHS
+ 2620014592U, // <1,5,0,0>: Cost 3 vext2 <0,4,1,5>, <0,0,0,0>
+ 1546272870U, // <1,5,0,1>: Cost 2 vext2 <0,4,1,5>, LHS
+ 2618687664U, // <1,5,0,2>: Cost 3 vext2 <0,2,1,5>, <0,2,1,5>
+ 3693093120U, // <1,5,0,3>: Cost 4 vext2 <0,3,1,5>, <0,3,1,4>
+ 1546273106U, // <1,5,0,4>: Cost 2 vext2 <0,4,1,5>, <0,4,1,5>
+ 2620678563U, // <1,5,0,5>: Cost 3 vext2 <0,5,1,5>, <0,5,1,5>
+ 2714668660U, // <1,5,0,6>: Cost 3 vext3 <5,0,6,1>, <5,0,6,1>
+ 3772042877U, // <1,5,0,7>: Cost 4 vext3 <2,3,0,1>, <5,0,7,1>
+ 1546273437U, // <1,5,0,u>: Cost 2 vext2 <0,4,1,5>, LHS
+ 2620015350U, // <1,5,1,0>: Cost 3 vext2 <0,4,1,5>, <1,0,3,2>
+ 2620015412U, // <1,5,1,1>: Cost 3 vext2 <0,4,1,5>, <1,1,1,1>
+ 2620015510U, // <1,5,1,2>: Cost 3 vext2 <0,4,1,5>, <1,2,3,0>
+ 2618688512U, // <1,5,1,3>: Cost 3 vext2 <0,2,1,5>, <1,3,5,7>
+ 2620015677U, // <1,5,1,4>: Cost 3 vext2 <0,4,1,5>, <1,4,3,5>
+ 2620015727U, // <1,5,1,5>: Cost 3 vext2 <0,4,1,5>, <1,5,0,1>
+ 2620015859U, // <1,5,1,6>: Cost 3 vext2 <0,4,1,5>, <1,6,5,7>
+ 3093728566U, // <1,5,1,7>: Cost 3 vtrnr <1,1,1,1>, RHS
+ 2620015981U, // <1,5,1,u>: Cost 3 vext2 <0,4,1,5>, <1,u,1,3>
+ 3692430816U, // <1,5,2,0>: Cost 4 vext2 <0,2,1,5>, <2,0,5,1>
+ 2620016163U, // <1,5,2,1>: Cost 3 vext2 <0,4,1,5>, <2,1,3,5>
+ 2620016232U, // <1,5,2,2>: Cost 3 vext2 <0,4,1,5>, <2,2,2,2>
+ 2620016294U, // <1,5,2,3>: Cost 3 vext2 <0,4,1,5>, <2,3,0,1>
+ 3693758221U, // <1,5,2,4>: Cost 4 vext2 <0,4,1,5>, <2,4,2,5>
+ 3692431209U, // <1,5,2,5>: Cost 4 vext2 <0,2,1,5>, <2,5,3,7>
+ 2620016570U, // <1,5,2,6>: Cost 3 vext2 <0,4,1,5>, <2,6,3,7>
+ 4173598006U, // <1,5,2,7>: Cost 4 vtrnr <2,1,3,2>, RHS
+ 2620016699U, // <1,5,2,u>: Cost 3 vext2 <0,4,1,5>, <2,u,0,1>
+ 2620016790U, // <1,5,3,0>: Cost 3 vext2 <0,4,1,5>, <3,0,1,2>
+ 2569110672U, // <1,5,3,1>: Cost 3 vext1 <3,1,5,3>, <1,5,3,7>
+ 3693758785U, // <1,5,3,2>: Cost 4 vext2 <0,4,1,5>, <3,2,2,2>
+ 2620017052U, // <1,5,3,3>: Cost 3 vext2 <0,4,1,5>, <3,3,3,3>
+ 2620017154U, // <1,5,3,4>: Cost 3 vext2 <0,4,1,5>, <3,4,5,6>
+ 3135623172U, // <1,5,3,5>: Cost 3 vtrnr LHS, <5,5,5,5>
+ 4161587048U, // <1,5,3,6>: Cost 4 vtrnr LHS, <2,5,3,6>
+ 2014104886U, // <1,5,3,7>: Cost 2 vtrnr LHS, RHS
+ 2014104887U, // <1,5,3,u>: Cost 2 vtrnr LHS, RHS
+ 2620017554U, // <1,5,4,0>: Cost 3 vext2 <0,4,1,5>, <4,0,5,1>
+ 2620017634U, // <1,5,4,1>: Cost 3 vext2 <0,4,1,5>, <4,1,5,0>
+ 3693759551U, // <1,5,4,2>: Cost 4 vext2 <0,4,1,5>, <4,2,6,3>
+ 3642861837U, // <1,5,4,3>: Cost 4 vext1 <3,1,5,4>, <3,1,5,4>
+ 2575092710U, // <1,5,4,4>: Cost 3 vext1 <4,1,5,4>, <4,1,5,4>
+ 1546276150U, // <1,5,4,5>: Cost 2 vext2 <0,4,1,5>, RHS
+ 2759855414U, // <1,5,4,6>: Cost 3 vuzpl <1,3,5,7>, RHS
+ 2713931718U, // <1,5,4,7>: Cost 3 vext3 <4,u,5,1>, <5,4,7,6>
+ 1546276393U, // <1,5,4,u>: Cost 2 vext2 <0,4,1,5>, RHS
+ 2557182054U, // <1,5,5,0>: Cost 3 vext1 <1,1,5,5>, LHS
+ 2557182812U, // <1,5,5,1>: Cost 3 vext1 <1,1,5,5>, <1,1,5,5>
+ 3630925347U, // <1,5,5,2>: Cost 4 vext1 <1,1,5,5>, <2,1,3,5>
+ 4029301675U, // <1,5,5,3>: Cost 4 vzipr <0,4,1,5>, <1,2,5,3>
+ 2557185334U, // <1,5,5,4>: Cost 3 vext1 <1,1,5,5>, RHS
+ 2713931780U, // <1,5,5,5>: Cost 3 vext3 <4,u,5,1>, <5,5,5,5>
+ 2667794530U, // <1,5,5,6>: Cost 3 vext2 <u,4,1,5>, <5,6,7,0>
+ 2713931800U, // <1,5,5,7>: Cost 3 vext3 <4,u,5,1>, <5,5,7,7>
+ 2557187886U, // <1,5,5,u>: Cost 3 vext1 <1,1,5,5>, LHS
+ 2718208036U, // <1,5,6,0>: Cost 3 vext3 <5,6,0,1>, <5,6,0,1>
+ 2620019115U, // <1,5,6,1>: Cost 3 vext2 <0,4,1,5>, <6,1,7,5>
+ 2667794938U, // <1,5,6,2>: Cost 3 vext2 <u,4,1,5>, <6,2,7,3>
+ 3787673666U, // <1,5,6,3>: Cost 4 vext3 <4,u,5,1>, <5,6,3,4>
+ 3693761165U, // <1,5,6,4>: Cost 4 vext2 <0,4,1,5>, <6,4,5,6>
+ 3319279297U, // <1,5,6,5>: Cost 4 vrev <5,1,5,6>
+ 2667795256U, // <1,5,6,6>: Cost 3 vext2 <u,4,1,5>, <6,6,6,6>
+ 2713931874U, // <1,5,6,7>: Cost 3 vext3 <4,u,5,1>, <5,6,7,0>
+ 2713931883U, // <1,5,6,u>: Cost 3 vext3 <4,u,5,1>, <5,6,u,0>
+ 2557198438U, // <1,5,7,0>: Cost 3 vext1 <1,1,5,7>, LHS
+ 2557199156U, // <1,5,7,1>: Cost 3 vext1 <1,1,5,7>, <1,1,1,1>
+ 2569143974U, // <1,5,7,2>: Cost 3 vext1 <3,1,5,7>, <2,3,0,1>
+ 2569144592U, // <1,5,7,3>: Cost 3 vext1 <3,1,5,7>, <3,1,5,7>
+ 2557201718U, // <1,5,7,4>: Cost 3 vext1 <1,1,5,7>, RHS
+ 2713931944U, // <1,5,7,5>: Cost 3 vext3 <4,u,5,1>, <5,7,5,7>
+ 3787673770U, // <1,5,7,6>: Cost 4 vext3 <4,u,5,1>, <5,7,6,0>
+ 2719387828U, // <1,5,7,7>: Cost 3 vext3 <5,7,7,1>, <5,7,7,1>
+ 2557204270U, // <1,5,7,u>: Cost 3 vext1 <1,1,5,7>, LHS
+ 2620020435U, // <1,5,u,0>: Cost 3 vext2 <0,4,1,5>, <u,0,1,2>
+ 1546278702U, // <1,5,u,1>: Cost 2 vext2 <0,4,1,5>, LHS
+ 2620020616U, // <1,5,u,2>: Cost 3 vext2 <0,4,1,5>, <u,2,3,3>
+ 2620020668U, // <1,5,u,3>: Cost 3 vext2 <0,4,1,5>, <u,3,0,1>
+ 1594054682U, // <1,5,u,4>: Cost 2 vext2 <u,4,1,5>, <u,4,1,5>
+ 1546279066U, // <1,5,u,5>: Cost 2 vext2 <0,4,1,5>, RHS
+ 2620020944U, // <1,5,u,6>: Cost 3 vext2 <0,4,1,5>, <u,6,3,7>
+ 2014145846U, // <1,5,u,7>: Cost 2 vtrnr LHS, RHS
+ 2014145847U, // <1,5,u,u>: Cost 2 vtrnr LHS, RHS
+ 3692437504U, // <1,6,0,0>: Cost 4 vext2 <0,2,1,6>, <0,0,0,0>
+ 2618695782U, // <1,6,0,1>: Cost 3 vext2 <0,2,1,6>, LHS
+ 2618695857U, // <1,6,0,2>: Cost 3 vext2 <0,2,1,6>, <0,2,1,6>
+ 3794161970U, // <1,6,0,3>: Cost 4 vext3 <6,0,3,1>, <6,0,3,1>
+ 2620023122U, // <1,6,0,4>: Cost 3 vext2 <0,4,1,6>, <0,4,1,5>
+ 2620686756U, // <1,6,0,5>: Cost 3 vext2 <0,5,1,6>, <0,5,1,6>
+ 2621350389U, // <1,6,0,6>: Cost 3 vext2 <0,6,1,6>, <0,6,1,6>
+ 4028599606U, // <1,6,0,7>: Cost 4 vzipr <0,3,1,0>, RHS
+ 2618696349U, // <1,6,0,u>: Cost 3 vext2 <0,2,1,6>, LHS
+ 3692438262U, // <1,6,1,0>: Cost 4 vext2 <0,2,1,6>, <1,0,3,2>
+ 2625995572U, // <1,6,1,1>: Cost 3 vext2 <1,4,1,6>, <1,1,1,1>
+ 3692438422U, // <1,6,1,2>: Cost 4 vext2 <0,2,1,6>, <1,2,3,0>
+ 3692438488U, // <1,6,1,3>: Cost 4 vext2 <0,2,1,6>, <1,3,1,3>
+ 2625995820U, // <1,6,1,4>: Cost 3 vext2 <1,4,1,6>, <1,4,1,6>
+ 3692438672U, // <1,6,1,5>: Cost 4 vext2 <0,2,1,6>, <1,5,3,7>
+ 3692438720U, // <1,6,1,6>: Cost 4 vext2 <0,2,1,6>, <1,6,0,1>
+ 2958183734U, // <1,6,1,7>: Cost 3 vzipr <0,u,1,1>, RHS
+ 2958183735U, // <1,6,1,u>: Cost 3 vzipr <0,u,1,1>, RHS
+ 2721526201U, // <1,6,2,0>: Cost 3 vext3 <6,2,0,1>, <6,2,0,1>
+ 3692439097U, // <1,6,2,1>: Cost 4 vext2 <0,2,1,6>, <2,1,6,0>
+ 3692439144U, // <1,6,2,2>: Cost 4 vext2 <0,2,1,6>, <2,2,2,2>
+ 3692439206U, // <1,6,2,3>: Cost 4 vext2 <0,2,1,6>, <2,3,0,1>
+ 3636948278U, // <1,6,2,4>: Cost 4 vext1 <2,1,6,2>, RHS
+ 3787674092U, // <1,6,2,5>: Cost 4 vext3 <4,u,5,1>, <6,2,5,7>
+ 2618697658U, // <1,6,2,6>: Cost 3 vext2 <0,2,1,6>, <2,6,3,7>
+ 2970799414U, // <1,6,2,7>: Cost 3 vzipr <3,0,1,2>, RHS
+ 2970799415U, // <1,6,2,u>: Cost 3 vzipr <3,0,1,2>, RHS
+ 2563211366U, // <1,6,3,0>: Cost 3 vext1 <2,1,6,3>, LHS
+ 3699738854U, // <1,6,3,1>: Cost 4 vext2 <1,4,1,6>, <3,1,1,1>
+ 2563212860U, // <1,6,3,2>: Cost 3 vext1 <2,1,6,3>, <2,1,6,3>
+ 3692439964U, // <1,6,3,3>: Cost 4 vext2 <0,2,1,6>, <3,3,3,3>
+ 2563214646U, // <1,6,3,4>: Cost 3 vext1 <2,1,6,3>, RHS
+ 4191820018U, // <1,6,3,5>: Cost 4 vtrnr <5,1,7,3>, <u,6,7,5>
+ 2587103648U, // <1,6,3,6>: Cost 3 vext1 <6,1,6,3>, <6,1,6,3>
+ 3087845306U, // <1,6,3,7>: Cost 3 vtrnr LHS, <2,6,3,7>
+ 3087845307U, // <1,6,3,u>: Cost 3 vtrnr LHS, <2,6,3,u>
+ 3693767570U, // <1,6,4,0>: Cost 4 vext2 <0,4,1,6>, <4,0,5,1>
+ 3693767650U, // <1,6,4,1>: Cost 4 vext2 <0,4,1,6>, <4,1,5,0>
+ 3636962877U, // <1,6,4,2>: Cost 4 vext1 <2,1,6,4>, <2,1,6,4>
+ 3325088134U, // <1,6,4,3>: Cost 4 vrev <6,1,3,4>
+ 3693767898U, // <1,6,4,4>: Cost 4 vext2 <0,4,1,6>, <4,4,5,5>
+ 2618699062U, // <1,6,4,5>: Cost 3 vext2 <0,2,1,6>, RHS
+ 3833670966U, // <1,6,4,6>: Cost 4 vuzpl <1,3,6,7>, RHS
+ 4028632374U, // <1,6,4,7>: Cost 4 vzipr <0,3,1,4>, RHS
+ 2618699305U, // <1,6,4,u>: Cost 3 vext2 <0,2,1,6>, RHS
+ 3693768264U, // <1,6,5,0>: Cost 4 vext2 <0,4,1,6>, <5,0,1,2>
+ 3630998373U, // <1,6,5,1>: Cost 4 vext1 <1,1,6,5>, <1,1,6,5>
+ 3636971070U, // <1,6,5,2>: Cost 4 vext1 <2,1,6,5>, <2,1,6,5>
+ 3642943767U, // <1,6,5,3>: Cost 4 vext1 <3,1,6,5>, <3,1,6,5>
+ 3693768628U, // <1,6,5,4>: Cost 4 vext2 <0,4,1,6>, <5,4,5,6>
+ 3732918276U, // <1,6,5,5>: Cost 4 vext2 <7,0,1,6>, <5,5,5,5>
+ 2620690530U, // <1,6,5,6>: Cost 3 vext2 <0,5,1,6>, <5,6,7,0>
+ 2955562294U, // <1,6,5,7>: Cost 3 vzipr <0,4,1,5>, RHS
+ 2955562295U, // <1,6,5,u>: Cost 3 vzipr <0,4,1,5>, RHS
+ 2724180733U, // <1,6,6,0>: Cost 3 vext3 <6,6,0,1>, <6,6,0,1>
+ 3631006566U, // <1,6,6,1>: Cost 4 vext1 <1,1,6,6>, <1,1,6,6>
+ 3631007674U, // <1,6,6,2>: Cost 4 vext1 <1,1,6,6>, <2,6,3,7>
+ 3692442184U, // <1,6,6,3>: Cost 4 vext2 <0,2,1,6>, <6,3,7,0>
+ 3631009078U, // <1,6,6,4>: Cost 4 vext1 <1,1,6,6>, RHS
+ 3787674416U, // <1,6,6,5>: Cost 4 vext3 <4,u,5,1>, <6,6,5,7>
+ 2713932600U, // <1,6,6,6>: Cost 3 vext3 <4,u,5,1>, <6,6,6,6>
+ 2713932610U, // <1,6,6,7>: Cost 3 vext3 <4,u,5,1>, <6,6,7,7>
+ 2713932619U, // <1,6,6,u>: Cost 3 vext3 <4,u,5,1>, <6,6,u,7>
+ 1651102542U, // <1,6,7,0>: Cost 2 vext3 <6,7,0,1>, <6,7,0,1>
+ 2724918103U, // <1,6,7,1>: Cost 3 vext3 <6,7,1,1>, <6,7,1,1>
+ 2698302306U, // <1,6,7,2>: Cost 3 vext3 <2,3,0,1>, <6,7,2,3>
+ 3642960153U, // <1,6,7,3>: Cost 4 vext1 <3,1,6,7>, <3,1,6,7>
+ 2713932662U, // <1,6,7,4>: Cost 3 vext3 <4,u,5,1>, <6,7,4,5>
+ 2725213051U, // <1,6,7,5>: Cost 3 vext3 <6,7,5,1>, <6,7,5,1>
+ 2724844426U, // <1,6,7,6>: Cost 3 vext3 <6,7,0,1>, <6,7,6,7>
+ 4035956022U, // <1,6,7,7>: Cost 4 vzipr <1,5,1,7>, RHS
+ 1651692438U, // <1,6,7,u>: Cost 2 vext3 <6,7,u,1>, <6,7,u,1>
+ 1651766175U, // <1,6,u,0>: Cost 2 vext3 <6,u,0,1>, <6,u,0,1>
+ 2618701614U, // <1,6,u,1>: Cost 3 vext2 <0,2,1,6>, LHS
+ 3135663508U, // <1,6,u,2>: Cost 3 vtrnr LHS, <4,6,u,2>
+ 3692443580U, // <1,6,u,3>: Cost 4 vext2 <0,2,1,6>, <u,3,0,1>
+ 2713932743U, // <1,6,u,4>: Cost 3 vext3 <4,u,5,1>, <6,u,4,5>
+ 2618701978U, // <1,6,u,5>: Cost 3 vext2 <0,2,1,6>, RHS
+ 2622683344U, // <1,6,u,6>: Cost 3 vext2 <0,u,1,6>, <u,6,3,7>
+ 3087886266U, // <1,6,u,7>: Cost 3 vtrnr LHS, <2,6,3,7>
+ 1652356071U, // <1,6,u,u>: Cost 2 vext3 <6,u,u,1>, <6,u,u,1>
+ 2726171632U, // <1,7,0,0>: Cost 3 vext3 <7,0,0,1>, <7,0,0,1>
+ 2626666598U, // <1,7,0,1>: Cost 3 vext2 <1,5,1,7>, LHS
+ 3695100067U, // <1,7,0,2>: Cost 4 vext2 <0,6,1,7>, <0,2,0,1>
+ 3707044102U, // <1,7,0,3>: Cost 4 vext2 <2,6,1,7>, <0,3,2,1>
+ 2726466580U, // <1,7,0,4>: Cost 3 vext3 <7,0,4,1>, <7,0,4,1>
+ 3654921933U, // <1,7,0,5>: Cost 4 vext1 <5,1,7,0>, <5,1,7,0>
+ 2621358582U, // <1,7,0,6>: Cost 3 vext2 <0,6,1,7>, <0,6,1,7>
+ 2622022215U, // <1,7,0,7>: Cost 3 vext2 <0,7,1,7>, <0,7,1,7>
+ 2626667165U, // <1,7,0,u>: Cost 3 vext2 <1,5,1,7>, LHS
+ 2593128550U, // <1,7,1,0>: Cost 3 vext1 <7,1,7,1>, LHS
+ 2626667316U, // <1,7,1,1>: Cost 3 vext2 <1,5,1,7>, <1,1,1,1>
+ 3700409238U, // <1,7,1,2>: Cost 4 vext2 <1,5,1,7>, <1,2,3,0>
+ 2257294428U, // <1,7,1,3>: Cost 3 vrev <7,1,3,1>
+ 2593131830U, // <1,7,1,4>: Cost 3 vext1 <7,1,7,1>, RHS
+ 2626667646U, // <1,7,1,5>: Cost 3 vext2 <1,5,1,7>, <1,5,1,7>
+ 2627331279U, // <1,7,1,6>: Cost 3 vext2 <1,6,1,7>, <1,6,1,7>
+ 2593133696U, // <1,7,1,7>: Cost 3 vext1 <7,1,7,1>, <7,1,7,1>
+ 2628658545U, // <1,7,1,u>: Cost 3 vext2 <1,u,1,7>, <1,u,1,7>
+ 2587164774U, // <1,7,2,0>: Cost 3 vext1 <6,1,7,2>, LHS
+ 3701073445U, // <1,7,2,1>: Cost 4 vext2 <1,6,1,7>, <2,1,3,7>
+ 3700409960U, // <1,7,2,2>: Cost 4 vext2 <1,5,1,7>, <2,2,2,2>
+ 2638612134U, // <1,7,2,3>: Cost 3 vext2 <3,5,1,7>, <2,3,0,1>
+ 2587168054U, // <1,7,2,4>: Cost 3 vext1 <6,1,7,2>, RHS
+ 3706382167U, // <1,7,2,5>: Cost 4 vext2 <2,5,1,7>, <2,5,1,7>
+ 2587169192U, // <1,7,2,6>: Cost 3 vext1 <6,1,7,2>, <6,1,7,2>
+ 3660911610U, // <1,7,2,7>: Cost 4 vext1 <6,1,7,2>, <7,0,1,2>
+ 2587170606U, // <1,7,2,u>: Cost 3 vext1 <6,1,7,2>, LHS
+ 1507459174U, // <1,7,3,0>: Cost 2 vext1 <5,1,7,3>, LHS
+ 2569257984U, // <1,7,3,1>: Cost 3 vext1 <3,1,7,3>, <1,3,5,7>
+ 2581202536U, // <1,7,3,2>: Cost 3 vext1 <5,1,7,3>, <2,2,2,2>
+ 2569259294U, // <1,7,3,3>: Cost 3 vext1 <3,1,7,3>, <3,1,7,3>
+ 1507462454U, // <1,7,3,4>: Cost 2 vext1 <5,1,7,3>, RHS
+ 1507462864U, // <1,7,3,5>: Cost 2 vext1 <5,1,7,3>, <5,1,7,3>
+ 2581205498U, // <1,7,3,6>: Cost 3 vext1 <5,1,7,3>, <6,2,7,3>
+ 2581206010U, // <1,7,3,7>: Cost 3 vext1 <5,1,7,3>, <7,0,1,2>
+ 1507465006U, // <1,7,3,u>: Cost 2 vext1 <5,1,7,3>, LHS
+ 2728826164U, // <1,7,4,0>: Cost 3 vext3 <7,4,0,1>, <7,4,0,1>
+ 3654951732U, // <1,7,4,1>: Cost 4 vext1 <5,1,7,4>, <1,1,1,1>
+ 3330987094U, // <1,7,4,2>: Cost 4 vrev <7,1,2,4>
+ 3331060831U, // <1,7,4,3>: Cost 4 vrev <7,1,3,4>
+ 3787674971U, // <1,7,4,4>: Cost 4 vext3 <4,u,5,1>, <7,4,4,4>
+ 2626669878U, // <1,7,4,5>: Cost 3 vext2 <1,5,1,7>, RHS
+ 3785979241U, // <1,7,4,6>: Cost 4 vext3 <4,6,0,1>, <7,4,6,0>
+ 3787085176U, // <1,7,4,7>: Cost 4 vext3 <4,7,6,1>, <7,4,7,6>
+ 2626670121U, // <1,7,4,u>: Cost 3 vext2 <1,5,1,7>, RHS
+ 2569273446U, // <1,7,5,0>: Cost 3 vext1 <3,1,7,5>, LHS
+ 2569274368U, // <1,7,5,1>: Cost 3 vext1 <3,1,7,5>, <1,3,5,7>
+ 3643016808U, // <1,7,5,2>: Cost 4 vext1 <3,1,7,5>, <2,2,2,2>
+ 2569275680U, // <1,7,5,3>: Cost 3 vext1 <3,1,7,5>, <3,1,7,5>
+ 2569276726U, // <1,7,5,4>: Cost 3 vext1 <3,1,7,5>, RHS
+ 4102034790U, // <1,7,5,5>: Cost 4 vtrnl <1,3,5,7>, <7,4,5,6>
+ 2651222067U, // <1,7,5,6>: Cost 3 vext2 <5,6,1,7>, <5,6,1,7>
+ 3899378998U, // <1,7,5,7>: Cost 4 vuzpr <1,1,5,7>, RHS
+ 2569279278U, // <1,7,5,u>: Cost 3 vext1 <3,1,7,5>, LHS
+ 2730153430U, // <1,7,6,0>: Cost 3 vext3 <7,6,0,1>, <7,6,0,1>
+ 2724845022U, // <1,7,6,1>: Cost 3 vext3 <6,7,0,1>, <7,6,1,0>
+ 3643025338U, // <1,7,6,2>: Cost 4 vext1 <3,1,7,6>, <2,6,3,7>
+ 3643025697U, // <1,7,6,3>: Cost 4 vext1 <3,1,7,6>, <3,1,7,6>
+ 3643026742U, // <1,7,6,4>: Cost 4 vext1 <3,1,7,6>, RHS
+ 3654971091U, // <1,7,6,5>: Cost 4 vext1 <5,1,7,6>, <5,1,7,6>
+ 3787675153U, // <1,7,6,6>: Cost 4 vext3 <4,u,5,1>, <7,6,6,6>
+ 2724845076U, // <1,7,6,7>: Cost 3 vext3 <6,7,0,1>, <7,6,7,0>
+ 2725508637U, // <1,7,6,u>: Cost 3 vext3 <6,u,0,1>, <7,6,u,0>
+ 2730817063U, // <1,7,7,0>: Cost 3 vext3 <7,7,0,1>, <7,7,0,1>
+ 3631088436U, // <1,7,7,1>: Cost 4 vext1 <1,1,7,7>, <1,1,1,1>
+ 3660949158U, // <1,7,7,2>: Cost 4 vext1 <6,1,7,7>, <2,3,0,1>
+ 3801904705U, // <1,7,7,3>: Cost 4 vext3 <7,3,0,1>, <7,7,3,0>
+ 3631090998U, // <1,7,7,4>: Cost 4 vext1 <1,1,7,7>, RHS
+ 2662503828U, // <1,7,7,5>: Cost 3 vext2 <7,5,1,7>, <7,5,1,7>
+ 3660951981U, // <1,7,7,6>: Cost 4 vext1 <6,1,7,7>, <6,1,7,7>
+ 2713933420U, // <1,7,7,7>: Cost 3 vext3 <4,u,5,1>, <7,7,7,7>
+ 2731406959U, // <1,7,7,u>: Cost 3 vext3 <7,7,u,1>, <7,7,u,1>
+ 1507500134U, // <1,7,u,0>: Cost 2 vext1 <5,1,7,u>, LHS
+ 2626672430U, // <1,7,u,1>: Cost 3 vext2 <1,5,1,7>, LHS
+ 2581243496U, // <1,7,u,2>: Cost 3 vext1 <5,1,7,u>, <2,2,2,2>
+ 2569300259U, // <1,7,u,3>: Cost 3 vext1 <3,1,7,u>, <3,1,7,u>
+ 1507503414U, // <1,7,u,4>: Cost 2 vext1 <5,1,7,u>, RHS
+ 1507503829U, // <1,7,u,5>: Cost 2 vext1 <5,1,7,u>, <5,1,7,u>
+ 2581246458U, // <1,7,u,6>: Cost 3 vext1 <5,1,7,u>, <6,2,7,3>
+ 2581246970U, // <1,7,u,7>: Cost 3 vext1 <5,1,7,u>, <7,0,1,2>
+ 1507505966U, // <1,7,u,u>: Cost 2 vext1 <5,1,7,u>, LHS
+ 1543643153U, // <1,u,0,0>: Cost 2 vext2 <0,0,1,u>, <0,0,1,u>
+ 1546297446U, // <1,u,0,1>: Cost 2 vext2 <0,4,1,u>, LHS
+ 2819448852U, // <1,u,0,2>: Cost 3 vuzpr LHS, <0,0,2,2>
+ 2619375876U, // <1,u,0,3>: Cost 3 vext2 <0,3,1,u>, <0,3,1,u>
+ 1546297685U, // <1,u,0,4>: Cost 2 vext2 <0,4,1,u>, <0,4,1,u>
+ 1658771190U, // <1,u,0,5>: Cost 2 vext3 <u,0,5,1>, <u,0,5,1>
+ 2736789248U, // <1,u,0,6>: Cost 3 vext3 <u,7,0,1>, <u,0,6,2>
+ 2659189376U, // <1,u,0,7>: Cost 3 vext2 <7,0,1,u>, <0,7,u,1>
+ 1546298013U, // <1,u,0,u>: Cost 2 vext2 <0,4,1,u>, LHS
+ 1483112550U, // <1,u,1,0>: Cost 2 vext1 <1,1,1,1>, LHS
+ 202162278U, // <1,u,1,1>: Cost 1 vdup1 LHS
+ 1616009006U, // <1,u,1,2>: Cost 2 vext3 <0,u,1,1>, LHS
+ 1745707110U, // <1,u,1,3>: Cost 2 vuzpr LHS, LHS
+ 1483115830U, // <1,u,1,4>: Cost 2 vext1 <1,1,1,1>, RHS
+ 2620040336U, // <1,u,1,5>: Cost 3 vext2 <0,4,1,u>, <1,5,3,7>
+ 3026622618U, // <1,u,1,6>: Cost 3 vtrnl <1,1,1,1>, RHS
+ 2958183752U, // <1,u,1,7>: Cost 3 vzipr <0,u,1,1>, RHS
+ 202162278U, // <1,u,1,u>: Cost 1 vdup1 LHS
+ 2819449750U, // <1,u,2,0>: Cost 3 vuzpr LHS, <1,2,3,0>
+ 2893207342U, // <1,u,2,1>: Cost 3 vzipl <1,2,3,0>, LHS
+ 2819448996U, // <1,u,2,2>: Cost 3 vuzpr LHS, <0,2,0,2>
+ 2819450482U, // <1,u,2,3>: Cost 3 vuzpr LHS, <2,2,3,3>
+ 2819449754U, // <1,u,2,4>: Cost 3 vuzpr LHS, <1,2,3,4>
+ 2893207706U, // <1,u,2,5>: Cost 3 vzipl <1,2,3,0>, RHS
+ 2819449036U, // <1,u,2,6>: Cost 3 vuzpr LHS, <0,2,4,6>
+ 2970799432U, // <1,u,2,7>: Cost 3 vzipr <3,0,1,2>, RHS
+ 2819449002U, // <1,u,2,u>: Cost 3 vuzpr LHS, <0,2,0,u>
+ 403931292U, // <1,u,3,0>: Cost 1 vext1 LHS, LHS
+ 1477673718U, // <1,u,3,1>: Cost 2 vext1 LHS, <1,0,3,2>
+ 115726126U, // <1,u,3,2>: Cost 1 vrev LHS
+ 2014102173U, // <1,u,3,3>: Cost 2 vtrnr LHS, LHS
+ 403934518U, // <1,u,3,4>: Cost 1 vext1 LHS, RHS
+ 1507536601U, // <1,u,3,5>: Cost 2 vext1 <5,1,u,3>, <5,1,u,3>
+ 1525453306U, // <1,u,3,6>: Cost 2 vext1 LHS, <6,2,7,3>
+ 2014105129U, // <1,u,3,7>: Cost 2 vtrnr LHS, RHS
+ 403937070U, // <1,u,3,u>: Cost 1 vext1 LHS, LHS
+ 2620042157U, // <1,u,4,0>: Cost 3 vext2 <0,4,1,u>, <4,0,u,1>
+ 2620042237U, // <1,u,4,1>: Cost 3 vext2 <0,4,1,u>, <4,1,u,0>
+ 2263217967U, // <1,u,4,2>: Cost 3 vrev <u,1,2,4>
+ 2569341224U, // <1,u,4,3>: Cost 3 vext1 <3,1,u,4>, <3,1,u,4>
+ 2569342262U, // <1,u,4,4>: Cost 3 vext1 <3,1,u,4>, RHS
+ 1546300726U, // <1,u,4,5>: Cost 2 vext2 <0,4,1,u>, RHS
+ 2819449180U, // <1,u,4,6>: Cost 3 vuzpr LHS, <0,4,2,6>
+ 2724845649U, // <1,u,4,7>: Cost 3 vext3 <6,7,0,1>, <u,4,7,6>
+ 1546300969U, // <1,u,4,u>: Cost 2 vext2 <0,4,1,u>, RHS
+ 2551431270U, // <1,u,5,0>: Cost 3 vext1 <0,1,u,5>, LHS
+ 2551432192U, // <1,u,5,1>: Cost 3 vext1 <0,1,u,5>, <1,3,5,7>
+ 3028293422U, // <1,u,5,2>: Cost 3 vtrnl <1,3,5,7>, LHS
+ 2955559068U, // <1,u,5,3>: Cost 3 vzipr <0,4,1,5>, LHS
+ 2551434550U, // <1,u,5,4>: Cost 3 vext1 <0,1,u,5>, RHS
+ 2895255706U, // <1,u,5,5>: Cost 3 vzipl <1,5,3,7>, RHS
+ 1616009370U, // <1,u,5,6>: Cost 2 vext3 <0,u,1,1>, RHS
+ 1745710390U, // <1,u,5,7>: Cost 2 vuzpr LHS, RHS
+ 1745710391U, // <1,u,5,u>: Cost 2 vuzpr LHS, RHS
+ 2653221159U, // <1,u,6,0>: Cost 3 vext2 <6,0,1,u>, <6,0,1,u>
+ 2725509303U, // <1,u,6,1>: Cost 3 vext3 <6,u,0,1>, <u,6,1,0>
+ 2659193338U, // <1,u,6,2>: Cost 3 vext2 <7,0,1,u>, <6,2,7,3>
+ 2689751248U, // <1,u,6,3>: Cost 3 vext3 <0,u,1,1>, <u,6,3,7>
+ 2867228774U, // <1,u,6,4>: Cost 3 vuzpr LHS, <5,6,7,4>
+ 3764820194U, // <1,u,6,5>: Cost 4 vext3 <1,1,1,1>, <u,6,5,7>
+ 2657202957U, // <1,u,6,6>: Cost 3 vext2 <6,6,1,u>, <6,6,1,u>
+ 2819450810U, // <1,u,6,7>: Cost 3 vuzpr LHS, <2,6,3,7>
+ 2819450811U, // <1,u,6,u>: Cost 3 vuzpr LHS, <2,6,3,u>
+ 1585452032U, // <1,u,7,0>: Cost 2 vext2 <7,0,1,u>, <7,0,1,u>
+ 2557420340U, // <1,u,7,1>: Cost 3 vext1 <1,1,u,7>, <1,1,1,1>
+ 2569365158U, // <1,u,7,2>: Cost 3 vext1 <3,1,u,7>, <2,3,0,1>
+ 2569365803U, // <1,u,7,3>: Cost 3 vext1 <3,1,u,7>, <3,1,u,7>
+ 2557422902U, // <1,u,7,4>: Cost 3 vext1 <1,1,u,7>, RHS
+ 2662512021U, // <1,u,7,5>: Cost 3 vext2 <7,5,1,u>, <7,5,1,u>
+ 2724845884U, // <1,u,7,6>: Cost 3 vext3 <6,7,0,1>, <u,7,6,7>
+ 2659194476U, // <1,u,7,7>: Cost 3 vext2 <7,0,1,u>, <7,7,7,7>
+ 1590761096U, // <1,u,7,u>: Cost 2 vext2 <7,u,1,u>, <7,u,1,u>
+ 403972257U, // <1,u,u,0>: Cost 1 vext1 LHS, LHS
+ 202162278U, // <1,u,u,1>: Cost 1 vdup1 LHS
+ 115767091U, // <1,u,u,2>: Cost 1 vrev LHS
+ 1745707677U, // <1,u,u,3>: Cost 2 vuzpr LHS, LHS
+ 403975478U, // <1,u,u,4>: Cost 1 vext1 LHS, RHS
+ 1546303642U, // <1,u,u,5>: Cost 2 vext2 <0,4,1,u>, RHS
+ 1616009613U, // <1,u,u,6>: Cost 2 vext3 <0,u,1,1>, RHS
+ 1745710633U, // <1,u,u,7>: Cost 2 vuzpr LHS, RHS
+ 403978030U, // <1,u,u,u>: Cost 1 vext1 LHS, LHS
+ 2551463936U, // <2,0,0,0>: Cost 3 vext1 <0,2,0,0>, <0,0,0,0>
+ 2685698058U, // <2,0,0,1>: Cost 3 vext3 <0,2,0,2>, <0,0,1,1>
+ 1610776596U, // <2,0,0,2>: Cost 2 vext3 <0,0,2,2>, <0,0,2,2>
+ 2619384069U, // <2,0,0,3>: Cost 3 vext2 <0,3,2,0>, <0,3,2,0>
+ 2551467318U, // <2,0,0,4>: Cost 3 vext1 <0,2,0,0>, RHS
+ 3899836596U, // <2,0,0,5>: Cost 4 vuzpr <1,2,3,0>, <3,0,4,5>
+ 2621374968U, // <2,0,0,6>: Cost 3 vext2 <0,6,2,0>, <0,6,2,0>
+ 4168271334U, // <2,0,0,7>: Cost 4 vtrnr <1,2,3,0>, <2,0,5,7>
+ 1611219018U, // <2,0,0,u>: Cost 2 vext3 <0,0,u,2>, <0,0,u,2>
+ 2551472138U, // <2,0,1,0>: Cost 3 vext1 <0,2,0,1>, <0,0,1,1>
+ 2690564186U, // <2,0,1,1>: Cost 3 vext3 <1,0,3,2>, <0,1,1,0>
+ 1611956326U, // <2,0,1,2>: Cost 2 vext3 <0,2,0,2>, LHS
+ 2826092646U, // <2,0,1,3>: Cost 3 vuzpr <1,2,3,0>, LHS
+ 2551475510U, // <2,0,1,4>: Cost 3 vext1 <0,2,0,1>, RHS
+ 3692463248U, // <2,0,1,5>: Cost 4 vext2 <0,2,2,0>, <1,5,3,7>
+ 2587308473U, // <2,0,1,6>: Cost 3 vext1 <6,2,0,1>, <6,2,0,1>
+ 3661050874U, // <2,0,1,7>: Cost 4 vext1 <6,2,0,1>, <7,0,1,2>
+ 1611956380U, // <2,0,1,u>: Cost 2 vext3 <0,2,0,2>, LHS
+ 1477738598U, // <2,0,2,0>: Cost 2 vext1 <0,2,0,2>, LHS
+ 2551481078U, // <2,0,2,1>: Cost 3 vext1 <0,2,0,2>, <1,0,3,2>
+ 2551481796U, // <2,0,2,2>: Cost 3 vext1 <0,2,0,2>, <2,0,2,0>
+ 2551482518U, // <2,0,2,3>: Cost 3 vext1 <0,2,0,2>, <3,0,1,2>
+ 1477741878U, // <2,0,2,4>: Cost 2 vext1 <0,2,0,2>, RHS
+ 2551484112U, // <2,0,2,5>: Cost 3 vext1 <0,2,0,2>, <5,1,7,3>
+ 2551484759U, // <2,0,2,6>: Cost 3 vext1 <0,2,0,2>, <6,0,7,2>
+ 2551485434U, // <2,0,2,7>: Cost 3 vext1 <0,2,0,2>, <7,0,1,2>
+ 1477744430U, // <2,0,2,u>: Cost 2 vext1 <0,2,0,2>, LHS
+ 2953625600U, // <2,0,3,0>: Cost 3 vzipr LHS, <0,0,0,0>
+ 2953627302U, // <2,0,3,1>: Cost 3 vzipr LHS, <2,3,0,1>
+ 2953625764U, // <2,0,3,2>: Cost 3 vzipr LHS, <0,2,0,2>
+ 4027369695U, // <2,0,3,3>: Cost 4 vzipr LHS, <3,1,0,3>
+ 3625233718U, // <2,0,3,4>: Cost 4 vext1 <0,2,0,3>, RHS
+ 3899836110U, // <2,0,3,5>: Cost 4 vuzpr <1,2,3,0>, <2,3,4,5>
+ 4032012618U, // <2,0,3,6>: Cost 4 vzipr LHS, <0,4,0,6>
+ 3899835392U, // <2,0,3,7>: Cost 4 vuzpr <1,2,3,0>, <1,3,5,7>
+ 2953625770U, // <2,0,3,u>: Cost 3 vzipr LHS, <0,2,0,u>
+ 2551496806U, // <2,0,4,0>: Cost 3 vext1 <0,2,0,4>, LHS
+ 2685698386U, // <2,0,4,1>: Cost 3 vext3 <0,2,0,2>, <0,4,1,5>
+ 2685698396U, // <2,0,4,2>: Cost 3 vext3 <0,2,0,2>, <0,4,2,6>
+ 3625240726U, // <2,0,4,3>: Cost 4 vext1 <0,2,0,4>, <3,0,1,2>
+ 2551500086U, // <2,0,4,4>: Cost 3 vext1 <0,2,0,4>, RHS
+ 2618723638U, // <2,0,4,5>: Cost 3 vext2 <0,2,2,0>, RHS
+ 2765409590U, // <2,0,4,6>: Cost 3 vuzpl <2,3,0,1>, RHS
+ 3799990664U, // <2,0,4,7>: Cost 4 vext3 <7,0,1,2>, <0,4,7,5>
+ 2685698450U, // <2,0,4,u>: Cost 3 vext3 <0,2,0,2>, <0,4,u,6>
+ 3625246822U, // <2,0,5,0>: Cost 4 vext1 <0,2,0,5>, LHS
+ 3289776304U, // <2,0,5,1>: Cost 4 vrev <0,2,1,5>
+ 2690564526U, // <2,0,5,2>: Cost 3 vext3 <1,0,3,2>, <0,5,2,7>
+ 3289923778U, // <2,0,5,3>: Cost 4 vrev <0,2,3,5>
+ 2216255691U, // <2,0,5,4>: Cost 3 vrev <0,2,4,5>
+ 3726307332U, // <2,0,5,5>: Cost 4 vext2 <5,u,2,0>, <5,5,5,5>
+ 3726307426U, // <2,0,5,6>: Cost 4 vext2 <5,u,2,0>, <5,6,7,0>
+ 2826095926U, // <2,0,5,7>: Cost 3 vuzpr <1,2,3,0>, RHS
+ 2216550639U, // <2,0,5,u>: Cost 3 vrev <0,2,u,5>
+ 4162420736U, // <2,0,6,0>: Cost 4 vtrnr <0,2,4,6>, <0,0,0,0>
+ 2901885030U, // <2,0,6,1>: Cost 3 vzipl <2,6,3,7>, LHS
+ 2685698559U, // <2,0,6,2>: Cost 3 vext3 <0,2,0,2>, <0,6,2,7>
+ 3643173171U, // <2,0,6,3>: Cost 4 vext1 <3,2,0,6>, <3,2,0,6>
+ 2216263884U, // <2,0,6,4>: Cost 3 vrev <0,2,4,6>
+ 3730289341U, // <2,0,6,5>: Cost 4 vext2 <6,5,2,0>, <6,5,2,0>
+ 3726308152U, // <2,0,6,6>: Cost 4 vext2 <5,u,2,0>, <6,6,6,6>
+ 3899836346U, // <2,0,6,7>: Cost 4 vuzpr <1,2,3,0>, <2,6,3,7>
+ 2216558832U, // <2,0,6,u>: Cost 3 vrev <0,2,u,6>
+ 2659202049U, // <2,0,7,0>: Cost 3 vext2 <7,0,2,0>, <7,0,2,0>
+ 3726308437U, // <2,0,7,1>: Cost 4 vext2 <5,u,2,0>, <7,1,2,3>
+ 2726249034U, // <2,0,7,2>: Cost 3 vext3 <7,0,1,2>, <0,7,2,1>
+ 3734934772U, // <2,0,7,3>: Cost 4 vext2 <7,3,2,0>, <7,3,2,0>
+ 3726308710U, // <2,0,7,4>: Cost 4 vext2 <5,u,2,0>, <7,4,5,6>
+ 3726308814U, // <2,0,7,5>: Cost 4 vext2 <5,u,2,0>, <7,5,u,2>
+ 3736925671U, // <2,0,7,6>: Cost 4 vext2 <7,6,2,0>, <7,6,2,0>
+ 3726308972U, // <2,0,7,7>: Cost 4 vext2 <5,u,2,0>, <7,7,7,7>
+ 2659202049U, // <2,0,7,u>: Cost 3 vext2 <7,0,2,0>, <7,0,2,0>
+ 1477787750U, // <2,0,u,0>: Cost 2 vext1 <0,2,0,u>, LHS
+ 2953668262U, // <2,0,u,1>: Cost 3 vzipr LHS, <2,3,0,1>
+ 1611956893U, // <2,0,u,2>: Cost 2 vext3 <0,2,0,2>, LHS
+ 2551531670U, // <2,0,u,3>: Cost 3 vext1 <0,2,0,u>, <3,0,1,2>
+ 1477791030U, // <2,0,u,4>: Cost 2 vext1 <0,2,0,u>, RHS
+ 2618726554U, // <2,0,u,5>: Cost 3 vext2 <0,2,2,0>, RHS
+ 2765412506U, // <2,0,u,6>: Cost 3 vuzpl <2,3,0,1>, RHS
+ 2826096169U, // <2,0,u,7>: Cost 3 vuzpr <1,2,3,0>, RHS
+ 1611956947U, // <2,0,u,u>: Cost 2 vext3 <0,2,0,2>, LHS
+ 2569453670U, // <2,1,0,0>: Cost 3 vext1 <3,2,1,0>, LHS
+ 2619392102U, // <2,1,0,1>: Cost 3 vext2 <0,3,2,1>, LHS
+ 3759440619U, // <2,1,0,2>: Cost 4 vext3 <0,2,0,2>, <1,0,2,0>
+ 1616823030U, // <2,1,0,3>: Cost 2 vext3 <1,0,3,2>, <1,0,3,2>
+ 2569456950U, // <2,1,0,4>: Cost 3 vext1 <3,2,1,0>, RHS
+ 2690712328U, // <2,1,0,5>: Cost 3 vext3 <1,0,5,2>, <1,0,5,2>
+ 3661115841U, // <2,1,0,6>: Cost 4 vext1 <6,2,1,0>, <6,2,1,0>
+ 2622046794U, // <2,1,0,7>: Cost 3 vext2 <0,7,2,1>, <0,7,2,1>
+ 1617191715U, // <2,1,0,u>: Cost 2 vext3 <1,0,u,2>, <1,0,u,2>
+ 2551545958U, // <2,1,1,0>: Cost 3 vext1 <0,2,1,1>, LHS
+ 2685698868U, // <2,1,1,1>: Cost 3 vext3 <0,2,0,2>, <1,1,1,1>
+ 2628682646U, // <2,1,1,2>: Cost 3 vext2 <1,u,2,1>, <1,2,3,0>
+ 2685698888U, // <2,1,1,3>: Cost 3 vext3 <0,2,0,2>, <1,1,3,3>
+ 2551549238U, // <2,1,1,4>: Cost 3 vext1 <0,2,1,1>, RHS
+ 3693134992U, // <2,1,1,5>: Cost 4 vext2 <0,3,2,1>, <1,5,3,7>
+ 3661124034U, // <2,1,1,6>: Cost 4 vext1 <6,2,1,1>, <6,2,1,1>
+ 3625292794U, // <2,1,1,7>: Cost 4 vext1 <0,2,1,1>, <7,0,1,2>
+ 2685698933U, // <2,1,1,u>: Cost 3 vext3 <0,2,0,2>, <1,1,u,3>
+ 2551554150U, // <2,1,2,0>: Cost 3 vext1 <0,2,1,2>, LHS
+ 3893649571U, // <2,1,2,1>: Cost 4 vuzpr <0,2,0,1>, <0,2,0,1>
+ 2551555688U, // <2,1,2,2>: Cost 3 vext1 <0,2,1,2>, <2,2,2,2>
+ 2685698966U, // <2,1,2,3>: Cost 3 vext3 <0,2,0,2>, <1,2,3,0>
+ 2551557430U, // <2,1,2,4>: Cost 3 vext1 <0,2,1,2>, RHS
+ 3763422123U, // <2,1,2,5>: Cost 4 vext3 <0,u,0,2>, <1,2,5,3>
+ 3693135802U, // <2,1,2,6>: Cost 4 vext2 <0,3,2,1>, <2,6,3,7>
+ 2726249402U, // <2,1,2,7>: Cost 3 vext3 <7,0,1,2>, <1,2,7,0>
+ 2685699011U, // <2,1,2,u>: Cost 3 vext3 <0,2,0,2>, <1,2,u,0>
+ 2551562342U, // <2,1,3,0>: Cost 3 vext1 <0,2,1,3>, LHS
+ 2953625610U, // <2,1,3,1>: Cost 3 vzipr LHS, <0,0,1,1>
+ 2953627798U, // <2,1,3,2>: Cost 3 vzipr LHS, <3,0,1,2>
+ 2953626584U, // <2,1,3,3>: Cost 3 vzipr LHS, <1,3,1,3>
+ 2551565622U, // <2,1,3,4>: Cost 3 vext1 <0,2,1,3>, RHS
+ 2953625938U, // <2,1,3,5>: Cost 3 vzipr LHS, <0,4,1,5>
+ 2587398596U, // <2,1,3,6>: Cost 3 vext1 <6,2,1,3>, <6,2,1,3>
+ 4032013519U, // <2,1,3,7>: Cost 4 vzipr LHS, <1,6,1,7>
+ 2953625617U, // <2,1,3,u>: Cost 3 vzipr LHS, <0,0,1,u>
+ 2690565154U, // <2,1,4,0>: Cost 3 vext3 <1,0,3,2>, <1,4,0,5>
+ 3625313270U, // <2,1,4,1>: Cost 4 vext1 <0,2,1,4>, <1,3,4,6>
+ 3771532340U, // <2,1,4,2>: Cost 4 vext3 <2,2,2,2>, <1,4,2,5>
+ 1148404634U, // <2,1,4,3>: Cost 2 vrev <1,2,3,4>
+ 3625315638U, // <2,1,4,4>: Cost 4 vext1 <0,2,1,4>, RHS
+ 2619395382U, // <2,1,4,5>: Cost 3 vext2 <0,3,2,1>, RHS
+ 3837242678U, // <2,1,4,6>: Cost 4 vuzpl <2,0,1,2>, RHS
+ 3799991394U, // <2,1,4,7>: Cost 4 vext3 <7,0,1,2>, <1,4,7,6>
+ 1148773319U, // <2,1,4,u>: Cost 2 vrev <1,2,u,4>
+ 2551578726U, // <2,1,5,0>: Cost 3 vext1 <0,2,1,5>, LHS
+ 2551579648U, // <2,1,5,1>: Cost 3 vext1 <0,2,1,5>, <1,3,5,7>
+ 3625321952U, // <2,1,5,2>: Cost 4 vext1 <0,2,1,5>, <2,0,5,1>
+ 2685699216U, // <2,1,5,3>: Cost 3 vext3 <0,2,0,2>, <1,5,3,7>
+ 2551582006U, // <2,1,5,4>: Cost 3 vext1 <0,2,1,5>, RHS
+ 3740913668U, // <2,1,5,5>: Cost 4 vext2 <u,3,2,1>, <5,5,5,5>
+ 3661156806U, // <2,1,5,6>: Cost 4 vext1 <6,2,1,5>, <6,2,1,5>
+ 3893652790U, // <2,1,5,7>: Cost 4 vuzpr <0,2,0,1>, RHS
+ 2685699261U, // <2,1,5,u>: Cost 3 vext3 <0,2,0,2>, <1,5,u,7>
+ 2551586918U, // <2,1,6,0>: Cost 3 vext1 <0,2,1,6>, LHS
+ 3625329398U, // <2,1,6,1>: Cost 4 vext1 <0,2,1,6>, <1,0,3,2>
+ 2551588794U, // <2,1,6,2>: Cost 3 vext1 <0,2,1,6>, <2,6,3,7>
+ 3088679014U, // <2,1,6,3>: Cost 3 vtrnr <0,2,4,6>, LHS
+ 2551590198U, // <2,1,6,4>: Cost 3 vext1 <0,2,1,6>, RHS
+ 4029382994U, // <2,1,6,5>: Cost 4 vzipr <0,4,2,6>, <0,4,1,5>
+ 3625333560U, // <2,1,6,6>: Cost 4 vext1 <0,2,1,6>, <6,6,6,6>
+ 3731624800U, // <2,1,6,7>: Cost 4 vext2 <6,7,2,1>, <6,7,2,1>
+ 2551592750U, // <2,1,6,u>: Cost 3 vext1 <0,2,1,6>, LHS
+ 2622051322U, // <2,1,7,0>: Cost 3 vext2 <0,7,2,1>, <7,0,1,2>
+ 3733615699U, // <2,1,7,1>: Cost 4 vext2 <7,1,2,1>, <7,1,2,1>
+ 3795125538U, // <2,1,7,2>: Cost 4 vext3 <6,1,7,2>, <1,7,2,0>
+ 2222171037U, // <2,1,7,3>: Cost 3 vrev <1,2,3,7>
+ 3740915046U, // <2,1,7,4>: Cost 4 vext2 <u,3,2,1>, <7,4,5,6>
+ 3296060335U, // <2,1,7,5>: Cost 4 vrev <1,2,5,7>
+ 3736933864U, // <2,1,7,6>: Cost 4 vext2 <7,6,2,1>, <7,6,2,1>
+ 3805300055U, // <2,1,7,7>: Cost 4 vext3 <7,u,1,2>, <1,7,7,u>
+ 2669827714U, // <2,1,7,u>: Cost 3 vext2 <u,7,2,1>, <7,u,1,2>
+ 2551603302U, // <2,1,u,0>: Cost 3 vext1 <0,2,1,u>, LHS
+ 2953666570U, // <2,1,u,1>: Cost 3 vzipr LHS, <0,0,1,1>
+ 2953668758U, // <2,1,u,2>: Cost 3 vzipr LHS, <3,0,1,2>
+ 1148437406U, // <2,1,u,3>: Cost 2 vrev <1,2,3,u>
+ 2551606582U, // <2,1,u,4>: Cost 3 vext1 <0,2,1,u>, RHS
+ 2953666898U, // <2,1,u,5>: Cost 3 vzipr LHS, <0,4,1,5>
+ 2587398596U, // <2,1,u,6>: Cost 3 vext1 <6,2,1,3>, <6,2,1,3>
+ 2669828370U, // <2,1,u,7>: Cost 3 vext2 <u,7,2,1>, <u,7,2,1>
+ 1148806091U, // <2,1,u,u>: Cost 2 vrev <1,2,u,u>
+ 1543667732U, // <2,2,0,0>: Cost 2 vext2 <0,0,2,2>, <0,0,2,2>
+ 1548976230U, // <2,2,0,1>: Cost 2 vext2 <0,u,2,2>, LHS
+ 2685699524U, // <2,2,0,2>: Cost 3 vext3 <0,2,0,2>, <2,0,2,0>
+ 2685699535U, // <2,2,0,3>: Cost 3 vext3 <0,2,0,2>, <2,0,3,2>
+ 2551614774U, // <2,2,0,4>: Cost 3 vext1 <0,2,2,0>, RHS
+ 3704422830U, // <2,2,0,5>: Cost 4 vext2 <2,2,2,2>, <0,5,2,7>
+ 3893657642U, // <2,2,0,6>: Cost 4 vuzpr <0,2,0,2>, <0,0,4,6>
+ 3770574323U, // <2,2,0,7>: Cost 4 vext3 <2,0,7,2>, <2,0,7,2>
+ 1548976796U, // <2,2,0,u>: Cost 2 vext2 <0,u,2,2>, <0,u,2,2>
+ 2622718710U, // <2,2,1,0>: Cost 3 vext2 <0,u,2,2>, <1,0,3,2>
+ 2622718772U, // <2,2,1,1>: Cost 3 vext2 <0,u,2,2>, <1,1,1,1>
+ 2622718870U, // <2,2,1,2>: Cost 3 vext2 <0,u,2,2>, <1,2,3,0>
+ 2819915878U, // <2,2,1,3>: Cost 3 vuzpr <0,2,0,2>, LHS
+ 3625364790U, // <2,2,1,4>: Cost 4 vext1 <0,2,2,1>, RHS
+ 2622719120U, // <2,2,1,5>: Cost 3 vext2 <0,u,2,2>, <1,5,3,7>
+ 3760031292U, // <2,2,1,6>: Cost 4 vext3 <0,2,u,2>, <2,1,6,3>
+ 3667170468U, // <2,2,1,7>: Cost 4 vext1 <7,2,2,1>, <7,2,2,1>
+ 2819915883U, // <2,2,1,u>: Cost 3 vuzpr <0,2,0,2>, LHS
+ 1489829990U, // <2,2,2,0>: Cost 2 vext1 <2,2,2,2>, LHS
+ 2563572470U, // <2,2,2,1>: Cost 3 vext1 <2,2,2,2>, <1,0,3,2>
+ 269271142U, // <2,2,2,2>: Cost 1 vdup2 LHS
+ 2685699698U, // <2,2,2,3>: Cost 3 vext3 <0,2,0,2>, <2,2,3,3>
+ 1489833270U, // <2,2,2,4>: Cost 2 vext1 <2,2,2,2>, RHS
+ 2685699720U, // <2,2,2,5>: Cost 3 vext3 <0,2,0,2>, <2,2,5,7>
+ 2622719930U, // <2,2,2,6>: Cost 3 vext2 <0,u,2,2>, <2,6,3,7>
+ 2593436837U, // <2,2,2,7>: Cost 3 vext1 <7,2,2,2>, <7,2,2,2>
+ 269271142U, // <2,2,2,u>: Cost 1 vdup2 LHS
+ 2685699750U, // <2,2,3,0>: Cost 3 vext3 <0,2,0,2>, <2,3,0,1>
+ 2690565806U, // <2,2,3,1>: Cost 3 vext3 <1,0,3,2>, <2,3,1,0>
+ 2953627240U, // <2,2,3,2>: Cost 3 vzipr LHS, <2,2,2,2>
+ 1879883878U, // <2,2,3,3>: Cost 2 vzipr LHS, LHS
+ 2685699790U, // <2,2,3,4>: Cost 3 vext3 <0,2,0,2>, <2,3,4,5>
+ 3893659342U, // <2,2,3,5>: Cost 4 vuzpr <0,2,0,2>, <2,3,4,5>
+ 2958270812U, // <2,2,3,6>: Cost 3 vzipr LHS, <0,4,2,6>
+ 2593445030U, // <2,2,3,7>: Cost 3 vext1 <7,2,2,3>, <7,2,2,3>
+ 1879883883U, // <2,2,3,u>: Cost 2 vzipr LHS, LHS
+ 2551644262U, // <2,2,4,0>: Cost 3 vext1 <0,2,2,4>, LHS
+ 3625386742U, // <2,2,4,1>: Cost 4 vext1 <0,2,2,4>, <1,0,3,2>
+ 2551645902U, // <2,2,4,2>: Cost 3 vext1 <0,2,2,4>, <2,3,4,5>
+ 3759441686U, // <2,2,4,3>: Cost 4 vext3 <0,2,0,2>, <2,4,3,5>
+ 2551647542U, // <2,2,4,4>: Cost 3 vext1 <0,2,2,4>, RHS
+ 1548979510U, // <2,2,4,5>: Cost 2 vext2 <0,u,2,2>, RHS
+ 2764901686U, // <2,2,4,6>: Cost 3 vuzpl <2,2,2,2>, RHS
+ 3667195047U, // <2,2,4,7>: Cost 4 vext1 <7,2,2,4>, <7,2,2,4>
+ 1548979753U, // <2,2,4,u>: Cost 2 vext2 <0,u,2,2>, RHS
+ 3696463432U, // <2,2,5,0>: Cost 4 vext2 <0,u,2,2>, <5,0,1,2>
+ 2617413328U, // <2,2,5,1>: Cost 3 vext2 <0,0,2,2>, <5,1,7,3>
+ 2685699936U, // <2,2,5,2>: Cost 3 vext3 <0,2,0,2>, <2,5,2,7>
+ 4027383910U, // <2,2,5,3>: Cost 4 vzipr <0,1,2,5>, LHS
+ 2228201085U, // <2,2,5,4>: Cost 3 vrev <2,2,4,5>
+ 2617413636U, // <2,2,5,5>: Cost 3 vext2 <0,0,2,2>, <5,5,5,5>
+ 2617413730U, // <2,2,5,6>: Cost 3 vext2 <0,0,2,2>, <5,6,7,0>
+ 2819919158U, // <2,2,5,7>: Cost 3 vuzpr <0,2,0,2>, RHS
+ 2819919159U, // <2,2,5,u>: Cost 3 vuzpr <0,2,0,2>, RHS
+ 3625402554U, // <2,2,6,0>: Cost 4 vext1 <0,2,2,6>, <0,2,2,6>
+ 3760031652U, // <2,2,6,1>: Cost 4 vext3 <0,2,u,2>, <2,6,1,3>
+ 2617414138U, // <2,2,6,2>: Cost 3 vext2 <0,0,2,2>, <6,2,7,3>
+ 2685700026U, // <2,2,6,3>: Cost 3 vext3 <0,2,0,2>, <2,6,3,7>
+ 3625405750U, // <2,2,6,4>: Cost 4 vext1 <0,2,2,6>, RHS
+ 3760031692U, // <2,2,6,5>: Cost 4 vext3 <0,2,u,2>, <2,6,5,7>
+ 3088679116U, // <2,2,6,6>: Cost 3 vtrnr <0,2,4,6>, <0,2,4,6>
+ 2657891169U, // <2,2,6,7>: Cost 3 vext2 <6,7,2,2>, <6,7,2,2>
+ 2685700071U, // <2,2,6,u>: Cost 3 vext3 <0,2,0,2>, <2,6,u,7>
+ 2726250474U, // <2,2,7,0>: Cost 3 vext3 <7,0,1,2>, <2,7,0,1>
+ 3704427616U, // <2,2,7,1>: Cost 4 vext2 <2,2,2,2>, <7,1,3,5>
+ 2660545701U, // <2,2,7,2>: Cost 3 vext2 <7,2,2,2>, <7,2,2,2>
+ 4030718054U, // <2,2,7,3>: Cost 4 vzipr <0,6,2,7>, LHS
+ 2617415014U, // <2,2,7,4>: Cost 3 vext2 <0,0,2,2>, <7,4,5,6>
+ 3302033032U, // <2,2,7,5>: Cost 4 vrev <2,2,5,7>
+ 3661246929U, // <2,2,7,6>: Cost 4 vext1 <6,2,2,7>, <6,2,2,7>
+ 2617415276U, // <2,2,7,7>: Cost 3 vext2 <0,0,2,2>, <7,7,7,7>
+ 2731558962U, // <2,2,7,u>: Cost 3 vext3 <7,u,1,2>, <2,7,u,1>
+ 1489829990U, // <2,2,u,0>: Cost 2 vext1 <2,2,2,2>, LHS
+ 1548982062U, // <2,2,u,1>: Cost 2 vext2 <0,u,2,2>, LHS
+ 269271142U, // <2,2,u,2>: Cost 1 vdup2 LHS
+ 1879924838U, // <2,2,u,3>: Cost 2 vzipr LHS, LHS
+ 1489833270U, // <2,2,u,4>: Cost 2 vext1 <2,2,2,2>, RHS
+ 1548982426U, // <2,2,u,5>: Cost 2 vext2 <0,u,2,2>, RHS
+ 2953666908U, // <2,2,u,6>: Cost 3 vzipr LHS, <0,4,2,6>
+ 2819919401U, // <2,2,u,7>: Cost 3 vuzpr <0,2,0,2>, RHS
+ 269271142U, // <2,2,u,u>: Cost 1 vdup2 LHS
+ 1544339456U, // <2,3,0,0>: Cost 2 vext2 LHS, <0,0,0,0>
+ 470597734U, // <2,3,0,1>: Cost 1 vext2 LHS, LHS
+ 1548984484U, // <2,3,0,2>: Cost 2 vext2 LHS, <0,2,0,2>
+ 2619408648U, // <2,3,0,3>: Cost 3 vext2 <0,3,2,3>, <0,3,2,3>
+ 1548984658U, // <2,3,0,4>: Cost 2 vext2 LHS, <0,4,1,5>
+ 2665857454U, // <2,3,0,5>: Cost 3 vext2 LHS, <0,5,2,7>
+ 2622726655U, // <2,3,0,6>: Cost 3 vext2 LHS, <0,6,2,7>
+ 2593494188U, // <2,3,0,7>: Cost 3 vext1 <7,2,3,0>, <7,2,3,0>
+ 470598301U, // <2,3,0,u>: Cost 1 vext2 LHS, LHS
+ 1544340214U, // <2,3,1,0>: Cost 2 vext2 LHS, <1,0,3,2>
+ 1544340276U, // <2,3,1,1>: Cost 2 vext2 LHS, <1,1,1,1>
+ 1544340374U, // <2,3,1,2>: Cost 2 vext2 LHS, <1,2,3,0>
+ 1548985304U, // <2,3,1,3>: Cost 2 vext2 LHS, <1,3,1,3>
+ 2551696694U, // <2,3,1,4>: Cost 3 vext1 <0,2,3,1>, RHS
+ 1548985488U, // <2,3,1,5>: Cost 2 vext2 LHS, <1,5,3,7>
+ 2622727375U, // <2,3,1,6>: Cost 3 vext2 LHS, <1,6,1,7>
+ 2665858347U, // <2,3,1,7>: Cost 3 vext2 LHS, <1,7,3,0>
+ 1548985709U, // <2,3,1,u>: Cost 2 vext2 LHS, <1,u,1,3>
+ 2622727613U, // <2,3,2,0>: Cost 3 vext2 LHS, <2,0,1,2>
+ 2622727711U, // <2,3,2,1>: Cost 3 vext2 LHS, <2,1,3,1>
+ 1544341096U, // <2,3,2,2>: Cost 2 vext2 LHS, <2,2,2,2>
+ 1544341158U, // <2,3,2,3>: Cost 2 vext2 LHS, <2,3,0,1>
+ 2622727958U, // <2,3,2,4>: Cost 3 vext2 LHS, <2,4,3,5>
+ 2622728032U, // <2,3,2,5>: Cost 3 vext2 LHS, <2,5,2,7>
+ 1548986298U, // <2,3,2,6>: Cost 2 vext2 LHS, <2,6,3,7>
+ 2665859050U, // <2,3,2,7>: Cost 3 vext2 LHS, <2,7,0,1>
+ 1548986427U, // <2,3,2,u>: Cost 2 vext2 LHS, <2,u,0,1>
+ 1548986518U, // <2,3,3,0>: Cost 2 vext2 LHS, <3,0,1,2>
+ 2622728415U, // <2,3,3,1>: Cost 3 vext2 LHS, <3,1,0,3>
+ 1489913458U, // <2,3,3,2>: Cost 2 vext1 <2,2,3,3>, <2,2,3,3>
+ 1544341916U, // <2,3,3,3>: Cost 2 vext2 LHS, <3,3,3,3>
+ 1548986882U, // <2,3,3,4>: Cost 2 vext2 LHS, <3,4,5,6>
+ 2665859632U, // <2,3,3,5>: Cost 3 vext2 LHS, <3,5,1,7>
+ 2234304870U, // <2,3,3,6>: Cost 3 vrev <3,2,6,3>
+ 2958271632U, // <2,3,3,7>: Cost 3 vzipr LHS, <1,5,3,7>
+ 1548987166U, // <2,3,3,u>: Cost 2 vext2 LHS, <3,u,1,2>
+ 1483948134U, // <2,3,4,0>: Cost 2 vext1 <1,2,3,4>, LHS
+ 1483948954U, // <2,3,4,1>: Cost 2 vext1 <1,2,3,4>, <1,2,3,4>
+ 2622729276U, // <2,3,4,2>: Cost 3 vext2 LHS, <4,2,6,0>
+ 2557692054U, // <2,3,4,3>: Cost 3 vext1 <1,2,3,4>, <3,0,1,2>
+ 1483951414U, // <2,3,4,4>: Cost 2 vext1 <1,2,3,4>, RHS
+ 470601014U, // <2,3,4,5>: Cost 1 vext2 LHS, RHS
+ 1592118644U, // <2,3,4,6>: Cost 2 vext2 LHS, <4,6,4,6>
+ 2593526960U, // <2,3,4,7>: Cost 3 vext1 <7,2,3,4>, <7,2,3,4>
+ 470601257U, // <2,3,4,u>: Cost 1 vext2 LHS, RHS
+ 2551726182U, // <2,3,5,0>: Cost 3 vext1 <0,2,3,5>, LHS
+ 1592118992U, // <2,3,5,1>: Cost 2 vext2 LHS, <5,1,7,3>
+ 2665860862U, // <2,3,5,2>: Cost 3 vext2 LHS, <5,2,3,4>
+ 2551728642U, // <2,3,5,3>: Cost 3 vext1 <0,2,3,5>, <3,4,5,6>
+ 1592119238U, // <2,3,5,4>: Cost 2 vext2 LHS, <5,4,7,6>
+ 1592119300U, // <2,3,5,5>: Cost 2 vext2 LHS, <5,5,5,5>
+ 1592119394U, // <2,3,5,6>: Cost 2 vext2 LHS, <5,6,7,0>
+ 1592119464U, // <2,3,5,7>: Cost 2 vext2 LHS, <5,7,5,7>
+ 1592119545U, // <2,3,5,u>: Cost 2 vext2 LHS, <5,u,5,7>
+ 2622730529U, // <2,3,6,0>: Cost 3 vext2 LHS, <6,0,1,2>
+ 2557707164U, // <2,3,6,1>: Cost 3 vext1 <1,2,3,6>, <1,2,3,6>
+ 1592119802U, // <2,3,6,2>: Cost 2 vext2 LHS, <6,2,7,3>
+ 2665861682U, // <2,3,6,3>: Cost 3 vext2 LHS, <6,3,4,5>
+ 2622730893U, // <2,3,6,4>: Cost 3 vext2 LHS, <6,4,5,6>
+ 2665861810U, // <2,3,6,5>: Cost 3 vext2 LHS, <6,5,0,7>
+ 1592120120U, // <2,3,6,6>: Cost 2 vext2 LHS, <6,6,6,6>
+ 1592120142U, // <2,3,6,7>: Cost 2 vext2 LHS, <6,7,0,1>
+ 1592120223U, // <2,3,6,u>: Cost 2 vext2 LHS, <6,u,0,1>
+ 1592120314U, // <2,3,7,0>: Cost 2 vext2 LHS, <7,0,1,2>
+ 2659890261U, // <2,3,7,1>: Cost 3 vext2 <7,1,2,3>, <7,1,2,3>
+ 2660553894U, // <2,3,7,2>: Cost 3 vext2 <7,2,2,3>, <7,2,2,3>
+ 2665862371U, // <2,3,7,3>: Cost 3 vext2 LHS, <7,3,0,1>
+ 1592120678U, // <2,3,7,4>: Cost 2 vext2 LHS, <7,4,5,6>
+ 2665862534U, // <2,3,7,5>: Cost 3 vext2 LHS, <7,5,0,2>
+ 2665862614U, // <2,3,7,6>: Cost 3 vext2 LHS, <7,6,0,1>
+ 1592120940U, // <2,3,7,7>: Cost 2 vext2 LHS, <7,7,7,7>
+ 1592120962U, // <2,3,7,u>: Cost 2 vext2 LHS, <7,u,1,2>
+ 1548990163U, // <2,3,u,0>: Cost 2 vext2 LHS, <u,0,1,2>
+ 470603566U, // <2,3,u,1>: Cost 1 vext2 LHS, LHS
+ 1548990341U, // <2,3,u,2>: Cost 2 vext2 LHS, <u,2,3,0>
+ 1548990396U, // <2,3,u,3>: Cost 2 vext2 LHS, <u,3,0,1>
+ 1548990527U, // <2,3,u,4>: Cost 2 vext2 LHS, <u,4,5,6>
+ 470603930U, // <2,3,u,5>: Cost 1 vext2 LHS, RHS
+ 1548990672U, // <2,3,u,6>: Cost 2 vext2 LHS, <u,6,3,7>
+ 1592121600U, // <2,3,u,7>: Cost 2 vext2 LHS, <u,7,0,1>
+ 470604133U, // <2,3,u,u>: Cost 1 vext2 LHS, LHS
+ 2617425942U, // <2,4,0,0>: Cost 3 vext2 <0,0,2,4>, <0,0,2,4>
+ 2618753126U, // <2,4,0,1>: Cost 3 vext2 <0,2,2,4>, LHS
+ 2618753208U, // <2,4,0,2>: Cost 3 vext2 <0,2,2,4>, <0,2,2,4>
+ 2619416841U, // <2,4,0,3>: Cost 3 vext2 <0,3,2,4>, <0,3,2,4>
+ 2587593628U, // <2,4,0,4>: Cost 3 vext1 <6,2,4,0>, <4,0,6,2>
+ 2712832914U, // <2,4,0,5>: Cost 3 vext3 <4,6,u,2>, <4,0,5,1>
+ 1634962332U, // <2,4,0,6>: Cost 2 vext3 <4,0,6,2>, <4,0,6,2>
+ 3799993252U, // <2,4,0,7>: Cost 4 vext3 <7,0,1,2>, <4,0,7,1>
+ 1634962332U, // <2,4,0,u>: Cost 2 vext3 <4,0,6,2>, <4,0,6,2>
+ 2619417334U, // <2,4,1,0>: Cost 3 vext2 <0,3,2,4>, <1,0,3,2>
+ 3692495668U, // <2,4,1,1>: Cost 4 vext2 <0,2,2,4>, <1,1,1,1>
+ 2625389466U, // <2,4,1,2>: Cost 3 vext2 <1,3,2,4>, <1,2,3,4>
+ 2826125414U, // <2,4,1,3>: Cost 3 vuzpr <1,2,3,4>, LHS
+ 3699794995U, // <2,4,1,4>: Cost 4 vext2 <1,4,2,4>, <1,4,2,4>
+ 3692496016U, // <2,4,1,5>: Cost 4 vext2 <0,2,2,4>, <1,5,3,7>
+ 3763424238U, // <2,4,1,6>: Cost 4 vext3 <0,u,0,2>, <4,1,6,3>
+ 3667317942U, // <2,4,1,7>: Cost 4 vext1 <7,2,4,1>, <7,2,4,1>
+ 2826125419U, // <2,4,1,u>: Cost 3 vuzpr <1,2,3,4>, LHS
+ 2629371336U, // <2,4,2,0>: Cost 3 vext2 <2,0,2,4>, <2,0,2,4>
+ 3699131946U, // <2,4,2,1>: Cost 4 vext2 <1,3,2,4>, <2,1,4,3>
+ 2630698602U, // <2,4,2,2>: Cost 3 vext2 <2,2,2,4>, <2,2,2,4>
+ 2618754766U, // <2,4,2,3>: Cost 3 vext2 <0,2,2,4>, <2,3,4,5>
+ 2826126234U, // <2,4,2,4>: Cost 3 vuzpr <1,2,3,4>, <1,2,3,4>
+ 2899119414U, // <2,4,2,5>: Cost 3 vzipl <2,2,2,2>, RHS
+ 3033337142U, // <2,4,2,6>: Cost 3 vtrnl <2,2,2,2>, RHS
+ 3800214597U, // <2,4,2,7>: Cost 4 vext3 <7,0,4,2>, <4,2,7,0>
+ 2899119657U, // <2,4,2,u>: Cost 3 vzipl <2,2,2,2>, RHS
+ 2635344033U, // <2,4,3,0>: Cost 3 vext2 <3,0,2,4>, <3,0,2,4>
+ 4032012325U, // <2,4,3,1>: Cost 4 vzipr LHS, <0,0,4,1>
+ 3692497228U, // <2,4,3,2>: Cost 4 vext2 <0,2,2,4>, <3,2,3,4>
+ 3692497308U, // <2,4,3,3>: Cost 4 vext2 <0,2,2,4>, <3,3,3,3>
+ 3001404624U, // <2,4,3,4>: Cost 3 vzipr LHS, <4,4,4,4>
+ 2953627342U, // <2,4,3,5>: Cost 3 vzipr LHS, <2,3,4,5>
+ 2953625804U, // <2,4,3,6>: Cost 3 vzipr LHS, <0,2,4,6>
+ 3899868160U, // <2,4,3,7>: Cost 4 vuzpr <1,2,3,4>, <1,3,5,7>
+ 2953625806U, // <2,4,3,u>: Cost 3 vzipr LHS, <0,2,4,u>
+ 2710916266U, // <2,4,4,0>: Cost 3 vext3 <4,4,0,2>, <4,4,0,2>
+ 3899869648U, // <2,4,4,1>: Cost 4 vuzpr <1,2,3,4>, <3,4,0,1>
+ 3899869658U, // <2,4,4,2>: Cost 4 vuzpr <1,2,3,4>, <3,4,1,2>
+ 3899868930U, // <2,4,4,3>: Cost 4 vuzpr <1,2,3,4>, <2,4,1,3>
+ 2712833232U, // <2,4,4,4>: Cost 3 vext3 <4,6,u,2>, <4,4,4,4>
+ 2618756406U, // <2,4,4,5>: Cost 3 vext2 <0,2,2,4>, RHS
+ 2765737270U, // <2,4,4,6>: Cost 3 vuzpl <2,3,4,5>, RHS
+ 4168304426U, // <2,4,4,7>: Cost 4 vtrnr <1,2,3,4>, <2,4,5,7>
+ 2618756649U, // <2,4,4,u>: Cost 3 vext2 <0,2,2,4>, RHS
+ 2551800011U, // <2,4,5,0>: Cost 3 vext1 <0,2,4,5>, <0,2,4,5>
+ 2569716470U, // <2,4,5,1>: Cost 3 vext1 <3,2,4,5>, <1,0,3,2>
+ 2563745405U, // <2,4,5,2>: Cost 3 vext1 <2,2,4,5>, <2,2,4,5>
+ 2569718102U, // <2,4,5,3>: Cost 3 vext1 <3,2,4,5>, <3,2,4,5>
+ 2551803190U, // <2,4,5,4>: Cost 3 vext1 <0,2,4,5>, RHS
+ 3625545732U, // <2,4,5,5>: Cost 4 vext1 <0,2,4,5>, <5,5,5,5>
+ 1611959606U, // <2,4,5,6>: Cost 2 vext3 <0,2,0,2>, RHS
+ 2826128694U, // <2,4,5,7>: Cost 3 vuzpr <1,2,3,4>, RHS
+ 1611959624U, // <2,4,5,u>: Cost 2 vext3 <0,2,0,2>, RHS
+ 1478066278U, // <2,4,6,0>: Cost 2 vext1 <0,2,4,6>, LHS
+ 2551808758U, // <2,4,6,1>: Cost 3 vext1 <0,2,4,6>, <1,0,3,2>
+ 2551809516U, // <2,4,6,2>: Cost 3 vext1 <0,2,4,6>, <2,0,6,4>
+ 2551810198U, // <2,4,6,3>: Cost 3 vext1 <0,2,4,6>, <3,0,1,2>
+ 1478069558U, // <2,4,6,4>: Cost 2 vext1 <0,2,4,6>, RHS
+ 2901888310U, // <2,4,6,5>: Cost 3 vzipl <2,6,3,7>, RHS
+ 2551812920U, // <2,4,6,6>: Cost 3 vext1 <0,2,4,6>, <6,6,6,6>
+ 2726251914U, // <2,4,6,7>: Cost 3 vext3 <7,0,1,2>, <4,6,7,1>
+ 1478072110U, // <2,4,6,u>: Cost 2 vext1 <0,2,4,6>, LHS
+ 2659234821U, // <2,4,7,0>: Cost 3 vext2 <7,0,2,4>, <7,0,2,4>
+ 3786722726U, // <2,4,7,1>: Cost 4 vext3 <4,7,1,2>, <4,7,1,2>
+ 3734303911U, // <2,4,7,2>: Cost 4 vext2 <7,2,2,4>, <7,2,2,4>
+ 3734967544U, // <2,4,7,3>: Cost 4 vext2 <7,3,2,4>, <7,3,2,4>
+ 3727005030U, // <2,4,7,4>: Cost 4 vext2 <6,0,2,4>, <7,4,5,6>
+ 2726251976U, // <2,4,7,5>: Cost 3 vext3 <7,0,1,2>, <4,7,5,0>
+ 2726251986U, // <2,4,7,6>: Cost 3 vext3 <7,0,1,2>, <4,7,6,1>
+ 3727005292U, // <2,4,7,7>: Cost 4 vext2 <6,0,2,4>, <7,7,7,7>
+ 2659234821U, // <2,4,7,u>: Cost 3 vext2 <7,0,2,4>, <7,0,2,4>
+ 1478082662U, // <2,4,u,0>: Cost 2 vext1 <0,2,4,u>, LHS
+ 2618758958U, // <2,4,u,1>: Cost 3 vext2 <0,2,2,4>, LHS
+ 2551826024U, // <2,4,u,2>: Cost 3 vext1 <0,2,4,u>, <2,2,2,2>
+ 2551826582U, // <2,4,u,3>: Cost 3 vext1 <0,2,4,u>, <3,0,1,2>
+ 1478085942U, // <2,4,u,4>: Cost 2 vext1 <0,2,4,u>, RHS
+ 2953668302U, // <2,4,u,5>: Cost 3 vzipr LHS, <2,3,4,5>
+ 1611959849U, // <2,4,u,6>: Cost 2 vext3 <0,2,0,2>, RHS
+ 2826128937U, // <2,4,u,7>: Cost 3 vuzpr <1,2,3,4>, RHS
+ 1611959867U, // <2,4,u,u>: Cost 2 vext3 <0,2,0,2>, RHS
+ 3691839488U, // <2,5,0,0>: Cost 4 vext2 <0,1,2,5>, <0,0,0,0>
+ 2618097766U, // <2,5,0,1>: Cost 3 vext2 <0,1,2,5>, LHS
+ 2620088484U, // <2,5,0,2>: Cost 3 vext2 <0,4,2,5>, <0,2,0,2>
+ 2619425034U, // <2,5,0,3>: Cost 3 vext2 <0,3,2,5>, <0,3,2,5>
+ 2620088667U, // <2,5,0,4>: Cost 3 vext2 <0,4,2,5>, <0,4,2,5>
+ 2620752300U, // <2,5,0,5>: Cost 3 vext2 <0,5,2,5>, <0,5,2,5>
+ 3693830655U, // <2,5,0,6>: Cost 4 vext2 <0,4,2,5>, <0,6,2,7>
+ 3094531382U, // <2,5,0,7>: Cost 3 vtrnr <1,2,3,0>, RHS
+ 2618098333U, // <2,5,0,u>: Cost 3 vext2 <0,1,2,5>, LHS
+ 3691840246U, // <2,5,1,0>: Cost 4 vext2 <0,1,2,5>, <1,0,3,2>
+ 3691840308U, // <2,5,1,1>: Cost 4 vext2 <0,1,2,5>, <1,1,1,1>
+ 2626061206U, // <2,5,1,2>: Cost 3 vext2 <1,4,2,5>, <1,2,3,0>
+ 2618098688U, // <2,5,1,3>: Cost 3 vext2 <0,1,2,5>, <1,3,5,7>
+ 2626061364U, // <2,5,1,4>: Cost 3 vext2 <1,4,2,5>, <1,4,2,5>
+ 3691840656U, // <2,5,1,5>: Cost 4 vext2 <0,1,2,5>, <1,5,3,7>
+ 3789082310U, // <2,5,1,6>: Cost 4 vext3 <5,1,6,2>, <5,1,6,2>
+ 2712833744U, // <2,5,1,7>: Cost 3 vext3 <4,6,u,2>, <5,1,7,3>
+ 2628715896U, // <2,5,1,u>: Cost 3 vext2 <1,u,2,5>, <1,u,2,5>
+ 3693831613U, // <2,5,2,0>: Cost 4 vext2 <0,4,2,5>, <2,0,1,2>
+ 4026698642U, // <2,5,2,1>: Cost 4 vzipr <0,0,2,2>, <4,0,5,1>
+ 2632033896U, // <2,5,2,2>: Cost 3 vext2 <2,4,2,5>, <2,2,2,2>
+ 3691841190U, // <2,5,2,3>: Cost 4 vext2 <0,1,2,5>, <2,3,0,1>
+ 2632034061U, // <2,5,2,4>: Cost 3 vext2 <2,4,2,5>, <2,4,2,5>
+ 3691841352U, // <2,5,2,5>: Cost 4 vext2 <0,1,2,5>, <2,5,0,1>
+ 3691841466U, // <2,5,2,6>: Cost 4 vext2 <0,1,2,5>, <2,6,3,7>
+ 3088354614U, // <2,5,2,7>: Cost 3 vtrnr <0,2,0,2>, RHS
+ 3088354615U, // <2,5,2,u>: Cost 3 vtrnr <0,2,0,2>, RHS
+ 2557829222U, // <2,5,3,0>: Cost 3 vext1 <1,2,5,3>, LHS
+ 2557830059U, // <2,5,3,1>: Cost 3 vext1 <1,2,5,3>, <1,2,5,3>
+ 2575746766U, // <2,5,3,2>: Cost 3 vext1 <4,2,5,3>, <2,3,4,5>
+ 3691841948U, // <2,5,3,3>: Cost 4 vext2 <0,1,2,5>, <3,3,3,3>
+ 2619427330U, // <2,5,3,4>: Cost 3 vext2 <0,3,2,5>, <3,4,5,6>
+ 2581720847U, // <2,5,3,5>: Cost 3 vext1 <5,2,5,3>, <5,2,5,3>
+ 2953628162U, // <2,5,3,6>: Cost 3 vzipr LHS, <3,4,5,6>
+ 2953626624U, // <2,5,3,7>: Cost 3 vzipr LHS, <1,3,5,7>
+ 2953626625U, // <2,5,3,u>: Cost 3 vzipr LHS, <1,3,5,u>
+ 2569781350U, // <2,5,4,0>: Cost 3 vext1 <3,2,5,4>, LHS
+ 3631580076U, // <2,5,4,1>: Cost 4 vext1 <1,2,5,4>, <1,2,5,4>
+ 2569782990U, // <2,5,4,2>: Cost 3 vext1 <3,2,5,4>, <2,3,4,5>
+ 2569783646U, // <2,5,4,3>: Cost 3 vext1 <3,2,5,4>, <3,2,5,4>
+ 2569784630U, // <2,5,4,4>: Cost 3 vext1 <3,2,5,4>, RHS
+ 2618101046U, // <2,5,4,5>: Cost 3 vext2 <0,1,2,5>, RHS
+ 3893905922U, // <2,5,4,6>: Cost 4 vuzpr <0,2,3,5>, <3,4,5,6>
+ 3094564150U, // <2,5,4,7>: Cost 3 vtrnr <1,2,3,4>, RHS
+ 2618101289U, // <2,5,4,u>: Cost 3 vext2 <0,1,2,5>, RHS
+ 2551873638U, // <2,5,5,0>: Cost 3 vext1 <0,2,5,5>, LHS
+ 3637560320U, // <2,5,5,1>: Cost 4 vext1 <2,2,5,5>, <1,3,5,7>
+ 3637560966U, // <2,5,5,2>: Cost 4 vext1 <2,2,5,5>, <2,2,5,5>
+ 3723030343U, // <2,5,5,3>: Cost 4 vext2 <5,3,2,5>, <5,3,2,5>
+ 2551876918U, // <2,5,5,4>: Cost 3 vext1 <0,2,5,5>, RHS
+ 2712834052U, // <2,5,5,5>: Cost 3 vext3 <4,6,u,2>, <5,5,5,5>
+ 4028713474U, // <2,5,5,6>: Cost 4 vzipr <0,3,2,5>, <3,4,5,6>
+ 2712834072U, // <2,5,5,7>: Cost 3 vext3 <4,6,u,2>, <5,5,7,7>
+ 2712834081U, // <2,5,5,u>: Cost 3 vext3 <4,6,u,2>, <5,5,u,7>
+ 2575769702U, // <2,5,6,0>: Cost 3 vext1 <4,2,5,6>, LHS
+ 3631596462U, // <2,5,6,1>: Cost 4 vext1 <1,2,5,6>, <1,2,5,6>
+ 2655924730U, // <2,5,6,2>: Cost 3 vext2 <6,4,2,5>, <6,2,7,3>
+ 3643541856U, // <2,5,6,3>: Cost 4 vext1 <3,2,5,6>, <3,2,5,6>
+ 2655924849U, // <2,5,6,4>: Cost 3 vext2 <6,4,2,5>, <6,4,2,5>
+ 3787755607U, // <2,5,6,5>: Cost 4 vext3 <4,u,6,2>, <5,6,5,7>
+ 4029385218U, // <2,5,6,6>: Cost 4 vzipr <0,4,2,6>, <3,4,5,6>
+ 3088682294U, // <2,5,6,7>: Cost 3 vtrnr <0,2,4,6>, RHS
+ 3088682295U, // <2,5,6,u>: Cost 3 vtrnr <0,2,4,6>, RHS
+ 2563833958U, // <2,5,7,0>: Cost 3 vext1 <2,2,5,7>, LHS
+ 2551890678U, // <2,5,7,1>: Cost 3 vext1 <0,2,5,7>, <1,0,3,2>
+ 2563835528U, // <2,5,7,2>: Cost 3 vext1 <2,2,5,7>, <2,2,5,7>
+ 3637577878U, // <2,5,7,3>: Cost 4 vext1 <2,2,5,7>, <3,0,1,2>
+ 2563837238U, // <2,5,7,4>: Cost 3 vext1 <2,2,5,7>, RHS
+ 2712834216U, // <2,5,7,5>: Cost 3 vext3 <4,6,u,2>, <5,7,5,7>
+ 2712834220U, // <2,5,7,6>: Cost 3 vext3 <4,6,u,2>, <5,7,6,2>
+ 4174449974U, // <2,5,7,7>: Cost 4 vtrnr <2,2,5,7>, RHS
+ 2563839790U, // <2,5,7,u>: Cost 3 vext1 <2,2,5,7>, LHS
+ 2563842150U, // <2,5,u,0>: Cost 3 vext1 <2,2,5,u>, LHS
+ 2618103598U, // <2,5,u,1>: Cost 3 vext2 <0,1,2,5>, LHS
+ 2563843721U, // <2,5,u,2>: Cost 3 vext1 <2,2,5,u>, <2,2,5,u>
+ 2569816418U, // <2,5,u,3>: Cost 3 vext1 <3,2,5,u>, <3,2,5,u>
+ 2622748735U, // <2,5,u,4>: Cost 3 vext2 <0,u,2,5>, <u,4,5,6>
+ 2618103962U, // <2,5,u,5>: Cost 3 vext2 <0,1,2,5>, RHS
+ 2953669122U, // <2,5,u,6>: Cost 3 vzipr LHS, <3,4,5,6>
+ 2953667584U, // <2,5,u,7>: Cost 3 vzipr LHS, <1,3,5,7>
+ 2618104165U, // <2,5,u,u>: Cost 3 vext2 <0,1,2,5>, LHS
+ 2620096512U, // <2,6,0,0>: Cost 3 vext2 <0,4,2,6>, <0,0,0,0>
+ 1546354790U, // <2,6,0,1>: Cost 2 vext2 <0,4,2,6>, LHS
+ 2620096676U, // <2,6,0,2>: Cost 3 vext2 <0,4,2,6>, <0,2,0,2>
+ 3693838588U, // <2,6,0,3>: Cost 4 vext2 <0,4,2,6>, <0,3,1,0>
+ 1546355036U, // <2,6,0,4>: Cost 2 vext2 <0,4,2,6>, <0,4,2,6>
+ 3694502317U, // <2,6,0,5>: Cost 4 vext2 <0,5,2,6>, <0,5,2,6>
+ 2551911246U, // <2,6,0,6>: Cost 3 vext1 <0,2,6,0>, <6,7,0,1>
+ 2720723287U, // <2,6,0,7>: Cost 3 vext3 <6,0,7,2>, <6,0,7,2>
+ 1546355357U, // <2,6,0,u>: Cost 2 vext2 <0,4,2,6>, LHS
+ 2620097270U, // <2,6,1,0>: Cost 3 vext2 <0,4,2,6>, <1,0,3,2>
+ 2620097332U, // <2,6,1,1>: Cost 3 vext2 <0,4,2,6>, <1,1,1,1>
+ 2620097430U, // <2,6,1,2>: Cost 3 vext2 <0,4,2,6>, <1,2,3,0>
+ 2820243558U, // <2,6,1,3>: Cost 3 vuzpr <0,2,4,6>, LHS
+ 2620097598U, // <2,6,1,4>: Cost 3 vext2 <0,4,2,6>, <1,4,3,6>
+ 2620097680U, // <2,6,1,5>: Cost 3 vext2 <0,4,2,6>, <1,5,3,7>
+ 3693839585U, // <2,6,1,6>: Cost 4 vext2 <0,4,2,6>, <1,6,3,7>
+ 2721386920U, // <2,6,1,7>: Cost 3 vext3 <6,1,7,2>, <6,1,7,2>
+ 2820243563U, // <2,6,1,u>: Cost 3 vuzpr <0,2,4,6>, LHS
+ 2714014137U, // <2,6,2,0>: Cost 3 vext3 <4,u,6,2>, <6,2,0,1>
+ 2712834500U, // <2,6,2,1>: Cost 3 vext3 <4,6,u,2>, <6,2,1,3>
+ 2620098152U, // <2,6,2,2>: Cost 3 vext2 <0,4,2,6>, <2,2,2,2>
+ 2620098214U, // <2,6,2,3>: Cost 3 vext2 <0,4,2,6>, <2,3,0,1>
+ 2632042254U, // <2,6,2,4>: Cost 3 vext2 <2,4,2,6>, <2,4,2,6>
+ 2712834540U, // <2,6,2,5>: Cost 3 vext3 <4,6,u,2>, <6,2,5,7>
+ 2820243660U, // <2,6,2,6>: Cost 3 vuzpr <0,2,4,6>, <0,2,4,6>
+ 2958265654U, // <2,6,2,7>: Cost 3 vzipr <0,u,2,2>, RHS
+ 2620098619U, // <2,6,2,u>: Cost 3 vext2 <0,4,2,6>, <2,u,0,1>
+ 2620098710U, // <2,6,3,0>: Cost 3 vext2 <0,4,2,6>, <3,0,1,2>
+ 3893986982U, // <2,6,3,1>: Cost 4 vuzpr <0,2,4,6>, <2,3,0,1>
+ 2569848762U, // <2,6,3,2>: Cost 3 vext1 <3,2,6,3>, <2,6,3,7>
+ 2620098972U, // <2,6,3,3>: Cost 3 vext2 <0,4,2,6>, <3,3,3,3>
+ 2620099074U, // <2,6,3,4>: Cost 3 vext2 <0,4,2,6>, <3,4,5,6>
+ 3893987022U, // <2,6,3,5>: Cost 4 vuzpr <0,2,4,6>, <2,3,4,5>
+ 3001404644U, // <2,6,3,6>: Cost 3 vzipr LHS, <4,4,6,6>
+ 1879887158U, // <2,6,3,7>: Cost 2 vzipr LHS, RHS
+ 1879887159U, // <2,6,3,u>: Cost 2 vzipr LHS, RHS
+ 2620099484U, // <2,6,4,0>: Cost 3 vext2 <0,4,2,6>, <4,0,6,2>
+ 2620099566U, // <2,6,4,1>: Cost 3 vext2 <0,4,2,6>, <4,1,6,3>
+ 2620099644U, // <2,6,4,2>: Cost 3 vext2 <0,4,2,6>, <4,2,6,0>
+ 3643599207U, // <2,6,4,3>: Cost 4 vext1 <3,2,6,4>, <3,2,6,4>
+ 2575830080U, // <2,6,4,4>: Cost 3 vext1 <4,2,6,4>, <4,2,6,4>
+ 1546358070U, // <2,6,4,5>: Cost 2 vext2 <0,4,2,6>, RHS
+ 2667875700U, // <2,6,4,6>: Cost 3 vext2 <u,4,2,6>, <4,6,4,6>
+ 4028042550U, // <2,6,4,7>: Cost 4 vzipr <0,2,2,4>, RHS
+ 1546358313U, // <2,6,4,u>: Cost 2 vext2 <0,4,2,6>, RHS
+ 3693841992U, // <2,6,5,0>: Cost 4 vext2 <0,4,2,6>, <5,0,1,2>
+ 2667876048U, // <2,6,5,1>: Cost 3 vext2 <u,4,2,6>, <5,1,7,3>
+ 2712834756U, // <2,6,5,2>: Cost 3 vext3 <4,6,u,2>, <6,5,2,7>
+ 3643607400U, // <2,6,5,3>: Cost 4 vext1 <3,2,6,5>, <3,2,6,5>
+ 2252091873U, // <2,6,5,4>: Cost 3 vrev <6,2,4,5>
+ 2667876356U, // <2,6,5,5>: Cost 3 vext2 <u,4,2,6>, <5,5,5,5>
+ 2667876450U, // <2,6,5,6>: Cost 3 vext2 <u,4,2,6>, <5,6,7,0>
+ 2820246838U, // <2,6,5,7>: Cost 3 vuzpr <0,2,4,6>, RHS
+ 2820246839U, // <2,6,5,u>: Cost 3 vuzpr <0,2,4,6>, RHS
+ 2563899494U, // <2,6,6,0>: Cost 3 vext1 <2,2,6,6>, LHS
+ 3893988683U, // <2,6,6,1>: Cost 4 vuzpr <0,2,4,6>, <4,6,0,1>
+ 2563901072U, // <2,6,6,2>: Cost 3 vext1 <2,2,6,6>, <2,2,6,6>
+ 3893987236U, // <2,6,6,3>: Cost 4 vuzpr <0,2,4,6>, <2,6,1,3>
+ 2563902774U, // <2,6,6,4>: Cost 3 vext1 <2,2,6,6>, RHS
+ 3893988723U, // <2,6,6,5>: Cost 4 vuzpr <0,2,4,6>, <4,6,4,5>
+ 2712834872U, // <2,6,6,6>: Cost 3 vext3 <4,6,u,2>, <6,6,6,6>
+ 2955644214U, // <2,6,6,7>: Cost 3 vzipr <0,4,2,6>, RHS
+ 2955644215U, // <2,6,6,u>: Cost 3 vzipr <0,4,2,6>, RHS
+ 2712834894U, // <2,6,7,0>: Cost 3 vext3 <4,6,u,2>, <6,7,0,1>
+ 2724926296U, // <2,6,7,1>: Cost 3 vext3 <6,7,1,2>, <6,7,1,2>
+ 2725000033U, // <2,6,7,2>: Cost 3 vext3 <6,7,2,2>, <6,7,2,2>
+ 2702365544U, // <2,6,7,3>: Cost 3 vext3 <3,0,1,2>, <6,7,3,0>
+ 2712834934U, // <2,6,7,4>: Cost 3 vext3 <4,6,u,2>, <6,7,4,5>
+ 3776107393U, // <2,6,7,5>: Cost 4 vext3 <3,0,1,2>, <6,7,5,7>
+ 2725294981U, // <2,6,7,6>: Cost 3 vext3 <6,7,6,2>, <6,7,6,2>
+ 2726253452U, // <2,6,7,7>: Cost 3 vext3 <7,0,1,2>, <6,7,7,0>
+ 2712834966U, // <2,6,7,u>: Cost 3 vext3 <4,6,u,2>, <6,7,u,1>
+ 2620102355U, // <2,6,u,0>: Cost 3 vext2 <0,4,2,6>, <u,0,1,2>
+ 1546360622U, // <2,6,u,1>: Cost 2 vext2 <0,4,2,6>, LHS
+ 2620102536U, // <2,6,u,2>: Cost 3 vext2 <0,4,2,6>, <u,2,3,3>
+ 2820244125U, // <2,6,u,3>: Cost 3 vuzpr <0,2,4,6>, LHS
+ 1594136612U, // <2,6,u,4>: Cost 2 vext2 <u,4,2,6>, <u,4,2,6>
+ 1546360986U, // <2,6,u,5>: Cost 2 vext2 <0,4,2,6>, RHS
+ 2620102864U, // <2,6,u,6>: Cost 3 vext2 <0,4,2,6>, <u,6,3,7>
+ 1879928118U, // <2,6,u,7>: Cost 2 vzipr LHS, RHS
+ 1879928119U, // <2,6,u,u>: Cost 2 vzipr LHS, RHS
+ 2726179825U, // <2,7,0,0>: Cost 3 vext3 <7,0,0,2>, <7,0,0,2>
+ 1652511738U, // <2,7,0,1>: Cost 2 vext3 <7,0,1,2>, <7,0,1,2>
+ 2621431972U, // <2,7,0,2>: Cost 3 vext2 <0,6,2,7>, <0,2,0,2>
+ 2257949868U, // <2,7,0,3>: Cost 3 vrev <7,2,3,0>
+ 2726474773U, // <2,7,0,4>: Cost 3 vext3 <7,0,4,2>, <7,0,4,2>
+ 2620768686U, // <2,7,0,5>: Cost 3 vext2 <0,5,2,7>, <0,5,2,7>
+ 2621432319U, // <2,7,0,6>: Cost 3 vext2 <0,6,2,7>, <0,6,2,7>
+ 2599760953U, // <2,7,0,7>: Cost 3 vext1 <u,2,7,0>, <7,0,u,2>
+ 1653027897U, // <2,7,0,u>: Cost 2 vext3 <7,0,u,2>, <7,0,u,2>
+ 2639348470U, // <2,7,1,0>: Cost 3 vext2 <3,6,2,7>, <1,0,3,2>
+ 3695174452U, // <2,7,1,1>: Cost 4 vext2 <0,6,2,7>, <1,1,1,1>
+ 3695174550U, // <2,7,1,2>: Cost 4 vext2 <0,6,2,7>, <1,2,3,0>
+ 3694511104U, // <2,7,1,3>: Cost 4 vext2 <0,5,2,7>, <1,3,5,7>
+ 3713090594U, // <2,7,1,4>: Cost 4 vext2 <3,6,2,7>, <1,4,0,5>
+ 3693184144U, // <2,7,1,5>: Cost 4 vext2 <0,3,2,7>, <1,5,3,7>
+ 2627405016U, // <2,7,1,6>: Cost 3 vext2 <1,6,2,7>, <1,6,2,7>
+ 3799995519U, // <2,7,1,7>: Cost 4 vext3 <7,0,1,2>, <7,1,7,0>
+ 2639348470U, // <2,7,1,u>: Cost 3 vext2 <3,6,2,7>, <1,0,3,2>
+ 3695175101U, // <2,7,2,0>: Cost 4 vext2 <0,6,2,7>, <2,0,1,2>
+ 3643655168U, // <2,7,2,1>: Cost 4 vext1 <3,2,7,2>, <1,3,5,7>
+ 2257892517U, // <2,7,2,2>: Cost 3 vrev <7,2,2,2>
+ 3695175334U, // <2,7,2,3>: Cost 4 vext2 <0,6,2,7>, <2,3,0,1>
+ 3695175465U, // <2,7,2,4>: Cost 4 vext2 <0,6,2,7>, <2,4,5,6>
+ 2632714080U, // <2,7,2,5>: Cost 3 vext2 <2,5,2,7>, <2,5,2,7>
+ 2633377713U, // <2,7,2,6>: Cost 3 vext2 <2,6,2,7>, <2,6,2,7>
+ 3695175658U, // <2,7,2,7>: Cost 4 vext2 <0,6,2,7>, <2,7,0,1>
+ 2634704979U, // <2,7,2,u>: Cost 3 vext2 <2,u,2,7>, <2,u,2,7>
+ 1514094694U, // <2,7,3,0>: Cost 2 vext1 <6,2,7,3>, LHS
+ 2569921680U, // <2,7,3,1>: Cost 3 vext1 <3,2,7,3>, <1,5,3,7>
+ 2587838056U, // <2,7,3,2>: Cost 3 vext1 <6,2,7,3>, <2,2,2,2>
+ 2569922927U, // <2,7,3,3>: Cost 3 vext1 <3,2,7,3>, <3,2,7,3>
+ 1514097974U, // <2,7,3,4>: Cost 2 vext1 <6,2,7,3>, RHS
+ 2581868321U, // <2,7,3,5>: Cost 3 vext1 <5,2,7,3>, <5,2,7,3>
+ 1514099194U, // <2,7,3,6>: Cost 2 vext1 <6,2,7,3>, <6,2,7,3>
+ 2587841530U, // <2,7,3,7>: Cost 3 vext1 <6,2,7,3>, <7,0,1,2>
+ 1514100526U, // <2,7,3,u>: Cost 2 vext1 <6,2,7,3>, LHS
+ 2708706617U, // <2,7,4,0>: Cost 3 vext3 <4,0,6,2>, <7,4,0,6>
+ 3649643418U, // <2,7,4,1>: Cost 4 vext1 <4,2,7,4>, <1,2,3,4>
+ 3649644330U, // <2,7,4,2>: Cost 4 vext1 <4,2,7,4>, <2,4,5,7>
+ 2257982640U, // <2,7,4,3>: Cost 3 vrev <7,2,3,4>
+ 3649645641U, // <2,7,4,4>: Cost 4 vext1 <4,2,7,4>, <4,2,7,4>
+ 2621435190U, // <2,7,4,5>: Cost 3 vext2 <0,6,2,7>, RHS
+ 2712835441U, // <2,7,4,6>: Cost 3 vext3 <4,6,u,2>, <7,4,6,u>
+ 3799995762U, // <2,7,4,7>: Cost 4 vext3 <7,0,1,2>, <7,4,7,0>
+ 2621435433U, // <2,7,4,u>: Cost 3 vext2 <0,6,2,7>, RHS
+ 2729497990U, // <2,7,5,0>: Cost 3 vext3 <7,5,0,2>, <7,5,0,2>
+ 3643679744U, // <2,7,5,1>: Cost 4 vext1 <3,2,7,5>, <1,3,5,7>
+ 3637708424U, // <2,7,5,2>: Cost 4 vext1 <2,2,7,5>, <2,2,5,7>
+ 3643681137U, // <2,7,5,3>: Cost 4 vext1 <3,2,7,5>, <3,2,7,5>
+ 2599800118U, // <2,7,5,4>: Cost 3 vext1 <u,2,7,5>, RHS
+ 3786577334U, // <2,7,5,5>: Cost 4 vext3 <4,6,u,2>, <7,5,5,5>
+ 3786577345U, // <2,7,5,6>: Cost 4 vext3 <4,6,u,2>, <7,5,6,7>
+ 2599802214U, // <2,7,5,7>: Cost 3 vext1 <u,2,7,5>, <7,4,5,6>
+ 2599802670U, // <2,7,5,u>: Cost 3 vext1 <u,2,7,5>, LHS
+ 2581889126U, // <2,7,6,0>: Cost 3 vext1 <5,2,7,6>, LHS
+ 3643687936U, // <2,7,6,1>: Cost 4 vext1 <3,2,7,6>, <1,3,5,7>
+ 2663240186U, // <2,7,6,2>: Cost 3 vext2 <7,6,2,7>, <6,2,7,3>
+ 3643689330U, // <2,7,6,3>: Cost 4 vext1 <3,2,7,6>, <3,2,7,6>
+ 2581892406U, // <2,7,6,4>: Cost 3 vext1 <5,2,7,6>, RHS
+ 2581892900U, // <2,7,6,5>: Cost 3 vext1 <5,2,7,6>, <5,2,7,6>
+ 2587865597U, // <2,7,6,6>: Cost 3 vext1 <6,2,7,6>, <6,2,7,6>
+ 3786577428U, // <2,7,6,7>: Cost 4 vext3 <4,6,u,2>, <7,6,7,0>
+ 2581894958U, // <2,7,6,u>: Cost 3 vext1 <5,2,7,6>, LHS
+ 2726254119U, // <2,7,7,0>: Cost 3 vext3 <7,0,1,2>, <7,7,0,1>
+ 3804640817U, // <2,7,7,1>: Cost 4 vext3 <7,7,1,2>, <7,7,1,2>
+ 3637724826U, // <2,7,7,2>: Cost 4 vext1 <2,2,7,7>, <2,2,7,7>
+ 3734992123U, // <2,7,7,3>: Cost 4 vext2 <7,3,2,7>, <7,3,2,7>
+ 2552040758U, // <2,7,7,4>: Cost 3 vext1 <0,2,7,7>, RHS
+ 3799995992U, // <2,7,7,5>: Cost 4 vext3 <7,0,1,2>, <7,7,5,5>
+ 2663241198U, // <2,7,7,6>: Cost 3 vext2 <7,6,2,7>, <7,6,2,7>
+ 2712835692U, // <2,7,7,7>: Cost 3 vext3 <4,6,u,2>, <7,7,7,7>
+ 2731562607U, // <2,7,7,u>: Cost 3 vext3 <7,u,1,2>, <7,7,u,1>
+ 1514135654U, // <2,7,u,0>: Cost 2 vext1 <6,2,7,u>, LHS
+ 1657820802U, // <2,7,u,1>: Cost 2 vext3 <7,u,1,2>, <7,u,1,2>
+ 2587879016U, // <2,7,u,2>: Cost 3 vext1 <6,2,7,u>, <2,2,2,2>
+ 2569963892U, // <2,7,u,3>: Cost 3 vext1 <3,2,7,u>, <3,2,7,u>
+ 1514138934U, // <2,7,u,4>: Cost 2 vext1 <6,2,7,u>, RHS
+ 2621438106U, // <2,7,u,5>: Cost 3 vext2 <0,6,2,7>, RHS
+ 1514140159U, // <2,7,u,6>: Cost 2 vext1 <6,2,7,u>, <6,2,7,u>
+ 2587882490U, // <2,7,u,7>: Cost 3 vext1 <6,2,7,u>, <7,0,1,2>
+ 1514141486U, // <2,7,u,u>: Cost 2 vext1 <6,2,7,u>, LHS
+ 1544380416U, // <2,u,0,0>: Cost 2 vext2 LHS, <0,0,0,0>
+ 470638699U, // <2,u,0,1>: Cost 1 vext2 LHS, LHS
+ 1544380580U, // <2,u,0,2>: Cost 2 vext2 LHS, <0,2,0,2>
+ 1658631909U, // <2,u,0,3>: Cost 2 vext3 <u,0,3,2>, <u,0,3,2>
+ 1544380754U, // <2,u,0,4>: Cost 2 vext2 LHS, <0,4,1,5>
+ 2665898414U, // <2,u,0,5>: Cost 3 vext2 LHS, <0,5,2,7>
+ 1658853120U, // <2,u,0,6>: Cost 2 vext3 <u,0,6,2>, <u,0,6,2>
+ 3094531625U, // <2,u,0,7>: Cost 3 vtrnr <1,2,3,0>, RHS
+ 470639261U, // <2,u,0,u>: Cost 1 vext2 LHS, LHS
+ 1544381174U, // <2,u,1,0>: Cost 2 vext2 LHS, <1,0,3,2>
+ 1544381236U, // <2,u,1,1>: Cost 2 vext2 LHS, <1,1,1,1>
+ 1544381334U, // <2,u,1,2>: Cost 2 vext2 LHS, <1,2,3,0>
+ 1544381400U, // <2,u,1,3>: Cost 2 vext2 LHS, <1,3,1,3>
+ 2618123325U, // <2,u,1,4>: Cost 3 vext2 LHS, <1,4,3,5>
+ 1544381584U, // <2,u,1,5>: Cost 2 vext2 LHS, <1,5,3,7>
+ 2618123489U, // <2,u,1,6>: Cost 3 vext2 LHS, <1,6,3,7>
+ 2726254427U, // <2,u,1,7>: Cost 3 vext3 <7,0,1,2>, <u,1,7,3>
+ 1544381823U, // <2,u,1,u>: Cost 2 vext2 LHS, <1,u,3,3>
+ 1478328422U, // <2,u,2,0>: Cost 2 vext1 <0,2,u,2>, LHS
+ 2618123807U, // <2,u,2,1>: Cost 3 vext2 LHS, <2,1,3,1>
+ 269271142U, // <2,u,2,2>: Cost 1 vdup2 LHS
+ 1544382118U, // <2,u,2,3>: Cost 2 vext2 LHS, <2,3,0,1>
+ 1478331702U, // <2,u,2,4>: Cost 2 vext1 <0,2,u,2>, RHS
+ 2618124136U, // <2,u,2,5>: Cost 3 vext2 LHS, <2,5,3,6>
+ 1544382394U, // <2,u,2,6>: Cost 2 vext2 LHS, <2,6,3,7>
+ 3088354857U, // <2,u,2,7>: Cost 3 vtrnr <0,2,0,2>, RHS
+ 269271142U, // <2,u,2,u>: Cost 1 vdup2 LHS
+ 1544382614U, // <2,u,3,0>: Cost 2 vext2 LHS, <3,0,1,2>
+ 2953627374U, // <2,u,3,1>: Cost 3 vzipr LHS, <2,3,u,1>
+ 1490282143U, // <2,u,3,2>: Cost 2 vext1 <2,2,u,3>, <2,2,u,3>
+ 1879883932U, // <2,u,3,3>: Cost 2 vzipr LHS, LHS
+ 1544382978U, // <2,u,3,4>: Cost 2 vext2 LHS, <3,4,5,6>
+ 2953627378U, // <2,u,3,5>: Cost 3 vzipr LHS, <2,3,u,5>
+ 1514172931U, // <2,u,3,6>: Cost 2 vext1 <6,2,u,3>, <6,2,u,3>
+ 1879887176U, // <2,u,3,7>: Cost 2 vzipr LHS, RHS
+ 1879883937U, // <2,u,3,u>: Cost 2 vzipr LHS, LHS
+ 1484316774U, // <2,u,4,0>: Cost 2 vext1 <1,2,u,4>, LHS
+ 1484317639U, // <2,u,4,1>: Cost 2 vext1 <1,2,u,4>, <1,2,u,4>
+ 2552088270U, // <2,u,4,2>: Cost 3 vext1 <0,2,u,4>, <2,3,4,5>
+ 1190213513U, // <2,u,4,3>: Cost 2 vrev <u,2,3,4>
+ 1484320054U, // <2,u,4,4>: Cost 2 vext1 <1,2,u,4>, RHS
+ 470641974U, // <2,u,4,5>: Cost 1 vext2 LHS, RHS
+ 1592159604U, // <2,u,4,6>: Cost 2 vext2 LHS, <4,6,4,6>
+ 3094564393U, // <2,u,4,7>: Cost 3 vtrnr <1,2,3,4>, RHS
+ 470642217U, // <2,u,4,u>: Cost 1 vext2 LHS, RHS
+ 2552094959U, // <2,u,5,0>: Cost 3 vext1 <0,2,u,5>, <0,2,u,5>
+ 1592159952U, // <2,u,5,1>: Cost 2 vext2 LHS, <5,1,7,3>
+ 2564040353U, // <2,u,5,2>: Cost 3 vext1 <2,2,u,5>, <2,2,u,5>
+ 2690275455U, // <2,u,5,3>: Cost 3 vext3 <0,u,u,2>, <u,5,3,7>
+ 1592160198U, // <2,u,5,4>: Cost 2 vext2 LHS, <5,4,7,6>
+ 1592160260U, // <2,u,5,5>: Cost 2 vext2 LHS, <5,5,5,5>
+ 1611962522U, // <2,u,5,6>: Cost 2 vext3 <0,2,0,2>, RHS
+ 1592160424U, // <2,u,5,7>: Cost 2 vext2 LHS, <5,7,5,7>
+ 1611962540U, // <2,u,5,u>: Cost 2 vext3 <0,2,0,2>, RHS
+ 1478361190U, // <2,u,6,0>: Cost 2 vext1 <0,2,u,6>, LHS
+ 2552103670U, // <2,u,6,1>: Cost 3 vext1 <0,2,u,6>, <1,0,3,2>
+ 1592160762U, // <2,u,6,2>: Cost 2 vext2 LHS, <6,2,7,3>
+ 2685704400U, // <2,u,6,3>: Cost 3 vext3 <0,2,0,2>, <u,6,3,7>
+ 1478364470U, // <2,u,6,4>: Cost 2 vext1 <0,2,u,6>, RHS
+ 2901891226U, // <2,u,6,5>: Cost 3 vzipl <2,6,3,7>, RHS
+ 1592161080U, // <2,u,6,6>: Cost 2 vext2 LHS, <6,6,6,6>
+ 1592161102U, // <2,u,6,7>: Cost 2 vext2 LHS, <6,7,0,1>
+ 1478367022U, // <2,u,6,u>: Cost 2 vext1 <0,2,u,6>, LHS
+ 1592161274U, // <2,u,7,0>: Cost 2 vext2 LHS, <7,0,1,2>
+ 2659931226U, // <2,u,7,1>: Cost 3 vext2 <7,1,2,u>, <7,1,2,u>
+ 2564056739U, // <2,u,7,2>: Cost 3 vext1 <2,2,u,7>, <2,2,u,7>
+ 2665903331U, // <2,u,7,3>: Cost 3 vext2 LHS, <7,3,0,1>
+ 1592161638U, // <2,u,7,4>: Cost 2 vext2 LHS, <7,4,5,6>
+ 2665903494U, // <2,u,7,5>: Cost 3 vext2 LHS, <7,5,0,2>
+ 2587947527U, // <2,u,7,6>: Cost 3 vext1 <6,2,u,7>, <6,2,u,7>
+ 1592161900U, // <2,u,7,7>: Cost 2 vext2 LHS, <7,7,7,7>
+ 1592161922U, // <2,u,7,u>: Cost 2 vext2 LHS, <7,u,1,2>
+ 1478377574U, // <2,u,u,0>: Cost 2 vext1 <0,2,u,u>, LHS
+ 470644526U, // <2,u,u,1>: Cost 1 vext2 LHS, LHS
+ 269271142U, // <2,u,u,2>: Cost 1 vdup2 LHS
+ 1879924892U, // <2,u,u,3>: Cost 2 vzipr LHS, LHS
+ 1478380854U, // <2,u,u,4>: Cost 2 vext1 <0,2,u,u>, RHS
+ 470644890U, // <2,u,u,5>: Cost 1 vext2 LHS, RHS
+ 1611962765U, // <2,u,u,6>: Cost 2 vext3 <0,2,0,2>, RHS
+ 1879928136U, // <2,u,u,7>: Cost 2 vzipr LHS, RHS
+ 470645093U, // <2,u,u,u>: Cost 1 vext2 LHS, LHS
+ 1611448320U, // <3,0,0,0>: Cost 2 vext3 LHS, <0,0,0,0>
+ 1611890698U, // <3,0,0,1>: Cost 2 vext3 LHS, <0,0,1,1>
+ 1611890708U, // <3,0,0,2>: Cost 2 vext3 LHS, <0,0,2,2>
+ 3763576860U, // <3,0,0,3>: Cost 4 vext3 LHS, <0,0,3,1>
+ 2689835045U, // <3,0,0,4>: Cost 3 vext3 LHS, <0,0,4,1>
+ 3698508206U, // <3,0,0,5>: Cost 4 vext2 <1,2,3,0>, <0,5,2,7>
+ 3763576887U, // <3,0,0,6>: Cost 4 vext3 LHS, <0,0,6,1>
+ 3667678434U, // <3,0,0,7>: Cost 4 vext1 <7,3,0,0>, <7,3,0,0>
+ 1616093258U, // <3,0,0,u>: Cost 2 vext3 LHS, <0,0,u,2>
+ 1490337894U, // <3,0,1,0>: Cost 2 vext1 <2,3,0,1>, LHS
+ 2685632602U, // <3,0,1,1>: Cost 3 vext3 LHS, <0,1,1,0>
+ 537706598U, // <3,0,1,2>: Cost 1 vext3 LHS, LHS
+ 2624766936U, // <3,0,1,3>: Cost 3 vext2 <1,2,3,0>, <1,3,1,3>
+ 1490341174U, // <3,0,1,4>: Cost 2 vext1 <2,3,0,1>, RHS
+ 2624767120U, // <3,0,1,5>: Cost 3 vext2 <1,2,3,0>, <1,5,3,7>
+ 2732966030U, // <3,0,1,6>: Cost 3 vext3 LHS, <0,1,6,7>
+ 2593944803U, // <3,0,1,7>: Cost 3 vext1 <7,3,0,1>, <7,3,0,1>
+ 537706652U, // <3,0,1,u>: Cost 1 vext3 LHS, LHS
+ 1611890852U, // <3,0,2,0>: Cost 2 vext3 LHS, <0,2,0,2>
+ 2685632684U, // <3,0,2,1>: Cost 3 vext3 LHS, <0,2,1,1>
+ 2685632692U, // <3,0,2,2>: Cost 3 vext3 LHS, <0,2,2,0>
+ 2685632702U, // <3,0,2,3>: Cost 3 vext3 LHS, <0,2,3,1>
+ 1611890892U, // <3,0,2,4>: Cost 2 vext3 LHS, <0,2,4,6>
+ 2732966102U, // <3,0,2,5>: Cost 3 vext3 LHS, <0,2,5,7>
+ 2624767930U, // <3,0,2,6>: Cost 3 vext2 <1,2,3,0>, <2,6,3,7>
+ 2685632744U, // <3,0,2,7>: Cost 3 vext3 LHS, <0,2,7,7>
+ 1611890924U, // <3,0,2,u>: Cost 2 vext3 LHS, <0,2,u,2>
+ 2624768150U, // <3,0,3,0>: Cost 3 vext2 <1,2,3,0>, <3,0,1,2>
+ 2685632764U, // <3,0,3,1>: Cost 3 vext3 LHS, <0,3,1,0>
+ 2685632774U, // <3,0,3,2>: Cost 3 vext3 LHS, <0,3,2,1>
+ 2624768412U, // <3,0,3,3>: Cost 3 vext2 <1,2,3,0>, <3,3,3,3>
+ 2624768514U, // <3,0,3,4>: Cost 3 vext2 <1,2,3,0>, <3,4,5,6>
+ 3702491714U, // <3,0,3,5>: Cost 4 vext2 <1,u,3,0>, <3,5,3,7>
+ 2624768632U, // <3,0,3,6>: Cost 3 vext2 <1,2,3,0>, <3,6,0,7>
+ 3702491843U, // <3,0,3,7>: Cost 4 vext2 <1,u,3,0>, <3,7,0,1>
+ 2686959934U, // <3,0,3,u>: Cost 3 vext3 <0,3,u,3>, <0,3,u,3>
+ 2689835336U, // <3,0,4,0>: Cost 3 vext3 LHS, <0,4,0,4>
+ 1611891026U, // <3,0,4,1>: Cost 2 vext3 LHS, <0,4,1,5>
+ 1611891036U, // <3,0,4,2>: Cost 2 vext3 LHS, <0,4,2,6>
+ 3763577184U, // <3,0,4,3>: Cost 4 vext3 LHS, <0,4,3,1>
+ 2689835374U, // <3,0,4,4>: Cost 3 vext3 LHS, <0,4,4,6>
+ 1551027510U, // <3,0,4,5>: Cost 2 vext2 <1,2,3,0>, RHS
+ 2666573172U, // <3,0,4,6>: Cost 3 vext2 <u,2,3,0>, <4,6,4,6>
+ 3667711206U, // <3,0,4,7>: Cost 4 vext1 <7,3,0,4>, <7,3,0,4>
+ 1616093586U, // <3,0,4,u>: Cost 2 vext3 LHS, <0,4,u,6>
+ 2685190556U, // <3,0,5,0>: Cost 3 vext3 LHS, <0,5,0,7>
+ 2666573520U, // <3,0,5,1>: Cost 3 vext2 <u,2,3,0>, <5,1,7,3>
+ 3040886886U, // <3,0,5,2>: Cost 3 vtrnl <3,4,5,6>, LHS
+ 3625912834U, // <3,0,5,3>: Cost 4 vext1 <0,3,0,5>, <3,4,5,6>
+ 2666573766U, // <3,0,5,4>: Cost 3 vext2 <u,2,3,0>, <5,4,7,6>
+ 2666573828U, // <3,0,5,5>: Cost 3 vext2 <u,2,3,0>, <5,5,5,5>
+ 2732966354U, // <3,0,5,6>: Cost 3 vext3 LHS, <0,5,6,7>
+ 2666573992U, // <3,0,5,7>: Cost 3 vext2 <u,2,3,0>, <5,7,5,7>
+ 3040886940U, // <3,0,5,u>: Cost 3 vtrnl <3,4,5,6>, LHS
+ 2685190637U, // <3,0,6,0>: Cost 3 vext3 LHS, <0,6,0,7>
+ 2732966390U, // <3,0,6,1>: Cost 3 vext3 LHS, <0,6,1,7>
+ 2689835519U, // <3,0,6,2>: Cost 3 vext3 LHS, <0,6,2,7>
+ 3667724438U, // <3,0,6,3>: Cost 4 vext1 <7,3,0,6>, <3,0,1,2>
+ 3763577355U, // <3,0,6,4>: Cost 4 vext3 LHS, <0,6,4,1>
+ 3806708243U, // <3,0,6,5>: Cost 4 vext3 LHS, <0,6,5,0>
+ 2666574648U, // <3,0,6,6>: Cost 3 vext2 <u,2,3,0>, <6,6,6,6>
+ 2657948520U, // <3,0,6,7>: Cost 3 vext2 <6,7,3,0>, <6,7,3,0>
+ 2689835573U, // <3,0,6,u>: Cost 3 vext3 LHS, <0,6,u,7>
+ 2666574842U, // <3,0,7,0>: Cost 3 vext2 <u,2,3,0>, <7,0,1,2>
+ 2685633095U, // <3,0,7,1>: Cost 3 vext3 LHS, <0,7,1,7>
+ 2660603052U, // <3,0,7,2>: Cost 3 vext2 <7,2,3,0>, <7,2,3,0>
+ 3643844997U, // <3,0,7,3>: Cost 4 vext1 <3,3,0,7>, <3,3,0,7>
+ 2666575206U, // <3,0,7,4>: Cost 3 vext2 <u,2,3,0>, <7,4,5,6>
+ 3655790391U, // <3,0,7,5>: Cost 4 vext1 <5,3,0,7>, <5,3,0,7>
+ 3731690968U, // <3,0,7,6>: Cost 4 vext2 <6,7,3,0>, <7,6,0,3>
+ 2666575468U, // <3,0,7,7>: Cost 3 vext2 <u,2,3,0>, <7,7,7,7>
+ 2664584850U, // <3,0,7,u>: Cost 3 vext2 <7,u,3,0>, <7,u,3,0>
+ 1616093834U, // <3,0,u,0>: Cost 2 vext3 LHS, <0,u,0,2>
+ 1611891346U, // <3,0,u,1>: Cost 2 vext3 LHS, <0,u,1,1>
+ 537707165U, // <3,0,u,2>: Cost 1 vext3 LHS, LHS
+ 2689835684U, // <3,0,u,3>: Cost 3 vext3 LHS, <0,u,3,1>
+ 1616093874U, // <3,0,u,4>: Cost 2 vext3 LHS, <0,u,4,6>
+ 1551030426U, // <3,0,u,5>: Cost 2 vext2 <1,2,3,0>, RHS
+ 2624772304U, // <3,0,u,6>: Cost 3 vext2 <1,2,3,0>, <u,6,3,7>
+ 2594002154U, // <3,0,u,7>: Cost 3 vext1 <7,3,0,u>, <7,3,0,u>
+ 537707219U, // <3,0,u,u>: Cost 1 vext3 LHS, LHS
+ 2552201318U, // <3,1,0,0>: Cost 3 vext1 <0,3,1,0>, LHS
+ 2618802278U, // <3,1,0,1>: Cost 3 vext2 <0,2,3,1>, LHS
+ 2618802366U, // <3,1,0,2>: Cost 3 vext2 <0,2,3,1>, <0,2,3,1>
+ 1611449078U, // <3,1,0,3>: Cost 2 vext3 LHS, <1,0,3,2>
+ 2552204598U, // <3,1,0,4>: Cost 3 vext1 <0,3,1,0>, RHS
+ 2732966663U, // <3,1,0,5>: Cost 3 vext3 LHS, <1,0,5,1>
+ 3906258396U, // <3,1,0,6>: Cost 4 vuzpr <2,3,0,1>, <2,0,4,6>
+ 3667752171U, // <3,1,0,7>: Cost 4 vext1 <7,3,1,0>, <7,3,1,0>
+ 1611891491U, // <3,1,0,u>: Cost 2 vext3 LHS, <1,0,u,2>
+ 2689835819U, // <3,1,1,0>: Cost 3 vext3 LHS, <1,1,0,1>
+ 1611449140U, // <3,1,1,1>: Cost 2 vext3 LHS, <1,1,1,1>
+ 2624775063U, // <3,1,1,2>: Cost 3 vext2 <1,2,3,1>, <1,2,3,1>
+ 1611891528U, // <3,1,1,3>: Cost 2 vext3 LHS, <1,1,3,3>
+ 2689835859U, // <3,1,1,4>: Cost 3 vext3 LHS, <1,1,4,5>
+ 2689835868U, // <3,1,1,5>: Cost 3 vext3 LHS, <1,1,5,5>
+ 3763577701U, // <3,1,1,6>: Cost 4 vext3 LHS, <1,1,6,5>
+ 3765273452U, // <3,1,1,7>: Cost 4 vext3 <1,1,7,3>, <1,1,7,3>
+ 1611891573U, // <3,1,1,u>: Cost 2 vext3 LHS, <1,1,u,3>
+ 2629420494U, // <3,1,2,0>: Cost 3 vext2 <2,0,3,1>, <2,0,3,1>
+ 2689835911U, // <3,1,2,1>: Cost 3 vext3 LHS, <1,2,1,3>
+ 2564163248U, // <3,1,2,2>: Cost 3 vext1 <2,3,1,2>, <2,3,1,2>
+ 1611449238U, // <3,1,2,3>: Cost 2 vext3 LHS, <1,2,3,0>
+ 2564164918U, // <3,1,2,4>: Cost 3 vext1 <2,3,1,2>, RHS
+ 2689835947U, // <3,1,2,5>: Cost 3 vext3 LHS, <1,2,5,3>
+ 3692545978U, // <3,1,2,6>: Cost 4 vext2 <0,2,3,1>, <2,6,3,7>
+ 2732966842U, // <3,1,2,7>: Cost 3 vext3 LHS, <1,2,7,0>
+ 1611891651U, // <3,1,2,u>: Cost 2 vext3 LHS, <1,2,u,0>
+ 1484456038U, // <3,1,3,0>: Cost 2 vext1 <1,3,1,3>, LHS
+ 1611891672U, // <3,1,3,1>: Cost 2 vext3 LHS, <1,3,1,3>
+ 2685633502U, // <3,1,3,2>: Cost 3 vext3 LHS, <1,3,2,0>
+ 2685633512U, // <3,1,3,3>: Cost 3 vext3 LHS, <1,3,3,1>
+ 1484459318U, // <3,1,3,4>: Cost 2 vext1 <1,3,1,3>, RHS
+ 1611891712U, // <3,1,3,5>: Cost 2 vext3 LHS, <1,3,5,7>
+ 2689836041U, // <3,1,3,6>: Cost 3 vext3 LHS, <1,3,6,7>
+ 2733409294U, // <3,1,3,7>: Cost 3 vext3 LHS, <1,3,7,3>
+ 1611891735U, // <3,1,3,u>: Cost 2 vext3 LHS, <1,3,u,3>
+ 2552234086U, // <3,1,4,0>: Cost 3 vext1 <0,3,1,4>, LHS
+ 2732966955U, // <3,1,4,1>: Cost 3 vext3 LHS, <1,4,1,5>
+ 2732966964U, // <3,1,4,2>: Cost 3 vext3 LHS, <1,4,2,5>
+ 2685633597U, // <3,1,4,3>: Cost 3 vext3 LHS, <1,4,3,5>
+ 2552237366U, // <3,1,4,4>: Cost 3 vext1 <0,3,1,4>, RHS
+ 2618805558U, // <3,1,4,5>: Cost 3 vext2 <0,2,3,1>, RHS
+ 2769472822U, // <3,1,4,6>: Cost 3 vuzpl <3,0,1,2>, RHS
+ 3667784943U, // <3,1,4,7>: Cost 4 vext1 <7,3,1,4>, <7,3,1,4>
+ 2685633642U, // <3,1,4,u>: Cost 3 vext3 LHS, <1,4,u,5>
+ 2689836143U, // <3,1,5,0>: Cost 3 vext3 LHS, <1,5,0,1>
+ 2564187280U, // <3,1,5,1>: Cost 3 vext1 <2,3,1,5>, <1,5,3,7>
+ 2564187827U, // <3,1,5,2>: Cost 3 vext1 <2,3,1,5>, <2,3,1,5>
+ 1611891856U, // <3,1,5,3>: Cost 2 vext3 LHS, <1,5,3,7>
+ 2689836183U, // <3,1,5,4>: Cost 3 vext3 LHS, <1,5,4,5>
+ 3759375522U, // <3,1,5,5>: Cost 4 vext3 LHS, <1,5,5,7>
+ 3720417378U, // <3,1,5,6>: Cost 4 vext2 <4,u,3,1>, <5,6,7,0>
+ 2832518454U, // <3,1,5,7>: Cost 3 vuzpr <2,3,0,1>, RHS
+ 1611891901U, // <3,1,5,u>: Cost 2 vext3 LHS, <1,5,u,7>
+ 3763578048U, // <3,1,6,0>: Cost 4 vext3 LHS, <1,6,0,1>
+ 2689836239U, // <3,1,6,1>: Cost 3 vext3 LHS, <1,6,1,7>
+ 2732967128U, // <3,1,6,2>: Cost 3 vext3 LHS, <1,6,2,7>
+ 2685633761U, // <3,1,6,3>: Cost 3 vext3 LHS, <1,6,3,7>
+ 3763578088U, // <3,1,6,4>: Cost 4 vext3 LHS, <1,6,4,5>
+ 2689836275U, // <3,1,6,5>: Cost 3 vext3 LHS, <1,6,5,7>
+ 3763578108U, // <3,1,6,6>: Cost 4 vext3 LHS, <1,6,6,7>
+ 2732967166U, // <3,1,6,7>: Cost 3 vext3 LHS, <1,6,7,0>
+ 2685633806U, // <3,1,6,u>: Cost 3 vext3 LHS, <1,6,u,7>
+ 3631972454U, // <3,1,7,0>: Cost 4 vext1 <1,3,1,7>, LHS
+ 2659947612U, // <3,1,7,1>: Cost 3 vext2 <7,1,3,1>, <7,1,3,1>
+ 4036102294U, // <3,1,7,2>: Cost 4 vzipr <1,5,3,7>, <3,0,1,2>
+ 3095396454U, // <3,1,7,3>: Cost 3 vtrnr <1,3,5,7>, LHS
+ 3631975734U, // <3,1,7,4>: Cost 4 vext1 <1,3,1,7>, RHS
+ 2222982144U, // <3,1,7,5>: Cost 3 vrev <1,3,5,7>
+ 3296797705U, // <3,1,7,6>: Cost 4 vrev <1,3,6,7>
+ 3720418924U, // <3,1,7,7>: Cost 4 vext2 <4,u,3,1>, <7,7,7,7>
+ 3095396459U, // <3,1,7,u>: Cost 3 vtrnr <1,3,5,7>, LHS
+ 1484496998U, // <3,1,u,0>: Cost 2 vext1 <1,3,1,u>, LHS
+ 1611892077U, // <3,1,u,1>: Cost 2 vext3 LHS, <1,u,1,3>
+ 2685633907U, // <3,1,u,2>: Cost 3 vext3 LHS, <1,u,2,0>
+ 1611892092U, // <3,1,u,3>: Cost 2 vext3 LHS, <1,u,3,0>
+ 1484500278U, // <3,1,u,4>: Cost 2 vext1 <1,3,1,u>, RHS
+ 1611892117U, // <3,1,u,5>: Cost 2 vext3 LHS, <1,u,5,7>
+ 2685633950U, // <3,1,u,6>: Cost 3 vext3 LHS, <1,u,6,7>
+ 2832518697U, // <3,1,u,7>: Cost 3 vuzpr <2,3,0,1>, RHS
+ 1611892140U, // <3,1,u,u>: Cost 2 vext3 LHS, <1,u,u,3>
+ 2623455232U, // <3,2,0,0>: Cost 3 vext2 <1,0,3,2>, <0,0,0,0>
+ 1549713510U, // <3,2,0,1>: Cost 2 vext2 <1,0,3,2>, LHS
+ 2689836484U, // <3,2,0,2>: Cost 3 vext3 LHS, <2,0,2,0>
+ 2685633997U, // <3,2,0,3>: Cost 3 vext3 LHS, <2,0,3,0>
+ 2623455570U, // <3,2,0,4>: Cost 3 vext2 <1,0,3,2>, <0,4,1,5>
+ 2732967398U, // <3,2,0,5>: Cost 3 vext3 LHS, <2,0,5,7>
+ 2689836524U, // <3,2,0,6>: Cost 3 vext3 LHS, <2,0,6,4>
+ 2229044964U, // <3,2,0,7>: Cost 3 vrev <2,3,7,0>
+ 1549714077U, // <3,2,0,u>: Cost 2 vext2 <1,0,3,2>, LHS
+ 1549714166U, // <3,2,1,0>: Cost 2 vext2 <1,0,3,2>, <1,0,3,2>
+ 2623456052U, // <3,2,1,1>: Cost 3 vext2 <1,0,3,2>, <1,1,1,1>
+ 2623456150U, // <3,2,1,2>: Cost 3 vext2 <1,0,3,2>, <1,2,3,0>
+ 2685634079U, // <3,2,1,3>: Cost 3 vext3 LHS, <2,1,3,1>
+ 2552286518U, // <3,2,1,4>: Cost 3 vext1 <0,3,2,1>, RHS
+ 2623456400U, // <3,2,1,5>: Cost 3 vext2 <1,0,3,2>, <1,5,3,7>
+ 2689836604U, // <3,2,1,6>: Cost 3 vext3 LHS, <2,1,6,3>
+ 3667834101U, // <3,2,1,7>: Cost 4 vext1 <7,3,2,1>, <7,3,2,1>
+ 1155385070U, // <3,2,1,u>: Cost 2 vrev <2,3,u,1>
+ 2689836629U, // <3,2,2,0>: Cost 3 vext3 LHS, <2,2,0,1>
+ 2689836640U, // <3,2,2,1>: Cost 3 vext3 LHS, <2,2,1,3>
+ 1611449960U, // <3,2,2,2>: Cost 2 vext3 LHS, <2,2,2,2>
+ 1611892338U, // <3,2,2,3>: Cost 2 vext3 LHS, <2,2,3,3>
+ 2689836669U, // <3,2,2,4>: Cost 3 vext3 LHS, <2,2,4,5>
+ 2689836680U, // <3,2,2,5>: Cost 3 vext3 LHS, <2,2,5,7>
+ 2689836688U, // <3,2,2,6>: Cost 3 vext3 LHS, <2,2,6,6>
+ 3763578518U, // <3,2,2,7>: Cost 4 vext3 LHS, <2,2,7,3>
+ 1611892383U, // <3,2,2,u>: Cost 2 vext3 LHS, <2,2,u,3>
+ 1611450022U, // <3,2,3,0>: Cost 2 vext3 LHS, <2,3,0,1>
+ 2685191854U, // <3,2,3,1>: Cost 3 vext3 LHS, <2,3,1,0>
+ 2685191865U, // <3,2,3,2>: Cost 3 vext3 LHS, <2,3,2,2>
+ 2685191875U, // <3,2,3,3>: Cost 3 vext3 LHS, <2,3,3,3>
+ 1611450062U, // <3,2,3,4>: Cost 2 vext3 LHS, <2,3,4,5>
+ 2732967635U, // <3,2,3,5>: Cost 3 vext3 LHS, <2,3,5,1>
+ 2732967645U, // <3,2,3,6>: Cost 3 vext3 LHS, <2,3,6,2>
+ 2732967652U, // <3,2,3,7>: Cost 3 vext3 LHS, <2,3,7,0>
+ 1611450094U, // <3,2,3,u>: Cost 2 vext3 LHS, <2,3,u,1>
+ 2558279782U, // <3,2,4,0>: Cost 3 vext1 <1,3,2,4>, LHS
+ 2558280602U, // <3,2,4,1>: Cost 3 vext1 <1,3,2,4>, <1,2,3,4>
+ 2732967692U, // <3,2,4,2>: Cost 3 vext3 LHS, <2,4,2,4>
+ 2685634326U, // <3,2,4,3>: Cost 3 vext3 LHS, <2,4,3,5>
+ 2558283062U, // <3,2,4,4>: Cost 3 vext1 <1,3,2,4>, RHS
+ 1549716790U, // <3,2,4,5>: Cost 2 vext2 <1,0,3,2>, RHS
+ 2689836844U, // <3,2,4,6>: Cost 3 vext3 LHS, <2,4,6,0>
+ 2229077736U, // <3,2,4,7>: Cost 3 vrev <2,3,7,4>
+ 1549717033U, // <3,2,4,u>: Cost 2 vext2 <1,0,3,2>, RHS
+ 2552316006U, // <3,2,5,0>: Cost 3 vext1 <0,3,2,5>, LHS
+ 2228643507U, // <3,2,5,1>: Cost 3 vrev <2,3,1,5>
+ 2689836896U, // <3,2,5,2>: Cost 3 vext3 LHS, <2,5,2,7>
+ 2685634408U, // <3,2,5,3>: Cost 3 vext3 LHS, <2,5,3,6>
+ 1155122894U, // <3,2,5,4>: Cost 2 vrev <2,3,4,5>
+ 2665263108U, // <3,2,5,5>: Cost 3 vext2 <u,0,3,2>, <5,5,5,5>
+ 2689836932U, // <3,2,5,6>: Cost 3 vext3 LHS, <2,5,6,7>
+ 2665263272U, // <3,2,5,7>: Cost 3 vext2 <u,0,3,2>, <5,7,5,7>
+ 1155417842U, // <3,2,5,u>: Cost 2 vrev <2,3,u,5>
+ 2689836953U, // <3,2,6,0>: Cost 3 vext3 LHS, <2,6,0,1>
+ 2689836964U, // <3,2,6,1>: Cost 3 vext3 LHS, <2,6,1,3>
+ 2689836976U, // <3,2,6,2>: Cost 3 vext3 LHS, <2,6,2,6>
+ 1611892666U, // <3,2,6,3>: Cost 2 vext3 LHS, <2,6,3,7>
+ 2689836993U, // <3,2,6,4>: Cost 3 vext3 LHS, <2,6,4,5>
+ 2689837004U, // <3,2,6,5>: Cost 3 vext3 LHS, <2,6,5,7>
+ 2689837013U, // <3,2,6,6>: Cost 3 vext3 LHS, <2,6,6,7>
+ 2665263950U, // <3,2,6,7>: Cost 3 vext2 <u,0,3,2>, <6,7,0,1>
+ 1611892711U, // <3,2,6,u>: Cost 2 vext3 LHS, <2,6,u,7>
+ 2665264122U, // <3,2,7,0>: Cost 3 vext2 <u,0,3,2>, <7,0,1,2>
+ 2623460419U, // <3,2,7,1>: Cost 3 vext2 <1,0,3,2>, <7,1,0,3>
+ 4169138340U, // <3,2,7,2>: Cost 4 vtrnr <1,3,5,7>, <0,2,0,2>
+ 2962358374U, // <3,2,7,3>: Cost 3 vzipr <1,5,3,7>, LHS
+ 2665264486U, // <3,2,7,4>: Cost 3 vext2 <u,0,3,2>, <7,4,5,6>
+ 2228954841U, // <3,2,7,5>: Cost 3 vrev <2,3,5,7>
+ 2229028578U, // <3,2,7,6>: Cost 3 vrev <2,3,6,7>
+ 2665264748U, // <3,2,7,7>: Cost 3 vext2 <u,0,3,2>, <7,7,7,7>
+ 2962358379U, // <3,2,7,u>: Cost 3 vzipr <1,5,3,7>, LHS
+ 1611892795U, // <3,2,u,0>: Cost 2 vext3 LHS, <2,u,0,1>
+ 1549719342U, // <3,2,u,1>: Cost 2 vext2 <1,0,3,2>, LHS
+ 1611449960U, // <3,2,u,2>: Cost 2 vext3 LHS, <2,2,2,2>
+ 1611892824U, // <3,2,u,3>: Cost 2 vext3 LHS, <2,u,3,3>
+ 1611892835U, // <3,2,u,4>: Cost 2 vext3 LHS, <2,u,4,5>
+ 1549719706U, // <3,2,u,5>: Cost 2 vext2 <1,0,3,2>, RHS
+ 2689837168U, // <3,2,u,6>: Cost 3 vext3 LHS, <2,u,6,0>
+ 2665265408U, // <3,2,u,7>: Cost 3 vext2 <u,0,3,2>, <u,7,0,1>
+ 1611892867U, // <3,2,u,u>: Cost 2 vext3 LHS, <2,u,u,1>
+ 2685192331U, // <3,3,0,0>: Cost 3 vext3 LHS, <3,0,0,0>
+ 1611450518U, // <3,3,0,1>: Cost 2 vext3 LHS, <3,0,1,2>
+ 2685634717U, // <3,3,0,2>: Cost 3 vext3 LHS, <3,0,2,0>
+ 2564294806U, // <3,3,0,3>: Cost 3 vext1 <2,3,3,0>, <3,0,1,2>
+ 2685634736U, // <3,3,0,4>: Cost 3 vext3 LHS, <3,0,4,1>
+ 2732968122U, // <3,3,0,5>: Cost 3 vext3 LHS, <3,0,5,2>
+ 3763579075U, // <3,3,0,6>: Cost 4 vext3 LHS, <3,0,6,2>
+ 4034053264U, // <3,3,0,7>: Cost 4 vzipr <1,2,3,0>, <1,5,3,7>
+ 1611450581U, // <3,3,0,u>: Cost 2 vext3 LHS, <3,0,u,2>
+ 2685192415U, // <3,3,1,0>: Cost 3 vext3 LHS, <3,1,0,3>
+ 1550385992U, // <3,3,1,1>: Cost 2 vext2 <1,1,3,3>, <1,1,3,3>
+ 2685192433U, // <3,3,1,2>: Cost 3 vext3 LHS, <3,1,2,3>
+ 2685634808U, // <3,3,1,3>: Cost 3 vext3 LHS, <3,1,3,1>
+ 2558332214U, // <3,3,1,4>: Cost 3 vext1 <1,3,3,1>, RHS
+ 2685634828U, // <3,3,1,5>: Cost 3 vext3 LHS, <3,1,5,3>
+ 3759376661U, // <3,3,1,6>: Cost 4 vext3 LHS, <3,1,6,3>
+ 2703477022U, // <3,3,1,7>: Cost 3 vext3 <3,1,7,3>, <3,1,7,3>
+ 1555031423U, // <3,3,1,u>: Cost 2 vext2 <1,u,3,3>, <1,u,3,3>
+ 2564309094U, // <3,3,2,0>: Cost 3 vext1 <2,3,3,2>, LHS
+ 2630100513U, // <3,3,2,1>: Cost 3 vext2 <2,1,3,3>, <2,1,3,3>
+ 1557022322U, // <3,3,2,2>: Cost 2 vext2 <2,2,3,3>, <2,2,3,3>
+ 2685192520U, // <3,3,2,3>: Cost 3 vext3 LHS, <3,2,3,0>
+ 2564312374U, // <3,3,2,4>: Cost 3 vext1 <2,3,3,2>, RHS
+ 2732968286U, // <3,3,2,5>: Cost 3 vext3 LHS, <3,2,5,4>
+ 2685634918U, // <3,3,2,6>: Cost 3 vext3 LHS, <3,2,6,3>
+ 2704140655U, // <3,3,2,7>: Cost 3 vext3 <3,2,7,3>, <3,2,7,3>
+ 1561004120U, // <3,3,2,u>: Cost 2 vext2 <2,u,3,3>, <2,u,3,3>
+ 1496547430U, // <3,3,3,0>: Cost 2 vext1 <3,3,3,3>, LHS
+ 2624129256U, // <3,3,3,1>: Cost 3 vext2 <1,1,3,3>, <3,1,1,3>
+ 2630764866U, // <3,3,3,2>: Cost 3 vext2 <2,2,3,3>, <3,2,2,3>
+ 336380006U, // <3,3,3,3>: Cost 1 vdup3 LHS
+ 1496550710U, // <3,3,3,4>: Cost 2 vext1 <3,3,3,3>, RHS
+ 2732968368U, // <3,3,3,5>: Cost 3 vext3 LHS, <3,3,5,5>
+ 2624129683U, // <3,3,3,6>: Cost 3 vext2 <1,1,3,3>, <3,6,3,7>
+ 2594182400U, // <3,3,3,7>: Cost 3 vext1 <7,3,3,3>, <7,3,3,3>
+ 336380006U, // <3,3,3,u>: Cost 1 vdup3 LHS
+ 2558353510U, // <3,3,4,0>: Cost 3 vext1 <1,3,3,4>, LHS
+ 2558354411U, // <3,3,4,1>: Cost 3 vext1 <1,3,3,4>, <1,3,3,4>
+ 2564327108U, // <3,3,4,2>: Cost 3 vext1 <2,3,3,4>, <2,3,3,4>
+ 2564327938U, // <3,3,4,3>: Cost 3 vext1 <2,3,3,4>, <3,4,5,6>
+ 2960343962U, // <3,3,4,4>: Cost 3 vzipr <1,2,3,4>, <1,2,3,4>
+ 1611893250U, // <3,3,4,5>: Cost 2 vext3 LHS, <3,4,5,6>
+ 2771619126U, // <3,3,4,6>: Cost 3 vuzpl <3,3,3,3>, RHS
+ 4034086032U, // <3,3,4,7>: Cost 4 vzipr <1,2,3,4>, <1,5,3,7>
+ 1611893277U, // <3,3,4,u>: Cost 2 vext3 LHS, <3,4,u,6>
+ 2558361702U, // <3,3,5,0>: Cost 3 vext1 <1,3,3,5>, LHS
+ 2558362604U, // <3,3,5,1>: Cost 3 vext1 <1,3,3,5>, <1,3,3,5>
+ 2558363342U, // <3,3,5,2>: Cost 3 vext1 <1,3,3,5>, <2,3,4,5>
+ 2732968512U, // <3,3,5,3>: Cost 3 vext3 LHS, <3,5,3,5>
+ 2558364982U, // <3,3,5,4>: Cost 3 vext1 <1,3,3,5>, RHS
+ 3101279950U, // <3,3,5,5>: Cost 3 vtrnr <2,3,4,5>, <2,3,4,5>
+ 2665934946U, // <3,3,5,6>: Cost 3 vext2 <u,1,3,3>, <5,6,7,0>
+ 2826636598U, // <3,3,5,7>: Cost 3 vuzpr <1,3,1,3>, RHS
+ 2826636599U, // <3,3,5,u>: Cost 3 vuzpr <1,3,1,3>, RHS
+ 2732968568U, // <3,3,6,0>: Cost 3 vext3 LHS, <3,6,0,7>
+ 3763579521U, // <3,3,6,1>: Cost 4 vext3 LHS, <3,6,1,7>
+ 2732968586U, // <3,3,6,2>: Cost 3 vext3 LHS, <3,6,2,7>
+ 2732968595U, // <3,3,6,3>: Cost 3 vext3 LHS, <3,6,3,7>
+ 2732968604U, // <3,3,6,4>: Cost 3 vext3 LHS, <3,6,4,7>
+ 3763579557U, // <3,3,6,5>: Cost 4 vext3 LHS, <3,6,5,7>
+ 2732968621U, // <3,3,6,6>: Cost 3 vext3 LHS, <3,6,6,6>
+ 2657973099U, // <3,3,6,7>: Cost 3 vext2 <6,7,3,3>, <6,7,3,3>
+ 2658636732U, // <3,3,6,u>: Cost 3 vext2 <6,u,3,3>, <6,u,3,3>
+ 2558378086U, // <3,3,7,0>: Cost 3 vext1 <1,3,3,7>, LHS
+ 2558378990U, // <3,3,7,1>: Cost 3 vext1 <1,3,3,7>, <1,3,3,7>
+ 2564351687U, // <3,3,7,2>: Cost 3 vext1 <2,3,3,7>, <2,3,3,7>
+ 2661291264U, // <3,3,7,3>: Cost 3 vext2 <7,3,3,3>, <7,3,3,3>
+ 2558381366U, // <3,3,7,4>: Cost 3 vext1 <1,3,3,7>, RHS
+ 2732968694U, // <3,3,7,5>: Cost 3 vext3 LHS, <3,7,5,7>
+ 3781126907U, // <3,3,7,6>: Cost 4 vext3 <3,7,6,3>, <3,7,6,3>
+ 3095397376U, // <3,3,7,7>: Cost 3 vtrnr <1,3,5,7>, <1,3,5,7>
+ 2558383918U, // <3,3,7,u>: Cost 3 vext1 <1,3,3,7>, LHS
+ 1496547430U, // <3,3,u,0>: Cost 2 vext1 <3,3,3,3>, LHS
+ 1611893534U, // <3,3,u,1>: Cost 2 vext3 LHS, <3,u,1,2>
+ 1592858504U, // <3,3,u,2>: Cost 2 vext2 <u,2,3,3>, <u,2,3,3>
+ 336380006U, // <3,3,u,3>: Cost 1 vdup3 LHS
+ 1496550710U, // <3,3,u,4>: Cost 2 vext1 <3,3,3,3>, RHS
+ 1611893574U, // <3,3,u,5>: Cost 2 vext3 LHS, <3,u,5,6>
+ 2690280268U, // <3,3,u,6>: Cost 3 vext3 LHS, <3,u,6,3>
+ 2826636841U, // <3,3,u,7>: Cost 3 vuzpr <1,3,1,3>, RHS
+ 336380006U, // <3,3,u,u>: Cost 1 vdup3 LHS
+ 2624798720U, // <3,4,0,0>: Cost 3 vext2 <1,2,3,4>, <0,0,0,0>
+ 1551056998U, // <3,4,0,1>: Cost 2 vext2 <1,2,3,4>, LHS
+ 2624798884U, // <3,4,0,2>: Cost 3 vext2 <1,2,3,4>, <0,2,0,2>
+ 3693232384U, // <3,4,0,3>: Cost 4 vext2 <0,3,3,4>, <0,3,1,4>
+ 2624799058U, // <3,4,0,4>: Cost 3 vext2 <1,2,3,4>, <0,4,1,5>
+ 1659227026U, // <3,4,0,5>: Cost 2 vext3 LHS, <4,0,5,1>
+ 1659227036U, // <3,4,0,6>: Cost 2 vext3 LHS, <4,0,6,2>
+ 3667973382U, // <3,4,0,7>: Cost 4 vext1 <7,3,4,0>, <7,3,4,0>
+ 1551057565U, // <3,4,0,u>: Cost 2 vext2 <1,2,3,4>, LHS
+ 2624799478U, // <3,4,1,0>: Cost 3 vext2 <1,2,3,4>, <1,0,3,2>
+ 2624799540U, // <3,4,1,1>: Cost 3 vext2 <1,2,3,4>, <1,1,1,1>
+ 1551057818U, // <3,4,1,2>: Cost 2 vext2 <1,2,3,4>, <1,2,3,4>
+ 2624799704U, // <3,4,1,3>: Cost 3 vext2 <1,2,3,4>, <1,3,1,3>
+ 2564377910U, // <3,4,1,4>: Cost 3 vext1 <2,3,4,1>, RHS
+ 2689838050U, // <3,4,1,5>: Cost 3 vext3 LHS, <4,1,5,0>
+ 2689838062U, // <3,4,1,6>: Cost 3 vext3 LHS, <4,1,6,3>
+ 2628117807U, // <3,4,1,7>: Cost 3 vext2 <1,7,3,4>, <1,7,3,4>
+ 1555039616U, // <3,4,1,u>: Cost 2 vext2 <1,u,3,4>, <1,u,3,4>
+ 3626180710U, // <3,4,2,0>: Cost 4 vext1 <0,3,4,2>, LHS
+ 2624800298U, // <3,4,2,1>: Cost 3 vext2 <1,2,3,4>, <2,1,4,3>
+ 2624800360U, // <3,4,2,2>: Cost 3 vext2 <1,2,3,4>, <2,2,2,2>
+ 2624800422U, // <3,4,2,3>: Cost 3 vext2 <1,2,3,4>, <2,3,0,1>
+ 2624800514U, // <3,4,2,4>: Cost 3 vext2 <1,2,3,4>, <2,4,1,3>
+ 2709965878U, // <3,4,2,5>: Cost 3 vext3 <4,2,5,3>, <4,2,5,3>
+ 2689838140U, // <3,4,2,6>: Cost 3 vext3 LHS, <4,2,6,0>
+ 2634090504U, // <3,4,2,7>: Cost 3 vext2 <2,7,3,4>, <2,7,3,4>
+ 2689838158U, // <3,4,2,u>: Cost 3 vext3 LHS, <4,2,u,0>
+ 2624800918U, // <3,4,3,0>: Cost 3 vext2 <1,2,3,4>, <3,0,1,2>
+ 2636081403U, // <3,4,3,1>: Cost 3 vext2 <3,1,3,4>, <3,1,3,4>
+ 2636745036U, // <3,4,3,2>: Cost 3 vext2 <3,2,3,4>, <3,2,3,4>
+ 2624801180U, // <3,4,3,3>: Cost 3 vext2 <1,2,3,4>, <3,3,3,3>
+ 2624801232U, // <3,4,3,4>: Cost 3 vext2 <1,2,3,4>, <3,4,0,1>
+ 2905836854U, // <3,4,3,5>: Cost 3 vzipl <3,3,3,3>, RHS
+ 3040054582U, // <3,4,3,6>: Cost 3 vtrnl <3,3,3,3>, RHS
+ 3702524611U, // <3,4,3,7>: Cost 4 vext2 <1,u,3,4>, <3,7,0,1>
+ 2624801566U, // <3,4,3,u>: Cost 3 vext2 <1,2,3,4>, <3,u,1,2>
+ 2564399206U, // <3,4,4,0>: Cost 3 vext1 <2,3,4,4>, LHS
+ 2564400026U, // <3,4,4,1>: Cost 3 vext1 <2,3,4,4>, <1,2,3,4>
+ 2564400845U, // <3,4,4,2>: Cost 3 vext1 <2,3,4,4>, <2,3,4,4>
+ 2570373542U, // <3,4,4,3>: Cost 3 vext1 <3,3,4,4>, <3,3,4,4>
+ 1659227344U, // <3,4,4,4>: Cost 2 vext3 LHS, <4,4,4,4>
+ 1551060278U, // <3,4,4,5>: Cost 2 vext2 <1,2,3,4>, RHS
+ 1659227364U, // <3,4,4,6>: Cost 2 vext3 LHS, <4,4,6,6>
+ 3668006154U, // <3,4,4,7>: Cost 4 vext1 <7,3,4,4>, <7,3,4,4>
+ 1551060521U, // <3,4,4,u>: Cost 2 vext2 <1,2,3,4>, RHS
+ 1490665574U, // <3,4,5,0>: Cost 2 vext1 <2,3,4,5>, LHS
+ 2689838341U, // <3,4,5,1>: Cost 3 vext3 LHS, <4,5,1,3>
+ 1490667214U, // <3,4,5,2>: Cost 2 vext1 <2,3,4,5>, <2,3,4,5>
+ 2564409494U, // <3,4,5,3>: Cost 3 vext1 <2,3,4,5>, <3,0,1,2>
+ 1490668854U, // <3,4,5,4>: Cost 2 vext1 <2,3,4,5>, RHS
+ 2689838381U, // <3,4,5,5>: Cost 3 vext3 LHS, <4,5,5,7>
+ 537709878U, // <3,4,5,6>: Cost 1 vext3 LHS, RHS
+ 2594272523U, // <3,4,5,7>: Cost 3 vext1 <7,3,4,5>, <7,3,4,5>
+ 537709896U, // <3,4,5,u>: Cost 1 vext3 LHS, RHS
+ 2689838411U, // <3,4,6,0>: Cost 3 vext3 LHS, <4,6,0,1>
+ 2558444534U, // <3,4,6,1>: Cost 3 vext1 <1,3,4,6>, <1,3,4,6>
+ 2666607098U, // <3,4,6,2>: Cost 3 vext2 <u,2,3,4>, <6,2,7,3>
+ 2558446082U, // <3,4,6,3>: Cost 3 vext1 <1,3,4,6>, <3,4,5,6>
+ 1659227508U, // <3,4,6,4>: Cost 2 vext3 LHS, <4,6,4,6>
+ 2689838462U, // <3,4,6,5>: Cost 3 vext3 LHS, <4,6,5,7>
+ 2689838471U, // <3,4,6,6>: Cost 3 vext3 LHS, <4,6,6,7>
+ 2657981292U, // <3,4,6,7>: Cost 3 vext2 <6,7,3,4>, <6,7,3,4>
+ 1659227540U, // <3,4,6,u>: Cost 2 vext3 LHS, <4,6,u,2>
+ 2666607610U, // <3,4,7,0>: Cost 3 vext2 <u,2,3,4>, <7,0,1,2>
+ 3702527072U, // <3,4,7,1>: Cost 4 vext2 <1,u,3,4>, <7,1,3,5>
+ 2660635824U, // <3,4,7,2>: Cost 3 vext2 <7,2,3,4>, <7,2,3,4>
+ 3644139945U, // <3,4,7,3>: Cost 4 vext1 <3,3,4,7>, <3,3,4,7>
+ 2666607974U, // <3,4,7,4>: Cost 3 vext2 <u,2,3,4>, <7,4,5,6>
+ 2732969416U, // <3,4,7,5>: Cost 3 vext3 LHS, <4,7,5,0>
+ 2732969425U, // <3,4,7,6>: Cost 3 vext3 LHS, <4,7,6,0>
+ 2666608236U, // <3,4,7,7>: Cost 3 vext2 <u,2,3,4>, <7,7,7,7>
+ 2664617622U, // <3,4,7,u>: Cost 3 vext2 <7,u,3,4>, <7,u,3,4>
+ 1490690150U, // <3,4,u,0>: Cost 2 vext1 <2,3,4,u>, LHS
+ 1551062830U, // <3,4,u,1>: Cost 2 vext2 <1,2,3,4>, LHS
+ 1490691793U, // <3,4,u,2>: Cost 2 vext1 <2,3,4,u>, <2,3,4,u>
+ 2624804796U, // <3,4,u,3>: Cost 3 vext2 <1,2,3,4>, <u,3,0,1>
+ 1490693430U, // <3,4,u,4>: Cost 2 vext1 <2,3,4,u>, RHS
+ 1551063194U, // <3,4,u,5>: Cost 2 vext2 <1,2,3,4>, RHS
+ 537710121U, // <3,4,u,6>: Cost 1 vext3 LHS, RHS
+ 2594297102U, // <3,4,u,7>: Cost 3 vext1 <7,3,4,u>, <7,3,4,u>
+ 537710139U, // <3,4,u,u>: Cost 1 vext3 LHS, RHS
+ 3692576768U, // <3,5,0,0>: Cost 4 vext2 <0,2,3,5>, <0,0,0,0>
+ 2618835046U, // <3,5,0,1>: Cost 3 vext2 <0,2,3,5>, LHS
+ 2618835138U, // <3,5,0,2>: Cost 3 vext2 <0,2,3,5>, <0,2,3,5>
+ 3692577024U, // <3,5,0,3>: Cost 4 vext2 <0,2,3,5>, <0,3,1,4>
+ 2689838690U, // <3,5,0,4>: Cost 3 vext3 LHS, <5,0,4,1>
+ 2732969579U, // <3,5,0,5>: Cost 3 vext3 LHS, <5,0,5,1>
+ 2732969588U, // <3,5,0,6>: Cost 3 vext3 LHS, <5,0,6,1>
+ 2246963055U, // <3,5,0,7>: Cost 3 vrev <5,3,7,0>
+ 2618835613U, // <3,5,0,u>: Cost 3 vext2 <0,2,3,5>, LHS
+ 2594308198U, // <3,5,1,0>: Cost 3 vext1 <7,3,5,1>, LHS
+ 3692577588U, // <3,5,1,1>: Cost 4 vext2 <0,2,3,5>, <1,1,1,1>
+ 2624807835U, // <3,5,1,2>: Cost 3 vext2 <1,2,3,5>, <1,2,3,5>
+ 2625471468U, // <3,5,1,3>: Cost 3 vext2 <1,3,3,5>, <1,3,3,5>
+ 2626135101U, // <3,5,1,4>: Cost 3 vext2 <1,4,3,5>, <1,4,3,5>
+ 2594311888U, // <3,5,1,5>: Cost 3 vext1 <7,3,5,1>, <5,1,7,3>
+ 3699877107U, // <3,5,1,6>: Cost 4 vext2 <1,4,3,5>, <1,6,5,7>
+ 1641680592U, // <3,5,1,7>: Cost 2 vext3 <5,1,7,3>, <5,1,7,3>
+ 1641754329U, // <3,5,1,u>: Cost 2 vext3 <5,1,u,3>, <5,1,u,3>
+ 3692578274U, // <3,5,2,0>: Cost 4 vext2 <0,2,3,5>, <2,0,5,3>
+ 2630116899U, // <3,5,2,1>: Cost 3 vext2 <2,1,3,5>, <2,1,3,5>
+ 3692578408U, // <3,5,2,2>: Cost 4 vext2 <0,2,3,5>, <2,2,2,2>
+ 2625472206U, // <3,5,2,3>: Cost 3 vext2 <1,3,3,5>, <2,3,4,5>
+ 2632107798U, // <3,5,2,4>: Cost 3 vext2 <2,4,3,5>, <2,4,3,5>
+ 2715938575U, // <3,5,2,5>: Cost 3 vext3 <5,2,5,3>, <5,2,5,3>
+ 3692578746U, // <3,5,2,6>: Cost 4 vext2 <0,2,3,5>, <2,6,3,7>
+ 2716086049U, // <3,5,2,7>: Cost 3 vext3 <5,2,7,3>, <5,2,7,3>
+ 2634762330U, // <3,5,2,u>: Cost 3 vext2 <2,u,3,5>, <2,u,3,5>
+ 3692578966U, // <3,5,3,0>: Cost 4 vext2 <0,2,3,5>, <3,0,1,2>
+ 2636089596U, // <3,5,3,1>: Cost 3 vext2 <3,1,3,5>, <3,1,3,5>
+ 3699214668U, // <3,5,3,2>: Cost 4 vext2 <1,3,3,5>, <3,2,3,4>
+ 2638080412U, // <3,5,3,3>: Cost 3 vext2 <3,4,3,5>, <3,3,3,3>
+ 2618837506U, // <3,5,3,4>: Cost 3 vext2 <0,2,3,5>, <3,4,5,6>
+ 2832844494U, // <3,5,3,5>: Cost 3 vuzpr <2,3,4,5>, <2,3,4,5>
+ 4033415682U, // <3,5,3,6>: Cost 4 vzipr <1,1,3,3>, <3,4,5,6>
+ 3095072054U, // <3,5,3,7>: Cost 3 vtrnr <1,3,1,3>, RHS
+ 3095072055U, // <3,5,3,u>: Cost 3 vtrnr <1,3,1,3>, RHS
+ 2600304742U, // <3,5,4,0>: Cost 3 vext1 <u,3,5,4>, LHS
+ 3763580815U, // <3,5,4,1>: Cost 4 vext3 LHS, <5,4,1,5>
+ 2564474582U, // <3,5,4,2>: Cost 3 vext1 <2,3,5,4>, <2,3,5,4>
+ 3699879044U, // <3,5,4,3>: Cost 4 vext2 <1,4,3,5>, <4,3,5,0>
+ 2600308022U, // <3,5,4,4>: Cost 3 vext1 <u,3,5,4>, RHS
+ 2618838326U, // <3,5,4,5>: Cost 3 vext2 <0,2,3,5>, RHS
+ 2772454710U, // <3,5,4,6>: Cost 3 vuzpl <3,4,5,6>, RHS
+ 1659228102U, // <3,5,4,7>: Cost 2 vext3 LHS, <5,4,7,6>
+ 1659228111U, // <3,5,4,u>: Cost 2 vext3 LHS, <5,4,u,6>
+ 2570453094U, // <3,5,5,0>: Cost 3 vext1 <3,3,5,5>, LHS
+ 2624810704U, // <3,5,5,1>: Cost 3 vext2 <1,2,3,5>, <5,1,7,3>
+ 2570454734U, // <3,5,5,2>: Cost 3 vext1 <3,3,5,5>, <2,3,4,5>
+ 2570455472U, // <3,5,5,3>: Cost 3 vext1 <3,3,5,5>, <3,3,5,5>
+ 2570456374U, // <3,5,5,4>: Cost 3 vext1 <3,3,5,5>, RHS
+ 1659228164U, // <3,5,5,5>: Cost 2 vext3 LHS, <5,5,5,5>
+ 2732969998U, // <3,5,5,6>: Cost 3 vext3 LHS, <5,5,6,6>
+ 1659228184U, // <3,5,5,7>: Cost 2 vext3 LHS, <5,5,7,7>
+ 1659228193U, // <3,5,5,u>: Cost 2 vext3 LHS, <5,5,u,7>
+ 2732970020U, // <3,5,6,0>: Cost 3 vext3 LHS, <5,6,0,1>
+ 2732970035U, // <3,5,6,1>: Cost 3 vext3 LHS, <5,6,1,7>
+ 2564490968U, // <3,5,6,2>: Cost 3 vext1 <2,3,5,6>, <2,3,5,6>
+ 2732970050U, // <3,5,6,3>: Cost 3 vext3 LHS, <5,6,3,4>
+ 2732970060U, // <3,5,6,4>: Cost 3 vext3 LHS, <5,6,4,5>
+ 2732970071U, // <3,5,6,5>: Cost 3 vext3 LHS, <5,6,5,7>
+ 2732970080U, // <3,5,6,6>: Cost 3 vext3 LHS, <5,6,6,7>
+ 1659228258U, // <3,5,6,7>: Cost 2 vext3 LHS, <5,6,7,0>
+ 1659228267U, // <3,5,6,u>: Cost 2 vext3 LHS, <5,6,u,0>
+ 1484783718U, // <3,5,7,0>: Cost 2 vext1 <1,3,5,7>, LHS
+ 1484784640U, // <3,5,7,1>: Cost 2 vext1 <1,3,5,7>, <1,3,5,7>
+ 2558527080U, // <3,5,7,2>: Cost 3 vext1 <1,3,5,7>, <2,2,2,2>
+ 2558527638U, // <3,5,7,3>: Cost 3 vext1 <1,3,5,7>, <3,0,1,2>
+ 1484786998U, // <3,5,7,4>: Cost 2 vext1 <1,3,5,7>, RHS
+ 1659228328U, // <3,5,7,5>: Cost 2 vext3 LHS, <5,7,5,7>
+ 2732970154U, // <3,5,7,6>: Cost 3 vext3 LHS, <5,7,6,0>
+ 2558531180U, // <3,5,7,7>: Cost 3 vext1 <1,3,5,7>, <7,7,7,7>
+ 1484789550U, // <3,5,7,u>: Cost 2 vext1 <1,3,5,7>, LHS
+ 1484791910U, // <3,5,u,0>: Cost 2 vext1 <1,3,5,u>, LHS
+ 1484792833U, // <3,5,u,1>: Cost 2 vext1 <1,3,5,u>, <1,3,5,u>
+ 2558535272U, // <3,5,u,2>: Cost 3 vext1 <1,3,5,u>, <2,2,2,2>
+ 2558535830U, // <3,5,u,3>: Cost 3 vext1 <1,3,5,u>, <3,0,1,2>
+ 1484795190U, // <3,5,u,4>: Cost 2 vext1 <1,3,5,u>, RHS
+ 1659228409U, // <3,5,u,5>: Cost 2 vext3 LHS, <5,u,5,7>
+ 2772457626U, // <3,5,u,6>: Cost 3 vuzpl <3,4,5,6>, RHS
+ 1646326023U, // <3,5,u,7>: Cost 2 vext3 <5,u,7,3>, <5,u,7,3>
+ 1484797742U, // <3,5,u,u>: Cost 2 vext1 <1,3,5,u>, LHS
+ 2558541926U, // <3,6,0,0>: Cost 3 vext1 <1,3,6,0>, LHS
+ 2689839393U, // <3,6,0,1>: Cost 3 vext3 LHS, <6,0,1,2>
+ 2689839404U, // <3,6,0,2>: Cost 3 vext3 LHS, <6,0,2,4>
+ 3706519808U, // <3,6,0,3>: Cost 4 vext2 <2,5,3,6>, <0,3,1,4>
+ 2689839420U, // <3,6,0,4>: Cost 3 vext3 LHS, <6,0,4,2>
+ 2732970314U, // <3,6,0,5>: Cost 3 vext3 LHS, <6,0,5,7>
+ 2732970316U, // <3,6,0,6>: Cost 3 vext3 LHS, <6,0,6,0>
+ 2960313654U, // <3,6,0,7>: Cost 3 vzipr <1,2,3,0>, RHS
+ 2689839456U, // <3,6,0,u>: Cost 3 vext3 LHS, <6,0,u,2>
+ 3763581290U, // <3,6,1,0>: Cost 4 vext3 LHS, <6,1,0,3>
+ 3763581297U, // <3,6,1,1>: Cost 4 vext3 LHS, <6,1,1,1>
+ 2624816028U, // <3,6,1,2>: Cost 3 vext2 <1,2,3,6>, <1,2,3,6>
+ 3763581315U, // <3,6,1,3>: Cost 4 vext3 LHS, <6,1,3,1>
+ 2626143294U, // <3,6,1,4>: Cost 3 vext2 <1,4,3,6>, <1,4,3,6>
+ 3763581335U, // <3,6,1,5>: Cost 4 vext3 LHS, <6,1,5,3>
+ 2721321376U, // <3,6,1,6>: Cost 3 vext3 <6,1,6,3>, <6,1,6,3>
+ 2721395113U, // <3,6,1,7>: Cost 3 vext3 <6,1,7,3>, <6,1,7,3>
+ 2628797826U, // <3,6,1,u>: Cost 3 vext2 <1,u,3,6>, <1,u,3,6>
+ 2594390118U, // <3,6,2,0>: Cost 3 vext1 <7,3,6,2>, LHS
+ 2721616324U, // <3,6,2,1>: Cost 3 vext3 <6,2,1,3>, <6,2,1,3>
+ 2630788725U, // <3,6,2,2>: Cost 3 vext2 <2,2,3,6>, <2,2,3,6>
+ 3763581395U, // <3,6,2,3>: Cost 4 vext3 LHS, <6,2,3,0>
+ 2632115991U, // <3,6,2,4>: Cost 3 vext2 <2,4,3,6>, <2,4,3,6>
+ 2632779624U, // <3,6,2,5>: Cost 3 vext2 <2,5,3,6>, <2,5,3,6>
+ 2594394618U, // <3,6,2,6>: Cost 3 vext1 <7,3,6,2>, <6,2,7,3>
+ 1648316922U, // <3,6,2,7>: Cost 2 vext3 <6,2,7,3>, <6,2,7,3>
+ 1648390659U, // <3,6,2,u>: Cost 2 vext3 <6,2,u,3>, <6,2,u,3>
+ 3693914262U, // <3,6,3,0>: Cost 4 vext2 <0,4,3,6>, <3,0,1,2>
+ 3638281176U, // <3,6,3,1>: Cost 4 vext1 <2,3,6,3>, <1,3,1,3>
+ 3696568678U, // <3,6,3,2>: Cost 4 vext2 <0,u,3,6>, <3,2,6,3>
+ 2638088604U, // <3,6,3,3>: Cost 3 vext2 <3,4,3,6>, <3,3,3,3>
+ 2632780290U, // <3,6,3,4>: Cost 3 vext2 <2,5,3,6>, <3,4,5,6>
+ 3712494145U, // <3,6,3,5>: Cost 4 vext2 <3,5,3,6>, <3,5,3,6>
+ 3698559612U, // <3,6,3,6>: Cost 4 vext2 <1,2,3,6>, <3,6,1,2>
+ 2959674678U, // <3,6,3,7>: Cost 3 vzipr <1,1,3,3>, RHS
+ 2959674679U, // <3,6,3,u>: Cost 3 vzipr <1,1,3,3>, RHS
+ 3763581536U, // <3,6,4,0>: Cost 4 vext3 LHS, <6,4,0,6>
+ 2722943590U, // <3,6,4,1>: Cost 3 vext3 <6,4,1,3>, <6,4,1,3>
+ 2732970609U, // <3,6,4,2>: Cost 3 vext3 LHS, <6,4,2,5>
+ 3698560147U, // <3,6,4,3>: Cost 4 vext2 <1,2,3,6>, <4,3,6,6>
+ 2732970628U, // <3,6,4,4>: Cost 3 vext3 LHS, <6,4,4,6>
+ 2689839757U, // <3,6,4,5>: Cost 3 vext3 LHS, <6,4,5,6>
+ 2732970640U, // <3,6,4,6>: Cost 3 vext3 LHS, <6,4,6,0>
+ 2960346422U, // <3,6,4,7>: Cost 3 vzipr <1,2,3,4>, RHS
+ 2689839784U, // <3,6,4,u>: Cost 3 vext3 LHS, <6,4,u,6>
+ 2576498790U, // <3,6,5,0>: Cost 3 vext1 <4,3,6,5>, LHS
+ 3650241270U, // <3,6,5,1>: Cost 4 vext1 <4,3,6,5>, <1,0,3,2>
+ 2732970692U, // <3,6,5,2>: Cost 3 vext3 LHS, <6,5,2,7>
+ 2576501250U, // <3,6,5,3>: Cost 3 vext1 <4,3,6,5>, <3,4,5,6>
+ 2576501906U, // <3,6,5,4>: Cost 3 vext1 <4,3,6,5>, <4,3,6,5>
+ 3650244622U, // <3,6,5,5>: Cost 4 vext1 <4,3,6,5>, <5,5,6,6>
+ 4114633528U, // <3,6,5,6>: Cost 4 vtrnl <3,4,5,6>, <6,6,6,6>
+ 2732970735U, // <3,6,5,7>: Cost 3 vext3 LHS, <6,5,7,5>
+ 2576504622U, // <3,6,5,u>: Cost 3 vext1 <4,3,6,5>, LHS
+ 2732970749U, // <3,6,6,0>: Cost 3 vext3 LHS, <6,6,0,1>
+ 2724270856U, // <3,6,6,1>: Cost 3 vext3 <6,6,1,3>, <6,6,1,3>
+ 2624819706U, // <3,6,6,2>: Cost 3 vext2 <1,2,3,6>, <6,2,7,3>
+ 3656223234U, // <3,6,6,3>: Cost 4 vext1 <5,3,6,6>, <3,4,5,6>
+ 2732970788U, // <3,6,6,4>: Cost 3 vext3 LHS, <6,6,4,4>
+ 2732970800U, // <3,6,6,5>: Cost 3 vext3 LHS, <6,6,5,7>
+ 1659228984U, // <3,6,6,6>: Cost 2 vext3 LHS, <6,6,6,6>
+ 1659228994U, // <3,6,6,7>: Cost 2 vext3 LHS, <6,6,7,7>
+ 1659229003U, // <3,6,6,u>: Cost 2 vext3 LHS, <6,6,u,7>
+ 1659229006U, // <3,6,7,0>: Cost 2 vext3 LHS, <6,7,0,1>
+ 2558600201U, // <3,6,7,1>: Cost 3 vext1 <1,3,6,7>, <1,3,6,7>
+ 2558601146U, // <3,6,7,2>: Cost 3 vext1 <1,3,6,7>, <2,6,3,7>
+ 2725081963U, // <3,6,7,3>: Cost 3 vext3 <6,7,3,3>, <6,7,3,3>
+ 1659229046U, // <3,6,7,4>: Cost 2 vext3 LHS, <6,7,4,5>
+ 2715423611U, // <3,6,7,5>: Cost 3 vext3 <5,1,7,3>, <6,7,5,1>
+ 2722059141U, // <3,6,7,6>: Cost 3 vext3 <6,2,7,3>, <6,7,6,2>
+ 2962361654U, // <3,6,7,7>: Cost 3 vzipr <1,5,3,7>, RHS
+ 1659229078U, // <3,6,7,u>: Cost 2 vext3 LHS, <6,7,u,1>
+ 1659229087U, // <3,6,u,0>: Cost 2 vext3 LHS, <6,u,0,1>
+ 2689840041U, // <3,6,u,1>: Cost 3 vext3 LHS, <6,u,1,2>
+ 2558609339U, // <3,6,u,2>: Cost 3 vext1 <1,3,6,u>, <2,6,3,u>
+ 2576525853U, // <3,6,u,3>: Cost 3 vext1 <4,3,6,u>, <3,4,u,6>
+ 1659229127U, // <3,6,u,4>: Cost 2 vext3 LHS, <6,u,4,5>
+ 2689840081U, // <3,6,u,5>: Cost 3 vext3 LHS, <6,u,5,6>
+ 1659228984U, // <3,6,u,6>: Cost 2 vext3 LHS, <6,6,6,6>
+ 1652298720U, // <3,6,u,7>: Cost 2 vext3 <6,u,7,3>, <6,u,7,3>
+ 1659229159U, // <3,6,u,u>: Cost 2 vext3 LHS, <6,u,u,1>
+ 2626813952U, // <3,7,0,0>: Cost 3 vext2 <1,5,3,7>, <0,0,0,0>
+ 1553072230U, // <3,7,0,1>: Cost 2 vext2 <1,5,3,7>, LHS
+ 2626814116U, // <3,7,0,2>: Cost 3 vext2 <1,5,3,7>, <0,2,0,2>
+ 3700556028U, // <3,7,0,3>: Cost 4 vext2 <1,5,3,7>, <0,3,1,0>
+ 2626814290U, // <3,7,0,4>: Cost 3 vext2 <1,5,3,7>, <0,4,1,5>
+ 2582507375U, // <3,7,0,5>: Cost 3 vext1 <5,3,7,0>, <5,3,7,0>
+ 2588480072U, // <3,7,0,6>: Cost 3 vext1 <6,3,7,0>, <6,3,7,0>
+ 2732971055U, // <3,7,0,7>: Cost 3 vext3 LHS, <7,0,7,1>
+ 1553072797U, // <3,7,0,u>: Cost 2 vext2 <1,5,3,7>, LHS
+ 2626814710U, // <3,7,1,0>: Cost 3 vext2 <1,5,3,7>, <1,0,3,2>
+ 2626814772U, // <3,7,1,1>: Cost 3 vext2 <1,5,3,7>, <1,1,1,1>
+ 2626814870U, // <3,7,1,2>: Cost 3 vext2 <1,5,3,7>, <1,2,3,0>
+ 2625487854U, // <3,7,1,3>: Cost 3 vext2 <1,3,3,7>, <1,3,3,7>
+ 2582514998U, // <3,7,1,4>: Cost 3 vext1 <5,3,7,1>, RHS
+ 1553073296U, // <3,7,1,5>: Cost 2 vext2 <1,5,3,7>, <1,5,3,7>
+ 2627478753U, // <3,7,1,6>: Cost 3 vext2 <1,6,3,7>, <1,6,3,7>
+ 2727367810U, // <3,7,1,7>: Cost 3 vext3 <7,1,7,3>, <7,1,7,3>
+ 1555064195U, // <3,7,1,u>: Cost 2 vext2 <1,u,3,7>, <1,u,3,7>
+ 2588491878U, // <3,7,2,0>: Cost 3 vext1 <6,3,7,2>, LHS
+ 3700557318U, // <3,7,2,1>: Cost 4 vext2 <1,5,3,7>, <2,1,0,3>
+ 2626815592U, // <3,7,2,2>: Cost 3 vext2 <1,5,3,7>, <2,2,2,2>
+ 2626815654U, // <3,7,2,3>: Cost 3 vext2 <1,5,3,7>, <2,3,0,1>
+ 2588495158U, // <3,7,2,4>: Cost 3 vext1 <6,3,7,2>, RHS
+ 2632787817U, // <3,7,2,5>: Cost 3 vext2 <2,5,3,7>, <2,5,3,7>
+ 1559709626U, // <3,7,2,6>: Cost 2 vext2 <2,6,3,7>, <2,6,3,7>
+ 2728031443U, // <3,7,2,7>: Cost 3 vext3 <7,2,7,3>, <7,2,7,3>
+ 1561036892U, // <3,7,2,u>: Cost 2 vext2 <2,u,3,7>, <2,u,3,7>
+ 2626816150U, // <3,7,3,0>: Cost 3 vext2 <1,5,3,7>, <3,0,1,2>
+ 2626816268U, // <3,7,3,1>: Cost 3 vext2 <1,5,3,7>, <3,1,5,3>
+ 2633451878U, // <3,7,3,2>: Cost 3 vext2 <2,6,3,7>, <3,2,6,3>
+ 2626816412U, // <3,7,3,3>: Cost 3 vext2 <1,5,3,7>, <3,3,3,3>
+ 2626816514U, // <3,7,3,4>: Cost 3 vext2 <1,5,3,7>, <3,4,5,6>
+ 2638760514U, // <3,7,3,5>: Cost 3 vext2 <3,5,3,7>, <3,5,3,7>
+ 2639424147U, // <3,7,3,6>: Cost 3 vext2 <3,6,3,7>, <3,6,3,7>
+ 2826961920U, // <3,7,3,7>: Cost 3 vuzpr <1,3,5,7>, <1,3,5,7>
+ 2626816798U, // <3,7,3,u>: Cost 3 vext2 <1,5,3,7>, <3,u,1,2>
+ 2582536294U, // <3,7,4,0>: Cost 3 vext1 <5,3,7,4>, LHS
+ 2582537360U, // <3,7,4,1>: Cost 3 vext1 <5,3,7,4>, <1,5,3,7>
+ 2588510138U, // <3,7,4,2>: Cost 3 vext1 <6,3,7,4>, <2,6,3,7>
+ 3700558996U, // <3,7,4,3>: Cost 4 vext2 <1,5,3,7>, <4,3,6,7>
+ 2582539574U, // <3,7,4,4>: Cost 3 vext1 <5,3,7,4>, RHS
+ 1553075510U, // <3,7,4,5>: Cost 2 vext2 <1,5,3,7>, RHS
+ 2588512844U, // <3,7,4,6>: Cost 3 vext1 <6,3,7,4>, <6,3,7,4>
+ 2564625766U, // <3,7,4,7>: Cost 3 vext1 <2,3,7,4>, <7,4,5,6>
+ 1553075753U, // <3,7,4,u>: Cost 2 vext2 <1,5,3,7>, RHS
+ 2732971398U, // <3,7,5,0>: Cost 3 vext3 LHS, <7,5,0,2>
+ 2626817744U, // <3,7,5,1>: Cost 3 vext2 <1,5,3,7>, <5,1,7,3>
+ 3700559649U, // <3,7,5,2>: Cost 4 vext2 <1,5,3,7>, <5,2,7,3>
+ 2626817903U, // <3,7,5,3>: Cost 3 vext2 <1,5,3,7>, <5,3,7,0>
+ 2258728203U, // <3,7,5,4>: Cost 3 vrev <7,3,4,5>
+ 2732971446U, // <3,7,5,5>: Cost 3 vext3 LHS, <7,5,5,5>
+ 2732971457U, // <3,7,5,6>: Cost 3 vext3 LHS, <7,5,6,7>
+ 2826964278U, // <3,7,5,7>: Cost 3 vuzpr <1,3,5,7>, RHS
+ 2826964279U, // <3,7,5,u>: Cost 3 vuzpr <1,3,5,7>, RHS
+ 2732971478U, // <3,7,6,0>: Cost 3 vext3 LHS, <7,6,0,1>
+ 2732971486U, // <3,7,6,1>: Cost 3 vext3 LHS, <7,6,1,0>
+ 2633454074U, // <3,7,6,2>: Cost 3 vext2 <2,6,3,7>, <6,2,7,3>
+ 2633454152U, // <3,7,6,3>: Cost 3 vext2 <2,6,3,7>, <6,3,7,0>
+ 2732971518U, // <3,7,6,4>: Cost 3 vext3 LHS, <7,6,4,5>
+ 2732971526U, // <3,7,6,5>: Cost 3 vext3 LHS, <7,6,5,4>
+ 2732971537U, // <3,7,6,6>: Cost 3 vext3 LHS, <7,6,6,6>
+ 2732971540U, // <3,7,6,7>: Cost 3 vext3 LHS, <7,6,7,0>
+ 2726041124U, // <3,7,6,u>: Cost 3 vext3 <6,u,7,3>, <7,6,u,7>
+ 2570616934U, // <3,7,7,0>: Cost 3 vext1 <3,3,7,7>, LHS
+ 2570617856U, // <3,7,7,1>: Cost 3 vext1 <3,3,7,7>, <1,3,5,7>
+ 2564646635U, // <3,7,7,2>: Cost 3 vext1 <2,3,7,7>, <2,3,7,7>
+ 2570619332U, // <3,7,7,3>: Cost 3 vext1 <3,3,7,7>, <3,3,7,7>
+ 2570620214U, // <3,7,7,4>: Cost 3 vext1 <3,3,7,7>, RHS
+ 2582564726U, // <3,7,7,5>: Cost 3 vext1 <5,3,7,7>, <5,3,7,7>
+ 2588537423U, // <3,7,7,6>: Cost 3 vext1 <6,3,7,7>, <6,3,7,7>
+ 1659229804U, // <3,7,7,7>: Cost 2 vext3 LHS, <7,7,7,7>
+ 1659229804U, // <3,7,7,u>: Cost 2 vext3 LHS, <7,7,7,7>
+ 2626819795U, // <3,7,u,0>: Cost 3 vext2 <1,5,3,7>, <u,0,1,2>
+ 1553078062U, // <3,7,u,1>: Cost 2 vext2 <1,5,3,7>, LHS
+ 2626819973U, // <3,7,u,2>: Cost 3 vext2 <1,5,3,7>, <u,2,3,0>
+ 2826961565U, // <3,7,u,3>: Cost 3 vuzpr <1,3,5,7>, LHS
+ 2626820159U, // <3,7,u,4>: Cost 3 vext2 <1,5,3,7>, <u,4,5,6>
+ 1553078426U, // <3,7,u,5>: Cost 2 vext2 <1,5,3,7>, RHS
+ 1595545808U, // <3,7,u,6>: Cost 2 vext2 <u,6,3,7>, <u,6,3,7>
+ 1659229804U, // <3,7,u,7>: Cost 2 vext3 LHS, <7,7,7,7>
+ 1553078629U, // <3,7,u,u>: Cost 2 vext2 <1,5,3,7>, LHS
+ 1611448320U, // <3,u,0,0>: Cost 2 vext3 LHS, <0,0,0,0>
+ 1611896531U, // <3,u,0,1>: Cost 2 vext3 LHS, <u,0,1,2>
+ 1659672284U, // <3,u,0,2>: Cost 2 vext3 LHS, <u,0,2,2>
+ 1616099045U, // <3,u,0,3>: Cost 2 vext3 LHS, <u,0,3,2>
+ 2685638381U, // <3,u,0,4>: Cost 3 vext3 LHS, <u,0,4,1>
+ 1663874806U, // <3,u,0,5>: Cost 2 vext3 LHS, <u,0,5,1>
+ 1663874816U, // <3,u,0,6>: Cost 2 vext3 LHS, <u,0,6,2>
+ 2960313672U, // <3,u,0,7>: Cost 3 vzipr <1,2,3,0>, RHS
+ 1611896594U, // <3,u,0,u>: Cost 2 vext3 LHS, <u,0,u,2>
+ 1549763324U, // <3,u,1,0>: Cost 2 vext2 <1,0,3,u>, <1,0,3,u>
+ 1550426957U, // <3,u,1,1>: Cost 2 vext2 <1,1,3,u>, <1,1,3,u>
+ 537712430U, // <3,u,1,2>: Cost 1 vext3 LHS, LHS
+ 1616541495U, // <3,u,1,3>: Cost 2 vext3 LHS, <u,1,3,3>
+ 1490930998U, // <3,u,1,4>: Cost 2 vext1 <2,3,u,1>, RHS
+ 1553081489U, // <3,u,1,5>: Cost 2 vext2 <1,5,3,u>, <1,5,3,u>
+ 2627486946U, // <3,u,1,6>: Cost 3 vext2 <1,6,3,u>, <1,6,3,u>
+ 1659230043U, // <3,u,1,7>: Cost 2 vext3 LHS, <u,1,7,3>
+ 537712484U, // <3,u,1,u>: Cost 1 vext3 LHS, LHS
+ 1611890852U, // <3,u,2,0>: Cost 2 vext3 LHS, <0,2,0,2>
+ 2624833102U, // <3,u,2,1>: Cost 3 vext2 <1,2,3,u>, <2,1,u,3>
+ 1557063287U, // <3,u,2,2>: Cost 2 vext2 <2,2,3,u>, <2,2,3,u>
+ 1616099205U, // <3,u,2,3>: Cost 2 vext3 LHS, <u,2,3,0>
+ 1611890892U, // <3,u,2,4>: Cost 2 vext3 LHS, <0,2,4,6>
+ 2689841054U, // <3,u,2,5>: Cost 3 vext3 LHS, <u,2,5,7>
+ 1559717819U, // <3,u,2,6>: Cost 2 vext2 <2,6,3,u>, <2,6,3,u>
+ 1659230124U, // <3,u,2,7>: Cost 2 vext3 LHS, <u,2,7,3>
+ 1616541618U, // <3,u,2,u>: Cost 2 vext3 LHS, <u,2,u,0>
+ 1611896764U, // <3,u,3,0>: Cost 2 vext3 LHS, <u,3,0,1>
+ 1484973079U, // <3,u,3,1>: Cost 2 vext1 <1,3,u,3>, <1,3,u,3>
+ 2685638607U, // <3,u,3,2>: Cost 3 vext3 LHS, <u,3,2,2>
+ 336380006U, // <3,u,3,3>: Cost 1 vdup3 LHS
+ 1611896804U, // <3,u,3,4>: Cost 2 vext3 LHS, <u,3,4,5>
+ 1616541679U, // <3,u,3,5>: Cost 2 vext3 LHS, <u,3,5,7>
+ 2690283512U, // <3,u,3,6>: Cost 3 vext3 LHS, <u,3,6,7>
+ 2959674696U, // <3,u,3,7>: Cost 3 vzipr <1,1,3,3>, RHS
+ 336380006U, // <3,u,3,u>: Cost 1 vdup3 LHS
+ 2558722150U, // <3,u,4,0>: Cost 3 vext1 <1,3,u,4>, LHS
+ 1659672602U, // <3,u,4,1>: Cost 2 vext3 LHS, <u,4,1,5>
+ 1659672612U, // <3,u,4,2>: Cost 2 vext3 LHS, <u,4,2,6>
+ 2689841196U, // <3,u,4,3>: Cost 3 vext3 LHS, <u,4,3,5>
+ 1659227344U, // <3,u,4,4>: Cost 2 vext3 LHS, <4,4,4,4>
+ 1611896895U, // <3,u,4,5>: Cost 2 vext3 LHS, <u,4,5,6>
+ 1663875144U, // <3,u,4,6>: Cost 2 vext3 LHS, <u,4,6,6>
+ 1659230289U, // <3,u,4,7>: Cost 2 vext3 LHS, <u,4,7,6>
+ 1611896922U, // <3,u,4,u>: Cost 2 vext3 LHS, <u,4,u,6>
+ 1490960486U, // <3,u,5,0>: Cost 2 vext1 <2,3,u,5>, LHS
+ 2689841261U, // <3,u,5,1>: Cost 3 vext3 LHS, <u,5,1,7>
+ 1490962162U, // <3,u,5,2>: Cost 2 vext1 <2,3,u,5>, <2,3,u,5>
+ 1616541823U, // <3,u,5,3>: Cost 2 vext3 LHS, <u,5,3,7>
+ 1490963766U, // <3,u,5,4>: Cost 2 vext1 <2,3,u,5>, RHS
+ 1659228164U, // <3,u,5,5>: Cost 2 vext3 LHS, <5,5,5,5>
+ 537712794U, // <3,u,5,6>: Cost 1 vext3 LHS, RHS
+ 1659230371U, // <3,u,5,7>: Cost 2 vext3 LHS, <u,5,7,7>
+ 537712812U, // <3,u,5,u>: Cost 1 vext3 LHS, RHS
+ 2689841327U, // <3,u,6,0>: Cost 3 vext3 LHS, <u,6,0,1>
+ 2558739482U, // <3,u,6,1>: Cost 3 vext1 <1,3,u,6>, <1,3,u,6>
+ 2689841351U, // <3,u,6,2>: Cost 3 vext3 LHS, <u,6,2,7>
+ 1616099536U, // <3,u,6,3>: Cost 2 vext3 LHS, <u,6,3,7>
+ 1659227508U, // <3,u,6,4>: Cost 2 vext3 LHS, <4,6,4,6>
+ 2690283746U, // <3,u,6,5>: Cost 3 vext3 LHS, <u,6,5,7>
+ 1659228984U, // <3,u,6,6>: Cost 2 vext3 LHS, <6,6,6,6>
+ 1659230445U, // <3,u,6,7>: Cost 2 vext3 LHS, <u,6,7,0>
+ 1616099581U, // <3,u,6,u>: Cost 2 vext3 LHS, <u,6,u,7>
+ 1485004902U, // <3,u,7,0>: Cost 2 vext1 <1,3,u,7>, LHS
+ 1485005851U, // <3,u,7,1>: Cost 2 vext1 <1,3,u,7>, <1,3,u,7>
+ 2558748264U, // <3,u,7,2>: Cost 3 vext1 <1,3,u,7>, <2,2,2,2>
+ 3095397021U, // <3,u,7,3>: Cost 3 vtrnr <1,3,5,7>, LHS
+ 1485008182U, // <3,u,7,4>: Cost 2 vext1 <1,3,u,7>, RHS
+ 1659228328U, // <3,u,7,5>: Cost 2 vext3 LHS, <5,7,5,7>
+ 2722060599U, // <3,u,7,6>: Cost 3 vext3 <6,2,7,3>, <u,7,6,2>
+ 1659229804U, // <3,u,7,7>: Cost 2 vext3 LHS, <7,7,7,7>
+ 1485010734U, // <3,u,7,u>: Cost 2 vext1 <1,3,u,7>, LHS
+ 1616099665U, // <3,u,u,0>: Cost 2 vext3 LHS, <u,u,0,1>
+ 1611897179U, // <3,u,u,1>: Cost 2 vext3 LHS, <u,u,1,2>
+ 537712997U, // <3,u,u,2>: Cost 1 vext3 LHS, LHS
+ 336380006U, // <3,u,u,3>: Cost 1 vdup3 LHS
+ 1616099705U, // <3,u,u,4>: Cost 2 vext3 LHS, <u,u,4,5>
+ 1611897219U, // <3,u,u,5>: Cost 2 vext3 LHS, <u,u,5,6>
+ 537713037U, // <3,u,u,6>: Cost 1 vext3 LHS, RHS
+ 1659230607U, // <3,u,u,7>: Cost 2 vext3 LHS, <u,u,7,0>
+ 537713051U, // <3,u,u,u>: Cost 1 vext3 LHS, LHS
+ 2691907584U, // <4,0,0,0>: Cost 3 vext3 <1,2,3,4>, <0,0,0,0>
+ 2691907594U, // <4,0,0,1>: Cost 3 vext3 <1,2,3,4>, <0,0,1,1>
+ 2691907604U, // <4,0,0,2>: Cost 3 vext3 <1,2,3,4>, <0,0,2,2>
+ 3709862144U, // <4,0,0,3>: Cost 4 vext2 <3,1,4,0>, <0,3,1,4>
+ 2684682280U, // <4,0,0,4>: Cost 3 vext3 <0,0,4,4>, <0,0,4,4>
+ 3694600633U, // <4,0,0,5>: Cost 4 vext2 <0,5,4,0>, <0,5,4,0>
+ 3291431290U, // <4,0,0,6>: Cost 4 vrev <0,4,6,0>
+ 3668342067U, // <4,0,0,7>: Cost 4 vext1 <7,4,0,0>, <7,4,0,0>
+ 2691907657U, // <4,0,0,u>: Cost 3 vext3 <1,2,3,4>, <0,0,u,1>
+ 2570715238U, // <4,0,1,0>: Cost 3 vext1 <3,4,0,1>, LHS
+ 2570716058U, // <4,0,1,1>: Cost 3 vext1 <3,4,0,1>, <1,2,3,4>
+ 1618165862U, // <4,0,1,2>: Cost 2 vext3 <1,2,3,4>, LHS
+ 2570717648U, // <4,0,1,3>: Cost 3 vext1 <3,4,0,1>, <3,4,0,1>
+ 2570718518U, // <4,0,1,4>: Cost 3 vext1 <3,4,0,1>, RHS
+ 2594607206U, // <4,0,1,5>: Cost 3 vext1 <7,4,0,1>, <5,6,7,4>
+ 3662377563U, // <4,0,1,6>: Cost 4 vext1 <6,4,0,1>, <6,4,0,1>
+ 2594608436U, // <4,0,1,7>: Cost 3 vext1 <7,4,0,1>, <7,4,0,1>
+ 1618165916U, // <4,0,1,u>: Cost 2 vext3 <1,2,3,4>, LHS
+ 2685714598U, // <4,0,2,0>: Cost 3 vext3 <0,2,0,4>, <0,2,0,4>
+ 3759530159U, // <4,0,2,1>: Cost 4 vext3 <0,2,1,4>, <0,2,1,4>
+ 2685862072U, // <4,0,2,2>: Cost 3 vext3 <0,2,2,4>, <0,2,2,4>
+ 2631476937U, // <4,0,2,3>: Cost 3 vext2 <2,3,4,0>, <2,3,4,0>
+ 2685714636U, // <4,0,2,4>: Cost 3 vext3 <0,2,0,4>, <0,2,4,6>
+ 3765649622U, // <4,0,2,5>: Cost 4 vext3 <1,2,3,4>, <0,2,5,7>
+ 2686157020U, // <4,0,2,6>: Cost 3 vext3 <0,2,6,4>, <0,2,6,4>
+ 3668358453U, // <4,0,2,7>: Cost 4 vext1 <7,4,0,2>, <7,4,0,2>
+ 2686304494U, // <4,0,2,u>: Cost 3 vext3 <0,2,u,4>, <0,2,u,4>
+ 3632529510U, // <4,0,3,0>: Cost 4 vext1 <1,4,0,3>, LHS
+ 2686451968U, // <4,0,3,1>: Cost 3 vext3 <0,3,1,4>, <0,3,1,4>
+ 2686525705U, // <4,0,3,2>: Cost 3 vext3 <0,3,2,4>, <0,3,2,4>
+ 3760341266U, // <4,0,3,3>: Cost 4 vext3 <0,3,3,4>, <0,3,3,4>
+ 3632532790U, // <4,0,3,4>: Cost 4 vext1 <1,4,0,3>, RHS
+ 3913254606U, // <4,0,3,5>: Cost 4 vuzpr <3,4,5,0>, <2,3,4,5>
+ 3705219740U, // <4,0,3,6>: Cost 4 vext2 <2,3,4,0>, <3,6,4,7>
+ 3713845990U, // <4,0,3,7>: Cost 4 vext2 <3,7,4,0>, <3,7,4,0>
+ 2686451968U, // <4,0,3,u>: Cost 3 vext3 <0,3,1,4>, <0,3,1,4>
+ 2552823910U, // <4,0,4,0>: Cost 3 vext1 <0,4,0,4>, LHS
+ 2691907922U, // <4,0,4,1>: Cost 3 vext3 <1,2,3,4>, <0,4,1,5>
+ 2691907932U, // <4,0,4,2>: Cost 3 vext3 <1,2,3,4>, <0,4,2,6>
+ 3626567830U, // <4,0,4,3>: Cost 4 vext1 <0,4,0,4>, <3,0,1,2>
+ 2552827190U, // <4,0,4,4>: Cost 3 vext1 <0,4,0,4>, RHS
+ 2631478582U, // <4,0,4,5>: Cost 3 vext2 <2,3,4,0>, RHS
+ 3626570017U, // <4,0,4,6>: Cost 4 vext1 <0,4,0,4>, <6,0,1,2>
+ 3668374839U, // <4,0,4,7>: Cost 4 vext1 <7,4,0,4>, <7,4,0,4>
+ 2552829742U, // <4,0,4,u>: Cost 3 vext1 <0,4,0,4>, LHS
+ 2558804070U, // <4,0,5,0>: Cost 3 vext1 <1,4,0,5>, LHS
+ 1839644774U, // <4,0,5,1>: Cost 2 vzipl RHS, LHS
+ 2913386660U, // <4,0,5,2>: Cost 3 vzipl RHS, <0,2,0,2>
+ 2570750420U, // <4,0,5,3>: Cost 3 vext1 <3,4,0,5>, <3,4,0,5>
+ 2558807350U, // <4,0,5,4>: Cost 3 vext1 <1,4,0,5>, RHS
+ 3987128750U, // <4,0,5,5>: Cost 4 vzipl RHS, <0,5,2,7>
+ 3987128822U, // <4,0,5,6>: Cost 4 vzipl RHS, <0,6,1,7>
+ 2594641208U, // <4,0,5,7>: Cost 3 vext1 <7,4,0,5>, <7,4,0,5>
+ 1839645341U, // <4,0,5,u>: Cost 2 vzipl RHS, LHS
+ 2552840294U, // <4,0,6,0>: Cost 3 vext1 <0,4,0,6>, LHS
+ 3047604234U, // <4,0,6,1>: Cost 3 vtrnl RHS, <0,0,1,1>
+ 1973862502U, // <4,0,6,2>: Cost 2 vtrnl RHS, LHS
+ 2570758613U, // <4,0,6,3>: Cost 3 vext1 <3,4,0,6>, <3,4,0,6>
+ 2552843574U, // <4,0,6,4>: Cost 3 vext1 <0,4,0,6>, RHS
+ 2217664887U, // <4,0,6,5>: Cost 3 vrev <0,4,5,6>
+ 3662418528U, // <4,0,6,6>: Cost 4 vext1 <6,4,0,6>, <6,4,0,6>
+ 2658022257U, // <4,0,6,7>: Cost 3 vext2 <6,7,4,0>, <6,7,4,0>
+ 1973862556U, // <4,0,6,u>: Cost 2 vtrnl RHS, LHS
+ 3731764218U, // <4,0,7,0>: Cost 4 vext2 <6,7,4,0>, <7,0,1,2>
+ 3988324454U, // <4,0,7,1>: Cost 4 vzipl <4,7,5,0>, LHS
+ 4122034278U, // <4,0,7,2>: Cost 4 vtrnl <4,6,7,1>, LHS
+ 3735082246U, // <4,0,7,3>: Cost 4 vext2 <7,3,4,0>, <7,3,4,0>
+ 3731764536U, // <4,0,7,4>: Cost 4 vext2 <6,7,4,0>, <7,4,0,5>
+ 3937145718U, // <4,0,7,5>: Cost 4 vuzpr <7,4,5,0>, <6,7,4,5>
+ 3737073145U, // <4,0,7,6>: Cost 4 vext2 <7,6,4,0>, <7,6,4,0>
+ 3731764844U, // <4,0,7,7>: Cost 4 vext2 <6,7,4,0>, <7,7,7,7>
+ 4122034332U, // <4,0,7,u>: Cost 4 vtrnl <4,6,7,1>, LHS
+ 2552856678U, // <4,0,u,0>: Cost 3 vext1 <0,4,0,u>, LHS
+ 1841635430U, // <4,0,u,1>: Cost 2 vzipl RHS, LHS
+ 1618166429U, // <4,0,u,2>: Cost 2 vext3 <1,2,3,4>, LHS
+ 2570774999U, // <4,0,u,3>: Cost 3 vext1 <3,4,0,u>, <3,4,0,u>
+ 2552859958U, // <4,0,u,4>: Cost 3 vext1 <0,4,0,u>, RHS
+ 2631481498U, // <4,0,u,5>: Cost 3 vext2 <2,3,4,0>, RHS
+ 2686157020U, // <4,0,u,6>: Cost 3 vext3 <0,2,6,4>, <0,2,6,4>
+ 2594665787U, // <4,0,u,7>: Cost 3 vext1 <7,4,0,u>, <7,4,0,u>
+ 1618166483U, // <4,0,u,u>: Cost 2 vext3 <1,2,3,4>, LHS
+ 2617548837U, // <4,1,0,0>: Cost 3 vext2 <0,0,4,1>, <0,0,4,1>
+ 2622857318U, // <4,1,0,1>: Cost 3 vext2 <0,u,4,1>, LHS
+ 3693281484U, // <4,1,0,2>: Cost 4 vext2 <0,3,4,1>, <0,2,4,6>
+ 2691908342U, // <4,1,0,3>: Cost 3 vext3 <1,2,3,4>, <1,0,3,2>
+ 2622857554U, // <4,1,0,4>: Cost 3 vext2 <0,u,4,1>, <0,4,1,5>
+ 3764470538U, // <4,1,0,5>: Cost 4 vext3 <1,0,5,4>, <1,0,5,4>
+ 3695272459U, // <4,1,0,6>: Cost 4 vext2 <0,6,4,1>, <0,6,4,1>
+ 3733094980U, // <4,1,0,7>: Cost 4 vext2 <7,0,4,1>, <0,7,1,4>
+ 2622857885U, // <4,1,0,u>: Cost 3 vext2 <0,u,4,1>, LHS
+ 3696599798U, // <4,1,1,0>: Cost 4 vext2 <0,u,4,1>, <1,0,3,2>
+ 2691097399U, // <4,1,1,1>: Cost 3 vext3 <1,1,1,4>, <1,1,1,4>
+ 2631484314U, // <4,1,1,2>: Cost 3 vext2 <2,3,4,1>, <1,2,3,4>
+ 2691908424U, // <4,1,1,3>: Cost 3 vext3 <1,2,3,4>, <1,1,3,3>
+ 3696600125U, // <4,1,1,4>: Cost 4 vext2 <0,u,4,1>, <1,4,3,5>
+ 3696600175U, // <4,1,1,5>: Cost 4 vext2 <0,u,4,1>, <1,5,0,1>
+ 3696600307U, // <4,1,1,6>: Cost 4 vext2 <0,u,4,1>, <1,6,5,7>
+ 3668423997U, // <4,1,1,7>: Cost 4 vext1 <7,4,1,1>, <7,4,1,1>
+ 2691908469U, // <4,1,1,u>: Cost 3 vext3 <1,2,3,4>, <1,1,u,3>
+ 2570797158U, // <4,1,2,0>: Cost 3 vext1 <3,4,1,2>, LHS
+ 2570797978U, // <4,1,2,1>: Cost 3 vext1 <3,4,1,2>, <1,2,3,4>
+ 3696600680U, // <4,1,2,2>: Cost 4 vext2 <0,u,4,1>, <2,2,2,2>
+ 1618166682U, // <4,1,2,3>: Cost 2 vext3 <1,2,3,4>, <1,2,3,4>
+ 2570800438U, // <4,1,2,4>: Cost 3 vext1 <3,4,1,2>, RHS
+ 3765650347U, // <4,1,2,5>: Cost 4 vext3 <1,2,3,4>, <1,2,5,3>
+ 3696601018U, // <4,1,2,6>: Cost 4 vext2 <0,u,4,1>, <2,6,3,7>
+ 3668432190U, // <4,1,2,7>: Cost 4 vext1 <7,4,1,2>, <7,4,1,2>
+ 1618535367U, // <4,1,2,u>: Cost 2 vext3 <1,2,u,4>, <1,2,u,4>
+ 2564833382U, // <4,1,3,0>: Cost 3 vext1 <2,4,1,3>, LHS
+ 2691908568U, // <4,1,3,1>: Cost 3 vext3 <1,2,3,4>, <1,3,1,3>
+ 2691908578U, // <4,1,3,2>: Cost 3 vext3 <1,2,3,4>, <1,3,2,4>
+ 2692572139U, // <4,1,3,3>: Cost 3 vext3 <1,3,3,4>, <1,3,3,4>
+ 2564836662U, // <4,1,3,4>: Cost 3 vext1 <2,4,1,3>, RHS
+ 2691908608U, // <4,1,3,5>: Cost 3 vext3 <1,2,3,4>, <1,3,5,7>
+ 2588725862U, // <4,1,3,6>: Cost 3 vext1 <6,4,1,3>, <6,4,1,3>
+ 3662468090U, // <4,1,3,7>: Cost 4 vext1 <6,4,1,3>, <7,0,1,2>
+ 2691908631U, // <4,1,3,u>: Cost 3 vext3 <1,2,3,4>, <1,3,u,3>
+ 3760194590U, // <4,1,4,0>: Cost 4 vext3 <0,3,1,4>, <1,4,0,1>
+ 3693947874U, // <4,1,4,1>: Cost 4 vext2 <0,4,4,1>, <4,1,5,0>
+ 3765650484U, // <4,1,4,2>: Cost 4 vext3 <1,2,3,4>, <1,4,2,5>
+ 3113877606U, // <4,1,4,3>: Cost 3 vtrnr <4,4,4,4>, LHS
+ 3760194630U, // <4,1,4,4>: Cost 4 vext3 <0,3,1,4>, <1,4,4,5>
+ 2622860598U, // <4,1,4,5>: Cost 3 vext2 <0,u,4,1>, RHS
+ 3297436759U, // <4,1,4,6>: Cost 4 vrev <1,4,6,4>
+ 3800007772U, // <4,1,4,7>: Cost 4 vext3 <7,0,1,4>, <1,4,7,0>
+ 2622860841U, // <4,1,4,u>: Cost 3 vext2 <0,u,4,1>, RHS
+ 1479164006U, // <4,1,5,0>: Cost 2 vext1 <0,4,1,5>, LHS
+ 2552906486U, // <4,1,5,1>: Cost 3 vext1 <0,4,1,5>, <1,0,3,2>
+ 2552907299U, // <4,1,5,2>: Cost 3 vext1 <0,4,1,5>, <2,1,3,5>
+ 2552907926U, // <4,1,5,3>: Cost 3 vext1 <0,4,1,5>, <3,0,1,2>
+ 1479167286U, // <4,1,5,4>: Cost 2 vext1 <0,4,1,5>, RHS
+ 2913387664U, // <4,1,5,5>: Cost 3 vzipl RHS, <1,5,3,7>
+ 2600686074U, // <4,1,5,6>: Cost 3 vext1 <u,4,1,5>, <6,2,7,3>
+ 2600686586U, // <4,1,5,7>: Cost 3 vext1 <u,4,1,5>, <7,0,1,2>
+ 1479169838U, // <4,1,5,u>: Cost 2 vext1 <0,4,1,5>, LHS
+ 2552914022U, // <4,1,6,0>: Cost 3 vext1 <0,4,1,6>, LHS
+ 2558886708U, // <4,1,6,1>: Cost 3 vext1 <1,4,1,6>, <1,1,1,1>
+ 4028205206U, // <4,1,6,2>: Cost 4 vzipr <0,2,4,6>, <3,0,1,2>
+ 3089858662U, // <4,1,6,3>: Cost 3 vtrnr <0,4,2,6>, LHS
+ 2552917302U, // <4,1,6,4>: Cost 3 vext1 <0,4,1,6>, RHS
+ 2223637584U, // <4,1,6,5>: Cost 3 vrev <1,4,5,6>
+ 4121347081U, // <4,1,6,6>: Cost 4 vtrnl RHS, <1,3,6,7>
+ 3721155406U, // <4,1,6,7>: Cost 4 vext2 <5,0,4,1>, <6,7,0,1>
+ 2552919854U, // <4,1,6,u>: Cost 3 vext1 <0,4,1,6>, LHS
+ 2659357716U, // <4,1,7,0>: Cost 3 vext2 <7,0,4,1>, <7,0,4,1>
+ 3733763173U, // <4,1,7,1>: Cost 4 vext2 <7,1,4,1>, <7,1,4,1>
+ 3734426806U, // <4,1,7,2>: Cost 4 vext2 <7,2,4,1>, <7,2,4,1>
+ 2695226671U, // <4,1,7,3>: Cost 3 vext3 <1,7,3,4>, <1,7,3,4>
+ 3721155942U, // <4,1,7,4>: Cost 4 vext2 <5,0,4,1>, <7,4,5,6>
+ 3721155976U, // <4,1,7,5>: Cost 4 vext2 <5,0,4,1>, <7,5,0,4>
+ 3662500458U, // <4,1,7,6>: Cost 4 vext1 <6,4,1,7>, <6,4,1,7>
+ 3721156204U, // <4,1,7,7>: Cost 4 vext2 <5,0,4,1>, <7,7,7,7>
+ 2659357716U, // <4,1,7,u>: Cost 3 vext2 <7,0,4,1>, <7,0,4,1>
+ 1479188582U, // <4,1,u,0>: Cost 2 vext1 <0,4,1,u>, LHS
+ 2552931062U, // <4,1,u,1>: Cost 3 vext1 <0,4,1,u>, <1,0,3,2>
+ 2552931944U, // <4,1,u,2>: Cost 3 vext1 <0,4,1,u>, <2,2,2,2>
+ 1622148480U, // <4,1,u,3>: Cost 2 vext3 <1,u,3,4>, <1,u,3,4>
+ 1479191862U, // <4,1,u,4>: Cost 2 vext1 <0,4,1,u>, RHS
+ 2622863514U, // <4,1,u,5>: Cost 3 vext2 <0,u,4,1>, RHS
+ 2588725862U, // <4,1,u,6>: Cost 3 vext1 <6,4,1,3>, <6,4,1,3>
+ 2600686586U, // <4,1,u,7>: Cost 3 vext1 <u,4,1,5>, <7,0,1,2>
+ 1479194414U, // <4,1,u,u>: Cost 2 vext1 <0,4,1,u>, LHS
+ 2617557030U, // <4,2,0,0>: Cost 3 vext2 <0,0,4,2>, <0,0,4,2>
+ 2622865510U, // <4,2,0,1>: Cost 3 vext2 <0,u,4,2>, LHS
+ 2622865612U, // <4,2,0,2>: Cost 3 vext2 <0,u,4,2>, <0,2,4,6>
+ 3693289753U, // <4,2,0,3>: Cost 4 vext2 <0,3,4,2>, <0,3,4,2>
+ 2635473244U, // <4,2,0,4>: Cost 3 vext2 <3,0,4,2>, <0,4,2,6>
+ 3765650918U, // <4,2,0,5>: Cost 4 vext3 <1,2,3,4>, <2,0,5,7>
+ 2696775148U, // <4,2,0,6>: Cost 3 vext3 <2,0,6,4>, <2,0,6,4>
+ 3695944285U, // <4,2,0,7>: Cost 4 vext2 <0,7,4,2>, <0,7,4,2>
+ 2622866077U, // <4,2,0,u>: Cost 3 vext2 <0,u,4,2>, LHS
+ 3696607990U, // <4,2,1,0>: Cost 4 vext2 <0,u,4,2>, <1,0,3,2>
+ 3696608052U, // <4,2,1,1>: Cost 4 vext2 <0,u,4,2>, <1,1,1,1>
+ 3696608150U, // <4,2,1,2>: Cost 4 vext2 <0,u,4,2>, <1,2,3,0>
+ 3895574630U, // <4,2,1,3>: Cost 4 vuzpr <0,4,u,2>, LHS
+ 2691909162U, // <4,2,1,4>: Cost 3 vext3 <1,2,3,4>, <2,1,4,3>
+ 3696608400U, // <4,2,1,5>: Cost 4 vext2 <0,u,4,2>, <1,5,3,7>
+ 3760784956U, // <4,2,1,6>: Cost 4 vext3 <0,4,0,4>, <2,1,6,3>
+ 3773908549U, // <4,2,1,7>: Cost 5 vext3 <2,5,7,4>, <2,1,7,3>
+ 2691909162U, // <4,2,1,u>: Cost 3 vext3 <1,2,3,4>, <2,1,4,3>
+ 3696608748U, // <4,2,2,0>: Cost 4 vext2 <0,u,4,2>, <2,0,6,4>
+ 3696608828U, // <4,2,2,1>: Cost 4 vext2 <0,u,4,2>, <2,1,6,3>
+ 2691909224U, // <4,2,2,2>: Cost 3 vext3 <1,2,3,4>, <2,2,2,2>
+ 2691909234U, // <4,2,2,3>: Cost 3 vext3 <1,2,3,4>, <2,2,3,3>
+ 3759605368U, // <4,2,2,4>: Cost 4 vext3 <0,2,2,4>, <2,2,4,0>
+ 3696609156U, // <4,2,2,5>: Cost 4 vext2 <0,u,4,2>, <2,5,6,7>
+ 3760785040U, // <4,2,2,6>: Cost 4 vext3 <0,4,0,4>, <2,2,6,6>
+ 3668505927U, // <4,2,2,7>: Cost 4 vext1 <7,4,2,2>, <7,4,2,2>
+ 2691909279U, // <4,2,2,u>: Cost 3 vext3 <1,2,3,4>, <2,2,u,3>
+ 2691909286U, // <4,2,3,0>: Cost 3 vext3 <1,2,3,4>, <2,3,0,1>
+ 3764840111U, // <4,2,3,1>: Cost 4 vext3 <1,1,1,4>, <2,3,1,1>
+ 3765651129U, // <4,2,3,2>: Cost 4 vext3 <1,2,3,4>, <2,3,2,2>
+ 2698544836U, // <4,2,3,3>: Cost 3 vext3 <2,3,3,4>, <2,3,3,4>
+ 2685863630U, // <4,2,3,4>: Cost 3 vext3 <0,2,2,4>, <2,3,4,5>
+ 2698692310U, // <4,2,3,5>: Cost 3 vext3 <2,3,5,4>, <2,3,5,4>
+ 3772507871U, // <4,2,3,6>: Cost 4 vext3 <2,3,6,4>, <2,3,6,4>
+ 2698839784U, // <4,2,3,7>: Cost 3 vext3 <2,3,7,4>, <2,3,7,4>
+ 2691909358U, // <4,2,3,u>: Cost 3 vext3 <1,2,3,4>, <2,3,u,1>
+ 2564915302U, // <4,2,4,0>: Cost 3 vext1 <2,4,2,4>, LHS
+ 2564916122U, // <4,2,4,1>: Cost 3 vext1 <2,4,2,4>, <1,2,3,4>
+ 2564917004U, // <4,2,4,2>: Cost 3 vext1 <2,4,2,4>, <2,4,2,4>
+ 2699208469U, // <4,2,4,3>: Cost 3 vext3 <2,4,3,4>, <2,4,3,4>
+ 2564918582U, // <4,2,4,4>: Cost 3 vext1 <2,4,2,4>, RHS
+ 2622868790U, // <4,2,4,5>: Cost 3 vext2 <0,u,4,2>, RHS
+ 2229667632U, // <4,2,4,6>: Cost 3 vrev <2,4,6,4>
+ 3800082229U, // <4,2,4,7>: Cost 4 vext3 <7,0,2,4>, <2,4,7,0>
+ 2622869033U, // <4,2,4,u>: Cost 3 vext2 <0,u,4,2>, RHS
+ 2552979558U, // <4,2,5,0>: Cost 3 vext1 <0,4,2,5>, LHS
+ 2558952342U, // <4,2,5,1>: Cost 3 vext1 <1,4,2,5>, <1,2,3,0>
+ 2564925032U, // <4,2,5,2>: Cost 3 vext1 <2,4,2,5>, <2,2,2,2>
+ 2967060582U, // <4,2,5,3>: Cost 3 vzipr <2,3,4,5>, LHS
+ 2552982838U, // <4,2,5,4>: Cost 3 vext1 <0,4,2,5>, RHS
+ 3987130190U, // <4,2,5,5>: Cost 4 vzipl RHS, <2,5,0,7>
+ 2913388474U, // <4,2,5,6>: Cost 3 vzipl RHS, <2,6,3,7>
+ 3895577910U, // <4,2,5,7>: Cost 4 vuzpr <0,4,u,2>, RHS
+ 2552985390U, // <4,2,5,u>: Cost 3 vext1 <0,4,2,5>, LHS
+ 1479245926U, // <4,2,6,0>: Cost 2 vext1 <0,4,2,6>, LHS
+ 2552988406U, // <4,2,6,1>: Cost 3 vext1 <0,4,2,6>, <1,0,3,2>
+ 2552989288U, // <4,2,6,2>: Cost 3 vext1 <0,4,2,6>, <2,2,2,2>
+ 2954461286U, // <4,2,6,3>: Cost 3 vzipr <0,2,4,6>, LHS
+ 1479249206U, // <4,2,6,4>: Cost 2 vext1 <0,4,2,6>, RHS
+ 2229610281U, // <4,2,6,5>: Cost 3 vrev <2,4,5,6>
+ 2600767994U, // <4,2,6,6>: Cost 3 vext1 <u,4,2,6>, <6,2,7,3>
+ 2600768506U, // <4,2,6,7>: Cost 3 vext1 <u,4,2,6>, <7,0,1,2>
+ 1479251758U, // <4,2,6,u>: Cost 2 vext1 <0,4,2,6>, LHS
+ 2659365909U, // <4,2,7,0>: Cost 3 vext2 <7,0,4,2>, <7,0,4,2>
+ 3733771366U, // <4,2,7,1>: Cost 4 vext2 <7,1,4,2>, <7,1,4,2>
+ 3734434999U, // <4,2,7,2>: Cost 4 vext2 <7,2,4,2>, <7,2,4,2>
+ 2701199368U, // <4,2,7,3>: Cost 3 vext3 <2,7,3,4>, <2,7,3,4>
+ 4175774618U, // <4,2,7,4>: Cost 4 vtrnr <2,4,5,7>, <1,2,3,4>
+ 3303360298U, // <4,2,7,5>: Cost 4 vrev <2,4,5,7>
+ 3727136217U, // <4,2,7,6>: Cost 4 vext2 <6,0,4,2>, <7,6,0,4>
+ 3727136364U, // <4,2,7,7>: Cost 4 vext2 <6,0,4,2>, <7,7,7,7>
+ 2659365909U, // <4,2,7,u>: Cost 3 vext2 <7,0,4,2>, <7,0,4,2>
+ 1479262310U, // <4,2,u,0>: Cost 2 vext1 <0,4,2,u>, LHS
+ 2553004790U, // <4,2,u,1>: Cost 3 vext1 <0,4,2,u>, <1,0,3,2>
+ 2553005672U, // <4,2,u,2>: Cost 3 vext1 <0,4,2,u>, <2,2,2,2>
+ 2954477670U, // <4,2,u,3>: Cost 3 vzipr <0,2,4,u>, LHS
+ 1479265590U, // <4,2,u,4>: Cost 2 vext1 <0,4,2,u>, RHS
+ 2622871706U, // <4,2,u,5>: Cost 3 vext2 <0,u,4,2>, RHS
+ 2229700404U, // <4,2,u,6>: Cost 3 vrev <2,4,6,u>
+ 2600784890U, // <4,2,u,7>: Cost 3 vext1 <u,4,2,u>, <7,0,1,2>
+ 1479268142U, // <4,2,u,u>: Cost 2 vext1 <0,4,2,u>, LHS
+ 3765651595U, // <4,3,0,0>: Cost 4 vext3 <1,2,3,4>, <3,0,0,0>
+ 2691909782U, // <4,3,0,1>: Cost 3 vext3 <1,2,3,4>, <3,0,1,2>
+ 2702452897U, // <4,3,0,2>: Cost 3 vext3 <3,0,2,4>, <3,0,2,4>
+ 3693297946U, // <4,3,0,3>: Cost 4 vext2 <0,3,4,3>, <0,3,4,3>
+ 3760711856U, // <4,3,0,4>: Cost 4 vext3 <0,3,u,4>, <3,0,4,1>
+ 2235533820U, // <4,3,0,5>: Cost 3 vrev <3,4,5,0>
+ 3309349381U, // <4,3,0,6>: Cost 4 vrev <3,4,6,0>
+ 3668563278U, // <4,3,0,7>: Cost 4 vext1 <7,4,3,0>, <7,4,3,0>
+ 2691909845U, // <4,3,0,u>: Cost 3 vext3 <1,2,3,4>, <3,0,u,2>
+ 2235173328U, // <4,3,1,0>: Cost 3 vrev <3,4,0,1>
+ 3764840678U, // <4,3,1,1>: Cost 4 vext3 <1,1,1,4>, <3,1,1,1>
+ 2630173594U, // <4,3,1,2>: Cost 3 vext2 <2,1,4,3>, <1,2,3,4>
+ 2703190267U, // <4,3,1,3>: Cost 3 vext3 <3,1,3,4>, <3,1,3,4>
+ 3760195840U, // <4,3,1,4>: Cost 4 vext3 <0,3,1,4>, <3,1,4,0>
+ 3765651724U, // <4,3,1,5>: Cost 4 vext3 <1,2,3,4>, <3,1,5,3>
+ 3309357574U, // <4,3,1,6>: Cost 4 vrev <3,4,6,1>
+ 3769633054U, // <4,3,1,7>: Cost 4 vext3 <1,u,3,4>, <3,1,7,3>
+ 2703558952U, // <4,3,1,u>: Cost 3 vext3 <3,1,u,4>, <3,1,u,4>
+ 3626770534U, // <4,3,2,0>: Cost 4 vext1 <0,4,3,2>, LHS
+ 2630174250U, // <4,3,2,1>: Cost 3 vext2 <2,1,4,3>, <2,1,4,3>
+ 3765651777U, // <4,3,2,2>: Cost 4 vext3 <1,2,3,4>, <3,2,2,2>
+ 2703853900U, // <4,3,2,3>: Cost 3 vext3 <3,2,3,4>, <3,2,3,4>
+ 3626773814U, // <4,3,2,4>: Cost 4 vext1 <0,4,3,2>, RHS
+ 2704001374U, // <4,3,2,5>: Cost 3 vext3 <3,2,5,4>, <3,2,5,4>
+ 3765651814U, // <4,3,2,6>: Cost 4 vext3 <1,2,3,4>, <3,2,6,3>
+ 3769633135U, // <4,3,2,7>: Cost 4 vext3 <1,u,3,4>, <3,2,7,3>
+ 2634819681U, // <4,3,2,u>: Cost 3 vext2 <2,u,4,3>, <2,u,4,3>
+ 3765651839U, // <4,3,3,0>: Cost 4 vext3 <1,2,3,4>, <3,3,0,1>
+ 3765651848U, // <4,3,3,1>: Cost 4 vext3 <1,2,3,4>, <3,3,1,1>
+ 3710552404U, // <4,3,3,2>: Cost 4 vext2 <3,2,4,3>, <3,2,4,3>
+ 2691910044U, // <4,3,3,3>: Cost 3 vext3 <1,2,3,4>, <3,3,3,3>
+ 2704591270U, // <4,3,3,4>: Cost 3 vext3 <3,3,4,4>, <3,3,4,4>
+ 3769633202U, // <4,3,3,5>: Cost 4 vext3 <1,u,3,4>, <3,3,5,7>
+ 3703917212U, // <4,3,3,6>: Cost 4 vext2 <2,1,4,3>, <3,6,4,7>
+ 3769633220U, // <4,3,3,7>: Cost 4 vext3 <1,u,3,4>, <3,3,7,7>
+ 2691910044U, // <4,3,3,u>: Cost 3 vext3 <1,2,3,4>, <3,3,3,3>
+ 2691910096U, // <4,3,4,0>: Cost 3 vext3 <1,2,3,4>, <3,4,0,1>
+ 2691910106U, // <4,3,4,1>: Cost 3 vext3 <1,2,3,4>, <3,4,1,2>
+ 2564990741U, // <4,3,4,2>: Cost 3 vext1 <2,4,3,4>, <2,4,3,4>
+ 3765651946U, // <4,3,4,3>: Cost 4 vext3 <1,2,3,4>, <3,4,3,0>
+ 2691910136U, // <4,3,4,4>: Cost 3 vext3 <1,2,3,4>, <3,4,4,5>
+ 2686454274U, // <4,3,4,5>: Cost 3 vext3 <0,3,1,4>, <3,4,5,6>
+ 2235640329U, // <4,3,4,6>: Cost 3 vrev <3,4,6,4>
+ 3801483792U, // <4,3,4,7>: Cost 4 vext3 <7,2,3,4>, <3,4,7,2>
+ 2691910168U, // <4,3,4,u>: Cost 3 vext3 <1,2,3,4>, <3,4,u,1>
+ 2559025254U, // <4,3,5,0>: Cost 3 vext1 <1,4,3,5>, LHS
+ 2559026237U, // <4,3,5,1>: Cost 3 vext1 <1,4,3,5>, <1,4,3,5>
+ 2564998862U, // <4,3,5,2>: Cost 3 vext1 <2,4,3,5>, <2,3,4,5>
+ 2570971548U, // <4,3,5,3>: Cost 3 vext1 <3,4,3,5>, <3,3,3,3>
+ 2559028534U, // <4,3,5,4>: Cost 3 vext1 <1,4,3,5>, RHS
+ 4163519477U, // <4,3,5,5>: Cost 4 vtrnr <0,4,1,5>, <1,3,4,5>
+ 3309390346U, // <4,3,5,6>: Cost 4 vrev <3,4,6,5>
+ 2706139747U, // <4,3,5,7>: Cost 3 vext3 <3,5,7,4>, <3,5,7,4>
+ 2559031086U, // <4,3,5,u>: Cost 3 vext1 <1,4,3,5>, LHS
+ 2559033446U, // <4,3,6,0>: Cost 3 vext1 <1,4,3,6>, LHS
+ 2559034430U, // <4,3,6,1>: Cost 3 vext1 <1,4,3,6>, <1,4,3,6>
+ 2565007127U, // <4,3,6,2>: Cost 3 vext1 <2,4,3,6>, <2,4,3,6>
+ 2570979740U, // <4,3,6,3>: Cost 3 vext1 <3,4,3,6>, <3,3,3,3>
+ 2559036726U, // <4,3,6,4>: Cost 3 vext1 <1,4,3,6>, RHS
+ 1161841154U, // <4,3,6,5>: Cost 2 vrev <3,4,5,6>
+ 4028203932U, // <4,3,6,6>: Cost 4 vzipr <0,2,4,6>, <1,2,3,6>
+ 2706803380U, // <4,3,6,7>: Cost 3 vext3 <3,6,7,4>, <3,6,7,4>
+ 1162062365U, // <4,3,6,u>: Cost 2 vrev <3,4,u,6>
+ 3769633475U, // <4,3,7,0>: Cost 4 vext3 <1,u,3,4>, <3,7,0,1>
+ 3769633488U, // <4,3,7,1>: Cost 4 vext3 <1,u,3,4>, <3,7,1,5>
+ 3638757144U, // <4,3,7,2>: Cost 4 vext1 <2,4,3,7>, <2,4,3,7>
+ 3769633508U, // <4,3,7,3>: Cost 4 vext3 <1,u,3,4>, <3,7,3,7>
+ 3769633515U, // <4,3,7,4>: Cost 4 vext3 <1,u,3,4>, <3,7,4,5>
+ 3769633526U, // <4,3,7,5>: Cost 4 vext3 <1,u,3,4>, <3,7,5,7>
+ 3662647932U, // <4,3,7,6>: Cost 4 vext1 <6,4,3,7>, <6,4,3,7>
+ 3781208837U, // <4,3,7,7>: Cost 4 vext3 <3,7,7,4>, <3,7,7,4>
+ 3769633547U, // <4,3,7,u>: Cost 4 vext3 <1,u,3,4>, <3,7,u,1>
+ 2559049830U, // <4,3,u,0>: Cost 3 vext1 <1,4,3,u>, LHS
+ 2691910430U, // <4,3,u,1>: Cost 3 vext3 <1,2,3,4>, <3,u,1,2>
+ 2565023513U, // <4,3,u,2>: Cost 3 vext1 <2,4,3,u>, <2,4,3,u>
+ 2707835698U, // <4,3,u,3>: Cost 3 vext3 <3,u,3,4>, <3,u,3,4>
+ 2559053110U, // <4,3,u,4>: Cost 3 vext1 <1,4,3,u>, RHS
+ 1161857540U, // <4,3,u,5>: Cost 2 vrev <3,4,5,u>
+ 2235673101U, // <4,3,u,6>: Cost 3 vrev <3,4,6,u>
+ 2708130646U, // <4,3,u,7>: Cost 3 vext3 <3,u,7,4>, <3,u,7,4>
+ 1162078751U, // <4,3,u,u>: Cost 2 vrev <3,4,u,u>
+ 2617573416U, // <4,4,0,0>: Cost 3 vext2 <0,0,4,4>, <0,0,4,4>
+ 1570373734U, // <4,4,0,1>: Cost 2 vext2 <4,4,4,4>, LHS
+ 2779676774U, // <4,4,0,2>: Cost 3 vuzpl <4,6,4,6>, LHS
+ 3760196480U, // <4,4,0,3>: Cost 4 vext3 <0,3,1,4>, <4,0,3,1>
+ 2576977100U, // <4,4,0,4>: Cost 3 vext1 <4,4,4,0>, <4,4,4,0>
+ 2718747538U, // <4,4,0,5>: Cost 3 vext3 <5,6,7,4>, <4,0,5,1>
+ 2718747548U, // <4,4,0,6>: Cost 3 vext3 <5,6,7,4>, <4,0,6,2>
+ 3668637015U, // <4,4,0,7>: Cost 4 vext1 <7,4,4,0>, <7,4,4,0>
+ 1570374301U, // <4,4,0,u>: Cost 2 vext2 <4,4,4,4>, LHS
+ 2644116214U, // <4,4,1,0>: Cost 3 vext2 <4,4,4,4>, <1,0,3,2>
+ 2644116276U, // <4,4,1,1>: Cost 3 vext2 <4,4,4,4>, <1,1,1,1>
+ 2691910602U, // <4,4,1,2>: Cost 3 vext3 <1,2,3,4>, <4,1,2,3>
+ 2644116440U, // <4,4,1,3>: Cost 3 vext2 <4,4,4,4>, <1,3,1,3>
+ 2711227356U, // <4,4,1,4>: Cost 3 vext3 <4,4,4,4>, <4,1,4,3>
+ 2709310438U, // <4,4,1,5>: Cost 3 vext3 <4,1,5,4>, <4,1,5,4>
+ 3765652462U, // <4,4,1,6>: Cost 4 vext3 <1,2,3,4>, <4,1,6,3>
+ 3768970231U, // <4,4,1,7>: Cost 4 vext3 <1,7,3,4>, <4,1,7,3>
+ 2695891968U, // <4,4,1,u>: Cost 3 vext3 <1,u,3,4>, <4,1,u,3>
+ 3703260634U, // <4,4,2,0>: Cost 4 vext2 <2,0,4,4>, <2,0,4,4>
+ 3765652499U, // <4,4,2,1>: Cost 4 vext3 <1,2,3,4>, <4,2,1,4>
+ 2644117096U, // <4,4,2,2>: Cost 3 vext2 <4,4,4,4>, <2,2,2,2>
+ 2631509709U, // <4,4,2,3>: Cost 3 vext2 <2,3,4,4>, <2,3,4,4>
+ 2644117269U, // <4,4,2,4>: Cost 3 vext2 <4,4,4,4>, <2,4,3,4>
+ 3705251698U, // <4,4,2,5>: Cost 4 vext2 <2,3,4,4>, <2,5,4,7>
+ 2710047808U, // <4,4,2,6>: Cost 3 vext3 <4,2,6,4>, <4,2,6,4>
+ 3783863369U, // <4,4,2,7>: Cost 4 vext3 <4,2,7,4>, <4,2,7,4>
+ 2634827874U, // <4,4,2,u>: Cost 3 vext2 <2,u,4,4>, <2,u,4,4>
+ 2644117654U, // <4,4,3,0>: Cost 3 vext2 <4,4,4,4>, <3,0,1,2>
+ 3638797210U, // <4,4,3,1>: Cost 4 vext1 <2,4,4,3>, <1,2,3,4>
+ 3638798082U, // <4,4,3,2>: Cost 4 vext1 <2,4,4,3>, <2,4,1,3>
+ 2637482406U, // <4,4,3,3>: Cost 3 vext2 <3,3,4,4>, <3,3,4,4>
+ 2638146039U, // <4,4,3,4>: Cost 3 vext2 <3,4,4,4>, <3,4,4,4>
+ 3913287374U, // <4,4,3,5>: Cost 4 vuzpr <3,4,5,4>, <2,3,4,5>
+ 3765652625U, // <4,4,3,6>: Cost 4 vext3 <1,2,3,4>, <4,3,6,4>
+ 3713878762U, // <4,4,3,7>: Cost 4 vext2 <3,7,4,4>, <3,7,4,4>
+ 2637482406U, // <4,4,3,u>: Cost 3 vext2 <3,3,4,4>, <3,3,4,4>
+ 1503264870U, // <4,4,4,0>: Cost 2 vext1 <4,4,4,4>, LHS
+ 2577007514U, // <4,4,4,1>: Cost 3 vext1 <4,4,4,4>, <1,2,3,4>
+ 2577008232U, // <4,4,4,2>: Cost 3 vext1 <4,4,4,4>, <2,2,2,2>
+ 2571037175U, // <4,4,4,3>: Cost 3 vext1 <3,4,4,4>, <3,4,4,4>
+ 161926454U, // <4,4,4,4>: Cost 1 vdup0 RHS
+ 1570377014U, // <4,4,4,5>: Cost 2 vext2 <4,4,4,4>, RHS
+ 2779680054U, // <4,4,4,6>: Cost 3 vuzpl <4,6,4,6>, RHS
+ 2594927963U, // <4,4,4,7>: Cost 3 vext1 <7,4,4,4>, <7,4,4,4>
+ 161926454U, // <4,4,4,u>: Cost 1 vdup0 RHS
+ 2571042918U, // <4,4,5,0>: Cost 3 vext1 <3,4,4,5>, LHS
+ 2571043738U, // <4,4,5,1>: Cost 3 vext1 <3,4,4,5>, <1,2,3,4>
+ 3638814495U, // <4,4,5,2>: Cost 4 vext1 <2,4,4,5>, <2,4,4,5>
+ 2571045368U, // <4,4,5,3>: Cost 3 vext1 <3,4,4,5>, <3,4,4,5>
+ 2571046198U, // <4,4,5,4>: Cost 3 vext1 <3,4,4,5>, RHS
+ 1839648054U, // <4,4,5,5>: Cost 2 vzipl RHS, RHS
+ 1618169142U, // <4,4,5,6>: Cost 2 vext3 <1,2,3,4>, RHS
+ 2594936156U, // <4,4,5,7>: Cost 3 vext1 <7,4,4,5>, <7,4,4,5>
+ 1618169160U, // <4,4,5,u>: Cost 2 vext3 <1,2,3,4>, RHS
+ 2553135206U, // <4,4,6,0>: Cost 3 vext1 <0,4,4,6>, LHS
+ 3626877686U, // <4,4,6,1>: Cost 4 vext1 <0,4,4,6>, <1,0,3,2>
+ 2565080782U, // <4,4,6,2>: Cost 3 vext1 <2,4,4,6>, <2,3,4,5>
+ 2571053561U, // <4,4,6,3>: Cost 3 vext1 <3,4,4,6>, <3,4,4,6>
+ 2553138486U, // <4,4,6,4>: Cost 3 vext1 <0,4,4,6>, RHS
+ 2241555675U, // <4,4,6,5>: Cost 3 vrev <4,4,5,6>
+ 1973865782U, // <4,4,6,6>: Cost 2 vtrnl RHS, RHS
+ 2658055029U, // <4,4,6,7>: Cost 3 vext2 <6,7,4,4>, <6,7,4,4>
+ 1973865800U, // <4,4,6,u>: Cost 2 vtrnl RHS, RHS
+ 2644120570U, // <4,4,7,0>: Cost 3 vext2 <4,4,4,4>, <7,0,1,2>
+ 3638829978U, // <4,4,7,1>: Cost 4 vext1 <2,4,4,7>, <1,2,3,4>
+ 3638830881U, // <4,4,7,2>: Cost 4 vext1 <2,4,4,7>, <2,4,4,7>
+ 3735115018U, // <4,4,7,3>: Cost 4 vext2 <7,3,4,4>, <7,3,4,4>
+ 2662036827U, // <4,4,7,4>: Cost 3 vext2 <7,4,4,4>, <7,4,4,4>
+ 2713292236U, // <4,4,7,5>: Cost 3 vext3 <4,7,5,4>, <4,7,5,4>
+ 2713365973U, // <4,4,7,6>: Cost 3 vext3 <4,7,6,4>, <4,7,6,4>
+ 2644121196U, // <4,4,7,7>: Cost 3 vext2 <4,4,4,4>, <7,7,7,7>
+ 2662036827U, // <4,4,7,u>: Cost 3 vext2 <7,4,4,4>, <7,4,4,4>
+ 1503297638U, // <4,4,u,0>: Cost 2 vext1 <4,4,4,u>, LHS
+ 1570379566U, // <4,4,u,1>: Cost 2 vext2 <4,4,4,4>, LHS
+ 2779682606U, // <4,4,u,2>: Cost 3 vuzpl <4,6,4,6>, LHS
+ 2571069947U, // <4,4,u,3>: Cost 3 vext1 <3,4,4,u>, <3,4,4,u>
+ 161926454U, // <4,4,u,4>: Cost 1 vdup0 RHS
+ 1841638710U, // <4,4,u,5>: Cost 2 vzipl RHS, RHS
+ 1618169385U, // <4,4,u,6>: Cost 2 vext3 <1,2,3,4>, RHS
+ 2594960735U, // <4,4,u,7>: Cost 3 vext1 <7,4,4,u>, <7,4,4,u>
+ 161926454U, // <4,4,u,u>: Cost 1 vdup0 RHS
+ 2631516160U, // <4,5,0,0>: Cost 3 vext2 <2,3,4,5>, <0,0,0,0>
+ 1557774438U, // <4,5,0,1>: Cost 2 vext2 <2,3,4,5>, LHS
+ 2618908875U, // <4,5,0,2>: Cost 3 vext2 <0,2,4,5>, <0,2,4,5>
+ 2571078140U, // <4,5,0,3>: Cost 3 vext1 <3,4,5,0>, <3,4,5,0>
+ 2626871634U, // <4,5,0,4>: Cost 3 vext2 <1,5,4,5>, <0,4,1,5>
+ 3705258414U, // <4,5,0,5>: Cost 4 vext2 <2,3,4,5>, <0,5,2,7>
+ 2594968438U, // <4,5,0,6>: Cost 3 vext1 <7,4,5,0>, <6,7,4,5>
+ 2594968928U, // <4,5,0,7>: Cost 3 vext1 <7,4,5,0>, <7,4,5,0>
+ 1557775005U, // <4,5,0,u>: Cost 2 vext2 <2,3,4,5>, LHS
+ 2631516918U, // <4,5,1,0>: Cost 3 vext2 <2,3,4,5>, <1,0,3,2>
+ 2624217939U, // <4,5,1,1>: Cost 3 vext2 <1,1,4,5>, <1,1,4,5>
+ 2631517078U, // <4,5,1,2>: Cost 3 vext2 <2,3,4,5>, <1,2,3,0>
+ 2821341286U, // <4,5,1,3>: Cost 3 vuzpr <0,4,1,5>, LHS
+ 3895086054U, // <4,5,1,4>: Cost 4 vuzpr <0,4,1,5>, <4,1,5,4>
+ 2626872471U, // <4,5,1,5>: Cost 3 vext2 <1,5,4,5>, <1,5,4,5>
+ 3895083131U, // <4,5,1,6>: Cost 4 vuzpr <0,4,1,5>, <0,1,4,6>
+ 2718748368U, // <4,5,1,7>: Cost 3 vext3 <5,6,7,4>, <5,1,7,3>
+ 2821341291U, // <4,5,1,u>: Cost 3 vuzpr <0,4,1,5>, LHS
+ 2571092070U, // <4,5,2,0>: Cost 3 vext1 <3,4,5,2>, LHS
+ 3699287585U, // <4,5,2,1>: Cost 4 vext2 <1,3,4,5>, <2,1,3,3>
+ 2630854269U, // <4,5,2,2>: Cost 3 vext2 <2,2,4,5>, <2,2,4,5>
+ 1557776078U, // <4,5,2,3>: Cost 2 vext2 <2,3,4,5>, <2,3,4,5>
+ 2631517974U, // <4,5,2,4>: Cost 3 vext2 <2,3,4,5>, <2,4,3,5>
+ 3692652384U, // <4,5,2,5>: Cost 4 vext2 <0,2,4,5>, <2,5,2,7>
+ 2631518138U, // <4,5,2,6>: Cost 3 vext2 <2,3,4,5>, <2,6,3,7>
+ 4164013366U, // <4,5,2,7>: Cost 4 vtrnr <0,4,u,2>, RHS
+ 1561094243U, // <4,5,2,u>: Cost 2 vext2 <2,u,4,5>, <2,u,4,5>
+ 2631518358U, // <4,5,3,0>: Cost 3 vext2 <2,3,4,5>, <3,0,1,2>
+ 3895084710U, // <4,5,3,1>: Cost 4 vuzpr <0,4,1,5>, <2,3,0,1>
+ 2631518540U, // <4,5,3,2>: Cost 3 vext2 <2,3,4,5>, <3,2,3,4>
+ 2631518620U, // <4,5,3,3>: Cost 3 vext2 <2,3,4,5>, <3,3,3,3>
+ 2631518716U, // <4,5,3,4>: Cost 3 vext2 <2,3,4,5>, <3,4,5,0>
+ 2631518784U, // <4,5,3,5>: Cost 3 vext2 <2,3,4,5>, <3,5,3,5>
+ 2658060980U, // <4,5,3,6>: Cost 3 vext2 <6,7,4,5>, <3,6,7,4>
+ 2640145131U, // <4,5,3,7>: Cost 3 vext2 <3,7,4,5>, <3,7,4,5>
+ 2631519006U, // <4,5,3,u>: Cost 3 vext2 <2,3,4,5>, <3,u,1,2>
+ 2571108454U, // <4,5,4,0>: Cost 3 vext1 <3,4,5,4>, LHS
+ 3632907342U, // <4,5,4,1>: Cost 4 vext1 <1,4,5,4>, <1,4,5,4>
+ 2571110094U, // <4,5,4,2>: Cost 3 vext1 <3,4,5,4>, <2,3,4,5>
+ 2571110912U, // <4,5,4,3>: Cost 3 vext1 <3,4,5,4>, <3,4,5,4>
+ 2571111734U, // <4,5,4,4>: Cost 3 vext1 <3,4,5,4>, RHS
+ 1557777718U, // <4,5,4,5>: Cost 2 vext2 <2,3,4,5>, RHS
+ 2645454195U, // <4,5,4,6>: Cost 3 vext2 <4,6,4,5>, <4,6,4,5>
+ 2718748614U, // <4,5,4,7>: Cost 3 vext3 <5,6,7,4>, <5,4,7,6>
+ 1557777961U, // <4,5,4,u>: Cost 2 vext2 <2,3,4,5>, RHS
+ 1503346790U, // <4,5,5,0>: Cost 2 vext1 <4,4,5,5>, LHS
+ 2913398480U, // <4,5,5,1>: Cost 3 vzipl RHS, <5,1,7,3>
+ 2631519998U, // <4,5,5,2>: Cost 3 vext2 <2,3,4,5>, <5,2,3,4>
+ 2577090710U, // <4,5,5,3>: Cost 3 vext1 <4,4,5,5>, <3,0,1,2>
+ 1503349978U, // <4,5,5,4>: Cost 2 vext1 <4,4,5,5>, <4,4,5,5>
+ 2631520260U, // <4,5,5,5>: Cost 3 vext2 <2,3,4,5>, <5,5,5,5>
+ 2913390690U, // <4,5,5,6>: Cost 3 vzipl RHS, <5,6,7,0>
+ 2821344566U, // <4,5,5,7>: Cost 3 vuzpr <0,4,1,5>, RHS
+ 1503352622U, // <4,5,5,u>: Cost 2 vext1 <4,4,5,5>, LHS
+ 1497383014U, // <4,5,6,0>: Cost 2 vext1 <3,4,5,6>, LHS
+ 2559181904U, // <4,5,6,1>: Cost 3 vext1 <1,4,5,6>, <1,4,5,6>
+ 2565154601U, // <4,5,6,2>: Cost 3 vext1 <2,4,5,6>, <2,4,5,6>
+ 1497385474U, // <4,5,6,3>: Cost 2 vext1 <3,4,5,6>, <3,4,5,6>
+ 1497386294U, // <4,5,6,4>: Cost 2 vext1 <3,4,5,6>, RHS
+ 3047608324U, // <4,5,6,5>: Cost 3 vtrnl RHS, <5,5,5,5>
+ 2571129656U, // <4,5,6,6>: Cost 3 vext1 <3,4,5,6>, <6,6,6,6>
+ 27705344U, // <4,5,6,7>: Cost 0 copy RHS
+ 27705344U, // <4,5,6,u>: Cost 0 copy RHS
+ 2565161062U, // <4,5,7,0>: Cost 3 vext1 <2,4,5,7>, LHS
+ 2565161882U, // <4,5,7,1>: Cost 3 vext1 <2,4,5,7>, <1,2,3,4>
+ 2565162794U, // <4,5,7,2>: Cost 3 vext1 <2,4,5,7>, <2,4,5,7>
+ 2661381387U, // <4,5,7,3>: Cost 3 vext2 <7,3,4,5>, <7,3,4,5>
+ 2565164342U, // <4,5,7,4>: Cost 3 vext1 <2,4,5,7>, RHS
+ 2718748840U, // <4,5,7,5>: Cost 3 vext3 <5,6,7,4>, <5,7,5,7>
+ 2718748846U, // <4,5,7,6>: Cost 3 vext3 <5,6,7,4>, <5,7,6,4>
+ 2719412407U, // <4,5,7,7>: Cost 3 vext3 <5,7,7,4>, <5,7,7,4>
+ 2565166894U, // <4,5,7,u>: Cost 3 vext1 <2,4,5,7>, LHS
+ 1497399398U, // <4,5,u,0>: Cost 2 vext1 <3,4,5,u>, LHS
+ 1557780270U, // <4,5,u,1>: Cost 2 vext2 <2,3,4,5>, LHS
+ 2631522181U, // <4,5,u,2>: Cost 3 vext2 <2,3,4,5>, <u,2,3,0>
+ 1497401860U, // <4,5,u,3>: Cost 2 vext1 <3,4,5,u>, <3,4,5,u>
+ 1497402678U, // <4,5,u,4>: Cost 2 vext1 <3,4,5,u>, RHS
+ 1557780634U, // <4,5,u,5>: Cost 2 vext2 <2,3,4,5>, RHS
+ 2631522512U, // <4,5,u,6>: Cost 3 vext2 <2,3,4,5>, <u,6,3,7>
+ 27705344U, // <4,5,u,7>: Cost 0 copy RHS
+ 27705344U, // <4,5,u,u>: Cost 0 copy RHS
+ 2618916864U, // <4,6,0,0>: Cost 3 vext2 <0,2,4,6>, <0,0,0,0>
+ 1545175142U, // <4,6,0,1>: Cost 2 vext2 <0,2,4,6>, LHS
+ 1545175244U, // <4,6,0,2>: Cost 2 vext2 <0,2,4,6>, <0,2,4,6>
+ 3692658940U, // <4,6,0,3>: Cost 4 vext2 <0,2,4,6>, <0,3,1,0>
+ 2618917202U, // <4,6,0,4>: Cost 3 vext2 <0,2,4,6>, <0,4,1,5>
+ 3852910806U, // <4,6,0,5>: Cost 4 vuzpl RHS, <0,2,5,7>
+ 2253525648U, // <4,6,0,6>: Cost 3 vrev <6,4,6,0>
+ 4040764726U, // <4,6,0,7>: Cost 4 vzipr <2,3,4,0>, RHS
+ 1545175709U, // <4,6,0,u>: Cost 2 vext2 <0,2,4,6>, LHS
+ 2618917622U, // <4,6,1,0>: Cost 3 vext2 <0,2,4,6>, <1,0,3,2>
+ 2618917684U, // <4,6,1,1>: Cost 3 vext2 <0,2,4,6>, <1,1,1,1>
+ 2618917782U, // <4,6,1,2>: Cost 3 vext2 <0,2,4,6>, <1,2,3,0>
+ 2618917848U, // <4,6,1,3>: Cost 3 vext2 <0,2,4,6>, <1,3,1,3>
+ 3692659773U, // <4,6,1,4>: Cost 4 vext2 <0,2,4,6>, <1,4,3,5>
+ 2618918032U, // <4,6,1,5>: Cost 3 vext2 <0,2,4,6>, <1,5,3,7>
+ 3692659937U, // <4,6,1,6>: Cost 4 vext2 <0,2,4,6>, <1,6,3,7>
+ 4032146742U, // <4,6,1,7>: Cost 4 vzipr <0,u,4,1>, RHS
+ 2618918253U, // <4,6,1,u>: Cost 3 vext2 <0,2,4,6>, <1,u,1,3>
+ 2618918380U, // <4,6,2,0>: Cost 3 vext2 <0,2,4,6>, <2,0,6,4>
+ 2618918460U, // <4,6,2,1>: Cost 3 vext2 <0,2,4,6>, <2,1,6,3>
+ 2618918504U, // <4,6,2,2>: Cost 3 vext2 <0,2,4,6>, <2,2,2,2>
+ 2618918566U, // <4,6,2,3>: Cost 3 vext2 <0,2,4,6>, <2,3,0,1>
+ 2618918679U, // <4,6,2,4>: Cost 3 vext2 <0,2,4,6>, <2,4,3,6>
+ 2618918788U, // <4,6,2,5>: Cost 3 vext2 <0,2,4,6>, <2,5,6,7>
+ 2618918842U, // <4,6,2,6>: Cost 3 vext2 <0,2,4,6>, <2,6,3,7>
+ 2718749178U, // <4,6,2,7>: Cost 3 vext3 <5,6,7,4>, <6,2,7,3>
+ 2618918971U, // <4,6,2,u>: Cost 3 vext2 <0,2,4,6>, <2,u,0,1>
+ 2618919062U, // <4,6,3,0>: Cost 3 vext2 <0,2,4,6>, <3,0,1,2>
+ 2636171526U, // <4,6,3,1>: Cost 3 vext2 <3,1,4,6>, <3,1,4,6>
+ 3692661057U, // <4,6,3,2>: Cost 4 vext2 <0,2,4,6>, <3,2,2,2>
+ 2618919324U, // <4,6,3,3>: Cost 3 vext2 <0,2,4,6>, <3,3,3,3>
+ 2618919426U, // <4,6,3,4>: Cost 3 vext2 <0,2,4,6>, <3,4,5,6>
+ 2638826058U, // <4,6,3,5>: Cost 3 vext2 <3,5,4,6>, <3,5,4,6>
+ 3913303030U, // <4,6,3,6>: Cost 4 vuzpr <3,4,5,6>, <1,3,4,6>
+ 2722730572U, // <4,6,3,7>: Cost 3 vext3 <6,3,7,4>, <6,3,7,4>
+ 2618919710U, // <4,6,3,u>: Cost 3 vext2 <0,2,4,6>, <3,u,1,2>
+ 2565210214U, // <4,6,4,0>: Cost 3 vext1 <2,4,6,4>, LHS
+ 2718749286U, // <4,6,4,1>: Cost 3 vext3 <5,6,7,4>, <6,4,1,3>
+ 2565211952U, // <4,6,4,2>: Cost 3 vext1 <2,4,6,4>, <2,4,6,4>
+ 2571184649U, // <4,6,4,3>: Cost 3 vext1 <3,4,6,4>, <3,4,6,4>
+ 2565213494U, // <4,6,4,4>: Cost 3 vext1 <2,4,6,4>, RHS
+ 1545178422U, // <4,6,4,5>: Cost 2 vext2 <0,2,4,6>, RHS
+ 1705430326U, // <4,6,4,6>: Cost 2 vuzpl RHS, RHS
+ 2595075437U, // <4,6,4,7>: Cost 3 vext1 <7,4,6,4>, <7,4,6,4>
+ 1545178665U, // <4,6,4,u>: Cost 2 vext2 <0,2,4,6>, RHS
+ 2565218406U, // <4,6,5,0>: Cost 3 vext1 <2,4,6,5>, LHS
+ 2645462736U, // <4,6,5,1>: Cost 3 vext2 <4,6,4,6>, <5,1,7,3>
+ 2913399290U, // <4,6,5,2>: Cost 3 vzipl RHS, <6,2,7,3>
+ 3913305394U, // <4,6,5,3>: Cost 4 vuzpr <3,4,5,6>, <4,5,6,3>
+ 2645462982U, // <4,6,5,4>: Cost 3 vext2 <4,6,4,6>, <5,4,7,6>
+ 2779172868U, // <4,6,5,5>: Cost 3 vuzpl RHS, <5,5,5,5>
+ 2913391416U, // <4,6,5,6>: Cost 3 vzipl RHS, <6,6,6,6>
+ 2821426486U, // <4,6,5,7>: Cost 3 vuzpr <0,4,2,6>, RHS
+ 2821426487U, // <4,6,5,u>: Cost 3 vuzpr <0,4,2,6>, RHS
+ 1503428710U, // <4,6,6,0>: Cost 2 vext1 <4,4,6,6>, LHS
+ 2577171190U, // <4,6,6,1>: Cost 3 vext1 <4,4,6,6>, <1,0,3,2>
+ 2645463546U, // <4,6,6,2>: Cost 3 vext2 <4,6,4,6>, <6,2,7,3>
+ 2577172630U, // <4,6,6,3>: Cost 3 vext1 <4,4,6,6>, <3,0,1,2>
+ 1503431908U, // <4,6,6,4>: Cost 2 vext1 <4,4,6,6>, <4,4,6,6>
+ 2253501069U, // <4,6,6,5>: Cost 3 vrev <6,4,5,6>
+ 2618921784U, // <4,6,6,6>: Cost 3 vext2 <0,2,4,6>, <6,6,6,6>
+ 2954464566U, // <4,6,6,7>: Cost 3 vzipr <0,2,4,6>, RHS
+ 1503434542U, // <4,6,6,u>: Cost 2 vext1 <4,4,6,6>, LHS
+ 2645464058U, // <4,6,7,0>: Cost 3 vext2 <4,6,4,6>, <7,0,1,2>
+ 2779173882U, // <4,6,7,1>: Cost 3 vuzpl RHS, <7,0,1,2>
+ 3638978355U, // <4,6,7,2>: Cost 4 vext1 <2,4,6,7>, <2,4,6,7>
+ 2725090156U, // <4,6,7,3>: Cost 3 vext3 <6,7,3,4>, <6,7,3,4>
+ 2645464422U, // <4,6,7,4>: Cost 3 vext2 <4,6,4,6>, <7,4,5,6>
+ 2779174246U, // <4,6,7,5>: Cost 3 vuzpl RHS, <7,4,5,6>
+ 3852915914U, // <4,6,7,6>: Cost 4 vuzpl RHS, <7,2,6,3>
+ 2779174508U, // <4,6,7,7>: Cost 3 vuzpl RHS, <7,7,7,7>
+ 2779173945U, // <4,6,7,u>: Cost 3 vuzpl RHS, <7,0,u,2>
+ 1503445094U, // <4,6,u,0>: Cost 2 vext1 <4,4,6,u>, LHS
+ 1545180974U, // <4,6,u,1>: Cost 2 vext2 <0,2,4,6>, LHS
+ 1705432878U, // <4,6,u,2>: Cost 2 vuzpl RHS, LHS
+ 2618922940U, // <4,6,u,3>: Cost 3 vext2 <0,2,4,6>, <u,3,0,1>
+ 1503448294U, // <4,6,u,4>: Cost 2 vext1 <4,4,6,u>, <4,4,6,u>
+ 1545181338U, // <4,6,u,5>: Cost 2 vext2 <0,2,4,6>, RHS
+ 1705433242U, // <4,6,u,6>: Cost 2 vuzpl RHS, RHS
+ 2954480950U, // <4,6,u,7>: Cost 3 vzipr <0,2,4,u>, RHS
+ 1545181541U, // <4,6,u,u>: Cost 2 vext2 <0,2,4,6>, LHS
+ 3706601472U, // <4,7,0,0>: Cost 4 vext2 <2,5,4,7>, <0,0,0,0>
+ 2632859750U, // <4,7,0,1>: Cost 3 vext2 <2,5,4,7>, LHS
+ 2726343685U, // <4,7,0,2>: Cost 3 vext3 <7,0,2,4>, <7,0,2,4>
+ 3701293312U, // <4,7,0,3>: Cost 4 vext2 <1,6,4,7>, <0,3,1,4>
+ 3706601810U, // <4,7,0,4>: Cost 4 vext2 <2,5,4,7>, <0,4,1,5>
+ 2259424608U, // <4,7,0,5>: Cost 3 vrev <7,4,5,0>
+ 3695321617U, // <4,7,0,6>: Cost 4 vext2 <0,6,4,7>, <0,6,4,7>
+ 3800454194U, // <4,7,0,7>: Cost 4 vext3 <7,0,7,4>, <7,0,7,4>
+ 2632860317U, // <4,7,0,u>: Cost 3 vext2 <2,5,4,7>, LHS
+ 2259064116U, // <4,7,1,0>: Cost 3 vrev <7,4,0,1>
+ 3700630324U, // <4,7,1,1>: Cost 4 vext2 <1,5,4,7>, <1,1,1,1>
+ 2632860570U, // <4,7,1,2>: Cost 3 vext2 <2,5,4,7>, <1,2,3,4>
+ 3769635936U, // <4,7,1,3>: Cost 4 vext3 <1,u,3,4>, <7,1,3,5>
+ 3656920374U, // <4,7,1,4>: Cost 4 vext1 <5,4,7,1>, RHS
+ 3700630681U, // <4,7,1,5>: Cost 4 vext2 <1,5,4,7>, <1,5,4,7>
+ 3701294314U, // <4,7,1,6>: Cost 4 vext2 <1,6,4,7>, <1,6,4,7>
+ 3793818754U, // <4,7,1,7>: Cost 4 vext3 <5,u,7,4>, <7,1,7,3>
+ 2259654012U, // <4,7,1,u>: Cost 3 vrev <7,4,u,1>
+ 3656925286U, // <4,7,2,0>: Cost 4 vext1 <5,4,7,2>, LHS
+ 3706603050U, // <4,7,2,1>: Cost 4 vext2 <2,5,4,7>, <2,1,4,3>
+ 3706603112U, // <4,7,2,2>: Cost 4 vext2 <2,5,4,7>, <2,2,2,2>
+ 2727744688U, // <4,7,2,3>: Cost 3 vext3 <7,2,3,4>, <7,2,3,4>
+ 3705939745U, // <4,7,2,4>: Cost 4 vext2 <2,4,4,7>, <2,4,4,7>
+ 2632861554U, // <4,7,2,5>: Cost 3 vext2 <2,5,4,7>, <2,5,4,7>
+ 3706603450U, // <4,7,2,6>: Cost 4 vext2 <2,5,4,7>, <2,6,3,7>
+ 3792491731U, // <4,7,2,7>: Cost 4 vext3 <5,6,7,4>, <7,2,7,3>
+ 2634852453U, // <4,7,2,u>: Cost 3 vext2 <2,u,4,7>, <2,u,4,7>
+ 3706603670U, // <4,7,3,0>: Cost 4 vext2 <2,5,4,7>, <3,0,1,2>
+ 3662906266U, // <4,7,3,1>: Cost 4 vext1 <6,4,7,3>, <1,2,3,4>
+ 3725183326U, // <4,7,3,2>: Cost 4 vext2 <5,6,4,7>, <3,2,5,4>
+ 3706603932U, // <4,7,3,3>: Cost 4 vext2 <2,5,4,7>, <3,3,3,3>
+ 3701295618U, // <4,7,3,4>: Cost 4 vext2 <1,6,4,7>, <3,4,5,6>
+ 2638834251U, // <4,7,3,5>: Cost 3 vext2 <3,5,4,7>, <3,5,4,7>
+ 2639497884U, // <4,7,3,6>: Cost 3 vext2 <3,6,4,7>, <3,6,4,7>
+ 3802445093U, // <4,7,3,7>: Cost 4 vext3 <7,3,7,4>, <7,3,7,4>
+ 2640825150U, // <4,7,3,u>: Cost 3 vext2 <3,u,4,7>, <3,u,4,7>
+ 2718750004U, // <4,7,4,0>: Cost 3 vext3 <5,6,7,4>, <7,4,0,1>
+ 3706604490U, // <4,7,4,1>: Cost 4 vext2 <2,5,4,7>, <4,1,2,3>
+ 3656943474U, // <4,7,4,2>: Cost 4 vext1 <5,4,7,4>, <2,5,4,7>
+ 3779884371U, // <4,7,4,3>: Cost 4 vext3 <3,5,7,4>, <7,4,3,5>
+ 2259383643U, // <4,7,4,4>: Cost 3 vrev <7,4,4,4>
+ 2632863030U, // <4,7,4,5>: Cost 3 vext2 <2,5,4,7>, RHS
+ 2259531117U, // <4,7,4,6>: Cost 3 vrev <7,4,6,4>
+ 3907340074U, // <4,7,4,7>: Cost 4 vuzpr <2,4,5,7>, <2,4,5,7>
+ 2632863273U, // <4,7,4,u>: Cost 3 vext2 <2,5,4,7>, RHS
+ 2913391610U, // <4,7,5,0>: Cost 3 vzipl RHS, <7,0,1,2>
+ 3645006848U, // <4,7,5,1>: Cost 4 vext1 <3,4,7,5>, <1,3,5,7>
+ 2589181646U, // <4,7,5,2>: Cost 3 vext1 <6,4,7,5>, <2,3,4,5>
+ 3645008403U, // <4,7,5,3>: Cost 4 vext1 <3,4,7,5>, <3,4,7,5>
+ 2913391974U, // <4,7,5,4>: Cost 3 vzipl RHS, <7,4,5,6>
+ 2583211973U, // <4,7,5,5>: Cost 3 vext1 <5,4,7,5>, <5,4,7,5>
+ 2589184670U, // <4,7,5,6>: Cost 3 vext1 <6,4,7,5>, <6,4,7,5>
+ 2913392236U, // <4,7,5,7>: Cost 3 vzipl RHS, <7,7,7,7>
+ 2913392258U, // <4,7,5,u>: Cost 3 vzipl RHS, <7,u,1,2>
+ 1509474406U, // <4,7,6,0>: Cost 2 vext1 <5,4,7,6>, LHS
+ 3047609338U, // <4,7,6,1>: Cost 3 vtrnl RHS, <7,0,1,2>
+ 2583217768U, // <4,7,6,2>: Cost 3 vext1 <5,4,7,6>, <2,2,2,2>
+ 2583218326U, // <4,7,6,3>: Cost 3 vext1 <5,4,7,6>, <3,0,1,2>
+ 1509477686U, // <4,7,6,4>: Cost 2 vext1 <5,4,7,6>, RHS
+ 1509478342U, // <4,7,6,5>: Cost 2 vext1 <5,4,7,6>, <5,4,7,6>
+ 2583220730U, // <4,7,6,6>: Cost 3 vext1 <5,4,7,6>, <6,2,7,3>
+ 3047609964U, // <4,7,6,7>: Cost 3 vtrnl RHS, <7,7,7,7>
+ 1509480238U, // <4,7,6,u>: Cost 2 vext1 <5,4,7,6>, LHS
+ 3650994278U, // <4,7,7,0>: Cost 4 vext1 <4,4,7,7>, LHS
+ 3650995098U, // <4,7,7,1>: Cost 4 vext1 <4,4,7,7>, <1,2,3,4>
+ 3650996010U, // <4,7,7,2>: Cost 4 vext1 <4,4,7,7>, <2,4,5,7>
+ 3804804677U, // <4,7,7,3>: Cost 4 vext3 <7,7,3,4>, <7,7,3,4>
+ 3650997486U, // <4,7,7,4>: Cost 4 vext1 <4,4,7,7>, <4,4,7,7>
+ 2662725039U, // <4,7,7,5>: Cost 3 vext2 <7,5,4,7>, <7,5,4,7>
+ 3662942880U, // <4,7,7,6>: Cost 4 vext1 <6,4,7,7>, <6,4,7,7>
+ 2718750316U, // <4,7,7,7>: Cost 3 vext3 <5,6,7,4>, <7,7,7,7>
+ 2664715938U, // <4,7,7,u>: Cost 3 vext2 <7,u,4,7>, <7,u,4,7>
+ 1509490790U, // <4,7,u,0>: Cost 2 vext1 <5,4,7,u>, LHS
+ 2632865582U, // <4,7,u,1>: Cost 3 vext2 <2,5,4,7>, LHS
+ 2583234152U, // <4,7,u,2>: Cost 3 vext1 <5,4,7,u>, <2,2,2,2>
+ 2583234710U, // <4,7,u,3>: Cost 3 vext1 <5,4,7,u>, <3,0,1,2>
+ 1509494070U, // <4,7,u,4>: Cost 2 vext1 <5,4,7,u>, RHS
+ 1509494728U, // <4,7,u,5>: Cost 2 vext1 <5,4,7,u>, <5,4,7,u>
+ 2583237114U, // <4,7,u,6>: Cost 3 vext1 <5,4,7,u>, <6,2,7,3>
+ 3047757420U, // <4,7,u,7>: Cost 3 vtrnl RHS, <7,7,7,7>
+ 1509496622U, // <4,7,u,u>: Cost 2 vext1 <5,4,7,u>, LHS
+ 2618933248U, // <4,u,0,0>: Cost 3 vext2 <0,2,4,u>, <0,0,0,0>
+ 1545191526U, // <4,u,0,1>: Cost 2 vext2 <0,2,4,u>, LHS
+ 1545191630U, // <4,u,0,2>: Cost 2 vext2 <0,2,4,u>, <0,2,4,u>
+ 2691913445U, // <4,u,0,3>: Cost 3 vext3 <1,2,3,4>, <u,0,3,2>
+ 2618933586U, // <4,u,0,4>: Cost 3 vext2 <0,2,4,u>, <0,4,1,5>
+ 2265397305U, // <4,u,0,5>: Cost 3 vrev <u,4,5,0>
+ 2595189625U, // <4,u,0,6>: Cost 3 vext1 <7,4,u,0>, <6,7,4,u>
+ 2595190139U, // <4,u,0,7>: Cost 3 vext1 <7,4,u,0>, <7,4,u,0>
+ 1545192093U, // <4,u,0,u>: Cost 2 vext2 <0,2,4,u>, LHS
+ 2618934006U, // <4,u,1,0>: Cost 3 vext2 <0,2,4,u>, <1,0,3,2>
+ 2618934068U, // <4,u,1,1>: Cost 3 vext2 <0,2,4,u>, <1,1,1,1>
+ 1618171694U, // <4,u,1,2>: Cost 2 vext3 <1,2,3,4>, LHS
+ 2618934232U, // <4,u,1,3>: Cost 3 vext2 <0,2,4,u>, <1,3,1,3>
+ 2695894848U, // <4,u,1,4>: Cost 3 vext3 <1,u,3,4>, <u,1,4,3>
+ 2618934416U, // <4,u,1,5>: Cost 3 vext2 <0,2,4,u>, <1,5,3,7>
+ 3692676321U, // <4,u,1,6>: Cost 4 vext2 <0,2,4,u>, <1,6,3,7>
+ 2718750555U, // <4,u,1,7>: Cost 3 vext3 <5,6,7,4>, <u,1,7,3>
+ 1618171748U, // <4,u,1,u>: Cost 2 vext3 <1,2,3,4>, LHS
+ 2553397350U, // <4,u,2,0>: Cost 3 vext1 <0,4,u,2>, LHS
+ 2630215215U, // <4,u,2,1>: Cost 3 vext2 <2,1,4,u>, <2,1,4,u>
+ 2618934888U, // <4,u,2,2>: Cost 3 vext2 <0,2,4,u>, <2,2,2,2>
+ 1557800657U, // <4,u,2,3>: Cost 2 vext2 <2,3,4,u>, <2,3,4,u>
+ 2618935065U, // <4,u,2,4>: Cost 3 vext2 <0,2,4,u>, <2,4,3,u>
+ 2733864859U, // <4,u,2,5>: Cost 3 vext3 <u,2,5,4>, <u,2,5,4>
+ 2618935226U, // <4,u,2,6>: Cost 3 vext2 <0,2,4,u>, <2,6,3,7>
+ 2718750636U, // <4,u,2,7>: Cost 3 vext3 <5,6,7,4>, <u,2,7,3>
+ 1561118822U, // <4,u,2,u>: Cost 2 vext2 <2,u,4,u>, <2,u,4,u>
+ 2618935446U, // <4,u,3,0>: Cost 3 vext2 <0,2,4,u>, <3,0,1,2>
+ 2779318422U, // <4,u,3,1>: Cost 3 vuzpl RHS, <3,0,1,2>
+ 2636851545U, // <4,u,3,2>: Cost 3 vext2 <3,2,4,u>, <3,2,4,u>
+ 2618935708U, // <4,u,3,3>: Cost 3 vext2 <0,2,4,u>, <3,3,3,3>
+ 2618935810U, // <4,u,3,4>: Cost 3 vext2 <0,2,4,u>, <3,4,5,6>
+ 2691913711U, // <4,u,3,5>: Cost 3 vext3 <1,2,3,4>, <u,3,5,7>
+ 2588725862U, // <4,u,3,6>: Cost 3 vext1 <6,4,1,3>, <6,4,1,3>
+ 2640169710U, // <4,u,3,7>: Cost 3 vext2 <3,7,4,u>, <3,7,4,u>
+ 2618936094U, // <4,u,3,u>: Cost 3 vext2 <0,2,4,u>, <3,u,1,2>
+ 1503559782U, // <4,u,4,0>: Cost 2 vext1 <4,4,u,4>, LHS
+ 2692282391U, // <4,u,4,1>: Cost 3 vext3 <1,2,u,4>, <u,4,1,2>
+ 2565359426U, // <4,u,4,2>: Cost 3 vext1 <2,4,u,4>, <2,4,u,4>
+ 2571332123U, // <4,u,4,3>: Cost 3 vext1 <3,4,u,4>, <3,4,u,4>
+ 161926454U, // <4,u,4,4>: Cost 1 vdup0 RHS
+ 1545194806U, // <4,u,4,5>: Cost 2 vext2 <0,2,4,u>, RHS
+ 1705577782U, // <4,u,4,6>: Cost 2 vuzpl RHS, RHS
+ 2718750801U, // <4,u,4,7>: Cost 3 vext3 <5,6,7,4>, <u,4,7,6>
+ 161926454U, // <4,u,4,u>: Cost 1 vdup0 RHS
+ 1479164006U, // <4,u,5,0>: Cost 2 vext1 <0,4,1,5>, LHS
+ 1839650606U, // <4,u,5,1>: Cost 2 vzipl RHS, LHS
+ 2565367502U, // <4,u,5,2>: Cost 3 vext1 <2,4,u,5>, <2,3,4,5>
+ 3089777309U, // <4,u,5,3>: Cost 3 vtrnr <0,4,1,5>, LHS
+ 1479167286U, // <4,u,5,4>: Cost 2 vext1 <0,4,1,5>, RHS
+ 1839650970U, // <4,u,5,5>: Cost 2 vzipl RHS, RHS
+ 1618172058U, // <4,u,5,6>: Cost 2 vext3 <1,2,3,4>, RHS
+ 3089780265U, // <4,u,5,7>: Cost 3 vtrnr <0,4,1,5>, RHS
+ 1618172076U, // <4,u,5,u>: Cost 2 vext3 <1,2,3,4>, RHS
+ 1479688294U, // <4,u,6,0>: Cost 2 vext1 <0,4,u,6>, LHS
+ 2553430774U, // <4,u,6,1>: Cost 3 vext1 <0,4,u,6>, <1,0,3,2>
+ 1973868334U, // <4,u,6,2>: Cost 2 vtrnl RHS, LHS
+ 1497606685U, // <4,u,6,3>: Cost 2 vext1 <3,4,u,6>, <3,4,u,6>
+ 1479691574U, // <4,u,6,4>: Cost 2 vext1 <0,4,u,6>, RHS
+ 1509552079U, // <4,u,6,5>: Cost 2 vext1 <5,4,u,6>, <5,4,u,6>
+ 1973868698U, // <4,u,6,6>: Cost 2 vtrnl RHS, RHS
+ 27705344U, // <4,u,6,7>: Cost 0 copy RHS
+ 27705344U, // <4,u,6,u>: Cost 0 copy RHS
+ 2565382246U, // <4,u,7,0>: Cost 3 vext1 <2,4,u,7>, LHS
+ 2565383066U, // <4,u,7,1>: Cost 3 vext1 <2,4,u,7>, <1,2,3,4>
+ 2565384005U, // <4,u,7,2>: Cost 3 vext1 <2,4,u,7>, <2,4,u,7>
+ 2661405966U, // <4,u,7,3>: Cost 3 vext2 <7,3,4,u>, <7,3,4,u>
+ 2565385526U, // <4,u,7,4>: Cost 3 vext1 <2,4,u,7>, RHS
+ 2779321702U, // <4,u,7,5>: Cost 3 vuzpl RHS, <7,4,5,6>
+ 2589274793U, // <4,u,7,6>: Cost 3 vext1 <6,4,u,7>, <6,4,u,7>
+ 2779321964U, // <4,u,7,7>: Cost 3 vuzpl RHS, <7,7,7,7>
+ 2565388078U, // <4,u,7,u>: Cost 3 vext1 <2,4,u,7>, LHS
+ 1479704678U, // <4,u,u,0>: Cost 2 vext1 <0,4,u,u>, LHS
+ 1545197358U, // <4,u,u,1>: Cost 2 vext2 <0,2,4,u>, LHS
+ 1618172261U, // <4,u,u,2>: Cost 2 vext3 <1,2,3,4>, LHS
+ 1497623071U, // <4,u,u,3>: Cost 2 vext1 <3,4,u,u>, <3,4,u,u>
+ 161926454U, // <4,u,u,4>: Cost 1 vdup0 RHS
+ 1545197722U, // <4,u,u,5>: Cost 2 vext2 <0,2,4,u>, RHS
+ 1618172301U, // <4,u,u,6>: Cost 2 vext3 <1,2,3,4>, RHS
+ 27705344U, // <4,u,u,7>: Cost 0 copy RHS
+ 27705344U, // <4,u,u,u>: Cost 0 copy RHS
+ 2687123456U, // <5,0,0,0>: Cost 3 vext3 <0,4,1,5>, <0,0,0,0>
+ 2687123466U, // <5,0,0,1>: Cost 3 vext3 <0,4,1,5>, <0,0,1,1>
+ 2687123476U, // <5,0,0,2>: Cost 3 vext3 <0,4,1,5>, <0,0,2,2>
+ 3710599434U, // <5,0,0,3>: Cost 4 vext2 <3,2,5,0>, <0,3,2,5>
+ 2642166098U, // <5,0,0,4>: Cost 3 vext2 <4,1,5,0>, <0,4,1,5>
+ 3657060306U, // <5,0,0,5>: Cost 4 vext1 <5,5,0,0>, <5,5,0,0>
+ 3292094923U, // <5,0,0,6>: Cost 4 vrev <0,5,6,0>
+ 3669005700U, // <5,0,0,7>: Cost 4 vext1 <7,5,0,0>, <7,5,0,0>
+ 2687123530U, // <5,0,0,u>: Cost 3 vext3 <0,4,1,5>, <0,0,u,2>
+ 2559434854U, // <5,0,1,0>: Cost 3 vext1 <1,5,0,1>, LHS
+ 2559435887U, // <5,0,1,1>: Cost 3 vext1 <1,5,0,1>, <1,5,0,1>
+ 1613381734U, // <5,0,1,2>: Cost 2 vext3 <0,4,1,5>, LHS
+ 3698656256U, // <5,0,1,3>: Cost 4 vext2 <1,2,5,0>, <1,3,5,7>
+ 2559438134U, // <5,0,1,4>: Cost 3 vext1 <1,5,0,1>, RHS
+ 2583326675U, // <5,0,1,5>: Cost 3 vext1 <5,5,0,1>, <5,5,0,1>
+ 3715908851U, // <5,0,1,6>: Cost 4 vext2 <4,1,5,0>, <1,6,5,7>
+ 3657069562U, // <5,0,1,7>: Cost 4 vext1 <5,5,0,1>, <7,0,1,2>
+ 1613381788U, // <5,0,1,u>: Cost 2 vext3 <0,4,1,5>, LHS
+ 2686017700U, // <5,0,2,0>: Cost 3 vext3 <0,2,4,5>, <0,2,0,2>
+ 2685796528U, // <5,0,2,1>: Cost 3 vext3 <0,2,1,5>, <0,2,1,5>
+ 2698625208U, // <5,0,2,2>: Cost 3 vext3 <2,3,4,5>, <0,2,2,4>
+ 2685944002U, // <5,0,2,3>: Cost 3 vext3 <0,2,3,5>, <0,2,3,5>
+ 2686017739U, // <5,0,2,4>: Cost 3 vext3 <0,2,4,5>, <0,2,4,5>
+ 2686091476U, // <5,0,2,5>: Cost 3 vext3 <0,2,5,5>, <0,2,5,5>
+ 2725167324U, // <5,0,2,6>: Cost 3 vext3 <6,7,4,5>, <0,2,6,4>
+ 2595280230U, // <5,0,2,7>: Cost 3 vext1 <7,5,0,2>, <7,4,5,6>
+ 2686312687U, // <5,0,2,u>: Cost 3 vext3 <0,2,u,5>, <0,2,u,5>
+ 3760128248U, // <5,0,3,0>: Cost 4 vext3 <0,3,0,5>, <0,3,0,5>
+ 3759685888U, // <5,0,3,1>: Cost 4 vext3 <0,2,3,5>, <0,3,1,4>
+ 2686533898U, // <5,0,3,2>: Cost 3 vext3 <0,3,2,5>, <0,3,2,5>
+ 3760349459U, // <5,0,3,3>: Cost 4 vext3 <0,3,3,5>, <0,3,3,5>
+ 2638187004U, // <5,0,3,4>: Cost 3 vext2 <3,4,5,0>, <3,4,5,0>
+ 3776348452U, // <5,0,3,5>: Cost 4 vext3 <3,0,4,5>, <0,3,5,4>
+ 3713256094U, // <5,0,3,6>: Cost 4 vext2 <3,6,5,0>, <3,6,5,0>
+ 3914064896U, // <5,0,3,7>: Cost 4 vuzpr <3,5,7,0>, <1,3,5,7>
+ 2686976320U, // <5,0,3,u>: Cost 3 vext3 <0,3,u,5>, <0,3,u,5>
+ 2559459430U, // <5,0,4,0>: Cost 3 vext1 <1,5,0,4>, LHS
+ 1613381970U, // <5,0,4,1>: Cost 2 vext3 <0,4,1,5>, <0,4,1,5>
+ 2687123804U, // <5,0,4,2>: Cost 3 vext3 <0,4,1,5>, <0,4,2,6>
+ 3761013092U, // <5,0,4,3>: Cost 4 vext3 <0,4,3,5>, <0,4,3,5>
+ 2559462710U, // <5,0,4,4>: Cost 3 vext1 <1,5,0,4>, RHS
+ 2638187830U, // <5,0,4,5>: Cost 3 vext2 <3,4,5,0>, RHS
+ 3761234303U, // <5,0,4,6>: Cost 4 vext3 <0,4,6,5>, <0,4,6,5>
+ 2646150600U, // <5,0,4,7>: Cost 3 vext2 <4,7,5,0>, <4,7,5,0>
+ 1613381970U, // <5,0,4,u>: Cost 2 vext3 <0,4,1,5>, <0,4,1,5>
+ 3766763926U, // <5,0,5,0>: Cost 4 vext3 <1,4,0,5>, <0,5,0,1>
+ 2919268454U, // <5,0,5,1>: Cost 3 vzipl <5,5,5,5>, LHS
+ 3053486182U, // <5,0,5,2>: Cost 3 vtrnl <5,5,5,5>, LHS
+ 3723210589U, // <5,0,5,3>: Cost 4 vext2 <5,3,5,0>, <5,3,5,0>
+ 3766763966U, // <5,0,5,4>: Cost 4 vext3 <1,4,0,5>, <0,5,4,5>
+ 2650796031U, // <5,0,5,5>: Cost 3 vext2 <5,5,5,0>, <5,5,5,0>
+ 3719893090U, // <5,0,5,6>: Cost 4 vext2 <4,7,5,0>, <5,6,7,0>
+ 3914067254U, // <5,0,5,7>: Cost 4 vuzpr <3,5,7,0>, RHS
+ 2919269021U, // <5,0,5,u>: Cost 3 vzipl <5,5,5,5>, LHS
+ 4047519744U, // <5,0,6,0>: Cost 4 vzipr <3,4,5,6>, <0,0,0,0>
+ 2920038502U, // <5,0,6,1>: Cost 3 vzipl <5,6,7,0>, LHS
+ 3759759871U, // <5,0,6,2>: Cost 4 vext3 <0,2,4,5>, <0,6,2,7>
+ 3645164070U, // <5,0,6,3>: Cost 4 vext1 <3,5,0,6>, <3,5,0,6>
+ 3762414095U, // <5,0,6,4>: Cost 4 vext3 <0,6,4,5>, <0,6,4,5>
+ 3993780690U, // <5,0,6,5>: Cost 4 vzipl <5,6,7,0>, <0,5,6,7>
+ 3719893816U, // <5,0,6,6>: Cost 4 vext2 <4,7,5,0>, <6,6,6,6>
+ 2662077302U, // <5,0,6,7>: Cost 3 vext2 <7,4,5,0>, <6,7,4,5>
+ 2920039069U, // <5,0,6,u>: Cost 3 vzipl <5,6,7,0>, LHS
+ 2565455974U, // <5,0,7,0>: Cost 3 vext1 <2,5,0,7>, LHS
+ 2565456790U, // <5,0,7,1>: Cost 3 vext1 <2,5,0,7>, <1,2,3,0>
+ 2565457742U, // <5,0,7,2>: Cost 3 vext1 <2,5,0,7>, <2,5,0,7>
+ 3639199894U, // <5,0,7,3>: Cost 4 vext1 <2,5,0,7>, <3,0,1,2>
+ 2565459254U, // <5,0,7,4>: Cost 3 vext1 <2,5,0,7>, RHS
+ 2589347938U, // <5,0,7,5>: Cost 3 vext1 <6,5,0,7>, <5,6,7,0>
+ 2589348530U, // <5,0,7,6>: Cost 3 vext1 <6,5,0,7>, <6,5,0,7>
+ 4188456422U, // <5,0,7,7>: Cost 4 vtrnr RHS, <2,0,5,7>
+ 2565461806U, // <5,0,7,u>: Cost 3 vext1 <2,5,0,7>, LHS
+ 2687124106U, // <5,0,u,0>: Cost 3 vext3 <0,4,1,5>, <0,u,0,2>
+ 1616036502U, // <5,0,u,1>: Cost 2 vext3 <0,u,1,5>, <0,u,1,5>
+ 1613382301U, // <5,0,u,2>: Cost 2 vext3 <0,4,1,5>, LHS
+ 2689925800U, // <5,0,u,3>: Cost 3 vext3 <0,u,3,5>, <0,u,3,5>
+ 2687124146U, // <5,0,u,4>: Cost 3 vext3 <0,4,1,5>, <0,u,4,6>
+ 2638190746U, // <5,0,u,5>: Cost 3 vext2 <3,4,5,0>, RHS
+ 2589356723U, // <5,0,u,6>: Cost 3 vext1 <6,5,0,u>, <6,5,0,u>
+ 2595280230U, // <5,0,u,7>: Cost 3 vext1 <7,5,0,2>, <7,4,5,6>
+ 1613382355U, // <5,0,u,u>: Cost 2 vext3 <0,4,1,5>, LHS
+ 2646818816U, // <5,1,0,0>: Cost 3 vext2 <4,u,5,1>, <0,0,0,0>
+ 1573077094U, // <5,1,0,1>: Cost 2 vext2 <4,u,5,1>, LHS
+ 2646818980U, // <5,1,0,2>: Cost 3 vext2 <4,u,5,1>, <0,2,0,2>
+ 2687124214U, // <5,1,0,3>: Cost 3 vext3 <0,4,1,5>, <1,0,3,2>
+ 2641510738U, // <5,1,0,4>: Cost 3 vext2 <4,0,5,1>, <0,4,1,5>
+ 2641510814U, // <5,1,0,5>: Cost 3 vext2 <4,0,5,1>, <0,5,1,0>
+ 3720561142U, // <5,1,0,6>: Cost 4 vext2 <4,u,5,1>, <0,6,1,7>
+ 3298141357U, // <5,1,0,7>: Cost 4 vrev <1,5,7,0>
+ 1573077661U, // <5,1,0,u>: Cost 2 vext2 <4,u,5,1>, LHS
+ 2223891567U, // <5,1,1,0>: Cost 3 vrev <1,5,0,1>
+ 2687124276U, // <5,1,1,1>: Cost 3 vext3 <0,4,1,5>, <1,1,1,1>
+ 2646819734U, // <5,1,1,2>: Cost 3 vext2 <4,u,5,1>, <1,2,3,0>
+ 2687124296U, // <5,1,1,3>: Cost 3 vext3 <0,4,1,5>, <1,1,3,3>
+ 2691326803U, // <5,1,1,4>: Cost 3 vext3 <1,1,4,5>, <1,1,4,5>
+ 2691400540U, // <5,1,1,5>: Cost 3 vext3 <1,1,5,5>, <1,1,5,5>
+ 3765216101U, // <5,1,1,6>: Cost 4 vext3 <1,1,6,5>, <1,1,6,5>
+ 3765289838U, // <5,1,1,7>: Cost 4 vext3 <1,1,7,5>, <1,1,7,5>
+ 2687124341U, // <5,1,1,u>: Cost 3 vext3 <0,4,1,5>, <1,1,u,3>
+ 3297641584U, // <5,1,2,0>: Cost 4 vrev <1,5,0,2>
+ 3763520391U, // <5,1,2,1>: Cost 4 vext3 <0,u,1,5>, <1,2,1,3>
+ 2646820456U, // <5,1,2,2>: Cost 3 vext2 <4,u,5,1>, <2,2,2,2>
+ 2687124374U, // <5,1,2,3>: Cost 3 vext3 <0,4,1,5>, <1,2,3,0>
+ 2691990436U, // <5,1,2,4>: Cost 3 vext3 <1,2,4,5>, <1,2,4,5>
+ 2687124395U, // <5,1,2,5>: Cost 3 vext3 <0,4,1,5>, <1,2,5,3>
+ 2646820794U, // <5,1,2,6>: Cost 3 vext2 <4,u,5,1>, <2,6,3,7>
+ 3808199610U, // <5,1,2,7>: Cost 4 vext3 <u,3,4,5>, <1,2,7,0>
+ 2687124419U, // <5,1,2,u>: Cost 3 vext3 <0,4,1,5>, <1,2,u,0>
+ 2577440870U, // <5,1,3,0>: Cost 3 vext1 <4,5,1,3>, LHS
+ 2687124440U, // <5,1,3,1>: Cost 3 vext3 <0,4,1,5>, <1,3,1,3>
+ 3759686627U, // <5,1,3,2>: Cost 4 vext3 <0,2,3,5>, <1,3,2,5>
+ 2692580332U, // <5,1,3,3>: Cost 3 vext3 <1,3,3,5>, <1,3,3,5>
+ 2687124469U, // <5,1,3,4>: Cost 3 vext3 <0,4,1,5>, <1,3,4,5>
+ 2685207552U, // <5,1,3,5>: Cost 3 vext3 <0,1,2,5>, <1,3,5,7>
+ 3760866313U, // <5,1,3,6>: Cost 4 vext3 <0,4,1,5>, <1,3,6,7>
+ 2692875280U, // <5,1,3,7>: Cost 3 vext3 <1,3,7,5>, <1,3,7,5>
+ 2687124503U, // <5,1,3,u>: Cost 3 vext3 <0,4,1,5>, <1,3,u,3>
+ 1567771538U, // <5,1,4,0>: Cost 2 vext2 <4,0,5,1>, <4,0,5,1>
+ 2693096491U, // <5,1,4,1>: Cost 3 vext3 <1,4,1,5>, <1,4,1,5>
+ 2693170228U, // <5,1,4,2>: Cost 3 vext3 <1,4,2,5>, <1,4,2,5>
+ 2687124541U, // <5,1,4,3>: Cost 3 vext3 <0,4,1,5>, <1,4,3,5>
+ 2646822096U, // <5,1,4,4>: Cost 3 vext2 <4,u,5,1>, <4,4,4,4>
+ 1573080374U, // <5,1,4,5>: Cost 2 vext2 <4,u,5,1>, RHS
+ 2646822260U, // <5,1,4,6>: Cost 3 vext2 <4,u,5,1>, <4,6,4,6>
+ 3298174129U, // <5,1,4,7>: Cost 4 vrev <1,5,7,4>
+ 1573080602U, // <5,1,4,u>: Cost 2 vext2 <4,u,5,1>, <4,u,5,1>
+ 2687124591U, // <5,1,5,0>: Cost 3 vext3 <0,4,1,5>, <1,5,0,1>
+ 2646822543U, // <5,1,5,1>: Cost 3 vext2 <4,u,5,1>, <5,1,0,1>
+ 3760866433U, // <5,1,5,2>: Cost 4 vext3 <0,4,1,5>, <1,5,2,1>
+ 2687124624U, // <5,1,5,3>: Cost 3 vext3 <0,4,1,5>, <1,5,3,7>
+ 2687124631U, // <5,1,5,4>: Cost 3 vext3 <0,4,1,5>, <1,5,4,5>
+ 2646822916U, // <5,1,5,5>: Cost 3 vext2 <4,u,5,1>, <5,5,5,5>
+ 2646823010U, // <5,1,5,6>: Cost 3 vext2 <4,u,5,1>, <5,6,7,0>
+ 2646823080U, // <5,1,5,7>: Cost 3 vext2 <4,u,5,1>, <5,7,5,7>
+ 2687124663U, // <5,1,5,u>: Cost 3 vext3 <0,4,1,5>, <1,5,u,1>
+ 2553577574U, // <5,1,6,0>: Cost 3 vext1 <0,5,1,6>, LHS
+ 3763520719U, // <5,1,6,1>: Cost 4 vext3 <0,u,1,5>, <1,6,1,7>
+ 2646823418U, // <5,1,6,2>: Cost 3 vext2 <4,u,5,1>, <6,2,7,3>
+ 3760866529U, // <5,1,6,3>: Cost 4 vext3 <0,4,1,5>, <1,6,3,7>
+ 2553580854U, // <5,1,6,4>: Cost 3 vext1 <0,5,1,6>, RHS
+ 2687124723U, // <5,1,6,5>: Cost 3 vext3 <0,4,1,5>, <1,6,5,7>
+ 2646823736U, // <5,1,6,6>: Cost 3 vext2 <4,u,5,1>, <6,6,6,6>
+ 2646823758U, // <5,1,6,7>: Cost 3 vext2 <4,u,5,1>, <6,7,0,1>
+ 2646823839U, // <5,1,6,u>: Cost 3 vext2 <4,u,5,1>, <6,u,0,1>
+ 2559557734U, // <5,1,7,0>: Cost 3 vext1 <1,5,1,7>, LHS
+ 2559558452U, // <5,1,7,1>: Cost 3 vext1 <1,5,1,7>, <1,1,1,1>
+ 2571503270U, // <5,1,7,2>: Cost 3 vext1 <3,5,1,7>, <2,3,0,1>
+ 2040971366U, // <5,1,7,3>: Cost 2 vtrnr RHS, LHS
+ 2559561014U, // <5,1,7,4>: Cost 3 vext1 <1,5,1,7>, RHS
+ 2595393232U, // <5,1,7,5>: Cost 3 vext1 <7,5,1,7>, <5,1,7,3>
+ 4188455035U, // <5,1,7,6>: Cost 4 vtrnr RHS, <0,1,4,6>
+ 2646824556U, // <5,1,7,7>: Cost 3 vext2 <4,u,5,1>, <7,7,7,7>
+ 2040971371U, // <5,1,7,u>: Cost 2 vtrnr RHS, LHS
+ 1591662326U, // <5,1,u,0>: Cost 2 vext2 <u,0,5,1>, <u,0,5,1>
+ 1573082926U, // <5,1,u,1>: Cost 2 vext2 <4,u,5,1>, LHS
+ 2695824760U, // <5,1,u,2>: Cost 3 vext3 <1,u,2,5>, <1,u,2,5>
+ 2040979558U, // <5,1,u,3>: Cost 2 vtrnr RHS, LHS
+ 2687124874U, // <5,1,u,4>: Cost 3 vext3 <0,4,1,5>, <1,u,4,5>
+ 1573083290U, // <5,1,u,5>: Cost 2 vext2 <4,u,5,1>, RHS
+ 2646825168U, // <5,1,u,6>: Cost 3 vext2 <4,u,5,1>, <u,6,3,7>
+ 2646825216U, // <5,1,u,7>: Cost 3 vext2 <4,u,5,1>, <u,7,0,1>
+ 2040979563U, // <5,1,u,u>: Cost 2 vtrnr RHS, LHS
+ 3702652928U, // <5,2,0,0>: Cost 4 vext2 <1,u,5,2>, <0,0,0,0>
+ 2628911206U, // <5,2,0,1>: Cost 3 vext2 <1,u,5,2>, LHS
+ 2641518756U, // <5,2,0,2>: Cost 3 vext2 <4,0,5,2>, <0,2,0,2>
+ 3759760847U, // <5,2,0,3>: Cost 4 vext3 <0,2,4,5>, <2,0,3,2>
+ 3760866775U, // <5,2,0,4>: Cost 4 vext3 <0,4,1,5>, <2,0,4,1>
+ 3759539680U, // <5,2,0,5>: Cost 4 vext3 <0,2,1,5>, <2,0,5,1>
+ 3760866796U, // <5,2,0,6>: Cost 4 vext3 <0,4,1,5>, <2,0,6,4>
+ 3304114054U, // <5,2,0,7>: Cost 4 vrev <2,5,7,0>
+ 2628911773U, // <5,2,0,u>: Cost 3 vext2 <1,u,5,2>, LHS
+ 2623603464U, // <5,2,1,0>: Cost 3 vext2 <1,0,5,2>, <1,0,5,2>
+ 3698008921U, // <5,2,1,1>: Cost 4 vext2 <1,1,5,2>, <1,1,5,2>
+ 3633325603U, // <5,2,1,2>: Cost 4 vext1 <1,5,2,1>, <2,1,3,5>
+ 2687125027U, // <5,2,1,3>: Cost 3 vext3 <0,4,1,5>, <2,1,3,5>
+ 3633327414U, // <5,2,1,4>: Cost 4 vext1 <1,5,2,1>, RHS
+ 3759539760U, // <5,2,1,5>: Cost 4 vext3 <0,2,1,5>, <2,1,5,0>
+ 3760866876U, // <5,2,1,6>: Cost 4 vext3 <0,4,1,5>, <2,1,6,3>
+ 3304122247U, // <5,2,1,7>: Cost 4 vrev <2,5,7,1>
+ 2687125072U, // <5,2,1,u>: Cost 3 vext3 <0,4,1,5>, <2,1,u,5>
+ 3633332326U, // <5,2,2,0>: Cost 4 vext1 <1,5,2,2>, LHS
+ 3759760992U, // <5,2,2,1>: Cost 4 vext3 <0,2,4,5>, <2,2,1,3>
+ 2687125096U, // <5,2,2,2>: Cost 3 vext3 <0,4,1,5>, <2,2,2,2>
+ 2687125106U, // <5,2,2,3>: Cost 3 vext3 <0,4,1,5>, <2,2,3,3>
+ 2697963133U, // <5,2,2,4>: Cost 3 vext3 <2,2,4,5>, <2,2,4,5>
+ 3759466120U, // <5,2,2,5>: Cost 4 vext3 <0,2,0,5>, <2,2,5,7>
+ 3760866960U, // <5,2,2,6>: Cost 4 vext3 <0,4,1,5>, <2,2,6,6>
+ 3771926168U, // <5,2,2,7>: Cost 4 vext3 <2,2,7,5>, <2,2,7,5>
+ 2687125151U, // <5,2,2,u>: Cost 3 vext3 <0,4,1,5>, <2,2,u,3>
+ 2687125158U, // <5,2,3,0>: Cost 3 vext3 <0,4,1,5>, <2,3,0,1>
+ 2698405555U, // <5,2,3,1>: Cost 3 vext3 <2,3,1,5>, <2,3,1,5>
+ 2577516238U, // <5,2,3,2>: Cost 3 vext1 <4,5,2,3>, <2,3,4,5>
+ 3759687365U, // <5,2,3,3>: Cost 4 vext3 <0,2,3,5>, <2,3,3,5>
+ 1624884942U, // <5,2,3,4>: Cost 2 vext3 <2,3,4,5>, <2,3,4,5>
+ 2698700503U, // <5,2,3,5>: Cost 3 vext3 <2,3,5,5>, <2,3,5,5>
+ 3772368608U, // <5,2,3,6>: Cost 4 vext3 <2,3,4,5>, <2,3,6,5>
+ 3702655716U, // <5,2,3,7>: Cost 4 vext2 <1,u,5,2>, <3,7,3,7>
+ 1625179890U, // <5,2,3,u>: Cost 2 vext3 <2,3,u,5>, <2,3,u,5>
+ 2641521555U, // <5,2,4,0>: Cost 3 vext2 <4,0,5,2>, <4,0,5,2>
+ 3772368642U, // <5,2,4,1>: Cost 4 vext3 <2,3,4,5>, <2,4,1,3>
+ 2699142925U, // <5,2,4,2>: Cost 3 vext3 <2,4,2,5>, <2,4,2,5>
+ 2698626838U, // <5,2,4,3>: Cost 3 vext3 <2,3,4,5>, <2,4,3,5>
+ 2698626848U, // <5,2,4,4>: Cost 3 vext3 <2,3,4,5>, <2,4,4,6>
+ 2628914486U, // <5,2,4,5>: Cost 3 vext2 <1,u,5,2>, RHS
+ 2645503353U, // <5,2,4,6>: Cost 3 vext2 <4,6,5,2>, <4,6,5,2>
+ 3304146826U, // <5,2,4,7>: Cost 4 vrev <2,5,7,4>
+ 2628914729U, // <5,2,4,u>: Cost 3 vext2 <1,u,5,2>, RHS
+ 2553643110U, // <5,2,5,0>: Cost 3 vext1 <0,5,2,5>, LHS
+ 3758950227U, // <5,2,5,1>: Cost 4 vext3 <0,1,2,5>, <2,5,1,3>
+ 3759761248U, // <5,2,5,2>: Cost 4 vext3 <0,2,4,5>, <2,5,2,7>
+ 2982396006U, // <5,2,5,3>: Cost 3 vzipr <4,u,5,5>, LHS
+ 2553646390U, // <5,2,5,4>: Cost 3 vext1 <0,5,2,5>, RHS
+ 2553647108U, // <5,2,5,5>: Cost 3 vext1 <0,5,2,5>, <5,5,5,5>
+ 3760867204U, // <5,2,5,6>: Cost 4 vext3 <0,4,1,5>, <2,5,6,7>
+ 3702657141U, // <5,2,5,7>: Cost 4 vext2 <1,u,5,2>, <5,7,0,1>
+ 2982396011U, // <5,2,5,u>: Cost 3 vzipr <4,u,5,5>, LHS
+ 3627393126U, // <5,2,6,0>: Cost 4 vext1 <0,5,2,6>, LHS
+ 3760867236U, // <5,2,6,1>: Cost 4 vext3 <0,4,1,5>, <2,6,1,3>
+ 2645504506U, // <5,2,6,2>: Cost 3 vext2 <4,6,5,2>, <6,2,7,3>
+ 2687125434U, // <5,2,6,3>: Cost 3 vext3 <0,4,1,5>, <2,6,3,7>
+ 2700617665U, // <5,2,6,4>: Cost 3 vext3 <2,6,4,5>, <2,6,4,5>
+ 3760867276U, // <5,2,6,5>: Cost 4 vext3 <0,4,1,5>, <2,6,5,7>
+ 3763521493U, // <5,2,6,6>: Cost 4 vext3 <0,u,1,5>, <2,6,6,7>
+ 3719246670U, // <5,2,6,7>: Cost 4 vext2 <4,6,5,2>, <6,7,0,1>
+ 2687125479U, // <5,2,6,u>: Cost 3 vext3 <0,4,1,5>, <2,6,u,7>
+ 2565603430U, // <5,2,7,0>: Cost 3 vext1 <2,5,2,7>, LHS
+ 2553660150U, // <5,2,7,1>: Cost 3 vext1 <0,5,2,7>, <1,0,3,2>
+ 2565605216U, // <5,2,7,2>: Cost 3 vext1 <2,5,2,7>, <2,5,2,7>
+ 2961178726U, // <5,2,7,3>: Cost 3 vzipr <1,3,5,7>, LHS
+ 2565606710U, // <5,2,7,4>: Cost 3 vext1 <2,5,2,7>, RHS
+ 4034920552U, // <5,2,7,5>: Cost 4 vzipr <1,3,5,7>, <0,1,2,5>
+ 3114713292U, // <5,2,7,6>: Cost 3 vtrnr RHS, <0,2,4,6>
+ 3702658668U, // <5,2,7,7>: Cost 4 vext2 <1,u,5,2>, <7,7,7,7>
+ 2961178731U, // <5,2,7,u>: Cost 3 vzipr <1,3,5,7>, LHS
+ 2687125563U, // <5,2,u,0>: Cost 3 vext3 <0,4,1,5>, <2,u,0,1>
+ 2628917038U, // <5,2,u,1>: Cost 3 vext2 <1,u,5,2>, LHS
+ 2565613409U, // <5,2,u,2>: Cost 3 vext1 <2,5,2,u>, <2,5,2,u>
+ 2687125592U, // <5,2,u,3>: Cost 3 vext3 <0,4,1,5>, <2,u,3,3>
+ 1628203107U, // <5,2,u,4>: Cost 2 vext3 <2,u,4,5>, <2,u,4,5>
+ 2628917402U, // <5,2,u,5>: Cost 3 vext2 <1,u,5,2>, RHS
+ 2702092405U, // <5,2,u,6>: Cost 3 vext3 <2,u,6,5>, <2,u,6,5>
+ 3304179598U, // <5,2,u,7>: Cost 4 vrev <2,5,7,u>
+ 1628498055U, // <5,2,u,u>: Cost 2 vext3 <2,u,u,5>, <2,u,u,5>
+ 3760867467U, // <5,3,0,0>: Cost 4 vext3 <0,4,1,5>, <3,0,0,0>
+ 2687125654U, // <5,3,0,1>: Cost 3 vext3 <0,4,1,5>, <3,0,1,2>
+ 3759761565U, // <5,3,0,2>: Cost 4 vext3 <0,2,4,5>, <3,0,2,0>
+ 3633391766U, // <5,3,0,3>: Cost 4 vext1 <1,5,3,0>, <3,0,1,2>
+ 2687125680U, // <5,3,0,4>: Cost 3 vext3 <0,4,1,5>, <3,0,4,1>
+ 3760277690U, // <5,3,0,5>: Cost 4 vext3 <0,3,2,5>, <3,0,5,2>
+ 3310013014U, // <5,3,0,6>: Cost 4 vrev <3,5,6,0>
+ 2236344927U, // <5,3,0,7>: Cost 3 vrev <3,5,7,0>
+ 2687125717U, // <5,3,0,u>: Cost 3 vext3 <0,4,1,5>, <3,0,u,2>
+ 3760867551U, // <5,3,1,0>: Cost 4 vext3 <0,4,1,5>, <3,1,0,3>
+ 3760867558U, // <5,3,1,1>: Cost 4 vext3 <0,4,1,5>, <3,1,1,1>
+ 2624938923U, // <5,3,1,2>: Cost 3 vext2 <1,2,5,3>, <1,2,5,3>
+ 2703198460U, // <5,3,1,3>: Cost 3 vext3 <3,1,3,5>, <3,1,3,5>
+ 3760867587U, // <5,3,1,4>: Cost 4 vext3 <0,4,1,5>, <3,1,4,3>
+ 2636219536U, // <5,3,1,5>: Cost 3 vext2 <3,1,5,3>, <1,5,3,7>
+ 3698681075U, // <5,3,1,6>: Cost 4 vext2 <1,2,5,3>, <1,6,5,7>
+ 2703493408U, // <5,3,1,7>: Cost 3 vext3 <3,1,7,5>, <3,1,7,5>
+ 2628920721U, // <5,3,1,u>: Cost 3 vext2 <1,u,5,3>, <1,u,5,3>
+ 3766765870U, // <5,3,2,0>: Cost 4 vext3 <1,4,0,5>, <3,2,0,1>
+ 3698681379U, // <5,3,2,1>: Cost 4 vext2 <1,2,5,3>, <2,1,3,5>
+ 3760867649U, // <5,3,2,2>: Cost 4 vext3 <0,4,1,5>, <3,2,2,2>
+ 2698627404U, // <5,3,2,3>: Cost 3 vext3 <2,3,4,5>, <3,2,3,4>
+ 2703935830U, // <5,3,2,4>: Cost 3 vext3 <3,2,4,5>, <3,2,4,5>
+ 2698627422U, // <5,3,2,5>: Cost 3 vext3 <2,3,4,5>, <3,2,5,4>
+ 3760867686U, // <5,3,2,6>: Cost 4 vext3 <0,4,1,5>, <3,2,6,3>
+ 3769788783U, // <5,3,2,7>: Cost 4 vext3 <1,u,5,5>, <3,2,7,3>
+ 2701945209U, // <5,3,2,u>: Cost 3 vext3 <2,u,4,5>, <3,2,u,4>
+ 3760867711U, // <5,3,3,0>: Cost 4 vext3 <0,4,1,5>, <3,3,0,1>
+ 2636220684U, // <5,3,3,1>: Cost 3 vext2 <3,1,5,3>, <3,1,5,3>
+ 3772369298U, // <5,3,3,2>: Cost 4 vext3 <2,3,4,5>, <3,3,2,2>
+ 2687125916U, // <5,3,3,3>: Cost 3 vext3 <0,4,1,5>, <3,3,3,3>
+ 2704599463U, // <5,3,3,4>: Cost 3 vext3 <3,3,4,5>, <3,3,4,5>
+ 2704673200U, // <5,3,3,5>: Cost 3 vext3 <3,3,5,5>, <3,3,5,5>
+ 3709962935U, // <5,3,3,6>: Cost 4 vext2 <3,1,5,3>, <3,6,7,7>
+ 3772369346U, // <5,3,3,7>: Cost 4 vext3 <2,3,4,5>, <3,3,7,5>
+ 2704894411U, // <5,3,3,u>: Cost 3 vext3 <3,3,u,5>, <3,3,u,5>
+ 2704968148U, // <5,3,4,0>: Cost 3 vext3 <3,4,0,5>, <3,4,0,5>
+ 3698682850U, // <5,3,4,1>: Cost 4 vext2 <1,2,5,3>, <4,1,5,0>
+ 2642857014U, // <5,3,4,2>: Cost 3 vext2 <4,2,5,3>, <4,2,5,3>
+ 2705189359U, // <5,3,4,3>: Cost 3 vext3 <3,4,3,5>, <3,4,3,5>
+ 2705263096U, // <5,3,4,4>: Cost 3 vext3 <3,4,4,5>, <3,4,4,5>
+ 2685946370U, // <5,3,4,5>: Cost 3 vext3 <0,2,3,5>, <3,4,5,6>
+ 3779152394U, // <5,3,4,6>: Cost 4 vext3 <3,4,6,5>, <3,4,6,5>
+ 2236377699U, // <5,3,4,7>: Cost 3 vrev <3,5,7,4>
+ 2687126045U, // <5,3,4,u>: Cost 3 vext3 <0,4,1,5>, <3,4,u,6>
+ 2571632742U, // <5,3,5,0>: Cost 3 vext1 <3,5,3,5>, LHS
+ 2559689870U, // <5,3,5,1>: Cost 3 vext1 <1,5,3,5>, <1,5,3,5>
+ 2571634382U, // <5,3,5,2>: Cost 3 vext1 <3,5,3,5>, <2,3,4,5>
+ 2571635264U, // <5,3,5,3>: Cost 3 vext1 <3,5,3,5>, <3,5,3,5>
+ 2571636022U, // <5,3,5,4>: Cost 3 vext1 <3,5,3,5>, RHS
+ 2559692804U, // <5,3,5,5>: Cost 3 vext1 <1,5,3,5>, <5,5,5,5>
+ 3720581218U, // <5,3,5,6>: Cost 4 vext2 <4,u,5,3>, <5,6,7,0>
+ 2236385892U, // <5,3,5,7>: Cost 3 vrev <3,5,7,5>
+ 2571638574U, // <5,3,5,u>: Cost 3 vext1 <3,5,3,5>, LHS
+ 2565668966U, // <5,3,6,0>: Cost 3 vext1 <2,5,3,6>, LHS
+ 3633439887U, // <5,3,6,1>: Cost 4 vext1 <1,5,3,6>, <1,5,3,6>
+ 2565670760U, // <5,3,6,2>: Cost 3 vext1 <2,5,3,6>, <2,5,3,6>
+ 2565671426U, // <5,3,6,3>: Cost 3 vext1 <2,5,3,6>, <3,4,5,6>
+ 2565672246U, // <5,3,6,4>: Cost 3 vext1 <2,5,3,6>, RHS
+ 3639414630U, // <5,3,6,5>: Cost 4 vext1 <2,5,3,6>, <5,3,6,0>
+ 4047521640U, // <5,3,6,6>: Cost 4 vzipr <3,4,5,6>, <2,5,3,6>
+ 2725169844U, // <5,3,6,7>: Cost 3 vext3 <6,7,4,5>, <3,6,7,4>
+ 2565674798U, // <5,3,6,u>: Cost 3 vext1 <2,5,3,6>, LHS
+ 1485963366U, // <5,3,7,0>: Cost 2 vext1 <1,5,3,7>, LHS
+ 1485964432U, // <5,3,7,1>: Cost 2 vext1 <1,5,3,7>, <1,5,3,7>
+ 2559706728U, // <5,3,7,2>: Cost 3 vext1 <1,5,3,7>, <2,2,2,2>
+ 2559707286U, // <5,3,7,3>: Cost 3 vext1 <1,5,3,7>, <3,0,1,2>
+ 1485966646U, // <5,3,7,4>: Cost 2 vext1 <1,5,3,7>, RHS
+ 2559708880U, // <5,3,7,5>: Cost 3 vext1 <1,5,3,7>, <5,1,7,3>
+ 2601513466U, // <5,3,7,6>: Cost 3 vext1 <u,5,3,7>, <6,2,7,3>
+ 3114714112U, // <5,3,7,7>: Cost 3 vtrnr RHS, <1,3,5,7>
+ 1485969198U, // <5,3,7,u>: Cost 2 vext1 <1,5,3,7>, LHS
+ 1485971558U, // <5,3,u,0>: Cost 2 vext1 <1,5,3,u>, LHS
+ 1485972625U, // <5,3,u,1>: Cost 2 vext1 <1,5,3,u>, <1,5,3,u>
+ 2559714920U, // <5,3,u,2>: Cost 3 vext1 <1,5,3,u>, <2,2,2,2>
+ 2559715478U, // <5,3,u,3>: Cost 3 vext1 <1,5,3,u>, <3,0,1,2>
+ 1485974838U, // <5,3,u,4>: Cost 2 vext1 <1,5,3,u>, RHS
+ 2687126342U, // <5,3,u,5>: Cost 3 vext3 <0,4,1,5>, <3,u,5,6>
+ 2601521658U, // <5,3,u,6>: Cost 3 vext1 <u,5,3,u>, <6,2,7,3>
+ 2236410471U, // <5,3,u,7>: Cost 3 vrev <3,5,7,u>
+ 1485977390U, // <5,3,u,u>: Cost 2 vext1 <1,5,3,u>, LHS
+ 3627491430U, // <5,4,0,0>: Cost 4 vext1 <0,5,4,0>, LHS
+ 2636890214U, // <5,4,0,1>: Cost 3 vext2 <3,2,5,4>, LHS
+ 3703333028U, // <5,4,0,2>: Cost 4 vext2 <2,0,5,4>, <0,2,0,2>
+ 3782249348U, // <5,4,0,3>: Cost 4 vext3 <4,0,3,5>, <4,0,3,5>
+ 2642198866U, // <5,4,0,4>: Cost 3 vext2 <4,1,5,4>, <0,4,1,5>
+ 2687126418U, // <5,4,0,5>: Cost 3 vext3 <0,4,1,5>, <4,0,5,1>
+ 2242243887U, // <5,4,0,6>: Cost 3 vrev <4,5,6,0>
+ 3316059448U, // <5,4,0,7>: Cost 4 vrev <4,5,7,0>
+ 2636890781U, // <5,4,0,u>: Cost 3 vext2 <3,2,5,4>, LHS
+ 2241809658U, // <5,4,1,0>: Cost 3 vrev <4,5,0,1>
+ 3698025307U, // <5,4,1,1>: Cost 4 vext2 <1,1,5,4>, <1,1,5,4>
+ 3698688940U, // <5,4,1,2>: Cost 4 vext2 <1,2,5,4>, <1,2,5,4>
+ 3698689024U, // <5,4,1,3>: Cost 4 vext2 <1,2,5,4>, <1,3,5,7>
+ 3700016206U, // <5,4,1,4>: Cost 4 vext2 <1,4,5,4>, <1,4,5,4>
+ 2687126498U, // <5,4,1,5>: Cost 3 vext3 <0,4,1,5>, <4,1,5,0>
+ 3760868336U, // <5,4,1,6>: Cost 4 vext3 <0,4,1,5>, <4,1,6,5>
+ 3316067641U, // <5,4,1,7>: Cost 4 vrev <4,5,7,1>
+ 2242399554U, // <5,4,1,u>: Cost 3 vrev <4,5,u,1>
+ 3703334371U, // <5,4,2,0>: Cost 4 vext2 <2,0,5,4>, <2,0,5,4>
+ 3703998004U, // <5,4,2,1>: Cost 4 vext2 <2,1,5,4>, <2,1,5,4>
+ 3704661637U, // <5,4,2,2>: Cost 4 vext2 <2,2,5,4>, <2,2,5,4>
+ 2636891854U, // <5,4,2,3>: Cost 3 vext2 <3,2,5,4>, <2,3,4,5>
+ 3705988903U, // <5,4,2,4>: Cost 4 vext2 <2,4,5,4>, <2,4,5,4>
+ 2698628150U, // <5,4,2,5>: Cost 3 vext3 <2,3,4,5>, <4,2,5,3>
+ 3760868415U, // <5,4,2,6>: Cost 4 vext3 <0,4,1,5>, <4,2,6,3>
+ 3783871562U, // <5,4,2,7>: Cost 4 vext3 <4,2,7,5>, <4,2,7,5>
+ 2666752099U, // <5,4,2,u>: Cost 3 vext2 <u,2,5,4>, <2,u,4,5>
+ 3639459942U, // <5,4,3,0>: Cost 4 vext1 <2,5,4,3>, LHS
+ 3709970701U, // <5,4,3,1>: Cost 4 vext2 <3,1,5,4>, <3,1,5,4>
+ 2636892510U, // <5,4,3,2>: Cost 3 vext2 <3,2,5,4>, <3,2,5,4>
+ 3710634396U, // <5,4,3,3>: Cost 4 vext2 <3,2,5,4>, <3,3,3,3>
+ 2638219776U, // <5,4,3,4>: Cost 3 vext2 <3,4,5,4>, <3,4,5,4>
+ 3766987908U, // <5,4,3,5>: Cost 4 vext3 <1,4,3,5>, <4,3,5,0>
+ 2710719634U, // <5,4,3,6>: Cost 3 vext3 <4,3,6,5>, <4,3,6,5>
+ 3914097664U, // <5,4,3,7>: Cost 4 vuzpr <3,5,7,4>, <1,3,5,7>
+ 2640874308U, // <5,4,3,u>: Cost 3 vext2 <3,u,5,4>, <3,u,5,4>
+ 2583642214U, // <5,4,4,0>: Cost 3 vext1 <5,5,4,4>, LHS
+ 2642201574U, // <5,4,4,1>: Cost 3 vext2 <4,1,5,4>, <4,1,5,4>
+ 3710635062U, // <5,4,4,2>: Cost 4 vext2 <3,2,5,4>, <4,2,5,3>
+ 3717270664U, // <5,4,4,3>: Cost 4 vext2 <4,3,5,4>, <4,3,5,4>
+ 2713963728U, // <5,4,4,4>: Cost 3 vext3 <4,u,5,5>, <4,4,4,4>
+ 1637567706U, // <5,4,4,5>: Cost 2 vext3 <4,4,5,5>, <4,4,5,5>
+ 2242276659U, // <5,4,4,6>: Cost 3 vrev <4,5,6,4>
+ 2646183372U, // <5,4,4,7>: Cost 3 vext2 <4,7,5,4>, <4,7,5,4>
+ 1637788917U, // <5,4,4,u>: Cost 2 vext3 <4,4,u,5>, <4,4,u,5>
+ 2559762534U, // <5,4,5,0>: Cost 3 vext1 <1,5,4,5>, LHS
+ 2559763607U, // <5,4,5,1>: Cost 3 vext1 <1,5,4,5>, <1,5,4,5>
+ 2698628366U, // <5,4,5,2>: Cost 3 vext3 <2,3,4,5>, <4,5,2,3>
+ 3633506454U, // <5,4,5,3>: Cost 4 vext1 <1,5,4,5>, <3,0,1,2>
+ 2559765814U, // <5,4,5,4>: Cost 3 vext1 <1,5,4,5>, RHS
+ 2583654395U, // <5,4,5,5>: Cost 3 vext1 <5,5,4,5>, <5,5,4,5>
+ 1613385014U, // <5,4,5,6>: Cost 2 vext3 <0,4,1,5>, RHS
+ 3901639990U, // <5,4,5,7>: Cost 4 vuzpr <1,5,0,4>, RHS
+ 1613385032U, // <5,4,5,u>: Cost 2 vext3 <0,4,1,5>, RHS
+ 2559770726U, // <5,4,6,0>: Cost 3 vext1 <1,5,4,6>, LHS
+ 2559771648U, // <5,4,6,1>: Cost 3 vext1 <1,5,4,6>, <1,3,5,7>
+ 3633514088U, // <5,4,6,2>: Cost 4 vext1 <1,5,4,6>, <2,2,2,2>
+ 2571717122U, // <5,4,6,3>: Cost 3 vext1 <3,5,4,6>, <3,4,5,6>
+ 2559774006U, // <5,4,6,4>: Cost 3 vext1 <1,5,4,6>, RHS
+ 2712636796U, // <5,4,6,5>: Cost 3 vext3 <4,6,5,5>, <4,6,5,5>
+ 3760868743U, // <5,4,6,6>: Cost 4 vext3 <0,4,1,5>, <4,6,6,7>
+ 2712784270U, // <5,4,6,7>: Cost 3 vext3 <4,6,7,5>, <4,6,7,5>
+ 2559776558U, // <5,4,6,u>: Cost 3 vext1 <1,5,4,6>, LHS
+ 2565750886U, // <5,4,7,0>: Cost 3 vext1 <2,5,4,7>, LHS
+ 2565751706U, // <5,4,7,1>: Cost 3 vext1 <2,5,4,7>, <1,2,3,4>
+ 2565752690U, // <5,4,7,2>: Cost 3 vext1 <2,5,4,7>, <2,5,4,7>
+ 2571725387U, // <5,4,7,3>: Cost 3 vext1 <3,5,4,7>, <3,5,4,7>
+ 2565754166U, // <5,4,7,4>: Cost 3 vext1 <2,5,4,7>, RHS
+ 3114713426U, // <5,4,7,5>: Cost 3 vtrnr RHS, <0,4,1,5>
+ 94817590U, // <5,4,7,6>: Cost 1 vrev RHS
+ 2595616175U, // <5,4,7,7>: Cost 3 vext1 <7,5,4,7>, <7,5,4,7>
+ 94965064U, // <5,4,7,u>: Cost 1 vrev RHS
+ 2559787110U, // <5,4,u,0>: Cost 3 vext1 <1,5,4,u>, LHS
+ 2559788186U, // <5,4,u,1>: Cost 3 vext1 <1,5,4,u>, <1,5,4,u>
+ 2242014483U, // <5,4,u,2>: Cost 3 vrev <4,5,2,u>
+ 2667419628U, // <5,4,u,3>: Cost 3 vext2 <u,3,5,4>, <u,3,5,4>
+ 2559790390U, // <5,4,u,4>: Cost 3 vext1 <1,5,4,u>, RHS
+ 1640222238U, // <5,4,u,5>: Cost 2 vext3 <4,u,5,5>, <4,u,5,5>
+ 94825783U, // <5,4,u,6>: Cost 1 vrev RHS
+ 2714111536U, // <5,4,u,7>: Cost 3 vext3 <4,u,7,5>, <4,u,7,5>
+ 94973257U, // <5,4,u,u>: Cost 1 vrev RHS
+ 2646851584U, // <5,5,0,0>: Cost 3 vext2 <4,u,5,5>, <0,0,0,0>
+ 1573109862U, // <5,5,0,1>: Cost 2 vext2 <4,u,5,5>, LHS
+ 2646851748U, // <5,5,0,2>: Cost 3 vext2 <4,u,5,5>, <0,2,0,2>
+ 3760279130U, // <5,5,0,3>: Cost 4 vext3 <0,3,2,5>, <5,0,3,2>
+ 2687127138U, // <5,5,0,4>: Cost 3 vext3 <0,4,1,5>, <5,0,4,1>
+ 2248142847U, // <5,5,0,5>: Cost 3 vrev <5,5,5,0>
+ 3720593910U, // <5,5,0,6>: Cost 4 vext2 <4,u,5,5>, <0,6,1,7>
+ 4182502710U, // <5,5,0,7>: Cost 4 vtrnr <3,5,7,0>, RHS
+ 1573110429U, // <5,5,0,u>: Cost 2 vext2 <4,u,5,5>, LHS
+ 2646852342U, // <5,5,1,0>: Cost 3 vext2 <4,u,5,5>, <1,0,3,2>
+ 2624291676U, // <5,5,1,1>: Cost 3 vext2 <1,1,5,5>, <1,1,5,5>
+ 2646852502U, // <5,5,1,2>: Cost 3 vext2 <4,u,5,5>, <1,2,3,0>
+ 2646852568U, // <5,5,1,3>: Cost 3 vext2 <4,u,5,5>, <1,3,1,3>
+ 2715217591U, // <5,5,1,4>: Cost 3 vext3 <5,1,4,5>, <5,1,4,5>
+ 2628936848U, // <5,5,1,5>: Cost 3 vext2 <1,u,5,5>, <1,5,3,7>
+ 3698033907U, // <5,5,1,6>: Cost 4 vext2 <1,1,5,5>, <1,6,5,7>
+ 2713964240U, // <5,5,1,7>: Cost 3 vext3 <4,u,5,5>, <5,1,7,3>
+ 2628937107U, // <5,5,1,u>: Cost 3 vext2 <1,u,5,5>, <1,u,5,5>
+ 3645497446U, // <5,5,2,0>: Cost 4 vext1 <3,5,5,2>, LHS
+ 3760869099U, // <5,5,2,1>: Cost 4 vext3 <0,4,1,5>, <5,2,1,3>
+ 2646853224U, // <5,5,2,2>: Cost 3 vext2 <4,u,5,5>, <2,2,2,2>
+ 2698628862U, // <5,5,2,3>: Cost 3 vext3 <2,3,4,5>, <5,2,3,4>
+ 3772370694U, // <5,5,2,4>: Cost 4 vext3 <2,3,4,5>, <5,2,4,3>
+ 2713964303U, // <5,5,2,5>: Cost 3 vext3 <4,u,5,5>, <5,2,5,3>
+ 2646853562U, // <5,5,2,6>: Cost 3 vext2 <4,u,5,5>, <2,6,3,7>
+ 4038198272U, // <5,5,2,7>: Cost 4 vzipr <1,u,5,2>, <1,3,5,7>
+ 2701946667U, // <5,5,2,u>: Cost 3 vext3 <2,u,4,5>, <5,2,u,4>
+ 2646853782U, // <5,5,3,0>: Cost 3 vext2 <4,u,5,5>, <3,0,1,2>
+ 3698034922U, // <5,5,3,1>: Cost 4 vext2 <1,1,5,5>, <3,1,1,5>
+ 3702679919U, // <5,5,3,2>: Cost 4 vext2 <1,u,5,5>, <3,2,7,3>
+ 2637564336U, // <5,5,3,3>: Cost 3 vext2 <3,3,5,5>, <3,3,5,5>
+ 2646854146U, // <5,5,3,4>: Cost 3 vext2 <4,u,5,5>, <3,4,5,6>
+ 2638891602U, // <5,5,3,5>: Cost 3 vext2 <3,5,5,5>, <3,5,5,5>
+ 3702680247U, // <5,5,3,6>: Cost 4 vext2 <1,u,5,5>, <3,6,7,7>
+ 3702680259U, // <5,5,3,7>: Cost 4 vext2 <1,u,5,5>, <3,7,0,1>
+ 2646854430U, // <5,5,3,u>: Cost 3 vext2 <4,u,5,5>, <3,u,1,2>
+ 2646854546U, // <5,5,4,0>: Cost 3 vext2 <4,u,5,5>, <4,0,5,1>
+ 2642209767U, // <5,5,4,1>: Cost 3 vext2 <4,1,5,5>, <4,1,5,5>
+ 3711306806U, // <5,5,4,2>: Cost 4 vext2 <3,3,5,5>, <4,2,5,3>
+ 3645516369U, // <5,5,4,3>: Cost 4 vext1 <3,5,5,4>, <3,5,5,4>
+ 1570458842U, // <5,5,4,4>: Cost 2 vext2 <4,4,5,5>, <4,4,5,5>
+ 1573113142U, // <5,5,4,5>: Cost 2 vext2 <4,u,5,5>, RHS
+ 2645527932U, // <5,5,4,6>: Cost 3 vext2 <4,6,5,5>, <4,6,5,5>
+ 2713964486U, // <5,5,4,7>: Cost 3 vext3 <4,u,5,5>, <5,4,7,6>
+ 1573113374U, // <5,5,4,u>: Cost 2 vext2 <4,u,5,5>, <4,u,5,5>
+ 1509982310U, // <5,5,5,0>: Cost 2 vext1 <5,5,5,5>, LHS
+ 2646855376U, // <5,5,5,1>: Cost 3 vext2 <4,u,5,5>, <5,1,7,3>
+ 2583725672U, // <5,5,5,2>: Cost 3 vext1 <5,5,5,5>, <2,2,2,2>
+ 2583726230U, // <5,5,5,3>: Cost 3 vext1 <5,5,5,5>, <3,0,1,2>
+ 1509985590U, // <5,5,5,4>: Cost 2 vext1 <5,5,5,5>, RHS
+ 229035318U, // <5,5,5,5>: Cost 1 vdup1 RHS
+ 2646855778U, // <5,5,5,6>: Cost 3 vext2 <4,u,5,5>, <5,6,7,0>
+ 2646855848U, // <5,5,5,7>: Cost 3 vext2 <4,u,5,5>, <5,7,5,7>
+ 229035318U, // <5,5,5,u>: Cost 1 vdup1 RHS
+ 2577760358U, // <5,5,6,0>: Cost 3 vext1 <4,5,5,6>, LHS
+ 3633587361U, // <5,5,6,1>: Cost 4 vext1 <1,5,5,6>, <1,5,5,6>
+ 2646856186U, // <5,5,6,2>: Cost 3 vext2 <4,u,5,5>, <6,2,7,3>
+ 3633588738U, // <5,5,6,3>: Cost 4 vext1 <1,5,5,6>, <3,4,5,6>
+ 2718535756U, // <5,5,6,4>: Cost 3 vext3 <5,6,4,5>, <5,6,4,5>
+ 2644202223U, // <5,5,6,5>: Cost 3 vext2 <4,4,5,5>, <6,5,7,5>
+ 2973780482U, // <5,5,6,6>: Cost 3 vzipr <3,4,5,6>, <3,4,5,6>
+ 2646856526U, // <5,5,6,7>: Cost 3 vext2 <4,u,5,5>, <6,7,0,1>
+ 2646856607U, // <5,5,6,u>: Cost 3 vext2 <4,u,5,5>, <6,u,0,1>
+ 2571796582U, // <5,5,7,0>: Cost 3 vext1 <3,5,5,7>, LHS
+ 3633595392U, // <5,5,7,1>: Cost 4 vext1 <1,5,5,7>, <1,3,5,7>
+ 2571798222U, // <5,5,7,2>: Cost 3 vext1 <3,5,5,7>, <2,3,4,5>
+ 2571799124U, // <5,5,7,3>: Cost 3 vext1 <3,5,5,7>, <3,5,5,7>
+ 2571799862U, // <5,5,7,4>: Cost 3 vext1 <3,5,5,7>, RHS
+ 3114717188U, // <5,5,7,5>: Cost 3 vtrnr RHS, <5,5,5,5>
+ 4034923010U, // <5,5,7,6>: Cost 4 vzipr <1,3,5,7>, <3,4,5,6>
+ 2040974646U, // <5,5,7,7>: Cost 2 vtrnr RHS, RHS
+ 2040974647U, // <5,5,7,u>: Cost 2 vtrnr RHS, RHS
+ 1509982310U, // <5,5,u,0>: Cost 2 vext1 <5,5,5,5>, LHS
+ 1573115694U, // <5,5,u,1>: Cost 2 vext2 <4,u,5,5>, LHS
+ 2571806414U, // <5,5,u,2>: Cost 3 vext1 <3,5,5,u>, <2,3,4,5>
+ 2571807317U, // <5,5,u,3>: Cost 3 vext1 <3,5,5,u>, <3,5,5,u>
+ 1509985590U, // <5,5,u,4>: Cost 2 vext1 <5,5,5,5>, RHS
+ 229035318U, // <5,5,u,5>: Cost 1 vdup1 RHS
+ 2646857936U, // <5,5,u,6>: Cost 3 vext2 <4,u,5,5>, <u,6,3,7>
+ 2040982838U, // <5,5,u,7>: Cost 2 vtrnr RHS, RHS
+ 229035318U, // <5,5,u,u>: Cost 1 vdup1 RHS
+ 2638233600U, // <5,6,0,0>: Cost 3 vext2 <3,4,5,6>, <0,0,0,0>
+ 1564491878U, // <5,6,0,1>: Cost 2 vext2 <3,4,5,6>, LHS
+ 2632261796U, // <5,6,0,2>: Cost 3 vext2 <2,4,5,6>, <0,2,0,2>
+ 2638233856U, // <5,6,0,3>: Cost 3 vext2 <3,4,5,6>, <0,3,1,4>
+ 2638233938U, // <5,6,0,4>: Cost 3 vext2 <3,4,5,6>, <0,4,1,5>
+ 3706003885U, // <5,6,0,5>: Cost 4 vext2 <2,4,5,6>, <0,5,2,6>
+ 3706003967U, // <5,6,0,6>: Cost 4 vext2 <2,4,5,6>, <0,6,2,7>
+ 4047473974U, // <5,6,0,7>: Cost 4 vzipr <3,4,5,0>, RHS
+ 1564492445U, // <5,6,0,u>: Cost 2 vext2 <3,4,5,6>, LHS
+ 2638234358U, // <5,6,1,0>: Cost 3 vext2 <3,4,5,6>, <1,0,3,2>
+ 2638234420U, // <5,6,1,1>: Cost 3 vext2 <3,4,5,6>, <1,1,1,1>
+ 2638234518U, // <5,6,1,2>: Cost 3 vext2 <3,4,5,6>, <1,2,3,0>
+ 2638234584U, // <5,6,1,3>: Cost 3 vext2 <3,4,5,6>, <1,3,1,3>
+ 2626290768U, // <5,6,1,4>: Cost 3 vext2 <1,4,5,6>, <1,4,5,6>
+ 2638234768U, // <5,6,1,5>: Cost 3 vext2 <3,4,5,6>, <1,5,3,7>
+ 3700032719U, // <5,6,1,6>: Cost 4 vext2 <1,4,5,6>, <1,6,1,7>
+ 2982366518U, // <5,6,1,7>: Cost 3 vzipr <4,u,5,1>, RHS
+ 2628945300U, // <5,6,1,u>: Cost 3 vext2 <1,u,5,6>, <1,u,5,6>
+ 3706004925U, // <5,6,2,0>: Cost 4 vext2 <2,4,5,6>, <2,0,1,2>
+ 3711976966U, // <5,6,2,1>: Cost 4 vext2 <3,4,5,6>, <2,1,0,3>
+ 2638235240U, // <5,6,2,2>: Cost 3 vext2 <3,4,5,6>, <2,2,2,2>
+ 2638235302U, // <5,6,2,3>: Cost 3 vext2 <3,4,5,6>, <2,3,0,1>
+ 2632263465U, // <5,6,2,4>: Cost 3 vext2 <2,4,5,6>, <2,4,5,6>
+ 2638235496U, // <5,6,2,5>: Cost 3 vext2 <3,4,5,6>, <2,5,3,6>
+ 2638235578U, // <5,6,2,6>: Cost 3 vext2 <3,4,5,6>, <2,6,3,7>
+ 2713965050U, // <5,6,2,7>: Cost 3 vext3 <4,u,5,5>, <6,2,7,3>
+ 2634917997U, // <5,6,2,u>: Cost 3 vext2 <2,u,5,6>, <2,u,5,6>
+ 2638235798U, // <5,6,3,0>: Cost 3 vext2 <3,4,5,6>, <3,0,1,2>
+ 3711977695U, // <5,6,3,1>: Cost 4 vext2 <3,4,5,6>, <3,1,0,3>
+ 3710650720U, // <5,6,3,2>: Cost 4 vext2 <3,2,5,6>, <3,2,5,6>
+ 2638236060U, // <5,6,3,3>: Cost 3 vext2 <3,4,5,6>, <3,3,3,3>
+ 1564494338U, // <5,6,3,4>: Cost 2 vext2 <3,4,5,6>, <3,4,5,6>
+ 2638236234U, // <5,6,3,5>: Cost 3 vext2 <3,4,5,6>, <3,5,4,6>
+ 3711978104U, // <5,6,3,6>: Cost 4 vext2 <3,4,5,6>, <3,6,0,7>
+ 4034227510U, // <5,6,3,7>: Cost 4 vzipr <1,2,5,3>, RHS
+ 1567148870U, // <5,6,3,u>: Cost 2 vext2 <3,u,5,6>, <3,u,5,6>
+ 2577817702U, // <5,6,4,0>: Cost 3 vext1 <4,5,6,4>, LHS
+ 3700034544U, // <5,6,4,1>: Cost 4 vext2 <1,4,5,6>, <4,1,6,5>
+ 2723033713U, // <5,6,4,2>: Cost 3 vext3 <6,4,2,5>, <6,4,2,5>
+ 2638236818U, // <5,6,4,3>: Cost 3 vext2 <3,4,5,6>, <4,3,6,5>
+ 2644208859U, // <5,6,4,4>: Cost 3 vext2 <4,4,5,6>, <4,4,5,6>
+ 1564495158U, // <5,6,4,5>: Cost 2 vext2 <3,4,5,6>, RHS
+ 2645536125U, // <5,6,4,6>: Cost 3 vext2 <4,6,5,6>, <4,6,5,6>
+ 2723402398U, // <5,6,4,7>: Cost 3 vext3 <6,4,7,5>, <6,4,7,5>
+ 1564495401U, // <5,6,4,u>: Cost 2 vext2 <3,4,5,6>, RHS
+ 2577825894U, // <5,6,5,0>: Cost 3 vext1 <4,5,6,5>, LHS
+ 2662125264U, // <5,6,5,1>: Cost 3 vext2 <7,4,5,6>, <5,1,7,3>
+ 3775836867U, // <5,6,5,2>: Cost 4 vext3 <2,u,6,5>, <6,5,2,6>
+ 3711979343U, // <5,6,5,3>: Cost 4 vext2 <3,4,5,6>, <5,3,3,4>
+ 2650181556U, // <5,6,5,4>: Cost 3 vext2 <5,4,5,6>, <5,4,5,6>
+ 2662125572U, // <5,6,5,5>: Cost 3 vext2 <7,4,5,6>, <5,5,5,5>
+ 2638237732U, // <5,6,5,6>: Cost 3 vext2 <3,4,5,6>, <5,6,0,1>
+ 2982399286U, // <5,6,5,7>: Cost 3 vzipr <4,u,5,5>, RHS
+ 2982399287U, // <5,6,5,u>: Cost 3 vzipr <4,u,5,5>, RHS
+ 2583806054U, // <5,6,6,0>: Cost 3 vext1 <5,5,6,6>, LHS
+ 3711979910U, // <5,6,6,1>: Cost 4 vext2 <3,4,5,6>, <6,1,3,4>
+ 2662126074U, // <5,6,6,2>: Cost 3 vext2 <7,4,5,6>, <6,2,7,3>
+ 2583808514U, // <5,6,6,3>: Cost 3 vext1 <5,5,6,6>, <3,4,5,6>
+ 2583809334U, // <5,6,6,4>: Cost 3 vext1 <5,5,6,6>, RHS
+ 2583810062U, // <5,6,6,5>: Cost 3 vext1 <5,5,6,6>, <5,5,6,6>
+ 2638238520U, // <5,6,6,6>: Cost 3 vext2 <3,4,5,6>, <6,6,6,6>
+ 2973781302U, // <5,6,6,7>: Cost 3 vzipr <3,4,5,6>, RHS
+ 2973781303U, // <5,6,6,u>: Cost 3 vzipr <3,4,5,6>, RHS
+ 430358630U, // <5,6,7,0>: Cost 1 vext1 RHS, LHS
+ 1504101110U, // <5,6,7,1>: Cost 2 vext1 RHS, <1,0,3,2>
+ 1504101992U, // <5,6,7,2>: Cost 2 vext1 RHS, <2,2,2,2>
+ 1504102550U, // <5,6,7,3>: Cost 2 vext1 RHS, <3,0,1,2>
+ 430361910U, // <5,6,7,4>: Cost 1 vext1 RHS, RHS
+ 1504104390U, // <5,6,7,5>: Cost 2 vext1 RHS, <5,4,7,6>
+ 1504105272U, // <5,6,7,6>: Cost 2 vext1 RHS, <6,6,6,6>
+ 1504106092U, // <5,6,7,7>: Cost 2 vext1 RHS, <7,7,7,7>
+ 430364462U, // <5,6,7,u>: Cost 1 vext1 RHS, LHS
+ 430366822U, // <5,6,u,0>: Cost 1 vext1 RHS, LHS
+ 1564497710U, // <5,6,u,1>: Cost 2 vext2 <3,4,5,6>, LHS
+ 1504110184U, // <5,6,u,2>: Cost 2 vext1 RHS, <2,2,2,2>
+ 1504110742U, // <5,6,u,3>: Cost 2 vext1 RHS, <3,0,1,2>
+ 430370103U, // <5,6,u,4>: Cost 1 vext1 RHS, RHS
+ 1564498074U, // <5,6,u,5>: Cost 2 vext2 <3,4,5,6>, RHS
+ 1504113146U, // <5,6,u,6>: Cost 2 vext1 RHS, <6,2,7,3>
+ 1504113658U, // <5,6,u,7>: Cost 2 vext1 RHS, <7,0,1,2>
+ 430372654U, // <5,6,u,u>: Cost 1 vext1 RHS, LHS
+ 2625634304U, // <5,7,0,0>: Cost 3 vext2 <1,3,5,7>, <0,0,0,0>
+ 1551892582U, // <5,7,0,1>: Cost 2 vext2 <1,3,5,7>, LHS
+ 2625634468U, // <5,7,0,2>: Cost 3 vext2 <1,3,5,7>, <0,2,0,2>
+ 2571889247U, // <5,7,0,3>: Cost 3 vext1 <3,5,7,0>, <3,5,7,0>
+ 2625634642U, // <5,7,0,4>: Cost 3 vext2 <1,3,5,7>, <0,4,1,5>
+ 2595778728U, // <5,7,0,5>: Cost 3 vext1 <7,5,7,0>, <5,7,5,7>
+ 3699376639U, // <5,7,0,6>: Cost 4 vext2 <1,3,5,7>, <0,6,2,7>
+ 2260235715U, // <5,7,0,7>: Cost 3 vrev <7,5,7,0>
+ 1551893149U, // <5,7,0,u>: Cost 2 vext2 <1,3,5,7>, LHS
+ 2625635062U, // <5,7,1,0>: Cost 3 vext2 <1,3,5,7>, <1,0,3,2>
+ 2624308020U, // <5,7,1,1>: Cost 3 vext2 <1,1,5,7>, <1,1,1,1>
+ 2625635222U, // <5,7,1,2>: Cost 3 vext2 <1,3,5,7>, <1,2,3,0>
+ 1551893504U, // <5,7,1,3>: Cost 2 vext2 <1,3,5,7>, <1,3,5,7>
+ 2571898166U, // <5,7,1,4>: Cost 3 vext1 <3,5,7,1>, RHS
+ 2625635472U, // <5,7,1,5>: Cost 3 vext2 <1,3,5,7>, <1,5,3,7>
+ 2627626227U, // <5,7,1,6>: Cost 3 vext2 <1,6,5,7>, <1,6,5,7>
+ 3702031684U, // <5,7,1,7>: Cost 4 vext2 <1,7,5,7>, <1,7,5,7>
+ 1555211669U, // <5,7,1,u>: Cost 2 vext2 <1,u,5,7>, <1,u,5,7>
+ 2629617126U, // <5,7,2,0>: Cost 3 vext2 <2,0,5,7>, <2,0,5,7>
+ 3699377670U, // <5,7,2,1>: Cost 4 vext2 <1,3,5,7>, <2,1,0,3>
+ 2625635944U, // <5,7,2,2>: Cost 3 vext2 <1,3,5,7>, <2,2,2,2>
+ 2625636006U, // <5,7,2,3>: Cost 3 vext2 <1,3,5,7>, <2,3,0,1>
+ 2632271658U, // <5,7,2,4>: Cost 3 vext2 <2,4,5,7>, <2,4,5,7>
+ 2625636201U, // <5,7,2,5>: Cost 3 vext2 <1,3,5,7>, <2,5,3,7>
+ 2625636282U, // <5,7,2,6>: Cost 3 vext2 <1,3,5,7>, <2,6,3,7>
+ 3708004381U, // <5,7,2,7>: Cost 4 vext2 <2,7,5,7>, <2,7,5,7>
+ 2625636411U, // <5,7,2,u>: Cost 3 vext2 <1,3,5,7>, <2,u,0,1>
+ 2625636502U, // <5,7,3,0>: Cost 3 vext2 <1,3,5,7>, <3,0,1,2>
+ 2625636604U, // <5,7,3,1>: Cost 3 vext2 <1,3,5,7>, <3,1,3,5>
+ 3699378478U, // <5,7,3,2>: Cost 4 vext2 <1,3,5,7>, <3,2,0,1>
+ 2625636764U, // <5,7,3,3>: Cost 3 vext2 <1,3,5,7>, <3,3,3,3>
+ 2625636866U, // <5,7,3,4>: Cost 3 vext2 <1,3,5,7>, <3,4,5,6>
+ 2625636959U, // <5,7,3,5>: Cost 3 vext2 <1,3,5,7>, <3,5,7,0>
+ 3699378808U, // <5,7,3,6>: Cost 4 vext2 <1,3,5,7>, <3,6,0,7>
+ 2640235254U, // <5,7,3,7>: Cost 3 vext2 <3,7,5,7>, <3,7,5,7>
+ 2625637150U, // <5,7,3,u>: Cost 3 vext2 <1,3,5,7>, <3,u,1,2>
+ 2571919462U, // <5,7,4,0>: Cost 3 vext1 <3,5,7,4>, LHS
+ 2571920384U, // <5,7,4,1>: Cost 3 vext1 <3,5,7,4>, <1,3,5,7>
+ 3699379260U, // <5,7,4,2>: Cost 4 vext2 <1,3,5,7>, <4,2,6,0>
+ 2571922019U, // <5,7,4,3>: Cost 3 vext1 <3,5,7,4>, <3,5,7,4>
+ 2571922742U, // <5,7,4,4>: Cost 3 vext1 <3,5,7,4>, RHS
+ 1551895862U, // <5,7,4,5>: Cost 2 vext2 <1,3,5,7>, RHS
+ 2846277980U, // <5,7,4,6>: Cost 3 vuzpr RHS, <0,4,2,6>
+ 2646207951U, // <5,7,4,7>: Cost 3 vext2 <4,7,5,7>, <4,7,5,7>
+ 1551896105U, // <5,7,4,u>: Cost 2 vext2 <1,3,5,7>, RHS
+ 2583871590U, // <5,7,5,0>: Cost 3 vext1 <5,5,7,5>, LHS
+ 2652180176U, // <5,7,5,1>: Cost 3 vext2 <5,7,5,7>, <5,1,7,3>
+ 2625638177U, // <5,7,5,2>: Cost 3 vext2 <1,3,5,7>, <5,2,7,3>
+ 2625638262U, // <5,7,5,3>: Cost 3 vext2 <1,3,5,7>, <5,3,7,7>
+ 2583874870U, // <5,7,5,4>: Cost 3 vext1 <5,5,7,5>, RHS
+ 2846281732U, // <5,7,5,5>: Cost 3 vuzpr RHS, <5,5,5,5>
+ 2651517015U, // <5,7,5,6>: Cost 3 vext2 <5,6,5,7>, <5,6,5,7>
+ 1772539190U, // <5,7,5,7>: Cost 2 vuzpr RHS, RHS
+ 1772539191U, // <5,7,5,u>: Cost 2 vuzpr RHS, RHS
+ 2846281826U, // <5,7,6,0>: Cost 3 vuzpr RHS, <5,6,7,0>
+ 3699380615U, // <5,7,6,1>: Cost 4 vext2 <1,3,5,7>, <6,1,3,5>
+ 2846281108U, // <5,7,6,2>: Cost 3 vuzpr RHS, <4,6,u,2>
+ 2589854210U, // <5,7,6,3>: Cost 3 vext1 <6,5,7,6>, <3,4,5,6>
+ 2846281830U, // <5,7,6,4>: Cost 3 vuzpr RHS, <5,6,7,4>
+ 2725467658U, // <5,7,6,5>: Cost 3 vext3 <6,7,u,5>, <7,6,5,u>
+ 2846281076U, // <5,7,6,6>: Cost 3 vuzpr RHS, <4,6,4,6>
+ 2846279610U, // <5,7,6,7>: Cost 3 vuzpr RHS, <2,6,3,7>
+ 2846279611U, // <5,7,6,u>: Cost 3 vuzpr RHS, <2,6,3,u>
+ 1510146150U, // <5,7,7,0>: Cost 2 vext1 <5,5,7,7>, LHS
+ 2846282574U, // <5,7,7,1>: Cost 3 vuzpr RHS, <6,7,0,1>
+ 2583889512U, // <5,7,7,2>: Cost 3 vext1 <5,5,7,7>, <2,2,2,2>
+ 2846281919U, // <5,7,7,3>: Cost 3 vuzpr RHS, <5,7,u,3>
+ 1510149430U, // <5,7,7,4>: Cost 2 vext1 <5,5,7,7>, RHS
+ 1510150168U, // <5,7,7,5>: Cost 2 vext1 <5,5,7,7>, <5,5,7,7>
+ 2583892474U, // <5,7,7,6>: Cost 3 vext1 <5,5,7,7>, <6,2,7,3>
+ 2625640044U, // <5,7,7,7>: Cost 3 vext2 <1,3,5,7>, <7,7,7,7>
+ 1510151982U, // <5,7,7,u>: Cost 2 vext1 <5,5,7,7>, LHS
+ 1510154342U, // <5,7,u,0>: Cost 2 vext1 <5,5,7,u>, LHS
+ 1551898414U, // <5,7,u,1>: Cost 2 vext2 <1,3,5,7>, LHS
+ 2625640325U, // <5,7,u,2>: Cost 3 vext2 <1,3,5,7>, <u,2,3,0>
+ 1772536477U, // <5,7,u,3>: Cost 2 vuzpr RHS, LHS
+ 1510157622U, // <5,7,u,4>: Cost 2 vext1 <5,5,7,u>, RHS
+ 1551898778U, // <5,7,u,5>: Cost 2 vext2 <1,3,5,7>, RHS
+ 2625640656U, // <5,7,u,6>: Cost 3 vext2 <1,3,5,7>, <u,6,3,7>
+ 1772539433U, // <5,7,u,7>: Cost 2 vuzpr RHS, RHS
+ 1551898981U, // <5,7,u,u>: Cost 2 vext2 <1,3,5,7>, LHS
+ 2625642496U, // <5,u,0,0>: Cost 3 vext2 <1,3,5,u>, <0,0,0,0>
+ 1551900774U, // <5,u,0,1>: Cost 2 vext2 <1,3,5,u>, LHS
+ 2625642660U, // <5,u,0,2>: Cost 3 vext2 <1,3,5,u>, <0,2,0,2>
+ 2698630885U, // <5,u,0,3>: Cost 3 vext3 <2,3,4,5>, <u,0,3,2>
+ 2687129325U, // <5,u,0,4>: Cost 3 vext3 <0,4,1,5>, <u,0,4,1>
+ 2689783542U, // <5,u,0,5>: Cost 3 vext3 <0,u,1,5>, <u,0,5,1>
+ 2266134675U, // <5,u,0,6>: Cost 3 vrev <u,5,6,0>
+ 2595853772U, // <5,u,0,7>: Cost 3 vext1 <7,5,u,0>, <7,5,u,0>
+ 1551901341U, // <5,u,0,u>: Cost 2 vext2 <1,3,5,u>, LHS
+ 2625643254U, // <5,u,1,0>: Cost 3 vext2 <1,3,5,u>, <1,0,3,2>
+ 2625643316U, // <5,u,1,1>: Cost 3 vext2 <1,3,5,u>, <1,1,1,1>
+ 1613387566U, // <5,u,1,2>: Cost 2 vext3 <0,4,1,5>, LHS
+ 1551901697U, // <5,u,1,3>: Cost 2 vext2 <1,3,5,u>, <1,3,5,u>
+ 2626307154U, // <5,u,1,4>: Cost 3 vext2 <1,4,5,u>, <1,4,5,u>
+ 2689783622U, // <5,u,1,5>: Cost 3 vext3 <0,u,1,5>, <u,1,5,0>
+ 2627634420U, // <5,u,1,6>: Cost 3 vext2 <1,6,5,u>, <1,6,5,u>
+ 2982366536U, // <5,u,1,7>: Cost 3 vzipr <4,u,5,1>, RHS
+ 1613387620U, // <5,u,1,u>: Cost 2 vext3 <0,4,1,5>, LHS
+ 2846286742U, // <5,u,2,0>: Cost 3 vuzpr RHS, <1,2,3,0>
+ 2685796528U, // <5,u,2,1>: Cost 3 vext3 <0,2,1,5>, <0,2,1,5>
+ 2625644136U, // <5,u,2,2>: Cost 3 vext2 <1,3,5,u>, <2,2,2,2>
+ 2687129480U, // <5,u,2,3>: Cost 3 vext3 <0,4,1,5>, <u,2,3,3>
+ 2632279851U, // <5,u,2,4>: Cost 3 vext2 <2,4,5,u>, <2,4,5,u>
+ 2625644394U, // <5,u,2,5>: Cost 3 vext2 <1,3,5,u>, <2,5,3,u>
+ 2625644474U, // <5,u,2,6>: Cost 3 vext2 <1,3,5,u>, <2,6,3,7>
+ 2713966508U, // <5,u,2,7>: Cost 3 vext3 <4,u,5,5>, <u,2,7,3>
+ 2625644603U, // <5,u,2,u>: Cost 3 vext2 <1,3,5,u>, <2,u,0,1>
+ 2687129532U, // <5,u,3,0>: Cost 3 vext3 <0,4,1,5>, <u,3,0,1>
+ 2636261649U, // <5,u,3,1>: Cost 3 vext2 <3,1,5,u>, <3,1,5,u>
+ 2636925282U, // <5,u,3,2>: Cost 3 vext2 <3,2,5,u>, <3,2,5,u>
+ 2625644956U, // <5,u,3,3>: Cost 3 vext2 <1,3,5,u>, <3,3,3,3>
+ 1564510724U, // <5,u,3,4>: Cost 2 vext2 <3,4,5,u>, <3,4,5,u>
+ 2625645160U, // <5,u,3,5>: Cost 3 vext2 <1,3,5,u>, <3,5,u,0>
+ 2734610422U, // <5,u,3,6>: Cost 3 vext3 <u,3,6,5>, <u,3,6,5>
+ 2640243447U, // <5,u,3,7>: Cost 3 vext2 <3,7,5,u>, <3,7,5,u>
+ 1567165256U, // <5,u,3,u>: Cost 2 vext2 <3,u,5,u>, <3,u,5,u>
+ 1567828889U, // <5,u,4,0>: Cost 2 vext2 <4,0,5,u>, <4,0,5,u>
+ 1661163546U, // <5,u,4,1>: Cost 2 vext3 <u,4,1,5>, <u,4,1,5>
+ 2734463012U, // <5,u,4,2>: Cost 3 vext3 <u,3,4,5>, <u,4,2,6>
+ 2698631212U, // <5,u,4,3>: Cost 3 vext3 <2,3,4,5>, <u,4,3,5>
+ 1570458842U, // <5,u,4,4>: Cost 2 vext2 <4,4,5,5>, <4,4,5,5>
+ 1551904054U, // <5,u,4,5>: Cost 2 vext2 <1,3,5,u>, RHS
+ 2846286172U, // <5,u,4,6>: Cost 3 vuzpr RHS, <0,4,2,6>
+ 2646216144U, // <5,u,4,7>: Cost 3 vext2 <4,7,5,u>, <4,7,5,u>
+ 1551904297U, // <5,u,4,u>: Cost 2 vext2 <1,3,5,u>, RHS
+ 1509982310U, // <5,u,5,0>: Cost 2 vext1 <5,5,5,5>, LHS
+ 2560058555U, // <5,u,5,1>: Cost 3 vext1 <1,5,u,5>, <1,5,u,5>
+ 2698926194U, // <5,u,5,2>: Cost 3 vext3 <2,3,u,5>, <u,5,2,3>
+ 2698631295U, // <5,u,5,3>: Cost 3 vext3 <2,3,4,5>, <u,5,3,7>
+ 1509985590U, // <5,u,5,4>: Cost 2 vext1 <5,5,5,5>, RHS
+ 229035318U, // <5,u,5,5>: Cost 1 vdup1 RHS
+ 1613387930U, // <5,u,5,6>: Cost 2 vext3 <0,4,1,5>, RHS
+ 1772547382U, // <5,u,5,7>: Cost 2 vuzpr RHS, RHS
+ 229035318U, // <5,u,5,u>: Cost 1 vdup1 RHS
+ 2566037606U, // <5,u,6,0>: Cost 3 vext1 <2,5,u,6>, LHS
+ 2920044334U, // <5,u,6,1>: Cost 3 vzipl <5,6,7,0>, LHS
+ 2566039445U, // <5,u,6,2>: Cost 3 vext1 <2,5,u,6>, <2,5,u,6>
+ 2687129808U, // <5,u,6,3>: Cost 3 vext3 <0,4,1,5>, <u,6,3,7>
+ 2566040886U, // <5,u,6,4>: Cost 3 vext1 <2,5,u,6>, RHS
+ 2920044698U, // <5,u,6,5>: Cost 3 vzipl <5,6,7,0>, RHS
+ 2846289268U, // <5,u,6,6>: Cost 3 vuzpr RHS, <4,6,4,6>
+ 2973781320U, // <5,u,6,7>: Cost 3 vzipr <3,4,5,6>, RHS
+ 2687129853U, // <5,u,6,u>: Cost 3 vext3 <0,4,1,5>, <u,6,u,7>
+ 430506086U, // <5,u,7,0>: Cost 1 vext1 RHS, LHS
+ 1486333117U, // <5,u,7,1>: Cost 2 vext1 <1,5,u,7>, <1,5,u,7>
+ 1504249448U, // <5,u,7,2>: Cost 2 vext1 RHS, <2,2,2,2>
+ 2040971933U, // <5,u,7,3>: Cost 2 vtrnr RHS, LHS
+ 430509384U, // <5,u,7,4>: Cost 1 vext1 RHS, RHS
+ 1504251600U, // <5,u,7,5>: Cost 2 vext1 RHS, <5,1,7,3>
+ 118708378U, // <5,u,7,6>: Cost 1 vrev RHS
+ 2040974889U, // <5,u,7,7>: Cost 2 vtrnr RHS, RHS
+ 430511918U, // <5,u,7,u>: Cost 1 vext1 RHS, LHS
+ 430514278U, // <5,u,u,0>: Cost 1 vext1 RHS, LHS
+ 1551906606U, // <5,u,u,1>: Cost 2 vext2 <1,3,5,u>, LHS
+ 1613388133U, // <5,u,u,2>: Cost 2 vext3 <0,4,1,5>, LHS
+ 1772544669U, // <5,u,u,3>: Cost 2 vuzpr RHS, LHS
+ 430517577U, // <5,u,u,4>: Cost 1 vext1 RHS, RHS
+ 229035318U, // <5,u,u,5>: Cost 1 vdup1 RHS
+ 118716571U, // <5,u,u,6>: Cost 1 vrev RHS
+ 1772547625U, // <5,u,u,7>: Cost 2 vuzpr RHS, RHS
+ 430520110U, // <5,u,u,u>: Cost 1 vext1 RHS, LHS
+ 2686025728U, // <6,0,0,0>: Cost 3 vext3 <0,2,4,6>, <0,0,0,0>
+ 2686025738U, // <6,0,0,1>: Cost 3 vext3 <0,2,4,6>, <0,0,1,1>
+ 2686025748U, // <6,0,0,2>: Cost 3 vext3 <0,2,4,6>, <0,0,2,2>
+ 3779084320U, // <6,0,0,3>: Cost 4 vext3 <3,4,5,6>, <0,0,3,5>
+ 2642903388U, // <6,0,0,4>: Cost 3 vext2 <4,2,6,0>, <0,4,2,6>
+ 3657723939U, // <6,0,0,5>: Cost 4 vext1 <5,6,0,0>, <5,6,0,0>
+ 3926676514U, // <6,0,0,6>: Cost 4 vuzpr <5,6,7,0>, <7,0,5,6>
+ 3926675786U, // <6,0,0,7>: Cost 4 vuzpr <5,6,7,0>, <6,0,5,7>
+ 2686025802U, // <6,0,0,u>: Cost 3 vext3 <0,2,4,6>, <0,0,u,2>
+ 2566070374U, // <6,0,1,0>: Cost 3 vext1 <2,6,0,1>, LHS
+ 3759767642U, // <6,0,1,1>: Cost 4 vext3 <0,2,4,6>, <0,1,1,0>
+ 1612284006U, // <6,0,1,2>: Cost 2 vext3 <0,2,4,6>, LHS
+ 2583988738U, // <6,0,1,3>: Cost 3 vext1 <5,6,0,1>, <3,4,5,6>
+ 2566073654U, // <6,0,1,4>: Cost 3 vext1 <2,6,0,1>, RHS
+ 2583990308U, // <6,0,1,5>: Cost 3 vext1 <5,6,0,1>, <5,6,0,1>
+ 2589963005U, // <6,0,1,6>: Cost 3 vext1 <6,6,0,1>, <6,6,0,1>
+ 2595935702U, // <6,0,1,7>: Cost 3 vext1 <7,6,0,1>, <7,6,0,1>
+ 1612284060U, // <6,0,1,u>: Cost 2 vext3 <0,2,4,6>, LHS
+ 2686025892U, // <6,0,2,0>: Cost 3 vext3 <0,2,4,6>, <0,2,0,2>
+ 2685804721U, // <6,0,2,1>: Cost 3 vext3 <0,2,1,6>, <0,2,1,6>
+ 3759620282U, // <6,0,2,2>: Cost 4 vext3 <0,2,2,6>, <0,2,2,6>
+ 2705342658U, // <6,0,2,3>: Cost 3 vext3 <3,4,5,6>, <0,2,3,5>
+ 1612284108U, // <6,0,2,4>: Cost 2 vext3 <0,2,4,6>, <0,2,4,6>
+ 3706029956U, // <6,0,2,5>: Cost 4 vext2 <2,4,6,0>, <2,5,6,7>
+ 2686173406U, // <6,0,2,6>: Cost 3 vext3 <0,2,6,6>, <0,2,6,6>
+ 3651769338U, // <6,0,2,7>: Cost 4 vext1 <4,6,0,2>, <7,0,1,2>
+ 1612579056U, // <6,0,2,u>: Cost 2 vext3 <0,2,u,6>, <0,2,u,6>
+ 3706030230U, // <6,0,3,0>: Cost 4 vext2 <2,4,6,0>, <3,0,1,2>
+ 2705342720U, // <6,0,3,1>: Cost 3 vext3 <3,4,5,6>, <0,3,1,4>
+ 2705342730U, // <6,0,3,2>: Cost 3 vext3 <3,4,5,6>, <0,3,2,5>
+ 3706030492U, // <6,0,3,3>: Cost 4 vext2 <2,4,6,0>, <3,3,3,3>
+ 2644896258U, // <6,0,3,4>: Cost 3 vext2 <4,5,6,0>, <3,4,5,6>
+ 3718638154U, // <6,0,3,5>: Cost 4 vext2 <4,5,6,0>, <3,5,4,6>
+ 3729918619U, // <6,0,3,6>: Cost 4 vext2 <6,4,6,0>, <3,6,4,6>
+ 3926672384U, // <6,0,3,7>: Cost 4 vuzpr <5,6,7,0>, <1,3,5,7>
+ 2705342784U, // <6,0,3,u>: Cost 3 vext3 <3,4,5,6>, <0,3,u,5>
+ 2687058250U, // <6,0,4,0>: Cost 3 vext3 <0,4,0,6>, <0,4,0,6>
+ 2686026066U, // <6,0,4,1>: Cost 3 vext3 <0,2,4,6>, <0,4,1,5>
+ 1613463900U, // <6,0,4,2>: Cost 2 vext3 <0,4,2,6>, <0,4,2,6>
+ 3761021285U, // <6,0,4,3>: Cost 4 vext3 <0,4,3,6>, <0,4,3,6>
+ 2687353198U, // <6,0,4,4>: Cost 3 vext3 <0,4,4,6>, <0,4,4,6>
+ 2632289590U, // <6,0,4,5>: Cost 3 vext2 <2,4,6,0>, RHS
+ 2645560704U, // <6,0,4,6>: Cost 3 vext2 <4,6,6,0>, <4,6,6,0>
+ 2646224337U, // <6,0,4,7>: Cost 3 vext2 <4,7,6,0>, <4,7,6,0>
+ 1613906322U, // <6,0,4,u>: Cost 2 vext3 <0,4,u,6>, <0,4,u,6>
+ 3651788902U, // <6,0,5,0>: Cost 4 vext1 <4,6,0,5>, LHS
+ 2687795620U, // <6,0,5,1>: Cost 3 vext3 <0,5,1,6>, <0,5,1,6>
+ 3761611181U, // <6,0,5,2>: Cost 4 vext3 <0,5,2,6>, <0,5,2,6>
+ 3723284326U, // <6,0,5,3>: Cost 4 vext2 <5,3,6,0>, <5,3,6,0>
+ 2646224838U, // <6,0,5,4>: Cost 3 vext2 <4,7,6,0>, <5,4,7,6>
+ 3718639630U, // <6,0,5,5>: Cost 4 vext2 <4,5,6,0>, <5,5,6,6>
+ 2652196962U, // <6,0,5,6>: Cost 3 vext2 <5,7,6,0>, <5,6,7,0>
+ 2852932918U, // <6,0,5,7>: Cost 3 vuzpr <5,6,7,0>, RHS
+ 2852932919U, // <6,0,5,u>: Cost 3 vuzpr <5,6,7,0>, RHS
+ 2852933730U, // <6,0,6,0>: Cost 3 vuzpr <5,6,7,0>, <5,6,7,0>
+ 2925985894U, // <6,0,6,1>: Cost 3 vzipl <6,6,6,6>, LHS
+ 3060203622U, // <6,0,6,2>: Cost 3 vtrnl <6,6,6,6>, LHS
+ 3718640178U, // <6,0,6,3>: Cost 4 vext2 <4,5,6,0>, <6,3,4,5>
+ 2656178832U, // <6,0,6,4>: Cost 3 vext2 <6,4,6,0>, <6,4,6,0>
+ 3725939378U, // <6,0,6,5>: Cost 4 vext2 <5,7,6,0>, <6,5,0,7>
+ 2657506098U, // <6,0,6,6>: Cost 3 vext2 <6,6,6,0>, <6,6,6,0>
+ 2619020110U, // <6,0,6,7>: Cost 3 vext2 <0,2,6,0>, <6,7,0,1>
+ 2925986461U, // <6,0,6,u>: Cost 3 vzipl <6,6,6,6>, LHS
+ 2572091494U, // <6,0,7,0>: Cost 3 vext1 <3,6,0,7>, LHS
+ 2572092310U, // <6,0,7,1>: Cost 3 vext1 <3,6,0,7>, <1,2,3,0>
+ 2980495524U, // <6,0,7,2>: Cost 3 vzipr RHS, <0,2,0,2>
+ 2572094072U, // <6,0,7,3>: Cost 3 vext1 <3,6,0,7>, <3,6,0,7>
+ 2572094774U, // <6,0,7,4>: Cost 3 vext1 <3,6,0,7>, RHS
+ 4054238242U, // <6,0,7,5>: Cost 4 vzipr RHS, <1,4,0,5>
+ 3645837653U, // <6,0,7,6>: Cost 4 vext1 <3,6,0,7>, <6,0,7,0>
+ 4054239054U, // <6,0,7,7>: Cost 4 vzipr RHS, <2,5,0,7>
+ 2572097326U, // <6,0,7,u>: Cost 3 vext1 <3,6,0,7>, LHS
+ 2686026378U, // <6,0,u,0>: Cost 3 vext3 <0,2,4,6>, <0,u,0,2>
+ 2686026386U, // <6,0,u,1>: Cost 3 vext3 <0,2,4,6>, <0,u,1,1>
+ 1612284573U, // <6,0,u,2>: Cost 2 vext3 <0,2,4,6>, LHS
+ 2705343144U, // <6,0,u,3>: Cost 3 vext3 <3,4,5,6>, <0,u,3,5>
+ 1616265906U, // <6,0,u,4>: Cost 2 vext3 <0,u,4,6>, <0,u,4,6>
+ 2632292506U, // <6,0,u,5>: Cost 3 vext2 <2,4,6,0>, RHS
+ 2590020356U, // <6,0,u,6>: Cost 3 vext1 <6,6,0,u>, <6,6,0,u>
+ 2852933161U, // <6,0,u,7>: Cost 3 vuzpr <5,6,7,0>, RHS
+ 1612284627U, // <6,0,u,u>: Cost 2 vext3 <0,2,4,6>, LHS
+ 2595995750U, // <6,1,0,0>: Cost 3 vext1 <7,6,1,0>, LHS
+ 2646229094U, // <6,1,0,1>: Cost 3 vext2 <4,7,6,1>, LHS
+ 3694092492U, // <6,1,0,2>: Cost 4 vext2 <0,4,6,1>, <0,2,4,6>
+ 2686026486U, // <6,1,0,3>: Cost 3 vext3 <0,2,4,6>, <1,0,3,2>
+ 2595999030U, // <6,1,0,4>: Cost 3 vext1 <7,6,1,0>, RHS
+ 3767730952U, // <6,1,0,5>: Cost 4 vext3 <1,5,4,6>, <1,0,5,2>
+ 2596000590U, // <6,1,0,6>: Cost 3 vext1 <7,6,1,0>, <6,7,0,1>
+ 2596001246U, // <6,1,0,7>: Cost 3 vext1 <7,6,1,0>, <7,6,1,0>
+ 2686026531U, // <6,1,0,u>: Cost 3 vext3 <0,2,4,6>, <1,0,u,2>
+ 3763602219U, // <6,1,1,0>: Cost 4 vext3 <0,u,2,6>, <1,1,0,1>
+ 2686026548U, // <6,1,1,1>: Cost 3 vext3 <0,2,4,6>, <1,1,1,1>
+ 3764929346U, // <6,1,1,2>: Cost 4 vext3 <1,1,2,6>, <1,1,2,6>
+ 2686026568U, // <6,1,1,3>: Cost 3 vext3 <0,2,4,6>, <1,1,3,3>
+ 2691334996U, // <6,1,1,4>: Cost 3 vext3 <1,1,4,6>, <1,1,4,6>
+ 3760874332U, // <6,1,1,5>: Cost 4 vext3 <0,4,1,6>, <1,1,5,5>
+ 3765224294U, // <6,1,1,6>: Cost 4 vext3 <1,1,6,6>, <1,1,6,6>
+ 3669751263U, // <6,1,1,7>: Cost 4 vext1 <7,6,1,1>, <7,6,1,1>
+ 2686026613U, // <6,1,1,u>: Cost 3 vext3 <0,2,4,6>, <1,1,u,3>
+ 2554208358U, // <6,1,2,0>: Cost 3 vext1 <0,6,1,2>, LHS
+ 3763602311U, // <6,1,2,1>: Cost 4 vext3 <0,u,2,6>, <1,2,1,3>
+ 3639895971U, // <6,1,2,2>: Cost 4 vext1 <2,6,1,2>, <2,6,1,2>
+ 2686026646U, // <6,1,2,3>: Cost 3 vext3 <0,2,4,6>, <1,2,3,0>
+ 2554211638U, // <6,1,2,4>: Cost 3 vext1 <0,6,1,2>, RHS
+ 3760874411U, // <6,1,2,5>: Cost 4 vext3 <0,4,1,6>, <1,2,5,3>
+ 2554212858U, // <6,1,2,6>: Cost 3 vext1 <0,6,1,2>, <6,2,7,3>
+ 3802973114U, // <6,1,2,7>: Cost 4 vext3 <7,4,5,6>, <1,2,7,0>
+ 2686026691U, // <6,1,2,u>: Cost 3 vext3 <0,2,4,6>, <1,2,u,0>
+ 2566160486U, // <6,1,3,0>: Cost 3 vext1 <2,6,1,3>, LHS
+ 2686026712U, // <6,1,3,1>: Cost 3 vext3 <0,2,4,6>, <1,3,1,3>
+ 2686026724U, // <6,1,3,2>: Cost 3 vext3 <0,2,4,6>, <1,3,2,6>
+ 3759768552U, // <6,1,3,3>: Cost 4 vext3 <0,2,4,6>, <1,3,3,1>
+ 2692662262U, // <6,1,3,4>: Cost 3 vext3 <1,3,4,6>, <1,3,4,6>
+ 2686026752U, // <6,1,3,5>: Cost 3 vext3 <0,2,4,6>, <1,3,5,7>
+ 2590053128U, // <6,1,3,6>: Cost 3 vext1 <6,6,1,3>, <6,6,1,3>
+ 3663795194U, // <6,1,3,7>: Cost 4 vext1 <6,6,1,3>, <7,0,1,2>
+ 2686026775U, // <6,1,3,u>: Cost 3 vext3 <0,2,4,6>, <1,3,u,3>
+ 2641587099U, // <6,1,4,0>: Cost 3 vext2 <4,0,6,1>, <4,0,6,1>
+ 2693104684U, // <6,1,4,1>: Cost 3 vext3 <1,4,1,6>, <1,4,1,6>
+ 3639912357U, // <6,1,4,2>: Cost 4 vext1 <2,6,1,4>, <2,6,1,4>
+ 2687206462U, // <6,1,4,3>: Cost 3 vext3 <0,4,2,6>, <1,4,3,6>
+ 3633941814U, // <6,1,4,4>: Cost 4 vext1 <1,6,1,4>, RHS
+ 2693399632U, // <6,1,4,5>: Cost 3 vext3 <1,4,5,6>, <1,4,5,6>
+ 3765077075U, // <6,1,4,6>: Cost 4 vext3 <1,1,4,6>, <1,4,6,0>
+ 2646232530U, // <6,1,4,7>: Cost 3 vext2 <4,7,6,1>, <4,7,6,1>
+ 2687206507U, // <6,1,4,u>: Cost 3 vext3 <0,4,2,6>, <1,4,u,6>
+ 2647559796U, // <6,1,5,0>: Cost 3 vext2 <5,0,6,1>, <5,0,6,1>
+ 3765077118U, // <6,1,5,1>: Cost 4 vext3 <1,1,4,6>, <1,5,1,7>
+ 3767583878U, // <6,1,5,2>: Cost 4 vext3 <1,5,2,6>, <1,5,2,6>
+ 2686026896U, // <6,1,5,3>: Cost 3 vext3 <0,2,4,6>, <1,5,3,7>
+ 2693989528U, // <6,1,5,4>: Cost 3 vext3 <1,5,4,6>, <1,5,4,6>
+ 3767805089U, // <6,1,5,5>: Cost 4 vext3 <1,5,5,6>, <1,5,5,6>
+ 2652868706U, // <6,1,5,6>: Cost 3 vext2 <5,u,6,1>, <5,6,7,0>
+ 3908250934U, // <6,1,5,7>: Cost 4 vuzpr <2,6,0,1>, RHS
+ 2686026941U, // <6,1,5,u>: Cost 3 vext3 <0,2,4,6>, <1,5,u,7>
+ 2554241126U, // <6,1,6,0>: Cost 3 vext1 <0,6,1,6>, LHS
+ 3763602639U, // <6,1,6,1>: Cost 4 vext3 <0,u,2,6>, <1,6,1,7>
+ 3759547607U, // <6,1,6,2>: Cost 4 vext3 <0,2,1,6>, <1,6,2,6>
+ 3115221094U, // <6,1,6,3>: Cost 3 vtrnr <4,6,4,6>, LHS
+ 2554244406U, // <6,1,6,4>: Cost 3 vext1 <0,6,1,6>, RHS
+ 3760874739U, // <6,1,6,5>: Cost 4 vext3 <0,4,1,6>, <1,6,5,7>
+ 2554245944U, // <6,1,6,6>: Cost 3 vext1 <0,6,1,6>, <6,6,6,6>
+ 3719975758U, // <6,1,6,7>: Cost 4 vext2 <4,7,6,1>, <6,7,0,1>
+ 3115221099U, // <6,1,6,u>: Cost 3 vtrnr <4,6,4,6>, LHS
+ 2560221286U, // <6,1,7,0>: Cost 3 vext1 <1,6,1,7>, LHS
+ 2560222415U, // <6,1,7,1>: Cost 3 vext1 <1,6,1,7>, <1,6,1,7>
+ 2980497558U, // <6,1,7,2>: Cost 3 vzipr RHS, <3,0,1,2>
+ 3103211622U, // <6,1,7,3>: Cost 3 vtrnr <2,6,3,7>, LHS
+ 2560224566U, // <6,1,7,4>: Cost 3 vext1 <1,6,1,7>, RHS
+ 2980495698U, // <6,1,7,5>: Cost 3 vzipr RHS, <0,4,1,5>
+ 3633967526U, // <6,1,7,6>: Cost 4 vext1 <1,6,1,7>, <6,1,7,0>
+ 4054237686U, // <6,1,7,7>: Cost 4 vzipr RHS, <0,6,1,7>
+ 2560227118U, // <6,1,7,u>: Cost 3 vext1 <1,6,1,7>, LHS
+ 2560229478U, // <6,1,u,0>: Cost 3 vext1 <1,6,1,u>, LHS
+ 2686027117U, // <6,1,u,1>: Cost 3 vext3 <0,2,4,6>, <1,u,1,3>
+ 2686027129U, // <6,1,u,2>: Cost 3 vext3 <0,2,4,6>, <1,u,2,6>
+ 2686027132U, // <6,1,u,3>: Cost 3 vext3 <0,2,4,6>, <1,u,3,0>
+ 2687206795U, // <6,1,u,4>: Cost 3 vext3 <0,4,2,6>, <1,u,4,6>
+ 2686027157U, // <6,1,u,5>: Cost 3 vext3 <0,2,4,6>, <1,u,5,7>
+ 2590094093U, // <6,1,u,6>: Cost 3 vext1 <6,6,1,u>, <6,6,1,u>
+ 2596066790U, // <6,1,u,7>: Cost 3 vext1 <7,6,1,u>, <7,6,1,u>
+ 2686027177U, // <6,1,u,u>: Cost 3 vext3 <0,2,4,6>, <1,u,u,0>
+ 2646900736U, // <6,2,0,0>: Cost 3 vext2 <4,u,6,2>, <0,0,0,0>
+ 1573159014U, // <6,2,0,1>: Cost 2 vext2 <4,u,6,2>, LHS
+ 2646900900U, // <6,2,0,2>: Cost 3 vext2 <4,u,6,2>, <0,2,0,2>
+ 3759769037U, // <6,2,0,3>: Cost 4 vext3 <0,2,4,6>, <2,0,3,0>
+ 2641592668U, // <6,2,0,4>: Cost 3 vext2 <4,0,6,2>, <0,4,2,6>
+ 3779085794U, // <6,2,0,5>: Cost 4 vext3 <3,4,5,6>, <2,0,5,3>
+ 2686027244U, // <6,2,0,6>: Cost 3 vext3 <0,2,4,6>, <2,0,6,4>
+ 3669816807U, // <6,2,0,7>: Cost 4 vext1 <7,6,2,0>, <7,6,2,0>
+ 1573159581U, // <6,2,0,u>: Cost 2 vext2 <4,u,6,2>, LHS
+ 2230527897U, // <6,2,1,0>: Cost 3 vrev <2,6,0,1>
+ 2646901556U, // <6,2,1,1>: Cost 3 vext2 <4,u,6,2>, <1,1,1,1>
+ 2646901654U, // <6,2,1,2>: Cost 3 vext2 <4,u,6,2>, <1,2,3,0>
+ 2847047782U, // <6,2,1,3>: Cost 3 vuzpr <4,6,u,2>, LHS
+ 3771049517U, // <6,2,1,4>: Cost 4 vext3 <2,1,4,6>, <2,1,4,6>
+ 2646901904U, // <6,2,1,5>: Cost 3 vext2 <4,u,6,2>, <1,5,3,7>
+ 2686027324U, // <6,2,1,6>: Cost 3 vext3 <0,2,4,6>, <2,1,6,3>
+ 3669825000U, // <6,2,1,7>: Cost 4 vext1 <7,6,2,1>, <7,6,2,1>
+ 2231117793U, // <6,2,1,u>: Cost 3 vrev <2,6,u,1>
+ 3763603029U, // <6,2,2,0>: Cost 4 vext3 <0,u,2,6>, <2,2,0,1>
+ 3759769184U, // <6,2,2,1>: Cost 4 vext3 <0,2,4,6>, <2,2,1,3>
+ 2686027368U, // <6,2,2,2>: Cost 3 vext3 <0,2,4,6>, <2,2,2,2>
+ 2686027378U, // <6,2,2,3>: Cost 3 vext3 <0,2,4,6>, <2,2,3,3>
+ 2697971326U, // <6,2,2,4>: Cost 3 vext3 <2,2,4,6>, <2,2,4,6>
+ 3759769224U, // <6,2,2,5>: Cost 4 vext3 <0,2,4,6>, <2,2,5,7>
+ 2698118800U, // <6,2,2,6>: Cost 3 vext3 <2,2,6,6>, <2,2,6,6>
+ 3920794092U, // <6,2,2,7>: Cost 4 vuzpr <4,6,u,2>, <6,2,5,7>
+ 2686027423U, // <6,2,2,u>: Cost 3 vext3 <0,2,4,6>, <2,2,u,3>
+ 2686027430U, // <6,2,3,0>: Cost 3 vext3 <0,2,4,6>, <2,3,0,1>
+ 3759769262U, // <6,2,3,1>: Cost 4 vext3 <0,2,4,6>, <2,3,1,0>
+ 2698487485U, // <6,2,3,2>: Cost 3 vext3 <2,3,2,6>, <2,3,2,6>
+ 2705344196U, // <6,2,3,3>: Cost 3 vext3 <3,4,5,6>, <2,3,3,4>
+ 2686027470U, // <6,2,3,4>: Cost 3 vext3 <0,2,4,6>, <2,3,4,5>
+ 2698708696U, // <6,2,3,5>: Cost 3 vext3 <2,3,5,6>, <2,3,5,6>
+ 2724660961U, // <6,2,3,6>: Cost 3 vext3 <6,6,6,6>, <2,3,6,6>
+ 2729232104U, // <6,2,3,7>: Cost 3 vext3 <7,4,5,6>, <2,3,7,4>
+ 2686027502U, // <6,2,3,u>: Cost 3 vext3 <0,2,4,6>, <2,3,u,1>
+ 1567853468U, // <6,2,4,0>: Cost 2 vext2 <4,0,6,2>, <4,0,6,2>
+ 3759769351U, // <6,2,4,1>: Cost 4 vext3 <0,2,4,6>, <2,4,1,u>
+ 2699151118U, // <6,2,4,2>: Cost 3 vext3 <2,4,2,6>, <2,4,2,6>
+ 2686027543U, // <6,2,4,3>: Cost 3 vext3 <0,2,4,6>, <2,4,3,6>
+ 2699298592U, // <6,2,4,4>: Cost 3 vext3 <2,4,4,6>, <2,4,4,6>
+ 1573162294U, // <6,2,4,5>: Cost 2 vext2 <4,u,6,2>, RHS
+ 2686027564U, // <6,2,4,6>: Cost 3 vext3 <0,2,4,6>, <2,4,6,0>
+ 3719982547U, // <6,2,4,7>: Cost 4 vext2 <4,7,6,2>, <4,7,6,2>
+ 1573162532U, // <6,2,4,u>: Cost 2 vext2 <4,u,6,2>, <4,u,6,2>
+ 3779086154U, // <6,2,5,0>: Cost 4 vext3 <3,4,5,6>, <2,5,0,3>
+ 2646904528U, // <6,2,5,1>: Cost 3 vext2 <4,u,6,2>, <5,1,7,3>
+ 3759769440U, // <6,2,5,2>: Cost 4 vext3 <0,2,4,6>, <2,5,2,7>
+ 2699888488U, // <6,2,5,3>: Cost 3 vext3 <2,5,3,6>, <2,5,3,6>
+ 2230855617U, // <6,2,5,4>: Cost 3 vrev <2,6,4,5>
+ 2646904836U, // <6,2,5,5>: Cost 3 vext2 <4,u,6,2>, <5,5,5,5>
+ 2646904930U, // <6,2,5,6>: Cost 3 vext2 <4,u,6,2>, <5,6,7,0>
+ 2847051062U, // <6,2,5,7>: Cost 3 vuzpr <4,6,u,2>, RHS
+ 2700257173U, // <6,2,5,u>: Cost 3 vext3 <2,5,u,6>, <2,5,u,6>
+ 2687207321U, // <6,2,6,0>: Cost 3 vext3 <0,4,2,6>, <2,6,0,1>
+ 2686027684U, // <6,2,6,1>: Cost 3 vext3 <0,2,4,6>, <2,6,1,3>
+ 2566260656U, // <6,2,6,2>: Cost 3 vext1 <2,6,2,6>, <2,6,2,6>
+ 2685806522U, // <6,2,6,3>: Cost 3 vext3 <0,2,1,6>, <2,6,3,7>
+ 2687207361U, // <6,2,6,4>: Cost 3 vext3 <0,4,2,6>, <2,6,4,5>
+ 2686027724U, // <6,2,6,5>: Cost 3 vext3 <0,2,4,6>, <2,6,5,7>
+ 2646905656U, // <6,2,6,6>: Cost 3 vext2 <4,u,6,2>, <6,6,6,6>
+ 2646905678U, // <6,2,6,7>: Cost 3 vext2 <4,u,6,2>, <6,7,0,1>
+ 2686027751U, // <6,2,6,u>: Cost 3 vext3 <0,2,4,6>, <2,6,u,7>
+ 2554323046U, // <6,2,7,0>: Cost 3 vext1 <0,6,2,7>, LHS
+ 2572239606U, // <6,2,7,1>: Cost 3 vext1 <3,6,2,7>, <1,0,3,2>
+ 2566268849U, // <6,2,7,2>: Cost 3 vext1 <2,6,2,7>, <2,6,2,7>
+ 1906753638U, // <6,2,7,3>: Cost 2 vzipr RHS, LHS
+ 2554326326U, // <6,2,7,4>: Cost 3 vext1 <0,6,2,7>, RHS
+ 3304687564U, // <6,2,7,5>: Cost 4 vrev <2,6,5,7>
+ 2980495708U, // <6,2,7,6>: Cost 3 vzipr RHS, <0,4,2,6>
+ 2646906476U, // <6,2,7,7>: Cost 3 vext2 <4,u,6,2>, <7,7,7,7>
+ 1906753643U, // <6,2,7,u>: Cost 2 vzipr RHS, LHS
+ 1591744256U, // <6,2,u,0>: Cost 2 vext2 <u,0,6,2>, <u,0,6,2>
+ 1573164846U, // <6,2,u,1>: Cost 2 vext2 <4,u,6,2>, LHS
+ 2701805650U, // <6,2,u,2>: Cost 3 vext3 <2,u,2,6>, <2,u,2,6>
+ 1906761830U, // <6,2,u,3>: Cost 2 vzipr RHS, LHS
+ 2686027875U, // <6,2,u,4>: Cost 3 vext3 <0,2,4,6>, <2,u,4,5>
+ 1573165210U, // <6,2,u,5>: Cost 2 vext2 <4,u,6,2>, RHS
+ 2686322800U, // <6,2,u,6>: Cost 3 vext3 <0,2,u,6>, <2,u,6,0>
+ 2847051305U, // <6,2,u,7>: Cost 3 vuzpr <4,6,u,2>, RHS
+ 1906761835U, // <6,2,u,u>: Cost 2 vzipr RHS, LHS
+ 3759769739U, // <6,3,0,0>: Cost 4 vext3 <0,2,4,6>, <3,0,0,0>
+ 2686027926U, // <6,3,0,1>: Cost 3 vext3 <0,2,4,6>, <3,0,1,2>
+ 2686027937U, // <6,3,0,2>: Cost 3 vext3 <0,2,4,6>, <3,0,2,4>
+ 3640027286U, // <6,3,0,3>: Cost 4 vext1 <2,6,3,0>, <3,0,1,2>
+ 2687207601U, // <6,3,0,4>: Cost 3 vext3 <0,4,2,6>, <3,0,4,2>
+ 2705344698U, // <6,3,0,5>: Cost 3 vext3 <3,4,5,6>, <3,0,5,2>
+ 3663917847U, // <6,3,0,6>: Cost 4 vext1 <6,6,3,0>, <6,6,3,0>
+ 2237008560U, // <6,3,0,7>: Cost 3 vrev <3,6,7,0>
+ 2686027989U, // <6,3,0,u>: Cost 3 vext3 <0,2,4,6>, <3,0,u,2>
+ 3759769823U, // <6,3,1,0>: Cost 4 vext3 <0,2,4,6>, <3,1,0,3>
+ 3759769830U, // <6,3,1,1>: Cost 4 vext3 <0,2,4,6>, <3,1,1,1>
+ 3759769841U, // <6,3,1,2>: Cost 4 vext3 <0,2,4,6>, <3,1,2,3>
+ 3759769848U, // <6,3,1,3>: Cost 4 vext3 <0,2,4,6>, <3,1,3,1>
+ 2703280390U, // <6,3,1,4>: Cost 3 vext3 <3,1,4,6>, <3,1,4,6>
+ 3759769868U, // <6,3,1,5>: Cost 4 vext3 <0,2,4,6>, <3,1,5,3>
+ 3704063194U, // <6,3,1,6>: Cost 4 vext2 <2,1,6,3>, <1,6,3,0>
+ 3767732510U, // <6,3,1,7>: Cost 4 vext3 <1,5,4,6>, <3,1,7,3>
+ 2703280390U, // <6,3,1,u>: Cost 3 vext3 <3,1,4,6>, <3,1,4,6>
+ 3704063468U, // <6,3,2,0>: Cost 4 vext2 <2,1,6,3>, <2,0,6,4>
+ 2630321724U, // <6,3,2,1>: Cost 3 vext2 <2,1,6,3>, <2,1,6,3>
+ 3759769921U, // <6,3,2,2>: Cost 4 vext3 <0,2,4,6>, <3,2,2,2>
+ 3759769928U, // <6,3,2,3>: Cost 4 vext3 <0,2,4,6>, <3,2,3,0>
+ 3704063767U, // <6,3,2,4>: Cost 4 vext2 <2,1,6,3>, <2,4,3,6>
+ 3704063876U, // <6,3,2,5>: Cost 4 vext2 <2,1,6,3>, <2,5,6,7>
+ 2636957626U, // <6,3,2,6>: Cost 3 vext2 <3,2,6,3>, <2,6,3,7>
+ 3777907058U, // <6,3,2,7>: Cost 4 vext3 <3,2,7,6>, <3,2,7,6>
+ 2630321724U, // <6,3,2,u>: Cost 3 vext2 <2,1,6,3>, <2,1,6,3>
+ 3759769983U, // <6,3,3,0>: Cost 4 vext3 <0,2,4,6>, <3,3,0,1>
+ 3710036245U, // <6,3,3,1>: Cost 4 vext2 <3,1,6,3>, <3,1,6,3>
+ 2636958054U, // <6,3,3,2>: Cost 3 vext2 <3,2,6,3>, <3,2,6,3>
+ 2686028188U, // <6,3,3,3>: Cost 3 vext3 <0,2,4,6>, <3,3,3,3>
+ 2704607656U, // <6,3,3,4>: Cost 3 vext3 <3,3,4,6>, <3,3,4,6>
+ 3773041072U, // <6,3,3,5>: Cost 4 vext3 <2,4,4,6>, <3,3,5,5>
+ 3711363731U, // <6,3,3,6>: Cost 4 vext2 <3,3,6,3>, <3,6,3,7>
+ 3767732676U, // <6,3,3,7>: Cost 4 vext3 <1,5,4,6>, <3,3,7,7>
+ 2707999179U, // <6,3,3,u>: Cost 3 vext3 <3,u,5,6>, <3,3,u,5>
+ 2584232038U, // <6,3,4,0>: Cost 3 vext1 <5,6,3,4>, LHS
+ 2642267118U, // <6,3,4,1>: Cost 3 vext2 <4,1,6,3>, <4,1,6,3>
+ 2642930751U, // <6,3,4,2>: Cost 3 vext2 <4,2,6,3>, <4,2,6,3>
+ 2705197552U, // <6,3,4,3>: Cost 3 vext3 <3,4,3,6>, <3,4,3,6>
+ 2584235318U, // <6,3,4,4>: Cost 3 vext1 <5,6,3,4>, RHS
+ 1631603202U, // <6,3,4,5>: Cost 2 vext3 <3,4,5,6>, <3,4,5,6>
+ 2654211444U, // <6,3,4,6>: Cost 3 vext2 <6,1,6,3>, <4,6,4,6>
+ 2237041332U, // <6,3,4,7>: Cost 3 vrev <3,6,7,4>
+ 1631824413U, // <6,3,4,u>: Cost 2 vext3 <3,4,u,6>, <3,4,u,6>
+ 3640066150U, // <6,3,5,0>: Cost 4 vext1 <2,6,3,5>, LHS
+ 3772746288U, // <6,3,5,1>: Cost 4 vext3 <2,4,0,6>, <3,5,1,7>
+ 3640067790U, // <6,3,5,2>: Cost 4 vext1 <2,6,3,5>, <2,3,4,5>
+ 3773041216U, // <6,3,5,3>: Cost 4 vext3 <2,4,4,6>, <3,5,3,5>
+ 2705934922U, // <6,3,5,4>: Cost 3 vext3 <3,5,4,6>, <3,5,4,6>
+ 3773041236U, // <6,3,5,5>: Cost 4 vext3 <2,4,4,6>, <3,5,5,7>
+ 3779086940U, // <6,3,5,6>: Cost 4 vext3 <3,4,5,6>, <3,5,6,6>
+ 3767732831U, // <6,3,5,7>: Cost 4 vext3 <1,5,4,6>, <3,5,7,0>
+ 2706229870U, // <6,3,5,u>: Cost 3 vext3 <3,5,u,6>, <3,5,u,6>
+ 2602164326U, // <6,3,6,0>: Cost 3 vext1 <u,6,3,6>, LHS
+ 2654212512U, // <6,3,6,1>: Cost 3 vext2 <6,1,6,3>, <6,1,6,3>
+ 2566334393U, // <6,3,6,2>: Cost 3 vext1 <2,6,3,6>, <2,6,3,6>
+ 3704066588U, // <6,3,6,3>: Cost 4 vext2 <2,1,6,3>, <6,3,2,1>
+ 2602167524U, // <6,3,6,4>: Cost 3 vext1 <u,6,3,6>, <4,4,6,6>
+ 3710702321U, // <6,3,6,5>: Cost 4 vext2 <3,2,6,3>, <6,5,7,7>
+ 2724661933U, // <6,3,6,6>: Cost 3 vext3 <6,6,6,6>, <3,6,6,6>
+ 3710702465U, // <6,3,6,7>: Cost 4 vext2 <3,2,6,3>, <6,7,5,7>
+ 2602170158U, // <6,3,6,u>: Cost 3 vext1 <u,6,3,6>, LHS
+ 1492598886U, // <6,3,7,0>: Cost 2 vext1 <2,6,3,7>, LHS
+ 2560369889U, // <6,3,7,1>: Cost 3 vext1 <1,6,3,7>, <1,6,3,7>
+ 1492600762U, // <6,3,7,2>: Cost 2 vext1 <2,6,3,7>, <2,6,3,7>
+ 2566342806U, // <6,3,7,3>: Cost 3 vext1 <2,6,3,7>, <3,0,1,2>
+ 1492602166U, // <6,3,7,4>: Cost 2 vext1 <2,6,3,7>, RHS
+ 2602176208U, // <6,3,7,5>: Cost 3 vext1 <u,6,3,7>, <5,1,7,3>
+ 2566345210U, // <6,3,7,6>: Cost 3 vext1 <2,6,3,7>, <6,2,7,3>
+ 2980496528U, // <6,3,7,7>: Cost 3 vzipr RHS, <1,5,3,7>
+ 1492604718U, // <6,3,7,u>: Cost 2 vext1 <2,6,3,7>, LHS
+ 1492607078U, // <6,3,u,0>: Cost 2 vext1 <2,6,3,u>, LHS
+ 2686028574U, // <6,3,u,1>: Cost 3 vext3 <0,2,4,6>, <3,u,1,2>
+ 1492608955U, // <6,3,u,2>: Cost 2 vext1 <2,6,3,u>, <2,6,3,u>
+ 2566350998U, // <6,3,u,3>: Cost 3 vext1 <2,6,3,u>, <3,0,1,2>
+ 1492610358U, // <6,3,u,4>: Cost 2 vext1 <2,6,3,u>, RHS
+ 1634257734U, // <6,3,u,5>: Cost 2 vext3 <3,u,5,6>, <3,u,5,6>
+ 2566353489U, // <6,3,u,6>: Cost 3 vext1 <2,6,3,u>, <6,3,u,0>
+ 2980504720U, // <6,3,u,7>: Cost 3 vzipr RHS, <1,5,3,7>
+ 1492612910U, // <6,3,u,u>: Cost 2 vext1 <2,6,3,u>, LHS
+ 3703406592U, // <6,4,0,0>: Cost 4 vext2 <2,0,6,4>, <0,0,0,0>
+ 2629664870U, // <6,4,0,1>: Cost 3 vext2 <2,0,6,4>, LHS
+ 2629664972U, // <6,4,0,2>: Cost 3 vext2 <2,0,6,4>, <0,2,4,6>
+ 3779087232U, // <6,4,0,3>: Cost 4 vext3 <3,4,5,6>, <4,0,3,1>
+ 2642936156U, // <6,4,0,4>: Cost 3 vext2 <4,2,6,4>, <0,4,2,6>
+ 2712570770U, // <6,4,0,5>: Cost 3 vext3 <4,6,4,6>, <4,0,5,1>
+ 2687208348U, // <6,4,0,6>: Cost 3 vext3 <0,4,2,6>, <4,0,6,2>
+ 3316723081U, // <6,4,0,7>: Cost 4 vrev <4,6,7,0>
+ 2629665437U, // <6,4,0,u>: Cost 3 vext2 <2,0,6,4>, LHS
+ 2242473291U, // <6,4,1,0>: Cost 3 vrev <4,6,0,1>
+ 3700089652U, // <6,4,1,1>: Cost 4 vext2 <1,4,6,4>, <1,1,1,1>
+ 3703407510U, // <6,4,1,2>: Cost 4 vext2 <2,0,6,4>, <1,2,3,0>
+ 2852962406U, // <6,4,1,3>: Cost 3 vuzpr <5,6,7,4>, LHS
+ 3628166454U, // <6,4,1,4>: Cost 4 vext1 <0,6,4,1>, RHS
+ 3760876514U, // <6,4,1,5>: Cost 4 vext3 <0,4,1,6>, <4,1,5,0>
+ 2687208430U, // <6,4,1,6>: Cost 3 vext3 <0,4,2,6>, <4,1,6,3>
+ 3316731274U, // <6,4,1,7>: Cost 4 vrev <4,6,7,1>
+ 2243063187U, // <6,4,1,u>: Cost 3 vrev <4,6,u,1>
+ 2629666284U, // <6,4,2,0>: Cost 3 vext2 <2,0,6,4>, <2,0,6,4>
+ 3703408188U, // <6,4,2,1>: Cost 4 vext2 <2,0,6,4>, <2,1,6,3>
+ 3703408232U, // <6,4,2,2>: Cost 4 vext2 <2,0,6,4>, <2,2,2,2>
+ 3703408294U, // <6,4,2,3>: Cost 4 vext2 <2,0,6,4>, <2,3,0,1>
+ 2632320816U, // <6,4,2,4>: Cost 3 vext2 <2,4,6,4>, <2,4,6,4>
+ 2923384118U, // <6,4,2,5>: Cost 3 vzipl <6,2,7,3>, RHS
+ 2687208508U, // <6,4,2,6>: Cost 3 vext3 <0,4,2,6>, <4,2,6,0>
+ 3760950341U, // <6,4,2,7>: Cost 4 vext3 <0,4,2,6>, <4,2,7,0>
+ 2634975348U, // <6,4,2,u>: Cost 3 vext2 <2,u,6,4>, <2,u,6,4>
+ 3703408790U, // <6,4,3,0>: Cost 4 vext2 <2,0,6,4>, <3,0,1,2>
+ 3316305238U, // <6,4,3,1>: Cost 4 vrev <4,6,1,3>
+ 3703408947U, // <6,4,3,2>: Cost 4 vext2 <2,0,6,4>, <3,2,0,6>
+ 3703409052U, // <6,4,3,3>: Cost 4 vext2 <2,0,6,4>, <3,3,3,3>
+ 2644929026U, // <6,4,3,4>: Cost 3 vext2 <4,5,6,4>, <3,4,5,6>
+ 3718670922U, // <6,4,3,5>: Cost 4 vext2 <4,5,6,4>, <3,5,4,6>
+ 2705345682U, // <6,4,3,6>: Cost 3 vext3 <3,4,5,6>, <4,3,6,5>
+ 3926705152U, // <6,4,3,7>: Cost 4 vuzpr <5,6,7,4>, <1,3,5,7>
+ 2668817222U, // <6,4,3,u>: Cost 3 vext2 <u,5,6,4>, <3,u,5,6>
+ 2590277734U, // <6,4,4,0>: Cost 3 vext1 <6,6,4,4>, LHS
+ 3716017135U, // <6,4,4,1>: Cost 4 vext2 <4,1,6,4>, <4,1,6,4>
+ 2642938944U, // <6,4,4,2>: Cost 3 vext2 <4,2,6,4>, <4,2,6,4>
+ 3717344401U, // <6,4,4,3>: Cost 4 vext2 <4,3,6,4>, <4,3,6,4>
+ 2712571088U, // <6,4,4,4>: Cost 3 vext3 <4,6,4,6>, <4,4,4,4>
+ 2629668150U, // <6,4,4,5>: Cost 3 vext2 <2,0,6,4>, RHS
+ 1637649636U, // <6,4,4,6>: Cost 2 vext3 <4,4,6,6>, <4,4,6,6>
+ 2646257109U, // <6,4,4,7>: Cost 3 vext2 <4,7,6,4>, <4,7,6,4>
+ 1637649636U, // <6,4,4,u>: Cost 2 vext3 <4,4,6,6>, <4,4,6,6>
+ 2566398054U, // <6,4,5,0>: Cost 3 vext1 <2,6,4,5>, LHS
+ 3760876805U, // <6,4,5,1>: Cost 4 vext3 <0,4,1,6>, <4,5,1,3>
+ 2566399937U, // <6,4,5,2>: Cost 3 vext1 <2,6,4,5>, <2,6,4,5>
+ 2584316418U, // <6,4,5,3>: Cost 3 vext1 <5,6,4,5>, <3,4,5,6>
+ 2566401334U, // <6,4,5,4>: Cost 3 vext1 <2,6,4,5>, RHS
+ 2584318028U, // <6,4,5,5>: Cost 3 vext1 <5,6,4,5>, <5,6,4,5>
+ 1612287286U, // <6,4,5,6>: Cost 2 vext3 <0,2,4,6>, RHS
+ 2852965686U, // <6,4,5,7>: Cost 3 vuzpr <5,6,7,4>, RHS
+ 1612287304U, // <6,4,5,u>: Cost 2 vext3 <0,2,4,6>, RHS
+ 1504608358U, // <6,4,6,0>: Cost 2 vext1 <4,6,4,6>, LHS
+ 2578350838U, // <6,4,6,1>: Cost 3 vext1 <4,6,4,6>, <1,0,3,2>
+ 2578351720U, // <6,4,6,2>: Cost 3 vext1 <4,6,4,6>, <2,2,2,2>
+ 2578352278U, // <6,4,6,3>: Cost 3 vext1 <4,6,4,6>, <3,0,1,2>
+ 1504611638U, // <6,4,6,4>: Cost 2 vext1 <4,6,4,6>, RHS
+ 2578353872U, // <6,4,6,5>: Cost 3 vext1 <4,6,4,6>, <5,1,7,3>
+ 2578354682U, // <6,4,6,6>: Cost 3 vext1 <4,6,4,6>, <6,2,7,3>
+ 2578355194U, // <6,4,6,7>: Cost 3 vext1 <4,6,4,6>, <7,0,1,2>
+ 1504614190U, // <6,4,6,u>: Cost 2 vext1 <4,6,4,6>, LHS
+ 2572386406U, // <6,4,7,0>: Cost 3 vext1 <3,6,4,7>, LHS
+ 2572387226U, // <6,4,7,1>: Cost 3 vext1 <3,6,4,7>, <1,2,3,4>
+ 3640157902U, // <6,4,7,2>: Cost 4 vext1 <2,6,4,7>, <2,3,4,5>
+ 2572389020U, // <6,4,7,3>: Cost 3 vext1 <3,6,4,7>, <3,6,4,7>
+ 2572389686U, // <6,4,7,4>: Cost 3 vext1 <3,6,4,7>, RHS
+ 2980497102U, // <6,4,7,5>: Cost 3 vzipr RHS, <2,3,4,5>
+ 2980495564U, // <6,4,7,6>: Cost 3 vzipr RHS, <0,2,4,6>
+ 4054239090U, // <6,4,7,7>: Cost 4 vzipr RHS, <2,5,4,7>
+ 2572392238U, // <6,4,7,u>: Cost 3 vext1 <3,6,4,7>, LHS
+ 1504608358U, // <6,4,u,0>: Cost 2 vext1 <4,6,4,6>, LHS
+ 2629670702U, // <6,4,u,1>: Cost 3 vext2 <2,0,6,4>, LHS
+ 2566424516U, // <6,4,u,2>: Cost 3 vext1 <2,6,4,u>, <2,6,4,u>
+ 2584340994U, // <6,4,u,3>: Cost 3 vext1 <5,6,4,u>, <3,4,5,6>
+ 1640156694U, // <6,4,u,4>: Cost 2 vext3 <4,u,4,6>, <4,u,4,6>
+ 2629671066U, // <6,4,u,5>: Cost 3 vext2 <2,0,6,4>, RHS
+ 1612287529U, // <6,4,u,6>: Cost 2 vext3 <0,2,4,6>, RHS
+ 2852965929U, // <6,4,u,7>: Cost 3 vuzpr <5,6,7,4>, RHS
+ 1612287547U, // <6,4,u,u>: Cost 2 vext3 <0,2,4,6>, RHS
+ 3708723200U, // <6,5,0,0>: Cost 4 vext2 <2,u,6,5>, <0,0,0,0>
+ 2634981478U, // <6,5,0,1>: Cost 3 vext2 <2,u,6,5>, LHS
+ 3694125260U, // <6,5,0,2>: Cost 4 vext2 <0,4,6,5>, <0,2,4,6>
+ 3779087962U, // <6,5,0,3>: Cost 4 vext3 <3,4,5,6>, <5,0,3,2>
+ 3760877154U, // <6,5,0,4>: Cost 4 vext3 <0,4,1,6>, <5,0,4,1>
+ 4195110916U, // <6,5,0,5>: Cost 4 vtrnr <5,6,7,0>, <5,5,5,5>
+ 3696779775U, // <6,5,0,6>: Cost 4 vext2 <0,u,6,5>, <0,6,2,7>
+ 1175212130U, // <6,5,0,7>: Cost 2 vrev <5,6,7,0>
+ 1175285867U, // <6,5,0,u>: Cost 2 vrev <5,6,u,0>
+ 2248445988U, // <6,5,1,0>: Cost 3 vrev <5,6,0,1>
+ 3698107237U, // <6,5,1,1>: Cost 4 vext2 <1,1,6,5>, <1,1,6,5>
+ 3708724118U, // <6,5,1,2>: Cost 4 vext2 <2,u,6,5>, <1,2,3,0>
+ 3908575334U, // <6,5,1,3>: Cost 4 vuzpr <2,6,4,5>, LHS
+ 3716023376U, // <6,5,1,4>: Cost 4 vext2 <4,1,6,5>, <1,4,5,6>
+ 3708724368U, // <6,5,1,5>: Cost 4 vext2 <2,u,6,5>, <1,5,3,7>
+ 3767733960U, // <6,5,1,6>: Cost 4 vext3 <1,5,4,6>, <5,1,6,4>
+ 2712571600U, // <6,5,1,7>: Cost 3 vext3 <4,6,4,6>, <5,1,7,3>
+ 2712571609U, // <6,5,1,u>: Cost 3 vext3 <4,6,4,6>, <5,1,u,3>
+ 2578391142U, // <6,5,2,0>: Cost 3 vext1 <4,6,5,2>, LHS
+ 3704079934U, // <6,5,2,1>: Cost 4 vext2 <2,1,6,5>, <2,1,6,5>
+ 3708724840U, // <6,5,2,2>: Cost 4 vext2 <2,u,6,5>, <2,2,2,2>
+ 3705407182U, // <6,5,2,3>: Cost 4 vext2 <2,3,6,5>, <2,3,4,5>
+ 2578394422U, // <6,5,2,4>: Cost 3 vext1 <4,6,5,2>, RHS
+ 3717351272U, // <6,5,2,5>: Cost 4 vext2 <4,3,6,5>, <2,5,3,6>
+ 2634983354U, // <6,5,2,6>: Cost 3 vext2 <2,u,6,5>, <2,6,3,7>
+ 3115486518U, // <6,5,2,7>: Cost 3 vtrnr <4,6,u,2>, RHS
+ 2634983541U, // <6,5,2,u>: Cost 3 vext2 <2,u,6,5>, <2,u,6,5>
+ 3708725398U, // <6,5,3,0>: Cost 4 vext2 <2,u,6,5>, <3,0,1,2>
+ 3710052631U, // <6,5,3,1>: Cost 4 vext2 <3,1,6,5>, <3,1,6,5>
+ 3708725606U, // <6,5,3,2>: Cost 4 vext2 <2,u,6,5>, <3,2,6,3>
+ 3708725660U, // <6,5,3,3>: Cost 4 vext2 <2,u,6,5>, <3,3,3,3>
+ 2643610114U, // <6,5,3,4>: Cost 3 vext2 <4,3,6,5>, <3,4,5,6>
+ 3717352010U, // <6,5,3,5>: Cost 4 vext2 <4,3,6,5>, <3,5,4,6>
+ 3773632358U, // <6,5,3,6>: Cost 4 vext3 <2,5,3,6>, <5,3,6,0>
+ 2248978533U, // <6,5,3,7>: Cost 3 vrev <5,6,7,3>
+ 2249052270U, // <6,5,3,u>: Cost 3 vrev <5,6,u,3>
+ 2596323430U, // <6,5,4,0>: Cost 3 vext1 <7,6,5,4>, LHS
+ 3716025328U, // <6,5,4,1>: Cost 4 vext2 <4,1,6,5>, <4,1,6,5>
+ 3716688961U, // <6,5,4,2>: Cost 4 vext2 <4,2,6,5>, <4,2,6,5>
+ 2643610770U, // <6,5,4,3>: Cost 3 vext2 <4,3,6,5>, <4,3,6,5>
+ 2596326710U, // <6,5,4,4>: Cost 3 vext1 <7,6,5,4>, RHS
+ 2634984758U, // <6,5,4,5>: Cost 3 vext2 <2,u,6,5>, RHS
+ 3767734199U, // <6,5,4,6>: Cost 4 vext3 <1,5,4,6>, <5,4,6,0>
+ 1643696070U, // <6,5,4,7>: Cost 2 vext3 <5,4,7,6>, <5,4,7,6>
+ 1643769807U, // <6,5,4,u>: Cost 2 vext3 <5,4,u,6>, <5,4,u,6>
+ 2578415718U, // <6,5,5,0>: Cost 3 vext1 <4,6,5,5>, LHS
+ 3652158198U, // <6,5,5,1>: Cost 4 vext1 <4,6,5,5>, <1,0,3,2>
+ 3652159080U, // <6,5,5,2>: Cost 4 vext1 <4,6,5,5>, <2,2,2,2>
+ 3652159638U, // <6,5,5,3>: Cost 4 vext1 <4,6,5,5>, <3,0,1,2>
+ 2578418998U, // <6,5,5,4>: Cost 3 vext1 <4,6,5,5>, RHS
+ 2712571908U, // <6,5,5,5>: Cost 3 vext3 <4,6,4,6>, <5,5,5,5>
+ 2718027790U, // <6,5,5,6>: Cost 3 vext3 <5,5,6,6>, <5,5,6,6>
+ 2712571928U, // <6,5,5,7>: Cost 3 vext3 <4,6,4,6>, <5,5,7,7>
+ 2712571937U, // <6,5,5,u>: Cost 3 vext3 <4,6,4,6>, <5,5,u,7>
+ 2705346596U, // <6,5,6,0>: Cost 3 vext3 <3,4,5,6>, <5,6,0,1>
+ 3767144496U, // <6,5,6,1>: Cost 4 vext3 <1,4,5,6>, <5,6,1,4>
+ 3773116473U, // <6,5,6,2>: Cost 4 vext3 <2,4,5,6>, <5,6,2,4>
+ 2705346626U, // <6,5,6,3>: Cost 3 vext3 <3,4,5,6>, <5,6,3,4>
+ 2705346636U, // <6,5,6,4>: Cost 3 vext3 <3,4,5,6>, <5,6,4,5>
+ 3908577217U, // <6,5,6,5>: Cost 4 vuzpr <2,6,4,5>, <2,6,4,5>
+ 2578428728U, // <6,5,6,6>: Cost 3 vext1 <4,6,5,6>, <6,6,6,6>
+ 2712572002U, // <6,5,6,7>: Cost 3 vext3 <4,6,4,6>, <5,6,7,0>
+ 2705346668U, // <6,5,6,u>: Cost 3 vext3 <3,4,5,6>, <5,6,u,1>
+ 2560516198U, // <6,5,7,0>: Cost 3 vext1 <1,6,5,7>, LHS
+ 2560517363U, // <6,5,7,1>: Cost 3 vext1 <1,6,5,7>, <1,6,5,7>
+ 2566490060U, // <6,5,7,2>: Cost 3 vext1 <2,6,5,7>, <2,6,5,7>
+ 3634260118U, // <6,5,7,3>: Cost 4 vext1 <1,6,5,7>, <3,0,1,2>
+ 2560519478U, // <6,5,7,4>: Cost 3 vext1 <1,6,5,7>, RHS
+ 2980498650U, // <6,5,7,5>: Cost 3 vzipr RHS, <4,4,5,5>
+ 2980497922U, // <6,5,7,6>: Cost 3 vzipr RHS, <3,4,5,6>
+ 3103214902U, // <6,5,7,7>: Cost 3 vtrnr <2,6,3,7>, RHS
+ 2560522030U, // <6,5,7,u>: Cost 3 vext1 <1,6,5,7>, LHS
+ 2560524390U, // <6,5,u,0>: Cost 3 vext1 <1,6,5,u>, LHS
+ 2560525556U, // <6,5,u,1>: Cost 3 vext1 <1,6,5,u>, <1,6,5,u>
+ 2566498253U, // <6,5,u,2>: Cost 3 vext1 <2,6,5,u>, <2,6,5,u>
+ 2646931439U, // <6,5,u,3>: Cost 3 vext2 <4,u,6,5>, <u,3,5,7>
+ 2560527670U, // <6,5,u,4>: Cost 3 vext1 <1,6,5,u>, RHS
+ 2634987674U, // <6,5,u,5>: Cost 3 vext2 <2,u,6,5>, RHS
+ 2980506114U, // <6,5,u,6>: Cost 3 vzipr RHS, <3,4,5,6>
+ 1175277674U, // <6,5,u,7>: Cost 2 vrev <5,6,7,u>
+ 1175351411U, // <6,5,u,u>: Cost 2 vrev <5,6,u,u>
+ 2578448486U, // <6,6,0,0>: Cost 3 vext1 <4,6,6,0>, LHS
+ 1573191782U, // <6,6,0,1>: Cost 2 vext2 <4,u,6,6>, LHS
+ 2686030124U, // <6,6,0,2>: Cost 3 vext3 <0,2,4,6>, <6,0,2,4>
+ 3779088690U, // <6,6,0,3>: Cost 4 vext3 <3,4,5,6>, <6,0,3,1>
+ 2687209788U, // <6,6,0,4>: Cost 3 vext3 <0,4,2,6>, <6,0,4,2>
+ 3652194000U, // <6,6,0,5>: Cost 4 vext1 <4,6,6,0>, <5,1,7,3>
+ 2254852914U, // <6,6,0,6>: Cost 3 vrev <6,6,6,0>
+ 4041575734U, // <6,6,0,7>: Cost 4 vzipr <2,4,6,0>, RHS
+ 1573192349U, // <6,6,0,u>: Cost 2 vext2 <4,u,6,6>, LHS
+ 2646934262U, // <6,6,1,0>: Cost 3 vext2 <4,u,6,6>, <1,0,3,2>
+ 2646934324U, // <6,6,1,1>: Cost 3 vext2 <4,u,6,6>, <1,1,1,1>
+ 2646934422U, // <6,6,1,2>: Cost 3 vext2 <4,u,6,6>, <1,2,3,0>
+ 2846785638U, // <6,6,1,3>: Cost 3 vuzpr <4,6,4,6>, LHS
+ 3760951694U, // <6,6,1,4>: Cost 4 vext3 <0,4,2,6>, <6,1,4,3>
+ 2646934672U, // <6,6,1,5>: Cost 3 vext2 <4,u,6,6>, <1,5,3,7>
+ 2712572320U, // <6,6,1,6>: Cost 3 vext3 <4,6,4,6>, <6,1,6,3>
+ 3775549865U, // <6,6,1,7>: Cost 4 vext3 <2,u,2,6>, <6,1,7,3>
+ 2846785643U, // <6,6,1,u>: Cost 3 vuzpr <4,6,4,6>, LHS
+ 3759772094U, // <6,6,2,0>: Cost 4 vext3 <0,2,4,6>, <6,2,0,6>
+ 3704751676U, // <6,6,2,1>: Cost 4 vext2 <2,2,6,6>, <2,1,6,3>
+ 2631009936U, // <6,6,2,2>: Cost 3 vext2 <2,2,6,6>, <2,2,6,6>
+ 2646935206U, // <6,6,2,3>: Cost 3 vext2 <4,u,6,6>, <2,3,0,1>
+ 3759772127U, // <6,6,2,4>: Cost 4 vext3 <0,2,4,6>, <6,2,4,3>
+ 3704752004U, // <6,6,2,5>: Cost 4 vext2 <2,2,6,6>, <2,5,6,7>
+ 2646935482U, // <6,6,2,6>: Cost 3 vext2 <4,u,6,6>, <2,6,3,7>
+ 2712572410U, // <6,6,2,7>: Cost 3 vext3 <4,6,4,6>, <6,2,7,3>
+ 2712572419U, // <6,6,2,u>: Cost 3 vext3 <4,6,4,6>, <6,2,u,3>
+ 2646935702U, // <6,6,3,0>: Cost 3 vext2 <4,u,6,6>, <3,0,1,2>
+ 3777024534U, // <6,6,3,1>: Cost 4 vext3 <3,1,4,6>, <6,3,1,4>
+ 3704752453U, // <6,6,3,2>: Cost 4 vext2 <2,2,6,6>, <3,2,2,6>
+ 2646935964U, // <6,6,3,3>: Cost 3 vext2 <4,u,6,6>, <3,3,3,3>
+ 2705347122U, // <6,6,3,4>: Cost 3 vext3 <3,4,5,6>, <6,3,4,5>
+ 3779678778U, // <6,6,3,5>: Cost 4 vext3 <3,5,4,6>, <6,3,5,4>
+ 2657553069U, // <6,6,3,6>: Cost 3 vext2 <6,6,6,6>, <3,6,6,6>
+ 4039609654U, // <6,6,3,7>: Cost 4 vzipr <2,1,6,3>, RHS
+ 2708001366U, // <6,6,3,u>: Cost 3 vext3 <3,u,5,6>, <6,3,u,5>
+ 2578481254U, // <6,6,4,0>: Cost 3 vext1 <4,6,6,4>, LHS
+ 3652223734U, // <6,6,4,1>: Cost 4 vext1 <4,6,6,4>, <1,0,3,2>
+ 3760951922U, // <6,6,4,2>: Cost 4 vext3 <0,4,2,6>, <6,4,2,6>
+ 3779089019U, // <6,6,4,3>: Cost 4 vext3 <3,4,5,6>, <6,4,3,6>
+ 1570540772U, // <6,6,4,4>: Cost 2 vext2 <4,4,6,6>, <4,4,6,6>
+ 1573195062U, // <6,6,4,5>: Cost 2 vext2 <4,u,6,6>, RHS
+ 2712572560U, // <6,6,4,6>: Cost 3 vext3 <4,6,4,6>, <6,4,6,0>
+ 2723410591U, // <6,6,4,7>: Cost 3 vext3 <6,4,7,6>, <6,4,7,6>
+ 1573195304U, // <6,6,4,u>: Cost 2 vext2 <4,u,6,6>, <4,u,6,6>
+ 3640287334U, // <6,6,5,0>: Cost 4 vext1 <2,6,6,5>, LHS
+ 2646937296U, // <6,6,5,1>: Cost 3 vext2 <4,u,6,6>, <5,1,7,3>
+ 3640289235U, // <6,6,5,2>: Cost 4 vext1 <2,6,6,5>, <2,6,6,5>
+ 3720679279U, // <6,6,5,3>: Cost 4 vext2 <4,u,6,6>, <5,3,7,0>
+ 2646937542U, // <6,6,5,4>: Cost 3 vext2 <4,u,6,6>, <5,4,7,6>
+ 2646937604U, // <6,6,5,5>: Cost 3 vext2 <4,u,6,6>, <5,5,5,5>
+ 2646937698U, // <6,6,5,6>: Cost 3 vext2 <4,u,6,6>, <5,6,7,0>
+ 2846788918U, // <6,6,5,7>: Cost 3 vuzpr <4,6,4,6>, RHS
+ 2846788919U, // <6,6,5,u>: Cost 3 vuzpr <4,6,4,6>, RHS
+ 1516699750U, // <6,6,6,0>: Cost 2 vext1 <6,6,6,6>, LHS
+ 2590442230U, // <6,6,6,1>: Cost 3 vext1 <6,6,6,6>, <1,0,3,2>
+ 2646938106U, // <6,6,6,2>: Cost 3 vext2 <4,u,6,6>, <6,2,7,3>
+ 2590443670U, // <6,6,6,3>: Cost 3 vext1 <6,6,6,6>, <3,0,1,2>
+ 1516703030U, // <6,6,6,4>: Cost 2 vext1 <6,6,6,6>, RHS
+ 2590445264U, // <6,6,6,5>: Cost 3 vext1 <6,6,6,6>, <5,1,7,3>
+ 296144182U, // <6,6,6,6>: Cost 1 vdup2 RHS
+ 2712572738U, // <6,6,6,7>: Cost 3 vext3 <4,6,4,6>, <6,6,7,7>
+ 296144182U, // <6,6,6,u>: Cost 1 vdup2 RHS
+ 2566561894U, // <6,6,7,0>: Cost 3 vext1 <2,6,6,7>, LHS
+ 3634332924U, // <6,6,7,1>: Cost 4 vext1 <1,6,6,7>, <1,6,6,7>
+ 2566563797U, // <6,6,7,2>: Cost 3 vext1 <2,6,6,7>, <2,6,6,7>
+ 2584480258U, // <6,6,7,3>: Cost 3 vext1 <5,6,6,7>, <3,4,5,6>
+ 2566565174U, // <6,6,7,4>: Cost 3 vext1 <2,6,6,7>, RHS
+ 2717438846U, // <6,6,7,5>: Cost 3 vext3 <5,4,7,6>, <6,7,5,4>
+ 2980500280U, // <6,6,7,6>: Cost 3 vzipr RHS, <6,6,6,6>
+ 1906756918U, // <6,6,7,7>: Cost 2 vzipr RHS, RHS
+ 1906756919U, // <6,6,7,u>: Cost 2 vzipr RHS, RHS
+ 1516699750U, // <6,6,u,0>: Cost 2 vext1 <6,6,6,6>, LHS
+ 1573197614U, // <6,6,u,1>: Cost 2 vext2 <4,u,6,6>, LHS
+ 2566571990U, // <6,6,u,2>: Cost 3 vext1 <2,6,6,u>, <2,6,6,u>
+ 2846786205U, // <6,6,u,3>: Cost 3 vuzpr <4,6,4,6>, LHS
+ 1516703030U, // <6,6,u,4>: Cost 2 vext1 <6,6,6,6>, RHS
+ 1573197978U, // <6,6,u,5>: Cost 2 vext2 <4,u,6,6>, RHS
+ 296144182U, // <6,6,u,6>: Cost 1 vdup2 RHS
+ 1906765110U, // <6,6,u,7>: Cost 2 vzipr RHS, RHS
+ 296144182U, // <6,6,u,u>: Cost 1 vdup2 RHS
+ 1571209216U, // <6,7,0,0>: Cost 2 vext2 RHS, <0,0,0,0>
+ 497467494U, // <6,7,0,1>: Cost 1 vext2 RHS, LHS
+ 1571209380U, // <6,7,0,2>: Cost 2 vext2 RHS, <0,2,0,2>
+ 2644951292U, // <6,7,0,3>: Cost 3 vext2 RHS, <0,3,1,0>
+ 1571209554U, // <6,7,0,4>: Cost 2 vext2 RHS, <0,4,1,5>
+ 1510756450U, // <6,7,0,5>: Cost 2 vext1 <5,6,7,0>, <5,6,7,0>
+ 2644951542U, // <6,7,0,6>: Cost 3 vext2 RHS, <0,6,1,7>
+ 2584499194U, // <6,7,0,7>: Cost 3 vext1 <5,6,7,0>, <7,0,1,2>
+ 497468061U, // <6,7,0,u>: Cost 1 vext2 RHS, LHS
+ 1571209974U, // <6,7,1,0>: Cost 2 vext2 RHS, <1,0,3,2>
+ 1571210036U, // <6,7,1,1>: Cost 2 vext2 RHS, <1,1,1,1>
+ 1571210134U, // <6,7,1,2>: Cost 2 vext2 RHS, <1,2,3,0>
+ 1571210200U, // <6,7,1,3>: Cost 2 vext2 RHS, <1,3,1,3>
+ 2644952098U, // <6,7,1,4>: Cost 3 vext2 RHS, <1,4,0,5>
+ 1571210384U, // <6,7,1,5>: Cost 2 vext2 RHS, <1,5,3,7>
+ 2644952271U, // <6,7,1,6>: Cost 3 vext2 RHS, <1,6,1,7>
+ 2578535418U, // <6,7,1,7>: Cost 3 vext1 <4,6,7,1>, <7,0,1,2>
+ 1571210605U, // <6,7,1,u>: Cost 2 vext2 RHS, <1,u,1,3>
+ 2644952509U, // <6,7,2,0>: Cost 3 vext2 RHS, <2,0,1,2>
+ 2644952582U, // <6,7,2,1>: Cost 3 vext2 RHS, <2,1,0,3>
+ 1571210856U, // <6,7,2,2>: Cost 2 vext2 RHS, <2,2,2,2>
+ 1571210918U, // <6,7,2,3>: Cost 2 vext2 RHS, <2,3,0,1>
+ 2644952828U, // <6,7,2,4>: Cost 3 vext2 RHS, <2,4,0,6>
+ 2633009028U, // <6,7,2,5>: Cost 3 vext2 <2,5,6,7>, <2,5,6,7>
+ 1571211194U, // <6,7,2,6>: Cost 2 vext2 RHS, <2,6,3,7>
+ 2668840938U, // <6,7,2,7>: Cost 3 vext2 RHS, <2,7,0,1>
+ 1571211323U, // <6,7,2,u>: Cost 2 vext2 RHS, <2,u,0,1>
+ 1571211414U, // <6,7,3,0>: Cost 2 vext2 RHS, <3,0,1,2>
+ 2644953311U, // <6,7,3,1>: Cost 3 vext2 RHS, <3,1,0,3>
+ 2644953390U, // <6,7,3,2>: Cost 3 vext2 RHS, <3,2,0,1>
+ 1571211676U, // <6,7,3,3>: Cost 2 vext2 RHS, <3,3,3,3>
+ 1571211778U, // <6,7,3,4>: Cost 2 vext2 RHS, <3,4,5,6>
+ 2644953648U, // <6,7,3,5>: Cost 3 vext2 RHS, <3,5,1,7>
+ 2644953720U, // <6,7,3,6>: Cost 3 vext2 RHS, <3,6,0,7>
+ 2644953795U, // <6,7,3,7>: Cost 3 vext2 RHS, <3,7,0,1>
+ 1571212062U, // <6,7,3,u>: Cost 2 vext2 RHS, <3,u,1,2>
+ 1573202834U, // <6,7,4,0>: Cost 2 vext2 RHS, <4,0,5,1>
+ 2644954058U, // <6,7,4,1>: Cost 3 vext2 RHS, <4,1,2,3>
+ 2644954166U, // <6,7,4,2>: Cost 3 vext2 RHS, <4,2,5,3>
+ 2644954258U, // <6,7,4,3>: Cost 3 vext2 RHS, <4,3,6,5>
+ 1571212496U, // <6,7,4,4>: Cost 2 vext2 RHS, <4,4,4,4>
+ 497470774U, // <6,7,4,5>: Cost 1 vext2 RHS, RHS
+ 1573203316U, // <6,7,4,6>: Cost 2 vext2 RHS, <4,6,4,6>
+ 2646281688U, // <6,7,4,7>: Cost 3 vext2 <4,7,6,7>, <4,7,6,7>
+ 497471017U, // <6,7,4,u>: Cost 1 vext2 RHS, RHS
+ 2644954696U, // <6,7,5,0>: Cost 3 vext2 RHS, <5,0,1,2>
+ 1573203664U, // <6,7,5,1>: Cost 2 vext2 RHS, <5,1,7,3>
+ 2644954878U, // <6,7,5,2>: Cost 3 vext2 RHS, <5,2,3,4>
+ 2644954991U, // <6,7,5,3>: Cost 3 vext2 RHS, <5,3,7,0>
+ 1571213254U, // <6,7,5,4>: Cost 2 vext2 RHS, <5,4,7,6>
+ 1571213316U, // <6,7,5,5>: Cost 2 vext2 RHS, <5,5,5,5>
+ 1571213410U, // <6,7,5,6>: Cost 2 vext2 RHS, <5,6,7,0>
+ 1573204136U, // <6,7,5,7>: Cost 2 vext2 RHS, <5,7,5,7>
+ 1573204217U, // <6,7,5,u>: Cost 2 vext2 RHS, <5,u,5,7>
+ 2644955425U, // <6,7,6,0>: Cost 3 vext2 RHS, <6,0,1,2>
+ 2644955561U, // <6,7,6,1>: Cost 3 vext2 RHS, <6,1,7,3>
+ 1573204474U, // <6,7,6,2>: Cost 2 vext2 RHS, <6,2,7,3>
+ 2644955698U, // <6,7,6,3>: Cost 3 vext2 RHS, <6,3,4,5>
+ 2644955789U, // <6,7,6,4>: Cost 3 vext2 RHS, <6,4,5,6>
+ 2644955889U, // <6,7,6,5>: Cost 3 vext2 RHS, <6,5,7,7>
+ 1571214136U, // <6,7,6,6>: Cost 2 vext2 RHS, <6,6,6,6>
+ 1571214158U, // <6,7,6,7>: Cost 2 vext2 RHS, <6,7,0,1>
+ 1573204895U, // <6,7,6,u>: Cost 2 vext2 RHS, <6,u,0,1>
+ 1573204986U, // <6,7,7,0>: Cost 2 vext2 RHS, <7,0,1,2>
+ 2572608656U, // <6,7,7,1>: Cost 3 vext1 <3,6,7,7>, <1,5,3,7>
+ 2644956362U, // <6,7,7,2>: Cost 3 vext2 RHS, <7,2,6,3>
+ 2572610231U, // <6,7,7,3>: Cost 3 vext1 <3,6,7,7>, <3,6,7,7>
+ 1573205350U, // <6,7,7,4>: Cost 2 vext2 RHS, <7,4,5,6>
+ 2646947220U, // <6,7,7,5>: Cost 3 vext2 RHS, <7,5,1,7>
+ 1516786498U, // <6,7,7,6>: Cost 2 vext1 <6,6,7,7>, <6,6,7,7>
+ 1571214956U, // <6,7,7,7>: Cost 2 vext2 RHS, <7,7,7,7>
+ 1573205634U, // <6,7,7,u>: Cost 2 vext2 RHS, <7,u,1,2>
+ 1571215059U, // <6,7,u,0>: Cost 2 vext2 RHS, <u,0,1,2>
+ 497473326U, // <6,7,u,1>: Cost 1 vext2 RHS, LHS
+ 1571215237U, // <6,7,u,2>: Cost 2 vext2 RHS, <u,2,3,0>
+ 1571215292U, // <6,7,u,3>: Cost 2 vext2 RHS, <u,3,0,1>
+ 1571215423U, // <6,7,u,4>: Cost 2 vext2 RHS, <u,4,5,6>
+ 497473690U, // <6,7,u,5>: Cost 1 vext2 RHS, RHS
+ 1571215568U, // <6,7,u,6>: Cost 2 vext2 RHS, <u,6,3,7>
+ 1573206272U, // <6,7,u,7>: Cost 2 vext2 RHS, <u,7,0,1>
+ 497473893U, // <6,7,u,u>: Cost 1 vext2 RHS, LHS
+ 1571217408U, // <6,u,0,0>: Cost 2 vext2 RHS, <0,0,0,0>
+ 497475686U, // <6,u,0,1>: Cost 1 vext2 RHS, LHS
+ 1571217572U, // <6,u,0,2>: Cost 2 vext2 RHS, <0,2,0,2>
+ 2689865445U, // <6,u,0,3>: Cost 3 vext3 <0,u,2,6>, <u,0,3,2>
+ 1571217746U, // <6,u,0,4>: Cost 2 vext2 RHS, <0,4,1,5>
+ 1510830187U, // <6,u,0,5>: Cost 2 vext1 <5,6,u,0>, <5,6,u,0>
+ 2644959734U, // <6,u,0,6>: Cost 3 vext2 RHS, <0,6,1,7>
+ 1193130221U, // <6,u,0,7>: Cost 2 vrev <u,6,7,0>
+ 497476253U, // <6,u,0,u>: Cost 1 vext2 RHS, LHS
+ 1571218166U, // <6,u,1,0>: Cost 2 vext2 RHS, <1,0,3,2>
+ 1571218228U, // <6,u,1,1>: Cost 2 vext2 RHS, <1,1,1,1>
+ 1612289838U, // <6,u,1,2>: Cost 2 vext3 <0,2,4,6>, LHS
+ 1571218392U, // <6,u,1,3>: Cost 2 vext2 RHS, <1,3,1,3>
+ 2566663478U, // <6,u,1,4>: Cost 3 vext1 <2,6,u,1>, RHS
+ 1571218576U, // <6,u,1,5>: Cost 2 vext2 RHS, <1,5,3,7>
+ 2644960463U, // <6,u,1,6>: Cost 3 vext2 RHS, <1,6,1,7>
+ 2717439835U, // <6,u,1,7>: Cost 3 vext3 <5,4,7,6>, <u,1,7,3>
+ 1612289892U, // <6,u,1,u>: Cost 2 vext3 <0,2,4,6>, LHS
+ 1504870502U, // <6,u,2,0>: Cost 2 vext1 <4,6,u,2>, LHS
+ 2644960774U, // <6,u,2,1>: Cost 3 vext2 RHS, <2,1,0,3>
+ 1571219048U, // <6,u,2,2>: Cost 2 vext2 RHS, <2,2,2,2>
+ 1571219110U, // <6,u,2,3>: Cost 2 vext2 RHS, <2,3,0,1>
+ 1504873782U, // <6,u,2,4>: Cost 2 vext1 <4,6,u,2>, RHS
+ 2633017221U, // <6,u,2,5>: Cost 3 vext2 <2,5,6,u>, <2,5,6,u>
+ 1571219386U, // <6,u,2,6>: Cost 2 vext2 RHS, <2,6,3,7>
+ 2712573868U, // <6,u,2,7>: Cost 3 vext3 <4,6,4,6>, <u,2,7,3>
+ 1571219515U, // <6,u,2,u>: Cost 2 vext2 RHS, <2,u,0,1>
+ 1571219606U, // <6,u,3,0>: Cost 2 vext2 RHS, <3,0,1,2>
+ 2644961503U, // <6,u,3,1>: Cost 3 vext2 RHS, <3,1,0,3>
+ 2566678499U, // <6,u,3,2>: Cost 3 vext1 <2,6,u,3>, <2,6,u,3>
+ 1571219868U, // <6,u,3,3>: Cost 2 vext2 RHS, <3,3,3,3>
+ 1571219970U, // <6,u,3,4>: Cost 2 vext2 RHS, <3,4,5,6>
+ 2689865711U, // <6,u,3,5>: Cost 3 vext3 <0,u,2,6>, <u,3,5,7>
+ 2708002806U, // <6,u,3,6>: Cost 3 vext3 <3,u,5,6>, <u,3,6,5>
+ 2644961987U, // <6,u,3,7>: Cost 3 vext2 RHS, <3,7,0,1>
+ 1571220254U, // <6,u,3,u>: Cost 2 vext2 RHS, <3,u,1,2>
+ 1571220370U, // <6,u,4,0>: Cost 2 vext2 RHS, <4,0,5,1>
+ 2644962250U, // <6,u,4,1>: Cost 3 vext2 RHS, <4,1,2,3>
+ 1661245476U, // <6,u,4,2>: Cost 2 vext3 <u,4,2,6>, <u,4,2,6>
+ 2686031917U, // <6,u,4,3>: Cost 3 vext3 <0,2,4,6>, <u,4,3,6>
+ 1571220688U, // <6,u,4,4>: Cost 2 vext2 RHS, <4,4,4,4>
+ 497478967U, // <6,u,4,5>: Cost 1 vext2 RHS, RHS
+ 1571220852U, // <6,u,4,6>: Cost 2 vext2 RHS, <4,6,4,6>
+ 1661614161U, // <6,u,4,7>: Cost 2 vext3 <u,4,7,6>, <u,4,7,6>
+ 497479209U, // <6,u,4,u>: Cost 1 vext2 RHS, RHS
+ 2566692966U, // <6,u,5,0>: Cost 3 vext1 <2,6,u,5>, LHS
+ 1571221200U, // <6,u,5,1>: Cost 2 vext2 RHS, <5,1,7,3>
+ 2566694885U, // <6,u,5,2>: Cost 3 vext1 <2,6,u,5>, <2,6,u,5>
+ 2689865855U, // <6,u,5,3>: Cost 3 vext3 <0,u,2,6>, <u,5,3,7>
+ 1571221446U, // <6,u,5,4>: Cost 2 vext2 RHS, <5,4,7,6>
+ 1571221508U, // <6,u,5,5>: Cost 2 vext2 RHS, <5,5,5,5>
+ 1612290202U, // <6,u,5,6>: Cost 2 vext3 <0,2,4,6>, RHS
+ 1571221672U, // <6,u,5,7>: Cost 2 vext2 RHS, <5,7,5,7>
+ 1612290220U, // <6,u,5,u>: Cost 2 vext3 <0,2,4,6>, RHS
+ 1504903270U, // <6,u,6,0>: Cost 2 vext1 <4,6,u,6>, LHS
+ 2644963752U, // <6,u,6,1>: Cost 3 vext2 RHS, <6,1,7,2>
+ 1571222010U, // <6,u,6,2>: Cost 2 vext2 RHS, <6,2,7,3>
+ 2686032080U, // <6,u,6,3>: Cost 3 vext3 <0,2,4,6>, <u,6,3,7>
+ 1504906550U, // <6,u,6,4>: Cost 2 vext1 <4,6,u,6>, RHS
+ 2644964079U, // <6,u,6,5>: Cost 3 vext2 RHS, <6,5,7,5>
+ 296144182U, // <6,u,6,6>: Cost 1 vdup2 RHS
+ 1571222350U, // <6,u,6,7>: Cost 2 vext2 RHS, <6,7,0,1>
+ 296144182U, // <6,u,6,u>: Cost 1 vdup2 RHS
+ 1492967526U, // <6,u,7,0>: Cost 2 vext1 <2,6,u,7>, LHS
+ 2560738574U, // <6,u,7,1>: Cost 3 vext1 <1,6,u,7>, <1,6,u,7>
+ 1492969447U, // <6,u,7,2>: Cost 2 vext1 <2,6,u,7>, <2,6,u,7>
+ 1906753692U, // <6,u,7,3>: Cost 2 vzipr RHS, LHS
+ 1492970806U, // <6,u,7,4>: Cost 2 vext1 <2,6,u,7>, RHS
+ 2980495761U, // <6,u,7,5>: Cost 3 vzipr RHS, <0,4,u,5>
+ 1516860235U, // <6,u,7,6>: Cost 2 vext1 <6,6,u,7>, <6,6,u,7>
+ 1906756936U, // <6,u,7,7>: Cost 2 vzipr RHS, RHS
+ 1492973358U, // <6,u,7,u>: Cost 2 vext1 <2,6,u,7>, LHS
+ 1492975718U, // <6,u,u,0>: Cost 2 vext1 <2,6,u,u>, LHS
+ 497481518U, // <6,u,u,1>: Cost 1 vext2 RHS, LHS
+ 1612290405U, // <6,u,u,2>: Cost 2 vext3 <0,2,4,6>, LHS
+ 1571223484U, // <6,u,u,3>: Cost 2 vext2 RHS, <u,3,0,1>
+ 1492978998U, // <6,u,u,4>: Cost 2 vext1 <2,6,u,u>, RHS
+ 497481882U, // <6,u,u,5>: Cost 1 vext2 RHS, RHS
+ 296144182U, // <6,u,u,6>: Cost 1 vdup2 RHS
+ 1906765128U, // <6,u,u,7>: Cost 2 vzipr RHS, RHS
+ 497482085U, // <6,u,u,u>: Cost 1 vext2 RHS, LHS
+ 1638318080U, // <7,0,0,0>: Cost 2 vext3 RHS, <0,0,0,0>
+ 1638318090U, // <7,0,0,1>: Cost 2 vext3 RHS, <0,0,1,1>
+ 1638318100U, // <7,0,0,2>: Cost 2 vext3 RHS, <0,0,2,2>
+ 3646442178U, // <7,0,0,3>: Cost 4 vext1 <3,7,0,0>, <3,7,0,0>
+ 2712059941U, // <7,0,0,4>: Cost 3 vext3 RHS, <0,0,4,1>
+ 2651603364U, // <7,0,0,5>: Cost 3 vext2 <5,6,7,0>, <0,5,1,6>
+ 2590618445U, // <7,0,0,6>: Cost 3 vext1 <6,7,0,0>, <6,7,0,0>
+ 3785801798U, // <7,0,0,7>: Cost 4 vext3 RHS, <0,0,7,7>
+ 1638318153U, // <7,0,0,u>: Cost 2 vext3 RHS, <0,0,u,1>
+ 1516879974U, // <7,0,1,0>: Cost 2 vext1 <6,7,0,1>, LHS
+ 2693922911U, // <7,0,1,1>: Cost 3 vext3 <1,5,3,7>, <0,1,1,5>
+ 564576358U, // <7,0,1,2>: Cost 1 vext3 RHS, LHS
+ 2638996480U, // <7,0,1,3>: Cost 3 vext2 <3,5,7,0>, <1,3,5,7>
+ 1516883254U, // <7,0,1,4>: Cost 2 vext1 <6,7,0,1>, RHS
+ 2649613456U, // <7,0,1,5>: Cost 3 vext2 <5,3,7,0>, <1,5,3,7>
+ 1516884814U, // <7,0,1,6>: Cost 2 vext1 <6,7,0,1>, <6,7,0,1>
+ 2590626808U, // <7,0,1,7>: Cost 3 vext1 <6,7,0,1>, <7,0,1,0>
+ 564576412U, // <7,0,1,u>: Cost 1 vext3 RHS, LHS
+ 1638318244U, // <7,0,2,0>: Cost 2 vext3 RHS, <0,2,0,2>
+ 2692743344U, // <7,0,2,1>: Cost 3 vext3 <1,3,5,7>, <0,2,1,5>
+ 2712060084U, // <7,0,2,2>: Cost 3 vext3 RHS, <0,2,2,0>
+ 2712060094U, // <7,0,2,3>: Cost 3 vext3 RHS, <0,2,3,1>
+ 1638318284U, // <7,0,2,4>: Cost 2 vext3 RHS, <0,2,4,6>
+ 2712060118U, // <7,0,2,5>: Cost 3 vext3 RHS, <0,2,5,7>
+ 2651604922U, // <7,0,2,6>: Cost 3 vext2 <5,6,7,0>, <2,6,3,7>
+ 2686255336U, // <7,0,2,7>: Cost 3 vext3 <0,2,7,7>, <0,2,7,7>
+ 1638318316U, // <7,0,2,u>: Cost 2 vext3 RHS, <0,2,u,2>
+ 2651605142U, // <7,0,3,0>: Cost 3 vext2 <5,6,7,0>, <3,0,1,2>
+ 2712060156U, // <7,0,3,1>: Cost 3 vext3 RHS, <0,3,1,0>
+ 2712060165U, // <7,0,3,2>: Cost 3 vext3 RHS, <0,3,2,0>
+ 2651605404U, // <7,0,3,3>: Cost 3 vext2 <5,6,7,0>, <3,3,3,3>
+ 2651605506U, // <7,0,3,4>: Cost 3 vext2 <5,6,7,0>, <3,4,5,6>
+ 2638998111U, // <7,0,3,5>: Cost 3 vext2 <3,5,7,0>, <3,5,7,0>
+ 2639661744U, // <7,0,3,6>: Cost 3 vext2 <3,6,7,0>, <3,6,7,0>
+ 3712740068U, // <7,0,3,7>: Cost 4 vext2 <3,5,7,0>, <3,7,3,7>
+ 2640989010U, // <7,0,3,u>: Cost 3 vext2 <3,u,7,0>, <3,u,7,0>
+ 2712060232U, // <7,0,4,0>: Cost 3 vext3 RHS, <0,4,0,4>
+ 1638318418U, // <7,0,4,1>: Cost 2 vext3 RHS, <0,4,1,5>
+ 1638318428U, // <7,0,4,2>: Cost 2 vext3 RHS, <0,4,2,6>
+ 3646474950U, // <7,0,4,3>: Cost 4 vext1 <3,7,0,4>, <3,7,0,4>
+ 2712060270U, // <7,0,4,4>: Cost 3 vext3 RHS, <0,4,4,6>
+ 1577864502U, // <7,0,4,5>: Cost 2 vext2 <5,6,7,0>, RHS
+ 2651606388U, // <7,0,4,6>: Cost 3 vext2 <5,6,7,0>, <4,6,4,6>
+ 3787792776U, // <7,0,4,7>: Cost 4 vext3 RHS, <0,4,7,5>
+ 1638318481U, // <7,0,4,u>: Cost 2 vext3 RHS, <0,4,u,5>
+ 2590654566U, // <7,0,5,0>: Cost 3 vext1 <6,7,0,5>, LHS
+ 2651606736U, // <7,0,5,1>: Cost 3 vext2 <5,6,7,0>, <5,1,7,3>
+ 2712060334U, // <7,0,5,2>: Cost 3 vext3 RHS, <0,5,2,7>
+ 2649616239U, // <7,0,5,3>: Cost 3 vext2 <5,3,7,0>, <5,3,7,0>
+ 2651606982U, // <7,0,5,4>: Cost 3 vext2 <5,6,7,0>, <5,4,7,6>
+ 2651607044U, // <7,0,5,5>: Cost 3 vext2 <5,6,7,0>, <5,5,5,5>
+ 1577865314U, // <7,0,5,6>: Cost 2 vext2 <5,6,7,0>, <5,6,7,0>
+ 2651607208U, // <7,0,5,7>: Cost 3 vext2 <5,6,7,0>, <5,7,5,7>
+ 1579192580U, // <7,0,5,u>: Cost 2 vext2 <5,u,7,0>, <5,u,7,0>
+ 2688393709U, // <7,0,6,0>: Cost 3 vext3 <0,6,0,7>, <0,6,0,7>
+ 2712060406U, // <7,0,6,1>: Cost 3 vext3 RHS, <0,6,1,7>
+ 2688541183U, // <7,0,6,2>: Cost 3 vext3 <0,6,2,7>, <0,6,2,7>
+ 2655588936U, // <7,0,6,3>: Cost 3 vext2 <6,3,7,0>, <6,3,7,0>
+ 3762430481U, // <7,0,6,4>: Cost 4 vext3 <0,6,4,7>, <0,6,4,7>
+ 2651607730U, // <7,0,6,5>: Cost 3 vext2 <5,6,7,0>, <6,5,0,7>
+ 2651607864U, // <7,0,6,6>: Cost 3 vext2 <5,6,7,0>, <6,6,6,6>
+ 2651607886U, // <7,0,6,7>: Cost 3 vext2 <5,6,7,0>, <6,7,0,1>
+ 2688983605U, // <7,0,6,u>: Cost 3 vext3 <0,6,u,7>, <0,6,u,7>
+ 2651608058U, // <7,0,7,0>: Cost 3 vext2 <5,6,7,0>, <7,0,1,2>
+ 2932703334U, // <7,0,7,1>: Cost 3 vzipl <7,7,7,7>, LHS
+ 3066921062U, // <7,0,7,2>: Cost 3 vtrnl <7,7,7,7>, LHS
+ 3712742678U, // <7,0,7,3>: Cost 4 vext2 <3,5,7,0>, <7,3,5,7>
+ 2651608422U, // <7,0,7,4>: Cost 3 vext2 <5,6,7,0>, <7,4,5,6>
+ 2651608513U, // <7,0,7,5>: Cost 3 vext2 <5,6,7,0>, <7,5,6,7>
+ 2663552532U, // <7,0,7,6>: Cost 3 vext2 <7,6,7,0>, <7,6,7,0>
+ 2651608684U, // <7,0,7,7>: Cost 3 vext2 <5,6,7,0>, <7,7,7,7>
+ 2651608706U, // <7,0,7,u>: Cost 3 vext2 <5,6,7,0>, <7,u,1,2>
+ 1638318730U, // <7,0,u,0>: Cost 2 vext3 RHS, <0,u,0,2>
+ 1638318738U, // <7,0,u,1>: Cost 2 vext3 RHS, <0,u,1,1>
+ 564576925U, // <7,0,u,2>: Cost 1 vext3 RHS, LHS
+ 2572765898U, // <7,0,u,3>: Cost 3 vext1 <3,7,0,u>, <3,7,0,u>
+ 1638318770U, // <7,0,u,4>: Cost 2 vext3 RHS, <0,u,4,6>
+ 1577867418U, // <7,0,u,5>: Cost 2 vext2 <5,6,7,0>, RHS
+ 1516942165U, // <7,0,u,6>: Cost 2 vext1 <6,7,0,u>, <6,7,0,u>
+ 2651609344U, // <7,0,u,7>: Cost 3 vext2 <5,6,7,0>, <u,7,0,1>
+ 564576979U, // <7,0,u,u>: Cost 1 vext3 RHS, LHS
+ 2590687334U, // <7,1,0,0>: Cost 3 vext1 <6,7,1,0>, LHS
+ 2639003750U, // <7,1,0,1>: Cost 3 vext2 <3,5,7,1>, LHS
+ 2793357414U, // <7,1,0,2>: Cost 3 vuzpl <7,0,1,2>, LHS
+ 1638318838U, // <7,1,0,3>: Cost 2 vext3 RHS, <1,0,3,2>
+ 2590690614U, // <7,1,0,4>: Cost 3 vext1 <6,7,1,0>, RHS
+ 2712060679U, // <7,1,0,5>: Cost 3 vext3 RHS, <1,0,5,1>
+ 2590692182U, // <7,1,0,6>: Cost 3 vext1 <6,7,1,0>, <6,7,1,0>
+ 3785802521U, // <7,1,0,7>: Cost 4 vext3 RHS, <1,0,7,1>
+ 1638318883U, // <7,1,0,u>: Cost 2 vext3 RHS, <1,0,u,2>
+ 2712060715U, // <7,1,1,0>: Cost 3 vext3 RHS, <1,1,0,1>
+ 1638318900U, // <7,1,1,1>: Cost 2 vext3 RHS, <1,1,1,1>
+ 3774300994U, // <7,1,1,2>: Cost 4 vext3 <2,6,3,7>, <1,1,2,6>
+ 1638318920U, // <7,1,1,3>: Cost 2 vext3 RHS, <1,1,3,3>
+ 2712060755U, // <7,1,1,4>: Cost 3 vext3 RHS, <1,1,4,5>
+ 2691416926U, // <7,1,1,5>: Cost 3 vext3 <1,1,5,7>, <1,1,5,7>
+ 2590700375U, // <7,1,1,6>: Cost 3 vext1 <6,7,1,1>, <6,7,1,1>
+ 3765158766U, // <7,1,1,7>: Cost 4 vext3 <1,1,5,7>, <1,1,7,5>
+ 1638318965U, // <7,1,1,u>: Cost 2 vext3 RHS, <1,1,u,3>
+ 2712060796U, // <7,1,2,0>: Cost 3 vext3 RHS, <1,2,0,1>
+ 2712060807U, // <7,1,2,1>: Cost 3 vext3 RHS, <1,2,1,3>
+ 3712747112U, // <7,1,2,2>: Cost 4 vext2 <3,5,7,1>, <2,2,2,2>
+ 1638318998U, // <7,1,2,3>: Cost 2 vext3 RHS, <1,2,3,0>
+ 2712060836U, // <7,1,2,4>: Cost 3 vext3 RHS, <1,2,4,5>
+ 2712060843U, // <7,1,2,5>: Cost 3 vext3 RHS, <1,2,5,3>
+ 2590708568U, // <7,1,2,6>: Cost 3 vext1 <6,7,1,2>, <6,7,1,2>
+ 2735948730U, // <7,1,2,7>: Cost 3 vext3 RHS, <1,2,7,0>
+ 1638319043U, // <7,1,2,u>: Cost 2 vext3 RHS, <1,2,u,0>
+ 2712060876U, // <7,1,3,0>: Cost 3 vext3 RHS, <1,3,0,0>
+ 1638319064U, // <7,1,3,1>: Cost 2 vext3 RHS, <1,3,1,3>
+ 2712060894U, // <7,1,3,2>: Cost 3 vext3 RHS, <1,3,2,0>
+ 2692596718U, // <7,1,3,3>: Cost 3 vext3 <1,3,3,7>, <1,3,3,7>
+ 2712060917U, // <7,1,3,4>: Cost 3 vext3 RHS, <1,3,4,5>
+ 1619002368U, // <7,1,3,5>: Cost 2 vext3 <1,3,5,7>, <1,3,5,7>
+ 2692817929U, // <7,1,3,6>: Cost 3 vext3 <1,3,6,7>, <1,3,6,7>
+ 2735948814U, // <7,1,3,7>: Cost 3 vext3 RHS, <1,3,7,3>
+ 1619223579U, // <7,1,3,u>: Cost 2 vext3 <1,3,u,7>, <1,3,u,7>
+ 2712060962U, // <7,1,4,0>: Cost 3 vext3 RHS, <1,4,0,5>
+ 2712060971U, // <7,1,4,1>: Cost 3 vext3 RHS, <1,4,1,5>
+ 2712060980U, // <7,1,4,2>: Cost 3 vext3 RHS, <1,4,2,5>
+ 2712060989U, // <7,1,4,3>: Cost 3 vext3 RHS, <1,4,3,5>
+ 3785802822U, // <7,1,4,4>: Cost 4 vext3 RHS, <1,4,4,5>
+ 2639007030U, // <7,1,4,5>: Cost 3 vext2 <3,5,7,1>, RHS
+ 2645642634U, // <7,1,4,6>: Cost 3 vext2 <4,6,7,1>, <4,6,7,1>
+ 3719384520U, // <7,1,4,7>: Cost 4 vext2 <4,6,7,1>, <4,7,5,0>
+ 2639007273U, // <7,1,4,u>: Cost 3 vext2 <3,5,7,1>, RHS
+ 2572812390U, // <7,1,5,0>: Cost 3 vext1 <3,7,1,5>, LHS
+ 2693776510U, // <7,1,5,1>: Cost 3 vext3 <1,5,1,7>, <1,5,1,7>
+ 3774301318U, // <7,1,5,2>: Cost 4 vext3 <2,6,3,7>, <1,5,2,6>
+ 1620182160U, // <7,1,5,3>: Cost 2 vext3 <1,5,3,7>, <1,5,3,7>
+ 2572815670U, // <7,1,5,4>: Cost 3 vext1 <3,7,1,5>, RHS
+ 3766486178U, // <7,1,5,5>: Cost 4 vext3 <1,3,5,7>, <1,5,5,7>
+ 2651615331U, // <7,1,5,6>: Cost 3 vext2 <5,6,7,1>, <5,6,7,1>
+ 2652278964U, // <7,1,5,7>: Cost 3 vext2 <5,7,7,1>, <5,7,7,1>
+ 1620550845U, // <7,1,5,u>: Cost 2 vext3 <1,5,u,7>, <1,5,u,7>
+ 3768108230U, // <7,1,6,0>: Cost 4 vext3 <1,6,0,7>, <1,6,0,7>
+ 2694440143U, // <7,1,6,1>: Cost 3 vext3 <1,6,1,7>, <1,6,1,7>
+ 2712061144U, // <7,1,6,2>: Cost 3 vext3 RHS, <1,6,2,7>
+ 2694587617U, // <7,1,6,3>: Cost 3 vext3 <1,6,3,7>, <1,6,3,7>
+ 3768403178U, // <7,1,6,4>: Cost 4 vext3 <1,6,4,7>, <1,6,4,7>
+ 2694735091U, // <7,1,6,5>: Cost 3 vext3 <1,6,5,7>, <1,6,5,7>
+ 3768550652U, // <7,1,6,6>: Cost 4 vext3 <1,6,6,7>, <1,6,6,7>
+ 2652279630U, // <7,1,6,7>: Cost 3 vext2 <5,7,7,1>, <6,7,0,1>
+ 2694956302U, // <7,1,6,u>: Cost 3 vext3 <1,6,u,7>, <1,6,u,7>
+ 2645644282U, // <7,1,7,0>: Cost 3 vext2 <4,6,7,1>, <7,0,1,2>
+ 2859062094U, // <7,1,7,1>: Cost 3 vuzpr <6,7,0,1>, <6,7,0,1>
+ 3779462437U, // <7,1,7,2>: Cost 4 vext3 <3,5,1,7>, <1,7,2,3>
+ 3121938534U, // <7,1,7,3>: Cost 3 vtrnr <5,7,5,7>, LHS
+ 2554916150U, // <7,1,7,4>: Cost 3 vext1 <0,7,1,7>, RHS
+ 3769140548U, // <7,1,7,5>: Cost 4 vext3 <1,7,5,7>, <1,7,5,7>
+ 3726022164U, // <7,1,7,6>: Cost 4 vext2 <5,7,7,1>, <7,6,7,0>
+ 2554918508U, // <7,1,7,7>: Cost 3 vext1 <0,7,1,7>, <7,7,7,7>
+ 3121938539U, // <7,1,7,u>: Cost 3 vtrnr <5,7,5,7>, LHS
+ 2572836966U, // <7,1,u,0>: Cost 3 vext1 <3,7,1,u>, LHS
+ 1638319469U, // <7,1,u,1>: Cost 2 vext3 RHS, <1,u,1,3>
+ 2712061299U, // <7,1,u,2>: Cost 3 vext3 RHS, <1,u,2,0>
+ 1622173059U, // <7,1,u,3>: Cost 2 vext3 <1,u,3,7>, <1,u,3,7>
+ 2572840246U, // <7,1,u,4>: Cost 3 vext1 <3,7,1,u>, RHS
+ 1622320533U, // <7,1,u,5>: Cost 2 vext3 <1,u,5,7>, <1,u,5,7>
+ 2696136094U, // <7,1,u,6>: Cost 3 vext3 <1,u,6,7>, <1,u,6,7>
+ 2859060777U, // <7,1,u,7>: Cost 3 vuzpr <6,7,0,1>, RHS
+ 1622541744U, // <7,1,u,u>: Cost 2 vext3 <1,u,u,7>, <1,u,u,7>
+ 2712061364U, // <7,2,0,0>: Cost 3 vext3 RHS, <2,0,0,2>
+ 2712061373U, // <7,2,0,1>: Cost 3 vext3 RHS, <2,0,1,2>
+ 2712061380U, // <7,2,0,2>: Cost 3 vext3 RHS, <2,0,2,0>
+ 2712061389U, // <7,2,0,3>: Cost 3 vext3 RHS, <2,0,3,0>
+ 2712061404U, // <7,2,0,4>: Cost 3 vext3 RHS, <2,0,4,6>
+ 2696725990U, // <7,2,0,5>: Cost 3 vext3 <2,0,5,7>, <2,0,5,7>
+ 2712061417U, // <7,2,0,6>: Cost 3 vext3 RHS, <2,0,6,1>
+ 3785803251U, // <7,2,0,7>: Cost 4 vext3 RHS, <2,0,7,2>
+ 2696947201U, // <7,2,0,u>: Cost 3 vext3 <2,0,u,7>, <2,0,u,7>
+ 2712061446U, // <7,2,1,0>: Cost 3 vext3 RHS, <2,1,0,3>
+ 3785803276U, // <7,2,1,1>: Cost 4 vext3 RHS, <2,1,1,0>
+ 3785803285U, // <7,2,1,2>: Cost 4 vext3 RHS, <2,1,2,0>
+ 2712061471U, // <7,2,1,3>: Cost 3 vext3 RHS, <2,1,3,1>
+ 2712061482U, // <7,2,1,4>: Cost 3 vext3 RHS, <2,1,4,3>
+ 3766486576U, // <7,2,1,5>: Cost 4 vext3 <1,3,5,7>, <2,1,5,0>
+ 2712061500U, // <7,2,1,6>: Cost 3 vext3 RHS, <2,1,6,3>
+ 2602718850U, // <7,2,1,7>: Cost 3 vext1 <u,7,2,1>, <7,u,1,2>
+ 2712061516U, // <7,2,1,u>: Cost 3 vext3 RHS, <2,1,u,1>
+ 2712061525U, // <7,2,2,0>: Cost 3 vext3 RHS, <2,2,0,1>
+ 2712061536U, // <7,2,2,1>: Cost 3 vext3 RHS, <2,2,1,3>
+ 1638319720U, // <7,2,2,2>: Cost 2 vext3 RHS, <2,2,2,2>
+ 1638319730U, // <7,2,2,3>: Cost 2 vext3 RHS, <2,2,3,3>
+ 2712061565U, // <7,2,2,4>: Cost 3 vext3 RHS, <2,2,4,5>
+ 2698053256U, // <7,2,2,5>: Cost 3 vext3 <2,2,5,7>, <2,2,5,7>
+ 2712061584U, // <7,2,2,6>: Cost 3 vext3 RHS, <2,2,6,6>
+ 3771795096U, // <7,2,2,7>: Cost 4 vext3 <2,2,5,7>, <2,2,7,5>
+ 1638319775U, // <7,2,2,u>: Cost 2 vext3 RHS, <2,2,u,3>
+ 1638319782U, // <7,2,3,0>: Cost 2 vext3 RHS, <2,3,0,1>
+ 2693924531U, // <7,2,3,1>: Cost 3 vext3 <1,5,3,7>, <2,3,1,5>
+ 2700560061U, // <7,2,3,2>: Cost 3 vext3 <2,6,3,7>, <2,3,2,6>
+ 2693924551U, // <7,2,3,3>: Cost 3 vext3 <1,5,3,7>, <2,3,3,7>
+ 1638319822U, // <7,2,3,4>: Cost 2 vext3 RHS, <2,3,4,5>
+ 2698716889U, // <7,2,3,5>: Cost 3 vext3 <2,3,5,7>, <2,3,5,7>
+ 2712061665U, // <7,2,3,6>: Cost 3 vext3 RHS, <2,3,6,6>
+ 2735949540U, // <7,2,3,7>: Cost 3 vext3 RHS, <2,3,7,0>
+ 1638319854U, // <7,2,3,u>: Cost 2 vext3 RHS, <2,3,u,1>
+ 2712061692U, // <7,2,4,0>: Cost 3 vext3 RHS, <2,4,0,6>
+ 2712061698U, // <7,2,4,1>: Cost 3 vext3 RHS, <2,4,1,3>
+ 2712061708U, // <7,2,4,2>: Cost 3 vext3 RHS, <2,4,2,4>
+ 2712061718U, // <7,2,4,3>: Cost 3 vext3 RHS, <2,4,3,5>
+ 2712061728U, // <7,2,4,4>: Cost 3 vext3 RHS, <2,4,4,6>
+ 2699380522U, // <7,2,4,5>: Cost 3 vext3 <2,4,5,7>, <2,4,5,7>
+ 2712061740U, // <7,2,4,6>: Cost 3 vext3 RHS, <2,4,6,0>
+ 3809691445U, // <7,2,4,7>: Cost 4 vext3 RHS, <2,4,7,0>
+ 2699601733U, // <7,2,4,u>: Cost 3 vext3 <2,4,u,7>, <2,4,u,7>
+ 2699675470U, // <7,2,5,0>: Cost 3 vext3 <2,5,0,7>, <2,5,0,7>
+ 3766486867U, // <7,2,5,1>: Cost 4 vext3 <1,3,5,7>, <2,5,1,3>
+ 2699822944U, // <7,2,5,2>: Cost 3 vext3 <2,5,2,7>, <2,5,2,7>
+ 2692745065U, // <7,2,5,3>: Cost 3 vext3 <1,3,5,7>, <2,5,3,7>
+ 2699970418U, // <7,2,5,4>: Cost 3 vext3 <2,5,4,7>, <2,5,4,7>
+ 3766486907U, // <7,2,5,5>: Cost 4 vext3 <1,3,5,7>, <2,5,5,7>
+ 2700117892U, // <7,2,5,6>: Cost 3 vext3 <2,5,6,7>, <2,5,6,7>
+ 3771795334U, // <7,2,5,7>: Cost 4 vext3 <2,2,5,7>, <2,5,7,0>
+ 2692745110U, // <7,2,5,u>: Cost 3 vext3 <1,3,5,7>, <2,5,u,7>
+ 2572894310U, // <7,2,6,0>: Cost 3 vext1 <3,7,2,6>, LHS
+ 2712061860U, // <7,2,6,1>: Cost 3 vext3 RHS, <2,6,1,3>
+ 2700486577U, // <7,2,6,2>: Cost 3 vext3 <2,6,2,7>, <2,6,2,7>
+ 1626818490U, // <7,2,6,3>: Cost 2 vext3 <2,6,3,7>, <2,6,3,7>
+ 2572897590U, // <7,2,6,4>: Cost 3 vext1 <3,7,2,6>, RHS
+ 2700707788U, // <7,2,6,5>: Cost 3 vext3 <2,6,5,7>, <2,6,5,7>
+ 2700781525U, // <7,2,6,6>: Cost 3 vext3 <2,6,6,7>, <2,6,6,7>
+ 3774597086U, // <7,2,6,7>: Cost 4 vext3 <2,6,7,7>, <2,6,7,7>
+ 1627187175U, // <7,2,6,u>: Cost 2 vext3 <2,6,u,7>, <2,6,u,7>
+ 2735949802U, // <7,2,7,0>: Cost 3 vext3 RHS, <2,7,0,1>
+ 3780200434U, // <7,2,7,1>: Cost 4 vext3 <3,6,2,7>, <2,7,1,0>
+ 3773564928U, // <7,2,7,2>: Cost 4 vext3 <2,5,2,7>, <2,7,2,5>
+ 2986541158U, // <7,2,7,3>: Cost 3 vzipr <5,5,7,7>, LHS
+ 2554989878U, // <7,2,7,4>: Cost 3 vext1 <0,7,2,7>, RHS
+ 3775113245U, // <7,2,7,5>: Cost 4 vext3 <2,7,5,7>, <2,7,5,7>
+ 4060283228U, // <7,2,7,6>: Cost 4 vzipr <5,5,7,7>, <0,4,2,6>
+ 2554992236U, // <7,2,7,7>: Cost 3 vext1 <0,7,2,7>, <7,7,7,7>
+ 2986541163U, // <7,2,7,u>: Cost 3 vzipr <5,5,7,7>, LHS
+ 1638320187U, // <7,2,u,0>: Cost 2 vext3 RHS, <2,u,0,1>
+ 2693924936U, // <7,2,u,1>: Cost 3 vext3 <1,5,3,7>, <2,u,1,5>
+ 1638319720U, // <7,2,u,2>: Cost 2 vext3 RHS, <2,2,2,2>
+ 1628145756U, // <7,2,u,3>: Cost 2 vext3 <2,u,3,7>, <2,u,3,7>
+ 1638320227U, // <7,2,u,4>: Cost 2 vext3 RHS, <2,u,4,5>
+ 2702035054U, // <7,2,u,5>: Cost 3 vext3 <2,u,5,7>, <2,u,5,7>
+ 2702108791U, // <7,2,u,6>: Cost 3 vext3 <2,u,6,7>, <2,u,6,7>
+ 2735949945U, // <7,2,u,7>: Cost 3 vext3 RHS, <2,u,7,0>
+ 1628514441U, // <7,2,u,u>: Cost 2 vext3 <2,u,u,7>, <2,u,u,7>
+ 2712062091U, // <7,3,0,0>: Cost 3 vext3 RHS, <3,0,0,0>
+ 1638320278U, // <7,3,0,1>: Cost 2 vext3 RHS, <3,0,1,2>
+ 2712062109U, // <7,3,0,2>: Cost 3 vext3 RHS, <3,0,2,0>
+ 2590836886U, // <7,3,0,3>: Cost 3 vext1 <6,7,3,0>, <3,0,1,2>
+ 2712062128U, // <7,3,0,4>: Cost 3 vext3 RHS, <3,0,4,1>
+ 2712062138U, // <7,3,0,5>: Cost 3 vext3 RHS, <3,0,5,2>
+ 2590839656U, // <7,3,0,6>: Cost 3 vext1 <6,7,3,0>, <6,7,3,0>
+ 3311414017U, // <7,3,0,7>: Cost 4 vrev <3,7,7,0>
+ 1638320341U, // <7,3,0,u>: Cost 2 vext3 RHS, <3,0,u,2>
+ 2237164227U, // <7,3,1,0>: Cost 3 vrev <3,7,0,1>
+ 2712062182U, // <7,3,1,1>: Cost 3 vext3 RHS, <3,1,1,1>
+ 2712062193U, // <7,3,1,2>: Cost 3 vext3 RHS, <3,1,2,3>
+ 2692745468U, // <7,3,1,3>: Cost 3 vext3 <1,3,5,7>, <3,1,3,5>
+ 2712062214U, // <7,3,1,4>: Cost 3 vext3 RHS, <3,1,4,6>
+ 2693925132U, // <7,3,1,5>: Cost 3 vext3 <1,5,3,7>, <3,1,5,3>
+ 3768183059U, // <7,3,1,6>: Cost 4 vext3 <1,6,1,7>, <3,1,6,1>
+ 2692745504U, // <7,3,1,7>: Cost 3 vext3 <1,3,5,7>, <3,1,7,5>
+ 2696063273U, // <7,3,1,u>: Cost 3 vext3 <1,u,5,7>, <3,1,u,5>
+ 2712062254U, // <7,3,2,0>: Cost 3 vext3 RHS, <3,2,0,1>
+ 2712062262U, // <7,3,2,1>: Cost 3 vext3 RHS, <3,2,1,0>
+ 2712062273U, // <7,3,2,2>: Cost 3 vext3 RHS, <3,2,2,2>
+ 2712062280U, // <7,3,2,3>: Cost 3 vext3 RHS, <3,2,3,0>
+ 2712062294U, // <7,3,2,4>: Cost 3 vext3 RHS, <3,2,4,5>
+ 2712062302U, // <7,3,2,5>: Cost 3 vext3 RHS, <3,2,5,4>
+ 2700560742U, // <7,3,2,6>: Cost 3 vext3 <2,6,3,7>, <3,2,6,3>
+ 2712062319U, // <7,3,2,7>: Cost 3 vext3 RHS, <3,2,7,3>
+ 2712062325U, // <7,3,2,u>: Cost 3 vext3 RHS, <3,2,u,0>
+ 2712062335U, // <7,3,3,0>: Cost 3 vext3 RHS, <3,3,0,1>
+ 2636368158U, // <7,3,3,1>: Cost 3 vext2 <3,1,7,3>, <3,1,7,3>
+ 2637031791U, // <7,3,3,2>: Cost 3 vext2 <3,2,7,3>, <3,2,7,3>
+ 1638320540U, // <7,3,3,3>: Cost 2 vext3 RHS, <3,3,3,3>
+ 2712062374U, // <7,3,3,4>: Cost 3 vext3 RHS, <3,3,4,4>
+ 2704689586U, // <7,3,3,5>: Cost 3 vext3 <3,3,5,7>, <3,3,5,7>
+ 2590864235U, // <7,3,3,6>: Cost 3 vext1 <6,7,3,3>, <6,7,3,3>
+ 2704837060U, // <7,3,3,7>: Cost 3 vext3 <3,3,7,7>, <3,3,7,7>
+ 1638320540U, // <7,3,3,u>: Cost 2 vext3 RHS, <3,3,3,3>
+ 2712062416U, // <7,3,4,0>: Cost 3 vext3 RHS, <3,4,0,1>
+ 2712062426U, // <7,3,4,1>: Cost 3 vext3 RHS, <3,4,1,2>
+ 2566981640U, // <7,3,4,2>: Cost 3 vext1 <2,7,3,4>, <2,7,3,4>
+ 2712062447U, // <7,3,4,3>: Cost 3 vext3 RHS, <3,4,3,5>
+ 2712062456U, // <7,3,4,4>: Cost 3 vext3 RHS, <3,4,4,5>
+ 1638320642U, // <7,3,4,5>: Cost 2 vext3 RHS, <3,4,5,6>
+ 2648313204U, // <7,3,4,6>: Cost 3 vext2 <5,1,7,3>, <4,6,4,6>
+ 3311446789U, // <7,3,4,7>: Cost 4 vrev <3,7,7,4>
+ 1638320669U, // <7,3,4,u>: Cost 2 vext3 RHS, <3,4,u,6>
+ 2602819686U, // <7,3,5,0>: Cost 3 vext1 <u,7,3,5>, LHS
+ 1574571728U, // <7,3,5,1>: Cost 2 vext2 <5,1,7,3>, <5,1,7,3>
+ 2648977185U, // <7,3,5,2>: Cost 3 vext2 <5,2,7,3>, <5,2,7,3>
+ 2705869378U, // <7,3,5,3>: Cost 3 vext3 <3,5,3,7>, <3,5,3,7>
+ 2237491947U, // <7,3,5,4>: Cost 3 vrev <3,7,4,5>
+ 2706016852U, // <7,3,5,5>: Cost 3 vext3 <3,5,5,7>, <3,5,5,7>
+ 2648313954U, // <7,3,5,6>: Cost 3 vext2 <5,1,7,3>, <5,6,7,0>
+ 2692745823U, // <7,3,5,7>: Cost 3 vext3 <1,3,5,7>, <3,5,7,0>
+ 1579217159U, // <7,3,5,u>: Cost 2 vext2 <5,u,7,3>, <5,u,7,3>
+ 2706311800U, // <7,3,6,0>: Cost 3 vext3 <3,6,0,7>, <3,6,0,7>
+ 2654286249U, // <7,3,6,1>: Cost 3 vext2 <6,1,7,3>, <6,1,7,3>
+ 1581208058U, // <7,3,6,2>: Cost 2 vext2 <6,2,7,3>, <6,2,7,3>
+ 2706533011U, // <7,3,6,3>: Cost 3 vext3 <3,6,3,7>, <3,6,3,7>
+ 2706606748U, // <7,3,6,4>: Cost 3 vext3 <3,6,4,7>, <3,6,4,7>
+ 3780422309U, // <7,3,6,5>: Cost 4 vext3 <3,6,5,7>, <3,6,5,7>
+ 2712062637U, // <7,3,6,6>: Cost 3 vext3 RHS, <3,6,6,6>
+ 2706827959U, // <7,3,6,7>: Cost 3 vext3 <3,6,7,7>, <3,6,7,7>
+ 1585189856U, // <7,3,6,u>: Cost 2 vext2 <6,u,7,3>, <6,u,7,3>
+ 2693925571U, // <7,3,7,0>: Cost 3 vext3 <1,5,3,7>, <3,7,0,1>
+ 2693925584U, // <7,3,7,1>: Cost 3 vext3 <1,5,3,7>, <3,7,1,5>
+ 2700561114U, // <7,3,7,2>: Cost 3 vext3 <2,6,3,7>, <3,7,2,6>
+ 2572978916U, // <7,3,7,3>: Cost 3 vext1 <3,7,3,7>, <3,7,3,7>
+ 2693925611U, // <7,3,7,4>: Cost 3 vext3 <1,5,3,7>, <3,7,4,5>
+ 2707344118U, // <7,3,7,5>: Cost 3 vext3 <3,7,5,7>, <3,7,5,7>
+ 2654950894U, // <7,3,7,6>: Cost 3 vext2 <6,2,7,3>, <7,6,2,7>
+ 2648315500U, // <7,3,7,7>: Cost 3 vext2 <5,1,7,3>, <7,7,7,7>
+ 2693925643U, // <7,3,7,u>: Cost 3 vext3 <1,5,3,7>, <3,7,u,1>
+ 2237221578U, // <7,3,u,0>: Cost 3 vrev <3,7,0,u>
+ 1638320926U, // <7,3,u,1>: Cost 2 vext3 RHS, <3,u,1,2>
+ 1593153452U, // <7,3,u,2>: Cost 2 vext2 <u,2,7,3>, <u,2,7,3>
+ 1638320540U, // <7,3,u,3>: Cost 2 vext3 RHS, <3,3,3,3>
+ 2237516526U, // <7,3,u,4>: Cost 3 vrev <3,7,4,u>
+ 1638320966U, // <7,3,u,5>: Cost 2 vext3 RHS, <3,u,5,6>
+ 2712062796U, // <7,3,u,6>: Cost 3 vext3 RHS, <3,u,6,3>
+ 2692967250U, // <7,3,u,7>: Cost 3 vext3 <1,3,u,7>, <3,u,7,0>
+ 1638320989U, // <7,3,u,u>: Cost 2 vext3 RHS, <3,u,u,2>
+ 2651635712U, // <7,4,0,0>: Cost 3 vext2 <5,6,7,4>, <0,0,0,0>
+ 1577893990U, // <7,4,0,1>: Cost 2 vext2 <5,6,7,4>, LHS
+ 2651635876U, // <7,4,0,2>: Cost 3 vext2 <5,6,7,4>, <0,2,0,2>
+ 3785804672U, // <7,4,0,3>: Cost 4 vext3 RHS, <4,0,3,1>
+ 2651636050U, // <7,4,0,4>: Cost 3 vext2 <5,6,7,4>, <0,4,1,5>
+ 1638468498U, // <7,4,0,5>: Cost 2 vext3 RHS, <4,0,5,1>
+ 1638468508U, // <7,4,0,6>: Cost 2 vext3 RHS, <4,0,6,2>
+ 3787795364U, // <7,4,0,7>: Cost 4 vext3 RHS, <4,0,7,1>
+ 1640459181U, // <7,4,0,u>: Cost 2 vext3 RHS, <4,0,u,1>
+ 2651636470U, // <7,4,1,0>: Cost 3 vext2 <5,6,7,4>, <1,0,3,2>
+ 2651636532U, // <7,4,1,1>: Cost 3 vext2 <5,6,7,4>, <1,1,1,1>
+ 2712062922U, // <7,4,1,2>: Cost 3 vext3 RHS, <4,1,2,3>
+ 2639029248U, // <7,4,1,3>: Cost 3 vext2 <3,5,7,4>, <1,3,5,7>
+ 2712062940U, // <7,4,1,4>: Cost 3 vext3 RHS, <4,1,4,3>
+ 2712062946U, // <7,4,1,5>: Cost 3 vext3 RHS, <4,1,5,0>
+ 2712062958U, // <7,4,1,6>: Cost 3 vext3 RHS, <4,1,6,3>
+ 3785804791U, // <7,4,1,7>: Cost 4 vext3 RHS, <4,1,7,3>
+ 2712062973U, // <7,4,1,u>: Cost 3 vext3 RHS, <4,1,u,0>
+ 3785804807U, // <7,4,2,0>: Cost 4 vext3 RHS, <4,2,0,1>
+ 3785804818U, // <7,4,2,1>: Cost 4 vext3 RHS, <4,2,1,3>
+ 2651637352U, // <7,4,2,2>: Cost 3 vext2 <5,6,7,4>, <2,2,2,2>
+ 2651637414U, // <7,4,2,3>: Cost 3 vext2 <5,6,7,4>, <2,3,0,1>
+ 3716753194U, // <7,4,2,4>: Cost 4 vext2 <4,2,7,4>, <2,4,5,7>
+ 2712063030U, // <7,4,2,5>: Cost 3 vext3 RHS, <4,2,5,3>
+ 2712063036U, // <7,4,2,6>: Cost 3 vext3 RHS, <4,2,6,0>
+ 3773123658U, // <7,4,2,7>: Cost 4 vext3 <2,4,5,7>, <4,2,7,5>
+ 2712063054U, // <7,4,2,u>: Cost 3 vext3 RHS, <4,2,u,0>
+ 2651637910U, // <7,4,3,0>: Cost 3 vext2 <5,6,7,4>, <3,0,1,2>
+ 3712772348U, // <7,4,3,1>: Cost 4 vext2 <3,5,7,4>, <3,1,3,5>
+ 3785804906U, // <7,4,3,2>: Cost 4 vext3 RHS, <4,3,2,1>
+ 2651638172U, // <7,4,3,3>: Cost 3 vext2 <5,6,7,4>, <3,3,3,3>
+ 2651638274U, // <7,4,3,4>: Cost 3 vext2 <5,6,7,4>, <3,4,5,6>
+ 2639030883U, // <7,4,3,5>: Cost 3 vext2 <3,5,7,4>, <3,5,7,4>
+ 2712063122U, // <7,4,3,6>: Cost 3 vext3 RHS, <4,3,6,5>
+ 3712772836U, // <7,4,3,7>: Cost 4 vext2 <3,5,7,4>, <3,7,3,7>
+ 2641021782U, // <7,4,3,u>: Cost 3 vext2 <3,u,7,4>, <3,u,7,4>
+ 2714053802U, // <7,4,4,0>: Cost 3 vext3 RHS, <4,4,0,2>
+ 3785804978U, // <7,4,4,1>: Cost 4 vext3 RHS, <4,4,1,1>
+ 3716754505U, // <7,4,4,2>: Cost 4 vext2 <4,2,7,4>, <4,2,7,4>
+ 3785804998U, // <7,4,4,3>: Cost 4 vext3 RHS, <4,4,3,3>
+ 1638321360U, // <7,4,4,4>: Cost 2 vext3 RHS, <4,4,4,4>
+ 1638468826U, // <7,4,4,5>: Cost 2 vext3 RHS, <4,4,5,5>
+ 1638468836U, // <7,4,4,6>: Cost 2 vext3 RHS, <4,4,6,6>
+ 3785215214U, // <7,4,4,7>: Cost 4 vext3 <4,4,7,7>, <4,4,7,7>
+ 1640459509U, // <7,4,4,u>: Cost 2 vext3 RHS, <4,4,u,5>
+ 1517207654U, // <7,4,5,0>: Cost 2 vext1 <6,7,4,5>, LHS
+ 2573034640U, // <7,4,5,1>: Cost 3 vext1 <3,7,4,5>, <1,5,3,7>
+ 2712063246U, // <7,4,5,2>: Cost 3 vext3 RHS, <4,5,2,3>
+ 2573036267U, // <7,4,5,3>: Cost 3 vext1 <3,7,4,5>, <3,7,4,5>
+ 1517210934U, // <7,4,5,4>: Cost 2 vext1 <6,7,4,5>, RHS
+ 2711989549U, // <7,4,5,5>: Cost 3 vext3 <4,5,5,7>, <4,5,5,7>
+ 564579638U, // <7,4,5,6>: Cost 1 vext3 RHS, RHS
+ 2651639976U, // <7,4,5,7>: Cost 3 vext2 <5,6,7,4>, <5,7,5,7>
+ 564579656U, // <7,4,5,u>: Cost 1 vext3 RHS, RHS
+ 2712063307U, // <7,4,6,0>: Cost 3 vext3 RHS, <4,6,0,1>
+ 3767668056U, // <7,4,6,1>: Cost 4 vext3 <1,5,3,7>, <4,6,1,5>
+ 2651640314U, // <7,4,6,2>: Cost 3 vext2 <5,6,7,4>, <6,2,7,3>
+ 2655621708U, // <7,4,6,3>: Cost 3 vext2 <6,3,7,4>, <6,3,7,4>
+ 1638468980U, // <7,4,6,4>: Cost 2 vext3 RHS, <4,6,4,6>
+ 2712063358U, // <7,4,6,5>: Cost 3 vext3 RHS, <4,6,5,7>
+ 2712063367U, // <7,4,6,6>: Cost 3 vext3 RHS, <4,6,6,7>
+ 2712210826U, // <7,4,6,7>: Cost 3 vext3 RHS, <4,6,7,1>
+ 1638469012U, // <7,4,6,u>: Cost 2 vext3 RHS, <4,6,u,2>
+ 2651640826U, // <7,4,7,0>: Cost 3 vext2 <5,6,7,4>, <7,0,1,2>
+ 3773713830U, // <7,4,7,1>: Cost 4 vext3 <2,5,4,7>, <4,7,1,2>
+ 3773713842U, // <7,4,7,2>: Cost 4 vext3 <2,5,4,7>, <4,7,2,5>
+ 3780349372U, // <7,4,7,3>: Cost 4 vext3 <3,6,4,7>, <4,7,3,6>
+ 2651641140U, // <7,4,7,4>: Cost 3 vext2 <5,6,7,4>, <7,4,0,1>
+ 2712210888U, // <7,4,7,5>: Cost 3 vext3 RHS, <4,7,5,0>
+ 2712210898U, // <7,4,7,6>: Cost 3 vext3 RHS, <4,7,6,1>
+ 2651641452U, // <7,4,7,7>: Cost 3 vext2 <5,6,7,4>, <7,7,7,7>
+ 2713538026U, // <7,4,7,u>: Cost 3 vext3 <4,7,u,7>, <4,7,u,7>
+ 1517232230U, // <7,4,u,0>: Cost 2 vext1 <6,7,4,u>, LHS
+ 1577899822U, // <7,4,u,1>: Cost 2 vext2 <5,6,7,4>, LHS
+ 2712063489U, // <7,4,u,2>: Cost 3 vext3 RHS, <4,u,2,3>
+ 2573060846U, // <7,4,u,3>: Cost 3 vext1 <3,7,4,u>, <3,7,4,u>
+ 1640312342U, // <7,4,u,4>: Cost 2 vext3 RHS, <4,u,4,6>
+ 1638469146U, // <7,4,u,5>: Cost 2 vext3 RHS, <4,u,5,1>
+ 564579881U, // <7,4,u,6>: Cost 1 vext3 RHS, RHS
+ 2714054192U, // <7,4,u,7>: Cost 3 vext3 RHS, <4,u,7,5>
+ 564579899U, // <7,4,u,u>: Cost 1 vext3 RHS, RHS
+ 2579038310U, // <7,5,0,0>: Cost 3 vext1 <4,7,5,0>, LHS
+ 2636382310U, // <7,5,0,1>: Cost 3 vext2 <3,1,7,5>, LHS
+ 2796339302U, // <7,5,0,2>: Cost 3 vuzpl <7,4,5,6>, LHS
+ 3646810719U, // <7,5,0,3>: Cost 4 vext1 <3,7,5,0>, <3,5,7,0>
+ 2712063586U, // <7,5,0,4>: Cost 3 vext3 RHS, <5,0,4,1>
+ 2735951467U, // <7,5,0,5>: Cost 3 vext3 RHS, <5,0,5,1>
+ 2735951476U, // <7,5,0,6>: Cost 3 vext3 RHS, <5,0,6,1>
+ 2579043322U, // <7,5,0,7>: Cost 3 vext1 <4,7,5,0>, <7,0,1,2>
+ 2636382877U, // <7,5,0,u>: Cost 3 vext2 <3,1,7,5>, LHS
+ 2712211087U, // <7,5,1,0>: Cost 3 vext3 RHS, <5,1,0,1>
+ 3698180916U, // <7,5,1,1>: Cost 4 vext2 <1,1,7,5>, <1,1,1,1>
+ 3710124950U, // <7,5,1,2>: Cost 4 vext2 <3,1,7,5>, <1,2,3,0>
+ 2636383232U, // <7,5,1,3>: Cost 3 vext2 <3,1,7,5>, <1,3,5,7>
+ 2712211127U, // <7,5,1,4>: Cost 3 vext3 RHS, <5,1,4,5>
+ 2590994128U, // <7,5,1,5>: Cost 3 vext1 <6,7,5,1>, <5,1,7,3>
+ 2590995323U, // <7,5,1,6>: Cost 3 vext1 <6,7,5,1>, <6,7,5,1>
+ 1638469328U, // <7,5,1,7>: Cost 2 vext3 RHS, <5,1,7,3>
+ 1638469337U, // <7,5,1,u>: Cost 2 vext3 RHS, <5,1,u,3>
+ 3785805536U, // <7,5,2,0>: Cost 4 vext3 RHS, <5,2,0,1>
+ 3785805544U, // <7,5,2,1>: Cost 4 vext3 RHS, <5,2,1,0>
+ 3704817288U, // <7,5,2,2>: Cost 4 vext2 <2,2,7,5>, <2,2,5,7>
+ 2712063742U, // <7,5,2,3>: Cost 3 vext3 RHS, <5,2,3,4>
+ 3716761386U, // <7,5,2,4>: Cost 4 vext2 <4,2,7,5>, <2,4,5,7>
+ 2714054415U, // <7,5,2,5>: Cost 3 vext3 RHS, <5,2,5,3>
+ 3774304024U, // <7,5,2,6>: Cost 4 vext3 <2,6,3,7>, <5,2,6,3>
+ 2712063777U, // <7,5,2,7>: Cost 3 vext3 RHS, <5,2,7,3>
+ 2712063787U, // <7,5,2,u>: Cost 3 vext3 RHS, <5,2,u,4>
+ 3634888806U, // <7,5,3,0>: Cost 4 vext1 <1,7,5,3>, LHS
+ 2636384544U, // <7,5,3,1>: Cost 3 vext2 <3,1,7,5>, <3,1,7,5>
+ 3710790001U, // <7,5,3,2>: Cost 4 vext2 <3,2,7,5>, <3,2,7,5>
+ 3710126492U, // <7,5,3,3>: Cost 4 vext2 <3,1,7,5>, <3,3,3,3>
+ 3634892086U, // <7,5,3,4>: Cost 4 vext1 <1,7,5,3>, RHS
+ 2639039076U, // <7,5,3,5>: Cost 3 vext2 <3,5,7,5>, <3,5,7,5>
+ 3713444533U, // <7,5,3,6>: Cost 4 vext2 <3,6,7,5>, <3,6,7,5>
+ 2693926767U, // <7,5,3,7>: Cost 3 vext3 <1,5,3,7>, <5,3,7,0>
+ 2712063864U, // <7,5,3,u>: Cost 3 vext3 RHS, <5,3,u,0>
+ 2579071078U, // <7,5,4,0>: Cost 3 vext1 <4,7,5,4>, LHS
+ 3646841856U, // <7,5,4,1>: Cost 4 vext1 <3,7,5,4>, <1,3,5,7>
+ 3716762698U, // <7,5,4,2>: Cost 4 vext2 <4,2,7,5>, <4,2,7,5>
+ 3646843491U, // <7,5,4,3>: Cost 4 vext1 <3,7,5,4>, <3,5,7,4>
+ 2579074358U, // <7,5,4,4>: Cost 3 vext1 <4,7,5,4>, RHS
+ 2636385590U, // <7,5,4,5>: Cost 3 vext2 <3,1,7,5>, RHS
+ 2645675406U, // <7,5,4,6>: Cost 3 vext2 <4,6,7,5>, <4,6,7,5>
+ 1638322118U, // <7,5,4,7>: Cost 2 vext3 RHS, <5,4,7,6>
+ 1638469583U, // <7,5,4,u>: Cost 2 vext3 RHS, <5,4,u,6>
+ 2714054611U, // <7,5,5,0>: Cost 3 vext3 RHS, <5,5,0,1>
+ 2652974800U, // <7,5,5,1>: Cost 3 vext2 <5,u,7,5>, <5,1,7,3>
+ 3710127905U, // <7,5,5,2>: Cost 4 vext2 <3,1,7,5>, <5,2,7,3>
+ 3785805808U, // <7,5,5,3>: Cost 4 vext3 RHS, <5,5,3,3>
+ 2712211450U, // <7,5,5,4>: Cost 3 vext3 RHS, <5,5,4,4>
+ 1638322180U, // <7,5,5,5>: Cost 2 vext3 RHS, <5,5,5,5>
+ 2712064014U, // <7,5,5,6>: Cost 3 vext3 RHS, <5,5,6,6>
+ 1638469656U, // <7,5,5,7>: Cost 2 vext3 RHS, <5,5,7,7>
+ 1638469665U, // <7,5,5,u>: Cost 2 vext3 RHS, <5,5,u,7>
+ 2712064036U, // <7,5,6,0>: Cost 3 vext3 RHS, <5,6,0,1>
+ 2714054707U, // <7,5,6,1>: Cost 3 vext3 RHS, <5,6,1,7>
+ 3785805879U, // <7,5,6,2>: Cost 4 vext3 RHS, <5,6,2,2>
+ 2712064066U, // <7,5,6,3>: Cost 3 vext3 RHS, <5,6,3,4>
+ 2712064076U, // <7,5,6,4>: Cost 3 vext3 RHS, <5,6,4,5>
+ 2714054743U, // <7,5,6,5>: Cost 3 vext3 RHS, <5,6,5,7>
+ 2712064096U, // <7,5,6,6>: Cost 3 vext3 RHS, <5,6,6,7>
+ 1638322274U, // <7,5,6,7>: Cost 2 vext3 RHS, <5,6,7,0>
+ 1638469739U, // <7,5,6,u>: Cost 2 vext3 RHS, <5,6,u,0>
+ 1511325798U, // <7,5,7,0>: Cost 2 vext1 <5,7,5,7>, LHS
+ 2692747392U, // <7,5,7,1>: Cost 3 vext3 <1,3,5,7>, <5,7,1,3>
+ 2585069160U, // <7,5,7,2>: Cost 3 vext1 <5,7,5,7>, <2,2,2,2>
+ 2573126390U, // <7,5,7,3>: Cost 3 vext1 <3,7,5,7>, <3,7,5,7>
+ 1511329078U, // <7,5,7,4>: Cost 2 vext1 <5,7,5,7>, RHS
+ 1638469800U, // <7,5,7,5>: Cost 2 vext3 RHS, <5,7,5,7>
+ 2712211626U, // <7,5,7,6>: Cost 3 vext3 RHS, <5,7,6,0>
+ 2712211636U, // <7,5,7,7>: Cost 3 vext3 RHS, <5,7,7,1>
+ 1638469823U, // <7,5,7,u>: Cost 2 vext3 RHS, <5,7,u,3>
+ 1511333990U, // <7,5,u,0>: Cost 2 vext1 <5,7,5,u>, LHS
+ 2636388142U, // <7,5,u,1>: Cost 3 vext2 <3,1,7,5>, LHS
+ 2712211671U, // <7,5,u,2>: Cost 3 vext3 RHS, <5,u,2,0>
+ 2573134583U, // <7,5,u,3>: Cost 3 vext1 <3,7,5,u>, <3,7,5,u>
+ 1511337270U, // <7,5,u,4>: Cost 2 vext1 <5,7,5,u>, RHS
+ 1638469881U, // <7,5,u,5>: Cost 2 vext3 RHS, <5,u,5,7>
+ 2712064258U, // <7,5,u,6>: Cost 3 vext3 RHS, <5,u,6,7>
+ 1638469892U, // <7,5,u,7>: Cost 2 vext3 RHS, <5,u,7,0>
+ 1638469904U, // <7,5,u,u>: Cost 2 vext3 RHS, <5,u,u,3>
+ 2650324992U, // <7,6,0,0>: Cost 3 vext2 <5,4,7,6>, <0,0,0,0>
+ 1576583270U, // <7,6,0,1>: Cost 2 vext2 <5,4,7,6>, LHS
+ 2712064300U, // <7,6,0,2>: Cost 3 vext3 RHS, <6,0,2,4>
+ 2255295336U, // <7,6,0,3>: Cost 3 vrev <6,7,3,0>
+ 2712064316U, // <7,6,0,4>: Cost 3 vext3 RHS, <6,0,4,2>
+ 2585088098U, // <7,6,0,5>: Cost 3 vext1 <5,7,6,0>, <5,6,7,0>
+ 2735952204U, // <7,6,0,6>: Cost 3 vext3 RHS, <6,0,6,0>
+ 2712211799U, // <7,6,0,7>: Cost 3 vext3 RHS, <6,0,7,2>
+ 1576583837U, // <7,6,0,u>: Cost 2 vext2 <5,4,7,6>, LHS
+ 1181340494U, // <7,6,1,0>: Cost 2 vrev <6,7,0,1>
+ 2650325812U, // <7,6,1,1>: Cost 3 vext2 <5,4,7,6>, <1,1,1,1>
+ 2650325910U, // <7,6,1,2>: Cost 3 vext2 <5,4,7,6>, <1,2,3,0>
+ 2650325976U, // <7,6,1,3>: Cost 3 vext2 <5,4,7,6>, <1,3,1,3>
+ 2579123510U, // <7,6,1,4>: Cost 3 vext1 <4,7,6,1>, RHS
+ 2650326160U, // <7,6,1,5>: Cost 3 vext2 <5,4,7,6>, <1,5,3,7>
+ 2714055072U, // <7,6,1,6>: Cost 3 vext3 RHS, <6,1,6,3>
+ 2712064425U, // <7,6,1,7>: Cost 3 vext3 RHS, <6,1,7,3>
+ 1181930390U, // <7,6,1,u>: Cost 2 vrev <6,7,u,1>
+ 2712211897U, // <7,6,2,0>: Cost 3 vext3 RHS, <6,2,0,1>
+ 2714055108U, // <7,6,2,1>: Cost 3 vext3 RHS, <6,2,1,3>
+ 2650326632U, // <7,6,2,2>: Cost 3 vext2 <5,4,7,6>, <2,2,2,2>
+ 2650326694U, // <7,6,2,3>: Cost 3 vext2 <5,4,7,6>, <2,3,0,1>
+ 2714055137U, // <7,6,2,4>: Cost 3 vext3 RHS, <6,2,4,5>
+ 2714055148U, // <7,6,2,5>: Cost 3 vext3 RHS, <6,2,5,7>
+ 2650326970U, // <7,6,2,6>: Cost 3 vext2 <5,4,7,6>, <2,6,3,7>
+ 1638470138U, // <7,6,2,7>: Cost 2 vext3 RHS, <6,2,7,3>
+ 1638470147U, // <7,6,2,u>: Cost 2 vext3 RHS, <6,2,u,3>
+ 2650327190U, // <7,6,3,0>: Cost 3 vext2 <5,4,7,6>, <3,0,1,2>
+ 2255172441U, // <7,6,3,1>: Cost 3 vrev <6,7,1,3>
+ 2255246178U, // <7,6,3,2>: Cost 3 vrev <6,7,2,3>
+ 2650327452U, // <7,6,3,3>: Cost 3 vext2 <5,4,7,6>, <3,3,3,3>
+ 2712064562U, // <7,6,3,4>: Cost 3 vext3 RHS, <6,3,4,5>
+ 2650327627U, // <7,6,3,5>: Cost 3 vext2 <5,4,7,6>, <3,5,4,7>
+ 3713452726U, // <7,6,3,6>: Cost 4 vext2 <3,6,7,6>, <3,6,7,6>
+ 2700563016U, // <7,6,3,7>: Cost 3 vext3 <2,6,3,7>, <6,3,7,0>
+ 2712064593U, // <7,6,3,u>: Cost 3 vext3 RHS, <6,3,u,0>
+ 2650327954U, // <7,6,4,0>: Cost 3 vext2 <5,4,7,6>, <4,0,5,1>
+ 2735952486U, // <7,6,4,1>: Cost 3 vext3 RHS, <6,4,1,3>
+ 2735952497U, // <7,6,4,2>: Cost 3 vext3 RHS, <6,4,2,5>
+ 2255328108U, // <7,6,4,3>: Cost 3 vrev <6,7,3,4>
+ 2712212100U, // <7,6,4,4>: Cost 3 vext3 RHS, <6,4,4,6>
+ 1576586550U, // <7,6,4,5>: Cost 2 vext2 <5,4,7,6>, RHS
+ 2714055312U, // <7,6,4,6>: Cost 3 vext3 RHS, <6,4,6,0>
+ 2712212126U, // <7,6,4,7>: Cost 3 vext3 RHS, <6,4,7,5>
+ 1576586793U, // <7,6,4,u>: Cost 2 vext2 <5,4,7,6>, RHS
+ 2579152998U, // <7,6,5,0>: Cost 3 vext1 <4,7,6,5>, LHS
+ 2650328784U, // <7,6,5,1>: Cost 3 vext2 <5,4,7,6>, <5,1,7,3>
+ 2714055364U, // <7,6,5,2>: Cost 3 vext3 RHS, <6,5,2,7>
+ 3785806538U, // <7,6,5,3>: Cost 4 vext3 RHS, <6,5,3,4>
+ 1576587206U, // <7,6,5,4>: Cost 2 vext2 <5,4,7,6>, <5,4,7,6>
+ 2650329092U, // <7,6,5,5>: Cost 3 vext2 <5,4,7,6>, <5,5,5,5>
+ 2650329186U, // <7,6,5,6>: Cost 3 vext2 <5,4,7,6>, <5,6,7,0>
+ 2712064753U, // <7,6,5,7>: Cost 3 vext3 RHS, <6,5,7,7>
+ 1181963162U, // <7,6,5,u>: Cost 2 vrev <6,7,u,5>
+ 2714055421U, // <7,6,6,0>: Cost 3 vext3 RHS, <6,6,0,1>
+ 2714055432U, // <7,6,6,1>: Cost 3 vext3 RHS, <6,6,1,3>
+ 2650329594U, // <7,6,6,2>: Cost 3 vext2 <5,4,7,6>, <6,2,7,3>
+ 3785806619U, // <7,6,6,3>: Cost 4 vext3 RHS, <6,6,3,4>
+ 2712212260U, // <7,6,6,4>: Cost 3 vext3 RHS, <6,6,4,4>
+ 2714055472U, // <7,6,6,5>: Cost 3 vext3 RHS, <6,6,5,7>
+ 1638323000U, // <7,6,6,6>: Cost 2 vext3 RHS, <6,6,6,6>
+ 1638470466U, // <7,6,6,7>: Cost 2 vext3 RHS, <6,6,7,7>
+ 1638470475U, // <7,6,6,u>: Cost 2 vext3 RHS, <6,6,u,7>
+ 1638323022U, // <7,6,7,0>: Cost 2 vext3 RHS, <6,7,0,1>
+ 2712064854U, // <7,6,7,1>: Cost 3 vext3 RHS, <6,7,1,0>
+ 2712064865U, // <7,6,7,2>: Cost 3 vext3 RHS, <6,7,2,2>
+ 2712064872U, // <7,6,7,3>: Cost 3 vext3 RHS, <6,7,3,0>
+ 1638323062U, // <7,6,7,4>: Cost 2 vext3 RHS, <6,7,4,5>
+ 2712064894U, // <7,6,7,5>: Cost 3 vext3 RHS, <6,7,5,4>
+ 2712064905U, // <7,6,7,6>: Cost 3 vext3 RHS, <6,7,6,6>
+ 2712064915U, // <7,6,7,7>: Cost 3 vext3 RHS, <6,7,7,7>
+ 1638323094U, // <7,6,7,u>: Cost 2 vext3 RHS, <6,7,u,1>
+ 1638470559U, // <7,6,u,0>: Cost 2 vext3 RHS, <6,u,0,1>
+ 1576589102U, // <7,6,u,1>: Cost 2 vext2 <5,4,7,6>, LHS
+ 2712212402U, // <7,6,u,2>: Cost 3 vext3 RHS, <6,u,2,2>
+ 2712212409U, // <7,6,u,3>: Cost 3 vext3 RHS, <6,u,3,0>
+ 1638470599U, // <7,6,u,4>: Cost 2 vext3 RHS, <6,u,4,5>
+ 1576589466U, // <7,6,u,5>: Cost 2 vext2 <5,4,7,6>, RHS
+ 1638323000U, // <7,6,u,6>: Cost 2 vext3 RHS, <6,6,6,6>
+ 1638470624U, // <7,6,u,7>: Cost 2 vext3 RHS, <6,u,7,3>
+ 1638470631U, // <7,6,u,u>: Cost 2 vext3 RHS, <6,u,u,1>
+ 2712065007U, // <7,7,0,0>: Cost 3 vext3 RHS, <7,0,0,0>
+ 1638323194U, // <7,7,0,1>: Cost 2 vext3 RHS, <7,0,1,2>
+ 2712065025U, // <7,7,0,2>: Cost 3 vext3 RHS, <7,0,2,0>
+ 3646958337U, // <7,7,0,3>: Cost 4 vext1 <3,7,7,0>, <3,7,7,0>
+ 2712065044U, // <7,7,0,4>: Cost 3 vext3 RHS, <7,0,4,1>
+ 2585161907U, // <7,7,0,5>: Cost 3 vext1 <5,7,7,0>, <5,7,7,0>
+ 2591134604U, // <7,7,0,6>: Cost 3 vext1 <6,7,7,0>, <6,7,7,0>
+ 2591134714U, // <7,7,0,7>: Cost 3 vext1 <6,7,7,0>, <7,0,1,2>
+ 1638323257U, // <7,7,0,u>: Cost 2 vext3 RHS, <7,0,u,2>
+ 2712065091U, // <7,7,1,0>: Cost 3 vext3 RHS, <7,1,0,3>
+ 2712065098U, // <7,7,1,1>: Cost 3 vext3 RHS, <7,1,1,1>
+ 2712065109U, // <7,7,1,2>: Cost 3 vext3 RHS, <7,1,2,3>
+ 2692748384U, // <7,7,1,3>: Cost 3 vext3 <1,3,5,7>, <7,1,3,5>
+ 2585169206U, // <7,7,1,4>: Cost 3 vext1 <5,7,7,1>, RHS
+ 2693928048U, // <7,7,1,5>: Cost 3 vext3 <1,5,3,7>, <7,1,5,3>
+ 2585170766U, // <7,7,1,6>: Cost 3 vext1 <5,7,7,1>, <6,7,0,1>
+ 2735953024U, // <7,7,1,7>: Cost 3 vext3 RHS, <7,1,7,1>
+ 2695918731U, // <7,7,1,u>: Cost 3 vext3 <1,u,3,7>, <7,1,u,3>
+ 3770471574U, // <7,7,2,0>: Cost 4 vext3 <2,0,5,7>, <7,2,0,5>
+ 3785807002U, // <7,7,2,1>: Cost 4 vext3 RHS, <7,2,1,0>
+ 2712065189U, // <7,7,2,2>: Cost 3 vext3 RHS, <7,2,2,2>
+ 2712065196U, // <7,7,2,3>: Cost 3 vext3 RHS, <7,2,3,0>
+ 3773125818U, // <7,7,2,4>: Cost 4 vext3 <2,4,5,7>, <7,2,4,5>
+ 3766490305U, // <7,7,2,5>: Cost 4 vext3 <1,3,5,7>, <7,2,5,3>
+ 2700563658U, // <7,7,2,6>: Cost 3 vext3 <2,6,3,7>, <7,2,6,3>
+ 2735953107U, // <7,7,2,7>: Cost 3 vext3 RHS, <7,2,7,3>
+ 2701890780U, // <7,7,2,u>: Cost 3 vext3 <2,u,3,7>, <7,2,u,3>
+ 2712065251U, // <7,7,3,0>: Cost 3 vext3 RHS, <7,3,0,1>
+ 3766490350U, // <7,7,3,1>: Cost 4 vext3 <1,3,5,7>, <7,3,1,3>
+ 3774305530U, // <7,7,3,2>: Cost 4 vext3 <2,6,3,7>, <7,3,2,6>
+ 2637728196U, // <7,7,3,3>: Cost 3 vext2 <3,3,7,7>, <3,3,7,7>
+ 2712065291U, // <7,7,3,4>: Cost 3 vext3 RHS, <7,3,4,5>
+ 2585186486U, // <7,7,3,5>: Cost 3 vext1 <5,7,7,3>, <5,7,7,3>
+ 2639719095U, // <7,7,3,6>: Cost 3 vext2 <3,6,7,7>, <3,6,7,7>
+ 2640382728U, // <7,7,3,7>: Cost 3 vext2 <3,7,7,7>, <3,7,7,7>
+ 2641046361U, // <7,7,3,u>: Cost 3 vext2 <3,u,7,7>, <3,u,7,7>
+ 2712212792U, // <7,7,4,0>: Cost 3 vext3 RHS, <7,4,0,5>
+ 3646989312U, // <7,7,4,1>: Cost 4 vext1 <3,7,7,4>, <1,3,5,7>
+ 3785807176U, // <7,7,4,2>: Cost 4 vext3 RHS, <7,4,2,3>
+ 3646991109U, // <7,7,4,3>: Cost 4 vext1 <3,7,7,4>, <3,7,7,4>
+ 2712065371U, // <7,7,4,4>: Cost 3 vext3 RHS, <7,4,4,4>
+ 1638323558U, // <7,7,4,5>: Cost 2 vext3 RHS, <7,4,5,6>
+ 2712212845U, // <7,7,4,6>: Cost 3 vext3 RHS, <7,4,6,4>
+ 2591167846U, // <7,7,4,7>: Cost 3 vext1 <6,7,7,4>, <7,4,5,6>
+ 1638323585U, // <7,7,4,u>: Cost 2 vext3 RHS, <7,4,u,6>
+ 2585198694U, // <7,7,5,0>: Cost 3 vext1 <5,7,7,5>, LHS
+ 2712212884U, // <7,7,5,1>: Cost 3 vext3 RHS, <7,5,1,7>
+ 3711471393U, // <7,7,5,2>: Cost 4 vext2 <3,3,7,7>, <5,2,7,3>
+ 2649673590U, // <7,7,5,3>: Cost 3 vext2 <5,3,7,7>, <5,3,7,7>
+ 2712065455U, // <7,7,5,4>: Cost 3 vext3 RHS, <7,5,4,7>
+ 1577259032U, // <7,7,5,5>: Cost 2 vext2 <5,5,7,7>, <5,5,7,7>
+ 2712065473U, // <7,7,5,6>: Cost 3 vext3 RHS, <7,5,6,7>
+ 2712212936U, // <7,7,5,7>: Cost 3 vext3 RHS, <7,5,7,5>
+ 1579249931U, // <7,7,5,u>: Cost 2 vext2 <5,u,7,7>, <5,u,7,7>
+ 2591178854U, // <7,7,6,0>: Cost 3 vext1 <6,7,7,6>, LHS
+ 2735953374U, // <7,7,6,1>: Cost 3 vext3 RHS, <7,6,1,0>
+ 2712212974U, // <7,7,6,2>: Cost 3 vext3 RHS, <7,6,2,7>
+ 2655646287U, // <7,7,6,3>: Cost 3 vext2 <6,3,7,7>, <6,3,7,7>
+ 2591182134U, // <7,7,6,4>: Cost 3 vext1 <6,7,7,6>, RHS
+ 2656973553U, // <7,7,6,5>: Cost 3 vext2 <6,5,7,7>, <6,5,7,7>
+ 1583895362U, // <7,7,6,6>: Cost 2 vext2 <6,6,7,7>, <6,6,7,7>
+ 2712065556U, // <7,7,6,7>: Cost 3 vext3 RHS, <7,6,7,0>
+ 1585222628U, // <7,7,6,u>: Cost 2 vext2 <6,u,7,7>, <6,u,7,7>
+ 1523417190U, // <7,7,7,0>: Cost 2 vext1 <7,7,7,7>, LHS
+ 2597159670U, // <7,7,7,1>: Cost 3 vext1 <7,7,7,7>, <1,0,3,2>
+ 2597160552U, // <7,7,7,2>: Cost 3 vext1 <7,7,7,7>, <2,2,2,2>
+ 2597161110U, // <7,7,7,3>: Cost 3 vext1 <7,7,7,7>, <3,0,1,2>
+ 1523420470U, // <7,7,7,4>: Cost 2 vext1 <7,7,7,7>, RHS
+ 2651002296U, // <7,7,7,5>: Cost 3 vext2 <5,5,7,7>, <7,5,5,7>
+ 2657637906U, // <7,7,7,6>: Cost 3 vext2 <6,6,7,7>, <7,6,6,7>
+ 363253046U, // <7,7,7,7>: Cost 1 vdup3 RHS
+ 363253046U, // <7,7,7,u>: Cost 1 vdup3 RHS
+ 1523417190U, // <7,7,u,0>: Cost 2 vext1 <7,7,7,7>, LHS
+ 1638471298U, // <7,7,u,1>: Cost 2 vext3 RHS, <7,u,1,2>
+ 2712213132U, // <7,7,u,2>: Cost 3 vext3 RHS, <7,u,2,3>
+ 2712213138U, // <7,7,u,3>: Cost 3 vext3 RHS, <7,u,3,0>
+ 1523420470U, // <7,7,u,4>: Cost 2 vext1 <7,7,7,7>, RHS
+ 1638471338U, // <7,7,u,5>: Cost 2 vext3 RHS, <7,u,5,6>
+ 1595840756U, // <7,7,u,6>: Cost 2 vext2 <u,6,7,7>, <u,6,7,7>
+ 363253046U, // <7,7,u,7>: Cost 1 vdup3 RHS
+ 363253046U, // <7,7,u,u>: Cost 1 vdup3 RHS
+ 1638318080U, // <7,u,0,0>: Cost 2 vext3 RHS, <0,0,0,0>
+ 1638323923U, // <7,u,0,1>: Cost 2 vext3 RHS, <u,0,1,2>
+ 1662211804U, // <7,u,0,2>: Cost 2 vext3 RHS, <u,0,2,2>
+ 1638323941U, // <7,u,0,3>: Cost 2 vext3 RHS, <u,0,3,2>
+ 2712065773U, // <7,u,0,4>: Cost 3 vext3 RHS, <u,0,4,1>
+ 1662359286U, // <7,u,0,5>: Cost 2 vext3 RHS, <u,0,5,1>
+ 1662359296U, // <7,u,0,6>: Cost 2 vext3 RHS, <u,0,6,2>
+ 2987150664U, // <7,u,0,7>: Cost 3 vzipr <5,6,7,0>, RHS
+ 1638323986U, // <7,u,0,u>: Cost 2 vext3 RHS, <u,0,u,2>
+ 1517469798U, // <7,u,1,0>: Cost 2 vext1 <6,7,u,1>, LHS
+ 1638318900U, // <7,u,1,1>: Cost 2 vext3 RHS, <1,1,1,1>
+ 564582190U, // <7,u,1,2>: Cost 1 vext3 RHS, LHS
+ 1638324023U, // <7,u,1,3>: Cost 2 vext3 RHS, <u,1,3,3>
+ 1517473078U, // <7,u,1,4>: Cost 2 vext1 <6,7,u,1>, RHS
+ 2693928777U, // <7,u,1,5>: Cost 3 vext3 <1,5,3,7>, <u,1,5,3>
+ 1517474710U, // <7,u,1,6>: Cost 2 vext1 <6,7,u,1>, <6,7,u,1>
+ 1640462171U, // <7,u,1,7>: Cost 2 vext3 RHS, <u,1,7,3>
+ 564582244U, // <7,u,1,u>: Cost 1 vext3 RHS, LHS
+ 1638318244U, // <7,u,2,0>: Cost 2 vext3 RHS, <0,2,0,2>
+ 2712065907U, // <7,u,2,1>: Cost 3 vext3 RHS, <u,2,1,0>
+ 1638319720U, // <7,u,2,2>: Cost 2 vext3 RHS, <2,2,2,2>
+ 1638324101U, // <7,u,2,3>: Cost 2 vext3 RHS, <u,2,3,0>
+ 1638318284U, // <7,u,2,4>: Cost 2 vext3 RHS, <0,2,4,6>
+ 2712065947U, // <7,u,2,5>: Cost 3 vext3 RHS, <u,2,5,4>
+ 2700564387U, // <7,u,2,6>: Cost 3 vext3 <2,6,3,7>, <u,2,6,3>
+ 1640314796U, // <7,u,2,7>: Cost 2 vext3 RHS, <u,2,7,3>
+ 1638324146U, // <7,u,2,u>: Cost 2 vext3 RHS, <u,2,u,0>
+ 1638324156U, // <7,u,3,0>: Cost 2 vext3 RHS, <u,3,0,1>
+ 1638319064U, // <7,u,3,1>: Cost 2 vext3 RHS, <1,3,1,3>
+ 2700564435U, // <7,u,3,2>: Cost 3 vext3 <2,6,3,7>, <u,3,2,6>
+ 1638320540U, // <7,u,3,3>: Cost 2 vext3 RHS, <3,3,3,3>
+ 1638324196U, // <7,u,3,4>: Cost 2 vext3 RHS, <u,3,4,5>
+ 1638324207U, // <7,u,3,5>: Cost 2 vext3 RHS, <u,3,5,7>
+ 2700564472U, // <7,u,3,6>: Cost 3 vext3 <2,6,3,7>, <u,3,6,7>
+ 2695919610U, // <7,u,3,7>: Cost 3 vext3 <1,u,3,7>, <u,3,7,0>
+ 1638324228U, // <7,u,3,u>: Cost 2 vext3 RHS, <u,3,u,1>
+ 2712066061U, // <7,u,4,0>: Cost 3 vext3 RHS, <u,4,0,1>
+ 1662212122U, // <7,u,4,1>: Cost 2 vext3 RHS, <u,4,1,5>
+ 1662212132U, // <7,u,4,2>: Cost 2 vext3 RHS, <u,4,2,6>
+ 2712066092U, // <7,u,4,3>: Cost 3 vext3 RHS, <u,4,3,5>
+ 1638321360U, // <7,u,4,4>: Cost 2 vext3 RHS, <4,4,4,4>
+ 1638324287U, // <7,u,4,5>: Cost 2 vext3 RHS, <u,4,5,6>
+ 1662359624U, // <7,u,4,6>: Cost 2 vext3 RHS, <u,4,6,6>
+ 1640314961U, // <7,u,4,7>: Cost 2 vext3 RHS, <u,4,7,6>
+ 1638324314U, // <7,u,4,u>: Cost 2 vext3 RHS, <u,4,u,6>
+ 1517502566U, // <7,u,5,0>: Cost 2 vext1 <6,7,u,5>, LHS
+ 1574612693U, // <7,u,5,1>: Cost 2 vext2 <5,1,7,u>, <5,1,7,u>
+ 2712066162U, // <7,u,5,2>: Cost 3 vext3 RHS, <u,5,2,3>
+ 1638324351U, // <7,u,5,3>: Cost 2 vext3 RHS, <u,5,3,7>
+ 1576603592U, // <7,u,5,4>: Cost 2 vext2 <5,4,7,u>, <5,4,7,u>
+ 1577267225U, // <7,u,5,5>: Cost 2 vext2 <5,5,7,u>, <5,5,7,u>
+ 564582554U, // <7,u,5,6>: Cost 1 vext3 RHS, RHS
+ 1640462499U, // <7,u,5,7>: Cost 2 vext3 RHS, <u,5,7,7>
+ 564582572U, // <7,u,5,u>: Cost 1 vext3 RHS, RHS
+ 2712066223U, // <7,u,6,0>: Cost 3 vext3 RHS, <u,6,0,1>
+ 2712066238U, // <7,u,6,1>: Cost 3 vext3 RHS, <u,6,1,7>
+ 1581249023U, // <7,u,6,2>: Cost 2 vext2 <6,2,7,u>, <6,2,7,u>
+ 1638324432U, // <7,u,6,3>: Cost 2 vext3 RHS, <u,6,3,7>
+ 1638468980U, // <7,u,6,4>: Cost 2 vext3 RHS, <4,6,4,6>
+ 2712066274U, // <7,u,6,5>: Cost 3 vext3 RHS, <u,6,5,7>
+ 1583903555U, // <7,u,6,6>: Cost 2 vext2 <6,6,7,u>, <6,6,7,u>
+ 1640315117U, // <7,u,6,7>: Cost 2 vext3 RHS, <u,6,7,0>
+ 1638324477U, // <7,u,6,u>: Cost 2 vext3 RHS, <u,6,u,7>
+ 1638471936U, // <7,u,7,0>: Cost 2 vext3 RHS, <u,7,0,1>
+ 2692970763U, // <7,u,7,1>: Cost 3 vext3 <1,3,u,7>, <u,7,1,3>
+ 2700933399U, // <7,u,7,2>: Cost 3 vext3 <2,6,u,7>, <u,7,2,6>
+ 2573347601U, // <7,u,7,3>: Cost 3 vext1 <3,7,u,7>, <3,7,u,7>
+ 1638471976U, // <7,u,7,4>: Cost 2 vext3 RHS, <u,7,4,5>
+ 1511551171U, // <7,u,7,5>: Cost 2 vext1 <5,7,u,7>, <5,7,u,7>
+ 2712213815U, // <7,u,7,6>: Cost 3 vext3 RHS, <u,7,6,2>
+ 363253046U, // <7,u,7,7>: Cost 1 vdup3 RHS
+ 363253046U, // <7,u,7,u>: Cost 1 vdup3 RHS
+ 1638324561U, // <7,u,u,0>: Cost 2 vext3 RHS, <u,u,0,1>
+ 1638324571U, // <7,u,u,1>: Cost 2 vext3 RHS, <u,u,1,2>
+ 564582757U, // <7,u,u,2>: Cost 1 vext3 RHS, LHS
+ 1638324587U, // <7,u,u,3>: Cost 2 vext3 RHS, <u,u,3,0>
+ 1638324601U, // <7,u,u,4>: Cost 2 vext3 RHS, <u,u,4,5>
+ 1638324611U, // <7,u,u,5>: Cost 2 vext3 RHS, <u,u,5,6>
+ 564582797U, // <7,u,u,6>: Cost 1 vext3 RHS, RHS
+ 363253046U, // <7,u,u,7>: Cost 1 vdup3 RHS
+ 564582811U, // <7,u,u,u>: Cost 1 vext3 RHS, LHS
+ 135053414U, // <u,0,0,0>: Cost 1 vdup0 LHS
+ 1611489290U, // <u,0,0,1>: Cost 2 vext3 LHS, <0,0,1,1>
+ 1611489300U, // <u,0,0,2>: Cost 2 vext3 LHS, <0,0,2,2>
+ 2568054923U, // <u,0,0,3>: Cost 3 vext1 <3,0,0,0>, <3,0,0,0>
+ 1481706806U, // <u,0,0,4>: Cost 2 vext1 <0,u,0,0>, RHS
+ 2555449040U, // <u,0,0,5>: Cost 3 vext1 <0,u,0,0>, <5,1,7,3>
+ 2591282078U, // <u,0,0,6>: Cost 3 vext1 <6,u,0,0>, <6,u,0,0>
+ 2591945711U, // <u,0,0,7>: Cost 3 vext1 <7,0,0,0>, <7,0,0,0>
+ 135053414U, // <u,0,0,u>: Cost 1 vdup0 LHS
+ 1493655654U, // <u,0,1,0>: Cost 2 vext1 <2,u,0,1>, LHS
+ 1860550758U, // <u,0,1,1>: Cost 2 vzipl LHS, LHS
+ 537747563U, // <u,0,1,2>: Cost 1 vext3 LHS, LHS
+ 2625135576U, // <u,0,1,3>: Cost 3 vext2 <1,2,u,0>, <1,3,1,3>
+ 1493658934U, // <u,0,1,4>: Cost 2 vext1 <2,u,0,1>, RHS
+ 2625135760U, // <u,0,1,5>: Cost 3 vext2 <1,2,u,0>, <1,5,3,7>
+ 1517548447U, // <u,0,1,6>: Cost 2 vext1 <6,u,0,1>, <6,u,0,1>
+ 2591290362U, // <u,0,1,7>: Cost 3 vext1 <6,u,0,1>, <7,0,1,2>
+ 537747612U, // <u,0,1,u>: Cost 1 vext3 LHS, LHS
+ 1611489444U, // <u,0,2,0>: Cost 2 vext3 LHS, <0,2,0,2>
+ 2685231276U, // <u,0,2,1>: Cost 3 vext3 LHS, <0,2,1,1>
+ 1994768486U, // <u,0,2,2>: Cost 2 vtrnl LHS, LHS
+ 2685231294U, // <u,0,2,3>: Cost 3 vext3 LHS, <0,2,3,1>
+ 1611489484U, // <u,0,2,4>: Cost 2 vext3 LHS, <0,2,4,6>
+ 2712068310U, // <u,0,2,5>: Cost 3 vext3 RHS, <0,2,5,7>
+ 2625136570U, // <u,0,2,6>: Cost 3 vext2 <1,2,u,0>, <2,6,3,7>
+ 2591962097U, // <u,0,2,7>: Cost 3 vext1 <7,0,0,2>, <7,0,0,2>
+ 1611489516U, // <u,0,2,u>: Cost 2 vext3 LHS, <0,2,u,2>
+ 2954067968U, // <u,0,3,0>: Cost 3 vzipr LHS, <0,0,0,0>
+ 2685231356U, // <u,0,3,1>: Cost 3 vext3 LHS, <0,3,1,0>
+ 72589981U, // <u,0,3,2>: Cost 1 vrev LHS
+ 2625137052U, // <u,0,3,3>: Cost 3 vext2 <1,2,u,0>, <3,3,3,3>
+ 2625137154U, // <u,0,3,4>: Cost 3 vext2 <1,2,u,0>, <3,4,5,6>
+ 2639071848U, // <u,0,3,5>: Cost 3 vext2 <3,5,u,0>, <3,5,u,0>
+ 2639735481U, // <u,0,3,6>: Cost 3 vext2 <3,6,u,0>, <3,6,u,0>
+ 2597279354U, // <u,0,3,7>: Cost 3 vext1 <7,u,0,3>, <7,u,0,3>
+ 73032403U, // <u,0,3,u>: Cost 1 vrev LHS
+ 2687074636U, // <u,0,4,0>: Cost 3 vext3 <0,4,0,u>, <0,4,0,u>
+ 1611489618U, // <u,0,4,1>: Cost 2 vext3 LHS, <0,4,1,5>
+ 1611489628U, // <u,0,4,2>: Cost 2 vext3 LHS, <0,4,2,6>
+ 3629222038U, // <u,0,4,3>: Cost 4 vext1 <0,u,0,4>, <3,0,1,2>
+ 2555481398U, // <u,0,4,4>: Cost 3 vext1 <0,u,0,4>, RHS
+ 1551396150U, // <u,0,4,5>: Cost 2 vext2 <1,2,u,0>, RHS
+ 2651680116U, // <u,0,4,6>: Cost 3 vext2 <5,6,u,0>, <4,6,4,6>
+ 2646150600U, // <u,0,4,7>: Cost 3 vext2 <4,7,5,0>, <4,7,5,0>
+ 1611932050U, // <u,0,4,u>: Cost 2 vext3 LHS, <0,4,u,6>
+ 2561458278U, // <u,0,5,0>: Cost 3 vext1 <1,u,0,5>, LHS
+ 1863532646U, // <u,0,5,1>: Cost 2 vzipl RHS, LHS
+ 2712068526U, // <u,0,5,2>: Cost 3 vext3 RHS, <0,5,2,7>
+ 2649689976U, // <u,0,5,3>: Cost 3 vext2 <5,3,u,0>, <5,3,u,0>
+ 2220237489U, // <u,0,5,4>: Cost 3 vrev <0,u,4,5>
+ 2651680772U, // <u,0,5,5>: Cost 3 vext2 <5,6,u,0>, <5,5,5,5>
+ 1577939051U, // <u,0,5,6>: Cost 2 vext2 <5,6,u,0>, <5,6,u,0>
+ 2830077238U, // <u,0,5,7>: Cost 3 vuzpr <1,u,3,0>, RHS
+ 1579266317U, // <u,0,5,u>: Cost 2 vext2 <5,u,u,0>, <5,u,u,0>
+ 2555494502U, // <u,0,6,0>: Cost 3 vext1 <0,u,0,6>, LHS
+ 2712068598U, // <u,0,6,1>: Cost 3 vext3 RHS, <0,6,1,7>
+ 1997750374U, // <u,0,6,2>: Cost 2 vtrnl RHS, LHS
+ 2655662673U, // <u,0,6,3>: Cost 3 vext2 <6,3,u,0>, <6,3,u,0>
+ 2555497782U, // <u,0,6,4>: Cost 3 vext1 <0,u,0,6>, RHS
+ 2651681459U, // <u,0,6,5>: Cost 3 vext2 <5,6,u,0>, <6,5,0,u>
+ 2651681592U, // <u,0,6,6>: Cost 3 vext2 <5,6,u,0>, <6,6,6,6>
+ 2651681614U, // <u,0,6,7>: Cost 3 vext2 <5,6,u,0>, <6,7,0,1>
+ 1997750428U, // <u,0,6,u>: Cost 2 vtrnl RHS, LHS
+ 2567446630U, // <u,0,7,0>: Cost 3 vext1 <2,u,0,7>, LHS
+ 2567447446U, // <u,0,7,1>: Cost 3 vext1 <2,u,0,7>, <1,2,3,0>
+ 2567448641U, // <u,0,7,2>: Cost 3 vext1 <2,u,0,7>, <2,u,0,7>
+ 2573421338U, // <u,0,7,3>: Cost 3 vext1 <3,u,0,7>, <3,u,0,7>
+ 2567449910U, // <u,0,7,4>: Cost 3 vext1 <2,u,0,7>, RHS
+ 2651682242U, // <u,0,7,5>: Cost 3 vext2 <5,6,u,0>, <7,5,6,u>
+ 2591339429U, // <u,0,7,6>: Cost 3 vext1 <6,u,0,7>, <6,u,0,7>
+ 2651682412U, // <u,0,7,7>: Cost 3 vext2 <5,6,u,0>, <7,7,7,7>
+ 2567452462U, // <u,0,7,u>: Cost 3 vext1 <2,u,0,7>, LHS
+ 135053414U, // <u,0,u,0>: Cost 1 vdup0 LHS
+ 1611489938U, // <u,0,u,1>: Cost 2 vext3 LHS, <0,u,1,1>
+ 537748125U, // <u,0,u,2>: Cost 1 vext3 LHS, LHS
+ 2685674148U, // <u,0,u,3>: Cost 3 vext3 LHS, <0,u,3,1>
+ 1611932338U, // <u,0,u,4>: Cost 2 vext3 LHS, <0,u,4,6>
+ 1551399066U, // <u,0,u,5>: Cost 2 vext2 <1,2,u,0>, RHS
+ 1517605798U, // <u,0,u,6>: Cost 2 vext1 <6,u,0,u>, <6,u,0,u>
+ 2830077481U, // <u,0,u,7>: Cost 3 vuzpr <1,u,3,0>, RHS
+ 537748179U, // <u,0,u,u>: Cost 1 vext3 LHS, LHS
+ 1544101961U, // <u,1,0,0>: Cost 2 vext2 <0,0,u,1>, <0,0,u,1>
+ 1558036582U, // <u,1,0,1>: Cost 2 vext2 <2,3,u,1>, LHS
+ 2619171051U, // <u,1,0,2>: Cost 3 vext2 <0,2,u,1>, <0,2,u,1>
+ 1611490038U, // <u,1,0,3>: Cost 2 vext3 LHS, <1,0,3,2>
+ 2555522358U, // <u,1,0,4>: Cost 3 vext1 <0,u,1,0>, RHS
+ 2712068871U, // <u,1,0,5>: Cost 3 vext3 RHS, <1,0,5,1>
+ 2591355815U, // <u,1,0,6>: Cost 3 vext1 <6,u,1,0>, <6,u,1,0>
+ 2597328512U, // <u,1,0,7>: Cost 3 vext1 <7,u,1,0>, <7,u,1,0>
+ 1611490083U, // <u,1,0,u>: Cost 2 vext3 LHS, <1,0,u,2>
+ 1481785446U, // <u,1,1,0>: Cost 2 vext1 <0,u,1,1>, LHS
+ 202162278U, // <u,1,1,1>: Cost 1 vdup1 LHS
+ 2555528808U, // <u,1,1,2>: Cost 3 vext1 <0,u,1,1>, <2,2,2,2>
+ 1611490120U, // <u,1,1,3>: Cost 2 vext3 LHS, <1,1,3,3>
+ 1481788726U, // <u,1,1,4>: Cost 2 vext1 <0,u,1,1>, RHS
+ 2689876828U, // <u,1,1,5>: Cost 3 vext3 LHS, <1,1,5,5>
+ 2591364008U, // <u,1,1,6>: Cost 3 vext1 <6,u,1,1>, <6,u,1,1>
+ 2592691274U, // <u,1,1,7>: Cost 3 vext1 <7,1,1,1>, <7,1,1,1>
+ 202162278U, // <u,1,1,u>: Cost 1 vdup1 LHS
+ 1499709542U, // <u,1,2,0>: Cost 2 vext1 <3,u,1,2>, LHS
+ 2689876871U, // <u,1,2,1>: Cost 3 vext3 LHS, <1,2,1,3>
+ 2631116445U, // <u,1,2,2>: Cost 3 vext2 <2,2,u,1>, <2,2,u,1>
+ 835584U, // <u,1,2,3>: Cost 0 copy LHS
+ 1499712822U, // <u,1,2,4>: Cost 2 vext1 <3,u,1,2>, RHS
+ 2689876907U, // <u,1,2,5>: Cost 3 vext3 LHS, <1,2,5,3>
+ 2631780282U, // <u,1,2,6>: Cost 3 vext2 <2,3,u,1>, <2,6,3,7>
+ 1523603074U, // <u,1,2,7>: Cost 2 vext1 <7,u,1,2>, <7,u,1,2>
+ 835584U, // <u,1,2,u>: Cost 0 copy LHS
+ 1487773798U, // <u,1,3,0>: Cost 2 vext1 <1,u,1,3>, LHS
+ 1611490264U, // <u,1,3,1>: Cost 2 vext3 LHS, <1,3,1,3>
+ 2685232094U, // <u,1,3,2>: Cost 3 vext3 LHS, <1,3,2,0>
+ 2018746470U, // <u,1,3,3>: Cost 2 vtrnr LHS, LHS
+ 1487777078U, // <u,1,3,4>: Cost 2 vext1 <1,u,1,3>, RHS
+ 1611490304U, // <u,1,3,5>: Cost 2 vext3 LHS, <1,3,5,7>
+ 2685674505U, // <u,1,3,6>: Cost 3 vext3 LHS, <1,3,6,7>
+ 2640407307U, // <u,1,3,7>: Cost 3 vext2 <3,7,u,1>, <3,7,u,1>
+ 1611490327U, // <u,1,3,u>: Cost 2 vext3 LHS, <1,3,u,3>
+ 1567992749U, // <u,1,4,0>: Cost 2 vext2 <4,0,u,1>, <4,0,u,1>
+ 2693121070U, // <u,1,4,1>: Cost 3 vext3 <1,4,1,u>, <1,4,1,u>
+ 2693194807U, // <u,1,4,2>: Cost 3 vext3 <1,4,2,u>, <1,4,2,u>
+ 1152386432U, // <u,1,4,3>: Cost 2 vrev <1,u,3,4>
+ 2555555126U, // <u,1,4,4>: Cost 3 vext1 <0,u,1,4>, RHS
+ 1558039862U, // <u,1,4,5>: Cost 2 vext2 <2,3,u,1>, RHS
+ 2645716371U, // <u,1,4,6>: Cost 3 vext2 <4,6,u,1>, <4,6,u,1>
+ 2597361284U, // <u,1,4,7>: Cost 3 vext1 <7,u,1,4>, <7,u,1,4>
+ 1152755117U, // <u,1,4,u>: Cost 2 vrev <1,u,u,4>
+ 1481818214U, // <u,1,5,0>: Cost 2 vext1 <0,u,1,5>, LHS
+ 2555560694U, // <u,1,5,1>: Cost 3 vext1 <0,u,1,5>, <1,0,3,2>
+ 2555561576U, // <u,1,5,2>: Cost 3 vext1 <0,u,1,5>, <2,2,2,2>
+ 1611490448U, // <u,1,5,3>: Cost 2 vext3 LHS, <1,5,3,7>
+ 1481821494U, // <u,1,5,4>: Cost 2 vext1 <0,u,1,5>, RHS
+ 2651025435U, // <u,1,5,5>: Cost 3 vext2 <5,5,u,1>, <5,5,u,1>
+ 2651689068U, // <u,1,5,6>: Cost 3 vext2 <5,6,u,1>, <5,6,u,1>
+ 2823966006U, // <u,1,5,7>: Cost 3 vuzpr <0,u,1,1>, RHS
+ 1611932861U, // <u,1,5,u>: Cost 2 vext3 LHS, <1,5,u,7>
+ 2555568230U, // <u,1,6,0>: Cost 3 vext1 <0,u,1,6>, LHS
+ 2689877199U, // <u,1,6,1>: Cost 3 vext3 LHS, <1,6,1,7>
+ 2712069336U, // <u,1,6,2>: Cost 3 vext3 RHS, <1,6,2,7>
+ 2685232353U, // <u,1,6,3>: Cost 3 vext3 LHS, <1,6,3,7>
+ 2555571510U, // <u,1,6,4>: Cost 3 vext1 <0,u,1,6>, RHS
+ 2689877235U, // <u,1,6,5>: Cost 3 vext3 LHS, <1,6,5,7>
+ 2657661765U, // <u,1,6,6>: Cost 3 vext2 <6,6,u,1>, <6,6,u,1>
+ 1584583574U, // <u,1,6,7>: Cost 2 vext2 <6,7,u,1>, <6,7,u,1>
+ 1585247207U, // <u,1,6,u>: Cost 2 vext2 <6,u,u,1>, <6,u,u,1>
+ 2561548390U, // <u,1,7,0>: Cost 3 vext1 <1,u,1,7>, LHS
+ 2561549681U, // <u,1,7,1>: Cost 3 vext1 <1,u,1,7>, <1,u,1,7>
+ 2573493926U, // <u,1,7,2>: Cost 3 vext1 <3,u,1,7>, <2,3,0,1>
+ 2042962022U, // <u,1,7,3>: Cost 2 vtrnr RHS, LHS
+ 2561551670U, // <u,1,7,4>: Cost 3 vext1 <1,u,1,7>, RHS
+ 2226300309U, // <u,1,7,5>: Cost 3 vrev <1,u,5,7>
+ 2658325990U, // <u,1,7,6>: Cost 3 vext2 <6,7,u,1>, <7,6,1,u>
+ 2658326124U, // <u,1,7,7>: Cost 3 vext2 <6,7,u,1>, <7,7,7,7>
+ 2042962027U, // <u,1,7,u>: Cost 2 vtrnr RHS, LHS
+ 1481842790U, // <u,1,u,0>: Cost 2 vext1 <0,u,1,u>, LHS
+ 202162278U, // <u,1,u,1>: Cost 1 vdup1 LHS
+ 2685674867U, // <u,1,u,2>: Cost 3 vext3 LHS, <1,u,2,0>
+ 835584U, // <u,1,u,3>: Cost 0 copy LHS
+ 1481846070U, // <u,1,u,4>: Cost 2 vext1 <0,u,1,u>, RHS
+ 1611933077U, // <u,1,u,5>: Cost 2 vext3 LHS, <1,u,5,7>
+ 2685674910U, // <u,1,u,6>: Cost 3 vext3 LHS, <1,u,6,7>
+ 1523652232U, // <u,1,u,7>: Cost 2 vext1 <7,u,1,u>, <7,u,1,u>
+ 835584U, // <u,1,u,u>: Cost 0 copy LHS
+ 1544110154U, // <u,2,0,0>: Cost 2 vext2 <0,0,u,2>, <0,0,u,2>
+ 1545437286U, // <u,2,0,1>: Cost 2 vext2 <0,2,u,2>, LHS
+ 1545437420U, // <u,2,0,2>: Cost 2 vext2 <0,2,u,2>, <0,2,u,2>
+ 2685232589U, // <u,2,0,3>: Cost 3 vext3 LHS, <2,0,3,0>
+ 2619179346U, // <u,2,0,4>: Cost 3 vext2 <0,2,u,2>, <0,4,1,5>
+ 2712069606U, // <u,2,0,5>: Cost 3 vext3 RHS, <2,0,5,7>
+ 2689877484U, // <u,2,0,6>: Cost 3 vext3 LHS, <2,0,6,4>
+ 2659656273U, // <u,2,0,7>: Cost 3 vext2 <7,0,u,2>, <0,7,2,u>
+ 1545437853U, // <u,2,0,u>: Cost 2 vext2 <0,2,u,2>, LHS
+ 1550082851U, // <u,2,1,0>: Cost 2 vext2 <1,0,u,2>, <1,0,u,2>
+ 2619179828U, // <u,2,1,1>: Cost 3 vext2 <0,2,u,2>, <1,1,1,1>
+ 2619179926U, // <u,2,1,2>: Cost 3 vext2 <0,2,u,2>, <1,2,3,0>
+ 2685232671U, // <u,2,1,3>: Cost 3 vext3 LHS, <2,1,3,1>
+ 2555604278U, // <u,2,1,4>: Cost 3 vext1 <0,u,2,1>, RHS
+ 2619180176U, // <u,2,1,5>: Cost 3 vext2 <0,2,u,2>, <1,5,3,7>
+ 2689877564U, // <u,2,1,6>: Cost 3 vext3 LHS, <2,1,6,3>
+ 2602718850U, // <u,2,1,7>: Cost 3 vext1 <u,7,2,1>, <7,u,1,2>
+ 1158703235U, // <u,2,1,u>: Cost 2 vrev <2,u,u,1>
+ 1481867366U, // <u,2,2,0>: Cost 2 vext1 <0,u,2,2>, LHS
+ 2555609846U, // <u,2,2,1>: Cost 3 vext1 <0,u,2,2>, <1,0,3,2>
+ 269271142U, // <u,2,2,2>: Cost 1 vdup2 LHS
+ 1611490930U, // <u,2,2,3>: Cost 2 vext3 LHS, <2,2,3,3>
+ 1481870646U, // <u,2,2,4>: Cost 2 vext1 <0,u,2,2>, RHS
+ 2689877640U, // <u,2,2,5>: Cost 3 vext3 LHS, <2,2,5,7>
+ 2619180986U, // <u,2,2,6>: Cost 3 vext2 <0,2,u,2>, <2,6,3,7>
+ 2593436837U, // <u,2,2,7>: Cost 3 vext1 <7,2,2,2>, <7,2,2,2>
+ 269271142U, // <u,2,2,u>: Cost 1 vdup2 LHS
+ 408134301U, // <u,2,3,0>: Cost 1 vext1 LHS, LHS
+ 1481876214U, // <u,2,3,1>: Cost 2 vext1 LHS, <1,0,3,2>
+ 1481877096U, // <u,2,3,2>: Cost 2 vext1 LHS, <2,2,2,2>
+ 1880326246U, // <u,2,3,3>: Cost 2 vzipr LHS, LHS
+ 408137014U, // <u,2,3,4>: Cost 1 vext1 LHS, RHS
+ 1529654992U, // <u,2,3,5>: Cost 2 vext1 LHS, <5,1,7,3>
+ 1529655802U, // <u,2,3,6>: Cost 2 vext1 LHS, <6,2,7,3>
+ 1529656314U, // <u,2,3,7>: Cost 2 vext1 LHS, <7,0,1,2>
+ 408139566U, // <u,2,3,u>: Cost 1 vext1 LHS, LHS
+ 1567853468U, // <u,2,4,0>: Cost 2 vext2 <4,0,6,2>, <4,0,6,2>
+ 2561598362U, // <u,2,4,1>: Cost 3 vext1 <1,u,2,4>, <1,2,3,4>
+ 2555627214U, // <u,2,4,2>: Cost 3 vext1 <0,u,2,4>, <2,3,4,5>
+ 2685232918U, // <u,2,4,3>: Cost 3 vext3 LHS, <2,4,3,5>
+ 2555628854U, // <u,2,4,4>: Cost 3 vext1 <0,u,2,4>, RHS
+ 1545440566U, // <u,2,4,5>: Cost 2 vext2 <0,2,u,2>, RHS
+ 1571982740U, // <u,2,4,6>: Cost 2 vext2 <4,6,u,2>, <4,6,u,2>
+ 2592125957U, // <u,2,4,7>: Cost 3 vext1 <7,0,2,4>, <7,0,2,4>
+ 1545440809U, // <u,2,4,u>: Cost 2 vext2 <0,2,u,2>, RHS
+ 2555633766U, // <u,2,5,0>: Cost 3 vext1 <0,u,2,5>, LHS
+ 2561606550U, // <u,2,5,1>: Cost 3 vext1 <1,u,2,5>, <1,2,3,0>
+ 2689877856U, // <u,2,5,2>: Cost 3 vext3 LHS, <2,5,2,7>
+ 2685233000U, // <u,2,5,3>: Cost 3 vext3 LHS, <2,5,3,6>
+ 1158441059U, // <u,2,5,4>: Cost 2 vrev <2,u,4,5>
+ 2645725188U, // <u,2,5,5>: Cost 3 vext2 <4,6,u,2>, <5,5,5,5>
+ 2689877892U, // <u,2,5,6>: Cost 3 vext3 LHS, <2,5,6,7>
+ 2823900470U, // <u,2,5,7>: Cost 3 vuzpr <0,u,0,2>, RHS
+ 1158736007U, // <u,2,5,u>: Cost 2 vrev <2,u,u,5>
+ 1481900134U, // <u,2,6,0>: Cost 2 vext1 <0,u,2,6>, LHS
+ 2555642614U, // <u,2,6,1>: Cost 3 vext1 <0,u,2,6>, <1,0,3,2>
+ 2555643496U, // <u,2,6,2>: Cost 3 vext1 <0,u,2,6>, <2,2,2,2>
+ 1611491258U, // <u,2,6,3>: Cost 2 vext3 LHS, <2,6,3,7>
+ 1481903414U, // <u,2,6,4>: Cost 2 vext1 <0,u,2,6>, RHS
+ 2689877964U, // <u,2,6,5>: Cost 3 vext3 LHS, <2,6,5,7>
+ 2689877973U, // <u,2,6,6>: Cost 3 vext3 LHS, <2,6,6,7>
+ 2645726030U, // <u,2,6,7>: Cost 3 vext2 <4,6,u,2>, <6,7,0,1>
+ 1611933671U, // <u,2,6,u>: Cost 2 vext3 LHS, <2,6,u,7>
+ 1585919033U, // <u,2,7,0>: Cost 2 vext2 <7,0,u,2>, <7,0,u,2>
+ 2573566710U, // <u,2,7,1>: Cost 3 vext1 <3,u,2,7>, <1,0,3,2>
+ 2567596115U, // <u,2,7,2>: Cost 3 vext1 <2,u,2,7>, <2,u,2,7>
+ 1906901094U, // <u,2,7,3>: Cost 2 vzipr RHS, LHS
+ 2555653430U, // <u,2,7,4>: Cost 3 vext1 <0,u,2,7>, RHS
+ 2800080230U, // <u,2,7,5>: Cost 3 vuzpl LHS, <7,4,5,6>
+ 2980643164U, // <u,2,7,6>: Cost 3 vzipr RHS, <0,4,2,6>
+ 2645726828U, // <u,2,7,7>: Cost 3 vext2 <4,6,u,2>, <7,7,7,7>
+ 1906901099U, // <u,2,7,u>: Cost 2 vzipr RHS, LHS
+ 408175266U, // <u,2,u,0>: Cost 1 vext1 LHS, LHS
+ 1545443118U, // <u,2,u,1>: Cost 2 vext2 <0,2,u,2>, LHS
+ 269271142U, // <u,2,u,2>: Cost 1 vdup2 LHS
+ 1611491416U, // <u,2,u,3>: Cost 2 vext3 LHS, <2,u,3,3>
+ 408177974U, // <u,2,u,4>: Cost 1 vext1 LHS, RHS
+ 1545443482U, // <u,2,u,5>: Cost 2 vext2 <0,2,u,2>, RHS
+ 1726339226U, // <u,2,u,6>: Cost 2 vuzpl LHS, RHS
+ 1529697274U, // <u,2,u,7>: Cost 2 vext1 LHS, <7,0,1,2>
+ 408180526U, // <u,2,u,u>: Cost 1 vext1 LHS, LHS
+ 1544781824U, // <u,3,0,0>: Cost 2 vext2 LHS, <0,0,0,0>
+ 471040156U, // <u,3,0,1>: Cost 1 vext2 LHS, LHS
+ 1544781988U, // <u,3,0,2>: Cost 2 vext2 LHS, <0,2,0,2>
+ 2618523900U, // <u,3,0,3>: Cost 3 vext2 LHS, <0,3,1,0>
+ 1544782162U, // <u,3,0,4>: Cost 2 vext2 LHS, <0,4,1,5>
+ 2238188352U, // <u,3,0,5>: Cost 3 vrev <3,u,5,0>
+ 2623169023U, // <u,3,0,6>: Cost 3 vext2 LHS, <0,6,2,7>
+ 2238335826U, // <u,3,0,7>: Cost 3 vrev <3,u,7,0>
+ 471040669U, // <u,3,0,u>: Cost 1 vext2 LHS, LHS
+ 1544782582U, // <u,3,1,0>: Cost 2 vext2 LHS, <1,0,3,2>
+ 1544782644U, // <u,3,1,1>: Cost 2 vext2 LHS, <1,1,1,1>
+ 1544782742U, // <u,3,1,2>: Cost 2 vext2 LHS, <1,2,3,0>
+ 1544782808U, // <u,3,1,3>: Cost 2 vext2 LHS, <1,3,1,3>
+ 2618524733U, // <u,3,1,4>: Cost 3 vext2 LHS, <1,4,3,5>
+ 1544782992U, // <u,3,1,5>: Cost 2 vext2 LHS, <1,5,3,7>
+ 2618524897U, // <u,3,1,6>: Cost 3 vext2 LHS, <1,6,3,7>
+ 2703517987U, // <u,3,1,7>: Cost 3 vext3 <3,1,7,u>, <3,1,7,u>
+ 1544783213U, // <u,3,1,u>: Cost 2 vext2 LHS, <1,u,1,3>
+ 1529716838U, // <u,3,2,0>: Cost 2 vext1 <u,u,3,2>, LHS
+ 1164167966U, // <u,3,2,1>: Cost 2 vrev <3,u,1,2>
+ 1544783464U, // <u,3,2,2>: Cost 2 vext2 LHS, <2,2,2,2>
+ 1544783526U, // <u,3,2,3>: Cost 2 vext2 LHS, <2,3,0,1>
+ 1529720118U, // <u,3,2,4>: Cost 2 vext1 <u,u,3,2>, RHS
+ 2618525544U, // <u,3,2,5>: Cost 3 vext2 LHS, <2,5,3,6>
+ 1544783802U, // <u,3,2,6>: Cost 2 vext2 LHS, <2,6,3,7>
+ 2704181620U, // <u,3,2,7>: Cost 3 vext3 <3,2,7,u>, <3,2,7,u>
+ 1544783931U, // <u,3,2,u>: Cost 2 vext2 LHS, <2,u,0,1>
+ 1544784022U, // <u,3,3,0>: Cost 2 vext2 LHS, <3,0,1,2>
+ 1487922559U, // <u,3,3,1>: Cost 2 vext1 <1,u,3,3>, <1,u,3,3>
+ 1493895256U, // <u,3,3,2>: Cost 2 vext1 <2,u,3,3>, <2,u,3,3>
+ 336380006U, // <u,3,3,3>: Cost 1 vdup3 LHS
+ 1544784386U, // <u,3,3,4>: Cost 2 vext2 LHS, <3,4,5,6>
+ 2824054478U, // <u,3,3,5>: Cost 3 vuzpr LHS, <2,3,4,5>
+ 2238286668U, // <u,3,3,6>: Cost 3 vrev <3,u,6,3>
+ 2954069136U, // <u,3,3,7>: Cost 3 vzipr LHS, <1,5,3,7>
+ 336380006U, // <u,3,3,u>: Cost 1 vdup3 LHS
+ 1487929446U, // <u,3,4,0>: Cost 2 vext1 <1,u,3,4>, LHS
+ 1487930752U, // <u,3,4,1>: Cost 2 vext1 <1,u,3,4>, <1,u,3,4>
+ 2623171644U, // <u,3,4,2>: Cost 3 vext2 LHS, <4,2,6,0>
+ 2561673366U, // <u,3,4,3>: Cost 3 vext1 <1,u,3,4>, <3,0,1,2>
+ 1487932726U, // <u,3,4,4>: Cost 2 vext1 <1,u,3,4>, RHS
+ 471043382U, // <u,3,4,5>: Cost 1 vext2 LHS, RHS
+ 1592561012U, // <u,3,4,6>: Cost 2 vext2 LHS, <4,6,4,6>
+ 2238368598U, // <u,3,4,7>: Cost 3 vrev <3,u,7,4>
+ 471043625U, // <u,3,4,u>: Cost 1 vext2 LHS, RHS
+ 2555707494U, // <u,3,5,0>: Cost 3 vext1 <0,u,3,5>, LHS
+ 1574645465U, // <u,3,5,1>: Cost 2 vext2 <5,1,u,3>, <5,1,u,3>
+ 2567653106U, // <u,3,5,2>: Cost 3 vext1 <2,u,3,5>, <2,3,u,5>
+ 2555709954U, // <u,3,5,3>: Cost 3 vext1 <0,u,3,5>, <3,4,5,6>
+ 1592561606U, // <u,3,5,4>: Cost 2 vext2 LHS, <5,4,7,6>
+ 1592561668U, // <u,3,5,5>: Cost 2 vext2 LHS, <5,5,5,5>
+ 1592561762U, // <u,3,5,6>: Cost 2 vext2 LHS, <5,6,7,0>
+ 1750314294U, // <u,3,5,7>: Cost 2 vuzpr LHS, RHS
+ 1750314295U, // <u,3,5,u>: Cost 2 vuzpr LHS, RHS
+ 2623172897U, // <u,3,6,0>: Cost 3 vext2 LHS, <6,0,1,2>
+ 2561688962U, // <u,3,6,1>: Cost 3 vext1 <1,u,3,6>, <1,u,3,6>
+ 1581281795U, // <u,3,6,2>: Cost 2 vext2 <6,2,u,3>, <6,2,u,3>
+ 2706541204U, // <u,3,6,3>: Cost 3 vext3 <3,6,3,u>, <3,6,3,u>
+ 2623173261U, // <u,3,6,4>: Cost 3 vext2 LHS, <6,4,5,6>
+ 1164495686U, // <u,3,6,5>: Cost 2 vrev <3,u,5,6>
+ 1592562488U, // <u,3,6,6>: Cost 2 vext2 LHS, <6,6,6,6>
+ 1592562510U, // <u,3,6,7>: Cost 2 vext2 LHS, <6,7,0,1>
+ 1164716897U, // <u,3,6,u>: Cost 2 vrev <3,u,u,6>
+ 1487954022U, // <u,3,7,0>: Cost 2 vext1 <1,u,3,7>, LHS
+ 1487955331U, // <u,3,7,1>: Cost 2 vext1 <1,u,3,7>, <1,u,3,7>
+ 1493928028U, // <u,3,7,2>: Cost 2 vext1 <2,u,3,7>, <2,u,3,7>
+ 2561697942U, // <u,3,7,3>: Cost 3 vext1 <1,u,3,7>, <3,0,1,2>
+ 1487957302U, // <u,3,7,4>: Cost 2 vext1 <1,u,3,7>, RHS
+ 2707352311U, // <u,3,7,5>: Cost 3 vext3 <3,7,5,u>, <3,7,5,u>
+ 2655024623U, // <u,3,7,6>: Cost 3 vext2 <6,2,u,3>, <7,6,2,u>
+ 1592563308U, // <u,3,7,7>: Cost 2 vext2 LHS, <7,7,7,7>
+ 1487959854U, // <u,3,7,u>: Cost 2 vext1 <1,u,3,7>, LHS
+ 1544787667U, // <u,3,u,0>: Cost 2 vext2 LHS, <u,0,1,2>
+ 471045934U, // <u,3,u,1>: Cost 1 vext2 LHS, LHS
+ 1549432709U, // <u,3,u,2>: Cost 2 vext2 LHS, <u,2,3,0>
+ 336380006U, // <u,3,u,3>: Cost 1 vdup3 LHS
+ 1544788031U, // <u,3,u,4>: Cost 2 vext2 LHS, <u,4,5,6>
+ 471046298U, // <u,3,u,5>: Cost 1 vext2 LHS, RHS
+ 1549433040U, // <u,3,u,6>: Cost 2 vext2 LHS, <u,6,3,7>
+ 1750314537U, // <u,3,u,7>: Cost 2 vuzpr LHS, RHS
+ 471046501U, // <u,3,u,u>: Cost 1 vext2 LHS, LHS
+ 2625167360U, // <u,4,0,0>: Cost 3 vext2 <1,2,u,4>, <0,0,0,0>
+ 1551425638U, // <u,4,0,1>: Cost 2 vext2 <1,2,u,4>, LHS
+ 2619195630U, // <u,4,0,2>: Cost 3 vext2 <0,2,u,4>, <0,2,u,4>
+ 2619343104U, // <u,4,0,3>: Cost 3 vext2 <0,3,1,4>, <0,3,1,4>
+ 2625167698U, // <u,4,0,4>: Cost 3 vext2 <1,2,u,4>, <0,4,1,5>
+ 1638329234U, // <u,4,0,5>: Cost 2 vext3 RHS, <4,0,5,1>
+ 1638329244U, // <u,4,0,6>: Cost 2 vext3 RHS, <4,0,6,2>
+ 3787803556U, // <u,4,0,7>: Cost 4 vext3 RHS, <4,0,7,1>
+ 1551426205U, // <u,4,0,u>: Cost 2 vext2 <1,2,u,4>, LHS
+ 2555748454U, // <u,4,1,0>: Cost 3 vext1 <0,u,4,1>, LHS
+ 2625168180U, // <u,4,1,1>: Cost 3 vext2 <1,2,u,4>, <1,1,1,1>
+ 1551426503U, // <u,4,1,2>: Cost 2 vext2 <1,2,u,4>, <1,2,u,4>
+ 2625168344U, // <u,4,1,3>: Cost 3 vext2 <1,2,u,4>, <1,3,1,3>
+ 2555751734U, // <u,4,1,4>: Cost 3 vext1 <0,u,4,1>, RHS
+ 1860554038U, // <u,4,1,5>: Cost 2 vzipl LHS, RHS
+ 2689879022U, // <u,4,1,6>: Cost 3 vext3 LHS, <4,1,6,3>
+ 2592248852U, // <u,4,1,7>: Cost 3 vext1 <7,0,4,1>, <7,0,4,1>
+ 1555408301U, // <u,4,1,u>: Cost 2 vext2 <1,u,u,4>, <1,u,u,4>
+ 2555756646U, // <u,4,2,0>: Cost 3 vext1 <0,u,4,2>, LHS
+ 2625168943U, // <u,4,2,1>: Cost 3 vext2 <1,2,u,4>, <2,1,4,u>
+ 2625169000U, // <u,4,2,2>: Cost 3 vext2 <1,2,u,4>, <2,2,2,2>
+ 2619197134U, // <u,4,2,3>: Cost 3 vext2 <0,2,u,4>, <2,3,4,5>
+ 2555759926U, // <u,4,2,4>: Cost 3 vext1 <0,u,4,2>, RHS
+ 2712071222U, // <u,4,2,5>: Cost 3 vext3 RHS, <4,2,5,3>
+ 1994771766U, // <u,4,2,6>: Cost 2 vtrnl LHS, RHS
+ 2592257045U, // <u,4,2,7>: Cost 3 vext1 <7,0,4,2>, <7,0,4,2>
+ 1994771784U, // <u,4,2,u>: Cost 2 vtrnl LHS, RHS
+ 2625169558U, // <u,4,3,0>: Cost 3 vext2 <1,2,u,4>, <3,0,1,2>
+ 2567709594U, // <u,4,3,1>: Cost 3 vext1 <2,u,4,3>, <1,2,3,4>
+ 2567710817U, // <u,4,3,2>: Cost 3 vext1 <2,u,4,3>, <2,u,4,3>
+ 2625169820U, // <u,4,3,3>: Cost 3 vext2 <1,2,u,4>, <3,3,3,3>
+ 2625169922U, // <u,4,3,4>: Cost 3 vext2 <1,2,u,4>, <3,4,5,6>
+ 2954069710U, // <u,4,3,5>: Cost 3 vzipr LHS, <2,3,4,5>
+ 2954068172U, // <u,4,3,6>: Cost 3 vzipr LHS, <0,2,4,6>
+ 3903849472U, // <u,4,3,7>: Cost 4 vuzpr <1,u,3,4>, <1,3,5,7>
+ 2954068174U, // <u,4,3,u>: Cost 3 vzipr LHS, <0,2,4,u>
+ 1505919078U, // <u,4,4,0>: Cost 2 vext1 <4,u,4,4>, LHS
+ 2567717831U, // <u,4,4,1>: Cost 3 vext1 <2,u,4,4>, <1,2,u,4>
+ 2567719010U, // <u,4,4,2>: Cost 3 vext1 <2,u,4,4>, <2,u,4,4>
+ 2570373542U, // <u,4,4,3>: Cost 3 vext1 <3,3,4,4>, <3,3,4,4>
+ 161926454U, // <u,4,4,4>: Cost 1 vdup0 RHS
+ 1551428918U, // <u,4,4,5>: Cost 2 vext2 <1,2,u,4>, RHS
+ 1638329572U, // <u,4,4,6>: Cost 2 vext3 RHS, <4,4,6,6>
+ 2594927963U, // <u,4,4,7>: Cost 3 vext1 <7,4,4,4>, <7,4,4,4>
+ 161926454U, // <u,4,4,u>: Cost 1 vdup0 RHS
+ 1493983334U, // <u,4,5,0>: Cost 2 vext1 <2,u,4,5>, LHS
+ 2689879301U, // <u,4,5,1>: Cost 3 vext3 LHS, <4,5,1,3>
+ 1493985379U, // <u,4,5,2>: Cost 2 vext1 <2,u,4,5>, <2,u,4,5>
+ 2567727254U, // <u,4,5,3>: Cost 3 vext1 <2,u,4,5>, <3,0,1,2>
+ 1493986614U, // <u,4,5,4>: Cost 2 vext1 <2,u,4,5>, RHS
+ 1863535926U, // <u,4,5,5>: Cost 2 vzipl RHS, RHS
+ 537750838U, // <u,4,5,6>: Cost 1 vext3 LHS, RHS
+ 2830110006U, // <u,4,5,7>: Cost 3 vuzpr <1,u,3,4>, RHS
+ 537750856U, // <u,4,5,u>: Cost 1 vext3 LHS, RHS
+ 1482047590U, // <u,4,6,0>: Cost 2 vext1 <0,u,4,6>, LHS
+ 2555790070U, // <u,4,6,1>: Cost 3 vext1 <0,u,4,6>, <1,0,3,2>
+ 2555790952U, // <u,4,6,2>: Cost 3 vext1 <0,u,4,6>, <2,2,2,2>
+ 2555791510U, // <u,4,6,3>: Cost 3 vext1 <0,u,4,6>, <3,0,1,2>
+ 1482050870U, // <u,4,6,4>: Cost 2 vext1 <0,u,4,6>, RHS
+ 2689879422U, // <u,4,6,5>: Cost 3 vext3 LHS, <4,6,5,7>
+ 1997753654U, // <u,4,6,6>: Cost 2 vtrnl RHS, RHS
+ 2712071562U, // <u,4,6,7>: Cost 3 vext3 RHS, <4,6,7,1>
+ 1482053422U, // <u,4,6,u>: Cost 2 vext1 <0,u,4,6>, LHS
+ 2567741542U, // <u,4,7,0>: Cost 3 vext1 <2,u,4,7>, LHS
+ 2567742362U, // <u,4,7,1>: Cost 3 vext1 <2,u,4,7>, <1,2,3,4>
+ 2567743589U, // <u,4,7,2>: Cost 3 vext1 <2,u,4,7>, <2,u,4,7>
+ 2573716286U, // <u,4,7,3>: Cost 3 vext1 <3,u,4,7>, <3,u,4,7>
+ 2567744822U, // <u,4,7,4>: Cost 3 vext1 <2,u,4,7>, RHS
+ 2712071624U, // <u,4,7,5>: Cost 3 vext3 RHS, <4,7,5,0>
+ 96808489U, // <u,4,7,6>: Cost 1 vrev RHS
+ 2651715180U, // <u,4,7,7>: Cost 3 vext2 <5,6,u,4>, <7,7,7,7>
+ 96955963U, // <u,4,7,u>: Cost 1 vrev RHS
+ 1482063974U, // <u,4,u,0>: Cost 2 vext1 <0,u,4,u>, LHS
+ 1551431470U, // <u,4,u,1>: Cost 2 vext2 <1,2,u,4>, LHS
+ 1494009958U, // <u,4,u,2>: Cost 2 vext1 <2,u,4,u>, <2,u,4,u>
+ 2555807894U, // <u,4,u,3>: Cost 3 vext1 <0,u,4,u>, <3,0,1,2>
+ 161926454U, // <u,4,u,4>: Cost 1 vdup0 RHS
+ 1551431834U, // <u,4,u,5>: Cost 2 vext2 <1,2,u,4>, RHS
+ 537751081U, // <u,4,u,6>: Cost 1 vext3 LHS, RHS
+ 2830110249U, // <u,4,u,7>: Cost 3 vuzpr <1,u,3,4>, RHS
+ 537751099U, // <u,4,u,u>: Cost 1 vext3 LHS, RHS
+ 2631811072U, // <u,5,0,0>: Cost 3 vext2 <2,3,u,5>, <0,0,0,0>
+ 1558069350U, // <u,5,0,1>: Cost 2 vext2 <2,3,u,5>, LHS
+ 2619203823U, // <u,5,0,2>: Cost 3 vext2 <0,2,u,5>, <0,2,u,5>
+ 2619867456U, // <u,5,0,3>: Cost 3 vext2 <0,3,u,5>, <0,3,u,5>
+ 1546273106U, // <u,5,0,4>: Cost 2 vext2 <0,4,1,5>, <0,4,1,5>
+ 2733010539U, // <u,5,0,5>: Cost 3 vext3 LHS, <5,0,5,1>
+ 2597622682U, // <u,5,0,6>: Cost 3 vext1 <7,u,5,0>, <6,7,u,5>
+ 1176539396U, // <u,5,0,7>: Cost 2 vrev <5,u,7,0>
+ 1558069917U, // <u,5,0,u>: Cost 2 vext2 <2,3,u,5>, LHS
+ 1505968230U, // <u,5,1,0>: Cost 2 vext1 <4,u,5,1>, LHS
+ 2624512887U, // <u,5,1,1>: Cost 3 vext2 <1,1,u,5>, <1,1,u,5>
+ 2631811990U, // <u,5,1,2>: Cost 3 vext2 <2,3,u,5>, <1,2,3,0>
+ 2618541056U, // <u,5,1,3>: Cost 3 vext2 <0,1,u,5>, <1,3,5,7>
+ 1505971510U, // <u,5,1,4>: Cost 2 vext1 <4,u,5,1>, RHS
+ 2627167419U, // <u,5,1,5>: Cost 3 vext2 <1,5,u,5>, <1,5,u,5>
+ 2579714554U, // <u,5,1,6>: Cost 3 vext1 <4,u,5,1>, <6,2,7,3>
+ 1638330064U, // <u,5,1,7>: Cost 2 vext3 RHS, <5,1,7,3>
+ 1638477529U, // <u,5,1,u>: Cost 2 vext3 RHS, <5,1,u,3>
+ 2561802342U, // <u,5,2,0>: Cost 3 vext1 <1,u,5,2>, LHS
+ 2561803264U, // <u,5,2,1>: Cost 3 vext1 <1,u,5,2>, <1,3,5,7>
+ 2631149217U, // <u,5,2,2>: Cost 3 vext2 <2,2,u,5>, <2,2,u,5>
+ 1558071026U, // <u,5,2,3>: Cost 2 vext2 <2,3,u,5>, <2,3,u,5>
+ 2561805622U, // <u,5,2,4>: Cost 3 vext1 <1,u,5,2>, RHS
+ 2714062607U, // <u,5,2,5>: Cost 3 vext3 RHS, <5,2,5,3>
+ 2631813050U, // <u,5,2,6>: Cost 3 vext2 <2,3,u,5>, <2,6,3,7>
+ 3092335926U, // <u,5,2,7>: Cost 3 vtrnr <0,u,0,2>, RHS
+ 1561389191U, // <u,5,2,u>: Cost 2 vext2 <2,u,u,5>, <2,u,u,5>
+ 2561810534U, // <u,5,3,0>: Cost 3 vext1 <1,u,5,3>, LHS
+ 2561811857U, // <u,5,3,1>: Cost 3 vext1 <1,u,5,3>, <1,u,5,3>
+ 2631813474U, // <u,5,3,2>: Cost 3 vext2 <2,3,u,5>, <3,2,5,u>
+ 2631813532U, // <u,5,3,3>: Cost 3 vext2 <2,3,u,5>, <3,3,3,3>
+ 2619869698U, // <u,5,3,4>: Cost 3 vext2 <0,3,u,5>, <3,4,5,6>
+ 3001847002U, // <u,5,3,5>: Cost 3 vzipr LHS, <4,4,5,5>
+ 2954070530U, // <u,5,3,6>: Cost 3 vzipr LHS, <3,4,5,6>
+ 2018749750U, // <u,5,3,7>: Cost 2 vtrnr LHS, RHS
+ 2018749751U, // <u,5,3,u>: Cost 2 vtrnr LHS, RHS
+ 2573762662U, // <u,5,4,0>: Cost 3 vext1 <3,u,5,4>, LHS
+ 2620017634U, // <u,5,4,1>: Cost 3 vext2 <0,4,1,5>, <4,1,5,0>
+ 2573764338U, // <u,5,4,2>: Cost 3 vext1 <3,u,5,4>, <2,3,u,5>
+ 2573765444U, // <u,5,4,3>: Cost 3 vext1 <3,u,5,4>, <3,u,5,4>
+ 1570680053U, // <u,5,4,4>: Cost 2 vext2 <4,4,u,5>, <4,4,u,5>
+ 1558072630U, // <u,5,4,5>: Cost 2 vext2 <2,3,u,5>, RHS
+ 2645749143U, // <u,5,4,6>: Cost 3 vext2 <4,6,u,5>, <4,6,u,5>
+ 1638330310U, // <u,5,4,7>: Cost 2 vext3 RHS, <5,4,7,6>
+ 1558072873U, // <u,5,4,u>: Cost 2 vext2 <2,3,u,5>, RHS
+ 1506000998U, // <u,5,5,0>: Cost 2 vext1 <4,u,5,5>, LHS
+ 2561827984U, // <u,5,5,1>: Cost 3 vext1 <1,u,5,5>, <1,5,3,7>
+ 2579744360U, // <u,5,5,2>: Cost 3 vext1 <4,u,5,5>, <2,2,2,2>
+ 2579744918U, // <u,5,5,3>: Cost 3 vext1 <4,u,5,5>, <3,0,1,2>
+ 1506004278U, // <u,5,5,4>: Cost 2 vext1 <4,u,5,5>, RHS
+ 229035318U, // <u,5,5,5>: Cost 1 vdup1 RHS
+ 2712072206U, // <u,5,5,6>: Cost 3 vext3 RHS, <5,5,6,6>
+ 1638330392U, // <u,5,5,7>: Cost 2 vext3 RHS, <5,5,7,7>
+ 229035318U, // <u,5,5,u>: Cost 1 vdup1 RHS
+ 1500037222U, // <u,5,6,0>: Cost 2 vext1 <3,u,5,6>, LHS
+ 2561836436U, // <u,5,6,1>: Cost 3 vext1 <1,u,5,6>, <1,u,5,6>
+ 2567809133U, // <u,5,6,2>: Cost 3 vext1 <2,u,5,6>, <2,u,5,6>
+ 1500040006U, // <u,5,6,3>: Cost 2 vext1 <3,u,5,6>, <3,u,5,6>
+ 1500040502U, // <u,5,6,4>: Cost 2 vext1 <3,u,5,6>, RHS
+ 2714062935U, // <u,5,6,5>: Cost 3 vext3 RHS, <5,6,5,7>
+ 2712072288U, // <u,5,6,6>: Cost 3 vext3 RHS, <5,6,6,7>
+ 27705344U, // <u,5,6,7>: Cost 0 copy RHS
+ 27705344U, // <u,5,6,u>: Cost 0 copy RHS
+ 1488101478U, // <u,5,7,0>: Cost 2 vext1 <1,u,5,7>, LHS
+ 1488102805U, // <u,5,7,1>: Cost 2 vext1 <1,u,5,7>, <1,u,5,7>
+ 2561844840U, // <u,5,7,2>: Cost 3 vext1 <1,u,5,7>, <2,2,2,2>
+ 2561845398U, // <u,5,7,3>: Cost 3 vext1 <1,u,5,7>, <3,0,1,2>
+ 1488104758U, // <u,5,7,4>: Cost 2 vext1 <1,u,5,7>, RHS
+ 1638330536U, // <u,5,7,5>: Cost 2 vext3 RHS, <5,7,5,7>
+ 2712072362U, // <u,5,7,6>: Cost 3 vext3 RHS, <5,7,6,0>
+ 2042965302U, // <u,5,7,7>: Cost 2 vtrnr RHS, RHS
+ 1488107310U, // <u,5,7,u>: Cost 2 vext1 <1,u,5,7>, LHS
+ 1488109670U, // <u,5,u,0>: Cost 2 vext1 <1,u,5,u>, LHS
+ 1488110998U, // <u,5,u,1>: Cost 2 vext1 <1,u,5,u>, <1,u,5,u>
+ 2561853032U, // <u,5,u,2>: Cost 3 vext1 <1,u,5,u>, <2,2,2,2>
+ 1500056392U, // <u,5,u,3>: Cost 2 vext1 <3,u,5,u>, <3,u,5,u>
+ 1488112950U, // <u,5,u,4>: Cost 2 vext1 <1,u,5,u>, RHS
+ 229035318U, // <u,5,u,5>: Cost 1 vdup1 RHS
+ 2954111490U, // <u,5,u,6>: Cost 3 vzipr LHS, <3,4,5,6>
+ 27705344U, // <u,5,u,7>: Cost 0 copy RHS
+ 27705344U, // <u,5,u,u>: Cost 0 copy RHS
+ 2619211776U, // <u,6,0,0>: Cost 3 vext2 <0,2,u,6>, <0,0,0,0>
+ 1545470054U, // <u,6,0,1>: Cost 2 vext2 <0,2,u,6>, LHS
+ 1545470192U, // <u,6,0,2>: Cost 2 vext2 <0,2,u,6>, <0,2,u,6>
+ 2255958969U, // <u,6,0,3>: Cost 3 vrev <6,u,3,0>
+ 1546797458U, // <u,6,0,4>: Cost 2 vext2 <0,4,u,6>, <0,4,u,6>
+ 2720624971U, // <u,6,0,5>: Cost 3 vext3 <6,0,5,u>, <6,0,5,u>
+ 2256180180U, // <u,6,0,6>: Cost 3 vrev <6,u,6,0>
+ 2960682294U, // <u,6,0,7>: Cost 3 vzipr <1,2,u,0>, RHS
+ 1545470621U, // <u,6,0,u>: Cost 2 vext2 <0,2,u,6>, LHS
+ 1182004127U, // <u,6,1,0>: Cost 2 vrev <6,u,0,1>
+ 2619212596U, // <u,6,1,1>: Cost 3 vext2 <0,2,u,6>, <1,1,1,1>
+ 2619212694U, // <u,6,1,2>: Cost 3 vext2 <0,2,u,6>, <1,2,3,0>
+ 2619212760U, // <u,6,1,3>: Cost 3 vext2 <0,2,u,6>, <1,3,1,3>
+ 2626511979U, // <u,6,1,4>: Cost 3 vext2 <1,4,u,6>, <1,4,u,6>
+ 2619212944U, // <u,6,1,5>: Cost 3 vext2 <0,2,u,6>, <1,5,3,7>
+ 2714063264U, // <u,6,1,6>: Cost 3 vext3 RHS, <6,1,6,3>
+ 2967326006U, // <u,6,1,7>: Cost 3 vzipr <2,3,u,1>, RHS
+ 1182594023U, // <u,6,1,u>: Cost 2 vrev <6,u,u,1>
+ 1506050150U, // <u,6,2,0>: Cost 2 vext1 <4,u,6,2>, LHS
+ 2579792630U, // <u,6,2,1>: Cost 3 vext1 <4,u,6,2>, <1,0,3,2>
+ 2619213416U, // <u,6,2,2>: Cost 3 vext2 <0,2,u,6>, <2,2,2,2>
+ 2619213478U, // <u,6,2,3>: Cost 3 vext2 <0,2,u,6>, <2,3,0,1>
+ 1506053430U, // <u,6,2,4>: Cost 2 vext1 <4,u,6,2>, RHS
+ 2633148309U, // <u,6,2,5>: Cost 3 vext2 <2,5,u,6>, <2,5,u,6>
+ 2619213754U, // <u,6,2,6>: Cost 3 vext2 <0,2,u,6>, <2,6,3,7>
+ 1638330874U, // <u,6,2,7>: Cost 2 vext3 RHS, <6,2,7,3>
+ 1638478339U, // <u,6,2,u>: Cost 2 vext3 RHS, <6,2,u,3>
+ 2619213974U, // <u,6,3,0>: Cost 3 vext2 <0,2,u,6>, <3,0,1,2>
+ 2255836074U, // <u,6,3,1>: Cost 3 vrev <6,u,1,3>
+ 2255909811U, // <u,6,3,2>: Cost 3 vrev <6,u,2,3>
+ 2619214236U, // <u,6,3,3>: Cost 3 vext2 <0,2,u,6>, <3,3,3,3>
+ 1564715549U, // <u,6,3,4>: Cost 2 vext2 <3,4,u,6>, <3,4,u,6>
+ 2639121006U, // <u,6,3,5>: Cost 3 vext2 <3,5,u,6>, <3,5,u,6>
+ 3001847012U, // <u,6,3,6>: Cost 3 vzipr LHS, <4,4,6,6>
+ 1880329526U, // <u,6,3,7>: Cost 2 vzipr LHS, RHS
+ 1880329527U, // <u,6,3,u>: Cost 2 vzipr LHS, RHS
+ 2567864422U, // <u,6,4,0>: Cost 3 vext1 <2,u,6,4>, LHS
+ 2733011558U, // <u,6,4,1>: Cost 3 vext3 LHS, <6,4,1,3>
+ 2567866484U, // <u,6,4,2>: Cost 3 vext1 <2,u,6,4>, <2,u,6,4>
+ 2638458005U, // <u,6,4,3>: Cost 3 vext2 <3,4,u,6>, <4,3,6,u>
+ 1570540772U, // <u,6,4,4>: Cost 2 vext2 <4,4,6,6>, <4,4,6,6>
+ 1545473334U, // <u,6,4,5>: Cost 2 vext2 <0,2,u,6>, RHS
+ 1572015512U, // <u,6,4,6>: Cost 2 vext2 <4,6,u,6>, <4,6,u,6>
+ 2960715062U, // <u,6,4,7>: Cost 3 vzipr <1,2,u,4>, RHS
+ 1545473577U, // <u,6,4,u>: Cost 2 vext2 <0,2,u,6>, RHS
+ 2567872614U, // <u,6,5,0>: Cost 3 vext1 <2,u,6,5>, LHS
+ 2645757648U, // <u,6,5,1>: Cost 3 vext2 <4,6,u,6>, <5,1,7,3>
+ 2567874490U, // <u,6,5,2>: Cost 3 vext1 <2,u,6,5>, <2,6,3,7>
+ 2576501250U, // <u,6,5,3>: Cost 3 vext1 <4,3,6,5>, <3,4,5,6>
+ 1576660943U, // <u,6,5,4>: Cost 2 vext2 <5,4,u,6>, <5,4,u,6>
+ 2645757956U, // <u,6,5,5>: Cost 3 vext2 <4,6,u,6>, <5,5,5,5>
+ 2645758050U, // <u,6,5,6>: Cost 3 vext2 <4,6,u,6>, <5,6,7,0>
+ 2824080694U, // <u,6,5,7>: Cost 3 vuzpr <0,u,2,6>, RHS
+ 1182626795U, // <u,6,5,u>: Cost 2 vrev <6,u,u,5>
+ 1506082918U, // <u,6,6,0>: Cost 2 vext1 <4,u,6,6>, LHS
+ 2579825398U, // <u,6,6,1>: Cost 3 vext1 <4,u,6,6>, <1,0,3,2>
+ 2645758458U, // <u,6,6,2>: Cost 3 vext2 <4,6,u,6>, <6,2,7,3>
+ 2579826838U, // <u,6,6,3>: Cost 3 vext1 <4,u,6,6>, <3,0,1,2>
+ 1506086198U, // <u,6,6,4>: Cost 2 vext1 <4,u,6,6>, RHS
+ 2579828432U, // <u,6,6,5>: Cost 3 vext1 <4,u,6,6>, <5,1,7,3>
+ 296144182U, // <u,6,6,6>: Cost 1 vdup2 RHS
+ 1638331202U, // <u,6,6,7>: Cost 2 vext3 RHS, <6,6,7,7>
+ 296144182U, // <u,6,6,u>: Cost 1 vdup2 RHS
+ 432349286U, // <u,6,7,0>: Cost 1 vext1 RHS, LHS
+ 1506091766U, // <u,6,7,1>: Cost 2 vext1 RHS, <1,0,3,2>
+ 1506092648U, // <u,6,7,2>: Cost 2 vext1 RHS, <2,2,2,2>
+ 1506093206U, // <u,6,7,3>: Cost 2 vext1 RHS, <3,0,1,2>
+ 432352809U, // <u,6,7,4>: Cost 1 vext1 RHS, RHS
+ 1506094800U, // <u,6,7,5>: Cost 2 vext1 RHS, <5,1,7,3>
+ 1506095610U, // <u,6,7,6>: Cost 2 vext1 RHS, <6,2,7,3>
+ 1906904374U, // <u,6,7,7>: Cost 2 vzipr RHS, RHS
+ 432355118U, // <u,6,7,u>: Cost 1 vext1 RHS, LHS
+ 432357478U, // <u,6,u,0>: Cost 1 vext1 RHS, LHS
+ 1545475886U, // <u,6,u,1>: Cost 2 vext2 <0,2,u,6>, LHS
+ 1506100840U, // <u,6,u,2>: Cost 2 vext1 RHS, <2,2,2,2>
+ 1506101398U, // <u,6,u,3>: Cost 2 vext1 RHS, <3,0,1,2>
+ 432361002U, // <u,6,u,4>: Cost 1 vext1 RHS, RHS
+ 1545476250U, // <u,6,u,5>: Cost 2 vext2 <0,2,u,6>, RHS
+ 296144182U, // <u,6,u,6>: Cost 1 vdup2 RHS
+ 1880370486U, // <u,6,u,7>: Cost 2 vzipr LHS, RHS
+ 432363310U, // <u,6,u,u>: Cost 1 vext1 RHS, LHS
+ 1571356672U, // <u,7,0,0>: Cost 2 vext2 RHS, <0,0,0,0>
+ 497614950U, // <u,7,0,1>: Cost 1 vext2 RHS, LHS
+ 1571356836U, // <u,7,0,2>: Cost 2 vext2 RHS, <0,2,0,2>
+ 2573880146U, // <u,7,0,3>: Cost 3 vext1 <3,u,7,0>, <3,u,7,0>
+ 1571357010U, // <u,7,0,4>: Cost 2 vext2 RHS, <0,4,1,5>
+ 1512083716U, // <u,7,0,5>: Cost 2 vext1 <5,u,7,0>, <5,u,7,0>
+ 2621874741U, // <u,7,0,6>: Cost 3 vext2 <0,6,u,7>, <0,6,u,7>
+ 2585826298U, // <u,7,0,7>: Cost 3 vext1 <5,u,7,0>, <7,0,1,2>
+ 497615517U, // <u,7,0,u>: Cost 1 vext2 RHS, LHS
+ 1571357430U, // <u,7,1,0>: Cost 2 vext2 RHS, <1,0,3,2>
+ 1571357492U, // <u,7,1,1>: Cost 2 vext2 RHS, <1,1,1,1>
+ 1571357590U, // <u,7,1,2>: Cost 2 vext2 RHS, <1,2,3,0>
+ 1552114715U, // <u,7,1,3>: Cost 2 vext2 <1,3,u,7>, <1,3,u,7>
+ 2573888822U, // <u,7,1,4>: Cost 3 vext1 <3,u,7,1>, RHS
+ 1553441981U, // <u,7,1,5>: Cost 2 vext2 <1,5,u,7>, <1,5,u,7>
+ 2627847438U, // <u,7,1,6>: Cost 3 vext2 <1,6,u,7>, <1,6,u,7>
+ 2727408775U, // <u,7,1,7>: Cost 3 vext3 <7,1,7,u>, <7,1,7,u>
+ 1555432880U, // <u,7,1,u>: Cost 2 vext2 <1,u,u,7>, <1,u,u,7>
+ 2629838337U, // <u,7,2,0>: Cost 3 vext2 <2,0,u,7>, <2,0,u,7>
+ 1188058754U, // <u,7,2,1>: Cost 2 vrev <7,u,1,2>
+ 1571358312U, // <u,7,2,2>: Cost 2 vext2 RHS, <2,2,2,2>
+ 1571358374U, // <u,7,2,3>: Cost 2 vext2 RHS, <2,3,0,1>
+ 2632492869U, // <u,7,2,4>: Cost 3 vext2 <2,4,u,7>, <2,4,u,7>
+ 2633156502U, // <u,7,2,5>: Cost 3 vext2 <2,5,u,7>, <2,5,u,7>
+ 1560078311U, // <u,7,2,6>: Cost 2 vext2 <2,6,u,7>, <2,6,u,7>
+ 2728072408U, // <u,7,2,7>: Cost 3 vext3 <7,2,7,u>, <7,2,7,u>
+ 1561405577U, // <u,7,2,u>: Cost 2 vext2 <2,u,u,7>, <2,u,u,7>
+ 1571358870U, // <u,7,3,0>: Cost 2 vext2 RHS, <3,0,1,2>
+ 2627184913U, // <u,7,3,1>: Cost 3 vext2 <1,5,u,7>, <3,1,5,u>
+ 2633820523U, // <u,7,3,2>: Cost 3 vext2 <2,6,u,7>, <3,2,6,u>
+ 1571359132U, // <u,7,3,3>: Cost 2 vext2 RHS, <3,3,3,3>
+ 1571359234U, // <u,7,3,4>: Cost 2 vext2 RHS, <3,4,5,6>
+ 1512108295U, // <u,7,3,5>: Cost 2 vext1 <5,u,7,3>, <5,u,7,3>
+ 1518080992U, // <u,7,3,6>: Cost 2 vext1 <6,u,7,3>, <6,u,7,3>
+ 2640456465U, // <u,7,3,7>: Cost 3 vext2 <3,7,u,7>, <3,7,u,7>
+ 1571359518U, // <u,7,3,u>: Cost 2 vext2 RHS, <3,u,1,2>
+ 1571359634U, // <u,7,4,0>: Cost 2 vext2 RHS, <4,0,5,1>
+ 2573911067U, // <u,7,4,1>: Cost 3 vext1 <3,u,7,4>, <1,3,u,7>
+ 2645101622U, // <u,7,4,2>: Cost 3 vext2 RHS, <4,2,5,3>
+ 2573912918U, // <u,7,4,3>: Cost 3 vext1 <3,u,7,4>, <3,u,7,4>
+ 1571359952U, // <u,7,4,4>: Cost 2 vext2 RHS, <4,4,4,4>
+ 497618248U, // <u,7,4,5>: Cost 1 vext2 RHS, RHS
+ 1571360116U, // <u,7,4,6>: Cost 2 vext2 RHS, <4,6,4,6>
+ 2645102024U, // <u,7,4,7>: Cost 3 vext2 RHS, <4,7,5,0>
+ 497618473U, // <u,7,4,u>: Cost 1 vext2 RHS, RHS
+ 2645102152U, // <u,7,5,0>: Cost 3 vext2 RHS, <5,0,1,2>
+ 1571360464U, // <u,7,5,1>: Cost 2 vext2 RHS, <5,1,7,3>
+ 2645102334U, // <u,7,5,2>: Cost 3 vext2 RHS, <5,2,3,4>
+ 2645102447U, // <u,7,5,3>: Cost 3 vext2 RHS, <5,3,7,0>
+ 1571360710U, // <u,7,5,4>: Cost 2 vext2 RHS, <5,4,7,6>
+ 1571360772U, // <u,7,5,5>: Cost 2 vext2 RHS, <5,5,5,5>
+ 1571360866U, // <u,7,5,6>: Cost 2 vext2 RHS, <5,6,7,0>
+ 1571360936U, // <u,7,5,7>: Cost 2 vext2 RHS, <5,7,5,7>
+ 1571361017U, // <u,7,5,u>: Cost 2 vext2 RHS, <5,u,5,7>
+ 1530044518U, // <u,7,6,0>: Cost 2 vext1 <u,u,7,6>, LHS
+ 2645103016U, // <u,7,6,1>: Cost 3 vext2 RHS, <6,1,7,2>
+ 1571361274U, // <u,7,6,2>: Cost 2 vext2 RHS, <6,2,7,3>
+ 2645103154U, // <u,7,6,3>: Cost 3 vext2 RHS, <6,3,4,5>
+ 1530047798U, // <u,7,6,4>: Cost 2 vext1 <u,u,7,6>, RHS
+ 1188386474U, // <u,7,6,5>: Cost 2 vrev <7,u,5,6>
+ 1571361592U, // <u,7,6,6>: Cost 2 vext2 RHS, <6,6,6,6>
+ 1571361614U, // <u,7,6,7>: Cost 2 vext2 RHS, <6,7,0,1>
+ 1571361695U, // <u,7,6,u>: Cost 2 vext2 RHS, <6,u,0,1>
+ 1571361786U, // <u,7,7,0>: Cost 2 vext2 RHS, <7,0,1,2>
+ 2573935616U, // <u,7,7,1>: Cost 3 vext1 <3,u,7,7>, <1,3,5,7>
+ 2645103781U, // <u,7,7,2>: Cost 3 vext2 RHS, <7,2,2,2>
+ 2573937497U, // <u,7,7,3>: Cost 3 vext1 <3,u,7,7>, <3,u,7,7>
+ 1571362150U, // <u,7,7,4>: Cost 2 vext2 RHS, <7,4,5,6>
+ 1512141067U, // <u,7,7,5>: Cost 2 vext1 <5,u,7,7>, <5,u,7,7>
+ 1518113764U, // <u,7,7,6>: Cost 2 vext1 <6,u,7,7>, <6,u,7,7>
+ 363253046U, // <u,7,7,7>: Cost 1 vdup3 RHS
+ 363253046U, // <u,7,7,u>: Cost 1 vdup3 RHS
+ 1571362515U, // <u,7,u,0>: Cost 2 vext2 RHS, <u,0,1,2>
+ 497620782U, // <u,7,u,1>: Cost 1 vext2 RHS, LHS
+ 1571362693U, // <u,7,u,2>: Cost 2 vext2 RHS, <u,2,3,0>
+ 1571362748U, // <u,7,u,3>: Cost 2 vext2 RHS, <u,3,0,1>
+ 1571362879U, // <u,7,u,4>: Cost 2 vext2 RHS, <u,4,5,6>
+ 497621146U, // <u,7,u,5>: Cost 1 vext2 RHS, RHS
+ 1571363024U, // <u,7,u,6>: Cost 2 vext2 RHS, <u,6,3,7>
+ 363253046U, // <u,7,u,7>: Cost 1 vdup3 RHS
+ 497621349U, // <u,7,u,u>: Cost 1 vext2 RHS, LHS
+ 135053414U, // <u,u,0,0>: Cost 1 vdup0 LHS
+ 471081121U, // <u,u,0,1>: Cost 1 vext2 LHS, LHS
+ 1544822948U, // <u,u,0,2>: Cost 2 vext2 LHS, <0,2,0,2>
+ 1616140005U, // <u,u,0,3>: Cost 2 vext3 LHS, <u,0,3,2>
+ 1544823122U, // <u,u,0,4>: Cost 2 vext2 LHS, <0,4,1,5>
+ 1512157453U, // <u,u,0,5>: Cost 2 vext1 <5,u,u,0>, <5,u,u,0>
+ 1662220032U, // <u,u,0,6>: Cost 2 vext3 RHS, <u,0,6,2>
+ 1194457487U, // <u,u,0,7>: Cost 2 vrev <u,u,7,0>
+ 471081629U, // <u,u,0,u>: Cost 1 vext2 LHS, LHS
+ 1544823542U, // <u,u,1,0>: Cost 2 vext2 LHS, <1,0,3,2>
+ 202162278U, // <u,u,1,1>: Cost 1 vdup1 LHS
+ 537753390U, // <u,u,1,2>: Cost 1 vext3 LHS, LHS
+ 1544823768U, // <u,u,1,3>: Cost 2 vext2 LHS, <1,3,1,3>
+ 1494248758U, // <u,u,1,4>: Cost 2 vext1 <2,u,u,1>, RHS
+ 1544823952U, // <u,u,1,5>: Cost 2 vext2 LHS, <1,5,3,7>
+ 1518138343U, // <u,u,1,6>: Cost 2 vext1 <6,u,u,1>, <6,u,u,1>
+ 1640322907U, // <u,u,1,7>: Cost 2 vext3 RHS, <u,1,7,3>
+ 537753444U, // <u,u,1,u>: Cost 1 vext3 LHS, LHS
+ 1482309734U, // <u,u,2,0>: Cost 2 vext1 <0,u,u,2>, LHS
+ 1194031451U, // <u,u,2,1>: Cost 2 vrev <u,u,1,2>
+ 269271142U, // <u,u,2,2>: Cost 1 vdup2 LHS
+ 835584U, // <u,u,2,3>: Cost 0 copy LHS
+ 1482313014U, // <u,u,2,4>: Cost 2 vext1 <0,u,u,2>, RHS
+ 2618566504U, // <u,u,2,5>: Cost 3 vext2 LHS, <2,5,3,6>
+ 1544824762U, // <u,u,2,6>: Cost 2 vext2 LHS, <2,6,3,7>
+ 1638479788U, // <u,u,2,7>: Cost 2 vext3 RHS, <u,2,7,3>
+ 835584U, // <u,u,2,u>: Cost 0 copy LHS
+ 408576723U, // <u,u,3,0>: Cost 1 vext1 LHS, LHS
+ 1482318582U, // <u,u,3,1>: Cost 2 vext1 LHS, <1,0,3,2>
+ 120371557U, // <u,u,3,2>: Cost 1 vrev LHS
+ 336380006U, // <u,u,3,3>: Cost 1 vdup3 LHS
+ 408579382U, // <u,u,3,4>: Cost 1 vext1 LHS, RHS
+ 1616140271U, // <u,u,3,5>: Cost 2 vext3 LHS, <u,3,5,7>
+ 1530098170U, // <u,u,3,6>: Cost 2 vext1 LHS, <6,2,7,3>
+ 1880329544U, // <u,u,3,7>: Cost 2 vzipr LHS, RHS
+ 408581934U, // <u,u,3,u>: Cost 1 vext1 LHS, LHS
+ 1488298086U, // <u,u,4,0>: Cost 2 vext1 <1,u,u,4>, LHS
+ 1488299437U, // <u,u,4,1>: Cost 2 vext1 <1,u,u,4>, <1,u,u,4>
+ 1659271204U, // <u,u,4,2>: Cost 2 vext3 LHS, <u,4,2,6>
+ 1194195311U, // <u,u,4,3>: Cost 2 vrev <u,u,3,4>
+ 161926454U, // <u,u,4,4>: Cost 1 vdup0 RHS
+ 471084342U, // <u,u,4,5>: Cost 1 vext2 LHS, RHS
+ 1571368308U, // <u,u,4,6>: Cost 2 vext2 RHS, <4,6,4,6>
+ 1640323153U, // <u,u,4,7>: Cost 2 vext3 RHS, <u,4,7,6>
+ 471084585U, // <u,u,4,u>: Cost 1 vext2 LHS, RHS
+ 1494278246U, // <u,u,5,0>: Cost 2 vext1 <2,u,u,5>, LHS
+ 1571368656U, // <u,u,5,1>: Cost 2 vext2 RHS, <5,1,7,3>
+ 1494280327U, // <u,u,5,2>: Cost 2 vext1 <2,u,u,5>, <2,u,u,5>
+ 1616140415U, // <u,u,5,3>: Cost 2 vext3 LHS, <u,5,3,7>
+ 1494281526U, // <u,u,5,4>: Cost 2 vext1 <2,u,u,5>, RHS
+ 229035318U, // <u,u,5,5>: Cost 1 vdup1 RHS
+ 537753754U, // <u,u,5,6>: Cost 1 vext3 LHS, RHS
+ 1750355254U, // <u,u,5,7>: Cost 2 vuzpr LHS, RHS
+ 537753772U, // <u,u,5,u>: Cost 1 vext3 LHS, RHS
+ 1482342502U, // <u,u,6,0>: Cost 2 vext1 <0,u,u,6>, LHS
+ 2556084982U, // <u,u,6,1>: Cost 3 vext1 <0,u,u,6>, <1,0,3,2>
+ 1571369466U, // <u,u,6,2>: Cost 2 vext2 RHS, <6,2,7,3>
+ 1611938000U, // <u,u,6,3>: Cost 2 vext3 LHS, <u,6,3,7>
+ 1482345782U, // <u,u,6,4>: Cost 2 vext1 <0,u,u,6>, RHS
+ 1194359171U, // <u,u,6,5>: Cost 2 vrev <u,u,5,6>
+ 296144182U, // <u,u,6,6>: Cost 1 vdup2 RHS
+ 27705344U, // <u,u,6,7>: Cost 0 copy RHS
+ 27705344U, // <u,u,6,u>: Cost 0 copy RHS
+ 432496742U, // <u,u,7,0>: Cost 1 vext1 RHS, LHS
+ 1488324016U, // <u,u,7,1>: Cost 2 vext1 <1,u,u,7>, <1,u,u,7>
+ 1494296713U, // <u,u,7,2>: Cost 2 vext1 <2,u,u,7>, <2,u,u,7>
+ 1906901148U, // <u,u,7,3>: Cost 2 vzipr RHS, LHS
+ 432500283U, // <u,u,7,4>: Cost 1 vext1 RHS, RHS
+ 1506242256U, // <u,u,7,5>: Cost 2 vext1 RHS, <5,1,7,3>
+ 120699277U, // <u,u,7,6>: Cost 1 vrev RHS
+ 363253046U, // <u,u,7,7>: Cost 1 vdup3 RHS
+ 432502574U, // <u,u,7,u>: Cost 1 vext1 RHS, LHS
+ 408617688U, // <u,u,u,0>: Cost 1 vext1 LHS, LHS
+ 471086894U, // <u,u,u,1>: Cost 1 vext2 LHS, LHS
+ 537753957U, // <u,u,u,2>: Cost 1 vext3 LHS, LHS
+ 835584U, // <u,u,u,3>: Cost 0 copy LHS
+ 408620342U, // <u,u,u,4>: Cost 1 vext1 LHS, RHS
+ 471087258U, // <u,u,u,5>: Cost 1 vext2 LHS, RHS
+ 537753997U, // <u,u,u,6>: Cost 1 vext3 LHS, RHS
+ 27705344U, // <u,u,u,7>: Cost 0 copy RHS
+ 835584U, // <u,u,u,u>: Cost 0 copy LHS
+ 0
+};
diff --git a/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.cpp
new file mode 100644
index 0000000..1cba1ba
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.cpp
@@ -0,0 +1,22 @@
+//===- ARMRegisterInfo.cpp - ARM Register Information -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the ARM implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMRegisterInfo.h"
+using namespace llvm;
+
+ARMRegisterInfo::ARMRegisterInfo(const ARMBaseInstrInfo &tii,
+ const ARMSubtarget &sti)
+ : ARMBaseRegisterInfo(tii, sti) {
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.h b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.h
new file mode 100644
index 0000000..8edfb9a
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.h
@@ -0,0 +1,33 @@
+//===- ARMRegisterInfo.h - ARM Register Information Impl --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the ARM implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMREGISTERINFO_H
+#define ARMREGISTERINFO_H
+
+#include "ARM.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "ARMBaseRegisterInfo.h"
+
+namespace llvm {
+ class ARMSubtarget;
+ class ARMBaseInstrInfo;
+ class Type;
+
+struct ARMRegisterInfo : public ARMBaseRegisterInfo {
+public:
+ ARMRegisterInfo(const ARMBaseInstrInfo &tii, const ARMSubtarget &STI);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td
new file mode 100644
index 0000000..036822d
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td
@@ -0,0 +1,352 @@
+//===- ARMRegisterInfo.td - ARM Register defs --------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the ARM register file
+//===----------------------------------------------------------------------===//
+
+// Registers are identified with 4-bit ID numbers.
+class ARMReg<bits<4> num, string n, list<Register> subregs = []> : Register<n> {
+ field bits<4> Num;
+ let Namespace = "ARM";
+ let SubRegs = subregs;
+}
+
+class ARMFReg<bits<6> num, string n> : Register<n> {
+ field bits<6> Num;
+ let Namespace = "ARM";
+}
+
+// Subregister indices.
+let Namespace = "ARM" in {
+// Note: Code depends on these having consecutive numbers.
+def ssub_0 : SubRegIndex;
+def ssub_1 : SubRegIndex;
+def ssub_2 : SubRegIndex; // In a Q reg.
+def ssub_3 : SubRegIndex;
+
+def dsub_0 : SubRegIndex;
+def dsub_1 : SubRegIndex;
+def dsub_2 : SubRegIndex;
+def dsub_3 : SubRegIndex;
+def dsub_4 : SubRegIndex;
+def dsub_5 : SubRegIndex;
+def dsub_6 : SubRegIndex;
+def dsub_7 : SubRegIndex;
+
+def qsub_0 : SubRegIndex;
+def qsub_1 : SubRegIndex;
+def qsub_2 : SubRegIndex;
+def qsub_3 : SubRegIndex;
+
+def qqsub_0 : SubRegIndex;
+def qqsub_1 : SubRegIndex;
+}
+
+// Integer registers
+def R0 : ARMReg< 0, "r0">, DwarfRegNum<[0]>;
+def R1 : ARMReg< 1, "r1">, DwarfRegNum<[1]>;
+def R2 : ARMReg< 2, "r2">, DwarfRegNum<[2]>;
+def R3 : ARMReg< 3, "r3">, DwarfRegNum<[3]>;
+def R4 : ARMReg< 4, "r4">, DwarfRegNum<[4]>;
+def R5 : ARMReg< 5, "r5">, DwarfRegNum<[5]>;
+def R6 : ARMReg< 6, "r6">, DwarfRegNum<[6]>;
+def R7 : ARMReg< 7, "r7">, DwarfRegNum<[7]>;
+// These require 32-bit instructions.
+let CostPerUse = 1 in {
+def R8 : ARMReg< 8, "r8">, DwarfRegNum<[8]>;
+def R9 : ARMReg< 9, "r9">, DwarfRegNum<[9]>;
+def R10 : ARMReg<10, "r10">, DwarfRegNum<[10]>;
+def R11 : ARMReg<11, "r11">, DwarfRegNum<[11]>;
+def R12 : ARMReg<12, "r12">, DwarfRegNum<[12]>;
+def SP : ARMReg<13, "sp">, DwarfRegNum<[13]>;
+def LR : ARMReg<14, "lr">, DwarfRegNum<[14]>;
+def PC : ARMReg<15, "pc">, DwarfRegNum<[15]>;
+}
+
+// Float registers
+def S0 : ARMFReg< 0, "s0">; def S1 : ARMFReg< 1, "s1">;
+def S2 : ARMFReg< 2, "s2">; def S3 : ARMFReg< 3, "s3">;
+def S4 : ARMFReg< 4, "s4">; def S5 : ARMFReg< 5, "s5">;
+def S6 : ARMFReg< 6, "s6">; def S7 : ARMFReg< 7, "s7">;
+def S8 : ARMFReg< 8, "s8">; def S9 : ARMFReg< 9, "s9">;
+def S10 : ARMFReg<10, "s10">; def S11 : ARMFReg<11, "s11">;
+def S12 : ARMFReg<12, "s12">; def S13 : ARMFReg<13, "s13">;
+def S14 : ARMFReg<14, "s14">; def S15 : ARMFReg<15, "s15">;
+def S16 : ARMFReg<16, "s16">; def S17 : ARMFReg<17, "s17">;
+def S18 : ARMFReg<18, "s18">; def S19 : ARMFReg<19, "s19">;
+def S20 : ARMFReg<20, "s20">; def S21 : ARMFReg<21, "s21">;
+def S22 : ARMFReg<22, "s22">; def S23 : ARMFReg<23, "s23">;
+def S24 : ARMFReg<24, "s24">; def S25 : ARMFReg<25, "s25">;
+def S26 : ARMFReg<26, "s26">; def S27 : ARMFReg<27, "s27">;
+def S28 : ARMFReg<28, "s28">; def S29 : ARMFReg<29, "s29">;
+def S30 : ARMFReg<30, "s30">; def S31 : ARMFReg<31, "s31">;
+
+// Aliases of the F* registers used to hold 64-bit fp values (doubles)
+let SubRegIndices = [ssub_0, ssub_1] in {
+def D0 : ARMReg< 0, "d0", [S0, S1]>, DwarfRegNum<[256]>;
+def D1 : ARMReg< 1, "d1", [S2, S3]>, DwarfRegNum<[257]>;
+def D2 : ARMReg< 2, "d2", [S4, S5]>, DwarfRegNum<[258]>;
+def D3 : ARMReg< 3, "d3", [S6, S7]>, DwarfRegNum<[259]>;
+def D4 : ARMReg< 4, "d4", [S8, S9]>, DwarfRegNum<[260]>;
+def D5 : ARMReg< 5, "d5", [S10, S11]>, DwarfRegNum<[261]>;
+def D6 : ARMReg< 6, "d6", [S12, S13]>, DwarfRegNum<[262]>;
+def D7 : ARMReg< 7, "d7", [S14, S15]>, DwarfRegNum<[263]>;
+def D8 : ARMReg< 8, "d8", [S16, S17]>, DwarfRegNum<[264]>;
+def D9 : ARMReg< 9, "d9", [S18, S19]>, DwarfRegNum<[265]>;
+def D10 : ARMReg<10, "d10", [S20, S21]>, DwarfRegNum<[266]>;
+def D11 : ARMReg<11, "d11", [S22, S23]>, DwarfRegNum<[267]>;
+def D12 : ARMReg<12, "d12", [S24, S25]>, DwarfRegNum<[268]>;
+def D13 : ARMReg<13, "d13", [S26, S27]>, DwarfRegNum<[269]>;
+def D14 : ARMReg<14, "d14", [S28, S29]>, DwarfRegNum<[270]>;
+def D15 : ARMReg<15, "d15", [S30, S31]>, DwarfRegNum<[271]>;
+}
+
+// VFP3 defines 16 additional double registers
+def D16 : ARMFReg<16, "d16">, DwarfRegNum<[272]>;
+def D17 : ARMFReg<17, "d17">, DwarfRegNum<[273]>;
+def D18 : ARMFReg<18, "d18">, DwarfRegNum<[274]>;
+def D19 : ARMFReg<19, "d19">, DwarfRegNum<[275]>;
+def D20 : ARMFReg<20, "d20">, DwarfRegNum<[276]>;
+def D21 : ARMFReg<21, "d21">, DwarfRegNum<[277]>;
+def D22 : ARMFReg<22, "d22">, DwarfRegNum<[278]>;
+def D23 : ARMFReg<23, "d23">, DwarfRegNum<[279]>;
+def D24 : ARMFReg<24, "d24">, DwarfRegNum<[280]>;
+def D25 : ARMFReg<25, "d25">, DwarfRegNum<[281]>;
+def D26 : ARMFReg<26, "d26">, DwarfRegNum<[282]>;
+def D27 : ARMFReg<27, "d27">, DwarfRegNum<[283]>;
+def D28 : ARMFReg<28, "d28">, DwarfRegNum<[284]>;
+def D29 : ARMFReg<29, "d29">, DwarfRegNum<[285]>;
+def D30 : ARMFReg<30, "d30">, DwarfRegNum<[286]>;
+def D31 : ARMFReg<31, "d31">, DwarfRegNum<[287]>;
+
+// Advanced SIMD (NEON) defines 16 quad-word aliases
+let SubRegIndices = [dsub_0, dsub_1],
+ CompositeIndices = [(ssub_2 dsub_1, ssub_0),
+ (ssub_3 dsub_1, ssub_1)] in {
+def Q0 : ARMReg< 0, "q0", [D0, D1]>;
+def Q1 : ARMReg< 1, "q1", [D2, D3]>;
+def Q2 : ARMReg< 2, "q2", [D4, D5]>;
+def Q3 : ARMReg< 3, "q3", [D6, D7]>;
+def Q4 : ARMReg< 4, "q4", [D8, D9]>;
+def Q5 : ARMReg< 5, "q5", [D10, D11]>;
+def Q6 : ARMReg< 6, "q6", [D12, D13]>;
+def Q7 : ARMReg< 7, "q7", [D14, D15]>;
+}
+let SubRegIndices = [dsub_0, dsub_1] in {
+def Q8 : ARMReg< 8, "q8", [D16, D17]>;
+def Q9 : ARMReg< 9, "q9", [D18, D19]>;
+def Q10 : ARMReg<10, "q10", [D20, D21]>;
+def Q11 : ARMReg<11, "q11", [D22, D23]>;
+def Q12 : ARMReg<12, "q12", [D24, D25]>;
+def Q13 : ARMReg<13, "q13", [D26, D27]>;
+def Q14 : ARMReg<14, "q14", [D28, D29]>;
+def Q15 : ARMReg<15, "q15", [D30, D31]>;
+}
+
+// Pseudo 256-bit registers to represent pairs of Q registers. These should
+// never be present in the emitted code.
+// These are used for NEON load / store instructions, e.g., vld4, vst3.
+// NOTE: It's possible to define more QQ registers since technically the
+// starting D register number doesn't have to be multiple of 4, e.g.,
+// D1, D2, D3, D4 would be a legal quad, but that would make the subregister
+// stuff very messy.
+let SubRegIndices = [qsub_0, qsub_1],
+ CompositeIndices = [(dsub_2 qsub_1, dsub_0), (dsub_3 qsub_1, dsub_1)] in {
+def QQ0 : ARMReg<0, "qq0", [Q0, Q1]>;
+def QQ1 : ARMReg<1, "qq1", [Q2, Q3]>;
+def QQ2 : ARMReg<2, "qq2", [Q4, Q5]>;
+def QQ3 : ARMReg<3, "qq3", [Q6, Q7]>;
+def QQ4 : ARMReg<4, "qq4", [Q8, Q9]>;
+def QQ5 : ARMReg<5, "qq5", [Q10, Q11]>;
+def QQ6 : ARMReg<6, "qq6", [Q12, Q13]>;
+def QQ7 : ARMReg<7, "qq7", [Q14, Q15]>;
+}
+
+// Pseudo 512-bit registers to represent four consecutive Q registers.
+let SubRegIndices = [qqsub_0, qqsub_1],
+ CompositeIndices = [(qsub_2 qqsub_1, qsub_0), (qsub_3 qqsub_1, qsub_1),
+ (dsub_4 qqsub_1, dsub_0), (dsub_5 qqsub_1, dsub_1),
+ (dsub_6 qqsub_1, dsub_2), (dsub_7 qqsub_1, dsub_3)] in {
+def QQQQ0 : ARMReg<0, "qqqq0", [QQ0, QQ1]>;
+def QQQQ1 : ARMReg<1, "qqqq1", [QQ2, QQ3]>;
+def QQQQ2 : ARMReg<2, "qqqq2", [QQ4, QQ5]>;
+def QQQQ3 : ARMReg<3, "qqqq3", [QQ6, QQ7]>;
+}
+
+// Current Program Status Register.
+def CPSR : ARMReg<0, "cpsr">;
+def APSR : ARMReg<1, "apsr">;
+def SPSR : ARMReg<2, "spsr">;
+def FPSCR : ARMReg<3, "fpscr">;
+def ITSTATE : ARMReg<4, "itstate">;
+
+// Special Registers - only available in privileged mode.
+def FPSID : ARMReg<0, "fpsid">;
+def FPEXC : ARMReg<8, "fpexc">;
+
+// Register classes.
+//
+// pc == Program Counter
+// lr == Link Register
+// sp == Stack Pointer
+// r12 == ip (scratch)
+// r7 == Frame Pointer (thumb-style backtraces)
+// r9 == May be reserved as Thread Register
+// r11 == Frame Pointer (arm-style backtraces)
+// r10 == Stack Limit
+//
+def GPR : RegisterClass<"ARM", [i32], 32, (add (sequence "R%u", 0, 12),
+ SP, LR, PC)> {
+ // Allocate LR as the first CSR since it is always saved anyway.
+ // For Thumb1 mode, we don't want to allocate hi regs at all, as we don't
+ // know how to spill them. If we make our prologue/epilogue code smarter at
+ // some point, we can go back to using the above allocation orders for the
+ // Thumb1 instructions that know how to use hi regs.
+ let AltOrders = [(add LR, GPR), (trunc GPR, 8)];
+ let AltOrderSelect = [{
+ return 1 + MF.getTarget().getSubtarget<ARMSubtarget>().isThumb1Only();
+ }];
+}
+
+// GPRs without the PC. Some ARM instructions do not allow the PC in
+// certain operand slots, particularly as the destination. Primarily
+// useful for disassembly.
+def GPRnopc : RegisterClass<"ARM", [i32], 32, (sub GPR, PC)> {
+ let AltOrders = [(add LR, GPRnopc), (trunc GPRnopc, 8)];
+ let AltOrderSelect = [{
+ return 1 + MF.getTarget().getSubtarget<ARMSubtarget>().isThumb1Only();
+ }];
+}
+
+// GPRsp - Only the SP is legal. Used by Thumb1 instructions that want the
+// implied SP argument list.
+// FIXME: It would be better to not use this at all and refactor the
+// instructions to not have SP an an explicit argument. That makes
+// frame index resolution a bit trickier, though.
+def GPRsp : RegisterClass<"ARM", [i32], 32, (add SP)>;
+
+// restricted GPR register class. Many Thumb2 instructions allow the full
+// register range for operands, but have undefined behaviours when PC
+// or SP (R13 or R15) are used. The ARM ISA refers to these operands
+// via the BadReg() pseudo-code description.
+def rGPR : RegisterClass<"ARM", [i32], 32, (sub GPR, SP, PC)> {
+ let AltOrders = [(add LR, rGPR), (trunc rGPR, 8)];
+ let AltOrderSelect = [{
+ return 1 + MF.getTarget().getSubtarget<ARMSubtarget>().isThumb1Only();
+ }];
+}
+
+// Thumb registers are R0-R7 normally. Some instructions can still use
+// the general GPR register class above (MOV, e.g.)
+def tGPR : RegisterClass<"ARM", [i32], 32, (trunc GPR, 8)>;
+
+// The high registers in thumb mode, R8-R15.
+def hGPR : RegisterClass<"ARM", [i32], 32, (sub GPR, tGPR)>;
+
+// For tail calls, we can't use callee-saved registers, as they are restored
+// to the saved value before the tail call, which would clobber a call address.
+// Note, getMinimalPhysRegClass(R0) returns tGPR because of the names of
+// this class and the preceding one(!) This is what we want.
+def tcGPR : RegisterClass<"ARM", [i32], 32, (add R0, R1, R2, R3, R9, R12)> {
+ let AltOrders = [(and tcGPR, tGPR)];
+ let AltOrderSelect = [{
+ return MF.getTarget().getSubtarget<ARMSubtarget>().isThumb1Only();
+ }];
+}
+
+// Scalar single precision floating point register class..
+def SPR : RegisterClass<"ARM", [f32], 32, (sequence "S%u", 0, 31)>;
+
+// Subset of SPR which can be used as a source of NEON scalars for 16-bit
+// operations
+def SPR_8 : RegisterClass<"ARM", [f32], 32, (trunc SPR, 16)>;
+
+// Scalar double precision floating point / generic 64-bit vector register
+// class.
+// ARM requires only word alignment for double. It's more performant if it
+// is double-word alignment though.
+def DPR : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64,
+ (sequence "D%u", 0, 31)> {
+ // Allocate non-VFP2 registers D16-D31 first.
+ let AltOrders = [(rotl DPR, 16)];
+ let AltOrderSelect = [{ return 1; }];
+}
+
+// Subset of DPR that are accessible with VFP2 (and so that also have
+// 32-bit SPR subregs).
+def DPR_VFP2 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64,
+ (trunc DPR, 16)> {
+ let SubRegClasses = [(SPR ssub_0, ssub_1)];
+}
+
+// Subset of DPR which can be used as a source of NEON scalars for 16-bit
+// operations
+def DPR_8 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64,
+ (trunc DPR, 8)> {
+ let SubRegClasses = [(SPR_8 ssub_0, ssub_1)];
+}
+
+// Generic 128-bit vector register class.
+def QPR : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 128,
+ (sequence "Q%u", 0, 15)> {
+ let SubRegClasses = [(DPR dsub_0, dsub_1)];
+ // Allocate non-VFP2 aliases Q8-Q15 first.
+ let AltOrders = [(rotl QPR, 8)];
+ let AltOrderSelect = [{ return 1; }];
+}
+
+// Subset of QPR that have 32-bit SPR subregs.
+def QPR_VFP2 : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ 128, (trunc QPR, 8)> {
+ let SubRegClasses = [(SPR ssub_0, ssub_1, ssub_2, ssub_3),
+ (DPR_VFP2 dsub_0, dsub_1)];
+}
+
+// Subset of QPR that have DPR_8 and SPR_8 subregs.
+def QPR_8 : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ 128, (trunc QPR, 4)> {
+ let SubRegClasses = [(SPR_8 ssub_0, ssub_1, ssub_2, ssub_3),
+ (DPR_8 dsub_0, dsub_1)];
+}
+
+// Pseudo 256-bit vector register class to model pairs of Q registers
+// (4 consecutive D registers).
+def QQPR : RegisterClass<"ARM", [v4i64], 256, (sequence "QQ%u", 0, 7)> {
+ let SubRegClasses = [(DPR dsub_0, dsub_1, dsub_2, dsub_3),
+ (QPR qsub_0, qsub_1)];
+ // Allocate non-VFP2 aliases first.
+ let AltOrders = [(rotl QQPR, 4)];
+ let AltOrderSelect = [{ return 1; }];
+}
+
+// Subset of QQPR that have 32-bit SPR subregs.
+def QQPR_VFP2 : RegisterClass<"ARM", [v4i64], 256, (trunc QQPR, 4)> {
+ let SubRegClasses = [(SPR ssub_0, ssub_1, ssub_2, ssub_3),
+ (DPR_VFP2 dsub_0, dsub_1, dsub_2, dsub_3),
+ (QPR_VFP2 qsub_0, qsub_1)];
+
+}
+
+// Pseudo 512-bit vector register class to model 4 consecutive Q registers
+// (8 consecutive D registers).
+def QQQQPR : RegisterClass<"ARM", [v8i64], 256, (sequence "QQQQ%u", 0, 3)> {
+ let SubRegClasses = [(DPR dsub_0, dsub_1, dsub_2, dsub_3,
+ dsub_4, dsub_5, dsub_6, dsub_7),
+ (QPR qsub_0, qsub_1, qsub_2, qsub_3)];
+ // Allocate non-VFP2 aliases first.
+ let AltOrders = [(rotl QQQQPR, 2)];
+ let AltOrderSelect = [{ return 1; }];
+}
+
+// Condition code registers.
+def CCR : RegisterClass<"ARM", [i32], 32, (add CPSR)> {
+ let CopyCost = -1; // Don't allow copying of status registers.
+ let isAllocatable = 0;
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMRelocations.h b/contrib/llvm/lib/Target/ARM/ARMRelocations.h
new file mode 100644
index 0000000..86e7206
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMRelocations.h
@@ -0,0 +1,62 @@
+//===- ARMRelocations.h - ARM Code Relocations ------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the ARM target-specific relocation types.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMRELOCATIONS_H
+#define ARMRELOCATIONS_H
+
+#include "llvm/CodeGen/MachineRelocation.h"
+
+namespace llvm {
+ namespace ARM {
+ enum RelocationType {
+ // reloc_arm_absolute - Absolute relocation, just add the relocated value
+ // to the value already in memory.
+ reloc_arm_absolute,
+
+ // reloc_arm_relative - PC relative relocation, add the relocated value to
+ // the value already in memory, after we adjust it for where the PC is.
+ reloc_arm_relative,
+
+ // reloc_arm_cp_entry - PC relative relocation for constpool_entry's whose
+ // addresses are kept locally in a map.
+ reloc_arm_cp_entry,
+
+ // reloc_arm_vfp_cp_entry - Same as reloc_arm_cp_entry except the offset
+ // should be divided by 4.
+ reloc_arm_vfp_cp_entry,
+
+ // reloc_arm_machine_cp_entry - Relocation of a ARM machine constantpool
+ // entry.
+ reloc_arm_machine_cp_entry,
+
+ // reloc_arm_jt_base - PC relative relocation for jump tables whose
+ // addresses are kept locally in a map.
+ reloc_arm_jt_base,
+
+ // reloc_arm_pic_jt - PIC jump table entry relocation: dest bb - jt base.
+ reloc_arm_pic_jt,
+
+ // reloc_arm_branch - Branch address relocation.
+ reloc_arm_branch,
+
+ // reloc_arm_movt - MOVT immediate relocation.
+ reloc_arm_movt,
+
+ // reloc_arm_movw - MOVW immediate relocation.
+ reloc_arm_movw
+ };
+ }
+}
+
+#endif
+
diff --git a/contrib/llvm/lib/Target/ARM/ARMSchedule.td b/contrib/llvm/lib/Target/ARM/ARMSchedule.td
new file mode 100644
index 0000000..958c5c6
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMSchedule.td
@@ -0,0 +1,261 @@
+//===- ARMSchedule.td - ARM Scheduling Definitions ---------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instruction Itinerary classes used for ARM
+//
+def IIC_iALUx : InstrItinClass;
+def IIC_iALUi : InstrItinClass;
+def IIC_iALUr : InstrItinClass;
+def IIC_iALUsi : InstrItinClass;
+def IIC_iALUsir : InstrItinClass;
+def IIC_iALUsr : InstrItinClass;
+def IIC_iBITi : InstrItinClass;
+def IIC_iBITr : InstrItinClass;
+def IIC_iBITsi : InstrItinClass;
+def IIC_iBITsr : InstrItinClass;
+def IIC_iUNAr : InstrItinClass;
+def IIC_iUNAsi : InstrItinClass;
+def IIC_iEXTr : InstrItinClass;
+def IIC_iEXTAr : InstrItinClass;
+def IIC_iEXTAsr : InstrItinClass;
+def IIC_iCMPi : InstrItinClass;
+def IIC_iCMPr : InstrItinClass;
+def IIC_iCMPsi : InstrItinClass;
+def IIC_iCMPsr : InstrItinClass;
+def IIC_iTSTi : InstrItinClass;
+def IIC_iTSTr : InstrItinClass;
+def IIC_iTSTsi : InstrItinClass;
+def IIC_iTSTsr : InstrItinClass;
+def IIC_iMOVi : InstrItinClass;
+def IIC_iMOVr : InstrItinClass;
+def IIC_iMOVsi : InstrItinClass;
+def IIC_iMOVsr : InstrItinClass;
+def IIC_iMOVix2 : InstrItinClass;
+def IIC_iMOVix2addpc : InstrItinClass;
+def IIC_iMOVix2ld : InstrItinClass;
+def IIC_iMVNi : InstrItinClass;
+def IIC_iMVNr : InstrItinClass;
+def IIC_iMVNsi : InstrItinClass;
+def IIC_iMVNsr : InstrItinClass;
+def IIC_iCMOVi : InstrItinClass;
+def IIC_iCMOVr : InstrItinClass;
+def IIC_iCMOVsi : InstrItinClass;
+def IIC_iCMOVsr : InstrItinClass;
+def IIC_iCMOVix2 : InstrItinClass;
+def IIC_iMUL16 : InstrItinClass;
+def IIC_iMAC16 : InstrItinClass;
+def IIC_iMUL32 : InstrItinClass;
+def IIC_iMAC32 : InstrItinClass;
+def IIC_iMUL64 : InstrItinClass;
+def IIC_iMAC64 : InstrItinClass;
+def IIC_iLoad_i : InstrItinClass;
+def IIC_iLoad_r : InstrItinClass;
+def IIC_iLoad_si : InstrItinClass;
+def IIC_iLoad_iu : InstrItinClass;
+def IIC_iLoad_ru : InstrItinClass;
+def IIC_iLoad_siu : InstrItinClass;
+def IIC_iLoad_bh_i : InstrItinClass;
+def IIC_iLoad_bh_r : InstrItinClass;
+def IIC_iLoad_bh_si : InstrItinClass;
+def IIC_iLoad_bh_iu : InstrItinClass;
+def IIC_iLoad_bh_ru : InstrItinClass;
+def IIC_iLoad_bh_siu : InstrItinClass;
+def IIC_iLoad_d_i : InstrItinClass;
+def IIC_iLoad_d_r : InstrItinClass;
+def IIC_iLoad_d_ru : InstrItinClass;
+def IIC_iLoad_m : InstrItinClass<0>; // micro-coded
+def IIC_iLoad_mu : InstrItinClass<0>; // micro-coded
+def IIC_iLoad_mBr : InstrItinClass<0>; // micro-coded
+def IIC_iPop : InstrItinClass<0>; // micro-coded
+def IIC_iPop_Br : InstrItinClass<0>; // micro-coded
+def IIC_iLoadiALU : InstrItinClass;
+def IIC_iStore_i : InstrItinClass;
+def IIC_iStore_r : InstrItinClass;
+def IIC_iStore_si : InstrItinClass;
+def IIC_iStore_iu : InstrItinClass;
+def IIC_iStore_ru : InstrItinClass;
+def IIC_iStore_siu : InstrItinClass;
+def IIC_iStore_bh_i : InstrItinClass;
+def IIC_iStore_bh_r : InstrItinClass;
+def IIC_iStore_bh_si : InstrItinClass;
+def IIC_iStore_bh_iu : InstrItinClass;
+def IIC_iStore_bh_ru : InstrItinClass;
+def IIC_iStore_bh_siu : InstrItinClass;
+def IIC_iStore_d_i : InstrItinClass;
+def IIC_iStore_d_r : InstrItinClass;
+def IIC_iStore_d_ru : InstrItinClass;
+def IIC_iStore_m : InstrItinClass<0>; // micro-coded
+def IIC_iStore_mu : InstrItinClass<0>; // micro-coded
+def IIC_Preload : InstrItinClass;
+def IIC_Br : InstrItinClass;
+def IIC_fpSTAT : InstrItinClass;
+def IIC_fpUNA32 : InstrItinClass;
+def IIC_fpUNA64 : InstrItinClass;
+def IIC_fpCMP32 : InstrItinClass;
+def IIC_fpCMP64 : InstrItinClass;
+def IIC_fpCVTSD : InstrItinClass;
+def IIC_fpCVTDS : InstrItinClass;
+def IIC_fpCVTSH : InstrItinClass;
+def IIC_fpCVTHS : InstrItinClass;
+def IIC_fpCVTIS : InstrItinClass;
+def IIC_fpCVTID : InstrItinClass;
+def IIC_fpCVTSI : InstrItinClass;
+def IIC_fpCVTDI : InstrItinClass;
+def IIC_fpMOVIS : InstrItinClass;
+def IIC_fpMOVID : InstrItinClass;
+def IIC_fpMOVSI : InstrItinClass;
+def IIC_fpMOVDI : InstrItinClass;
+def IIC_fpALU32 : InstrItinClass;
+def IIC_fpALU64 : InstrItinClass;
+def IIC_fpMUL32 : InstrItinClass;
+def IIC_fpMUL64 : InstrItinClass;
+def IIC_fpMAC32 : InstrItinClass;
+def IIC_fpMAC64 : InstrItinClass;
+def IIC_fpDIV32 : InstrItinClass;
+def IIC_fpDIV64 : InstrItinClass;
+def IIC_fpSQRT32 : InstrItinClass;
+def IIC_fpSQRT64 : InstrItinClass;
+def IIC_fpLoad32 : InstrItinClass;
+def IIC_fpLoad64 : InstrItinClass;
+def IIC_fpLoad_m : InstrItinClass<0>; // micro-coded
+def IIC_fpLoad_mu : InstrItinClass<0>; // micro-coded
+def IIC_fpStore32 : InstrItinClass;
+def IIC_fpStore64 : InstrItinClass;
+def IIC_fpStore_m : InstrItinClass<0>; // micro-coded
+def IIC_fpStore_mu : InstrItinClass<0>; // micro-coded
+def IIC_VLD1 : InstrItinClass;
+def IIC_VLD1x2 : InstrItinClass;
+def IIC_VLD1x3 : InstrItinClass;
+def IIC_VLD1x4 : InstrItinClass;
+def IIC_VLD1u : InstrItinClass;
+def IIC_VLD1x2u : InstrItinClass;
+def IIC_VLD1x3u : InstrItinClass;
+def IIC_VLD1x4u : InstrItinClass;
+def IIC_VLD1ln : InstrItinClass;
+def IIC_VLD1lnu : InstrItinClass;
+def IIC_VLD1dup : InstrItinClass;
+def IIC_VLD1dupu : InstrItinClass;
+def IIC_VLD2 : InstrItinClass;
+def IIC_VLD2x2 : InstrItinClass;
+def IIC_VLD2u : InstrItinClass;
+def IIC_VLD2x2u : InstrItinClass;
+def IIC_VLD2ln : InstrItinClass;
+def IIC_VLD2lnu : InstrItinClass;
+def IIC_VLD2dup : InstrItinClass;
+def IIC_VLD2dupu : InstrItinClass;
+def IIC_VLD3 : InstrItinClass;
+def IIC_VLD3ln : InstrItinClass;
+def IIC_VLD3u : InstrItinClass;
+def IIC_VLD3lnu : InstrItinClass;
+def IIC_VLD3dup : InstrItinClass;
+def IIC_VLD3dupu : InstrItinClass;
+def IIC_VLD4 : InstrItinClass;
+def IIC_VLD4ln : InstrItinClass;
+def IIC_VLD4u : InstrItinClass;
+def IIC_VLD4lnu : InstrItinClass;
+def IIC_VLD4dup : InstrItinClass;
+def IIC_VLD4dupu : InstrItinClass;
+def IIC_VST1 : InstrItinClass;
+def IIC_VST1x2 : InstrItinClass;
+def IIC_VST1x3 : InstrItinClass;
+def IIC_VST1x4 : InstrItinClass;
+def IIC_VST1u : InstrItinClass;
+def IIC_VST1x2u : InstrItinClass;
+def IIC_VST1x3u : InstrItinClass;
+def IIC_VST1x4u : InstrItinClass;
+def IIC_VST1ln : InstrItinClass;
+def IIC_VST1lnu : InstrItinClass;
+def IIC_VST2 : InstrItinClass;
+def IIC_VST2x2 : InstrItinClass;
+def IIC_VST2u : InstrItinClass;
+def IIC_VST2x2u : InstrItinClass;
+def IIC_VST2ln : InstrItinClass;
+def IIC_VST2lnu : InstrItinClass;
+def IIC_VST3 : InstrItinClass;
+def IIC_VST3u : InstrItinClass;
+def IIC_VST3ln : InstrItinClass;
+def IIC_VST3lnu : InstrItinClass;
+def IIC_VST4 : InstrItinClass;
+def IIC_VST4u : InstrItinClass;
+def IIC_VST4ln : InstrItinClass;
+def IIC_VST4lnu : InstrItinClass;
+def IIC_VUNAD : InstrItinClass;
+def IIC_VUNAQ : InstrItinClass;
+def IIC_VBIND : InstrItinClass;
+def IIC_VBINQ : InstrItinClass;
+def IIC_VPBIND : InstrItinClass;
+def IIC_VFMULD : InstrItinClass;
+def IIC_VFMULQ : InstrItinClass;
+def IIC_VMOV : InstrItinClass;
+def IIC_VMOVImm : InstrItinClass;
+def IIC_VMOVD : InstrItinClass;
+def IIC_VMOVQ : InstrItinClass;
+def IIC_VMOVIS : InstrItinClass;
+def IIC_VMOVID : InstrItinClass;
+def IIC_VMOVISL : InstrItinClass;
+def IIC_VMOVSI : InstrItinClass;
+def IIC_VMOVDI : InstrItinClass;
+def IIC_VMOVN : InstrItinClass;
+def IIC_VPERMD : InstrItinClass;
+def IIC_VPERMQ : InstrItinClass;
+def IIC_VPERMQ3 : InstrItinClass;
+def IIC_VMACD : InstrItinClass;
+def IIC_VMACQ : InstrItinClass;
+def IIC_VRECSD : InstrItinClass;
+def IIC_VRECSQ : InstrItinClass;
+def IIC_VCNTiD : InstrItinClass;
+def IIC_VCNTiQ : InstrItinClass;
+def IIC_VUNAiD : InstrItinClass;
+def IIC_VUNAiQ : InstrItinClass;
+def IIC_VQUNAiD : InstrItinClass;
+def IIC_VQUNAiQ : InstrItinClass;
+def IIC_VBINiD : InstrItinClass;
+def IIC_VBINiQ : InstrItinClass;
+def IIC_VSUBiD : InstrItinClass;
+def IIC_VSUBiQ : InstrItinClass;
+def IIC_VBINi4D : InstrItinClass;
+def IIC_VBINi4Q : InstrItinClass;
+def IIC_VSUBi4D : InstrItinClass;
+def IIC_VSUBi4Q : InstrItinClass;
+def IIC_VABAD : InstrItinClass;
+def IIC_VABAQ : InstrItinClass;
+def IIC_VSHLiD : InstrItinClass;
+def IIC_VSHLiQ : InstrItinClass;
+def IIC_VSHLi4D : InstrItinClass;
+def IIC_VSHLi4Q : InstrItinClass;
+def IIC_VPALiD : InstrItinClass;
+def IIC_VPALiQ : InstrItinClass;
+def IIC_VMULi16D : InstrItinClass;
+def IIC_VMULi32D : InstrItinClass;
+def IIC_VMULi16Q : InstrItinClass;
+def IIC_VMULi32Q : InstrItinClass;
+def IIC_VMACi16D : InstrItinClass;
+def IIC_VMACi32D : InstrItinClass;
+def IIC_VMACi16Q : InstrItinClass;
+def IIC_VMACi32Q : InstrItinClass;
+def IIC_VEXTD : InstrItinClass;
+def IIC_VEXTQ : InstrItinClass;
+def IIC_VTB1 : InstrItinClass;
+def IIC_VTB2 : InstrItinClass;
+def IIC_VTB3 : InstrItinClass;
+def IIC_VTB4 : InstrItinClass;
+def IIC_VTBX1 : InstrItinClass;
+def IIC_VTBX2 : InstrItinClass;
+def IIC_VTBX3 : InstrItinClass;
+def IIC_VTBX4 : InstrItinClass;
+
+//===----------------------------------------------------------------------===//
+// Processor instruction itineraries.
+
+def GenericItineraries : ProcessorItineraries<[], [], []>;
+
+include "ARMScheduleV6.td"
+include "ARMScheduleA8.td"
+include "ARMScheduleA9.td"
diff --git a/contrib/llvm/lib/Target/ARM/ARMScheduleA8.td b/contrib/llvm/lib/Target/ARM/ARMScheduleA8.td
new file mode 100644
index 0000000..8d86c01
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMScheduleA8.td
@@ -0,0 +1,1034 @@
+//=- ARMScheduleA8.td - ARM Cortex-A8 Scheduling Definitions -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the ARM Cortex A8 processors.
+//
+//===----------------------------------------------------------------------===//
+
+//
+// Scheduling information derived from "Cortex-A8 Technical Reference Manual".
+// Functional Units.
+def A8_Pipe0 : FuncUnit; // pipeline 0
+def A8_Pipe1 : FuncUnit; // pipeline 1
+def A8_LSPipe : FuncUnit; // Load / store pipeline
+def A8_NPipe : FuncUnit; // NEON ALU/MUL pipe
+def A8_NLSPipe : FuncUnit; // NEON LS pipe
+//
+// Dual issue pipeline represented by A8_Pipe0 | A8_Pipe1
+//
+def CortexA8Itineraries : ProcessorItineraries<
+ [A8_Pipe0, A8_Pipe1, A8_LSPipe, A8_NPipe, A8_NLSPipe],
+ [], [
+ // Two fully-pipelined integer ALU pipelines
+ //
+ // No operand cycles
+ InstrItinData<IIC_iALUx , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>]>,
+ //
+ // Binary Instructions that produce a result
+ InstrItinData<IIC_iALUi ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
+ InstrItinData<IIC_iALUr ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 2]>,
+ InstrItinData<IIC_iALUsi,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1]>,
+ InstrItinData<IIC_iALUsir,[InstrStage<1,[A8_Pipe0, A8_Pipe1]>], [2, 1, 2]>,
+ InstrItinData<IIC_iALUsr,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1, 1]>,
+ //
+ // Bitwise Instructions that produce a result
+ InstrItinData<IIC_iBITi ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
+ InstrItinData<IIC_iBITr ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 2]>,
+ InstrItinData<IIC_iBITsi,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1]>,
+ InstrItinData<IIC_iBITsr,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1, 1]>,
+ //
+ // Unary Instructions that produce a result
+ InstrItinData<IIC_iUNAr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
+ InstrItinData<IIC_iUNAsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
+ //
+ // Zero and sign extension instructions
+ InstrItinData<IIC_iEXTr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
+ InstrItinData<IIC_iEXTAr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1]>,
+ InstrItinData<IIC_iEXTAsr,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>],[2, 2, 1, 1]>,
+ //
+ // Compare instructions
+ InstrItinData<IIC_iCMPi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
+ InstrItinData<IIC_iCMPr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
+ InstrItinData<IIC_iCMPsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
+ InstrItinData<IIC_iCMPsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>,
+ //
+ // Test instructions
+ InstrItinData<IIC_iTSTi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
+ InstrItinData<IIC_iTSTr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
+ InstrItinData<IIC_iTSTsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
+ InstrItinData<IIC_iTSTsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>,
+ //
+ // Move instructions, unconditional
+ InstrItinData<IIC_iMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1]>,
+ InstrItinData<IIC_iMOVr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
+ InstrItinData<IIC_iMOVsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
+ InstrItinData<IIC_iMOVsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1, 1]>,
+ InstrItinData<IIC_iMOVix2,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
+ InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [3]>,
+ InstrItinData<IIC_iMOVix2ld,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_LSPipe]>], [5]>,
+ //
+ // Move instructions, conditional
+ InstrItinData<IIC_iCMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
+ InstrItinData<IIC_iCMOVr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
+ InstrItinData<IIC_iCMOVsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
+ InstrItinData<IIC_iCMOVsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>,
+ InstrItinData<IIC_iCMOVix2,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [3, 1]>,
+ //
+ // MVN instructions
+ InstrItinData<IIC_iMVNi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1]>,
+ InstrItinData<IIC_iMVNr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
+ InstrItinData<IIC_iMVNsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
+ InstrItinData<IIC_iMVNsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1, 1]>,
+
+ // Integer multiply pipeline
+ // Result written in E5, but that is relative to the last cycle of multicycle,
+ // so we use 6 for those cases
+ //
+ InstrItinData<IIC_iMUL16 , [InstrStage<1, [A8_Pipe0]>], [5, 1, 1]>,
+ InstrItinData<IIC_iMAC16 , [InstrStage<2, [A8_Pipe0]>], [6, 1, 1, 4]>,
+ InstrItinData<IIC_iMUL32 , [InstrStage<2, [A8_Pipe0]>], [6, 1, 1]>,
+ InstrItinData<IIC_iMAC32 , [InstrStage<2, [A8_Pipe0]>], [6, 1, 1, 4]>,
+ InstrItinData<IIC_iMUL64 , [InstrStage<3, [A8_Pipe0]>], [6, 6, 1, 1]>,
+ InstrItinData<IIC_iMAC64 , [InstrStage<3, [A8_Pipe0]>], [6, 6, 1, 1]>,
+
+ // Integer load pipeline
+ //
+ // Immediate offset
+ InstrItinData<IIC_iLoad_i , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1]>,
+ InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1]>,
+ InstrItinData<IIC_iLoad_d_i, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1]>,
+ //
+ // Register offset
+ InstrItinData<IIC_iLoad_r , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>,
+ InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>,
+ InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>,
+ //
+ // Scaled register offset, issues over 2 cycles
+ // FIXME: lsl by 2 takes 1 cycle.
+ InstrItinData<IIC_iLoad_si , [InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [4, 1, 1]>,
+ InstrItinData<IIC_iLoad_bh_si,[InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [4, 1, 1]>,
+ //
+ // Immediate offset with update
+ InstrItinData<IIC_iLoad_iu , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 2, 1]>,
+ InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 2, 1]>,
+ //
+ // Register offset with update
+ InstrItinData<IIC_iLoad_ru , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 2, 1, 1]>,
+ InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 2, 1, 1]>,
+ InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 2, 1, 1]>,
+ //
+ // Scaled register offset with update, issues over 2 cycles
+ InstrItinData<IIC_iLoad_siu , [InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_LSPipe]>], [4, 3, 1, 1]>,
+ InstrItinData<IIC_iLoad_bh_siu,[InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_LSPipe]>], [4, 3, 1, 1]>,
+ //
+ // Load multiple, def is the 5th operand. Pipeline 0 only.
+ // FIXME: A8_LSPipe cycle time is dynamic, this assumes 3 to 4 registers.
+ InstrItinData<IIC_iLoad_m , [InstrStage<2, [A8_Pipe0], 0>,
+ InstrStage<2, [A8_LSPipe]>], [1, 1, 1, 1, 3]>,
+ //
+ // Load multiple + update, defs are the 1st and 5th operands.
+ InstrItinData<IIC_iLoad_mu , [InstrStage<3, [A8_Pipe0], 0>,
+ InstrStage<3, [A8_LSPipe]>], [2, 1, 1, 1, 3]>,
+ //
+ // Load multiple plus branch
+ InstrItinData<IIC_iLoad_mBr, [InstrStage<3, [A8_Pipe0], 0>,
+ InstrStage<3, [A8_LSPipe]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>],
+ [1, 2, 1, 1, 3]>,
+ //
+ // Pop, def is the 3rd operand.
+ InstrItinData<IIC_iPop , [InstrStage<3, [A8_Pipe0], 0>,
+ InstrStage<3, [A8_LSPipe]>], [1, 1, 3]>,
+ //
+ // Push, def is the 3th operand.
+ InstrItinData<IIC_iPop_Br, [InstrStage<3, [A8_Pipe0], 0>,
+ InstrStage<3, [A8_LSPipe]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>],
+ [1, 1, 3]>,
+
+ //
+ // iLoadi + iALUr for t2LDRpci_pic.
+ InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [4, 1]>,
+
+
+ // Integer store pipeline
+ //
+ // Immediate offset
+ InstrItinData<IIC_iStore_i , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1]>,
+ InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1]>,
+ InstrItinData<IIC_iStore_d_i, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1]>,
+ //
+ // Register offset
+ InstrItinData<IIC_iStore_r , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>,
+ InstrItinData<IIC_iStore_d_r, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>,
+ //
+ // Scaled register offset, issues over 2 cycles
+ InstrItinData<IIC_iStore_si , [InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_LSPipe]>], [3, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_si,[InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_LSPipe]>], [3, 1, 1]>,
+ //
+ // Immediate offset with update
+ InstrItinData<IIC_iStore_iu , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [2, 3, 1]>,
+ InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [2, 3, 1]>,
+ //
+ // Register offset with update
+ InstrItinData<IIC_iStore_ru , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [2, 3, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [2, 3, 1, 1]>,
+ InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [2, 3, 1, 1]>,
+ //
+ // Scaled register offset with update, issues over 2 cycles
+ InstrItinData<IIC_iStore_siu, [InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_LSPipe]>], [3, 3, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_siu,[InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_LSPipe]>], [3, 3, 1, 1]>,
+ //
+ // Store multiple. Pipeline 0 only.
+ // FIXME: A8_LSPipe cycle time is dynamic, this assumes 3 to 4 registers.
+ InstrItinData<IIC_iStore_m , [InstrStage<2, [A8_Pipe0], 0>,
+ InstrStage<2, [A8_LSPipe]>]>,
+ //
+ // Store multiple + update
+ InstrItinData<IIC_iStore_mu, [InstrStage<2, [A8_Pipe0], 0>,
+ InstrStage<2, [A8_LSPipe]>], [2]>,
+
+ //
+ // Preload
+ InstrItinData<IIC_Preload, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
+
+ // Branch
+ //
+ // no delay slots, so the latency of a branch is unimportant
+ InstrItinData<IIC_Br , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>]>,
+
+ // VFP
+ // Issue through integer pipeline, and execute in NEON unit. We assume
+ // RunFast mode so that NFP pipeline is used for single-precision when
+ // possible.
+ //
+ // FP Special Register to Integer Register File Move
+ InstrItinData<IIC_fpSTAT , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe]>], [20]>,
+ //
+ // Single-precision FP Unary
+ InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [7, 1]>,
+ //
+ // Double-precision FP Unary
+ InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NPipe], 0>,
+ InstrStage<4, [A8_NLSPipe]>], [4, 1]>,
+ //
+ // Single-precision FP Compare
+ InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [1, 1]>,
+ //
+ // Double-precision FP Compare
+ InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NPipe], 0>,
+ InstrStage<4, [A8_NLSPipe]>], [4, 1]>,
+ //
+ // Single to Double FP Convert
+ InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<7, [A8_NPipe], 0>,
+ InstrStage<7, [A8_NLSPipe]>], [7, 1]>,
+ //
+ // Double to Single FP Convert
+ InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<5, [A8_NPipe], 0>,
+ InstrStage<5, [A8_NLSPipe]>], [5, 1]>,
+ //
+ // Single-Precision FP to Integer Convert
+ InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [7, 1]>,
+ //
+ // Double-Precision FP to Integer Convert
+ InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<8, [A8_NPipe], 0>,
+ InstrStage<8, [A8_NLSPipe]>], [8, 1]>,
+ //
+ // Integer to Single-Precision FP Convert
+ InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [7, 1]>,
+ //
+ // Integer to Double-Precision FP Convert
+ InstrItinData<IIC_fpCVTID , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<8, [A8_NPipe], 0>,
+ InstrStage<8, [A8_NLSPipe]>], [8, 1]>,
+ //
+ // Single-precision FP ALU
+ InstrItinData<IIC_fpALU32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [7, 1, 1]>,
+ //
+ // Double-precision FP ALU
+ InstrItinData<IIC_fpALU64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<9, [A8_NPipe], 0>,
+ InstrStage<9, [A8_NLSPipe]>], [9, 1, 1]>,
+ //
+ // Single-precision FP Multiply
+ InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [7, 1, 1]>,
+ //
+ // Double-precision FP Multiply
+ InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<11, [A8_NPipe], 0>,
+ InstrStage<11, [A8_NLSPipe]>], [11, 1, 1]>,
+ //
+ // Single-precision FP MAC
+ InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [7, 2, 1, 1]>,
+ //
+ // Double-precision FP MAC
+ InstrItinData<IIC_fpMAC64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<19, [A8_NPipe], 0>,
+ InstrStage<19, [A8_NLSPipe]>], [19, 2, 1, 1]>,
+ //
+ // Single-precision FP DIV
+ InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<20, [A8_NPipe], 0>,
+ InstrStage<20, [A8_NLSPipe]>], [20, 1, 1]>,
+ //
+ // Double-precision FP DIV
+ InstrItinData<IIC_fpDIV64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<29, [A8_NPipe], 0>,
+ InstrStage<29, [A8_NLSPipe]>], [29, 1, 1]>,
+ //
+ // Single-precision FP SQRT
+ InstrItinData<IIC_fpSQRT32, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<19, [A8_NPipe], 0>,
+ InstrStage<19, [A8_NLSPipe]>], [19, 1]>,
+ //
+ // Double-precision FP SQRT
+ InstrItinData<IIC_fpSQRT64, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<29, [A8_NPipe], 0>,
+ InstrStage<29, [A8_NLSPipe]>], [29, 1]>,
+
+ //
+ // Integer to Single-precision Move
+ InstrItinData<IIC_fpMOVIS, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>],
+ [2, 1]>,
+ //
+ // Integer to Double-precision Move
+ InstrItinData<IIC_fpMOVID, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>],
+ [2, 1, 1]>,
+ //
+ // Single-precision to Integer Move
+ InstrItinData<IIC_fpMOVSI, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>],
+ [20, 1]>,
+ //
+ // Double-precision to Integer Move
+ InstrItinData<IIC_fpMOVDI, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>],
+ [20, 20, 1]>,
+
+ //
+ // Single-precision FP Load
+ InstrItinData<IIC_fpLoad32, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>],
+ [2, 1]>,
+ //
+ // Double-precision FP Load
+ InstrItinData<IIC_fpLoad64, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>],
+ [2, 1]>,
+ //
+ // FP Load Multiple
+ // FIXME: A8_LSPipe cycle time is dynamic, this assumes 3 to 4 registers.
+ InstrItinData<IIC_fpLoad_m, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>], [1, 1, 1, 2]>,
+ //
+ // FP Load Multiple + update
+ InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>], [2, 1, 1, 1, 2]>,
+ //
+ // Single-precision FP Store
+ InstrItinData<IIC_fpStore32,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>],
+ [1, 1]>,
+ //
+ // Double-precision FP Store
+ InstrItinData<IIC_fpStore64,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>],
+ [1, 1]>,
+ //
+ // FP Store Multiple
+ InstrItinData<IIC_fpStore_m,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>], [1, 1, 1, 1]>,
+ //
+ // FP Store Multiple + update
+ InstrItinData<IIC_fpStore_mu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>], [2, 1, 1, 1, 1]>,
+
+ // NEON
+ // Issue through integer pipeline, and execute in NEON unit.
+ //
+ // VLD1
+ InstrItinData<IIC_VLD1, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 1]>,
+ // VLD1x2
+ InstrItinData<IIC_VLD1x2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 2, 1]>,
+ //
+ // VLD1x3
+ InstrItinData<IIC_VLD1x3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 2, 3, 1]>,
+ //
+ // VLD1x4
+ InstrItinData<IIC_VLD1x4, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 2, 3, 3, 1]>,
+ //
+ // VLD1u
+ InstrItinData<IIC_VLD1u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 2, 1]>,
+ //
+ // VLD1x2u
+ InstrItinData<IIC_VLD1x2u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 2, 2, 1]>,
+ //
+ // VLD1x3u
+ InstrItinData<IIC_VLD1x3u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 2, 3, 2, 1]>,
+ //
+ // VLD1x4u
+ InstrItinData<IIC_VLD1x4u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 2, 3, 3, 2, 1]>,
+ //
+ // VLD1ln
+ InstrItinData<IIC_VLD1ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [3, 1, 1, 1]>,
+ //
+ // VLD1lnu
+ InstrItinData<IIC_VLD1lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [3, 2, 1, 1, 1, 1]>,
+ //
+ // VLD1dup
+ InstrItinData<IIC_VLD1dup, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 1]>,
+ //
+ // VLD1dupu
+ InstrItinData<IIC_VLD1dupu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 2, 1, 1]>,
+ //
+ // VLD2
+ InstrItinData<IIC_VLD2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 2, 1]>,
+ //
+ // VLD2x2
+ InstrItinData<IIC_VLD2x2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 2, 3, 3, 1]>,
+ //
+ // VLD2ln
+ InstrItinData<IIC_VLD2ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [3, 3, 1, 1, 1, 1]>,
+ //
+ // VLD2u
+ InstrItinData<IIC_VLD2u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 2, 2, 1, 1, 1]>,
+ //
+ // VLD2x2u
+ InstrItinData<IIC_VLD2x2u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 2, 3, 3, 2, 1]>,
+ //
+ // VLD2lnu
+ InstrItinData<IIC_VLD2lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [3, 3, 2, 1, 1, 1, 1, 1]>,
+ //
+ // VLD2dup
+ InstrItinData<IIC_VLD2dup, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 2, 1]>,
+ //
+ // VLD2dupu
+ InstrItinData<IIC_VLD2dupu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 2, 2, 1, 1]>,
+ //
+ // VLD3
+ InstrItinData<IIC_VLD3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NLSPipe], 0>,
+ InstrStage<4, [A8_LSPipe]>],
+ [3, 3, 4, 1]>,
+ //
+ // VLD3ln
+ InstrItinData<IIC_VLD3ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<5, [A8_NLSPipe], 0>,
+ InstrStage<5, [A8_LSPipe]>],
+ [4, 4, 5, 1, 1, 1, 1, 2]>,
+ //
+ // VLD3u
+ InstrItinData<IIC_VLD3u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NLSPipe], 0>,
+ InstrStage<4, [A8_LSPipe]>],
+ [3, 3, 4, 2, 1]>,
+ //
+ // VLD3lnu
+ InstrItinData<IIC_VLD3lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<5, [A8_NLSPipe], 0>,
+ InstrStage<5, [A8_LSPipe]>],
+ [4, 4, 5, 2, 1, 1, 1, 1, 1, 2]>,
+ //
+ // VLD3dup
+ InstrItinData<IIC_VLD3dup, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 2, 3, 1]>,
+ //
+ // VLD3dupu
+ InstrItinData<IIC_VLD3dupu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 2, 3, 2, 1, 1]>,
+ //
+ // VLD4
+ InstrItinData<IIC_VLD4, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NLSPipe], 0>,
+ InstrStage<4, [A8_LSPipe]>],
+ [3, 3, 4, 4, 1]>,
+ //
+ // VLD4ln
+ InstrItinData<IIC_VLD4ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<5, [A8_NLSPipe], 0>,
+ InstrStage<5, [A8_LSPipe]>],
+ [4, 4, 5, 5, 1, 1, 1, 1, 2, 2]>,
+ //
+ // VLD4u
+ InstrItinData<IIC_VLD4u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NLSPipe], 0>,
+ InstrStage<4, [A8_LSPipe]>],
+ [3, 3, 4, 4, 2, 1]>,
+ //
+ // VLD4lnu
+ InstrItinData<IIC_VLD4lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<5, [A8_NLSPipe], 0>,
+ InstrStage<5, [A8_LSPipe]>],
+ [4, 4, 5, 5, 2, 1, 1, 1, 1, 1, 2, 2]>,
+ //
+ // VLD4dup
+ InstrItinData<IIC_VLD4dup, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 2, 3, 3, 1]>,
+ //
+ // VLD4dupu
+ InstrItinData<IIC_VLD4dupu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 2, 3, 3, 2, 1, 1]>,
+ //
+ // VST1
+ InstrItinData<IIC_VST1, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [1, 1, 1]>,
+ //
+ // VST1x2
+ InstrItinData<IIC_VST1x2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [1, 1, 1, 1]>,
+ //
+ // VST1x3
+ InstrItinData<IIC_VST1x3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [1, 1, 1, 1, 2]>,
+ //
+ // VST1x4
+ InstrItinData<IIC_VST1x4, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [1, 1, 1, 1, 2, 2]>,
+ //
+ // VST1u
+ InstrItinData<IIC_VST1u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1]>,
+ //
+ // VST1x2u
+ InstrItinData<IIC_VST1x2u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1, 1]>,
+ //
+ // VST1x3u
+ InstrItinData<IIC_VST1x3u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1, 1, 2]>,
+ //
+ // VST1x4u
+ InstrItinData<IIC_VST1x4u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1, 1, 2, 2]>,
+ //
+ // VST1ln
+ InstrItinData<IIC_VST1ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [1, 1, 1]>,
+ //
+ // VST1lnu
+ InstrItinData<IIC_VST1lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1]>,
+ //
+ // VST2
+ InstrItinData<IIC_VST2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [1, 1, 1, 1]>,
+ //
+ // VST2x2
+ InstrItinData<IIC_VST2x2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NLSPipe], 0>,
+ InstrStage<4, [A8_LSPipe]>],
+ [1, 1, 1, 1, 2, 2]>,
+ //
+ // VST2u
+ InstrItinData<IIC_VST2u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1, 1]>,
+ //
+ // VST2x2u
+ InstrItinData<IIC_VST2x2u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NLSPipe], 0>,
+ InstrStage<4, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1, 1, 2, 2]>,
+ //
+ // VST2ln
+ InstrItinData<IIC_VST2ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [1, 1, 1, 1]>,
+ //
+ // VST2lnu
+ InstrItinData<IIC_VST2lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1, 1]>,
+ //
+ // VST3
+ InstrItinData<IIC_VST3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [1, 1, 1, 1, 2]>,
+ //
+ // VST3u
+ InstrItinData<IIC_VST3u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1, 1, 2]>,
+ //
+ // VST3ln
+ InstrItinData<IIC_VST3ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [1, 1, 1, 1, 2]>,
+ //
+ // VST3lnu
+ InstrItinData<IIC_VST3lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1, 1, 2]>,
+ //
+ // VST4
+ InstrItinData<IIC_VST4, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NLSPipe], 0>,
+ InstrStage<4, [A8_LSPipe]>],
+ [1, 1, 1, 1, 2, 2]>,
+ //
+ // VST4u
+ InstrItinData<IIC_VST4u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NLSPipe], 0>,
+ InstrStage<4, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1, 1, 2, 2]>,
+ //
+ // VST4ln
+ InstrItinData<IIC_VST4ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NLSPipe], 0>,
+ InstrStage<4, [A8_LSPipe]>],
+ [1, 1, 1, 1, 2, 2]>,
+ //
+ // VST4lnu
+ InstrItinData<IIC_VST4lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NLSPipe], 0>,
+ InstrStage<4, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1, 1, 2, 2]>,
+ //
+ // Double-register FP Unary
+ InstrItinData<IIC_VUNAD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [5, 2]>,
+ //
+ // Quad-register FP Unary
+ // Result written in N5, but that is relative to the last cycle of multicycle,
+ // so we use 6 for those cases
+ InstrItinData<IIC_VUNAQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [6, 2]>,
+ //
+ // Double-register FP Binary
+ InstrItinData<IIC_VBIND, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [5, 2, 2]>,
+ //
+ // VPADD, etc.
+ InstrItinData<IIC_VPBIND, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [5, 2, 2]>,
+ //
+ // Double-register FP VMUL
+ InstrItinData<IIC_VFMULD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [5, 2, 1]>,
+
+ //
+ // Quad-register FP Binary
+ // Result written in N5, but that is relative to the last cycle of multicycle,
+ // so we use 6 for those cases
+ InstrItinData<IIC_VBINQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [6, 2, 2]>,
+ //
+ // Quad-register FP VMUL
+ InstrItinData<IIC_VFMULQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [6, 2, 1]>,
+ //
+ // Move
+ InstrItinData<IIC_VMOV, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [1, 1]>,
+ //
+ // Move Immediate
+ InstrItinData<IIC_VMOVImm, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [3]>,
+ //
+ // Double-register Permute Move
+ InstrItinData<IIC_VMOVD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe]>], [2, 1]>,
+ //
+ // Quad-register Permute Move
+ // Result written in N2, but that is relative to the last cycle of multicycle,
+ // so we use 3 for those cases
+ InstrItinData<IIC_VMOVQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe]>], [3, 1]>,
+ //
+ // Integer to Single-precision Move
+ InstrItinData<IIC_VMOVIS , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe]>], [2, 1]>,
+ //
+ // Integer to Double-precision Move
+ InstrItinData<IIC_VMOVID , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe]>], [2, 1, 1]>,
+ //
+ // Single-precision to Integer Move
+ InstrItinData<IIC_VMOVSI , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe]>], [20, 1]>,
+ //
+ // Double-precision to Integer Move
+ InstrItinData<IIC_VMOVDI , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe]>], [20, 20, 1]>,
+ //
+ // Integer to Lane Move
+ InstrItinData<IIC_VMOVISL , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe]>], [3, 1, 1]>,
+ //
+ // Vector narrow move
+ InstrItinData<IIC_VMOVN , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [2, 1]>,
+ //
+ // Double-register Permute
+ InstrItinData<IIC_VPERMD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe]>], [2, 2, 1, 1]>,
+ //
+ // Quad-register Permute
+ // Result written in N2, but that is relative to the last cycle of multicycle,
+ // so we use 3 for those cases
+ InstrItinData<IIC_VPERMQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe]>], [3, 3, 1, 1]>,
+ //
+ // Quad-register Permute (3 cycle issue)
+ // Result written in N2, but that is relative to the last cycle of multicycle,
+ // so we use 4 for those cases
+ InstrItinData<IIC_VPERMQ3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe]>,
+ InstrStage<1, [A8_NPipe], 0>,
+ InstrStage<2, [A8_NLSPipe]>], [4, 4, 1, 1]>,
+ //
+ // Double-register FP Multiple-Accumulate
+ InstrItinData<IIC_VMACD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [9, 3, 2, 2]>,
+ //
+ // Quad-register FP Multiple-Accumulate
+ // Result written in N9, but that is relative to the last cycle of multicycle,
+ // so we use 10 for those cases
+ InstrItinData<IIC_VMACQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [10, 3, 2, 2]>,
+ //
+ // Double-register Reciprical Step
+ InstrItinData<IIC_VRECSD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [9, 2, 2]>,
+ //
+ // Quad-register Reciprical Step
+ InstrItinData<IIC_VRECSQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [10, 2, 2]>,
+ //
+ // Double-register Integer Count
+ InstrItinData<IIC_VCNTiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [3, 2, 2]>,
+ //
+ // Quad-register Integer Count
+ // Result written in N3, but that is relative to the last cycle of multicycle,
+ // so we use 4 for those cases
+ InstrItinData<IIC_VCNTiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [4, 2, 2]>,
+ //
+ // Double-register Integer Unary
+ InstrItinData<IIC_VUNAiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [4, 2]>,
+ //
+ // Quad-register Integer Unary
+ InstrItinData<IIC_VUNAiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [4, 2]>,
+ //
+ // Double-register Integer Q-Unary
+ InstrItinData<IIC_VQUNAiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [4, 1]>,
+ //
+ // Quad-register Integer CountQ-Unary
+ InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [4, 1]>,
+ //
+ // Double-register Integer Binary
+ InstrItinData<IIC_VBINiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [3, 2, 2]>,
+ //
+ // Quad-register Integer Binary
+ InstrItinData<IIC_VBINiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [3, 2, 2]>,
+ //
+ // Double-register Integer Binary (4 cycle)
+ InstrItinData<IIC_VBINi4D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
+ //
+ // Quad-register Integer Binary (4 cycle)
+ InstrItinData<IIC_VBINi4Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
+
+ //
+ // Double-register Integer Subtract
+ InstrItinData<IIC_VSUBiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [3, 2, 1]>,
+ //
+ // Quad-register Integer Subtract
+ InstrItinData<IIC_VSUBiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [3, 2, 1]>,
+ //
+ // Double-register Integer Subtract
+ InstrItinData<IIC_VSUBi4D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
+ //
+ // Quad-register Integer Subtract
+ InstrItinData<IIC_VSUBi4Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
+ //
+ // Double-register Integer Shift
+ InstrItinData<IIC_VSHLiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [3, 1, 1]>,
+ //
+ // Quad-register Integer Shift
+ InstrItinData<IIC_VSHLiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [4, 1, 1]>,
+ //
+ // Double-register Integer Shift (4 cycle)
+ InstrItinData<IIC_VSHLi4D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [4, 1, 1]>,
+ //
+ // Quad-register Integer Shift (4 cycle)
+ InstrItinData<IIC_VSHLi4Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [5, 1, 1]>,
+ //
+ // Double-register Integer Pair Add Long
+ InstrItinData<IIC_VPALiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [6, 3, 1]>,
+ //
+ // Quad-register Integer Pair Add Long
+ InstrItinData<IIC_VPALiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [7, 3, 1]>,
+ //
+ // Double-register Absolute Difference and Accumulate
+ InstrItinData<IIC_VABAD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [6, 3, 2, 1]>,
+ //
+ // Quad-register Absolute Difference and Accumulate
+ InstrItinData<IIC_VABAQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [6, 3, 2, 1]>,
+
+ //
+ // Double-register Integer Multiply (.8, .16)
+ InstrItinData<IIC_VMULi16D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [6, 2, 2]>,
+ //
+ // Double-register Integer Multiply (.32)
+ InstrItinData<IIC_VMULi32D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [7, 2, 1]>,
+ //
+ // Quad-register Integer Multiply (.8, .16)
+ InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [7, 2, 2]>,
+ //
+ // Quad-register Integer Multiply (.32)
+ InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_NPipe]>], [9, 2, 1]>,
+ //
+ // Double-register Integer Multiply-Accumulate (.8, .16)
+ InstrItinData<IIC_VMACi16D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [6, 3, 2, 2]>,
+ //
+ // Double-register Integer Multiply-Accumulate (.32)
+ InstrItinData<IIC_VMACi32D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [7, 3, 2, 1]>,
+ //
+ // Quad-register Integer Multiply-Accumulate (.8, .16)
+ InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [7, 3, 2, 2]>,
+ //
+ // Quad-register Integer Multiply-Accumulate (.32)
+ InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_NPipe]>], [9, 3, 2, 1]>,
+ //
+ // Double-register VEXT
+ InstrItinData<IIC_VEXTD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe]>], [2, 1, 1]>,
+ //
+ // Quad-register VEXT
+ InstrItinData<IIC_VEXTQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe]>], [3, 1, 1]>,
+ //
+ // VTB
+ InstrItinData<IIC_VTB1, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe]>], [3, 2, 1]>,
+ InstrItinData<IIC_VTB2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe]>], [3, 2, 2, 1]>,
+ InstrItinData<IIC_VTB3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe]>,
+ InstrStage<1, [A8_NPipe], 0>,
+ InstrStage<2, [A8_NLSPipe]>], [4, 2, 2, 3, 1]>,
+ InstrItinData<IIC_VTB4, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe]>,
+ InstrStage<1, [A8_NPipe], 0>,
+ InstrStage<2, [A8_NLSPipe]>],[4, 2, 2, 3, 3, 1]>,
+ //
+ // VTBX
+ InstrItinData<IIC_VTBX1, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe]>], [3, 1, 2, 1]>,
+ InstrItinData<IIC_VTBX2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe]>], [3, 1, 2, 2, 1]>,
+ InstrItinData<IIC_VTBX3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe]>,
+ InstrStage<1, [A8_NPipe], 0>,
+ InstrStage<2, [A8_NLSPipe]>],[4, 1, 2, 2, 3, 1]>,
+ InstrItinData<IIC_VTBX4, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe]>,
+ InstrStage<1, [A8_NPipe], 0>,
+ InstrStage<2, [A8_NLSPipe]>], [4, 1, 2, 2, 3, 3, 1]>
+]>;
diff --git a/contrib/llvm/lib/Target/ARM/ARMScheduleA9.td b/contrib/llvm/lib/Target/ARM/ARMScheduleA9.td
new file mode 100644
index 0000000..49fedf6
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMScheduleA9.td
@@ -0,0 +1,1827 @@
+//=- ARMScheduleA9.td - ARM Cortex-A9 Scheduling Definitions -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the ARM Cortex A9 processors.
+//
+//===----------------------------------------------------------------------===//
+
+//
+// Ad-hoc scheduling information derived from pretty vague "Cortex-A9 Technical
+// Reference Manual".
+//
+// Functional units
+def A9_Issue0 : FuncUnit; // Issue 0
+def A9_Issue1 : FuncUnit; // Issue 1
+def A9_Branch : FuncUnit; // Branch
+def A9_ALU0 : FuncUnit; // ALU / MUL pipeline 0
+def A9_ALU1 : FuncUnit; // ALU pipeline 1
+def A9_AGU : FuncUnit; // Address generation unit for ld / st
+def A9_NPipe : FuncUnit; // NEON pipeline
+def A9_MUX0 : FuncUnit; // AGU + NEON/FPU multiplexer
+def A9_LSUnit : FuncUnit; // L/S Unit
+def A9_DRegsVFP: FuncUnit; // FP register set, VFP side
+def A9_DRegsN : FuncUnit; // FP register set, NEON side
+
+// Bypasses
+def A9_LdBypass : Bypass;
+
+def CortexA9Itineraries : ProcessorItineraries<
+ [A9_Issue0, A9_Issue1, A9_Branch, A9_ALU0, A9_ALU1, A9_AGU, A9_NPipe, A9_MUX0,
+ A9_LSUnit, A9_DRegsVFP, A9_DRegsN],
+ [A9_LdBypass], [
+ // Two fully-pipelined integer ALU pipelines
+
+ //
+ // Move instructions, unconditional
+ InstrItinData<IIC_iMOVi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
+ InstrItinData<IIC_iMOVr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
+ InstrItinData<IIC_iMOVsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
+ InstrItinData<IIC_iMOVsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
+ InstrItinData<IIC_iMOVix2 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [2]>,
+ InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [3]>,
+ InstrItinData<IIC_iMOVix2ld,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>], [5]>,
+ //
+ // MVN instructions
+ InstrItinData<IIC_iMVNi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>],
+ [1]>,
+ InstrItinData<IIC_iMVNr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>],
+ [1, 1], [NoBypass, A9_LdBypass]>,
+ InstrItinData<IIC_iMVNsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0, A9_ALU1]>],
+ [2, 1]>,
+ InstrItinData<IIC_iMVNsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<3, [A9_ALU0, A9_ALU1]>],
+ [3, 1, 1]>,
+ //
+ // No operand cycles
+ InstrItinData<IIC_iALUx , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>]>,
+ //
+ // Binary Instructions that produce a result
+ InstrItinData<IIC_iALUi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>],
+ [1, 1], [NoBypass, A9_LdBypass]>,
+ InstrItinData<IIC_iALUr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>],
+ [1, 1, 1], [NoBypass, A9_LdBypass, A9_LdBypass]>,
+ InstrItinData<IIC_iALUsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0, A9_ALU1]>],
+ [2, 1, 1], [NoBypass, A9_LdBypass, NoBypass]>,
+ InstrItinData<IIC_iALUsir,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0, A9_ALU1]>],
+ [2, 1, 1], [NoBypass, NoBypass, A9_LdBypass]>,
+ InstrItinData<IIC_iALUsr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<3, [A9_ALU0, A9_ALU1]>],
+ [3, 1, 1, 1],
+ [NoBypass, A9_LdBypass, NoBypass, NoBypass]>,
+ //
+ // Bitwise Instructions that produce a result
+ InstrItinData<IIC_iBITi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
+ InstrItinData<IIC_iBITr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1, 1]>,
+ InstrItinData<IIC_iBITsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
+ InstrItinData<IIC_iBITsr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<3, [A9_ALU0, A9_ALU1]>], [3, 1, 1, 1]>,
+ //
+ // Unary Instructions that produce a result
+
+ // CLZ, RBIT, etc.
+ InstrItinData<IIC_iUNAr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
+
+ // BFC, BFI, UBFX, SBFX
+ InstrItinData<IIC_iUNAsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1]>,
+
+ //
+ // Zero and sign extension instructions
+ InstrItinData<IIC_iEXTr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [2, 1]>,
+ InstrItinData<IIC_iEXTAr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0, A9_ALU1]>], [3, 1, 1]>,
+ InstrItinData<IIC_iEXTAsr,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<3, [A9_ALU0, A9_ALU1]>], [3, 1, 1, 1]>,
+ //
+ // Compare instructions
+ InstrItinData<IIC_iCMPi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>],
+ [1], [A9_LdBypass]>,
+ InstrItinData<IIC_iCMPr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>],
+ [1, 1], [A9_LdBypass, A9_LdBypass]>,
+ InstrItinData<IIC_iCMPsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0, A9_ALU1]>],
+ [1, 1], [A9_LdBypass, NoBypass]>,
+ InstrItinData<IIC_iCMPsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<3, [A9_ALU0, A9_ALU1]>],
+ [1, 1, 1], [A9_LdBypass, NoBypass, NoBypass]>,
+ //
+ // Test instructions
+ InstrItinData<IIC_iTSTi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
+ InstrItinData<IIC_iTSTr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
+ InstrItinData<IIC_iTSTsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0, A9_ALU1]>], [1, 1]>,
+ InstrItinData<IIC_iTSTsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<3, [A9_ALU0, A9_ALU1]>], [1, 1, 1]>,
+ //
+ // Move instructions, conditional
+ // FIXME: Correctly model the extra input dep on the destination.
+ InstrItinData<IIC_iCMOVi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
+ InstrItinData<IIC_iCMOVr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
+ InstrItinData<IIC_iCMOVsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
+ InstrItinData<IIC_iCMOVsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
+ InstrItinData<IIC_iCMOVix2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>,
+ InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [2]>,
+
+ // Integer multiply pipeline
+ //
+ InstrItinData<IIC_iMUL16 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0]>], [3, 1, 1]>,
+ InstrItinData<IIC_iMAC16 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0]>],
+ [3, 1, 1, 1]>,
+ InstrItinData<IIC_iMUL32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0]>], [4, 1, 1]>,
+ InstrItinData<IIC_iMAC32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0]>],
+ [4, 1, 1, 1]>,
+ InstrItinData<IIC_iMUL64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<3, [A9_ALU0]>], [4, 5, 1, 1]>,
+ InstrItinData<IIC_iMAC64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<3, [A9_ALU0]>],
+ [4, 5, 1, 1]>,
+ // Integer load pipeline
+ // FIXME: The timings are some rough approximations
+ //
+ // Immediate offset
+ InstrItinData<IIC_iLoad_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [3, 1], [A9_LdBypass]>,
+ InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [4, 1], [A9_LdBypass]>,
+ // FIXME: If address is 64-bit aligned, AGU cycles is 1.
+ InstrItinData<IIC_iLoad_d_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [3, 3, 1], [A9_LdBypass]>,
+ //
+ // Register offset
+ InstrItinData<IIC_iLoad_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [3, 1, 1], [A9_LdBypass]>,
+ InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [4, 1, 1], [A9_LdBypass]>,
+ InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [3, 3, 1, 1], [A9_LdBypass]>,
+ //
+ // Scaled register offset
+ InstrItinData<IIC_iLoad_si , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit], 0>],
+ [4, 1, 1], [A9_LdBypass]>,
+ InstrItinData<IIC_iLoad_bh_si,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [5, 1, 1], [A9_LdBypass]>,
+ //
+ // Immediate offset with update
+ InstrItinData<IIC_iLoad_iu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [3, 2, 1], [A9_LdBypass]>,
+ InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [4, 3, 1], [A9_LdBypass]>,
+ //
+ // Register offset with update
+ InstrItinData<IIC_iLoad_ru , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [3, 2, 1, 1], [A9_LdBypass]>,
+ InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [4, 3, 1, 1], [A9_LdBypass]>,
+ InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [3, 3, 1, 1], [A9_LdBypass]>,
+ //
+ // Scaled register offset with update
+ InstrItinData<IIC_iLoad_siu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [4, 3, 1, 1], [A9_LdBypass]>,
+ InstrItinData<IIC_iLoad_bh_siu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [5, 4, 1, 1], [A9_LdBypass]>,
+ //
+ // Load multiple, def is the 5th operand.
+ // FIXME: This assumes 3 to 4 registers.
+ InstrItinData<IIC_iLoad_m , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 1, 1, 3],
+ [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
+ //
+ // Load multiple + update, defs are the 1st and 5th operands.
+ InstrItinData<IIC_iLoad_mu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 1, 1, 1, 3],
+ [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
+ //
+ // Load multiple plus branch
+ InstrItinData<IIC_iLoad_mBr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 1>,
+ InstrStage<2, [A9_LSUnit]>,
+ InstrStage<1, [A9_Branch]>],
+ [1, 2, 1, 1, 3],
+ [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
+ //
+ // Pop, def is the 3rd operand.
+ InstrItinData<IIC_iPop , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 3],
+ [NoBypass, NoBypass, A9_LdBypass]>,
+ //
+ // Pop + branch, def is the 3rd operand.
+ InstrItinData<IIC_iPop_Br, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<2, [A9_LSUnit]>,
+ InstrStage<1, [A9_Branch]>],
+ [1, 1, 3],
+ [NoBypass, NoBypass, A9_LdBypass]>,
+
+ //
+ // iLoadi + iALUr for t2LDRpci_pic.
+ InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>],
+ [2, 1]>,
+
+ // Integer store pipeline
+ ///
+ // Immediate offset
+ InstrItinData<IIC_iStore_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>], [1, 1]>,
+ InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<1, [A9_LSUnit]>], [1, 1]>,
+ // FIXME: If address is 64-bit aligned, AGU cycles is 1.
+ InstrItinData<IIC_iStore_d_i, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<1, [A9_LSUnit]>], [1, 1]>,
+ //
+ // Register offset
+ InstrItinData<IIC_iStore_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
+ InstrItinData<IIC_iStore_d_r, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
+ //
+ // Scaled register offset
+ InstrItinData<IIC_iStore_si , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_si,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
+ //
+ // Immediate offset with update
+ InstrItinData<IIC_iStore_iu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>], [2, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<1, [A9_LSUnit]>], [3, 1, 1]>,
+ //
+ // Register offset with update
+ InstrItinData<IIC_iStore_ru , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 1, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<1, [A9_LSUnit]>],
+ [3, 1, 1, 1]>,
+ InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<1, [A9_LSUnit]>],
+ [3, 1, 1, 1]>,
+ //
+ // Scaled register offset with update
+ InstrItinData<IIC_iStore_siu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 1, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_siu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<1, [A9_LSUnit]>],
+ [3, 1, 1, 1]>,
+ //
+ // Store multiple
+ InstrItinData<IIC_iStore_m , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<2, [A9_LSUnit]>]>,
+ //
+ // Store multiple + update
+ InstrItinData<IIC_iStore_mu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<2, [A9_LSUnit]>], [2]>,
+
+ //
+ // Preload
+ InstrItinData<IIC_Preload, [InstrStage<1, [A9_Issue0, A9_Issue1]>], [1, 1]>,
+
+ // Branch
+ //
+ // no delay slots, so the latency of a branch is unimportant
+ InstrItinData<IIC_Br , [InstrStage<1, [A9_Issue0], 0>,
+ InstrStage<1, [A9_Issue1], 0>,
+ InstrStage<1, [A9_Branch]>]>,
+
+ // VFP and NEON shares the same register file. This means that every VFP
+ // instruction should wait for full completion of the consecutive NEON
+ // instruction and vice-versa. We model this behavior with two artificial FUs:
+ // DRegsVFP and DRegsVFP.
+ //
+ // Every VFP instruction:
+ // - Acquires DRegsVFP resource for 1 cycle
+ // - Reserves DRegsN resource for the whole duration (including time to
+ // register file writeback!).
+ // Every NEON instruction does the same but with FUs swapped.
+ //
+ // Since the reserved FU cannot be acquired, this models precisely
+ // "cross-domain" stalls.
+
+ // VFP
+ // Issue through integer pipeline, and execute in NEON unit.
+
+ // FP Special Register to Integer Register File Move
+ InstrItinData<IIC_fpSTAT , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<2, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [1]>,
+ //
+ // Single-precision FP Unary
+ InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ // Extra latency cycles since wbck is 2 cycles
+ InstrStage<3, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [1, 1]>,
+ //
+ // Double-precision FP Unary
+ InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ // Extra latency cycles since wbck is 2 cycles
+ InstrStage<3, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [1, 1]>,
+
+ //
+ // Single-precision FP Compare
+ InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ // Extra latency cycles since wbck is 4 cycles
+ InstrStage<5, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [1, 1]>,
+ //
+ // Double-precision FP Compare
+ InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ // Extra latency cycles since wbck is 4 cycles
+ InstrStage<5, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [1, 1]>,
+ //
+ // Single to Double FP Convert
+ InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<5, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1]>,
+ //
+ // Double to Single FP Convert
+ InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<5, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1]>,
+
+ //
+ // Single to Half FP Convert
+ InstrItinData<IIC_fpCVTSH , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<5, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1]>,
+ //
+ // Half to Single FP Convert
+ InstrItinData<IIC_fpCVTHS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<3, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [2, 1]>,
+
+ //
+ // Single-Precision FP to Integer Convert
+ InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<5, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1]>,
+ //
+ // Double-Precision FP to Integer Convert
+ InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<5, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1]>,
+ //
+ // Integer to Single-Precision FP Convert
+ InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<5, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1]>,
+ //
+ // Integer to Double-Precision FP Convert
+ InstrItinData<IIC_fpCVTID , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<5, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1]>,
+ //
+ // Single-precision FP ALU
+ InstrItinData<IIC_fpALU32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<5, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1, 1]>,
+ //
+ // Double-precision FP ALU
+ InstrItinData<IIC_fpALU64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<5, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1, 1]>,
+ //
+ // Single-precision FP Multiply
+ InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<6, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [5, 1, 1]>,
+ //
+ // Double-precision FP Multiply
+ InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<7, [A9_DRegsN], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [6, 1, 1]>,
+ //
+ // Single-precision FP MAC
+ InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<9, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [8, 1, 1, 1]>,
+ //
+ // Double-precision FP MAC
+ InstrItinData<IIC_fpMAC64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<10, [A9_DRegsN], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [9, 1, 1, 1]>,
+ //
+ // Single-precision FP DIV
+ InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<16, [A9_DRegsN], 0, Reserved>,
+ InstrStage<10, [A9_NPipe]>],
+ [15, 1, 1]>,
+ //
+ // Double-precision FP DIV
+ InstrItinData<IIC_fpDIV64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<26, [A9_DRegsN], 0, Reserved>,
+ InstrStage<20, [A9_NPipe]>],
+ [25, 1, 1]>,
+ //
+ // Single-precision FP SQRT
+ InstrItinData<IIC_fpSQRT32, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<18, [A9_DRegsN], 0, Reserved>,
+ InstrStage<13, [A9_NPipe]>],
+ [17, 1]>,
+ //
+ // Double-precision FP SQRT
+ InstrItinData<IIC_fpSQRT64, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<33, [A9_DRegsN], 0, Reserved>,
+ InstrStage<28, [A9_NPipe]>],
+ [32, 1]>,
+
+ //
+ // Integer to Single-precision Move
+ InstrItinData<IIC_fpMOVIS, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ // Extra 1 latency cycle since wbck is 2 cycles
+ InstrStage<3, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [1, 1]>,
+ //
+ // Integer to Double-precision Move
+ InstrItinData<IIC_fpMOVID, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ // Extra 1 latency cycle since wbck is 2 cycles
+ InstrStage<3, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [1, 1, 1]>,
+ //
+ // Single-precision to Integer Move
+ //
+ // On A9 move-from-VFP is free to issue with no stall if other VFP
+ // operations are in flight. I assume it still can't dual-issue though.
+ InstrItinData<IIC_fpMOVSI, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>],
+ [2, 1]>,
+ //
+ // Double-precision to Integer Move
+ //
+ // On A9 move-from-VFP is free to issue with no stall if other VFP
+ // operations are in flight. I assume it still can't dual-issue though.
+ InstrItinData<IIC_fpMOVDI, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>],
+ [2, 1, 1]>,
+ //
+ // Single-precision FP Load
+ InstrItinData<IIC_fpLoad32, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<2, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [1, 1]>,
+ //
+ // Double-precision FP Load
+ // FIXME: Result latency is 1 if address is 64-bit aligned.
+ InstrItinData<IIC_fpLoad64, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<2, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 1]>,
+ //
+ // FP Load Multiple
+ // FIXME: assumes 2 doubles which requires 2 LS cycles.
+ InstrItinData<IIC_fpLoad_m, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<2, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>], [1, 1, 1, 1]>,
+ //
+ // FP Load Multiple + update
+ // FIXME: assumes 2 doubles which requires 2 LS cycles.
+ InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<2, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>], [2, 1, 1, 1]>,
+ //
+ // Single-precision FP Store
+ InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<2, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [1, 1]>,
+ //
+ // Double-precision FP Store
+ InstrItinData<IIC_fpStore64,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<2, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [1, 1]>,
+ //
+ // FP Store Multiple
+ // FIXME: assumes 2 doubles which requires 2 LS cycles.
+ InstrItinData<IIC_fpStore_m,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<2, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>], [1, 1, 1, 1]>,
+ //
+ // FP Store Multiple + update
+ // FIXME: assumes 2 doubles which requires 2 LS cycles.
+ InstrItinData<IIC_fpStore_mu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<2, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>], [2, 1, 1, 1]>,
+ // NEON
+ // VLD1
+ InstrItinData<IIC_VLD1, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [1, 1]>,
+ // VLD1x2
+ InstrItinData<IIC_VLD1x2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [1, 1, 1]>,
+ // VLD1x3
+ InstrItinData<IIC_VLD1x3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 2, 1]>,
+ // VLD1x4
+ InstrItinData<IIC_VLD1x4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 2, 2, 1]>,
+ // VLD1u
+ InstrItinData<IIC_VLD1u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [1, 2, 1]>,
+ // VLD1x2u
+ InstrItinData<IIC_VLD1x2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [1, 1, 2, 1]>,
+ // VLD1x3u
+ InstrItinData<IIC_VLD1x3u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 2, 2, 1]>,
+ // VLD1x4u
+ InstrItinData<IIC_VLD1x4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 2, 2, 2, 1]>,
+ //
+ // VLD1ln
+ InstrItinData<IIC_VLD1ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [3, 1, 1, 1]>,
+ //
+ // VLD1lnu
+ InstrItinData<IIC_VLD1lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [3, 2, 1, 1, 1, 1]>,
+ //
+ // VLD1dup
+ InstrItinData<IIC_VLD1dup, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 1]>,
+ //
+ // VLD1dupu
+ InstrItinData<IIC_VLD1dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 2, 1, 1]>,
+ //
+ // VLD2
+ InstrItinData<IIC_VLD2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 2, 1]>,
+ //
+ // VLD2x2
+ InstrItinData<IIC_VLD2x2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 3, 2, 3, 1]>,
+ //
+ // VLD2ln
+ InstrItinData<IIC_VLD2ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [3, 3, 1, 1, 1, 1]>,
+ //
+ // VLD2u
+ InstrItinData<IIC_VLD2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 2, 2, 1, 1, 1]>,
+ //
+ // VLD2x2u
+ InstrItinData<IIC_VLD2x2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 3, 2, 3, 2, 1]>,
+ //
+ // VLD2lnu
+ InstrItinData<IIC_VLD2lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [3, 3, 2, 1, 1, 1, 1, 1]>,
+ //
+ // VLD2dup
+ InstrItinData<IIC_VLD2dup, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 2, 1]>,
+ //
+ // VLD2dupu
+ InstrItinData<IIC_VLD2dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 2, 2, 1, 1]>,
+ //
+ // VLD3
+ InstrItinData<IIC_VLD3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<9,[A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [3, 3, 4, 1]>,
+ //
+ // VLD3ln
+ InstrItinData<IIC_VLD3ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<11,[A9_DRegsVFP], 0, Reserved>,
+ InstrStage<5, [A9_NPipe], 0>,
+ InstrStage<5, [A9_LSUnit]>],
+ [5, 5, 6, 1, 1, 1, 1, 2]>,
+ //
+ // VLD3u
+ InstrItinData<IIC_VLD3u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<9,[A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [3, 3, 4, 2, 1]>,
+ //
+ // VLD3lnu
+ InstrItinData<IIC_VLD3lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<11,[A9_DRegsVFP], 0, Reserved>,
+ InstrStage<5, [A9_NPipe], 0>,
+ InstrStage<5, [A9_LSUnit]>],
+ [5, 5, 6, 2, 1, 1, 1, 1, 1, 2]>,
+ //
+ // VLD3dup
+ InstrItinData<IIC_VLD3dup, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [3, 3, 4, 1]>,
+ //
+ // VLD3dupu
+ InstrItinData<IIC_VLD3dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [3, 3, 4, 2, 1, 1]>,
+ //
+ // VLD4
+ InstrItinData<IIC_VLD4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<9,[A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [3, 3, 4, 4, 1]>,
+ //
+ // VLD4ln
+ InstrItinData<IIC_VLD4ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
+ InstrStage<4, [A9_NPipe], 0>,
+ InstrStage<4, [A9_LSUnit]>],
+ [4, 4, 5, 5, 1, 1, 1, 1, 2, 2]>,
+ //
+ // VLD4u
+ InstrItinData<IIC_VLD4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<9,[A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [3, 3, 4, 4, 2, 1]>,
+ //
+ // VLD4lnu
+ InstrItinData<IIC_VLD4lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
+ InstrStage<4, [A9_NPipe], 0>,
+ InstrStage<4, [A9_LSUnit]>],
+ [4, 4, 5, 5, 2, 1, 1, 1, 1, 1, 2, 2]>,
+ //
+ // VLD4dup
+ InstrItinData<IIC_VLD4dup, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 2, 3, 3, 1]>,
+ //
+ // VLD4dupu
+ InstrItinData<IIC_VLD4dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 2, 3, 3, 2, 1, 1]>,
+ //
+ // VST1
+ InstrItinData<IIC_VST1, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [1, 1, 1]>,
+ //
+ // VST1x2
+ InstrItinData<IIC_VST1x2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [1, 1, 1, 1]>,
+ //
+ // VST1x3
+ InstrItinData<IIC_VST1x3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 1, 1, 2]>,
+ //
+ // VST1x4
+ InstrItinData<IIC_VST1x4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 1, 1, 2, 2]>,
+ //
+ // VST1u
+ InstrItinData<IIC_VST1u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1]>,
+ //
+ // VST1x2u
+ InstrItinData<IIC_VST1x2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1, 1]>,
+ //
+ // VST1x3u
+ InstrItinData<IIC_VST1x3u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1, 1, 2]>,
+ //
+ // VST1x4u
+ InstrItinData<IIC_VST1x4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1, 1, 2, 2]>,
+ //
+ // VST1ln
+ InstrItinData<IIC_VST1ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [1, 1, 1]>,
+ //
+ // VST1lnu
+ InstrItinData<IIC_VST1lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1]>,
+ //
+ // VST2
+ InstrItinData<IIC_VST2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [1, 1, 1, 1]>,
+ //
+ // VST2x2
+ InstrItinData<IIC_VST2x2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [1, 1, 1, 1, 2, 2]>,
+ //
+ // VST2u
+ InstrItinData<IIC_VST2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1, 1]>,
+ //
+ // VST2x2u
+ InstrItinData<IIC_VST2x2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1, 1, 2, 2]>,
+ //
+ // VST2ln
+ InstrItinData<IIC_VST2ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [1, 1, 1, 1]>,
+ //
+ // VST2lnu
+ InstrItinData<IIC_VST2lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1, 1]>,
+ //
+ // VST3
+ InstrItinData<IIC_VST3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 1, 1, 2]>,
+ //
+ // VST3u
+ InstrItinData<IIC_VST3u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1, 1, 2]>,
+ //
+ // VST3ln
+ InstrItinData<IIC_VST3ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [1, 1, 1, 1, 2]>,
+ //
+ // VST3lnu
+ InstrItinData<IIC_VST3lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1, 1, 2]>,
+ //
+ // VST4
+ InstrItinData<IIC_VST4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 1, 1, 2, 2]>,
+ //
+ // VST4u
+ InstrItinData<IIC_VST4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1, 1, 2, 2]>,
+ //
+ // VST4ln
+ InstrItinData<IIC_VST4ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 1, 1, 2, 2]>,
+ //
+ // VST4lnu
+ InstrItinData<IIC_VST4lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1, 1, 2, 2]>,
+
+ //
+ // Double-register Integer Unary
+ InstrItinData<IIC_VUNAiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 2]>,
+ //
+ // Quad-register Integer Unary
+ InstrItinData<IIC_VUNAiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 2]>,
+ //
+ // Double-register Integer Q-Unary
+ InstrItinData<IIC_VQUNAiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1]>,
+ //
+ // Quad-register Integer CountQ-Unary
+ InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1]>,
+ //
+ // Double-register Integer Binary
+ InstrItinData<IIC_VBINiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [3, 2, 2]>,
+ //
+ // Quad-register Integer Binary
+ InstrItinData<IIC_VBINiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [3, 2, 2]>,
+ //
+ // Double-register Integer Subtract
+ InstrItinData<IIC_VSUBiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [3, 2, 1]>,
+ //
+ // Quad-register Integer Subtract
+ InstrItinData<IIC_VSUBiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [3, 2, 1]>,
+ //
+ // Double-register Integer Shift
+ InstrItinData<IIC_VSHLiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [3, 1, 1]>,
+ //
+ // Quad-register Integer Shift
+ InstrItinData<IIC_VSHLiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [3, 1, 1]>,
+ //
+ // Double-register Integer Shift (4 cycle)
+ InstrItinData<IIC_VSHLi4D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1, 1]>,
+ //
+ // Quad-register Integer Shift (4 cycle)
+ InstrItinData<IIC_VSHLi4Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1, 1]>,
+ //
+ // Double-register Integer Binary (4 cycle)
+ InstrItinData<IIC_VBINi4D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 2, 2]>,
+ //
+ // Quad-register Integer Binary (4 cycle)
+ InstrItinData<IIC_VBINi4Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 2, 2]>,
+ //
+ // Double-register Integer Subtract (4 cycle)
+ InstrItinData<IIC_VSUBi4D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 2, 1]>,
+ //
+ // Quad-register Integer Subtract (4 cycle)
+ InstrItinData<IIC_VSUBi4Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 2, 1]>,
+
+ //
+ // Double-register Integer Count
+ InstrItinData<IIC_VCNTiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [3, 2, 2]>,
+ //
+ // Quad-register Integer Count
+ // Result written in N3, but that is relative to the last cycle of multicycle,
+ // so we use 4 for those cases
+ InstrItinData<IIC_VCNTiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [4, 2, 2]>,
+ //
+ // Double-register Absolute Difference and Accumulate
+ InstrItinData<IIC_VABAD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [6, 3, 2, 1]>,
+ //
+ // Quad-register Absolute Difference and Accumulate
+ InstrItinData<IIC_VABAQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [6, 3, 2, 1]>,
+ //
+ // Double-register Integer Pair Add Long
+ InstrItinData<IIC_VPALiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [6, 3, 1]>,
+ //
+ // Quad-register Integer Pair Add Long
+ InstrItinData<IIC_VPALiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [6, 3, 1]>,
+
+ //
+ // Double-register Integer Multiply (.8, .16)
+ InstrItinData<IIC_VMULi16D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [6, 2, 2]>,
+ //
+ // Quad-register Integer Multiply (.8, .16)
+ InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [7, 2, 2]>,
+
+ //
+ // Double-register Integer Multiply (.32)
+ InstrItinData<IIC_VMULi32D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [7, 2, 1]>,
+ //
+ // Quad-register Integer Multiply (.32)
+ InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 9 cycles
+ InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<4, [A9_NPipe]>],
+ [9, 2, 1]>,
+ //
+ // Double-register Integer Multiply-Accumulate (.8, .16)
+ InstrItinData<IIC_VMACi16D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [6, 3, 2, 2]>,
+ //
+ // Double-register Integer Multiply-Accumulate (.32)
+ InstrItinData<IIC_VMACi32D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [7, 3, 2, 1]>,
+ //
+ // Quad-register Integer Multiply-Accumulate (.8, .16)
+ InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [7, 3, 2, 2]>,
+ //
+ // Quad-register Integer Multiply-Accumulate (.32)
+ InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 9 cycles
+ InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<4, [A9_NPipe]>],
+ [9, 3, 2, 1]>,
+
+ //
+ // Move
+ InstrItinData<IIC_VMOV, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [1,1]>,
+ //
+ // Move Immediate
+ InstrItinData<IIC_VMOVImm, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [3]>,
+ //
+ // Double-register Permute Move
+ InstrItinData<IIC_VMOVD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [2, 1]>,
+ //
+ // Quad-register Permute Move
+ InstrItinData<IIC_VMOVQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [2, 1]>,
+ //
+ // Integer to Single-precision Move
+ InstrItinData<IIC_VMOVIS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [1, 1]>,
+ //
+ // Integer to Double-precision Move
+ InstrItinData<IIC_VMOVID , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [1, 1, 1]>,
+ //
+ // Single-precision to Integer Move
+ InstrItinData<IIC_VMOVSI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [2, 1]>,
+ //
+ // Double-precision to Integer Move
+ InstrItinData<IIC_VMOVDI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [2, 2, 1]>,
+ //
+ // Integer to Lane Move
+ InstrItinData<IIC_VMOVISL , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<4, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [3, 1, 1]>,
+
+ //
+ // Vector narrow move
+ InstrItinData<IIC_VMOVN, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [3, 1]>,
+ //
+ // Double-register FP Unary
+ InstrItinData<IIC_VUNAD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [5, 2]>,
+ //
+ // Quad-register FP Unary
+ // Result written in N5, but that is relative to the last cycle of multicycle,
+ // so we use 6 for those cases
+ InstrItinData<IIC_VUNAQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [6, 2]>,
+ //
+ // Double-register FP Binary
+ // FIXME: We're using this itin for many instructions and [2, 2] here is too
+ // optimistic.
+ InstrItinData<IIC_VBIND, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [5, 2, 2]>,
+
+ //
+ // VPADD, etc.
+ InstrItinData<IIC_VPBIND, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [5, 1, 1]>,
+ //
+ // Double-register FP VMUL
+ InstrItinData<IIC_VFMULD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [5, 2, 1]>,
+ //
+ // Quad-register FP Binary
+ // Result written in N5, but that is relative to the last cycle of multicycle,
+ // so we use 6 for those cases
+ // FIXME: We're using this itin for many instructions and [2, 2] here is too
+ // optimistic.
+ InstrItinData<IIC_VBINQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [6, 2, 2]>,
+ //
+ // Quad-register FP VMUL
+ InstrItinData<IIC_VFMULQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [6, 2, 1]>,
+ //
+ // Double-register FP Multiple-Accumulate
+ InstrItinData<IIC_VMACD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [6, 3, 2, 1]>,
+ //
+ // Quad-register FP Multiple-Accumulate
+ // Result written in N9, but that is relative to the last cycle of multicycle,
+ // so we use 10 for those cases
+ InstrItinData<IIC_VMACQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 9 cycles
+ InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<4, [A9_NPipe]>],
+ [8, 4, 2, 1]>,
+ //
+ // Double-register Reciprical Step
+ InstrItinData<IIC_VRECSD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 10 cycles
+ InstrStage<11, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [9, 2, 2]>,
+ //
+ // Quad-register Reciprical Step
+ InstrItinData<IIC_VRECSQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 11 cycles
+ InstrStage<12, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [10, 2, 2]>,
+ //
+ // Double-register Permute
+ InstrItinData<IIC_VPERMD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [2, 2, 1, 1]>,
+ //
+ // Quad-register Permute
+ // Result written in N2, but that is relative to the last cycle of multicycle,
+ // so we use 3 for those cases
+ InstrItinData<IIC_VPERMQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [3, 3, 1, 1]>,
+ //
+ // Quad-register Permute (3 cycle issue)
+ // Result written in N2, but that is relative to the last cycle of multicycle,
+ // so we use 4 for those cases
+ InstrItinData<IIC_VPERMQ3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 8 cycles
+ InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe]>],
+ [4, 4, 1, 1]>,
+
+ //
+ // Double-register VEXT
+ InstrItinData<IIC_VEXTD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [2, 1, 1]>,
+ //
+ // Quad-register VEXT
+ InstrItinData<IIC_VEXTQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [3, 1, 2]>,
+ //
+ // VTB
+ InstrItinData<IIC_VTB1, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [3, 2, 1]>,
+ InstrItinData<IIC_VTB2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [3, 2, 2, 1]>,
+ InstrItinData<IIC_VTB3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 8 cycles
+ InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe]>],
+ [4, 2, 2, 3, 1]>,
+ InstrItinData<IIC_VTB4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 8 cycles
+ InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe]>],
+ [4, 2, 2, 3, 3, 1]>,
+ //
+ // VTBX
+ InstrItinData<IIC_VTBX1, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [3, 1, 2, 1]>,
+ InstrItinData<IIC_VTBX2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [3, 1, 2, 2, 1]>,
+ InstrItinData<IIC_VTBX3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 8 cycles
+ InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe]>],
+ [4, 1, 2, 2, 3, 1]>,
+ InstrItinData<IIC_VTBX4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 8 cycles
+ InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [4, 1, 2, 2, 3, 3, 1]>
+]>;
diff --git a/contrib/llvm/lib/Target/ARM/ARMScheduleV6.td b/contrib/llvm/lib/Target/ARM/ARMScheduleV6.td
new file mode 100644
index 0000000..c1880a7
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMScheduleV6.td
@@ -0,0 +1,294 @@
+//===- ARMScheduleV6.td - ARM v6 Scheduling Definitions ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the ARM v6 processors.
+//
+//===----------------------------------------------------------------------===//
+
+// Model based on ARM1176
+//
+// Functional Units
+def V6_Pipe : FuncUnit; // pipeline
+
+// Scheduling information derived from "ARM1176JZF-S Technical Reference Manual"
+//
+def ARMV6Itineraries : ProcessorItineraries<
+ [V6_Pipe], [], [
+ //
+ // No operand cycles
+ InstrItinData<IIC_iALUx , [InstrStage<1, [V6_Pipe]>]>,
+ //
+ // Binary Instructions that produce a result
+ InstrItinData<IIC_iALUi , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
+ InstrItinData<IIC_iALUr , [InstrStage<1, [V6_Pipe]>], [2, 2, 2]>,
+ InstrItinData<IIC_iALUsi , [InstrStage<1, [V6_Pipe]>], [2, 2, 1]>,
+ InstrItinData<IIC_iALUsr , [InstrStage<2, [V6_Pipe]>], [3, 3, 2, 1]>,
+ //
+ // Bitwise Instructions that produce a result
+ InstrItinData<IIC_iBITi , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
+ InstrItinData<IIC_iBITr , [InstrStage<1, [V6_Pipe]>], [2, 2, 2]>,
+ InstrItinData<IIC_iBITsi , [InstrStage<1, [V6_Pipe]>], [2, 2, 1]>,
+ InstrItinData<IIC_iBITsr , [InstrStage<2, [V6_Pipe]>], [3, 3, 2, 1]>,
+ //
+ // Unary Instructions that produce a result
+ InstrItinData<IIC_iUNAr , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
+ InstrItinData<IIC_iUNAsi , [InstrStage<1, [V6_Pipe]>], [2, 1]>,
+ //
+ // Zero and sign extension instructions
+ InstrItinData<IIC_iEXTr , [InstrStage<1, [V6_Pipe]>], [1, 1]>,
+ InstrItinData<IIC_iEXTAr , [InstrStage<1, [V6_Pipe]>], [2, 2, 1]>,
+ InstrItinData<IIC_iEXTAsr , [InstrStage<2, [V6_Pipe]>], [3, 3, 2, 1]>,
+ //
+ // Compare instructions
+ InstrItinData<IIC_iCMPi , [InstrStage<1, [V6_Pipe]>], [2]>,
+ InstrItinData<IIC_iCMPr , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
+ InstrItinData<IIC_iCMPsi , [InstrStage<1, [V6_Pipe]>], [2, 1]>,
+ InstrItinData<IIC_iCMPsr , [InstrStage<2, [V6_Pipe]>], [3, 2, 1]>,
+ //
+ // Test instructions
+ InstrItinData<IIC_iTSTi , [InstrStage<1, [V6_Pipe]>], [2]>,
+ InstrItinData<IIC_iTSTr , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
+ InstrItinData<IIC_iTSTsi , [InstrStage<1, [V6_Pipe]>], [2, 1]>,
+ InstrItinData<IIC_iTSTsr , [InstrStage<2, [V6_Pipe]>], [3, 2, 1]>,
+ //
+ // Move instructions, unconditional
+ InstrItinData<IIC_iMOVi , [InstrStage<1, [V6_Pipe]>], [2]>,
+ InstrItinData<IIC_iMOVr , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
+ InstrItinData<IIC_iMOVsi , [InstrStage<1, [V6_Pipe]>], [2, 1]>,
+ InstrItinData<IIC_iMOVsr , [InstrStage<2, [V6_Pipe]>], [3, 2, 1]>,
+ InstrItinData<IIC_iMOVix2 , [InstrStage<1, [V6_Pipe]>,
+ InstrStage<1, [V6_Pipe]>], [2]>,
+ InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [V6_Pipe]>,
+ InstrStage<1, [V6_Pipe]>,
+ InstrStage<1, [V6_Pipe]>], [3]>,
+ InstrItinData<IIC_iMOVix2ld , [InstrStage<1, [V6_Pipe]>,
+ InstrStage<1, [V6_Pipe]>,
+ InstrStage<1, [V6_Pipe]>], [5]>,
+ //
+ // Move instructions, conditional
+ InstrItinData<IIC_iCMOVi , [InstrStage<1, [V6_Pipe]>], [3]>,
+ InstrItinData<IIC_iCMOVr , [InstrStage<1, [V6_Pipe]>], [3, 2]>,
+ InstrItinData<IIC_iCMOVsi , [InstrStage<1, [V6_Pipe]>], [3, 1]>,
+ InstrItinData<IIC_iCMOVsr , [InstrStage<1, [V6_Pipe]>], [4, 2, 1]>,
+ InstrItinData<IIC_iCMOVix2 , [InstrStage<1, [V6_Pipe]>,
+ InstrStage<1, [V6_Pipe]>], [4]>,
+ //
+ // MVN instructions
+ InstrItinData<IIC_iMVNi , [InstrStage<1, [V6_Pipe]>], [2]>,
+ InstrItinData<IIC_iMVNr , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
+ InstrItinData<IIC_iMVNsi , [InstrStage<1, [V6_Pipe]>], [2, 1]>,
+ InstrItinData<IIC_iMVNsr , [InstrStage<2, [V6_Pipe]>], [3, 2, 1]>,
+
+ // Integer multiply pipeline
+ //
+ InstrItinData<IIC_iMUL16 , [InstrStage<1, [V6_Pipe]>], [4, 1, 1]>,
+ InstrItinData<IIC_iMAC16 , [InstrStage<1, [V6_Pipe]>], [4, 1, 1, 2]>,
+ InstrItinData<IIC_iMUL32 , [InstrStage<2, [V6_Pipe]>], [5, 1, 1]>,
+ InstrItinData<IIC_iMAC32 , [InstrStage<2, [V6_Pipe]>], [5, 1, 1, 2]>,
+ InstrItinData<IIC_iMUL64 , [InstrStage<3, [V6_Pipe]>], [6, 1, 1]>,
+ InstrItinData<IIC_iMAC64 , [InstrStage<3, [V6_Pipe]>], [6, 1, 1, 2]>,
+
+ // Integer load pipeline
+ //
+ // Immediate offset
+ InstrItinData<IIC_iLoad_i , [InstrStage<1, [V6_Pipe]>], [4, 1]>,
+ InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [V6_Pipe]>], [4, 1]>,
+ InstrItinData<IIC_iLoad_d_i , [InstrStage<1, [V6_Pipe]>], [4, 1]>,
+ //
+ // Register offset
+ InstrItinData<IIC_iLoad_r , [InstrStage<1, [V6_Pipe]>], [4, 1, 1]>,
+ InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [V6_Pipe]>], [4, 1, 1]>,
+ InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [V6_Pipe]>], [4, 1, 1]>,
+ //
+ // Scaled register offset, issues over 2 cycles
+ InstrItinData<IIC_iLoad_si , [InstrStage<2, [V6_Pipe]>], [5, 2, 1]>,
+ InstrItinData<IIC_iLoad_bh_si, [InstrStage<2, [V6_Pipe]>], [5, 2, 1]>,
+ //
+ // Immediate offset with update
+ InstrItinData<IIC_iLoad_iu , [InstrStage<1, [V6_Pipe]>], [4, 2, 1]>,
+ InstrItinData<IIC_iLoad_bh_iu, [InstrStage<1, [V6_Pipe]>], [4, 2, 1]>,
+ //
+ // Register offset with update
+ InstrItinData<IIC_iLoad_ru , [InstrStage<1, [V6_Pipe]>], [4, 2, 1, 1]>,
+ InstrItinData<IIC_iLoad_bh_ru, [InstrStage<1, [V6_Pipe]>], [4, 2, 1, 1]>,
+ InstrItinData<IIC_iLoad_d_ru , [InstrStage<1, [V6_Pipe]>], [4, 2, 1, 1]>,
+ //
+ // Scaled register offset with update, issues over 2 cycles
+ InstrItinData<IIC_iLoad_siu, [InstrStage<2, [V6_Pipe]>], [5, 2, 2, 1]>,
+ InstrItinData<IIC_iLoad_bh_siu,[InstrStage<2, [V6_Pipe]>], [5, 2, 2, 1]>,
+
+ //
+ // Load multiple, def is the 5th operand.
+ InstrItinData<IIC_iLoad_m , [InstrStage<3, [V6_Pipe]>], [1, 1, 1, 1, 4]>,
+ //
+ // Load multiple + update, defs are the 1st and 5th operands.
+ InstrItinData<IIC_iLoad_mu , [InstrStage<3, [V6_Pipe]>], [2, 1, 1, 1, 4]>,
+ //
+ // Load multiple plus branch
+ InstrItinData<IIC_iLoad_mBr, [InstrStage<3, [V6_Pipe]>,
+ InstrStage<1, [V6_Pipe]>], [1, 2, 1, 1, 4]>,
+
+ //
+ // iLoadi + iALUr for t2LDRpci_pic.
+ InstrItinData<IIC_iLoadiALU, [InstrStage<1, [V6_Pipe]>,
+ InstrStage<1, [V6_Pipe]>], [3, 1]>,
+
+ //
+ // Pop, def is the 3rd operand.
+ InstrItinData<IIC_iPop , [InstrStage<3, [V6_Pipe]>], [1, 1, 4]>,
+ //
+ // Pop + branch, def is the 3rd operand.
+ InstrItinData<IIC_iPop_Br, [InstrStage<3, [V6_Pipe]>,
+ InstrStage<1, [V6_Pipe]>], [1, 2, 4]>,
+
+ // Integer store pipeline
+ //
+ // Immediate offset
+ InstrItinData<IIC_iStore_i , [InstrStage<1, [V6_Pipe]>], [2, 1]>,
+ InstrItinData<IIC_iStore_bh_i, [InstrStage<1, [V6_Pipe]>], [2, 1]>,
+ InstrItinData<IIC_iStore_d_i , [InstrStage<1, [V6_Pipe]>], [2, 1]>,
+ //
+ // Register offset
+ InstrItinData<IIC_iStore_r , [InstrStage<1, [V6_Pipe]>], [2, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_r, [InstrStage<1, [V6_Pipe]>], [2, 1, 1]>,
+ InstrItinData<IIC_iStore_d_r , [InstrStage<1, [V6_Pipe]>], [2, 1, 1]>,
+ //
+ // Scaled register offset, issues over 2 cycles
+ InstrItinData<IIC_iStore_si , [InstrStage<2, [V6_Pipe]>], [2, 2, 1]>,
+ InstrItinData<IIC_iStore_bh_si, [InstrStage<2, [V6_Pipe]>], [2, 2, 1]>,
+ //
+ // Immediate offset with update
+ InstrItinData<IIC_iStore_iu , [InstrStage<1, [V6_Pipe]>], [2, 2, 1]>,
+ InstrItinData<IIC_iStore_bh_iu, [InstrStage<1, [V6_Pipe]>], [2, 2, 1]>,
+ //
+ // Register offset with update
+ InstrItinData<IIC_iStore_ru, [InstrStage<1, [V6_Pipe]>], [2, 2, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [V6_Pipe]>], [2, 2, 1, 1]>,
+ InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [V6_Pipe]>], [2, 2, 1, 1]>,
+ //
+ // Scaled register offset with update, issues over 2 cycles
+ InstrItinData<IIC_iStore_siu, [InstrStage<2, [V6_Pipe]>], [2, 2, 2, 1]>,
+ InstrItinData<IIC_iStore_bh_siu,[InstrStage<2, [V6_Pipe]>], [2, 2, 2, 1]>,
+ //
+ // Store multiple
+ InstrItinData<IIC_iStore_m , [InstrStage<3, [V6_Pipe]>]>,
+ //
+ // Store multiple + update
+ InstrItinData<IIC_iStore_mu , [InstrStage<3, [V6_Pipe]>], [2]>,
+
+ // Branch
+ //
+ // no delay slots, so the latency of a branch is unimportant
+ InstrItinData<IIC_Br , [InstrStage<1, [V6_Pipe]>]>,
+
+ // VFP
+ // Issue through integer pipeline, and execute in NEON unit. We assume
+ // RunFast mode so that NFP pipeline is used for single-precision when
+ // possible.
+ //
+ // FP Special Register to Integer Register File Move
+ InstrItinData<IIC_fpSTAT , [InstrStage<1, [V6_Pipe]>], [3]>,
+ //
+ // Single-precision FP Unary
+ InstrItinData<IIC_fpUNA32 , [InstrStage<1, [V6_Pipe]>], [5, 2]>,
+ //
+ // Double-precision FP Unary
+ InstrItinData<IIC_fpUNA64 , [InstrStage<1, [V6_Pipe]>], [5, 2]>,
+ //
+ // Single-precision FP Compare
+ InstrItinData<IIC_fpCMP32 , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
+ //
+ // Double-precision FP Compare
+ InstrItinData<IIC_fpCMP64 , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
+ //
+ // Single to Double FP Convert
+ InstrItinData<IIC_fpCVTSD , [InstrStage<1, [V6_Pipe]>], [5, 2]>,
+ //
+ // Double to Single FP Convert
+ InstrItinData<IIC_fpCVTDS , [InstrStage<1, [V6_Pipe]>], [5, 2]>,
+ //
+ // Single-Precision FP to Integer Convert
+ InstrItinData<IIC_fpCVTSI , [InstrStage<1, [V6_Pipe]>], [9, 2]>,
+ //
+ // Double-Precision FP to Integer Convert
+ InstrItinData<IIC_fpCVTDI , [InstrStage<1, [V6_Pipe]>], [9, 2]>,
+ //
+ // Integer to Single-Precision FP Convert
+ InstrItinData<IIC_fpCVTIS , [InstrStage<1, [V6_Pipe]>], [9, 2]>,
+ //
+ // Integer to Double-Precision FP Convert
+ InstrItinData<IIC_fpCVTID , [InstrStage<1, [V6_Pipe]>], [9, 2]>,
+ //
+ // Single-precision FP ALU
+ InstrItinData<IIC_fpALU32 , [InstrStage<1, [V6_Pipe]>], [9, 2, 2]>,
+ //
+ // Double-precision FP ALU
+ InstrItinData<IIC_fpALU64 , [InstrStage<1, [V6_Pipe]>], [9, 2, 2]>,
+ //
+ // Single-precision FP Multiply
+ InstrItinData<IIC_fpMUL32 , [InstrStage<1, [V6_Pipe]>], [9, 2, 2]>,
+ //
+ // Double-precision FP Multiply
+ InstrItinData<IIC_fpMUL64 , [InstrStage<2, [V6_Pipe]>], [9, 2, 2]>,
+ //
+ // Single-precision FP MAC
+ InstrItinData<IIC_fpMAC32 , [InstrStage<1, [V6_Pipe]>], [9, 2, 2, 2]>,
+ //
+ // Double-precision FP MAC
+ InstrItinData<IIC_fpMAC64 , [InstrStage<2, [V6_Pipe]>], [9, 2, 2, 2]>,
+ //
+ // Single-precision FP DIV
+ InstrItinData<IIC_fpDIV32 , [InstrStage<15, [V6_Pipe]>], [20, 2, 2]>,
+ //
+ // Double-precision FP DIV
+ InstrItinData<IIC_fpDIV64 , [InstrStage<29, [V6_Pipe]>], [34, 2, 2]>,
+ //
+ // Single-precision FP SQRT
+ InstrItinData<IIC_fpSQRT32 , [InstrStage<15, [V6_Pipe]>], [20, 2, 2]>,
+ //
+ // Double-precision FP SQRT
+ InstrItinData<IIC_fpSQRT64 , [InstrStage<29, [V6_Pipe]>], [34, 2, 2]>,
+ //
+ // Integer to Single-precision Move
+ InstrItinData<IIC_fpMOVIS, [InstrStage<1, [V6_Pipe]>], [10, 1]>,
+ //
+ // Integer to Double-precision Move
+ InstrItinData<IIC_fpMOVID, [InstrStage<1, [V6_Pipe]>], [10, 1, 1]>,
+ //
+ // Single-precision to Integer Move
+ InstrItinData<IIC_fpMOVSI, [InstrStage<1, [V6_Pipe]>], [10, 1]>,
+ //
+ // Double-precision to Integer Move
+ InstrItinData<IIC_fpMOVDI, [InstrStage<1, [V6_Pipe]>], [10, 10, 1]>,
+ //
+ // Single-precision FP Load
+ InstrItinData<IIC_fpLoad32 , [InstrStage<1, [V6_Pipe]>], [5, 2, 2]>,
+ //
+ // Double-precision FP Load
+ InstrItinData<IIC_fpLoad64 , [InstrStage<1, [V6_Pipe]>], [5, 2, 2]>,
+ //
+ // FP Load Multiple
+ InstrItinData<IIC_fpLoad_m , [InstrStage<3, [V6_Pipe]>], [2, 1, 1, 5]>,
+ //
+ // FP Load Multiple + update
+ InstrItinData<IIC_fpLoad_mu, [InstrStage<3, [V6_Pipe]>], [3, 2, 1, 1, 5]>,
+ //
+ // Single-precision FP Store
+ InstrItinData<IIC_fpStore32 , [InstrStage<1, [V6_Pipe]>], [2, 2, 2]>,
+ //
+ // Double-precision FP Store
+ // use FU_Issue to enforce the 1 load/store per cycle limit
+ InstrItinData<IIC_fpStore64 , [InstrStage<1, [V6_Pipe]>], [2, 2, 2]>,
+ //
+ // FP Store Multiple
+ InstrItinData<IIC_fpStore_m, [InstrStage<3, [V6_Pipe]>], [2, 2, 2, 2]>,
+ //
+ // FP Store Multiple + update
+ InstrItinData<IIC_fpStore_mu,[InstrStage<3, [V6_Pipe]>], [3, 2, 2, 2, 2]>
+]>;
diff --git a/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp
new file mode 100644
index 0000000..a3a3d58
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp
@@ -0,0 +1,197 @@
+//===-- ARMSelectionDAGInfo.cpp - ARM SelectionDAG Info -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ARMSelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm-selectiondag-info"
+#include "ARMTargetMachine.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+using namespace llvm;
+
+ARMSelectionDAGInfo::ARMSelectionDAGInfo(const TargetMachine &TM)
+ : TargetSelectionDAGInfo(TM),
+ Subtarget(&TM.getSubtarget<ARMSubtarget>()) {
+}
+
+ARMSelectionDAGInfo::~ARMSelectionDAGInfo() {
+}
+
+SDValue
+ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain,
+ SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ bool isVolatile, bool AlwaysInline,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) const {
+ // Do repeated 4-byte loads and stores. To be improved.
+ // This requires 4-byte alignment.
+ if ((Align & 3) != 0)
+ return SDValue();
+ // This requires the copy size to be a constant, preferably
+ // within a subtarget-specific limit.
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ if (!ConstantSize)
+ return SDValue();
+ uint64_t SizeVal = ConstantSize->getZExtValue();
+ if (!AlwaysInline && SizeVal > Subtarget->getMaxInlineSizeThreshold())
+ return SDValue();
+
+ unsigned BytesLeft = SizeVal & 3;
+ unsigned NumMemOps = SizeVal >> 2;
+ unsigned EmittedNumMemOps = 0;
+ EVT VT = MVT::i32;
+ unsigned VTSize = 4;
+ unsigned i = 0;
+ const unsigned MAX_LOADS_IN_LDM = 6;
+ SDValue TFOps[MAX_LOADS_IN_LDM];
+ SDValue Loads[MAX_LOADS_IN_LDM];
+ uint64_t SrcOff = 0, DstOff = 0;
+
+ // Emit up to MAX_LOADS_IN_LDM loads, then a TokenFactor barrier, then the
+ // same number of stores. The loads and stores will get combined into
+ // ldm/stm later on.
+ while (EmittedNumMemOps < NumMemOps) {
+ for (i = 0;
+ i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
+ Loads[i] = DAG.getLoad(VT, dl, Chain,
+ DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
+ DAG.getConstant(SrcOff, MVT::i32)),
+ SrcPtrInfo.getWithOffset(SrcOff), isVolatile,
+ false, 0);
+ TFOps[i] = Loads[i].getValue(1);
+ SrcOff += VTSize;
+ }
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
+
+ for (i = 0;
+ i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
+ TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
+ DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
+ DAG.getConstant(DstOff, MVT::i32)),
+ DstPtrInfo.getWithOffset(DstOff),
+ isVolatile, false, 0);
+ DstOff += VTSize;
+ }
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
+
+ EmittedNumMemOps += i;
+ }
+
+ if (BytesLeft == 0)
+ return Chain;
+
+ // Issue loads / stores for the trailing (1 - 3) bytes.
+ unsigned BytesLeftSave = BytesLeft;
+ i = 0;
+ while (BytesLeft) {
+ if (BytesLeft >= 2) {
+ VT = MVT::i16;
+ VTSize = 2;
+ } else {
+ VT = MVT::i8;
+ VTSize = 1;
+ }
+
+ Loads[i] = DAG.getLoad(VT, dl, Chain,
+ DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
+ DAG.getConstant(SrcOff, MVT::i32)),
+ SrcPtrInfo.getWithOffset(SrcOff), false, false, 0);
+ TFOps[i] = Loads[i].getValue(1);
+ ++i;
+ SrcOff += VTSize;
+ BytesLeft -= VTSize;
+ }
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
+
+ i = 0;
+ BytesLeft = BytesLeftSave;
+ while (BytesLeft) {
+ if (BytesLeft >= 2) {
+ VT = MVT::i16;
+ VTSize = 2;
+ } else {
+ VT = MVT::i8;
+ VTSize = 1;
+ }
+
+ TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
+ DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
+ DAG.getConstant(DstOff, MVT::i32)),
+ DstPtrInfo.getWithOffset(DstOff), false, false, 0);
+ ++i;
+ DstOff += VTSize;
+ BytesLeft -= VTSize;
+ }
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
+}
+
+// Adjust parameters for memset, EABI uses format (ptr, size, value),
+// GNU library uses (ptr, value, size)
+// See RTABI section 4.3.4
+SDValue ARMSelectionDAGInfo::
+EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain, SDValue Dst,
+ SDValue Src, SDValue Size,
+ unsigned Align, bool isVolatile,
+ MachinePointerInfo DstPtrInfo) const {
+ // Use default for non AAPCS subtargets
+ if (!Subtarget->isAAPCS_ABI())
+ return SDValue();
+
+ const ARMTargetLowering &TLI =
+ *static_cast<const ARMTargetLowering*>(DAG.getTarget().getTargetLowering());
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+
+ // First argument: data pointer
+ Type *IntPtrTy = TLI.getTargetData()->getIntPtrType(*DAG.getContext());
+ Entry.Node = Dst;
+ Entry.Ty = IntPtrTy;
+ Args.push_back(Entry);
+
+ // Second argument: buffer size
+ Entry.Node = Size;
+ Entry.Ty = IntPtrTy;
+ Entry.isSExt = false;
+ Args.push_back(Entry);
+
+ // Extend or truncate the argument to be an i32 value for the call.
+ if (Src.getValueType().bitsGT(MVT::i32))
+ Src = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src);
+ else
+ Src = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src);
+
+ // Third argument: value to fill
+ Entry.Node = Src;
+ Entry.Ty = Type::getInt32Ty(*DAG.getContext());
+ Entry.isSExt = true;
+ Args.push_back(Entry);
+
+ // Emit __eabi_memset call
+ std::pair<SDValue,SDValue> CallResult =
+ TLI.LowerCallTo(Chain,
+ Type::getVoidTy(*DAG.getContext()), // return type
+ false, // return sign ext
+ false, // return zero ext
+ false, // is var arg
+ false, // is in regs
+ 0, // number of fixed arguments
+ TLI.getLibcallCallingConv(RTLIB::MEMSET), // call conv
+ false, // is tail call
+ false, // is return val used
+ DAG.getExternalSymbol(TLI.getLibcallName(RTLIB::MEMSET),
+ TLI.getPointerTy()), // callee
+ Args, DAG, dl); // arg list, DAG and debug
+
+ return CallResult.second;
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h
new file mode 100644
index 0000000..6419a73
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h
@@ -0,0 +1,68 @@
+//===-- ARMSelectionDAGInfo.h - ARM SelectionDAG Info -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the ARM subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMSELECTIONDAGINFO_H
+#define ARMSELECTIONDAGINFO_H
+
+#include "MCTargetDesc/ARMAddressingModes.h"
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+namespace ARM_AM {
+ static inline ShiftOpc getShiftOpcForNode(unsigned Opcode) {
+ switch (Opcode) {
+ default: return ARM_AM::no_shift;
+ case ISD::SHL: return ARM_AM::lsl;
+ case ISD::SRL: return ARM_AM::lsr;
+ case ISD::SRA: return ARM_AM::asr;
+ case ISD::ROTR: return ARM_AM::ror;
+ //case ISD::ROTL: // Only if imm -> turn into ROTR.
+ // Can't handle RRX here, because it would require folding a flag into
+ // the addressing mode. :( This causes us to miss certain things.
+ //case ARMISD::RRX: return ARM_AM::rrx;
+ }
+ }
+} // end namespace ARM_AM
+
+class ARMSelectionDAGInfo : public TargetSelectionDAGInfo {
+ /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const ARMSubtarget *Subtarget;
+
+public:
+ explicit ARMSelectionDAGInfo(const TargetMachine &TM);
+ ~ARMSelectionDAGInfo();
+
+ virtual
+ SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain,
+ SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ bool isVolatile, bool AlwaysInline,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) const;
+
+ // Adjust parameters for memset, see RTABI section 4.3.4
+ virtual
+ SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain,
+ SDValue Op1, SDValue Op2,
+ SDValue Op3, unsigned Align,
+ bool isVolatile,
+ MachinePointerInfo DstPtrInfo) const;
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp b/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp
new file mode 100644
index 0000000..247d6be
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp
@@ -0,0 +1,219 @@
+//===-- ARMSubtarget.cpp - ARM Subtarget Information ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ARM specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMSubtarget.h"
+#include "ARMBaseRegisterInfo.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/ADT/SmallVector.h"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "ARMGenSubtargetInfo.inc"
+
+using namespace llvm;
+
+static cl::opt<bool>
+ReserveR9("arm-reserve-r9", cl::Hidden,
+ cl::desc("Reserve R9, making it unavailable as GPR"));
+
+static cl::opt<bool>
+DarwinUseMOVT("arm-darwin-use-movt", cl::init(true), cl::Hidden);
+
+static cl::opt<bool>
+StrictAlign("arm-strict-align", cl::Hidden,
+ cl::desc("Disallow all unaligned memory accesses"));
+
+ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS)
+ : ARMGenSubtargetInfo(TT, CPU, FS)
+ , ARMProcFamily(Others)
+ , HasV4TOps(false)
+ , HasV5TOps(false)
+ , HasV5TEOps(false)
+ , HasV6Ops(false)
+ , HasV6T2Ops(false)
+ , HasV7Ops(false)
+ , HasVFPv2(false)
+ , HasVFPv3(false)
+ , HasNEON(false)
+ , UseNEONForSinglePrecisionFP(false)
+ , SlowFPVMLx(false)
+ , HasVMLxForwarding(false)
+ , SlowFPBrcc(false)
+ , InThumbMode(false)
+ , InNaClMode(false)
+ , HasThumb2(false)
+ , IsMClass(false)
+ , NoARM(false)
+ , PostRAScheduler(false)
+ , IsR9Reserved(ReserveR9)
+ , UseMovt(false)
+ , SupportsTailCall(false)
+ , HasFP16(false)
+ , HasD16(false)
+ , HasHardwareDivide(false)
+ , HasT2ExtractPack(false)
+ , HasDataBarrier(false)
+ , Pref32BitThumb(false)
+ , AvoidCPSRPartialUpdate(false)
+ , HasMPExtension(false)
+ , FPOnlySP(false)
+ , AllowsUnalignedMem(false)
+ , Thumb2DSP(false)
+ , stackAlignment(4)
+ , CPUString(CPU)
+ , TargetTriple(TT)
+ , TargetABI(ARM_ABI_APCS) {
+ // Determine default and user specified characteristics
+ if (CPUString.empty())
+ CPUString = "generic";
+
+ // Insert the architecture feature derived from the target triple into the
+ // feature string. This is important for setting features that are implied
+ // based on the architecture version.
+ std::string ArchFS = ARM_MC::ParseARMTriple(TT);
+ if (!FS.empty()) {
+ if (!ArchFS.empty())
+ ArchFS = ArchFS + "," + FS;
+ else
+ ArchFS = FS;
+ }
+ ParseSubtargetFeatures(CPUString, ArchFS);
+
+ // Thumb2 implies at least V6T2. FIXME: Fix tests to explicitly specify a
+ // ARM version or CPU and then remove this.
+ if (!HasV6T2Ops && hasThumb2())
+ HasV4TOps = HasV5TOps = HasV5TEOps = HasV6Ops = HasV6T2Ops = true;
+
+ // Initialize scheduling itinerary for the specified CPU.
+ InstrItins = getInstrItineraryForCPU(CPUString);
+
+ // After parsing Itineraries, set ItinData.IssueWidth.
+ computeIssueWidth();
+
+ if (TT.find("eabi") != std::string::npos)
+ TargetABI = ARM_ABI_AAPCS;
+
+ if (isAAPCS_ABI())
+ stackAlignment = 8;
+
+ if (!isTargetDarwin())
+ UseMovt = hasV6T2Ops();
+ else {
+ IsR9Reserved = ReserveR9 | !HasV6Ops;
+ UseMovt = DarwinUseMOVT && hasV6T2Ops();
+ const Triple &T = getTargetTriple();
+ SupportsTailCall = T.getOS() == Triple::IOS && !T.isOSVersionLT(5, 0);
+ }
+
+ if (!isThumb() || hasThumb2())
+ PostRAScheduler = true;
+
+ // v6+ may or may not support unaligned mem access depending on the system
+ // configuration.
+ if (!StrictAlign && hasV6Ops() && isTargetDarwin())
+ AllowsUnalignedMem = true;
+}
+
+/// GVIsIndirectSymbol - true if the GV will be accessed via an indirect symbol.
+bool
+ARMSubtarget::GVIsIndirectSymbol(const GlobalValue *GV,
+ Reloc::Model RelocM) const {
+ if (RelocM == Reloc::Static)
+ return false;
+
+ // Materializable GVs (in JIT lazy compilation mode) do not require an extra
+ // load from stub.
+ bool isDecl = GV->hasAvailableExternallyLinkage();
+ if (GV->isDeclaration() && !GV->isMaterializable())
+ isDecl = true;
+
+ if (!isTargetDarwin()) {
+ // Extra load is needed for all externally visible.
+ if (GV->hasLocalLinkage() || GV->hasHiddenVisibility())
+ return false;
+ return true;
+ } else {
+ if (RelocM == Reloc::PIC_) {
+ // If this is a strong reference to a definition, it is definitely not
+ // through a stub.
+ if (!isDecl && !GV->isWeakForLinker())
+ return false;
+
+ // Unless we have a symbol with hidden visibility, we have to go through a
+ // normal $non_lazy_ptr stub because this symbol might be resolved late.
+ if (!GV->hasHiddenVisibility()) // Non-hidden $non_lazy_ptr reference.
+ return true;
+
+ // If symbol visibility is hidden, we have a stub for common symbol
+ // references and external declarations.
+ if (isDecl || GV->hasCommonLinkage())
+ // Hidden $non_lazy_ptr reference.
+ return true;
+
+ return false;
+ } else {
+ // If this is a strong reference to a definition, it is definitely not
+ // through a stub.
+ if (!isDecl && !GV->isWeakForLinker())
+ return false;
+
+ // Unless we have a symbol with hidden visibility, we have to go through a
+ // normal $non_lazy_ptr stub because this symbol might be resolved late.
+ if (!GV->hasHiddenVisibility()) // Non-hidden $non_lazy_ptr reference.
+ return true;
+ }
+ }
+
+ return false;
+}
+
+unsigned ARMSubtarget::getMispredictionPenalty() const {
+ // If we have a reasonable estimate of the pipeline depth, then we can
+ // estimate the penalty of a misprediction based on that.
+ if (isCortexA8())
+ return 13;
+ else if (isCortexA9())
+ return 8;
+
+ // Otherwise, just return a sensible default.
+ return 10;
+}
+
+void ARMSubtarget::computeIssueWidth() {
+ unsigned allStage1Units = 0;
+ for (const InstrItinerary *itin = InstrItins.Itineraries;
+ itin->FirstStage != ~0U; ++itin) {
+ const InstrStage *IS = InstrItins.Stages + itin->FirstStage;
+ allStage1Units |= IS->getUnits();
+ }
+ InstrItins.IssueWidth = 0;
+ while (allStage1Units) {
+ ++InstrItins.IssueWidth;
+ // clear the lowest bit
+ allStage1Units ^= allStage1Units & ~(allStage1Units - 1);
+ }
+ assert(InstrItins.IssueWidth <= 2 && "itinerary bug, too many stage 1 units");
+}
+
+bool ARMSubtarget::enablePostRAScheduler(
+ CodeGenOpt::Level OptLevel,
+ TargetSubtargetInfo::AntiDepBreakMode& Mode,
+ RegClassVector& CriticalPathRCs) const {
+ Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL;
+ CriticalPathRCs.clear();
+ CriticalPathRCs.push_back(&ARM::GPRRegClass);
+ return PostRAScheduler && OptLevel >= CodeGenOpt::Default;
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h
new file mode 100644
index 0000000..b63e108
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -0,0 +1,270 @@
+//=====---- ARMSubtarget.h - Define Subtarget for the ARM -----*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the ARM specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMSUBTARGET_H
+#define ARMSUBTARGET_H
+
+#include "MCTargetDesc/ARMMCTargetDesc.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/ADT/Triple.h"
+#include <string>
+
+#define GET_SUBTARGETINFO_HEADER
+#include "ARMGenSubtargetInfo.inc"
+
+namespace llvm {
+class GlobalValue;
+class StringRef;
+
+class ARMSubtarget : public ARMGenSubtargetInfo {
+protected:
+ enum ARMProcFamilyEnum {
+ Others, CortexA8, CortexA9
+ };
+
+ /// ARMProcFamily - ARM processor family: Cortex-A8, Cortex-A9, and others.
+ ARMProcFamilyEnum ARMProcFamily;
+
+ /// HasV4TOps, HasV5TOps, HasV5TEOps, HasV6Ops, HasV6T2Ops, HasV7Ops -
+ /// Specify whether target support specific ARM ISA variants.
+ bool HasV4TOps;
+ bool HasV5TOps;
+ bool HasV5TEOps;
+ bool HasV6Ops;
+ bool HasV6T2Ops;
+ bool HasV7Ops;
+
+ /// HasVFPv2, HasVFPv3, HasNEON - Specify what floating point ISAs are
+ /// supported.
+ bool HasVFPv2;
+ bool HasVFPv3;
+ bool HasNEON;
+
+ /// UseNEONForSinglePrecisionFP - if the NEONFP attribute has been
+ /// specified. Use the method useNEONForSinglePrecisionFP() to
+ /// determine if NEON should actually be used.
+ bool UseNEONForSinglePrecisionFP;
+
+ /// SlowFPVMLx - If the VFP2 / NEON instructions are available, indicates
+ /// whether the FP VML[AS] instructions are slow (if so, don't use them).
+ bool SlowFPVMLx;
+
+ /// HasVMLxForwarding - If true, NEON has special multiplier accumulator
+ /// forwarding to allow mul + mla being issued back to back.
+ bool HasVMLxForwarding;
+
+ /// SlowFPBrcc - True if floating point compare + branch is slow.
+ bool SlowFPBrcc;
+
+ /// InThumbMode - True if compiling for Thumb, false for ARM.
+ bool InThumbMode;
+
+ /// InNaClMode - True if targeting Native Client
+ bool InNaClMode;
+
+ /// HasThumb2 - True if Thumb2 instructions are supported.
+ bool HasThumb2;
+
+ /// IsMClass - True if the subtarget belongs to the 'M' profile of CPUs -
+ /// v6m, v7m for example.
+ bool IsMClass;
+
+ /// NoARM - True if subtarget does not support ARM mode execution.
+ bool NoARM;
+
+ /// PostRAScheduler - True if using post-register-allocation scheduler.
+ bool PostRAScheduler;
+
+ /// IsR9Reserved - True if R9 is a not available as general purpose register.
+ bool IsR9Reserved;
+
+ /// UseMovt - True if MOVT / MOVW pairs are used for materialization of 32-bit
+ /// imms (including global addresses).
+ bool UseMovt;
+
+ /// SupportsTailCall - True if the OS supports tail call. The dynamic linker
+ /// must be able to synthesize call stubs for interworking between ARM and
+ /// Thumb.
+ bool SupportsTailCall;
+
+ /// HasFP16 - True if subtarget supports half-precision FP (We support VFP+HF
+ /// only so far)
+ bool HasFP16;
+
+ /// HasD16 - True if subtarget is limited to 16 double precision
+ /// FP registers for VFPv3.
+ bool HasD16;
+
+ /// HasHardwareDivide - True if subtarget supports [su]div
+ bool HasHardwareDivide;
+
+ /// HasT2ExtractPack - True if subtarget supports thumb2 extract/pack
+ /// instructions.
+ bool HasT2ExtractPack;
+
+ /// HasDataBarrier - True if the subtarget supports DMB / DSB data barrier
+ /// instructions.
+ bool HasDataBarrier;
+
+ /// Pref32BitThumb - If true, codegen would prefer 32-bit Thumb instructions
+ /// over 16-bit ones.
+ bool Pref32BitThumb;
+
+ /// AvoidCPSRPartialUpdate - If true, codegen would avoid using instructions
+ /// that partially update CPSR and add false dependency on the previous
+ /// CPSR setting instruction.
+ bool AvoidCPSRPartialUpdate;
+
+ /// HasMPExtension - True if the subtarget supports Multiprocessing
+ /// extension (ARMv7 only).
+ bool HasMPExtension;
+
+ /// FPOnlySP - If true, the floating point unit only supports single
+ /// precision.
+ bool FPOnlySP;
+
+ /// AllowsUnalignedMem - If true, the subtarget allows unaligned memory
+ /// accesses for some types. For details, see
+ /// ARMTargetLowering::allowsUnalignedMemoryAccesses().
+ bool AllowsUnalignedMem;
+
+ /// Thumb2DSP - If true, the subtarget supports the v7 DSP (saturating arith
+ /// and such) instructions in Thumb2 code.
+ bool Thumb2DSP;
+
+ /// stackAlignment - The minimum alignment known to hold of the stack frame on
+ /// entry to the function and which must be maintained by every function.
+ unsigned stackAlignment;
+
+ /// CPUString - String name of used CPU.
+ std::string CPUString;
+
+ /// TargetTriple - What processor and OS we're targeting.
+ Triple TargetTriple;
+
+ /// Selected instruction itineraries (one entry per itinerary class.)
+ InstrItineraryData InstrItins;
+
+ public:
+ enum {
+ isELF, isDarwin
+ } TargetType;
+
+ enum {
+ ARM_ABI_APCS,
+ ARM_ABI_AAPCS // ARM EABI
+ } TargetABI;
+
+ /// This constructor initializes the data members to match that
+ /// of the specified triple.
+ ///
+ ARMSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS);
+
+ /// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size
+ /// that still makes it profitable to inline the call.
+ unsigned getMaxInlineSizeThreshold() const {
+ // FIXME: For now, we don't lower memcpy's to loads / stores for Thumb1.
+ // Change this once Thumb1 ldmia / stmia support is added.
+ return isThumb1Only() ? 0 : 64;
+ }
+ /// ParseSubtargetFeatures - Parses features string setting specified
+ /// subtarget options. Definition of function is auto generated by tblgen.
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+
+ void computeIssueWidth();
+
+ bool hasV4TOps() const { return HasV4TOps; }
+ bool hasV5TOps() const { return HasV5TOps; }
+ bool hasV5TEOps() const { return HasV5TEOps; }
+ bool hasV6Ops() const { return HasV6Ops; }
+ bool hasV6T2Ops() const { return HasV6T2Ops; }
+ bool hasV7Ops() const { return HasV7Ops; }
+
+ bool isCortexA8() const { return ARMProcFamily == CortexA8; }
+ bool isCortexA9() const { return ARMProcFamily == CortexA9; }
+
+ bool hasARMOps() const { return !NoARM; }
+
+ bool hasVFP2() const { return HasVFPv2; }
+ bool hasVFP3() const { return HasVFPv3; }
+ bool hasNEON() const { return HasNEON; }
+ bool useNEONForSinglePrecisionFP() const {
+ return hasNEON() && UseNEONForSinglePrecisionFP; }
+
+ bool hasDivide() const { return HasHardwareDivide; }
+ bool hasT2ExtractPack() const { return HasT2ExtractPack; }
+ bool hasDataBarrier() const { return HasDataBarrier; }
+ bool useFPVMLx() const { return !SlowFPVMLx; }
+ bool hasVMLxForwarding() const { return HasVMLxForwarding; }
+ bool isFPBrccSlow() const { return SlowFPBrcc; }
+ bool isFPOnlySP() const { return FPOnlySP; }
+ bool prefers32BitThumb() const { return Pref32BitThumb; }
+ bool avoidCPSRPartialUpdate() const { return AvoidCPSRPartialUpdate; }
+ bool hasMPExtension() const { return HasMPExtension; }
+ bool hasThumb2DSP() const { return Thumb2DSP; }
+
+ bool hasFP16() const { return HasFP16; }
+ bool hasD16() const { return HasD16; }
+
+ const Triple &getTargetTriple() const { return TargetTriple; }
+
+ bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
+ bool isTargetNaCl() const {
+ return TargetTriple.getOS() == Triple::NativeClient;
+ }
+ bool isTargetELF() const { return !isTargetDarwin(); }
+
+ bool isAPCS_ABI() const { return TargetABI == ARM_ABI_APCS; }
+ bool isAAPCS_ABI() const { return TargetABI == ARM_ABI_AAPCS; }
+
+ bool isThumb() const { return InThumbMode; }
+ bool isThumb1Only() const { return InThumbMode && !HasThumb2; }
+ bool isThumb2() const { return InThumbMode && HasThumb2; }
+ bool hasThumb2() const { return HasThumb2; }
+ bool isMClass() const { return IsMClass; }
+ bool isARClass() const { return !IsMClass; }
+
+ bool isR9Reserved() const { return IsR9Reserved; }
+
+ bool useMovt() const { return UseMovt && hasV6T2Ops(); }
+ bool supportsTailCall() const { return SupportsTailCall; }
+
+ bool allowsUnalignedMem() const { return AllowsUnalignedMem; }
+
+ const std::string & getCPUString() const { return CPUString; }
+
+ unsigned getMispredictionPenalty() const;
+
+ /// enablePostRAScheduler - True at 'More' optimization.
+ bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
+ TargetSubtargetInfo::AntiDepBreakMode& Mode,
+ RegClassVector& CriticalPathRCs) const;
+
+ /// getInstrItins - Return the instruction itineraies based on subtarget
+ /// selection.
+ const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
+
+ /// getStackAlignment - Returns the minimum alignment known to hold of the
+ /// stack frame on entry to the function and which must be maintained by every
+ /// function for this subtarget.
+ unsigned getStackAlignment() const { return stackAlignment; }
+
+ /// GVIsIndirectSymbol - true if the GV will be accessed via an indirect
+ /// symbol.
+ bool GVIsIndirectSymbol(const GlobalValue *GV, Reloc::Model RelocM) const;
+};
+} // End llvm namespace
+
+#endif // ARMSUBTARGET_H
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp
new file mode 100644
index 0000000..96b1e89
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp
@@ -0,0 +1,160 @@
+//===-- ARMTargetMachine.cpp - Define TargetMachine for ARM ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMTargetMachine.h"
+#include "ARMFrameLowering.h"
+#include "ARM.h"
+#include "llvm/PassManager.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+static cl::opt<bool>
+EnableGlobalMerge("global-merge", cl::Hidden,
+ cl::desc("Enable global merge pass"),
+ cl::init(true));
+
+extern "C" void LLVMInitializeARMTarget() {
+ // Register the target.
+ RegisterTargetMachine<ARMTargetMachine> X(TheARMTarget);
+ RegisterTargetMachine<ThumbTargetMachine> Y(TheThumbTarget);
+}
+
+/// TargetMachine ctor - Create an ARM architecture model.
+///
+ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM)
+ : LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
+ Subtarget(TT, CPU, FS),
+ JITInfo(),
+ InstrItins(Subtarget.getInstrItineraryData()) {
+ // Default to soft float ABI
+ if (FloatABIType == FloatABI::Default)
+ FloatABIType = FloatABI::Soft;
+}
+
+ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM)
+ : ARMBaseTargetMachine(T, TT, CPU, FS, RM, CM), InstrInfo(Subtarget),
+ DataLayout(Subtarget.isAPCS_ABI() ?
+ std::string("e-p:32:32-f64:32:64-i64:32:64-"
+ "v128:32:128-v64:32:64-n32-S32") :
+ Subtarget.isAAPCS_ABI() ?
+ std::string("e-p:32:32-f64:64:64-i64:64:64-"
+ "v128:64:128-v64:64:64-n32-S64") :
+ std::string("e-p:32:32-f64:64:64-i64:64:64-"
+ "v128:64:128-v64:64:64-n32-S32")),
+ ELFWriterInfo(*this),
+ TLInfo(*this),
+ TSInfo(*this),
+ FrameLowering(Subtarget) {
+ if (!Subtarget.hasARMOps())
+ report_fatal_error("CPU: '" + Subtarget.getCPUString() + "' does not "
+ "support ARM mode execution!");
+}
+
+ThumbTargetMachine::ThumbTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM)
+ : ARMBaseTargetMachine(T, TT, CPU, FS, RM, CM),
+ InstrInfo(Subtarget.hasThumb2()
+ ? ((ARMBaseInstrInfo*)new Thumb2InstrInfo(Subtarget))
+ : ((ARMBaseInstrInfo*)new Thumb1InstrInfo(Subtarget))),
+ DataLayout(Subtarget.isAPCS_ABI() ?
+ std::string("e-p:32:32-f64:32:64-i64:32:64-"
+ "i16:16:32-i8:8:32-i1:8:32-"
+ "v128:32:128-v64:32:64-a:0:32-n32-S32") :
+ Subtarget.isAAPCS_ABI() ?
+ std::string("e-p:32:32-f64:64:64-i64:64:64-"
+ "i16:16:32-i8:8:32-i1:8:32-"
+ "v128:64:128-v64:64:64-a:0:32-n32-S64") :
+ std::string("e-p:32:32-f64:64:64-i64:64:64-"
+ "i16:16:32-i8:8:32-i1:8:32-"
+ "v128:64:128-v64:64:64-a:0:32-n32-S32")),
+ ELFWriterInfo(*this),
+ TLInfo(*this),
+ TSInfo(*this),
+ FrameLowering(Subtarget.hasThumb2()
+ ? new ARMFrameLowering(Subtarget)
+ : (ARMFrameLowering*)new Thumb1FrameLowering(Subtarget)) {
+}
+
+bool ARMBaseTargetMachine::addPreISel(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ if (OptLevel != CodeGenOpt::None && EnableGlobalMerge)
+ PM.add(createARMGlobalMergePass(getTargetLowering()));
+
+ return false;
+}
+
+bool ARMBaseTargetMachine::addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ PM.add(createARMISelDag(*this, OptLevel));
+ return false;
+}
+
+bool ARMBaseTargetMachine::addPreRegAlloc(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ // FIXME: temporarily disabling load / store optimization pass for Thumb1.
+ if (OptLevel != CodeGenOpt::None && !Subtarget.isThumb1Only())
+ PM.add(createARMLoadStoreOptimizationPass(true));
+ if (OptLevel != CodeGenOpt::None && Subtarget.isCortexA9())
+ PM.add(createMLxExpansionPass());
+ return true;
+}
+
+bool ARMBaseTargetMachine::addPreSched2(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ // FIXME: temporarily disabling load / store optimization pass for Thumb1.
+ if (OptLevel != CodeGenOpt::None) {
+ if (!Subtarget.isThumb1Only())
+ PM.add(createARMLoadStoreOptimizationPass());
+ if (Subtarget.hasNEON())
+ PM.add(createExecutionDependencyFixPass(&ARM::DPRRegClass));
+ }
+
+ // Expand some pseudo instructions into multiple instructions to allow
+ // proper scheduling.
+ PM.add(createARMExpandPseudoPass());
+
+ if (OptLevel != CodeGenOpt::None) {
+ if (!Subtarget.isThumb1Only())
+ PM.add(createIfConverterPass());
+ }
+ if (Subtarget.isThumb2())
+ PM.add(createThumb2ITBlockPass());
+
+ return true;
+}
+
+bool ARMBaseTargetMachine::addPreEmitPass(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ if (Subtarget.isThumb2() && !Subtarget.prefers32BitThumb())
+ PM.add(createThumb2SizeReductionPass());
+
+ PM.add(createARMConstantIslandPass());
+ return true;
+}
+
+bool ARMBaseTargetMachine::addCodeEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ JITCodeEmitter &JCE) {
+ // Machine code emitter pass for ARM.
+ PM.add(createARMJITCodeEmitterPass(*this, JCE));
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h
new file mode 100644
index 0000000..c8c601c
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h
@@ -0,0 +1,145 @@
+//===-- ARMTargetMachine.h - Define TargetMachine for ARM -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the ARM specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMTARGETMACHINE_H
+#define ARMTARGETMACHINE_H
+
+#include "ARMInstrInfo.h"
+#include "ARMELFWriterInfo.h"
+#include "ARMFrameLowering.h"
+#include "ARMJITInfo.h"
+#include "ARMSubtarget.h"
+#include "ARMISelLowering.h"
+#include "ARMSelectionDAGInfo.h"
+#include "Thumb1InstrInfo.h"
+#include "Thumb1FrameLowering.h"
+#include "Thumb2InstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/ADT/OwningPtr.h"
+
+namespace llvm {
+
+class ARMBaseTargetMachine : public LLVMTargetMachine {
+protected:
+ ARMSubtarget Subtarget;
+private:
+ ARMJITInfo JITInfo;
+ InstrItineraryData InstrItins;
+
+public:
+ ARMBaseTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM);
+
+ virtual ARMJITInfo *getJITInfo() { return &JITInfo; }
+ virtual const ARMSubtarget *getSubtargetImpl() const { return &Subtarget; }
+ virtual const InstrItineraryData *getInstrItineraryData() const {
+ return &InstrItins;
+ }
+
+ // Pass Pipeline Configuration
+ virtual bool addPreISel(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addPreSched2(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
+ JITCodeEmitter &MCE);
+};
+
+/// ARMTargetMachine - ARM target machine.
+///
+class ARMTargetMachine : public ARMBaseTargetMachine {
+ ARMInstrInfo InstrInfo;
+ const TargetData DataLayout; // Calculates type size & alignment
+ ARMELFWriterInfo ELFWriterInfo;
+ ARMTargetLowering TLInfo;
+ ARMSelectionDAGInfo TSInfo;
+ ARMFrameLowering FrameLowering;
+ public:
+ ARMTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM);
+
+ virtual const ARMRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+
+ virtual const ARMTargetLowering *getTargetLowering() const {
+ return &TLInfo;
+ }
+
+ virtual const ARMSelectionDAGInfo* getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+ virtual const ARMFrameLowering *getFrameLowering() const {
+ return &FrameLowering;
+ }
+
+ virtual const ARMInstrInfo *getInstrInfo() const { return &InstrInfo; }
+ virtual const TargetData *getTargetData() const { return &DataLayout; }
+ virtual const ARMELFWriterInfo *getELFWriterInfo() const {
+ return Subtarget.isTargetELF() ? &ELFWriterInfo : 0;
+ }
+};
+
+/// ThumbTargetMachine - Thumb target machine.
+/// Due to the way architectures are handled, this represents both
+/// Thumb-1 and Thumb-2.
+///
+class ThumbTargetMachine : public ARMBaseTargetMachine {
+ // Either Thumb1InstrInfo or Thumb2InstrInfo.
+ OwningPtr<ARMBaseInstrInfo> InstrInfo;
+ const TargetData DataLayout; // Calculates type size & alignment
+ ARMELFWriterInfo ELFWriterInfo;
+ ARMTargetLowering TLInfo;
+ ARMSelectionDAGInfo TSInfo;
+ // Either Thumb1FrameLowering or ARMFrameLowering.
+ OwningPtr<ARMFrameLowering> FrameLowering;
+public:
+ ThumbTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM);
+
+ /// returns either Thumb1RegisterInfo or Thumb2RegisterInfo
+ virtual const ARMBaseRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo->getRegisterInfo();
+ }
+
+ virtual const ARMTargetLowering *getTargetLowering() const {
+ return &TLInfo;
+ }
+
+ virtual const ARMSelectionDAGInfo *getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+
+ /// returns either Thumb1InstrInfo or Thumb2InstrInfo
+ virtual const ARMBaseInstrInfo *getInstrInfo() const {
+ return InstrInfo.get();
+ }
+ /// returns either Thumb1FrameLowering or ARMFrameLowering
+ virtual const ARMFrameLowering *getFrameLowering() const {
+ return FrameLowering.get();
+ }
+ virtual const TargetData *getTargetData() const { return &DataLayout; }
+ virtual const ARMELFWriterInfo *getELFWriterInfo() const {
+ return Subtarget.isTargetELF() ? &ELFWriterInfo : 0;
+ }
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp
new file mode 100644
index 0000000..19defa1
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp
@@ -0,0 +1,47 @@
+//===-- llvm/Target/ARMTargetObjectFile.cpp - ARM Object Info Impl --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMTargetObjectFile.h"
+#include "ARMSubtarget.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+using namespace dwarf;
+
+//===----------------------------------------------------------------------===//
+// ELF Target
+//===----------------------------------------------------------------------===//
+
+void ARMElfTargetObjectFile::Initialize(MCContext &Ctx,
+ const TargetMachine &TM) {
+ TargetLoweringObjectFileELF::Initialize(Ctx, TM);
+
+ if (TM.getSubtarget<ARMSubtarget>().isAAPCS_ABI()) {
+ StaticCtorSection =
+ getContext().getELFSection(".init_array", ELF::SHT_INIT_ARRAY,
+ ELF::SHF_WRITE |
+ ELF::SHF_ALLOC,
+ SectionKind::getDataRel());
+ StaticDtorSection =
+ getContext().getELFSection(".fini_array", ELF::SHT_FINI_ARRAY,
+ ELF::SHF_WRITE |
+ ELF::SHF_ALLOC,
+ SectionKind::getDataRel());
+ LSDASection = NULL;
+ }
+
+ AttributesSection =
+ getContext().getELFSection(".ARM.attributes",
+ ELF::SHT_ARM_ATTRIBUTES,
+ 0,
+ SectionKind::getMetadata());
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.h b/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.h
new file mode 100644
index 0000000..c6a7261
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.h
@@ -0,0 +1,38 @@
+//===-- llvm/Target/ARMTargetObjectFile.h - ARM Object Info -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_ARM_TARGETOBJECTFILE_H
+#define LLVM_TARGET_ARM_TARGETOBJECTFILE_H
+
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+
+namespace llvm {
+
+class MCContext;
+class TargetMachine;
+
+class ARMElfTargetObjectFile : public TargetLoweringObjectFileELF {
+protected:
+ const MCSection *AttributesSection;
+public:
+ ARMElfTargetObjectFile() :
+ TargetLoweringObjectFileELF(),
+ AttributesSection(NULL)
+ {}
+
+ virtual void Initialize(MCContext &Ctx, const TargetMachine &TM);
+
+ virtual const MCSection *getAttributesSection() const {
+ return AttributesSection;
+ }
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp
new file mode 100644
index 0000000..14d35ba
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp
@@ -0,0 +1,143 @@
+//===-- ARMAsmLexer.cpp - Tokenize ARM assembly to AsmTokens --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/ARMBaseInfo.h"
+
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCTargetAsmLexer.h"
+
+#include "llvm/Support/TargetRegistry.h"
+
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringSwitch.h"
+
+#include <string>
+#include <map>
+
+using namespace llvm;
+
+namespace {
+
+class ARMBaseAsmLexer : public MCTargetAsmLexer {
+ const MCAsmInfo &AsmInfo;
+
+ const AsmToken &lexDefinite() {
+ return getLexer()->Lex();
+ }
+
+ AsmToken LexTokenUAL();
+protected:
+ typedef std::map <std::string, unsigned> rmap_ty;
+
+ rmap_ty RegisterMap;
+
+ void InitRegisterMap(const MCRegisterInfo *info) {
+ unsigned numRegs = info->getNumRegs();
+
+ for (unsigned i = 0; i < numRegs; ++i) {
+ const char *regName = info->getName(i);
+ if (regName)
+ RegisterMap[regName] = i;
+ }
+ }
+
+ unsigned MatchRegisterName(StringRef Name) {
+ rmap_ty::iterator iter = RegisterMap.find(Name.str());
+ if (iter != RegisterMap.end())
+ return iter->second;
+ else
+ return 0;
+ }
+
+ AsmToken LexToken() {
+ if (!Lexer) {
+ SetError(SMLoc(), "No MCAsmLexer installed");
+ return AsmToken(AsmToken::Error, "", 0);
+ }
+
+ switch (AsmInfo.getAssemblerDialect()) {
+ default:
+ SetError(SMLoc(), "Unhandled dialect");
+ return AsmToken(AsmToken::Error, "", 0);
+ case 0:
+ return LexTokenUAL();
+ }
+ }
+public:
+ ARMBaseAsmLexer(const Target &T, const MCAsmInfo &MAI)
+ : MCTargetAsmLexer(T), AsmInfo(MAI) {
+ }
+};
+
+class ARMAsmLexer : public ARMBaseAsmLexer {
+public:
+ ARMAsmLexer(const Target &T, const MCRegisterInfo &MRI, const MCAsmInfo &MAI)
+ : ARMBaseAsmLexer(T, MAI) {
+ InitRegisterMap(&MRI);
+ }
+};
+
+class ThumbAsmLexer : public ARMBaseAsmLexer {
+public:
+ ThumbAsmLexer(const Target &T, const MCRegisterInfo &MRI,const MCAsmInfo &MAI)
+ : ARMBaseAsmLexer(T, MAI) {
+ InitRegisterMap(&MRI);
+ }
+};
+
+} // end anonymous namespace
+
+AsmToken ARMBaseAsmLexer::LexTokenUAL() {
+ const AsmToken &lexedToken = lexDefinite();
+
+ switch (lexedToken.getKind()) {
+ default: break;
+ case AsmToken::Error:
+ SetError(Lexer->getErrLoc(), Lexer->getErr());
+ break;
+ case AsmToken::Identifier: {
+ std::string upperCase = lexedToken.getString().str();
+ std::string lowerCase = LowercaseString(upperCase);
+ StringRef lowerRef(lowerCase);
+
+ unsigned regID = MatchRegisterName(lowerRef);
+ // Check for register aliases.
+ // r13 -> sp
+ // r14 -> lr
+ // r15 -> pc
+ // ip -> r12
+ // FIXME: Some assemblers support lots of others. Do we want them all?
+ if (!regID) {
+ regID = StringSwitch<unsigned>(lowerCase)
+ .Case("r13", ARM::SP)
+ .Case("r14", ARM::LR)
+ .Case("r15", ARM::PC)
+ .Case("ip", ARM::R12)
+ .Default(0);
+ }
+
+ if (regID)
+ return AsmToken(AsmToken::Register,
+ lexedToken.getString(),
+ static_cast<int64_t>(regID));
+ }
+ }
+
+ return AsmToken(lexedToken);
+}
+
+extern "C" void LLVMInitializeARMAsmLexer() {
+ RegisterMCAsmLexer<ARMAsmLexer> X(TheARMTarget);
+ RegisterMCAsmLexer<ThumbAsmLexer> Y(TheThumbTarget);
+}
diff --git a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
new file mode 100644
index 0000000..24f15b4
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -0,0 +1,4615 @@
+//===-- ARMAsmParser.cpp - Parse ARM assembly to MCInst instructions ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/ARMBaseInfo.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
+#include "MCTargetDesc/ARMMCExpr.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCTargetAsmParser.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Twine.h"
+
+using namespace llvm;
+
+namespace {
+
+class ARMOperand;
+
+class ARMAsmParser : public MCTargetAsmParser {
+ MCSubtargetInfo &STI;
+ MCAsmParser &Parser;
+
+ struct {
+ ARMCC::CondCodes Cond; // Condition for IT block.
+ unsigned Mask:4; // Condition mask for instructions.
+ // Starting at first 1 (from lsb).
+ // '1' condition as indicated in IT.
+ // '0' inverse of condition (else).
+ // Count of instructions in IT block is
+ // 4 - trailingzeroes(mask)
+
+ bool FirstCond; // Explicit flag for when we're parsing the
+ // First instruction in the IT block. It's
+ // implied in the mask, so needs special
+ // handling.
+
+ unsigned CurPosition; // Current position in parsing of IT
+ // block. In range [0,3]. Initialized
+ // according to count of instructions in block.
+ // ~0U if no active IT block.
+ } ITState;
+ bool inITBlock() { return ITState.CurPosition != ~0U;}
+ void forwardITPosition() {
+ if (!inITBlock()) return;
+ // Move to the next instruction in the IT block, if there is one. If not,
+ // mark the block as done.
+ unsigned TZ = CountTrailingZeros_32(ITState.Mask);
+ if (++ITState.CurPosition == 5 - TZ)
+ ITState.CurPosition = ~0U; // Done with the IT block after this.
+ }
+
+
+ MCAsmParser &getParser() const { return Parser; }
+ MCAsmLexer &getLexer() const { return Parser.getLexer(); }
+
+ void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); }
+ bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
+
+ int tryParseRegister();
+ bool tryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &);
+ int tryParseShiftRegister(SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &, StringRef Mnemonic);
+ bool parsePrefix(ARMMCExpr::VariantKind &RefKind);
+ bool parseMemRegOffsetShift(ARM_AM::ShiftOpc &ShiftType,
+ unsigned &ShiftAmount);
+ bool parseDirectiveWord(unsigned Size, SMLoc L);
+ bool parseDirectiveThumb(SMLoc L);
+ bool parseDirectiveThumbFunc(SMLoc L);
+ bool parseDirectiveCode(SMLoc L);
+ bool parseDirectiveSyntax(SMLoc L);
+
+ StringRef splitMnemonic(StringRef Mnemonic, unsigned &PredicationCode,
+ bool &CarrySetting, unsigned &ProcessorIMod,
+ StringRef &ITMask);
+ void getMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet,
+ bool &CanAcceptPredicationCode);
+
+ bool isThumb() const {
+ // FIXME: Can tablegen auto-generate this?
+ return (STI.getFeatureBits() & ARM::ModeThumb) != 0;
+ }
+ bool isThumbOne() const {
+ return isThumb() && (STI.getFeatureBits() & ARM::FeatureThumb2) == 0;
+ }
+ bool isThumbTwo() const {
+ return isThumb() && (STI.getFeatureBits() & ARM::FeatureThumb2);
+ }
+ bool hasV6Ops() const {
+ return STI.getFeatureBits() & ARM::HasV6Ops;
+ }
+ bool hasV7Ops() const {
+ return STI.getFeatureBits() & ARM::HasV7Ops;
+ }
+ void SwitchMode() {
+ unsigned FB = ComputeAvailableFeatures(STI.ToggleFeature(ARM::ModeThumb));
+ setAvailableFeatures(FB);
+ }
+ bool isMClass() const {
+ return STI.getFeatureBits() & ARM::FeatureMClass;
+ }
+
+ /// @name Auto-generated Match Functions
+ /// {
+
+#define GET_ASSEMBLER_HEADER
+#include "ARMGenAsmMatcher.inc"
+
+ /// }
+
+ OperandMatchResultTy parseITCondCode(SmallVectorImpl<MCParsedAsmOperand*>&);
+ OperandMatchResultTy parseCoprocNumOperand(
+ SmallVectorImpl<MCParsedAsmOperand*>&);
+ OperandMatchResultTy parseCoprocRegOperand(
+ SmallVectorImpl<MCParsedAsmOperand*>&);
+ OperandMatchResultTy parseCoprocOptionOperand(
+ SmallVectorImpl<MCParsedAsmOperand*>&);
+ OperandMatchResultTy parseMemBarrierOptOperand(
+ SmallVectorImpl<MCParsedAsmOperand*>&);
+ OperandMatchResultTy parseProcIFlagsOperand(
+ SmallVectorImpl<MCParsedAsmOperand*>&);
+ OperandMatchResultTy parseMSRMaskOperand(
+ SmallVectorImpl<MCParsedAsmOperand*>&);
+ OperandMatchResultTy parsePKHImm(SmallVectorImpl<MCParsedAsmOperand*> &O,
+ StringRef Op, int Low, int High);
+ OperandMatchResultTy parsePKHLSLImm(SmallVectorImpl<MCParsedAsmOperand*> &O) {
+ return parsePKHImm(O, "lsl", 0, 31);
+ }
+ OperandMatchResultTy parsePKHASRImm(SmallVectorImpl<MCParsedAsmOperand*> &O) {
+ return parsePKHImm(O, "asr", 1, 32);
+ }
+ OperandMatchResultTy parseSetEndImm(SmallVectorImpl<MCParsedAsmOperand*>&);
+ OperandMatchResultTy parseShifterImm(SmallVectorImpl<MCParsedAsmOperand*>&);
+ OperandMatchResultTy parseRotImm(SmallVectorImpl<MCParsedAsmOperand*>&);
+ OperandMatchResultTy parseBitfield(SmallVectorImpl<MCParsedAsmOperand*>&);
+ OperandMatchResultTy parsePostIdxReg(SmallVectorImpl<MCParsedAsmOperand*>&);
+ OperandMatchResultTy parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*>&);
+ OperandMatchResultTy parseFPImm(SmallVectorImpl<MCParsedAsmOperand*>&);
+
+ // Asm Match Converter Methods
+ bool cvtT2LdrdPre(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool cvtT2StrdPre(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool cvtLdWriteBackRegT2AddrModeImm8(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool cvtStWriteBackRegT2AddrModeImm8(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool cvtLdWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool cvtLdWriteBackRegAddrModeImm12(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool cvtStWriteBackRegAddrModeImm12(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool cvtStWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool cvtStWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool cvtLdExtTWriteBackImm(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool cvtLdExtTWriteBackReg(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool cvtStExtTWriteBackImm(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool cvtStExtTWriteBackReg(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool cvtLdrdPre(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool cvtStrdPre(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool cvtLdWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool cvtThumbMultiply(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+
+ bool validateInstruction(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Ops);
+ void processInstruction(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Ops);
+ bool shouldOmitCCOutOperand(StringRef Mnemonic,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+public:
+ enum ARMMatchResultTy {
+ Match_RequiresITBlock = FIRST_TARGET_MATCH_RESULT_TY,
+ Match_RequiresNotITBlock,
+ Match_RequiresV6,
+ Match_RequiresThumb2
+ };
+
+ ARMAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser)
+ : MCTargetAsmParser(), STI(_STI), Parser(_Parser) {
+ MCAsmParserExtension::Initialize(_Parser);
+
+ // Initialize the set of available features.
+ setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
+
+ // Not in an ITBlock to start with.
+ ITState.CurPosition = ~0U;
+ }
+
+ // Implementation of the MCTargetAsmParser interface:
+ bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
+ bool ParseInstruction(StringRef Name, SMLoc NameLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+ bool ParseDirective(AsmToken DirectiveID);
+
+ unsigned checkTargetMatchPredicate(MCInst &Inst);
+
+ bool MatchAndEmitInstruction(SMLoc IDLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ MCStreamer &Out);
+};
+} // end anonymous namespace
+
+namespace {
+
+/// ARMOperand - Instances of this class represent a parsed ARM machine
+/// instruction.
+class ARMOperand : public MCParsedAsmOperand {
+ enum KindTy {
+ k_CondCode,
+ k_CCOut,
+ k_ITCondMask,
+ k_CoprocNum,
+ k_CoprocReg,
+ k_CoprocOption,
+ k_Immediate,
+ k_FPImmediate,
+ k_MemBarrierOpt,
+ k_Memory,
+ k_PostIndexRegister,
+ k_MSRMask,
+ k_ProcIFlags,
+ k_VectorIndex,
+ k_Register,
+ k_RegisterList,
+ k_DPRRegisterList,
+ k_SPRRegisterList,
+ k_ShiftedRegister,
+ k_ShiftedImmediate,
+ k_ShifterImmediate,
+ k_RotateImmediate,
+ k_BitfieldDescriptor,
+ k_Token
+ } Kind;
+
+ SMLoc StartLoc, EndLoc;
+ SmallVector<unsigned, 8> Registers;
+
+ union {
+ struct {
+ ARMCC::CondCodes Val;
+ } CC;
+
+ struct {
+ unsigned Val;
+ } Cop;
+
+ struct {
+ unsigned Val;
+ } CoprocOption;
+
+ struct {
+ unsigned Mask:4;
+ } ITMask;
+
+ struct {
+ ARM_MB::MemBOpt Val;
+ } MBOpt;
+
+ struct {
+ ARM_PROC::IFlags Val;
+ } IFlags;
+
+ struct {
+ unsigned Val;
+ } MMask;
+
+ struct {
+ const char *Data;
+ unsigned Length;
+ } Tok;
+
+ struct {
+ unsigned RegNum;
+ } Reg;
+
+ struct {
+ unsigned Val;
+ } VectorIndex;
+
+ struct {
+ const MCExpr *Val;
+ } Imm;
+
+ struct {
+ unsigned Val; // encoded 8-bit representation
+ } FPImm;
+
+ /// Combined record for all forms of ARM address expressions.
+ struct {
+ unsigned BaseRegNum;
+ // Offset is in OffsetReg or OffsetImm. If both are zero, no offset
+ // was specified.
+ const MCConstantExpr *OffsetImm; // Offset immediate value
+ unsigned OffsetRegNum; // Offset register num, when OffsetImm == NULL
+ ARM_AM::ShiftOpc ShiftType; // Shift type for OffsetReg
+ unsigned ShiftImm; // shift for OffsetReg.
+ unsigned Alignment; // 0 = no alignment specified
+ // n = alignment in bytes (8, 16, or 32)
+ unsigned isNegative : 1; // Negated OffsetReg? (~'U' bit)
+ } Memory;
+
+ struct {
+ unsigned RegNum;
+ bool isAdd;
+ ARM_AM::ShiftOpc ShiftTy;
+ unsigned ShiftImm;
+ } PostIdxReg;
+
+ struct {
+ bool isASR;
+ unsigned Imm;
+ } ShifterImm;
+ struct {
+ ARM_AM::ShiftOpc ShiftTy;
+ unsigned SrcReg;
+ unsigned ShiftReg;
+ unsigned ShiftImm;
+ } RegShiftedReg;
+ struct {
+ ARM_AM::ShiftOpc ShiftTy;
+ unsigned SrcReg;
+ unsigned ShiftImm;
+ } RegShiftedImm;
+ struct {
+ unsigned Imm;
+ } RotImm;
+ struct {
+ unsigned LSB;
+ unsigned Width;
+ } Bitfield;
+ };
+
+ ARMOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
+public:
+ ARMOperand(const ARMOperand &o) : MCParsedAsmOperand() {
+ Kind = o.Kind;
+ StartLoc = o.StartLoc;
+ EndLoc = o.EndLoc;
+ switch (Kind) {
+ case k_CondCode:
+ CC = o.CC;
+ break;
+ case k_ITCondMask:
+ ITMask = o.ITMask;
+ break;
+ case k_Token:
+ Tok = o.Tok;
+ break;
+ case k_CCOut:
+ case k_Register:
+ Reg = o.Reg;
+ break;
+ case k_RegisterList:
+ case k_DPRRegisterList:
+ case k_SPRRegisterList:
+ Registers = o.Registers;
+ break;
+ case k_CoprocNum:
+ case k_CoprocReg:
+ Cop = o.Cop;
+ break;
+ case k_CoprocOption:
+ CoprocOption = o.CoprocOption;
+ break;
+ case k_Immediate:
+ Imm = o.Imm;
+ break;
+ case k_FPImmediate:
+ FPImm = o.FPImm;
+ break;
+ case k_MemBarrierOpt:
+ MBOpt = o.MBOpt;
+ break;
+ case k_Memory:
+ Memory = o.Memory;
+ break;
+ case k_PostIndexRegister:
+ PostIdxReg = o.PostIdxReg;
+ break;
+ case k_MSRMask:
+ MMask = o.MMask;
+ break;
+ case k_ProcIFlags:
+ IFlags = o.IFlags;
+ break;
+ case k_ShifterImmediate:
+ ShifterImm = o.ShifterImm;
+ break;
+ case k_ShiftedRegister:
+ RegShiftedReg = o.RegShiftedReg;
+ break;
+ case k_ShiftedImmediate:
+ RegShiftedImm = o.RegShiftedImm;
+ break;
+ case k_RotateImmediate:
+ RotImm = o.RotImm;
+ break;
+ case k_BitfieldDescriptor:
+ Bitfield = o.Bitfield;
+ break;
+ case k_VectorIndex:
+ VectorIndex = o.VectorIndex;
+ break;
+ }
+ }
+
+ /// getStartLoc - Get the location of the first token of this operand.
+ SMLoc getStartLoc() const { return StartLoc; }
+ /// getEndLoc - Get the location of the last token of this operand.
+ SMLoc getEndLoc() const { return EndLoc; }
+
+ ARMCC::CondCodes getCondCode() const {
+ assert(Kind == k_CondCode && "Invalid access!");
+ return CC.Val;
+ }
+
+ unsigned getCoproc() const {
+ assert((Kind == k_CoprocNum || Kind == k_CoprocReg) && "Invalid access!");
+ return Cop.Val;
+ }
+
+ StringRef getToken() const {
+ assert(Kind == k_Token && "Invalid access!");
+ return StringRef(Tok.Data, Tok.Length);
+ }
+
+ unsigned getReg() const {
+ assert((Kind == k_Register || Kind == k_CCOut) && "Invalid access!");
+ return Reg.RegNum;
+ }
+
+ const SmallVectorImpl<unsigned> &getRegList() const {
+ assert((Kind == k_RegisterList || Kind == k_DPRRegisterList ||
+ Kind == k_SPRRegisterList) && "Invalid access!");
+ return Registers;
+ }
+
+ const MCExpr *getImm() const {
+ assert(Kind == k_Immediate && "Invalid access!");
+ return Imm.Val;
+ }
+
+ unsigned getFPImm() const {
+ assert(Kind == k_FPImmediate && "Invalid access!");
+ return FPImm.Val;
+ }
+
+ unsigned getVectorIndex() const {
+ assert(Kind == k_VectorIndex && "Invalid access!");
+ return VectorIndex.Val;
+ }
+
+ ARM_MB::MemBOpt getMemBarrierOpt() const {
+ assert(Kind == k_MemBarrierOpt && "Invalid access!");
+ return MBOpt.Val;
+ }
+
+ ARM_PROC::IFlags getProcIFlags() const {
+ assert(Kind == k_ProcIFlags && "Invalid access!");
+ return IFlags.Val;
+ }
+
+ unsigned getMSRMask() const {
+ assert(Kind == k_MSRMask && "Invalid access!");
+ return MMask.Val;
+ }
+
+ bool isCoprocNum() const { return Kind == k_CoprocNum; }
+ bool isCoprocReg() const { return Kind == k_CoprocReg; }
+ bool isCoprocOption() const { return Kind == k_CoprocOption; }
+ bool isCondCode() const { return Kind == k_CondCode; }
+ bool isCCOut() const { return Kind == k_CCOut; }
+ bool isITMask() const { return Kind == k_ITCondMask; }
+ bool isITCondCode() const { return Kind == k_CondCode; }
+ bool isImm() const { return Kind == k_Immediate; }
+ bool isFPImm() const { return Kind == k_FPImmediate; }
+ bool isImm8s4() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return ((Value & 3) == 0) && Value >= -1020 && Value <= 1020;
+ }
+ bool isImm0_1020s4() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return ((Value & 3) == 0) && Value >= 0 && Value <= 1020;
+ }
+ bool isImm0_508s4() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return ((Value & 3) == 0) && Value >= 0 && Value <= 508;
+ }
+ bool isImm0_255() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < 256;
+ }
+ bool isImm0_7() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < 8;
+ }
+ bool isImm0_15() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < 16;
+ }
+ bool isImm0_31() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < 32;
+ }
+ bool isImm1_16() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value > 0 && Value < 17;
+ }
+ bool isImm1_32() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value > 0 && Value < 33;
+ }
+ bool isImm0_65535() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < 65536;
+ }
+ bool isImm0_65535Expr() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ // If it's not a constant expression, it'll generate a fixup and be
+ // handled later.
+ if (!CE) return true;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < 65536;
+ }
+ bool isImm24bit() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value <= 0xffffff;
+ }
+ bool isImmThumbSR() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value > 0 && Value < 33;
+ }
+ bool isPKHLSLImm() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < 32;
+ }
+ bool isPKHASRImm() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value > 0 && Value <= 32;
+ }
+ bool isARMSOImm() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return ARM_AM::getSOImmVal(Value) != -1;
+ }
+ bool isT2SOImm() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return ARM_AM::getT2SOImmVal(Value) != -1;
+ }
+ bool isSetEndImm() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value == 1 || Value == 0;
+ }
+ bool isReg() const { return Kind == k_Register; }
+ bool isRegList() const { return Kind == k_RegisterList; }
+ bool isDPRRegList() const { return Kind == k_DPRRegisterList; }
+ bool isSPRRegList() const { return Kind == k_SPRRegisterList; }
+ bool isToken() const { return Kind == k_Token; }
+ bool isMemBarrierOpt() const { return Kind == k_MemBarrierOpt; }
+ bool isMemory() const { return Kind == k_Memory; }
+ bool isShifterImm() const { return Kind == k_ShifterImmediate; }
+ bool isRegShiftedReg() const { return Kind == k_ShiftedRegister; }
+ bool isRegShiftedImm() const { return Kind == k_ShiftedImmediate; }
+ bool isRotImm() const { return Kind == k_RotateImmediate; }
+ bool isBitfield() const { return Kind == k_BitfieldDescriptor; }
+ bool isPostIdxRegShifted() const { return Kind == k_PostIndexRegister; }
+ bool isPostIdxReg() const {
+ return Kind == k_PostIndexRegister && PostIdxReg.ShiftTy == ARM_AM::no_shift;
+ }
+ bool isMemNoOffset(bool alignOK = false) const {
+ if (!isMemory())
+ return false;
+ // No offset of any kind.
+ return Memory.OffsetRegNum == 0 && Memory.OffsetImm == 0 &&
+ (alignOK || Memory.Alignment == 0);
+ }
+ bool isAlignedMemory() const {
+ return isMemNoOffset(true);
+ }
+ bool isAddrMode2() const {
+ if (!isMemory() || Memory.Alignment != 0) return false;
+ // Check for register offset.
+ if (Memory.OffsetRegNum) return true;
+ // Immediate offset in range [-4095, 4095].
+ if (!Memory.OffsetImm) return true;
+ int64_t Val = Memory.OffsetImm->getValue();
+ return Val > -4096 && Val < 4096;
+ }
+ bool isAM2OffsetImm() const {
+ if (Kind != k_Immediate)
+ return false;
+ // Immediate offset in range [-4095, 4095].
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Val = CE->getValue();
+ return Val > -4096 && Val < 4096;
+ }
+ bool isAddrMode3() const {
+ if (!isMemory() || Memory.Alignment != 0) return false;
+ // No shifts are legal for AM3.
+ if (Memory.ShiftType != ARM_AM::no_shift) return false;
+ // Check for register offset.
+ if (Memory.OffsetRegNum) return true;
+ // Immediate offset in range [-255, 255].
+ if (!Memory.OffsetImm) return true;
+ int64_t Val = Memory.OffsetImm->getValue();
+ return Val > -256 && Val < 256;
+ }
+ bool isAM3Offset() const {
+ if (Kind != k_Immediate && Kind != k_PostIndexRegister)
+ return false;
+ if (Kind == k_PostIndexRegister)
+ return PostIdxReg.ShiftTy == ARM_AM::no_shift;
+ // Immediate offset in range [-255, 255].
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Val = CE->getValue();
+ // Special case, #-0 is INT32_MIN.
+ return (Val > -256 && Val < 256) || Val == INT32_MIN;
+ }
+ bool isAddrMode5() const {
+ if (!isMemory() || Memory.Alignment != 0) return false;
+ // Check for register offset.
+ if (Memory.OffsetRegNum) return false;
+ // Immediate offset in range [-1020, 1020] and a multiple of 4.
+ if (!Memory.OffsetImm) return true;
+ int64_t Val = Memory.OffsetImm->getValue();
+ return (Val >= -1020 && Val <= 1020 && ((Val & 3) == 0)) ||
+ Val == INT32_MIN;
+ }
+ bool isMemTBB() const {
+ if (!isMemory() || !Memory.OffsetRegNum || Memory.isNegative ||
+ Memory.ShiftType != ARM_AM::no_shift || Memory.Alignment != 0)
+ return false;
+ return true;
+ }
+ bool isMemTBH() const {
+ if (!isMemory() || !Memory.OffsetRegNum || Memory.isNegative ||
+ Memory.ShiftType != ARM_AM::lsl || Memory.ShiftImm != 1 ||
+ Memory.Alignment != 0 )
+ return false;
+ return true;
+ }
+ bool isMemRegOffset() const {
+ if (!isMemory() || !Memory.OffsetRegNum || Memory.Alignment != 0)
+ return false;
+ return true;
+ }
+ bool isT2MemRegOffset() const {
+ if (!isMemory() || !Memory.OffsetRegNum || Memory.isNegative ||
+ Memory.Alignment != 0)
+ return false;
+ // Only lsl #{0, 1, 2, 3} allowed.
+ if (Memory.ShiftType == ARM_AM::no_shift)
+ return true;
+ if (Memory.ShiftType != ARM_AM::lsl || Memory.ShiftImm > 3)
+ return false;
+ return true;
+ }
+ bool isMemThumbRR() const {
+ // Thumb reg+reg addressing is simple. Just two registers, a base and
+ // an offset. No shifts, negations or any other complicating factors.
+ if (!isMemory() || !Memory.OffsetRegNum || Memory.isNegative ||
+ Memory.ShiftType != ARM_AM::no_shift || Memory.Alignment != 0)
+ return false;
+ return isARMLowRegister(Memory.BaseRegNum) &&
+ (!Memory.OffsetRegNum || isARMLowRegister(Memory.OffsetRegNum));
+ }
+ bool isMemThumbRIs4() const {
+ if (!isMemory() || Memory.OffsetRegNum != 0 ||
+ !isARMLowRegister(Memory.BaseRegNum) || Memory.Alignment != 0)
+ return false;
+ // Immediate offset, multiple of 4 in range [0, 124].
+ if (!Memory.OffsetImm) return true;
+ int64_t Val = Memory.OffsetImm->getValue();
+ return Val >= 0 && Val <= 124 && (Val % 4) == 0;
+ }
+ bool isMemThumbRIs2() const {
+ if (!isMemory() || Memory.OffsetRegNum != 0 ||
+ !isARMLowRegister(Memory.BaseRegNum) || Memory.Alignment != 0)
+ return false;
+ // Immediate offset, multiple of 4 in range [0, 62].
+ if (!Memory.OffsetImm) return true;
+ int64_t Val = Memory.OffsetImm->getValue();
+ return Val >= 0 && Val <= 62 && (Val % 2) == 0;
+ }
+ bool isMemThumbRIs1() const {
+ if (!isMemory() || Memory.OffsetRegNum != 0 ||
+ !isARMLowRegister(Memory.BaseRegNum) || Memory.Alignment != 0)
+ return false;
+ // Immediate offset in range [0, 31].
+ if (!Memory.OffsetImm) return true;
+ int64_t Val = Memory.OffsetImm->getValue();
+ return Val >= 0 && Val <= 31;
+ }
+ bool isMemThumbSPI() const {
+ if (!isMemory() || Memory.OffsetRegNum != 0 ||
+ Memory.BaseRegNum != ARM::SP || Memory.Alignment != 0)
+ return false;
+ // Immediate offset, multiple of 4 in range [0, 1020].
+ if (!Memory.OffsetImm) return true;
+ int64_t Val = Memory.OffsetImm->getValue();
+ return Val >= 0 && Val <= 1020 && (Val % 4) == 0;
+ }
+ bool isMemImm8s4Offset() const {
+ if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
+ return false;
+ // Immediate offset a multiple of 4 in range [-1020, 1020].
+ if (!Memory.OffsetImm) return true;
+ int64_t Val = Memory.OffsetImm->getValue();
+ return Val >= -1020 && Val <= 1020 && (Val & 3) == 0;
+ }
+ bool isMemImm0_1020s4Offset() const {
+ if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
+ return false;
+ // Immediate offset a multiple of 4 in range [0, 1020].
+ if (!Memory.OffsetImm) return true;
+ int64_t Val = Memory.OffsetImm->getValue();
+ return Val >= 0 && Val <= 1020 && (Val & 3) == 0;
+ }
+ bool isMemImm8Offset() const {
+ if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
+ return false;
+ // Immediate offset in range [-255, 255].
+ if (!Memory.OffsetImm) return true;
+ int64_t Val = Memory.OffsetImm->getValue();
+ return (Val == INT32_MIN) || (Val > -256 && Val < 256);
+ }
+ bool isMemPosImm8Offset() const {
+ if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
+ return false;
+ // Immediate offset in range [0, 255].
+ if (!Memory.OffsetImm) return true;
+ int64_t Val = Memory.OffsetImm->getValue();
+ return Val >= 0 && Val < 256;
+ }
+ bool isMemNegImm8Offset() const {
+ if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
+ return false;
+ // Immediate offset in range [-255, -1].
+ if (!Memory.OffsetImm) return true;
+ int64_t Val = Memory.OffsetImm->getValue();
+ return Val > -256 && Val < 0;
+ }
+ bool isMemUImm12Offset() const {
+ // If we have an immediate that's not a constant, treat it as a label
+ // reference needing a fixup. If it is a constant, it's something else
+ // and we reject it.
+ if (Kind == k_Immediate && !isa<MCConstantExpr>(getImm()))
+ return true;
+
+ if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
+ return false;
+ // Immediate offset in range [0, 4095].
+ if (!Memory.OffsetImm) return true;
+ int64_t Val = Memory.OffsetImm->getValue();
+ return (Val >= 0 && Val < 4096);
+ }
+ bool isMemImm12Offset() const {
+ // If we have an immediate that's not a constant, treat it as a label
+ // reference needing a fixup. If it is a constant, it's something else
+ // and we reject it.
+ if (Kind == k_Immediate && !isa<MCConstantExpr>(getImm()))
+ return true;
+
+ if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
+ return false;
+ // Immediate offset in range [-4095, 4095].
+ if (!Memory.OffsetImm) return true;
+ int64_t Val = Memory.OffsetImm->getValue();
+ return (Val > -4096 && Val < 4096) || (Val == INT32_MIN);
+ }
+ bool isPostIdxImm8() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Val = CE->getValue();
+ return (Val > -256 && Val < 256) || (Val == INT32_MIN);
+ }
+ bool isPostIdxImm8s4() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Val = CE->getValue();
+ return ((Val & 3) == 0 && Val >= -1020 && Val <= 1020) ||
+ (Val == INT32_MIN);
+ }
+
+ bool isMSRMask() const { return Kind == k_MSRMask; }
+ bool isProcIFlags() const { return Kind == k_ProcIFlags; }
+
+ bool isVectorIndex8() const {
+ if (Kind != k_VectorIndex) return false;
+ return VectorIndex.Val < 8;
+ }
+ bool isVectorIndex16() const {
+ if (Kind != k_VectorIndex) return false;
+ return VectorIndex.Val < 4;
+ }
+ bool isVectorIndex32() const {
+ if (Kind != k_VectorIndex) return false;
+ return VectorIndex.Val < 2;
+ }
+
+
+
+ void addExpr(MCInst &Inst, const MCExpr *Expr) const {
+ // Add as immediates when possible. Null MCExpr = 0.
+ if (Expr == 0)
+ Inst.addOperand(MCOperand::CreateImm(0));
+ else if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
+ else
+ Inst.addOperand(MCOperand::CreateExpr(Expr));
+ }
+
+ void addCondCodeOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(unsigned(getCondCode())));
+ unsigned RegNum = getCondCode() == ARMCC::AL ? 0: ARM::CPSR;
+ Inst.addOperand(MCOperand::CreateReg(RegNum));
+ }
+
+ void addCoprocNumOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(getCoproc()));
+ }
+
+ void addCoprocRegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(getCoproc()));
+ }
+
+ void addCoprocOptionOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(CoprocOption.Val));
+ }
+
+ void addITMaskOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(ITMask.Mask));
+ }
+
+ void addITCondCodeOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(unsigned(getCondCode())));
+ }
+
+ void addCCOutOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(getReg()));
+ }
+
+ void addRegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(getReg()));
+ }
+
+ void addRegShiftedRegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 3 && "Invalid number of operands!");
+ assert(isRegShiftedReg() && "addRegShiftedRegOperands() on non RegShiftedReg!");
+ Inst.addOperand(MCOperand::CreateReg(RegShiftedReg.SrcReg));
+ Inst.addOperand(MCOperand::CreateReg(RegShiftedReg.ShiftReg));
+ Inst.addOperand(MCOperand::CreateImm(
+ ARM_AM::getSORegOpc(RegShiftedReg.ShiftTy, RegShiftedReg.ShiftImm)));
+ }
+
+ void addRegShiftedImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ assert(isRegShiftedImm() && "addRegShiftedImmOperands() on non RegShiftedImm!");
+ Inst.addOperand(MCOperand::CreateReg(RegShiftedImm.SrcReg));
+ Inst.addOperand(MCOperand::CreateImm(
+ ARM_AM::getSORegOpc(RegShiftedImm.ShiftTy, RegShiftedImm.ShiftImm)));
+ }
+
+ void addShifterImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm((ShifterImm.isASR << 5) |
+ ShifterImm.Imm));
+ }
+
+ void addRegListOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const SmallVectorImpl<unsigned> &RegList = getRegList();
+ for (SmallVectorImpl<unsigned>::const_iterator
+ I = RegList.begin(), E = RegList.end(); I != E; ++I)
+ Inst.addOperand(MCOperand::CreateReg(*I));
+ }
+
+ void addDPRRegListOperands(MCInst &Inst, unsigned N) const {
+ addRegListOperands(Inst, N);
+ }
+
+ void addSPRRegListOperands(MCInst &Inst, unsigned N) const {
+ addRegListOperands(Inst, N);
+ }
+
+ void addRotImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // Encoded as val>>3. The printer handles display as 8, 16, 24.
+ Inst.addOperand(MCOperand::CreateImm(RotImm.Imm >> 3));
+ }
+
+ void addBitfieldOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // Munge the lsb/width into a bitfield mask.
+ unsigned lsb = Bitfield.LSB;
+ unsigned width = Bitfield.Width;
+ // Make a 32-bit mask w/ the referenced bits clear and all other bits set.
+ uint32_t Mask = ~(((uint32_t)0xffffffff >> lsb) << (32 - width) >>
+ (32 - (lsb + width)));
+ Inst.addOperand(MCOperand::CreateImm(Mask));
+ }
+
+ void addImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ addExpr(Inst, getImm());
+ }
+
+ void addFPImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(getFPImm()));
+ }
+
+ void addImm8s4Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // FIXME: We really want to scale the value here, but the LDRD/STRD
+ // instruction don't encode operands that way yet.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
+ }
+
+ void addImm0_1020s4Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The immediate is scaled by four in the encoding and is stored
+ // in the MCInst as such. Lop off the low two bits here.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue() / 4));
+ }
+
+ void addImm0_508s4Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The immediate is scaled by four in the encoding and is stored
+ // in the MCInst as such. Lop off the low two bits here.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue() / 4));
+ }
+
+ void addImm0_255Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ addExpr(Inst, getImm());
+ }
+
+ void addImm0_7Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ addExpr(Inst, getImm());
+ }
+
+ void addImm0_15Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ addExpr(Inst, getImm());
+ }
+
+ void addImm0_31Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ addExpr(Inst, getImm());
+ }
+
+ void addImm1_16Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The constant encodes as the immediate-1, and we store in the instruction
+ // the bits as encoded, so subtract off one here.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue() - 1));
+ }
+
+ void addImm1_32Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The constant encodes as the immediate-1, and we store in the instruction
+ // the bits as encoded, so subtract off one here.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue() - 1));
+ }
+
+ void addImm0_65535Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ addExpr(Inst, getImm());
+ }
+
+ void addImm0_65535ExprOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ addExpr(Inst, getImm());
+ }
+
+ void addImm24bitOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ addExpr(Inst, getImm());
+ }
+
+ void addImmThumbSROperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The constant encodes as the immediate, except for 32, which encodes as
+ // zero.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ unsigned Imm = CE->getValue();
+ Inst.addOperand(MCOperand::CreateImm((Imm == 32 ? 0 : Imm)));
+ }
+
+ void addPKHLSLImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ addExpr(Inst, getImm());
+ }
+
+ void addPKHASRImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // An ASR value of 32 encodes as 0, so that's how we want to add it to
+ // the instruction as well.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ int Val = CE->getValue();
+ Inst.addOperand(MCOperand::CreateImm(Val == 32 ? 0 : Val));
+ }
+
+ void addARMSOImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ addExpr(Inst, getImm());
+ }
+
+ void addT2SOImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ addExpr(Inst, getImm());
+ }
+
+ void addSetEndImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ addExpr(Inst, getImm());
+ }
+
+ void addMemBarrierOptOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(unsigned(getMemBarrierOpt())));
+ }
+
+ void addMemNoOffsetOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
+ }
+
+ void addAlignedMemoryOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::CreateImm(Memory.Alignment));
+ }
+
+ void addAddrMode2Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 3 && "Invalid number of operands!");
+ int32_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() : 0;
+ if (!Memory.OffsetRegNum) {
+ ARM_AM::AddrOpc AddSub = Val < 0 ? ARM_AM::sub : ARM_AM::add;
+ // Special case for #-0
+ if (Val == INT32_MIN) Val = 0;
+ if (Val < 0) Val = -Val;
+ Val = ARM_AM::getAM2Opc(AddSub, Val, ARM_AM::no_shift);
+ } else {
+ // For register offset, we encode the shift type and negation flag
+ // here.
+ Val = ARM_AM::getAM2Opc(Memory.isNegative ? ARM_AM::sub : ARM_AM::add,
+ Memory.ShiftImm, Memory.ShiftType);
+ }
+ Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::CreateReg(Memory.OffsetRegNum));
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ }
+
+ void addAM2OffsetImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ assert(CE && "non-constant AM2OffsetImm operand!");
+ int32_t Val = CE->getValue();
+ ARM_AM::AddrOpc AddSub = Val < 0 ? ARM_AM::sub : ARM_AM::add;
+ // Special case for #-0
+ if (Val == INT32_MIN) Val = 0;
+ if (Val < 0) Val = -Val;
+ Val = ARM_AM::getAM2Opc(AddSub, Val, ARM_AM::no_shift);
+ Inst.addOperand(MCOperand::CreateReg(0));
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ }
+
+ void addAddrMode3Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 3 && "Invalid number of operands!");
+ int32_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() : 0;
+ if (!Memory.OffsetRegNum) {
+ ARM_AM::AddrOpc AddSub = Val < 0 ? ARM_AM::sub : ARM_AM::add;
+ // Special case for #-0
+ if (Val == INT32_MIN) Val = 0;
+ if (Val < 0) Val = -Val;
+ Val = ARM_AM::getAM3Opc(AddSub, Val);
+ } else {
+ // For register offset, we encode the shift type and negation flag
+ // here.
+ Val = ARM_AM::getAM3Opc(Memory.isNegative ? ARM_AM::sub : ARM_AM::add, 0);
+ }
+ Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::CreateReg(Memory.OffsetRegNum));
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ }
+
+ void addAM3OffsetOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ if (Kind == k_PostIndexRegister) {
+ int32_t Val =
+ ARM_AM::getAM3Opc(PostIdxReg.isAdd ? ARM_AM::add : ARM_AM::sub, 0);
+ Inst.addOperand(MCOperand::CreateReg(PostIdxReg.RegNum));
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ return;
+ }
+
+ // Constant offset.
+ const MCConstantExpr *CE = static_cast<const MCConstantExpr*>(getImm());
+ int32_t Val = CE->getValue();
+ ARM_AM::AddrOpc AddSub = Val < 0 ? ARM_AM::sub : ARM_AM::add;
+ // Special case for #-0
+ if (Val == INT32_MIN) Val = 0;
+ if (Val < 0) Val = -Val;
+ Val = ARM_AM::getAM3Opc(AddSub, Val);
+ Inst.addOperand(MCOperand::CreateReg(0));
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ }
+
+ void addAddrMode5Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ // The lower two bits are always zero and as such are not encoded.
+ int32_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() / 4 : 0;
+ ARM_AM::AddrOpc AddSub = Val < 0 ? ARM_AM::sub : ARM_AM::add;
+ // Special case for #-0
+ if (Val == INT32_MIN) Val = 0;
+ if (Val < 0) Val = -Val;
+ Val = ARM_AM::getAM5Opc(AddSub, Val);
+ Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ }
+
+ void addMemImm8s4OffsetOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ int64_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() : 0;
+ Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ }
+
+ void addMemImm0_1020s4OffsetOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ // The lower two bits are always zero and as such are not encoded.
+ int32_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() / 4 : 0;
+ Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ }
+
+ void addMemImm8OffsetOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ int64_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() : 0;
+ Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ }
+
+ void addMemPosImm8OffsetOperands(MCInst &Inst, unsigned N) const {
+ addMemImm8OffsetOperands(Inst, N);
+ }
+
+ void addMemNegImm8OffsetOperands(MCInst &Inst, unsigned N) const {
+ addMemImm8OffsetOperands(Inst, N);
+ }
+
+ void addMemUImm12OffsetOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ // If this is an immediate, it's a label reference.
+ if (Kind == k_Immediate) {
+ addExpr(Inst, getImm());
+ Inst.addOperand(MCOperand::CreateImm(0));
+ return;
+ }
+
+ // Otherwise, it's a normal memory reg+offset.
+ int64_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() : 0;
+ Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ }
+
+ void addMemImm12OffsetOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ // If this is an immediate, it's a label reference.
+ if (Kind == k_Immediate) {
+ addExpr(Inst, getImm());
+ Inst.addOperand(MCOperand::CreateImm(0));
+ return;
+ }
+
+ // Otherwise, it's a normal memory reg+offset.
+ int64_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() : 0;
+ Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ }
+
+ void addMemTBBOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::CreateReg(Memory.OffsetRegNum));
+ }
+
+ void addMemTBHOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::CreateReg(Memory.OffsetRegNum));
+ }
+
+ void addMemRegOffsetOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 3 && "Invalid number of operands!");
+ unsigned Val = ARM_AM::getAM2Opc(Memory.isNegative ? ARM_AM::sub : ARM_AM::add,
+ Memory.ShiftImm, Memory.ShiftType);
+ Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::CreateReg(Memory.OffsetRegNum));
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ }
+
+ void addT2MemRegOffsetOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 3 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::CreateReg(Memory.OffsetRegNum));
+ Inst.addOperand(MCOperand::CreateImm(Memory.ShiftImm));
+ }
+
+ void addMemThumbRROperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::CreateReg(Memory.OffsetRegNum));
+ }
+
+ void addMemThumbRIs4Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ int64_t Val = Memory.OffsetImm ? (Memory.OffsetImm->getValue() / 4) : 0;
+ Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ }
+
+ void addMemThumbRIs2Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ int64_t Val = Memory.OffsetImm ? (Memory.OffsetImm->getValue() / 2) : 0;
+ Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ }
+
+ void addMemThumbRIs1Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ int64_t Val = Memory.OffsetImm ? (Memory.OffsetImm->getValue()) : 0;
+ Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ }
+
+ void addMemThumbSPIOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ int64_t Val = Memory.OffsetImm ? (Memory.OffsetImm->getValue() / 4) : 0;
+ Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ }
+
+ void addPostIdxImm8Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ assert(CE && "non-constant post-idx-imm8 operand!");
+ int Imm = CE->getValue();
+ bool isAdd = Imm >= 0;
+ if (Imm == INT32_MIN) Imm = 0;
+ Imm = (Imm < 0 ? -Imm : Imm) | (int)isAdd << 8;
+ Inst.addOperand(MCOperand::CreateImm(Imm));
+ }
+
+ void addPostIdxImm8s4Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ assert(CE && "non-constant post-idx-imm8s4 operand!");
+ int Imm = CE->getValue();
+ bool isAdd = Imm >= 0;
+ if (Imm == INT32_MIN) Imm = 0;
+ // Immediate is scaled by 4.
+ Imm = ((Imm < 0 ? -Imm : Imm) / 4) | (int)isAdd << 8;
+ Inst.addOperand(MCOperand::CreateImm(Imm));
+ }
+
+ void addPostIdxRegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(PostIdxReg.RegNum));
+ Inst.addOperand(MCOperand::CreateImm(PostIdxReg.isAdd));
+ }
+
+ void addPostIdxRegShiftedOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(PostIdxReg.RegNum));
+ // The sign, shift type, and shift amount are encoded in a single operand
+ // using the AM2 encoding helpers.
+ ARM_AM::AddrOpc opc = PostIdxReg.isAdd ? ARM_AM::add : ARM_AM::sub;
+ unsigned Imm = ARM_AM::getAM2Opc(opc, PostIdxReg.ShiftImm,
+ PostIdxReg.ShiftTy);
+ Inst.addOperand(MCOperand::CreateImm(Imm));
+ }
+
+ void addMSRMaskOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(unsigned(getMSRMask())));
+ }
+
+ void addProcIFlagsOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(unsigned(getProcIFlags())));
+ }
+
+ void addVectorIndex8Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(getVectorIndex()));
+ }
+
+ void addVectorIndex16Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(getVectorIndex()));
+ }
+
+ void addVectorIndex32Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(getVectorIndex()));
+ }
+
+ virtual void print(raw_ostream &OS) const;
+
+ static ARMOperand *CreateITMask(unsigned Mask, SMLoc S) {
+ ARMOperand *Op = new ARMOperand(k_ITCondMask);
+ Op->ITMask.Mask = Mask;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
+ static ARMOperand *CreateCondCode(ARMCC::CondCodes CC, SMLoc S) {
+ ARMOperand *Op = new ARMOperand(k_CondCode);
+ Op->CC.Val = CC;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
+ static ARMOperand *CreateCoprocNum(unsigned CopVal, SMLoc S) {
+ ARMOperand *Op = new ARMOperand(k_CoprocNum);
+ Op->Cop.Val = CopVal;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
+ static ARMOperand *CreateCoprocReg(unsigned CopVal, SMLoc S) {
+ ARMOperand *Op = new ARMOperand(k_CoprocReg);
+ Op->Cop.Val = CopVal;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
+ static ARMOperand *CreateCoprocOption(unsigned Val, SMLoc S, SMLoc E) {
+ ARMOperand *Op = new ARMOperand(k_CoprocOption);
+ Op->Cop.Val = Val;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static ARMOperand *CreateCCOut(unsigned RegNum, SMLoc S) {
+ ARMOperand *Op = new ARMOperand(k_CCOut);
+ Op->Reg.RegNum = RegNum;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
+ static ARMOperand *CreateToken(StringRef Str, SMLoc S) {
+ ARMOperand *Op = new ARMOperand(k_Token);
+ Op->Tok.Data = Str.data();
+ Op->Tok.Length = Str.size();
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
+ static ARMOperand *CreateReg(unsigned RegNum, SMLoc S, SMLoc E) {
+ ARMOperand *Op = new ARMOperand(k_Register);
+ Op->Reg.RegNum = RegNum;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static ARMOperand *CreateShiftedRegister(ARM_AM::ShiftOpc ShTy,
+ unsigned SrcReg,
+ unsigned ShiftReg,
+ unsigned ShiftImm,
+ SMLoc S, SMLoc E) {
+ ARMOperand *Op = new ARMOperand(k_ShiftedRegister);
+ Op->RegShiftedReg.ShiftTy = ShTy;
+ Op->RegShiftedReg.SrcReg = SrcReg;
+ Op->RegShiftedReg.ShiftReg = ShiftReg;
+ Op->RegShiftedReg.ShiftImm = ShiftImm;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static ARMOperand *CreateShiftedImmediate(ARM_AM::ShiftOpc ShTy,
+ unsigned SrcReg,
+ unsigned ShiftImm,
+ SMLoc S, SMLoc E) {
+ ARMOperand *Op = new ARMOperand(k_ShiftedImmediate);
+ Op->RegShiftedImm.ShiftTy = ShTy;
+ Op->RegShiftedImm.SrcReg = SrcReg;
+ Op->RegShiftedImm.ShiftImm = ShiftImm;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static ARMOperand *CreateShifterImm(bool isASR, unsigned Imm,
+ SMLoc S, SMLoc E) {
+ ARMOperand *Op = new ARMOperand(k_ShifterImmediate);
+ Op->ShifterImm.isASR = isASR;
+ Op->ShifterImm.Imm = Imm;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static ARMOperand *CreateRotImm(unsigned Imm, SMLoc S, SMLoc E) {
+ ARMOperand *Op = new ARMOperand(k_RotateImmediate);
+ Op->RotImm.Imm = Imm;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static ARMOperand *CreateBitfield(unsigned LSB, unsigned Width,
+ SMLoc S, SMLoc E) {
+ ARMOperand *Op = new ARMOperand(k_BitfieldDescriptor);
+ Op->Bitfield.LSB = LSB;
+ Op->Bitfield.Width = Width;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static ARMOperand *
+ CreateRegList(const SmallVectorImpl<std::pair<unsigned, SMLoc> > &Regs,
+ SMLoc StartLoc, SMLoc EndLoc) {
+ KindTy Kind = k_RegisterList;
+
+ if (ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Regs.front().first))
+ Kind = k_DPRRegisterList;
+ else if (ARMMCRegisterClasses[ARM::SPRRegClassID].
+ contains(Regs.front().first))
+ Kind = k_SPRRegisterList;
+
+ ARMOperand *Op = new ARMOperand(Kind);
+ for (SmallVectorImpl<std::pair<unsigned, SMLoc> >::const_iterator
+ I = Regs.begin(), E = Regs.end(); I != E; ++I)
+ Op->Registers.push_back(I->first);
+ array_pod_sort(Op->Registers.begin(), Op->Registers.end());
+ Op->StartLoc = StartLoc;
+ Op->EndLoc = EndLoc;
+ return Op;
+ }
+
+ static ARMOperand *CreateVectorIndex(unsigned Idx, SMLoc S, SMLoc E,
+ MCContext &Ctx) {
+ ARMOperand *Op = new ARMOperand(k_VectorIndex);
+ Op->VectorIndex.Val = Idx;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static ARMOperand *CreateImm(const MCExpr *Val, SMLoc S, SMLoc E) {
+ ARMOperand *Op = new ARMOperand(k_Immediate);
+ Op->Imm.Val = Val;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static ARMOperand *CreateFPImm(unsigned Val, SMLoc S, MCContext &Ctx) {
+ ARMOperand *Op = new ARMOperand(k_FPImmediate);
+ Op->FPImm.Val = Val;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
+ static ARMOperand *CreateMem(unsigned BaseRegNum,
+ const MCConstantExpr *OffsetImm,
+ unsigned OffsetRegNum,
+ ARM_AM::ShiftOpc ShiftType,
+ unsigned ShiftImm,
+ unsigned Alignment,
+ bool isNegative,
+ SMLoc S, SMLoc E) {
+ ARMOperand *Op = new ARMOperand(k_Memory);
+ Op->Memory.BaseRegNum = BaseRegNum;
+ Op->Memory.OffsetImm = OffsetImm;
+ Op->Memory.OffsetRegNum = OffsetRegNum;
+ Op->Memory.ShiftType = ShiftType;
+ Op->Memory.ShiftImm = ShiftImm;
+ Op->Memory.Alignment = Alignment;
+ Op->Memory.isNegative = isNegative;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static ARMOperand *CreatePostIdxReg(unsigned RegNum, bool isAdd,
+ ARM_AM::ShiftOpc ShiftTy,
+ unsigned ShiftImm,
+ SMLoc S, SMLoc E) {
+ ARMOperand *Op = new ARMOperand(k_PostIndexRegister);
+ Op->PostIdxReg.RegNum = RegNum;
+ Op->PostIdxReg.isAdd = isAdd;
+ Op->PostIdxReg.ShiftTy = ShiftTy;
+ Op->PostIdxReg.ShiftImm = ShiftImm;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static ARMOperand *CreateMemBarrierOpt(ARM_MB::MemBOpt Opt, SMLoc S) {
+ ARMOperand *Op = new ARMOperand(k_MemBarrierOpt);
+ Op->MBOpt.Val = Opt;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
+ static ARMOperand *CreateProcIFlags(ARM_PROC::IFlags IFlags, SMLoc S) {
+ ARMOperand *Op = new ARMOperand(k_ProcIFlags);
+ Op->IFlags.Val = IFlags;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
+ static ARMOperand *CreateMSRMask(unsigned MMask, SMLoc S) {
+ ARMOperand *Op = new ARMOperand(k_MSRMask);
+ Op->MMask.Val = MMask;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+};
+
+} // end anonymous namespace.
+
+void ARMOperand::print(raw_ostream &OS) const {
+ switch (Kind) {
+ case k_FPImmediate:
+ OS << "<fpimm " << getFPImm() << "(" << ARM_AM::getFPImmFloat(getFPImm())
+ << ") >";
+ break;
+ case k_CondCode:
+ OS << "<ARMCC::" << ARMCondCodeToString(getCondCode()) << ">";
+ break;
+ case k_CCOut:
+ OS << "<ccout " << getReg() << ">";
+ break;
+ case k_ITCondMask: {
+ static char MaskStr[][6] = { "()", "(t)", "(e)", "(tt)", "(et)", "(te)",
+ "(ee)", "(ttt)", "(ett)", "(tet)", "(eet)", "(tte)", "(ete)",
+ "(tee)", "(eee)" };
+ assert((ITMask.Mask & 0xf) == ITMask.Mask);
+ OS << "<it-mask " << MaskStr[ITMask.Mask] << ">";
+ break;
+ }
+ case k_CoprocNum:
+ OS << "<coprocessor number: " << getCoproc() << ">";
+ break;
+ case k_CoprocReg:
+ OS << "<coprocessor register: " << getCoproc() << ">";
+ break;
+ case k_CoprocOption:
+ OS << "<coprocessor option: " << CoprocOption.Val << ">";
+ break;
+ case k_MSRMask:
+ OS << "<mask: " << getMSRMask() << ">";
+ break;
+ case k_Immediate:
+ getImm()->print(OS);
+ break;
+ case k_MemBarrierOpt:
+ OS << "<ARM_MB::" << MemBOptToString(getMemBarrierOpt()) << ">";
+ break;
+ case k_Memory:
+ OS << "<memory "
+ << " base:" << Memory.BaseRegNum;
+ OS << ">";
+ break;
+ case k_PostIndexRegister:
+ OS << "post-idx register " << (PostIdxReg.isAdd ? "" : "-")
+ << PostIdxReg.RegNum;
+ if (PostIdxReg.ShiftTy != ARM_AM::no_shift)
+ OS << ARM_AM::getShiftOpcStr(PostIdxReg.ShiftTy) << " "
+ << PostIdxReg.ShiftImm;
+ OS << ">";
+ break;
+ case k_ProcIFlags: {
+ OS << "<ARM_PROC::";
+ unsigned IFlags = getProcIFlags();
+ for (int i=2; i >= 0; --i)
+ if (IFlags & (1 << i))
+ OS << ARM_PROC::IFlagsToString(1 << i);
+ OS << ">";
+ break;
+ }
+ case k_Register:
+ OS << "<register " << getReg() << ">";
+ break;
+ case k_ShifterImmediate:
+ OS << "<shift " << (ShifterImm.isASR ? "asr" : "lsl")
+ << " #" << ShifterImm.Imm << ">";
+ break;
+ case k_ShiftedRegister:
+ OS << "<so_reg_reg "
+ << RegShiftedReg.SrcReg
+ << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(RegShiftedReg.ShiftImm))
+ << ", " << RegShiftedReg.ShiftReg << ", "
+ << ARM_AM::getSORegOffset(RegShiftedReg.ShiftImm)
+ << ">";
+ break;
+ case k_ShiftedImmediate:
+ OS << "<so_reg_imm "
+ << RegShiftedImm.SrcReg
+ << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(RegShiftedImm.ShiftImm))
+ << ", " << ARM_AM::getSORegOffset(RegShiftedImm.ShiftImm)
+ << ">";
+ break;
+ case k_RotateImmediate:
+ OS << "<ror " << " #" << (RotImm.Imm * 8) << ">";
+ break;
+ case k_BitfieldDescriptor:
+ OS << "<bitfield " << "lsb: " << Bitfield.LSB
+ << ", width: " << Bitfield.Width << ">";
+ break;
+ case k_RegisterList:
+ case k_DPRRegisterList:
+ case k_SPRRegisterList: {
+ OS << "<register_list ";
+
+ const SmallVectorImpl<unsigned> &RegList = getRegList();
+ for (SmallVectorImpl<unsigned>::const_iterator
+ I = RegList.begin(), E = RegList.end(); I != E; ) {
+ OS << *I;
+ if (++I < E) OS << ", ";
+ }
+
+ OS << ">";
+ break;
+ }
+ case k_Token:
+ OS << "'" << getToken() << "'";
+ break;
+ case k_VectorIndex:
+ OS << "<vectorindex " << getVectorIndex() << ">";
+ break;
+ }
+}
+
+/// @name Auto-generated Match Functions
+/// {
+
+static unsigned MatchRegisterName(StringRef Name);
+
+/// }
+
+bool ARMAsmParser::ParseRegister(unsigned &RegNo,
+ SMLoc &StartLoc, SMLoc &EndLoc) {
+ RegNo = tryParseRegister();
+
+ return (RegNo == (unsigned)-1);
+}
+
+/// Try to parse a register name. The token must be an Identifier when called,
+/// and if it is a register name the token is eaten and the register number is
+/// returned. Otherwise return -1.
+///
+int ARMAsmParser::tryParseRegister() {
+ const AsmToken &Tok = Parser.getTok();
+ if (Tok.isNot(AsmToken::Identifier)) return -1;
+
+ // FIXME: Validate register for the current architecture; we have to do
+ // validation later, so maybe there is no need for this here.
+ std::string upperCase = Tok.getString().str();
+ std::string lowerCase = LowercaseString(upperCase);
+ unsigned RegNum = MatchRegisterName(lowerCase);
+ if (!RegNum) {
+ RegNum = StringSwitch<unsigned>(lowerCase)
+ .Case("r13", ARM::SP)
+ .Case("r14", ARM::LR)
+ .Case("r15", ARM::PC)
+ .Case("ip", ARM::R12)
+ .Default(0);
+ }
+ if (!RegNum) return -1;
+
+ Parser.Lex(); // Eat identifier token.
+
+#if 0
+ // Also check for an index operand. This is only legal for vector registers,
+ // but that'll get caught OK in operand matching, so we don't need to
+ // explicitly filter everything else out here.
+ if (Parser.getTok().is(AsmToken::LBrac)) {
+ SMLoc SIdx = Parser.getTok().getLoc();
+ Parser.Lex(); // Eat left bracket token.
+
+ const MCExpr *ImmVal;
+ SMLoc ExprLoc = Parser.getTok().getLoc();
+ if (getParser().ParseExpression(ImmVal))
+ return MatchOperand_ParseFail;
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal);
+ if (!MCE) {
+ TokError("immediate value expected for vector index");
+ return MatchOperand_ParseFail;
+ }
+
+ SMLoc E = Parser.getTok().getLoc();
+ if (Parser.getTok().isNot(AsmToken::RBrac)) {
+ Error(E, "']' expected");
+ return MatchOperand_ParseFail;
+ }
+
+ Parser.Lex(); // Eat right bracket token.
+
+ Operands.push_back(ARMOperand::CreateVectorIndex(MCE->getValue(),
+ SIdx, E,
+ getContext()));
+ }
+#endif
+
+ return RegNum;
+}
+
+// Try to parse a shifter (e.g., "lsl <amt>"). On success, return 0.
+// If a recoverable error occurs, return 1. If an irrecoverable error
+// occurs, return -1. An irrecoverable error is one where tokens have been
+// consumed in the process of trying to parse the shifter (i.e., when it is
+// indeed a shifter operand, but malformed).
+int ARMAsmParser::tryParseShiftRegister(
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+ const AsmToken &Tok = Parser.getTok();
+ assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
+
+ std::string upperCase = Tok.getString().str();
+ std::string lowerCase = LowercaseString(upperCase);
+ ARM_AM::ShiftOpc ShiftTy = StringSwitch<ARM_AM::ShiftOpc>(lowerCase)
+ .Case("lsl", ARM_AM::lsl)
+ .Case("lsr", ARM_AM::lsr)
+ .Case("asr", ARM_AM::asr)
+ .Case("ror", ARM_AM::ror)
+ .Case("rrx", ARM_AM::rrx)
+ .Default(ARM_AM::no_shift);
+
+ if (ShiftTy == ARM_AM::no_shift)
+ return 1;
+
+ Parser.Lex(); // Eat the operator.
+
+ // The source register for the shift has already been added to the
+ // operand list, so we need to pop it off and combine it into the shifted
+ // register operand instead.
+ OwningPtr<ARMOperand> PrevOp((ARMOperand*)Operands.pop_back_val());
+ if (!PrevOp->isReg())
+ return Error(PrevOp->getStartLoc(), "shift must be of a register");
+ int SrcReg = PrevOp->getReg();
+ int64_t Imm = 0;
+ int ShiftReg = 0;
+ if (ShiftTy == ARM_AM::rrx) {
+ // RRX Doesn't have an explicit shift amount. The encoder expects
+ // the shift register to be the same as the source register. Seems odd,
+ // but OK.
+ ShiftReg = SrcReg;
+ } else {
+ // Figure out if this is shifted by a constant or a register (for non-RRX).
+ if (Parser.getTok().is(AsmToken::Hash)) {
+ Parser.Lex(); // Eat hash.
+ SMLoc ImmLoc = Parser.getTok().getLoc();
+ const MCExpr *ShiftExpr = 0;
+ if (getParser().ParseExpression(ShiftExpr)) {
+ Error(ImmLoc, "invalid immediate shift value");
+ return -1;
+ }
+ // The expression must be evaluatable as an immediate.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ShiftExpr);
+ if (!CE) {
+ Error(ImmLoc, "invalid immediate shift value");
+ return -1;
+ }
+ // Range check the immediate.
+ // lsl, ror: 0 <= imm <= 31
+ // lsr, asr: 0 <= imm <= 32
+ Imm = CE->getValue();
+ if (Imm < 0 ||
+ ((ShiftTy == ARM_AM::lsl || ShiftTy == ARM_AM::ror) && Imm > 31) ||
+ ((ShiftTy == ARM_AM::lsr || ShiftTy == ARM_AM::asr) && Imm > 32)) {
+ Error(ImmLoc, "immediate shift value out of range");
+ return -1;
+ }
+ } else if (Parser.getTok().is(AsmToken::Identifier)) {
+ ShiftReg = tryParseRegister();
+ SMLoc L = Parser.getTok().getLoc();
+ if (ShiftReg == -1) {
+ Error (L, "expected immediate or register in shift operand");
+ return -1;
+ }
+ } else {
+ Error (Parser.getTok().getLoc(),
+ "expected immediate or register in shift operand");
+ return -1;
+ }
+ }
+
+ if (ShiftReg && ShiftTy != ARM_AM::rrx)
+ Operands.push_back(ARMOperand::CreateShiftedRegister(ShiftTy, SrcReg,
+ ShiftReg, Imm,
+ S, Parser.getTok().getLoc()));
+ else
+ Operands.push_back(ARMOperand::CreateShiftedImmediate(ShiftTy, SrcReg, Imm,
+ S, Parser.getTok().getLoc()));
+
+ return 0;
+}
+
+
+/// Try to parse a register name. The token must be an Identifier when called.
+/// If it's a register, an AsmOperand is created. Another AsmOperand is created
+/// if there is a "writeback". 'true' if it's not a register.
+///
+/// TODO this is likely to change to allow different register types and or to
+/// parse for a specific register type.
+bool ARMAsmParser::
+tryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+ int RegNo = tryParseRegister();
+ if (RegNo == -1)
+ return true;
+
+ Operands.push_back(ARMOperand::CreateReg(RegNo, S, Parser.getTok().getLoc()));
+
+ const AsmToken &ExclaimTok = Parser.getTok();
+ if (ExclaimTok.is(AsmToken::Exclaim)) {
+ Operands.push_back(ARMOperand::CreateToken(ExclaimTok.getString(),
+ ExclaimTok.getLoc()));
+ Parser.Lex(); // Eat exclaim token
+ return false;
+ }
+
+ // Also check for an index operand. This is only legal for vector registers,
+ // but that'll get caught OK in operand matching, so we don't need to
+ // explicitly filter everything else out here.
+ if (Parser.getTok().is(AsmToken::LBrac)) {
+ SMLoc SIdx = Parser.getTok().getLoc();
+ Parser.Lex(); // Eat left bracket token.
+
+ const MCExpr *ImmVal;
+ SMLoc ExprLoc = Parser.getTok().getLoc();
+ if (getParser().ParseExpression(ImmVal))
+ return MatchOperand_ParseFail;
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal);
+ if (!MCE) {
+ TokError("immediate value expected for vector index");
+ return MatchOperand_ParseFail;
+ }
+
+ SMLoc E = Parser.getTok().getLoc();
+ if (Parser.getTok().isNot(AsmToken::RBrac)) {
+ Error(E, "']' expected");
+ return MatchOperand_ParseFail;
+ }
+
+ Parser.Lex(); // Eat right bracket token.
+
+ Operands.push_back(ARMOperand::CreateVectorIndex(MCE->getValue(),
+ SIdx, E,
+ getContext()));
+ }
+
+ return false;
+}
+
+/// MatchCoprocessorOperandName - Try to parse an coprocessor related
+/// instruction with a symbolic operand name. Example: "p1", "p7", "c3",
+/// "c5", ...
+static int MatchCoprocessorOperandName(StringRef Name, char CoprocOp) {
+ // Use the same layout as the tablegen'erated register name matcher. Ugly,
+ // but efficient.
+ switch (Name.size()) {
+ default: break;
+ case 2:
+ if (Name[0] != CoprocOp)
+ return -1;
+ switch (Name[1]) {
+ default: return -1;
+ case '0': return 0;
+ case '1': return 1;
+ case '2': return 2;
+ case '3': return 3;
+ case '4': return 4;
+ case '5': return 5;
+ case '6': return 6;
+ case '7': return 7;
+ case '8': return 8;
+ case '9': return 9;
+ }
+ break;
+ case 3:
+ if (Name[0] != CoprocOp || Name[1] != '1')
+ return -1;
+ switch (Name[2]) {
+ default: return -1;
+ case '0': return 10;
+ case '1': return 11;
+ case '2': return 12;
+ case '3': return 13;
+ case '4': return 14;
+ case '5': return 15;
+ }
+ break;
+ }
+
+ return -1;
+}
+
+/// parseITCondCode - Try to parse a condition code for an IT instruction.
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+parseITCondCode(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+ const AsmToken &Tok = Parser.getTok();
+ if (!Tok.is(AsmToken::Identifier))
+ return MatchOperand_NoMatch;
+ unsigned CC = StringSwitch<unsigned>(Tok.getString())
+ .Case("eq", ARMCC::EQ)
+ .Case("ne", ARMCC::NE)
+ .Case("hs", ARMCC::HS)
+ .Case("cs", ARMCC::HS)
+ .Case("lo", ARMCC::LO)
+ .Case("cc", ARMCC::LO)
+ .Case("mi", ARMCC::MI)
+ .Case("pl", ARMCC::PL)
+ .Case("vs", ARMCC::VS)
+ .Case("vc", ARMCC::VC)
+ .Case("hi", ARMCC::HI)
+ .Case("ls", ARMCC::LS)
+ .Case("ge", ARMCC::GE)
+ .Case("lt", ARMCC::LT)
+ .Case("gt", ARMCC::GT)
+ .Case("le", ARMCC::LE)
+ .Case("al", ARMCC::AL)
+ .Default(~0U);
+ if (CC == ~0U)
+ return MatchOperand_NoMatch;
+ Parser.Lex(); // Eat the token.
+
+ Operands.push_back(ARMOperand::CreateCondCode(ARMCC::CondCodes(CC), S));
+
+ return MatchOperand_Success;
+}
+
+/// parseCoprocNumOperand - Try to parse an coprocessor number operand. The
+/// token must be an Identifier when called, and if it is a coprocessor
+/// number, the token is eaten and the operand is added to the operand list.
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+parseCoprocNumOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+ const AsmToken &Tok = Parser.getTok();
+ if (Tok.isNot(AsmToken::Identifier))
+ return MatchOperand_NoMatch;
+
+ int Num = MatchCoprocessorOperandName(Tok.getString(), 'p');
+ if (Num == -1)
+ return MatchOperand_NoMatch;
+
+ Parser.Lex(); // Eat identifier token.
+ Operands.push_back(ARMOperand::CreateCoprocNum(Num, S));
+ return MatchOperand_Success;
+}
+
+/// parseCoprocRegOperand - Try to parse an coprocessor register operand. The
+/// token must be an Identifier when called, and if it is a coprocessor
+/// number, the token is eaten and the operand is added to the operand list.
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+parseCoprocRegOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+ const AsmToken &Tok = Parser.getTok();
+ if (Tok.isNot(AsmToken::Identifier))
+ return MatchOperand_NoMatch;
+
+ int Reg = MatchCoprocessorOperandName(Tok.getString(), 'c');
+ if (Reg == -1)
+ return MatchOperand_NoMatch;
+
+ Parser.Lex(); // Eat identifier token.
+ Operands.push_back(ARMOperand::CreateCoprocReg(Reg, S));
+ return MatchOperand_Success;
+}
+
+/// parseCoprocOptionOperand - Try to parse an coprocessor option operand.
+/// coproc_option : '{' imm0_255 '}'
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+parseCoprocOptionOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+
+ // If this isn't a '{', this isn't a coprocessor immediate operand.
+ if (Parser.getTok().isNot(AsmToken::LCurly))
+ return MatchOperand_NoMatch;
+ Parser.Lex(); // Eat the '{'
+
+ const MCExpr *Expr;
+ SMLoc Loc = Parser.getTok().getLoc();
+ if (getParser().ParseExpression(Expr)) {
+ Error(Loc, "illegal expression");
+ return MatchOperand_ParseFail;
+ }
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr);
+ if (!CE || CE->getValue() < 0 || CE->getValue() > 255) {
+ Error(Loc, "coprocessor option must be an immediate in range [0, 255]");
+ return MatchOperand_ParseFail;
+ }
+ int Val = CE->getValue();
+
+ // Check for and consume the closing '}'
+ if (Parser.getTok().isNot(AsmToken::RCurly))
+ return MatchOperand_ParseFail;
+ SMLoc E = Parser.getTok().getLoc();
+ Parser.Lex(); // Eat the '}'
+
+ Operands.push_back(ARMOperand::CreateCoprocOption(Val, S, E));
+ return MatchOperand_Success;
+}
+
+// For register list parsing, we need to map from raw GPR register numbering
+// to the enumeration values. The enumeration values aren't sorted by
+// register number due to our using "sp", "lr" and "pc" as canonical names.
+static unsigned getNextRegister(unsigned Reg) {
+ // If this is a GPR, we need to do it manually, otherwise we can rely
+ // on the sort ordering of the enumeration since the other reg-classes
+ // are sane.
+ if (!ARMMCRegisterClasses[ARM::GPRRegClassID].contains(Reg))
+ return Reg + 1;
+ switch(Reg) {
+ default: assert(0 && "Invalid GPR number!");
+ case ARM::R0: return ARM::R1; case ARM::R1: return ARM::R2;
+ case ARM::R2: return ARM::R3; case ARM::R3: return ARM::R4;
+ case ARM::R4: return ARM::R5; case ARM::R5: return ARM::R6;
+ case ARM::R6: return ARM::R7; case ARM::R7: return ARM::R8;
+ case ARM::R8: return ARM::R9; case ARM::R9: return ARM::R10;
+ case ARM::R10: return ARM::R11; case ARM::R11: return ARM::R12;
+ case ARM::R12: return ARM::SP; case ARM::SP: return ARM::LR;
+ case ARM::LR: return ARM::PC; case ARM::PC: return ARM::R0;
+ }
+}
+
+/// Parse a register list.
+bool ARMAsmParser::
+parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ assert(Parser.getTok().is(AsmToken::LCurly) &&
+ "Token is not a Left Curly Brace");
+ SMLoc S = Parser.getTok().getLoc();
+ Parser.Lex(); // Eat '{' token.
+ SMLoc RegLoc = Parser.getTok().getLoc();
+
+ // Check the first register in the list to see what register class
+ // this is a list of.
+ int Reg = tryParseRegister();
+ if (Reg == -1)
+ return Error(RegLoc, "register expected");
+
+ MCRegisterClass *RC;
+ if (ARMMCRegisterClasses[ARM::GPRRegClassID].contains(Reg))
+ RC = &ARMMCRegisterClasses[ARM::GPRRegClassID];
+ else if (ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Reg))
+ RC = &ARMMCRegisterClasses[ARM::DPRRegClassID];
+ else if (ARMMCRegisterClasses[ARM::SPRRegClassID].contains(Reg))
+ RC = &ARMMCRegisterClasses[ARM::SPRRegClassID];
+ else
+ return Error(RegLoc, "invalid register in register list");
+
+ // The reglist instructions have at most 16 registers, so reserve
+ // space for that many.
+ SmallVector<std::pair<unsigned, SMLoc>, 16> Registers;
+ // Store the first register.
+ Registers.push_back(std::pair<unsigned, SMLoc>(Reg, RegLoc));
+
+ // This starts immediately after the first register token in the list,
+ // so we can see either a comma or a minus (range separator) as a legal
+ // next token.
+ while (Parser.getTok().is(AsmToken::Comma) ||
+ Parser.getTok().is(AsmToken::Minus)) {
+ if (Parser.getTok().is(AsmToken::Minus)) {
+ Parser.Lex(); // Eat the comma.
+ SMLoc EndLoc = Parser.getTok().getLoc();
+ int EndReg = tryParseRegister();
+ if (EndReg == -1)
+ return Error(EndLoc, "register expected");
+ // If the register is the same as the start reg, there's nothing
+ // more to do.
+ if (Reg == EndReg)
+ continue;
+ // The register must be in the same register class as the first.
+ if (!RC->contains(EndReg))
+ return Error(EndLoc, "invalid register in register list");
+ // Ranges must go from low to high.
+ if (getARMRegisterNumbering(Reg) > getARMRegisterNumbering(EndReg))
+ return Error(EndLoc, "bad range in register list");
+
+ // Add all the registers in the range to the register list.
+ while (Reg != EndReg) {
+ Reg = getNextRegister(Reg);
+ Registers.push_back(std::pair<unsigned, SMLoc>(Reg, RegLoc));
+ }
+ continue;
+ }
+ Parser.Lex(); // Eat the comma.
+ RegLoc = Parser.getTok().getLoc();
+ int OldReg = Reg;
+ Reg = tryParseRegister();
+ if (Reg == -1)
+ return Error(RegLoc, "register expected");
+ // The register must be in the same register class as the first.
+ if (!RC->contains(Reg))
+ return Error(RegLoc, "invalid register in register list");
+ // List must be monotonically increasing.
+ if (getARMRegisterNumbering(Reg) <= getARMRegisterNumbering(OldReg))
+ return Error(RegLoc, "register list not in ascending order");
+ // VFP register lists must also be contiguous.
+ // It's OK to use the enumeration values directly here rather, as the
+ // VFP register classes have the enum sorted properly.
+ if (RC != &ARMMCRegisterClasses[ARM::GPRRegClassID] &&
+ Reg != OldReg + 1)
+ return Error(RegLoc, "non-contiguous register range");
+ Registers.push_back(std::pair<unsigned, SMLoc>(Reg, RegLoc));
+ }
+
+ SMLoc E = Parser.getTok().getLoc();
+ if (Parser.getTok().isNot(AsmToken::RCurly))
+ return Error(E, "'}' expected");
+ Parser.Lex(); // Eat '}' token.
+
+ Operands.push_back(ARMOperand::CreateRegList(Registers, S, E));
+ return false;
+}
+
+/// parseMemBarrierOptOperand - Try to parse DSB/DMB data barrier options.
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+parseMemBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+ const AsmToken &Tok = Parser.getTok();
+ assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
+ StringRef OptStr = Tok.getString();
+
+ unsigned Opt = StringSwitch<unsigned>(OptStr.slice(0, OptStr.size()))
+ .Case("sy", ARM_MB::SY)
+ .Case("st", ARM_MB::ST)
+ .Case("sh", ARM_MB::ISH)
+ .Case("ish", ARM_MB::ISH)
+ .Case("shst", ARM_MB::ISHST)
+ .Case("ishst", ARM_MB::ISHST)
+ .Case("nsh", ARM_MB::NSH)
+ .Case("un", ARM_MB::NSH)
+ .Case("nshst", ARM_MB::NSHST)
+ .Case("unst", ARM_MB::NSHST)
+ .Case("osh", ARM_MB::OSH)
+ .Case("oshst", ARM_MB::OSHST)
+ .Default(~0U);
+
+ if (Opt == ~0U)
+ return MatchOperand_NoMatch;
+
+ Parser.Lex(); // Eat identifier token.
+ Operands.push_back(ARMOperand::CreateMemBarrierOpt((ARM_MB::MemBOpt)Opt, S));
+ return MatchOperand_Success;
+}
+
+/// parseProcIFlagsOperand - Try to parse iflags from CPS instruction.
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+parseProcIFlagsOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+ const AsmToken &Tok = Parser.getTok();
+ assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
+ StringRef IFlagsStr = Tok.getString();
+
+ // An iflags string of "none" is interpreted to mean that none of the AIF
+ // bits are set. Not a terribly useful instruction, but a valid encoding.
+ unsigned IFlags = 0;
+ if (IFlagsStr != "none") {
+ for (int i = 0, e = IFlagsStr.size(); i != e; ++i) {
+ unsigned Flag = StringSwitch<unsigned>(IFlagsStr.substr(i, 1))
+ .Case("a", ARM_PROC::A)
+ .Case("i", ARM_PROC::I)
+ .Case("f", ARM_PROC::F)
+ .Default(~0U);
+
+ // If some specific iflag is already set, it means that some letter is
+ // present more than once, this is not acceptable.
+ if (Flag == ~0U || (IFlags & Flag))
+ return MatchOperand_NoMatch;
+
+ IFlags |= Flag;
+ }
+ }
+
+ Parser.Lex(); // Eat identifier token.
+ Operands.push_back(ARMOperand::CreateProcIFlags((ARM_PROC::IFlags)IFlags, S));
+ return MatchOperand_Success;
+}
+
+/// parseMSRMaskOperand - Try to parse mask flags from MSR instruction.
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+parseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+ const AsmToken &Tok = Parser.getTok();
+ assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
+ StringRef Mask = Tok.getString();
+
+ if (isMClass()) {
+ // See ARMv6-M 10.1.1
+ unsigned FlagsVal = StringSwitch<unsigned>(Mask)
+ .Case("apsr", 0)
+ .Case("iapsr", 1)
+ .Case("eapsr", 2)
+ .Case("xpsr", 3)
+ .Case("ipsr", 5)
+ .Case("epsr", 6)
+ .Case("iepsr", 7)
+ .Case("msp", 8)
+ .Case("psp", 9)
+ .Case("primask", 16)
+ .Case("basepri", 17)
+ .Case("basepri_max", 18)
+ .Case("faultmask", 19)
+ .Case("control", 20)
+ .Default(~0U);
+
+ if (FlagsVal == ~0U)
+ return MatchOperand_NoMatch;
+
+ if (!hasV7Ops() && FlagsVal >= 17 && FlagsVal <= 19)
+ // basepri, basepri_max and faultmask only valid for V7m.
+ return MatchOperand_NoMatch;
+
+ Parser.Lex(); // Eat identifier token.
+ Operands.push_back(ARMOperand::CreateMSRMask(FlagsVal, S));
+ return MatchOperand_Success;
+ }
+
+ // Split spec_reg from flag, example: CPSR_sxf => "CPSR" and "sxf"
+ size_t Start = 0, Next = Mask.find('_');
+ StringRef Flags = "";
+ std::string SpecReg = LowercaseString(Mask.slice(Start, Next));
+ if (Next != StringRef::npos)
+ Flags = Mask.slice(Next+1, Mask.size());
+
+ // FlagsVal contains the complete mask:
+ // 3-0: Mask
+ // 4: Special Reg (cpsr, apsr => 0; spsr => 1)
+ unsigned FlagsVal = 0;
+
+ if (SpecReg == "apsr") {
+ FlagsVal = StringSwitch<unsigned>(Flags)
+ .Case("nzcvq", 0x8) // same as CPSR_f
+ .Case("g", 0x4) // same as CPSR_s
+ .Case("nzcvqg", 0xc) // same as CPSR_fs
+ .Default(~0U);
+
+ if (FlagsVal == ~0U) {
+ if (!Flags.empty())
+ return MatchOperand_NoMatch;
+ else
+ FlagsVal = 8; // No flag
+ }
+ } else if (SpecReg == "cpsr" || SpecReg == "spsr") {
+ if (Flags == "all") // cpsr_all is an alias for cpsr_fc
+ Flags = "fc";
+ for (int i = 0, e = Flags.size(); i != e; ++i) {
+ unsigned Flag = StringSwitch<unsigned>(Flags.substr(i, 1))
+ .Case("c", 1)
+ .Case("x", 2)
+ .Case("s", 4)
+ .Case("f", 8)
+ .Default(~0U);
+
+ // If some specific flag is already set, it means that some letter is
+ // present more than once, this is not acceptable.
+ if (FlagsVal == ~0U || (FlagsVal & Flag))
+ return MatchOperand_NoMatch;
+ FlagsVal |= Flag;
+ }
+ } else // No match for special register.
+ return MatchOperand_NoMatch;
+
+ // Special register without flags are equivalent to "fc" flags.
+ if (!FlagsVal)
+ FlagsVal = 0x9;
+
+ // Bit 4: Special Reg (cpsr, apsr => 0; spsr => 1)
+ if (SpecReg == "spsr")
+ FlagsVal |= 16;
+
+ Parser.Lex(); // Eat identifier token.
+ Operands.push_back(ARMOperand::CreateMSRMask(FlagsVal, S));
+ return MatchOperand_Success;
+}
+
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+parsePKHImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands, StringRef Op,
+ int Low, int High) {
+ const AsmToken &Tok = Parser.getTok();
+ if (Tok.isNot(AsmToken::Identifier)) {
+ Error(Parser.getTok().getLoc(), Op + " operand expected.");
+ return MatchOperand_ParseFail;
+ }
+ StringRef ShiftName = Tok.getString();
+ std::string LowerOp = LowercaseString(Op);
+ std::string UpperOp = UppercaseString(Op);
+ if (ShiftName != LowerOp && ShiftName != UpperOp) {
+ Error(Parser.getTok().getLoc(), Op + " operand expected.");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex(); // Eat shift type token.
+
+ // There must be a '#' and a shift amount.
+ if (Parser.getTok().isNot(AsmToken::Hash)) {
+ Error(Parser.getTok().getLoc(), "'#' expected");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex(); // Eat hash token.
+
+ const MCExpr *ShiftAmount;
+ SMLoc Loc = Parser.getTok().getLoc();
+ if (getParser().ParseExpression(ShiftAmount)) {
+ Error(Loc, "illegal expression");
+ return MatchOperand_ParseFail;
+ }
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ShiftAmount);
+ if (!CE) {
+ Error(Loc, "constant expression expected");
+ return MatchOperand_ParseFail;
+ }
+ int Val = CE->getValue();
+ if (Val < Low || Val > High) {
+ Error(Loc, "immediate value out of range");
+ return MatchOperand_ParseFail;
+ }
+
+ Operands.push_back(ARMOperand::CreateImm(CE, Loc, Parser.getTok().getLoc()));
+
+ return MatchOperand_Success;
+}
+
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+parseSetEndImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ const AsmToken &Tok = Parser.getTok();
+ SMLoc S = Tok.getLoc();
+ if (Tok.isNot(AsmToken::Identifier)) {
+ Error(Tok.getLoc(), "'be' or 'le' operand expected");
+ return MatchOperand_ParseFail;
+ }
+ int Val = StringSwitch<int>(Tok.getString())
+ .Case("be", 1)
+ .Case("le", 0)
+ .Default(-1);
+ Parser.Lex(); // Eat the token.
+
+ if (Val == -1) {
+ Error(Tok.getLoc(), "'be' or 'le' operand expected");
+ return MatchOperand_ParseFail;
+ }
+ Operands.push_back(ARMOperand::CreateImm(MCConstantExpr::Create(Val,
+ getContext()),
+ S, Parser.getTok().getLoc()));
+ return MatchOperand_Success;
+}
+
+/// parseShifterImm - Parse the shifter immediate operand for SSAT/USAT
+/// instructions. Legal values are:
+/// lsl #n 'n' in [0,31]
+/// asr #n 'n' in [1,32]
+/// n == 32 encoded as n == 0.
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+parseShifterImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ const AsmToken &Tok = Parser.getTok();
+ SMLoc S = Tok.getLoc();
+ if (Tok.isNot(AsmToken::Identifier)) {
+ Error(S, "shift operator 'asr' or 'lsl' expected");
+ return MatchOperand_ParseFail;
+ }
+ StringRef ShiftName = Tok.getString();
+ bool isASR;
+ if (ShiftName == "lsl" || ShiftName == "LSL")
+ isASR = false;
+ else if (ShiftName == "asr" || ShiftName == "ASR")
+ isASR = true;
+ else {
+ Error(S, "shift operator 'asr' or 'lsl' expected");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex(); // Eat the operator.
+
+ // A '#' and a shift amount.
+ if (Parser.getTok().isNot(AsmToken::Hash)) {
+ Error(Parser.getTok().getLoc(), "'#' expected");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex(); // Eat hash token.
+
+ const MCExpr *ShiftAmount;
+ SMLoc E = Parser.getTok().getLoc();
+ if (getParser().ParseExpression(ShiftAmount)) {
+ Error(E, "malformed shift expression");
+ return MatchOperand_ParseFail;
+ }
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ShiftAmount);
+ if (!CE) {
+ Error(E, "shift amount must be an immediate");
+ return MatchOperand_ParseFail;
+ }
+
+ int64_t Val = CE->getValue();
+ if (isASR) {
+ // Shift amount must be in [1,32]
+ if (Val < 1 || Val > 32) {
+ Error(E, "'asr' shift amount must be in range [1,32]");
+ return MatchOperand_ParseFail;
+ }
+ // asr #32 encoded as asr #0, but is not allowed in Thumb2 mode.
+ if (isThumb() && Val == 32) {
+ Error(E, "'asr #32' shift amount not allowed in Thumb mode");
+ return MatchOperand_ParseFail;
+ }
+ if (Val == 32) Val = 0;
+ } else {
+ // Shift amount must be in [1,32]
+ if (Val < 0 || Val > 31) {
+ Error(E, "'lsr' shift amount must be in range [0,31]");
+ return MatchOperand_ParseFail;
+ }
+ }
+
+ E = Parser.getTok().getLoc();
+ Operands.push_back(ARMOperand::CreateShifterImm(isASR, Val, S, E));
+
+ return MatchOperand_Success;
+}
+
+/// parseRotImm - Parse the shifter immediate operand for SXTB/UXTB family
+/// of instructions. Legal values are:
+/// ror #n 'n' in {0, 8, 16, 24}
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+parseRotImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ const AsmToken &Tok = Parser.getTok();
+ SMLoc S = Tok.getLoc();
+ if (Tok.isNot(AsmToken::Identifier))
+ return MatchOperand_NoMatch;
+ StringRef ShiftName = Tok.getString();
+ if (ShiftName != "ror" && ShiftName != "ROR")
+ return MatchOperand_NoMatch;
+ Parser.Lex(); // Eat the operator.
+
+ // A '#' and a rotate amount.
+ if (Parser.getTok().isNot(AsmToken::Hash)) {
+ Error(Parser.getTok().getLoc(), "'#' expected");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex(); // Eat hash token.
+
+ const MCExpr *ShiftAmount;
+ SMLoc E = Parser.getTok().getLoc();
+ if (getParser().ParseExpression(ShiftAmount)) {
+ Error(E, "malformed rotate expression");
+ return MatchOperand_ParseFail;
+ }
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ShiftAmount);
+ if (!CE) {
+ Error(E, "rotate amount must be an immediate");
+ return MatchOperand_ParseFail;
+ }
+
+ int64_t Val = CE->getValue();
+ // Shift amount must be in {0, 8, 16, 24} (0 is undocumented extension)
+ // normally, zero is represented in asm by omitting the rotate operand
+ // entirely.
+ if (Val != 8 && Val != 16 && Val != 24 && Val != 0) {
+ Error(E, "'ror' rotate amount must be 8, 16, or 24");
+ return MatchOperand_ParseFail;
+ }
+
+ E = Parser.getTok().getLoc();
+ Operands.push_back(ARMOperand::CreateRotImm(Val, S, E));
+
+ return MatchOperand_Success;
+}
+
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+parseBitfield(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+ // The bitfield descriptor is really two operands, the LSB and the width.
+ if (Parser.getTok().isNot(AsmToken::Hash)) {
+ Error(Parser.getTok().getLoc(), "'#' expected");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex(); // Eat hash token.
+
+ const MCExpr *LSBExpr;
+ SMLoc E = Parser.getTok().getLoc();
+ if (getParser().ParseExpression(LSBExpr)) {
+ Error(E, "malformed immediate expression");
+ return MatchOperand_ParseFail;
+ }
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(LSBExpr);
+ if (!CE) {
+ Error(E, "'lsb' operand must be an immediate");
+ return MatchOperand_ParseFail;
+ }
+
+ int64_t LSB = CE->getValue();
+ // The LSB must be in the range [0,31]
+ if (LSB < 0 || LSB > 31) {
+ Error(E, "'lsb' operand must be in the range [0,31]");
+ return MatchOperand_ParseFail;
+ }
+ E = Parser.getTok().getLoc();
+
+ // Expect another immediate operand.
+ if (Parser.getTok().isNot(AsmToken::Comma)) {
+ Error(Parser.getTok().getLoc(), "too few operands");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex(); // Eat hash token.
+ if (Parser.getTok().isNot(AsmToken::Hash)) {
+ Error(Parser.getTok().getLoc(), "'#' expected");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex(); // Eat hash token.
+
+ const MCExpr *WidthExpr;
+ if (getParser().ParseExpression(WidthExpr)) {
+ Error(E, "malformed immediate expression");
+ return MatchOperand_ParseFail;
+ }
+ CE = dyn_cast<MCConstantExpr>(WidthExpr);
+ if (!CE) {
+ Error(E, "'width' operand must be an immediate");
+ return MatchOperand_ParseFail;
+ }
+
+ int64_t Width = CE->getValue();
+ // The LSB must be in the range [1,32-lsb]
+ if (Width < 1 || Width > 32 - LSB) {
+ Error(E, "'width' operand must be in the range [1,32-lsb]");
+ return MatchOperand_ParseFail;
+ }
+ E = Parser.getTok().getLoc();
+
+ Operands.push_back(ARMOperand::CreateBitfield(LSB, Width, S, E));
+
+ return MatchOperand_Success;
+}
+
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+parsePostIdxReg(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Check for a post-index addressing register operand. Specifically:
+ // postidx_reg := '+' register {, shift}
+ // | '-' register {, shift}
+ // | register {, shift}
+
+ // This method must return MatchOperand_NoMatch without consuming any tokens
+ // in the case where there is no match, as other alternatives take other
+ // parse methods.
+ AsmToken Tok = Parser.getTok();
+ SMLoc S = Tok.getLoc();
+ bool haveEaten = false;
+ bool isAdd = true;
+ int Reg = -1;
+ if (Tok.is(AsmToken::Plus)) {
+ Parser.Lex(); // Eat the '+' token.
+ haveEaten = true;
+ } else if (Tok.is(AsmToken::Minus)) {
+ Parser.Lex(); // Eat the '-' token.
+ isAdd = false;
+ haveEaten = true;
+ }
+ if (Parser.getTok().is(AsmToken::Identifier))
+ Reg = tryParseRegister();
+ if (Reg == -1) {
+ if (!haveEaten)
+ return MatchOperand_NoMatch;
+ Error(Parser.getTok().getLoc(), "register expected");
+ return MatchOperand_ParseFail;
+ }
+ SMLoc E = Parser.getTok().getLoc();
+
+ ARM_AM::ShiftOpc ShiftTy = ARM_AM::no_shift;
+ unsigned ShiftImm = 0;
+ if (Parser.getTok().is(AsmToken::Comma)) {
+ Parser.Lex(); // Eat the ','.
+ if (parseMemRegOffsetShift(ShiftTy, ShiftImm))
+ return MatchOperand_ParseFail;
+ }
+
+ Operands.push_back(ARMOperand::CreatePostIdxReg(Reg, isAdd, ShiftTy,
+ ShiftImm, S, E));
+
+ return MatchOperand_Success;
+}
+
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Check for a post-index addressing register operand. Specifically:
+ // am3offset := '+' register
+ // | '-' register
+ // | register
+ // | # imm
+ // | # + imm
+ // | # - imm
+
+ // This method must return MatchOperand_NoMatch without consuming any tokens
+ // in the case where there is no match, as other alternatives take other
+ // parse methods.
+ AsmToken Tok = Parser.getTok();
+ SMLoc S = Tok.getLoc();
+
+ // Do immediates first, as we always parse those if we have a '#'.
+ if (Parser.getTok().is(AsmToken::Hash)) {
+ Parser.Lex(); // Eat the '#'.
+ // Explicitly look for a '-', as we need to encode negative zero
+ // differently.
+ bool isNegative = Parser.getTok().is(AsmToken::Minus);
+ const MCExpr *Offset;
+ if (getParser().ParseExpression(Offset))
+ return MatchOperand_ParseFail;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Offset);
+ if (!CE) {
+ Error(S, "constant expression expected");
+ return MatchOperand_ParseFail;
+ }
+ SMLoc E = Tok.getLoc();
+ // Negative zero is encoded as the flag value INT32_MIN.
+ int32_t Val = CE->getValue();
+ if (isNegative && Val == 0)
+ Val = INT32_MIN;
+
+ Operands.push_back(
+ ARMOperand::CreateImm(MCConstantExpr::Create(Val, getContext()), S, E));
+
+ return MatchOperand_Success;
+ }
+
+
+ bool haveEaten = false;
+ bool isAdd = true;
+ int Reg = -1;
+ if (Tok.is(AsmToken::Plus)) {
+ Parser.Lex(); // Eat the '+' token.
+ haveEaten = true;
+ } else if (Tok.is(AsmToken::Minus)) {
+ Parser.Lex(); // Eat the '-' token.
+ isAdd = false;
+ haveEaten = true;
+ }
+ if (Parser.getTok().is(AsmToken::Identifier))
+ Reg = tryParseRegister();
+ if (Reg == -1) {
+ if (!haveEaten)
+ return MatchOperand_NoMatch;
+ Error(Parser.getTok().getLoc(), "register expected");
+ return MatchOperand_ParseFail;
+ }
+ SMLoc E = Parser.getTok().getLoc();
+
+ Operands.push_back(ARMOperand::CreatePostIdxReg(Reg, isAdd, ARM_AM::no_shift,
+ 0, S, E));
+
+ return MatchOperand_Success;
+}
+
+/// cvtT2LdrdPre - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't handle properly tied operands
+/// when they refer multiple MIOperands inside a single one.
+bool ARMAsmParser::
+cvtT2LdrdPre(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Rt, Rt2
+ ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+ ((ARMOperand*)Operands[3])->addRegOperands(Inst, 1);
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateReg(0));
+ // addr
+ ((ARMOperand*)Operands[4])->addMemImm8s4OffsetOperands(Inst, 2);
+ // pred
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+ return true;
+}
+
+/// cvtT2StrdPre - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't handle properly tied operands
+/// when they refer multiple MIOperands inside a single one.
+bool ARMAsmParser::
+cvtT2StrdPre(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateReg(0));
+ // Rt, Rt2
+ ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+ ((ARMOperand*)Operands[3])->addRegOperands(Inst, 1);
+ // addr
+ ((ARMOperand*)Operands[4])->addMemImm8s4OffsetOperands(Inst, 2);
+ // pred
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+ return true;
+}
+
+/// cvtLdWriteBackRegT2AddrModeImm8 - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't handle properly tied operands
+/// when they refer multiple MIOperands inside a single one.
+bool ARMAsmParser::
+cvtLdWriteBackRegT2AddrModeImm8(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateImm(0));
+
+ ((ARMOperand*)Operands[3])->addMemImm8OffsetOperands(Inst, 2);
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+ return true;
+}
+
+/// cvtStWriteBackRegT2AddrModeImm8 - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't handle properly tied operands
+/// when they refer multiple MIOperands inside a single one.
+bool ARMAsmParser::
+cvtStWriteBackRegT2AddrModeImm8(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateImm(0));
+ ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+ ((ARMOperand*)Operands[3])->addMemImm8OffsetOperands(Inst, 2);
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+ return true;
+}
+
+/// cvtLdWriteBackRegAddrMode2 - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't handle properly tied operands
+/// when they refer multiple MIOperands inside a single one.
+bool ARMAsmParser::
+cvtLdWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateImm(0));
+
+ ((ARMOperand*)Operands[3])->addAddrMode2Operands(Inst, 3);
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+ return true;
+}
+
+/// cvtLdWriteBackRegAddrModeImm12 - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't handle properly tied operands
+/// when they refer multiple MIOperands inside a single one.
+bool ARMAsmParser::
+cvtLdWriteBackRegAddrModeImm12(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateImm(0));
+
+ ((ARMOperand*)Operands[3])->addMemImm12OffsetOperands(Inst, 2);
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+ return true;
+}
+
+
+/// cvtStWriteBackRegAddrModeImm12 - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't handle properly tied operands
+/// when they refer multiple MIOperands inside a single one.
+bool ARMAsmParser::
+cvtStWriteBackRegAddrModeImm12(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateImm(0));
+ ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+ ((ARMOperand*)Operands[3])->addMemImm12OffsetOperands(Inst, 2);
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+ return true;
+}
+
+/// cvtStWriteBackRegAddrMode2 - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't handle properly tied operands
+/// when they refer multiple MIOperands inside a single one.
+bool ARMAsmParser::
+cvtStWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateImm(0));
+ ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+ ((ARMOperand*)Operands[3])->addAddrMode2Operands(Inst, 3);
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+ return true;
+}
+
+/// cvtStWriteBackRegAddrMode3 - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't handle properly tied operands
+/// when they refer multiple MIOperands inside a single one.
+bool ARMAsmParser::
+cvtStWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateImm(0));
+ ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+ ((ARMOperand*)Operands[3])->addAddrMode3Operands(Inst, 3);
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+ return true;
+}
+
+/// cvtLdExtTWriteBackImm - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't handle properly tied operands
+/// when they refer multiple MIOperands inside a single one.
+bool ARMAsmParser::
+cvtLdExtTWriteBackImm(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Rt
+ ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateImm(0));
+ // addr
+ ((ARMOperand*)Operands[3])->addMemNoOffsetOperands(Inst, 1);
+ // offset
+ ((ARMOperand*)Operands[4])->addPostIdxImm8Operands(Inst, 1);
+ // pred
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+ return true;
+}
+
+/// cvtLdExtTWriteBackReg - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't handle properly tied operands
+/// when they refer multiple MIOperands inside a single one.
+bool ARMAsmParser::
+cvtLdExtTWriteBackReg(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Rt
+ ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateImm(0));
+ // addr
+ ((ARMOperand*)Operands[3])->addMemNoOffsetOperands(Inst, 1);
+ // offset
+ ((ARMOperand*)Operands[4])->addPostIdxRegOperands(Inst, 2);
+ // pred
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+ return true;
+}
+
+/// cvtStExtTWriteBackImm - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't handle properly tied operands
+/// when they refer multiple MIOperands inside a single one.
+bool ARMAsmParser::
+cvtStExtTWriteBackImm(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateImm(0));
+ // Rt
+ ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+ // addr
+ ((ARMOperand*)Operands[3])->addMemNoOffsetOperands(Inst, 1);
+ // offset
+ ((ARMOperand*)Operands[4])->addPostIdxImm8Operands(Inst, 1);
+ // pred
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+ return true;
+}
+
+/// cvtStExtTWriteBackReg - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't handle properly tied operands
+/// when they refer multiple MIOperands inside a single one.
+bool ARMAsmParser::
+cvtStExtTWriteBackReg(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateImm(0));
+ // Rt
+ ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+ // addr
+ ((ARMOperand*)Operands[3])->addMemNoOffsetOperands(Inst, 1);
+ // offset
+ ((ARMOperand*)Operands[4])->addPostIdxRegOperands(Inst, 2);
+ // pred
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+ return true;
+}
+
+/// cvtLdrdPre - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't handle properly tied operands
+/// when they refer multiple MIOperands inside a single one.
+bool ARMAsmParser::
+cvtLdrdPre(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Rt, Rt2
+ ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+ ((ARMOperand*)Operands[3])->addRegOperands(Inst, 1);
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateImm(0));
+ // addr
+ ((ARMOperand*)Operands[4])->addAddrMode3Operands(Inst, 3);
+ // pred
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+ return true;
+}
+
+/// cvtStrdPre - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't handle properly tied operands
+/// when they refer multiple MIOperands inside a single one.
+bool ARMAsmParser::
+cvtStrdPre(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateImm(0));
+ // Rt, Rt2
+ ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+ ((ARMOperand*)Operands[3])->addRegOperands(Inst, 1);
+ // addr
+ ((ARMOperand*)Operands[4])->addAddrMode3Operands(Inst, 3);
+ // pred
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+ return true;
+}
+
+/// cvtLdWriteBackRegAddrMode3 - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't handle properly tied operands
+/// when they refer multiple MIOperands inside a single one.
+bool ARMAsmParser::
+cvtLdWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateImm(0));
+ ((ARMOperand*)Operands[3])->addAddrMode3Operands(Inst, 3);
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+ return true;
+}
+
+/// cvtThumbMultiple- Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't handle properly tied operands
+/// when they refer multiple MIOperands inside a single one.
+bool ARMAsmParser::
+cvtThumbMultiply(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // The second source operand must be the same register as the destination
+ // operand.
+ if (Operands.size() == 6 &&
+ (((ARMOperand*)Operands[3])->getReg() !=
+ ((ARMOperand*)Operands[5])->getReg()) &&
+ (((ARMOperand*)Operands[3])->getReg() !=
+ ((ARMOperand*)Operands[4])->getReg())) {
+ Error(Operands[3]->getStartLoc(),
+ "destination register must match source register");
+ return false;
+ }
+ ((ARMOperand*)Operands[3])->addRegOperands(Inst, 1);
+ ((ARMOperand*)Operands[1])->addCCOutOperands(Inst, 1);
+ ((ARMOperand*)Operands[4])->addRegOperands(Inst, 1);
+ // If we have a three-operand form, use that, else the second source operand
+ // is just the destination operand again.
+ if (Operands.size() == 6)
+ ((ARMOperand*)Operands[5])->addRegOperands(Inst, 1);
+ else
+ Inst.addOperand(Inst.getOperand(0));
+ ((ARMOperand*)Operands[2])->addCondCodeOperands(Inst, 2);
+
+ return true;
+}
+
+/// Parse an ARM memory expression, return false if successful else return true
+/// or an error. The first token must be a '[' when called.
+bool ARMAsmParser::
+parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ SMLoc S, E;
+ assert(Parser.getTok().is(AsmToken::LBrac) &&
+ "Token is not a Left Bracket");
+ S = Parser.getTok().getLoc();
+ Parser.Lex(); // Eat left bracket token.
+
+ const AsmToken &BaseRegTok = Parser.getTok();
+ int BaseRegNum = tryParseRegister();
+ if (BaseRegNum == -1)
+ return Error(BaseRegTok.getLoc(), "register expected");
+
+ // The next token must either be a comma or a closing bracket.
+ const AsmToken &Tok = Parser.getTok();
+ if (!Tok.is(AsmToken::Comma) && !Tok.is(AsmToken::RBrac))
+ return Error(Tok.getLoc(), "malformed memory operand");
+
+ if (Tok.is(AsmToken::RBrac)) {
+ E = Tok.getLoc();
+ Parser.Lex(); // Eat right bracket token.
+
+ Operands.push_back(ARMOperand::CreateMem(BaseRegNum, 0, 0, ARM_AM::no_shift,
+ 0, 0, false, S, E));
+
+ // If there's a pre-indexing writeback marker, '!', just add it as a token
+ // operand. It's rather odd, but syntactically valid.
+ if (Parser.getTok().is(AsmToken::Exclaim)) {
+ Operands.push_back(ARMOperand::CreateToken("!",Parser.getTok().getLoc()));
+ Parser.Lex(); // Eat the '!'.
+ }
+
+ return false;
+ }
+
+ assert(Tok.is(AsmToken::Comma) && "Lost comma in memory operand?!");
+ Parser.Lex(); // Eat the comma.
+
+ // If we have a ':', it's an alignment specifier.
+ if (Parser.getTok().is(AsmToken::Colon)) {
+ Parser.Lex(); // Eat the ':'.
+ E = Parser.getTok().getLoc();
+
+ const MCExpr *Expr;
+ if (getParser().ParseExpression(Expr))
+ return true;
+
+ // The expression has to be a constant. Memory references with relocations
+ // don't come through here, as they use the <label> forms of the relevant
+ // instructions.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr);
+ if (!CE)
+ return Error (E, "constant expression expected");
+
+ unsigned Align = 0;
+ switch (CE->getValue()) {
+ default:
+ return Error(E, "alignment specifier must be 64, 128, or 256 bits");
+ case 64: Align = 8; break;
+ case 128: Align = 16; break;
+ case 256: Align = 32; break;
+ }
+
+ // Now we should have the closing ']'
+ E = Parser.getTok().getLoc();
+ if (Parser.getTok().isNot(AsmToken::RBrac))
+ return Error(E, "']' expected");
+ Parser.Lex(); // Eat right bracket token.
+
+ // Don't worry about range checking the value here. That's handled by
+ // the is*() predicates.
+ Operands.push_back(ARMOperand::CreateMem(BaseRegNum, 0, 0,
+ ARM_AM::no_shift, 0, Align,
+ false, S, E));
+
+ // If there's a pre-indexing writeback marker, '!', just add it as a token
+ // operand.
+ if (Parser.getTok().is(AsmToken::Exclaim)) {
+ Operands.push_back(ARMOperand::CreateToken("!",Parser.getTok().getLoc()));
+ Parser.Lex(); // Eat the '!'.
+ }
+
+ return false;
+ }
+
+ // If we have a '#', it's an immediate offset, else assume it's a register
+ // offset.
+ if (Parser.getTok().is(AsmToken::Hash)) {
+ Parser.Lex(); // Eat the '#'.
+ E = Parser.getTok().getLoc();
+
+ bool isNegative = getParser().getTok().is(AsmToken::Minus);
+ const MCExpr *Offset;
+ if (getParser().ParseExpression(Offset))
+ return true;
+
+ // The expression has to be a constant. Memory references with relocations
+ // don't come through here, as they use the <label> forms of the relevant
+ // instructions.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Offset);
+ if (!CE)
+ return Error (E, "constant expression expected");
+
+ // If the constant was #-0, represent it as INT32_MIN.
+ int32_t Val = CE->getValue();
+ if (isNegative && Val == 0)
+ CE = MCConstantExpr::Create(INT32_MIN, getContext());
+
+ // Now we should have the closing ']'
+ E = Parser.getTok().getLoc();
+ if (Parser.getTok().isNot(AsmToken::RBrac))
+ return Error(E, "']' expected");
+ Parser.Lex(); // Eat right bracket token.
+
+ // Don't worry about range checking the value here. That's handled by
+ // the is*() predicates.
+ Operands.push_back(ARMOperand::CreateMem(BaseRegNum, CE, 0,
+ ARM_AM::no_shift, 0, 0,
+ false, S, E));
+
+ // If there's a pre-indexing writeback marker, '!', just add it as a token
+ // operand.
+ if (Parser.getTok().is(AsmToken::Exclaim)) {
+ Operands.push_back(ARMOperand::CreateToken("!",Parser.getTok().getLoc()));
+ Parser.Lex(); // Eat the '!'.
+ }
+
+ return false;
+ }
+
+ // The register offset is optionally preceded by a '+' or '-'
+ bool isNegative = false;
+ if (Parser.getTok().is(AsmToken::Minus)) {
+ isNegative = true;
+ Parser.Lex(); // Eat the '-'.
+ } else if (Parser.getTok().is(AsmToken::Plus)) {
+ // Nothing to do.
+ Parser.Lex(); // Eat the '+'.
+ }
+
+ E = Parser.getTok().getLoc();
+ int OffsetRegNum = tryParseRegister();
+ if (OffsetRegNum == -1)
+ return Error(E, "register expected");
+
+ // If there's a shift operator, handle it.
+ ARM_AM::ShiftOpc ShiftType = ARM_AM::no_shift;
+ unsigned ShiftImm = 0;
+ if (Parser.getTok().is(AsmToken::Comma)) {
+ Parser.Lex(); // Eat the ','.
+ if (parseMemRegOffsetShift(ShiftType, ShiftImm))
+ return true;
+ }
+
+ // Now we should have the closing ']'
+ E = Parser.getTok().getLoc();
+ if (Parser.getTok().isNot(AsmToken::RBrac))
+ return Error(E, "']' expected");
+ Parser.Lex(); // Eat right bracket token.
+
+ Operands.push_back(ARMOperand::CreateMem(BaseRegNum, 0, OffsetRegNum,
+ ShiftType, ShiftImm, 0, isNegative,
+ S, E));
+
+ // If there's a pre-indexing writeback marker, '!', just add it as a token
+ // operand.
+ if (Parser.getTok().is(AsmToken::Exclaim)) {
+ Operands.push_back(ARMOperand::CreateToken("!",Parser.getTok().getLoc()));
+ Parser.Lex(); // Eat the '!'.
+ }
+
+ return false;
+}
+
+/// parseMemRegOffsetShift - one of these two:
+/// ( lsl | lsr | asr | ror ) , # shift_amount
+/// rrx
+/// return true if it parses a shift otherwise it returns false.
+bool ARMAsmParser::parseMemRegOffsetShift(ARM_AM::ShiftOpc &St,
+ unsigned &Amount) {
+ SMLoc Loc = Parser.getTok().getLoc();
+ const AsmToken &Tok = Parser.getTok();
+ if (Tok.isNot(AsmToken::Identifier))
+ return true;
+ StringRef ShiftName = Tok.getString();
+ if (ShiftName == "lsl" || ShiftName == "LSL")
+ St = ARM_AM::lsl;
+ else if (ShiftName == "lsr" || ShiftName == "LSR")
+ St = ARM_AM::lsr;
+ else if (ShiftName == "asr" || ShiftName == "ASR")
+ St = ARM_AM::asr;
+ else if (ShiftName == "ror" || ShiftName == "ROR")
+ St = ARM_AM::ror;
+ else if (ShiftName == "rrx" || ShiftName == "RRX")
+ St = ARM_AM::rrx;
+ else
+ return Error(Loc, "illegal shift operator");
+ Parser.Lex(); // Eat shift type token.
+
+ // rrx stands alone.
+ Amount = 0;
+ if (St != ARM_AM::rrx) {
+ Loc = Parser.getTok().getLoc();
+ // A '#' and a shift amount.
+ const AsmToken &HashTok = Parser.getTok();
+ if (HashTok.isNot(AsmToken::Hash))
+ return Error(HashTok.getLoc(), "'#' expected");
+ Parser.Lex(); // Eat hash token.
+
+ const MCExpr *Expr;
+ if (getParser().ParseExpression(Expr))
+ return true;
+ // Range check the immediate.
+ // lsl, ror: 0 <= imm <= 31
+ // lsr, asr: 0 <= imm <= 32
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr);
+ if (!CE)
+ return Error(Loc, "shift amount must be an immediate");
+ int64_t Imm = CE->getValue();
+ if (Imm < 0 ||
+ ((St == ARM_AM::lsl || St == ARM_AM::ror) && Imm > 31) ||
+ ((St == ARM_AM::lsr || St == ARM_AM::asr) && Imm > 32))
+ return Error(Loc, "immediate shift value out of range");
+ Amount = Imm;
+ }
+
+ return false;
+}
+
+/// parseFPImm - A floating point immediate expression operand.
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+parseFPImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+
+ if (Parser.getTok().isNot(AsmToken::Hash))
+ return MatchOperand_NoMatch;
+ Parser.Lex(); // Eat the '#'.
+
+ // Handle negation, as that still comes through as a separate token.
+ bool isNegative = false;
+ if (Parser.getTok().is(AsmToken::Minus)) {
+ isNegative = true;
+ Parser.Lex();
+ }
+ const AsmToken &Tok = Parser.getTok();
+ if (Tok.is(AsmToken::Real)) {
+ APFloat RealVal(APFloat::IEEEdouble, Tok.getString());
+ uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue();
+ // If we had a '-' in front, toggle the sign bit.
+ IntVal ^= (uint64_t)isNegative << 63;
+ int Val = ARM_AM::getFP64Imm(APInt(64, IntVal));
+ Parser.Lex(); // Eat the token.
+ if (Val == -1) {
+ TokError("floating point value out of range");
+ return MatchOperand_ParseFail;
+ }
+ Operands.push_back(ARMOperand::CreateFPImm(Val, S, getContext()));
+ return MatchOperand_Success;
+ }
+ if (Tok.is(AsmToken::Integer)) {
+ int64_t Val = Tok.getIntVal();
+ Parser.Lex(); // Eat the token.
+ if (Val > 255 || Val < 0) {
+ TokError("encoded floating point value out of range");
+ return MatchOperand_ParseFail;
+ }
+ Operands.push_back(ARMOperand::CreateFPImm(Val, S, getContext()));
+ return MatchOperand_Success;
+ }
+
+ TokError("invalid floating point immediate");
+ return MatchOperand_ParseFail;
+}
+/// Parse a arm instruction operand. For now this parses the operand regardless
+/// of the mnemonic.
+bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ StringRef Mnemonic) {
+ SMLoc S, E;
+
+ // Check if the current operand has a custom associated parser, if so, try to
+ // custom parse the operand, or fallback to the general approach.
+ OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
+ if (ResTy == MatchOperand_Success)
+ return false;
+ // If there wasn't a custom match, try the generic matcher below. Otherwise,
+ // there was a match, but an error occurred, in which case, just return that
+ // the operand parsing failed.
+ if (ResTy == MatchOperand_ParseFail)
+ return true;
+
+ switch (getLexer().getKind()) {
+ default:
+ Error(Parser.getTok().getLoc(), "unexpected token in operand");
+ return true;
+ case AsmToken::Identifier: {
+ // If this is VMRS, check for the apsr_nzcv operand.
+ if (!tryParseRegisterWithWriteBack(Operands))
+ return false;
+ int Res = tryParseShiftRegister(Operands);
+ if (Res == 0) // success
+ return false;
+ else if (Res == -1) // irrecoverable error
+ return true;
+ if (Mnemonic == "vmrs" && Parser.getTok().getString() == "apsr_nzcv") {
+ S = Parser.getTok().getLoc();
+ Parser.Lex();
+ Operands.push_back(ARMOperand::CreateToken("apsr_nzcv", S));
+ return false;
+ }
+
+ // Fall though for the Identifier case that is not a register or a
+ // special name.
+ }
+ case AsmToken::Integer: // things like 1f and 2b as a branch targets
+ case AsmToken::Dot: { // . as a branch target
+ // This was not a register so parse other operands that start with an
+ // identifier (like labels) as expressions and create them as immediates.
+ const MCExpr *IdVal;
+ S = Parser.getTok().getLoc();
+ if (getParser().ParseExpression(IdVal))
+ return true;
+ E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+ Operands.push_back(ARMOperand::CreateImm(IdVal, S, E));
+ return false;
+ }
+ case AsmToken::LBrac:
+ return parseMemory(Operands);
+ case AsmToken::LCurly:
+ return parseRegisterList(Operands);
+ case AsmToken::Hash: {
+ // #42 -> immediate.
+ // TODO: ":lower16:" and ":upper16:" modifiers after # before immediate
+ S = Parser.getTok().getLoc();
+ Parser.Lex();
+ bool isNegative = Parser.getTok().is(AsmToken::Minus);
+ const MCExpr *ImmVal;
+ if (getParser().ParseExpression(ImmVal))
+ return true;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ImmVal);
+ if (!CE) {
+ Error(S, "constant expression expected");
+ return MatchOperand_ParseFail;
+ }
+ int32_t Val = CE->getValue();
+ if (isNegative && Val == 0)
+ ImmVal = MCConstantExpr::Create(INT32_MIN, getContext());
+ E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+ Operands.push_back(ARMOperand::CreateImm(ImmVal, S, E));
+ return false;
+ }
+ case AsmToken::Colon: {
+ // ":lower16:" and ":upper16:" expression prefixes
+ // FIXME: Check it's an expression prefix,
+ // e.g. (FOO - :lower16:BAR) isn't legal.
+ ARMMCExpr::VariantKind RefKind;
+ if (parsePrefix(RefKind))
+ return true;
+
+ const MCExpr *SubExprVal;
+ if (getParser().ParseExpression(SubExprVal))
+ return true;
+
+ const MCExpr *ExprVal = ARMMCExpr::Create(RefKind, SubExprVal,
+ getContext());
+ E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+ Operands.push_back(ARMOperand::CreateImm(ExprVal, S, E));
+ return false;
+ }
+ }
+}
+
+// parsePrefix - Parse ARM 16-bit relocations expression prefix, i.e.
+// :lower16: and :upper16:.
+bool ARMAsmParser::parsePrefix(ARMMCExpr::VariantKind &RefKind) {
+ RefKind = ARMMCExpr::VK_ARM_None;
+
+ // :lower16: and :upper16: modifiers
+ assert(getLexer().is(AsmToken::Colon) && "expected a :");
+ Parser.Lex(); // Eat ':'
+
+ if (getLexer().isNot(AsmToken::Identifier)) {
+ Error(Parser.getTok().getLoc(), "expected prefix identifier in operand");
+ return true;
+ }
+
+ StringRef IDVal = Parser.getTok().getIdentifier();
+ if (IDVal == "lower16") {
+ RefKind = ARMMCExpr::VK_ARM_LO16;
+ } else if (IDVal == "upper16") {
+ RefKind = ARMMCExpr::VK_ARM_HI16;
+ } else {
+ Error(Parser.getTok().getLoc(), "unexpected prefix in operand");
+ return true;
+ }
+ Parser.Lex();
+
+ if (getLexer().isNot(AsmToken::Colon)) {
+ Error(Parser.getTok().getLoc(), "unexpected token after prefix");
+ return true;
+ }
+ Parser.Lex(); // Eat the last ':'
+ return false;
+}
+
+/// \brief Given a mnemonic, split out possible predication code and carry
+/// setting letters to form a canonical mnemonic and flags.
+//
+// FIXME: Would be nice to autogen this.
+// FIXME: This is a bit of a maze of special cases.
+StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic,
+ unsigned &PredicationCode,
+ bool &CarrySetting,
+ unsigned &ProcessorIMod,
+ StringRef &ITMask) {
+ PredicationCode = ARMCC::AL;
+ CarrySetting = false;
+ ProcessorIMod = 0;
+
+ // Ignore some mnemonics we know aren't predicated forms.
+ //
+ // FIXME: Would be nice to autogen this.
+ if ((Mnemonic == "movs" && isThumb()) ||
+ Mnemonic == "teq" || Mnemonic == "vceq" || Mnemonic == "svc" ||
+ Mnemonic == "mls" || Mnemonic == "smmls" || Mnemonic == "vcls" ||
+ Mnemonic == "vmls" || Mnemonic == "vnmls" || Mnemonic == "vacge" ||
+ Mnemonic == "vcge" || Mnemonic == "vclt" || Mnemonic == "vacgt" ||
+ Mnemonic == "vcgt" || Mnemonic == "vcle" || Mnemonic == "smlal" ||
+ Mnemonic == "umaal" || Mnemonic == "umlal" || Mnemonic == "vabal" ||
+ Mnemonic == "vmlal" || Mnemonic == "vpadal" || Mnemonic == "vqdmlal")
+ return Mnemonic;
+
+ // First, split out any predication code. Ignore mnemonics we know aren't
+ // predicated but do have a carry-set and so weren't caught above.
+ if (Mnemonic != "adcs" && Mnemonic != "bics" && Mnemonic != "movs" &&
+ Mnemonic != "muls" && Mnemonic != "smlals" && Mnemonic != "smulls" &&
+ Mnemonic != "umlals" && Mnemonic != "umulls" && Mnemonic != "lsls" &&
+ Mnemonic != "sbcs" && Mnemonic != "rscs") {
+ unsigned CC = StringSwitch<unsigned>(Mnemonic.substr(Mnemonic.size()-2))
+ .Case("eq", ARMCC::EQ)
+ .Case("ne", ARMCC::NE)
+ .Case("hs", ARMCC::HS)
+ .Case("cs", ARMCC::HS)
+ .Case("lo", ARMCC::LO)
+ .Case("cc", ARMCC::LO)
+ .Case("mi", ARMCC::MI)
+ .Case("pl", ARMCC::PL)
+ .Case("vs", ARMCC::VS)
+ .Case("vc", ARMCC::VC)
+ .Case("hi", ARMCC::HI)
+ .Case("ls", ARMCC::LS)
+ .Case("ge", ARMCC::GE)
+ .Case("lt", ARMCC::LT)
+ .Case("gt", ARMCC::GT)
+ .Case("le", ARMCC::LE)
+ .Case("al", ARMCC::AL)
+ .Default(~0U);
+ if (CC != ~0U) {
+ Mnemonic = Mnemonic.slice(0, Mnemonic.size() - 2);
+ PredicationCode = CC;
+ }
+ }
+
+ // Next, determine if we have a carry setting bit. We explicitly ignore all
+ // the instructions we know end in 's'.
+ if (Mnemonic.endswith("s") &&
+ !(Mnemonic == "cps" || Mnemonic == "mls" ||
+ Mnemonic == "mrs" || Mnemonic == "smmls" || Mnemonic == "vabs" ||
+ Mnemonic == "vcls" || Mnemonic == "vmls" || Mnemonic == "vmrs" ||
+ Mnemonic == "vnmls" || Mnemonic == "vqabs" || Mnemonic == "vrecps" ||
+ Mnemonic == "vrsqrts" || Mnemonic == "srs" ||
+ (Mnemonic == "movs" && isThumb()))) {
+ Mnemonic = Mnemonic.slice(0, Mnemonic.size() - 1);
+ CarrySetting = true;
+ }
+
+ // The "cps" instruction can have a interrupt mode operand which is glued into
+ // the mnemonic. Check if this is the case, split it and parse the imod op
+ if (Mnemonic.startswith("cps")) {
+ // Split out any imod code.
+ unsigned IMod =
+ StringSwitch<unsigned>(Mnemonic.substr(Mnemonic.size()-2, 2))
+ .Case("ie", ARM_PROC::IE)
+ .Case("id", ARM_PROC::ID)
+ .Default(~0U);
+ if (IMod != ~0U) {
+ Mnemonic = Mnemonic.slice(0, Mnemonic.size()-2);
+ ProcessorIMod = IMod;
+ }
+ }
+
+ // The "it" instruction has the condition mask on the end of the mnemonic.
+ if (Mnemonic.startswith("it")) {
+ ITMask = Mnemonic.slice(2, Mnemonic.size());
+ Mnemonic = Mnemonic.slice(0, 2);
+ }
+
+ return Mnemonic;
+}
+
+/// \brief Given a canonical mnemonic, determine if the instruction ever allows
+/// inclusion of carry set or predication code operands.
+//
+// FIXME: It would be nice to autogen this.
+void ARMAsmParser::
+getMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet,
+ bool &CanAcceptPredicationCode) {
+ if (Mnemonic == "and" || Mnemonic == "lsl" || Mnemonic == "lsr" ||
+ Mnemonic == "rrx" || Mnemonic == "ror" || Mnemonic == "sub" ||
+ Mnemonic == "add" || Mnemonic == "adc" ||
+ Mnemonic == "mul" || Mnemonic == "bic" || Mnemonic == "asr" ||
+ Mnemonic == "orr" || Mnemonic == "mvn" ||
+ Mnemonic == "rsb" || Mnemonic == "rsc" || Mnemonic == "orn" ||
+ Mnemonic == "sbc" || Mnemonic == "eor" || Mnemonic == "neg" ||
+ (!isThumb() && (Mnemonic == "smull" || Mnemonic == "mov" ||
+ Mnemonic == "mla" || Mnemonic == "smlal" ||
+ Mnemonic == "umlal" || Mnemonic == "umull"))) {
+ CanAcceptCarrySet = true;
+ } else
+ CanAcceptCarrySet = false;
+
+ if (Mnemonic == "cbnz" || Mnemonic == "setend" || Mnemonic == "dmb" ||
+ Mnemonic == "cps" || Mnemonic == "mcr2" || Mnemonic == "it" ||
+ Mnemonic == "mcrr2" || Mnemonic == "cbz" || Mnemonic == "cdp2" ||
+ Mnemonic == "trap" || Mnemonic == "mrc2" || Mnemonic == "mrrc2" ||
+ Mnemonic == "dsb" || Mnemonic == "isb" || Mnemonic == "setend" ||
+ (Mnemonic == "clrex" && !isThumb()) ||
+ (Mnemonic == "nop" && isThumbOne()) ||
+ ((Mnemonic == "pld" || Mnemonic == "pli" || Mnemonic == "pldw" ||
+ Mnemonic == "ldc2" || Mnemonic == "ldc2l" ||
+ Mnemonic == "stc2" || Mnemonic == "stc2l") && !isThumb()) ||
+ ((Mnemonic.startswith("rfe") || Mnemonic.startswith("srs")) &&
+ !isThumb()) ||
+ Mnemonic.startswith("cps") || (Mnemonic == "movs" && isThumbOne())) {
+ CanAcceptPredicationCode = false;
+ } else
+ CanAcceptPredicationCode = true;
+
+ if (isThumb()) {
+ if (Mnemonic == "bkpt" || Mnemonic == "mcr" || Mnemonic == "mcrr" ||
+ Mnemonic == "mrc" || Mnemonic == "mrrc" || Mnemonic == "cdp")
+ CanAcceptPredicationCode = false;
+ }
+}
+
+bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // FIXME: This is all horribly hacky. We really need a better way to deal
+ // with optional operands like this in the matcher table.
+
+ // The 'mov' mnemonic is special. One variant has a cc_out operand, while
+ // another does not. Specifically, the MOVW instruction does not. So we
+ // special case it here and remove the defaulted (non-setting) cc_out
+ // operand if that's the instruction we're trying to match.
+ //
+ // We do this as post-processing of the explicit operands rather than just
+ // conditionally adding the cc_out in the first place because we need
+ // to check the type of the parsed immediate operand.
+ if (Mnemonic == "mov" && Operands.size() > 4 && !isThumb() &&
+ !static_cast<ARMOperand*>(Operands[4])->isARMSOImm() &&
+ static_cast<ARMOperand*>(Operands[4])->isImm0_65535Expr() &&
+ static_cast<ARMOperand*>(Operands[1])->getReg() == 0)
+ return true;
+
+ // Register-register 'add' for thumb does not have a cc_out operand
+ // when there are only two register operands.
+ if (isThumb() && Mnemonic == "add" && Operands.size() == 5 &&
+ static_cast<ARMOperand*>(Operands[3])->isReg() &&
+ static_cast<ARMOperand*>(Operands[4])->isReg() &&
+ static_cast<ARMOperand*>(Operands[1])->getReg() == 0)
+ return true;
+ // Register-register 'add' for thumb does not have a cc_out operand
+ // when it's an ADD Rdm, SP, {Rdm|#imm0_255} instruction. We do
+ // have to check the immediate range here since Thumb2 has a variant
+ // that can handle a different range and has a cc_out operand.
+ if (((isThumb() && Mnemonic == "add") ||
+ (isThumbTwo() && Mnemonic == "sub")) &&
+ Operands.size() == 6 &&
+ static_cast<ARMOperand*>(Operands[3])->isReg() &&
+ static_cast<ARMOperand*>(Operands[4])->isReg() &&
+ static_cast<ARMOperand*>(Operands[4])->getReg() == ARM::SP &&
+ static_cast<ARMOperand*>(Operands[1])->getReg() == 0 &&
+ (static_cast<ARMOperand*>(Operands[5])->isReg() ||
+ static_cast<ARMOperand*>(Operands[5])->isImm0_1020s4()))
+ return true;
+ // For Thumb2, add/sub immediate does not have a cc_out operand for the
+ // imm0_4095 variant. That's the least-preferred variant when
+ // selecting via the generic "add" mnemonic, so to know that we
+ // should remove the cc_out operand, we have to explicitly check that
+ // it's not one of the other variants. Ugh.
+ if (isThumbTwo() && (Mnemonic == "add" || Mnemonic == "sub") &&
+ Operands.size() == 6 &&
+ static_cast<ARMOperand*>(Operands[3])->isReg() &&
+ static_cast<ARMOperand*>(Operands[4])->isReg() &&
+ static_cast<ARMOperand*>(Operands[5])->isImm()) {
+ // Nest conditions rather than one big 'if' statement for readability.
+ //
+ // If either register is a high reg, it's either one of the SP
+ // variants (handled above) or a 32-bit encoding, so we just
+ // check against T3.
+ if ((!isARMLowRegister(static_cast<ARMOperand*>(Operands[3])->getReg()) ||
+ !isARMLowRegister(static_cast<ARMOperand*>(Operands[4])->getReg())) &&
+ static_cast<ARMOperand*>(Operands[5])->isT2SOImm())
+ return false;
+ // If both registers are low, we're in an IT block, and the immediate is
+ // in range, we should use encoding T1 instead, which has a cc_out.
+ if (inITBlock() &&
+ isARMLowRegister(static_cast<ARMOperand*>(Operands[3])->getReg()) &&
+ isARMLowRegister(static_cast<ARMOperand*>(Operands[4])->getReg()) &&
+ static_cast<ARMOperand*>(Operands[5])->isImm0_7())
+ return false;
+
+ // Otherwise, we use encoding T4, which does not have a cc_out
+ // operand.
+ return true;
+ }
+
+ // The thumb2 multiply instruction doesn't have a CCOut register, so
+ // if we have a "mul" mnemonic in Thumb mode, check if we'll be able to
+ // use the 16-bit encoding or not.
+ if (isThumbTwo() && Mnemonic == "mul" && Operands.size() == 6 &&
+ static_cast<ARMOperand*>(Operands[1])->getReg() == 0 &&
+ static_cast<ARMOperand*>(Operands[3])->isReg() &&
+ static_cast<ARMOperand*>(Operands[4])->isReg() &&
+ static_cast<ARMOperand*>(Operands[5])->isReg() &&
+ // If the registers aren't low regs, the destination reg isn't the
+ // same as one of the source regs, or the cc_out operand is zero
+ // outside of an IT block, we have to use the 32-bit encoding, so
+ // remove the cc_out operand.
+ (!isARMLowRegister(static_cast<ARMOperand*>(Operands[3])->getReg()) ||
+ !isARMLowRegister(static_cast<ARMOperand*>(Operands[4])->getReg()) ||
+ !inITBlock() ||
+ (static_cast<ARMOperand*>(Operands[3])->getReg() !=
+ static_cast<ARMOperand*>(Operands[5])->getReg() &&
+ static_cast<ARMOperand*>(Operands[3])->getReg() !=
+ static_cast<ARMOperand*>(Operands[4])->getReg())))
+ return true;
+
+
+
+ // Register-register 'add/sub' for thumb does not have a cc_out operand
+ // when it's an ADD/SUB SP, #imm. Be lenient on count since there's also
+ // the "add/sub SP, SP, #imm" version. If the follow-up operands aren't
+ // right, this will result in better diagnostics (which operand is off)
+ // anyway.
+ if (isThumb() && (Mnemonic == "add" || Mnemonic == "sub") &&
+ (Operands.size() == 5 || Operands.size() == 6) &&
+ static_cast<ARMOperand*>(Operands[3])->isReg() &&
+ static_cast<ARMOperand*>(Operands[3])->getReg() == ARM::SP &&
+ static_cast<ARMOperand*>(Operands[1])->getReg() == 0)
+ return true;
+
+ return false;
+}
+
+/// Parse an arm instruction mnemonic followed by its operands.
+bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Create the leading tokens for the mnemonic, split by '.' characters.
+ size_t Start = 0, Next = Name.find('.');
+ StringRef Mnemonic = Name.slice(Start, Next);
+
+ // Split out the predication code and carry setting flag from the mnemonic.
+ unsigned PredicationCode;
+ unsigned ProcessorIMod;
+ bool CarrySetting;
+ StringRef ITMask;
+ Mnemonic = splitMnemonic(Mnemonic, PredicationCode, CarrySetting,
+ ProcessorIMod, ITMask);
+
+ // In Thumb1, only the branch (B) instruction can be predicated.
+ if (isThumbOne() && PredicationCode != ARMCC::AL && Mnemonic != "b") {
+ Parser.EatToEndOfStatement();
+ return Error(NameLoc, "conditional execution not supported in Thumb1");
+ }
+
+ Operands.push_back(ARMOperand::CreateToken(Mnemonic, NameLoc));
+
+ // Handle the IT instruction ITMask. Convert it to a bitmask. This
+ // is the mask as it will be for the IT encoding if the conditional
+ // encoding has a '1' as it's bit0 (i.e. 't' ==> '1'). In the case
+ // where the conditional bit0 is zero, the instruction post-processing
+ // will adjust the mask accordingly.
+ if (Mnemonic == "it") {
+ SMLoc Loc = SMLoc::getFromPointer(NameLoc.getPointer() + 2);
+ if (ITMask.size() > 3) {
+ Parser.EatToEndOfStatement();
+ return Error(Loc, "too many conditions on IT instruction");
+ }
+ unsigned Mask = 8;
+ for (unsigned i = ITMask.size(); i != 0; --i) {
+ char pos = ITMask[i - 1];
+ if (pos != 't' && pos != 'e') {
+ Parser.EatToEndOfStatement();
+ return Error(Loc, "illegal IT block condition mask '" + ITMask + "'");
+ }
+ Mask >>= 1;
+ if (ITMask[i - 1] == 't')
+ Mask |= 8;
+ }
+ Operands.push_back(ARMOperand::CreateITMask(Mask, Loc));
+ }
+
+ // FIXME: This is all a pretty gross hack. We should automatically handle
+ // optional operands like this via tblgen.
+
+ // Next, add the CCOut and ConditionCode operands, if needed.
+ //
+ // For mnemonics which can ever incorporate a carry setting bit or predication
+ // code, our matching model involves us always generating CCOut and
+ // ConditionCode operands to match the mnemonic "as written" and then we let
+ // the matcher deal with finding the right instruction or generating an
+ // appropriate error.
+ bool CanAcceptCarrySet, CanAcceptPredicationCode;
+ getMnemonicAcceptInfo(Mnemonic, CanAcceptCarrySet, CanAcceptPredicationCode);
+
+ // If we had a carry-set on an instruction that can't do that, issue an
+ // error.
+ if (!CanAcceptCarrySet && CarrySetting) {
+ Parser.EatToEndOfStatement();
+ return Error(NameLoc, "instruction '" + Mnemonic +
+ "' can not set flags, but 's' suffix specified");
+ }
+ // If we had a predication code on an instruction that can't do that, issue an
+ // error.
+ if (!CanAcceptPredicationCode && PredicationCode != ARMCC::AL) {
+ Parser.EatToEndOfStatement();
+ return Error(NameLoc, "instruction '" + Mnemonic +
+ "' is not predicable, but condition code specified");
+ }
+
+ // Add the carry setting operand, if necessary.
+ if (CanAcceptCarrySet) {
+ SMLoc Loc = SMLoc::getFromPointer(NameLoc.getPointer() + Mnemonic.size());
+ Operands.push_back(ARMOperand::CreateCCOut(CarrySetting ? ARM::CPSR : 0,
+ Loc));
+ }
+
+ // Add the predication code operand, if necessary.
+ if (CanAcceptPredicationCode) {
+ SMLoc Loc = SMLoc::getFromPointer(NameLoc.getPointer() + Mnemonic.size() +
+ CarrySetting);
+ Operands.push_back(ARMOperand::CreateCondCode(
+ ARMCC::CondCodes(PredicationCode), Loc));
+ }
+
+ // Add the processor imod operand, if necessary.
+ if (ProcessorIMod) {
+ Operands.push_back(ARMOperand::CreateImm(
+ MCConstantExpr::Create(ProcessorIMod, getContext()),
+ NameLoc, NameLoc));
+ }
+
+ // Add the remaining tokens in the mnemonic.
+ while (Next != StringRef::npos) {
+ Start = Next;
+ Next = Name.find('.', Start + 1);
+ StringRef ExtraToken = Name.slice(Start, Next);
+
+ // For now, we're only parsing Thumb1 (for the most part), so
+ // just ignore ".n" qualifiers. We'll use them to restrict
+ // matching when we do Thumb2.
+ if (ExtraToken != ".n") {
+ SMLoc Loc = SMLoc::getFromPointer(NameLoc.getPointer() + Start);
+ Operands.push_back(ARMOperand::CreateToken(ExtraToken, Loc));
+ }
+ }
+
+ // Read the remaining operands.
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ // Read the first operand.
+ if (parseOperand(Operands, Mnemonic)) {
+ Parser.EatToEndOfStatement();
+ return true;
+ }
+
+ while (getLexer().is(AsmToken::Comma)) {
+ Parser.Lex(); // Eat the comma.
+
+ // Parse and remember the operand.
+ if (parseOperand(Operands, Mnemonic)) {
+ Parser.EatToEndOfStatement();
+ return true;
+ }
+ }
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ SMLoc Loc = getLexer().getLoc();
+ Parser.EatToEndOfStatement();
+ return Error(Loc, "unexpected token in argument list");
+ }
+
+ Parser.Lex(); // Consume the EndOfStatement
+
+ // Some instructions, mostly Thumb, have forms for the same mnemonic that
+ // do and don't have a cc_out optional-def operand. With some spot-checks
+ // of the operand list, we can figure out which variant we're trying to
+ // parse and adjust accordingly before actually matching. We shouldn't ever
+ // try to remove a cc_out operand that was explicitly set on the the
+ // mnemonic, of course (CarrySetting == true). Reason number #317 the
+ // table driven matcher doesn't fit well with the ARM instruction set.
+ if (!CarrySetting && shouldOmitCCOutOperand(Mnemonic, Operands)) {
+ ARMOperand *Op = static_cast<ARMOperand*>(Operands[1]);
+ Operands.erase(Operands.begin() + 1);
+ delete Op;
+ }
+
+ // ARM mode 'blx' need special handling, as the register operand version
+ // is predicable, but the label operand version is not. So, we can't rely
+ // on the Mnemonic based checking to correctly figure out when to put
+ // a k_CondCode operand in the list. If we're trying to match the label
+ // version, remove the k_CondCode operand here.
+ if (!isThumb() && Mnemonic == "blx" && Operands.size() == 3 &&
+ static_cast<ARMOperand*>(Operands[2])->isImm()) {
+ ARMOperand *Op = static_cast<ARMOperand*>(Operands[1]);
+ Operands.erase(Operands.begin() + 1);
+ delete Op;
+ }
+
+ // The vector-compare-to-zero instructions have a literal token "#0" at
+ // the end that comes to here as an immediate operand. Convert it to a
+ // token to play nicely with the matcher.
+ if ((Mnemonic == "vceq" || Mnemonic == "vcge" || Mnemonic == "vcgt" ||
+ Mnemonic == "vcle" || Mnemonic == "vclt") && Operands.size() == 6 &&
+ static_cast<ARMOperand*>(Operands[5])->isImm()) {
+ ARMOperand *Op = static_cast<ARMOperand*>(Operands[5]);
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op->getImm());
+ if (CE && CE->getValue() == 0) {
+ Operands.erase(Operands.begin() + 5);
+ Operands.push_back(ARMOperand::CreateToken("#0", Op->getStartLoc()));
+ delete Op;
+ }
+ }
+ // VCMP{E} does the same thing, but with a different operand count.
+ if ((Mnemonic == "vcmp" || Mnemonic == "vcmpe") && Operands.size() == 5 &&
+ static_cast<ARMOperand*>(Operands[4])->isImm()) {
+ ARMOperand *Op = static_cast<ARMOperand*>(Operands[4]);
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op->getImm());
+ if (CE && CE->getValue() == 0) {
+ Operands.erase(Operands.begin() + 4);
+ Operands.push_back(ARMOperand::CreateToken("#0", Op->getStartLoc()));
+ delete Op;
+ }
+ }
+ // Similarly, the Thumb1 "RSB" instruction has a literal "#0" on the
+ // end. Convert it to a token here.
+ if (Mnemonic == "rsb" && isThumb() && Operands.size() == 6 &&
+ static_cast<ARMOperand*>(Operands[5])->isImm()) {
+ ARMOperand *Op = static_cast<ARMOperand*>(Operands[5]);
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op->getImm());
+ if (CE && CE->getValue() == 0) {
+ Operands.erase(Operands.begin() + 5);
+ Operands.push_back(ARMOperand::CreateToken("#0", Op->getStartLoc()));
+ delete Op;
+ }
+ }
+
+ return false;
+}
+
+// Validate context-sensitive operand constraints.
+
+// return 'true' if register list contains non-low GPR registers,
+// 'false' otherwise. If Reg is in the register list or is HiReg, set
+// 'containsReg' to true.
+static bool checkLowRegisterList(MCInst Inst, unsigned OpNo, unsigned Reg,
+ unsigned HiReg, bool &containsReg) {
+ containsReg = false;
+ for (unsigned i = OpNo; i < Inst.getNumOperands(); ++i) {
+ unsigned OpReg = Inst.getOperand(i).getReg();
+ if (OpReg == Reg)
+ containsReg = true;
+ // Anything other than a low register isn't legal here.
+ if (!isARMLowRegister(OpReg) && (!HiReg || OpReg != HiReg))
+ return true;
+ }
+ return false;
+}
+
+// Check if the specified regisgter is in the register list of the inst,
+// starting at the indicated operand number.
+static bool listContainsReg(MCInst &Inst, unsigned OpNo, unsigned Reg) {
+ for (unsigned i = OpNo; i < Inst.getNumOperands(); ++i) {
+ unsigned OpReg = Inst.getOperand(i).getReg();
+ if (OpReg == Reg)
+ return true;
+ }
+ return false;
+}
+
+// FIXME: We would really prefer to have MCInstrInfo (the wrapper around
+// the ARMInsts array) instead. Getting that here requires awkward
+// API changes, though. Better way?
+namespace llvm {
+extern MCInstrDesc ARMInsts[];
+}
+static MCInstrDesc &getInstDesc(unsigned Opcode) {
+ return ARMInsts[Opcode];
+}
+
+// FIXME: We would really like to be able to tablegen'erate this.
+bool ARMAsmParser::
+validateInstruction(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ MCInstrDesc &MCID = getInstDesc(Inst.getOpcode());
+ SMLoc Loc = Operands[0]->getStartLoc();
+ // Check the IT block state first.
+ // NOTE: In Thumb mode, the BKPT instruction has the interesting property of
+ // being allowed in IT blocks, but not being predicable. It just always
+ // executes.
+ if (inITBlock() && Inst.getOpcode() != ARM::tBKPT) {
+ unsigned bit = 1;
+ if (ITState.FirstCond)
+ ITState.FirstCond = false;
+ else
+ bit = (ITState.Mask >> (5 - ITState.CurPosition)) & 1;
+ // The instruction must be predicable.
+ if (!MCID.isPredicable())
+ return Error(Loc, "instructions in IT block must be predicable");
+ unsigned Cond = Inst.getOperand(MCID.findFirstPredOperandIdx()).getImm();
+ unsigned ITCond = bit ? ITState.Cond :
+ ARMCC::getOppositeCondition(ITState.Cond);
+ if (Cond != ITCond) {
+ // Find the condition code Operand to get its SMLoc information.
+ SMLoc CondLoc;
+ for (unsigned i = 1; i < Operands.size(); ++i)
+ if (static_cast<ARMOperand*>(Operands[i])->isCondCode())
+ CondLoc = Operands[i]->getStartLoc();
+ return Error(CondLoc, "incorrect condition in IT block; got '" +
+ StringRef(ARMCondCodeToString(ARMCC::CondCodes(Cond))) +
+ "', but expected '" +
+ ARMCondCodeToString(ARMCC::CondCodes(ITCond)) + "'");
+ }
+ // Check for non-'al' condition codes outside of the IT block.
+ } else if (isThumbTwo() && MCID.isPredicable() &&
+ Inst.getOperand(MCID.findFirstPredOperandIdx()).getImm() !=
+ ARMCC::AL && Inst.getOpcode() != ARM::tB &&
+ Inst.getOpcode() != ARM::t2B)
+ return Error(Loc, "predicated instructions must be in IT block");
+
+ switch (Inst.getOpcode()) {
+ case ARM::LDRD:
+ case ARM::LDRD_PRE:
+ case ARM::LDRD_POST:
+ case ARM::LDREXD: {
+ // Rt2 must be Rt + 1.
+ unsigned Rt = getARMRegisterNumbering(Inst.getOperand(0).getReg());
+ unsigned Rt2 = getARMRegisterNumbering(Inst.getOperand(1).getReg());
+ if (Rt2 != Rt + 1)
+ return Error(Operands[3]->getStartLoc(),
+ "destination operands must be sequential");
+ return false;
+ }
+ case ARM::STRD: {
+ // Rt2 must be Rt + 1.
+ unsigned Rt = getARMRegisterNumbering(Inst.getOperand(0).getReg());
+ unsigned Rt2 = getARMRegisterNumbering(Inst.getOperand(1).getReg());
+ if (Rt2 != Rt + 1)
+ return Error(Operands[3]->getStartLoc(),
+ "source operands must be sequential");
+ return false;
+ }
+ case ARM::STRD_PRE:
+ case ARM::STRD_POST:
+ case ARM::STREXD: {
+ // Rt2 must be Rt + 1.
+ unsigned Rt = getARMRegisterNumbering(Inst.getOperand(1).getReg());
+ unsigned Rt2 = getARMRegisterNumbering(Inst.getOperand(2).getReg());
+ if (Rt2 != Rt + 1)
+ return Error(Operands[3]->getStartLoc(),
+ "source operands must be sequential");
+ return false;
+ }
+ case ARM::SBFX:
+ case ARM::UBFX: {
+ // width must be in range [1, 32-lsb]
+ unsigned lsb = Inst.getOperand(2).getImm();
+ unsigned widthm1 = Inst.getOperand(3).getImm();
+ if (widthm1 >= 32 - lsb)
+ return Error(Operands[5]->getStartLoc(),
+ "bitfield width must be in range [1,32-lsb]");
+ return false;
+ }
+ case ARM::tLDMIA: {
+ // If we're parsing Thumb2, the .w variant is available and handles
+ // most cases that are normally illegal for a Thumb1 LDM
+ // instruction. We'll make the transformation in processInstruction()
+ // if necessary.
+ //
+ // Thumb LDM instructions are writeback iff the base register is not
+ // in the register list.
+ unsigned Rn = Inst.getOperand(0).getReg();
+ bool hasWritebackToken =
+ (static_cast<ARMOperand*>(Operands[3])->isToken() &&
+ static_cast<ARMOperand*>(Operands[3])->getToken() == "!");
+ bool listContainsBase;
+ if (checkLowRegisterList(Inst, 3, Rn, 0, listContainsBase) && !isThumbTwo())
+ return Error(Operands[3 + hasWritebackToken]->getStartLoc(),
+ "registers must be in range r0-r7");
+ // If we should have writeback, then there should be a '!' token.
+ if (!listContainsBase && !hasWritebackToken && !isThumbTwo())
+ return Error(Operands[2]->getStartLoc(),
+ "writeback operator '!' expected");
+ // If we should not have writeback, there must not be a '!'. This is
+ // true even for the 32-bit wide encodings.
+ if (listContainsBase && hasWritebackToken)
+ return Error(Operands[3]->getStartLoc(),
+ "writeback operator '!' not allowed when base register "
+ "in register list");
+
+ break;
+ }
+ case ARM::t2LDMIA_UPD: {
+ if (listContainsReg(Inst, 3, Inst.getOperand(0).getReg()))
+ return Error(Operands[4]->getStartLoc(),
+ "writeback operator '!' not allowed when base register "
+ "in register list");
+ break;
+ }
+ case ARM::tPOP: {
+ bool listContainsBase;
+ if (checkLowRegisterList(Inst, 3, 0, ARM::PC, listContainsBase))
+ return Error(Operands[2]->getStartLoc(),
+ "registers must be in range r0-r7 or pc");
+ break;
+ }
+ case ARM::tPUSH: {
+ bool listContainsBase;
+ if (checkLowRegisterList(Inst, 3, 0, ARM::LR, listContainsBase))
+ return Error(Operands[2]->getStartLoc(),
+ "registers must be in range r0-r7 or lr");
+ break;
+ }
+ case ARM::tSTMIA_UPD: {
+ bool listContainsBase;
+ if (checkLowRegisterList(Inst, 4, 0, 0, listContainsBase) && !isThumbTwo())
+ return Error(Operands[4]->getStartLoc(),
+ "registers must be in range r0-r7");
+ break;
+ }
+ }
+
+ return false;
+}
+
+void ARMAsmParser::
+processInstruction(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ switch (Inst.getOpcode()) {
+ case ARM::LDMIA_UPD:
+ // If this is a load of a single register via a 'pop', then we should use
+ // a post-indexed LDR instruction instead, per the ARM ARM.
+ if (static_cast<ARMOperand*>(Operands[0])->getToken() == "pop" &&
+ Inst.getNumOperands() == 5) {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::LDR_POST_IMM);
+ TmpInst.addOperand(Inst.getOperand(4)); // Rt
+ TmpInst.addOperand(Inst.getOperand(0)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(MCOperand::CreateReg(0)); // am2offset
+ TmpInst.addOperand(MCOperand::CreateImm(4));
+ TmpInst.addOperand(Inst.getOperand(2)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(3));
+ Inst = TmpInst;
+ }
+ break;
+ case ARM::STMDB_UPD:
+ // If this is a store of a single register via a 'push', then we should use
+ // a pre-indexed STR instruction instead, per the ARM ARM.
+ if (static_cast<ARMOperand*>(Operands[0])->getToken() == "push" &&
+ Inst.getNumOperands() == 5) {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::STR_PRE_IMM);
+ TmpInst.addOperand(Inst.getOperand(0)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(4)); // Rt
+ TmpInst.addOperand(Inst.getOperand(1)); // addrmode_imm12
+ TmpInst.addOperand(MCOperand::CreateImm(-4));
+ TmpInst.addOperand(Inst.getOperand(2)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(3));
+ Inst = TmpInst;
+ }
+ break;
+ case ARM::tADDi8:
+ // If the immediate is in the range 0-7, we want tADDi3 iff Rd was
+ // explicitly specified. From the ARM ARM: "Encoding T1 is preferred
+ // to encoding T2 if <Rd> is specified and encoding T2 is preferred
+ // to encoding T1 if <Rd> is omitted."
+ if (Inst.getOperand(3).getImm() < 8 && Operands.size() == 6)
+ Inst.setOpcode(ARM::tADDi3);
+ break;
+ case ARM::tSUBi8:
+ // If the immediate is in the range 0-7, we want tADDi3 iff Rd was
+ // explicitly specified. From the ARM ARM: "Encoding T1 is preferred
+ // to encoding T2 if <Rd> is specified and encoding T2 is preferred
+ // to encoding T1 if <Rd> is omitted."
+ if (Inst.getOperand(3).getImm() < 8 && Operands.size() == 6)
+ Inst.setOpcode(ARM::tSUBi3);
+ break;
+ case ARM::tB:
+ // A Thumb conditional branch outside of an IT block is a tBcc.
+ if (Inst.getOperand(1).getImm() != ARMCC::AL && !inITBlock())
+ Inst.setOpcode(ARM::tBcc);
+ break;
+ case ARM::t2B:
+ // A Thumb2 conditional branch outside of an IT block is a t2Bcc.
+ if (Inst.getOperand(1).getImm() != ARMCC::AL && !inITBlock())
+ Inst.setOpcode(ARM::t2Bcc);
+ break;
+ case ARM::t2Bcc:
+ // If the conditional is AL or we're in an IT block, we really want t2B.
+ if (Inst.getOperand(1).getImm() == ARMCC::AL || inITBlock())
+ Inst.setOpcode(ARM::t2B);
+ break;
+ case ARM::tBcc:
+ // If the conditional is AL, we really want tB.
+ if (Inst.getOperand(1).getImm() == ARMCC::AL)
+ Inst.setOpcode(ARM::tB);
+ break;
+ case ARM::tLDMIA: {
+ // If the register list contains any high registers, or if the writeback
+ // doesn't match what tLDMIA can do, we need to use the 32-bit encoding
+ // instead if we're in Thumb2. Otherwise, this should have generated
+ // an error in validateInstruction().
+ unsigned Rn = Inst.getOperand(0).getReg();
+ bool hasWritebackToken =
+ (static_cast<ARMOperand*>(Operands[3])->isToken() &&
+ static_cast<ARMOperand*>(Operands[3])->getToken() == "!");
+ bool listContainsBase;
+ if (checkLowRegisterList(Inst, 3, Rn, 0, listContainsBase) ||
+ (!listContainsBase && !hasWritebackToken) ||
+ (listContainsBase && hasWritebackToken)) {
+ // 16-bit encoding isn't sufficient. Switch to the 32-bit version.
+ assert (isThumbTwo());
+ Inst.setOpcode(hasWritebackToken ? ARM::t2LDMIA_UPD : ARM::t2LDMIA);
+ // If we're switching to the updating version, we need to insert
+ // the writeback tied operand.
+ if (hasWritebackToken)
+ Inst.insert(Inst.begin(),
+ MCOperand::CreateReg(Inst.getOperand(0).getReg()));
+ }
+ break;
+ }
+ case ARM::tSTMIA_UPD: {
+ // If the register list contains any high registers, we need to use
+ // the 32-bit encoding instead if we're in Thumb2. Otherwise, this
+ // should have generated an error in validateInstruction().
+ unsigned Rn = Inst.getOperand(0).getReg();
+ bool listContainsBase;
+ if (checkLowRegisterList(Inst, 4, Rn, 0, listContainsBase)) {
+ // 16-bit encoding isn't sufficient. Switch to the 32-bit version.
+ assert (isThumbTwo());
+ Inst.setOpcode(ARM::t2STMIA_UPD);
+ }
+ break;
+ }
+ case ARM::t2MOVi: {
+ // If we can use the 16-bit encoding and the user didn't explicitly
+ // request the 32-bit variant, transform it here.
+ if (isARMLowRegister(Inst.getOperand(0).getReg()) &&
+ Inst.getOperand(1).getImm() <= 255 &&
+ ((!inITBlock() && Inst.getOperand(2).getImm() == ARMCC::AL &&
+ Inst.getOperand(4).getReg() == ARM::CPSR) ||
+ (inITBlock() && Inst.getOperand(4).getReg() == 0)) &&
+ (!static_cast<ARMOperand*>(Operands[2])->isToken() ||
+ static_cast<ARMOperand*>(Operands[2])->getToken() != ".w")) {
+ // The operands aren't in the same order for tMOVi8...
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tMOVi8);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(4));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(Inst.getOperand(2));
+ TmpInst.addOperand(Inst.getOperand(3));
+ Inst = TmpInst;
+ }
+ break;
+ }
+ case ARM::t2MOVr: {
+ // If we can use the 16-bit encoding and the user didn't explicitly
+ // request the 32-bit variant, transform it here.
+ if (isARMLowRegister(Inst.getOperand(0).getReg()) &&
+ isARMLowRegister(Inst.getOperand(1).getReg()) &&
+ Inst.getOperand(2).getImm() == ARMCC::AL &&
+ Inst.getOperand(4).getReg() == ARM::CPSR &&
+ (!static_cast<ARMOperand*>(Operands[2])->isToken() ||
+ static_cast<ARMOperand*>(Operands[2])->getToken() != ".w")) {
+ // The operands aren't the same for tMOV[S]r... (no cc_out)
+ MCInst TmpInst;
+ TmpInst.setOpcode(Inst.getOperand(4).getReg() ? ARM::tMOVSr : ARM::tMOVr);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(Inst.getOperand(2));
+ TmpInst.addOperand(Inst.getOperand(3));
+ Inst = TmpInst;
+ }
+ break;
+ }
+ case ARM::t2SXTH:
+ case ARM::t2SXTB:
+ case ARM::t2UXTH:
+ case ARM::t2UXTB: {
+ // If we can use the 16-bit encoding and the user didn't explicitly
+ // request the 32-bit variant, transform it here.
+ if (isARMLowRegister(Inst.getOperand(0).getReg()) &&
+ isARMLowRegister(Inst.getOperand(1).getReg()) &&
+ Inst.getOperand(2).getImm() == 0 &&
+ (!static_cast<ARMOperand*>(Operands[2])->isToken() ||
+ static_cast<ARMOperand*>(Operands[2])->getToken() != ".w")) {
+ unsigned NewOpc;
+ switch (Inst.getOpcode()) {
+ default: llvm_unreachable("Illegal opcode!");
+ case ARM::t2SXTH: NewOpc = ARM::tSXTH; break;
+ case ARM::t2SXTB: NewOpc = ARM::tSXTB; break;
+ case ARM::t2UXTH: NewOpc = ARM::tUXTH; break;
+ case ARM::t2UXTB: NewOpc = ARM::tUXTB; break;
+ }
+ // The operands aren't the same for thumb1 (no rotate operand).
+ MCInst TmpInst;
+ TmpInst.setOpcode(NewOpc);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(Inst.getOperand(3));
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ }
+ break;
+ }
+ case ARM::t2IT: {
+ // The mask bits for all but the first condition are represented as
+ // the low bit of the condition code value implies 't'. We currently
+ // always have 1 implies 't', so XOR toggle the bits if the low bit
+ // of the condition code is zero. The encoding also expects the low
+ // bit of the condition to be encoded as bit 4 of the mask operand,
+ // so mask that in if needed
+ MCOperand &MO = Inst.getOperand(1);
+ unsigned Mask = MO.getImm();
+ unsigned OrigMask = Mask;
+ unsigned TZ = CountTrailingZeros_32(Mask);
+ if ((Inst.getOperand(0).getImm() & 1) == 0) {
+ assert(Mask && TZ <= 3 && "illegal IT mask value!");
+ for (unsigned i = 3; i != TZ; --i)
+ Mask ^= 1 << i;
+ } else
+ Mask |= 0x10;
+ MO.setImm(Mask);
+
+ // Set up the IT block state according to the IT instruction we just
+ // matched.
+ assert(!inITBlock() && "nested IT blocks?!");
+ ITState.Cond = ARMCC::CondCodes(Inst.getOperand(0).getImm());
+ ITState.Mask = OrigMask; // Use the original mask, not the updated one.
+ ITState.CurPosition = 0;
+ ITState.FirstCond = true;
+ break;
+ }
+ }
+}
+
+unsigned ARMAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
+ // 16-bit thumb arithmetic instructions either require or preclude the 'S'
+ // suffix depending on whether they're in an IT block or not.
+ unsigned Opc = Inst.getOpcode();
+ MCInstrDesc &MCID = getInstDesc(Opc);
+ if (MCID.TSFlags & ARMII::ThumbArithFlagSetting) {
+ assert(MCID.hasOptionalDef() &&
+ "optionally flag setting instruction missing optional def operand");
+ assert(MCID.NumOperands == Inst.getNumOperands() &&
+ "operand count mismatch!");
+ // Find the optional-def operand (cc_out).
+ unsigned OpNo;
+ for (OpNo = 0;
+ !MCID.OpInfo[OpNo].isOptionalDef() && OpNo < MCID.NumOperands;
+ ++OpNo)
+ ;
+ // If we're parsing Thumb1, reject it completely.
+ if (isThumbOne() && Inst.getOperand(OpNo).getReg() != ARM::CPSR)
+ return Match_MnemonicFail;
+ // If we're parsing Thumb2, which form is legal depends on whether we're
+ // in an IT block.
+ if (isThumbTwo() && Inst.getOperand(OpNo).getReg() != ARM::CPSR &&
+ !inITBlock())
+ return Match_RequiresITBlock;
+ if (isThumbTwo() && Inst.getOperand(OpNo).getReg() == ARM::CPSR &&
+ inITBlock())
+ return Match_RequiresNotITBlock;
+ }
+ // Some high-register supporting Thumb1 encodings only allow both registers
+ // to be from r0-r7 when in Thumb2.
+ else if (Opc == ARM::tADDhirr && isThumbOne() &&
+ isARMLowRegister(Inst.getOperand(1).getReg()) &&
+ isARMLowRegister(Inst.getOperand(2).getReg()))
+ return Match_RequiresThumb2;
+ // Others only require ARMv6 or later.
+ else if (Opc == ARM::tMOVr && isThumbOne() && !hasV6Ops() &&
+ isARMLowRegister(Inst.getOperand(0).getReg()) &&
+ isARMLowRegister(Inst.getOperand(1).getReg()))
+ return Match_RequiresV6;
+ return Match_Success;
+}
+
+bool ARMAsmParser::
+MatchAndEmitInstruction(SMLoc IDLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ MCStreamer &Out) {
+ MCInst Inst;
+ unsigned ErrorInfo;
+ unsigned MatchResult;
+ MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo);
+ switch (MatchResult) {
+ default: break;
+ case Match_Success:
+ // Context sensitive operand constraints aren't handled by the matcher,
+ // so check them here.
+ if (validateInstruction(Inst, Operands)) {
+ // Still progress the IT block, otherwise one wrong condition causes
+ // nasty cascading errors.
+ forwardITPosition();
+ return true;
+ }
+
+ // Some instructions need post-processing to, for example, tweak which
+ // encoding is selected.
+ processInstruction(Inst, Operands);
+
+ // Only move forward at the very end so that everything in validate
+ // and process gets a consistent answer about whether we're in an IT
+ // block.
+ forwardITPosition();
+
+ Out.EmitInstruction(Inst);
+ return false;
+ case Match_MissingFeature:
+ Error(IDLoc, "instruction requires a CPU feature not currently enabled");
+ return true;
+ case Match_InvalidOperand: {
+ SMLoc ErrorLoc = IDLoc;
+ if (ErrorInfo != ~0U) {
+ if (ErrorInfo >= Operands.size())
+ return Error(IDLoc, "too few operands for instruction");
+
+ ErrorLoc = ((ARMOperand*)Operands[ErrorInfo])->getStartLoc();
+ if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
+ }
+
+ return Error(ErrorLoc, "invalid operand for instruction");
+ }
+ case Match_MnemonicFail:
+ return Error(IDLoc, "invalid instruction");
+ case Match_ConversionFail:
+ // The converter function will have already emited a diagnostic.
+ return true;
+ case Match_RequiresNotITBlock:
+ return Error(IDLoc, "flag setting instruction only valid outside IT block");
+ case Match_RequiresITBlock:
+ return Error(IDLoc, "instruction only valid inside IT block");
+ case Match_RequiresV6:
+ return Error(IDLoc, "instruction variant requires ARMv6 or later");
+ case Match_RequiresThumb2:
+ return Error(IDLoc, "instruction variant requires Thumb2");
+ }
+
+ llvm_unreachable("Implement any new match types added!");
+ return true;
+}
+
+/// parseDirective parses the arm specific directives
+bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) {
+ StringRef IDVal = DirectiveID.getIdentifier();
+ if (IDVal == ".word")
+ return parseDirectiveWord(4, DirectiveID.getLoc());
+ else if (IDVal == ".thumb")
+ return parseDirectiveThumb(DirectiveID.getLoc());
+ else if (IDVal == ".thumb_func")
+ return parseDirectiveThumbFunc(DirectiveID.getLoc());
+ else if (IDVal == ".code")
+ return parseDirectiveCode(DirectiveID.getLoc());
+ else if (IDVal == ".syntax")
+ return parseDirectiveSyntax(DirectiveID.getLoc());
+ return true;
+}
+
+/// parseDirectiveWord
+/// ::= .word [ expression (, expression)* ]
+bool ARMAsmParser::parseDirectiveWord(unsigned Size, SMLoc L) {
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ for (;;) {
+ const MCExpr *Value;
+ if (getParser().ParseExpression(Value))
+ return true;
+
+ getParser().getStreamer().EmitValue(Value, Size, 0/*addrspace*/);
+
+ if (getLexer().is(AsmToken::EndOfStatement))
+ break;
+
+ // FIXME: Improve diagnostic.
+ if (getLexer().isNot(AsmToken::Comma))
+ return Error(L, "unexpected token in directive");
+ Parser.Lex();
+ }
+ }
+
+ Parser.Lex();
+ return false;
+}
+
+/// parseDirectiveThumb
+/// ::= .thumb
+bool ARMAsmParser::parseDirectiveThumb(SMLoc L) {
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return Error(L, "unexpected token in directive");
+ Parser.Lex();
+
+ // TODO: set thumb mode
+ // TODO: tell the MC streamer the mode
+ // getParser().getStreamer().Emit???();
+ return false;
+}
+
+/// parseDirectiveThumbFunc
+/// ::= .thumbfunc symbol_name
+bool ARMAsmParser::parseDirectiveThumbFunc(SMLoc L) {
+ const MCAsmInfo &MAI = getParser().getStreamer().getContext().getAsmInfo();
+ bool isMachO = MAI.hasSubsectionsViaSymbols();
+ StringRef Name;
+
+ // Darwin asm has function name after .thumb_func direction
+ // ELF doesn't
+ if (isMachO) {
+ const AsmToken &Tok = Parser.getTok();
+ if (Tok.isNot(AsmToken::Identifier) && Tok.isNot(AsmToken::String))
+ return Error(L, "unexpected token in .thumb_func directive");
+ Name = Tok.getString();
+ Parser.Lex(); // Consume the identifier token.
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return Error(L, "unexpected token in directive");
+ Parser.Lex();
+
+ // FIXME: assuming function name will be the line following .thumb_func
+ if (!isMachO) {
+ Name = Parser.getTok().getString();
+ }
+
+ // Mark symbol as a thumb symbol.
+ MCSymbol *Func = getParser().getContext().GetOrCreateSymbol(Name);
+ getParser().getStreamer().EmitThumbFunc(Func);
+ return false;
+}
+
+/// parseDirectiveSyntax
+/// ::= .syntax unified | divided
+bool ARMAsmParser::parseDirectiveSyntax(SMLoc L) {
+ const AsmToken &Tok = Parser.getTok();
+ if (Tok.isNot(AsmToken::Identifier))
+ return Error(L, "unexpected token in .syntax directive");
+ StringRef Mode = Tok.getString();
+ if (Mode == "unified" || Mode == "UNIFIED")
+ Parser.Lex();
+ else if (Mode == "divided" || Mode == "DIVIDED")
+ return Error(L, "'.syntax divided' arm asssembly not supported");
+ else
+ return Error(L, "unrecognized syntax mode in .syntax directive");
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return Error(Parser.getTok().getLoc(), "unexpected token in directive");
+ Parser.Lex();
+
+ // TODO tell the MC streamer the mode
+ // getParser().getStreamer().Emit???();
+ return false;
+}
+
+/// parseDirectiveCode
+/// ::= .code 16 | 32
+bool ARMAsmParser::parseDirectiveCode(SMLoc L) {
+ const AsmToken &Tok = Parser.getTok();
+ if (Tok.isNot(AsmToken::Integer))
+ return Error(L, "unexpected token in .code directive");
+ int64_t Val = Parser.getTok().getIntVal();
+ if (Val == 16)
+ Parser.Lex();
+ else if (Val == 32)
+ Parser.Lex();
+ else
+ return Error(L, "invalid operand to .code directive");
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return Error(Parser.getTok().getLoc(), "unexpected token in directive");
+ Parser.Lex();
+
+ if (Val == 16) {
+ if (!isThumb())
+ SwitchMode();
+ getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
+ } else {
+ if (isThumb())
+ SwitchMode();
+ getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
+ }
+
+ return false;
+}
+
+extern "C" void LLVMInitializeARMAsmLexer();
+
+/// Force static initialization.
+extern "C" void LLVMInitializeARMAsmParser() {
+ RegisterMCAsmParser<ARMAsmParser> X(TheARMTarget);
+ RegisterMCAsmParser<ARMAsmParser> Y(TheThumbTarget);
+ LLVMInitializeARMAsmLexer();
+}
+
+#define GET_REGISTER_MATCHER
+#define GET_MATCHER_IMPLEMENTATION
+#include "ARMGenAsmMatcher.inc"
diff --git a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
new file mode 100644
index 0000000..8f2f813
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -0,0 +1,4082 @@
+//===- ARMDisassembler.cpp - Disassembler for ARM/Thumb ISA -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm-disassembler"
+
+#include "ARM.h"
+#include "ARMRegisterInfo.h"
+#include "ARMSubtarget.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
+#include "MCTargetDesc/ARMMCExpr.h"
+#include "MCTargetDesc/ARMBaseInfo.h"
+#include "llvm/MC/EDInstInfo.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDisassembler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MemoryObject.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+typedef MCDisassembler::DecodeStatus DecodeStatus;
+
+namespace {
+/// ARMDisassembler - ARM disassembler for all ARM platforms.
+class ARMDisassembler : public MCDisassembler {
+public:
+ /// Constructor - Initializes the disassembler.
+ ///
+ ARMDisassembler(const MCSubtargetInfo &STI) :
+ MCDisassembler(STI) {
+ }
+
+ ~ARMDisassembler() {
+ }
+
+ /// getInstruction - See MCDisassembler.
+ DecodeStatus getInstruction(MCInst &instr,
+ uint64_t &size,
+ const MemoryObject &region,
+ uint64_t address,
+ raw_ostream &vStream,
+ raw_ostream &cStream) const;
+
+ /// getEDInfo - See MCDisassembler.
+ EDInstInfo *getEDInfo() const;
+private:
+};
+
+/// ThumbDisassembler - Thumb disassembler for all Thumb platforms.
+class ThumbDisassembler : public MCDisassembler {
+public:
+ /// Constructor - Initializes the disassembler.
+ ///
+ ThumbDisassembler(const MCSubtargetInfo &STI) :
+ MCDisassembler(STI) {
+ }
+
+ ~ThumbDisassembler() {
+ }
+
+ /// getInstruction - See MCDisassembler.
+ DecodeStatus getInstruction(MCInst &instr,
+ uint64_t &size,
+ const MemoryObject &region,
+ uint64_t address,
+ raw_ostream &vStream,
+ raw_ostream &cStream) const;
+
+ /// getEDInfo - See MCDisassembler.
+ EDInstInfo *getEDInfo() const;
+private:
+ mutable std::vector<unsigned> ITBlock;
+ DecodeStatus AddThumbPredicate(MCInst&) const;
+ void UpdateThumbVFPPredicate(MCInst&) const;
+};
+}
+
+static bool Check(DecodeStatus &Out, DecodeStatus In) {
+ switch (In) {
+ case MCDisassembler::Success:
+ // Out stays the same.
+ return true;
+ case MCDisassembler::SoftFail:
+ Out = In;
+ return true;
+ case MCDisassembler::Fail:
+ Out = In;
+ return false;
+ }
+ return false;
+}
+
+
+// Forward declare these because the autogenerated code will reference them.
+// Definitions are further down.
+static DecodeStatus DecodeGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeGPRnopcRegisterClass(llvm::MCInst &Inst,
+ unsigned RegNo, uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodetGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodetcGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecoderGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeSPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeDPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeDPR_8RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeDPR_VFP2RegisterClass(llvm::MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeQPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+
+static DecodeStatus DecodePredicateOperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeCCOutOperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeSOImmOperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeRegListOperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeSPRRegListOperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeDPRRegListOperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+
+static DecodeStatus DecodeBitfieldMaskOperand(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeCopMemInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeAddrMode2IdxInstruction(llvm::MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeSORegMemOperand(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeAddrMode3Instruction(llvm::MCInst &Inst,unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeSORegImmOperand(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeSORegRegOperand(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+
+static DecodeStatus DecodeMemMultipleWritebackInstruction(llvm::MCInst & Inst,
+ unsigned Insn,
+ uint64_t Adddress,
+ const void *Decoder);
+static DecodeStatus DecodeT2MOVTWInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeArmMOVTWInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeSMLAInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeCPSInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2CPSInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeAddrModeImm12Operand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeAddrMode5Operand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeAddrMode7Operand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeBranchImmInstruction(llvm::MCInst &Inst,unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVCVTImmOperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeAddrMode6Operand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVLD1DupInstruction(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVLD2DupInstruction(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVLD3DupInstruction(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVLD4DupInstruction(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeNEONModImmInstruction(llvm::MCInst &Inst,unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVSHLMaxInstruction(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeShiftRight8Imm(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeShiftRight16Imm(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeShiftRight32Imm(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeShiftRight64Imm(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeTBLInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodePostIdxReg(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeCoprocessor(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeMemBarrierOption(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeMSRMask(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeDoubleRegLoad(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeDoubleRegStore(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeLDRPreImm(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeLDRPreReg(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeSTRPreImm(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeSTRPreReg(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVLD1LN(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVLD2LN(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVLD3LN(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVLD4LN(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVST1LN(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVST2LN(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVST3LN(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVST4LN(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVMOVSRR(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVMOVRRS(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+
+static DecodeStatus DecodeThumbAddSpecialReg(llvm::MCInst &Inst, uint16_t Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeThumbBROperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2BROperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeThumbCmpBROperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeThumbAddrModeRR(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeThumbAddrModeIS(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeThumbAddrModePC(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeThumbAddrModeSP(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2AddrModeSOReg(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2LoadShift(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2Imm8S4(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2AddrModeImm8s4(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2AddrModeImm0_1020s4(llvm::MCInst &Inst,unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2Imm8(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2AddrModeImm8(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeThumbAddSPImm(llvm::MCInst &Inst, uint16_t Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeThumbAddSPReg(llvm::MCInst &Inst, uint16_t Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeThumbCPS(llvm::MCInst &Inst, uint16_t Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeThumbBLXOffset(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2AddrModeImm12(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeThumbTableBranch(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeThumb2BCCInstruction(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2SOImm(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeThumbBCCTargetOperand(llvm::MCInst &Inst,unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeThumbBLTargetOperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeIT(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2LDRDPreInstruction(llvm::MCInst &Inst,unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2STRDPreInstruction(llvm::MCInst &Inst,unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2Adr(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2LdStPre(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2ShifterImmOperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+
+
+
+#include "ARMGenDisassemblerTables.inc"
+#include "ARMGenInstrInfo.inc"
+#include "ARMGenEDInfo.inc"
+
+static MCDisassembler *createARMDisassembler(const Target &T, const MCSubtargetInfo &STI) {
+ return new ARMDisassembler(STI);
+}
+
+static MCDisassembler *createThumbDisassembler(const Target &T, const MCSubtargetInfo &STI) {
+ return new ThumbDisassembler(STI);
+}
+
+EDInstInfo *ARMDisassembler::getEDInfo() const {
+ return instInfoARM;
+}
+
+EDInstInfo *ThumbDisassembler::getEDInfo() const {
+ return instInfoARM;
+}
+
+DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
+ const MemoryObject &Region,
+ uint64_t Address,
+ raw_ostream &os,
+ raw_ostream &cs) const {
+ CommentStream = &cs;
+
+ uint8_t bytes[4];
+
+ assert(!(STI.getFeatureBits() & ARM::ModeThumb) &&
+ "Asked to disassemble an ARM instruction but Subtarget is in Thumb mode!");
+
+ // We want to read exactly 4 bytes of data.
+ if (Region.readBytes(Address, 4, (uint8_t*)bytes, NULL) == -1) {
+ Size = 0;
+ return MCDisassembler::Fail;
+ }
+
+ // Encoded as a small-endian 32-bit word in the stream.
+ uint32_t insn = (bytes[3] << 24) |
+ (bytes[2] << 16) |
+ (bytes[1] << 8) |
+ (bytes[0] << 0);
+
+ // Calling the auto-generated decoder function.
+ DecodeStatus result = decodeARMInstruction32(MI, insn, Address, this, STI);
+ if (result != MCDisassembler::Fail) {
+ Size = 4;
+ return result;
+ }
+
+ // VFP and NEON instructions, similarly, are shared between ARM
+ // and Thumb modes.
+ MI.clear();
+ result = decodeVFPInstruction32(MI, insn, Address, this, STI);
+ if (result != MCDisassembler::Fail) {
+ Size = 4;
+ return result;
+ }
+
+ MI.clear();
+ result = decodeNEONDataInstruction32(MI, insn, Address, this, STI);
+ if (result != MCDisassembler::Fail) {
+ Size = 4;
+ // Add a fake predicate operand, because we share these instruction
+ // definitions with Thumb2 where these instructions are predicable.
+ if (!DecodePredicateOperand(MI, 0xE, Address, this))
+ return MCDisassembler::Fail;
+ return result;
+ }
+
+ MI.clear();
+ result = decodeNEONLoadStoreInstruction32(MI, insn, Address, this, STI);
+ if (result != MCDisassembler::Fail) {
+ Size = 4;
+ // Add a fake predicate operand, because we share these instruction
+ // definitions with Thumb2 where these instructions are predicable.
+ if (!DecodePredicateOperand(MI, 0xE, Address, this))
+ return MCDisassembler::Fail;
+ return result;
+ }
+
+ MI.clear();
+ result = decodeNEONDupInstruction32(MI, insn, Address, this, STI);
+ if (result != MCDisassembler::Fail) {
+ Size = 4;
+ // Add a fake predicate operand, because we share these instruction
+ // definitions with Thumb2 where these instructions are predicable.
+ if (!DecodePredicateOperand(MI, 0xE, Address, this))
+ return MCDisassembler::Fail;
+ return result;
+ }
+
+ MI.clear();
+
+ Size = 0;
+ return MCDisassembler::Fail;
+}
+
+namespace llvm {
+extern MCInstrDesc ARMInsts[];
+}
+
+/// tryAddingSymbolicOperand - trys to add a symbolic operand in place of the
+/// immediate Value in the MCInst. The immediate Value has had any PC
+/// adjustment made by the caller. If the instruction is a branch instruction
+/// then isBranch is true, else false. If the getOpInfo() function was set as
+/// part of the setupForSymbolicDisassembly() call then that function is called
+/// to get any symbolic information at the Address for this instruction. If
+/// that returns non-zero then the symbolic information it returns is used to
+/// create an MCExpr and that is added as an operand to the MCInst. If
+/// getOpInfo() returns zero and isBranch is true then a symbol look up for
+/// Value is done and if a symbol is found an MCExpr is created with that, else
+/// an MCExpr with Value is created. This function returns true if it adds an
+/// operand to the MCInst and false otherwise.
+static bool tryAddingSymbolicOperand(uint64_t Address, int32_t Value,
+ bool isBranch, uint64_t InstSize,
+ MCInst &MI, const void *Decoder) {
+ const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder);
+ LLVMOpInfoCallback getOpInfo = Dis->getLLVMOpInfoCallback();
+ if (!getOpInfo)
+ return false;
+
+ struct LLVMOpInfo1 SymbolicOp;
+ SymbolicOp.Value = Value;
+ void *DisInfo = Dis->getDisInfoBlock();
+ if (!getOpInfo(DisInfo, Address, 0 /* Offset */, InstSize, 1, &SymbolicOp)) {
+ if (isBranch) {
+ LLVMSymbolLookupCallback SymbolLookUp =
+ Dis->getLLVMSymbolLookupCallback();
+ if (SymbolLookUp) {
+ uint64_t ReferenceType;
+ ReferenceType = LLVMDisassembler_ReferenceType_In_Branch;
+ const char *ReferenceName;
+ const char *Name = SymbolLookUp(DisInfo, Value, &ReferenceType, Address,
+ &ReferenceName);
+ if (Name) {
+ SymbolicOp.AddSymbol.Name = Name;
+ SymbolicOp.AddSymbol.Present = true;
+ SymbolicOp.Value = 0;
+ }
+ else {
+ SymbolicOp.Value = Value;
+ }
+ if(ReferenceType == LLVMDisassembler_ReferenceType_Out_SymbolStub)
+ (*Dis->CommentStream) << "symbol stub for: " << ReferenceName;
+ }
+ else {
+ return false;
+ }
+ }
+ else {
+ return false;
+ }
+ }
+
+ MCContext *Ctx = Dis->getMCContext();
+ const MCExpr *Add = NULL;
+ if (SymbolicOp.AddSymbol.Present) {
+ if (SymbolicOp.AddSymbol.Name) {
+ StringRef Name(SymbolicOp.AddSymbol.Name);
+ MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name);
+ Add = MCSymbolRefExpr::Create(Sym, *Ctx);
+ } else {
+ Add = MCConstantExpr::Create(SymbolicOp.AddSymbol.Value, *Ctx);
+ }
+ }
+
+ const MCExpr *Sub = NULL;
+ if (SymbolicOp.SubtractSymbol.Present) {
+ if (SymbolicOp.SubtractSymbol.Name) {
+ StringRef Name(SymbolicOp.SubtractSymbol.Name);
+ MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name);
+ Sub = MCSymbolRefExpr::Create(Sym, *Ctx);
+ } else {
+ Sub = MCConstantExpr::Create(SymbolicOp.SubtractSymbol.Value, *Ctx);
+ }
+ }
+
+ const MCExpr *Off = NULL;
+ if (SymbolicOp.Value != 0)
+ Off = MCConstantExpr::Create(SymbolicOp.Value, *Ctx);
+
+ const MCExpr *Expr;
+ if (Sub) {
+ const MCExpr *LHS;
+ if (Add)
+ LHS = MCBinaryExpr::CreateSub(Add, Sub, *Ctx);
+ else
+ LHS = MCUnaryExpr::CreateMinus(Sub, *Ctx);
+ if (Off != 0)
+ Expr = MCBinaryExpr::CreateAdd(LHS, Off, *Ctx);
+ else
+ Expr = LHS;
+ } else if (Add) {
+ if (Off != 0)
+ Expr = MCBinaryExpr::CreateAdd(Add, Off, *Ctx);
+ else
+ Expr = Add;
+ } else {
+ if (Off != 0)
+ Expr = Off;
+ else
+ Expr = MCConstantExpr::Create(0, *Ctx);
+ }
+
+ if (SymbolicOp.VariantKind == LLVMDisassembler_VariantKind_ARM_HI16)
+ MI.addOperand(MCOperand::CreateExpr(ARMMCExpr::CreateUpper16(Expr, *Ctx)));
+ else if (SymbolicOp.VariantKind == LLVMDisassembler_VariantKind_ARM_LO16)
+ MI.addOperand(MCOperand::CreateExpr(ARMMCExpr::CreateLower16(Expr, *Ctx)));
+ else if (SymbolicOp.VariantKind == LLVMDisassembler_VariantKind_None)
+ MI.addOperand(MCOperand::CreateExpr(Expr));
+ else
+ assert(0 && "bad SymbolicOp.VariantKind");
+
+ return true;
+}
+
+/// tryAddingPcLoadReferenceComment - trys to add a comment as to what is being
+/// referenced by a load instruction with the base register that is the Pc.
+/// These can often be values in a literal pool near the Address of the
+/// instruction. The Address of the instruction and its immediate Value are
+/// used as a possible literal pool entry. The SymbolLookUp call back will
+/// return the name of a symbol referenced by the the literal pool's entry if
+/// the referenced address is that of a symbol. Or it will return a pointer to
+/// a literal 'C' string if the referenced address of the literal pool's entry
+/// is an address into a section with 'C' string literals.
+static void tryAddingPcLoadReferenceComment(uint64_t Address, int Value,
+ const void *Decoder) {
+ const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder);
+ LLVMSymbolLookupCallback SymbolLookUp = Dis->getLLVMSymbolLookupCallback();
+ if (SymbolLookUp) {
+ void *DisInfo = Dis->getDisInfoBlock();
+ uint64_t ReferenceType;
+ ReferenceType = LLVMDisassembler_ReferenceType_In_PCrel_Load;
+ const char *ReferenceName;
+ (void)SymbolLookUp(DisInfo, Value, &ReferenceType, Address, &ReferenceName);
+ if(ReferenceType == LLVMDisassembler_ReferenceType_Out_LitPool_SymAddr ||
+ ReferenceType == LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr)
+ (*Dis->CommentStream) << "literal pool for: " << ReferenceName;
+ }
+}
+
+// Thumb1 instructions don't have explicit S bits. Rather, they
+// implicitly set CPSR. Since it's not represented in the encoding, the
+// auto-generated decoder won't inject the CPSR operand. We need to fix
+// that as a post-pass.
+static void AddThumb1SBit(MCInst &MI, bool InITBlock) {
+ const MCOperandInfo *OpInfo = ARMInsts[MI.getOpcode()].OpInfo;
+ unsigned short NumOps = ARMInsts[MI.getOpcode()].NumOperands;
+ MCInst::iterator I = MI.begin();
+ for (unsigned i = 0; i < NumOps; ++i, ++I) {
+ if (I == MI.end()) break;
+ if (OpInfo[i].isOptionalDef() && OpInfo[i].RegClass == ARM::CCRRegClassID) {
+ if (i > 0 && OpInfo[i-1].isPredicate()) continue;
+ MI.insert(I, MCOperand::CreateReg(InITBlock ? 0 : ARM::CPSR));
+ return;
+ }
+ }
+
+ MI.insert(I, MCOperand::CreateReg(InITBlock ? 0 : ARM::CPSR));
+}
+
+// Most Thumb instructions don't have explicit predicates in the
+// encoding, but rather get their predicates from IT context. We need
+// to fix up the predicate operands using this context information as a
+// post-pass.
+MCDisassembler::DecodeStatus
+ThumbDisassembler::AddThumbPredicate(MCInst &MI) const {
+ MCDisassembler::DecodeStatus S = Success;
+
+ // A few instructions actually have predicates encoded in them. Don't
+ // try to overwrite it if we're seeing one of those.
+ switch (MI.getOpcode()) {
+ case ARM::tBcc:
+ case ARM::t2Bcc:
+ case ARM::tCBZ:
+ case ARM::tCBNZ:
+ case ARM::tCPS:
+ case ARM::t2CPS3p:
+ case ARM::t2CPS2p:
+ case ARM::t2CPS1p:
+ case ARM::tMOVSr:
+ case ARM::tSETEND:
+ // Some instructions (mostly conditional branches) are not
+ // allowed in IT blocks.
+ if (!ITBlock.empty())
+ S = SoftFail;
+ else
+ return Success;
+ break;
+ case ARM::tB:
+ case ARM::t2B:
+ case ARM::t2TBB:
+ case ARM::t2TBH:
+ // Some instructions (mostly unconditional branches) can
+ // only appears at the end of, or outside of, an IT.
+ if (ITBlock.size() > 1)
+ S = SoftFail;
+ break;
+ default:
+ break;
+ }
+
+ // If we're in an IT block, base the predicate on that. Otherwise,
+ // assume a predicate of AL.
+ unsigned CC;
+ if (!ITBlock.empty()) {
+ CC = ITBlock.back();
+ if (CC == 0xF)
+ CC = ARMCC::AL;
+ ITBlock.pop_back();
+ } else
+ CC = ARMCC::AL;
+
+ const MCOperandInfo *OpInfo = ARMInsts[MI.getOpcode()].OpInfo;
+ unsigned short NumOps = ARMInsts[MI.getOpcode()].NumOperands;
+ MCInst::iterator I = MI.begin();
+ for (unsigned i = 0; i < NumOps; ++i, ++I) {
+ if (I == MI.end()) break;
+ if (OpInfo[i].isPredicate()) {
+ I = MI.insert(I, MCOperand::CreateImm(CC));
+ ++I;
+ if (CC == ARMCC::AL)
+ MI.insert(I, MCOperand::CreateReg(0));
+ else
+ MI.insert(I, MCOperand::CreateReg(ARM::CPSR));
+ return S;
+ }
+ }
+
+ I = MI.insert(I, MCOperand::CreateImm(CC));
+ ++I;
+ if (CC == ARMCC::AL)
+ MI.insert(I, MCOperand::CreateReg(0));
+ else
+ MI.insert(I, MCOperand::CreateReg(ARM::CPSR));
+
+ return S;
+}
+
+// Thumb VFP instructions are a special case. Because we share their
+// encodings between ARM and Thumb modes, and they are predicable in ARM
+// mode, the auto-generated decoder will give them an (incorrect)
+// predicate operand. We need to rewrite these operands based on the IT
+// context as a post-pass.
+void ThumbDisassembler::UpdateThumbVFPPredicate(MCInst &MI) const {
+ unsigned CC;
+ if (!ITBlock.empty()) {
+ CC = ITBlock.back();
+ ITBlock.pop_back();
+ } else
+ CC = ARMCC::AL;
+
+ const MCOperandInfo *OpInfo = ARMInsts[MI.getOpcode()].OpInfo;
+ MCInst::iterator I = MI.begin();
+ unsigned short NumOps = ARMInsts[MI.getOpcode()].NumOperands;
+ for (unsigned i = 0; i < NumOps; ++i, ++I) {
+ if (OpInfo[i].isPredicate() ) {
+ I->setImm(CC);
+ ++I;
+ if (CC == ARMCC::AL)
+ I->setReg(0);
+ else
+ I->setReg(ARM::CPSR);
+ return;
+ }
+ }
+}
+
+DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
+ const MemoryObject &Region,
+ uint64_t Address,
+ raw_ostream &os,
+ raw_ostream &cs) const {
+ CommentStream = &cs;
+
+ uint8_t bytes[4];
+
+ assert((STI.getFeatureBits() & ARM::ModeThumb) &&
+ "Asked to disassemble in Thumb mode but Subtarget is in ARM mode!");
+
+ // We want to read exactly 2 bytes of data.
+ if (Region.readBytes(Address, 2, (uint8_t*)bytes, NULL) == -1) {
+ Size = 0;
+ return MCDisassembler::Fail;
+ }
+
+ uint16_t insn16 = (bytes[1] << 8) | bytes[0];
+ DecodeStatus result = decodeThumbInstruction16(MI, insn16, Address, this, STI);
+ if (result != MCDisassembler::Fail) {
+ Size = 2;
+ Check(result, AddThumbPredicate(MI));
+ return result;
+ }
+
+ MI.clear();
+ result = decodeThumbSBitInstruction16(MI, insn16, Address, this, STI);
+ if (result) {
+ Size = 2;
+ bool InITBlock = !ITBlock.empty();
+ Check(result, AddThumbPredicate(MI));
+ AddThumb1SBit(MI, InITBlock);
+ return result;
+ }
+
+ MI.clear();
+ result = decodeThumb2Instruction16(MI, insn16, Address, this, STI);
+ if (result != MCDisassembler::Fail) {
+ Size = 2;
+
+ // Nested IT blocks are UNPREDICTABLE. Must be checked before we add
+ // the Thumb predicate.
+ if (MI.getOpcode() == ARM::t2IT && !ITBlock.empty())
+ result = MCDisassembler::SoftFail;
+
+ Check(result, AddThumbPredicate(MI));
+
+ // If we find an IT instruction, we need to parse its condition
+ // code and mask operands so that we can apply them correctly
+ // to the subsequent instructions.
+ if (MI.getOpcode() == ARM::t2IT) {
+
+ // (3 - the number of trailing zeros) is the number of then / else.
+ unsigned firstcond = MI.getOperand(0).getImm();
+ unsigned Mask = MI.getOperand(1).getImm();
+ unsigned CondBit0 = Mask >> 4 & 1;
+ unsigned NumTZ = CountTrailingZeros_32(Mask);
+ assert(NumTZ <= 3 && "Invalid IT mask!");
+ for (unsigned Pos = 3, e = NumTZ; Pos > e; --Pos) {
+ bool T = ((Mask >> Pos) & 1) == CondBit0;
+ if (T)
+ ITBlock.insert(ITBlock.begin(), firstcond);
+ else
+ ITBlock.insert(ITBlock.begin(), firstcond ^ 1);
+ }
+
+ ITBlock.push_back(firstcond);
+ }
+
+ return result;
+ }
+
+ // We want to read exactly 4 bytes of data.
+ if (Region.readBytes(Address, 4, (uint8_t*)bytes, NULL) == -1) {
+ Size = 0;
+ return MCDisassembler::Fail;
+ }
+
+ uint32_t insn32 = (bytes[3] << 8) |
+ (bytes[2] << 0) |
+ (bytes[1] << 24) |
+ (bytes[0] << 16);
+ MI.clear();
+ result = decodeThumbInstruction32(MI, insn32, Address, this, STI);
+ if (result != MCDisassembler::Fail) {
+ Size = 4;
+ bool InITBlock = ITBlock.size();
+ Check(result, AddThumbPredicate(MI));
+ AddThumb1SBit(MI, InITBlock);
+ return result;
+ }
+
+ MI.clear();
+ result = decodeThumb2Instruction32(MI, insn32, Address, this, STI);
+ if (result != MCDisassembler::Fail) {
+ Size = 4;
+ Check(result, AddThumbPredicate(MI));
+ return result;
+ }
+
+ MI.clear();
+ result = decodeVFPInstruction32(MI, insn32, Address, this, STI);
+ if (result != MCDisassembler::Fail) {
+ Size = 4;
+ UpdateThumbVFPPredicate(MI);
+ return result;
+ }
+
+ MI.clear();
+ result = decodeNEONDupInstruction32(MI, insn32, Address, this, STI);
+ if (result != MCDisassembler::Fail) {
+ Size = 4;
+ Check(result, AddThumbPredicate(MI));
+ return result;
+ }
+
+ if (fieldFromInstruction32(insn32, 24, 8) == 0xF9) {
+ MI.clear();
+ uint32_t NEONLdStInsn = insn32;
+ NEONLdStInsn &= 0xF0FFFFFF;
+ NEONLdStInsn |= 0x04000000;
+ result = decodeNEONLoadStoreInstruction32(MI, NEONLdStInsn, Address, this, STI);
+ if (result != MCDisassembler::Fail) {
+ Size = 4;
+ Check(result, AddThumbPredicate(MI));
+ return result;
+ }
+ }
+
+ if (fieldFromInstruction32(insn32, 24, 4) == 0xF) {
+ MI.clear();
+ uint32_t NEONDataInsn = insn32;
+ NEONDataInsn &= 0xF0FFFFFF; // Clear bits 27-24
+ NEONDataInsn |= (NEONDataInsn & 0x10000000) >> 4; // Move bit 28 to bit 24
+ NEONDataInsn |= 0x12000000; // Set bits 28 and 25
+ result = decodeNEONDataInstruction32(MI, NEONDataInsn, Address, this, STI);
+ if (result != MCDisassembler::Fail) {
+ Size = 4;
+ Check(result, AddThumbPredicate(MI));
+ return result;
+ }
+ }
+
+ Size = 0;
+ return MCDisassembler::Fail;
+}
+
+
+extern "C" void LLVMInitializeARMDisassembler() {
+ TargetRegistry::RegisterMCDisassembler(TheARMTarget,
+ createARMDisassembler);
+ TargetRegistry::RegisterMCDisassembler(TheThumbTarget,
+ createThumbDisassembler);
+}
+
+static const unsigned GPRDecoderTable[] = {
+ ARM::R0, ARM::R1, ARM::R2, ARM::R3,
+ ARM::R4, ARM::R5, ARM::R6, ARM::R7,
+ ARM::R8, ARM::R9, ARM::R10, ARM::R11,
+ ARM::R12, ARM::SP, ARM::LR, ARM::PC
+};
+
+static DecodeStatus DecodeGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 15)
+ return MCDisassembler::Fail;
+
+ unsigned Register = GPRDecoderTable[RegNo];
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus
+DecodeGPRnopcRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo == 15) return MCDisassembler::Fail;
+ return DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder);
+}
+
+static DecodeStatus DecodetGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 7)
+ return MCDisassembler::Fail;
+ return DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder);
+}
+
+static DecodeStatus DecodetcGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ unsigned Register = 0;
+ switch (RegNo) {
+ case 0:
+ Register = ARM::R0;
+ break;
+ case 1:
+ Register = ARM::R1;
+ break;
+ case 2:
+ Register = ARM::R2;
+ break;
+ case 3:
+ Register = ARM::R3;
+ break;
+ case 9:
+ Register = ARM::R9;
+ break;
+ case 12:
+ Register = ARM::R12;
+ break;
+ default:
+ return MCDisassembler::Fail;
+ }
+
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecoderGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo == 13 || RegNo == 15) return MCDisassembler::Fail;
+ return DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder);
+}
+
+static const unsigned SPRDecoderTable[] = {
+ ARM::S0, ARM::S1, ARM::S2, ARM::S3,
+ ARM::S4, ARM::S5, ARM::S6, ARM::S7,
+ ARM::S8, ARM::S9, ARM::S10, ARM::S11,
+ ARM::S12, ARM::S13, ARM::S14, ARM::S15,
+ ARM::S16, ARM::S17, ARM::S18, ARM::S19,
+ ARM::S20, ARM::S21, ARM::S22, ARM::S23,
+ ARM::S24, ARM::S25, ARM::S26, ARM::S27,
+ ARM::S28, ARM::S29, ARM::S30, ARM::S31
+};
+
+static DecodeStatus DecodeSPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ unsigned Register = SPRDecoderTable[RegNo];
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+static const unsigned DPRDecoderTable[] = {
+ ARM::D0, ARM::D1, ARM::D2, ARM::D3,
+ ARM::D4, ARM::D5, ARM::D6, ARM::D7,
+ ARM::D8, ARM::D9, ARM::D10, ARM::D11,
+ ARM::D12, ARM::D13, ARM::D14, ARM::D15,
+ ARM::D16, ARM::D17, ARM::D18, ARM::D19,
+ ARM::D20, ARM::D21, ARM::D22, ARM::D23,
+ ARM::D24, ARM::D25, ARM::D26, ARM::D27,
+ ARM::D28, ARM::D29, ARM::D30, ARM::D31
+};
+
+static DecodeStatus DecodeDPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ unsigned Register = DPRDecoderTable[RegNo];
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeDPR_8RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 7)
+ return MCDisassembler::Fail;
+ return DecodeDPRRegisterClass(Inst, RegNo, Address, Decoder);
+}
+
+static DecodeStatus
+DecodeDPR_VFP2RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 15)
+ return MCDisassembler::Fail;
+ return DecodeDPRRegisterClass(Inst, RegNo, Address, Decoder);
+}
+
+static const unsigned QPRDecoderTable[] = {
+ ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3,
+ ARM::Q4, ARM::Q5, ARM::Q6, ARM::Q7,
+ ARM::Q8, ARM::Q9, ARM::Q10, ARM::Q11,
+ ARM::Q12, ARM::Q13, ARM::Q14, ARM::Q15
+};
+
+
+static DecodeStatus DecodeQPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+ RegNo >>= 1;
+
+ unsigned Register = QPRDecoderTable[RegNo];
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodePredicateOperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ if (Val == 0xF) return MCDisassembler::Fail;
+ // AL predicate is not allowed on Thumb1 branches.
+ if (Inst.getOpcode() == ARM::tBcc && Val == 0xE)
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ if (Val == ARMCC::AL) {
+ Inst.addOperand(MCOperand::CreateReg(0));
+ } else
+ Inst.addOperand(MCOperand::CreateReg(ARM::CPSR));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeCCOutOperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ if (Val)
+ Inst.addOperand(MCOperand::CreateReg(ARM::CPSR));
+ else
+ Inst.addOperand(MCOperand::CreateReg(0));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeSOImmOperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ uint32_t imm = Val & 0xFF;
+ uint32_t rot = (Val & 0xF00) >> 7;
+ uint32_t rot_imm = (imm >> rot) | (imm << ((32-rot) & 0x1F));
+ Inst.addOperand(MCOperand::CreateImm(rot_imm));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeSORegImmOperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rm = fieldFromInstruction32(Val, 0, 4);
+ unsigned type = fieldFromInstruction32(Val, 5, 2);
+ unsigned imm = fieldFromInstruction32(Val, 7, 5);
+
+ // Register-immediate
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ ARM_AM::ShiftOpc Shift = ARM_AM::lsl;
+ switch (type) {
+ case 0:
+ Shift = ARM_AM::lsl;
+ break;
+ case 1:
+ Shift = ARM_AM::lsr;
+ break;
+ case 2:
+ Shift = ARM_AM::asr;
+ break;
+ case 3:
+ Shift = ARM_AM::ror;
+ break;
+ }
+
+ if (Shift == ARM_AM::ror && imm == 0)
+ Shift = ARM_AM::rrx;
+
+ unsigned Op = Shift | (imm << 3);
+ Inst.addOperand(MCOperand::CreateImm(Op));
+
+ return S;
+}
+
+static DecodeStatus DecodeSORegRegOperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rm = fieldFromInstruction32(Val, 0, 4);
+ unsigned type = fieldFromInstruction32(Val, 5, 2);
+ unsigned Rs = fieldFromInstruction32(Val, 8, 4);
+
+ // Register-register
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rs, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ ARM_AM::ShiftOpc Shift = ARM_AM::lsl;
+ switch (type) {
+ case 0:
+ Shift = ARM_AM::lsl;
+ break;
+ case 1:
+ Shift = ARM_AM::lsr;
+ break;
+ case 2:
+ Shift = ARM_AM::asr;
+ break;
+ case 3:
+ Shift = ARM_AM::ror;
+ break;
+ }
+
+ Inst.addOperand(MCOperand::CreateImm(Shift));
+
+ return S;
+}
+
+static DecodeStatus DecodeRegListOperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ bool writebackLoad = false;
+ unsigned writebackReg = 0;
+ switch (Inst.getOpcode()) {
+ default:
+ break;
+ case ARM::LDMIA_UPD:
+ case ARM::LDMDB_UPD:
+ case ARM::LDMIB_UPD:
+ case ARM::LDMDA_UPD:
+ case ARM::t2LDMIA_UPD:
+ case ARM::t2LDMDB_UPD:
+ writebackLoad = true;
+ writebackReg = Inst.getOperand(0).getReg();
+ break;
+ }
+
+ // Empty register lists are not allowed.
+ if (CountPopulation_32(Val) == 0) return MCDisassembler::Fail;
+ for (unsigned i = 0; i < 16; ++i) {
+ if (Val & (1 << i)) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, i, Address, Decoder)))
+ return MCDisassembler::Fail;
+ // Writeback not allowed if Rn is in the target list.
+ if (writebackLoad && writebackReg == Inst.end()[-1].getReg())
+ Check(S, MCDisassembler::SoftFail);
+ }
+ }
+
+ return S;
+}
+
+static DecodeStatus DecodeSPRRegListOperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Vd = fieldFromInstruction32(Val, 8, 4);
+ unsigned regs = Val & 0xFF;
+
+ if (!Check(S, DecodeSPRRegisterClass(Inst, Vd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ for (unsigned i = 0; i < (regs - 1); ++i) {
+ if (!Check(S, DecodeSPRRegisterClass(Inst, ++Vd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ return S;
+}
+
+static DecodeStatus DecodeDPRRegListOperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Vd = fieldFromInstruction32(Val, 8, 4);
+ unsigned regs = (Val & 0xFF) / 2;
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Vd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ for (unsigned i = 0; i < (regs - 1); ++i) {
+ if (!Check(S, DecodeDPRRegisterClass(Inst, ++Vd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ return S;
+}
+
+static DecodeStatus DecodeBitfieldMaskOperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ // This operand encodes a mask of contiguous zeros between a specified MSB
+ // and LSB. To decode it, we create the mask of all bits MSB-and-lower,
+ // the mask of all bits LSB-and-lower, and then xor them to create
+ // the mask of that's all ones on [msb, lsb]. Finally we not it to
+ // create the final mask.
+ unsigned msb = fieldFromInstruction32(Val, 5, 5);
+ unsigned lsb = fieldFromInstruction32(Val, 0, 5);
+
+ DecodeStatus S = MCDisassembler::Success;
+ if (lsb > msb) Check(S, MCDisassembler::SoftFail);
+
+ uint32_t msb_mask = 0xFFFFFFFF;
+ if (msb != 31) msb_mask = (1U << (msb+1)) - 1;
+ uint32_t lsb_mask = (1U << lsb) - 1;
+
+ Inst.addOperand(MCOperand::CreateImm(~(msb_mask ^ lsb_mask)));
+ return S;
+}
+
+static DecodeStatus DecodeCopMemInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned pred = fieldFromInstruction32(Insn, 28, 4);
+ unsigned CRd = fieldFromInstruction32(Insn, 12, 4);
+ unsigned coproc = fieldFromInstruction32(Insn, 8, 4);
+ unsigned imm = fieldFromInstruction32(Insn, 0, 8);
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned U = fieldFromInstruction32(Insn, 23, 1);
+
+ switch (Inst.getOpcode()) {
+ case ARM::LDC_OFFSET:
+ case ARM::LDC_PRE:
+ case ARM::LDC_POST:
+ case ARM::LDC_OPTION:
+ case ARM::LDCL_OFFSET:
+ case ARM::LDCL_PRE:
+ case ARM::LDCL_POST:
+ case ARM::LDCL_OPTION:
+ case ARM::STC_OFFSET:
+ case ARM::STC_PRE:
+ case ARM::STC_POST:
+ case ARM::STC_OPTION:
+ case ARM::STCL_OFFSET:
+ case ARM::STCL_PRE:
+ case ARM::STCL_POST:
+ case ARM::STCL_OPTION:
+ case ARM::t2LDC_OFFSET:
+ case ARM::t2LDC_PRE:
+ case ARM::t2LDC_POST:
+ case ARM::t2LDC_OPTION:
+ case ARM::t2LDCL_OFFSET:
+ case ARM::t2LDCL_PRE:
+ case ARM::t2LDCL_POST:
+ case ARM::t2LDCL_OPTION:
+ case ARM::t2STC_OFFSET:
+ case ARM::t2STC_PRE:
+ case ARM::t2STC_POST:
+ case ARM::t2STC_OPTION:
+ case ARM::t2STCL_OFFSET:
+ case ARM::t2STCL_PRE:
+ case ARM::t2STCL_POST:
+ case ARM::t2STCL_OPTION:
+ if (coproc == 0xA || coproc == 0xB)
+ return MCDisassembler::Fail;
+ break;
+ default:
+ break;
+ }
+
+ Inst.addOperand(MCOperand::CreateImm(coproc));
+ Inst.addOperand(MCOperand::CreateImm(CRd));
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ unsigned P = fieldFromInstruction32(Insn, 24, 1);
+ unsigned W = fieldFromInstruction32(Insn, 21, 1);
+
+ bool writeback = (P == 0) || (W == 1);
+ unsigned idx_mode = 0;
+ if (P && writeback)
+ idx_mode = ARMII::IndexModePre;
+ else if (!P && writeback)
+ idx_mode = ARMII::IndexModePost;
+
+ switch (Inst.getOpcode()) {
+ case ARM::t2LDC2_OFFSET:
+ case ARM::t2LDC2L_OFFSET:
+ case ARM::t2LDC2_PRE:
+ case ARM::t2LDC2L_PRE:
+ case ARM::t2STC2_OFFSET:
+ case ARM::t2STC2L_OFFSET:
+ case ARM::t2STC2_PRE:
+ case ARM::t2STC2L_PRE:
+ case ARM::LDC2_OFFSET:
+ case ARM::LDC2L_OFFSET:
+ case ARM::LDC2_PRE:
+ case ARM::LDC2L_PRE:
+ case ARM::STC2_OFFSET:
+ case ARM::STC2L_OFFSET:
+ case ARM::STC2_PRE:
+ case ARM::STC2L_PRE:
+ case ARM::t2LDC_OFFSET:
+ case ARM::t2LDCL_OFFSET:
+ case ARM::t2LDC_PRE:
+ case ARM::t2LDCL_PRE:
+ case ARM::t2STC_OFFSET:
+ case ARM::t2STCL_OFFSET:
+ case ARM::t2STC_PRE:
+ case ARM::t2STCL_PRE:
+ case ARM::LDC_OFFSET:
+ case ARM::LDCL_OFFSET:
+ case ARM::LDC_PRE:
+ case ARM::LDCL_PRE:
+ case ARM::STC_OFFSET:
+ case ARM::STCL_OFFSET:
+ case ARM::STC_PRE:
+ case ARM::STCL_PRE:
+ imm = ARM_AM::getAM5Opc(U ? ARM_AM::add : ARM_AM::sub, imm);
+ Inst.addOperand(MCOperand::CreateImm(imm));
+ break;
+ case ARM::t2LDC2_POST:
+ case ARM::t2LDC2L_POST:
+ case ARM::t2STC2_POST:
+ case ARM::t2STC2L_POST:
+ case ARM::LDC2_POST:
+ case ARM::LDC2L_POST:
+ case ARM::STC2_POST:
+ case ARM::STC2L_POST:
+ case ARM::t2LDC_POST:
+ case ARM::t2LDCL_POST:
+ case ARM::t2STC_POST:
+ case ARM::t2STCL_POST:
+ case ARM::LDC_POST:
+ case ARM::LDCL_POST:
+ case ARM::STC_POST:
+ case ARM::STCL_POST:
+ imm |= U << 8;
+ // fall through.
+ default:
+ // The 'option' variant doesn't encode 'U' in the immediate since
+ // the immediate is unsigned [0,255].
+ Inst.addOperand(MCOperand::CreateImm(imm));
+ break;
+ }
+
+ switch (Inst.getOpcode()) {
+ case ARM::LDC_OFFSET:
+ case ARM::LDC_PRE:
+ case ARM::LDC_POST:
+ case ARM::LDC_OPTION:
+ case ARM::LDCL_OFFSET:
+ case ARM::LDCL_PRE:
+ case ARM::LDCL_POST:
+ case ARM::LDCL_OPTION:
+ case ARM::STC_OFFSET:
+ case ARM::STC_PRE:
+ case ARM::STC_POST:
+ case ARM::STC_OPTION:
+ case ARM::STCL_OFFSET:
+ case ARM::STCL_PRE:
+ case ARM::STCL_POST:
+ case ARM::STCL_OPTION:
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ break;
+ }
+
+ return S;
+}
+
+static DecodeStatus
+DecodeAddrMode2IdxInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
+ unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+ unsigned imm = fieldFromInstruction32(Insn, 0, 12);
+ unsigned pred = fieldFromInstruction32(Insn, 28, 4);
+ unsigned reg = fieldFromInstruction32(Insn, 25, 1);
+ unsigned P = fieldFromInstruction32(Insn, 24, 1);
+ unsigned W = fieldFromInstruction32(Insn, 21, 1);
+
+ // On stores, the writeback operand precedes Rt.
+ switch (Inst.getOpcode()) {
+ case ARM::STR_POST_IMM:
+ case ARM::STR_POST_REG:
+ case ARM::STRB_POST_IMM:
+ case ARM::STRB_POST_REG:
+ case ARM::STRT_POST_REG:
+ case ARM::STRT_POST_IMM:
+ case ARM::STRBT_POST_REG:
+ case ARM::STRBT_POST_IMM:
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ break;
+ }
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ // On loads, the writeback operand comes after Rt.
+ switch (Inst.getOpcode()) {
+ case ARM::LDR_POST_IMM:
+ case ARM::LDR_POST_REG:
+ case ARM::LDRB_POST_IMM:
+ case ARM::LDRB_POST_REG:
+ case ARM::LDRBT_POST_REG:
+ case ARM::LDRBT_POST_IMM:
+ case ARM::LDRT_POST_REG:
+ case ARM::LDRT_POST_IMM:
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ break;
+ }
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ ARM_AM::AddrOpc Op = ARM_AM::add;
+ if (!fieldFromInstruction32(Insn, 23, 1))
+ Op = ARM_AM::sub;
+
+ bool writeback = (P == 0) || (W == 1);
+ unsigned idx_mode = 0;
+ if (P && writeback)
+ idx_mode = ARMII::IndexModePre;
+ else if (!P && writeback)
+ idx_mode = ARMII::IndexModePost;
+
+ if (writeback && (Rn == 15 || Rn == Rt))
+ S = MCDisassembler::SoftFail; // UNPREDICTABLE
+
+ if (reg) {
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ ARM_AM::ShiftOpc Opc = ARM_AM::lsl;
+ switch( fieldFromInstruction32(Insn, 5, 2)) {
+ case 0:
+ Opc = ARM_AM::lsl;
+ break;
+ case 1:
+ Opc = ARM_AM::lsr;
+ break;
+ case 2:
+ Opc = ARM_AM::asr;
+ break;
+ case 3:
+ Opc = ARM_AM::ror;
+ break;
+ default:
+ return MCDisassembler::Fail;
+ }
+ unsigned amt = fieldFromInstruction32(Insn, 7, 5);
+ unsigned imm = ARM_AM::getAM2Opc(Op, amt, Opc, idx_mode);
+
+ Inst.addOperand(MCOperand::CreateImm(imm));
+ } else {
+ Inst.addOperand(MCOperand::CreateReg(0));
+ unsigned tmp = ARM_AM::getAM2Opc(Op, imm, ARM_AM::lsl, idx_mode);
+ Inst.addOperand(MCOperand::CreateImm(tmp));
+ }
+
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeSORegMemOperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction32(Val, 13, 4);
+ unsigned Rm = fieldFromInstruction32(Val, 0, 4);
+ unsigned type = fieldFromInstruction32(Val, 5, 2);
+ unsigned imm = fieldFromInstruction32(Val, 7, 5);
+ unsigned U = fieldFromInstruction32(Val, 12, 1);
+
+ ARM_AM::ShiftOpc ShOp = ARM_AM::lsl;
+ switch (type) {
+ case 0:
+ ShOp = ARM_AM::lsl;
+ break;
+ case 1:
+ ShOp = ARM_AM::lsr;
+ break;
+ case 2:
+ ShOp = ARM_AM::asr;
+ break;
+ case 3:
+ ShOp = ARM_AM::ror;
+ break;
+ }
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ unsigned shift;
+ if (U)
+ shift = ARM_AM::getAM2Opc(ARM_AM::add, imm, ShOp);
+ else
+ shift = ARM_AM::getAM2Opc(ARM_AM::sub, imm, ShOp);
+ Inst.addOperand(MCOperand::CreateImm(shift));
+
+ return S;
+}
+
+static DecodeStatus
+DecodeAddrMode3Instruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+ unsigned type = fieldFromInstruction32(Insn, 22, 1);
+ unsigned imm = fieldFromInstruction32(Insn, 8, 4);
+ unsigned U = ((~fieldFromInstruction32(Insn, 23, 1)) & 1) << 8;
+ unsigned pred = fieldFromInstruction32(Insn, 28, 4);
+ unsigned W = fieldFromInstruction32(Insn, 21, 1);
+ unsigned P = fieldFromInstruction32(Insn, 24, 1);
+
+ bool writeback = (W == 1) | (P == 0);
+
+ // For {LD,ST}RD, Rt must be even, else undefined.
+ switch (Inst.getOpcode()) {
+ case ARM::STRD:
+ case ARM::STRD_PRE:
+ case ARM::STRD_POST:
+ case ARM::LDRD:
+ case ARM::LDRD_PRE:
+ case ARM::LDRD_POST:
+ if (Rt & 0x1) return MCDisassembler::Fail;
+ break;
+ default:
+ break;
+ }
+
+ if (writeback) { // Writeback
+ if (P)
+ U |= ARMII::IndexModePre << 9;
+ else
+ U |= ARMII::IndexModePost << 9;
+
+ // On stores, the writeback operand precedes Rt.
+ switch (Inst.getOpcode()) {
+ case ARM::STRD:
+ case ARM::STRD_PRE:
+ case ARM::STRD_POST:
+ case ARM::STRH:
+ case ARM::STRH_PRE:
+ case ARM::STRH_POST:
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ switch (Inst.getOpcode()) {
+ case ARM::STRD:
+ case ARM::STRD_PRE:
+ case ARM::STRD_POST:
+ case ARM::LDRD:
+ case ARM::LDRD_PRE:
+ case ARM::LDRD_POST:
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt+1, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ break;
+ }
+
+ if (writeback) {
+ // On loads, the writeback operand comes after Rt.
+ switch (Inst.getOpcode()) {
+ case ARM::LDRD:
+ case ARM::LDRD_PRE:
+ case ARM::LDRD_POST:
+ case ARM::LDRH:
+ case ARM::LDRH_PRE:
+ case ARM::LDRH_POST:
+ case ARM::LDRSH:
+ case ARM::LDRSH_PRE:
+ case ARM::LDRSH_POST:
+ case ARM::LDRSB:
+ case ARM::LDRSB_PRE:
+ case ARM::LDRSB_POST:
+ case ARM::LDRHTr:
+ case ARM::LDRSBTr:
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ if (type) {
+ Inst.addOperand(MCOperand::CreateReg(0));
+ Inst.addOperand(MCOperand::CreateImm(U | (imm << 4) | Rm));
+ } else {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(U));
+ }
+
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeRFEInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned mode = fieldFromInstruction32(Insn, 23, 2);
+
+ switch (mode) {
+ case 0:
+ mode = ARM_AM::da;
+ break;
+ case 1:
+ mode = ARM_AM::ia;
+ break;
+ case 2:
+ mode = ARM_AM::db;
+ break;
+ case 3:
+ mode = ARM_AM::ib;
+ break;
+ }
+
+ Inst.addOperand(MCOperand::CreateImm(mode));
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeMemMultipleWritebackInstruction(llvm::MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned pred = fieldFromInstruction32(Insn, 28, 4);
+ unsigned reglist = fieldFromInstruction32(Insn, 0, 16);
+
+ if (pred == 0xF) {
+ switch (Inst.getOpcode()) {
+ case ARM::LDMDA:
+ Inst.setOpcode(ARM::RFEDA);
+ break;
+ case ARM::LDMDA_UPD:
+ Inst.setOpcode(ARM::RFEDA_UPD);
+ break;
+ case ARM::LDMDB:
+ Inst.setOpcode(ARM::RFEDB);
+ break;
+ case ARM::LDMDB_UPD:
+ Inst.setOpcode(ARM::RFEDB_UPD);
+ break;
+ case ARM::LDMIA:
+ Inst.setOpcode(ARM::RFEIA);
+ break;
+ case ARM::LDMIA_UPD:
+ Inst.setOpcode(ARM::RFEIA_UPD);
+ break;
+ case ARM::LDMIB:
+ Inst.setOpcode(ARM::RFEIB);
+ break;
+ case ARM::LDMIB_UPD:
+ Inst.setOpcode(ARM::RFEIB_UPD);
+ break;
+ case ARM::STMDA:
+ Inst.setOpcode(ARM::SRSDA);
+ break;
+ case ARM::STMDA_UPD:
+ Inst.setOpcode(ARM::SRSDA_UPD);
+ break;
+ case ARM::STMDB:
+ Inst.setOpcode(ARM::SRSDB);
+ break;
+ case ARM::STMDB_UPD:
+ Inst.setOpcode(ARM::SRSDB_UPD);
+ break;
+ case ARM::STMIA:
+ Inst.setOpcode(ARM::SRSIA);
+ break;
+ case ARM::STMIA_UPD:
+ Inst.setOpcode(ARM::SRSIA_UPD);
+ break;
+ case ARM::STMIB:
+ Inst.setOpcode(ARM::SRSIB);
+ break;
+ case ARM::STMIB_UPD:
+ Inst.setOpcode(ARM::SRSIB_UPD);
+ break;
+ default:
+ if (!Check(S, MCDisassembler::Fail)) return MCDisassembler::Fail;
+ }
+
+ // For stores (which become SRS's, the only operand is the mode.
+ if (fieldFromInstruction32(Insn, 20, 1) == 0) {
+ Inst.addOperand(
+ MCOperand::CreateImm(fieldFromInstruction32(Insn, 0, 4)));
+ return S;
+ }
+
+ return DecodeRFEInstruction(Inst, Insn, Address, Decoder);
+ }
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail; // Tied
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeRegListOperand(Inst, reglist, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeCPSInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ unsigned imod = fieldFromInstruction32(Insn, 18, 2);
+ unsigned M = fieldFromInstruction32(Insn, 17, 1);
+ unsigned iflags = fieldFromInstruction32(Insn, 6, 3);
+ unsigned mode = fieldFromInstruction32(Insn, 0, 5);
+
+ DecodeStatus S = MCDisassembler::Success;
+
+ // imod == '01' --> UNPREDICTABLE
+ // NOTE: Even though this is technically UNPREDICTABLE, we choose to
+ // return failure here. The '01' imod value is unprintable, so there's
+ // nothing useful we could do even if we returned UNPREDICTABLE.
+
+ if (imod == 1) return MCDisassembler::Fail;
+
+ if (imod && M) {
+ Inst.setOpcode(ARM::CPS3p);
+ Inst.addOperand(MCOperand::CreateImm(imod));
+ Inst.addOperand(MCOperand::CreateImm(iflags));
+ Inst.addOperand(MCOperand::CreateImm(mode));
+ } else if (imod && !M) {
+ Inst.setOpcode(ARM::CPS2p);
+ Inst.addOperand(MCOperand::CreateImm(imod));
+ Inst.addOperand(MCOperand::CreateImm(iflags));
+ if (mode) S = MCDisassembler::SoftFail;
+ } else if (!imod && M) {
+ Inst.setOpcode(ARM::CPS1p);
+ Inst.addOperand(MCOperand::CreateImm(mode));
+ if (iflags) S = MCDisassembler::SoftFail;
+ } else {
+ // imod == '00' && M == '0' --> UNPREDICTABLE
+ Inst.setOpcode(ARM::CPS1p);
+ Inst.addOperand(MCOperand::CreateImm(mode));
+ S = MCDisassembler::SoftFail;
+ }
+
+ return S;
+}
+
+static DecodeStatus DecodeT2CPSInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ unsigned imod = fieldFromInstruction32(Insn, 9, 2);
+ unsigned M = fieldFromInstruction32(Insn, 8, 1);
+ unsigned iflags = fieldFromInstruction32(Insn, 5, 3);
+ unsigned mode = fieldFromInstruction32(Insn, 0, 5);
+
+ DecodeStatus S = MCDisassembler::Success;
+
+ // imod == '01' --> UNPREDICTABLE
+ // NOTE: Even though this is technically UNPREDICTABLE, we choose to
+ // return failure here. The '01' imod value is unprintable, so there's
+ // nothing useful we could do even if we returned UNPREDICTABLE.
+
+ if (imod == 1) return MCDisassembler::Fail;
+
+ if (imod && M) {
+ Inst.setOpcode(ARM::t2CPS3p);
+ Inst.addOperand(MCOperand::CreateImm(imod));
+ Inst.addOperand(MCOperand::CreateImm(iflags));
+ Inst.addOperand(MCOperand::CreateImm(mode));
+ } else if (imod && !M) {
+ Inst.setOpcode(ARM::t2CPS2p);
+ Inst.addOperand(MCOperand::CreateImm(imod));
+ Inst.addOperand(MCOperand::CreateImm(iflags));
+ if (mode) S = MCDisassembler::SoftFail;
+ } else if (!imod && M) {
+ Inst.setOpcode(ARM::t2CPS1p);
+ Inst.addOperand(MCOperand::CreateImm(mode));
+ if (iflags) S = MCDisassembler::SoftFail;
+ } else {
+ // imod == '00' && M == '0' --> UNPREDICTABLE
+ Inst.setOpcode(ARM::t2CPS1p);
+ Inst.addOperand(MCOperand::CreateImm(mode));
+ S = MCDisassembler::SoftFail;
+ }
+
+ return S;
+}
+
+static DecodeStatus DecodeT2MOVTWInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rd = fieldFromInstruction32(Insn, 8, 4);
+ unsigned imm = 0;
+
+ imm |= (fieldFromInstruction32(Insn, 0, 8) << 0);
+ imm |= (fieldFromInstruction32(Insn, 12, 3) << 8);
+ imm |= (fieldFromInstruction32(Insn, 16, 4) << 12);
+ imm |= (fieldFromInstruction32(Insn, 26, 1) << 11);
+
+ if (Inst.getOpcode() == ARM::t2MOVTi16)
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ if (!tryAddingSymbolicOperand(Address, imm, false, 4, Inst, Decoder))
+ Inst.addOperand(MCOperand::CreateImm(imm));
+
+ return S;
+}
+
+static DecodeStatus DecodeArmMOVTWInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
+ unsigned pred = fieldFromInstruction32(Insn, 28, 4);
+ unsigned imm = 0;
+
+ imm |= (fieldFromInstruction32(Insn, 0, 12) << 0);
+ imm |= (fieldFromInstruction32(Insn, 16, 4) << 12);
+
+ if (Inst.getOpcode() == ARM::MOVTi16)
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ if (!tryAddingSymbolicOperand(Address, imm, false, 4, Inst, Decoder))
+ Inst.addOperand(MCOperand::CreateImm(imm));
+
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeSMLAInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rd = fieldFromInstruction32(Insn, 16, 4);
+ unsigned Rn = fieldFromInstruction32(Insn, 0, 4);
+ unsigned Rm = fieldFromInstruction32(Insn, 8, 4);
+ unsigned Ra = fieldFromInstruction32(Insn, 12, 4);
+ unsigned pred = fieldFromInstruction32(Insn, 28, 4);
+
+ if (pred == 0xF)
+ return DecodeCPSInstruction(Inst, Insn, Address, Decoder);
+
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Ra, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeAddrModeImm12Operand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned add = fieldFromInstruction32(Val, 12, 1);
+ unsigned imm = fieldFromInstruction32(Val, 0, 12);
+ unsigned Rn = fieldFromInstruction32(Val, 13, 4);
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ if (!add) imm *= -1;
+ if (imm == 0 && !add) imm = INT32_MIN;
+ Inst.addOperand(MCOperand::CreateImm(imm));
+ if (Rn == 15)
+ tryAddingPcLoadReferenceComment(Address, Address + imm + 8, Decoder);
+
+ return S;
+}
+
+static DecodeStatus DecodeAddrMode5Operand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction32(Val, 9, 4);
+ unsigned U = fieldFromInstruction32(Val, 8, 1);
+ unsigned imm = fieldFromInstruction32(Val, 0, 8);
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ if (U)
+ Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM5Opc(ARM_AM::add, imm)));
+ else
+ Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM5Opc(ARM_AM::sub, imm)));
+
+ return S;
+}
+
+static DecodeStatus DecodeAddrMode7Operand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ return DecodeGPRRegisterClass(Inst, Val, Address, Decoder);
+}
+
+static DecodeStatus
+DecodeBranchImmInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned pred = fieldFromInstruction32(Insn, 28, 4);
+ unsigned imm = fieldFromInstruction32(Insn, 0, 24) << 2;
+
+ if (pred == 0xF) {
+ Inst.setOpcode(ARM::BLXi);
+ imm |= fieldFromInstruction32(Insn, 24, 1) << 1;
+ Inst.addOperand(MCOperand::CreateImm(SignExtend32<26>(imm)));
+ return S;
+ }
+
+ if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<26>(imm) + 8, true,
+ 4, Inst, Decoder))
+ Inst.addOperand(MCOperand::CreateImm(SignExtend32<26>(imm)));
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+
+static DecodeStatus DecodeVCVTImmOperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ Inst.addOperand(MCOperand::CreateImm(64 - Val));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeAddrMode6Operand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rm = fieldFromInstruction32(Val, 0, 4);
+ unsigned align = fieldFromInstruction32(Val, 4, 2);
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!align)
+ Inst.addOperand(MCOperand::CreateImm(0));
+ else
+ Inst.addOperand(MCOperand::CreateImm(4 << align));
+
+ return S;
+}
+
+static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
+ Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
+ unsigned wb = fieldFromInstruction32(Insn, 16, 4);
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ Rn |= fieldFromInstruction32(Insn, 4, 2) << 4;
+ unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+
+ // First output register
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ // Second output register
+ switch (Inst.getOpcode()) {
+ case ARM::VLD1q8:
+ case ARM::VLD1q16:
+ case ARM::VLD1q32:
+ case ARM::VLD1q64:
+ case ARM::VLD1q8_UPD:
+ case ARM::VLD1q16_UPD:
+ case ARM::VLD1q32_UPD:
+ case ARM::VLD1q64_UPD:
+ case ARM::VLD1d8T:
+ case ARM::VLD1d16T:
+ case ARM::VLD1d32T:
+ case ARM::VLD1d64T:
+ case ARM::VLD1d8T_UPD:
+ case ARM::VLD1d16T_UPD:
+ case ARM::VLD1d32T_UPD:
+ case ARM::VLD1d64T_UPD:
+ case ARM::VLD1d8Q:
+ case ARM::VLD1d16Q:
+ case ARM::VLD1d32Q:
+ case ARM::VLD1d64Q:
+ case ARM::VLD1d8Q_UPD:
+ case ARM::VLD1d16Q_UPD:
+ case ARM::VLD1d32Q_UPD:
+ case ARM::VLD1d64Q_UPD:
+ case ARM::VLD2d8:
+ case ARM::VLD2d16:
+ case ARM::VLD2d32:
+ case ARM::VLD2d8_UPD:
+ case ARM::VLD2d16_UPD:
+ case ARM::VLD2d32_UPD:
+ case ARM::VLD2q8:
+ case ARM::VLD2q16:
+ case ARM::VLD2q32:
+ case ARM::VLD2q8_UPD:
+ case ARM::VLD2q16_UPD:
+ case ARM::VLD2q32_UPD:
+ case ARM::VLD3d8:
+ case ARM::VLD3d16:
+ case ARM::VLD3d32:
+ case ARM::VLD3d8_UPD:
+ case ARM::VLD3d16_UPD:
+ case ARM::VLD3d32_UPD:
+ case ARM::VLD4d8:
+ case ARM::VLD4d16:
+ case ARM::VLD4d32:
+ case ARM::VLD4d8_UPD:
+ case ARM::VLD4d16_UPD:
+ case ARM::VLD4d32_UPD:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+1)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ case ARM::VLD2b8:
+ case ARM::VLD2b16:
+ case ARM::VLD2b32:
+ case ARM::VLD2b8_UPD:
+ case ARM::VLD2b16_UPD:
+ case ARM::VLD2b32_UPD:
+ case ARM::VLD3q8:
+ case ARM::VLD3q16:
+ case ARM::VLD3q32:
+ case ARM::VLD3q8_UPD:
+ case ARM::VLD3q16_UPD:
+ case ARM::VLD3q32_UPD:
+ case ARM::VLD4q8:
+ case ARM::VLD4q16:
+ case ARM::VLD4q32:
+ case ARM::VLD4q8_UPD:
+ case ARM::VLD4q16_UPD:
+ case ARM::VLD4q32_UPD:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+2)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ default:
+ break;
+ }
+
+ // Third output register
+ switch(Inst.getOpcode()) {
+ case ARM::VLD1d8T:
+ case ARM::VLD1d16T:
+ case ARM::VLD1d32T:
+ case ARM::VLD1d64T:
+ case ARM::VLD1d8T_UPD:
+ case ARM::VLD1d16T_UPD:
+ case ARM::VLD1d32T_UPD:
+ case ARM::VLD1d64T_UPD:
+ case ARM::VLD1d8Q:
+ case ARM::VLD1d16Q:
+ case ARM::VLD1d32Q:
+ case ARM::VLD1d64Q:
+ case ARM::VLD1d8Q_UPD:
+ case ARM::VLD1d16Q_UPD:
+ case ARM::VLD1d32Q_UPD:
+ case ARM::VLD1d64Q_UPD:
+ case ARM::VLD2q8:
+ case ARM::VLD2q16:
+ case ARM::VLD2q32:
+ case ARM::VLD2q8_UPD:
+ case ARM::VLD2q16_UPD:
+ case ARM::VLD2q32_UPD:
+ case ARM::VLD3d8:
+ case ARM::VLD3d16:
+ case ARM::VLD3d32:
+ case ARM::VLD3d8_UPD:
+ case ARM::VLD3d16_UPD:
+ case ARM::VLD3d32_UPD:
+ case ARM::VLD4d8:
+ case ARM::VLD4d16:
+ case ARM::VLD4d32:
+ case ARM::VLD4d8_UPD:
+ case ARM::VLD4d16_UPD:
+ case ARM::VLD4d32_UPD:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+2)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ case ARM::VLD3q8:
+ case ARM::VLD3q16:
+ case ARM::VLD3q32:
+ case ARM::VLD3q8_UPD:
+ case ARM::VLD3q16_UPD:
+ case ARM::VLD3q32_UPD:
+ case ARM::VLD4q8:
+ case ARM::VLD4q16:
+ case ARM::VLD4q32:
+ case ARM::VLD4q8_UPD:
+ case ARM::VLD4q16_UPD:
+ case ARM::VLD4q32_UPD:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+4)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ break;
+ }
+
+ // Fourth output register
+ switch (Inst.getOpcode()) {
+ case ARM::VLD1d8Q:
+ case ARM::VLD1d16Q:
+ case ARM::VLD1d32Q:
+ case ARM::VLD1d64Q:
+ case ARM::VLD1d8Q_UPD:
+ case ARM::VLD1d16Q_UPD:
+ case ARM::VLD1d32Q_UPD:
+ case ARM::VLD1d64Q_UPD:
+ case ARM::VLD2q8:
+ case ARM::VLD2q16:
+ case ARM::VLD2q32:
+ case ARM::VLD2q8_UPD:
+ case ARM::VLD2q16_UPD:
+ case ARM::VLD2q32_UPD:
+ case ARM::VLD4d8:
+ case ARM::VLD4d16:
+ case ARM::VLD4d32:
+ case ARM::VLD4d8_UPD:
+ case ARM::VLD4d16_UPD:
+ case ARM::VLD4d32_UPD:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+3)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ case ARM::VLD4q8:
+ case ARM::VLD4q16:
+ case ARM::VLD4q32:
+ case ARM::VLD4q8_UPD:
+ case ARM::VLD4q16_UPD:
+ case ARM::VLD4q32_UPD:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+6)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ break;
+ }
+
+ // Writeback operand
+ switch (Inst.getOpcode()) {
+ case ARM::VLD1d8_UPD:
+ case ARM::VLD1d16_UPD:
+ case ARM::VLD1d32_UPD:
+ case ARM::VLD1d64_UPD:
+ case ARM::VLD1q8_UPD:
+ case ARM::VLD1q16_UPD:
+ case ARM::VLD1q32_UPD:
+ case ARM::VLD1q64_UPD:
+ case ARM::VLD1d8T_UPD:
+ case ARM::VLD1d16T_UPD:
+ case ARM::VLD1d32T_UPD:
+ case ARM::VLD1d64T_UPD:
+ case ARM::VLD1d8Q_UPD:
+ case ARM::VLD1d16Q_UPD:
+ case ARM::VLD1d32Q_UPD:
+ case ARM::VLD1d64Q_UPD:
+ case ARM::VLD2d8_UPD:
+ case ARM::VLD2d16_UPD:
+ case ARM::VLD2d32_UPD:
+ case ARM::VLD2q8_UPD:
+ case ARM::VLD2q16_UPD:
+ case ARM::VLD2q32_UPD:
+ case ARM::VLD2b8_UPD:
+ case ARM::VLD2b16_UPD:
+ case ARM::VLD2b32_UPD:
+ case ARM::VLD3d8_UPD:
+ case ARM::VLD3d16_UPD:
+ case ARM::VLD3d32_UPD:
+ case ARM::VLD3q8_UPD:
+ case ARM::VLD3q16_UPD:
+ case ARM::VLD3q32_UPD:
+ case ARM::VLD4d8_UPD:
+ case ARM::VLD4d16_UPD:
+ case ARM::VLD4d32_UPD:
+ case ARM::VLD4q8_UPD:
+ case ARM::VLD4q16_UPD:
+ case ARM::VLD4q32_UPD:
+ if (!Check(S, DecodeGPRRegisterClass(Inst, wb, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ break;
+ }
+
+ // AddrMode6 Base (register+alignment)
+ if (!Check(S, DecodeAddrMode6Operand(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ // AddrMode6 Offset (register)
+ if (Rm == 0xD)
+ Inst.addOperand(MCOperand::CreateReg(0));
+ else if (Rm != 0xF) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ return S;
+}
+
+static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
+ Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
+ unsigned wb = fieldFromInstruction32(Insn, 16, 4);
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ Rn |= fieldFromInstruction32(Insn, 4, 2) << 4;
+ unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+
+ // Writeback Operand
+ switch (Inst.getOpcode()) {
+ case ARM::VST1d8_UPD:
+ case ARM::VST1d16_UPD:
+ case ARM::VST1d32_UPD:
+ case ARM::VST1d64_UPD:
+ case ARM::VST1q8_UPD:
+ case ARM::VST1q16_UPD:
+ case ARM::VST1q32_UPD:
+ case ARM::VST1q64_UPD:
+ case ARM::VST1d8T_UPD:
+ case ARM::VST1d16T_UPD:
+ case ARM::VST1d32T_UPD:
+ case ARM::VST1d64T_UPD:
+ case ARM::VST1d8Q_UPD:
+ case ARM::VST1d16Q_UPD:
+ case ARM::VST1d32Q_UPD:
+ case ARM::VST1d64Q_UPD:
+ case ARM::VST2d8_UPD:
+ case ARM::VST2d16_UPD:
+ case ARM::VST2d32_UPD:
+ case ARM::VST2q8_UPD:
+ case ARM::VST2q16_UPD:
+ case ARM::VST2q32_UPD:
+ case ARM::VST2b8_UPD:
+ case ARM::VST2b16_UPD:
+ case ARM::VST2b32_UPD:
+ case ARM::VST3d8_UPD:
+ case ARM::VST3d16_UPD:
+ case ARM::VST3d32_UPD:
+ case ARM::VST3q8_UPD:
+ case ARM::VST3q16_UPD:
+ case ARM::VST3q32_UPD:
+ case ARM::VST4d8_UPD:
+ case ARM::VST4d16_UPD:
+ case ARM::VST4d32_UPD:
+ case ARM::VST4q8_UPD:
+ case ARM::VST4q16_UPD:
+ case ARM::VST4q32_UPD:
+ if (!Check(S, DecodeGPRRegisterClass(Inst, wb, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ break;
+ }
+
+ // AddrMode6 Base (register+alignment)
+ if (!Check(S, DecodeAddrMode6Operand(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ // AddrMode6 Offset (register)
+ if (Rm == 0xD)
+ Inst.addOperand(MCOperand::CreateReg(0));
+ else if (Rm != 0xF) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ // First input register
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ // Second input register
+ switch (Inst.getOpcode()) {
+ case ARM::VST1q8:
+ case ARM::VST1q16:
+ case ARM::VST1q32:
+ case ARM::VST1q64:
+ case ARM::VST1q8_UPD:
+ case ARM::VST1q16_UPD:
+ case ARM::VST1q32_UPD:
+ case ARM::VST1q64_UPD:
+ case ARM::VST1d8T:
+ case ARM::VST1d16T:
+ case ARM::VST1d32T:
+ case ARM::VST1d64T:
+ case ARM::VST1d8T_UPD:
+ case ARM::VST1d16T_UPD:
+ case ARM::VST1d32T_UPD:
+ case ARM::VST1d64T_UPD:
+ case ARM::VST1d8Q:
+ case ARM::VST1d16Q:
+ case ARM::VST1d32Q:
+ case ARM::VST1d64Q:
+ case ARM::VST1d8Q_UPD:
+ case ARM::VST1d16Q_UPD:
+ case ARM::VST1d32Q_UPD:
+ case ARM::VST1d64Q_UPD:
+ case ARM::VST2d8:
+ case ARM::VST2d16:
+ case ARM::VST2d32:
+ case ARM::VST2d8_UPD:
+ case ARM::VST2d16_UPD:
+ case ARM::VST2d32_UPD:
+ case ARM::VST2q8:
+ case ARM::VST2q16:
+ case ARM::VST2q32:
+ case ARM::VST2q8_UPD:
+ case ARM::VST2q16_UPD:
+ case ARM::VST2q32_UPD:
+ case ARM::VST3d8:
+ case ARM::VST3d16:
+ case ARM::VST3d32:
+ case ARM::VST3d8_UPD:
+ case ARM::VST3d16_UPD:
+ case ARM::VST3d32_UPD:
+ case ARM::VST4d8:
+ case ARM::VST4d16:
+ case ARM::VST4d32:
+ case ARM::VST4d8_UPD:
+ case ARM::VST4d16_UPD:
+ case ARM::VST4d32_UPD:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+1)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ case ARM::VST2b8:
+ case ARM::VST2b16:
+ case ARM::VST2b32:
+ case ARM::VST2b8_UPD:
+ case ARM::VST2b16_UPD:
+ case ARM::VST2b32_UPD:
+ case ARM::VST3q8:
+ case ARM::VST3q16:
+ case ARM::VST3q32:
+ case ARM::VST3q8_UPD:
+ case ARM::VST3q16_UPD:
+ case ARM::VST3q32_UPD:
+ case ARM::VST4q8:
+ case ARM::VST4q16:
+ case ARM::VST4q32:
+ case ARM::VST4q8_UPD:
+ case ARM::VST4q16_UPD:
+ case ARM::VST4q32_UPD:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+2)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ break;
+ }
+
+ // Third input register
+ switch (Inst.getOpcode()) {
+ case ARM::VST1d8T:
+ case ARM::VST1d16T:
+ case ARM::VST1d32T:
+ case ARM::VST1d64T:
+ case ARM::VST1d8T_UPD:
+ case ARM::VST1d16T_UPD:
+ case ARM::VST1d32T_UPD:
+ case ARM::VST1d64T_UPD:
+ case ARM::VST1d8Q:
+ case ARM::VST1d16Q:
+ case ARM::VST1d32Q:
+ case ARM::VST1d64Q:
+ case ARM::VST1d8Q_UPD:
+ case ARM::VST1d16Q_UPD:
+ case ARM::VST1d32Q_UPD:
+ case ARM::VST1d64Q_UPD:
+ case ARM::VST2q8:
+ case ARM::VST2q16:
+ case ARM::VST2q32:
+ case ARM::VST2q8_UPD:
+ case ARM::VST2q16_UPD:
+ case ARM::VST2q32_UPD:
+ case ARM::VST3d8:
+ case ARM::VST3d16:
+ case ARM::VST3d32:
+ case ARM::VST3d8_UPD:
+ case ARM::VST3d16_UPD:
+ case ARM::VST3d32_UPD:
+ case ARM::VST4d8:
+ case ARM::VST4d16:
+ case ARM::VST4d32:
+ case ARM::VST4d8_UPD:
+ case ARM::VST4d16_UPD:
+ case ARM::VST4d32_UPD:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+2)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ case ARM::VST3q8:
+ case ARM::VST3q16:
+ case ARM::VST3q32:
+ case ARM::VST3q8_UPD:
+ case ARM::VST3q16_UPD:
+ case ARM::VST3q32_UPD:
+ case ARM::VST4q8:
+ case ARM::VST4q16:
+ case ARM::VST4q32:
+ case ARM::VST4q8_UPD:
+ case ARM::VST4q16_UPD:
+ case ARM::VST4q32_UPD:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+4)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ break;
+ }
+
+ // Fourth input register
+ switch (Inst.getOpcode()) {
+ case ARM::VST1d8Q:
+ case ARM::VST1d16Q:
+ case ARM::VST1d32Q:
+ case ARM::VST1d64Q:
+ case ARM::VST1d8Q_UPD:
+ case ARM::VST1d16Q_UPD:
+ case ARM::VST1d32Q_UPD:
+ case ARM::VST1d64Q_UPD:
+ case ARM::VST2q8:
+ case ARM::VST2q16:
+ case ARM::VST2q32:
+ case ARM::VST2q8_UPD:
+ case ARM::VST2q16_UPD:
+ case ARM::VST2q32_UPD:
+ case ARM::VST4d8:
+ case ARM::VST4d16:
+ case ARM::VST4d32:
+ case ARM::VST4d8_UPD:
+ case ARM::VST4d16_UPD:
+ case ARM::VST4d32_UPD:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+3)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ case ARM::VST4q8:
+ case ARM::VST4q16:
+ case ARM::VST4q32:
+ case ARM::VST4q8_UPD:
+ case ARM::VST4q16_UPD:
+ case ARM::VST4q32_UPD:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+6)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ break;
+ }
+
+ return S;
+}
+
+static DecodeStatus DecodeVLD1DupInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
+ Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+ unsigned align = fieldFromInstruction32(Insn, 4, 1);
+ unsigned size = fieldFromInstruction32(Insn, 6, 2);
+ unsigned regs = fieldFromInstruction32(Insn, 5, 1) + 1;
+
+ align *= (1 << size);
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (regs == 2) {
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+1)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+ if (Rm != 0xF) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(align));
+
+ if (Rm == 0xD)
+ Inst.addOperand(MCOperand::CreateReg(0));
+ else if (Rm != 0xF) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ return S;
+}
+
+static DecodeStatus DecodeVLD2DupInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
+ Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+ unsigned align = fieldFromInstruction32(Insn, 4, 1);
+ unsigned size = 1 << fieldFromInstruction32(Insn, 6, 2);
+ unsigned inc = fieldFromInstruction32(Insn, 5, 1) + 1;
+ align *= 2*size;
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+inc)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (Rm != 0xF) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(align));
+
+ if (Rm == 0xD)
+ Inst.addOperand(MCOperand::CreateReg(0));
+ else if (Rm != 0xF) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ return S;
+}
+
+static DecodeStatus DecodeVLD3DupInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
+ Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+ unsigned inc = fieldFromInstruction32(Insn, 5, 1) + 1;
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+inc)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+2*inc)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (Rm != 0xF) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(0));
+
+ if (Rm == 0xD)
+ Inst.addOperand(MCOperand::CreateReg(0));
+ else if (Rm != 0xF) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ return S;
+}
+
+static DecodeStatus DecodeVLD4DupInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
+ Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+ unsigned size = fieldFromInstruction32(Insn, 6, 2);
+ unsigned inc = fieldFromInstruction32(Insn, 5, 1) + 1;
+ unsigned align = fieldFromInstruction32(Insn, 4, 1);
+
+ if (size == 0x3) {
+ size = 4;
+ align = 16;
+ } else {
+ if (size == 2) {
+ size = 1 << size;
+ align *= 8;
+ } else {
+ size = 1 << size;
+ align *= 4*size;
+ }
+ }
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+inc)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+2*inc)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+3*inc)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (Rm != 0xF) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(align));
+
+ if (Rm == 0xD)
+ Inst.addOperand(MCOperand::CreateReg(0));
+ else if (Rm != 0xF) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ return S;
+}
+
+static DecodeStatus
+DecodeNEONModImmInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
+ Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
+ unsigned imm = fieldFromInstruction32(Insn, 0, 4);
+ imm |= fieldFromInstruction32(Insn, 16, 3) << 4;
+ imm |= fieldFromInstruction32(Insn, 24, 1) << 7;
+ imm |= fieldFromInstruction32(Insn, 8, 4) << 8;
+ imm |= fieldFromInstruction32(Insn, 5, 1) << 12;
+ unsigned Q = fieldFromInstruction32(Insn, 6, 1);
+
+ if (Q) {
+ if (!Check(S, DecodeQPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ } else {
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ Inst.addOperand(MCOperand::CreateImm(imm));
+
+ switch (Inst.getOpcode()) {
+ case ARM::VORRiv4i16:
+ case ARM::VORRiv2i32:
+ case ARM::VBICiv4i16:
+ case ARM::VBICiv2i32:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ case ARM::VORRiv8i16:
+ case ARM::VORRiv4i32:
+ case ARM::VBICiv8i16:
+ case ARM::VBICiv4i32:
+ if (!Check(S, DecodeQPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ break;
+ }
+
+ return S;
+}
+
+static DecodeStatus DecodeVSHLMaxInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
+ Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
+ unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+ Rm |= fieldFromInstruction32(Insn, 5, 1) << 4;
+ unsigned size = fieldFromInstruction32(Insn, 18, 2);
+
+ if (!Check(S, DecodeQPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(8 << size));
+
+ return S;
+}
+
+static DecodeStatus DecodeShiftRight8Imm(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ Inst.addOperand(MCOperand::CreateImm(8 - Val));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeShiftRight16Imm(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ Inst.addOperand(MCOperand::CreateImm(16 - Val));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeShiftRight32Imm(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ Inst.addOperand(MCOperand::CreateImm(32 - Val));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeShiftRight64Imm(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ Inst.addOperand(MCOperand::CreateImm(64 - Val));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeTBLInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
+ Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ Rn |= fieldFromInstruction32(Insn, 7, 1) << 4;
+ unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+ Rm |= fieldFromInstruction32(Insn, 5, 1) << 4;
+ unsigned op = fieldFromInstruction32(Insn, 6, 1);
+ unsigned length = fieldFromInstruction32(Insn, 8, 2) + 1;
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (op) {
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail; // Writeback
+ }
+
+ for (unsigned i = 0; i < length; ++i) {
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rn+i)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeThumbAddSpecialReg(llvm::MCInst &Inst, uint16_t Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned dst = fieldFromInstruction16(Insn, 8, 3);
+ unsigned imm = fieldFromInstruction16(Insn, 0, 8);
+
+ if (!Check(S, DecodetGPRRegisterClass(Inst, dst, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ switch(Inst.getOpcode()) {
+ default:
+ return MCDisassembler::Fail;
+ case ARM::tADR:
+ break; // tADR does not explicitly represent the PC as an operand.
+ case ARM::tADDrSPi:
+ Inst.addOperand(MCOperand::CreateReg(ARM::SP));
+ break;
+ }
+
+ Inst.addOperand(MCOperand::CreateImm(imm));
+ return S;
+}
+
+static DecodeStatus DecodeThumbBROperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ Inst.addOperand(MCOperand::CreateImm(SignExtend32<12>(Val << 1)));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeT2BROperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ Inst.addOperand(MCOperand::CreateImm(SignExtend32<21>(Val)));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeThumbCmpBROperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ Inst.addOperand(MCOperand::CreateImm(SignExtend32<7>(Val << 1)));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeThumbAddrModeRR(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction32(Val, 0, 3);
+ unsigned Rm = fieldFromInstruction32(Val, 3, 3);
+
+ if (!Check(S, DecodetGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodetGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeThumbAddrModeIS(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction32(Val, 0, 3);
+ unsigned imm = fieldFromInstruction32(Val, 3, 5);
+
+ if (!Check(S, DecodetGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(imm));
+
+ return S;
+}
+
+static DecodeStatus DecodeThumbAddrModePC(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ unsigned imm = Val << 2;
+
+ Inst.addOperand(MCOperand::CreateImm(imm));
+ tryAddingPcLoadReferenceComment(Address, (Address & ~2u) + imm + 4, Decoder);
+
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeThumbAddrModeSP(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ Inst.addOperand(MCOperand::CreateReg(ARM::SP));
+ Inst.addOperand(MCOperand::CreateImm(Val));
+
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeT2AddrModeSOReg(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction32(Val, 6, 4);
+ unsigned Rm = fieldFromInstruction32(Val, 2, 4);
+ unsigned imm = fieldFromInstruction32(Val, 0, 2);
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(imm));
+
+ return S;
+}
+
+static DecodeStatus DecodeT2LoadShift(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ switch (Inst.getOpcode()) {
+ case ARM::t2PLDs:
+ case ARM::t2PLDWs:
+ case ARM::t2PLIs:
+ break;
+ default: {
+ unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+ }
+
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ if (Rn == 0xF) {
+ switch (Inst.getOpcode()) {
+ case ARM::t2LDRBs:
+ Inst.setOpcode(ARM::t2LDRBpci);
+ break;
+ case ARM::t2LDRHs:
+ Inst.setOpcode(ARM::t2LDRHpci);
+ break;
+ case ARM::t2LDRSHs:
+ Inst.setOpcode(ARM::t2LDRSHpci);
+ break;
+ case ARM::t2LDRSBs:
+ Inst.setOpcode(ARM::t2LDRSBpci);
+ break;
+ case ARM::t2PLDs:
+ Inst.setOpcode(ARM::t2PLDi12);
+ Inst.addOperand(MCOperand::CreateReg(ARM::PC));
+ break;
+ default:
+ return MCDisassembler::Fail;
+ }
+
+ int imm = fieldFromInstruction32(Insn, 0, 12);
+ if (!fieldFromInstruction32(Insn, 23, 1)) imm *= -1;
+ Inst.addOperand(MCOperand::CreateImm(imm));
+
+ return S;
+ }
+
+ unsigned addrmode = fieldFromInstruction32(Insn, 4, 2);
+ addrmode |= fieldFromInstruction32(Insn, 0, 4) << 2;
+ addrmode |= fieldFromInstruction32(Insn, 16, 4) << 6;
+ if (!Check(S, DecodeT2AddrModeSOReg(Inst, addrmode, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeT2Imm8S4(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ int imm = Val & 0xFF;
+ if (!(Val & 0x100)) imm *= -1;
+ Inst.addOperand(MCOperand::CreateImm(imm << 2));
+
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeT2AddrModeImm8s4(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction32(Val, 9, 4);
+ unsigned imm = fieldFromInstruction32(Val, 0, 9);
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeT2Imm8S4(Inst, imm, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeT2AddrModeImm0_1020s4(llvm::MCInst &Inst,unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction32(Val, 8, 4);
+ unsigned imm = fieldFromInstruction32(Val, 0, 8);
+
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::CreateImm(imm));
+
+ return S;
+}
+
+static DecodeStatus DecodeT2Imm8(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ int imm = Val & 0xFF;
+ if (Val == 0)
+ imm = INT32_MIN;
+ else if (!(Val & 0x100))
+ imm *= -1;
+ Inst.addOperand(MCOperand::CreateImm(imm));
+
+ return MCDisassembler::Success;
+}
+
+
+static DecodeStatus DecodeT2AddrModeImm8(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction32(Val, 9, 4);
+ unsigned imm = fieldFromInstruction32(Val, 0, 9);
+
+ // Some instructions always use an additive offset.
+ switch (Inst.getOpcode()) {
+ case ARM::t2LDRT:
+ case ARM::t2LDRBT:
+ case ARM::t2LDRHT:
+ case ARM::t2LDRSBT:
+ case ARM::t2LDRSHT:
+ case ARM::t2STRT:
+ case ARM::t2STRBT:
+ case ARM::t2STRHT:
+ imm |= 0x100;
+ break;
+ default:
+ break;
+ }
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeT2Imm8(Inst, imm, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeT2LdStPre(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned addr = fieldFromInstruction32(Insn, 0, 8);
+ addr |= fieldFromInstruction32(Insn, 9, 1) << 8;
+ addr |= Rn << 9;
+ unsigned load = fieldFromInstruction32(Insn, 20, 1);
+
+ if (!load) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ if (load) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ if (!Check(S, DecodeT2AddrModeImm8(Inst, addr, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeT2AddrModeImm12(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction32(Val, 13, 4);
+ unsigned imm = fieldFromInstruction32(Val, 0, 12);
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(imm));
+
+ return S;
+}
+
+
+static DecodeStatus DecodeThumbAddSPImm(llvm::MCInst &Inst, uint16_t Insn,
+ uint64_t Address, const void *Decoder) {
+ unsigned imm = fieldFromInstruction16(Insn, 0, 7);
+
+ Inst.addOperand(MCOperand::CreateReg(ARM::SP));
+ Inst.addOperand(MCOperand::CreateReg(ARM::SP));
+ Inst.addOperand(MCOperand::CreateImm(imm));
+
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeThumbAddSPReg(llvm::MCInst &Inst, uint16_t Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ if (Inst.getOpcode() == ARM::tADDrSP) {
+ unsigned Rdm = fieldFromInstruction16(Insn, 0, 3);
+ Rdm |= fieldFromInstruction16(Insn, 7, 1) << 3;
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rdm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rdm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateReg(ARM::SP));
+ } else if (Inst.getOpcode() == ARM::tADDspr) {
+ unsigned Rm = fieldFromInstruction16(Insn, 3, 4);
+
+ Inst.addOperand(MCOperand::CreateReg(ARM::SP));
+ Inst.addOperand(MCOperand::CreateReg(ARM::SP));
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ return S;
+}
+
+static DecodeStatus DecodeThumbCPS(llvm::MCInst &Inst, uint16_t Insn,
+ uint64_t Address, const void *Decoder) {
+ unsigned imod = fieldFromInstruction16(Insn, 4, 1) | 0x2;
+ unsigned flags = fieldFromInstruction16(Insn, 0, 3);
+
+ Inst.addOperand(MCOperand::CreateImm(imod));
+ Inst.addOperand(MCOperand::CreateImm(flags));
+
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodePostIdxReg(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+ unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+ unsigned add = fieldFromInstruction32(Insn, 4, 1);
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(add));
+
+ return S;
+}
+
+static DecodeStatus DecodeThumbBLXOffset(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ if (!tryAddingSymbolicOperand(Address,
+ (Address & ~2u) + SignExtend32<22>(Val << 1) + 4,
+ true, 4, Inst, Decoder))
+ Inst.addOperand(MCOperand::CreateImm(SignExtend32<22>(Val << 1)));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeCoprocessor(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ if (Val == 0xA || Val == 0xB)
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus
+DecodeThumbTableBranch(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+
+ if (Rn == ARM::SP) S = MCDisassembler::SoftFail;
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ return S;
+}
+
+static DecodeStatus
+DecodeThumb2BCCInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned pred = fieldFromInstruction32(Insn, 22, 4);
+ if (pred == 0xE || pred == 0xF) {
+ unsigned opc = fieldFromInstruction32(Insn, 4, 28);
+ switch (opc) {
+ default:
+ return MCDisassembler::Fail;
+ case 0xf3bf8f4:
+ Inst.setOpcode(ARM::t2DSB);
+ break;
+ case 0xf3bf8f5:
+ Inst.setOpcode(ARM::t2DMB);
+ break;
+ case 0xf3bf8f6:
+ Inst.setOpcode(ARM::t2ISB);
+ break;
+ }
+
+ unsigned imm = fieldFromInstruction32(Insn, 0, 4);
+ return DecodeMemBarrierOption(Inst, imm, Address, Decoder);
+ }
+
+ unsigned brtarget = fieldFromInstruction32(Insn, 0, 11) << 1;
+ brtarget |= fieldFromInstruction32(Insn, 11, 1) << 19;
+ brtarget |= fieldFromInstruction32(Insn, 13, 1) << 18;
+ brtarget |= fieldFromInstruction32(Insn, 16, 6) << 12;
+ brtarget |= fieldFromInstruction32(Insn, 26, 1) << 20;
+
+ if (!Check(S, DecodeT2BROperand(Inst, brtarget, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+// Decode a shifted immediate operand. These basically consist
+// of an 8-bit value, and a 4-bit directive that specifies either
+// a splat operation or a rotation.
+static DecodeStatus DecodeT2SOImm(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ unsigned ctrl = fieldFromInstruction32(Val, 10, 2);
+ if (ctrl == 0) {
+ unsigned byte = fieldFromInstruction32(Val, 8, 2);
+ unsigned imm = fieldFromInstruction32(Val, 0, 8);
+ switch (byte) {
+ case 0:
+ Inst.addOperand(MCOperand::CreateImm(imm));
+ break;
+ case 1:
+ Inst.addOperand(MCOperand::CreateImm((imm << 16) | imm));
+ break;
+ case 2:
+ Inst.addOperand(MCOperand::CreateImm((imm << 24) | (imm << 8)));
+ break;
+ case 3:
+ Inst.addOperand(MCOperand::CreateImm((imm << 24) | (imm << 16) |
+ (imm << 8) | imm));
+ break;
+ }
+ } else {
+ unsigned unrot = fieldFromInstruction32(Val, 0, 7) | 0x80;
+ unsigned rot = fieldFromInstruction32(Val, 7, 5);
+ unsigned imm = (unrot >> rot) | (unrot << ((32-rot)&31));
+ Inst.addOperand(MCOperand::CreateImm(imm));
+ }
+
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus
+DecodeThumbBCCTargetOperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder){
+ Inst.addOperand(MCOperand::CreateImm(Val << 1));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeThumbBLTargetOperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder){
+ Inst.addOperand(MCOperand::CreateImm(SignExtend32<22>(Val << 1)));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeMemBarrierOption(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ switch (Val) {
+ default:
+ return MCDisassembler::Fail;
+ case 0xF: // SY
+ case 0xE: // ST
+ case 0xB: // ISH
+ case 0xA: // ISHST
+ case 0x7: // NSH
+ case 0x6: // NSHST
+ case 0x3: // OSH
+ case 0x2: // OSHST
+ break;
+ }
+
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeMSRMask(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ if (!Val) return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeDoubleRegLoad(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned pred = fieldFromInstruction32(Insn, 28, 4);
+
+ if ((Rt & 1) || Rt == 0xE || Rn == 0xF) return MCDisassembler::Fail;
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt+1, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+
+static DecodeStatus DecodeDoubleRegStore(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder){
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
+ unsigned Rt = fieldFromInstruction32(Insn, 0, 4);
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned pred = fieldFromInstruction32(Insn, 28, 4);
+
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ if ((Rt & 1) || Rt == 0xE || Rn == 0xF) return MCDisassembler::Fail;
+ if (Rd == Rn || Rd == Rt || Rd == Rt+1) return MCDisassembler::Fail;
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt+1, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeLDRPreImm(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
+ unsigned imm = fieldFromInstruction32(Insn, 0, 12);
+ imm |= fieldFromInstruction32(Insn, 16, 4) << 13;
+ imm |= fieldFromInstruction32(Insn, 23, 1) << 12;
+ unsigned pred = fieldFromInstruction32(Insn, 28, 4);
+
+ if (Rn == 0xF || Rn == Rt) S = MCDisassembler::SoftFail;
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeAddrModeImm12Operand(Inst, imm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeLDRPreReg(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
+ unsigned imm = fieldFromInstruction32(Insn, 0, 12);
+ imm |= fieldFromInstruction32(Insn, 16, 4) << 13;
+ imm |= fieldFromInstruction32(Insn, 23, 1) << 12;
+ unsigned pred = fieldFromInstruction32(Insn, 28, 4);
+ unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+
+ if (Rn == 0xF || Rn == Rt) S = MCDisassembler::SoftFail;
+ if (Rm == 0xF) S = MCDisassembler::SoftFail;
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeSORegMemOperand(Inst, imm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+
+static DecodeStatus DecodeSTRPreImm(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
+ unsigned imm = fieldFromInstruction32(Insn, 0, 12);
+ imm |= fieldFromInstruction32(Insn, 16, 4) << 13;
+ imm |= fieldFromInstruction32(Insn, 23, 1) << 12;
+ unsigned pred = fieldFromInstruction32(Insn, 28, 4);
+
+ if (Rn == 0xF || Rn == Rt) S = MCDisassembler::SoftFail;
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeAddrModeImm12Operand(Inst, imm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeSTRPreReg(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
+ unsigned imm = fieldFromInstruction32(Insn, 0, 12);
+ imm |= fieldFromInstruction32(Insn, 16, 4) << 13;
+ imm |= fieldFromInstruction32(Insn, 23, 1) << 12;
+ unsigned pred = fieldFromInstruction32(Insn, 28, 4);
+
+ if (Rn == 0xF || Rn == Rt) S = MCDisassembler::SoftFail;
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeSORegMemOperand(Inst, imm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeVLD1LN(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+ unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
+ Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
+ unsigned size = fieldFromInstruction32(Insn, 10, 2);
+
+ unsigned align = 0;
+ unsigned index = 0;
+ switch (size) {
+ default:
+ return MCDisassembler::Fail;
+ case 0:
+ if (fieldFromInstruction32(Insn, 4, 1))
+ return MCDisassembler::Fail; // UNDEFINED
+ index = fieldFromInstruction32(Insn, 5, 3);
+ break;
+ case 1:
+ if (fieldFromInstruction32(Insn, 5, 1))
+ return MCDisassembler::Fail; // UNDEFINED
+ index = fieldFromInstruction32(Insn, 6, 2);
+ if (fieldFromInstruction32(Insn, 4, 1))
+ align = 2;
+ break;
+ case 2:
+ if (fieldFromInstruction32(Insn, 6, 1))
+ return MCDisassembler::Fail; // UNDEFINED
+ index = fieldFromInstruction32(Insn, 7, 1);
+ if (fieldFromInstruction32(Insn, 4, 2) != 0)
+ align = 4;
+ }
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (Rm != 0xF) { // Writeback
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(align));
+ if (Rm != 0xF) {
+ if (Rm != 0xD) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ } else
+ Inst.addOperand(MCOperand::CreateReg(0));
+ }
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(index));
+
+ return S;
+}
+
+static DecodeStatus DecodeVST1LN(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+ unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
+ Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
+ unsigned size = fieldFromInstruction32(Insn, 10, 2);
+
+ unsigned align = 0;
+ unsigned index = 0;
+ switch (size) {
+ default:
+ return MCDisassembler::Fail;
+ case 0:
+ if (fieldFromInstruction32(Insn, 4, 1))
+ return MCDisassembler::Fail; // UNDEFINED
+ index = fieldFromInstruction32(Insn, 5, 3);
+ break;
+ case 1:
+ if (fieldFromInstruction32(Insn, 5, 1))
+ return MCDisassembler::Fail; // UNDEFINED
+ index = fieldFromInstruction32(Insn, 6, 2);
+ if (fieldFromInstruction32(Insn, 4, 1))
+ align = 2;
+ break;
+ case 2:
+ if (fieldFromInstruction32(Insn, 6, 1))
+ return MCDisassembler::Fail; // UNDEFINED
+ index = fieldFromInstruction32(Insn, 7, 1);
+ if (fieldFromInstruction32(Insn, 4, 2) != 0)
+ align = 4;
+ }
+
+ if (Rm != 0xF) { // Writeback
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(align));
+ if (Rm != 0xF) {
+ if (Rm != 0xD) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ } else
+ Inst.addOperand(MCOperand::CreateReg(0));
+ }
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(index));
+
+ return S;
+}
+
+
+static DecodeStatus DecodeVLD2LN(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+ unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
+ Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
+ unsigned size = fieldFromInstruction32(Insn, 10, 2);
+
+ unsigned align = 0;
+ unsigned index = 0;
+ unsigned inc = 1;
+ switch (size) {
+ default:
+ return MCDisassembler::Fail;
+ case 0:
+ index = fieldFromInstruction32(Insn, 5, 3);
+ if (fieldFromInstruction32(Insn, 4, 1))
+ align = 2;
+ break;
+ case 1:
+ index = fieldFromInstruction32(Insn, 6, 2);
+ if (fieldFromInstruction32(Insn, 4, 1))
+ align = 4;
+ if (fieldFromInstruction32(Insn, 5, 1))
+ inc = 2;
+ break;
+ case 2:
+ if (fieldFromInstruction32(Insn, 5, 1))
+ return MCDisassembler::Fail; // UNDEFINED
+ index = fieldFromInstruction32(Insn, 7, 1);
+ if (fieldFromInstruction32(Insn, 4, 1) != 0)
+ align = 8;
+ if (fieldFromInstruction32(Insn, 6, 1))
+ inc = 2;
+ break;
+ }
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (Rm != 0xF) { // Writeback
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(align));
+ if (Rm != 0xF) {
+ if (Rm != 0xD) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ } else
+ Inst.addOperand(MCOperand::CreateReg(0));
+ }
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(index));
+
+ return S;
+}
+
+static DecodeStatus DecodeVST2LN(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+ unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
+ Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
+ unsigned size = fieldFromInstruction32(Insn, 10, 2);
+
+ unsigned align = 0;
+ unsigned index = 0;
+ unsigned inc = 1;
+ switch (size) {
+ default:
+ return MCDisassembler::Fail;
+ case 0:
+ index = fieldFromInstruction32(Insn, 5, 3);
+ if (fieldFromInstruction32(Insn, 4, 1))
+ align = 2;
+ break;
+ case 1:
+ index = fieldFromInstruction32(Insn, 6, 2);
+ if (fieldFromInstruction32(Insn, 4, 1))
+ align = 4;
+ if (fieldFromInstruction32(Insn, 5, 1))
+ inc = 2;
+ break;
+ case 2:
+ if (fieldFromInstruction32(Insn, 5, 1))
+ return MCDisassembler::Fail; // UNDEFINED
+ index = fieldFromInstruction32(Insn, 7, 1);
+ if (fieldFromInstruction32(Insn, 4, 1) != 0)
+ align = 8;
+ if (fieldFromInstruction32(Insn, 6, 1))
+ inc = 2;
+ break;
+ }
+
+ if (Rm != 0xF) { // Writeback
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(align));
+ if (Rm != 0xF) {
+ if (Rm != 0xD) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ } else
+ Inst.addOperand(MCOperand::CreateReg(0));
+ }
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(index));
+
+ return S;
+}
+
+
+static DecodeStatus DecodeVLD3LN(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+ unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
+ Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
+ unsigned size = fieldFromInstruction32(Insn, 10, 2);
+
+ unsigned align = 0;
+ unsigned index = 0;
+ unsigned inc = 1;
+ switch (size) {
+ default:
+ return MCDisassembler::Fail;
+ case 0:
+ if (fieldFromInstruction32(Insn, 4, 1))
+ return MCDisassembler::Fail; // UNDEFINED
+ index = fieldFromInstruction32(Insn, 5, 3);
+ break;
+ case 1:
+ if (fieldFromInstruction32(Insn, 4, 1))
+ return MCDisassembler::Fail; // UNDEFINED
+ index = fieldFromInstruction32(Insn, 6, 2);
+ if (fieldFromInstruction32(Insn, 5, 1))
+ inc = 2;
+ break;
+ case 2:
+ if (fieldFromInstruction32(Insn, 4, 2))
+ return MCDisassembler::Fail; // UNDEFINED
+ index = fieldFromInstruction32(Insn, 7, 1);
+ if (fieldFromInstruction32(Insn, 6, 1))
+ inc = 2;
+ break;
+ }
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+2*inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ if (Rm != 0xF) { // Writeback
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(align));
+ if (Rm != 0xF) {
+ if (Rm != 0xD) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ } else
+ Inst.addOperand(MCOperand::CreateReg(0));
+ }
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+2*inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(index));
+
+ return S;
+}
+
+static DecodeStatus DecodeVST3LN(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+ unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
+ Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
+ unsigned size = fieldFromInstruction32(Insn, 10, 2);
+
+ unsigned align = 0;
+ unsigned index = 0;
+ unsigned inc = 1;
+ switch (size) {
+ default:
+ return MCDisassembler::Fail;
+ case 0:
+ if (fieldFromInstruction32(Insn, 4, 1))
+ return MCDisassembler::Fail; // UNDEFINED
+ index = fieldFromInstruction32(Insn, 5, 3);
+ break;
+ case 1:
+ if (fieldFromInstruction32(Insn, 4, 1))
+ return MCDisassembler::Fail; // UNDEFINED
+ index = fieldFromInstruction32(Insn, 6, 2);
+ if (fieldFromInstruction32(Insn, 5, 1))
+ inc = 2;
+ break;
+ case 2:
+ if (fieldFromInstruction32(Insn, 4, 2))
+ return MCDisassembler::Fail; // UNDEFINED
+ index = fieldFromInstruction32(Insn, 7, 1);
+ if (fieldFromInstruction32(Insn, 6, 1))
+ inc = 2;
+ break;
+ }
+
+ if (Rm != 0xF) { // Writeback
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(align));
+ if (Rm != 0xF) {
+ if (Rm != 0xD) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ } else
+ Inst.addOperand(MCOperand::CreateReg(0));
+ }
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+2*inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(index));
+
+ return S;
+}
+
+
+static DecodeStatus DecodeVLD4LN(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+ unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
+ Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
+ unsigned size = fieldFromInstruction32(Insn, 10, 2);
+
+ unsigned align = 0;
+ unsigned index = 0;
+ unsigned inc = 1;
+ switch (size) {
+ default:
+ return MCDisassembler::Fail;
+ case 0:
+ if (fieldFromInstruction32(Insn, 4, 1))
+ align = 4;
+ index = fieldFromInstruction32(Insn, 5, 3);
+ break;
+ case 1:
+ if (fieldFromInstruction32(Insn, 4, 1))
+ align = 8;
+ index = fieldFromInstruction32(Insn, 6, 2);
+ if (fieldFromInstruction32(Insn, 5, 1))
+ inc = 2;
+ break;
+ case 2:
+ if (fieldFromInstruction32(Insn, 4, 2))
+ align = 4 << fieldFromInstruction32(Insn, 4, 2);
+ index = fieldFromInstruction32(Insn, 7, 1);
+ if (fieldFromInstruction32(Insn, 6, 1))
+ inc = 2;
+ break;
+ }
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+2*inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+3*inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ if (Rm != 0xF) { // Writeback
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(align));
+ if (Rm != 0xF) {
+ if (Rm != 0xD) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ } else
+ Inst.addOperand(MCOperand::CreateReg(0));
+ }
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+2*inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+3*inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(index));
+
+ return S;
+}
+
+static DecodeStatus DecodeVST4LN(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+ unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
+ Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
+ unsigned size = fieldFromInstruction32(Insn, 10, 2);
+
+ unsigned align = 0;
+ unsigned index = 0;
+ unsigned inc = 1;
+ switch (size) {
+ default:
+ return MCDisassembler::Fail;
+ case 0:
+ if (fieldFromInstruction32(Insn, 4, 1))
+ align = 4;
+ index = fieldFromInstruction32(Insn, 5, 3);
+ break;
+ case 1:
+ if (fieldFromInstruction32(Insn, 4, 1))
+ align = 8;
+ index = fieldFromInstruction32(Insn, 6, 2);
+ if (fieldFromInstruction32(Insn, 5, 1))
+ inc = 2;
+ break;
+ case 2:
+ if (fieldFromInstruction32(Insn, 4, 2))
+ align = 4 << fieldFromInstruction32(Insn, 4, 2);
+ index = fieldFromInstruction32(Insn, 7, 1);
+ if (fieldFromInstruction32(Insn, 6, 1))
+ inc = 2;
+ break;
+ }
+
+ if (Rm != 0xF) { // Writeback
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(align));
+ if (Rm != 0xF) {
+ if (Rm != 0xD) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ } else
+ Inst.addOperand(MCOperand::CreateReg(0));
+ }
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+2*inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+3*inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(index));
+
+ return S;
+}
+
+static DecodeStatus DecodeVMOVSRR(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+ unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
+ unsigned Rt2 = fieldFromInstruction32(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+ unsigned pred = fieldFromInstruction32(Insn, 28, 4);
+ Rm |= fieldFromInstruction32(Insn, 5, 1) << 4;
+
+ if (Rt == 0xF || Rt2 == 0xF || Rm == 0x1F)
+ S = MCDisassembler::SoftFail;
+
+ if (!Check(S, DecodeSPRRegisterClass(Inst, Rm , Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeSPRRegisterClass(Inst, Rm+1, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt , Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt2 , Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeVMOVRRS(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+ unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
+ unsigned Rt2 = fieldFromInstruction32(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+ unsigned pred = fieldFromInstruction32(Insn, 28, 4);
+ Rm |= fieldFromInstruction32(Insn, 5, 1) << 4;
+
+ if (Rt == 0xF || Rt2 == 0xF || Rm == 0x1F)
+ S = MCDisassembler::SoftFail;
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt , Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt2 , Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeSPRRegisterClass(Inst, Rm , Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeSPRRegisterClass(Inst, Rm+1, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeIT(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+ unsigned pred = fieldFromInstruction16(Insn, 4, 4);
+ // The InstPrinter needs to have the low bit of the predicate in
+ // the mask operand to be able to print it properly.
+ unsigned mask = fieldFromInstruction16(Insn, 0, 5);
+
+ if (pred == 0xF) {
+ pred = 0xE;
+ S = MCDisassembler::SoftFail;
+ }
+
+ if ((mask & 0xF) == 0) {
+ // Preserve the high bit of the mask, which is the low bit of
+ // the predicate.
+ mask &= 0x10;
+ mask |= 0x8;
+ S = MCDisassembler::SoftFail;
+ }
+
+ Inst.addOperand(MCOperand::CreateImm(pred));
+ Inst.addOperand(MCOperand::CreateImm(mask));
+ return S;
+}
+
+static DecodeStatus
+DecodeT2LDRDPreInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
+ unsigned Rt2 = fieldFromInstruction32(Insn, 8, 4);
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned addr = fieldFromInstruction32(Insn, 0, 8);
+ unsigned W = fieldFromInstruction32(Insn, 21, 1);
+ unsigned U = fieldFromInstruction32(Insn, 23, 1);
+ unsigned P = fieldFromInstruction32(Insn, 24, 1);
+ bool writeback = (W == 1) | (P == 0);
+
+ addr |= (U << 8) | (Rn << 9);
+
+ if (writeback && (Rn == Rt || Rn == Rt2))
+ Check(S, MCDisassembler::SoftFail);
+ if (Rt == Rt2)
+ Check(S, MCDisassembler::SoftFail);
+
+ // Rt
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ // Rt2
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rt2, Address, Decoder)))
+ return MCDisassembler::Fail;
+ // Writeback operand
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ // addr
+ if (!Check(S, DecodeT2AddrModeImm8s4(Inst, addr, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus
+DecodeT2STRDPreInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
+ unsigned Rt2 = fieldFromInstruction32(Insn, 8, 4);
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned addr = fieldFromInstruction32(Insn, 0, 8);
+ unsigned W = fieldFromInstruction32(Insn, 21, 1);
+ unsigned U = fieldFromInstruction32(Insn, 23, 1);
+ unsigned P = fieldFromInstruction32(Insn, 24, 1);
+ bool writeback = (W == 1) | (P == 0);
+
+ addr |= (U << 8) | (Rn << 9);
+
+ if (writeback && (Rn == Rt || Rn == Rt2))
+ Check(S, MCDisassembler::SoftFail);
+
+ // Writeback operand
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ // Rt
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ // Rt2
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rt2, Address, Decoder)))
+ return MCDisassembler::Fail;
+ // addr
+ if (!Check(S, DecodeT2AddrModeImm8s4(Inst, addr, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeT2Adr(llvm::MCInst &Inst, uint32_t Insn,
+ uint64_t Address, const void *Decoder) {
+ unsigned sign1 = fieldFromInstruction32(Insn, 21, 1);
+ unsigned sign2 = fieldFromInstruction32(Insn, 23, 1);
+ if (sign1 != sign2) return MCDisassembler::Fail;
+
+ unsigned Val = fieldFromInstruction32(Insn, 0, 8);
+ Val |= fieldFromInstruction32(Insn, 12, 3) << 8;
+ Val |= fieldFromInstruction32(Insn, 26, 1) << 11;
+ Val |= sign1 << 12;
+ Inst.addOperand(MCOperand::CreateImm(SignExtend32<13>(Val)));
+
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeT2ShifterImmOperand(llvm::MCInst &Inst, uint32_t Val,
+ uint64_t Address,
+ const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ // Shift of "asr #32" is not allowed in Thumb2 mode.
+ if (Val == 0x20) S = MCDisassembler::SoftFail;
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ return S;
+}
+
diff --git a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
new file mode 100644
index 0000000..ccdac3e
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -0,0 +1,992 @@
+//===-- ARMInstPrinter.cpp - Convert ARM MCInst to assembly syntax --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an ARM MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "ARMInstPrinter.h"
+#include "MCTargetDesc/ARMBaseInfo.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define GET_INSTRUCTION_NAME
+#include "ARMGenAsmWriter.inc"
+
+/// translateShiftImm - Convert shift immediate from 0-31 to 1-32 for printing.
+///
+/// getSORegOffset returns an integer from 0-31, representing '32' as 0.
+static unsigned translateShiftImm(unsigned imm) {
+ if (imm == 0)
+ return 32;
+ return imm;
+}
+
+
+ARMInstPrinter::ARMInstPrinter(const MCAsmInfo &MAI,
+ const MCSubtargetInfo &STI) :
+ MCInstPrinter(MAI) {
+ // Initialize the set of available features.
+ setAvailableFeatures(STI.getFeatureBits());
+}
+
+StringRef ARMInstPrinter::getOpcodeName(unsigned Opcode) const {
+ return getInstructionName(Opcode);
+}
+
+void ARMInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
+ OS << getRegisterName(RegNo);
+}
+
+void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
+ StringRef Annot) {
+ unsigned Opcode = MI->getOpcode();
+
+ // Check for MOVs and print canonical forms, instead.
+ if (Opcode == ARM::MOVsr) {
+ // FIXME: Thumb variants?
+ const MCOperand &Dst = MI->getOperand(0);
+ const MCOperand &MO1 = MI->getOperand(1);
+ const MCOperand &MO2 = MI->getOperand(2);
+ const MCOperand &MO3 = MI->getOperand(3);
+
+ O << '\t' << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO3.getImm()));
+ printSBitModifierOperand(MI, 6, O);
+ printPredicateOperand(MI, 4, O);
+
+ O << '\t' << getRegisterName(Dst.getReg())
+ << ", " << getRegisterName(MO1.getReg());
+
+ O << ", " << getRegisterName(MO2.getReg());
+ assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0);
+ printAnnotation(O, Annot);
+ return;
+ }
+
+ if (Opcode == ARM::MOVsi) {
+ // FIXME: Thumb variants?
+ const MCOperand &Dst = MI->getOperand(0);
+ const MCOperand &MO1 = MI->getOperand(1);
+ const MCOperand &MO2 = MI->getOperand(2);
+
+ O << '\t' << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO2.getImm()));
+ printSBitModifierOperand(MI, 5, O);
+ printPredicateOperand(MI, 3, O);
+
+ O << '\t' << getRegisterName(Dst.getReg())
+ << ", " << getRegisterName(MO1.getReg());
+
+ if (ARM_AM::getSORegShOp(MO2.getImm()) == ARM_AM::rrx) {
+ printAnnotation(O, Annot);
+ return;
+ }
+
+ O << ", #" << translateShiftImm(ARM_AM::getSORegOffset(MO2.getImm()));
+ printAnnotation(O, Annot);
+ return;
+ }
+
+
+ // A8.6.123 PUSH
+ if ((Opcode == ARM::STMDB_UPD || Opcode == ARM::t2STMDB_UPD) &&
+ MI->getOperand(0).getReg() == ARM::SP) {
+ O << '\t' << "push";
+ printPredicateOperand(MI, 2, O);
+ if (Opcode == ARM::t2STMDB_UPD)
+ O << ".w";
+ O << '\t';
+ printRegisterList(MI, 4, O);
+ printAnnotation(O, Annot);
+ return;
+ }
+ if (Opcode == ARM::STR_PRE_IMM && MI->getOperand(2).getReg() == ARM::SP &&
+ MI->getOperand(3).getImm() == -4) {
+ O << '\t' << "push";
+ printPredicateOperand(MI, 4, O);
+ O << "\t{" << getRegisterName(MI->getOperand(1).getReg()) << "}";
+ printAnnotation(O, Annot);
+ return;
+ }
+
+ // A8.6.122 POP
+ if ((Opcode == ARM::LDMIA_UPD || Opcode == ARM::t2LDMIA_UPD) &&
+ MI->getOperand(0).getReg() == ARM::SP) {
+ O << '\t' << "pop";
+ printPredicateOperand(MI, 2, O);
+ if (Opcode == ARM::t2LDMIA_UPD)
+ O << ".w";
+ O << '\t';
+ printRegisterList(MI, 4, O);
+ printAnnotation(O, Annot);
+ return;
+ }
+ if (Opcode == ARM::LDR_POST_IMM && MI->getOperand(2).getReg() == ARM::SP &&
+ MI->getOperand(4).getImm() == 4) {
+ O << '\t' << "pop";
+ printPredicateOperand(MI, 5, O);
+ O << "\t{" << getRegisterName(MI->getOperand(0).getReg()) << "}";
+ printAnnotation(O, Annot);
+ return;
+ }
+
+
+ // A8.6.355 VPUSH
+ if ((Opcode == ARM::VSTMSDB_UPD || Opcode == ARM::VSTMDDB_UPD) &&
+ MI->getOperand(0).getReg() == ARM::SP) {
+ O << '\t' << "vpush";
+ printPredicateOperand(MI, 2, O);
+ O << '\t';
+ printRegisterList(MI, 4, O);
+ printAnnotation(O, Annot);
+ return;
+ }
+
+ // A8.6.354 VPOP
+ if ((Opcode == ARM::VLDMSIA_UPD || Opcode == ARM::VLDMDIA_UPD) &&
+ MI->getOperand(0).getReg() == ARM::SP) {
+ O << '\t' << "vpop";
+ printPredicateOperand(MI, 2, O);
+ O << '\t';
+ printRegisterList(MI, 4, O);
+ printAnnotation(O, Annot);
+ return;
+ }
+
+ if (Opcode == ARM::tLDMIA) {
+ bool Writeback = true;
+ unsigned BaseReg = MI->getOperand(0).getReg();
+ for (unsigned i = 3; i < MI->getNumOperands(); ++i) {
+ if (MI->getOperand(i).getReg() == BaseReg)
+ Writeback = false;
+ }
+
+ O << "\tldm";
+
+ printPredicateOperand(MI, 1, O);
+ O << '\t' << getRegisterName(BaseReg);
+ if (Writeback) O << "!";
+ O << ", ";
+ printRegisterList(MI, 3, O);
+ printAnnotation(O, Annot);
+ return;
+ }
+
+ // Thumb1 NOP
+ if (Opcode == ARM::tMOVr && MI->getOperand(0).getReg() == ARM::R8 &&
+ MI->getOperand(1).getReg() == ARM::R8) {
+ O << "\tnop";
+ printPredicateOperand(MI, 2, O);
+ printAnnotation(O, Annot);
+ return;
+ }
+
+ printInstruction(MI, O);
+ printAnnotation(O, Annot);
+}
+
+void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isReg()) {
+ unsigned Reg = Op.getReg();
+ O << getRegisterName(Reg);
+ } else if (Op.isImm()) {
+ O << '#' << Op.getImm();
+ } else {
+ assert(Op.isExpr() && "unknown operand kind in printOperand");
+ // If a symbolic branch target was added as a constant expression then print
+ // that address in hex.
+ const MCConstantExpr *BranchTarget = dyn_cast<MCConstantExpr>(Op.getExpr());
+ int64_t Address;
+ if (BranchTarget && BranchTarget->EvaluateAsAbsolute(Address)) {
+ O << "0x";
+ O.write_hex(Address);
+ }
+ else {
+ // Otherwise, just print the expression.
+ O << *Op.getExpr();
+ }
+ }
+}
+
+void ARMInstPrinter::printT2LdrLabelOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ if (MO1.isExpr())
+ O << *MO1.getExpr();
+ else if (MO1.isImm())
+ O << "[pc, #" << MO1.getImm() << "]";
+ else
+ llvm_unreachable("Unknown LDR label operand?");
+}
+
+// so_reg is a 4-operand unit corresponding to register forms of the A5.1
+// "Addressing Mode 1 - Data-processing operands" forms. This includes:
+// REG 0 0 - e.g. R5
+// REG REG 0,SH_OPC - e.g. R5, ROR R3
+// REG 0 IMM,SH_OPC - e.g. R5, LSL #3
+void ARMInstPrinter::printSORegRegOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum+1);
+ const MCOperand &MO3 = MI->getOperand(OpNum+2);
+
+ O << getRegisterName(MO1.getReg());
+
+ // Print the shift opc.
+ ARM_AM::ShiftOpc ShOpc = ARM_AM::getSORegShOp(MO3.getImm());
+ O << ", " << ARM_AM::getShiftOpcStr(ShOpc);
+ if (ShOpc == ARM_AM::rrx)
+ return;
+
+ O << ' ' << getRegisterName(MO2.getReg());
+ assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0);
+}
+
+void ARMInstPrinter::printSORegImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum+1);
+
+ O << getRegisterName(MO1.getReg());
+
+ // Print the shift opc.
+ ARM_AM::ShiftOpc ShOpc = ARM_AM::getSORegShOp(MO2.getImm());
+ O << ", " << ARM_AM::getShiftOpcStr(ShOpc);
+ if (ShOpc == ARM_AM::rrx)
+ return;
+ O << " #" << translateShiftImm(ARM_AM::getSORegOffset(MO2.getImm()));
+}
+
+
+//===--------------------------------------------------------------------===//
+// Addressing Mode #2
+//===--------------------------------------------------------------------===//
+
+void ARMInstPrinter::printAM2PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(Op);
+ const MCOperand &MO2 = MI->getOperand(Op+1);
+ const MCOperand &MO3 = MI->getOperand(Op+2);
+
+ O << "[" << getRegisterName(MO1.getReg());
+
+ if (!MO2.getReg()) {
+ if (ARM_AM::getAM2Offset(MO3.getImm())) // Don't print +0.
+ O << ", #"
+ << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm()))
+ << ARM_AM::getAM2Offset(MO3.getImm());
+ O << "]";
+ return;
+ }
+
+ O << ", "
+ << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm()))
+ << getRegisterName(MO2.getReg());
+
+ if (unsigned ShImm = ARM_AM::getAM2Offset(MO3.getImm()))
+ O << ", "
+ << ARM_AM::getShiftOpcStr(ARM_AM::getAM2ShiftOpc(MO3.getImm()))
+ << " #" << ShImm;
+ O << "]";
+}
+
+void ARMInstPrinter::printAM2PostIndexOp(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(Op);
+ const MCOperand &MO2 = MI->getOperand(Op+1);
+ const MCOperand &MO3 = MI->getOperand(Op+2);
+
+ O << "[" << getRegisterName(MO1.getReg()) << "], ";
+
+ if (!MO2.getReg()) {
+ unsigned ImmOffs = ARM_AM::getAM2Offset(MO3.getImm());
+ O << '#'
+ << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm()))
+ << ImmOffs;
+ return;
+ }
+
+ O << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm()))
+ << getRegisterName(MO2.getReg());
+
+ if (unsigned ShImm = ARM_AM::getAM2Offset(MO3.getImm()))
+ O << ", "
+ << ARM_AM::getShiftOpcStr(ARM_AM::getAM2ShiftOpc(MO3.getImm()))
+ << " #" << ShImm;
+}
+
+void ARMInstPrinter::printAddrModeTBB(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(Op);
+ const MCOperand &MO2 = MI->getOperand(Op+1);
+ O << "[" << getRegisterName(MO1.getReg()) << ", "
+ << getRegisterName(MO2.getReg()) << "]";
+}
+
+void ARMInstPrinter::printAddrModeTBH(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(Op);
+ const MCOperand &MO2 = MI->getOperand(Op+1);
+ O << "[" << getRegisterName(MO1.getReg()) << ", "
+ << getRegisterName(MO2.getReg()) << ", lsl #1]";
+}
+
+void ARMInstPrinter::printAddrMode2Operand(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(Op);
+
+ if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
+ printOperand(MI, Op, O);
+ return;
+ }
+
+ const MCOperand &MO3 = MI->getOperand(Op+2);
+ unsigned IdxMode = ARM_AM::getAM2IdxMode(MO3.getImm());
+
+ if (IdxMode == ARMII::IndexModePost) {
+ printAM2PostIndexOp(MI, Op, O);
+ return;
+ }
+ printAM2PreOrOffsetIndexOp(MI, Op, O);
+}
+
+void ARMInstPrinter::printAddrMode2OffsetOperand(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum+1);
+
+ if (!MO1.getReg()) {
+ unsigned ImmOffs = ARM_AM::getAM2Offset(MO2.getImm());
+ O << '#'
+ << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm()))
+ << ImmOffs;
+ return;
+ }
+
+ O << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm()))
+ << getRegisterName(MO1.getReg());
+
+ if (unsigned ShImm = ARM_AM::getAM2Offset(MO2.getImm()))
+ O << ", "
+ << ARM_AM::getShiftOpcStr(ARM_AM::getAM2ShiftOpc(MO2.getImm()))
+ << " #" << ShImm;
+}
+
+//===--------------------------------------------------------------------===//
+// Addressing Mode #3
+//===--------------------------------------------------------------------===//
+
+void ARMInstPrinter::printAM3PostIndexOp(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(Op);
+ const MCOperand &MO2 = MI->getOperand(Op+1);
+ const MCOperand &MO3 = MI->getOperand(Op+2);
+
+ O << "[" << getRegisterName(MO1.getReg()) << "], ";
+
+ if (MO2.getReg()) {
+ O << (char)ARM_AM::getAM3Op(MO3.getImm())
+ << getRegisterName(MO2.getReg());
+ return;
+ }
+
+ unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm());
+ O << '#'
+ << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO3.getImm()))
+ << ImmOffs;
+}
+
+void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(Op);
+ const MCOperand &MO2 = MI->getOperand(Op+1);
+ const MCOperand &MO3 = MI->getOperand(Op+2);
+
+ O << '[' << getRegisterName(MO1.getReg());
+
+ if (MO2.getReg()) {
+ O << ", " << getAddrOpcStr(ARM_AM::getAM3Op(MO3.getImm()))
+ << getRegisterName(MO2.getReg()) << ']';
+ return;
+ }
+
+ if (unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm()))
+ O << ", #"
+ << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO3.getImm()))
+ << ImmOffs;
+ O << ']';
+}
+
+void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ const MCOperand &MO3 = MI->getOperand(Op+2);
+ unsigned IdxMode = ARM_AM::getAM3IdxMode(MO3.getImm());
+
+ if (IdxMode == ARMII::IndexModePost) {
+ printAM3PostIndexOp(MI, Op, O);
+ return;
+ }
+ printAM3PreOrOffsetIndexOp(MI, Op, O);
+}
+
+void ARMInstPrinter::printAddrMode3OffsetOperand(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum+1);
+
+ if (MO1.getReg()) {
+ O << getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm()))
+ << getRegisterName(MO1.getReg());
+ return;
+ }
+
+ unsigned ImmOffs = ARM_AM::getAM3Offset(MO2.getImm());
+ O << '#'
+ << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm()))
+ << ImmOffs;
+}
+
+void ARMInstPrinter::printPostIdxImm8Operand(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNum);
+ unsigned Imm = MO.getImm();
+ O << '#' << ((Imm & 256) ? "" : "-") << (Imm & 0xff);
+}
+
+void ARMInstPrinter::printPostIdxRegOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum+1);
+
+ O << (MO2.getImm() ? "" : "-") << getRegisterName(MO1.getReg());
+}
+
+void ARMInstPrinter::printPostIdxImm8s4Operand(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNum);
+ unsigned Imm = MO.getImm();
+ O << '#' << ((Imm & 256) ? "" : "-") << ((Imm & 0xff) << 2);
+}
+
+
+void ARMInstPrinter::printLdStmModeOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MI->getOperand(OpNum)
+ .getImm());
+ O << ARM_AM::getAMSubModeStr(Mode);
+}
+
+void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum+1);
+
+ if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
+ printOperand(MI, OpNum, O);
+ return;
+ }
+
+ O << "[" << getRegisterName(MO1.getReg());
+
+ unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm());
+ unsigned Op = ARM_AM::getAM5Op(MO2.getImm());
+ if (ImmOffs || Op == ARM_AM::sub) {
+ O << ", #"
+ << ARM_AM::getAddrOpcStr(ARM_AM::getAM5Op(MO2.getImm()))
+ << ImmOffs * 4;
+ }
+ O << "]";
+}
+
+void ARMInstPrinter::printAddrMode6Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum+1);
+
+ O << "[" << getRegisterName(MO1.getReg());
+ if (MO2.getImm()) {
+ // FIXME: Both darwin as and GNU as violate ARM docs here.
+ O << ", :" << (MO2.getImm() << 3);
+ }
+ O << "]";
+}
+
+void ARMInstPrinter::printAddrMode7Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ O << "[" << getRegisterName(MO1.getReg()) << "]";
+}
+
+void ARMInstPrinter::printAddrMode6OffsetOperand(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNum);
+ if (MO.getReg() == 0)
+ O << "!";
+ else
+ O << ", " << getRegisterName(MO.getReg());
+}
+
+void ARMInstPrinter::printBitfieldInvMaskImmOperand(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNum);
+ uint32_t v = ~MO.getImm();
+ int32_t lsb = CountTrailingZeros_32(v);
+ int32_t width = (32 - CountLeadingZeros_32 (v)) - lsb;
+ assert(MO.isImm() && "Not a valid bf_inv_mask_imm value!");
+ O << '#' << lsb << ", #" << width;
+}
+
+void ARMInstPrinter::printMemBOption(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ unsigned val = MI->getOperand(OpNum).getImm();
+ O << ARM_MB::MemBOptToString(val);
+}
+
+void ARMInstPrinter::printShiftImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ unsigned ShiftOp = MI->getOperand(OpNum).getImm();
+ bool isASR = (ShiftOp & (1 << 5)) != 0;
+ unsigned Amt = ShiftOp & 0x1f;
+ if (isASR)
+ O << ", asr #" << (Amt == 0 ? 32 : Amt);
+ else if (Amt)
+ O << ", lsl #" << Amt;
+}
+
+void ARMInstPrinter::printPKHLSLShiftImm(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ unsigned Imm = MI->getOperand(OpNum).getImm();
+ if (Imm == 0)
+ return;
+ assert(Imm > 0 && Imm < 32 && "Invalid PKH shift immediate value!");
+ O << ", lsl #" << Imm;
+}
+
+void ARMInstPrinter::printPKHASRShiftImm(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ unsigned Imm = MI->getOperand(OpNum).getImm();
+ // A shift amount of 32 is encoded as 0.
+ if (Imm == 0)
+ Imm = 32;
+ assert(Imm > 0 && Imm <= 32 && "Invalid PKH shift immediate value!");
+ O << ", asr #" << Imm;
+}
+
+void ARMInstPrinter::printRegisterList(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ O << "{";
+ for (unsigned i = OpNum, e = MI->getNumOperands(); i != e; ++i) {
+ if (i != OpNum) O << ", ";
+ O << getRegisterName(MI->getOperand(i).getReg());
+ }
+ O << "}";
+}
+
+void ARMInstPrinter::printSetendOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNum);
+ if (Op.getImm())
+ O << "be";
+ else
+ O << "le";
+}
+
+void ARMInstPrinter::printCPSIMod(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNum);
+ O << ARM_PROC::IModToString(Op.getImm());
+}
+
+void ARMInstPrinter::printCPSIFlag(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNum);
+ unsigned IFlags = Op.getImm();
+ for (int i=2; i >= 0; --i)
+ if (IFlags & (1 << i))
+ O << ARM_PROC::IFlagsToString(1 << i);
+
+ if (IFlags == 0)
+ O << "none";
+}
+
+void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNum);
+ unsigned SpecRegRBit = Op.getImm() >> 4;
+ unsigned Mask = Op.getImm() & 0xf;
+
+ if (getAvailableFeatures() & ARM::FeatureMClass) {
+ switch (Op.getImm()) {
+ default: assert(0 && "Unexpected mask value!");
+ case 0: O << "apsr"; return;
+ case 1: O << "iapsr"; return;
+ case 2: O << "eapsr"; return;
+ case 3: O << "xpsr"; return;
+ case 5: O << "ipsr"; return;
+ case 6: O << "epsr"; return;
+ case 7: O << "iepsr"; return;
+ case 8: O << "msp"; return;
+ case 9: O << "psp"; return;
+ case 16: O << "primask"; return;
+ case 17: O << "basepri"; return;
+ case 18: O << "basepri_max"; return;
+ case 19: O << "faultmask"; return;
+ case 20: O << "control"; return;
+ }
+ }
+
+ // As special cases, CPSR_f, CPSR_s and CPSR_fs prefer printing as
+ // APSR_nzcvq, APSR_g and APSRnzcvqg, respectively.
+ if (!SpecRegRBit && (Mask == 8 || Mask == 4 || Mask == 12)) {
+ O << "APSR_";
+ switch (Mask) {
+ default: assert(0);
+ case 4: O << "g"; return;
+ case 8: O << "nzcvq"; return;
+ case 12: O << "nzcvqg"; return;
+ }
+ llvm_unreachable("Unexpected mask value!");
+ }
+
+ if (SpecRegRBit)
+ O << "SPSR";
+ else
+ O << "CPSR";
+
+ if (Mask) {
+ O << '_';
+ if (Mask & 8) O << 'f';
+ if (Mask & 4) O << 's';
+ if (Mask & 2) O << 'x';
+ if (Mask & 1) O << 'c';
+ }
+}
+
+void ARMInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm();
+ if (CC != ARMCC::AL)
+ O << ARMCondCodeToString(CC);
+}
+
+void ARMInstPrinter::printMandatoryPredicateOperand(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm();
+ O << ARMCondCodeToString(CC);
+}
+
+void ARMInstPrinter::printSBitModifierOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ if (MI->getOperand(OpNum).getReg()) {
+ assert(MI->getOperand(OpNum).getReg() == ARM::CPSR &&
+ "Expect ARM CPSR register!");
+ O << 's';
+ }
+}
+
+void ARMInstPrinter::printNoHashImmediate(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ O << MI->getOperand(OpNum).getImm();
+}
+
+void ARMInstPrinter::printPImmediate(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ O << "p" << MI->getOperand(OpNum).getImm();
+}
+
+void ARMInstPrinter::printCImmediate(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ O << "c" << MI->getOperand(OpNum).getImm();
+}
+
+void ARMInstPrinter::printCoprocOptionImm(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ O << "{" << MI->getOperand(OpNum).getImm() << "}";
+}
+
+void ARMInstPrinter::printPCLabel(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ llvm_unreachable("Unhandled PC-relative pseudo-instruction!");
+}
+
+void ARMInstPrinter::printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ O << "#" << MI->getOperand(OpNum).getImm() * 4;
+}
+
+void ARMInstPrinter::printThumbSRImm(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ unsigned Imm = MI->getOperand(OpNum).getImm();
+ O << "#" << (Imm == 0 ? 32 : Imm);
+}
+
+void ARMInstPrinter::printThumbITMask(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ // (3 - the number of trailing zeros) is the number of then / else.
+ unsigned Mask = MI->getOperand(OpNum).getImm();
+ unsigned CondBit0 = Mask >> 4 & 1;
+ unsigned NumTZ = CountTrailingZeros_32(Mask);
+ assert(NumTZ <= 3 && "Invalid IT mask!");
+ for (unsigned Pos = 3, e = NumTZ; Pos > e; --Pos) {
+ bool T = ((Mask >> Pos) & 1) == CondBit0;
+ if (T)
+ O << 't';
+ else
+ O << 'e';
+ }
+}
+
+void ARMInstPrinter::printThumbAddrModeRROperand(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(Op);
+ const MCOperand &MO2 = MI->getOperand(Op + 1);
+
+ if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
+ printOperand(MI, Op, O);
+ return;
+ }
+
+ O << "[" << getRegisterName(MO1.getReg());
+ if (unsigned RegNum = MO2.getReg())
+ O << ", " << getRegisterName(RegNum);
+ O << "]";
+}
+
+void ARMInstPrinter::printThumbAddrModeImm5SOperand(const MCInst *MI,
+ unsigned Op,
+ raw_ostream &O,
+ unsigned Scale) {
+ const MCOperand &MO1 = MI->getOperand(Op);
+ const MCOperand &MO2 = MI->getOperand(Op + 1);
+
+ if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
+ printOperand(MI, Op, O);
+ return;
+ }
+
+ O << "[" << getRegisterName(MO1.getReg());
+ if (unsigned ImmOffs = MO2.getImm())
+ O << ", #" << ImmOffs * Scale;
+ O << "]";
+}
+
+void ARMInstPrinter::printThumbAddrModeImm5S1Operand(const MCInst *MI,
+ unsigned Op,
+ raw_ostream &O) {
+ printThumbAddrModeImm5SOperand(MI, Op, O, 1);
+}
+
+void ARMInstPrinter::printThumbAddrModeImm5S2Operand(const MCInst *MI,
+ unsigned Op,
+ raw_ostream &O) {
+ printThumbAddrModeImm5SOperand(MI, Op, O, 2);
+}
+
+void ARMInstPrinter::printThumbAddrModeImm5S4Operand(const MCInst *MI,
+ unsigned Op,
+ raw_ostream &O) {
+ printThumbAddrModeImm5SOperand(MI, Op, O, 4);
+}
+
+void ARMInstPrinter::printThumbAddrModeSPOperand(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ printThumbAddrModeImm5SOperand(MI, Op, O, 4);
+}
+
+// Constant shifts t2_so_reg is a 2-operand unit corresponding to the Thumb2
+// register with shift forms.
+// REG 0 0 - e.g. R5
+// REG IMM, SH_OPC - e.g. R5, LSL #3
+void ARMInstPrinter::printT2SOOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum+1);
+
+ unsigned Reg = MO1.getReg();
+ O << getRegisterName(Reg);
+
+ // Print the shift opc.
+ assert(MO2.isImm() && "Not a valid t2_so_reg value!");
+ ARM_AM::ShiftOpc ShOpc = ARM_AM::getSORegShOp(MO2.getImm());
+ O << ", " << ARM_AM::getShiftOpcStr(ShOpc);
+ if (ShOpc != ARM_AM::rrx)
+ O << " #" << translateShiftImm(ARM_AM::getSORegOffset(MO2.getImm()));
+}
+
+void ARMInstPrinter::printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum+1);
+
+ if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
+ printOperand(MI, OpNum, O);
+ return;
+ }
+
+ O << "[" << getRegisterName(MO1.getReg());
+
+ int32_t OffImm = (int32_t)MO2.getImm();
+ bool isSub = OffImm < 0;
+ // Special value for #-0. All others are normal.
+ if (OffImm == INT32_MIN)
+ OffImm = 0;
+ if (isSub)
+ O << ", #-" << -OffImm;
+ else if (OffImm > 0)
+ O << ", #" << OffImm;
+ O << "]";
+}
+
+void ARMInstPrinter::printT2AddrModeImm8Operand(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum+1);
+
+ O << "[" << getRegisterName(MO1.getReg());
+
+ int32_t OffImm = (int32_t)MO2.getImm();
+ // Don't print +0.
+ if (OffImm == INT32_MIN)
+ O << ", #-0";
+ else if (OffImm < 0)
+ O << ", #-" << -OffImm;
+ else if (OffImm > 0)
+ O << ", #" << OffImm;
+ O << "]";
+}
+
+void ARMInstPrinter::printT2AddrModeImm8s4Operand(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum+1);
+
+ O << "[" << getRegisterName(MO1.getReg());
+
+ int32_t OffImm = (int32_t)MO2.getImm() / 4;
+ // Don't print +0.
+ if (OffImm < 0)
+ O << ", #-" << -OffImm * 4;
+ else if (OffImm > 0)
+ O << ", #" << OffImm * 4;
+ O << "]";
+}
+
+void ARMInstPrinter::printT2AddrModeImm0_1020s4Operand(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum+1);
+
+ O << "[" << getRegisterName(MO1.getReg());
+ if (MO2.getImm())
+ O << ", #" << MO2.getImm() * 4;
+ O << "]";
+}
+
+void ARMInstPrinter::printT2AddrModeImm8OffsetOperand(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ int32_t OffImm = (int32_t)MO1.getImm();
+ // Don't print +0.
+ if (OffImm < 0)
+ O << ", #-" << -OffImm;
+ else
+ O << ", #" << OffImm;
+}
+
+void ARMInstPrinter::printT2AddrModeImm8s4OffsetOperand(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ int32_t OffImm = (int32_t)MO1.getImm() / 4;
+ // Don't print +0.
+ if (OffImm != 0) {
+ O << ", ";
+ if (OffImm < 0)
+ O << "#-" << -OffImm * 4;
+ else if (OffImm > 0)
+ O << "#" << OffImm * 4;
+ }
+}
+
+void ARMInstPrinter::printT2AddrModeSoRegOperand(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum+1);
+ const MCOperand &MO3 = MI->getOperand(OpNum+2);
+
+ O << "[" << getRegisterName(MO1.getReg());
+
+ assert(MO2.getReg() && "Invalid so_reg load / store address!");
+ O << ", " << getRegisterName(MO2.getReg());
+
+ unsigned ShAmt = MO3.getImm();
+ if (ShAmt) {
+ assert(ShAmt <= 3 && "Not a valid Thumb2 addressing mode!");
+ O << ", lsl #" << ShAmt;
+ }
+ O << "]";
+}
+
+void ARMInstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNum);
+ O << '#' << ARM_AM::getFPImmFloat(MO.getImm());
+}
+
+void ARMInstPrinter::printNEONModImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ unsigned EncodedImm = MI->getOperand(OpNum).getImm();
+ unsigned EltBits;
+ uint64_t Val = ARM_AM::decodeNEONModImm(EncodedImm, EltBits);
+ O << "#0x" << utohexstr(Val);
+}
+
+void ARMInstPrinter::printImmPlusOneOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ unsigned Imm = MI->getOperand(OpNum).getImm();
+ O << "#" << Imm + 1;
+}
+
+void ARMInstPrinter::printRotImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ unsigned Imm = MI->getOperand(OpNum).getImm();
+ if (Imm == 0)
+ return;
+ O << ", ror #";
+ switch (Imm) {
+ default: assert (0 && "illegal ror immediate!");
+ case 1: O << "8"; break;
+ case 2: O << "16"; break;
+ case 3: O << "24"; break;
+ }
+}
+
+void ARMInstPrinter::printVectorIndex(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ O << "[" << MI->getOperand(OpNum).getImm() << "]";
+}
diff --git a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
new file mode 100644
index 0000000..5c2173f
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
@@ -0,0 +1,136 @@
+//===-- ARMInstPrinter.h - Convert ARM MCInst to assembly syntax ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an ARM MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMINSTPRINTER_H
+#define ARMINSTPRINTER_H
+
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+
+namespace llvm {
+
+class MCOperand;
+
+class ARMInstPrinter : public MCInstPrinter {
+public:
+ ARMInstPrinter(const MCAsmInfo &MAI, const MCSubtargetInfo &STI);
+
+ virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
+ virtual StringRef getOpcodeName(unsigned Opcode) const;
+ virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
+
+ static const char *getInstructionName(unsigned Opcode);
+
+ // Autogenerated by tblgen.
+ void printInstruction(const MCInst *MI, raw_ostream &O);
+ static const char *getRegisterName(unsigned RegNo);
+
+
+ void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+
+ void printSORegRegOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printSORegImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+
+ void printAddrModeTBB(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printAddrModeTBH(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printAddrMode2Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printAM2PostIndexOp(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printAM2PreOrOffsetIndexOp(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printAddrMode2OffsetOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+
+ void printAddrMode3Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printAddrMode3OffsetOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printAM3PostIndexOp(const MCInst *MI, unsigned Op, raw_ostream &O);
+ void printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,raw_ostream &O);
+ void printPostIdxImm8Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printPostIdxRegOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printPostIdxImm8s4Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+
+ void printLdStmModeOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printAddrMode5Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printAddrMode6Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printAddrMode7Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printAddrMode6OffsetOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+
+ void printBitfieldInvMaskImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printMemBOption(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printShiftImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printPKHLSLShiftImm(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printPKHASRShiftImm(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+
+ void printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printThumbSRImm(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printThumbITMask(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printThumbAddrModeRROperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printThumbAddrModeImm5SOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O, unsigned Scale);
+ void printThumbAddrModeImm5S1Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printThumbAddrModeImm5S2Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printThumbAddrModeImm5S4Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printThumbAddrModeSPOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+
+ void printT2SOOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printT2AddrModeImm8Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printT2AddrModeImm8s4Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printT2AddrModeImm0_1020s4Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printT2AddrModeImm8OffsetOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printT2AddrModeImm8s4OffsetOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printT2AddrModeSoRegOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+
+ void printSetendOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printCPSIMod(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printCPSIFlag(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printMSRMaskOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printPredicateOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printMandatoryPredicateOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printSBitModifierOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printRegisterList(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printNoHashImmediate(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printPImmediate(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printCImmediate(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printCoprocOptionImm(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printFPImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printNEONModImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printImmPlusOneOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printRotImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+
+ void printPCLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printT2LdrLabelOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printVectorIndex(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
new file mode 100644
index 0000000..9982fa6
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
@@ -0,0 +1,667 @@
+//===- ARMAddressingModes.h - ARM Addressing Modes --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the ARM addressing mode implementation stuff.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_ARM_ARMADDRESSINGMODES_H
+#define LLVM_TARGET_ARM_ARMADDRESSINGMODES_H
+
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/Support/MathExtras.h"
+#include <cassert>
+
+namespace llvm {
+
+/// ARM_AM - ARM Addressing Mode Stuff
+namespace ARM_AM {
+ enum ShiftOpc {
+ no_shift = 0,
+ asr,
+ lsl,
+ lsr,
+ ror,
+ rrx
+ };
+
+ enum AddrOpc {
+ sub = 0,
+ add
+ };
+
+ static inline const char *getAddrOpcStr(AddrOpc Op) {
+ return Op == sub ? "-" : "";
+ }
+
+ static inline const char *getShiftOpcStr(ShiftOpc Op) {
+ switch (Op) {
+ default: assert(0 && "Unknown shift opc!");
+ case ARM_AM::asr: return "asr";
+ case ARM_AM::lsl: return "lsl";
+ case ARM_AM::lsr: return "lsr";
+ case ARM_AM::ror: return "ror";
+ case ARM_AM::rrx: return "rrx";
+ }
+ }
+
+ static inline unsigned getShiftOpcEncoding(ShiftOpc Op) {
+ switch (Op) {
+ default: assert(0 && "Unknown shift opc!");
+ case ARM_AM::asr: return 2;
+ case ARM_AM::lsl: return 0;
+ case ARM_AM::lsr: return 1;
+ case ARM_AM::ror: return 3;
+ }
+ }
+
+ enum AMSubMode {
+ bad_am_submode = 0,
+ ia,
+ ib,
+ da,
+ db
+ };
+
+ static inline const char *getAMSubModeStr(AMSubMode Mode) {
+ switch (Mode) {
+ default: assert(0 && "Unknown addressing sub-mode!");
+ case ARM_AM::ia: return "ia";
+ case ARM_AM::ib: return "ib";
+ case ARM_AM::da: return "da";
+ case ARM_AM::db: return "db";
+ }
+ }
+
+ /// rotr32 - Rotate a 32-bit unsigned value right by a specified # bits.
+ ///
+ static inline unsigned rotr32(unsigned Val, unsigned Amt) {
+ assert(Amt < 32 && "Invalid rotate amount");
+ return (Val >> Amt) | (Val << ((32-Amt)&31));
+ }
+
+ /// rotl32 - Rotate a 32-bit unsigned value left by a specified # bits.
+ ///
+ static inline unsigned rotl32(unsigned Val, unsigned Amt) {
+ assert(Amt < 32 && "Invalid rotate amount");
+ return (Val << Amt) | (Val >> ((32-Amt)&31));
+ }
+
+ //===--------------------------------------------------------------------===//
+ // Addressing Mode #1: shift_operand with registers
+ //===--------------------------------------------------------------------===//
+ //
+ // This 'addressing mode' is used for arithmetic instructions. It can
+ // represent things like:
+ // reg
+ // reg [asr|lsl|lsr|ror|rrx] reg
+ // reg [asr|lsl|lsr|ror|rrx] imm
+ //
+ // This is stored three operands [rega, regb, opc]. The first is the base
+ // reg, the second is the shift amount (or reg0 if not present or imm). The
+ // third operand encodes the shift opcode and the imm if a reg isn't present.
+ //
+ static inline unsigned getSORegOpc(ShiftOpc ShOp, unsigned Imm) {
+ return ShOp | (Imm << 3);
+ }
+ static inline unsigned getSORegOffset(unsigned Op) {
+ return Op >> 3;
+ }
+ static inline ShiftOpc getSORegShOp(unsigned Op) {
+ return (ShiftOpc)(Op & 7);
+ }
+
+ /// getSOImmValImm - Given an encoded imm field for the reg/imm form, return
+ /// the 8-bit imm value.
+ static inline unsigned getSOImmValImm(unsigned Imm) {
+ return Imm & 0xFF;
+ }
+ /// getSOImmValRot - Given an encoded imm field for the reg/imm form, return
+ /// the rotate amount.
+ static inline unsigned getSOImmValRot(unsigned Imm) {
+ return (Imm >> 8) * 2;
+ }
+
+ /// getSOImmValRotate - Try to handle Imm with an immediate shifter operand,
+ /// computing the rotate amount to use. If this immediate value cannot be
+ /// handled with a single shifter-op, determine a good rotate amount that will
+ /// take a maximal chunk of bits out of the immediate.
+ static inline unsigned getSOImmValRotate(unsigned Imm) {
+ // 8-bit (or less) immediates are trivially shifter_operands with a rotate
+ // of zero.
+ if ((Imm & ~255U) == 0) return 0;
+
+ // Use CTZ to compute the rotate amount.
+ unsigned TZ = CountTrailingZeros_32(Imm);
+
+ // Rotate amount must be even. Something like 0x200 must be rotated 8 bits,
+ // not 9.
+ unsigned RotAmt = TZ & ~1;
+
+ // If we can handle this spread, return it.
+ if ((rotr32(Imm, RotAmt) & ~255U) == 0)
+ return (32-RotAmt)&31; // HW rotates right, not left.
+
+ // For values like 0xF000000F, we should ignore the low 6 bits, then
+ // retry the hunt.
+ if (Imm & 63U) {
+ unsigned TZ2 = CountTrailingZeros_32(Imm & ~63U);
+ unsigned RotAmt2 = TZ2 & ~1;
+ if ((rotr32(Imm, RotAmt2) & ~255U) == 0)
+ return (32-RotAmt2)&31; // HW rotates right, not left.
+ }
+
+ // Otherwise, we have no way to cover this span of bits with a single
+ // shifter_op immediate. Return a chunk of bits that will be useful to
+ // handle.
+ return (32-RotAmt)&31; // HW rotates right, not left.
+ }
+
+ /// getSOImmVal - Given a 32-bit immediate, if it is something that can fit
+ /// into an shifter_operand immediate operand, return the 12-bit encoding for
+ /// it. If not, return -1.
+ static inline int getSOImmVal(unsigned Arg) {
+ // 8-bit (or less) immediates are trivially shifter_operands with a rotate
+ // of zero.
+ if ((Arg & ~255U) == 0) return Arg;
+
+ unsigned RotAmt = getSOImmValRotate(Arg);
+
+ // If this cannot be handled with a single shifter_op, bail out.
+ if (rotr32(~255U, RotAmt) & Arg)
+ return -1;
+
+ // Encode this correctly.
+ return rotl32(Arg, RotAmt) | ((RotAmt>>1) << 8);
+ }
+
+ /// isSOImmTwoPartVal - Return true if the specified value can be obtained by
+ /// or'ing together two SOImmVal's.
+ static inline bool isSOImmTwoPartVal(unsigned V) {
+ // If this can be handled with a single shifter_op, bail out.
+ V = rotr32(~255U, getSOImmValRotate(V)) & V;
+ if (V == 0)
+ return false;
+
+ // If this can be handled with two shifter_op's, accept.
+ V = rotr32(~255U, getSOImmValRotate(V)) & V;
+ return V == 0;
+ }
+
+ /// getSOImmTwoPartFirst - If V is a value that satisfies isSOImmTwoPartVal,
+ /// return the first chunk of it.
+ static inline unsigned getSOImmTwoPartFirst(unsigned V) {
+ return rotr32(255U, getSOImmValRotate(V)) & V;
+ }
+
+ /// getSOImmTwoPartSecond - If V is a value that satisfies isSOImmTwoPartVal,
+ /// return the second chunk of it.
+ static inline unsigned getSOImmTwoPartSecond(unsigned V) {
+ // Mask out the first hunk.
+ V = rotr32(~255U, getSOImmValRotate(V)) & V;
+
+ // Take what's left.
+ assert(V == (rotr32(255U, getSOImmValRotate(V)) & V));
+ return V;
+ }
+
+ /// getThumbImmValShift - Try to handle Imm with a 8-bit immediate followed
+ /// by a left shift. Returns the shift amount to use.
+ static inline unsigned getThumbImmValShift(unsigned Imm) {
+ // 8-bit (or less) immediates are trivially immediate operand with a shift
+ // of zero.
+ if ((Imm & ~255U) == 0) return 0;
+
+ // Use CTZ to compute the shift amount.
+ return CountTrailingZeros_32(Imm);
+ }
+
+ /// isThumbImmShiftedVal - Return true if the specified value can be obtained
+ /// by left shifting a 8-bit immediate.
+ static inline bool isThumbImmShiftedVal(unsigned V) {
+ // If this can be handled with
+ V = (~255U << getThumbImmValShift(V)) & V;
+ return V == 0;
+ }
+
+ /// getThumbImm16ValShift - Try to handle Imm with a 16-bit immediate followed
+ /// by a left shift. Returns the shift amount to use.
+ static inline unsigned getThumbImm16ValShift(unsigned Imm) {
+ // 16-bit (or less) immediates are trivially immediate operand with a shift
+ // of zero.
+ if ((Imm & ~65535U) == 0) return 0;
+
+ // Use CTZ to compute the shift amount.
+ return CountTrailingZeros_32(Imm);
+ }
+
+ /// isThumbImm16ShiftedVal - Return true if the specified value can be
+ /// obtained by left shifting a 16-bit immediate.
+ static inline bool isThumbImm16ShiftedVal(unsigned V) {
+ // If this can be handled with
+ V = (~65535U << getThumbImm16ValShift(V)) & V;
+ return V == 0;
+ }
+
+ /// getThumbImmNonShiftedVal - If V is a value that satisfies
+ /// isThumbImmShiftedVal, return the non-shiftd value.
+ static inline unsigned getThumbImmNonShiftedVal(unsigned V) {
+ return V >> getThumbImmValShift(V);
+ }
+
+
+ /// getT2SOImmValSplat - Return the 12-bit encoded representation
+ /// if the specified value can be obtained by splatting the low 8 bits
+ /// into every other byte or every byte of a 32-bit value. i.e.,
+ /// 00000000 00000000 00000000 abcdefgh control = 0
+ /// 00000000 abcdefgh 00000000 abcdefgh control = 1
+ /// abcdefgh 00000000 abcdefgh 00000000 control = 2
+ /// abcdefgh abcdefgh abcdefgh abcdefgh control = 3
+ /// Return -1 if none of the above apply.
+ /// See ARM Reference Manual A6.3.2.
+ static inline int getT2SOImmValSplatVal(unsigned V) {
+ unsigned u, Vs, Imm;
+ // control = 0
+ if ((V & 0xffffff00) == 0)
+ return V;
+
+ // If the value is zeroes in the first byte, just shift those off
+ Vs = ((V & 0xff) == 0) ? V >> 8 : V;
+ // Any passing value only has 8 bits of payload, splatted across the word
+ Imm = Vs & 0xff;
+ // Likewise, any passing values have the payload splatted into the 3rd byte
+ u = Imm | (Imm << 16);
+
+ // control = 1 or 2
+ if (Vs == u)
+ return (((Vs == V) ? 1 : 2) << 8) | Imm;
+
+ // control = 3
+ if (Vs == (u | (u << 8)))
+ return (3 << 8) | Imm;
+
+ return -1;
+ }
+
+ /// getT2SOImmValRotateVal - Return the 12-bit encoded representation if the
+ /// specified value is a rotated 8-bit value. Return -1 if no rotation
+ /// encoding is possible.
+ /// See ARM Reference Manual A6.3.2.
+ static inline int getT2SOImmValRotateVal(unsigned V) {
+ unsigned RotAmt = CountLeadingZeros_32(V);
+ if (RotAmt >= 24)
+ return -1;
+
+ // If 'Arg' can be handled with a single shifter_op return the value.
+ if ((rotr32(0xff000000U, RotAmt) & V) == V)
+ return (rotr32(V, 24 - RotAmt) & 0x7f) | ((RotAmt + 8) << 7);
+
+ return -1;
+ }
+
+ /// getT2SOImmVal - Given a 32-bit immediate, if it is something that can fit
+ /// into a Thumb-2 shifter_operand immediate operand, return the 12-bit
+ /// encoding for it. If not, return -1.
+ /// See ARM Reference Manual A6.3.2.
+ static inline int getT2SOImmVal(unsigned Arg) {
+ // If 'Arg' is an 8-bit splat, then get the encoded value.
+ int Splat = getT2SOImmValSplatVal(Arg);
+ if (Splat != -1)
+ return Splat;
+
+ // If 'Arg' can be handled with a single shifter_op return the value.
+ int Rot = getT2SOImmValRotateVal(Arg);
+ if (Rot != -1)
+ return Rot;
+
+ return -1;
+ }
+
+ static inline unsigned getT2SOImmValRotate(unsigned V) {
+ if ((V & ~255U) == 0) return 0;
+ // Use CTZ to compute the rotate amount.
+ unsigned RotAmt = CountTrailingZeros_32(V);
+ return (32 - RotAmt) & 31;
+ }
+
+ static inline bool isT2SOImmTwoPartVal (unsigned Imm) {
+ unsigned V = Imm;
+ // Passing values can be any combination of splat values and shifter
+ // values. If this can be handled with a single shifter or splat, bail
+ // out. Those should be handled directly, not with a two-part val.
+ if (getT2SOImmValSplatVal(V) != -1)
+ return false;
+ V = rotr32 (~255U, getT2SOImmValRotate(V)) & V;
+ if (V == 0)
+ return false;
+
+ // If this can be handled as an immediate, accept.
+ if (getT2SOImmVal(V) != -1) return true;
+
+ // Likewise, try masking out a splat value first.
+ V = Imm;
+ if (getT2SOImmValSplatVal(V & 0xff00ff00U) != -1)
+ V &= ~0xff00ff00U;
+ else if (getT2SOImmValSplatVal(V & 0x00ff00ffU) != -1)
+ V &= ~0x00ff00ffU;
+ // If what's left can be handled as an immediate, accept.
+ if (getT2SOImmVal(V) != -1) return true;
+
+ // Otherwise, do not accept.
+ return false;
+ }
+
+ static inline unsigned getT2SOImmTwoPartFirst(unsigned Imm) {
+ assert (isT2SOImmTwoPartVal(Imm) &&
+ "Immedate cannot be encoded as two part immediate!");
+ // Try a shifter operand as one part
+ unsigned V = rotr32 (~255, getT2SOImmValRotate(Imm)) & Imm;
+ // If the rest is encodable as an immediate, then return it.
+ if (getT2SOImmVal(V) != -1) return V;
+
+ // Try masking out a splat value first.
+ if (getT2SOImmValSplatVal(Imm & 0xff00ff00U) != -1)
+ return Imm & 0xff00ff00U;
+
+ // The other splat is all that's left as an option.
+ assert (getT2SOImmValSplatVal(Imm & 0x00ff00ffU) != -1);
+ return Imm & 0x00ff00ffU;
+ }
+
+ static inline unsigned getT2SOImmTwoPartSecond(unsigned Imm) {
+ // Mask out the first hunk
+ Imm ^= getT2SOImmTwoPartFirst(Imm);
+ // Return what's left
+ assert (getT2SOImmVal(Imm) != -1 &&
+ "Unable to encode second part of T2 two part SO immediate");
+ return Imm;
+ }
+
+
+ //===--------------------------------------------------------------------===//
+ // Addressing Mode #2
+ //===--------------------------------------------------------------------===//
+ //
+ // This is used for most simple load/store instructions.
+ //
+ // addrmode2 := reg +/- reg shop imm
+ // addrmode2 := reg +/- imm12
+ //
+ // The first operand is always a Reg. The second operand is a reg if in
+ // reg/reg form, otherwise it's reg#0. The third field encodes the operation
+ // in bit 12, the immediate in bits 0-11, and the shift op in 13-15. The
+ // fourth operand 16-17 encodes the index mode.
+ //
+ // If this addressing mode is a frame index (before prolog/epilog insertion
+ // and code rewriting), this operand will have the form: FI#, reg0, <offs>
+ // with no shift amount for the frame offset.
+ //
+ static inline unsigned getAM2Opc(AddrOpc Opc, unsigned Imm12, ShiftOpc SO,
+ unsigned IdxMode = 0) {
+ assert(Imm12 < (1 << 12) && "Imm too large!");
+ bool isSub = Opc == sub;
+ return Imm12 | ((int)isSub << 12) | (SO << 13) | (IdxMode << 16) ;
+ }
+ static inline unsigned getAM2Offset(unsigned AM2Opc) {
+ return AM2Opc & ((1 << 12)-1);
+ }
+ static inline AddrOpc getAM2Op(unsigned AM2Opc) {
+ return ((AM2Opc >> 12) & 1) ? sub : add;
+ }
+ static inline ShiftOpc getAM2ShiftOpc(unsigned AM2Opc) {
+ return (ShiftOpc)((AM2Opc >> 13) & 7);
+ }
+ static inline unsigned getAM2IdxMode(unsigned AM2Opc) {
+ return (AM2Opc >> 16);
+ }
+
+
+ //===--------------------------------------------------------------------===//
+ // Addressing Mode #3
+ //===--------------------------------------------------------------------===//
+ //
+ // This is used for sign-extending loads, and load/store-pair instructions.
+ //
+ // addrmode3 := reg +/- reg
+ // addrmode3 := reg +/- imm8
+ //
+ // The first operand is always a Reg. The second operand is a reg if in
+ // reg/reg form, otherwise it's reg#0. The third field encodes the operation
+ // in bit 8, the immediate in bits 0-7. The fourth operand 9-10 encodes the
+ // index mode.
+
+ /// getAM3Opc - This function encodes the addrmode3 opc field.
+ static inline unsigned getAM3Opc(AddrOpc Opc, unsigned char Offset,
+ unsigned IdxMode = 0) {
+ bool isSub = Opc == sub;
+ return ((int)isSub << 8) | Offset | (IdxMode << 9);
+ }
+ static inline unsigned char getAM3Offset(unsigned AM3Opc) {
+ return AM3Opc & 0xFF;
+ }
+ static inline AddrOpc getAM3Op(unsigned AM3Opc) {
+ return ((AM3Opc >> 8) & 1) ? sub : add;
+ }
+ static inline unsigned getAM3IdxMode(unsigned AM3Opc) {
+ return (AM3Opc >> 9);
+ }
+
+ //===--------------------------------------------------------------------===//
+ // Addressing Mode #4
+ //===--------------------------------------------------------------------===//
+ //
+ // This is used for load / store multiple instructions.
+ //
+ // addrmode4 := reg, <mode>
+ //
+ // The four modes are:
+ // IA - Increment after
+ // IB - Increment before
+ // DA - Decrement after
+ // DB - Decrement before
+ // For VFP instructions, only the IA and DB modes are valid.
+
+ static inline AMSubMode getAM4SubMode(unsigned Mode) {
+ return (AMSubMode)(Mode & 0x7);
+ }
+
+ static inline unsigned getAM4ModeImm(AMSubMode SubMode) {
+ return (int)SubMode;
+ }
+
+ //===--------------------------------------------------------------------===//
+ // Addressing Mode #5
+ //===--------------------------------------------------------------------===//
+ //
+ // This is used for coprocessor instructions, such as FP load/stores.
+ //
+ // addrmode5 := reg +/- imm8*4
+ //
+ // The first operand is always a Reg. The second operand encodes the
+ // operation in bit 8 and the immediate in bits 0-7.
+
+ /// getAM5Opc - This function encodes the addrmode5 opc field.
+ static inline unsigned getAM5Opc(AddrOpc Opc, unsigned char Offset) {
+ bool isSub = Opc == sub;
+ return ((int)isSub << 8) | Offset;
+ }
+ static inline unsigned char getAM5Offset(unsigned AM5Opc) {
+ return AM5Opc & 0xFF;
+ }
+ static inline AddrOpc getAM5Op(unsigned AM5Opc) {
+ return ((AM5Opc >> 8) & 1) ? sub : add;
+ }
+
+ //===--------------------------------------------------------------------===//
+ // Addressing Mode #6
+ //===--------------------------------------------------------------------===//
+ //
+ // This is used for NEON load / store instructions.
+ //
+ // addrmode6 := reg with optional alignment
+ //
+ // This is stored in two operands [regaddr, align]. The first is the
+ // address register. The second operand is the value of the alignment
+ // specifier in bytes or zero if no explicit alignment.
+ // Valid alignments depend on the specific instruction.
+
+ //===--------------------------------------------------------------------===//
+ // NEON Modified Immediates
+ //===--------------------------------------------------------------------===//
+ //
+ // Several NEON instructions (e.g., VMOV) take a "modified immediate"
+ // vector operand, where a small immediate encoded in the instruction
+ // specifies a full NEON vector value. These modified immediates are
+ // represented here as encoded integers. The low 8 bits hold the immediate
+ // value; bit 12 holds the "Op" field of the instruction, and bits 11-8 hold
+ // the "Cmode" field of the instruction. The interfaces below treat the
+ // Op and Cmode values as a single 5-bit value.
+
+ static inline unsigned createNEONModImm(unsigned OpCmode, unsigned Val) {
+ return (OpCmode << 8) | Val;
+ }
+ static inline unsigned getNEONModImmOpCmode(unsigned ModImm) {
+ return (ModImm >> 8) & 0x1f;
+ }
+ static inline unsigned getNEONModImmVal(unsigned ModImm) {
+ return ModImm & 0xff;
+ }
+
+ /// decodeNEONModImm - Decode a NEON modified immediate value into the
+ /// element value and the element size in bits. (If the element size is
+ /// smaller than the vector, it is splatted into all the elements.)
+ static inline uint64_t decodeNEONModImm(unsigned ModImm, unsigned &EltBits) {
+ unsigned OpCmode = getNEONModImmOpCmode(ModImm);
+ unsigned Imm8 = getNEONModImmVal(ModImm);
+ uint64_t Val = 0;
+
+ if (OpCmode == 0xe) {
+ // 8-bit vector elements
+ Val = Imm8;
+ EltBits = 8;
+ } else if ((OpCmode & 0xc) == 0x8) {
+ // 16-bit vector elements
+ unsigned ByteNum = (OpCmode & 0x6) >> 1;
+ Val = Imm8 << (8 * ByteNum);
+ EltBits = 16;
+ } else if ((OpCmode & 0x8) == 0) {
+ // 32-bit vector elements, zero with one byte set
+ unsigned ByteNum = (OpCmode & 0x6) >> 1;
+ Val = Imm8 << (8 * ByteNum);
+ EltBits = 32;
+ } else if ((OpCmode & 0xe) == 0xc) {
+ // 32-bit vector elements, one byte with low bits set
+ unsigned ByteNum = 1 + (OpCmode & 0x1);
+ Val = (Imm8 << (8 * ByteNum)) | (0xffff >> (8 * (2 - ByteNum)));
+ EltBits = 32;
+ } else if (OpCmode == 0x1e) {
+ // 64-bit vector elements
+ for (unsigned ByteNum = 0; ByteNum < 8; ++ByteNum) {
+ if ((ModImm >> ByteNum) & 1)
+ Val |= (uint64_t)0xff << (8 * ByteNum);
+ }
+ EltBits = 64;
+ } else {
+ assert(false && "Unsupported NEON immediate");
+ }
+ return Val;
+ }
+
+ AMSubMode getLoadStoreMultipleSubMode(int Opcode);
+
+ //===--------------------------------------------------------------------===//
+ // Floating-point Immediates
+ //
+ static inline float getFPImmFloat(unsigned Imm) {
+ // We expect an 8-bit binary encoding of a floating-point number here.
+ union {
+ uint32_t I;
+ float F;
+ } FPUnion;
+
+ uint8_t Sign = (Imm >> 7) & 0x1;
+ uint8_t Exp = (Imm >> 4) & 0x7;
+ uint8_t Mantissa = Imm & 0xf;
+
+ // 8-bit FP iEEEE Float Encoding
+ // abcd efgh aBbbbbbc defgh000 00000000 00000000
+ //
+ // where B = NOT(b);
+
+ FPUnion.I = 0;
+ FPUnion.I |= Sign << 31;
+ FPUnion.I |= ((Exp & 0x4) != 0 ? 0 : 1) << 30;
+ FPUnion.I |= ((Exp & 0x4) != 0 ? 0x1f : 0) << 25;
+ FPUnion.I |= (Exp & 0x3) << 23;
+ FPUnion.I |= Mantissa << 19;
+ return FPUnion.F;
+ }
+
+ /// getFP32Imm - Return an 8-bit floating-point version of the 32-bit
+ /// floating-point value. If the value cannot be represented as an 8-bit
+ /// floating-point value, then return -1.
+ static inline int getFP32Imm(const APInt &Imm) {
+ uint32_t Sign = Imm.lshr(31).getZExtValue() & 1;
+ int32_t Exp = (Imm.lshr(23).getSExtValue() & 0xff) - 127; // -126 to 127
+ int64_t Mantissa = Imm.getZExtValue() & 0x7fffff; // 23 bits
+
+ // We can handle 4 bits of mantissa.
+ // mantissa = (16+UInt(e:f:g:h))/16.
+ if (Mantissa & 0x7ffff)
+ return -1;
+ Mantissa >>= 19;
+ if ((Mantissa & 0xf) != Mantissa)
+ return -1;
+
+ // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
+ if (Exp < -3 || Exp > 4)
+ return -1;
+ Exp = ((Exp+3) & 0x7) ^ 4;
+
+ return ((int)Sign << 7) | (Exp << 4) | Mantissa;
+ }
+
+ static inline int getFP32Imm(const APFloat &FPImm) {
+ return getFP32Imm(FPImm.bitcastToAPInt());
+ }
+
+ /// getFP64Imm - Return an 8-bit floating-point version of the 64-bit
+ /// floating-point value. If the value cannot be represented as an 8-bit
+ /// floating-point value, then return -1.
+ static inline int getFP64Imm(const APInt &Imm) {
+ uint64_t Sign = Imm.lshr(63).getZExtValue() & 1;
+ int64_t Exp = (Imm.lshr(52).getSExtValue() & 0x7ff) - 1023; // -1022 to 1023
+ uint64_t Mantissa = Imm.getZExtValue() & 0xfffffffffffffULL;
+
+ // We can handle 4 bits of mantissa.
+ // mantissa = (16+UInt(e:f:g:h))/16.
+ if (Mantissa & 0xffffffffffffULL)
+ return -1;
+ Mantissa >>= 48;
+ if ((Mantissa & 0xf) != Mantissa)
+ return -1;
+
+ // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
+ if (Exp < -3 || Exp > 4)
+ return -1;
+ Exp = ((Exp+3) & 0x7) ^ 4;
+
+ return ((int)Sign << 7) | (Exp << 4) | Mantissa;
+ }
+
+ static inline int getFP64Imm(const APFloat &FPImm) {
+ return getFP64Imm(FPImm.bitcastToAPInt());
+ }
+
+} // end namespace ARM_AM
+} // end namespace llvm
+
+#endif
+
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
new file mode 100644
index 0000000..c31c5e6
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -0,0 +1,531 @@
+//===-- ARMAsmBackend.cpp - ARM Assembler Backend -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/ARMMCTargetDesc.h"
+#include "MCTargetDesc/ARMBaseInfo.h"
+#include "MCTargetDesc/ARMFixupKinds.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCDirectives.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCMachObjectWriter.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Object/MachOFormat.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+class ARMELFObjectWriter : public MCELFObjectTargetWriter {
+public:
+ ARMELFObjectWriter(Triple::OSType OSType)
+ : MCELFObjectTargetWriter(/*Is64Bit*/ false, OSType, ELF::EM_ARM,
+ /*HasRelocationAddend*/ false) {}
+};
+
+class ARMAsmBackend : public MCAsmBackend {
+ const MCSubtargetInfo* STI;
+ bool isThumbMode; // Currently emitting Thumb code.
+public:
+ ARMAsmBackend(const Target &T, const StringRef TT)
+ : MCAsmBackend(), STI(ARM_MC::createARMMCSubtargetInfo(TT, "", "")),
+ isThumbMode(TT.startswith("thumb")) {}
+
+ ~ARMAsmBackend() {
+ delete STI;
+ }
+
+ unsigned getNumFixupKinds() const { return ARM::NumTargetFixupKinds; }
+
+ bool hasNOP() const {
+ return (STI->getFeatureBits() & ARM::HasV6T2Ops) != 0;
+ }
+
+ const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const {
+ const static MCFixupKindInfo Infos[ARM::NumTargetFixupKinds] = {
+// This table *must* be in the order that the fixup_* kinds are defined in
+// ARMFixupKinds.h.
+//
+// Name Offset (bits) Size (bits) Flags
+{ "fixup_arm_ldst_pcrel_12", 1, 24, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_t2_ldst_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel |
+ MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
+{ "fixup_arm_pcrel_10", 1, 24, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_t2_pcrel_10", 0, 32, MCFixupKindInfo::FKF_IsPCRel |
+ MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
+{ "fixup_thumb_adr_pcrel_10",0, 8, MCFixupKindInfo::FKF_IsPCRel |
+ MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
+{ "fixup_arm_adr_pcrel_12", 1, 24, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_t2_adr_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel |
+ MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
+{ "fixup_arm_condbranch", 0, 24, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_uncondbranch", 0, 24, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_t2_condbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_t2_uncondbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_thumb_br", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_thumb_bl", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_thumb_blx", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_thumb_cb", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_thumb_cp", 0, 8, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_thumb_bcc", 0, 8, MCFixupKindInfo::FKF_IsPCRel },
+// movw / movt: 16-bits immediate but scattered into two chunks 0 - 12, 16 - 19.
+{ "fixup_arm_movt_hi16", 0, 20, 0 },
+{ "fixup_arm_movw_lo16", 0, 20, 0 },
+{ "fixup_t2_movt_hi16", 0, 20, 0 },
+{ "fixup_t2_movw_lo16", 0, 20, 0 },
+{ "fixup_arm_movt_hi16_pcrel", 0, 20, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_movw_lo16_pcrel", 0, 20, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_t2_movt_hi16_pcrel", 0, 20, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_t2_movw_lo16_pcrel", 0, 20, MCFixupKindInfo::FKF_IsPCRel },
+ };
+
+ if (Kind < FirstTargetFixupKind)
+ return MCAsmBackend::getFixupKindInfo(Kind);
+
+ assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
+ "Invalid kind!");
+ return Infos[Kind - FirstTargetFixupKind];
+ }
+
+ bool MayNeedRelaxation(const MCInst &Inst) const;
+
+ void RelaxInstruction(const MCInst &Inst, MCInst &Res) const;
+
+ bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const;
+
+ void HandleAssemblerFlag(MCAssemblerFlag Flag) {
+ switch (Flag) {
+ default: break;
+ case MCAF_Code16:
+ setIsThumb(true);
+ break;
+ case MCAF_Code32:
+ setIsThumb(false);
+ break;
+ }
+ }
+
+ unsigned getPointerSize() const { return 4; }
+ bool isThumb() const { return isThumbMode; }
+ void setIsThumb(bool it) { isThumbMode = it; }
+};
+} // end anonymous namespace
+
+bool ARMAsmBackend::MayNeedRelaxation(const MCInst &Inst) const {
+ // FIXME: Thumb targets, different move constant targets..
+ return false;
+}
+
+void ARMAsmBackend::RelaxInstruction(const MCInst &Inst, MCInst &Res) const {
+ assert(0 && "ARMAsmBackend::RelaxInstruction() unimplemented");
+ return;
+}
+
+bool ARMAsmBackend::WriteNopData(uint64_t Count, MCObjectWriter *OW) const {
+ const uint16_t Thumb1_16bitNopEncoding = 0x46c0; // using MOV r8,r8
+ const uint16_t Thumb2_16bitNopEncoding = 0xbf00; // NOP
+ const uint32_t ARMv4_NopEncoding = 0xe1a0000; // using MOV r0,r0
+ const uint32_t ARMv6T2_NopEncoding = 0xe3207800; // NOP
+ if (isThumb()) {
+ const uint16_t nopEncoding = hasNOP() ? Thumb2_16bitNopEncoding
+ : Thumb1_16bitNopEncoding;
+ uint64_t NumNops = Count / 2;
+ for (uint64_t i = 0; i != NumNops; ++i)
+ OW->Write16(nopEncoding);
+ if (Count & 1)
+ OW->Write8(0);
+ return true;
+ }
+ // ARM mode
+ const uint32_t nopEncoding = hasNOP() ? ARMv6T2_NopEncoding
+ : ARMv4_NopEncoding;
+ uint64_t NumNops = Count / 4;
+ for (uint64_t i = 0; i != NumNops; ++i)
+ OW->Write32(nopEncoding);
+ // FIXME: should this function return false when unable to write exactly
+ // 'Count' bytes with NOP encodings?
+ switch (Count % 4) {
+ default: break; // No leftover bytes to write
+ case 1: OW->Write8(0); break;
+ case 2: OW->Write16(0); break;
+ case 3: OW->Write16(0); OW->Write8(0xa0); break;
+ }
+
+ return true;
+}
+
+static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
+ switch (Kind) {
+ default:
+ llvm_unreachable("Unknown fixup kind!");
+ case FK_Data_1:
+ case FK_Data_2:
+ case FK_Data_4:
+ return Value;
+ case ARM::fixup_arm_movt_hi16:
+ Value >>= 16;
+ // Fallthrough
+ case ARM::fixup_arm_movw_lo16:
+ case ARM::fixup_arm_movt_hi16_pcrel:
+ case ARM::fixup_arm_movw_lo16_pcrel: {
+ unsigned Hi4 = (Value & 0xF000) >> 12;
+ unsigned Lo12 = Value & 0x0FFF;
+ // inst{19-16} = Hi4;
+ // inst{11-0} = Lo12;
+ Value = (Hi4 << 16) | (Lo12);
+ return Value;
+ }
+ case ARM::fixup_t2_movt_hi16:
+ Value >>= 16;
+ // Fallthrough
+ case ARM::fixup_t2_movw_lo16:
+ case ARM::fixup_t2_movt_hi16_pcrel: //FIXME: Shouldn't this be shifted like
+ // the other hi16 fixup?
+ case ARM::fixup_t2_movw_lo16_pcrel: {
+ unsigned Hi4 = (Value & 0xF000) >> 12;
+ unsigned i = (Value & 0x800) >> 11;
+ unsigned Mid3 = (Value & 0x700) >> 8;
+ unsigned Lo8 = Value & 0x0FF;
+ // inst{19-16} = Hi4;
+ // inst{26} = i;
+ // inst{14-12} = Mid3;
+ // inst{7-0} = Lo8;
+ Value = (Hi4 << 16) | (i << 26) | (Mid3 << 12) | (Lo8);
+ uint64_t swapped = (Value & 0xFFFF0000) >> 16;
+ swapped |= (Value & 0x0000FFFF) << 16;
+ return swapped;
+ }
+ case ARM::fixup_arm_ldst_pcrel_12:
+ // ARM PC-relative values are offset by 8.
+ Value -= 4;
+ // FALLTHROUGH
+ case ARM::fixup_t2_ldst_pcrel_12: {
+ // Offset by 4, adjusted by two due to the half-word ordering of thumb.
+ Value -= 4;
+ bool isAdd = true;
+ if ((int64_t)Value < 0) {
+ Value = -Value;
+ isAdd = false;
+ }
+ assert ((Value < 4096) && "Out of range pc-relative fixup value!");
+ Value |= isAdd << 23;
+
+ // Same addressing mode as fixup_arm_pcrel_10,
+ // but with 16-bit halfwords swapped.
+ if (Kind == ARM::fixup_t2_ldst_pcrel_12) {
+ uint64_t swapped = (Value & 0xFFFF0000) >> 16;
+ swapped |= (Value & 0x0000FFFF) << 16;
+ return swapped;
+ }
+
+ return Value;
+ }
+ case ARM::fixup_thumb_adr_pcrel_10:
+ return ((Value - 4) >> 2) & 0xff;
+ case ARM::fixup_arm_adr_pcrel_12: {
+ // ARM PC-relative values are offset by 8.
+ Value -= 8;
+ unsigned opc = 4; // bits {24-21}. Default to add: 0b0100
+ if ((int64_t)Value < 0) {
+ Value = -Value;
+ opc = 2; // 0b0010
+ }
+ assert(ARM_AM::getSOImmVal(Value) != -1 &&
+ "Out of range pc-relative fixup value!");
+ // Encode the immediate and shift the opcode into place.
+ return ARM_AM::getSOImmVal(Value) | (opc << 21);
+ }
+
+ case ARM::fixup_t2_adr_pcrel_12: {
+ Value -= 4;
+ unsigned opc = 0;
+ if ((int64_t)Value < 0) {
+ Value = -Value;
+ opc = 5;
+ }
+
+ uint32_t out = (opc << 21);
+ out |= (Value & 0x800) << 15;
+ out |= (Value & 0x700) << 4;
+ out |= (Value & 0x0FF);
+
+ uint64_t swapped = (out & 0xFFFF0000) >> 16;
+ swapped |= (out & 0x0000FFFF) << 16;
+ return swapped;
+ }
+
+ case ARM::fixup_arm_condbranch:
+ case ARM::fixup_arm_uncondbranch:
+ // These values don't encode the low two bits since they're always zero.
+ // Offset by 8 just as above.
+ return 0xffffff & ((Value - 8) >> 2);
+ case ARM::fixup_t2_uncondbranch: {
+ Value = Value - 4;
+ Value >>= 1; // Low bit is not encoded.
+
+ uint32_t out = 0;
+ bool I = Value & 0x800000;
+ bool J1 = Value & 0x400000;
+ bool J2 = Value & 0x200000;
+ J1 ^= I;
+ J2 ^= I;
+
+ out |= I << 26; // S bit
+ out |= !J1 << 13; // J1 bit
+ out |= !J2 << 11; // J2 bit
+ out |= (Value & 0x1FF800) << 5; // imm6 field
+ out |= (Value & 0x0007FF); // imm11 field
+
+ uint64_t swapped = (out & 0xFFFF0000) >> 16;
+ swapped |= (out & 0x0000FFFF) << 16;
+ return swapped;
+ }
+ case ARM::fixup_t2_condbranch: {
+ Value = Value - 4;
+ Value >>= 1; // Low bit is not encoded.
+
+ uint64_t out = 0;
+ out |= (Value & 0x80000) << 7; // S bit
+ out |= (Value & 0x40000) >> 7; // J2 bit
+ out |= (Value & 0x20000) >> 4; // J1 bit
+ out |= (Value & 0x1F800) << 5; // imm6 field
+ out |= (Value & 0x007FF); // imm11 field
+
+ uint32_t swapped = (out & 0xFFFF0000) >> 16;
+ swapped |= (out & 0x0000FFFF) << 16;
+ return swapped;
+ }
+ case ARM::fixup_arm_thumb_bl: {
+ // The value doesn't encode the low bit (always zero) and is offset by
+ // four. The value is encoded into disjoint bit positions in the destination
+ // opcode. x = unchanged, I = immediate value bit, S = sign extension bit
+ //
+ // BL: xxxxxSIIIIIIIIII xxxxxIIIIIIIIIII
+ //
+ // Note that the halfwords are stored high first, low second; so we need
+ // to transpose the fixup value here to map properly.
+ unsigned isNeg = (int64_t(Value - 4) < 0) ? 1 : 0;
+ uint32_t Binary = 0;
+ Value = 0x3fffff & ((Value - 4) >> 1);
+ Binary = (Value & 0x7ff) << 16; // Low imm11 value.
+ Binary |= (Value & 0x1ffc00) >> 11; // High imm10 value.
+ Binary |= isNeg << 10; // Sign bit.
+ return Binary;
+ }
+ case ARM::fixup_arm_thumb_blx: {
+ // The value doesn't encode the low two bits (always zero) and is offset by
+ // four (see fixup_arm_thumb_cp). The value is encoded into disjoint bit
+ // positions in the destination opcode. x = unchanged, I = immediate value
+ // bit, S = sign extension bit, 0 = zero.
+ //
+ // BLX: xxxxxSIIIIIIIIII xxxxxIIIIIIIIII0
+ //
+ // Note that the halfwords are stored high first, low second; so we need
+ // to transpose the fixup value here to map properly.
+ unsigned isNeg = (int64_t(Value-4) < 0) ? 1 : 0;
+ uint32_t Binary = 0;
+ Value = 0xfffff & ((Value - 2) >> 2);
+ Binary = (Value & 0x3ff) << 17; // Low imm10L value.
+ Binary |= (Value & 0xffc00) >> 10; // High imm10H value.
+ Binary |= isNeg << 10; // Sign bit.
+ return Binary;
+ }
+ case ARM::fixup_arm_thumb_cp:
+ // Offset by 4, and don't encode the low two bits. Two bytes of that
+ // 'off by 4' is implicitly handled by the half-word ordering of the
+ // Thumb encoding, so we only need to adjust by 2 here.
+ return ((Value - 2) >> 2) & 0xff;
+ case ARM::fixup_arm_thumb_cb: {
+ // Offset by 4 and don't encode the lower bit, which is always 0.
+ uint32_t Binary = (Value - 4) >> 1;
+ return ((Binary & 0x20) << 4) | ((Binary & 0x1f) << 3);
+ }
+ case ARM::fixup_arm_thumb_br:
+ // Offset by 4 and don't encode the lower bit, which is always 0.
+ return ((Value - 4) >> 1) & 0x7ff;
+ case ARM::fixup_arm_thumb_bcc:
+ // Offset by 4 and don't encode the lower bit, which is always 0.
+ return ((Value - 4) >> 1) & 0xff;
+ case ARM::fixup_arm_pcrel_10:
+ Value = Value - 4; // ARM fixups offset by an additional word and don't
+ // need to adjust for the half-word ordering.
+ // Fall through.
+ case ARM::fixup_t2_pcrel_10: {
+ // Offset by 4, adjusted by two due to the half-word ordering of thumb.
+ Value = Value - 4;
+ bool isAdd = true;
+ if ((int64_t)Value < 0) {
+ Value = -Value;
+ isAdd = false;
+ }
+ // These values don't encode the low two bits since they're always zero.
+ Value >>= 2;
+ assert ((Value < 256) && "Out of range pc-relative fixup value!");
+ Value |= isAdd << 23;
+
+ // Same addressing mode as fixup_arm_pcrel_10,
+ // but with 16-bit halfwords swapped.
+ if (Kind == ARM::fixup_t2_pcrel_10) {
+ uint32_t swapped = (Value & 0xFFFF0000) >> 16;
+ swapped |= (Value & 0x0000FFFF) << 16;
+ return swapped;
+ }
+
+ return Value;
+ }
+ }
+}
+
+namespace {
+
+// FIXME: This should be in a separate file.
+// ELF is an ELF of course...
+class ELFARMAsmBackend : public ARMAsmBackend {
+public:
+ Triple::OSType OSType;
+ ELFARMAsmBackend(const Target &T, const StringRef TT,
+ Triple::OSType _OSType)
+ : ARMAsmBackend(T, TT), OSType(_OSType) { }
+
+ void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ uint64_t Value) const;
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ return createELFObjectWriter(new ARMELFObjectWriter(OSType), OS,
+ /*IsLittleEndian*/ true);
+ }
+};
+
+// FIXME: Raise this to share code between Darwin and ELF.
+void ELFARMAsmBackend::ApplyFixup(const MCFixup &Fixup, char *Data,
+ unsigned DataSize, uint64_t Value) const {
+ unsigned NumBytes = 4; // FIXME: 2 for Thumb
+ Value = adjustFixupValue(Fixup.getKind(), Value);
+ if (!Value) return; // Doesn't change encoding.
+
+ unsigned Offset = Fixup.getOffset();
+
+ // For each byte of the fragment that the fixup touches, mask in the bits from
+ // the fixup value. The Value has been "split up" into the appropriate
+ // bitfields above.
+ for (unsigned i = 0; i != NumBytes; ++i)
+ Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff);
+}
+
+// FIXME: This should be in a separate file.
+class DarwinARMAsmBackend : public ARMAsmBackend {
+public:
+ const object::mach::CPUSubtypeARM Subtype;
+ DarwinARMAsmBackend(const Target &T, const StringRef TT,
+ object::mach::CPUSubtypeARM st)
+ : ARMAsmBackend(T, TT), Subtype(st) { }
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ return createARMMachObjectWriter(OS, /*Is64Bit=*/false,
+ object::mach::CTM_ARM,
+ Subtype);
+ }
+
+ void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ uint64_t Value) const;
+
+ virtual bool doesSectionRequireSymbols(const MCSection &Section) const {
+ return false;
+ }
+};
+
+/// getFixupKindNumBytes - The number of bytes the fixup may change.
+static unsigned getFixupKindNumBytes(unsigned Kind) {
+ switch (Kind) {
+ default:
+ llvm_unreachable("Unknown fixup kind!");
+
+ case FK_Data_1:
+ case ARM::fixup_arm_thumb_bcc:
+ case ARM::fixup_arm_thumb_cp:
+ case ARM::fixup_thumb_adr_pcrel_10:
+ return 1;
+
+ case FK_Data_2:
+ case ARM::fixup_arm_thumb_br:
+ case ARM::fixup_arm_thumb_cb:
+ return 2;
+
+ case ARM::fixup_arm_ldst_pcrel_12:
+ case ARM::fixup_arm_pcrel_10:
+ case ARM::fixup_arm_adr_pcrel_12:
+ case ARM::fixup_arm_condbranch:
+ case ARM::fixup_arm_uncondbranch:
+ return 3;
+
+ case FK_Data_4:
+ case ARM::fixup_t2_ldst_pcrel_12:
+ case ARM::fixup_t2_condbranch:
+ case ARM::fixup_t2_uncondbranch:
+ case ARM::fixup_t2_pcrel_10:
+ case ARM::fixup_t2_adr_pcrel_12:
+ case ARM::fixup_arm_thumb_bl:
+ case ARM::fixup_arm_thumb_blx:
+ case ARM::fixup_arm_movt_hi16:
+ case ARM::fixup_arm_movw_lo16:
+ case ARM::fixup_arm_movt_hi16_pcrel:
+ case ARM::fixup_arm_movw_lo16_pcrel:
+ case ARM::fixup_t2_movt_hi16:
+ case ARM::fixup_t2_movw_lo16:
+ case ARM::fixup_t2_movt_hi16_pcrel:
+ case ARM::fixup_t2_movw_lo16_pcrel:
+ return 4;
+ }
+}
+
+void DarwinARMAsmBackend::ApplyFixup(const MCFixup &Fixup, char *Data,
+ unsigned DataSize, uint64_t Value) const {
+ unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind());
+ Value = adjustFixupValue(Fixup.getKind(), Value);
+ if (!Value) return; // Doesn't change encoding.
+
+ unsigned Offset = Fixup.getOffset();
+ assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!");
+
+ // For each byte of the fragment that the fixup touches, mask in the
+ // bits from the fixup value.
+ for (unsigned i = 0; i != NumBytes; ++i)
+ Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff);
+}
+
+} // end anonymous namespace
+
+MCAsmBackend *llvm::createARMAsmBackend(const Target &T, StringRef TT) {
+ Triple TheTriple(TT);
+
+ if (TheTriple.isOSDarwin()) {
+ if (TheTriple.getArchName() == "armv4t" ||
+ TheTriple.getArchName() == "thumbv4t")
+ return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V4T);
+ else if (TheTriple.getArchName() == "armv5e" ||
+ TheTriple.getArchName() == "thumbv5e")
+ return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V5TEJ);
+ else if (TheTriple.getArchName() == "armv6" ||
+ TheTriple.getArchName() == "thumbv6")
+ return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V6);
+ return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V7);
+ }
+
+ if (TheTriple.isOSWindows())
+ assert(0 && "Windows not supported on ARM");
+
+ return new ELFARMAsmBackend(T, TT, Triple(TT).getOS());
+}
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
new file mode 100644
index 0000000..ec4b6ff
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
@@ -0,0 +1,449 @@
+//===-- ARMBaseInfo.h - Top level definitions for ARM -------- --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains small standalone helper functions and enum definitions for
+// the ARM target useful for the compiler back-end and the MC libraries.
+// As such, it deliberately does not include references to LLVM core
+// code gen types, passes, etc..
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMBASEINFO_H
+#define ARMBASEINFO_H
+
+#include "ARMMCTargetDesc.h"
+#include "llvm/Support/ErrorHandling.h"
+
+namespace llvm {
+
+// Enums corresponding to ARM condition codes
+namespace ARMCC {
+ // The CondCodes constants map directly to the 4-bit encoding of the
+ // condition field for predicated instructions.
+ enum CondCodes { // Meaning (integer) Meaning (floating-point)
+ EQ, // Equal Equal
+ NE, // Not equal Not equal, or unordered
+ HS, // Carry set >, ==, or unordered
+ LO, // Carry clear Less than
+ MI, // Minus, negative Less than
+ PL, // Plus, positive or zero >, ==, or unordered
+ VS, // Overflow Unordered
+ VC, // No overflow Not unordered
+ HI, // Unsigned higher Greater than, or unordered
+ LS, // Unsigned lower or same Less than or equal
+ GE, // Greater than or equal Greater than or equal
+ LT, // Less than Less than, or unordered
+ GT, // Greater than Greater than
+ LE, // Less than or equal <, ==, or unordered
+ AL // Always (unconditional) Always (unconditional)
+ };
+
+ inline static CondCodes getOppositeCondition(CondCodes CC) {
+ switch (CC) {
+ default: llvm_unreachable("Unknown condition code");
+ case EQ: return NE;
+ case NE: return EQ;
+ case HS: return LO;
+ case LO: return HS;
+ case MI: return PL;
+ case PL: return MI;
+ case VS: return VC;
+ case VC: return VS;
+ case HI: return LS;
+ case LS: return HI;
+ case GE: return LT;
+ case LT: return GE;
+ case GT: return LE;
+ case LE: return GT;
+ }
+ }
+} // namespace ARMCC
+
+inline static const char *ARMCondCodeToString(ARMCC::CondCodes CC) {
+ switch (CC) {
+ default: llvm_unreachable("Unknown condition code");
+ case ARMCC::EQ: return "eq";
+ case ARMCC::NE: return "ne";
+ case ARMCC::HS: return "hs";
+ case ARMCC::LO: return "lo";
+ case ARMCC::MI: return "mi";
+ case ARMCC::PL: return "pl";
+ case ARMCC::VS: return "vs";
+ case ARMCC::VC: return "vc";
+ case ARMCC::HI: return "hi";
+ case ARMCC::LS: return "ls";
+ case ARMCC::GE: return "ge";
+ case ARMCC::LT: return "lt";
+ case ARMCC::GT: return "gt";
+ case ARMCC::LE: return "le";
+ case ARMCC::AL: return "al";
+ }
+}
+
+namespace ARM_PROC {
+ enum IMod {
+ IE = 2,
+ ID = 3
+ };
+
+ enum IFlags {
+ F = 1,
+ I = 2,
+ A = 4
+ };
+
+ inline static const char *IFlagsToString(unsigned val) {
+ switch (val) {
+ default: llvm_unreachable("Unknown iflags operand");
+ case F: return "f";
+ case I: return "i";
+ case A: return "a";
+ }
+ }
+
+ inline static const char *IModToString(unsigned val) {
+ switch (val) {
+ default: llvm_unreachable("Unknown imod operand");
+ case IE: return "ie";
+ case ID: return "id";
+ }
+ }
+}
+
+namespace ARM_MB {
+ // The Memory Barrier Option constants map directly to the 4-bit encoding of
+ // the option field for memory barrier operations.
+ enum MemBOpt {
+ SY = 15,
+ ST = 14,
+ ISH = 11,
+ ISHST = 10,
+ NSH = 7,
+ NSHST = 6,
+ OSH = 3,
+ OSHST = 2
+ };
+
+ inline static const char *MemBOptToString(unsigned val) {
+ switch (val) {
+ default: llvm_unreachable("Unknown memory operation");
+ case SY: return "sy";
+ case ST: return "st";
+ case ISH: return "ish";
+ case ISHST: return "ishst";
+ case NSH: return "nsh";
+ case NSHST: return "nshst";
+ case OSH: return "osh";
+ case OSHST: return "oshst";
+ }
+ }
+} // namespace ARM_MB
+
+/// getARMRegisterNumbering - Given the enum value for some register, e.g.
+/// ARM::LR, return the number that it corresponds to (e.g. 14).
+inline static unsigned getARMRegisterNumbering(unsigned Reg) {
+ using namespace ARM;
+ switch (Reg) {
+ default:
+ llvm_unreachable("Unknown ARM register!");
+ case R0: case S0: case D0: case Q0: return 0;
+ case R1: case S1: case D1: case Q1: return 1;
+ case R2: case S2: case D2: case Q2: return 2;
+ case R3: case S3: case D3: case Q3: return 3;
+ case R4: case S4: case D4: case Q4: return 4;
+ case R5: case S5: case D5: case Q5: return 5;
+ case R6: case S6: case D6: case Q6: return 6;
+ case R7: case S7: case D7: case Q7: return 7;
+ case R8: case S8: case D8: case Q8: return 8;
+ case R9: case S9: case D9: case Q9: return 9;
+ case R10: case S10: case D10: case Q10: return 10;
+ case R11: case S11: case D11: case Q11: return 11;
+ case R12: case S12: case D12: case Q12: return 12;
+ case SP: case S13: case D13: case Q13: return 13;
+ case LR: case S14: case D14: case Q14: return 14;
+ case PC: case S15: case D15: case Q15: return 15;
+
+ case S16: case D16: return 16;
+ case S17: case D17: return 17;
+ case S18: case D18: return 18;
+ case S19: case D19: return 19;
+ case S20: case D20: return 20;
+ case S21: case D21: return 21;
+ case S22: case D22: return 22;
+ case S23: case D23: return 23;
+ case S24: case D24: return 24;
+ case S25: case D25: return 25;
+ case S26: case D26: return 26;
+ case S27: case D27: return 27;
+ case S28: case D28: return 28;
+ case S29: case D29: return 29;
+ case S30: case D30: return 30;
+ case S31: case D31: return 31;
+ }
+}
+
+/// isARMLowRegister - Returns true if the register is a low register (r0-r7).
+///
+static inline bool isARMLowRegister(unsigned Reg) {
+ using namespace ARM;
+ switch (Reg) {
+ case R0: case R1: case R2: case R3:
+ case R4: case R5: case R6: case R7:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/// ARMII - This namespace holds all of the target specific flags that
+/// instruction info tracks.
+///
+namespace ARMII {
+
+ /// ARM Index Modes
+ enum IndexMode {
+ IndexModeNone = 0,
+ IndexModePre = 1,
+ IndexModePost = 2,
+ IndexModeUpd = 3
+ };
+
+ /// ARM Addressing Modes
+ enum AddrMode {
+ AddrModeNone = 0,
+ AddrMode1 = 1,
+ AddrMode2 = 2,
+ AddrMode3 = 3,
+ AddrMode4 = 4,
+ AddrMode5 = 5,
+ AddrMode6 = 6,
+ AddrModeT1_1 = 7,
+ AddrModeT1_2 = 8,
+ AddrModeT1_4 = 9,
+ AddrModeT1_s = 10, // i8 * 4 for pc and sp relative data
+ AddrModeT2_i12 = 11,
+ AddrModeT2_i8 = 12,
+ AddrModeT2_so = 13,
+ AddrModeT2_pc = 14, // +/- i12 for pc relative data
+ AddrModeT2_i8s4 = 15, // i8 * 4
+ AddrMode_i12 = 16
+ };
+
+ inline static const char *AddrModeToString(AddrMode addrmode) {
+ switch (addrmode) {
+ default: llvm_unreachable("Unknown memory operation");
+ case AddrModeNone: return "AddrModeNone";
+ case AddrMode1: return "AddrMode1";
+ case AddrMode2: return "AddrMode2";
+ case AddrMode3: return "AddrMode3";
+ case AddrMode4: return "AddrMode4";
+ case AddrMode5: return "AddrMode5";
+ case AddrMode6: return "AddrMode6";
+ case AddrModeT1_1: return "AddrModeT1_1";
+ case AddrModeT1_2: return "AddrModeT1_2";
+ case AddrModeT1_4: return "AddrModeT1_4";
+ case AddrModeT1_s: return "AddrModeT1_s";
+ case AddrModeT2_i12: return "AddrModeT2_i12";
+ case AddrModeT2_i8: return "AddrModeT2_i8";
+ case AddrModeT2_so: return "AddrModeT2_so";
+ case AddrModeT2_pc: return "AddrModeT2_pc";
+ case AddrModeT2_i8s4: return "AddrModeT2_i8s4";
+ case AddrMode_i12: return "AddrMode_i12";
+ }
+ }
+
+ /// Target Operand Flag enum.
+ enum TOF {
+ //===------------------------------------------------------------------===//
+ // ARM Specific MachineOperand flags.
+
+ MO_NO_FLAG,
+
+ /// MO_LO16 - On a symbol operand, this represents a relocation containing
+ /// lower 16 bit of the address. Used only via movw instruction.
+ MO_LO16,
+
+ /// MO_HI16 - On a symbol operand, this represents a relocation containing
+ /// higher 16 bit of the address. Used only via movt instruction.
+ MO_HI16,
+
+ /// MO_LO16_NONLAZY - On a symbol operand "FOO", this represents a
+ /// relocation containing lower 16 bit of the non-lazy-ptr indirect symbol,
+ /// i.e. "FOO$non_lazy_ptr".
+ /// Used only via movw instruction.
+ MO_LO16_NONLAZY,
+
+ /// MO_HI16_NONLAZY - On a symbol operand "FOO", this represents a
+ /// relocation containing lower 16 bit of the non-lazy-ptr indirect symbol,
+ /// i.e. "FOO$non_lazy_ptr". Used only via movt instruction.
+ MO_HI16_NONLAZY,
+
+ /// MO_LO16_NONLAZY_PIC - On a symbol operand "FOO", this represents a
+ /// relocation containing lower 16 bit of the PC relative address of the
+ /// non-lazy-ptr indirect symbol, i.e. "FOO$non_lazy_ptr - LABEL".
+ /// Used only via movw instruction.
+ MO_LO16_NONLAZY_PIC,
+
+ /// MO_HI16_NONLAZY_PIC - On a symbol operand "FOO", this represents a
+ /// relocation containing lower 16 bit of the PC relative address of the
+ /// non-lazy-ptr indirect symbol, i.e. "FOO$non_lazy_ptr - LABEL".
+ /// Used only via movt instruction.
+ MO_HI16_NONLAZY_PIC,
+
+ /// MO_PLT - On a symbol operand, this represents an ELF PLT reference on a
+ /// call operand.
+ MO_PLT
+ };
+
+ enum {
+ //===------------------------------------------------------------------===//
+ // Instruction Flags.
+
+ //===------------------------------------------------------------------===//
+ // This four-bit field describes the addressing mode used.
+ AddrModeMask = 0x1f, // The AddrMode enums are declared in ARMBaseInfo.h
+
+ // IndexMode - Unindex, pre-indexed, or post-indexed are valid for load
+ // and store ops only. Generic "updating" flag is used for ld/st multiple.
+ // The index mode enums are declared in ARMBaseInfo.h
+ IndexModeShift = 5,
+ IndexModeMask = 3 << IndexModeShift,
+
+ //===------------------------------------------------------------------===//
+ // Instruction encoding formats.
+ //
+ FormShift = 7,
+ FormMask = 0x3f << FormShift,
+
+ // Pseudo instructions
+ Pseudo = 0 << FormShift,
+
+ // Multiply instructions
+ MulFrm = 1 << FormShift,
+
+ // Branch instructions
+ BrFrm = 2 << FormShift,
+ BrMiscFrm = 3 << FormShift,
+
+ // Data Processing instructions
+ DPFrm = 4 << FormShift,
+ DPSoRegFrm = 5 << FormShift,
+
+ // Load and Store
+ LdFrm = 6 << FormShift,
+ StFrm = 7 << FormShift,
+ LdMiscFrm = 8 << FormShift,
+ StMiscFrm = 9 << FormShift,
+ LdStMulFrm = 10 << FormShift,
+
+ LdStExFrm = 11 << FormShift,
+
+ // Miscellaneous arithmetic instructions
+ ArithMiscFrm = 12 << FormShift,
+ SatFrm = 13 << FormShift,
+
+ // Extend instructions
+ ExtFrm = 14 << FormShift,
+
+ // VFP formats
+ VFPUnaryFrm = 15 << FormShift,
+ VFPBinaryFrm = 16 << FormShift,
+ VFPConv1Frm = 17 << FormShift,
+ VFPConv2Frm = 18 << FormShift,
+ VFPConv3Frm = 19 << FormShift,
+ VFPConv4Frm = 20 << FormShift,
+ VFPConv5Frm = 21 << FormShift,
+ VFPLdStFrm = 22 << FormShift,
+ VFPLdStMulFrm = 23 << FormShift,
+ VFPMiscFrm = 24 << FormShift,
+
+ // Thumb format
+ ThumbFrm = 25 << FormShift,
+
+ // Miscelleaneous format
+ MiscFrm = 26 << FormShift,
+
+ // NEON formats
+ NGetLnFrm = 27 << FormShift,
+ NSetLnFrm = 28 << FormShift,
+ NDupFrm = 29 << FormShift,
+ NLdStFrm = 30 << FormShift,
+ N1RegModImmFrm= 31 << FormShift,
+ N2RegFrm = 32 << FormShift,
+ NVCVTFrm = 33 << FormShift,
+ NVDupLnFrm = 34 << FormShift,
+ N2RegVShLFrm = 35 << FormShift,
+ N2RegVShRFrm = 36 << FormShift,
+ N3RegFrm = 37 << FormShift,
+ N3RegVShFrm = 38 << FormShift,
+ NVExtFrm = 39 << FormShift,
+ NVMulSLFrm = 40 << FormShift,
+ NVTBLFrm = 41 << FormShift,
+
+ //===------------------------------------------------------------------===//
+ // Misc flags.
+
+ // UnaryDP - Indicates this is a unary data processing instruction, i.e.
+ // it doesn't have a Rn operand.
+ UnaryDP = 1 << 13,
+
+ // Xform16Bit - Indicates this Thumb2 instruction may be transformed into
+ // a 16-bit Thumb instruction if certain conditions are met.
+ Xform16Bit = 1 << 14,
+
+ // ThumbArithFlagSetting - The instruction is a 16-bit flag setting Thumb
+ // instruction. Used by the parser to determine whether to require the 'S'
+ // suffix on the mnemonic (when not in an IT block) or preclude it (when
+ // in an IT block).
+ ThumbArithFlagSetting = 1 << 18,
+
+ //===------------------------------------------------------------------===//
+ // Code domain.
+ DomainShift = 15,
+ DomainMask = 7 << DomainShift,
+ DomainGeneral = 0 << DomainShift,
+ DomainVFP = 1 << DomainShift,
+ DomainNEON = 2 << DomainShift,
+ DomainNEONA8 = 4 << DomainShift,
+
+ //===------------------------------------------------------------------===//
+ // Field shifts - such shifts are used to set field while generating
+ // machine instructions.
+ //
+ // FIXME: This list will need adjusting/fixing as the MC code emitter
+ // takes shape and the ARMCodeEmitter.cpp bits go away.
+ ShiftTypeShift = 4,
+
+ M_BitShift = 5,
+ ShiftImmShift = 5,
+ ShiftShift = 7,
+ N_BitShift = 7,
+ ImmHiShift = 8,
+ SoRotImmShift = 8,
+ RegRsShift = 8,
+ ExtRotImmShift = 10,
+ RegRdLoShift = 12,
+ RegRdShift = 12,
+ RegRdHiShift = 16,
+ RegRnShift = 16,
+ S_BitShift = 20,
+ W_BitShift = 21,
+ AM3_I_BitShift = 22,
+ D_BitShift = 22,
+ U_BitShift = 23,
+ P_BitShift = 24,
+ I_BitShift = 25,
+ CondShift = 28
+ };
+
+} // end namespace ARMII
+
+} // end namespace llvm;
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h
new file mode 100644
index 0000000..350c92d
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h
@@ -0,0 +1,97 @@
+//===-- ARM/ARMFixupKinds.h - ARM Specific Fixup Entries --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ARM_ARMFIXUPKINDS_H
+#define LLVM_ARM_ARMFIXUPKINDS_H
+
+#include "llvm/MC/MCFixup.h"
+
+namespace llvm {
+namespace ARM {
+enum Fixups {
+ // fixup_arm_ldst_pcrel_12 - 12-bit PC relative relocation for symbol
+ // addresses
+ fixup_arm_ldst_pcrel_12 = FirstTargetFixupKind,
+
+ // fixup_t2_ldst_pcrel_12 - Equivalent to fixup_arm_ldst_pcrel_12, with
+ // the 16-bit halfwords reordered.
+ fixup_t2_ldst_pcrel_12,
+
+ // fixup_arm_pcrel_10 - 10-bit PC relative relocation for symbol addresses
+ // used in VFP instructions where the lower 2 bits are not encoded
+ // (so it's encoded as an 8-bit immediate).
+ fixup_arm_pcrel_10,
+ // fixup_t2_pcrel_10 - Equivalent to fixup_arm_pcrel_10, accounting for
+ // the short-swapped encoding of Thumb2 instructions.
+ fixup_t2_pcrel_10,
+ // fixup_thumb_adr_pcrel_10 - 10-bit PC relative relocation for symbol
+ // addresses where the lower 2 bits are not encoded (so it's encoded as an
+ // 8-bit immediate).
+ fixup_thumb_adr_pcrel_10,
+ // fixup_arm_adr_pcrel_12 - 12-bit PC relative relocation for the ADR
+ // instruction.
+ fixup_arm_adr_pcrel_12,
+ // fixup_t2_adr_pcrel_12 - 12-bit PC relative relocation for the ADR
+ // instruction.
+ fixup_t2_adr_pcrel_12,
+ // fixup_arm_condbranch - 24-bit PC relative relocation for conditional branch
+ // instructions.
+ fixup_arm_condbranch,
+ // fixup_arm_uncondbranch - 24-bit PC relative relocation for
+ // branch instructions. (unconditional)
+ fixup_arm_uncondbranch,
+ // fixup_t2_condbranch - 20-bit PC relative relocation for Thumb2 direct
+ // uconditional branch instructions.
+ fixup_t2_condbranch,
+ // fixup_t2_uncondbranch - 20-bit PC relative relocation for Thumb2 direct
+ // branch unconditional branch instructions.
+ fixup_t2_uncondbranch,
+
+ // fixup_arm_thumb_br - 12-bit fixup for Thumb B instructions.
+ fixup_arm_thumb_br,
+
+ // fixup_arm_thumb_bl - Fixup for Thumb BL instructions.
+ fixup_arm_thumb_bl,
+
+ // fixup_arm_thumb_blx - Fixup for Thumb BLX instructions.
+ fixup_arm_thumb_blx,
+
+ // fixup_arm_thumb_cb - Fixup for Thumb branch instructions.
+ fixup_arm_thumb_cb,
+
+ // fixup_arm_thumb_cp - Fixup for Thumb load/store from constant pool instrs.
+ fixup_arm_thumb_cp,
+
+ // fixup_arm_thumb_bcc - Fixup for Thumb conditional branching instructions.
+ fixup_arm_thumb_bcc,
+
+ // The next two are for the movt/movw pair
+ // the 16bit imm field are split into imm{15-12} and imm{11-0}
+ fixup_arm_movt_hi16, // :upper16:
+ fixup_arm_movw_lo16, // :lower16:
+ fixup_t2_movt_hi16, // :upper16:
+ fixup_t2_movw_lo16, // :lower16:
+
+ // It is possible to create an "immediate" that happens to be pcrel.
+ // movw r0, :lower16:Foo-(Bar+8) and movt r0, :upper16:Foo-(Bar+8)
+ // result in different reloc tags than the above two.
+ // Needed to support ELF::R_ARM_MOVT_PREL and ELF::R_ARM_MOVW_PREL_NC
+ fixup_arm_movt_hi16_pcrel, // :upper16:
+ fixup_arm_movw_lo16_pcrel, // :lower16:
+ fixup_t2_movt_hi16_pcrel, // :upper16:
+ fixup_t2_movw_lo16_pcrel, // :lower16:
+
+ // Marker
+ LastTargetFixupKind,
+ NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
+};
+}
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
new file mode 100644
index 0000000..1c109e0
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
@@ -0,0 +1,83 @@
+//===-- ARMMCAsmInfo.cpp - ARM asm properties -------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the ARMMCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMMCAsmInfo.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+cl::opt<bool>
+EnableARMEHABI("arm-enable-ehabi", cl::Hidden,
+ cl::desc("Generate ARM EHABI tables"),
+ cl::init(false));
+
+
+static const char *const arm_asm_table[] = {
+ "{r0}", "r0",
+ "{r1}", "r1",
+ "{r2}", "r2",
+ "{r3}", "r3",
+ "{r4}", "r4",
+ "{r5}", "r5",
+ "{r6}", "r6",
+ "{r7}", "r7",
+ "{r8}", "r8",
+ "{r9}", "r9",
+ "{r10}", "r10",
+ "{r11}", "r11",
+ "{r12}", "r12",
+ "{r13}", "r13",
+ "{r14}", "r14",
+ "{lr}", "lr",
+ "{sp}", "sp",
+ "{ip}", "ip",
+ "{fp}", "fp",
+ "{sl}", "sl",
+ "{memory}", "memory",
+ "{cc}", "cc",
+ 0,0
+};
+
+ARMMCAsmInfoDarwin::ARMMCAsmInfoDarwin() {
+ AsmTransCBE = arm_asm_table;
+ Data64bitsDirective = 0;
+ CommentString = "@";
+ Code16Directive = ".code\t16";
+ Code32Directive = ".code\t32";
+
+ SupportsDebugInformation = true;
+
+ // Exceptions handling
+ ExceptionsType = ExceptionHandling::SjLj;
+}
+
+ARMELFMCAsmInfo::ARMELFMCAsmInfo() {
+ // ".comm align is in bytes but .align is pow-2."
+ AlignmentIsInBytes = false;
+
+ Data64bitsDirective = 0;
+ CommentString = "@";
+ PrivateGlobalPrefix = ".L";
+ Code16Directive = ".code\t16";
+ Code32Directive = ".code\t32";
+
+ WeakRefDirective = "\t.weak\t";
+ LCOMMDirectiveType = LCOMM::NoAlignment;
+
+ HasLEB128 = true;
+ SupportsDebugInformation = true;
+
+ // Exceptions handling
+ if (EnableARMEHABI)
+ ExceptionsType = ExceptionHandling::ARM;
+}
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h
new file mode 100644
index 0000000..90f7822
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h
@@ -0,0 +1,31 @@
+//=====-- ARMMCAsmInfo.h - ARM asm properties -------------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the ARMMCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ARMTARGETASMINFO_H
+#define LLVM_ARMTARGETASMINFO_H
+
+#include "llvm/MC/MCAsmInfoDarwin.h"
+
+namespace llvm {
+
+ struct ARMMCAsmInfoDarwin : public MCAsmInfoDarwin {
+ explicit ARMMCAsmInfoDarwin();
+ };
+
+ struct ARMELFMCAsmInfo : public MCAsmInfo {
+ explicit ARMELFMCAsmInfo();
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
new file mode 100644
index 0000000..865c3e2
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
@@ -0,0 +1,1468 @@
+//===-- ARM/ARMMCCodeEmitter.cpp - Convert ARM code to machine code -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ARMMCCodeEmitter class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mccodeemitter"
+#include "MCTargetDesc/ARMAddressingModes.h"
+#include "MCTargetDesc/ARMBaseInfo.h"
+#include "MCTargetDesc/ARMFixupKinds.h"
+#include "MCTargetDesc/ARMMCExpr.h"
+#include "MCTargetDesc/ARMMCTargetDesc.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+STATISTIC(MCNumEmitted, "Number of MC instructions emitted.");
+STATISTIC(MCNumCPRelocations, "Number of constant pool relocations created.");
+
+namespace {
+class ARMMCCodeEmitter : public MCCodeEmitter {
+ ARMMCCodeEmitter(const ARMMCCodeEmitter &); // DO NOT IMPLEMENT
+ void operator=(const ARMMCCodeEmitter &); // DO NOT IMPLEMENT
+ const MCInstrInfo &MCII;
+ const MCSubtargetInfo &STI;
+
+public:
+ ARMMCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti,
+ MCContext &ctx)
+ : MCII(mcii), STI(sti) {
+ }
+
+ ~ARMMCCodeEmitter() {}
+
+ bool isThumb() const {
+ // FIXME: Can tablegen auto-generate this?
+ return (STI.getFeatureBits() & ARM::ModeThumb) != 0;
+ }
+ bool isThumb2() const {
+ return isThumb() && (STI.getFeatureBits() & ARM::FeatureThumb2) != 0;
+ }
+ bool isTargetDarwin() const {
+ Triple TT(STI.getTargetTriple());
+ Triple::OSType OS = TT.getOS();
+ return OS == Triple::Darwin || OS == Triple::MacOSX || OS == Triple::IOS;
+ }
+
+ unsigned getMachineSoImmOpValue(unsigned SoImm) const;
+
+ // getBinaryCodeForInstr - TableGen'erated function for getting the
+ // binary encoding for an instruction.
+ unsigned getBinaryCodeForInstr(const MCInst &MI,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getMachineOpValue - Return binary encoding of operand. If the machine
+ /// operand requires relocation, record the relocation and return zero.
+ unsigned getMachineOpValue(const MCInst &MI,const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getHiLo16ImmOpValue - Return the encoding for the hi / low 16-bit of
+ /// the specified operand. This is used for operands with :lower16: and
+ /// :upper16: prefixes.
+ uint32_t getHiLo16ImmOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ bool EncodeAddrModeOpValues(const MCInst &MI, unsigned OpIdx,
+ unsigned &Reg, unsigned &Imm,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getThumbBLTargetOpValue - Return encoding info for Thumb immediate
+ /// BL branch target.
+ uint32_t getThumbBLTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getThumbBLXTargetOpValue - Return encoding info for Thumb immediate
+ /// BLX branch target.
+ uint32_t getThumbBLXTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getThumbBRTargetOpValue - Return encoding info for Thumb branch target.
+ uint32_t getThumbBRTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getThumbBCCTargetOpValue - Return encoding info for Thumb branch target.
+ uint32_t getThumbBCCTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getThumbCBTargetOpValue - Return encoding info for Thumb branch target.
+ uint32_t getThumbCBTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getBranchTargetOpValue - Return encoding info for 24-bit immediate
+ /// branch target.
+ uint32_t getBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getUnconditionalBranchTargetOpValue - Return encoding info for 24-bit
+ /// immediate Thumb2 direct branch target.
+ uint32_t getUnconditionalBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getARMBranchTargetOpValue - Return encoding info for 24-bit immediate
+ /// branch target.
+ uint32_t getARMBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ uint32_t getARMBLXTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getAdrLabelOpValue - Return encoding info for 12-bit immediate
+ /// ADR label target.
+ uint32_t getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ uint32_t getThumbAdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ uint32_t getT2AdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+
+ /// getAddrModeImm12OpValue - Return encoding info for 'reg +/- imm12'
+ /// operand.
+ uint32_t getAddrModeImm12OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getThumbAddrModeRegRegOpValue - Return encoding for 'reg + reg' operand.
+ uint32_t getThumbAddrModeRegRegOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups)const;
+
+ /// getT2AddrModeImm8s4OpValue - Return encoding info for 'reg +/- imm8<<2'
+ /// operand.
+ uint32_t getT2AddrModeImm8s4OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getT2AddrModeImm0_1020s4OpValue - Return encoding info for 'reg + imm8<<2'
+ /// operand.
+ uint32_t getT2AddrModeImm0_1020s4OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getT2Imm8s4OpValue - Return encoding info for '+/- imm8<<2'
+ /// operand.
+ uint32_t getT2Imm8s4OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+
+ /// getLdStSORegOpValue - Return encoding info for 'reg +/- reg shop imm'
+ /// operand as needed by load/store instructions.
+ uint32_t getLdStSORegOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getLdStmModeOpValue - Return encoding for load/store multiple mode.
+ uint32_t getLdStmModeOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ ARM_AM::AMSubMode Mode = (ARM_AM::AMSubMode)MI.getOperand(OpIdx).getImm();
+ switch (Mode) {
+ default: assert(0 && "Unknown addressing sub-mode!");
+ case ARM_AM::da: return 0;
+ case ARM_AM::ia: return 1;
+ case ARM_AM::db: return 2;
+ case ARM_AM::ib: return 3;
+ }
+ }
+ /// getShiftOp - Return the shift opcode (bit[6:5]) of the immediate value.
+ ///
+ unsigned getShiftOp(ARM_AM::ShiftOpc ShOpc) const {
+ switch (ShOpc) {
+ default: llvm_unreachable("Unknown shift opc!");
+ case ARM_AM::no_shift:
+ case ARM_AM::lsl: return 0;
+ case ARM_AM::lsr: return 1;
+ case ARM_AM::asr: return 2;
+ case ARM_AM::ror:
+ case ARM_AM::rrx: return 3;
+ }
+ return 0;
+ }
+
+ /// getAddrMode2OpValue - Return encoding for addrmode2 operands.
+ uint32_t getAddrMode2OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getAddrMode2OffsetOpValue - Return encoding for am2offset operands.
+ uint32_t getAddrMode2OffsetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getPostIdxRegOpValue - Return encoding for postidx_reg operands.
+ uint32_t getPostIdxRegOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getAddrMode3OffsetOpValue - Return encoding for am3offset operands.
+ uint32_t getAddrMode3OffsetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getAddrMode3OpValue - Return encoding for addrmode3 operands.
+ uint32_t getAddrMode3OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getAddrModeThumbSPOpValue - Return encoding info for 'reg +/- imm12'
+ /// operand.
+ uint32_t getAddrModeThumbSPOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getAddrModeISOpValue - Encode the t_addrmode_is# operands.
+ uint32_t getAddrModeISOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getAddrModePCOpValue - Return encoding for t_addrmode_pc operands.
+ uint32_t getAddrModePCOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getAddrMode5OpValue - Return encoding info for 'reg +/- imm8' operand.
+ uint32_t getAddrMode5OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getCCOutOpValue - Return encoding of the 's' bit.
+ unsigned getCCOutOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // The operand is either reg0 or CPSR. The 's' bit is encoded as '0' or
+ // '1' respectively.
+ return MI.getOperand(Op).getReg() == ARM::CPSR;
+ }
+
+ /// getSOImmOpValue - Return an encoded 12-bit shifted-immediate value.
+ unsigned getSOImmOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ unsigned SoImm = MI.getOperand(Op).getImm();
+ int SoImmVal = ARM_AM::getSOImmVal(SoImm);
+ assert(SoImmVal != -1 && "Not a valid so_imm value!");
+
+ // Encode rotate_imm.
+ unsigned Binary = (ARM_AM::getSOImmValRot((unsigned)SoImmVal) >> 1)
+ << ARMII::SoRotImmShift;
+
+ // Encode immed_8.
+ Binary |= ARM_AM::getSOImmValImm((unsigned)SoImmVal);
+ return Binary;
+ }
+
+ /// getT2SOImmOpValue - Return an encoded 12-bit shifted-immediate value.
+ unsigned getT2SOImmOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ unsigned SoImm = MI.getOperand(Op).getImm();
+ unsigned Encoded = ARM_AM::getT2SOImmVal(SoImm);
+ assert(Encoded != ~0U && "Not a Thumb2 so_imm value?");
+ return Encoded;
+ }
+
+ unsigned getT2AddrModeSORegOpValue(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getT2AddrModeImm8OpValue(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getT2AddrModeImm8OffsetOpValue(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getT2AddrModeImm12OffsetOpValue(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getSORegOpValue - Return an encoded so_reg shifted register value.
+ unsigned getSORegRegOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getSORegImmOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getT2SORegOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ unsigned getNEONVcvtImm32OpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ return 64 - MI.getOperand(Op).getImm();
+ }
+
+ unsigned getBitfieldInvertedMaskOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ unsigned getRegisterListOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getAddrMode6AddressOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getAddrMode6OneLane32AddressOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getAddrMode6DupAddressOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getAddrMode6OffsetOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ unsigned getShiftRight8Imm(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getShiftRight16Imm(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getShiftRight32Imm(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getShiftRight64Imm(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ unsigned getThumbSRImmOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ unsigned NEONThumb2DataIPostEncoder(const MCInst &MI,
+ unsigned EncodedValue) const;
+ unsigned NEONThumb2LoadStorePostEncoder(const MCInst &MI,
+ unsigned EncodedValue) const;
+ unsigned NEONThumb2DupPostEncoder(const MCInst &MI,
+ unsigned EncodedValue) const;
+
+ unsigned VFPThumb2PostEncoder(const MCInst &MI,
+ unsigned EncodedValue) const;
+
+ void EmitByte(unsigned char C, raw_ostream &OS) const {
+ OS << (char)C;
+ }
+
+ void EmitConstant(uint64_t Val, unsigned Size, raw_ostream &OS) const {
+ // Output the constant in little endian byte order.
+ for (unsigned i = 0; i != Size; ++i) {
+ EmitByte(Val & 255, OS);
+ Val >>= 8;
+ }
+ }
+
+ void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+};
+
+} // end anonymous namespace
+
+MCCodeEmitter *llvm::createARMMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
+ return new ARMMCCodeEmitter(MCII, STI, Ctx);
+}
+
+/// NEONThumb2DataIPostEncoder - Post-process encoded NEON data-processing
+/// instructions, and rewrite them to their Thumb2 form if we are currently in
+/// Thumb2 mode.
+unsigned ARMMCCodeEmitter::NEONThumb2DataIPostEncoder(const MCInst &MI,
+ unsigned EncodedValue) const {
+ if (isThumb2()) {
+ // NEON Thumb2 data-processsing encodings are very simple: bit 24 is moved
+ // to bit 12 of the high half-word (i.e. bit 28), and bits 27-24 are
+ // set to 1111.
+ unsigned Bit24 = EncodedValue & 0x01000000;
+ unsigned Bit28 = Bit24 << 4;
+ EncodedValue &= 0xEFFFFFFF;
+ EncodedValue |= Bit28;
+ EncodedValue |= 0x0F000000;
+ }
+
+ return EncodedValue;
+}
+
+/// NEONThumb2LoadStorePostEncoder - Post-process encoded NEON load/store
+/// instructions, and rewrite them to their Thumb2 form if we are currently in
+/// Thumb2 mode.
+unsigned ARMMCCodeEmitter::NEONThumb2LoadStorePostEncoder(const MCInst &MI,
+ unsigned EncodedValue) const {
+ if (isThumb2()) {
+ EncodedValue &= 0xF0FFFFFF;
+ EncodedValue |= 0x09000000;
+ }
+
+ return EncodedValue;
+}
+
+/// NEONThumb2DupPostEncoder - Post-process encoded NEON vdup
+/// instructions, and rewrite them to their Thumb2 form if we are currently in
+/// Thumb2 mode.
+unsigned ARMMCCodeEmitter::NEONThumb2DupPostEncoder(const MCInst &MI,
+ unsigned EncodedValue) const {
+ if (isThumb2()) {
+ EncodedValue &= 0x00FFFFFF;
+ EncodedValue |= 0xEE000000;
+ }
+
+ return EncodedValue;
+}
+
+/// VFPThumb2PostEncoder - Post-process encoded VFP instructions and rewrite
+/// them to their Thumb2 form if we are currently in Thumb2 mode.
+unsigned ARMMCCodeEmitter::
+VFPThumb2PostEncoder(const MCInst &MI, unsigned EncodedValue) const {
+ if (isThumb2()) {
+ EncodedValue &= 0x0FFFFFFF;
+ EncodedValue |= 0xE0000000;
+ }
+ return EncodedValue;
+}
+
+/// getMachineOpValue - Return binary encoding of operand. If the machine
+/// operand requires relocation, record the relocation and return zero.
+unsigned ARMMCCodeEmitter::
+getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ if (MO.isReg()) {
+ unsigned Reg = MO.getReg();
+ unsigned RegNo = getARMRegisterNumbering(Reg);
+
+ // Q registers are encoded as 2x their register number.
+ switch (Reg) {
+ default:
+ return RegNo;
+ case ARM::Q0: case ARM::Q1: case ARM::Q2: case ARM::Q3:
+ case ARM::Q4: case ARM::Q5: case ARM::Q6: case ARM::Q7:
+ case ARM::Q8: case ARM::Q9: case ARM::Q10: case ARM::Q11:
+ case ARM::Q12: case ARM::Q13: case ARM::Q14: case ARM::Q15:
+ return 2 * RegNo;
+ }
+ } else if (MO.isImm()) {
+ return static_cast<unsigned>(MO.getImm());
+ } else if (MO.isFPImm()) {
+ return static_cast<unsigned>(APFloat(MO.getFPImm())
+ .bitcastToAPInt().getHiBits(32).getLimitedValue());
+ }
+
+ llvm_unreachable("Unable to encode MCOperand!");
+ return 0;
+}
+
+/// getAddrModeImmOpValue - Return encoding info for 'reg +/- imm' operand.
+bool ARMMCCodeEmitter::
+EncodeAddrModeOpValues(const MCInst &MI, unsigned OpIdx, unsigned &Reg,
+ unsigned &Imm, SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ const MCOperand &MO1 = MI.getOperand(OpIdx + 1);
+
+ Reg = getARMRegisterNumbering(MO.getReg());
+
+ int32_t SImm = MO1.getImm();
+ bool isAdd = true;
+
+ // Special value for #-0
+ if (SImm == INT32_MIN) {
+ SImm = 0;
+ isAdd = false;
+ }
+
+ // Immediate is always encoded as positive. The 'U' bit controls add vs sub.
+ if (SImm < 0) {
+ SImm = -SImm;
+ isAdd = false;
+ }
+
+ Imm = SImm;
+ return isAdd;
+}
+
+/// getBranchTargetOpValue - Helper function to get the branch target operand,
+/// which is either an immediate or requires a fixup.
+static uint32_t getBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ unsigned FixupKind,
+ SmallVectorImpl<MCFixup> &Fixups) {
+ const MCOperand &MO = MI.getOperand(OpIdx);
+
+ // If the destination is an immediate, we have nothing to do.
+ if (MO.isImm()) return MO.getImm();
+ assert(MO.isExpr() && "Unexpected branch target type!");
+ const MCExpr *Expr = MO.getExpr();
+ MCFixupKind Kind = MCFixupKind(FixupKind);
+ Fixups.push_back(MCFixup::Create(0, Expr, Kind));
+
+ // All of the information is in the fixup.
+ return 0;
+}
+
+// Thumb BL and BLX use a strange offset encoding where bits 22 and 21 are
+// determined by negating them and XOR'ing them with bit 23.
+static int32_t encodeThumbBLOffset(int32_t offset) {
+ offset >>= 1;
+ uint32_t S = (offset & 0x800000) >> 23;
+ uint32_t J1 = (offset & 0x400000) >> 22;
+ uint32_t J2 = (offset & 0x200000) >> 21;
+ J1 = (~J1 & 0x1);
+ J2 = (~J2 & 0x1);
+ J1 ^= S;
+ J2 ^= S;
+
+ offset &= ~0x600000;
+ offset |= J1 << 22;
+ offset |= J2 << 21;
+
+ return offset;
+}
+
+/// getThumbBLTargetOpValue - Return encoding info for immediate branch target.
+uint32_t ARMMCCodeEmitter::
+getThumbBLTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand MO = MI.getOperand(OpIdx);
+ if (MO.isExpr())
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_bl,
+ Fixups);
+ return encodeThumbBLOffset(MO.getImm());
+}
+
+/// getThumbBLXTargetOpValue - Return encoding info for Thumb immediate
+/// BLX branch target.
+uint32_t ARMMCCodeEmitter::
+getThumbBLXTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand MO = MI.getOperand(OpIdx);
+ if (MO.isExpr())
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_blx,
+ Fixups);
+ return encodeThumbBLOffset(MO.getImm());
+}
+
+/// getThumbBRTargetOpValue - Return encoding info for Thumb branch target.
+uint32_t ARMMCCodeEmitter::
+getThumbBRTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand MO = MI.getOperand(OpIdx);
+ if (MO.isExpr())
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_br,
+ Fixups);
+ return (MO.getImm() >> 1);
+}
+
+/// getThumbBCCTargetOpValue - Return encoding info for Thumb branch target.
+uint32_t ARMMCCodeEmitter::
+getThumbBCCTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand MO = MI.getOperand(OpIdx);
+ if (MO.isExpr())
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_bcc,
+ Fixups);
+ return (MO.getImm() >> 1);
+}
+
+/// getThumbCBTargetOpValue - Return encoding info for Thumb branch target.
+uint32_t ARMMCCodeEmitter::
+getThumbCBTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand MO = MI.getOperand(OpIdx);
+ if (MO.isExpr())
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_cb, Fixups);
+ return (MO.getImm() >> 1);
+}
+
+/// Return true if this branch has a non-always predication
+static bool HasConditionalBranch(const MCInst &MI) {
+ int NumOp = MI.getNumOperands();
+ if (NumOp >= 2) {
+ for (int i = 0; i < NumOp-1; ++i) {
+ const MCOperand &MCOp1 = MI.getOperand(i);
+ const MCOperand &MCOp2 = MI.getOperand(i + 1);
+ if (MCOp1.isImm() && MCOp2.isReg() &&
+ (MCOp2.getReg() == 0 || MCOp2.getReg() == ARM::CPSR)) {
+ if (ARMCC::CondCodes(MCOp1.getImm()) != ARMCC::AL)
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+/// getBranchTargetOpValue - Return encoding info for 24-bit immediate branch
+/// target.
+uint32_t ARMMCCodeEmitter::
+getBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // FIXME: This really, really shouldn't use TargetMachine. We don't want
+ // coupling between MC and TM anywhere we can help it.
+ if (isThumb2())
+ return
+ ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_t2_condbranch, Fixups);
+ return getARMBranchTargetOpValue(MI, OpIdx, Fixups);
+}
+
+/// getBranchTargetOpValue - Return encoding info for 24-bit immediate branch
+/// target.
+uint32_t ARMMCCodeEmitter::
+getARMBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand MO = MI.getOperand(OpIdx);
+ if (MO.isExpr()) {
+ if (HasConditionalBranch(MI))
+ return ::getBranchTargetOpValue(MI, OpIdx,
+ ARM::fixup_arm_condbranch, Fixups);
+ return ::getBranchTargetOpValue(MI, OpIdx,
+ ARM::fixup_arm_uncondbranch, Fixups);
+ }
+
+ return MO.getImm() >> 2;
+}
+
+uint32_t ARMMCCodeEmitter::
+getARMBLXTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand MO = MI.getOperand(OpIdx);
+ if (MO.isExpr()) {
+ if (HasConditionalBranch(MI))
+ return ::getBranchTargetOpValue(MI, OpIdx,
+ ARM::fixup_arm_condbranch, Fixups);
+ return ::getBranchTargetOpValue(MI, OpIdx,
+ ARM::fixup_arm_uncondbranch, Fixups);
+ }
+
+ return MO.getImm() >> 1;
+}
+
+/// getUnconditionalBranchTargetOpValue - Return encoding info for 24-bit
+/// immediate branch target.
+uint32_t ARMMCCodeEmitter::
+getUnconditionalBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ unsigned Val =
+ ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_t2_uncondbranch, Fixups);
+ bool I = (Val & 0x800000);
+ bool J1 = (Val & 0x400000);
+ bool J2 = (Val & 0x200000);
+ if (I ^ J1)
+ Val &= ~0x400000;
+ else
+ Val |= 0x400000;
+
+ if (I ^ J2)
+ Val &= ~0x200000;
+ else
+ Val |= 0x200000;
+
+ return Val;
+}
+
+/// getAdrLabelOpValue - Return encoding info for 12-bit immediate ADR label
+/// target.
+uint32_t ARMMCCodeEmitter::
+getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand MO = MI.getOperand(OpIdx);
+ if (MO.isExpr())
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_adr_pcrel_12,
+ Fixups);
+ int32_t offset = MO.getImm();
+ uint32_t Val = 0x2000;
+ if (offset < 0) {
+ Val = 0x1000;
+ offset *= -1;
+ }
+ Val |= offset;
+ return Val;
+}
+
+/// getAdrLabelOpValue - Return encoding info for 12-bit immediate ADR label
+/// target.
+uint32_t ARMMCCodeEmitter::
+getT2AdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand MO = MI.getOperand(OpIdx);
+ if (MO.isExpr())
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_t2_adr_pcrel_12,
+ Fixups);
+ int32_t Val = MO.getImm();
+ if (Val < 0) {
+ Val *= -1;
+ Val |= 0x1000;
+ }
+ return Val;
+}
+
+/// getAdrLabelOpValue - Return encoding info for 8-bit immediate ADR label
+/// target.
+uint32_t ARMMCCodeEmitter::
+getThumbAdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand MO = MI.getOperand(OpIdx);
+ if (MO.isExpr())
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_thumb_adr_pcrel_10,
+ Fixups);
+ return MO.getImm();
+}
+
+/// getThumbAddrModeRegRegOpValue - Return encoding info for 'reg + reg'
+/// operand.
+uint32_t ARMMCCodeEmitter::
+getThumbAddrModeRegRegOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &) const {
+ // [Rn, Rm]
+ // {5-3} = Rm
+ // {2-0} = Rn
+ const MCOperand &MO1 = MI.getOperand(OpIdx);
+ const MCOperand &MO2 = MI.getOperand(OpIdx + 1);
+ unsigned Rn = getARMRegisterNumbering(MO1.getReg());
+ unsigned Rm = getARMRegisterNumbering(MO2.getReg());
+ return (Rm << 3) | Rn;
+}
+
+/// getAddrModeImm12OpValue - Return encoding info for 'reg +/- imm12' operand.
+uint32_t ARMMCCodeEmitter::
+getAddrModeImm12OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // {17-13} = reg
+ // {12} = (U)nsigned (add == '1', sub == '0')
+ // {11-0} = imm12
+ unsigned Reg, Imm12;
+ bool isAdd = true;
+ // If The first operand isn't a register, we have a label reference.
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ if (!MO.isReg()) {
+ Reg = getARMRegisterNumbering(ARM::PC); // Rn is PC.
+ Imm12 = 0;
+ isAdd = false ; // 'U' bit is set as part of the fixup.
+
+ if (MO.isExpr()) {
+ const MCExpr *Expr = MO.getExpr();
+
+ MCFixupKind Kind;
+ if (isThumb2())
+ Kind = MCFixupKind(ARM::fixup_t2_ldst_pcrel_12);
+ else
+ Kind = MCFixupKind(ARM::fixup_arm_ldst_pcrel_12);
+ Fixups.push_back(MCFixup::Create(0, Expr, Kind));
+
+ ++MCNumCPRelocations;
+ } else {
+ Reg = ARM::PC;
+ int32_t Offset = MO.getImm();
+ if (Offset < 0) {
+ Offset *= -1;
+ isAdd = false;
+ }
+ Imm12 = Offset;
+ }
+ } else
+ isAdd = EncodeAddrModeOpValues(MI, OpIdx, Reg, Imm12, Fixups);
+
+ uint32_t Binary = Imm12 & 0xfff;
+ // Immediate is always encoded as positive. The 'U' bit controls add vs sub.
+ if (isAdd)
+ Binary |= (1 << 12);
+ Binary |= (Reg << 13);
+ return Binary;
+}
+
+/// getT2Imm8s4OpValue - Return encoding info for
+/// '+/- imm8<<2' operand.
+uint32_t ARMMCCodeEmitter::
+getT2Imm8s4OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // FIXME: The immediate operand should have already been encoded like this
+ // before ever getting here. The encoder method should just need to combine
+ // the MI operands for the register and the offset into a single
+ // representation for the complex operand in the .td file. This isn't just
+ // style, unfortunately. As-is, we can't represent the distinct encoding
+ // for #-0.
+
+ // {8} = (U)nsigned (add == '1', sub == '0')
+ // {7-0} = imm8
+ int32_t Imm8 = MI.getOperand(OpIdx).getImm();
+ bool isAdd = Imm8 >= 0;
+
+ // Immediate is always encoded as positive. The 'U' bit controls add vs sub.
+ if (Imm8 < 0)
+ Imm8 = -Imm8;
+
+ // Scaled by 4.
+ Imm8 /= 4;
+
+ uint32_t Binary = Imm8 & 0xff;
+ // Immediate is always encoded as positive. The 'U' bit controls add vs sub.
+ if (isAdd)
+ Binary |= (1 << 8);
+ return Binary;
+}
+
+/// getT2AddrModeImm8s4OpValue - Return encoding info for
+/// 'reg +/- imm8<<2' operand.
+uint32_t ARMMCCodeEmitter::
+getT2AddrModeImm8s4OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // {12-9} = reg
+ // {8} = (U)nsigned (add == '1', sub == '0')
+ // {7-0} = imm8
+ unsigned Reg, Imm8;
+ bool isAdd = true;
+ // If The first operand isn't a register, we have a label reference.
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ if (!MO.isReg()) {
+ Reg = getARMRegisterNumbering(ARM::PC); // Rn is PC.
+ Imm8 = 0;
+ isAdd = false ; // 'U' bit is set as part of the fixup.
+
+ assert(MO.isExpr() && "Unexpected machine operand type!");
+ const MCExpr *Expr = MO.getExpr();
+ MCFixupKind Kind = MCFixupKind(ARM::fixup_arm_pcrel_10);
+ Fixups.push_back(MCFixup::Create(0, Expr, Kind));
+
+ ++MCNumCPRelocations;
+ } else
+ isAdd = EncodeAddrModeOpValues(MI, OpIdx, Reg, Imm8, Fixups);
+
+ // FIXME: The immediate operand should have already been encoded like this
+ // before ever getting here. The encoder method should just need to combine
+ // the MI operands for the register and the offset into a single
+ // representation for the complex operand in the .td file. This isn't just
+ // style, unfortunately. As-is, we can't represent the distinct encoding
+ // for #-0.
+ uint32_t Binary = (Imm8 >> 2) & 0xff;
+ // Immediate is always encoded as positive. The 'U' bit controls add vs sub.
+ if (isAdd)
+ Binary |= (1 << 8);
+ Binary |= (Reg << 9);
+ return Binary;
+}
+
+/// getT2AddrModeImm0_1020s4OpValue - Return encoding info for
+/// 'reg + imm8<<2' operand.
+uint32_t ARMMCCodeEmitter::
+getT2AddrModeImm0_1020s4OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // {11-8} = reg
+ // {7-0} = imm8
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ const MCOperand &MO1 = MI.getOperand(OpIdx + 1);
+ unsigned Reg = getARMRegisterNumbering(MO.getReg());
+ unsigned Imm8 = MO1.getImm();
+ return (Reg << 8) | Imm8;
+}
+
+// FIXME: This routine assumes that a binary
+// expression will always result in a PCRel expression
+// In reality, its only true if one or more subexpressions
+// is itself a PCRel (i.e. "." in asm or some other pcrel construct)
+// but this is good enough for now.
+static bool EvaluateAsPCRel(const MCExpr *Expr) {
+ switch (Expr->getKind()) {
+ default: assert(0 && "Unexpected expression type");
+ case MCExpr::SymbolRef: return false;
+ case MCExpr::Binary: return true;
+ }
+}
+
+uint32_t
+ARMMCCodeEmitter::getHiLo16ImmOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // {20-16} = imm{15-12}
+ // {11-0} = imm{11-0}
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ if (MO.isImm())
+ // Hi / lo 16 bits already extracted during earlier passes.
+ return static_cast<unsigned>(MO.getImm());
+
+ // Handle :upper16: and :lower16: assembly prefixes.
+ const MCExpr *E = MO.getExpr();
+ if (E->getKind() == MCExpr::Target) {
+ const ARMMCExpr *ARM16Expr = cast<ARMMCExpr>(E);
+ E = ARM16Expr->getSubExpr();
+
+ MCFixupKind Kind;
+ switch (ARM16Expr->getKind()) {
+ default: assert(0 && "Unsupported ARMFixup");
+ case ARMMCExpr::VK_ARM_HI16:
+ if (!isTargetDarwin() && EvaluateAsPCRel(E))
+ Kind = MCFixupKind(isThumb2()
+ ? ARM::fixup_t2_movt_hi16_pcrel
+ : ARM::fixup_arm_movt_hi16_pcrel);
+ else
+ Kind = MCFixupKind(isThumb2()
+ ? ARM::fixup_t2_movt_hi16
+ : ARM::fixup_arm_movt_hi16);
+ break;
+ case ARMMCExpr::VK_ARM_LO16:
+ if (!isTargetDarwin() && EvaluateAsPCRel(E))
+ Kind = MCFixupKind(isThumb2()
+ ? ARM::fixup_t2_movw_lo16_pcrel
+ : ARM::fixup_arm_movw_lo16_pcrel);
+ else
+ Kind = MCFixupKind(isThumb2()
+ ? ARM::fixup_t2_movw_lo16
+ : ARM::fixup_arm_movw_lo16);
+ break;
+ }
+ Fixups.push_back(MCFixup::Create(0, E, Kind));
+ return 0;
+ };
+
+ llvm_unreachable("Unsupported MCExpr type in MCOperand!");
+ return 0;
+}
+
+uint32_t ARMMCCodeEmitter::
+getLdStSORegOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ const MCOperand &MO1 = MI.getOperand(OpIdx+1);
+ const MCOperand &MO2 = MI.getOperand(OpIdx+2);
+ unsigned Rn = getARMRegisterNumbering(MO.getReg());
+ unsigned Rm = getARMRegisterNumbering(MO1.getReg());
+ unsigned ShImm = ARM_AM::getAM2Offset(MO2.getImm());
+ bool isAdd = ARM_AM::getAM2Op(MO2.getImm()) == ARM_AM::add;
+ ARM_AM::ShiftOpc ShOp = ARM_AM::getAM2ShiftOpc(MO2.getImm());
+ unsigned SBits = getShiftOp(ShOp);
+
+ // {16-13} = Rn
+ // {12} = isAdd
+ // {11-0} = shifter
+ // {3-0} = Rm
+ // {4} = 0
+ // {6-5} = type
+ // {11-7} = imm
+ uint32_t Binary = Rm;
+ Binary |= Rn << 13;
+ Binary |= SBits << 5;
+ Binary |= ShImm << 7;
+ if (isAdd)
+ Binary |= 1 << 12;
+ return Binary;
+}
+
+uint32_t ARMMCCodeEmitter::
+getAddrMode2OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // {17-14} Rn
+ // {13} 1 == imm12, 0 == Rm
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ unsigned Rn = getARMRegisterNumbering(MO.getReg());
+ uint32_t Binary = getAddrMode2OffsetOpValue(MI, OpIdx + 1, Fixups);
+ Binary |= Rn << 14;
+ return Binary;
+}
+
+uint32_t ARMMCCodeEmitter::
+getAddrMode2OffsetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // {13} 1 == imm12, 0 == Rm
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ const MCOperand &MO1 = MI.getOperand(OpIdx+1);
+ unsigned Imm = MO1.getImm();
+ bool isAdd = ARM_AM::getAM2Op(Imm) == ARM_AM::add;
+ bool isReg = MO.getReg() != 0;
+ uint32_t Binary = ARM_AM::getAM2Offset(Imm);
+ // if reg +/- reg, Rm will be non-zero. Otherwise, we have reg +/- imm12
+ if (isReg) {
+ ARM_AM::ShiftOpc ShOp = ARM_AM::getAM2ShiftOpc(Imm);
+ Binary <<= 7; // Shift amount is bits [11:7]
+ Binary |= getShiftOp(ShOp) << 5; // Shift type is bits [6:5]
+ Binary |= getARMRegisterNumbering(MO.getReg()); // Rm is bits [3:0]
+ }
+ return Binary | (isAdd << 12) | (isReg << 13);
+}
+
+uint32_t ARMMCCodeEmitter::
+getPostIdxRegOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // {4} isAdd
+ // {3-0} Rm
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ const MCOperand &MO1 = MI.getOperand(OpIdx+1);
+ bool isAdd = MO1.getImm() != 0;
+ return getARMRegisterNumbering(MO.getReg()) | (isAdd << 4);
+}
+
+uint32_t ARMMCCodeEmitter::
+getAddrMode3OffsetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // {9} 1 == imm8, 0 == Rm
+ // {8} isAdd
+ // {7-4} imm7_4/zero
+ // {3-0} imm3_0/Rm
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ const MCOperand &MO1 = MI.getOperand(OpIdx+1);
+ unsigned Imm = MO1.getImm();
+ bool isAdd = ARM_AM::getAM3Op(Imm) == ARM_AM::add;
+ bool isImm = MO.getReg() == 0;
+ uint32_t Imm8 = ARM_AM::getAM3Offset(Imm);
+ // if reg +/- reg, Rm will be non-zero. Otherwise, we have reg +/- imm8
+ if (!isImm)
+ Imm8 = getARMRegisterNumbering(MO.getReg());
+ return Imm8 | (isAdd << 8) | (isImm << 9);
+}
+
+uint32_t ARMMCCodeEmitter::
+getAddrMode3OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // {13} 1 == imm8, 0 == Rm
+ // {12-9} Rn
+ // {8} isAdd
+ // {7-4} imm7_4/zero
+ // {3-0} imm3_0/Rm
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ const MCOperand &MO1 = MI.getOperand(OpIdx+1);
+ const MCOperand &MO2 = MI.getOperand(OpIdx+2);
+ unsigned Rn = getARMRegisterNumbering(MO.getReg());
+ unsigned Imm = MO2.getImm();
+ bool isAdd = ARM_AM::getAM3Op(Imm) == ARM_AM::add;
+ bool isImm = MO1.getReg() == 0;
+ uint32_t Imm8 = ARM_AM::getAM3Offset(Imm);
+ // if reg +/- reg, Rm will be non-zero. Otherwise, we have reg +/- imm8
+ if (!isImm)
+ Imm8 = getARMRegisterNumbering(MO1.getReg());
+ return (Rn << 9) | Imm8 | (isAdd << 8) | (isImm << 13);
+}
+
+/// getAddrModeThumbSPOpValue - Encode the t_addrmode_sp operands.
+uint32_t ARMMCCodeEmitter::
+getAddrModeThumbSPOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // [SP, #imm]
+ // {7-0} = imm8
+ const MCOperand &MO1 = MI.getOperand(OpIdx + 1);
+ assert(MI.getOperand(OpIdx).getReg() == ARM::SP &&
+ "Unexpected base register!");
+
+ // The immediate is already shifted for the implicit zeroes, so no change
+ // here.
+ return MO1.getImm() & 0xff;
+}
+
+/// getAddrModeISOpValue - Encode the t_addrmode_is# operands.
+uint32_t ARMMCCodeEmitter::
+getAddrModeISOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // [Rn, #imm]
+ // {7-3} = imm5
+ // {2-0} = Rn
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ const MCOperand &MO1 = MI.getOperand(OpIdx + 1);
+ unsigned Rn = getARMRegisterNumbering(MO.getReg());
+ unsigned Imm5 = MO1.getImm();
+ return ((Imm5 & 0x1f) << 3) | Rn;
+}
+
+/// getAddrModePCOpValue - Return encoding for t_addrmode_pc operands.
+uint32_t ARMMCCodeEmitter::
+getAddrModePCOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand MO = MI.getOperand(OpIdx);
+ if (MO.isExpr())
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_cp, Fixups);
+ return (MO.getImm() >> 2);
+}
+
+/// getAddrMode5OpValue - Return encoding info for 'reg +/- imm10' operand.
+uint32_t ARMMCCodeEmitter::
+getAddrMode5OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // {12-9} = reg
+ // {8} = (U)nsigned (add == '1', sub == '0')
+ // {7-0} = imm8
+ unsigned Reg, Imm8;
+ bool isAdd;
+ // If The first operand isn't a register, we have a label reference.
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ if (!MO.isReg()) {
+ Reg = getARMRegisterNumbering(ARM::PC); // Rn is PC.
+ Imm8 = 0;
+ isAdd = false; // 'U' bit is handled as part of the fixup.
+
+ assert(MO.isExpr() && "Unexpected machine operand type!");
+ const MCExpr *Expr = MO.getExpr();
+ MCFixupKind Kind;
+ if (isThumb2())
+ Kind = MCFixupKind(ARM::fixup_t2_pcrel_10);
+ else
+ Kind = MCFixupKind(ARM::fixup_arm_pcrel_10);
+ Fixups.push_back(MCFixup::Create(0, Expr, Kind));
+
+ ++MCNumCPRelocations;
+ } else {
+ EncodeAddrModeOpValues(MI, OpIdx, Reg, Imm8, Fixups);
+ isAdd = ARM_AM::getAM5Op(Imm8) == ARM_AM::add;
+ }
+
+ uint32_t Binary = ARM_AM::getAM5Offset(Imm8);
+ // Immediate is always encoded as positive. The 'U' bit controls add vs sub.
+ if (isAdd)
+ Binary |= (1 << 8);
+ Binary |= (Reg << 9);
+ return Binary;
+}
+
+unsigned ARMMCCodeEmitter::
+getSORegRegOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // Sub-operands are [reg, reg, imm]. The first register is Rm, the reg to be
+ // shifted. The second is Rs, the amount to shift by, and the third specifies
+ // the type of the shift.
+ //
+ // {3-0} = Rm.
+ // {4} = 1
+ // {6-5} = type
+ // {11-8} = Rs
+ // {7} = 0
+
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ const MCOperand &MO1 = MI.getOperand(OpIdx + 1);
+ const MCOperand &MO2 = MI.getOperand(OpIdx + 2);
+ ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(MO2.getImm());
+
+ // Encode Rm.
+ unsigned Binary = getARMRegisterNumbering(MO.getReg());
+
+ // Encode the shift opcode.
+ unsigned SBits = 0;
+ unsigned Rs = MO1.getReg();
+ if (Rs) {
+ // Set shift operand (bit[7:4]).
+ // LSL - 0001
+ // LSR - 0011
+ // ASR - 0101
+ // ROR - 0111
+ switch (SOpc) {
+ default: llvm_unreachable("Unknown shift opc!");
+ case ARM_AM::lsl: SBits = 0x1; break;
+ case ARM_AM::lsr: SBits = 0x3; break;
+ case ARM_AM::asr: SBits = 0x5; break;
+ case ARM_AM::ror: SBits = 0x7; break;
+ }
+ }
+
+ Binary |= SBits << 4;
+
+ // Encode the shift operation Rs.
+ // Encode Rs bit[11:8].
+ assert(ARM_AM::getSORegOffset(MO2.getImm()) == 0);
+ return Binary | (getARMRegisterNumbering(Rs) << ARMII::RegRsShift);
+}
+
+unsigned ARMMCCodeEmitter::
+getSORegImmOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // Sub-operands are [reg, imm]. The first register is Rm, the reg to be
+ // shifted. The second is the amount to shift by.
+ //
+ // {3-0} = Rm.
+ // {4} = 0
+ // {6-5} = type
+ // {11-7} = imm
+
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ const MCOperand &MO1 = MI.getOperand(OpIdx + 1);
+ ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(MO1.getImm());
+
+ // Encode Rm.
+ unsigned Binary = getARMRegisterNumbering(MO.getReg());
+
+ // Encode the shift opcode.
+ unsigned SBits = 0;
+
+ // Set shift operand (bit[6:4]).
+ // LSL - 000
+ // LSR - 010
+ // ASR - 100
+ // ROR - 110
+ // RRX - 110 and bit[11:8] clear.
+ switch (SOpc) {
+ default: llvm_unreachable("Unknown shift opc!");
+ case ARM_AM::lsl: SBits = 0x0; break;
+ case ARM_AM::lsr: SBits = 0x2; break;
+ case ARM_AM::asr: SBits = 0x4; break;
+ case ARM_AM::ror: SBits = 0x6; break;
+ case ARM_AM::rrx:
+ Binary |= 0x60;
+ return Binary;
+ }
+
+ // Encode shift_imm bit[11:7].
+ Binary |= SBits << 4;
+ unsigned Offset = ARM_AM::getSORegOffset(MO1.getImm());
+ assert(Offset && "Offset must be in range 1-32!");
+ if (Offset == 32) Offset = 0;
+ return Binary | (Offset << 7);
+}
+
+
+unsigned ARMMCCodeEmitter::
+getT2AddrModeSORegOpValue(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO1 = MI.getOperand(OpNum);
+ const MCOperand &MO2 = MI.getOperand(OpNum+1);
+ const MCOperand &MO3 = MI.getOperand(OpNum+2);
+
+ // Encoded as [Rn, Rm, imm].
+ // FIXME: Needs fixup support.
+ unsigned Value = getARMRegisterNumbering(MO1.getReg());
+ Value <<= 4;
+ Value |= getARMRegisterNumbering(MO2.getReg());
+ Value <<= 2;
+ Value |= MO3.getImm();
+
+ return Value;
+}
+
+unsigned ARMMCCodeEmitter::
+getT2AddrModeImm8OpValue(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO1 = MI.getOperand(OpNum);
+ const MCOperand &MO2 = MI.getOperand(OpNum+1);
+
+ // FIXME: Needs fixup support.
+ unsigned Value = getARMRegisterNumbering(MO1.getReg());
+
+ // Even though the immediate is 8 bits long, we need 9 bits in order
+ // to represent the (inverse of the) sign bit.
+ Value <<= 9;
+ int32_t tmp = (int32_t)MO2.getImm();
+ if (tmp < 0)
+ tmp = abs(tmp);
+ else
+ Value |= 256; // Set the ADD bit
+ Value |= tmp & 255;
+ return Value;
+}
+
+unsigned ARMMCCodeEmitter::
+getT2AddrModeImm8OffsetOpValue(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO1 = MI.getOperand(OpNum);
+
+ // FIXME: Needs fixup support.
+ unsigned Value = 0;
+ int32_t tmp = (int32_t)MO1.getImm();
+ if (tmp < 0)
+ tmp = abs(tmp);
+ else
+ Value |= 256; // Set the ADD bit
+ Value |= tmp & 255;
+ return Value;
+}
+
+unsigned ARMMCCodeEmitter::
+getT2AddrModeImm12OffsetOpValue(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO1 = MI.getOperand(OpNum);
+
+ // FIXME: Needs fixup support.
+ unsigned Value = 0;
+ int32_t tmp = (int32_t)MO1.getImm();
+ if (tmp < 0)
+ tmp = abs(tmp);
+ else
+ Value |= 4096; // Set the ADD bit
+ Value |= tmp & 4095;
+ return Value;
+}
+
+unsigned ARMMCCodeEmitter::
+getT2SORegOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // Sub-operands are [reg, imm]. The first register is Rm, the reg to be
+ // shifted. The second is the amount to shift by.
+ //
+ // {3-0} = Rm.
+ // {4} = 0
+ // {6-5} = type
+ // {11-7} = imm
+
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ const MCOperand &MO1 = MI.getOperand(OpIdx + 1);
+ ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(MO1.getImm());
+
+ // Encode Rm.
+ unsigned Binary = getARMRegisterNumbering(MO.getReg());
+
+ // Encode the shift opcode.
+ unsigned SBits = 0;
+ // Set shift operand (bit[6:4]).
+ // LSL - 000
+ // LSR - 010
+ // ASR - 100
+ // ROR - 110
+ switch (SOpc) {
+ default: llvm_unreachable("Unknown shift opc!");
+ case ARM_AM::lsl: SBits = 0x0; break;
+ case ARM_AM::lsr: SBits = 0x2; break;
+ case ARM_AM::asr: SBits = 0x4; break;
+ case ARM_AM::rrx: // FALLTHROUGH
+ case ARM_AM::ror: SBits = 0x6; break;
+ }
+
+ Binary |= SBits << 4;
+ if (SOpc == ARM_AM::rrx)
+ return Binary;
+
+ // Encode shift_imm bit[11:7].
+ return Binary | ARM_AM::getSORegOffset(MO1.getImm()) << 7;
+}
+
+unsigned ARMMCCodeEmitter::
+getBitfieldInvertedMaskOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // 10 bits. lower 5 bits are are the lsb of the mask, high five bits are the
+ // msb of the mask.
+ const MCOperand &MO = MI.getOperand(Op);
+ uint32_t v = ~MO.getImm();
+ uint32_t lsb = CountTrailingZeros_32(v);
+ uint32_t msb = (32 - CountLeadingZeros_32 (v)) - 1;
+ assert (v != 0 && lsb < 32 && msb < 32 && "Illegal bitfield mask!");
+ return lsb | (msb << 5);
+}
+
+unsigned ARMMCCodeEmitter::
+getRegisterListOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // VLDM/VSTM:
+ // {12-8} = Vd
+ // {7-0} = Number of registers
+ //
+ // LDM/STM:
+ // {15-0} = Bitfield of GPRs.
+ unsigned Reg = MI.getOperand(Op).getReg();
+ bool SPRRegs = llvm::ARMMCRegisterClasses[ARM::SPRRegClassID].contains(Reg);
+ bool DPRRegs = llvm::ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Reg);
+
+ unsigned Binary = 0;
+
+ if (SPRRegs || DPRRegs) {
+ // VLDM/VSTM
+ unsigned RegNo = getARMRegisterNumbering(Reg);
+ unsigned NumRegs = (MI.getNumOperands() - Op) & 0xff;
+ Binary |= (RegNo & 0x1f) << 8;
+ if (SPRRegs)
+ Binary |= NumRegs;
+ else
+ Binary |= NumRegs * 2;
+ } else {
+ for (unsigned I = Op, E = MI.getNumOperands(); I < E; ++I) {
+ unsigned RegNo = getARMRegisterNumbering(MI.getOperand(I).getReg());
+ Binary |= 1 << RegNo;
+ }
+ }
+
+ return Binary;
+}
+
+/// getAddrMode6AddressOpValue - Encode an addrmode6 register number along
+/// with the alignment operand.
+unsigned ARMMCCodeEmitter::
+getAddrMode6AddressOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &Reg = MI.getOperand(Op);
+ const MCOperand &Imm = MI.getOperand(Op + 1);
+
+ unsigned RegNo = getARMRegisterNumbering(Reg.getReg());
+ unsigned Align = 0;
+
+ switch (Imm.getImm()) {
+ default: break;
+ case 2:
+ case 4:
+ case 8: Align = 0x01; break;
+ case 16: Align = 0x02; break;
+ case 32: Align = 0x03; break;
+ }
+
+ return RegNo | (Align << 4);
+}
+
+/// getAddrMode6OneLane32AddressOpValue - Encode an addrmode6 register number
+/// along with the alignment operand for use in VST1 and VLD1 with size 32.
+unsigned ARMMCCodeEmitter::
+getAddrMode6OneLane32AddressOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &Reg = MI.getOperand(Op);
+ const MCOperand &Imm = MI.getOperand(Op + 1);
+
+ unsigned RegNo = getARMRegisterNumbering(Reg.getReg());
+ unsigned Align = 0;
+
+ switch (Imm.getImm()) {
+ default: break;
+ case 2:
+ case 4:
+ case 8:
+ case 16: Align = 0x00; break;
+ case 32: Align = 0x03; break;
+ }
+
+ return RegNo | (Align << 4);
+}
+
+
+/// getAddrMode6DupAddressOpValue - Encode an addrmode6 register number and
+/// alignment operand for use in VLD-dup instructions. This is the same as
+/// getAddrMode6AddressOpValue except for the alignment encoding, which is
+/// different for VLD4-dup.
+unsigned ARMMCCodeEmitter::
+getAddrMode6DupAddressOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &Reg = MI.getOperand(Op);
+ const MCOperand &Imm = MI.getOperand(Op + 1);
+
+ unsigned RegNo = getARMRegisterNumbering(Reg.getReg());
+ unsigned Align = 0;
+
+ switch (Imm.getImm()) {
+ default: break;
+ case 2:
+ case 4:
+ case 8: Align = 0x01; break;
+ case 16: Align = 0x03; break;
+ }
+
+ return RegNo | (Align << 4);
+}
+
+unsigned ARMMCCodeEmitter::
+getAddrMode6OffsetOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO = MI.getOperand(Op);
+ if (MO.getReg() == 0) return 0x0D;
+ return MO.getReg();
+}
+
+unsigned ARMMCCodeEmitter::
+getShiftRight8Imm(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ return 8 - MI.getOperand(Op).getImm();
+}
+
+unsigned ARMMCCodeEmitter::
+getShiftRight16Imm(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ return 16 - MI.getOperand(Op).getImm();
+}
+
+unsigned ARMMCCodeEmitter::
+getShiftRight32Imm(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ return 32 - MI.getOperand(Op).getImm();
+}
+
+unsigned ARMMCCodeEmitter::
+getShiftRight64Imm(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ return 64 - MI.getOperand(Op).getImm();
+}
+
+void ARMMCCodeEmitter::
+EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // Pseudo instructions don't get encoded.
+ const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
+ uint64_t TSFlags = Desc.TSFlags;
+ if ((TSFlags & ARMII::FormMask) == ARMII::Pseudo)
+ return;
+
+ int Size;
+ if (Desc.getSize() == 2 || Desc.getSize() == 4)
+ Size = Desc.getSize();
+ else
+ llvm_unreachable("Unexpected instruction size!");
+
+ uint32_t Binary = getBinaryCodeForInstr(MI, Fixups);
+ // Thumb 32-bit wide instructions need to emit the high order halfword
+ // first.
+ if (isThumb() && Size == 4) {
+ EmitConstant(Binary >> 16, 2, OS);
+ EmitConstant(Binary & 0xffff, 2, OS);
+ } else
+ EmitConstant(Binary, Size, OS);
+ ++MCNumEmitted; // Keep track of the # of mi's emitted.
+}
+
+#include "ARMGenMCCodeEmitter.inc"
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
new file mode 100644
index 0000000..2727ba8
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
@@ -0,0 +1,73 @@
+//===-- ARMMCExpr.cpp - ARM specific MC expression classes ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "armmcexpr"
+#include "ARMMCExpr.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCAssembler.h"
+using namespace llvm;
+
+const ARMMCExpr*
+ARMMCExpr::Create(VariantKind Kind, const MCExpr *Expr,
+ MCContext &Ctx) {
+ return new (Ctx) ARMMCExpr(Kind, Expr);
+}
+
+void ARMMCExpr::PrintImpl(raw_ostream &OS) const {
+ switch (Kind) {
+ default: assert(0 && "Invalid kind!");
+ case VK_ARM_HI16: OS << ":upper16:"; break;
+ case VK_ARM_LO16: OS << ":lower16:"; break;
+ }
+
+ const MCExpr *Expr = getSubExpr();
+ if (Expr->getKind() != MCExpr::SymbolRef)
+ OS << '(';
+ Expr->print(OS);
+ if (Expr->getKind() != MCExpr::SymbolRef)
+ OS << ')';
+}
+
+bool
+ARMMCExpr::EvaluateAsRelocatableImpl(MCValue &Res,
+ const MCAsmLayout *Layout) const {
+ return false;
+}
+
+// FIXME: This basically copies MCObjectStreamer::AddValueSymbols. Perhaps
+// that method should be made public?
+static void AddValueSymbols_(const MCExpr *Value, MCAssembler *Asm) {
+ switch (Value->getKind()) {
+ case MCExpr::Target:
+ assert(0 && "Can't handle nested target expr!");
+ break;
+
+ case MCExpr::Constant:
+ break;
+
+ case MCExpr::Binary: {
+ const MCBinaryExpr *BE = cast<MCBinaryExpr>(Value);
+ AddValueSymbols_(BE->getLHS(), Asm);
+ AddValueSymbols_(BE->getRHS(), Asm);
+ break;
+ }
+
+ case MCExpr::SymbolRef:
+ Asm->getOrCreateSymbolData(cast<MCSymbolRefExpr>(Value)->getSymbol());
+ break;
+
+ case MCExpr::Unary:
+ AddValueSymbols_(cast<MCUnaryExpr>(Value)->getSubExpr(), Asm);
+ break;
+ }
+}
+
+void ARMMCExpr::AddValueSymbols(MCAssembler *Asm) const {
+ AddValueSymbols_(getSubExpr(), Asm);
+}
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
new file mode 100644
index 0000000..0a2e883
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
@@ -0,0 +1,76 @@
+//===-- ARMMCExpr.h - ARM specific MC expression classes ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMMCEXPR_H
+#define ARMMCEXPR_H
+
+#include "llvm/MC/MCExpr.h"
+
+namespace llvm {
+
+class ARMMCExpr : public MCTargetExpr {
+public:
+ enum VariantKind {
+ VK_ARM_None,
+ VK_ARM_HI16, // The R_ARM_MOVT_ABS relocation (:upper16: in the .s file)
+ VK_ARM_LO16 // The R_ARM_MOVW_ABS_NC relocation (:lower16: in the .s file)
+ };
+
+private:
+ const VariantKind Kind;
+ const MCExpr *Expr;
+
+ explicit ARMMCExpr(VariantKind _Kind, const MCExpr *_Expr)
+ : Kind(_Kind), Expr(_Expr) {}
+
+public:
+ /// @name Construction
+ /// @{
+
+ static const ARMMCExpr *Create(VariantKind Kind, const MCExpr *Expr,
+ MCContext &Ctx);
+
+ static const ARMMCExpr *CreateUpper16(const MCExpr *Expr, MCContext &Ctx) {
+ return Create(VK_ARM_HI16, Expr, Ctx);
+ }
+
+ static const ARMMCExpr *CreateLower16(const MCExpr *Expr, MCContext &Ctx) {
+ return Create(VK_ARM_LO16, Expr, Ctx);
+ }
+
+ /// @}
+ /// @name Accessors
+ /// @{
+
+ /// getOpcode - Get the kind of this expression.
+ VariantKind getKind() const { return Kind; }
+
+ /// getSubExpr - Get the child of this expression.
+ const MCExpr *getSubExpr() const { return Expr; }
+
+ /// @}
+
+ void PrintImpl(raw_ostream &OS) const;
+ bool EvaluateAsRelocatableImpl(MCValue &Res,
+ const MCAsmLayout *Layout) const;
+ void AddValueSymbols(MCAssembler *) const;
+ const MCSection *FindAssociatedSection() const {
+ return getSubExpr()->FindAssociatedSection();
+ }
+
+ static bool classof(const MCExpr *E) {
+ return E->getKind() == MCExpr::Target;
+ }
+
+ static bool classof(const ARMMCExpr *) { return true; }
+
+};
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
new file mode 100644
index 0000000..a55c410
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -0,0 +1,263 @@
+//===-- ARMMCTargetDesc.cpp - ARM Target Descriptions -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides ARM specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMMCTargetDesc.h"
+#include "ARMMCAsmInfo.h"
+#include "ARMBaseInfo.h"
+#include "InstPrinter/ARMInstPrinter.h"
+#include "llvm/MC/MCCodeGenInfo.h"
+#include "llvm/MC/MCInstrAnalysis.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#define GET_REGINFO_MC_DESC
+#include "ARMGenRegisterInfo.inc"
+
+#define GET_INSTRINFO_MC_DESC
+#include "ARMGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "ARMGenSubtargetInfo.inc"
+
+using namespace llvm;
+
+std::string ARM_MC::ParseARMTriple(StringRef TT) {
+ // Set the boolean corresponding to the current target triple, or the default
+ // if one cannot be determined, to true.
+ unsigned Len = TT.size();
+ unsigned Idx = 0;
+
+ // FIXME: Enhance Triple helper class to extract ARM version.
+ bool isThumb = false;
+ if (Len >= 5 && TT.substr(0, 4) == "armv")
+ Idx = 4;
+ else if (Len >= 6 && TT.substr(0, 5) == "thumb") {
+ isThumb = true;
+ if (Len >= 7 && TT[5] == 'v')
+ Idx = 6;
+ }
+
+ std::string ARMArchFeature;
+ if (Idx) {
+ unsigned SubVer = TT[Idx];
+ if (SubVer >= '7' && SubVer <= '9') {
+ if (Len >= Idx+2 && TT[Idx+1] == 'm') {
+ // v7m: FeatureNoARM, FeatureDB, FeatureHWDiv, FeatureMClass
+ ARMArchFeature = "+v7,+noarm,+db,+hwdiv,+mclass";
+ } else if (Len >= Idx+3 && TT[Idx+1] == 'e'&& TT[Idx+2] == 'm') {
+ // v7em: FeatureNoARM, FeatureDB, FeatureHWDiv, FeatureDSPThumb2,
+ // FeatureT2XtPk, FeatureMClass
+ ARMArchFeature = "+v7,+noarm,+db,+hwdiv,+t2dsp,t2xtpk,+mclass";
+ } else
+ // v7a: FeatureNEON, FeatureDB, FeatureDSPThumb2, FeatureT2XtPk
+ ARMArchFeature = "+v7,+neon,+db,+t2dsp,+t2xtpk";
+ } else if (SubVer == '6') {
+ if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == '2')
+ ARMArchFeature = "+v6t2";
+ else if (Len >= Idx+2 && TT[Idx+1] == 'm')
+ // v6m: FeatureNoARM, FeatureMClass
+ ARMArchFeature = "+v6t2,+noarm,+mclass";
+ else
+ ARMArchFeature = "+v6";
+ } else if (SubVer == '5') {
+ if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == 'e')
+ ARMArchFeature = "+v5te";
+ else
+ ARMArchFeature = "+v5t";
+ } else if (SubVer == '4' && Len >= Idx+2 && TT[Idx+1] == 't')
+ ARMArchFeature = "+v4t";
+ }
+
+ if (isThumb) {
+ if (ARMArchFeature.empty())
+ ARMArchFeature = "+thumb-mode";
+ else
+ ARMArchFeature += ",+thumb-mode";
+ }
+
+ Triple TheTriple(TT);
+ if (TheTriple.getOS() == Triple::NativeClient) {
+ if (ARMArchFeature.empty())
+ ARMArchFeature = "+nacl-mode";
+ else
+ ARMArchFeature += ",+nacl-mode";
+ }
+
+ return ARMArchFeature;
+}
+
+MCSubtargetInfo *ARM_MC::createARMMCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS) {
+ std::string ArchFS = ARM_MC::ParseARMTriple(TT);
+ if (!FS.empty()) {
+ if (!ArchFS.empty())
+ ArchFS = ArchFS + "," + FS.str();
+ else
+ ArchFS = FS;
+ }
+
+ MCSubtargetInfo *X = new MCSubtargetInfo();
+ InitARMMCSubtargetInfo(X, TT, CPU, ArchFS);
+ return X;
+}
+
+static MCInstrInfo *createARMMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitARMMCInstrInfo(X);
+ return X;
+}
+
+static MCRegisterInfo *createARMMCRegisterInfo(StringRef Triple) {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitARMMCRegisterInfo(X, ARM::LR);
+ return X;
+}
+
+static MCAsmInfo *createARMMCAsmInfo(const Target &T, StringRef TT) {
+ Triple TheTriple(TT);
+
+ if (TheTriple.isOSDarwin())
+ return new ARMMCAsmInfoDarwin();
+
+ return new ARMELFMCAsmInfo();
+}
+
+static MCCodeGenInfo *createARMMCCodeGenInfo(StringRef TT, Reloc::Model RM,
+ CodeModel::Model CM) {
+ MCCodeGenInfo *X = new MCCodeGenInfo();
+ if (RM == Reloc::Default) {
+ Triple TheTriple(TT);
+ // Default relocation model on Darwin is PIC, not DynamicNoPIC.
+ RM = TheTriple.isOSDarwin() ? Reloc::PIC_ : Reloc::DynamicNoPIC;
+ }
+ X->InitMCCodeGenInfo(RM, CM);
+ return X;
+}
+
+// This is duplicated code. Refactor this.
+static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
+ MCContext &Ctx, MCAsmBackend &MAB,
+ raw_ostream &OS,
+ MCCodeEmitter *Emitter,
+ bool RelaxAll,
+ bool NoExecStack) {
+ Triple TheTriple(TT);
+
+ if (TheTriple.isOSDarwin())
+ return createMachOStreamer(Ctx, MAB, OS, Emitter, RelaxAll);
+
+ if (TheTriple.isOSWindows()) {
+ llvm_unreachable("ARM does not support Windows COFF format");
+ return NULL;
+ }
+
+ return createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll, NoExecStack);
+}
+
+static MCInstPrinter *createARMMCInstPrinter(const Target &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI,
+ const MCSubtargetInfo &STI) {
+ if (SyntaxVariant == 0)
+ return new ARMInstPrinter(MAI, STI);
+ return 0;
+}
+
+namespace {
+
+class ARMMCInstrAnalysis : public MCInstrAnalysis {
+public:
+ ARMMCInstrAnalysis(const MCInstrInfo *Info) : MCInstrAnalysis(Info) {}
+
+ virtual bool isUnconditionalBranch(const MCInst &Inst) const {
+ // BCCs with the "always" predicate are unconditional branches.
+ if (Inst.getOpcode() == ARM::Bcc && Inst.getOperand(1).getImm()==ARMCC::AL)
+ return true;
+ return MCInstrAnalysis::isUnconditionalBranch(Inst);
+ }
+
+ virtual bool isConditionalBranch(const MCInst &Inst) const {
+ // BCCs with the "always" predicate are unconditional branches.
+ if (Inst.getOpcode() == ARM::Bcc && Inst.getOperand(1).getImm()==ARMCC::AL)
+ return false;
+ return MCInstrAnalysis::isConditionalBranch(Inst);
+ }
+
+ uint64_t evaluateBranch(const MCInst &Inst, uint64_t Addr,
+ uint64_t Size) const {
+ // We only handle PCRel branches for now.
+ if (Info->get(Inst.getOpcode()).OpInfo[0].OperandType!=MCOI::OPERAND_PCREL)
+ return -1ULL;
+
+ int64_t Imm = Inst.getOperand(0).getImm();
+ // FIXME: This is not right for thumb.
+ return Addr+Imm+8; // In ARM mode the PC is always off by 8 bytes.
+ }
+};
+
+}
+
+static MCInstrAnalysis *createARMMCInstrAnalysis(const MCInstrInfo *Info) {
+ return new ARMMCInstrAnalysis(Info);
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeARMTargetMC() {
+ // Register the MC asm info.
+ RegisterMCAsmInfoFn A(TheARMTarget, createARMMCAsmInfo);
+ RegisterMCAsmInfoFn B(TheThumbTarget, createARMMCAsmInfo);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(TheARMTarget, createARMMCCodeGenInfo);
+ TargetRegistry::RegisterMCCodeGenInfo(TheThumbTarget, createARMMCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(TheARMTarget, createARMMCInstrInfo);
+ TargetRegistry::RegisterMCInstrInfo(TheThumbTarget, createARMMCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(TheARMTarget, createARMMCRegisterInfo);
+ TargetRegistry::RegisterMCRegInfo(TheThumbTarget, createARMMCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(TheARMTarget,
+ ARM_MC::createARMMCSubtargetInfo);
+ TargetRegistry::RegisterMCSubtargetInfo(TheThumbTarget,
+ ARM_MC::createARMMCSubtargetInfo);
+
+ // Register the MC instruction analyzer.
+ TargetRegistry::RegisterMCInstrAnalysis(TheARMTarget,
+ createARMMCInstrAnalysis);
+ TargetRegistry::RegisterMCInstrAnalysis(TheThumbTarget,
+ createARMMCInstrAnalysis);
+
+ // Register the MC Code Emitter
+ TargetRegistry::RegisterMCCodeEmitter(TheARMTarget, createARMMCCodeEmitter);
+ TargetRegistry::RegisterMCCodeEmitter(TheThumbTarget, createARMMCCodeEmitter);
+
+ // Register the asm backend.
+ TargetRegistry::RegisterMCAsmBackend(TheARMTarget, createARMAsmBackend);
+ TargetRegistry::RegisterMCAsmBackend(TheThumbTarget, createARMAsmBackend);
+
+ // Register the object streamer.
+ TargetRegistry::RegisterMCObjectStreamer(TheARMTarget, createMCStreamer);
+ TargetRegistry::RegisterMCObjectStreamer(TheThumbTarget, createMCStreamer);
+
+ // Register the MCInstPrinter.
+ TargetRegistry::RegisterMCInstPrinter(TheARMTarget, createARMMCInstPrinter);
+ TargetRegistry::RegisterMCInstPrinter(TheThumbTarget, createARMMCInstPrinter);
+}
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
new file mode 100644
index 0000000..9b3d3bd
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
@@ -0,0 +1,71 @@
+//===-- ARMMCTargetDesc.h - ARM Target Descriptions -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides ARM specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMMCTARGETDESC_H
+#define ARMMCTARGETDESC_H
+
+#include "llvm/Support/DataTypes.h"
+#include <string>
+
+namespace llvm {
+class MCAsmBackend;
+class MCCodeEmitter;
+class MCContext;
+class MCInstrInfo;
+class MCObjectWriter;
+class MCSubtargetInfo;
+class StringRef;
+class Target;
+class raw_ostream;
+
+extern Target TheARMTarget, TheThumbTarget;
+
+namespace ARM_MC {
+ std::string ParseARMTriple(StringRef TT);
+
+ /// createARMMCSubtargetInfo - Create a ARM MCSubtargetInfo instance.
+ /// This is exposed so Asm parser, etc. do not need to go through
+ /// TargetRegistry.
+ MCSubtargetInfo *createARMMCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS);
+}
+
+MCCodeEmitter *createARMMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx);
+
+MCAsmBackend *createARMAsmBackend(const Target &T, StringRef TT);
+
+/// createARMMachObjectWriter - Construct an ARM Mach-O object writer.
+MCObjectWriter *createARMMachObjectWriter(raw_ostream &OS,
+ bool Is64Bit,
+ uint32_t CPUType,
+ uint32_t CPUSubtype);
+
+} // End llvm namespace
+
+// Defines symbolic names for ARM registers. This defines a mapping from
+// register name to register number.
+//
+#define GET_REGINFO_ENUM
+#include "ARMGenRegisterInfo.inc"
+
+// Defines symbolic names for the ARM instructions.
+//
+#define GET_INSTRINFO_ENUM
+#include "ARMGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "ARMGenSubtargetInfo.inc"
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
new file mode 100644
index 0000000..352c73e
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
@@ -0,0 +1,388 @@
+//===-- ARMMachObjectWriter.cpp - ARM Mach Object Writer ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/ARMBaseInfo.h"
+#include "MCTargetDesc/ARMFixupKinds.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCMachObjectWriter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Object/MachOFormat.h"
+#include "llvm/Support/ErrorHandling.h"
+using namespace llvm;
+using namespace llvm::object;
+
+namespace {
+class ARMMachObjectWriter : public MCMachObjectTargetWriter {
+ void RecordARMScatteredRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ unsigned Log2Size,
+ uint64_t &FixedValue);
+ void RecordARMMovwMovtRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup, MCValue Target,
+ uint64_t &FixedValue);
+
+public:
+ ARMMachObjectWriter(bool Is64Bit, uint32_t CPUType,
+ uint32_t CPUSubtype)
+ : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype,
+ /*UseAggressiveSymbolFolding=*/true) {}
+
+ void RecordRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm, const MCAsmLayout &Layout,
+ const MCFragment *Fragment, const MCFixup &Fixup,
+ MCValue Target, uint64_t &FixedValue);
+};
+}
+
+static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType,
+ unsigned &Log2Size) {
+ RelocType = unsigned(macho::RIT_Vanilla);
+ Log2Size = ~0U;
+
+ switch (Kind) {
+ default:
+ return false;
+
+ case FK_Data_1:
+ Log2Size = llvm::Log2_32(1);
+ return true;
+ case FK_Data_2:
+ Log2Size = llvm::Log2_32(2);
+ return true;
+ case FK_Data_4:
+ Log2Size = llvm::Log2_32(4);
+ return true;
+ case FK_Data_8:
+ Log2Size = llvm::Log2_32(8);
+ return true;
+
+ // Handle 24-bit branch kinds.
+ case ARM::fixup_arm_ldst_pcrel_12:
+ case ARM::fixup_arm_pcrel_10:
+ case ARM::fixup_arm_adr_pcrel_12:
+ case ARM::fixup_arm_condbranch:
+ case ARM::fixup_arm_uncondbranch:
+ RelocType = unsigned(macho::RIT_ARM_Branch24Bit);
+ // Report as 'long', even though that is not quite accurate.
+ Log2Size = llvm::Log2_32(4);
+ return true;
+
+ // Handle Thumb branches.
+ case ARM::fixup_arm_thumb_br:
+ RelocType = unsigned(macho::RIT_ARM_ThumbBranch22Bit);
+ Log2Size = llvm::Log2_32(2);
+ return true;
+
+ case ARM::fixup_t2_uncondbranch:
+ case ARM::fixup_arm_thumb_bl:
+ case ARM::fixup_arm_thumb_blx:
+ RelocType = unsigned(macho::RIT_ARM_ThumbBranch22Bit);
+ Log2Size = llvm::Log2_32(4);
+ return true;
+
+ case ARM::fixup_arm_movt_hi16:
+ case ARM::fixup_arm_movt_hi16_pcrel:
+ case ARM::fixup_t2_movt_hi16:
+ case ARM::fixup_t2_movt_hi16_pcrel:
+ RelocType = unsigned(macho::RIT_ARM_HalfDifference);
+ // Report as 'long', even though that is not quite accurate.
+ Log2Size = llvm::Log2_32(4);
+ return true;
+
+ case ARM::fixup_arm_movw_lo16:
+ case ARM::fixup_arm_movw_lo16_pcrel:
+ case ARM::fixup_t2_movw_lo16:
+ case ARM::fixup_t2_movw_lo16_pcrel:
+ RelocType = unsigned(macho::RIT_ARM_Half);
+ // Report as 'long', even though that is not quite accurate.
+ Log2Size = llvm::Log2_32(4);
+ return true;
+ }
+}
+
+void ARMMachObjectWriter::
+RecordARMMovwMovtRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ uint64_t &FixedValue) {
+ uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
+ unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
+ unsigned Type = macho::RIT_ARM_Half;
+
+ // See <reloc.h>.
+ const MCSymbol *A = &Target.getSymA()->getSymbol();
+ MCSymbolData *A_SD = &Asm.getSymbolData(*A);
+
+ if (!A_SD->getFragment())
+ report_fatal_error("symbol '" + A->getName() +
+ "' can not be undefined in a subtraction expression");
+
+ uint32_t Value = Writer->getSymbolAddress(A_SD, Layout);
+ uint32_t Value2 = 0;
+ uint64_t SecAddr =
+ Writer->getSectionAddress(A_SD->getFragment()->getParent());
+ FixedValue += SecAddr;
+
+ if (const MCSymbolRefExpr *B = Target.getSymB()) {
+ MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
+
+ if (!B_SD->getFragment())
+ report_fatal_error("symbol '" + B->getSymbol().getName() +
+ "' can not be undefined in a subtraction expression");
+
+ // Select the appropriate difference relocation type.
+ Type = macho::RIT_ARM_HalfDifference;
+ Value2 = Writer->getSymbolAddress(B_SD, Layout);
+ FixedValue -= Writer->getSectionAddress(B_SD->getFragment()->getParent());
+ }
+
+ // Relocations are written out in reverse order, so the PAIR comes first.
+ // ARM_RELOC_HALF and ARM_RELOC_HALF_SECTDIFF abuse the r_length field:
+ //
+ // For these two r_type relocations they always have a pair following them and
+ // the r_length bits are used differently. The encoding of the r_length is as
+ // follows:
+ // low bit of r_length:
+ // 0 - :lower16: for movw instructions
+ // 1 - :upper16: for movt instructions
+ // high bit of r_length:
+ // 0 - arm instructions
+ // 1 - thumb instructions
+ // the other half of the relocated expression is in the following pair
+ // relocation entry in the the low 16 bits of r_address field.
+ unsigned ThumbBit = 0;
+ unsigned MovtBit = 0;
+ switch ((unsigned)Fixup.getKind()) {
+ default: break;
+ case ARM::fixup_arm_movt_hi16:
+ case ARM::fixup_arm_movt_hi16_pcrel:
+ MovtBit = 1;
+ break;
+ case ARM::fixup_t2_movt_hi16:
+ case ARM::fixup_t2_movt_hi16_pcrel:
+ MovtBit = 1;
+ // Fallthrough
+ case ARM::fixup_t2_movw_lo16:
+ case ARM::fixup_t2_movw_lo16_pcrel:
+ ThumbBit = 1;
+ break;
+ }
+
+
+ if (Type == macho::RIT_ARM_HalfDifference) {
+ uint32_t OtherHalf = MovtBit
+ ? (FixedValue & 0xffff) : ((FixedValue & 0xffff0000) >> 16);
+
+ macho::RelocationEntry MRE;
+ MRE.Word0 = ((OtherHalf << 0) |
+ (macho::RIT_Pair << 24) |
+ (MovtBit << 28) |
+ (ThumbBit << 29) |
+ (IsPCRel << 30) |
+ macho::RF_Scattered);
+ MRE.Word1 = Value2;
+ Writer->addRelocation(Fragment->getParent(), MRE);
+ }
+
+ macho::RelocationEntry MRE;
+ MRE.Word0 = ((FixupOffset << 0) |
+ (Type << 24) |
+ (MovtBit << 28) |
+ (ThumbBit << 29) |
+ (IsPCRel << 30) |
+ macho::RF_Scattered);
+ MRE.Word1 = Value;
+ Writer->addRelocation(Fragment->getParent(), MRE);
+}
+
+void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ unsigned Log2Size,
+ uint64_t &FixedValue) {
+ uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
+ unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
+ unsigned Type = macho::RIT_Vanilla;
+
+ // See <reloc.h>.
+ const MCSymbol *A = &Target.getSymA()->getSymbol();
+ MCSymbolData *A_SD = &Asm.getSymbolData(*A);
+
+ if (!A_SD->getFragment())
+ report_fatal_error("symbol '" + A->getName() +
+ "' can not be undefined in a subtraction expression");
+
+ uint32_t Value = Writer->getSymbolAddress(A_SD, Layout);
+ uint64_t SecAddr = Writer->getSectionAddress(A_SD->getFragment()->getParent());
+ FixedValue += SecAddr;
+ uint32_t Value2 = 0;
+
+ if (const MCSymbolRefExpr *B = Target.getSymB()) {
+ MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
+
+ if (!B_SD->getFragment())
+ report_fatal_error("symbol '" + B->getSymbol().getName() +
+ "' can not be undefined in a subtraction expression");
+
+ // Select the appropriate difference relocation type.
+ Type = macho::RIT_Difference;
+ Value2 = Writer->getSymbolAddress(B_SD, Layout);
+ FixedValue -= Writer->getSectionAddress(B_SD->getFragment()->getParent());
+ }
+
+ // Relocations are written out in reverse order, so the PAIR comes first.
+ if (Type == macho::RIT_Difference ||
+ Type == macho::RIT_Generic_LocalDifference) {
+ macho::RelocationEntry MRE;
+ MRE.Word0 = ((0 << 0) |
+ (macho::RIT_Pair << 24) |
+ (Log2Size << 28) |
+ (IsPCRel << 30) |
+ macho::RF_Scattered);
+ MRE.Word1 = Value2;
+ Writer->addRelocation(Fragment->getParent(), MRE);
+ }
+
+ macho::RelocationEntry MRE;
+ MRE.Word0 = ((FixupOffset << 0) |
+ (Type << 24) |
+ (Log2Size << 28) |
+ (IsPCRel << 30) |
+ macho::RF_Scattered);
+ MRE.Word1 = Value;
+ Writer->addRelocation(Fragment->getParent(), MRE);
+}
+
+void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ uint64_t &FixedValue) {
+ unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
+ unsigned Log2Size;
+ unsigned RelocType = macho::RIT_Vanilla;
+ if (!getARMFixupKindMachOInfo(Fixup.getKind(), RelocType, Log2Size)) {
+ report_fatal_error("unknown ARM fixup kind!");
+ return;
+ }
+
+ // If this is a difference or a defined symbol plus an offset, then we need a
+ // scattered relocation entry. Differences always require scattered
+ // relocations.
+ if (Target.getSymB()) {
+ if (RelocType == macho::RIT_ARM_Half ||
+ RelocType == macho::RIT_ARM_HalfDifference)
+ return RecordARMMovwMovtRelocation(Writer, Asm, Layout, Fragment, Fixup,
+ Target, FixedValue);
+ return RecordARMScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup,
+ Target, Log2Size, FixedValue);
+ }
+
+ // Get the symbol data, if any.
+ MCSymbolData *SD = 0;
+ if (Target.getSymA())
+ SD = &Asm.getSymbolData(Target.getSymA()->getSymbol());
+
+ // FIXME: For other platforms, we need to use scattered relocations for
+ // internal relocations with offsets. If this is an internal relocation with
+ // an offset, it also needs a scattered relocation entry.
+ //
+ // Is this right for ARM?
+ uint32_t Offset = Target.getConstant();
+ if (IsPCRel && RelocType == macho::RIT_Vanilla)
+ Offset += 1 << Log2Size;
+ if (Offset && SD && !Writer->doesSymbolRequireExternRelocation(SD))
+ return RecordARMScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup,
+ Target, Log2Size, FixedValue);
+
+ // See <reloc.h>.
+ uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
+ unsigned Index = 0;
+ unsigned IsExtern = 0;
+ unsigned Type = 0;
+
+ if (Target.isAbsolute()) { // constant
+ // FIXME!
+ report_fatal_error("FIXME: relocations to absolute targets "
+ "not yet implemented");
+ } else {
+ // Resolve constant variables.
+ if (SD->getSymbol().isVariable()) {
+ int64_t Res;
+ if (SD->getSymbol().getVariableValue()->EvaluateAsAbsolute(
+ Res, Layout, Writer->getSectionAddressMap())) {
+ FixedValue = Res;
+ return;
+ }
+ }
+
+ // Check whether we need an external or internal relocation.
+ if (Writer->doesSymbolRequireExternRelocation(SD)) {
+ IsExtern = 1;
+ Index = SD->getIndex();
+
+ // For external relocations, make sure to offset the fixup value to
+ // compensate for the addend of the symbol address, if it was
+ // undefined. This occurs with weak definitions, for example.
+ if (!SD->Symbol->isUndefined())
+ FixedValue -= Layout.getSymbolOffset(SD);
+ } else {
+ // The index is the section ordinal (1-based).
+ const MCSectionData &SymSD = Asm.getSectionData(
+ SD->getSymbol().getSection());
+ Index = SymSD.getOrdinal() + 1;
+ FixedValue += Writer->getSectionAddress(&SymSD);
+ }
+ if (IsPCRel)
+ FixedValue -= Writer->getSectionAddress(Fragment->getParent());
+
+ // The type is determined by the fixup kind.
+ Type = RelocType;
+ }
+
+ // struct relocation_info (8 bytes)
+ macho::RelocationEntry MRE;
+ MRE.Word0 = FixupOffset;
+ MRE.Word1 = ((Index << 0) |
+ (IsPCRel << 24) |
+ (Log2Size << 25) |
+ (IsExtern << 27) |
+ (Type << 28));
+ Writer->addRelocation(Fragment->getParent(), MRE);
+}
+
+MCObjectWriter *llvm::createARMMachObjectWriter(raw_ostream &OS,
+ bool Is64Bit,
+ uint32_t CPUType,
+ uint32_t CPUSubtype) {
+ return createMachObjectWriter(new ARMMachObjectWriter(Is64Bit,
+ CPUType,
+ CPUSubtype),
+ OS, /*IsLittleEndian=*/true);
+}
diff --git a/contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp b/contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp
new file mode 100644
index 0000000..2df0053
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp
@@ -0,0 +1,331 @@
+//===-- MLxExpansionPass.cpp - Expand MLx instrs to avoid hazards ----------=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Expand VFP / NEON floating point MLA / MLS instructions (each to a pair of
+// multiple and add / sub instructions) when special VMLx hazards are detected.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mlx-expansion"
+#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMSubtarget.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+static cl::opt<bool>
+ForceExapnd("expand-all-fp-mlx", cl::init(false), cl::Hidden);
+static cl::opt<unsigned>
+ExpandLimit("expand-limit", cl::init(~0U), cl::Hidden);
+
+STATISTIC(NumExpand, "Number of fp MLA / MLS instructions expanded");
+
+namespace {
+ struct MLxExpansion : public MachineFunctionPass {
+ static char ID;
+ MLxExpansion() : MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual const char *getPassName() const {
+ return "ARM MLA / MLS expansion pass";
+ }
+
+ private:
+ const ARMBaseInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ MachineRegisterInfo *MRI;
+
+ bool isA9;
+ unsigned MIIdx;
+ MachineInstr* LastMIs[4];
+ SmallPtrSet<MachineInstr*, 4> IgnoreStall;
+
+ void clearStack();
+ void pushStack(MachineInstr *MI);
+ MachineInstr *getAccDefMI(MachineInstr *MI) const;
+ unsigned getDefReg(MachineInstr *MI) const;
+ bool hasRAWHazard(unsigned Reg, MachineInstr *MI) const;
+ bool FindMLxHazard(MachineInstr *MI);
+ void ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI,
+ unsigned MulOpc, unsigned AddSubOpc,
+ bool NegAcc, bool HasLane);
+ bool ExpandFPMLxInstructions(MachineBasicBlock &MBB);
+ };
+ char MLxExpansion::ID = 0;
+}
+
+void MLxExpansion::clearStack() {
+ std::fill(LastMIs, LastMIs + 4, (MachineInstr*)0);
+ MIIdx = 0;
+}
+
+void MLxExpansion::pushStack(MachineInstr *MI) {
+ LastMIs[MIIdx] = MI;
+ if (++MIIdx == 4)
+ MIIdx = 0;
+}
+
+MachineInstr *MLxExpansion::getAccDefMI(MachineInstr *MI) const {
+ // Look past COPY and INSERT_SUBREG instructions to find the
+ // real definition MI. This is important for _sfp instructions.
+ unsigned Reg = MI->getOperand(1).getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ return 0;
+
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineInstr *DefMI = MRI->getVRegDef(Reg);
+ while (true) {
+ if (DefMI->getParent() != MBB)
+ break;
+ if (DefMI->isCopyLike()) {
+ Reg = DefMI->getOperand(1).getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ DefMI = MRI->getVRegDef(Reg);
+ continue;
+ }
+ } else if (DefMI->isInsertSubreg()) {
+ Reg = DefMI->getOperand(2).getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ DefMI = MRI->getVRegDef(Reg);
+ continue;
+ }
+ }
+ break;
+ }
+ return DefMI;
+}
+
+unsigned MLxExpansion::getDefReg(MachineInstr *MI) const {
+ unsigned Reg = MI->getOperand(0).getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
+ !MRI->hasOneNonDBGUse(Reg))
+ return Reg;
+
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineInstr *UseMI = &*MRI->use_nodbg_begin(Reg);
+ if (UseMI->getParent() != MBB)
+ return Reg;
+
+ while (UseMI->isCopy() || UseMI->isInsertSubreg()) {
+ Reg = UseMI->getOperand(0).getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
+ !MRI->hasOneNonDBGUse(Reg))
+ return Reg;
+ UseMI = &*MRI->use_nodbg_begin(Reg);
+ if (UseMI->getParent() != MBB)
+ return Reg;
+ }
+
+ return Reg;
+}
+
+bool MLxExpansion::hasRAWHazard(unsigned Reg, MachineInstr *MI) const {
+ // FIXME: Detect integer instructions properly.
+ const MCInstrDesc &MCID = MI->getDesc();
+ unsigned Domain = MCID.TSFlags & ARMII::DomainMask;
+ if (MCID.mayStore())
+ return false;
+ unsigned Opcode = MCID.getOpcode();
+ if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
+ return false;
+ if ((Domain & ARMII::DomainVFP) || (Domain & ARMII::DomainNEON))
+ return MI->readsRegister(Reg, TRI);
+ return false;
+}
+
+
+bool MLxExpansion::FindMLxHazard(MachineInstr *MI) {
+ if (NumExpand >= ExpandLimit)
+ return false;
+
+ if (ForceExapnd)
+ return true;
+
+ MachineInstr *DefMI = getAccDefMI(MI);
+ if (TII->isFpMLxInstruction(DefMI->getOpcode())) {
+ // r0 = vmla
+ // r3 = vmla r0, r1, r2
+ // takes 16 - 17 cycles
+ //
+ // r0 = vmla
+ // r4 = vmul r1, r2
+ // r3 = vadd r0, r4
+ // takes about 14 - 15 cycles even with vmul stalling for 4 cycles.
+ IgnoreStall.insert(DefMI);
+ return true;
+ }
+
+ if (IgnoreStall.count(MI))
+ return false;
+
+ // If a VMLA.F is followed by an VADD.F or VMUL.F with no RAW hazard, the
+ // VADD.F or VMUL.F will stall 4 cycles before issue. The 4 cycle stall
+ // preserves the in-order retirement of the instructions.
+ // Look at the next few instructions, if *most* of them can cause hazards,
+ // then the scheduler can't *fix* this, we'd better break up the VMLA.
+ unsigned Limit1 = isA9 ? 1 : 4;
+ unsigned Limit2 = isA9 ? 1 : 4;
+ for (unsigned i = 1; i <= 4; ++i) {
+ int Idx = ((int)MIIdx - i + 4) % 4;
+ MachineInstr *NextMI = LastMIs[Idx];
+ if (!NextMI)
+ continue;
+
+ if (TII->canCauseFpMLxStall(NextMI->getOpcode())) {
+ if (i <= Limit1)
+ return true;
+ }
+
+ // Look for VMLx RAW hazard.
+ if (i <= Limit2 && hasRAWHazard(getDefReg(MI), NextMI))
+ return true;
+ }
+
+ return false;
+}
+
+/// ExpandFPMLxInstructions - Expand a MLA / MLS instruction into a pair
+/// of MUL + ADD / SUB instructions.
+void
+MLxExpansion::ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI,
+ unsigned MulOpc, unsigned AddSubOpc,
+ bool NegAcc, bool HasLane) {
+ unsigned DstReg = MI->getOperand(0).getReg();
+ bool DstDead = MI->getOperand(0).isDead();
+ unsigned AccReg = MI->getOperand(1).getReg();
+ unsigned Src1Reg = MI->getOperand(2).getReg();
+ unsigned Src2Reg = MI->getOperand(3).getReg();
+ bool Src1Kill = MI->getOperand(2).isKill();
+ bool Src2Kill = MI->getOperand(3).isKill();
+ unsigned LaneImm = HasLane ? MI->getOperand(4).getImm() : 0;
+ unsigned NextOp = HasLane ? 5 : 4;
+ ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI->getOperand(NextOp).getImm();
+ unsigned PredReg = MI->getOperand(++NextOp).getReg();
+
+ const MCInstrDesc &MCID1 = TII->get(MulOpc);
+ const MCInstrDesc &MCID2 = TII->get(AddSubOpc);
+ unsigned TmpReg = MRI->createVirtualRegister(TII->getRegClass(MCID1, 0, TRI));
+
+ MachineInstrBuilder MIB = BuildMI(MBB, *MI, MI->getDebugLoc(), MCID1, TmpReg)
+ .addReg(Src1Reg, getKillRegState(Src1Kill))
+ .addReg(Src2Reg, getKillRegState(Src2Kill));
+ if (HasLane)
+ MIB.addImm(LaneImm);
+ MIB.addImm(Pred).addReg(PredReg);
+
+ MIB = BuildMI(MBB, *MI, MI->getDebugLoc(), MCID2)
+ .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstDead));
+
+ if (NegAcc) {
+ bool AccKill = MRI->hasOneNonDBGUse(AccReg);
+ MIB.addReg(TmpReg, getKillRegState(true))
+ .addReg(AccReg, getKillRegState(AccKill));
+ } else {
+ MIB.addReg(AccReg).addReg(TmpReg, getKillRegState(true));
+ }
+ MIB.addImm(Pred).addReg(PredReg);
+
+ DEBUG({
+ dbgs() << "Expanding: " << *MI;
+ dbgs() << " to:\n";
+ MachineBasicBlock::iterator MII = MI;
+ MII = llvm::prior(MII);
+ MachineInstr &MI2 = *MII;
+ MII = llvm::prior(MII);
+ MachineInstr &MI1 = *MII;
+ dbgs() << " " << MI1;
+ dbgs() << " " << MI2;
+ });
+
+ MI->eraseFromParent();
+ ++NumExpand;
+}
+
+bool MLxExpansion::ExpandFPMLxInstructions(MachineBasicBlock &MBB) {
+ bool Changed = false;
+
+ clearStack();
+ IgnoreStall.clear();
+
+ unsigned Skip = 0;
+ MachineBasicBlock::reverse_iterator MII = MBB.rbegin(), E = MBB.rend();
+ while (MII != E) {
+ MachineInstr *MI = &*MII;
+
+ if (MI->isLabel() || MI->isImplicitDef() || MI->isCopy()) {
+ ++MII;
+ continue;
+ }
+
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (MCID.isBarrier()) {
+ clearStack();
+ Skip = 0;
+ ++MII;
+ continue;
+ }
+
+ unsigned Domain = MCID.TSFlags & ARMII::DomainMask;
+ if (Domain == ARMII::DomainGeneral) {
+ if (++Skip == 2)
+ // Assume dual issues of non-VFP / NEON instructions.
+ pushStack(0);
+ } else {
+ Skip = 0;
+
+ unsigned MulOpc, AddSubOpc;
+ bool NegAcc, HasLane;
+ if (!TII->isFpMLxInstruction(MCID.getOpcode(),
+ MulOpc, AddSubOpc, NegAcc, HasLane) ||
+ !FindMLxHazard(MI))
+ pushStack(MI);
+ else {
+ ExpandFPMLxInstruction(MBB, MI, MulOpc, AddSubOpc, NegAcc, HasLane);
+ E = MBB.rend(); // May have changed if MI was the 1st instruction.
+ Changed = true;
+ continue;
+ }
+ }
+
+ ++MII;
+ }
+
+ return Changed;
+}
+
+bool MLxExpansion::runOnMachineFunction(MachineFunction &Fn) {
+ TII = static_cast<const ARMBaseInstrInfo*>(Fn.getTarget().getInstrInfo());
+ TRI = Fn.getTarget().getRegisterInfo();
+ MRI = &Fn.getRegInfo();
+ const ARMSubtarget *STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
+ isA9 = STI->isCortexA9();
+
+ bool Modified = false;
+ for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
+ ++MFI) {
+ MachineBasicBlock &MBB = *MFI;
+ Modified |= ExpandFPMLxInstructions(MBB);
+ }
+
+ return Modified;
+}
+
+FunctionPass *llvm::createMLxExpansionPass() {
+ return new MLxExpansion();
+}
diff --git a/contrib/llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp b/contrib/llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp
new file mode 100644
index 0000000..500e3de
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp
@@ -0,0 +1,23 @@
+//===-- ARMTargetInfo.cpp - ARM Target Implementation ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "llvm/Module.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::TheARMTarget, llvm::TheThumbTarget;
+
+extern "C" void LLVMInitializeARMTargetInfo() {
+ RegisterTarget<Triple::arm, /*HasJIT=*/true>
+ X(TheARMTarget, "arm", "ARM");
+
+ RegisterTarget<Triple::thumb, /*HasJIT=*/true>
+ Y(TheThumbTarget, "thumb", "Thumb");
+}
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
new file mode 100644
index 0000000..d848177
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -0,0 +1,366 @@
+//======- Thumb1FrameLowering.cpp - Thumb1 Frame Information ---*- C++ -*-====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Thumb1 implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Thumb1FrameLowering.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+bool Thumb1FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const{
+ const MachineFrameInfo *FFI = MF.getFrameInfo();
+ unsigned CFSize = FFI->getMaxCallFrameSize();
+ // It's not always a good idea to include the call frame as part of the
+ // stack frame. ARM (especially Thumb) has small immediate offset to
+ // address the stack frame. So a large call frame can cause poor codegen
+ // and may even makes it impossible to scavenge a register.
+ if (CFSize >= ((1 << 8) - 1) * 4 / 2) // Half of imm8 * 4
+ return false;
+
+ return !MF.getFrameInfo()->hasVarSizedObjects();
+}
+
+static void
+emitSPUpdate(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ const TargetInstrInfo &TII, DebugLoc dl,
+ const Thumb1RegisterInfo &MRI,
+ int NumBytes, unsigned MIFlags = MachineInstr::NoFlags) {
+ emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, TII,
+ MRI, MIFlags);
+}
+
+void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front();
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ const Thumb1RegisterInfo *RegInfo =
+ static_cast<const Thumb1RegisterInfo*>(MF.getTarget().getRegisterInfo());
+ const Thumb1InstrInfo &TII =
+ *static_cast<const Thumb1InstrInfo*>(MF.getTarget().getInstrInfo());
+
+ unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
+ unsigned NumBytes = MFI->getStackSize();
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+ unsigned FramePtr = RegInfo->getFrameRegister(MF);
+ unsigned BasePtr = RegInfo->getBaseRegister();
+
+ // Thumb add/sub sp, imm8 instructions implicitly multiply the offset by 4.
+ NumBytes = (NumBytes + 3) & ~3;
+ MFI->setStackSize(NumBytes);
+
+ // Determine the sizes of each callee-save spill areas and record which frame
+ // belongs to which callee-save spill areas.
+ unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
+ int FramePtrSpillFI = 0;
+
+ if (VARegSaveSize)
+ emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -VARegSaveSize,
+ MachineInstr::FrameSetup);
+
+ if (!AFI->hasStackFrame()) {
+ if (NumBytes != 0)
+ emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes,
+ MachineInstr::FrameSetup);
+ return;
+ }
+
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ int FI = CSI[i].getFrameIdx();
+ switch (Reg) {
+ case ARM::R4:
+ case ARM::R5:
+ case ARM::R6:
+ case ARM::R7:
+ case ARM::LR:
+ if (Reg == FramePtr)
+ FramePtrSpillFI = FI;
+ AFI->addGPRCalleeSavedArea1Frame(FI);
+ GPRCS1Size += 4;
+ break;
+ case ARM::R8:
+ case ARM::R9:
+ case ARM::R10:
+ case ARM::R11:
+ if (Reg == FramePtr)
+ FramePtrSpillFI = FI;
+ if (STI.isTargetDarwin()) {
+ AFI->addGPRCalleeSavedArea2Frame(FI);
+ GPRCS2Size += 4;
+ } else {
+ AFI->addGPRCalleeSavedArea1Frame(FI);
+ GPRCS1Size += 4;
+ }
+ break;
+ default:
+ AFI->addDPRCalleeSavedAreaFrame(FI);
+ DPRCSSize += 8;
+ }
+ }
+
+ if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) {
+ ++MBBI;
+ if (MBBI != MBB.end())
+ dl = MBBI->getDebugLoc();
+ }
+
+ // Determine starting offsets of spill areas.
+ unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize);
+ unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize;
+ unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size;
+ AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + NumBytes);
+ AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
+ AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
+ AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
+ NumBytes = DPRCSOffset;
+
+ // Adjust FP so it point to the stack slot that contains the previous FP.
+ if (hasFP(MF)) {
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr)
+ .addFrameIndex(FramePtrSpillFI).addImm(0)
+ .setMIFlags(MachineInstr::FrameSetup));
+ if (NumBytes > 508)
+ // If offset is > 508 then sp cannot be adjusted in a single instruction,
+ // try restoring from fp instead.
+ AFI->setShouldRestoreSPFromFP(true);
+ }
+
+ if (NumBytes)
+ // Insert it after all the callee-save spills.
+ emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes,
+ MachineInstr::FrameSetup);
+
+ if (STI.isTargetELF() && hasFP(MF))
+ MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() -
+ AFI->getFramePtrSpillOffset());
+
+ AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
+ AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
+ AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
+
+ // Thumb1 does not currently support dynamic stack realignment. Report a
+ // fatal error rather then silently generate bad code.
+ if (RegInfo->needsStackRealignment(MF))
+ report_fatal_error("Dynamic stack realignment not supported for thumb1.");
+
+ // If we need a base pointer, set it up here. It's whatever the value
+ // of the stack pointer is at this point. Any variable size objects
+ // will be allocated after this, so we can still use the base pointer
+ // to reference locals.
+ if (RegInfo->hasBasePointer(MF))
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), BasePtr)
+ .addReg(ARM::SP));
+
+ // If the frame has variable sized objects then the epilogue must restore
+ // the sp from fp. We can assume there's an FP here since hasFP already
+ // checks for hasVarSizedObjects.
+ if (MFI->hasVarSizedObjects())
+ AFI->setShouldRestoreSPFromFP(true);
+}
+
+static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) {
+ for (unsigned i = 0; CSRegs[i]; ++i)
+ if (Reg == CSRegs[i])
+ return true;
+ return false;
+}
+
+static bool isCSRestore(MachineInstr *MI, const unsigned *CSRegs) {
+ if (MI->getOpcode() == ARM::tLDRspi &&
+ MI->getOperand(1).isFI() &&
+ isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs))
+ return true;
+ else if (MI->getOpcode() == ARM::tPOP) {
+ // The first two operands are predicates. The last two are
+ // imp-def and imp-use of SP. Check everything in between.
+ for (int i = 2, e = MI->getNumOperands() - 2; i != e; ++i)
+ if (!isCalleeSavedRegister(MI->getOperand(i).getReg(), CSRegs))
+ return false;
+ return true;
+ }
+ return false;
+}
+
+void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ assert((MBBI->getOpcode() == ARM::tBX_RET ||
+ MBBI->getOpcode() == ARM::tPOP_RET) &&
+ "Can only insert epilog into returning blocks");
+ DebugLoc dl = MBBI->getDebugLoc();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ const Thumb1RegisterInfo *RegInfo =
+ static_cast<const Thumb1RegisterInfo*>(MF.getTarget().getRegisterInfo());
+ const Thumb1InstrInfo &TII =
+ *static_cast<const Thumb1InstrInfo*>(MF.getTarget().getInstrInfo());
+
+ unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
+ int NumBytes = (int)MFI->getStackSize();
+ const unsigned *CSRegs = RegInfo->getCalleeSavedRegs();
+ unsigned FramePtr = RegInfo->getFrameRegister(MF);
+
+ if (!AFI->hasStackFrame()) {
+ if (NumBytes != 0)
+ emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, NumBytes);
+ } else {
+ // Unwind MBBI to point to first LDR / VLDRD.
+ if (MBBI != MBB.begin()) {
+ do
+ --MBBI;
+ while (MBBI != MBB.begin() && isCSRestore(MBBI, CSRegs));
+ if (!isCSRestore(MBBI, CSRegs))
+ ++MBBI;
+ }
+
+ // Move SP to start of FP callee save spill area.
+ NumBytes -= (AFI->getGPRCalleeSavedArea1Size() +
+ AFI->getGPRCalleeSavedArea2Size() +
+ AFI->getDPRCalleeSavedAreaSize());
+
+ if (AFI->shouldRestoreSPFromFP()) {
+ NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
+ // Reset SP based on frame pointer only if the stack frame extends beyond
+ // frame pointer stack slot, the target is ELF and the function has FP, or
+ // the target uses var sized objects.
+ if (NumBytes) {
+ assert(MF.getRegInfo().isPhysRegUsed(ARM::R4) &&
+ "No scratch register to restore SP from FP!");
+ emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
+ TII, *RegInfo);
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),
+ ARM::SP)
+ .addReg(ARM::R4));
+ } else
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),
+ ARM::SP)
+ .addReg(FramePtr));
+ } else {
+ if (MBBI->getOpcode() == ARM::tBX_RET &&
+ &MBB.front() != MBBI &&
+ prior(MBBI)->getOpcode() == ARM::tPOP) {
+ MachineBasicBlock::iterator PMBBI = prior(MBBI);
+ emitSPUpdate(MBB, PMBBI, TII, dl, *RegInfo, NumBytes);
+ } else
+ emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, NumBytes);
+ }
+ }
+
+ if (VARegSaveSize) {
+ // Unlike T2 and ARM mode, the T1 pop instruction cannot restore
+ // to LR, and we can't pop the value directly to the PC since
+ // we need to update the SP after popping the value. Therefore, we
+ // pop the old LR into R3 as a temporary.
+
+ // Move back past the callee-saved register restoration
+ while (MBBI != MBB.end() && isCSRestore(MBBI, CSRegs))
+ ++MBBI;
+ // Epilogue for vararg functions: pop LR to R3 and branch off it.
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP)))
+ .addReg(ARM::R3, RegState::Define);
+
+ emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, VARegSaveSize);
+
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg))
+ .addReg(ARM::R3, RegState::Kill));
+ // erase the old tBX_RET instruction
+ MBB.erase(MBBI);
+ }
+}
+
+bool Thumb1FrameLowering::
+spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return false;
+
+ DebugLoc DL;
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(ARM::tPUSH));
+ AddDefaultPred(MIB);
+ for (unsigned i = CSI.size(); i != 0; --i) {
+ unsigned Reg = CSI[i-1].getReg();
+ bool isKill = true;
+
+ // Add the callee-saved register as live-in unless it's LR and
+ // @llvm.returnaddress is called. If LR is returned for @llvm.returnaddress
+ // then it's already added to the function and entry block live-in sets.
+ if (Reg == ARM::LR) {
+ MachineFunction &MF = *MBB.getParent();
+ if (MF.getFrameInfo()->isReturnAddressTaken() &&
+ MF.getRegInfo().isLiveIn(Reg))
+ isKill = false;
+ }
+
+ if (isKill)
+ MBB.addLiveIn(Reg);
+
+ MIB.addReg(Reg, getKillRegState(isKill));
+ }
+ MIB.setMIFlags(MachineInstr::FrameSetup);
+ return true;
+}
+
+bool Thumb1FrameLowering::
+restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return false;
+
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+ bool isVarArg = AFI->getVarArgsRegSaveSize() > 0;
+ DebugLoc DL = MI->getDebugLoc();
+ MachineInstrBuilder MIB = BuildMI(MF, DL, TII.get(ARM::tPOP));
+ AddDefaultPred(MIB);
+
+ bool NumRegs = false;
+ for (unsigned i = CSI.size(); i != 0; --i) {
+ unsigned Reg = CSI[i-1].getReg();
+ if (Reg == ARM::LR) {
+ // Special epilogue for vararg functions. See emitEpilogue
+ if (isVarArg)
+ continue;
+ Reg = ARM::PC;
+ (*MIB).setDesc(TII.get(ARM::tPOP_RET));
+ MI = MBB.erase(MI);
+ }
+ MIB.addReg(Reg, getDefRegState(true));
+ NumRegs = true;
+ }
+
+ // It's illegal to emit pop instruction without operands.
+ if (NumRegs)
+ MBB.insert(MI, &*MIB);
+ else
+ MF.DeleteMachineInstr(MIB);
+
+ return true;
+}
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h
new file mode 100644
index 0000000..bcfc516
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h
@@ -0,0 +1,52 @@
+//===-- Thumb1FrameLowering.h - Thumb1-specific frame info stuff --*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __THUMB_FRAMEINFO_H_
+#define __THUMB_FRAMEINFO_H_
+
+#include "ARM.h"
+#include "ARMFrameLowering.h"
+#include "ARMSubtarget.h"
+#include "Thumb1InstrInfo.h"
+#include "Thumb1RegisterInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+ class ARMSubtarget;
+
+class Thumb1FrameLowering : public ARMFrameLowering {
+public:
+ explicit Thumb1FrameLowering(const ARMSubtarget &sti)
+ : ARMFrameLowering(sti) {
+ }
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+ bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+
+ bool hasReservedCallFrame(const MachineFunction &MF) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
new file mode 100644
index 0000000..218311d
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -0,0 +1,100 @@
+//===- Thumb1InstrInfo.cpp - Thumb-1 Instruction Information ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Thumb-1 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Thumb1InstrInfo.h"
+#include "ARM.h"
+#include "ARMMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/ADT/SmallVector.h"
+#include "Thumb1InstrInfo.h"
+
+using namespace llvm;
+
+Thumb1InstrInfo::Thumb1InstrInfo(const ARMSubtarget &STI)
+ : ARMBaseInstrInfo(STI), RI(*this, STI) {
+}
+
+unsigned Thumb1InstrInfo::getUnindexedOpcode(unsigned Opc) const {
+ return 0;
+}
+
+void Thumb1InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc)));
+ assert(ARM::GPRRegClass.contains(DestReg, SrcReg) &&
+ "Thumb1 can only copy GPR registers");
+}
+
+void Thumb1InstrInfo::
+storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned SrcReg, bool isKill, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ assert((RC == ARM::tGPRRegisterClass ||
+ (TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
+ isARMLowRegister(SrcReg))) && "Unknown regclass!");
+
+ if (RC == ARM::tGPRRegisterClass ||
+ (TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
+ isARMLowRegister(SrcReg))) {
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(
+ MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+ MachineMemOperand::MOStore,
+ MFI.getObjectSize(FI),
+ MFI.getObjectAlignment(FI));
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tSTRspi))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ }
+}
+
+void Thumb1InstrInfo::
+loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DestReg, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ assert((RC == ARM::tGPRRegisterClass ||
+ (TargetRegisterInfo::isPhysicalRegister(DestReg) &&
+ isARMLowRegister(DestReg))) && "Unknown regclass!");
+
+ if (RC == ARM::tGPRRegisterClass ||
+ (TargetRegisterInfo::isPhysicalRegister(DestReg) &&
+ isARMLowRegister(DestReg))) {
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(
+ MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+ MachineMemOperand::MOLoad,
+ MFI.getObjectSize(FI),
+ MFI.getObjectAlignment(FI));
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tLDRspi), DestReg)
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ }
+}
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.h b/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.h
new file mode 100644
index 0000000..17ef2f7
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.h
@@ -0,0 +1,59 @@
+//===- Thumb1InstrInfo.h - Thumb-1 Instruction Information ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Thumb-1 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef THUMB1INSTRUCTIONINFO_H
+#define THUMB1INSTRUCTIONINFO_H
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "ARM.h"
+#include "ARMInstrInfo.h"
+#include "Thumb1RegisterInfo.h"
+
+namespace llvm {
+ class ARMSubtarget;
+
+class Thumb1InstrInfo : public ARMBaseInstrInfo {
+ Thumb1RegisterInfo RI;
+public:
+ explicit Thumb1InstrInfo(const ARMSubtarget &STI);
+
+ // Return the non-pre/post incrementing version of 'Opc'. Return 0
+ // if there is not such an opcode.
+ unsigned getUnindexedOpcode(unsigned Opc) const;
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ const Thumb1RegisterInfo &getRegisterInfo() const { return RI; }
+
+ void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+ void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+};
+}
+
+#endif // THUMB1INSTRUCTIONINFO_H
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.cpp
new file mode 100644
index 0000000..e8ed482
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.cpp
@@ -0,0 +1,706 @@
+//===- Thumb1RegisterInfo.cpp - Thumb-1 Register Information ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Thumb-1 implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMSubtarget.h"
+#include "Thumb1InstrInfo.h"
+#include "Thumb1RegisterInfo.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+extern cl::opt<bool> ReuseFrameIndexVals;
+}
+
+using namespace llvm;
+
+Thumb1RegisterInfo::Thumb1RegisterInfo(const ARMBaseInstrInfo &tii,
+ const ARMSubtarget &sti)
+ : ARMBaseRegisterInfo(tii, sti) {
+}
+
+const TargetRegisterClass*
+Thumb1RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC)
+ const {
+ if (ARM::tGPRRegClass.hasSubClassEq(RC))
+ return ARM::tGPRRegisterClass;
+ return ARMBaseRegisterInfo::getLargestLegalSuperClass(RC);
+}
+
+const TargetRegisterClass *
+Thumb1RegisterInfo::getPointerRegClass(unsigned Kind) const {
+ return ARM::tGPRRegisterClass;
+}
+
+/// emitLoadConstPool - Emits a load from constpool to materialize the
+/// specified immediate.
+void
+Thumb1RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ DebugLoc dl,
+ unsigned DestReg, unsigned SubIdx,
+ int Val,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ unsigned MIFlags) const {
+ MachineFunction &MF = *MBB.getParent();
+ MachineConstantPool *ConstantPool = MF.getConstantPool();
+ const Constant *C = ConstantInt::get(
+ Type::getInt32Ty(MBB.getParent()->getFunction()->getContext()), Val);
+ unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4);
+
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tLDRpci))
+ .addReg(DestReg, getDefRegState(true), SubIdx)
+ .addConstantPoolIndex(Idx).addImm(Pred).addReg(PredReg)
+ .setMIFlags(MIFlags);
+}
+
+
+/// emitThumbRegPlusImmInReg - Emits a series of instructions to materialize
+/// a destreg = basereg + immediate in Thumb code. Materialize the immediate
+/// in a register using mov / mvn sequences or load the immediate from a
+/// constpool entry.
+static
+void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ DebugLoc dl,
+ unsigned DestReg, unsigned BaseReg,
+ int NumBytes, bool CanChangeCC,
+ const TargetInstrInfo &TII,
+ const ARMBaseRegisterInfo& MRI,
+ unsigned MIFlags = MachineInstr::NoFlags) {
+ MachineFunction &MF = *MBB.getParent();
+ bool isHigh = !isARMLowRegister(DestReg) ||
+ (BaseReg != 0 && !isARMLowRegister(BaseReg));
+ bool isSub = false;
+ // Subtract doesn't have high register version. Load the negative value
+ // if either base or dest register is a high register. Also, if do not
+ // issue sub as part of the sequence if condition register is to be
+ // preserved.
+ if (NumBytes < 0 && !isHigh && CanChangeCC) {
+ isSub = true;
+ NumBytes = -NumBytes;
+ }
+ unsigned LdReg = DestReg;
+ if (DestReg == ARM::SP) {
+ assert(BaseReg == ARM::SP && "Unexpected!");
+ LdReg = MF.getRegInfo().createVirtualRegister(ARM::tGPRRegisterClass);
+ }
+
+ if (NumBytes <= 255 && NumBytes >= 0)
+ AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg))
+ .addImm(NumBytes).setMIFlags(MIFlags);
+ else if (NumBytes < 0 && NumBytes >= -255) {
+ AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg))
+ .addImm(NumBytes).setMIFlags(MIFlags);
+ AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tRSB), LdReg))
+ .addReg(LdReg, RegState::Kill).setMIFlags(MIFlags);
+ } else
+ MRI.emitLoadConstPool(MBB, MBBI, dl, LdReg, 0, NumBytes,
+ ARMCC::AL, 0, MIFlags);
+
+ // Emit add / sub.
+ int Opc = (isSub) ? ARM::tSUBrr : (isHigh ? ARM::tADDhirr : ARM::tADDrr);
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg);
+ if (Opc != ARM::tADDhirr)
+ MIB = AddDefaultT1CC(MIB);
+ if (DestReg == ARM::SP || isSub)
+ MIB.addReg(BaseReg).addReg(LdReg, RegState::Kill);
+ else
+ MIB.addReg(LdReg).addReg(BaseReg, RegState::Kill);
+ AddDefaultPred(MIB);
+}
+
+/// calcNumMI - Returns the number of instructions required to materialize
+/// the specific add / sub r, c instruction.
+static unsigned calcNumMI(int Opc, int ExtraOpc, unsigned Bytes,
+ unsigned NumBits, unsigned Scale) {
+ unsigned NumMIs = 0;
+ unsigned Chunk = ((1 << NumBits) - 1) * Scale;
+
+ if (Opc == ARM::tADDrSPi) {
+ unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes;
+ Bytes -= ThisVal;
+ NumMIs++;
+ NumBits = 8;
+ Scale = 1; // Followed by a number of tADDi8.
+ Chunk = ((1 << NumBits) - 1) * Scale;
+ }
+
+ NumMIs += Bytes / Chunk;
+ if ((Bytes % Chunk) != 0)
+ NumMIs++;
+ if (ExtraOpc)
+ NumMIs++;
+ return NumMIs;
+}
+
+/// emitThumbRegPlusImmediate - Emits a series of instructions to materialize
+/// a destreg = basereg + immediate in Thumb code.
+void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ DebugLoc dl,
+ unsigned DestReg, unsigned BaseReg,
+ int NumBytes, const TargetInstrInfo &TII,
+ const ARMBaseRegisterInfo& MRI,
+ unsigned MIFlags) {
+ bool isSub = NumBytes < 0;
+ unsigned Bytes = (unsigned)NumBytes;
+ if (isSub) Bytes = -NumBytes;
+ bool isMul4 = (Bytes & 3) == 0;
+ bool isTwoAddr = false;
+ bool DstNotEqBase = false;
+ unsigned NumBits = 1;
+ unsigned Scale = 1;
+ int Opc = 0;
+ int ExtraOpc = 0;
+ bool NeedCC = false;
+
+ if (DestReg == BaseReg && BaseReg == ARM::SP) {
+ assert(isMul4 && "Thumb sp inc / dec size must be multiple of 4!");
+ NumBits = 7;
+ Scale = 4;
+ Opc = isSub ? ARM::tSUBspi : ARM::tADDspi;
+ isTwoAddr = true;
+ } else if (!isSub && BaseReg == ARM::SP) {
+ // r1 = add sp, 403
+ // =>
+ // r1 = add sp, 100 * 4
+ // r1 = add r1, 3
+ if (!isMul4) {
+ Bytes &= ~3;
+ ExtraOpc = ARM::tADDi3;
+ }
+ NumBits = 8;
+ Scale = 4;
+ Opc = ARM::tADDrSPi;
+ } else {
+ // sp = sub sp, c
+ // r1 = sub sp, c
+ // r8 = sub sp, c
+ if (DestReg != BaseReg)
+ DstNotEqBase = true;
+ NumBits = 8;
+ if (DestReg == ARM::SP) {
+ Opc = isSub ? ARM::tSUBspi : ARM::tADDspi;
+ assert(isMul4 && "Thumb sp inc / dec size must be multiple of 4!");
+ NumBits = 7;
+ Scale = 4;
+ } else {
+ Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8;
+ NumBits = 8;
+ NeedCC = true;
+ }
+ isTwoAddr = true;
+ }
+
+ unsigned NumMIs = calcNumMI(Opc, ExtraOpc, Bytes, NumBits, Scale);
+ unsigned Threshold = (DestReg == ARM::SP) ? 3 : 2;
+ if (NumMIs > Threshold) {
+ // This will expand into too many instructions. Load the immediate from a
+ // constpool entry.
+ emitThumbRegPlusImmInReg(MBB, MBBI, dl,
+ DestReg, BaseReg, NumBytes, true,
+ TII, MRI, MIFlags);
+ return;
+ }
+
+ if (DstNotEqBase) {
+ if (isARMLowRegister(DestReg) && isARMLowRegister(BaseReg)) {
+ // If both are low registers, emit DestReg = add BaseReg, max(Imm, 7)
+ unsigned Chunk = (1 << 3) - 1;
+ unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes;
+ Bytes -= ThisVal;
+ const MCInstrDesc &MCID = TII.get(isSub ? ARM::tSUBi3 : ARM::tADDi3);
+ const MachineInstrBuilder MIB =
+ AddDefaultT1CC(BuildMI(MBB, MBBI, dl, MCID, DestReg)
+ .setMIFlags(MIFlags));
+ AddDefaultPred(MIB.addReg(BaseReg, RegState::Kill).addImm(ThisVal));
+ } else {
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), DestReg)
+ .addReg(BaseReg, RegState::Kill))
+ .setMIFlags(MIFlags);
+ }
+ BaseReg = DestReg;
+ }
+
+ unsigned Chunk = ((1 << NumBits) - 1) * Scale;
+ while (Bytes) {
+ unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes;
+ Bytes -= ThisVal;
+ ThisVal /= Scale;
+ // Build the new tADD / tSUB.
+ if (isTwoAddr) {
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg);
+ if (NeedCC)
+ MIB = AddDefaultT1CC(MIB);
+ MIB.addReg(DestReg).addImm(ThisVal);
+ MIB = AddDefaultPred(MIB);
+ MIB.setMIFlags(MIFlags);
+ } else {
+ bool isKill = BaseReg != ARM::SP;
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg);
+ if (NeedCC)
+ MIB = AddDefaultT1CC(MIB);
+ MIB.addReg(BaseReg, getKillRegState(isKill)).addImm(ThisVal);
+ MIB = AddDefaultPred(MIB);
+ MIB.setMIFlags(MIFlags);
+
+ BaseReg = DestReg;
+ if (Opc == ARM::tADDrSPi) {
+ // r4 = add sp, imm
+ // r4 = add r4, imm
+ // ...
+ NumBits = 8;
+ Scale = 1;
+ Chunk = ((1 << NumBits) - 1) * Scale;
+ Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8;
+ NeedCC = isTwoAddr = true;
+ }
+ }
+ }
+
+ if (ExtraOpc) {
+ const MCInstrDesc &MCID = TII.get(ExtraOpc);
+ AddDefaultPred(AddDefaultT1CC(BuildMI(MBB, MBBI, dl, MCID, DestReg))
+ .addReg(DestReg, RegState::Kill)
+ .addImm(((unsigned)NumBytes) & 3)
+ .setMIFlags(MIFlags));
+ }
+}
+
+static void emitSPUpdate(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ const TargetInstrInfo &TII, DebugLoc dl,
+ const Thumb1RegisterInfo &MRI,
+ int NumBytes) {
+ emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, TII,
+ MRI);
+}
+
+void Thumb1RegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ if (!TFI->hasReservedCallFrame(MF)) {
+ // If we have alloca, convert as follows:
+ // ADJCALLSTACKDOWN -> sub, sp, sp, amount
+ // ADJCALLSTACKUP -> add, sp, sp, amount
+ MachineInstr *Old = I;
+ DebugLoc dl = Old->getDebugLoc();
+ unsigned Amount = Old->getOperand(0).getImm();
+ if (Amount != 0) {
+ // We need to keep the stack aligned properly. To do this, we round the
+ // amount of space needed for the outgoing arguments up to the next
+ // alignment boundary.
+ unsigned Align = TFI->getStackAlignment();
+ Amount = (Amount+Align-1)/Align*Align;
+
+ // Replace the pseudo instruction with a new instruction...
+ unsigned Opc = Old->getOpcode();
+ if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
+ emitSPUpdate(MBB, I, TII, dl, *this, -Amount);
+ } else {
+ assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
+ emitSPUpdate(MBB, I, TII, dl, *this, Amount);
+ }
+ }
+ }
+ MBB.erase(I);
+}
+
+/// emitThumbConstant - Emit a series of instructions to materialize a
+/// constant.
+static void emitThumbConstant(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ unsigned DestReg, int Imm,
+ const TargetInstrInfo &TII,
+ const Thumb1RegisterInfo& MRI,
+ DebugLoc dl) {
+ bool isSub = Imm < 0;
+ if (isSub) Imm = -Imm;
+
+ int Chunk = (1 << 8) - 1;
+ int ThisVal = (Imm > Chunk) ? Chunk : Imm;
+ Imm -= ThisVal;
+ AddDefaultPred(AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8),
+ DestReg))
+ .addImm(ThisVal));
+ if (Imm > 0)
+ emitThumbRegPlusImmediate(MBB, MBBI, dl, DestReg, DestReg, Imm, TII, MRI);
+ if (isSub) {
+ const MCInstrDesc &MCID = TII.get(ARM::tRSB);
+ AddDefaultPred(AddDefaultT1CC(BuildMI(MBB, MBBI, dl, MCID, DestReg))
+ .addReg(DestReg, RegState::Kill));
+ }
+}
+
+static void removeOperands(MachineInstr &MI, unsigned i) {
+ unsigned Op = i;
+ for (unsigned e = MI.getNumOperands(); i != e; ++i)
+ MI.RemoveOperand(Op);
+}
+
+/// convertToNonSPOpcode - Change the opcode to the non-SP version, because
+/// we're replacing the frame index with a non-SP register.
+static unsigned convertToNonSPOpcode(unsigned Opcode) {
+ switch (Opcode) {
+ case ARM::tLDRspi:
+ return ARM::tLDRi;
+
+ case ARM::tSTRspi:
+ return ARM::tSTRi;
+ }
+
+ return Opcode;
+}
+
+bool Thumb1RegisterInfo::
+rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
+ unsigned FrameReg, int &Offset,
+ const ARMBaseInstrInfo &TII) const {
+ MachineInstr &MI = *II;
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc dl = MI.getDebugLoc();
+ unsigned Opcode = MI.getOpcode();
+ const MCInstrDesc &Desc = MI.getDesc();
+ unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
+
+ if (Opcode == ARM::tADDrSPi) {
+ Offset += MI.getOperand(FrameRegIdx+1).getImm();
+
+ // Can't use tADDrSPi if it's based off the frame pointer.
+ unsigned NumBits = 0;
+ unsigned Scale = 1;
+ if (FrameReg != ARM::SP) {
+ Opcode = ARM::tADDi3;
+ NumBits = 3;
+ } else {
+ NumBits = 8;
+ Scale = 4;
+ assert((Offset & 3) == 0 &&
+ "Thumb add/sub sp, #imm immediate must be multiple of 4!");
+ }
+
+ unsigned PredReg;
+ if (Offset == 0 && getInstrPredicate(&MI, PredReg) == ARMCC::AL) {
+ // Turn it into a move.
+ MI.setDesc(TII.get(ARM::tMOVr));
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ // Remove offset
+ MI.RemoveOperand(FrameRegIdx+1);
+ MachineInstrBuilder MIB(&MI);
+ return true;
+ }
+
+ // Common case: small offset, fits into instruction.
+ unsigned Mask = (1 << NumBits) - 1;
+ if (((Offset / Scale) & ~Mask) == 0) {
+ // Replace the FrameIndex with sp / fp
+ if (Opcode == ARM::tADDi3) {
+ MI.setDesc(TII.get(Opcode));
+ removeOperands(MI, FrameRegIdx);
+ MachineInstrBuilder MIB(&MI);
+ AddDefaultPred(AddDefaultT1CC(MIB).addReg(FrameReg)
+ .addImm(Offset / Scale));
+ } else {
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset / Scale);
+ }
+ return true;
+ }
+
+ unsigned DestReg = MI.getOperand(0).getReg();
+ unsigned Bytes = (Offset > 0) ? Offset : -Offset;
+ unsigned NumMIs = calcNumMI(Opcode, 0, Bytes, NumBits, Scale);
+ // MI would expand into a large number of instructions. Don't try to
+ // simplify the immediate.
+ if (NumMIs > 2) {
+ emitThumbRegPlusImmediate(MBB, II, dl, DestReg, FrameReg, Offset, TII,
+ *this);
+ MBB.erase(II);
+ return true;
+ }
+
+ if (Offset > 0) {
+ // Translate r0 = add sp, imm to
+ // r0 = add sp, 255*4
+ // r0 = add r0, (imm - 255*4)
+ if (Opcode == ARM::tADDi3) {
+ MI.setDesc(TII.get(Opcode));
+ removeOperands(MI, FrameRegIdx);
+ MachineInstrBuilder MIB(&MI);
+ AddDefaultPred(AddDefaultT1CC(MIB).addReg(FrameReg).addImm(Mask));
+ } else {
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Mask);
+ }
+ Offset = (Offset - Mask * Scale);
+ MachineBasicBlock::iterator NII = llvm::next(II);
+ emitThumbRegPlusImmediate(MBB, NII, dl, DestReg, DestReg, Offset, TII,
+ *this);
+ } else {
+ // Translate r0 = add sp, -imm to
+ // r0 = -imm (this is then translated into a series of instructons)
+ // r0 = add r0, sp
+ emitThumbConstant(MBB, II, DestReg, Offset, TII, *this, dl);
+
+ MI.setDesc(TII.get(ARM::tADDhirr));
+ MI.getOperand(FrameRegIdx).ChangeToRegister(DestReg, false, false, true);
+ MI.getOperand(FrameRegIdx+1).ChangeToRegister(FrameReg, false);
+ }
+ return true;
+ } else {
+ if (AddrMode != ARMII::AddrModeT1_s)
+ llvm_unreachable("Unsupported addressing mode!");
+
+ unsigned ImmIdx = FrameRegIdx + 1;
+ int InstrOffs = MI.getOperand(ImmIdx).getImm();
+ unsigned NumBits = (FrameReg == ARM::SP) ? 8 : 5;
+ unsigned Scale = 4;
+
+ Offset += InstrOffs * Scale;
+ assert((Offset & (Scale - 1)) == 0 && "Can't encode this offset!");
+
+ // Common case: small offset, fits into instruction.
+ MachineOperand &ImmOp = MI.getOperand(ImmIdx);
+ int ImmedOffset = Offset / Scale;
+ unsigned Mask = (1 << NumBits) - 1;
+
+ if ((unsigned)Offset <= Mask * Scale) {
+ // Replace the FrameIndex with the frame register (e.g., sp).
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ ImmOp.ChangeToImmediate(ImmedOffset);
+
+ // If we're using a register where sp was stored, convert the instruction
+ // to the non-SP version.
+ unsigned NewOpc = convertToNonSPOpcode(Opcode);
+ if (NewOpc != Opcode && FrameReg != ARM::SP)
+ MI.setDesc(TII.get(NewOpc));
+
+ return true;
+ }
+
+ NumBits = 5;
+ Mask = (1 << NumBits) - 1;
+
+ // If this is a thumb spill / restore, we will be using a constpool load to
+ // materialize the offset.
+ if (Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
+ ImmOp.ChangeToImmediate(0);
+ } else {
+ // Otherwise, it didn't fit. Pull in what we can to simplify the immed.
+ ImmedOffset = ImmedOffset & Mask;
+ ImmOp.ChangeToImmediate(ImmedOffset);
+ Offset &= ~(Mask * Scale);
+ }
+ }
+
+ return Offset == 0;
+}
+
+void
+Thumb1RegisterInfo::resolveFrameIndex(MachineBasicBlock::iterator I,
+ unsigned BaseReg, int64_t Offset) const {
+ MachineInstr &MI = *I;
+ int Off = Offset; // ARM doesn't need the general 64-bit offsets
+ unsigned i = 0;
+
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+ bool Done = rewriteFrameIndex(MI, i, BaseReg, Off, TII);
+ assert (Done && "Unable to resolve frame index!");
+ (void)Done;
+}
+
+/// saveScavengerRegister - Spill the register so it can be used by the
+/// register scavenger. Return true.
+bool
+Thumb1RegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator &UseMI,
+ const TargetRegisterClass *RC,
+ unsigned Reg) const {
+ // Thumb1 can't use the emergency spill slot on the stack because
+ // ldr/str immediate offsets must be positive, and if we're referencing
+ // off the frame pointer (if, for example, there are alloca() calls in
+ // the function, the offset will be negative. Use R12 instead since that's
+ // a call clobbered register that we know won't be used in Thumb1 mode.
+ DebugLoc DL;
+ AddDefaultPred(BuildMI(MBB, I, DL, TII.get(ARM::tMOVr))
+ .addReg(ARM::R12, RegState::Define)
+ .addReg(Reg, RegState::Kill));
+
+ // The UseMI is where we would like to restore the register. If there's
+ // interference with R12 before then, however, we'll need to restore it
+ // before that instead and adjust the UseMI.
+ bool done = false;
+ for (MachineBasicBlock::iterator II = I; !done && II != UseMI ; ++II) {
+ if (II->isDebugValue())
+ continue;
+ // If this instruction affects R12, adjust our restore point.
+ for (unsigned i = 0, e = II->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = II->getOperand(i);
+ if (!MO.isReg() || MO.isUndef() || !MO.getReg() ||
+ TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ continue;
+ if (MO.getReg() == ARM::R12) {
+ UseMI = II;
+ done = true;
+ break;
+ }
+ }
+ }
+ // Restore the register from R12
+ AddDefaultPred(BuildMI(MBB, UseMI, DL, TII.get(ARM::tMOVr)).
+ addReg(Reg, RegState::Define).addReg(ARM::R12, RegState::Kill));
+
+ return true;
+}
+
+void
+Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS) const {
+ unsigned VReg = 0;
+ unsigned i = 0;
+ MachineInstr &MI = *II;
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ DebugLoc dl = MI.getDebugLoc();
+
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+
+ unsigned FrameReg = ARM::SP;
+ int FrameIndex = MI.getOperand(i).getIndex();
+ int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
+ MF.getFrameInfo()->getStackSize() + SPAdj;
+
+ if (AFI->isGPRCalleeSavedArea1Frame(FrameIndex))
+ Offset -= AFI->getGPRCalleeSavedArea1Offset();
+ else if (AFI->isGPRCalleeSavedArea2Frame(FrameIndex))
+ Offset -= AFI->getGPRCalleeSavedArea2Offset();
+ else if (MF.getFrameInfo()->hasVarSizedObjects()) {
+ assert(SPAdj == 0 && MF.getTarget().getFrameLowering()->hasFP(MF) &&
+ "Unexpected");
+ // There are alloca()'s in this function, must reference off the frame
+ // pointer or base pointer instead.
+ if (!hasBasePointer(MF)) {
+ FrameReg = getFrameRegister(MF);
+ Offset -= AFI->getFramePtrSpillOffset();
+ } else
+ FrameReg = BasePtr;
+ }
+
+ // Special handling of dbg_value instructions.
+ if (MI.isDebugValue()) {
+ MI.getOperand(i). ChangeToRegister(FrameReg, false /*isDef*/);
+ MI.getOperand(i+1).ChangeToImmediate(Offset);
+ return;
+ }
+
+ // Modify MI as necessary to handle as much of 'Offset' as possible
+ assert(AFI->isThumbFunction() &&
+ "This eliminateFrameIndex only supports Thumb1!");
+ if (rewriteFrameIndex(MI, i, FrameReg, Offset, TII))
+ return;
+
+ // If we get here, the immediate doesn't fit into the instruction. We folded
+ // as much as possible above, handle the rest, providing a register that is
+ // SP+LargeImm.
+ assert(Offset && "This code isn't needed if offset already handled!");
+
+ unsigned Opcode = MI.getOpcode();
+ const MCInstrDesc &Desc = MI.getDesc();
+
+ // Remove predicate first.
+ int PIdx = MI.findFirstPredOperandIdx();
+ if (PIdx != -1)
+ removeOperands(MI, PIdx);
+
+ if (Desc.mayLoad()) {
+ // Use the destination register to materialize sp + offset.
+ unsigned TmpReg = MI.getOperand(0).getReg();
+ bool UseRR = false;
+ if (Opcode == ARM::tLDRspi) {
+ if (FrameReg == ARM::SP)
+ emitThumbRegPlusImmInReg(MBB, II, dl, TmpReg, FrameReg,
+ Offset, false, TII, *this);
+ else {
+ emitLoadConstPool(MBB, II, dl, TmpReg, 0, Offset);
+ UseRR = true;
+ }
+ } else {
+ emitThumbRegPlusImmediate(MBB, II, dl, TmpReg, FrameReg, Offset, TII,
+ *this);
+ }
+
+ MI.setDesc(TII.get(UseRR ? ARM::tLDRr : ARM::tLDRi));
+ MI.getOperand(i).ChangeToRegister(TmpReg, false, false, true);
+ if (UseRR)
+ // Use [reg, reg] addrmode. Replace the immediate operand w/ the frame
+ // register. The offset is already handled in the vreg value.
+ MI.getOperand(i+1).ChangeToRegister(FrameReg, false, false, false);
+ } else if (Desc.mayStore()) {
+ VReg = MF.getRegInfo().createVirtualRegister(ARM::tGPRRegisterClass);
+ bool UseRR = false;
+
+ if (Opcode == ARM::tSTRspi) {
+ if (FrameReg == ARM::SP)
+ emitThumbRegPlusImmInReg(MBB, II, dl, VReg, FrameReg,
+ Offset, false, TII, *this);
+ else {
+ emitLoadConstPool(MBB, II, dl, VReg, 0, Offset);
+ UseRR = true;
+ }
+ } else
+ emitThumbRegPlusImmediate(MBB, II, dl, VReg, FrameReg, Offset, TII,
+ *this);
+ MI.setDesc(TII.get(UseRR ? ARM::tSTRr : ARM::tSTRi));
+ MI.getOperand(i).ChangeToRegister(VReg, false, false, true);
+ if (UseRR)
+ // Use [reg, reg] addrmode. Replace the immediate operand w/ the frame
+ // register. The offset is already handled in the vreg value.
+ MI.getOperand(i+1).ChangeToRegister(FrameReg, false, false, false);
+ } else {
+ assert(false && "Unexpected opcode!");
+ }
+
+ // Add predicate back if it's needed.
+ if (MI.getDesc().isPredicable()) {
+ MachineInstrBuilder MIB(&MI);
+ AddDefaultPred(MIB);
+ }
+}
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.h b/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.h
new file mode 100644
index 0000000..9060e59
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.h
@@ -0,0 +1,69 @@
+//===- Thumb1RegisterInfo.h - Thumb-1 Register Information Impl -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Thumb-1 implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef THUMB1REGISTERINFO_H
+#define THUMB1REGISTERINFO_H
+
+#include "ARM.h"
+#include "ARMRegisterInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+namespace llvm {
+ class ARMSubtarget;
+ class ARMBaseInstrInfo;
+ class Type;
+
+struct Thumb1RegisterInfo : public ARMBaseRegisterInfo {
+public:
+ Thumb1RegisterInfo(const ARMBaseInstrInfo &tii, const ARMSubtarget &STI);
+
+ const TargetRegisterClass*
+ getLargestLegalSuperClass(const TargetRegisterClass *RC) const;
+
+ const TargetRegisterClass *getPointerRegClass(unsigned Kind = 0) const;
+
+ /// emitLoadConstPool - Emits a load from constpool to materialize the
+ /// specified immediate.
+ void emitLoadConstPool(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ DebugLoc dl,
+ unsigned DestReg, unsigned SubIdx, int Val,
+ ARMCC::CondCodes Pred = ARMCC::AL,
+ unsigned PredReg = 0,
+ unsigned MIFlags = MachineInstr::NoFlags) const;
+
+ /// Code Generation virtual methods...
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ // rewrite MI to access 'Offset' bytes from the FP. Update Offset to be
+ // however much remains to be handled. Return 'true' if no further
+ // work is required.
+ bool rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
+ unsigned FrameReg, int &Offset,
+ const ARMBaseInstrInfo &TII) const;
+ void resolveFrameIndex(MachineBasicBlock::iterator I,
+ unsigned BaseReg, int64_t Offset) const;
+ bool saveScavengerRegister(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator &UseMI,
+ const TargetRegisterClass *RC,
+ unsigned Reg) const;
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
+};
+}
+
+#endif // THUMB1REGISTERINFO_H
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp b/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp
new file mode 100644
index 0000000..b627400
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -0,0 +1,273 @@
+//===-- Thumb2ITBlockPass.cpp - Insert Thumb IT blocks ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "thumb2-it"
+#include "ARM.h"
+#include "ARMMachineFunctionInfo.h"
+#include "Thumb2InstrInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumITs, "Number of IT blocks inserted");
+STATISTIC(NumMovedInsts, "Number of predicated instructions moved");
+
+namespace {
+ class Thumb2ITBlockPass : public MachineFunctionPass {
+ bool PreRegAlloc;
+
+ public:
+ static char ID;
+ Thumb2ITBlockPass() : MachineFunctionPass(ID) {}
+
+ const Thumb2InstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ ARMFunctionInfo *AFI;
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual const char *getPassName() const {
+ return "Thumb IT blocks insertion pass";
+ }
+
+ private:
+ bool MoveCopyOutOfITBlock(MachineInstr *MI,
+ ARMCC::CondCodes CC, ARMCC::CondCodes OCC,
+ SmallSet<unsigned, 4> &Defs,
+ SmallSet<unsigned, 4> &Uses);
+ bool InsertITInstructions(MachineBasicBlock &MBB);
+ };
+ char Thumb2ITBlockPass::ID = 0;
+}
+
+/// TrackDefUses - Tracking what registers are being defined and used by
+/// instructions in the IT block. This also tracks "dependencies", i.e. uses
+/// in the IT block that are defined before the IT instruction.
+static void TrackDefUses(MachineInstr *MI,
+ SmallSet<unsigned, 4> &Defs,
+ SmallSet<unsigned, 4> &Uses,
+ const TargetRegisterInfo *TRI) {
+ SmallVector<unsigned, 4> LocalDefs;
+ SmallVector<unsigned, 4> LocalUses;
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg || Reg == ARM::ITSTATE || Reg == ARM::SP)
+ continue;
+ if (MO.isUse())
+ LocalUses.push_back(Reg);
+ else
+ LocalDefs.push_back(Reg);
+ }
+
+ for (unsigned i = 0, e = LocalUses.size(); i != e; ++i) {
+ unsigned Reg = LocalUses[i];
+ Uses.insert(Reg);
+ for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ *Subreg; ++Subreg)
+ Uses.insert(*Subreg);
+ }
+
+ for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) {
+ unsigned Reg = LocalDefs[i];
+ Defs.insert(Reg);
+ for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ *Subreg; ++Subreg)
+ Defs.insert(*Subreg);
+ if (Reg == ARM::CPSR)
+ continue;
+ }
+}
+
+static bool isCopy(MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ default:
+ return false;
+ case ARM::MOVr:
+ case ARM::MOVr_TC:
+ case ARM::tMOVr:
+ case ARM::t2MOVr:
+ return true;
+ }
+}
+
+bool
+Thumb2ITBlockPass::MoveCopyOutOfITBlock(MachineInstr *MI,
+ ARMCC::CondCodes CC, ARMCC::CondCodes OCC,
+ SmallSet<unsigned, 4> &Defs,
+ SmallSet<unsigned, 4> &Uses) {
+ if (!isCopy(MI))
+ return false;
+ // llvm models select's as two-address instructions. That means a copy
+ // is inserted before a t2MOVccr, etc. If the copy is scheduled in
+ // between selects we would end up creating multiple IT blocks.
+ assert(MI->getOperand(0).getSubReg() == 0 &&
+ MI->getOperand(1).getSubReg() == 0 &&
+ "Sub-register indices still around?");
+
+ unsigned DstReg = MI->getOperand(0).getReg();
+ unsigned SrcReg = MI->getOperand(1).getReg();
+
+ // First check if it's safe to move it.
+ if (Uses.count(DstReg) || Defs.count(SrcReg))
+ return false;
+
+ // If the CPSR is defined by this copy, then we don't want to move it. E.g.,
+ // if we have:
+ //
+ // movs r1, r1
+ // rsb r1, 0
+ // movs r2, r2
+ // rsb r2, 0
+ //
+ // we don't want this to be converted to:
+ //
+ // movs r1, r1
+ // movs r2, r2
+ // itt mi
+ // rsb r1, 0
+ // rsb r2, 0
+ //
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (MCID.hasOptionalDef() &&
+ MI->getOperand(MCID.getNumOperands() - 1).getReg() == ARM::CPSR)
+ return false;
+
+ // Then peek at the next instruction to see if it's predicated on CC or OCC.
+ // If not, then there is nothing to be gained by moving the copy.
+ MachineBasicBlock::iterator I = MI; ++I;
+ MachineBasicBlock::iterator E = MI->getParent()->end();
+ while (I != E && I->isDebugValue())
+ ++I;
+ if (I != E) {
+ unsigned NPredReg = 0;
+ ARMCC::CondCodes NCC = llvm::getITInstrPredicate(I, NPredReg);
+ if (NCC == CC || NCC == OCC)
+ return true;
+ }
+ return false;
+}
+
+bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) {
+ bool Modified = false;
+
+ SmallSet<unsigned, 4> Defs;
+ SmallSet<unsigned, 4> Uses;
+ MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ while (MBBI != E) {
+ MachineInstr *MI = &*MBBI;
+ DebugLoc dl = MI->getDebugLoc();
+ unsigned PredReg = 0;
+ ARMCC::CondCodes CC = llvm::getITInstrPredicate(MI, PredReg);
+ if (CC == ARMCC::AL) {
+ ++MBBI;
+ continue;
+ }
+
+ Defs.clear();
+ Uses.clear();
+ TrackDefUses(MI, Defs, Uses, TRI);
+
+ // Insert an IT instruction.
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(ARM::t2IT))
+ .addImm(CC);
+
+ // Add implicit use of ITSTATE to IT block instructions.
+ MI->addOperand(MachineOperand::CreateReg(ARM::ITSTATE, false/*ifDef*/,
+ true/*isImp*/, false/*isKill*/));
+
+ MachineInstr *LastITMI = MI;
+ MachineBasicBlock::iterator InsertPos = MIB;
+ ++MBBI;
+
+ // Form IT block.
+ ARMCC::CondCodes OCC = ARMCC::getOppositeCondition(CC);
+ unsigned Mask = 0, Pos = 3;
+ // Branches, including tricky ones like LDM_RET, need to end an IT
+ // block so check the instruction we just put in the block.
+ for (; MBBI != E && Pos &&
+ (!MI->getDesc().isBranch() && !MI->getDesc().isReturn()) ; ++MBBI) {
+ if (MBBI->isDebugValue())
+ continue;
+
+ MachineInstr *NMI = &*MBBI;
+ MI = NMI;
+
+ unsigned NPredReg = 0;
+ ARMCC::CondCodes NCC = llvm::getITInstrPredicate(NMI, NPredReg);
+ if (NCC == CC || NCC == OCC) {
+ Mask |= (NCC & 1) << Pos;
+ // Add implicit use of ITSTATE.
+ NMI->addOperand(MachineOperand::CreateReg(ARM::ITSTATE, false/*ifDef*/,
+ true/*isImp*/, false/*isKill*/));
+ LastITMI = NMI;
+ } else {
+ if (NCC == ARMCC::AL &&
+ MoveCopyOutOfITBlock(NMI, CC, OCC, Defs, Uses)) {
+ --MBBI;
+ MBB.remove(NMI);
+ MBB.insert(InsertPos, NMI);
+ ++NumMovedInsts;
+ continue;
+ }
+ break;
+ }
+ TrackDefUses(NMI, Defs, Uses, TRI);
+ --Pos;
+ }
+
+ // Finalize IT mask.
+ Mask |= (1 << Pos);
+ // Tag along (firstcond[0] << 4) with the mask.
+ Mask |= (CC & 1) << 4;
+ MIB.addImm(Mask);
+
+ // Last instruction in IT block kills ITSTATE.
+ LastITMI->findRegisterUseOperand(ARM::ITSTATE)->setIsKill();
+
+ Modified = true;
+ ++NumITs;
+ }
+
+ return Modified;
+}
+
+bool Thumb2ITBlockPass::runOnMachineFunction(MachineFunction &Fn) {
+ const TargetMachine &TM = Fn.getTarget();
+ AFI = Fn.getInfo<ARMFunctionInfo>();
+ TII = static_cast<const Thumb2InstrInfo*>(TM.getInstrInfo());
+ TRI = TM.getRegisterInfo();
+
+ if (!AFI->isThumbFunction())
+ return false;
+
+ bool Modified = false;
+ for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; ) {
+ MachineBasicBlock &MBB = *MFI;
+ ++MFI;
+ Modified |= InsertITInstructions(MBB);
+ }
+
+ if (Modified)
+ AFI->setHasITBlocks(true);
+
+ return Modified;
+}
+
+/// createThumb2ITBlockPass - Returns an instance of the Thumb2 IT blocks
+/// insertion pass.
+FunctionPass *llvm::createThumb2ITBlockPass() {
+ return new Thumb2ITBlockPass();
+}
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
new file mode 100644
index 0000000..cf040c82
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -0,0 +1,611 @@
+//===- Thumb2InstrInfo.cpp - Thumb-2 Instruction Information ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Thumb-2 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Thumb2InstrInfo.h"
+#include "ARM.h"
+#include "ARMConstantPoolValue.h"
+#include "ARMMachineFunctionInfo.h"
+#include "Thumb2InstrInfo.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+static cl::opt<bool>
+OldT2IfCvt("old-thumb2-ifcvt", cl::Hidden,
+ cl::desc("Use old-style Thumb2 if-conversion heuristics"),
+ cl::init(false));
+
+Thumb2InstrInfo::Thumb2InstrInfo(const ARMSubtarget &STI)
+ : ARMBaseInstrInfo(STI), RI(*this, STI) {
+}
+
+unsigned Thumb2InstrInfo::getUnindexedOpcode(unsigned Opc) const {
+ // FIXME
+ return 0;
+}
+
+void
+Thumb2InstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
+ MachineBasicBlock *NewDest) const {
+ MachineBasicBlock *MBB = Tail->getParent();
+ ARMFunctionInfo *AFI = MBB->getParent()->getInfo<ARMFunctionInfo>();
+ if (!AFI->hasITBlocks()) {
+ TargetInstrInfoImpl::ReplaceTailWithBranchTo(Tail, NewDest);
+ return;
+ }
+
+ // If the first instruction of Tail is predicated, we may have to update
+ // the IT instruction.
+ unsigned PredReg = 0;
+ ARMCC::CondCodes CC = llvm::getInstrPredicate(Tail, PredReg);
+ MachineBasicBlock::iterator MBBI = Tail;
+ if (CC != ARMCC::AL)
+ // Expecting at least the t2IT instruction before it.
+ --MBBI;
+
+ // Actually replace the tail.
+ TargetInstrInfoImpl::ReplaceTailWithBranchTo(Tail, NewDest);
+
+ // Fix up IT.
+ if (CC != ARMCC::AL) {
+ MachineBasicBlock::iterator E = MBB->begin();
+ unsigned Count = 4; // At most 4 instructions in an IT block.
+ while (Count && MBBI != E) {
+ if (MBBI->isDebugValue()) {
+ --MBBI;
+ continue;
+ }
+ if (MBBI->getOpcode() == ARM::t2IT) {
+ unsigned Mask = MBBI->getOperand(1).getImm();
+ if (Count == 4)
+ MBBI->eraseFromParent();
+ else {
+ unsigned MaskOn = 1 << Count;
+ unsigned MaskOff = ~(MaskOn - 1);
+ MBBI->getOperand(1).setImm((Mask & MaskOff) | MaskOn);
+ }
+ return;
+ }
+ --MBBI;
+ --Count;
+ }
+
+ // Ctrl flow can reach here if branch folding is run before IT block
+ // formation pass.
+ }
+}
+
+bool
+Thumb2InstrInfo::isLegalToSplitMBBAt(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) const {
+ while (MBBI->isDebugValue()) {
+ ++MBBI;
+ if (MBBI == MBB.end())
+ return false;
+ }
+
+ unsigned PredReg = 0;
+ return llvm::getITInstrPredicate(MBBI, PredReg) == ARMCC::AL;
+}
+
+void Thumb2InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ // Handle SPR, DPR, and QPR copies.
+ if (!ARM::GPRRegClass.contains(DestReg, SrcReg))
+ return ARMBaseInstrInfo::copyPhysReg(MBB, I, DL, DestReg, SrcReg, KillSrc);
+
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc)));
+}
+
+void Thumb2InstrInfo::
+storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned SrcReg, bool isKill, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass ||
+ RC == ARM::tcGPRRegisterClass || RC == ARM::rGPRRegisterClass ||
+ RC == ARM::GPRnopcRegisterClass) {
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(
+ MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+ MachineMemOperand::MOStore,
+ MFI.getObjectSize(FI),
+ MFI.getObjectAlignment(FI));
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::t2STRi12))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ return;
+ }
+
+ ARMBaseInstrInfo::storeRegToStackSlot(MBB, I, SrcReg, isKill, FI, RC, TRI);
+}
+
+void Thumb2InstrInfo::
+loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DestReg, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass ||
+ RC == ARM::tcGPRRegisterClass || RC == ARM::rGPRRegisterClass ||
+ RC == ARM::GPRnopcRegisterClass) {
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(
+ MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+ MachineMemOperand::MOLoad,
+ MFI.getObjectSize(FI),
+ MFI.getObjectAlignment(FI));
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::t2LDRi12), DestReg)
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ return;
+ }
+
+ ARMBaseInstrInfo::loadRegFromStackSlot(MBB, I, DestReg, FI, RC, TRI);
+}
+
+void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI, DebugLoc dl,
+ unsigned DestReg, unsigned BaseReg, int NumBytes,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ const ARMBaseInstrInfo &TII, unsigned MIFlags) {
+ bool isSub = NumBytes < 0;
+ if (isSub) NumBytes = -NumBytes;
+
+ // If profitable, use a movw or movt to materialize the offset.
+ // FIXME: Use the scavenger to grab a scratch register.
+ if (DestReg != ARM::SP && DestReg != BaseReg &&
+ NumBytes >= 4096 &&
+ ARM_AM::getT2SOImmVal(NumBytes) == -1) {
+ bool Fits = false;
+ if (NumBytes < 65536) {
+ // Use a movw to materialize the 16-bit constant.
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), DestReg)
+ .addImm(NumBytes)
+ .addImm((unsigned)Pred).addReg(PredReg).setMIFlags(MIFlags);
+ Fits = true;
+ } else if ((NumBytes & 0xffff) == 0) {
+ // Use a movt to materialize the 32-bit constant.
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVTi16), DestReg)
+ .addReg(DestReg)
+ .addImm(NumBytes >> 16)
+ .addImm((unsigned)Pred).addReg(PredReg).setMIFlags(MIFlags);
+ Fits = true;
+ }
+
+ if (Fits) {
+ if (isSub) {
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr), DestReg)
+ .addReg(BaseReg, RegState::Kill)
+ .addReg(DestReg, RegState::Kill)
+ .addImm((unsigned)Pred).addReg(PredReg).addReg(0)
+ .setMIFlags(MIFlags);
+ } else {
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::t2ADDrr), DestReg)
+ .addReg(DestReg, RegState::Kill)
+ .addReg(BaseReg, RegState::Kill)
+ .addImm((unsigned)Pred).addReg(PredReg).addReg(0)
+ .setMIFlags(MIFlags);
+ }
+ return;
+ }
+ }
+
+ while (NumBytes) {
+ unsigned ThisVal = NumBytes;
+ unsigned Opc = 0;
+ if (DestReg == ARM::SP && BaseReg != ARM::SP) {
+ // mov sp, rn. Note t2MOVr cannot be used.
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),DestReg)
+ .addReg(BaseReg).setMIFlags(MIFlags));
+ BaseReg = ARM::SP;
+ continue;
+ }
+
+ bool HasCCOut = true;
+ if (BaseReg == ARM::SP) {
+ // sub sp, sp, #imm7
+ if (DestReg == ARM::SP && (ThisVal < ((1 << 7)-1) * 4)) {
+ assert((ThisVal & 3) == 0 && "Stack update is not multiple of 4?");
+ Opc = isSub ? ARM::tSUBspi : ARM::tADDspi;
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
+ .addReg(BaseReg).addImm(ThisVal/4).setMIFlags(MIFlags));
+ NumBytes = 0;
+ continue;
+ }
+
+ // sub rd, sp, so_imm
+ Opc = isSub ? ARM::t2SUBri : ARM::t2ADDri;
+ if (ARM_AM::getT2SOImmVal(NumBytes) != -1) {
+ NumBytes = 0;
+ } else {
+ // FIXME: Move this to ARMAddressingModes.h?
+ unsigned RotAmt = CountLeadingZeros_32(ThisVal);
+ ThisVal = ThisVal & ARM_AM::rotr32(0xff000000U, RotAmt);
+ NumBytes &= ~ThisVal;
+ assert(ARM_AM::getT2SOImmVal(ThisVal) != -1 &&
+ "Bit extraction didn't work?");
+ }
+ } else {
+ assert(DestReg != ARM::SP && BaseReg != ARM::SP);
+ Opc = isSub ? ARM::t2SUBri : ARM::t2ADDri;
+ if (ARM_AM::getT2SOImmVal(NumBytes) != -1) {
+ NumBytes = 0;
+ } else if (ThisVal < 4096) {
+ Opc = isSub ? ARM::t2SUBri12 : ARM::t2ADDri12;
+ HasCCOut = false;
+ NumBytes = 0;
+ } else {
+ // FIXME: Move this to ARMAddressingModes.h?
+ unsigned RotAmt = CountLeadingZeros_32(ThisVal);
+ ThisVal = ThisVal & ARM_AM::rotr32(0xff000000U, RotAmt);
+ NumBytes &= ~ThisVal;
+ assert(ARM_AM::getT2SOImmVal(ThisVal) != -1 &&
+ "Bit extraction didn't work?");
+ }
+ }
+
+ // Build the new ADD / SUB.
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
+ .addReg(BaseReg, RegState::Kill)
+ .addImm(ThisVal)).setMIFlags(MIFlags);
+ if (HasCCOut)
+ AddDefaultCC(MIB);
+
+ BaseReg = DestReg;
+ }
+}
+
+static unsigned
+negativeOffsetOpcode(unsigned opcode)
+{
+ switch (opcode) {
+ case ARM::t2LDRi12: return ARM::t2LDRi8;
+ case ARM::t2LDRHi12: return ARM::t2LDRHi8;
+ case ARM::t2LDRBi12: return ARM::t2LDRBi8;
+ case ARM::t2LDRSHi12: return ARM::t2LDRSHi8;
+ case ARM::t2LDRSBi12: return ARM::t2LDRSBi8;
+ case ARM::t2STRi12: return ARM::t2STRi8;
+ case ARM::t2STRBi12: return ARM::t2STRBi8;
+ case ARM::t2STRHi12: return ARM::t2STRHi8;
+
+ case ARM::t2LDRi8:
+ case ARM::t2LDRHi8:
+ case ARM::t2LDRBi8:
+ case ARM::t2LDRSHi8:
+ case ARM::t2LDRSBi8:
+ case ARM::t2STRi8:
+ case ARM::t2STRBi8:
+ case ARM::t2STRHi8:
+ return opcode;
+
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static unsigned
+positiveOffsetOpcode(unsigned opcode)
+{
+ switch (opcode) {
+ case ARM::t2LDRi8: return ARM::t2LDRi12;
+ case ARM::t2LDRHi8: return ARM::t2LDRHi12;
+ case ARM::t2LDRBi8: return ARM::t2LDRBi12;
+ case ARM::t2LDRSHi8: return ARM::t2LDRSHi12;
+ case ARM::t2LDRSBi8: return ARM::t2LDRSBi12;
+ case ARM::t2STRi8: return ARM::t2STRi12;
+ case ARM::t2STRBi8: return ARM::t2STRBi12;
+ case ARM::t2STRHi8: return ARM::t2STRHi12;
+
+ case ARM::t2LDRi12:
+ case ARM::t2LDRHi12:
+ case ARM::t2LDRBi12:
+ case ARM::t2LDRSHi12:
+ case ARM::t2LDRSBi12:
+ case ARM::t2STRi12:
+ case ARM::t2STRBi12:
+ case ARM::t2STRHi12:
+ return opcode;
+
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static unsigned
+immediateOffsetOpcode(unsigned opcode)
+{
+ switch (opcode) {
+ case ARM::t2LDRs: return ARM::t2LDRi12;
+ case ARM::t2LDRHs: return ARM::t2LDRHi12;
+ case ARM::t2LDRBs: return ARM::t2LDRBi12;
+ case ARM::t2LDRSHs: return ARM::t2LDRSHi12;
+ case ARM::t2LDRSBs: return ARM::t2LDRSBi12;
+ case ARM::t2STRs: return ARM::t2STRi12;
+ case ARM::t2STRBs: return ARM::t2STRBi12;
+ case ARM::t2STRHs: return ARM::t2STRHi12;
+
+ case ARM::t2LDRi12:
+ case ARM::t2LDRHi12:
+ case ARM::t2LDRBi12:
+ case ARM::t2LDRSHi12:
+ case ARM::t2LDRSBi12:
+ case ARM::t2STRi12:
+ case ARM::t2STRBi12:
+ case ARM::t2STRHi12:
+ case ARM::t2LDRi8:
+ case ARM::t2LDRHi8:
+ case ARM::t2LDRBi8:
+ case ARM::t2LDRSHi8:
+ case ARM::t2LDRSBi8:
+ case ARM::t2STRi8:
+ case ARM::t2STRBi8:
+ case ARM::t2STRHi8:
+ return opcode;
+
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
+ unsigned FrameReg, int &Offset,
+ const ARMBaseInstrInfo &TII) {
+ unsigned Opcode = MI.getOpcode();
+ const MCInstrDesc &Desc = MI.getDesc();
+ unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
+ bool isSub = false;
+
+ // Memory operands in inline assembly always use AddrModeT2_i12.
+ if (Opcode == ARM::INLINEASM)
+ AddrMode = ARMII::AddrModeT2_i12; // FIXME. mode for thumb2?
+
+ if (Opcode == ARM::t2ADDri || Opcode == ARM::t2ADDri12) {
+ Offset += MI.getOperand(FrameRegIdx+1).getImm();
+
+ unsigned PredReg;
+ if (Offset == 0 && getInstrPredicate(&MI, PredReg) == ARMCC::AL) {
+ // Turn it into a move.
+ MI.setDesc(TII.get(ARM::tMOVr));
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ // Remove offset and remaining explicit predicate operands.
+ do MI.RemoveOperand(FrameRegIdx+1);
+ while (MI.getNumOperands() > FrameRegIdx+1);
+ MachineInstrBuilder MIB(&MI);
+ AddDefaultPred(MIB);
+ return true;
+ }
+
+ bool HasCCOut = Opcode != ARM::t2ADDri12;
+
+ if (Offset < 0) {
+ Offset = -Offset;
+ isSub = true;
+ MI.setDesc(TII.get(ARM::t2SUBri));
+ } else {
+ MI.setDesc(TII.get(ARM::t2ADDri));
+ }
+
+ // Common case: small offset, fits into instruction.
+ if (ARM_AM::getT2SOImmVal(Offset) != -1) {
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
+ // Add cc_out operand if the original instruction did not have one.
+ if (!HasCCOut)
+ MI.addOperand(MachineOperand::CreateReg(0, false));
+ Offset = 0;
+ return true;
+ }
+ // Another common case: imm12.
+ if (Offset < 4096 &&
+ (!HasCCOut || MI.getOperand(MI.getNumOperands()-1).getReg() == 0)) {
+ unsigned NewOpc = isSub ? ARM::t2SUBri12 : ARM::t2ADDri12;
+ MI.setDesc(TII.get(NewOpc));
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
+ // Remove the cc_out operand.
+ if (HasCCOut)
+ MI.RemoveOperand(MI.getNumOperands()-1);
+ Offset = 0;
+ return true;
+ }
+
+ // Otherwise, extract 8 adjacent bits from the immediate into this
+ // t2ADDri/t2SUBri.
+ unsigned RotAmt = CountLeadingZeros_32(Offset);
+ unsigned ThisImmVal = Offset & ARM_AM::rotr32(0xff000000U, RotAmt);
+
+ // We will handle these bits from offset, clear them.
+ Offset &= ~ThisImmVal;
+
+ assert(ARM_AM::getT2SOImmVal(ThisImmVal) != -1 &&
+ "Bit extraction didn't work?");
+ MI.getOperand(FrameRegIdx+1).ChangeToImmediate(ThisImmVal);
+ // Add cc_out operand if the original instruction did not have one.
+ if (!HasCCOut)
+ MI.addOperand(MachineOperand::CreateReg(0, false));
+
+ } else {
+
+ // AddrMode4 and AddrMode6 cannot handle any offset.
+ if (AddrMode == ARMII::AddrMode4 || AddrMode == ARMII::AddrMode6)
+ return false;
+
+ // AddrModeT2_so cannot handle any offset. If there is no offset
+ // register then we change to an immediate version.
+ unsigned NewOpc = Opcode;
+ if (AddrMode == ARMII::AddrModeT2_so) {
+ unsigned OffsetReg = MI.getOperand(FrameRegIdx+1).getReg();
+ if (OffsetReg != 0) {
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ return Offset == 0;
+ }
+
+ MI.RemoveOperand(FrameRegIdx+1);
+ MI.getOperand(FrameRegIdx+1).ChangeToImmediate(0);
+ NewOpc = immediateOffsetOpcode(Opcode);
+ AddrMode = ARMII::AddrModeT2_i12;
+ }
+
+ unsigned NumBits = 0;
+ unsigned Scale = 1;
+ if (AddrMode == ARMII::AddrModeT2_i8 || AddrMode == ARMII::AddrModeT2_i12) {
+ // i8 supports only negative, and i12 supports only positive, so
+ // based on Offset sign convert Opcode to the appropriate
+ // instruction
+ Offset += MI.getOperand(FrameRegIdx+1).getImm();
+ if (Offset < 0) {
+ NewOpc = negativeOffsetOpcode(Opcode);
+ NumBits = 8;
+ isSub = true;
+ Offset = -Offset;
+ } else {
+ NewOpc = positiveOffsetOpcode(Opcode);
+ NumBits = 12;
+ }
+ } else if (AddrMode == ARMII::AddrMode5) {
+ // VFP address mode.
+ const MachineOperand &OffOp = MI.getOperand(FrameRegIdx+1);
+ int InstrOffs = ARM_AM::getAM5Offset(OffOp.getImm());
+ if (ARM_AM::getAM5Op(OffOp.getImm()) == ARM_AM::sub)
+ InstrOffs *= -1;
+ NumBits = 8;
+ Scale = 4;
+ Offset += InstrOffs * 4;
+ assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!");
+ if (Offset < 0) {
+ Offset = -Offset;
+ isSub = true;
+ }
+ } else {
+ llvm_unreachable("Unsupported addressing mode!");
+ }
+
+ if (NewOpc != Opcode)
+ MI.setDesc(TII.get(NewOpc));
+
+ MachineOperand &ImmOp = MI.getOperand(FrameRegIdx+1);
+
+ // Attempt to fold address computation
+ // Common case: small offset, fits into instruction.
+ int ImmedOffset = Offset / Scale;
+ unsigned Mask = (1 << NumBits) - 1;
+ if ((unsigned)Offset <= Mask * Scale) {
+ // Replace the FrameIndex with fp/sp
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ if (isSub) {
+ if (AddrMode == ARMII::AddrMode5)
+ // FIXME: Not consistent.
+ ImmedOffset |= 1 << NumBits;
+ else
+ ImmedOffset = -ImmedOffset;
+ }
+ ImmOp.ChangeToImmediate(ImmedOffset);
+ Offset = 0;
+ return true;
+ }
+
+ // Otherwise, offset doesn't fit. Pull in what we can to simplify
+ ImmedOffset = ImmedOffset & Mask;
+ if (isSub) {
+ if (AddrMode == ARMII::AddrMode5)
+ // FIXME: Not consistent.
+ ImmedOffset |= 1 << NumBits;
+ else {
+ ImmedOffset = -ImmedOffset;
+ if (ImmedOffset == 0)
+ // Change the opcode back if the encoded offset is zero.
+ MI.setDesc(TII.get(positiveOffsetOpcode(NewOpc)));
+ }
+ }
+ ImmOp.ChangeToImmediate(ImmedOffset);
+ Offset &= ~(Mask*Scale);
+ }
+
+ Offset = (isSub) ? -Offset : Offset;
+ return Offset == 0;
+}
+
+/// scheduleTwoAddrSource - Schedule the copy / re-mat of the source of the
+/// two-addrss instruction inserted by two-address pass.
+void
+Thumb2InstrInfo::scheduleTwoAddrSource(MachineInstr *SrcMI,
+ MachineInstr *UseMI,
+ const TargetRegisterInfo &TRI) const {
+ if (SrcMI->getOpcode() != ARM::tMOVr || SrcMI->getOperand(1).isKill())
+ return;
+
+ unsigned PredReg = 0;
+ ARMCC::CondCodes CC = llvm::getInstrPredicate(UseMI, PredReg);
+ if (CC == ARMCC::AL || PredReg != ARM::CPSR)
+ return;
+
+ // Schedule the copy so it doesn't come between previous instructions
+ // and UseMI which can form an IT block.
+ unsigned SrcReg = SrcMI->getOperand(1).getReg();
+ ARMCC::CondCodes OCC = ARMCC::getOppositeCondition(CC);
+ MachineBasicBlock *MBB = UseMI->getParent();
+ MachineBasicBlock::iterator MBBI = SrcMI;
+ unsigned NumInsts = 0;
+ while (--MBBI != MBB->begin()) {
+ if (MBBI->isDebugValue())
+ continue;
+
+ MachineInstr *NMI = &*MBBI;
+ ARMCC::CondCodes NCC = llvm::getInstrPredicate(NMI, PredReg);
+ if (!(NCC == CC || NCC == OCC) ||
+ NMI->modifiesRegister(SrcReg, &TRI) ||
+ NMI->definesRegister(ARM::CPSR))
+ break;
+ if (++NumInsts == 4)
+ // Too many in a row!
+ return;
+ }
+
+ if (NumInsts) {
+ MBB->remove(SrcMI);
+ MBB->insert(++MBBI, SrcMI);
+ }
+}
+
+ARMCC::CondCodes
+llvm::getITInstrPredicate(const MachineInstr *MI, unsigned &PredReg) {
+ unsigned Opc = MI->getOpcode();
+ if (Opc == ARM::tBcc || Opc == ARM::t2Bcc)
+ return ARMCC::AL;
+ return llvm::getInstrPredicate(MI, PredReg);
+}
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.h b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.h
new file mode 100644
index 0000000..f2637d7
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.h
@@ -0,0 +1,78 @@
+//===- Thumb2InstrInfo.h - Thumb-2 Instruction Information ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Thumb-2 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef THUMB2INSTRUCTIONINFO_H
+#define THUMB2INSTRUCTIONINFO_H
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "ARM.h"
+#include "ARMInstrInfo.h"
+#include "Thumb2RegisterInfo.h"
+
+namespace llvm {
+class ARMSubtarget;
+class ScheduleHazardRecognizer;
+
+class Thumb2InstrInfo : public ARMBaseInstrInfo {
+ Thumb2RegisterInfo RI;
+public:
+ explicit Thumb2InstrInfo(const ARMSubtarget &STI);
+
+ // Return the non-pre/post incrementing version of 'Opc'. Return 0
+ // if there is not such an opcode.
+ unsigned getUnindexedOpcode(unsigned Opc) const;
+
+ void ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
+ MachineBasicBlock *NewDest) const;
+
+ bool isLegalToSplitMBBAt(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) const;
+
+ void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+
+ void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ /// scheduleTwoAddrSource - Schedule the copy / re-mat of the source of the
+ /// two-addrss instruction inserted by two-address pass.
+ void scheduleTwoAddrSource(MachineInstr *SrcMI, MachineInstr *UseMI,
+ const TargetRegisterInfo &TRI) const;
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ const Thumb2RegisterInfo &getRegisterInfo() const { return RI; }
+};
+
+/// getITInstrPredicate - Valid only in Thumb2 mode. This function is identical
+/// to llvm::getInstrPredicate except it returns AL for conditional branch
+/// instructions which are "predicated", but are not in IT blocks.
+ARMCC::CondCodes getITInstrPredicate(const MachineInstr *MI, unsigned &PredReg);
+
+
+}
+
+#endif // THUMB2INSTRUCTIONINFO_H
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.cpp
new file mode 100644
index 0000000..355c3bf
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.cpp
@@ -0,0 +1,52 @@
+//===- Thumb2RegisterInfo.cpp - Thumb-2 Register Information ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Thumb-2 implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMSubtarget.h"
+#include "Thumb2InstrInfo.h"
+#include "Thumb2RegisterInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+using namespace llvm;
+
+Thumb2RegisterInfo::Thumb2RegisterInfo(const ARMBaseInstrInfo &tii,
+ const ARMSubtarget &sti)
+ : ARMBaseRegisterInfo(tii, sti) {
+}
+
+/// emitLoadConstPool - Emits a load from constpool to materialize the
+/// specified immediate.
+void
+Thumb2RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ DebugLoc dl,
+ unsigned DestReg, unsigned SubIdx,
+ int Val,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ unsigned MIFlags) const {
+ MachineFunction &MF = *MBB.getParent();
+ MachineConstantPool *ConstantPool = MF.getConstantPool();
+ const Constant *C = ConstantInt::get(
+ Type::getInt32Ty(MBB.getParent()->getFunction()->getContext()), Val);
+ unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4);
+
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::t2LDRpci))
+ .addReg(DestReg, getDefRegState(true), SubIdx)
+ .addConstantPoolIndex(Idx).addImm((int64_t)ARMCC::AL).addReg(0)
+ .setMIFlags(MIFlags);
+}
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.h b/contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.h
new file mode 100644
index 0000000..824378a
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.h
@@ -0,0 +1,43 @@
+//===- Thumb2RegisterInfo.h - Thumb-2 Register Information Impl -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Thumb-2 implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef THUMB2REGISTERINFO_H
+#define THUMB2REGISTERINFO_H
+
+#include "ARM.h"
+#include "ARMRegisterInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+namespace llvm {
+ class ARMSubtarget;
+ class ARMBaseInstrInfo;
+ class Type;
+
+struct Thumb2RegisterInfo : public ARMBaseRegisterInfo {
+public:
+ Thumb2RegisterInfo(const ARMBaseInstrInfo &tii, const ARMSubtarget &STI);
+
+ /// emitLoadConstPool - Emits a load from constpool to materialize the
+ /// specified immediate.
+ void emitLoadConstPool(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ DebugLoc dl,
+ unsigned DestReg, unsigned SubIdx, int Val,
+ ARMCC::CondCodes Pred = ARMCC::AL,
+ unsigned PredReg = 0,
+ unsigned MIFlags = MachineInstr::NoFlags) const;
+};
+}
+
+#endif // THUMB2REGISTERINFO_H
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp b/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
new file mode 100644
index 0000000..89a155c
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -0,0 +1,890 @@
+//===-- Thumb2SizeReduction.cpp - Thumb2 code size reduction pass -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "t2-reduce-size"
+#include "ARM.h"
+#include "ARMBaseRegisterInfo.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMSubtarget.h"
+#include "Thumb2InstrInfo.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumNarrows, "Number of 32-bit instrs reduced to 16-bit ones");
+STATISTIC(Num2Addrs, "Number of 32-bit instrs reduced to 2addr 16-bit ones");
+STATISTIC(NumLdSts, "Number of 32-bit load / store reduced to 16-bit ones");
+
+static cl::opt<int> ReduceLimit("t2-reduce-limit",
+ cl::init(-1), cl::Hidden);
+static cl::opt<int> ReduceLimit2Addr("t2-reduce-limit2",
+ cl::init(-1), cl::Hidden);
+static cl::opt<int> ReduceLimitLdSt("t2-reduce-limit3",
+ cl::init(-1), cl::Hidden);
+
+namespace {
+ /// ReduceTable - A static table with information on mapping from wide
+ /// opcodes to narrow
+ struct ReduceEntry {
+ unsigned WideOpc; // Wide opcode
+ unsigned NarrowOpc1; // Narrow opcode to transform to
+ unsigned NarrowOpc2; // Narrow opcode when it's two-address
+ uint8_t Imm1Limit; // Limit of immediate field (bits)
+ uint8_t Imm2Limit; // Limit of immediate field when it's two-address
+ unsigned LowRegs1 : 1; // Only possible if low-registers are used
+ unsigned LowRegs2 : 1; // Only possible if low-registers are used (2addr)
+ unsigned PredCC1 : 2; // 0 - If predicated, cc is on and vice versa.
+ // 1 - No cc field.
+ // 2 - Always set CPSR.
+ unsigned PredCC2 : 2;
+ unsigned PartFlag : 1; // 16-bit instruction does partial flag update
+ unsigned Special : 1; // Needs to be dealt with specially
+ };
+
+ static const ReduceEntry ReduceTable[] = {
+ // Wide, Narrow1, Narrow2, imm1,imm2, lo1, lo2, P/C, PF, S
+ { ARM::t2ADCrr, 0, ARM::tADC, 0, 0, 0, 1, 0,0, 0,0 },
+ { ARM::t2ADDri, ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 0,0, 0,1 },
+ { ARM::t2ADDrr, ARM::tADDrr, ARM::tADDhirr, 0, 0, 1, 0, 0,1, 0,0 },
+ { ARM::t2ADDSri,ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 2,2, 0,1 },
+ { ARM::t2ADDSrr,ARM::tADDrr, 0, 0, 0, 1, 0, 2,0, 0,1 },
+ { ARM::t2ANDrr, 0, ARM::tAND, 0, 0, 0, 1, 0,0, 1,0 },
+ { ARM::t2ASRri, ARM::tASRri, 0, 5, 0, 1, 0, 0,0, 1,0 },
+ { ARM::t2ASRrr, 0, ARM::tASRrr, 0, 0, 0, 1, 0,0, 1,0 },
+ { ARM::t2BICrr, 0, ARM::tBIC, 0, 0, 0, 1, 0,0, 1,0 },
+ //FIXME: Disable CMN, as CCodes are backwards from compare expectations
+ //{ ARM::t2CMNrr, ARM::tCMN, 0, 0, 0, 1, 0, 2,0, 0,0 },
+ { ARM::t2CMPri, ARM::tCMPi8, 0, 8, 0, 1, 0, 2,0, 0,0 },
+ { ARM::t2CMPrr, ARM::tCMPhir, 0, 0, 0, 0, 0, 2,0, 0,1 },
+ { ARM::t2EORrr, 0, ARM::tEOR, 0, 0, 0, 1, 0,0, 1,0 },
+ // FIXME: adr.n immediate offset must be multiple of 4.
+ //{ ARM::t2LEApcrelJT,ARM::tLEApcrelJT, 0, 0, 0, 1, 0, 1,0, 0,0 },
+ { ARM::t2LSLri, ARM::tLSLri, 0, 5, 0, 1, 0, 0,0, 1,0 },
+ { ARM::t2LSLrr, 0, ARM::tLSLrr, 0, 0, 0, 1, 0,0, 1,0 },
+ { ARM::t2LSRri, ARM::tLSRri, 0, 5, 0, 1, 0, 0,0, 1,0 },
+ { ARM::t2LSRrr, 0, ARM::tLSRrr, 0, 0, 0, 1, 0,0, 1,0 },
+ // FIXME: tMOVi8 and tMVN also partially update CPSR but they are less
+ // likely to cause issue in the loop. As a size / performance workaround,
+ // they are not marked as such.
+ { ARM::t2MOVi, ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 0,0 },
+ { ARM::t2MOVi16,ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 0,1 },
+ // FIXME: Do we need the 16-bit 'S' variant?
+ { ARM::t2MOVr,ARM::tMOVr, 0, 0, 0, 0, 0, 1,0, 0,0 },
+ { ARM::t2MUL, 0, ARM::tMUL, 0, 0, 0, 1, 0,0, 1,0 },
+ { ARM::t2MVNr, ARM::tMVN, 0, 0, 0, 1, 0, 0,0, 0,0 },
+ { ARM::t2ORRrr, 0, ARM::tORR, 0, 0, 0, 1, 0,0, 1,0 },
+ { ARM::t2REV, ARM::tREV, 0, 0, 0, 1, 0, 1,0, 0,0 },
+ { ARM::t2REV16, ARM::tREV16, 0, 0, 0, 1, 0, 1,0, 0,0 },
+ { ARM::t2REVSH, ARM::tREVSH, 0, 0, 0, 1, 0, 1,0, 0,0 },
+ { ARM::t2RORrr, 0, ARM::tROR, 0, 0, 0, 1, 0,0, 1,0 },
+ { ARM::t2RSBri, ARM::tRSB, 0, 0, 0, 1, 0, 0,0, 0,1 },
+ { ARM::t2RSBSri,ARM::tRSB, 0, 0, 0, 1, 0, 2,0, 0,1 },
+ { ARM::t2SBCrr, 0, ARM::tSBC, 0, 0, 0, 1, 0,0, 0,0 },
+ { ARM::t2SUBri, ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 0,0, 0,0 },
+ { ARM::t2SUBrr, ARM::tSUBrr, 0, 0, 0, 1, 0, 0,0, 0,0 },
+ { ARM::t2SUBSri,ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 2,2, 0,0 },
+ { ARM::t2SUBSrr,ARM::tSUBrr, 0, 0, 0, 1, 0, 2,0, 0,0 },
+ { ARM::t2SXTB, ARM::tSXTB, 0, 0, 0, 1, 0, 1,0, 0,1 },
+ { ARM::t2SXTH, ARM::tSXTH, 0, 0, 0, 1, 0, 1,0, 0,1 },
+ { ARM::t2TSTrr, ARM::tTST, 0, 0, 0, 1, 0, 2,0, 0,0 },
+ { ARM::t2UXTB, ARM::tUXTB, 0, 0, 0, 1, 0, 1,0, 0,1 },
+ { ARM::t2UXTH, ARM::tUXTH, 0, 0, 0, 1, 0, 1,0, 0,1 },
+
+ // FIXME: Clean this up after splitting each Thumb load / store opcode
+ // into multiple ones.
+ { ARM::t2LDRi12,ARM::tLDRi, ARM::tLDRspi, 5, 8, 1, 0, 0,0, 0,1 },
+ { ARM::t2LDRs, ARM::tLDRr, 0, 0, 0, 1, 0, 0,0, 0,1 },
+ { ARM::t2LDRBi12,ARM::tLDRBi, 0, 5, 0, 1, 0, 0,0, 0,1 },
+ { ARM::t2LDRBs, ARM::tLDRBr, 0, 0, 0, 1, 0, 0,0, 0,1 },
+ { ARM::t2LDRHi12,ARM::tLDRHi, 0, 5, 0, 1, 0, 0,0, 0,1 },
+ { ARM::t2LDRHs, ARM::tLDRHr, 0, 0, 0, 1, 0, 0,0, 0,1 },
+ { ARM::t2LDRSBs,ARM::tLDRSB, 0, 0, 0, 1, 0, 0,0, 0,1 },
+ { ARM::t2LDRSHs,ARM::tLDRSH, 0, 0, 0, 1, 0, 0,0, 0,1 },
+ { ARM::t2STRi12,ARM::tSTRi, ARM::tSTRspi, 5, 8, 1, 0, 0,0, 0,1 },
+ { ARM::t2STRs, ARM::tSTRr, 0, 0, 0, 1, 0, 0,0, 0,1 },
+ { ARM::t2STRBi12,ARM::tSTRBi, 0, 5, 0, 1, 0, 0,0, 0,1 },
+ { ARM::t2STRBs, ARM::tSTRBr, 0, 0, 0, 1, 0, 0,0, 0,1 },
+ { ARM::t2STRHi12,ARM::tSTRHi, 0, 5, 0, 1, 0, 0,0, 0,1 },
+ { ARM::t2STRHs, ARM::tSTRHr, 0, 0, 0, 1, 0, 0,0, 0,1 },
+
+ { ARM::t2LDMIA, ARM::tLDMIA, 0, 0, 0, 1, 1, 1,1, 0,1 },
+ { ARM::t2LDMIA_RET,0, ARM::tPOP_RET, 0, 0, 1, 1, 1,1, 0,1 },
+ { ARM::t2LDMIA_UPD,ARM::tLDMIA_UPD,ARM::tPOP,0, 0, 1, 1, 1,1, 0,1 },
+ // ARM::t2STM (with no basereg writeback) has no Thumb1 equivalent
+ { ARM::t2STMIA_UPD,ARM::tSTMIA_UPD, 0, 0, 0, 1, 1, 1,1, 0,1 },
+ { ARM::t2STMDB_UPD, 0, ARM::tPUSH, 0, 0, 1, 1, 1,1, 0,1 },
+ };
+
+ class Thumb2SizeReduce : public MachineFunctionPass {
+ public:
+ static char ID;
+ Thumb2SizeReduce();
+
+ const Thumb2InstrInfo *TII;
+ const ARMSubtarget *STI;
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const {
+ return "Thumb2 instruction size reduction pass";
+ }
+
+ private:
+ /// ReduceOpcodeMap - Maps wide opcode to index of entry in ReduceTable.
+ DenseMap<unsigned, unsigned> ReduceOpcodeMap;
+
+ bool canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use);
+
+ bool VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
+ bool is2Addr, ARMCC::CondCodes Pred,
+ bool LiveCPSR, bool &HasCC, bool &CCDead);
+
+ bool ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
+ const ReduceEntry &Entry);
+
+ bool ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
+ const ReduceEntry &Entry, bool LiveCPSR,
+ MachineInstr *CPSRDef);
+
+ /// ReduceTo2Addr - Reduce a 32-bit instruction to a 16-bit two-address
+ /// instruction.
+ bool ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
+ const ReduceEntry &Entry,
+ bool LiveCPSR, MachineInstr *CPSRDef);
+
+ /// ReduceToNarrow - Reduce a 32-bit instruction to a 16-bit
+ /// non-two-address instruction.
+ bool ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
+ const ReduceEntry &Entry,
+ bool LiveCPSR, MachineInstr *CPSRDef);
+
+ /// ReduceMBB - Reduce width of instructions in the specified basic block.
+ bool ReduceMBB(MachineBasicBlock &MBB);
+ };
+ char Thumb2SizeReduce::ID = 0;
+}
+
+Thumb2SizeReduce::Thumb2SizeReduce() : MachineFunctionPass(ID) {
+ for (unsigned i = 0, e = array_lengthof(ReduceTable); i != e; ++i) {
+ unsigned FromOpc = ReduceTable[i].WideOpc;
+ if (!ReduceOpcodeMap.insert(std::make_pair(FromOpc, i)).second)
+ assert(false && "Duplicated entries?");
+ }
+}
+
+static bool HasImplicitCPSRDef(const MCInstrDesc &MCID) {
+ for (const unsigned *Regs = MCID.ImplicitDefs; *Regs; ++Regs)
+ if (*Regs == ARM::CPSR)
+ return true;
+ return false;
+}
+
+/// canAddPseudoFlagDep - For A9 (and other out-of-order) implementations,
+/// the 's' 16-bit instruction partially update CPSR. Abort the
+/// transformation to avoid adding false dependency on last CPSR setting
+/// instruction which hurts the ability for out-of-order execution engine
+/// to do register renaming magic.
+/// This function checks if there is a read-of-write dependency between the
+/// last instruction that defines the CPSR and the current instruction. If there
+/// is, then there is no harm done since the instruction cannot be retired
+/// before the CPSR setting instruction anyway.
+/// Note, we are not doing full dependency analysis here for the sake of compile
+/// time. We're not looking for cases like:
+/// r0 = muls ...
+/// r1 = add.w r0, ...
+/// ...
+/// = mul.w r1
+/// In this case it would have been ok to narrow the mul.w to muls since there
+/// are indirect RAW dependency between the muls and the mul.w
+bool
+Thumb2SizeReduce::canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use) {
+ if (!Def || !STI->avoidCPSRPartialUpdate())
+ return false;
+
+ SmallSet<unsigned, 2> Defs;
+ for (unsigned i = 0, e = Def->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = Def->getOperand(i);
+ if (!MO.isReg() || MO.isUndef() || MO.isUse())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0 || Reg == ARM::CPSR)
+ continue;
+ Defs.insert(Reg);
+ }
+
+ for (unsigned i = 0, e = Use->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = Use->getOperand(i);
+ if (!MO.isReg() || MO.isUndef() || MO.isDef())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (Defs.count(Reg))
+ return false;
+ }
+
+ // No read-after-write dependency. The narrowing will add false dependency.
+ return true;
+}
+
+bool
+Thumb2SizeReduce::VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
+ bool is2Addr, ARMCC::CondCodes Pred,
+ bool LiveCPSR, bool &HasCC, bool &CCDead) {
+ if ((is2Addr && Entry.PredCC2 == 0) ||
+ (!is2Addr && Entry.PredCC1 == 0)) {
+ if (Pred == ARMCC::AL) {
+ // Not predicated, must set CPSR.
+ if (!HasCC) {
+ // Original instruction was not setting CPSR, but CPSR is not
+ // currently live anyway. It's ok to set it. The CPSR def is
+ // dead though.
+ if (!LiveCPSR) {
+ HasCC = true;
+ CCDead = true;
+ return true;
+ }
+ return false;
+ }
+ } else {
+ // Predicated, must not set CPSR.
+ if (HasCC)
+ return false;
+ }
+ } else if ((is2Addr && Entry.PredCC2 == 2) ||
+ (!is2Addr && Entry.PredCC1 == 2)) {
+ /// Old opcode has an optional def of CPSR.
+ if (HasCC)
+ return true;
+ // If old opcode does not implicitly define CPSR, then it's not ok since
+ // these new opcodes' CPSR def is not meant to be thrown away. e.g. CMP.
+ if (!HasImplicitCPSRDef(MI->getDesc()))
+ return false;
+ HasCC = true;
+ } else {
+ // 16-bit instruction does not set CPSR.
+ if (HasCC)
+ return false;
+ }
+
+ return true;
+}
+
+static bool VerifyLowRegs(MachineInstr *MI) {
+ unsigned Opc = MI->getOpcode();
+ bool isPCOk = (Opc == ARM::t2LDMIA_RET || Opc == ARM::t2LDMIA ||
+ Opc == ARM::t2LDMDB || Opc == ARM::t2LDMIA_UPD ||
+ Opc == ARM::t2LDMDB_UPD);
+ bool isLROk = (Opc == ARM::t2STMIA_UPD || Opc == ARM::t2STMDB_UPD);
+ bool isSPOk = isPCOk || isLROk;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || MO.isImplicit())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0 || Reg == ARM::CPSR)
+ continue;
+ if (isPCOk && Reg == ARM::PC)
+ continue;
+ if (isLROk && Reg == ARM::LR)
+ continue;
+ if (Reg == ARM::SP) {
+ if (isSPOk)
+ continue;
+ if (i == 1 && (Opc == ARM::t2LDRi12 || Opc == ARM::t2STRi12))
+ // Special case for these ldr / str with sp as base register.
+ continue;
+ }
+ if (!isARMLowRegister(Reg))
+ return false;
+ }
+ return true;
+}
+
+bool
+Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
+ const ReduceEntry &Entry) {
+ if (ReduceLimitLdSt != -1 && ((int)NumLdSts >= ReduceLimitLdSt))
+ return false;
+
+ unsigned Scale = 1;
+ bool HasImmOffset = false;
+ bool HasShift = false;
+ bool HasOffReg = true;
+ bool isLdStMul = false;
+ unsigned Opc = Entry.NarrowOpc1;
+ unsigned OpNum = 3; // First 'rest' of operands.
+ uint8_t ImmLimit = Entry.Imm1Limit;
+
+ switch (Entry.WideOpc) {
+ default:
+ llvm_unreachable("Unexpected Thumb2 load / store opcode!");
+ case ARM::t2LDRi12:
+ case ARM::t2STRi12:
+ if (MI->getOperand(1).getReg() == ARM::SP) {
+ Opc = Entry.NarrowOpc2;
+ ImmLimit = Entry.Imm2Limit;
+ HasOffReg = false;
+ }
+
+ Scale = 4;
+ HasImmOffset = true;
+ HasOffReg = false;
+ break;
+ case ARM::t2LDRBi12:
+ case ARM::t2STRBi12:
+ HasImmOffset = true;
+ HasOffReg = false;
+ break;
+ case ARM::t2LDRHi12:
+ case ARM::t2STRHi12:
+ Scale = 2;
+ HasImmOffset = true;
+ HasOffReg = false;
+ break;
+ case ARM::t2LDRs:
+ case ARM::t2LDRBs:
+ case ARM::t2LDRHs:
+ case ARM::t2LDRSBs:
+ case ARM::t2LDRSHs:
+ case ARM::t2STRs:
+ case ARM::t2STRBs:
+ case ARM::t2STRHs:
+ HasShift = true;
+ OpNum = 4;
+ break;
+ case ARM::t2LDMIA:
+ case ARM::t2LDMDB: {
+ unsigned BaseReg = MI->getOperand(0).getReg();
+ if (!isARMLowRegister(BaseReg) || Entry.WideOpc != ARM::t2LDMIA)
+ return false;
+
+ // For the non-writeback version (this one), the base register must be
+ // one of the registers being loaded.
+ bool isOK = false;
+ for (unsigned i = 4; i < MI->getNumOperands(); ++i) {
+ if (MI->getOperand(i).getReg() == BaseReg) {
+ isOK = true;
+ break;
+ }
+ }
+
+ if (!isOK)
+ return false;
+
+ OpNum = 0;
+ isLdStMul = true;
+ break;
+ }
+ case ARM::t2LDMIA_RET: {
+ unsigned BaseReg = MI->getOperand(1).getReg();
+ if (BaseReg != ARM::SP)
+ return false;
+ Opc = Entry.NarrowOpc2; // tPOP_RET
+ OpNum = 2;
+ isLdStMul = true;
+ break;
+ }
+ case ARM::t2LDMIA_UPD:
+ case ARM::t2LDMDB_UPD:
+ case ARM::t2STMIA_UPD:
+ case ARM::t2STMDB_UPD: {
+ OpNum = 0;
+
+ unsigned BaseReg = MI->getOperand(1).getReg();
+ if (BaseReg == ARM::SP &&
+ (Entry.WideOpc == ARM::t2LDMIA_UPD ||
+ Entry.WideOpc == ARM::t2STMDB_UPD)) {
+ Opc = Entry.NarrowOpc2; // tPOP or tPUSH
+ OpNum = 2;
+ } else if (!isARMLowRegister(BaseReg) ||
+ (Entry.WideOpc != ARM::t2LDMIA_UPD &&
+ Entry.WideOpc != ARM::t2STMIA_UPD)) {
+ return false;
+ }
+
+ isLdStMul = true;
+ break;
+ }
+ }
+
+ unsigned OffsetReg = 0;
+ bool OffsetKill = false;
+ if (HasShift) {
+ OffsetReg = MI->getOperand(2).getReg();
+ OffsetKill = MI->getOperand(2).isKill();
+
+ if (MI->getOperand(3).getImm())
+ // Thumb1 addressing mode doesn't support shift.
+ return false;
+ }
+
+ unsigned OffsetImm = 0;
+ if (HasImmOffset) {
+ OffsetImm = MI->getOperand(2).getImm();
+ unsigned MaxOffset = ((1 << ImmLimit) - 1) * Scale;
+
+ if ((OffsetImm & (Scale - 1)) || OffsetImm > MaxOffset)
+ // Make sure the immediate field fits.
+ return false;
+ }
+
+ // Add the 16-bit load / store instruction.
+ DebugLoc dl = MI->getDebugLoc();
+ MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, TII->get(Opc));
+ if (!isLdStMul) {
+ MIB.addOperand(MI->getOperand(0));
+ MIB.addOperand(MI->getOperand(1));
+
+ if (HasImmOffset)
+ MIB.addImm(OffsetImm / Scale);
+
+ assert((!HasShift || OffsetReg) && "Invalid so_reg load / store address!");
+
+ if (HasOffReg)
+ MIB.addReg(OffsetReg, getKillRegState(OffsetKill));
+ }
+
+ // Transfer the rest of operands.
+ for (unsigned e = MI->getNumOperands(); OpNum != e; ++OpNum)
+ MIB.addOperand(MI->getOperand(OpNum));
+
+ // Transfer memoperands.
+ MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+
+ // Transfer MI flags.
+ MIB.setMIFlags(MI->getFlags());
+
+ DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB);
+
+ MBB.erase(MI);
+ ++NumLdSts;
+ return true;
+}
+
+bool
+Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
+ const ReduceEntry &Entry,
+ bool LiveCPSR, MachineInstr *CPSRDef) {
+ unsigned Opc = MI->getOpcode();
+ if (Opc == ARM::t2ADDri) {
+ // If the source register is SP, try to reduce to tADDrSPi, otherwise
+ // it's a normal reduce.
+ if (MI->getOperand(1).getReg() != ARM::SP) {
+ if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef))
+ return true;
+ return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef);
+ }
+ // Try to reduce to tADDrSPi.
+ unsigned Imm = MI->getOperand(2).getImm();
+ // The immediate must be in range, the destination register must be a low
+ // reg, the predicate must be "always" and the condition flags must not
+ // be being set.
+ if (Imm & 3 || Imm > 1020)
+ return false;
+ if (!isARMLowRegister(MI->getOperand(0).getReg()))
+ return false;
+ if (MI->getOperand(3).getImm() != ARMCC::AL)
+ return false;
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (MCID.hasOptionalDef() &&
+ MI->getOperand(MCID.getNumOperands()-1).getReg() == ARM::CPSR)
+ return false;
+
+ MachineInstrBuilder MIB = BuildMI(MBB, *MI, MI->getDebugLoc(),
+ TII->get(ARM::tADDrSPi))
+ .addOperand(MI->getOperand(0))
+ .addOperand(MI->getOperand(1))
+ .addImm(Imm / 4); // The tADDrSPi has an implied scale by four.
+ AddDefaultPred(MIB);
+
+ // Transfer MI flags.
+ MIB.setMIFlags(MI->getFlags());
+
+ DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " <<*MIB);
+
+ MBB.erase(MI);
+ ++NumNarrows;
+ return true;
+ }
+
+ if (Entry.LowRegs1 && !VerifyLowRegs(MI))
+ return false;
+
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (MCID.mayLoad() || MCID.mayStore())
+ return ReduceLoadStore(MBB, MI, Entry);
+
+ switch (Opc) {
+ default: break;
+ case ARM::t2ADDSri:
+ case ARM::t2ADDSrr: {
+ unsigned PredReg = 0;
+ if (getInstrPredicate(MI, PredReg) == ARMCC::AL) {
+ switch (Opc) {
+ default: break;
+ case ARM::t2ADDSri: {
+ if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef))
+ return true;
+ // fallthrough
+ }
+ case ARM::t2ADDSrr:
+ return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef);
+ }
+ }
+ break;
+ }
+ case ARM::t2RSBri:
+ case ARM::t2RSBSri:
+ case ARM::t2SXTB:
+ case ARM::t2SXTH:
+ case ARM::t2UXTB:
+ case ARM::t2UXTH:
+ if (MI->getOperand(2).getImm() == 0)
+ return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef);
+ break;
+ case ARM::t2MOVi16:
+ // Can convert only 'pure' immediate operands, not immediates obtained as
+ // globals' addresses.
+ if (MI->getOperand(1).isImm())
+ return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef);
+ break;
+ case ARM::t2CMPrr: {
+ // Try to reduce to the lo-reg only version first. Why there are two
+ // versions of the instruction is a mystery.
+ // It would be nice to just have two entries in the master table that
+ // are prioritized, but the table assumes a unique entry for each
+ // source insn opcode. So for now, we hack a local entry record to use.
+ static const ReduceEntry NarrowEntry =
+ { ARM::t2CMPrr,ARM::tCMPr, 0, 0, 0, 1, 1,2, 0, 0,1 };
+ if (ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR, CPSRDef))
+ return true;
+ return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef);
+ }
+ }
+ return false;
+}
+
+bool
+Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
+ const ReduceEntry &Entry,
+ bool LiveCPSR, MachineInstr *CPSRDef) {
+
+ if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr))
+ return false;
+
+ unsigned Reg0 = MI->getOperand(0).getReg();
+ unsigned Reg1 = MI->getOperand(1).getReg();
+ if (Reg0 != Reg1) {
+ // Try to commute the operands to make it a 2-address instruction.
+ unsigned CommOpIdx1, CommOpIdx2;
+ if (!TII->findCommutedOpIndices(MI, CommOpIdx1, CommOpIdx2) ||
+ CommOpIdx1 != 1 || MI->getOperand(CommOpIdx2).getReg() != Reg0)
+ return false;
+ MachineInstr *CommutedMI = TII->commuteInstruction(MI);
+ if (!CommutedMI)
+ return false;
+ }
+ if (Entry.LowRegs2 && !isARMLowRegister(Reg0))
+ return false;
+ if (Entry.Imm2Limit) {
+ unsigned Imm = MI->getOperand(2).getImm();
+ unsigned Limit = (1 << Entry.Imm2Limit) - 1;
+ if (Imm > Limit)
+ return false;
+ } else {
+ unsigned Reg2 = MI->getOperand(2).getReg();
+ if (Entry.LowRegs2 && !isARMLowRegister(Reg2))
+ return false;
+ }
+
+ // Check if it's possible / necessary to transfer the predicate.
+ const MCInstrDesc &NewMCID = TII->get(Entry.NarrowOpc2);
+ unsigned PredReg = 0;
+ ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
+ bool SkipPred = false;
+ if (Pred != ARMCC::AL) {
+ if (!NewMCID.isPredicable())
+ // Can't transfer predicate, fail.
+ return false;
+ } else {
+ SkipPred = !NewMCID.isPredicable();
+ }
+
+ bool HasCC = false;
+ bool CCDead = false;
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (MCID.hasOptionalDef()) {
+ unsigned NumOps = MCID.getNumOperands();
+ HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR);
+ if (HasCC && MI->getOperand(NumOps-1).isDead())
+ CCDead = true;
+ }
+ if (!VerifyPredAndCC(MI, Entry, true, Pred, LiveCPSR, HasCC, CCDead))
+ return false;
+
+ // Avoid adding a false dependency on partial flag update by some 16-bit
+ // instructions which has the 's' bit set.
+ if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC &&
+ canAddPseudoFlagDep(CPSRDef, MI))
+ return false;
+
+ // Add the 16-bit instruction.
+ DebugLoc dl = MI->getDebugLoc();
+ MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewMCID);
+ MIB.addOperand(MI->getOperand(0));
+ if (NewMCID.hasOptionalDef()) {
+ if (HasCC)
+ AddDefaultT1CC(MIB, CCDead);
+ else
+ AddNoT1CC(MIB);
+ }
+
+ // Transfer the rest of operands.
+ unsigned NumOps = MCID.getNumOperands();
+ for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
+ if (i < NumOps && MCID.OpInfo[i].isOptionalDef())
+ continue;
+ if (SkipPred && MCID.OpInfo[i].isPredicate())
+ continue;
+ MIB.addOperand(MI->getOperand(i));
+ }
+
+ // Transfer MI flags.
+ MIB.setMIFlags(MI->getFlags());
+
+ DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB);
+
+ MBB.erase(MI);
+ ++Num2Addrs;
+ return true;
+}
+
+bool
+Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
+ const ReduceEntry &Entry,
+ bool LiveCPSR, MachineInstr *CPSRDef) {
+ if (ReduceLimit != -1 && ((int)NumNarrows >= ReduceLimit))
+ return false;
+
+ unsigned Limit = ~0U;
+ if (Entry.Imm1Limit)
+ Limit = (1 << Entry.Imm1Limit) - 1;
+
+ const MCInstrDesc &MCID = MI->getDesc();
+ for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) {
+ if (MCID.OpInfo[i].isPredicate())
+ continue;
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg()) {
+ unsigned Reg = MO.getReg();
+ if (!Reg || Reg == ARM::CPSR)
+ continue;
+ if (Entry.LowRegs1 && !isARMLowRegister(Reg))
+ return false;
+ } else if (MO.isImm() &&
+ !MCID.OpInfo[i].isPredicate()) {
+ if (((unsigned)MO.getImm()) > Limit)
+ return false;
+ }
+ }
+
+ // Check if it's possible / necessary to transfer the predicate.
+ const MCInstrDesc &NewMCID = TII->get(Entry.NarrowOpc1);
+ unsigned PredReg = 0;
+ ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
+ bool SkipPred = false;
+ if (Pred != ARMCC::AL) {
+ if (!NewMCID.isPredicable())
+ // Can't transfer predicate, fail.
+ return false;
+ } else {
+ SkipPred = !NewMCID.isPredicable();
+ }
+
+ bool HasCC = false;
+ bool CCDead = false;
+ if (MCID.hasOptionalDef()) {
+ unsigned NumOps = MCID.getNumOperands();
+ HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR);
+ if (HasCC && MI->getOperand(NumOps-1).isDead())
+ CCDead = true;
+ }
+ if (!VerifyPredAndCC(MI, Entry, false, Pred, LiveCPSR, HasCC, CCDead))
+ return false;
+
+ // Avoid adding a false dependency on partial flag update by some 16-bit
+ // instructions which has the 's' bit set.
+ if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC &&
+ canAddPseudoFlagDep(CPSRDef, MI))
+ return false;
+
+ // Add the 16-bit instruction.
+ DebugLoc dl = MI->getDebugLoc();
+ MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewMCID);
+ MIB.addOperand(MI->getOperand(0));
+ if (NewMCID.hasOptionalDef()) {
+ if (HasCC)
+ AddDefaultT1CC(MIB, CCDead);
+ else
+ AddNoT1CC(MIB);
+ }
+
+ // Transfer the rest of operands.
+ unsigned NumOps = MCID.getNumOperands();
+ for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
+ if (i < NumOps && MCID.OpInfo[i].isOptionalDef())
+ continue;
+ if ((MCID.getOpcode() == ARM::t2RSBSri ||
+ MCID.getOpcode() == ARM::t2RSBri ||
+ MCID.getOpcode() == ARM::t2SXTB ||
+ MCID.getOpcode() == ARM::t2SXTH ||
+ MCID.getOpcode() == ARM::t2UXTB ||
+ MCID.getOpcode() == ARM::t2UXTH) && i == 2)
+ // Skip the zero immediate operand, it's now implicit.
+ continue;
+ bool isPred = (i < NumOps && MCID.OpInfo[i].isPredicate());
+ if (SkipPred && isPred)
+ continue;
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isImplicit() && MO.getReg() == ARM::CPSR)
+ // Skip implicit def of CPSR. Either it's modeled as an optional
+ // def now or it's already an implicit def on the new instruction.
+ continue;
+ MIB.addOperand(MO);
+ }
+ if (!MCID.isPredicable() && NewMCID.isPredicable())
+ AddDefaultPred(MIB);
+
+ // Transfer MI flags.
+ MIB.setMIFlags(MI->getFlags());
+
+ DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB);
+
+ MBB.erase(MI);
+ ++NumNarrows;
+ return true;
+}
+
+static bool UpdateCPSRDef(MachineInstr &MI, bool LiveCPSR, bool &DefCPSR) {
+ bool HasDef = false;
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || MO.isUndef() || MO.isUse())
+ continue;
+ if (MO.getReg() != ARM::CPSR)
+ continue;
+
+ DefCPSR = true;
+ if (!MO.isDead())
+ HasDef = true;
+ }
+
+ return HasDef || LiveCPSR;
+}
+
+static bool UpdateCPSRUse(MachineInstr &MI, bool LiveCPSR) {
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || MO.isUndef() || MO.isDef())
+ continue;
+ if (MO.getReg() != ARM::CPSR)
+ continue;
+ assert(LiveCPSR && "CPSR liveness tracking is wrong!");
+ if (MO.isKill()) {
+ LiveCPSR = false;
+ break;
+ }
+ }
+
+ return LiveCPSR;
+}
+
+bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
+ bool Modified = false;
+
+ // Yes, CPSR could be livein.
+ bool LiveCPSR = MBB.isLiveIn(ARM::CPSR);
+ MachineInstr *CPSRDef = 0;
+
+ MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end();
+ MachineBasicBlock::iterator NextMII;
+ for (; MII != E; MII = NextMII) {
+ NextMII = llvm::next(MII);
+
+ MachineInstr *MI = &*MII;
+ LiveCPSR = UpdateCPSRUse(*MI, LiveCPSR);
+
+ unsigned Opcode = MI->getOpcode();
+ DenseMap<unsigned, unsigned>::iterator OPI = ReduceOpcodeMap.find(Opcode);
+ if (OPI != ReduceOpcodeMap.end()) {
+ const ReduceEntry &Entry = ReduceTable[OPI->second];
+ // Ignore "special" cases for now.
+ if (Entry.Special) {
+ if (ReduceSpecial(MBB, MI, Entry, LiveCPSR, CPSRDef)) {
+ Modified = true;
+ MachineBasicBlock::iterator I = prior(NextMII);
+ MI = &*I;
+ }
+ goto ProcessNext;
+ }
+
+ // Try to transform to a 16-bit two-address instruction.
+ if (Entry.NarrowOpc2 &&
+ ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef)) {
+ Modified = true;
+ MachineBasicBlock::iterator I = prior(NextMII);
+ MI = &*I;
+ goto ProcessNext;
+ }
+
+ // Try to transform to a 16-bit non-two-address instruction.
+ if (Entry.NarrowOpc1 &&
+ ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef)) {
+ Modified = true;
+ MachineBasicBlock::iterator I = prior(NextMII);
+ MI = &*I;
+ }
+ }
+
+ ProcessNext:
+ bool DefCPSR = false;
+ LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR, DefCPSR);
+ if (MI->getDesc().isCall())
+ // Calls don't really set CPSR.
+ CPSRDef = 0;
+ else if (DefCPSR)
+ // This is the last CPSR defining instruction.
+ CPSRDef = MI;
+ }
+
+ return Modified;
+}
+
+bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
+ const TargetMachine &TM = MF.getTarget();
+ TII = static_cast<const Thumb2InstrInfo*>(TM.getInstrInfo());
+ STI = &TM.getSubtarget<ARMSubtarget>();
+
+ bool Modified = false;
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
+ Modified |= ReduceMBB(*I);
+ return Modified;
+}
+
+/// createThumb2SizeReductionPass - Returns an instance of the Thumb2 size
+/// reduction pass.
+FunctionPass *llvm::createThumb2SizeReductionPass() {
+ return new Thumb2SizeReduce();
+}
diff --git a/contrib/llvm/lib/Target/Alpha/Alpha.h b/contrib/llvm/lib/Target/Alpha/Alpha.h
new file mode 100644
index 0000000..6ffaf45
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/Alpha.h
@@ -0,0 +1,43 @@
+//===-- Alpha.h - Top-level interface for Alpha representation --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// Alpha back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TARGET_ALPHA_H
+#define TARGET_ALPHA_H
+
+#include "MCTargetDesc/AlphaMCTargetDesc.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+ namespace Alpha {
+ // These describe LDAx
+
+ static const int IMM_LOW = -32768;
+ static const int IMM_HIGH = 32767;
+ static const int IMM_MULT = 65536;
+ }
+
+ class AlphaTargetMachine;
+ class FunctionPass;
+ class formatted_raw_ostream;
+
+ FunctionPass *createAlphaISelDag(AlphaTargetMachine &TM);
+ FunctionPass *createAlphaPatternInstructionSelector(TargetMachine &TM);
+ FunctionPass *createAlphaJITCodeEmitterPass(AlphaTargetMachine &TM,
+ JITCodeEmitter &JCE);
+ FunctionPass *createAlphaLLRPPass(AlphaTargetMachine &tm);
+ FunctionPass *createAlphaBranchSelectionPass();
+
+} // end namespace llvm;
+
+#endif
diff --git a/contrib/llvm/lib/Target/Alpha/Alpha.td b/contrib/llvm/lib/Target/Alpha/Alpha.td
new file mode 100644
index 0000000..ae79c2e
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/Alpha.td
@@ -0,0 +1,68 @@
+//===- Alpha.td - Describe the Alpha Target Machine --------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+// Get the target-independent interfaces which we are implementing...
+//
+include "llvm/Target/Target.td"
+
+//Alpha is little endian
+
+//===----------------------------------------------------------------------===//
+// Subtarget Features
+//===----------------------------------------------------------------------===//
+
+def FeatureCIX : SubtargetFeature<"cix", "HasCT", "true",
+ "Enable CIX extensions">;
+
+//===----------------------------------------------------------------------===//
+// Register File Description
+//===----------------------------------------------------------------------===//
+
+include "AlphaRegisterInfo.td"
+
+//===----------------------------------------------------------------------===//
+// Calling Convention Description
+//===----------------------------------------------------------------------===//
+
+include "AlphaCallingConv.td"
+
+//===----------------------------------------------------------------------===//
+// Schedule Description
+//===----------------------------------------------------------------------===//
+
+include "AlphaSchedule.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "AlphaInstrInfo.td"
+
+def AlphaInstrInfo : InstrInfo;
+
+//===----------------------------------------------------------------------===//
+// Alpha Processor Definitions
+//===----------------------------------------------------------------------===//
+
+def : Processor<"generic", Alpha21264Itineraries, []>;
+def : Processor<"ev6" , Alpha21264Itineraries, []>;
+def : Processor<"ev67" , Alpha21264Itineraries, [FeatureCIX]>;
+
+//===----------------------------------------------------------------------===//
+// The Alpha Target
+//===----------------------------------------------------------------------===//
+
+
+def Alpha : Target {
+ // Pull in Instruction Info:
+ let InstructionSet = AlphaInstrInfo;
+}
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaAsmPrinter.cpp b/contrib/llvm/lib/Target/Alpha/AlphaAsmPrinter.cpp
new file mode 100644
index 0000000..5dce06a
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/AlphaAsmPrinter.cpp
@@ -0,0 +1,166 @@
+//===-- AlphaAsmPrinter.cpp - Alpha LLVM assembly writer ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to GAS-format Alpha assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "Alpha.h"
+#include "AlphaInstrInfo.h"
+#include "AlphaTargetMachine.h"
+#include "llvm/Module.h"
+#include "llvm/Type.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+ struct AlphaAsmPrinter : public AsmPrinter {
+ /// Unique incrementer for label values for referencing Global values.
+ ///
+
+ explicit AlphaAsmPrinter(TargetMachine &tm, MCStreamer &Streamer)
+ : AsmPrinter(tm, Streamer) {}
+
+ virtual const char *getPassName() const {
+ return "Alpha Assembly Printer";
+ }
+ void printInstruction(const MachineInstr *MI, raw_ostream &O);
+ void EmitInstruction(const MachineInstr *MI) {
+ SmallString<128> Str;
+ raw_svector_ostream OS(Str);
+ printInstruction(MI, OS);
+ OutStreamer.EmitRawText(OS.str());
+ }
+ static const char *getRegisterName(unsigned RegNo);
+
+ void printOp(const MachineOperand &MO, raw_ostream &O);
+ void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
+ virtual void EmitFunctionBodyStart();
+ virtual void EmitFunctionBodyEnd();
+ void EmitStartOfAsmFile(Module &M);
+
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O);
+ bool PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNo, unsigned AsmVariant,
+ const char *ExtraCode, raw_ostream &O);
+ };
+} // end of anonymous namespace
+
+#include "AlphaGenAsmWriter.inc"
+
+void AlphaAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
+ raw_ostream &O) {
+ const MachineOperand &MO = MI->getOperand(opNum);
+ if (MO.isReg()) {
+ assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) &&
+ "Not physreg??");
+ O << getRegisterName(MO.getReg());
+ } else if (MO.isImm()) {
+ O << MO.getImm();
+ assert(MO.getImm() < (1 << 30));
+ } else {
+ printOp(MO, O);
+ }
+}
+
+
+void AlphaAsmPrinter::printOp(const MachineOperand &MO, raw_ostream &O) {
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register:
+ O << getRegisterName(MO.getReg());
+ return;
+
+ case MachineOperand::MO_Immediate:
+ assert(0 && "printOp() does not handle immediate values");
+ return;
+
+ case MachineOperand::MO_MachineBasicBlock:
+ O << *MO.getMBB()->getSymbol();
+ return;
+
+ case MachineOperand::MO_ConstantPoolIndex:
+ O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << "_"
+ << MO.getIndex();
+ return;
+
+ case MachineOperand::MO_ExternalSymbol:
+ O << MO.getSymbolName();
+ return;
+
+ case MachineOperand::MO_GlobalAddress:
+ O << *Mang->getSymbol(MO.getGlobal());
+ return;
+
+ case MachineOperand::MO_JumpTableIndex:
+ O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+ << '_' << MO.getIndex();
+ return;
+
+ default:
+ O << "<unknown operand type: " << MO.getType() << ">";
+ return;
+ }
+}
+
+/// EmitFunctionBodyStart - Targets can override this to emit stuff before
+/// the first basic block in the function.
+void AlphaAsmPrinter::EmitFunctionBodyStart() {
+ OutStreamer.EmitRawText("\t.ent " + Twine(CurrentFnSym->getName()));
+}
+
+/// EmitFunctionBodyEnd - Targets can override this to emit stuff after
+/// the last basic block in the function.
+void AlphaAsmPrinter::EmitFunctionBodyEnd() {
+ OutStreamer.EmitRawText("\t.end " + Twine(CurrentFnSym->getName()));
+}
+
+void AlphaAsmPrinter::EmitStartOfAsmFile(Module &M) {
+ OutStreamer.EmitRawText(StringRef("\t.arch ev6"));
+ OutStreamer.EmitRawText(StringRef("\t.set noat"));
+}
+
+/// PrintAsmOperand - Print out an operand for an inline asm expression.
+///
+bool AlphaAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode, raw_ostream &O) {
+ printOperand(MI, OpNo, O);
+ return false;
+}
+
+bool AlphaAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNo, unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &O) {
+ if (ExtraCode && ExtraCode[0])
+ return true; // Unknown modifier.
+ O << "0(";
+ printOperand(MI, OpNo, O);
+ O << ")";
+ return false;
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeAlphaAsmPrinter() {
+ RegisterAsmPrinter<AlphaAsmPrinter> X(TheAlphaTarget);
+}
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaBranchSelector.cpp b/contrib/llvm/lib/Target/Alpha/AlphaBranchSelector.cpp
new file mode 100644
index 0000000..3768117
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/AlphaBranchSelector.cpp
@@ -0,0 +1,66 @@
+//===-- AlphaBranchSelector.cpp - Convert Pseudo branchs ----------*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Replace Pseudo COND_BRANCH_* with their appropriate real branch
+// Simplified version of the PPC Branch Selector
+//
+//===----------------------------------------------------------------------===//
+
+#include "Alpha.h"
+#include "AlphaInstrInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/MC/MCAsmInfo.h"
+using namespace llvm;
+
+namespace {
+ struct AlphaBSel : public MachineFunctionPass {
+ static char ID;
+ AlphaBSel() : MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual const char *getPassName() const {
+ return "Alpha Branch Selection";
+ }
+ };
+ char AlphaBSel::ID = 0;
+}
+
+/// createAlphaBranchSelectionPass - returns an instance of the Branch Selection
+/// Pass
+///
+FunctionPass *llvm::createAlphaBranchSelectionPass() {
+ return new AlphaBSel();
+}
+
+bool AlphaBSel::runOnMachineFunction(MachineFunction &Fn) {
+
+ for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
+ ++MFI) {
+ MachineBasicBlock *MBB = MFI;
+
+ for (MachineBasicBlock::iterator MBBI = MBB->begin(), EE = MBB->end();
+ MBBI != EE; ++MBBI) {
+ if (MBBI->getOpcode() == Alpha::COND_BRANCH_I ||
+ MBBI->getOpcode() == Alpha::COND_BRANCH_F) {
+
+ // condbranch operands:
+ // 0. bc opcode
+ // 1. reg
+ // 2. target MBB
+ const TargetInstrInfo *TII = Fn.getTarget().getInstrInfo();
+ MBBI->setDesc(TII->get(MBBI->getOperand(0).getImm()));
+ }
+ }
+ }
+
+ return true;
+}
+
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaCallingConv.td b/contrib/llvm/lib/Target/Alpha/AlphaCallingConv.td
new file mode 100644
index 0000000..bde8819
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/AlphaCallingConv.td
@@ -0,0 +1,38 @@
+//===- AlphaCallingConv.td - Calling Conventions for Alpha -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This describes the calling conventions for Alpha architecture.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Alpha Return Value Calling Convention
+//===----------------------------------------------------------------------===//
+def RetCC_Alpha : CallingConv<[
+ // i64 is returned in register R0
+ // R1 is an llvm extension, I don't know what gcc does
+ CCIfType<[i64], CCAssignToReg<[R0,R1]>>,
+
+ // f32 / f64 are returned in F0/F1
+ CCIfType<[f32, f64], CCAssignToReg<[F0, F1]>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// Alpha Argument Calling Conventions
+//===----------------------------------------------------------------------===//
+def CC_Alpha : CallingConv<[
+ // The first 6 arguments are passed in registers, whether integer or
+ // floating-point
+ CCIfType<[i64], CCAssignToRegWithShadow<[R16, R17, R18, R19, R20, R21],
+ [F16, F17, F18, F19, F20, F21]>>,
+
+ CCIfType<[f32, f64], CCAssignToRegWithShadow<[F16, F17, F18, F19, F20, F21],
+ [R16, R17, R18, R19, R20, R21]>>,
+
+ // Stack slots are 8 bytes in size and 8-byte aligned.
+ CCIfType<[i64, f32, f64], CCAssignToStack<8, 8>>
+]>;
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaFrameLowering.cpp b/contrib/llvm/lib/Target/Alpha/AlphaFrameLowering.cpp
new file mode 100644
index 0000000..690cd1d
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/AlphaFrameLowering.cpp
@@ -0,0 +1,143 @@
+//=====- AlphaFrameLowering.cpp - Alpha Frame Information ------*- C++ -*-====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Alpha implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AlphaFrameLowering.h"
+#include "AlphaInstrInfo.h"
+#include "AlphaMachineFunctionInfo.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/ADT/Twine.h"
+
+using namespace llvm;
+
+static long getUpper16(long l) {
+ long y = l / Alpha::IMM_MULT;
+ if (l % Alpha::IMM_MULT > Alpha::IMM_HIGH)
+ ++y;
+ return y;
+}
+
+static long getLower16(long l) {
+ long h = getUpper16(l);
+ return l - h * Alpha::IMM_MULT;
+}
+
+// hasFP - Return true if the specified function should have a dedicated frame
+// pointer register. This is true if the function has variable sized allocas or
+// if frame pointer elimination is disabled.
+//
+bool AlphaFrameLowering::hasFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ return MFI->hasVarSizedObjects();
+}
+
+void AlphaFrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+ DebugLoc dl = (MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc());
+ bool FP = hasFP(MF);
+
+ // Handle GOP offset
+ BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDAHg), Alpha::R29)
+ .addGlobalAddress(MF.getFunction()).addReg(Alpha::R27).addImm(++curgpdist);
+ BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDAg), Alpha::R29)
+ .addGlobalAddress(MF.getFunction()).addReg(Alpha::R29).addImm(curgpdist);
+
+ BuildMI(MBB, MBBI, dl, TII.get(Alpha::ALTENT))
+ .addGlobalAddress(MF.getFunction());
+
+ // Get the number of bytes to allocate from the FrameInfo
+ long NumBytes = MFI->getStackSize();
+
+ if (FP)
+ NumBytes += 8; //reserve space for the old FP
+
+ // Do we need to allocate space on the stack?
+ if (NumBytes == 0) return;
+
+ unsigned Align = getStackAlignment();
+ NumBytes = (NumBytes+Align-1)/Align*Align;
+
+ // Update frame info to pretend that this is part of the stack...
+ MFI->setStackSize(NumBytes);
+
+ // adjust stack pointer: r30 -= numbytes
+ NumBytes = -NumBytes;
+ if (NumBytes >= Alpha::IMM_LOW) {
+ BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDA), Alpha::R30).addImm(NumBytes)
+ .addReg(Alpha::R30);
+ } else if (getUpper16(NumBytes) >= Alpha::IMM_LOW) {
+ BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDAH), Alpha::R30)
+ .addImm(getUpper16(NumBytes)).addReg(Alpha::R30);
+ BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDA), Alpha::R30)
+ .addImm(getLower16(NumBytes)).addReg(Alpha::R30);
+ } else {
+ report_fatal_error("Too big a stack frame at " + Twine(NumBytes));
+ }
+
+ // Now if we need to, save the old FP and set the new
+ if (FP) {
+ BuildMI(MBB, MBBI, dl, TII.get(Alpha::STQ))
+ .addReg(Alpha::R15).addImm(0).addReg(Alpha::R30);
+ // This must be the last instr in the prolog
+ BuildMI(MBB, MBBI, dl, TII.get(Alpha::BISr), Alpha::R15)
+ .addReg(Alpha::R30).addReg(Alpha::R30);
+ }
+
+}
+
+void AlphaFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+ assert((MBBI->getOpcode() == Alpha::RETDAG ||
+ MBBI->getOpcode() == Alpha::RETDAGp)
+ && "Can only insert epilog into returning blocks");
+ DebugLoc dl = MBBI->getDebugLoc();
+
+ bool FP = hasFP(MF);
+
+ // Get the number of bytes allocated from the FrameInfo...
+ long NumBytes = MFI->getStackSize();
+
+ //now if we need to, restore the old FP
+ if (FP) {
+ //copy the FP into the SP (discards allocas)
+ BuildMI(MBB, MBBI, dl, TII.get(Alpha::BISr), Alpha::R30).addReg(Alpha::R15)
+ .addReg(Alpha::R15);
+ //restore the FP
+ BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDQ), Alpha::R15)
+ .addImm(0).addReg(Alpha::R15);
+ }
+
+ if (NumBytes != 0) {
+ if (NumBytes <= Alpha::IMM_HIGH) {
+ BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDA), Alpha::R30).addImm(NumBytes)
+ .addReg(Alpha::R30);
+ } else if (getUpper16(NumBytes) <= Alpha::IMM_HIGH) {
+ BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDAH), Alpha::R30)
+ .addImm(getUpper16(NumBytes)).addReg(Alpha::R30);
+ BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDA), Alpha::R30)
+ .addImm(getLower16(NumBytes)).addReg(Alpha::R30);
+ } else {
+ report_fatal_error("Too big a stack frame at " + Twine(NumBytes));
+ }
+ }
+}
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaFrameLowering.h b/contrib/llvm/lib/Target/Alpha/AlphaFrameLowering.h
new file mode 100644
index 0000000..ebd9e1b
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/AlphaFrameLowering.h
@@ -0,0 +1,43 @@
+//==-- AlphaFrameLowering.h - Define frame lowering for Alpha --*- C++ -*---==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ALPHA_FRAMEINFO_H
+#define ALPHA_FRAMEINFO_H
+
+#include "Alpha.h"
+#include "AlphaSubtarget.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+ class AlphaSubtarget;
+
+class AlphaFrameLowering : public TargetFrameLowering {
+ const AlphaSubtarget &STI;
+ // FIXME: This should end in MachineFunctionInfo, not here!
+ mutable int curgpdist;
+public:
+ explicit AlphaFrameLowering(const AlphaSubtarget &sti)
+ : TargetFrameLowering(StackGrowsDown, 16, 0), STI(sti), curgpdist(0) {
+ }
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ bool hasFP(const MachineFunction &MF) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Alpha/AlphaISelDAGToDAG.cpp
new file mode 100644
index 0000000..f877c65
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/AlphaISelDAGToDAG.cpp
@@ -0,0 +1,425 @@
+//===-- AlphaISelDAGToDAG.cpp - Alpha pattern matching inst selector ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a pattern matching instruction selector for Alpha,
+// converting from a legalized dag to a Alpha dag.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Alpha.h"
+#include "AlphaTargetMachine.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+using namespace llvm;
+
+namespace {
+
+ //===--------------------------------------------------------------------===//
+ /// AlphaDAGToDAGISel - Alpha specific code to select Alpha machine
+ /// instructions for SelectionDAG operations.
+ class AlphaDAGToDAGISel : public SelectionDAGISel {
+ static const int64_t IMM_LOW = -32768;
+ static const int64_t IMM_HIGH = 32767;
+ static const int64_t IMM_MULT = 65536;
+ static const int64_t IMM_FULLHIGH = IMM_HIGH + IMM_HIGH * IMM_MULT;
+ static const int64_t IMM_FULLLOW = IMM_LOW + IMM_LOW * IMM_MULT;
+
+ static int64_t get_ldah16(int64_t x) {
+ int64_t y = x / IMM_MULT;
+ if (x % IMM_MULT > IMM_HIGH)
+ ++y;
+ return y;
+ }
+
+ static int64_t get_lda16(int64_t x) {
+ return x - get_ldah16(x) * IMM_MULT;
+ }
+
+ /// get_zapImm - Return a zap mask if X is a valid immediate for a zapnot
+ /// instruction (if not, return 0). Note that this code accepts partial
+ /// zap masks. For example (and LHS, 1) is a valid zap, as long we know
+ /// that the bits 1-7 of LHS are already zero. If LHS is non-null, we are
+ /// in checking mode. If LHS is null, we assume that the mask has already
+ /// been validated before.
+ uint64_t get_zapImm(SDValue LHS, uint64_t Constant) const {
+ uint64_t BitsToCheck = 0;
+ unsigned Result = 0;
+ for (unsigned i = 0; i != 8; ++i) {
+ if (((Constant >> 8*i) & 0xFF) == 0) {
+ // nothing to do.
+ } else {
+ Result |= 1 << i;
+ if (((Constant >> 8*i) & 0xFF) == 0xFF) {
+ // If the entire byte is set, zapnot the byte.
+ } else if (LHS.getNode() == 0) {
+ // Otherwise, if the mask was previously validated, we know its okay
+ // to zapnot this entire byte even though all the bits aren't set.
+ } else {
+ // Otherwise we don't know that the it's okay to zapnot this entire
+ // byte. Only do this iff we can prove that the missing bits are
+ // already null, so the bytezap doesn't need to really null them.
+ BitsToCheck |= ~Constant & (0xFFULL << 8*i);
+ }
+ }
+ }
+
+ // If there are missing bits in a byte (for example, X & 0xEF00), check to
+ // see if the missing bits (0x1000) are already known zero if not, the zap
+ // isn't okay to do, as it won't clear all the required bits.
+ if (BitsToCheck &&
+ !CurDAG->MaskedValueIsZero(LHS,
+ APInt(LHS.getValueSizeInBits(),
+ BitsToCheck)))
+ return 0;
+
+ return Result;
+ }
+
+ static uint64_t get_zapImm(uint64_t x) {
+ unsigned build = 0;
+ for(int i = 0; i != 8; ++i) {
+ if ((x & 0x00FF) == 0x00FF)
+ build |= 1 << i;
+ else if ((x & 0x00FF) != 0)
+ return 0;
+ x >>= 8;
+ }
+ return build;
+ }
+
+
+ static uint64_t getNearPower2(uint64_t x) {
+ if (!x) return 0;
+ unsigned at = CountLeadingZeros_64(x);
+ uint64_t complow = 1ULL << (63 - at);
+ uint64_t comphigh = complow << 1;
+ if (x - complow <= comphigh - x)
+ return complow;
+ else
+ return comphigh;
+ }
+
+ static bool chkRemNearPower2(uint64_t x, uint64_t r, bool swap) {
+ uint64_t y = getNearPower2(x);
+ if (swap)
+ return (y - x) == r;
+ else
+ return (x - y) == r;
+ }
+
+ public:
+ explicit AlphaDAGToDAGISel(AlphaTargetMachine &TM)
+ : SelectionDAGISel(TM)
+ {}
+
+ /// getI64Imm - Return a target constant with the specified value, of type
+ /// i64.
+ inline SDValue getI64Imm(int64_t Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i64);
+ }
+
+ // Select - Convert the specified operand from a target-independent to a
+ // target-specific node if it hasn't already been changed.
+ SDNode *Select(SDNode *N);
+
+ virtual const char *getPassName() const {
+ return "Alpha DAG->DAG Pattern Instruction Selection";
+ }
+
+ /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
+ /// inline asm expressions.
+ virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps) {
+ SDValue Op0;
+ switch (ConstraintCode) {
+ default: return true;
+ case 'm': // memory
+ Op0 = Op;
+ break;
+ }
+
+ OutOps.push_back(Op0);
+ return false;
+ }
+
+// Include the pieces autogenerated from the target description.
+#include "AlphaGenDAGISel.inc"
+
+private:
+ /// getTargetMachine - Return a reference to the TargetMachine, casted
+ /// to the target-specific type.
+ const AlphaTargetMachine &getTargetMachine() {
+ return static_cast<const AlphaTargetMachine &>(TM);
+ }
+
+ /// getInstrInfo - Return a reference to the TargetInstrInfo, casted
+ /// to the target-specific type.
+ const AlphaInstrInfo *getInstrInfo() {
+ return getTargetMachine().getInstrInfo();
+ }
+
+ SDNode *getGlobalBaseReg();
+ SDNode *getGlobalRetAddr();
+ void SelectCALL(SDNode *Op);
+
+ };
+}
+
+/// getGlobalBaseReg - Output the instructions required to put the
+/// GOT address into a register.
+///
+SDNode *AlphaDAGToDAGISel::getGlobalBaseReg() {
+ unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF);
+ return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode();
+}
+
+/// getGlobalRetAddr - Grab the return address.
+///
+SDNode *AlphaDAGToDAGISel::getGlobalRetAddr() {
+ unsigned GlobalRetAddr = getInstrInfo()->getGlobalRetAddr(MF);
+ return CurDAG->getRegister(GlobalRetAddr, TLI.getPointerTy()).getNode();
+}
+
+// Select - Convert the specified operand from a target-independent to a
+// target-specific node if it hasn't already been changed.
+SDNode *AlphaDAGToDAGISel::Select(SDNode *N) {
+ if (N->isMachineOpcode())
+ return NULL; // Already selected.
+ DebugLoc dl = N->getDebugLoc();
+
+ switch (N->getOpcode()) {
+ default: break;
+ case AlphaISD::CALL:
+ SelectCALL(N);
+ return NULL;
+
+ case ISD::FrameIndex: {
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ return CurDAG->SelectNodeTo(N, Alpha::LDA, MVT::i64,
+ CurDAG->getTargetFrameIndex(FI, MVT::i32),
+ getI64Imm(0));
+ }
+ case ISD::GLOBAL_OFFSET_TABLE:
+ return getGlobalBaseReg();
+ case AlphaISD::GlobalRetAddr:
+ return getGlobalRetAddr();
+
+ case AlphaISD::DivCall: {
+ SDValue Chain = CurDAG->getEntryNode();
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ Chain = CurDAG->getCopyToReg(Chain, dl, Alpha::R24, N1,
+ SDValue(0,0));
+ Chain = CurDAG->getCopyToReg(Chain, dl, Alpha::R25, N2,
+ Chain.getValue(1));
+ Chain = CurDAG->getCopyToReg(Chain, dl, Alpha::R27, N0,
+ Chain.getValue(1));
+ SDNode *CNode =
+ CurDAG->getMachineNode(Alpha::JSRs, dl, MVT::Other, MVT::Glue,
+ Chain, Chain.getValue(1));
+ Chain = CurDAG->getCopyFromReg(Chain, dl, Alpha::R27, MVT::i64,
+ SDValue(CNode, 1));
+ return CurDAG->SelectNodeTo(N, Alpha::BISr, MVT::i64, Chain, Chain);
+ }
+
+ case ISD::READCYCLECOUNTER: {
+ SDValue Chain = N->getOperand(0);
+ return CurDAG->getMachineNode(Alpha::RPCC, dl, MVT::i64, MVT::Other,
+ Chain);
+ }
+
+ case ISD::Constant: {
+ uint64_t uval = cast<ConstantSDNode>(N)->getZExtValue();
+
+ if (uval == 0) {
+ SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ Alpha::R31, MVT::i64);
+ ReplaceUses(SDValue(N, 0), Result);
+ return NULL;
+ }
+
+ int64_t val = (int64_t)uval;
+ int32_t val32 = (int32_t)val;
+ if (val <= IMM_HIGH + IMM_HIGH * IMM_MULT &&
+ val >= IMM_LOW + IMM_LOW * IMM_MULT)
+ break; //(LDAH (LDA))
+ if ((uval >> 32) == 0 && //empty upper bits
+ val32 <= IMM_HIGH + IMM_HIGH * IMM_MULT)
+ // val32 >= IMM_LOW + IMM_LOW * IMM_MULT) //always true
+ break; //(zext (LDAH (LDA)))
+ //Else use the constant pool
+ ConstantInt *C = ConstantInt::get(
+ Type::getInt64Ty(*CurDAG->getContext()), uval);
+ SDValue CPI = CurDAG->getTargetConstantPool(C, MVT::i64);
+ SDNode *Tmp = CurDAG->getMachineNode(Alpha::LDAHr, dl, MVT::i64, CPI,
+ SDValue(getGlobalBaseReg(), 0));
+ return CurDAG->SelectNodeTo(N, Alpha::LDQr, MVT::i64, MVT::Other,
+ CPI, SDValue(Tmp, 0), CurDAG->getEntryNode());
+ }
+ case ISD::TargetConstantFP:
+ case ISD::ConstantFP: {
+ ConstantFPSDNode *CN = cast<ConstantFPSDNode>(N);
+ bool isDouble = N->getValueType(0) == MVT::f64;
+ EVT T = isDouble ? MVT::f64 : MVT::f32;
+ if (CN->getValueAPF().isPosZero()) {
+ return CurDAG->SelectNodeTo(N, isDouble ? Alpha::CPYST : Alpha::CPYSS,
+ T, CurDAG->getRegister(Alpha::F31, T),
+ CurDAG->getRegister(Alpha::F31, T));
+ } else if (CN->getValueAPF().isNegZero()) {
+ return CurDAG->SelectNodeTo(N, isDouble ? Alpha::CPYSNT : Alpha::CPYSNS,
+ T, CurDAG->getRegister(Alpha::F31, T),
+ CurDAG->getRegister(Alpha::F31, T));
+ } else {
+ report_fatal_error("Unhandled FP constant type");
+ }
+ break;
+ }
+
+ case ISD::SETCC:
+ if (N->getOperand(0).getNode()->getValueType(0).isFloatingPoint()) {
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
+
+ unsigned Opc = Alpha::WTF;
+ bool rev = false;
+ bool inv = false;
+ switch(CC) {
+ default: DEBUG(N->dump(CurDAG)); llvm_unreachable("Unknown FP comparison!");
+ case ISD::SETEQ: case ISD::SETOEQ: case ISD::SETUEQ:
+ Opc = Alpha::CMPTEQ; break;
+ case ISD::SETLT: case ISD::SETOLT: case ISD::SETULT:
+ Opc = Alpha::CMPTLT; break;
+ case ISD::SETLE: case ISD::SETOLE: case ISD::SETULE:
+ Opc = Alpha::CMPTLE; break;
+ case ISD::SETGT: case ISD::SETOGT: case ISD::SETUGT:
+ Opc = Alpha::CMPTLT; rev = true; break;
+ case ISD::SETGE: case ISD::SETOGE: case ISD::SETUGE:
+ Opc = Alpha::CMPTLE; rev = true; break;
+ case ISD::SETNE: case ISD::SETONE: case ISD::SETUNE:
+ Opc = Alpha::CMPTEQ; inv = true; break;
+ case ISD::SETO:
+ Opc = Alpha::CMPTUN; inv = true; break;
+ case ISD::SETUO:
+ Opc = Alpha::CMPTUN; break;
+ };
+ SDValue tmp1 = N->getOperand(rev?1:0);
+ SDValue tmp2 = N->getOperand(rev?0:1);
+ SDNode *cmp = CurDAG->getMachineNode(Opc, dl, MVT::f64, tmp1, tmp2);
+ if (inv)
+ cmp = CurDAG->getMachineNode(Alpha::CMPTEQ, dl,
+ MVT::f64, SDValue(cmp, 0),
+ CurDAG->getRegister(Alpha::F31, MVT::f64));
+ switch(CC) {
+ case ISD::SETUEQ: case ISD::SETULT: case ISD::SETULE:
+ case ISD::SETUNE: case ISD::SETUGT: case ISD::SETUGE:
+ {
+ SDNode* cmp2 = CurDAG->getMachineNode(Alpha::CMPTUN, dl, MVT::f64,
+ tmp1, tmp2);
+ cmp = CurDAG->getMachineNode(Alpha::ADDT, dl, MVT::f64,
+ SDValue(cmp2, 0), SDValue(cmp, 0));
+ break;
+ }
+ default: break;
+ }
+
+ SDNode* LD = CurDAG->getMachineNode(Alpha::FTOIT, dl,
+ MVT::i64, SDValue(cmp, 0));
+ return CurDAG->getMachineNode(Alpha::CMPULT, dl, MVT::i64,
+ CurDAG->getRegister(Alpha::R31, MVT::i64),
+ SDValue(LD,0));
+ }
+ break;
+
+ case ISD::AND: {
+ ConstantSDNode* SC = NULL;
+ ConstantSDNode* MC = NULL;
+ if (N->getOperand(0).getOpcode() == ISD::SRL &&
+ (MC = dyn_cast<ConstantSDNode>(N->getOperand(1))) &&
+ (SC = dyn_cast<ConstantSDNode>(N->getOperand(0).getOperand(1)))) {
+ uint64_t sval = SC->getZExtValue();
+ uint64_t mval = MC->getZExtValue();
+ // If the result is a zap, let the autogened stuff handle it.
+ if (get_zapImm(N->getOperand(0), mval))
+ break;
+ // given mask X, and shift S, we want to see if there is any zap in the
+ // mask if we play around with the botton S bits
+ uint64_t dontcare = (~0ULL) >> (64 - sval);
+ uint64_t mask = mval << sval;
+
+ if (get_zapImm(mask | dontcare))
+ mask = mask | dontcare;
+
+ if (get_zapImm(mask)) {
+ SDValue Z =
+ SDValue(CurDAG->getMachineNode(Alpha::ZAPNOTi, dl, MVT::i64,
+ N->getOperand(0).getOperand(0),
+ getI64Imm(get_zapImm(mask))), 0);
+ return CurDAG->getMachineNode(Alpha::SRLr, dl, MVT::i64, Z,
+ getI64Imm(sval));
+ }
+ }
+ break;
+ }
+
+ }
+
+ return SelectCode(N);
+}
+
+void AlphaDAGToDAGISel::SelectCALL(SDNode *N) {
+ //TODO: add flag stuff to prevent nondeturministic breakage!
+
+ SDValue Chain = N->getOperand(0);
+ SDValue Addr = N->getOperand(1);
+ SDValue InFlag = N->getOperand(N->getNumOperands() - 1);
+ DebugLoc dl = N->getDebugLoc();
+
+ if (Addr.getOpcode() == AlphaISD::GPRelLo) {
+ SDValue GOT = SDValue(getGlobalBaseReg(), 0);
+ Chain = CurDAG->getCopyToReg(Chain, dl, Alpha::R29, GOT, InFlag);
+ InFlag = Chain.getValue(1);
+ Chain = SDValue(CurDAG->getMachineNode(Alpha::BSR, dl, MVT::Other,
+ MVT::Glue, Addr.getOperand(0),
+ Chain, InFlag), 0);
+ } else {
+ Chain = CurDAG->getCopyToReg(Chain, dl, Alpha::R27, Addr, InFlag);
+ InFlag = Chain.getValue(1);
+ Chain = SDValue(CurDAG->getMachineNode(Alpha::JSR, dl, MVT::Other,
+ MVT::Glue, Chain, InFlag), 0);
+ }
+ InFlag = Chain.getValue(1);
+
+ ReplaceUses(SDValue(N, 0), Chain);
+ ReplaceUses(SDValue(N, 1), InFlag);
+}
+
+
+/// createAlphaISelDag - This pass converts a legalized DAG into a
+/// Alpha-specific DAG, ready for instruction scheduling.
+///
+FunctionPass *llvm::createAlphaISelDag(AlphaTargetMachine &TM) {
+ return new AlphaDAGToDAGISel(TM);
+}
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaISelLowering.cpp b/contrib/llvm/lib/Target/Alpha/AlphaISelLowering.cpp
new file mode 100644
index 0000000..3057eb8
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/AlphaISelLowering.cpp
@@ -0,0 +1,962 @@
+//===-- AlphaISelLowering.cpp - Alpha DAG Lowering Implementation ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AlphaISelLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AlphaISelLowering.h"
+#include "AlphaTargetMachine.h"
+#include "AlphaMachineFunctionInfo.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Module.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Type.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+/// AddLiveIn - This helper function adds the specified physical register to the
+/// MachineFunction as a live in value. It also creates a corresponding virtual
+/// register for it.
+static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg,
+ TargetRegisterClass *RC) {
+ assert(RC->contains(PReg) && "Not the correct regclass!");
+ unsigned VReg = MF.getRegInfo().createVirtualRegister(RC);
+ MF.getRegInfo().addLiveIn(PReg, VReg);
+ return VReg;
+}
+
+AlphaTargetLowering::AlphaTargetLowering(TargetMachine &TM)
+ : TargetLowering(TM, new TargetLoweringObjectFileELF()) {
+ // Set up the TargetLowering object.
+ //I am having problems with shr n i8 1
+ setBooleanContents(ZeroOrOneBooleanContent);
+ setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
+
+ addRegisterClass(MVT::i64, Alpha::GPRCRegisterClass);
+ addRegisterClass(MVT::f64, Alpha::F8RCRegisterClass);
+ addRegisterClass(MVT::f32, Alpha::F4RCRegisterClass);
+
+ // We want to custom lower some of our intrinsics.
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+
+ setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i32, Expand);
+
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Expand);
+
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
+ // setOperationAction(ISD::BRIND, MVT::Other, Expand);
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC, MVT::Other, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+
+ setOperationAction(ISD::FREM, MVT::f32, Expand);
+ setOperationAction(ISD::FREM, MVT::f64, Expand);
+
+ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
+
+ if (!TM.getSubtarget<AlphaSubtarget>().hasCT()) {
+ setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
+ setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
+ setOperationAction(ISD::CTLZ , MVT::i64 , Expand);
+ }
+ setOperationAction(ISD::BSWAP , MVT::i64, Expand);
+ setOperationAction(ISD::ROTL , MVT::i64, Expand);
+ setOperationAction(ISD::ROTR , MVT::i64, Expand);
+
+ setOperationAction(ISD::SREM , MVT::i64, Custom);
+ setOperationAction(ISD::UREM , MVT::i64, Custom);
+ setOperationAction(ISD::SDIV , MVT::i64, Custom);
+ setOperationAction(ISD::UDIV , MVT::i64, Custom);
+
+ setOperationAction(ISD::ADDC , MVT::i64, Expand);
+ setOperationAction(ISD::ADDE , MVT::i64, Expand);
+ setOperationAction(ISD::SUBC , MVT::i64, Expand);
+ setOperationAction(ISD::SUBE , MVT::i64, Expand);
+
+ setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
+
+ setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
+ setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand);
+ setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand);
+
+ // We don't support sin/cos/sqrt/pow
+ setOperationAction(ISD::FSIN , MVT::f64, Expand);
+ setOperationAction(ISD::FCOS , MVT::f64, Expand);
+ setOperationAction(ISD::FSIN , MVT::f32, Expand);
+ setOperationAction(ISD::FCOS , MVT::f32, Expand);
+
+ setOperationAction(ISD::FSQRT, MVT::f64, Expand);
+ setOperationAction(ISD::FSQRT, MVT::f32, Expand);
+
+ setOperationAction(ISD::FPOW , MVT::f32, Expand);
+ setOperationAction(ISD::FPOW , MVT::f64, Expand);
+
+ setOperationAction(ISD::FMA, MVT::f64, Expand);
+ setOperationAction(ISD::FMA, MVT::f32, Expand);
+
+ setOperationAction(ISD::SETCC, MVT::f32, Promote);
+
+ setOperationAction(ISD::BITCAST, MVT::f32, Promote);
+
+ setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
+
+ // Not implemented yet.
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
+
+ // We want to legalize GlobalAddress and ConstantPool and
+ // ExternalSymbols nodes into the appropriate instructions to
+ // materialize the address.
+ setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
+ setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
+ setOperationAction(ISD::ExternalSymbol, MVT::i64, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
+
+ setOperationAction(ISD::VASTART, MVT::Other, Custom);
+ setOperationAction(ISD::VAEND, MVT::Other, Expand);
+ setOperationAction(ISD::VACOPY, MVT::Other, Custom);
+ setOperationAction(ISD::VAARG, MVT::Other, Custom);
+ setOperationAction(ISD::VAARG, MVT::i32, Custom);
+
+ setOperationAction(ISD::JumpTable, MVT::i64, Custom);
+ setOperationAction(ISD::JumpTable, MVT::i32, Custom);
+
+ setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand);
+
+ setStackPointerRegisterToSaveRestore(Alpha::R30);
+
+ setJumpBufSize(272);
+ setJumpBufAlignment(16);
+
+ setMinFunctionAlignment(4);
+
+ setInsertFencesForAtomic(true);
+
+ computeRegisterProperties();
+}
+
+EVT AlphaTargetLowering::getSetCCResultType(EVT VT) const {
+ return MVT::i64;
+}
+
+const char *AlphaTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ default: return 0;
+ case AlphaISD::CVTQT_: return "Alpha::CVTQT_";
+ case AlphaISD::CVTQS_: return "Alpha::CVTQS_";
+ case AlphaISD::CVTTQ_: return "Alpha::CVTTQ_";
+ case AlphaISD::GPRelHi: return "Alpha::GPRelHi";
+ case AlphaISD::GPRelLo: return "Alpha::GPRelLo";
+ case AlphaISD::RelLit: return "Alpha::RelLit";
+ case AlphaISD::GlobalRetAddr: return "Alpha::GlobalRetAddr";
+ case AlphaISD::CALL: return "Alpha::CALL";
+ case AlphaISD::DivCall: return "Alpha::DivCall";
+ case AlphaISD::RET_FLAG: return "Alpha::RET_FLAG";
+ case AlphaISD::COND_BRANCH_I: return "Alpha::COND_BRANCH_I";
+ case AlphaISD::COND_BRANCH_F: return "Alpha::COND_BRANCH_F";
+ }
+}
+
+static SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) {
+ EVT PtrVT = Op.getValueType();
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+ SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
+ // FIXME there isn't really any debug info here
+ DebugLoc dl = Op.getDebugLoc();
+
+ SDValue Hi = DAG.getNode(AlphaISD::GPRelHi, dl, MVT::i64, JTI,
+ DAG.getGLOBAL_OFFSET_TABLE(MVT::i64));
+ SDValue Lo = DAG.getNode(AlphaISD::GPRelLo, dl, MVT::i64, JTI, Hi);
+ return Lo;
+}
+
+//http://www.cs.arizona.edu/computer.help/policy/DIGITAL_unix/
+//AA-PY8AC-TET1_html/callCH3.html#BLOCK21
+
+//For now, just use variable size stack frame format
+
+//In a standard call, the first six items are passed in registers $16
+//- $21 and/or registers $f16 - $f21. (See Section 4.1.2 for details
+//of argument-to-register correspondence.) The remaining items are
+//collected in a memory argument list that is a naturally aligned
+//array of quadwords. In a standard call, this list, if present, must
+//be passed at 0(SP).
+//7 ... n 0(SP) ... (n-7)*8(SP)
+
+// //#define FP $15
+// //#define RA $26
+// //#define PV $27
+// //#define GP $29
+// //#define SP $30
+
+#include "AlphaGenCallingConv.inc"
+
+SDValue
+AlphaTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ // Alpha target does not yet support tail call optimization.
+ isTailCall = false;
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+
+ CCInfo.AnalyzeCallOperands(Outs, CC_Alpha);
+
+ // Get a count of how many bytes are to be pushed on the stack.
+ unsigned NumBytes = CCInfo.getNextStackOffset();
+
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes,
+ getPointerTy(), true));
+
+ SmallVector<std::pair<unsigned, SDValue>, 4> RegsToPass;
+ SmallVector<SDValue, 12> MemOpChains;
+ SDValue StackPtr;
+
+ // Walk the register/memloc assignments, inserting copies/loads.
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+
+ SDValue Arg = OutVals[i];
+
+ // Promote the value if needed.
+ switch (VA.getLocInfo()) {
+ default: assert(0 && "Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::AExt:
+ Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ }
+
+ // Arguments that can be passed on register must be kept at RegsToPass
+ // vector
+ if (VA.isRegLoc()) {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ } else {
+ assert(VA.isMemLoc());
+
+ if (StackPtr.getNode() == 0)
+ StackPtr = DAG.getCopyFromReg(Chain, dl, Alpha::R30, MVT::i64);
+
+ SDValue PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(),
+ StackPtr,
+ DAG.getIntPtrConstant(VA.getLocMemOffset()));
+
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(),false, false, 0));
+ }
+ }
+
+ // Transform all store nodes into one single node because all store nodes are
+ // independent of each other.
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+ // Build a sequence of copy-to-reg nodes chained together with token chain and
+ // flag operands which copy the outgoing args into registers. The InFlag in
+ // necessary since all emitted instructions must be stuck together.
+ SDValue InFlag;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // Returns a chain & a flag for retval copy to use.
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ // Add argument registers to the end of the list so that they are
+ // known live into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+ if (InFlag.getNode())
+ Ops.push_back(InFlag);
+
+ Chain = DAG.getNode(AlphaISD::CALL, dl, NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ // Create the CALLSEQ_END node.
+ Chain = DAG.getCALLSEQ_END(Chain,
+ DAG.getConstant(NumBytes, getPointerTy(), true),
+ DAG.getConstant(0, getPointerTy(), true),
+ InFlag);
+ InFlag = Chain.getValue(1);
+
+ // Handle result values, copying them out of physregs into vregs that we
+ // return.
+ return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
+ Ins, dl, DAG, InVals);
+}
+
+/// LowerCallResult - Lower the result values of a call into the
+/// appropriate copies out of appropriate physical registers.
+///
+SDValue
+AlphaTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+
+ CCInfo.AnalyzeCallResult(Ins, RetCC_Alpha);
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+
+ Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
+ VA.getLocVT(), InFlag).getValue(1);
+ SDValue RetValue = Chain.getValue(0);
+ InFlag = Chain.getValue(2);
+
+ // If this is an 8/16/32-bit value, it is really passed promoted to 64
+ // bits. Insert an assert[sz]ext to capture this, then truncate to the
+ // right size.
+ if (VA.getLocInfo() == CCValAssign::SExt)
+ RetValue = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), RetValue,
+ DAG.getValueType(VA.getValVT()));
+ else if (VA.getLocInfo() == CCValAssign::ZExt)
+ RetValue = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), RetValue,
+ DAG.getValueType(VA.getValVT()));
+
+ if (VA.getLocInfo() != CCValAssign::Full)
+ RetValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), RetValue);
+
+ InVals.push_back(RetValue);
+ }
+
+ return Chain;
+}
+
+SDValue
+AlphaTargetLowering::LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals)
+ const {
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ AlphaMachineFunctionInfo *FuncInfo = MF.getInfo<AlphaMachineFunctionInfo>();
+
+ unsigned args_int[] = {
+ Alpha::R16, Alpha::R17, Alpha::R18, Alpha::R19, Alpha::R20, Alpha::R21};
+ unsigned args_float[] = {
+ Alpha::F16, Alpha::F17, Alpha::F18, Alpha::F19, Alpha::F20, Alpha::F21};
+
+ for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
+ SDValue argt;
+ EVT ObjectVT = Ins[ArgNo].VT;
+ SDValue ArgVal;
+
+ if (ArgNo < 6) {
+ switch (ObjectVT.getSimpleVT().SimpleTy) {
+ default:
+ assert(false && "Invalid value type!");
+ case MVT::f64:
+ args_float[ArgNo] = AddLiveIn(MF, args_float[ArgNo],
+ &Alpha::F8RCRegClass);
+ ArgVal = DAG.getCopyFromReg(Chain, dl, args_float[ArgNo], ObjectVT);
+ break;
+ case MVT::f32:
+ args_float[ArgNo] = AddLiveIn(MF, args_float[ArgNo],
+ &Alpha::F4RCRegClass);
+ ArgVal = DAG.getCopyFromReg(Chain, dl, args_float[ArgNo], ObjectVT);
+ break;
+ case MVT::i64:
+ args_int[ArgNo] = AddLiveIn(MF, args_int[ArgNo],
+ &Alpha::GPRCRegClass);
+ ArgVal = DAG.getCopyFromReg(Chain, dl, args_int[ArgNo], MVT::i64);
+ break;
+ }
+ } else { //more args
+ // Create the frame index object for this incoming parameter...
+ int FI = MFI->CreateFixedObject(8, 8 * (ArgNo - 6), true);
+
+ // Create the SelectionDAG nodes corresponding to a load
+ //from this parameter
+ SDValue FIN = DAG.getFrameIndex(FI, MVT::i64);
+ ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
+ false, false, 0);
+ }
+ InVals.push_back(ArgVal);
+ }
+
+ // If the functions takes variable number of arguments, copy all regs to stack
+ if (isVarArg) {
+ FuncInfo->setVarArgsOffset(Ins.size() * 8);
+ std::vector<SDValue> LS;
+ for (int i = 0; i < 6; ++i) {
+ if (TargetRegisterInfo::isPhysicalRegister(args_int[i]))
+ args_int[i] = AddLiveIn(MF, args_int[i], &Alpha::GPRCRegClass);
+ SDValue argt = DAG.getCopyFromReg(Chain, dl, args_int[i], MVT::i64);
+ int FI = MFI->CreateFixedObject(8, -8 * (6 - i), true);
+ if (i == 0) FuncInfo->setVarArgsBase(FI);
+ SDValue SDFI = DAG.getFrameIndex(FI, MVT::i64);
+ LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, MachinePointerInfo(),
+ false, false, 0));
+
+ if (TargetRegisterInfo::isPhysicalRegister(args_float[i]))
+ args_float[i] = AddLiveIn(MF, args_float[i], &Alpha::F8RCRegClass);
+ argt = DAG.getCopyFromReg(Chain, dl, args_float[i], MVT::f64);
+ FI = MFI->CreateFixedObject(8, - 8 * (12 - i), true);
+ SDFI = DAG.getFrameIndex(FI, MVT::i64);
+ LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, MachinePointerInfo(),
+ false, false, 0));
+ }
+
+ //Set up a token factor with all the stack traffic
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &LS[0], LS.size());
+ }
+
+ return Chain;
+}
+
+SDValue
+AlphaTargetLowering::LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const {
+
+ SDValue Copy = DAG.getCopyToReg(Chain, dl, Alpha::R26,
+ DAG.getNode(AlphaISD::GlobalRetAddr,
+ DebugLoc(), MVT::i64),
+ SDValue());
+ switch (Outs.size()) {
+ default:
+ llvm_unreachable("Do not know how to return this many arguments!");
+ case 0:
+ break;
+ //return SDValue(); // ret void is legal
+ case 1: {
+ EVT ArgVT = Outs[0].VT;
+ unsigned ArgReg;
+ if (ArgVT.isInteger())
+ ArgReg = Alpha::R0;
+ else {
+ assert(ArgVT.isFloatingPoint());
+ ArgReg = Alpha::F0;
+ }
+ Copy = DAG.getCopyToReg(Copy, dl, ArgReg,
+ OutVals[0], Copy.getValue(1));
+ if (DAG.getMachineFunction().getRegInfo().liveout_empty())
+ DAG.getMachineFunction().getRegInfo().addLiveOut(ArgReg);
+ break;
+ }
+ case 2: {
+ EVT ArgVT = Outs[0].VT;
+ unsigned ArgReg1, ArgReg2;
+ if (ArgVT.isInteger()) {
+ ArgReg1 = Alpha::R0;
+ ArgReg2 = Alpha::R1;
+ } else {
+ assert(ArgVT.isFloatingPoint());
+ ArgReg1 = Alpha::F0;
+ ArgReg2 = Alpha::F1;
+ }
+ Copy = DAG.getCopyToReg(Copy, dl, ArgReg1,
+ OutVals[0], Copy.getValue(1));
+ if (std::find(DAG.getMachineFunction().getRegInfo().liveout_begin(),
+ DAG.getMachineFunction().getRegInfo().liveout_end(), ArgReg1)
+ == DAG.getMachineFunction().getRegInfo().liveout_end())
+ DAG.getMachineFunction().getRegInfo().addLiveOut(ArgReg1);
+ Copy = DAG.getCopyToReg(Copy, dl, ArgReg2,
+ OutVals[1], Copy.getValue(1));
+ if (std::find(DAG.getMachineFunction().getRegInfo().liveout_begin(),
+ DAG.getMachineFunction().getRegInfo().liveout_end(), ArgReg2)
+ == DAG.getMachineFunction().getRegInfo().liveout_end())
+ DAG.getMachineFunction().getRegInfo().addLiveOut(ArgReg2);
+ break;
+ }
+ }
+ return DAG.getNode(AlphaISD::RET_FLAG, dl,
+ MVT::Other, Copy, Copy.getValue(1));
+}
+
+void AlphaTargetLowering::LowerVAARG(SDNode *N, SDValue &Chain,
+ SDValue &DataPtr,
+ SelectionDAG &DAG) const {
+ Chain = N->getOperand(0);
+ SDValue VAListP = N->getOperand(1);
+ const Value *VAListS = cast<SrcValueSDNode>(N->getOperand(2))->getValue();
+ DebugLoc dl = N->getDebugLoc();
+
+ SDValue Base = DAG.getLoad(MVT::i64, dl, Chain, VAListP,
+ MachinePointerInfo(VAListS),
+ false, false, 0);
+ SDValue Tmp = DAG.getNode(ISD::ADD, dl, MVT::i64, VAListP,
+ DAG.getConstant(8, MVT::i64));
+ SDValue Offset = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Base.getValue(1),
+ Tmp, MachinePointerInfo(),
+ MVT::i32, false, false, 0);
+ DataPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Base, Offset);
+ if (N->getValueType(0).isFloatingPoint())
+ {
+ //if fp && Offset < 6*8, then subtract 6*8 from DataPtr
+ SDValue FPDataPtr = DAG.getNode(ISD::SUB, dl, MVT::i64, DataPtr,
+ DAG.getConstant(8*6, MVT::i64));
+ SDValue CC = DAG.getSetCC(dl, MVT::i64, Offset,
+ DAG.getConstant(8*6, MVT::i64), ISD::SETLT);
+ DataPtr = DAG.getNode(ISD::SELECT, dl, MVT::i64, CC, FPDataPtr, DataPtr);
+ }
+
+ SDValue NewOffset = DAG.getNode(ISD::ADD, dl, MVT::i64, Offset,
+ DAG.getConstant(8, MVT::i64));
+ Chain = DAG.getTruncStore(Offset.getValue(1), dl, NewOffset, Tmp,
+ MachinePointerInfo(),
+ MVT::i32, false, false, 0);
+}
+
+/// LowerOperation - Provide custom lowering hooks for some operations.
+///
+SDValue AlphaTargetLowering::LowerOperation(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ switch (Op.getOpcode()) {
+ default: llvm_unreachable("Wasn't expecting to be able to lower this!");
+ case ISD::JumpTable: return LowerJumpTable(Op, DAG);
+
+ case ISD::INTRINSIC_WO_CHAIN: {
+ unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ switch (IntNo) {
+ default: break; // Don't custom lower most intrinsics.
+ case Intrinsic::alpha_umulh:
+ return DAG.getNode(ISD::MULHU, dl, MVT::i64,
+ Op.getOperand(1), Op.getOperand(2));
+ }
+ }
+
+ case ISD::SRL_PARTS: {
+ SDValue ShOpLo = Op.getOperand(0);
+ SDValue ShOpHi = Op.getOperand(1);
+ SDValue ShAmt = Op.getOperand(2);
+ SDValue bm = DAG.getNode(ISD::SUB, dl, MVT::i64,
+ DAG.getConstant(64, MVT::i64), ShAmt);
+ SDValue BMCC = DAG.getSetCC(dl, MVT::i64, bm,
+ DAG.getConstant(0, MVT::i64), ISD::SETLE);
+ // if 64 - shAmt <= 0
+ SDValue Hi_Neg = DAG.getConstant(0, MVT::i64);
+ SDValue ShAmt_Neg = DAG.getNode(ISD::SUB, dl, MVT::i64,
+ DAG.getConstant(0, MVT::i64), bm);
+ SDValue Lo_Neg = DAG.getNode(ISD::SRL, dl, MVT::i64, ShOpHi, ShAmt_Neg);
+ // else
+ SDValue carries = DAG.getNode(ISD::SHL, dl, MVT::i64, ShOpHi, bm);
+ SDValue Hi_Pos = DAG.getNode(ISD::SRL, dl, MVT::i64, ShOpHi, ShAmt);
+ SDValue Lo_Pos = DAG.getNode(ISD::SRL, dl, MVT::i64, ShOpLo, ShAmt);
+ Lo_Pos = DAG.getNode(ISD::OR, dl, MVT::i64, Lo_Pos, carries);
+ // Merge
+ SDValue Hi = DAG.getNode(ISD::SELECT, dl, MVT::i64, BMCC, Hi_Neg, Hi_Pos);
+ SDValue Lo = DAG.getNode(ISD::SELECT, dl, MVT::i64, BMCC, Lo_Neg, Lo_Pos);
+ SDValue Ops[2] = { Lo, Hi };
+ return DAG.getMergeValues(Ops, 2, dl);
+ }
+ // case ISD::SRA_PARTS:
+
+ // case ISD::SHL_PARTS:
+
+
+ case ISD::SINT_TO_FP: {
+ assert(Op.getOperand(0).getValueType() == MVT::i64 &&
+ "Unhandled SINT_TO_FP type in custom expander!");
+ SDValue LD;
+ bool isDouble = Op.getValueType() == MVT::f64;
+ LD = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op.getOperand(0));
+ SDValue FP = DAG.getNode(isDouble?AlphaISD::CVTQT_:AlphaISD::CVTQS_, dl,
+ isDouble?MVT::f64:MVT::f32, LD);
+ return FP;
+ }
+ case ISD::FP_TO_SINT: {
+ bool isDouble = Op.getOperand(0).getValueType() == MVT::f64;
+ SDValue src = Op.getOperand(0);
+
+ if (!isDouble) //Promote
+ src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, src);
+
+ src = DAG.getNode(AlphaISD::CVTTQ_, dl, MVT::f64, src);
+
+ return DAG.getNode(ISD::BITCAST, dl, MVT::i64, src);
+ }
+ case ISD::ConstantPool: {
+ ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
+ const Constant *C = CP->getConstVal();
+ SDValue CPI = DAG.getTargetConstantPool(C, MVT::i64, CP->getAlignment());
+ // FIXME there isn't really any debug info here
+
+ SDValue Hi = DAG.getNode(AlphaISD::GPRelHi, dl, MVT::i64, CPI,
+ DAG.getGLOBAL_OFFSET_TABLE(MVT::i64));
+ SDValue Lo = DAG.getNode(AlphaISD::GPRelLo, dl, MVT::i64, CPI, Hi);
+ return Lo;
+ }
+ case ISD::GlobalTLSAddress:
+ llvm_unreachable("TLS not implemented for Alpha.");
+ case ISD::GlobalAddress: {
+ GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
+ const GlobalValue *GV = GSDN->getGlobal();
+ SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i64,
+ GSDN->getOffset());
+ // FIXME there isn't really any debug info here
+
+ // if (!GV->hasWeakLinkage() && !GV->isDeclaration()
+ // && !GV->hasLinkOnceLinkage()) {
+ if (GV->hasLocalLinkage()) {
+ SDValue Hi = DAG.getNode(AlphaISD::GPRelHi, dl, MVT::i64, GA,
+ DAG.getGLOBAL_OFFSET_TABLE(MVT::i64));
+ SDValue Lo = DAG.getNode(AlphaISD::GPRelLo, dl, MVT::i64, GA, Hi);
+ return Lo;
+ } else
+ return DAG.getNode(AlphaISD::RelLit, dl, MVT::i64, GA,
+ DAG.getGLOBAL_OFFSET_TABLE(MVT::i64));
+ }
+ case ISD::ExternalSymbol: {
+ return DAG.getNode(AlphaISD::RelLit, dl, MVT::i64,
+ DAG.getTargetExternalSymbol(cast<ExternalSymbolSDNode>(Op)
+ ->getSymbol(), MVT::i64),
+ DAG.getGLOBAL_OFFSET_TABLE(MVT::i64));
+ }
+
+ case ISD::UREM:
+ case ISD::SREM:
+ //Expand only on constant case
+ if (Op.getOperand(1).getOpcode() == ISD::Constant) {
+ EVT VT = Op.getNode()->getValueType(0);
+ SDValue Tmp1 = Op.getNode()->getOpcode() == ISD::UREM ?
+ BuildUDIV(Op.getNode(), DAG, NULL) :
+ BuildSDIV(Op.getNode(), DAG, NULL);
+ Tmp1 = DAG.getNode(ISD::MUL, dl, VT, Tmp1, Op.getOperand(1));
+ Tmp1 = DAG.getNode(ISD::SUB, dl, VT, Op.getOperand(0), Tmp1);
+ return Tmp1;
+ }
+ //fall through
+ case ISD::SDIV:
+ case ISD::UDIV:
+ if (Op.getValueType().isInteger()) {
+ if (Op.getOperand(1).getOpcode() == ISD::Constant)
+ return Op.getOpcode() == ISD::SDIV ? BuildSDIV(Op.getNode(), DAG, NULL)
+ : BuildUDIV(Op.getNode(), DAG, NULL);
+ const char* opstr = 0;
+ switch (Op.getOpcode()) {
+ case ISD::UREM: opstr = "__remqu"; break;
+ case ISD::SREM: opstr = "__remq"; break;
+ case ISD::UDIV: opstr = "__divqu"; break;
+ case ISD::SDIV: opstr = "__divq"; break;
+ }
+ SDValue Tmp1 = Op.getOperand(0),
+ Tmp2 = Op.getOperand(1),
+ Addr = DAG.getExternalSymbol(opstr, MVT::i64);
+ return DAG.getNode(AlphaISD::DivCall, dl, MVT::i64, Addr, Tmp1, Tmp2);
+ }
+ break;
+
+ case ISD::VAARG: {
+ SDValue Chain, DataPtr;
+ LowerVAARG(Op.getNode(), Chain, DataPtr, DAG);
+
+ SDValue Result;
+ if (Op.getValueType() == MVT::i32)
+ Result = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Chain, DataPtr,
+ MachinePointerInfo(), MVT::i32, false, false, 0);
+ else
+ Result = DAG.getLoad(Op.getValueType(), dl, Chain, DataPtr,
+ MachinePointerInfo(),
+ false, false, 0);
+ return Result;
+ }
+ case ISD::VACOPY: {
+ SDValue Chain = Op.getOperand(0);
+ SDValue DestP = Op.getOperand(1);
+ SDValue SrcP = Op.getOperand(2);
+ const Value *DestS = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
+ const Value *SrcS = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
+
+ SDValue Val = DAG.getLoad(getPointerTy(), dl, Chain, SrcP,
+ MachinePointerInfo(SrcS),
+ false, false, 0);
+ SDValue Result = DAG.getStore(Val.getValue(1), dl, Val, DestP,
+ MachinePointerInfo(DestS),
+ false, false, 0);
+ SDValue NP = DAG.getNode(ISD::ADD, dl, MVT::i64, SrcP,
+ DAG.getConstant(8, MVT::i64));
+ Val = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Result,
+ NP, MachinePointerInfo(), MVT::i32, false, false, 0);
+ SDValue NPD = DAG.getNode(ISD::ADD, dl, MVT::i64, DestP,
+ DAG.getConstant(8, MVT::i64));
+ return DAG.getTruncStore(Val.getValue(1), dl, Val, NPD,
+ MachinePointerInfo(), MVT::i32,
+ false, false, 0);
+ }
+ case ISD::VASTART: {
+ MachineFunction &MF = DAG.getMachineFunction();
+ AlphaMachineFunctionInfo *FuncInfo = MF.getInfo<AlphaMachineFunctionInfo>();
+
+ SDValue Chain = Op.getOperand(0);
+ SDValue VAListP = Op.getOperand(1);
+ const Value *VAListS = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+
+ // vastart stores the address of the VarArgsBase and VarArgsOffset
+ SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsBase(), MVT::i64);
+ SDValue S1 = DAG.getStore(Chain, dl, FR, VAListP,
+ MachinePointerInfo(VAListS), false, false, 0);
+ SDValue SA2 = DAG.getNode(ISD::ADD, dl, MVT::i64, VAListP,
+ DAG.getConstant(8, MVT::i64));
+ return DAG.getTruncStore(S1, dl,
+ DAG.getConstant(FuncInfo->getVarArgsOffset(),
+ MVT::i64),
+ SA2, MachinePointerInfo(),
+ MVT::i32, false, false, 0);
+ }
+ case ISD::RETURNADDR:
+ return DAG.getNode(AlphaISD::GlobalRetAddr, DebugLoc(), MVT::i64);
+ //FIXME: implement
+ case ISD::FRAMEADDR: break;
+ }
+
+ return SDValue();
+}
+
+void AlphaTargetLowering::ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = N->getDebugLoc();
+ assert(N->getValueType(0) == MVT::i32 &&
+ N->getOpcode() == ISD::VAARG &&
+ "Unknown node to custom promote!");
+
+ SDValue Chain, DataPtr;
+ LowerVAARG(N, Chain, DataPtr, DAG);
+ SDValue Res = DAG.getLoad(N->getValueType(0), dl, Chain, DataPtr,
+ MachinePointerInfo(),
+ false, false, 0);
+ Results.push_back(Res);
+ Results.push_back(SDValue(Res.getNode(), 1));
+}
+
+
+//Inline Asm
+
+/// getConstraintType - Given a constraint letter, return the type of
+/// constraint it is for this target.
+AlphaTargetLowering::ConstraintType
+AlphaTargetLowering::getConstraintType(const std::string &Constraint) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ default: break;
+ case 'f':
+ case 'r':
+ return C_RegisterClass;
+ }
+ }
+ return TargetLowering::getConstraintType(Constraint);
+}
+
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+AlphaTargetLowering::getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const {
+ ConstraintWeight weight = CW_Invalid;
+ Value *CallOperandVal = info.CallOperandVal;
+ // If we don't have a value, we can't do a match,
+ // but allow it at the lowest weight.
+ if (CallOperandVal == NULL)
+ return CW_Default;
+ // Look at the constraint type.
+ switch (*constraint) {
+ default:
+ weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
+ break;
+ case 'f':
+ weight = CW_Register;
+ break;
+ }
+ return weight;
+}
+
+/// Given a register class constraint, like 'r', if this corresponds directly
+/// to an LLVM register class, return a register of 0 and the register class
+/// pointer.
+std::pair<unsigned, const TargetRegisterClass*> AlphaTargetLowering::
+getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const
+{
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ case 'r':
+ return std::make_pair(0U, Alpha::GPRCRegisterClass);
+ case 'f':
+ return VT == MVT::f64 ? std::make_pair(0U, Alpha::F8RCRegisterClass) :
+ std::make_pair(0U, Alpha::F4RCRegisterClass);
+ }
+ }
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
+
+//===----------------------------------------------------------------------===//
+// Other Lowering Code
+//===----------------------------------------------------------------------===//
+
+MachineBasicBlock *
+AlphaTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ assert((MI->getOpcode() == Alpha::CAS32 ||
+ MI->getOpcode() == Alpha::CAS64 ||
+ MI->getOpcode() == Alpha::LAS32 ||
+ MI->getOpcode() == Alpha::LAS64 ||
+ MI->getOpcode() == Alpha::SWAP32 ||
+ MI->getOpcode() == Alpha::SWAP64) &&
+ "Unexpected instr type to insert");
+
+ bool is32 = MI->getOpcode() == Alpha::CAS32 ||
+ MI->getOpcode() == Alpha::LAS32 ||
+ MI->getOpcode() == Alpha::SWAP32;
+
+ //Load locked store conditional for atomic ops take on the same form
+ //start:
+ //ll
+ //do stuff (maybe branch to exit)
+ //sc
+ //test sc and maybe branck to start
+ //exit:
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ DebugLoc dl = MI->getDebugLoc();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ MachineBasicBlock *thisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *llscMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+
+ sinkMBB->splice(sinkMBB->begin(), thisMBB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ thisMBB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(thisMBB);
+
+ F->insert(It, llscMBB);
+ F->insert(It, sinkMBB);
+
+ BuildMI(thisMBB, dl, TII->get(Alpha::BR)).addMBB(llscMBB);
+
+ unsigned reg_res = MI->getOperand(0).getReg(),
+ reg_ptr = MI->getOperand(1).getReg(),
+ reg_v2 = MI->getOperand(2).getReg(),
+ reg_store = F->getRegInfo().createVirtualRegister(&Alpha::GPRCRegClass);
+
+ BuildMI(llscMBB, dl, TII->get(is32 ? Alpha::LDL_L : Alpha::LDQ_L),
+ reg_res).addImm(0).addReg(reg_ptr);
+ switch (MI->getOpcode()) {
+ case Alpha::CAS32:
+ case Alpha::CAS64: {
+ unsigned reg_cmp
+ = F->getRegInfo().createVirtualRegister(&Alpha::GPRCRegClass);
+ BuildMI(llscMBB, dl, TII->get(Alpha::CMPEQ), reg_cmp)
+ .addReg(reg_v2).addReg(reg_res);
+ BuildMI(llscMBB, dl, TII->get(Alpha::BEQ))
+ .addImm(0).addReg(reg_cmp).addMBB(sinkMBB);
+ BuildMI(llscMBB, dl, TII->get(Alpha::BISr), reg_store)
+ .addReg(Alpha::R31).addReg(MI->getOperand(3).getReg());
+ break;
+ }
+ case Alpha::LAS32:
+ case Alpha::LAS64: {
+ BuildMI(llscMBB, dl,TII->get(is32 ? Alpha::ADDLr : Alpha::ADDQr), reg_store)
+ .addReg(reg_res).addReg(reg_v2);
+ break;
+ }
+ case Alpha::SWAP32:
+ case Alpha::SWAP64: {
+ BuildMI(llscMBB, dl, TII->get(Alpha::BISr), reg_store)
+ .addReg(reg_v2).addReg(reg_v2);
+ break;
+ }
+ }
+ BuildMI(llscMBB, dl, TII->get(is32 ? Alpha::STL_C : Alpha::STQ_C), reg_store)
+ .addReg(reg_store).addImm(0).addReg(reg_ptr);
+ BuildMI(llscMBB, dl, TII->get(Alpha::BEQ))
+ .addImm(0).addReg(reg_store).addMBB(llscMBB);
+ BuildMI(llscMBB, dl, TII->get(Alpha::BR)).addMBB(sinkMBB);
+
+ thisMBB->addSuccessor(llscMBB);
+ llscMBB->addSuccessor(llscMBB);
+ llscMBB->addSuccessor(sinkMBB);
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+
+ return sinkMBB;
+}
+
+bool
+AlphaTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+ // The Alpha target isn't yet aware of offsets.
+ return false;
+}
+
+bool AlphaTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
+ if (VT != MVT::f32 && VT != MVT::f64)
+ return false;
+ // +0.0 F31
+ // +0.0f F31
+ // -0.0 -F31
+ // -0.0f -F31
+ return Imm.isZero() || Imm.isNegZero();
+}
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaISelLowering.h b/contrib/llvm/lib/Target/Alpha/AlphaISelLowering.h
new file mode 100644
index 0000000..80f8efa
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/AlphaISelLowering.h
@@ -0,0 +1,142 @@
+//===-- AlphaISelLowering.h - Alpha DAG Lowering Interface ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that Alpha uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_ALPHA_ALPHAISELLOWERING_H
+#define LLVM_TARGET_ALPHA_ALPHAISELLOWERING_H
+
+#include "llvm/ADT/VectorExtras.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "Alpha.h"
+
+namespace llvm {
+
+ namespace AlphaISD {
+ enum NodeType {
+ // Start the numbering where the builting ops and target ops leave off.
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+ //These corrospond to the identical Instruction
+ CVTQT_, CVTQS_, CVTTQ_,
+
+ /// GPRelHi/GPRelLo - These represent the high and low 16-bit
+ /// parts of a global address respectively.
+ GPRelHi, GPRelLo,
+
+ /// RetLit - Literal Relocation of a Global
+ RelLit,
+
+ /// GlobalRetAddr - used to restore the return address
+ GlobalRetAddr,
+
+ /// CALL - Normal call.
+ CALL,
+
+ /// DIVCALL - used for special library calls for div and rem
+ DivCall,
+
+ /// return flag operand
+ RET_FLAG,
+
+ /// CHAIN = COND_BRANCH CHAIN, OPC, (G|F)PRC, DESTBB [, INFLAG] - This
+ /// corresponds to the COND_BRANCH pseudo instruction.
+ /// *PRC is the input register to compare to zero,
+ /// OPC is the branch opcode to use (e.g. Alpha::BEQ),
+ /// DESTBB is the destination block to branch to, and INFLAG is
+ /// an optional input flag argument.
+ COND_BRANCH_I, COND_BRANCH_F
+
+ };
+ }
+
+ class AlphaTargetLowering : public TargetLowering {
+ public:
+ explicit AlphaTargetLowering(TargetMachine &TM);
+
+ virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i64; }
+
+ /// getSetCCResultType - Get the SETCC result ValueType
+ virtual EVT getSetCCResultType(EVT VT) const;
+
+ /// LowerOperation - Provide custom lowering hooks for some operations.
+ ///
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+
+ /// ReplaceNodeResults - Replace the results of node with an illegal result
+ /// type with new values built out of custom code.
+ ///
+ virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG) const;
+
+ // Friendly names for dumps
+ const char *getTargetNodeName(unsigned Opcode) const;
+
+ SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ ConstraintType getConstraintType(const std::string &Constraint) const;
+
+ /// Examine constraint string and operand type and determine a weight value.
+ /// The operand object must already have been set up with the operand type.
+ ConstraintWeight getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const;
+
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const;
+
+ MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+
+ virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
+
+ /// isFPImmLegal - Returns true if the target can instruction select the
+ /// specified FP immediate natively. If false, the legalizer will
+ /// materialize the FP immediate as a load from a constant pool.
+ virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
+
+ private:
+ // Helpers for custom lowering.
+ void LowerVAARG(SDNode *N, SDValue &Chain, SDValue &DataPtr,
+ SelectionDAG &DAG) const;
+
+ virtual SDValue
+ LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg, bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const;
+ };
+}
+
+#endif // LLVM_TARGET_ALPHA_ALPHAISELLOWERING_H
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaInstrFormats.td b/contrib/llvm/lib/Target/Alpha/AlphaInstrFormats.td
new file mode 100644
index 0000000..6f4ebf2
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/AlphaInstrFormats.td
@@ -0,0 +1,268 @@
+//===- AlphaInstrFormats.td - Alpha Instruction Formats ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+//3.3:
+//Memory
+//Branch
+//Operate
+//Floating-point
+//PALcode
+
+def u8imm : Operand<i64>;
+def s14imm : Operand<i64>;
+def s16imm : Operand<i64>;
+def s21imm : Operand<i64>;
+def s64imm : Operand<i64>;
+def u64imm : Operand<i64>;
+
+//===----------------------------------------------------------------------===//
+// Instruction format superclass
+//===----------------------------------------------------------------------===//
+// Alpha instruction baseline
+class InstAlpha<bits<6> op, string asmstr, InstrItinClass itin> : Instruction {
+ field bits<32> Inst;
+ let Namespace = "Alpha";
+ let AsmString = asmstr;
+ let Inst{31-26} = op;
+ let Itinerary = itin;
+}
+
+
+//3.3.1
+class MForm<bits<6> opcode, bit load, string asmstr, list<dag> pattern, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ let Pattern = pattern;
+ let canFoldAsLoad = load;
+ let Defs = [R28]; //We may use this for frame index calculations, so reserve it here
+
+ bits<5> Ra;
+ bits<16> disp;
+ bits<5> Rb;
+
+ let Inst{25-21} = Ra;
+ let Inst{20-16} = Rb;
+ let Inst{15-0} = disp;
+}
+class MfcForm<bits<6> opcode, bits<16> fc, string asmstr, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ bits<5> Ra;
+
+ let OutOperandList = (outs GPRC:$RA);
+ let InOperandList = (ins);
+ let Inst{25-21} = Ra;
+ let Inst{20-16} = 0;
+ let Inst{15-0} = fc;
+}
+class MfcPForm<bits<6> opcode, bits<16> fc, string asmstr, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ let OutOperandList = (outs);
+ let InOperandList = (ins);
+ let Inst{25-21} = 0;
+ let Inst{20-16} = 0;
+ let Inst{15-0} = fc;
+}
+
+class MbrForm<bits<6> opcode, bits<2> TB, dag OL, string asmstr, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ bits<5> Ra;
+ bits<5> Rb;
+ bits<14> disp;
+
+ let OutOperandList = (outs);
+ let InOperandList = OL;
+
+ let Inst{25-21} = Ra;
+ let Inst{20-16} = Rb;
+ let Inst{15-14} = TB;
+ let Inst{13-0} = disp;
+}
+class MbrpForm<bits<6> opcode, bits<2> TB, dag OL, string asmstr, list<dag> pattern, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ let Pattern=pattern;
+ bits<5> Ra;
+ bits<5> Rb;
+ bits<14> disp;
+
+ let OutOperandList = (outs);
+ let InOperandList = OL;
+
+ let Inst{25-21} = Ra;
+ let Inst{20-16} = Rb;
+ let Inst{15-14} = TB;
+ let Inst{13-0} = disp;
+}
+
+//3.3.2
+def target : Operand<OtherVT> {}
+
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in {
+class BFormN<bits<6> opcode, dag OL, string asmstr, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ let OutOperandList = (outs);
+ let InOperandList = OL;
+ bits<64> Opc; //dummy
+ bits<5> Ra;
+ bits<21> disp;
+
+ let Inst{25-21} = Ra;
+ let Inst{20-0} = disp;
+}
+}
+
+let isBranch = 1, isTerminator = 1 in
+class BFormD<bits<6> opcode, string asmstr, list<dag> pattern, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ let Pattern = pattern;
+ let OutOperandList = (outs);
+ let InOperandList = (ins target:$DISP);
+ bits<5> Ra;
+ bits<21> disp;
+
+ let Inst{25-21} = Ra;
+ let Inst{20-0} = disp;
+}
+
+//3.3.3
+class OForm<bits<6> opcode, bits<7> fun, string asmstr, list<dag> pattern, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ let Pattern = pattern;
+ let OutOperandList = (outs GPRC:$RC);
+ let InOperandList = (ins GPRC:$RA, GPRC:$RB);
+
+ bits<5> Rc;
+ bits<5> Ra;
+ bits<5> Rb;
+ bits<7> Function = fun;
+
+ let Inst{25-21} = Ra;
+ let Inst{20-16} = Rb;
+ let Inst{15-13} = 0;
+ let Inst{12} = 0;
+ let Inst{11-5} = Function;
+ let Inst{4-0} = Rc;
+}
+
+class OForm2<bits<6> opcode, bits<7> fun, string asmstr, list<dag> pattern, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ let Pattern = pattern;
+ let OutOperandList = (outs GPRC:$RC);
+ let InOperandList = (ins GPRC:$RB);
+
+ bits<5> Rc;
+ bits<5> Rb;
+ bits<7> Function = fun;
+
+ let Inst{25-21} = 31;
+ let Inst{20-16} = Rb;
+ let Inst{15-13} = 0;
+ let Inst{12} = 0;
+ let Inst{11-5} = Function;
+ let Inst{4-0} = Rc;
+}
+
+class OForm4<bits<6> opcode, bits<7> fun, string asmstr, list<dag> pattern, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ let Pattern = pattern;
+ let OutOperandList = (outs GPRC:$RDEST);
+ let InOperandList = (ins GPRC:$RCOND, GPRC:$RTRUE, GPRC:$RFALSE);
+ let Constraints = "$RFALSE = $RDEST";
+ let DisableEncoding = "$RFALSE";
+
+ bits<5> Rc;
+ bits<5> Ra;
+ bits<5> Rb;
+ bits<7> Function = fun;
+
+// let Constraints = "$RFALSE = $RDEST";
+ let Inst{25-21} = Ra;
+ let Inst{20-16} = Rb;
+ let Inst{15-13} = 0;
+ let Inst{12} = 0;
+ let Inst{11-5} = Function;
+ let Inst{4-0} = Rc;
+}
+
+
+class OFormL<bits<6> opcode, bits<7> fun, string asmstr, list<dag> pattern, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ let Pattern = pattern;
+ let OutOperandList = (outs GPRC:$RC);
+ let InOperandList = (ins GPRC:$RA, u8imm:$L);
+
+ bits<5> Rc;
+ bits<5> Ra;
+ bits<8> LIT;
+ bits<7> Function = fun;
+
+ let Inst{25-21} = Ra;
+ let Inst{20-13} = LIT;
+ let Inst{12} = 1;
+ let Inst{11-5} = Function;
+ let Inst{4-0} = Rc;
+}
+
+class OForm4L<bits<6> opcode, bits<7> fun, string asmstr, list<dag> pattern, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ let Pattern = pattern;
+ let OutOperandList = (outs GPRC:$RDEST);
+ let InOperandList = (ins GPRC:$RCOND, s64imm:$RTRUE, GPRC:$RFALSE);
+ let Constraints = "$RFALSE = $RDEST";
+ let DisableEncoding = "$RFALSE";
+
+ bits<5> Rc;
+ bits<5> Ra;
+ bits<8> LIT;
+ bits<7> Function = fun;
+
+// let Constraints = "$RFALSE = $RDEST";
+ let Inst{25-21} = Ra;
+ let Inst{20-13} = LIT;
+ let Inst{12} = 1;
+ let Inst{11-5} = Function;
+ let Inst{4-0} = Rc;
+}
+
+//3.3.4
+class FPForm<bits<6> opcode, bits<11> fun, string asmstr, list<dag> pattern, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ let Pattern = pattern;
+
+ bits<5> Fc;
+ bits<5> Fa;
+ bits<5> Fb;
+ bits<11> Function = fun;
+
+ let Inst{25-21} = Fa;
+ let Inst{20-16} = Fb;
+ let Inst{15-5} = Function;
+ let Inst{4-0} = Fc;
+}
+
+//3.3.5
+class PALForm<bits<6> opcode, dag OL, string asmstr, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ let OutOperandList = (outs);
+ let InOperandList = OL;
+ bits<26> Function;
+
+ let Inst{25-0} = Function;
+}
+
+
+// Pseudo instructions.
+class PseudoInstAlpha<dag OOL, dag IOL, string nm, list<dag> pattern, InstrItinClass itin>
+ : InstAlpha<0, nm, itin> {
+ let OutOperandList = OOL;
+ let InOperandList = IOL;
+ let Pattern = pattern;
+
+}
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaInstrInfo.cpp b/contrib/llvm/lib/Target/Alpha/AlphaInstrInfo.cpp
new file mode 100644
index 0000000..8df2ed7
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/AlphaInstrInfo.cpp
@@ -0,0 +1,382 @@
+//===- AlphaInstrInfo.cpp - Alpha Instruction Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Alpha implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Alpha.h"
+#include "AlphaInstrInfo.h"
+#include "AlphaMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/ErrorHandling.h"
+
+#define GET_INSTRINFO_CTOR
+#include "AlphaGenInstrInfo.inc"
+using namespace llvm;
+
+AlphaInstrInfo::AlphaInstrInfo()
+ : AlphaGenInstrInfo(Alpha::ADJUSTSTACKDOWN, Alpha::ADJUSTSTACKUP),
+ RI(*this) {
+}
+
+
+unsigned
+AlphaInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ case Alpha::LDL:
+ case Alpha::LDQ:
+ case Alpha::LDBU:
+ case Alpha::LDWU:
+ case Alpha::LDS:
+ case Alpha::LDT:
+ if (MI->getOperand(1).isFI()) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+ return 0;
+}
+
+unsigned
+AlphaInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ case Alpha::STL:
+ case Alpha::STQ:
+ case Alpha::STB:
+ case Alpha::STW:
+ case Alpha::STS:
+ case Alpha::STT:
+ if (MI->getOperand(1).isFI()) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+ return 0;
+}
+
+static bool isAlphaIntCondCode(unsigned Opcode) {
+ switch (Opcode) {
+ case Alpha::BEQ:
+ case Alpha::BNE:
+ case Alpha::BGE:
+ case Alpha::BGT:
+ case Alpha::BLE:
+ case Alpha::BLT:
+ case Alpha::BLBC:
+ case Alpha::BLBS:
+ return true;
+ default:
+ return false;
+ }
+}
+
+unsigned AlphaInstrInfo::InsertBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert((Cond.size() == 2 || Cond.size() == 0) &&
+ "Alpha branch conditions have two components!");
+
+ // One-way branch.
+ if (FBB == 0) {
+ if (Cond.empty()) // Unconditional branch
+ BuildMI(&MBB, DL, get(Alpha::BR)).addMBB(TBB);
+ else // Conditional branch
+ if (isAlphaIntCondCode(Cond[0].getImm()))
+ BuildMI(&MBB, DL, get(Alpha::COND_BRANCH_I))
+ .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB);
+ else
+ BuildMI(&MBB, DL, get(Alpha::COND_BRANCH_F))
+ .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB);
+ return 1;
+ }
+
+ // Two-way Conditional Branch.
+ if (isAlphaIntCondCode(Cond[0].getImm()))
+ BuildMI(&MBB, DL, get(Alpha::COND_BRANCH_I))
+ .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB);
+ else
+ BuildMI(&MBB, DL, get(Alpha::COND_BRANCH_F))
+ .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB);
+ BuildMI(&MBB, DL, get(Alpha::BR)).addMBB(FBB);
+ return 2;
+}
+
+void AlphaInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ if (Alpha::GPRCRegClass.contains(DestReg, SrcReg)) {
+ BuildMI(MBB, MI, DL, get(Alpha::BISr), DestReg)
+ .addReg(SrcReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ } else if (Alpha::F4RCRegClass.contains(DestReg, SrcReg)) {
+ BuildMI(MBB, MI, DL, get(Alpha::CPYSS), DestReg)
+ .addReg(SrcReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ } else if (Alpha::F8RCRegClass.contains(DestReg, SrcReg)) {
+ BuildMI(MBB, MI, DL, get(Alpha::CPYST), DestReg)
+ .addReg(SrcReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ } else {
+ llvm_unreachable("Attempt to copy register that is not GPR or FPR");
+ }
+}
+
+void
+AlphaInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ //cerr << "Trying to store " << getPrettyName(SrcReg) << " to "
+ // << FrameIdx << "\n";
+ //BuildMI(MBB, MI, Alpha::WTF, 0).addReg(SrcReg);
+
+ DebugLoc DL;
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ if (RC == Alpha::F4RCRegisterClass)
+ BuildMI(MBB, MI, DL, get(Alpha::STS))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FrameIdx).addReg(Alpha::F31);
+ else if (RC == Alpha::F8RCRegisterClass)
+ BuildMI(MBB, MI, DL, get(Alpha::STT))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FrameIdx).addReg(Alpha::F31);
+ else if (RC == Alpha::GPRCRegisterClass)
+ BuildMI(MBB, MI, DL, get(Alpha::STQ))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FrameIdx).addReg(Alpha::F31);
+ else
+ llvm_unreachable("Unhandled register class");
+}
+
+void
+AlphaInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ //cerr << "Trying to load " << getPrettyName(DestReg) << " to "
+ // << FrameIdx << "\n";
+ DebugLoc DL;
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ if (RC == Alpha::F4RCRegisterClass)
+ BuildMI(MBB, MI, DL, get(Alpha::LDS), DestReg)
+ .addFrameIndex(FrameIdx).addReg(Alpha::F31);
+ else if (RC == Alpha::F8RCRegisterClass)
+ BuildMI(MBB, MI, DL, get(Alpha::LDT), DestReg)
+ .addFrameIndex(FrameIdx).addReg(Alpha::F31);
+ else if (RC == Alpha::GPRCRegisterClass)
+ BuildMI(MBB, MI, DL, get(Alpha::LDQ), DestReg)
+ .addFrameIndex(FrameIdx).addReg(Alpha::F31);
+ else
+ llvm_unreachable("Unhandled register class");
+}
+
+static unsigned AlphaRevCondCode(unsigned Opcode) {
+ switch (Opcode) {
+ case Alpha::BEQ: return Alpha::BNE;
+ case Alpha::BNE: return Alpha::BEQ;
+ case Alpha::BGE: return Alpha::BLT;
+ case Alpha::BGT: return Alpha::BLE;
+ case Alpha::BLE: return Alpha::BGT;
+ case Alpha::BLT: return Alpha::BGE;
+ case Alpha::BLBC: return Alpha::BLBS;
+ case Alpha::BLBS: return Alpha::BLBC;
+ case Alpha::FBEQ: return Alpha::FBNE;
+ case Alpha::FBNE: return Alpha::FBEQ;
+ case Alpha::FBGE: return Alpha::FBLT;
+ case Alpha::FBGT: return Alpha::FBLE;
+ case Alpha::FBLE: return Alpha::FBGT;
+ case Alpha::FBLT: return Alpha::FBGE;
+ default:
+ llvm_unreachable("Unknown opcode");
+ }
+ return 0; // Not reached
+}
+
+// Branch analysis.
+bool AlphaInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ // If the block has no terminators, it just falls into the block after it.
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin())
+ return false;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return false;
+ --I;
+ }
+ if (!isUnpredicatedTerminator(I))
+ return false;
+
+ // Get the last instruction in the block.
+ MachineInstr *LastInst = I;
+
+ // If there is only one terminator instruction, process it.
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+ if (LastInst->getOpcode() == Alpha::BR) {
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ } else if (LastInst->getOpcode() == Alpha::COND_BRANCH_I ||
+ LastInst->getOpcode() == Alpha::COND_BRANCH_F) {
+ // Block ends with fall-through condbranch.
+ TBB = LastInst->getOperand(2).getMBB();
+ Cond.push_back(LastInst->getOperand(0));
+ Cond.push_back(LastInst->getOperand(1));
+ return false;
+ }
+ // Otherwise, don't know what this is.
+ return true;
+ }
+
+ // Get the instruction before it if it's a terminator.
+ MachineInstr *SecondLastInst = I;
+
+ // If there are three terminators, we don't know what sort of block this is.
+ if (SecondLastInst && I != MBB.begin() &&
+ isUnpredicatedTerminator(--I))
+ return true;
+
+ // If the block ends with Alpha::BR and Alpha::COND_BRANCH_*, handle it.
+ if ((SecondLastInst->getOpcode() == Alpha::COND_BRANCH_I ||
+ SecondLastInst->getOpcode() == Alpha::COND_BRANCH_F) &&
+ LastInst->getOpcode() == Alpha::BR) {
+ TBB = SecondLastInst->getOperand(2).getMBB();
+ Cond.push_back(SecondLastInst->getOperand(0));
+ Cond.push_back(SecondLastInst->getOperand(1));
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+
+ // If the block ends with two Alpha::BRs, handle it. The second one is not
+ // executed, so remove it.
+ if (SecondLastInst->getOpcode() == Alpha::BR &&
+ LastInst->getOpcode() == Alpha::BR) {
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ I = LastInst;
+ if (AllowModify)
+ I->eraseFromParent();
+ return false;
+ }
+
+ // Otherwise, can't handle this.
+ return true;
+}
+
+unsigned AlphaInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin()) return 0;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return 0;
+ --I;
+ }
+ if (I->getOpcode() != Alpha::BR &&
+ I->getOpcode() != Alpha::COND_BRANCH_I &&
+ I->getOpcode() != Alpha::COND_BRANCH_F)
+ return 0;
+
+ // Remove the branch.
+ I->eraseFromParent();
+
+ I = MBB.end();
+
+ if (I == MBB.begin()) return 1;
+ --I;
+ if (I->getOpcode() != Alpha::COND_BRANCH_I &&
+ I->getOpcode() != Alpha::COND_BRANCH_F)
+ return 1;
+
+ // Remove the branch.
+ I->eraseFromParent();
+ return 2;
+}
+
+void AlphaInstrInfo::insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const {
+ DebugLoc DL;
+ BuildMI(MBB, MI, DL, get(Alpha::BISr), Alpha::R31)
+ .addReg(Alpha::R31)
+ .addReg(Alpha::R31);
+}
+
+bool AlphaInstrInfo::
+ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
+ assert(Cond.size() == 2 && "Invalid Alpha branch opcode!");
+ Cond[0].setImm(AlphaRevCondCode(Cond[0].getImm()));
+ return false;
+}
+
+/// getGlobalBaseReg - Return a virtual register initialized with the
+/// the global base register value. Output instructions required to
+/// initialize the register in the function entry block, if necessary.
+///
+unsigned AlphaInstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
+ AlphaMachineFunctionInfo *AlphaFI = MF->getInfo<AlphaMachineFunctionInfo>();
+ unsigned GlobalBaseReg = AlphaFI->getGlobalBaseReg();
+ if (GlobalBaseReg != 0)
+ return GlobalBaseReg;
+
+ // Insert the set of GlobalBaseReg into the first MBB of the function
+ MachineBasicBlock &FirstMBB = MF->front();
+ MachineBasicBlock::iterator MBBI = FirstMBB.begin();
+ MachineRegisterInfo &RegInfo = MF->getRegInfo();
+ const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+
+ GlobalBaseReg = RegInfo.createVirtualRegister(&Alpha::GPRCRegClass);
+ BuildMI(FirstMBB, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY),
+ GlobalBaseReg).addReg(Alpha::R29);
+ RegInfo.addLiveIn(Alpha::R29);
+
+ AlphaFI->setGlobalBaseReg(GlobalBaseReg);
+ return GlobalBaseReg;
+}
+
+/// getGlobalRetAddr - Return a virtual register initialized with the
+/// the global base register value. Output instructions required to
+/// initialize the register in the function entry block, if necessary.
+///
+unsigned AlphaInstrInfo::getGlobalRetAddr(MachineFunction *MF) const {
+ AlphaMachineFunctionInfo *AlphaFI = MF->getInfo<AlphaMachineFunctionInfo>();
+ unsigned GlobalRetAddr = AlphaFI->getGlobalRetAddr();
+ if (GlobalRetAddr != 0)
+ return GlobalRetAddr;
+
+ // Insert the set of GlobalRetAddr into the first MBB of the function
+ MachineBasicBlock &FirstMBB = MF->front();
+ MachineBasicBlock::iterator MBBI = FirstMBB.begin();
+ MachineRegisterInfo &RegInfo = MF->getRegInfo();
+ const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+
+ GlobalRetAddr = RegInfo.createVirtualRegister(&Alpha::GPRCRegClass);
+ BuildMI(FirstMBB, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY),
+ GlobalRetAddr).addReg(Alpha::R26);
+ RegInfo.addLiveIn(Alpha::R26);
+
+ AlphaFI->setGlobalRetAddr(GlobalRetAddr);
+ return GlobalRetAddr;
+}
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaInstrInfo.h b/contrib/llvm/lib/Target/Alpha/AlphaInstrInfo.h
new file mode 100644
index 0000000..337a85c
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/AlphaInstrInfo.h
@@ -0,0 +1,85 @@
+//===- AlphaInstrInfo.h - Alpha Instruction Information ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Alpha implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ALPHAINSTRUCTIONINFO_H
+#define ALPHAINSTRUCTIONINFO_H
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "AlphaRegisterInfo.h"
+
+#define GET_INSTRINFO_HEADER
+#include "AlphaGenInstrInfo.inc"
+
+namespace llvm {
+
+class AlphaInstrInfo : public AlphaGenInstrInfo {
+ const AlphaRegisterInfo RI;
+public:
+ AlphaInstrInfo();
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ virtual const AlphaRegisterInfo &getRegisterInfo() const { return RI; }
+
+ virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+ virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ bool AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const;
+ unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+ void insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const;
+ bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+
+ /// getGlobalBaseReg - Return a virtual register initialized with the
+ /// the global base register value. Output instructions required to
+ /// initialize the register in the function entry block, if necessary.
+ ///
+ unsigned getGlobalBaseReg(MachineFunction *MF) const;
+
+ /// getGlobalRetAddr - Return a virtual register initialized with the
+ /// the global return address register value. Output instructions required to
+ /// initialize the register in the function entry block, if necessary.
+ ///
+ unsigned getGlobalRetAddr(MachineFunction *MF) const;
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaInstrInfo.td b/contrib/llvm/lib/Target/Alpha/AlphaInstrInfo.td
new file mode 100644
index 0000000..c8c9377
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/AlphaInstrInfo.td
@@ -0,0 +1,1159 @@
+//===- AlphaInstrInfo.td - The Alpha Instruction Set -------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+include "AlphaInstrFormats.td"
+
+//********************
+//Custom DAG Nodes
+//********************
+
+def SDTFPUnaryOpUnC : SDTypeProfile<1, 1, [
+ SDTCisFP<1>, SDTCisFP<0>
+]>;
+def Alpha_cvtqt : SDNode<"AlphaISD::CVTQT_", SDTFPUnaryOpUnC, []>;
+def Alpha_cvtqs : SDNode<"AlphaISD::CVTQS_", SDTFPUnaryOpUnC, []>;
+def Alpha_cvttq : SDNode<"AlphaISD::CVTTQ_" , SDTFPUnaryOp, []>;
+def Alpha_gprello : SDNode<"AlphaISD::GPRelLo", SDTIntBinOp, []>;
+def Alpha_gprelhi : SDNode<"AlphaISD::GPRelHi", SDTIntBinOp, []>;
+def Alpha_rellit : SDNode<"AlphaISD::RelLit", SDTIntBinOp, [SDNPMayLoad]>;
+
+def retflag : SDNode<"AlphaISD::RET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue]>;
+
+// These are target-independent nodes, but have target-specific formats.
+def SDT_AlphaCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i64> ]>;
+def SDT_AlphaCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i64>,
+ SDTCisVT<1, i64> ]>;
+
+def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_AlphaCallSeqStart,
+ [SDNPHasChain, SDNPOutGlue]>;
+def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_AlphaCallSeqEnd,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+//********************
+//Paterns for matching
+//********************
+def invX : SDNodeXForm<imm, [{ //invert
+ return getI64Imm(~N->getZExtValue());
+}]>;
+def negX : SDNodeXForm<imm, [{ //negate
+ return getI64Imm(~N->getZExtValue() + 1);
+}]>;
+def SExt32 : SDNodeXForm<imm, [{ //signed extend int to long
+ return getI64Imm(((int64_t)N->getZExtValue() << 32) >> 32);
+}]>;
+def SExt16 : SDNodeXForm<imm, [{ //signed extend int to long
+ return getI64Imm(((int64_t)N->getZExtValue() << 48) >> 48);
+}]>;
+def LL16 : SDNodeXForm<imm, [{ //lda part of constant
+ return getI64Imm(get_lda16(N->getZExtValue()));
+}]>;
+def LH16 : SDNodeXForm<imm, [{ //ldah part of constant (or more if too big)
+ return getI64Imm(get_ldah16(N->getZExtValue()));
+}]>;
+def iZAPX : SDNodeXForm<and, [{ // get imm to ZAPi
+ ConstantSDNode *RHS = cast<ConstantSDNode>(N->getOperand(1));
+ return getI64Imm(get_zapImm(SDValue(), RHS->getZExtValue()));
+}]>;
+def nearP2X : SDNodeXForm<imm, [{
+ return getI64Imm(Log2_64(getNearPower2((uint64_t)N->getZExtValue())));
+}]>;
+def nearP2RemX : SDNodeXForm<imm, [{
+ uint64_t x =
+ abs64(N->getZExtValue() - getNearPower2((uint64_t)N->getZExtValue()));
+ return getI64Imm(Log2_64(x));
+}]>;
+
+def immUExt8 : PatLeaf<(imm), [{ //imm fits in 8 bit zero extended field
+ return (uint64_t)N->getZExtValue() == (uint8_t)N->getZExtValue();
+}]>;
+def immUExt8inv : PatLeaf<(imm), [{ //inverted imm fits in 8 bit zero extended field
+ return (uint64_t)~N->getZExtValue() == (uint8_t)~N->getZExtValue();
+}], invX>;
+def immUExt8neg : PatLeaf<(imm), [{ //negated imm fits in 8 bit zero extended field
+ return ((uint64_t)~N->getZExtValue() + 1) ==
+ (uint8_t)((uint64_t)~N->getZExtValue() + 1);
+}], negX>;
+def immSExt16 : PatLeaf<(imm), [{ //imm fits in 16 bit sign extended field
+ return ((int64_t)N->getZExtValue() << 48) >> 48 ==
+ (int64_t)N->getZExtValue();
+}]>;
+def immSExt16int : PatLeaf<(imm), [{ //(int)imm fits in a 16 bit sign extended field
+ return ((int64_t)N->getZExtValue() << 48) >> 48 ==
+ ((int64_t)N->getZExtValue() << 32) >> 32;
+}], SExt16>;
+
+def zappat : PatFrag<(ops node:$LHS), (and node:$LHS, imm), [{
+ ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (!RHS) return 0;
+ uint64_t build = get_zapImm(N->getOperand(0), (uint64_t)RHS->getZExtValue());
+ return build != 0;
+}]>;
+
+def immFPZ : PatLeaf<(fpimm), [{ //the only fpconstant nodes are +/- 0.0
+ (void)N; // silence warning.
+ return true;
+}]>;
+
+def immRem1 :PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),1,0);}]>;
+def immRem2 :PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),2,0);}]>;
+def immRem3 :PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),3,0);}]>;
+def immRem4 :PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),4,0);}]>;
+def immRem5 :PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),5,0);}]>;
+def immRem1n:PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),1,1);}]>;
+def immRem2n:PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),2,1);}]>;
+def immRem3n:PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),3,1);}]>;
+def immRem4n:PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),4,1);}]>;
+def immRem5n:PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),5,1);}]>;
+
+def immRemP2n : PatLeaf<(imm), [{
+ return isPowerOf2_64(getNearPower2((uint64_t)N->getZExtValue()) -
+ N->getZExtValue());
+}]>;
+def immRemP2 : PatLeaf<(imm), [{
+ return isPowerOf2_64(N->getZExtValue() -
+ getNearPower2((uint64_t)N->getZExtValue()));
+}]>;
+def immUExt8ME : PatLeaf<(imm), [{ //use this imm for mulqi
+ int64_t d = abs64((int64_t)N->getZExtValue() -
+ (int64_t)getNearPower2((uint64_t)N->getZExtValue()));
+ if (isPowerOf2_64(d)) return false;
+ switch (d) {
+ case 1: case 3: case 5: return false;
+ default: return (uint64_t)N->getZExtValue() == (uint8_t)N->getZExtValue();
+ };
+}]>;
+
+def intop : PatFrag<(ops node:$op), (sext_inreg node:$op, i32)>;
+def add4 : PatFrag<(ops node:$op1, node:$op2),
+ (add (shl node:$op1, 2), node:$op2)>;
+def sub4 : PatFrag<(ops node:$op1, node:$op2),
+ (sub (shl node:$op1, 2), node:$op2)>;
+def add8 : PatFrag<(ops node:$op1, node:$op2),
+ (add (shl node:$op1, 3), node:$op2)>;
+def sub8 : PatFrag<(ops node:$op1, node:$op2),
+ (sub (shl node:$op1, 3), node:$op2)>;
+class BinOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$RHS), res>;
+class CmpOpFrag<dag res> : PatFrag<(ops node:$R), res>;
+
+//Pseudo ops for selection
+
+def WTF : PseudoInstAlpha<(outs), (ins variable_ops), "#wtf", [], s_pseudo>;
+
+let hasCtrlDep = 1, Defs = [R30], Uses = [R30] in {
+def ADJUSTSTACKUP : PseudoInstAlpha<(outs), (ins s64imm:$amt),
+ "; ADJUP $amt",
+ [(callseq_start timm:$amt)], s_pseudo>;
+def ADJUSTSTACKDOWN : PseudoInstAlpha<(outs), (ins s64imm:$amt1, s64imm:$amt2),
+ "; ADJDOWN $amt1",
+ [(callseq_end timm:$amt1, timm:$amt2)], s_pseudo>;
+}
+
+def ALTENT : PseudoInstAlpha<(outs), (ins s64imm:$TARGET), "$$$TARGET..ng:\n", [], s_pseudo>;
+def PCLABEL : PseudoInstAlpha<(outs), (ins s64imm:$num), "PCMARKER_$num:\n",[], s_pseudo>;
+def MEMLABEL : PseudoInstAlpha<(outs), (ins s64imm:$i, s64imm:$j, s64imm:$k, s64imm:$m),
+ "LSMARKER$$$i$$$j$$$k$$$m:", [], s_pseudo>;
+
+
+let usesCustomInserter = 1 in { // Expanded after instruction selection.
+def CAS32 : PseudoInstAlpha<(outs GPRC:$dst), (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "",
+ [(set GPRC:$dst, (atomic_cmp_swap_32 GPRC:$ptr, GPRC:$cmp, GPRC:$swp))], s_pseudo>;
+def CAS64 : PseudoInstAlpha<(outs GPRC:$dst), (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "",
+ [(set GPRC:$dst, (atomic_cmp_swap_64 GPRC:$ptr, GPRC:$cmp, GPRC:$swp))], s_pseudo>;
+
+def LAS32 : PseudoInstAlpha<(outs GPRC:$dst), (ins GPRC:$ptr, GPRC:$swp), "",
+ [(set GPRC:$dst, (atomic_load_add_32 GPRC:$ptr, GPRC:$swp))], s_pseudo>;
+def LAS64 :PseudoInstAlpha<(outs GPRC:$dst), (ins GPRC:$ptr, GPRC:$swp), "",
+ [(set GPRC:$dst, (atomic_load_add_64 GPRC:$ptr, GPRC:$swp))], s_pseudo>;
+
+def SWAP32 : PseudoInstAlpha<(outs GPRC:$dst), (ins GPRC:$ptr, GPRC:$swp), "",
+ [(set GPRC:$dst, (atomic_swap_32 GPRC:$ptr, GPRC:$swp))], s_pseudo>;
+def SWAP64 :PseudoInstAlpha<(outs GPRC:$dst), (ins GPRC:$ptr, GPRC:$swp), "",
+ [(set GPRC:$dst, (atomic_swap_64 GPRC:$ptr, GPRC:$swp))], s_pseudo>;
+}
+
+//***********************
+//Real instructions
+//***********************
+
+//Operation Form:
+
+//conditional moves, int
+
+multiclass cmov_inst<bits<7> fun, string asmstr, PatFrag OpNode> {
+def r : OForm4<0x11, fun, !strconcat(asmstr, " $RCOND,$RTRUE,$RDEST"),
+ [(set GPRC:$RDEST, (select (OpNode GPRC:$RCOND), GPRC:$RTRUE, GPRC:$RFALSE))], s_cmov>;
+def i : OForm4L<0x11, fun, !strconcat(asmstr, " $RCOND,$RTRUE,$RDEST"),
+ [(set GPRC:$RDEST, (select (OpNode GPRC:$RCOND), immUExt8:$RTRUE, GPRC:$RFALSE))], s_cmov>;
+}
+
+defm CMOVEQ : cmov_inst<0x24, "cmoveq", CmpOpFrag<(seteq node:$R, 0)>>;
+defm CMOVNE : cmov_inst<0x26, "cmovne", CmpOpFrag<(setne node:$R, 0)>>;
+defm CMOVLT : cmov_inst<0x44, "cmovlt", CmpOpFrag<(setlt node:$R, 0)>>;
+defm CMOVLE : cmov_inst<0x64, "cmovle", CmpOpFrag<(setle node:$R, 0)>>;
+defm CMOVGT : cmov_inst<0x66, "cmovgt", CmpOpFrag<(setgt node:$R, 0)>>;
+defm CMOVGE : cmov_inst<0x46, "cmovge", CmpOpFrag<(setge node:$R, 0)>>;
+defm CMOVLBC : cmov_inst<0x16, "cmovlbc", CmpOpFrag<(xor node:$R, 1)>>;
+defm CMOVLBS : cmov_inst<0x14, "cmovlbs", CmpOpFrag<(and node:$R, 1)>>;
+
+//General pattern for cmov
+def : Pat<(select GPRC:$which, GPRC:$src1, GPRC:$src2),
+ (CMOVNEr GPRC:$src2, GPRC:$src1, GPRC:$which)>;
+def : Pat<(select GPRC:$which, GPRC:$src1, immUExt8:$src2),
+ (CMOVEQi GPRC:$src1, immUExt8:$src2, GPRC:$which)>;
+
+//Invert sense when we can for constants:
+def : Pat<(select (setne GPRC:$RCOND, 0), GPRC:$RTRUE, immUExt8:$RFALSE),
+ (CMOVEQi GPRC:$RCOND, immUExt8:$RFALSE, GPRC:$RTRUE)>;
+def : Pat<(select (setgt GPRC:$RCOND, 0), GPRC:$RTRUE, immUExt8:$RFALSE),
+ (CMOVLEi GPRC:$RCOND, immUExt8:$RFALSE, GPRC:$RTRUE)>;
+def : Pat<(select (setge GPRC:$RCOND, 0), GPRC:$RTRUE, immUExt8:$RFALSE),
+ (CMOVLTi GPRC:$RCOND, immUExt8:$RFALSE, GPRC:$RTRUE)>;
+def : Pat<(select (setlt GPRC:$RCOND, 0), GPRC:$RTRUE, immUExt8:$RFALSE),
+ (CMOVGEi GPRC:$RCOND, immUExt8:$RFALSE, GPRC:$RTRUE)>;
+def : Pat<(select (setle GPRC:$RCOND, 0), GPRC:$RTRUE, immUExt8:$RFALSE),
+ (CMOVGTi GPRC:$RCOND, immUExt8:$RFALSE, GPRC:$RTRUE)>;
+
+multiclass all_inst<bits<6> opc, bits<7> funl, bits<7> funq,
+ string asmstr, PatFrag OpNode, InstrItinClass itin> {
+ def Lr : OForm< opc, funl, !strconcat(asmstr, "l $RA,$RB,$RC"),
+ [(set GPRC:$RC, (intop (OpNode GPRC:$RA, GPRC:$RB)))], itin>;
+ def Li : OFormL<opc, funl, !strconcat(asmstr, "l $RA,$L,$RC"),
+ [(set GPRC:$RC, (intop (OpNode GPRC:$RA, immUExt8:$L)))], itin>;
+ def Qr : OForm< opc, funq, !strconcat(asmstr, "q $RA,$RB,$RC"),
+ [(set GPRC:$RC, (OpNode GPRC:$RA, GPRC:$RB))], itin>;
+ def Qi : OFormL<opc, funq, !strconcat(asmstr, "q $RA,$L,$RC"),
+ [(set GPRC:$RC, (OpNode GPRC:$RA, immUExt8:$L))], itin>;
+}
+
+defm MUL : all_inst<0x13, 0x00, 0x20, "mul", BinOpFrag<(mul node:$LHS, node:$RHS)>, s_imul>;
+defm ADD : all_inst<0x10, 0x00, 0x20, "add", BinOpFrag<(add node:$LHS, node:$RHS)>, s_iadd>;
+defm S4ADD : all_inst<0x10, 0x02, 0x22, "s4add", add4, s_iadd>;
+defm S8ADD : all_inst<0x10, 0x12, 0x32, "s8add", add8, s_iadd>;
+defm S4SUB : all_inst<0x10, 0x0B, 0x2B, "s4sub", sub4, s_iadd>;
+defm S8SUB : all_inst<0x10, 0x1B, 0x3B, "s8sub", sub8, s_iadd>;
+defm SUB : all_inst<0x10, 0x09, 0x29, "sub", BinOpFrag<(sub node:$LHS, node:$RHS)>, s_iadd>;
+//Const cases since legalize does sub x, int -> add x, inv(int) + 1
+def : Pat<(intop (add GPRC:$RA, immUExt8neg:$L)), (SUBLi GPRC:$RA, immUExt8neg:$L)>;
+def : Pat<(add GPRC:$RA, immUExt8neg:$L), (SUBQi GPRC:$RA, immUExt8neg:$L)>;
+def : Pat<(intop (add4 GPRC:$RA, immUExt8neg:$L)), (S4SUBLi GPRC:$RA, immUExt8neg:$L)>;
+def : Pat<(add4 GPRC:$RA, immUExt8neg:$L), (S4SUBQi GPRC:$RA, immUExt8neg:$L)>;
+def : Pat<(intop (add8 GPRC:$RA, immUExt8neg:$L)), (S8SUBLi GPRC:$RA, immUExt8neg:$L)>;
+def : Pat<(add8 GPRC:$RA, immUExt8neg:$L), (S8SUBQi GPRC:$RA, immUExt8neg:$L)>;
+
+multiclass log_inst<bits<6> opc, bits<7> fun, string asmstr, SDNode OpNode, InstrItinClass itin> {
+def r : OForm<opc, fun, !strconcat(asmstr, " $RA,$RB,$RC"),
+ [(set GPRC:$RC, (OpNode GPRC:$RA, GPRC:$RB))], itin>;
+def i : OFormL<opc, fun, !strconcat(asmstr, " $RA,$L,$RC"),
+ [(set GPRC:$RC, (OpNode GPRC:$RA, immUExt8:$L))], itin>;
+}
+multiclass inv_inst<bits<6> opc, bits<7> fun, string asmstr, SDNode OpNode, InstrItinClass itin> {
+def r : OForm<opc, fun, !strconcat(asmstr, " $RA,$RB,$RC"),
+ [(set GPRC:$RC, (OpNode GPRC:$RA, (not GPRC:$RB)))], itin>;
+def i : OFormL<opc, fun, !strconcat(asmstr, " $RA,$L,$RC"),
+ [(set GPRC:$RC, (OpNode GPRC:$RA, immUExt8inv:$L))], itin>;
+}
+
+defm AND : log_inst<0x11, 0x00, "and", and, s_ilog>;
+defm BIC : inv_inst<0x11, 0x08, "bic", and, s_ilog>;
+defm BIS : log_inst<0x11, 0x20, "bis", or, s_ilog>;
+defm ORNOT : inv_inst<0x11, 0x28, "ornot", or, s_ilog>;
+defm XOR : log_inst<0x11, 0x40, "xor", xor, s_ilog>;
+defm EQV : inv_inst<0x11, 0x48, "eqv", xor, s_ilog>;
+
+defm SL : log_inst<0x12, 0x39, "sll", shl, s_ishf>;
+defm SRA : log_inst<0x12, 0x3c, "sra", sra, s_ishf>;
+defm SRL : log_inst<0x12, 0x34, "srl", srl, s_ishf>;
+defm UMULH : log_inst<0x13, 0x30, "umulh", mulhu, s_imul>;
+
+def CTLZ : OForm2<0x1C, 0x32, "CTLZ $RB,$RC",
+ [(set GPRC:$RC, (ctlz GPRC:$RB))], s_imisc>;
+def CTPOP : OForm2<0x1C, 0x30, "CTPOP $RB,$RC",
+ [(set GPRC:$RC, (ctpop GPRC:$RB))], s_imisc>;
+def CTTZ : OForm2<0x1C, 0x33, "CTTZ $RB,$RC",
+ [(set GPRC:$RC, (cttz GPRC:$RB))], s_imisc>;
+def EXTBL : OForm< 0x12, 0x06, "EXTBL $RA,$RB,$RC",
+ [(set GPRC:$RC, (and (srl GPRC:$RA, (shl GPRC:$RB, 3)), 255))], s_ishf>;
+def EXTWL : OForm< 0x12, 0x16, "EXTWL $RA,$RB,$RC",
+ [(set GPRC:$RC, (and (srl GPRC:$RA, (shl GPRC:$RB, 3)), 65535))], s_ishf>;
+def EXTLL : OForm< 0x12, 0x26, "EXTLL $RA,$RB,$RC",
+ [(set GPRC:$RC, (and (srl GPRC:$RA, (shl GPRC:$RB, 3)), 4294967295))], s_ishf>;
+def SEXTB : OForm2<0x1C, 0x00, "sextb $RB,$RC",
+ [(set GPRC:$RC, (sext_inreg GPRC:$RB, i8))], s_ishf>;
+def SEXTW : OForm2<0x1C, 0x01, "sextw $RB,$RC",
+ [(set GPRC:$RC, (sext_inreg GPRC:$RB, i16))], s_ishf>;
+
+//def EXTBLi : OFormL<0x12, 0x06, "EXTBL $RA,$L,$RC", []>; //Extract byte low
+//def EXTLH : OForm< 0x12, 0x6A, "EXTLH $RA,$RB,$RC", []>; //Extract longword high
+//def EXTLHi : OFormL<0x12, 0x6A, "EXTLH $RA,$L,$RC", []>; //Extract longword high
+//def EXTLLi : OFormL<0x12, 0x26, "EXTLL $RA,$L,$RC", []>; //Extract longword low
+//def EXTQH : OForm< 0x12, 0x7A, "EXTQH $RA,$RB,$RC", []>; //Extract quadword high
+//def EXTQHi : OFormL<0x12, 0x7A, "EXTQH $RA,$L,$RC", []>; //Extract quadword high
+//def EXTQ : OForm< 0x12, 0x36, "EXTQ $RA,$RB,$RC", []>; //Extract quadword low
+//def EXTQi : OFormL<0x12, 0x36, "EXTQ $RA,$L,$RC", []>; //Extract quadword low
+//def EXTWH : OForm< 0x12, 0x5A, "EXTWH $RA,$RB,$RC", []>; //Extract word high
+//def EXTWHi : OFormL<0x12, 0x5A, "EXTWH $RA,$L,$RC", []>; //Extract word high
+//def EXTWLi : OFormL<0x12, 0x16, "EXTWL $RA,$L,$RC", []>; //Extract word low
+
+//def INSBL : OForm< 0x12, 0x0B, "INSBL $RA,$RB,$RC", []>; //Insert byte low
+//def INSBLi : OFormL<0x12, 0x0B, "INSBL $RA,$L,$RC", []>; //Insert byte low
+//def INSLH : OForm< 0x12, 0x67, "INSLH $RA,$RB,$RC", []>; //Insert longword high
+//def INSLHi : OFormL<0x12, 0x67, "INSLH $RA,$L,$RC", []>; //Insert longword high
+//def INSLL : OForm< 0x12, 0x2B, "INSLL $RA,$RB,$RC", []>; //Insert longword low
+//def INSLLi : OFormL<0x12, 0x2B, "INSLL $RA,$L,$RC", []>; //Insert longword low
+//def INSQH : OForm< 0x12, 0x77, "INSQH $RA,$RB,$RC", []>; //Insert quadword high
+//def INSQHi : OFormL<0x12, 0x77, "INSQH $RA,$L,$RC", []>; //Insert quadword high
+//def INSQL : OForm< 0x12, 0x3B, "INSQL $RA,$RB,$RC", []>; //Insert quadword low
+//def INSQLi : OFormL<0x12, 0x3B, "INSQL $RA,$L,$RC", []>; //Insert quadword low
+//def INSWH : OForm< 0x12, 0x57, "INSWH $RA,$RB,$RC", []>; //Insert word high
+//def INSWHi : OFormL<0x12, 0x57, "INSWH $RA,$L,$RC", []>; //Insert word high
+//def INSWL : OForm< 0x12, 0x1B, "INSWL $RA,$RB,$RC", []>; //Insert word low
+//def INSWLi : OFormL<0x12, 0x1B, "INSWL $RA,$L,$RC", []>; //Insert word low
+
+//def MSKBL : OForm< 0x12, 0x02, "MSKBL $RA,$RB,$RC", []>; //Mask byte low
+//def MSKBLi : OFormL<0x12, 0x02, "MSKBL $RA,$L,$RC", []>; //Mask byte low
+//def MSKLH : OForm< 0x12, 0x62, "MSKLH $RA,$RB,$RC", []>; //Mask longword high
+//def MSKLHi : OFormL<0x12, 0x62, "MSKLH $RA,$L,$RC", []>; //Mask longword high
+//def MSKLL : OForm< 0x12, 0x22, "MSKLL $RA,$RB,$RC", []>; //Mask longword low
+//def MSKLLi : OFormL<0x12, 0x22, "MSKLL $RA,$L,$RC", []>; //Mask longword low
+//def MSKQH : OForm< 0x12, 0x72, "MSKQH $RA,$RB,$RC", []>; //Mask quadword high
+//def MSKQHi : OFormL<0x12, 0x72, "MSKQH $RA,$L,$RC", []>; //Mask quadword high
+//def MSKQL : OForm< 0x12, 0x32, "MSKQL $RA,$RB,$RC", []>; //Mask quadword low
+//def MSKQLi : OFormL<0x12, 0x32, "MSKQL $RA,$L,$RC", []>; //Mask quadword low
+//def MSKWH : OForm< 0x12, 0x52, "MSKWH $RA,$RB,$RC", []>; //Mask word high
+//def MSKWHi : OFormL<0x12, 0x52, "MSKWH $RA,$L,$RC", []>; //Mask word high
+//def MSKWL : OForm< 0x12, 0x12, "MSKWL $RA,$RB,$RC", []>; //Mask word low
+//def MSKWLi : OFormL<0x12, 0x12, "MSKWL $RA,$L,$RC", []>; //Mask word low
+
+def ZAPNOTi : OFormL<0x12, 0x31, "zapnot $RA,$L,$RC", [], s_ishf>;
+
+// Define the pattern that produces ZAPNOTi.
+def : Pat<(zappat:$imm GPRC:$RA),
+ (ZAPNOTi GPRC:$RA, (iZAPX GPRC:$imm))>;
+
+
+//Comparison, int
+//So this is a waste of what this instruction can do, but it still saves something
+def CMPBGE : OForm< 0x10, 0x0F, "cmpbge $RA,$RB,$RC",
+ [(set GPRC:$RC, (setuge (and GPRC:$RA, 255), (and GPRC:$RB, 255)))], s_ilog>;
+def CMPBGEi : OFormL<0x10, 0x0F, "cmpbge $RA,$L,$RC",
+ [(set GPRC:$RC, (setuge (and GPRC:$RA, 255), immUExt8:$L))], s_ilog>;
+def CMPEQ : OForm< 0x10, 0x2D, "cmpeq $RA,$RB,$RC",
+ [(set GPRC:$RC, (seteq GPRC:$RA, GPRC:$RB))], s_iadd>;
+def CMPEQi : OFormL<0x10, 0x2D, "cmpeq $RA,$L,$RC",
+ [(set GPRC:$RC, (seteq GPRC:$RA, immUExt8:$L))], s_iadd>;
+def CMPLE : OForm< 0x10, 0x6D, "cmple $RA,$RB,$RC",
+ [(set GPRC:$RC, (setle GPRC:$RA, GPRC:$RB))], s_iadd>;
+def CMPLEi : OFormL<0x10, 0x6D, "cmple $RA,$L,$RC",
+ [(set GPRC:$RC, (setle GPRC:$RA, immUExt8:$L))], s_iadd>;
+def CMPLT : OForm< 0x10, 0x4D, "cmplt $RA,$RB,$RC",
+ [(set GPRC:$RC, (setlt GPRC:$RA, GPRC:$RB))], s_iadd>;
+def CMPLTi : OFormL<0x10, 0x4D, "cmplt $RA,$L,$RC",
+ [(set GPRC:$RC, (setlt GPRC:$RA, immUExt8:$L))], s_iadd>;
+def CMPULE : OForm< 0x10, 0x3D, "cmpule $RA,$RB,$RC",
+ [(set GPRC:$RC, (setule GPRC:$RA, GPRC:$RB))], s_iadd>;
+def CMPULEi : OFormL<0x10, 0x3D, "cmpule $RA,$L,$RC",
+ [(set GPRC:$RC, (setule GPRC:$RA, immUExt8:$L))], s_iadd>;
+def CMPULT : OForm< 0x10, 0x1D, "cmpult $RA,$RB,$RC",
+ [(set GPRC:$RC, (setult GPRC:$RA, GPRC:$RB))], s_iadd>;
+def CMPULTi : OFormL<0x10, 0x1D, "cmpult $RA,$L,$RC",
+ [(set GPRC:$RC, (setult GPRC:$RA, immUExt8:$L))], s_iadd>;
+
+//Patterns for unsupported int comparisons
+def : Pat<(setueq GPRC:$X, GPRC:$Y), (CMPEQ GPRC:$X, GPRC:$Y)>;
+def : Pat<(setueq GPRC:$X, immUExt8:$Y), (CMPEQi GPRC:$X, immUExt8:$Y)>;
+
+def : Pat<(setugt GPRC:$X, GPRC:$Y), (CMPULT GPRC:$Y, GPRC:$X)>;
+def : Pat<(setugt immUExt8:$X, GPRC:$Y), (CMPULTi GPRC:$Y, immUExt8:$X)>;
+
+def : Pat<(setuge GPRC:$X, GPRC:$Y), (CMPULE GPRC:$Y, GPRC:$X)>;
+def : Pat<(setuge immUExt8:$X, GPRC:$Y), (CMPULEi GPRC:$Y, immUExt8:$X)>;
+
+def : Pat<(setgt GPRC:$X, GPRC:$Y), (CMPLT GPRC:$Y, GPRC:$X)>;
+def : Pat<(setgt immUExt8:$X, GPRC:$Y), (CMPLTi GPRC:$Y, immUExt8:$X)>;
+
+def : Pat<(setge GPRC:$X, GPRC:$Y), (CMPLE GPRC:$Y, GPRC:$X)>;
+def : Pat<(setge immUExt8:$X, GPRC:$Y), (CMPLEi GPRC:$Y, immUExt8:$X)>;
+
+def : Pat<(setne GPRC:$X, GPRC:$Y), (CMPEQi (CMPEQ GPRC:$X, GPRC:$Y), 0)>;
+def : Pat<(setne GPRC:$X, immUExt8:$Y), (CMPEQi (CMPEQi GPRC:$X, immUExt8:$Y), 0)>;
+
+def : Pat<(setune GPRC:$X, GPRC:$Y), (CMPEQi (CMPEQ GPRC:$X, GPRC:$Y), 0)>;
+def : Pat<(setune GPRC:$X, immUExt8:$Y), (CMPEQi (CMPEQ GPRC:$X, immUExt8:$Y), 0)>;
+
+
+let isReturn = 1, isTerminator = 1, isBarrier = 1, Ra = 31, Rb = 26, disp = 1, Uses = [R26] in {
+ def RETDAG : MbrForm< 0x1A, 0x02, (ins), "ret $$31,($$26),1", s_jsr>; //Return from subroutine
+ def RETDAGp : MbrpForm< 0x1A, 0x02, (ins), "ret $$31,($$26),1", [(retflag)], s_jsr>; //Return from subroutine
+}
+
+let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1, Ra = 31, disp = 0 in
+def JMP : MbrpForm< 0x1A, 0x00, (ins GPRC:$RS), "jmp $$31,($RS),0",
+ [(brind GPRC:$RS)], s_jsr>; //Jump
+
+let isCall = 1, Ra = 26,
+ Defs = [R0, R1, R2, R3, R4, R5, R6, R7, R8, R16, R17, R18, R19,
+ R20, R21, R22, R23, R24, R25, R26, R27, R28, R29,
+ F0, F1,
+ F10, F11, F12, F13, F14, F15, F16, F17, F18, F19,
+ F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F30], Uses = [R29] in {
+ def BSR : BFormD<0x34, "bsr $$26,$$$DISP..ng", [], s_jsr>; //Branch to subroutine
+}
+let isCall = 1, Ra = 26, Rb = 27, disp = 0,
+ Defs = [R0, R1, R2, R3, R4, R5, R6, R7, R8, R16, R17, R18, R19,
+ R20, R21, R22, R23, R24, R25, R26, R27, R28, R29,
+ F0, F1,
+ F10, F11, F12, F13, F14, F15, F16, F17, F18, F19,
+ F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F30], Uses = [R27, R29] in {
+ def JSR : MbrForm< 0x1A, 0x01, (ins), "jsr $$26,($$27),0", s_jsr>; //Jump to subroutine
+}
+
+let isCall = 1, Ra = 23, Rb = 27, disp = 0,
+ Defs = [R23, R24, R25, R27, R28], Uses = [R24, R25, R27] in
+ def JSRs : MbrForm< 0x1A, 0x01, (ins), "jsr $$23,($$27),0", s_jsr>; //Jump to div or rem
+
+
+def JSR_COROUTINE : MbrForm< 0x1A, 0x03, (ins GPRC:$RD, GPRC:$RS, s14imm:$DISP), "jsr_coroutine $RD,($RS),$DISP", s_jsr>; //Jump to subroutine return
+
+
+let OutOperandList = (outs GPRC:$RA), InOperandList = (ins s64imm:$DISP, GPRC:$RB) in {
+def LDQ : MForm<0x29, 1, "ldq $RA,$DISP($RB)",
+ [(set GPRC:$RA, (load (add GPRC:$RB, immSExt16:$DISP)))], s_ild>;
+def LDQr : MForm<0x29, 1, "ldq $RA,$DISP($RB)\t\t!gprellow",
+ [(set GPRC:$RA, (load (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB)))], s_ild>;
+def LDL : MForm<0x28, 1, "ldl $RA,$DISP($RB)",
+ [(set GPRC:$RA, (sextloadi32 (add GPRC:$RB, immSExt16:$DISP)))], s_ild>;
+def LDLr : MForm<0x28, 1, "ldl $RA,$DISP($RB)\t\t!gprellow",
+ [(set GPRC:$RA, (sextloadi32 (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB)))], s_ild>;
+def LDBU : MForm<0x0A, 1, "ldbu $RA,$DISP($RB)",
+ [(set GPRC:$RA, (zextloadi8 (add GPRC:$RB, immSExt16:$DISP)))], s_ild>;
+def LDBUr : MForm<0x0A, 1, "ldbu $RA,$DISP($RB)\t\t!gprellow",
+ [(set GPRC:$RA, (zextloadi8 (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB)))], s_ild>;
+def LDWU : MForm<0x0C, 1, "ldwu $RA,$DISP($RB)",
+ [(set GPRC:$RA, (zextloadi16 (add GPRC:$RB, immSExt16:$DISP)))], s_ild>;
+def LDWUr : MForm<0x0C, 1, "ldwu $RA,$DISP($RB)\t\t!gprellow",
+ [(set GPRC:$RA, (zextloadi16 (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB)))], s_ild>;
+}
+
+
+let OutOperandList = (outs), InOperandList = (ins GPRC:$RA, s64imm:$DISP, GPRC:$RB) in {
+def STB : MForm<0x0E, 0, "stb $RA,$DISP($RB)",
+ [(truncstorei8 GPRC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_ist>;
+def STBr : MForm<0x0E, 0, "stb $RA,$DISP($RB)\t\t!gprellow",
+ [(truncstorei8 GPRC:$RA, (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB))], s_ist>;
+def STW : MForm<0x0D, 0, "stw $RA,$DISP($RB)",
+ [(truncstorei16 GPRC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_ist>;
+def STWr : MForm<0x0D, 0, "stw $RA,$DISP($RB)\t\t!gprellow",
+ [(truncstorei16 GPRC:$RA, (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB))], s_ist>;
+def STL : MForm<0x2C, 0, "stl $RA,$DISP($RB)",
+ [(truncstorei32 GPRC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_ist>;
+def STLr : MForm<0x2C, 0, "stl $RA,$DISP($RB)\t\t!gprellow",
+ [(truncstorei32 GPRC:$RA, (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB))], s_ist>;
+def STQ : MForm<0x2D, 0, "stq $RA,$DISP($RB)",
+ [(store GPRC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_ist>;
+def STQr : MForm<0x2D, 0, "stq $RA,$DISP($RB)\t\t!gprellow",
+ [(store GPRC:$RA, (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB))], s_ist>;
+}
+
+//Load address
+let OutOperandList = (outs GPRC:$RA), InOperandList = (ins s64imm:$DISP, GPRC:$RB) in {
+def LDA : MForm<0x08, 0, "lda $RA,$DISP($RB)",
+ [(set GPRC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_lda>;
+def LDAr : MForm<0x08, 0, "lda $RA,$DISP($RB)\t\t!gprellow",
+ [(set GPRC:$RA, (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB))], s_lda>; //Load address
+def LDAH : MForm<0x09, 0, "ldah $RA,$DISP($RB)",
+ [], s_lda>; //Load address high
+def LDAHr : MForm<0x09, 0, "ldah $RA,$DISP($RB)\t\t!gprelhigh",
+ [(set GPRC:$RA, (Alpha_gprelhi tglobaladdr:$DISP, GPRC:$RB))], s_lda>; //Load address high
+}
+
+let OutOperandList = (outs), InOperandList = (ins F4RC:$RA, s64imm:$DISP, GPRC:$RB) in {
+def STS : MForm<0x26, 0, "sts $RA,$DISP($RB)",
+ [(store F4RC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_fst>;
+def STSr : MForm<0x26, 0, "sts $RA,$DISP($RB)\t\t!gprellow",
+ [(store F4RC:$RA, (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB))], s_fst>;
+}
+let OutOperandList = (outs F4RC:$RA), InOperandList = (ins s64imm:$DISP, GPRC:$RB) in {
+def LDS : MForm<0x22, 1, "lds $RA,$DISP($RB)",
+ [(set F4RC:$RA, (load (add GPRC:$RB, immSExt16:$DISP)))], s_fld>;
+def LDSr : MForm<0x22, 1, "lds $RA,$DISP($RB)\t\t!gprellow",
+ [(set F4RC:$RA, (load (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB)))], s_fld>;
+}
+let OutOperandList = (outs), InOperandList = (ins F8RC:$RA, s64imm:$DISP, GPRC:$RB) in {
+def STT : MForm<0x27, 0, "stt $RA,$DISP($RB)",
+ [(store F8RC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_fst>;
+def STTr : MForm<0x27, 0, "stt $RA,$DISP($RB)\t\t!gprellow",
+ [(store F8RC:$RA, (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB))], s_fst>;
+}
+let OutOperandList = (outs F8RC:$RA), InOperandList = (ins s64imm:$DISP, GPRC:$RB) in {
+def LDT : MForm<0x23, 1, "ldt $RA,$DISP($RB)",
+ [(set F8RC:$RA, (load (add GPRC:$RB, immSExt16:$DISP)))], s_fld>;
+def LDTr : MForm<0x23, 1, "ldt $RA,$DISP($RB)\t\t!gprellow",
+ [(set F8RC:$RA, (load (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB)))], s_fld>;
+}
+
+
+//constpool rels
+def : Pat<(i64 (load (Alpha_gprello tconstpool:$DISP, GPRC:$RB))),
+ (LDQr tconstpool:$DISP, GPRC:$RB)>;
+def : Pat<(i64 (sextloadi32 (Alpha_gprello tconstpool:$DISP, GPRC:$RB))),
+ (LDLr tconstpool:$DISP, GPRC:$RB)>;
+def : Pat<(i64 (zextloadi8 (Alpha_gprello tconstpool:$DISP, GPRC:$RB))),
+ (LDBUr tconstpool:$DISP, GPRC:$RB)>;
+def : Pat<(i64 (zextloadi16 (Alpha_gprello tconstpool:$DISP, GPRC:$RB))),
+ (LDWUr tconstpool:$DISP, GPRC:$RB)>;
+def : Pat<(i64 (Alpha_gprello tconstpool:$DISP, GPRC:$RB)),
+ (LDAr tconstpool:$DISP, GPRC:$RB)>;
+def : Pat<(i64 (Alpha_gprelhi tconstpool:$DISP, GPRC:$RB)),
+ (LDAHr tconstpool:$DISP, GPRC:$RB)>;
+def : Pat<(f32 (load (Alpha_gprello tconstpool:$DISP, GPRC:$RB))),
+ (LDSr tconstpool:$DISP, GPRC:$RB)>;
+def : Pat<(f64 (load (Alpha_gprello tconstpool:$DISP, GPRC:$RB))),
+ (LDTr tconstpool:$DISP, GPRC:$RB)>;
+
+//jumptable rels
+def : Pat<(i64 (Alpha_gprelhi tjumptable:$DISP, GPRC:$RB)),
+ (LDAHr tjumptable:$DISP, GPRC:$RB)>;
+def : Pat<(i64 (Alpha_gprello tjumptable:$DISP, GPRC:$RB)),
+ (LDAr tjumptable:$DISP, GPRC:$RB)>;
+
+
+//misc ext patterns
+def : Pat<(i64 (extloadi8 (add GPRC:$RB, immSExt16:$DISP))),
+ (LDBU immSExt16:$DISP, GPRC:$RB)>;
+def : Pat<(i64 (extloadi16 (add GPRC:$RB, immSExt16:$DISP))),
+ (LDWU immSExt16:$DISP, GPRC:$RB)>;
+def : Pat<(i64 (extloadi32 (add GPRC:$RB, immSExt16:$DISP))),
+ (LDL immSExt16:$DISP, GPRC:$RB)>;
+
+//0 disp patterns
+def : Pat<(i64 (load GPRC:$addr)),
+ (LDQ 0, GPRC:$addr)>;
+def : Pat<(f64 (load GPRC:$addr)),
+ (LDT 0, GPRC:$addr)>;
+def : Pat<(f32 (load GPRC:$addr)),
+ (LDS 0, GPRC:$addr)>;
+def : Pat<(i64 (sextloadi32 GPRC:$addr)),
+ (LDL 0, GPRC:$addr)>;
+def : Pat<(i64 (zextloadi16 GPRC:$addr)),
+ (LDWU 0, GPRC:$addr)>;
+def : Pat<(i64 (zextloadi8 GPRC:$addr)),
+ (LDBU 0, GPRC:$addr)>;
+def : Pat<(i64 (extloadi8 GPRC:$addr)),
+ (LDBU 0, GPRC:$addr)>;
+def : Pat<(i64 (extloadi16 GPRC:$addr)),
+ (LDWU 0, GPRC:$addr)>;
+def : Pat<(i64 (extloadi32 GPRC:$addr)),
+ (LDL 0, GPRC:$addr)>;
+
+def : Pat<(store GPRC:$DATA, GPRC:$addr),
+ (STQ GPRC:$DATA, 0, GPRC:$addr)>;
+def : Pat<(store F8RC:$DATA, GPRC:$addr),
+ (STT F8RC:$DATA, 0, GPRC:$addr)>;
+def : Pat<(store F4RC:$DATA, GPRC:$addr),
+ (STS F4RC:$DATA, 0, GPRC:$addr)>;
+def : Pat<(truncstorei32 GPRC:$DATA, GPRC:$addr),
+ (STL GPRC:$DATA, 0, GPRC:$addr)>;
+def : Pat<(truncstorei16 GPRC:$DATA, GPRC:$addr),
+ (STW GPRC:$DATA, 0, GPRC:$addr)>;
+def : Pat<(truncstorei8 GPRC:$DATA, GPRC:$addr),
+ (STB GPRC:$DATA, 0, GPRC:$addr)>;
+
+
+//load address, rellocated gpdist form
+let OutOperandList = (outs GPRC:$RA),
+ InOperandList = (ins s16imm:$DISP, GPRC:$RB, s16imm:$NUM),
+ mayLoad = 1 in {
+def LDAg : MForm<0x08, 1, "lda $RA,0($RB)\t\t!gpdisp!$NUM", [], s_lda>; //Load address
+def LDAHg : MForm<0x09, 1, "ldah $RA,0($RB)\t\t!gpdisp!$NUM", [], s_lda>; //Load address
+}
+
+//Load quad, rellocated literal form
+let OutOperandList = (outs GPRC:$RA), InOperandList = (ins s64imm:$DISP, GPRC:$RB) in
+def LDQl : MForm<0x29, 1, "ldq $RA,$DISP($RB)\t\t!literal",
+ [(set GPRC:$RA, (Alpha_rellit tglobaladdr:$DISP, GPRC:$RB))], s_ild>;
+def : Pat<(Alpha_rellit texternalsym:$ext, GPRC:$RB),
+ (LDQl texternalsym:$ext, GPRC:$RB)>;
+
+let OutOperandList = (outs GPRC:$RR),
+ InOperandList = (ins GPRC:$RA, s64imm:$DISP, GPRC:$RB),
+ Constraints = "$RA = $RR",
+ DisableEncoding = "$RR" in {
+def STQ_C : MForm<0x2F, 0, "stq_l $RA,$DISP($RB)", [], s_ist>;
+def STL_C : MForm<0x2E, 0, "stl_l $RA,$DISP($RB)", [], s_ist>;
+}
+let OutOperandList = (outs GPRC:$RA),
+ InOperandList = (ins s64imm:$DISP, GPRC:$RB),
+ mayLoad = 1 in {
+def LDQ_L : MForm<0x2B, 1, "ldq_l $RA,$DISP($RB)", [], s_ild>;
+def LDL_L : MForm<0x2A, 1, "ldl_l $RA,$DISP($RB)", [], s_ild>;
+}
+
+def RPCC : MfcForm<0x18, 0xC000, "rpcc $RA", s_rpcc>; //Read process cycle counter
+def MB : MfcPForm<0x18, 0x4000, "mb", s_imisc>; //memory barrier
+def WMB : MfcPForm<0x18, 0x4400, "wmb", s_imisc>; //write memory barrier
+
+def : Pat<(membarrier (i64 imm), (i64 imm), (i64 imm), (i64 1), (i64 imm)),
+ (WMB)>;
+def : Pat<(membarrier (i64 imm), (i64 imm), (i64 imm), (i64 imm), (i64 imm)),
+ (MB)>;
+
+def : Pat<(atomic_fence (imm), (imm)), (MB)>;
+
+//Basic Floating point ops
+
+//Floats
+
+let OutOperandList = (outs F4RC:$RC), InOperandList = (ins F4RC:$RB), Fa = 31 in
+def SQRTS : FPForm<0x14, 0x58B, "sqrts/su $RB,$RC",
+ [(set F4RC:$RC, (fsqrt F4RC:$RB))], s_fsqrts>;
+
+let OutOperandList = (outs F4RC:$RC), InOperandList = (ins F4RC:$RA, F4RC:$RB) in {
+def ADDS : FPForm<0x16, 0x580, "adds/su $RA,$RB,$RC",
+ [(set F4RC:$RC, (fadd F4RC:$RA, F4RC:$RB))], s_fadd>;
+def SUBS : FPForm<0x16, 0x581, "subs/su $RA,$RB,$RC",
+ [(set F4RC:$RC, (fsub F4RC:$RA, F4RC:$RB))], s_fadd>;
+def DIVS : FPForm<0x16, 0x583, "divs/su $RA,$RB,$RC",
+ [(set F4RC:$RC, (fdiv F4RC:$RA, F4RC:$RB))], s_fdivs>;
+def MULS : FPForm<0x16, 0x582, "muls/su $RA,$RB,$RC",
+ [(set F4RC:$RC, (fmul F4RC:$RA, F4RC:$RB))], s_fmul>;
+
+def CPYSS : FPForm<0x17, 0x020, "cpys $RA,$RB,$RC",
+ [(set F4RC:$RC, (fcopysign F4RC:$RB, F4RC:$RA))], s_fadd>;
+def CPYSES : FPForm<0x17, 0x022, "cpyse $RA,$RB,$RC",[], s_fadd>; //Copy sign and exponent
+def CPYSNS : FPForm<0x17, 0x021, "cpysn $RA,$RB,$RC",
+ [(set F4RC:$RC, (fneg (fcopysign F4RC:$RB, F4RC:$RA)))], s_fadd>;
+}
+
+//Doubles
+
+let OutOperandList = (outs F8RC:$RC), InOperandList = (ins F8RC:$RB), Fa = 31 in
+def SQRTT : FPForm<0x14, 0x5AB, "sqrtt/su $RB,$RC",
+ [(set F8RC:$RC, (fsqrt F8RC:$RB))], s_fsqrtt>;
+
+let OutOperandList = (outs F8RC:$RC), InOperandList = (ins F8RC:$RA, F8RC:$RB) in {
+def ADDT : FPForm<0x16, 0x5A0, "addt/su $RA,$RB,$RC",
+ [(set F8RC:$RC, (fadd F8RC:$RA, F8RC:$RB))], s_fadd>;
+def SUBT : FPForm<0x16, 0x5A1, "subt/su $RA,$RB,$RC",
+ [(set F8RC:$RC, (fsub F8RC:$RA, F8RC:$RB))], s_fadd>;
+def DIVT : FPForm<0x16, 0x5A3, "divt/su $RA,$RB,$RC",
+ [(set F8RC:$RC, (fdiv F8RC:$RA, F8RC:$RB))], s_fdivt>;
+def MULT : FPForm<0x16, 0x5A2, "mult/su $RA,$RB,$RC",
+ [(set F8RC:$RC, (fmul F8RC:$RA, F8RC:$RB))], s_fmul>;
+
+def CPYST : FPForm<0x17, 0x020, "cpys $RA,$RB,$RC",
+ [(set F8RC:$RC, (fcopysign F8RC:$RB, F8RC:$RA))], s_fadd>;
+def CPYSET : FPForm<0x17, 0x022, "cpyse $RA,$RB,$RC",[], s_fadd>; //Copy sign and exponent
+def CPYSNT : FPForm<0x17, 0x021, "cpysn $RA,$RB,$RC",
+ [(set F8RC:$RC, (fneg (fcopysign F8RC:$RB, F8RC:$RA)))], s_fadd>;
+
+def CMPTEQ : FPForm<0x16, 0x5A5, "cmpteq/su $RA,$RB,$RC", [], s_fadd>;
+// [(set F8RC:$RC, (seteq F8RC:$RA, F8RC:$RB))]>;
+def CMPTLE : FPForm<0x16, 0x5A7, "cmptle/su $RA,$RB,$RC", [], s_fadd>;
+// [(set F8RC:$RC, (setle F8RC:$RA, F8RC:$RB))]>;
+def CMPTLT : FPForm<0x16, 0x5A6, "cmptlt/su $RA,$RB,$RC", [], s_fadd>;
+// [(set F8RC:$RC, (setlt F8RC:$RA, F8RC:$RB))]>;
+def CMPTUN : FPForm<0x16, 0x5A4, "cmptun/su $RA,$RB,$RC", [], s_fadd>;
+// [(set F8RC:$RC, (setuo F8RC:$RA, F8RC:$RB))]>;
+}
+
+//More CPYS forms:
+let OutOperandList = (outs F8RC:$RC), InOperandList = (ins F4RC:$RA, F8RC:$RB) in {
+def CPYSTs : FPForm<0x17, 0x020, "cpys $RA,$RB,$RC",
+ [(set F8RC:$RC, (fcopysign F8RC:$RB, F4RC:$RA))], s_fadd>;
+def CPYSNTs : FPForm<0x17, 0x021, "cpysn $RA,$RB,$RC",
+ [(set F8RC:$RC, (fneg (fcopysign F8RC:$RB, F4RC:$RA)))], s_fadd>;
+}
+let OutOperandList = (outs F4RC:$RC), InOperandList = (ins F8RC:$RA, F4RC:$RB) in {
+def CPYSSt : FPForm<0x17, 0x020, "cpys $RA,$RB,$RC",
+ [(set F4RC:$RC, (fcopysign F4RC:$RB, F8RC:$RA))], s_fadd>;
+def CPYSESt : FPForm<0x17, 0x022, "cpyse $RA,$RB,$RC",[], s_fadd>; //Copy sign and exponent
+def CPYSNSt : FPForm<0x17, 0x021, "cpysn $RA,$RB,$RC",
+ [(set F4RC:$RC, (fneg (fcopysign F4RC:$RB, F8RC:$RA)))], s_fadd>;
+}
+
+//conditional moves, floats
+let OutOperandList = (outs F4RC:$RDEST),
+ InOperandList = (ins F4RC:$RFALSE, F4RC:$RTRUE, F8RC:$RCOND),
+ Constraints = "$RTRUE = $RDEST" in {
+def FCMOVEQS : FPForm<0x17, 0x02A,
+ "fcmoveq $RCOND,$RTRUE,$RDEST",
+ [], s_fcmov>; //FCMOVE if = zero
+def FCMOVGES : FPForm<0x17, 0x02D,
+ "fcmovge $RCOND,$RTRUE,$RDEST",
+ [], s_fcmov>; //FCMOVE if >= zero
+def FCMOVGTS : FPForm<0x17, 0x02F,
+ "fcmovgt $RCOND,$RTRUE,$RDEST",
+ [], s_fcmov>; //FCMOVE if > zero
+def FCMOVLES : FPForm<0x17, 0x02E,
+ "fcmovle $RCOND,$RTRUE,$RDEST",
+ [], s_fcmov>; //FCMOVE if <= zero
+def FCMOVLTS : FPForm<0x17, 0x02C,
+ "fcmovlt $RCOND,$RTRUE,$RDEST",
+ [], s_fcmov>; // FCMOVE if < zero
+def FCMOVNES : FPForm<0x17, 0x02B,
+ "fcmovne $RCOND,$RTRUE,$RDEST",
+ [], s_fcmov>; //FCMOVE if != zero
+}
+//conditional moves, doubles
+let OutOperandList = (outs F8RC:$RDEST),
+ InOperandList = (ins F8RC:$RFALSE, F8RC:$RTRUE, F8RC:$RCOND),
+ Constraints = "$RTRUE = $RDEST" in {
+def FCMOVEQT : FPForm<0x17, 0x02A, "fcmoveq $RCOND,$RTRUE,$RDEST", [], s_fcmov>;
+def FCMOVGET : FPForm<0x17, 0x02D, "fcmovge $RCOND,$RTRUE,$RDEST", [], s_fcmov>;
+def FCMOVGTT : FPForm<0x17, 0x02F, "fcmovgt $RCOND,$RTRUE,$RDEST", [], s_fcmov>;
+def FCMOVLET : FPForm<0x17, 0x02E, "fcmovle $RCOND,$RTRUE,$RDEST", [], s_fcmov>;
+def FCMOVLTT : FPForm<0x17, 0x02C, "fcmovlt $RCOND,$RTRUE,$RDEST", [], s_fcmov>;
+def FCMOVNET : FPForm<0x17, 0x02B, "fcmovne $RCOND,$RTRUE,$RDEST", [], s_fcmov>;
+}
+
+//misc FP selects
+//Select double
+
+def : Pat<(select (seteq F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setoeq F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setueq F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
+
+def : Pat<(select (setne F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVEQT F8RC:$sf, F8RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setone F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVEQT F8RC:$sf, F8RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setune F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVEQT F8RC:$sf, F8RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
+
+def : Pat<(select (setgt F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLT F8RC:$RB, F8RC:$RA))>;
+def : Pat<(select (setogt F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLT F8RC:$RB, F8RC:$RA))>;
+def : Pat<(select (setugt F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLT F8RC:$RB, F8RC:$RA))>;
+
+def : Pat<(select (setge F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLE F8RC:$RB, F8RC:$RA))>;
+def : Pat<(select (setoge F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLE F8RC:$RB, F8RC:$RA))>;
+def : Pat<(select (setuge F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLE F8RC:$RB, F8RC:$RA))>;
+
+def : Pat<(select (setlt F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLT F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setolt F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLT F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setult F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLT F8RC:$RA, F8RC:$RB))>;
+
+def : Pat<(select (setle F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLE F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setole F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLE F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setule F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLE F8RC:$RA, F8RC:$RB))>;
+
+//Select single
+def : Pat<(select (seteq F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setoeq F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setueq F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
+
+def : Pat<(select (setne F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVEQS F4RC:$sf, F4RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setone F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVEQS F4RC:$sf, F4RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setune F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVEQS F4RC:$sf, F4RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
+
+def : Pat<(select (setgt F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLT F8RC:$RB, F8RC:$RA))>;
+def : Pat<(select (setogt F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLT F8RC:$RB, F8RC:$RA))>;
+def : Pat<(select (setugt F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLT F8RC:$RB, F8RC:$RA))>;
+
+def : Pat<(select (setge F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLE F8RC:$RB, F8RC:$RA))>;
+def : Pat<(select (setoge F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLE F8RC:$RB, F8RC:$RA))>;
+def : Pat<(select (setuge F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLE F8RC:$RB, F8RC:$RA))>;
+
+def : Pat<(select (setlt F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLT F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setolt F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLT F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setult F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLT F8RC:$RA, F8RC:$RB))>;
+
+def : Pat<(select (setle F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLE F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setole F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLE F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setule F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLE F8RC:$RA, F8RC:$RB))>;
+
+
+
+let OutOperandList = (outs GPRC:$RC), InOperandList = (ins F4RC:$RA), Fb = 31 in
+def FTOIS : FPForm<0x1C, 0x078, "ftois $RA,$RC",
+ [(set GPRC:$RC, (bitconvert F4RC:$RA))], s_ftoi>; //Floating to integer move, S_floating
+let OutOperandList = (outs GPRC:$RC), InOperandList = (ins F8RC:$RA), Fb = 31 in
+def FTOIT : FPForm<0x1C, 0x070, "ftoit $RA,$RC",
+ [(set GPRC:$RC, (bitconvert F8RC:$RA))], s_ftoi>; //Floating to integer move
+let OutOperandList = (outs F4RC:$RC), InOperandList = (ins GPRC:$RA), Fb = 31 in
+def ITOFS : FPForm<0x14, 0x004, "itofs $RA,$RC",
+ [(set F4RC:$RC, (bitconvert GPRC:$RA))], s_itof>; //Integer to floating move, S_floating
+let OutOperandList = (outs F8RC:$RC), InOperandList = (ins GPRC:$RA), Fb = 31 in
+def ITOFT : FPForm<0x14, 0x024, "itoft $RA,$RC",
+ [(set F8RC:$RC, (bitconvert GPRC:$RA))], s_itof>; //Integer to floating move
+
+
+let OutOperandList = (outs F4RC:$RC), InOperandList = (ins F8RC:$RB), Fa = 31 in
+def CVTQS : FPForm<0x16, 0x7BC, "cvtqs/sui $RB,$RC",
+ [(set F4RC:$RC, (Alpha_cvtqs F8RC:$RB))], s_fadd>;
+let OutOperandList = (outs F8RC:$RC), InOperandList = (ins F8RC:$RB), Fa = 31 in
+def CVTQT : FPForm<0x16, 0x7BE, "cvtqt/sui $RB,$RC",
+ [(set F8RC:$RC, (Alpha_cvtqt F8RC:$RB))], s_fadd>;
+let OutOperandList = (outs F8RC:$RC), InOperandList = (ins F8RC:$RB), Fa = 31 in
+def CVTTQ : FPForm<0x16, 0x52F, "cvttq/svc $RB,$RC",
+ [(set F8RC:$RC, (Alpha_cvttq F8RC:$RB))], s_fadd>;
+let OutOperandList = (outs F8RC:$RC), InOperandList = (ins F4RC:$RB), Fa = 31 in
+def CVTST : FPForm<0x16, 0x6AC, "cvtst/s $RB,$RC",
+ [(set F8RC:$RC, (fextend F4RC:$RB))], s_fadd>;
+let OutOperandList = (outs F4RC:$RC), InOperandList = (ins F8RC:$RB), Fa = 31 in
+def CVTTS : FPForm<0x16, 0x7AC, "cvtts/sui $RB,$RC",
+ [(set F4RC:$RC, (fround F8RC:$RB))], s_fadd>;
+
+def : Pat<(select GPRC:$RC, F8RC:$st, F8RC:$sf),
+ (f64 (FCMOVEQT F8RC:$st, F8RC:$sf, (ITOFT GPRC:$RC)))>;
+def : Pat<(select GPRC:$RC, F4RC:$st, F4RC:$sf),
+ (f32 (FCMOVEQS F4RC:$st, F4RC:$sf, (ITOFT GPRC:$RC)))>;
+
+/////////////////////////////////////////////////////////
+//Branching
+/////////////////////////////////////////////////////////
+class br_icc<bits<6> opc, string asmstr>
+ : BFormN<opc, (ins u64imm:$opc, GPRC:$R, target:$dst),
+ !strconcat(asmstr, " $R,$dst"), s_icbr>;
+class br_fcc<bits<6> opc, string asmstr>
+ : BFormN<opc, (ins u64imm:$opc, F8RC:$R, target:$dst),
+ !strconcat(asmstr, " $R,$dst"), s_fbr>;
+
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in {
+let Ra = 31, isBarrier = 1 in
+def BR : BFormD<0x30, "br $$31,$DISP", [(br bb:$DISP)], s_ubr>;
+
+def COND_BRANCH_I : BFormN<0, (ins u64imm:$opc, GPRC:$R, target:$dst),
+ "{:comment} COND_BRANCH imm:$opc, GPRC:$R, bb:$dst",
+ s_icbr>;
+def COND_BRANCH_F : BFormN<0, (ins u64imm:$opc, F8RC:$R, target:$dst),
+ "{:comment} COND_BRANCH imm:$opc, F8RC:$R, bb:$dst",
+ s_fbr>;
+//Branches, int
+def BEQ : br_icc<0x39, "beq">;
+def BGE : br_icc<0x3E, "bge">;
+def BGT : br_icc<0x3F, "bgt">;
+def BLBC : br_icc<0x38, "blbc">;
+def BLBS : br_icc<0x3C, "blbs">;
+def BLE : br_icc<0x3B, "ble">;
+def BLT : br_icc<0x3A, "blt">;
+def BNE : br_icc<0x3D, "bne">;
+
+//Branches, float
+def FBEQ : br_fcc<0x31, "fbeq">;
+def FBGE : br_fcc<0x36, "fbge">;
+def FBGT : br_fcc<0x37, "fbgt">;
+def FBLE : br_fcc<0x33, "fble">;
+def FBLT : br_fcc<0x32, "fblt">;
+def FBNE : br_fcc<0x36, "fbne">;
+}
+
+//An ugly trick to get the opcode as an imm I can use
+def immBRCond : SDNodeXForm<imm, [{
+ switch((uint64_t)N->getZExtValue()) {
+ default: assert(0 && "Unknown branch type");
+ case 0: return getI64Imm(Alpha::BEQ);
+ case 1: return getI64Imm(Alpha::BNE);
+ case 2: return getI64Imm(Alpha::BGE);
+ case 3: return getI64Imm(Alpha::BGT);
+ case 4: return getI64Imm(Alpha::BLE);
+ case 5: return getI64Imm(Alpha::BLT);
+ case 6: return getI64Imm(Alpha::BLBS);
+ case 7: return getI64Imm(Alpha::BLBC);
+ case 20: return getI64Imm(Alpha::FBEQ);
+ case 21: return getI64Imm(Alpha::FBNE);
+ case 22: return getI64Imm(Alpha::FBGE);
+ case 23: return getI64Imm(Alpha::FBGT);
+ case 24: return getI64Imm(Alpha::FBLE);
+ case 25: return getI64Imm(Alpha::FBLT);
+ }
+}]>;
+
+//Int cond patterns
+def : Pat<(brcond (seteq GPRC:$RA, 0), bb:$DISP),
+ (COND_BRANCH_I (immBRCond 0), GPRC:$RA, bb:$DISP)>;
+def : Pat<(brcond (setge GPRC:$RA, 0), bb:$DISP),
+ (COND_BRANCH_I (immBRCond 2), GPRC:$RA, bb:$DISP)>;
+def : Pat<(brcond (setgt GPRC:$RA, 0), bb:$DISP),
+ (COND_BRANCH_I (immBRCond 3), GPRC:$RA, bb:$DISP)>;
+def : Pat<(brcond (and GPRC:$RA, 1), bb:$DISP),
+ (COND_BRANCH_I (immBRCond 6), GPRC:$RA, bb:$DISP)>;
+def : Pat<(brcond (setle GPRC:$RA, 0), bb:$DISP),
+ (COND_BRANCH_I (immBRCond 4), GPRC:$RA, bb:$DISP)>;
+def : Pat<(brcond (setlt GPRC:$RA, 0), bb:$DISP),
+ (COND_BRANCH_I (immBRCond 5), GPRC:$RA, bb:$DISP)>;
+def : Pat<(brcond (setne GPRC:$RA, 0), bb:$DISP),
+ (COND_BRANCH_I (immBRCond 1), GPRC:$RA, bb:$DISP)>;
+
+def : Pat<(brcond GPRC:$RA, bb:$DISP),
+ (COND_BRANCH_I (immBRCond 1), GPRC:$RA, bb:$DISP)>;
+def : Pat<(brcond (setne GPRC:$RA, GPRC:$RB), bb:$DISP),
+ (COND_BRANCH_I (immBRCond 0), (CMPEQ GPRC:$RA, GPRC:$RB), bb:$DISP)>;
+def : Pat<(brcond (setne GPRC:$RA, immUExt8:$L), bb:$DISP),
+ (COND_BRANCH_I (immBRCond 0), (CMPEQi GPRC:$RA, immUExt8:$L), bb:$DISP)>;
+
+//FP cond patterns
+def : Pat<(brcond (seteq F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 20), F8RC:$RA, bb:$DISP)>;
+def : Pat<(brcond (setne F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), F8RC:$RA, bb:$DISP)>;
+def : Pat<(brcond (setge F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 22), F8RC:$RA, bb:$DISP)>;
+def : Pat<(brcond (setgt F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 23), F8RC:$RA, bb:$DISP)>;
+def : Pat<(brcond (setle F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 24), F8RC:$RA, bb:$DISP)>;
+def : Pat<(brcond (setlt F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 25), F8RC:$RA, bb:$DISP)>;
+
+
+def : Pat<(brcond (seteq F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTEQ F8RC:$RA, F8RC:$RB), bb:$DISP)>;
+def : Pat<(brcond (setoeq F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTEQ F8RC:$RA, F8RC:$RB), bb:$DISP)>;
+def : Pat<(brcond (setueq F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTEQ F8RC:$RA, F8RC:$RB), bb:$DISP)>;
+
+def : Pat<(brcond (setlt F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTLT F8RC:$RA, F8RC:$RB), bb:$DISP)>;
+def : Pat<(brcond (setolt F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTLT F8RC:$RA, F8RC:$RB), bb:$DISP)>;
+def : Pat<(brcond (setult F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTLT F8RC:$RA, F8RC:$RB), bb:$DISP)>;
+
+def : Pat<(brcond (setle F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTLE F8RC:$RA, F8RC:$RB), bb:$DISP)>;
+def : Pat<(brcond (setole F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTLE F8RC:$RA, F8RC:$RB), bb:$DISP)>;
+def : Pat<(brcond (setule F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTLE F8RC:$RA, F8RC:$RB), bb:$DISP)>;
+
+def : Pat<(brcond (setgt F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTLT F8RC:$RB, F8RC:$RA), bb:$DISP)>;
+def : Pat<(brcond (setogt F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTLT F8RC:$RB, F8RC:$RA), bb:$DISP)>;
+def : Pat<(brcond (setugt F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTLT F8RC:$RB, F8RC:$RA), bb:$DISP)>;
+
+def : Pat<(brcond (setge F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTLE F8RC:$RB, F8RC:$RA), bb:$DISP)>;
+def : Pat<(brcond (setoge F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTLE F8RC:$RB, F8RC:$RA), bb:$DISP)>;
+def : Pat<(brcond (setuge F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTLE F8RC:$RB, F8RC:$RA), bb:$DISP)>;
+
+def : Pat<(brcond (setne F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 20), (CMPTEQ F8RC:$RA, F8RC:$RB), bb:$DISP)>;
+def : Pat<(brcond (setone F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 20), (CMPTEQ F8RC:$RA, F8RC:$RB), bb:$DISP)>;
+def : Pat<(brcond (setune F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 20), (CMPTEQ F8RC:$RA, F8RC:$RB), bb:$DISP)>;
+
+
+def : Pat<(brcond (setoeq F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 20), F8RC:$RA,bb:$DISP)>;
+def : Pat<(brcond (setueq F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 20), F8RC:$RA,bb:$DISP)>;
+
+def : Pat<(brcond (setoge F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 22), F8RC:$RA,bb:$DISP)>;
+def : Pat<(brcond (setuge F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 22), F8RC:$RA,bb:$DISP)>;
+
+def : Pat<(brcond (setogt F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 23), F8RC:$RA,bb:$DISP)>;
+def : Pat<(brcond (setugt F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 23), F8RC:$RA,bb:$DISP)>;
+
+def : Pat<(brcond (setole F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 24), F8RC:$RA,bb:$DISP)>;
+def : Pat<(brcond (setule F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 24), F8RC:$RA,bb:$DISP)>;
+
+def : Pat<(brcond (setolt F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 25), F8RC:$RA,bb:$DISP)>;
+def : Pat<(brcond (setult F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 25), F8RC:$RA,bb:$DISP)>;
+
+def : Pat<(brcond (setone F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), F8RC:$RA,bb:$DISP)>;
+def : Pat<(brcond (setune F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), F8RC:$RA,bb:$DISP)>;
+
+//End Branches
+
+//S_floating : IEEE Single
+//T_floating : IEEE Double
+
+//Unused instructions
+//Mnemonic Format Opcode Description
+//CALL_PAL Pcd 00 Trap to PALcode
+//ECB Mfc 18.E800 Evict cache block
+//EXCB Mfc 18.0400 Exception barrier
+//FETCH Mfc 18.8000 Prefetch data
+//FETCH_M Mfc 18.A000 Prefetch data, modify intent
+//LDQ_U Mem 0B Load unaligned quadword
+//MB Mfc 18.4000 Memory barrier
+//STQ_U Mem 0F Store unaligned quadword
+//TRAPB Mfc 18.0000 Trap barrier
+//WH64 Mfc 18.F800 Write hint  64 bytes
+//WMB Mfc 18.4400 Write memory barrier
+//MF_FPCR F-P 17.025 Move from FPCR
+//MT_FPCR F-P 17.024 Move to FPCR
+//There are in the Multimedia extensions, so let's not use them yet
+//def MAXSB8 : OForm<0x1C, 0x3E, "MAXSB8 $RA,$RB,$RC">; //Vector signed byte maximum
+//def MAXSW4 : OForm< 0x1C, 0x3F, "MAXSW4 $RA,$RB,$RC">; //Vector signed word maximum
+//def MAXUB8 : OForm<0x1C, 0x3C, "MAXUB8 $RA,$RB,$RC">; //Vector unsigned byte maximum
+//def MAXUW4 : OForm< 0x1C, 0x3D, "MAXUW4 $RA,$RB,$RC">; //Vector unsigned word maximum
+//def MINSB8 : OForm< 0x1C, 0x38, "MINSB8 $RA,$RB,$RC">; //Vector signed byte minimum
+//def MINSW4 : OForm< 0x1C, 0x39, "MINSW4 $RA,$RB,$RC">; //Vector signed word minimum
+//def MINUB8 : OForm< 0x1C, 0x3A, "MINUB8 $RA,$RB,$RC">; //Vector unsigned byte minimum
+//def MINUW4 : OForm< 0x1C, 0x3B, "MINUW4 $RA,$RB,$RC">; //Vector unsigned word minimum
+//def PERR : OForm< 0x1C, 0x31, "PERR $RA,$RB,$RC">; //Pixel error
+//def PKLB : OForm< 0x1C, 0x37, "PKLB $RA,$RB,$RC">; //Pack longwords to bytes
+//def PKWB : OForm<0x1C, 0x36, "PKWB $RA,$RB,$RC">; //Pack words to bytes
+//def UNPKBL : OForm< 0x1C, 0x35, "UNPKBL $RA,$RB,$RC">; //Unpack bytes to longwords
+//def UNPKBW : OForm< 0x1C, 0x34, "UNPKBW $RA,$RB,$RC">; //Unpack bytes to words
+//CVTLQ F-P 17.010 Convert longword to quadword
+//CVTQL F-P 17.030 Convert quadword to longword
+
+
+//Constant handling
+
+def immConst2Part : PatLeaf<(imm), [{
+ //true if imm fits in a LDAH LDA pair
+ int64_t val = (int64_t)N->getZExtValue();
+ return (val <= IMM_FULLHIGH && val >= IMM_FULLLOW);
+}]>;
+def immConst2PartInt : PatLeaf<(imm), [{
+ //true if imm fits in a LDAH LDA pair with zeroext
+ uint64_t uval = N->getZExtValue();
+ int32_t val32 = (int32_t)uval;
+ return ((uval >> 32) == 0 && //empty upper bits
+ val32 <= IMM_FULLHIGH);
+// val32 >= IMM_FULLLOW + IMM_LOW * IMM_MULT); //Always True
+}], SExt32>;
+
+def : Pat<(i64 immConst2Part:$imm),
+ (LDA (LL16 immConst2Part:$imm), (LDAH (LH16 immConst2Part:$imm), R31))>;
+
+def : Pat<(i64 immSExt16:$imm),
+ (LDA immSExt16:$imm, R31)>;
+
+def : Pat<(i64 immSExt16int:$imm),
+ (ZAPNOTi (LDA (SExt16 immSExt16int:$imm), R31), 15)>;
+def : Pat<(i64 immConst2PartInt:$imm),
+ (ZAPNOTi (LDA (LL16 (i64 (SExt32 immConst2PartInt:$imm))),
+ (LDAH (LH16 (i64 (SExt32 immConst2PartInt:$imm))), R31)), 15)>;
+
+
+//TODO: I want to just define these like this!
+//def : Pat<(i64 0),
+// (R31)>;
+//def : Pat<(f64 0.0),
+// (F31)>;
+//def : Pat<(f64 -0.0),
+// (CPYSNT F31, F31)>;
+//def : Pat<(f32 0.0),
+// (F31)>;
+//def : Pat<(f32 -0.0),
+// (CPYSNS F31, F31)>;
+
+//Misc Patterns:
+
+def : Pat<(sext_inreg GPRC:$RB, i32),
+ (ADDLi GPRC:$RB, 0)>;
+
+def : Pat<(fabs F8RC:$RB),
+ (CPYST F31, F8RC:$RB)>;
+def : Pat<(fabs F4RC:$RB),
+ (CPYSS F31, F4RC:$RB)>;
+def : Pat<(fneg F8RC:$RB),
+ (CPYSNT F8RC:$RB, F8RC:$RB)>;
+def : Pat<(fneg F4RC:$RB),
+ (CPYSNS F4RC:$RB, F4RC:$RB)>;
+
+def : Pat<(fcopysign F4RC:$A, (fneg F4RC:$B)),
+ (CPYSNS F4RC:$B, F4RC:$A)>;
+def : Pat<(fcopysign F8RC:$A, (fneg F8RC:$B)),
+ (CPYSNT F8RC:$B, F8RC:$A)>;
+def : Pat<(fcopysign F4RC:$A, (fneg F8RC:$B)),
+ (CPYSNSt F8RC:$B, F4RC:$A)>;
+def : Pat<(fcopysign F8RC:$A, (fneg F4RC:$B)),
+ (CPYSNTs F4RC:$B, F8RC:$A)>;
+
+//Yes, signed multiply high is ugly
+def : Pat<(mulhs GPRC:$RA, GPRC:$RB),
+ (SUBQr (UMULHr GPRC:$RA, GPRC:$RB), (ADDQr (CMOVGEr GPRC:$RB, R31, GPRC:$RA),
+ (CMOVGEr GPRC:$RA, R31, GPRC:$RB)))>;
+
+//Stupid crazy arithmetic stuff:
+let AddedComplexity = 1 in {
+def : Pat<(mul GPRC:$RA, 5), (S4ADDQr GPRC:$RA, GPRC:$RA)>;
+def : Pat<(mul GPRC:$RA, 9), (S8ADDQr GPRC:$RA, GPRC:$RA)>;
+def : Pat<(mul GPRC:$RA, 3), (S4SUBQr GPRC:$RA, GPRC:$RA)>;
+def : Pat<(mul GPRC:$RA, 7), (S8SUBQr GPRC:$RA, GPRC:$RA)>;
+
+//slight tree expansion if we are multiplying near to a power of 2
+//n is above a power of 2
+def : Pat<(mul GPRC:$RA, immRem1:$imm),
+ (ADDQr (SLr GPRC:$RA, (nearP2X immRem1:$imm)), GPRC:$RA)>;
+def : Pat<(mul GPRC:$RA, immRem2:$imm),
+ (ADDQr (SLr GPRC:$RA, (nearP2X immRem2:$imm)), (ADDQr GPRC:$RA, GPRC:$RA))>;
+def : Pat<(mul GPRC:$RA, immRem3:$imm),
+ (ADDQr (SLr GPRC:$RA, (nearP2X immRem3:$imm)), (S4SUBQr GPRC:$RA, GPRC:$RA))>;
+def : Pat<(mul GPRC:$RA, immRem4:$imm),
+ (S4ADDQr GPRC:$RA, (SLr GPRC:$RA, (nearP2X immRem4:$imm)))>;
+def : Pat<(mul GPRC:$RA, immRem5:$imm),
+ (ADDQr (SLr GPRC:$RA, (nearP2X immRem5:$imm)), (S4ADDQr GPRC:$RA, GPRC:$RA))>;
+def : Pat<(mul GPRC:$RA, immRemP2:$imm),
+ (ADDQr (SLr GPRC:$RA, (nearP2X immRemP2:$imm)), (SLi GPRC:$RA, (nearP2RemX immRemP2:$imm)))>;
+
+//n is below a power of 2
+//FIXME: figure out why something is truncating the imm to 32bits
+// this will fix 2007-11-27-mulneg3
+//def : Pat<(mul GPRC:$RA, immRem1n:$imm),
+// (SUBQr (SLr GPRC:$RA, (nearP2X immRem1n:$imm)), GPRC:$RA)>;
+//def : Pat<(mul GPRC:$RA, immRem2n:$imm),
+// (SUBQr (SLr GPRC:$RA, (nearP2X immRem2n:$imm)), (ADDQr GPRC:$RA, GPRC:$RA))>;
+//def : Pat<(mul GPRC:$RA, immRem3n:$imm),
+// (SUBQr (SLr GPRC:$RA, (nearP2X immRem3n:$imm)), (S4SUBQr GPRC:$RA, GPRC:$RA))>;
+//def : Pat<(mul GPRC:$RA, immRem4n:$imm),
+// (SUBQr (SLr GPRC:$RA, (nearP2X immRem4n:$imm)), (SLi GPRC:$RA, 2))>;
+//def : Pat<(mul GPRC:$RA, immRem5n:$imm),
+// (SUBQr (SLr GPRC:$RA, (nearP2X immRem5n:$imm)), (S4ADDQr GPRC:$RA, GPRC:$RA))>;
+//def : Pat<(mul GPRC:$RA, immRemP2n:$imm),
+// (SUBQr (SLr GPRC:$RA, (nearP2X immRemP2n:$imm)), (SLi GPRC:$RA, (nearP2RemX immRemP2n:$imm)))>;
+} //Added complexity
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaLLRP.cpp b/contrib/llvm/lib/Target/Alpha/AlphaLLRP.cpp
new file mode 100644
index 0000000..85fbfd1
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/AlphaLLRP.cpp
@@ -0,0 +1,158 @@
+//===-- AlphaLLRP.cpp - Alpha Load Load Replay Trap elimination pass. -- --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Here we check for potential replay traps introduced by the spiller
+// We also align some branch targets if we can do so for free.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "alpha-nops"
+#include "Alpha.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+STATISTIC(nopintro, "Number of nops inserted");
+STATISTIC(nopalign, "Number of nops inserted for alignment");
+
+namespace {
+ cl::opt<bool>
+ AlignAll("alpha-align-all", cl::Hidden,
+ cl::desc("Align all blocks"));
+
+ struct AlphaLLRPPass : public MachineFunctionPass {
+ /// Target machine description which we query for reg. names, data
+ /// layout, etc.
+ ///
+ AlphaTargetMachine &TM;
+
+ static char ID;
+ AlphaLLRPPass(AlphaTargetMachine &tm)
+ : MachineFunctionPass(ID), TM(tm) { }
+
+ virtual const char *getPassName() const {
+ return "Alpha NOP inserter";
+ }
+
+ bool runOnMachineFunction(MachineFunction &F) {
+ const TargetInstrInfo *TII = F.getTarget().getInstrInfo();
+ bool Changed = false;
+ MachineInstr* prev[3] = {0,0,0};
+ DebugLoc dl;
+ unsigned count = 0;
+ for (MachineFunction::iterator FI = F.begin(), FE = F.end();
+ FI != FE; ++FI) {
+ MachineBasicBlock& MBB = *FI;
+ bool ub = false;
+ for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ) {
+ if (count%4 == 0)
+ prev[0] = prev[1] = prev[2] = 0; //Slots cleared at fetch boundary
+ ++count;
+ MachineInstr *MI = I++;
+ switch (MI->getOpcode()) {
+ case Alpha::LDQ: case Alpha::LDL:
+ case Alpha::LDWU: case Alpha::LDBU:
+ case Alpha::LDT: case Alpha::LDS:
+ case Alpha::STQ: case Alpha::STL:
+ case Alpha::STW: case Alpha::STB:
+ case Alpha::STT: case Alpha::STS:
+ if (MI->getOperand(2).getReg() == Alpha::R30) {
+ if (prev[0] &&
+ prev[0]->getOperand(2).getReg() == MI->getOperand(2).getReg()&&
+ prev[0]->getOperand(1).getImm() == MI->getOperand(1).getImm()){
+ prev[0] = prev[1];
+ prev[1] = prev[2];
+ prev[2] = 0;
+ BuildMI(MBB, MI, dl, TII->get(Alpha::BISr), Alpha::R31)
+ .addReg(Alpha::R31)
+ .addReg(Alpha::R31);
+ Changed = true; nopintro += 1;
+ count += 1;
+ } else if (prev[1]
+ && prev[1]->getOperand(2).getReg() ==
+ MI->getOperand(2).getReg()
+ && prev[1]->getOperand(1).getImm() ==
+ MI->getOperand(1).getImm()) {
+ prev[0] = prev[2];
+ prev[1] = prev[2] = 0;
+ BuildMI(MBB, MI, dl, TII->get(Alpha::BISr), Alpha::R31)
+ .addReg(Alpha::R31)
+ .addReg(Alpha::R31);
+ BuildMI(MBB, MI, dl, TII->get(Alpha::BISr), Alpha::R31)
+ .addReg(Alpha::R31)
+ .addReg(Alpha::R31);
+ Changed = true; nopintro += 2;
+ count += 2;
+ } else if (prev[2]
+ && prev[2]->getOperand(2).getReg() ==
+ MI->getOperand(2).getReg()
+ && prev[2]->getOperand(1).getImm() ==
+ MI->getOperand(1).getImm()) {
+ prev[0] = prev[1] = prev[2] = 0;
+ BuildMI(MBB, MI, dl, TII->get(Alpha::BISr), Alpha::R31)
+ .addReg(Alpha::R31).addReg(Alpha::R31);
+ BuildMI(MBB, MI, dl, TII->get(Alpha::BISr), Alpha::R31)
+ .addReg(Alpha::R31).addReg(Alpha::R31);
+ BuildMI(MBB, MI, dl, TII->get(Alpha::BISr), Alpha::R31)
+ .addReg(Alpha::R31).addReg(Alpha::R31);
+ Changed = true; nopintro += 3;
+ count += 3;
+ }
+ prev[0] = prev[1];
+ prev[1] = prev[2];
+ prev[2] = MI;
+ break;
+ }
+ prev[0] = prev[1];
+ prev[1] = prev[2];
+ prev[2] = 0;
+ break;
+ case Alpha::ALTENT:
+ case Alpha::MEMLABEL:
+ case Alpha::PCLABEL:
+ --count;
+ break;
+ case Alpha::BR:
+ case Alpha::JMP:
+ ub = true;
+ //fall through
+ default:
+ prev[0] = prev[1];
+ prev[1] = prev[2];
+ prev[2] = 0;
+ break;
+ }
+ }
+ if (ub || AlignAll) {
+ //we can align stuff for free at this point
+ while (count % 4) {
+ BuildMI(MBB, MBB.end(), dl, TII->get(Alpha::BISr), Alpha::R31)
+ .addReg(Alpha::R31).addReg(Alpha::R31);
+ ++count;
+ ++nopalign;
+ prev[0] = prev[1];
+ prev[1] = prev[2];
+ prev[2] = 0;
+ }
+ }
+ }
+ return Changed;
+ }
+ };
+ char AlphaLLRPPass::ID = 0;
+} // end of anonymous namespace
+
+FunctionPass *llvm::createAlphaLLRPPass(AlphaTargetMachine &tm) {
+ return new AlphaLLRPPass(tm);
+}
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaMachineFunctionInfo.h b/contrib/llvm/lib/Target/Alpha/AlphaMachineFunctionInfo.h
new file mode 100644
index 0000000..186738c
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/AlphaMachineFunctionInfo.h
@@ -0,0 +1,62 @@
+//====- AlphaMachineFuctionInfo.h - Alpha machine function info -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares Alpha-specific per-machine-function information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ALPHAMACHINEFUNCTIONINFO_H
+#define ALPHAMACHINEFUNCTIONINFO_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+/// AlphaMachineFunctionInfo - This class is derived from MachineFunction
+/// private Alpha target-specific information for each MachineFunction.
+class AlphaMachineFunctionInfo : public MachineFunctionInfo {
+ /// GlobalBaseReg - keeps track of the virtual register initialized for
+ /// use as the global base register. This is used for PIC in some PIC
+ /// relocation models.
+ unsigned GlobalBaseReg;
+
+ /// GlobalRetAddr = keeps track of the virtual register initialized for
+ /// the return address value.
+ unsigned GlobalRetAddr;
+
+ /// VarArgsOffset - What is the offset to the first vaarg
+ int VarArgsOffset;
+ /// VarArgsBase - What is the base FrameIndex
+ int VarArgsBase;
+
+public:
+ AlphaMachineFunctionInfo() : GlobalBaseReg(0), GlobalRetAddr(0),
+ VarArgsOffset(0), VarArgsBase(0) {}
+
+ explicit AlphaMachineFunctionInfo(MachineFunction &MF) : GlobalBaseReg(0),
+ GlobalRetAddr(0),
+ VarArgsOffset(0),
+ VarArgsBase(0) {}
+
+ unsigned getGlobalBaseReg() const { return GlobalBaseReg; }
+ void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; }
+
+ unsigned getGlobalRetAddr() const { return GlobalRetAddr; }
+ void setGlobalRetAddr(unsigned Reg) { GlobalRetAddr = Reg; }
+
+ int getVarArgsOffset() const { return VarArgsOffset; }
+ void setVarArgsOffset(int Offset) { VarArgsOffset = Offset; }
+
+ int getVarArgsBase() const { return VarArgsBase; }
+ void setVarArgsBase(int Base) { VarArgsBase = Base; }
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaRegisterInfo.cpp b/contrib/llvm/lib/Target/Alpha/AlphaRegisterInfo.cpp
new file mode 100644
index 0000000..8b6230f
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/AlphaRegisterInfo.cpp
@@ -0,0 +1,199 @@
+//===- AlphaRegisterInfo.cpp - Alpha Register Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Alpha implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "reginfo"
+#include "Alpha.h"
+#include "AlphaRegisterInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/Type.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include <cstdlib>
+
+#define GET_REGINFO_TARGET_DESC
+#include "AlphaGenRegisterInfo.inc"
+
+using namespace llvm;
+
+AlphaRegisterInfo::AlphaRegisterInfo(const TargetInstrInfo &tii)
+ : AlphaGenRegisterInfo(Alpha::R26), TII(tii) {
+}
+
+static long getUpper16(long l) {
+ long y = l / Alpha::IMM_MULT;
+ if (l % Alpha::IMM_MULT > Alpha::IMM_HIGH)
+ ++y;
+ return y;
+}
+
+static long getLower16(long l) {
+ long h = getUpper16(l);
+ return l - h * Alpha::IMM_MULT;
+}
+
+const unsigned* AlphaRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF)
+ const {
+ static const unsigned CalleeSavedRegs[] = {
+ Alpha::R9, Alpha::R10,
+ Alpha::R11, Alpha::R12,
+ Alpha::R13, Alpha::R14,
+ Alpha::F2, Alpha::F3,
+ Alpha::F4, Alpha::F5,
+ Alpha::F6, Alpha::F7,
+ Alpha::F8, Alpha::F9, 0
+ };
+ return CalleeSavedRegs;
+}
+
+BitVector AlphaRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+ Reserved.set(Alpha::R15);
+ Reserved.set(Alpha::R29);
+ Reserved.set(Alpha::R30);
+ Reserved.set(Alpha::R31);
+ return Reserved;
+}
+
+//===----------------------------------------------------------------------===//
+// Stack Frame Processing methods
+//===----------------------------------------------------------------------===//
+
+void AlphaRegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ if (TFI->hasFP(MF)) {
+ // If we have a frame pointer, turn the adjcallstackup instruction into a
+ // 'sub ESP, <amt>' and the adjcallstackdown instruction into 'add ESP,
+ // <amt>'
+ MachineInstr *Old = I;
+ uint64_t Amount = Old->getOperand(0).getImm();
+ if (Amount != 0) {
+ // We need to keep the stack aligned properly. To do this, we round the
+ // amount of space needed for the outgoing arguments up to the next
+ // alignment boundary.
+ unsigned Align = TFI->getStackAlignment();
+ Amount = (Amount+Align-1)/Align*Align;
+
+ MachineInstr *New;
+ if (Old->getOpcode() == Alpha::ADJUSTSTACKDOWN) {
+ New=BuildMI(MF, Old->getDebugLoc(), TII.get(Alpha::LDA), Alpha::R30)
+ .addImm(-Amount).addReg(Alpha::R30);
+ } else {
+ assert(Old->getOpcode() == Alpha::ADJUSTSTACKUP);
+ New=BuildMI(MF, Old->getDebugLoc(), TII.get(Alpha::LDA), Alpha::R30)
+ .addImm(Amount).addReg(Alpha::R30);
+ }
+
+ // Replace the pseudo instruction with a new instruction...
+ MBB.insert(I, New);
+ }
+ }
+
+ MBB.erase(I);
+}
+
+//Alpha has a slightly funny stack:
+//Args
+//<- incoming SP
+//fixed locals (and spills, callee saved, etc)
+//<- FP
+//variable locals
+//<- SP
+
+void
+AlphaRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS) const {
+ assert(SPAdj == 0 && "Unexpected");
+
+ unsigned i = 0;
+ MachineInstr &MI = *II;
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ bool FP = TFI->hasFP(MF);
+
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+
+ int FrameIndex = MI.getOperand(i).getIndex();
+
+ // Add the base register of R30 (SP) or R15 (FP).
+ MI.getOperand(i + 1).ChangeToRegister(FP ? Alpha::R15 : Alpha::R30, false);
+
+ // Now add the frame object offset to the offset from the virtual frame index.
+ int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex);
+
+ DEBUG(errs() << "FI: " << FrameIndex << " Offset: " << Offset << "\n");
+
+ Offset += MF.getFrameInfo()->getStackSize();
+
+ DEBUG(errs() << "Corrected Offset " << Offset
+ << " for stack size: " << MF.getFrameInfo()->getStackSize() << "\n");
+
+ if (Offset > Alpha::IMM_HIGH || Offset < Alpha::IMM_LOW) {
+ DEBUG(errs() << "Unconditionally using R28 for evil purposes Offset: "
+ << Offset << "\n");
+ //so in this case, we need to use a temporary register, and move the
+ //original inst off the SP/FP
+ //fix up the old:
+ MI.getOperand(i + 1).ChangeToRegister(Alpha::R28, false);
+ MI.getOperand(i).ChangeToImmediate(getLower16(Offset));
+ //insert the new
+ MachineInstr* nMI=BuildMI(MF, MI.getDebugLoc(),
+ TII.get(Alpha::LDAH), Alpha::R28)
+ .addImm(getUpper16(Offset)).addReg(FP ? Alpha::R15 : Alpha::R30);
+ MBB.insert(II, nMI);
+ } else {
+ MI.getOperand(i).ChangeToImmediate(Offset);
+ }
+}
+
+unsigned AlphaRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ return TFI->hasFP(MF) ? Alpha::R15 : Alpha::R30;
+}
+
+unsigned AlphaRegisterInfo::getEHExceptionRegister() const {
+ llvm_unreachable("What is the exception register");
+ return 0;
+}
+
+unsigned AlphaRegisterInfo::getEHHandlerRegister() const {
+ llvm_unreachable("What is the exception handler register");
+ return 0;
+}
+
+std::string AlphaRegisterInfo::getPrettyName(unsigned reg)
+{
+ std::string s(AlphaRegDesc[reg].Name);
+ return s;
+}
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaRegisterInfo.h b/contrib/llvm/lib/Target/Alpha/AlphaRegisterInfo.h
new file mode 100644
index 0000000..e35be27
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/AlphaRegisterInfo.h
@@ -0,0 +1,56 @@
+//===- AlphaRegisterInfo.h - Alpha Register Information Impl ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Alpha implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ALPHAREGISTERINFO_H
+#define ALPHAREGISTERINFO_H
+
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#define GET_REGINFO_HEADER
+#include "AlphaGenRegisterInfo.inc"
+
+namespace llvm {
+
+class TargetInstrInfo;
+class Type;
+
+struct AlphaRegisterInfo : public AlphaGenRegisterInfo {
+ const TargetInstrInfo &TII;
+
+ AlphaRegisterInfo(const TargetInstrInfo &tii);
+
+ /// Code Generation virtual methods...
+ const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
+
+ // Debug information queries.
+ unsigned getFrameRegister(const MachineFunction &MF) const;
+
+ // Exception handling queries.
+ unsigned getEHExceptionRegister() const;
+ unsigned getEHHandlerRegister() const;
+
+ static std::string getPrettyName(unsigned reg);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaRegisterInfo.td b/contrib/llvm/lib/Target/Alpha/AlphaRegisterInfo.td
new file mode 100644
index 0000000..32120d7
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/AlphaRegisterInfo.td
@@ -0,0 +1,133 @@
+//===- AlphaRegisterInfo.td - The Alpha Register File ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Alpha register set.
+//
+//===----------------------------------------------------------------------===//
+
+class AlphaReg<string n> : Register<n> {
+ field bits<5> Num;
+ let Namespace = "Alpha";
+}
+
+// We identify all our registers with a 5-bit ID, for consistency's sake.
+
+// GPR - One of the 32 32-bit general-purpose registers
+class GPR<bits<5> num, string n> : AlphaReg<n> {
+ let Num = num;
+}
+
+// FPR - One of the 32 64-bit floating-point registers
+class FPR<bits<5> num, string n> : AlphaReg<n> {
+ let Num = num;
+}
+
+//#define FP $15
+//#define RA $26
+//#define PV $27
+//#define GP $29
+//#define SP $30
+
+// General-purpose registers
+def R0 : GPR< 0, "$0">, DwarfRegNum<[0]>;
+def R1 : GPR< 1, "$1">, DwarfRegNum<[1]>;
+def R2 : GPR< 2, "$2">, DwarfRegNum<[2]>;
+def R3 : GPR< 3, "$3">, DwarfRegNum<[3]>;
+def R4 : GPR< 4, "$4">, DwarfRegNum<[4]>;
+def R5 : GPR< 5, "$5">, DwarfRegNum<[5]>;
+def R6 : GPR< 6, "$6">, DwarfRegNum<[6]>;
+def R7 : GPR< 7, "$7">, DwarfRegNum<[7]>;
+def R8 : GPR< 8, "$8">, DwarfRegNum<[8]>;
+def R9 : GPR< 9, "$9">, DwarfRegNum<[9]>;
+def R10 : GPR<10, "$10">, DwarfRegNum<[10]>;
+def R11 : GPR<11, "$11">, DwarfRegNum<[11]>;
+def R12 : GPR<12, "$12">, DwarfRegNum<[12]>;
+def R13 : GPR<13, "$13">, DwarfRegNum<[13]>;
+def R14 : GPR<14, "$14">, DwarfRegNum<[14]>;
+def R15 : GPR<15, "$15">, DwarfRegNum<[15]>;
+def R16 : GPR<16, "$16">, DwarfRegNum<[16]>;
+def R17 : GPR<17, "$17">, DwarfRegNum<[17]>;
+def R18 : GPR<18, "$18">, DwarfRegNum<[18]>;
+def R19 : GPR<19, "$19">, DwarfRegNum<[19]>;
+def R20 : GPR<20, "$20">, DwarfRegNum<[20]>;
+def R21 : GPR<21, "$21">, DwarfRegNum<[21]>;
+def R22 : GPR<22, "$22">, DwarfRegNum<[22]>;
+def R23 : GPR<23, "$23">, DwarfRegNum<[23]>;
+def R24 : GPR<24, "$24">, DwarfRegNum<[24]>;
+def R25 : GPR<25, "$25">, DwarfRegNum<[25]>;
+def R26 : GPR<26, "$26">, DwarfRegNum<[26]>;
+def R27 : GPR<27, "$27">, DwarfRegNum<[27]>;
+def R28 : GPR<28, "$28">, DwarfRegNum<[28]>;
+def R29 : GPR<29, "$29">, DwarfRegNum<[29]>;
+def R30 : GPR<30, "$30">, DwarfRegNum<[30]>;
+def R31 : GPR<31, "$31">, DwarfRegNum<[31]>;
+
+// Floating-point registers
+def F0 : FPR< 0, "$f0">, DwarfRegNum<[33]>;
+def F1 : FPR< 1, "$f1">, DwarfRegNum<[34]>;
+def F2 : FPR< 2, "$f2">, DwarfRegNum<[35]>;
+def F3 : FPR< 3, "$f3">, DwarfRegNum<[36]>;
+def F4 : FPR< 4, "$f4">, DwarfRegNum<[37]>;
+def F5 : FPR< 5, "$f5">, DwarfRegNum<[38]>;
+def F6 : FPR< 6, "$f6">, DwarfRegNum<[39]>;
+def F7 : FPR< 7, "$f7">, DwarfRegNum<[40]>;
+def F8 : FPR< 8, "$f8">, DwarfRegNum<[41]>;
+def F9 : FPR< 9, "$f9">, DwarfRegNum<[42]>;
+def F10 : FPR<10, "$f10">, DwarfRegNum<[43]>;
+def F11 : FPR<11, "$f11">, DwarfRegNum<[44]>;
+def F12 : FPR<12, "$f12">, DwarfRegNum<[45]>;
+def F13 : FPR<13, "$f13">, DwarfRegNum<[46]>;
+def F14 : FPR<14, "$f14">, DwarfRegNum<[47]>;
+def F15 : FPR<15, "$f15">, DwarfRegNum<[48]>;
+def F16 : FPR<16, "$f16">, DwarfRegNum<[49]>;
+def F17 : FPR<17, "$f17">, DwarfRegNum<[50]>;
+def F18 : FPR<18, "$f18">, DwarfRegNum<[51]>;
+def F19 : FPR<19, "$f19">, DwarfRegNum<[52]>;
+def F20 : FPR<20, "$f20">, DwarfRegNum<[53]>;
+def F21 : FPR<21, "$f21">, DwarfRegNum<[54]>;
+def F22 : FPR<22, "$f22">, DwarfRegNum<[55]>;
+def F23 : FPR<23, "$f23">, DwarfRegNum<[56]>;
+def F24 : FPR<24, "$f24">, DwarfRegNum<[57]>;
+def F25 : FPR<25, "$f25">, DwarfRegNum<[58]>;
+def F26 : FPR<26, "$f26">, DwarfRegNum<[59]>;
+def F27 : FPR<27, "$f27">, DwarfRegNum<[60]>;
+def F28 : FPR<28, "$f28">, DwarfRegNum<[61]>;
+def F29 : FPR<29, "$f29">, DwarfRegNum<[62]>;
+def F30 : FPR<30, "$f30">, DwarfRegNum<[63]>;
+def F31 : FPR<31, "$f31">, DwarfRegNum<[64]>;
+
+ // //#define FP $15
+ // //#define RA $26
+ // //#define PV $27
+ // //#define GP $29
+ // //#define SP $30
+ // $28 is undefined after any and all calls
+
+/// Register classes
+def GPRC : RegisterClass<"Alpha", [i64], 64, (add
+ // Volatile
+ R0, R1, R2, R3, R4, R5, R6, R7, R8, R16, R17, R18, R19, R20, R21, R22,
+ R23, R24, R25, R28,
+ //Special meaning, but volatile
+ R27, //procedure address
+ R26, //return address
+ R29, //global offset table address
+ // Non-volatile
+ R9, R10, R11, R12, R13, R14,
+// Don't allocate 15, 30, 31
+ R15, R30, R31)>; //zero
+
+def F4RC : RegisterClass<"Alpha", [f32], 64, (add F0, F1,
+ F10, F11, F12, F13, F14, F15, F16, F17, F18, F19,
+ F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F30,
+ // Saved:
+ F2, F3, F4, F5, F6, F7, F8, F9,
+ F31)>; //zero
+
+def F8RC : RegisterClass<"Alpha", [f64], 64, (add F4RC)>;
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaRelocations.h b/contrib/llvm/lib/Target/Alpha/AlphaRelocations.h
new file mode 100644
index 0000000..4c92045
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/AlphaRelocations.h
@@ -0,0 +1,31 @@
+//===- AlphaRelocations.h - Alpha Code Relocations --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the Alpha target-specific relocation types.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ALPHARELOCATIONS_H
+#define ALPHARELOCATIONS_H
+
+#include "llvm/CodeGen/MachineRelocation.h"
+
+namespace llvm {
+ namespace Alpha {
+ enum RelocationType {
+ reloc_literal,
+ reloc_gprellow,
+ reloc_gprelhigh,
+ reloc_gpdist,
+ reloc_bsr
+ };
+ }
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaSchedule.td b/contrib/llvm/lib/Target/Alpha/AlphaSchedule.td
new file mode 100644
index 0000000..3703dd4
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/AlphaSchedule.td
@@ -0,0 +1,85 @@
+//===- AlphaSchedule.td - Alpha Scheduling Definitions -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//This is table 2-2 from the 21264 compiler writers guide
+//modified some
+
+//Pipelines
+
+def L0 : FuncUnit;
+def L1 : FuncUnit;
+def FST0 : FuncUnit;
+def FST1 : FuncUnit;
+def U0 : FuncUnit;
+def U1 : FuncUnit;
+def FA : FuncUnit;
+def FM : FuncUnit;
+
+def s_ild : InstrItinClass;
+def s_fld : InstrItinClass;
+def s_ist : InstrItinClass;
+def s_fst : InstrItinClass;
+def s_lda : InstrItinClass;
+def s_rpcc : InstrItinClass;
+def s_rx : InstrItinClass;
+def s_mxpr : InstrItinClass;
+def s_icbr : InstrItinClass;
+def s_ubr : InstrItinClass;
+def s_jsr : InstrItinClass;
+def s_iadd : InstrItinClass;
+def s_ilog : InstrItinClass;
+def s_ishf : InstrItinClass;
+def s_cmov : InstrItinClass;
+def s_imul : InstrItinClass;
+def s_imisc : InstrItinClass;
+def s_fbr : InstrItinClass;
+def s_fadd : InstrItinClass;
+def s_fmul : InstrItinClass;
+def s_fcmov : InstrItinClass;
+def s_fdivt : InstrItinClass;
+def s_fdivs : InstrItinClass;
+def s_fsqrts: InstrItinClass;
+def s_fsqrtt: InstrItinClass;
+def s_ftoi : InstrItinClass;
+def s_itof : InstrItinClass;
+def s_pseudo : InstrItinClass;
+
+//Table 2-4 Instruction Class Latency in Cycles
+//modified some
+
+def Alpha21264Itineraries : ProcessorItineraries<
+ [L0, L1, FST0, FST1, U0, U1, FA, FM], [], [
+ InstrItinData<s_ild , [InstrStage<3, [L0, L1]>]>,
+ InstrItinData<s_fld , [InstrStage<4, [L0, L1]>]>,
+ InstrItinData<s_ist , [InstrStage<0, [L0, L1]>]>,
+ InstrItinData<s_fst , [InstrStage<0, [FST0, FST1, L0, L1]>]>,
+ InstrItinData<s_lda , [InstrStage<1, [L0, L1, U0, U1]>]>,
+ InstrItinData<s_rpcc , [InstrStage<1, [L1]>]>,
+ InstrItinData<s_rx , [InstrStage<1, [L1]>]>,
+ InstrItinData<s_mxpr , [InstrStage<1, [L0, L1]>]>,
+ InstrItinData<s_icbr , [InstrStage<0, [U0, U1]>]>,
+ InstrItinData<s_ubr , [InstrStage<3, [U0, U1]>]>,
+ InstrItinData<s_jsr , [InstrStage<3, [L0]>]>,
+ InstrItinData<s_iadd , [InstrStage<1, [L0, U0, L1, U1]>]>,
+ InstrItinData<s_ilog , [InstrStage<1, [L0, U0, L1, U1]>]>,
+ InstrItinData<s_ishf , [InstrStage<1, [U0, U1]>]>,
+ InstrItinData<s_cmov , [InstrStage<1, [L0, U0, L1, U1]>]>,
+ InstrItinData<s_imul , [InstrStage<7, [U1]>]>,
+ InstrItinData<s_imisc , [InstrStage<3, [U0]>]>,
+ InstrItinData<s_fbr , [InstrStage<0, [FA]>]>,
+ InstrItinData<s_fadd , [InstrStage<6, [FA]>]>,
+ InstrItinData<s_fmul , [InstrStage<6, [FM]>]>,
+ InstrItinData<s_fcmov , [InstrStage<6, [FA]>]>,
+ InstrItinData<s_fdivs , [InstrStage<12, [FA]>]>,
+ InstrItinData<s_fdivt , [InstrStage<15, [FA]>]>,
+ InstrItinData<s_fsqrts , [InstrStage<18, [FA]>]>,
+ InstrItinData<s_fsqrtt , [InstrStage<33, [FA]>]>,
+ InstrItinData<s_ftoi , [InstrStage<3, [FST0, FST1, L0, L1]>]>,
+ InstrItinData<s_itof , [InstrStage<4, [L0, L1]>]>
+]>;
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/Alpha/AlphaSelectionDAGInfo.cpp
new file mode 100644
index 0000000..f1958fe
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/AlphaSelectionDAGInfo.cpp
@@ -0,0 +1,23 @@
+//===-- AlphaSelectionDAGInfo.cpp - Alpha SelectionDAG Info ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AlphaSelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "alpha-selectiondag-info"
+#include "AlphaTargetMachine.h"
+using namespace llvm;
+
+AlphaSelectionDAGInfo::AlphaSelectionDAGInfo(const AlphaTargetMachine &TM)
+ : TargetSelectionDAGInfo(TM) {
+}
+
+AlphaSelectionDAGInfo::~AlphaSelectionDAGInfo() {
+}
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaSelectionDAGInfo.h b/contrib/llvm/lib/Target/Alpha/AlphaSelectionDAGInfo.h
new file mode 100644
index 0000000..3405cc0
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/AlphaSelectionDAGInfo.h
@@ -0,0 +1,31 @@
+//===-- AlphaSelectionDAGInfo.h - Alpha SelectionDAG Info -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the Alpha subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ALPHASELECTIONDAGINFO_H
+#define ALPHASELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class AlphaTargetMachine;
+
+class AlphaSelectionDAGInfo : public TargetSelectionDAGInfo {
+public:
+ explicit AlphaSelectionDAGInfo(const AlphaTargetMachine &TM);
+ ~AlphaSelectionDAGInfo();
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaSubtarget.cpp b/contrib/llvm/lib/Target/Alpha/AlphaSubtarget.cpp
new file mode 100644
index 0000000..bd55ce9
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/AlphaSubtarget.cpp
@@ -0,0 +1,35 @@
+//===- AlphaSubtarget.cpp - Alpha Subtarget Information ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Alpha specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AlphaSubtarget.h"
+#include "Alpha.h"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "AlphaGenSubtargetInfo.inc"
+
+using namespace llvm;
+
+AlphaSubtarget::AlphaSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS)
+ : AlphaGenSubtargetInfo(TT, CPU, FS), HasCT(false) {
+ std::string CPUName = CPU;
+ if (CPUName.empty())
+ CPUName = "generic";
+
+ // Parse features string.
+ ParseSubtargetFeatures(CPUName, FS);
+
+ // Initialize scheduling itinerary for the specified CPU.
+ InstrItins = getInstrItineraryForCPU(CPUName);
+}
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaSubtarget.h b/contrib/llvm/lib/Target/Alpha/AlphaSubtarget.h
new file mode 100644
index 0000000..70b3116
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/AlphaSubtarget.h
@@ -0,0 +1,49 @@
+//=====-- AlphaSubtarget.h - Define Subtarget for the Alpha --*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Alpha specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ALPHASUBTARGET_H
+#define ALPHASUBTARGET_H
+
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include <string>
+
+#define GET_SUBTARGETINFO_HEADER
+#include "AlphaGenSubtargetInfo.inc"
+
+namespace llvm {
+class StringRe;
+
+class AlphaSubtarget : public AlphaGenSubtargetInfo {
+protected:
+
+ bool HasCT;
+
+ InstrItineraryData InstrItins;
+
+public:
+ /// This constructor initializes the data members to match that
+ /// of the specified triple.
+ ///
+ AlphaSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS);
+
+ /// ParseSubtargetFeatures - Parses features string setting specified
+ /// subtarget options. Definition of function is auto generated by tblgen.
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+
+ bool hasCT() const { return HasCT; }
+};
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaTargetMachine.cpp b/contrib/llvm/lib/Target/Alpha/AlphaTargetMachine.cpp
new file mode 100644
index 0000000..fc9a677
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/AlphaTargetMachine.cpp
@@ -0,0 +1,51 @@
+//===-- AlphaTargetMachine.cpp - Define TargetMachine for Alpha -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "Alpha.h"
+#include "AlphaTargetMachine.h"
+#include "llvm/PassManager.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+extern "C" void LLVMInitializeAlphaTarget() {
+ // Register the target.
+ RegisterTargetMachine<AlphaTargetMachine> X(TheAlphaTarget);
+}
+
+AlphaTargetMachine::AlphaTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM)
+ : LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
+ DataLayout("e-f128:128:128-n64"),
+ FrameLowering(Subtarget),
+ Subtarget(TT, CPU, FS),
+ TLInfo(*this),
+ TSInfo(*this) {
+}
+
+//===----------------------------------------------------------------------===//
+// Pass Pipeline Configuration
+//===----------------------------------------------------------------------===//
+
+bool AlphaTargetMachine::addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ PM.add(createAlphaISelDag(*this));
+ return false;
+}
+bool AlphaTargetMachine::addPreEmitPass(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ // Must run branch selection immediately preceding the asm printer
+ PM.add(createAlphaBranchSelectionPass());
+ PM.add(createAlphaLLRPPass(*this));
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaTargetMachine.h b/contrib/llvm/lib/Target/Alpha/AlphaTargetMachine.h
new file mode 100644
index 0000000..48bb948
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/AlphaTargetMachine.h
@@ -0,0 +1,66 @@
+//===-- AlphaTargetMachine.h - Define TargetMachine for Alpha ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Alpha-specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ALPHA_TARGETMACHINE_H
+#define ALPHA_TARGETMACHINE_H
+
+#include "AlphaInstrInfo.h"
+#include "AlphaISelLowering.h"
+#include "AlphaFrameLowering.h"
+#include "AlphaSelectionDAGInfo.h"
+#include "AlphaSubtarget.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+
+class GlobalValue;
+
+class AlphaTargetMachine : public LLVMTargetMachine {
+ const TargetData DataLayout; // Calculates type size & alignment
+ AlphaInstrInfo InstrInfo;
+ AlphaFrameLowering FrameLowering;
+ AlphaSubtarget Subtarget;
+ AlphaTargetLowering TLInfo;
+ AlphaSelectionDAGInfo TSInfo;
+
+public:
+ AlphaTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM);
+
+ virtual const AlphaInstrInfo *getInstrInfo() const { return &InstrInfo; }
+ virtual const TargetFrameLowering *getFrameLowering() const {
+ return &FrameLowering;
+ }
+ virtual const AlphaSubtarget *getSubtargetImpl() const{ return &Subtarget; }
+ virtual const AlphaRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+ virtual const AlphaTargetLowering* getTargetLowering() const {
+ return &TLInfo;
+ }
+ virtual const AlphaSelectionDAGInfo* getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+ virtual const TargetData *getTargetData() const { return &DataLayout; }
+
+ // Pass Pipeline Configuration
+ virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Alpha/MCTargetDesc/AlphaMCAsmInfo.cpp b/contrib/llvm/lib/Target/Alpha/MCTargetDesc/AlphaMCAsmInfo.cpp
new file mode 100644
index 0000000..a35e884
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/MCTargetDesc/AlphaMCAsmInfo.cpp
@@ -0,0 +1,23 @@
+//===-- AlphaMCAsmInfo.cpp - Alpha asm properties ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the AlphaMCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AlphaMCAsmInfo.h"
+using namespace llvm;
+
+AlphaMCAsmInfo::AlphaMCAsmInfo(const Target &T, StringRef TT) {
+ AlignmentIsInBytes = false;
+ PrivateGlobalPrefix = "$";
+ GPRel32Directive = ".gprel32";
+ WeakRefDirective = "\t.weak\t";
+ HasSetDirective = false;
+}
diff --git a/contrib/llvm/lib/Target/Alpha/MCTargetDesc/AlphaMCAsmInfo.h b/contrib/llvm/lib/Target/Alpha/MCTargetDesc/AlphaMCAsmInfo.h
new file mode 100644
index 0000000..837844b
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/MCTargetDesc/AlphaMCAsmInfo.h
@@ -0,0 +1,29 @@
+//=====-- AlphaMCAsmInfo.h - Alpha asm properties -------------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the AlphaMCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ALPHATARGETASMINFO_H
+#define ALPHATARGETASMINFO_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCAsmInfo.h"
+
+namespace llvm {
+ class Target;
+
+ struct AlphaMCAsmInfo : public MCAsmInfo {
+ explicit AlphaMCAsmInfo(const Target &T, StringRef TT);
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.cpp b/contrib/llvm/lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.cpp
new file mode 100644
index 0000000..4ad021c
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.cpp
@@ -0,0 +1,78 @@
+//===-- AlphaMCTargetDesc.cpp - Alpha Target Descriptions -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides Alpha specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AlphaMCTargetDesc.h"
+#include "AlphaMCAsmInfo.h"
+#include "llvm/MC/MCCodeGenInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "AlphaGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "AlphaGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "AlphaGenRegisterInfo.inc"
+
+using namespace llvm;
+
+
+static MCInstrInfo *createAlphaMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitAlphaMCInstrInfo(X);
+ return X;
+}
+
+static MCRegisterInfo *createAlphaMCRegisterInfo(StringRef TT) {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitAlphaMCRegisterInfo(X, Alpha::R26);
+ return X;
+}
+
+static MCSubtargetInfo *createAlphaMCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS) {
+ MCSubtargetInfo *X = new MCSubtargetInfo();
+ InitAlphaMCSubtargetInfo(X, TT, CPU, FS);
+ return X;
+}
+
+static MCCodeGenInfo *createAlphaMCCodeGenInfo(StringRef TT, Reloc::Model RM,
+ CodeModel::Model CM) {
+ MCCodeGenInfo *X = new MCCodeGenInfo();
+ X->InitMCCodeGenInfo(Reloc::PIC_, CM);
+ return X;
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeAlphaTargetMC() {
+ // Register the MC asm info.
+ RegisterMCAsmInfo<AlphaMCAsmInfo> X(TheAlphaTarget);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(TheAlphaTarget,
+ createAlphaMCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(TheAlphaTarget, createAlphaMCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(TheAlphaTarget, createAlphaMCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(TheAlphaTarget,
+ createAlphaMCSubtargetInfo);
+}
diff --git a/contrib/llvm/lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.h b/contrib/llvm/lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.h
new file mode 100644
index 0000000..b0619e6
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.h
@@ -0,0 +1,40 @@
+//===-- AlphaMCTargetDesc.h - Alpha Target Descriptions ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides Alpha specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ALPHAMCTARGETDESC_H
+#define ALPHAMCTARGETDESC_H
+
+namespace llvm {
+class MCSubtargetInfo;
+class Target;
+class StringRef;
+
+extern Target TheAlphaTarget;
+
+} // End llvm namespace
+
+// Defines symbolic names for Alpha registers. This defines a mapping from
+// register name to register number.
+//
+#define GET_REGINFO_ENUM
+#include "AlphaGenRegisterInfo.inc"
+
+// Defines symbolic names for the Alpha instructions.
+//
+#define GET_INSTRINFO_ENUM
+#include "AlphaGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "AlphaGenSubtargetInfo.inc"
+
+#endif
diff --git a/contrib/llvm/lib/Target/Alpha/TargetInfo/AlphaTargetInfo.cpp b/contrib/llvm/lib/Target/Alpha/TargetInfo/AlphaTargetInfo.cpp
new file mode 100644
index 0000000..bdc69e7
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/TargetInfo/AlphaTargetInfo.cpp
@@ -0,0 +1,20 @@
+//===-- AlphaTargetInfo.cpp - Alpha Target Implementation -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Alpha.h"
+#include "llvm/Module.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+llvm::Target llvm::TheAlphaTarget;
+
+extern "C" void LLVMInitializeAlphaTargetInfo() {
+ RegisterTarget<Triple::alpha, /*HasJIT=*/true>
+ X(TheAlphaTarget, "alpha", "Alpha [experimental]");
+}
diff --git a/contrib/llvm/lib/Target/Blackfin/Blackfin.h b/contrib/llvm/lib/Target/Blackfin/Blackfin.h
new file mode 100644
index 0000000..a00ff4c
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/Blackfin.h
@@ -0,0 +1,31 @@
+//=== Blackfin.h - Top-level interface for Blackfin backend -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// Blackfin back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TARGET_BLACKFIN_H
+#define TARGET_BLACKFIN_H
+
+#include "MCTargetDesc/BlackfinMCTargetDesc.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+ class FunctionPass;
+ class BlackfinTargetMachine;
+
+ FunctionPass *createBlackfinISelDag(BlackfinTargetMachine &TM,
+ CodeGenOpt::Level OptLevel);
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Blackfin/Blackfin.td b/contrib/llvm/lib/Target/Blackfin/Blackfin.td
new file mode 100644
index 0000000..cd90962
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/Blackfin.td
@@ -0,0 +1,202 @@
+//===- Blackfin.td - Describe the Blackfin Target Machine --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces which we are implementing
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// Blackfin Subtarget features.
+//===----------------------------------------------------------------------===//
+
+def FeatureSDRAM : SubtargetFeature<"sdram", "sdram", "true",
+ "Build for SDRAM">;
+
+def FeatureICPLB : SubtargetFeature<"icplb", "icplb", "true",
+ "Assume instruction cache lookaside buffers are enabled at runtime">;
+
+//===----------------------------------------------------------------------===//
+// Bugs in the silicon becomes workarounds in the compiler.
+// See http://www.analog.com/ for the full list of IC anomalies.
+//===----------------------------------------------------------------------===//
+
+def WA_MI_SHIFT : SubtargetFeature<"mi-shift-anomaly","wa_mi_shift", "true",
+ "Work around 05000074 - "
+ "Multi-Issue Instruction with dsp32shiftimm and P-reg Store">;
+
+def WA_CSYNC : SubtargetFeature<"csync-anomaly","wa_csync", "true",
+ "Work around 05000244 - "
+ "If I-Cache Is On, CSYNC/SSYNC/IDLE Around Change of Control">;
+
+def WA_SPECLD : SubtargetFeature<"specld-anomaly","wa_specld", "true",
+ "Work around 05000245 - "
+ "Access in the Shadow of a Conditional Branch">;
+
+def WA_HWLOOP : SubtargetFeature<"hwloop-anomaly","wa_hwloop", "true",
+ "Work around 05000257 - "
+ "Interrupt/Exception During Short Hardware Loop">;
+
+def WA_MMR_STALL : SubtargetFeature<"mmr-stall-anomaly","wa_mmr_stall", "true",
+ "Work around 05000283 - "
+ "System MMR Write Is Stalled Indefinitely when Killed">;
+
+def WA_LCREGS : SubtargetFeature<"lcregs-anomaly","wa_lcregs", "true",
+ "Work around 05000312 - "
+ "SSYNC, CSYNC, or Loads to LT, LB and LC Registers Are Interrupted">;
+
+def WA_KILLED_MMR : SubtargetFeature<"killed-mmr-anomaly",
+ "wa_killed_mmr", "true",
+ "Work around 05000315 - "
+ "Killed System MMR Write Completes Erroneously on Next System MMR Access">;
+
+def WA_RETS : SubtargetFeature<"rets-anomaly", "wa_rets", "true",
+ "Work around 05000371 - "
+ "Possible RETS Register Corruption when Subroutine Is under 5 Cycles">;
+
+def WA_IND_CALL : SubtargetFeature<"ind-call-anomaly", "wa_ind_call", "true",
+ "Work around 05000426 - "
+ "Speculative Fetches of Indirect-Pointer Instructions">;
+
+//===----------------------------------------------------------------------===//
+// Register File, Calling Conv, Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "BlackfinRegisterInfo.td"
+include "BlackfinCallingConv.td"
+include "BlackfinIntrinsics.td"
+include "BlackfinInstrInfo.td"
+
+def BlackfinInstrInfo : InstrInfo {}
+
+//===----------------------------------------------------------------------===//
+// Blackfin processors supported.
+//===----------------------------------------------------------------------===//
+
+class Proc<string Name, string Suffix, list<SubtargetFeature> Features>
+ : Processor<!strconcat(Name, Suffix), NoItineraries, Features>;
+
+def : Proc<"generic", "", []>;
+
+multiclass Core<string Name,string Suffix,
+ list<SubtargetFeature> Features> {
+ def : Proc<Name, Suffix, Features>;
+ def : Proc<Name, "", Features>;
+ def : Proc<Name, "-none", []>;
+}
+
+multiclass CoreEdinburgh<string Name>
+ : Core<Name, "-0.6", [WA_MI_SHIFT, WA_SPECLD, WA_LCREGS]> {
+ def : Proc<Name, "-0.5",
+ [WA_MI_SHIFT, WA_SPECLD, WA_MMR_STALL, WA_LCREGS, WA_KILLED_MMR,
+ WA_RETS]>;
+ def : Proc<Name, "-0.4",
+ [WA_MI_SHIFT, WA_CSYNC, WA_SPECLD, WA_HWLOOP, WA_MMR_STALL, WA_LCREGS,
+ WA_KILLED_MMR, WA_RETS]>;
+ def : Proc<Name, "-0.3",
+ [WA_MI_SHIFT, WA_CSYNC, WA_SPECLD, WA_HWLOOP, WA_MMR_STALL, WA_LCREGS,
+ WA_KILLED_MMR, WA_RETS]>;
+ def : Proc<Name, "-any",
+ [WA_MI_SHIFT, WA_CSYNC, WA_SPECLD, WA_HWLOOP, WA_MMR_STALL, WA_LCREGS,
+ WA_KILLED_MMR, WA_RETS]>;
+}
+multiclass CoreBraemar<string Name>
+ : Core<Name, "-0.3",
+ [WA_MI_SHIFT, WA_SPECLD, WA_LCREGS, WA_RETS, WA_IND_CALL]> {
+ def : Proc<Name, "-0.2",
+ [WA_MI_SHIFT, WA_CSYNC, WA_SPECLD, WA_HWLOOP, WA_MMR_STALL, WA_LCREGS,
+ WA_KILLED_MMR, WA_RETS, WA_IND_CALL]>;
+ def : Proc<Name, "-any",
+ [WA_MI_SHIFT, WA_CSYNC, WA_SPECLD, WA_HWLOOP, WA_MMR_STALL, WA_LCREGS,
+ WA_KILLED_MMR, WA_RETS, WA_IND_CALL]>;
+}
+multiclass CoreStirling<string Name>
+ : Core<Name, "-0.5", [WA_MI_SHIFT, WA_SPECLD, WA_IND_CALL]> {
+ def : Proc<Name, "-0.4",
+ [WA_MI_SHIFT, WA_SPECLD, WA_LCREGS, WA_RETS, WA_IND_CALL]>;
+ def : Proc<Name, "-0.3",
+ [WA_MI_SHIFT, WA_SPECLD, WA_MMR_STALL, WA_LCREGS, WA_KILLED_MMR,
+ WA_RETS, WA_IND_CALL]>;
+ def : Proc<Name, "-any",
+ [WA_MI_SHIFT, WA_SPECLD, WA_MMR_STALL, WA_LCREGS, WA_KILLED_MMR,
+ WA_RETS, WA_IND_CALL]>;
+}
+multiclass CoreMoab<string Name>
+ : Core<Name, "-0.3", [WA_MI_SHIFT, WA_SPECLD, WA_IND_CALL]> {
+ def : Proc<Name, "-0.2", [WA_MI_SHIFT, WA_SPECLD, WA_IND_CALL]>;
+ def : Proc<Name, "-0.1", [WA_MI_SHIFT, WA_SPECLD, WA_RETS, WA_IND_CALL]>;
+ def : Proc<Name, "-0.0",
+ [WA_MI_SHIFT, WA_SPECLD, WA_LCREGS, WA_RETS, WA_IND_CALL]>;
+ def : Proc<Name, "-any",
+ [WA_MI_SHIFT, WA_SPECLD, WA_LCREGS, WA_RETS, WA_IND_CALL]>;
+}
+multiclass CoreTeton<string Name>
+ : Core<Name, "-0.5",
+ [WA_MI_SHIFT, WA_SPECLD, WA_MMR_STALL, WA_LCREGS, WA_KILLED_MMR,
+ WA_RETS, WA_IND_CALL]> {
+ def : Proc<Name, "-0.3",
+ [WA_MI_SHIFT, WA_CSYNC, WA_SPECLD, WA_HWLOOP, WA_MMR_STALL, WA_LCREGS,
+ WA_KILLED_MMR, WA_RETS, WA_IND_CALL]>;
+ def : Proc<Name, "-any",
+ [WA_MI_SHIFT, WA_CSYNC, WA_SPECLD, WA_HWLOOP, WA_MMR_STALL, WA_LCREGS,
+ WA_KILLED_MMR, WA_RETS, WA_IND_CALL]>;
+}
+multiclass CoreKookaburra<string Name>
+ : Core<Name, "-0.2", [WA_MI_SHIFT, WA_SPECLD, WA_IND_CALL]> {
+ def : Proc<Name, "-0.1", [WA_MI_SHIFT, WA_SPECLD, WA_RETS, WA_IND_CALL]>;
+ def : Proc<Name, "-0.0", [WA_MI_SHIFT, WA_SPECLD, WA_RETS, WA_IND_CALL]>;
+ def : Proc<Name, "-any", [WA_MI_SHIFT, WA_SPECLD, WA_RETS, WA_IND_CALL]>;
+}
+multiclass CoreMockingbird<string Name>
+ : Core<Name, "-0.1", [WA_MI_SHIFT, WA_SPECLD, WA_IND_CALL]> {
+ def : Proc<Name, "-0.0", [WA_MI_SHIFT, WA_SPECLD, WA_IND_CALL]>;
+ def : Proc<Name, "-any", [WA_MI_SHIFT, WA_SPECLD, WA_IND_CALL]>;
+}
+multiclass CoreBrodie<string Name>
+ : Core<Name, "-0.1", [WA_MI_SHIFT, WA_SPECLD, WA_IND_CALL]> {
+ def : Proc<Name, "-0.0", [WA_MI_SHIFT, WA_SPECLD, WA_IND_CALL]>;
+ def : Proc<Name, "-any", [WA_MI_SHIFT, WA_SPECLD, WA_IND_CALL]>;
+}
+
+defm BF512 : CoreBrodie<"bf512">;
+defm BF514 : CoreBrodie<"bf514">;
+defm BF516 : CoreBrodie<"bf516">;
+defm BF518 : CoreBrodie<"bf518">;
+defm BF522 : CoreMockingbird<"bf522">;
+defm BF523 : CoreKookaburra<"bf523">;
+defm BF524 : CoreMockingbird<"bf524">;
+defm BF525 : CoreKookaburra<"bf525">;
+defm BF526 : CoreMockingbird<"bf526">;
+defm BF527 : CoreKookaburra<"bf527">;
+defm BF531 : CoreEdinburgh<"bf531">;
+defm BF532 : CoreEdinburgh<"bf532">;
+defm BF533 : CoreEdinburgh<"bf533">;
+defm BF534 : CoreBraemar<"bf534">;
+defm BF536 : CoreBraemar<"bf536">;
+defm BF537 : CoreBraemar<"bf537">;
+defm BF538 : CoreStirling<"bf538">;
+defm BF539 : CoreStirling<"bf539">;
+defm BF542 : CoreMoab<"bf542">;
+defm BF544 : CoreMoab<"bf544">;
+defm BF548 : CoreMoab<"bf548">;
+defm BF549 : CoreMoab<"bf549">;
+defm BF561 : CoreTeton<"bf561">;
+
+//===----------------------------------------------------------------------===//
+// Declare the target which we are implementing
+//===----------------------------------------------------------------------===//
+
+def Blackfin : Target {
+ // Pull in Instruction Info:
+ let InstructionSet = BlackfinInstrInfo;
+}
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinAsmPrinter.cpp b/contrib/llvm/lib/Target/Blackfin/BlackfinAsmPrinter.cpp
new file mode 100644
index 0000000..ed9844e
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinAsmPrinter.cpp
@@ -0,0 +1,156 @@
+//===-- BlackfinAsmPrinter.cpp - Blackfin LLVM assembly writer ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to GAS-format BLACKFIN assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "Blackfin.h"
+#include "BlackfinInstrInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+ class BlackfinAsmPrinter : public AsmPrinter {
+ public:
+ BlackfinAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : AsmPrinter(TM, Streamer) {}
+
+ virtual const char *getPassName() const {
+ return "Blackfin Assembly Printer";
+ }
+
+ void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
+ void printMemoryOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
+ void printInstruction(const MachineInstr *MI, raw_ostream &O);// autogen'd.
+ static const char *getRegisterName(unsigned RegNo);
+
+ void EmitInstruction(const MachineInstr *MI) {
+ SmallString<128> Str;
+ raw_svector_ostream OS(Str);
+ printInstruction(MI, OS);
+ OutStreamer.EmitRawText(OS.str());
+ }
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O);
+ bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O);
+ };
+} // end of anonymous namespace
+
+#include "BlackfinGenAsmWriter.inc"
+
+extern "C" void LLVMInitializeBlackfinAsmPrinter() {
+ RegisterAsmPrinter<BlackfinAsmPrinter> X(TheBlackfinTarget);
+}
+
+void BlackfinAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
+ raw_ostream &O) {
+ const MachineOperand &MO = MI->getOperand(opNum);
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register:
+ assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) &&
+ "Virtual registers should be already mapped!");
+ O << getRegisterName(MO.getReg());
+ break;
+
+ case MachineOperand::MO_Immediate:
+ O << MO.getImm();
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ O << *MO.getMBB()->getSymbol();
+ return;
+ case MachineOperand::MO_GlobalAddress:
+ O << *Mang->getSymbol(MO.getGlobal());
+ printOffset(MO.getOffset(), O);
+ break;
+ case MachineOperand::MO_ExternalSymbol:
+ O << *GetExternalSymbolSymbol(MO.getSymbolName());
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << "_"
+ << MO.getIndex();
+ break;
+ case MachineOperand::MO_JumpTableIndex:
+ O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+ << '_' << MO.getIndex();
+ break;
+ default:
+ llvm_unreachable("<unknown operand type>");
+ break;
+ }
+}
+
+void BlackfinAsmPrinter::printMemoryOperand(const MachineInstr *MI, int opNum,
+ raw_ostream &O) {
+ printOperand(MI, opNum, O);
+
+ if (MI->getOperand(opNum+1).isImm() && MI->getOperand(opNum+1).getImm() == 0)
+ return;
+
+ O << " + ";
+ printOperand(MI, opNum+1, O);
+}
+
+/// PrintAsmOperand - Print out an operand for an inline asm expression.
+///
+bool BlackfinAsmPrinter::PrintAsmOperand(const MachineInstr *MI,
+ unsigned OpNo, unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &O) {
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ default: return true; // Unknown modifier.
+ case 'r':
+ break;
+ }
+ }
+
+ printOperand(MI, OpNo, O);
+
+ return false;
+}
+
+bool BlackfinAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &O) {
+ if (ExtraCode && ExtraCode[0])
+ return true; // Unknown modifier
+
+ O << '[';
+ printOperand(MI, OpNo, O);
+ O << ']';
+
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinCallingConv.td b/contrib/llvm/lib/Target/Blackfin/BlackfinCallingConv.td
new file mode 100644
index 0000000..0abc84c
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinCallingConv.td
@@ -0,0 +1,30 @@
+//===--- BlackfinCallingConv.td - Calling Conventions ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This describes the calling conventions for the Blackfin architectures.
+//
+//===----------------------------------------------------------------------===//
+
+// Blackfin C Calling convention.
+def CC_Blackfin : CallingConv<[
+ CCIfType<[i16], CCPromoteToType<i32>>,
+ CCIfSRet<CCAssignToReg<[P0]>>,
+ CCAssignToReg<[R0, R1, R2]>,
+ CCAssignToStack<4, 4>
+]>;
+
+//===----------------------------------------------------------------------===//
+// Return Value Calling Conventions
+//===----------------------------------------------------------------------===//
+
+// Blackfin C return-value convention.
+def RetCC_Blackfin : CallingConv<[
+ CCIfType<[i16], CCPromoteToType<i32>>,
+ CCAssignToReg<[R0, R1]>
+]>;
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinFrameLowering.cpp b/contrib/llvm/lib/Target/Blackfin/BlackfinFrameLowering.cpp
new file mode 100644
index 0000000..0b0984d
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinFrameLowering.cpp
@@ -0,0 +1,130 @@
+//====- BlackfinFrameLowering.cpp - Blackfin Frame Information --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Blackfin implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "BlackfinFrameLowering.h"
+#include "BlackfinInstrInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Target/TargetOptions.h"
+
+using namespace llvm;
+
+
+// hasFP - Return true if the specified function should have a dedicated frame
+// pointer register. This is true if the function has variable sized allocas or
+// if frame pointer elimination is disabled.
+bool BlackfinFrameLowering::hasFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ return DisableFramePointerElim(MF) ||
+ MFI->adjustsStack() || MFI->hasVarSizedObjects();
+}
+
+// Always reserve a call frame. We dont have enough registers to adjust SP.
+bool BlackfinFrameLowering::
+hasReservedCallFrame(const MachineFunction &MF) const {
+ return true;
+}
+
+// Emit a prologue that sets up a stack frame.
+// On function entry, R0-R2 and P0 may hold arguments.
+// R3, P1, and P2 may be used as scratch registers
+void BlackfinFrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const BlackfinRegisterInfo *RegInfo =
+ static_cast<const BlackfinRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ const BlackfinInstrInfo &TII =
+ *static_cast<const BlackfinInstrInfo*>(MF.getTarget().getInstrInfo());
+
+ DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+ int FrameSize = MFI->getStackSize();
+ if (FrameSize%4) {
+ FrameSize = (FrameSize+3) & ~3;
+ MFI->setStackSize(FrameSize);
+ }
+
+ if (!hasFP(MF)) {
+ assert(!MFI->adjustsStack() &&
+ "FP elimination on a non-leaf function is not supported");
+ RegInfo->adjustRegister(MBB, MBBI, dl, BF::SP, BF::P1, -FrameSize);
+ return;
+ }
+
+ // emit a LINK instruction
+ if (FrameSize <= 0x3ffff) {
+ BuildMI(MBB, MBBI, dl, TII.get(BF::LINK)).addImm(FrameSize);
+ return;
+ }
+
+ // Frame is too big, do a manual LINK:
+ // [--SP] = RETS;
+ // [--SP] = FP;
+ // FP = SP;
+ // P1 = -FrameSize;
+ // SP = SP + P1;
+ BuildMI(MBB, MBBI, dl, TII.get(BF::PUSH))
+ .addReg(BF::RETS, RegState::Kill);
+ BuildMI(MBB, MBBI, dl, TII.get(BF::PUSH))
+ .addReg(BF::FP, RegState::Kill);
+ BuildMI(MBB, MBBI, dl, TII.get(BF::MOVE), BF::FP)
+ .addReg(BF::SP);
+ RegInfo->loadConstant(MBB, MBBI, dl, BF::P1, -FrameSize);
+ BuildMI(MBB, MBBI, dl, TII.get(BF::ADDpp), BF::SP)
+ .addReg(BF::SP, RegState::Kill)
+ .addReg(BF::P1, RegState::Kill);
+
+}
+
+void BlackfinFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const BlackfinRegisterInfo *RegInfo =
+ static_cast<const BlackfinRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ const BlackfinInstrInfo &TII =
+ *static_cast<const BlackfinInstrInfo*>(MF.getTarget().getInstrInfo());
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ DebugLoc dl = MBBI->getDebugLoc();
+
+ int FrameSize = MFI->getStackSize();
+ assert(FrameSize%4 == 0 && "Misaligned frame size");
+
+ if (!hasFP(MF)) {
+ assert(!MFI->adjustsStack() &&
+ "FP elimination on a non-leaf function is not supported");
+ RegInfo->adjustRegister(MBB, MBBI, dl, BF::SP, BF::P1, FrameSize);
+ return;
+ }
+
+ // emit an UNLINK instruction
+ BuildMI(MBB, MBBI, dl, TII.get(BF::UNLINK));
+}
+
+void BlackfinFrameLowering::
+processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const BlackfinRegisterInfo *RegInfo =
+ static_cast<const BlackfinRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ const TargetRegisterClass *RC = BF::DPRegisterClass;
+
+ if (RegInfo->requiresRegisterScavenging(MF)) {
+ // Reserve a slot close to SP or frame pointer.
+ RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+ RC->getAlignment(),
+ false));
+ }
+}
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinFrameLowering.h b/contrib/llvm/lib/Target/Blackfin/BlackfinFrameLowering.h
new file mode 100644
index 0000000..169aa8e
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinFrameLowering.h
@@ -0,0 +1,47 @@
+//=- BlackfinFrameLowering.h - Define frame lowering for Blackfin -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BLACKFIN_FRAMEINFO_H
+#define BLACKFIN_FRAMEINFO_H
+
+#include "Blackfin.h"
+#include "BlackfinSubtarget.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+ class BlackfinSubtarget;
+
+class BlackfinFrameLowering : public TargetFrameLowering {
+protected:
+ const BlackfinSubtarget &STI;
+
+public:
+ explicit BlackfinFrameLowering(const BlackfinSubtarget &sti)
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 4, 0), STI(sti) {
+ }
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ bool hasFP(const MachineFunction &MF) const;
+ bool hasReservedCallFrame(const MachineFunction &MF) const;
+
+ void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp
new file mode 100644
index 0000000..215ca43
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp
@@ -0,0 +1,180 @@
+//===- BlackfinISelDAGToDAG.cpp - A dag to dag inst selector for Blackfin -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the Blackfin target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Blackfin.h"
+#include "BlackfinTargetMachine.h"
+#include "BlackfinRegisterInfo.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Instruction Selector Implementation
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+/// BlackfinDAGToDAGISel - Blackfin specific code to select blackfin machine
+/// instructions for SelectionDAG operations.
+namespace {
+ class BlackfinDAGToDAGISel : public SelectionDAGISel {
+ /// Subtarget - Keep a pointer to the Blackfin Subtarget around so that we
+ /// can make the right decision when generating code for different targets.
+ //const BlackfinSubtarget &Subtarget;
+ public:
+ BlackfinDAGToDAGISel(BlackfinTargetMachine &TM, CodeGenOpt::Level OptLevel)
+ : SelectionDAGISel(TM, OptLevel) {}
+
+ virtual void PostprocessISelDAG();
+
+ virtual const char *getPassName() const {
+ return "Blackfin DAG->DAG Pattern Instruction Selection";
+ }
+
+ // Include the pieces autogenerated from the target description.
+#include "BlackfinGenDAGISel.inc"
+
+ private:
+ SDNode *Select(SDNode *N);
+ bool SelectADDRspii(SDValue Addr, SDValue &Base, SDValue &Offset);
+
+ // Walk the DAG after instruction selection, fixing register class issues.
+ void FixRegisterClasses(SelectionDAG &DAG);
+
+ const BlackfinInstrInfo &getInstrInfo() {
+ return *static_cast<const BlackfinTargetMachine&>(TM).getInstrInfo();
+ }
+ const BlackfinRegisterInfo *getRegisterInfo() {
+ return static_cast<const BlackfinTargetMachine&>(TM).getRegisterInfo();
+ }
+ };
+} // end anonymous namespace
+
+FunctionPass *llvm::createBlackfinISelDag(BlackfinTargetMachine &TM,
+ CodeGenOpt::Level OptLevel) {
+ return new BlackfinDAGToDAGISel(TM, OptLevel);
+}
+
+void BlackfinDAGToDAGISel::PostprocessISelDAG() {
+ FixRegisterClasses(*CurDAG);
+}
+
+SDNode *BlackfinDAGToDAGISel::Select(SDNode *N) {
+ if (N->isMachineOpcode())
+ return NULL; // Already selected.
+
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::FrameIndex: {
+ // Selects to ADDpp FI, 0 which in turn will become ADDimm7 SP, imm or ADDpp
+ // SP, Px
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ SDValue TFI = CurDAG->getTargetFrameIndex(FI, MVT::i32);
+ return CurDAG->SelectNodeTo(N, BF::ADDpp, MVT::i32, TFI,
+ CurDAG->getTargetConstant(0, MVT::i32));
+ }
+ }
+
+ return SelectCode(N);
+}
+
+bool BlackfinDAGToDAGISel::SelectADDRspii(SDValue Addr,
+ SDValue &Base,
+ SDValue &Offset) {
+ FrameIndexSDNode *FIN = 0;
+ if ((FIN = dyn_cast<FrameIndexSDNode>(Addr))) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+ if (Addr.getOpcode() == ISD::ADD) {
+ ConstantSDNode *CN = 0;
+ if ((FIN = dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) &&
+ (CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) &&
+ (CN->getSExtValue() % 4 == 0 && CN->getSExtValue() >= 0)) {
+ // Constant positive word offset from frame index
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(CN->getSExtValue(), MVT::i32);
+ return true;
+ }
+ }
+ return false;
+}
+
+static inline bool isCC(const TargetRegisterClass *RC) {
+ return BF::AnyCCRegClass.hasSubClassEq(RC);
+}
+
+static inline bool isDCC(const TargetRegisterClass *RC) {
+ return BF::DRegClass.hasSubClassEq(RC) || isCC(RC);
+}
+
+static void UpdateNodeOperand(SelectionDAG &DAG,
+ SDNode *N,
+ unsigned Num,
+ SDValue Val) {
+ SmallVector<SDValue, 8> ops(N->op_begin(), N->op_end());
+ ops[Num] = Val;
+ SDNode *New = DAG.UpdateNodeOperands(N, ops.data(), ops.size());
+ DAG.ReplaceAllUsesWith(N, New);
+}
+
+// After instruction selection, insert COPY_TO_REGCLASS nodes to help in
+// choosing the proper register classes.
+void BlackfinDAGToDAGISel::FixRegisterClasses(SelectionDAG &DAG) {
+ const BlackfinInstrInfo &TII = getInstrInfo();
+ const BlackfinRegisterInfo *TRI = getRegisterInfo();
+ DAG.AssignTopologicalOrder();
+ HandleSDNode Dummy(DAG.getRoot());
+
+ for (SelectionDAG::allnodes_iterator NI = DAG.allnodes_begin();
+ NI != DAG.allnodes_end(); ++NI) {
+ if (NI->use_empty() || !NI->isMachineOpcode())
+ continue;
+ const MCInstrDesc &DefMCID = TII.get(NI->getMachineOpcode());
+ for (SDNode::use_iterator UI = NI->use_begin(); !UI.atEnd(); ++UI) {
+ if (!UI->isMachineOpcode())
+ continue;
+
+ if (UI.getUse().getResNo() >= DefMCID.getNumDefs())
+ continue;
+ const TargetRegisterClass *DefRC =
+ TII.getRegClass(DefMCID, UI.getUse().getResNo(), TRI);
+
+ const MCInstrDesc &UseMCID = TII.get(UI->getMachineOpcode());
+ if (UseMCID.getNumDefs()+UI.getOperandNo() >= UseMCID.getNumOperands())
+ continue;
+ const TargetRegisterClass *UseRC =
+ TII.getRegClass(UseMCID, UseMCID.getNumDefs()+UI.getOperandNo(), TRI);
+ if (!DefRC || !UseRC)
+ continue;
+ // We cannot copy CC <-> !(CC/D)
+ if ((isCC(DefRC) && !isDCC(UseRC)) || (isCC(UseRC) && !isDCC(DefRC))) {
+ SDNode *Copy =
+ DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
+ NI->getDebugLoc(),
+ MVT::i32,
+ UI.getUse().get(),
+ DAG.getTargetConstant(BF::DRegClassID, MVT::i32));
+ UpdateNodeOperand(DAG, *UI, UI.getOperandNo(), SDValue(Copy, 0));
+ }
+ }
+ }
+ DAG.setRoot(Dummy.getValue());
+}
+
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinISelLowering.cpp b/contrib/llvm/lib/Target/Blackfin/BlackfinISelLowering.cpp
new file mode 100644
index 0000000..7d4c45f
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinISelLowering.cpp
@@ -0,0 +1,645 @@
+//===- BlackfinISelLowering.cpp - Blackfin DAG Lowering Implementation ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the interfaces that Blackfin uses to lower LLVM code
+// into a selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#include "BlackfinISelLowering.h"
+#include "BlackfinTargetMachine.h"
+#include "llvm/Function.h"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/ADT/VectorExtras.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+#include "BlackfinGenCallingConv.inc"
+
+//===----------------------------------------------------------------------===//
+// TargetLowering Implementation
+//===----------------------------------------------------------------------===//
+
+BlackfinTargetLowering::BlackfinTargetLowering(TargetMachine &TM)
+ : TargetLowering(TM, new TargetLoweringObjectFileELF()) {
+ setBooleanContents(ZeroOrOneBooleanContent);
+ setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
+ setStackPointerRegisterToSaveRestore(BF::SP);
+ setIntDivIsCheap(false);
+
+ // Set up the legal register classes.
+ addRegisterClass(MVT::i32, BF::DRegisterClass);
+ addRegisterClass(MVT::i16, BF::D16RegisterClass);
+
+ computeRegisterProperties();
+
+ // Blackfin doesn't have i1 loads or stores
+ setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+
+ setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ setOperationAction(ISD::JumpTable, MVT::i32, Custom);
+
+ setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC, MVT::Other, Expand);
+
+ // i16 registers don't do much
+ setOperationAction(ISD::AND, MVT::i16, Promote);
+ setOperationAction(ISD::OR, MVT::i16, Promote);
+ setOperationAction(ISD::XOR, MVT::i16, Promote);
+ setOperationAction(ISD::CTPOP, MVT::i16, Promote);
+ // The expansion of CTLZ/CTTZ uses AND/OR, so we might as well promote
+ // immediately.
+ setOperationAction(ISD::CTLZ, MVT::i16, Promote);
+ setOperationAction(ISD::CTTZ, MVT::i16, Promote);
+ setOperationAction(ISD::SETCC, MVT::i16, Promote);
+
+ // Blackfin has no division
+ setOperationAction(ISD::SDIV, MVT::i16, Expand);
+ setOperationAction(ISD::SDIV, MVT::i32, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i16, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::SREM, MVT::i16, Expand);
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+ setOperationAction(ISD::UDIV, MVT::i16, Expand);
+ setOperationAction(ISD::UDIV, MVT::i32, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i16, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::UREM, MVT::i16, Expand);
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+
+ setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::MULHU, MVT::i32, Expand);
+ setOperationAction(ISD::MULHS, MVT::i32, Expand);
+
+ // No carry-in operations.
+ setOperationAction(ISD::ADDE, MVT::i32, Custom);
+ setOperationAction(ISD::SUBE, MVT::i32, Custom);
+
+ // Blackfin has no intrinsics for these particular operations.
+ setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);
+ setOperationAction(ISD::BSWAP, MVT::i32, Expand);
+
+ setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
+
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+
+ // i32 has native CTPOP, but not CTLZ/CTTZ
+ setOperationAction(ISD::CTLZ, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ, MVT::i32, Expand);
+
+ // READCYCLECOUNTER needs special type legalization.
+ setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
+
+ setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
+
+ // Use the default implementation.
+ setOperationAction(ISD::VACOPY, MVT::Other, Expand);
+ setOperationAction(ISD::VAEND, MVT::Other, Expand);
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+
+ setMinFunctionAlignment(2);
+}
+
+const char *BlackfinTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ default: return 0;
+ case BFISD::CALL: return "BFISD::CALL";
+ case BFISD::RET_FLAG: return "BFISD::RET_FLAG";
+ case BFISD::Wrapper: return "BFISD::Wrapper";
+ }
+}
+
+EVT BlackfinTargetLowering::getSetCCResultType(EVT VT) const {
+ // SETCC always sets the CC register. Technically that is an i1 register, but
+ // that type is not legal, so we treat it as an i32 register.
+ return MVT::i32;
+}
+
+SDValue BlackfinTargetLowering::LowerGlobalAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+
+ Op = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
+ return DAG.getNode(BFISD::Wrapper, DL, MVT::i32, Op);
+}
+
+SDValue BlackfinTargetLowering::LowerJumpTable(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ int JTI = cast<JumpTableSDNode>(Op)->getIndex();
+
+ Op = DAG.getTargetJumpTable(JTI, MVT::i32);
+ return DAG.getNode(BFISD::Wrapper, DL, MVT::i32, Op);
+}
+
+SDValue
+BlackfinTargetLowering::LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals)
+ const {
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+ CCInfo.AllocateStack(12, 4); // ABI requires 12 bytes stack space
+ CCInfo.AnalyzeFormalArguments(Ins, CC_Blackfin);
+
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+
+ if (VA.isRegLoc()) {
+ EVT RegVT = VA.getLocVT();
+ TargetRegisterClass *RC = VA.getLocReg() == BF::P0 ?
+ BF::PRegisterClass : BF::DRegisterClass;
+ assert(RC->contains(VA.getLocReg()) && "Unexpected regclass in CCState");
+ assert(RC->hasType(RegVT) && "Unexpected regclass in CCState");
+
+ unsigned Reg = MF.getRegInfo().createVirtualRegister(RC);
+ MF.getRegInfo().addLiveIn(VA.getLocReg(), Reg);
+ SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
+
+ // If this is an 8 or 16-bit value, it is really passed promoted to 32
+ // bits. Insert an assert[sz]ext to capture this, then truncate to the
+ // right size.
+ if (VA.getLocInfo() == CCValAssign::SExt)
+ ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+ else if (VA.getLocInfo() == CCValAssign::ZExt)
+ ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+
+ if (VA.getLocInfo() != CCValAssign::Full)
+ ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
+
+ InVals.push_back(ArgValue);
+ } else {
+ assert(VA.isMemLoc() && "CCValAssign must be RegLoc or MemLoc");
+ unsigned ObjSize = VA.getLocVT().getStoreSize();
+ int FI = MFI->CreateFixedObject(ObjSize, VA.getLocMemOffset(), true);
+ SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
+ InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
+ MachinePointerInfo(),
+ false, false, 0));
+ }
+ }
+
+ return Chain;
+}
+
+SDValue
+BlackfinTargetLowering::LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const {
+
+ // CCValAssign - represent the assignment of the return value to locations.
+ SmallVector<CCValAssign, 16> RVLocs;
+
+ // CCState - Info about the registers and stack slot.
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ DAG.getTarget(), RVLocs, *DAG.getContext());
+
+ // Analize return values.
+ CCInfo.AnalyzeReturn(Outs, RetCC_Blackfin);
+
+ // If this is the first return lowered for this function, add the regs to the
+ // liveout set for the function.
+ if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
+ for (unsigned i = 0; i != RVLocs.size(); ++i)
+ DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
+ }
+
+ SDValue Flag;
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+ SDValue Opi = OutVals[i];
+
+ // Expand to i32 if necessary
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt:
+ Opi = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Opi);
+ break;
+ case CCValAssign::ZExt:
+ Opi = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Opi);
+ break;
+ case CCValAssign::AExt:
+ Opi = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Opi);
+ break;
+ }
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Opi, SDValue());
+ // Guarantee that all emitted copies are stuck together with flags.
+ Flag = Chain.getValue(1);
+ }
+
+ if (Flag.getNode()) {
+ return DAG.getNode(BFISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
+ } else {
+ return DAG.getNode(BFISD::RET_FLAG, dl, MVT::Other, Chain);
+ }
+}
+
+SDValue
+BlackfinTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ // Blackfin target does not yet support tail call optimization.
+ isTailCall = false;
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ DAG.getTarget(), ArgLocs, *DAG.getContext());
+ CCInfo.AllocateStack(12, 4); // ABI requires 12 bytes stack space
+ CCInfo.AnalyzeCallOperands(Outs, CC_Blackfin);
+
+ // Get the size of the outgoing arguments stack space requirement.
+ unsigned ArgsSize = CCInfo.getNextStackOffset();
+
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(ArgsSize, true));
+ SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+ SmallVector<SDValue, 8> MemOpChains;
+
+ // Walk the register/memloc assignments, inserting copies/loads.
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ SDValue Arg = OutVals[i];
+
+ // Promote the value if needed.
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::AExt:
+ Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ }
+
+ // Arguments that can be passed on register must be kept at
+ // RegsToPass vector
+ if (VA.isRegLoc()) {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ } else {
+ assert(VA.isMemLoc() && "CCValAssign must be RegLoc or MemLoc");
+ int Offset = VA.getLocMemOffset();
+ assert(Offset%4 == 0 && "Unaligned LocMemOffset");
+ assert(VA.getLocVT()==MVT::i32 && "Illegal CCValAssign type");
+ SDValue SPN = DAG.getCopyFromReg(Chain, dl, BF::SP, MVT::i32);
+ SDValue OffsetN = DAG.getIntPtrConstant(Offset);
+ OffsetN = DAG.getNode(ISD::ADD, dl, MVT::i32, SPN, OffsetN);
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, OffsetN,
+ MachinePointerInfo(),false, false, 0));
+ }
+ }
+
+ // Transform all store nodes into one single node because
+ // all store nodes are independent of each other.
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+ // Build a sequence of copy-to-reg nodes chained together with token
+ // chain and flag operands which copy the outgoing args into registers.
+ // The InFlag in necessary since all emitted instructions must be
+ // stuck together.
+ SDValue InFlag;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // If the callee is a GlobalAddress node (quite common, every direct call is)
+ // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
+ // Likewise ExternalSymbol -> TargetExternalSymbol.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, MVT::i32);
+ else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee))
+ Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i32);
+
+ std::vector<EVT> NodeTys;
+ NodeTys.push_back(MVT::Other); // Returns a chain
+ NodeTys.push_back(MVT::Glue); // Returns a flag for retval copy to use.
+ SDValue Ops[] = { Chain, Callee, InFlag };
+ Chain = DAG.getNode(BFISD::CALL, dl, NodeTys, Ops,
+ InFlag.getNode() ? 3 : 2);
+ InFlag = Chain.getValue(1);
+
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(ArgsSize, true),
+ DAG.getIntPtrConstant(0, true), InFlag);
+ InFlag = Chain.getValue(1);
+
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState RVInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ DAG.getTarget(), RVLocs, *DAG.getContext());
+
+ RVInfo.AnalyzeCallResult(Ins, RetCC_Blackfin);
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &RV = RVLocs[i];
+ unsigned Reg = RV.getLocReg();
+
+ Chain = DAG.getCopyFromReg(Chain, dl, Reg,
+ RVLocs[i].getLocVT(), InFlag);
+ SDValue Val = Chain.getValue(0);
+ InFlag = Chain.getValue(2);
+ Chain = Chain.getValue(1);
+
+ // Callee is responsible for extending any i16 return values.
+ switch (RV.getLocInfo()) {
+ case CCValAssign::SExt:
+ Val = DAG.getNode(ISD::AssertSext, dl, RV.getLocVT(), Val,
+ DAG.getValueType(RV.getValVT()));
+ break;
+ case CCValAssign::ZExt:
+ Val = DAG.getNode(ISD::AssertZext, dl, RV.getLocVT(), Val,
+ DAG.getValueType(RV.getValVT()));
+ break;
+ default:
+ break;
+ }
+
+ // Truncate to valtype
+ if (RV.getLocInfo() != CCValAssign::Full)
+ Val = DAG.getNode(ISD::TRUNCATE, dl, RV.getValVT(), Val);
+ InVals.push_back(Val);
+ }
+
+ return Chain;
+}
+
+// Expansion of ADDE / SUBE. This is a bit involved since blackfin doesn't have
+// add-with-carry instructions.
+SDValue BlackfinTargetLowering::LowerADDE(SDValue Op, SelectionDAG &DAG) const {
+ // Operands: lhs, rhs, carry-in (AC0 flag)
+ // Results: sum, carry-out (AC0 flag)
+ DebugLoc dl = Op.getDebugLoc();
+
+ unsigned Opcode = Op.getOpcode()==ISD::ADDE ? BF::ADD : BF::SUB;
+
+ // zext incoming carry flag in AC0 to 32 bits
+ SDNode* CarryIn = DAG.getMachineNode(BF::MOVE_cc_ac0, dl, MVT::i32,
+ /* flag= */ Op.getOperand(2));
+ CarryIn = DAG.getMachineNode(BF::MOVECC_zext, dl, MVT::i32,
+ SDValue(CarryIn, 0));
+
+ // Add operands, produce sum and carry flag
+ SDNode *Sum = DAG.getMachineNode(Opcode, dl, MVT::i32, MVT::Glue,
+ Op.getOperand(0), Op.getOperand(1));
+
+ // Store intermediate carry from Sum
+ SDNode* Carry1 = DAG.getMachineNode(BF::MOVE_cc_ac0, dl, MVT::i32,
+ /* flag= */ SDValue(Sum, 1));
+
+ // Add incoming carry, again producing an output flag
+ Sum = DAG.getMachineNode(Opcode, dl, MVT::i32, MVT::Glue,
+ SDValue(Sum, 0), SDValue(CarryIn, 0));
+
+ // Update AC0 with the intermediate carry, producing a flag.
+ SDNode *CarryOut = DAG.getMachineNode(BF::OR_ac0_cc, dl, MVT::Glue,
+ SDValue(Carry1, 0));
+
+ // Compose (i32, flag) pair
+ SDValue ops[2] = { SDValue(Sum, 0), SDValue(CarryOut, 0) };
+ return DAG.getMergeValues(ops, 2, dl);
+}
+
+SDValue BlackfinTargetLowering::LowerOperation(SDValue Op,
+ SelectionDAG &DAG) const {
+ switch (Op.getOpcode()) {
+ default:
+ Op.getNode()->dump();
+ llvm_unreachable("Should not custom lower this!");
+ case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
+ case ISD::GlobalTLSAddress:
+ llvm_unreachable("TLS not implemented for Blackfin.");
+ case ISD::JumpTable: return LowerJumpTable(Op, DAG);
+ // Frame & Return address. Currently unimplemented
+ case ISD::FRAMEADDR: return SDValue();
+ case ISD::RETURNADDR: return SDValue();
+ case ISD::ADDE:
+ case ISD::SUBE: return LowerADDE(Op, DAG);
+ }
+}
+
+void
+BlackfinTargetLowering::ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = N->getDebugLoc();
+ switch (N->getOpcode()) {
+ default:
+ llvm_unreachable("Do not know how to custom type legalize this operation!");
+ return;
+ case ISD::READCYCLECOUNTER: {
+ // The low part of the cycle counter is in CYCLES, the high part in
+ // CYCLES2. Reading CYCLES will latch the value of CYCLES2, so we must read
+ // CYCLES2 last.
+ SDValue TheChain = N->getOperand(0);
+ SDValue lo = DAG.getCopyFromReg(TheChain, dl, BF::CYCLES, MVT::i32);
+ SDValue hi = DAG.getCopyFromReg(lo.getValue(1), dl, BF::CYCLES2, MVT::i32);
+ // Use a buildpair to merge the two 32-bit values into a 64-bit one.
+ Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, lo, hi));
+ // Outgoing chain. If we were to use the chain from lo instead, it would be
+ // possible to entirely eliminate the CYCLES2 read in (i32 (trunc
+ // readcyclecounter)). Unfortunately this could possibly delay the CYCLES2
+ // read beyond the next CYCLES read, leading to invalid results.
+ Results.push_back(hi.getValue(1));
+ return;
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Blackfin Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+/// getConstraintType - Given a constraint letter, return the type of
+/// constraint it is for this target.
+BlackfinTargetLowering::ConstraintType
+BlackfinTargetLowering::getConstraintType(const std::string &Constraint) const {
+ if (Constraint.size() != 1)
+ return TargetLowering::getConstraintType(Constraint);
+
+ switch (Constraint[0]) {
+ // Standard constraints
+ case 'r':
+ return C_RegisterClass;
+
+ // Blackfin-specific constraints
+ case 'a':
+ case 'd':
+ case 'z':
+ case 'D':
+ case 'W':
+ case 'e':
+ case 'b':
+ case 'v':
+ case 'f':
+ case 'c':
+ case 't':
+ case 'u':
+ case 'k':
+ case 'x':
+ case 'y':
+ case 'w':
+ return C_RegisterClass;
+ case 'A':
+ case 'B':
+ case 'C':
+ case 'Z':
+ case 'Y':
+ return C_Register;
+ }
+
+ // Not implemented: q0-q7, qA. Use {R2} etc instead
+
+ return TargetLowering::getConstraintType(Constraint);
+}
+
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+BlackfinTargetLowering::getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const {
+ ConstraintWeight weight = CW_Invalid;
+ Value *CallOperandVal = info.CallOperandVal;
+ // If we don't have a value, we can't do a match,
+ // but allow it at the lowest weight.
+ if (CallOperandVal == NULL)
+ return CW_Default;
+ // Look at the constraint type.
+ switch (*constraint) {
+ default:
+ weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
+ break;
+
+ // Blackfin-specific constraints
+ case 'a':
+ case 'd':
+ case 'z':
+ case 'D':
+ case 'W':
+ case 'e':
+ case 'b':
+ case 'v':
+ case 'f':
+ case 'c':
+ case 't':
+ case 'u':
+ case 'k':
+ case 'x':
+ case 'y':
+ case 'w':
+ return CW_Register;
+ case 'A':
+ case 'B':
+ case 'C':
+ case 'Z':
+ case 'Y':
+ return CW_SpecificReg;
+ }
+ return weight;
+}
+
+/// getRegForInlineAsmConstraint - Return register no and class for a C_Register
+/// constraint.
+std::pair<unsigned, const TargetRegisterClass*> BlackfinTargetLowering::
+getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const {
+ typedef std::pair<unsigned, const TargetRegisterClass*> Pair;
+ using namespace BF;
+
+ if (Constraint.size() != 1)
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+
+ switch (Constraint[0]) {
+ // Standard constraints
+ case 'r':
+ return Pair(0U, VT == MVT::i16 ? D16RegisterClass : DPRegisterClass);
+
+ // Blackfin-specific constraints
+ case 'a': return Pair(0U, PRegisterClass);
+ case 'd': return Pair(0U, DRegisterClass);
+ case 'e': return Pair(0U, AccuRegisterClass);
+ case 'A': return Pair(A0, AccuRegisterClass);
+ case 'B': return Pair(A1, AccuRegisterClass);
+ case 'b': return Pair(0U, IRegisterClass);
+ case 'v': return Pair(0U, BRegisterClass);
+ case 'f': return Pair(0U, MRegisterClass);
+ case 'C': return Pair(CC, JustCCRegisterClass);
+ case 'x': return Pair(0U, GRRegisterClass);
+ case 'w': return Pair(0U, ALLRegisterClass);
+ case 'Z': return Pair(P3, PRegisterClass);
+ case 'Y': return Pair(P1, PRegisterClass);
+ case 'z': return Pair(0U, zConsRegisterClass);
+ case 'D': return Pair(0U, DConsRegisterClass);
+ case 'W': return Pair(0U, WConsRegisterClass);
+ case 'c': return Pair(0U, cConsRegisterClass);
+ case 't': return Pair(0U, tConsRegisterClass);
+ case 'u': return Pair(0U, uConsRegisterClass);
+ case 'k': return Pair(0U, kConsRegisterClass);
+ case 'y': return Pair(0U, yConsRegisterClass);
+ }
+
+ // Not implemented: q0-q7, qA. Use {R2} etc instead.
+
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
+
+bool BlackfinTargetLowering::
+isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+ // The Blackfin target isn't yet aware of offsets.
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinISelLowering.h b/contrib/llvm/lib/Target/Blackfin/BlackfinISelLowering.h
new file mode 100644
index 0000000..90908ba
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinISelLowering.h
@@ -0,0 +1,83 @@
+//===- BlackfinISelLowering.h - Blackfin DAG Lowering Interface -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that Blackfin uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BLACKFIN_ISELLOWERING_H
+#define BLACKFIN_ISELLOWERING_H
+
+#include "llvm/Target/TargetLowering.h"
+#include "Blackfin.h"
+
+namespace llvm {
+
+ namespace BFISD {
+ enum {
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+ CALL, // A call instruction.
+ RET_FLAG, // Return with a flag operand.
+ Wrapper // Address wrapper
+ };
+ }
+
+ class BlackfinTargetLowering : public TargetLowering {
+ public:
+ BlackfinTargetLowering(TargetMachine &TM);
+ virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i16; }
+ virtual EVT getSetCCResultType(EVT VT) const;
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+ virtual void ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const;
+
+ ConstraintType getConstraintType(const std::string &Constraint) const;
+
+ /// Examine constraint string and operand type and determine a weight value.
+ /// The operand object must already have been set up with the operand type.
+ ConstraintWeight getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const;
+
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const;
+ virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
+ const char *getTargetNodeName(unsigned Opcode) const;
+
+ private:
+ SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerADDE(SDValue Op, SelectionDAG &DAG) const;
+
+ virtual SDValue
+ LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+ virtual SDValue
+ LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg, bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const;
+ };
+} // end namespace llvm
+
+#endif // BLACKFIN_ISELLOWERING_H
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinInstrFormats.td b/contrib/llvm/lib/Target/Blackfin/BlackfinInstrFormats.td
new file mode 100644
index 0000000..d8e6e25
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinInstrFormats.td
@@ -0,0 +1,34 @@
+//===--- BlackfinInstrFormats.td ---------------------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instruction format superclass
+//===----------------------------------------------------------------------===//
+
+class InstBfin<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : Instruction {
+ field bits<32> Inst;
+
+ let Namespace = "BF";
+
+ dag OutOperandList = outs;
+ dag InOperandList = ins;
+ let AsmString = asmstr;
+ let Pattern = pattern;
+}
+
+// Single-word (16-bit) instructions
+class F1<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstBfin<outs, ins, asmstr, pattern> {
+}
+
+// Double-word (32-bit) instructions
+class F2<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstBfin<outs, ins, asmstr, pattern> {
+}
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinInstrInfo.cpp b/contrib/llvm/lib/Target/Blackfin/BlackfinInstrInfo.cpp
new file mode 100644
index 0000000..c06a919
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinInstrInfo.cpp
@@ -0,0 +1,256 @@
+//===- BlackfinInstrInfo.cpp - Blackfin Instruction Information -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Blackfin implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "BlackfinInstrInfo.h"
+#include "BlackfinSubtarget.h"
+#include "Blackfin.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#define GET_INSTRINFO_CTOR
+#include "BlackfinGenInstrInfo.inc"
+
+using namespace llvm;
+
+BlackfinInstrInfo::BlackfinInstrInfo(BlackfinSubtarget &ST)
+ : BlackfinGenInstrInfo(BF::ADJCALLSTACKDOWN, BF::ADJCALLSTACKUP),
+ RI(ST, *this),
+ Subtarget(ST) {}
+
+/// isLoadFromStackSlot - If the specified machine instruction is a direct
+/// load from a stack slot, return the virtual or physical register number of
+/// the destination along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than loading from the stack slot.
+unsigned BlackfinInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case BF::LOAD32fi:
+ case BF::LOAD16fi:
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(2).isImm() &&
+ MI->getOperand(2).getImm() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+ return 0;
+}
+
+/// isStoreToStackSlot - If the specified machine instruction is a direct
+/// store to a stack slot, return the virtual or physical register number of
+/// the source reg along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than storing to the stack slot.
+unsigned BlackfinInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case BF::STORE32fi:
+ case BF::STORE16fi:
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(2).isImm() &&
+ MI->getOperand(2).getImm() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+ return 0;
+}
+
+unsigned BlackfinInstrInfo::
+InsertBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
+ // Shouldn't be a fall through.
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert((Cond.size() == 1 || Cond.size() == 0) &&
+ "Branch conditions have one component!");
+
+ if (Cond.empty()) {
+ // Unconditional branch?
+ assert(!FBB && "Unconditional branch with multiple successors!");
+ BuildMI(&MBB, DL, get(BF::JUMPa)).addMBB(TBB);
+ return 1;
+ }
+
+ // Conditional branch.
+ llvm_unreachable("Implement conditional branches!");
+}
+
+void BlackfinInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ if (BF::ALLRegClass.contains(DestReg, SrcReg)) {
+ BuildMI(MBB, I, DL, get(BF::MOVE), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
+
+ if (BF::D16RegClass.contains(DestReg, SrcReg)) {
+ BuildMI(MBB, I, DL, get(BF::SLL16i), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc))
+ .addImm(0);
+ return;
+ }
+
+ if (BF::DRegClass.contains(DestReg)) {
+ if (SrcReg == BF::NCC) {
+ BuildMI(MBB, I, DL, get(BF::MOVENCC_z), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ BuildMI(MBB, I, DL, get(BF::BITTGL), DestReg).addReg(DestReg).addImm(0);
+ return;
+ }
+ if (SrcReg == BF::CC) {
+ BuildMI(MBB, I, DL, get(BF::MOVECC_zext), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
+ }
+
+ if (BF::DRegClass.contains(SrcReg)) {
+ if (DestReg == BF::NCC) {
+ BuildMI(MBB, I, DL, get(BF::SETEQri_not), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc)).addImm(0);
+ return;
+ }
+ if (DestReg == BF::CC) {
+ BuildMI(MBB, I, DL, get(BF::MOVECC_nz), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
+ }
+
+
+ if (DestReg == BF::NCC && SrcReg == BF::CC) {
+ BuildMI(MBB, I, DL, get(BF::MOVE_ncccc), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
+
+ if (DestReg == BF::CC && SrcReg == BF::NCC) {
+ BuildMI(MBB, I, DL, get(BF::MOVE_ccncc), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
+
+ llvm_unreachable("Bad reg-to-reg copy");
+}
+
+static bool inClass(const TargetRegisterClass &Test,
+ unsigned Reg,
+ const TargetRegisterClass *RC) {
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ return Test.contains(Reg);
+ else
+ return Test.hasSubClassEq(RC);
+}
+
+void
+BlackfinInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned SrcReg,
+ bool isKill,
+ int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
+
+ if (inClass(BF::DPRegClass, SrcReg, RC)) {
+ BuildMI(MBB, I, DL, get(BF::STORE32fi))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI)
+ .addImm(0);
+ return;
+ }
+
+ if (inClass(BF::D16RegClass, SrcReg, RC)) {
+ BuildMI(MBB, I, DL, get(BF::STORE16fi))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI)
+ .addImm(0);
+ return;
+ }
+
+ if (inClass(BF::AnyCCRegClass, SrcReg, RC)) {
+ BuildMI(MBB, I, DL, get(BF::STORE8fi))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI)
+ .addImm(0);
+ return;
+ }
+
+ llvm_unreachable((std::string("Cannot store regclass to stack slot: ")+
+ RC->getName()).c_str());
+}
+
+void BlackfinInstrInfo::
+storeRegToAddr(MachineFunction &MF,
+ unsigned SrcReg,
+ bool isKill,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const {
+ llvm_unreachable("storeRegToAddr not implemented");
+}
+
+void
+BlackfinInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg,
+ int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
+ if (inClass(BF::DPRegClass, DestReg, RC)) {
+ BuildMI(MBB, I, DL, get(BF::LOAD32fi), DestReg)
+ .addFrameIndex(FI)
+ .addImm(0);
+ return;
+ }
+
+ if (inClass(BF::D16RegClass, DestReg, RC)) {
+ BuildMI(MBB, I, DL, get(BF::LOAD16fi), DestReg)
+ .addFrameIndex(FI)
+ .addImm(0);
+ return;
+ }
+
+ if (inClass(BF::AnyCCRegClass, DestReg, RC)) {
+ BuildMI(MBB, I, DL, get(BF::LOAD8fi), DestReg)
+ .addFrameIndex(FI)
+ .addImm(0);
+ return;
+ }
+
+ llvm_unreachable("Cannot load regclass from stack slot");
+}
+
+void BlackfinInstrInfo::
+loadRegFromAddr(MachineFunction &MF,
+ unsigned DestReg,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const {
+ llvm_unreachable("loadRegFromAddr not implemented");
+}
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinInstrInfo.h b/contrib/llvm/lib/Target/Blackfin/BlackfinInstrInfo.h
new file mode 100644
index 0000000..d22ddf0
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinInstrInfo.h
@@ -0,0 +1,81 @@
+//===- BlackfinInstrInfo.h - Blackfin Instruction Information ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Blackfin implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BLACKFININSTRUCTIONINFO_H
+#define BLACKFININSTRUCTIONINFO_H
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "BlackfinRegisterInfo.h"
+
+#define GET_INSTRINFO_HEADER
+#include "BlackfinGenInstrInfo.inc"
+
+namespace llvm {
+
+ class BlackfinInstrInfo : public BlackfinGenInstrInfo {
+ const BlackfinRegisterInfo RI;
+ const BlackfinSubtarget& Subtarget;
+ public:
+ explicit BlackfinInstrInfo(BlackfinSubtarget &ST);
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ virtual const BlackfinRegisterInfo &getRegisterInfo() const { return RI; }
+
+ virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ virtual unsigned
+ InsertBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
+
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill,
+ int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual void storeRegToAddr(MachineFunction &MF,
+ unsigned SrcReg, bool isKill,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+ };
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinInstrInfo.td b/contrib/llvm/lib/Target/Blackfin/BlackfinInstrInfo.td
new file mode 100644
index 0000000..5b59d77
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinInstrInfo.td
@@ -0,0 +1,862 @@
+//===- BlackfinInstrInfo.td - Target Description for Blackfin Target ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Blackfin instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instruction format superclass
+//===----------------------------------------------------------------------===//
+
+include "BlackfinInstrFormats.td"
+
+// These are target-independent nodes, but have target-specific formats.
+def SDT_BfinCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
+def SDT_BfinCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>,
+ SDTCisVT<1, i32> ]>;
+
+def BfinCallseqStart : SDNode<"ISD::CALLSEQ_START", SDT_BfinCallSeqStart,
+ [SDNPHasChain, SDNPOutGlue]>;
+def BfinCallseqEnd : SDNode<"ISD::CALLSEQ_END", SDT_BfinCallSeqEnd,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+def SDT_BfinCall : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
+def BfinCall : SDNode<"BFISD::CALL", SDT_BfinCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+
+def BfinRet: SDNode<"BFISD::RET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue]>;
+
+def BfinWrapper: SDNode<"BFISD::Wrapper", SDTIntUnaryOp>;
+
+//===----------------------------------------------------------------------===//
+// Transformations
+//===----------------------------------------------------------------------===//
+
+def trailingZeros_xform : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getAPIntValue().countTrailingZeros(),
+ MVT::i32);
+}]>;
+
+def trailingOnes_xform : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getAPIntValue().countTrailingOnes(),
+ MVT::i32);
+}]>;
+
+def LO16 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant((unsigned short)N->getZExtValue(), MVT::i16);
+}]>;
+
+def HI16 : SDNodeXForm<imm, [{
+ // Transformation function: shift the immediate value down into the low bits.
+ return CurDAG->getTargetConstant((unsigned)N->getZExtValue() >> 16, MVT::i16);
+}]>;
+
+//===----------------------------------------------------------------------===//
+// Immediates
+//===----------------------------------------------------------------------===//
+
+def imm3 : PatLeaf<(imm), [{return isInt<3>(N->getSExtValue());}]>;
+def uimm3 : PatLeaf<(imm), [{return isUInt<3>(N->getZExtValue());}]>;
+def uimm4 : PatLeaf<(imm), [{return isUInt<4>(N->getZExtValue());}]>;
+def uimm5 : PatLeaf<(imm), [{return isUInt<5>(N->getZExtValue());}]>;
+
+def uimm5m2 : PatLeaf<(imm), [{
+ uint64_t value = N->getZExtValue();
+ return value % 2 == 0 && isUInt<5>(value);
+}]>;
+
+def uimm6m4 : PatLeaf<(imm), [{
+ uint64_t value = N->getZExtValue();
+ return value % 4 == 0 && isUInt<6>(value);
+}]>;
+
+def imm7 : PatLeaf<(imm), [{return isInt<7>(N->getSExtValue());}]>;
+def imm16 : PatLeaf<(imm), [{return isInt<16>(N->getSExtValue());}]>;
+def uimm16 : PatLeaf<(imm), [{return isUInt<16>(N->getZExtValue());}]>;
+
+def ximm16 : PatLeaf<(imm), [{
+ int64_t value = N->getSExtValue();
+ return value < (1<<16) && value >= -(1<<15);
+}]>;
+
+def imm17m2 : PatLeaf<(imm), [{
+ int64_t value = N->getSExtValue();
+ return value % 2 == 0 && isInt<17>(value);
+}]>;
+
+def imm18m4 : PatLeaf<(imm), [{
+ int64_t value = N->getSExtValue();
+ return value % 4 == 0 && isInt<18>(value);
+}]>;
+
+// 32-bit bitmask transformed to a bit number
+def uimm5mask : Operand<i32>, PatLeaf<(imm), [{
+ return isPowerOf2_32(N->getZExtValue());
+}], trailingZeros_xform>;
+
+// 32-bit inverse bitmask transformed to a bit number
+def uimm5imask : Operand<i32>, PatLeaf<(imm), [{
+ return isPowerOf2_32(~N->getZExtValue());
+}], trailingOnes_xform>;
+
+//===----------------------------------------------------------------------===//
+// Operands
+//===----------------------------------------------------------------------===//
+
+def calltarget : Operand<iPTR>;
+
+def brtarget : Operand<OtherVT>;
+
+// Addressing modes
+def ADDRspii : ComplexPattern<i32, 2, "SelectADDRspii", [add, frameindex], []>;
+
+// Address operands
+def MEMii : Operand<i32> {
+ let PrintMethod = "printMemoryOperand";
+ let MIOperandInfo = (ops i32imm, i32imm);
+}
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+// Pseudo instructions.
+class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstBfin<outs, ins, asmstr, pattern>;
+
+let Defs = [SP], Uses = [SP] in {
+def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt),
+ "${:comment}ADJCALLSTACKDOWN $amt",
+ [(BfinCallseqStart timm:$amt)]>;
+def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
+ "${:comment}ADJCALLSTACKUP $amt1 $amt2",
+ [(BfinCallseqEnd timm:$amt1, timm:$amt2)]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Table C-9. Program Flow Control Instructions
+//===----------------------------------------------------------------------===//
+
+let isBranch = 1, isTerminator = 1 in {
+
+let isIndirectBranch = 1 in
+def JUMPp : F1<(outs), (ins P:$target),
+ "JUMP ($target);",
+ [(brind P:$target)]>;
+
+// TODO JUMP (PC-P)
+
+// NOTE: assembler chooses between JUMP.S and JUMP.L
+def JUMPa : F1<(outs), (ins brtarget:$target),
+ "jump $target;",
+ [(br bb:$target)]>;
+
+def JUMPcc : F1<(outs), (ins AnyCC:$cc, brtarget:$target),
+ "if $cc jump $target;",
+ [(brcond AnyCC:$cc, bb:$target)]>;
+}
+
+let isCall = 1,
+ Defs = [R0, R1, R2, R3, P0, P1, P2, LB0, LB1, LC0, LC1, RETS, ASTAT] in {
+def CALLa: F1<(outs), (ins calltarget:$func, variable_ops),
+ "call $func;", []>;
+def CALLp: F1<(outs), (ins P:$func, variable_ops),
+ "call ($func);", [(BfinCall P:$func)]>;
+}
+
+let isReturn = 1,
+ isTerminator = 1,
+ isBarrier = 1,
+ Uses = [RETS] in
+def RTS: F1<(outs), (ins), "rts;", [(BfinRet)]>;
+
+//===----------------------------------------------------------------------===//
+// Table C-10. Load / Store Instructions
+//===----------------------------------------------------------------------===//
+
+// Immediate constant loads
+
+// sext immediate, i32 D/P regs
+def LOADimm7: F1<(outs DP:$dst), (ins i32imm:$src),
+ "$dst = $src (x);",
+ [(set DP:$dst, imm7:$src)]>;
+
+// zext immediate, i32 reg groups 0-3
+def LOADuimm16: F2<(outs GR:$dst), (ins i32imm:$src),
+ "$dst = $src (z);",
+ [(set GR:$dst, uimm16:$src)]>;
+
+// sext immediate, i32 reg groups 0-3
+def LOADimm16: F2<(outs GR:$dst), (ins i32imm:$src),
+ "$dst = $src (x);",
+ [(set GR:$dst, imm16:$src)]>;
+
+// Pseudo-instruction for loading a general 32-bit constant.
+def LOAD32imm: Pseudo<(outs GR:$dst), (ins i32imm:$src),
+ "$dst.h = ($src >> 16); $dst.l = ($src & 0xffff);",
+ [(set GR:$dst, imm:$src)]>;
+
+def LOAD32sym: Pseudo<(outs GR:$dst), (ins i32imm:$src),
+ "$dst.h = $src; $dst.l = $src;", []>;
+
+
+// 16-bit immediate, i16 reg groups 0-3
+def LOAD16i: F2<(outs GR16:$dst), (ins i16imm:$src),
+ "$dst = $src;", []>;
+
+def : Pat<(BfinWrapper (i32 tglobaladdr:$addr)),
+ (LOAD32sym tglobaladdr:$addr)>;
+
+def : Pat<(BfinWrapper (i32 tjumptable:$addr)),
+ (LOAD32sym tjumptable:$addr)>;
+
+// We cannot copy from GR16 to D16, and codegen wants to insert copies if we
+// emit GR16 instructions. As a hack, we use this fake instruction instead.
+def LOAD16i_d16: F2<(outs D16:$dst), (ins i16imm:$src),
+ "$dst = $src;",
+ [(set D16:$dst, ximm16:$src)]>;
+
+// Memory loads with patterns
+
+def LOAD32p: F1<(outs DP:$dst), (ins P:$ptr),
+ "$dst = [$ptr];",
+ [(set DP:$dst, (load P:$ptr))]>;
+
+// Pseudo-instruction for loading a stack slot
+def LOAD32fi: Pseudo<(outs DP:$dst), (ins MEMii:$mem),
+ "${:comment}FI $dst = [$mem];",
+ [(set DP:$dst, (load ADDRspii:$mem))]>;
+
+// Note: Expands to multiple insns
+def LOAD16fi: Pseudo<(outs D16:$dst), (ins MEMii:$mem),
+ "${:comment}FI $dst = [$mem];",
+ [(set D16:$dst, (load ADDRspii:$mem))]>;
+
+// Pseudo-instruction for loading a stack slot, used for AnyCC regs.
+// Replaced with Load D + CC=D
+def LOAD8fi: Pseudo<(outs AnyCC:$dst), (ins MEMii:$mem),
+ "${:comment}FI $dst = B[$mem];",
+ [(set AnyCC:$dst, (load ADDRspii:$mem))]>;
+
+def LOAD32p_uimm6m4: F1<(outs DP:$dst), (ins P:$ptr, i32imm:$off),
+ "$dst = [$ptr + $off];",
+ [(set DP:$dst, (load (add P:$ptr, uimm6m4:$off)))]>;
+
+def LOAD32p_imm18m4: F2<(outs DP:$dst), (ins P:$ptr, i32imm:$off),
+ "$dst = [$ptr + $off];",
+ [(set DP:$dst, (load (add P:$ptr, imm18m4:$off)))]>;
+
+def LOAD32p_16z: F1<(outs D:$dst), (ins P:$ptr),
+ "$dst = W[$ptr] (z);",
+ [(set D:$dst, (zextloadi16 P:$ptr))]>;
+
+def : Pat<(i32 (extloadi16 P:$ptr)),(LOAD32p_16z P:$ptr)>;
+
+def LOAD32p_uimm5m2_16z: F1<(outs D:$dst), (ins P:$ptr, i32imm:$off),
+ "$dst = w[$ptr + $off] (z);",
+ [(set D:$dst, (zextloadi16 (add P:$ptr,
+ uimm5m2:$off)))]>;
+
+def : Pat<(i32 (extloadi16 (add P:$ptr, uimm5m2:$off))),
+ (LOAD32p_uimm5m2_16z P:$ptr, imm:$off)>;
+
+def LOAD32p_imm17m2_16z: F1<(outs D:$dst), (ins P:$ptr, i32imm:$off),
+ "$dst = w[$ptr + $off] (z);",
+ [(set D:$dst,
+ (zextloadi16 (add P:$ptr, imm17m2:$off)))]>;
+
+def : Pat<(i32 (extloadi16 (add P:$ptr, imm17m2:$off))),
+ (LOAD32p_imm17m2_16z P:$ptr, imm:$off)>;
+
+def LOAD32p_16s: F1<(outs D:$dst), (ins P:$ptr),
+ "$dst = w[$ptr] (x);",
+ [(set D:$dst, (sextloadi16 P:$ptr))]>;
+
+def LOAD32p_uimm5m2_16s: F1<(outs D:$dst), (ins P:$ptr, i32imm:$off),
+ "$dst = w[$ptr + $off] (x);",
+ [(set D:$dst,
+ (sextloadi16 (add P:$ptr, uimm5m2:$off)))]>;
+
+def LOAD32p_imm17m2_16s: F1<(outs D:$dst), (ins P:$ptr, i32imm:$off),
+ "$dst = w[$ptr + $off] (x);",
+ [(set D:$dst,
+ (sextloadi16 (add P:$ptr, imm17m2:$off)))]>;
+
+def LOAD16pi: F1<(outs D16:$dst), (ins PI:$ptr),
+ "$dst = w[$ptr];",
+ [(set D16:$dst, (load PI:$ptr))]>;
+
+def LOAD32p_8z: F1<(outs D:$dst), (ins P:$ptr),
+ "$dst = B[$ptr] (z);",
+ [(set D:$dst, (zextloadi8 P:$ptr))]>;
+
+def : Pat<(i32 (extloadi8 P:$ptr)), (LOAD32p_8z P:$ptr)>;
+def : Pat<(i16 (extloadi8 P:$ptr)),
+ (EXTRACT_SUBREG (LOAD32p_8z P:$ptr), lo16)>;
+def : Pat<(i16 (zextloadi8 P:$ptr)),
+ (EXTRACT_SUBREG (LOAD32p_8z P:$ptr), lo16)>;
+
+def LOAD32p_imm16_8z: F1<(outs D:$dst), (ins P:$ptr, i32imm:$off),
+ "$dst = b[$ptr + $off] (z);",
+ [(set D:$dst, (zextloadi8 (add P:$ptr, imm16:$off)))]>;
+
+def : Pat<(i32 (extloadi8 (add P:$ptr, imm16:$off))),
+ (LOAD32p_imm16_8z P:$ptr, imm:$off)>;
+def : Pat<(i16 (extloadi8 (add P:$ptr, imm16:$off))),
+ (EXTRACT_SUBREG (LOAD32p_imm16_8z P:$ptr, imm:$off),
+ lo16)>;
+def : Pat<(i16 (zextloadi8 (add P:$ptr, imm16:$off))),
+ (EXTRACT_SUBREG (LOAD32p_imm16_8z P:$ptr, imm:$off),
+ lo16)>;
+
+def LOAD32p_8s: F1<(outs D:$dst), (ins P:$ptr),
+ "$dst = b[$ptr] (x);",
+ [(set D:$dst, (sextloadi8 P:$ptr))]>;
+
+def : Pat<(i16 (sextloadi8 P:$ptr)),
+ (EXTRACT_SUBREG (LOAD32p_8s P:$ptr), lo16)>;
+
+def LOAD32p_imm16_8s: F1<(outs D:$dst), (ins P:$ptr, i32imm:$off),
+ "$dst = b[$ptr + $off] (x);",
+ [(set D:$dst, (sextloadi8 (add P:$ptr, imm16:$off)))]>;
+
+def : Pat<(i16 (sextloadi8 (add P:$ptr, imm16:$off))),
+ (EXTRACT_SUBREG (LOAD32p_imm16_8s P:$ptr, imm:$off),
+ lo16)>;
+// Memory loads without patterns
+
+let mayLoad = 1 in {
+
+multiclass LOAD_incdec<RegisterClass drc, RegisterClass prc,
+ string mem="", string suf=";"> {
+ def _inc : F1<(outs drc:$dst, prc:$ptr_wb), (ins prc:$ptr),
+ !strconcat(!subst("M", mem, "$dst = M[$ptr++]"), suf), []>;
+ def _dec : F1<(outs drc:$dst, prc:$ptr_wb), (ins prc:$ptr),
+ !strconcat(!subst("M", mem, "$dst = M[$ptr--]"), suf), []>;
+}
+multiclass LOAD_incdecpost<RegisterClass drc, RegisterClass prc,
+ string mem="", string suf=";">
+ : LOAD_incdec<drc, prc, mem, suf> {
+ def _post : F1<(outs drc:$dst, prc:$ptr_wb), (ins prc:$ptr, prc:$off),
+ !strconcat(!subst("M", mem, "$dst = M[$ptr++$off]"), suf), []>;
+}
+
+defm LOAD32p: LOAD_incdec<DP, P>;
+defm LOAD32i: LOAD_incdec<D, I>;
+defm LOAD8z32p: LOAD_incdec<D, P, "b", " (z);">;
+defm LOAD8s32p: LOAD_incdec<D, P, "b", " (x);">;
+defm LOADhi: LOAD_incdec<D16, I, "w">;
+defm LOAD16z32p: LOAD_incdecpost<D, P, "w", " (z);">;
+defm LOAD16s32p: LOAD_incdecpost<D, P, "w", " (x);">;
+
+def LOAD32p_post: F1<(outs D:$dst, P:$ptr_wb), (ins P:$ptr, P:$off),
+ "$dst = [$ptr ++ $off];", []>;
+
+// Note: $fp MUST be FP
+def LOAD32fp_nimm7m4: F1<(outs DP:$dst), (ins P:$fp, i32imm:$off),
+ "$dst = [$fp - $off];", []>;
+
+def LOAD32i: F1<(outs D:$dst), (ins I:$ptr),
+ "$dst = [$ptr];", []>;
+def LOAD32i_post: F1<(outs D:$dst, I:$ptr_wb), (ins I:$ptr, M:$off),
+ "$dst = [$ptr ++ $off];", []>;
+
+
+
+def LOADhp_post: F1<(outs D16:$dst, P:$ptr_wb), (ins P:$ptr, P:$off),
+ "$dst = w[$ptr ++ $off];", []>;
+
+
+}
+
+// Memory stores with patterns
+def STORE32p: F1<(outs), (ins DP:$val, P:$ptr),
+ "[$ptr] = $val;",
+ [(store DP:$val, P:$ptr)]>;
+
+// Pseudo-instructions for storing to a stack slot
+def STORE32fi: Pseudo<(outs), (ins DP:$val, MEMii:$mem),
+ "${:comment}FI [$mem] = $val;",
+ [(store DP:$val, ADDRspii:$mem)]>;
+
+// Note: This stack-storing pseudo-instruction is expanded to multiple insns
+def STORE16fi: Pseudo<(outs), (ins D16:$val, MEMii:$mem),
+ "${:comment}FI [$mem] = $val;",
+ [(store D16:$val, ADDRspii:$mem)]>;
+
+// Pseudo-instructions for storing AnyCC register to a stack slot.
+// Replaced with D=CC + STORE byte
+def STORE8fi: Pseudo<(outs), (ins AnyCC:$val, MEMii:$mem),
+ "${:comment}FI b[$mem] = $val;",
+ [(store AnyCC:$val, ADDRspii:$mem)]>;
+
+def STORE32p_uimm6m4: F1<(outs), (ins DP:$val, P:$ptr, i32imm:$off),
+ "[$ptr + $off] = $val;",
+ [(store DP:$val, (add P:$ptr, uimm6m4:$off))]>;
+
+def STORE32p_imm18m4: F1<(outs), (ins DP:$val, P:$ptr, i32imm:$off),
+ "[$ptr + $off] = $val;",
+ [(store DP:$val, (add P:$ptr, imm18m4:$off))]>;
+
+def STORE16pi: F1<(outs), (ins D16:$val, PI:$ptr),
+ "w[$ptr] = $val;",
+ [(store D16:$val, PI:$ptr)]>;
+
+def STORE8p: F1<(outs), (ins D:$val, P:$ptr),
+ "b[$ptr] = $val;",
+ [(truncstorei8 D:$val, P:$ptr)]>;
+
+def STORE8p_imm16: F1<(outs), (ins D:$val, P:$ptr, i32imm:$off),
+ "b[$ptr + $off] = $val;",
+ [(truncstorei8 D:$val, (add P:$ptr, imm16:$off))]>;
+
+let Constraints = "$ptr = $ptr_wb" in {
+
+multiclass STORE_incdec<RegisterClass drc, RegisterClass prc,
+ int off=4, string pre=""> {
+ def _inc : F1<(outs prc:$ptr_wb), (ins drc:$val, prc:$ptr),
+ !strconcat(pre, "[$ptr++] = $val;"),
+ [(set prc:$ptr_wb, (post_store drc:$val, prc:$ptr, off))]>;
+ def _dec : F1<(outs prc:$ptr_wb), (ins drc:$val, prc:$ptr),
+ !strconcat(pre, "[$ptr--] = $val;"),
+ [(set prc:$ptr_wb, (post_store drc:$val, prc:$ptr,
+ (ineg off)))]>;
+}
+
+defm STORE32p: STORE_incdec<DP, P>;
+defm STORE16i: STORE_incdec<D16, I, 2, "w">;
+defm STORE8p: STORE_incdec<D, P, 1, "b">;
+
+def STORE32p_post: F1<(outs P:$ptr_wb), (ins D:$val, P:$ptr, P:$off),
+ "[$ptr ++ $off] = $val;",
+ [(set P:$ptr_wb, (post_store D:$val, P:$ptr, P:$off))]>;
+
+def STORE16p_post: F1<(outs P:$ptr_wb), (ins D16:$val, P:$ptr, P:$off),
+ "w[$ptr ++ $off] = $val;",
+ [(set P:$ptr_wb, (post_store D16:$val, P:$ptr, P:$off))]>;
+}
+
+// Memory stores without patterns
+
+let mayStore = 1 in {
+
+// Note: only works for $fp == FP
+def STORE32fp_nimm7m4: F1<(outs), (ins DP:$val, P:$fp, i32imm:$off),
+ "[$fp - $off] = $val;", []>;
+
+def STORE32i: F1<(outs), (ins D:$val, I:$ptr),
+ "[$ptr] = $val;", []>;
+
+def STORE32i_inc: F1<(outs I:$ptr_wb), (ins D:$val, I:$ptr),
+ "[$ptr++] = $val;", []>;
+
+def STORE32i_dec: F1<(outs I:$ptr_wb), (ins D:$val, I:$ptr),
+ "[$ptr--] = $val;", []>;
+
+def STORE32i_post: F1<(outs I:$ptr_wb), (ins D:$val, I:$ptr, M:$off),
+ "[$ptr ++ $off] = $val;", []>;
+}
+
+def : Pat<(truncstorei16 D:$val, PI:$ptr),
+ (STORE16pi (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS D:$val, D)),
+ lo16), PI:$ptr)>;
+
+def : Pat<(truncstorei16 (srl D:$val, (i16 16)), PI:$ptr),
+ (STORE16pi (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS D:$val, D)),
+ hi16), PI:$ptr)>;
+
+def : Pat<(truncstorei8 D16L:$val, P:$ptr),
+ (STORE8p (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
+ (i16 (COPY_TO_REGCLASS D16L:$val, D16L)),
+ lo16),
+ P:$ptr)>;
+
+//===----------------------------------------------------------------------===//
+// Table C-11. Move Instructions.
+//===----------------------------------------------------------------------===//
+
+def MOVE: F1<(outs ALL:$dst), (ins ALL:$src),
+ "$dst = $src;",
+ []>;
+
+let Constraints = "$src1 = $dst" in
+def MOVEcc: F1<(outs DP:$dst), (ins DP:$src1, DP:$src2, AnyCC:$cc),
+ "if $cc $dst = $src2;",
+ [(set DP:$dst, (select AnyCC:$cc, DP:$src2, DP:$src1))]>;
+
+let Defs = [AZ, AN, AC0, V] in {
+def MOVEzext: F1<(outs D:$dst), (ins D16L:$src),
+ "$dst = $src (z);",
+ [(set D:$dst, (zext D16L:$src))]>;
+
+def MOVEsext: F1<(outs D:$dst), (ins D16L:$src),
+ "$dst = $src (x);",
+ [(set D:$dst, (sext D16L:$src))]>;
+
+def MOVEzext8: F1<(outs D:$dst), (ins D:$src),
+ "$dst = $src.b (z);",
+ [(set D:$dst, (and D:$src, 0xff))]>;
+
+def MOVEsext8: F1<(outs D:$dst), (ins D:$src),
+ "$dst = $src.b (x);",
+ [(set D:$dst, (sext_inreg D:$src, i8))]>;
+
+}
+
+def : Pat<(sext_inreg D16L:$src, i8),
+ (EXTRACT_SUBREG (MOVEsext8
+ (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
+ D16L:$src,
+ lo16)),
+ lo16)>;
+
+def : Pat<(sext_inreg D:$src, i16),
+ (MOVEsext (EXTRACT_SUBREG D:$src, lo16))>;
+
+def : Pat<(and D:$src, 0xffff),
+ (MOVEzext (EXTRACT_SUBREG D:$src, lo16))>;
+
+def : Pat<(i32 (anyext D16L:$src)),
+ (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
+ (i16 (COPY_TO_REGCLASS D16L:$src, D16L)),
+ lo16)>;
+
+// TODO Dreg = Dreg_byte (X/Z)
+
+// TODO Accumulator moves
+
+//===----------------------------------------------------------------------===//
+// Table C-12. Stack Control Instructions
+//===----------------------------------------------------------------------===//
+
+let Uses = [SP], Defs = [SP] in {
+def PUSH: F1<(outs), (ins ALL:$src),
+ "[--sp] = $src;", []> { let mayStore = 1; }
+
+// NOTE: POP does not work for DP regs, use LOAD instead
+def POP: F1<(outs ALL:$dst), (ins),
+ "$dst = [sp++];", []> { let mayLoad = 1; }
+}
+
+// TODO: push/pop multiple
+
+def LINK: F2<(outs), (ins i32imm:$amount),
+ "link $amount;", []>;
+
+def UNLINK: F2<(outs), (ins),
+ "unlink;", []>;
+
+//===----------------------------------------------------------------------===//
+// Table C-13. Control Code Bit Management Instructions
+//===----------------------------------------------------------------------===//
+
+multiclass SETCC<PatFrag opnode, PatFrag invnode, string cond, string suf=";"> {
+ def dd : F1<(outs JustCC:$cc), (ins D:$a, D:$b),
+ !strconcat(!subst("XX", cond, "cc = $a XX $b"), suf),
+ [(set JustCC:$cc, (opnode D:$a, D:$b))]>;
+
+ def ri : F1<(outs JustCC:$cc), (ins DP:$a, i32imm:$b),
+ !strconcat(!subst("XX", cond, "cc = $a XX $b"), suf),
+ [(set JustCC:$cc, (opnode DP:$a, imm3:$b))]>;
+
+ def pp : F1<(outs JustCC:$cc), (ins P:$a, P:$b),
+ !strconcat(!subst("XX", cond, "cc = $a XX $b"), suf),
+ []>;
+
+ def ri_not : F1<(outs NotCC:$cc), (ins DP:$a, i32imm:$b),
+ !strconcat(!subst("XX", cond, "cc = $a XX $b"), suf),
+ [(set NotCC:$cc, (invnode DP:$a, imm3:$b))]>;
+}
+
+defm SETEQ : SETCC<seteq, setne, "==">;
+defm SETLT : SETCC<setlt, setge, "<">;
+defm SETLE : SETCC<setle, setgt, "<=">;
+defm SETULT : SETCC<setult, setuge, "<", " (iu);">;
+defm SETULE : SETCC<setule, setugt, "<=", " (iu);">;
+
+def SETNEdd : F1<(outs NotCC:$cc), (ins D:$a, D:$b),
+ "cc = $a == $b;",
+ [(set NotCC:$cc, (setne D:$a, D:$b))]>;
+
+def : Pat<(setgt D:$a, D:$b), (SETLTdd D:$b, D:$a)>;
+def : Pat<(setge D:$a, D:$b), (SETLEdd D:$b, D:$a)>;
+def : Pat<(setugt D:$a, D:$b), (SETULTdd D:$b, D:$a)>;
+def : Pat<(setuge D:$a, D:$b), (SETULEdd D:$b, D:$a)>;
+
+// TODO: compare pointer for P-P comparisons
+// TODO: compare accumulator
+
+let Defs = [AC0] in
+def OR_ac0_cc : F1<(outs), (ins JustCC:$cc),
+ "ac0 \\|= cc;", []>;
+
+let Uses = [AC0] in
+def MOVE_cc_ac0 : F1<(outs JustCC:$cc), (ins),
+ "cc = ac0;", []>;
+
+def MOVE_ccncc : F1<(outs JustCC:$cc), (ins NotCC:$sb),
+ "cc = !cc;", []>;
+
+def MOVE_ncccc : F1<(outs NotCC:$cc), (ins JustCC:$sb),
+ "cc = !cc;", []>;
+
+def MOVECC_zext : F1<(outs D:$dst), (ins JustCC:$cc),
+ "$dst = $cc;", []>;
+
+def MOVENCC_z : F1<(outs D:$dst), (ins NotCC:$cc),
+ "$dst = cc;", []>;
+
+def MOVECC_nz : F1<(outs AnyCC:$cc), (ins D:$src),
+ "cc = $src;",
+ [(set AnyCC:$cc, (setne D:$src, 0))]>;
+
+//===----------------------------------------------------------------------===//
+// Table C-14. Logical Operations Instructions
+//===----------------------------------------------------------------------===//
+
+def AND: F1<(outs D:$dst), (ins D:$src1, D:$src2),
+ "$dst = $src1 & $src2;",
+ [(set D:$dst, (and D:$src1, D:$src2))]>;
+
+def NOT: F1<(outs D:$dst), (ins D:$src),
+ "$dst = ~$src;",
+ [(set D:$dst, (not D:$src))]>;
+
+def OR: F1<(outs D:$dst), (ins D:$src1, D:$src2),
+ "$dst = $src1 \\| $src2;",
+ [(set D:$dst, (or D:$src1, D:$src2))]>;
+
+def XOR: F1<(outs D:$dst), (ins D:$src1, D:$src2),
+ "$dst = $src1 ^ $src2;",
+ [(set D:$dst, (xor D:$src1, D:$src2))]>;
+
+// missing: BXOR, BXORSHIFT
+
+//===----------------------------------------------------------------------===//
+// Table C-15. Bit Operations Instructions
+//===----------------------------------------------------------------------===//
+
+let Constraints = "$src1 = $dst" in {
+def BITCLR: F1<(outs D:$dst), (ins D:$src1, uimm5imask:$src2),
+ "bitclr($dst, $src2);",
+ [(set D:$dst, (and D:$src1, uimm5imask:$src2))]>;
+
+def BITSET: F1<(outs D:$dst), (ins D:$src1, uimm5mask:$src2),
+ "bitset($dst, $src2);",
+ [(set D:$dst, (or D:$src1, uimm5mask:$src2))]>;
+
+def BITTGL: F1<(outs D:$dst), (ins D:$src1, uimm5mask:$src2),
+ "bittgl($dst, $src2);",
+ [(set D:$dst, (xor D:$src1, uimm5mask:$src2))]>;
+}
+
+def BITTST: F1<(outs JustCC:$cc), (ins D:$src1, uimm5mask:$src2),
+ "cc = bittst($src1, $src2);",
+ [(set JustCC:$cc, (setne (and D:$src1, uimm5mask:$src2),
+ (i32 0)))]>;
+
+def NBITTST: F1<(outs JustCC:$cc), (ins D:$src1, uimm5mask:$src2),
+ "cc = !bittst($src1, $src2);",
+ [(set JustCC:$cc, (seteq (and D:$src1, uimm5mask:$src2),
+ (i32 0)))]>;
+
+// TODO: DEPOSIT, EXTRACT, BITMUX
+
+def ONES: F2<(outs D16L:$dst), (ins D:$src),
+ "$dst = ones $src;",
+ [(set D16L:$dst, (trunc (ctpop D:$src)))]>;
+
+def : Pat<(ctpop D:$src), (MOVEzext (ONES D:$src))>;
+
+//===----------------------------------------------------------------------===//
+// Table C-16. Shift / Rotate Instructions
+//===----------------------------------------------------------------------===//
+
+multiclass SHIFT32<SDNode opnode, string ops> {
+ def i : F1<(outs D:$dst), (ins D:$src, i16imm:$amount),
+ !subst("XX", ops, "$dst XX= $amount;"),
+ [(set D:$dst, (opnode D:$src, (i16 uimm5:$amount)))]>;
+ def r : F1<(outs D:$dst), (ins D:$src, D:$amount),
+ !subst("XX", ops, "$dst XX= $amount;"),
+ [(set D:$dst, (opnode D:$src, D:$amount))]>;
+}
+
+let Defs = [AZ, AN, V, VS],
+ Constraints = "$src = $dst" in {
+defm SRA : SHIFT32<sra, ">>>">;
+defm SRL : SHIFT32<srl, ">>">;
+defm SLL : SHIFT32<shl, "<<">;
+}
+
+// TODO: automatic switching between 2-addr and 3-addr (?)
+
+let Defs = [AZ, AN, V, VS] in {
+def SLLr16: F2<(outs D:$dst), (ins D:$src, D16L:$amount),
+ "$dst = lshift $src by $amount;",
+ [(set D:$dst, (shl D:$src, D16L:$amount))]>;
+
+// Arithmetic left-shift = saturing overflow.
+def SLAr16: F2<(outs D:$dst), (ins D:$src, D16L:$amount),
+ "$dst = ashift $src by $amount;",
+ [(set D:$dst, (sra D:$src, (ineg D16L:$amount)))]>;
+
+def SRA16i: F1<(outs D16:$dst), (ins D16:$src, i16imm:$amount),
+ "$dst = $src >>> $amount;",
+ [(set D16:$dst, (sra D16:$src, (i16 uimm4:$amount)))]>;
+
+def SRL16i: F1<(outs D16:$dst), (ins D16:$src, i16imm:$amount),
+ "$dst = $src >> $amount;",
+ [(set D16:$dst, (srl D16:$src, (i16 uimm4:$amount)))]>;
+
+// Arithmetic left-shift = saturing overflow.
+def SLA16r: F1<(outs D16:$dst), (ins D16:$src, D16L:$amount),
+ "$dst = ashift $src BY $amount;",
+ [(set D16:$dst, (srl D16:$src, (ineg D16L:$amount)))]>;
+
+def SLL16i: F1<(outs D16:$dst), (ins D16:$src, i16imm:$amount),
+ "$dst = $src << $amount;",
+ [(set D16:$dst, (shl D16:$src, (i16 uimm4:$amount)))]>;
+
+def SLL16r: F1<(outs D16:$dst), (ins D16:$src, D16L:$amount),
+ "$dst = lshift $src by $amount;",
+ [(set D16:$dst, (shl D16:$src, D16L:$amount))]>;
+
+}
+
+//===----------------------------------------------------------------------===//
+// Table C-17. Arithmetic Operations Instructions
+//===----------------------------------------------------------------------===//
+
+// TODO: ABS
+
+let Defs = [AZ, AN, AC0, V, VS] in {
+
+def ADD: F1<(outs D:$dst), (ins D:$src1, D:$src2),
+ "$dst = $src1 + $src2;",
+ [(set D:$dst, (add D:$src1, D:$src2))]>;
+
+def ADD16: F2<(outs D16:$dst), (ins D16:$src1, D16:$src2),
+ "$dst = $src1 + $src2;",
+ [(set D16:$dst, (add D16:$src1, D16:$src2))]>;
+
+let Constraints = "$src1 = $dst" in
+def ADDimm7: F1<(outs D:$dst), (ins D:$src1, i32imm:$src2),
+ "$dst += $src2;",
+ [(set D:$dst, (add D:$src1, imm7:$src2))]>;
+
+def SUB: F1<(outs D:$dst), (ins D:$src1, D:$src2),
+ "$dst = $src1 - $src2;",
+ [(set D:$dst, (sub D:$src1, D:$src2))]>;
+
+def SUB16: F2<(outs D16:$dst), (ins D16:$src1, D16:$src2),
+ "$dst = $src1 - $src2;",
+ [(set D16:$dst, (sub D16:$src1, D16:$src2))]>;
+
+}
+
+def : Pat<(addc D:$src1, D:$src2), (ADD D:$src1, D:$src2)>;
+def : Pat<(subc D:$src1, D:$src2), (SUB D:$src1, D:$src2)>;
+
+let Defs = [AZ, AN, V, VS] in
+def NEG: F1<(outs D:$dst), (ins D:$src),
+ "$dst = -$src;",
+ [(set D:$dst, (ineg D:$src))]>;
+
+// No pattern, it would confuse isel to have two i32 = i32+i32 patterns
+def ADDpp: F1<(outs P:$dst), (ins P:$src1, P:$src2),
+ "$dst = $src1 + $src2;", []>;
+
+let Constraints = "$src1 = $dst" in
+def ADDpp_imm7: F1<(outs P:$dst), (ins P:$src1, i32imm:$src2),
+ "$dst += $src2;", []>;
+
+let Defs = [AZ, AN, V] in
+def ADD_RND20: F2<(outs D16:$dst), (ins D:$src1, D:$src2),
+ "$dst = $src1 + $src2 (rnd20);", []>;
+
+let Defs = [V, VS] in {
+def MUL16: F2<(outs D16:$dst), (ins D16:$src1, D16:$src2),
+ "$dst = $src1 * $src2 (is);",
+ [(set D16:$dst, (mul D16:$src1, D16:$src2))]>;
+
+def MULHS16: F2<(outs D16:$dst), (ins D16:$src1, D16:$src2),
+ "$dst = $src1 * $src2 (ih);",
+ [(set D16:$dst, (mulhs D16:$src1, D16:$src2))]>;
+
+def MULhh32s: F2<(outs D:$dst), (ins D16:$src1, D16:$src2),
+ "$dst = $src1 * $src2 (is);",
+ [(set D:$dst, (mul (sext D16:$src1), (sext D16:$src2)))]>;
+
+def MULhh32u: F2<(outs D:$dst), (ins D16:$src1, D16:$src2),
+ "$dst = $src1 * $src2 (is);",
+ [(set D:$dst, (mul (zext D16:$src1), (zext D16:$src2)))]>;
+}
+
+
+let Constraints = "$src1 = $dst" in
+def MUL32: F1<(outs D:$dst), (ins D:$src1, D:$src2),
+ "$dst *= $src2;",
+ [(set D:$dst, (mul D:$src1, D:$src2))]>;
+
+//===----------------------------------------------------------------------===//
+// Table C-18. External Exent Management Instructions
+//===----------------------------------------------------------------------===//
+
+def IDLE : F1<(outs), (ins), "idle;", [(int_bfin_idle)]>;
+def CSYNC : F1<(outs), (ins), "csync;", [(int_bfin_csync)]>;
+def SSYNC : F1<(outs), (ins), "ssync;", [(int_bfin_ssync)]>;
+def EMUEXCPT : F1<(outs), (ins), "emuexcpt;", []>;
+def CLI : F1<(outs D:$mask), (ins), "cli $mask;", []>;
+def STI : F1<(outs), (ins D:$mask), "sti $mask;", []>;
+def RAISE : F1<(outs), (ins i32imm:$itr), "raise $itr;", []>;
+def EXCPT : F1<(outs), (ins i32imm:$exc), "excpt $exc;", []>;
+def NOP : F1<(outs), (ins), "nop;", []>;
+def MNOP : F2<(outs), (ins), "mnop;", []>;
+def ABORT : F1<(outs), (ins), "abort;", []>;
+
+//===----------------------------------------------------------------------===//
+// Table C-19. Cache Control Instructions
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Table C-20. Video Pixel Operations Instructions
+//===----------------------------------------------------------------------===//
+
+def ALIGN8 : F2<(outs D:$dst), (ins D:$src1, D:$src2),
+ "$dst = align8($src1, $src2);",
+ [(set D:$dst, (or (shl D:$src1, (i32 24)),
+ (srl D:$src2, (i32 8))))]>;
+
+def ALIGN16 : F2<(outs D:$dst), (ins D:$src1, D:$src2),
+ "$dst = align16($src1, $src2);",
+ [(set D:$dst, (or (shl D:$src1, (i32 16)),
+ (srl D:$src2, (i32 16))))]>;
+
+def ALIGN24 : F2<(outs D:$dst), (ins D:$src1, D:$src2),
+ "$dst = align16($src1, $src2);",
+ [(set D:$dst, (or (shl D:$src1, (i32 8)),
+ (srl D:$src2, (i32 24))))]>;
+
+def DISALGNEXCPT : F2<(outs), (ins), "disalignexcpt;", []>;
+
+// TODO: BYTEOP3P, BYTEOP16P, BYTEOP1P, BYTEOP2P, BYTEOP16M, SAA,
+// BYTEPACK, BYTEUNPACK
+
+// Table C-21. Vector Operations Instructions
+
+// Patterns
+def : Pat<(BfinCall (i32 tglobaladdr:$dst)),
+ (CALLa tglobaladdr:$dst)>;
+def : Pat<(BfinCall (i32 texternalsym:$dst)),
+ (CALLa texternalsym:$dst)>;
+def : Pat<(i16 (trunc D:$src)),
+ (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS D:$src, D)), lo16)>;
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp b/contrib/llvm/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp
new file mode 100644
index 0000000..7135676
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp
@@ -0,0 +1,104 @@
+//===- BlackfinIntrinsicInfo.cpp - Intrinsic Information --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Blackfin implementation of TargetIntrinsicInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "BlackfinIntrinsicInfo.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Module.h"
+#include "llvm/Type.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cstring>
+
+using namespace llvm;
+
+namespace bfinIntrinsic {
+
+ enum ID {
+ last_non_bfin_intrinsic = Intrinsic::num_intrinsics-1,
+#define GET_INTRINSIC_ENUM_VALUES
+#include "BlackfinGenIntrinsics.inc"
+#undef GET_INTRINSIC_ENUM_VALUES
+ , num_bfin_intrinsics
+ };
+
+}
+
+std::string BlackfinIntrinsicInfo::getName(unsigned IntrID, Type **Tys,
+ unsigned numTys) const {
+ static const char *const names[] = {
+#define GET_INTRINSIC_NAME_TABLE
+#include "BlackfinGenIntrinsics.inc"
+#undef GET_INTRINSIC_NAME_TABLE
+ };
+
+ assert(!isOverloaded(IntrID) && "Blackfin intrinsics are not overloaded");
+ if (IntrID < Intrinsic::num_intrinsics)
+ return 0;
+ assert(IntrID < bfinIntrinsic::num_bfin_intrinsics && "Invalid intrinsic ID");
+
+ std::string Result(names[IntrID - Intrinsic::num_intrinsics]);
+ return Result;
+}
+
+unsigned
+BlackfinIntrinsicInfo::lookupName(const char *Name, unsigned Len) const {
+ if (Len < 5 || Name[4] != '.' || Name[0] != 'l' || Name[1] != 'l'
+ || Name[2] != 'v' || Name[3] != 'm')
+ return 0; // All intrinsics start with 'llvm.'
+
+#define GET_FUNCTION_RECOGNIZER
+#include "BlackfinGenIntrinsics.inc"
+#undef GET_FUNCTION_RECOGNIZER
+ return 0;
+}
+
+bool BlackfinIntrinsicInfo::isOverloaded(unsigned IntrID) const {
+ // Overload Table
+ const bool OTable[] = {
+#define GET_INTRINSIC_OVERLOAD_TABLE
+#include "BlackfinGenIntrinsics.inc"
+#undef GET_INTRINSIC_OVERLOAD_TABLE
+ };
+ if (IntrID == 0)
+ return false;
+ else
+ return OTable[IntrID - Intrinsic::num_intrinsics];
+}
+
+/// This defines the "getAttributes(ID id)" method.
+#define GET_INTRINSIC_ATTRIBUTES
+#include "BlackfinGenIntrinsics.inc"
+#undef GET_INTRINSIC_ATTRIBUTES
+
+static FunctionType *getType(LLVMContext &Context, unsigned id) {
+ Type *ResultTy = NULL;
+ std::vector<Type*> ArgTys;
+ bool IsVarArg = false;
+
+#define GET_INTRINSIC_GENERATOR
+#include "BlackfinGenIntrinsics.inc"
+#undef GET_INTRINSIC_GENERATOR
+
+ return FunctionType::get(ResultTy, ArgTys, IsVarArg);
+}
+
+Function *BlackfinIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID,
+ Type **Tys,
+ unsigned numTy) const {
+ assert(!isOverloaded(IntrID) && "Blackfin intrinsics are not overloaded");
+ AttrListPtr AList = getAttributes((bfinIntrinsic::ID) IntrID);
+ return cast<Function>(M->getOrInsertFunction(getName(IntrID),
+ getType(M->getContext(), IntrID),
+ AList));
+}
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinIntrinsicInfo.h b/contrib/llvm/lib/Target/Blackfin/BlackfinIntrinsicInfo.h
new file mode 100644
index 0000000..f05db5a
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinIntrinsicInfo.h
@@ -0,0 +1,32 @@
+//===- BlackfinIntrinsicInfo.h - Blackfin Intrinsic Information -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Blackfin implementation of TargetIntrinsicInfo.
+//
+//===----------------------------------------------------------------------===//
+#ifndef BLACKFININTRINSICS_H
+#define BLACKFININTRINSICS_H
+
+#include "llvm/Target/TargetIntrinsicInfo.h"
+
+namespace llvm {
+
+ class BlackfinIntrinsicInfo : public TargetIntrinsicInfo {
+ public:
+ std::string getName(unsigned IntrID, Type **Tys = 0,
+ unsigned numTys = 0) const;
+ unsigned lookupName(const char *Name, unsigned Len) const;
+ bool isOverloaded(unsigned IID) const;
+ Function *getDeclaration(Module *M, unsigned ID, Type **Tys = 0,
+ unsigned numTys = 0) const;
+ };
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinIntrinsics.td b/contrib/llvm/lib/Target/Blackfin/BlackfinIntrinsics.td
new file mode 100644
index 0000000..ce21b08
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinIntrinsics.td
@@ -0,0 +1,34 @@
+//===- BlackfinIntrinsics.td - Defines Blackfin intrinsics -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines all of the blackfin-specific intrinsics.
+//
+//===----------------------------------------------------------------------===//
+
+let TargetPrefix = "bfin", isTarget = 1 in {
+
+//===----------------------------------------------------------------------===//
+// Core synchronisation etc.
+//
+// These intrinsics have sideeffects. Each represent a single instruction, but
+// workarounds are sometimes required depending on the cpu.
+
+// Execute csync instruction with workarounds
+def int_bfin_csync : GCCBuiltin<"__builtin_bfin_csync">,
+ Intrinsic<[]>;
+
+// Execute ssync instruction with workarounds
+def int_bfin_ssync : GCCBuiltin<"__builtin_bfin_ssync">,
+ Intrinsic<[]>;
+
+// Execute idle instruction with workarounds
+def int_bfin_idle : GCCBuiltin<"__builtin_bfin_idle">,
+ Intrinsic<[]>;
+
+}
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinRegisterInfo.cpp b/contrib/llvm/lib/Target/Blackfin/BlackfinRegisterInfo.cpp
new file mode 100644
index 0000000..0d415c5
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinRegisterInfo.cpp
@@ -0,0 +1,344 @@
+//===- BlackfinRegisterInfo.cpp - Blackfin Register Information -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Blackfin implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Blackfin.h"
+#include "BlackfinRegisterInfo.h"
+#include "BlackfinSubtarget.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Type.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+
+#define GET_REGINFO_TARGET_DESC
+#include "BlackfinGenRegisterInfo.inc"
+
+using namespace llvm;
+
+BlackfinRegisterInfo::BlackfinRegisterInfo(BlackfinSubtarget &st,
+ const TargetInstrInfo &tii)
+ : BlackfinGenRegisterInfo(BF::RETS), Subtarget(st), TII(tii) {}
+
+const unsigned*
+BlackfinRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+ using namespace BF;
+ static const unsigned CalleeSavedRegs[] = {
+ FP,
+ R4, R5, R6, R7,
+ P3, P4, P5,
+ 0 };
+ return CalleeSavedRegs;
+}
+
+BitVector
+BlackfinRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ using namespace BF;
+ BitVector Reserved(getNumRegs());
+ Reserved.set(AZ);
+ Reserved.set(AN);
+ Reserved.set(AQ);
+ Reserved.set(AC0);
+ Reserved.set(AC1);
+ Reserved.set(AV0);
+ Reserved.set(AV0S);
+ Reserved.set(AV1);
+ Reserved.set(AV1S);
+ Reserved.set(V);
+ Reserved.set(VS);
+ Reserved.set(CYCLES).set(CYCLES2);
+ Reserved.set(L0);
+ Reserved.set(L1);
+ Reserved.set(L2);
+ Reserved.set(L3);
+ Reserved.set(SP);
+ Reserved.set(RETS);
+ if (TFI->hasFP(MF))
+ Reserved.set(FP);
+ return Reserved;
+}
+
+bool BlackfinRegisterInfo::
+requiresRegisterScavenging(const MachineFunction &MF) const {
+ return true;
+}
+
+// Emit instructions to add delta to D/P register. ScratchReg must be of the
+// same class as Reg (P).
+void BlackfinRegisterInfo::adjustRegister(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ DebugLoc DL,
+ unsigned Reg,
+ unsigned ScratchReg,
+ int delta) const {
+ if (!delta)
+ return;
+ if (isInt<7>(delta)) {
+ BuildMI(MBB, I, DL, TII.get(BF::ADDpp_imm7), Reg)
+ .addReg(Reg) // No kill on two-addr operand
+ .addImm(delta);
+ return;
+ }
+
+ // We must load delta into ScratchReg and add that.
+ loadConstant(MBB, I, DL, ScratchReg, delta);
+ if (BF::PRegClass.contains(Reg)) {
+ assert(BF::PRegClass.contains(ScratchReg) &&
+ "ScratchReg must be a P register");
+ BuildMI(MBB, I, DL, TII.get(BF::ADDpp), Reg)
+ .addReg(Reg, RegState::Kill)
+ .addReg(ScratchReg, RegState::Kill);
+ } else {
+ assert(BF::DRegClass.contains(Reg) && "Reg must be a D or P register");
+ assert(BF::DRegClass.contains(ScratchReg) &&
+ "ScratchReg must be a D register");
+ BuildMI(MBB, I, DL, TII.get(BF::ADD), Reg)
+ .addReg(Reg, RegState::Kill)
+ .addReg(ScratchReg, RegState::Kill);
+ }
+}
+
+// Emit instructions to load a constant into D/P register
+void BlackfinRegisterInfo::loadConstant(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ DebugLoc DL,
+ unsigned Reg,
+ int value) const {
+ if (isInt<7>(value)) {
+ BuildMI(MBB, I, DL, TII.get(BF::LOADimm7), Reg).addImm(value);
+ return;
+ }
+
+ if (isUInt<16>(value)) {
+ BuildMI(MBB, I, DL, TII.get(BF::LOADuimm16), Reg).addImm(value);
+ return;
+ }
+
+ if (isInt<16>(value)) {
+ BuildMI(MBB, I, DL, TII.get(BF::LOADimm16), Reg).addImm(value);
+ return;
+ }
+
+ // We must split into halves
+ BuildMI(MBB, I, DL,
+ TII.get(BF::LOAD16i), getSubReg(Reg, BF::hi16))
+ .addImm((value >> 16) & 0xffff)
+ .addReg(Reg, RegState::ImplicitDefine);
+ BuildMI(MBB, I, DL,
+ TII.get(BF::LOAD16i), getSubReg(Reg, BF::lo16))
+ .addImm(value & 0xffff)
+ .addReg(Reg, RegState::ImplicitKill)
+ .addReg(Reg, RegState::ImplicitDefine);
+}
+
+void BlackfinRegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ if (!TFI->hasReservedCallFrame(MF)) {
+ int64_t Amount = I->getOperand(0).getImm();
+ if (Amount != 0) {
+ assert(Amount%4 == 0 && "Unaligned call frame size");
+ if (I->getOpcode() == BF::ADJCALLSTACKDOWN) {
+ adjustRegister(MBB, I, I->getDebugLoc(), BF::SP, BF::P1, -Amount);
+ } else {
+ assert(I->getOpcode() == BF::ADJCALLSTACKUP &&
+ "Unknown call frame pseudo instruction");
+ adjustRegister(MBB, I, I->getDebugLoc(), BF::SP, BF::P1, Amount);
+ }
+ }
+ }
+ MBB.erase(I);
+}
+
+/// findScratchRegister - Find a 'free' register. Try for a call-clobbered
+/// register first and then a spilled callee-saved register if that fails.
+static unsigned findScratchRegister(MachineBasicBlock::iterator II,
+ RegScavenger *RS,
+ const TargetRegisterClass *RC,
+ int SPAdj) {
+ assert(RS && "Register scavenging must be on");
+ unsigned Reg = RS->FindUnusedReg(RC);
+ if (Reg == 0)
+ Reg = RS->scavengeRegister(RC, II, SPAdj);
+ return Reg;
+}
+
+void
+BlackfinRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS) const {
+ MachineInstr &MI = *II;
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ DebugLoc DL = MI.getDebugLoc();
+
+ unsigned FIPos;
+ for (FIPos=0; !MI.getOperand(FIPos).isFI(); ++FIPos) {
+ assert(FIPos < MI.getNumOperands() &&
+ "Instr doesn't have FrameIndex operand!");
+ }
+ int FrameIndex = MI.getOperand(FIPos).getIndex();
+ assert(FIPos+1 < MI.getNumOperands() && MI.getOperand(FIPos+1).isImm());
+ int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex)
+ + MI.getOperand(FIPos+1).getImm();
+ unsigned BaseReg = BF::FP;
+ if (TFI->hasFP(MF)) {
+ assert(SPAdj==0 && "Unexpected SP adjust in function with frame pointer");
+ } else {
+ BaseReg = BF::SP;
+ Offset += MF.getFrameInfo()->getStackSize() + SPAdj;
+ }
+
+ bool isStore = false;
+
+ switch (MI.getOpcode()) {
+ case BF::STORE32fi:
+ isStore = true;
+ case BF::LOAD32fi: {
+ assert(Offset%4 == 0 && "Unaligned i32 stack access");
+ assert(FIPos==1 && "Bad frame index operand");
+ MI.getOperand(FIPos).ChangeToRegister(BaseReg, false);
+ MI.getOperand(FIPos+1).setImm(Offset);
+ if (isUInt<6>(Offset)) {
+ MI.setDesc(TII.get(isStore
+ ? BF::STORE32p_uimm6m4
+ : BF::LOAD32p_uimm6m4));
+ return;
+ }
+ if (BaseReg == BF::FP && isUInt<7>(-Offset)) {
+ MI.setDesc(TII.get(isStore
+ ? BF::STORE32fp_nimm7m4
+ : BF::LOAD32fp_nimm7m4));
+ MI.getOperand(FIPos+1).setImm(-Offset);
+ return;
+ }
+ if (isInt<18>(Offset)) {
+ MI.setDesc(TII.get(isStore
+ ? BF::STORE32p_imm18m4
+ : BF::LOAD32p_imm18m4));
+ return;
+ }
+ // Use RegScavenger to calculate proper offset...
+ MI.dump();
+ llvm_unreachable("Stack frame offset too big");
+ break;
+ }
+ case BF::ADDpp: {
+ assert(MI.getOperand(0).isReg() && "ADD instruction needs a register");
+ unsigned DestReg = MI.getOperand(0).getReg();
+ // We need to produce a stack offset in a P register. We emit:
+ // P0 = offset;
+ // P0 = BR + P0;
+ assert(FIPos==1 && "Bad frame index operand");
+ loadConstant(MBB, II, DL, DestReg, Offset);
+ MI.getOperand(1).ChangeToRegister(DestReg, false, false, true);
+ MI.getOperand(2).ChangeToRegister(BaseReg, false);
+ break;
+ }
+ case BF::STORE16fi:
+ isStore = true;
+ case BF::LOAD16fi: {
+ assert(Offset%2 == 0 && "Unaligned i16 stack access");
+ assert(FIPos==1 && "Bad frame index operand");
+ // We need a P register to use as an address
+ unsigned ScratchReg = findScratchRegister(II, RS, &BF::PRegClass, SPAdj);
+ assert(ScratchReg && "Could not scavenge register");
+ loadConstant(MBB, II, DL, ScratchReg, Offset);
+ BuildMI(MBB, II, DL, TII.get(BF::ADDpp), ScratchReg)
+ .addReg(ScratchReg, RegState::Kill)
+ .addReg(BaseReg);
+ MI.setDesc(TII.get(isStore ? BF::STORE16pi : BF::LOAD16pi));
+ MI.getOperand(1).ChangeToRegister(ScratchReg, false, false, true);
+ MI.RemoveOperand(2);
+ break;
+ }
+ case BF::STORE8fi: {
+ // This is an AnyCC spill, we need a scratch register.
+ assert(FIPos==1 && "Bad frame index operand");
+ MachineOperand SpillReg = MI.getOperand(0);
+ unsigned ScratchReg = findScratchRegister(II, RS, &BF::DRegClass, SPAdj);
+ assert(ScratchReg && "Could not scavenge register");
+ if (SpillReg.getReg()==BF::NCC) {
+ BuildMI(MBB, II, DL, TII.get(BF::MOVENCC_z), ScratchReg)
+ .addOperand(SpillReg);
+ BuildMI(MBB, II, DL, TII.get(BF::BITTGL), ScratchReg)
+ .addReg(ScratchReg).addImm(0);
+ } else {
+ BuildMI(MBB, II, DL, TII.get(BF::MOVECC_zext), ScratchReg)
+ .addOperand(SpillReg);
+ }
+ // STORE D
+ MI.setDesc(TII.get(BF::STORE8p_imm16));
+ MI.getOperand(0).ChangeToRegister(ScratchReg, false, false, true);
+ MI.getOperand(FIPos).ChangeToRegister(BaseReg, false);
+ MI.getOperand(FIPos+1).setImm(Offset);
+ break;
+ }
+ case BF::LOAD8fi: {
+ // This is an restore, we need a scratch register.
+ assert(FIPos==1 && "Bad frame index operand");
+ MachineOperand SpillReg = MI.getOperand(0);
+ unsigned ScratchReg = findScratchRegister(II, RS, &BF::DRegClass, SPAdj);
+ assert(ScratchReg && "Could not scavenge register");
+ MI.setDesc(TII.get(BF::LOAD32p_imm16_8z));
+ MI.getOperand(0).ChangeToRegister(ScratchReg, true);
+ MI.getOperand(FIPos).ChangeToRegister(BaseReg, false);
+ MI.getOperand(FIPos+1).setImm(Offset);
+ ++II;
+ if (SpillReg.getReg()==BF::CC) {
+ // CC = D
+ BuildMI(MBB, II, DL, TII.get(BF::MOVECC_nz), BF::CC)
+ .addReg(ScratchReg, RegState::Kill);
+ } else {
+ // Restore NCC (CC = D==0)
+ BuildMI(MBB, II, DL, TII.get(BF::SETEQri_not), BF::NCC)
+ .addReg(ScratchReg, RegState::Kill)
+ .addImm(0);
+ }
+ break;
+ }
+ default:
+ llvm_unreachable("Cannot eliminate frame index");
+ break;
+ }
+}
+
+unsigned
+BlackfinRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ return TFI->hasFP(MF) ? BF::FP : BF::SP;
+}
+
+unsigned BlackfinRegisterInfo::getEHExceptionRegister() const {
+ llvm_unreachable("What is the exception register");
+ return 0;
+}
+
+unsigned BlackfinRegisterInfo::getEHHandlerRegister() const {
+ llvm_unreachable("What is the exception handler register");
+ return 0;
+}
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinRegisterInfo.h b/contrib/llvm/lib/Target/Blackfin/BlackfinRegisterInfo.h
new file mode 100644
index 0000000..6ac22af
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinRegisterInfo.h
@@ -0,0 +1,77 @@
+//===- BlackfinRegisterInfo.h - Blackfin Register Information ..-*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Blackfin implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BLACKFINREGISTERINFO_H
+#define BLACKFINREGISTERINFO_H
+
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#define GET_REGINFO_HEADER
+#include "BlackfinGenRegisterInfo.inc"
+
+namespace llvm {
+
+ class BlackfinSubtarget;
+ class TargetInstrInfo;
+ class Type;
+
+ struct BlackfinRegisterInfo : public BlackfinGenRegisterInfo {
+ BlackfinSubtarget &Subtarget;
+ const TargetInstrInfo &TII;
+
+ BlackfinRegisterInfo(BlackfinSubtarget &st, const TargetInstrInfo &tii);
+
+ /// Code Generation virtual methods...
+ const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ // getSubReg implemented by tablegen
+
+ const TargetRegisterClass *getPointerRegClass(unsigned Kind = 0) const {
+ return &BF::PRegClass;
+ }
+
+ bool requiresRegisterScavenging(const MachineFunction &MF) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
+
+ unsigned getFrameRegister(const MachineFunction &MF) const;
+
+ // Exception handling queries.
+ unsigned getEHExceptionRegister() const;
+ unsigned getEHHandlerRegister() const;
+
+ // Utility functions
+ void adjustRegister(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ DebugLoc DL,
+ unsigned Reg,
+ unsigned ScratchReg,
+ int delta) const;
+ void loadConstant(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ DebugLoc DL,
+ unsigned Reg,
+ int value) const;
+ };
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinRegisterInfo.td b/contrib/llvm/lib/Target/Blackfin/BlackfinRegisterInfo.td
new file mode 100644
index 0000000..1c42205
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinRegisterInfo.td
@@ -0,0 +1,277 @@
+//===- BlackfinRegisterInfo.td - Blackfin Register defs ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the Blackfin register file
+//===----------------------------------------------------------------------===//
+
+// Subregs are:
+// 1: .L
+// 2: .H
+// 3: .W (32 low bits of 40-bit accu)
+let Namespace = "BF" in {
+def lo16 : SubRegIndex;
+def hi16 : SubRegIndex;
+def lo32 : SubRegIndex;
+def hi32 : SubRegIndex;
+}
+
+// Registers are identified with 3-bit group and 3-bit ID numbers.
+class BlackfinReg<string n> : Register<n> {
+ field bits<3> Group;
+ field bits<3> Num;
+ let Namespace = "BF";
+}
+
+// Rc - 1-bit registers
+class Rc<bits<5> bitno, string n> : BlackfinReg<n> {
+ field bits<5> BitNum = bitno;
+}
+
+// Rs - 16-bit integer registers
+class Rs<bits<3> group, bits<3> num, bits<1> hi, string n> : BlackfinReg<n> {
+ let Group = group;
+ let Num = num;
+ field bits<1> High = hi;
+}
+
+// Ri - 32-bit integer registers with subregs
+class Ri<bits<3> group, bits<3> num, string n> : BlackfinReg<n> {
+ let Group = group;
+ let Num = num;
+}
+
+// Ra 40-bit accumulator registers
+class Ra<bits<3> num, string n, list<Register> subs> : BlackfinReg<n> {
+ let SubRegs = subs;
+ let SubRegIndices = [hi32, lo32];
+ let Group = 4;
+ let Num = num;
+}
+
+// Two halves of 32-bit register
+multiclass Rss<bits<3> group, bits<3> num, string n> {
+ def H : Rs<group, num, 1, !strconcat(n, ".h")>;
+ def L : Rs<group, num, 0, !strconcat(n, ".l")>;
+}
+
+// Rii - 32-bit integer registers with subregs
+class Rii<bits<3> group, bits<3> num, string n, list<Register> subs>
+ : BlackfinReg<n> {
+ let SubRegs = subs;
+ let SubRegIndices = [hi16, lo16];
+ let Group = group;
+ let Num = num;
+}
+
+// Status bits are all part of ASTAT
+def AZ : Rc<0, "az">;
+def AN : Rc<1, "an">;
+def CC : Rc<5, "cc">, DwarfRegNum<[34]>;
+def NCC : Rc<5, "!cc"> { let Aliases = [CC]; }
+def AQ : Rc<6, "aq">;
+def AC0 : Rc<12, "ac0">;
+def AC1 : Rc<13, "ac1">;
+def AV0 : Rc<16, "av0">;
+def AV0S : Rc<17, "av0s">;
+def AV1 : Rc<18, "av1">;
+def AV1S : Rc<19, "av1s">;
+def V : Rc<24, "v">;
+def VS : Rc<25, "vs">;
+// Skipped non-status bits: AC0_COPY, V_COPY, RND_MOD
+
+// Group 0: Integer registers
+defm R0 : Rss<0, 0, "r0">;
+def R0 : Rii<0, 0, "r0", [R0H, R0L]>, DwarfRegNum<[0]>;
+defm R1 : Rss<0, 1, "r1">;
+def R1 : Rii<0, 1, "r1", [R1H, R1L]>, DwarfRegNum<[1]>;
+defm R2 : Rss<0, 2, "r2">;
+def R2 : Rii<0, 2, "r2", [R2H, R2L]>, DwarfRegNum<[2]>;
+defm R3 : Rss<0, 3, "r3">;
+def R3 : Rii<0, 3, "r3", [R3H, R3L]>, DwarfRegNum<[3]>;
+defm R4 : Rss<0, 4, "r4">;
+def R4 : Rii<0, 4, "r4", [R4H, R4L]>, DwarfRegNum<[4]>;
+defm R5 : Rss<0, 5, "r5">;
+def R5 : Rii<0, 5, "r5", [R5H, R5L]>, DwarfRegNum<[5]>;
+defm R6 : Rss<0, 6, "r6">;
+def R6 : Rii<0, 6, "r6", [R6H, R6L]>, DwarfRegNum<[6]>;
+defm R7 : Rss<0, 7, "r7">;
+def R7 : Rii<0, 7, "r7", [R7H, R7L]>, DwarfRegNum<[7]>;
+
+// Group 1: Pointer registers
+defm P0 : Rss<1, 0, "p0">;
+def P0 : Rii<1, 0, "p0", [P0H, P0L]>, DwarfRegNum<[8]>;
+defm P1 : Rss<1, 1, "p1">;
+def P1 : Rii<1, 1, "p1", [P1H, P1L]>, DwarfRegNum<[9]>;
+defm P2 : Rss<1, 2, "p2">;
+def P2 : Rii<1, 2, "p2", [P2H, P2L]>, DwarfRegNum<[10]>;
+defm P3 : Rss<1, 3, "p3">;
+def P3 : Rii<1, 3, "p3", [P3H, P3L]>, DwarfRegNum<[11]>;
+defm P4 : Rss<1, 4, "p4">;
+def P4 : Rii<1, 4, "p4", [P4H, P4L]>, DwarfRegNum<[12]>;
+defm P5 : Rss<1, 5, "p5">;
+def P5 : Rii<1, 5, "p5", [P5H, P5L]>, DwarfRegNum<[13]>;
+defm SP : Rss<1, 6, "sp">;
+def SP : Rii<1, 6, "sp", [SPH, SPL]>, DwarfRegNum<[14]>;
+defm FP : Rss<1, 7, "fp">;
+def FP : Rii<1, 7, "fp", [FPH, FPL]>, DwarfRegNum<[15]>;
+
+// Group 2: Index registers
+defm I0 : Rss<2, 0, "i0">;
+def I0 : Rii<2, 0, "i0", [I0H, I0L]>, DwarfRegNum<[16]>;
+defm I1 : Rss<2, 1, "i1">;
+def I1 : Rii<2, 1, "i1", [I1H, I1L]>, DwarfRegNum<[17]>;
+defm I2 : Rss<2, 2, "i2">;
+def I2 : Rii<2, 2, "i2", [I2H, I2L]>, DwarfRegNum<[18]>;
+defm I3 : Rss<2, 3, "i3">;
+def I3 : Rii<2, 3, "i3", [I3H, I3L]>, DwarfRegNum<[19]>;
+defm M0 : Rss<2, 4, "m0">;
+def M0 : Rii<2, 4, "m0", [M0H, M0L]>, DwarfRegNum<[20]>;
+defm M1 : Rss<2, 5, "m1">;
+def M1 : Rii<2, 5, "m1", [M1H, M1L]>, DwarfRegNum<[21]>;
+defm M2 : Rss<2, 6, "m2">;
+def M2 : Rii<2, 6, "m2", [M2H, M2L]>, DwarfRegNum<[22]>;
+defm M3 : Rss<2, 7, "m3">;
+def M3 : Rii<2, 7, "m3", [M3H, M3L]>, DwarfRegNum<[23]>;
+
+// Group 3: Cyclic indexing registers
+defm B0 : Rss<3, 0, "b0">;
+def B0 : Rii<3, 0, "b0", [B0H, B0L]>, DwarfRegNum<[24]>;
+defm B1 : Rss<3, 1, "b1">;
+def B1 : Rii<3, 1, "b1", [B1H, B1L]>, DwarfRegNum<[25]>;
+defm B2 : Rss<3, 2, "b2">;
+def B2 : Rii<3, 2, "b2", [B2H, B2L]>, DwarfRegNum<[26]>;
+defm B3 : Rss<3, 3, "b3">;
+def B3 : Rii<3, 3, "b3", [B3H, B3L]>, DwarfRegNum<[27]>;
+defm L0 : Rss<3, 4, "l0">;
+def L0 : Rii<3, 4, "l0", [L0H, L0L]>, DwarfRegNum<[28]>;
+defm L1 : Rss<3, 5, "l1">;
+def L1 : Rii<3, 5, "l1", [L1H, L1L]>, DwarfRegNum<[29]>;
+defm L2 : Rss<3, 6, "l2">;
+def L2 : Rii<3, 6, "l2", [L2H, L2L]>, DwarfRegNum<[30]>;
+defm L3 : Rss<3, 7, "l3">;
+def L3 : Rii<3, 7, "l3", [L3H, L3L]>, DwarfRegNum<[31]>;
+
+// Accumulators
+def A0X : Ri <4, 0, "a0.x">;
+defm A0 : Rss<4, 1, "a0">;
+def A0W : Rii<4, 1, "a0.w", [A0H, A0L]>, DwarfRegNum<[32]>;
+def A0 : Ra <0, "a0", [A0X, A0W]>;
+
+def A1X : Ri <4, 2, "a1.x">;
+defm A1 : Rss<4, 3, "a1">;
+def A1W : Rii<4, 3, "a1.w", [A1H, A1L]>, DwarfRegNum<[33]>;
+def A1 : Ra <2, "a1", [A1X, A1W]>;
+
+def RETS : Ri<4, 7, "rets">, DwarfRegNum<[35]>;
+def RETI : Ri<7, 3, "reti">, DwarfRegNum<[36]>;
+def RETX : Ri<7, 4, "retx">, DwarfRegNum<[37]>;
+def RETN : Ri<7, 5, "retn">, DwarfRegNum<[38]>;
+def RETE : Ri<7, 6, "rete">, DwarfRegNum<[39]>;
+
+def ASTAT : Ri<4, 6, "astat">, DwarfRegNum<[40]> {
+ let Aliases = [AZ, AN, CC, NCC, AQ, AC0, AC1, AV0, AV0S, AV1, AV1S, V, VS];
+}
+
+def SEQSTAT : Ri<7, 1, "seqstat">, DwarfRegNum<[41]>;
+def USP : Ri<7, 0, "usp">, DwarfRegNum<[42]>;
+def EMUDAT : Ri<7, 7, "emudat">, DwarfRegNum<[43]>;
+def SYSCFG : Ri<7, 2, "syscfg">;
+def CYCLES : Ri<6, 6, "cycles">;
+def CYCLES2 : Ri<6, 7, "cycles2">;
+
+// Hardware loops
+def LT0 : Ri<6, 1, "lt0">, DwarfRegNum<[44]>;
+def LT1 : Ri<6, 4, "lt1">, DwarfRegNum<[45]>;
+def LC0 : Ri<6, 0, "lc0">, DwarfRegNum<[46]>;
+def LC1 : Ri<6, 3, "lc1">, DwarfRegNum<[47]>;
+def LB0 : Ri<6, 2, "lb0">, DwarfRegNum<[48]>;
+def LB1 : Ri<6, 5, "lb1">, DwarfRegNum<[49]>;
+
+// Register classes.
+def D16L : RegisterClass<"BF", [i16], 16, (sequence "R%uL", 0, 7)>;
+
+def D16H : RegisterClass<"BF", [i16], 16, (sequence "R%uH", 0, 7)>;
+
+def D16 : RegisterClass<"BF", [i16], 16, (add D16L, D16H)>;
+
+def P16L : RegisterClass<"BF", [i16], 16,
+ (add (sequence "P%uL", 0, 5), SPL, FPL)>;
+
+def P16H : RegisterClass<"BF", [i16], 16,
+ (add (sequence "P%uH", 0, 5), SPH, FPH)>;
+
+def P16 : RegisterClass<"BF", [i16], 16, (add P16L, P16H)>;
+
+def DP16 : RegisterClass<"BF", [i16], 16, (add D16, P16)>;
+
+def DP16L : RegisterClass<"BF", [i16], 16, (add D16L, P16L)>;
+
+def DP16H : RegisterClass<"BF", [i16], 16, (add D16H, P16H)>;
+
+def GR16 : RegisterClass<"BF", [i16], 16,
+ (add DP16,
+ I0H, I0L, I1H, I1L, I2H, I2L, I3H, I3L,
+ M0H, M0L, M1H, M1L, M2H, M2L, M3H, M3L,
+ B0H, B0L, B1H, B1L, B2H, B2L, B3H, B3L,
+ L0H, L0L, L1H, L1L, L2H, L2L, L3H, L3L)>;
+
+def D : RegisterClass<"BF", [i32], 32, (sequence "R%u", 0, 7)> {
+ let SubRegClasses = [(D16L lo16), (D16H hi16)];
+}
+
+def P : RegisterClass<"BF", [i32], 32, (add (sequence "P%u", 0, 5), FP, SP)> {
+ let SubRegClasses = [(P16L lo16), (P16H hi16)];
+}
+
+def DP : RegisterClass<"BF", [i32], 32, (add D, P)> {
+ let SubRegClasses = [(DP16L lo16), (DP16H hi16)];
+}
+
+def I : RegisterClass<"BF", [i32], 32, (add I0, I1, I2, I3)>;
+def M : RegisterClass<"BF", [i32], 32, (add M0, M1, M2, M3)>;
+def B : RegisterClass<"BF", [i32], 32, (add B0, B1, B2, B3)>;
+def L : RegisterClass<"BF", [i32], 32, (add L0, L1, L2, L3)>;
+
+def GR : RegisterClass<"BF", [i32], 32, (add DP, I, M, B, L)>;
+
+def ALL : RegisterClass<"BF", [i32], 32,
+ (add GR,
+ A0X, A0W, A1X, A1W, ASTAT, RETS,
+ LC0, LT0, LB0, LC1, LT1, LB1, CYCLES, CYCLES2,
+ USP, SEQSTAT, SYSCFG, RETI, RETX, RETN, RETE, EMUDAT)>;
+
+def PI : RegisterClass<"BF", [i32], 32, (add P, I)>;
+
+// We are going to pretend that CC and !CC are 32-bit registers, even though
+// they only can hold 1 bit.
+let CopyCost = -1, Size = 8 in {
+def JustCC : RegisterClass<"BF", [i32], 8, (add CC)>;
+def NotCC : RegisterClass<"BF", [i32], 8, (add NCC)>;
+def AnyCC : RegisterClass<"BF", [i32], 8, (add CC, NCC)>;
+def StatBit : RegisterClass<"BF", [i1], 8,
+ (add AZ, AN, CC, AQ, AC0, AC1, AV0, AV0S, AV1, AV1S, V, VS)>;
+}
+
+// Should be i40, but that isn't defined. It is not a legal type yet anyway.
+def Accu : RegisterClass<"BF", [i64], 64, (add A0, A1)>;
+
+// Register classes to match inline asm constraints.
+def zCons : RegisterClass<"BF", [i32], 32, (add P0, P1, P2)>;
+def DCons : RegisterClass<"BF", [i32], 32, (add R0, R2, R4, R6)>;
+def WCons : RegisterClass<"BF", [i32], 32, (add R1, R3, R5, R7)>;
+def cCons : RegisterClass<"BF", [i32], 32, (add I0, I1, I2, I3,
+ B0, B1, B2, B3,
+ L0, L1, L2, L3)>;
+def tCons : RegisterClass<"BF", [i32], 32, (add LT0, LT1)>;
+def uCons : RegisterClass<"BF", [i32], 32, (add LB0, LB1)>;
+def kCons : RegisterClass<"BF", [i32], 32, (add LC0, LC1)>;
+def yCons : RegisterClass<"BF", [i32], 32, (add RETS, RETN, RETI, RETX,
+ RETE, ASTAT, SEQSTAT,
+ USP)>;
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/Blackfin/BlackfinSelectionDAGInfo.cpp
new file mode 100644
index 0000000..a21f696
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinSelectionDAGInfo.cpp
@@ -0,0 +1,24 @@
+//===-- BlackfinSelectionDAGInfo.cpp - Blackfin SelectionDAG Info ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the BlackfinSelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "blackfin-selectiondag-info"
+#include "BlackfinTargetMachine.h"
+using namespace llvm;
+
+BlackfinSelectionDAGInfo::BlackfinSelectionDAGInfo(
+ const BlackfinTargetMachine &TM)
+ : TargetSelectionDAGInfo(TM) {
+}
+
+BlackfinSelectionDAGInfo::~BlackfinSelectionDAGInfo() {
+}
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinSelectionDAGInfo.h b/contrib/llvm/lib/Target/Blackfin/BlackfinSelectionDAGInfo.h
new file mode 100644
index 0000000..f1ce348
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinSelectionDAGInfo.h
@@ -0,0 +1,31 @@
+//===-- BlackfinSelectionDAGInfo.h - Blackfin SelectionDAG Info -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the Blackfin subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BLACKFINSELECTIONDAGINFO_H
+#define BLACKFINSELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class BlackfinTargetMachine;
+
+class BlackfinSelectionDAGInfo : public TargetSelectionDAGInfo {
+public:
+ explicit BlackfinSelectionDAGInfo(const BlackfinTargetMachine &TM);
+ ~BlackfinSelectionDAGInfo();
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinSubtarget.cpp b/contrib/llvm/lib/Target/Blackfin/BlackfinSubtarget.cpp
new file mode 100644
index 0000000..0bdce09
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinSubtarget.cpp
@@ -0,0 +1,44 @@
+//===- BlackfinSubtarget.cpp - BLACKFIN Subtarget Information -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the blackfin specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "BlackfinSubtarget.h"
+#include "Blackfin.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "BlackfinGenSubtargetInfo.inc"
+
+using namespace llvm;
+
+BlackfinSubtarget::BlackfinSubtarget(const std::string &TT,
+ const std::string &CPU,
+ const std::string &FS)
+ : BlackfinGenSubtargetInfo(TT, CPU, FS), sdram(false),
+ icplb(false),
+ wa_mi_shift(false),
+ wa_csync(false),
+ wa_specld(false),
+ wa_mmr_stall(false),
+ wa_lcregs(false),
+ wa_hwloop(false),
+ wa_ind_call(false),
+ wa_killed_mmr(false),
+ wa_rets(false)
+{
+ std::string CPUName = CPU;
+ if (CPUName.empty())
+ CPUName = "generic";
+ // Parse features string.
+ ParseSubtargetFeatures(CPUName, FS);
+}
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinSubtarget.h b/contrib/llvm/lib/Target/Blackfin/BlackfinSubtarget.h
new file mode 100644
index 0000000..1a01a81
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinSubtarget.h
@@ -0,0 +1,49 @@
+//===- BlackfinSubtarget.h - Define Subtarget for the Blackfin -*- C++ -*-====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the BLACKFIN specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BLACKFIN_SUBTARGET_H
+#define BLACKFIN_SUBTARGET_H
+
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <string>
+
+#define GET_SUBTARGETINFO_HEADER
+#include "BlackfinGenSubtargetInfo.inc"
+
+namespace llvm {
+class StringRef;
+
+ class BlackfinSubtarget : public BlackfinGenSubtargetInfo {
+ bool sdram;
+ bool icplb;
+ bool wa_mi_shift;
+ bool wa_csync;
+ bool wa_specld;
+ bool wa_mmr_stall;
+ bool wa_lcregs;
+ bool wa_hwloop;
+ bool wa_ind_call;
+ bool wa_killed_mmr;
+ bool wa_rets;
+ public:
+ BlackfinSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS);
+
+ /// ParseSubtargetFeatures - Parses features string setting specified
+ /// subtarget options. Definition of function is auto generated by tblgen.
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+ };
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinTargetMachine.cpp b/contrib/llvm/lib/Target/Blackfin/BlackfinTargetMachine.cpp
new file mode 100644
index 0000000..a4ae46b
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinTargetMachine.cpp
@@ -0,0 +1,43 @@
+//===-- BlackfinTargetMachine.cpp - Define TargetMachine for Blackfin -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "BlackfinTargetMachine.h"
+#include "Blackfin.h"
+#include "llvm/PassManager.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+extern "C" void LLVMInitializeBlackfinTarget() {
+ RegisterTargetMachine<BlackfinTargetMachine> X(TheBlackfinTarget);
+}
+
+BlackfinTargetMachine::BlackfinTargetMachine(const Target &T,
+ StringRef TT,
+ StringRef CPU,
+ StringRef FS,
+ Reloc::Model RM,
+ CodeModel::Model CM)
+ : LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
+ DataLayout("e-p:32:32-i64:32-f64:32-n32"),
+ Subtarget(TT, CPU, FS),
+ TLInfo(*this),
+ TSInfo(*this),
+ InstrInfo(Subtarget),
+ FrameLowering(Subtarget) {
+}
+
+bool BlackfinTargetMachine::addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ PM.add(createBlackfinISelDag(*this, OptLevel));
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinTargetMachine.h b/contrib/llvm/lib/Target/Blackfin/BlackfinTargetMachine.h
new file mode 100644
index 0000000..c85337fe2
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinTargetMachine.h
@@ -0,0 +1,68 @@
+//===-- BlackfinTargetMachine.h - TargetMachine for Blackfin ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Blackfin specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BLACKFINTARGETMACHINE_H
+#define BLACKFINTARGETMACHINE_H
+
+#include "BlackfinInstrInfo.h"
+#include "BlackfinIntrinsicInfo.h"
+#include "BlackfinISelLowering.h"
+#include "BlackfinFrameLowering.h"
+#include "BlackfinSubtarget.h"
+#include "BlackfinSelectionDAGInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+
+ class BlackfinTargetMachine : public LLVMTargetMachine {
+ const TargetData DataLayout;
+ BlackfinSubtarget Subtarget;
+ BlackfinTargetLowering TLInfo;
+ BlackfinSelectionDAGInfo TSInfo;
+ BlackfinInstrInfo InstrInfo;
+ BlackfinFrameLowering FrameLowering;
+ BlackfinIntrinsicInfo IntrinsicInfo;
+ public:
+ BlackfinTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM);
+
+ virtual const BlackfinInstrInfo *getInstrInfo() const { return &InstrInfo; }
+ virtual const TargetFrameLowering *getFrameLowering() const {
+ return &FrameLowering;
+ }
+ virtual const BlackfinSubtarget *getSubtargetImpl() const {
+ return &Subtarget;
+ }
+ virtual const BlackfinRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+ virtual const BlackfinTargetLowering* getTargetLowering() const {
+ return &TLInfo;
+ }
+ virtual const BlackfinSelectionDAGInfo* getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+ virtual const TargetData *getTargetData() const { return &DataLayout; }
+ virtual bool addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel);
+ const TargetIntrinsicInfo *getIntrinsicInfo() const {
+ return &IntrinsicInfo;
+ }
+ };
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Blackfin/MCTargetDesc/BlackfinMCAsmInfo.cpp b/contrib/llvm/lib/Target/Blackfin/MCTargetDesc/BlackfinMCAsmInfo.cpp
new file mode 100644
index 0000000..5b9d4a2
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/MCTargetDesc/BlackfinMCAsmInfo.cpp
@@ -0,0 +1,22 @@
+//===-- BlackfinMCAsmInfo.cpp - Blackfin asm properties -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the BlackfinMCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "BlackfinMCAsmInfo.h"
+
+using namespace llvm;
+
+BlackfinMCAsmInfo::BlackfinMCAsmInfo(const Target &T, StringRef TT) {
+ GlobalPrefix = "_";
+ CommentString = "//";
+ HasSetDirective = false;
+}
diff --git a/contrib/llvm/lib/Target/Blackfin/MCTargetDesc/BlackfinMCAsmInfo.h b/contrib/llvm/lib/Target/Blackfin/MCTargetDesc/BlackfinMCAsmInfo.h
new file mode 100644
index 0000000..c372aa2
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/MCTargetDesc/BlackfinMCAsmInfo.h
@@ -0,0 +1,29 @@
+//===-- BlackfinMCAsmInfo.h - Blackfin asm properties ---------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the BlackfinMCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BLACKFINTARGETASMINFO_H
+#define BLACKFINTARGETASMINFO_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCAsmInfo.h"
+
+namespace llvm {
+ class Target;
+
+ struct BlackfinMCAsmInfo : public MCAsmInfo {
+ explicit BlackfinMCAsmInfo(const Target &T, StringRef TT);
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.cpp b/contrib/llvm/lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.cpp
new file mode 100644
index 0000000..272e3c2
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.cpp
@@ -0,0 +1,81 @@
+//===-- BlackfinMCTargetDesc.cpp - Blackfin Target Descriptions -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides Blackfin specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "BlackfinMCTargetDesc.h"
+#include "BlackfinMCAsmInfo.h"
+#include "llvm/MC/MCCodeGenInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "BlackfinGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "BlackfinGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "BlackfinGenRegisterInfo.inc"
+
+using namespace llvm;
+
+
+static MCInstrInfo *createBlackfinMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitBlackfinMCInstrInfo(X);
+ return X;
+}
+
+static MCRegisterInfo *createBlackfinMCRegisterInfo(StringRef TT) {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitBlackfinMCRegisterInfo(X, BF::RETS);
+ return X;
+}
+
+static MCSubtargetInfo *createBlackfinMCSubtargetInfo(StringRef TT,
+ StringRef CPU,
+ StringRef FS) {
+ MCSubtargetInfo *X = new MCSubtargetInfo();
+ InitBlackfinMCSubtargetInfo(X, TT, CPU, FS);
+ return X;
+}
+
+static MCCodeGenInfo *createBlackfinMCCodeGenInfo(StringRef TT, Reloc::Model RM,
+ CodeModel::Model CM) {
+ MCCodeGenInfo *X = new MCCodeGenInfo();
+ X->InitMCCodeGenInfo(RM, CM);
+ return X;
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeBlackfinTargetMC() {
+ // Register the MC asm info.
+ RegisterMCAsmInfo<BlackfinMCAsmInfo> X(TheBlackfinTarget);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(TheBlackfinTarget,
+ createBlackfinMCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(TheBlackfinTarget,
+ createBlackfinMCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(TheBlackfinTarget,
+ createBlackfinMCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(TheBlackfinTarget,
+ createBlackfinMCSubtargetInfo);
+}
diff --git a/contrib/llvm/lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.h b/contrib/llvm/lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.h
new file mode 100644
index 0000000..5bffe94
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.h
@@ -0,0 +1,38 @@
+//===-- BlackfinMCTargetDesc.h - Blackfin Target Descriptions ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides Blackfin specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BLACKFINMCTARGETDESC_H
+#define BLACKFINMCTARGETDESC_H
+
+namespace llvm {
+class MCSubtargetInfo;
+class Target;
+class StringRef;
+
+extern Target TheBlackfinTarget;
+
+} // End llvm namespace
+
+// Defines symbolic names for Blackfin registers. This defines a mapping from
+// register name to register number.
+#define GET_REGINFO_ENUM
+#include "BlackfinGenRegisterInfo.inc"
+
+// Defines symbolic names for the Blackfin instructions.
+#define GET_INSTRINFO_ENUM
+#include "BlackfinGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "BlackfinGenSubtargetInfo.inc"
+
+#endif
diff --git a/contrib/llvm/lib/Target/Blackfin/TargetInfo/BlackfinTargetInfo.cpp b/contrib/llvm/lib/Target/Blackfin/TargetInfo/BlackfinTargetInfo.cpp
new file mode 100644
index 0000000..57f1d3e
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/TargetInfo/BlackfinTargetInfo.cpp
@@ -0,0 +1,21 @@
+//===-- BlackfinTargetInfo.cpp - Blackfin Target Implementation -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Blackfin.h"
+#include "llvm/Module.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+Target llvm::TheBlackfinTarget;
+
+extern "C" void LLVMInitializeBlackfinTargetInfo() {
+ RegisterTarget<Triple::bfin> X(TheBlackfinTarget, "bfin",
+ "Analog Devices Blackfin [experimental]");
+}
diff --git a/contrib/llvm/lib/Target/CBackend/CBackend.cpp b/contrib/llvm/lib/Target/CBackend/CBackend.cpp
new file mode 100644
index 0000000..69d8c46
--- /dev/null
+++ b/contrib/llvm/lib/Target/CBackend/CBackend.cpp
@@ -0,0 +1,3617 @@
+//===-- CBackend.cpp - Library for converting LLVM code to C --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This library converts LLVM code to C code, compilable by GCC and other C
+// compilers.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CTargetMachine.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Instructions.h"
+#include "llvm/Pass.h"
+#include "llvm/PassManager.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/ConstantsScanner.h"
+#include "llvm/Analysis/FindUsedTypes.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/IntrinsicLowering.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/InstVisitor.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/Host.h"
+#include "llvm/Config/config.h"
+#include <algorithm>
+// Some ms header decided to define setjmp as _setjmp, undo this for this file.
+#ifdef _MSC_VER
+#undef setjmp
+#endif
+using namespace llvm;
+
+extern "C" void LLVMInitializeCBackendTarget() {
+ // Register the target.
+ RegisterTargetMachine<CTargetMachine> X(TheCBackendTarget);
+}
+
+namespace {
+ class CBEMCAsmInfo : public MCAsmInfo {
+ public:
+ CBEMCAsmInfo() {
+ GlobalPrefix = "";
+ PrivateGlobalPrefix = "";
+ }
+ };
+
+ /// CWriter - This class is the main chunk of code that converts an LLVM
+ /// module to a C translation unit.
+ class CWriter : public FunctionPass, public InstVisitor<CWriter> {
+ formatted_raw_ostream &Out;
+ IntrinsicLowering *IL;
+ Mangler *Mang;
+ LoopInfo *LI;
+ const Module *TheModule;
+ const MCAsmInfo* TAsm;
+ const MCRegisterInfo *MRI;
+ const MCObjectFileInfo *MOFI;
+ MCContext *TCtx;
+ const TargetData* TD;
+
+ std::map<const ConstantFP *, unsigned> FPConstantMap;
+ std::set<Function*> intrinsicPrototypesAlreadyGenerated;
+ std::set<const Argument*> ByValParams;
+ unsigned FPCounter;
+ unsigned OpaqueCounter;
+ DenseMap<const Value*, unsigned> AnonValueNumbers;
+ unsigned NextAnonValueNumber;
+
+ /// UnnamedStructIDs - This contains a unique ID for each struct that is
+ /// either anonymous or has no name.
+ DenseMap<StructType*, unsigned> UnnamedStructIDs;
+
+ public:
+ static char ID;
+ explicit CWriter(formatted_raw_ostream &o)
+ : FunctionPass(ID), Out(o), IL(0), Mang(0), LI(0),
+ TheModule(0), TAsm(0), MRI(0), MOFI(0), TCtx(0), TD(0),
+ OpaqueCounter(0), NextAnonValueNumber(0) {
+ initializeLoopInfoPass(*PassRegistry::getPassRegistry());
+ FPCounter = 0;
+ }
+
+ virtual const char *getPassName() const { return "C backend"; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<LoopInfo>();
+ AU.setPreservesAll();
+ }
+
+ virtual bool doInitialization(Module &M);
+
+ bool runOnFunction(Function &F) {
+ // Do not codegen any 'available_externally' functions at all, they have
+ // definitions outside the translation unit.
+ if (F.hasAvailableExternallyLinkage())
+ return false;
+
+ LI = &getAnalysis<LoopInfo>();
+
+ // Get rid of intrinsics we can't handle.
+ lowerIntrinsics(F);
+
+ // Output all floating point constants that cannot be printed accurately.
+ printFloatingPointConstants(F);
+
+ printFunction(F);
+ return false;
+ }
+
+ virtual bool doFinalization(Module &M) {
+ // Free memory...
+ delete IL;
+ delete TD;
+ delete Mang;
+ delete TCtx;
+ delete TAsm;
+ delete MRI;
+ delete MOFI;
+ FPConstantMap.clear();
+ ByValParams.clear();
+ intrinsicPrototypesAlreadyGenerated.clear();
+ UnnamedStructIDs.clear();
+ return false;
+ }
+
+ raw_ostream &printType(raw_ostream &Out, Type *Ty,
+ bool isSigned = false,
+ const std::string &VariableName = "",
+ bool IgnoreName = false,
+ const AttrListPtr &PAL = AttrListPtr());
+ raw_ostream &printSimpleType(raw_ostream &Out, Type *Ty,
+ bool isSigned,
+ const std::string &NameSoFar = "");
+
+ void printStructReturnPointerFunctionType(raw_ostream &Out,
+ const AttrListPtr &PAL,
+ PointerType *Ty);
+
+ std::string getStructName(StructType *ST);
+
+ /// writeOperandDeref - Print the result of dereferencing the specified
+ /// operand with '*'. This is equivalent to printing '*' then using
+ /// writeOperand, but avoids excess syntax in some cases.
+ void writeOperandDeref(Value *Operand) {
+ if (isAddressExposed(Operand)) {
+ // Already something with an address exposed.
+ writeOperandInternal(Operand);
+ } else {
+ Out << "*(";
+ writeOperand(Operand);
+ Out << ")";
+ }
+ }
+
+ void writeOperand(Value *Operand, bool Static = false);
+ void writeInstComputationInline(Instruction &I);
+ void writeOperandInternal(Value *Operand, bool Static = false);
+ void writeOperandWithCast(Value* Operand, unsigned Opcode);
+ void writeOperandWithCast(Value* Operand, const ICmpInst &I);
+ bool writeInstructionCast(const Instruction &I);
+
+ void writeMemoryAccess(Value *Operand, Type *OperandType,
+ bool IsVolatile, unsigned Alignment);
+
+ private :
+ std::string InterpretASMConstraint(InlineAsm::ConstraintInfo& c);
+
+ void lowerIntrinsics(Function &F);
+ /// Prints the definition of the intrinsic function F. Supports the
+ /// intrinsics which need to be explicitly defined in the CBackend.
+ void printIntrinsicDefinition(const Function &F, raw_ostream &Out);
+
+ void printModuleTypes();
+ void printContainedStructs(Type *Ty, SmallPtrSet<Type *, 16> &);
+ void printFloatingPointConstants(Function &F);
+ void printFloatingPointConstants(const Constant *C);
+ void printFunctionSignature(const Function *F, bool Prototype);
+
+ void printFunction(Function &);
+ void printBasicBlock(BasicBlock *BB);
+ void printLoop(Loop *L);
+
+ void printCast(unsigned opcode, Type *SrcTy, Type *DstTy);
+ void printConstant(Constant *CPV, bool Static);
+ void printConstantWithCast(Constant *CPV, unsigned Opcode);
+ bool printConstExprCast(const ConstantExpr *CE, bool Static);
+ void printConstantArray(ConstantArray *CPA, bool Static);
+ void printConstantVector(ConstantVector *CV, bool Static);
+
+ /// isAddressExposed - Return true if the specified value's name needs to
+ /// have its address taken in order to get a C value of the correct type.
+ /// This happens for global variables, byval parameters, and direct allocas.
+ bool isAddressExposed(const Value *V) const {
+ if (const Argument *A = dyn_cast<Argument>(V))
+ return ByValParams.count(A);
+ return isa<GlobalVariable>(V) || isDirectAlloca(V);
+ }
+
+ // isInlinableInst - Attempt to inline instructions into their uses to build
+ // trees as much as possible. To do this, we have to consistently decide
+ // what is acceptable to inline, so that variable declarations don't get
+ // printed and an extra copy of the expr is not emitted.
+ //
+ static bool isInlinableInst(const Instruction &I) {
+ // Always inline cmp instructions, even if they are shared by multiple
+ // expressions. GCC generates horrible code if we don't.
+ if (isa<CmpInst>(I))
+ return true;
+
+ // Must be an expression, must be used exactly once. If it is dead, we
+ // emit it inline where it would go.
+ if (I.getType() == Type::getVoidTy(I.getContext()) || !I.hasOneUse() ||
+ isa<TerminatorInst>(I) || isa<CallInst>(I) || isa<PHINode>(I) ||
+ isa<LoadInst>(I) || isa<VAArgInst>(I) || isa<InsertElementInst>(I) ||
+ isa<InsertValueInst>(I))
+ // Don't inline a load across a store or other bad things!
+ return false;
+
+ // Must not be used in inline asm, extractelement, or shufflevector.
+ if (I.hasOneUse()) {
+ const Instruction &User = cast<Instruction>(*I.use_back());
+ if (isInlineAsm(User) || isa<ExtractElementInst>(User) ||
+ isa<ShuffleVectorInst>(User))
+ return false;
+ }
+
+ // Only inline instruction it if it's use is in the same BB as the inst.
+ return I.getParent() == cast<Instruction>(I.use_back())->getParent();
+ }
+
+ // isDirectAlloca - Define fixed sized allocas in the entry block as direct
+ // variables which are accessed with the & operator. This causes GCC to
+ // generate significantly better code than to emit alloca calls directly.
+ //
+ static const AllocaInst *isDirectAlloca(const Value *V) {
+ const AllocaInst *AI = dyn_cast<AllocaInst>(V);
+ if (!AI) return 0;
+ if (AI->isArrayAllocation())
+ return 0; // FIXME: we can also inline fixed size array allocas!
+ if (AI->getParent() != &AI->getParent()->getParent()->getEntryBlock())
+ return 0;
+ return AI;
+ }
+
+ // isInlineAsm - Check if the instruction is a call to an inline asm chunk.
+ static bool isInlineAsm(const Instruction& I) {
+ if (const CallInst *CI = dyn_cast<CallInst>(&I))
+ return isa<InlineAsm>(CI->getCalledValue());
+ return false;
+ }
+
+ // Instruction visitation functions
+ friend class InstVisitor<CWriter>;
+
+ void visitReturnInst(ReturnInst &I);
+ void visitBranchInst(BranchInst &I);
+ void visitSwitchInst(SwitchInst &I);
+ void visitIndirectBrInst(IndirectBrInst &I);
+ void visitInvokeInst(InvokeInst &I) {
+ llvm_unreachable("Lowerinvoke pass didn't work!");
+ }
+ void visitUnwindInst(UnwindInst &I) {
+ llvm_unreachable("Lowerinvoke pass didn't work!");
+ }
+ void visitResumeInst(ResumeInst &I) {
+ llvm_unreachable("DwarfEHPrepare pass didn't work!");
+ }
+ void visitUnreachableInst(UnreachableInst &I);
+
+ void visitPHINode(PHINode &I);
+ void visitBinaryOperator(Instruction &I);
+ void visitICmpInst(ICmpInst &I);
+ void visitFCmpInst(FCmpInst &I);
+
+ void visitCastInst (CastInst &I);
+ void visitSelectInst(SelectInst &I);
+ void visitCallInst (CallInst &I);
+ void visitInlineAsm(CallInst &I);
+ bool visitBuiltinCall(CallInst &I, Intrinsic::ID ID, bool &WroteCallee);
+
+ void visitAllocaInst(AllocaInst &I);
+ void visitLoadInst (LoadInst &I);
+ void visitStoreInst (StoreInst &I);
+ void visitGetElementPtrInst(GetElementPtrInst &I);
+ void visitVAArgInst (VAArgInst &I);
+
+ void visitInsertElementInst(InsertElementInst &I);
+ void visitExtractElementInst(ExtractElementInst &I);
+ void visitShuffleVectorInst(ShuffleVectorInst &SVI);
+
+ void visitInsertValueInst(InsertValueInst &I);
+ void visitExtractValueInst(ExtractValueInst &I);
+
+ void visitInstruction(Instruction &I) {
+#ifndef NDEBUG
+ errs() << "C Writer does not know about " << I;
+#endif
+ llvm_unreachable(0);
+ }
+
+ void outputLValue(Instruction *I) {
+ Out << " " << GetValueName(I) << " = ";
+ }
+
+ bool isGotoCodeNecessary(BasicBlock *From, BasicBlock *To);
+ void printPHICopiesForSuccessor(BasicBlock *CurBlock,
+ BasicBlock *Successor, unsigned Indent);
+ void printBranchToBlock(BasicBlock *CurBlock, BasicBlock *SuccBlock,
+ unsigned Indent);
+ void printGEPExpression(Value *Ptr, gep_type_iterator I,
+ gep_type_iterator E, bool Static);
+
+ std::string GetValueName(const Value *Operand);
+ };
+}
+
+char CWriter::ID = 0;
+
+
+
+static std::string CBEMangle(const std::string &S) {
+ std::string Result;
+
+ for (unsigned i = 0, e = S.size(); i != e; ++i)
+ if (isalnum(S[i]) || S[i] == '_') {
+ Result += S[i];
+ } else {
+ Result += '_';
+ Result += 'A'+(S[i]&15);
+ Result += 'A'+((S[i]>>4)&15);
+ Result += '_';
+ }
+ return Result;
+}
+
+std::string CWriter::getStructName(StructType *ST) {
+ if (!ST->isLiteral() && !ST->getName().empty())
+ return CBEMangle("l_"+ST->getName().str());
+
+ return "l_unnamed_" + utostr(UnnamedStructIDs[ST]);
+}
+
+
+/// printStructReturnPointerFunctionType - This is like printType for a struct
+/// return type, except, instead of printing the type as void (*)(Struct*, ...)
+/// print it as "Struct (*)(...)", for struct return functions.
+void CWriter::printStructReturnPointerFunctionType(raw_ostream &Out,
+ const AttrListPtr &PAL,
+ PointerType *TheTy) {
+ FunctionType *FTy = cast<FunctionType>(TheTy->getElementType());
+ std::string tstr;
+ raw_string_ostream FunctionInnards(tstr);
+ FunctionInnards << " (*) (";
+ bool PrintedType = false;
+
+ FunctionType::param_iterator I = FTy->param_begin(), E = FTy->param_end();
+ Type *RetTy = cast<PointerType>(*I)->getElementType();
+ unsigned Idx = 1;
+ for (++I, ++Idx; I != E; ++I, ++Idx) {
+ if (PrintedType)
+ FunctionInnards << ", ";
+ Type *ArgTy = *I;
+ if (PAL.paramHasAttr(Idx, Attribute::ByVal)) {
+ assert(ArgTy->isPointerTy());
+ ArgTy = cast<PointerType>(ArgTy)->getElementType();
+ }
+ printType(FunctionInnards, ArgTy,
+ /*isSigned=*/PAL.paramHasAttr(Idx, Attribute::SExt), "");
+ PrintedType = true;
+ }
+ if (FTy->isVarArg()) {
+ if (!PrintedType)
+ FunctionInnards << " int"; //dummy argument for empty vararg functs
+ FunctionInnards << ", ...";
+ } else if (!PrintedType) {
+ FunctionInnards << "void";
+ }
+ FunctionInnards << ')';
+ printType(Out, RetTy,
+ /*isSigned=*/PAL.paramHasAttr(0, Attribute::SExt), FunctionInnards.str());
+}
+
+raw_ostream &
+CWriter::printSimpleType(raw_ostream &Out, Type *Ty, bool isSigned,
+ const std::string &NameSoFar) {
+ assert((Ty->isPrimitiveType() || Ty->isIntegerTy() || Ty->isVectorTy()) &&
+ "Invalid type for printSimpleType");
+ switch (Ty->getTypeID()) {
+ case Type::VoidTyID: return Out << "void " << NameSoFar;
+ case Type::IntegerTyID: {
+ unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth();
+ if (NumBits == 1)
+ return Out << "bool " << NameSoFar;
+ else if (NumBits <= 8)
+ return Out << (isSigned?"signed":"unsigned") << " char " << NameSoFar;
+ else if (NumBits <= 16)
+ return Out << (isSigned?"signed":"unsigned") << " short " << NameSoFar;
+ else if (NumBits <= 32)
+ return Out << (isSigned?"signed":"unsigned") << " int " << NameSoFar;
+ else if (NumBits <= 64)
+ return Out << (isSigned?"signed":"unsigned") << " long long "<< NameSoFar;
+ else {
+ assert(NumBits <= 128 && "Bit widths > 128 not implemented yet");
+ return Out << (isSigned?"llvmInt128":"llvmUInt128") << " " << NameSoFar;
+ }
+ }
+ case Type::FloatTyID: return Out << "float " << NameSoFar;
+ case Type::DoubleTyID: return Out << "double " << NameSoFar;
+ // Lacking emulation of FP80 on PPC, etc., we assume whichever of these is
+ // present matches host 'long double'.
+ case Type::X86_FP80TyID:
+ case Type::PPC_FP128TyID:
+ case Type::FP128TyID: return Out << "long double " << NameSoFar;
+
+ case Type::X86_MMXTyID:
+ return printSimpleType(Out, Type::getInt32Ty(Ty->getContext()), isSigned,
+ " __attribute__((vector_size(64))) " + NameSoFar);
+
+ case Type::VectorTyID: {
+ VectorType *VTy = cast<VectorType>(Ty);
+ return printSimpleType(Out, VTy->getElementType(), isSigned,
+ " __attribute__((vector_size(" +
+ utostr(TD->getTypeAllocSize(VTy)) + " ))) " + NameSoFar);
+ }
+
+ default:
+#ifndef NDEBUG
+ errs() << "Unknown primitive type: " << *Ty << "\n";
+#endif
+ llvm_unreachable(0);
+ }
+}
+
+// Pass the Type* and the variable name and this prints out the variable
+// declaration.
+//
+raw_ostream &CWriter::printType(raw_ostream &Out, Type *Ty,
+ bool isSigned, const std::string &NameSoFar,
+ bool IgnoreName, const AttrListPtr &PAL) {
+ if (Ty->isPrimitiveType() || Ty->isIntegerTy() || Ty->isVectorTy()) {
+ printSimpleType(Out, Ty, isSigned, NameSoFar);
+ return Out;
+ }
+
+ switch (Ty->getTypeID()) {
+ case Type::FunctionTyID: {
+ FunctionType *FTy = cast<FunctionType>(Ty);
+ std::string tstr;
+ raw_string_ostream FunctionInnards(tstr);
+ FunctionInnards << " (" << NameSoFar << ") (";
+ unsigned Idx = 1;
+ for (FunctionType::param_iterator I = FTy->param_begin(),
+ E = FTy->param_end(); I != E; ++I) {
+ Type *ArgTy = *I;
+ if (PAL.paramHasAttr(Idx, Attribute::ByVal)) {
+ assert(ArgTy->isPointerTy());
+ ArgTy = cast<PointerType>(ArgTy)->getElementType();
+ }
+ if (I != FTy->param_begin())
+ FunctionInnards << ", ";
+ printType(FunctionInnards, ArgTy,
+ /*isSigned=*/PAL.paramHasAttr(Idx, Attribute::SExt), "");
+ ++Idx;
+ }
+ if (FTy->isVarArg()) {
+ if (!FTy->getNumParams())
+ FunctionInnards << " int"; //dummy argument for empty vaarg functs
+ FunctionInnards << ", ...";
+ } else if (!FTy->getNumParams()) {
+ FunctionInnards << "void";
+ }
+ FunctionInnards << ')';
+ printType(Out, FTy->getReturnType(),
+ /*isSigned=*/PAL.paramHasAttr(0, Attribute::SExt), FunctionInnards.str());
+ return Out;
+ }
+ case Type::StructTyID: {
+ StructType *STy = cast<StructType>(Ty);
+
+ // Check to see if the type is named.
+ if (!IgnoreName)
+ return Out << getStructName(STy) << ' ' << NameSoFar;
+
+ Out << NameSoFar + " {\n";
+ unsigned Idx = 0;
+ for (StructType::element_iterator I = STy->element_begin(),
+ E = STy->element_end(); I != E; ++I) {
+ Out << " ";
+ printType(Out, *I, false, "field" + utostr(Idx++));
+ Out << ";\n";
+ }
+ Out << '}';
+ if (STy->isPacked())
+ Out << " __attribute__ ((packed))";
+ return Out;
+ }
+
+ case Type::PointerTyID: {
+ PointerType *PTy = cast<PointerType>(Ty);
+ std::string ptrName = "*" + NameSoFar;
+
+ if (PTy->getElementType()->isArrayTy() ||
+ PTy->getElementType()->isVectorTy())
+ ptrName = "(" + ptrName + ")";
+
+ if (!PAL.isEmpty())
+ // Must be a function ptr cast!
+ return printType(Out, PTy->getElementType(), false, ptrName, true, PAL);
+ return printType(Out, PTy->getElementType(), false, ptrName);
+ }
+
+ case Type::ArrayTyID: {
+ ArrayType *ATy = cast<ArrayType>(Ty);
+ unsigned NumElements = ATy->getNumElements();
+ if (NumElements == 0) NumElements = 1;
+ // Arrays are wrapped in structs to allow them to have normal
+ // value semantics (avoiding the array "decay").
+ Out << NameSoFar << " { ";
+ printType(Out, ATy->getElementType(), false,
+ "array[" + utostr(NumElements) + "]");
+ return Out << "; }";
+ }
+
+ default:
+ llvm_unreachable("Unhandled case in getTypeProps!");
+ }
+
+ return Out;
+}
+
+void CWriter::printConstantArray(ConstantArray *CPA, bool Static) {
+
+ // As a special case, print the array as a string if it is an array of
+ // ubytes or an array of sbytes with positive values.
+ //
+ Type *ETy = CPA->getType()->getElementType();
+ bool isString = (ETy == Type::getInt8Ty(CPA->getContext()) ||
+ ETy == Type::getInt8Ty(CPA->getContext()));
+
+ // Make sure the last character is a null char, as automatically added by C
+ if (isString && (CPA->getNumOperands() == 0 ||
+ !cast<Constant>(*(CPA->op_end()-1))->isNullValue()))
+ isString = false;
+
+ if (isString) {
+ Out << '\"';
+ // Keep track of whether the last number was a hexadecimal escape.
+ bool LastWasHex = false;
+
+ // Do not include the last character, which we know is null
+ for (unsigned i = 0, e = CPA->getNumOperands()-1; i != e; ++i) {
+ unsigned char C = cast<ConstantInt>(CPA->getOperand(i))->getZExtValue();
+
+ // Print it out literally if it is a printable character. The only thing
+ // to be careful about is when the last letter output was a hex escape
+ // code, in which case we have to be careful not to print out hex digits
+ // explicitly (the C compiler thinks it is a continuation of the previous
+ // character, sheesh...)
+ //
+ if (isprint(C) && (!LastWasHex || !isxdigit(C))) {
+ LastWasHex = false;
+ if (C == '"' || C == '\\')
+ Out << "\\" << (char)C;
+ else
+ Out << (char)C;
+ } else {
+ LastWasHex = false;
+ switch (C) {
+ case '\n': Out << "\\n"; break;
+ case '\t': Out << "\\t"; break;
+ case '\r': Out << "\\r"; break;
+ case '\v': Out << "\\v"; break;
+ case '\a': Out << "\\a"; break;
+ case '\"': Out << "\\\""; break;
+ case '\'': Out << "\\\'"; break;
+ default:
+ Out << "\\x";
+ Out << (char)(( C/16 < 10) ? ( C/16 +'0') : ( C/16 -10+'A'));
+ Out << (char)(((C&15) < 10) ? ((C&15)+'0') : ((C&15)-10+'A'));
+ LastWasHex = true;
+ break;
+ }
+ }
+ }
+ Out << '\"';
+ } else {
+ Out << '{';
+ if (CPA->getNumOperands()) {
+ Out << ' ';
+ printConstant(cast<Constant>(CPA->getOperand(0)), Static);
+ for (unsigned i = 1, e = CPA->getNumOperands(); i != e; ++i) {
+ Out << ", ";
+ printConstant(cast<Constant>(CPA->getOperand(i)), Static);
+ }
+ }
+ Out << " }";
+ }
+}
+
+void CWriter::printConstantVector(ConstantVector *CP, bool Static) {
+ Out << '{';
+ if (CP->getNumOperands()) {
+ Out << ' ';
+ printConstant(cast<Constant>(CP->getOperand(0)), Static);
+ for (unsigned i = 1, e = CP->getNumOperands(); i != e; ++i) {
+ Out << ", ";
+ printConstant(cast<Constant>(CP->getOperand(i)), Static);
+ }
+ }
+ Out << " }";
+}
+
+// isFPCSafeToPrint - Returns true if we may assume that CFP may be written out
+// textually as a double (rather than as a reference to a stack-allocated
+// variable). We decide this by converting CFP to a string and back into a
+// double, and then checking whether the conversion results in a bit-equal
+// double to the original value of CFP. This depends on us and the target C
+// compiler agreeing on the conversion process (which is pretty likely since we
+// only deal in IEEE FP).
+//
+static bool isFPCSafeToPrint(const ConstantFP *CFP) {
+ bool ignored;
+ // Do long doubles in hex for now.
+ if (CFP->getType() != Type::getFloatTy(CFP->getContext()) &&
+ CFP->getType() != Type::getDoubleTy(CFP->getContext()))
+ return false;
+ APFloat APF = APFloat(CFP->getValueAPF()); // copy
+ if (CFP->getType() == Type::getFloatTy(CFP->getContext()))
+ APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &ignored);
+#if HAVE_PRINTF_A && ENABLE_CBE_PRINTF_A
+ char Buffer[100];
+ sprintf(Buffer, "%a", APF.convertToDouble());
+ if (!strncmp(Buffer, "0x", 2) ||
+ !strncmp(Buffer, "-0x", 3) ||
+ !strncmp(Buffer, "+0x", 3))
+ return APF.bitwiseIsEqual(APFloat(atof(Buffer)));
+ return false;
+#else
+ std::string StrVal = ftostr(APF);
+
+ while (StrVal[0] == ' ')
+ StrVal.erase(StrVal.begin());
+
+ // Check to make sure that the stringized number is not some string like "Inf"
+ // or NaN. Check that the string matches the "[-+]?[0-9]" regex.
+ if ((StrVal[0] >= '0' && StrVal[0] <= '9') ||
+ ((StrVal[0] == '-' || StrVal[0] == '+') &&
+ (StrVal[1] >= '0' && StrVal[1] <= '9')))
+ // Reparse stringized version!
+ return APF.bitwiseIsEqual(APFloat(atof(StrVal.c_str())));
+ return false;
+#endif
+}
+
+/// Print out the casting for a cast operation. This does the double casting
+/// necessary for conversion to the destination type, if necessary.
+/// @brief Print a cast
+void CWriter::printCast(unsigned opc, Type *SrcTy, Type *DstTy) {
+ // Print the destination type cast
+ switch (opc) {
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::IntToPtr:
+ case Instruction::Trunc:
+ case Instruction::BitCast:
+ case Instruction::FPExt:
+ case Instruction::FPTrunc: // For these the DstTy sign doesn't matter
+ Out << '(';
+ printType(Out, DstTy);
+ Out << ')';
+ break;
+ case Instruction::ZExt:
+ case Instruction::PtrToInt:
+ case Instruction::FPToUI: // For these, make sure we get an unsigned dest
+ Out << '(';
+ printSimpleType(Out, DstTy, false);
+ Out << ')';
+ break;
+ case Instruction::SExt:
+ case Instruction::FPToSI: // For these, make sure we get a signed dest
+ Out << '(';
+ printSimpleType(Out, DstTy, true);
+ Out << ')';
+ break;
+ default:
+ llvm_unreachable("Invalid cast opcode");
+ }
+
+ // Print the source type cast
+ switch (opc) {
+ case Instruction::UIToFP:
+ case Instruction::ZExt:
+ Out << '(';
+ printSimpleType(Out, SrcTy, false);
+ Out << ')';
+ break;
+ case Instruction::SIToFP:
+ case Instruction::SExt:
+ Out << '(';
+ printSimpleType(Out, SrcTy, true);
+ Out << ')';
+ break;
+ case Instruction::IntToPtr:
+ case Instruction::PtrToInt:
+ // Avoid "cast to pointer from integer of different size" warnings
+ Out << "(unsigned long)";
+ break;
+ case Instruction::Trunc:
+ case Instruction::BitCast:
+ case Instruction::FPExt:
+ case Instruction::FPTrunc:
+ case Instruction::FPToSI:
+ case Instruction::FPToUI:
+ break; // These don't need a source cast.
+ default:
+ llvm_unreachable("Invalid cast opcode");
+ break;
+ }
+}
+
+// printConstant - The LLVM Constant to C Constant converter.
+void CWriter::printConstant(Constant *CPV, bool Static) {
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CPV)) {
+ switch (CE->getOpcode()) {
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::BitCast:
+ Out << "(";
+ printCast(CE->getOpcode(), CE->getOperand(0)->getType(), CE->getType());
+ if (CE->getOpcode() == Instruction::SExt &&
+ CE->getOperand(0)->getType() == Type::getInt1Ty(CPV->getContext())) {
+ // Make sure we really sext from bool here by subtracting from 0
+ Out << "0-";
+ }
+ printConstant(CE->getOperand(0), Static);
+ if (CE->getType() == Type::getInt1Ty(CPV->getContext()) &&
+ (CE->getOpcode() == Instruction::Trunc ||
+ CE->getOpcode() == Instruction::FPToUI ||
+ CE->getOpcode() == Instruction::FPToSI ||
+ CE->getOpcode() == Instruction::PtrToInt)) {
+ // Make sure we really truncate to bool here by anding with 1
+ Out << "&1u";
+ }
+ Out << ')';
+ return;
+
+ case Instruction::GetElementPtr:
+ Out << "(";
+ printGEPExpression(CE->getOperand(0), gep_type_begin(CPV),
+ gep_type_end(CPV), Static);
+ Out << ")";
+ return;
+ case Instruction::Select:
+ Out << '(';
+ printConstant(CE->getOperand(0), Static);
+ Out << '?';
+ printConstant(CE->getOperand(1), Static);
+ Out << ':';
+ printConstant(CE->getOperand(2), Static);
+ Out << ')';
+ return;
+ case Instruction::Add:
+ case Instruction::FAdd:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::SDiv:
+ case Instruction::UDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::ICmp:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ {
+ Out << '(';
+ bool NeedsClosingParens = printConstExprCast(CE, Static);
+ printConstantWithCast(CE->getOperand(0), CE->getOpcode());
+ switch (CE->getOpcode()) {
+ case Instruction::Add:
+ case Instruction::FAdd: Out << " + "; break;
+ case Instruction::Sub:
+ case Instruction::FSub: Out << " - "; break;
+ case Instruction::Mul:
+ case Instruction::FMul: Out << " * "; break;
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem: Out << " % "; break;
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv: Out << " / "; break;
+ case Instruction::And: Out << " & "; break;
+ case Instruction::Or: Out << " | "; break;
+ case Instruction::Xor: Out << " ^ "; break;
+ case Instruction::Shl: Out << " << "; break;
+ case Instruction::LShr:
+ case Instruction::AShr: Out << " >> "; break;
+ case Instruction::ICmp:
+ switch (CE->getPredicate()) {
+ case ICmpInst::ICMP_EQ: Out << " == "; break;
+ case ICmpInst::ICMP_NE: Out << " != "; break;
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_ULT: Out << " < "; break;
+ case ICmpInst::ICMP_SLE:
+ case ICmpInst::ICMP_ULE: Out << " <= "; break;
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_UGT: Out << " > "; break;
+ case ICmpInst::ICMP_SGE:
+ case ICmpInst::ICMP_UGE: Out << " >= "; break;
+ default: llvm_unreachable("Illegal ICmp predicate");
+ }
+ break;
+ default: llvm_unreachable("Illegal opcode here!");
+ }
+ printConstantWithCast(CE->getOperand(1), CE->getOpcode());
+ if (NeedsClosingParens)
+ Out << "))";
+ Out << ')';
+ return;
+ }
+ case Instruction::FCmp: {
+ Out << '(';
+ bool NeedsClosingParens = printConstExprCast(CE, Static);
+ if (CE->getPredicate() == FCmpInst::FCMP_FALSE)
+ Out << "0";
+ else if (CE->getPredicate() == FCmpInst::FCMP_TRUE)
+ Out << "1";
+ else {
+ const char* op = 0;
+ switch (CE->getPredicate()) {
+ default: llvm_unreachable("Illegal FCmp predicate");
+ case FCmpInst::FCMP_ORD: op = "ord"; break;
+ case FCmpInst::FCMP_UNO: op = "uno"; break;
+ case FCmpInst::FCMP_UEQ: op = "ueq"; break;
+ case FCmpInst::FCMP_UNE: op = "une"; break;
+ case FCmpInst::FCMP_ULT: op = "ult"; break;
+ case FCmpInst::FCMP_ULE: op = "ule"; break;
+ case FCmpInst::FCMP_UGT: op = "ugt"; break;
+ case FCmpInst::FCMP_UGE: op = "uge"; break;
+ case FCmpInst::FCMP_OEQ: op = "oeq"; break;
+ case FCmpInst::FCMP_ONE: op = "one"; break;
+ case FCmpInst::FCMP_OLT: op = "olt"; break;
+ case FCmpInst::FCMP_OLE: op = "ole"; break;
+ case FCmpInst::FCMP_OGT: op = "ogt"; break;
+ case FCmpInst::FCMP_OGE: op = "oge"; break;
+ }
+ Out << "llvm_fcmp_" << op << "(";
+ printConstantWithCast(CE->getOperand(0), CE->getOpcode());
+ Out << ", ";
+ printConstantWithCast(CE->getOperand(1), CE->getOpcode());
+ Out << ")";
+ }
+ if (NeedsClosingParens)
+ Out << "))";
+ Out << ')';
+ return;
+ }
+ default:
+#ifndef NDEBUG
+ errs() << "CWriter Error: Unhandled constant expression: "
+ << *CE << "\n";
+#endif
+ llvm_unreachable(0);
+ }
+ } else if (isa<UndefValue>(CPV) && CPV->getType()->isSingleValueType()) {
+ Out << "((";
+ printType(Out, CPV->getType()); // sign doesn't matter
+ Out << ")/*UNDEF*/";
+ if (!CPV->getType()->isVectorTy()) {
+ Out << "0)";
+ } else {
+ Out << "{})";
+ }
+ return;
+ }
+
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
+ Type* Ty = CI->getType();
+ if (Ty == Type::getInt1Ty(CPV->getContext()))
+ Out << (CI->getZExtValue() ? '1' : '0');
+ else if (Ty == Type::getInt32Ty(CPV->getContext()))
+ Out << CI->getZExtValue() << 'u';
+ else if (Ty->getPrimitiveSizeInBits() > 32)
+ Out << CI->getZExtValue() << "ull";
+ else {
+ Out << "((";
+ printSimpleType(Out, Ty, false) << ')';
+ if (CI->isMinValue(true))
+ Out << CI->getZExtValue() << 'u';
+ else
+ Out << CI->getSExtValue();
+ Out << ')';
+ }
+ return;
+ }
+
+ switch (CPV->getType()->getTypeID()) {
+ case Type::FloatTyID:
+ case Type::DoubleTyID:
+ case Type::X86_FP80TyID:
+ case Type::PPC_FP128TyID:
+ case Type::FP128TyID: {
+ ConstantFP *FPC = cast<ConstantFP>(CPV);
+ std::map<const ConstantFP*, unsigned>::iterator I = FPConstantMap.find(FPC);
+ if (I != FPConstantMap.end()) {
+ // Because of FP precision problems we must load from a stack allocated
+ // value that holds the value in hex.
+ Out << "(*(" << (FPC->getType() == Type::getFloatTy(CPV->getContext()) ?
+ "float" :
+ FPC->getType() == Type::getDoubleTy(CPV->getContext()) ?
+ "double" :
+ "long double")
+ << "*)&FPConstant" << I->second << ')';
+ } else {
+ double V;
+ if (FPC->getType() == Type::getFloatTy(CPV->getContext()))
+ V = FPC->getValueAPF().convertToFloat();
+ else if (FPC->getType() == Type::getDoubleTy(CPV->getContext()))
+ V = FPC->getValueAPF().convertToDouble();
+ else {
+ // Long double. Convert the number to double, discarding precision.
+ // This is not awesome, but it at least makes the CBE output somewhat
+ // useful.
+ APFloat Tmp = FPC->getValueAPF();
+ bool LosesInfo;
+ Tmp.convert(APFloat::IEEEdouble, APFloat::rmTowardZero, &LosesInfo);
+ V = Tmp.convertToDouble();
+ }
+
+ if (IsNAN(V)) {
+ // The value is NaN
+
+ // FIXME the actual NaN bits should be emitted.
+ // The prefix for a quiet NaN is 0x7FF8. For a signalling NaN,
+ // it's 0x7ff4.
+ const unsigned long QuietNaN = 0x7ff8UL;
+ //const unsigned long SignalNaN = 0x7ff4UL;
+
+ // We need to grab the first part of the FP #
+ char Buffer[100];
+
+ uint64_t ll = DoubleToBits(V);
+ sprintf(Buffer, "0x%llx", static_cast<long long>(ll));
+
+ std::string Num(&Buffer[0], &Buffer[6]);
+ unsigned long Val = strtoul(Num.c_str(), 0, 16);
+
+ if (FPC->getType() == Type::getFloatTy(FPC->getContext()))
+ Out << "LLVM_NAN" << (Val == QuietNaN ? "" : "S") << "F(\""
+ << Buffer << "\") /*nan*/ ";
+ else
+ Out << "LLVM_NAN" << (Val == QuietNaN ? "" : "S") << "(\""
+ << Buffer << "\") /*nan*/ ";
+ } else if (IsInf(V)) {
+ // The value is Inf
+ if (V < 0) Out << '-';
+ Out << "LLVM_INF" <<
+ (FPC->getType() == Type::getFloatTy(FPC->getContext()) ? "F" : "")
+ << " /*inf*/ ";
+ } else {
+ std::string Num;
+#if HAVE_PRINTF_A && ENABLE_CBE_PRINTF_A
+ // Print out the constant as a floating point number.
+ char Buffer[100];
+ sprintf(Buffer, "%a", V);
+ Num = Buffer;
+#else
+ Num = ftostr(FPC->getValueAPF());
+#endif
+ Out << Num;
+ }
+ }
+ break;
+ }
+
+ case Type::ArrayTyID:
+ // Use C99 compound expression literal initializer syntax.
+ if (!Static) {
+ Out << "(";
+ printType(Out, CPV->getType());
+ Out << ")";
+ }
+ Out << "{ "; // Arrays are wrapped in struct types.
+ if (ConstantArray *CA = dyn_cast<ConstantArray>(CPV)) {
+ printConstantArray(CA, Static);
+ } else {
+ assert(isa<ConstantAggregateZero>(CPV) || isa<UndefValue>(CPV));
+ ArrayType *AT = cast<ArrayType>(CPV->getType());
+ Out << '{';
+ if (AT->getNumElements()) {
+ Out << ' ';
+ Constant *CZ = Constant::getNullValue(AT->getElementType());
+ printConstant(CZ, Static);
+ for (unsigned i = 1, e = AT->getNumElements(); i != e; ++i) {
+ Out << ", ";
+ printConstant(CZ, Static);
+ }
+ }
+ Out << " }";
+ }
+ Out << " }"; // Arrays are wrapped in struct types.
+ break;
+
+ case Type::VectorTyID:
+ // Use C99 compound expression literal initializer syntax.
+ if (!Static) {
+ Out << "(";
+ printType(Out, CPV->getType());
+ Out << ")";
+ }
+ if (ConstantVector *CV = dyn_cast<ConstantVector>(CPV)) {
+ printConstantVector(CV, Static);
+ } else {
+ assert(isa<ConstantAggregateZero>(CPV) || isa<UndefValue>(CPV));
+ VectorType *VT = cast<VectorType>(CPV->getType());
+ Out << "{ ";
+ Constant *CZ = Constant::getNullValue(VT->getElementType());
+ printConstant(CZ, Static);
+ for (unsigned i = 1, e = VT->getNumElements(); i != e; ++i) {
+ Out << ", ";
+ printConstant(CZ, Static);
+ }
+ Out << " }";
+ }
+ break;
+
+ case Type::StructTyID:
+ // Use C99 compound expression literal initializer syntax.
+ if (!Static) {
+ Out << "(";
+ printType(Out, CPV->getType());
+ Out << ")";
+ }
+ if (isa<ConstantAggregateZero>(CPV) || isa<UndefValue>(CPV)) {
+ StructType *ST = cast<StructType>(CPV->getType());
+ Out << '{';
+ if (ST->getNumElements()) {
+ Out << ' ';
+ printConstant(Constant::getNullValue(ST->getElementType(0)), Static);
+ for (unsigned i = 1, e = ST->getNumElements(); i != e; ++i) {
+ Out << ", ";
+ printConstant(Constant::getNullValue(ST->getElementType(i)), Static);
+ }
+ }
+ Out << " }";
+ } else {
+ Out << '{';
+ if (CPV->getNumOperands()) {
+ Out << ' ';
+ printConstant(cast<Constant>(CPV->getOperand(0)), Static);
+ for (unsigned i = 1, e = CPV->getNumOperands(); i != e; ++i) {
+ Out << ", ";
+ printConstant(cast<Constant>(CPV->getOperand(i)), Static);
+ }
+ }
+ Out << " }";
+ }
+ break;
+
+ case Type::PointerTyID:
+ if (isa<ConstantPointerNull>(CPV)) {
+ Out << "((";
+ printType(Out, CPV->getType()); // sign doesn't matter
+ Out << ")/*NULL*/0)";
+ break;
+ } else if (GlobalValue *GV = dyn_cast<GlobalValue>(CPV)) {
+ writeOperand(GV, Static);
+ break;
+ }
+ // FALL THROUGH
+ default:
+#ifndef NDEBUG
+ errs() << "Unknown constant type: " << *CPV << "\n";
+#endif
+ llvm_unreachable(0);
+ }
+}
+
+// Some constant expressions need to be casted back to the original types
+// because their operands were casted to the expected type. This function takes
+// care of detecting that case and printing the cast for the ConstantExpr.
+bool CWriter::printConstExprCast(const ConstantExpr* CE, bool Static) {
+ bool NeedsExplicitCast = false;
+ Type *Ty = CE->getOperand(0)->getType();
+ bool TypeIsSigned = false;
+ switch (CE->getOpcode()) {
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ // We need to cast integer arithmetic so that it is always performed
+ // as unsigned, to avoid undefined behavior on overflow.
+ case Instruction::LShr:
+ case Instruction::URem:
+ case Instruction::UDiv: NeedsExplicitCast = true; break;
+ case Instruction::AShr:
+ case Instruction::SRem:
+ case Instruction::SDiv: NeedsExplicitCast = true; TypeIsSigned = true; break;
+ case Instruction::SExt:
+ Ty = CE->getType();
+ NeedsExplicitCast = true;
+ TypeIsSigned = true;
+ break;
+ case Instruction::ZExt:
+ case Instruction::Trunc:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::BitCast:
+ Ty = CE->getType();
+ NeedsExplicitCast = true;
+ break;
+ default: break;
+ }
+ if (NeedsExplicitCast) {
+ Out << "((";
+ if (Ty->isIntegerTy() && Ty != Type::getInt1Ty(Ty->getContext()))
+ printSimpleType(Out, Ty, TypeIsSigned);
+ else
+ printType(Out, Ty); // not integer, sign doesn't matter
+ Out << ")(";
+ }
+ return NeedsExplicitCast;
+}
+
+// Print a constant assuming that it is the operand for a given Opcode. The
+// opcodes that care about sign need to cast their operands to the expected
+// type before the operation proceeds. This function does the casting.
+void CWriter::printConstantWithCast(Constant* CPV, unsigned Opcode) {
+
+ // Extract the operand's type, we'll need it.
+ Type* OpTy = CPV->getType();
+
+ // Indicate whether to do the cast or not.
+ bool shouldCast = false;
+ bool typeIsSigned = false;
+
+ // Based on the Opcode for which this Constant is being written, determine
+ // the new type to which the operand should be casted by setting the value
+ // of OpTy. If we change OpTy, also set shouldCast to true so it gets
+ // casted below.
+ switch (Opcode) {
+ default:
+ // for most instructions, it doesn't matter
+ break;
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ // We need to cast integer arithmetic so that it is always performed
+ // as unsigned, to avoid undefined behavior on overflow.
+ case Instruction::LShr:
+ case Instruction::UDiv:
+ case Instruction::URem:
+ shouldCast = true;
+ break;
+ case Instruction::AShr:
+ case Instruction::SDiv:
+ case Instruction::SRem:
+ shouldCast = true;
+ typeIsSigned = true;
+ break;
+ }
+
+ // Write out the casted constant if we should, otherwise just write the
+ // operand.
+ if (shouldCast) {
+ Out << "((";
+ printSimpleType(Out, OpTy, typeIsSigned);
+ Out << ")";
+ printConstant(CPV, false);
+ Out << ")";
+ } else
+ printConstant(CPV, false);
+}
+
+std::string CWriter::GetValueName(const Value *Operand) {
+
+ // Resolve potential alias.
+ if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(Operand)) {
+ if (const Value *V = GA->resolveAliasedGlobal(false))
+ Operand = V;
+ }
+
+ // Mangle globals with the standard mangler interface for LLC compatibility.
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(Operand)) {
+ SmallString<128> Str;
+ Mang->getNameWithPrefix(Str, GV, false);
+ return CBEMangle(Str.str().str());
+ }
+
+ std::string Name = Operand->getName();
+
+ if (Name.empty()) { // Assign unique names to local temporaries.
+ unsigned &No = AnonValueNumbers[Operand];
+ if (No == 0)
+ No = ++NextAnonValueNumber;
+ Name = "tmp__" + utostr(No);
+ }
+
+ std::string VarName;
+ VarName.reserve(Name.capacity());
+
+ for (std::string::iterator I = Name.begin(), E = Name.end();
+ I != E; ++I) {
+ char ch = *I;
+
+ if (!((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') ||
+ (ch >= '0' && ch <= '9') || ch == '_')) {
+ char buffer[5];
+ sprintf(buffer, "_%x_", ch);
+ VarName += buffer;
+ } else
+ VarName += ch;
+ }
+
+ return "llvm_cbe_" + VarName;
+}
+
+/// writeInstComputationInline - Emit the computation for the specified
+/// instruction inline, with no destination provided.
+void CWriter::writeInstComputationInline(Instruction &I) {
+ // We can't currently support integer types other than 1, 8, 16, 32, 64.
+ // Validate this.
+ Type *Ty = I.getType();
+ if (Ty->isIntegerTy() && (Ty!=Type::getInt1Ty(I.getContext()) &&
+ Ty!=Type::getInt8Ty(I.getContext()) &&
+ Ty!=Type::getInt16Ty(I.getContext()) &&
+ Ty!=Type::getInt32Ty(I.getContext()) &&
+ Ty!=Type::getInt64Ty(I.getContext()))) {
+ report_fatal_error("The C backend does not currently support integer "
+ "types of widths other than 1, 8, 16, 32, 64.\n"
+ "This is being tracked as PR 4158.");
+ }
+
+ // If this is a non-trivial bool computation, make sure to truncate down to
+ // a 1 bit value. This is important because we want "add i1 x, y" to return
+ // "0" when x and y are true, not "2" for example.
+ bool NeedBoolTrunc = false;
+ if (I.getType() == Type::getInt1Ty(I.getContext()) &&
+ !isa<ICmpInst>(I) && !isa<FCmpInst>(I))
+ NeedBoolTrunc = true;
+
+ if (NeedBoolTrunc)
+ Out << "((";
+
+ visit(I);
+
+ if (NeedBoolTrunc)
+ Out << ")&1)";
+}
+
+
+void CWriter::writeOperandInternal(Value *Operand, bool Static) {
+ if (Instruction *I = dyn_cast<Instruction>(Operand))
+ // Should we inline this instruction to build a tree?
+ if (isInlinableInst(*I) && !isDirectAlloca(I)) {
+ Out << '(';
+ writeInstComputationInline(*I);
+ Out << ')';
+ return;
+ }
+
+ Constant* CPV = dyn_cast<Constant>(Operand);
+
+ if (CPV && !isa<GlobalValue>(CPV))
+ printConstant(CPV, Static);
+ else
+ Out << GetValueName(Operand);
+}
+
+void CWriter::writeOperand(Value *Operand, bool Static) {
+ bool isAddressImplicit = isAddressExposed(Operand);
+ if (isAddressImplicit)
+ Out << "(&"; // Global variables are referenced as their addresses by llvm
+
+ writeOperandInternal(Operand, Static);
+
+ if (isAddressImplicit)
+ Out << ')';
+}
+
+// Some instructions need to have their result value casted back to the
+// original types because their operands were casted to the expected type.
+// This function takes care of detecting that case and printing the cast
+// for the Instruction.
+bool CWriter::writeInstructionCast(const Instruction &I) {
+ Type *Ty = I.getOperand(0)->getType();
+ switch (I.getOpcode()) {
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ // We need to cast integer arithmetic so that it is always performed
+ // as unsigned, to avoid undefined behavior on overflow.
+ case Instruction::LShr:
+ case Instruction::URem:
+ case Instruction::UDiv:
+ Out << "((";
+ printSimpleType(Out, Ty, false);
+ Out << ")(";
+ return true;
+ case Instruction::AShr:
+ case Instruction::SRem:
+ case Instruction::SDiv:
+ Out << "((";
+ printSimpleType(Out, Ty, true);
+ Out << ")(";
+ return true;
+ default: break;
+ }
+ return false;
+}
+
+// Write the operand with a cast to another type based on the Opcode being used.
+// This will be used in cases where an instruction has specific type
+// requirements (usually signedness) for its operands.
+void CWriter::writeOperandWithCast(Value* Operand, unsigned Opcode) {
+
+ // Extract the operand's type, we'll need it.
+ Type* OpTy = Operand->getType();
+
+ // Indicate whether to do the cast or not.
+ bool shouldCast = false;
+
+ // Indicate whether the cast should be to a signed type or not.
+ bool castIsSigned = false;
+
+ // Based on the Opcode for which this Operand is being written, determine
+ // the new type to which the operand should be casted by setting the value
+ // of OpTy. If we change OpTy, also set shouldCast to true.
+ switch (Opcode) {
+ default:
+ // for most instructions, it doesn't matter
+ break;
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ // We need to cast integer arithmetic so that it is always performed
+ // as unsigned, to avoid undefined behavior on overflow.
+ case Instruction::LShr:
+ case Instruction::UDiv:
+ case Instruction::URem: // Cast to unsigned first
+ shouldCast = true;
+ castIsSigned = false;
+ break;
+ case Instruction::GetElementPtr:
+ case Instruction::AShr:
+ case Instruction::SDiv:
+ case Instruction::SRem: // Cast to signed first
+ shouldCast = true;
+ castIsSigned = true;
+ break;
+ }
+
+ // Write out the casted operand if we should, otherwise just write the
+ // operand.
+ if (shouldCast) {
+ Out << "((";
+ printSimpleType(Out, OpTy, castIsSigned);
+ Out << ")";
+ writeOperand(Operand);
+ Out << ")";
+ } else
+ writeOperand(Operand);
+}
+
+// Write the operand with a cast to another type based on the icmp predicate
+// being used.
+void CWriter::writeOperandWithCast(Value* Operand, const ICmpInst &Cmp) {
+ // This has to do a cast to ensure the operand has the right signedness.
+ // Also, if the operand is a pointer, we make sure to cast to an integer when
+ // doing the comparison both for signedness and so that the C compiler doesn't
+ // optimize things like "p < NULL" to false (p may contain an integer value
+ // f.e.).
+ bool shouldCast = Cmp.isRelational();
+
+ // Write out the casted operand if we should, otherwise just write the
+ // operand.
+ if (!shouldCast) {
+ writeOperand(Operand);
+ return;
+ }
+
+ // Should this be a signed comparison? If so, convert to signed.
+ bool castIsSigned = Cmp.isSigned();
+
+ // If the operand was a pointer, convert to a large integer type.
+ Type* OpTy = Operand->getType();
+ if (OpTy->isPointerTy())
+ OpTy = TD->getIntPtrType(Operand->getContext());
+
+ Out << "((";
+ printSimpleType(Out, OpTy, castIsSigned);
+ Out << ")";
+ writeOperand(Operand);
+ Out << ")";
+}
+
+// generateCompilerSpecificCode - This is where we add conditional compilation
+// directives to cater to specific compilers as need be.
+//
+static void generateCompilerSpecificCode(formatted_raw_ostream& Out,
+ const TargetData *TD) {
+ // Alloca is hard to get, and we don't want to include stdlib.h here.
+ Out << "/* get a declaration for alloca */\n"
+ << "#if defined(__CYGWIN__) || defined(__MINGW32__)\n"
+ << "#define alloca(x) __builtin_alloca((x))\n"
+ << "#define _alloca(x) __builtin_alloca((x))\n"
+ << "#elif defined(__APPLE__)\n"
+ << "extern void *__builtin_alloca(unsigned long);\n"
+ << "#define alloca(x) __builtin_alloca(x)\n"
+ << "#define longjmp _longjmp\n"
+ << "#define setjmp _setjmp\n"
+ << "#elif defined(__sun__)\n"
+ << "#if defined(__sparcv9)\n"
+ << "extern void *__builtin_alloca(unsigned long);\n"
+ << "#else\n"
+ << "extern void *__builtin_alloca(unsigned int);\n"
+ << "#endif\n"
+ << "#define alloca(x) __builtin_alloca(x)\n"
+ << "#elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) || defined(__arm__)\n"
+ << "#define alloca(x) __builtin_alloca(x)\n"
+ << "#elif defined(_MSC_VER)\n"
+ << "#define inline _inline\n"
+ << "#define alloca(x) _alloca(x)\n"
+ << "#else\n"
+ << "#include <alloca.h>\n"
+ << "#endif\n\n";
+
+ // We output GCC specific attributes to preserve 'linkonce'ness on globals.
+ // If we aren't being compiled with GCC, just drop these attributes.
+ Out << "#ifndef __GNUC__ /* Can only support \"linkonce\" vars with GCC */\n"
+ << "#define __attribute__(X)\n"
+ << "#endif\n\n";
+
+ // On Mac OS X, "external weak" is spelled "__attribute__((weak_import))".
+ Out << "#if defined(__GNUC__) && defined(__APPLE_CC__)\n"
+ << "#define __EXTERNAL_WEAK__ __attribute__((weak_import))\n"
+ << "#elif defined(__GNUC__)\n"
+ << "#define __EXTERNAL_WEAK__ __attribute__((weak))\n"
+ << "#else\n"
+ << "#define __EXTERNAL_WEAK__\n"
+ << "#endif\n\n";
+
+ // For now, turn off the weak linkage attribute on Mac OS X. (See above.)
+ Out << "#if defined(__GNUC__) && defined(__APPLE_CC__)\n"
+ << "#define __ATTRIBUTE_WEAK__\n"
+ << "#elif defined(__GNUC__)\n"
+ << "#define __ATTRIBUTE_WEAK__ __attribute__((weak))\n"
+ << "#else\n"
+ << "#define __ATTRIBUTE_WEAK__\n"
+ << "#endif\n\n";
+
+ // Add hidden visibility support. FIXME: APPLE_CC?
+ Out << "#if defined(__GNUC__)\n"
+ << "#define __HIDDEN__ __attribute__((visibility(\"hidden\")))\n"
+ << "#endif\n\n";
+
+ // Define NaN and Inf as GCC builtins if using GCC, as 0 otherwise
+ // From the GCC documentation:
+ //
+ // double __builtin_nan (const char *str)
+ //
+ // This is an implementation of the ISO C99 function nan.
+ //
+ // Since ISO C99 defines this function in terms of strtod, which we do
+ // not implement, a description of the parsing is in order. The string is
+ // parsed as by strtol; that is, the base is recognized by leading 0 or
+ // 0x prefixes. The number parsed is placed in the significand such that
+ // the least significant bit of the number is at the least significant
+ // bit of the significand. The number is truncated to fit the significand
+ // field provided. The significand is forced to be a quiet NaN.
+ //
+ // This function, if given a string literal, is evaluated early enough
+ // that it is considered a compile-time constant.
+ //
+ // float __builtin_nanf (const char *str)
+ //
+ // Similar to __builtin_nan, except the return type is float.
+ //
+ // double __builtin_inf (void)
+ //
+ // Similar to __builtin_huge_val, except a warning is generated if the
+ // target floating-point format does not support infinities. This
+ // function is suitable for implementing the ISO C99 macro INFINITY.
+ //
+ // float __builtin_inff (void)
+ //
+ // Similar to __builtin_inf, except the return type is float.
+ Out << "#ifdef __GNUC__\n"
+ << "#define LLVM_NAN(NanStr) __builtin_nan(NanStr) /* Double */\n"
+ << "#define LLVM_NANF(NanStr) __builtin_nanf(NanStr) /* Float */\n"
+ << "#define LLVM_NANS(NanStr) __builtin_nans(NanStr) /* Double */\n"
+ << "#define LLVM_NANSF(NanStr) __builtin_nansf(NanStr) /* Float */\n"
+ << "#define LLVM_INF __builtin_inf() /* Double */\n"
+ << "#define LLVM_INFF __builtin_inff() /* Float */\n"
+ << "#define LLVM_PREFETCH(addr,rw,locality) "
+ "__builtin_prefetch(addr,rw,locality)\n"
+ << "#define __ATTRIBUTE_CTOR__ __attribute__((constructor))\n"
+ << "#define __ATTRIBUTE_DTOR__ __attribute__((destructor))\n"
+ << "#define LLVM_ASM __asm__\n"
+ << "#else\n"
+ << "#define LLVM_NAN(NanStr) ((double)0.0) /* Double */\n"
+ << "#define LLVM_NANF(NanStr) 0.0F /* Float */\n"
+ << "#define LLVM_NANS(NanStr) ((double)0.0) /* Double */\n"
+ << "#define LLVM_NANSF(NanStr) 0.0F /* Float */\n"
+ << "#define LLVM_INF ((double)0.0) /* Double */\n"
+ << "#define LLVM_INFF 0.0F /* Float */\n"
+ << "#define LLVM_PREFETCH(addr,rw,locality) /* PREFETCH */\n"
+ << "#define __ATTRIBUTE_CTOR__\n"
+ << "#define __ATTRIBUTE_DTOR__\n"
+ << "#define LLVM_ASM(X)\n"
+ << "#endif\n\n";
+
+ Out << "#if __GNUC__ < 4 /* Old GCC's, or compilers not GCC */ \n"
+ << "#define __builtin_stack_save() 0 /* not implemented */\n"
+ << "#define __builtin_stack_restore(X) /* noop */\n"
+ << "#endif\n\n";
+
+ // Output typedefs for 128-bit integers. If these are needed with a
+ // 32-bit target or with a C compiler that doesn't support mode(TI),
+ // more drastic measures will be needed.
+ Out << "#if __GNUC__ && __LP64__ /* 128-bit integer types */\n"
+ << "typedef int __attribute__((mode(TI))) llvmInt128;\n"
+ << "typedef unsigned __attribute__((mode(TI))) llvmUInt128;\n"
+ << "#endif\n\n";
+
+ // Output target-specific code that should be inserted into main.
+ Out << "#define CODE_FOR_MAIN() /* Any target-specific code for main()*/\n";
+}
+
+/// FindStaticTors - Given a static ctor/dtor list, unpack its contents into
+/// the StaticTors set.
+static void FindStaticTors(GlobalVariable *GV, std::set<Function*> &StaticTors){
+ ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
+ if (!InitList) return;
+
+ for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i)
+ if (ConstantStruct *CS = dyn_cast<ConstantStruct>(InitList->getOperand(i))){
+ if (CS->getNumOperands() != 2) return; // Not array of 2-element structs.
+
+ if (CS->getOperand(1)->isNullValue())
+ return; // Found a null terminator, exit printing.
+ Constant *FP = CS->getOperand(1);
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(FP))
+ if (CE->isCast())
+ FP = CE->getOperand(0);
+ if (Function *F = dyn_cast<Function>(FP))
+ StaticTors.insert(F);
+ }
+}
+
+enum SpecialGlobalClass {
+ NotSpecial = 0,
+ GlobalCtors, GlobalDtors,
+ NotPrinted
+};
+
+/// getGlobalVariableClass - If this is a global that is specially recognized
+/// by LLVM, return a code that indicates how we should handle it.
+static SpecialGlobalClass getGlobalVariableClass(const GlobalVariable *GV) {
+ // If this is a global ctors/dtors list, handle it now.
+ if (GV->hasAppendingLinkage() && GV->use_empty()) {
+ if (GV->getName() == "llvm.global_ctors")
+ return GlobalCtors;
+ else if (GV->getName() == "llvm.global_dtors")
+ return GlobalDtors;
+ }
+
+ // Otherwise, if it is other metadata, don't print it. This catches things
+ // like debug information.
+ if (GV->getSection() == "llvm.metadata")
+ return NotPrinted;
+
+ return NotSpecial;
+}
+
+// PrintEscapedString - Print each character of the specified string, escaping
+// it if it is not printable or if it is an escape char.
+static void PrintEscapedString(const char *Str, unsigned Length,
+ raw_ostream &Out) {
+ for (unsigned i = 0; i != Length; ++i) {
+ unsigned char C = Str[i];
+ if (isprint(C) && C != '\\' && C != '"')
+ Out << C;
+ else if (C == '\\')
+ Out << "\\\\";
+ else if (C == '\"')
+ Out << "\\\"";
+ else if (C == '\t')
+ Out << "\\t";
+ else
+ Out << "\\x" << hexdigit(C >> 4) << hexdigit(C & 0x0F);
+ }
+}
+
+// PrintEscapedString - Print each character of the specified string, escaping
+// it if it is not printable or if it is an escape char.
+static void PrintEscapedString(const std::string &Str, raw_ostream &Out) {
+ PrintEscapedString(Str.c_str(), Str.size(), Out);
+}
+
+bool CWriter::doInitialization(Module &M) {
+ FunctionPass::doInitialization(M);
+
+ // Initialize
+ TheModule = &M;
+
+ TD = new TargetData(&M);
+ IL = new IntrinsicLowering(*TD);
+ IL->AddPrototypes(M);
+
+#if 0
+ std::string Triple = TheModule->getTargetTriple();
+ if (Triple.empty())
+ Triple = llvm::sys::getHostTriple();
+
+ std::string E;
+ if (const Target *Match = TargetRegistry::lookupTarget(Triple, E))
+ TAsm = Match->createMCAsmInfo(Triple);
+#endif
+ TAsm = new CBEMCAsmInfo();
+ MRI = new MCRegisterInfo();
+ TCtx = new MCContext(*TAsm, *MRI, NULL);
+ Mang = new Mangler(*TCtx, *TD);
+
+ // Keep track of which functions are static ctors/dtors so they can have
+ // an attribute added to their prototypes.
+ std::set<Function*> StaticCtors, StaticDtors;
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I) {
+ switch (getGlobalVariableClass(I)) {
+ default: break;
+ case GlobalCtors:
+ FindStaticTors(I, StaticCtors);
+ break;
+ case GlobalDtors:
+ FindStaticTors(I, StaticDtors);
+ break;
+ }
+ }
+
+ // get declaration for alloca
+ Out << "/* Provide Declarations */\n";
+ Out << "#include <stdarg.h>\n"; // Varargs support
+ Out << "#include <setjmp.h>\n"; // Unwind support
+ Out << "#include <limits.h>\n"; // With overflow intrinsics support.
+ generateCompilerSpecificCode(Out, TD);
+
+ // Provide a definition for `bool' if not compiling with a C++ compiler.
+ Out << "\n"
+ << "#ifndef __cplusplus\ntypedef unsigned char bool;\n#endif\n"
+
+ << "\n\n/* Support for floating point constants */\n"
+ << "typedef unsigned long long ConstantDoubleTy;\n"
+ << "typedef unsigned int ConstantFloatTy;\n"
+ << "typedef struct { unsigned long long f1; unsigned short f2; "
+ "unsigned short pad[3]; } ConstantFP80Ty;\n"
+ // This is used for both kinds of 128-bit long double; meaning differs.
+ << "typedef struct { unsigned long long f1; unsigned long long f2; }"
+ " ConstantFP128Ty;\n"
+ << "\n\n/* Global Declarations */\n";
+
+ // First output all the declarations for the program, because C requires
+ // Functions & globals to be declared before they are used.
+ //
+ if (!M.getModuleInlineAsm().empty()) {
+ Out << "/* Module asm statements */\n"
+ << "asm(";
+
+ // Split the string into lines, to make it easier to read the .ll file.
+ std::string Asm = M.getModuleInlineAsm();
+ size_t CurPos = 0;
+ size_t NewLine = Asm.find_first_of('\n', CurPos);
+ while (NewLine != std::string::npos) {
+ // We found a newline, print the portion of the asm string from the
+ // last newline up to this newline.
+ Out << "\"";
+ PrintEscapedString(std::string(Asm.begin()+CurPos, Asm.begin()+NewLine),
+ Out);
+ Out << "\\n\"\n";
+ CurPos = NewLine+1;
+ NewLine = Asm.find_first_of('\n', CurPos);
+ }
+ Out << "\"";
+ PrintEscapedString(std::string(Asm.begin()+CurPos, Asm.end()), Out);
+ Out << "\");\n"
+ << "/* End Module asm statements */\n";
+ }
+
+ // Loop over the symbol table, emitting all named constants.
+ printModuleTypes();
+
+ // Global variable declarations...
+ if (!M.global_empty()) {
+ Out << "\n/* External Global Variable Declarations */\n";
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I) {
+
+ if (I->hasExternalLinkage() || I->hasExternalWeakLinkage() ||
+ I->hasCommonLinkage())
+ Out << "extern ";
+ else if (I->hasDLLImportLinkage())
+ Out << "__declspec(dllimport) ";
+ else
+ continue; // Internal Global
+
+ // Thread Local Storage
+ if (I->isThreadLocal())
+ Out << "__thread ";
+
+ printType(Out, I->getType()->getElementType(), false, GetValueName(I));
+
+ if (I->hasExternalWeakLinkage())
+ Out << " __EXTERNAL_WEAK__";
+ Out << ";\n";
+ }
+ }
+
+ // Function declarations
+ Out << "\n/* Function Declarations */\n";
+ Out << "double fmod(double, double);\n"; // Support for FP rem
+ Out << "float fmodf(float, float);\n";
+ Out << "long double fmodl(long double, long double);\n";
+
+ // Store the intrinsics which will be declared/defined below.
+ SmallVector<const Function*, 8> intrinsicsToDefine;
+
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
+ // Don't print declarations for intrinsic functions.
+ // Store the used intrinsics, which need to be explicitly defined.
+ if (I->isIntrinsic()) {
+ switch (I->getIntrinsicID()) {
+ default:
+ break;
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::sadd_with_overflow:
+ intrinsicsToDefine.push_back(I);
+ break;
+ }
+ continue;
+ }
+
+ if (I->getName() == "setjmp" ||
+ I->getName() == "longjmp" || I->getName() == "_setjmp")
+ continue;
+
+ if (I->hasExternalWeakLinkage())
+ Out << "extern ";
+ printFunctionSignature(I, true);
+ if (I->hasWeakLinkage() || I->hasLinkOnceLinkage())
+ Out << " __ATTRIBUTE_WEAK__";
+ if (I->hasExternalWeakLinkage())
+ Out << " __EXTERNAL_WEAK__";
+ if (StaticCtors.count(I))
+ Out << " __ATTRIBUTE_CTOR__";
+ if (StaticDtors.count(I))
+ Out << " __ATTRIBUTE_DTOR__";
+ if (I->hasHiddenVisibility())
+ Out << " __HIDDEN__";
+
+ if (I->hasName() && I->getName()[0] == 1)
+ Out << " LLVM_ASM(\"" << I->getName().substr(1) << "\")";
+
+ Out << ";\n";
+ }
+
+ // Output the global variable declarations
+ if (!M.global_empty()) {
+ Out << "\n\n/* Global Variable Declarations */\n";
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
+ if (!I->isDeclaration()) {
+ // Ignore special globals, such as debug info.
+ if (getGlobalVariableClass(I))
+ continue;
+
+ if (I->hasLocalLinkage())
+ Out << "static ";
+ else
+ Out << "extern ";
+
+ // Thread Local Storage
+ if (I->isThreadLocal())
+ Out << "__thread ";
+
+ printType(Out, I->getType()->getElementType(), false,
+ GetValueName(I));
+
+ if (I->hasLinkOnceLinkage())
+ Out << " __attribute__((common))";
+ else if (I->hasCommonLinkage()) // FIXME is this right?
+ Out << " __ATTRIBUTE_WEAK__";
+ else if (I->hasWeakLinkage())
+ Out << " __ATTRIBUTE_WEAK__";
+ else if (I->hasExternalWeakLinkage())
+ Out << " __EXTERNAL_WEAK__";
+ if (I->hasHiddenVisibility())
+ Out << " __HIDDEN__";
+ Out << ";\n";
+ }
+ }
+
+ // Output the global variable definitions and contents...
+ if (!M.global_empty()) {
+ Out << "\n\n/* Global Variable Definitions and Initialization */\n";
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
+ if (!I->isDeclaration()) {
+ // Ignore special globals, such as debug info.
+ if (getGlobalVariableClass(I))
+ continue;
+
+ if (I->hasLocalLinkage())
+ Out << "static ";
+ else if (I->hasDLLImportLinkage())
+ Out << "__declspec(dllimport) ";
+ else if (I->hasDLLExportLinkage())
+ Out << "__declspec(dllexport) ";
+
+ // Thread Local Storage
+ if (I->isThreadLocal())
+ Out << "__thread ";
+
+ printType(Out, I->getType()->getElementType(), false,
+ GetValueName(I));
+ if (I->hasLinkOnceLinkage())
+ Out << " __attribute__((common))";
+ else if (I->hasWeakLinkage())
+ Out << " __ATTRIBUTE_WEAK__";
+ else if (I->hasCommonLinkage())
+ Out << " __ATTRIBUTE_WEAK__";
+
+ if (I->hasHiddenVisibility())
+ Out << " __HIDDEN__";
+
+ // If the initializer is not null, emit the initializer. If it is null,
+ // we try to avoid emitting large amounts of zeros. The problem with
+ // this, however, occurs when the variable has weak linkage. In this
+ // case, the assembler will complain about the variable being both weak
+ // and common, so we disable this optimization.
+ // FIXME common linkage should avoid this problem.
+ if (!I->getInitializer()->isNullValue()) {
+ Out << " = " ;
+ writeOperand(I->getInitializer(), true);
+ } else if (I->hasWeakLinkage()) {
+ // We have to specify an initializer, but it doesn't have to be
+ // complete. If the value is an aggregate, print out { 0 }, and let
+ // the compiler figure out the rest of the zeros.
+ Out << " = " ;
+ if (I->getInitializer()->getType()->isStructTy() ||
+ I->getInitializer()->getType()->isVectorTy()) {
+ Out << "{ 0 }";
+ } else if (I->getInitializer()->getType()->isArrayTy()) {
+ // As with structs and vectors, but with an extra set of braces
+ // because arrays are wrapped in structs.
+ Out << "{ { 0 } }";
+ } else {
+ // Just print it out normally.
+ writeOperand(I->getInitializer(), true);
+ }
+ }
+ Out << ";\n";
+ }
+ }
+
+ if (!M.empty())
+ Out << "\n\n/* Function Bodies */\n";
+
+ // Emit some helper functions for dealing with FCMP instruction's
+ // predicates
+ Out << "static inline int llvm_fcmp_ord(double X, double Y) { ";
+ Out << "return X == X && Y == Y; }\n";
+ Out << "static inline int llvm_fcmp_uno(double X, double Y) { ";
+ Out << "return X != X || Y != Y; }\n";
+ Out << "static inline int llvm_fcmp_ueq(double X, double Y) { ";
+ Out << "return X == Y || llvm_fcmp_uno(X, Y); }\n";
+ Out << "static inline int llvm_fcmp_une(double X, double Y) { ";
+ Out << "return X != Y; }\n";
+ Out << "static inline int llvm_fcmp_ult(double X, double Y) { ";
+ Out << "return X < Y || llvm_fcmp_uno(X, Y); }\n";
+ Out << "static inline int llvm_fcmp_ugt(double X, double Y) { ";
+ Out << "return X > Y || llvm_fcmp_uno(X, Y); }\n";
+ Out << "static inline int llvm_fcmp_ule(double X, double Y) { ";
+ Out << "return X <= Y || llvm_fcmp_uno(X, Y); }\n";
+ Out << "static inline int llvm_fcmp_uge(double X, double Y) { ";
+ Out << "return X >= Y || llvm_fcmp_uno(X, Y); }\n";
+ Out << "static inline int llvm_fcmp_oeq(double X, double Y) { ";
+ Out << "return X == Y ; }\n";
+ Out << "static inline int llvm_fcmp_one(double X, double Y) { ";
+ Out << "return X != Y && llvm_fcmp_ord(X, Y); }\n";
+ Out << "static inline int llvm_fcmp_olt(double X, double Y) { ";
+ Out << "return X < Y ; }\n";
+ Out << "static inline int llvm_fcmp_ogt(double X, double Y) { ";
+ Out << "return X > Y ; }\n";
+ Out << "static inline int llvm_fcmp_ole(double X, double Y) { ";
+ Out << "return X <= Y ; }\n";
+ Out << "static inline int llvm_fcmp_oge(double X, double Y) { ";
+ Out << "return X >= Y ; }\n";
+
+ // Emit definitions of the intrinsics.
+ for (SmallVector<const Function*, 8>::const_iterator
+ I = intrinsicsToDefine.begin(),
+ E = intrinsicsToDefine.end(); I != E; ++I) {
+ printIntrinsicDefinition(**I, Out);
+ }
+
+ return false;
+}
+
+
+/// Output all floating point constants that cannot be printed accurately...
+void CWriter::printFloatingPointConstants(Function &F) {
+ // Scan the module for floating point constants. If any FP constant is used
+ // in the function, we want to redirect it here so that we do not depend on
+ // the precision of the printed form, unless the printed form preserves
+ // precision.
+ //
+ for (constant_iterator I = constant_begin(&F), E = constant_end(&F);
+ I != E; ++I)
+ printFloatingPointConstants(*I);
+
+ Out << '\n';
+}
+
+void CWriter::printFloatingPointConstants(const Constant *C) {
+ // If this is a constant expression, recursively check for constant fp values.
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
+ for (unsigned i = 0, e = CE->getNumOperands(); i != e; ++i)
+ printFloatingPointConstants(CE->getOperand(i));
+ return;
+ }
+
+ // Otherwise, check for a FP constant that we need to print.
+ const ConstantFP *FPC = dyn_cast<ConstantFP>(C);
+ if (FPC == 0 ||
+ // Do not put in FPConstantMap if safe.
+ isFPCSafeToPrint(FPC) ||
+ // Already printed this constant?
+ FPConstantMap.count(FPC))
+ return;
+
+ FPConstantMap[FPC] = FPCounter; // Number the FP constants
+
+ if (FPC->getType() == Type::getDoubleTy(FPC->getContext())) {
+ double Val = FPC->getValueAPF().convertToDouble();
+ uint64_t i = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
+ Out << "static const ConstantDoubleTy FPConstant" << FPCounter++
+ << " = 0x" << utohexstr(i)
+ << "ULL; /* " << Val << " */\n";
+ } else if (FPC->getType() == Type::getFloatTy(FPC->getContext())) {
+ float Val = FPC->getValueAPF().convertToFloat();
+ uint32_t i = (uint32_t)FPC->getValueAPF().bitcastToAPInt().
+ getZExtValue();
+ Out << "static const ConstantFloatTy FPConstant" << FPCounter++
+ << " = 0x" << utohexstr(i)
+ << "U; /* " << Val << " */\n";
+ } else if (FPC->getType() == Type::getX86_FP80Ty(FPC->getContext())) {
+ // api needed to prevent premature destruction
+ APInt api = FPC->getValueAPF().bitcastToAPInt();
+ const uint64_t *p = api.getRawData();
+ Out << "static const ConstantFP80Ty FPConstant" << FPCounter++
+ << " = { 0x" << utohexstr(p[0])
+ << "ULL, 0x" << utohexstr((uint16_t)p[1]) << ",{0,0,0}"
+ << "}; /* Long double constant */\n";
+ } else if (FPC->getType() == Type::getPPC_FP128Ty(FPC->getContext()) ||
+ FPC->getType() == Type::getFP128Ty(FPC->getContext())) {
+ APInt api = FPC->getValueAPF().bitcastToAPInt();
+ const uint64_t *p = api.getRawData();
+ Out << "static const ConstantFP128Ty FPConstant" << FPCounter++
+ << " = { 0x"
+ << utohexstr(p[0]) << ", 0x" << utohexstr(p[1])
+ << "}; /* Long double constant */\n";
+
+ } else {
+ llvm_unreachable("Unknown float type!");
+ }
+}
+
+
+/// printSymbolTable - Run through symbol table looking for type names. If a
+/// type name is found, emit its declaration...
+///
+void CWriter::printModuleTypes() {
+ Out << "/* Helper union for bitcasts */\n";
+ Out << "typedef union {\n";
+ Out << " unsigned int Int32;\n";
+ Out << " unsigned long long Int64;\n";
+ Out << " float Float;\n";
+ Out << " double Double;\n";
+ Out << "} llvmBitCastUnion;\n";
+
+ // Get all of the struct types used in the module.
+ std::vector<StructType*> StructTypes;
+ TheModule->findUsedStructTypes(StructTypes);
+
+ if (StructTypes.empty()) return;
+
+ Out << "/* Structure forward decls */\n";
+
+ unsigned NextTypeID = 0;
+
+ // If any of them are missing names, add a unique ID to UnnamedStructIDs.
+ // Print out forward declarations for structure types.
+ for (unsigned i = 0, e = StructTypes.size(); i != e; ++i) {
+ StructType *ST = StructTypes[i];
+
+ if (ST->isLiteral() || ST->getName().empty())
+ UnnamedStructIDs[ST] = NextTypeID++;
+
+ std::string Name = getStructName(ST);
+
+ Out << "typedef struct " << Name << ' ' << Name << ";\n";
+ }
+
+ Out << '\n';
+
+ // Keep track of which structures have been printed so far.
+ SmallPtrSet<Type *, 16> StructPrinted;
+
+ // Loop over all structures then push them into the stack so they are
+ // printed in the correct order.
+ //
+ Out << "/* Structure contents */\n";
+ for (unsigned i = 0, e = StructTypes.size(); i != e; ++i)
+ if (StructTypes[i]->isStructTy())
+ // Only print out used types!
+ printContainedStructs(StructTypes[i], StructPrinted);
+}
+
+// Push the struct onto the stack and recursively push all structs
+// this one depends on.
+//
+// TODO: Make this work properly with vector types
+//
+void CWriter::printContainedStructs(Type *Ty,
+ SmallPtrSet<Type *, 16> &StructPrinted) {
+ // Don't walk through pointers.
+ if (Ty->isPointerTy() || Ty->isPrimitiveType() || Ty->isIntegerTy())
+ return;
+
+ // Print all contained types first.
+ for (Type::subtype_iterator I = Ty->subtype_begin(),
+ E = Ty->subtype_end(); I != E; ++I)
+ printContainedStructs(*I, StructPrinted);
+
+ if (StructType *ST = dyn_cast<StructType>(Ty)) {
+ // Check to see if we have already printed this struct.
+ if (!StructPrinted.insert(Ty)) return;
+
+ // Print structure type out.
+ printType(Out, ST, false, getStructName(ST), true);
+ Out << ";\n\n";
+ }
+}
+
+void CWriter::printFunctionSignature(const Function *F, bool Prototype) {
+ /// isStructReturn - Should this function actually return a struct by-value?
+ bool isStructReturn = F->hasStructRetAttr();
+
+ if (F->hasLocalLinkage()) Out << "static ";
+ if (F->hasDLLImportLinkage()) Out << "__declspec(dllimport) ";
+ if (F->hasDLLExportLinkage()) Out << "__declspec(dllexport) ";
+ switch (F->getCallingConv()) {
+ case CallingConv::X86_StdCall:
+ Out << "__attribute__((stdcall)) ";
+ break;
+ case CallingConv::X86_FastCall:
+ Out << "__attribute__((fastcall)) ";
+ break;
+ case CallingConv::X86_ThisCall:
+ Out << "__attribute__((thiscall)) ";
+ break;
+ default:
+ break;
+ }
+
+ // Loop over the arguments, printing them...
+ FunctionType *FT = cast<FunctionType>(F->getFunctionType());
+ const AttrListPtr &PAL = F->getAttributes();
+
+ std::string tstr;
+ raw_string_ostream FunctionInnards(tstr);
+
+ // Print out the name...
+ FunctionInnards << GetValueName(F) << '(';
+
+ bool PrintedArg = false;
+ if (!F->isDeclaration()) {
+ if (!F->arg_empty()) {
+ Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
+ unsigned Idx = 1;
+
+ // If this is a struct-return function, don't print the hidden
+ // struct-return argument.
+ if (isStructReturn) {
+ assert(I != E && "Invalid struct return function!");
+ ++I;
+ ++Idx;
+ }
+
+ std::string ArgName;
+ for (; I != E; ++I) {
+ if (PrintedArg) FunctionInnards << ", ";
+ if (I->hasName() || !Prototype)
+ ArgName = GetValueName(I);
+ else
+ ArgName = "";
+ Type *ArgTy = I->getType();
+ if (PAL.paramHasAttr(Idx, Attribute::ByVal)) {
+ ArgTy = cast<PointerType>(ArgTy)->getElementType();
+ ByValParams.insert(I);
+ }
+ printType(FunctionInnards, ArgTy,
+ /*isSigned=*/PAL.paramHasAttr(Idx, Attribute::SExt),
+ ArgName);
+ PrintedArg = true;
+ ++Idx;
+ }
+ }
+ } else {
+ // Loop over the arguments, printing them.
+ FunctionType::param_iterator I = FT->param_begin(), E = FT->param_end();
+ unsigned Idx = 1;
+
+ // If this is a struct-return function, don't print the hidden
+ // struct-return argument.
+ if (isStructReturn) {
+ assert(I != E && "Invalid struct return function!");
+ ++I;
+ ++Idx;
+ }
+
+ for (; I != E; ++I) {
+ if (PrintedArg) FunctionInnards << ", ";
+ Type *ArgTy = *I;
+ if (PAL.paramHasAttr(Idx, Attribute::ByVal)) {
+ assert(ArgTy->isPointerTy());
+ ArgTy = cast<PointerType>(ArgTy)->getElementType();
+ }
+ printType(FunctionInnards, ArgTy,
+ /*isSigned=*/PAL.paramHasAttr(Idx, Attribute::SExt));
+ PrintedArg = true;
+ ++Idx;
+ }
+ }
+
+ if (!PrintedArg && FT->isVarArg()) {
+ FunctionInnards << "int vararg_dummy_arg";
+ PrintedArg = true;
+ }
+
+ // Finish printing arguments... if this is a vararg function, print the ...,
+ // unless there are no known types, in which case, we just emit ().
+ //
+ if (FT->isVarArg() && PrintedArg) {
+ FunctionInnards << ",..."; // Output varargs portion of signature!
+ } else if (!FT->isVarArg() && !PrintedArg) {
+ FunctionInnards << "void"; // ret() -> ret(void) in C.
+ }
+ FunctionInnards << ')';
+
+ // Get the return tpe for the function.
+ Type *RetTy;
+ if (!isStructReturn)
+ RetTy = F->getReturnType();
+ else {
+ // If this is a struct-return function, print the struct-return type.
+ RetTy = cast<PointerType>(FT->getParamType(0))->getElementType();
+ }
+
+ // Print out the return type and the signature built above.
+ printType(Out, RetTy,
+ /*isSigned=*/PAL.paramHasAttr(0, Attribute::SExt),
+ FunctionInnards.str());
+}
+
+static inline bool isFPIntBitCast(const Instruction &I) {
+ if (!isa<BitCastInst>(I))
+ return false;
+ Type *SrcTy = I.getOperand(0)->getType();
+ Type *DstTy = I.getType();
+ return (SrcTy->isFloatingPointTy() && DstTy->isIntegerTy()) ||
+ (DstTy->isFloatingPointTy() && SrcTy->isIntegerTy());
+}
+
+void CWriter::printFunction(Function &F) {
+ /// isStructReturn - Should this function actually return a struct by-value?
+ bool isStructReturn = F.hasStructRetAttr();
+
+ printFunctionSignature(&F, false);
+ Out << " {\n";
+
+ // If this is a struct return function, handle the result with magic.
+ if (isStructReturn) {
+ Type *StructTy =
+ cast<PointerType>(F.arg_begin()->getType())->getElementType();
+ Out << " ";
+ printType(Out, StructTy, false, "StructReturn");
+ Out << "; /* Struct return temporary */\n";
+
+ Out << " ";
+ printType(Out, F.arg_begin()->getType(), false,
+ GetValueName(F.arg_begin()));
+ Out << " = &StructReturn;\n";
+ }
+
+ bool PrintedVar = false;
+
+ // print local variable information for the function
+ for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ++I) {
+ if (const AllocaInst *AI = isDirectAlloca(&*I)) {
+ Out << " ";
+ printType(Out, AI->getAllocatedType(), false, GetValueName(AI));
+ Out << "; /* Address-exposed local */\n";
+ PrintedVar = true;
+ } else if (I->getType() != Type::getVoidTy(F.getContext()) &&
+ !isInlinableInst(*I)) {
+ Out << " ";
+ printType(Out, I->getType(), false, GetValueName(&*I));
+ Out << ";\n";
+
+ if (isa<PHINode>(*I)) { // Print out PHI node temporaries as well...
+ Out << " ";
+ printType(Out, I->getType(), false,
+ GetValueName(&*I)+"__PHI_TEMPORARY");
+ Out << ";\n";
+ }
+ PrintedVar = true;
+ }
+ // We need a temporary for the BitCast to use so it can pluck a value out
+ // of a union to do the BitCast. This is separate from the need for a
+ // variable to hold the result of the BitCast.
+ if (isFPIntBitCast(*I)) {
+ Out << " llvmBitCastUnion " << GetValueName(&*I)
+ << "__BITCAST_TEMPORARY;\n";
+ PrintedVar = true;
+ }
+ }
+
+ if (PrintedVar)
+ Out << '\n';
+
+ if (F.hasExternalLinkage() && F.getName() == "main")
+ Out << " CODE_FOR_MAIN();\n";
+
+ // print the basic blocks
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ if (Loop *L = LI->getLoopFor(BB)) {
+ if (L->getHeader() == BB && L->getParentLoop() == 0)
+ printLoop(L);
+ } else {
+ printBasicBlock(BB);
+ }
+ }
+
+ Out << "}\n\n";
+}
+
+void CWriter::printLoop(Loop *L) {
+ Out << " do { /* Syntactic loop '" << L->getHeader()->getName()
+ << "' to make GCC happy */\n";
+ for (unsigned i = 0, e = L->getBlocks().size(); i != e; ++i) {
+ BasicBlock *BB = L->getBlocks()[i];
+ Loop *BBLoop = LI->getLoopFor(BB);
+ if (BBLoop == L)
+ printBasicBlock(BB);
+ else if (BB == BBLoop->getHeader() && BBLoop->getParentLoop() == L)
+ printLoop(BBLoop);
+ }
+ Out << " } while (1); /* end of syntactic loop '"
+ << L->getHeader()->getName() << "' */\n";
+}
+
+void CWriter::printBasicBlock(BasicBlock *BB) {
+
+ // Don't print the label for the basic block if there are no uses, or if
+ // the only terminator use is the predecessor basic block's terminator.
+ // We have to scan the use list because PHI nodes use basic blocks too but
+ // do not require a label to be generated.
+ //
+ bool NeedsLabel = false;
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+ if (isGotoCodeNecessary(*PI, BB)) {
+ NeedsLabel = true;
+ break;
+ }
+
+ if (NeedsLabel) Out << GetValueName(BB) << ":\n";
+
+ // Output all of the instructions in the basic block...
+ for (BasicBlock::iterator II = BB->begin(), E = --BB->end(); II != E;
+ ++II) {
+ if (!isInlinableInst(*II) && !isDirectAlloca(II)) {
+ if (II->getType() != Type::getVoidTy(BB->getContext()) &&
+ !isInlineAsm(*II))
+ outputLValue(II);
+ else
+ Out << " ";
+ writeInstComputationInline(*II);
+ Out << ";\n";
+ }
+ }
+
+ // Don't emit prefix or suffix for the terminator.
+ visit(*BB->getTerminator());
+}
+
+
+// Specific Instruction type classes... note that all of the casts are
+// necessary because we use the instruction classes as opaque types...
+//
+void CWriter::visitReturnInst(ReturnInst &I) {
+ // If this is a struct return function, return the temporary struct.
+ bool isStructReturn = I.getParent()->getParent()->hasStructRetAttr();
+
+ if (isStructReturn) {
+ Out << " return StructReturn;\n";
+ return;
+ }
+
+ // Don't output a void return if this is the last basic block in the function
+ if (I.getNumOperands() == 0 &&
+ &*--I.getParent()->getParent()->end() == I.getParent() &&
+ !I.getParent()->size() == 1) {
+ return;
+ }
+
+ Out << " return";
+ if (I.getNumOperands()) {
+ Out << ' ';
+ writeOperand(I.getOperand(0));
+ }
+ Out << ";\n";
+}
+
+void CWriter::visitSwitchInst(SwitchInst &SI) {
+
+ Value* Cond = SI.getCondition();
+
+ Out << " switch (";
+ writeOperand(Cond);
+ Out << ") {\n default:\n";
+ printPHICopiesForSuccessor (SI.getParent(), SI.getDefaultDest(), 2);
+ printBranchToBlock(SI.getParent(), SI.getDefaultDest(), 2);
+ Out << ";\n";
+
+ unsigned NumCases = SI.getNumCases();
+ // Skip the first item since that's the default case.
+ for (unsigned i = 1; i < NumCases; ++i) {
+ ConstantInt* CaseVal = SI.getCaseValue(i);
+ BasicBlock* Succ = SI.getSuccessor(i);
+ Out << " case ";
+ writeOperand(CaseVal);
+ Out << ":\n";
+ printPHICopiesForSuccessor (SI.getParent(), Succ, 2);
+ printBranchToBlock(SI.getParent(), Succ, 2);
+ if (Function::iterator(Succ) == llvm::next(Function::iterator(SI.getParent())))
+ Out << " break;\n";
+ }
+
+ Out << " }\n";
+}
+
+void CWriter::visitIndirectBrInst(IndirectBrInst &IBI) {
+ Out << " goto *(void*)(";
+ writeOperand(IBI.getOperand(0));
+ Out << ");\n";
+}
+
+void CWriter::visitUnreachableInst(UnreachableInst &I) {
+ Out << " /*UNREACHABLE*/;\n";
+}
+
+bool CWriter::isGotoCodeNecessary(BasicBlock *From, BasicBlock *To) {
+ /// FIXME: This should be reenabled, but loop reordering safe!!
+ return true;
+
+ if (llvm::next(Function::iterator(From)) != Function::iterator(To))
+ return true; // Not the direct successor, we need a goto.
+
+ //isa<SwitchInst>(From->getTerminator())
+
+ if (LI->getLoopFor(From) != LI->getLoopFor(To))
+ return true;
+ return false;
+}
+
+void CWriter::printPHICopiesForSuccessor (BasicBlock *CurBlock,
+ BasicBlock *Successor,
+ unsigned Indent) {
+ for (BasicBlock::iterator I = Successor->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PN = cast<PHINode>(I);
+ // Now we have to do the printing.
+ Value *IV = PN->getIncomingValueForBlock(CurBlock);
+ if (!isa<UndefValue>(IV)) {
+ Out << std::string(Indent, ' ');
+ Out << " " << GetValueName(I) << "__PHI_TEMPORARY = ";
+ writeOperand(IV);
+ Out << "; /* for PHI node */\n";
+ }
+ }
+}
+
+void CWriter::printBranchToBlock(BasicBlock *CurBB, BasicBlock *Succ,
+ unsigned Indent) {
+ if (isGotoCodeNecessary(CurBB, Succ)) {
+ Out << std::string(Indent, ' ') << " goto ";
+ writeOperand(Succ);
+ Out << ";\n";
+ }
+}
+
+// Branch instruction printing - Avoid printing out a branch to a basic block
+// that immediately succeeds the current one.
+//
+void CWriter::visitBranchInst(BranchInst &I) {
+
+ if (I.isConditional()) {
+ if (isGotoCodeNecessary(I.getParent(), I.getSuccessor(0))) {
+ Out << " if (";
+ writeOperand(I.getCondition());
+ Out << ") {\n";
+
+ printPHICopiesForSuccessor (I.getParent(), I.getSuccessor(0), 2);
+ printBranchToBlock(I.getParent(), I.getSuccessor(0), 2);
+
+ if (isGotoCodeNecessary(I.getParent(), I.getSuccessor(1))) {
+ Out << " } else {\n";
+ printPHICopiesForSuccessor (I.getParent(), I.getSuccessor(1), 2);
+ printBranchToBlock(I.getParent(), I.getSuccessor(1), 2);
+ }
+ } else {
+ // First goto not necessary, assume second one is...
+ Out << " if (!";
+ writeOperand(I.getCondition());
+ Out << ") {\n";
+
+ printPHICopiesForSuccessor (I.getParent(), I.getSuccessor(1), 2);
+ printBranchToBlock(I.getParent(), I.getSuccessor(1), 2);
+ }
+
+ Out << " }\n";
+ } else {
+ printPHICopiesForSuccessor (I.getParent(), I.getSuccessor(0), 0);
+ printBranchToBlock(I.getParent(), I.getSuccessor(0), 0);
+ }
+ Out << "\n";
+}
+
+// PHI nodes get copied into temporary values at the end of predecessor basic
+// blocks. We now need to copy these temporary values into the REAL value for
+// the PHI.
+void CWriter::visitPHINode(PHINode &I) {
+ writeOperand(&I);
+ Out << "__PHI_TEMPORARY";
+}
+
+
+void CWriter::visitBinaryOperator(Instruction &I) {
+ // binary instructions, shift instructions, setCond instructions.
+ assert(!I.getType()->isPointerTy());
+
+ // We must cast the results of binary operations which might be promoted.
+ bool needsCast = false;
+ if ((I.getType() == Type::getInt8Ty(I.getContext())) ||
+ (I.getType() == Type::getInt16Ty(I.getContext()))
+ || (I.getType() == Type::getFloatTy(I.getContext()))) {
+ needsCast = true;
+ Out << "((";
+ printType(Out, I.getType(), false);
+ Out << ")(";
+ }
+
+ // If this is a negation operation, print it out as such. For FP, we don't
+ // want to print "-0.0 - X".
+ if (BinaryOperator::isNeg(&I)) {
+ Out << "-(";
+ writeOperand(BinaryOperator::getNegArgument(cast<BinaryOperator>(&I)));
+ Out << ")";
+ } else if (BinaryOperator::isFNeg(&I)) {
+ Out << "-(";
+ writeOperand(BinaryOperator::getFNegArgument(cast<BinaryOperator>(&I)));
+ Out << ")";
+ } else if (I.getOpcode() == Instruction::FRem) {
+ // Output a call to fmod/fmodf instead of emitting a%b
+ if (I.getType() == Type::getFloatTy(I.getContext()))
+ Out << "fmodf(";
+ else if (I.getType() == Type::getDoubleTy(I.getContext()))
+ Out << "fmod(";
+ else // all 3 flavors of long double
+ Out << "fmodl(";
+ writeOperand(I.getOperand(0));
+ Out << ", ";
+ writeOperand(I.getOperand(1));
+ Out << ")";
+ } else {
+
+ // Write out the cast of the instruction's value back to the proper type
+ // if necessary.
+ bool NeedsClosingParens = writeInstructionCast(I);
+
+ // Certain instructions require the operand to be forced to a specific type
+ // so we use writeOperandWithCast here instead of writeOperand. Similarly
+ // below for operand 1
+ writeOperandWithCast(I.getOperand(0), I.getOpcode());
+
+ switch (I.getOpcode()) {
+ case Instruction::Add:
+ case Instruction::FAdd: Out << " + "; break;
+ case Instruction::Sub:
+ case Instruction::FSub: Out << " - "; break;
+ case Instruction::Mul:
+ case Instruction::FMul: Out << " * "; break;
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem: Out << " % "; break;
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv: Out << " / "; break;
+ case Instruction::And: Out << " & "; break;
+ case Instruction::Or: Out << " | "; break;
+ case Instruction::Xor: Out << " ^ "; break;
+ case Instruction::Shl : Out << " << "; break;
+ case Instruction::LShr:
+ case Instruction::AShr: Out << " >> "; break;
+ default:
+#ifndef NDEBUG
+ errs() << "Invalid operator type!" << I;
+#endif
+ llvm_unreachable(0);
+ }
+
+ writeOperandWithCast(I.getOperand(1), I.getOpcode());
+ if (NeedsClosingParens)
+ Out << "))";
+ }
+
+ if (needsCast) {
+ Out << "))";
+ }
+}
+
+void CWriter::visitICmpInst(ICmpInst &I) {
+ // We must cast the results of icmp which might be promoted.
+ bool needsCast = false;
+
+ // Write out the cast of the instruction's value back to the proper type
+ // if necessary.
+ bool NeedsClosingParens = writeInstructionCast(I);
+
+ // Certain icmp predicate require the operand to be forced to a specific type
+ // so we use writeOperandWithCast here instead of writeOperand. Similarly
+ // below for operand 1
+ writeOperandWithCast(I.getOperand(0), I);
+
+ switch (I.getPredicate()) {
+ case ICmpInst::ICMP_EQ: Out << " == "; break;
+ case ICmpInst::ICMP_NE: Out << " != "; break;
+ case ICmpInst::ICMP_ULE:
+ case ICmpInst::ICMP_SLE: Out << " <= "; break;
+ case ICmpInst::ICMP_UGE:
+ case ICmpInst::ICMP_SGE: Out << " >= "; break;
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_SLT: Out << " < "; break;
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_SGT: Out << " > "; break;
+ default:
+#ifndef NDEBUG
+ errs() << "Invalid icmp predicate!" << I;
+#endif
+ llvm_unreachable(0);
+ }
+
+ writeOperandWithCast(I.getOperand(1), I);
+ if (NeedsClosingParens)
+ Out << "))";
+
+ if (needsCast) {
+ Out << "))";
+ }
+}
+
+void CWriter::visitFCmpInst(FCmpInst &I) {
+ if (I.getPredicate() == FCmpInst::FCMP_FALSE) {
+ Out << "0";
+ return;
+ }
+ if (I.getPredicate() == FCmpInst::FCMP_TRUE) {
+ Out << "1";
+ return;
+ }
+
+ const char* op = 0;
+ switch (I.getPredicate()) {
+ default: llvm_unreachable("Illegal FCmp predicate");
+ case FCmpInst::FCMP_ORD: op = "ord"; break;
+ case FCmpInst::FCMP_UNO: op = "uno"; break;
+ case FCmpInst::FCMP_UEQ: op = "ueq"; break;
+ case FCmpInst::FCMP_UNE: op = "une"; break;
+ case FCmpInst::FCMP_ULT: op = "ult"; break;
+ case FCmpInst::FCMP_ULE: op = "ule"; break;
+ case FCmpInst::FCMP_UGT: op = "ugt"; break;
+ case FCmpInst::FCMP_UGE: op = "uge"; break;
+ case FCmpInst::FCMP_OEQ: op = "oeq"; break;
+ case FCmpInst::FCMP_ONE: op = "one"; break;
+ case FCmpInst::FCMP_OLT: op = "olt"; break;
+ case FCmpInst::FCMP_OLE: op = "ole"; break;
+ case FCmpInst::FCMP_OGT: op = "ogt"; break;
+ case FCmpInst::FCMP_OGE: op = "oge"; break;
+ }
+
+ Out << "llvm_fcmp_" << op << "(";
+ // Write the first operand
+ writeOperand(I.getOperand(0));
+ Out << ", ";
+ // Write the second operand
+ writeOperand(I.getOperand(1));
+ Out << ")";
+}
+
+static const char * getFloatBitCastField(Type *Ty) {
+ switch (Ty->getTypeID()) {
+ default: llvm_unreachable("Invalid Type");
+ case Type::FloatTyID: return "Float";
+ case Type::DoubleTyID: return "Double";
+ case Type::IntegerTyID: {
+ unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth();
+ if (NumBits <= 32)
+ return "Int32";
+ else
+ return "Int64";
+ }
+ }
+}
+
+void CWriter::visitCastInst(CastInst &I) {
+ Type *DstTy = I.getType();
+ Type *SrcTy = I.getOperand(0)->getType();
+ if (isFPIntBitCast(I)) {
+ Out << '(';
+ // These int<->float and long<->double casts need to be handled specially
+ Out << GetValueName(&I) << "__BITCAST_TEMPORARY."
+ << getFloatBitCastField(I.getOperand(0)->getType()) << " = ";
+ writeOperand(I.getOperand(0));
+ Out << ", " << GetValueName(&I) << "__BITCAST_TEMPORARY."
+ << getFloatBitCastField(I.getType());
+ Out << ')';
+ return;
+ }
+
+ Out << '(';
+ printCast(I.getOpcode(), SrcTy, DstTy);
+
+ // Make a sext from i1 work by subtracting the i1 from 0 (an int).
+ if (SrcTy == Type::getInt1Ty(I.getContext()) &&
+ I.getOpcode() == Instruction::SExt)
+ Out << "0-";
+
+ writeOperand(I.getOperand(0));
+
+ if (DstTy == Type::getInt1Ty(I.getContext()) &&
+ (I.getOpcode() == Instruction::Trunc ||
+ I.getOpcode() == Instruction::FPToUI ||
+ I.getOpcode() == Instruction::FPToSI ||
+ I.getOpcode() == Instruction::PtrToInt)) {
+ // Make sure we really get a trunc to bool by anding the operand with 1
+ Out << "&1u";
+ }
+ Out << ')';
+}
+
+void CWriter::visitSelectInst(SelectInst &I) {
+ Out << "((";
+ writeOperand(I.getCondition());
+ Out << ") ? (";
+ writeOperand(I.getTrueValue());
+ Out << ") : (";
+ writeOperand(I.getFalseValue());
+ Out << "))";
+}
+
+// Returns the macro name or value of the max or min of an integer type
+// (as defined in limits.h).
+static void printLimitValue(IntegerType &Ty, bool isSigned, bool isMax,
+ raw_ostream &Out) {
+ const char* type;
+ const char* sprefix = "";
+
+ unsigned NumBits = Ty.getBitWidth();
+ if (NumBits <= 8) {
+ type = "CHAR";
+ sprefix = "S";
+ } else if (NumBits <= 16) {
+ type = "SHRT";
+ } else if (NumBits <= 32) {
+ type = "INT";
+ } else if (NumBits <= 64) {
+ type = "LLONG";
+ } else {
+ llvm_unreachable("Bit widths > 64 not implemented yet");
+ }
+
+ if (isSigned)
+ Out << sprefix << type << (isMax ? "_MAX" : "_MIN");
+ else
+ Out << "U" << type << (isMax ? "_MAX" : "0");
+}
+
+#ifndef NDEBUG
+static bool isSupportedIntegerSize(IntegerType &T) {
+ return T.getBitWidth() == 8 || T.getBitWidth() == 16 ||
+ T.getBitWidth() == 32 || T.getBitWidth() == 64;
+}
+#endif
+
+void CWriter::printIntrinsicDefinition(const Function &F, raw_ostream &Out) {
+ FunctionType *funT = F.getFunctionType();
+ Type *retT = F.getReturnType();
+ IntegerType *elemT = cast<IntegerType>(funT->getParamType(1));
+
+ assert(isSupportedIntegerSize(*elemT) &&
+ "CBackend does not support arbitrary size integers.");
+ assert(cast<StructType>(retT)->getElementType(0) == elemT &&
+ elemT == funT->getParamType(0) && funT->getNumParams() == 2);
+
+ switch (F.getIntrinsicID()) {
+ default:
+ llvm_unreachable("Unsupported Intrinsic.");
+ case Intrinsic::uadd_with_overflow:
+ // static inline Rty uadd_ixx(unsigned ixx a, unsigned ixx b) {
+ // Rty r;
+ // r.field0 = a + b;
+ // r.field1 = (r.field0 < a);
+ // return r;
+ // }
+ Out << "static inline ";
+ printType(Out, retT);
+ Out << GetValueName(&F);
+ Out << "(";
+ printSimpleType(Out, elemT, false);
+ Out << "a,";
+ printSimpleType(Out, elemT, false);
+ Out << "b) {\n ";
+ printType(Out, retT);
+ Out << "r;\n";
+ Out << " r.field0 = a + b;\n";
+ Out << " r.field1 = (r.field0 < a);\n";
+ Out << " return r;\n}\n";
+ break;
+
+ case Intrinsic::sadd_with_overflow:
+ // static inline Rty sadd_ixx(ixx a, ixx b) {
+ // Rty r;
+ // r.field1 = (b > 0 && a > XX_MAX - b) ||
+ // (b < 0 && a < XX_MIN - b);
+ // r.field0 = r.field1 ? 0 : a + b;
+ // return r;
+ // }
+ Out << "static ";
+ printType(Out, retT);
+ Out << GetValueName(&F);
+ Out << "(";
+ printSimpleType(Out, elemT, true);
+ Out << "a,";
+ printSimpleType(Out, elemT, true);
+ Out << "b) {\n ";
+ printType(Out, retT);
+ Out << "r;\n";
+ Out << " r.field1 = (b > 0 && a > ";
+ printLimitValue(*elemT, true, true, Out);
+ Out << " - b) || (b < 0 && a < ";
+ printLimitValue(*elemT, true, false, Out);
+ Out << " - b);\n";
+ Out << " r.field0 = r.field1 ? 0 : a + b;\n";
+ Out << " return r;\n}\n";
+ break;
+ }
+}
+
+void CWriter::lowerIntrinsics(Function &F) {
+ // This is used to keep track of intrinsics that get generated to a lowered
+ // function. We must generate the prototypes before the function body which
+ // will only be expanded on first use (by the loop below).
+ std::vector<Function*> prototypesToGen;
+
+ // Examine all the instructions in this function to find the intrinsics that
+ // need to be lowered.
+ for (Function::iterator BB = F.begin(), EE = F.end(); BB != EE; ++BB)
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; )
+ if (CallInst *CI = dyn_cast<CallInst>(I++))
+ if (Function *F = CI->getCalledFunction())
+ switch (F->getIntrinsicID()) {
+ case Intrinsic::not_intrinsic:
+ case Intrinsic::vastart:
+ case Intrinsic::vacopy:
+ case Intrinsic::vaend:
+ case Intrinsic::returnaddress:
+ case Intrinsic::frameaddress:
+ case Intrinsic::setjmp:
+ case Intrinsic::longjmp:
+ case Intrinsic::prefetch:
+ case Intrinsic::powi:
+ case Intrinsic::x86_sse_cmp_ss:
+ case Intrinsic::x86_sse_cmp_ps:
+ case Intrinsic::x86_sse2_cmp_sd:
+ case Intrinsic::x86_sse2_cmp_pd:
+ case Intrinsic::ppc_altivec_lvsl:
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::sadd_with_overflow:
+ // We directly implement these intrinsics
+ break;
+ default:
+ // If this is an intrinsic that directly corresponds to a GCC
+ // builtin, we handle it.
+ const char *BuiltinName = "";
+#define GET_GCC_BUILTIN_NAME
+#include "llvm/Intrinsics.gen"
+#undef GET_GCC_BUILTIN_NAME
+ // If we handle it, don't lower it.
+ if (BuiltinName[0]) break;
+
+ // All other intrinsic calls we must lower.
+ Instruction *Before = 0;
+ if (CI != &BB->front())
+ Before = prior(BasicBlock::iterator(CI));
+
+ IL->LowerIntrinsicCall(CI);
+ if (Before) { // Move iterator to instruction after call
+ I = Before; ++I;
+ } else {
+ I = BB->begin();
+ }
+ // If the intrinsic got lowered to another call, and that call has
+ // a definition then we need to make sure its prototype is emitted
+ // before any calls to it.
+ if (CallInst *Call = dyn_cast<CallInst>(I))
+ if (Function *NewF = Call->getCalledFunction())
+ if (!NewF->isDeclaration())
+ prototypesToGen.push_back(NewF);
+
+ break;
+ }
+
+ // We may have collected some prototypes to emit in the loop above.
+ // Emit them now, before the function that uses them is emitted. But,
+ // be careful not to emit them twice.
+ std::vector<Function*>::iterator I = prototypesToGen.begin();
+ std::vector<Function*>::iterator E = prototypesToGen.end();
+ for ( ; I != E; ++I) {
+ if (intrinsicPrototypesAlreadyGenerated.insert(*I).second) {
+ Out << '\n';
+ printFunctionSignature(*I, true);
+ Out << ";\n";
+ }
+ }
+}
+
+void CWriter::visitCallInst(CallInst &I) {
+ if (isa<InlineAsm>(I.getCalledValue()))
+ return visitInlineAsm(I);
+
+ bool WroteCallee = false;
+
+ // Handle intrinsic function calls first...
+ if (Function *F = I.getCalledFunction())
+ if (Intrinsic::ID ID = (Intrinsic::ID)F->getIntrinsicID())
+ if (visitBuiltinCall(I, ID, WroteCallee))
+ return;
+
+ Value *Callee = I.getCalledValue();
+
+ PointerType *PTy = cast<PointerType>(Callee->getType());
+ FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
+
+ // If this is a call to a struct-return function, assign to the first
+ // parameter instead of passing it to the call.
+ const AttrListPtr &PAL = I.getAttributes();
+ bool hasByVal = I.hasByValArgument();
+ bool isStructRet = I.hasStructRetAttr();
+ if (isStructRet) {
+ writeOperandDeref(I.getArgOperand(0));
+ Out << " = ";
+ }
+
+ if (I.isTailCall()) Out << " /*tail*/ ";
+
+ if (!WroteCallee) {
+ // If this is an indirect call to a struct return function, we need to cast
+ // the pointer. Ditto for indirect calls with byval arguments.
+ bool NeedsCast = (hasByVal || isStructRet) && !isa<Function>(Callee);
+
+ // GCC is a real PITA. It does not permit codegening casts of functions to
+ // function pointers if they are in a call (it generates a trap instruction
+ // instead!). We work around this by inserting a cast to void* in between
+ // the function and the function pointer cast. Unfortunately, we can't just
+ // form the constant expression here, because the folder will immediately
+ // nuke it.
+ //
+ // Note finally, that this is completely unsafe. ANSI C does not guarantee
+ // that void* and function pointers have the same size. :( To deal with this
+ // in the common case, we handle casts where the number of arguments passed
+ // match exactly.
+ //
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Callee))
+ if (CE->isCast())
+ if (Function *RF = dyn_cast<Function>(CE->getOperand(0))) {
+ NeedsCast = true;
+ Callee = RF;
+ }
+
+ if (NeedsCast) {
+ // Ok, just cast the pointer type.
+ Out << "((";
+ if (isStructRet)
+ printStructReturnPointerFunctionType(Out, PAL,
+ cast<PointerType>(I.getCalledValue()->getType()));
+ else if (hasByVal)
+ printType(Out, I.getCalledValue()->getType(), false, "", true, PAL);
+ else
+ printType(Out, I.getCalledValue()->getType());
+ Out << ")(void*)";
+ }
+ writeOperand(Callee);
+ if (NeedsCast) Out << ')';
+ }
+
+ Out << '(';
+
+ bool PrintedArg = false;
+ if(FTy->isVarArg() && !FTy->getNumParams()) {
+ Out << "0 /*dummy arg*/";
+ PrintedArg = true;
+ }
+
+ unsigned NumDeclaredParams = FTy->getNumParams();
+ CallSite CS(&I);
+ CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
+ unsigned ArgNo = 0;
+ if (isStructRet) { // Skip struct return argument.
+ ++AI;
+ ++ArgNo;
+ }
+
+
+ for (; AI != AE; ++AI, ++ArgNo) {
+ if (PrintedArg) Out << ", ";
+ if (ArgNo < NumDeclaredParams &&
+ (*AI)->getType() != FTy->getParamType(ArgNo)) {
+ Out << '(';
+ printType(Out, FTy->getParamType(ArgNo),
+ /*isSigned=*/PAL.paramHasAttr(ArgNo+1, Attribute::SExt));
+ Out << ')';
+ }
+ // Check if the argument is expected to be passed by value.
+ if (I.paramHasAttr(ArgNo+1, Attribute::ByVal))
+ writeOperandDeref(*AI);
+ else
+ writeOperand(*AI);
+ PrintedArg = true;
+ }
+ Out << ')';
+}
+
+/// visitBuiltinCall - Handle the call to the specified builtin. Returns true
+/// if the entire call is handled, return false if it wasn't handled, and
+/// optionally set 'WroteCallee' if the callee has already been printed out.
+bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID,
+ bool &WroteCallee) {
+ switch (ID) {
+ default: {
+ // If this is an intrinsic that directly corresponds to a GCC
+ // builtin, we emit it here.
+ const char *BuiltinName = "";
+ Function *F = I.getCalledFunction();
+#define GET_GCC_BUILTIN_NAME
+#include "llvm/Intrinsics.gen"
+#undef GET_GCC_BUILTIN_NAME
+ assert(BuiltinName[0] && "Unknown LLVM intrinsic!");
+
+ Out << BuiltinName;
+ WroteCallee = true;
+ return false;
+ }
+ case Intrinsic::vastart:
+ Out << "0; ";
+
+ Out << "va_start(*(va_list*)";
+ writeOperand(I.getArgOperand(0));
+ Out << ", ";
+ // Output the last argument to the enclosing function.
+ if (I.getParent()->getParent()->arg_empty())
+ Out << "vararg_dummy_arg";
+ else
+ writeOperand(--I.getParent()->getParent()->arg_end());
+ Out << ')';
+ return true;
+ case Intrinsic::vaend:
+ if (!isa<ConstantPointerNull>(I.getArgOperand(0))) {
+ Out << "0; va_end(*(va_list*)";
+ writeOperand(I.getArgOperand(0));
+ Out << ')';
+ } else {
+ Out << "va_end(*(va_list*)0)";
+ }
+ return true;
+ case Intrinsic::vacopy:
+ Out << "0; ";
+ Out << "va_copy(*(va_list*)";
+ writeOperand(I.getArgOperand(0));
+ Out << ", *(va_list*)";
+ writeOperand(I.getArgOperand(1));
+ Out << ')';
+ return true;
+ case Intrinsic::returnaddress:
+ Out << "__builtin_return_address(";
+ writeOperand(I.getArgOperand(0));
+ Out << ')';
+ return true;
+ case Intrinsic::frameaddress:
+ Out << "__builtin_frame_address(";
+ writeOperand(I.getArgOperand(0));
+ Out << ')';
+ return true;
+ case Intrinsic::powi:
+ Out << "__builtin_powi(";
+ writeOperand(I.getArgOperand(0));
+ Out << ", ";
+ writeOperand(I.getArgOperand(1));
+ Out << ')';
+ return true;
+ case Intrinsic::setjmp:
+ Out << "setjmp(*(jmp_buf*)";
+ writeOperand(I.getArgOperand(0));
+ Out << ')';
+ return true;
+ case Intrinsic::longjmp:
+ Out << "longjmp(*(jmp_buf*)";
+ writeOperand(I.getArgOperand(0));
+ Out << ", ";
+ writeOperand(I.getArgOperand(1));
+ Out << ')';
+ return true;
+ case Intrinsic::prefetch:
+ Out << "LLVM_PREFETCH((const void *)";
+ writeOperand(I.getArgOperand(0));
+ Out << ", ";
+ writeOperand(I.getArgOperand(1));
+ Out << ", ";
+ writeOperand(I.getArgOperand(2));
+ Out << ")";
+ return true;
+ case Intrinsic::stacksave:
+ // Emit this as: Val = 0; *((void**)&Val) = __builtin_stack_save()
+ // to work around GCC bugs (see PR1809).
+ Out << "0; *((void**)&" << GetValueName(&I)
+ << ") = __builtin_stack_save()";
+ return true;
+ case Intrinsic::x86_sse_cmp_ss:
+ case Intrinsic::x86_sse_cmp_ps:
+ case Intrinsic::x86_sse2_cmp_sd:
+ case Intrinsic::x86_sse2_cmp_pd:
+ Out << '(';
+ printType(Out, I.getType());
+ Out << ')';
+ // Multiple GCC builtins multiplex onto this intrinsic.
+ switch (cast<ConstantInt>(I.getArgOperand(2))->getZExtValue()) {
+ default: llvm_unreachable("Invalid llvm.x86.sse.cmp!");
+ case 0: Out << "__builtin_ia32_cmpeq"; break;
+ case 1: Out << "__builtin_ia32_cmplt"; break;
+ case 2: Out << "__builtin_ia32_cmple"; break;
+ case 3: Out << "__builtin_ia32_cmpunord"; break;
+ case 4: Out << "__builtin_ia32_cmpneq"; break;
+ case 5: Out << "__builtin_ia32_cmpnlt"; break;
+ case 6: Out << "__builtin_ia32_cmpnle"; break;
+ case 7: Out << "__builtin_ia32_cmpord"; break;
+ }
+ if (ID == Intrinsic::x86_sse_cmp_ps || ID == Intrinsic::x86_sse2_cmp_pd)
+ Out << 'p';
+ else
+ Out << 's';
+ if (ID == Intrinsic::x86_sse_cmp_ss || ID == Intrinsic::x86_sse_cmp_ps)
+ Out << 's';
+ else
+ Out << 'd';
+
+ Out << "(";
+ writeOperand(I.getArgOperand(0));
+ Out << ", ";
+ writeOperand(I.getArgOperand(1));
+ Out << ")";
+ return true;
+ case Intrinsic::ppc_altivec_lvsl:
+ Out << '(';
+ printType(Out, I.getType());
+ Out << ')';
+ Out << "__builtin_altivec_lvsl(0, (void*)";
+ writeOperand(I.getArgOperand(0));
+ Out << ")";
+ return true;
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::sadd_with_overflow:
+ Out << GetValueName(I.getCalledFunction()) << "(";
+ writeOperand(I.getArgOperand(0));
+ Out << ", ";
+ writeOperand(I.getArgOperand(1));
+ Out << ")";
+ return true;
+ }
+}
+
+//This converts the llvm constraint string to something gcc is expecting.
+//TODO: work out platform independent constraints and factor those out
+// of the per target tables
+// handle multiple constraint codes
+std::string CWriter::InterpretASMConstraint(InlineAsm::ConstraintInfo& c) {
+ assert(c.Codes.size() == 1 && "Too many asm constraint codes to handle");
+
+ // Grab the translation table from MCAsmInfo if it exists.
+ const MCAsmInfo *TargetAsm;
+ std::string Triple = TheModule->getTargetTriple();
+ if (Triple.empty())
+ Triple = llvm::sys::getHostTriple();
+
+ std::string E;
+ if (const Target *Match = TargetRegistry::lookupTarget(Triple, E))
+ TargetAsm = Match->createMCAsmInfo(Triple);
+ else
+ return c.Codes[0];
+
+ const char *const *table = TargetAsm->getAsmCBE();
+
+ // Search the translation table if it exists.
+ for (int i = 0; table && table[i]; i += 2)
+ if (c.Codes[0] == table[i]) {
+ delete TargetAsm;
+ return table[i+1];
+ }
+
+ // Default is identity.
+ delete TargetAsm;
+ return c.Codes[0];
+}
+
+//TODO: import logic from AsmPrinter.cpp
+static std::string gccifyAsm(std::string asmstr) {
+ for (std::string::size_type i = 0; i != asmstr.size(); ++i)
+ if (asmstr[i] == '\n')
+ asmstr.replace(i, 1, "\\n");
+ else if (asmstr[i] == '\t')
+ asmstr.replace(i, 1, "\\t");
+ else if (asmstr[i] == '$') {
+ if (asmstr[i + 1] == '{') {
+ std::string::size_type a = asmstr.find_first_of(':', i + 1);
+ std::string::size_type b = asmstr.find_first_of('}', i + 1);
+ std::string n = "%" +
+ asmstr.substr(a + 1, b - a - 1) +
+ asmstr.substr(i + 2, a - i - 2);
+ asmstr.replace(i, b - i + 1, n);
+ i += n.size() - 1;
+ } else
+ asmstr.replace(i, 1, "%");
+ }
+ else if (asmstr[i] == '%')//grr
+ { asmstr.replace(i, 1, "%%"); ++i;}
+
+ return asmstr;
+}
+
+//TODO: assumptions about what consume arguments from the call are likely wrong
+// handle communitivity
+void CWriter::visitInlineAsm(CallInst &CI) {
+ InlineAsm* as = cast<InlineAsm>(CI.getCalledValue());
+ InlineAsm::ConstraintInfoVector Constraints = as->ParseConstraints();
+
+ std::vector<std::pair<Value*, int> > ResultVals;
+ if (CI.getType() == Type::getVoidTy(CI.getContext()))
+ ;
+ else if (StructType *ST = dyn_cast<StructType>(CI.getType())) {
+ for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i)
+ ResultVals.push_back(std::make_pair(&CI, (int)i));
+ } else {
+ ResultVals.push_back(std::make_pair(&CI, -1));
+ }
+
+ // Fix up the asm string for gcc and emit it.
+ Out << "__asm__ volatile (\"" << gccifyAsm(as->getAsmString()) << "\"\n";
+ Out << " :";
+
+ unsigned ValueCount = 0;
+ bool IsFirst = true;
+
+ // Convert over all the output constraints.
+ for (InlineAsm::ConstraintInfoVector::iterator I = Constraints.begin(),
+ E = Constraints.end(); I != E; ++I) {
+
+ if (I->Type != InlineAsm::isOutput) {
+ ++ValueCount;
+ continue; // Ignore non-output constraints.
+ }
+
+ assert(I->Codes.size() == 1 && "Too many asm constraint codes to handle");
+ std::string C = InterpretASMConstraint(*I);
+ if (C.empty()) continue;
+
+ if (!IsFirst) {
+ Out << ", ";
+ IsFirst = false;
+ }
+
+ // Unpack the dest.
+ Value *DestVal;
+ int DestValNo = -1;
+
+ if (ValueCount < ResultVals.size()) {
+ DestVal = ResultVals[ValueCount].first;
+ DestValNo = ResultVals[ValueCount].second;
+ } else
+ DestVal = CI.getArgOperand(ValueCount-ResultVals.size());
+
+ if (I->isEarlyClobber)
+ C = "&"+C;
+
+ Out << "\"=" << C << "\"(" << GetValueName(DestVal);
+ if (DestValNo != -1)
+ Out << ".field" << DestValNo; // Multiple retvals.
+ Out << ")";
+ ++ValueCount;
+ }
+
+
+ // Convert over all the input constraints.
+ Out << "\n :";
+ IsFirst = true;
+ ValueCount = 0;
+ for (InlineAsm::ConstraintInfoVector::iterator I = Constraints.begin(),
+ E = Constraints.end(); I != E; ++I) {
+ if (I->Type != InlineAsm::isInput) {
+ ++ValueCount;
+ continue; // Ignore non-input constraints.
+ }
+
+ assert(I->Codes.size() == 1 && "Too many asm constraint codes to handle");
+ std::string C = InterpretASMConstraint(*I);
+ if (C.empty()) continue;
+
+ if (!IsFirst) {
+ Out << ", ";
+ IsFirst = false;
+ }
+
+ assert(ValueCount >= ResultVals.size() && "Input can't refer to result");
+ Value *SrcVal = CI.getArgOperand(ValueCount-ResultVals.size());
+
+ Out << "\"" << C << "\"(";
+ if (!I->isIndirect)
+ writeOperand(SrcVal);
+ else
+ writeOperandDeref(SrcVal);
+ Out << ")";
+ }
+
+ // Convert over the clobber constraints.
+ IsFirst = true;
+ for (InlineAsm::ConstraintInfoVector::iterator I = Constraints.begin(),
+ E = Constraints.end(); I != E; ++I) {
+ if (I->Type != InlineAsm::isClobber)
+ continue; // Ignore non-input constraints.
+
+ assert(I->Codes.size() == 1 && "Too many asm constraint codes to handle");
+ std::string C = InterpretASMConstraint(*I);
+ if (C.empty()) continue;
+
+ if (!IsFirst) {
+ Out << ", ";
+ IsFirst = false;
+ }
+
+ Out << '\"' << C << '"';
+ }
+
+ Out << ")";
+}
+
+void CWriter::visitAllocaInst(AllocaInst &I) {
+ Out << '(';
+ printType(Out, I.getType());
+ Out << ") alloca(sizeof(";
+ printType(Out, I.getType()->getElementType());
+ Out << ')';
+ if (I.isArrayAllocation()) {
+ Out << " * " ;
+ writeOperand(I.getOperand(0));
+ }
+ Out << ')';
+}
+
+void CWriter::printGEPExpression(Value *Ptr, gep_type_iterator I,
+ gep_type_iterator E, bool Static) {
+
+ // If there are no indices, just print out the pointer.
+ if (I == E) {
+ writeOperand(Ptr);
+ return;
+ }
+
+ // Find out if the last index is into a vector. If so, we have to print this
+ // specially. Since vectors can't have elements of indexable type, only the
+ // last index could possibly be of a vector element.
+ VectorType *LastIndexIsVector = 0;
+ {
+ for (gep_type_iterator TmpI = I; TmpI != E; ++TmpI)
+ LastIndexIsVector = dyn_cast<VectorType>(*TmpI);
+ }
+
+ Out << "(";
+
+ // If the last index is into a vector, we can't print it as &a[i][j] because
+ // we can't index into a vector with j in GCC. Instead, emit this as
+ // (((float*)&a[i])+j)
+ if (LastIndexIsVector) {
+ Out << "((";
+ printType(Out, PointerType::getUnqual(LastIndexIsVector->getElementType()));
+ Out << ")(";
+ }
+
+ Out << '&';
+
+ // If the first index is 0 (very typical) we can do a number of
+ // simplifications to clean up the code.
+ Value *FirstOp = I.getOperand();
+ if (!isa<Constant>(FirstOp) || !cast<Constant>(FirstOp)->isNullValue()) {
+ // First index isn't simple, print it the hard way.
+ writeOperand(Ptr);
+ } else {
+ ++I; // Skip the zero index.
+
+ // Okay, emit the first operand. If Ptr is something that is already address
+ // exposed, like a global, avoid emitting (&foo)[0], just emit foo instead.
+ if (isAddressExposed(Ptr)) {
+ writeOperandInternal(Ptr, Static);
+ } else if (I != E && (*I)->isStructTy()) {
+ // If we didn't already emit the first operand, see if we can print it as
+ // P->f instead of "P[0].f"
+ writeOperand(Ptr);
+ Out << "->field" << cast<ConstantInt>(I.getOperand())->getZExtValue();
+ ++I; // eat the struct index as well.
+ } else {
+ // Instead of emitting P[0][1], emit (*P)[1], which is more idiomatic.
+ Out << "(*";
+ writeOperand(Ptr);
+ Out << ")";
+ }
+ }
+
+ for (; I != E; ++I) {
+ if ((*I)->isStructTy()) {
+ Out << ".field" << cast<ConstantInt>(I.getOperand())->getZExtValue();
+ } else if ((*I)->isArrayTy()) {
+ Out << ".array[";
+ writeOperandWithCast(I.getOperand(), Instruction::GetElementPtr);
+ Out << ']';
+ } else if (!(*I)->isVectorTy()) {
+ Out << '[';
+ writeOperandWithCast(I.getOperand(), Instruction::GetElementPtr);
+ Out << ']';
+ } else {
+ // If the last index is into a vector, then print it out as "+j)". This
+ // works with the 'LastIndexIsVector' code above.
+ if (isa<Constant>(I.getOperand()) &&
+ cast<Constant>(I.getOperand())->isNullValue()) {
+ Out << "))"; // avoid "+0".
+ } else {
+ Out << ")+(";
+ writeOperandWithCast(I.getOperand(), Instruction::GetElementPtr);
+ Out << "))";
+ }
+ }
+ }
+ Out << ")";
+}
+
+void CWriter::writeMemoryAccess(Value *Operand, Type *OperandType,
+ bool IsVolatile, unsigned Alignment) {
+
+ bool IsUnaligned = Alignment &&
+ Alignment < TD->getABITypeAlignment(OperandType);
+
+ if (!IsUnaligned)
+ Out << '*';
+ if (IsVolatile || IsUnaligned) {
+ Out << "((";
+ if (IsUnaligned)
+ Out << "struct __attribute__ ((packed, aligned(" << Alignment << "))) {";
+ printType(Out, OperandType, false, IsUnaligned ? "data" : "volatile*");
+ if (IsUnaligned) {
+ Out << "; } ";
+ if (IsVolatile) Out << "volatile ";
+ Out << "*";
+ }
+ Out << ")";
+ }
+
+ writeOperand(Operand);
+
+ if (IsVolatile || IsUnaligned) {
+ Out << ')';
+ if (IsUnaligned)
+ Out << "->data";
+ }
+}
+
+void CWriter::visitLoadInst(LoadInst &I) {
+ writeMemoryAccess(I.getOperand(0), I.getType(), I.isVolatile(),
+ I.getAlignment());
+
+}
+
+void CWriter::visitStoreInst(StoreInst &I) {
+ writeMemoryAccess(I.getPointerOperand(), I.getOperand(0)->getType(),
+ I.isVolatile(), I.getAlignment());
+ Out << " = ";
+ Value *Operand = I.getOperand(0);
+ Constant *BitMask = 0;
+ if (IntegerType* ITy = dyn_cast<IntegerType>(Operand->getType()))
+ if (!ITy->isPowerOf2ByteWidth())
+ // We have a bit width that doesn't match an even power-of-2 byte
+ // size. Consequently we must & the value with the type's bit mask
+ BitMask = ConstantInt::get(ITy, ITy->getBitMask());
+ if (BitMask)
+ Out << "((";
+ writeOperand(Operand);
+ if (BitMask) {
+ Out << ") & ";
+ printConstant(BitMask, false);
+ Out << ")";
+ }
+}
+
+void CWriter::visitGetElementPtrInst(GetElementPtrInst &I) {
+ printGEPExpression(I.getPointerOperand(), gep_type_begin(I),
+ gep_type_end(I), false);
+}
+
+void CWriter::visitVAArgInst(VAArgInst &I) {
+ Out << "va_arg(*(va_list*)";
+ writeOperand(I.getOperand(0));
+ Out << ", ";
+ printType(Out, I.getType());
+ Out << ");\n ";
+}
+
+void CWriter::visitInsertElementInst(InsertElementInst &I) {
+ Type *EltTy = I.getType()->getElementType();
+ writeOperand(I.getOperand(0));
+ Out << ";\n ";
+ Out << "((";
+ printType(Out, PointerType::getUnqual(EltTy));
+ Out << ")(&" << GetValueName(&I) << "))[";
+ writeOperand(I.getOperand(2));
+ Out << "] = (";
+ writeOperand(I.getOperand(1));
+ Out << ")";
+}
+
+void CWriter::visitExtractElementInst(ExtractElementInst &I) {
+ // We know that our operand is not inlined.
+ Out << "((";
+ Type *EltTy =
+ cast<VectorType>(I.getOperand(0)->getType())->getElementType();
+ printType(Out, PointerType::getUnqual(EltTy));
+ Out << ")(&" << GetValueName(I.getOperand(0)) << "))[";
+ writeOperand(I.getOperand(1));
+ Out << "]";
+}
+
+void CWriter::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
+ Out << "(";
+ printType(Out, SVI.getType());
+ Out << "){ ";
+ VectorType *VT = SVI.getType();
+ unsigned NumElts = VT->getNumElements();
+ Type *EltTy = VT->getElementType();
+
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (i) Out << ", ";
+ int SrcVal = SVI.getMaskValue(i);
+ if ((unsigned)SrcVal >= NumElts*2) {
+ Out << " 0/*undef*/ ";
+ } else {
+ Value *Op = SVI.getOperand((unsigned)SrcVal >= NumElts);
+ if (isa<Instruction>(Op)) {
+ // Do an extractelement of this value from the appropriate input.
+ Out << "((";
+ printType(Out, PointerType::getUnqual(EltTy));
+ Out << ")(&" << GetValueName(Op)
+ << "))[" << (SrcVal & (NumElts-1)) << "]";
+ } else if (isa<ConstantAggregateZero>(Op) || isa<UndefValue>(Op)) {
+ Out << "0";
+ } else {
+ printConstant(cast<ConstantVector>(Op)->getOperand(SrcVal &
+ (NumElts-1)),
+ false);
+ }
+ }
+ }
+ Out << "}";
+}
+
+void CWriter::visitInsertValueInst(InsertValueInst &IVI) {
+ // Start by copying the entire aggregate value into the result variable.
+ writeOperand(IVI.getOperand(0));
+ Out << ";\n ";
+
+ // Then do the insert to update the field.
+ Out << GetValueName(&IVI);
+ for (const unsigned *b = IVI.idx_begin(), *i = b, *e = IVI.idx_end();
+ i != e; ++i) {
+ Type *IndexedTy =
+ ExtractValueInst::getIndexedType(IVI.getOperand(0)->getType(),
+ makeArrayRef(b, i+1));
+ if (IndexedTy->isArrayTy())
+ Out << ".array[" << *i << "]";
+ else
+ Out << ".field" << *i;
+ }
+ Out << " = ";
+ writeOperand(IVI.getOperand(1));
+}
+
+void CWriter::visitExtractValueInst(ExtractValueInst &EVI) {
+ Out << "(";
+ if (isa<UndefValue>(EVI.getOperand(0))) {
+ Out << "(";
+ printType(Out, EVI.getType());
+ Out << ") 0/*UNDEF*/";
+ } else {
+ Out << GetValueName(EVI.getOperand(0));
+ for (const unsigned *b = EVI.idx_begin(), *i = b, *e = EVI.idx_end();
+ i != e; ++i) {
+ Type *IndexedTy =
+ ExtractValueInst::getIndexedType(EVI.getOperand(0)->getType(),
+ makeArrayRef(b, i+1));
+ if (IndexedTy->isArrayTy())
+ Out << ".array[" << *i << "]";
+ else
+ Out << ".field" << *i;
+ }
+ }
+ Out << ")";
+}
+
+//===----------------------------------------------------------------------===//
+// External Interface declaration
+//===----------------------------------------------------------------------===//
+
+bool CTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
+ formatted_raw_ostream &o,
+ CodeGenFileType FileType,
+ CodeGenOpt::Level OptLevel,
+ bool DisableVerify) {
+ if (FileType != TargetMachine::CGFT_AssemblyFile) return true;
+
+ PM.add(createGCLoweringPass());
+ PM.add(createLowerInvokePass());
+ PM.add(createCFGSimplificationPass()); // clean up after lower invoke.
+ PM.add(new CWriter(o));
+ PM.add(createGCInfoDeleter());
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/CBackend/CTargetMachine.h b/contrib/llvm/lib/Target/CBackend/CTargetMachine.h
new file mode 100644
index 0000000..4f1ca97
--- /dev/null
+++ b/contrib/llvm/lib/Target/CBackend/CTargetMachine.h
@@ -0,0 +1,42 @@
+//===-- CTargetMachine.h - TargetMachine for the C backend ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the TargetMachine that is used by the C backend.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CTARGETMACHINE_H
+#define CTARGETMACHINE_H
+
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+
+namespace llvm {
+
+struct CTargetMachine : public TargetMachine {
+ CTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM)
+ : TargetMachine(T, TT, CPU, FS) {}
+
+ virtual bool addPassesToEmitFile(PassManagerBase &PM,
+ formatted_raw_ostream &Out,
+ CodeGenFileType FileType,
+ CodeGenOpt::Level OptLevel,
+ bool DisableVerify);
+
+ virtual const TargetData *getTargetData() const { return 0; }
+};
+
+extern Target TheCBackendTarget;
+
+} // End llvm namespace
+
+
+#endif
diff --git a/contrib/llvm/lib/Target/CBackend/TargetInfo/CBackendTargetInfo.cpp b/contrib/llvm/lib/Target/CBackend/TargetInfo/CBackendTargetInfo.cpp
new file mode 100644
index 0000000..e8274ff
--- /dev/null
+++ b/contrib/llvm/lib/Target/CBackend/TargetInfo/CBackendTargetInfo.cpp
@@ -0,0 +1,21 @@
+//===-- CBackendTargetInfo.cpp - CBackend Target Implementation -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CTargetMachine.h"
+#include "llvm/Module.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::TheCBackendTarget;
+
+extern "C" void LLVMInitializeCBackendTargetInfo() {
+ RegisterTarget<> X(TheCBackendTarget, "c", "C backend");
+}
+
+extern "C" void LLVMInitializeCBackendTargetMC() {}
diff --git a/contrib/llvm/lib/Target/CellSPU/CellSDKIntrinsics.td b/contrib/llvm/lib/Target/CellSPU/CellSDKIntrinsics.td
new file mode 100644
index 0000000..9468aee
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/CellSDKIntrinsics.td
@@ -0,0 +1,449 @@
+//===-- CellSDKIntrinsics.td - Cell SDK Intrinsics ---------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+///--==-- Arithmetic ops intrinsics --==--
+def CellSDKah:
+ RR_Int_v8i16<0b00010011000, "ah", IntegerOp, int_spu_si_ah>;
+def CellSDKahi:
+ RI10_Int_v8i16<0b00010011000, "ahi", IntegerOp, int_spu_si_ahi>;
+def CellSDKa:
+ RR_Int_v4i32<0b00000011000, "a", IntegerOp, int_spu_si_a>;
+def CellSDKai:
+ RI10_Int_v4i32<0b00111000, "ai", IntegerOp, int_spu_si_ai>;
+def CellSDKsfh:
+ RR_Int_v8i16<0b00010010000, "sfh", IntegerOp, int_spu_si_sfh>;
+def CellSDKsfhi:
+ RI10_Int_v8i16<0b10110000, "sfhi", IntegerOp, int_spu_si_sfhi>;
+def CellSDKsf:
+ RR_Int_v4i32<0b00000010000, "sf", IntegerOp, int_spu_si_sf>;
+def CellSDKsfi:
+ RI10_Int_v4i32<0b00110000, "sfi", IntegerOp, int_spu_si_sfi>;
+def CellSDKaddx:
+ RR_Int_v4i32<0b00000010110, "addx", IntegerOp, int_spu_si_addx>;
+def CellSDKcg:
+ RR_Int_v4i32<0b0100001100, "cg", IntegerOp, int_spu_si_cg>;
+def CellSDKcgx:
+ RR_Int_v4i32<0b01000010110, "cgx", IntegerOp, int_spu_si_cgx>;
+def CellSDKsfx:
+ RR_Int_v4i32<0b10000010110, "sfx", IntegerOp, int_spu_si_sfx>;
+def CellSDKbg:
+ RR_Int_v4i32<0b01000010000, "bg", IntegerOp, int_spu_si_bg>;
+def CellSDKbgx:
+ RR_Int_v4i32<0b11000010110, "bgx", IntegerOp, int_spu_si_bgx>;
+
+def CellSDKmpy:
+ RRForm<0b00100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "mpy $rT, $rA, $rB", IntegerMulDiv,
+ [(set (v4i32 VECREG:$rT), (int_spu_si_mpy (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))]>;
+
+def CellSDKmpyu:
+ RRForm<0b00110011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "mpyu $rT, $rA, $rB", IntegerMulDiv,
+ [(set (v4i32 VECREG:$rT), (int_spu_si_mpyu (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))] >;
+
+def CellSDKmpyi:
+ RI10Form<0b00101110, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "mpyi $rT, $rA, $val", IntegerMulDiv,
+ [(set (v4i32 VECREG:$rT), (int_spu_si_mpyi (v8i16 VECREG:$rA),
+ i16ImmSExt10:$val))]>;
+
+def CellSDKmpyui:
+ RI10Form<0b10101110, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "mpyui $rT, $rA, $val", IntegerMulDiv,
+ [(set (v4i32 VECREG:$rT), (int_spu_si_mpyui (v8i16 VECREG:$rA),
+ i16ImmSExt10:$val))]>;
+
+def CellSDKmpya:
+ RRRForm<0b0011, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ "mpya $rT, $rA, $rB, $rC", IntegerMulDiv,
+ [(set (v4i32 VECREG:$rT), (int_spu_si_mpya (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB),
+ (v8i16 VECREG:$rC)))]>;
+
+def CellSDKmpyh:
+ RRForm<0b10100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "mpyh $rT, $rA, $rB", IntegerMulDiv,
+ [(set (v4i32 VECREG:$rT), (int_spu_si_mpyh (v4i32 VECREG:$rA),
+ (v8i16 VECREG:$rB)))]>;
+
+def CellSDKmpys:
+ RRForm<0b11100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "mpys $rT, $rA, $rB", IntegerMulDiv,
+ [(set (v4i32 VECREG:$rT), (int_spu_si_mpys (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))]>;
+
+def CellSDKmpyhh:
+ RRForm<0b01100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "mpyhh $rT, $rA, $rB", IntegerMulDiv,
+ [(set (v4i32 VECREG:$rT), (int_spu_si_mpyhh (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))]>;
+
+def CellSDKmpyhha:
+ RRForm<0b01100010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "mpyhha $rT, $rA, $rB", IntegerMulDiv,
+ [(set (v4i32 VECREG:$rT), (int_spu_si_mpyhha (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))]>;
+
+// Not sure how to match a (set $rT, (add $rT (mpyhh $rA, $rB)))... so leave
+// as an intrinsic for the time being
+def CellSDKmpyhhu:
+ RRForm<0b01110011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "mpyhhu $rT, $rA, $rB", IntegerMulDiv,
+ [(set (v4i32 VECREG:$rT), (int_spu_si_mpyhhu (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))]>;
+
+def CellSDKmpyhhau:
+ RRForm<0b01110010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "mpyhhau $rT, $rA, $rB", IntegerMulDiv,
+ [(set (v4i32 VECREG:$rT), (int_spu_si_mpyhhau (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))]>;
+
+def CellSDKand:
+ RRForm<0b1000011000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "and\t $rT, $rA, $rB", IntegerOp,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_and (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+
+def CellSDKandc:
+ RRForm<0b10000011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "andc\t $rT, $rA, $rB", IntegerOp,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_andc (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+
+def CellSDKandbi:
+ RI10Form<0b01101000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val),
+ "andbi\t $rT, $rA, $val", BranchResolv,
+ [(set (v16i8 VECREG:$rT),
+ (int_spu_si_andbi (v16i8 VECREG:$rA), immU8:$val))]>;
+
+def CellSDKandhi:
+ RI10Form<0b10101000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "andhi\t $rT, $rA, $val", BranchResolv,
+ [(set (v8i16 VECREG:$rT),
+ (int_spu_si_andhi (v8i16 VECREG:$rA), i16ImmSExt10:$val))]>;
+
+def CellSDKandi:
+ RI10Form<0b00101000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "andi\t $rT, $rA, $val", BranchResolv,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_andi (v4i32 VECREG:$rA), i32ImmSExt10:$val))]>;
+
+def CellSDKor:
+ RRForm<0b10000010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "or\t $rT, $rA, $rB", IntegerOp,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_or (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+
+def CellSDKorc:
+ RRForm<0b10010011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "addc\t $rT, $rA, $rB", IntegerOp,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_orc (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+
+def CellSDKorbi:
+ RI10Form<0b01100000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val),
+ "orbi\t $rT, $rA, $val", BranchResolv,
+ [(set (v16i8 VECREG:$rT),
+ (int_spu_si_orbi (v16i8 VECREG:$rA), immU8:$val))]>;
+
+def CellSDKorhi:
+ RI10Form<0b10100000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "orhi\t $rT, $rA, $val", BranchResolv,
+ [(set (v8i16 VECREG:$rT),
+ (int_spu_si_orhi (v8i16 VECREG:$rA), i16ImmSExt10:$val))]>;
+
+def CellSDKori:
+ RI10Form<0b00100000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "ori\t $rT, $rA, $val", BranchResolv,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_ori (v4i32 VECREG:$rA), i32ImmSExt10:$val))]>;
+
+def CellSDKxor:
+ RRForm<0b10000010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "xor\t $rT, $rA, $rB", IntegerOp,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_xor (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+
+def CellSDKxorbi:
+ RI10Form<0b01100000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val),
+ "xorbi\t $rT, $rA, $val", BranchResolv,
+ [(set (v16i8 VECREG:$rT), (int_spu_si_xorbi (v16i8 VECREG:$rA), immU8:$val))]>;
+
+def CellSDKxorhi:
+ RI10Form<0b10100000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "xorhi\t $rT, $rA, $val", BranchResolv,
+ [(set (v8i16 VECREG:$rT),
+ (int_spu_si_xorhi (v8i16 VECREG:$rA), i16ImmSExt10:$val))]>;
+
+def CellSDKxori:
+ RI10Form<0b00100000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "xori\t $rT, $rA, $val", BranchResolv,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_xori (v4i32 VECREG:$rA), i32ImmSExt10:$val))]>;
+
+def CellSDKnor:
+ RRForm<0b10000010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "nor\t $rT, $rA, $rB", IntegerOp,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_nor (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+
+def CellSDKnand:
+ RRForm<0b10000010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "nand\t $rT, $rA, $rB", IntegerOp,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_nand (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+
+//===----------------------------------------------------------------------===//
+// Shift/rotate intrinsics:
+//===----------------------------------------------------------------------===//
+
+def CellSDKshli:
+ Pat<(int_spu_si_shli (v4i32 VECREG:$rA), uimm7:$val),
+ (SHLIv4i32 VECREG:$rA, (TO_IMM32 imm:$val))>;
+
+def CellSDKshlqbi:
+ Pat<(int_spu_si_shlqbi VECREG:$rA, R32C:$rB),
+ (SHLQBIv16i8 VECREG:$rA, R32C:$rB)>;
+
+def CellSDKshlqii:
+ Pat<(int_spu_si_shlqbii VECREG:$rA, uimm7:$val),
+ (SHLQBIIv16i8 VECREG:$rA, (TO_IMM32 imm:$val))>;
+
+def CellSDKshlqby:
+ Pat<(int_spu_si_shlqby VECREG:$rA, R32C:$rB),
+ (SHLQBYv16i8 VECREG:$rA, R32C:$rB)>;
+
+def CellSDKshlqbyi:
+ Pat<(int_spu_si_shlqbyi VECREG:$rA, uimm7:$val),
+ (SHLQBYIv16i8 VECREG:$rA, (TO_IMM32 imm:$val))>;
+
+
+//===----------------------------------------------------------------------===//
+// Branch/compare intrinsics:
+//===----------------------------------------------------------------------===//
+
+def CellSDKceq:
+ RRForm<0b00000011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "ceq\t $rT, $rA, $rB", BranchResolv,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_ceq (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+
+def CellSDKceqi:
+ RI10Form<0b00111110, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "ceqi\t $rT, $rA, $val", BranchResolv,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_ceqi (v4i32 VECREG:$rA), i32ImmSExt10:$val))]>;
+
+def CellSDKceqb:
+ RRForm<0b00001011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "ceqb\t $rT, $rA, $rB", BranchResolv,
+ [(set (v16i8 VECREG:$rT),
+ (int_spu_si_ceqb (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)))]>;
+
+def CellSDKceqbi:
+ RI10Form<0b01111110, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val),
+ "ceqbi\t $rT, $rA, $val", BranchResolv,
+ [(set (v16i8 VECREG:$rT), (int_spu_si_ceqbi (v16i8 VECREG:$rA), immU8:$val))]>;
+
+def CellSDKceqh:
+ RRForm<0b00010011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "ceqh\t $rT, $rA, $rB", BranchResolv,
+ [(set (v8i16 VECREG:$rT),
+ (int_spu_si_ceqh (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)))]>;
+
+def CellSDKceqhi:
+ RI10Form<0b10111110, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "ceqhi\t $rT, $rA, $val", BranchResolv,
+ [(set (v8i16 VECREG:$rT),
+ (int_spu_si_ceqhi (v8i16 VECREG:$rA), i16ImmSExt10:$val))]>;
+def CellSDKcgth:
+ RRForm<0b00010011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "cgth\t $rT, $rA, $rB", BranchResolv,
+ [(set (v8i16 VECREG:$rT),
+ (int_spu_si_cgth (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)))]>;
+
+def CellSDKcgthi:
+ RI10Form<0b10111110, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "cgthi\t $rT, $rA, $val", BranchResolv,
+ [(set (v8i16 VECREG:$rT),
+ (int_spu_si_cgthi (v8i16 VECREG:$rA), i16ImmSExt10:$val))]>;
+
+def CellSDKcgt:
+ RRForm<0b00000010010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "cgt\t $rT, $rA, $rB", BranchResolv,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_cgt (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+
+def CellSDKcgti:
+ RI10Form<0b00110010, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "cgti\t $rT, $rA, $val", BranchResolv,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_cgti (v4i32 VECREG:$rA), i32ImmSExt10:$val))]>;
+
+def CellSDKcgtb:
+ RRForm<0b00001010010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "cgtb\t $rT, $rA, $rB", BranchResolv,
+ [(set (v16i8 VECREG:$rT),
+ (int_spu_si_cgtb (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)))]>;
+
+def CellSDKcgtbi:
+ RI10Form<0b01110010, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val),
+ "cgtbi\t $rT, $rA, $val", BranchResolv,
+ [(set (v16i8 VECREG:$rT), (int_spu_si_cgtbi (v16i8 VECREG:$rA), immU8:$val))]>;
+
+def CellSDKclgth:
+ RRForm<0b00010011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "clgth\t $rT, $rA, $rB", BranchResolv,
+ [(set (v8i16 VECREG:$rT),
+ (int_spu_si_clgth (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)))]>;
+
+def CellSDKclgthi:
+ RI10Form<0b10111010, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "clgthi\t $rT, $rA, $val", BranchResolv,
+ [(set (v8i16 VECREG:$rT),
+ (int_spu_si_clgthi (v8i16 VECREG:$rA), i16ImmSExt10:$val))]>;
+
+def CellSDKclgt:
+ RRForm<0b00000011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "clgt\t $rT, $rA, $rB", BranchResolv,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_clgt (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+
+def CellSDKclgti:
+ RI10Form<0b00111010, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "clgti\t $rT, $rA, $val", BranchResolv,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_clgti (v4i32 VECREG:$rA), i32ImmSExt10:$val))]>;
+
+def CellSDKclgtb:
+ RRForm<0b00001011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "clgtb\t $rT, $rA, $rB", BranchResolv,
+ [(set (v16i8 VECREG:$rT),
+ (int_spu_si_clgtb (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)))]>;
+
+def CellSDKclgtbi:
+ RI10Form<0b01111010, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val),
+ "clgtbi\t $rT, $rA, $val", BranchResolv,
+ [(set (v16i8 VECREG:$rT),
+ (int_spu_si_clgtbi (v16i8 VECREG:$rA), immU8:$val))]>;
+
+//===----------------------------------------------------------------------===//
+// Floating-point intrinsics:
+//===----------------------------------------------------------------------===//
+
+def CellSDKfa:
+ RRForm<0b00100011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "fa\t $rT, $rA, $rB", SPrecFP,
+ [(set (v4f32 VECREG:$rT), (int_spu_si_fa (v4f32 VECREG:$rA),
+ (v4f32 VECREG:$rB)))]>;
+
+def CellSDKfs:
+ RRForm<0b10100011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "fs\t $rT, $rA, $rB", SPrecFP,
+ [(set (v4f32 VECREG:$rT), (int_spu_si_fs (v4f32 VECREG:$rA),
+ (v4f32 VECREG:$rB)))]>;
+
+def CellSDKfm:
+ RRForm<0b01100011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "fm\t $rT, $rA, $rB", SPrecFP,
+ [(set (v4f32 VECREG:$rT), (int_spu_si_fm (v4f32 VECREG:$rA),
+ (v4f32 VECREG:$rB)))]>;
+
+def CellSDKfceq:
+ RRForm<0b01000011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "fceq\t $rT, $rA, $rB", SPrecFP,
+ [(set (v4f32 VECREG:$rT), (int_spu_si_fceq (v4f32 VECREG:$rA),
+ (v4f32 VECREG:$rB)))]>;
+
+def CellSDKfcgt:
+ RRForm<0b01000011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "fcgt\t $rT, $rA, $rB", SPrecFP,
+ [(set (v4f32 VECREG:$rT), (int_spu_si_fcgt (v4f32 VECREG:$rA),
+ (v4f32 VECREG:$rB)))]>;
+
+def CellSDKfcmeq:
+ RRForm<0b01010011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "fcmeq\t $rT, $rA, $rB", SPrecFP,
+ [(set (v4f32 VECREG:$rT), (int_spu_si_fcmeq (v4f32 VECREG:$rA),
+ (v4f32 VECREG:$rB)))]>;
+
+def CellSDKfcmgt:
+ RRForm<0b01010011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "fcmgt\t $rT, $rA, $rB", SPrecFP,
+ [(set (v4f32 VECREG:$rT), (int_spu_si_fcmgt (v4f32 VECREG:$rA),
+ (v4f32 VECREG:$rB)))]>;
+
+def CellSDKfma:
+ RRRForm<0b0111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ "fma\t $rT, $rA, $rB, $rC", SPrecFP,
+ [(set (v4f32 VECREG:$rT), (int_spu_si_fma (v4f32 VECREG:$rA),
+ (v4f32 VECREG:$rB),
+ (v4f32 VECREG:$rC)))]>;
+
+def CellSDKfnms:
+ RRRForm<0b1011, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ "fnms\t $rT, $rA, $rB, $rC", SPrecFP,
+ [(set (v4f32 VECREG:$rT), (int_spu_si_fnms (v4f32 VECREG:$rA),
+ (v4f32 VECREG:$rB),
+ (v4f32 VECREG:$rC)))]>;
+
+def CellSDKfms:
+ RRRForm<0b1111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ "fms\t $rT, $rA, $rB, $rC", SPrecFP,
+ [(set (v4f32 VECREG:$rT), (int_spu_si_fms (v4f32 VECREG:$rA),
+ (v4f32 VECREG:$rB),
+ (v4f32 VECREG:$rC)))]>;
+
+//===----------------------------------------------------------------------===//
+// Double precision floating-point intrinsics:
+//===----------------------------------------------------------------------===//
+
+def CellSDKdfa:
+ RRForm<0b00110011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "dfa\t $rT, $rA, $rB", DPrecFP,
+ [(set (v2f64 VECREG:$rT), (int_spu_si_dfa (v2f64 VECREG:$rA),
+ (v2f64 VECREG:$rB)))]>;
+
+def CellSDKdfs:
+ RRForm<0b10110011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "dfs\t $rT, $rA, $rB", DPrecFP,
+ [(set (v2f64 VECREG:$rT), (int_spu_si_dfs (v2f64 VECREG:$rA),
+ (v2f64 VECREG:$rB)))]>;
+
+def CellSDKdfm:
+ RRForm<0b01110011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "dfm\t $rT, $rA, $rB", DPrecFP,
+ [(set (v2f64 VECREG:$rT), (int_spu_si_dfm (v2f64 VECREG:$rA),
+ (v2f64 VECREG:$rB)))]>;
+
+def CellSDKdfma:
+ RRForm<0b00111010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "dfma\t $rT, $rA, $rB", DPrecFP,
+ [(set (v2f64 VECREG:$rT), (int_spu_si_dfma (v2f64 VECREG:$rA),
+ (v2f64 VECREG:$rB)))]>;
+
+def CellSDKdfnma:
+ RRForm<0b11111010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "dfnma\t $rT, $rA, $rB", DPrecFP,
+ [(set (v2f64 VECREG:$rT), (int_spu_si_dfnma (v2f64 VECREG:$rA),
+ (v2f64 VECREG:$rB)))]>;
+
+def CellSDKdfnms:
+ RRForm<0b01111010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "dfnms\t $rT, $rA, $rB", DPrecFP,
+ [(set (v2f64 VECREG:$rT), (int_spu_si_dfnms (v2f64 VECREG:$rA),
+ (v2f64 VECREG:$rB)))]>;
+
+def CellSDKdfms:
+ RRForm<0b10111010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "dfms\t $rT, $rA, $rB", DPrecFP,
+ [(set (v2f64 VECREG:$rT), (int_spu_si_dfms (v2f64 VECREG:$rA),
+ (v2f64 VECREG:$rB)))]>;
diff --git a/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.cpp b/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.cpp
new file mode 100644
index 0000000..8c1176a
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.cpp
@@ -0,0 +1,41 @@
+//===-- SPUMCAsmInfo.cpp - Cell SPU asm properties ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the SPUMCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPUMCAsmInfo.h"
+using namespace llvm;
+
+SPULinuxMCAsmInfo::SPULinuxMCAsmInfo(const Target &T, StringRef TT) {
+ IsLittleEndian = false;
+
+ ZeroDirective = "\t.space\t";
+ Data64bitsDirective = "\t.quad\t";
+ AlignmentIsInBytes = false;
+
+ PCSymbol = ".";
+ CommentString = "#";
+ GlobalPrefix = "";
+ PrivateGlobalPrefix = ".L";
+
+ // Has leb128
+ HasLEB128 = true;
+
+ SupportsDebugInformation = true;
+
+ // Exception handling is not supported on CellSPU (think about it: you only
+ // have 256K for code+data. Would you support exception handling?)
+ ExceptionsType = ExceptionHandling::None;
+
+ // SPU assembly requires ".section" before ".bss"
+ UsesELFSectionDirectiveForBSS = true;
+}
+
diff --git a/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.h b/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.h
new file mode 100644
index 0000000..7f850d3
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.h
@@ -0,0 +1,28 @@
+//===-- SPUMCAsmInfo.h - Cell SPU asm properties ---------------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the SPUMCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPUTARGETASMINFO_H
+#define SPUTARGETASMINFO_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCAsmInfo.h"
+
+namespace llvm {
+ class Target;
+
+ struct SPULinuxMCAsmInfo : public MCAsmInfo {
+ explicit SPULinuxMCAsmInfo(const Target &T, StringRef TT);
+ };
+} // namespace llvm
+
+#endif /* SPUTARGETASMINFO_H */
diff --git a/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp b/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp
new file mode 100644
index 0000000..d5af2a8
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp
@@ -0,0 +1,92 @@
+//===-- SPUMCTargetDesc.cpp - Cell SPU Target Descriptions -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides Cell SPU specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPUMCTargetDesc.h"
+#include "SPUMCAsmInfo.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/MC/MCCodeGenInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "SPUGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "SPUGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "SPUGenRegisterInfo.inc"
+
+using namespace llvm;
+
+static MCInstrInfo *createSPUMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitSPUMCInstrInfo(X);
+ return X;
+}
+
+static MCRegisterInfo *createCellSPUMCRegisterInfo(StringRef TT) {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitSPUMCRegisterInfo(X, SPU::R0);
+ return X;
+}
+
+static MCSubtargetInfo *createSPUMCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS) {
+ MCSubtargetInfo *X = new MCSubtargetInfo();
+ InitSPUMCSubtargetInfo(X, TT, CPU, FS);
+ return X;
+}
+
+static MCAsmInfo *createSPUMCAsmInfo(const Target &T, StringRef TT) {
+ MCAsmInfo *MAI = new SPULinuxMCAsmInfo(T, TT);
+
+ // Initial state of the frame pointer is R1.
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(SPU::R1, 0);
+ MAI->addInitialFrameState(0, Dst, Src);
+
+ return MAI;
+}
+
+static MCCodeGenInfo *createSPUMCCodeGenInfo(StringRef TT, Reloc::Model RM,
+ CodeModel::Model CM) {
+ MCCodeGenInfo *X = new MCCodeGenInfo();
+ // For the time being, use static relocations, since there's really no
+ // support for PIC yet.
+ X->InitMCCodeGenInfo(Reloc::Static, CM);
+ return X;
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeCellSPUTargetMC() {
+ // Register the MC asm info.
+ RegisterMCAsmInfoFn X(TheCellSPUTarget, createSPUMCAsmInfo);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(TheCellSPUTarget,
+ createSPUMCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(TheCellSPUTarget, createSPUMCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(TheCellSPUTarget,
+ createCellSPUMCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(TheCellSPUTarget,
+ createSPUMCSubtargetInfo);
+}
diff --git a/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h b/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h
new file mode 100644
index 0000000..a3717b0
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h
@@ -0,0 +1,40 @@
+//===-- SPUMCTargetDesc.h - CellSPU Target Descriptions ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides CellSPU specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPUMCTARGETDESC_H
+#define SPUMCTARGETDESC_H
+
+namespace llvm {
+class MCSubtargetInfo;
+class Target;
+class StringRef;
+
+extern Target TheCellSPUTarget;
+
+} // End llvm namespace
+
+// Define symbolic names for Cell registers. This defines a mapping from
+// register name to register number.
+//
+#define GET_REGINFO_ENUM
+#include "SPUGenRegisterInfo.inc"
+
+// Defines symbolic names for the SPU instructions.
+//
+#define GET_INSTRINFO_ENUM
+#include "SPUGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "SPUGenSubtargetInfo.inc"
+
+#endif
diff --git a/contrib/llvm/lib/Target/CellSPU/SPU.h b/contrib/llvm/lib/Target/CellSPU/SPU.h
new file mode 100644
index 0000000..b51fbc7
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPU.h
@@ -0,0 +1,31 @@
+//===-- SPU.h - Top-level interface for Cell SPU Target ----------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// Cell SPU back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_IBMCELLSPU_H
+#define LLVM_TARGET_IBMCELLSPU_H
+
+#include "MCTargetDesc/SPUMCTargetDesc.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+ class SPUTargetMachine;
+ class FunctionPass;
+ class formatted_raw_ostream;
+
+ FunctionPass *createSPUISelDag(SPUTargetMachine &TM);
+ FunctionPass *createSPUNopFillerPass(SPUTargetMachine &tm);
+
+}
+
+#endif /* LLVM_TARGET_IBMCELLSPU_H */
diff --git a/contrib/llvm/lib/Target/CellSPU/SPU.td b/contrib/llvm/lib/Target/CellSPU/SPU.td
new file mode 100644
index 0000000..8327fe0
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPU.td
@@ -0,0 +1,66 @@
+//===- SPU.td - Describe the STI Cell SPU Target Machine ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is the top level entry point for the STI Cell SPU target machine.
+//
+//===----------------------------------------------------------------------===//
+
+// Get the target-independent interfaces which we are implementing.
+//
+include "llvm/Target/Target.td"
+
+// Holder of code fragments (you'd think this'd already be in
+// a td file somewhere... :-)
+
+class CodeFrag<dag frag> {
+ dag Fragment = frag;
+}
+
+//===----------------------------------------------------------------------===//
+// Register File Description
+//===----------------------------------------------------------------------===//
+
+include "SPURegisterInfo.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction formats, instructions
+//===----------------------------------------------------------------------===//
+
+include "SPUNodes.td"
+include "SPUOperands.td"
+include "SPUSchedule.td"
+include "SPUInstrFormats.td"
+include "SPUInstrInfo.td"
+
+//===----------------------------------------------------------------------===//
+// Subtarget features:
+//===----------------------------------------------------------------------===//
+
+def DefaultProc: SubtargetFeature<"", "ProcDirective", "SPU::DEFAULT_PROC", "">;
+def LargeMemFeature:
+ SubtargetFeature<"large_mem","UseLargeMem", "true",
+ "Use large (>256) LSA memory addressing [default = false]">;
+
+def SPURev0 : Processor<"v0", SPUItineraries, [DefaultProc]>;
+
+//===----------------------------------------------------------------------===//
+// Calling convention:
+//===----------------------------------------------------------------------===//
+
+include "SPUCallingConv.td"
+
+// Target:
+
+def SPUInstrInfo : InstrInfo {
+ let isLittleEndianEncoding = 1;
+}
+
+def SPU : Target {
+ let InstructionSet = SPUInstrInfo;
+}
diff --git a/contrib/llvm/lib/Target/CellSPU/SPU128InstrInfo.td b/contrib/llvm/lib/Target/CellSPU/SPU128InstrInfo.td
new file mode 100644
index 0000000..3031fda
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPU128InstrInfo.td
@@ -0,0 +1,41 @@
+//===--- SPU128InstrInfo.td - Cell SPU 128-bit operations -*- tablegen -*--===//
+//
+// Cell SPU 128-bit operations
+//
+//===----------------------------------------------------------------------===//
+
+// zext 32->128: Zero extend 32-bit to 128-bit
+def : Pat<(i128 (zext R32C:$rSrc)),
+ (ROTQMBYIr128_zext_r32 R32C:$rSrc, 12)>;
+
+// zext 64->128: Zero extend 64-bit to 128-bit
+def : Pat<(i128 (zext R64C:$rSrc)),
+ (ROTQMBYIr128_zext_r64 R64C:$rSrc, 8)>;
+
+// zext 16->128: Zero extend 16-bit to 128-bit
+def : Pat<(i128 (zext R16C:$rSrc)),
+ (ROTQMBYIr128_zext_r32 (ANDi16i32 R16C:$rSrc, (ILAr32 0xffff)), 12)>;
+
+// zext 8->128: Zero extend 8-bit to 128-bit
+def : Pat<(i128 (zext R8C:$rSrc)),
+ (ROTQMBYIr128_zext_r32 (ANDIi8i32 R8C:$rSrc, 0xf), 12)>;
+
+// anyext 32->128: Zero extend 32-bit to 128-bit
+def : Pat<(i128 (anyext R32C:$rSrc)),
+ (ROTQMBYIr128_zext_r32 R32C:$rSrc, 12)>;
+
+// anyext 64->128: Zero extend 64-bit to 128-bit
+def : Pat<(i128 (anyext R64C:$rSrc)),
+ (ROTQMBYIr128_zext_r64 R64C:$rSrc, 8)>;
+
+// anyext 16->128: Zero extend 16-bit to 128-bit
+def : Pat<(i128 (anyext R16C:$rSrc)),
+ (ROTQMBYIr128_zext_r32 (ANDi16i32 R16C:$rSrc, (ILAr32 0xffff)), 12)>;
+
+// anyext 8->128: Zero extend 8-bit to 128-bit
+def : Pat<(i128 (anyext R8C:$rSrc)),
+ (ROTQMBYIr128_zext_r32 (ANDIi8i32 R8C:$rSrc, 0xf), 12)>;
+
+// Shift left
+def : Pat<(shl GPRC:$rA, R32C:$rB),
+ (SHLQBYBIr128 (SHLQBIr128 GPRC:$rA, R32C:$rB), R32C:$rB)>;
diff --git a/contrib/llvm/lib/Target/CellSPU/SPU64InstrInfo.td b/contrib/llvm/lib/Target/CellSPU/SPU64InstrInfo.td
new file mode 100644
index 0000000..f340edf
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPU64InstrInfo.td
@@ -0,0 +1,408 @@
+//====--- SPU64InstrInfo.td - Cell SPU 64-bit operations -*- tablegen -*--====//
+//
+// Cell SPU 64-bit operations
+//
+//===----------------------------------------------------------------------===//
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// 64-bit comparisons:
+//
+// 1. The instruction sequences for vector vice scalar differ by a
+// constant. In the scalar case, we're only interested in the
+// top two 32-bit slots, whereas we're interested in an exact
+// all-four-slot match in the vector case.
+//
+// 2. There are no "immediate" forms, since loading 64-bit constants
+// could be a constant pool load.
+//
+// 3. i64 setcc results are i32, which are subsequently converted to a FSM
+// mask when used in a select pattern.
+//
+// 4. v2i64 setcc results are v4i32, which can be converted to a FSM mask (TODO)
+// [Note: this may be moot, since gb produces v4i32 or r32.]
+//
+// 5. The code sequences for r64 and v2i64 are probably overly conservative,
+// compared to the code that gcc produces.
+//
+// M00$E B!tes Kan be Pretty N@sTi!!!!! (apologies to Monty!)
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+// selb instruction definition for i64. Note that the selection mask is
+// a vector, produced by various forms of FSM:
+def SELBr64_cond:
+ SELBInst<(outs R64C:$rT), (ins R64C:$rA, R64C:$rB, VECREG:$rC),
+ [/* no pattern */]>;
+
+// The generic i64 select pattern, which assumes that the comparison result
+// is in a 32-bit register that contains a select mask pattern (i.e., gather
+// bits result):
+
+def : Pat<(select R32C:$rCond, R64C:$rFalse, R64C:$rTrue),
+ (SELBr64_cond R64C:$rTrue, R64C:$rFalse, (FSMr32 R32C:$rCond))>;
+
+// select the negative condition:
+class I64SELECTNegCond<PatFrag cond, CodeFrag compare>:
+ Pat<(select (i32 (cond R64C:$rA, R64C:$rB)), R64C:$rTrue, R64C:$rFalse),
+ (SELBr64_cond R64C:$rTrue, R64C:$rFalse, (FSMr32 compare.Fragment))>;
+
+// setcc the negative condition:
+class I64SETCCNegCond<PatFrag cond, CodeFrag compare>:
+ Pat<(cond R64C:$rA, R64C:$rB),
+ (XORIr32 compare.Fragment, -1)>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// The i64 seteq fragment that does the scalar->vector conversion and
+// comparison:
+def CEQr64compare:
+ CodeFrag<(CGTIv4i32 (GBv4i32 (CEQv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG),
+ (COPY_TO_REGCLASS R64C:$rB, VECREG))), 0xb)>;
+
+// The i64 seteq fragment that does the vector comparison
+def CEQv2i64compare:
+ CodeFrag<(CEQIv4i32 (GBv4i32 (CEQv4i32 VECREG:$rA, VECREG:$rB)), 0xf)>;
+
+// i64 seteq (equality): the setcc result is i32, which is converted to a
+// vector FSM mask when used in a select pattern.
+//
+// v2i64 seteq (equality): the setcc result is v4i32
+multiclass CompareEqual64 {
+ // Plain old comparison, converts back to i32 scalar
+ def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CEQr64compare.Fragment, R32C))>;
+ def v2i64: CodeFrag<(i32 (COPY_TO_REGCLASS CEQv2i64compare.Fragment, R32C))>;
+
+ // SELB mask from FSM:
+ def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
+ (FSMv4i32 CEQr64compare.Fragment), R32C))>;
+ def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
+ (FSMv4i32 CEQv2i64compare.Fragment), R32C))>;
+}
+
+defm I64EQ: CompareEqual64;
+
+def : Pat<(seteq R64C:$rA, R64C:$rB), I64EQr64.Fragment>;
+def : Pat<(seteq (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)), I64EQv2i64.Fragment>;
+
+// i64 setne:
+def : I64SETCCNegCond<setne, I64EQr64>;
+def : I64SELECTNegCond<setne, I64EQr64>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// i64 setugt/setule:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+def CLGTr64ugt:
+ CodeFrag<(CLGTv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG),
+ (COPY_TO_REGCLASS R64C:$rB, VECREG))>;
+
+def CLGTr64eq:
+ CodeFrag<(CEQv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG),
+ (COPY_TO_REGCLASS R64C:$rB, VECREG))>;
+
+def CLGTr64compare:
+ CodeFrag<(SELBv2i64 CLGTr64ugt.Fragment,
+ (XSWDv2i64 CLGTr64ugt.Fragment),
+ CLGTr64eq.Fragment)>;
+
+def CLGTv2i64ugt:
+ CodeFrag<(CLGTv4i32 VECREG:$rA, VECREG:$rB)>;
+
+def CLGTv2i64eq:
+ CodeFrag<(CEQv4i32 VECREG:$rA, VECREG:$rB)>;
+
+def CLGTv2i64compare:
+ CodeFrag<(SELBv2i64 CLGTv2i64ugt.Fragment,
+ (XSWDv2i64 CLGTr64ugt.Fragment),
+ CLGTv2i64eq.Fragment)>;
+
+multiclass CompareLogicalGreaterThan64 {
+ // Plain old comparison, converts back to i32 scalar
+ def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CLGTr64compare.Fragment, R32C))>;
+ def v2i64: CodeFrag<CLGTv2i64compare.Fragment>;
+
+ // SELB mask from FSM:
+ def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
+ (FSMv4i32 CLGTr64compare.Fragment), R32C))>;
+ def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
+ (FSMv4i32 CLGTv2i64compare.Fragment), R32C))>;
+}
+
+defm I64LGT: CompareLogicalGreaterThan64;
+
+def : Pat<(setugt R64C:$rA, R64C:$rB), I64LGTr64.Fragment>;
+//def : Pat<(setugt (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)),
+// I64LGTv2i64.Fragment>;
+
+// i64 setult:
+def : I64SETCCNegCond<setule, I64LGTr64>;
+def : I64SELECTNegCond<setule, I64LGTr64>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// i64 setuge/setult:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+def CLGEr64compare:
+ CodeFrag<(CGTIv4i32 (GBv4i32 (ORv4i32 CLGTr64ugt.Fragment,
+ CLGTr64eq.Fragment)), 0xb)>;
+
+def CLGEv2i64compare:
+ CodeFrag<(CEQIv4i32 (GBv4i32 (ORv4i32 CLGTv2i64ugt.Fragment,
+ CLGTv2i64eq.Fragment)), 0xf)>;
+
+multiclass CompareLogicalGreaterEqual64 {
+ // Plain old comparison, converts back to i32 scalar
+ def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CLGEr64compare.Fragment, R32C))>;
+ def v2i64: CodeFrag<CLGEv2i64compare.Fragment>;
+
+ // SELB mask from FSM:
+ def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
+ (FSMv4i32 CLGEr64compare.Fragment), R32C))>;
+ def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
+ (FSMv4i32 CLGEv2i64compare.Fragment),R32C))>;
+}
+
+defm I64LGE: CompareLogicalGreaterEqual64;
+
+def : Pat<(setuge R64C:$rA, R64C:$rB), I64LGEr64.Fragment>;
+def : Pat<(v2i64 (setuge (v2i64 VECREG:$rA), (v2i64 VECREG:$rB))),
+ I64LGEv2i64.Fragment>;
+
+
+// i64 setult:
+def : I64SETCCNegCond<setult, I64LGEr64>;
+def : I64SELECTNegCond<setult, I64LGEr64>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// i64 setgt/setle:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+def CGTr64sgt:
+ CodeFrag<(CGTv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG),
+ (COPY_TO_REGCLASS R64C:$rB, VECREG))>;
+
+def CGTr64eq:
+ CodeFrag<(CEQv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG),
+ (COPY_TO_REGCLASS R64C:$rB, VECREG))>;
+
+def CGTr64compare:
+ CodeFrag<(SELBv2i64 CGTr64sgt.Fragment,
+ (XSWDv2i64 CGTr64sgt.Fragment),
+ CGTr64eq.Fragment)>;
+
+def CGTv2i64sgt:
+ CodeFrag<(CGTv4i32 VECREG:$rA, VECREG:$rB)>;
+
+def CGTv2i64eq:
+ CodeFrag<(CEQv4i32 VECREG:$rA, VECREG:$rB)>;
+
+def CGTv2i64compare:
+ CodeFrag<(SELBv2i64 CGTv2i64sgt.Fragment,
+ (XSWDv2i64 CGTr64sgt.Fragment),
+ CGTv2i64eq.Fragment)>;
+
+multiclass CompareGreaterThan64 {
+ // Plain old comparison, converts back to i32 scalar
+ def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CGTr64compare.Fragment, R32C))>;
+ def v2i64: CodeFrag<CGTv2i64compare.Fragment>;
+
+ // SELB mask from FSM:
+ def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
+ (FSMv4i32 CGTr64compare.Fragment), R32C))>;
+ def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
+ (FSMv4i32 CGTv2i64compare.Fragment), R32C))>;
+}
+
+defm I64GT: CompareLogicalGreaterThan64;
+
+def : Pat<(setgt R64C:$rA, R64C:$rB), I64GTr64.Fragment>;
+//def : Pat<(setgt (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)),
+// I64GTv2i64.Fragment>;
+
+// i64 setult:
+def : I64SETCCNegCond<setle, I64GTr64>;
+def : I64SELECTNegCond<setle, I64GTr64>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// i64 setge/setlt:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+def CGEr64compare:
+ CodeFrag<(CGTIv4i32 (GBv4i32 (ORv4i32 CGTr64sgt.Fragment,
+ CGTr64eq.Fragment)), 0xb)>;
+
+def CGEv2i64compare:
+ CodeFrag<(CEQIv4i32 (GBv4i32 (ORv4i32 CGTv2i64sgt.Fragment,
+ CGTv2i64eq.Fragment)), 0xf)>;
+
+multiclass CompareGreaterEqual64 {
+ // Plain old comparison, converts back to i32 scalar
+ def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CGEr64compare.Fragment, R32C))>;
+ def v2i64: CodeFrag<CGEv2i64compare.Fragment>;
+
+ // SELB mask from FSM:
+ def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS (FSMv4i32 CGEr64compare.Fragment),R32C))>;
+ def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS (FSMv4i32 CGEv2i64compare.Fragment),R32C))>;
+}
+
+defm I64GE: CompareGreaterEqual64;
+
+def : Pat<(setge R64C:$rA, R64C:$rB), I64GEr64.Fragment>;
+def : Pat<(v2i64 (setge (v2i64 VECREG:$rA), (v2i64 VECREG:$rB))),
+ I64GEv2i64.Fragment>;
+
+// i64 setult:
+def : I64SETCCNegCond<setlt, I64GEr64>;
+def : I64SELECTNegCond<setlt, I64GEr64>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// v2i64, i64 add
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class v2i64_add_cg<dag lhs, dag rhs>:
+ CodeFrag<(CGv4i32 lhs, rhs)>;
+
+class v2i64_add_1<dag lhs, dag rhs, dag cg, dag cg_mask>:
+ CodeFrag<(ADDXv4i32 lhs, rhs, (SHUFBv4i32 cg, cg, cg_mask))>;
+
+class v2i64_add<dag lhs, dag rhs, dag cg_mask>:
+ v2i64_add_1<lhs, rhs, v2i64_add_cg<lhs, rhs>.Fragment, cg_mask>;
+
+def : Pat<(SPUadd64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),
+ (COPY_TO_REGCLASS v2i64_add<(COPY_TO_REGCLASS R64C:$rA, VECREG),
+ (COPY_TO_REGCLASS R64C:$rB, VECREG),
+ (v4i32 VECREG:$rCGmask)>.Fragment, R64C)>;
+
+def : Pat<(SPUadd64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
+ (v4i32 VECREG:$rCGmask)),
+ v2i64_add<(v2i64 VECREG:$rA),
+ (v2i64 VECREG:$rB),
+ (v4i32 VECREG:$rCGmask)>.Fragment>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// v2i64, i64 subtraction
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class v2i64_sub_bg<dag lhs, dag rhs>: CodeFrag<(BGv4i32 lhs, rhs)>;
+
+class v2i64_sub<dag lhs, dag rhs, dag bg, dag bg_mask>:
+ CodeFrag<(SFXv4i32 lhs, rhs, (SHUFBv4i32 bg, bg, bg_mask))>;
+
+def : Pat<(SPUsub64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),
+ (COPY_TO_REGCLASS
+ v2i64_sub<(COPY_TO_REGCLASS R64C:$rA, VECREG),
+ (COPY_TO_REGCLASS R64C:$rB, VECREG),
+ v2i64_sub_bg<(COPY_TO_REGCLASS R64C:$rA, VECREG),
+ (COPY_TO_REGCLASS R64C:$rB, VECREG)>.Fragment,
+ (v4i32 VECREG:$rCGmask)>.Fragment, R64C)>;
+
+def : Pat<(SPUsub64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
+ (v4i32 VECREG:$rCGmask)),
+ v2i64_sub<(v2i64 VECREG:$rA),
+ (v2i64 VECREG:$rB),
+ v2i64_sub_bg<(v2i64 VECREG:$rA),
+ (v2i64 VECREG:$rB)>.Fragment,
+ (v4i32 VECREG:$rCGmask)>.Fragment>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// v2i64, i64 multiply
+//
+// Note: i64 multiply is simply the vector->scalar conversion of the
+// full-on v2i64 multiply, since the entire vector has to be manipulated
+// anyway.
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class v2i64_mul_ahi64<dag rA> :
+ CodeFrag<(SELBv4i32 rA, (ILv4i32 0), (FSMBIv4i32 0x0f0f))>;
+
+class v2i64_mul_bhi64<dag rB> :
+ CodeFrag<(SELBv4i32 rB, (ILv4i32 0), (FSMBIv4i32 0x0f0f))>;
+
+class v2i64_mul_alo64<dag rB> :
+ CodeFrag<(SELBv4i32 rB, (ILv4i32 0), (FSMBIv4i32 0xf0f0))>;
+
+class v2i64_mul_blo64<dag rB> :
+ CodeFrag<(SELBv4i32 rB, (ILv4i32 0), (FSMBIv4i32 0xf0f0))>;
+
+class v2i64_mul_ashlq2<dag rA>:
+ CodeFrag<(SHLQBYIv4i32 rA, 0x2)>;
+
+class v2i64_mul_ashlq4<dag rA>:
+ CodeFrag<(SHLQBYIv4i32 rA, 0x4)>;
+
+class v2i64_mul_bshlq2<dag rB> :
+ CodeFrag<(SHLQBYIv4i32 rB, 0x2)>;
+
+class v2i64_mul_bshlq4<dag rB> :
+ CodeFrag<(SHLQBYIv4i32 rB, 0x4)>;
+
+class v2i64_highprod<dag rA, dag rB>:
+ CodeFrag<(Av4i32
+ (Av4i32
+ (MPYUv4i32 v2i64_mul_bshlq4<rB>.Fragment, // a1 x b3
+ v2i64_mul_ahi64<rA>.Fragment),
+ (MPYHv4i32 v2i64_mul_ahi64<rA>.Fragment, // a0 x b3
+ v2i64_mul_bshlq4<rB>.Fragment)),
+ (Av4i32
+ (MPYHv4i32 v2i64_mul_bhi64<rB>.Fragment,
+ v2i64_mul_ashlq4<rA>.Fragment),
+ (Av4i32
+ (MPYHv4i32 v2i64_mul_ashlq4<rA>.Fragment,
+ v2i64_mul_bhi64<rB>.Fragment),
+ (Av4i32
+ (MPYUv4i32 v2i64_mul_ashlq4<rA>.Fragment,
+ v2i64_mul_bhi64<rB>.Fragment),
+ (Av4i32
+ (MPYHv4i32 v2i64_mul_ashlq2<rA>.Fragment,
+ v2i64_mul_bshlq2<rB>.Fragment),
+ (MPYUv4i32 v2i64_mul_ashlq2<rA>.Fragment,
+ v2i64_mul_bshlq2<rB>.Fragment))))))>;
+
+class v2i64_mul_a3_b3<dag rA, dag rB>:
+ CodeFrag<(MPYUv4i32 v2i64_mul_alo64<rA>.Fragment,
+ v2i64_mul_blo64<rB>.Fragment)>;
+
+class v2i64_mul_a2_b3<dag rA, dag rB>:
+ CodeFrag<(SELBv4i32 (SHLQBYIv4i32
+ (MPYHHUv4i32 v2i64_mul_alo64<rA>.Fragment,
+ v2i64_mul_bshlq2<rB>.Fragment), 0x2),
+ (ILv4i32 0),
+ (FSMBIv4i32 0xc3c3))>;
+
+class v2i64_mul_a3_b2<dag rA, dag rB>:
+ CodeFrag<(SELBv4i32 (SHLQBYIv4i32
+ (MPYHHUv4i32 v2i64_mul_blo64<rB>.Fragment,
+ v2i64_mul_ashlq2<rA>.Fragment), 0x2),
+ (ILv4i32 0),
+ (FSMBIv4i32 0xc3c3))>;
+
+class v2i64_lowsum<dag rA, dag rB, dag rCGmask>:
+ v2i64_add<v2i64_add<v2i64_mul_a3_b3<rA, rB>.Fragment,
+ v2i64_mul_a2_b3<rA, rB>.Fragment, rCGmask>.Fragment,
+ v2i64_mul_a3_b2<rA, rB>.Fragment, rCGmask>;
+
+class v2i64_mul<dag rA, dag rB, dag rCGmask>:
+ v2i64_add<v2i64_lowsum<rA, rB, rCGmask>.Fragment,
+ (SELBv4i32 v2i64_highprod<rA, rB>.Fragment,
+ (ILv4i32 0),
+ (FSMBIv4i32 0x0f0f)),
+ rCGmask>;
+
+def : Pat<(SPUmul64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),
+ (COPY_TO_REGCLASS v2i64_mul<(COPY_TO_REGCLASS R64C:$rA, VECREG),
+ (COPY_TO_REGCLASS R64C:$rB, VECREG),
+ (v4i32 VECREG:$rCGmask)>.Fragment, R64C)>;
+
+def : Pat<(SPUmul64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
+ (v4i32 VECREG:$rCGmask)),
+ v2i64_mul<(v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
+ (v4i32 VECREG:$rCGmask)>.Fragment>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// f64 comparisons
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+// selb instruction definition for i64. Note that the selection mask is
+// a vector, produced by various forms of FSM:
+def SELBf64_cond:
+ SELBInst<(outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB, R32C:$rC),
+ [(set R64FP:$rT,
+ (select R32C:$rC, R64FP:$rB, R64FP:$rA))]>;
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUAsmPrinter.cpp b/contrib/llvm/lib/Target/CellSPU/SPUAsmPrinter.cpp
new file mode 100644
index 0000000..90b5270
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUAsmPrinter.cpp
@@ -0,0 +1,334 @@
+//===-- SPUAsmPrinter.cpp - Print machine instrs to Cell SPU assembly -------=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to Cell SPU assembly language. This printer
+// is the output mechanism used by `llc'.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asmprinter"
+#include "SPU.h"
+#include "SPUTargetMachine.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+ class SPUAsmPrinter : public AsmPrinter {
+ public:
+ explicit SPUAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) :
+ AsmPrinter(TM, Streamer) {}
+
+ virtual const char *getPassName() const {
+ return "STI CBEA SPU Assembly Printer";
+ }
+
+ /// printInstruction - This method is automatically generated by tablegen
+ /// from the instruction set description.
+ void printInstruction(const MachineInstr *MI, raw_ostream &OS);
+ static const char *getRegisterName(unsigned RegNo);
+
+
+ void EmitInstruction(const MachineInstr *MI) {
+ SmallString<128> Str;
+ raw_svector_ostream OS(Str);
+ printInstruction(MI, OS);
+ OutStreamer.EmitRawText(OS.str());
+ }
+ void printOp(const MachineOperand &MO, raw_ostream &OS);
+
+ void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ if (MO.isReg()) {
+ O << getRegisterName(MO.getReg());
+ } else if (MO.isImm()) {
+ O << MO.getImm();
+ } else {
+ printOp(MO, O);
+ }
+ }
+
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O);
+ bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O);
+
+
+ void
+ printU7ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
+ {
+ unsigned int value = MI->getOperand(OpNo).getImm();
+ assert(value < (1 << 8) && "Invalid u7 argument");
+ O << value;
+ }
+
+ void
+ printShufAddr(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
+ {
+ char value = MI->getOperand(OpNo).getImm();
+ O << (int) value;
+ O << "(";
+ printOperand(MI, OpNo+1, O);
+ O << ")";
+ }
+
+ void
+ printS16ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
+ {
+ O << (short) MI->getOperand(OpNo).getImm();
+ }
+
+ void
+ printU16ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
+ {
+ O << (unsigned short)MI->getOperand(OpNo).getImm();
+ }
+
+ void
+ printMemRegReg(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
+ // When used as the base register, r0 reads constant zero rather than
+ // the value contained in the register. For this reason, the darwin
+ // assembler requires that we print r0 as 0 (no r) when used as the base.
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ O << getRegisterName(MO.getReg()) << ", ";
+ printOperand(MI, OpNo+1, O);
+ }
+
+ void
+ printU18ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
+ {
+ unsigned int value = MI->getOperand(OpNo).getImm();
+ assert(value <= (1 << 19) - 1 && "Invalid u18 argument");
+ O << value;
+ }
+
+ void
+ printS10ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
+ {
+ short value = (short) (((int) MI->getOperand(OpNo).getImm() << 16)
+ >> 16);
+ assert((value >= -(1 << 9) && value <= (1 << 9) - 1)
+ && "Invalid s10 argument");
+ O << value;
+ }
+
+ void
+ printU10ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
+ {
+ short value = (short) (((int) MI->getOperand(OpNo).getImm() << 16)
+ >> 16);
+ assert((value <= (1 << 10) - 1) && "Invalid u10 argument");
+ O << value;
+ }
+
+ void
+ printDFormAddr(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
+ {
+ assert(MI->getOperand(OpNo).isImm() &&
+ "printDFormAddr first operand is not immediate");
+ int64_t value = int64_t(MI->getOperand(OpNo).getImm());
+ int16_t value16 = int16_t(value);
+ assert((value16 >= -(1 << (9+4)) && value16 <= (1 << (9+4)) - 1)
+ && "Invalid dform s10 offset argument");
+ O << (value16 & ~0xf) << "(";
+ printOperand(MI, OpNo+1, O);
+ O << ")";
+ }
+
+ void
+ printAddr256K(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
+ {
+ /* Note: operand 1 is an offset or symbol name. */
+ if (MI->getOperand(OpNo).isImm()) {
+ printS16ImmOperand(MI, OpNo, O);
+ } else {
+ printOp(MI->getOperand(OpNo), O);
+ if (MI->getOperand(OpNo+1).isImm()) {
+ int displ = int(MI->getOperand(OpNo+1).getImm());
+ if (displ > 0)
+ O << "+" << displ;
+ else if (displ < 0)
+ O << displ;
+ }
+ }
+ }
+
+ void printCallOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
+ printOp(MI->getOperand(OpNo), O);
+ }
+
+ void printHBROperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
+ printOp(MI->getOperand(OpNo), O);
+ }
+
+ void printPCRelativeOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
+ // Used to generate a ".-<target>", but it turns out that the assembler
+ // really wants the target.
+ //
+ // N.B.: This operand is used for call targets. Branch hints are another
+ // animal entirely.
+ printOp(MI->getOperand(OpNo), O);
+ }
+
+ void printSymbolHi(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
+ if (MI->getOperand(OpNo).isImm()) {
+ printS16ImmOperand(MI, OpNo, O);
+ } else {
+ printOp(MI->getOperand(OpNo), O);
+ O << "@h";
+ }
+ }
+
+ void printSymbolLo(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
+ if (MI->getOperand(OpNo).isImm()) {
+ printS16ImmOperand(MI, OpNo, O);
+ } else {
+ printOp(MI->getOperand(OpNo), O);
+ O << "@l";
+ }
+ }
+
+ /// Print local store address
+ void printSymbolLSA(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
+ printOp(MI->getOperand(OpNo), O);
+ }
+
+ void printROTHNeg7Imm(const MachineInstr *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (MI->getOperand(OpNo).isImm()) {
+ int value = (int) MI->getOperand(OpNo).getImm();
+ assert((value >= 0 && value < 16)
+ && "Invalid negated immediate rotate 7-bit argument");
+ O << -value;
+ } else {
+ llvm_unreachable("Invalid/non-immediate rotate amount in printRotateNeg7Imm");
+ }
+ }
+
+ void printROTNeg7Imm(const MachineInstr *MI, unsigned OpNo, raw_ostream &O){
+ assert(MI->getOperand(OpNo).isImm() &&
+ "Invalid/non-immediate rotate amount in printRotateNeg7Imm");
+ int value = (int) MI->getOperand(OpNo).getImm();
+ assert((value >= 0 && value <= 32)
+ && "Invalid negated immediate rotate 7-bit argument");
+ O << -value;
+ }
+ };
+} // end of anonymous namespace
+
+// Include the auto-generated portion of the assembly writer
+#include "SPUGenAsmWriter.inc"
+
+void SPUAsmPrinter::printOp(const MachineOperand &MO, raw_ostream &O) {
+ switch (MO.getType()) {
+ case MachineOperand::MO_Immediate:
+ report_fatal_error("printOp() does not handle immediate values");
+ return;
+
+ case MachineOperand::MO_MachineBasicBlock:
+ O << *MO.getMBB()->getSymbol();
+ return;
+ case MachineOperand::MO_JumpTableIndex:
+ O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+ << '_' << MO.getIndex();
+ return;
+ case MachineOperand::MO_ConstantPoolIndex:
+ O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber()
+ << '_' << MO.getIndex();
+ return;
+ case MachineOperand::MO_ExternalSymbol:
+ // Computing the address of an external symbol, not calling it.
+ if (TM.getRelocationModel() != Reloc::Static) {
+ O << "L" << MAI->getGlobalPrefix() << MO.getSymbolName()
+ << "$non_lazy_ptr";
+ return;
+ }
+ O << *GetExternalSymbolSymbol(MO.getSymbolName());
+ return;
+ case MachineOperand::MO_GlobalAddress:
+ // External or weakly linked global variables need non-lazily-resolved
+ // stubs
+ if (TM.getRelocationModel() != Reloc::Static) {
+ const GlobalValue *GV = MO.getGlobal();
+ if (((GV->isDeclaration() || GV->hasWeakLinkage() ||
+ GV->hasLinkOnceLinkage() || GV->hasCommonLinkage()))) {
+ O << *GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
+ return;
+ }
+ }
+ O << *Mang->getSymbol(MO.getGlobal());
+ return;
+ case MachineOperand::MO_MCSymbol:
+ O << *(MO.getMCSymbol());
+ return;
+ default:
+ O << "<unknown operand type: " << MO.getType() << ">";
+ return;
+ }
+}
+
+/// PrintAsmOperand - Print out an operand for an inline asm expression.
+///
+bool SPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode, raw_ostream &O) {
+ // Does this asm operand have a single letter operand modifier?
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ default: return true; // Unknown modifier.
+ case 'L': // Write second word of DImode reference.
+ // Verify that this operand has two consecutive registers.
+ if (!MI->getOperand(OpNo).isReg() ||
+ OpNo+1 == MI->getNumOperands() ||
+ !MI->getOperand(OpNo+1).isReg())
+ return true;
+ ++OpNo; // Return the high-part.
+ break;
+ }
+ }
+
+ printOperand(MI, OpNo, O);
+ return false;
+}
+
+bool SPUAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNo, unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &O) {
+ if (ExtraCode && ExtraCode[0])
+ return true; // Unknown modifier.
+ printMemRegReg(MI, OpNo, O);
+ return false;
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeCellSPUAsmPrinter() {
+ RegisterAsmPrinter<SPUAsmPrinter> X(TheCellSPUTarget);
+}
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUCallingConv.td b/contrib/llvm/lib/Target/CellSPU/SPUCallingConv.td
new file mode 100644
index 0000000..04fa2ae
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUCallingConv.td
@@ -0,0 +1,57 @@
+//===- SPUCallingConv.td - Calling Conventions for CellSPU -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This describes the calling conventions for the STI Cell SPU architecture.
+//
+//===----------------------------------------------------------------------===//
+
+/// CCIfSubtarget - Match if the current subtarget has a feature F.
+class CCIfSubtarget<string F, CCAction A>
+ : CCIf<!strconcat("State.getTarget().getSubtarget<PPCSubtarget>().", F), A>;
+
+//===----------------------------------------------------------------------===//
+// Return Value Calling Convention
+//===----------------------------------------------------------------------===//
+
+// Return-value convention for Cell SPU: return value to be passed in reg 3-74
+def RetCC_SPU : CallingConv<[
+ CCIfType<[i8,i16,i32,i64,i128,f32,f64,v16i8,v8i16,v4i32,v2i64,v4f32,v2f64],
+ CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11,
+ R12, R13, R14, R15, R16, R17, R18, R19, R20,
+ R21, R22, R23, R24, R25, R26, R27, R28, R29,
+ R30, R31, R32, R33, R34, R35, R36, R37, R38,
+ R39, R40, R41, R42, R43, R44, R45, R46, R47,
+ R48, R49, R50, R51, R52, R53, R54, R55, R56,
+ R57, R58, R59, R60, R61, R62, R63, R64, R65,
+ R66, R67, R68, R69, R70, R71, R72, R73, R74]>>
+]>;
+
+
+//===----------------------------------------------------------------------===//
+// CellSPU Argument Calling Conventions
+//===----------------------------------------------------------------------===//
+def CCC_SPU : CallingConv<[
+ CCIfType<[i8, i16, i32, i64, i128, f32, f64,
+ v16i8, v8i16, v4i32, v4f32, v2i64, v2f64],
+ CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11,
+ R12, R13, R14, R15, R16, R17, R18, R19, R20,
+ R21, R22, R23, R24, R25, R26, R27, R28, R29,
+ R30, R31, R32, R33, R34, R35, R36, R37, R38,
+ R39, R40, R41, R42, R43, R44, R45, R46, R47,
+ R48, R49, R50, R51, R52, R53, R54, R55, R56,
+ R57, R58, R59, R60, R61, R62, R63, R64, R65,
+ R66, R67, R68, R69, R70, R71, R72, R73, R74]>>,
+ // Integer/FP values get stored in stack slots that are 8 bytes in size and
+ // 8-byte aligned if there are no more registers to hold them.
+ CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>,
+
+ // Vectors get 16-byte stack slots that are 16-byte aligned.
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCAssignToStack<16, 16>>
+]>;
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUFrameLowering.cpp b/contrib/llvm/lib/Target/CellSPU/SPUFrameLowering.cpp
new file mode 100644
index 0000000..093f99f
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUFrameLowering.cpp
@@ -0,0 +1,255 @@
+//===-- SPUTargetMachine.cpp - Define TargetMachine for Cell SPU ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Top-level implementation for the Cell SPU target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPU.h"
+#include "SPUFrameLowering.h"
+#include "SPUInstrBuilder.h"
+#include "SPUInstrInfo.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// SPUFrameLowering:
+//===----------------------------------------------------------------------===//
+
+SPUFrameLowering::SPUFrameLowering(const SPUSubtarget &sti)
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 16, 0),
+ Subtarget(sti) {
+ LR[0].first = SPU::R0;
+ LR[0].second = 16;
+}
+
+
+//--------------------------------------------------------------------------
+// hasFP - Return true if the specified function actually has a dedicated frame
+// pointer register. This is true if the function needs a frame pointer and has
+// a non-zero stack size.
+bool SPUFrameLowering::hasFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ return MFI->getStackSize() &&
+ (DisableFramePointerElim(MF) || MFI->hasVarSizedObjects());
+}
+
+
+/// determineFrameLayout - Determine the size of the frame and maximum call
+/// frame size.
+void SPUFrameLowering::determineFrameLayout(MachineFunction &MF) const {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ // Get the number of bytes to allocate from the FrameInfo
+ unsigned FrameSize = MFI->getStackSize();
+
+ // Get the alignments provided by the target, and the maximum alignment
+ // (if any) of the fixed frame objects.
+ unsigned TargetAlign = getStackAlignment();
+ unsigned Align = std::max(TargetAlign, MFI->getMaxAlignment());
+ assert(isPowerOf2_32(Align) && "Alignment is not power of 2");
+ unsigned AlignMask = Align - 1;
+
+ // Get the maximum call frame size of all the calls.
+ unsigned maxCallFrameSize = MFI->getMaxCallFrameSize();
+
+ // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
+ // that allocations will be aligned.
+ if (MFI->hasVarSizedObjects())
+ maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask;
+
+ // Update maximum call frame size.
+ MFI->setMaxCallFrameSize(maxCallFrameSize);
+
+ // Include call frame size in total.
+ FrameSize += maxCallFrameSize;
+
+ // Make sure the frame is aligned.
+ FrameSize = (FrameSize + AlignMask) & ~AlignMask;
+
+ // Update frame info.
+ MFI->setStackSize(FrameSize);
+}
+
+void SPUFrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const SPUInstrInfo &TII =
+ *static_cast<const SPUInstrInfo*>(MF.getTarget().getInstrInfo());
+ MachineModuleInfo &MMI = MF.getMMI();
+ DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+ // Prepare for debug frame info.
+ bool hasDebugInfo = MMI.hasDebugInfo();
+ MCSymbol *FrameLabel = 0;
+
+ // Move MBBI back to the beginning of the function.
+ MBBI = MBB.begin();
+
+ // Work out frame sizes.
+ determineFrameLayout(MF);
+ int FrameSize = MFI->getStackSize();
+
+ assert((FrameSize & 0xf) == 0
+ && "SPURegisterInfo::emitPrologue: FrameSize not aligned");
+
+ // the "empty" frame size is 16 - just the register scavenger spill slot
+ if (FrameSize > 16 || MFI->adjustsStack()) {
+ FrameSize = -(FrameSize + SPUFrameLowering::minStackSize());
+ if (hasDebugInfo) {
+ // Mark effective beginning of when frame pointer becomes valid.
+ FrameLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::PROLOG_LABEL)).addSym(FrameLabel);
+ }
+
+ // Adjust stack pointer, spilling $lr -> 16($sp) and $sp -> -FrameSize($sp)
+ // for the ABI
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr32), SPU::R0).addImm(16)
+ .addReg(SPU::R1);
+ if (isInt<10>(FrameSize)) {
+ // Spill $sp to adjusted $sp
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr32), SPU::R1).addImm(FrameSize)
+ .addReg(SPU::R1);
+ // Adjust $sp by required amout
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::AIr32), SPU::R1).addReg(SPU::R1)
+ .addImm(FrameSize);
+ } else if (isInt<16>(FrameSize)) {
+ // Frame size can be loaded into ILr32n, so temporarily spill $r2 and use
+ // $r2 to adjust $sp:
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr128), SPU::R2)
+ .addImm(-16)
+ .addReg(SPU::R1);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::ILr32), SPU::R2)
+ .addImm(FrameSize);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::STQXr32), SPU::R1)
+ .addReg(SPU::R2)
+ .addReg(SPU::R1);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::Ar32), SPU::R1)
+ .addReg(SPU::R1)
+ .addReg(SPU::R2);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::SFIr32), SPU::R2)
+ .addReg(SPU::R2)
+ .addImm(16);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::LQXr128), SPU::R2)
+ .addReg(SPU::R2)
+ .addReg(SPU::R1);
+ } else {
+ report_fatal_error("Unhandled frame size: " + Twine(FrameSize));
+ }
+
+ if (hasDebugInfo) {
+ std::vector<MachineMove> &Moves = MMI.getFrameMoves();
+
+ // Show update of SP.
+ MachineLocation SPDst(MachineLocation::VirtualFP);
+ MachineLocation SPSrc(MachineLocation::VirtualFP, -FrameSize);
+ Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc));
+
+ // Add callee saved registers to move list.
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
+ int Offset = MFI->getObjectOffset(CSI[I].getFrameIdx());
+ unsigned Reg = CSI[I].getReg();
+ if (Reg == SPU::R0) continue;
+ MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
+ MachineLocation CSSrc(Reg);
+ Moves.push_back(MachineMove(FrameLabel, CSDst, CSSrc));
+ }
+
+ // Mark effective beginning of when frame pointer is ready.
+ MCSymbol *ReadyLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::PROLOG_LABEL)).addSym(ReadyLabel);
+
+ MachineLocation FPDst(SPU::R1);
+ MachineLocation FPSrc(MachineLocation::VirtualFP);
+ Moves.push_back(MachineMove(ReadyLabel, FPDst, FPSrc));
+ }
+ }
+}
+
+void SPUFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ const SPUInstrInfo &TII =
+ *static_cast<const SPUInstrInfo*>(MF.getTarget().getInstrInfo());
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ int FrameSize = MFI->getStackSize();
+ int LinkSlotOffset = SPUFrameLowering::stackSlotSize();
+ DebugLoc dl = MBBI->getDebugLoc();
+
+ assert(MBBI->getOpcode() == SPU::RET &&
+ "Can only insert epilog into returning blocks");
+ assert((FrameSize & 0xf) == 0 && "FrameSize not aligned");
+
+ // the "empty" frame size is 16 - just the register scavenger spill slot
+ if (FrameSize > 16 || MFI->adjustsStack()) {
+ FrameSize = FrameSize + SPUFrameLowering::minStackSize();
+ if (isInt<10>(FrameSize + LinkSlotOffset)) {
+ // Reload $lr, adjust $sp by required amount
+ // Note: We do this to slightly improve dual issue -- not by much, but it
+ // is an opportunity for dual issue.
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::LQDr128), SPU::R0)
+ .addImm(FrameSize + LinkSlotOffset)
+ .addReg(SPU::R1);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::AIr32), SPU::R1)
+ .addReg(SPU::R1)
+ .addImm(FrameSize);
+ } else if (FrameSize <= (1 << 16) - 1 && FrameSize >= -(1 << 16)) {
+ // Frame size can be loaded into ILr32n, so temporarily spill $r2 and use
+ // $r2 to adjust $sp:
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr128), SPU::R2)
+ .addImm(16)
+ .addReg(SPU::R1);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::ILr32), SPU::R2)
+ .addImm(FrameSize);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::Ar32), SPU::R1)
+ .addReg(SPU::R1)
+ .addReg(SPU::R2);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::LQDr128), SPU::R0)
+ .addImm(16)
+ .addReg(SPU::R1);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::SFIr32), SPU::R2).
+ addReg(SPU::R2)
+ .addImm(16);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::LQXr128), SPU::R2)
+ .addReg(SPU::R2)
+ .addReg(SPU::R1);
+ } else {
+ report_fatal_error("Unhandled frame size: " + Twine(FrameSize));
+ }
+ }
+}
+
+void SPUFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const{
+ // Mark LR and SP unused, since the prolog spills them to stack and
+ // we don't want anyone else to spill them for us.
+ //
+ // Also, unless R2 is really used someday, don't spill it automatically.
+ MF.getRegInfo().setPhysRegUnused(SPU::R0);
+ MF.getRegInfo().setPhysRegUnused(SPU::R1);
+ MF.getRegInfo().setPhysRegUnused(SPU::R2);
+
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const TargetRegisterClass *RC = &SPU::R32CRegClass;
+ RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+ RC->getAlignment(),
+ false));
+}
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUFrameLowering.h b/contrib/llvm/lib/Target/CellSPU/SPUFrameLowering.h
new file mode 100644
index 0000000..b837f2c
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUFrameLowering.h
@@ -0,0 +1,80 @@
+//=====-- SPUFrameLowering.h - SPU Frame Lowering stuff -*- C++ -*----========//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains CellSPU frame information that doesn't fit anywhere else
+// cleanly...
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPU_FRAMEINFO_H
+#define SPU_FRAMEINFO_H
+
+#include "SPURegisterInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+ class SPUSubtarget;
+
+ class SPUFrameLowering: public TargetFrameLowering {
+ const SPUSubtarget &Subtarget;
+ std::pair<unsigned, int> LR[1];
+
+ public:
+ SPUFrameLowering(const SPUSubtarget &sti);
+
+ //! Determine the frame's layour
+ void determineFrameLayout(MachineFunction &MF) const;
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ //! Prediate: Target has dedicated frame pointer
+ bool hasFP(const MachineFunction &MF) const;
+
+ void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS = NULL) const;
+
+ //! Return a function's saved spill slots
+ /*!
+ For CellSPU, a function's saved spill slots is just the link register.
+ */
+ const std::pair<unsigned, int> *
+ getCalleeSaveSpillSlots(unsigned &NumEntries) const;
+
+ //! Stack slot size (16 bytes)
+ static int stackSlotSize() {
+ return 16;
+ }
+ //! Maximum frame offset representable by a signed 10-bit integer
+ /*!
+ This is the maximum frame offset that can be expressed as a 10-bit
+ integer, used in D-form addresses.
+ */
+ static int maxFrameOffset() {
+ return ((1 << 9) - 1) * stackSlotSize();
+ }
+ //! Minimum frame offset representable by a signed 10-bit integer
+ static int minFrameOffset() {
+ return -(1 << 9) * stackSlotSize();
+ }
+ //! Minimum frame size (enough to spill LR + SP)
+ static int minStackSize() {
+ return (2 * stackSlotSize());
+ }
+ //! Convert frame index to stack offset
+ static int FItoStackOffset(int frame_index) {
+ return frame_index * stackSlotSize();
+ }
+ };
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUHazardRecognizers.cpp b/contrib/llvm/lib/Target/CellSPU/SPUHazardRecognizers.cpp
new file mode 100644
index 0000000..403d7ef
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUHazardRecognizers.cpp
@@ -0,0 +1,141 @@
+//===-- SPUHazardRecognizers.cpp - Cell Hazard Recognizer Impls -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements hazard recognizers for scheduling on Cell SPU
+// processors.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "sched"
+
+#include "SPUHazardRecognizers.h"
+#include "SPU.h"
+#include "SPUInstrInfo.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Cell SPU hazard recognizer
+//
+// This is the pipeline hazard recognizer for the Cell SPU processor. It does
+// very little right now.
+//===----------------------------------------------------------------------===//
+
+SPUHazardRecognizer::SPUHazardRecognizer(const TargetInstrInfo &tii) :
+ TII(tii),
+ EvenOdd(0)
+{
+}
+
+/// Return the pipeline hazard type encountered or generated by this
+/// instruction. Currently returns NoHazard.
+///
+/// \return NoHazard
+ScheduleHazardRecognizer::HazardType
+SPUHazardRecognizer::getHazardType(SUnit *SU, int Stalls)
+{
+ // Initial thoughts on how to do this, but this code cannot work unless the
+ // function's prolog and epilog code are also being scheduled so that we can
+ // accurately determine which pipeline is being scheduled.
+#if 0
+ assert(Stalls == 0 && "SPU hazards don't yet support scoreboard lookahead");
+
+ const SDNode *Node = SU->getNode()->getFlaggedMachineNode();
+ ScheduleHazardRecognizer::HazardType retval = NoHazard;
+ bool mustBeOdd = false;
+
+ switch (Node->getOpcode()) {
+ case SPU::LQDv16i8:
+ case SPU::LQDv8i16:
+ case SPU::LQDv4i32:
+ case SPU::LQDv4f32:
+ case SPU::LQDv2f64:
+ case SPU::LQDr128:
+ case SPU::LQDr64:
+ case SPU::LQDr32:
+ case SPU::LQDr16:
+ case SPU::LQAv16i8:
+ case SPU::LQAv8i16:
+ case SPU::LQAv4i32:
+ case SPU::LQAv4f32:
+ case SPU::LQAv2f64:
+ case SPU::LQAr128:
+ case SPU::LQAr64:
+ case SPU::LQAr32:
+ case SPU::LQXv4i32:
+ case SPU::LQXr128:
+ case SPU::LQXr64:
+ case SPU::LQXr32:
+ case SPU::LQXr16:
+ case SPU::STQDv16i8:
+ case SPU::STQDv8i16:
+ case SPU::STQDv4i32:
+ case SPU::STQDv4f32:
+ case SPU::STQDv2f64:
+ case SPU::STQDr128:
+ case SPU::STQDr64:
+ case SPU::STQDr32:
+ case SPU::STQDr16:
+ case SPU::STQDr8:
+ case SPU::STQAv16i8:
+ case SPU::STQAv8i16:
+ case SPU::STQAv4i32:
+ case SPU::STQAv4f32:
+ case SPU::STQAv2f64:
+ case SPU::STQAr128:
+ case SPU::STQAr64:
+ case SPU::STQAr32:
+ case SPU::STQAr16:
+ case SPU::STQAr8:
+ case SPU::STQXv16i8:
+ case SPU::STQXv8i16:
+ case SPU::STQXv4i32:
+ case SPU::STQXv4f32:
+ case SPU::STQXv2f64:
+ case SPU::STQXr128:
+ case SPU::STQXr64:
+ case SPU::STQXr32:
+ case SPU::STQXr16:
+ case SPU::STQXr8:
+ case SPU::RET:
+ mustBeOdd = true;
+ break;
+ default:
+ // Assume that this instruction can be on the even pipe
+ break;
+ }
+
+ if (mustBeOdd && !EvenOdd)
+ retval = Hazard;
+
+ DEBUG(errs() << "SPUHazardRecognizer EvenOdd " << EvenOdd << " Hazard "
+ << retval << "\n");
+ EvenOdd ^= 1;
+ return retval;
+#else
+ return NoHazard;
+#endif
+}
+
+void SPUHazardRecognizer::EmitInstruction(SUnit *SU)
+{
+}
+
+void SPUHazardRecognizer::AdvanceCycle()
+{
+ DEBUG(errs() << "SPUHazardRecognizer::AdvanceCycle\n");
+}
+
+void SPUHazardRecognizer::EmitNoop()
+{
+ AdvanceCycle();
+}
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUHazardRecognizers.h b/contrib/llvm/lib/Target/CellSPU/SPUHazardRecognizers.h
new file mode 100644
index 0000000..675632c
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUHazardRecognizers.h
@@ -0,0 +1,41 @@
+//===-- SPUHazardRecognizers.h - Cell SPU Hazard Recognizer -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines hazard recognizers for scheduling on the Cell SPU
+// processor.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPUHAZRECS_H
+#define SPUHAZRECS_H
+
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+
+namespace llvm {
+
+class TargetInstrInfo;
+
+/// SPUHazardRecognizer
+class SPUHazardRecognizer : public ScheduleHazardRecognizer
+{
+private:
+ const TargetInstrInfo &TII;
+ int EvenOdd;
+
+public:
+ SPUHazardRecognizer(const TargetInstrInfo &TII);
+ virtual HazardType getHazardType(SUnit *SU, int Stalls);
+ virtual void EmitInstruction(SUnit *SU);
+ virtual void AdvanceCycle();
+ virtual void EmitNoop();
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/contrib/llvm/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
new file mode 100644
index 0000000..a297d03
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
@@ -0,0 +1,1203 @@
+//===-- SPUISelDAGToDAG.cpp - CellSPU pattern matching inst selector ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a pattern matching instruction selector for the Cell SPU,
+// converting from a legalized dag to a SPU-target dag.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPU.h"
+#include "SPUTargetMachine.h"
+#include "SPUHazardRecognizers.h"
+#include "SPUFrameLowering.h"
+#include "SPUTargetMachine.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Constants.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+ //! ConstantSDNode predicate for i32 sign-extended, 10-bit immediates
+ bool
+ isI32IntS10Immediate(ConstantSDNode *CN)
+ {
+ return isInt<10>(CN->getSExtValue());
+ }
+
+ //! ConstantSDNode predicate for i32 unsigned 10-bit immediate values
+ bool
+ isI32IntU10Immediate(ConstantSDNode *CN)
+ {
+ return isUInt<10>(CN->getSExtValue());
+ }
+
+ //! ConstantSDNode predicate for i16 sign-extended, 10-bit immediate values
+ bool
+ isI16IntS10Immediate(ConstantSDNode *CN)
+ {
+ return isInt<10>(CN->getSExtValue());
+ }
+
+ //! ConstantSDNode predicate for i16 unsigned 10-bit immediate values
+ bool
+ isI16IntU10Immediate(ConstantSDNode *CN)
+ {
+ return isUInt<10>((short) CN->getZExtValue());
+ }
+
+ //! ConstantSDNode predicate for signed 16-bit values
+ /*!
+ \arg CN The constant SelectionDAG node holding the value
+ \arg Imm The returned 16-bit value, if returning true
+
+ This predicate tests the value in \a CN to see whether it can be
+ represented as a 16-bit, sign-extended quantity. Returns true if
+ this is the case.
+ */
+ bool
+ isIntS16Immediate(ConstantSDNode *CN, short &Imm)
+ {
+ EVT vt = CN->getValueType(0);
+ Imm = (short) CN->getZExtValue();
+ if (vt.getSimpleVT() >= MVT::i1 && vt.getSimpleVT() <= MVT::i16) {
+ return true;
+ } else if (vt == MVT::i32) {
+ int32_t i_val = (int32_t) CN->getZExtValue();
+ short s_val = (short) i_val;
+ return i_val == s_val;
+ } else {
+ int64_t i_val = (int64_t) CN->getZExtValue();
+ short s_val = (short) i_val;
+ return i_val == s_val;
+ }
+
+ return false;
+ }
+
+ //! ConstantFPSDNode predicate for representing floats as 16-bit sign ext.
+ static bool
+ isFPS16Immediate(ConstantFPSDNode *FPN, short &Imm)
+ {
+ EVT vt = FPN->getValueType(0);
+ if (vt == MVT::f32) {
+ int val = FloatToBits(FPN->getValueAPF().convertToFloat());
+ int sval = (int) ((val << 16) >> 16);
+ Imm = (short) val;
+ return val == sval;
+ }
+
+ return false;
+ }
+
+ //! Generate the carry-generate shuffle mask.
+ SDValue getCarryGenerateShufMask(SelectionDAG &DAG, DebugLoc dl) {
+ SmallVector<SDValue, 16 > ShufBytes;
+
+ // Create the shuffle mask for "rotating" the borrow up one register slot
+ // once the borrow is generated.
+ ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
+ ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
+ ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
+ ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ &ShufBytes[0], ShufBytes.size());
+ }
+
+ //! Generate the borrow-generate shuffle mask
+ SDValue getBorrowGenerateShufMask(SelectionDAG &DAG, DebugLoc dl) {
+ SmallVector<SDValue, 16 > ShufBytes;
+
+ // Create the shuffle mask for "rotating" the borrow up one register slot
+ // once the borrow is generated.
+ ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
+ ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
+ ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
+ ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ &ShufBytes[0], ShufBytes.size());
+ }
+
+ //===------------------------------------------------------------------===//
+ /// SPUDAGToDAGISel - Cell SPU-specific code to select SPU machine
+ /// instructions for SelectionDAG operations.
+ ///
+ class SPUDAGToDAGISel :
+ public SelectionDAGISel
+ {
+ const SPUTargetMachine &TM;
+ const SPUTargetLowering &SPUtli;
+ unsigned GlobalBaseReg;
+
+ public:
+ explicit SPUDAGToDAGISel(SPUTargetMachine &tm) :
+ SelectionDAGISel(tm),
+ TM(tm),
+ SPUtli(*tm.getTargetLowering())
+ { }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF) {
+ // Make sure we re-emit a set of the global base reg if necessary
+ GlobalBaseReg = 0;
+ SelectionDAGISel::runOnMachineFunction(MF);
+ return true;
+ }
+
+ /// getI32Imm - Return a target constant with the specified value, of type
+ /// i32.
+ inline SDValue getI32Imm(uint32_t Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i32);
+ }
+
+ /// getSmallIPtrImm - Return a target constant of pointer type.
+ inline SDValue getSmallIPtrImm(unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, SPUtli.getPointerTy());
+ }
+
+ SDNode *emitBuildVector(SDNode *bvNode) {
+ EVT vecVT = bvNode->getValueType(0);
+ DebugLoc dl = bvNode->getDebugLoc();
+
+ // Check to see if this vector can be represented as a CellSPU immediate
+ // constant by invoking all of the instruction selection predicates:
+ if (((vecVT == MVT::v8i16) &&
+ (SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i16).getNode() != 0)) ||
+ ((vecVT == MVT::v4i32) &&
+ ((SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i32).getNode() != 0) ||
+ (SPU::get_ILHUvec_imm(bvNode, *CurDAG, MVT::i32).getNode() != 0) ||
+ (SPU::get_vec_u18imm(bvNode, *CurDAG, MVT::i32).getNode() != 0) ||
+ (SPU::get_v4i32_imm(bvNode, *CurDAG).getNode() != 0))) ||
+ ((vecVT == MVT::v2i64) &&
+ ((SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i64).getNode() != 0) ||
+ (SPU::get_ILHUvec_imm(bvNode, *CurDAG, MVT::i64).getNode() != 0) ||
+ (SPU::get_vec_u18imm(bvNode, *CurDAG, MVT::i64).getNode() != 0)))) {
+ HandleSDNode Dummy(SDValue(bvNode, 0));
+ if (SDNode *N = Select(bvNode))
+ return N;
+ return Dummy.getValue().getNode();
+ }
+
+ // No, need to emit a constant pool spill:
+ std::vector<Constant*> CV;
+
+ for (size_t i = 0; i < bvNode->getNumOperands(); ++i) {
+ ConstantSDNode *V = cast<ConstantSDNode > (bvNode->getOperand(i));
+ CV.push_back(const_cast<ConstantInt *>(V->getConstantIntValue()));
+ }
+
+ const Constant *CP = ConstantVector::get(CV);
+ SDValue CPIdx = CurDAG->getConstantPool(CP, SPUtli.getPointerTy());
+ unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+ SDValue CGPoolOffset =
+ SPU::LowerConstantPool(CPIdx, *CurDAG, TM);
+
+ HandleSDNode Dummy(CurDAG->getLoad(vecVT, dl,
+ CurDAG->getEntryNode(), CGPoolOffset,
+ MachinePointerInfo::getConstantPool(),
+ false, false, Alignment));
+ CurDAG->ReplaceAllUsesWith(SDValue(bvNode, 0), Dummy.getValue());
+ if (SDNode *N = SelectCode(Dummy.getValue().getNode()))
+ return N;
+ return Dummy.getValue().getNode();
+ }
+
+ /// Select - Convert the specified operand from a target-independent to a
+ /// target-specific node if it hasn't already been changed.
+ SDNode *Select(SDNode *N);
+
+ //! Emit the instruction sequence for i64 shl
+ SDNode *SelectSHLi64(SDNode *N, EVT OpVT);
+
+ //! Emit the instruction sequence for i64 srl
+ SDNode *SelectSRLi64(SDNode *N, EVT OpVT);
+
+ //! Emit the instruction sequence for i64 sra
+ SDNode *SelectSRAi64(SDNode *N, EVT OpVT);
+
+ //! Emit the necessary sequence for loading i64 constants:
+ SDNode *SelectI64Constant(SDNode *N, EVT OpVT, DebugLoc dl);
+
+ //! Alternate instruction emit sequence for loading i64 constants
+ SDNode *SelectI64Constant(uint64_t i64const, EVT OpVT, DebugLoc dl);
+
+ //! Returns true if the address N is an A-form (local store) address
+ bool SelectAFormAddr(SDNode *Op, SDValue N, SDValue &Base,
+ SDValue &Index);
+
+ //! D-form address predicate
+ bool SelectDFormAddr(SDNode *Op, SDValue N, SDValue &Base,
+ SDValue &Index);
+
+ /// Alternate D-form address using i7 offset predicate
+ bool SelectDForm2Addr(SDNode *Op, SDValue N, SDValue &Disp,
+ SDValue &Base);
+
+ /// D-form address selection workhorse
+ bool DFormAddressPredicate(SDNode *Op, SDValue N, SDValue &Disp,
+ SDValue &Base, int minOffset, int maxOffset);
+
+ //! Address predicate if N can be expressed as an indexed [r+r] operation.
+ bool SelectXFormAddr(SDNode *Op, SDValue N, SDValue &Base,
+ SDValue &Index);
+
+ /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
+ /// inline asm expressions.
+ virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps) {
+ SDValue Op0, Op1;
+ switch (ConstraintCode) {
+ default: return true;
+ case 'm': // memory
+ if (!SelectDFormAddr(Op.getNode(), Op, Op0, Op1)
+ && !SelectAFormAddr(Op.getNode(), Op, Op0, Op1))
+ SelectXFormAddr(Op.getNode(), Op, Op0, Op1);
+ break;
+ case 'o': // offsetable
+ if (!SelectDFormAddr(Op.getNode(), Op, Op0, Op1)
+ && !SelectAFormAddr(Op.getNode(), Op, Op0, Op1)) {
+ Op0 = Op;
+ Op1 = getSmallIPtrImm(0);
+ }
+ break;
+ case 'v': // not offsetable
+#if 1
+ llvm_unreachable("InlineAsmMemoryOperand 'v' constraint not handled.");
+#else
+ SelectAddrIdxOnly(Op, Op, Op0, Op1);
+#endif
+ break;
+ }
+
+ OutOps.push_back(Op0);
+ OutOps.push_back(Op1);
+ return false;
+ }
+
+ virtual const char *getPassName() const {
+ return "Cell SPU DAG->DAG Pattern Instruction Selection";
+ }
+
+ private:
+ SDValue getRC( MVT );
+
+ // Include the pieces autogenerated from the target description.
+#include "SPUGenDAGISel.inc"
+ };
+}
+
+/*!
+ \arg Op The ISD instruction operand
+ \arg N The address to be tested
+ \arg Base The base address
+ \arg Index The base address index
+ */
+bool
+SPUDAGToDAGISel::SelectAFormAddr(SDNode *Op, SDValue N, SDValue &Base,
+ SDValue &Index) {
+ // These match the addr256k operand type:
+ EVT OffsVT = MVT::i16;
+ SDValue Zero = CurDAG->getTargetConstant(0, OffsVT);
+ int64_t val;
+
+ switch (N.getOpcode()) {
+ case ISD::Constant:
+ val = dyn_cast<ConstantSDNode>(N.getNode())->getSExtValue();
+ Base = CurDAG->getTargetConstant( val , MVT::i32);
+ Index = Zero;
+ return true; break;
+ case ISD::ConstantPool:
+ case ISD::GlobalAddress:
+ report_fatal_error("SPU SelectAFormAddr: Pool/Global not lowered.");
+ /*NOTREACHED*/
+
+ case ISD::TargetConstant:
+ case ISD::TargetGlobalAddress:
+ case ISD::TargetJumpTable:
+ report_fatal_error("SPUSelectAFormAddr: Target Constant/Pool/Global "
+ "not wrapped as A-form address.");
+ /*NOTREACHED*/
+
+ case SPUISD::AFormAddr:
+ // Just load from memory if there's only a single use of the location,
+ // otherwise, this will get handled below with D-form offset addresses
+ if (N.hasOneUse()) {
+ SDValue Op0 = N.getOperand(0);
+ switch (Op0.getOpcode()) {
+ case ISD::TargetConstantPool:
+ case ISD::TargetJumpTable:
+ Base = Op0;
+ Index = Zero;
+ return true;
+
+ case ISD::TargetGlobalAddress: {
+ GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op0);
+ const GlobalValue *GV = GSDN->getGlobal();
+ if (GV->getAlignment() == 16) {
+ Base = Op0;
+ Index = Zero;
+ return true;
+ }
+ break;
+ }
+ }
+ }
+ break;
+ }
+ return false;
+}
+
+bool
+SPUDAGToDAGISel::SelectDForm2Addr(SDNode *Op, SDValue N, SDValue &Disp,
+ SDValue &Base) {
+ const int minDForm2Offset = -(1 << 7);
+ const int maxDForm2Offset = (1 << 7) - 1;
+ return DFormAddressPredicate(Op, N, Disp, Base, minDForm2Offset,
+ maxDForm2Offset);
+}
+
+/*!
+ \arg Op The ISD instruction (ignored)
+ \arg N The address to be tested
+ \arg Base Base address register/pointer
+ \arg Index Base address index
+
+ Examine the input address by a base register plus a signed 10-bit
+ displacement, [r+I10] (D-form address).
+
+ \return true if \a N is a D-form address with \a Base and \a Index set
+ to non-empty SDValue instances.
+*/
+bool
+SPUDAGToDAGISel::SelectDFormAddr(SDNode *Op, SDValue N, SDValue &Base,
+ SDValue &Index) {
+ return DFormAddressPredicate(Op, N, Base, Index,
+ SPUFrameLowering::minFrameOffset(),
+ SPUFrameLowering::maxFrameOffset());
+}
+
+bool
+SPUDAGToDAGISel::DFormAddressPredicate(SDNode *Op, SDValue N, SDValue &Base,
+ SDValue &Index, int minOffset,
+ int maxOffset) {
+ unsigned Opc = N.getOpcode();
+ EVT PtrTy = SPUtli.getPointerTy();
+
+ if (Opc == ISD::FrameIndex) {
+ // Stack frame index must be less than 512 (divided by 16):
+ FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(N);
+ int FI = int(FIN->getIndex());
+ DEBUG(errs() << "SelectDFormAddr: ISD::FrameIndex = "
+ << FI << "\n");
+ if (SPUFrameLowering::FItoStackOffset(FI) < maxOffset) {
+ Base = CurDAG->getTargetConstant(0, PtrTy);
+ Index = CurDAG->getTargetFrameIndex(FI, PtrTy);
+ return true;
+ }
+ } else if (Opc == ISD::ADD) {
+ // Generated by getelementptr
+ const SDValue Op0 = N.getOperand(0);
+ const SDValue Op1 = N.getOperand(1);
+
+ if ((Op0.getOpcode() == SPUISD::Hi && Op1.getOpcode() == SPUISD::Lo)
+ || (Op1.getOpcode() == SPUISD::Hi && Op0.getOpcode() == SPUISD::Lo)) {
+ Base = CurDAG->getTargetConstant(0, PtrTy);
+ Index = N;
+ return true;
+ } else if (Op1.getOpcode() == ISD::Constant
+ || Op1.getOpcode() == ISD::TargetConstant) {
+ ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
+ int32_t offset = int32_t(CN->getSExtValue());
+
+ if (Op0.getOpcode() == ISD::FrameIndex) {
+ FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op0);
+ int FI = int(FIN->getIndex());
+ DEBUG(errs() << "SelectDFormAddr: ISD::ADD offset = " << offset
+ << " frame index = " << FI << "\n");
+
+ if (SPUFrameLowering::FItoStackOffset(FI) < maxOffset) {
+ Base = CurDAG->getTargetConstant(offset, PtrTy);
+ Index = CurDAG->getTargetFrameIndex(FI, PtrTy);
+ return true;
+ }
+ } else if (offset > minOffset && offset < maxOffset) {
+ Base = CurDAG->getTargetConstant(offset, PtrTy);
+ Index = Op0;
+ return true;
+ }
+ } else if (Op0.getOpcode() == ISD::Constant
+ || Op0.getOpcode() == ISD::TargetConstant) {
+ ConstantSDNode *CN = cast<ConstantSDNode>(Op0);
+ int32_t offset = int32_t(CN->getSExtValue());
+
+ if (Op1.getOpcode() == ISD::FrameIndex) {
+ FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op1);
+ int FI = int(FIN->getIndex());
+ DEBUG(errs() << "SelectDFormAddr: ISD::ADD offset = " << offset
+ << " frame index = " << FI << "\n");
+
+ if (SPUFrameLowering::FItoStackOffset(FI) < maxOffset) {
+ Base = CurDAG->getTargetConstant(offset, PtrTy);
+ Index = CurDAG->getTargetFrameIndex(FI, PtrTy);
+ return true;
+ }
+ } else if (offset > minOffset && offset < maxOffset) {
+ Base = CurDAG->getTargetConstant(offset, PtrTy);
+ Index = Op1;
+ return true;
+ }
+ }
+ } else if (Opc == SPUISD::IndirectAddr) {
+ // Indirect with constant offset -> D-Form address
+ const SDValue Op0 = N.getOperand(0);
+ const SDValue Op1 = N.getOperand(1);
+
+ if (Op0.getOpcode() == SPUISD::Hi
+ && Op1.getOpcode() == SPUISD::Lo) {
+ // (SPUindirect (SPUhi <arg>, 0), (SPUlo <arg>, 0))
+ Base = CurDAG->getTargetConstant(0, PtrTy);
+ Index = N;
+ return true;
+ } else if (isa<ConstantSDNode>(Op0) || isa<ConstantSDNode>(Op1)) {
+ int32_t offset = 0;
+ SDValue idxOp;
+
+ if (isa<ConstantSDNode>(Op1)) {
+ ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
+ offset = int32_t(CN->getSExtValue());
+ idxOp = Op0;
+ } else if (isa<ConstantSDNode>(Op0)) {
+ ConstantSDNode *CN = cast<ConstantSDNode>(Op0);
+ offset = int32_t(CN->getSExtValue());
+ idxOp = Op1;
+ }
+
+ if (offset >= minOffset && offset <= maxOffset) {
+ Base = CurDAG->getTargetConstant(offset, PtrTy);
+ Index = idxOp;
+ return true;
+ }
+ }
+ } else if (Opc == SPUISD::AFormAddr) {
+ Base = CurDAG->getTargetConstant(0, N.getValueType());
+ Index = N;
+ return true;
+ } else if (Opc == SPUISD::LDRESULT) {
+ Base = CurDAG->getTargetConstant(0, N.getValueType());
+ Index = N;
+ return true;
+ } else if (Opc == ISD::Register
+ ||Opc == ISD::CopyFromReg
+ ||Opc == ISD::UNDEF
+ ||Opc == ISD::Constant) {
+ unsigned OpOpc = Op->getOpcode();
+
+ if (OpOpc == ISD::STORE || OpOpc == ISD::LOAD) {
+ // Direct load/store without getelementptr
+ SDValue Offs;
+
+ Offs = ((OpOpc == ISD::STORE) ? Op->getOperand(3) : Op->getOperand(2));
+
+ if (Offs.getOpcode() == ISD::Constant || Offs.getOpcode() == ISD::UNDEF) {
+ if (Offs.getOpcode() == ISD::UNDEF)
+ Offs = CurDAG->getTargetConstant(0, Offs.getValueType());
+
+ Base = Offs;
+ Index = N;
+ return true;
+ }
+ } else {
+ /* If otherwise unadorned, default to D-form address with 0 offset: */
+ if (Opc == ISD::CopyFromReg) {
+ Index = N.getOperand(1);
+ } else {
+ Index = N;
+ }
+
+ Base = CurDAG->getTargetConstant(0, Index.getValueType());
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/*!
+ \arg Op The ISD instruction operand
+ \arg N The address operand
+ \arg Base The base pointer operand
+ \arg Index The offset/index operand
+
+ If the address \a N can be expressed as an A-form or D-form address, returns
+ false. Otherwise, creates two operands, Base and Index that will become the
+ (r)(r) X-form address.
+*/
+bool
+SPUDAGToDAGISel::SelectXFormAddr(SDNode *Op, SDValue N, SDValue &Base,
+ SDValue &Index) {
+ if (!SelectAFormAddr(Op, N, Base, Index)
+ && !SelectDFormAddr(Op, N, Base, Index)) {
+ // If the address is neither A-form or D-form, punt and use an X-form
+ // address:
+ Base = N.getOperand(1);
+ Index = N.getOperand(0);
+ return true;
+ }
+
+ return false;
+}
+
+/*!
+ Utility function to use with COPY_TO_REGCLASS instructions. Returns a SDValue
+ to be used as the last parameter of a
+CurDAG->getMachineNode(COPY_TO_REGCLASS,..., ) function call
+ \arg VT the value type for which we want a register class
+*/
+SDValue SPUDAGToDAGISel::getRC( MVT VT ) {
+ switch( VT.SimpleTy ) {
+ case MVT::i8:
+ return CurDAG->getTargetConstant(SPU::R8CRegClass.getID(), MVT::i32);
+ break;
+ case MVT::i16:
+ return CurDAG->getTargetConstant(SPU::R16CRegClass.getID(), MVT::i32);
+ break;
+ case MVT::i32:
+ return CurDAG->getTargetConstant(SPU::R32CRegClass.getID(), MVT::i32);
+ break;
+ case MVT::f32:
+ return CurDAG->getTargetConstant(SPU::R32FPRegClass.getID(), MVT::i32);
+ break;
+ case MVT::i64:
+ return CurDAG->getTargetConstant(SPU::R64CRegClass.getID(), MVT::i32);
+ break;
+ case MVT::i128:
+ return CurDAG->getTargetConstant(SPU::GPRCRegClass.getID(), MVT::i32);
+ break;
+ case MVT::v16i8:
+ case MVT::v8i16:
+ case MVT::v4i32:
+ case MVT::v4f32:
+ case MVT::v2i64:
+ case MVT::v2f64:
+ return CurDAG->getTargetConstant(SPU::VECREGRegClass.getID(), MVT::i32);
+ break;
+ default:
+ assert( false && "add a new case here" );
+ }
+ return SDValue();
+}
+
+//! Convert the operand from a target-independent to a target-specific node
+/*!
+ */
+SDNode *
+SPUDAGToDAGISel::Select(SDNode *N) {
+ unsigned Opc = N->getOpcode();
+ int n_ops = -1;
+ unsigned NewOpc = 0;
+ EVT OpVT = N->getValueType(0);
+ SDValue Ops[8];
+ DebugLoc dl = N->getDebugLoc();
+
+ if (N->isMachineOpcode())
+ return NULL; // Already selected.
+
+ if (Opc == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ SDValue TFI = CurDAG->getTargetFrameIndex(FI, N->getValueType(0));
+ SDValue Imm0 = CurDAG->getTargetConstant(0, N->getValueType(0));
+
+ if (FI < 128) {
+ NewOpc = SPU::AIr32;
+ Ops[0] = TFI;
+ Ops[1] = Imm0;
+ n_ops = 2;
+ } else {
+ NewOpc = SPU::Ar32;
+ Ops[0] = CurDAG->getRegister(SPU::R1, N->getValueType(0));
+ Ops[1] = SDValue(CurDAG->getMachineNode(SPU::ILAr32, dl,
+ N->getValueType(0), TFI),
+ 0);
+ n_ops = 2;
+ }
+ } else if (Opc == ISD::Constant && OpVT == MVT::i64) {
+ // Catch the i64 constants that end up here. Note: The backend doesn't
+ // attempt to legalize the constant (it's useless because DAGCombiner
+ // will insert 64-bit constants and we can't stop it).
+ return SelectI64Constant(N, OpVT, N->getDebugLoc());
+ } else if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND)
+ && OpVT == MVT::i64) {
+ SDValue Op0 = N->getOperand(0);
+ EVT Op0VT = Op0.getValueType();
+ EVT Op0VecVT = EVT::getVectorVT(*CurDAG->getContext(),
+ Op0VT, (128 / Op0VT.getSizeInBits()));
+ EVT OpVecVT = EVT::getVectorVT(*CurDAG->getContext(),
+ OpVT, (128 / OpVT.getSizeInBits()));
+ SDValue shufMask;
+
+ switch (Op0VT.getSimpleVT().SimpleTy) {
+ default:
+ report_fatal_error("CellSPU Select: Unhandled zero/any extend EVT");
+ /*NOTREACHED*/
+ case MVT::i32:
+ shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ CurDAG->getConstant(0x80808080, MVT::i32),
+ CurDAG->getConstant(0x00010203, MVT::i32),
+ CurDAG->getConstant(0x80808080, MVT::i32),
+ CurDAG->getConstant(0x08090a0b, MVT::i32));
+ break;
+
+ case MVT::i16:
+ shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ CurDAG->getConstant(0x80808080, MVT::i32),
+ CurDAG->getConstant(0x80800203, MVT::i32),
+ CurDAG->getConstant(0x80808080, MVT::i32),
+ CurDAG->getConstant(0x80800a0b, MVT::i32));
+ break;
+
+ case MVT::i8:
+ shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ CurDAG->getConstant(0x80808080, MVT::i32),
+ CurDAG->getConstant(0x80808003, MVT::i32),
+ CurDAG->getConstant(0x80808080, MVT::i32),
+ CurDAG->getConstant(0x8080800b, MVT::i32));
+ break;
+ }
+
+ SDNode *shufMaskLoad = emitBuildVector(shufMask.getNode());
+
+ HandleSDNode PromoteScalar(CurDAG->getNode(SPUISD::PREFSLOT2VEC, dl,
+ Op0VecVT, Op0));
+
+ SDValue PromScalar;
+ if (SDNode *N = SelectCode(PromoteScalar.getValue().getNode()))
+ PromScalar = SDValue(N, 0);
+ else
+ PromScalar = PromoteScalar.getValue();
+
+ SDValue zextShuffle =
+ CurDAG->getNode(SPUISD::SHUFB, dl, OpVecVT,
+ PromScalar, PromScalar,
+ SDValue(shufMaskLoad, 0));
+
+ HandleSDNode Dummy2(zextShuffle);
+ if (SDNode *N = SelectCode(Dummy2.getValue().getNode()))
+ zextShuffle = SDValue(N, 0);
+ else
+ zextShuffle = Dummy2.getValue();
+ HandleSDNode Dummy(CurDAG->getNode(SPUISD::VEC2PREFSLOT, dl, OpVT,
+ zextShuffle));
+
+ CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode());
+ SelectCode(Dummy.getValue().getNode());
+ return Dummy.getValue().getNode();
+ } else if (Opc == ISD::ADD && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
+ SDNode *CGLoad =
+ emitBuildVector(getCarryGenerateShufMask(*CurDAG, dl).getNode());
+
+ HandleSDNode Dummy(CurDAG->getNode(SPUISD::ADD64_MARKER, dl, OpVT,
+ N->getOperand(0), N->getOperand(1),
+ SDValue(CGLoad, 0)));
+
+ CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode());
+ if (SDNode *N = SelectCode(Dummy.getValue().getNode()))
+ return N;
+ return Dummy.getValue().getNode();
+ } else if (Opc == ISD::SUB && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
+ SDNode *CGLoad =
+ emitBuildVector(getBorrowGenerateShufMask(*CurDAG, dl).getNode());
+
+ HandleSDNode Dummy(CurDAG->getNode(SPUISD::SUB64_MARKER, dl, OpVT,
+ N->getOperand(0), N->getOperand(1),
+ SDValue(CGLoad, 0)));
+
+ CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode());
+ if (SDNode *N = SelectCode(Dummy.getValue().getNode()))
+ return N;
+ return Dummy.getValue().getNode();
+ } else if (Opc == ISD::MUL && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
+ SDNode *CGLoad =
+ emitBuildVector(getCarryGenerateShufMask(*CurDAG, dl).getNode());
+
+ HandleSDNode Dummy(CurDAG->getNode(SPUISD::MUL64_MARKER, dl, OpVT,
+ N->getOperand(0), N->getOperand(1),
+ SDValue(CGLoad, 0)));
+ CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode());
+ if (SDNode *N = SelectCode(Dummy.getValue().getNode()))
+ return N;
+ return Dummy.getValue().getNode();
+ } else if (Opc == ISD::TRUNCATE) {
+ SDValue Op0 = N->getOperand(0);
+ if ((Op0.getOpcode() == ISD::SRA || Op0.getOpcode() == ISD::SRL)
+ && OpVT == MVT::i32
+ && Op0.getValueType() == MVT::i64) {
+ // Catch (truncate:i32 ([sra|srl]:i64 arg, c), where c >= 32
+ //
+ // Take advantage of the fact that the upper 32 bits are in the
+ // i32 preferred slot and avoid shuffle gymnastics:
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
+ if (CN != 0) {
+ unsigned shift_amt = unsigned(CN->getZExtValue());
+
+ if (shift_amt >= 32) {
+ SDNode *hi32 =
+ CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, OpVT,
+ Op0.getOperand(0), getRC(MVT::i32));
+
+ shift_amt -= 32;
+ if (shift_amt > 0) {
+ // Take care of the additional shift, if present:
+ SDValue shift = CurDAG->getTargetConstant(shift_amt, MVT::i32);
+ unsigned Opc = SPU::ROTMAIr32_i32;
+
+ if (Op0.getOpcode() == ISD::SRL)
+ Opc = SPU::ROTMr32;
+
+ hi32 = CurDAG->getMachineNode(Opc, dl, OpVT, SDValue(hi32, 0),
+ shift);
+ }
+
+ return hi32;
+ }
+ }
+ }
+ } else if (Opc == ISD::SHL) {
+ if (OpVT == MVT::i64)
+ return SelectSHLi64(N, OpVT);
+ } else if (Opc == ISD::SRL) {
+ if (OpVT == MVT::i64)
+ return SelectSRLi64(N, OpVT);
+ } else if (Opc == ISD::SRA) {
+ if (OpVT == MVT::i64)
+ return SelectSRAi64(N, OpVT);
+ } else if (Opc == ISD::FNEG
+ && (OpVT == MVT::f64 || OpVT == MVT::v2f64)) {
+ DebugLoc dl = N->getDebugLoc();
+ // Check if the pattern is a special form of DFNMS:
+ // (fneg (fsub (fmul R64FP:$rA, R64FP:$rB), R64FP:$rC))
+ SDValue Op0 = N->getOperand(0);
+ if (Op0.getOpcode() == ISD::FSUB) {
+ SDValue Op00 = Op0.getOperand(0);
+ if (Op00.getOpcode() == ISD::FMUL) {
+ unsigned Opc = SPU::DFNMSf64;
+ if (OpVT == MVT::v2f64)
+ Opc = SPU::DFNMSv2f64;
+
+ return CurDAG->getMachineNode(Opc, dl, OpVT,
+ Op00.getOperand(0),
+ Op00.getOperand(1),
+ Op0.getOperand(1));
+ }
+ }
+
+ SDValue negConst = CurDAG->getConstant(0x8000000000000000ULL, MVT::i64);
+ SDNode *signMask = 0;
+ unsigned Opc = SPU::XORfneg64;
+
+ if (OpVT == MVT::f64) {
+ signMask = SelectI64Constant(negConst.getNode(), MVT::i64, dl);
+ } else if (OpVT == MVT::v2f64) {
+ Opc = SPU::XORfnegvec;
+ signMask = emitBuildVector(CurDAG->getNode(ISD::BUILD_VECTOR, dl,
+ MVT::v2i64,
+ negConst, negConst).getNode());
+ }
+
+ return CurDAG->getMachineNode(Opc, dl, OpVT,
+ N->getOperand(0), SDValue(signMask, 0));
+ } else if (Opc == ISD::FABS) {
+ if (OpVT == MVT::f64) {
+ SDNode *signMask = SelectI64Constant(0x7fffffffffffffffULL, MVT::i64, dl);
+ return CurDAG->getMachineNode(SPU::ANDfabs64, dl, OpVT,
+ N->getOperand(0), SDValue(signMask, 0));
+ } else if (OpVT == MVT::v2f64) {
+ SDValue absConst = CurDAG->getConstant(0x7fffffffffffffffULL, MVT::i64);
+ SDValue absVec = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64,
+ absConst, absConst);
+ SDNode *signMask = emitBuildVector(absVec.getNode());
+ return CurDAG->getMachineNode(SPU::ANDfabsvec, dl, OpVT,
+ N->getOperand(0), SDValue(signMask, 0));
+ }
+ } else if (Opc == SPUISD::LDRESULT) {
+ // Custom select instructions for LDRESULT
+ EVT VT = N->getValueType(0);
+ SDValue Arg = N->getOperand(0);
+ SDValue Chain = N->getOperand(1);
+ SDNode *Result;
+
+ Result = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, VT,
+ MVT::Other, Arg,
+ getRC( VT.getSimpleVT()), Chain);
+ return Result;
+
+ } else if (Opc == SPUISD::IndirectAddr) {
+ // Look at the operands: SelectCode() will catch the cases that aren't
+ // specifically handled here.
+ //
+ // SPUInstrInfo catches the following patterns:
+ // (SPUindirect (SPUhi ...), (SPUlo ...))
+ // (SPUindirect $sp, imm)
+ EVT VT = N->getValueType(0);
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ RegisterSDNode *RN;
+
+ if ((Op0.getOpcode() != SPUISD::Hi && Op1.getOpcode() != SPUISD::Lo)
+ || (Op0.getOpcode() == ISD::Register
+ && ((RN = dyn_cast<RegisterSDNode>(Op0.getNode())) != 0
+ && RN->getReg() != SPU::R1))) {
+ NewOpc = SPU::Ar32;
+ Ops[1] = Op1;
+ if (Op1.getOpcode() == ISD::Constant) {
+ ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
+ Op1 = CurDAG->getTargetConstant(CN->getSExtValue(), VT);
+ if (isInt<10>(CN->getSExtValue())) {
+ NewOpc = SPU::AIr32;
+ Ops[1] = Op1;
+ } else {
+ Ops[1] = SDValue(CurDAG->getMachineNode(SPU::ILr32, dl,
+ N->getValueType(0),
+ Op1),
+ 0);
+ }
+ }
+ Ops[0] = Op0;
+ n_ops = 2;
+ }
+ }
+
+ if (n_ops > 0) {
+ if (N->hasOneUse())
+ return CurDAG->SelectNodeTo(N, NewOpc, OpVT, Ops, n_ops);
+ else
+ return CurDAG->getMachineNode(NewOpc, dl, OpVT, Ops, n_ops);
+ } else
+ return SelectCode(N);
+}
+
+/*!
+ * Emit the instruction sequence for i64 left shifts. The basic algorithm
+ * is to fill the bottom two word slots with zeros so that zeros are shifted
+ * in as the entire quadword is shifted left.
+ *
+ * \note This code could also be used to implement v2i64 shl.
+ *
+ * @param Op The shl operand
+ * @param OpVT Op's machine value value type (doesn't need to be passed, but
+ * makes life easier.)
+ * @return The SDNode with the entire instruction sequence
+ */
+SDNode *
+SPUDAGToDAGISel::SelectSHLi64(SDNode *N, EVT OpVT) {
+ SDValue Op0 = N->getOperand(0);
+ EVT VecVT = EVT::getVectorVT(*CurDAG->getContext(),
+ OpVT, (128 / OpVT.getSizeInBits()));
+ SDValue ShiftAmt = N->getOperand(1);
+ EVT ShiftAmtVT = ShiftAmt.getValueType();
+ SDNode *VecOp0, *SelMask, *ZeroFill, *Shift = 0;
+ SDValue SelMaskVal;
+ DebugLoc dl = N->getDebugLoc();
+
+ VecOp0 = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, VecVT,
+ Op0, getRC(MVT::v2i64) );
+ SelMaskVal = CurDAG->getTargetConstant(0xff00ULL, MVT::i16);
+ SelMask = CurDAG->getMachineNode(SPU::FSMBIv2i64, dl, VecVT, SelMaskVal);
+ ZeroFill = CurDAG->getMachineNode(SPU::ILv2i64, dl, VecVT,
+ CurDAG->getTargetConstant(0, OpVT));
+ VecOp0 = CurDAG->getMachineNode(SPU::SELBv2i64, dl, VecVT,
+ SDValue(ZeroFill, 0),
+ SDValue(VecOp0, 0),
+ SDValue(SelMask, 0));
+
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(ShiftAmt)) {
+ unsigned bytes = unsigned(CN->getZExtValue()) >> 3;
+ unsigned bits = unsigned(CN->getZExtValue()) & 7;
+
+ if (bytes > 0) {
+ Shift =
+ CurDAG->getMachineNode(SPU::SHLQBYIv2i64, dl, VecVT,
+ SDValue(VecOp0, 0),
+ CurDAG->getTargetConstant(bytes, ShiftAmtVT));
+ }
+
+ if (bits > 0) {
+ Shift =
+ CurDAG->getMachineNode(SPU::SHLQBIIv2i64, dl, VecVT,
+ SDValue((Shift != 0 ? Shift : VecOp0), 0),
+ CurDAG->getTargetConstant(bits, ShiftAmtVT));
+ }
+ } else {
+ SDNode *Bytes =
+ CurDAG->getMachineNode(SPU::ROTMIr32, dl, ShiftAmtVT,
+ ShiftAmt,
+ CurDAG->getTargetConstant(3, ShiftAmtVT));
+ SDNode *Bits =
+ CurDAG->getMachineNode(SPU::ANDIr32, dl, ShiftAmtVT,
+ ShiftAmt,
+ CurDAG->getTargetConstant(7, ShiftAmtVT));
+ Shift =
+ CurDAG->getMachineNode(SPU::SHLQBYv2i64, dl, VecVT,
+ SDValue(VecOp0, 0), SDValue(Bytes, 0));
+ Shift =
+ CurDAG->getMachineNode(SPU::SHLQBIv2i64, dl, VecVT,
+ SDValue(Shift, 0), SDValue(Bits, 0));
+ }
+
+ return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl,
+ OpVT, SDValue(Shift, 0), getRC(MVT::i64));
+}
+
+/*!
+ * Emit the instruction sequence for i64 logical right shifts.
+ *
+ * @param Op The shl operand
+ * @param OpVT Op's machine value value type (doesn't need to be passed, but
+ * makes life easier.)
+ * @return The SDNode with the entire instruction sequence
+ */
+SDNode *
+SPUDAGToDAGISel::SelectSRLi64(SDNode *N, EVT OpVT) {
+ SDValue Op0 = N->getOperand(0);
+ EVT VecVT = EVT::getVectorVT(*CurDAG->getContext(),
+ OpVT, (128 / OpVT.getSizeInBits()));
+ SDValue ShiftAmt = N->getOperand(1);
+ EVT ShiftAmtVT = ShiftAmt.getValueType();
+ SDNode *VecOp0, *Shift = 0;
+ DebugLoc dl = N->getDebugLoc();
+
+ VecOp0 = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, VecVT,
+ Op0, getRC(MVT::v2i64) );
+
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(ShiftAmt)) {
+ unsigned bytes = unsigned(CN->getZExtValue()) >> 3;
+ unsigned bits = unsigned(CN->getZExtValue()) & 7;
+
+ if (bytes > 0) {
+ Shift =
+ CurDAG->getMachineNode(SPU::ROTQMBYIv2i64, dl, VecVT,
+ SDValue(VecOp0, 0),
+ CurDAG->getTargetConstant(bytes, ShiftAmtVT));
+ }
+
+ if (bits > 0) {
+ Shift =
+ CurDAG->getMachineNode(SPU::ROTQMBIIv2i64, dl, VecVT,
+ SDValue((Shift != 0 ? Shift : VecOp0), 0),
+ CurDAG->getTargetConstant(bits, ShiftAmtVT));
+ }
+ } else {
+ SDNode *Bytes =
+ CurDAG->getMachineNode(SPU::ROTMIr32, dl, ShiftAmtVT,
+ ShiftAmt,
+ CurDAG->getTargetConstant(3, ShiftAmtVT));
+ SDNode *Bits =
+ CurDAG->getMachineNode(SPU::ANDIr32, dl, ShiftAmtVT,
+ ShiftAmt,
+ CurDAG->getTargetConstant(7, ShiftAmtVT));
+
+ // Ensure that the shift amounts are negated!
+ Bytes = CurDAG->getMachineNode(SPU::SFIr32, dl, ShiftAmtVT,
+ SDValue(Bytes, 0),
+ CurDAG->getTargetConstant(0, ShiftAmtVT));
+
+ Bits = CurDAG->getMachineNode(SPU::SFIr32, dl, ShiftAmtVT,
+ SDValue(Bits, 0),
+ CurDAG->getTargetConstant(0, ShiftAmtVT));
+
+ Shift =
+ CurDAG->getMachineNode(SPU::ROTQMBYv2i64, dl, VecVT,
+ SDValue(VecOp0, 0), SDValue(Bytes, 0));
+ Shift =
+ CurDAG->getMachineNode(SPU::ROTQMBIv2i64, dl, VecVT,
+ SDValue(Shift, 0), SDValue(Bits, 0));
+ }
+
+ return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl,
+ OpVT, SDValue(Shift, 0), getRC(MVT::i64));
+}
+
+/*!
+ * Emit the instruction sequence for i64 arithmetic right shifts.
+ *
+ * @param Op The shl operand
+ * @param OpVT Op's machine value value type (doesn't need to be passed, but
+ * makes life easier.)
+ * @return The SDNode with the entire instruction sequence
+ */
+SDNode *
+SPUDAGToDAGISel::SelectSRAi64(SDNode *N, EVT OpVT) {
+ // Promote Op0 to vector
+ EVT VecVT = EVT::getVectorVT(*CurDAG->getContext(),
+ OpVT, (128 / OpVT.getSizeInBits()));
+ SDValue ShiftAmt = N->getOperand(1);
+ EVT ShiftAmtVT = ShiftAmt.getValueType();
+ DebugLoc dl = N->getDebugLoc();
+
+ SDNode *VecOp0 =
+ CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl,
+ VecVT, N->getOperand(0), getRC(MVT::v2i64));
+
+ SDValue SignRotAmt = CurDAG->getTargetConstant(31, ShiftAmtVT);
+ SDNode *SignRot =
+ CurDAG->getMachineNode(SPU::ROTMAIv2i64_i32, dl, MVT::v2i64,
+ SDValue(VecOp0, 0), SignRotAmt);
+ SDNode *UpperHalfSign =
+ CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl,
+ MVT::i32, SDValue(SignRot, 0), getRC(MVT::i32));
+
+ SDNode *UpperHalfSignMask =
+ CurDAG->getMachineNode(SPU::FSM64r32, dl, VecVT, SDValue(UpperHalfSign, 0));
+ SDNode *UpperLowerMask =
+ CurDAG->getMachineNode(SPU::FSMBIv2i64, dl, VecVT,
+ CurDAG->getTargetConstant(0xff00ULL, MVT::i16));
+ SDNode *UpperLowerSelect =
+ CurDAG->getMachineNode(SPU::SELBv2i64, dl, VecVT,
+ SDValue(UpperHalfSignMask, 0),
+ SDValue(VecOp0, 0),
+ SDValue(UpperLowerMask, 0));
+
+ SDNode *Shift = 0;
+
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(ShiftAmt)) {
+ unsigned bytes = unsigned(CN->getZExtValue()) >> 3;
+ unsigned bits = unsigned(CN->getZExtValue()) & 7;
+
+ if (bytes > 0) {
+ bytes = 31 - bytes;
+ Shift =
+ CurDAG->getMachineNode(SPU::ROTQBYIv2i64, dl, VecVT,
+ SDValue(UpperLowerSelect, 0),
+ CurDAG->getTargetConstant(bytes, ShiftAmtVT));
+ }
+
+ if (bits > 0) {
+ bits = 8 - bits;
+ Shift =
+ CurDAG->getMachineNode(SPU::ROTQBIIv2i64, dl, VecVT,
+ SDValue((Shift != 0 ? Shift : UpperLowerSelect), 0),
+ CurDAG->getTargetConstant(bits, ShiftAmtVT));
+ }
+ } else {
+ SDNode *NegShift =
+ CurDAG->getMachineNode(SPU::SFIr32, dl, ShiftAmtVT,
+ ShiftAmt, CurDAG->getTargetConstant(0, ShiftAmtVT));
+
+ Shift =
+ CurDAG->getMachineNode(SPU::ROTQBYBIv2i64_r32, dl, VecVT,
+ SDValue(UpperLowerSelect, 0), SDValue(NegShift, 0));
+ Shift =
+ CurDAG->getMachineNode(SPU::ROTQBIv2i64, dl, VecVT,
+ SDValue(Shift, 0), SDValue(NegShift, 0));
+ }
+
+ return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl,
+ OpVT, SDValue(Shift, 0), getRC(MVT::i64));
+}
+
+/*!
+ Do the necessary magic necessary to load a i64 constant
+ */
+SDNode *SPUDAGToDAGISel::SelectI64Constant(SDNode *N, EVT OpVT,
+ DebugLoc dl) {
+ ConstantSDNode *CN = cast<ConstantSDNode>(N);
+ return SelectI64Constant(CN->getZExtValue(), OpVT, dl);
+}
+
+SDNode *SPUDAGToDAGISel::SelectI64Constant(uint64_t Value64, EVT OpVT,
+ DebugLoc dl) {
+ EVT OpVecVT = EVT::getVectorVT(*CurDAG->getContext(), OpVT, 2);
+ SDValue i64vec =
+ SPU::LowerV2I64Splat(OpVecVT, *CurDAG, Value64, dl);
+
+ // Here's where it gets interesting, because we have to parse out the
+ // subtree handed back in i64vec:
+
+ if (i64vec.getOpcode() == ISD::BITCAST) {
+ // The degenerate case where the upper and lower bits in the splat are
+ // identical:
+ SDValue Op0 = i64vec.getOperand(0);
+
+ ReplaceUses(i64vec, Op0);
+ return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, OpVT,
+ SDValue(emitBuildVector(Op0.getNode()), 0),
+ getRC(MVT::i64));
+ } else if (i64vec.getOpcode() == SPUISD::SHUFB) {
+ SDValue lhs = i64vec.getOperand(0);
+ SDValue rhs = i64vec.getOperand(1);
+ SDValue shufmask = i64vec.getOperand(2);
+
+ if (lhs.getOpcode() == ISD::BITCAST) {
+ ReplaceUses(lhs, lhs.getOperand(0));
+ lhs = lhs.getOperand(0);
+ }
+
+ SDNode *lhsNode = (lhs.getNode()->isMachineOpcode()
+ ? lhs.getNode()
+ : emitBuildVector(lhs.getNode()));
+
+ if (rhs.getOpcode() == ISD::BITCAST) {
+ ReplaceUses(rhs, rhs.getOperand(0));
+ rhs = rhs.getOperand(0);
+ }
+
+ SDNode *rhsNode = (rhs.getNode()->isMachineOpcode()
+ ? rhs.getNode()
+ : emitBuildVector(rhs.getNode()));
+
+ if (shufmask.getOpcode() == ISD::BITCAST) {
+ ReplaceUses(shufmask, shufmask.getOperand(0));
+ shufmask = shufmask.getOperand(0);
+ }
+
+ SDNode *shufMaskNode = (shufmask.getNode()->isMachineOpcode()
+ ? shufmask.getNode()
+ : emitBuildVector(shufmask.getNode()));
+
+ SDValue shufNode =
+ CurDAG->getNode(SPUISD::SHUFB, dl, OpVecVT,
+ SDValue(lhsNode, 0), SDValue(rhsNode, 0),
+ SDValue(shufMaskNode, 0));
+ HandleSDNode Dummy(shufNode);
+ SDNode *SN = SelectCode(Dummy.getValue().getNode());
+ if (SN == 0) SN = Dummy.getValue().getNode();
+
+ return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl,
+ OpVT, SDValue(SN, 0), getRC(MVT::i64));
+ } else if (i64vec.getOpcode() == ISD::BUILD_VECTOR) {
+ return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, OpVT,
+ SDValue(emitBuildVector(i64vec.getNode()), 0),
+ getRC(MVT::i64));
+ } else {
+ report_fatal_error("SPUDAGToDAGISel::SelectI64Constant: Unhandled i64vec"
+ "condition");
+ }
+}
+
+/// createSPUISelDag - This pass converts a legalized DAG into a
+/// SPU-specific DAG, ready for instruction scheduling.
+///
+FunctionPass *llvm::createSPUISelDag(SPUTargetMachine &TM) {
+ return new SPUDAGToDAGISel(TM);
+}
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUISelLowering.cpp b/contrib/llvm/lib/Target/CellSPU/SPUISelLowering.cpp
new file mode 100644
index 0000000..ac33111
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUISelLowering.cpp
@@ -0,0 +1,3263 @@
+//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SPUTargetLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPUISelLowering.h"
+#include "SPUTargetMachine.h"
+#include "SPUFrameLowering.h"
+#include "SPUMachineFunction.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/VectorExtras.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <map>
+
+using namespace llvm;
+
+// Used in getTargetNodeName() below
+namespace {
+ std::map<unsigned, const char *> node_names;
+
+ // Byte offset of the preferred slot (counted from the MSB)
+ int prefslotOffset(EVT VT) {
+ int retval=0;
+ if (VT==MVT::i1) retval=3;
+ if (VT==MVT::i8) retval=3;
+ if (VT==MVT::i16) retval=2;
+
+ return retval;
+ }
+
+ //! Expand a library call into an actual call DAG node
+ /*!
+ \note
+ This code is taken from SelectionDAGLegalize, since it is not exposed as
+ part of the LLVM SelectionDAG API.
+ */
+
+ SDValue
+ ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG,
+ bool isSigned, SDValue &Hi, const SPUTargetLowering &TLI) {
+ // The input chain to this libcall is the entry node of the function.
+ // Legalizing the call will automatically add the previous call to the
+ // dependence.
+ SDValue InChain = DAG.getEntryNode();
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
+ EVT ArgVT = Op.getOperand(i).getValueType();
+ Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ Entry.Node = Op.getOperand(i);
+ Entry.Ty = ArgTy;
+ Entry.isSExt = isSigned;
+ Entry.isZExt = !isSigned;
+ Args.push_back(Entry);
+ }
+ SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+ TLI.getPointerTy());
+
+ // Splice the libcall in wherever FindInputOutputChains tells us to.
+ Type *RetTy =
+ Op.getNode()->getValueType(0).getTypeForEVT(*DAG.getContext());
+ std::pair<SDValue, SDValue> CallInfo =
+ TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
+ 0, TLI.getLibcallCallingConv(LC), false,
+ /*isReturnValueUsed=*/true,
+ Callee, Args, DAG, Op.getDebugLoc());
+
+ return CallInfo.first;
+ }
+}
+
+SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
+ : TargetLowering(TM, new TargetLoweringObjectFileELF()),
+ SPUTM(TM) {
+
+ // Use _setjmp/_longjmp instead of setjmp/longjmp.
+ setUseUnderscoreSetJmp(true);
+ setUseUnderscoreLongJmp(true);
+
+ // Set RTLIB libcall names as used by SPU:
+ setLibcallName(RTLIB::DIV_F64, "__fast_divdf3");
+
+ // Set up the SPU's register classes:
+ addRegisterClass(MVT::i8, SPU::R8CRegisterClass);
+ addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
+ addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
+ addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
+ addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
+ addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
+ addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
+
+ // SPU has no sign or zero extended loads for i1, i8, i16:
+ setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
+
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);
+
+ setTruncStoreAction(MVT::i128, MVT::i64, Expand);
+ setTruncStoreAction(MVT::i128, MVT::i32, Expand);
+ setTruncStoreAction(MVT::i128, MVT::i16, Expand);
+ setTruncStoreAction(MVT::i128, MVT::i8, Expand);
+
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
+ // SPU constant load actions are custom lowered:
+ setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
+ setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
+
+ // SPU's loads and stores have to be custom lowered:
+ for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::i128;
+ ++sctype) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;
+
+ setOperationAction(ISD::LOAD, VT, Custom);
+ setOperationAction(ISD::STORE, VT, Custom);
+ setLoadExtAction(ISD::EXTLOAD, VT, Custom);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
+ setLoadExtAction(ISD::SEXTLOAD, VT, Custom);
+
+ for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
+ MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
+ setTruncStoreAction(VT, StoreVT, Expand);
+ }
+ }
+
+ for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64;
+ ++sctype) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType) sctype;
+
+ setOperationAction(ISD::LOAD, VT, Custom);
+ setOperationAction(ISD::STORE, VT, Custom);
+
+ for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) {
+ MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
+ setTruncStoreAction(VT, StoreVT, Expand);
+ }
+ }
+
+ // Expand the jumptable branches
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC, MVT::Other, Expand);
+
+ // Custom lower SELECT_CC for most cases, but expand by default
+ setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::i8, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::i16, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
+
+ // SPU has no intrinsics for these particular operations:
+ setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);
+
+ // SPU has no division/remainder instructions
+ setOperationAction(ISD::SREM, MVT::i8, Expand);
+ setOperationAction(ISD::UREM, MVT::i8, Expand);
+ setOperationAction(ISD::SDIV, MVT::i8, Expand);
+ setOperationAction(ISD::UDIV, MVT::i8, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i8, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i8, Expand);
+ setOperationAction(ISD::SREM, MVT::i16, Expand);
+ setOperationAction(ISD::UREM, MVT::i16, Expand);
+ setOperationAction(ISD::SDIV, MVT::i16, Expand);
+ setOperationAction(ISD::UDIV, MVT::i16, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i16, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i16, Expand);
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+ setOperationAction(ISD::SDIV, MVT::i32, Expand);
+ setOperationAction(ISD::UDIV, MVT::i32, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::SREM, MVT::i64, Expand);
+ setOperationAction(ISD::UREM, MVT::i64, Expand);
+ setOperationAction(ISD::SDIV, MVT::i64, Expand);
+ setOperationAction(ISD::UDIV, MVT::i64, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
+ setOperationAction(ISD::SREM, MVT::i128, Expand);
+ setOperationAction(ISD::UREM, MVT::i128, Expand);
+ setOperationAction(ISD::SDIV, MVT::i128, Expand);
+ setOperationAction(ISD::UDIV, MVT::i128, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i128, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i128, Expand);
+
+ // We don't support sin/cos/sqrt/fmod
+ setOperationAction(ISD::FSIN , MVT::f64, Expand);
+ setOperationAction(ISD::FCOS , MVT::f64, Expand);
+ setOperationAction(ISD::FREM , MVT::f64, Expand);
+ setOperationAction(ISD::FSIN , MVT::f32, Expand);
+ setOperationAction(ISD::FCOS , MVT::f32, Expand);
+ setOperationAction(ISD::FREM , MVT::f32, Expand);
+
+ // Expand fsqrt to the appropriate libcall (NOTE: should use h/w fsqrt
+ // for f32!)
+ setOperationAction(ISD::FSQRT, MVT::f64, Expand);
+ setOperationAction(ISD::FSQRT, MVT::f32, Expand);
+
+ setOperationAction(ISD::FMA, MVT::f64, Expand);
+ setOperationAction(ISD::FMA, MVT::f32, Expand);
+
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+
+ // SPU can do rotate right and left, so legalize it... but customize for i8
+ // because instructions don't exist.
+
+ // FIXME: Change from "expand" to appropriate type once ROTR is supported in
+ // .td files.
+ setOperationAction(ISD::ROTR, MVT::i32, Expand /*Legal*/);
+ setOperationAction(ISD::ROTR, MVT::i16, Expand /*Legal*/);
+ setOperationAction(ISD::ROTR, MVT::i8, Expand /*Custom*/);
+
+ setOperationAction(ISD::ROTL, MVT::i32, Legal);
+ setOperationAction(ISD::ROTL, MVT::i16, Legal);
+ setOperationAction(ISD::ROTL, MVT::i8, Custom);
+
+ // SPU has no native version of shift left/right for i8
+ setOperationAction(ISD::SHL, MVT::i8, Custom);
+ setOperationAction(ISD::SRL, MVT::i8, Custom);
+ setOperationAction(ISD::SRA, MVT::i8, Custom);
+
+ // Make these operations legal and handle them during instruction selection:
+ setOperationAction(ISD::SHL, MVT::i64, Legal);
+ setOperationAction(ISD::SRL, MVT::i64, Legal);
+ setOperationAction(ISD::SRA, MVT::i64, Legal);
+
+ // Custom lower i8, i32 and i64 multiplications
+ setOperationAction(ISD::MUL, MVT::i8, Custom);
+ setOperationAction(ISD::MUL, MVT::i32, Legal);
+ setOperationAction(ISD::MUL, MVT::i64, Legal);
+
+ // Expand double-width multiplication
+ // FIXME: It would probably be reasonable to support some of these operations
+ setOperationAction(ISD::UMUL_LOHI, MVT::i8, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i8, Expand);
+ setOperationAction(ISD::MULHU, MVT::i8, Expand);
+ setOperationAction(ISD::MULHS, MVT::i8, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand);
+ setOperationAction(ISD::MULHU, MVT::i16, Expand);
+ setOperationAction(ISD::MULHS, MVT::i16, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::MULHU, MVT::i32, Expand);
+ setOperationAction(ISD::MULHS, MVT::i32, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
+ setOperationAction(ISD::MULHU, MVT::i64, Expand);
+ setOperationAction(ISD::MULHS, MVT::i64, Expand);
+
+ // Need to custom handle (some) common i8, i64 math ops
+ setOperationAction(ISD::ADD, MVT::i8, Custom);
+ setOperationAction(ISD::ADD, MVT::i64, Legal);
+ setOperationAction(ISD::SUB, MVT::i8, Custom);
+ setOperationAction(ISD::SUB, MVT::i64, Legal);
+
+ // SPU does not have BSWAP. It does have i32 support CTLZ.
+ // CTPOP has to be custom lowered.
+ setOperationAction(ISD::BSWAP, MVT::i32, Expand);
+ setOperationAction(ISD::BSWAP, MVT::i64, Expand);
+
+ setOperationAction(ISD::CTPOP, MVT::i8, Custom);
+ setOperationAction(ISD::CTPOP, MVT::i16, Custom);
+ setOperationAction(ISD::CTPOP, MVT::i32, Custom);
+ setOperationAction(ISD::CTPOP, MVT::i64, Custom);
+ setOperationAction(ISD::CTPOP, MVT::i128, Expand);
+
+ setOperationAction(ISD::CTTZ , MVT::i8, Expand);
+ setOperationAction(ISD::CTTZ , MVT::i16, Expand);
+ setOperationAction(ISD::CTTZ , MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ , MVT::i64, Expand);
+ setOperationAction(ISD::CTTZ , MVT::i128, Expand);
+
+ setOperationAction(ISD::CTLZ , MVT::i8, Promote);
+ setOperationAction(ISD::CTLZ , MVT::i16, Promote);
+ setOperationAction(ISD::CTLZ , MVT::i32, Legal);
+ setOperationAction(ISD::CTLZ , MVT::i64, Expand);
+ setOperationAction(ISD::CTLZ , MVT::i128, Expand);
+
+ // SPU has a version of select that implements (a&~c)|(b&c), just like
+ // select ought to work:
+ setOperationAction(ISD::SELECT, MVT::i8, Legal);
+ setOperationAction(ISD::SELECT, MVT::i16, Legal);
+ setOperationAction(ISD::SELECT, MVT::i32, Legal);
+ setOperationAction(ISD::SELECT, MVT::i64, Legal);
+
+ setOperationAction(ISD::SETCC, MVT::i8, Legal);
+ setOperationAction(ISD::SETCC, MVT::i16, Legal);
+ setOperationAction(ISD::SETCC, MVT::i32, Legal);
+ setOperationAction(ISD::SETCC, MVT::i64, Legal);
+ setOperationAction(ISD::SETCC, MVT::f64, Custom);
+
+ // Custom lower i128 -> i64 truncates
+ setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);
+
+ // Custom lower i32/i64 -> i128 sign extend
+ setOperationAction(ISD::SIGN_EXTEND, MVT::i128, Custom);
+
+ setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
+ // SPU has a legal FP -> signed INT instruction for f32, but for f64, need
+ // to expand to a libcall, hence the custom lowering:
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i64, Expand);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i128, Expand);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i128, Expand);
+
+ // FDIV on SPU requires custom lowering
+ setOperationAction(ISD::FDIV, MVT::f64, Expand); // to libcall
+
+ // SPU has [U|S]INT_TO_FP for f32->i32, but not for f64->i32, f64->i64:
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
+
+ setOperationAction(ISD::BITCAST, MVT::i32, Legal);
+ setOperationAction(ISD::BITCAST, MVT::f32, Legal);
+ setOperationAction(ISD::BITCAST, MVT::i64, Legal);
+ setOperationAction(ISD::BITCAST, MVT::f64, Legal);
+
+ // We cannot sextinreg(i1). Expand to shifts.
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+
+ // We want to legalize GlobalAddress and ConstantPool nodes into the
+ // appropriate instructions to materialize the address.
+ for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
+ ++sctype) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;
+
+ setOperationAction(ISD::GlobalAddress, VT, Custom);
+ setOperationAction(ISD::ConstantPool, VT, Custom);
+ setOperationAction(ISD::JumpTable, VT, Custom);
+ }
+
+ // VASTART needs to be custom lowered to use the VarArgsFrameIndex
+ setOperationAction(ISD::VASTART , MVT::Other, Custom);
+
+ // Use the default implementation.
+ setOperationAction(ISD::VAARG , MVT::Other, Expand);
+ setOperationAction(ISD::VACOPY , MVT::Other, Expand);
+ setOperationAction(ISD::VAEND , MVT::Other, Expand);
+ setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand);
+
+ // Cell SPU has instructions for converting between i64 and fp.
+ setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
+
+ // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
+
+ // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
+ setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
+
+ // First set operation action for all vector types to expand. Then we
+ // will selectively turn on ones that can be effectively codegen'd.
+ addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
+ addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
+ addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
+ addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
+ addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
+ addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
+
+ for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+ i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)i;
+
+ // Set operation actions to legal types only.
+ if (!isTypeLegal(VT)) continue;
+
+ // add/sub are legal for all supported vector VT's.
+ setOperationAction(ISD::ADD, VT, Legal);
+ setOperationAction(ISD::SUB, VT, Legal);
+ // mul has to be custom lowered.
+ setOperationAction(ISD::MUL, VT, Legal);
+
+ setOperationAction(ISD::AND, VT, Legal);
+ setOperationAction(ISD::OR, VT, Legal);
+ setOperationAction(ISD::XOR, VT, Legal);
+ setOperationAction(ISD::LOAD, VT, Custom);
+ setOperationAction(ISD::SELECT, VT, Legal);
+ setOperationAction(ISD::STORE, VT, Custom);
+
+ // These operations need to be expanded:
+ setOperationAction(ISD::SDIV, VT, Expand);
+ setOperationAction(ISD::SREM, VT, Expand);
+ setOperationAction(ISD::UDIV, VT, Expand);
+ setOperationAction(ISD::UREM, VT, Expand);
+
+ // Custom lower build_vector, constant pool spills, insert and
+ // extract vector elements:
+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+ setOperationAction(ISD::ConstantPool, VT, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+ }
+
+ setOperationAction(ISD::AND, MVT::v16i8, Custom);
+ setOperationAction(ISD::OR, MVT::v16i8, Custom);
+ setOperationAction(ISD::XOR, MVT::v16i8, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
+
+ setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
+
+ setBooleanContents(ZeroOrNegativeOneBooleanContent);
+ setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); // FIXME: Is this correct?
+
+ setStackPointerRegisterToSaveRestore(SPU::R1);
+
+ // We have target-specific dag combine patterns for the following nodes:
+ setTargetDAGCombine(ISD::ADD);
+ setTargetDAGCombine(ISD::ZERO_EXTEND);
+ setTargetDAGCombine(ISD::SIGN_EXTEND);
+ setTargetDAGCombine(ISD::ANY_EXTEND);
+
+ setMinFunctionAlignment(3);
+
+ computeRegisterProperties();
+
+ // Set pre-RA register scheduler default to BURR, which produces slightly
+ // better code than the default (could also be TDRR, but TargetLowering.h
+ // needs a mod to support that model):
+ setSchedulingPreference(Sched::RegPressure);
+}
+
+const char *
+SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
+{
+ if (node_names.empty()) {
+ node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
+ node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
+ node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
+ node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
+ node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
+ node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
+ node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
+ node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
+ node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
+ node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
+ node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
+ node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC";
+ node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
+ node_names[(unsigned) SPUISD::SHL_BITS] = "SPUISD::SHL_BITS";
+ node_names[(unsigned) SPUISD::SHL_BYTES] = "SPUISD::SHL_BYTES";
+ node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
+ node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
+ node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
+ node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
+ "SPUISD::ROTBYTES_LEFT_BITS";
+ node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
+ node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
+ node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER";
+ node_names[(unsigned) SPUISD::SUB64_MARKER] = "SPUISD::SUB64_MARKER";
+ node_names[(unsigned) SPUISD::MUL64_MARKER] = "SPUISD::MUL64_MARKER";
+ }
+
+ std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
+
+ return ((i != node_names.end()) ? i->second : 0);
+}
+
+//===----------------------------------------------------------------------===//
+// Return the Cell SPU's SETCC result type
+//===----------------------------------------------------------------------===//
+
+EVT SPUTargetLowering::getSetCCResultType(EVT VT) const {
+ // i8, i16 and i32 are valid SETCC result types
+ MVT::SimpleValueType retval;
+
+ switch(VT.getSimpleVT().SimpleTy){
+ case MVT::i1:
+ case MVT::i8:
+ retval = MVT::i8; break;
+ case MVT::i16:
+ retval = MVT::i16; break;
+ case MVT::i32:
+ default:
+ retval = MVT::i32;
+ }
+ return retval;
+}
+
+//===----------------------------------------------------------------------===//
+// Calling convention code:
+//===----------------------------------------------------------------------===//
+
+#include "SPUGenCallingConv.inc"
+
+//===----------------------------------------------------------------------===//
+// LowerOperation implementation
+//===----------------------------------------------------------------------===//
+
+/// Custom lower loads for CellSPU
+/*!
+ All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
+ within a 16-byte block, we have to rotate to extract the requested element.
+
+ For extending loads, we also want to ensure that the following sequence is
+ emitted, e.g. for MVT::f32 extending load to MVT::f64:
+
+\verbatim
+%1 v16i8,ch = load
+%2 v16i8,ch = rotate %1
+%3 v4f8, ch = bitconvert %2
+%4 f32 = vec2perfslot %3
+%5 f64 = fp_extend %4
+\endverbatim
+*/
+static SDValue
+LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
+ LoadSDNode *LN = cast<LoadSDNode>(Op);
+ SDValue the_chain = LN->getChain();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ EVT InVT = LN->getMemoryVT();
+ EVT OutVT = Op.getValueType();
+ ISD::LoadExtType ExtType = LN->getExtensionType();
+ unsigned alignment = LN->getAlignment();
+ int pso = prefslotOffset(InVT);
+ DebugLoc dl = Op.getDebugLoc();
+ EVT vecVT = InVT.isVector()? InVT: EVT::getVectorVT(*DAG.getContext(), InVT,
+ (128 / InVT.getSizeInBits()));
+
+ // two sanity checks
+ assert( LN->getAddressingMode() == ISD::UNINDEXED
+ && "we should get only UNINDEXED adresses");
+ // clean aligned loads can be selected as-is
+ if (InVT.getSizeInBits() == 128 && (alignment%16) == 0)
+ return SDValue();
+
+ // Get pointerinfos to the memory chunk(s) that contain the data to load
+ uint64_t mpi_offset = LN->getPointerInfo().Offset;
+ mpi_offset -= mpi_offset%16;
+ MachinePointerInfo lowMemPtr(LN->getPointerInfo().V, mpi_offset);
+ MachinePointerInfo highMemPtr(LN->getPointerInfo().V, mpi_offset+16);
+
+ SDValue result;
+ SDValue basePtr = LN->getBasePtr();
+ SDValue rotate;
+
+ if ((alignment%16) == 0) {
+ ConstantSDNode *CN;
+
+ // Special cases for a known aligned load to simplify the base pointer
+ // and the rotation amount:
+ if (basePtr.getOpcode() == ISD::ADD
+ && (CN = dyn_cast<ConstantSDNode > (basePtr.getOperand(1))) != 0) {
+ // Known offset into basePtr
+ int64_t offset = CN->getSExtValue();
+ int64_t rotamt = int64_t((offset & 0xf) - pso);
+
+ if (rotamt < 0)
+ rotamt += 16;
+
+ rotate = DAG.getConstant(rotamt, MVT::i16);
+
+ // Simplify the base pointer for this case:
+ basePtr = basePtr.getOperand(0);
+ if ((offset & ~0xf) > 0) {
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+ basePtr,
+ DAG.getConstant((offset & ~0xf), PtrVT));
+ }
+ } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
+ || (basePtr.getOpcode() == SPUISD::IndirectAddr
+ && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
+ && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
+ // Plain aligned a-form address: rotate into preferred slot
+ // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
+ int64_t rotamt = -pso;
+ if (rotamt < 0)
+ rotamt += 16;
+ rotate = DAG.getConstant(rotamt, MVT::i16);
+ } else {
+ // Offset the rotate amount by the basePtr and the preferred slot
+ // byte offset
+ int64_t rotamt = -pso;
+ if (rotamt < 0)
+ rotamt += 16;
+ rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
+ basePtr,
+ DAG.getConstant(rotamt, PtrVT));
+ }
+ } else {
+ // Unaligned load: must be more pessimistic about addressing modes:
+ if (basePtr.getOpcode() == ISD::ADD) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
+ SDValue Flag;
+
+ SDValue Op0 = basePtr.getOperand(0);
+ SDValue Op1 = basePtr.getOperand(1);
+
+ if (isa<ConstantSDNode>(Op1)) {
+ // Convert the (add <ptr>, <const>) to an indirect address contained
+ // in a register. Note that this is done because we need to avoid
+ // creating a 0(reg) d-form address due to the SPU's block loads.
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
+ the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
+ basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
+ } else {
+ // Convert the (add <arg1>, <arg2>) to an indirect address, which
+ // will likely be lowered as a reg(reg) x-form address.
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
+ }
+ } else {
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+ basePtr,
+ DAG.getConstant(0, PtrVT));
+ }
+
+ // Offset the rotate amount by the basePtr and the preferred slot
+ // byte offset
+ rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
+ basePtr,
+ DAG.getConstant(-pso, PtrVT));
+ }
+
+ // Do the load as a i128 to allow possible shifting
+ SDValue low = DAG.getLoad(MVT::i128, dl, the_chain, basePtr,
+ lowMemPtr,
+ LN->isVolatile(), LN->isNonTemporal(), 16);
+
+ // When the size is not greater than alignment we get all data with just
+ // one load
+ if (alignment >= InVT.getSizeInBits()/8) {
+ // Update the chain
+ the_chain = low.getValue(1);
+
+ // Rotate into the preferred slot:
+ result = DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, MVT::i128,
+ low.getValue(0), rotate);
+
+ // Convert the loaded v16i8 vector to the appropriate vector type
+ // specified by the operand:
+ EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
+ InVT, (128 / InVT.getSizeInBits()));
+ result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT,
+ DAG.getNode(ISD::BITCAST, dl, vecVT, result));
+ }
+ // When alignment is less than the size, we might need (known only at
+ // run-time) two loads
+ // TODO: if the memory address is composed only from constants, we have
+ // extra kowledge, and might avoid the second load
+ else {
+ // storage position offset from lower 16 byte aligned memory chunk
+ SDValue offset = DAG.getNode(ISD::AND, dl, MVT::i32,
+ basePtr, DAG.getConstant( 0xf, MVT::i32 ) );
+ // get a registerfull of ones. (this implementation is a workaround: LLVM
+ // cannot handle 128 bit signed int constants)
+ SDValue ones = DAG.getConstant(-1, MVT::v4i32 );
+ ones = DAG.getNode(ISD::BITCAST, dl, MVT::i128, ones);
+
+ SDValue high = DAG.getLoad(MVT::i128, dl, the_chain,
+ DAG.getNode(ISD::ADD, dl, PtrVT,
+ basePtr,
+ DAG.getConstant(16, PtrVT)),
+ highMemPtr,
+ LN->isVolatile(), LN->isNonTemporal(), 16);
+
+ the_chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(1),
+ high.getValue(1));
+
+ // Shift the (possible) high part right to compensate the misalignemnt.
+ // if there is no highpart (i.e. value is i64 and offset is 4), this
+ // will zero out the high value.
+ high = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, high,
+ DAG.getNode(ISD::SUB, dl, MVT::i32,
+ DAG.getConstant( 16, MVT::i32),
+ offset
+ ));
+
+ // Shift the low similarly
+ // TODO: add SPUISD::SHL_BYTES
+ low = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, low, offset );
+
+ // Merge the two parts
+ result = DAG.getNode(ISD::BITCAST, dl, vecVT,
+ DAG.getNode(ISD::OR, dl, MVT::i128, low, high));
+
+ if (!InVT.isVector()) {
+ result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT, result );
+ }
+
+ }
+ // Handle extending loads by extending the scalar result:
+ if (ExtType == ISD::SEXTLOAD) {
+ result = DAG.getNode(ISD::SIGN_EXTEND, dl, OutVT, result);
+ } else if (ExtType == ISD::ZEXTLOAD) {
+ result = DAG.getNode(ISD::ZERO_EXTEND, dl, OutVT, result);
+ } else if (ExtType == ISD::EXTLOAD) {
+ unsigned NewOpc = ISD::ANY_EXTEND;
+
+ if (OutVT.isFloatingPoint())
+ NewOpc = ISD::FP_EXTEND;
+
+ result = DAG.getNode(NewOpc, dl, OutVT, result);
+ }
+
+ SDVTList retvts = DAG.getVTList(OutVT, MVT::Other);
+ SDValue retops[2] = {
+ result,
+ the_chain
+ };
+
+ result = DAG.getNode(SPUISD::LDRESULT, dl, retvts,
+ retops, sizeof(retops) / sizeof(retops[0]));
+ return result;
+}
+
+/// Custom lower stores for CellSPU
+/*!
+ All CellSPU stores are aligned to 16-byte boundaries, so for elements
+ within a 16-byte block, we have to generate a shuffle to insert the
+ requested element into its place, then store the resulting block.
+ */
+static SDValue
+LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
+ StoreSDNode *SN = cast<StoreSDNode>(Op);
+ SDValue Value = SN->getValue();
+ EVT VT = Value.getValueType();
+ EVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned alignment = SN->getAlignment();
+ SDValue result;
+ EVT vecVT = StVT.isVector()? StVT: EVT::getVectorVT(*DAG.getContext(), StVT,
+ (128 / StVT.getSizeInBits()));
+ // Get pointerinfos to the memory chunk(s) that contain the data to load
+ uint64_t mpi_offset = SN->getPointerInfo().Offset;
+ mpi_offset -= mpi_offset%16;
+ MachinePointerInfo lowMemPtr(SN->getPointerInfo().V, mpi_offset);
+ MachinePointerInfo highMemPtr(SN->getPointerInfo().V, mpi_offset+16);
+
+
+ // two sanity checks
+ assert( SN->getAddressingMode() == ISD::UNINDEXED
+ && "we should get only UNINDEXED adresses");
+ // clean aligned loads can be selected as-is
+ if (StVT.getSizeInBits() == 128 && (alignment%16) == 0)
+ return SDValue();
+
+ SDValue alignLoadVec;
+ SDValue basePtr = SN->getBasePtr();
+ SDValue the_chain = SN->getChain();
+ SDValue insertEltOffs;
+
+ if ((alignment%16) == 0) {
+ ConstantSDNode *CN;
+ // Special cases for a known aligned load to simplify the base pointer
+ // and insertion byte:
+ if (basePtr.getOpcode() == ISD::ADD
+ && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
+ // Known offset into basePtr
+ int64_t offset = CN->getSExtValue();
+
+ // Simplify the base pointer for this case:
+ basePtr = basePtr.getOperand(0);
+ insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+ basePtr,
+ DAG.getConstant((offset & 0xf), PtrVT));
+
+ if ((offset & ~0xf) > 0) {
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+ basePtr,
+ DAG.getConstant((offset & ~0xf), PtrVT));
+ }
+ } else {
+ // Otherwise, assume it's at byte 0 of basePtr
+ insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+ basePtr,
+ DAG.getConstant(0, PtrVT));
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+ basePtr,
+ DAG.getConstant(0, PtrVT));
+ }
+ } else {
+ // Unaligned load: must be more pessimistic about addressing modes:
+ if (basePtr.getOpcode() == ISD::ADD) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
+ SDValue Flag;
+
+ SDValue Op0 = basePtr.getOperand(0);
+ SDValue Op1 = basePtr.getOperand(1);
+
+ if (isa<ConstantSDNode>(Op1)) {
+ // Convert the (add <ptr>, <const>) to an indirect address contained
+ // in a register. Note that this is done because we need to avoid
+ // creating a 0(reg) d-form address due to the SPU's block loads.
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
+ the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
+ basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
+ } else {
+ // Convert the (add <arg1>, <arg2>) to an indirect address, which
+ // will likely be lowered as a reg(reg) x-form address.
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
+ }
+ } else {
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+ basePtr,
+ DAG.getConstant(0, PtrVT));
+ }
+
+ // Insertion point is solely determined by basePtr's contents
+ insertEltOffs = DAG.getNode(ISD::ADD, dl, PtrVT,
+ basePtr,
+ DAG.getConstant(0, PtrVT));
+ }
+
+ // Load the lower part of the memory to which to store.
+ SDValue low = DAG.getLoad(vecVT, dl, the_chain, basePtr,
+ lowMemPtr, SN->isVolatile(), SN->isNonTemporal(), 16);
+
+ // if we don't need to store over the 16 byte boundary, one store suffices
+ if (alignment >= StVT.getSizeInBits()/8) {
+ // Update the chain
+ the_chain = low.getValue(1);
+
+ LoadSDNode *LN = cast<LoadSDNode>(low);
+ SDValue theValue = SN->getValue();
+
+ if (StVT != VT
+ && (theValue.getOpcode() == ISD::AssertZext
+ || theValue.getOpcode() == ISD::AssertSext)) {
+ // Drill down and get the value for zero- and sign-extended
+ // quantities
+ theValue = theValue.getOperand(0);
+ }
+
+ // If the base pointer is already a D-form address, then just create
+ // a new D-form address with a slot offset and the orignal base pointer.
+ // Otherwise generate a D-form address with the slot offset relative
+ // to the stack pointer, which is always aligned.
+#if !defined(NDEBUG)
+ if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
+ errs() << "CellSPU LowerSTORE: basePtr = ";
+ basePtr.getNode()->dump(&DAG);
+ errs() << "\n";
+ }
+#endif
+
+ SDValue insertEltOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT,
+ insertEltOffs);
+ SDValue vectorizeOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT,
+ theValue);
+
+ result = DAG.getNode(SPUISD::SHUFB, dl, vecVT,
+ vectorizeOp, low,
+ DAG.getNode(ISD::BITCAST, dl,
+ MVT::v4i32, insertEltOp));
+
+ result = DAG.getStore(the_chain, dl, result, basePtr,
+ lowMemPtr,
+ LN->isVolatile(), LN->isNonTemporal(),
+ 16);
+
+ }
+ // do the store when it might cross the 16 byte memory access boundary.
+ else {
+ // TODO issue a warning if SN->isVolatile()== true? This is likely not
+ // what the user wanted.
+
+ // address offset from nearest lower 16byte alinged address
+ SDValue offset = DAG.getNode(ISD::AND, dl, MVT::i32,
+ SN->getBasePtr(),
+ DAG.getConstant(0xf, MVT::i32));
+ // 16 - offset
+ SDValue offset_compl = DAG.getNode(ISD::SUB, dl, MVT::i32,
+ DAG.getConstant( 16, MVT::i32),
+ offset);
+ // 16 - sizeof(Value)
+ SDValue surplus = DAG.getNode(ISD::SUB, dl, MVT::i32,
+ DAG.getConstant( 16, MVT::i32),
+ DAG.getConstant( VT.getSizeInBits()/8,
+ MVT::i32));
+ // get a registerfull of ones
+ SDValue ones = DAG.getConstant(-1, MVT::v4i32);
+ ones = DAG.getNode(ISD::BITCAST, dl, MVT::i128, ones);
+
+ // Create the 128 bit masks that have ones where the data to store is
+ // located.
+ SDValue lowmask, himask;
+ // if the value to store don't fill up the an entire 128 bits, zero
+ // out the last bits of the mask so that only the value we want to store
+ // is masked.
+ // this is e.g. in the case of store i32, align 2
+ if (!VT.isVector()){
+ Value = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, Value);
+ lowmask = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, ones, surplus);
+ lowmask = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, lowmask,
+ surplus);
+ Value = DAG.getNode(ISD::BITCAST, dl, MVT::i128, Value);
+ Value = DAG.getNode(ISD::AND, dl, MVT::i128, Value, lowmask);
+
+ }
+ else {
+ lowmask = ones;
+ Value = DAG.getNode(ISD::BITCAST, dl, MVT::i128, Value);
+ }
+ // this will zero, if there are no data that goes to the high quad
+ himask = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, lowmask,
+ offset_compl);
+ lowmask = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, lowmask,
+ offset);
+
+ // Load in the old data and zero out the parts that will be overwritten with
+ // the new data to store.
+ SDValue hi = DAG.getLoad(MVT::i128, dl, the_chain,
+ DAG.getNode(ISD::ADD, dl, PtrVT, basePtr,
+ DAG.getConstant( 16, PtrVT)),
+ highMemPtr,
+ SN->isVolatile(), SN->isNonTemporal(), 16);
+ the_chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(1),
+ hi.getValue(1));
+
+ low = DAG.getNode(ISD::AND, dl, MVT::i128,
+ DAG.getNode( ISD::BITCAST, dl, MVT::i128, low),
+ DAG.getNode( ISD::XOR, dl, MVT::i128, lowmask, ones));
+ hi = DAG.getNode(ISD::AND, dl, MVT::i128,
+ DAG.getNode( ISD::BITCAST, dl, MVT::i128, hi),
+ DAG.getNode( ISD::XOR, dl, MVT::i128, himask, ones));
+
+ // Shift the Value to store into place. rlow contains the parts that go to
+ // the lower memory chunk, rhi has the parts that go to the upper one.
+ SDValue rlow = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, Value, offset);
+ rlow = DAG.getNode(ISD::AND, dl, MVT::i128, rlow, lowmask);
+ SDValue rhi = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, Value,
+ offset_compl);
+
+ // Merge the old data and the new data and store the results
+ // Need to convert vectors here to integer as 'OR'ing floats assert
+ rlow = DAG.getNode(ISD::OR, dl, MVT::i128,
+ DAG.getNode(ISD::BITCAST, dl, MVT::i128, low),
+ DAG.getNode(ISD::BITCAST, dl, MVT::i128, rlow));
+ rhi = DAG.getNode(ISD::OR, dl, MVT::i128,
+ DAG.getNode(ISD::BITCAST, dl, MVT::i128, hi),
+ DAG.getNode(ISD::BITCAST, dl, MVT::i128, rhi));
+
+ low = DAG.getStore(the_chain, dl, rlow, basePtr,
+ lowMemPtr,
+ SN->isVolatile(), SN->isNonTemporal(), 16);
+ hi = DAG.getStore(the_chain, dl, rhi,
+ DAG.getNode(ISD::ADD, dl, PtrVT, basePtr,
+ DAG.getConstant( 16, PtrVT)),
+ highMemPtr,
+ SN->isVolatile(), SN->isNonTemporal(), 16);
+ result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(0),
+ hi.getValue(0));
+ }
+
+ return result;
+}
+
+//! Generate the address of a constant pool entry.
+static SDValue
+LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
+ EVT PtrVT = Op.getValueType();
+ ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
+ const Constant *C = CP->getConstVal();
+ SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
+ SDValue Zero = DAG.getConstant(0, PtrVT);
+ const TargetMachine &TM = DAG.getTarget();
+ // FIXME there is no actual debug info here
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (TM.getRelocationModel() == Reloc::Static) {
+ if (!ST->usingLargeMem()) {
+ // Just return the SDValue with the constant pool address in it.
+ return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, CPI, Zero);
+ } else {
+ SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, CPI, Zero);
+ SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, CPI, Zero);
+ return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
+ }
+ }
+
+ llvm_unreachable("LowerConstantPool: Relocation model other than static"
+ " not supported.");
+ return SDValue();
+}
+
+//! Alternate entry point for generating the address of a constant pool entry
+SDValue
+SPU::LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUTargetMachine &TM) {
+ return ::LowerConstantPool(Op, DAG, TM.getSubtargetImpl());
+}
+
+static SDValue
+LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
+ EVT PtrVT = Op.getValueType();
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+ SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
+ SDValue Zero = DAG.getConstant(0, PtrVT);
+ const TargetMachine &TM = DAG.getTarget();
+ // FIXME there is no actual debug info here
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (TM.getRelocationModel() == Reloc::Static) {
+ if (!ST->usingLargeMem()) {
+ return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, JTI, Zero);
+ } else {
+ SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, JTI, Zero);
+ SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, JTI, Zero);
+ return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
+ }
+ }
+
+ llvm_unreachable("LowerJumpTable: Relocation model other than static"
+ " not supported.");
+ return SDValue();
+}
+
+static SDValue
+LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
+ EVT PtrVT = Op.getValueType();
+ GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
+ const GlobalValue *GV = GSDN->getGlobal();
+ SDValue GA = DAG.getTargetGlobalAddress(GV, Op.getDebugLoc(),
+ PtrVT, GSDN->getOffset());
+ const TargetMachine &TM = DAG.getTarget();
+ SDValue Zero = DAG.getConstant(0, PtrVT);
+ // FIXME there is no actual debug info here
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (TM.getRelocationModel() == Reloc::Static) {
+ if (!ST->usingLargeMem()) {
+ return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, GA, Zero);
+ } else {
+ SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, GA, Zero);
+ SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, GA, Zero);
+ return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
+ }
+ } else {
+ report_fatal_error("LowerGlobalAddress: Relocation model other than static"
+ "not supported.");
+ /*NOTREACHED*/
+ }
+
+ return SDValue();
+}
+
+//! Custom lower double precision floating point constants
+static SDValue
+LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
+ // FIXME there is no actual debug info here
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (VT == MVT::f64) {
+ ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());
+
+ assert((FP != 0) &&
+ "LowerConstantFP: Node is not ConstantFPSDNode");
+
+ uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
+ SDValue T = DAG.getConstant(dbits, MVT::i64);
+ SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T);
+ return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
+ DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Tvec));
+ }
+
+ return SDValue();
+}
+
+SDValue
+SPUTargetLowering::LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals)
+ const {
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ SPUFunctionInfo *FuncInfo = MF.getInfo<SPUFunctionInfo>();
+
+ unsigned ArgOffset = SPUFrameLowering::minStackSize();
+ unsigned ArgRegIdx = 0;
+ unsigned StackSlotSize = SPUFrameLowering::stackSlotSize();
+
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+ // FIXME: allow for other calling conventions
+ CCInfo.AnalyzeFormalArguments(Ins, CCC_SPU);
+
+ // Add DAG nodes to load the arguments or copy them out of registers.
+ for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
+ EVT ObjectVT = Ins[ArgNo].VT;
+ unsigned ObjSize = ObjectVT.getSizeInBits()/8;
+ SDValue ArgVal;
+ CCValAssign &VA = ArgLocs[ArgNo];
+
+ if (VA.isRegLoc()) {
+ const TargetRegisterClass *ArgRegClass;
+
+ switch (ObjectVT.getSimpleVT().SimpleTy) {
+ default:
+ report_fatal_error("LowerFormalArguments Unhandled argument type: " +
+ Twine(ObjectVT.getEVTString()));
+ case MVT::i8:
+ ArgRegClass = &SPU::R8CRegClass;
+ break;
+ case MVT::i16:
+ ArgRegClass = &SPU::R16CRegClass;
+ break;
+ case MVT::i32:
+ ArgRegClass = &SPU::R32CRegClass;
+ break;
+ case MVT::i64:
+ ArgRegClass = &SPU::R64CRegClass;
+ break;
+ case MVT::i128:
+ ArgRegClass = &SPU::GPRCRegClass;
+ break;
+ case MVT::f32:
+ ArgRegClass = &SPU::R32FPRegClass;
+ break;
+ case MVT::f64:
+ ArgRegClass = &SPU::R64FPRegClass;
+ break;
+ case MVT::v2f64:
+ case MVT::v4f32:
+ case MVT::v2i64:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v16i8:
+ ArgRegClass = &SPU::VECREGRegClass;
+ break;
+ }
+
+ unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
+ RegInfo.addLiveIn(VA.getLocReg(), VReg);
+ ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
+ ++ArgRegIdx;
+ } else {
+ // We need to load the argument to a virtual register if we determined
+ // above that we ran out of physical registers of the appropriate type
+ // or we're forced to do vararg
+ int FI = MFI->CreateFixedObject(ObjSize, ArgOffset, true);
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+ ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
+ false, false, 0);
+ ArgOffset += StackSlotSize;
+ }
+
+ InVals.push_back(ArgVal);
+ // Update the chain
+ Chain = ArgVal.getOperand(0);
+ }
+
+ // vararg handling:
+ if (isVarArg) {
+ // FIXME: we should be able to query the argument registers from
+ // tablegen generated code.
+ static const unsigned ArgRegs[] = {
+ SPU::R3, SPU::R4, SPU::R5, SPU::R6, SPU::R7, SPU::R8, SPU::R9,
+ SPU::R10, SPU::R11, SPU::R12, SPU::R13, SPU::R14, SPU::R15, SPU::R16,
+ SPU::R17, SPU::R18, SPU::R19, SPU::R20, SPU::R21, SPU::R22, SPU::R23,
+ SPU::R24, SPU::R25, SPU::R26, SPU::R27, SPU::R28, SPU::R29, SPU::R30,
+ SPU::R31, SPU::R32, SPU::R33, SPU::R34, SPU::R35, SPU::R36, SPU::R37,
+ SPU::R38, SPU::R39, SPU::R40, SPU::R41, SPU::R42, SPU::R43, SPU::R44,
+ SPU::R45, SPU::R46, SPU::R47, SPU::R48, SPU::R49, SPU::R50, SPU::R51,
+ SPU::R52, SPU::R53, SPU::R54, SPU::R55, SPU::R56, SPU::R57, SPU::R58,
+ SPU::R59, SPU::R60, SPU::R61, SPU::R62, SPU::R63, SPU::R64, SPU::R65,
+ SPU::R66, SPU::R67, SPU::R68, SPU::R69, SPU::R70, SPU::R71, SPU::R72,
+ SPU::R73, SPU::R74, SPU::R75, SPU::R76, SPU::R77, SPU::R78, SPU::R79
+ };
+ // size of ArgRegs array
+ unsigned NumArgRegs = 77;
+
+ // We will spill (79-3)+1 registers to the stack
+ SmallVector<SDValue, 79-3+1> MemOps;
+
+ // Create the frame slot
+ for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
+ FuncInfo->setVarArgsFrameIndex(
+ MFI->CreateFixedObject(StackSlotSize, ArgOffset, true));
+ SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
+ unsigned VReg = MF.addLiveIn(ArgRegs[ArgRegIdx], &SPU::VECREGRegClass);
+ SDValue ArgVal = DAG.getRegister(VReg, MVT::v16i8);
+ SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, MachinePointerInfo(),
+ false, false, 0);
+ Chain = Store.getOperand(0);
+ MemOps.push_back(Store);
+
+ // Increment address by stack slot size for the next stored argument
+ ArgOffset += StackSlotSize;
+ }
+ if (!MemOps.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOps[0], MemOps.size());
+ }
+
+ return Chain;
+}
+
+/// isLSAAddress - Return the immediate to use if the specified
+/// value is representable as a LSA address.
+static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
+ if (!C) return 0;
+
+ int Addr = C->getZExtValue();
+ if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
+ (Addr << 14 >> 14) != Addr)
+ return 0; // Top 14 bits have to be sext of immediate.
+
+ return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
+}
+
+SDValue
+SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ // CellSPU target does not yet support tail call optimization.
+ isTailCall = false;
+
+ const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
+ unsigned NumOps = Outs.size();
+ unsigned StackSlotSize = SPUFrameLowering::stackSlotSize();
+
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+ // FIXME: allow for other calling conventions
+ CCInfo.AnalyzeCallOperands(Outs, CCC_SPU);
+
+ const unsigned NumArgRegs = ArgLocs.size();
+
+
+ // Handy pointer type
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+
+ // Set up a copy of the stack pointer for use loading and storing any
+ // arguments that may not fit in the registers available for argument
+ // passing.
+ SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
+
+ // Figure out which arguments are going to go in registers, and which in
+ // memory.
+ unsigned ArgOffset = SPUFrameLowering::minStackSize(); // Just below [LR]
+ unsigned ArgRegIdx = 0;
+
+ // Keep track of registers passing arguments
+ std::vector<std::pair<unsigned, SDValue> > RegsToPass;
+ // And the arguments passed on the stack
+ SmallVector<SDValue, 8> MemOpChains;
+
+ for (; ArgRegIdx != NumOps; ++ArgRegIdx) {
+ SDValue Arg = OutVals[ArgRegIdx];
+ CCValAssign &VA = ArgLocs[ArgRegIdx];
+
+ // PtrOff will be used to store the current argument to the stack if a
+ // register cannot be found for it.
+ SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
+ PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
+
+ switch (Arg.getValueType().getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unexpected ValueType for argument!");
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ case MVT::i64:
+ case MVT::i128:
+ case MVT::f32:
+ case MVT::f64:
+ case MVT::v2i64:
+ case MVT::v2f64:
+ case MVT::v4f32:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v16i8:
+ if (ArgRegIdx != NumArgRegs) {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ } else {
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(),
+ false, false, 0));
+ ArgOffset += StackSlotSize;
+ }
+ break;
+ }
+ }
+
+ // Accumulate how many bytes are to be pushed on the stack, including the
+ // linkage area, and parameter passing area. According to the SPU ABI,
+ // we minimally need space for [LR] and [SP].
+ unsigned NumStackBytes = ArgOffset - SPUFrameLowering::minStackSize();
+
+ // Insert a call sequence start
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
+ true));
+
+ if (!MemOpChains.empty()) {
+ // Adjust the stack pointer for the stack arguments.
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+ }
+
+ // Build a sequence of copy-to-reg nodes chained together with token chain
+ // and flag operands which copy the outgoing args into the appropriate regs.
+ SDValue InFlag;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ SmallVector<SDValue, 8> Ops;
+ unsigned CallOpc = SPUISD::CALL;
+
+ // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
+ // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
+ // node so that legalize doesn't hack it.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ const GlobalValue *GV = G->getGlobal();
+ EVT CalleeVT = Callee.getValueType();
+ SDValue Zero = DAG.getConstant(0, PtrVT);
+ SDValue GA = DAG.getTargetGlobalAddress(GV, dl, CalleeVT);
+
+ if (!ST->usingLargeMem()) {
+ // Turn calls to targets that are defined (i.e., have bodies) into BRSL
+ // style calls, otherwise, external symbols are BRASL calls. This assumes
+ // that declared/defined symbols are in the same compilation unit and can
+ // be reached through PC-relative jumps.
+ //
+ // NOTE:
+ // This may be an unsafe assumption for JIT and really large compilation
+ // units.
+ if (GV->isDeclaration()) {
+ Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, GA, Zero);
+ } else {
+ Callee = DAG.getNode(SPUISD::PCRelAddr, dl, CalleeVT, GA, Zero);
+ }
+ } else {
+ // "Large memory" mode: Turn all calls into indirect calls with a X-form
+ // address pairs:
+ Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, GA, Zero);
+ }
+ } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ EVT CalleeVT = Callee.getValueType();
+ SDValue Zero = DAG.getConstant(0, PtrVT);
+ SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(),
+ Callee.getValueType());
+
+ if (!ST->usingLargeMem()) {
+ Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, ExtSym, Zero);
+ } else {
+ Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, ExtSym, Zero);
+ }
+ } else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
+ // If this is an absolute destination address that appears to be a legal
+ // local store address, use the munged value.
+ Callee = SDValue(Dest, 0);
+ }
+
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ // Add argument registers to the end of the list so that they are known live
+ // into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+ if (InFlag.getNode())
+ Ops.push_back(InFlag);
+ // Returns a chain and a flag for retval copy to use.
+ Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Glue),
+ &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
+ DAG.getIntPtrConstant(0, true), InFlag);
+ if (!Ins.empty())
+ InFlag = Chain.getValue(1);
+
+ // If the function returns void, just return the chain.
+ if (Ins.empty())
+ return Chain;
+
+ // Now handle the return value(s)
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+ CCRetInfo.AnalyzeCallResult(Ins, CCC_SPU);
+
+
+ // If the call has results, copy the values out of the ret val registers.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign VA = RVLocs[i];
+
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
+ InFlag);
+ Chain = Val.getValue(1);
+ InFlag = Val.getValue(2);
+ InVals.push_back(Val);
+ }
+
+ return Chain;
+}
+
+SDValue
+SPUTargetLowering::LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const {
+
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+ CCInfo.AnalyzeReturn(Outs, RetCC_SPU);
+
+ // If this is the first return lowered for this function, add the regs to the
+ // liveout set for the function.
+ if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
+ for (unsigned i = 0; i != RVLocs.size(); ++i)
+ DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
+ }
+
+ SDValue Flag;
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
+ OutVals[i], Flag);
+ Flag = Chain.getValue(1);
+ }
+
+ if (Flag.getNode())
+ return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
+ else
+ return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Vector related lowering:
+//===----------------------------------------------------------------------===//
+
+static ConstantSDNode *
+getVecImm(SDNode *N) {
+ SDValue OpVal(0, 0);
+
+ // Check to see if this buildvec has a single non-undef value in its elements.
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+ if (OpVal.getNode() == 0)
+ OpVal = N->getOperand(i);
+ else if (OpVal != N->getOperand(i))
+ return 0;
+ }
+
+ if (OpVal.getNode() != 0) {
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
+ return CN;
+ }
+ }
+
+ return 0;
+}
+
+/// get_vec_i18imm - Test if this vector is a vector filled with the same value
+/// and the value fits into an unsigned 18-bit constant, and if so, return the
+/// constant
+SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
+ EVT ValueType) {
+ if (ConstantSDNode *CN = getVecImm(N)) {
+ uint64_t Value = CN->getZExtValue();
+ if (ValueType == MVT::i64) {
+ uint64_t UValue = CN->getZExtValue();
+ uint32_t upper = uint32_t(UValue >> 32);
+ uint32_t lower = uint32_t(UValue);
+ if (upper != lower)
+ return SDValue();
+ Value = Value >> 32;
+ }
+ if (Value <= 0x3ffff)
+ return DAG.getTargetConstant(Value, ValueType);
+ }
+
+ return SDValue();
+}
+
+/// get_vec_i16imm - Test if this vector is a vector filled with the same value
+/// and the value fits into a signed 16-bit constant, and if so, return the
+/// constant
+SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
+ EVT ValueType) {
+ if (ConstantSDNode *CN = getVecImm(N)) {
+ int64_t Value = CN->getSExtValue();
+ if (ValueType == MVT::i64) {
+ uint64_t UValue = CN->getZExtValue();
+ uint32_t upper = uint32_t(UValue >> 32);
+ uint32_t lower = uint32_t(UValue);
+ if (upper != lower)
+ return SDValue();
+ Value = Value >> 32;
+ }
+ if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
+ return DAG.getTargetConstant(Value, ValueType);
+ }
+ }
+
+ return SDValue();
+}
+
+/// get_vec_i10imm - Test if this vector is a vector filled with the same value
+/// and the value fits into a signed 10-bit constant, and if so, return the
+/// constant
+SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
+ EVT ValueType) {
+ if (ConstantSDNode *CN = getVecImm(N)) {
+ int64_t Value = CN->getSExtValue();
+ if (ValueType == MVT::i64) {
+ uint64_t UValue = CN->getZExtValue();
+ uint32_t upper = uint32_t(UValue >> 32);
+ uint32_t lower = uint32_t(UValue);
+ if (upper != lower)
+ return SDValue();
+ Value = Value >> 32;
+ }
+ if (isInt<10>(Value))
+ return DAG.getTargetConstant(Value, ValueType);
+ }
+
+ return SDValue();
+}
+
+/// get_vec_i8imm - Test if this vector is a vector filled with the same value
+/// and the value fits into a signed 8-bit constant, and if so, return the
+/// constant.
+///
+/// @note: The incoming vector is v16i8 because that's the only way we can load
+/// constant vectors. Thus, we test to see if the upper and lower bytes are the
+/// same value.
+SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
+ EVT ValueType) {
+ if (ConstantSDNode *CN = getVecImm(N)) {
+ int Value = (int) CN->getZExtValue();
+ if (ValueType == MVT::i16
+ && Value <= 0xffff /* truncated from uint64_t */
+ && ((short) Value >> 8) == ((short) Value & 0xff))
+ return DAG.getTargetConstant(Value & 0xff, ValueType);
+ else if (ValueType == MVT::i8
+ && (Value & 0xff) == Value)
+ return DAG.getTargetConstant(Value, ValueType);
+ }
+
+ return SDValue();
+}
+
+/// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
+/// and the value fits into a signed 16-bit constant, and if so, return the
+/// constant
+SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
+ EVT ValueType) {
+ if (ConstantSDNode *CN = getVecImm(N)) {
+ uint64_t Value = CN->getZExtValue();
+ if ((ValueType == MVT::i32
+ && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
+ || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
+ return DAG.getTargetConstant(Value >> 16, ValueType);
+ }
+
+ return SDValue();
+}
+
+/// get_v4i32_imm - Catch-all for general 32-bit constant vectors
+SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
+ if (ConstantSDNode *CN = getVecImm(N)) {
+ return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);
+ }
+
+ return SDValue();
+}
+
+/// get_v4i32_imm - Catch-all for general 64-bit constant vectors
+SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
+ if (ConstantSDNode *CN = getVecImm(N)) {
+ return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i64);
+ }
+
+ return SDValue();
+}
+
+//! Lower a BUILD_VECTOR instruction creatively:
+static SDValue
+LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
+ EVT EltVT = VT.getVectorElementType();
+ DebugLoc dl = Op.getDebugLoc();
+ BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(Op.getNode());
+ assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerBUILD_VECTOR");
+ unsigned minSplatBits = EltVT.getSizeInBits();
+
+ if (minSplatBits < 16)
+ minSplatBits = 16;
+
+ APInt APSplatBits, APSplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+
+ if (!BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
+ HasAnyUndefs, minSplatBits)
+ || minSplatBits < SplatBitSize)
+ return SDValue(); // Wasn't a constant vector or splat exceeded min
+
+ uint64_t SplatBits = APSplatBits.getZExtValue();
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ default:
+ report_fatal_error("CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = " +
+ Twine(VT.getEVTString()));
+ /*NOTREACHED*/
+ case MVT::v4f32: {
+ uint32_t Value32 = uint32_t(SplatBits);
+ assert(SplatBitSize == 32
+ && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
+ // NOTE: pretend the constant is an integer. LLVM won't load FP constants
+ SDValue T = DAG.getConstant(Value32, MVT::i32);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::v4f32,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, T,T,T,T));
+ break;
+ }
+ case MVT::v2f64: {
+ uint64_t f64val = uint64_t(SplatBits);
+ assert(SplatBitSize == 64
+ && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
+ // NOTE: pretend the constant is an integer. LLVM won't load FP constants
+ SDValue T = DAG.getConstant(f64val, MVT::i64);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::v2f64,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T));
+ break;
+ }
+ case MVT::v16i8: {
+ // 8-bit constants have to be expanded to 16-bits
+ unsigned short Value16 = SplatBits /* | (SplatBits << 8) */;
+ SmallVector<SDValue, 8> Ops;
+
+ Ops.assign(8, DAG.getConstant(Value16, MVT::i16));
+ return DAG.getNode(ISD::BITCAST, dl, VT,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16, &Ops[0], Ops.size()));
+ }
+ case MVT::v8i16: {
+ unsigned short Value16 = SplatBits;
+ SDValue T = DAG.getConstant(Value16, EltVT);
+ SmallVector<SDValue, 8> Ops;
+
+ Ops.assign(8, T);
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size());
+ }
+ case MVT::v4i32: {
+ SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T, T, T);
+ }
+ case MVT::v2i64: {
+ return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl);
+ }
+ }
+
+ return SDValue();
+}
+
+/*!
+ */
+SDValue
+SPU::LowerV2I64Splat(EVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
+ DebugLoc dl) {
+ uint32_t upper = uint32_t(SplatVal >> 32);
+ uint32_t lower = uint32_t(SplatVal);
+
+ if (upper == lower) {
+ // Magic constant that can be matched by IL, ILA, et. al.
+ SDValue Val = DAG.getTargetConstant(upper, MVT::i32);
+ return DAG.getNode(ISD::BITCAST, dl, OpVT,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ Val, Val, Val, Val));
+ } else {
+ bool upper_special, lower_special;
+
+ // NOTE: This code creates common-case shuffle masks that can be easily
+ // detected as common expressions. It is not attempting to create highly
+ // specialized masks to replace any and all 0's, 0xff's and 0x80's.
+
+ // Detect if the upper or lower half is a special shuffle mask pattern:
+ upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
+ lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
+
+ // Both upper and lower are special, lower to a constant pool load:
+ if (lower_special && upper_special) {
+ SDValue SplatValCN = DAG.getConstant(SplatVal, MVT::i64);
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64,
+ SplatValCN, SplatValCN);
+ }
+
+ SDValue LO32;
+ SDValue HI32;
+ SmallVector<SDValue, 16> ShufBytes;
+ SDValue Result;
+
+ // Create lower vector if not a special pattern
+ if (!lower_special) {
+ SDValue LO32C = DAG.getConstant(lower, MVT::i32);
+ LO32 = DAG.getNode(ISD::BITCAST, dl, OpVT,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ LO32C, LO32C, LO32C, LO32C));
+ }
+
+ // Create upper vector if not a special pattern
+ if (!upper_special) {
+ SDValue HI32C = DAG.getConstant(upper, MVT::i32);
+ HI32 = DAG.getNode(ISD::BITCAST, dl, OpVT,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ HI32C, HI32C, HI32C, HI32C));
+ }
+
+ // If either upper or lower are special, then the two input operands are
+ // the same (basically, one of them is a "don't care")
+ if (lower_special)
+ LO32 = HI32;
+ if (upper_special)
+ HI32 = LO32;
+
+ for (int i = 0; i < 4; ++i) {
+ uint64_t val = 0;
+ for (int j = 0; j < 4; ++j) {
+ SDValue V;
+ bool process_upper, process_lower;
+ val <<= 8;
+ process_upper = (upper_special && (i & 1) == 0);
+ process_lower = (lower_special && (i & 1) == 1);
+
+ if (process_upper || process_lower) {
+ if ((process_upper && upper == 0)
+ || (process_lower && lower == 0))
+ val |= 0x80;
+ else if ((process_upper && upper == 0xffffffff)
+ || (process_lower && lower == 0xffffffff))
+ val |= 0xc0;
+ else if ((process_upper && upper == 0x80000000)
+ || (process_lower && lower == 0x80000000))
+ val |= (j == 0 ? 0xe0 : 0x80);
+ } else
+ val |= i * 4 + j + ((i & 1) * 16);
+ }
+
+ ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
+ }
+
+ return DAG.getNode(SPUISD::SHUFB, dl, OpVT, HI32, LO32,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ &ShufBytes[0], ShufBytes.size()));
+ }
+}
+
+/// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
+/// which the Cell can operate. The code inspects V3 to ascertain whether the
+/// permutation vector, V3, is monotonically increasing with one "exception"
+/// element, e.g., (0, 1, _, 3). If this is the case, then generate a
+/// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
+/// In either case, the net result is going to eventually invoke SHUFB to
+/// permute/shuffle the bytes from V1 and V2.
+/// \note
+/// SHUFFLE_MASK is eventually selected as one of the C*D instructions, generate
+/// control word for byte/halfword/word insertion. This takes care of a single
+/// element move from V2 into V1.
+/// \note
+/// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
+static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
+ const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
+
+ // If we have a single element being moved from V1 to V2, this can be handled
+ // using the C*[DX] compute mask instructions, but the vector elements have
+ // to be monotonically increasing with one exception element, and the source
+ // slot of the element to move must be the same as the destination.
+ EVT VecVT = V1.getValueType();
+ EVT EltVT = VecVT.getVectorElementType();
+ unsigned EltsFromV2 = 0;
+ unsigned V2EltOffset = 0;
+ unsigned V2EltIdx0 = 0;
+ unsigned CurrElt = 0;
+ unsigned MaxElts = VecVT.getVectorNumElements();
+ unsigned PrevElt = 0;
+ bool monotonic = true;
+ bool rotate = true;
+ int rotamt=0;
+ EVT maskVT; // which of the c?d instructions to use
+
+ if (EltVT == MVT::i8) {
+ V2EltIdx0 = 16;
+ maskVT = MVT::v16i8;
+ } else if (EltVT == MVT::i16) {
+ V2EltIdx0 = 8;
+ maskVT = MVT::v8i16;
+ } else if (EltVT == MVT::i32 || EltVT == MVT::f32) {
+ V2EltIdx0 = 4;
+ maskVT = MVT::v4i32;
+ } else if (EltVT == MVT::i64 || EltVT == MVT::f64) {
+ V2EltIdx0 = 2;
+ maskVT = MVT::v2i64;
+ } else
+ llvm_unreachable("Unhandled vector type in LowerVECTOR_SHUFFLE");
+
+ for (unsigned i = 0; i != MaxElts; ++i) {
+ if (SVN->getMaskElt(i) < 0)
+ continue;
+
+ unsigned SrcElt = SVN->getMaskElt(i);
+
+ if (monotonic) {
+ if (SrcElt >= V2EltIdx0) {
+ // TODO: optimize for the monotonic case when several consecutive
+ // elements are taken form V2. Do we ever get such a case?
+ if (EltsFromV2 == 0 && CurrElt == (SrcElt - V2EltIdx0))
+ V2EltOffset = (SrcElt - V2EltIdx0) * (EltVT.getSizeInBits()/8);
+ else
+ monotonic = false;
+ ++EltsFromV2;
+ } else if (CurrElt != SrcElt) {
+ monotonic = false;
+ }
+
+ ++CurrElt;
+ }
+
+ if (rotate) {
+ if (PrevElt > 0 && SrcElt < MaxElts) {
+ if ((PrevElt == SrcElt - 1)
+ || (PrevElt == MaxElts - 1 && SrcElt == 0)) {
+ PrevElt = SrcElt;
+ } else {
+ rotate = false;
+ }
+ } else if (i == 0 || (PrevElt==0 && SrcElt==1)) {
+ // First time or after a "wrap around"
+ rotamt = SrcElt-i;
+ PrevElt = SrcElt;
+ } else {
+ // This isn't a rotation, takes elements from vector 2
+ rotate = false;
+ }
+ }
+ }
+
+ if (EltsFromV2 == 1 && monotonic) {
+ // Compute mask and shuffle
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+
+ // As SHUFFLE_MASK becomes a c?d instruction, feed it an address
+ // R1 ($sp) is used here only as it is guaranteed to have last bits zero
+ SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+ DAG.getRegister(SPU::R1, PtrVT),
+ DAG.getConstant(V2EltOffset, MVT::i32));
+ SDValue ShufMaskOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl,
+ maskVT, Pointer);
+
+ // Use shuffle mask in SHUFB synthetic instruction:
+ return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V2, V1,
+ ShufMaskOp);
+ } else if (rotate) {
+ if (rotamt < 0)
+ rotamt +=MaxElts;
+ rotamt *= EltVT.getSizeInBits()/8;
+ return DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, V1.getValueType(),
+ V1, DAG.getConstant(rotamt, MVT::i16));
+ } else {
+ // Convert the SHUFFLE_VECTOR mask's input element units to the
+ // actual bytes.
+ unsigned BytesPerElement = EltVT.getSizeInBits()/8;
+
+ SmallVector<SDValue, 16> ResultMask;
+ for (unsigned i = 0, e = MaxElts; i != e; ++i) {
+ unsigned SrcElt = SVN->getMaskElt(i) < 0 ? 0 : SVN->getMaskElt(i);
+
+ for (unsigned j = 0; j < BytesPerElement; ++j)
+ ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,MVT::i8));
+ }
+ SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
+ &ResultMask[0], ResultMask.size());
+ return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V1, V2, VPermMask);
+ }
+}
+
+static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
+ SDValue Op0 = Op.getOperand(0); // Op0 = the scalar
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (Op0.getNode()->getOpcode() == ISD::Constant) {
+ // For a constant, build the appropriate constant vector, which will
+ // eventually simplify to a vector register load.
+
+ ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
+ SmallVector<SDValue, 16> ConstVecValues;
+ EVT VT;
+ size_t n_copies;
+
+ // Create a constant vector:
+ switch (Op.getValueType().getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unexpected constant value type in "
+ "LowerSCALAR_TO_VECTOR");
+ case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
+ case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
+ case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
+ case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
+ case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
+ case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
+ }
+
+ SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
+ for (size_t j = 0; j < n_copies; ++j)
+ ConstVecValues.push_back(CValue);
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, Op.getValueType(),
+ &ConstVecValues[0], ConstVecValues.size());
+ } else {
+ // Otherwise, copy the value from one register to another:
+ switch (Op0.getValueType().getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unexpected value type in LowerSCALAR_TO_VECTOR");
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ case MVT::i64:
+ case MVT::f32:
+ case MVT::f64:
+ return DAG.getNode(SPUISD::PREFSLOT2VEC, dl, Op.getValueType(), Op0, Op0);
+ }
+ }
+
+ return SDValue();
+}
+
+static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
+ SDValue N = Op.getOperand(0);
+ SDValue Elt = Op.getOperand(1);
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue retval;
+
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
+ // Constant argument:
+ int EltNo = (int) C->getZExtValue();
+
+ // sanity checks:
+ if (VT == MVT::i8 && EltNo >= 16)
+ llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
+ else if (VT == MVT::i16 && EltNo >= 8)
+ llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
+ else if (VT == MVT::i32 && EltNo >= 4)
+ llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
+ else if (VT == MVT::i64 && EltNo >= 2)
+ llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
+
+ if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
+ // i32 and i64: Element 0 is the preferred slot
+ return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, N);
+ }
+
+ // Need to generate shuffle mask and extract:
+ int prefslot_begin = -1, prefslot_end = -1;
+ int elt_byte = EltNo * VT.getSizeInBits() / 8;
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ default:
+ assert(false && "Invalid value type!");
+ case MVT::i8: {
+ prefslot_begin = prefslot_end = 3;
+ break;
+ }
+ case MVT::i16: {
+ prefslot_begin = 2; prefslot_end = 3;
+ break;
+ }
+ case MVT::i32:
+ case MVT::f32: {
+ prefslot_begin = 0; prefslot_end = 3;
+ break;
+ }
+ case MVT::i64:
+ case MVT::f64: {
+ prefslot_begin = 0; prefslot_end = 7;
+ break;
+ }
+ }
+
+ assert(prefslot_begin != -1 && prefslot_end != -1 &&
+ "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
+
+ unsigned int ShufBytes[16] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ };
+ for (int i = 0; i < 16; ++i) {
+ // zero fill uppper part of preferred slot, don't care about the
+ // other slots:
+ unsigned int mask_val;
+ if (i <= prefslot_end) {
+ mask_val =
+ ((i < prefslot_begin)
+ ? 0x80
+ : elt_byte + (i - prefslot_begin));
+
+ ShufBytes[i] = mask_val;
+ } else
+ ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)];
+ }
+
+ SDValue ShufMask[4];
+ for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
+ unsigned bidx = i * 4;
+ unsigned int bits = ((ShufBytes[bidx] << 24) |
+ (ShufBytes[bidx+1] << 16) |
+ (ShufBytes[bidx+2] << 8) |
+ ShufBytes[bidx+3]);
+ ShufMask[i] = DAG.getConstant(bits, MVT::i32);
+ }
+
+ SDValue ShufMaskVec =
+ DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ &ShufMask[0], sizeof(ShufMask)/sizeof(ShufMask[0]));
+
+ retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
+ DAG.getNode(SPUISD::SHUFB, dl, N.getValueType(),
+ N, N, ShufMaskVec));
+ } else {
+ // Variable index: Rotate the requested element into slot 0, then replicate
+ // slot 0 across the vector
+ EVT VecVT = N.getValueType();
+ if (!VecVT.isSimple() || !VecVT.isVector()) {
+ report_fatal_error("LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit"
+ "vector type!");
+ }
+
+ // Make life easier by making sure the index is zero-extended to i32
+ if (Elt.getValueType() != MVT::i32)
+ Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Elt);
+
+ // Scale the index to a bit/byte shift quantity
+ APInt scaleFactor =
+ APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
+ unsigned scaleShift = scaleFactor.logBase2();
+ SDValue vecShift;
+
+ if (scaleShift > 0) {
+ // Scale the shift factor:
+ Elt = DAG.getNode(ISD::SHL, dl, MVT::i32, Elt,
+ DAG.getConstant(scaleShift, MVT::i32));
+ }
+
+ vecShift = DAG.getNode(SPUISD::SHL_BYTES, dl, VecVT, N, Elt);
+
+ // Replicate the bytes starting at byte 0 across the entire vector (for
+ // consistency with the notion of a unified register set)
+ SDValue replicate;
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ default:
+ report_fatal_error("LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector"
+ "type");
+ /*NOTREACHED*/
+ case MVT::i8: {
+ SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
+ replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ factor, factor, factor, factor);
+ break;
+ }
+ case MVT::i16: {
+ SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
+ replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ factor, factor, factor, factor);
+ break;
+ }
+ case MVT::i32:
+ case MVT::f32: {
+ SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
+ replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ factor, factor, factor, factor);
+ break;
+ }
+ case MVT::i64:
+ case MVT::f64: {
+ SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32);
+ SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32);
+ replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ loFactor, hiFactor, loFactor, hiFactor);
+ break;
+ }
+ }
+
+ retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
+ DAG.getNode(SPUISD::SHUFB, dl, VecVT,
+ vecShift, vecShift, replicate));
+ }
+
+ return retval;
+}
+
+static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
+ SDValue VecOp = Op.getOperand(0);
+ SDValue ValOp = Op.getOperand(1);
+ SDValue IdxOp = Op.getOperand(2);
+ DebugLoc dl = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+ EVT eltVT = ValOp.getValueType();
+
+ // use 0 when the lane to insert to is 'undef'
+ int64_t Offset=0;
+ if (IdxOp.getOpcode() != ISD::UNDEF) {
+ ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
+ assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
+ Offset = (CN->getSExtValue()) * eltVT.getSizeInBits()/8;
+ }
+
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ // Use $sp ($1) because it's always 16-byte aligned and it's available:
+ SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+ DAG.getRegister(SPU::R1, PtrVT),
+ DAG.getConstant(Offset, PtrVT));
+ // widen the mask when dealing with half vectors
+ EVT maskVT = EVT::getVectorVT(*(DAG.getContext()), VT.getVectorElementType(),
+ 128/ VT.getVectorElementType().getSizeInBits());
+ SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, maskVT, Pointer);
+
+ SDValue result =
+ DAG.getNode(SPUISD::SHUFB, dl, VT,
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, ValOp),
+ VecOp,
+ DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, ShufMask));
+
+ return result;
+}
+
+static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
+ const TargetLowering &TLI)
+{
+ SDValue N0 = Op.getOperand(0); // Everything has at least one operand
+ DebugLoc dl = Op.getDebugLoc();
+ EVT ShiftVT = TLI.getShiftAmountTy(N0.getValueType());
+
+ assert(Op.getValueType() == MVT::i8);
+ switch (Opc) {
+ default:
+ llvm_unreachable("Unhandled i8 math operator");
+ /*NOTREACHED*/
+ break;
+ case ISD::ADD: {
+ // 8-bit addition: Promote the arguments up to 16-bits and truncate
+ // the result:
+ SDValue N1 = Op.getOperand(1);
+ N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
+ N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
+ DAG.getNode(Opc, dl, MVT::i16, N0, N1));
+
+ }
+
+ case ISD::SUB: {
+ // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
+ // the result:
+ SDValue N1 = Op.getOperand(1);
+ N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
+ N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
+ DAG.getNode(Opc, dl, MVT::i16, N0, N1));
+ }
+ case ISD::ROTR:
+ case ISD::ROTL: {
+ SDValue N1 = Op.getOperand(1);
+ EVT N1VT = N1.getValueType();
+
+ N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
+ if (!N1VT.bitsEq(ShiftVT)) {
+ unsigned N1Opc = N1.getValueType().bitsLT(ShiftVT)
+ ? ISD::ZERO_EXTEND
+ : ISD::TRUNCATE;
+ N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
+ }
+
+ // Replicate lower 8-bits into upper 8:
+ SDValue ExpandArg =
+ DAG.getNode(ISD::OR, dl, MVT::i16, N0,
+ DAG.getNode(ISD::SHL, dl, MVT::i16,
+ N0, DAG.getConstant(8, MVT::i32)));
+
+ // Truncate back down to i8
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
+ DAG.getNode(Opc, dl, MVT::i16, ExpandArg, N1));
+ }
+ case ISD::SRL:
+ case ISD::SHL: {
+ SDValue N1 = Op.getOperand(1);
+ EVT N1VT = N1.getValueType();
+
+ N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
+ if (!N1VT.bitsEq(ShiftVT)) {
+ unsigned N1Opc = ISD::ZERO_EXTEND;
+
+ if (N1.getValueType().bitsGT(ShiftVT))
+ N1Opc = ISD::TRUNCATE;
+
+ N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
+ }
+
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
+ DAG.getNode(Opc, dl, MVT::i16, N0, N1));
+ }
+ case ISD::SRA: {
+ SDValue N1 = Op.getOperand(1);
+ EVT N1VT = N1.getValueType();
+
+ N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
+ if (!N1VT.bitsEq(ShiftVT)) {
+ unsigned N1Opc = ISD::SIGN_EXTEND;
+
+ if (N1VT.bitsGT(ShiftVT))
+ N1Opc = ISD::TRUNCATE;
+ N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
+ }
+
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
+ DAG.getNode(Opc, dl, MVT::i16, N0, N1));
+ }
+ case ISD::MUL: {
+ SDValue N1 = Op.getOperand(1);
+
+ N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
+ N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
+ DAG.getNode(Opc, dl, MVT::i16, N0, N1));
+ break;
+ }
+ }
+
+ return SDValue();
+}
+
+//! Lower byte immediate operations for v16i8 vectors:
+static SDValue
+LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
+ SDValue ConstVec;
+ SDValue Arg;
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+
+ ConstVec = Op.getOperand(0);
+ Arg = Op.getOperand(1);
+ if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
+ if (ConstVec.getNode()->getOpcode() == ISD::BITCAST) {
+ ConstVec = ConstVec.getOperand(0);
+ } else {
+ ConstVec = Op.getOperand(1);
+ Arg = Op.getOperand(0);
+ if (ConstVec.getNode()->getOpcode() == ISD::BITCAST) {
+ ConstVec = ConstVec.getOperand(0);
+ }
+ }
+ }
+
+ if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
+ BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(ConstVec.getNode());
+ assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerByteImmed");
+
+ APInt APSplatBits, APSplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ unsigned minSplatBits = VT.getVectorElementType().getSizeInBits();
+
+ if (BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
+ HasAnyUndefs, minSplatBits)
+ && minSplatBits <= SplatBitSize) {
+ uint64_t SplatBits = APSplatBits.getZExtValue();
+ SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
+
+ SmallVector<SDValue, 16> tcVec;
+ tcVec.assign(16, tc);
+ return DAG.getNode(Op.getNode()->getOpcode(), dl, VT, Arg,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &tcVec[0], tcVec.size()));
+ }
+ }
+
+ // These operations (AND, OR, XOR) are legal, they just couldn't be custom
+ // lowered. Return the operation, rather than a null SDValue.
+ return Op;
+}
+
+//! Custom lowering for CTPOP (count population)
+/*!
+ Custom lowering code that counts the number ones in the input
+ operand. SPU has such an instruction, but it counts the number of
+ ones per byte, which then have to be accumulated.
+*/
+static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
+ EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
+ VT, (128 / VT.getSizeInBits()));
+ DebugLoc dl = Op.getDebugLoc();
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ default:
+ assert(false && "Invalid value type!");
+ case MVT::i8: {
+ SDValue N = Op.getOperand(0);
+ SDValue Elt0 = DAG.getConstant(0, MVT::i32);
+
+ SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
+ SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
+
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, CNTB, Elt0);
+ }
+
+ case MVT::i16: {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+
+ unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
+
+ SDValue N = Op.getOperand(0);
+ SDValue Elt0 = DAG.getConstant(0, MVT::i16);
+ SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
+ SDValue Shift1 = DAG.getConstant(8, MVT::i32);
+
+ SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
+ SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
+
+ // CNTB_result becomes the chain to which all of the virtual registers
+ // CNTB_reg, SUM1_reg become associated:
+ SDValue CNTB_result =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, CNTB, Elt0);
+
+ SDValue CNTB_rescopy =
+ DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
+
+ SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i16);
+
+ return DAG.getNode(ISD::AND, dl, MVT::i16,
+ DAG.getNode(ISD::ADD, dl, MVT::i16,
+ DAG.getNode(ISD::SRL, dl, MVT::i16,
+ Tmp1, Shift1),
+ Tmp1),
+ Mask0);
+ }
+
+ case MVT::i32: {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+
+ unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
+ unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
+
+ SDValue N = Op.getOperand(0);
+ SDValue Elt0 = DAG.getConstant(0, MVT::i32);
+ SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
+ SDValue Shift1 = DAG.getConstant(16, MVT::i32);
+ SDValue Shift2 = DAG.getConstant(8, MVT::i32);
+
+ SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
+ SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
+
+ // CNTB_result becomes the chain to which all of the virtual registers
+ // CNTB_reg, SUM1_reg become associated:
+ SDValue CNTB_result =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, CNTB, Elt0);
+
+ SDValue CNTB_rescopy =
+ DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
+
+ SDValue Comp1 =
+ DAG.getNode(ISD::SRL, dl, MVT::i32,
+ DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32),
+ Shift1);
+
+ SDValue Sum1 =
+ DAG.getNode(ISD::ADD, dl, MVT::i32, Comp1,
+ DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32));
+
+ SDValue Sum1_rescopy =
+ DAG.getCopyToReg(CNTB_result, dl, SUM1_reg, Sum1);
+
+ SDValue Comp2 =
+ DAG.getNode(ISD::SRL, dl, MVT::i32,
+ DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32),
+ Shift2);
+ SDValue Sum2 =
+ DAG.getNode(ISD::ADD, dl, MVT::i32, Comp2,
+ DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32));
+
+ return DAG.getNode(ISD::AND, dl, MVT::i32, Sum2, Mask0);
+ }
+
+ case MVT::i64:
+ break;
+ }
+
+ return SDValue();
+}
+
+//! Lower ISD::FP_TO_SINT, ISD::FP_TO_UINT for i32
+/*!
+ f32->i32 passes through unchanged, whereas f64->i32 expands to a libcall.
+ All conversions to i64 are expanded to a libcall.
+ */
+static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
+ const SPUTargetLowering &TLI) {
+ EVT OpVT = Op.getValueType();
+ SDValue Op0 = Op.getOperand(0);
+ EVT Op0VT = Op0.getValueType();
+
+ if ((OpVT == MVT::i32 && Op0VT == MVT::f64)
+ || OpVT == MVT::i64) {
+ // Convert f32 / f64 to i32 / i64 via libcall.
+ RTLIB::Libcall LC =
+ (Op.getOpcode() == ISD::FP_TO_SINT)
+ ? RTLIB::getFPTOSINT(Op0VT, OpVT)
+ : RTLIB::getFPTOUINT(Op0VT, OpVT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpectd fp-to-int conversion!");
+ SDValue Dummy;
+ return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
+ }
+
+ return Op;
+}
+
+//! Lower ISD::SINT_TO_FP, ISD::UINT_TO_FP for i32
+/*!
+ i32->f32 passes through unchanged, whereas i32->f64 is expanded to a libcall.
+ All conversions from i64 are expanded to a libcall.
+ */
+static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
+ const SPUTargetLowering &TLI) {
+ EVT OpVT = Op.getValueType();
+ SDValue Op0 = Op.getOperand(0);
+ EVT Op0VT = Op0.getValueType();
+
+ if ((OpVT == MVT::f64 && Op0VT == MVT::i32)
+ || Op0VT == MVT::i64) {
+ // Convert i32, i64 to f64 via libcall:
+ RTLIB::Libcall LC =
+ (Op.getOpcode() == ISD::SINT_TO_FP)
+ ? RTLIB::getSINTTOFP(Op0VT, OpVT)
+ : RTLIB::getUINTTOFP(Op0VT, OpVT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpectd int-to-fp conversion!");
+ SDValue Dummy;
+ return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
+ }
+
+ return Op;
+}
+
+//! Lower ISD::SETCC
+/*!
+ This handles MVT::f64 (double floating point) condition lowering
+ */
+static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ CondCodeSDNode *CC = dyn_cast<CondCodeSDNode>(Op.getOperand(2));
+ DebugLoc dl = Op.getDebugLoc();
+ assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n");
+
+ SDValue lhs = Op.getOperand(0);
+ SDValue rhs = Op.getOperand(1);
+ EVT lhsVT = lhs.getValueType();
+ assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::64\n");
+
+ EVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType());
+ APInt ccResultOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
+ EVT IntVT(MVT::i64);
+
+ // Take advantage of the fact that (truncate (sra arg, 32)) is efficiently
+ // selected to a NOP:
+ SDValue i64lhs = DAG.getNode(ISD::BITCAST, dl, IntVT, lhs);
+ SDValue lhsHi32 =
+ DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
+ DAG.getNode(ISD::SRL, dl, IntVT,
+ i64lhs, DAG.getConstant(32, MVT::i32)));
+ SDValue lhsHi32abs =
+ DAG.getNode(ISD::AND, dl, MVT::i32,
+ lhsHi32, DAG.getConstant(0x7fffffff, MVT::i32));
+ SDValue lhsLo32 =
+ DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, i64lhs);
+
+ // SETO and SETUO only use the lhs operand:
+ if (CC->get() == ISD::SETO) {
+ // Evaluates to true if Op0 is not [SQ]NaN - lowers to the inverse of
+ // SETUO
+ APInt ccResultAllOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
+ return DAG.getNode(ISD::XOR, dl, ccResultVT,
+ DAG.getSetCC(dl, ccResultVT,
+ lhs, DAG.getConstantFP(0.0, lhsVT),
+ ISD::SETUO),
+ DAG.getConstant(ccResultAllOnes, ccResultVT));
+ } else if (CC->get() == ISD::SETUO) {
+ // Evaluates to true if Op0 is [SQ]NaN
+ return DAG.getNode(ISD::AND, dl, ccResultVT,
+ DAG.getSetCC(dl, ccResultVT,
+ lhsHi32abs,
+ DAG.getConstant(0x7ff00000, MVT::i32),
+ ISD::SETGE),
+ DAG.getSetCC(dl, ccResultVT,
+ lhsLo32,
+ DAG.getConstant(0, MVT::i32),
+ ISD::SETGT));
+ }
+
+ SDValue i64rhs = DAG.getNode(ISD::BITCAST, dl, IntVT, rhs);
+ SDValue rhsHi32 =
+ DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
+ DAG.getNode(ISD::SRL, dl, IntVT,
+ i64rhs, DAG.getConstant(32, MVT::i32)));
+
+ // If a value is negative, subtract from the sign magnitude constant:
+ SDValue signMag2TC = DAG.getConstant(0x8000000000000000ULL, IntVT);
+
+ // Convert the sign-magnitude representation into 2's complement:
+ SDValue lhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
+ lhsHi32, DAG.getConstant(31, MVT::i32));
+ SDValue lhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64lhs);
+ SDValue lhsSelect =
+ DAG.getNode(ISD::SELECT, dl, IntVT,
+ lhsSelectMask, lhsSignMag2TC, i64lhs);
+
+ SDValue rhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
+ rhsHi32, DAG.getConstant(31, MVT::i32));
+ SDValue rhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64rhs);
+ SDValue rhsSelect =
+ DAG.getNode(ISD::SELECT, dl, IntVT,
+ rhsSelectMask, rhsSignMag2TC, i64rhs);
+
+ unsigned compareOp;
+
+ switch (CC->get()) {
+ case ISD::SETOEQ:
+ case ISD::SETUEQ:
+ compareOp = ISD::SETEQ; break;
+ case ISD::SETOGT:
+ case ISD::SETUGT:
+ compareOp = ISD::SETGT; break;
+ case ISD::SETOGE:
+ case ISD::SETUGE:
+ compareOp = ISD::SETGE; break;
+ case ISD::SETOLT:
+ case ISD::SETULT:
+ compareOp = ISD::SETLT; break;
+ case ISD::SETOLE:
+ case ISD::SETULE:
+ compareOp = ISD::SETLE; break;
+ case ISD::SETUNE:
+ case ISD::SETONE:
+ compareOp = ISD::SETNE; break;
+ default:
+ report_fatal_error("CellSPU ISel Select: unimplemented f64 condition");
+ }
+
+ SDValue result =
+ DAG.getSetCC(dl, ccResultVT, lhsSelect, rhsSelect,
+ (ISD::CondCode) compareOp);
+
+ if ((CC->get() & 0x8) == 0) {
+ // Ordered comparison:
+ SDValue lhsNaN = DAG.getSetCC(dl, ccResultVT,
+ lhs, DAG.getConstantFP(0.0, MVT::f64),
+ ISD::SETO);
+ SDValue rhsNaN = DAG.getSetCC(dl, ccResultVT,
+ rhs, DAG.getConstantFP(0.0, MVT::f64),
+ ISD::SETO);
+ SDValue ordered = DAG.getNode(ISD::AND, dl, ccResultVT, lhsNaN, rhsNaN);
+
+ result = DAG.getNode(ISD::AND, dl, ccResultVT, ordered, result);
+ }
+
+ return result;
+}
+
+//! Lower ISD::SELECT_CC
+/*!
+ ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
+ SELB instruction.
+
+ \note Need to revisit this in the future: if the code path through the true
+ and false value computations is longer than the latency of a branch (6
+ cycles), then it would be more advantageous to branch and insert a new basic
+ block and branch on the condition. However, this code does not make that
+ assumption, given the simplisitc uses so far.
+ */
+
+static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ EVT VT = Op.getValueType();
+ SDValue lhs = Op.getOperand(0);
+ SDValue rhs = Op.getOperand(1);
+ SDValue trueval = Op.getOperand(2);
+ SDValue falseval = Op.getOperand(3);
+ SDValue condition = Op.getOperand(4);
+ DebugLoc dl = Op.getDebugLoc();
+
+ // NOTE: SELB's arguments: $rA, $rB, $mask
+ //
+ // SELB selects bits from $rA where bits in $mask are 0, bits from $rB
+ // where bits in $mask are 1. CCond will be inverted, having 1s where the
+ // condition was true and 0s where the condition was false. Hence, the
+ // arguments to SELB get reversed.
+
+ // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
+ // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
+ // with another "cannot select select_cc" assert:
+
+ SDValue compare = DAG.getNode(ISD::SETCC, dl,
+ TLI.getSetCCResultType(Op.getValueType()),
+ lhs, rhs, condition);
+ return DAG.getNode(SPUISD::SELB, dl, VT, falseval, trueval, compare);
+}
+
+//! Custom lower ISD::TRUNCATE
+static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
+{
+ // Type to truncate to
+ EVT VT = Op.getValueType();
+ MVT simpleVT = VT.getSimpleVT();
+ EVT VecVT = EVT::getVectorVT(*DAG.getContext(),
+ VT, (128 / VT.getSizeInBits()));
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Type to truncate from
+ SDValue Op0 = Op.getOperand(0);
+ EVT Op0VT = Op0.getValueType();
+
+ if (Op0VT == MVT::i128 && simpleVT == MVT::i64) {
+ // Create shuffle mask, least significant doubleword of quadword
+ unsigned maskHigh = 0x08090a0b;
+ unsigned maskLow = 0x0c0d0e0f;
+ // Use a shuffle to perform the truncation
+ SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ DAG.getConstant(maskHigh, MVT::i32),
+ DAG.getConstant(maskLow, MVT::i32),
+ DAG.getConstant(maskHigh, MVT::i32),
+ DAG.getConstant(maskLow, MVT::i32));
+
+ SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, dl, VecVT,
+ Op0, Op0, shufMask);
+
+ return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, truncShuffle);
+ }
+
+ return SDValue(); // Leave the truncate unmolested
+}
+
+/*!
+ * Emit the instruction sequence for i64/i32 -> i128 sign extend. The basic
+ * algorithm is to duplicate the sign bit using rotmai to generate at
+ * least one byte full of sign bits. Then propagate the "sign-byte" into
+ * the leftmost words and the i64/i32 into the rightmost words using shufb.
+ *
+ * @param Op The sext operand
+ * @param DAG The current DAG
+ * @return The SDValue with the entire instruction sequence
+ */
+static SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG)
+{
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Type to extend to
+ MVT OpVT = Op.getValueType().getSimpleVT();
+
+ // Type to extend from
+ SDValue Op0 = Op.getOperand(0);
+ MVT Op0VT = Op0.getValueType().getSimpleVT();
+
+ // extend i8 & i16 via i32
+ if (Op0VT == MVT::i8 || Op0VT == MVT::i16) {
+ Op0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, Op0);
+ Op0VT = MVT::i32;
+ }
+
+ // The type to extend to needs to be a i128 and
+ // the type to extend from needs to be i64 or i32.
+ assert((OpVT == MVT::i128 && (Op0VT == MVT::i64 || Op0VT == MVT::i32)) &&
+ "LowerSIGN_EXTEND: input and/or output operand have wrong size");
+ (void)OpVT;
+
+ // Create shuffle mask
+ unsigned mask1 = 0x10101010; // byte 0 - 3 and 4 - 7
+ unsigned mask2 = Op0VT == MVT::i64 ? 0x00010203 : 0x10101010; // byte 8 - 11
+ unsigned mask3 = Op0VT == MVT::i64 ? 0x04050607 : 0x00010203; // byte 12 - 15
+ SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ DAG.getConstant(mask1, MVT::i32),
+ DAG.getConstant(mask1, MVT::i32),
+ DAG.getConstant(mask2, MVT::i32),
+ DAG.getConstant(mask3, MVT::i32));
+
+ // Word wise arithmetic right shift to generate at least one byte
+ // that contains sign bits.
+ MVT mvt = Op0VT == MVT::i64 ? MVT::v2i64 : MVT::v4i32;
+ SDValue sraVal = DAG.getNode(ISD::SRA,
+ dl,
+ mvt,
+ DAG.getNode(SPUISD::PREFSLOT2VEC, dl, mvt, Op0, Op0),
+ DAG.getConstant(31, MVT::i32));
+
+ // reinterpret as a i128 (SHUFB requires it). This gets lowered away.
+ SDValue extended = SDValue(DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
+ dl, Op0VT, Op0,
+ DAG.getTargetConstant(
+ SPU::GPRCRegClass.getID(),
+ MVT::i32)), 0);
+ // Shuffle bytes - Copy the sign bits into the upper 64 bits
+ // and the input value into the lower 64 bits.
+ SDValue extShuffle = DAG.getNode(SPUISD::SHUFB, dl, mvt,
+ extended, sraVal, shufMask);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::i128, extShuffle);
+}
+
+//! Custom (target-specific) lowering entry point
+/*!
+ This is where LLVM's DAG selection process calls to do target-specific
+ lowering of nodes.
+ */
+SDValue
+SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
+{
+ unsigned Opc = (unsigned) Op.getOpcode();
+ EVT VT = Op.getValueType();
+
+ switch (Opc) {
+ default: {
+#ifndef NDEBUG
+ errs() << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
+ errs() << "Op.getOpcode() = " << Opc << "\n";
+ errs() << "*Op.getNode():\n";
+ Op.getNode()->dump();
+#endif
+ llvm_unreachable(0);
+ }
+ case ISD::LOAD:
+ case ISD::EXTLOAD:
+ case ISD::SEXTLOAD:
+ case ISD::ZEXTLOAD:
+ return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
+ case ISD::STORE:
+ return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
+ case ISD::ConstantPool:
+ return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
+ case ISD::GlobalAddress:
+ return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
+ case ISD::JumpTable:
+ return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
+ case ISD::ConstantFP:
+ return LowerConstantFP(Op, DAG);
+
+ // i8, i64 math ops:
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::ROTR:
+ case ISD::ROTL:
+ case ISD::SRL:
+ case ISD::SHL:
+ case ISD::SRA: {
+ if (VT == MVT::i8)
+ return LowerI8Math(Op, DAG, Opc, *this);
+ break;
+ }
+
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ return LowerFP_TO_INT(Op, DAG, *this);
+
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ return LowerINT_TO_FP(Op, DAG, *this);
+
+ // Vector-related lowering.
+ case ISD::BUILD_VECTOR:
+ return LowerBUILD_VECTOR(Op, DAG);
+ case ISD::SCALAR_TO_VECTOR:
+ return LowerSCALAR_TO_VECTOR(Op, DAG);
+ case ISD::VECTOR_SHUFFLE:
+ return LowerVECTOR_SHUFFLE(Op, DAG);
+ case ISD::EXTRACT_VECTOR_ELT:
+ return LowerEXTRACT_VECTOR_ELT(Op, DAG);
+ case ISD::INSERT_VECTOR_ELT:
+ return LowerINSERT_VECTOR_ELT(Op, DAG);
+
+ // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ return LowerByteImmed(Op, DAG);
+
+ // Vector and i8 multiply:
+ case ISD::MUL:
+ if (VT == MVT::i8)
+ return LowerI8Math(Op, DAG, Opc, *this);
+
+ case ISD::CTPOP:
+ return LowerCTPOP(Op, DAG);
+
+ case ISD::SELECT_CC:
+ return LowerSELECT_CC(Op, DAG, *this);
+
+ case ISD::SETCC:
+ return LowerSETCC(Op, DAG, *this);
+
+ case ISD::TRUNCATE:
+ return LowerTRUNCATE(Op, DAG);
+
+ case ISD::SIGN_EXTEND:
+ return LowerSIGN_EXTEND(Op, DAG);
+ }
+
+ return SDValue();
+}
+
+void SPUTargetLowering::ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG) const
+{
+#if 0
+ unsigned Opc = (unsigned) N->getOpcode();
+ EVT OpVT = N->getValueType(0);
+
+ switch (Opc) {
+ default: {
+ errs() << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
+ errs() << "Op.getOpcode() = " << Opc << "\n";
+ errs() << "*Op.getNode():\n";
+ N->dump();
+ abort();
+ /*NOTREACHED*/
+ }
+ }
+#endif
+
+ /* Otherwise, return unchanged */
+}
+
+//===----------------------------------------------------------------------===//
+// Target Optimization Hooks
+//===----------------------------------------------------------------------===//
+
+SDValue
+SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
+{
+#if 0
+ TargetMachine &TM = getTargetMachine();
+#endif
+ const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
+ SelectionDAG &DAG = DCI.DAG;
+ SDValue Op0 = N->getOperand(0); // everything has at least one operand
+ EVT NodeVT = N->getValueType(0); // The node's value type
+ EVT Op0VT = Op0.getValueType(); // The first operand's result
+ SDValue Result; // Initially, empty result
+ DebugLoc dl = N->getDebugLoc();
+
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::ADD: {
+ SDValue Op1 = N->getOperand(1);
+
+ if (Op0.getOpcode() == SPUISD::IndirectAddr
+ || Op1.getOpcode() == SPUISD::IndirectAddr) {
+ // Normalize the operands to reduce repeated code
+ SDValue IndirectArg = Op0, AddArg = Op1;
+
+ if (Op1.getOpcode() == SPUISD::IndirectAddr) {
+ IndirectArg = Op1;
+ AddArg = Op0;
+ }
+
+ if (isa<ConstantSDNode>(AddArg)) {
+ ConstantSDNode *CN0 = cast<ConstantSDNode > (AddArg);
+ SDValue IndOp1 = IndirectArg.getOperand(1);
+
+ if (CN0->isNullValue()) {
+ // (add (SPUindirect <arg>, <arg>), 0) ->
+ // (SPUindirect <arg>, <arg>)
+
+#if !defined(NDEBUG)
+ if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
+ errs() << "\n"
+ << "Replace: (add (SPUindirect <arg>, <arg>), 0)\n"
+ << "With: (SPUindirect <arg>, <arg>)\n";
+ }
+#endif
+
+ return IndirectArg;
+ } else if (isa<ConstantSDNode>(IndOp1)) {
+ // (add (SPUindirect <arg>, <const>), <const>) ->
+ // (SPUindirect <arg>, <const + const>)
+ ConstantSDNode *CN1 = cast<ConstantSDNode > (IndOp1);
+ int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue();
+ SDValue combinedValue = DAG.getConstant(combinedConst, Op0VT);
+
+#if !defined(NDEBUG)
+ if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
+ errs() << "\n"
+ << "Replace: (add (SPUindirect <arg>, " << CN1->getSExtValue()
+ << "), " << CN0->getSExtValue() << ")\n"
+ << "With: (SPUindirect <arg>, "
+ << combinedConst << ")\n";
+ }
+#endif
+
+ return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
+ IndirectArg, combinedValue);
+ }
+ }
+ }
+ break;
+ }
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND: {
+ if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) {
+ // (any_extend (SPUextract_elt0 <arg>)) ->
+ // (SPUextract_elt0 <arg>)
+ // Types must match, however...
+#if !defined(NDEBUG)
+ if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
+ errs() << "\nReplace: ";
+ N->dump(&DAG);
+ errs() << "\nWith: ";
+ Op0.getNode()->dump(&DAG);
+ errs() << "\n";
+ }
+#endif
+
+ return Op0;
+ }
+ break;
+ }
+ case SPUISD::IndirectAddr: {
+ if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (CN != 0 && CN->isNullValue()) {
+ // (SPUindirect (SPUaform <addr>, 0), 0) ->
+ // (SPUaform <addr>, 0)
+
+ DEBUG(errs() << "Replace: ");
+ DEBUG(N->dump(&DAG));
+ DEBUG(errs() << "\nWith: ");
+ DEBUG(Op0.getNode()->dump(&DAG));
+ DEBUG(errs() << "\n");
+
+ return Op0;
+ }
+ } else if (Op0.getOpcode() == ISD::ADD) {
+ SDValue Op1 = N->getOperand(1);
+ if (ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Op1)) {
+ // (SPUindirect (add <arg>, <arg>), 0) ->
+ // (SPUindirect <arg>, <arg>)
+ if (CN1->isNullValue()) {
+
+#if !defined(NDEBUG)
+ if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
+ errs() << "\n"
+ << "Replace: (SPUindirect (add <arg>, <arg>), 0)\n"
+ << "With: (SPUindirect <arg>, <arg>)\n";
+ }
+#endif
+
+ return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
+ Op0.getOperand(0), Op0.getOperand(1));
+ }
+ }
+ }
+ break;
+ }
+ case SPUISD::SHL_BITS:
+ case SPUISD::SHL_BYTES:
+ case SPUISD::ROTBYTES_LEFT: {
+ SDValue Op1 = N->getOperand(1);
+
+ // Kill degenerate vector shifts:
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) {
+ if (CN->isNullValue()) {
+ Result = Op0;
+ }
+ }
+ break;
+ }
+ case SPUISD::PREFSLOT2VEC: {
+ switch (Op0.getOpcode()) {
+ default:
+ break;
+ case ISD::ANY_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::SIGN_EXTEND: {
+ // (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot <arg>))) ->
+ // <arg>
+ // but only if the SPUprefslot2vec and <arg> types match.
+ SDValue Op00 = Op0.getOperand(0);
+ if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
+ SDValue Op000 = Op00.getOperand(0);
+ if (Op000.getValueType() == NodeVT) {
+ Result = Op000;
+ }
+ }
+ break;
+ }
+ case SPUISD::VEC2PREFSLOT: {
+ // (SPUprefslot2vec (SPUvec2prefslot <arg>)) ->
+ // <arg>
+ Result = Op0.getOperand(0);
+ break;
+ }
+ }
+ break;
+ }
+ }
+
+ // Otherwise, return unchanged.
+#ifndef NDEBUG
+ if (Result.getNode()) {
+ DEBUG(errs() << "\nReplace.SPU: ");
+ DEBUG(N->dump(&DAG));
+ DEBUG(errs() << "\nWith: ");
+ DEBUG(Result.getNode()->dump(&DAG));
+ DEBUG(errs() << "\n");
+ }
+#endif
+
+ return Result;
+}
+
+//===----------------------------------------------------------------------===//
+// Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+/// getConstraintType - Given a constraint letter, return the type of
+/// constraint it is for this target.
+SPUTargetLowering::ConstraintType
+SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
+ if (ConstraintLetter.size() == 1) {
+ switch (ConstraintLetter[0]) {
+ default: break;
+ case 'b':
+ case 'r':
+ case 'f':
+ case 'v':
+ case 'y':
+ return C_RegisterClass;
+ }
+ }
+ return TargetLowering::getConstraintType(ConstraintLetter);
+}
+
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+SPUTargetLowering::getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const {
+ ConstraintWeight weight = CW_Invalid;
+ Value *CallOperandVal = info.CallOperandVal;
+ // If we don't have a value, we can't do a match,
+ // but allow it at the lowest weight.
+ if (CallOperandVal == NULL)
+ return CW_Default;
+ // Look at the constraint type.
+ switch (*constraint) {
+ default:
+ weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
+ break;
+ //FIXME: Seems like the supported constraint letters were just copied
+ // from PPC, as the following doesn't correspond to the GCC docs.
+ // I'm leaving it so until someone adds the corresponding lowering support.
+ case 'b':
+ case 'r':
+ case 'f':
+ case 'd':
+ case 'v':
+ case 'y':
+ weight = CW_Register;
+ break;
+ }
+ return weight;
+}
+
+std::pair<unsigned, const TargetRegisterClass*>
+SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const
+{
+ if (Constraint.size() == 1) {
+ // GCC RS6000 Constraint Letters
+ switch (Constraint[0]) {
+ case 'b': // R1-R31
+ case 'r': // R0-R31
+ if (VT == MVT::i64)
+ return std::make_pair(0U, SPU::R64CRegisterClass);
+ return std::make_pair(0U, SPU::R32CRegisterClass);
+ case 'f':
+ if (VT == MVT::f32)
+ return std::make_pair(0U, SPU::R32FPRegisterClass);
+ else if (VT == MVT::f64)
+ return std::make_pair(0U, SPU::R64FPRegisterClass);
+ break;
+ case 'v':
+ return std::make_pair(0U, SPU::GPRCRegisterClass);
+ }
+ }
+
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
+
+//! Compute used/known bits for a SPU operand
+void
+SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
+ const APInt &Mask,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth ) const {
+#if 0
+ const uint64_t uint64_sizebits = sizeof(uint64_t) * CHAR_BIT;
+
+ switch (Op.getOpcode()) {
+ default:
+ // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
+ break;
+ case CALL:
+ case SHUFB:
+ case SHUFFLE_MASK:
+ case CNTB:
+ case SPUISD::PREFSLOT2VEC:
+ case SPUISD::LDRESULT:
+ case SPUISD::VEC2PREFSLOT:
+ case SPUISD::SHLQUAD_L_BITS:
+ case SPUISD::SHLQUAD_L_BYTES:
+ case SPUISD::VEC_ROTL:
+ case SPUISD::VEC_ROTR:
+ case SPUISD::ROTBYTES_LEFT:
+ case SPUISD::SELECT_MASK:
+ case SPUISD::SELB:
+ }
+#endif
+}
+
+unsigned
+SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
+ unsigned Depth) const {
+ switch (Op.getOpcode()) {
+ default:
+ return 1;
+
+ case ISD::SETCC: {
+ EVT VT = Op.getValueType();
+
+ if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) {
+ VT = MVT::i32;
+ }
+ return VT.getSizeInBits();
+ }
+ }
+}
+
+// LowerAsmOperandForConstraint
+void
+SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
+ std::string &Constraint,
+ std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const {
+ // Default, for the time being, to the base class handler
+ TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
+}
+
+/// isLegalAddressImmediate - Return true if the integer value can be used
+/// as the offset of the target addressing mode.
+bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
+ Type *Ty) const {
+ // SPU's addresses are 256K:
+ return (V > -(1 << 18) && V < (1 << 18) - 1);
+}
+
+bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
+ return false;
+}
+
+bool
+SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+ // The SPU target isn't yet aware of offsets.
+ return false;
+}
+
+// can we compare to Imm without writing it into a register?
+bool SPUTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
+ //ceqi, cgti, etc. all take s10 operand
+ return isInt<10>(Imm);
+}
+
+bool
+SPUTargetLowering::isLegalAddressingMode(const AddrMode &AM,
+ Type * ) const{
+
+ // A-form: 18bit absolute address.
+ if (AM.BaseGV && !AM.HasBaseReg && AM.Scale == 0 && AM.BaseOffs == 0)
+ return true;
+
+ // D-form: reg + 14bit offset
+ if (AM.BaseGV ==0 && AM.HasBaseReg && AM.Scale == 0 && isInt<14>(AM.BaseOffs))
+ return true;
+
+ // X-form: reg+reg
+ if (AM.BaseGV == 0 && AM.HasBaseReg && AM.Scale == 1 && AM.BaseOffs ==0)
+ return true;
+
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUISelLowering.h b/contrib/llvm/lib/Target/CellSPU/SPUISelLowering.h
new file mode 100644
index 0000000..aa4a168
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUISelLowering.h
@@ -0,0 +1,186 @@
+//===-- SPUISelLowering.h - Cell SPU DAG Lowering Interface -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that Cell SPU uses to lower LLVM code into
+// a selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPU_ISELLOWERING_H
+#define SPU_ISELLOWERING_H
+
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "SPU.h"
+
+namespace llvm {
+ namespace SPUISD {
+ enum NodeType {
+ // Start the numbering where the builting ops and target ops leave off.
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+ // Pseudo instructions:
+ RET_FLAG, ///< Return with flag, matched by bi instruction
+
+ Hi, ///< High address component (upper 16)
+ Lo, ///< Low address component (lower 16)
+ PCRelAddr, ///< Program counter relative address
+ AFormAddr, ///< A-form address (local store)
+ IndirectAddr, ///< D-Form "imm($r)" and X-form "$r($r)"
+
+ LDRESULT, ///< Load result (value, chain)
+ CALL, ///< CALL instruction
+ SHUFB, ///< Vector shuffle (permute)
+ SHUFFLE_MASK, ///< Shuffle mask
+ CNTB, ///< Count leading ones in bytes
+ PREFSLOT2VEC, ///< Promote scalar->vector
+ VEC2PREFSLOT, ///< Extract element 0
+ SHL_BITS, ///< Shift quad left, by bits
+ SHL_BYTES, ///< Shift quad left, by bytes
+ SRL_BYTES, ///< Shift quad right, by bytes. Insert zeros.
+ VEC_ROTL, ///< Vector rotate left
+ VEC_ROTR, ///< Vector rotate right
+ ROTBYTES_LEFT, ///< Rotate bytes (loads -> ROTQBYI)
+ ROTBYTES_LEFT_BITS, ///< Rotate bytes left by bit shift count
+ SELECT_MASK, ///< Select Mask (FSM, FSMB, FSMH, FSMBI)
+ SELB, ///< Select bits -> (b & mask) | (a & ~mask)
+ // Markers: These aren't used to generate target-dependent nodes, but
+ // are used during instruction selection.
+ ADD64_MARKER, ///< i64 addition marker
+ SUB64_MARKER, ///< i64 subtraction marker
+ MUL64_MARKER, ///< i64 multiply marker
+ LAST_SPUISD ///< Last user-defined instruction
+ };
+ }
+
+ //! Utility functions specific to CellSPU:
+ namespace SPU {
+ SDValue get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
+ EVT ValueType);
+ SDValue get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
+ EVT ValueType);
+ SDValue get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
+ EVT ValueType);
+ SDValue get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
+ EVT ValueType);
+ SDValue get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
+ EVT ValueType);
+ SDValue get_v4i32_imm(SDNode *N, SelectionDAG &DAG);
+ SDValue get_v2i64_imm(SDNode *N, SelectionDAG &DAG);
+
+ SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG,
+ const SPUTargetMachine &TM);
+ //! Simplify a EVT::v2i64 constant splat to CellSPU-ready form
+ SDValue LowerV2I64Splat(EVT OpVT, SelectionDAG &DAG, uint64_t splat,
+ DebugLoc dl);
+ }
+
+ class SPUTargetMachine; // forward dec'l.
+
+ class SPUTargetLowering :
+ public TargetLowering
+ {
+ int VarArgsFrameIndex; // FrameIndex for start of varargs area.
+ SPUTargetMachine &SPUTM;
+
+ public:
+ //! The venerable constructor
+ /*!
+ This is where the CellSPU backend sets operation handling (i.e., legal,
+ custom, expand or promote.)
+ */
+ SPUTargetLowering(SPUTargetMachine &TM);
+
+ //! Get the target machine
+ SPUTargetMachine &getSPUTargetMachine() {
+ return SPUTM;
+ }
+
+ /// getTargetNodeName() - This method returns the name of a target specific
+ /// DAG node.
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+ /// getSetCCResultType - Return the ValueType for ISD::SETCC
+ virtual EVT getSetCCResultType(EVT VT) const;
+
+ virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
+
+ //! Custom lowering hooks
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+
+ //! Custom lowering hook for nodes with illegal result types.
+ virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG) const;
+
+ virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+
+ virtual void computeMaskedBitsForTargetNode(const SDValue Op,
+ const APInt &Mask,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth = 0) const;
+
+ virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
+ unsigned Depth = 0) const;
+
+ ConstraintType getConstraintType(const std::string &ConstraintLetter) const;
+
+ /// Examine constraint string and operand type and determine a weight value.
+ /// The operand object must already have been set up with the operand type.
+ ConstraintWeight getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const;
+
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const;
+
+ void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
+ std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const;
+
+ /// isLegalAddressImmediate - Return true if the integer value can be used
+ /// as the offset of the target addressing mode.
+ virtual bool isLegalAddressImmediate(int64_t V, Type *Ty) const;
+ virtual bool isLegalAddressImmediate(GlobalValue *) const;
+
+ virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
+
+ virtual SDValue
+ LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const;
+
+ virtual bool isLegalICmpImmediate(int64_t Imm) const;
+
+ virtual bool isLegalAddressingMode(const AddrMode &AM,
+ Type *Ty) const;
+ };
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUInstrBuilder.h b/contrib/llvm/lib/Target/CellSPU/SPUInstrBuilder.h
new file mode 100644
index 0000000..5e268f8
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUInstrBuilder.h
@@ -0,0 +1,43 @@
+//==-- SPUInstrBuilder.h - Aides for building Cell SPU insts -----*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file exposes functions that may be used with BuildMI from the
+// MachineInstrBuilder.h file to simplify generating frame and constant pool
+// references.
+//
+// For reference, the order of operands for memory references is:
+// (Operand), Dest Reg, Base Reg, and either Reg Index or Immediate
+// Displacement.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPU_INSTRBUILDER_H
+#define SPU_INSTRBUILDER_H
+
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+
+namespace llvm {
+
+/// addFrameReference - This function is used to add a reference to the base of
+/// an abstract object on the stack frame of the current function. This
+/// reference has base register as the FrameIndex offset until it is resolved.
+/// This allows a constant offset to be specified as well...
+///
+inline const MachineInstrBuilder&
+addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0,
+ bool mem = true) {
+ if (mem)
+ return MIB.addImm(Offset).addFrameIndex(FI);
+ else
+ return MIB.addFrameIndex(FI).addImm(Offset);
+}
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUInstrFormats.td b/contrib/llvm/lib/Target/CellSPU/SPUInstrFormats.td
new file mode 100644
index 0000000..bdbe255
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUInstrFormats.td
@@ -0,0 +1,320 @@
+//==== SPUInstrFormats.td - Cell SPU Instruction Formats ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//
+// Cell SPU instruction formats. Note that these are notationally similar to
+// PowerPC, like "A-Form". But the sizes of operands and fields differ.
+
+// This was kiped from the PPC instruction formats (seemed like a good idea...)
+
+class SPUInstr<dag OOL, dag IOL, string asmstr, InstrItinClass itin>
+ : Instruction {
+ field bits<32> Inst;
+
+ let Namespace = "SPU";
+ let OutOperandList = OOL;
+ let InOperandList = IOL;
+ let AsmString = asmstr;
+ let Itinerary = itin;
+}
+
+// RR Format
+class RRForm<bits<11> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : SPUInstr<OOL, IOL, asmstr, itin> {
+ bits<7> RA;
+ bits<7> RB;
+ bits<7> RT;
+
+ let Pattern = pattern;
+
+ let Inst{0-10} = opcode;
+ let Inst{11-17} = RB;
+ let Inst{18-24} = RA;
+ let Inst{25-31} = RT;
+}
+
+let RB = 0 in {
+ // RR Format, where RB is zeroed (dont care):
+ class RRForm_1<bits<11> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : RRForm<opcode, OOL, IOL, asmstr, itin, pattern>
+ { }
+
+ let RA = 0 in {
+ // RR Format, where RA and RB are zeroed (dont care):
+ // Used for reads from status control registers (see FPSCRRr32)
+ class RRForm_2<bits<11> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : RRForm<opcode, OOL, IOL, asmstr, itin, pattern>
+ { }
+ }
+}
+
+let RT = 0 in {
+ // RR Format, where RT is zeroed (don't care), or as the instruction handbook
+ // says, "RT is a false target." Used in "Halt if" instructions
+ class RRForm_3<bits<11> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : RRForm<opcode, OOL, IOL, asmstr, itin, pattern>
+ { }
+}
+
+// RRR Format
+class RRRForm<bits<4> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : SPUInstr<OOL, IOL, asmstr, itin>
+{
+ bits<7> RA;
+ bits<7> RB;
+ bits<7> RC;
+ bits<7> RT;
+
+ let Pattern = pattern;
+
+ let Inst{0-3} = opcode;
+ let Inst{4-10} = RT;
+ let Inst{11-17} = RB;
+ let Inst{18-24} = RA;
+ let Inst{25-31} = RC;
+}
+
+// RI7 Format
+class RI7Form<bits<11> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : SPUInstr<OOL, IOL, asmstr, itin>
+{
+ bits<7> i7;
+ bits<7> RA;
+ bits<7> RT;
+
+ let Pattern = pattern;
+
+ let Inst{0-10} = opcode;
+ let Inst{11-17} = i7;
+ let Inst{18-24} = RA;
+ let Inst{25-31} = RT;
+}
+
+// CVTIntFp Format
+class CVTIntFPForm<bits<10> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : SPUInstr<OOL, IOL, asmstr, itin>
+{
+ bits<7> RA;
+ bits<7> RT;
+
+ let Pattern = pattern;
+
+ let Inst{0-9} = opcode;
+ let Inst{10-17} = 0;
+ let Inst{18-24} = RA;
+ let Inst{25-31} = RT;
+}
+
+let RA = 0 in {
+ class BICondForm<bits<11> opcode, dag OOL, dag IOL, string asmstr, list<dag> pattern>
+ : RRForm<opcode, OOL, IOL, asmstr, BranchResolv, pattern>
+ { }
+
+ let RT = 0 in {
+ // Branch instruction format (without D/E flag settings)
+ class BRForm<bits<11> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : RRForm<opcode, OOL, IOL, asmstr, itin, pattern>
+ { }
+
+ class BIForm<bits<11> opcode, string asmstr, list<dag> pattern>
+ : RRForm<opcode, (outs), (ins R32C:$func), asmstr, BranchResolv,
+ pattern>
+ { }
+
+ let RB = 0 in {
+ // Return instruction (bi, branch indirect), RA is zero (LR):
+ class RETForm<string asmstr, list<dag> pattern>
+ : BRForm<0b00010101100, (outs), (ins), asmstr, BranchResolv,
+ pattern>
+ { }
+ }
+ }
+}
+
+// Branch indirect external data forms:
+class BISLEDForm<bits<2> DE_flag, string asmstr, list<dag> pattern>
+ : SPUInstr<(outs), (ins indcalltarget:$func), asmstr, BranchResolv>
+{
+ bits<7> Rcalldest;
+
+ let Pattern = pattern;
+
+ let Inst{0-10} = 0b11010101100;
+ let Inst{11} = 0;
+ let Inst{12-13} = DE_flag;
+ let Inst{14-17} = 0b0000;
+ let Inst{18-24} = Rcalldest;
+ let Inst{25-31} = 0b0000000;
+}
+
+// RI10 Format
+class RI10Form<bits<8> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : SPUInstr<OOL, IOL, asmstr, itin>
+{
+ bits<10> i10;
+ bits<7> RA;
+ bits<7> RT;
+
+ let Pattern = pattern;
+
+ let Inst{0-7} = opcode;
+ let Inst{8-17} = i10;
+ let Inst{18-24} = RA;
+ let Inst{25-31} = RT;
+}
+
+// RI10 Format, where the constant is zero (or effectively ignored by the
+// SPU)
+let i10 = 0 in {
+ class RI10Form_1<bits<8> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : RI10Form<opcode, OOL, IOL, asmstr, itin, pattern>
+ { }
+}
+
+// RI10 Format, where RT is ignored.
+// This format is used primarily by the Halt If ... Immediate set of
+// instructions
+let RT = 0 in {
+ class RI10Form_2<bits<8> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : RI10Form<opcode, OOL, IOL, asmstr, itin, pattern>
+ { }
+}
+
+// RI16 Format
+class RI16Form<bits<9> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : SPUInstr<OOL, IOL, asmstr, itin>
+{
+ bits<16> i16;
+ bits<7> RT;
+
+ let Pattern = pattern;
+
+ let Inst{0-8} = opcode;
+ let Inst{9-24} = i16;
+ let Inst{25-31} = RT;
+}
+
+// Specialized version of the RI16 Format for unconditional branch relative and
+// branch absolute, branch and set link. Note that for branch and set link, the
+// link register doesn't have to be $lr, but this is actually hard coded into
+// the instruction pattern.
+
+let RT = 0 in {
+ class UncondBranch<bits<9> opcode, dag OOL, dag IOL, string asmstr,
+ list<dag> pattern>
+ : RI16Form<opcode, OOL, IOL, asmstr, BranchResolv, pattern>
+ { }
+
+ class BranchSetLink<bits<9> opcode, dag OOL, dag IOL, string asmstr,
+ list<dag> pattern>
+ : RI16Form<opcode, OOL, IOL, asmstr, BranchResolv, pattern>
+ { }
+}
+
+//===----------------------------------------------------------------------===//
+// Specialized versions of RI16:
+//===----------------------------------------------------------------------===//
+
+// RI18 Format
+class RI18Form<bits<7> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : SPUInstr<OOL, IOL, asmstr, itin>
+{
+ bits<18> i18;
+ bits<7> RT;
+
+ let Pattern = pattern;
+
+ let Inst{0-6} = opcode;
+ let Inst{7-24} = i18;
+ let Inst{25-31} = RT;
+}
+
+//===----------------------------------------------------------------------===//
+// Instruction formats for intrinsics:
+//===----------------------------------------------------------------------===//
+
+// RI10 Format for v8i16 intrinsics
+class RI10_Int_v8i16<bits<8> opcode, string opc, InstrItinClass itin,
+ Intrinsic IntID> :
+ RI10Form<opcode, (outs VECREG:$rT), (ins s10imm:$val, VECREG:$rA),
+ !strconcat(opc, " $rT, $rA, $val"), itin,
+ [(set (v8i16 VECREG:$rT), (IntID (v8i16 VECREG:$rA),
+ i16ImmSExt10:$val))] >;
+
+class RI10_Int_v4i32<bits<8> opcode, string opc, InstrItinClass itin,
+ Intrinsic IntID> :
+ RI10Form<opcode, (outs VECREG:$rT), (ins s10imm:$val, VECREG:$rA),
+ !strconcat(opc, " $rT, $rA, $val"), itin,
+ [(set (v4i32 VECREG:$rT), (IntID (v4i32 VECREG:$rA),
+ i32ImmSExt10:$val))] >;
+
+// RR Format for v8i16 intrinsics
+class RR_Int_v8i16<bits<11> opcode, string opc, InstrItinClass itin,
+ Intrinsic IntID> :
+ RRForm<opcode, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ !strconcat(opc, " $rT, $rA, $rB"), itin,
+ [(set (v8i16 VECREG:$rT), (IntID (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))] >;
+
+// RR Format for v4i32 intrinsics
+class RR_Int_v4i32<bits<11> opcode, string opc, InstrItinClass itin,
+ Intrinsic IntID> :
+ RRForm<opcode, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ !strconcat(opc, " $rT, $rA, $rB"), itin,
+ [(set (v4i32 VECREG:$rT), (IntID (v4i32 VECREG:$rA),
+ (v4i32 VECREG:$rB)))] >;
+
+//===----------------------------------------------------------------------===//
+// Pseudo instructions, like call frames:
+//===----------------------------------------------------------------------===//
+
+class Pseudo<dag OOL, dag IOL, string asmstr, list<dag> pattern>
+ : SPUInstr<OOL, IOL, asmstr, NoItinerary> {
+ let OutOperandList = OOL;
+ let InOperandList = IOL;
+ let AsmString = asmstr;
+ let Pattern = pattern;
+ let Inst{31-0} = 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Branch hint formats
+//===----------------------------------------------------------------------===//
+// For hbrr and hbra
+class HBI16Form<bits<7> opcode, dag IOL, string asmstr>
+ : Instruction {
+ field bits<32> Inst;
+ bits<16>i16;
+ bits<9>RO;
+
+ let Namespace = "SPU";
+ let InOperandList = IOL;
+ let OutOperandList = (outs); //no output
+ let AsmString = asmstr;
+ let Itinerary = BranchHints;
+
+ let Inst{0-6} = opcode;
+ let Inst{7-8} = RO{8-7};
+ let Inst{9-24} = i16;
+ let Inst{25-31} = RO{6-0};
+}
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.cpp b/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.cpp
new file mode 100644
index 0000000..007bc0e
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.cpp
@@ -0,0 +1,453 @@
+//===- SPUInstrInfo.cpp - Cell SPU Instruction Information ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Cell SPU implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPUInstrInfo.h"
+#include "SPUInstrBuilder.h"
+#include "SPUTargetMachine.h"
+#include "SPUHazardRecognizers.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define GET_INSTRINFO_CTOR
+#include "SPUGenInstrInfo.inc"
+
+using namespace llvm;
+
+namespace {
+ //! Predicate for an unconditional branch instruction
+ inline bool isUncondBranch(const MachineInstr *I) {
+ unsigned opc = I->getOpcode();
+
+ return (opc == SPU::BR
+ || opc == SPU::BRA
+ || opc == SPU::BI);
+ }
+
+ //! Predicate for a conditional branch instruction
+ inline bool isCondBranch(const MachineInstr *I) {
+ unsigned opc = I->getOpcode();
+
+ return (opc == SPU::BRNZr32
+ || opc == SPU::BRNZv4i32
+ || opc == SPU::BRZr32
+ || opc == SPU::BRZv4i32
+ || opc == SPU::BRHNZr16
+ || opc == SPU::BRHNZv8i16
+ || opc == SPU::BRHZr16
+ || opc == SPU::BRHZv8i16);
+ }
+}
+
+SPUInstrInfo::SPUInstrInfo(SPUTargetMachine &tm)
+ : SPUGenInstrInfo(SPU::ADJCALLSTACKDOWN, SPU::ADJCALLSTACKUP),
+ TM(tm),
+ RI(*TM.getSubtargetImpl(), *this)
+{ /* NOP */ }
+
+/// CreateTargetHazardRecognizer - Return the hazard recognizer to use for
+/// this target when scheduling the DAG.
+ScheduleHazardRecognizer *SPUInstrInfo::CreateTargetHazardRecognizer(
+ const TargetMachine *TM,
+ const ScheduleDAG *DAG) const {
+ const TargetInstrInfo *TII = TM->getInstrInfo();
+ assert(TII && "No InstrInfo?");
+ return new SPUHazardRecognizer(*TII);
+}
+
+unsigned
+SPUInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case SPU::LQDv16i8:
+ case SPU::LQDv8i16:
+ case SPU::LQDv4i32:
+ case SPU::LQDv4f32:
+ case SPU::LQDv2f64:
+ case SPU::LQDr128:
+ case SPU::LQDr64:
+ case SPU::LQDr32:
+ case SPU::LQDr16: {
+ const MachineOperand MOp1 = MI->getOperand(1);
+ const MachineOperand MOp2 = MI->getOperand(2);
+ if (MOp1.isImm() && MOp2.isFI()) {
+ FrameIndex = MOp2.getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+ }
+ return 0;
+}
+
+unsigned
+SPUInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case SPU::STQDv16i8:
+ case SPU::STQDv8i16:
+ case SPU::STQDv4i32:
+ case SPU::STQDv4f32:
+ case SPU::STQDv2f64:
+ case SPU::STQDr128:
+ case SPU::STQDr64:
+ case SPU::STQDr32:
+ case SPU::STQDr16:
+ case SPU::STQDr8: {
+ const MachineOperand MOp1 = MI->getOperand(1);
+ const MachineOperand MOp2 = MI->getOperand(2);
+ if (MOp1.isImm() && MOp2.isFI()) {
+ FrameIndex = MOp2.getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+ }
+ return 0;
+}
+
+void SPUInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const
+{
+ // We support cross register class moves for our aliases, such as R3 in any
+ // reg class to any other reg class containing R3. This is required because
+ // we instruction select bitconvert i64 -> f64 as a noop for example, so our
+ // types have no specific meaning.
+
+ BuildMI(MBB, I, DL, get(SPU::LRr128), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+}
+
+void
+SPUInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const
+{
+ unsigned opc;
+ bool isValidFrameIdx = (FrameIdx < SPUFrameLowering::maxFrameOffset());
+ if (RC == SPU::GPRCRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::STQDr128 : SPU::STQXr128);
+ } else if (RC == SPU::R64CRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::STQDr64 : SPU::STQXr64);
+ } else if (RC == SPU::R64FPRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::STQDr64 : SPU::STQXr64);
+ } else if (RC == SPU::R32CRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::STQDr32 : SPU::STQXr32);
+ } else if (RC == SPU::R32FPRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::STQDr32 : SPU::STQXr32);
+ } else if (RC == SPU::R16CRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::STQDr16 : SPU::STQXr16);
+ } else if (RC == SPU::R8CRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::STQDr8 : SPU::STQXr8);
+ } else if (RC == SPU::VECREGRegisterClass) {
+ opc = (isValidFrameIdx) ? SPU::STQDv16i8 : SPU::STQXv16i8;
+ } else {
+ llvm_unreachable("Unknown regclass!");
+ }
+
+ DebugLoc DL;
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+ addFrameReference(BuildMI(MBB, MI, DL, get(opc))
+ .addReg(SrcReg, getKillRegState(isKill)), FrameIdx);
+}
+
+void
+SPUInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const
+{
+ unsigned opc;
+ bool isValidFrameIdx = (FrameIdx < SPUFrameLowering::maxFrameOffset());
+ if (RC == SPU::GPRCRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::LQDr128 : SPU::LQXr128);
+ } else if (RC == SPU::R64CRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::LQDr64 : SPU::LQXr64);
+ } else if (RC == SPU::R64FPRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::LQDr64 : SPU::LQXr64);
+ } else if (RC == SPU::R32CRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::LQDr32 : SPU::LQXr32);
+ } else if (RC == SPU::R32FPRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::LQDr32 : SPU::LQXr32);
+ } else if (RC == SPU::R16CRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::LQDr16 : SPU::LQXr16);
+ } else if (RC == SPU::R8CRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::LQDr8 : SPU::LQXr8);
+ } else if (RC == SPU::VECREGRegisterClass) {
+ opc = (isValidFrameIdx) ? SPU::LQDv16i8 : SPU::LQXv16i8;
+ } else {
+ llvm_unreachable("Unknown regclass in loadRegFromStackSlot!");
+ }
+
+ DebugLoc DL;
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+ addFrameReference(BuildMI(MBB, MI, DL, get(opc), DestReg), FrameIdx);
+}
+
+//! Branch analysis
+/*!
+ \note This code was kiped from PPC. There may be more branch analysis for
+ CellSPU than what's currently done here.
+ */
+bool
+SPUInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ // If the block has no terminators, it just falls into the block after it.
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin())
+ return false;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return false;
+ --I;
+ }
+ if (!isUnpredicatedTerminator(I))
+ return false;
+
+ // Get the last instruction in the block.
+ MachineInstr *LastInst = I;
+
+ // If there is only one terminator instruction, process it.
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+ if (isUncondBranch(LastInst)) {
+ // Check for jump tables
+ if (!LastInst->getOperand(0).isMBB())
+ return true;
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ } else if (isCondBranch(LastInst)) {
+ // Block ends with fall-through condbranch.
+ TBB = LastInst->getOperand(1).getMBB();
+ DEBUG(errs() << "Pushing LastInst: ");
+ DEBUG(LastInst->dump());
+ Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
+ Cond.push_back(LastInst->getOperand(0));
+ return false;
+ }
+ // Otherwise, don't know what this is.
+ return true;
+ }
+
+ // Get the instruction before it if it's a terminator.
+ MachineInstr *SecondLastInst = I;
+
+ // If there are three terminators, we don't know what sort of block this is.
+ if (SecondLastInst && I != MBB.begin() &&
+ isUnpredicatedTerminator(--I))
+ return true;
+
+ // If the block ends with a conditional and unconditional branch, handle it.
+ if (isCondBranch(SecondLastInst) && isUncondBranch(LastInst)) {
+ TBB = SecondLastInst->getOperand(1).getMBB();
+ DEBUG(errs() << "Pushing SecondLastInst: ");
+ DEBUG(SecondLastInst->dump());
+ Cond.push_back(MachineOperand::CreateImm(SecondLastInst->getOpcode()));
+ Cond.push_back(SecondLastInst->getOperand(0));
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+
+ // If the block ends with two unconditional branches, handle it. The second
+ // one is not executed, so remove it.
+ if (isUncondBranch(SecondLastInst) && isUncondBranch(LastInst)) {
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ I = LastInst;
+ if (AllowModify)
+ I->eraseFromParent();
+ return false;
+ }
+
+ // Otherwise, can't handle this.
+ return true;
+}
+
+// search MBB for branch hint labels and branch hit ops
+static void removeHBR( MachineBasicBlock &MBB) {
+ for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I){
+ if (I->getOpcode() == SPU::HBRA ||
+ I->getOpcode() == SPU::HBR_LABEL){
+ I=MBB.erase(I);
+ if (I == MBB.end())
+ break;
+ }
+ }
+}
+
+unsigned
+SPUInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator I = MBB.end();
+ removeHBR(MBB);
+ if (I == MBB.begin())
+ return 0;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return 0;
+ --I;
+ }
+ if (!isCondBranch(I) && !isUncondBranch(I))
+ return 0;
+
+ // Remove the first branch.
+ DEBUG(errs() << "Removing branch: ");
+ DEBUG(I->dump());
+ I->eraseFromParent();
+ I = MBB.end();
+ if (I == MBB.begin())
+ return 1;
+
+ --I;
+ if (!(isCondBranch(I) || isUncondBranch(I)))
+ return 1;
+
+ // Remove the second branch.
+ DEBUG(errs() << "Removing second branch: ");
+ DEBUG(I->dump());
+ I->eraseFromParent();
+ return 2;
+}
+
+/** Find the optimal position for a hint branch instruction in a basic block.
+ * This should take into account:
+ * -the branch hint delays
+ * -congestion of the memory bus
+ * -dual-issue scheduling (i.e. avoid insertion of nops)
+ * Current implementation is rather simplistic.
+ */
+static MachineBasicBlock::iterator findHBRPosition(MachineBasicBlock &MBB)
+{
+ MachineBasicBlock::iterator J = MBB.end();
+ for( int i=0; i<8; i++) {
+ if( J == MBB.begin() ) return J;
+ J--;
+ }
+ return J;
+}
+
+unsigned
+SPUInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
+ // Shouldn't be a fall through.
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert((Cond.size() == 2 || Cond.size() == 0) &&
+ "SPU branch conditions have two components!");
+
+ MachineInstrBuilder MIB;
+ //TODO: make a more accurate algorithm.
+ bool haveHBR = MBB.size()>8;
+
+ removeHBR(MBB);
+ MCSymbol *branchLabel = MBB.getParent()->getContext().CreateTempSymbol();
+ // Add a label just before the branch
+ if (haveHBR)
+ MIB = BuildMI(&MBB, DL, get(SPU::HBR_LABEL)).addSym(branchLabel);
+
+ // One-way branch.
+ if (FBB == 0) {
+ if (Cond.empty()) {
+ // Unconditional branch
+ MIB = BuildMI(&MBB, DL, get(SPU::BR));
+ MIB.addMBB(TBB);
+
+ DEBUG(errs() << "Inserted one-way uncond branch: ");
+ DEBUG((*MIB).dump());
+
+ // basic blocks have just one branch so it is safe to add the hint a its
+ if (haveHBR) {
+ MIB = BuildMI( MBB, findHBRPosition(MBB), DL, get(SPU::HBRA));
+ MIB.addSym(branchLabel);
+ MIB.addMBB(TBB);
+ }
+ } else {
+ // Conditional branch
+ MIB = BuildMI(&MBB, DL, get(Cond[0].getImm()));
+ MIB.addReg(Cond[1].getReg()).addMBB(TBB);
+
+ if (haveHBR) {
+ MIB = BuildMI(MBB, findHBRPosition(MBB), DL, get(SPU::HBRA));
+ MIB.addSym(branchLabel);
+ MIB.addMBB(TBB);
+ }
+
+ DEBUG(errs() << "Inserted one-way cond branch: ");
+ DEBUG((*MIB).dump());
+ }
+ return 1;
+ } else {
+ MIB = BuildMI(&MBB, DL, get(Cond[0].getImm()));
+ MachineInstrBuilder MIB2 = BuildMI(&MBB, DL, get(SPU::BR));
+
+ // Two-way Conditional Branch.
+ MIB.addReg(Cond[1].getReg()).addMBB(TBB);
+ MIB2.addMBB(FBB);
+
+ if (haveHBR) {
+ MIB = BuildMI( MBB, findHBRPosition(MBB), DL, get(SPU::HBRA));
+ MIB.addSym(branchLabel);
+ MIB.addMBB(FBB);
+ }
+
+ DEBUG(errs() << "Inserted conditional branch: ");
+ DEBUG((*MIB).dump());
+ DEBUG(errs() << "part 2: ");
+ DEBUG((*MIB2).dump());
+ return 2;
+ }
+}
+
+//! Reverses a branch's condition, returning false on success.
+bool
+SPUInstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond)
+ const {
+ // Pretty brainless way of inverting the condition, but it works, considering
+ // there are only two conditions...
+ static struct {
+ unsigned Opc; //! The incoming opcode
+ unsigned RevCondOpc; //! The reversed condition opcode
+ } revconds[] = {
+ { SPU::BRNZr32, SPU::BRZr32 },
+ { SPU::BRNZv4i32, SPU::BRZv4i32 },
+ { SPU::BRZr32, SPU::BRNZr32 },
+ { SPU::BRZv4i32, SPU::BRNZv4i32 },
+ { SPU::BRHNZr16, SPU::BRHZr16 },
+ { SPU::BRHNZv8i16, SPU::BRHZv8i16 },
+ { SPU::BRHZr16, SPU::BRHNZr16 },
+ { SPU::BRHZv8i16, SPU::BRHNZv8i16 }
+ };
+
+ unsigned Opc = unsigned(Cond[0].getImm());
+ // Pretty dull mapping between the two conditions that SPU can generate:
+ for (int i = sizeof(revconds)/sizeof(revconds[0]) - 1; i >= 0; --i) {
+ if (revconds[i].Opc == Opc) {
+ Cond[0].setImm(revconds[i].RevCondOpc);
+ return false;
+ }
+ }
+
+ return true;
+}
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.h b/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.h
new file mode 100644
index 0000000..bc1ba71
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.h
@@ -0,0 +1,84 @@
+//===- SPUInstrInfo.h - Cell SPU Instruction Information --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the CellSPU implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPU_INSTRUCTIONINFO_H
+#define SPU_INSTRUCTIONINFO_H
+
+#include "SPU.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "SPURegisterInfo.h"
+
+#define GET_INSTRINFO_HEADER
+#include "SPUGenInstrInfo.inc"
+
+namespace llvm {
+ //! Cell SPU instruction information class
+ class SPUInstrInfo : public SPUGenInstrInfo {
+ SPUTargetMachine &TM;
+ const SPURegisterInfo RI;
+ public:
+ explicit SPUInstrInfo(SPUTargetMachine &tm);
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ virtual const SPURegisterInfo &getRegisterInfo() const { return RI; }
+
+ ScheduleHazardRecognizer *
+ CreateTargetHazardRecognizer(const TargetMachine *TM,
+ const ScheduleDAG *DAG) const;
+
+ unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+ unsigned isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+
+ //! Store a register to a stack slot, based on its register class.
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ //! Load a register from a stack slot, based on its register class.
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ //! Reverses a branch's condition, returning false on success.
+ virtual
+ bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+
+ virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const;
+
+ virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
+ };
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.td b/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.td
new file mode 100644
index 0000000..f76ebd7
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.td
@@ -0,0 +1,4484 @@
+//==- SPUInstrInfo.td - Describe the Cell SPU Instructions -*- tablegen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Cell SPU Instructions:
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// TODO Items (not urgent today, but would be nice, low priority)
+//
+// ANDBI, ORBI: SPU constructs a 4-byte constant for these instructions by
+// concatenating the byte argument b as "bbbb". Could recognize this bit pattern
+// in 16-bit and 32-bit constants and reduce instruction count.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Pseudo instructions:
+//===----------------------------------------------------------------------===//
+
+let hasCtrlDep = 1, Defs = [R1], Uses = [R1] in {
+ def ADJCALLSTACKDOWN : Pseudo<(outs), (ins u16imm_i32:$amt),
+ "${:comment} ADJCALLSTACKDOWN",
+ [(callseq_start timm:$amt)]>;
+ def ADJCALLSTACKUP : Pseudo<(outs), (ins u16imm_i32:$amt),
+ "${:comment} ADJCALLSTACKUP",
+ [(callseq_end timm:$amt)]>;
+ def HBR_LABEL : Pseudo<(outs), (ins hbrtarget:$targ),
+ "$targ:\t${:comment}branch hint target",[ ]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Loads:
+// NB: The ordering is actually important, since the instruction selection
+// will try each of the instructions in sequence, i.e., the D-form first with
+// the 10-bit displacement, then the A-form with the 16 bit displacement, and
+// finally the X-form with the register-register.
+//===----------------------------------------------------------------------===//
+
+let canFoldAsLoad = 1 in {
+ class LoadDFormVec<ValueType vectype>
+ : RI10Form<0b00101100, (outs VECREG:$rT), (ins dformaddr:$src),
+ "lqd\t$rT, $src",
+ LoadStore,
+ [(set (vectype VECREG:$rT), (load dform_addr:$src))]>
+ { }
+
+ class LoadDForm<RegisterClass rclass>
+ : RI10Form<0b00101100, (outs rclass:$rT), (ins dformaddr:$src),
+ "lqd\t$rT, $src",
+ LoadStore,
+ [(set rclass:$rT, (load dform_addr:$src))]>
+ { }
+
+ multiclass LoadDForms
+ {
+ def v16i8: LoadDFormVec<v16i8>;
+ def v8i16: LoadDFormVec<v8i16>;
+ def v4i32: LoadDFormVec<v4i32>;
+ def v2i64: LoadDFormVec<v2i64>;
+ def v4f32: LoadDFormVec<v4f32>;
+ def v2f64: LoadDFormVec<v2f64>;
+
+ def r128: LoadDForm<GPRC>;
+ def r64: LoadDForm<R64C>;
+ def r32: LoadDForm<R32C>;
+ def f32: LoadDForm<R32FP>;
+ def f64: LoadDForm<R64FP>;
+ def r16: LoadDForm<R16C>;
+ def r8: LoadDForm<R8C>;
+ }
+
+ class LoadAFormVec<ValueType vectype>
+ : RI16Form<0b100001100, (outs VECREG:$rT), (ins addr256k:$src),
+ "lqa\t$rT, $src",
+ LoadStore,
+ [(set (vectype VECREG:$rT), (load aform_addr:$src))]>
+ { }
+
+ class LoadAForm<RegisterClass rclass>
+ : RI16Form<0b100001100, (outs rclass:$rT), (ins addr256k:$src),
+ "lqa\t$rT, $src",
+ LoadStore,
+ [(set rclass:$rT, (load aform_addr:$src))]>
+ { }
+
+ multiclass LoadAForms
+ {
+ def v16i8: LoadAFormVec<v16i8>;
+ def v8i16: LoadAFormVec<v8i16>;
+ def v4i32: LoadAFormVec<v4i32>;
+ def v2i64: LoadAFormVec<v2i64>;
+ def v4f32: LoadAFormVec<v4f32>;
+ def v2f64: LoadAFormVec<v2f64>;
+
+ def r128: LoadAForm<GPRC>;
+ def r64: LoadAForm<R64C>;
+ def r32: LoadAForm<R32C>;
+ def f32: LoadAForm<R32FP>;
+ def f64: LoadAForm<R64FP>;
+ def r16: LoadAForm<R16C>;
+ def r8: LoadAForm<R8C>;
+ }
+
+ class LoadXFormVec<ValueType vectype>
+ : RRForm<0b00100011100, (outs VECREG:$rT), (ins memrr:$src),
+ "lqx\t$rT, $src",
+ LoadStore,
+ [(set (vectype VECREG:$rT), (load xform_addr:$src))]>
+ { }
+
+ class LoadXForm<RegisterClass rclass>
+ : RRForm<0b00100011100, (outs rclass:$rT), (ins memrr:$src),
+ "lqx\t$rT, $src",
+ LoadStore,
+ [(set rclass:$rT, (load xform_addr:$src))]>
+ { }
+
+ multiclass LoadXForms
+ {
+ def v16i8: LoadXFormVec<v16i8>;
+ def v8i16: LoadXFormVec<v8i16>;
+ def v4i32: LoadXFormVec<v4i32>;
+ def v2i64: LoadXFormVec<v2i64>;
+ def v4f32: LoadXFormVec<v4f32>;
+ def v2f64: LoadXFormVec<v2f64>;
+
+ def r128: LoadXForm<GPRC>;
+ def r64: LoadXForm<R64C>;
+ def r32: LoadXForm<R32C>;
+ def f32: LoadXForm<R32FP>;
+ def f64: LoadXForm<R64FP>;
+ def r16: LoadXForm<R16C>;
+ def r8: LoadXForm<R8C>;
+ }
+
+ defm LQA : LoadAForms;
+ defm LQD : LoadDForms;
+ defm LQX : LoadXForms;
+
+/* Load quadword, PC relative: Not much use at this point in time.
+ Might be of use later for relocatable code. It's effectively the
+ same as LQA, but uses PC-relative addressing.
+ def LQR : RI16Form<0b111001100, (outs VECREG:$rT), (ins s16imm:$disp),
+ "lqr\t$rT, $disp", LoadStore,
+ [(set VECREG:$rT, (load iaddr:$disp))]>;
+ */
+}
+
+//===----------------------------------------------------------------------===//
+// Stores:
+//===----------------------------------------------------------------------===//
+class StoreDFormVec<ValueType vectype>
+ : RI10Form<0b00100100, (outs), (ins VECREG:$rT, dformaddr:$src),
+ "stqd\t$rT, $src",
+ LoadStore,
+ [(store (vectype VECREG:$rT), dform_addr:$src)]>
+{ }
+
+class StoreDForm<RegisterClass rclass>
+ : RI10Form<0b00100100, (outs), (ins rclass:$rT, dformaddr:$src),
+ "stqd\t$rT, $src",
+ LoadStore,
+ [(store rclass:$rT, dform_addr:$src)]>
+{ }
+
+multiclass StoreDForms
+{
+ def v16i8: StoreDFormVec<v16i8>;
+ def v8i16: StoreDFormVec<v8i16>;
+ def v4i32: StoreDFormVec<v4i32>;
+ def v2i64: StoreDFormVec<v2i64>;
+ def v4f32: StoreDFormVec<v4f32>;
+ def v2f64: StoreDFormVec<v2f64>;
+
+ def r128: StoreDForm<GPRC>;
+ def r64: StoreDForm<R64C>;
+ def r32: StoreDForm<R32C>;
+ def f32: StoreDForm<R32FP>;
+ def f64: StoreDForm<R64FP>;
+ def r16: StoreDForm<R16C>;
+ def r8: StoreDForm<R8C>;
+}
+
+class StoreAFormVec<ValueType vectype>
+ : RI16Form<0b0010010, (outs), (ins VECREG:$rT, addr256k:$src),
+ "stqa\t$rT, $src",
+ LoadStore,
+ [(store (vectype VECREG:$rT), aform_addr:$src)]>;
+
+class StoreAForm<RegisterClass rclass>
+ : RI16Form<0b001001, (outs), (ins rclass:$rT, addr256k:$src),
+ "stqa\t$rT, $src",
+ LoadStore,
+ [(store rclass:$rT, aform_addr:$src)]>;
+
+multiclass StoreAForms
+{
+ def v16i8: StoreAFormVec<v16i8>;
+ def v8i16: StoreAFormVec<v8i16>;
+ def v4i32: StoreAFormVec<v4i32>;
+ def v2i64: StoreAFormVec<v2i64>;
+ def v4f32: StoreAFormVec<v4f32>;
+ def v2f64: StoreAFormVec<v2f64>;
+
+ def r128: StoreAForm<GPRC>;
+ def r64: StoreAForm<R64C>;
+ def r32: StoreAForm<R32C>;
+ def f32: StoreAForm<R32FP>;
+ def f64: StoreAForm<R64FP>;
+ def r16: StoreAForm<R16C>;
+ def r8: StoreAForm<R8C>;
+}
+
+class StoreXFormVec<ValueType vectype>
+ : RRForm<0b00100100, (outs), (ins VECREG:$rT, memrr:$src),
+ "stqx\t$rT, $src",
+ LoadStore,
+ [(store (vectype VECREG:$rT), xform_addr:$src)]>
+{ }
+
+class StoreXForm<RegisterClass rclass>
+ : RRForm<0b00100100, (outs), (ins rclass:$rT, memrr:$src),
+ "stqx\t$rT, $src",
+ LoadStore,
+ [(store rclass:$rT, xform_addr:$src)]>
+{ }
+
+multiclass StoreXForms
+{
+ def v16i8: StoreXFormVec<v16i8>;
+ def v8i16: StoreXFormVec<v8i16>;
+ def v4i32: StoreXFormVec<v4i32>;
+ def v2i64: StoreXFormVec<v2i64>;
+ def v4f32: StoreXFormVec<v4f32>;
+ def v2f64: StoreXFormVec<v2f64>;
+
+ def r128: StoreXForm<GPRC>;
+ def r64: StoreXForm<R64C>;
+ def r32: StoreXForm<R32C>;
+ def f32: StoreXForm<R32FP>;
+ def f64: StoreXForm<R64FP>;
+ def r16: StoreXForm<R16C>;
+ def r8: StoreXForm<R8C>;
+}
+
+defm STQD : StoreDForms;
+defm STQA : StoreAForms;
+defm STQX : StoreXForms;
+
+/* Store quadword, PC relative: Not much use at this point in time. Might
+ be useful for relocatable code.
+def STQR : RI16Form<0b111000100, (outs), (ins VECREG:$rT, s16imm:$disp),
+ "stqr\t$rT, $disp", LoadStore,
+ [(store VECREG:$rT, iaddr:$disp)]>;
+*/
+
+//===----------------------------------------------------------------------===//
+// Generate Controls for Insertion:
+//===----------------------------------------------------------------------===//
+
+def CBD: RI7Form<0b10101111100, (outs VECREG:$rT), (ins shufaddr:$src),
+ "cbd\t$rT, $src", ShuffleOp,
+ [(set (v16i8 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
+
+def CBX: RRForm<0b00101011100, (outs VECREG:$rT), (ins memrr:$src),
+ "cbx\t$rT, $src", ShuffleOp,
+ [(set (v16i8 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;
+
+def CHD: RI7Form<0b10101111100, (outs VECREG:$rT), (ins shufaddr:$src),
+ "chd\t$rT, $src", ShuffleOp,
+ [(set (v8i16 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
+
+def CHX: RRForm<0b10101011100, (outs VECREG:$rT), (ins memrr:$src),
+ "chx\t$rT, $src", ShuffleOp,
+ [(set (v8i16 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;
+
+def CWD: RI7Form<0b01101111100, (outs VECREG:$rT), (ins shufaddr:$src),
+ "cwd\t$rT, $src", ShuffleOp,
+ [(set (v4i32 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
+
+def CWX: RRForm<0b01101011100, (outs VECREG:$rT), (ins memrr:$src),
+ "cwx\t$rT, $src", ShuffleOp,
+ [(set (v4i32 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;
+
+def CWDf32: RI7Form<0b01101111100, (outs VECREG:$rT), (ins shufaddr:$src),
+ "cwd\t$rT, $src", ShuffleOp,
+ [(set (v4f32 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
+
+def CWXf32: RRForm<0b01101011100, (outs VECREG:$rT), (ins memrr:$src),
+ "cwx\t$rT, $src", ShuffleOp,
+ [(set (v4f32 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;
+
+def CDD: RI7Form<0b11101111100, (outs VECREG:$rT), (ins shufaddr:$src),
+ "cdd\t$rT, $src", ShuffleOp,
+ [(set (v2i64 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
+
+def CDX: RRForm<0b11101011100, (outs VECREG:$rT), (ins memrr:$src),
+ "cdx\t$rT, $src", ShuffleOp,
+ [(set (v2i64 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;
+
+def CDDf64: RI7Form<0b11101111100, (outs VECREG:$rT), (ins shufaddr:$src),
+ "cdd\t$rT, $src", ShuffleOp,
+ [(set (v2f64 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
+
+def CDXf64: RRForm<0b11101011100, (outs VECREG:$rT), (ins memrr:$src),
+ "cdx\t$rT, $src", ShuffleOp,
+ [(set (v2f64 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;
+
+//===----------------------------------------------------------------------===//
+// Constant formation:
+//===----------------------------------------------------------------------===//
+
+def ILHv8i16:
+ RI16Form<0b110000010, (outs VECREG:$rT), (ins s16imm:$val),
+ "ilh\t$rT, $val", ImmLoad,
+ [(set (v8i16 VECREG:$rT), (v8i16 v8i16SExt16Imm:$val))]>;
+
+def ILHr16:
+ RI16Form<0b110000010, (outs R16C:$rT), (ins s16imm:$val),
+ "ilh\t$rT, $val", ImmLoad,
+ [(set R16C:$rT, immSExt16:$val)]>;
+
+// Cell SPU doesn't have a native 8-bit immediate load, but ILH works ("with
+// the right constant")
+def ILHr8:
+ RI16Form<0b110000010, (outs R8C:$rT), (ins s16imm_i8:$val),
+ "ilh\t$rT, $val", ImmLoad,
+ [(set R8C:$rT, immSExt8:$val)]>;
+
+// IL does sign extension!
+
+class ILInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI16Form<0b100000010, OOL, IOL, "il\t$rT, $val",
+ ImmLoad, pattern>;
+
+class ILVecInst<ValueType vectype, Operand immtype, PatLeaf xform>:
+ ILInst<(outs VECREG:$rT), (ins immtype:$val),
+ [(set (vectype VECREG:$rT), (vectype xform:$val))]>;
+
+class ILRegInst<RegisterClass rclass, Operand immtype, PatLeaf xform>:
+ ILInst<(outs rclass:$rT), (ins immtype:$val),
+ [(set rclass:$rT, xform:$val)]>;
+
+multiclass ImmediateLoad
+{
+ def v2i64: ILVecInst<v2i64, s16imm_i64, v2i64SExt16Imm>;
+ def v4i32: ILVecInst<v4i32, s16imm_i32, v4i32SExt16Imm>;
+
+ // TODO: Need v2f64, v4f32
+
+ def r64: ILRegInst<R64C, s16imm_i64, immSExt16>;
+ def r32: ILRegInst<R32C, s16imm_i32, immSExt16>;
+ def f32: ILRegInst<R32FP, s16imm_f32, fpimmSExt16>;
+ def f64: ILRegInst<R64FP, s16imm_f64, fpimmSExt16>;
+}
+
+defm IL : ImmediateLoad;
+
+class ILHUInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI16Form<0b010000010, OOL, IOL, "ilhu\t$rT, $val",
+ ImmLoad, pattern>;
+
+class ILHUVecInst<ValueType vectype, Operand immtype, PatLeaf xform>:
+ ILHUInst<(outs VECREG:$rT), (ins immtype:$val),
+ [(set (vectype VECREG:$rT), (vectype xform:$val))]>;
+
+class ILHURegInst<RegisterClass rclass, Operand immtype, PatLeaf xform>:
+ ILHUInst<(outs rclass:$rT), (ins immtype:$val),
+ [(set rclass:$rT, xform:$val)]>;
+
+multiclass ImmLoadHalfwordUpper
+{
+ def v2i64: ILHUVecInst<v2i64, u16imm_i64, immILHUvec_i64>;
+ def v4i32: ILHUVecInst<v4i32, u16imm_i32, immILHUvec>;
+
+ def r64: ILHURegInst<R64C, u16imm_i64, hi16>;
+ def r32: ILHURegInst<R32C, u16imm_i32, hi16>;
+
+ // Loads the high portion of an address
+ def hi: ILHURegInst<R32C, symbolHi, hi16>;
+
+ // Used in custom lowering constant SFP loads:
+ def f32: ILHURegInst<R32FP, f16imm, hi16_f32>;
+}
+
+defm ILHU : ImmLoadHalfwordUpper;
+
+// Immediate load address (can also be used to load 18-bit unsigned constants,
+// see the zext 16->32 pattern)
+
+class ILAInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI18Form<0b1000010, OOL, IOL, "ila\t$rT, $val",
+ LoadNOP, pattern>;
+
+class ILAVecInst<ValueType vectype, Operand immtype, PatLeaf xform>:
+ ILAInst<(outs VECREG:$rT), (ins immtype:$val),
+ [(set (vectype VECREG:$rT), (vectype xform:$val))]>;
+
+class ILARegInst<RegisterClass rclass, Operand immtype, PatLeaf xform>:
+ ILAInst<(outs rclass:$rT), (ins immtype:$val),
+ [(set rclass:$rT, xform:$val)]>;
+
+multiclass ImmLoadAddress
+{
+ def v2i64: ILAVecInst<v2i64, u18imm, v2i64Uns18Imm>;
+ def v4i32: ILAVecInst<v4i32, u18imm, v4i32Uns18Imm>;
+
+ def r64: ILARegInst<R64C, u18imm_i64, imm18>;
+ def r32: ILARegInst<R32C, u18imm, imm18>;
+ def f32: ILARegInst<R32FP, f18imm, fpimm18>;
+ def f64: ILARegInst<R64FP, f18imm_f64, fpimm18>;
+
+ def hi: ILARegInst<R32C, symbolHi, imm18>;
+ def lo: ILARegInst<R32C, symbolLo, imm18>;
+
+ def lsa: ILAInst<(outs R32C:$rT), (ins symbolLSA:$val),
+ [(set R32C:$rT, imm18:$val)]>;
+}
+
+defm ILA : ImmLoadAddress;
+
+// Immediate OR, Halfword Lower: The "other" part of loading large constants
+// into 32-bit registers. See the anonymous pattern Pat<(i32 imm:$imm), ...>
+// Note that these are really two operand instructions, but they're encoded
+// as three operands with the first two arguments tied-to each other.
+
+class IOHLInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI16Form<0b100000110, OOL, IOL, "iohl\t$rT, $val",
+ ImmLoad, pattern>,
+ RegConstraint<"$rS = $rT">,
+ NoEncode<"$rS">;
+
+class IOHLVecInst<ValueType vectype, Operand immtype /* , PatLeaf xform */>:
+ IOHLInst<(outs VECREG:$rT), (ins VECREG:$rS, immtype:$val),
+ [/* no pattern */]>;
+
+class IOHLRegInst<RegisterClass rclass, Operand immtype /* , PatLeaf xform */>:
+ IOHLInst<(outs rclass:$rT), (ins rclass:$rS, immtype:$val),
+ [/* no pattern */]>;
+
+multiclass ImmOrHalfwordLower
+{
+ def v2i64: IOHLVecInst<v2i64, u16imm_i64>;
+ def v4i32: IOHLVecInst<v4i32, u16imm_i32>;
+
+ def r32: IOHLRegInst<R32C, i32imm>;
+ def f32: IOHLRegInst<R32FP, f32imm>;
+
+ def lo: IOHLRegInst<R32C, symbolLo>;
+}
+
+defm IOHL: ImmOrHalfwordLower;
+
+// Form select mask for bytes using immediate, used in conjunction with the
+// SELB instruction:
+
+class FSMBIVec<ValueType vectype>:
+ RI16Form<0b101001100, (outs VECREG:$rT), (ins u16imm:$val),
+ "fsmbi\t$rT, $val",
+ SelectOp,
+ [(set (vectype VECREG:$rT), (SPUselmask (i16 immU16:$val)))]>;
+
+multiclass FormSelectMaskBytesImm
+{
+ def v16i8: FSMBIVec<v16i8>;
+ def v8i16: FSMBIVec<v8i16>;
+ def v4i32: FSMBIVec<v4i32>;
+ def v2i64: FSMBIVec<v2i64>;
+}
+
+defm FSMBI : FormSelectMaskBytesImm;
+
+// fsmb: Form select mask for bytes. N.B. Input operand, $rA, is 16-bits
+class FSMBInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm_1<0b01101101100, OOL, IOL, "fsmb\t$rT, $rA", SelectOp,
+ pattern>;
+
+class FSMBRegInst<RegisterClass rclass, ValueType vectype>:
+ FSMBInst<(outs VECREG:$rT), (ins rclass:$rA),
+ [(set (vectype VECREG:$rT), (SPUselmask rclass:$rA))]>;
+
+class FSMBVecInst<ValueType vectype>:
+ FSMBInst<(outs VECREG:$rT), (ins VECREG:$rA),
+ [(set (vectype VECREG:$rT),
+ (SPUselmask (vectype VECREG:$rA)))]>;
+
+multiclass FormSelectMaskBits {
+ def v16i8_r16: FSMBRegInst<R16C, v16i8>;
+ def v16i8: FSMBVecInst<v16i8>;
+}
+
+defm FSMB: FormSelectMaskBits;
+
+// fsmh: Form select mask for halfwords. N.B., Input operand, $rA, is
+// only 8-bits wide (even though it's input as 16-bits here)
+
+class FSMHInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm_1<0b10101101100, OOL, IOL, "fsmh\t$rT, $rA", SelectOp,
+ pattern>;
+
+class FSMHRegInst<RegisterClass rclass, ValueType vectype>:
+ FSMHInst<(outs VECREG:$rT), (ins rclass:$rA),
+ [(set (vectype VECREG:$rT), (SPUselmask rclass:$rA))]>;
+
+class FSMHVecInst<ValueType vectype>:
+ FSMHInst<(outs VECREG:$rT), (ins VECREG:$rA),
+ [(set (vectype VECREG:$rT),
+ (SPUselmask (vectype VECREG:$rA)))]>;
+
+multiclass FormSelectMaskHalfword {
+ def v8i16_r16: FSMHRegInst<R16C, v8i16>;
+ def v8i16: FSMHVecInst<v8i16>;
+}
+
+defm FSMH: FormSelectMaskHalfword;
+
+// fsm: Form select mask for words. Like the other fsm* instructions,
+// only the lower 4 bits of $rA are significant.
+
+class FSMInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm_1<0b00101101100, OOL, IOL, "fsm\t$rT, $rA", SelectOp,
+ pattern>;
+
+class FSMRegInst<ValueType vectype, RegisterClass rclass>:
+ FSMInst<(outs VECREG:$rT), (ins rclass:$rA),
+ [(set (vectype VECREG:$rT), (SPUselmask rclass:$rA))]>;
+
+class FSMVecInst<ValueType vectype>:
+ FSMInst<(outs VECREG:$rT), (ins VECREG:$rA),
+ [(set (vectype VECREG:$rT), (SPUselmask (vectype VECREG:$rA)))]>;
+
+multiclass FormSelectMaskWord {
+ def v4i32: FSMVecInst<v4i32>;
+
+ def r32 : FSMRegInst<v4i32, R32C>;
+ def r16 : FSMRegInst<v4i32, R16C>;
+}
+
+defm FSM : FormSelectMaskWord;
+
+// Special case when used for i64 math operations
+multiclass FormSelectMaskWord64 {
+ def r32 : FSMRegInst<v2i64, R32C>;
+ def r16 : FSMRegInst<v2i64, R16C>;
+}
+
+defm FSM64 : FormSelectMaskWord64;
+
+//===----------------------------------------------------------------------===//
+// Integer and Logical Operations:
+//===----------------------------------------------------------------------===//
+
+def AHv8i16:
+ RRForm<0b00010011000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "ah\t$rT, $rA, $rB", IntegerOp,
+ [(set (v8i16 VECREG:$rT), (int_spu_si_ah VECREG:$rA, VECREG:$rB))]>;
+
+def : Pat<(add (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)),
+ (AHv8i16 VECREG:$rA, VECREG:$rB)>;
+
+def AHr16:
+ RRForm<0b00010011000, (outs R16C:$rT), (ins R16C:$rA, R16C:$rB),
+ "ah\t$rT, $rA, $rB", IntegerOp,
+ [(set R16C:$rT, (add R16C:$rA, R16C:$rB))]>;
+
+def AHIvec:
+ RI10Form<0b10111000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "ahi\t$rT, $rA, $val", IntegerOp,
+ [(set (v8i16 VECREG:$rT), (add (v8i16 VECREG:$rA),
+ v8i16SExt10Imm:$val))]>;
+
+def AHIr16:
+ RI10Form<0b10111000, (outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
+ "ahi\t$rT, $rA, $val", IntegerOp,
+ [(set R16C:$rT, (add R16C:$rA, i16ImmSExt10:$val))]>;
+
+// v4i32, i32 add instruction:
+
+class AInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b00000011000, OOL, IOL,
+ "a\t$rT, $rA, $rB", IntegerOp,
+ pattern>;
+
+class AVecInst<ValueType vectype>:
+ AInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT), (add (vectype VECREG:$rA),
+ (vectype VECREG:$rB)))]>;
+
+class ARegInst<RegisterClass rclass>:
+ AInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT, (add rclass:$rA, rclass:$rB))]>;
+
+multiclass AddInstruction {
+ def v4i32: AVecInst<v4i32>;
+ def v16i8: AVecInst<v16i8>;
+ def r32: ARegInst<R32C>;
+}
+
+defm A : AddInstruction;
+
+class AIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI10Form<0b00111000, OOL, IOL,
+ "ai\t$rT, $rA, $val", IntegerOp,
+ pattern>;
+
+class AIVecInst<ValueType vectype, PatLeaf immpred>:
+ AIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ [(set (vectype VECREG:$rT), (add (vectype VECREG:$rA), immpred:$val))]>;
+
+class AIFPVecInst<ValueType vectype, PatLeaf immpred>:
+ AIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ [/* no pattern */]>;
+
+class AIRegInst<RegisterClass rclass, PatLeaf immpred>:
+ AIInst<(outs rclass:$rT), (ins rclass:$rA, s10imm_i32:$val),
+ [(set rclass:$rT, (add rclass:$rA, immpred:$val))]>;
+
+// This is used to add epsilons to floating point numbers in the f32 fdiv code:
+class AIFPInst<RegisterClass rclass, PatLeaf immpred>:
+ AIInst<(outs rclass:$rT), (ins rclass:$rA, s10imm_i32:$val),
+ [/* no pattern */]>;
+
+multiclass AddImmediate {
+ def v4i32: AIVecInst<v4i32, v4i32SExt10Imm>;
+
+ def r32: AIRegInst<R32C, i32ImmSExt10>;
+
+ def v4f32: AIFPVecInst<v4f32, v4i32SExt10Imm>;
+ def f32: AIFPInst<R32FP, i32ImmSExt10>;
+}
+
+defm AI : AddImmediate;
+
+def SFHvec:
+ RRForm<0b00010010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "sfh\t$rT, $rA, $rB", IntegerOp,
+ [(set (v8i16 VECREG:$rT), (sub (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))]>;
+
+def SFHr16:
+ RRForm<0b00010010000, (outs R16C:$rT), (ins R16C:$rA, R16C:$rB),
+ "sfh\t$rT, $rA, $rB", IntegerOp,
+ [(set R16C:$rT, (sub R16C:$rB, R16C:$rA))]>;
+
+def SFHIvec:
+ RI10Form<0b10110000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "sfhi\t$rT, $rA, $val", IntegerOp,
+ [(set (v8i16 VECREG:$rT), (sub v8i16SExt10Imm:$val,
+ (v8i16 VECREG:$rA)))]>;
+
+def SFHIr16 : RI10Form<0b10110000, (outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
+ "sfhi\t$rT, $rA, $val", IntegerOp,
+ [(set R16C:$rT, (sub i16ImmSExt10:$val, R16C:$rA))]>;
+
+def SFvec : RRForm<0b00000010000, (outs VECREG:$rT),
+ (ins VECREG:$rA, VECREG:$rB),
+ "sf\t$rT, $rA, $rB", IntegerOp,
+ [(set (v4i32 VECREG:$rT), (sub (v4i32 VECREG:$rB), (v4i32 VECREG:$rA)))]>;
+
+
+def SFr32 : RRForm<0b00000010000, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
+ "sf\t$rT, $rA, $rB", IntegerOp,
+ [(set R32C:$rT, (sub R32C:$rB, R32C:$rA))]>;
+
+def SFIvec:
+ RI10Form<0b00110000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "sfi\t$rT, $rA, $val", IntegerOp,
+ [(set (v4i32 VECREG:$rT), (sub v4i32SExt10Imm:$val,
+ (v4i32 VECREG:$rA)))]>;
+
+def SFIr32 : RI10Form<0b00110000, (outs R32C:$rT),
+ (ins R32C:$rA, s10imm_i32:$val),
+ "sfi\t$rT, $rA, $val", IntegerOp,
+ [(set R32C:$rT, (sub i32ImmSExt10:$val, R32C:$rA))]>;
+
+// ADDX: only available in vector form, doesn't match a pattern.
+class ADDXInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b00000010110, OOL, IOL,
+ "addx\t$rT, $rA, $rB",
+ IntegerOp, pattern>;
+
+class ADDXVecInst<ValueType vectype>:
+ ADDXInst<(outs VECREG:$rT),
+ (ins VECREG:$rA, VECREG:$rB, VECREG:$rCarry),
+ [/* no pattern */]>,
+ RegConstraint<"$rCarry = $rT">,
+ NoEncode<"$rCarry">;
+
+class ADDXRegInst<RegisterClass rclass>:
+ ADDXInst<(outs rclass:$rT),
+ (ins rclass:$rA, rclass:$rB, rclass:$rCarry),
+ [/* no pattern */]>,
+ RegConstraint<"$rCarry = $rT">,
+ NoEncode<"$rCarry">;
+
+multiclass AddExtended {
+ def v2i64 : ADDXVecInst<v2i64>;
+ def v4i32 : ADDXVecInst<v4i32>;
+ def r64 : ADDXRegInst<R64C>;
+ def r32 : ADDXRegInst<R32C>;
+}
+
+defm ADDX : AddExtended;
+
+// CG: Generate carry for add
+class CGInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b01000011000, OOL, IOL,
+ "cg\t$rT, $rA, $rB",
+ IntegerOp, pattern>;
+
+class CGVecInst<ValueType vectype>:
+ CGInst<(outs VECREG:$rT),
+ (ins VECREG:$rA, VECREG:$rB),
+ [/* no pattern */]>;
+
+class CGRegInst<RegisterClass rclass>:
+ CGInst<(outs rclass:$rT),
+ (ins rclass:$rA, rclass:$rB),
+ [/* no pattern */]>;
+
+multiclass CarryGenerate {
+ def v2i64 : CGVecInst<v2i64>;
+ def v4i32 : CGVecInst<v4i32>;
+ def r64 : CGRegInst<R64C>;
+ def r32 : CGRegInst<R32C>;
+}
+
+defm CG : CarryGenerate;
+
+// SFX: Subract from, extended. This is used in conjunction with BG to subtract
+// with carry (borrow, in this case)
+class SFXInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b10000010110, OOL, IOL,
+ "sfx\t$rT, $rA, $rB",
+ IntegerOp, pattern>;
+
+class SFXVecInst<ValueType vectype>:
+ SFXInst<(outs VECREG:$rT),
+ (ins VECREG:$rA, VECREG:$rB, VECREG:$rCarry),
+ [/* no pattern */]>,
+ RegConstraint<"$rCarry = $rT">,
+ NoEncode<"$rCarry">;
+
+class SFXRegInst<RegisterClass rclass>:
+ SFXInst<(outs rclass:$rT),
+ (ins rclass:$rA, rclass:$rB, rclass:$rCarry),
+ [/* no pattern */]>,
+ RegConstraint<"$rCarry = $rT">,
+ NoEncode<"$rCarry">;
+
+multiclass SubtractExtended {
+ def v2i64 : SFXVecInst<v2i64>;
+ def v4i32 : SFXVecInst<v4i32>;
+ def r64 : SFXRegInst<R64C>;
+ def r32 : SFXRegInst<R32C>;
+}
+
+defm SFX : SubtractExtended;
+
+// BG: only available in vector form, doesn't match a pattern.
+class BGInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b01000010000, OOL, IOL,
+ "bg\t$rT, $rA, $rB",
+ IntegerOp, pattern>;
+
+class BGVecInst<ValueType vectype>:
+ BGInst<(outs VECREG:$rT),
+ (ins VECREG:$rA, VECREG:$rB),
+ [/* no pattern */]>;
+
+class BGRegInst<RegisterClass rclass>:
+ BGInst<(outs rclass:$rT),
+ (ins rclass:$rA, rclass:$rB),
+ [/* no pattern */]>;
+
+multiclass BorrowGenerate {
+ def v4i32 : BGVecInst<v4i32>;
+ def v2i64 : BGVecInst<v2i64>;
+ def r64 : BGRegInst<R64C>;
+ def r32 : BGRegInst<R32C>;
+}
+
+defm BG : BorrowGenerate;
+
+// BGX: Borrow generate, extended.
+def BGXvec:
+ RRForm<0b11000010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB,
+ VECREG:$rCarry),
+ "bgx\t$rT, $rA, $rB", IntegerOp,
+ []>,
+ RegConstraint<"$rCarry = $rT">,
+ NoEncode<"$rCarry">;
+
+// Halfword multiply variants:
+// N.B: These can be used to build up larger quantities (16x16 -> 32)
+
+def MPYv8i16:
+ RRForm<0b00100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "mpy\t$rT, $rA, $rB", IntegerMulDiv,
+ [/* no pattern */]>;
+
+def MPYr16:
+ RRForm<0b00100011110, (outs R16C:$rT), (ins R16C:$rA, R16C:$rB),
+ "mpy\t$rT, $rA, $rB", IntegerMulDiv,
+ [(set R16C:$rT, (mul R16C:$rA, R16C:$rB))]>;
+
+// Unsigned 16-bit multiply:
+
+class MPYUInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b00110011110, OOL, IOL,
+ "mpyu\t$rT, $rA, $rB", IntegerMulDiv,
+ pattern>;
+
+def MPYUv4i32:
+ MPYUInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [/* no pattern */]>;
+
+def MPYUr16:
+ MPYUInst<(outs R32C:$rT), (ins R16C:$rA, R16C:$rB),
+ [(set R32C:$rT, (mul (zext R16C:$rA), (zext R16C:$rB)))]>;
+
+def MPYUr32:
+ MPYUInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
+ [/* no pattern */]>;
+
+// mpyi: multiply 16 x s10imm -> 32 result.
+
+class MPYIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI10Form<0b00101110, OOL, IOL,
+ "mpyi\t$rT, $rA, $val", IntegerMulDiv,
+ pattern>;
+
+def MPYIvec:
+ MPYIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ [(set (v8i16 VECREG:$rT),
+ (mul (v8i16 VECREG:$rA), v8i16SExt10Imm:$val))]>;
+
+def MPYIr16:
+ MPYIInst<(outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
+ [(set R16C:$rT, (mul R16C:$rA, i16ImmSExt10:$val))]>;
+
+// mpyui: same issues as other multiplies, plus, this doesn't match a
+// pattern... but may be used during target DAG selection or lowering
+
+class MPYUIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI10Form<0b10101110, OOL, IOL,
+ "mpyui\t$rT, $rA, $val", IntegerMulDiv,
+ pattern>;
+
+def MPYUIvec:
+ MPYUIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ []>;
+
+def MPYUIr16:
+ MPYUIInst<(outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
+ []>;
+
+// mpya: 16 x 16 + 16 -> 32 bit result
+class MPYAInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRRForm<0b0011, OOL, IOL,
+ "mpya\t$rT, $rA, $rB, $rC", IntegerMulDiv,
+ pattern>;
+
+def MPYAv4i32:
+ MPYAInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ [(set (v4i32 VECREG:$rT),
+ (add (v4i32 (bitconvert (mul (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))),
+ (v4i32 VECREG:$rC)))]>;
+
+def MPYAr32:
+ MPYAInst<(outs R32C:$rT), (ins R16C:$rA, R16C:$rB, R32C:$rC),
+ [(set R32C:$rT, (add (sext (mul R16C:$rA, R16C:$rB)),
+ R32C:$rC))]>;
+
+def MPYAr32_sext:
+ MPYAInst<(outs R32C:$rT), (ins R16C:$rA, R16C:$rB, R32C:$rC),
+ [(set R32C:$rT, (add (mul (sext R16C:$rA), (sext R16C:$rB)),
+ R32C:$rC))]>;
+
+def MPYAr32_sextinreg:
+ MPYAInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB, R32C:$rC),
+ [(set R32C:$rT, (add (mul (sext_inreg R32C:$rA, i16),
+ (sext_inreg R32C:$rB, i16)),
+ R32C:$rC))]>;
+
+// mpyh: multiply high, used to synthesize 32-bit multiplies
+class MPYHInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b10100011110, OOL, IOL,
+ "mpyh\t$rT, $rA, $rB", IntegerMulDiv,
+ pattern>;
+
+def MPYHv4i32:
+ MPYHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [/* no pattern */]>;
+
+def MPYHr32:
+ MPYHInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
+ [/* no pattern */]>;
+
+// mpys: multiply high and shift right (returns the top half of
+// a 16-bit multiply, sign extended to 32 bits.)
+
+class MPYSInst<dag OOL, dag IOL>:
+ RRForm<0b11100011110, OOL, IOL,
+ "mpys\t$rT, $rA, $rB", IntegerMulDiv,
+ [/* no pattern */]>;
+
+def MPYSv4i32:
+ MPYSInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>;
+
+def MPYSr16:
+ MPYSInst<(outs R32C:$rT), (ins R16C:$rA, R16C:$rB)>;
+
+// mpyhh: multiply high-high (returns the 32-bit result from multiplying
+// the top 16 bits of the $rA, $rB)
+
+class MPYHHInst<dag OOL, dag IOL>:
+ RRForm<0b01100011110, OOL, IOL,
+ "mpyhh\t$rT, $rA, $rB", IntegerMulDiv,
+ [/* no pattern */]>;
+
+def MPYHHv8i16:
+ MPYHHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>;
+
+def MPYHHr32:
+ MPYHHInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>;
+
+// mpyhha: Multiply high-high, add to $rT:
+
+class MPYHHAInst<dag OOL, dag IOL>:
+ RRForm<0b01100010110, OOL, IOL,
+ "mpyhha\t$rT, $rA, $rB", IntegerMulDiv,
+ [/* no pattern */]>;
+
+def MPYHHAvec:
+ MPYHHAInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>;
+
+def MPYHHAr32:
+ MPYHHAInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>;
+
+// mpyhhu: Multiply high-high, unsigned, e.g.:
+//
+// +-------+-------+ +-------+-------+ +---------+
+// | a0 . a1 | x | b0 . b1 | = | a0 x b0 |
+// +-------+-------+ +-------+-------+ +---------+
+//
+// where a0, b0 are the upper 16 bits of the 32-bit word
+
+class MPYHHUInst<dag OOL, dag IOL>:
+ RRForm<0b01110011110, OOL, IOL,
+ "mpyhhu\t$rT, $rA, $rB", IntegerMulDiv,
+ [/* no pattern */]>;
+
+def MPYHHUv4i32:
+ MPYHHUInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>;
+
+def MPYHHUr32:
+ MPYHHUInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>;
+
+// mpyhhau: Multiply high-high, unsigned
+
+class MPYHHAUInst<dag OOL, dag IOL>:
+ RRForm<0b01110010110, OOL, IOL,
+ "mpyhhau\t$rT, $rA, $rB", IntegerMulDiv,
+ [/* no pattern */]>;
+
+def MPYHHAUvec:
+ MPYHHAUInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>;
+
+def MPYHHAUr32:
+ MPYHHAUInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// clz: Count leading zeroes
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+class CLZInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm_1<0b10100101010, OOL, IOL, "clz\t$rT, $rA",
+ IntegerOp, pattern>;
+
+class CLZRegInst<RegisterClass rclass>:
+ CLZInst<(outs rclass:$rT), (ins rclass:$rA),
+ [(set rclass:$rT, (ctlz rclass:$rA))]>;
+
+class CLZVecInst<ValueType vectype>:
+ CLZInst<(outs VECREG:$rT), (ins VECREG:$rA),
+ [(set (vectype VECREG:$rT), (ctlz (vectype VECREG:$rA)))]>;
+
+multiclass CountLeadingZeroes {
+ def v4i32 : CLZVecInst<v4i32>;
+ def r32 : CLZRegInst<R32C>;
+}
+
+defm CLZ : CountLeadingZeroes;
+
+// cntb: Count ones in bytes (aka "population count")
+//
+// NOTE: This instruction is really a vector instruction, but the custom
+// lowering code uses it in unorthodox ways to support CTPOP for other
+// data types!
+
+def CNTBv16i8:
+ RRForm_1<0b00101101010, (outs VECREG:$rT), (ins VECREG:$rA),
+ "cntb\t$rT, $rA", IntegerOp,
+ [(set (v16i8 VECREG:$rT), (SPUcntb (v16i8 VECREG:$rA)))]>;
+
+def CNTBv8i16 :
+ RRForm_1<0b00101101010, (outs VECREG:$rT), (ins VECREG:$rA),
+ "cntb\t$rT, $rA", IntegerOp,
+ [(set (v8i16 VECREG:$rT), (SPUcntb (v8i16 VECREG:$rA)))]>;
+
+def CNTBv4i32 :
+ RRForm_1<0b00101101010, (outs VECREG:$rT), (ins VECREG:$rA),
+ "cntb\t$rT, $rA", IntegerOp,
+ [(set (v4i32 VECREG:$rT), (SPUcntb (v4i32 VECREG:$rA)))]>;
+
+// gbb: Gather the low order bits from each byte in $rA into a single 16-bit
+// quantity stored into $rT's slot 0, upper 16 bits are zeroed, as are
+// slots 1-3.
+//
+// Note: This instruction "pairs" with the fsmb instruction for all of the
+// various types defined here.
+//
+// Note 2: The "VecInst" and "RegInst" forms refer to the result being either
+// a vector or register.
+
+class GBBInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm_1<0b01001101100, OOL, IOL, "gbb\t$rT, $rA", GatherOp, pattern>;
+
+class GBBRegInst<RegisterClass rclass, ValueType vectype>:
+ GBBInst<(outs rclass:$rT), (ins VECREG:$rA),
+ [/* no pattern */]>;
+
+class GBBVecInst<ValueType vectype>:
+ GBBInst<(outs VECREG:$rT), (ins VECREG:$rA),
+ [/* no pattern */]>;
+
+multiclass GatherBitsFromBytes {
+ def v16i8_r32: GBBRegInst<R32C, v16i8>;
+ def v16i8_r16: GBBRegInst<R16C, v16i8>;
+ def v16i8: GBBVecInst<v16i8>;
+}
+
+defm GBB: GatherBitsFromBytes;
+
+// gbh: Gather all low order bits from each halfword in $rA into a single
+// 8-bit quantity stored in $rT's slot 0, with the upper bits of $rT set to 0
+// and slots 1-3 also set to 0.
+//
+// See notes for GBBInst, above.
+
+class GBHInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm_1<0b10001101100, OOL, IOL, "gbh\t$rT, $rA", GatherOp,
+ pattern>;
+
+class GBHRegInst<RegisterClass rclass, ValueType vectype>:
+ GBHInst<(outs rclass:$rT), (ins VECREG:$rA),
+ [/* no pattern */]>;
+
+class GBHVecInst<ValueType vectype>:
+ GBHInst<(outs VECREG:$rT), (ins VECREG:$rA),
+ [/* no pattern */]>;
+
+multiclass GatherBitsHalfword {
+ def v8i16_r32: GBHRegInst<R32C, v8i16>;
+ def v8i16_r16: GBHRegInst<R16C, v8i16>;
+ def v8i16: GBHVecInst<v8i16>;
+}
+
+defm GBH: GatherBitsHalfword;
+
+// gb: Gather all low order bits from each word in $rA into a single
+// 4-bit quantity stored in $rT's slot 0, upper bits in $rT set to 0,
+// as well as slots 1-3.
+//
+// See notes for gbb, above.
+
+class GBInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm_1<0b00001101100, OOL, IOL, "gb\t$rT, $rA", GatherOp,
+ pattern>;
+
+class GBRegInst<RegisterClass rclass, ValueType vectype>:
+ GBInst<(outs rclass:$rT), (ins VECREG:$rA),
+ [/* no pattern */]>;
+
+class GBVecInst<ValueType vectype>:
+ GBInst<(outs VECREG:$rT), (ins VECREG:$rA),
+ [/* no pattern */]>;
+
+multiclass GatherBitsWord {
+ def v4i32_r32: GBRegInst<R32C, v4i32>;
+ def v4i32_r16: GBRegInst<R16C, v4i32>;
+ def v4i32: GBVecInst<v4i32>;
+}
+
+defm GB: GatherBitsWord;
+
+// avgb: average bytes
+def AVGB:
+ RRForm<0b11001011000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "avgb\t$rT, $rA, $rB", ByteOp,
+ []>;
+
+// absdb: absolute difference of bytes
+def ABSDB:
+ RRForm<0b11001010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "absdb\t$rT, $rA, $rB", ByteOp,
+ []>;
+
+// sumb: sum bytes into halfwords
+def SUMB:
+ RRForm<0b11001010010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "sumb\t$rT, $rA, $rB", ByteOp,
+ []>;
+
+// Sign extension operations:
+class XSBHInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm_1<0b01101101010, OOL, IOL,
+ "xsbh\t$rDst, $rSrc",
+ IntegerOp, pattern>;
+
+class XSBHInRegInst<RegisterClass rclass, list<dag> pattern>:
+ XSBHInst<(outs rclass:$rDst), (ins rclass:$rSrc),
+ pattern>;
+
+multiclass ExtendByteHalfword {
+ def v16i8: XSBHInst<(outs VECREG:$rDst), (ins VECREG:$rSrc),
+ [
+ /*(set (v8i16 VECREG:$rDst), (sext (v8i16 VECREG:$rSrc)))*/]>;
+ def r8: XSBHInst<(outs R16C:$rDst), (ins R8C:$rSrc),
+ [(set R16C:$rDst, (sext R8C:$rSrc))]>;
+ def r16: XSBHInRegInst<R16C,
+ [(set R16C:$rDst, (sext_inreg R16C:$rSrc, i8))]>;
+
+ // 32-bit form for XSBH: used to sign extend 8-bit quantities to 16-bit
+ // quantities to 32-bit quantities via a 32-bit register (see the sext 8->32
+ // pattern below). Intentionally doesn't match a pattern because we want the
+ // sext 8->32 pattern to do the work for us, namely because we need the extra
+ // XSHWr32.
+ def r32: XSBHInRegInst<R32C, [/* no pattern */]>;
+
+ // Same as the 32-bit version, but for i64
+ def r64: XSBHInRegInst<R64C, [/* no pattern */]>;
+}
+
+defm XSBH : ExtendByteHalfword;
+
+// Sign extend halfwords to words:
+
+class XSHWInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm_1<0b01101101010, OOL, IOL, "xshw\t$rDest, $rSrc",
+ IntegerOp, pattern>;
+
+class XSHWVecInst<ValueType in_vectype, ValueType out_vectype>:
+ XSHWInst<(outs VECREG:$rDest), (ins VECREG:$rSrc),
+ [(set (out_vectype VECREG:$rDest),
+ (sext (in_vectype VECREG:$rSrc)))]>;
+
+class XSHWInRegInst<RegisterClass rclass, list<dag> pattern>:
+ XSHWInst<(outs rclass:$rDest), (ins rclass:$rSrc),
+ pattern>;
+
+class XSHWRegInst<RegisterClass rclass>:
+ XSHWInst<(outs rclass:$rDest), (ins R16C:$rSrc),
+ [(set rclass:$rDest, (sext R16C:$rSrc))]>;
+
+multiclass ExtendHalfwordWord {
+ def v4i32: XSHWVecInst<v8i16, v4i32>;
+
+ def r16: XSHWRegInst<R32C>;
+
+ def r32: XSHWInRegInst<R32C,
+ [(set R32C:$rDest, (sext_inreg R32C:$rSrc, i16))]>;
+ def r64: XSHWInRegInst<R64C, [/* no pattern */]>;
+}
+
+defm XSHW : ExtendHalfwordWord;
+
+// Sign-extend words to doublewords (32->64 bits)
+
+class XSWDInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm_1<0b01100101010, OOL, IOL, "xswd\t$rDst, $rSrc",
+ IntegerOp, pattern>;
+
+class XSWDVecInst<ValueType in_vectype, ValueType out_vectype>:
+ XSWDInst<(outs VECREG:$rDst), (ins VECREG:$rSrc),
+ [/*(set (out_vectype VECREG:$rDst),
+ (sext (out_vectype VECREG:$rSrc)))*/]>;
+
+class XSWDRegInst<RegisterClass in_rclass, RegisterClass out_rclass>:
+ XSWDInst<(outs out_rclass:$rDst), (ins in_rclass:$rSrc),
+ [(set out_rclass:$rDst, (sext in_rclass:$rSrc))]>;
+
+multiclass ExtendWordToDoubleWord {
+ def v2i64: XSWDVecInst<v4i32, v2i64>;
+ def r64: XSWDRegInst<R32C, R64C>;
+
+ def r64_inreg: XSWDInst<(outs R64C:$rDst), (ins R64C:$rSrc),
+ [(set R64C:$rDst, (sext_inreg R64C:$rSrc, i32))]>;
+}
+
+defm XSWD : ExtendWordToDoubleWord;
+
+// AND operations
+
+class ANDInst<dag OOL, dag IOL, list<dag> pattern> :
+ RRForm<0b10000011000, OOL, IOL, "and\t$rT, $rA, $rB",
+ IntegerOp, pattern>;
+
+class ANDVecInst<ValueType vectype>:
+ ANDInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT), (and (vectype VECREG:$rA),
+ (vectype VECREG:$rB)))]>;
+
+class ANDRegInst<RegisterClass rclass>:
+ ANDInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT, (and rclass:$rA, rclass:$rB))]>;
+
+multiclass BitwiseAnd
+{
+ def v16i8: ANDVecInst<v16i8>;
+ def v8i16: ANDVecInst<v8i16>;
+ def v4i32: ANDVecInst<v4i32>;
+ def v2i64: ANDVecInst<v2i64>;
+
+ def r128: ANDRegInst<GPRC>;
+ def r64: ANDRegInst<R64C>;
+ def r32: ANDRegInst<R32C>;
+ def r16: ANDRegInst<R16C>;
+ def r8: ANDRegInst<R8C>;
+
+ //===---------------------------------------------
+ // Special instructions to perform the fabs instruction
+ def fabs32: ANDInst<(outs R32FP:$rT), (ins R32FP:$rA, R32C:$rB),
+ [/* Intentionally does not match a pattern */]>;
+
+ def fabs64: ANDInst<(outs R64FP:$rT), (ins R64FP:$rA, R64C:$rB),
+ [/* Intentionally does not match a pattern */]>;
+
+ def fabsvec: ANDInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [/* Intentionally does not match a pattern */]>;
+
+ //===---------------------------------------------
+
+ // Hacked form of AND to zero-extend 16-bit quantities to 32-bit
+ // quantities -- see 16->32 zext pattern.
+ //
+ // This pattern is somewhat artificial, since it might match some
+ // compiler generated pattern but it is unlikely to do so.
+
+ def i16i32: ANDInst<(outs R32C:$rT), (ins R16C:$rA, R32C:$rB),
+ [(set R32C:$rT, (and (zext R16C:$rA), R32C:$rB))]>;
+}
+
+defm AND : BitwiseAnd;
+
+
+def vnot_cell_conv : PatFrag<(ops node:$in),
+ (xor node:$in, (bitconvert (v4i32 immAllOnesV)))>;
+
+// N.B.: vnot_cell_conv is one of those special target selection pattern
+// fragments,
+// in which we expect there to be a bit_convert on the constant. Bear in mind
+// that llvm translates "not <reg>" to "xor <reg>, -1" (or in this case, a
+// constant -1 vector.)
+
+class ANDCInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b10000011010, OOL, IOL, "andc\t$rT, $rA, $rB",
+ IntegerOp, pattern>;
+
+class ANDCVecInst<ValueType vectype, PatFrag vnot_frag = vnot>:
+ ANDCInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT),
+ (and (vectype VECREG:$rA),
+ (vnot_frag (vectype VECREG:$rB))))]>;
+
+class ANDCRegInst<RegisterClass rclass>:
+ ANDCInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT, (and rclass:$rA, (not rclass:$rB)))]>;
+
+multiclass AndComplement
+{
+ def v16i8: ANDCVecInst<v16i8>;
+ def v8i16: ANDCVecInst<v8i16>;
+ def v4i32: ANDCVecInst<v4i32>;
+ def v2i64: ANDCVecInst<v2i64>;
+
+ def r128: ANDCRegInst<GPRC>;
+ def r64: ANDCRegInst<R64C>;
+ def r32: ANDCRegInst<R32C>;
+ def r16: ANDCRegInst<R16C>;
+ def r8: ANDCRegInst<R8C>;
+
+ // Sometimes, the xor pattern has a bitcast constant:
+ def v16i8_conv: ANDCVecInst<v16i8, vnot_cell_conv>;
+}
+
+defm ANDC : AndComplement;
+
+class ANDBIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI10Form<0b01101000, OOL, IOL, "andbi\t$rT, $rA, $val",
+ ByteOp, pattern>;
+
+multiclass AndByteImm
+{
+ def v16i8: ANDBIInst<(outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val),
+ [(set (v16i8 VECREG:$rT),
+ (and (v16i8 VECREG:$rA),
+ (v16i8 v16i8U8Imm:$val)))]>;
+
+ def r8: ANDBIInst<(outs R8C:$rT), (ins R8C:$rA, u10imm_i8:$val),
+ [(set R8C:$rT, (and R8C:$rA, immU8:$val))]>;
+}
+
+defm ANDBI : AndByteImm;
+
+class ANDHIInst<dag OOL, dag IOL, list<dag> pattern> :
+ RI10Form<0b10101000, OOL, IOL, "andhi\t$rT, $rA, $val",
+ ByteOp, pattern>;
+
+multiclass AndHalfwordImm
+{
+ def v8i16: ANDHIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ [(set (v8i16 VECREG:$rT),
+ (and (v8i16 VECREG:$rA), v8i16SExt10Imm:$val))]>;
+
+ def r16: ANDHIInst<(outs R16C:$rT), (ins R16C:$rA, u10imm:$val),
+ [(set R16C:$rT, (and R16C:$rA, i16ImmUns10:$val))]>;
+
+ // Zero-extend i8 to i16:
+ def i8i16: ANDHIInst<(outs R16C:$rT), (ins R8C:$rA, u10imm:$val),
+ [(set R16C:$rT, (and (zext R8C:$rA), i16ImmUns10:$val))]>;
+}
+
+defm ANDHI : AndHalfwordImm;
+
+class ANDIInst<dag OOL, dag IOL, list<dag> pattern> :
+ RI10Form<0b00101000, OOL, IOL, "andi\t$rT, $rA, $val",
+ IntegerOp, pattern>;
+
+multiclass AndWordImm
+{
+ def v4i32: ANDIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ [(set (v4i32 VECREG:$rT),
+ (and (v4i32 VECREG:$rA), v4i32SExt10Imm:$val))]>;
+
+ def r32: ANDIInst<(outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val),
+ [(set R32C:$rT, (and R32C:$rA, i32ImmSExt10:$val))]>;
+
+ // Hacked form of ANDI to zero-extend i8 quantities to i32. See the zext 8->32
+ // pattern below.
+ def i8i32: ANDIInst<(outs R32C:$rT), (ins R8C:$rA, s10imm_i32:$val),
+ [(set R32C:$rT,
+ (and (zext R8C:$rA), i32ImmSExt10:$val))]>;
+
+ // Hacked form of ANDI to zero-extend i16 quantities to i32. See the
+ // zext 16->32 pattern below.
+ //
+ // Note that this pattern is somewhat artificial, since it might match
+ // something the compiler generates but is unlikely to occur in practice.
+ def i16i32: ANDIInst<(outs R32C:$rT), (ins R16C:$rA, s10imm_i32:$val),
+ [(set R32C:$rT,
+ (and (zext R16C:$rA), i32ImmSExt10:$val))]>;
+}
+
+defm ANDI : AndWordImm;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// Bitwise OR group:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+// Bitwise "or" (N.B.: These are also register-register copy instructions...)
+class ORInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b10000010000, OOL, IOL, "or\t$rT, $rA, $rB",
+ IntegerOp, pattern>;
+
+class ORVecInst<ValueType vectype>:
+ ORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT), (or (vectype VECREG:$rA),
+ (vectype VECREG:$rB)))]>;
+
+class ORRegInst<RegisterClass rclass>:
+ ORInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT, (or rclass:$rA, rclass:$rB))]>;
+
+
+multiclass BitwiseOr
+{
+ def v16i8: ORVecInst<v16i8>;
+ def v8i16: ORVecInst<v8i16>;
+ def v4i32: ORVecInst<v4i32>;
+ def v2i64: ORVecInst<v2i64>;
+
+ def v4f32: ORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (v4f32 VECREG:$rT),
+ (v4f32 (bitconvert (or (v4i32 VECREG:$rA),
+ (v4i32 VECREG:$rB)))))]>;
+
+ def v2f64: ORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (v2f64 VECREG:$rT),
+ (v2f64 (bitconvert (or (v2i64 VECREG:$rA),
+ (v2i64 VECREG:$rB)))))]>;
+
+ def r128: ORRegInst<GPRC>;
+ def r64: ORRegInst<R64C>;
+ def r32: ORRegInst<R32C>;
+ def r16: ORRegInst<R16C>;
+ def r8: ORRegInst<R8C>;
+
+ // OR instructions used to copy f32 and f64 registers.
+ def f32: ORInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB),
+ [/* no pattern */]>;
+
+ def f64: ORInst<(outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB),
+ [/* no pattern */]>;
+}
+
+defm OR : BitwiseOr;
+
+//===----------------------------------------------------------------------===//
+// SPU::PREFSLOT2VEC and VEC2PREFSLOT re-interpretations of registers
+//===----------------------------------------------------------------------===//
+def : Pat<(v16i8 (SPUprefslot2vec R8C:$rA)),
+ (COPY_TO_REGCLASS R8C:$rA, VECREG)>;
+
+def : Pat<(v8i16 (SPUprefslot2vec R16C:$rA)),
+ (COPY_TO_REGCLASS R16C:$rA, VECREG)>;
+
+def : Pat<(v4i32 (SPUprefslot2vec R32C:$rA)),
+ (COPY_TO_REGCLASS R32C:$rA, VECREG)>;
+
+def : Pat<(v2i64 (SPUprefslot2vec R64C:$rA)),
+ (COPY_TO_REGCLASS R64C:$rA, VECREG)>;
+
+def : Pat<(v4f32 (SPUprefslot2vec R32FP:$rA)),
+ (COPY_TO_REGCLASS R32FP:$rA, VECREG)>;
+
+def : Pat<(v2f64 (SPUprefslot2vec R64FP:$rA)),
+ (COPY_TO_REGCLASS R64FP:$rA, VECREG)>;
+
+def : Pat<(i8 (SPUvec2prefslot (v16i8 VECREG:$rA))),
+ (COPY_TO_REGCLASS (v16i8 VECREG:$rA), R8C)>;
+
+def : Pat<(i16 (SPUvec2prefslot (v8i16 VECREG:$rA))),
+ (COPY_TO_REGCLASS (v8i16 VECREG:$rA), R16C)>;
+
+def : Pat<(i32 (SPUvec2prefslot (v4i32 VECREG:$rA))),
+ (COPY_TO_REGCLASS (v4i32 VECREG:$rA), R32C)>;
+
+def : Pat<(i64 (SPUvec2prefslot (v2i64 VECREG:$rA))),
+ (COPY_TO_REGCLASS (v2i64 VECREG:$rA), R64C)>;
+
+def : Pat<(f32 (SPUvec2prefslot (v4f32 VECREG:$rA))),
+ (COPY_TO_REGCLASS (v4f32 VECREG:$rA), R32FP)>;
+
+def : Pat<(f64 (SPUvec2prefslot (v2f64 VECREG:$rA))),
+ (COPY_TO_REGCLASS (v2f64 VECREG:$rA), R64FP)>;
+
+// Load Register: This is an assembler alias for a bitwise OR of a register
+// against itself. It's here because it brings some clarity to assembly
+// language output.
+
+let hasCtrlDep = 1 in {
+ class LRInst<dag OOL, dag IOL>
+ : SPUInstr<OOL, IOL, "lr\t$rT, $rA", IntegerOp> {
+ bits<7> RA;
+ bits<7> RT;
+
+ let Pattern = [/*no pattern*/];
+
+ let Inst{0-10} = 0b10000010000; /* It's an OR operation */
+ let Inst{11-17} = RA;
+ let Inst{18-24} = RA;
+ let Inst{25-31} = RT;
+ }
+
+ class LRVecInst<ValueType vectype>:
+ LRInst<(outs VECREG:$rT), (ins VECREG:$rA)>;
+
+ class LRRegInst<RegisterClass rclass>:
+ LRInst<(outs rclass:$rT), (ins rclass:$rA)>;
+
+ multiclass LoadRegister {
+ def v2i64: LRVecInst<v2i64>;
+ def v2f64: LRVecInst<v2f64>;
+ def v4i32: LRVecInst<v4i32>;
+ def v4f32: LRVecInst<v4f32>;
+ def v8i16: LRVecInst<v8i16>;
+ def v16i8: LRVecInst<v16i8>;
+
+ def r128: LRRegInst<GPRC>;
+ def r64: LRRegInst<R64C>;
+ def f64: LRRegInst<R64FP>;
+ def r32: LRRegInst<R32C>;
+ def f32: LRRegInst<R32FP>;
+ def r16: LRRegInst<R16C>;
+ def r8: LRRegInst<R8C>;
+ }
+
+ defm LR: LoadRegister;
+}
+
+// ORC: Bitwise "or" with complement (c = a | ~b)
+
+class ORCInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b10010010000, OOL, IOL, "orc\t$rT, $rA, $rB",
+ IntegerOp, pattern>;
+
+class ORCVecInst<ValueType vectype>:
+ ORCInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT), (or (vectype VECREG:$rA),
+ (vnot (vectype VECREG:$rB))))]>;
+
+class ORCRegInst<RegisterClass rclass>:
+ ORCInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT, (or rclass:$rA, (not rclass:$rB)))]>;
+
+multiclass BitwiseOrComplement
+{
+ def v16i8: ORCVecInst<v16i8>;
+ def v8i16: ORCVecInst<v8i16>;
+ def v4i32: ORCVecInst<v4i32>;
+ def v2i64: ORCVecInst<v2i64>;
+
+ def r128: ORCRegInst<GPRC>;
+ def r64: ORCRegInst<R64C>;
+ def r32: ORCRegInst<R32C>;
+ def r16: ORCRegInst<R16C>;
+ def r8: ORCRegInst<R8C>;
+}
+
+defm ORC : BitwiseOrComplement;
+
+// OR byte immediate
+class ORBIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI10Form<0b01100000, OOL, IOL, "orbi\t$rT, $rA, $val",
+ IntegerOp, pattern>;
+
+class ORBIVecInst<ValueType vectype, PatLeaf immpred>:
+ ORBIInst<(outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val),
+ [(set (v16i8 VECREG:$rT), (or (vectype VECREG:$rA),
+ (vectype immpred:$val)))]>;
+
+multiclass BitwiseOrByteImm
+{
+ def v16i8: ORBIVecInst<v16i8, v16i8U8Imm>;
+
+ def r8: ORBIInst<(outs R8C:$rT), (ins R8C:$rA, u10imm_i8:$val),
+ [(set R8C:$rT, (or R8C:$rA, immU8:$val))]>;
+}
+
+defm ORBI : BitwiseOrByteImm;
+
+// OR halfword immediate
+class ORHIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI10Form<0b10100000, OOL, IOL, "orhi\t$rT, $rA, $val",
+ IntegerOp, pattern>;
+
+class ORHIVecInst<ValueType vectype, PatLeaf immpred>:
+ ORHIInst<(outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val),
+ [(set (vectype VECREG:$rT), (or (vectype VECREG:$rA),
+ immpred:$val))]>;
+
+multiclass BitwiseOrHalfwordImm
+{
+ def v8i16: ORHIVecInst<v8i16, v8i16Uns10Imm>;
+
+ def r16: ORHIInst<(outs R16C:$rT), (ins R16C:$rA, u10imm:$val),
+ [(set R16C:$rT, (or R16C:$rA, i16ImmUns10:$val))]>;
+
+ // Specialized ORHI form used to promote 8-bit registers to 16-bit
+ def i8i16: ORHIInst<(outs R16C:$rT), (ins R8C:$rA, s10imm:$val),
+ [(set R16C:$rT, (or (anyext R8C:$rA),
+ i16ImmSExt10:$val))]>;
+}
+
+defm ORHI : BitwiseOrHalfwordImm;
+
+class ORIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI10Form<0b00100000, OOL, IOL, "ori\t$rT, $rA, $val",
+ IntegerOp, pattern>;
+
+class ORIVecInst<ValueType vectype, PatLeaf immpred>:
+ ORIInst<(outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val),
+ [(set (vectype VECREG:$rT), (or (vectype VECREG:$rA),
+ immpred:$val))]>;
+
+// Bitwise "or" with immediate
+multiclass BitwiseOrImm
+{
+ def v4i32: ORIVecInst<v4i32, v4i32Uns10Imm>;
+
+ def r32: ORIInst<(outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val),
+ [(set R32C:$rT, (or R32C:$rA, i32ImmSExt10:$val))]>;
+
+ // i16i32: hacked version of the ori instruction to extend 16-bit quantities
+ // to 32-bit quantities. used exclusively to match "anyext" conversions (vide
+ // infra "anyext 16->32" pattern.)
+ def i16i32: ORIInst<(outs R32C:$rT), (ins R16C:$rA, s10imm_i32:$val),
+ [(set R32C:$rT, (or (anyext R16C:$rA),
+ i32ImmSExt10:$val))]>;
+
+ // i8i32: Hacked version of the ORI instruction to extend 16-bit quantities
+ // to 32-bit quantities. Used exclusively to match "anyext" conversions (vide
+ // infra "anyext 16->32" pattern.)
+ def i8i32: ORIInst<(outs R32C:$rT), (ins R8C:$rA, s10imm_i32:$val),
+ [(set R32C:$rT, (or (anyext R8C:$rA),
+ i32ImmSExt10:$val))]>;
+}
+
+defm ORI : BitwiseOrImm;
+
+// ORX: "or" across the vector: or's $rA's word slots leaving the result in
+// $rT[0], slots 1-3 are zeroed.
+//
+// FIXME: Needs to match an intrinsic pattern.
+def ORXv4i32:
+ RRForm<0b10010010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "orx\t$rT, $rA, $rB", IntegerOp,
+ []>;
+
+// XOR:
+
+class XORInst<dag OOL, dag IOL, list<dag> pattern> :
+ RRForm<0b10010010000, OOL, IOL, "xor\t$rT, $rA, $rB",
+ IntegerOp, pattern>;
+
+class XORVecInst<ValueType vectype>:
+ XORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT), (xor (vectype VECREG:$rA),
+ (vectype VECREG:$rB)))]>;
+
+class XORRegInst<RegisterClass rclass>:
+ XORInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT, (xor rclass:$rA, rclass:$rB))]>;
+
+multiclass BitwiseExclusiveOr
+{
+ def v16i8: XORVecInst<v16i8>;
+ def v8i16: XORVecInst<v8i16>;
+ def v4i32: XORVecInst<v4i32>;
+ def v2i64: XORVecInst<v2i64>;
+
+ def r128: XORRegInst<GPRC>;
+ def r64: XORRegInst<R64C>;
+ def r32: XORRegInst<R32C>;
+ def r16: XORRegInst<R16C>;
+ def r8: XORRegInst<R8C>;
+
+ // XOR instructions used to negate f32 and f64 quantities.
+
+ def fneg32: XORInst<(outs R32FP:$rT), (ins R32FP:$rA, R32C:$rB),
+ [/* no pattern */]>;
+
+ def fneg64: XORInst<(outs R64FP:$rT), (ins R64FP:$rA, R64C:$rB),
+ [/* no pattern */]>;
+
+ def fnegvec: XORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [/* no pattern, see fneg{32,64} */]>;
+}
+
+defm XOR : BitwiseExclusiveOr;
+
+//==----------------------------------------------------------
+
+class XORBIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI10Form<0b01100000, OOL, IOL, "xorbi\t$rT, $rA, $val",
+ IntegerOp, pattern>;
+
+multiclass XorByteImm
+{
+ def v16i8:
+ XORBIInst<(outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val),
+ [(set (v16i8 VECREG:$rT), (xor (v16i8 VECREG:$rA), v16i8U8Imm:$val))]>;
+
+ def r8:
+ XORBIInst<(outs R8C:$rT), (ins R8C:$rA, u10imm_i8:$val),
+ [(set R8C:$rT, (xor R8C:$rA, immU8:$val))]>;
+}
+
+defm XORBI : XorByteImm;
+
+def XORHIv8i16:
+ RI10Form<0b10100000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val),
+ "xorhi\t$rT, $rA, $val", IntegerOp,
+ [(set (v8i16 VECREG:$rT), (xor (v8i16 VECREG:$rA),
+ v8i16SExt10Imm:$val))]>;
+
+def XORHIr16:
+ RI10Form<0b10100000, (outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
+ "xorhi\t$rT, $rA, $val", IntegerOp,
+ [(set R16C:$rT, (xor R16C:$rA, i16ImmSExt10:$val))]>;
+
+def XORIv4i32:
+ RI10Form<0b00100000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm_i32:$val),
+ "xori\t$rT, $rA, $val", IntegerOp,
+ [(set (v4i32 VECREG:$rT), (xor (v4i32 VECREG:$rA),
+ v4i32SExt10Imm:$val))]>;
+
+def XORIr32:
+ RI10Form<0b00100000, (outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val),
+ "xori\t$rT, $rA, $val", IntegerOp,
+ [(set R32C:$rT, (xor R32C:$rA, i32ImmSExt10:$val))]>;
+
+// NAND:
+
+class NANDInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b10010011000, OOL, IOL, "nand\t$rT, $rA, $rB",
+ IntegerOp, pattern>;
+
+class NANDVecInst<ValueType vectype>:
+ NANDInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT), (vnot (and (vectype VECREG:$rA),
+ (vectype VECREG:$rB))))]>;
+class NANDRegInst<RegisterClass rclass>:
+ NANDInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT, (not (and rclass:$rA, rclass:$rB)))]>;
+
+multiclass BitwiseNand
+{
+ def v16i8: NANDVecInst<v16i8>;
+ def v8i16: NANDVecInst<v8i16>;
+ def v4i32: NANDVecInst<v4i32>;
+ def v2i64: NANDVecInst<v2i64>;
+
+ def r128: NANDRegInst<GPRC>;
+ def r64: NANDRegInst<R64C>;
+ def r32: NANDRegInst<R32C>;
+ def r16: NANDRegInst<R16C>;
+ def r8: NANDRegInst<R8C>;
+}
+
+defm NAND : BitwiseNand;
+
+// NOR:
+
+class NORInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b10010010000, OOL, IOL, "nor\t$rT, $rA, $rB",
+ IntegerOp, pattern>;
+
+class NORVecInst<ValueType vectype>:
+ NORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT), (vnot (or (vectype VECREG:$rA),
+ (vectype VECREG:$rB))))]>;
+class NORRegInst<RegisterClass rclass>:
+ NORInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT, (not (or rclass:$rA, rclass:$rB)))]>;
+
+multiclass BitwiseNor
+{
+ def v16i8: NORVecInst<v16i8>;
+ def v8i16: NORVecInst<v8i16>;
+ def v4i32: NORVecInst<v4i32>;
+ def v2i64: NORVecInst<v2i64>;
+
+ def r128: NORRegInst<GPRC>;
+ def r64: NORRegInst<R64C>;
+ def r32: NORRegInst<R32C>;
+ def r16: NORRegInst<R16C>;
+ def r8: NORRegInst<R8C>;
+}
+
+defm NOR : BitwiseNor;
+
+// Select bits:
+class SELBInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRRForm<0b1000, OOL, IOL, "selb\t$rT, $rA, $rB, $rC",
+ IntegerOp, pattern>;
+
+class SELBVecInst<ValueType vectype, PatFrag vnot_frag = vnot>:
+ SELBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ [(set (vectype VECREG:$rT),
+ (or (and (vectype VECREG:$rC), (vectype VECREG:$rB)),
+ (and (vnot_frag (vectype VECREG:$rC)),
+ (vectype VECREG:$rA))))]>;
+
+class SELBVecVCondInst<ValueType vectype>:
+ SELBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ [(set (vectype VECREG:$rT),
+ (select (vectype VECREG:$rC),
+ (vectype VECREG:$rB),
+ (vectype VECREG:$rA)))]>;
+
+class SELBVecCondInst<ValueType vectype>:
+ SELBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, R32C:$rC),
+ [(set (vectype VECREG:$rT),
+ (select R32C:$rC,
+ (vectype VECREG:$rB),
+ (vectype VECREG:$rA)))]>;
+
+class SELBRegInst<RegisterClass rclass>:
+ SELBInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB, rclass:$rC),
+ [(set rclass:$rT,
+ (or (and rclass:$rB, rclass:$rC),
+ (and rclass:$rA, (not rclass:$rC))))]>;
+
+class SELBRegCondInst<RegisterClass rcond, RegisterClass rclass>:
+ SELBInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB, rcond:$rC),
+ [(set rclass:$rT,
+ (select rcond:$rC, rclass:$rB, rclass:$rA))]>;
+
+multiclass SelectBits
+{
+ def v16i8: SELBVecInst<v16i8>;
+ def v8i16: SELBVecInst<v8i16>;
+ def v4i32: SELBVecInst<v4i32>;
+ def v2i64: SELBVecInst<v2i64, vnot_cell_conv>;
+
+ def r128: SELBRegInst<GPRC>;
+ def r64: SELBRegInst<R64C>;
+ def r32: SELBRegInst<R32C>;
+ def r16: SELBRegInst<R16C>;
+ def r8: SELBRegInst<R8C>;
+
+ def v16i8_cond: SELBVecCondInst<v16i8>;
+ def v8i16_cond: SELBVecCondInst<v8i16>;
+ def v4i32_cond: SELBVecCondInst<v4i32>;
+ def v2i64_cond: SELBVecCondInst<v2i64>;
+
+ def v16i8_vcond: SELBVecCondInst<v16i8>;
+ def v8i16_vcond: SELBVecCondInst<v8i16>;
+ def v4i32_vcond: SELBVecCondInst<v4i32>;
+ def v2i64_vcond: SELBVecCondInst<v2i64>;
+
+ def v4f32_cond:
+ SELBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ [(set (v4f32 VECREG:$rT),
+ (select (v4i32 VECREG:$rC),
+ (v4f32 VECREG:$rB),
+ (v4f32 VECREG:$rA)))]>;
+
+ // SELBr64_cond is defined in SPU64InstrInfo.td
+ def r32_cond: SELBRegCondInst<R32C, R32C>;
+ def f32_cond: SELBRegCondInst<R32C, R32FP>;
+ def r16_cond: SELBRegCondInst<R16C, R16C>;
+ def r8_cond: SELBRegCondInst<R8C, R8C>;
+}
+
+defm SELB : SelectBits;
+
+class SPUselbPatVec<ValueType vectype, SPUInstr inst>:
+ Pat<(SPUselb (vectype VECREG:$rA), (vectype VECREG:$rB), (vectype VECREG:$rC)),
+ (inst VECREG:$rA, VECREG:$rB, VECREG:$rC)>;
+
+def : SPUselbPatVec<v16i8, SELBv16i8>;
+def : SPUselbPatVec<v8i16, SELBv8i16>;
+def : SPUselbPatVec<v4i32, SELBv4i32>;
+def : SPUselbPatVec<v2i64, SELBv2i64>;
+
+class SPUselbPatReg<RegisterClass rclass, SPUInstr inst>:
+ Pat<(SPUselb rclass:$rA, rclass:$rB, rclass:$rC),
+ (inst rclass:$rA, rclass:$rB, rclass:$rC)>;
+
+def : SPUselbPatReg<R8C, SELBr8>;
+def : SPUselbPatReg<R16C, SELBr16>;
+def : SPUselbPatReg<R32C, SELBr32>;
+def : SPUselbPatReg<R64C, SELBr64>;
+
+// EQV: Equivalence (1 for each same bit, otherwise 0)
+//
+// Note: There are a lot of ways to match this bit operator and these patterns
+// attempt to be as exhaustive as possible.
+
+class EQVInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b10010010000, OOL, IOL, "eqv\t$rT, $rA, $rB",
+ IntegerOp, pattern>;
+
+class EQVVecInst<ValueType vectype>:
+ EQVInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT),
+ (or (and (vectype VECREG:$rA), (vectype VECREG:$rB)),
+ (and (vnot (vectype VECREG:$rA)),
+ (vnot (vectype VECREG:$rB)))))]>;
+
+class EQVRegInst<RegisterClass rclass>:
+ EQVInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT, (or (and rclass:$rA, rclass:$rB),
+ (and (not rclass:$rA), (not rclass:$rB))))]>;
+
+class EQVVecPattern1<ValueType vectype>:
+ EQVInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT),
+ (xor (vectype VECREG:$rA), (vnot (vectype VECREG:$rB))))]>;
+
+class EQVRegPattern1<RegisterClass rclass>:
+ EQVInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT, (xor rclass:$rA, (not rclass:$rB)))]>;
+
+class EQVVecPattern2<ValueType vectype>:
+ EQVInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT),
+ (or (and (vectype VECREG:$rA), (vectype VECREG:$rB)),
+ (vnot (or (vectype VECREG:$rA), (vectype VECREG:$rB)))))]>;
+
+class EQVRegPattern2<RegisterClass rclass>:
+ EQVInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT,
+ (or (and rclass:$rA, rclass:$rB),
+ (not (or rclass:$rA, rclass:$rB))))]>;
+
+class EQVVecPattern3<ValueType vectype>:
+ EQVInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT),
+ (not (xor (vectype VECREG:$rA), (vectype VECREG:$rB))))]>;
+
+class EQVRegPattern3<RegisterClass rclass>:
+ EQVInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT, (not (xor rclass:$rA, rclass:$rB)))]>;
+
+multiclass BitEquivalence
+{
+ def v16i8: EQVVecInst<v16i8>;
+ def v8i16: EQVVecInst<v8i16>;
+ def v4i32: EQVVecInst<v4i32>;
+ def v2i64: EQVVecInst<v2i64>;
+
+ def v16i8_1: EQVVecPattern1<v16i8>;
+ def v8i16_1: EQVVecPattern1<v8i16>;
+ def v4i32_1: EQVVecPattern1<v4i32>;
+ def v2i64_1: EQVVecPattern1<v2i64>;
+
+ def v16i8_2: EQVVecPattern2<v16i8>;
+ def v8i16_2: EQVVecPattern2<v8i16>;
+ def v4i32_2: EQVVecPattern2<v4i32>;
+ def v2i64_2: EQVVecPattern2<v2i64>;
+
+ def v16i8_3: EQVVecPattern3<v16i8>;
+ def v8i16_3: EQVVecPattern3<v8i16>;
+ def v4i32_3: EQVVecPattern3<v4i32>;
+ def v2i64_3: EQVVecPattern3<v2i64>;
+
+ def r128: EQVRegInst<GPRC>;
+ def r64: EQVRegInst<R64C>;
+ def r32: EQVRegInst<R32C>;
+ def r16: EQVRegInst<R16C>;
+ def r8: EQVRegInst<R8C>;
+
+ def r128_1: EQVRegPattern1<GPRC>;
+ def r64_1: EQVRegPattern1<R64C>;
+ def r32_1: EQVRegPattern1<R32C>;
+ def r16_1: EQVRegPattern1<R16C>;
+ def r8_1: EQVRegPattern1<R8C>;
+
+ def r128_2: EQVRegPattern2<GPRC>;
+ def r64_2: EQVRegPattern2<R64C>;
+ def r32_2: EQVRegPattern2<R32C>;
+ def r16_2: EQVRegPattern2<R16C>;
+ def r8_2: EQVRegPattern2<R8C>;
+
+ def r128_3: EQVRegPattern3<GPRC>;
+ def r64_3: EQVRegPattern3<R64C>;
+ def r32_3: EQVRegPattern3<R32C>;
+ def r16_3: EQVRegPattern3<R16C>;
+ def r8_3: EQVRegPattern3<R8C>;
+}
+
+defm EQV: BitEquivalence;
+
+//===----------------------------------------------------------------------===//
+// Vector shuffle...
+//===----------------------------------------------------------------------===//
+// SPUshuffle is generated in LowerVECTOR_SHUFFLE and gets replaced with SHUFB.
+// See the SPUshuffle SDNode operand above, which sets up the DAG pattern
+// matcher to emit something when the LowerVECTOR_SHUFFLE generates a node with
+// the SPUISD::SHUFB opcode.
+//===----------------------------------------------------------------------===//
+
+class SHUFBInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRRForm<0b1000, OOL, IOL, "shufb\t$rT, $rA, $rB, $rC",
+ ShuffleOp, pattern>;
+
+class SHUFBVecInst<ValueType resultvec, ValueType maskvec>:
+ SHUFBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ [(set (resultvec VECREG:$rT),
+ (SPUshuffle (resultvec VECREG:$rA),
+ (resultvec VECREG:$rB),
+ (maskvec VECREG:$rC)))]>;
+
+class SHUFBGPRCInst:
+ SHUFBInst<(outs VECREG:$rT), (ins GPRC:$rA, GPRC:$rB, VECREG:$rC),
+ [/* no pattern */]>;
+
+multiclass ShuffleBytes
+{
+ def v16i8 : SHUFBVecInst<v16i8, v16i8>;
+ def v16i8_m32 : SHUFBVecInst<v16i8, v4i32>;
+ def v8i16 : SHUFBVecInst<v8i16, v16i8>;
+ def v8i16_m32 : SHUFBVecInst<v8i16, v4i32>;
+ def v4i32 : SHUFBVecInst<v4i32, v16i8>;
+ def v4i32_m32 : SHUFBVecInst<v4i32, v4i32>;
+ def v2i64 : SHUFBVecInst<v2i64, v16i8>;
+ def v2i64_m32 : SHUFBVecInst<v2i64, v4i32>;
+
+ def v4f32 : SHUFBVecInst<v4f32, v16i8>;
+ def v4f32_m32 : SHUFBVecInst<v4f32, v4i32>;
+
+ def v2f64 : SHUFBVecInst<v2f64, v16i8>;
+ def v2f64_m32 : SHUFBVecInst<v2f64, v4i32>;
+
+ def gprc : SHUFBGPRCInst;
+}
+
+defm SHUFB : ShuffleBytes;
+
+//===----------------------------------------------------------------------===//
+// Shift and rotate group:
+//===----------------------------------------------------------------------===//
+
+class SHLHInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b11111010000, OOL, IOL, "shlh\t$rT, $rA, $rB",
+ RotShiftVec, pattern>;
+
+class SHLHVecInst<ValueType vectype>:
+ SHLHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT),
+ (SPUvec_shl (vectype VECREG:$rA), (vectype VECREG:$rB)))]>;
+
+multiclass ShiftLeftHalfword
+{
+ def v8i16: SHLHVecInst<v8i16>;
+ def r16: SHLHInst<(outs R16C:$rT), (ins R16C:$rA, R16C:$rB),
+ [(set R16C:$rT, (shl R16C:$rA, R16C:$rB))]>;
+ def r16_r32: SHLHInst<(outs R16C:$rT), (ins R16C:$rA, R32C:$rB),
+ [(set R16C:$rT, (shl R16C:$rA, R32C:$rB))]>;
+}
+
+defm SHLH : ShiftLeftHalfword;
+
+//===----------------------------------------------------------------------===//
+
+class SHLHIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI7Form<0b11111010000, OOL, IOL, "shlhi\t$rT, $rA, $val",
+ RotShiftVec, pattern>;
+
+class SHLHIVecInst<ValueType vectype>:
+ SHLHIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm:$val),
+ [(set (vectype VECREG:$rT),
+ (SPUvec_shl (vectype VECREG:$rA), (i16 uimm7:$val)))]>;
+
+multiclass ShiftLeftHalfwordImm
+{
+ def v8i16: SHLHIVecInst<v8i16>;
+ def r16: SHLHIInst<(outs R16C:$rT), (ins R16C:$rA, u7imm:$val),
+ [(set R16C:$rT, (shl R16C:$rA, (i16 uimm7:$val)))]>;
+}
+
+defm SHLHI : ShiftLeftHalfwordImm;
+
+def : Pat<(SPUvec_shl (v8i16 VECREG:$rA), (i32 uimm7:$val)),
+ (SHLHIv8i16 VECREG:$rA, (TO_IMM16 uimm7:$val))>;
+
+def : Pat<(shl R16C:$rA, (i32 uimm7:$val)),
+ (SHLHIr16 R16C:$rA, (TO_IMM16 uimm7:$val))>;
+
+//===----------------------------------------------------------------------===//
+
+class SHLInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b11111010000, OOL, IOL, "shl\t$rT, $rA, $rB",
+ RotShiftVec, pattern>;
+
+multiclass ShiftLeftWord
+{
+ def v4i32:
+ SHLInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (v4i32 VECREG:$rT),
+ (SPUvec_shl (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+ def r32:
+ SHLInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
+ [(set R32C:$rT, (shl R32C:$rA, R32C:$rB))]>;
+}
+
+defm SHL: ShiftLeftWord;
+
+//===----------------------------------------------------------------------===//
+
+class SHLIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI7Form<0b11111010000, OOL, IOL, "shli\t$rT, $rA, $val",
+ RotShiftVec, pattern>;
+
+multiclass ShiftLeftWordImm
+{
+ def v4i32:
+ SHLIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm_i32:$val),
+ [(set (v4i32 VECREG:$rT),
+ (SPUvec_shl (v4i32 VECREG:$rA), (i32 uimm7:$val)))]>;
+
+ def r32:
+ SHLIInst<(outs R32C:$rT), (ins R32C:$rA, u7imm_i32:$val),
+ [(set R32C:$rT, (shl R32C:$rA, (i32 uimm7:$val)))]>;
+}
+
+defm SHLI : ShiftLeftWordImm;
+
+//===----------------------------------------------------------------------===//
+// SHLQBI vec form: Note that this will shift the entire vector (the 128-bit
+// register) to the left. Vector form is here to ensure type correctness.
+//
+// The shift count is in the lowest 3 bits (29-31) of $rB, so only a bit shift
+// of 7 bits is actually possible.
+//
+// Note also that SHLQBI/SHLQBII are used in conjunction with SHLQBY/SHLQBYI
+// to shift i64 and i128. SHLQBI is the residual left over after shifting by
+// bytes with SHLQBY.
+
+class SHLQBIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b11011011100, OOL, IOL, "shlqbi\t$rT, $rA, $rB",
+ RotShiftQuad, pattern>;
+
+class SHLQBIVecInst<ValueType vectype>:
+ SHLQBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
+ [(set (vectype VECREG:$rT),
+ (SPUshlquad_l_bits (vectype VECREG:$rA), R32C:$rB))]>;
+
+class SHLQBIRegInst<RegisterClass rclass>:
+ SHLQBIInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB),
+ [/* no pattern */]>;
+
+multiclass ShiftLeftQuadByBits
+{
+ def v16i8: SHLQBIVecInst<v16i8>;
+ def v8i16: SHLQBIVecInst<v8i16>;
+ def v4i32: SHLQBIVecInst<v4i32>;
+ def v4f32: SHLQBIVecInst<v4f32>;
+ def v2i64: SHLQBIVecInst<v2i64>;
+ def v2f64: SHLQBIVecInst<v2f64>;
+
+ def r128: SHLQBIRegInst<GPRC>;
+}
+
+defm SHLQBI : ShiftLeftQuadByBits;
+
+// See note above on SHLQBI. In this case, the predicate actually does then
+// enforcement, whereas with SHLQBI, we have to "take it on faith."
+class SHLQBIIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI7Form<0b11011111100, OOL, IOL, "shlqbii\t$rT, $rA, $val",
+ RotShiftQuad, pattern>;
+
+class SHLQBIIVecInst<ValueType vectype>:
+ SHLQBIIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm_i32:$val),
+ [(set (vectype VECREG:$rT),
+ (SPUshlquad_l_bits (vectype VECREG:$rA), (i32 bitshift:$val)))]>;
+
+multiclass ShiftLeftQuadByBitsImm
+{
+ def v16i8 : SHLQBIIVecInst<v16i8>;
+ def v8i16 : SHLQBIIVecInst<v8i16>;
+ def v4i32 : SHLQBIIVecInst<v4i32>;
+ def v4f32 : SHLQBIIVecInst<v4f32>;
+ def v2i64 : SHLQBIIVecInst<v2i64>;
+ def v2f64 : SHLQBIIVecInst<v2f64>;
+}
+
+defm SHLQBII : ShiftLeftQuadByBitsImm;
+
+// SHLQBY, SHLQBYI vector forms: Shift the entire vector to the left by bytes,
+// not by bits. See notes above on SHLQBI.
+
+class SHLQBYInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI7Form<0b11111011100, OOL, IOL, "shlqby\t$rT, $rA, $rB",
+ RotShiftQuad, pattern>;
+
+class SHLQBYVecInst<ValueType vectype>:
+ SHLQBYInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
+ [(set (vectype VECREG:$rT),
+ (SPUshlquad_l_bytes (vectype VECREG:$rA), R32C:$rB))]>;
+
+multiclass ShiftLeftQuadBytes
+{
+ def v16i8: SHLQBYVecInst<v16i8>;
+ def v8i16: SHLQBYVecInst<v8i16>;
+ def v4i32: SHLQBYVecInst<v4i32>;
+ def v4f32: SHLQBYVecInst<v4f32>;
+ def v2i64: SHLQBYVecInst<v2i64>;
+ def v2f64: SHLQBYVecInst<v2f64>;
+ def r128: SHLQBYInst<(outs GPRC:$rT), (ins GPRC:$rA, R32C:$rB),
+ [(set GPRC:$rT, (SPUshlquad_l_bytes GPRC:$rA, R32C:$rB))]>;
+}
+
+defm SHLQBY: ShiftLeftQuadBytes;
+
+class SHLQBYIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI7Form<0b11111111100, OOL, IOL, "shlqbyi\t$rT, $rA, $val",
+ RotShiftQuad, pattern>;
+
+class SHLQBYIVecInst<ValueType vectype>:
+ SHLQBYIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm_i32:$val),
+ [(set (vectype VECREG:$rT),
+ (SPUshlquad_l_bytes (vectype VECREG:$rA), (i32 uimm7:$val)))]>;
+
+multiclass ShiftLeftQuadBytesImm
+{
+ def v16i8: SHLQBYIVecInst<v16i8>;
+ def v8i16: SHLQBYIVecInst<v8i16>;
+ def v4i32: SHLQBYIVecInst<v4i32>;
+ def v4f32: SHLQBYIVecInst<v4f32>;
+ def v2i64: SHLQBYIVecInst<v2i64>;
+ def v2f64: SHLQBYIVecInst<v2f64>;
+ def r128: SHLQBYIInst<(outs GPRC:$rT), (ins GPRC:$rA, u7imm_i32:$val),
+ [(set GPRC:$rT,
+ (SPUshlquad_l_bytes GPRC:$rA, (i32 uimm7:$val)))]>;
+}
+
+defm SHLQBYI : ShiftLeftQuadBytesImm;
+
+class SHLQBYBIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b00111001111, OOL, IOL, "shlqbybi\t$rT, $rA, $rB",
+ RotShiftQuad, pattern>;
+
+class SHLQBYBIVecInst<ValueType vectype>:
+ SHLQBYBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
+ [/* no pattern */]>;
+
+class SHLQBYBIRegInst<RegisterClass rclass>:
+ SHLQBYBIInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB),
+ [/* no pattern */]>;
+
+multiclass ShiftLeftQuadBytesBitCount
+{
+ def v16i8: SHLQBYBIVecInst<v16i8>;
+ def v8i16: SHLQBYBIVecInst<v8i16>;
+ def v4i32: SHLQBYBIVecInst<v4i32>;
+ def v4f32: SHLQBYBIVecInst<v4f32>;
+ def v2i64: SHLQBYBIVecInst<v2i64>;
+ def v2f64: SHLQBYBIVecInst<v2f64>;
+
+ def r128: SHLQBYBIRegInst<GPRC>;
+}
+
+defm SHLQBYBI : ShiftLeftQuadBytesBitCount;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// Rotate halfword:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+class ROTHInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b00111010000, OOL, IOL, "roth\t$rT, $rA, $rB",
+ RotShiftVec, pattern>;
+
+class ROTHVecInst<ValueType vectype>:
+ ROTHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT),
+ (SPUvec_rotl VECREG:$rA, (v8i16 VECREG:$rB)))]>;
+
+class ROTHRegInst<RegisterClass rclass>:
+ ROTHInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT, (rotl rclass:$rA, rclass:$rB))]>;
+
+multiclass RotateLeftHalfword
+{
+ def v8i16: ROTHVecInst<v8i16>;
+ def r16: ROTHRegInst<R16C>;
+}
+
+defm ROTH: RotateLeftHalfword;
+
+def ROTHr16_r32: ROTHInst<(outs R16C:$rT), (ins R16C:$rA, R32C:$rB),
+ [(set R16C:$rT, (rotl R16C:$rA, R32C:$rB))]>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// Rotate halfword, immediate:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+class ROTHIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI7Form<0b00111110000, OOL, IOL, "rothi\t$rT, $rA, $val",
+ RotShiftVec, pattern>;
+
+class ROTHIVecInst<ValueType vectype>:
+ ROTHIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm:$val),
+ [(set (vectype VECREG:$rT),
+ (SPUvec_rotl VECREG:$rA, (i16 uimm7:$val)))]>;
+
+multiclass RotateLeftHalfwordImm
+{
+ def v8i16: ROTHIVecInst<v8i16>;
+ def r16: ROTHIInst<(outs R16C:$rT), (ins R16C:$rA, u7imm:$val),
+ [(set R16C:$rT, (rotl R16C:$rA, (i16 uimm7:$val)))]>;
+ def r16_r32: ROTHIInst<(outs R16C:$rT), (ins R16C:$rA, u7imm_i32:$val),
+ [(set R16C:$rT, (rotl R16C:$rA, (i32 uimm7:$val)))]>;
+}
+
+defm ROTHI: RotateLeftHalfwordImm;
+
+def : Pat<(SPUvec_rotl (v8i16 VECREG:$rA), (i32 uimm7:$val)),
+ (ROTHIv8i16 VECREG:$rA, (TO_IMM16 imm:$val))>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// Rotate word:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class ROTInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b00011010000, OOL, IOL, "rot\t$rT, $rA, $rB",
+ RotShiftVec, pattern>;
+
+class ROTVecInst<ValueType vectype>:
+ ROTInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
+ [(set (vectype VECREG:$rT),
+ (SPUvec_rotl (vectype VECREG:$rA), R32C:$rB))]>;
+
+class ROTRegInst<RegisterClass rclass>:
+ ROTInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB),
+ [(set rclass:$rT,
+ (rotl rclass:$rA, R32C:$rB))]>;
+
+multiclass RotateLeftWord
+{
+ def v4i32: ROTVecInst<v4i32>;
+ def r32: ROTRegInst<R32C>;
+}
+
+defm ROT: RotateLeftWord;
+
+// The rotate amount is in the same bits whether we've got an 8-bit, 16-bit or
+// 32-bit register
+def ROTr32_r16_anyext:
+ ROTInst<(outs R32C:$rT), (ins R32C:$rA, R16C:$rB),
+ [(set R32C:$rT, (rotl R32C:$rA, (i32 (anyext R16C:$rB))))]>;
+
+def : Pat<(rotl R32C:$rA, (i32 (zext R16C:$rB))),
+ (ROTr32_r16_anyext R32C:$rA, R16C:$rB)>;
+
+def : Pat<(rotl R32C:$rA, (i32 (sext R16C:$rB))),
+ (ROTr32_r16_anyext R32C:$rA, R16C:$rB)>;
+
+def ROTr32_r8_anyext:
+ ROTInst<(outs R32C:$rT), (ins R32C:$rA, R8C:$rB),
+ [(set R32C:$rT, (rotl R32C:$rA, (i32 (anyext R8C:$rB))))]>;
+
+def : Pat<(rotl R32C:$rA, (i32 (zext R8C:$rB))),
+ (ROTr32_r8_anyext R32C:$rA, R8C:$rB)>;
+
+def : Pat<(rotl R32C:$rA, (i32 (sext R8C:$rB))),
+ (ROTr32_r8_anyext R32C:$rA, R8C:$rB)>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// Rotate word, immediate
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class ROTIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI7Form<0b00011110000, OOL, IOL, "roti\t$rT, $rA, $val",
+ RotShiftVec, pattern>;
+
+class ROTIVecInst<ValueType vectype, Operand optype, ValueType inttype, PatLeaf pred>:
+ ROTIInst<(outs VECREG:$rT), (ins VECREG:$rA, optype:$val),
+ [(set (vectype VECREG:$rT),
+ (SPUvec_rotl (vectype VECREG:$rA), (inttype pred:$val)))]>;
+
+class ROTIRegInst<RegisterClass rclass, Operand optype, ValueType inttype, PatLeaf pred>:
+ ROTIInst<(outs rclass:$rT), (ins rclass:$rA, optype:$val),
+ [(set rclass:$rT, (rotl rclass:$rA, (inttype pred:$val)))]>;
+
+multiclass RotateLeftWordImm
+{
+ def v4i32: ROTIVecInst<v4i32, u7imm_i32, i32, uimm7>;
+ def v4i32_i16: ROTIVecInst<v4i32, u7imm, i16, uimm7>;
+ def v4i32_i8: ROTIVecInst<v4i32, u7imm_i8, i8, uimm7>;
+
+ def r32: ROTIRegInst<R32C, u7imm_i32, i32, uimm7>;
+ def r32_i16: ROTIRegInst<R32C, u7imm, i16, uimm7>;
+ def r32_i8: ROTIRegInst<R32C, u7imm_i8, i8, uimm7>;
+}
+
+defm ROTI : RotateLeftWordImm;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// Rotate quad by byte (count)
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class ROTQBYInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b00111011100, OOL, IOL, "rotqby\t$rT, $rA, $rB",
+ RotShiftQuad, pattern>;
+
+class ROTQBYGenInst<ValueType type, RegisterClass rc>:
+ ROTQBYInst<(outs rc:$rT), (ins rc:$rA, R32C:$rB),
+ [(set (type rc:$rT),
+ (SPUrotbytes_left (type rc:$rA), R32C:$rB))]>;
+
+class ROTQBYVecInst<ValueType type>:
+ ROTQBYGenInst<type, VECREG>;
+
+multiclass RotateQuadLeftByBytes
+{
+ def v16i8: ROTQBYVecInst<v16i8>;
+ def v8i16: ROTQBYVecInst<v8i16>;
+ def v4i32: ROTQBYVecInst<v4i32>;
+ def v4f32: ROTQBYVecInst<v4f32>;
+ def v2i64: ROTQBYVecInst<v2i64>;
+ def v2f64: ROTQBYVecInst<v2f64>;
+ def i128: ROTQBYGenInst<i128, GPRC>;
+}
+
+defm ROTQBY: RotateQuadLeftByBytes;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// Rotate quad by byte (count), immediate
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class ROTQBYIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI7Form<0b00111111100, OOL, IOL, "rotqbyi\t$rT, $rA, $val",
+ RotShiftQuad, pattern>;
+
+class ROTQBYIGenInst<ValueType type, RegisterClass rclass>:
+ ROTQBYIInst<(outs rclass:$rT), (ins rclass:$rA, u7imm:$val),
+ [(set (type rclass:$rT),
+ (SPUrotbytes_left (type rclass:$rA), (i16 uimm7:$val)))]>;
+
+class ROTQBYIVecInst<ValueType vectype>:
+ ROTQBYIGenInst<vectype, VECREG>;
+
+multiclass RotateQuadByBytesImm
+{
+ def v16i8: ROTQBYIVecInst<v16i8>;
+ def v8i16: ROTQBYIVecInst<v8i16>;
+ def v4i32: ROTQBYIVecInst<v4i32>;
+ def v4f32: ROTQBYIVecInst<v4f32>;
+ def v2i64: ROTQBYIVecInst<v2i64>;
+ def vfi64: ROTQBYIVecInst<v2f64>;
+ def i128: ROTQBYIGenInst<i128, GPRC>;
+}
+
+defm ROTQBYI: RotateQuadByBytesImm;
+
+// See ROTQBY note above.
+class ROTQBYBIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI7Form<0b00110011100, OOL, IOL,
+ "rotqbybi\t$rT, $rA, $shift",
+ RotShiftQuad, pattern>;
+
+class ROTQBYBIVecInst<ValueType vectype, RegisterClass rclass>:
+ ROTQBYBIInst<(outs VECREG:$rT), (ins VECREG:$rA, rclass:$shift),
+ [(set (vectype VECREG:$rT),
+ (SPUrotbytes_left_bits (vectype VECREG:$rA), rclass:$shift))]>;
+
+multiclass RotateQuadByBytesByBitshift {
+ def v16i8_r32: ROTQBYBIVecInst<v16i8, R32C>;
+ def v8i16_r32: ROTQBYBIVecInst<v8i16, R32C>;
+ def v4i32_r32: ROTQBYBIVecInst<v4i32, R32C>;
+ def v2i64_r32: ROTQBYBIVecInst<v2i64, R32C>;
+}
+
+defm ROTQBYBI : RotateQuadByBytesByBitshift;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// See ROTQBY note above.
+//
+// Assume that the user of this instruction knows to shift the rotate count
+// into bit 29
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class ROTQBIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b00011011100, OOL, IOL, "rotqbi\t$rT, $rA, $rB",
+ RotShiftQuad, pattern>;
+
+class ROTQBIVecInst<ValueType vectype>:
+ ROTQBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
+ [/* no pattern yet */]>;
+
+class ROTQBIRegInst<RegisterClass rclass>:
+ ROTQBIInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB),
+ [/* no pattern yet */]>;
+
+multiclass RotateQuadByBitCount
+{
+ def v16i8: ROTQBIVecInst<v16i8>;
+ def v8i16: ROTQBIVecInst<v8i16>;
+ def v4i32: ROTQBIVecInst<v4i32>;
+ def v2i64: ROTQBIVecInst<v2i64>;
+
+ def r128: ROTQBIRegInst<GPRC>;
+ def r64: ROTQBIRegInst<R64C>;
+}
+
+defm ROTQBI: RotateQuadByBitCount;
+
+class ROTQBIIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI7Form<0b00011111100, OOL, IOL, "rotqbii\t$rT, $rA, $val",
+ RotShiftQuad, pattern>;
+
+class ROTQBIIVecInst<ValueType vectype, Operand optype, ValueType inttype,
+ PatLeaf pred>:
+ ROTQBIIInst<(outs VECREG:$rT), (ins VECREG:$rA, optype:$val),
+ [/* no pattern yet */]>;
+
+class ROTQBIIRegInst<RegisterClass rclass, Operand optype, ValueType inttype,
+ PatLeaf pred>:
+ ROTQBIIInst<(outs rclass:$rT), (ins rclass:$rA, optype:$val),
+ [/* no pattern yet */]>;
+
+multiclass RotateQuadByBitCountImm
+{
+ def v16i8: ROTQBIIVecInst<v16i8, u7imm_i32, i32, uimm7>;
+ def v8i16: ROTQBIIVecInst<v8i16, u7imm_i32, i32, uimm7>;
+ def v4i32: ROTQBIIVecInst<v4i32, u7imm_i32, i32, uimm7>;
+ def v2i64: ROTQBIIVecInst<v2i64, u7imm_i32, i32, uimm7>;
+
+ def r128: ROTQBIIRegInst<GPRC, u7imm_i32, i32, uimm7>;
+ def r64: ROTQBIIRegInst<R64C, u7imm_i32, i32, uimm7>;
+}
+
+defm ROTQBII : RotateQuadByBitCountImm;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// ROTHM v8i16 form:
+// NOTE(1): No vector rotate is generated by the C/C++ frontend (today),
+// so this only matches a synthetically generated/lowered code
+// fragment.
+// NOTE(2): $rB must be negated before the right rotate!
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class ROTHMInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b10111010000, OOL, IOL, "rothm\t$rT, $rA, $rB",
+ RotShiftVec, pattern>;
+
+def ROTHMv8i16:
+ ROTHMInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [/* see patterns below - $rB must be negated */]>;
+
+def : Pat<(SPUvec_srl (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)),
+ (ROTHMv8i16 VECREG:$rA, (SFHIvec VECREG:$rB, 0))>;
+
+// ROTHM r16 form: Rotate 16-bit quantity to right, zero fill at the left
+// Note: This instruction doesn't match a pattern because rB must be negated
+// for the instruction to work. Thus, the pattern below the instruction!
+
+def ROTHMr16:
+ ROTHMInst<(outs R16C:$rT), (ins R16C:$rA, R32C:$rB),
+ [/* see patterns below - $rB must be negated! */]>;
+
+def : Pat<(srl R16C:$rA, R32C:$rB),
+ (ROTHMr16 R16C:$rA, (SFIr32 R32C:$rB, 0))>;
+
+def : Pat<(srl R16C:$rA, R16C:$rB),
+ (ROTHMr16 R16C:$rA,
+ (SFIr32 (XSHWr16 R16C:$rB), 0))>;
+
+def : Pat<(srl R16C:$rA, R8C:$rB),
+ (ROTHMr16 R16C:$rA,
+ (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB) ), 0))>;
+
+// ROTHMI v8i16 form: See the comment for ROTHM v8i16. The difference here is
+// that the immediate can be complemented, so that the user doesn't have to
+// worry about it.
+
+class ROTHMIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI7Form<0b10111110000, OOL, IOL, "rothmi\t$rT, $rA, $val",
+ RotShiftVec, pattern>;
+
+def ROTHMIv8i16:
+ ROTHMIInst<(outs VECREG:$rT), (ins VECREG:$rA, rothNeg7imm:$val),
+ [/* no pattern */]>;
+
+def : Pat<(SPUvec_srl (v8i16 VECREG:$rA), (i32 imm:$val)),
+ (ROTHMIv8i16 VECREG:$rA, imm:$val)>;
+
+def: Pat<(SPUvec_srl (v8i16 VECREG:$rA), (i16 imm:$val)),
+ (ROTHMIv8i16 VECREG:$rA, (TO_IMM32 imm:$val))>;
+
+def: Pat<(SPUvec_srl (v8i16 VECREG:$rA), (i8 imm:$val)),
+ (ROTHMIv8i16 VECREG:$rA, (TO_IMM32 imm:$val))>;
+
+def ROTHMIr16:
+ ROTHMIInst<(outs R16C:$rT), (ins R16C:$rA, rothNeg7imm:$val),
+ [/* no pattern */]>;
+
+def: Pat<(srl R16C:$rA, (i32 uimm7:$val)),
+ (ROTHMIr16 R16C:$rA, uimm7:$val)>;
+
+def: Pat<(srl R16C:$rA, (i16 uimm7:$val)),
+ (ROTHMIr16 R16C:$rA, (TO_IMM32 uimm7:$val))>;
+
+def: Pat<(srl R16C:$rA, (i8 uimm7:$val)),
+ (ROTHMIr16 R16C:$rA, (TO_IMM32 uimm7:$val))>;
+
+// ROTM v4i32 form: See the ROTHM v8i16 comments.
+class ROTMInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b10011010000, OOL, IOL, "rotm\t$rT, $rA, $rB",
+ RotShiftVec, pattern>;
+
+def ROTMv4i32:
+ ROTMInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [/* see patterns below - $rB must be negated */]>;
+
+def : Pat<(SPUvec_srl (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)),
+ (ROTMv4i32 VECREG:$rA, (SFIvec VECREG:$rB, 0))>;
+
+def ROTMr32:
+ ROTMInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
+ [/* see patterns below - $rB must be negated */]>;
+
+def : Pat<(srl R32C:$rA, R32C:$rB),
+ (ROTMr32 R32C:$rA, (SFIr32 R32C:$rB, 0))>;
+
+def : Pat<(srl R32C:$rA, R16C:$rB),
+ (ROTMr32 R32C:$rA,
+ (SFIr32 (XSHWr16 R16C:$rB), 0))>;
+
+def : Pat<(srl R32C:$rA, R8C:$rB),
+ (ROTMr32 R32C:$rA,
+ (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>;
+
+// ROTMI v4i32 form: See the comment for ROTHM v8i16.
+def ROTMIv4i32:
+ RI7Form<0b10011110000, (outs VECREG:$rT), (ins VECREG:$rA, rotNeg7imm:$val),
+ "rotmi\t$rT, $rA, $val", RotShiftVec,
+ [(set (v4i32 VECREG:$rT),
+ (SPUvec_srl VECREG:$rA, (i32 uimm7:$val)))]>;
+
+def : Pat<(SPUvec_srl (v4i32 VECREG:$rA), (i16 uimm7:$val)),
+ (ROTMIv4i32 VECREG:$rA, (TO_IMM32 uimm7:$val))>;
+
+def : Pat<(SPUvec_srl (v4i32 VECREG:$rA), (i8 uimm7:$val)),
+ (ROTMIv4i32 VECREG:$rA, (TO_IMM32 uimm7:$val))>;
+
+// ROTMI r32 form: know how to complement the immediate value.
+def ROTMIr32:
+ RI7Form<0b10011110000, (outs R32C:$rT), (ins R32C:$rA, rotNeg7imm:$val),
+ "rotmi\t$rT, $rA, $val", RotShiftVec,
+ [(set R32C:$rT, (srl R32C:$rA, (i32 uimm7:$val)))]>;
+
+def : Pat<(srl R32C:$rA, (i16 imm:$val)),
+ (ROTMIr32 R32C:$rA, (TO_IMM32 uimm7:$val))>;
+
+def : Pat<(srl R32C:$rA, (i8 imm:$val)),
+ (ROTMIr32 R32C:$rA, (TO_IMM32 uimm7:$val))>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// ROTQMBY: This is a vector form merely so that when used in an
+// instruction pattern, type checking will succeed. This instruction assumes
+// that the user knew to negate $rB.
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class ROTQMBYInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b10111011100, OOL, IOL, "rotqmby\t$rT, $rA, $rB",
+ RotShiftQuad, pattern>;
+
+class ROTQMBYVecInst<ValueType vectype>:
+ ROTQMBYInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
+ [/* no pattern, $rB must be negated */]>;
+
+class ROTQMBYRegInst<RegisterClass rclass>:
+ ROTQMBYInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB),
+ [/* no pattern */]>;
+
+multiclass RotateQuadBytes
+{
+ def v16i8: ROTQMBYVecInst<v16i8>;
+ def v8i16: ROTQMBYVecInst<v8i16>;
+ def v4i32: ROTQMBYVecInst<v4i32>;
+ def v2i64: ROTQMBYVecInst<v2i64>;
+
+ def r128: ROTQMBYRegInst<GPRC>;
+ def r64: ROTQMBYRegInst<R64C>;
+}
+
+defm ROTQMBY : RotateQuadBytes;
+
+def : Pat<(SPUsrl_bytes GPRC:$rA, R32C:$rB),
+ (ROTQMBYr128 GPRC:$rA,
+ (SFIr32 R32C:$rB, 0))>;
+
+class ROTQMBYIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI7Form<0b10111111100, OOL, IOL, "rotqmbyi\t$rT, $rA, $val",
+ RotShiftQuad, pattern>;
+
+class ROTQMBYIVecInst<ValueType vectype>:
+ ROTQMBYIInst<(outs VECREG:$rT), (ins VECREG:$rA, rotNeg7imm:$val),
+ [/* no pattern */]>;
+
+class ROTQMBYIRegInst<RegisterClass rclass, Operand optype, ValueType inttype,
+ PatLeaf pred>:
+ ROTQMBYIInst<(outs rclass:$rT), (ins rclass:$rA, optype:$val),
+ [/* no pattern */]>;
+
+// 128-bit zero extension form:
+class ROTQMBYIZExtInst<RegisterClass rclass, Operand optype, PatLeaf pred>:
+ ROTQMBYIInst<(outs GPRC:$rT), (ins rclass:$rA, optype:$val),
+ [/* no pattern */]>;
+
+multiclass RotateQuadBytesImm
+{
+ def v16i8: ROTQMBYIVecInst<v16i8>;
+ def v8i16: ROTQMBYIVecInst<v8i16>;
+ def v4i32: ROTQMBYIVecInst<v4i32>;
+ def v2i64: ROTQMBYIVecInst<v2i64>;
+
+ def r128: ROTQMBYIRegInst<GPRC, rotNeg7imm, i32, uimm7>;
+ def r64: ROTQMBYIRegInst<R64C, rotNeg7imm, i32, uimm7>;
+
+ def r128_zext_r8: ROTQMBYIZExtInst<R8C, rotNeg7imm, uimm7>;
+ def r128_zext_r16: ROTQMBYIZExtInst<R16C, rotNeg7imm, uimm7>;
+ def r128_zext_r32: ROTQMBYIZExtInst<R32C, rotNeg7imm, uimm7>;
+ def r128_zext_r64: ROTQMBYIZExtInst<R64C, rotNeg7imm, uimm7>;
+}
+
+defm ROTQMBYI : RotateQuadBytesImm;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// Rotate right and mask by bit count
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class ROTQMBYBIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b10110011100, OOL, IOL, "rotqmbybi\t$rT, $rA, $rB",
+ RotShiftQuad, pattern>;
+
+class ROTQMBYBIVecInst<ValueType vectype>:
+ ROTQMBYBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
+ [/* no pattern, */]>;
+
+multiclass RotateMaskQuadByBitCount
+{
+ def v16i8: ROTQMBYBIVecInst<v16i8>;
+ def v8i16: ROTQMBYBIVecInst<v8i16>;
+ def v4i32: ROTQMBYBIVecInst<v4i32>;
+ def v2i64: ROTQMBYBIVecInst<v2i64>;
+ def r128: ROTQMBYBIInst<(outs GPRC:$rT), (ins GPRC:$rA, R32C:$rB),
+ [/*no pattern*/]>;
+}
+
+defm ROTQMBYBI: RotateMaskQuadByBitCount;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// Rotate quad and mask by bits
+// Note that the rotate amount has to be negated
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class ROTQMBIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b10011011100, OOL, IOL, "rotqmbi\t$rT, $rA, $rB",
+ RotShiftQuad, pattern>;
+
+class ROTQMBIVecInst<ValueType vectype>:
+ ROTQMBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
+ [/* no pattern */]>;
+
+class ROTQMBIRegInst<RegisterClass rclass>:
+ ROTQMBIInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB),
+ [/* no pattern */]>;
+
+multiclass RotateMaskQuadByBits
+{
+ def v16i8: ROTQMBIVecInst<v16i8>;
+ def v8i16: ROTQMBIVecInst<v8i16>;
+ def v4i32: ROTQMBIVecInst<v4i32>;
+ def v2i64: ROTQMBIVecInst<v2i64>;
+
+ def r128: ROTQMBIRegInst<GPRC>;
+ def r64: ROTQMBIRegInst<R64C>;
+}
+
+defm ROTQMBI: RotateMaskQuadByBits;
+
+def : Pat<(srl GPRC:$rA, R32C:$rB),
+ (ROTQMBYBIr128 (ROTQMBIr128 GPRC:$rA,
+ (SFIr32 R32C:$rB, 0)),
+ (SFIr32 R32C:$rB, 0))>;
+
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// Rotate quad and mask by bits, immediate
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class ROTQMBIIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI7Form<0b10011111100, OOL, IOL, "rotqmbii\t$rT, $rA, $val",
+ RotShiftQuad, pattern>;
+
+class ROTQMBIIVecInst<ValueType vectype>:
+ ROTQMBIIInst<(outs VECREG:$rT), (ins VECREG:$rA, rotNeg7imm:$val),
+ [/* no pattern */]>;
+
+class ROTQMBIIRegInst<RegisterClass rclass>:
+ ROTQMBIIInst<(outs rclass:$rT), (ins rclass:$rA, rotNeg7imm:$val),
+ [/* no pattern */]>;
+
+multiclass RotateMaskQuadByBitsImm
+{
+ def v16i8: ROTQMBIIVecInst<v16i8>;
+ def v8i16: ROTQMBIIVecInst<v8i16>;
+ def v4i32: ROTQMBIIVecInst<v4i32>;
+ def v2i64: ROTQMBIIVecInst<v2i64>;
+
+ def r128: ROTQMBIIRegInst<GPRC>;
+ def r64: ROTQMBIIRegInst<R64C>;
+}
+
+defm ROTQMBII: RotateMaskQuadByBitsImm;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+def ROTMAHv8i16:
+ RRForm<0b01111010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "rotmah\t$rT, $rA, $rB", RotShiftVec,
+ [/* see patterns below - $rB must be negated */]>;
+
+def : Pat<(SPUvec_sra (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)),
+ (ROTMAHv8i16 VECREG:$rA, (SFHIvec VECREG:$rB, 0))>;
+
+def ROTMAHr16:
+ RRForm<0b01111010000, (outs R16C:$rT), (ins R16C:$rA, R32C:$rB),
+ "rotmah\t$rT, $rA, $rB", RotShiftVec,
+ [/* see patterns below - $rB must be negated */]>;
+
+def : Pat<(sra R16C:$rA, R32C:$rB),
+ (ROTMAHr16 R16C:$rA, (SFIr32 R32C:$rB, 0))>;
+
+def : Pat<(sra R16C:$rA, R16C:$rB),
+ (ROTMAHr16 R16C:$rA,
+ (SFIr32 (XSHWr16 R16C:$rB), 0))>;
+
+def : Pat<(sra R16C:$rA, R8C:$rB),
+ (ROTMAHr16 R16C:$rA,
+ (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>;
+
+def ROTMAHIv8i16:
+ RRForm<0b01111110000, (outs VECREG:$rT), (ins VECREG:$rA, rothNeg7imm:$val),
+ "rotmahi\t$rT, $rA, $val", RotShiftVec,
+ [(set (v8i16 VECREG:$rT),
+ (SPUvec_sra (v8i16 VECREG:$rA), (i32 uimm7:$val)))]>;
+
+def : Pat<(SPUvec_sra (v8i16 VECREG:$rA), (i16 uimm7:$val)),
+ (ROTMAHIv8i16 (v8i16 VECREG:$rA), (TO_IMM32 uimm7:$val))>;
+
+def : Pat<(SPUvec_sra (v8i16 VECREG:$rA), (i8 uimm7:$val)),
+ (ROTMAHIv8i16 (v8i16 VECREG:$rA), (TO_IMM32 uimm7:$val))>;
+
+def ROTMAHIr16:
+ RRForm<0b01111110000, (outs R16C:$rT), (ins R16C:$rA, rothNeg7imm_i16:$val),
+ "rotmahi\t$rT, $rA, $val", RotShiftVec,
+ [(set R16C:$rT, (sra R16C:$rA, (i16 uimm7:$val)))]>;
+
+def : Pat<(sra R16C:$rA, (i32 imm:$val)),
+ (ROTMAHIr16 R16C:$rA, (TO_IMM32 uimm7:$val))>;
+
+def : Pat<(sra R16C:$rA, (i8 imm:$val)),
+ (ROTMAHIr16 R16C:$rA, (TO_IMM32 uimm7:$val))>;
+
+def ROTMAv4i32:
+ RRForm<0b01011010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "rotma\t$rT, $rA, $rB", RotShiftVec,
+ [/* see patterns below - $rB must be negated */]>;
+
+def : Pat<(SPUvec_sra (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)),
+ (ROTMAv4i32 VECREG:$rA, (SFIvec (v4i32 VECREG:$rB), 0))>;
+
+def ROTMAr32:
+ RRForm<0b01011010000, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
+ "rotma\t$rT, $rA, $rB", RotShiftVec,
+ [/* see patterns below - $rB must be negated */]>;
+
+def : Pat<(sra R32C:$rA, R32C:$rB),
+ (ROTMAr32 R32C:$rA, (SFIr32 R32C:$rB, 0))>;
+
+def : Pat<(sra R32C:$rA, R16C:$rB),
+ (ROTMAr32 R32C:$rA,
+ (SFIr32 (XSHWr16 R16C:$rB), 0))>;
+
+def : Pat<(sra R32C:$rA, R8C:$rB),
+ (ROTMAr32 R32C:$rA,
+ (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>;
+
+class ROTMAIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b01011110000, OOL, IOL,
+ "rotmai\t$rT, $rA, $val",
+ RotShiftVec, pattern>;
+
+class ROTMAIVecInst<ValueType vectype, Operand intop, ValueType inttype>:
+ ROTMAIInst<(outs VECREG:$rT), (ins VECREG:$rA, intop:$val),
+ [(set (vectype VECREG:$rT),
+ (SPUvec_sra VECREG:$rA, (inttype uimm7:$val)))]>;
+
+class ROTMAIRegInst<RegisterClass rclass, Operand intop, ValueType inttype>:
+ ROTMAIInst<(outs rclass:$rT), (ins rclass:$rA, intop:$val),
+ [(set rclass:$rT, (sra rclass:$rA, (inttype uimm7:$val)))]>;
+
+multiclass RotateMaskAlgebraicImm {
+ def v2i64_i32 : ROTMAIVecInst<v2i64, rotNeg7imm, i32>;
+ def v4i32_i32 : ROTMAIVecInst<v4i32, rotNeg7imm, i32>;
+ def r64_i32 : ROTMAIRegInst<R64C, rotNeg7imm, i32>;
+ def r32_i32 : ROTMAIRegInst<R32C, rotNeg7imm, i32>;
+}
+
+defm ROTMAI : RotateMaskAlgebraicImm;
+
+//===----------------------------------------------------------------------===//
+// Branch and conditionals:
+//===----------------------------------------------------------------------===//
+
+let isTerminator = 1, isBarrier = 1 in {
+ // Halt If Equal (r32 preferred slot only, no vector form)
+ def HEQr32:
+ RRForm_3<0b00011011110, (outs), (ins R32C:$rA, R32C:$rB),
+ "heq\t$rA, $rB", BranchResolv,
+ [/* no pattern to match */]>;
+
+ def HEQIr32 :
+ RI10Form_2<0b11111110, (outs), (ins R32C:$rA, s10imm:$val),
+ "heqi\t$rA, $val", BranchResolv,
+ [/* no pattern to match */]>;
+
+ // HGT/HGTI: These instructions use signed arithmetic for the comparison,
+ // contrasting with HLGT/HLGTI, which use unsigned comparison:
+ def HGTr32:
+ RRForm_3<0b00011010010, (outs), (ins R32C:$rA, R32C:$rB),
+ "hgt\t$rA, $rB", BranchResolv,
+ [/* no pattern to match */]>;
+
+ def HGTIr32:
+ RI10Form_2<0b11110010, (outs), (ins R32C:$rA, s10imm:$val),
+ "hgti\t$rA, $val", BranchResolv,
+ [/* no pattern to match */]>;
+
+ def HLGTr32:
+ RRForm_3<0b00011011010, (outs), (ins R32C:$rA, R32C:$rB),
+ "hlgt\t$rA, $rB", BranchResolv,
+ [/* no pattern to match */]>;
+
+ def HLGTIr32:
+ RI10Form_2<0b11111010, (outs), (ins R32C:$rA, s10imm:$val),
+ "hlgti\t$rA, $val", BranchResolv,
+ [/* no pattern to match */]>;
+}
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// Comparison operators for i8, i16 and i32:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class CEQBInst<dag OOL, dag IOL, list<dag> pattern> :
+ RRForm<0b00001011110, OOL, IOL, "ceqb\t$rT, $rA, $rB",
+ ByteOp, pattern>;
+
+multiclass CmpEqualByte
+{
+ def v16i8 :
+ CEQBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (v16i8 VECREG:$rT), (seteq (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))]>;
+
+ def r8 :
+ CEQBInst<(outs R8C:$rT), (ins R8C:$rA, R8C:$rB),
+ [(set R8C:$rT, (seteq R8C:$rA, R8C:$rB))]>;
+}
+
+class CEQBIInst<dag OOL, dag IOL, list<dag> pattern> :
+ RI10Form<0b01111110, OOL, IOL, "ceqbi\t$rT, $rA, $val",
+ ByteOp, pattern>;
+
+multiclass CmpEqualByteImm
+{
+ def v16i8 :
+ CEQBIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm_i8:$val),
+ [(set (v16i8 VECREG:$rT), (seteq (v16i8 VECREG:$rA),
+ v16i8SExt8Imm:$val))]>;
+ def r8:
+ CEQBIInst<(outs R8C:$rT), (ins R8C:$rA, s10imm_i8:$val),
+ [(set R8C:$rT, (seteq R8C:$rA, immSExt8:$val))]>;
+}
+
+class CEQHInst<dag OOL, dag IOL, list<dag> pattern> :
+ RRForm<0b00010011110, OOL, IOL, "ceqh\t$rT, $rA, $rB",
+ ByteOp, pattern>;
+
+multiclass CmpEqualHalfword
+{
+ def v8i16 : CEQHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (v8i16 VECREG:$rT), (seteq (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))]>;
+
+ def r16 : CEQHInst<(outs R16C:$rT), (ins R16C:$rA, R16C:$rB),
+ [(set R16C:$rT, (seteq R16C:$rA, R16C:$rB))]>;
+}
+
+class CEQHIInst<dag OOL, dag IOL, list<dag> pattern> :
+ RI10Form<0b10111110, OOL, IOL, "ceqhi\t$rT, $rA, $val",
+ ByteOp, pattern>;
+
+multiclass CmpEqualHalfwordImm
+{
+ def v8i16 : CEQHIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ [(set (v8i16 VECREG:$rT),
+ (seteq (v8i16 VECREG:$rA),
+ (v8i16 v8i16SExt10Imm:$val)))]>;
+ def r16 : CEQHIInst<(outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
+ [(set R16C:$rT, (seteq R16C:$rA, i16ImmSExt10:$val))]>;
+}
+
+class CEQInst<dag OOL, dag IOL, list<dag> pattern> :
+ RRForm<0b00000011110, OOL, IOL, "ceq\t$rT, $rA, $rB",
+ ByteOp, pattern>;
+
+multiclass CmpEqualWord
+{
+ def v4i32 : CEQInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (v4i32 VECREG:$rT),
+ (seteq (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+
+ def r32 : CEQInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
+ [(set R32C:$rT, (seteq R32C:$rA, R32C:$rB))]>;
+}
+
+class CEQIInst<dag OOL, dag IOL, list<dag> pattern> :
+ RI10Form<0b00111110, OOL, IOL, "ceqi\t$rT, $rA, $val",
+ ByteOp, pattern>;
+
+multiclass CmpEqualWordImm
+{
+ def v4i32 : CEQIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ [(set (v4i32 VECREG:$rT),
+ (seteq (v4i32 VECREG:$rA),
+ (v4i32 v4i32SExt16Imm:$val)))]>;
+
+ def r32: CEQIInst<(outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val),
+ [(set R32C:$rT, (seteq R32C:$rA, i32ImmSExt10:$val))]>;
+}
+
+class CGTBInst<dag OOL, dag IOL, list<dag> pattern> :
+ RRForm<0b00001010010, OOL, IOL, "cgtb\t$rT, $rA, $rB",
+ ByteOp, pattern>;
+
+multiclass CmpGtrByte
+{
+ def v16i8 :
+ CGTBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (v16i8 VECREG:$rT), (setgt (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))]>;
+
+ def r8 :
+ CGTBInst<(outs R8C:$rT), (ins R8C:$rA, R8C:$rB),
+ [(set R8C:$rT, (setgt R8C:$rA, R8C:$rB))]>;
+}
+
+class CGTBIInst<dag OOL, dag IOL, list<dag> pattern> :
+ RI10Form<0b01110010, OOL, IOL, "cgtbi\t$rT, $rA, $val",
+ ByteOp, pattern>;
+
+multiclass CmpGtrByteImm
+{
+ def v16i8 :
+ CGTBIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm_i8:$val),
+ [(set (v16i8 VECREG:$rT), (setgt (v16i8 VECREG:$rA),
+ v16i8SExt8Imm:$val))]>;
+ def r8:
+ CGTBIInst<(outs R8C:$rT), (ins R8C:$rA, s10imm_i8:$val),
+ [(set R8C:$rT, (setgt R8C:$rA, immSExt8:$val))]>;
+}
+
+class CGTHInst<dag OOL, dag IOL, list<dag> pattern> :
+ RRForm<0b00010010010, OOL, IOL, "cgth\t$rT, $rA, $rB",
+ ByteOp, pattern>;
+
+multiclass CmpGtrHalfword
+{
+ def v8i16 : CGTHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (v8i16 VECREG:$rT), (setgt (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))]>;
+
+ def r16 : CGTHInst<(outs R16C:$rT), (ins R16C:$rA, R16C:$rB),
+ [(set R16C:$rT, (setgt R16C:$rA, R16C:$rB))]>;
+}
+
+class CGTHIInst<dag OOL, dag IOL, list<dag> pattern> :
+ RI10Form<0b10110010, OOL, IOL, "cgthi\t$rT, $rA, $val",
+ ByteOp, pattern>;
+
+multiclass CmpGtrHalfwordImm
+{
+ def v8i16 : CGTHIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ [(set (v8i16 VECREG:$rT),
+ (setgt (v8i16 VECREG:$rA),
+ (v8i16 v8i16SExt10Imm:$val)))]>;
+ def r16 : CGTHIInst<(outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
+ [(set R16C:$rT, (setgt R16C:$rA, i16ImmSExt10:$val))]>;
+}
+
+class CGTInst<dag OOL, dag IOL, list<dag> pattern> :
+ RRForm<0b00000010010, OOL, IOL, "cgt\t$rT, $rA, $rB",
+ ByteOp, pattern>;
+
+multiclass CmpGtrWord
+{
+ def v4i32 : CGTInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (v4i32 VECREG:$rT),
+ (setgt (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+
+ def r32 : CGTInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
+ [(set R32C:$rT, (setgt R32C:$rA, R32C:$rB))]>;
+}
+
+class CGTIInst<dag OOL, dag IOL, list<dag> pattern> :
+ RI10Form<0b00110010, OOL, IOL, "cgti\t$rT, $rA, $val",
+ ByteOp, pattern>;
+
+multiclass CmpGtrWordImm
+{
+ def v4i32 : CGTIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ [(set (v4i32 VECREG:$rT),
+ (setgt (v4i32 VECREG:$rA),
+ (v4i32 v4i32SExt16Imm:$val)))]>;
+
+ def r32: CGTIInst<(outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val),
+ [(set R32C:$rT, (setgt R32C:$rA, i32ImmSExt10:$val))]>;
+
+ // CGTIv4f32, CGTIf32: These are used in the f32 fdiv instruction sequence:
+ def v4f32: CGTIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ [(set (v4i32 VECREG:$rT),
+ (setgt (v4i32 (bitconvert (v4f32 VECREG:$rA))),
+ (v4i32 v4i32SExt16Imm:$val)))]>;
+
+ def f32: CGTIInst<(outs R32C:$rT), (ins R32FP:$rA, s10imm_i32:$val),
+ [/* no pattern */]>;
+}
+
+class CLGTBInst<dag OOL, dag IOL, list<dag> pattern> :
+ RRForm<0b00001011010, OOL, IOL, "clgtb\t$rT, $rA, $rB",
+ ByteOp, pattern>;
+
+multiclass CmpLGtrByte
+{
+ def v16i8 :
+ CLGTBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (v16i8 VECREG:$rT), (setugt (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))]>;
+
+ def r8 :
+ CLGTBInst<(outs R8C:$rT), (ins R8C:$rA, R8C:$rB),
+ [(set R8C:$rT, (setugt R8C:$rA, R8C:$rB))]>;
+}
+
+class CLGTBIInst<dag OOL, dag IOL, list<dag> pattern> :
+ RI10Form<0b01111010, OOL, IOL, "clgtbi\t$rT, $rA, $val",
+ ByteOp, pattern>;
+
+multiclass CmpLGtrByteImm
+{
+ def v16i8 :
+ CLGTBIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm_i8:$val),
+ [(set (v16i8 VECREG:$rT), (setugt (v16i8 VECREG:$rA),
+ v16i8SExt8Imm:$val))]>;
+ def r8:
+ CLGTBIInst<(outs R8C:$rT), (ins R8C:$rA, s10imm_i8:$val),
+ [(set R8C:$rT, (setugt R8C:$rA, immSExt8:$val))]>;
+}
+
+class CLGTHInst<dag OOL, dag IOL, list<dag> pattern> :
+ RRForm<0b00010011010, OOL, IOL, "clgth\t$rT, $rA, $rB",
+ ByteOp, pattern>;
+
+multiclass CmpLGtrHalfword
+{
+ def v8i16 : CLGTHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (v8i16 VECREG:$rT), (setugt (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))]>;
+
+ def r16 : CLGTHInst<(outs R16C:$rT), (ins R16C:$rA, R16C:$rB),
+ [(set R16C:$rT, (setugt R16C:$rA, R16C:$rB))]>;
+}
+
+class CLGTHIInst<dag OOL, dag IOL, list<dag> pattern> :
+ RI10Form<0b10111010, OOL, IOL, "clgthi\t$rT, $rA, $val",
+ ByteOp, pattern>;
+
+multiclass CmpLGtrHalfwordImm
+{
+ def v8i16 : CLGTHIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ [(set (v8i16 VECREG:$rT),
+ (setugt (v8i16 VECREG:$rA),
+ (v8i16 v8i16SExt10Imm:$val)))]>;
+ def r16 : CLGTHIInst<(outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
+ [(set R16C:$rT, (setugt R16C:$rA, i16ImmSExt10:$val))]>;
+}
+
+class CLGTInst<dag OOL, dag IOL, list<dag> pattern> :
+ RRForm<0b00000011010, OOL, IOL, "clgt\t$rT, $rA, $rB",
+ ByteOp, pattern>;
+
+multiclass CmpLGtrWord
+{
+ def v4i32 : CLGTInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (v4i32 VECREG:$rT),
+ (setugt (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+
+ def r32 : CLGTInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
+ [(set R32C:$rT, (setugt R32C:$rA, R32C:$rB))]>;
+}
+
+class CLGTIInst<dag OOL, dag IOL, list<dag> pattern> :
+ RI10Form<0b00111010, OOL, IOL, "clgti\t$rT, $rA, $val",
+ ByteOp, pattern>;
+
+multiclass CmpLGtrWordImm
+{
+ def v4i32 : CLGTIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ [(set (v4i32 VECREG:$rT),
+ (setugt (v4i32 VECREG:$rA),
+ (v4i32 v4i32SExt16Imm:$val)))]>;
+
+ def r32: CLGTIInst<(outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val),
+ [(set R32C:$rT, (setugt R32C:$rA, i32ImmSExt10:$val))]>;
+}
+
+defm CEQB : CmpEqualByte;
+defm CEQBI : CmpEqualByteImm;
+defm CEQH : CmpEqualHalfword;
+defm CEQHI : CmpEqualHalfwordImm;
+defm CEQ : CmpEqualWord;
+defm CEQI : CmpEqualWordImm;
+defm CGTB : CmpGtrByte;
+defm CGTBI : CmpGtrByteImm;
+defm CGTH : CmpGtrHalfword;
+defm CGTHI : CmpGtrHalfwordImm;
+defm CGT : CmpGtrWord;
+defm CGTI : CmpGtrWordImm;
+defm CLGTB : CmpLGtrByte;
+defm CLGTBI : CmpLGtrByteImm;
+defm CLGTH : CmpLGtrHalfword;
+defm CLGTHI : CmpLGtrHalfwordImm;
+defm CLGT : CmpLGtrWord;
+defm CLGTI : CmpLGtrWordImm;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// For SETCC primitives not supported above (setlt, setle, setge, etc.)
+// define a pattern to generate the right code, as a binary operator
+// (in a manner of speaking.)
+//
+// Notes:
+// 1. This only matches the setcc set of conditionals. Special pattern
+// matching is used for select conditionals.
+//
+// 2. The "DAG" versions of these classes is almost exclusively used for
+// i64 comparisons. See the tblgen fundamentals documentation for what
+// ".ResultInstrs[0]" means; see TargetSelectionDAG.td and the Pattern
+// class for where ResultInstrs originates.
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class SETCCNegCondReg<PatFrag cond, RegisterClass rclass, ValueType inttype,
+ SPUInstr xorinst, SPUInstr cmpare>:
+ Pat<(cond rclass:$rA, rclass:$rB),
+ (xorinst (cmpare rclass:$rA, rclass:$rB), (inttype -1))>;
+
+class SETCCNegCondImm<PatFrag cond, RegisterClass rclass, ValueType inttype,
+ PatLeaf immpred, SPUInstr xorinst, SPUInstr cmpare>:
+ Pat<(cond rclass:$rA, (inttype immpred:$imm)),
+ (xorinst (cmpare rclass:$rA, (inttype immpred:$imm)), (inttype -1))>;
+
+def : SETCCNegCondReg<setne, R8C, i8, XORBIr8, CEQBr8>;
+def : SETCCNegCondImm<setne, R8C, i8, immSExt8, XORBIr8, CEQBIr8>;
+
+def : SETCCNegCondReg<setne, R16C, i16, XORHIr16, CEQHr16>;
+def : SETCCNegCondImm<setne, R16C, i16, i16ImmSExt10, XORHIr16, CEQHIr16>;
+
+def : SETCCNegCondReg<setne, R32C, i32, XORIr32, CEQr32>;
+def : SETCCNegCondImm<setne, R32C, i32, i32ImmSExt10, XORIr32, CEQIr32>;
+
+class SETCCBinOpReg<PatFrag cond, RegisterClass rclass,
+ SPUInstr binop, SPUInstr cmpOp1, SPUInstr cmpOp2>:
+ Pat<(cond rclass:$rA, rclass:$rB),
+ (binop (cmpOp1 rclass:$rA, rclass:$rB),
+ (cmpOp2 rclass:$rA, rclass:$rB))>;
+
+class SETCCBinOpImm<PatFrag cond, RegisterClass rclass, PatLeaf immpred,
+ ValueType immtype,
+ SPUInstr binop, SPUInstr cmpOp1, SPUInstr cmpOp2>:
+ Pat<(cond rclass:$rA, (immtype immpred:$imm)),
+ (binop (cmpOp1 rclass:$rA, (immtype immpred:$imm)),
+ (cmpOp2 rclass:$rA, (immtype immpred:$imm)))>;
+
+def : SETCCBinOpReg<setge, R8C, ORr8, CGTBr8, CEQBr8>;
+def : SETCCBinOpImm<setge, R8C, immSExt8, i8, ORr8, CGTBIr8, CEQBIr8>;
+def : SETCCBinOpReg<setlt, R8C, NORr8, CGTBr8, CEQBr8>;
+def : SETCCBinOpImm<setlt, R8C, immSExt8, i8, NORr8, CGTBIr8, CEQBIr8>;
+def : Pat<(setle R8C:$rA, R8C:$rB),
+ (XORBIr8 (CGTBr8 R8C:$rA, R8C:$rB), 0xff)>;
+def : Pat<(setle R8C:$rA, immU8:$imm),
+ (XORBIr8 (CGTBIr8 R8C:$rA, immU8:$imm), 0xff)>;
+
+def : SETCCBinOpReg<setge, R16C, ORr16, CGTHr16, CEQHr16>;
+def : SETCCBinOpImm<setge, R16C, i16ImmSExt10, i16,
+ ORr16, CGTHIr16, CEQHIr16>;
+def : SETCCBinOpReg<setlt, R16C, NORr16, CGTHr16, CEQHr16>;
+def : SETCCBinOpImm<setlt, R16C, i16ImmSExt10, i16, NORr16, CGTHIr16, CEQHIr16>;
+def : Pat<(setle R16C:$rA, R16C:$rB),
+ (XORHIr16 (CGTHr16 R16C:$rA, R16C:$rB), 0xffff)>;
+def : Pat<(setle R16C:$rA, i16ImmSExt10:$imm),
+ (XORHIr16 (CGTHIr16 R16C:$rA, i16ImmSExt10:$imm), 0xffff)>;
+
+def : SETCCBinOpReg<setge, R32C, ORr32, CGTr32, CEQr32>;
+def : SETCCBinOpImm<setge, R32C, i32ImmSExt10, i32,
+ ORr32, CGTIr32, CEQIr32>;
+def : SETCCBinOpReg<setlt, R32C, NORr32, CGTr32, CEQr32>;
+def : SETCCBinOpImm<setlt, R32C, i32ImmSExt10, i32, NORr32, CGTIr32, CEQIr32>;
+def : Pat<(setle R32C:$rA, R32C:$rB),
+ (XORIr32 (CGTr32 R32C:$rA, R32C:$rB), 0xffffffff)>;
+def : Pat<(setle R32C:$rA, i32ImmSExt10:$imm),
+ (XORIr32 (CGTIr32 R32C:$rA, i32ImmSExt10:$imm), 0xffffffff)>;
+
+def : SETCCBinOpReg<setuge, R8C, ORr8, CLGTBr8, CEQBr8>;
+def : SETCCBinOpImm<setuge, R8C, immSExt8, i8, ORr8, CLGTBIr8, CEQBIr8>;
+def : SETCCBinOpReg<setult, R8C, NORr8, CLGTBr8, CEQBr8>;
+def : SETCCBinOpImm<setult, R8C, immSExt8, i8, NORr8, CLGTBIr8, CEQBIr8>;
+def : Pat<(setule R8C:$rA, R8C:$rB),
+ (XORBIr8 (CLGTBr8 R8C:$rA, R8C:$rB), 0xff)>;
+def : Pat<(setule R8C:$rA, immU8:$imm),
+ (XORBIr8 (CLGTBIr8 R8C:$rA, immU8:$imm), 0xff)>;
+
+def : SETCCBinOpReg<setuge, R16C, ORr16, CLGTHr16, CEQHr16>;
+def : SETCCBinOpImm<setuge, R16C, i16ImmSExt10, i16,
+ ORr16, CLGTHIr16, CEQHIr16>;
+def : SETCCBinOpReg<setult, R16C, NORr16, CLGTHr16, CEQHr16>;
+def : SETCCBinOpImm<setult, R16C, i16ImmSExt10, i16, NORr16,
+ CLGTHIr16, CEQHIr16>;
+def : Pat<(setule R16C:$rA, R16C:$rB),
+ (XORHIr16 (CLGTHr16 R16C:$rA, R16C:$rB), 0xffff)>;
+def : Pat<(setule R16C:$rA, i16ImmSExt10:$imm),
+ (XORHIr16 (CLGTHIr16 R16C:$rA, i16ImmSExt10:$imm), 0xffff)>;
+
+def : SETCCBinOpReg<setuge, R32C, ORr32, CLGTr32, CEQr32>;
+def : SETCCBinOpImm<setuge, R32C, i32ImmSExt10, i32,
+ ORr32, CLGTIr32, CEQIr32>;
+def : SETCCBinOpReg<setult, R32C, NORr32, CLGTr32, CEQr32>;
+def : SETCCBinOpImm<setult, R32C, i32ImmSExt10, i32, NORr32, CLGTIr32, CEQIr32>;
+def : Pat<(setule R32C:$rA, R32C:$rB),
+ (XORIr32 (CLGTr32 R32C:$rA, R32C:$rB), 0xffffffff)>;
+def : Pat<(setule R32C:$rA, i32ImmSExt10:$imm),
+ (XORIr32 (CLGTIr32 R32C:$rA, i32ImmSExt10:$imm), 0xffffffff)>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// select conditional patterns:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class SELECTNegCondReg<PatFrag cond, RegisterClass rclass, ValueType inttype,
+ SPUInstr selinstr, SPUInstr cmpare>:
+ Pat<(select (inttype (cond rclass:$rA, rclass:$rB)),
+ rclass:$rTrue, rclass:$rFalse),
+ (selinstr rclass:$rTrue, rclass:$rFalse,
+ (cmpare rclass:$rA, rclass:$rB))>;
+
+class SELECTNegCondImm<PatFrag cond, RegisterClass rclass, ValueType inttype,
+ PatLeaf immpred, SPUInstr selinstr, SPUInstr cmpare>:
+ Pat<(select (inttype (cond rclass:$rA, immpred:$imm)),
+ rclass:$rTrue, rclass:$rFalse),
+ (selinstr rclass:$rTrue, rclass:$rFalse,
+ (cmpare rclass:$rA, immpred:$imm))>;
+
+def : SELECTNegCondReg<setne, R8C, i8, SELBr8, CEQBr8>;
+def : SELECTNegCondImm<setne, R8C, i8, immSExt8, SELBr8, CEQBIr8>;
+def : SELECTNegCondReg<setle, R8C, i8, SELBr8, CGTBr8>;
+def : SELECTNegCondImm<setle, R8C, i8, immSExt8, SELBr8, CGTBr8>;
+def : SELECTNegCondReg<setule, R8C, i8, SELBr8, CLGTBr8>;
+def : SELECTNegCondImm<setule, R8C, i8, immU8, SELBr8, CLGTBIr8>;
+
+def : SELECTNegCondReg<setne, R16C, i16, SELBr16, CEQHr16>;
+def : SELECTNegCondImm<setne, R16C, i16, i16ImmSExt10, SELBr16, CEQHIr16>;
+def : SELECTNegCondReg<setle, R16C, i16, SELBr16, CGTHr16>;
+def : SELECTNegCondImm<setle, R16C, i16, i16ImmSExt10, SELBr16, CGTHIr16>;
+def : SELECTNegCondReg<setule, R16C, i16, SELBr16, CLGTHr16>;
+def : SELECTNegCondImm<setule, R16C, i16, i16ImmSExt10, SELBr16, CLGTHIr16>;
+
+def : SELECTNegCondReg<setne, R32C, i32, SELBr32, CEQr32>;
+def : SELECTNegCondImm<setne, R32C, i32, i32ImmSExt10, SELBr32, CEQIr32>;
+def : SELECTNegCondReg<setle, R32C, i32, SELBr32, CGTr32>;
+def : SELECTNegCondImm<setle, R32C, i32, i32ImmSExt10, SELBr32, CGTIr32>;
+def : SELECTNegCondReg<setule, R32C, i32, SELBr32, CLGTr32>;
+def : SELECTNegCondImm<setule, R32C, i32, i32ImmSExt10, SELBr32, CLGTIr32>;
+
+class SELECTBinOpReg<PatFrag cond, RegisterClass rclass, ValueType inttype,
+ SPUInstr selinstr, SPUInstr binop, SPUInstr cmpOp1,
+ SPUInstr cmpOp2>:
+ Pat<(select (inttype (cond rclass:$rA, rclass:$rB)),
+ rclass:$rTrue, rclass:$rFalse),
+ (selinstr rclass:$rFalse, rclass:$rTrue,
+ (binop (cmpOp1 rclass:$rA, rclass:$rB),
+ (cmpOp2 rclass:$rA, rclass:$rB)))>;
+
+class SELECTBinOpImm<PatFrag cond, RegisterClass rclass, PatLeaf immpred,
+ ValueType inttype,
+ SPUInstr selinstr, SPUInstr binop, SPUInstr cmpOp1,
+ SPUInstr cmpOp2>:
+ Pat<(select (inttype (cond rclass:$rA, (inttype immpred:$imm))),
+ rclass:$rTrue, rclass:$rFalse),
+ (selinstr rclass:$rFalse, rclass:$rTrue,
+ (binop (cmpOp1 rclass:$rA, (inttype immpred:$imm)),
+ (cmpOp2 rclass:$rA, (inttype immpred:$imm))))>;
+
+def : SELECTBinOpReg<setge, R8C, i8, SELBr8, ORr8, CGTBr8, CEQBr8>;
+def : SELECTBinOpImm<setge, R8C, immSExt8, i8,
+ SELBr8, ORr8, CGTBIr8, CEQBIr8>;
+
+def : SELECTBinOpReg<setge, R16C, i16, SELBr16, ORr16, CGTHr16, CEQHr16>;
+def : SELECTBinOpImm<setge, R16C, i16ImmSExt10, i16,
+ SELBr16, ORr16, CGTHIr16, CEQHIr16>;
+
+def : SELECTBinOpReg<setge, R32C, i32, SELBr32, ORr32, CGTr32, CEQr32>;
+def : SELECTBinOpImm<setge, R32C, i32ImmSExt10, i32,
+ SELBr32, ORr32, CGTIr32, CEQIr32>;
+
+def : SELECTBinOpReg<setuge, R8C, i8, SELBr8, ORr8, CLGTBr8, CEQBr8>;
+def : SELECTBinOpImm<setuge, R8C, immSExt8, i8,
+ SELBr8, ORr8, CLGTBIr8, CEQBIr8>;
+
+def : SELECTBinOpReg<setuge, R16C, i16, SELBr16, ORr16, CLGTHr16, CEQHr16>;
+def : SELECTBinOpImm<setuge, R16C, i16ImmUns10, i16,
+ SELBr16, ORr16, CLGTHIr16, CEQHIr16>;
+
+def : SELECTBinOpReg<setuge, R32C, i32, SELBr32, ORr32, CLGTr32, CEQr32>;
+def : SELECTBinOpImm<setuge, R32C, i32ImmUns10, i32,
+ SELBr32, ORr32, CLGTIr32, CEQIr32>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+let isCall = 1,
+ // All calls clobber the non-callee-saved registers:
+ Defs = [R0, R1, R2, R3, R4, R5, R6, R7, R8, R9,
+ R10,R11,R12,R13,R14,R15,R16,R17,R18,R19,
+ R20,R21,R22,R23,R24,R25,R26,R27,R28,R29,
+ R30,R31,R32,R33,R34,R35,R36,R37,R38,R39,
+ R40,R41,R42,R43,R44,R45,R46,R47,R48,R49,
+ R50,R51,R52,R53,R54,R55,R56,R57,R58,R59,
+ R60,R61,R62,R63,R64,R65,R66,R67,R68,R69,
+ R70,R71,R72,R73,R74,R75,R76,R77,R78,R79],
+ // All of these instructions use $lr (aka $0)
+ Uses = [R0] in {
+ // Branch relative and set link: Used if we actually know that the target
+ // is within [-32768, 32767] bytes of the target
+ def BRSL:
+ BranchSetLink<0b011001100, (outs), (ins relcalltarget:$func, variable_ops),
+ "brsl\t$$lr, $func",
+ [(SPUcall (SPUpcrel tglobaladdr:$func, 0))]>;
+
+ // Branch absolute and set link: Used if we actually know that the target
+ // is an absolute address
+ def BRASL:
+ BranchSetLink<0b011001100, (outs), (ins calltarget:$func, variable_ops),
+ "brasl\t$$lr, $func",
+ [(SPUcall (SPUaform tglobaladdr:$func, 0))]>;
+
+ // Branch indirect and set link if external data. These instructions are not
+ // actually generated, matched by an intrinsic:
+ def BISLED_00: BISLEDForm<0b11, "bisled\t$$lr, $func", [/* empty pattern */]>;
+ def BISLED_E0: BISLEDForm<0b10, "bisled\t$$lr, $func", [/* empty pattern */]>;
+ def BISLED_0D: BISLEDForm<0b01, "bisled\t$$lr, $func", [/* empty pattern */]>;
+ def BISLED_ED: BISLEDForm<0b00, "bisled\t$$lr, $func", [/* empty pattern */]>;
+
+ // Branch indirect and set link. This is the "X-form" address version of a
+ // function call
+ def BISL:
+ BIForm<0b10010101100, "bisl\t$$lr, $func", [(SPUcall R32C:$func)]>;
+}
+
+// Support calls to external symbols:
+def : Pat<(SPUcall (SPUpcrel texternalsym:$func, 0)),
+ (BRSL texternalsym:$func)>;
+
+def : Pat<(SPUcall (SPUaform texternalsym:$func, 0)),
+ (BRASL texternalsym:$func)>;
+
+// Unconditional branches:
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in {
+ let isBarrier = 1 in {
+ def BR :
+ UncondBranch<0b001001100, (outs), (ins brtarget:$dest),
+ "br\t$dest",
+ [(br bb:$dest)]>;
+
+ // Unconditional, absolute address branch
+ def BRA:
+ UncondBranch<0b001100000, (outs), (ins brtarget:$dest),
+ "bra\t$dest",
+ [/* no pattern */]>;
+
+ // Indirect branch
+ let isIndirectBranch = 1 in {
+ def BI:
+ BIForm<0b00010101100, "bi\t$func", [(brind R32C:$func)]>;
+ }
+ }
+
+ // Conditional branches:
+ class BRNZInst<dag IOL, list<dag> pattern>:
+ RI16Form<0b010000100, (outs), IOL, "brnz\t$rCond,$dest",
+ BranchResolv, pattern>;
+
+ class BRNZRegInst<RegisterClass rclass>:
+ BRNZInst<(ins rclass:$rCond, brtarget:$dest),
+ [(brcond rclass:$rCond, bb:$dest)]>;
+
+ class BRNZVecInst<ValueType vectype>:
+ BRNZInst<(ins VECREG:$rCond, brtarget:$dest),
+ [(brcond (vectype VECREG:$rCond), bb:$dest)]>;
+
+ multiclass BranchNotZero {
+ def v4i32 : BRNZVecInst<v4i32>;
+ def r32 : BRNZRegInst<R32C>;
+ }
+
+ defm BRNZ : BranchNotZero;
+
+ class BRZInst<dag IOL, list<dag> pattern>:
+ RI16Form<0b000000100, (outs), IOL, "brz\t$rT,$dest",
+ BranchResolv, pattern>;
+
+ class BRZRegInst<RegisterClass rclass>:
+ BRZInst<(ins rclass:$rT, brtarget:$dest), [/* no pattern */]>;
+
+ class BRZVecInst<ValueType vectype>:
+ BRZInst<(ins VECREG:$rT, brtarget:$dest), [/* no pattern */]>;
+
+ multiclass BranchZero {
+ def v4i32: BRZVecInst<v4i32>;
+ def r32: BRZRegInst<R32C>;
+ }
+
+ defm BRZ: BranchZero;
+
+ // Note: LLVM doesn't do branch conditional, indirect. Otherwise these would
+ // be useful:
+ /*
+ class BINZInst<dag IOL, list<dag> pattern>:
+ BICondForm<0b10010100100, (outs), IOL, "binz\t$rA, $dest", pattern>;
+
+ class BINZRegInst<RegisterClass rclass>:
+ BINZInst<(ins rclass:$rA, brtarget:$dest),
+ [(brcond rclass:$rA, R32C:$dest)]>;
+
+ class BINZVecInst<ValueType vectype>:
+ BINZInst<(ins VECREG:$rA, R32C:$dest),
+ [(brcond (vectype VECREG:$rA), R32C:$dest)]>;
+
+ multiclass BranchNotZeroIndirect {
+ def v4i32: BINZVecInst<v4i32>;
+ def r32: BINZRegInst<R32C>;
+ }
+
+ defm BINZ: BranchNotZeroIndirect;
+
+ class BIZInst<dag IOL, list<dag> pattern>:
+ BICondForm<0b00010100100, (outs), IOL, "biz\t$rA, $func", pattern>;
+
+ class BIZRegInst<RegisterClass rclass>:
+ BIZInst<(ins rclass:$rA, R32C:$func), [/* no pattern */]>;
+
+ class BIZVecInst<ValueType vectype>:
+ BIZInst<(ins VECREG:$rA, R32C:$func), [/* no pattern */]>;
+
+ multiclass BranchZeroIndirect {
+ def v4i32: BIZVecInst<v4i32>;
+ def r32: BIZRegInst<R32C>;
+ }
+
+ defm BIZ: BranchZeroIndirect;
+ */
+
+ class BRHNZInst<dag IOL, list<dag> pattern>:
+ RI16Form<0b011000100, (outs), IOL, "brhnz\t$rCond,$dest", BranchResolv,
+ pattern>;
+
+ class BRHNZRegInst<RegisterClass rclass>:
+ BRHNZInst<(ins rclass:$rCond, brtarget:$dest),
+ [(brcond rclass:$rCond, bb:$dest)]>;
+
+ class BRHNZVecInst<ValueType vectype>:
+ BRHNZInst<(ins VECREG:$rCond, brtarget:$dest), [/* no pattern */]>;
+
+ multiclass BranchNotZeroHalfword {
+ def v8i16: BRHNZVecInst<v8i16>;
+ def r16: BRHNZRegInst<R16C>;
+ }
+
+ defm BRHNZ: BranchNotZeroHalfword;
+
+ class BRHZInst<dag IOL, list<dag> pattern>:
+ RI16Form<0b001000100, (outs), IOL, "brhz\t$rT,$dest", BranchResolv,
+ pattern>;
+
+ class BRHZRegInst<RegisterClass rclass>:
+ BRHZInst<(ins rclass:$rT, brtarget:$dest), [/* no pattern */]>;
+
+ class BRHZVecInst<ValueType vectype>:
+ BRHZInst<(ins VECREG:$rT, brtarget:$dest), [/* no pattern */]>;
+
+ multiclass BranchZeroHalfword {
+ def v8i16: BRHZVecInst<v8i16>;
+ def r16: BRHZRegInst<R16C>;
+ }
+
+ defm BRHZ: BranchZeroHalfword;
+}
+
+//===----------------------------------------------------------------------===//
+// setcc and brcond patterns:
+//===----------------------------------------------------------------------===//
+
+def : Pat<(brcond (i16 (seteq R16C:$rA, 0)), bb:$dest),
+ (BRHZr16 R16C:$rA, bb:$dest)>;
+def : Pat<(brcond (i16 (setne R16C:$rA, 0)), bb:$dest),
+ (BRHNZr16 R16C:$rA, bb:$dest)>;
+
+def : Pat<(brcond (i32 (seteq R32C:$rA, 0)), bb:$dest),
+ (BRZr32 R32C:$rA, bb:$dest)>;
+def : Pat<(brcond (i32 (setne R32C:$rA, 0)), bb:$dest),
+ (BRNZr32 R32C:$rA, bb:$dest)>;
+
+multiclass BranchCondEQ<PatFrag cond, SPUInstr brinst16, SPUInstr brinst32>
+{
+ def r16imm: Pat<(brcond (i16 (cond R16C:$rA, i16ImmSExt10:$val)), bb:$dest),
+ (brinst16 (CEQHIr16 R16C:$rA, i16ImmSExt10:$val), bb:$dest)>;
+
+ def r16 : Pat<(brcond (i16 (cond R16C:$rA, R16C:$rB)), bb:$dest),
+ (brinst16 (CEQHr16 R16C:$rA, R16:$rB), bb:$dest)>;
+
+ def r32imm : Pat<(brcond (i32 (cond R32C:$rA, i32ImmSExt10:$val)), bb:$dest),
+ (brinst32 (CEQIr32 R32C:$rA, i32ImmSExt10:$val), bb:$dest)>;
+
+ def r32 : Pat<(brcond (i32 (cond R32C:$rA, R32C:$rB)), bb:$dest),
+ (brinst32 (CEQr32 R32C:$rA, R32C:$rB), bb:$dest)>;
+}
+
+defm BRCONDeq : BranchCondEQ<seteq, BRHNZr16, BRNZr32>;
+defm BRCONDne : BranchCondEQ<setne, BRHZr16, BRZr32>;
+
+multiclass BranchCondLGT<PatFrag cond, SPUInstr brinst16, SPUInstr brinst32>
+{
+ def r16imm : Pat<(brcond (i16 (cond R16C:$rA, i16ImmSExt10:$val)), bb:$dest),
+ (brinst16 (CLGTHIr16 R16C:$rA, i16ImmSExt10:$val), bb:$dest)>;
+
+ def r16 : Pat<(brcond (i16 (cond R16C:$rA, R16C:$rB)), bb:$dest),
+ (brinst16 (CLGTHr16 R16C:$rA, R16:$rB), bb:$dest)>;
+
+ def r32imm : Pat<(brcond (i32 (cond R32C:$rA, i32ImmSExt10:$val)), bb:$dest),
+ (brinst32 (CLGTIr32 R32C:$rA, i32ImmSExt10:$val), bb:$dest)>;
+
+ def r32 : Pat<(brcond (i32 (cond R32C:$rA, R32C:$rB)), bb:$dest),
+ (brinst32 (CLGTr32 R32C:$rA, R32C:$rB), bb:$dest)>;
+}
+
+defm BRCONDugt : BranchCondLGT<setugt, BRHNZr16, BRNZr32>;
+defm BRCONDule : BranchCondLGT<setule, BRHZr16, BRZr32>;
+
+multiclass BranchCondLGTEQ<PatFrag cond, SPUInstr orinst16, SPUInstr brinst16,
+ SPUInstr orinst32, SPUInstr brinst32>
+{
+ def r16imm: Pat<(brcond (i16 (cond R16C:$rA, i16ImmSExt10:$val)), bb:$dest),
+ (brinst16 (orinst16 (CLGTHIr16 R16C:$rA, i16ImmSExt10:$val),
+ (CEQHIr16 R16C:$rA, i16ImmSExt10:$val)),
+ bb:$dest)>;
+
+ def r16: Pat<(brcond (i16 (cond R16C:$rA, R16C:$rB)), bb:$dest),
+ (brinst16 (orinst16 (CLGTHr16 R16C:$rA, R16:$rB),
+ (CEQHr16 R16C:$rA, R16:$rB)),
+ bb:$dest)>;
+
+ def r32imm : Pat<(brcond (i32 (cond R32C:$rA, i32ImmSExt10:$val)), bb:$dest),
+ (brinst32 (orinst32 (CLGTIr32 R32C:$rA, i32ImmSExt10:$val),
+ (CEQIr32 R32C:$rA, i32ImmSExt10:$val)),
+ bb:$dest)>;
+
+ def r32 : Pat<(brcond (i32 (cond R32C:$rA, R32C:$rB)), bb:$dest),
+ (brinst32 (orinst32 (CLGTr32 R32C:$rA, R32C:$rB),
+ (CEQr32 R32C:$rA, R32C:$rB)),
+ bb:$dest)>;
+}
+
+defm BRCONDuge : BranchCondLGTEQ<setuge, ORr16, BRHNZr16, ORr32, BRNZr32>;
+defm BRCONDult : BranchCondLGTEQ<setult, ORr16, BRHZr16, ORr32, BRZr32>;
+
+multiclass BranchCondGT<PatFrag cond, SPUInstr brinst16, SPUInstr brinst32>
+{
+ def r16imm : Pat<(brcond (i16 (cond R16C:$rA, i16ImmSExt10:$val)), bb:$dest),
+ (brinst16 (CGTHIr16 R16C:$rA, i16ImmSExt10:$val), bb:$dest)>;
+
+ def r16 : Pat<(brcond (i16 (cond R16C:$rA, R16C:$rB)), bb:$dest),
+ (brinst16 (CGTHr16 R16C:$rA, R16:$rB), bb:$dest)>;
+
+ def r32imm : Pat<(brcond (i32 (cond R32C:$rA, i32ImmSExt10:$val)), bb:$dest),
+ (brinst32 (CGTIr32 R32C:$rA, i32ImmSExt10:$val), bb:$dest)>;
+
+ def r32 : Pat<(brcond (i32 (cond R32C:$rA, R32C:$rB)), bb:$dest),
+ (brinst32 (CGTr32 R32C:$rA, R32C:$rB), bb:$dest)>;
+}
+
+defm BRCONDgt : BranchCondGT<setgt, BRHNZr16, BRNZr32>;
+defm BRCONDle : BranchCondGT<setle, BRHZr16, BRZr32>;
+
+multiclass BranchCondGTEQ<PatFrag cond, SPUInstr orinst16, SPUInstr brinst16,
+ SPUInstr orinst32, SPUInstr brinst32>
+{
+ def r16imm: Pat<(brcond (i16 (cond R16C:$rA, i16ImmSExt10:$val)), bb:$dest),
+ (brinst16 (orinst16 (CGTHIr16 R16C:$rA, i16ImmSExt10:$val),
+ (CEQHIr16 R16C:$rA, i16ImmSExt10:$val)),
+ bb:$dest)>;
+
+ def r16: Pat<(brcond (i16 (cond R16C:$rA, R16C:$rB)), bb:$dest),
+ (brinst16 (orinst16 (CGTHr16 R16C:$rA, R16:$rB),
+ (CEQHr16 R16C:$rA, R16:$rB)),
+ bb:$dest)>;
+
+ def r32imm : Pat<(brcond (i32 (cond R32C:$rA, i32ImmSExt10:$val)), bb:$dest),
+ (brinst32 (orinst32 (CGTIr32 R32C:$rA, i32ImmSExt10:$val),
+ (CEQIr32 R32C:$rA, i32ImmSExt10:$val)),
+ bb:$dest)>;
+
+ def r32 : Pat<(brcond (i32 (cond R32C:$rA, R32C:$rB)), bb:$dest),
+ (brinst32 (orinst32 (CGTr32 R32C:$rA, R32C:$rB),
+ (CEQr32 R32C:$rA, R32C:$rB)),
+ bb:$dest)>;
+}
+
+defm BRCONDge : BranchCondGTEQ<setge, ORr16, BRHNZr16, ORr32, BRNZr32>;
+defm BRCONDlt : BranchCondGTEQ<setlt, ORr16, BRHZr16, ORr32, BRZr32>;
+
+let isTerminator = 1, isBarrier = 1 in {
+ let isReturn = 1 in {
+ def RET:
+ RETForm<"bi\t$$lr", [(retflag)]>;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Single precision floating point instructions
+//===----------------------------------------------------------------------===//
+
+class FAInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b01011000100, OOL, IOL, "fa\t$rT, $rA, $rB",
+ SPrecFP, pattern>;
+
+class FAVecInst<ValueType vectype>:
+ FAInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT),
+ (fadd (vectype VECREG:$rA), (vectype VECREG:$rB)))]>;
+
+multiclass SFPAdd
+{
+ def v4f32: FAVecInst<v4f32>;
+ def f32: FAInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB),
+ [(set R32FP:$rT, (fadd R32FP:$rA, R32FP:$rB))]>;
+}
+
+defm FA : SFPAdd;
+
+class FSInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b01011000100, OOL, IOL, "fs\t$rT, $rA, $rB",
+ SPrecFP, pattern>;
+
+class FSVecInst<ValueType vectype>:
+ FSInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT),
+ (fsub (vectype VECREG:$rA), (vectype VECREG:$rB)))]>;
+
+multiclass SFPSub
+{
+ def v4f32: FSVecInst<v4f32>;
+ def f32: FSInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB),
+ [(set R32FP:$rT, (fsub R32FP:$rA, R32FP:$rB))]>;
+}
+
+defm FS : SFPSub;
+
+class FMInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b01100011010, OOL, IOL,
+ "fm\t$rT, $rA, $rB", SPrecFP,
+ pattern>;
+
+class FMVecInst<ValueType type>:
+ FMInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (type VECREG:$rT),
+ (fmul (type VECREG:$rA), (type VECREG:$rB)))]>;
+
+multiclass SFPMul
+{
+ def v4f32: FMVecInst<v4f32>;
+ def f32: FMInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB),
+ [(set R32FP:$rT, (fmul R32FP:$rA, R32FP:$rB))]>;
+}
+
+defm FM : SFPMul;
+
+// Floating point multiply and add
+// e.g. d = c + (a * b)
+def FMAv4f32:
+ RRRForm<0b0111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ "fma\t$rT, $rA, $rB, $rC", SPrecFP,
+ [(set (v4f32 VECREG:$rT),
+ (fadd (v4f32 VECREG:$rC),
+ (fmul (v4f32 VECREG:$rA), (v4f32 VECREG:$rB))))]>;
+
+def FMAf32:
+ RRRForm<0b0111, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC),
+ "fma\t$rT, $rA, $rB, $rC", SPrecFP,
+ [(set R32FP:$rT, (fadd R32FP:$rC, (fmul R32FP:$rA, R32FP:$rB)))]>;
+
+// FP multiply and subtract
+// Subtracts value in rC from product
+// res = a * b - c
+def FMSv4f32 :
+ RRRForm<0b0111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ "fms\t$rT, $rA, $rB, $rC", SPrecFP,
+ [(set (v4f32 VECREG:$rT),
+ (fsub (fmul (v4f32 VECREG:$rA), (v4f32 VECREG:$rB)),
+ (v4f32 VECREG:$rC)))]>;
+
+def FMSf32 :
+ RRRForm<0b0111, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC),
+ "fms\t$rT, $rA, $rB, $rC", SPrecFP,
+ [(set R32FP:$rT,
+ (fsub (fmul R32FP:$rA, R32FP:$rB), R32FP:$rC))]>;
+
+// Floating Negative Mulitply and Subtract
+// Subtracts product from value in rC
+// res = fneg(fms a b c)
+// = - (a * b - c)
+// = c - a * b
+// NOTE: subtraction order
+// fsub a b = a - b
+// fs a b = b - a?
+def FNMSf32 :
+ RRRForm<0b1101, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC),
+ "fnms\t$rT, $rA, $rB, $rC", SPrecFP,
+ [(set R32FP:$rT, (fsub R32FP:$rC, (fmul R32FP:$rA, R32FP:$rB)))]>;
+
+def FNMSv4f32 :
+ RRRForm<0b1101, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ "fnms\t$rT, $rA, $rB, $rC", SPrecFP,
+ [(set (v4f32 VECREG:$rT),
+ (fsub (v4f32 VECREG:$rC),
+ (fmul (v4f32 VECREG:$rA),
+ (v4f32 VECREG:$rB))))]>;
+
+
+
+
+// Floating point reciprocal estimate
+
+class FRESTInst<dag OOL, dag IOL>:
+ RRForm_1<0b00110111000, OOL, IOL,
+ "frest\t$rT, $rA", SPrecFP,
+ [/* no pattern */]>;
+
+def FRESTv4f32 :
+ FRESTInst<(outs VECREG:$rT), (ins VECREG:$rA)>;
+
+def FRESTf32 :
+ FRESTInst<(outs R32FP:$rT), (ins R32FP:$rA)>;
+
+// Floating point interpolate (used in conjunction with reciprocal estimate)
+def FIv4f32 :
+ RRForm<0b00101011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "fi\t$rT, $rA, $rB", SPrecFP,
+ [/* no pattern */]>;
+
+def FIf32 :
+ RRForm<0b00101011110, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB),
+ "fi\t$rT, $rA, $rB", SPrecFP,
+ [/* no pattern */]>;
+
+//--------------------------------------------------------------------------
+// Basic single precision floating point comparisons:
+//
+// Note: There is no support on SPU for single precision NaN. Consequently,
+// ordered and unordered comparisons are the same.
+//--------------------------------------------------------------------------
+
+def FCEQf32 :
+ RRForm<0b01000011110, (outs R32C:$rT), (ins R32FP:$rA, R32FP:$rB),
+ "fceq\t$rT, $rA, $rB", SPrecFP,
+ [(set R32C:$rT, (setueq R32FP:$rA, R32FP:$rB))]>;
+
+def : Pat<(setoeq R32FP:$rA, R32FP:$rB),
+ (FCEQf32 R32FP:$rA, R32FP:$rB)>;
+
+def FCMEQf32 :
+ RRForm<0b01010011110, (outs R32C:$rT), (ins R32FP:$rA, R32FP:$rB),
+ "fcmeq\t$rT, $rA, $rB", SPrecFP,
+ [(set R32C:$rT, (setueq (fabs R32FP:$rA), (fabs R32FP:$rB)))]>;
+
+def : Pat<(setoeq (fabs R32FP:$rA), (fabs R32FP:$rB)),
+ (FCMEQf32 R32FP:$rA, R32FP:$rB)>;
+
+def FCGTf32 :
+ RRForm<0b01000011010, (outs R32C:$rT), (ins R32FP:$rA, R32FP:$rB),
+ "fcgt\t$rT, $rA, $rB", SPrecFP,
+ [(set R32C:$rT, (setugt R32FP:$rA, R32FP:$rB))]>;
+
+def : Pat<(setogt R32FP:$rA, R32FP:$rB),
+ (FCGTf32 R32FP:$rA, R32FP:$rB)>;
+
+def FCMGTf32 :
+ RRForm<0b01010011010, (outs R32C:$rT), (ins R32FP:$rA, R32FP:$rB),
+ "fcmgt\t$rT, $rA, $rB", SPrecFP,
+ [(set R32C:$rT, (setugt (fabs R32FP:$rA), (fabs R32FP:$rB)))]>;
+
+def : Pat<(setogt (fabs R32FP:$rA), (fabs R32FP:$rB)),
+ (FCMGTf32 R32FP:$rA, R32FP:$rB)>;
+
+//--------------------------------------------------------------------------
+// Single precision floating point comparisons and SETCC equivalents:
+//--------------------------------------------------------------------------
+
+def : SETCCNegCondReg<setune, R32FP, i32, XORIr32, FCEQf32>;
+def : SETCCNegCondReg<setone, R32FP, i32, XORIr32, FCEQf32>;
+
+def : SETCCBinOpReg<setuge, R32FP, ORr32, FCGTf32, FCEQf32>;
+def : SETCCBinOpReg<setoge, R32FP, ORr32, FCGTf32, FCEQf32>;
+
+def : SETCCBinOpReg<setult, R32FP, NORr32, FCGTf32, FCEQf32>;
+def : SETCCBinOpReg<setolt, R32FP, NORr32, FCGTf32, FCEQf32>;
+
+def : Pat<(setule R32FP:$rA, R32FP:$rB),
+ (XORIr32 (FCGTf32 R32FP:$rA, R32FP:$rB), 0xffffffff)>;
+def : Pat<(setole R32FP:$rA, R32FP:$rB),
+ (XORIr32 (FCGTf32 R32FP:$rA, R32FP:$rB), 0xffffffff)>;
+
+// FP Status and Control Register Write
+// Why isn't rT a don't care in the ISA?
+// Should we create a special RRForm_3 for this guy and zero out the rT?
+def FSCRWf32 :
+ RRForm_1<0b01011101110, (outs R32FP:$rT), (ins R32FP:$rA),
+ "fscrwr\t$rA", SPrecFP,
+ [/* This instruction requires an intrinsic. Note: rT is unused. */]>;
+
+// FP Status and Control Register Read
+def FSCRRf32 :
+ RRForm_2<0b01011101110, (outs R32FP:$rT), (ins),
+ "fscrrd\t$rT", SPrecFP,
+ [/* This instruction requires an intrinsic */]>;
+
+// llvm instruction space
+// How do these map onto cell instructions?
+// fdiv rA rB
+// frest rC rB # c = 1/b (both lines)
+// fi rC rB rC
+// fm rD rA rC # d = a * 1/b
+// fnms rB rD rB rA # b = - (d * b - a) --should == 0 in a perfect world
+// fma rB rB rC rD # b = b * c + d
+// = -(d *b -a) * c + d
+// = a * c - c ( a *b *c - a)
+
+// fcopysign (???)
+
+// Library calls:
+// These llvm instructions will actually map to library calls.
+// All that's needed, then, is to check that the appropriate library is
+// imported and do a brsl to the proper function name.
+// frem # fmod(x, y): x - (x/y) * y
+// (Note: fmod(double, double), fmodf(float,float)
+// fsqrt?
+// fsin?
+// fcos?
+// Unimplemented SPU instruction space
+// floating reciprocal absolute square root estimate (frsqest)
+
+// The following are probably just intrinsics
+// status and control register write
+// status and control register read
+
+//--------------------------------------
+// Floating Point Conversions
+// Signed conversions:
+def CSiFv4f32:
+ CVTIntFPForm<0b0101101110, (outs VECREG:$rT), (ins VECREG:$rA),
+ "csflt\t$rT, $rA, 0", SPrecFP,
+ [(set (v4f32 VECREG:$rT), (sint_to_fp (v4i32 VECREG:$rA)))]>;
+
+// Convert signed integer to floating point
+def CSiFf32 :
+ CVTIntFPForm<0b0101101110, (outs R32FP:$rT), (ins R32C:$rA),
+ "csflt\t$rT, $rA, 0", SPrecFP,
+ [(set R32FP:$rT, (sint_to_fp R32C:$rA))]>;
+
+// Convert unsigned into to float
+def CUiFv4f32 :
+ CVTIntFPForm<0b1101101110, (outs VECREG:$rT), (ins VECREG:$rA),
+ "cuflt\t$rT, $rA, 0", SPrecFP,
+ [(set (v4f32 VECREG:$rT), (uint_to_fp (v4i32 VECREG:$rA)))]>;
+
+def CUiFf32 :
+ CVTIntFPForm<0b1101101110, (outs R32FP:$rT), (ins R32C:$rA),
+ "cuflt\t$rT, $rA, 0", SPrecFP,
+ [(set R32FP:$rT, (uint_to_fp R32C:$rA))]>;
+
+// Convert float to unsigned int
+// Assume that scale = 0
+
+def CFUiv4f32 :
+ CVTIntFPForm<0b1101101110, (outs VECREG:$rT), (ins VECREG:$rA),
+ "cfltu\t$rT, $rA, 0", SPrecFP,
+ [(set (v4i32 VECREG:$rT), (fp_to_uint (v4f32 VECREG:$rA)))]>;
+
+def CFUif32 :
+ CVTIntFPForm<0b1101101110, (outs R32C:$rT), (ins R32FP:$rA),
+ "cfltu\t$rT, $rA, 0", SPrecFP,
+ [(set R32C:$rT, (fp_to_uint R32FP:$rA))]>;
+
+// Convert float to signed int
+// Assume that scale = 0
+
+def CFSiv4f32 :
+ CVTIntFPForm<0b1101101110, (outs VECREG:$rT), (ins VECREG:$rA),
+ "cflts\t$rT, $rA, 0", SPrecFP,
+ [(set (v4i32 VECREG:$rT), (fp_to_sint (v4f32 VECREG:$rA)))]>;
+
+def CFSif32 :
+ CVTIntFPForm<0b1101101110, (outs R32C:$rT), (ins R32FP:$rA),
+ "cflts\t$rT, $rA, 0", SPrecFP,
+ [(set R32C:$rT, (fp_to_sint R32FP:$rA))]>;
+
+//===----------------------------------------------------------------------==//
+// Single<->Double precision conversions
+//===----------------------------------------------------------------------==//
+
+// NOTE: We use "vec" name suffix here to avoid confusion (e.g. input is a
+// v4f32, output is v2f64--which goes in the name?)
+
+// Floating point extend single to double
+// NOTE: Not sure if passing in v4f32 to FESDvec is correct since it
+// operates on two double-word slots (i.e. 1st and 3rd fp numbers
+// are ignored).
+def FESDvec :
+ RRForm_1<0b00011101110, (outs VECREG:$rT), (ins VECREG:$rA),
+ "fesd\t$rT, $rA", SPrecFP,
+ [/*(set (v2f64 VECREG:$rT), (fextend (v4f32 VECREG:$rA)))*/]>;
+
+def FESDf32 :
+ RRForm_1<0b00011101110, (outs R64FP:$rT), (ins R32FP:$rA),
+ "fesd\t$rT, $rA", SPrecFP,
+ [(set R64FP:$rT, (fextend R32FP:$rA))]>;
+
+// Floating point round double to single
+//def FRDSvec :
+// RRForm_1<0b10011101110, (outs VECREG:$rT), (ins VECREG:$rA),
+// "frds\t$rT, $rA,", SPrecFP,
+// [(set (v4f32 R32FP:$rT), (fround (v2f64 R64FP:$rA)))]>;
+
+def FRDSf64 :
+ RRForm_1<0b10011101110, (outs R32FP:$rT), (ins R64FP:$rA),
+ "frds\t$rT, $rA", SPrecFP,
+ [(set R32FP:$rT, (fround R64FP:$rA))]>;
+
+//ToDo include anyextend?
+
+//===----------------------------------------------------------------------==//
+// Double precision floating point instructions
+//===----------------------------------------------------------------------==//
+def FAf64 :
+ RRForm<0b00110011010, (outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB),
+ "dfa\t$rT, $rA, $rB", DPrecFP,
+ [(set R64FP:$rT, (fadd R64FP:$rA, R64FP:$rB))]>;
+
+def FAv2f64 :
+ RRForm<0b00110011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "dfa\t$rT, $rA, $rB", DPrecFP,
+ [(set (v2f64 VECREG:$rT), (fadd (v2f64 VECREG:$rA), (v2f64 VECREG:$rB)))]>;
+
+def FSf64 :
+ RRForm<0b10100011010, (outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB),
+ "dfs\t$rT, $rA, $rB", DPrecFP,
+ [(set R64FP:$rT, (fsub R64FP:$rA, R64FP:$rB))]>;
+
+def FSv2f64 :
+ RRForm<0b10100011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "dfs\t$rT, $rA, $rB", DPrecFP,
+ [(set (v2f64 VECREG:$rT),
+ (fsub (v2f64 VECREG:$rA), (v2f64 VECREG:$rB)))]>;
+
+def FMf64 :
+ RRForm<0b01100011010, (outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB),
+ "dfm\t$rT, $rA, $rB", DPrecFP,
+ [(set R64FP:$rT, (fmul R64FP:$rA, R64FP:$rB))]>;
+
+def FMv2f64:
+ RRForm<0b00100011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "dfm\t$rT, $rA, $rB", DPrecFP,
+ [(set (v2f64 VECREG:$rT),
+ (fmul (v2f64 VECREG:$rA), (v2f64 VECREG:$rB)))]>;
+
+def FMAf64:
+ RRForm<0b00111010110, (outs R64FP:$rT),
+ (ins R64FP:$rA, R64FP:$rB, R64FP:$rC),
+ "dfma\t$rT, $rA, $rB", DPrecFP,
+ [(set R64FP:$rT, (fadd R64FP:$rC, (fmul R64FP:$rA, R64FP:$rB)))]>,
+ RegConstraint<"$rC = $rT">,
+ NoEncode<"$rC">;
+
+def FMAv2f64:
+ RRForm<0b00111010110, (outs VECREG:$rT),
+ (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ "dfma\t$rT, $rA, $rB", DPrecFP,
+ [(set (v2f64 VECREG:$rT),
+ (fadd (v2f64 VECREG:$rC),
+ (fmul (v2f64 VECREG:$rA), (v2f64 VECREG:$rB))))]>,
+ RegConstraint<"$rC = $rT">,
+ NoEncode<"$rC">;
+
+def FMSf64 :
+ RRForm<0b10111010110, (outs R64FP:$rT),
+ (ins R64FP:$rA, R64FP:$rB, R64FP:$rC),
+ "dfms\t$rT, $rA, $rB", DPrecFP,
+ [(set R64FP:$rT, (fsub (fmul R64FP:$rA, R64FP:$rB), R64FP:$rC))]>,
+ RegConstraint<"$rC = $rT">,
+ NoEncode<"$rC">;
+
+def FMSv2f64 :
+ RRForm<0b10111010110, (outs VECREG:$rT),
+ (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ "dfms\t$rT, $rA, $rB", DPrecFP,
+ [(set (v2f64 VECREG:$rT),
+ (fsub (fmul (v2f64 VECREG:$rA), (v2f64 VECREG:$rB)),
+ (v2f64 VECREG:$rC)))]>;
+
+// DFNMS: - (a * b - c)
+// - (a * b) + c => c - (a * b)
+
+class DFNMSInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b01111010110, OOL, IOL, "dfnms\t$rT, $rA, $rB",
+ DPrecFP, pattern>,
+ RegConstraint<"$rC = $rT">,
+ NoEncode<"$rC">;
+
+class DFNMSVecInst<list<dag> pattern>:
+ DFNMSInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ pattern>;
+
+class DFNMSRegInst<list<dag> pattern>:
+ DFNMSInst<(outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB, R64FP:$rC),
+ pattern>;
+
+multiclass DFMultiplySubtract
+{
+ def v2f64 : DFNMSVecInst<[(set (v2f64 VECREG:$rT),
+ (fsub (v2f64 VECREG:$rC),
+ (fmul (v2f64 VECREG:$rA),
+ (v2f64 VECREG:$rB))))]>;
+
+ def f64 : DFNMSRegInst<[(set R64FP:$rT,
+ (fsub R64FP:$rC,
+ (fmul R64FP:$rA, R64FP:$rB)))]>;
+}
+
+defm DFNMS : DFMultiplySubtract;
+
+// - (a * b + c)
+// - (a * b) - c
+def FNMAf64 :
+ RRForm<0b11111010110, (outs R64FP:$rT),
+ (ins R64FP:$rA, R64FP:$rB, R64FP:$rC),
+ "dfnma\t$rT, $rA, $rB", DPrecFP,
+ [(set R64FP:$rT, (fneg (fadd R64FP:$rC, (fmul R64FP:$rA, R64FP:$rB))))]>,
+ RegConstraint<"$rC = $rT">,
+ NoEncode<"$rC">;
+
+def FNMAv2f64 :
+ RRForm<0b11111010110, (outs VECREG:$rT),
+ (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ "dfnma\t$rT, $rA, $rB", DPrecFP,
+ [(set (v2f64 VECREG:$rT),
+ (fneg (fadd (v2f64 VECREG:$rC),
+ (fmul (v2f64 VECREG:$rA),
+ (v2f64 VECREG:$rB)))))]>,
+ RegConstraint<"$rC = $rT">,
+ NoEncode<"$rC">;
+
+//===----------------------------------------------------------------------==//
+// Floating point negation and absolute value
+//===----------------------------------------------------------------------==//
+
+def : Pat<(fneg (v4f32 VECREG:$rA)),
+ (XORfnegvec (v4f32 VECREG:$rA),
+ (v4f32 (ILHUv4i32 0x8000)))>;
+
+def : Pat<(fneg R32FP:$rA),
+ (XORfneg32 R32FP:$rA, (ILHUr32 0x8000))>;
+
+// Floating point absolute value
+// Note: f64 fabs is custom-selected.
+
+def : Pat<(fabs R32FP:$rA),
+ (ANDfabs32 R32FP:$rA, (IOHLr32 (ILHUr32 0x7fff), 0xffff))>;
+
+def : Pat<(fabs (v4f32 VECREG:$rA)),
+ (ANDfabsvec (v4f32 VECREG:$rA),
+ (IOHLv4i32 (ILHUv4i32 0x7fff), 0xffff))>;
+
+//===----------------------------------------------------------------------===//
+// Hint for branch instructions:
+//===----------------------------------------------------------------------===//
+def HBRA :
+ HBI16Form<0b0001001,(ins hbrtarget:$brinst, brtarget:$btarg), "hbra\t$brinst, $btarg">;
+
+//===----------------------------------------------------------------------===//
+// Execution, Load NOP (execute NOPs belong in even pipeline, load NOPs belong
+// in the odd pipeline)
+//===----------------------------------------------------------------------===//
+
+def ENOP : SPUInstr<(outs), (ins), "nop", ExecNOP> {
+ let Pattern = [];
+
+ let Inst{0-10} = 0b10000000010;
+ let Inst{11-17} = 0;
+ let Inst{18-24} = 0;
+ let Inst{25-31} = 0;
+}
+
+def LNOP : SPUInstr<(outs), (ins), "lnop", LoadNOP> {
+ let Pattern = [];
+
+ let Inst{0-10} = 0b10000000000;
+ let Inst{11-17} = 0;
+ let Inst{18-24} = 0;
+ let Inst{25-31} = 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Bit conversions (type conversions between vector/packed types)
+// NOTE: Promotions are handled using the XS* instructions.
+//===----------------------------------------------------------------------===//
+def : Pat<(v16i8 (bitconvert (v8i16 VECREG:$src))), (v16i8 VECREG:$src)>;
+def : Pat<(v16i8 (bitconvert (v4i32 VECREG:$src))), (v16i8 VECREG:$src)>;
+def : Pat<(v16i8 (bitconvert (v2i64 VECREG:$src))), (v16i8 VECREG:$src)>;
+def : Pat<(v16i8 (bitconvert (v4f32 VECREG:$src))), (v16i8 VECREG:$src)>;
+def : Pat<(v16i8 (bitconvert (v2f64 VECREG:$src))), (v16i8 VECREG:$src)>;
+
+def : Pat<(v8i16 (bitconvert (v16i8 VECREG:$src))), (v8i16 VECREG:$src)>;
+def : Pat<(v8i16 (bitconvert (v4i32 VECREG:$src))), (v8i16 VECREG:$src)>;
+def : Pat<(v8i16 (bitconvert (v2i64 VECREG:$src))), (v8i16 VECREG:$src)>;
+def : Pat<(v8i16 (bitconvert (v4f32 VECREG:$src))), (v8i16 VECREG:$src)>;
+def : Pat<(v8i16 (bitconvert (v2f64 VECREG:$src))), (v8i16 VECREG:$src)>;
+
+def : Pat<(v4i32 (bitconvert (v16i8 VECREG:$src))), (v4i32 VECREG:$src)>;
+def : Pat<(v4i32 (bitconvert (v8i16 VECREG:$src))), (v4i32 VECREG:$src)>;
+def : Pat<(v4i32 (bitconvert (v2i64 VECREG:$src))), (v4i32 VECREG:$src)>;
+def : Pat<(v4i32 (bitconvert (v4f32 VECREG:$src))), (v4i32 VECREG:$src)>;
+def : Pat<(v4i32 (bitconvert (v2f64 VECREG:$src))), (v4i32 VECREG:$src)>;
+
+def : Pat<(v2i64 (bitconvert (v16i8 VECREG:$src))), (v2i64 VECREG:$src)>;
+def : Pat<(v2i64 (bitconvert (v8i16 VECREG:$src))), (v2i64 VECREG:$src)>;
+def : Pat<(v2i64 (bitconvert (v4i32 VECREG:$src))), (v2i64 VECREG:$src)>;
+def : Pat<(v2i64 (bitconvert (v4f32 VECREG:$src))), (v2i64 VECREG:$src)>;
+def : Pat<(v2i64 (bitconvert (v2f64 VECREG:$src))), (v2i64 VECREG:$src)>;
+
+def : Pat<(v4f32 (bitconvert (v16i8 VECREG:$src))), (v4f32 VECREG:$src)>;
+def : Pat<(v4f32 (bitconvert (v8i16 VECREG:$src))), (v4f32 VECREG:$src)>;
+def : Pat<(v4f32 (bitconvert (v2i64 VECREG:$src))), (v4f32 VECREG:$src)>;
+def : Pat<(v4f32 (bitconvert (v4i32 VECREG:$src))), (v4f32 VECREG:$src)>;
+def : Pat<(v4f32 (bitconvert (v2f64 VECREG:$src))), (v4f32 VECREG:$src)>;
+
+def : Pat<(v2f64 (bitconvert (v16i8 VECREG:$src))), (v2f64 VECREG:$src)>;
+def : Pat<(v2f64 (bitconvert (v8i16 VECREG:$src))), (v2f64 VECREG:$src)>;
+def : Pat<(v2f64 (bitconvert (v4i32 VECREG:$src))), (v2f64 VECREG:$src)>;
+def : Pat<(v2f64 (bitconvert (v2i64 VECREG:$src))), (v2f64 VECREG:$src)>;
+def : Pat<(v2f64 (bitconvert (v4f32 VECREG:$src))), (v2f64 VECREG:$src)>;
+
+def : Pat<(i128 (bitconvert (v16i8 VECREG:$src))),
+ (COPY_TO_REGCLASS VECREG:$src, GPRC)>;
+def : Pat<(i128 (bitconvert (v8i16 VECREG:$src))),
+ (COPY_TO_REGCLASS VECREG:$src, GPRC)>;
+def : Pat<(i128 (bitconvert (v4i32 VECREG:$src))),
+ (COPY_TO_REGCLASS VECREG:$src, GPRC)>;
+def : Pat<(i128 (bitconvert (v2i64 VECREG:$src))),
+ (COPY_TO_REGCLASS VECREG:$src, GPRC)>;
+def : Pat<(i128 (bitconvert (v4f32 VECREG:$src))),
+ (COPY_TO_REGCLASS VECREG:$src, GPRC)>;
+def : Pat<(i128 (bitconvert (v2f64 VECREG:$src))),
+ (COPY_TO_REGCLASS VECREG:$src, GPRC)>;
+
+def : Pat<(v16i8 (bitconvert (i128 GPRC:$src))),
+ (v16i8 (COPY_TO_REGCLASS GPRC:$src, VECREG))>;
+def : Pat<(v8i16 (bitconvert (i128 GPRC:$src))),
+ (v8i16 (COPY_TO_REGCLASS GPRC:$src, VECREG))>;
+def : Pat<(v4i32 (bitconvert (i128 GPRC:$src))),
+ (v4i32 (COPY_TO_REGCLASS GPRC:$src, VECREG))>;
+def : Pat<(v2i64 (bitconvert (i128 GPRC:$src))),
+ (v2i64 (COPY_TO_REGCLASS GPRC:$src, VECREG))>;
+def : Pat<(v4f32 (bitconvert (i128 GPRC:$src))),
+ (v4f32 (COPY_TO_REGCLASS GPRC:$src, VECREG))>;
+def : Pat<(v2f64 (bitconvert (i128 GPRC:$src))),
+ (v2f64 (COPY_TO_REGCLASS GPRC:$src, VECREG))>;
+
+def : Pat<(i32 (bitconvert R32FP:$rA)),
+ (COPY_TO_REGCLASS R32FP:$rA, R32C)>;
+
+def : Pat<(f32 (bitconvert R32C:$rA)),
+ (COPY_TO_REGCLASS R32C:$rA, R32FP)>;
+
+def : Pat<(i64 (bitconvert R64FP:$rA)),
+ (COPY_TO_REGCLASS R64FP:$rA, R64C)>;
+
+def : Pat<(f64 (bitconvert R64C:$rA)),
+ (COPY_TO_REGCLASS R64C:$rA, R64FP)>;
+
+
+//===----------------------------------------------------------------------===//
+// Instruction patterns:
+//===----------------------------------------------------------------------===//
+
+// General 32-bit constants:
+def : Pat<(i32 imm:$imm),
+ (IOHLr32 (ILHUr32 (HI16 imm:$imm)), (LO16 imm:$imm))>;
+
+// Single precision float constants:
+def : Pat<(f32 fpimm:$imm),
+ (IOHLf32 (ILHUf32 (HI16_f32 fpimm:$imm)), (LO16_f32 fpimm:$imm))>;
+
+// General constant 32-bit vectors
+def : Pat<(v4i32 v4i32Imm:$imm),
+ (IOHLv4i32 (v4i32 (ILHUv4i32 (HI16_vec v4i32Imm:$imm))),
+ (LO16_vec v4i32Imm:$imm))>;
+
+// 8-bit constants
+def : Pat<(i8 imm:$imm),
+ (ILHr8 imm:$imm)>;
+
+//===----------------------------------------------------------------------===//
+// Zero/Any/Sign extensions
+//===----------------------------------------------------------------------===//
+
+// sext 8->32: Sign extend bytes to words
+def : Pat<(sext_inreg R32C:$rSrc, i8),
+ (XSHWr32 (XSBHr32 R32C:$rSrc))>;
+
+def : Pat<(i32 (sext R8C:$rSrc)),
+ (XSHWr16 (XSBHr8 R8C:$rSrc))>;
+
+// sext 8->64: Sign extend bytes to double word
+def : Pat<(sext_inreg R64C:$rSrc, i8),
+ (XSWDr64_inreg (XSHWr64 (XSBHr64 R64C:$rSrc)))>;
+
+def : Pat<(i64 (sext R8C:$rSrc)),
+ (XSWDr64 (XSHWr16 (XSBHr8 R8C:$rSrc)))>;
+
+// zext 8->16: Zero extend bytes to halfwords
+def : Pat<(i16 (zext R8C:$rSrc)),
+ (ANDHIi8i16 R8C:$rSrc, 0xff)>;
+
+// zext 8->32: Zero extend bytes to words
+def : Pat<(i32 (zext R8C:$rSrc)),
+ (ANDIi8i32 R8C:$rSrc, 0xff)>;
+
+// zext 8->64: Zero extend bytes to double words
+def : Pat<(i64 (zext R8C:$rSrc)),
+ (COPY_TO_REGCLASS (SELBv4i32 (ROTQMBYv4i32
+ (COPY_TO_REGCLASS
+ (ANDIi8i32 R8C:$rSrc,0xff), VECREG),
+ 0x4),
+ (ILv4i32 0x0),
+ (FSMBIv4i32 0x0f0f)), R64C)>;
+
+// anyext 8->16: Extend 8->16 bits, irrespective of sign, preserves high bits
+def : Pat<(i16 (anyext R8C:$rSrc)),
+ (ORHIi8i16 R8C:$rSrc, 0)>;
+
+// anyext 8->32: Extend 8->32 bits, irrespective of sign, preserves high bits
+def : Pat<(i32 (anyext R8C:$rSrc)),
+ (COPY_TO_REGCLASS R8C:$rSrc, R32C)>;
+
+// sext 16->64: Sign extend halfword to double word
+def : Pat<(sext_inreg R64C:$rSrc, i16),
+ (XSWDr64_inreg (XSHWr64 R64C:$rSrc))>;
+
+def : Pat<(sext R16C:$rSrc),
+ (XSWDr64 (XSHWr16 R16C:$rSrc))>;
+
+// zext 16->32: Zero extend halfwords to words
+def : Pat<(i32 (zext R16C:$rSrc)),
+ (ANDi16i32 R16C:$rSrc, (ILAr32 0xffff))>;
+
+def : Pat<(i32 (zext (and R16C:$rSrc, 0xf))),
+ (ANDIi16i32 R16C:$rSrc, 0xf)>;
+
+def : Pat<(i32 (zext (and R16C:$rSrc, 0xff))),
+ (ANDIi16i32 R16C:$rSrc, 0xff)>;
+
+def : Pat<(i32 (zext (and R16C:$rSrc, 0xfff))),
+ (ANDIi16i32 R16C:$rSrc, 0xfff)>;
+
+// anyext 16->32: Extend 16->32 bits, irrespective of sign
+def : Pat<(i32 (anyext R16C:$rSrc)),
+ (COPY_TO_REGCLASS R16C:$rSrc, R32C)>;
+
+//===----------------------------------------------------------------------===//
+// Truncates:
+// These truncates are for the SPU's supported types (i8, i16, i32). i64 and
+// above are custom lowered.
+//===----------------------------------------------------------------------===//
+
+def : Pat<(i8 (trunc GPRC:$src)),
+ (COPY_TO_REGCLASS
+ (SHUFBgprc GPRC:$src, GPRC:$src,
+ (IOHLv4i32 (ILHUv4i32 0x0f0f), 0x0f0f)), R8C)>;
+
+def : Pat<(i8 (trunc R64C:$src)),
+ (COPY_TO_REGCLASS
+ (SHUFBv2i64_m32
+ (COPY_TO_REGCLASS R64C:$src, VECREG),
+ (COPY_TO_REGCLASS R64C:$src, VECREG),
+ (IOHLv4i32 (ILHUv4i32 0x0707), 0x0707)), R8C)>;
+
+def : Pat<(i8 (trunc R32C:$src)),
+ (COPY_TO_REGCLASS
+ (SHUFBv4i32_m32
+ (COPY_TO_REGCLASS R32C:$src, VECREG),
+ (COPY_TO_REGCLASS R32C:$src, VECREG),
+ (IOHLv4i32 (ILHUv4i32 0x0303), 0x0303)), R8C)>;
+
+def : Pat<(i8 (trunc R16C:$src)),
+ (COPY_TO_REGCLASS
+ (SHUFBv4i32_m32
+ (COPY_TO_REGCLASS R16C:$src, VECREG),
+ (COPY_TO_REGCLASS R16C:$src, VECREG),
+ (IOHLv4i32 (ILHUv4i32 0x0303), 0x0303)), R8C)>;
+
+def : Pat<(i16 (trunc GPRC:$src)),
+ (COPY_TO_REGCLASS
+ (SHUFBgprc GPRC:$src, GPRC:$src,
+ (IOHLv4i32 (ILHUv4i32 0x0e0f), 0x0e0f)), R16C)>;
+
+def : Pat<(i16 (trunc R64C:$src)),
+ (COPY_TO_REGCLASS
+ (SHUFBv2i64_m32
+ (COPY_TO_REGCLASS R64C:$src, VECREG),
+ (COPY_TO_REGCLASS R64C:$src, VECREG),
+ (IOHLv4i32 (ILHUv4i32 0x0607), 0x0607)), R16C)>;
+
+def : Pat<(i16 (trunc R32C:$src)),
+ (COPY_TO_REGCLASS
+ (SHUFBv4i32_m32
+ (COPY_TO_REGCLASS R32C:$src, VECREG),
+ (COPY_TO_REGCLASS R32C:$src, VECREG),
+ (IOHLv4i32 (ILHUv4i32 0x0203), 0x0203)), R16C)>;
+
+def : Pat<(i32 (trunc GPRC:$src)),
+ (COPY_TO_REGCLASS
+ (SHUFBgprc GPRC:$src, GPRC:$src,
+ (IOHLv4i32 (ILHUv4i32 0x0c0d), 0x0e0f)), R32C)>;
+
+def : Pat<(i32 (trunc R64C:$src)),
+ (COPY_TO_REGCLASS
+ (SHUFBv2i64_m32
+ (COPY_TO_REGCLASS R64C:$src, VECREG),
+ (COPY_TO_REGCLASS R64C:$src, VECREG),
+ (IOHLv4i32 (ILHUv4i32 0x0405), 0x0607)), R32C)>;
+
+//===----------------------------------------------------------------------===//
+// Address generation: SPU, like PPC, has to split addresses into high and
+// low parts in order to load them into a register.
+//===----------------------------------------------------------------------===//
+
+def : Pat<(SPUaform tglobaladdr:$in, 0), (ILAlsa tglobaladdr:$in)>;
+def : Pat<(SPUaform texternalsym:$in, 0), (ILAlsa texternalsym:$in)>;
+def : Pat<(SPUaform tjumptable:$in, 0), (ILAlsa tjumptable:$in)>;
+def : Pat<(SPUaform tconstpool:$in, 0), (ILAlsa tconstpool:$in)>;
+
+def : Pat<(SPUindirect (SPUhi tglobaladdr:$in, 0),
+ (SPUlo tglobaladdr:$in, 0)),
+ (IOHLlo (ILHUhi tglobaladdr:$in), tglobaladdr:$in)>;
+
+def : Pat<(SPUindirect (SPUhi texternalsym:$in, 0),
+ (SPUlo texternalsym:$in, 0)),
+ (IOHLlo (ILHUhi texternalsym:$in), texternalsym:$in)>;
+
+def : Pat<(SPUindirect (SPUhi tjumptable:$in, 0),
+ (SPUlo tjumptable:$in, 0)),
+ (IOHLlo (ILHUhi tjumptable:$in), tjumptable:$in)>;
+
+def : Pat<(SPUindirect (SPUhi tconstpool:$in, 0),
+ (SPUlo tconstpool:$in, 0)),
+ (IOHLlo (ILHUhi tconstpool:$in), tconstpool:$in)>;
+
+def : Pat<(add (SPUhi tglobaladdr:$in, 0), (SPUlo tglobaladdr:$in, 0)),
+ (IOHLlo (ILHUhi tglobaladdr:$in), tglobaladdr:$in)>;
+
+def : Pat<(add (SPUhi texternalsym:$in, 0), (SPUlo texternalsym:$in, 0)),
+ (IOHLlo (ILHUhi texternalsym:$in), texternalsym:$in)>;
+
+def : Pat<(add (SPUhi tjumptable:$in, 0), (SPUlo tjumptable:$in, 0)),
+ (IOHLlo (ILHUhi tjumptable:$in), tjumptable:$in)>;
+
+def : Pat<(add (SPUhi tconstpool:$in, 0), (SPUlo tconstpool:$in, 0)),
+ (IOHLlo (ILHUhi tconstpool:$in), tconstpool:$in)>;
+
+// Intrinsics:
+include "CellSDKIntrinsics.td"
+// Various math operator instruction sequences
+include "SPUMathInstr.td"
+// 64-bit "instructions"/support
+include "SPU64InstrInfo.td"
+// 128-bit "instructions"/support
+include "SPU128InstrInfo.td"
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUMachineFunction.h b/contrib/llvm/lib/Target/CellSPU/SPUMachineFunction.h
new file mode 100644
index 0000000..3ef3ccb
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUMachineFunction.h
@@ -0,0 +1,49 @@
+//===-- SPUMachineFunctionInfo.h - Private data used for CellSPU --*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the IBM Cell SPU specific subclass of MachineFunctionInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPU_MACHINE_FUNCTION_INFO_H
+#define SPU_MACHINE_FUNCTION_INFO_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+/// SPUFunctionInfo - Cell SPU target-specific information for each
+/// MachineFunction
+class SPUFunctionInfo : public MachineFunctionInfo {
+private:
+ /// UsesLR - Indicates whether LR is used in the current function.
+ ///
+ bool UsesLR;
+
+ // VarArgsFrameIndex - FrameIndex for start of varargs area.
+ int VarArgsFrameIndex;
+
+public:
+ SPUFunctionInfo(MachineFunction& MF)
+ : UsesLR(false),
+ VarArgsFrameIndex(0)
+ {}
+
+ void setUsesLR(bool U) { UsesLR = U; }
+ bool usesLR() { return UsesLR; }
+
+ int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
+ void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; }
+};
+
+} // end of namespace llvm
+
+
+#endif
+
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUMathInstr.td b/contrib/llvm/lib/Target/CellSPU/SPUMathInstr.td
new file mode 100644
index 0000000..ed7129e
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUMathInstr.td
@@ -0,0 +1,97 @@
+//======--- SPUMathInst.td - Cell SPU math operations -*- tablegen -*---======//
+//
+// Cell SPU math operations
+//
+// This target description file contains instruction sequences for various
+// math operations, such as vector multiplies, i32 multiply, etc., for the
+// SPU's i32, i16 i8 and corresponding vector types.
+//
+// Any resemblance to libsimdmath or the Cell SDK simdmath library is
+// purely and completely coincidental.
+//===----------------------------------------------------------------------===//
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// v16i8 multiply instruction sequence:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+def : Pat<(mul (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)),
+ (ORv4i32
+ (ANDv4i32
+ (SELBv4i32 (MPYv8i16 VECREG:$rA, VECREG:$rB),
+ (SHLHIv8i16 (MPYv8i16 (ROTMAHIv8i16 VECREG:$rA, 8),
+ (ROTMAHIv8i16 VECREG:$rB, 8)), 8),
+ (FSMBIv8i16 0x2222)),
+ (ILAv4i32 0x0000ffff)),
+ (SHLIv4i32
+ (SELBv4i32 (MPYv8i16 (ROTMAIv4i32_i32 VECREG:$rA, 16),
+ (ROTMAIv4i32_i32 VECREG:$rB, 16)),
+ (SHLHIv8i16 (MPYv8i16 (ROTMAIv4i32_i32 VECREG:$rA, 8),
+ (ROTMAIv4i32_i32 VECREG:$rB, 8)), 8),
+ (FSMBIv8i16 0x2222)), 16))>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// v8i16 multiply instruction sequence:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+def : Pat<(mul (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)),
+ (SELBv8i16 (MPYv8i16 VECREG:$rA, VECREG:$rB),
+ (SHLIv4i32 (MPYHHv8i16 VECREG:$rA, VECREG:$rB), 16),
+ (FSMBIv8i16 0xcccc))>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// v4i32, i32 multiply instruction sequence:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+def MPYv4i32:
+ Pat<(mul (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)),
+ (Av4i32
+ (v4i32 (Av4i32 (v4i32 (MPYHv4i32 VECREG:$rA, VECREG:$rB)),
+ (v4i32 (MPYHv4i32 VECREG:$rB, VECREG:$rA)))),
+ (v4i32 (MPYUv4i32 VECREG:$rA, VECREG:$rB)))>;
+
+def MPYi32:
+ Pat<(mul R32C:$rA, R32C:$rB),
+ (Ar32
+ (Ar32 (MPYHr32 R32C:$rA, R32C:$rB),
+ (MPYHr32 R32C:$rB, R32C:$rA)),
+ (MPYUr32 R32C:$rA, R32C:$rB))>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// f32, v4f32 divide instruction sequence:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+// Reciprocal estimate and interpolation
+def Interpf32: CodeFrag<(FIf32 R32FP:$rB, (FRESTf32 R32FP:$rB))>;
+// Division estimate
+def DivEstf32: CodeFrag<(FMf32 R32FP:$rA, Interpf32.Fragment)>;
+// Newton-Raphson iteration
+def NRaphf32: CodeFrag<(FMAf32 (FNMSf32 DivEstf32.Fragment, R32FP:$rB, R32FP:$rA),
+ Interpf32.Fragment,
+ DivEstf32.Fragment)>;
+// Epsilon addition
+def Epsilonf32: CodeFrag<(AIf32 NRaphf32.Fragment, 1)>;
+
+def : Pat<(fdiv R32FP:$rA, R32FP:$rB),
+ (SELBf32_cond NRaphf32.Fragment,
+ Epsilonf32.Fragment,
+ (CGTIf32 (FNMSf32 R32FP:$rB, Epsilonf32.Fragment, R32FP:$rA), -1))>;
+
+// Reciprocal estimate and interpolation
+def Interpv4f32: CodeFrag<(FIv4f32 (v4f32 VECREG:$rB), (FRESTv4f32 (v4f32 VECREG:$rB)))>;
+// Division estimate
+def DivEstv4f32: CodeFrag<(FMv4f32 (v4f32 VECREG:$rA), Interpv4f32.Fragment)>;
+// Newton-Raphson iteration
+def NRaphv4f32: CodeFrag<(FMAv4f32 (FNMSv4f32 DivEstv4f32.Fragment,
+ (v4f32 VECREG:$rB),
+ (v4f32 VECREG:$rA)),
+ Interpv4f32.Fragment,
+ DivEstv4f32.Fragment)>;
+// Epsilon addition
+def Epsilonv4f32: CodeFrag<(AIv4f32 NRaphv4f32.Fragment, 1)>;
+
+def : Pat<(fdiv (v4f32 VECREG:$rA), (v4f32 VECREG:$rB)),
+ (SELBv4f32_cond NRaphv4f32.Fragment,
+ Epsilonv4f32.Fragment,
+ (CGTIv4f32 (FNMSv4f32 (v4f32 VECREG:$rB),
+ Epsilonv4f32.Fragment,
+ (v4f32 VECREG:$rA)), -1))>;
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUNodes.td b/contrib/llvm/lib/Target/CellSPU/SPUNodes.td
new file mode 100644
index 0000000..a6e621f
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUNodes.td
@@ -0,0 +1,159 @@
+//===- SPUNodes.td - Specialized SelectionDAG nodes used for CellSPU ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Type profiles and SelectionDAG nodes used by CellSPU
+//
+//===----------------------------------------------------------------------===//
+
+// Type profile for a call sequence
+def SDT_SPUCallSeq : SDTypeProfile<0, 1, [ SDTCisVT<0, i32> ]>;
+
+// SPU_GenControl: Type profile for generating control words for insertions
+def SPU_GenControl : SDTypeProfile<1, 1, []>;
+def SPUshufmask : SDNode<"SPUISD::SHUFFLE_MASK", SPU_GenControl, []>;
+
+def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_SPUCallSeq,
+ [SDNPHasChain, SDNPOutGlue]>;
+def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_SPUCallSeq,
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>;
+//===----------------------------------------------------------------------===//
+// Operand constraints:
+//===----------------------------------------------------------------------===//
+
+def SDT_SPUCall : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>;
+def SPUcall : SDNode<"SPUISD::CALL", SDT_SPUCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+
+// Operand type constraints for vector shuffle/permute operations
+def SDT_SPUshuffle : SDTypeProfile<1, 3, [
+ SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>
+]>;
+
+// Vector binary operator type constraints (needs a further constraint to
+// ensure that operand 0 is a vector...):
+
+def SPUVecBinop: SDTypeProfile<1, 2, [
+ SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>
+]>;
+
+// Trinary operators, e.g., addx, carry generate
+def SPUIntTrinaryOp : SDTypeProfile<1, 3, [
+ SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisInt<0>
+]>;
+
+// SELECT_MASK type constraints: There are several variations for the various
+// vector types (this avoids having to bit_convert all over the place.)
+def SPUselmask_type: SDTypeProfile<1, 1, [
+ SDTCisInt<1>
+]>;
+
+// SELB type constraints:
+def SPUselb_type: SDTypeProfile<1, 3, [
+ SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisSameAs<0, 3> ]>;
+
+// SPU Vector shift pseudo-instruction type constraints
+def SPUvecshift_type: SDTypeProfile<1, 2, [
+ SDTCisSameAs<0, 1>, SDTCisInt<2>]>;
+
+// "marker" type for i64 operators that need a shuffle mask
+// (i.e., uses cg or bg or another instruction that needs to
+// use shufb to get things in the right place.)
+// Op0: The result
+// Op1, 2: LHS, RHS
+// Op3: Carry-generate shuffle mask
+
+def SPUmarker_type : SDTypeProfile<1, 3, [
+ SDTCisInt<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2> ]>;
+
+//===----------------------------------------------------------------------===//
+// Synthetic/pseudo-instructions
+//===----------------------------------------------------------------------===//
+
+// SPU CNTB:
+def SPUcntb : SDNode<"SPUISD::CNTB", SDTIntUnaryOp>;
+
+// SPU vector shuffle node, matched by the SPUISD::SHUFB enum (see
+// SPUISelLowering.h):
+def SPUshuffle: SDNode<"SPUISD::SHUFB", SDT_SPUshuffle, []>;
+
+// Vector shifts (ISD::SHL,SRL,SRA are for _integers_ only):
+def SPUvec_shl: SDNode<"ISD::SHL", SPUvecshift_type, []>;
+def SPUvec_srl: SDNode<"ISD::SRL", SPUvecshift_type, []>;
+def SPUvec_sra: SDNode<"ISD::SRA", SPUvecshift_type, []>;
+
+def SPUvec_rotl: SDNode<"SPUISD::VEC_ROTL", SPUvecshift_type, []>;
+def SPUvec_rotr: SDNode<"SPUISD::VEC_ROTR", SPUvecshift_type, []>;
+
+// Vector rotate left, bits shifted out of the left are rotated in on the right
+def SPUrotbytes_left: SDNode<"SPUISD::ROTBYTES_LEFT",
+ SPUvecshift_type, []>;
+
+// Vector rotate left by bytes, but the count is given in bits and the SPU
+// internally converts it to bytes (saves an instruction to mask off lower
+// three bits)
+def SPUrotbytes_left_bits : SDNode<"SPUISD::ROTBYTES_LEFT_BITS",
+ SPUvecshift_type>;
+
+// Shift entire quad left by bytes/bits. Zeros are shifted in on the right
+// SHL_BITS the same as SHL for i128, but ISD::SHL is not implemented for i128
+def SPUshlquad_l_bytes: SDNode<"SPUISD::SHL_BYTES", SPUvecshift_type, []>;
+def SPUshlquad_l_bits: SDNode<"SPUISD::SHL_BITS", SPUvecshift_type, []>;
+def SPUsrl_bytes: SDNode<"SPUISD::SRL_BYTES", SPUvecshift_type, []>;
+
+// SPU form select mask for bytes, immediate
+def SPUselmask: SDNode<"SPUISD::SELECT_MASK", SPUselmask_type, []>;
+
+// SPU select bits instruction
+def SPUselb: SDNode<"SPUISD::SELB", SPUselb_type, []>;
+
+def SDTprefslot2vec: SDTypeProfile<1, 1, []>;
+def SPUprefslot2vec: SDNode<"SPUISD::PREFSLOT2VEC", SDTprefslot2vec, []>;
+
+def SPU_vec_demote : SDTypeProfile<1, 1, []>;
+def SPUvec2prefslot: SDNode<"SPUISD::VEC2PREFSLOT", SPU_vec_demote, []>;
+
+// Address high and low components, used for [r+r] type addressing
+def SPUhi : SDNode<"SPUISD::Hi", SDTIntBinOp, []>;
+def SPUlo : SDNode<"SPUISD::Lo", SDTIntBinOp, []>;
+
+// PC-relative address
+def SPUpcrel : SDNode<"SPUISD::PCRelAddr", SDTIntBinOp, []>;
+
+// A-Form local store addresses
+def SPUaform : SDNode<"SPUISD::AFormAddr", SDTIntBinOp, []>;
+
+// Indirect [D-Form "imm($reg)" and X-Form "$reg($reg)"] addresses
+def SPUindirect : SDNode<"SPUISD::IndirectAddr", SDTIntBinOp, []>;
+
+// i64 markers: supplies extra operands used to generate the i64 operator
+// instruction sequences
+def SPUadd64 : SDNode<"SPUISD::ADD64_MARKER", SPUmarker_type, []>;
+def SPUsub64 : SDNode<"SPUISD::SUB64_MARKER", SPUmarker_type, []>;
+def SPUmul64 : SDNode<"SPUISD::MUL64_MARKER", SPUmarker_type, []>;
+
+//===----------------------------------------------------------------------===//
+// Constraints: (taken from PPCInstrInfo.td)
+//===----------------------------------------------------------------------===//
+
+class RegConstraint<string C> {
+ string Constraints = C;
+}
+
+class NoEncode<string E> {
+ string DisableEncoding = E;
+}
+
+//===----------------------------------------------------------------------===//
+// Return (flag isn't quite what it means: the operations are flagged so that
+// instruction scheduling doesn't disassociate them.)
+//===----------------------------------------------------------------------===//
+
+def retflag : SDNode<"SPUISD::RET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue]>;
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUNopFiller.cpp b/contrib/llvm/lib/Target/CellSPU/SPUNopFiller.cpp
new file mode 100644
index 0000000..e2bd2d7
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUNopFiller.cpp
@@ -0,0 +1,153 @@
+//===-- SPUNopFiller.cpp - Add nops/lnops to align the pipelines---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The final pass just before assembly printing. This pass is the last
+// checkpoint where nops and lnops are added to the instruction stream to
+// satisfy the dual issue requirements. The actual dual issue scheduling is
+// done (TODO: nowhere, currently)
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPU.h"
+#include "SPUTargetMachine.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+ struct SPUNopFiller : public MachineFunctionPass {
+
+ TargetMachine &TM;
+ const TargetInstrInfo *TII;
+ const InstrItineraryData *IID;
+ bool isEvenPlace; // the instruction slot (mem address) at hand is even/odd
+
+ static char ID;
+ SPUNopFiller(TargetMachine &tm)
+ : MachineFunctionPass(ID), TM(tm), TII(tm.getInstrInfo()),
+ IID(tm.getInstrItineraryData())
+ {
+ DEBUG( dbgs() << "********** SPU Nop filler **********\n" ; );
+ }
+
+ virtual const char *getPassName() const {
+ return "SPU nop/lnop Filler";
+ }
+
+ void runOnMachineBasicBlock(MachineBasicBlock &MBB);
+
+ bool runOnMachineFunction(MachineFunction &F) {
+ isEvenPlace = true; //all functions get an .align 3 directive at start
+ for (MachineFunction::iterator FI = F.begin(), FE = F.end();
+ FI != FE; ++FI)
+ runOnMachineBasicBlock(*FI);
+ return true; //never-ever do any more modifications, just print it!
+ }
+
+ typedef enum { none = 0, // no more instructions in this function / BB
+ pseudo = 1, // this does not get executed
+ even = 2,
+ odd = 3 } SPUOpPlace;
+ SPUOpPlace getOpPlacement( MachineInstr &instr );
+
+ };
+ char SPUNopFiller::ID = 0;
+
+}
+
+// Fill a BasicBlock to alignment.
+// In the assebly we align the functions to 'even' adresses, but
+// basic blocks have an implicit alignmnet. We hereby define
+// basic blocks to have the same, even, alignment.
+void SPUNopFiller::
+runOnMachineBasicBlock(MachineBasicBlock &MBB)
+{
+ assert( isEvenPlace && "basic block start from odd address");
+ for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
+ {
+ SPUOpPlace this_optype, next_optype;
+ MachineBasicBlock::iterator J = I;
+ J++;
+
+ this_optype = getOpPlacement( *I );
+ next_optype = none;
+ while (J!=MBB.end()){
+ next_optype = getOpPlacement( *J );
+ ++J;
+ if (next_optype != pseudo )
+ break;
+ }
+
+ // padd: odd(wrong), even(wrong), ...
+ // to: nop(corr), odd(corr), even(corr)...
+ if( isEvenPlace && this_optype == odd && next_optype == even ) {
+ DEBUG( dbgs() <<"Adding NOP before: "; );
+ DEBUG( I->dump(); );
+ BuildMI(MBB, I, I->getDebugLoc(), TII->get(SPU::ENOP));
+ isEvenPlace=false;
+ }
+
+ // padd: even(wrong), odd(wrong), ...
+ // to: lnop(corr), even(corr), odd(corr)...
+ else if ( !isEvenPlace && this_optype == even && next_optype == odd){
+ DEBUG( dbgs() <<"Adding LNOP before: "; );
+ DEBUG( I->dump(); );
+ BuildMI(MBB, I, I->getDebugLoc(), TII->get(SPU::LNOP));
+ isEvenPlace=true;
+ }
+
+ // now go to next mem slot
+ if( this_optype != pseudo )
+ isEvenPlace = !isEvenPlace;
+
+ }
+
+ // padd basicblock end
+ if( !isEvenPlace ){
+ MachineBasicBlock::iterator J = MBB.end();
+ J--;
+ if (getOpPlacement( *J ) == odd) {
+ DEBUG( dbgs() <<"Padding basic block with NOP\n"; );
+ BuildMI(MBB, J, J->getDebugLoc(), TII->get(SPU::ENOP));
+ }
+ else {
+ J++;
+ DEBUG( dbgs() <<"Padding basic block with LNOP\n"; );
+ BuildMI(MBB, J, DebugLoc(), TII->get(SPU::LNOP));
+ }
+ isEvenPlace=true;
+ }
+}
+
+FunctionPass *llvm::createSPUNopFillerPass(SPUTargetMachine &tm) {
+ return new SPUNopFiller(tm);
+}
+
+// Figure out if 'instr' is executed in the even or odd pipeline
+SPUNopFiller::SPUOpPlace
+SPUNopFiller::getOpPlacement( MachineInstr &instr ) {
+ int sc = instr.getDesc().getSchedClass();
+ const InstrStage *stage = IID->beginStage(sc);
+ unsigned FUs = stage->getUnits();
+ SPUOpPlace retval;
+
+ switch( FUs ) {
+ case 0: retval = pseudo; break;
+ case 1: retval = odd; break;
+ case 2: retval = even; break;
+ default: retval= pseudo;
+ assert( false && "got unknown FuncUnit\n");
+ break;
+ };
+ return retval;
+}
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUOperands.td b/contrib/llvm/lib/Target/CellSPU/SPUOperands.td
new file mode 100644
index 0000000..96cde51
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUOperands.td
@@ -0,0 +1,664 @@
+//===- SPUOperands.td - Cell SPU Instruction Operands ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Cell SPU Instruction Operands:
+//===----------------------------------------------------------------------===//
+
+// TO_IMM32 - Convert an i8/i16 to i32.
+def TO_IMM32 : SDNodeXForm<imm, [{
+ return getI32Imm(N->getZExtValue());
+}]>;
+
+// TO_IMM16 - Convert an i8/i32 to i16.
+def TO_IMM16 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue(), MVT::i16);
+}]>;
+
+
+def LO16 : SDNodeXForm<imm, [{
+ unsigned val = N->getZExtValue();
+ // Transformation function: get the low 16 bits.
+ return getI32Imm(val & 0xffff);
+}]>;
+
+def LO16_vec : SDNodeXForm<scalar_to_vector, [{
+ SDValue OpVal(0, 0);
+
+ // Transformation function: get the low 16 bit immediate from a build_vector
+ // node.
+ assert(N->getOpcode() == ISD::BUILD_VECTOR
+ && "LO16_vec got something other than a BUILD_VECTOR");
+
+ // Get first constant operand...
+ for (unsigned i = 0, e = N->getNumOperands();
+ OpVal.getNode() == 0 && i != e; ++i) {
+ if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+ if (OpVal.getNode() == 0)
+ OpVal = N->getOperand(i);
+ }
+
+ assert(OpVal.getNode() != 0 && "LO16_vec did not locate a <defined> node");
+ ConstantSDNode *CN = cast<ConstantSDNode>(OpVal);
+ return getI32Imm((unsigned)CN->getZExtValue() & 0xffff);
+}]>;
+
+// Transform an immediate, returning the high 16 bits shifted down:
+def HI16 : SDNodeXForm<imm, [{
+ return getI32Imm((unsigned)N->getZExtValue() >> 16);
+}]>;
+
+// Transformation function: shift the high 16 bit immediate from a build_vector
+// node into the low 16 bits, and return a 16-bit constant.
+def HI16_vec : SDNodeXForm<scalar_to_vector, [{
+ SDValue OpVal(0, 0);
+
+ assert(N->getOpcode() == ISD::BUILD_VECTOR
+ && "HI16_vec got something other than a BUILD_VECTOR");
+
+ // Get first constant operand...
+ for (unsigned i = 0, e = N->getNumOperands();
+ OpVal.getNode() == 0 && i != e; ++i) {
+ if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+ if (OpVal.getNode() == 0)
+ OpVal = N->getOperand(i);
+ }
+
+ assert(OpVal.getNode() != 0 && "HI16_vec did not locate a <defined> node");
+ ConstantSDNode *CN = cast<ConstantSDNode>(OpVal);
+ return getI32Imm((unsigned)CN->getZExtValue() >> 16);
+}]>;
+
+// simm7 predicate - True if the immediate fits in an 7-bit signed
+// field.
+def simm7: PatLeaf<(imm), [{
+ int sextVal = int(N->getSExtValue());
+ return (sextVal >= -64 && sextVal <= 63);
+}]>;
+
+// uimm7 predicate - True if the immediate fits in an 7-bit unsigned
+// field.
+def uimm7: PatLeaf<(imm), [{
+ return (N->getZExtValue() <= 0x7f);
+}]>;
+
+// immSExt8 predicate - True if the immediate fits in an 8-bit sign extended
+// field.
+def immSExt8 : PatLeaf<(imm), [{
+ int Value = int(N->getSExtValue());
+ return (Value >= -(1 << 8) && Value <= (1 << 8) - 1);
+}]>;
+
+// immU8: immediate, unsigned 8-bit quantity
+def immU8 : PatLeaf<(imm), [{
+ return (N->getZExtValue() <= 0xff);
+}]>;
+
+// i32ImmSExt10 predicate - True if the i32 immediate fits in a 10-bit sign
+// extended field. Used by RI10Form instructions like 'ldq'.
+def i32ImmSExt10 : PatLeaf<(imm), [{
+ return isI32IntS10Immediate(N);
+}]>;
+
+// i32ImmUns10 predicate - True if the i32 immediate fits in a 10-bit unsigned
+// field. Used by RI10Form instructions like 'ldq'.
+def i32ImmUns10 : PatLeaf<(imm), [{
+ return isI32IntU10Immediate(N);
+}]>;
+
+// i16ImmSExt10 predicate - True if the i16 immediate fits in a 10-bit sign
+// extended field. Used by RI10Form instructions like 'ldq'.
+def i16ImmSExt10 : PatLeaf<(imm), [{
+ return isI16IntS10Immediate(N);
+}]>;
+
+// i16ImmUns10 predicate - True if the i16 immediate fits into a 10-bit unsigned
+// value. Used by RI10Form instructions.
+def i16ImmUns10 : PatLeaf<(imm), [{
+ return isI16IntU10Immediate(N);
+}]>;
+
+def immSExt16 : PatLeaf<(imm), [{
+ // immSExt16 predicate - True if the immediate fits in a 16-bit sign extended
+ // field.
+ short Ignored;
+ return isIntS16Immediate(N, Ignored);
+}]>;
+
+def immZExt16 : PatLeaf<(imm), [{
+ // immZExt16 predicate - True if the immediate fits in a 16-bit zero extended
+ // field.
+ return (uint64_t)N->getZExtValue() == (unsigned short)N->getZExtValue();
+}], LO16>;
+
+def immU16 : PatLeaf<(imm), [{
+ // immU16 predicate- True if the immediate fits into a 16-bit unsigned field.
+ return (uint64_t)N->getZExtValue() == (N->getZExtValue() & 0xffff);
+}]>;
+
+def imm18 : PatLeaf<(imm), [{
+ // imm18 predicate: True if the immediate fits into an 18-bit unsigned field.
+ int Value = (int) N->getZExtValue();
+ return isUInt<18>(Value);
+}]>;
+
+def lo16 : PatLeaf<(imm), [{
+ // lo16 predicate - returns true if the immediate has all zeros in the
+ // low order bits and is a 32-bit constant:
+ if (N->getValueType(0) == MVT::i32) {
+ uint32_t val = N->getZExtValue();
+ return ((val & 0x0000ffff) == val);
+ }
+
+ return false;
+}], LO16>;
+
+def hi16 : PatLeaf<(imm), [{
+ // hi16 predicate - returns true if the immediate has all zeros in the
+ // low order bits and is a 32-bit constant:
+ if (N->getValueType(0) == MVT::i32) {
+ uint32_t val = uint32_t(N->getZExtValue());
+ return ((val & 0xffff0000) == val);
+ } else if (N->getValueType(0) == MVT::i64) {
+ uint64_t val = N->getZExtValue();
+ return ((val & 0xffff0000ULL) == val);
+ }
+
+ return false;
+}], HI16>;
+
+def bitshift : PatLeaf<(imm), [{
+ // bitshift predicate - returns true if 0 < imm <= 7 for SHLQBII
+ // (shift left quadword by bits immediate)
+ int64_t Val = N->getZExtValue();
+ return (Val > 0 && Val <= 7);
+}]>;
+
+//===----------------------------------------------------------------------===//
+// Floating point operands:
+//===----------------------------------------------------------------------===//
+
+// Transform a float, returning the high 16 bits shifted down, as if
+// the float was really an unsigned integer:
+def HI16_f32 : SDNodeXForm<fpimm, [{
+ float fval = N->getValueAPF().convertToFloat();
+ return getI32Imm(FloatToBits(fval) >> 16);
+}]>;
+
+// Transformation function on floats: get the low 16 bits as if the float was
+// an unsigned integer.
+def LO16_f32 : SDNodeXForm<fpimm, [{
+ float fval = N->getValueAPF().convertToFloat();
+ return getI32Imm(FloatToBits(fval) & 0xffff);
+}]>;
+
+def FPimm_sext16 : SDNodeXForm<fpimm, [{
+ float fval = N->getValueAPF().convertToFloat();
+ return getI32Imm((int) ((FloatToBits(fval) << 16) >> 16));
+}]>;
+
+def FPimm_u18 : SDNodeXForm<fpimm, [{
+ float fval = N->getValueAPF().convertToFloat();
+ return getI32Imm(FloatToBits(fval) & ((1 << 18) - 1));
+}]>;
+
+def fpimmSExt16 : PatLeaf<(fpimm), [{
+ short Ignored;
+ return isFPS16Immediate(N, Ignored);
+}], FPimm_sext16>;
+
+// Does the SFP constant only have upp 16 bits set?
+def hi16_f32 : PatLeaf<(fpimm), [{
+ if (N->getValueType(0) == MVT::f32) {
+ uint32_t val = FloatToBits(N->getValueAPF().convertToFloat());
+ return ((val & 0xffff0000) == val);
+ }
+
+ return false;
+}], HI16_f32>;
+
+// Does the SFP constant fit into 18 bits?
+def fpimm18 : PatLeaf<(fpimm), [{
+ if (N->getValueType(0) == MVT::f32) {
+ uint32_t Value = FloatToBits(N->getValueAPF().convertToFloat());
+ return isUInt<18>(Value);
+ }
+
+ return false;
+}], FPimm_u18>;
+
+//===----------------------------------------------------------------------===//
+// 64-bit operands (TODO):
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// build_vector operands:
+//===----------------------------------------------------------------------===//
+
+// v16i8SExt8Imm_xform function: convert build_vector to 8-bit sign extended
+// immediate constant load for v16i8 vectors. N.B.: The incoming constant has
+// to be a 16-bit quantity with the upper and lower bytes equal (e.g., 0x2a2a).
+def v16i8SExt8Imm_xform: SDNodeXForm<build_vector, [{
+ return SPU::get_vec_i8imm(N, *CurDAG, MVT::i8);
+}]>;
+
+// v16i8SExt8Imm: Predicate test for 8-bit sign extended immediate constant
+// load, works in conjunction with its transform function. N.B.: This relies the
+// incoming constant being a 16-bit quantity, where the upper and lower bytes
+// are EXACTLY the same (e.g., 0x2a2a)
+def v16i8SExt8Imm: PatLeaf<(build_vector), [{
+ return SPU::get_vec_i8imm(N, *CurDAG, MVT::i8).getNode() != 0;
+}], v16i8SExt8Imm_xform>;
+
+// v16i8U8Imm_xform function: convert build_vector to unsigned 8-bit
+// immediate constant load for v16i8 vectors. N.B.: The incoming constant has
+// to be a 16-bit quantity with the upper and lower bytes equal (e.g., 0x2a2a).
+def v16i8U8Imm_xform: SDNodeXForm<build_vector, [{
+ return SPU::get_vec_i8imm(N, *CurDAG, MVT::i8);
+}]>;
+
+// v16i8U8Imm: Predicate test for unsigned 8-bit immediate constant
+// load, works in conjunction with its transform function. N.B.: This relies the
+// incoming constant being a 16-bit quantity, where the upper and lower bytes
+// are EXACTLY the same (e.g., 0x2a2a)
+def v16i8U8Imm: PatLeaf<(build_vector), [{
+ return SPU::get_vec_i8imm(N, *CurDAG, MVT::i8).getNode() != 0;
+}], v16i8U8Imm_xform>;
+
+// v8i16SExt8Imm_xform function: convert build_vector to 8-bit sign extended
+// immediate constant load for v8i16 vectors.
+def v8i16SExt8Imm_xform: SDNodeXForm<build_vector, [{
+ return SPU::get_vec_i8imm(N, *CurDAG, MVT::i16);
+}]>;
+
+// v8i16SExt8Imm: Predicate test for 8-bit sign extended immediate constant
+// load, works in conjunction with its transform function.
+def v8i16SExt8Imm: PatLeaf<(build_vector), [{
+ return SPU::get_vec_i8imm(N, *CurDAG, MVT::i16).getNode() != 0;
+}], v8i16SExt8Imm_xform>;
+
+// v8i16SExt10Imm_xform function: convert build_vector to 16-bit sign extended
+// immediate constant load for v8i16 vectors.
+def v8i16SExt10Imm_xform: SDNodeXForm<build_vector, [{
+ return SPU::get_vec_i10imm(N, *CurDAG, MVT::i16);
+}]>;
+
+// v8i16SExt10Imm: Predicate test for 16-bit sign extended immediate constant
+// load, works in conjunction with its transform function.
+def v8i16SExt10Imm: PatLeaf<(build_vector), [{
+ return SPU::get_vec_i10imm(N, *CurDAG, MVT::i16).getNode() != 0;
+}], v8i16SExt10Imm_xform>;
+
+// v8i16Uns10Imm_xform function: convert build_vector to 16-bit unsigned
+// immediate constant load for v8i16 vectors.
+def v8i16Uns10Imm_xform: SDNodeXForm<build_vector, [{
+ return SPU::get_vec_i10imm(N, *CurDAG, MVT::i16);
+}]>;
+
+// v8i16Uns10Imm: Predicate test for 16-bit unsigned immediate constant
+// load, works in conjunction with its transform function.
+def v8i16Uns10Imm: PatLeaf<(build_vector), [{
+ return SPU::get_vec_i10imm(N, *CurDAG, MVT::i16).getNode() != 0;
+}], v8i16Uns10Imm_xform>;
+
+// v8i16SExt16Imm_xform function: convert build_vector to 16-bit sign extended
+// immediate constant load for v8i16 vectors.
+def v8i16Uns16Imm_xform: SDNodeXForm<build_vector, [{
+ return SPU::get_vec_i16imm(N, *CurDAG, MVT::i16);
+}]>;
+
+// v8i16SExt16Imm: Predicate test for 16-bit sign extended immediate constant
+// load, works in conjunction with its transform function.
+def v8i16SExt16Imm: PatLeaf<(build_vector), [{
+ return SPU::get_vec_i16imm(N, *CurDAG, MVT::i16).getNode() != 0;
+}], v8i16Uns16Imm_xform>;
+
+// v4i32SExt10Imm_xform function: convert build_vector to 10-bit sign extended
+// immediate constant load for v4i32 vectors.
+def v4i32SExt10Imm_xform: SDNodeXForm<build_vector, [{
+ return SPU::get_vec_i10imm(N, *CurDAG, MVT::i32);
+}]>;
+
+// v4i32SExt10Imm: Predicate test for 10-bit sign extended immediate constant
+// load, works in conjunction with its transform function.
+def v4i32SExt10Imm: PatLeaf<(build_vector), [{
+ return SPU::get_vec_i10imm(N, *CurDAG, MVT::i32).getNode() != 0;
+}], v4i32SExt10Imm_xform>;
+
+// v4i32Uns10Imm_xform function: convert build_vector to 10-bit unsigned
+// immediate constant load for v4i32 vectors.
+def v4i32Uns10Imm_xform: SDNodeXForm<build_vector, [{
+ return SPU::get_vec_i10imm(N, *CurDAG, MVT::i32);
+}]>;
+
+// v4i32Uns10Imm: Predicate test for 10-bit unsigned immediate constant
+// load, works in conjunction with its transform function.
+def v4i32Uns10Imm: PatLeaf<(build_vector), [{
+ return SPU::get_vec_i10imm(N, *CurDAG, MVT::i32).getNode() != 0;
+}], v4i32Uns10Imm_xform>;
+
+// v4i32SExt16Imm_xform function: convert build_vector to 16-bit sign extended
+// immediate constant load for v4i32 vectors.
+def v4i32SExt16Imm_xform: SDNodeXForm<build_vector, [{
+ return SPU::get_vec_i16imm(N, *CurDAG, MVT::i32);
+}]>;
+
+// v4i32SExt16Imm: Predicate test for 16-bit sign extended immediate constant
+// load, works in conjunction with its transform function.
+def v4i32SExt16Imm: PatLeaf<(build_vector), [{
+ return SPU::get_vec_i16imm(N, *CurDAG, MVT::i32).getNode() != 0;
+}], v4i32SExt16Imm_xform>;
+
+// v4i32Uns18Imm_xform function: convert build_vector to 18-bit unsigned
+// immediate constant load for v4i32 vectors.
+def v4i32Uns18Imm_xform: SDNodeXForm<build_vector, [{
+ return SPU::get_vec_u18imm(N, *CurDAG, MVT::i32);
+}]>;
+
+// v4i32Uns18Imm: Predicate test for 18-bit unsigned immediate constant load,
+// works in conjunction with its transform function.
+def v4i32Uns18Imm: PatLeaf<(build_vector), [{
+ return SPU::get_vec_u18imm(N, *CurDAG, MVT::i32).getNode() != 0;
+}], v4i32Uns18Imm_xform>;
+
+// ILHUvec_get_imm xform function: convert build_vector to ILHUvec imm constant
+// load.
+def ILHUvec_get_imm: SDNodeXForm<build_vector, [{
+ return SPU::get_ILHUvec_imm(N, *CurDAG, MVT::i32);
+}]>;
+
+/// immILHUvec: Predicate test for a ILHU constant vector.
+def immILHUvec: PatLeaf<(build_vector), [{
+ return SPU::get_ILHUvec_imm(N, *CurDAG, MVT::i32).getNode() != 0;
+}], ILHUvec_get_imm>;
+
+// Catch-all for any other i32 vector constants
+def v4i32_get_imm: SDNodeXForm<build_vector, [{
+ return SPU::get_v4i32_imm(N, *CurDAG);
+}]>;
+
+def v4i32Imm: PatLeaf<(build_vector), [{
+ return SPU::get_v4i32_imm(N, *CurDAG).getNode() != 0;
+}], v4i32_get_imm>;
+
+// v2i64SExt10Imm_xform function: convert build_vector to 10-bit sign extended
+// immediate constant load for v2i64 vectors.
+def v2i64SExt10Imm_xform: SDNodeXForm<build_vector, [{
+ return SPU::get_vec_i10imm(N, *CurDAG, MVT::i64);
+}]>;
+
+// v2i64SExt10Imm: Predicate test for 10-bit sign extended immediate constant
+// load, works in conjunction with its transform function.
+def v2i64SExt10Imm: PatLeaf<(build_vector), [{
+ return SPU::get_vec_i10imm(N, *CurDAG, MVT::i64).getNode() != 0;
+}], v2i64SExt10Imm_xform>;
+
+// v2i64SExt16Imm_xform function: convert build_vector to 16-bit sign extended
+// immediate constant load for v2i64 vectors.
+def v2i64SExt16Imm_xform: SDNodeXForm<build_vector, [{
+ return SPU::get_vec_i16imm(N, *CurDAG, MVT::i64);
+}]>;
+
+// v2i64SExt16Imm: Predicate test for 16-bit sign extended immediate constant
+// load, works in conjunction with its transform function.
+def v2i64SExt16Imm: PatLeaf<(build_vector), [{
+ return SPU::get_vec_i16imm(N, *CurDAG, MVT::i64).getNode() != 0;
+}], v2i64SExt16Imm_xform>;
+
+// v2i64Uns18Imm_xform function: convert build_vector to 18-bit unsigned
+// immediate constant load for v2i64 vectors.
+def v2i64Uns18Imm_xform: SDNodeXForm<build_vector, [{
+ return SPU::get_vec_u18imm(N, *CurDAG, MVT::i64);
+}]>;
+
+// v2i64Uns18Imm: Predicate test for 18-bit unsigned immediate constant load,
+// works in conjunction with its transform function.
+def v2i64Uns18Imm: PatLeaf<(build_vector), [{
+ return SPU::get_vec_u18imm(N, *CurDAG, MVT::i64).getNode() != 0;
+}], v2i64Uns18Imm_xform>;
+
+/// immILHUvec: Predicate test for a ILHU constant vector.
+def immILHUvec_i64: PatLeaf<(build_vector), [{
+ return SPU::get_ILHUvec_imm(N, *CurDAG, MVT::i64).getNode() != 0;
+}], ILHUvec_get_imm>;
+
+// Catch-all for any other i32 vector constants
+def v2i64_get_imm: SDNodeXForm<build_vector, [{
+ return SPU::get_v2i64_imm(N, *CurDAG);
+}]>;
+
+def v2i64Imm: PatLeaf<(build_vector), [{
+ return SPU::get_v2i64_imm(N, *CurDAG).getNode() != 0;
+}], v2i64_get_imm>;
+
+//===----------------------------------------------------------------------===//
+// Operand Definitions.
+
+def s7imm: Operand<i8> {
+ let PrintMethod = "printS7ImmOperand";
+}
+
+def s7imm_i8: Operand<i8> {
+ let PrintMethod = "printS7ImmOperand";
+}
+
+def u7imm: Operand<i16> {
+ let PrintMethod = "printU7ImmOperand";
+}
+
+def u7imm_i8: Operand<i8> {
+ let PrintMethod = "printU7ImmOperand";
+}
+
+def u7imm_i32: Operand<i32> {
+ let PrintMethod = "printU7ImmOperand";
+}
+
+// Halfword, signed 10-bit constant
+def s10imm : Operand<i16> {
+ let PrintMethod = "printS10ImmOperand";
+}
+
+def s10imm_i8: Operand<i8> {
+ let PrintMethod = "printS10ImmOperand";
+}
+
+def s10imm_i32: Operand<i32> {
+ let PrintMethod = "printS10ImmOperand";
+}
+
+def s10imm_i64: Operand<i64> {
+ let PrintMethod = "printS10ImmOperand";
+}
+
+// Unsigned 10-bit integers:
+def u10imm: Operand<i16> {
+ let PrintMethod = "printU10ImmOperand";
+}
+
+def u10imm_i8: Operand<i8> {
+ let PrintMethod = "printU10ImmOperand";
+}
+
+def u10imm_i32: Operand<i32> {
+ let PrintMethod = "printU10ImmOperand";
+}
+
+def s16imm : Operand<i16> {
+ let PrintMethod = "printS16ImmOperand";
+}
+
+def s16imm_i8: Operand<i8> {
+ let PrintMethod = "printS16ImmOperand";
+}
+
+def s16imm_i32: Operand<i32> {
+ let PrintMethod = "printS16ImmOperand";
+}
+
+def s16imm_i64: Operand<i64> {
+ let PrintMethod = "printS16ImmOperand";
+}
+
+def s16imm_f32: Operand<f32> {
+ let PrintMethod = "printS16ImmOperand";
+}
+
+def s16imm_f64: Operand<f64> {
+ let PrintMethod = "printS16ImmOperand";
+}
+
+def u16imm_i64 : Operand<i64> {
+ let PrintMethod = "printU16ImmOperand";
+}
+
+def u16imm_i32 : Operand<i32> {
+ let PrintMethod = "printU16ImmOperand";
+}
+
+def u16imm : Operand<i16> {
+ let PrintMethod = "printU16ImmOperand";
+}
+
+def f16imm : Operand<f32> {
+ let PrintMethod = "printU16ImmOperand";
+}
+
+def s18imm : Operand<i32> {
+ let PrintMethod = "printS18ImmOperand";
+}
+
+def u18imm : Operand<i32> {
+ let PrintMethod = "printU18ImmOperand";
+}
+
+def u18imm_i64 : Operand<i64> {
+ let PrintMethod = "printU18ImmOperand";
+}
+
+def f18imm : Operand<f32> {
+ let PrintMethod = "printU18ImmOperand";
+}
+
+def f18imm_f64 : Operand<f64> {
+ let PrintMethod = "printU18ImmOperand";
+}
+
+// Negated 7-bit halfword rotate immediate operands
+def rothNeg7imm : Operand<i32> {
+ let PrintMethod = "printROTHNeg7Imm";
+}
+
+def rothNeg7imm_i16 : Operand<i16> {
+ let PrintMethod = "printROTHNeg7Imm";
+}
+
+// Negated 7-bit word rotate immediate operands
+def rotNeg7imm : Operand<i32> {
+ let PrintMethod = "printROTNeg7Imm";
+}
+
+def rotNeg7imm_i16 : Operand<i16> {
+ let PrintMethod = "printROTNeg7Imm";
+}
+
+def rotNeg7imm_i8 : Operand<i8> {
+ let PrintMethod = "printROTNeg7Imm";
+}
+
+def target : Operand<OtherVT> {
+ let PrintMethod = "printBranchOperand";
+}
+
+// Absolute address call target
+def calltarget : Operand<iPTR> {
+ let PrintMethod = "printCallOperand";
+ let MIOperandInfo = (ops u18imm:$calldest);
+}
+
+// PC relative call target
+def relcalltarget : Operand<iPTR> {
+ let PrintMethod = "printPCRelativeOperand";
+ let MIOperandInfo = (ops s16imm:$calldest);
+}
+
+// Branch targets:
+def brtarget : Operand<OtherVT> {
+ let PrintMethod = "printPCRelativeOperand";
+}
+
+// Hint for branch target
+def hbrtarget : Operand<OtherVT> {
+ let PrintMethod = "printHBROperand";
+}
+
+// Indirect call target
+def indcalltarget : Operand<iPTR> {
+ let PrintMethod = "printCallOperand";
+ let MIOperandInfo = (ops ptr_rc:$calldest);
+}
+
+def symbolHi: Operand<i32> {
+ let PrintMethod = "printSymbolHi";
+}
+
+def symbolLo: Operand<i32> {
+ let PrintMethod = "printSymbolLo";
+}
+
+def symbolLSA: Operand<i32> {
+ let PrintMethod = "printSymbolLSA";
+}
+
+// Shuffle address memory operaand [s7imm(reg) d-format]
+def shufaddr : Operand<iPTR> {
+ let PrintMethod = "printShufAddr";
+ let MIOperandInfo = (ops s7imm:$imm, ptr_rc:$reg);
+}
+
+// memory s10imm(reg) operand
+def dformaddr : Operand<iPTR> {
+ let PrintMethod = "printDFormAddr";
+ let MIOperandInfo = (ops s10imm:$imm, ptr_rc:$reg);
+}
+
+// 256K local store address
+// N.B.: The tblgen code generator expects to have two operands, an offset
+// and a pointer. Of these, only the immediate is actually used.
+def addr256k : Operand<iPTR> {
+ let PrintMethod = "printAddr256K";
+ let MIOperandInfo = (ops s16imm:$imm, ptr_rc:$reg);
+}
+
+// memory s18imm(reg) operand
+def memri18 : Operand<iPTR> {
+ let PrintMethod = "printMemRegImmS18";
+ let MIOperandInfo = (ops s18imm:$imm, ptr_rc:$reg);
+}
+
+// memory register + register operand
+def memrr : Operand<iPTR> {
+ let PrintMethod = "printMemRegReg";
+ let MIOperandInfo = (ops ptr_rc:$reg_a, ptr_rc:$reg_b);
+}
+
+// Define SPU-specific addressing modes: These come in three basic
+// flavors:
+//
+// D-form : [r+I10] (10-bit signed offset + reg)
+// X-form : [r+r] (reg+reg)
+// A-form : abs (256K LSA offset)
+// D-form(2): [r+I7] (7-bit signed offset + reg)
+
+def dform_addr : ComplexPattern<iPTR, 2, "SelectDFormAddr",
+ [], [SDNPWantRoot]>;
+def xform_addr : ComplexPattern<iPTR, 2, "SelectXFormAddr",
+ [], [SDNPWantRoot]>;
+def aform_addr : ComplexPattern<iPTR, 2, "SelectAFormAddr",
+ [], [SDNPWantRoot]>;
+def dform2_addr : ComplexPattern<iPTR, 2, "SelectDForm2Addr",
+ [], [SDNPWantRoot]>;
diff --git a/contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.cpp b/contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.cpp
new file mode 100644
index 0000000..bbac6fd
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.cpp
@@ -0,0 +1,356 @@
+//===- SPURegisterInfo.cpp - Cell SPU Register Information ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Cell implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "reginfo"
+#include "SPU.h"
+#include "SPURegisterInfo.h"
+#include "SPUInstrBuilder.h"
+#include "SPUSubtarget.h"
+#include "SPUMachineFunction.h"
+#include "SPUFrameLowering.h"
+#include "llvm/Constants.h"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include <cstdlib>
+
+#define GET_REGINFO_TARGET_DESC
+#include "SPUGenRegisterInfo.inc"
+
+using namespace llvm;
+
+/// getRegisterNumbering - Given the enum value for some register, e.g.
+/// PPC::F14, return the number that it corresponds to (e.g. 14).
+unsigned SPURegisterInfo::getRegisterNumbering(unsigned RegEnum) {
+ using namespace SPU;
+ switch (RegEnum) {
+ case SPU::R0: return 0;
+ case SPU::R1: return 1;
+ case SPU::R2: return 2;
+ case SPU::R3: return 3;
+ case SPU::R4: return 4;
+ case SPU::R5: return 5;
+ case SPU::R6: return 6;
+ case SPU::R7: return 7;
+ case SPU::R8: return 8;
+ case SPU::R9: return 9;
+ case SPU::R10: return 10;
+ case SPU::R11: return 11;
+ case SPU::R12: return 12;
+ case SPU::R13: return 13;
+ case SPU::R14: return 14;
+ case SPU::R15: return 15;
+ case SPU::R16: return 16;
+ case SPU::R17: return 17;
+ case SPU::R18: return 18;
+ case SPU::R19: return 19;
+ case SPU::R20: return 20;
+ case SPU::R21: return 21;
+ case SPU::R22: return 22;
+ case SPU::R23: return 23;
+ case SPU::R24: return 24;
+ case SPU::R25: return 25;
+ case SPU::R26: return 26;
+ case SPU::R27: return 27;
+ case SPU::R28: return 28;
+ case SPU::R29: return 29;
+ case SPU::R30: return 30;
+ case SPU::R31: return 31;
+ case SPU::R32: return 32;
+ case SPU::R33: return 33;
+ case SPU::R34: return 34;
+ case SPU::R35: return 35;
+ case SPU::R36: return 36;
+ case SPU::R37: return 37;
+ case SPU::R38: return 38;
+ case SPU::R39: return 39;
+ case SPU::R40: return 40;
+ case SPU::R41: return 41;
+ case SPU::R42: return 42;
+ case SPU::R43: return 43;
+ case SPU::R44: return 44;
+ case SPU::R45: return 45;
+ case SPU::R46: return 46;
+ case SPU::R47: return 47;
+ case SPU::R48: return 48;
+ case SPU::R49: return 49;
+ case SPU::R50: return 50;
+ case SPU::R51: return 51;
+ case SPU::R52: return 52;
+ case SPU::R53: return 53;
+ case SPU::R54: return 54;
+ case SPU::R55: return 55;
+ case SPU::R56: return 56;
+ case SPU::R57: return 57;
+ case SPU::R58: return 58;
+ case SPU::R59: return 59;
+ case SPU::R60: return 60;
+ case SPU::R61: return 61;
+ case SPU::R62: return 62;
+ case SPU::R63: return 63;
+ case SPU::R64: return 64;
+ case SPU::R65: return 65;
+ case SPU::R66: return 66;
+ case SPU::R67: return 67;
+ case SPU::R68: return 68;
+ case SPU::R69: return 69;
+ case SPU::R70: return 70;
+ case SPU::R71: return 71;
+ case SPU::R72: return 72;
+ case SPU::R73: return 73;
+ case SPU::R74: return 74;
+ case SPU::R75: return 75;
+ case SPU::R76: return 76;
+ case SPU::R77: return 77;
+ case SPU::R78: return 78;
+ case SPU::R79: return 79;
+ case SPU::R80: return 80;
+ case SPU::R81: return 81;
+ case SPU::R82: return 82;
+ case SPU::R83: return 83;
+ case SPU::R84: return 84;
+ case SPU::R85: return 85;
+ case SPU::R86: return 86;
+ case SPU::R87: return 87;
+ case SPU::R88: return 88;
+ case SPU::R89: return 89;
+ case SPU::R90: return 90;
+ case SPU::R91: return 91;
+ case SPU::R92: return 92;
+ case SPU::R93: return 93;
+ case SPU::R94: return 94;
+ case SPU::R95: return 95;
+ case SPU::R96: return 96;
+ case SPU::R97: return 97;
+ case SPU::R98: return 98;
+ case SPU::R99: return 99;
+ case SPU::R100: return 100;
+ case SPU::R101: return 101;
+ case SPU::R102: return 102;
+ case SPU::R103: return 103;
+ case SPU::R104: return 104;
+ case SPU::R105: return 105;
+ case SPU::R106: return 106;
+ case SPU::R107: return 107;
+ case SPU::R108: return 108;
+ case SPU::R109: return 109;
+ case SPU::R110: return 110;
+ case SPU::R111: return 111;
+ case SPU::R112: return 112;
+ case SPU::R113: return 113;
+ case SPU::R114: return 114;
+ case SPU::R115: return 115;
+ case SPU::R116: return 116;
+ case SPU::R117: return 117;
+ case SPU::R118: return 118;
+ case SPU::R119: return 119;
+ case SPU::R120: return 120;
+ case SPU::R121: return 121;
+ case SPU::R122: return 122;
+ case SPU::R123: return 123;
+ case SPU::R124: return 124;
+ case SPU::R125: return 125;
+ case SPU::R126: return 126;
+ case SPU::R127: return 127;
+ default:
+ report_fatal_error("Unhandled reg in SPURegisterInfo::getRegisterNumbering");
+ }
+}
+
+SPURegisterInfo::SPURegisterInfo(const SPUSubtarget &subtarget,
+ const TargetInstrInfo &tii) :
+ SPUGenRegisterInfo(SPU::R0), Subtarget(subtarget), TII(tii)
+{
+}
+
+/// getPointerRegClass - Return the register class to use to hold pointers.
+/// This is used for addressing modes.
+const TargetRegisterClass *
+SPURegisterInfo::getPointerRegClass(unsigned Kind) const {
+ return &SPU::R32CRegClass;
+}
+
+const unsigned *
+SPURegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const
+{
+ // Cell ABI calling convention
+ static const unsigned SPU_CalleeSaveRegs[] = {
+ SPU::R80, SPU::R81, SPU::R82, SPU::R83,
+ SPU::R84, SPU::R85, SPU::R86, SPU::R87,
+ SPU::R88, SPU::R89, SPU::R90, SPU::R91,
+ SPU::R92, SPU::R93, SPU::R94, SPU::R95,
+ SPU::R96, SPU::R97, SPU::R98, SPU::R99,
+ SPU::R100, SPU::R101, SPU::R102, SPU::R103,
+ SPU::R104, SPU::R105, SPU::R106, SPU::R107,
+ SPU::R108, SPU::R109, SPU::R110, SPU::R111,
+ SPU::R112, SPU::R113, SPU::R114, SPU::R115,
+ SPU::R116, SPU::R117, SPU::R118, SPU::R119,
+ SPU::R120, SPU::R121, SPU::R122, SPU::R123,
+ SPU::R124, SPU::R125, SPU::R126, SPU::R127,
+ SPU::R2, /* environment pointer */
+ SPU::R1, /* stack pointer */
+ SPU::R0, /* link register */
+ 0 /* end */
+ };
+
+ return SPU_CalleeSaveRegs;
+}
+
+/*!
+ R0 (link register), R1 (stack pointer) and R2 (environment pointer -- this is
+ generally unused) are the Cell's reserved registers
+ */
+BitVector SPURegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+ Reserved.set(SPU::R0); // LR
+ Reserved.set(SPU::R1); // SP
+ Reserved.set(SPU::R2); // environment pointer
+ return Reserved;
+}
+
+//===----------------------------------------------------------------------===//
+// Stack Frame Processing methods
+//===----------------------------------------------------------------------===//
+
+//--------------------------------------------------------------------------
+void
+SPURegisterInfo::eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I)
+ const
+{
+ // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
+ MBB.erase(I);
+}
+
+void
+SPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
+ RegScavenger *RS) const
+{
+ unsigned i = 0;
+ MachineInstr &MI = *II;
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ DebugLoc dl = II->getDebugLoc();
+
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+
+ MachineOperand &SPOp = MI.getOperand(i);
+ int FrameIndex = SPOp.getIndex();
+
+ // Now add the frame object offset to the offset from r1.
+ int Offset = MFI->getObjectOffset(FrameIndex);
+
+ // Most instructions, except for generated FrameIndex additions using AIr32
+ // and ILAr32, have the immediate in operand 1. AIr32 and ILAr32 have the
+ // immediate in operand 2.
+ unsigned OpNo = 1;
+ if (MI.getOpcode() == SPU::AIr32 || MI.getOpcode() == SPU::ILAr32)
+ OpNo = 2;
+
+ MachineOperand &MO = MI.getOperand(OpNo);
+
+ // Offset is biased by $lr's slot at the bottom.
+ Offset += MO.getImm() + MFI->getStackSize() + SPUFrameLowering::minStackSize();
+ assert((Offset & 0xf) == 0
+ && "16-byte alignment violated in eliminateFrameIndex");
+
+ // Replace the FrameIndex with base register with $sp (aka $r1)
+ SPOp.ChangeToRegister(SPU::R1, false);
+
+ // if 'Offset' doesn't fit to the D-form instruction's
+ // immediate, convert the instruction to X-form
+ // if the instruction is not an AI (which takes a s10 immediate), assume
+ // it is a load/store that can take a s14 immediate
+ if ((MI.getOpcode() == SPU::AIr32 && !isInt<10>(Offset))
+ || !isInt<14>(Offset)) {
+ int newOpcode = convertDFormToXForm(MI.getOpcode());
+ unsigned tmpReg = findScratchRegister(II, RS, &SPU::R32CRegClass, SPAdj);
+ BuildMI(MBB, II, dl, TII.get(SPU::ILr32), tmpReg )
+ .addImm(Offset);
+ BuildMI(MBB, II, dl, TII.get(newOpcode), MI.getOperand(0).getReg())
+ .addReg(tmpReg, RegState::Kill)
+ .addReg(SPU::R1);
+ // remove the replaced D-form instruction
+ MBB.erase(II);
+ } else {
+ MO.ChangeToImmediate(Offset);
+ }
+}
+
+unsigned
+SPURegisterInfo::getFrameRegister(const MachineFunction &MF) const
+{
+ return SPU::R1;
+}
+
+int
+SPURegisterInfo::convertDFormToXForm(int dFormOpcode) const
+{
+ switch(dFormOpcode)
+ {
+ case SPU::AIr32: return SPU::Ar32;
+ case SPU::LQDr32: return SPU::LQXr32;
+ case SPU::LQDr128: return SPU::LQXr128;
+ case SPU::LQDv16i8: return SPU::LQXv16i8;
+ case SPU::LQDv4i32: return SPU::LQXv4i32;
+ case SPU::LQDv4f32: return SPU::LQXv4f32;
+ case SPU::STQDr32: return SPU::STQXr32;
+ case SPU::STQDr128: return SPU::STQXr128;
+ case SPU::STQDv16i8: return SPU::STQXv16i8;
+ case SPU::STQDv4i32: return SPU::STQXv4i32;
+ case SPU::STQDv4f32: return SPU::STQXv4f32;
+
+ default: assert( false && "Unhandled D to X-form conversion");
+ }
+ // default will assert, but need to return something to keep the
+ // compiler happy.
+ return dFormOpcode;
+}
+
+// TODO this is already copied from PPC. Could this convenience function
+// be moved to the RegScavenger class?
+unsigned
+SPURegisterInfo::findScratchRegister(MachineBasicBlock::iterator II,
+ RegScavenger *RS,
+ const TargetRegisterClass *RC,
+ int SPAdj) const
+{
+ assert(RS && "Register scavenging must be on");
+ unsigned Reg = RS->FindUnusedReg(RC);
+ if (Reg == 0)
+ Reg = RS->scavengeRegister(RC, II, SPAdj);
+ assert( Reg && "Register scavenger failed");
+ return Reg;
+}
diff --git a/contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.h b/contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.h
new file mode 100644
index 0000000..b7818a4
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.h
@@ -0,0 +1,101 @@
+//===- SPURegisterInfo.h - Cell SPU Register Information Impl ----*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Cell SPU implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPU_REGISTERINFO_H
+#define SPU_REGISTERINFO_H
+
+#include "SPU.h"
+
+#define GET_REGINFO_HEADER
+#include "SPUGenRegisterInfo.inc"
+
+namespace llvm {
+ class SPUSubtarget;
+ class TargetInstrInfo;
+ class Type;
+
+ class SPURegisterInfo : public SPUGenRegisterInfo {
+ private:
+ const SPUSubtarget &Subtarget;
+ const TargetInstrInfo &TII;
+
+ //! Predicate: Does the machine function use the link register?
+ bool usesLR(MachineFunction &MF) const;
+
+ public:
+ SPURegisterInfo(const SPUSubtarget &subtarget, const TargetInstrInfo &tii);
+
+ //! Translate a register's enum value to a register number
+ /*!
+ This method translates a register's enum value to it's regiser number,
+ e.g. SPU::R14 -> 14.
+ */
+ static unsigned getRegisterNumbering(unsigned RegEnum);
+
+ /// getPointerRegClass - Return the register class to use to hold pointers.
+ /// This is used for addressing modes.
+ virtual const TargetRegisterClass *
+ getPointerRegClass(unsigned Kind = 0) const;
+
+ /// After allocating this many registers, the allocator should feel
+ /// register pressure. The value is a somewhat random guess, based on the
+ /// number of non callee saved registers in the C calling convention.
+ virtual unsigned getRegPressureLimit( const TargetRegisterClass *RC,
+ MachineFunction &MF) const{
+ return 50;
+ }
+
+ //! Return the array of callee-saved registers
+ virtual const unsigned* getCalleeSavedRegs(const MachineFunction *MF) const;
+
+ //! Allow for scavenging, so we can get scratch registers when needed.
+ virtual bool requiresRegisterScavenging(const MachineFunction &MF) const
+ { return true; }
+
+ //! Return the reserved registers
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ //! Eliminate the call frame setup pseudo-instructions
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+ //! Convert frame indicies into machine operands
+ void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
+ RegScavenger *RS = NULL) const;
+
+ //! Get the stack frame register (SP, aka R1)
+ unsigned getFrameRegister(const MachineFunction &MF) const;
+
+ //------------------------------------------------------------------------
+ // New methods added:
+ //------------------------------------------------------------------------
+
+ //! Convert D-form load/store to X-form load/store
+ /*!
+ Converts a regiser displacement load/store into a register-indexed
+ load/store for large stack frames, when the stack frame exceeds the
+ range of a s10 displacement.
+ */
+ int convertDFormToXForm(int dFormOpcode) const;
+
+ //! Acquire an unused register in an emergency.
+ unsigned findScratchRegister(MachineBasicBlock::iterator II,
+ RegScavenger *RS,
+ const TargetRegisterClass *RC,
+ int SPAdj) const;
+
+ };
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.td b/contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.td
new file mode 100644
index 0000000..e16f51f
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.td
@@ -0,0 +1,183 @@
+//===- SPURegisterInfo.td - The Cell SPU Register File -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+class SPUReg<string n> : Register<n> {
+ let Namespace = "SPU";
+}
+
+// The SPU's register are all 128-bits wide, which makes specifying the
+// registers relatively easy, if relatively mundane:
+
+class SPUVecReg<bits<7> num, string n> : SPUReg<n> {
+ field bits<7> Num = num;
+}
+
+def R0 : SPUVecReg<0, "$lr">, DwarfRegNum<[0]>;
+def R1 : SPUVecReg<1, "$sp">, DwarfRegNum<[1]>;
+def R2 : SPUVecReg<2, "$2">, DwarfRegNum<[2]>;
+def R3 : SPUVecReg<3, "$3">, DwarfRegNum<[3]>;
+def R4 : SPUVecReg<4, "$4">, DwarfRegNum<[4]>;
+def R5 : SPUVecReg<5, "$5">, DwarfRegNum<[5]>;
+def R6 : SPUVecReg<6, "$6">, DwarfRegNum<[6]>;
+def R7 : SPUVecReg<7, "$7">, DwarfRegNum<[7]>;
+def R8 : SPUVecReg<8, "$8">, DwarfRegNum<[8]>;
+def R9 : SPUVecReg<9, "$9">, DwarfRegNum<[9]>;
+def R10 : SPUVecReg<10, "$10">, DwarfRegNum<[10]>;
+def R11 : SPUVecReg<11, "$11">, DwarfRegNum<[11]>;
+def R12 : SPUVecReg<12, "$12">, DwarfRegNum<[12]>;
+def R13 : SPUVecReg<13, "$13">, DwarfRegNum<[13]>;
+def R14 : SPUVecReg<14, "$14">, DwarfRegNum<[14]>;
+def R15 : SPUVecReg<15, "$15">, DwarfRegNum<[15]>;
+def R16 : SPUVecReg<16, "$16">, DwarfRegNum<[16]>;
+def R17 : SPUVecReg<17, "$17">, DwarfRegNum<[17]>;
+def R18 : SPUVecReg<18, "$18">, DwarfRegNum<[18]>;
+def R19 : SPUVecReg<19, "$19">, DwarfRegNum<[19]>;
+def R20 : SPUVecReg<20, "$20">, DwarfRegNum<[20]>;
+def R21 : SPUVecReg<21, "$21">, DwarfRegNum<[21]>;
+def R22 : SPUVecReg<22, "$22">, DwarfRegNum<[22]>;
+def R23 : SPUVecReg<23, "$23">, DwarfRegNum<[23]>;
+def R24 : SPUVecReg<24, "$24">, DwarfRegNum<[24]>;
+def R25 : SPUVecReg<25, "$25">, DwarfRegNum<[25]>;
+def R26 : SPUVecReg<26, "$26">, DwarfRegNum<[26]>;
+def R27 : SPUVecReg<27, "$27">, DwarfRegNum<[27]>;
+def R28 : SPUVecReg<28, "$28">, DwarfRegNum<[28]>;
+def R29 : SPUVecReg<29, "$29">, DwarfRegNum<[29]>;
+def R30 : SPUVecReg<30, "$30">, DwarfRegNum<[30]>;
+def R31 : SPUVecReg<31, "$31">, DwarfRegNum<[31]>;
+def R32 : SPUVecReg<32, "$32">, DwarfRegNum<[32]>;
+def R33 : SPUVecReg<33, "$33">, DwarfRegNum<[33]>;
+def R34 : SPUVecReg<34, "$34">, DwarfRegNum<[34]>;
+def R35 : SPUVecReg<35, "$35">, DwarfRegNum<[35]>;
+def R36 : SPUVecReg<36, "$36">, DwarfRegNum<[36]>;
+def R37 : SPUVecReg<37, "$37">, DwarfRegNum<[37]>;
+def R38 : SPUVecReg<38, "$38">, DwarfRegNum<[38]>;
+def R39 : SPUVecReg<39, "$39">, DwarfRegNum<[39]>;
+def R40 : SPUVecReg<40, "$40">, DwarfRegNum<[40]>;
+def R41 : SPUVecReg<41, "$41">, DwarfRegNum<[41]>;
+def R42 : SPUVecReg<42, "$42">, DwarfRegNum<[42]>;
+def R43 : SPUVecReg<43, "$43">, DwarfRegNum<[43]>;
+def R44 : SPUVecReg<44, "$44">, DwarfRegNum<[44]>;
+def R45 : SPUVecReg<45, "$45">, DwarfRegNum<[45]>;
+def R46 : SPUVecReg<46, "$46">, DwarfRegNum<[46]>;
+def R47 : SPUVecReg<47, "$47">, DwarfRegNum<[47]>;
+def R48 : SPUVecReg<48, "$48">, DwarfRegNum<[48]>;
+def R49 : SPUVecReg<49, "$49">, DwarfRegNum<[49]>;
+def R50 : SPUVecReg<50, "$50">, DwarfRegNum<[50]>;
+def R51 : SPUVecReg<51, "$51">, DwarfRegNum<[51]>;
+def R52 : SPUVecReg<52, "$52">, DwarfRegNum<[52]>;
+def R53 : SPUVecReg<53, "$53">, DwarfRegNum<[53]>;
+def R54 : SPUVecReg<54, "$54">, DwarfRegNum<[54]>;
+def R55 : SPUVecReg<55, "$55">, DwarfRegNum<[55]>;
+def R56 : SPUVecReg<56, "$56">, DwarfRegNum<[56]>;
+def R57 : SPUVecReg<57, "$57">, DwarfRegNum<[57]>;
+def R58 : SPUVecReg<58, "$58">, DwarfRegNum<[58]>;
+def R59 : SPUVecReg<59, "$59">, DwarfRegNum<[59]>;
+def R60 : SPUVecReg<60, "$60">, DwarfRegNum<[60]>;
+def R61 : SPUVecReg<61, "$61">, DwarfRegNum<[61]>;
+def R62 : SPUVecReg<62, "$62">, DwarfRegNum<[62]>;
+def R63 : SPUVecReg<63, "$63">, DwarfRegNum<[63]>;
+def R64 : SPUVecReg<64, "$64">, DwarfRegNum<[64]>;
+def R65 : SPUVecReg<65, "$65">, DwarfRegNum<[65]>;
+def R66 : SPUVecReg<66, "$66">, DwarfRegNum<[66]>;
+def R67 : SPUVecReg<67, "$67">, DwarfRegNum<[67]>;
+def R68 : SPUVecReg<68, "$68">, DwarfRegNum<[68]>;
+def R69 : SPUVecReg<69, "$69">, DwarfRegNum<[69]>;
+def R70 : SPUVecReg<70, "$70">, DwarfRegNum<[70]>;
+def R71 : SPUVecReg<71, "$71">, DwarfRegNum<[71]>;
+def R72 : SPUVecReg<72, "$72">, DwarfRegNum<[72]>;
+def R73 : SPUVecReg<73, "$73">, DwarfRegNum<[73]>;
+def R74 : SPUVecReg<74, "$74">, DwarfRegNum<[74]>;
+def R75 : SPUVecReg<75, "$75">, DwarfRegNum<[75]>;
+def R76 : SPUVecReg<76, "$76">, DwarfRegNum<[76]>;
+def R77 : SPUVecReg<77, "$77">, DwarfRegNum<[77]>;
+def R78 : SPUVecReg<78, "$78">, DwarfRegNum<[78]>;
+def R79 : SPUVecReg<79, "$79">, DwarfRegNum<[79]>;
+def R80 : SPUVecReg<80, "$80">, DwarfRegNum<[80]>;
+def R81 : SPUVecReg<81, "$81">, DwarfRegNum<[81]>;
+def R82 : SPUVecReg<82, "$82">, DwarfRegNum<[82]>;
+def R83 : SPUVecReg<83, "$83">, DwarfRegNum<[83]>;
+def R84 : SPUVecReg<84, "$84">, DwarfRegNum<[84]>;
+def R85 : SPUVecReg<85, "$85">, DwarfRegNum<[85]>;
+def R86 : SPUVecReg<86, "$86">, DwarfRegNum<[86]>;
+def R87 : SPUVecReg<87, "$87">, DwarfRegNum<[87]>;
+def R88 : SPUVecReg<88, "$88">, DwarfRegNum<[88]>;
+def R89 : SPUVecReg<89, "$89">, DwarfRegNum<[89]>;
+def R90 : SPUVecReg<90, "$90">, DwarfRegNum<[90]>;
+def R91 : SPUVecReg<91, "$91">, DwarfRegNum<[91]>;
+def R92 : SPUVecReg<92, "$92">, DwarfRegNum<[92]>;
+def R93 : SPUVecReg<93, "$93">, DwarfRegNum<[93]>;
+def R94 : SPUVecReg<94, "$94">, DwarfRegNum<[94]>;
+def R95 : SPUVecReg<95, "$95">, DwarfRegNum<[95]>;
+def R96 : SPUVecReg<96, "$96">, DwarfRegNum<[96]>;
+def R97 : SPUVecReg<97, "$97">, DwarfRegNum<[97]>;
+def R98 : SPUVecReg<98, "$98">, DwarfRegNum<[98]>;
+def R99 : SPUVecReg<99, "$99">, DwarfRegNum<[99]>;
+def R100 : SPUVecReg<100, "$100">, DwarfRegNum<[100]>;
+def R101 : SPUVecReg<101, "$101">, DwarfRegNum<[101]>;
+def R102 : SPUVecReg<102, "$102">, DwarfRegNum<[102]>;
+def R103 : SPUVecReg<103, "$103">, DwarfRegNum<[103]>;
+def R104 : SPUVecReg<104, "$104">, DwarfRegNum<[104]>;
+def R105 : SPUVecReg<105, "$105">, DwarfRegNum<[105]>;
+def R106 : SPUVecReg<106, "$106">, DwarfRegNum<[106]>;
+def R107 : SPUVecReg<107, "$107">, DwarfRegNum<[107]>;
+def R108 : SPUVecReg<108, "$108">, DwarfRegNum<[108]>;
+def R109 : SPUVecReg<109, "$109">, DwarfRegNum<[109]>;
+def R110 : SPUVecReg<110, "$110">, DwarfRegNum<[110]>;
+def R111 : SPUVecReg<111, "$111">, DwarfRegNum<[111]>;
+def R112 : SPUVecReg<112, "$112">, DwarfRegNum<[112]>;
+def R113 : SPUVecReg<113, "$113">, DwarfRegNum<[113]>;
+def R114 : SPUVecReg<114, "$114">, DwarfRegNum<[114]>;
+def R115 : SPUVecReg<115, "$115">, DwarfRegNum<[115]>;
+def R116 : SPUVecReg<116, "$116">, DwarfRegNum<[116]>;
+def R117 : SPUVecReg<117, "$117">, DwarfRegNum<[117]>;
+def R118 : SPUVecReg<118, "$118">, DwarfRegNum<[118]>;
+def R119 : SPUVecReg<119, "$119">, DwarfRegNum<[119]>;
+def R120 : SPUVecReg<120, "$120">, DwarfRegNum<[120]>;
+def R121 : SPUVecReg<121, "$121">, DwarfRegNum<[121]>;
+def R122 : SPUVecReg<122, "$122">, DwarfRegNum<[122]>;
+def R123 : SPUVecReg<123, "$123">, DwarfRegNum<[123]>;
+def R124 : SPUVecReg<124, "$124">, DwarfRegNum<[124]>;
+def R125 : SPUVecReg<125, "$125">, DwarfRegNum<[125]>;
+def R126 : SPUVecReg<126, "$126">, DwarfRegNum<[126]>;
+def R127 : SPUVecReg<127, "$127">, DwarfRegNum<[127]>;
+
+/* Need floating point status register here: */
+/* def FPCSR : ... */
+
+// The SPU's registers as 128-bit wide entities, and can function as general
+// purpose registers, where the operands are in the "preferred slot":
+// The non-volatile registers are allocated in reverse order, like PPC does it.
+def GPRC : RegisterClass<"SPU", [i128], 128,
+ (add (sequence "R%u", 0, 79),
+ (sequence "R%u", 127, 80))>;
+
+// The SPU's registers as 64-bit wide (double word integer) "preferred slot":
+def R64C : RegisterClass<"SPU", [i64], 128, (add GPRC)>;
+
+// The SPU's registers as 64-bit wide (double word) FP "preferred slot":
+def R64FP : RegisterClass<"SPU", [f64], 128, (add GPRC)>;
+
+// The SPU's registers as 32-bit wide (word) "preferred slot":
+def R32C : RegisterClass<"SPU", [i32], 128, (add GPRC)>;
+
+// The SPU's registers as single precision floating point "preferred slot":
+def R32FP : RegisterClass<"SPU", [f32], 128, (add GPRC)>;
+
+// The SPU's registers as 16-bit wide (halfword) "preferred slot":
+def R16C : RegisterClass<"SPU", [i16], 128, (add GPRC)>;
+
+// The SPU's registers as 8-bit wide (byte) "preferred slot":
+def R8C : RegisterClass<"SPU", [i8], 128, (add GPRC)>;
+
+// The SPU's registers as vector registers:
+def VECREG : RegisterClass<"SPU", [v16i8,v8i16,v4i32,v4f32,v2i64,v2f64], 128,
+ (add GPRC)>;
diff --git a/contrib/llvm/lib/Target/CellSPU/SPURegisterNames.h b/contrib/llvm/lib/Target/CellSPU/SPURegisterNames.h
new file mode 100644
index 0000000..e557ed3
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPURegisterNames.h
@@ -0,0 +1,19 @@
+//===- SPURegisterNames.h - Wrapper header for SPU register names -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPU_REGISTER_NAMES_H
+#define SPU_REGISTER_NAMES_H
+
+// Define symbolic names for Cell registers. This defines a mapping from
+// register name to register number.
+//
+#define GET_REGINFO_ENUM
+#include "SPUGenRegisterInfo.inc"
+
+#endif
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUSchedule.td b/contrib/llvm/lib/Target/CellSPU/SPUSchedule.td
new file mode 100644
index 0000000..9cd3c23
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUSchedule.td
@@ -0,0 +1,59 @@
+//===- SPUSchedule.td - Cell Scheduling Definitions --------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Even pipeline:
+
+def EVEN_UNIT : FuncUnit; // Even execution unit: (PC & 0x7 == 000)
+def ODD_UNIT : FuncUnit; // Odd execution unit: (PC & 0x7 == 100)
+
+//===----------------------------------------------------------------------===//
+// Instruction Itinerary classes used for Cell SPU
+//===----------------------------------------------------------------------===//
+
+def LoadStore : InstrItinClass; // ODD_UNIT
+def BranchHints : InstrItinClass; // ODD_UNIT
+def BranchResolv : InstrItinClass; // ODD_UNIT
+def ChanOpSPR : InstrItinClass; // ODD_UNIT
+def ShuffleOp : InstrItinClass; // ODD_UNIT
+def SelectOp : InstrItinClass; // ODD_UNIT
+def GatherOp : InstrItinClass; // ODD_UNIT
+def LoadNOP : InstrItinClass; // ODD_UNIT
+def ExecNOP : InstrItinClass; // EVEN_UNIT
+def SPrecFP : InstrItinClass; // EVEN_UNIT
+def DPrecFP : InstrItinClass; // EVEN_UNIT
+def FPInt : InstrItinClass; // EVEN_UNIT (FP<->integer)
+def ByteOp : InstrItinClass; // EVEN_UNIT
+def IntegerOp : InstrItinClass; // EVEN_UNIT
+def IntegerMulDiv: InstrItinClass; // EVEN_UNIT
+def RotShiftVec : InstrItinClass; // EVEN_UNIT Inter vector
+def RotShiftQuad : InstrItinClass; // ODD_UNIT Entire quad
+def ImmLoad : InstrItinClass; // EVEN_UNIT
+
+/* Note: The itinerary for the Cell SPU is somewhat contrived... */
+def SPUItineraries : ProcessorItineraries<[ODD_UNIT, EVEN_UNIT], [], [
+ InstrItinData<LoadStore , [InstrStage<6, [ODD_UNIT]>]>,
+ InstrItinData<BranchHints , [InstrStage<6, [ODD_UNIT]>]>,
+ InstrItinData<BranchResolv, [InstrStage<4, [ODD_UNIT]>]>,
+ InstrItinData<ChanOpSPR , [InstrStage<6, [ODD_UNIT]>]>,
+ InstrItinData<ShuffleOp , [InstrStage<4, [ODD_UNIT]>]>,
+ InstrItinData<SelectOp , [InstrStage<4, [ODD_UNIT]>]>,
+ InstrItinData<GatherOp , [InstrStage<4, [ODD_UNIT]>]>,
+ InstrItinData<LoadNOP , [InstrStage<1, [ODD_UNIT]>]>,
+ InstrItinData<ExecNOP , [InstrStage<1, [EVEN_UNIT]>]>,
+ InstrItinData<SPrecFP , [InstrStage<6, [EVEN_UNIT]>]>,
+ InstrItinData<DPrecFP , [InstrStage<13, [EVEN_UNIT]>]>,
+ InstrItinData<FPInt , [InstrStage<2, [EVEN_UNIT]>]>,
+ InstrItinData<ByteOp , [InstrStage<4, [EVEN_UNIT]>]>,
+ InstrItinData<IntegerOp , [InstrStage<2, [EVEN_UNIT]>]>,
+ InstrItinData<RotShiftVec , [InstrStage<4, [EVEN_UNIT]>]>,
+ InstrItinData<RotShiftQuad, [InstrStage<4, [ODD_UNIT]>]>,
+ InstrItinData<IntegerMulDiv,[InstrStage<7, [EVEN_UNIT]>]>,
+ InstrItinData<ImmLoad , [InstrStage<2, [EVEN_UNIT]>]>
+ ]>;
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/CellSPU/SPUSelectionDAGInfo.cpp
new file mode 100644
index 0000000..5732fd4
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUSelectionDAGInfo.cpp
@@ -0,0 +1,23 @@
+//===-- SPUSelectionDAGInfo.cpp - CellSPU SelectionDAG Info ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SPUSelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "cellspu-selectiondag-info"
+#include "SPUTargetMachine.h"
+using namespace llvm;
+
+SPUSelectionDAGInfo::SPUSelectionDAGInfo(const SPUTargetMachine &TM)
+ : TargetSelectionDAGInfo(TM) {
+}
+
+SPUSelectionDAGInfo::~SPUSelectionDAGInfo() {
+}
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUSelectionDAGInfo.h b/contrib/llvm/lib/Target/CellSPU/SPUSelectionDAGInfo.h
new file mode 100644
index 0000000..39257d9
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUSelectionDAGInfo.h
@@ -0,0 +1,31 @@
+//===-- SPUSelectionDAGInfo.h - CellSPU SelectionDAG Info -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the CellSPU subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CELLSPUSELECTIONDAGINFO_H
+#define CELLSPUSELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class SPUTargetMachine;
+
+class SPUSelectionDAGInfo : public TargetSelectionDAGInfo {
+public:
+ explicit SPUSelectionDAGInfo(const SPUTargetMachine &TM);
+ ~SPUSelectionDAGInfo();
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUSubtarget.cpp b/contrib/llvm/lib/Target/CellSPU/SPUSubtarget.cpp
new file mode 100644
index 0000000..43335ab
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUSubtarget.cpp
@@ -0,0 +1,66 @@
+//===- SPUSubtarget.cpp - STI Cell SPU Subtarget Information --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CellSPU-specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPUSubtarget.h"
+#include "SPU.h"
+#include "SPURegisterInfo.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/ADT/SmallVector.h"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "SPUGenSubtargetInfo.inc"
+
+using namespace llvm;
+
+SPUSubtarget::SPUSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS) :
+ SPUGenSubtargetInfo(TT, CPU, FS),
+ StackAlignment(16),
+ ProcDirective(SPU::DEFAULT_PROC),
+ UseLargeMem(false)
+{
+ // Should be the target SPU processor type. For now, since there's only
+ // one, simply default to the current "v0" default:
+ std::string default_cpu("v0");
+
+ // Parse features string.
+ ParseSubtargetFeatures(default_cpu, FS);
+
+ // Initialize scheduling itinerary for the specified CPU.
+ InstrItins = getInstrItineraryForCPU(default_cpu);
+}
+
+/// SetJITMode - This is called to inform the subtarget info that we are
+/// producing code for the JIT.
+void SPUSubtarget::SetJITMode() {
+}
+
+/// Enable PostRA scheduling for optimization levels -O2 and -O3.
+bool SPUSubtarget::enablePostRAScheduler(
+ CodeGenOpt::Level OptLevel,
+ TargetSubtargetInfo::AntiDepBreakMode& Mode,
+ RegClassVector& CriticalPathRCs) const {
+ Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL;
+ // CriticalPathsRCs seems to be the set of
+ // RegisterClasses that antidep breakings are performed for.
+ // Do it for all register classes
+ CriticalPathRCs.clear();
+ CriticalPathRCs.push_back(&SPU::R8CRegClass);
+ CriticalPathRCs.push_back(&SPU::R16CRegClass);
+ CriticalPathRCs.push_back(&SPU::R32CRegClass);
+ CriticalPathRCs.push_back(&SPU::R32FPRegClass);
+ CriticalPathRCs.push_back(&SPU::R64CRegClass);
+ CriticalPathRCs.push_back(&SPU::VECREGRegClass);
+ return OptLevel >= CodeGenOpt::Default;
+}
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUSubtarget.h b/contrib/llvm/lib/Target/CellSPU/SPUSubtarget.h
new file mode 100644
index 0000000..7c4aa14
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUSubtarget.h
@@ -0,0 +1,97 @@
+//===-- SPUSubtarget.h - Define Subtarget for the Cell SPU ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Cell SPU-specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CELLSUBTARGET_H
+#define CELLSUBTARGET_H
+
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include <string>
+
+#define GET_SUBTARGETINFO_HEADER
+#include "SPUGenSubtargetInfo.inc"
+
+namespace llvm {
+ class GlobalValue;
+ class StringRef;
+
+ namespace SPU {
+ enum {
+ PROC_NONE,
+ DEFAULT_PROC
+ };
+ }
+
+ class SPUSubtarget : public SPUGenSubtargetInfo {
+ protected:
+ /// stackAlignment - The minimum alignment known to hold of the stack frame
+ /// on entry to the function and which must be maintained by every function.
+ unsigned StackAlignment;
+
+ /// Selected instruction itineraries (one entry per itinerary class.)
+ InstrItineraryData InstrItins;
+
+ /// Which SPU processor (this isn't really used, but it's there to keep
+ /// the C compiler happy)
+ unsigned ProcDirective;
+
+ /// Use (assume) large memory -- effectively disables the LQA/STQA
+ /// instructions that assume 259K local store.
+ bool UseLargeMem;
+
+ public:
+ /// This constructor initializes the data members to match that
+ /// of the specified triple.
+ ///
+ SPUSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS);
+
+ /// ParseSubtargetFeatures - Parses features string setting specified
+ /// subtarget options. Definition of function is auto generated by tblgen.
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+
+ /// SetJITMode - This is called to inform the subtarget info that we are
+ /// producing code for the JIT.
+ void SetJITMode();
+
+ /// getStackAlignment - Returns the minimum alignment known to hold of the
+ /// stack frame on entry to the function and which must be maintained by
+ /// every function for this subtarget.
+ unsigned getStackAlignment() const { return StackAlignment; }
+
+ /// getInstrItins - Return the instruction itineraies based on subtarget
+ /// selection.
+ const InstrItineraryData &getInstrItineraryData() const {
+ return InstrItins;
+ }
+
+ /// Use large memory addressing predicate
+ bool usingLargeMem() const {
+ return UseLargeMem;
+ }
+
+ /// getTargetDataString - Return the pointer size and type alignment
+ /// properties of this subtarget.
+ const char *getTargetDataString() const {
+ return "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128"
+ "-i16:16:128-i8:8:128-i1:8:128-a:0:128-v64:64:128-v128:128:128"
+ "-s:128:128-n32:64";
+ }
+
+ bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
+ TargetSubtargetInfo::AntiDepBreakMode& Mode,
+ RegClassVector& CriticalPathRCs) const;
+ };
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.cpp b/contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.cpp
new file mode 100644
index 0000000..93a7f6e
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.cpp
@@ -0,0 +1,74 @@
+//===-- SPUTargetMachine.cpp - Define TargetMachine for Cell SPU ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Top-level implementation for the Cell SPU target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPU.h"
+#include "SPUTargetMachine.h"
+#include "llvm/PassManager.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+extern "C" void LLVMInitializeCellSPUTarget() {
+ // Register the target.
+ RegisterTargetMachine<SPUTargetMachine> X(TheCellSPUTarget);
+}
+
+const std::pair<unsigned, int> *
+SPUFrameLowering::getCalleeSaveSpillSlots(unsigned &NumEntries) const {
+ NumEntries = 1;
+ return &LR[0];
+}
+
+SPUTargetMachine::SPUTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM)
+ : LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
+ Subtarget(TT, CPU, FS),
+ DataLayout(Subtarget.getTargetDataString()),
+ InstrInfo(*this),
+ FrameLowering(Subtarget),
+ TLInfo(*this),
+ TSInfo(*this),
+ InstrItins(Subtarget.getInstrItineraryData()) {
+}
+
+//===----------------------------------------------------------------------===//
+// Pass Pipeline Configuration
+//===----------------------------------------------------------------------===//
+
+bool SPUTargetMachine::addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ // Install an instruction selector.
+ PM.add(createSPUISelDag(*this));
+ return false;
+}
+
+// passes to run just before printing the assembly
+bool SPUTargetMachine::
+addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel) {
+ // load the TCE instruction scheduler, if available via
+ // loaded plugins
+ typedef llvm::FunctionPass* (*BuilderFunc)(const char*);
+ BuilderFunc schedulerCreator =
+ (BuilderFunc)(intptr_t)sys::DynamicLibrary::SearchForAddressOfSymbol(
+ "createTCESchedulerPass");
+ if (schedulerCreator != NULL)
+ PM.add(schedulerCreator("cellspu"));
+
+ //align instructions with nops/lnops for dual issue
+ PM.add(createSPUNopFillerPass(*this));
+ return true;
+}
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.h b/contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.h
new file mode 100644
index 0000000..fffe77c
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.h
@@ -0,0 +1,91 @@
+//===-- SPUTargetMachine.h - Define TargetMachine for Cell SPU ----*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the CellSPU-specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPU_TARGETMACHINE_H
+#define SPU_TARGETMACHINE_H
+
+#include "SPUSubtarget.h"
+#include "SPUInstrInfo.h"
+#include "SPUISelLowering.h"
+#include "SPUSelectionDAGInfo.h"
+#include "SPUFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+
+namespace llvm {
+class PassManager;
+class GlobalValue;
+class TargetFrameLowering;
+
+/// SPUTargetMachine
+///
+class SPUTargetMachine : public LLVMTargetMachine {
+ SPUSubtarget Subtarget;
+ const TargetData DataLayout;
+ SPUInstrInfo InstrInfo;
+ SPUFrameLowering FrameLowering;
+ SPUTargetLowering TLInfo;
+ SPUSelectionDAGInfo TSInfo;
+ InstrItineraryData InstrItins;
+public:
+ SPUTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM);
+
+ /// Return the subtarget implementation object
+ virtual const SPUSubtarget *getSubtargetImpl() const {
+ return &Subtarget;
+ }
+ virtual const SPUInstrInfo *getInstrInfo() const {
+ return &InstrInfo;
+ }
+ virtual const SPUFrameLowering *getFrameLowering() const {
+ return &FrameLowering;
+ }
+ /*!
+ \note Cell SPU does not support JIT today. It could support JIT at some
+ point.
+ */
+ virtual TargetJITInfo *getJITInfo() {
+ return NULL;
+ }
+
+ virtual const SPUTargetLowering *getTargetLowering() const {
+ return &TLInfo;
+ }
+
+ virtual const SPUSelectionDAGInfo* getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+
+ virtual const SPURegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+
+ virtual const TargetData *getTargetData() const {
+ return &DataLayout;
+ }
+
+ virtual const InstrItineraryData *getInstrItineraryData() const {
+ return &InstrItins;
+ }
+
+ // Pass Pipeline Configuration
+ virtual bool addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel);
+ virtual bool addPreEmitPass(PassManagerBase &, CodeGenOpt::Level);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/CellSPU/TargetInfo/CellSPUTargetInfo.cpp b/contrib/llvm/lib/Target/CellSPU/TargetInfo/CellSPUTargetInfo.cpp
new file mode 100644
index 0000000..84aadfa
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/TargetInfo/CellSPUTargetInfo.cpp
@@ -0,0 +1,20 @@
+//===-- CellSPUTargetInfo.cpp - CellSPU Target Implementation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPU.h"
+#include "llvm/Module.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::TheCellSPUTarget;
+
+extern "C" void LLVMInitializeCellSPUTargetInfo() {
+ RegisterTarget<Triple::cellspu>
+ X(TheCellSPUTarget, "cellspu", "STI CBEA Cell SPU [experimental]");
+}
diff --git a/contrib/llvm/lib/Target/CppBackend/CPPBackend.cpp b/contrib/llvm/lib/Target/CppBackend/CPPBackend.cpp
new file mode 100644
index 0000000..ae0e3c4
--- /dev/null
+++ b/contrib/llvm/lib/Target/CppBackend/CPPBackend.cpp
@@ -0,0 +1,1978 @@
+//===-- CPPBackend.cpp - Library for converting LLVM code to C++ code -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the writing of the LLVM IR as a set of C++ calls to the
+// LLVM IR interface. The input module is assumed to be verified.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CPPTargetMachine.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/Instruction.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/PassManager.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Config/config.h"
+#include <algorithm>
+#include <set>
+#include <map>
+using namespace llvm;
+
+static cl::opt<std::string>
+FuncName("cppfname", cl::desc("Specify the name of the generated function"),
+ cl::value_desc("function name"));
+
+enum WhatToGenerate {
+ GenProgram,
+ GenModule,
+ GenContents,
+ GenFunction,
+ GenFunctions,
+ GenInline,
+ GenVariable,
+ GenType
+};
+
+static cl::opt<WhatToGenerate> GenerationType("cppgen", cl::Optional,
+ cl::desc("Choose what kind of output to generate"),
+ cl::init(GenProgram),
+ cl::values(
+ clEnumValN(GenProgram, "program", "Generate a complete program"),
+ clEnumValN(GenModule, "module", "Generate a module definition"),
+ clEnumValN(GenContents, "contents", "Generate contents of a module"),
+ clEnumValN(GenFunction, "function", "Generate a function definition"),
+ clEnumValN(GenFunctions,"functions", "Generate all function definitions"),
+ clEnumValN(GenInline, "inline", "Generate an inline function"),
+ clEnumValN(GenVariable, "variable", "Generate a variable definition"),
+ clEnumValN(GenType, "type", "Generate a type definition"),
+ clEnumValEnd
+ )
+);
+
+static cl::opt<std::string> NameToGenerate("cppfor", cl::Optional,
+ cl::desc("Specify the name of the thing to generate"),
+ cl::init("!bad!"));
+
+extern "C" void LLVMInitializeCppBackendTarget() {
+ // Register the target.
+ RegisterTargetMachine<CPPTargetMachine> X(TheCppBackendTarget);
+}
+
+namespace {
+ typedef std::vector<Type*> TypeList;
+ typedef std::map<Type*,std::string> TypeMap;
+ typedef std::map<const Value*,std::string> ValueMap;
+ typedef std::set<std::string> NameSet;
+ typedef std::set<Type*> TypeSet;
+ typedef std::set<const Value*> ValueSet;
+ typedef std::map<const Value*,std::string> ForwardRefMap;
+
+ /// CppWriter - This class is the main chunk of code that converts an LLVM
+ /// module to a C++ translation unit.
+ class CppWriter : public ModulePass {
+ formatted_raw_ostream &Out;
+ const Module *TheModule;
+ uint64_t uniqueNum;
+ TypeMap TypeNames;
+ ValueMap ValueNames;
+ NameSet UsedNames;
+ TypeSet DefinedTypes;
+ ValueSet DefinedValues;
+ ForwardRefMap ForwardRefs;
+ bool is_inline;
+ unsigned indent_level;
+
+ public:
+ static char ID;
+ explicit CppWriter(formatted_raw_ostream &o) :
+ ModulePass(ID), Out(o), uniqueNum(0), is_inline(false), indent_level(0){}
+
+ virtual const char *getPassName() const { return "C++ backend"; }
+
+ bool runOnModule(Module &M);
+
+ void printProgram(const std::string& fname, const std::string& modName );
+ void printModule(const std::string& fname, const std::string& modName );
+ void printContents(const std::string& fname, const std::string& modName );
+ void printFunction(const std::string& fname, const std::string& funcName );
+ void printFunctions();
+ void printInline(const std::string& fname, const std::string& funcName );
+ void printVariable(const std::string& fname, const std::string& varName );
+ void printType(const std::string& fname, const std::string& typeName );
+
+ void error(const std::string& msg);
+
+
+ formatted_raw_ostream& nl(formatted_raw_ostream &Out, int delta = 0);
+ inline void in() { indent_level++; }
+ inline void out() { if (indent_level >0) indent_level--; }
+
+ private:
+ void printLinkageType(GlobalValue::LinkageTypes LT);
+ void printVisibilityType(GlobalValue::VisibilityTypes VisTypes);
+ void printCallingConv(CallingConv::ID cc);
+ void printEscapedString(const std::string& str);
+ void printCFP(const ConstantFP* CFP);
+
+ std::string getCppName(Type* val);
+ inline void printCppName(Type* val);
+
+ std::string getCppName(const Value* val);
+ inline void printCppName(const Value* val);
+
+ void printAttributes(const AttrListPtr &PAL, const std::string &name);
+ void printType(Type* Ty);
+ void printTypes(const Module* M);
+
+ void printConstant(const Constant *CPV);
+ void printConstants(const Module* M);
+
+ void printVariableUses(const GlobalVariable *GV);
+ void printVariableHead(const GlobalVariable *GV);
+ void printVariableBody(const GlobalVariable *GV);
+
+ void printFunctionUses(const Function *F);
+ void printFunctionHead(const Function *F);
+ void printFunctionBody(const Function *F);
+ void printInstruction(const Instruction *I, const std::string& bbname);
+ std::string getOpName(const Value*);
+
+ void printModuleBody();
+ };
+} // end anonymous namespace.
+
+formatted_raw_ostream &CppWriter::nl(formatted_raw_ostream &Out, int delta) {
+ Out << '\n';
+ if (delta >= 0 || indent_level >= unsigned(-delta))
+ indent_level += delta;
+ Out.indent(indent_level);
+ return Out;
+}
+
+static inline void sanitize(std::string &str) {
+ for (size_t i = 0; i < str.length(); ++i)
+ if (!isalnum(str[i]) && str[i] != '_')
+ str[i] = '_';
+}
+
+static std::string getTypePrefix(Type *Ty) {
+ switch (Ty->getTypeID()) {
+ case Type::VoidTyID: return "void_";
+ case Type::IntegerTyID:
+ return "int" + utostr(cast<IntegerType>(Ty)->getBitWidth()) + "_";
+ case Type::FloatTyID: return "float_";
+ case Type::DoubleTyID: return "double_";
+ case Type::LabelTyID: return "label_";
+ case Type::FunctionTyID: return "func_";
+ case Type::StructTyID: return "struct_";
+ case Type::ArrayTyID: return "array_";
+ case Type::PointerTyID: return "ptr_";
+ case Type::VectorTyID: return "packed_";
+ default: return "other_";
+ }
+ return "unknown_";
+}
+
+void CppWriter::error(const std::string& msg) {
+ report_fatal_error(msg);
+}
+
+// printCFP - Print a floating point constant .. very carefully :)
+// This makes sure that conversion to/from floating yields the same binary
+// result so that we don't lose precision.
+void CppWriter::printCFP(const ConstantFP *CFP) {
+ bool ignored;
+ APFloat APF = APFloat(CFP->getValueAPF()); // copy
+ if (CFP->getType() == Type::getFloatTy(CFP->getContext()))
+ APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &ignored);
+ Out << "ConstantFP::get(mod->getContext(), ";
+ Out << "APFloat(";
+#if HAVE_PRINTF_A
+ char Buffer[100];
+ sprintf(Buffer, "%A", APF.convertToDouble());
+ if ((!strncmp(Buffer, "0x", 2) ||
+ !strncmp(Buffer, "-0x", 3) ||
+ !strncmp(Buffer, "+0x", 3)) &&
+ APF.bitwiseIsEqual(APFloat(atof(Buffer)))) {
+ if (CFP->getType() == Type::getDoubleTy(CFP->getContext()))
+ Out << "BitsToDouble(" << Buffer << ")";
+ else
+ Out << "BitsToFloat((float)" << Buffer << ")";
+ Out << ")";
+ } else {
+#endif
+ std::string StrVal = ftostr(CFP->getValueAPF());
+
+ while (StrVal[0] == ' ')
+ StrVal.erase(StrVal.begin());
+
+ // Check to make sure that the stringized number is not some string like
+ // "Inf" or NaN. Check that the string matches the "[-+]?[0-9]" regex.
+ if (((StrVal[0] >= '0' && StrVal[0] <= '9') ||
+ ((StrVal[0] == '-' || StrVal[0] == '+') &&
+ (StrVal[1] >= '0' && StrVal[1] <= '9'))) &&
+ (CFP->isExactlyValue(atof(StrVal.c_str())))) {
+ if (CFP->getType() == Type::getDoubleTy(CFP->getContext()))
+ Out << StrVal;
+ else
+ Out << StrVal << "f";
+ } else if (CFP->getType() == Type::getDoubleTy(CFP->getContext()))
+ Out << "BitsToDouble(0x"
+ << utohexstr(CFP->getValueAPF().bitcastToAPInt().getZExtValue())
+ << "ULL) /* " << StrVal << " */";
+ else
+ Out << "BitsToFloat(0x"
+ << utohexstr((uint32_t)CFP->getValueAPF().
+ bitcastToAPInt().getZExtValue())
+ << "U) /* " << StrVal << " */";
+ Out << ")";
+#if HAVE_PRINTF_A
+ }
+#endif
+ Out << ")";
+}
+
+void CppWriter::printCallingConv(CallingConv::ID cc){
+ // Print the calling convention.
+ switch (cc) {
+ case CallingConv::C: Out << "CallingConv::C"; break;
+ case CallingConv::Fast: Out << "CallingConv::Fast"; break;
+ case CallingConv::Cold: Out << "CallingConv::Cold"; break;
+ case CallingConv::FirstTargetCC: Out << "CallingConv::FirstTargetCC"; break;
+ default: Out << cc; break;
+ }
+}
+
+void CppWriter::printLinkageType(GlobalValue::LinkageTypes LT) {
+ switch (LT) {
+ case GlobalValue::InternalLinkage:
+ Out << "GlobalValue::InternalLinkage"; break;
+ case GlobalValue::PrivateLinkage:
+ Out << "GlobalValue::PrivateLinkage"; break;
+ case GlobalValue::LinkerPrivateLinkage:
+ Out << "GlobalValue::LinkerPrivateLinkage"; break;
+ case GlobalValue::LinkerPrivateWeakLinkage:
+ Out << "GlobalValue::LinkerPrivateWeakLinkage"; break;
+ case GlobalValue::LinkerPrivateWeakDefAutoLinkage:
+ Out << "GlobalValue::LinkerPrivateWeakDefAutoLinkage"; break;
+ case GlobalValue::AvailableExternallyLinkage:
+ Out << "GlobalValue::AvailableExternallyLinkage "; break;
+ case GlobalValue::LinkOnceAnyLinkage:
+ Out << "GlobalValue::LinkOnceAnyLinkage "; break;
+ case GlobalValue::LinkOnceODRLinkage:
+ Out << "GlobalValue::LinkOnceODRLinkage "; break;
+ case GlobalValue::WeakAnyLinkage:
+ Out << "GlobalValue::WeakAnyLinkage"; break;
+ case GlobalValue::WeakODRLinkage:
+ Out << "GlobalValue::WeakODRLinkage"; break;
+ case GlobalValue::AppendingLinkage:
+ Out << "GlobalValue::AppendingLinkage"; break;
+ case GlobalValue::ExternalLinkage:
+ Out << "GlobalValue::ExternalLinkage"; break;
+ case GlobalValue::DLLImportLinkage:
+ Out << "GlobalValue::DLLImportLinkage"; break;
+ case GlobalValue::DLLExportLinkage:
+ Out << "GlobalValue::DLLExportLinkage"; break;
+ case GlobalValue::ExternalWeakLinkage:
+ Out << "GlobalValue::ExternalWeakLinkage"; break;
+ case GlobalValue::CommonLinkage:
+ Out << "GlobalValue::CommonLinkage"; break;
+ }
+}
+
+void CppWriter::printVisibilityType(GlobalValue::VisibilityTypes VisType) {
+ switch (VisType) {
+ default: llvm_unreachable("Unknown GVar visibility");
+ case GlobalValue::DefaultVisibility:
+ Out << "GlobalValue::DefaultVisibility";
+ break;
+ case GlobalValue::HiddenVisibility:
+ Out << "GlobalValue::HiddenVisibility";
+ break;
+ case GlobalValue::ProtectedVisibility:
+ Out << "GlobalValue::ProtectedVisibility";
+ break;
+ }
+}
+
+// printEscapedString - Print each character of the specified string, escaping
+// it if it is not printable or if it is an escape char.
+void CppWriter::printEscapedString(const std::string &Str) {
+ for (unsigned i = 0, e = Str.size(); i != e; ++i) {
+ unsigned char C = Str[i];
+ if (isprint(C) && C != '"' && C != '\\') {
+ Out << C;
+ } else {
+ Out << "\\x"
+ << (char) ((C/16 < 10) ? ( C/16 +'0') : ( C/16 -10+'A'))
+ << (char)(((C&15) < 10) ? ((C&15)+'0') : ((C&15)-10+'A'));
+ }
+ }
+}
+
+std::string CppWriter::getCppName(Type* Ty) {
+ // First, handle the primitive types .. easy
+ if (Ty->isPrimitiveType() || Ty->isIntegerTy()) {
+ switch (Ty->getTypeID()) {
+ case Type::VoidTyID: return "Type::getVoidTy(mod->getContext())";
+ case Type::IntegerTyID: {
+ unsigned BitWidth = cast<IntegerType>(Ty)->getBitWidth();
+ return "IntegerType::get(mod->getContext(), " + utostr(BitWidth) + ")";
+ }
+ case Type::X86_FP80TyID: return "Type::getX86_FP80Ty(mod->getContext())";
+ case Type::FloatTyID: return "Type::getFloatTy(mod->getContext())";
+ case Type::DoubleTyID: return "Type::getDoubleTy(mod->getContext())";
+ case Type::LabelTyID: return "Type::getLabelTy(mod->getContext())";
+ case Type::X86_MMXTyID: return "Type::getX86_MMXTy(mod->getContext())";
+ default:
+ error("Invalid primitive type");
+ break;
+ }
+ // shouldn't be returned, but make it sensible
+ return "Type::getVoidTy(mod->getContext())";
+ }
+
+ // Now, see if we've seen the type before and return that
+ TypeMap::iterator I = TypeNames.find(Ty);
+ if (I != TypeNames.end())
+ return I->second;
+
+ // Okay, let's build a new name for this type. Start with a prefix
+ const char* prefix = 0;
+ switch (Ty->getTypeID()) {
+ case Type::FunctionTyID: prefix = "FuncTy_"; break;
+ case Type::StructTyID: prefix = "StructTy_"; break;
+ case Type::ArrayTyID: prefix = "ArrayTy_"; break;
+ case Type::PointerTyID: prefix = "PointerTy_"; break;
+ case Type::VectorTyID: prefix = "VectorTy_"; break;
+ default: prefix = "OtherTy_"; break; // prevent breakage
+ }
+
+ // See if the type has a name in the symboltable and build accordingly
+ std::string name;
+ if (StructType *STy = dyn_cast<StructType>(Ty))
+ if (STy->hasName())
+ name = STy->getName();
+
+ if (name.empty())
+ name = utostr(uniqueNum++);
+
+ name = std::string(prefix) + name;
+ sanitize(name);
+
+ // Save the name
+ return TypeNames[Ty] = name;
+}
+
+void CppWriter::printCppName(Type* Ty) {
+ printEscapedString(getCppName(Ty));
+}
+
+std::string CppWriter::getCppName(const Value* val) {
+ std::string name;
+ ValueMap::iterator I = ValueNames.find(val);
+ if (I != ValueNames.end() && I->first == val)
+ return I->second;
+
+ if (const GlobalVariable* GV = dyn_cast<GlobalVariable>(val)) {
+ name = std::string("gvar_") +
+ getTypePrefix(GV->getType()->getElementType());
+ } else if (isa<Function>(val)) {
+ name = std::string("func_");
+ } else if (const Constant* C = dyn_cast<Constant>(val)) {
+ name = std::string("const_") + getTypePrefix(C->getType());
+ } else if (const Argument* Arg = dyn_cast<Argument>(val)) {
+ if (is_inline) {
+ unsigned argNum = std::distance(Arg->getParent()->arg_begin(),
+ Function::const_arg_iterator(Arg)) + 1;
+ name = std::string("arg_") + utostr(argNum);
+ NameSet::iterator NI = UsedNames.find(name);
+ if (NI != UsedNames.end())
+ name += std::string("_") + utostr(uniqueNum++);
+ UsedNames.insert(name);
+ return ValueNames[val] = name;
+ } else {
+ name = getTypePrefix(val->getType());
+ }
+ } else {
+ name = getTypePrefix(val->getType());
+ }
+ if (val->hasName())
+ name += val->getName();
+ else
+ name += utostr(uniqueNum++);
+ sanitize(name);
+ NameSet::iterator NI = UsedNames.find(name);
+ if (NI != UsedNames.end())
+ name += std::string("_") + utostr(uniqueNum++);
+ UsedNames.insert(name);
+ return ValueNames[val] = name;
+}
+
+void CppWriter::printCppName(const Value* val) {
+ printEscapedString(getCppName(val));
+}
+
+void CppWriter::printAttributes(const AttrListPtr &PAL,
+ const std::string &name) {
+ Out << "AttrListPtr " << name << "_PAL;";
+ nl(Out);
+ if (!PAL.isEmpty()) {
+ Out << '{'; in(); nl(Out);
+ Out << "SmallVector<AttributeWithIndex, 4> Attrs;"; nl(Out);
+ Out << "AttributeWithIndex PAWI;"; nl(Out);
+ for (unsigned i = 0; i < PAL.getNumSlots(); ++i) {
+ unsigned index = PAL.getSlot(i).Index;
+ Attributes attrs = PAL.getSlot(i).Attrs;
+ Out << "PAWI.Index = " << index << "U; PAWI.Attrs = 0 ";
+#define HANDLE_ATTR(X) \
+ if (attrs & Attribute::X) \
+ Out << " | Attribute::" #X; \
+ attrs &= ~Attribute::X;
+
+ HANDLE_ATTR(SExt);
+ HANDLE_ATTR(ZExt);
+ HANDLE_ATTR(NoReturn);
+ HANDLE_ATTR(InReg);
+ HANDLE_ATTR(StructRet);
+ HANDLE_ATTR(NoUnwind);
+ HANDLE_ATTR(NoAlias);
+ HANDLE_ATTR(ByVal);
+ HANDLE_ATTR(Nest);
+ HANDLE_ATTR(ReadNone);
+ HANDLE_ATTR(ReadOnly);
+ HANDLE_ATTR(NoInline);
+ HANDLE_ATTR(AlwaysInline);
+ HANDLE_ATTR(OptimizeForSize);
+ HANDLE_ATTR(StackProtect);
+ HANDLE_ATTR(StackProtectReq);
+ HANDLE_ATTR(NoCapture);
+ HANDLE_ATTR(NoRedZone);
+ HANDLE_ATTR(NoImplicitFloat);
+ HANDLE_ATTR(Naked);
+ HANDLE_ATTR(InlineHint);
+ HANDLE_ATTR(ReturnsTwice);
+ HANDLE_ATTR(UWTable);
+ HANDLE_ATTR(NonLazyBind);
+#undef HANDLE_ATTR
+ if (attrs & Attribute::StackAlignment)
+ Out << " | Attribute::constructStackAlignmentFromInt("
+ << Attribute::getStackAlignmentFromAttrs(attrs)
+ << ")";
+ attrs &= ~Attribute::StackAlignment;
+ assert(attrs == 0 && "Unhandled attribute!");
+ Out << ";";
+ nl(Out);
+ Out << "Attrs.push_back(PAWI);";
+ nl(Out);
+ }
+ Out << name << "_PAL = AttrListPtr::get(Attrs.begin(), Attrs.end());";
+ nl(Out);
+ out(); nl(Out);
+ Out << '}'; nl(Out);
+ }
+}
+
+void CppWriter::printType(Type* Ty) {
+ // We don't print definitions for primitive types
+ if (Ty->isPrimitiveType() || Ty->isIntegerTy())
+ return;
+
+ // If we already defined this type, we don't need to define it again.
+ if (DefinedTypes.find(Ty) != DefinedTypes.end())
+ return;
+
+ // Everything below needs the name for the type so get it now.
+ std::string typeName(getCppName(Ty));
+
+ // Print the type definition
+ switch (Ty->getTypeID()) {
+ case Type::FunctionTyID: {
+ FunctionType* FT = cast<FunctionType>(Ty);
+ Out << "std::vector<Type*>" << typeName << "_args;";
+ nl(Out);
+ FunctionType::param_iterator PI = FT->param_begin();
+ FunctionType::param_iterator PE = FT->param_end();
+ for (; PI != PE; ++PI) {
+ Type* argTy = static_cast<Type*>(*PI);
+ printType(argTy);
+ std::string argName(getCppName(argTy));
+ Out << typeName << "_args.push_back(" << argName;
+ Out << ");";
+ nl(Out);
+ }
+ printType(FT->getReturnType());
+ std::string retTypeName(getCppName(FT->getReturnType()));
+ Out << "FunctionType* " << typeName << " = FunctionType::get(";
+ in(); nl(Out) << "/*Result=*/" << retTypeName;
+ Out << ",";
+ nl(Out) << "/*Params=*/" << typeName << "_args,";
+ nl(Out) << "/*isVarArg=*/" << (FT->isVarArg() ? "true" : "false") << ");";
+ out();
+ nl(Out);
+ break;
+ }
+ case Type::StructTyID: {
+ StructType* ST = cast<StructType>(Ty);
+ if (!ST->isLiteral()) {
+ Out << "StructType *" << typeName << " = mod->getTypeByName(\"";
+ printEscapedString(ST->getName());
+ Out << "\");";
+ nl(Out);
+ Out << "if (!" << typeName << ") {";
+ nl(Out);
+ Out << typeName << " = ";
+ Out << "StructType::create(mod->getContext(), \"";
+ printEscapedString(ST->getName());
+ Out << "\");";
+ nl(Out);
+ Out << "}";
+ nl(Out);
+ // Indicate that this type is now defined.
+ DefinedTypes.insert(Ty);
+ }
+
+ Out << "std::vector<Type*>" << typeName << "_fields;";
+ nl(Out);
+ StructType::element_iterator EI = ST->element_begin();
+ StructType::element_iterator EE = ST->element_end();
+ for (; EI != EE; ++EI) {
+ Type* fieldTy = static_cast<Type*>(*EI);
+ printType(fieldTy);
+ std::string fieldName(getCppName(fieldTy));
+ Out << typeName << "_fields.push_back(" << fieldName;
+ Out << ");";
+ nl(Out);
+ }
+
+ if (ST->isLiteral()) {
+ Out << "StructType *" << typeName << " = ";
+ Out << "StructType::get(" << "mod->getContext(), ";
+ } else {
+ Out << "if (" << typeName << "->isOpaque()) {";
+ nl(Out);
+ Out << typeName << "->setBody(";
+ }
+
+ Out << typeName << "_fields, /*isPacked=*/"
+ << (ST->isPacked() ? "true" : "false") << ");";
+ nl(Out);
+ if (!ST->isLiteral()) {
+ Out << "}";
+ nl(Out);
+ }
+ break;
+ }
+ case Type::ArrayTyID: {
+ ArrayType* AT = cast<ArrayType>(Ty);
+ Type* ET = AT->getElementType();
+ printType(ET);
+ if (DefinedTypes.find(Ty) == DefinedTypes.end()) {
+ std::string elemName(getCppName(ET));
+ Out << "ArrayType* " << typeName << " = ArrayType::get("
+ << elemName
+ << ", " << utostr(AT->getNumElements()) << ");";
+ nl(Out);
+ }
+ break;
+ }
+ case Type::PointerTyID: {
+ PointerType* PT = cast<PointerType>(Ty);
+ Type* ET = PT->getElementType();
+ printType(ET);
+ if (DefinedTypes.find(Ty) == DefinedTypes.end()) {
+ std::string elemName(getCppName(ET));
+ Out << "PointerType* " << typeName << " = PointerType::get("
+ << elemName
+ << ", " << utostr(PT->getAddressSpace()) << ");";
+ nl(Out);
+ }
+ break;
+ }
+ case Type::VectorTyID: {
+ VectorType* PT = cast<VectorType>(Ty);
+ Type* ET = PT->getElementType();
+ printType(ET);
+ if (DefinedTypes.find(Ty) == DefinedTypes.end()) {
+ std::string elemName(getCppName(ET));
+ Out << "VectorType* " << typeName << " = VectorType::get("
+ << elemName
+ << ", " << utostr(PT->getNumElements()) << ");";
+ nl(Out);
+ }
+ break;
+ }
+ default:
+ error("Invalid TypeID");
+ }
+
+ // Indicate that this type is now defined.
+ DefinedTypes.insert(Ty);
+
+ // Finally, separate the type definition from other with a newline.
+ nl(Out);
+}
+
+void CppWriter::printTypes(const Module* M) {
+ // Add all of the global variables to the value table.
+ for (Module::const_global_iterator I = TheModule->global_begin(),
+ E = TheModule->global_end(); I != E; ++I) {
+ if (I->hasInitializer())
+ printType(I->getInitializer()->getType());
+ printType(I->getType());
+ }
+
+ // Add all the functions to the table
+ for (Module::const_iterator FI = TheModule->begin(), FE = TheModule->end();
+ FI != FE; ++FI) {
+ printType(FI->getReturnType());
+ printType(FI->getFunctionType());
+ // Add all the function arguments
+ for (Function::const_arg_iterator AI = FI->arg_begin(),
+ AE = FI->arg_end(); AI != AE; ++AI) {
+ printType(AI->getType());
+ }
+
+ // Add all of the basic blocks and instructions
+ for (Function::const_iterator BB = FI->begin(),
+ E = FI->end(); BB != E; ++BB) {
+ printType(BB->getType());
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E;
+ ++I) {
+ printType(I->getType());
+ for (unsigned i = 0; i < I->getNumOperands(); ++i)
+ printType(I->getOperand(i)->getType());
+ }
+ }
+ }
+}
+
+
+// printConstant - Print out a constant pool entry...
+void CppWriter::printConstant(const Constant *CV) {
+ // First, if the constant is actually a GlobalValue (variable or function)
+ // or its already in the constant list then we've printed it already and we
+ // can just return.
+ if (isa<GlobalValue>(CV) || ValueNames.find(CV) != ValueNames.end())
+ return;
+
+ std::string constName(getCppName(CV));
+ std::string typeName(getCppName(CV->getType()));
+
+ if (isa<GlobalValue>(CV)) {
+ // Skip variables and functions, we emit them elsewhere
+ return;
+ }
+
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
+ std::string constValue = CI->getValue().toString(10, true);
+ Out << "ConstantInt* " << constName
+ << " = ConstantInt::get(mod->getContext(), APInt("
+ << cast<IntegerType>(CI->getType())->getBitWidth()
+ << ", StringRef(\"" << constValue << "\"), 10));";
+ } else if (isa<ConstantAggregateZero>(CV)) {
+ Out << "ConstantAggregateZero* " << constName
+ << " = ConstantAggregateZero::get(" << typeName << ");";
+ } else if (isa<ConstantPointerNull>(CV)) {
+ Out << "ConstantPointerNull* " << constName
+ << " = ConstantPointerNull::get(" << typeName << ");";
+ } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) {
+ Out << "ConstantFP* " << constName << " = ";
+ printCFP(CFP);
+ Out << ";";
+ } else if (const ConstantArray *CA = dyn_cast<ConstantArray>(CV)) {
+ if (CA->isString() &&
+ CA->getType()->getElementType() ==
+ Type::getInt8Ty(CA->getContext())) {
+ Out << "Constant* " << constName <<
+ " = ConstantArray::get(mod->getContext(), \"";
+ std::string tmp = CA->getAsString();
+ bool nullTerminate = false;
+ if (tmp[tmp.length()-1] == 0) {
+ tmp.erase(tmp.length()-1);
+ nullTerminate = true;
+ }
+ printEscapedString(tmp);
+ // Determine if we want null termination or not.
+ if (nullTerminate)
+ Out << "\", true"; // Indicate that the null terminator should be
+ // added.
+ else
+ Out << "\", false";// No null terminator
+ Out << ");";
+ } else {
+ Out << "std::vector<Constant*> " << constName << "_elems;";
+ nl(Out);
+ unsigned N = CA->getNumOperands();
+ for (unsigned i = 0; i < N; ++i) {
+ printConstant(CA->getOperand(i)); // recurse to print operands
+ Out << constName << "_elems.push_back("
+ << getCppName(CA->getOperand(i)) << ");";
+ nl(Out);
+ }
+ Out << "Constant* " << constName << " = ConstantArray::get("
+ << typeName << ", " << constName << "_elems);";
+ }
+ } else if (const ConstantStruct *CS = dyn_cast<ConstantStruct>(CV)) {
+ Out << "std::vector<Constant*> " << constName << "_fields;";
+ nl(Out);
+ unsigned N = CS->getNumOperands();
+ for (unsigned i = 0; i < N; i++) {
+ printConstant(CS->getOperand(i));
+ Out << constName << "_fields.push_back("
+ << getCppName(CS->getOperand(i)) << ");";
+ nl(Out);
+ }
+ Out << "Constant* " << constName << " = ConstantStruct::get("
+ << typeName << ", " << constName << "_fields);";
+ } else if (const ConstantVector *CP = dyn_cast<ConstantVector>(CV)) {
+ Out << "std::vector<Constant*> " << constName << "_elems;";
+ nl(Out);
+ unsigned N = CP->getNumOperands();
+ for (unsigned i = 0; i < N; ++i) {
+ printConstant(CP->getOperand(i));
+ Out << constName << "_elems.push_back("
+ << getCppName(CP->getOperand(i)) << ");";
+ nl(Out);
+ }
+ Out << "Constant* " << constName << " = ConstantVector::get("
+ << typeName << ", " << constName << "_elems);";
+ } else if (isa<UndefValue>(CV)) {
+ Out << "UndefValue* " << constName << " = UndefValue::get("
+ << typeName << ");";
+ } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) {
+ if (CE->getOpcode() == Instruction::GetElementPtr) {
+ Out << "std::vector<Constant*> " << constName << "_indices;";
+ nl(Out);
+ printConstant(CE->getOperand(0));
+ for (unsigned i = 1; i < CE->getNumOperands(); ++i ) {
+ printConstant(CE->getOperand(i));
+ Out << constName << "_indices.push_back("
+ << getCppName(CE->getOperand(i)) << ");";
+ nl(Out);
+ }
+ Out << "Constant* " << constName
+ << " = ConstantExpr::getGetElementPtr("
+ << getCppName(CE->getOperand(0)) << ", "
+ << constName << "_indices);";
+ } else if (CE->isCast()) {
+ printConstant(CE->getOperand(0));
+ Out << "Constant* " << constName << " = ConstantExpr::getCast(";
+ switch (CE->getOpcode()) {
+ default: llvm_unreachable("Invalid cast opcode");
+ case Instruction::Trunc: Out << "Instruction::Trunc"; break;
+ case Instruction::ZExt: Out << "Instruction::ZExt"; break;
+ case Instruction::SExt: Out << "Instruction::SExt"; break;
+ case Instruction::FPTrunc: Out << "Instruction::FPTrunc"; break;
+ case Instruction::FPExt: Out << "Instruction::FPExt"; break;
+ case Instruction::FPToUI: Out << "Instruction::FPToUI"; break;
+ case Instruction::FPToSI: Out << "Instruction::FPToSI"; break;
+ case Instruction::UIToFP: Out << "Instruction::UIToFP"; break;
+ case Instruction::SIToFP: Out << "Instruction::SIToFP"; break;
+ case Instruction::PtrToInt: Out << "Instruction::PtrToInt"; break;
+ case Instruction::IntToPtr: Out << "Instruction::IntToPtr"; break;
+ case Instruction::BitCast: Out << "Instruction::BitCast"; break;
+ }
+ Out << ", " << getCppName(CE->getOperand(0)) << ", "
+ << getCppName(CE->getType()) << ");";
+ } else {
+ unsigned N = CE->getNumOperands();
+ for (unsigned i = 0; i < N; ++i ) {
+ printConstant(CE->getOperand(i));
+ }
+ Out << "Constant* " << constName << " = ConstantExpr::";
+ switch (CE->getOpcode()) {
+ case Instruction::Add: Out << "getAdd("; break;
+ case Instruction::FAdd: Out << "getFAdd("; break;
+ case Instruction::Sub: Out << "getSub("; break;
+ case Instruction::FSub: Out << "getFSub("; break;
+ case Instruction::Mul: Out << "getMul("; break;
+ case Instruction::FMul: Out << "getFMul("; break;
+ case Instruction::UDiv: Out << "getUDiv("; break;
+ case Instruction::SDiv: Out << "getSDiv("; break;
+ case Instruction::FDiv: Out << "getFDiv("; break;
+ case Instruction::URem: Out << "getURem("; break;
+ case Instruction::SRem: Out << "getSRem("; break;
+ case Instruction::FRem: Out << "getFRem("; break;
+ case Instruction::And: Out << "getAnd("; break;
+ case Instruction::Or: Out << "getOr("; break;
+ case Instruction::Xor: Out << "getXor("; break;
+ case Instruction::ICmp:
+ Out << "getICmp(ICmpInst::ICMP_";
+ switch (CE->getPredicate()) {
+ case ICmpInst::ICMP_EQ: Out << "EQ"; break;
+ case ICmpInst::ICMP_NE: Out << "NE"; break;
+ case ICmpInst::ICMP_SLT: Out << "SLT"; break;
+ case ICmpInst::ICMP_ULT: Out << "ULT"; break;
+ case ICmpInst::ICMP_SGT: Out << "SGT"; break;
+ case ICmpInst::ICMP_UGT: Out << "UGT"; break;
+ case ICmpInst::ICMP_SLE: Out << "SLE"; break;
+ case ICmpInst::ICMP_ULE: Out << "ULE"; break;
+ case ICmpInst::ICMP_SGE: Out << "SGE"; break;
+ case ICmpInst::ICMP_UGE: Out << "UGE"; break;
+ default: error("Invalid ICmp Predicate");
+ }
+ break;
+ case Instruction::FCmp:
+ Out << "getFCmp(FCmpInst::FCMP_";
+ switch (CE->getPredicate()) {
+ case FCmpInst::FCMP_FALSE: Out << "FALSE"; break;
+ case FCmpInst::FCMP_ORD: Out << "ORD"; break;
+ case FCmpInst::FCMP_UNO: Out << "UNO"; break;
+ case FCmpInst::FCMP_OEQ: Out << "OEQ"; break;
+ case FCmpInst::FCMP_UEQ: Out << "UEQ"; break;
+ case FCmpInst::FCMP_ONE: Out << "ONE"; break;
+ case FCmpInst::FCMP_UNE: Out << "UNE"; break;
+ case FCmpInst::FCMP_OLT: Out << "OLT"; break;
+ case FCmpInst::FCMP_ULT: Out << "ULT"; break;
+ case FCmpInst::FCMP_OGT: Out << "OGT"; break;
+ case FCmpInst::FCMP_UGT: Out << "UGT"; break;
+ case FCmpInst::FCMP_OLE: Out << "OLE"; break;
+ case FCmpInst::FCMP_ULE: Out << "ULE"; break;
+ case FCmpInst::FCMP_OGE: Out << "OGE"; break;
+ case FCmpInst::FCMP_UGE: Out << "UGE"; break;
+ case FCmpInst::FCMP_TRUE: Out << "TRUE"; break;
+ default: error("Invalid FCmp Predicate");
+ }
+ break;
+ case Instruction::Shl: Out << "getShl("; break;
+ case Instruction::LShr: Out << "getLShr("; break;
+ case Instruction::AShr: Out << "getAShr("; break;
+ case Instruction::Select: Out << "getSelect("; break;
+ case Instruction::ExtractElement: Out << "getExtractElement("; break;
+ case Instruction::InsertElement: Out << "getInsertElement("; break;
+ case Instruction::ShuffleVector: Out << "getShuffleVector("; break;
+ default:
+ error("Invalid constant expression");
+ break;
+ }
+ Out << getCppName(CE->getOperand(0));
+ for (unsigned i = 1; i < CE->getNumOperands(); ++i)
+ Out << ", " << getCppName(CE->getOperand(i));
+ Out << ");";
+ }
+ } else if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV)) {
+ Out << "Constant* " << constName << " = ";
+ Out << "BlockAddress::get(" << getOpName(BA->getBasicBlock()) << ");";
+ } else {
+ error("Bad Constant");
+ Out << "Constant* " << constName << " = 0; ";
+ }
+ nl(Out);
+}
+
+void CppWriter::printConstants(const Module* M) {
+ // Traverse all the global variables looking for constant initializers
+ for (Module::const_global_iterator I = TheModule->global_begin(),
+ E = TheModule->global_end(); I != E; ++I)
+ if (I->hasInitializer())
+ printConstant(I->getInitializer());
+
+ // Traverse the LLVM functions looking for constants
+ for (Module::const_iterator FI = TheModule->begin(), FE = TheModule->end();
+ FI != FE; ++FI) {
+ // Add all of the basic blocks and instructions
+ for (Function::const_iterator BB = FI->begin(),
+ E = FI->end(); BB != E; ++BB) {
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E;
+ ++I) {
+ for (unsigned i = 0; i < I->getNumOperands(); ++i) {
+ if (Constant* C = dyn_cast<Constant>(I->getOperand(i))) {
+ printConstant(C);
+ }
+ }
+ }
+ }
+ }
+}
+
+void CppWriter::printVariableUses(const GlobalVariable *GV) {
+ nl(Out) << "// Type Definitions";
+ nl(Out);
+ printType(GV->getType());
+ if (GV->hasInitializer()) {
+ const Constant *Init = GV->getInitializer();
+ printType(Init->getType());
+ if (const Function *F = dyn_cast<Function>(Init)) {
+ nl(Out)<< "/ Function Declarations"; nl(Out);
+ printFunctionHead(F);
+ } else if (const GlobalVariable* gv = dyn_cast<GlobalVariable>(Init)) {
+ nl(Out) << "// Global Variable Declarations"; nl(Out);
+ printVariableHead(gv);
+
+ nl(Out) << "// Global Variable Definitions"; nl(Out);
+ printVariableBody(gv);
+ } else {
+ nl(Out) << "// Constant Definitions"; nl(Out);
+ printConstant(Init);
+ }
+ }
+}
+
+void CppWriter::printVariableHead(const GlobalVariable *GV) {
+ nl(Out) << "GlobalVariable* " << getCppName(GV);
+ if (is_inline) {
+ Out << " = mod->getGlobalVariable(mod->getContext(), ";
+ printEscapedString(GV->getName());
+ Out << ", " << getCppName(GV->getType()->getElementType()) << ",true)";
+ nl(Out) << "if (!" << getCppName(GV) << ") {";
+ in(); nl(Out) << getCppName(GV);
+ }
+ Out << " = new GlobalVariable(/*Module=*/*mod, ";
+ nl(Out) << "/*Type=*/";
+ printCppName(GV->getType()->getElementType());
+ Out << ",";
+ nl(Out) << "/*isConstant=*/" << (GV->isConstant()?"true":"false");
+ Out << ",";
+ nl(Out) << "/*Linkage=*/";
+ printLinkageType(GV->getLinkage());
+ Out << ",";
+ nl(Out) << "/*Initializer=*/0, ";
+ if (GV->hasInitializer()) {
+ Out << "// has initializer, specified below";
+ }
+ nl(Out) << "/*Name=*/\"";
+ printEscapedString(GV->getName());
+ Out << "\");";
+ nl(Out);
+
+ if (GV->hasSection()) {
+ printCppName(GV);
+ Out << "->setSection(\"";
+ printEscapedString(GV->getSection());
+ Out << "\");";
+ nl(Out);
+ }
+ if (GV->getAlignment()) {
+ printCppName(GV);
+ Out << "->setAlignment(" << utostr(GV->getAlignment()) << ");";
+ nl(Out);
+ }
+ if (GV->getVisibility() != GlobalValue::DefaultVisibility) {
+ printCppName(GV);
+ Out << "->setVisibility(";
+ printVisibilityType(GV->getVisibility());
+ Out << ");";
+ nl(Out);
+ }
+ if (GV->isThreadLocal()) {
+ printCppName(GV);
+ Out << "->setThreadLocal(true);";
+ nl(Out);
+ }
+ if (is_inline) {
+ out(); Out << "}"; nl(Out);
+ }
+}
+
+void CppWriter::printVariableBody(const GlobalVariable *GV) {
+ if (GV->hasInitializer()) {
+ printCppName(GV);
+ Out << "->setInitializer(";
+ Out << getCppName(GV->getInitializer()) << ");";
+ nl(Out);
+ }
+}
+
+std::string CppWriter::getOpName(const Value* V) {
+ if (!isa<Instruction>(V) || DefinedValues.find(V) != DefinedValues.end())
+ return getCppName(V);
+
+ // See if its alread in the map of forward references, if so just return the
+ // name we already set up for it
+ ForwardRefMap::const_iterator I = ForwardRefs.find(V);
+ if (I != ForwardRefs.end())
+ return I->second;
+
+ // This is a new forward reference. Generate a unique name for it
+ std::string result(std::string("fwdref_") + utostr(uniqueNum++));
+
+ // Yes, this is a hack. An Argument is the smallest instantiable value that
+ // we can make as a placeholder for the real value. We'll replace these
+ // Argument instances later.
+ Out << "Argument* " << result << " = new Argument("
+ << getCppName(V->getType()) << ");";
+ nl(Out);
+ ForwardRefs[V] = result;
+ return result;
+}
+
+// printInstruction - This member is called for each Instruction in a function.
+void CppWriter::printInstruction(const Instruction *I,
+ const std::string& bbname) {
+ std::string iName(getCppName(I));
+
+ // Before we emit this instruction, we need to take care of generating any
+ // forward references. So, we get the names of all the operands in advance
+ const unsigned Ops(I->getNumOperands());
+ std::string* opNames = new std::string[Ops];
+ for (unsigned i = 0; i < Ops; i++)
+ opNames[i] = getOpName(I->getOperand(i));
+
+ switch (I->getOpcode()) {
+ default:
+ error("Invalid instruction");
+ break;
+
+ case Instruction::Ret: {
+ const ReturnInst* ret = cast<ReturnInst>(I);
+ Out << "ReturnInst::Create(mod->getContext(), "
+ << (ret->getReturnValue() ? opNames[0] + ", " : "") << bbname << ");";
+ break;
+ }
+ case Instruction::Br: {
+ const BranchInst* br = cast<BranchInst>(I);
+ Out << "BranchInst::Create(" ;
+ if (br->getNumOperands() == 3) {
+ Out << opNames[2] << ", "
+ << opNames[1] << ", "
+ << opNames[0] << ", ";
+
+ } else if (br->getNumOperands() == 1) {
+ Out << opNames[0] << ", ";
+ } else {
+ error("Branch with 2 operands?");
+ }
+ Out << bbname << ");";
+ break;
+ }
+ case Instruction::Switch: {
+ const SwitchInst *SI = cast<SwitchInst>(I);
+ Out << "SwitchInst* " << iName << " = SwitchInst::Create("
+ << getOpName(SI->getCondition()) << ", "
+ << getOpName(SI->getDefaultDest()) << ", "
+ << SI->getNumCases() << ", " << bbname << ");";
+ nl(Out);
+ unsigned NumCases = SI->getNumCases();
+ for (unsigned i = 1; i < NumCases; ++i) {
+ const ConstantInt* CaseVal = SI->getCaseValue(i);
+ const BasicBlock* BB = SI->getSuccessor(i);
+ Out << iName << "->addCase("
+ << getOpName(CaseVal) << ", "
+ << getOpName(BB) << ");";
+ nl(Out);
+ }
+ break;
+ }
+ case Instruction::IndirectBr: {
+ const IndirectBrInst *IBI = cast<IndirectBrInst>(I);
+ Out << "IndirectBrInst *" << iName << " = IndirectBrInst::Create("
+ << opNames[0] << ", " << IBI->getNumDestinations() << ");";
+ nl(Out);
+ for (unsigned i = 1; i != IBI->getNumOperands(); ++i) {
+ Out << iName << "->addDestination(" << opNames[i] << ");";
+ nl(Out);
+ }
+ break;
+ }
+ case Instruction::Resume: {
+ Out << "ResumeInst::Create(mod->getContext(), " << opNames[0]
+ << ", " << bbname << ");";
+ break;
+ }
+ case Instruction::Invoke: {
+ const InvokeInst* inv = cast<InvokeInst>(I);
+ Out << "std::vector<Value*> " << iName << "_params;";
+ nl(Out);
+ for (unsigned i = 0; i < inv->getNumArgOperands(); ++i) {
+ Out << iName << "_params.push_back("
+ << getOpName(inv->getArgOperand(i)) << ");";
+ nl(Out);
+ }
+ // FIXME: This shouldn't use magic numbers -3, -2, and -1.
+ Out << "InvokeInst *" << iName << " = InvokeInst::Create("
+ << getOpName(inv->getCalledFunction()) << ", "
+ << getOpName(inv->getNormalDest()) << ", "
+ << getOpName(inv->getUnwindDest()) << ", "
+ << iName << "_params, \"";
+ printEscapedString(inv->getName());
+ Out << "\", " << bbname << ");";
+ nl(Out) << iName << "->setCallingConv(";
+ printCallingConv(inv->getCallingConv());
+ Out << ");";
+ printAttributes(inv->getAttributes(), iName);
+ Out << iName << "->setAttributes(" << iName << "_PAL);";
+ nl(Out);
+ break;
+ }
+ case Instruction::Unwind: {
+ Out << "new UnwindInst("
+ << bbname << ");";
+ break;
+ }
+ case Instruction::Unreachable: {
+ Out << "new UnreachableInst("
+ << "mod->getContext(), "
+ << bbname << ");";
+ break;
+ }
+ case Instruction::Add:
+ case Instruction::FAdd:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:{
+ Out << "BinaryOperator* " << iName << " = BinaryOperator::Create(";
+ switch (I->getOpcode()) {
+ case Instruction::Add: Out << "Instruction::Add"; break;
+ case Instruction::FAdd: Out << "Instruction::FAdd"; break;
+ case Instruction::Sub: Out << "Instruction::Sub"; break;
+ case Instruction::FSub: Out << "Instruction::FSub"; break;
+ case Instruction::Mul: Out << "Instruction::Mul"; break;
+ case Instruction::FMul: Out << "Instruction::FMul"; break;
+ case Instruction::UDiv:Out << "Instruction::UDiv"; break;
+ case Instruction::SDiv:Out << "Instruction::SDiv"; break;
+ case Instruction::FDiv:Out << "Instruction::FDiv"; break;
+ case Instruction::URem:Out << "Instruction::URem"; break;
+ case Instruction::SRem:Out << "Instruction::SRem"; break;
+ case Instruction::FRem:Out << "Instruction::FRem"; break;
+ case Instruction::And: Out << "Instruction::And"; break;
+ case Instruction::Or: Out << "Instruction::Or"; break;
+ case Instruction::Xor: Out << "Instruction::Xor"; break;
+ case Instruction::Shl: Out << "Instruction::Shl"; break;
+ case Instruction::LShr:Out << "Instruction::LShr"; break;
+ case Instruction::AShr:Out << "Instruction::AShr"; break;
+ default: Out << "Instruction::BadOpCode"; break;
+ }
+ Out << ", " << opNames[0] << ", " << opNames[1] << ", \"";
+ printEscapedString(I->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ case Instruction::FCmp: {
+ Out << "FCmpInst* " << iName << " = new FCmpInst(*" << bbname << ", ";
+ switch (cast<FCmpInst>(I)->getPredicate()) {
+ case FCmpInst::FCMP_FALSE: Out << "FCmpInst::FCMP_FALSE"; break;
+ case FCmpInst::FCMP_OEQ : Out << "FCmpInst::FCMP_OEQ"; break;
+ case FCmpInst::FCMP_OGT : Out << "FCmpInst::FCMP_OGT"; break;
+ case FCmpInst::FCMP_OGE : Out << "FCmpInst::FCMP_OGE"; break;
+ case FCmpInst::FCMP_OLT : Out << "FCmpInst::FCMP_OLT"; break;
+ case FCmpInst::FCMP_OLE : Out << "FCmpInst::FCMP_OLE"; break;
+ case FCmpInst::FCMP_ONE : Out << "FCmpInst::FCMP_ONE"; break;
+ case FCmpInst::FCMP_ORD : Out << "FCmpInst::FCMP_ORD"; break;
+ case FCmpInst::FCMP_UNO : Out << "FCmpInst::FCMP_UNO"; break;
+ case FCmpInst::FCMP_UEQ : Out << "FCmpInst::FCMP_UEQ"; break;
+ case FCmpInst::FCMP_UGT : Out << "FCmpInst::FCMP_UGT"; break;
+ case FCmpInst::FCMP_UGE : Out << "FCmpInst::FCMP_UGE"; break;
+ case FCmpInst::FCMP_ULT : Out << "FCmpInst::FCMP_ULT"; break;
+ case FCmpInst::FCMP_ULE : Out << "FCmpInst::FCMP_ULE"; break;
+ case FCmpInst::FCMP_UNE : Out << "FCmpInst::FCMP_UNE"; break;
+ case FCmpInst::FCMP_TRUE : Out << "FCmpInst::FCMP_TRUE"; break;
+ default: Out << "FCmpInst::BAD_ICMP_PREDICATE"; break;
+ }
+ Out << ", " << opNames[0] << ", " << opNames[1] << ", \"";
+ printEscapedString(I->getName());
+ Out << "\");";
+ break;
+ }
+ case Instruction::ICmp: {
+ Out << "ICmpInst* " << iName << " = new ICmpInst(*" << bbname << ", ";
+ switch (cast<ICmpInst>(I)->getPredicate()) {
+ case ICmpInst::ICMP_EQ: Out << "ICmpInst::ICMP_EQ"; break;
+ case ICmpInst::ICMP_NE: Out << "ICmpInst::ICMP_NE"; break;
+ case ICmpInst::ICMP_ULE: Out << "ICmpInst::ICMP_ULE"; break;
+ case ICmpInst::ICMP_SLE: Out << "ICmpInst::ICMP_SLE"; break;
+ case ICmpInst::ICMP_UGE: Out << "ICmpInst::ICMP_UGE"; break;
+ case ICmpInst::ICMP_SGE: Out << "ICmpInst::ICMP_SGE"; break;
+ case ICmpInst::ICMP_ULT: Out << "ICmpInst::ICMP_ULT"; break;
+ case ICmpInst::ICMP_SLT: Out << "ICmpInst::ICMP_SLT"; break;
+ case ICmpInst::ICMP_UGT: Out << "ICmpInst::ICMP_UGT"; break;
+ case ICmpInst::ICMP_SGT: Out << "ICmpInst::ICMP_SGT"; break;
+ default: Out << "ICmpInst::BAD_ICMP_PREDICATE"; break;
+ }
+ Out << ", " << opNames[0] << ", " << opNames[1] << ", \"";
+ printEscapedString(I->getName());
+ Out << "\");";
+ break;
+ }
+ case Instruction::Alloca: {
+ const AllocaInst* allocaI = cast<AllocaInst>(I);
+ Out << "AllocaInst* " << iName << " = new AllocaInst("
+ << getCppName(allocaI->getAllocatedType()) << ", ";
+ if (allocaI->isArrayAllocation())
+ Out << opNames[0] << ", ";
+ Out << "\"";
+ printEscapedString(allocaI->getName());
+ Out << "\", " << bbname << ");";
+ if (allocaI->getAlignment())
+ nl(Out) << iName << "->setAlignment("
+ << allocaI->getAlignment() << ");";
+ break;
+ }
+ case Instruction::Load: {
+ const LoadInst* load = cast<LoadInst>(I);
+ Out << "LoadInst* " << iName << " = new LoadInst("
+ << opNames[0] << ", \"";
+ printEscapedString(load->getName());
+ Out << "\", " << (load->isVolatile() ? "true" : "false" )
+ << ", " << bbname << ");";
+ break;
+ }
+ case Instruction::Store: {
+ const StoreInst* store = cast<StoreInst>(I);
+ Out << " new StoreInst("
+ << opNames[0] << ", "
+ << opNames[1] << ", "
+ << (store->isVolatile() ? "true" : "false")
+ << ", " << bbname << ");";
+ break;
+ }
+ case Instruction::GetElementPtr: {
+ const GetElementPtrInst* gep = cast<GetElementPtrInst>(I);
+ if (gep->getNumOperands() <= 2) {
+ Out << "GetElementPtrInst* " << iName << " = GetElementPtrInst::Create("
+ << opNames[0];
+ if (gep->getNumOperands() == 2)
+ Out << ", " << opNames[1];
+ } else {
+ Out << "std::vector<Value*> " << iName << "_indices;";
+ nl(Out);
+ for (unsigned i = 1; i < gep->getNumOperands(); ++i ) {
+ Out << iName << "_indices.push_back("
+ << opNames[i] << ");";
+ nl(Out);
+ }
+ Out << "Instruction* " << iName << " = GetElementPtrInst::Create("
+ << opNames[0] << ", " << iName << "_indices";
+ }
+ Out << ", \"";
+ printEscapedString(gep->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ case Instruction::PHI: {
+ const PHINode* phi = cast<PHINode>(I);
+
+ Out << "PHINode* " << iName << " = PHINode::Create("
+ << getCppName(phi->getType()) << ", "
+ << phi->getNumIncomingValues() << ", \"";
+ printEscapedString(phi->getName());
+ Out << "\", " << bbname << ");";
+ nl(Out);
+ for (unsigned i = 0; i < phi->getNumIncomingValues(); ++i) {
+ Out << iName << "->addIncoming("
+ << opNames[PHINode::getOperandNumForIncomingValue(i)] << ", "
+ << getOpName(phi->getIncomingBlock(i)) << ");";
+ nl(Out);
+ }
+ break;
+ }
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::BitCast: {
+ const CastInst* cst = cast<CastInst>(I);
+ Out << "CastInst* " << iName << " = new ";
+ switch (I->getOpcode()) {
+ case Instruction::Trunc: Out << "TruncInst"; break;
+ case Instruction::ZExt: Out << "ZExtInst"; break;
+ case Instruction::SExt: Out << "SExtInst"; break;
+ case Instruction::FPTrunc: Out << "FPTruncInst"; break;
+ case Instruction::FPExt: Out << "FPExtInst"; break;
+ case Instruction::FPToUI: Out << "FPToUIInst"; break;
+ case Instruction::FPToSI: Out << "FPToSIInst"; break;
+ case Instruction::UIToFP: Out << "UIToFPInst"; break;
+ case Instruction::SIToFP: Out << "SIToFPInst"; break;
+ case Instruction::PtrToInt: Out << "PtrToIntInst"; break;
+ case Instruction::IntToPtr: Out << "IntToPtrInst"; break;
+ case Instruction::BitCast: Out << "BitCastInst"; break;
+ default: assert(0 && "Unreachable"); break;
+ }
+ Out << "(" << opNames[0] << ", "
+ << getCppName(cst->getType()) << ", \"";
+ printEscapedString(cst->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ case Instruction::Call: {
+ const CallInst* call = cast<CallInst>(I);
+ if (const InlineAsm* ila = dyn_cast<InlineAsm>(call->getCalledValue())) {
+ Out << "InlineAsm* " << getCppName(ila) << " = InlineAsm::get("
+ << getCppName(ila->getFunctionType()) << ", \""
+ << ila->getAsmString() << "\", \""
+ << ila->getConstraintString() << "\","
+ << (ila->hasSideEffects() ? "true" : "false") << ");";
+ nl(Out);
+ }
+ if (call->getNumArgOperands() > 1) {
+ Out << "std::vector<Value*> " << iName << "_params;";
+ nl(Out);
+ for (unsigned i = 0; i < call->getNumArgOperands(); ++i) {
+ Out << iName << "_params.push_back(" << opNames[i] << ");";
+ nl(Out);
+ }
+ Out << "CallInst* " << iName << " = CallInst::Create("
+ << opNames[call->getNumArgOperands()] << ", "
+ << iName << "_params, \"";
+ } else if (call->getNumArgOperands() == 1) {
+ Out << "CallInst* " << iName << " = CallInst::Create("
+ << opNames[call->getNumArgOperands()] << ", " << opNames[0] << ", \"";
+ } else {
+ Out << "CallInst* " << iName << " = CallInst::Create("
+ << opNames[call->getNumArgOperands()] << ", \"";
+ }
+ printEscapedString(call->getName());
+ Out << "\", " << bbname << ");";
+ nl(Out) << iName << "->setCallingConv(";
+ printCallingConv(call->getCallingConv());
+ Out << ");";
+ nl(Out) << iName << "->setTailCall("
+ << (call->isTailCall() ? "true" : "false");
+ Out << ");";
+ nl(Out);
+ printAttributes(call->getAttributes(), iName);
+ Out << iName << "->setAttributes(" << iName << "_PAL);";
+ nl(Out);
+ break;
+ }
+ case Instruction::Select: {
+ const SelectInst* sel = cast<SelectInst>(I);
+ Out << "SelectInst* " << getCppName(sel) << " = SelectInst::Create(";
+ Out << opNames[0] << ", " << opNames[1] << ", " << opNames[2] << ", \"";
+ printEscapedString(sel->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ case Instruction::UserOp1:
+ /// FALL THROUGH
+ case Instruction::UserOp2: {
+ /// FIXME: What should be done here?
+ break;
+ }
+ case Instruction::VAArg: {
+ const VAArgInst* va = cast<VAArgInst>(I);
+ Out << "VAArgInst* " << getCppName(va) << " = new VAArgInst("
+ << opNames[0] << ", " << getCppName(va->getType()) << ", \"";
+ printEscapedString(va->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ case Instruction::ExtractElement: {
+ const ExtractElementInst* eei = cast<ExtractElementInst>(I);
+ Out << "ExtractElementInst* " << getCppName(eei)
+ << " = new ExtractElementInst(" << opNames[0]
+ << ", " << opNames[1] << ", \"";
+ printEscapedString(eei->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ case Instruction::InsertElement: {
+ const InsertElementInst* iei = cast<InsertElementInst>(I);
+ Out << "InsertElementInst* " << getCppName(iei)
+ << " = InsertElementInst::Create(" << opNames[0]
+ << ", " << opNames[1] << ", " << opNames[2] << ", \"";
+ printEscapedString(iei->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ case Instruction::ShuffleVector: {
+ const ShuffleVectorInst* svi = cast<ShuffleVectorInst>(I);
+ Out << "ShuffleVectorInst* " << getCppName(svi)
+ << " = new ShuffleVectorInst(" << opNames[0]
+ << ", " << opNames[1] << ", " << opNames[2] << ", \"";
+ printEscapedString(svi->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ case Instruction::ExtractValue: {
+ const ExtractValueInst *evi = cast<ExtractValueInst>(I);
+ Out << "std::vector<unsigned> " << iName << "_indices;";
+ nl(Out);
+ for (unsigned i = 0; i < evi->getNumIndices(); ++i) {
+ Out << iName << "_indices.push_back("
+ << evi->idx_begin()[i] << ");";
+ nl(Out);
+ }
+ Out << "ExtractValueInst* " << getCppName(evi)
+ << " = ExtractValueInst::Create(" << opNames[0]
+ << ", "
+ << iName << "_indices, \"";
+ printEscapedString(evi->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ case Instruction::InsertValue: {
+ const InsertValueInst *ivi = cast<InsertValueInst>(I);
+ Out << "std::vector<unsigned> " << iName << "_indices;";
+ nl(Out);
+ for (unsigned i = 0; i < ivi->getNumIndices(); ++i) {
+ Out << iName << "_indices.push_back("
+ << ivi->idx_begin()[i] << ");";
+ nl(Out);
+ }
+ Out << "InsertValueInst* " << getCppName(ivi)
+ << " = InsertValueInst::Create(" << opNames[0]
+ << ", " << opNames[1] << ", "
+ << iName << "_indices, \"";
+ printEscapedString(ivi->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ }
+ DefinedValues.insert(I);
+ nl(Out);
+ delete [] opNames;
+}
+
+// Print out the types, constants and declarations needed by one function
+void CppWriter::printFunctionUses(const Function* F) {
+ nl(Out) << "// Type Definitions"; nl(Out);
+ if (!is_inline) {
+ // Print the function's return type
+ printType(F->getReturnType());
+
+ // Print the function's function type
+ printType(F->getFunctionType());
+
+ // Print the types of each of the function's arguments
+ for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
+ AI != AE; ++AI) {
+ printType(AI->getType());
+ }
+ }
+
+ // Print type definitions for every type referenced by an instruction and
+ // make a note of any global values or constants that are referenced
+ SmallPtrSet<GlobalValue*,64> gvs;
+ SmallPtrSet<Constant*,64> consts;
+ for (Function::const_iterator BB = F->begin(), BE = F->end();
+ BB != BE; ++BB){
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end();
+ I != E; ++I) {
+ // Print the type of the instruction itself
+ printType(I->getType());
+
+ // Print the type of each of the instruction's operands
+ for (unsigned i = 0; i < I->getNumOperands(); ++i) {
+ Value* operand = I->getOperand(i);
+ printType(operand->getType());
+
+ // If the operand references a GVal or Constant, make a note of it
+ if (GlobalValue* GV = dyn_cast<GlobalValue>(operand)) {
+ gvs.insert(GV);
+ if (GenerationType != GenFunction)
+ if (GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
+ if (GVar->hasInitializer())
+ consts.insert(GVar->getInitializer());
+ } else if (Constant* C = dyn_cast<Constant>(operand)) {
+ consts.insert(C);
+ for (unsigned j = 0; j < C->getNumOperands(); ++j) {
+ // If the operand references a GVal or Constant, make a note of it
+ Value* operand = C->getOperand(j);
+ printType(operand->getType());
+ if (GlobalValue* GV = dyn_cast<GlobalValue>(operand)) {
+ gvs.insert(GV);
+ if (GenerationType != GenFunction)
+ if (GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
+ if (GVar->hasInitializer())
+ consts.insert(GVar->getInitializer());
+ }
+ }
+ }
+ }
+ }
+ }
+
+ // Print the function declarations for any functions encountered
+ nl(Out) << "// Function Declarations"; nl(Out);
+ for (SmallPtrSet<GlobalValue*,64>::iterator I = gvs.begin(), E = gvs.end();
+ I != E; ++I) {
+ if (Function* Fun = dyn_cast<Function>(*I)) {
+ if (!is_inline || Fun != F)
+ printFunctionHead(Fun);
+ }
+ }
+
+ // Print the global variable declarations for any variables encountered
+ nl(Out) << "// Global Variable Declarations"; nl(Out);
+ for (SmallPtrSet<GlobalValue*,64>::iterator I = gvs.begin(), E = gvs.end();
+ I != E; ++I) {
+ if (GlobalVariable* F = dyn_cast<GlobalVariable>(*I))
+ printVariableHead(F);
+ }
+
+ // Print the constants found
+ nl(Out) << "// Constant Definitions"; nl(Out);
+ for (SmallPtrSet<Constant*,64>::iterator I = consts.begin(),
+ E = consts.end(); I != E; ++I) {
+ printConstant(*I);
+ }
+
+ // Process the global variables definitions now that all the constants have
+ // been emitted. These definitions just couple the gvars with their constant
+ // initializers.
+ if (GenerationType != GenFunction) {
+ nl(Out) << "// Global Variable Definitions"; nl(Out);
+ for (SmallPtrSet<GlobalValue*,64>::iterator I = gvs.begin(), E = gvs.end();
+ I != E; ++I) {
+ if (GlobalVariable* GV = dyn_cast<GlobalVariable>(*I))
+ printVariableBody(GV);
+ }
+ }
+}
+
+void CppWriter::printFunctionHead(const Function* F) {
+ nl(Out) << "Function* " << getCppName(F);
+ Out << " = mod->getFunction(\"";
+ printEscapedString(F->getName());
+ Out << "\");";
+ nl(Out) << "if (!" << getCppName(F) << ") {";
+ nl(Out) << getCppName(F);
+
+ Out<< " = Function::Create(";
+ nl(Out,1) << "/*Type=*/" << getCppName(F->getFunctionType()) << ",";
+ nl(Out) << "/*Linkage=*/";
+ printLinkageType(F->getLinkage());
+ Out << ",";
+ nl(Out) << "/*Name=*/\"";
+ printEscapedString(F->getName());
+ Out << "\", mod); " << (F->isDeclaration()? "// (external, no body)" : "");
+ nl(Out,-1);
+ printCppName(F);
+ Out << "->setCallingConv(";
+ printCallingConv(F->getCallingConv());
+ Out << ");";
+ nl(Out);
+ if (F->hasSection()) {
+ printCppName(F);
+ Out << "->setSection(\"" << F->getSection() << "\");";
+ nl(Out);
+ }
+ if (F->getAlignment()) {
+ printCppName(F);
+ Out << "->setAlignment(" << F->getAlignment() << ");";
+ nl(Out);
+ }
+ if (F->getVisibility() != GlobalValue::DefaultVisibility) {
+ printCppName(F);
+ Out << "->setVisibility(";
+ printVisibilityType(F->getVisibility());
+ Out << ");";
+ nl(Out);
+ }
+ if (F->hasGC()) {
+ printCppName(F);
+ Out << "->setGC(\"" << F->getGC() << "\");";
+ nl(Out);
+ }
+ Out << "}";
+ nl(Out);
+ printAttributes(F->getAttributes(), getCppName(F));
+ printCppName(F);
+ Out << "->setAttributes(" << getCppName(F) << "_PAL);";
+ nl(Out);
+}
+
+void CppWriter::printFunctionBody(const Function *F) {
+ if (F->isDeclaration())
+ return; // external functions have no bodies.
+
+ // Clear the DefinedValues and ForwardRefs maps because we can't have
+ // cross-function forward refs
+ ForwardRefs.clear();
+ DefinedValues.clear();
+
+ // Create all the argument values
+ if (!is_inline) {
+ if (!F->arg_empty()) {
+ Out << "Function::arg_iterator args = " << getCppName(F)
+ << "->arg_begin();";
+ nl(Out);
+ }
+ for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
+ AI != AE; ++AI) {
+ Out << "Value* " << getCppName(AI) << " = args++;";
+ nl(Out);
+ if (AI->hasName()) {
+ Out << getCppName(AI) << "->setName(\"" << AI->getName() << "\");";
+ nl(Out);
+ }
+ }
+ }
+
+ // Create all the basic blocks
+ nl(Out);
+ for (Function::const_iterator BI = F->begin(), BE = F->end();
+ BI != BE; ++BI) {
+ std::string bbname(getCppName(BI));
+ Out << "BasicBlock* " << bbname <<
+ " = BasicBlock::Create(mod->getContext(), \"";
+ if (BI->hasName())
+ printEscapedString(BI->getName());
+ Out << "\"," << getCppName(BI->getParent()) << ",0);";
+ nl(Out);
+ }
+
+ // Output all of its basic blocks... for the function
+ for (Function::const_iterator BI = F->begin(), BE = F->end();
+ BI != BE; ++BI) {
+ std::string bbname(getCppName(BI));
+ nl(Out) << "// Block " << BI->getName() << " (" << bbname << ")";
+ nl(Out);
+
+ // Output all of the instructions in the basic block...
+ for (BasicBlock::const_iterator I = BI->begin(), E = BI->end();
+ I != E; ++I) {
+ printInstruction(I,bbname);
+ }
+ }
+
+ // Loop over the ForwardRefs and resolve them now that all instructions
+ // are generated.
+ if (!ForwardRefs.empty()) {
+ nl(Out) << "// Resolve Forward References";
+ nl(Out);
+ }
+
+ while (!ForwardRefs.empty()) {
+ ForwardRefMap::iterator I = ForwardRefs.begin();
+ Out << I->second << "->replaceAllUsesWith("
+ << getCppName(I->first) << "); delete " << I->second << ";";
+ nl(Out);
+ ForwardRefs.erase(I);
+ }
+}
+
+void CppWriter::printInline(const std::string& fname,
+ const std::string& func) {
+ const Function* F = TheModule->getFunction(func);
+ if (!F) {
+ error(std::string("Function '") + func + "' not found in input module");
+ return;
+ }
+ if (F->isDeclaration()) {
+ error(std::string("Function '") + func + "' is external!");
+ return;
+ }
+ nl(Out) << "BasicBlock* " << fname << "(Module* mod, Function *"
+ << getCppName(F);
+ unsigned arg_count = 1;
+ for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
+ AI != AE; ++AI) {
+ Out << ", Value* arg_" << arg_count;
+ }
+ Out << ") {";
+ nl(Out);
+ is_inline = true;
+ printFunctionUses(F);
+ printFunctionBody(F);
+ is_inline = false;
+ Out << "return " << getCppName(F->begin()) << ";";
+ nl(Out) << "}";
+ nl(Out);
+}
+
+void CppWriter::printModuleBody() {
+ // Print out all the type definitions
+ nl(Out) << "// Type Definitions"; nl(Out);
+ printTypes(TheModule);
+
+ // Functions can call each other and global variables can reference them so
+ // define all the functions first before emitting their function bodies.
+ nl(Out) << "// Function Declarations"; nl(Out);
+ for (Module::const_iterator I = TheModule->begin(), E = TheModule->end();
+ I != E; ++I)
+ printFunctionHead(I);
+
+ // Process the global variables declarations. We can't initialze them until
+ // after the constants are printed so just print a header for each global
+ nl(Out) << "// Global Variable Declarations\n"; nl(Out);
+ for (Module::const_global_iterator I = TheModule->global_begin(),
+ E = TheModule->global_end(); I != E; ++I) {
+ printVariableHead(I);
+ }
+
+ // Print out all the constants definitions. Constants don't recurse except
+ // through GlobalValues. All GlobalValues have been declared at this point
+ // so we can proceed to generate the constants.
+ nl(Out) << "// Constant Definitions"; nl(Out);
+ printConstants(TheModule);
+
+ // Process the global variables definitions now that all the constants have
+ // been emitted. These definitions just couple the gvars with their constant
+ // initializers.
+ nl(Out) << "// Global Variable Definitions"; nl(Out);
+ for (Module::const_global_iterator I = TheModule->global_begin(),
+ E = TheModule->global_end(); I != E; ++I) {
+ printVariableBody(I);
+ }
+
+ // Finally, we can safely put out all of the function bodies.
+ nl(Out) << "// Function Definitions"; nl(Out);
+ for (Module::const_iterator I = TheModule->begin(), E = TheModule->end();
+ I != E; ++I) {
+ if (!I->isDeclaration()) {
+ nl(Out) << "// Function: " << I->getName() << " (" << getCppName(I)
+ << ")";
+ nl(Out) << "{";
+ nl(Out,1);
+ printFunctionBody(I);
+ nl(Out,-1) << "}";
+ nl(Out);
+ }
+ }
+}
+
+void CppWriter::printProgram(const std::string& fname,
+ const std::string& mName) {
+ Out << "#include <llvm/LLVMContext.h>\n";
+ Out << "#include <llvm/Module.h>\n";
+ Out << "#include <llvm/DerivedTypes.h>\n";
+ Out << "#include <llvm/Constants.h>\n";
+ Out << "#include <llvm/GlobalVariable.h>\n";
+ Out << "#include <llvm/Function.h>\n";
+ Out << "#include <llvm/CallingConv.h>\n";
+ Out << "#include <llvm/BasicBlock.h>\n";
+ Out << "#include <llvm/Instructions.h>\n";
+ Out << "#include <llvm/InlineAsm.h>\n";
+ Out << "#include <llvm/Support/FormattedStream.h>\n";
+ Out << "#include <llvm/Support/MathExtras.h>\n";
+ Out << "#include <llvm/Pass.h>\n";
+ Out << "#include <llvm/PassManager.h>\n";
+ Out << "#include <llvm/ADT/SmallVector.h>\n";
+ Out << "#include <llvm/Analysis/Verifier.h>\n";
+ Out << "#include <llvm/Assembly/PrintModulePass.h>\n";
+ Out << "#include <algorithm>\n";
+ Out << "using namespace llvm;\n\n";
+ Out << "Module* " << fname << "();\n\n";
+ Out << "int main(int argc, char**argv) {\n";
+ Out << " Module* Mod = " << fname << "();\n";
+ Out << " verifyModule(*Mod, PrintMessageAction);\n";
+ Out << " PassManager PM;\n";
+ Out << " PM.add(createPrintModulePass(&outs()));\n";
+ Out << " PM.run(*Mod);\n";
+ Out << " return 0;\n";
+ Out << "}\n\n";
+ printModule(fname,mName);
+}
+
+void CppWriter::printModule(const std::string& fname,
+ const std::string& mName) {
+ nl(Out) << "Module* " << fname << "() {";
+ nl(Out,1) << "// Module Construction";
+ nl(Out) << "Module* mod = new Module(\"";
+ printEscapedString(mName);
+ Out << "\", getGlobalContext());";
+ if (!TheModule->getTargetTriple().empty()) {
+ nl(Out) << "mod->setDataLayout(\"" << TheModule->getDataLayout() << "\");";
+ }
+ if (!TheModule->getTargetTriple().empty()) {
+ nl(Out) << "mod->setTargetTriple(\"" << TheModule->getTargetTriple()
+ << "\");";
+ }
+
+ if (!TheModule->getModuleInlineAsm().empty()) {
+ nl(Out) << "mod->setModuleInlineAsm(\"";
+ printEscapedString(TheModule->getModuleInlineAsm());
+ Out << "\");";
+ }
+ nl(Out);
+
+ // Loop over the dependent libraries and emit them.
+ Module::lib_iterator LI = TheModule->lib_begin();
+ Module::lib_iterator LE = TheModule->lib_end();
+ while (LI != LE) {
+ Out << "mod->addLibrary(\"" << *LI << "\");";
+ nl(Out);
+ ++LI;
+ }
+ printModuleBody();
+ nl(Out) << "return mod;";
+ nl(Out,-1) << "}";
+ nl(Out);
+}
+
+void CppWriter::printContents(const std::string& fname,
+ const std::string& mName) {
+ Out << "\nModule* " << fname << "(Module *mod) {\n";
+ Out << "\nmod->setModuleIdentifier(\"";
+ printEscapedString(mName);
+ Out << "\");\n";
+ printModuleBody();
+ Out << "\nreturn mod;\n";
+ Out << "\n}\n";
+}
+
+void CppWriter::printFunction(const std::string& fname,
+ const std::string& funcName) {
+ const Function* F = TheModule->getFunction(funcName);
+ if (!F) {
+ error(std::string("Function '") + funcName + "' not found in input module");
+ return;
+ }
+ Out << "\nFunction* " << fname << "(Module *mod) {\n";
+ printFunctionUses(F);
+ printFunctionHead(F);
+ printFunctionBody(F);
+ Out << "return " << getCppName(F) << ";\n";
+ Out << "}\n";
+}
+
+void CppWriter::printFunctions() {
+ const Module::FunctionListType &funcs = TheModule->getFunctionList();
+ Module::const_iterator I = funcs.begin();
+ Module::const_iterator IE = funcs.end();
+
+ for (; I != IE; ++I) {
+ const Function &func = *I;
+ if (!func.isDeclaration()) {
+ std::string name("define_");
+ name += func.getName();
+ printFunction(name, func.getName());
+ }
+ }
+}
+
+void CppWriter::printVariable(const std::string& fname,
+ const std::string& varName) {
+ const GlobalVariable* GV = TheModule->getNamedGlobal(varName);
+
+ if (!GV) {
+ error(std::string("Variable '") + varName + "' not found in input module");
+ return;
+ }
+ Out << "\nGlobalVariable* " << fname << "(Module *mod) {\n";
+ printVariableUses(GV);
+ printVariableHead(GV);
+ printVariableBody(GV);
+ Out << "return " << getCppName(GV) << ";\n";
+ Out << "}\n";
+}
+
+void CppWriter::printType(const std::string &fname,
+ const std::string &typeName) {
+ Type* Ty = TheModule->getTypeByName(typeName);
+ if (!Ty) {
+ error(std::string("Type '") + typeName + "' not found in input module");
+ return;
+ }
+ Out << "\nType* " << fname << "(Module *mod) {\n";
+ printType(Ty);
+ Out << "return " << getCppName(Ty) << ";\n";
+ Out << "}\n";
+}
+
+bool CppWriter::runOnModule(Module &M) {
+ TheModule = &M;
+
+ // Emit a header
+ Out << "// Generated by llvm2cpp - DO NOT MODIFY!\n\n";
+
+ // Get the name of the function we're supposed to generate
+ std::string fname = FuncName.getValue();
+
+ // Get the name of the thing we are to generate
+ std::string tgtname = NameToGenerate.getValue();
+ if (GenerationType == GenModule ||
+ GenerationType == GenContents ||
+ GenerationType == GenProgram ||
+ GenerationType == GenFunctions) {
+ if (tgtname == "!bad!") {
+ if (M.getModuleIdentifier() == "-")
+ tgtname = "<stdin>";
+ else
+ tgtname = M.getModuleIdentifier();
+ }
+ } else if (tgtname == "!bad!")
+ error("You must use the -for option with -gen-{function,variable,type}");
+
+ switch (WhatToGenerate(GenerationType)) {
+ case GenProgram:
+ if (fname.empty())
+ fname = "makeLLVMModule";
+ printProgram(fname,tgtname);
+ break;
+ case GenModule:
+ if (fname.empty())
+ fname = "makeLLVMModule";
+ printModule(fname,tgtname);
+ break;
+ case GenContents:
+ if (fname.empty())
+ fname = "makeLLVMModuleContents";
+ printContents(fname,tgtname);
+ break;
+ case GenFunction:
+ if (fname.empty())
+ fname = "makeLLVMFunction";
+ printFunction(fname,tgtname);
+ break;
+ case GenFunctions:
+ printFunctions();
+ break;
+ case GenInline:
+ if (fname.empty())
+ fname = "makeLLVMInline";
+ printInline(fname,tgtname);
+ break;
+ case GenVariable:
+ if (fname.empty())
+ fname = "makeLLVMVariable";
+ printVariable(fname,tgtname);
+ break;
+ case GenType:
+ if (fname.empty())
+ fname = "makeLLVMType";
+ printType(fname,tgtname);
+ break;
+ default:
+ error("Invalid generation option");
+ }
+
+ return false;
+}
+
+char CppWriter::ID = 0;
+
+//===----------------------------------------------------------------------===//
+// External Interface declaration
+//===----------------------------------------------------------------------===//
+
+bool CPPTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
+ formatted_raw_ostream &o,
+ CodeGenFileType FileType,
+ CodeGenOpt::Level OptLevel,
+ bool DisableVerify) {
+ if (FileType != TargetMachine::CGFT_AssemblyFile) return true;
+ PM.add(new CppWriter(o));
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/CppBackend/CPPTargetMachine.h b/contrib/llvm/lib/Target/CppBackend/CPPTargetMachine.h
new file mode 100644
index 0000000..287e537
--- /dev/null
+++ b/contrib/llvm/lib/Target/CppBackend/CPPTargetMachine.h
@@ -0,0 +1,44 @@
+//===-- CPPTargetMachine.h - TargetMachine for the C++ backend --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the TargetMachine that is used by the C++ backend.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CPPTARGETMACHINE_H
+#define CPPTARGETMACHINE_H
+
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+
+namespace llvm {
+
+class formatted_raw_ostream;
+
+struct CPPTargetMachine : public TargetMachine {
+ CPPTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM)
+ : TargetMachine(T, TT, CPU, FS) {}
+
+ virtual bool addPassesToEmitFile(PassManagerBase &PM,
+ formatted_raw_ostream &Out,
+ CodeGenFileType FileType,
+ CodeGenOpt::Level OptLevel,
+ bool DisableVerify);
+
+ virtual const TargetData *getTargetData() const { return 0; }
+};
+
+extern Target TheCppBackendTarget;
+
+} // End llvm namespace
+
+
+#endif
diff --git a/contrib/llvm/lib/Target/CppBackend/TargetInfo/CppBackendTargetInfo.cpp b/contrib/llvm/lib/Target/CppBackend/TargetInfo/CppBackendTargetInfo.cpp
new file mode 100644
index 0000000..a8ac0a2
--- /dev/null
+++ b/contrib/llvm/lib/Target/CppBackend/TargetInfo/CppBackendTargetInfo.cpp
@@ -0,0 +1,28 @@
+//===-- CppBackendTargetInfo.cpp - CppBackend Target Implementation -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CPPTargetMachine.h"
+#include "llvm/Module.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::TheCppBackendTarget;
+
+static unsigned CppBackend_TripleMatchQuality(const std::string &TT) {
+ // This class always works, but shouldn't be the default in most cases.
+ return 1;
+}
+
+extern "C" void LLVMInitializeCppBackendTargetInfo() {
+ TargetRegistry::RegisterTarget(TheCppBackendTarget, "cpp",
+ "C++ backend",
+ &CppBackend_TripleMatchQuality);
+}
+
+extern "C" void LLVMInitializeCppBackendTargetMC() {}
diff --git a/contrib/llvm/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp b/contrib/llvm/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp
new file mode 100644
index 0000000..2d357bb
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp
@@ -0,0 +1,123 @@
+//===-- MBlazeAsmLexer.cpp - Tokenize MBlaze assembly to AsmTokens --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/MBlazeBaseInfo.h"
+
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCTargetAsmLexer.h"
+
+#include "llvm/Support/TargetRegistry.h"
+
+#include <string>
+#include <map>
+
+using namespace llvm;
+
+namespace {
+
+ class MBlazeBaseAsmLexer : public MCTargetAsmLexer {
+ const MCAsmInfo &AsmInfo;
+
+ const AsmToken &lexDefinite() {
+ return getLexer()->Lex();
+ }
+
+ AsmToken LexTokenUAL();
+ protected:
+ typedef std::map <std::string, unsigned> rmap_ty;
+
+ rmap_ty RegisterMap;
+
+ void InitRegisterMap(const MCRegisterInfo *info) {
+ unsigned numRegs = info->getNumRegs();
+
+ for (unsigned i = 0; i < numRegs; ++i) {
+ const char *regName = info->getName(i);
+ if (regName)
+ RegisterMap[regName] = i;
+ }
+ }
+
+ unsigned MatchRegisterName(StringRef Name) {
+ rmap_ty::iterator iter = RegisterMap.find(Name.str());
+ if (iter != RegisterMap.end())
+ return iter->second;
+ else
+ return 0;
+ }
+
+ AsmToken LexToken() {
+ if (!Lexer) {
+ SetError(SMLoc(), "No MCAsmLexer installed");
+ return AsmToken(AsmToken::Error, "", 0);
+ }
+
+ switch (AsmInfo.getAssemblerDialect()) {
+ default:
+ SetError(SMLoc(), "Unhandled dialect");
+ return AsmToken(AsmToken::Error, "", 0);
+ case 0:
+ return LexTokenUAL();
+ }
+ }
+ public:
+ MBlazeBaseAsmLexer(const Target &T, const MCAsmInfo &MAI)
+ : MCTargetAsmLexer(T), AsmInfo(MAI) {
+ }
+ };
+
+ class MBlazeAsmLexer : public MBlazeBaseAsmLexer {
+ public:
+ MBlazeAsmLexer(const Target &T, const MCRegisterInfo &MRI,
+ const MCAsmInfo &MAI)
+ : MBlazeBaseAsmLexer(T, MAI) {
+ InitRegisterMap(&MRI);
+ }
+ };
+}
+
+AsmToken MBlazeBaseAsmLexer::LexTokenUAL() {
+ const AsmToken &lexedToken = lexDefinite();
+
+ switch (lexedToken.getKind()) {
+ default:
+ return AsmToken(lexedToken);
+ case AsmToken::Error:
+ SetError(Lexer->getErrLoc(), Lexer->getErr());
+ return AsmToken(lexedToken);
+ case AsmToken::Identifier:
+ {
+ std::string upperCase = lexedToken.getString().str();
+ std::string lowerCase = LowercaseString(upperCase);
+ StringRef lowerRef(lowerCase);
+
+ unsigned regID = MatchRegisterName(lowerRef);
+
+ if (regID) {
+ return AsmToken(AsmToken::Register,
+ lexedToken.getString(),
+ static_cast<int64_t>(regID));
+ } else {
+ return AsmToken(lexedToken);
+ }
+ }
+ }
+}
+
+extern "C" void LLVMInitializeMBlazeAsmLexer() {
+ RegisterMCAsmLexer<MBlazeAsmLexer> X(TheMBlazeTarget);
+}
+
diff --git a/contrib/llvm/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp b/contrib/llvm/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp
new file mode 100644
index 0000000..97d311c
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp
@@ -0,0 +1,565 @@
+//===-- MBlazeAsmParser.cpp - Parse MBlaze asm to MCInst instructions -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/MBlazeBaseInfo.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCTargetAsmParser.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Twine.h"
+using namespace llvm;
+
+namespace {
+struct MBlazeOperand;
+
+class MBlazeAsmParser : public MCTargetAsmParser {
+ MCAsmParser &Parser;
+
+ MCAsmParser &getParser() const { return Parser; }
+ MCAsmLexer &getLexer() const { return Parser.getLexer(); }
+
+ void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); }
+ bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
+
+ MBlazeOperand *ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+ MBlazeOperand *ParseRegister(unsigned &RegNo);
+ MBlazeOperand *ParseImmediate();
+ MBlazeOperand *ParseFsl();
+ MBlazeOperand* ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
+
+ bool ParseDirectiveWord(unsigned Size, SMLoc L);
+
+ bool MatchAndEmitInstruction(SMLoc IDLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ MCStreamer &Out);
+
+ /// @name Auto-generated Match Functions
+ /// {
+
+#define GET_ASSEMBLER_HEADER
+#include "MBlazeGenAsmMatcher.inc"
+
+ /// }
+
+
+public:
+ MBlazeAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser)
+ : MCTargetAsmParser(), Parser(_Parser) {}
+
+ virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ virtual bool ParseDirective(AsmToken DirectiveID);
+};
+
+/// MBlazeOperand - Instances of this class represent a parsed MBlaze machine
+/// instruction.
+struct MBlazeOperand : public MCParsedAsmOperand {
+ enum KindTy {
+ Token,
+ Immediate,
+ Register,
+ Memory,
+ Fsl
+ } Kind;
+
+ SMLoc StartLoc, EndLoc;
+
+ union {
+ struct {
+ const char *Data;
+ unsigned Length;
+ } Tok;
+
+ struct {
+ unsigned RegNum;
+ } Reg;
+
+ struct {
+ const MCExpr *Val;
+ } Imm;
+
+ struct {
+ unsigned Base;
+ unsigned OffReg;
+ const MCExpr *Off;
+ } Mem;
+
+ struct {
+ const MCExpr *Val;
+ } FslImm;
+ };
+
+ MBlazeOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
+public:
+ MBlazeOperand(const MBlazeOperand &o) : MCParsedAsmOperand() {
+ Kind = o.Kind;
+ StartLoc = o.StartLoc;
+ EndLoc = o.EndLoc;
+ switch (Kind) {
+ case Register:
+ Reg = o.Reg;
+ break;
+ case Immediate:
+ Imm = o.Imm;
+ break;
+ case Token:
+ Tok = o.Tok;
+ break;
+ case Memory:
+ Mem = o.Mem;
+ break;
+ case Fsl:
+ FslImm = o.FslImm;
+ break;
+ }
+ }
+
+ /// getStartLoc - Get the location of the first token of this operand.
+ SMLoc getStartLoc() const { return StartLoc; }
+
+ /// getEndLoc - Get the location of the last token of this operand.
+ SMLoc getEndLoc() const { return EndLoc; }
+
+ unsigned getReg() const {
+ assert(Kind == Register && "Invalid access!");
+ return Reg.RegNum;
+ }
+
+ const MCExpr *getImm() const {
+ assert(Kind == Immediate && "Invalid access!");
+ return Imm.Val;
+ }
+
+ const MCExpr *getFslImm() const {
+ assert(Kind == Fsl && "Invalid access!");
+ return FslImm.Val;
+ }
+
+ unsigned getMemBase() const {
+ assert(Kind == Memory && "Invalid access!");
+ return Mem.Base;
+ }
+
+ const MCExpr* getMemOff() const {
+ assert(Kind == Memory && "Invalid access!");
+ return Mem.Off;
+ }
+
+ unsigned getMemOffReg() const {
+ assert(Kind == Memory && "Invalid access!");
+ return Mem.OffReg;
+ }
+
+ bool isToken() const { return Kind == Token; }
+ bool isImm() const { return Kind == Immediate; }
+ bool isMem() const { return Kind == Memory; }
+ bool isFsl() const { return Kind == Fsl; }
+ bool isReg() const { return Kind == Register; }
+
+ void addExpr(MCInst &Inst, const MCExpr *Expr) const {
+ // Add as immediates when possible. Null MCExpr = 0.
+ if (Expr == 0)
+ Inst.addOperand(MCOperand::CreateImm(0));
+ else if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
+ else
+ Inst.addOperand(MCOperand::CreateExpr(Expr));
+ }
+
+ void addRegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(getReg()));
+ }
+
+ void addImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ addExpr(Inst, getImm());
+ }
+
+ void addFslOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ addExpr(Inst, getFslImm());
+ }
+
+ void addMemOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+
+ Inst.addOperand(MCOperand::CreateReg(getMemBase()));
+
+ unsigned RegOff = getMemOffReg();
+ if (RegOff)
+ Inst.addOperand(MCOperand::CreateReg(RegOff));
+ else
+ addExpr(Inst, getMemOff());
+ }
+
+ StringRef getToken() const {
+ assert(Kind == Token && "Invalid access!");
+ return StringRef(Tok.Data, Tok.Length);
+ }
+
+ virtual void print(raw_ostream &OS) const;
+
+ static MBlazeOperand *CreateToken(StringRef Str, SMLoc S) {
+ MBlazeOperand *Op = new MBlazeOperand(Token);
+ Op->Tok.Data = Str.data();
+ Op->Tok.Length = Str.size();
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
+ static MBlazeOperand *CreateReg(unsigned RegNum, SMLoc S, SMLoc E) {
+ MBlazeOperand *Op = new MBlazeOperand(Register);
+ Op->Reg.RegNum = RegNum;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static MBlazeOperand *CreateImm(const MCExpr *Val, SMLoc S, SMLoc E) {
+ MBlazeOperand *Op = new MBlazeOperand(Immediate);
+ Op->Imm.Val = Val;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static MBlazeOperand *CreateFslImm(const MCExpr *Val, SMLoc S, SMLoc E) {
+ MBlazeOperand *Op = new MBlazeOperand(Fsl);
+ Op->Imm.Val = Val;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static MBlazeOperand *CreateMem(unsigned Base, const MCExpr *Off, SMLoc S,
+ SMLoc E) {
+ MBlazeOperand *Op = new MBlazeOperand(Memory);
+ Op->Mem.Base = Base;
+ Op->Mem.Off = Off;
+ Op->Mem.OffReg = 0;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static MBlazeOperand *CreateMem(unsigned Base, unsigned Off, SMLoc S,
+ SMLoc E) {
+ MBlazeOperand *Op = new MBlazeOperand(Memory);
+ Op->Mem.Base = Base;
+ Op->Mem.OffReg = Off;
+ Op->Mem.Off = 0;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+};
+
+} // end anonymous namespace.
+
+void MBlazeOperand::print(raw_ostream &OS) const {
+ switch (Kind) {
+ case Immediate:
+ getImm()->print(OS);
+ break;
+ case Register:
+ OS << "<register R";
+ OS << getMBlazeRegisterNumbering(getReg()) << ">";
+ break;
+ case Token:
+ OS << "'" << getToken() << "'";
+ break;
+ case Memory: {
+ OS << "<memory R";
+ OS << getMBlazeRegisterNumbering(getMemBase());
+ OS << ", ";
+
+ unsigned RegOff = getMemOffReg();
+ if (RegOff)
+ OS << "R" << getMBlazeRegisterNumbering(RegOff);
+ else
+ OS << getMemOff();
+ OS << ">";
+ }
+ break;
+ case Fsl:
+ getFslImm()->print(OS);
+ break;
+ }
+}
+
+/// @name Auto-generated Match Functions
+/// {
+
+static unsigned MatchRegisterName(StringRef Name);
+
+/// }
+//
+bool MBlazeAsmParser::
+MatchAndEmitInstruction(SMLoc IDLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ MCStreamer &Out) {
+ MCInst Inst;
+ SMLoc ErrorLoc;
+ unsigned ErrorInfo;
+
+ switch (MatchInstructionImpl(Operands, Inst, ErrorInfo)) {
+ default: break;
+ case Match_Success:
+ Out.EmitInstruction(Inst);
+ return false;
+ case Match_MissingFeature:
+ return Error(IDLoc, "instruction use requires an option to be enabled");
+ case Match_MnemonicFail:
+ return Error(IDLoc, "unrecognized instruction mnemonic");
+ case Match_ConversionFail:
+ return Error(IDLoc, "unable to convert operands to instruction");
+ case Match_InvalidOperand:
+ ErrorLoc = IDLoc;
+ if (ErrorInfo != ~0U) {
+ if (ErrorInfo >= Operands.size())
+ return Error(IDLoc, "too few operands for instruction");
+
+ ErrorLoc = ((MBlazeOperand*)Operands[ErrorInfo])->getStartLoc();
+ if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
+ }
+
+ return Error(ErrorLoc, "invalid operand for instruction");
+ }
+
+ llvm_unreachable("Implement any new match types added!");
+ return true;
+}
+
+MBlazeOperand *MBlazeAsmParser::
+ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ if (Operands.size() != 4)
+ return 0;
+
+ MBlazeOperand &Base = *(MBlazeOperand*)Operands[2];
+ MBlazeOperand &Offset = *(MBlazeOperand*)Operands[3];
+
+ SMLoc S = Base.getStartLoc();
+ SMLoc O = Offset.getStartLoc();
+ SMLoc E = Offset.getEndLoc();
+
+ if (!Base.isReg()) {
+ Error(S, "base address must be a register");
+ return 0;
+ }
+
+ if (!Offset.isReg() && !Offset.isImm()) {
+ Error(O, "offset must be a register or immediate");
+ return 0;
+ }
+
+ MBlazeOperand *Op;
+ if (Offset.isReg())
+ Op = MBlazeOperand::CreateMem(Base.getReg(), Offset.getReg(), S, E);
+ else
+ Op = MBlazeOperand::CreateMem(Base.getReg(), Offset.getImm(), S, E);
+
+ delete Operands.pop_back_val();
+ delete Operands.pop_back_val();
+ Operands.push_back(Op);
+
+ return Op;
+}
+
+bool MBlazeAsmParser::ParseRegister(unsigned &RegNo,
+ SMLoc &StartLoc, SMLoc &EndLoc) {
+ return (ParseRegister(RegNo) == 0);
+}
+
+MBlazeOperand *MBlazeAsmParser::ParseRegister(unsigned &RegNo) {
+ SMLoc S = Parser.getTok().getLoc();
+ SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+
+ switch (getLexer().getKind()) {
+ default: return 0;
+ case AsmToken::Identifier:
+ RegNo = MatchRegisterName(getLexer().getTok().getIdentifier());
+ if (RegNo == 0)
+ return 0;
+
+ getLexer().Lex();
+ return MBlazeOperand::CreateReg(RegNo, S, E);
+ }
+}
+
+static unsigned MatchFslRegister(StringRef String) {
+ if (!String.startswith("rfsl"))
+ return -1;
+
+ unsigned regNum;
+ if (String.substr(4).getAsInteger(10,regNum))
+ return -1;
+
+ return regNum;
+}
+
+MBlazeOperand *MBlazeAsmParser::ParseFsl() {
+ SMLoc S = Parser.getTok().getLoc();
+ SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+
+ switch (getLexer().getKind()) {
+ default: return 0;
+ case AsmToken::Identifier:
+ unsigned reg = MatchFslRegister(getLexer().getTok().getIdentifier());
+ if (reg >= 16)
+ return 0;
+
+ getLexer().Lex();
+ const MCExpr *EVal = MCConstantExpr::Create(reg,getContext());
+ return MBlazeOperand::CreateFslImm(EVal,S,E);
+ }
+}
+
+MBlazeOperand *MBlazeAsmParser::ParseImmediate() {
+ SMLoc S = Parser.getTok().getLoc();
+ SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+
+ const MCExpr *EVal;
+ switch (getLexer().getKind()) {
+ default: return 0;
+ case AsmToken::LParen:
+ case AsmToken::Plus:
+ case AsmToken::Minus:
+ case AsmToken::Integer:
+ case AsmToken::Identifier:
+ if (getParser().ParseExpression(EVal))
+ return 0;
+
+ return MBlazeOperand::CreateImm(EVal, S, E);
+ }
+}
+
+MBlazeOperand *MBlazeAsmParser::
+ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ MBlazeOperand *Op;
+
+ // Attempt to parse the next token as a register name
+ unsigned RegNo;
+ Op = ParseRegister(RegNo);
+
+ // Attempt to parse the next token as an FSL immediate
+ if (!Op)
+ Op = ParseFsl();
+
+ // Attempt to parse the next token as an immediate
+ if (!Op)
+ Op = ParseImmediate();
+
+ // If the token could not be parsed then fail
+ if (!Op) {
+ Error(Parser.getTok().getLoc(), "unknown operand");
+ return 0;
+ }
+
+ // Push the parsed operand into the list of operands
+ Operands.push_back(Op);
+ return Op;
+}
+
+/// Parse an mblaze instruction mnemonic followed by its operands.
+bool MBlazeAsmParser::
+ParseInstruction(StringRef Name, SMLoc NameLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // The first operands is the token for the instruction name
+ size_t dotLoc = Name.find('.');
+ Operands.push_back(MBlazeOperand::CreateToken(Name.substr(0,dotLoc),NameLoc));
+ if (dotLoc < Name.size())
+ Operands.push_back(MBlazeOperand::CreateToken(Name.substr(dotLoc),NameLoc));
+
+ // If there are no more operands then finish
+ if (getLexer().is(AsmToken::EndOfStatement))
+ return false;
+
+ // Parse the first operand
+ if (!ParseOperand(Operands))
+ return true;
+
+ while (getLexer().isNot(AsmToken::EndOfStatement) &&
+ getLexer().is(AsmToken::Comma)) {
+ // Consume the comma token
+ getLexer().Lex();
+
+ // Parse the next operand
+ if (!ParseOperand(Operands))
+ return true;
+ }
+
+ // If the instruction requires a memory operand then we need to
+ // replace the last two operands (base+offset) with a single
+ // memory operand.
+ if (Name.startswith("lw") || Name.startswith("sw") ||
+ Name.startswith("lh") || Name.startswith("sh") ||
+ Name.startswith("lb") || Name.startswith("sb"))
+ return (ParseMemory(Operands) == NULL);
+
+ return false;
+}
+
+/// ParseDirective parses the MBlaze specific directives
+bool MBlazeAsmParser::ParseDirective(AsmToken DirectiveID) {
+ StringRef IDVal = DirectiveID.getIdentifier();
+ if (IDVal == ".word")
+ return ParseDirectiveWord(2, DirectiveID.getLoc());
+ return true;
+}
+
+/// ParseDirectiveWord
+/// ::= .word [ expression (, expression)* ]
+bool MBlazeAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ for (;;) {
+ const MCExpr *Value;
+ if (getParser().ParseExpression(Value))
+ return true;
+
+ getParser().getStreamer().EmitValue(Value, Size, 0 /*addrspace*/);
+
+ if (getLexer().is(AsmToken::EndOfStatement))
+ break;
+
+ // FIXME: Improve diagnostic.
+ if (getLexer().isNot(AsmToken::Comma))
+ return Error(L, "unexpected token in directive");
+ Parser.Lex();
+ }
+ }
+
+ Parser.Lex();
+ return false;
+}
+
+extern "C" void LLVMInitializeMBlazeAsmLexer();
+
+/// Force static initialization.
+extern "C" void LLVMInitializeMBlazeAsmParser() {
+ RegisterMCAsmParser<MBlazeAsmParser> X(TheMBlazeTarget);
+ LLVMInitializeMBlazeAsmLexer();
+}
+
+#define GET_REGISTER_MATCHER
+#define GET_MATCHER_IMPLEMENTATION
+#include "MBlazeGenAsmMatcher.inc"
diff --git a/contrib/llvm/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp b/contrib/llvm/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp
new file mode 100644
index 0000000..fd761f1
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp
@@ -0,0 +1,709 @@
+//===- MBlazeDisassembler.cpp - Disassembler for MicroBlaze ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is part of the MBlaze Disassembler. It contains code to translate
+// the data produced by the decoder into MCInsts.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MBlaze.h"
+#include "MBlazeInstrInfo.h"
+#include "MBlazeDisassembler.h"
+
+#include "llvm/MC/EDInstInfo.h"
+#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MemoryObject.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+
+// #include "MBlazeGenDecoderTables.inc"
+// #include "MBlazeGenRegisterNames.inc"
+#include "MBlazeGenEDInfo.inc"
+
+namespace llvm {
+extern MCInstrDesc MBlazeInsts[];
+}
+
+using namespace llvm;
+
+const unsigned UNSUPPORTED = -1;
+
+static unsigned mblazeBinary2Opcode[] = {
+ MBlaze::ADD, MBlaze::RSUB, MBlaze::ADDC, MBlaze::RSUBC, //00,01,02,03
+ MBlaze::ADDK, MBlaze::RSUBK, MBlaze::ADDKC, MBlaze::RSUBKC, //04,05,06,07
+ MBlaze::ADDI, MBlaze::RSUBI, MBlaze::ADDIC, MBlaze::RSUBIC, //08,09,0A,0B
+ MBlaze::ADDIK, MBlaze::RSUBIK, MBlaze::ADDIKC, MBlaze::RSUBIKC, //0C,0D,0E,0F
+
+ MBlaze::MUL, MBlaze::BSRL, MBlaze::IDIV, MBlaze::GETD, //10,11,12,13
+ UNSUPPORTED, UNSUPPORTED, MBlaze::FADD, UNSUPPORTED, //14,15,16,17
+ MBlaze::MULI, MBlaze::BSRLI, UNSUPPORTED, MBlaze::GET, //18,19,1A,1B
+ UNSUPPORTED, UNSUPPORTED, UNSUPPORTED, UNSUPPORTED, //1C,1D,1E,1F
+
+ MBlaze::OR, MBlaze::AND, MBlaze::XOR, MBlaze::ANDN, //20,21,22,23
+ MBlaze::SEXT8, MBlaze::MFS, MBlaze::BR, MBlaze::BEQ, //24,25,26,27
+ MBlaze::ORI, MBlaze::ANDI, MBlaze::XORI, MBlaze::ANDNI, //28,29,2A,2B
+ MBlaze::IMM, MBlaze::RTSD, MBlaze::BRI, MBlaze::BEQI, //2C,2D,2E,2F
+
+ MBlaze::LBU, MBlaze::LHU, MBlaze::LW, UNSUPPORTED, //30,31,32,33
+ MBlaze::SB, MBlaze::SH, MBlaze::SW, UNSUPPORTED, //34,35,36,37
+ MBlaze::LBUI, MBlaze::LHUI, MBlaze::LWI, UNSUPPORTED, //38,39,3A,3B
+ MBlaze::SBI, MBlaze::SHI, MBlaze::SWI, UNSUPPORTED, //3C,3D,3E,3F
+};
+
+static unsigned getRD(uint32_t insn) {
+ if (!isMBlazeRegister((insn>>21)&0x1F))
+ return UNSUPPORTED;
+ return getMBlazeRegisterFromNumbering((insn>>21)&0x1F);
+}
+
+static unsigned getRA(uint32_t insn) {
+ if (!getMBlazeRegisterFromNumbering((insn>>16)&0x1F))
+ return UNSUPPORTED;
+ return getMBlazeRegisterFromNumbering((insn>>16)&0x1F);
+}
+
+static unsigned getRB(uint32_t insn) {
+ if (!getMBlazeRegisterFromNumbering((insn>>11)&0x1F))
+ return UNSUPPORTED;
+ return getMBlazeRegisterFromNumbering((insn>>11)&0x1F);
+}
+
+static int64_t getRS(uint32_t insn) {
+ if (!isSpecialMBlazeRegister(insn&0x3FFF))
+ return UNSUPPORTED;
+ return getSpecialMBlazeRegisterFromNumbering(insn&0x3FFF);
+}
+
+static int64_t getIMM(uint32_t insn) {
+ int16_t val = (insn & 0xFFFF);
+ return val;
+}
+
+static int64_t getSHT(uint32_t insn) {
+ int16_t val = (insn & 0x1F);
+ return val;
+}
+
+static unsigned getFLAGS(int32_t insn) {
+ return (insn & 0x7FF);
+}
+
+static int64_t getFSL(uint32_t insn) {
+ int16_t val = (insn & 0xF);
+ return val;
+}
+
+static unsigned decodeMUL(uint32_t insn) {
+ switch (getFLAGS(insn)) {
+ default: return UNSUPPORTED;
+ case 0: return MBlaze::MUL;
+ case 1: return MBlaze::MULH;
+ case 2: return MBlaze::MULHSU;
+ case 3: return MBlaze::MULHU;
+ }
+}
+
+static unsigned decodeSEXT(uint32_t insn) {
+ switch (insn&0x7FF) {
+ default: return UNSUPPORTED;
+ case 0x60: return MBlaze::SEXT8;
+ case 0x68: return MBlaze::WIC;
+ case 0x64: return MBlaze::WDC;
+ case 0x66: return MBlaze::WDCC;
+ case 0x74: return MBlaze::WDCF;
+ case 0x61: return MBlaze::SEXT16;
+ case 0x41: return MBlaze::SRL;
+ case 0x21: return MBlaze::SRC;
+ case 0x01: return MBlaze::SRA;
+ }
+}
+
+static unsigned decodeBEQ(uint32_t insn) {
+ switch ((insn>>21)&0x1F) {
+ default: return UNSUPPORTED;
+ case 0x00: return MBlaze::BEQ;
+ case 0x10: return MBlaze::BEQD;
+ case 0x05: return MBlaze::BGE;
+ case 0x15: return MBlaze::BGED;
+ case 0x04: return MBlaze::BGT;
+ case 0x14: return MBlaze::BGTD;
+ case 0x03: return MBlaze::BLE;
+ case 0x13: return MBlaze::BLED;
+ case 0x02: return MBlaze::BLT;
+ case 0x12: return MBlaze::BLTD;
+ case 0x01: return MBlaze::BNE;
+ case 0x11: return MBlaze::BNED;
+ }
+}
+
+static unsigned decodeBEQI(uint32_t insn) {
+ switch ((insn>>21)&0x1F) {
+ default: return UNSUPPORTED;
+ case 0x00: return MBlaze::BEQI;
+ case 0x10: return MBlaze::BEQID;
+ case 0x05: return MBlaze::BGEI;
+ case 0x15: return MBlaze::BGEID;
+ case 0x04: return MBlaze::BGTI;
+ case 0x14: return MBlaze::BGTID;
+ case 0x03: return MBlaze::BLEI;
+ case 0x13: return MBlaze::BLEID;
+ case 0x02: return MBlaze::BLTI;
+ case 0x12: return MBlaze::BLTID;
+ case 0x01: return MBlaze::BNEI;
+ case 0x11: return MBlaze::BNEID;
+ }
+}
+
+static unsigned decodeBR(uint32_t insn) {
+ switch ((insn>>16)&0x1F) {
+ default: return UNSUPPORTED;
+ case 0x00: return MBlaze::BR;
+ case 0x08: return MBlaze::BRA;
+ case 0x0C: return MBlaze::BRK;
+ case 0x10: return MBlaze::BRD;
+ case 0x14: return MBlaze::BRLD;
+ case 0x18: return MBlaze::BRAD;
+ case 0x1C: return MBlaze::BRALD;
+ }
+}
+
+static unsigned decodeBRI(uint32_t insn) {
+ switch ((insn>>16)&0x1F) {
+ default: return UNSUPPORTED;
+ case 0x00: return MBlaze::BRI;
+ case 0x08: return MBlaze::BRAI;
+ case 0x0C: return MBlaze::BRKI;
+ case 0x10: return MBlaze::BRID;
+ case 0x14: return MBlaze::BRLID;
+ case 0x18: return MBlaze::BRAID;
+ case 0x1C: return MBlaze::BRALID;
+ }
+}
+
+static unsigned decodeBSRL(uint32_t insn) {
+ switch ((insn>>9)&0x3) {
+ default: return UNSUPPORTED;
+ case 0x2: return MBlaze::BSLL;
+ case 0x1: return MBlaze::BSRA;
+ case 0x0: return MBlaze::BSRL;
+ }
+}
+
+static unsigned decodeBSRLI(uint32_t insn) {
+ switch ((insn>>9)&0x3) {
+ default: return UNSUPPORTED;
+ case 0x2: return MBlaze::BSLLI;
+ case 0x1: return MBlaze::BSRAI;
+ case 0x0: return MBlaze::BSRLI;
+ }
+}
+
+static unsigned decodeRSUBK(uint32_t insn) {
+ switch (getFLAGS(insn)) {
+ default: return UNSUPPORTED;
+ case 0x0: return MBlaze::RSUBK;
+ case 0x1: return MBlaze::CMP;
+ case 0x3: return MBlaze::CMPU;
+ }
+}
+
+static unsigned decodeFADD(uint32_t insn) {
+ switch (getFLAGS(insn)) {
+ default: return UNSUPPORTED;
+ case 0x000: return MBlaze::FADD;
+ case 0x080: return MBlaze::FRSUB;
+ case 0x100: return MBlaze::FMUL;
+ case 0x180: return MBlaze::FDIV;
+ case 0x200: return MBlaze::FCMP_UN;
+ case 0x210: return MBlaze::FCMP_LT;
+ case 0x220: return MBlaze::FCMP_EQ;
+ case 0x230: return MBlaze::FCMP_LE;
+ case 0x240: return MBlaze::FCMP_GT;
+ case 0x250: return MBlaze::FCMP_NE;
+ case 0x260: return MBlaze::FCMP_GE;
+ case 0x280: return MBlaze::FLT;
+ case 0x300: return MBlaze::FINT;
+ case 0x380: return MBlaze::FSQRT;
+ }
+}
+
+static unsigned decodeGET(uint32_t insn) {
+ switch ((insn>>10)&0x3F) {
+ default: return UNSUPPORTED;
+ case 0x00: return MBlaze::GET;
+ case 0x01: return MBlaze::EGET;
+ case 0x02: return MBlaze::AGET;
+ case 0x03: return MBlaze::EAGET;
+ case 0x04: return MBlaze::TGET;
+ case 0x05: return MBlaze::TEGET;
+ case 0x06: return MBlaze::TAGET;
+ case 0x07: return MBlaze::TEAGET;
+ case 0x08: return MBlaze::CGET;
+ case 0x09: return MBlaze::ECGET;
+ case 0x0A: return MBlaze::CAGET;
+ case 0x0B: return MBlaze::ECAGET;
+ case 0x0C: return MBlaze::TCGET;
+ case 0x0D: return MBlaze::TECGET;
+ case 0x0E: return MBlaze::TCAGET;
+ case 0x0F: return MBlaze::TECAGET;
+ case 0x10: return MBlaze::NGET;
+ case 0x11: return MBlaze::NEGET;
+ case 0x12: return MBlaze::NAGET;
+ case 0x13: return MBlaze::NEAGET;
+ case 0x14: return MBlaze::TNGET;
+ case 0x15: return MBlaze::TNEGET;
+ case 0x16: return MBlaze::TNAGET;
+ case 0x17: return MBlaze::TNEAGET;
+ case 0x18: return MBlaze::NCGET;
+ case 0x19: return MBlaze::NECGET;
+ case 0x1A: return MBlaze::NCAGET;
+ case 0x1B: return MBlaze::NECAGET;
+ case 0x1C: return MBlaze::TNCGET;
+ case 0x1D: return MBlaze::TNECGET;
+ case 0x1E: return MBlaze::TNCAGET;
+ case 0x1F: return MBlaze::TNECAGET;
+ case 0x20: return MBlaze::PUT;
+ case 0x22: return MBlaze::APUT;
+ case 0x24: return MBlaze::TPUT;
+ case 0x26: return MBlaze::TAPUT;
+ case 0x28: return MBlaze::CPUT;
+ case 0x2A: return MBlaze::CAPUT;
+ case 0x2C: return MBlaze::TCPUT;
+ case 0x2E: return MBlaze::TCAPUT;
+ case 0x30: return MBlaze::NPUT;
+ case 0x32: return MBlaze::NAPUT;
+ case 0x34: return MBlaze::TNPUT;
+ case 0x36: return MBlaze::TNAPUT;
+ case 0x38: return MBlaze::NCPUT;
+ case 0x3A: return MBlaze::NCAPUT;
+ case 0x3C: return MBlaze::TNCPUT;
+ case 0x3E: return MBlaze::TNCAPUT;
+ }
+}
+
+static unsigned decodeGETD(uint32_t insn) {
+ switch ((insn>>5)&0x3F) {
+ default: return UNSUPPORTED;
+ case 0x00: return MBlaze::GETD;
+ case 0x01: return MBlaze::EGETD;
+ case 0x02: return MBlaze::AGETD;
+ case 0x03: return MBlaze::EAGETD;
+ case 0x04: return MBlaze::TGETD;
+ case 0x05: return MBlaze::TEGETD;
+ case 0x06: return MBlaze::TAGETD;
+ case 0x07: return MBlaze::TEAGETD;
+ case 0x08: return MBlaze::CGETD;
+ case 0x09: return MBlaze::ECGETD;
+ case 0x0A: return MBlaze::CAGETD;
+ case 0x0B: return MBlaze::ECAGETD;
+ case 0x0C: return MBlaze::TCGETD;
+ case 0x0D: return MBlaze::TECGETD;
+ case 0x0E: return MBlaze::TCAGETD;
+ case 0x0F: return MBlaze::TECAGETD;
+ case 0x10: return MBlaze::NGETD;
+ case 0x11: return MBlaze::NEGETD;
+ case 0x12: return MBlaze::NAGETD;
+ case 0x13: return MBlaze::NEAGETD;
+ case 0x14: return MBlaze::TNGETD;
+ case 0x15: return MBlaze::TNEGETD;
+ case 0x16: return MBlaze::TNAGETD;
+ case 0x17: return MBlaze::TNEAGETD;
+ case 0x18: return MBlaze::NCGETD;
+ case 0x19: return MBlaze::NECGETD;
+ case 0x1A: return MBlaze::NCAGETD;
+ case 0x1B: return MBlaze::NECAGETD;
+ case 0x1C: return MBlaze::TNCGETD;
+ case 0x1D: return MBlaze::TNECGETD;
+ case 0x1E: return MBlaze::TNCAGETD;
+ case 0x1F: return MBlaze::TNECAGETD;
+ case 0x20: return MBlaze::PUTD;
+ case 0x22: return MBlaze::APUTD;
+ case 0x24: return MBlaze::TPUTD;
+ case 0x26: return MBlaze::TAPUTD;
+ case 0x28: return MBlaze::CPUTD;
+ case 0x2A: return MBlaze::CAPUTD;
+ case 0x2C: return MBlaze::TCPUTD;
+ case 0x2E: return MBlaze::TCAPUTD;
+ case 0x30: return MBlaze::NPUTD;
+ case 0x32: return MBlaze::NAPUTD;
+ case 0x34: return MBlaze::TNPUTD;
+ case 0x36: return MBlaze::TNAPUTD;
+ case 0x38: return MBlaze::NCPUTD;
+ case 0x3A: return MBlaze::NCAPUTD;
+ case 0x3C: return MBlaze::TNCPUTD;
+ case 0x3E: return MBlaze::TNCAPUTD;
+ }
+}
+
+static unsigned decodeIDIV(uint32_t insn) {
+ switch (insn&0x3) {
+ default: return UNSUPPORTED;
+ case 0x0: return MBlaze::IDIV;
+ case 0x2: return MBlaze::IDIVU;
+ }
+}
+
+static unsigned decodeLBU(uint32_t insn) {
+ switch ((insn>>9)&0x1) {
+ default: return UNSUPPORTED;
+ case 0x0: return MBlaze::LBU;
+ case 0x1: return MBlaze::LBUR;
+ }
+}
+
+static unsigned decodeLHU(uint32_t insn) {
+ switch ((insn>>9)&0x1) {
+ default: return UNSUPPORTED;
+ case 0x0: return MBlaze::LHU;
+ case 0x1: return MBlaze::LHUR;
+ }
+}
+
+static unsigned decodeLW(uint32_t insn) {
+ switch ((insn>>9)&0x3) {
+ default: return UNSUPPORTED;
+ case 0x0: return MBlaze::LW;
+ case 0x1: return MBlaze::LWR;
+ case 0x2: return MBlaze::LWX;
+ }
+}
+
+static unsigned decodeSB(uint32_t insn) {
+ switch ((insn>>9)&0x1) {
+ default: return UNSUPPORTED;
+ case 0x0: return MBlaze::SB;
+ case 0x1: return MBlaze::SBR;
+ }
+}
+
+static unsigned decodeSH(uint32_t insn) {
+ switch ((insn>>9)&0x1) {
+ default: return UNSUPPORTED;
+ case 0x0: return MBlaze::SH;
+ case 0x1: return MBlaze::SHR;
+ }
+}
+
+static unsigned decodeSW(uint32_t insn) {
+ switch ((insn>>9)&0x3) {
+ default: return UNSUPPORTED;
+ case 0x0: return MBlaze::SW;
+ case 0x1: return MBlaze::SWR;
+ case 0x2: return MBlaze::SWX;
+ }
+}
+
+static unsigned decodeMFS(uint32_t insn) {
+ switch ((insn>>15)&0x1) {
+ default: return UNSUPPORTED;
+ case 0x0:
+ switch ((insn>>16)&0x1) {
+ default: return UNSUPPORTED;
+ case 0x0: return MBlaze::MSRSET;
+ case 0x1: return MBlaze::MSRCLR;
+ }
+ case 0x1:
+ switch ((insn>>14)&0x1) {
+ default: return UNSUPPORTED;
+ case 0x0: return MBlaze::MFS;
+ case 0x1: return MBlaze::MTS;
+ }
+ }
+}
+
+static unsigned decodeOR(uint32_t insn) {
+ switch (getFLAGS(insn)) {
+ default: return UNSUPPORTED;
+ case 0x000: return MBlaze::OR;
+ case 0x400: return MBlaze::PCMPBF;
+ }
+}
+
+static unsigned decodeXOR(uint32_t insn) {
+ switch (getFLAGS(insn)) {
+ default: return UNSUPPORTED;
+ case 0x000: return MBlaze::XOR;
+ case 0x400: return MBlaze::PCMPEQ;
+ }
+}
+
+static unsigned decodeANDN(uint32_t insn) {
+ switch (getFLAGS(insn)) {
+ default: return UNSUPPORTED;
+ case 0x000: return MBlaze::ANDN;
+ case 0x400: return MBlaze::PCMPNE;
+ }
+}
+
+static unsigned decodeRTSD(uint32_t insn) {
+ switch ((insn>>21)&0x1F) {
+ default: return UNSUPPORTED;
+ case 0x10: return MBlaze::RTSD;
+ case 0x11: return MBlaze::RTID;
+ case 0x12: return MBlaze::RTBD;
+ case 0x14: return MBlaze::RTED;
+ }
+}
+
+static unsigned getOPCODE(uint32_t insn) {
+ unsigned opcode = mblazeBinary2Opcode[ (insn>>26)&0x3F ];
+ switch (opcode) {
+ case MBlaze::MUL: return decodeMUL(insn);
+ case MBlaze::SEXT8: return decodeSEXT(insn);
+ case MBlaze::BEQ: return decodeBEQ(insn);
+ case MBlaze::BEQI: return decodeBEQI(insn);
+ case MBlaze::BR: return decodeBR(insn);
+ case MBlaze::BRI: return decodeBRI(insn);
+ case MBlaze::BSRL: return decodeBSRL(insn);
+ case MBlaze::BSRLI: return decodeBSRLI(insn);
+ case MBlaze::RSUBK: return decodeRSUBK(insn);
+ case MBlaze::FADD: return decodeFADD(insn);
+ case MBlaze::GET: return decodeGET(insn);
+ case MBlaze::GETD: return decodeGETD(insn);
+ case MBlaze::IDIV: return decodeIDIV(insn);
+ case MBlaze::LBU: return decodeLBU(insn);
+ case MBlaze::LHU: return decodeLHU(insn);
+ case MBlaze::LW: return decodeLW(insn);
+ case MBlaze::SB: return decodeSB(insn);
+ case MBlaze::SH: return decodeSH(insn);
+ case MBlaze::SW: return decodeSW(insn);
+ case MBlaze::MFS: return decodeMFS(insn);
+ case MBlaze::OR: return decodeOR(insn);
+ case MBlaze::XOR: return decodeXOR(insn);
+ case MBlaze::ANDN: return decodeANDN(insn);
+ case MBlaze::RTSD: return decodeRTSD(insn);
+ default: return opcode;
+ }
+}
+
+EDInstInfo *MBlazeDisassembler::getEDInfo() const {
+ return instInfoMBlaze;
+}
+
+//
+// Public interface for the disassembler
+//
+
+MCDisassembler::DecodeStatus MBlazeDisassembler::getInstruction(MCInst &instr,
+ uint64_t &size,
+ const MemoryObject &region,
+ uint64_t address,
+ raw_ostream &vStream,
+ raw_ostream &cStream) const {
+ // The machine instruction.
+ uint32_t insn;
+ uint64_t read;
+ uint8_t bytes[4];
+
+ // By default we consume 1 byte on failure
+ size = 1;
+
+ // We want to read exactly 4 bytes of data.
+ if (region.readBytes(address, 4, (uint8_t*)bytes, &read) == -1 || read < 4)
+ return Fail;
+
+ // Encoded as a big-endian 32-bit word in the stream.
+ insn = (bytes[0]<<24) | (bytes[1]<<16) | (bytes[2]<< 8) | (bytes[3]<<0);
+
+ // Get the MCInst opcode from the binary instruction and make sure
+ // that it is a valid instruction.
+ unsigned opcode = getOPCODE(insn);
+ if (opcode == UNSUPPORTED)
+ return Fail;
+
+ instr.setOpcode(opcode);
+
+ unsigned RD = getRD(insn);
+ unsigned RA = getRA(insn);
+ unsigned RB = getRB(insn);
+ unsigned RS = getRS(insn);
+
+ uint64_t tsFlags = MBlazeInsts[opcode].TSFlags;
+ switch ((tsFlags & MBlazeII::FormMask)) {
+ default:
+ return Fail;
+
+ case MBlazeII::FRRRR:
+ if (RD == UNSUPPORTED || RA == UNSUPPORTED || RB == UNSUPPORTED)
+ return Fail;
+ instr.addOperand(MCOperand::CreateReg(RD));
+ instr.addOperand(MCOperand::CreateReg(RB));
+ instr.addOperand(MCOperand::CreateReg(RA));
+ break;
+
+ case MBlazeII::FRRR:
+ if (RD == UNSUPPORTED || RA == UNSUPPORTED || RB == UNSUPPORTED)
+ return Fail;
+ instr.addOperand(MCOperand::CreateReg(RD));
+ instr.addOperand(MCOperand::CreateReg(RA));
+ instr.addOperand(MCOperand::CreateReg(RB));
+ break;
+
+ case MBlazeII::FRI:
+ switch (opcode) {
+ default:
+ return Fail;
+ case MBlaze::MFS:
+ if (RD == UNSUPPORTED)
+ return Fail;
+ instr.addOperand(MCOperand::CreateReg(RD));
+ instr.addOperand(MCOperand::CreateImm(insn&0x3FFF));
+ break;
+ case MBlaze::MTS:
+ if (RA == UNSUPPORTED)
+ return Fail;
+ instr.addOperand(MCOperand::CreateImm(insn&0x3FFF));
+ instr.addOperand(MCOperand::CreateReg(RA));
+ break;
+ case MBlaze::MSRSET:
+ case MBlaze::MSRCLR:
+ if (RD == UNSUPPORTED)
+ return Fail;
+ instr.addOperand(MCOperand::CreateReg(RD));
+ instr.addOperand(MCOperand::CreateImm(insn&0x7FFF));
+ break;
+ }
+ break;
+
+ case MBlazeII::FRRI:
+ if (RD == UNSUPPORTED || RA == UNSUPPORTED)
+ return Fail;
+ instr.addOperand(MCOperand::CreateReg(RD));
+ instr.addOperand(MCOperand::CreateReg(RA));
+ switch (opcode) {
+ default:
+ instr.addOperand(MCOperand::CreateImm(getIMM(insn)));
+ break;
+ case MBlaze::BSRLI:
+ case MBlaze::BSRAI:
+ case MBlaze::BSLLI:
+ instr.addOperand(MCOperand::CreateImm(insn&0x1F));
+ break;
+ }
+ break;
+
+ case MBlazeII::FCRR:
+ if (RA == UNSUPPORTED || RB == UNSUPPORTED)
+ return Fail;
+ instr.addOperand(MCOperand::CreateReg(RA));
+ instr.addOperand(MCOperand::CreateReg(RB));
+ break;
+
+ case MBlazeII::FCRI:
+ if (RA == UNSUPPORTED)
+ return Fail;
+ instr.addOperand(MCOperand::CreateReg(RA));
+ instr.addOperand(MCOperand::CreateImm(getIMM(insn)));
+ break;
+
+ case MBlazeII::FRCR:
+ if (RD == UNSUPPORTED || RB == UNSUPPORTED)
+ return Fail;
+ instr.addOperand(MCOperand::CreateReg(RD));
+ instr.addOperand(MCOperand::CreateReg(RB));
+ break;
+
+ case MBlazeII::FRCI:
+ if (RD == UNSUPPORTED)
+ return Fail;
+ instr.addOperand(MCOperand::CreateReg(RD));
+ instr.addOperand(MCOperand::CreateImm(getIMM(insn)));
+ break;
+
+ case MBlazeII::FCCR:
+ if (RB == UNSUPPORTED)
+ return Fail;
+ instr.addOperand(MCOperand::CreateReg(RB));
+ break;
+
+ case MBlazeII::FCCI:
+ instr.addOperand(MCOperand::CreateImm(getIMM(insn)));
+ break;
+
+ case MBlazeII::FRRCI:
+ if (RD == UNSUPPORTED || RA == UNSUPPORTED)
+ return Fail;
+ instr.addOperand(MCOperand::CreateReg(RD));
+ instr.addOperand(MCOperand::CreateReg(RA));
+ instr.addOperand(MCOperand::CreateImm(getSHT(insn)));
+ break;
+
+ case MBlazeII::FRRC:
+ if (RD == UNSUPPORTED || RA == UNSUPPORTED)
+ return Fail;
+ instr.addOperand(MCOperand::CreateReg(RD));
+ instr.addOperand(MCOperand::CreateReg(RA));
+ break;
+
+ case MBlazeII::FRCX:
+ if (RD == UNSUPPORTED)
+ return Fail;
+ instr.addOperand(MCOperand::CreateReg(RD));
+ instr.addOperand(MCOperand::CreateImm(getFSL(insn)));
+ break;
+
+ case MBlazeII::FRCS:
+ if (RD == UNSUPPORTED || RS == UNSUPPORTED)
+ return Fail;
+ instr.addOperand(MCOperand::CreateReg(RD));
+ instr.addOperand(MCOperand::CreateReg(RS));
+ break;
+
+ case MBlazeII::FCRCS:
+ if (RS == UNSUPPORTED || RA == UNSUPPORTED)
+ return Fail;
+ instr.addOperand(MCOperand::CreateReg(RS));
+ instr.addOperand(MCOperand::CreateReg(RA));
+ break;
+
+ case MBlazeII::FCRCX:
+ if (RA == UNSUPPORTED)
+ return Fail;
+ instr.addOperand(MCOperand::CreateReg(RA));
+ instr.addOperand(MCOperand::CreateImm(getFSL(insn)));
+ break;
+
+ case MBlazeII::FCX:
+ instr.addOperand(MCOperand::CreateImm(getFSL(insn)));
+ break;
+
+ case MBlazeII::FCR:
+ if (RB == UNSUPPORTED)
+ return Fail;
+ instr.addOperand(MCOperand::CreateReg(RB));
+ break;
+
+ case MBlazeII::FRIR:
+ if (RD == UNSUPPORTED || RA == UNSUPPORTED)
+ return Fail;
+ instr.addOperand(MCOperand::CreateReg(RD));
+ instr.addOperand(MCOperand::CreateImm(getIMM(insn)));
+ instr.addOperand(MCOperand::CreateReg(RA));
+ break;
+ }
+
+ // We always consume 4 bytes of data on success
+ size = 4;
+
+ return Success;
+}
+
+static MCDisassembler *createMBlazeDisassembler(const Target &T,
+ const MCSubtargetInfo &STI) {
+ return new MBlazeDisassembler(STI);
+}
+
+extern "C" void LLVMInitializeMBlazeDisassembler() {
+ // Register the disassembler.
+ TargetRegistry::RegisterMCDisassembler(TheMBlazeTarget,
+ createMBlazeDisassembler);
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h b/contrib/llvm/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h
new file mode 100644
index 0000000..0ac0d89
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h
@@ -0,0 +1,56 @@
+//===- MBlazeDisassembler.h - Disassembler for MicroBlaze ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is part of the MBlaze Disassembler. It it the header for
+// MBlazeDisassembler, a subclass of MCDisassembler.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZEDISASSEMBLER_H
+#define MBLAZEDISASSEMBLER_H
+
+#include "llvm/MC/MCDisassembler.h"
+
+struct InternalInstruction;
+
+namespace llvm {
+
+class MCInst;
+class MemoryObject;
+class raw_ostream;
+
+struct EDInstInfo;
+
+/// MBlazeDisassembler - Disassembler for all MBlaze platforms.
+class MBlazeDisassembler : public MCDisassembler {
+public:
+ /// Constructor - Initializes the disassembler.
+ ///
+ MBlazeDisassembler(const MCSubtargetInfo &STI) :
+ MCDisassembler(STI) {
+ }
+
+ ~MBlazeDisassembler() {
+ }
+
+ /// getInstruction - See MCDisassembler.
+ MCDisassembler::DecodeStatus getInstruction(MCInst &instr,
+ uint64_t &size,
+ const MemoryObject &region,
+ uint64_t address,
+ raw_ostream &vStream,
+ raw_ostream &cStream) const;
+
+ /// getEDInfo - See MCDisassembler.
+ EDInstInfo *getEDInfo() const;
+};
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.cpp b/contrib/llvm/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.cpp
new file mode 100644
index 0000000..a1f1dbc
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.cpp
@@ -0,0 +1,71 @@
+//===-- MBlazeInstPrinter.cpp - Convert MBlaze MCInst to assembly syntax --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an MBlaze MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "MBlaze.h"
+#include "MBlazeInstPrinter.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+using namespace llvm;
+
+
+// Include the auto-generated portion of the assembly writer.
+#include "MBlazeGenAsmWriter.inc"
+
+void MBlazeInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
+ StringRef Annot) {
+ printInstruction(MI, O);
+ printAnnotation(O, Annot);
+}
+
+void MBlazeInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O, const char *Modifier) {
+ assert((Modifier == 0 || Modifier[0] == 0) && "No modifiers supported");
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isReg()) {
+ O << getRegisterName(Op.getReg());
+ } else if (Op.isImm()) {
+ O << (int32_t)Op.getImm();
+ } else {
+ assert(Op.isExpr() && "unknown operand kind in printOperand");
+ O << *Op.getExpr();
+ }
+}
+
+void MBlazeInstPrinter::printFSLImm(const MCInst *MI, int OpNo,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNo);
+ if (MO.isImm())
+ O << "rfsl" << MO.getImm();
+ else
+ printOperand(MI, OpNo, O, NULL);
+}
+
+void MBlazeInstPrinter::printUnsignedImm(const MCInst *MI, int OpNo,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNo);
+ if (MO.isImm())
+ O << (uint32_t)MO.getImm();
+ else
+ printOperand(MI, OpNo, O, NULL);
+}
+
+void MBlazeInstPrinter::printMemOperand(const MCInst *MI, int OpNo,
+ raw_ostream &O, const char *Modifier) {
+ printOperand(MI, OpNo, O, NULL);
+ O << ", ";
+ printOperand(MI, OpNo+1, O, NULL);
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h b/contrib/llvm/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h
new file mode 100644
index 0000000..570ab08
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h
@@ -0,0 +1,43 @@
+//===-- MBLazeInstPrinter.h - Convert MBlaze MCInst to assembly syntax ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints a MBlaze MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZEINSTPRINTER_H
+#define MBLAZEINSTPRINTER_H
+
+#include "llvm/MC/MCInstPrinter.h"
+
+namespace llvm {
+ class MCOperand;
+
+ class MBlazeInstPrinter : public MCInstPrinter {
+ public:
+ MBlazeInstPrinter(const MCAsmInfo &MAI)
+ : MCInstPrinter(MAI) {}
+
+ virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
+
+ // Autogenerated by tblgen.
+ void printInstruction(const MCInst *MI, raw_ostream &O);
+ static const char *getRegisterName(unsigned RegNo);
+ static const char *getInstructionName(unsigned Opcode);
+
+ void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O,
+ const char *Modifier = 0);
+ void printFSLImm(const MCInst *MI, int OpNo, raw_ostream &O);
+ void printUnsignedImm(const MCInst *MI, int OpNo, raw_ostream &O);
+ void printMemOperand(const MCInst *MI, int OpNo,raw_ostream &O,
+ const char *Modifier = 0);
+ };
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlaze.h b/contrib/llvm/lib/Target/MBlaze/MBlaze.h
new file mode 100644
index 0000000..1399b85
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlaze.h
@@ -0,0 +1,32 @@
+//===-- MBlaze.h - Top-level interface for MBlaze ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in
+// the LLVM MBlaze back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TARGET_MBLAZE_H
+#define TARGET_MBLAZE_H
+
+#include "MCTargetDesc/MBlazeBaseInfo.h"
+#include "MCTargetDesc/MBlazeMCTargetDesc.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+ class MBlazeTargetMachine;
+ class FunctionPass;
+ class MachineCodeEmitter;
+
+ FunctionPass *createMBlazeISelDag(MBlazeTargetMachine &TM);
+ FunctionPass *createMBlazeDelaySlotFillerPass(MBlazeTargetMachine &TM);
+
+} // end namespace llvm;
+
+#endif
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlaze.td b/contrib/llvm/lib/Target/MBlaze/MBlaze.td
new file mode 100644
index 0000000..1245658
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlaze.td
@@ -0,0 +1,73 @@
+//===- MBlaze.td - Describe the MBlaze Target Machine ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This is the top level entry point for the MBlaze target.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// Register File, Calling Conv, Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "MBlazeRegisterInfo.td"
+include "MBlazeSchedule.td"
+include "MBlazeIntrinsics.td"
+include "MBlazeInstrInfo.td"
+include "MBlazeCallingConv.td"
+
+def MBlazeInstrInfo : InstrInfo;
+
+//===----------------------------------------------------------------------===//
+// Microblaze Subtarget features //
+//===----------------------------------------------------------------------===//
+
+def FeatureBarrel : SubtargetFeature<"barrel", "HasBarrel", "true",
+ "Implements barrel shifter">;
+def FeatureDiv : SubtargetFeature<"div", "HasDiv", "true",
+ "Implements hardware divider">;
+def FeatureMul : SubtargetFeature<"mul", "HasMul", "true",
+ "Implements hardware multiplier">;
+def FeaturePatCmp : SubtargetFeature<"patcmp", "HasPatCmp", "true",
+ "Implements pattern compare instruction">;
+def FeatureFPU : SubtargetFeature<"fpu", "HasFPU", "true",
+ "Implements floating point unit">;
+def FeatureMul64 : SubtargetFeature<"mul64", "HasMul64", "true",
+ "Implements multiplier with 64-bit result">;
+def FeatureSqrt : SubtargetFeature<"sqrt", "HasSqrt", "true",
+ "Implements sqrt and floating point convert">;
+
+//===----------------------------------------------------------------------===//
+// MBlaze processors supported.
+//===----------------------------------------------------------------------===//
+
+def : Processor<"mblaze", MBlazeGenericItineraries, []>;
+def : Processor<"mblaze3", MBlazePipe3Itineraries, []>;
+def : Processor<"mblaze5", MBlazePipe5Itineraries, []>;
+
+//===----------------------------------------------------------------------===//
+// Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+def MBlazeAsmWriter : AsmWriter {
+ string AsmWriterClassName = "InstPrinter";
+ bit isMCAsmWriter = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Target Declaration
+//===----------------------------------------------------------------------===//
+
+def MBlaze : Target {
+ let InstructionSet = MBlazeInstrInfo;
+ let AssemblyWriters = [MBlazeAsmWriter];
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeAsmPrinter.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeAsmPrinter.cpp
new file mode 100644
index 0000000..97bd083
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeAsmPrinter.cpp
@@ -0,0 +1,322 @@
+//===-- MBlazeAsmPrinter.cpp - MBlaze LLVM assembly writer ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to GAS-format MBlaze assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mblaze-asm-printer"
+
+#include "MBlaze.h"
+#include "MBlazeSubtarget.h"
+#include "MBlazeInstrInfo.h"
+#include "MBlazeTargetMachine.h"
+#include "MBlazeMachineFunction.h"
+#include "MBlazeMCInstLower.h"
+#include "InstPrinter/MBlazeInstPrinter.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cctype>
+
+using namespace llvm;
+
+namespace {
+ class MBlazeAsmPrinter : public AsmPrinter {
+ const MBlazeSubtarget *Subtarget;
+ public:
+ explicit MBlazeAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : AsmPrinter(TM, Streamer) {
+ Subtarget = &TM.getSubtarget<MBlazeSubtarget>();
+ }
+
+ virtual const char *getPassName() const {
+ return "MBlaze Assembly Printer";
+ }
+
+ void printSavedRegsBitmask();
+ void emitFrameDirective();
+ virtual void EmitFunctionBodyStart();
+ virtual void EmitFunctionBodyEnd();
+ virtual void EmitFunctionEntryLabel();
+
+ virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB)
+ const;
+
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O);
+ void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
+ void printUnsignedImm(const MachineInstr *MI, int opNum, raw_ostream &O);
+ void printFSLImm(const MachineInstr *MI, int opNum, raw_ostream &O);
+ void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
+ const char *Modifier = 0);
+
+ void EmitInstruction(const MachineInstr *MI);
+ };
+} // end of anonymous namespace
+
+// #include "MBlazeGenAsmWriter.inc"
+
+//===----------------------------------------------------------------------===//
+//
+// MBlaze Asm Directives
+//
+// -- Frame directive "frame Stackpointer, Stacksize, RARegister"
+// Describe the stack frame.
+//
+// -- Mask directives "mask bitmask, offset"
+// Tells the assembler which registers are saved and where.
+// bitmask - contain a little endian bitset indicating which registers are
+// saved on function prologue (e.g. with a 0x80000000 mask, the
+// assembler knows the register 31 (RA) is saved at prologue.
+// offset - the position before stack pointer subtraction indicating where
+// the first saved register on prologue is located. (e.g. with a
+//
+// Consider the following function prologue:
+//
+// .frame R19,48,R15
+// .mask 0xc0000000,-8
+// addiu R1, R1, -48
+// sw R15, 40(R1)
+// sw R19, 36(R1)
+//
+// With a 0xc0000000 mask, the assembler knows the register 15 (R15) and
+// 19 (R19) are saved at prologue. As the save order on prologue is from
+// left to right, R15 is saved first. A -8 offset means that after the
+// stack pointer subtration, the first register in the mask (R15) will be
+// saved at address 48-8=40.
+//
+//===----------------------------------------------------------------------===//
+
+// Print a 32 bit hex number with all numbers.
+static void printHex32(unsigned int Value, raw_ostream &O) {
+ O << "0x";
+ for (int i = 7; i >= 0; i--)
+ O << utohexstr((Value & (0xF << (i*4))) >> (i*4));
+}
+
+// Create a bitmask with all callee saved registers for CPU or Floating Point
+// registers. For CPU registers consider RA, GP and FP for saving if necessary.
+void MBlazeAsmPrinter::printSavedRegsBitmask() {
+ const TargetFrameLowering *TFI = TM.getFrameLowering();
+ const TargetRegisterInfo &RI = *TM.getRegisterInfo();
+
+ // CPU Saved Registers Bitmasks
+ unsigned int CPUBitmask = 0;
+
+ // Set the CPU Bitmasks
+ const MachineFrameInfo *MFI = MF->getFrameInfo();
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ unsigned RegNum = getMBlazeRegisterNumbering(Reg);
+ if (MBlaze::GPRRegisterClass->contains(Reg))
+ CPUBitmask |= (1 << RegNum);
+ }
+
+ // Return Address and Frame registers must also be set in CPUBitmask.
+ if (TFI->hasFP(*MF))
+ CPUBitmask |= (1 << getMBlazeRegisterNumbering(RI.getFrameRegister(*MF)));
+
+ if (MFI->adjustsStack())
+ CPUBitmask |= (1 << getMBlazeRegisterNumbering(RI.getRARegister()));
+
+ // Print CPUBitmask
+ OutStreamer.EmitRawText("\t.mask\t0x" + Twine::utohexstr(CPUBitmask));
+}
+
+/// Frame Directive
+void MBlazeAsmPrinter::emitFrameDirective() {
+ if (!OutStreamer.hasRawTextSupport())
+ return;
+
+ const TargetRegisterInfo &RI = *TM.getRegisterInfo();
+ unsigned stkReg = RI.getFrameRegister(*MF);
+ unsigned retReg = RI.getRARegister();
+ unsigned stkSze = MF->getFrameInfo()->getStackSize();
+
+ OutStreamer.EmitRawText("\t.frame\t" +
+ Twine(MBlazeInstPrinter::getRegisterName(stkReg)) +
+ "," + Twine(stkSze) + "," +
+ Twine(MBlazeInstPrinter::getRegisterName(retReg)));
+}
+
+void MBlazeAsmPrinter::EmitFunctionEntryLabel() {
+ if (OutStreamer.hasRawTextSupport())
+ OutStreamer.EmitRawText("\t.ent\t" + Twine(CurrentFnSym->getName()));
+ AsmPrinter::EmitFunctionEntryLabel();
+}
+
+void MBlazeAsmPrinter::EmitFunctionBodyStart() {
+ if (!OutStreamer.hasRawTextSupport())
+ return;
+
+ emitFrameDirective();
+ printSavedRegsBitmask();
+}
+
+void MBlazeAsmPrinter::EmitFunctionBodyEnd() {
+ if (OutStreamer.hasRawTextSupport())
+ OutStreamer.EmitRawText("\t.end\t" + Twine(CurrentFnSym->getName()));
+}
+
+//===----------------------------------------------------------------------===//
+void MBlazeAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ MBlazeMCInstLower MCInstLowering(OutContext, *Mang, *this);
+
+ MCInst TmpInst;
+ MCInstLowering.Lower(MI, TmpInst);
+ OutStreamer.EmitInstruction(TmpInst);
+}
+
+// Print out an operand for an inline asm expression.
+bool MBlazeAsmPrinter::
+PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,const char *ExtraCode, raw_ostream &O) {
+ // Does this asm operand have a single letter operand modifier?
+ if (ExtraCode && ExtraCode[0])
+ return true; // Unknown modifier.
+
+ printOperand(MI, OpNo, O);
+ return false;
+}
+
+void MBlazeAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
+ raw_ostream &O) {
+ const MachineOperand &MO = MI->getOperand(opNum);
+
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register:
+ O << MBlazeInstPrinter::getRegisterName(MO.getReg());
+ break;
+
+ case MachineOperand::MO_Immediate:
+ O << (int32_t)MO.getImm();
+ break;
+
+ case MachineOperand::MO_FPImmediate: {
+ const ConstantFP *fp = MO.getFPImm();
+ printHex32(fp->getValueAPF().bitcastToAPInt().getZExtValue(), O);
+ O << ";\t# immediate = " << *fp;
+ break;
+ }
+
+ case MachineOperand::MO_MachineBasicBlock:
+ O << *MO.getMBB()->getSymbol();
+ return;
+
+ case MachineOperand::MO_GlobalAddress:
+ O << *Mang->getSymbol(MO.getGlobal());
+ break;
+
+ case MachineOperand::MO_ExternalSymbol:
+ O << *GetExternalSymbolSymbol(MO.getSymbolName());
+ break;
+
+ case MachineOperand::MO_JumpTableIndex:
+ O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+ << '_' << MO.getIndex();
+ break;
+
+ case MachineOperand::MO_ConstantPoolIndex:
+ O << MAI->getPrivateGlobalPrefix() << "CPI"
+ << getFunctionNumber() << "_" << MO.getIndex();
+ if (MO.getOffset())
+ O << "+" << MO.getOffset();
+ break;
+
+ default:
+ llvm_unreachable("<unknown operand type>");
+ }
+}
+
+void MBlazeAsmPrinter::printUnsignedImm(const MachineInstr *MI, int opNum,
+ raw_ostream &O) {
+ const MachineOperand &MO = MI->getOperand(opNum);
+ if (MO.isImm())
+ O << (uint32_t)MO.getImm();
+ else
+ printOperand(MI, opNum, O);
+}
+
+void MBlazeAsmPrinter::printFSLImm(const MachineInstr *MI, int opNum,
+ raw_ostream &O) {
+ const MachineOperand &MO = MI->getOperand(opNum);
+ if (MO.isImm())
+ O << "rfsl" << (unsigned int)MO.getImm();
+ else
+ printOperand(MI, opNum, O);
+}
+
+void MBlazeAsmPrinter::
+printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
+ const char *Modifier) {
+ printOperand(MI, opNum, O);
+ O << ", ";
+ printOperand(MI, opNum+1, O);
+}
+
+/// isBlockOnlyReachableByFallthough - Return true if the basic block has
+/// exactly one predecessor and the control transfer mechanism between
+/// the predecessor and this block is a fall-through.
+bool MBlazeAsmPrinter::
+isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
+ // If this is a landing pad, it isn't a fall through. If it has no preds,
+ // then nothing falls through to it.
+ if (MBB->isLandingPad() || MBB->pred_empty())
+ return false;
+
+ // If there isn't exactly one predecessor, it can't be a fall through.
+ MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), PI2 = PI;
+ ++PI2;
+ if (PI2 != MBB->pred_end())
+ return false;
+
+ // The predecessor has to be immediately before this block.
+ const MachineBasicBlock *Pred = *PI;
+
+ if (!Pred->isLayoutSuccessor(MBB))
+ return false;
+
+ // If the block is completely empty, then it definitely does fall through.
+ if (Pred->empty())
+ return true;
+
+ // Check if the last terminator is an unconditional branch.
+ MachineBasicBlock::const_iterator I = Pred->end();
+ while (I != Pred->begin() && !(--I)->getDesc().isTerminator())
+ ; // Noop
+ return I == Pred->end() || !I->getDesc().isBarrier();
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeMBlazeAsmPrinter() {
+ RegisterAsmPrinter<MBlazeAsmPrinter> X(TheMBlazeTarget);
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeCallingConv.td b/contrib/llvm/lib/Target/MBlaze/MBlazeCallingConv.td
new file mode 100644
index 0000000..4962573
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeCallingConv.td
@@ -0,0 +1,28 @@
+//===- MBlazeCallingConv.td - Calling Conventions for MBlaze -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This describes the calling conventions for MBlaze architecture.
+//===----------------------------------------------------------------------===//
+
+/// CCIfSubtarget - Match if the current subtarget has a feature F.
+class CCIfSubtarget<string F, CCAction A>:
+ CCIf<!strconcat("State.getTarget().getSubtarget<MBlazeSubtarget>().", F), A>;
+
+//===----------------------------------------------------------------------===//
+// MBlaze ABI Calling Convention
+//===----------------------------------------------------------------------===//
+
+def RetCC_MBlaze : CallingConv<[
+ // i32 are returned in registers R3, R4
+ CCIfType<[i32,f32], CCAssignToReg<[R3, R4]>>
+]>;
+
+def CC_MBlaze : CallingConv<[
+ CCIfType<[i32,f32], CCCustom<"CC_MBlaze_AssignReg">>,
+ CCIfType<[i32,f32], CCAssignToStack<4, 4>>
+]>;
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp
new file mode 100644
index 0000000..c07570a
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp
@@ -0,0 +1,258 @@
+//===-- DelaySlotFiller.cpp - MBlaze delay slot filler --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// A pass that attempts to fill instructions with delay slots. If no
+// instructions can be moved into the delay slot then a NOP is placed there.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "delay-slot-filler"
+
+#include "MBlaze.h"
+#include "MBlazeTargetMachine.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+STATISTIC(FilledSlots, "Number of delay slots filled");
+
+namespace llvm {
+cl::opt<bool> DisableDelaySlotFiller(
+ "disable-mblaze-delay-filler",
+ cl::init(false),
+ cl::desc("Disable the MBlaze delay slot filter."),
+ cl::Hidden);
+}
+
+namespace {
+ struct Filler : public MachineFunctionPass {
+
+ TargetMachine &TM;
+ const TargetInstrInfo *TII;
+
+ static char ID;
+ Filler(TargetMachine &tm)
+ : MachineFunctionPass(ID), TM(tm), TII(tm.getInstrInfo()) { }
+
+ virtual const char *getPassName() const {
+ return "MBlaze Delay Slot Filler";
+ }
+
+ bool runOnMachineBasicBlock(MachineBasicBlock &MBB);
+ bool runOnMachineFunction(MachineFunction &F) {
+ bool Changed = false;
+ for (MachineFunction::iterator FI = F.begin(), FE = F.end();
+ FI != FE; ++FI)
+ Changed |= runOnMachineBasicBlock(*FI);
+ return Changed;
+ }
+
+ };
+ char Filler::ID = 0;
+} // end of anonymous namespace
+
+static bool hasImmInstruction(MachineBasicBlock::iterator &candidate) {
+ // Any instruction with an immediate mode operand greater than
+ // 16-bits requires an implicit IMM instruction.
+ unsigned numOper = candidate->getNumOperands();
+ for (unsigned op = 0; op < numOper; ++op) {
+ MachineOperand &mop = candidate->getOperand(op);
+
+ // The operand requires more than 16-bits to represent.
+ if (mop.isImm() && (mop.getImm() < -0x8000 || mop.getImm() > 0x7fff))
+ return true;
+
+ // We must assume that unknown immediate values require more than
+ // 16-bits to represent.
+ if (mop.isGlobal() || mop.isSymbol() || mop.isJTI() || mop.isCPI())
+ return true;
+
+ // FIXME: we could probably check to see if the FP value happens
+ // to not need an IMM instruction. For now we just always
+ // assume that FP values do.
+ if (mop.isFPImm())
+ return true;
+ }
+
+ return false;
+}
+
+static unsigned getLastRealOperand(MachineBasicBlock::iterator &instr) {
+ switch (instr->getOpcode()) {
+ default: return instr->getNumOperands();
+
+ // These instructions have a variable number of operands but the first two
+ // are the "real" operands that we care about during hazard detection.
+ case MBlaze::BRLID:
+ case MBlaze::BRALID:
+ case MBlaze::BRLD:
+ case MBlaze::BRALD:
+ return 2;
+ }
+}
+
+static bool delayHasHazard(MachineBasicBlock::iterator &candidate,
+ MachineBasicBlock::iterator &slot) {
+ // Hazard check
+ MachineBasicBlock::iterator a = candidate;
+ MachineBasicBlock::iterator b = slot;
+ MCInstrDesc desc = candidate->getDesc();
+
+ // MBB layout:-
+ // candidate := a0 = operation(a1, a2)
+ // ...middle bit...
+ // slot := b0 = operation(b1, b2)
+
+ // Possible hazards:-/
+ // 1. a1 or a2 was written during the middle bit
+ // 2. a0 was read or written during the middle bit
+ // 3. a0 is one or more of {b0, b1, b2}
+ // 4. b0 is one or more of {a1, a2}
+ // 5. a accesses memory, and the middle bit
+ // contains a store operation.
+ bool a_is_memory = desc.mayLoad() || desc.mayStore();
+
+ // Determine the number of operands in the slot instruction and in the
+ // candidate instruction.
+ const unsigned aend = getLastRealOperand(a);
+ const unsigned bend = getLastRealOperand(b);
+
+ // Check hazards type 1, 2 and 5 by scanning the middle bit
+ MachineBasicBlock::iterator m = a;
+ for (++m; m != b; ++m) {
+ for (unsigned aop = 0; aop<aend; ++aop) {
+ bool aop_is_reg = a->getOperand(aop).isReg();
+ if (!aop_is_reg) continue;
+
+ bool aop_is_def = a->getOperand(aop).isDef();
+ unsigned aop_reg = a->getOperand(aop).getReg();
+
+ const unsigned mend = getLastRealOperand(m);
+ for (unsigned mop = 0; mop<mend; ++mop) {
+ bool mop_is_reg = m->getOperand(mop).isReg();
+ if (!mop_is_reg) continue;
+
+ bool mop_is_def = m->getOperand(mop).isDef();
+ unsigned mop_reg = m->getOperand(mop).getReg();
+
+ if (aop_is_def && (mop_reg == aop_reg))
+ return true; // Hazard type 2, because aop = a0
+ else if (mop_is_def && (mop_reg == aop_reg))
+ return true; // Hazard type 1, because aop in {a1, a2}
+ }
+ }
+
+ // Check hazard type 5
+ if (a_is_memory && m->getDesc().mayStore())
+ return true;
+ }
+
+ // Check hazard type 3 & 4
+ for (unsigned aop = 0; aop<aend; ++aop) {
+ if (a->getOperand(aop).isReg()) {
+ unsigned aop_reg = a->getOperand(aop).getReg();
+
+ for (unsigned bop = 0; bop<bend; ++bop) {
+ if (b->getOperand(bop).isReg() && !b->getOperand(bop).isImplicit()) {
+ unsigned bop_reg = b->getOperand(bop).getReg();
+ if (aop_reg == bop_reg)
+ return true;
+ }
+ }
+ }
+ }
+
+ return false;
+}
+
+static bool isDelayFiller(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator candidate) {
+ if (candidate == MBB.begin())
+ return false;
+
+ MCInstrDesc brdesc = (--candidate)->getDesc();
+ return (brdesc.hasDelaySlot());
+}
+
+static bool hasUnknownSideEffects(MachineBasicBlock::iterator &I) {
+ if (!I->hasUnmodeledSideEffects())
+ return false;
+
+ unsigned op = I->getOpcode();
+ if (op == MBlaze::ADDK || op == MBlaze::ADDIK ||
+ op == MBlaze::ADDC || op == MBlaze::ADDIC ||
+ op == MBlaze::ADDKC || op == MBlaze::ADDIKC ||
+ op == MBlaze::RSUBK || op == MBlaze::RSUBIK ||
+ op == MBlaze::RSUBC || op == MBlaze::RSUBIC ||
+ op == MBlaze::RSUBKC || op == MBlaze::RSUBIKC)
+ return false;
+
+ return true;
+}
+
+static MachineBasicBlock::iterator
+findDelayInstr(MachineBasicBlock &MBB,MachineBasicBlock::iterator slot) {
+ MachineBasicBlock::iterator I = slot;
+ while (true) {
+ if (I == MBB.begin())
+ break;
+
+ --I;
+ MCInstrDesc desc = I->getDesc();
+ if (desc.hasDelaySlot() || desc.isBranch() || isDelayFiller(MBB,I) ||
+ desc.isCall() || desc.isReturn() || desc.isBarrier() ||
+ hasUnknownSideEffects(I))
+ break;
+
+ if (hasImmInstruction(I) || delayHasHazard(I,slot))
+ continue;
+
+ return I;
+ }
+
+ return MBB.end();
+}
+
+/// runOnMachineBasicBlock - Fill in delay slots for the given basic block.
+/// Currently, we fill delay slots with NOPs. We assume there is only one
+/// delay slot per delayed instruction.
+bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
+ bool Changed = false;
+ for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
+ if (I->getDesc().hasDelaySlot()) {
+ MachineBasicBlock::iterator D = MBB.end();
+ MachineBasicBlock::iterator J = I;
+
+ if (!DisableDelaySlotFiller)
+ D = findDelayInstr(MBB,I);
+
+ ++FilledSlots;
+ Changed = true;
+
+ if (D == MBB.end())
+ BuildMI(MBB, ++J, I->getDebugLoc(), TII->get(MBlaze::NOP));
+ else
+ MBB.splice(++J, &MBB, D);
+ }
+ return Changed;
+}
+
+/// createMBlazeDelaySlotFillerPass - Returns a pass that fills in delay
+/// slots in MBlaze MachineFunctions
+FunctionPass *llvm::createMBlazeDelaySlotFillerPass(MBlazeTargetMachine &tm) {
+ return new Filler(tm);
+}
+
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeELFWriterInfo.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeELFWriterInfo.cpp
new file mode 100644
index 0000000..3f26ed1
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeELFWriterInfo.cpp
@@ -0,0 +1,111 @@
+//===-- MBlazeELFWriterInfo.cpp - ELF Writer Info for the MBlaze backend --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements ELF writer information for the MBlaze backend.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MBlazeELFWriterInfo.h"
+#include "MBlazeRelocations.h"
+#include "llvm/Function.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Implementation of the MBlazeELFWriterInfo class
+//===----------------------------------------------------------------------===//
+
+MBlazeELFWriterInfo::MBlazeELFWriterInfo(TargetMachine &TM)
+ : TargetELFWriterInfo(TM.getTargetData()->getPointerSizeInBits() == 64,
+ TM.getTargetData()->isLittleEndian()) {
+}
+
+MBlazeELFWriterInfo::~MBlazeELFWriterInfo() {}
+
+unsigned MBlazeELFWriterInfo::getRelocationType(unsigned MachineRelTy) const {
+ switch (MachineRelTy) {
+ case MBlaze::reloc_pcrel_word:
+ return ELF::R_MICROBLAZE_64_PCREL;
+ case MBlaze::reloc_absolute_word:
+ return ELF::R_MICROBLAZE_NONE;
+ default:
+ llvm_unreachable("unknown mblaze machine relocation type");
+ }
+ return 0;
+}
+
+long int MBlazeELFWriterInfo::getDefaultAddendForRelTy(unsigned RelTy,
+ long int Modifier) const {
+ switch (RelTy) {
+ case ELF::R_MICROBLAZE_32_PCREL:
+ return Modifier - 4;
+ case ELF::R_MICROBLAZE_32:
+ return Modifier;
+ default:
+ llvm_unreachable("unknown mblaze relocation type");
+ }
+ return 0;
+}
+
+unsigned MBlazeELFWriterInfo::getRelocationTySize(unsigned RelTy) const {
+ // FIXME: Most of these sizes are guesses based on the name
+ switch (RelTy) {
+ case ELF::R_MICROBLAZE_32:
+ case ELF::R_MICROBLAZE_32_PCREL:
+ case ELF::R_MICROBLAZE_32_PCREL_LO:
+ case ELF::R_MICROBLAZE_32_LO:
+ case ELF::R_MICROBLAZE_SRO32:
+ case ELF::R_MICROBLAZE_SRW32:
+ case ELF::R_MICROBLAZE_32_SYM_OP_SYM:
+ case ELF::R_MICROBLAZE_GOTOFF_32:
+ return 32;
+
+ case ELF::R_MICROBLAZE_64_PCREL:
+ case ELF::R_MICROBLAZE_64:
+ case ELF::R_MICROBLAZE_GOTPC_64:
+ case ELF::R_MICROBLAZE_GOT_64:
+ case ELF::R_MICROBLAZE_PLT_64:
+ case ELF::R_MICROBLAZE_GOTOFF_64:
+ return 64;
+ }
+
+ return 0;
+}
+
+bool MBlazeELFWriterInfo::isPCRelativeRel(unsigned RelTy) const {
+ // FIXME: Most of these are guesses based on the name
+ switch (RelTy) {
+ case ELF::R_MICROBLAZE_32_PCREL:
+ case ELF::R_MICROBLAZE_64_PCREL:
+ case ELF::R_MICROBLAZE_32_PCREL_LO:
+ case ELF::R_MICROBLAZE_GOTPC_64:
+ return true;
+ }
+
+ return false;
+}
+
+unsigned MBlazeELFWriterInfo::getAbsoluteLabelMachineRelTy() const {
+ return MBlaze::reloc_absolute_word;
+}
+
+long int MBlazeELFWriterInfo::computeRelocation(unsigned SymOffset,
+ unsigned RelOffset,
+ unsigned RelTy) const {
+ if (RelTy == ELF::R_MICROBLAZE_32_PCREL || ELF::R_MICROBLAZE_64_PCREL)
+ return SymOffset - (RelOffset + 4);
+ else
+ assert("computeRelocation unknown for this relocation type");
+
+ return 0;
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeELFWriterInfo.h b/contrib/llvm/lib/Target/MBlaze/MBlazeELFWriterInfo.h
new file mode 100644
index 0000000..63bfc0d
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeELFWriterInfo.h
@@ -0,0 +1,58 @@
+//===-- MBlazeELFWriterInfo.h - ELF Writer Info for MBlaze ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements ELF writer information for the MBlaze backend.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZE_ELF_WRITER_INFO_H
+#define MBLAZE_ELF_WRITER_INFO_H
+
+#include "llvm/Target/TargetELFWriterInfo.h"
+
+namespace llvm {
+
+ class MBlazeELFWriterInfo : public TargetELFWriterInfo {
+ public:
+ MBlazeELFWriterInfo(TargetMachine &TM);
+ virtual ~MBlazeELFWriterInfo();
+
+ /// getRelocationType - Returns the target specific ELF Relocation type.
+ /// 'MachineRelTy' contains the object code independent relocation type
+ virtual unsigned getRelocationType(unsigned MachineRelTy) const;
+
+ /// hasRelocationAddend - True if the target uses an addend in the
+ /// ELF relocation entry.
+ virtual bool hasRelocationAddend() const { return false; }
+
+ /// getDefaultAddendForRelTy - Gets the default addend value for a
+ /// relocation entry based on the target ELF relocation type.
+ virtual long int getDefaultAddendForRelTy(unsigned RelTy,
+ long int Modifier = 0) const;
+
+ /// getRelTySize - Returns the size of relocatable field in bits
+ virtual unsigned getRelocationTySize(unsigned RelTy) const;
+
+ /// isPCRelativeRel - True if the relocation type is pc relative
+ virtual bool isPCRelativeRel(unsigned RelTy) const;
+
+ /// getJumpTableRelocationTy - Returns the machine relocation type used
+ /// to reference a jumptable.
+ virtual unsigned getAbsoluteLabelMachineRelTy() const;
+
+ /// computeRelocation - Some relocatable fields could be relocated
+ /// directly, avoiding the relocation symbol emission, compute the
+ /// final relocation value for this symbol.
+ virtual long int computeRelocation(unsigned SymOffset, unsigned RelOffset,
+ unsigned RelTy) const;
+ };
+
+} // end llvm namespace
+
+#endif // MBLAZE_ELF_WRITER_INFO_H
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeFrameLowering.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeFrameLowering.cpp
new file mode 100644
index 0000000..f28d5a7
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeFrameLowering.cpp
@@ -0,0 +1,450 @@
+//===- MBlazeFrameLowering.cpp - MBlaze Frame Information ------*- C++ -*-====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MBlaze implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mblaze-frame-lowering"
+
+#include "MBlazeFrameLowering.h"
+#include "MBlazeInstrInfo.h"
+#include "MBlazeMachineFunction.h"
+#include "InstPrinter/MBlazeInstPrinter.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace llvm {
+ cl::opt<bool> DisableStackAdjust(
+ "disable-mblaze-stack-adjust",
+ cl::init(false),
+ cl::desc("Disable MBlaze stack layout adjustment."),
+ cl::Hidden);
+}
+
+static void replaceFrameIndexes(MachineFunction &MF,
+ SmallVector<std::pair<int,int64_t>, 16> &FR) {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
+ const SmallVector<std::pair<int,int64_t>, 16>::iterator FRB = FR.begin();
+ const SmallVector<std::pair<int,int64_t>, 16>::iterator FRE = FR.end();
+
+ SmallVector<std::pair<int,int64_t>, 16>::iterator FRI = FRB;
+ for (; FRI != FRE; ++FRI) {
+ MFI->RemoveStackObject(FRI->first);
+ int NFI = MFI->CreateFixedObject(4, FRI->second, true);
+ MBlazeFI->recordReplacement(FRI->first, NFI);
+
+ for (MachineFunction::iterator MB=MF.begin(), ME=MF.end(); MB!=ME; ++MB) {
+ MachineBasicBlock::iterator MBB = MB->begin();
+ const MachineBasicBlock::iterator MBE = MB->end();
+
+ for (; MBB != MBE; ++MBB) {
+ MachineInstr::mop_iterator MIB = MBB->operands_begin();
+ const MachineInstr::mop_iterator MIE = MBB->operands_end();
+
+ for (MachineInstr::mop_iterator MII = MIB; MII != MIE; ++MII) {
+ if (!MII->isFI() || MII->getIndex() != FRI->first) continue;
+ DEBUG(dbgs() << "FOUND FI#" << MII->getIndex() << "\n");
+ MII->setIndex(NFI);
+ }
+ }
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+//
+// Stack Frame Processing methods
+// +----------------------------+
+//
+// The stack is allocated decrementing the stack pointer on
+// the first instruction of a function prologue. Once decremented,
+// all stack references are are done through a positive offset
+// from the stack/frame pointer, so the stack is considered
+// to grow up.
+//
+//===----------------------------------------------------------------------===//
+
+static void analyzeFrameIndexes(MachineFunction &MF) {
+ if (DisableStackAdjust) return;
+
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ MachineRegisterInfo::livein_iterator LII = MRI.livein_begin();
+ MachineRegisterInfo::livein_iterator LIE = MRI.livein_end();
+ const SmallVector<int, 16> &LiveInFI = MBlazeFI->getLiveIn();
+ SmallVector<MachineInstr*, 16> EraseInstr;
+ SmallVector<std::pair<int,int64_t>, 16> FrameRelocate;
+
+ MachineBasicBlock *MBB = MF.getBlockNumbered(0);
+ MachineBasicBlock::iterator MIB = MBB->begin();
+ MachineBasicBlock::iterator MIE = MBB->end();
+
+ int StackAdjust = 0;
+ int StackOffset = -28;
+
+ // In this loop we are searching frame indexes that corrospond to incoming
+ // arguments that are already in the stack. We look for instruction sequences
+ // like the following:
+ //
+ // LWI REG, FI1, 0
+ // ...
+ // SWI REG, FI2, 0
+ //
+ // As long as there are no defs of REG in the ... part, we can eliminate
+ // the SWI instruction because the value has already been stored to the
+ // stack by the caller. All we need to do is locate FI at the correct
+ // stack location according to the calling convensions.
+ //
+ // Additionally, if the SWI operation kills the def of REG then we don't
+ // need the LWI operation so we can erase it as well.
+ for (unsigned i = 0, e = LiveInFI.size(); i < e; ++i) {
+ for (MachineBasicBlock::iterator I=MIB; I != MIE; ++I) {
+ if (I->getOpcode() != MBlaze::LWI || I->getNumOperands() != 3 ||
+ !I->getOperand(1).isFI() || !I->getOperand(0).isReg() ||
+ I->getOperand(1).getIndex() != LiveInFI[i]) continue;
+
+ unsigned FIReg = I->getOperand(0).getReg();
+ MachineBasicBlock::iterator SI = I;
+ for (SI++; SI != MIE; ++SI) {
+ if (!SI->getOperand(0).isReg() ||
+ !SI->getOperand(1).isFI() ||
+ SI->getOpcode() != MBlaze::SWI) continue;
+
+ int FI = SI->getOperand(1).getIndex();
+ if (SI->getOperand(0).getReg() != FIReg ||
+ MFI->isFixedObjectIndex(FI) ||
+ MFI->getObjectSize(FI) != 4) continue;
+
+ if (SI->getOperand(0).isDef()) break;
+
+ if (SI->getOperand(0).isKill()) {
+ DEBUG(dbgs() << "LWI for FI#" << I->getOperand(1).getIndex()
+ << " removed\n");
+ EraseInstr.push_back(I);
+ }
+
+ EraseInstr.push_back(SI);
+ DEBUG(dbgs() << "SWI for FI#" << FI << " removed\n");
+
+ FrameRelocate.push_back(std::make_pair(FI,StackOffset));
+ DEBUG(dbgs() << "FI#" << FI << " relocated to " << StackOffset << "\n");
+
+ StackOffset -= 4;
+ StackAdjust += 4;
+ break;
+ }
+ }
+ }
+
+ // In this loop we are searching for frame indexes that corrospond to
+ // incoming arguments that are in registers. We look for instruction
+ // sequences like the following:
+ //
+ // ... SWI REG, FI, 0
+ //
+ // As long as the ... part does not define REG and if REG is an incoming
+ // parameter register then we know that, according to ABI convensions, the
+ // caller has allocated stack space for it already. Instead of allocating
+ // stack space on our frame, we record the correct location in the callers
+ // frame.
+ for (MachineRegisterInfo::livein_iterator LI = LII; LI != LIE; ++LI) {
+ for (MachineBasicBlock::iterator I=MIB; I != MIE; ++I) {
+ if (I->definesRegister(LI->first))
+ break;
+
+ if (I->getOpcode() != MBlaze::SWI || I->getNumOperands() != 3 ||
+ !I->getOperand(1).isFI() || !I->getOperand(0).isReg() ||
+ I->getOperand(1).getIndex() < 0) continue;
+
+ if (I->getOperand(0).getReg() == LI->first) {
+ int FI = I->getOperand(1).getIndex();
+ MBlazeFI->recordLiveIn(FI);
+
+ int FILoc = 0;
+ switch (LI->first) {
+ default: llvm_unreachable("invalid incoming parameter!");
+ case MBlaze::R5: FILoc = -4; break;
+ case MBlaze::R6: FILoc = -8; break;
+ case MBlaze::R7: FILoc = -12; break;
+ case MBlaze::R8: FILoc = -16; break;
+ case MBlaze::R9: FILoc = -20; break;
+ case MBlaze::R10: FILoc = -24; break;
+ }
+
+ StackAdjust += 4;
+ FrameRelocate.push_back(std::make_pair(FI,FILoc));
+ DEBUG(dbgs() << "FI#" << FI << " relocated to " << FILoc << "\n");
+ break;
+ }
+ }
+ }
+
+ // Go ahead and erase all of the instructions that we determined were
+ // no longer needed.
+ for (int i = 0, e = EraseInstr.size(); i < e; ++i)
+ MBB->erase(EraseInstr[i]);
+
+ // Replace all of the frame indexes that we have relocated with new
+ // fixed object frame indexes.
+ replaceFrameIndexes(MF, FrameRelocate);
+}
+
+static void interruptFrameLayout(MachineFunction &MF) {
+ const Function *F = MF.getFunction();
+ llvm::CallingConv::ID CallConv = F->getCallingConv();
+
+ // If this function is not using either the interrupt_handler
+ // calling convention or the save_volatiles calling convention
+ // then we don't need to do any additional frame layout.
+ if (CallConv != llvm::CallingConv::MBLAZE_INTR &&
+ CallConv != llvm::CallingConv::MBLAZE_SVOL)
+ return;
+
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ const MBlazeInstrInfo &TII =
+ *static_cast<const MBlazeInstrInfo*>(MF.getTarget().getInstrInfo());
+
+ // Determine if the calling convention is the interrupt_handler
+ // calling convention. Some pieces of the prologue and epilogue
+ // only need to be emitted if we are lowering and interrupt handler.
+ bool isIntr = CallConv == llvm::CallingConv::MBLAZE_INTR;
+
+ // Determine where to put prologue and epilogue additions
+ MachineBasicBlock &MENT = MF.front();
+ MachineBasicBlock &MEXT = MF.back();
+
+ MachineBasicBlock::iterator MENTI = MENT.begin();
+ MachineBasicBlock::iterator MEXTI = prior(MEXT.end());
+
+ DebugLoc ENTDL = MENTI != MENT.end() ? MENTI->getDebugLoc() : DebugLoc();
+ DebugLoc EXTDL = MEXTI != MEXT.end() ? MEXTI->getDebugLoc() : DebugLoc();
+
+ // Store the frame indexes generated during prologue additions for use
+ // when we are generating the epilogue additions.
+ SmallVector<int, 10> VFI;
+
+ // Build the prologue SWI for R3 - R12 if needed. Note that R11 must
+ // always have a SWI because it is used when processing RMSR.
+ for (unsigned r = MBlaze::R3; r <= MBlaze::R12; ++r) {
+ if (!MRI.isPhysRegUsed(r) && !(isIntr && r == MBlaze::R11)) continue;
+
+ int FI = MFI->CreateStackObject(4,4,false,false);
+ VFI.push_back(FI);
+
+ BuildMI(MENT, MENTI, ENTDL, TII.get(MBlaze::SWI), r)
+ .addFrameIndex(FI).addImm(0);
+ }
+
+ // Build the prologue SWI for R17, R18
+ int R17FI = MFI->CreateStackObject(4,4,false,false);
+ int R18FI = MFI->CreateStackObject(4,4,false,false);
+
+ BuildMI(MENT, MENTI, ENTDL, TII.get(MBlaze::SWI), MBlaze::R17)
+ .addFrameIndex(R17FI).addImm(0);
+
+ BuildMI(MENT, MENTI, ENTDL, TII.get(MBlaze::SWI), MBlaze::R18)
+ .addFrameIndex(R18FI).addImm(0);
+
+ // Buid the prologue SWI and the epilogue LWI for RMSR if needed
+ if (isIntr) {
+ int MSRFI = MFI->CreateStackObject(4,4,false,false);
+ BuildMI(MENT, MENTI, ENTDL, TII.get(MBlaze::MFS), MBlaze::R11)
+ .addReg(MBlaze::RMSR);
+ BuildMI(MENT, MENTI, ENTDL, TII.get(MBlaze::SWI), MBlaze::R11)
+ .addFrameIndex(MSRFI).addImm(0);
+
+ BuildMI(MEXT, MEXTI, EXTDL, TII.get(MBlaze::LWI), MBlaze::R11)
+ .addFrameIndex(MSRFI).addImm(0);
+ BuildMI(MEXT, MEXTI, EXTDL, TII.get(MBlaze::MTS), MBlaze::RMSR)
+ .addReg(MBlaze::R11);
+ }
+
+ // Build the epilogue LWI for R17, R18
+ BuildMI(MEXT, MEXTI, EXTDL, TII.get(MBlaze::LWI), MBlaze::R18)
+ .addFrameIndex(R18FI).addImm(0);
+
+ BuildMI(MEXT, MEXTI, EXTDL, TII.get(MBlaze::LWI), MBlaze::R17)
+ .addFrameIndex(R17FI).addImm(0);
+
+ // Build the epilogue LWI for R3 - R12 if needed
+ for (unsigned r = MBlaze::R12, i = VFI.size(); r >= MBlaze::R3; --r) {
+ if (!MRI.isPhysRegUsed(r)) continue;
+ BuildMI(MEXT, MEXTI, EXTDL, TII.get(MBlaze::LWI), r)
+ .addFrameIndex(VFI[--i]).addImm(0);
+ }
+}
+
+static void determineFrameLayout(MachineFunction &MF) {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
+
+ // Replace the dummy '0' SPOffset by the negative offsets, as explained on
+ // LowerFORMAL_ARGUMENTS. Leaving '0' for while is necessary to avoid
+ // the approach done by calculateFrameObjectOffsets to the stack frame.
+ MBlazeFI->adjustLoadArgsFI(MFI);
+ MBlazeFI->adjustStoreVarArgsFI(MFI);
+
+ // Get the number of bytes to allocate from the FrameInfo
+ unsigned FrameSize = MFI->getStackSize();
+ DEBUG(dbgs() << "Original Frame Size: " << FrameSize << "\n" );
+
+ // Get the alignments provided by the target, and the maximum alignment
+ // (if any) of the fixed frame objects.
+ // unsigned MaxAlign = MFI->getMaxAlignment();
+ unsigned TargetAlign = MF.getTarget().getFrameLowering()->getStackAlignment();
+ unsigned AlignMask = TargetAlign - 1;
+
+ // Make sure the frame is aligned.
+ FrameSize = (FrameSize + AlignMask) & ~AlignMask;
+ MFI->setStackSize(FrameSize);
+ DEBUG(dbgs() << "Aligned Frame Size: " << FrameSize << "\n" );
+}
+
+int MBlazeFrameLowering::getFrameIndexOffset(const MachineFunction &MF, int FI)
+ const {
+ const MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
+ if (MBlazeFI->hasReplacement(FI))
+ FI = MBlazeFI->getReplacement(FI);
+ return TargetFrameLowering::getFrameIndexOffset(MF,FI);
+}
+
+// hasFP - Return true if the specified function should have a dedicated frame
+// pointer register. This is true if the function has variable sized allocas or
+// if frame pointer elimination is disabled.
+bool MBlazeFrameLowering::hasFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects();
+}
+
+void MBlazeFrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MBlazeInstrInfo &TII =
+ *static_cast<const MBlazeInstrInfo*>(MF.getTarget().getInstrInfo());
+ MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+ llvm::CallingConv::ID CallConv = MF.getFunction()->getCallingConv();
+ bool requiresRA = CallConv == llvm::CallingConv::MBLAZE_INTR;
+
+ // Determine the correct frame layout
+ determineFrameLayout(MF);
+
+ // Get the number of bytes to allocate from the FrameInfo.
+ unsigned StackSize = MFI->getStackSize();
+
+ // No need to allocate space on the stack.
+ if (StackSize == 0 && !MFI->adjustsStack() && !requiresRA) return;
+
+ int FPOffset = MBlazeFI->getFPStackOffset();
+ int RAOffset = MBlazeFI->getRAStackOffset();
+
+ // Adjust stack : addi R1, R1, -imm
+ BuildMI(MBB, MBBI, DL, TII.get(MBlaze::ADDIK), MBlaze::R1)
+ .addReg(MBlaze::R1).addImm(-StackSize);
+
+ // swi R15, R1, stack_loc
+ if (MFI->adjustsStack() || requiresRA) {
+ BuildMI(MBB, MBBI, DL, TII.get(MBlaze::SWI))
+ .addReg(MBlaze::R15).addReg(MBlaze::R1).addImm(RAOffset);
+ }
+
+ if (hasFP(MF)) {
+ // swi R19, R1, stack_loc
+ BuildMI(MBB, MBBI, DL, TII.get(MBlaze::SWI))
+ .addReg(MBlaze::R19).addReg(MBlaze::R1).addImm(FPOffset);
+
+ // add R19, R1, R0
+ BuildMI(MBB, MBBI, DL, TII.get(MBlaze::ADD), MBlaze::R19)
+ .addReg(MBlaze::R1).addReg(MBlaze::R0);
+ }
+}
+
+void MBlazeFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
+ const MBlazeInstrInfo &TII =
+ *static_cast<const MBlazeInstrInfo*>(MF.getTarget().getInstrInfo());
+
+ DebugLoc dl = MBBI->getDebugLoc();
+
+ llvm::CallingConv::ID CallConv = MF.getFunction()->getCallingConv();
+ bool requiresRA = CallConv == llvm::CallingConv::MBLAZE_INTR;
+
+ // Get the FI's where RA and FP are saved.
+ int FPOffset = MBlazeFI->getFPStackOffset();
+ int RAOffset = MBlazeFI->getRAStackOffset();
+
+ if (hasFP(MF)) {
+ // add R1, R19, R0
+ BuildMI(MBB, MBBI, dl, TII.get(MBlaze::ADD), MBlaze::R1)
+ .addReg(MBlaze::R19).addReg(MBlaze::R0);
+
+ // lwi R19, R1, stack_loc
+ BuildMI(MBB, MBBI, dl, TII.get(MBlaze::LWI), MBlaze::R19)
+ .addReg(MBlaze::R1).addImm(FPOffset);
+ }
+
+ // lwi R15, R1, stack_loc
+ if (MFI->adjustsStack() || requiresRA) {
+ BuildMI(MBB, MBBI, dl, TII.get(MBlaze::LWI), MBlaze::R15)
+ .addReg(MBlaze::R1).addImm(RAOffset);
+ }
+
+ // Get the number of bytes from FrameInfo
+ int StackSize = (int) MFI->getStackSize();
+
+ // addi R1, R1, imm
+ if (StackSize) {
+ BuildMI(MBB, MBBI, dl, TII.get(MBlaze::ADDIK), MBlaze::R1)
+ .addReg(MBlaze::R1).addImm(StackSize);
+ }
+}
+
+void MBlazeFrameLowering::
+processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
+ llvm::CallingConv::ID CallConv = MF.getFunction()->getCallingConv();
+ bool requiresRA = CallConv == llvm::CallingConv::MBLAZE_INTR;
+
+ if (MFI->adjustsStack() || requiresRA) {
+ MBlazeFI->setRAStackOffset(0);
+ MFI->CreateFixedObject(4,0,true);
+ }
+
+ if (hasFP(MF)) {
+ MBlazeFI->setFPStackOffset(4);
+ MFI->CreateFixedObject(4,4,true);
+ }
+
+ interruptFrameLayout(MF);
+ analyzeFrameIndexes(MF);
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeFrameLowering.h b/contrib/llvm/lib/Target/MBlaze/MBlazeFrameLowering.h
new file mode 100644
index 0000000..8be15bf
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeFrameLowering.h
@@ -0,0 +1,53 @@
+//=- MBlazeFrameLowering.h - Define frame lowering for MicroBlaze -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZE_FRAMEINFO_H
+#define MBLAZE_FRAMEINFO_H
+
+#include "MBlaze.h"
+#include "MBlazeSubtarget.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+ class MBlazeSubtarget;
+
+class MBlazeFrameLowering : public TargetFrameLowering {
+protected:
+ const MBlazeSubtarget &STI;
+
+public:
+ explicit MBlazeFrameLowering(const MBlazeSubtarget &sti)
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsUp, 4, 0), STI(sti) {
+ }
+
+ /// targetHandlesStackFrameRounding - Returns true if the target is
+ /// responsible for rounding up the stack frame (probably at emitPrologue
+ /// time).
+ bool targetHandlesStackFrameRounding() const { return true; }
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ bool hasFP(const MachineFunction &MF) const;
+
+ int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
+
+ virtual void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp
new file mode 100644
index 0000000..6b43497
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp
@@ -0,0 +1,277 @@
+//===-- MBlazeISelDAGToDAG.cpp - A dag to dag inst selector for MBlaze ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the MBlaze target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mblaze-isel"
+#include "MBlaze.h"
+#include "MBlazeMachineFunction.h"
+#include "MBlazeRegisterInfo.h"
+#include "MBlazeSubtarget.h"
+#include "MBlazeTargetMachine.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Instruction Selector Implementation
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MBlazeDAGToDAGISel - MBlaze specific code to select MBlaze machine
+// instructions for SelectionDAG operations.
+//===----------------------------------------------------------------------===//
+namespace {
+
+class MBlazeDAGToDAGISel : public SelectionDAGISel {
+
+ /// TM - Keep a reference to MBlazeTargetMachine.
+ MBlazeTargetMachine &TM;
+
+ /// Subtarget - Keep a pointer to the MBlazeSubtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const MBlazeSubtarget &Subtarget;
+
+public:
+ explicit MBlazeDAGToDAGISel(MBlazeTargetMachine &tm) :
+ SelectionDAGISel(tm),
+ TM(tm), Subtarget(tm.getSubtarget<MBlazeSubtarget>()) {}
+
+ // Pass Name
+ virtual const char *getPassName() const {
+ return "MBlaze DAG->DAG Pattern Instruction Selection";
+ }
+private:
+ // Include the pieces autogenerated from the target description.
+ #include "MBlazeGenDAGISel.inc"
+
+ /// getTargetMachine - Return a reference to the TargetMachine, casted
+ /// to the target-specific type.
+ const MBlazeTargetMachine &getTargetMachine() {
+ return static_cast<const MBlazeTargetMachine &>(TM);
+ }
+
+ /// getInstrInfo - Return a reference to the TargetInstrInfo, casted
+ /// to the target-specific type.
+ const MBlazeInstrInfo *getInstrInfo() {
+ return getTargetMachine().getInstrInfo();
+ }
+
+ SDNode *getGlobalBaseReg();
+ SDNode *Select(SDNode *N);
+
+ // Address Selection
+ bool SelectAddrRegReg(SDValue N, SDValue &Base, SDValue &Index);
+ bool SelectAddrRegImm(SDValue N, SDValue &Disp, SDValue &Base);
+
+ // getI32Imm - Return a target constant with the specified value, of type i32.
+ inline SDValue getI32Imm(unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i32);
+ }
+};
+
+}
+
+/// isIntS32Immediate - This method tests to see if the node is either a 32-bit
+/// or 64-bit immediate, and if the value can be accurately represented as a
+/// sign extension from a 32-bit value. If so, this returns true and the
+/// immediate.
+static bool isIntS32Immediate(SDNode *N, int32_t &Imm) {
+ unsigned Opc = N->getOpcode();
+ if (Opc != ISD::Constant)
+ return false;
+
+ Imm = (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
+ if (N->getValueType(0) == MVT::i32)
+ return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
+ else
+ return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
+}
+
+static bool isIntS32Immediate(SDValue Op, int32_t &Imm) {
+ return isIntS32Immediate(Op.getNode(), Imm);
+}
+
+
+/// SelectAddressRegReg - Given the specified addressed, check to see if it
+/// can be represented as an indexed [r+r] operation. Returns false if it
+/// can be more efficiently represented with [r+imm].
+bool MBlazeDAGToDAGISel::
+SelectAddrRegReg(SDValue N, SDValue &Base, SDValue &Index) {
+ if (N.getOpcode() == ISD::FrameIndex) return false;
+ if (N.getOpcode() == ISD::TargetExternalSymbol ||
+ N.getOpcode() == ISD::TargetGlobalAddress)
+ return false; // direct calls.
+
+ int32_t imm = 0;
+ if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
+ if (isIntS32Immediate(N.getOperand(1), imm))
+ return false; // r+i
+
+ if (N.getOperand(0).getOpcode() == ISD::TargetJumpTable ||
+ N.getOperand(1).getOpcode() == ISD::TargetJumpTable)
+ return false; // jump tables.
+
+ Base = N.getOperand(0);
+ Index = N.getOperand(1);
+ return true;
+ }
+
+ return false;
+}
+
+/// Returns true if the address N can be represented by a base register plus
+/// a signed 32-bit displacement [r+imm], and if it is not better
+/// represented as reg+reg.
+bool MBlazeDAGToDAGISel::
+SelectAddrRegImm(SDValue N, SDValue &Base, SDValue &Disp) {
+ // If this can be more profitably realized as r+r, fail.
+ if (SelectAddrRegReg(N, Base, Disp))
+ return false;
+
+ if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
+ int32_t imm = 0;
+ if (isIntS32Immediate(N.getOperand(1), imm)) {
+ Disp = CurDAG->getTargetConstant(imm, MVT::i32);
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
+ Base = CurDAG->getTargetFrameIndex(FI->getIndex(), N.getValueType());
+ } else {
+ Base = N.getOperand(0);
+ }
+ return true; // [r+i]
+ }
+ } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
+ // Loading from a constant address.
+ uint32_t Imm = CN->getZExtValue();
+ Disp = CurDAG->getTargetConstant(Imm, CN->getValueType(0));
+ Base = CurDAG->getRegister(MBlaze::R0, CN->getValueType(0));
+ return true;
+ }
+
+ Disp = CurDAG->getTargetConstant(0, TM.getTargetLowering()->getPointerTy());
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N))
+ Base = CurDAG->getTargetFrameIndex(FI->getIndex(), N.getValueType());
+ else
+ Base = N;
+ return true; // [r+0]
+}
+
+/// getGlobalBaseReg - Output the instructions required to put the
+/// GOT address into a register.
+SDNode *MBlazeDAGToDAGISel::getGlobalBaseReg() {
+ unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF);
+ return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode();
+}
+
+/// Select instructions not customized! Used for
+/// expanded, promoted and normal instructions
+SDNode* MBlazeDAGToDAGISel::Select(SDNode *Node) {
+ unsigned Opcode = Node->getOpcode();
+ DebugLoc dl = Node->getDebugLoc();
+
+ // If we have a custom node, we already have selected!
+ if (Node->isMachineOpcode())
+ return NULL;
+
+ ///
+ // Instruction Selection not handled by the auto-generated
+ // tablegen selection should be handled here.
+ ///
+ switch (Opcode) {
+ default: break;
+
+ // Get target GOT address.
+ case ISD::GLOBAL_OFFSET_TABLE:
+ return getGlobalBaseReg();
+
+ case ISD::FrameIndex: {
+ SDValue imm = CurDAG->getTargetConstant(0, MVT::i32);
+ int FI = dyn_cast<FrameIndexSDNode>(Node)->getIndex();
+ EVT VT = Node->getValueType(0);
+ SDValue TFI = CurDAG->getTargetFrameIndex(FI, VT);
+ unsigned Opc = MBlaze::ADDIK;
+ if (Node->hasOneUse())
+ return CurDAG->SelectNodeTo(Node, Opc, VT, TFI, imm);
+ return CurDAG->getMachineNode(Opc, dl, VT, TFI, imm);
+ }
+
+
+ /// Handle direct and indirect calls when using PIC. On PIC, when
+ /// GOT is smaller than about 64k (small code) the GA target is
+ /// loaded with only one instruction. Otherwise GA's target must
+ /// be loaded with 3 instructions.
+ case MBlazeISD::JmpLink: {
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ SDValue Chain = Node->getOperand(0);
+ SDValue Callee = Node->getOperand(1);
+ SDValue R20Reg = CurDAG->getRegister(MBlaze::R20, MVT::i32);
+ SDValue InFlag(0, 0);
+
+ if ((isa<GlobalAddressSDNode>(Callee)) ||
+ (isa<ExternalSymbolSDNode>(Callee)))
+ {
+ /// Direct call for global addresses and external symbols
+ SDValue GPReg = CurDAG->getRegister(MBlaze::R15, MVT::i32);
+
+ // Use load to get GOT target
+ SDValue Ops[] = { Callee, GPReg, Chain };
+ SDValue Load = SDValue(CurDAG->getMachineNode(MBlaze::LW, dl,
+ MVT::i32, MVT::Other, Ops, 3), 0);
+ Chain = Load.getValue(1);
+
+ // Call target must be on T9
+ Chain = CurDAG->getCopyToReg(Chain, dl, R20Reg, Load, InFlag);
+ } else
+ /// Indirect call
+ Chain = CurDAG->getCopyToReg(Chain, dl, R20Reg, Callee, InFlag);
+
+ // Emit Jump and Link Register
+ SDNode *ResNode = CurDAG->getMachineNode(MBlaze::BRLID, dl, MVT::Other,
+ MVT::Glue, R20Reg, Chain);
+ Chain = SDValue(ResNode, 0);
+ InFlag = SDValue(ResNode, 1);
+ ReplaceUses(SDValue(Node, 0), Chain);
+ ReplaceUses(SDValue(Node, 1), InFlag);
+ return ResNode;
+ }
+ }
+ }
+
+ // Select the default instruction
+ SDNode *ResNode = SelectCode(Node);
+
+ DEBUG(errs() << "=> ");
+ if (ResNode == NULL || ResNode == Node)
+ DEBUG(Node->dump(CurDAG));
+ else
+ DEBUG(ResNode->dump(CurDAG));
+ DEBUG(errs() << "\n");
+ return ResNode;
+}
+
+/// createMBlazeISelDag - This pass converts a legalized DAG into a
+/// MBlaze-specific DAG, ready for instruction scheduling.
+FunctionPass *llvm::createMBlazeISelDag(MBlazeTargetMachine &TM) {
+ return new MBlazeDAGToDAGISel(TM);
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeISelLowering.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeISelLowering.cpp
new file mode 100644
index 0000000..8ec548f
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeISelLowering.cpp
@@ -0,0 +1,1148 @@
+//===-- MBlazeISelLowering.cpp - MBlaze DAG Lowering Implementation -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that MBlaze uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mblaze-lower"
+#include "MBlazeISelLowering.h"
+#include "MBlazeMachineFunction.h"
+#include "MBlazeTargetMachine.h"
+#include "MBlazeTargetObjectFile.h"
+#include "MBlazeSubtarget.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/CallingConv.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+static bool CC_MBlaze_AssignReg(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State);
+
+const char *MBlazeTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ case MBlazeISD::JmpLink : return "MBlazeISD::JmpLink";
+ case MBlazeISD::GPRel : return "MBlazeISD::GPRel";
+ case MBlazeISD::Wrap : return "MBlazeISD::Wrap";
+ case MBlazeISD::ICmp : return "MBlazeISD::ICmp";
+ case MBlazeISD::Ret : return "MBlazeISD::Ret";
+ case MBlazeISD::Select_CC : return "MBlazeISD::Select_CC";
+ default : return NULL;
+ }
+}
+
+MBlazeTargetLowering::MBlazeTargetLowering(MBlazeTargetMachine &TM)
+ : TargetLowering(TM, new MBlazeTargetObjectFile()) {
+ Subtarget = &TM.getSubtarget<MBlazeSubtarget>();
+
+ // MBlaze does not have i1 type, so use i32 for
+ // setcc operations results (slt, sgt, ...).
+ setBooleanContents(ZeroOrOneBooleanContent);
+ setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
+
+ // Set up the register classes
+ addRegisterClass(MVT::i32, MBlaze::GPRRegisterClass);
+ if (Subtarget->hasFPU()) {
+ addRegisterClass(MVT::f32, MBlaze::GPRRegisterClass);
+ setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
+ }
+
+ // Floating point operations which are not supported
+ setOperationAction(ISD::FREM, MVT::f32, Expand);
+ setOperationAction(ISD::FMA, MVT::f32, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i8, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i16, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
+ setOperationAction(ISD::FP_ROUND, MVT::f32, Expand);
+ setOperationAction(ISD::FP_ROUND, MVT::f64, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+ setOperationAction(ISD::FSIN, MVT::f32, Expand);
+ setOperationAction(ISD::FCOS, MVT::f32, Expand);
+ setOperationAction(ISD::FPOWI, MVT::f32, Expand);
+ setOperationAction(ISD::FPOW, MVT::f32, Expand);
+ setOperationAction(ISD::FLOG, MVT::f32, Expand);
+ setOperationAction(ISD::FLOG2, MVT::f32, Expand);
+ setOperationAction(ISD::FLOG10, MVT::f32, Expand);
+ setOperationAction(ISD::FEXP, MVT::f32, Expand);
+
+ // Load extented operations for i1 types must be promoted
+ setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+
+ // Sign extended loads must be expanded
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Expand);
+
+ // MBlaze has no REM or DIVREM operations.
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
+
+ // If the processor doesn't support multiply then expand it
+ if (!Subtarget->hasMul()) {
+ setOperationAction(ISD::MUL, MVT::i32, Expand);
+ }
+
+ // If the processor doesn't support 64-bit multiply then expand
+ if (!Subtarget->hasMul() || !Subtarget->hasMul64()) {
+ setOperationAction(ISD::MULHS, MVT::i32, Expand);
+ setOperationAction(ISD::MULHS, MVT::i64, Expand);
+ setOperationAction(ISD::MULHU, MVT::i32, Expand);
+ setOperationAction(ISD::MULHU, MVT::i64, Expand);
+ }
+
+ // If the processor doesn't support division then expand
+ if (!Subtarget->hasDiv()) {
+ setOperationAction(ISD::UDIV, MVT::i32, Expand);
+ setOperationAction(ISD::SDIV, MVT::i32, Expand);
+ }
+
+ // Expand unsupported conversions
+ setOperationAction(ISD::BITCAST, MVT::f32, Expand);
+ setOperationAction(ISD::BITCAST, MVT::i32, Expand);
+
+ // Expand SELECT_CC
+ setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+
+ // MBlaze doesn't have MUL_LOHI
+ setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
+
+ // Used by legalize types to correctly generate the setcc result.
+ // Without this, every float setcc comes with a AND/OR with the result,
+ // we don't want this, since the fpcmp result goes to a flag register,
+ // which is used implicitly by brcond and select operations.
+ AddPromotedToType(ISD::SETCC, MVT::i1, MVT::i32);
+ AddPromotedToType(ISD::SELECT, MVT::i1, MVT::i32);
+ AddPromotedToType(ISD::SELECT_CC, MVT::i1, MVT::i32);
+
+ // MBlaze Custom Operations
+ setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
+ setOperationAction(ISD::JumpTable, MVT::i32, Custom);
+ setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
+
+ // Variable Argument support
+ setOperationAction(ISD::VASTART, MVT::Other, Custom);
+ setOperationAction(ISD::VAEND, MVT::Other, Expand);
+ setOperationAction(ISD::VAARG, MVT::Other, Expand);
+ setOperationAction(ISD::VACOPY, MVT::Other, Expand);
+
+
+ // Operations not directly supported by MBlaze.
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC, MVT::Other, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+ setOperationAction(ISD::ROTL, MVT::i32, Expand);
+ setOperationAction(ISD::ROTR, MVT::i32, Expand);
+ setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ, MVT::i32, Expand);
+ setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+ setOperationAction(ISD::BSWAP, MVT::i32, Expand);
+
+ // We don't have line number support yet.
+ setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
+
+ // Use the default for now
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+
+ // MBlaze doesn't have extending float->double load/store
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
+ setMinFunctionAlignment(2);
+
+ setStackPointerRegisterToSaveRestore(MBlaze::R1);
+ computeRegisterProperties();
+}
+
+EVT MBlazeTargetLowering::getSetCCResultType(EVT VT) const {
+ return MVT::i32;
+}
+
+SDValue MBlazeTargetLowering::LowerOperation(SDValue Op,
+ SelectionDAG &DAG) const {
+ switch (Op.getOpcode())
+ {
+ case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
+ case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
+ case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
+ case ISD::JumpTable: return LowerJumpTable(Op, DAG);
+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ case ISD::VASTART: return LowerVASTART(Op, DAG);
+ }
+ return SDValue();
+}
+
+//===----------------------------------------------------------------------===//
+// Lower helper functions
+//===----------------------------------------------------------------------===//
+MachineBasicBlock*
+MBlazeTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *MBB)
+ const {
+ switch (MI->getOpcode()) {
+ default: assert(false && "Unexpected instr type to insert");
+
+ case MBlaze::ShiftRL:
+ case MBlaze::ShiftRA:
+ case MBlaze::ShiftL:
+ return EmitCustomShift(MI, MBB);
+
+ case MBlaze::Select_FCC:
+ case MBlaze::Select_CC:
+ return EmitCustomSelect(MI, MBB);
+
+ case MBlaze::CAS32:
+ case MBlaze::SWP32:
+ case MBlaze::LAA32:
+ case MBlaze::LAS32:
+ case MBlaze::LAD32:
+ case MBlaze::LAO32:
+ case MBlaze::LAX32:
+ case MBlaze::LAN32:
+ return EmitCustomAtomic(MI, MBB);
+
+ case MBlaze::MEMBARRIER:
+ // The Microblaze does not need memory barriers. Just delete the pseudo
+ // instruction and finish.
+ MI->eraseFromParent();
+ return MBB;
+ }
+}
+
+MachineBasicBlock*
+MBlazeTargetLowering::EmitCustomShift(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+
+ // To "insert" a shift left instruction, we actually have to insert a
+ // simple loop. The incoming instruction knows the destination vreg to
+ // set, the source vreg to operate over and the shift amount.
+ const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+ MachineFunction::iterator It = MBB;
+ ++It;
+
+ // start:
+ // andi samt, samt, 31
+ // beqid samt, finish
+ // add dst, src, r0
+ // loop:
+ // addik samt, samt, -1
+ // sra dst, dst
+ // bneid samt, loop
+ // nop
+ // finish:
+ MachineFunction *F = MBB->getParent();
+ MachineRegisterInfo &R = F->getRegInfo();
+ MachineBasicBlock *loop = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *finish = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, loop);
+ F->insert(It, finish);
+
+ // Update machine-CFG edges by transferring adding all successors and
+ // remaining instructions from the current block to the new block which
+ // will contain the Phi node for the select.
+ finish->splice(finish->begin(), MBB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ MBB->end());
+ finish->transferSuccessorsAndUpdatePHIs(MBB);
+
+ // Add the true and fallthrough blocks as its successors.
+ MBB->addSuccessor(loop);
+ MBB->addSuccessor(finish);
+
+ // Next, add the finish block as a successor of the loop block
+ loop->addSuccessor(finish);
+ loop->addSuccessor(loop);
+
+ unsigned IAMT = R.createVirtualRegister(MBlaze::GPRRegisterClass);
+ BuildMI(MBB, dl, TII->get(MBlaze::ANDI), IAMT)
+ .addReg(MI->getOperand(2).getReg())
+ .addImm(31);
+
+ unsigned IVAL = R.createVirtualRegister(MBlaze::GPRRegisterClass);
+ BuildMI(MBB, dl, TII->get(MBlaze::ADDIK), IVAL)
+ .addReg(MI->getOperand(1).getReg())
+ .addImm(0);
+
+ BuildMI(MBB, dl, TII->get(MBlaze::BEQID))
+ .addReg(IAMT)
+ .addMBB(finish);
+
+ unsigned DST = R.createVirtualRegister(MBlaze::GPRRegisterClass);
+ unsigned NDST = R.createVirtualRegister(MBlaze::GPRRegisterClass);
+ BuildMI(loop, dl, TII->get(MBlaze::PHI), DST)
+ .addReg(IVAL).addMBB(MBB)
+ .addReg(NDST).addMBB(loop);
+
+ unsigned SAMT = R.createVirtualRegister(MBlaze::GPRRegisterClass);
+ unsigned NAMT = R.createVirtualRegister(MBlaze::GPRRegisterClass);
+ BuildMI(loop, dl, TII->get(MBlaze::PHI), SAMT)
+ .addReg(IAMT).addMBB(MBB)
+ .addReg(NAMT).addMBB(loop);
+
+ if (MI->getOpcode() == MBlaze::ShiftL)
+ BuildMI(loop, dl, TII->get(MBlaze::ADD), NDST).addReg(DST).addReg(DST);
+ else if (MI->getOpcode() == MBlaze::ShiftRA)
+ BuildMI(loop, dl, TII->get(MBlaze::SRA), NDST).addReg(DST);
+ else if (MI->getOpcode() == MBlaze::ShiftRL)
+ BuildMI(loop, dl, TII->get(MBlaze::SRL), NDST).addReg(DST);
+ else
+ llvm_unreachable("Cannot lower unknown shift instruction");
+
+ BuildMI(loop, dl, TII->get(MBlaze::ADDIK), NAMT)
+ .addReg(SAMT)
+ .addImm(-1);
+
+ BuildMI(loop, dl, TII->get(MBlaze::BNEID))
+ .addReg(NAMT)
+ .addMBB(loop);
+
+ BuildMI(*finish, finish->begin(), dl,
+ TII->get(MBlaze::PHI), MI->getOperand(0).getReg())
+ .addReg(IVAL).addMBB(MBB)
+ .addReg(NDST).addMBB(loop);
+
+ // The pseudo instruction is no longer needed so remove it
+ MI->eraseFromParent();
+ return finish;
+}
+
+MachineBasicBlock*
+MBlazeTargetLowering::EmitCustomSelect(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+
+ // To "insert" a SELECT_CC instruction, we actually have to insert the
+ // diamond control-flow pattern. The incoming instruction knows the
+ // destination vreg to set, the condition code register to branch on, the
+ // true/false values to select between, and a branch opcode to use.
+ const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+ MachineFunction::iterator It = MBB;
+ ++It;
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // setcc r1, r2, r3
+ // bNE r1, r0, copy1MBB
+ // fallthrough --> copy0MBB
+ MachineFunction *F = MBB->getParent();
+ MachineBasicBlock *flsBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *dneBB = F->CreateMachineBasicBlock(LLVM_BB);
+
+ unsigned Opc;
+ switch (MI->getOperand(4).getImm()) {
+ default: llvm_unreachable("Unknown branch condition");
+ case MBlazeCC::EQ: Opc = MBlaze::BEQID; break;
+ case MBlazeCC::NE: Opc = MBlaze::BNEID; break;
+ case MBlazeCC::GT: Opc = MBlaze::BGTID; break;
+ case MBlazeCC::LT: Opc = MBlaze::BLTID; break;
+ case MBlazeCC::GE: Opc = MBlaze::BGEID; break;
+ case MBlazeCC::LE: Opc = MBlaze::BLEID; break;
+ }
+
+ F->insert(It, flsBB);
+ F->insert(It, dneBB);
+
+ // Transfer the remainder of MBB and its successor edges to dneBB.
+ dneBB->splice(dneBB->begin(), MBB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ MBB->end());
+ dneBB->transferSuccessorsAndUpdatePHIs(MBB);
+
+ MBB->addSuccessor(flsBB);
+ MBB->addSuccessor(dneBB);
+ flsBB->addSuccessor(dneBB);
+
+ BuildMI(MBB, dl, TII->get(Opc))
+ .addReg(MI->getOperand(3).getReg())
+ .addMBB(dneBB);
+
+ // sinkMBB:
+ // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
+ // ...
+ //BuildMI(dneBB, dl, TII->get(MBlaze::PHI), MI->getOperand(0).getReg())
+ // .addReg(MI->getOperand(1).getReg()).addMBB(flsBB)
+ // .addReg(MI->getOperand(2).getReg()).addMBB(BB);
+
+ BuildMI(*dneBB, dneBB->begin(), dl,
+ TII->get(MBlaze::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(2).getReg()).addMBB(flsBB)
+ .addReg(MI->getOperand(1).getReg()).addMBB(MBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return dneBB;
+}
+
+MachineBasicBlock*
+MBlazeTargetLowering::EmitCustomAtomic(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+
+ // All atomic instructions on the Microblaze are implemented using the
+ // load-linked / store-conditional style atomic instruction sequences.
+ // Thus, all operations will look something like the following:
+ //
+ // start:
+ // lwx RV, RP, 0
+ // <do stuff>
+ // swx RV, RP, 0
+ // addic RC, R0, 0
+ // bneid RC, start
+ //
+ // exit:
+ //
+ // To "insert" a shift left instruction, we actually have to insert a
+ // simple loop. The incoming instruction knows the destination vreg to
+ // set, the source vreg to operate over and the shift amount.
+ const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+ MachineFunction::iterator It = MBB;
+ ++It;
+
+ // start:
+ // andi samt, samt, 31
+ // beqid samt, finish
+ // add dst, src, r0
+ // loop:
+ // addik samt, samt, -1
+ // sra dst, dst
+ // bneid samt, loop
+ // nop
+ // finish:
+ MachineFunction *F = MBB->getParent();
+ MachineRegisterInfo &R = F->getRegInfo();
+
+ // Create the start and exit basic blocks for the atomic operation
+ MachineBasicBlock *start = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exit = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, start);
+ F->insert(It, exit);
+
+ // Update machine-CFG edges by transferring adding all successors and
+ // remaining instructions from the current block to the new block which
+ // will contain the Phi node for the select.
+ exit->splice(exit->begin(), MBB, llvm::next(MachineBasicBlock::iterator(MI)),
+ MBB->end());
+ exit->transferSuccessorsAndUpdatePHIs(MBB);
+
+ // Add the fallthrough block as its successors.
+ MBB->addSuccessor(start);
+
+ BuildMI(start, dl, TII->get(MBlaze::LWX), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addReg(MBlaze::R0);
+
+ MachineBasicBlock *final = start;
+ unsigned finalReg = 0;
+
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("Cannot lower unknown atomic instruction!");
+
+ case MBlaze::SWP32:
+ finalReg = MI->getOperand(2).getReg();
+ start->addSuccessor(exit);
+ start->addSuccessor(start);
+ break;
+
+ case MBlaze::LAN32:
+ case MBlaze::LAX32:
+ case MBlaze::LAO32:
+ case MBlaze::LAD32:
+ case MBlaze::LAS32:
+ case MBlaze::LAA32: {
+ unsigned opcode = 0;
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("Cannot lower unknown atomic load!");
+ case MBlaze::LAA32: opcode = MBlaze::ADDIK; break;
+ case MBlaze::LAS32: opcode = MBlaze::RSUBIK; break;
+ case MBlaze::LAD32: opcode = MBlaze::AND; break;
+ case MBlaze::LAO32: opcode = MBlaze::OR; break;
+ case MBlaze::LAX32: opcode = MBlaze::XOR; break;
+ case MBlaze::LAN32: opcode = MBlaze::AND; break;
+ }
+
+ finalReg = R.createVirtualRegister(MBlaze::GPRRegisterClass);
+ start->addSuccessor(exit);
+ start->addSuccessor(start);
+
+ BuildMI(start, dl, TII->get(opcode), finalReg)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(2).getReg());
+
+ if (MI->getOpcode() == MBlaze::LAN32) {
+ unsigned tmp = finalReg;
+ finalReg = R.createVirtualRegister(MBlaze::GPRRegisterClass);
+ BuildMI(start, dl, TII->get(MBlaze::XORI), finalReg)
+ .addReg(tmp)
+ .addImm(-1);
+ }
+ break;
+ }
+
+ case MBlaze::CAS32: {
+ finalReg = MI->getOperand(3).getReg();
+ final = F->CreateMachineBasicBlock(LLVM_BB);
+
+ F->insert(It, final);
+ start->addSuccessor(exit);
+ start->addSuccessor(final);
+ final->addSuccessor(exit);
+ final->addSuccessor(start);
+
+ unsigned CMP = R.createVirtualRegister(MBlaze::GPRRegisterClass);
+ BuildMI(start, dl, TII->get(MBlaze::CMP), CMP)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(2).getReg());
+
+ BuildMI(start, dl, TII->get(MBlaze::BNEID))
+ .addReg(CMP)
+ .addMBB(exit);
+
+ final->moveAfter(start);
+ exit->moveAfter(final);
+ break;
+ }
+ }
+
+ unsigned CHK = R.createVirtualRegister(MBlaze::GPRRegisterClass);
+ BuildMI(final, dl, TII->get(MBlaze::SWX))
+ .addReg(finalReg)
+ .addReg(MI->getOperand(1).getReg())
+ .addReg(MBlaze::R0);
+
+ BuildMI(final, dl, TII->get(MBlaze::ADDIC), CHK)
+ .addReg(MBlaze::R0)
+ .addImm(0);
+
+ BuildMI(final, dl, TII->get(MBlaze::BNEID))
+ .addReg(CHK)
+ .addMBB(start);
+
+ // The pseudo instruction is no longer needed so remove it
+ MI->eraseFromParent();
+ return exit;
+}
+
+//===----------------------------------------------------------------------===//
+// Misc Lower Operation implementation
+//===----------------------------------------------------------------------===//
+//
+
+SDValue MBlazeTargetLowering::LowerSELECT_CC(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ SDValue TrueVal = Op.getOperand(2);
+ SDValue FalseVal = Op.getOperand(3);
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned Opc;
+
+ SDValue CompareFlag;
+ if (LHS.getValueType() == MVT::i32) {
+ Opc = MBlazeISD::Select_CC;
+ CompareFlag = DAG.getNode(MBlazeISD::ICmp, dl, MVT::i32, LHS, RHS)
+ .getValue(1);
+ } else {
+ llvm_unreachable("Cannot lower select_cc with unknown type");
+ }
+
+ return DAG.getNode(Opc, dl, TrueVal.getValueType(), TrueVal, FalseVal,
+ CompareFlag);
+}
+
+SDValue MBlazeTargetLowering::
+LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
+ // FIXME there isn't actually debug info here
+ DebugLoc dl = Op.getDebugLoc();
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32);
+
+ return DAG.getNode(MBlazeISD::Wrap, dl, MVT::i32, GA);
+}
+
+SDValue MBlazeTargetLowering::
+LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
+ llvm_unreachable("TLS not implemented for MicroBlaze.");
+ return SDValue(); // Not reached
+}
+
+SDValue MBlazeTargetLowering::
+LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
+ SDValue ResNode;
+ SDValue HiPart;
+ // FIXME there isn't actually debug info here
+ DebugLoc dl = Op.getDebugLoc();
+
+ EVT PtrVT = Op.getValueType();
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+
+ SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, 0);
+ return DAG.getNode(MBlazeISD::Wrap, dl, MVT::i32, JTI);
+}
+
+SDValue MBlazeTargetLowering::
+LowerConstantPool(SDValue Op, SelectionDAG &DAG) const {
+ SDValue ResNode;
+ ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
+ const Constant *C = N->getConstVal();
+ DebugLoc dl = Op.getDebugLoc();
+
+ SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(),
+ N->getOffset(), 0);
+ return DAG.getNode(MBlazeISD::Wrap, dl, MVT::i32, CP);
+}
+
+SDValue MBlazeTargetLowering::LowerVASTART(SDValue Op,
+ SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MBlazeFunctionInfo *FuncInfo = MF.getInfo<MBlazeFunctionInfo>();
+
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
+ getPointerTy());
+
+ // vastart just stores the address of the VarArgsFrameIndex slot into the
+ // memory location argument.
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+ return DAG.getStore(Op.getOperand(0), dl, FI, Op.getOperand(1),
+ MachinePointerInfo(SV),
+ false, false, 0);
+}
+
+//===----------------------------------------------------------------------===//
+// Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+#include "MBlazeGenCallingConv.inc"
+
+static bool CC_MBlaze_AssignReg(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ static const unsigned ArgRegs[] = {
+ MBlaze::R5, MBlaze::R6, MBlaze::R7,
+ MBlaze::R8, MBlaze::R9, MBlaze::R10
+ };
+
+ const unsigned NumArgRegs = array_lengthof(ArgRegs);
+ unsigned Reg = State.AllocateReg(ArgRegs, NumArgRegs);
+ if (!Reg) return false;
+
+ unsigned SizeInBytes = ValVT.getSizeInBits() >> 3;
+ State.AllocateStack(SizeInBytes, SizeInBytes);
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+
+ return true;
+}
+
+//===----------------------------------------------------------------------===//
+// Call Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+/// LowerCall - functions arguments are copied from virtual regs to
+/// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted.
+/// TODO: isVarArg, isTailCall.
+SDValue MBlazeTargetLowering::
+LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
+ bool isVarArg, bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ // MBlaze does not yet support tail call optimization
+ isTailCall = false;
+
+ // The MBlaze requires stack slots for arguments passed to var arg
+ // functions even if they are passed in registers.
+ bool needsRegArgSlots = isVarArg;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const TargetFrameLowering &TFI = *MF.getTarget().getFrameLowering();
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+ CCInfo.AnalyzeCallOperands(Outs, CC_MBlaze);
+
+ // Get a count of how many bytes are to be pushed on the stack.
+ unsigned NumBytes = CCInfo.getNextStackOffset();
+
+ // Variable argument function calls require a minimum of 24-bytes of stack
+ if (isVarArg && NumBytes < 24) NumBytes = 24;
+
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+
+ SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+ SmallVector<SDValue, 8> MemOpChains;
+
+ // Walk the register/memloc assignments, inserting copies/loads.
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ MVT RegVT = VA.getLocVT();
+ SDValue Arg = OutVals[i];
+
+ // Promote the value if needed.
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
+ break;
+ case CCValAssign::AExt:
+ Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
+ break;
+ }
+
+ // Arguments that can be passed on register must be kept at
+ // RegsToPass vector
+ if (VA.isRegLoc()) {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ } else {
+ // Register can't get to this point...
+ assert(VA.isMemLoc());
+
+ // Since we are alread passing values on the stack we don't
+ // need to worry about creating additional slots for the
+ // values passed via registers.
+ needsRegArgSlots = false;
+
+ // Create the frame index object for this incoming parameter
+ unsigned ArgSize = VA.getValVT().getSizeInBits()/8;
+ unsigned StackLoc = VA.getLocMemOffset() + 4;
+ int FI = MFI->CreateFixedObject(ArgSize, StackLoc, true);
+
+ SDValue PtrOff = DAG.getFrameIndex(FI,getPointerTy());
+
+ // emit ISD::STORE whichs stores the
+ // parameter value to a stack Location
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(),
+ false, false, 0));
+ }
+ }
+
+ // If we need to reserve stack space for the arguments passed via registers
+ // then create a fixed stack object at the beginning of the stack.
+ if (needsRegArgSlots && TFI.hasReservedCallFrame(MF))
+ MFI->CreateFixedObject(28,0,true);
+
+ // Transform all store nodes into one single node because all store
+ // nodes are independent of each other.
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+ // Build a sequence of copy-to-reg nodes chained together with token
+ // chain and flag operands which copy the outgoing args into registers.
+ // The InFlag in necessary since all emitted instructions must be
+ // stuck together.
+ SDValue InFlag;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
+ // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
+ // node so that legalize doesn't hack it.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
+ getPointerTy(), 0, 0);
+ else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
+ Callee = DAG.getTargetExternalSymbol(S->getSymbol(),
+ getPointerTy(), 0);
+
+ // MBlazeJmpLink = #chain, #target_address, #opt_in_flags...
+ // = Chain, Callee, Reg#1, Reg#2, ...
+ //
+ // Returns a chain & a flag for retval copy to use.
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ // Add argument registers to the end of the list so that they are
+ // known live into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+ }
+
+ if (InFlag.getNode())
+ Ops.push_back(InFlag);
+
+ Chain = DAG.getNode(MBlazeISD::JmpLink, dl, NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ // Create the CALLSEQ_END node.
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
+ DAG.getIntPtrConstant(0, true), InFlag);
+ if (!Ins.empty())
+ InFlag = Chain.getValue(1);
+
+ // Handle result values, copying them out of physregs into vregs that we
+ // return.
+ return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
+ Ins, dl, DAG, InVals);
+}
+
+/// LowerCallResult - Lower the result values of a call into the
+/// appropriate copies out of appropriate physical registers.
+SDValue MBlazeTargetLowering::
+LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv,
+ bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+
+ CCInfo.AnalyzeCallResult(Ins, RetCC_MBlaze);
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ Chain = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(),
+ RVLocs[i].getValVT(), InFlag).getValue(1);
+ InFlag = Chain.getValue(2);
+ InVals.push_back(Chain.getValue(0));
+ }
+
+ return Chain;
+}
+
+//===----------------------------------------------------------------------===//
+// Formal Arguments Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+/// LowerFormalArguments - transform physical registers into
+/// virtual registers and generate load operations for
+/// arguments places on the stack.
+SDValue MBlazeTargetLowering::
+LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
+
+ unsigned StackReg = MF.getTarget().getRegisterInfo()->getFrameRegister(MF);
+ MBlazeFI->setVarArgsFrameIndex(0);
+
+ // Used with vargs to acumulate store chains.
+ std::vector<SDValue> OutChains;
+
+ // Keep track of the last register used for arguments
+ unsigned ArgRegEnd = 0;
+
+ // Assign locations to all of the incoming arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+
+ CCInfo.AnalyzeFormalArguments(Ins, CC_MBlaze);
+ SDValue StackPtr;
+
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+
+ // Arguments stored on registers
+ if (VA.isRegLoc()) {
+ MVT RegVT = VA.getLocVT();
+ ArgRegEnd = VA.getLocReg();
+ TargetRegisterClass *RC = 0;
+
+ if (RegVT == MVT::i32)
+ RC = MBlaze::GPRRegisterClass;
+ else if (RegVT == MVT::f32)
+ RC = MBlaze::GPRRegisterClass;
+ else
+ llvm_unreachable("RegVT not supported by LowerFormalArguments");
+
+ // Transform the arguments stored on
+ // physical registers into virtual ones
+ unsigned Reg = MF.addLiveIn(ArgRegEnd, RC);
+ SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
+
+ // If this is an 8 or 16-bit value, it has been passed promoted
+ // to 32 bits. Insert an assert[sz]ext to capture this, then
+ // truncate to the right size. If if is a floating point value
+ // then convert to the correct type.
+ if (VA.getLocInfo() != CCValAssign::Full) {
+ unsigned Opcode = 0;
+ if (VA.getLocInfo() == CCValAssign::SExt)
+ Opcode = ISD::AssertSext;
+ else if (VA.getLocInfo() == CCValAssign::ZExt)
+ Opcode = ISD::AssertZext;
+ if (Opcode)
+ ArgValue = DAG.getNode(Opcode, dl, RegVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+ ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
+ }
+
+ InVals.push_back(ArgValue);
+ } else { // VA.isRegLoc()
+ // sanity check
+ assert(VA.isMemLoc());
+
+ // The last argument is not a register
+ ArgRegEnd = 0;
+
+ // The stack pointer offset is relative to the caller stack frame.
+ // Since the real stack size is unknown here, a negative SPOffset
+ // is used so there's a way to adjust these offsets when the stack
+ // size get known (on EliminateFrameIndex). A dummy SPOffset is
+ // used instead of a direct negative address (which is recorded to
+ // be used on emitPrologue) to avoid mis-calc of the first stack
+ // offset on PEI::calculateFrameObjectOffsets.
+ // Arguments are always 32-bit.
+ unsigned ArgSize = VA.getLocVT().getSizeInBits()/8;
+ unsigned StackLoc = VA.getLocMemOffset() + 4;
+ int FI = MFI->CreateFixedObject(ArgSize, 0, true);
+ MBlazeFI->recordLoadArgsFI(FI, -StackLoc);
+ MBlazeFI->recordLiveIn(FI);
+
+ // Create load nodes to retrieve arguments from the stack
+ SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+ InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
+ MachinePointerInfo::getFixedStack(FI),
+ false, false, 0));
+ }
+ }
+
+ // To meet ABI, when VARARGS are passed on registers, the registers
+ // must have their values written to the caller stack frame. If the last
+ // argument was placed in the stack, there's no need to save any register.
+ if ((isVarArg) && ArgRegEnd) {
+ if (StackPtr.getNode() == 0)
+ StackPtr = DAG.getRegister(StackReg, getPointerTy());
+
+ // The last register argument that must be saved is MBlaze::R10
+ TargetRegisterClass *RC = MBlaze::GPRRegisterClass;
+
+ unsigned Begin = getMBlazeRegisterNumbering(MBlaze::R5);
+ unsigned Start = getMBlazeRegisterNumbering(ArgRegEnd+1);
+ unsigned End = getMBlazeRegisterNumbering(MBlaze::R10);
+ unsigned StackLoc = Start - Begin + 1;
+
+ for (; Start <= End; ++Start, ++StackLoc) {
+ unsigned Reg = getMBlazeRegisterFromNumbering(Start);
+ unsigned LiveReg = MF.addLiveIn(Reg, RC);
+ SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, LiveReg, MVT::i32);
+
+ int FI = MFI->CreateFixedObject(4, 0, true);
+ MBlazeFI->recordStoreVarArgsFI(FI, -(StackLoc*4));
+ SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy());
+ OutChains.push_back(DAG.getStore(Chain, dl, ArgValue, PtrOff,
+ MachinePointerInfo(),
+ false, false, 0));
+
+ // Record the frame index of the first variable argument
+ // which is a value necessary to VASTART.
+ if (!MBlazeFI->getVarArgsFrameIndex())
+ MBlazeFI->setVarArgsFrameIndex(FI);
+ }
+ }
+
+ // All stores are grouped in one node to allow the matching between
+ // the size of Ins and InVals. This only happens when on varg functions
+ if (!OutChains.empty()) {
+ OutChains.push_back(Chain);
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &OutChains[0], OutChains.size());
+ }
+
+ return Chain;
+}
+
+//===----------------------------------------------------------------------===//
+// Return Value Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+SDValue MBlazeTargetLowering::
+LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const {
+ // CCValAssign - represent the assignment of
+ // the return value to a location
+ SmallVector<CCValAssign, 16> RVLocs;
+
+ // CCState - Info about the registers and stack slot.
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+
+ // Analize return values.
+ CCInfo.AnalyzeReturn(Outs, RetCC_MBlaze);
+
+ // If this is the first return lowered for this function, add
+ // the regs to the liveout set for the function.
+ if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
+ for (unsigned i = 0; i != RVLocs.size(); ++i)
+ if (RVLocs[i].isRegLoc())
+ DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
+ }
+
+ SDValue Flag;
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
+ OutVals[i], Flag);
+
+ // guarantee that all emitted copies are
+ // stuck together, avoiding something bad
+ Flag = Chain.getValue(1);
+ }
+
+ // If this function is using the interrupt_handler calling convention
+ // then use "rtid r14, 0" otherwise use "rtsd r15, 8"
+ unsigned Ret = (CallConv == llvm::CallingConv::MBLAZE_INTR) ? MBlazeISD::IRet
+ : MBlazeISD::Ret;
+ unsigned Reg = (CallConv == llvm::CallingConv::MBLAZE_INTR) ? MBlaze::R14
+ : MBlaze::R15;
+ SDValue DReg = DAG.getRegister(Reg, MVT::i32);
+
+ if (Flag.getNode())
+ return DAG.getNode(Ret, dl, MVT::Other, Chain, DReg, Flag);
+
+ return DAG.getNode(Ret, dl, MVT::Other, Chain, DReg);
+}
+
+//===----------------------------------------------------------------------===//
+// MBlaze Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+/// getConstraintType - Given a constraint letter, return the type of
+/// constraint it is for this target.
+MBlazeTargetLowering::ConstraintType MBlazeTargetLowering::
+getConstraintType(const std::string &Constraint) const
+{
+ // MBlaze specific constrainy
+ //
+ // 'd' : An address register. Equivalent to r.
+ // 'y' : Equivalent to r; retained for
+ // backwards compatibility.
+ // 'f' : Floating Point registers.
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ default : break;
+ case 'd':
+ case 'y':
+ case 'f':
+ return C_RegisterClass;
+ break;
+ }
+ }
+ return TargetLowering::getConstraintType(Constraint);
+}
+
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+MBlazeTargetLowering::getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const {
+ ConstraintWeight weight = CW_Invalid;
+ Value *CallOperandVal = info.CallOperandVal;
+ // If we don't have a value, we can't do a match,
+ // but allow it at the lowest weight.
+ if (CallOperandVal == NULL)
+ return CW_Default;
+ Type *type = CallOperandVal->getType();
+ // Look at the constraint type.
+ switch (*constraint) {
+ default:
+ weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
+ break;
+ case 'd':
+ case 'y':
+ if (type->isIntegerTy())
+ weight = CW_Register;
+ break;
+ case 'f':
+ if (type->isFloatTy())
+ weight = CW_Register;
+ break;
+ }
+ return weight;
+}
+
+/// Given a register class constraint, like 'r', if this corresponds directly
+/// to an LLVM register class, return a register of 0 and the register class
+/// pointer.
+std::pair<unsigned, const TargetRegisterClass*> MBlazeTargetLowering::
+getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ case 'r':
+ return std::make_pair(0U, MBlaze::GPRRegisterClass);
+ // TODO: These can't possibly be right, but match what was in
+ // getRegClassForInlineAsmConstraint.
+ case 'd':
+ case 'y':
+ case 'f':
+ if (VT == MVT::f32)
+ return std::make_pair(0U, MBlaze::GPRRegisterClass);
+ }
+ }
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
+
+bool MBlazeTargetLowering::
+isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+ // The MBlaze target isn't yet aware of offsets.
+ return false;
+}
+
+bool MBlazeTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
+ return VT != MVT::f32;
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeISelLowering.h b/contrib/llvm/lib/Target/MBlaze/MBlazeISelLowering.h
new file mode 100644
index 0000000..8b49bc3
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeISelLowering.h
@@ -0,0 +1,185 @@
+//===-- MBlazeISelLowering.h - MBlaze DAG Lowering Interface ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that MBlaze uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBlazeISELLOWERING_H
+#define MBlazeISELLOWERING_H
+
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
+#include "MBlaze.h"
+#include "MBlazeSubtarget.h"
+
+namespace llvm {
+ namespace MBlazeCC {
+ enum CC {
+ FIRST = 0,
+ EQ,
+ NE,
+ GT,
+ LT,
+ GE,
+ LE
+ };
+
+ inline static CC getOppositeCondition(CC cc) {
+ switch (cc) {
+ default: llvm_unreachable("Unknown condition code");
+ case EQ: return NE;
+ case NE: return EQ;
+ case GT: return LE;
+ case LT: return GE;
+ case GE: return LT;
+ case LE: return GE;
+ }
+ }
+
+ inline static const char *MBlazeCCToString(CC cc) {
+ switch (cc) {
+ default: llvm_unreachable("Unknown condition code");
+ case EQ: return "eq";
+ case NE: return "ne";
+ case GT: return "gt";
+ case LT: return "lt";
+ case GE: return "ge";
+ case LE: return "le";
+ }
+ }
+ }
+
+ namespace MBlazeISD {
+ enum NodeType {
+ // Start the numbering from where ISD NodeType finishes.
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+ // Jump and link (call)
+ JmpLink,
+
+ // Handle gp_rel (small data/bss sections) relocation.
+ GPRel,
+
+ // Select CC Pseudo Instruction
+ Select_CC,
+
+ // Wrap up multiple types of instructions
+ Wrap,
+
+ // Integer Compare
+ ICmp,
+
+ // Return from subroutine
+ Ret,
+
+ // Return from interrupt
+ IRet
+ };
+ }
+
+ //===--------------------------------------------------------------------===//
+ // TargetLowering Implementation
+ //===--------------------------------------------------------------------===//
+
+ class MBlazeTargetLowering : public TargetLowering {
+ public:
+ explicit MBlazeTargetLowering(MBlazeTargetMachine &TM);
+
+ /// LowerOperation - Provide custom lowering hooks for some operations.
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+
+ /// getTargetNodeName - This method returns the name of a target specific
+ // DAG node.
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+ /// getSetCCResultType - get the ISD::SETCC result ValueType
+ EVT getSetCCResultType(EVT VT) const;
+
+ private:
+ // Subtarget Info
+ const MBlazeSubtarget *Subtarget;
+
+
+ // Lower Operand helpers
+ SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ // Lower Operand specifics
+ SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
+
+ virtual SDValue
+ LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const;
+
+ virtual MachineBasicBlock*
+ EmitCustomShift(MachineInstr *MI, MachineBasicBlock *MBB) const;
+
+ virtual MachineBasicBlock*
+ EmitCustomSelect(MachineInstr *MI, MachineBasicBlock *MBB) const;
+
+ virtual MachineBasicBlock*
+ EmitCustomAtomic(MachineInstr *MI, MachineBasicBlock *MBB) const;
+
+ virtual MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+
+ // Inline asm support
+ ConstraintType getConstraintType(const std::string &Constraint) const;
+
+ /// Examine constraint string and operand type and determine a weight value.
+ /// The operand object must already have been set up with the operand type.
+ ConstraintWeight getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const;
+
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const;
+
+ virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
+
+ /// isFPImmLegal - Returns true if the target can instruction select the
+ /// specified FP immediate natively. If false, the legalizer will
+ /// materialize the FP immediate as a load from a constant pool.
+ virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
+ };
+}
+
+#endif // MBlazeISELLOWERING_H
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeInstrFPU.td b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrFPU.td
new file mode 100644
index 0000000..4acdcfd
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrFPU.td
@@ -0,0 +1,219 @@
+//===- MBlazeInstrFPU.td - MBlaze FPU Instruction defs -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MBlaze profiles and nodes
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MBlaze Operand, Complex Patterns and Transformations Definitions.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Memory Access Instructions
+//===----------------------------------------------------------------------===//
+class LoadFM<bits<6> op, string instr_asm, PatFrag OpNode> :
+ TA<op, 0x000, (outs GPR:$dst), (ins memrr:$addr),
+ !strconcat(instr_asm, " $dst, $addr"),
+ [(set (f32 GPR:$dst), (OpNode xaddr:$addr))], IIC_MEMl>;
+
+class LoadFMI<bits<6> op, string instr_asm, PatFrag OpNode> :
+ TB<op, (outs GPR:$dst), (ins memri:$addr),
+ !strconcat(instr_asm, " $dst, $addr"),
+ [(set (f32 GPR:$dst), (OpNode iaddr:$addr))], IIC_MEMl>;
+
+class StoreFM<bits<6> op, string instr_asm, PatFrag OpNode> :
+ TA<op, 0x000, (outs), (ins GPR:$dst, memrr:$addr),
+ !strconcat(instr_asm, " $dst, $addr"),
+ [(OpNode (f32 GPR:$dst), xaddr:$addr)], IIC_MEMs>;
+
+class StoreFMI<bits<6> op, string instr_asm, PatFrag OpNode> :
+ TB<op, (outs), (ins GPR:$dst, memrr:$addr),
+ !strconcat(instr_asm, " $dst, $addr"),
+ [(OpNode (f32 GPR:$dst), iaddr:$addr)], IIC_MEMs>;
+
+class ArithF<bits<6> op, bits<11> flags, string instr_asm, SDNode OpNode,
+ InstrItinClass itin> :
+ TA<op, flags, (outs GPR:$dst), (ins GPR:$b, GPR:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [(set GPR:$dst, (OpNode GPR:$b, GPR:$c))], itin>;
+
+class CmpFN<bits<6> op, bits<11> flags, string instr_asm,
+ InstrItinClass itin> :
+ TA<op, flags, (outs GPR:$dst), (ins GPR:$b, GPR:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [], itin>;
+
+class ArithFR<bits<6> op, bits<11> flags, string instr_asm, SDNode OpNode,
+ InstrItinClass itin> :
+ TAR<op, flags, (outs GPR:$dst), (ins GPR:$b, GPR:$c),
+ !strconcat(instr_asm, " $dst, $c, $b"),
+ [(set GPR:$dst, (OpNode GPR:$b, GPR:$c))], itin>;
+
+class LogicFI<bits<6> op, string instr_asm> :
+ TB<op, (outs GPR:$dst), (ins GPR:$b, fimm:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [], IIC_ALU>;
+
+let rb=0 in {
+ class ArithF2<bits<6> op, bits<11> flags, string instr_asm,
+ InstrItinClass itin> :
+ TA<op, flags, (outs GPR:$dst), (ins GPR:$b),
+ !strconcat(instr_asm, " $dst, $b"),
+ [], itin>;
+
+ class ArithIF<bits<6> op, bits<11> flags, string instr_asm,
+ InstrItinClass itin> :
+ TA<op, flags, (outs GPR:$dst), (ins GPR:$b),
+ !strconcat(instr_asm, " $dst, $b"),
+ [], itin>;
+
+ class ArithFI<bits<6> op, bits<11> flags, string instr_asm,
+ InstrItinClass itin> :
+ TA<op, flags, (outs GPR:$dst), (ins GPR:$b),
+ !strconcat(instr_asm, " $dst, $b"),
+ [], itin>;
+}
+
+//===----------------------------------------------------------------------===//
+// Pseudo instructions
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// FPU Arithmetic Instructions
+//===----------------------------------------------------------------------===//
+let Predicates=[HasFPU] in {
+ def FORI : LogicFI<0x28, "ori ">;
+ def FADD : ArithF<0x16, 0x000, "fadd ", fadd, IIC_FPU>;
+ def FRSUB : ArithFR<0x16, 0x080, "frsub ", fsub, IIC_FPU>;
+ def FMUL : ArithF<0x16, 0x100, "fmul ", fmul, IIC_FPU>;
+ def FDIV : ArithF<0x16, 0x180, "fdiv ", fdiv, IIC_FPUd>;
+}
+
+let Predicates=[HasFPU], isCodeGenOnly=1 in {
+ def LWF : LoadFM<0x32, "lw ", load>;
+ def LWFI : LoadFMI<0x3A, "lwi ", load>;
+
+ def SWF : StoreFM<0x36, "sw ", store>;
+ def SWFI : StoreFMI<0x3E, "swi ", store>;
+}
+
+let Predicates=[HasFPU,HasSqrt] in {
+ def FLT : ArithIF<0x16, 0x280, "flt ", IIC_FPUf>;
+ def FINT : ArithFI<0x16, 0x300, "fint ", IIC_FPUi>;
+ def FSQRT : ArithF2<0x16, 0x380, "fsqrt ", IIC_FPUs>;
+}
+
+let isAsCheapAsAMove = 1 in {
+ def FCMP_UN : CmpFN<0x16, 0x200, "fcmp.un", IIC_FPUc>;
+ def FCMP_LT : CmpFN<0x16, 0x210, "fcmp.lt", IIC_FPUc>;
+ def FCMP_EQ : CmpFN<0x16, 0x220, "fcmp.eq", IIC_FPUc>;
+ def FCMP_LE : CmpFN<0x16, 0x230, "fcmp.le", IIC_FPUc>;
+ def FCMP_GT : CmpFN<0x16, 0x240, "fcmp.gt", IIC_FPUc>;
+ def FCMP_NE : CmpFN<0x16, 0x250, "fcmp.ne", IIC_FPUc>;
+ def FCMP_GE : CmpFN<0x16, 0x260, "fcmp.ge", IIC_FPUc>;
+}
+
+
+let usesCustomInserter = 1 in {
+ def Select_FCC : MBlazePseudo<(outs GPR:$dst),
+ (ins GPR:$T, GPR:$F, GPR:$CMP, i32imm:$CC),
+ "; SELECT_FCC PSEUDO!",
+ []>;
+}
+
+// Floating point conversions
+let Predicates=[HasFPU] in {
+ def : Pat<(sint_to_fp GPR:$V), (FLT GPR:$V)>;
+ def : Pat<(fp_to_sint GPR:$V), (FINT GPR:$V)>;
+ def : Pat<(fsqrt GPR:$V), (FSQRT GPR:$V)>;
+}
+
+// SET_CC operations
+let Predicates=[HasFPU] in {
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETEQ),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (FCMP_EQ GPR:$L, GPR:$R), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETNE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (FCMP_EQ GPR:$L, GPR:$R), 1)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETOEQ),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (FCMP_EQ GPR:$L, GPR:$R), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETONE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (XOR (FCMP_UN GPR:$L, GPR:$R),
+ (FCMP_EQ GPR:$L, GPR:$R)), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETONE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (OR (FCMP_UN GPR:$L, GPR:$R),
+ (FCMP_EQ GPR:$L, GPR:$R)), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETGT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (FCMP_GT GPR:$L, GPR:$R), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETLT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (FCMP_LT GPR:$L, GPR:$R), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETGE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (FCMP_GE GPR:$L, GPR:$R), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETLE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (FCMP_LE GPR:$L, GPR:$R), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETOGT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (FCMP_GT GPR:$L, GPR:$R), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETOLT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (FCMP_LT GPR:$L, GPR:$R), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETOGE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (FCMP_GE GPR:$L, GPR:$R), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETOLE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (FCMP_LE GPR:$L, GPR:$R), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETUEQ),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (OR (FCMP_UN GPR:$L, GPR:$R),
+ (FCMP_EQ GPR:$L, GPR:$R)), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETUNE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (FCMP_NE GPR:$L, GPR:$R), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETUGT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (OR (FCMP_UN GPR:$L, GPR:$R),
+ (FCMP_GT GPR:$L, GPR:$R)), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETULT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (OR (FCMP_UN GPR:$L, GPR:$R),
+ (FCMP_LT GPR:$L, GPR:$R)), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETUGE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (OR (FCMP_UN GPR:$L, GPR:$R),
+ (FCMP_GE GPR:$L, GPR:$R)), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETULE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (OR (FCMP_UN GPR:$L, GPR:$R),
+ (FCMP_LE GPR:$L, GPR:$R)), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETO),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (FCMP_UN GPR:$L, GPR:$R), 1)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETUO),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (FCMP_UN GPR:$L, GPR:$R), 2)>;
+}
+
+// SELECT operations
+def : Pat<(select (i32 GPR:$C), (f32 GPR:$T), (f32 GPR:$F)),
+ (Select_FCC GPR:$T, GPR:$F, GPR:$C, 2)>;
+
+//===----------------------------------------------------------------------===//
+// Patterns for Floating Point Instructions
+//===----------------------------------------------------------------------===//
+def : Pat<(f32 fpimm:$imm), (FORI (i32 R0), fpimm:$imm)>;
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeInstrFSL.td b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrFSL.td
new file mode 100644
index 0000000..3082a7e
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrFSL.td
@@ -0,0 +1,229 @@
+//===- MBlazeInstrFSL.td - MBlaze FSL Instruction defs -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// FSL Instruction Formats
+//===----------------------------------------------------------------------===//
+class FSLGet<bits<6> op, bits<5> flags, string instr_asm, Intrinsic OpNode> :
+ MBlazeInst<op, FRCX, (outs GPR:$dst), (ins fslimm:$b),
+ !strconcat(instr_asm, " $dst, $b"),
+ [(set GPR:$dst, (OpNode immZExt4:$b))],IIC_FSLg>
+{
+ bits<5> rd;
+ bits<4> fslno;
+
+ let Inst{6-10} = rd;
+ let Inst{11-15} = 0x0;
+ let Inst{16} = 0x0;
+ let Inst{17-21} = flags; // NCTAE
+ let Inst{22-27} = 0x0;
+ let Inst{28-31} = fslno;
+}
+
+class FSLGetD<bits<6> op, bits<5> flags, string instr_asm, Intrinsic OpNode> :
+ MBlazeInst<op, FRCR, (outs GPR:$dst), (ins GPR:$b),
+ !strconcat(instr_asm, " $dst, $b"),
+ [(set GPR:$dst, (OpNode GPR:$b))], IIC_FSLg>
+{
+ bits<5> rd;
+ bits<5> rb;
+
+ let Inst{6-10} = rd;
+ let Inst{11-15} = 0x0;
+ let Inst{16-20} = rb;
+ let Inst{21} = 0x0;
+ let Inst{22-26} = flags; // NCTAE
+ let Inst{27-31} = 0x0;
+}
+
+class FSLPut<bits<6> op, bits<4> flags, string instr_asm, Intrinsic OpNode> :
+ MBlazeInst<op, FCRCX, (outs), (ins GPR:$v, fslimm:$b),
+ !strconcat(instr_asm, " $v, $b"),
+ [(OpNode GPR:$v, immZExt4:$b)], IIC_FSLp>
+{
+ bits<5> ra;
+ bits<4> fslno;
+
+ let Inst{6-10} = 0x0;
+ let Inst{11-15} = ra;
+ let Inst{16} = 0x1;
+ let Inst{17-20} = flags; // NCTA
+ let Inst{21-27} = 0x0;
+ let Inst{28-31} = fslno;
+}
+
+class FSLPutD<bits<6> op, bits<4> flags, string instr_asm, Intrinsic OpNode> :
+ MBlazeInst<op, FCRR, (outs), (ins GPR:$v, GPR:$b),
+ !strconcat(instr_asm, " $v, $b"),
+ [(OpNode GPR:$v, GPR:$b)], IIC_FSLp>
+{
+ bits<5> ra;
+ bits<5> rb;
+
+ let Inst{6-10} = 0x0;
+ let Inst{11-15} = ra;
+ let Inst{16-20} = rb;
+ let Inst{21} = 0x1;
+ let Inst{22-25} = flags; // NCTA
+ let Inst{26-31} = 0x0;
+}
+
+class FSLPutT<bits<6> op, bits<4> flags, string instr_asm, Intrinsic OpNode> :
+ MBlazeInst<op, FCX, (outs), (ins fslimm:$b),
+ !strconcat(instr_asm, " $b"),
+ [(OpNode immZExt4:$b)], IIC_FSLp>
+{
+ bits<4> fslno;
+
+ let Inst{6-10} = 0x0;
+ let Inst{11-15} = 0x0;
+ let Inst{16} = 0x1;
+ let Inst{17-20} = flags; // NCTA
+ let Inst{21-27} = 0x0;
+ let Inst{28-31} = fslno;
+}
+
+class FSLPutTD<bits<6> op, bits<4> flags, string instr_asm, Intrinsic OpNode> :
+ MBlazeInst<op, FCR, (outs), (ins GPR:$b),
+ !strconcat(instr_asm, " $b"),
+ [(OpNode GPR:$b)], IIC_FSLp>
+{
+ bits<5> rb;
+
+ let Inst{6-10} = 0x0;
+ let Inst{11-15} = 0x0;
+ let Inst{16-20} = rb;
+ let Inst{21} = 0x1;
+ let Inst{22-25} = flags; // NCTA
+ let Inst{26-31} = 0x0;
+}
+
+//===----------------------------------------------------------------------===//
+// FSL Get Instructions
+//===----------------------------------------------------------------------===//
+def GET : FSLGet<0x1B, 0x00, "get ", int_mblaze_fsl_get>;
+def AGET : FSLGet<0x1B, 0x02, "aget ", int_mblaze_fsl_aget>;
+def CGET : FSLGet<0x1B, 0x08, "cget ", int_mblaze_fsl_cget>;
+def CAGET : FSLGet<0x1B, 0x0A, "caget ", int_mblaze_fsl_caget>;
+def EGET : FSLGet<0x1B, 0x01, "eget ", int_mblaze_fsl_eget>;
+def EAGET : FSLGet<0x1B, 0x03, "eaget ", int_mblaze_fsl_eaget>;
+def ECGET : FSLGet<0x1B, 0x09, "ecget ", int_mblaze_fsl_ecget>;
+def ECAGET : FSLGet<0x1B, 0x0B, "ecaget ", int_mblaze_fsl_ecaget>;
+def TGET : FSLGet<0x1B, 0x04, "tget ", int_mblaze_fsl_tget>;
+def TAGET : FSLGet<0x1B, 0x06, "taget ", int_mblaze_fsl_taget>;
+def TCGET : FSLGet<0x1B, 0x0C, "tcget ", int_mblaze_fsl_tcget>;
+def TCAGET : FSLGet<0x1B, 0x0E, "tcaget ", int_mblaze_fsl_tcaget>;
+def TEGET : FSLGet<0x1B, 0x05, "teget ", int_mblaze_fsl_teget>;
+def TEAGET : FSLGet<0x1B, 0x07, "teaget ", int_mblaze_fsl_teaget>;
+def TECGET : FSLGet<0x1B, 0x0D, "tecget ", int_mblaze_fsl_tecget>;
+def TECAGET : FSLGet<0x1B, 0x0F, "tecaget ", int_mblaze_fsl_tecaget>;
+
+let Defs = [CARRY] in {
+ def NGET : FSLGet<0x1B, 0x10, "nget ", int_mblaze_fsl_nget>;
+ def NAGET : FSLGet<0x1B, 0x12, "naget ", int_mblaze_fsl_naget>;
+ def NCGET : FSLGet<0x1B, 0x18, "ncget ", int_mblaze_fsl_ncget>;
+ def NCAGET : FSLGet<0x1B, 0x1A, "ncaget ", int_mblaze_fsl_ncaget>;
+ def NEGET : FSLGet<0x1B, 0x11, "neget ", int_mblaze_fsl_neget>;
+ def NEAGET : FSLGet<0x1B, 0x13, "neaget ", int_mblaze_fsl_neaget>;
+ def NECGET : FSLGet<0x1B, 0x19, "necget ", int_mblaze_fsl_necget>;
+ def NECAGET : FSLGet<0x1B, 0x1B, "necaget ", int_mblaze_fsl_necaget>;
+ def TNGET : FSLGet<0x1B, 0x14, "tnget ", int_mblaze_fsl_tnget>;
+ def TNAGET : FSLGet<0x1B, 0x16, "tnaget ", int_mblaze_fsl_tnaget>;
+ def TNCGET : FSLGet<0x1B, 0x1C, "tncget ", int_mblaze_fsl_tncget>;
+ def TNCAGET : FSLGet<0x1B, 0x1E, "tncaget ", int_mblaze_fsl_tncaget>;
+ def TNEGET : FSLGet<0x1B, 0x15, "tneget ", int_mblaze_fsl_tneget>;
+ def TNEAGET : FSLGet<0x1B, 0x17, "tneaget ", int_mblaze_fsl_tneaget>;
+ def TNECGET : FSLGet<0x1B, 0x1D, "tnecget ", int_mblaze_fsl_tnecget>;
+ def TNECAGET : FSLGet<0x1B, 0x1F, "tnecaget ", int_mblaze_fsl_tnecaget>;
+}
+
+//===----------------------------------------------------------------------===//
+// FSL Dynamic Get Instructions
+//===----------------------------------------------------------------------===//
+def GETD : FSLGetD<0x13, 0x00, "getd ", int_mblaze_fsl_get>;
+def AGETD : FSLGetD<0x13, 0x02, "agetd ", int_mblaze_fsl_aget>;
+def CGETD : FSLGetD<0x13, 0x08, "cgetd ", int_mblaze_fsl_cget>;
+def CAGETD : FSLGetD<0x13, 0x0A, "cagetd ", int_mblaze_fsl_caget>;
+def EGETD : FSLGetD<0x13, 0x01, "egetd ", int_mblaze_fsl_eget>;
+def EAGETD : FSLGetD<0x13, 0x03, "eagetd ", int_mblaze_fsl_eaget>;
+def ECGETD : FSLGetD<0x13, 0x09, "ecgetd ", int_mblaze_fsl_ecget>;
+def ECAGETD : FSLGetD<0x13, 0x0B, "ecagetd ", int_mblaze_fsl_ecaget>;
+def TGETD : FSLGetD<0x13, 0x04, "tgetd ", int_mblaze_fsl_tget>;
+def TAGETD : FSLGetD<0x13, 0x06, "tagetd ", int_mblaze_fsl_taget>;
+def TCGETD : FSLGetD<0x13, 0x0C, "tcgetd ", int_mblaze_fsl_tcget>;
+def TCAGETD : FSLGetD<0x13, 0x0E, "tcagetd ", int_mblaze_fsl_tcaget>;
+def TEGETD : FSLGetD<0x13, 0x05, "tegetd ", int_mblaze_fsl_teget>;
+def TEAGETD : FSLGetD<0x13, 0x07, "teagetd ", int_mblaze_fsl_teaget>;
+def TECGETD : FSLGetD<0x13, 0x0D, "tecgetd ", int_mblaze_fsl_tecget>;
+def TECAGETD : FSLGetD<0x13, 0x0F, "tecagetd ", int_mblaze_fsl_tecaget>;
+
+let Defs = [CARRY] in {
+ def NGETD : FSLGetD<0x13, 0x10, "ngetd ", int_mblaze_fsl_nget>;
+ def NAGETD : FSLGetD<0x13, 0x12, "nagetd ", int_mblaze_fsl_naget>;
+ def NCGETD : FSLGetD<0x13, 0x18, "ncgetd ", int_mblaze_fsl_ncget>;
+ def NCAGETD : FSLGetD<0x13, 0x1A, "ncagetd ", int_mblaze_fsl_ncaget>;
+ def NEGETD : FSLGetD<0x13, 0x11, "negetd ", int_mblaze_fsl_neget>;
+ def NEAGETD : FSLGetD<0x13, 0x13, "neagetd ", int_mblaze_fsl_neaget>;
+ def NECGETD : FSLGetD<0x13, 0x19, "necgetd ", int_mblaze_fsl_necget>;
+ def NECAGETD : FSLGetD<0x13, 0x1B, "necagetd ", int_mblaze_fsl_necaget>;
+ def TNGETD : FSLGetD<0x13, 0x14, "tngetd ", int_mblaze_fsl_tnget>;
+ def TNAGETD : FSLGetD<0x13, 0x16, "tnagetd ", int_mblaze_fsl_tnaget>;
+ def TNCGETD : FSLGetD<0x13, 0x1C, "tncgetd ", int_mblaze_fsl_tncget>;
+ def TNCAGETD : FSLGetD<0x13, 0x1E, "tncagetd ", int_mblaze_fsl_tncaget>;
+ def TNEGETD : FSLGetD<0x13, 0x15, "tnegetd ", int_mblaze_fsl_tneget>;
+ def TNEAGETD : FSLGetD<0x13, 0x17, "tneagetd ", int_mblaze_fsl_tneaget>;
+ def TNECGETD : FSLGetD<0x13, 0x1D, "tnecgetd ", int_mblaze_fsl_tnecget>;
+ def TNECAGETD : FSLGetD<0x13, 0x1F, "tnecagetd", int_mblaze_fsl_tnecaget>;
+}
+
+//===----------------------------------------------------------------------===//
+// FSL Put Instructions
+//===----------------------------------------------------------------------===//
+def PUT : FSLPut<0x1B, 0x0, "put ", int_mblaze_fsl_put>;
+def APUT : FSLPut<0x1B, 0x1, "aput ", int_mblaze_fsl_aput>;
+def CPUT : FSLPut<0x1B, 0x4, "cput ", int_mblaze_fsl_cput>;
+def CAPUT : FSLPut<0x1B, 0x5, "caput ", int_mblaze_fsl_caput>;
+def TPUT : FSLPutT<0x1B, 0x2, "tput ", int_mblaze_fsl_tput>;
+def TAPUT : FSLPutT<0x1B, 0x3, "taput ", int_mblaze_fsl_taput>;
+def TCPUT : FSLPutT<0x1B, 0x6, "tcput ", int_mblaze_fsl_tcput>;
+def TCAPUT : FSLPutT<0x1B, 0x7, "tcaput ", int_mblaze_fsl_tcaput>;
+
+let Defs = [CARRY] in {
+ def NPUT : FSLPut<0x1B, 0x8, "nput ", int_mblaze_fsl_nput>;
+ def NAPUT : FSLPut<0x1B, 0x9, "naput ", int_mblaze_fsl_naput>;
+ def NCPUT : FSLPut<0x1B, 0xC, "ncput ", int_mblaze_fsl_ncput>;
+ def NCAPUT : FSLPut<0x1B, 0xD, "ncaput ", int_mblaze_fsl_ncaput>;
+ def TNPUT : FSLPutT<0x1B, 0xA, "tnput ", int_mblaze_fsl_tnput>;
+ def TNAPUT : FSLPutT<0x1B, 0xB, "tnaput ", int_mblaze_fsl_tnaput>;
+ def TNCPUT : FSLPutT<0x1B, 0xE, "tncput ", int_mblaze_fsl_tncput>;
+ def TNCAPUT : FSLPutT<0x1B, 0xF, "tncaput ", int_mblaze_fsl_tncaput>;
+}
+
+//===----------------------------------------------------------------------===//
+// FSL Dynamic Put Instructions
+//===----------------------------------------------------------------------===//
+def PUTD : FSLPutD<0x13, 0x0, "putd ", int_mblaze_fsl_put>;
+def APUTD : FSLPutD<0x13, 0x1, "aputd ", int_mblaze_fsl_aput>;
+def CPUTD : FSLPutD<0x13, 0x4, "cputd ", int_mblaze_fsl_cput>;
+def CAPUTD : FSLPutD<0x13, 0x5, "caputd ", int_mblaze_fsl_caput>;
+def TPUTD : FSLPutTD<0x13, 0x2, "tputd ", int_mblaze_fsl_tput>;
+def TAPUTD : FSLPutTD<0x13, 0x3, "taputd ", int_mblaze_fsl_taput>;
+def TCPUTD : FSLPutTD<0x13, 0x6, "tcputd ", int_mblaze_fsl_tcput>;
+def TCAPUTD : FSLPutTD<0x13, 0x7, "tcaputd ", int_mblaze_fsl_tcaput>;
+
+let Defs = [CARRY] in {
+ def NPUTD : FSLPutD<0x13, 0x8, "nputd ", int_mblaze_fsl_nput>;
+ def NAPUTD : FSLPutD<0x13, 0x9, "naputd ", int_mblaze_fsl_naput>;
+ def NCPUTD : FSLPutD<0x13, 0xC, "ncputd ", int_mblaze_fsl_ncput>;
+ def NCAPUTD : FSLPutD<0x13, 0xD, "ncaputd ", int_mblaze_fsl_ncaput>;
+ def TNPUTD : FSLPutTD<0x13, 0xA, "tnputd ", int_mblaze_fsl_tnput>;
+ def TNAPUTD : FSLPutTD<0x13, 0xB, "tnaputd ", int_mblaze_fsl_tnaput>;
+ def TNCPUTD : FSLPutTD<0x13, 0xE, "tncputd ", int_mblaze_fsl_tncput>;
+ def TNCAPUTD : FSLPutTD<0x13, 0xF, "tncaputd ", int_mblaze_fsl_tncaput>;
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeInstrFormats.td b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrFormats.td
new file mode 100644
index 0000000..54f605f
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrFormats.td
@@ -0,0 +1,204 @@
+//===- MBlazeInstrFormats.td - MB Instruction defs ---------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// Format specifies the encoding used by the instruction. This is part of the
+// ad-hoc solution used to emit machine instruction encodings by our machine
+// code emitter.
+class Format<bits<6> val> {
+ bits<6> Value = val;
+}
+
+def FPseudo : Format<0>;
+def FRRR : Format<1>; // ADD, OR, etc.
+def FRRI : Format<2>; // ADDI, ORI, etc.
+def FCRR : Format<3>; // PUTD, WDC, WIC, BEQ, BNE, BGE, etc.
+def FCRI : Format<4>; // RTID, RTED, RTSD, BEQI, BNEI, BGEI, etc.
+def FRCR : Format<5>; // BRLD, BRALD, GETD
+def FRCI : Format<6>; // BRLID, BRALID, MSRCLR, MSRSET
+def FCCR : Format<7>; // BR, BRA, BRD, etc.
+def FCCI : Format<8>; // IMM, BRI, BRAI, BRID, etc.
+def FRRCI : Format<9>; // BSRLI, BSRAI, BSLLI
+def FRRC : Format<10>; // SEXT8, SEXT16, SRA, SRC, SRL, FLT, FINT, FSQRT
+def FRCX : Format<11>; // GET
+def FRCS : Format<12>; // MFS
+def FCRCS : Format<13>; // MTS
+def FCRCX : Format<14>; // PUT
+def FCX : Format<15>; // TPUT
+def FCR : Format<16>; // TPUTD
+def FRIR : Format<17>; // RSUBI
+def FRRRR : Format<18>; // RSUB, FRSUB
+def FRI : Format<19>; // RSUB, FRSUB
+def FC : Format<20>; // NOP
+
+//===----------------------------------------------------------------------===//
+// Describe MBlaze instructions format
+//
+// CPU INSTRUCTION FORMATS
+//
+// opcode - operation code.
+// rd - dst reg.
+// ra - first src. reg.
+// rb - second src. reg.
+// imm16 - 16-bit immediate value.
+//
+//===----------------------------------------------------------------------===//
+
+// Generic MBlaze Format
+class MBlazeInst<bits<6> op, Format form, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin> : Instruction {
+ let Namespace = "MBlaze";
+ field bits<32> Inst;
+
+ bits<6> opcode = op;
+ Format Form = form;
+ bits<6> FormBits = Form.Value;
+
+ // Top 6 bits are the 'opcode' field
+ let Inst{0-5} = opcode;
+
+ // If the instruction is marked as a pseudo, set isCodeGenOnly so that the
+ // assembler and disassmbler ignore it.
+ let isCodeGenOnly = !eq(!cast<string>(form), "FPseudo");
+
+ dag OutOperandList = outs;
+ dag InOperandList = ins;
+
+ let AsmString = asmstr;
+ let Pattern = pattern;
+ let Itinerary = itin;
+
+ // TSFlags layout should be kept in sync with MBlazeInstrInfo.h.
+ let TSFlags{5-0} = FormBits;
+}
+
+//===----------------------------------------------------------------------===//
+// Pseudo instruction class
+//===----------------------------------------------------------------------===//
+class MBlazePseudo<dag outs, dag ins, string asmstr, list<dag> pattern>:
+ MBlazeInst<0x0, FPseudo, outs, ins, asmstr, pattern, IIC_Pseudo>;
+
+//===----------------------------------------------------------------------===//
+// Type A instruction class in MBlaze : <|opcode|rd|ra|rb|flags|>
+//===----------------------------------------------------------------------===//
+
+class TA<bits<6> op, bits<11> flags, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin> :
+ MBlazeInst<op,FRRR,outs, ins, asmstr, pattern, itin>
+{
+ bits<5> rd;
+ bits<5> ra;
+ bits<5> rb;
+
+ let Inst{6-10} = rd;
+ let Inst{11-15} = ra;
+ let Inst{16-20} = rb;
+ let Inst{21-31} = flags;
+}
+
+//===----------------------------------------------------------------------===//
+// Type B instruction class in MBlaze : <|opcode|rd|ra|immediate|>
+//===----------------------------------------------------------------------===//
+
+class TB<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
+ InstrItinClass itin> :
+ MBlazeInst<op, FRRI, outs, ins, asmstr, pattern, itin>
+{
+ bits<5> rd;
+ bits<5> ra;
+ bits<16> imm16;
+
+ let Inst{6-10} = rd;
+ let Inst{11-15} = ra;
+ let Inst{16-31} = imm16;
+}
+
+//===----------------------------------------------------------------------===//
+// Type A instruction class in MBlaze but with the operands reversed
+// in the LLVM DAG : <|opcode|rd|ra|rb|flags|>
+//===----------------------------------------------------------------------===//
+
+class TAR<bits<6> op, bits<11> flags, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin> :
+ TA<op, flags, outs, ins, asmstr, pattern, itin>
+{
+ bits<5> rrd;
+ bits<5> rrb;
+ bits<5> rra;
+
+ let Form = FRRRR;
+
+ let rd = rrd;
+ let ra = rra;
+ let rb = rrb;
+}
+
+//===----------------------------------------------------------------------===//
+// Type B instruction class in MBlaze but with the operands reversed in
+// the LLVM DAG : <|opcode|rd|ra|immediate|>
+//===----------------------------------------------------------------------===//
+class TBR<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
+ InstrItinClass itin> :
+ TB<op, outs, ins, asmstr, pattern, itin> {
+ bits<5> rrd;
+ bits<16> rimm16;
+ bits<5> rra;
+
+ let Form = FRIR;
+
+ let rd = rrd;
+ let ra = rra;
+ let imm16 = rimm16;
+}
+
+//===----------------------------------------------------------------------===//
+// Shift immediate instruction class in MBlaze : <|opcode|rd|ra|immediate|>
+//===----------------------------------------------------------------------===//
+class SHT<bits<6> op, bits<2> flags, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin> :
+ MBlazeInst<op, FRRI, outs, ins, asmstr, pattern, itin> {
+ bits<5> rd;
+ bits<5> ra;
+ bits<5> imm5;
+
+ let Inst{6-10} = rd;
+ let Inst{11-15} = ra;
+ let Inst{16-20} = 0x0;
+ let Inst{21-22} = flags;
+ let Inst{23-26} = 0x0;
+ let Inst{27-31} = imm5;
+}
+
+//===----------------------------------------------------------------------===//
+// Special instruction class in MBlaze : <|opcode|rd|imm14|>
+//===----------------------------------------------------------------------===//
+class SPC<bits<6> op, bits<2> flags, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin> :
+ MBlazeInst<op, FRI, outs, ins, asmstr, pattern, itin> {
+ bits<5> rd;
+ bits<14> imm14;
+
+ let Inst{6-10} = rd;
+ let Inst{11-15} = 0x0;
+ let Inst{16-17} = flags;
+ let Inst{18-31} = imm14;
+}
+
+//===----------------------------------------------------------------------===//
+// MSR instruction class in MBlaze : <|opcode|rd|imm15|>
+//===----------------------------------------------------------------------===//
+class MSR<bits<6> op, bits<6> flags, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin> :
+ MBlazeInst<op, FRI, outs, ins, asmstr, pattern, itin> {
+ bits<5> rd;
+ bits<15> imm15;
+
+ let Inst{6-10} = rd;
+ let Inst{11-16} = flags;
+ let Inst{17-31} = imm15;
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.cpp
new file mode 100644
index 0000000..7ae05b3
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.cpp
@@ -0,0 +1,297 @@
+//===- MBlazeInstrInfo.cpp - MBlaze Instruction Information -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MBlaze implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MBlazeInstrInfo.h"
+#include "MBlazeTargetMachine.h"
+#include "MBlazeMachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/ADT/STLExtras.h"
+
+#define GET_INSTRINFO_CTOR
+#include "MBlazeGenInstrInfo.inc"
+
+using namespace llvm;
+
+MBlazeInstrInfo::MBlazeInstrInfo(MBlazeTargetMachine &tm)
+ : MBlazeGenInstrInfo(MBlaze::ADJCALLSTACKDOWN, MBlaze::ADJCALLSTACKUP),
+ TM(tm), RI(*TM.getSubtargetImpl(), *this) {}
+
+static bool isZeroImm(const MachineOperand &op) {
+ return op.isImm() && op.getImm() == 0;
+}
+
+/// isLoadFromStackSlot - If the specified machine instruction is a direct
+/// load from a stack slot, return the virtual or physical register number of
+/// the destination along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than loading from the stack slot.
+unsigned MBlazeInstrInfo::
+isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const {
+ if (MI->getOpcode() == MBlaze::LWI) {
+ if ((MI->getOperand(1).isFI()) && // is a stack slot
+ (MI->getOperand(2).isImm()) && // the imm is zero
+ (isZeroImm(MI->getOperand(2)))) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ }
+
+ return 0;
+}
+
+/// isStoreToStackSlot - If the specified machine instruction is a direct
+/// store to a stack slot, return the virtual or physical register number of
+/// the source reg along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than storing to the stack slot.
+unsigned MBlazeInstrInfo::
+isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const {
+ if (MI->getOpcode() == MBlaze::SWI) {
+ if ((MI->getOperand(1).isFI()) && // is a stack slot
+ (MI->getOperand(2).isImm()) && // the imm is zero
+ (isZeroImm(MI->getOperand(2)))) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ }
+ return 0;
+}
+
+/// insertNoop - If data hazard condition is found insert the target nop
+/// instruction.
+void MBlazeInstrInfo::
+insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const {
+ DebugLoc DL;
+ BuildMI(MBB, MI, DL, get(MBlaze::NOP));
+}
+
+void MBlazeInstrInfo::
+copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ llvm::BuildMI(MBB, I, DL, get(MBlaze::ADDK), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc)).addReg(MBlaze::R0);
+}
+
+void MBlazeInstrInfo::
+storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned SrcReg, bool isKill, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL;
+ BuildMI(MBB, I, DL, get(MBlaze::SWI)).addReg(SrcReg,getKillRegState(isKill))
+ .addFrameIndex(FI).addImm(0); //.addFrameIndex(FI);
+}
+
+void MBlazeInstrInfo::
+loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DestReg, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL;
+ BuildMI(MBB, I, DL, get(MBlaze::LWI), DestReg)
+ .addFrameIndex(FI).addImm(0); //.addFrameIndex(FI);
+}
+
+//===----------------------------------------------------------------------===//
+// Branch Analysis
+//===----------------------------------------------------------------------===//
+bool MBlazeInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ // If the block has no terminators, it just falls into the block after it.
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin())
+ return false;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return false;
+ --I;
+ }
+ if (!isUnpredicatedTerminator(I))
+ return false;
+
+ // Get the last instruction in the block.
+ MachineInstr *LastInst = I;
+
+ // If there is only one terminator instruction, process it.
+ unsigned LastOpc = LastInst->getOpcode();
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+ if (MBlaze::isUncondBranchOpcode(LastOpc)) {
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+ if (MBlaze::isCondBranchOpcode(LastOpc)) {
+ // Block ends with fall-through condbranch.
+ TBB = LastInst->getOperand(1).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
+ Cond.push_back(LastInst->getOperand(0));
+ return false;
+ }
+ // Otherwise, don't know what this is.
+ return true;
+ }
+
+ // Get the instruction before it if it's a terminator.
+ MachineInstr *SecondLastInst = I;
+
+ // If there are three terminators, we don't know what sort of block this is.
+ if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))
+ return true;
+
+ // If the block ends with something like BEQID then BRID, handle it.
+ if (MBlaze::isCondBranchOpcode(SecondLastInst->getOpcode()) &&
+ MBlaze::isUncondBranchOpcode(LastInst->getOpcode())) {
+ TBB = SecondLastInst->getOperand(1).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(SecondLastInst->getOpcode()));
+ Cond.push_back(SecondLastInst->getOperand(0));
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+
+ // If the block ends with two unconditional branches, handle it.
+ // The second one is not executed, so remove it.
+ if (MBlaze::isUncondBranchOpcode(SecondLastInst->getOpcode()) &&
+ MBlaze::isUncondBranchOpcode(LastInst->getOpcode())) {
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ I = LastInst;
+ if (AllowModify)
+ I->eraseFromParent();
+ return false;
+ }
+
+ // Otherwise, can't handle this.
+ return true;
+}
+
+unsigned MBlazeInstrInfo::
+InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
+ // Shouldn't be a fall through.
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert((Cond.size() == 2 || Cond.size() == 0) &&
+ "MBlaze branch conditions have two components!");
+
+ unsigned Opc = MBlaze::BRID;
+ if (!Cond.empty())
+ Opc = (unsigned)Cond[0].getImm();
+
+ if (FBB == 0) {
+ if (Cond.empty()) // Unconditional branch
+ BuildMI(&MBB, DL, get(Opc)).addMBB(TBB);
+ else // Conditional branch
+ BuildMI(&MBB, DL, get(Opc)).addReg(Cond[1].getReg()).addMBB(TBB);
+ return 1;
+ }
+
+ BuildMI(&MBB, DL, get(Opc)).addReg(Cond[1].getReg()).addMBB(TBB);
+ BuildMI(&MBB, DL, get(MBlaze::BRID)).addMBB(FBB);
+ return 2;
+}
+
+unsigned MBlazeInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin()) return 0;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return 0;
+ --I;
+ }
+
+ if (!MBlaze::isUncondBranchOpcode(I->getOpcode()) &&
+ !MBlaze::isCondBranchOpcode(I->getOpcode()))
+ return 0;
+
+ // Remove the branch.
+ I->eraseFromParent();
+
+ I = MBB.end();
+
+ if (I == MBB.begin()) return 1;
+ --I;
+ if (!MBlaze::isCondBranchOpcode(I->getOpcode()))
+ return 1;
+
+ // Remove the branch.
+ I->eraseFromParent();
+ return 2;
+}
+
+bool MBlazeInstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand>
+ &Cond) const {
+ assert(Cond.size() == 2 && "Invalid MBlaze branch opcode!");
+ switch (Cond[0].getImm()) {
+ default: return true;
+ case MBlaze::BEQ: Cond[0].setImm(MBlaze::BNE); return false;
+ case MBlaze::BNE: Cond[0].setImm(MBlaze::BEQ); return false;
+ case MBlaze::BGT: Cond[0].setImm(MBlaze::BLE); return false;
+ case MBlaze::BGE: Cond[0].setImm(MBlaze::BLT); return false;
+ case MBlaze::BLT: Cond[0].setImm(MBlaze::BGE); return false;
+ case MBlaze::BLE: Cond[0].setImm(MBlaze::BGT); return false;
+ case MBlaze::BEQI: Cond[0].setImm(MBlaze::BNEI); return false;
+ case MBlaze::BNEI: Cond[0].setImm(MBlaze::BEQI); return false;
+ case MBlaze::BGTI: Cond[0].setImm(MBlaze::BLEI); return false;
+ case MBlaze::BGEI: Cond[0].setImm(MBlaze::BLTI); return false;
+ case MBlaze::BLTI: Cond[0].setImm(MBlaze::BGEI); return false;
+ case MBlaze::BLEI: Cond[0].setImm(MBlaze::BGTI); return false;
+ case MBlaze::BEQD: Cond[0].setImm(MBlaze::BNED); return false;
+ case MBlaze::BNED: Cond[0].setImm(MBlaze::BEQD); return false;
+ case MBlaze::BGTD: Cond[0].setImm(MBlaze::BLED); return false;
+ case MBlaze::BGED: Cond[0].setImm(MBlaze::BLTD); return false;
+ case MBlaze::BLTD: Cond[0].setImm(MBlaze::BGED); return false;
+ case MBlaze::BLED: Cond[0].setImm(MBlaze::BGTD); return false;
+ case MBlaze::BEQID: Cond[0].setImm(MBlaze::BNEID); return false;
+ case MBlaze::BNEID: Cond[0].setImm(MBlaze::BEQID); return false;
+ case MBlaze::BGTID: Cond[0].setImm(MBlaze::BLEID); return false;
+ case MBlaze::BGEID: Cond[0].setImm(MBlaze::BLTID); return false;
+ case MBlaze::BLTID: Cond[0].setImm(MBlaze::BGEID); return false;
+ case MBlaze::BLEID: Cond[0].setImm(MBlaze::BGTID); return false;
+ }
+}
+
+/// getGlobalBaseReg - Return a virtual register initialized with the
+/// the global base register value. Output instructions required to
+/// initialize the register in the function entry block, if necessary.
+///
+unsigned MBlazeInstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
+ MBlazeFunctionInfo *MBlazeFI = MF->getInfo<MBlazeFunctionInfo>();
+ unsigned GlobalBaseReg = MBlazeFI->getGlobalBaseReg();
+ if (GlobalBaseReg != 0)
+ return GlobalBaseReg;
+
+ // Insert the set of GlobalBaseReg into the first MBB of the function
+ MachineBasicBlock &FirstMBB = MF->front();
+ MachineBasicBlock::iterator MBBI = FirstMBB.begin();
+ MachineRegisterInfo &RegInfo = MF->getRegInfo();
+ const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+
+ GlobalBaseReg = RegInfo.createVirtualRegister(MBlaze::GPRRegisterClass);
+ BuildMI(FirstMBB, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY),
+ GlobalBaseReg).addReg(MBlaze::R20);
+ RegInfo.addLiveIn(MBlaze::R20);
+
+ MBlazeFI->setGlobalBaseReg(GlobalBaseReg);
+ return GlobalBaseReg;
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.h b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.h
new file mode 100644
index 0000000..7174405
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.h
@@ -0,0 +1,240 @@
+//===- MBlazeInstrInfo.h - MBlaze Instruction Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MBlaze implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZEINSTRUCTIONINFO_H
+#define MBLAZEINSTRUCTIONINFO_H
+
+#include "MBlaze.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "MBlazeRegisterInfo.h"
+
+#define GET_INSTRINFO_HEADER
+#include "MBlazeGenInstrInfo.inc"
+
+namespace llvm {
+
+namespace MBlaze {
+
+ // MBlaze Branch Codes
+ enum FPBranchCode {
+ BRANCH_F,
+ BRANCH_T,
+ BRANCH_FL,
+ BRANCH_TL,
+ BRANCH_INVALID
+ };
+
+ // MBlaze Condition Codes
+ enum CondCode {
+ // To be used with float branch True
+ FCOND_F,
+ FCOND_UN,
+ FCOND_EQ,
+ FCOND_UEQ,
+ FCOND_OLT,
+ FCOND_ULT,
+ FCOND_OLE,
+ FCOND_ULE,
+ FCOND_SF,
+ FCOND_NGLE,
+ FCOND_SEQ,
+ FCOND_NGL,
+ FCOND_LT,
+ FCOND_NGE,
+ FCOND_LE,
+ FCOND_NGT,
+
+ // To be used with float branch False
+ // This conditions have the same mnemonic as the
+ // above ones, but are used with a branch False;
+ FCOND_T,
+ FCOND_OR,
+ FCOND_NEQ,
+ FCOND_OGL,
+ FCOND_UGE,
+ FCOND_OGE,
+ FCOND_UGT,
+ FCOND_OGT,
+ FCOND_ST,
+ FCOND_GLE,
+ FCOND_SNE,
+ FCOND_GL,
+ FCOND_NLT,
+ FCOND_GE,
+ FCOND_NLE,
+ FCOND_GT,
+
+ // Only integer conditions
+ COND_EQ,
+ COND_GT,
+ COND_GE,
+ COND_LT,
+ COND_LE,
+ COND_NE,
+ COND_INVALID
+ };
+
+ // Turn condition code into conditional branch opcode.
+ inline static unsigned GetCondBranchFromCond(CondCode CC) {
+ switch (CC) {
+ default: llvm_unreachable("Unknown condition code");
+ case COND_EQ: return MBlaze::BEQID;
+ case COND_NE: return MBlaze::BNEID;
+ case COND_GT: return MBlaze::BGTID;
+ case COND_GE: return MBlaze::BGEID;
+ case COND_LT: return MBlaze::BLTID;
+ case COND_LE: return MBlaze::BLEID;
+ }
+ }
+
+ /// GetOppositeBranchCondition - Return the inverse of the specified cond,
+ /// e.g. turning COND_E to COND_NE.
+ // CondCode GetOppositeBranchCondition(MBlaze::CondCode CC);
+
+ /// MBlazeCCToString - Map each FP condition code to its string
+ inline static const char *MBlazeFCCToString(MBlaze::CondCode CC) {
+ switch (CC) {
+ default: llvm_unreachable("Unknown condition code");
+ case FCOND_F:
+ case FCOND_T: return "f";
+ case FCOND_UN:
+ case FCOND_OR: return "un";
+ case FCOND_EQ:
+ case FCOND_NEQ: return "eq";
+ case FCOND_UEQ:
+ case FCOND_OGL: return "ueq";
+ case FCOND_OLT:
+ case FCOND_UGE: return "olt";
+ case FCOND_ULT:
+ case FCOND_OGE: return "ult";
+ case FCOND_OLE:
+ case FCOND_UGT: return "ole";
+ case FCOND_ULE:
+ case FCOND_OGT: return "ule";
+ case FCOND_SF:
+ case FCOND_ST: return "sf";
+ case FCOND_NGLE:
+ case FCOND_GLE: return "ngle";
+ case FCOND_SEQ:
+ case FCOND_SNE: return "seq";
+ case FCOND_NGL:
+ case FCOND_GL: return "ngl";
+ case FCOND_LT:
+ case FCOND_NLT: return "lt";
+ case FCOND_NGE:
+ case FCOND_GE: return "ge";
+ case FCOND_LE:
+ case FCOND_NLE: return "nle";
+ case FCOND_NGT:
+ case FCOND_GT: return "gt";
+ }
+ }
+
+ inline static bool isUncondBranchOpcode(int Opc) {
+ switch (Opc) {
+ default: return false;
+ case MBlaze::BRI:
+ case MBlaze::BRAI:
+ case MBlaze::BRID:
+ case MBlaze::BRAID:
+ return true;
+ }
+ }
+
+ inline static bool isCondBranchOpcode(int Opc) {
+ switch (Opc) {
+ default: return false;
+ case MBlaze::BEQI: case MBlaze::BEQID:
+ case MBlaze::BNEI: case MBlaze::BNEID:
+ case MBlaze::BGTI: case MBlaze::BGTID:
+ case MBlaze::BGEI: case MBlaze::BGEID:
+ case MBlaze::BLTI: case MBlaze::BLTID:
+ case MBlaze::BLEI: case MBlaze::BLEID:
+ return true;
+ }
+ }
+}
+
+class MBlazeInstrInfo : public MBlazeGenInstrInfo {
+ MBlazeTargetMachine &TM;
+ const MBlazeRegisterInfo RI;
+public:
+ explicit MBlazeInstrInfo(MBlazeTargetMachine &TM);
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ virtual const MBlazeRegisterInfo &getRegisterInfo() const { return RI; }
+
+ /// isLoadFromStackSlot - If the specified machine instruction is a direct
+ /// load from a stack slot, return the virtual or physical register number of
+ /// the destination along with the FrameIndex of the loaded stack slot. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than loading from the stack slot.
+ virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ /// isStoreToStackSlot - If the specified machine instruction is a direct
+ /// store to a stack slot, return the virtual or physical register number of
+ /// the source reg along with the FrameIndex of the loaded stack slot. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than storing to the stack slot.
+ virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ /// Branch Analysis
+ virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const;
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
+ virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+
+ virtual bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond)
+ const;
+
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ /// Insert nop instruction when hazard condition is found
+ virtual void insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const;
+
+ /// getGlobalBaseReg - Return a virtual register initialized with the
+ /// the global base register value. Output instructions required to
+ /// initialize the register in the function entry block, if necessary.
+ ///
+ unsigned getGlobalBaseReg(MachineFunction *MF) const;
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.td b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.td
new file mode 100644
index 0000000..1d8c987
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.td
@@ -0,0 +1,889 @@
+//===- MBlazeInstrInfo.td - MBlaze Instruction defs --------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instruction format superclass
+//===----------------------------------------------------------------------===//
+include "MBlazeInstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// MBlaze type profiles
+//===----------------------------------------------------------------------===//
+
+// def SDTMBlazeSelectCC : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>]>;
+def SDT_MBlazeRet : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+def SDT_MBlazeIRet : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+def SDT_MBlazeJmpLink : SDTypeProfile<0, -1, [SDTCisVT<0, i32>]>;
+def SDT_MBCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>]>;
+def SDT_MBCallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
+
+//===----------------------------------------------------------------------===//
+// MBlaze specific nodes
+//===----------------------------------------------------------------------===//
+
+def MBlazeRet : SDNode<"MBlazeISD::Ret", SDT_MBlazeRet,
+ [SDNPHasChain, SDNPOptInGlue]>;
+def MBlazeIRet : SDNode<"MBlazeISD::IRet", SDT_MBlazeIRet,
+ [SDNPHasChain, SDNPOptInGlue]>;
+
+def MBlazeJmpLink : SDNode<"MBlazeISD::JmpLink",SDT_MBlazeJmpLink,
+ [SDNPHasChain,SDNPOptInGlue,SDNPOutGlue,
+ SDNPVariadic]>;
+
+def MBWrapper : SDNode<"MBlazeISD::Wrap", SDTIntUnaryOp>;
+
+def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_MBCallSeqStart,
+ [SDNPHasChain, SDNPOutGlue]>;
+
+def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_MBCallSeqEnd,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+//===----------------------------------------------------------------------===//
+// MBlaze Instruction Predicate Definitions.
+//===----------------------------------------------------------------------===//
+// def HasPipe3 : Predicate<"Subtarget.hasPipe3()">;
+def HasBarrel : Predicate<"Subtarget.hasBarrel()">;
+// def NoBarrel : Predicate<"!Subtarget.hasBarrel()">;
+def HasDiv : Predicate<"Subtarget.hasDiv()">;
+def HasMul : Predicate<"Subtarget.hasMul()">;
+// def HasFSL : Predicate<"Subtarget.hasFSL()">;
+// def HasEFSL : Predicate<"Subtarget.hasEFSL()">;
+// def HasMSRSet : Predicate<"Subtarget.hasMSRSet()">;
+// def HasException : Predicate<"Subtarget.hasException()">;
+def HasPatCmp : Predicate<"Subtarget.hasPatCmp()">;
+def HasFPU : Predicate<"Subtarget.hasFPU()">;
+// def HasESR : Predicate<"Subtarget.hasESR()">;
+// def HasPVR : Predicate<"Subtarget.hasPVR()">;
+def HasMul64 : Predicate<"Subtarget.hasMul64()">;
+def HasSqrt : Predicate<"Subtarget.hasSqrt()">;
+// def HasMMU : Predicate<"Subtarget.hasMMU()">;
+
+//===----------------------------------------------------------------------===//
+// MBlaze Operand, Complex Patterns and Transformations Definitions.
+//===----------------------------------------------------------------------===//
+
+def MBlazeMemAsmOperand : AsmOperandClass {
+ let Name = "Mem";
+ let SuperClasses = [];
+}
+
+def MBlazeFslAsmOperand : AsmOperandClass {
+ let Name = "Fsl";
+ let SuperClasses = [];
+}
+
+// Instruction operand types
+def brtarget : Operand<OtherVT>;
+def calltarget : Operand<i32>;
+def simm16 : Operand<i32>;
+def uimm5 : Operand<i32>;
+def uimm15 : Operand<i32>;
+def fimm : Operand<f32>;
+
+// Unsigned Operand
+def uimm16 : Operand<i32> {
+ let PrintMethod = "printUnsignedImm";
+}
+
+// FSL Operand
+def fslimm : Operand<i32> {
+ let PrintMethod = "printFSLImm";
+ let ParserMatchClass = MBlazeFslAsmOperand;
+}
+
+// Address operand
+def memri : Operand<i32> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops GPR, simm16);
+ let ParserMatchClass = MBlazeMemAsmOperand;
+}
+
+def memrr : Operand<i32> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops GPR, GPR);
+ let ParserMatchClass = MBlazeMemAsmOperand;
+}
+
+// Node immediate fits as 16-bit sign extended on target immediate.
+def immSExt16 : PatLeaf<(imm), [{
+ return (N->getZExtValue() >> 16) == 0;
+}]>;
+
+// Node immediate fits as 16-bit zero extended on target immediate.
+// The LO16 param means that only the lower 16 bits of the node
+// immediate are caught.
+// e.g. addiu, sltiu
+def immZExt16 : PatLeaf<(imm), [{
+ return (N->getZExtValue() >> 16) == 0;
+}]>;
+
+// FSL immediate field must fit in 4 bits.
+def immZExt4 : PatLeaf<(imm), [{
+ return N->getZExtValue() == ((N->getZExtValue()) & 0xf) ;
+}]>;
+
+// shamt field must fit in 5 bits.
+def immZExt5 : PatLeaf<(imm), [{
+ return N->getZExtValue() == ((N->getZExtValue()) & 0x1f) ;
+}]>;
+
+// MBlaze Address Mode. SDNode frameindex could possibily be a match
+// since load and store instructions from stack used it.
+def iaddr : ComplexPattern<i32, 2, "SelectAddrRegImm", [frameindex], []>;
+def xaddr : ComplexPattern<i32, 2, "SelectAddrRegReg", [], []>;
+
+//===----------------------------------------------------------------------===//
+// Pseudo instructions
+//===----------------------------------------------------------------------===//
+
+// As stack alignment is always done with addiu, we need a 16-bit immediate
+let Defs = [R1], Uses = [R1] in {
+def ADJCALLSTACKDOWN : MBlazePseudo<(outs), (ins simm16:$amt),
+ "#ADJCALLSTACKDOWN $amt",
+ [(callseq_start timm:$amt)]>;
+def ADJCALLSTACKUP : MBlazePseudo<(outs),
+ (ins uimm16:$amt1, simm16:$amt2),
+ "#ADJCALLSTACKUP $amt1",
+ [(callseq_end timm:$amt1, timm:$amt2)]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Instructions specific format
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Arithmetic Instructions
+//===----------------------------------------------------------------------===//
+class Arith<bits<6> op, bits<11> flags, string instr_asm, SDNode OpNode,
+ InstrItinClass itin> :
+ TA<op, flags, (outs GPR:$dst), (ins GPR:$b, GPR:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [(set GPR:$dst, (OpNode GPR:$b, GPR:$c))], itin>;
+
+class ArithI<bits<6> op, string instr_asm, SDNode OpNode,
+ Operand Od, PatLeaf imm_type> :
+ TB<op, (outs GPR:$dst), (ins GPR:$b, Od:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [(set GPR:$dst, (OpNode GPR:$b, imm_type:$c))], IIC_ALU>;
+
+class ArithI32<bits<6> op, string instr_asm,Operand Od, PatLeaf imm_type> :
+ TB<op, (outs GPR:$dst), (ins GPR:$b, Od:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [], IIC_ALU>;
+
+class ShiftI<bits<6> op, bits<2> flags, string instr_asm, SDNode OpNode,
+ Operand Od, PatLeaf imm_type> :
+ SHT<op, flags, (outs GPR:$dst), (ins GPR:$b, Od:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [(set GPR:$dst, (OpNode GPR:$b, imm_type:$c))], IIC_SHT>;
+
+class ArithR<bits<6> op, bits<11> flags, string instr_asm, SDNode OpNode,
+ InstrItinClass itin> :
+ TAR<op, flags, (outs GPR:$dst), (ins GPR:$b, GPR:$c),
+ !strconcat(instr_asm, " $dst, $c, $b"),
+ [(set GPR:$dst, (OpNode GPR:$b, GPR:$c))], itin>;
+
+class ArithRI<bits<6> op, string instr_asm, SDNode OpNode,
+ Operand Od, PatLeaf imm_type> :
+ TBR<op, (outs GPR:$dst), (ins Od:$b, GPR:$c),
+ !strconcat(instr_asm, " $dst, $c, $b"),
+ [(set GPR:$dst, (OpNode imm_type:$b, GPR:$c))], IIC_ALU>;
+
+class ArithN<bits<6> op, bits<11> flags, string instr_asm,
+ InstrItinClass itin> :
+ TA<op, flags, (outs GPR:$dst), (ins GPR:$b, GPR:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [], itin>;
+
+class ArithNI<bits<6> op, string instr_asm,Operand Od, PatLeaf imm_type> :
+ TB<op, (outs GPR:$dst), (ins GPR:$b, Od:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [], IIC_ALU>;
+
+class ArithRN<bits<6> op, bits<11> flags, string instr_asm,
+ InstrItinClass itin> :
+ TAR<op, flags, (outs GPR:$dst), (ins GPR:$c, GPR:$b),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [], itin>;
+
+class ArithRNI<bits<6> op, string instr_asm,Operand Od, PatLeaf imm_type> :
+ TBR<op, (outs GPR:$dst), (ins Od:$c, GPR:$b),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [], IIC_ALU>;
+
+//===----------------------------------------------------------------------===//
+// Misc Arithmetic Instructions
+//===----------------------------------------------------------------------===//
+
+class Logic<bits<6> op, bits<11> flags, string instr_asm, SDNode OpNode> :
+ TA<op, flags, (outs GPR:$dst), (ins GPR:$b, GPR:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [(set GPR:$dst, (OpNode GPR:$b, GPR:$c))], IIC_ALU>;
+
+class LogicI<bits<6> op, string instr_asm, SDNode OpNode> :
+ TB<op, (outs GPR:$dst), (ins GPR:$b, uimm16:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [(set GPR:$dst, (OpNode GPR:$b, immZExt16:$c))],
+ IIC_ALU>;
+
+class LogicI32<bits<6> op, string instr_asm> :
+ TB<op, (outs GPR:$dst), (ins GPR:$b, uimm16:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [], IIC_ALU>;
+
+class PatCmp<bits<6> op, bits<11> flags, string instr_asm> :
+ TA<op, flags, (outs GPR:$dst), (ins GPR:$b, GPR:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [], IIC_ALU>;
+
+//===----------------------------------------------------------------------===//
+// Memory Access Instructions
+//===----------------------------------------------------------------------===//
+
+let mayLoad = 1 in {
+class LoadM<bits<6> op, bits<11> flags, string instr_asm> :
+ TA<op, flags, (outs GPR:$dst), (ins memrr:$addr),
+ !strconcat(instr_asm, " $dst, $addr"),
+ [], IIC_MEMl>;
+}
+
+class LoadMI<bits<6> op, string instr_asm, PatFrag OpNode> :
+ TB<op, (outs GPR:$dst), (ins memri:$addr),
+ !strconcat(instr_asm, " $dst, $addr"),
+ [(set (i32 GPR:$dst), (OpNode iaddr:$addr))], IIC_MEMl>;
+
+let mayStore = 1 in {
+class StoreM<bits<6> op, bits<11> flags, string instr_asm> :
+ TA<op, flags, (outs), (ins GPR:$dst, memrr:$addr),
+ !strconcat(instr_asm, " $dst, $addr"),
+ [], IIC_MEMs>;
+}
+
+class StoreMI<bits<6> op, string instr_asm, PatFrag OpNode> :
+ TB<op, (outs), (ins GPR:$dst, memri:$addr),
+ !strconcat(instr_asm, " $dst, $addr"),
+ [(OpNode (i32 GPR:$dst), iaddr:$addr)], IIC_MEMs>;
+
+//===----------------------------------------------------------------------===//
+// Branch Instructions
+//===----------------------------------------------------------------------===//
+class Branch<bits<6> op, bits<5> br, bits<11> flags, string instr_asm> :
+ TA<op, flags, (outs), (ins GPR:$target),
+ !strconcat(instr_asm, " $target"),
+ [], IIC_BR> {
+ let rd = 0x0;
+ let ra = br;
+ let Form = FCCR;
+}
+
+class BranchI<bits<6> op, bits<5> br, string instr_asm> :
+ TB<op, (outs), (ins brtarget:$target),
+ !strconcat(instr_asm, " $target"),
+ [], IIC_BR> {
+ let rd = 0;
+ let ra = br;
+ let Form = FCCI;
+}
+
+//===----------------------------------------------------------------------===//
+// Branch and Link Instructions
+//===----------------------------------------------------------------------===//
+class BranchL<bits<6> op, bits<5> br, bits<11> flags, string instr_asm> :
+ TA<op, flags, (outs), (ins GPR:$link, GPR:$target, variable_ops),
+ !strconcat(instr_asm, " $link, $target"),
+ [], IIC_BRl> {
+ let ra = br;
+ let Form = FRCR;
+}
+
+class BranchLI<bits<6> op, bits<5> br, string instr_asm> :
+ TB<op, (outs), (ins GPR:$link, calltarget:$target, variable_ops),
+ !strconcat(instr_asm, " $link, $target"),
+ [], IIC_BRl> {
+ let ra = br;
+ let Form = FRCI;
+}
+
+//===----------------------------------------------------------------------===//
+// Conditional Branch Instructions
+//===----------------------------------------------------------------------===//
+class BranchC<bits<6> op, bits<5> br, bits<11> flags, string instr_asm> :
+ TA<op, flags, (outs),
+ (ins GPR:$a, GPR:$b),
+ !strconcat(instr_asm, " $a, $b"),
+ [], IIC_BRc> {
+ let rd = br;
+ let Form = FCRR;
+}
+
+class BranchCI<bits<6> op, bits<5> br, string instr_asm> :
+ TB<op, (outs), (ins GPR:$a, brtarget:$offset),
+ !strconcat(instr_asm, " $a, $offset"),
+ [], IIC_BRc> {
+ let rd = br;
+ let Form = FCRI;
+}
+
+//===----------------------------------------------------------------------===//
+// MBlaze arithmetic instructions
+//===----------------------------------------------------------------------===//
+
+let isCommutable = 1, isAsCheapAsAMove = 1 in {
+ def ADDK : Arith<0x04, 0x000, "addk ", add, IIC_ALU>;
+ def AND : Logic<0x21, 0x000, "and ", and>;
+ def OR : Logic<0x20, 0x000, "or ", or>;
+ def XOR : Logic<0x22, 0x000, "xor ", xor>;
+
+ let Predicates=[HasPatCmp] in {
+ def PCMPBF : PatCmp<0x20, 0x400, "pcmpbf ">;
+ def PCMPEQ : PatCmp<0x22, 0x400, "pcmpeq ">;
+ def PCMPNE : PatCmp<0x23, 0x400, "pcmpne ">;
+ }
+
+ let Defs = [CARRY] in {
+ def ADD : Arith<0x00, 0x000, "add ", addc, IIC_ALU>;
+
+ let Uses = [CARRY] in {
+ def ADDC : Arith<0x02, 0x000, "addc ", adde, IIC_ALU>;
+ }
+ }
+
+ let Uses = [CARRY] in {
+ def ADDKC : ArithN<0x06, 0x000, "addkc ", IIC_ALU>;
+ }
+}
+
+let isAsCheapAsAMove = 1 in {
+ def ANDN : ArithN<0x23, 0x000, "andn ", IIC_ALU>;
+ def CMP : ArithN<0x05, 0x001, "cmp ", IIC_ALU>;
+ def CMPU : ArithN<0x05, 0x003, "cmpu ", IIC_ALU>;
+ def RSUBK : ArithR<0x05, 0x000, "rsubk ", sub, IIC_ALU>;
+
+ let Defs = [CARRY] in {
+ def RSUB : ArithR<0x01, 0x000, "rsub ", subc, IIC_ALU>;
+
+ let Uses = [CARRY] in {
+ def RSUBC : ArithR<0x03, 0x000, "rsubc ", sube, IIC_ALU>;
+ }
+ }
+
+ let Uses = [CARRY] in {
+ def RSUBKC : ArithRN<0x07, 0x000, "rsubkc ", IIC_ALU>;
+ }
+}
+
+let isCommutable = 1, Predicates=[HasMul] in {
+ def MUL : Arith<0x10, 0x000, "mul ", mul, IIC_ALUm>;
+}
+
+let isCommutable = 1, Predicates=[HasMul,HasMul64] in {
+ def MULH : Arith<0x10, 0x001, "mulh ", mulhs, IIC_ALUm>;
+ def MULHU : Arith<0x10, 0x003, "mulhu ", mulhu, IIC_ALUm>;
+}
+
+let Predicates=[HasMul,HasMul64] in {
+ def MULHSU : ArithN<0x10, 0x002, "mulhsu ", IIC_ALUm>;
+}
+
+let Predicates=[HasBarrel] in {
+ def BSRL : Arith<0x11, 0x000, "bsrl ", srl, IIC_SHT>;
+ def BSRA : Arith<0x11, 0x200, "bsra ", sra, IIC_SHT>;
+ def BSLL : Arith<0x11, 0x400, "bsll ", shl, IIC_SHT>;
+ def BSRLI : ShiftI<0x19, 0x0, "bsrli ", srl, uimm5, immZExt5>;
+ def BSRAI : ShiftI<0x19, 0x1, "bsrai ", sra, uimm5, immZExt5>;
+ def BSLLI : ShiftI<0x19, 0x2, "bslli ", shl, uimm5, immZExt5>;
+}
+
+let Predicates=[HasDiv] in {
+ def IDIV : ArithR<0x12, 0x000, "idiv ", sdiv, IIC_ALUd>;
+ def IDIVU : ArithR<0x12, 0x002, "idivu ", udiv, IIC_ALUd>;
+}
+
+//===----------------------------------------------------------------------===//
+// MBlaze immediate mode arithmetic instructions
+//===----------------------------------------------------------------------===//
+
+let isAsCheapAsAMove = 1 in {
+ def ADDIK : ArithI<0x0C, "addik ", add, simm16, immSExt16>;
+ def RSUBIK : ArithRI<0x0D, "rsubik ", sub, simm16, immSExt16>;
+ def ANDNI : ArithNI<0x2B, "andni ", uimm16, immZExt16>;
+ def ANDI : LogicI<0x29, "andi ", and>;
+ def ORI : LogicI<0x28, "ori ", or>;
+ def XORI : LogicI<0x2A, "xori ", xor>;
+
+ let Defs = [CARRY] in {
+ def ADDI : ArithI<0x08, "addi ", addc, simm16, immSExt16>;
+ def RSUBI : ArithRI<0x09, "rsubi ", subc, simm16, immSExt16>;
+
+ let Uses = [CARRY] in {
+ def ADDIC : ArithI<0x0A, "addic ", adde, simm16, immSExt16>;
+ def RSUBIC : ArithRI<0x0B, "rsubic ", sube, simm16, immSExt16>;
+ }
+ }
+
+ let Uses = [CARRY] in {
+ def ADDIKC : ArithNI<0x0E, "addikc ", simm16, immSExt16>;
+ def RSUBIKC : ArithRNI<0x0F, "rsubikc", simm16, immSExt16>;
+ }
+}
+
+let Predicates=[HasMul] in {
+ def MULI : ArithI<0x18, "muli ", mul, simm16, immSExt16>;
+}
+
+//===----------------------------------------------------------------------===//
+// MBlaze memory access instructions
+//===----------------------------------------------------------------------===//
+
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
+ let neverHasSideEffects = 1 in {
+ def LBU : LoadM<0x30, 0x000, "lbu ">;
+ def LBUR : LoadM<0x30, 0x200, "lbur ">;
+
+ def LHU : LoadM<0x31, 0x000, "lhu ">;
+ def LHUR : LoadM<0x31, 0x200, "lhur ">;
+
+ def LW : LoadM<0x32, 0x000, "lw ">;
+ def LWR : LoadM<0x32, 0x200, "lwr ">;
+
+ let Defs = [CARRY] in {
+ def LWX : LoadM<0x32, 0x400, "lwx ">;
+ }
+ }
+
+ def LBUI : LoadMI<0x38, "lbui ", zextloadi8>;
+ def LHUI : LoadMI<0x39, "lhui ", zextloadi16>;
+ def LWI : LoadMI<0x3A, "lwi ", load>;
+}
+
+def SB : StoreM<0x34, 0x000, "sb ">;
+def SBR : StoreM<0x34, 0x200, "sbr ">;
+
+def SH : StoreM<0x35, 0x000, "sh ">;
+def SHR : StoreM<0x35, 0x200, "shr ">;
+
+def SW : StoreM<0x36, 0x000, "sw ">;
+def SWR : StoreM<0x36, 0x200, "swr ">;
+
+let Defs = [CARRY] in {
+ def SWX : StoreM<0x36, 0x400, "swx ">;
+}
+
+def SBI : StoreMI<0x3C, "sbi ", truncstorei8>;
+def SHI : StoreMI<0x3D, "shi ", truncstorei16>;
+def SWI : StoreMI<0x3E, "swi ", store>;
+
+//===----------------------------------------------------------------------===//
+// MBlaze branch instructions
+//===----------------------------------------------------------------------===//
+
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in {
+ def BRI : BranchI<0x2E, 0x00, "bri ">;
+ def BRAI : BranchI<0x2E, 0x08, "brai ">;
+}
+
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in {
+ def BEQI : BranchCI<0x2F, 0x00, "beqi ">;
+ def BNEI : BranchCI<0x2F, 0x01, "bnei ">;
+ def BLTI : BranchCI<0x2F, 0x02, "blti ">;
+ def BLEI : BranchCI<0x2F, 0x03, "blei ">;
+ def BGTI : BranchCI<0x2F, 0x04, "bgti ">;
+ def BGEI : BranchCI<0x2F, 0x05, "bgei ">;
+}
+
+let isBranch = 1, isIndirectBranch = 1, isTerminator = 1, hasCtrlDep = 1,
+ isBarrier = 1 in {
+ def BR : Branch<0x26, 0x00, 0x000, "br ">;
+ def BRA : Branch<0x26, 0x08, 0x000, "bra ">;
+}
+
+let isBranch = 1, isIndirectBranch = 1, isTerminator = 1, hasCtrlDep = 1 in {
+ def BEQ : BranchC<0x27, 0x00, 0x000, "beq ">;
+ def BNE : BranchC<0x27, 0x01, 0x000, "bne ">;
+ def BLT : BranchC<0x27, 0x02, 0x000, "blt ">;
+ def BLE : BranchC<0x27, 0x03, 0x000, "ble ">;
+ def BGT : BranchC<0x27, 0x04, 0x000, "bgt ">;
+ def BGE : BranchC<0x27, 0x05, 0x000, "bge ">;
+}
+
+let isBranch = 1, isTerminator = 1, hasDelaySlot = 1, hasCtrlDep = 1,
+ isBarrier = 1 in {
+ def BRID : BranchI<0x2E, 0x10, "brid ">;
+ def BRAID : BranchI<0x2E, 0x18, "braid ">;
+}
+
+let isBranch = 1, isTerminator = 1, hasDelaySlot = 1, hasCtrlDep = 1 in {
+ def BEQID : BranchCI<0x2F, 0x10, "beqid ">;
+ def BNEID : BranchCI<0x2F, 0x11, "bneid ">;
+ def BLTID : BranchCI<0x2F, 0x12, "bltid ">;
+ def BLEID : BranchCI<0x2F, 0x13, "bleid ">;
+ def BGTID : BranchCI<0x2F, 0x14, "bgtid ">;
+ def BGEID : BranchCI<0x2F, 0x15, "bgeid ">;
+}
+
+let isBranch = 1, isIndirectBranch = 1, isTerminator = 1,
+ hasDelaySlot = 1, hasCtrlDep = 1, isBarrier = 1 in {
+ def BRD : Branch<0x26, 0x10, 0x000, "brd ">;
+ def BRAD : Branch<0x26, 0x18, 0x000, "brad ">;
+}
+
+let isBranch = 1, isIndirectBranch = 1, isTerminator = 1,
+ hasDelaySlot = 1, hasCtrlDep = 1 in {
+ def BEQD : BranchC<0x27, 0x10, 0x000, "beqd ">;
+ def BNED : BranchC<0x27, 0x11, 0x000, "bned ">;
+ def BLTD : BranchC<0x27, 0x12, 0x000, "bltd ">;
+ def BLED : BranchC<0x27, 0x13, 0x000, "bled ">;
+ def BGTD : BranchC<0x27, 0x14, 0x000, "bgtd ">;
+ def BGED : BranchC<0x27, 0x15, 0x000, "bged ">;
+}
+
+let isCall =1, hasDelaySlot = 1,
+ Defs = [R3,R4,R5,R6,R7,R8,R9,R10,R11,R12,CARRY],
+ Uses = [R1] in {
+ def BRLID : BranchLI<0x2E, 0x14, "brlid ">;
+ def BRALID : BranchLI<0x2E, 0x1C, "bralid ">;
+}
+
+let isCall = 1, hasDelaySlot = 1,
+ Defs = [R3,R4,R5,R6,R7,R8,R9,R10,R11,R12,CARRY],
+ Uses = [R1] in {
+ def BRLD : BranchL<0x26, 0x14, 0x000, "brld ">;
+ def BRALD : BranchL<0x26, 0x1C, 0x000, "brald ">;
+}
+
+let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1,
+ rd=0x10, Form=FCRI in {
+ def RTSD : TB<0x2D, (outs), (ins GPR:$target, simm16:$imm),
+ "rtsd $target, $imm",
+ [],
+ IIC_BR>;
+}
+
+let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1,
+ rd=0x11, Form=FCRI in {
+ def RTID : TB<0x2D, (outs), (ins GPR:$target, simm16:$imm),
+ "rtid $target, $imm",
+ [],
+ IIC_BR>;
+}
+
+let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1,
+ rd=0x12, Form=FCRI in {
+ def RTBD : TB<0x2D, (outs), (ins GPR:$target, simm16:$imm),
+ "rtbd $target, $imm",
+ [],
+ IIC_BR>;
+}
+
+let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1,
+ rd=0x14, Form=FCRI in {
+ def RTED : TB<0x2D, (outs), (ins GPR:$target, simm16:$imm),
+ "rted $target, $imm",
+ [],
+ IIC_BR>;
+}
+
+//===----------------------------------------------------------------------===//
+// MBlaze misc instructions
+//===----------------------------------------------------------------------===//
+
+let neverHasSideEffects = 1 in {
+ def NOP : MBlazeInst< 0x20, FC, (outs), (ins), "nop ", [], IIC_ALU>;
+}
+
+let usesCustomInserter = 1 in {
+ def Select_CC : MBlazePseudo<(outs GPR:$dst),
+ (ins GPR:$T, GPR:$F, GPR:$CMP, i32imm:$CC), // F T reversed
+ "; SELECT_CC PSEUDO!",
+ []>;
+
+ def ShiftL : MBlazePseudo<(outs GPR:$dst),
+ (ins GPR:$L, GPR:$R),
+ "; ShiftL PSEUDO!",
+ []>;
+
+ def ShiftRA : MBlazePseudo<(outs GPR:$dst),
+ (ins GPR:$L, GPR:$R),
+ "; ShiftRA PSEUDO!",
+ []>;
+
+ def ShiftRL : MBlazePseudo<(outs GPR:$dst),
+ (ins GPR:$L, GPR:$R),
+ "; ShiftRL PSEUDO!",
+ []>;
+}
+
+let rb = 0 in {
+ def SEXT16 : TA<0x24, 0x061, (outs GPR:$dst), (ins GPR:$src),
+ "sext16 $dst, $src", [], IIC_ALU>;
+ def SEXT8 : TA<0x24, 0x060, (outs GPR:$dst), (ins GPR:$src),
+ "sext8 $dst, $src", [], IIC_ALU>;
+ let Defs = [CARRY] in {
+ def SRL : TA<0x24, 0x041, (outs GPR:$dst), (ins GPR:$src),
+ "srl $dst, $src", [], IIC_ALU>;
+ def SRA : TA<0x24, 0x001, (outs GPR:$dst), (ins GPR:$src),
+ "sra $dst, $src", [], IIC_ALU>;
+ let Uses = [CARRY] in {
+ def SRC : TA<0x24, 0x021, (outs GPR:$dst), (ins GPR:$src),
+ "src $dst, $src", [], IIC_ALU>;
+ }
+ }
+}
+
+let isCodeGenOnly=1 in {
+ def ADDIK32 : ArithI32<0x08, "addik ", simm16, immSExt16>;
+ def ORI32 : LogicI32<0x28, "ori ">;
+ def BRLID32 : BranchLI<0x2E, 0x14, "brlid ">;
+}
+
+//===----------------------------------------------------------------------===//
+// Misc. instructions
+//===----------------------------------------------------------------------===//
+let Form=FRCS in {
+ def MFS : SPC<0x25, 0x2, (outs GPR:$dst), (ins SPR:$src),
+ "mfs $dst, $src", [], IIC_ALU>;
+}
+
+let Form=FCRCS in {
+ def MTS : SPC<0x25, 0x3, (outs SPR:$dst), (ins GPR:$src),
+ "mts $dst, $src", [], IIC_ALU>;
+}
+
+def MSRSET : MSR<0x25, 0x20, (outs GPR:$dst), (ins uimm15:$set),
+ "msrset $dst, $set", [], IIC_ALU>;
+
+def MSRCLR : MSR<0x25, 0x22, (outs GPR:$dst), (ins uimm15:$clr),
+ "msrclr $dst, $clr", [], IIC_ALU>;
+
+let rd=0x0, Form=FCRR in {
+ def WDC : TA<0x24, 0x64, (outs), (ins GPR:$a, GPR:$b),
+ "wdc $a, $b", [], IIC_WDC>;
+ def WDCF : TA<0x24, 0x74, (outs), (ins GPR:$a, GPR:$b),
+ "wdc.flush $a, $b", [], IIC_WDC>;
+ def WDCC : TA<0x24, 0x66, (outs), (ins GPR:$a, GPR:$b),
+ "wdc.clear $a, $b", [], IIC_WDC>;
+ def WIC : TA<0x24, 0x68, (outs), (ins GPR:$a, GPR:$b),
+ "wic $a, $b", [], IIC_WDC>;
+}
+
+def BRK : BranchL<0x26, 0x0C, 0x000, "brk ">;
+def BRKI : BranchLI<0x2E, 0x0C, "brki ">;
+
+def IMM : MBlazeInst<0x2C, FCCI, (outs), (ins simm16:$imm),
+ "imm $imm", [], IIC_ALU>;
+
+//===----------------------------------------------------------------------===//
+// Pseudo instructions for atomic operations
+//===----------------------------------------------------------------------===//
+let usesCustomInserter=1 in {
+ def CAS32 : MBlazePseudo<(outs GPR:$dst), (ins GPR:$ptr, GPR:$cmp, GPR:$swp),
+ "# atomic compare and swap",
+ [(set GPR:$dst, (atomic_cmp_swap_32 GPR:$ptr, GPR:$cmp, GPR:$swp))]>;
+
+ def SWP32 : MBlazePseudo<(outs GPR:$dst), (ins GPR:$ptr, GPR:$swp),
+ "# atomic swap",
+ [(set GPR:$dst, (atomic_swap_32 GPR:$ptr, GPR:$swp))]>;
+
+ def LAA32 : MBlazePseudo<(outs GPR:$dst), (ins GPR:$ptr, GPR:$val),
+ "# atomic load and add",
+ [(set GPR:$dst, (atomic_load_add_32 GPR:$ptr, GPR:$val))]>;
+
+ def LAS32 : MBlazePseudo<(outs GPR:$dst), (ins GPR:$ptr, GPR:$val),
+ "# atomic load and sub",
+ [(set GPR:$dst, (atomic_load_sub_32 GPR:$ptr, GPR:$val))]>;
+
+ def LAD32 : MBlazePseudo<(outs GPR:$dst), (ins GPR:$ptr, GPR:$val),
+ "# atomic load and and",
+ [(set GPR:$dst, (atomic_load_and_32 GPR:$ptr, GPR:$val))]>;
+
+ def LAO32 : MBlazePseudo<(outs GPR:$dst), (ins GPR:$ptr, GPR:$val),
+ "# atomic load and or",
+ [(set GPR:$dst, (atomic_load_or_32 GPR:$ptr, GPR:$val))]>;
+
+ def LAX32 : MBlazePseudo<(outs GPR:$dst), (ins GPR:$ptr, GPR:$val),
+ "# atomic load and xor",
+ [(set GPR:$dst, (atomic_load_xor_32 GPR:$ptr, GPR:$val))]>;
+
+ def LAN32 : MBlazePseudo<(outs GPR:$dst), (ins GPR:$ptr, GPR:$val),
+ "# atomic load and nand",
+ [(set GPR:$dst, (atomic_load_nand_32 GPR:$ptr, GPR:$val))]>;
+
+ def MEMBARRIER : MBlazePseudo<(outs), (ins),
+ "# memory barrier",
+ [(membarrier (i32 imm), (i32 imm), (i32 imm), (i32 imm), (i32 imm))]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Arbitrary patterns that map to one or more instructions
+//===----------------------------------------------------------------------===//
+
+// Small immediates
+def : Pat<(i32 0), (ADDK (i32 R0), (i32 R0))>;
+def : Pat<(i32 immSExt16:$imm), (ADDIK (i32 R0), imm:$imm)>;
+def : Pat<(i32 immZExt16:$imm), (ORI (i32 R0), imm:$imm)>;
+
+// Arbitrary immediates
+def : Pat<(i32 imm:$imm), (ADDIK (i32 R0), imm:$imm)>;
+
+// In register sign extension
+def : Pat<(sext_inreg GPR:$src, i16), (SEXT16 GPR:$src)>;
+def : Pat<(sext_inreg GPR:$src, i8), (SEXT8 GPR:$src)>;
+
+// Call
+def : Pat<(MBlazeJmpLink (i32 tglobaladdr:$dst)),
+ (BRLID (i32 R15), tglobaladdr:$dst)>;
+
+def : Pat<(MBlazeJmpLink (i32 texternalsym:$dst)),
+ (BRLID (i32 R15), texternalsym:$dst)>;
+
+def : Pat<(MBlazeJmpLink GPR:$dst),
+ (BRALD (i32 R15), GPR:$dst)>;
+
+// Shift Instructions
+def : Pat<(shl GPR:$L, GPR:$R), (ShiftL GPR:$L, GPR:$R)>;
+def : Pat<(sra GPR:$L, GPR:$R), (ShiftRA GPR:$L, GPR:$R)>;
+def : Pat<(srl GPR:$L, GPR:$R), (ShiftRL GPR:$L, GPR:$R)>;
+
+// SET_CC operations
+def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETEQ),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMP GPR:$R, GPR:$L), 1)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETNE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMP GPR:$R, GPR:$L), 2)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETGT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMP GPR:$R, GPR:$L), 3)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETLT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMP GPR:$R, GPR:$L), 4)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETGE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMP GPR:$R, GPR:$L), 5)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETLE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMP GPR:$R, GPR:$L), 6)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETUGT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMPU GPR:$R, GPR:$L), 3)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETULT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMPU GPR:$R, GPR:$L), 4)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETUGE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMPU GPR:$R, GPR:$L), 5)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETULE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMPU GPR:$R, GPR:$L), 6)>;
+
+// SELECT operations
+def : Pat<(select (i32 GPR:$C), (i32 GPR:$T), (i32 GPR:$F)),
+ (Select_CC GPR:$T, GPR:$F, GPR:$C, 2)>;
+
+// SELECT_CC
+def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETEQ),
+ (Select_CC GPR:$T, GPR:$F, (CMP GPR:$R, GPR:$L), 1)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETNE),
+ (Select_CC GPR:$T, GPR:$F, (CMP GPR:$R, GPR:$L), 2)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETGT),
+ (Select_CC GPR:$T, GPR:$F, (CMP GPR:$R, GPR:$L), 3)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETLT),
+ (Select_CC GPR:$T, GPR:$F, (CMP GPR:$R, GPR:$L), 4)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETGE),
+ (Select_CC GPR:$T, GPR:$F, (CMP GPR:$R, GPR:$L), 5)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETLE),
+ (Select_CC GPR:$T, GPR:$F, (CMP GPR:$R, GPR:$L), 6)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETUGT),
+ (Select_CC GPR:$T, GPR:$F, (CMPU GPR:$R, GPR:$L), 3)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETULT),
+ (Select_CC GPR:$T, GPR:$F, (CMPU GPR:$R, GPR:$L), 4)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETUGE),
+ (Select_CC GPR:$T, GPR:$F, (CMPU GPR:$R, GPR:$L), 5)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETULE),
+ (Select_CC GPR:$T, GPR:$F, (CMPU GPR:$R, GPR:$L), 6)>;
+
+// Ret instructions
+def : Pat<(MBlazeRet GPR:$target), (RTSD GPR:$target, 0x8)>;
+def : Pat<(MBlazeIRet GPR:$target), (RTID GPR:$target, 0x0)>;
+
+// BR instructions
+def : Pat<(br bb:$T), (BRID bb:$T)>;
+def : Pat<(brind GPR:$T), (BRAD GPR:$T)>;
+
+// BRCOND instructions
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETEQ), bb:$T),
+ (BEQID (CMP GPR:$R, GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETNE), bb:$T),
+ (BNEID (CMP GPR:$R, GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETGT), bb:$T),
+ (BGTID (CMP GPR:$R, GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETLT), bb:$T),
+ (BLTID (CMP GPR:$R, GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETGE), bb:$T),
+ (BGEID (CMP GPR:$R, GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETLE), bb:$T),
+ (BLEID (CMP GPR:$R, GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETUGT), bb:$T),
+ (BGTID (CMPU GPR:$R, GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETULT), bb:$T),
+ (BLTID (CMPU GPR:$R, GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETUGE), bb:$T),
+ (BGEID (CMPU GPR:$R, GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETULE), bb:$T),
+ (BLEID (CMPU GPR:$R, GPR:$L), bb:$T)>;
+def : Pat<(brcond (i32 GPR:$C), bb:$T),
+ (BNEID GPR:$C, bb:$T)>;
+
+// Jump tables, global addresses, and constant pools
+def : Pat<(MBWrapper tglobaladdr:$in), (ORI (i32 R0), tglobaladdr:$in)>;
+def : Pat<(MBWrapper tjumptable:$in), (ORI (i32 R0), tjumptable:$in)>;
+def : Pat<(MBWrapper tconstpool:$in), (ORI (i32 R0), tconstpool:$in)>;
+
+// Misc instructions
+def : Pat<(and (i32 GPR:$lh), (not (i32 GPR:$rh))),(ANDN GPR:$lh, GPR:$rh)>;
+
+// Convert any extend loads into zero extend loads
+def : Pat<(extloadi8 iaddr:$src), (i32 (LBUI iaddr:$src))>;
+def : Pat<(extloadi16 iaddr:$src), (i32 (LHUI iaddr:$src))>;
+def : Pat<(extloadi8 xaddr:$src), (i32 (LBU xaddr:$src))>;
+def : Pat<(extloadi16 xaddr:$src), (i32 (LHU xaddr:$src))>;
+
+// 32-bit load and store
+def : Pat<(store (i32 GPR:$dst), xaddr:$addr), (SW GPR:$dst, xaddr:$addr)>;
+def : Pat<(load xaddr:$addr), (i32 (LW xaddr:$addr))>;
+
+// 16-bit load and store
+def : Pat<(truncstorei16 (i32 GPR:$dst), xaddr:$addr), (SH GPR:$dst, xaddr:$addr)>;
+def : Pat<(zextloadi16 xaddr:$addr), (i32 (LHU xaddr:$addr))>;
+
+// 8-bit load and store
+def : Pat<(truncstorei8 (i32 GPR:$dst), xaddr:$addr), (SB GPR:$dst, xaddr:$addr)>;
+def : Pat<(zextloadi8 xaddr:$addr), (i32 (LBU xaddr:$addr))>;
+
+// Peepholes
+def : Pat<(store (i32 0), iaddr:$dst), (SWI (i32 R0), iaddr:$dst)>;
+
+// Atomic fence
+def : Pat<(atomic_fence (imm), (imm)), (MEMBARRIER)>;
+
+//===----------------------------------------------------------------------===//
+// Floating Point Support
+//===----------------------------------------------------------------------===//
+include "MBlazeInstrFSL.td"
+include "MBlazeInstrFPU.td"
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp
new file mode 100644
index 0000000..ea81dd6
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp
@@ -0,0 +1,113 @@
+//===- MBlazeIntrinsicInfo.cpp - Intrinsic Information -00-------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MBlaze implementation of TargetIntrinsicInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MBlazeIntrinsicInfo.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Module.h"
+#include "llvm/Type.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cstring>
+
+using namespace llvm;
+
+namespace mblazeIntrinsic {
+
+ enum ID {
+ last_non_mblaze_intrinsic = Intrinsic::num_intrinsics-1,
+#define GET_INTRINSIC_ENUM_VALUES
+#include "MBlazeGenIntrinsics.inc"
+#undef GET_INTRINSIC_ENUM_VALUES
+ , num_mblaze_intrinsics
+ };
+
+#define GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
+#include "MBlazeGenIntrinsics.inc"
+#undef GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
+}
+
+std::string MBlazeIntrinsicInfo::getName(unsigned IntrID, Type **Tys,
+ unsigned numTys) const {
+ static const char *const names[] = {
+#define GET_INTRINSIC_NAME_TABLE
+#include "MBlazeGenIntrinsics.inc"
+#undef GET_INTRINSIC_NAME_TABLE
+ };
+
+ assert(!isOverloaded(IntrID) && "MBlaze intrinsics are not overloaded");
+ if (IntrID < Intrinsic::num_intrinsics)
+ return 0;
+ assert(IntrID < mblazeIntrinsic::num_mblaze_intrinsics &&
+ "Invalid intrinsic ID");
+
+ std::string Result(names[IntrID - Intrinsic::num_intrinsics]);
+ return Result;
+}
+
+unsigned MBlazeIntrinsicInfo::
+lookupName(const char *Name, unsigned Len) const {
+ if (Len < 5 || Name[4] != '.' || Name[0] != 'l' || Name[1] != 'l'
+ || Name[2] != 'v' || Name[3] != 'm')
+ return 0; // All intrinsics start with 'llvm.'
+
+#define GET_FUNCTION_RECOGNIZER
+#include "MBlazeGenIntrinsics.inc"
+#undef GET_FUNCTION_RECOGNIZER
+ return 0;
+}
+
+unsigned MBlazeIntrinsicInfo::
+lookupGCCName(const char *Name) const {
+ return mblazeIntrinsic::getIntrinsicForGCCBuiltin("mblaze",Name);
+}
+
+bool MBlazeIntrinsicInfo::isOverloaded(unsigned IntrID) const {
+ // Overload Table
+ const bool OTable[] = {
+#define GET_INTRINSIC_OVERLOAD_TABLE
+#include "MBlazeGenIntrinsics.inc"
+#undef GET_INTRINSIC_OVERLOAD_TABLE
+ };
+ if (IntrID == 0)
+ return false;
+ else
+ return OTable[IntrID - Intrinsic::num_intrinsics];
+}
+
+/// This defines the "getAttributes(ID id)" method.
+#define GET_INTRINSIC_ATTRIBUTES
+#include "MBlazeGenIntrinsics.inc"
+#undef GET_INTRINSIC_ATTRIBUTES
+
+static FunctionType *getType(LLVMContext &Context, unsigned id) {
+ Type *ResultTy = NULL;
+ std::vector<Type*> ArgTys;
+ bool IsVarArg = false;
+
+#define GET_INTRINSIC_GENERATOR
+#include "MBlazeGenIntrinsics.inc"
+#undef GET_INTRINSIC_GENERATOR
+
+ return FunctionType::get(ResultTy, ArgTys, IsVarArg);
+}
+
+Function *MBlazeIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID,
+ Type **Tys,
+ unsigned numTy) const {
+ assert(!isOverloaded(IntrID) && "MBlaze intrinsics are not overloaded");
+ AttrListPtr AList = getAttributes((mblazeIntrinsic::ID) IntrID);
+ return cast<Function>(M->getOrInsertFunction(getName(IntrID),
+ getType(M->getContext(), IntrID),
+ AList));
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsicInfo.h b/contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsicInfo.h
new file mode 100644
index 0000000..80760d8
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsicInfo.h
@@ -0,0 +1,33 @@
+//===- MBlazeIntrinsicInfo.h - MBlaze Intrinsic Information -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MBlaze implementation of TargetIntrinsicInfo.
+//
+//===----------------------------------------------------------------------===//
+#ifndef MBLAZEINTRINSICS_H
+#define MBLAZEINTRINSICS_H
+
+#include "llvm/Target/TargetIntrinsicInfo.h"
+
+namespace llvm {
+
+ class MBlazeIntrinsicInfo : public TargetIntrinsicInfo {
+ public:
+ std::string getName(unsigned IntrID, Type **Tys = 0,
+ unsigned numTys = 0) const;
+ unsigned lookupName(const char *Name, unsigned Len) const;
+ unsigned lookupGCCName(const char *Name) const;
+ bool isOverloaded(unsigned IID) const;
+ Function *getDeclaration(Module *M, unsigned ID, Type **Tys = 0,
+ unsigned numTys = 0) const;
+ };
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsics.td b/contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsics.td
new file mode 100644
index 0000000..278afbe
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsics.td
@@ -0,0 +1,131 @@
+//===- IntrinsicsMBlaze.td - Defines MBlaze intrinsics -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines all of the MicroBlaze-specific intrinsics.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Definitions for all MBlaze intrinsics.
+//
+
+// MBlaze intrinsic classes.
+let TargetPrefix = "mblaze", isTarget = 1 in {
+ class MBFSL_Get_Intrinsic : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>;
+
+ class MBFSL_Put_Intrinsic : Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], []>;
+
+ class MBFSL_PutT_Intrinsic : Intrinsic<[], [llvm_i32_ty], []>;
+}
+
+//===----------------------------------------------------------------------===//
+// MicroBlaze FSL Get Intrinsic Definitions.
+//
+
+def int_mblaze_fsl_get : GCCBuiltin<"__builtin_mblaze_fsl_get">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_aget : GCCBuiltin<"__builtin_mblaze_fsl_aget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_cget : GCCBuiltin<"__builtin_mblaze_fsl_cget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_caget : GCCBuiltin<"__builtin_mblaze_fsl_caget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_eget : GCCBuiltin<"__builtin_mblaze_fsl_eget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_eaget : GCCBuiltin<"__builtin_mblaze_fsl_eaget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_ecget : GCCBuiltin<"__builtin_mblaze_fsl_ecget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_ecaget : GCCBuiltin<"__builtin_mblaze_fsl_ecaget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_nget : GCCBuiltin<"__builtin_mblaze_fsl_nget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_naget : GCCBuiltin<"__builtin_mblaze_fsl_naget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_ncget : GCCBuiltin<"__builtin_mblaze_fsl_ncget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_ncaget : GCCBuiltin<"__builtin_mblaze_fsl_ncaget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_neget : GCCBuiltin<"__builtin_mblaze_fsl_neget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_neaget : GCCBuiltin<"__builtin_mblaze_fsl_neaget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_necget : GCCBuiltin<"__builtin_mblaze_fsl_necget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_necaget : GCCBuiltin<"__builtin_mblaze_fsl_necaget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_tget : GCCBuiltin<"__builtin_mblaze_fsl_tget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_taget : GCCBuiltin<"__builtin_mblaze_fsl_taget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_tcget : GCCBuiltin<"__builtin_mblaze_fsl_tcget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_tcaget : GCCBuiltin<"__builtin_mblaze_fsl_tcaget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_teget : GCCBuiltin<"__builtin_mblaze_fsl_teget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_teaget : GCCBuiltin<"__builtin_mblaze_fsl_teaget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_tecget : GCCBuiltin<"__builtin_mblaze_fsl_tecget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_tecaget : GCCBuiltin<"__builtin_mblaze_fsl_tecaget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_tnget : GCCBuiltin<"__builtin_mblaze_fsl_tnget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_tnaget : GCCBuiltin<"__builtin_mblaze_fsl_tnaget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_tncget : GCCBuiltin<"__builtin_mblaze_fsl_tncget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_tncaget : GCCBuiltin<"__builtin_mblaze_fsl_tncaget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_tneget : GCCBuiltin<"__builtin_mblaze_fsl_tneget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_tneaget : GCCBuiltin<"__builtin_mblaze_fsl_tneaget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_tnecget : GCCBuiltin<"__builtin_mblaze_fsl_tnecget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_tnecaget : GCCBuiltin<"__builtin_mblaze_fsl_tnecaget">,
+ MBFSL_Get_Intrinsic;
+
+//===----------------------------------------------------------------------===//
+// MicroBlaze FSL Put Intrinsic Definitions.
+//
+
+def int_mblaze_fsl_put : GCCBuiltin<"__builtin_mblaze_fsl_put">,
+ MBFSL_Put_Intrinsic;
+def int_mblaze_fsl_aput : GCCBuiltin<"__builtin_mblaze_fsl_aput">,
+ MBFSL_Put_Intrinsic;
+def int_mblaze_fsl_cput : GCCBuiltin<"__builtin_mblaze_fsl_cput">,
+ MBFSL_Put_Intrinsic;
+def int_mblaze_fsl_caput : GCCBuiltin<"__builtin_mblaze_fsl_caput">,
+ MBFSL_Put_Intrinsic;
+def int_mblaze_fsl_nput : GCCBuiltin<"__builtin_mblaze_fsl_nput">,
+ MBFSL_Put_Intrinsic;
+def int_mblaze_fsl_naput : GCCBuiltin<"__builtin_mblaze_fsl_naput">,
+ MBFSL_Put_Intrinsic;
+def int_mblaze_fsl_ncput : GCCBuiltin<"__builtin_mblaze_fsl_ncput">,
+ MBFSL_Put_Intrinsic;
+def int_mblaze_fsl_ncaput : GCCBuiltin<"__builtin_mblaze_fsl_ncaput">,
+ MBFSL_Put_Intrinsic;
+def int_mblaze_fsl_tput : GCCBuiltin<"__builtin_mblaze_fsl_tput">,
+ MBFSL_PutT_Intrinsic;
+def int_mblaze_fsl_taput : GCCBuiltin<"__builtin_mblaze_fsl_taput">,
+ MBFSL_PutT_Intrinsic;
+def int_mblaze_fsl_tcput : GCCBuiltin<"__builtin_mblaze_fsl_tcput">,
+ MBFSL_PutT_Intrinsic;
+def int_mblaze_fsl_tcaput : GCCBuiltin<"__builtin_mblaze_fsl_tcaput">,
+ MBFSL_PutT_Intrinsic;
+def int_mblaze_fsl_tnput : GCCBuiltin<"__builtin_mblaze_fsl_tnput">,
+ MBFSL_PutT_Intrinsic;
+def int_mblaze_fsl_tnaput : GCCBuiltin<"__builtin_mblaze_fsl_tnaput">,
+ MBFSL_PutT_Intrinsic;
+def int_mblaze_fsl_tncput : GCCBuiltin<"__builtin_mblaze_fsl_tncput">,
+ MBFSL_PutT_Intrinsic;
+def int_mblaze_fsl_tncaput : GCCBuiltin<"__builtin_mblaze_fsl_tncaput">,
+ MBFSL_PutT_Intrinsic;
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeMCInstLower.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeMCInstLower.cpp
new file mode 100644
index 0000000..a7e400b
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeMCInstLower.cpp
@@ -0,0 +1,166 @@
+//===-- MBLazeMCInstLower.cpp - Convert MBlaze MachineInstr to an MCInst---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains code to lower MBlaze MachineInstrs to their corresponding
+// MCInst records.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MBlazeMCInstLower.h"
+#include "MBlazeInstrInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/ADT/SmallString.h"
+using namespace llvm;
+
+MCSymbol *MBlazeMCInstLower::
+GetGlobalAddressSymbol(const MachineOperand &MO) const {
+ switch (MO.getTargetFlags()) {
+ default: llvm_unreachable("Unknown target flag on GV operand");
+ case 0: break;
+ }
+
+ return Printer.Mang->getSymbol(MO.getGlobal());
+}
+
+MCSymbol *MBlazeMCInstLower::
+GetExternalSymbolSymbol(const MachineOperand &MO) const {
+ switch (MO.getTargetFlags()) {
+ default: llvm_unreachable("Unknown target flag on GV operand");
+ case 0: break;
+ }
+
+ return Printer.GetExternalSymbolSymbol(MO.getSymbolName());
+}
+
+MCSymbol *MBlazeMCInstLower::
+GetJumpTableSymbol(const MachineOperand &MO) const {
+ SmallString<256> Name;
+ raw_svector_ostream(Name) << Printer.MAI->getPrivateGlobalPrefix() << "JTI"
+ << Printer.getFunctionNumber() << '_'
+ << MO.getIndex();
+ switch (MO.getTargetFlags()) {
+ default: llvm_unreachable("Unknown target flag on GV operand");
+ case 0: break;
+ }
+
+ // Create a symbol for the name.
+ return Ctx.GetOrCreateSymbol(Name.str());
+}
+
+MCSymbol *MBlazeMCInstLower::
+GetConstantPoolIndexSymbol(const MachineOperand &MO) const {
+ SmallString<256> Name;
+ raw_svector_ostream(Name) << Printer.MAI->getPrivateGlobalPrefix() << "CPI"
+ << Printer.getFunctionNumber() << '_'
+ << MO.getIndex();
+
+ switch (MO.getTargetFlags()) {
+ default:
+ llvm_unreachable("Unknown target flag on GV operand");
+
+ case 0: break;
+ }
+
+ // Create a symbol for the name.
+ return Ctx.GetOrCreateSymbol(Name.str());
+}
+
+MCSymbol *MBlazeMCInstLower::
+GetBlockAddressSymbol(const MachineOperand &MO) const {
+ switch (MO.getTargetFlags()) {
+ default:
+ assert(0 && "Unknown target flag on GV operand");
+
+ case 0: break;
+ }
+
+ return Printer.GetBlockAddressSymbol(MO.getBlockAddress());
+}
+
+MCOperand MBlazeMCInstLower::
+LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const {
+ // FIXME: We would like an efficient form for this, so we don't have to do a
+ // lot of extra uniquing.
+ const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, Ctx);
+
+ switch (MO.getTargetFlags()) {
+ default:
+ llvm_unreachable("Unknown target flag on GV operand");
+
+ case 0: break;
+ }
+
+ if (!MO.isJTI() && MO.getOffset())
+ Expr = MCBinaryExpr::CreateAdd(Expr,
+ MCConstantExpr::Create(MO.getOffset(), Ctx),
+ Ctx);
+ return MCOperand::CreateExpr(Expr);
+}
+
+void MBlazeMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
+ OutMI.setOpcode(MI->getOpcode());
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+
+ MCOperand MCOp;
+ switch (MO.getType()) {
+ default: llvm_unreachable("unknown operand type");
+ case MachineOperand::MO_Register:
+ // Ignore all implicit register operands.
+ if (MO.isImplicit()) continue;
+ MCOp = MCOperand::CreateReg(MO.getReg());
+ break;
+ case MachineOperand::MO_Immediate:
+ MCOp = MCOperand::CreateImm(MO.getImm());
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
+ MO.getMBB()->getSymbol(), Ctx));
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ MCOp = LowerSymbolOperand(MO, GetGlobalAddressSymbol(MO));
+ break;
+ case MachineOperand::MO_ExternalSymbol:
+ MCOp = LowerSymbolOperand(MO, GetExternalSymbolSymbol(MO));
+ break;
+ case MachineOperand::MO_JumpTableIndex:
+ MCOp = LowerSymbolOperand(MO, GetJumpTableSymbol(MO));
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ MCOp = LowerSymbolOperand(MO, GetConstantPoolIndexSymbol(MO));
+ break;
+ case MachineOperand::MO_BlockAddress:
+ MCOp = LowerSymbolOperand(MO, GetBlockAddressSymbol(MO));
+ break;
+ case MachineOperand::MO_FPImmediate:
+ bool ignored;
+ APFloat FVal = MO.getFPImm()->getValueAPF();
+ FVal.convert(APFloat::IEEEsingle, APFloat::rmTowardZero, &ignored);
+
+ APInt IVal = FVal.bitcastToAPInt();
+ uint64_t Val = *IVal.getRawData();
+ MCOp = MCOperand::CreateImm(Val);
+ break;
+ }
+
+ OutMI.addOperand(MCOp);
+ }
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeMCInstLower.h b/contrib/llvm/lib/Target/MBlaze/MBlazeMCInstLower.h
new file mode 100644
index 0000000..92196f2
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeMCInstLower.h
@@ -0,0 +1,50 @@
+//===-- MBlazeMCInstLower.h - Lower MachineInstr to MCInst ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZE_MCINSTLOWER_H
+#define MBLAZE_MCINSTLOWER_H
+
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+ class AsmPrinter;
+ class MCAsmInfo;
+ class MCContext;
+ class MCInst;
+ class MCOperand;
+ class MCSymbol;
+ class MachineInstr;
+ class MachineModuleInfoMachO;
+ class MachineOperand;
+ class Mangler;
+
+ /// MBlazeMCInstLower - This class is used to lower an MachineInstr
+ /// into an MCInst.
+class LLVM_LIBRARY_VISIBILITY MBlazeMCInstLower {
+ MCContext &Ctx;
+ Mangler &Mang;
+
+ AsmPrinter &Printer;
+public:
+ MBlazeMCInstLower(MCContext &ctx, Mangler &mang, AsmPrinter &printer)
+ : Ctx(ctx), Mang(mang), Printer(printer) {}
+ void Lower(const MachineInstr *MI, MCInst &OutMI) const;
+
+ MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;
+
+ MCSymbol *GetGlobalAddressSymbol(const MachineOperand &MO) const;
+ MCSymbol *GetExternalSymbolSymbol(const MachineOperand &MO) const;
+ MCSymbol *GetJumpTableSymbol(const MachineOperand &MO) const;
+ MCSymbol *GetConstantPoolIndexSymbol(const MachineOperand &MO) const;
+ MCSymbol *GetBlockAddressSymbol(const MachineOperand &MO) const;
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeMachineFunction.h b/contrib/llvm/lib/Target/MBlaze/MBlazeMachineFunction.h
new file mode 100644
index 0000000..df39509
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeMachineFunction.h
@@ -0,0 +1,170 @@
+//===-- MBlazeMachineFunctionInfo.h - Private data ----------------*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the MBlaze specific subclass of MachineFunctionInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZE_MACHINE_FUNCTION_INFO_H
+#define MBLAZE_MACHINE_FUNCTION_INFO_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/VectorExtras.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+
+namespace llvm {
+
+/// MBlazeFunctionInfo - This class is derived from MachineFunction private
+/// MBlaze target-specific information for each MachineFunction.
+class MBlazeFunctionInfo : public MachineFunctionInfo {
+
+private:
+ /// Holds for each function where on the stack the Frame Pointer must be
+ /// saved. This is used on Prologue and Epilogue to emit FP save/restore
+ int FPStackOffset;
+
+ /// Holds for each function where on the stack the Return Address must be
+ /// saved. This is used on Prologue and Epilogue to emit RA save/restore
+ int RAStackOffset;
+
+ /// MBlazeFIHolder - Holds a FrameIndex and it's Stack Pointer Offset
+ struct MBlazeFIHolder {
+
+ int FI;
+ int SPOffset;
+
+ MBlazeFIHolder(int FrameIndex, int StackPointerOffset)
+ : FI(FrameIndex), SPOffset(StackPointerOffset) {}
+ };
+
+ /// When PIC is used the GP must be saved on the stack on the function
+ /// prologue and must be reloaded from this stack location after every
+ /// call. A reference to its stack location and frame index must be kept
+ /// to be used on emitPrologue and processFunctionBeforeFrameFinalized.
+ MBlazeFIHolder GPHolder;
+
+ /// On LowerFormalArguments the stack size is unknown, so the Stack
+ /// Pointer Offset calculation of "not in register arguments" must be
+ /// postponed to emitPrologue.
+ SmallVector<MBlazeFIHolder, 16> FnLoadArgs;
+ bool HasLoadArgs;
+
+ // When VarArgs, we must write registers back to caller stack, preserving
+ // on register arguments. Since the stack size is unknown on
+ // LowerFormalArguments, the Stack Pointer Offset calculation must be
+ // postponed to emitPrologue.
+ SmallVector<MBlazeFIHolder, 4> FnStoreVarArgs;
+ bool HasStoreVarArgs;
+
+ // When determining the final stack layout some of the frame indexes may
+ // be replaced by new frame indexes that reside in the caller's stack
+ // frame. The replacements are recorded in this structure.
+ DenseMap<int,int> FIReplacements;
+
+ /// SRetReturnReg - Some subtargets require that sret lowering includes
+ /// returning the value of the returned struct in a register. This field
+ /// holds the virtual register into which the sret argument is passed.
+ unsigned SRetReturnReg;
+
+ /// GlobalBaseReg - keeps track of the virtual register initialized for
+ /// use as the global base register. This is used for PIC in some PIC
+ /// relocation models.
+ unsigned GlobalBaseReg;
+
+ // VarArgsFrameIndex - FrameIndex for start of varargs area.
+ int VarArgsFrameIndex;
+
+ /// LiveInFI - keeps track of the frame indexes in a callers stack
+ /// frame that are live into a function.
+ SmallVector<int, 16> LiveInFI;
+
+public:
+ MBlazeFunctionInfo(MachineFunction& MF)
+ : FPStackOffset(0), RAStackOffset(0), GPHolder(-1,-1), HasLoadArgs(false),
+ HasStoreVarArgs(false), SRetReturnReg(0), GlobalBaseReg(0),
+ VarArgsFrameIndex(0), LiveInFI()
+ {}
+
+ int getFPStackOffset() const { return FPStackOffset; }
+ void setFPStackOffset(int Off) { FPStackOffset = Off; }
+
+ int getRAStackOffset() const { return RAStackOffset; }
+ void setRAStackOffset(int Off) { RAStackOffset = Off; }
+
+ int getGPStackOffset() const { return GPHolder.SPOffset; }
+ int getGPFI() const { return GPHolder.FI; }
+ void setGPStackOffset(int Off) { GPHolder.SPOffset = Off; }
+ void setGPFI(int FI) { GPHolder.FI = FI; }
+ bool needGPSaveRestore() const { return GPHolder.SPOffset != -1; }
+
+ bool hasLoadArgs() const { return HasLoadArgs; }
+ bool hasStoreVarArgs() const { return HasStoreVarArgs; }
+
+ void recordLiveIn(int FI) {
+ LiveInFI.push_back(FI);
+ }
+
+ bool isLiveIn(int FI) {
+ for (unsigned i = 0, e = LiveInFI.size(); i < e; ++i)
+ if (FI == LiveInFI[i]) return true;
+
+ return false;
+ }
+
+ const SmallVector<int, 16>& getLiveIn() const { return LiveInFI; }
+
+ void recordReplacement(int OFI, int NFI) {
+ FIReplacements.insert(std::make_pair(OFI,NFI));
+ }
+
+ bool hasReplacement(int OFI) const {
+ return FIReplacements.find(OFI) != FIReplacements.end();
+ }
+
+ int getReplacement(int OFI) const {
+ return FIReplacements.lookup(OFI);
+ }
+
+ void recordLoadArgsFI(int FI, int SPOffset) {
+ if (!HasLoadArgs) HasLoadArgs=true;
+ FnLoadArgs.push_back(MBlazeFIHolder(FI, SPOffset));
+ }
+
+ void recordStoreVarArgsFI(int FI, int SPOffset) {
+ if (!HasStoreVarArgs) HasStoreVarArgs=true;
+ FnStoreVarArgs.push_back(MBlazeFIHolder(FI, SPOffset));
+ }
+
+ void adjustLoadArgsFI(MachineFrameInfo *MFI) const {
+ if (!hasLoadArgs()) return;
+ for (unsigned i = 0, e = FnLoadArgs.size(); i != e; ++i)
+ MFI->setObjectOffset(FnLoadArgs[i].FI, FnLoadArgs[i].SPOffset);
+ }
+
+ void adjustStoreVarArgsFI(MachineFrameInfo *MFI) const {
+ if (!hasStoreVarArgs()) return;
+ for (unsigned i = 0, e = FnStoreVarArgs.size(); i != e; ++i)
+ MFI->setObjectOffset(FnStoreVarArgs[i].FI, FnStoreVarArgs[i].SPOffset);
+ }
+
+ unsigned getSRetReturnReg() const { return SRetReturnReg; }
+ void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; }
+
+ unsigned getGlobalBaseReg() const { return GlobalBaseReg; }
+ void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; }
+
+ int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
+ void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; }
+};
+
+} // end of namespace llvm
+
+#endif // MBLAZE_MACHINE_FUNCTION_INFO_H
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
new file mode 100644
index 0000000..9788ba9
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
@@ -0,0 +1,193 @@
+//===- MBlazeRegisterInfo.cpp - MBlaze Register Information -== -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MBlaze implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mblaze-frame-info"
+
+#include "MBlaze.h"
+#include "MBlazeSubtarget.h"
+#include "MBlazeRegisterInfo.h"
+#include "MBlazeMachineFunction.h"
+#include "llvm/Constants.h"
+#include "llvm/Type.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+
+#define GET_REGINFO_TARGET_DESC
+#include "MBlazeGenRegisterInfo.inc"
+
+using namespace llvm;
+
+MBlazeRegisterInfo::
+MBlazeRegisterInfo(const MBlazeSubtarget &ST, const TargetInstrInfo &tii)
+ : MBlazeGenRegisterInfo(MBlaze::R15), Subtarget(ST), TII(tii) {}
+
+unsigned MBlazeRegisterInfo::getPICCallReg() {
+ return MBlaze::R20;
+}
+
+//===----------------------------------------------------------------------===//
+// Callee Saved Registers methods
+//===----------------------------------------------------------------------===//
+
+/// MBlaze Callee Saved Registers
+const unsigned* MBlazeRegisterInfo::
+getCalleeSavedRegs(const MachineFunction *MF) const {
+ // MBlaze callee-save register range is R20 - R31
+ static const unsigned CalleeSavedRegs[] = {
+ MBlaze::R20, MBlaze::R21, MBlaze::R22, MBlaze::R23,
+ MBlaze::R24, MBlaze::R25, MBlaze::R26, MBlaze::R27,
+ MBlaze::R28, MBlaze::R29, MBlaze::R30, MBlaze::R31,
+ 0
+ };
+
+ return CalleeSavedRegs;
+}
+
+BitVector MBlazeRegisterInfo::
+getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+ Reserved.set(MBlaze::R0);
+ Reserved.set(MBlaze::R1);
+ Reserved.set(MBlaze::R2);
+ Reserved.set(MBlaze::R13);
+ Reserved.set(MBlaze::R14);
+ Reserved.set(MBlaze::R15);
+ Reserved.set(MBlaze::R16);
+ Reserved.set(MBlaze::R17);
+ Reserved.set(MBlaze::R18);
+ Reserved.set(MBlaze::R19);
+ return Reserved;
+}
+
+// This function eliminate ADJCALLSTACKDOWN/ADJCALLSTACKUP pseudo instructions
+void MBlazeRegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ if (!TFI->hasReservedCallFrame(MF)) {
+ // If we have a frame pointer, turn the adjcallstackup instruction into a
+ // 'addi r1, r1, -<amt>' and the adjcallstackdown instruction into
+ // 'addi r1, r1, <amt>'
+ MachineInstr *Old = I;
+ int Amount = Old->getOperand(0).getImm() + 4;
+ if (Amount != 0) {
+ // We need to keep the stack aligned properly. To do this, we round the
+ // amount of space needed for the outgoing arguments up to the next
+ // alignment boundary.
+ unsigned Align = TFI->getStackAlignment();
+ Amount = (Amount+Align-1)/Align*Align;
+
+ MachineInstr *New;
+ if (Old->getOpcode() == MBlaze::ADJCALLSTACKDOWN) {
+ New = BuildMI(MF,Old->getDebugLoc(),TII.get(MBlaze::ADDIK),MBlaze::R1)
+ .addReg(MBlaze::R1).addImm(-Amount);
+ } else {
+ assert(Old->getOpcode() == MBlaze::ADJCALLSTACKUP);
+ New = BuildMI(MF,Old->getDebugLoc(),TII.get(MBlaze::ADDIK),MBlaze::R1)
+ .addReg(MBlaze::R1).addImm(Amount);
+ }
+
+ // Replace the pseudo instruction with a new instruction...
+ MBB.insert(I, New);
+ }
+ }
+
+ // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
+ MBB.erase(I);
+}
+
+// FrameIndex represent objects inside a abstract stack.
+// We must replace FrameIndex with an stack/frame pointer
+// direct reference.
+void MBlazeRegisterInfo::
+eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
+ RegScavenger *RS) const {
+ MachineInstr &MI = *II;
+ MachineFunction &MF = *MI.getParent()->getParent();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ unsigned i = 0;
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() &&
+ "Instr doesn't have FrameIndex operand!");
+ }
+
+ unsigned oi = i == 2 ? 1 : 2;
+
+ DEBUG(dbgs() << "\nFunction : " << MF.getFunction()->getName() << "\n";
+ dbgs() << "<--------->\n" << MI);
+
+ int FrameIndex = MI.getOperand(i).getIndex();
+ int stackSize = MFI->getStackSize();
+ int spOffset = MFI->getObjectOffset(FrameIndex);
+
+ DEBUG(MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
+ dbgs() << "FrameIndex : " << FrameIndex << "\n"
+ << "spOffset : " << spOffset << "\n"
+ << "stackSize : " << stackSize << "\n"
+ << "isFixed : " << MFI->isFixedObjectIndex(FrameIndex) << "\n"
+ << "isLiveIn : " << MBlazeFI->isLiveIn(FrameIndex) << "\n"
+ << "isSpill : " << MFI->isSpillSlotObjectIndex(FrameIndex)
+ << "\n" );
+
+ // as explained on LowerFormalArguments, detect negative offsets
+ // and adjust SPOffsets considering the final stack size.
+ int Offset = (spOffset < 0) ? (stackSize - spOffset) : spOffset;
+ Offset += MI.getOperand(oi).getImm();
+
+ DEBUG(dbgs() << "Offset : " << Offset << "\n" << "<--------->\n");
+
+ MI.getOperand(oi).ChangeToImmediate(Offset);
+ MI.getOperand(i).ChangeToRegister(getFrameRegister(MF), false);
+}
+
+void MBlazeRegisterInfo::
+processFunctionBeforeFrameFinalized(MachineFunction &MF) const {
+ // Set the stack offset where GP must be saved/loaded from.
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
+ if (MBlazeFI->needGPSaveRestore())
+ MFI->setObjectOffset(MBlazeFI->getGPFI(), MBlazeFI->getGPStackOffset());
+}
+
+unsigned MBlazeRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ return TFI->hasFP(MF) ? MBlaze::R19 : MBlaze::R1;
+}
+
+unsigned MBlazeRegisterInfo::getEHExceptionRegister() const {
+ llvm_unreachable("What is the exception register");
+ return 0;
+}
+
+unsigned MBlazeRegisterInfo::getEHHandlerRegister() const {
+ llvm_unreachable("What is the exception handler register");
+ return 0;
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.h b/contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.h
new file mode 100644
index 0000000..7e4b269
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.h
@@ -0,0 +1,73 @@
+//===- MBlazeRegisterInfo.h - MBlaze Register Information Impl --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MBlaze implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZEREGISTERINFO_H
+#define MBLAZEREGISTERINFO_H
+
+#include "MBlaze.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#define GET_REGINFO_HEADER
+#include "MBlazeGenRegisterInfo.inc"
+
+namespace llvm {
+class MBlazeSubtarget;
+class TargetInstrInfo;
+class Type;
+
+namespace MBlaze {
+ /// SubregIndex - The index of various sized subregister classes. Note that
+ /// these indices must be kept in sync with the class indices in the
+ /// MBlazeRegisterInfo.td file.
+ enum SubregIndex {
+ SUBREG_FPEVEN = 1, SUBREG_FPODD = 2
+ };
+}
+
+struct MBlazeRegisterInfo : public MBlazeGenRegisterInfo {
+ const MBlazeSubtarget &Subtarget;
+ const TargetInstrInfo &TII;
+
+ MBlazeRegisterInfo(const MBlazeSubtarget &Subtarget,
+ const TargetInstrInfo &tii);
+
+ /// Get PIC indirect call register
+ static unsigned getPICCallReg();
+
+ /// Code Generation virtual methods...
+ const unsigned *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ /// Stack Frame Processing Methods
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
+
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+
+ /// Debug information queries.
+ unsigned getFrameRegister(const MachineFunction &MF) const;
+
+ /// Exception handling queries.
+ unsigned getEHExceptionRegister() const;
+ unsigned getEHHandlerRegister() const;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.td b/contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.td
new file mode 100644
index 0000000..13c46ba
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.td
@@ -0,0 +1,148 @@
+//===- MBlazeRegisterInfo.td - MBlaze Register defs --------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the MicroBlaze register file
+//===----------------------------------------------------------------------===//
+
+// We have banks of 32 registers each.
+class MBlazeReg<string n> : Register<n> {
+ field bits<5> Num;
+ let Namespace = "MBlaze";
+}
+
+// Special purpose registers have 15-bit values
+class MBlazeSReg<string n> : Register<n> {
+ field bits<15> Num;
+ let Namespace = "MBlaze";
+}
+
+// MBlaze general purpose registers
+class MBlazeGPRReg<bits<5> num, string n> : MBlazeReg<n> {
+ let Num = num;
+}
+
+// MBlaze special purpose registers
+class MBlazeSPRReg<bits<15> num, string n> : MBlazeSReg<n> {
+ let Num = num;
+}
+
+//===----------------------------------------------------------------------===//
+// Registers
+//===----------------------------------------------------------------------===//
+
+let Namespace = "MBlaze" in {
+ // General Purpose Registers
+ def R0 : MBlazeGPRReg< 0, "r0">, DwarfRegNum<[0]>;
+ def R1 : MBlazeGPRReg< 1, "r1">, DwarfRegNum<[1]>;
+ def R2 : MBlazeGPRReg< 2, "r2">, DwarfRegNum<[2]>;
+ def R3 : MBlazeGPRReg< 3, "r3">, DwarfRegNum<[3]>;
+ def R4 : MBlazeGPRReg< 4, "r4">, DwarfRegNum<[4]>;
+ def R5 : MBlazeGPRReg< 5, "r5">, DwarfRegNum<[5]>;
+ def R6 : MBlazeGPRReg< 6, "r6">, DwarfRegNum<[6]>;
+ def R7 : MBlazeGPRReg< 7, "r7">, DwarfRegNum<[7]>;
+ def R8 : MBlazeGPRReg< 8, "r8">, DwarfRegNum<[8]>;
+ def R9 : MBlazeGPRReg< 9, "r9">, DwarfRegNum<[9]>;
+ def R10 : MBlazeGPRReg< 10, "r10">, DwarfRegNum<[10]>;
+ def R11 : MBlazeGPRReg< 11, "r11">, DwarfRegNum<[11]>;
+ def R12 : MBlazeGPRReg< 12, "r12">, DwarfRegNum<[12]>;
+ def R13 : MBlazeGPRReg< 13, "r13">, DwarfRegNum<[13]>;
+ def R14 : MBlazeGPRReg< 14, "r14">, DwarfRegNum<[14]>;
+ def R15 : MBlazeGPRReg< 15, "r15">, DwarfRegNum<[15]>;
+ def R16 : MBlazeGPRReg< 16, "r16">, DwarfRegNum<[16]>;
+ def R17 : MBlazeGPRReg< 17, "r17">, DwarfRegNum<[17]>;
+ def R18 : MBlazeGPRReg< 18, "r18">, DwarfRegNum<[18]>;
+ def R19 : MBlazeGPRReg< 19, "r19">, DwarfRegNum<[19]>;
+ def R20 : MBlazeGPRReg< 20, "r20">, DwarfRegNum<[20]>;
+ def R21 : MBlazeGPRReg< 21, "r21">, DwarfRegNum<[21]>;
+ def R22 : MBlazeGPRReg< 22, "r22">, DwarfRegNum<[22]>;
+ def R23 : MBlazeGPRReg< 23, "r23">, DwarfRegNum<[23]>;
+ def R24 : MBlazeGPRReg< 24, "r24">, DwarfRegNum<[24]>;
+ def R25 : MBlazeGPRReg< 25, "r25">, DwarfRegNum<[25]>;
+ def R26 : MBlazeGPRReg< 26, "r26">, DwarfRegNum<[26]>;
+ def R27 : MBlazeGPRReg< 27, "r27">, DwarfRegNum<[27]>;
+ def R28 : MBlazeGPRReg< 28, "r28">, DwarfRegNum<[28]>;
+ def R29 : MBlazeGPRReg< 29, "r29">, DwarfRegNum<[29]>;
+ def R30 : MBlazeGPRReg< 30, "r30">, DwarfRegNum<[30]>;
+ def R31 : MBlazeGPRReg< 31, "r31">, DwarfRegNum<[31]>;
+
+ // Special Purpose Registers
+ def RPC : MBlazeSPRReg<0x0000, "rpc">, DwarfRegNum<[32]>;
+ def RMSR : MBlazeSPRReg<0x0001, "rmsr">, DwarfRegNum<[33]>;
+ def REAR : MBlazeSPRReg<0x0003, "rear">, DwarfRegNum<[34]>;
+ def RESR : MBlazeSPRReg<0x0005, "resr">, DwarfRegNum<[35]>;
+ def RFSR : MBlazeSPRReg<0x0007, "rfsr">, DwarfRegNum<[36]>;
+ def RBTR : MBlazeSPRReg<0x000B, "rbtr">, DwarfRegNum<[37]>;
+ def REDR : MBlazeSPRReg<0x000D, "redr">, DwarfRegNum<[38]>;
+ def RPID : MBlazeSPRReg<0x1000, "rpid">, DwarfRegNum<[39]>;
+ def RZPR : MBlazeSPRReg<0x1001, "rzpr">, DwarfRegNum<[40]>;
+ def RTLBX : MBlazeSPRReg<0x1002, "rtlbx">, DwarfRegNum<[41]>;
+ def RTLBLO : MBlazeSPRReg<0x1003, "rtlblo">, DwarfRegNum<[42]>;
+ def RTLBHI : MBlazeSPRReg<0x1004, "rtlbhi">, DwarfRegNum<[43]>;
+ def RTLBSX : MBlazeSPRReg<0x1004, "rtlbsx">, DwarfRegNum<[44]>;
+ def RPVR0 : MBlazeSPRReg<0x2000, "rpvr0">, DwarfRegNum<[45]>;
+ def RPVR1 : MBlazeSPRReg<0x2001, "rpvr1">, DwarfRegNum<[46]>;
+ def RPVR2 : MBlazeSPRReg<0x2002, "rpvr2">, DwarfRegNum<[47]>;
+ def RPVR3 : MBlazeSPRReg<0x2003, "rpvr3">, DwarfRegNum<[48]>;
+ def RPVR4 : MBlazeSPRReg<0x2004, "rpvr4">, DwarfRegNum<[49]>;
+ def RPVR5 : MBlazeSPRReg<0x2005, "rpvr5">, DwarfRegNum<[50]>;
+ def RPVR6 : MBlazeSPRReg<0x2006, "rpvr6">, DwarfRegNum<[51]>;
+ def RPVR7 : MBlazeSPRReg<0x2007, "rpvr7">, DwarfRegNum<[52]>;
+ def RPVR8 : MBlazeSPRReg<0x2008, "rpvr8">, DwarfRegNum<[53]>;
+ def RPVR9 : MBlazeSPRReg<0x2009, "rpvr9">, DwarfRegNum<[54]>;
+ def RPVR10 : MBlazeSPRReg<0x200A, "rpvr10">, DwarfRegNum<[55]>;
+ def RPVR11 : MBlazeSPRReg<0x200B, "rpvr11">, DwarfRegNum<[56]>;
+
+ // The carry bit. In the Microblaze this is really bit 29 of the
+ // MSR register but this is the only bit of that register that we
+ // are interested in modeling.
+ def CARRY : MBlazeSPRReg<0x0000, "rmsr[c]">;
+}
+
+//===----------------------------------------------------------------------===//
+// Register Classes
+//===----------------------------------------------------------------------===//
+
+def GPR : RegisterClass<"MBlaze", [i32,f32], 32, (sequence "R%u", 0, 31)>;
+
+def SPR : RegisterClass<"MBlaze", [i32], 32, (add
+ // Reserved
+ RPC,
+ RMSR,
+ REAR,
+ RESR,
+ RFSR,
+ RBTR,
+ REDR,
+ RPID,
+ RZPR,
+ RTLBX,
+ RTLBLO,
+ RTLBHI,
+ RPVR0,
+ RPVR1,
+ RPVR2,
+ RPVR3,
+ RPVR4,
+ RPVR5,
+ RPVR6,
+ RPVR7,
+ RPVR8,
+ RPVR9,
+ RPVR10,
+ RPVR11
+ )>
+{
+ // None of the special purpose registers are allocatable.
+ let isAllocatable = 0;
+}
+
+def CRC : RegisterClass<"MBlaze", [i32], 32, (add CARRY)> {
+ let CopyCost = -1;
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeRelocations.h b/contrib/llvm/lib/Target/MBlaze/MBlazeRelocations.h
new file mode 100644
index 0000000..c298eda
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeRelocations.h
@@ -0,0 +1,47 @@
+//===- MBlazeRelocations.h - MBlaze Code Relocations ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the MBlaze target-specific relocation types.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZERELOCATIONS_H
+#define MBLAZERELOCATIONS_H
+
+#include "llvm/CodeGen/MachineRelocation.h"
+
+namespace llvm {
+ namespace MBlaze {
+ enum RelocationType {
+ /// reloc_pcrel_word - PC relative relocation, add the relocated value to
+ /// the value already in memory, after we adjust it for where the PC is.
+ reloc_pcrel_word = 0,
+
+ /// reloc_picrel_word - PIC base relative relocation, add the relocated
+ /// value to the value already in memory, after we adjust it for where the
+ /// PIC base is.
+ reloc_picrel_word = 1,
+
+ /// reloc_absolute_word - absolute relocation, just add the relocated
+ /// value to the value already in memory.
+ reloc_absolute_word = 2,
+
+ /// reloc_absolute_word_sext - absolute relocation, just add the relocated
+ /// value to the value already in memory. In object files, it represents a
+ /// value which must be sign-extended when resolving the relocation.
+ reloc_absolute_word_sext = 3,
+
+ /// reloc_absolute_dword - absolute relocation, just add the relocated
+ /// value to the value already in memory.
+ reloc_absolute_dword = 4
+ };
+ }
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeSchedule.td b/contrib/llvm/lib/Target/MBlaze/MBlazeSchedule.td
new file mode 100644
index 0000000..4662f25
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeSchedule.td
@@ -0,0 +1,55 @@
+//===- MBlazeSchedule.td - MBlaze Scheduling Definitions ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MBlaze functional units.
+//===----------------------------------------------------------------------===//
+def IF : FuncUnit;
+def ID : FuncUnit;
+def EX : FuncUnit;
+def MA : FuncUnit;
+def WB : FuncUnit;
+
+//===----------------------------------------------------------------------===//
+// Instruction Itinerary classes used for MBlaze
+//===----------------------------------------------------------------------===//
+def IIC_ALU : InstrItinClass;
+def IIC_ALUm : InstrItinClass;
+def IIC_ALUd : InstrItinClass;
+def IIC_SHT : InstrItinClass;
+def IIC_FSLg : InstrItinClass;
+def IIC_FSLp : InstrItinClass;
+def IIC_MEMs : InstrItinClass;
+def IIC_MEMl : InstrItinClass;
+def IIC_FPU : InstrItinClass;
+def IIC_FPUd : InstrItinClass;
+def IIC_FPUf : InstrItinClass;
+def IIC_FPUi : InstrItinClass;
+def IIC_FPUs : InstrItinClass;
+def IIC_FPUc : InstrItinClass;
+def IIC_BR : InstrItinClass;
+def IIC_BRc : InstrItinClass;
+def IIC_BRl : InstrItinClass;
+def IIC_WDC : InstrItinClass;
+def IIC_Pseudo : InstrItinClass;
+
+//===----------------------------------------------------------------------===//
+// MBlaze generic instruction itineraries.
+//===----------------------------------------------------------------------===//
+def MBlazeGenericItineraries : ProcessorItineraries<[], [], []>;
+
+//===----------------------------------------------------------------------===//
+// MBlaze instruction itineraries for three stage pipeline.
+//===----------------------------------------------------------------------===//
+include "MBlazeSchedule3.td"
+
+//===----------------------------------------------------------------------===//
+// MBlaze instruction itineraries for five stage pipeline.
+//===----------------------------------------------------------------------===//
+include "MBlazeSchedule5.td"
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeSchedule3.td b/contrib/llvm/lib/Target/MBlaze/MBlazeSchedule3.td
new file mode 100644
index 0000000..ccbf99d
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeSchedule3.td
@@ -0,0 +1,236 @@
+//===- MBlazeSchedule3.td - MBlaze Scheduling Definitions --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MBlaze instruction itineraries for the three stage pipeline.
+//===----------------------------------------------------------------------===//
+def MBlazePipe3Itineraries : ProcessorItineraries<
+ [IF,ID,EX], [], [
+
+ // ALU instruction with one destination register and either two register
+ // source operands or one register source operand and one immediate operand.
+ // The instruction takes one cycle to execute in each of the stages. The
+ // two source operands are read during the decode stage and the result is
+ // ready after the execute stage.
+ InstrItinData< IIC_ALU,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]>], // one cycle in execute stage
+ [ 2 // result ready after two cycles
+ , 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // ALU multiply instruction with one destination register and either two
+ // register source operands or one register source operand and one immediate
+ // operand. The instruction takes one cycle to execute in each of the
+ // pipeline stages except the execute stage, which takes three cycles. The
+ // two source operands are read during the decode stage and the result is
+ // ready after the execute stage.
+ InstrItinData< IIC_ALUm,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<3,[EX]>], // three cycles in execute stage
+ [ 4 // result ready after four cycles
+ , 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // ALU divide instruction with one destination register two register source
+ // operands. The instruction takes one cycle to execute in each the pipeline
+ // stages except the execute stage, which takes 34 cycles. The two
+ // source operands are read during the decode stage and the result is ready
+ // after the execute stage.
+ InstrItinData< IIC_ALUd,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<34,[EX]>], // 34 cycles in execute stage
+ [ 35 // result ready after 35 cycles
+ , 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // Shift instruction with one destination register and either two register
+ // source operands or one register source operand and one immediate operand.
+ // The instruction takes one cycle to execute in each of the pipeline stages
+ // except the execute stage, which takes two cycles. The two source operands
+ // are read during the decode stage and the result is ready after the execute
+ // stage.
+ InstrItinData< IIC_SHT,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<2,[EX]>], // two cycles in execute stage
+ [ 3 // result ready after three cycles
+ , 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // Branch instruction with one source operand register. The instruction takes
+ // one cycle to execute in each of the pipeline stages. The source operand is
+ // read during the decode stage.
+ InstrItinData< IIC_BR,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]>], // one cycle in execute stage
+ [ 1 ]>, // first operand read after one cycle
+
+ // Conditional branch instruction with two source operand registers. The
+ // instruction takes one cycle to execute in each of the pipeline stages. The
+ // two source operands are read during the decode stage.
+ InstrItinData< IIC_BRc,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]>], // one cycle in execute stage
+ [ 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // Branch and link instruction with one destination register and one source
+ // operand register. The instruction takes one cycle to execute in each of
+ // the pipeline stages. The source operand is read during the decode stage
+ // and the destination register is ready after the execute stage.
+ InstrItinData< IIC_BRl,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]>], // one cycle in execute stage
+ [ 2 // result ready after two cycles
+ , 1 ]>, // first operand read after one cycle
+
+ // Cache control instruction with two source operand registers. The
+ // instruction takes one cycle to execute in each of the pipeline stages
+ // except the memory access stage, which takes two cycles. The source
+ // operands are read during the decode stage.
+ InstrItinData< IIC_WDC,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<2,[EX]>], // two cycles in execute stage
+ [ 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // Floating point instruction with one destination register and two source
+ // operand registers. The instruction takes one cycle to execute in each of
+ // the pipeline stages except the execute stage, which takes six cycles. The
+ // source operands are read during the decode stage and the results are ready
+ // after the execute stage.
+ InstrItinData< IIC_FPU,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<6,[EX]>], // six cycles in execute stage
+ [ 7 // result ready after seven cycles
+ , 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // Floating point divide instruction with one destination register and two
+ // source operand registers. The instruction takes one cycle to execute in
+ // each of the pipeline stages except the execute stage, which takes 30
+ // cycles. The source operands are read during the decode stage and the
+ // results are ready after the execute stage.
+ InstrItinData< IIC_FPUd,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<30,[EX]>], // one cycle in execute stage
+ [ 31 // result ready after 31 cycles
+ , 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // Convert floating point to integer instruction with one destination
+ // register and one source operand register. The instruction takes one cycle
+ // to execute in each of the pipeline stages except the execute stage,
+ // which takes seven cycles. The source operands are read during the decode
+ // stage and the results are ready after the execute stage.
+ InstrItinData< IIC_FPUi,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<7,[EX]>], // seven cycles in execute stage
+ [ 8 // result ready after eight cycles
+ , 1 ]>, // first operand read after one cycle
+
+ // Convert integer to floating point instruction with one destination
+ // register and one source operand register. The instruction takes one cycle
+ // to execute in each of the pipeline stages except the execute stage,
+ // which takes six cycles. The source operands are read during the decode
+ // stage and the results are ready after the execute stage.
+ InstrItinData< IIC_FPUf,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<6,[EX]>], // six cycles in execute stage
+ [ 7 // result ready after seven cycles
+ , 1 ]>, // first operand read after one cycle
+
+ // Floating point square root instruction with one destination register and
+ // one source operand register. The instruction takes one cycle to execute in
+ // each of the pipeline stages except the execute stage, which takes 29
+ // cycles. The source operands are read during the decode stage and the
+ // results are ready after the execute stage.
+ InstrItinData< IIC_FPUs,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<29,[EX]>], // 29 cycles in execute stage
+ [ 30 // result ready after 30 cycles
+ , 1 ]>, // first operand read after one cycle
+
+ // Floating point comparison instruction with one destination register and
+ // two source operand registers. The instruction takes one cycle to execute
+ // in each of the pipeline stages except the execute stage, which takes three
+ // cycles. The source operands are read during the decode stage and the
+ // results are ready after the execute stage.
+ InstrItinData< IIC_FPUc,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<3,[EX]>], // three cycles in execute stage
+ [ 4 // result ready after four cycles
+ , 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // FSL get instruction with one register or immediate source operand and one
+ // destination register. The instruction takes one cycle to execute in each
+ // of the pipeline stages except the execute stage, which takes two cycles.
+ // The one source operand is read during the decode stage and the result is
+ // ready after the execute stage.
+ InstrItinData< IIC_FSLg,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<2,[EX]>], // two cycles in execute stage
+ [ 3 // result ready after two cycles
+ , 1 ]>, // first operand read after one cycle
+
+ // FSL put instruction with either two register source operands or one
+ // register source operand and one immediate operand. There is no result
+ // produced by the instruction. The instruction takes one cycle to execute in
+ // each of the pipeline stages except the execute stage, which takes two
+ // cycles. The two source operands are read during the decode stage.
+ InstrItinData< IIC_FSLp,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<2,[EX]>], // two cycles in execute stage
+ [ 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // Memory store instruction with either three register source operands or two
+ // register source operands and one immediate operand. There is no result
+ // produced by the instruction. The instruction takes one cycle to execute in
+ // each of the pipeline stages except the execute stage, which takes two
+ // cycles. All of the source operands are read during the decode stage.
+ InstrItinData< IIC_MEMs,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<2,[EX]>], // two cycles in execute stage
+ [ 1 // first operand read after one cycle
+ , 1 // second operand read after one cycle
+ , 1 ]>, // third operand read after one cycle
+
+ // Memory load instruction with one destination register and either two
+ // register source operands or one register source operand and one immediate
+ // operand. The instruction takes one cycle to execute in each of the
+ // pipeline stages except the execute stage, which takes two cycles. All of
+ // the source operands are read during the decode stage and the result is
+ // ready after the execute stage.
+ InstrItinData< IIC_MEMl,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<2,[EX]>], // two cycles in execute stage
+ [ 3 // result ready after four cycles
+ , 1 // second operand read after one cycle
+ , 1 ]> // third operand read after one cycle
+]>;
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeSchedule5.td b/contrib/llvm/lib/Target/MBlaze/MBlazeSchedule5.td
new file mode 100644
index 0000000..fa88766
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeSchedule5.td
@@ -0,0 +1,267 @@
+//===- MBlazeSchedule5.td - MBlaze Scheduling Definitions --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MBlaze instruction itineraries for the five stage pipeline.
+//===----------------------------------------------------------------------===//
+def MBlazePipe5Itineraries : ProcessorItineraries<
+ [IF,ID,EX,MA,WB], [], [
+
+ // ALU instruction with one destination register and either two register
+ // source operands or one register source operand and one immediate operand.
+ // The instruction takes one cycle to execute in each of the stages. The
+ // two source operands are read during the decode stage and the result is
+ // ready after the execute stage.
+ InstrItinData< IIC_ALU,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<1,[MA]> // one cycle in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 2 // result ready after two cycles
+ , 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // ALU multiply instruction with one destination register and either two
+ // register source operands or one register source operand and one immediate
+ // operand. The instruction takes one cycle to execute in each of the
+ // pipeline stages. The two source operands are read during the decode stage
+ // and the result is ready after the execute stage.
+ InstrItinData< IIC_ALUm,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<1,[MA]> // one cycle in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 2 // result ready after two cycles
+ , 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // ALU divide instruction with one destination register two register source
+ // operands. The instruction takes one cycle to execute in each the pipeline
+ // stages except the memory access stage, which takes 31 cycles. The two
+ // source operands are read during the decode stage and the result is ready
+ // after the memory access stage.
+ InstrItinData< IIC_ALUd,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<31,[MA]> // 31 cycles in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 33 // result ready after 33 cycles
+ , 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // Shift instruction with one destination register and either two register
+ // source operands or one register source operand and one immediate operand.
+ // The instruction takes one cycle to execute in each of the pipeline stages.
+ // The two source operands are read during the decode stage and the result is
+ // ready after the memory access stage.
+ InstrItinData< IIC_SHT,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<1,[MA]> // one cycle in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 3 // result ready after three cycles
+ , 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // Branch instruction with one source operand register. The instruction takes
+ // one cycle to execute in each of the pipeline stages. The source operand is
+ // read during the decode stage.
+ InstrItinData< IIC_BR,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<1,[MA]> // one cycle in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 1 ]>, // first operand read after one cycle
+
+ // Conditional branch instruction with two source operand registers. The
+ // instruction takes one cycle to execute in each of the pipeline stages. The
+ // two source operands are read during the decode stage.
+ InstrItinData< IIC_BRc,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<1,[MA]> // one cycle in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // Branch and link instruction with one destination register and one source
+ // operand register. The instruction takes one cycle to execute in each of
+ // the pipeline stages. The source operand is read during the decode stage
+ // and the destination register is ready after the writeback stage.
+ InstrItinData< IIC_BRl,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<1,[MA]> // one cycle in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 4 // result ready after four cycles
+ , 1 ]>, // first operand read after one cycle
+
+ // Cache control instruction with two source operand registers. The
+ // instruction takes one cycle to execute in each of the pipeline stages
+ // except the memory access stage, which takes two cycles. The source
+ // operands are read during the decode stage.
+ InstrItinData< IIC_WDC,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<2,[MA]> // two cycles in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // Floating point instruction with one destination register and two source
+ // operand registers. The instruction takes one cycle to execute in each of
+ // the pipeline stages except the memory access stage, which takes two
+ // cycles. The source operands are read during the decode stage and the
+ // results are ready after the writeback stage.
+ InstrItinData< IIC_FPU,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<2,[MA]> // two cycles in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 5 // result ready after five cycles
+ , 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // Floating point divide instruction with one destination register and two
+ // source operand registers. The instruction takes one cycle to execute in
+ // each of the pipeline stages except the memory access stage, which takes 26
+ // cycles. The source operands are read during the decode stage and the
+ // results are ready after the writeback stage.
+ InstrItinData< IIC_FPUd,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<26,[MA]> // 26 cycles in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 29 // result ready after 29 cycles
+ , 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // Convert floating point to integer instruction with one destination
+ // register and one source operand register. The instruction takes one cycle
+ // to execute in each of the pipeline stages except the memory access stage,
+ // which takes three cycles. The source operands are read during the decode
+ // stage and the results are ready after the writeback stage.
+ InstrItinData< IIC_FPUi,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<3,[MA]> // three cycles in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 6 // result ready after six cycles
+ , 1 ]>, // first operand read after one cycle
+
+ // Convert integer to floating point instruction with one destination
+ // register and one source operand register. The instruction takes one cycle
+ // to execute in each of the pipeline stages except the memory access stage,
+ // which takes two cycles. The source operands are read during the decode
+ // stage and the results are ready after the writeback stage.
+ InstrItinData< IIC_FPUf,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<2,[MA]> // two cycles in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 5 // result ready after five cycles
+ , 1 ]>, // first operand read after one cycle
+
+ // Floating point square root instruction with one destination register and
+ // one source operand register. The instruction takes one cycle to execute in
+ // each of the pipeline stages except the memory access stage, which takes 25
+ // cycles. The source operands are read during the decode stage and the
+ // results are ready after the writeback stage.
+ InstrItinData< IIC_FPUs,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<25,[MA]> // 25 cycles in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 28 // result ready after 28 cycles
+ , 1 ]>, // first operand read after one cycle
+
+ // Floating point comparison instruction with one destination register and
+ // two source operand registers. The instruction takes one cycle to execute
+ // in each of the pipeline stages. The source operands are read during the
+ // decode stage and the results are ready after the execute stage.
+ InstrItinData< IIC_FPUc,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<1,[MA]> // one cycle in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 2 // result ready after two cycles
+ , 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // FSL get instruction with one register or immediate source operand and one
+ // destination register. The instruction takes one cycle to execute in each
+ // of the pipeline stages. The one source operand is read during the decode
+ // stage and the result is ready after the execute stage.
+ InstrItinData< IIC_FSLg,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<1,[MA]> // one cycle in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 2 // result ready after two cycles
+ , 1 ]>, // first operand read after one cycle
+
+ // FSL put instruction with either two register source operands or one
+ // register source operand and one immediate operand. There is no result
+ // produced by the instruction. The instruction takes one cycle to execute in
+ // each of the pipeline stages. The two source operands are read during the
+ // decode stage.
+ InstrItinData< IIC_FSLp,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<1,[MA]> // one cycle in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // Memory store instruction with either three register source operands or two
+ // register source operands and one immediate operand. There is no result
+ // produced by the instruction. The instruction takes one cycle to execute in
+ // each of the pipeline stages. All of the source operands are read during
+ // the decode stage.
+ InstrItinData< IIC_MEMs,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<1,[MA]> // one cycle in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 1 // first operand read after one cycle
+ , 1 // second operand read after one cycle
+ , 1 ]>, // third operand read after one cycle
+
+ // Memory load instruction with one destination register and either two
+ // register source operands or one register source operand and one immediate
+ // operand. The instruction takes one cycle to execute in each of the
+ // pipeline stages. All of the source operands are read during the decode
+ // stage and the result is ready after the writeback stage.
+ InstrItinData< IIC_MEMl,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<1,[MA]> // one cycle in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 4 // result ready after four cycles
+ , 1 // second operand read after one cycle
+ , 1 ]> // third operand read after one cycle
+]>;
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeSelectionDAGInfo.cpp
new file mode 100644
index 0000000..6a115b2
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeSelectionDAGInfo.cpp
@@ -0,0 +1,23 @@
+//===-- MBlazeSelectionDAGInfo.cpp - MBlaze SelectionDAG Info -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MBlazeSelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mblaze-selectiondag-info"
+#include "MBlazeTargetMachine.h"
+using namespace llvm;
+
+MBlazeSelectionDAGInfo::MBlazeSelectionDAGInfo(const MBlazeTargetMachine &TM)
+ : TargetSelectionDAGInfo(TM) {
+}
+
+MBlazeSelectionDAGInfo::~MBlazeSelectionDAGInfo() {
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeSelectionDAGInfo.h b/contrib/llvm/lib/Target/MBlaze/MBlazeSelectionDAGInfo.h
new file mode 100644
index 0000000..9f8e2aa
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeSelectionDAGInfo.h
@@ -0,0 +1,31 @@
+//===-- MBlazeSelectionDAGInfo.h - MBlaze SelectionDAG Info -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the MBlaze subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZESELECTIONDAGINFO_H
+#define MBLAZESELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class MBlazeTargetMachine;
+
+class MBlazeSelectionDAGInfo : public TargetSelectionDAGInfo {
+public:
+ explicit MBlazeSelectionDAGInfo(const MBlazeTargetMachine &TM);
+ ~MBlazeSelectionDAGInfo();
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeSubtarget.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeSubtarget.cpp
new file mode 100644
index 0000000..7e5667f
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeSubtarget.cpp
@@ -0,0 +1,63 @@
+//===- MBlazeSubtarget.cpp - MBlaze Subtarget Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MBlaze specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MBlazeSubtarget.h"
+#include "MBlaze.h"
+#include "MBlazeRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "MBlazeGenSubtargetInfo.inc"
+
+using namespace llvm;
+
+MBlazeSubtarget::MBlazeSubtarget(const std::string &TT,
+ const std::string &CPU,
+ const std::string &FS):
+ MBlazeGenSubtargetInfo(TT, CPU, FS),
+ HasBarrel(false), HasDiv(false), HasMul(false), HasPatCmp(false),
+ HasFPU(false), HasMul64(false), HasSqrt(false)
+{
+ // Parse features string.
+ std::string CPUName = CPU;
+ if (CPUName.empty())
+ CPUName = "mblaze";
+ ParseSubtargetFeatures(CPUName, FS);
+
+ // Only use instruction scheduling if the selected CPU has an instruction
+ // itinerary (the default CPU is the only one that doesn't).
+ HasItin = CPUName != "mblaze";
+ DEBUG(dbgs() << "CPU " << CPUName << "(" << HasItin << ")\n");
+
+ // Initialize scheduling itinerary for the specified CPU.
+ InstrItins = getInstrItineraryForCPU(CPUName);
+
+ // Compute the issue width of the MBlaze itineraries
+ computeIssueWidth();
+}
+
+void MBlazeSubtarget::computeIssueWidth() {
+ InstrItins.IssueWidth = 1;
+}
+
+bool MBlazeSubtarget::
+enablePostRAScheduler(CodeGenOpt::Level OptLevel,
+ TargetSubtargetInfo::AntiDepBreakMode& Mode,
+ RegClassVector& CriticalPathRCs) const {
+ Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL;
+ CriticalPathRCs.clear();
+ CriticalPathRCs.push_back(&MBlaze::GPRRegClass);
+ return HasItin && OptLevel >= CodeGenOpt::Default;
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeSubtarget.h b/contrib/llvm/lib/Target/MBlaze/MBlazeSubtarget.h
new file mode 100644
index 0000000..43b0197
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeSubtarget.h
@@ -0,0 +1,75 @@
+//=====-- MBlazeSubtarget.h - Define Subtarget for the MBlaze -*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the MBlaze specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZESUBTARGET_H
+#define MBLAZESUBTARGET_H
+
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include <string>
+
+#define GET_SUBTARGETINFO_HEADER
+#include "MBlazeGenSubtargetInfo.inc"
+
+namespace llvm {
+class StringRef;
+
+class MBlazeSubtarget : public MBlazeGenSubtargetInfo {
+
+protected:
+ bool HasBarrel;
+ bool HasDiv;
+ bool HasMul;
+ bool HasPatCmp;
+ bool HasFPU;
+ bool HasMul64;
+ bool HasSqrt;
+ bool HasItin;
+
+ InstrItineraryData InstrItins;
+
+public:
+
+ /// This constructor initializes the data members to match that
+ /// of the specified triple.
+ MBlazeSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS);
+
+ /// ParseSubtargetFeatures - Parses features string setting specified
+ /// subtarget options. Definition of function is auto generated by tblgen.
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+
+ /// Compute the number of maximum number of issues per cycle for the
+ /// MBlaze scheduling itineraries.
+ void computeIssueWidth();
+
+ /// enablePostRAScheduler - True at 'More' optimization.
+ bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
+ TargetSubtargetInfo::AntiDepBreakMode& Mode,
+ RegClassVector& CriticalPathRCs) const;
+
+ /// getInstrItins - Return the instruction itineraies based on subtarget.
+ const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
+
+ bool hasItin() const { return HasItin; }
+ bool hasPCMP() const { return HasPatCmp; }
+ bool hasFPU() const { return HasFPU; }
+ bool hasSqrt() const { return HasSqrt; }
+ bool hasMul() const { return HasMul; }
+ bool hasMul64() const { return HasMul64; }
+ bool hasDiv() const { return HasDiv; }
+ bool hasBarrel() const { return HasBarrel; }
+};
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeTargetMachine.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeTargetMachine.cpp
new file mode 100644
index 0000000..7bff53e
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeTargetMachine.cpp
@@ -0,0 +1,62 @@
+//===-- MBlazeTargetMachine.cpp - Define TargetMachine for MBlaze ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements the info about MBlaze target spec.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MBlaze.h"
+#include "MBlazeTargetMachine.h"
+#include "llvm/PassManager.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+extern "C" void LLVMInitializeMBlazeTarget() {
+ // Register the target.
+ RegisterTargetMachine<MBlazeTargetMachine> X(TheMBlazeTarget);
+}
+
+// DataLayout --> Big-endian, 32-bit pointer/ABI/alignment
+// The stack is always 8 byte aligned
+// On function prologue, the stack is created by decrementing
+// its pointer. Once decremented, all references are done with positive
+// offset from the stack/frame pointer, using StackGrowsUp enables
+// an easier handling.
+MBlazeTargetMachine::
+MBlazeTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM):
+ LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
+ Subtarget(TT, CPU, FS),
+ DataLayout("E-p:32:32:32-i8:8:8-i16:16:16"),
+ InstrInfo(*this),
+ FrameLowering(Subtarget),
+ TLInfo(*this), TSInfo(*this), ELFWriterInfo(*this),
+ InstrItins(Subtarget.getInstrItineraryData()) {
+}
+
+// Install an instruction selector pass using
+// the ISelDag to gen MBlaze code.
+bool MBlazeTargetMachine::addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ PM.add(createMBlazeISelDag(*this));
+ return false;
+}
+
+// Implemented by targets that want to run passes immediately before
+// machine code is emitted. return true if -print-machineinstrs should
+// print out the code after the passes.
+bool MBlazeTargetMachine::addPreEmitPass(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ PM.add(createMBlazeDelaySlotFillerPass(*this));
+ return true;
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeTargetMachine.h b/contrib/llvm/lib/Target/MBlaze/MBlazeTargetMachine.h
new file mode 100644
index 0000000..c1bc08a
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeTargetMachine.h
@@ -0,0 +1,85 @@
+//===-- MBlazeTargetMachine.h - Define TargetMachine for MBlaze --- C++ ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the MBlaze specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZE_TARGETMACHINE_H
+#define MBLAZE_TARGETMACHINE_H
+
+#include "MBlazeSubtarget.h"
+#include "MBlazeInstrInfo.h"
+#include "MBlazeISelLowering.h"
+#include "MBlazeSelectionDAGInfo.h"
+#include "MBlazeIntrinsicInfo.h"
+#include "MBlazeFrameLowering.h"
+#include "MBlazeELFWriterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+ class formatted_raw_ostream;
+
+ class MBlazeTargetMachine : public LLVMTargetMachine {
+ MBlazeSubtarget Subtarget;
+ const TargetData DataLayout; // Calculates type size & alignment
+ MBlazeInstrInfo InstrInfo;
+ MBlazeFrameLowering FrameLowering;
+ MBlazeTargetLowering TLInfo;
+ MBlazeSelectionDAGInfo TSInfo;
+ MBlazeIntrinsicInfo IntrinsicInfo;
+ MBlazeELFWriterInfo ELFWriterInfo;
+ InstrItineraryData InstrItins;
+
+ public:
+ MBlazeTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM);
+
+ virtual const MBlazeInstrInfo *getInstrInfo() const
+ { return &InstrInfo; }
+
+ virtual const InstrItineraryData *getInstrItineraryData() const
+ { return &InstrItins; }
+
+ virtual const TargetFrameLowering *getFrameLowering() const
+ { return &FrameLowering; }
+
+ virtual const MBlazeSubtarget *getSubtargetImpl() const
+ { return &Subtarget; }
+
+ virtual const TargetData *getTargetData() const
+ { return &DataLayout;}
+
+ virtual const MBlazeRegisterInfo *getRegisterInfo() const
+ { return &InstrInfo.getRegisterInfo(); }
+
+ virtual const MBlazeTargetLowering *getTargetLowering() const
+ { return &TLInfo; }
+
+ virtual const MBlazeSelectionDAGInfo* getSelectionDAGInfo() const
+ { return &TSInfo; }
+
+ const TargetIntrinsicInfo *getIntrinsicInfo() const
+ { return &IntrinsicInfo; }
+
+ virtual const MBlazeELFWriterInfo *getELFWriterInfo() const {
+ return &ELFWriterInfo;
+ }
+
+ // Pass Pipeline Configuration
+ virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level Opt);
+ virtual bool addPreEmitPass(PassManagerBase &PM,CodeGenOpt::Level Opt);
+ };
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp
new file mode 100644
index 0000000..f66ea30
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp
@@ -0,0 +1,90 @@
+//===-- MBlazeTargetObjectFile.cpp - MBlaze object files ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MBlazeTargetObjectFile.h"
+#include "MBlazeSubtarget.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ELF.h"
+using namespace llvm;
+
+void MBlazeTargetObjectFile::
+Initialize(MCContext &Ctx, const TargetMachine &TM) {
+ TargetLoweringObjectFileELF::Initialize(Ctx, TM);
+
+ SmallDataSection =
+ getContext().getELFSection(".sdata", ELF::SHT_PROGBITS,
+ ELF::SHF_WRITE |ELF::SHF_ALLOC,
+ SectionKind::getDataRel());
+
+ SmallBSSSection =
+ getContext().getELFSection(".sbss", ELF::SHT_NOBITS,
+ ELF::SHF_WRITE |ELF::SHF_ALLOC,
+ SectionKind::getBSS());
+
+}
+
+// A address must be loaded from a small section if its size is less than the
+// small section size threshold. Data in this section must be addressed using
+// gp_rel operator.
+static bool IsInSmallSection(uint64_t Size) {
+ return Size > 0 && Size <= 8;
+}
+
+bool MBlazeTargetObjectFile::
+IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM) const {
+ if (GV->isDeclaration() || GV->hasAvailableExternallyLinkage())
+ return false;
+
+ return IsGlobalInSmallSection(GV, TM, getKindForGlobal(GV, TM));
+}
+
+/// IsGlobalInSmallSection - Return true if this global address should be
+/// placed into small data/bss section.
+bool MBlazeTargetObjectFile::
+IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM,
+ SectionKind Kind) const {
+ // Only global variables, not functions.
+ const GlobalVariable *GVA = dyn_cast<GlobalVariable>(GV);
+ if (!GVA)
+ return false;
+
+ // We can only do this for datarel or BSS objects for now.
+ if (!Kind.isBSS() && !Kind.isDataRel())
+ return false;
+
+ // If this is a internal constant string, there is a special
+ // section for it, but not in small data/bss.
+ if (Kind.isMergeable1ByteCString())
+ return false;
+
+ Type *Ty = GV->getType()->getElementType();
+ return IsInSmallSection(TM.getTargetData()->getTypeAllocSize(Ty));
+}
+
+const MCSection *MBlazeTargetObjectFile::
+SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler *Mang, const TargetMachine &TM) const {
+ // TODO: Could also support "weak" symbols as well with ".gnu.linkonce.s.*"
+ // sections?
+
+ // Handle Small Section classification here.
+ if (Kind.isBSS() && IsGlobalInSmallSection(GV, TM, Kind))
+ return SmallBSSSection;
+ if (Kind.isDataNoRel() && IsGlobalInSmallSection(GV, TM, Kind))
+ return SmallDataSection;
+
+ // Otherwise, we work the same as ELF.
+ return TargetLoweringObjectFileELF::SelectSectionForGlobal(GV, Kind, Mang,TM);
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeTargetObjectFile.h b/contrib/llvm/lib/Target/MBlaze/MBlazeTargetObjectFile.h
new file mode 100644
index 0000000..c313722
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeTargetObjectFile.h
@@ -0,0 +1,40 @@
+//===-- llvm/Target/MBlazeTargetObjectFile.h - MBlaze Obj. Info -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_MBLAZE_TARGETOBJECTFILE_H
+#define LLVM_TARGET_MBLAZE_TARGETOBJECTFILE_H
+
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+
+namespace llvm {
+
+ class MBlazeTargetObjectFile : public TargetLoweringObjectFileELF {
+ const MCSection *SmallDataSection;
+ const MCSection *SmallBSSSection;
+ public:
+
+ void Initialize(MCContext &Ctx, const TargetMachine &TM);
+
+ /// IsGlobalInSmallSection - Return true if this global address should be
+ /// placed into small data/bss section.
+ bool IsGlobalInSmallSection(const GlobalValue *GV,
+ const TargetMachine &TM,
+ SectionKind Kind) const;
+
+ bool IsGlobalInSmallSection(const GlobalValue *GV,
+ const TargetMachine &TM) const;
+
+ const MCSection *SelectSectionForGlobal(const GlobalValue *GV,
+ SectionKind Kind,
+ Mangler *Mang,
+ const TargetMachine &TM) const;
+ };
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp
new file mode 100644
index 0000000..08f7d46a
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp
@@ -0,0 +1,159 @@
+//===-- MBlazeAsmBackend.cpp - MBlaze Assembler Backend -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/MBlazeMCTargetDesc.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCELFSymbolFlags.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+static unsigned getFixupKindSize(unsigned Kind) {
+ switch (Kind) {
+ default: assert(0 && "invalid fixup kind!");
+ case FK_Data_1: return 1;
+ case FK_PCRel_2:
+ case FK_Data_2: return 2;
+ case FK_PCRel_4:
+ case FK_Data_4: return 4;
+ case FK_Data_8: return 8;
+ }
+}
+
+
+namespace {
+class MBlazeELFObjectWriter : public MCELFObjectTargetWriter {
+public:
+ MBlazeELFObjectWriter(Triple::OSType OSType)
+ : MCELFObjectTargetWriter(/*is64Bit*/ false, OSType, ELF::EM_MBLAZE,
+ /*HasRelocationAddend*/ true) {}
+};
+
+class MBlazeAsmBackend : public MCAsmBackend {
+public:
+ MBlazeAsmBackend(const Target &T)
+ : MCAsmBackend() {
+ }
+
+ unsigned getNumFixupKinds() const {
+ return 2;
+ }
+
+ bool MayNeedRelaxation(const MCInst &Inst) const;
+
+ void RelaxInstruction(const MCInst &Inst, MCInst &Res) const;
+
+ bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const;
+
+ unsigned getPointerSize() const {
+ return 4;
+ }
+};
+
+static unsigned getRelaxedOpcode(unsigned Op) {
+ switch (Op) {
+ default: return Op;
+ case MBlaze::ADDIK: return MBlaze::ADDIK32;
+ case MBlaze::ORI: return MBlaze::ORI32;
+ case MBlaze::BRLID: return MBlaze::BRLID32;
+ }
+}
+
+bool MBlazeAsmBackend::MayNeedRelaxation(const MCInst &Inst) const {
+ if (getRelaxedOpcode(Inst.getOpcode()) == Inst.getOpcode())
+ return false;
+
+ bool hasExprOrImm = false;
+ for (unsigned i = 0; i < Inst.getNumOperands(); ++i)
+ hasExprOrImm |= Inst.getOperand(i).isExpr();
+
+ return hasExprOrImm;
+}
+
+void MBlazeAsmBackend::RelaxInstruction(const MCInst &Inst, MCInst &Res) const {
+ Res = Inst;
+ Res.setOpcode(getRelaxedOpcode(Inst.getOpcode()));
+}
+
+bool MBlazeAsmBackend::WriteNopData(uint64_t Count, MCObjectWriter *OW) const {
+ if ((Count % 4) != 0)
+ return false;
+
+ for (uint64_t i = 0; i < Count; i += 4)
+ OW->Write32(0x00000000);
+
+ return true;
+}
+} // end anonymous namespace
+
+namespace {
+class ELFMBlazeAsmBackend : public MBlazeAsmBackend {
+public:
+ Triple::OSType OSType;
+ ELFMBlazeAsmBackend(const Target &T, Triple::OSType _OSType)
+ : MBlazeAsmBackend(T), OSType(_OSType) { }
+
+ void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ uint64_t Value) const;
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ return createELFObjectWriter(new MBlazeELFObjectWriter(OSType), OS,
+ /*IsLittleEndian*/ false);
+ }
+};
+
+void ELFMBlazeAsmBackend::ApplyFixup(const MCFixup &Fixup, char *Data,
+ unsigned DataSize, uint64_t Value) const {
+ unsigned Size = getFixupKindSize(Fixup.getKind());
+
+ assert(Fixup.getOffset() + Size <= DataSize &&
+ "Invalid fixup offset!");
+
+ char *data = Data + Fixup.getOffset();
+ switch (Size) {
+ default: llvm_unreachable("Cannot fixup unknown value.");
+ case 1: llvm_unreachable("Cannot fixup 1 byte value.");
+ case 8: llvm_unreachable("Cannot fixup 8 byte value.");
+
+ case 4:
+ *(data+7) = uint8_t(Value);
+ *(data+6) = uint8_t(Value >> 8);
+ *(data+3) = uint8_t(Value >> 16);
+ *(data+2) = uint8_t(Value >> 24);
+ break;
+
+ case 2:
+ *(data+3) = uint8_t(Value >> 0);
+ *(data+2) = uint8_t(Value >> 8);
+ }
+}
+} // end anonymous namespace
+
+MCAsmBackend *llvm::createMBlazeAsmBackend(const Target &T, StringRef TT) {
+ Triple TheTriple(TT);
+
+ if (TheTriple.isOSDarwin())
+ assert(0 && "Mac not supported on MBlaze");
+
+ if (TheTriple.isOSWindows())
+ assert(0 && "Windows not supported on MBlaze");
+
+ return new ELFMBlazeAsmBackend(T, TheTriple.getOS());
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeBaseInfo.h b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeBaseInfo.h
new file mode 100644
index 0000000..776dbc4
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeBaseInfo.h
@@ -0,0 +1,240 @@
+//===-- MBlazeBaseInfo.h - Top level definitions for MBlaze -- --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains small standalone helper functions and enum definitions for
+// the MBlaze target useful for the compiler back-end and the MC libraries.
+// As such, it deliberately does not include references to LLVM core
+// code gen types, passes, etc..
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBlazeBASEINFO_H
+#define MBlazeBASEINFO_H
+
+#include "MBlazeMCTargetDesc.h"
+#include "llvm/Support/ErrorHandling.h"
+
+namespace llvm {
+
+/// MBlazeII - This namespace holds all of the target specific flags that
+/// instruction info tracks.
+///
+namespace MBlazeII {
+ enum {
+ // PseudoFrm - This represents an instruction that is a pseudo instruction
+ // or one that has not been implemented yet. It is illegal to code generate
+ // it, but tolerated for intermediate implementation stages.
+ FPseudo = 0,
+ FRRR,
+ FRRI,
+ FCRR,
+ FCRI,
+ FRCR,
+ FRCI,
+ FCCR,
+ FCCI,
+ FRRCI,
+ FRRC,
+ FRCX,
+ FRCS,
+ FCRCS,
+ FCRCX,
+ FCX,
+ FCR,
+ FRIR,
+ FRRRR,
+ FRI,
+ FC,
+ FormMask = 63
+
+ //===------------------------------------------------------------------===//
+ // MBlaze Specific MachineOperand flags.
+ // MO_NO_FLAG,
+
+ /// MO_GOT - Represents the offset into the global offset table at which
+ /// the address the relocation entry symbol resides during execution.
+ // MO_GOT,
+
+ /// MO_GOT_CALL - Represents the offset into the global offset table at
+ /// which the address of a call site relocation entry symbol resides
+ /// during execution. This is different from the above since this flag
+ /// can only be present in call instructions.
+ // MO_GOT_CALL,
+
+ /// MO_GPREL - Represents the offset from the current gp value to be used
+ /// for the relocatable object file being produced.
+ // MO_GPREL,
+
+ /// MO_ABS_HILO - Represents the hi or low part of an absolute symbol
+ /// address.
+ // MO_ABS_HILO
+
+ };
+}
+
+static inline bool isMBlazeRegister(unsigned Reg) {
+ return Reg <= 31;
+}
+
+static inline bool isSpecialMBlazeRegister(unsigned Reg) {
+ switch (Reg) {
+ case 0x0000 : case 0x0001 : case 0x0003 : case 0x0005 :
+ case 0x0007 : case 0x000B : case 0x000D : case 0x1000 :
+ case 0x1001 : case 0x1002 : case 0x1003 : case 0x1004 :
+ case 0x2000 : case 0x2001 : case 0x2002 : case 0x2003 :
+ case 0x2004 : case 0x2005 : case 0x2006 : case 0x2007 :
+ case 0x2008 : case 0x2009 : case 0x200A : case 0x200B :
+ return true;
+
+ default:
+ return false;
+ }
+ return false; // Not reached
+}
+
+/// getMBlazeRegisterNumbering - Given the enum value for some register, e.g.
+/// MBlaze::R0, return the number that it corresponds to (e.g. 0).
+static inline unsigned getMBlazeRegisterNumbering(unsigned RegEnum) {
+ switch (RegEnum) {
+ case MBlaze::R0 : return 0;
+ case MBlaze::R1 : return 1;
+ case MBlaze::R2 : return 2;
+ case MBlaze::R3 : return 3;
+ case MBlaze::R4 : return 4;
+ case MBlaze::R5 : return 5;
+ case MBlaze::R6 : return 6;
+ case MBlaze::R7 : return 7;
+ case MBlaze::R8 : return 8;
+ case MBlaze::R9 : return 9;
+ case MBlaze::R10 : return 10;
+ case MBlaze::R11 : return 11;
+ case MBlaze::R12 : return 12;
+ case MBlaze::R13 : return 13;
+ case MBlaze::R14 : return 14;
+ case MBlaze::R15 : return 15;
+ case MBlaze::R16 : return 16;
+ case MBlaze::R17 : return 17;
+ case MBlaze::R18 : return 18;
+ case MBlaze::R19 : return 19;
+ case MBlaze::R20 : return 20;
+ case MBlaze::R21 : return 21;
+ case MBlaze::R22 : return 22;
+ case MBlaze::R23 : return 23;
+ case MBlaze::R24 : return 24;
+ case MBlaze::R25 : return 25;
+ case MBlaze::R26 : return 26;
+ case MBlaze::R27 : return 27;
+ case MBlaze::R28 : return 28;
+ case MBlaze::R29 : return 29;
+ case MBlaze::R30 : return 30;
+ case MBlaze::R31 : return 31;
+ case MBlaze::RPC : return 0x0000;
+ case MBlaze::RMSR : return 0x0001;
+ case MBlaze::REAR : return 0x0003;
+ case MBlaze::RESR : return 0x0005;
+ case MBlaze::RFSR : return 0x0007;
+ case MBlaze::RBTR : return 0x000B;
+ case MBlaze::REDR : return 0x000D;
+ case MBlaze::RPID : return 0x1000;
+ case MBlaze::RZPR : return 0x1001;
+ case MBlaze::RTLBX : return 0x1002;
+ case MBlaze::RTLBLO : return 0x1003;
+ case MBlaze::RTLBHI : return 0x1004;
+ case MBlaze::RPVR0 : return 0x2000;
+ case MBlaze::RPVR1 : return 0x2001;
+ case MBlaze::RPVR2 : return 0x2002;
+ case MBlaze::RPVR3 : return 0x2003;
+ case MBlaze::RPVR4 : return 0x2004;
+ case MBlaze::RPVR5 : return 0x2005;
+ case MBlaze::RPVR6 : return 0x2006;
+ case MBlaze::RPVR7 : return 0x2007;
+ case MBlaze::RPVR8 : return 0x2008;
+ case MBlaze::RPVR9 : return 0x2009;
+ case MBlaze::RPVR10 : return 0x200A;
+ case MBlaze::RPVR11 : return 0x200B;
+ default: llvm_unreachable("Unknown register number!");
+ }
+ return 0; // Not reached
+}
+
+/// getRegisterFromNumbering - Given the enum value for some register, e.g.
+/// MBlaze::R0, return the number that it corresponds to (e.g. 0).
+static inline unsigned getMBlazeRegisterFromNumbering(unsigned Reg) {
+ switch (Reg) {
+ case 0 : return MBlaze::R0;
+ case 1 : return MBlaze::R1;
+ case 2 : return MBlaze::R2;
+ case 3 : return MBlaze::R3;
+ case 4 : return MBlaze::R4;
+ case 5 : return MBlaze::R5;
+ case 6 : return MBlaze::R6;
+ case 7 : return MBlaze::R7;
+ case 8 : return MBlaze::R8;
+ case 9 : return MBlaze::R9;
+ case 10 : return MBlaze::R10;
+ case 11 : return MBlaze::R11;
+ case 12 : return MBlaze::R12;
+ case 13 : return MBlaze::R13;
+ case 14 : return MBlaze::R14;
+ case 15 : return MBlaze::R15;
+ case 16 : return MBlaze::R16;
+ case 17 : return MBlaze::R17;
+ case 18 : return MBlaze::R18;
+ case 19 : return MBlaze::R19;
+ case 20 : return MBlaze::R20;
+ case 21 : return MBlaze::R21;
+ case 22 : return MBlaze::R22;
+ case 23 : return MBlaze::R23;
+ case 24 : return MBlaze::R24;
+ case 25 : return MBlaze::R25;
+ case 26 : return MBlaze::R26;
+ case 27 : return MBlaze::R27;
+ case 28 : return MBlaze::R28;
+ case 29 : return MBlaze::R29;
+ case 30 : return MBlaze::R30;
+ case 31 : return MBlaze::R31;
+ default: llvm_unreachable("Unknown register number!");
+ }
+ return 0; // Not reached
+}
+
+static inline unsigned getSpecialMBlazeRegisterFromNumbering(unsigned Reg) {
+ switch (Reg) {
+ case 0x0000 : return MBlaze::RPC;
+ case 0x0001 : return MBlaze::RMSR;
+ case 0x0003 : return MBlaze::REAR;
+ case 0x0005 : return MBlaze::RESR;
+ case 0x0007 : return MBlaze::RFSR;
+ case 0x000B : return MBlaze::RBTR;
+ case 0x000D : return MBlaze::REDR;
+ case 0x1000 : return MBlaze::RPID;
+ case 0x1001 : return MBlaze::RZPR;
+ case 0x1002 : return MBlaze::RTLBX;
+ case 0x1003 : return MBlaze::RTLBLO;
+ case 0x1004 : return MBlaze::RTLBHI;
+ case 0x2000 : return MBlaze::RPVR0;
+ case 0x2001 : return MBlaze::RPVR1;
+ case 0x2002 : return MBlaze::RPVR2;
+ case 0x2003 : return MBlaze::RPVR3;
+ case 0x2004 : return MBlaze::RPVR4;
+ case 0x2005 : return MBlaze::RPVR5;
+ case 0x2006 : return MBlaze::RPVR6;
+ case 0x2007 : return MBlaze::RPVR7;
+ case 0x2008 : return MBlaze::RPVR8;
+ case 0x2009 : return MBlaze::RPVR9;
+ case 0x200A : return MBlaze::RPVR10;
+ case 0x200B : return MBlaze::RPVR11;
+ default: llvm_unreachable("Unknown register number!");
+ }
+ return 0; // Not reached
+}
+
+} // end namespace llvm;
+
+#endif
diff --git a/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.cpp b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.cpp
new file mode 100644
index 0000000..0d88466
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.cpp
@@ -0,0 +1,24 @@
+//===-- MBlazeMCAsmInfo.cpp - MBlaze asm properties -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the MBlazeMCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MBlazeMCAsmInfo.h"
+using namespace llvm;
+
+MBlazeMCAsmInfo::MBlazeMCAsmInfo() {
+ IsLittleEndian = false;
+ StackGrowsUp = false;
+ SupportsDebugInformation = true;
+ AlignmentIsInBytes = false;
+ PrivateGlobalPrefix = "$";
+ GPRel32Directive = "\t.gpword\t";
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.h b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.h
new file mode 100644
index 0000000..e68dd58
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.h
@@ -0,0 +1,30 @@
+//=====-- MBlazeMCAsmInfo.h - MBlaze asm properties -----------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the MBlazeMCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZETARGETASMINFO_H
+#define MBLAZETARGETASMINFO_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCAsmInfo.h"
+
+namespace llvm {
+ class Target;
+
+ class MBlazeMCAsmInfo : public MCAsmInfo {
+ public:
+ explicit MBlazeMCAsmInfo();
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCCodeEmitter.cpp b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCCodeEmitter.cpp
new file mode 100644
index 0000000..1514557
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCCodeEmitter.cpp
@@ -0,0 +1,224 @@
+//===-- MBlazeMCCodeEmitter.cpp - Convert MBlaze code to machine code -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MBlazeMCCodeEmitter class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mccodeemitter"
+#include "MCTargetDesc/MBlazeBaseInfo.h"
+#include "MCTargetDesc/MBlazeMCTargetDesc.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCFixup.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+STATISTIC(MCNumEmitted, "Number of MC instructions emitted");
+
+namespace {
+class MBlazeMCCodeEmitter : public MCCodeEmitter {
+ MBlazeMCCodeEmitter(const MBlazeMCCodeEmitter &); // DO NOT IMPLEMENT
+ void operator=(const MBlazeMCCodeEmitter &); // DO NOT IMPLEMENT
+ const MCInstrInfo &MCII;
+
+public:
+ MBlazeMCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti,
+ MCContext &ctx)
+ : MCII(mcii) {
+ }
+
+ ~MBlazeMCCodeEmitter() {}
+
+ // getBinaryCodeForInstr - TableGen'erated function for getting the
+ // binary encoding for an instruction.
+ unsigned getBinaryCodeForInstr(const MCInst &MI) const;
+
+ /// getMachineOpValue - Return binary encoding of operand. If the machine
+ /// operand requires relocation, record the relocation and return zero.
+ unsigned getMachineOpValue(const MCInst &MI,const MCOperand &MO) const;
+ unsigned getMachineOpValue(const MCInst &MI, unsigned OpIdx) const {
+ return getMachineOpValue(MI, MI.getOperand(OpIdx));
+ }
+
+ static unsigned GetMBlazeRegNum(const MCOperand &MO) {
+ // FIXME: getMBlazeRegisterNumbering() is sufficient?
+ assert(0 && "MBlazeMCCodeEmitter::GetMBlazeRegNum() not yet implemented.");
+ return 0;
+ }
+
+ void EmitByte(unsigned char C, unsigned &CurByte, raw_ostream &OS) const {
+ // The MicroBlaze uses a bit reversed format so we need to reverse the
+ // order of the bits. Taken from:
+ // http://graphics.stanford.edu/~seander/bithacks.html
+ C = ((C * 0x80200802ULL) & 0x0884422110ULL) * 0x0101010101ULL >> 32;
+
+ OS << (char)C;
+ ++CurByte;
+ }
+
+ void EmitRawByte(unsigned char C, unsigned &CurByte, raw_ostream &OS) const {
+ OS << (char)C;
+ ++CurByte;
+ }
+
+ void EmitConstant(uint64_t Val, unsigned Size, unsigned &CurByte,
+ raw_ostream &OS) const {
+ assert(Size <= 8 && "size too big in emit constant");
+
+ for (unsigned i = 0; i != Size; ++i) {
+ EmitByte(Val & 255, CurByte, OS);
+ Val >>= 8;
+ }
+ }
+
+ void EmitIMM(const MCOperand &imm, unsigned &CurByte, raw_ostream &OS) const;
+ void EmitIMM(const MCInst &MI, unsigned &CurByte, raw_ostream &OS) const;
+
+ void EmitImmediate(const MCInst &MI, unsigned opNo, bool pcrel,
+ unsigned &CurByte, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+};
+
+} // end anonymous namespace
+
+
+MCCodeEmitter *llvm::createMBlazeMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
+ return new MBlazeMCCodeEmitter(MCII, STI, Ctx);
+}
+
+/// getMachineOpValue - Return binary encoding of operand. If the machine
+/// operand requires relocation, record the relocation and return zero.
+unsigned MBlazeMCCodeEmitter::getMachineOpValue(const MCInst &MI,
+ const MCOperand &MO) const {
+ if (MO.isReg())
+ return getMBlazeRegisterNumbering(MO.getReg());
+ else if (MO.isImm())
+ return static_cast<unsigned>(MO.getImm());
+ else if (MO.isExpr())
+ return 0; // The relocation has already been recorded at this point.
+ else {
+#ifndef NDEBUG
+ errs() << MO;
+#endif
+ llvm_unreachable(0);
+ }
+ return 0;
+}
+
+void MBlazeMCCodeEmitter::
+EmitIMM(const MCOperand &imm, unsigned &CurByte, raw_ostream &OS) const {
+ int32_t val = (int32_t)imm.getImm();
+ if (val > 32767 || val < -32768) {
+ EmitByte(0x0D, CurByte, OS);
+ EmitByte(0x00, CurByte, OS);
+ EmitRawByte((val >> 24) & 0xFF, CurByte, OS);
+ EmitRawByte((val >> 16) & 0xFF, CurByte, OS);
+ }
+}
+
+void MBlazeMCCodeEmitter::
+EmitIMM(const MCInst &MI, unsigned &CurByte,raw_ostream &OS) const {
+ switch (MI.getOpcode()) {
+ default: break;
+
+ case MBlaze::ADDIK32:
+ case MBlaze::ORI32:
+ case MBlaze::BRLID32:
+ EmitByte(0x0D, CurByte, OS);
+ EmitByte(0x00, CurByte, OS);
+ EmitRawByte(0, CurByte, OS);
+ EmitRawByte(0, CurByte, OS);
+ }
+}
+
+void MBlazeMCCodeEmitter::
+EmitImmediate(const MCInst &MI, unsigned opNo, bool pcrel, unsigned &CurByte,
+ raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups) const {
+ assert(MI.getNumOperands()>opNo && "Not enought operands for instruction");
+
+ MCOperand oper = MI.getOperand(opNo);
+
+ if (oper.isImm()) {
+ EmitIMM(oper, CurByte, OS);
+ } else if (oper.isExpr()) {
+ MCFixupKind FixupKind;
+ switch (MI.getOpcode()) {
+ default:
+ FixupKind = pcrel ? FK_PCRel_2 : FK_Data_2;
+ Fixups.push_back(MCFixup::Create(0,oper.getExpr(),FixupKind));
+ break;
+ case MBlaze::ORI32:
+ case MBlaze::ADDIK32:
+ case MBlaze::BRLID32:
+ FixupKind = pcrel ? FK_PCRel_4 : FK_Data_4;
+ Fixups.push_back(MCFixup::Create(0,oper.getExpr(),FixupKind));
+ break;
+ }
+ }
+}
+
+
+
+void MBlazeMCCodeEmitter::
+EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ unsigned Opcode = MI.getOpcode();
+ const MCInstrDesc &Desc = MCII.get(Opcode);
+ uint64_t TSFlags = Desc.TSFlags;
+ // Keep track of the current byte being emitted.
+ unsigned CurByte = 0;
+
+ // Emit an IMM instruction if the instruction we are encoding requires it
+ EmitIMM(MI,CurByte,OS);
+
+ switch ((TSFlags & MBlazeII::FormMask)) {
+ default: break;
+ case MBlazeII::FPseudo:
+ // Pseudo instructions don't get encoded.
+ return;
+ case MBlazeII::FRRI:
+ EmitImmediate(MI, 2, false, CurByte, OS, Fixups);
+ break;
+ case MBlazeII::FRIR:
+ EmitImmediate(MI, 1, false, CurByte, OS, Fixups);
+ break;
+ case MBlazeII::FCRI:
+ EmitImmediate(MI, 1, true, CurByte, OS, Fixups);
+ break;
+ case MBlazeII::FRCI:
+ EmitImmediate(MI, 1, true, CurByte, OS, Fixups);
+ case MBlazeII::FCCI:
+ EmitImmediate(MI, 0, true, CurByte, OS, Fixups);
+ break;
+ }
+
+ ++MCNumEmitted; // Keep track of the # of mi's emitted
+ unsigned Value = getBinaryCodeForInstr(MI);
+ EmitConstant(Value, 4, CurByte, OS);
+}
+
+// FIXME: These #defines shouldn't be necessary. Instead, tblgen should
+// be able to generate code emitter helpers for either variant, like it
+// does for the AsmWriter.
+#define MBlazeCodeEmitter MBlazeMCCodeEmitter
+#define MachineInstr MCInst
+#include "MBlazeGenCodeEmitter.inc"
+#undef MBlazeCodeEmitter
+#undef MachineInstr
diff --git a/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp
new file mode 100644
index 0000000..43ae281
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp
@@ -0,0 +1,140 @@
+//===-- MBlazeMCTargetDesc.cpp - MBlaze Target Descriptions -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides MBlaze specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MBlazeMCTargetDesc.h"
+#include "MBlazeMCAsmInfo.h"
+#include "InstPrinter/MBlazeInstPrinter.h"
+#include "llvm/MC/MCCodeGenInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "MBlazeGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "MBlazeGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "MBlazeGenRegisterInfo.inc"
+
+using namespace llvm;
+
+
+static MCInstrInfo *createMBlazeMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitMBlazeMCInstrInfo(X);
+ return X;
+}
+
+static MCRegisterInfo *createMBlazeMCRegisterInfo(StringRef TT) {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitMBlazeMCRegisterInfo(X, MBlaze::R15);
+ return X;
+}
+
+static MCSubtargetInfo *createMBlazeMCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS) {
+ MCSubtargetInfo *X = new MCSubtargetInfo();
+ InitMBlazeMCSubtargetInfo(X, TT, CPU, FS);
+ return X;
+}
+
+static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
+ Triple TheTriple(TT);
+ switch (TheTriple.getOS()) {
+ default:
+ return new MBlazeMCAsmInfo();
+ }
+}
+
+static MCCodeGenInfo *createMBlazeMCCodeGenInfo(StringRef TT, Reloc::Model RM,
+ CodeModel::Model CM) {
+ MCCodeGenInfo *X = new MCCodeGenInfo();
+ if (RM == Reloc::Default)
+ RM = Reloc::Static;
+ if (CM == CodeModel::Default)
+ CM = CodeModel::Small;
+ X->InitMCCodeGenInfo(RM, CM);
+ return X;
+}
+
+static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
+ MCContext &Ctx, MCAsmBackend &MAB,
+ raw_ostream &_OS,
+ MCCodeEmitter *_Emitter,
+ bool RelaxAll,
+ bool NoExecStack) {
+ Triple TheTriple(TT);
+
+ if (TheTriple.isOSDarwin()) {
+ llvm_unreachable("MBlaze does not support Darwin MACH-O format");
+ return NULL;
+ }
+
+ if (TheTriple.isOSWindows()) {
+ llvm_unreachable("MBlaze does not support Windows COFF format");
+ return NULL;
+ }
+
+ return createELFStreamer(Ctx, MAB, _OS, _Emitter, RelaxAll, NoExecStack);
+}
+
+static MCInstPrinter *createMBlazeMCInstPrinter(const Target &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI,
+ const MCSubtargetInfo &STI) {
+ if (SyntaxVariant == 0)
+ return new MBlazeInstPrinter(MAI);
+ return 0;
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeMBlazeTargetMC() {
+ // Register the MC asm info.
+ RegisterMCAsmInfoFn X(TheMBlazeTarget, createMCAsmInfo);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(TheMBlazeTarget,
+ createMBlazeMCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(TheMBlazeTarget, createMBlazeMCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(TheMBlazeTarget,
+ createMBlazeMCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(TheMBlazeTarget,
+ createMBlazeMCSubtargetInfo);
+
+ // Register the MC code emitter
+ TargetRegistry::RegisterMCCodeEmitter(TheMBlazeTarget,
+ llvm::createMBlazeMCCodeEmitter);
+
+ // Register the asm backend
+ TargetRegistry::RegisterMCAsmBackend(TheMBlazeTarget,
+ createMBlazeAsmBackend);
+
+ // Register the object streamer
+ TargetRegistry::RegisterMCObjectStreamer(TheMBlazeTarget,
+ createMCStreamer);
+
+ // Register the MCInstPrinter.
+ TargetRegistry::RegisterMCInstPrinter(TheMBlazeTarget,
+ createMBlazeMCInstPrinter);
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.h b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.h
new file mode 100644
index 0000000..deff5cb
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.h
@@ -0,0 +1,49 @@
+//===-- MBlazeMCTargetDesc.h - MBlaze Target Descriptions -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides MBlaze specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZEMCTARGETDESC_H
+#define MBLAZEMCTARGETDESC_H
+
+namespace llvm {
+class MCAsmBackend;
+class MCContext;
+class MCCodeEmitter;
+class MCInstrInfo;
+class MCSubtargetInfo;
+class Target;
+class StringRef;
+class formatted_raw_ostream;
+
+extern Target TheMBlazeTarget;
+
+MCCodeEmitter *createMBlazeMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx);
+
+MCAsmBackend *createMBlazeAsmBackend(const Target &T, StringRef TT);
+
+} // End llvm namespace
+
+// Defines symbolic names for MBlaze registers. This defines a mapping from
+// register name to register number.
+#define GET_REGINFO_ENUM
+#include "MBlazeGenRegisterInfo.inc"
+
+// Defines symbolic names for the MBlaze instructions.
+#define GET_INSTRINFO_ENUM
+#include "MBlazeGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "MBlazeGenSubtargetInfo.inc"
+
+#endif
diff --git a/contrib/llvm/lib/Target/MBlaze/TargetInfo/MBlazeTargetInfo.cpp b/contrib/llvm/lib/Target/MBlaze/TargetInfo/MBlazeTargetInfo.cpp
new file mode 100644
index 0000000..71210d8
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/TargetInfo/MBlazeTargetInfo.cpp
@@ -0,0 +1,19 @@
+//===-- MBlazeTargetInfo.cpp - MBlaze Target Implementation ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MBlaze.h"
+#include "llvm/Module.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::TheMBlazeTarget;
+
+extern "C" void LLVMInitializeMBlazeTargetInfo() {
+ RegisterTarget<Triple::mblaze> X(TheMBlazeTarget, "mblaze", "MBlaze");
+}
diff --git a/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp b/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp
new file mode 100644
index 0000000..5d6c6ad
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp
@@ -0,0 +1,115 @@
+//===-- MSP430InstPrinter.cpp - Convert MSP430 MCInst to assembly syntax --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an MSP430 MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "MSP430.h"
+#include "MSP430InstPrinter.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+using namespace llvm;
+
+
+// Include the auto-generated portion of the assembly writer.
+#include "MSP430GenAsmWriter.inc"
+
+void MSP430InstPrinter::printInst(const MCInst *MI, raw_ostream &O,
+ StringRef Annot) {
+ printInstruction(MI, O);
+ printAnnotation(O, Annot);
+}
+
+void MSP430InstPrinter::printPCRelImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isImm())
+ O << Op.getImm();
+ else {
+ assert(Op.isExpr() && "unknown pcrel immediate operand");
+ O << *Op.getExpr();
+ }
+}
+
+void MSP430InstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O, const char *Modifier) {
+ assert((Modifier == 0 || Modifier[0] == 0) && "No modifiers supported");
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isReg()) {
+ O << getRegisterName(Op.getReg());
+ } else if (Op.isImm()) {
+ O << '#' << Op.getImm();
+ } else {
+ assert(Op.isExpr() && "unknown operand kind in printOperand");
+ O << '#' << *Op.getExpr();
+ }
+}
+
+void MSP430InstPrinter::printSrcMemOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O,
+ const char *Modifier) {
+ const MCOperand &Base = MI->getOperand(OpNo);
+ const MCOperand &Disp = MI->getOperand(OpNo+1);
+
+ // Print displacement first
+
+ // If the global address expression is a part of displacement field with a
+ // register base, we should not emit any prefix symbol here, e.g.
+ // mov.w &foo, r1
+ // vs
+ // mov.w glb(r1), r2
+ // Otherwise (!) msp430-as will silently miscompile the output :(
+ if (!Base.getReg())
+ O << '&';
+
+ if (Disp.isExpr())
+ O << *Disp.getExpr();
+ else {
+ assert(Disp.isImm() && "Expected immediate in displacement field");
+ O << Disp.getImm();
+ }
+
+ // Print register base field
+ if (Base.getReg())
+ O << '(' << getRegisterName(Base.getReg()) << ')';
+}
+
+void MSP430InstPrinter::printCCOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ unsigned CC = MI->getOperand(OpNo).getImm();
+
+ switch (CC) {
+ default:
+ llvm_unreachable("Unsupported CC code");
+ break;
+ case MSP430CC::COND_E:
+ O << "eq";
+ break;
+ case MSP430CC::COND_NE:
+ O << "ne";
+ break;
+ case MSP430CC::COND_HS:
+ O << "hs";
+ break;
+ case MSP430CC::COND_LO:
+ O << "lo";
+ break;
+ case MSP430CC::COND_GE:
+ O << "ge";
+ break;
+ case MSP430CC::COND_L:
+ O << 'l';
+ break;
+ }
+}
diff --git a/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h b/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
new file mode 100644
index 0000000..a1984a8
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
@@ -0,0 +1,43 @@
+//===-- MSP430InstPrinter.h - Convert MSP430 MCInst to assembly syntax ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints a MSP430 MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MSP430INSTPRINTER_H
+#define MSP430INSTPRINTER_H
+
+#include "llvm/MC/MCInstPrinter.h"
+
+namespace llvm {
+ class MCOperand;
+
+ class MSP430InstPrinter : public MCInstPrinter {
+ public:
+ MSP430InstPrinter(const MCAsmInfo &MAI)
+ : MCInstPrinter(MAI) {}
+
+ virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
+
+ // Autogenerated by tblgen.
+ void printInstruction(const MCInst *MI, raw_ostream &O);
+ static const char *getRegisterName(unsigned RegNo);
+
+ void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O,
+ const char *Modifier = 0);
+ void printPCRelImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printSrcMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O,
+ const char *Modifier = 0);
+ void printCCOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+
+ };
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp
new file mode 100644
index 0000000..ad7d380
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp
@@ -0,0 +1,28 @@
+//===-- MSP430MCAsmInfo.cpp - MSP430 asm properties -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the MSP430MCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MSP430MCAsmInfo.h"
+using namespace llvm;
+
+MSP430MCAsmInfo::MSP430MCAsmInfo(const Target &T, StringRef TT) {
+ PointerSize = 2;
+
+ PrivateGlobalPrefix = ".L";
+ WeakRefDirective ="\t.weak\t";
+ PCSymbol=".";
+ CommentString = ";";
+
+ AlignmentIsInBytes = false;
+ AllowNameToStartWithDigit = true;
+ UsesELFSectionDirectiveForBSS = true;
+}
diff --git a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h
new file mode 100644
index 0000000..f3138a2
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h
@@ -0,0 +1,29 @@
+//=====-- MSP430MCAsmInfo.h - MSP430 asm properties -----------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the MSP430MCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MSP430TARGETASMINFO_H
+#define MSP430TARGETASMINFO_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCAsmInfo.h"
+
+namespace llvm {
+ class Target;
+
+ struct MSP430MCAsmInfo : public MCAsmInfo {
+ explicit MSP430MCAsmInfo(const Target &T, StringRef TT);
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
new file mode 100644
index 0000000..fda70b8
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
@@ -0,0 +1,91 @@
+//===-- MSP430MCTargetDesc.cpp - MSP430 Target Descriptions -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides MSP430 specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MSP430MCTargetDesc.h"
+#include "MSP430MCAsmInfo.h"
+#include "InstPrinter/MSP430InstPrinter.h"
+#include "llvm/MC/MCCodeGenInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "MSP430GenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "MSP430GenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "MSP430GenRegisterInfo.inc"
+
+using namespace llvm;
+
+static MCInstrInfo *createMSP430MCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitMSP430MCInstrInfo(X);
+ return X;
+}
+
+static MCRegisterInfo *createMSP430MCRegisterInfo(StringRef TT) {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitMSP430MCRegisterInfo(X, MSP430::PCW);
+ return X;
+}
+
+static MCSubtargetInfo *createMSP430MCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS) {
+ MCSubtargetInfo *X = new MCSubtargetInfo();
+ InitMSP430MCSubtargetInfo(X, TT, CPU, FS);
+ return X;
+}
+
+static MCCodeGenInfo *createMSP430MCCodeGenInfo(StringRef TT, Reloc::Model RM,
+ CodeModel::Model CM) {
+ MCCodeGenInfo *X = new MCCodeGenInfo();
+ X->InitMCCodeGenInfo(RM, CM);
+ return X;
+}
+
+static MCInstPrinter *createMSP430MCInstPrinter(const Target &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI,
+ const MCSubtargetInfo &STI) {
+ if (SyntaxVariant == 0)
+ return new MSP430InstPrinter(MAI);
+ return 0;
+}
+
+extern "C" void LLVMInitializeMSP430TargetMC() {
+ // Register the MC asm info.
+ RegisterMCAsmInfo<MSP430MCAsmInfo> X(TheMSP430Target);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(TheMSP430Target,
+ createMSP430MCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(TheMSP430Target, createMSP430MCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(TheMSP430Target,
+ createMSP430MCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(TheMSP430Target,
+ createMSP430MCSubtargetInfo);
+
+ // Register the MCInstPrinter.
+ TargetRegistry::RegisterMCInstPrinter(TheMSP430Target,
+ createMSP430MCInstPrinter);
+}
diff --git a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h
new file mode 100644
index 0000000..35f2590
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h
@@ -0,0 +1,38 @@
+//===-- MSP430MCTargetDesc.h - MSP430 Target Descriptions -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides MSP430 specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MSP430MCTARGETDESC_H
+#define MSP430MCTARGETDESC_H
+
+namespace llvm {
+class MCSubtargetInfo;
+class Target;
+class StringRef;
+
+extern Target TheMSP430Target;
+
+} // End llvm namespace
+
+// Defines symbolic names for MSP430 registers.
+// This defines a mapping from register name to register number.
+#define GET_REGINFO_ENUM
+#include "MSP430GenRegisterInfo.inc"
+
+// Defines symbolic names for the MSP430 instructions.
+#define GET_INSTRINFO_ENUM
+#include "MSP430GenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "MSP430GenSubtargetInfo.inc"
+
+#endif
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430.h b/contrib/llvm/lib/Target/MSP430/MSP430.h
new file mode 100644
index 0000000..4574ce5
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430.h
@@ -0,0 +1,47 @@
+//==-- MSP430.h - Top-level interface for MSP430 representation --*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in
+// the LLVM MSP430 backend.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_MSP430_H
+#define LLVM_TARGET_MSP430_H
+
+#include "MCTargetDesc/MSP430MCTargetDesc.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace MSP430CC {
+ // MSP430 specific condition code.
+ enum CondCodes {
+ COND_E = 0, // aka COND_Z
+ COND_NE = 1, // aka COND_NZ
+ COND_HS = 2, // aka COND_C
+ COND_LO = 3, // aka COND_NC
+ COND_GE = 4,
+ COND_L = 5,
+
+ COND_INVALID = -1
+ };
+}
+
+namespace llvm {
+ class MSP430TargetMachine;
+ class FunctionPass;
+ class formatted_raw_ostream;
+
+ FunctionPass *createMSP430ISelDag(MSP430TargetMachine &TM,
+ CodeGenOpt::Level OptLevel);
+
+ FunctionPass *createMSP430BranchSelectionPass();
+
+} // end namespace llvm;
+
+#endif
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430.td b/contrib/llvm/lib/Target/MSP430/MSP430.td
new file mode 100644
index 0000000..5cc5e6e
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430.td
@@ -0,0 +1,66 @@
+//===- MSP430.td - Describe the MSP430 Target Machine ---------*- tblgen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This is the top level entry point for the MSP430 target.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// Subtarget Features.
+//===----------------------------------------------------------------------===//
+def FeatureX
+ : SubtargetFeature<"ext", "ExtendedInsts", "true",
+ "Enable MSP430-X extensions">;
+
+//===----------------------------------------------------------------------===//
+// MSP430 supported processors.
+//===----------------------------------------------------------------------===//
+class Proc<string Name, list<SubtargetFeature> Features>
+ : Processor<Name, NoItineraries, Features>;
+
+def : Proc<"generic", []>;
+
+//===----------------------------------------------------------------------===//
+// Register File Description
+//===----------------------------------------------------------------------===//
+
+include "MSP430RegisterInfo.td"
+
+//===----------------------------------------------------------------------===//
+// Calling Convention Description
+//===----------------------------------------------------------------------===//
+
+include "MSP430CallingConv.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "MSP430InstrInfo.td"
+
+def MSP430InstrInfo : InstrInfo;
+
+def MSP430InstPrinter : AsmWriter {
+ string AsmWriterClassName = "InstPrinter";
+ bit isMCAsmWriter = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Target Declaration
+//===----------------------------------------------------------------------===//
+
+def MSP430 : Target {
+ let InstructionSet = MSP430InstrInfo;
+ let AssemblyWriters = [MSP430InstPrinter];
+}
+
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp b/contrib/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp
new file mode 100644
index 0000000..8836549
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp
@@ -0,0 +1,167 @@
+//===-- MSP430AsmPrinter.cpp - MSP430 LLVM assembly writer ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to the MSP430 assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "MSP430.h"
+#include "MSP430InstrInfo.h"
+#include "MSP430MCInstLower.h"
+#include "MSP430TargetMachine.h"
+#include "InstPrinter/MSP430InstPrinter.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+ class MSP430AsmPrinter : public AsmPrinter {
+ public:
+ MSP430AsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : AsmPrinter(TM, Streamer) {}
+
+ virtual const char *getPassName() const {
+ return "MSP430 Assembly Printer";
+ }
+
+ void printOperand(const MachineInstr *MI, int OpNum,
+ raw_ostream &O, const char* Modifier = 0);
+ void printSrcMemOperand(const MachineInstr *MI, int OpNum,
+ raw_ostream &O);
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O);
+ bool PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNo, unsigned AsmVariant,
+ const char *ExtraCode, raw_ostream &O);
+ void EmitInstruction(const MachineInstr *MI);
+ };
+} // end of anonymous namespace
+
+
+void MSP430AsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
+ raw_ostream &O, const char *Modifier) {
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ switch (MO.getType()) {
+ default: assert(0 && "Not implemented yet!");
+ case MachineOperand::MO_Register:
+ O << MSP430InstPrinter::getRegisterName(MO.getReg());
+ return;
+ case MachineOperand::MO_Immediate:
+ if (!Modifier || strcmp(Modifier, "nohash"))
+ O << '#';
+ O << MO.getImm();
+ return;
+ case MachineOperand::MO_MachineBasicBlock:
+ O << *MO.getMBB()->getSymbol();
+ return;
+ case MachineOperand::MO_GlobalAddress: {
+ bool isMemOp = Modifier && !strcmp(Modifier, "mem");
+ uint64_t Offset = MO.getOffset();
+
+ // If the global address expression is a part of displacement field with a
+ // register base, we should not emit any prefix symbol here, e.g.
+ // mov.w &foo, r1
+ // vs
+ // mov.w glb(r1), r2
+ // Otherwise (!) msp430-as will silently miscompile the output :(
+ if (!Modifier || strcmp(Modifier, "nohash"))
+ O << (isMemOp ? '&' : '#');
+ if (Offset)
+ O << '(' << Offset << '+';
+
+ O << *Mang->getSymbol(MO.getGlobal());
+
+ if (Offset)
+ O << ')';
+
+ return;
+ }
+ case MachineOperand::MO_ExternalSymbol: {
+ bool isMemOp = Modifier && !strcmp(Modifier, "mem");
+ O << (isMemOp ? '&' : '#');
+ O << MAI->getGlobalPrefix() << MO.getSymbolName();
+ return;
+ }
+ }
+}
+
+void MSP430AsmPrinter::printSrcMemOperand(const MachineInstr *MI, int OpNum,
+ raw_ostream &O) {
+ const MachineOperand &Base = MI->getOperand(OpNum);
+ const MachineOperand &Disp = MI->getOperand(OpNum+1);
+
+ // Print displacement first
+
+ // Imm here is in fact global address - print extra modifier.
+ if (Disp.isImm() && !Base.getReg())
+ O << '&';
+ printOperand(MI, OpNum+1, O, "nohash");
+
+ // Print register base field
+ if (Base.getReg()) {
+ O << '(';
+ printOperand(MI, OpNum, O);
+ O << ')';
+ }
+}
+
+/// PrintAsmOperand - Print out an operand for an inline asm expression.
+///
+bool MSP430AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode, raw_ostream &O) {
+ // Does this asm operand have a single letter operand modifier?
+ if (ExtraCode && ExtraCode[0])
+ return true; // Unknown modifier.
+
+ printOperand(MI, OpNo, O);
+ return false;
+}
+
+bool MSP430AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNo, unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &O) {
+ if (ExtraCode && ExtraCode[0]) {
+ return true; // Unknown modifier.
+ }
+ printSrcMemOperand(MI, OpNo, O);
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+void MSP430AsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ MSP430MCInstLower MCInstLowering(OutContext, *Mang, *this);
+
+ MCInst TmpInst;
+ MCInstLowering.Lower(MI, TmpInst);
+ OutStreamer.EmitInstruction(TmpInst);
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeMSP430AsmPrinter() {
+ RegisterAsmPrinter<MSP430AsmPrinter> X(TheMSP430Target);
+}
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430BranchSelector.cpp b/contrib/llvm/lib/Target/MSP430/MSP430BranchSelector.cpp
new file mode 100644
index 0000000..bd64443
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430BranchSelector.cpp
@@ -0,0 +1,180 @@
+//===-- MSP430BranchSelector.cpp - Emit long conditional branches--*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that scans a machine function to determine which
+// conditional branches need more than 10 bits of displacement to reach their
+// target basic block. It does this in two passes; a calculation of basic block
+// positions pass, and a branch pseudo op to machine branch opcode pass. This
+// pass should be run last, just before the assembly printer.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "msp430-branch-select"
+#include "MSP430.h"
+#include "MSP430InstrInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/MathExtras.h"
+using namespace llvm;
+
+STATISTIC(NumExpanded, "Number of branches expanded to long format");
+
+namespace {
+ struct MSP430BSel : public MachineFunctionPass {
+ static char ID;
+ MSP430BSel() : MachineFunctionPass(ID) {}
+
+ /// BlockSizes - The sizes of the basic blocks in the function.
+ std::vector<unsigned> BlockSizes;
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual const char *getPassName() const {
+ return "MSP430 Branch Selector";
+ }
+ };
+ char MSP430BSel::ID = 0;
+}
+
+/// createMSP430BranchSelectionPass - returns an instance of the Branch
+/// Selection Pass
+///
+FunctionPass *llvm::createMSP430BranchSelectionPass() {
+ return new MSP430BSel();
+}
+
+bool MSP430BSel::runOnMachineFunction(MachineFunction &Fn) {
+ const MSP430InstrInfo *TII =
+ static_cast<const MSP430InstrInfo*>(Fn.getTarget().getInstrInfo());
+ // Give the blocks of the function a dense, in-order, numbering.
+ Fn.RenumberBlocks();
+ BlockSizes.resize(Fn.getNumBlockIDs());
+
+ // Measure each MBB and compute a size for the entire function.
+ unsigned FuncSize = 0;
+ for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
+ ++MFI) {
+ MachineBasicBlock *MBB = MFI;
+
+ unsigned BlockSize = 0;
+ for (MachineBasicBlock::iterator MBBI = MBB->begin(), EE = MBB->end();
+ MBBI != EE; ++MBBI)
+ BlockSize += TII->GetInstSizeInBytes(MBBI);
+
+ BlockSizes[MBB->getNumber()] = BlockSize;
+ FuncSize += BlockSize;
+ }
+
+ // If the entire function is smaller than the displacement of a branch field,
+ // we know we don't need to shrink any branches in this function. This is a
+ // common case.
+ if (FuncSize < (1 << 9)) {
+ BlockSizes.clear();
+ return false;
+ }
+
+ // For each conditional branch, if the offset to its destination is larger
+ // than the offset field allows, transform it into a long branch sequence
+ // like this:
+ // short branch:
+ // bCC MBB
+ // long branch:
+ // b!CC $PC+6
+ // b MBB
+ //
+ bool MadeChange = true;
+ bool EverMadeChange = false;
+ while (MadeChange) {
+ // Iteratively expand branches until we reach a fixed point.
+ MadeChange = false;
+
+ for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
+ ++MFI) {
+ MachineBasicBlock &MBB = *MFI;
+ unsigned MBBStartOffset = 0;
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I) {
+ if ((I->getOpcode() != MSP430::JCC || I->getOperand(0).isImm()) &&
+ I->getOpcode() != MSP430::JMP) {
+ MBBStartOffset += TII->GetInstSizeInBytes(I);
+ continue;
+ }
+
+ // Determine the offset from the current branch to the destination
+ // block.
+ MachineBasicBlock *Dest = I->getOperand(0).getMBB();
+
+ int BranchSize;
+ if (Dest->getNumber() <= MBB.getNumber()) {
+ // If this is a backwards branch, the delta is the offset from the
+ // start of this block to this branch, plus the sizes of all blocks
+ // from this block to the dest.
+ BranchSize = MBBStartOffset;
+
+ for (unsigned i = Dest->getNumber(), e = MBB.getNumber(); i != e; ++i)
+ BranchSize += BlockSizes[i];
+ } else {
+ // Otherwise, add the size of the blocks between this block and the
+ // dest to the number of bytes left in this block.
+ BranchSize = -MBBStartOffset;
+
+ for (unsigned i = MBB.getNumber(), e = Dest->getNumber(); i != e; ++i)
+ BranchSize += BlockSizes[i];
+ }
+
+ // If this branch is in range, ignore it.
+ if (isInt<10>(BranchSize)) {
+ MBBStartOffset += 2;
+ continue;
+ }
+
+ // Otherwise, we have to expand it to a long branch.
+ unsigned NewSize;
+ MachineInstr *OldBranch = I;
+ DebugLoc dl = OldBranch->getDebugLoc();
+
+ if (I->getOpcode() == MSP430::JMP) {
+ NewSize = 4;
+ } else {
+ // The BCC operands are:
+ // 0. MSP430 branch predicate
+ // 1. Target MBB
+ SmallVector<MachineOperand, 1> Cond;
+ Cond.push_back(I->getOperand(1));
+
+ // Jump over the uncond branch inst (i.e. $+6) on opposite condition.
+ TII->ReverseBranchCondition(Cond);
+ BuildMI(MBB, I, dl, TII->get(MSP430::JCC))
+ .addImm(4).addOperand(Cond[0]);
+
+ NewSize = 6;
+ }
+ // Uncond branch to the real destination.
+ I = BuildMI(MBB, I, dl, TII->get(MSP430::Bi)).addMBB(Dest);
+
+ // Remove the old branch from the function.
+ OldBranch->eraseFromParent();
+
+ // Remember that this instruction is NewSize bytes, increase the size of the
+ // block by NewSize-2, remember to iterate.
+ BlockSizes[MBB.getNumber()] += NewSize-2;
+ MBBStartOffset += NewSize;
+
+ ++NumExpanded;
+ MadeChange = true;
+ }
+ }
+ EverMadeChange |= MadeChange;
+ }
+
+ BlockSizes.clear();
+ return true;
+}
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430CallingConv.td b/contrib/llvm/lib/Target/MSP430/MSP430CallingConv.td
new file mode 100644
index 0000000..ad27cc9
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430CallingConv.td
@@ -0,0 +1,37 @@
+//==- MSP430CallingConv.td - Calling Conventions for MSP430 -*- tablegen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This describes the calling conventions for MSP430 architecture.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MSP430 Return Value Calling Convention
+//===----------------------------------------------------------------------===//
+def RetCC_MSP430 : CallingConv<[
+ // i8 are returned in registers R15B, R14B, R13B, R12B
+ CCIfType<[i8], CCAssignToReg<[R15B, R14B, R13B, R12B]>>,
+
+ // i16 are returned in registers R15, R14, R13, R12
+ CCIfType<[i16], CCAssignToReg<[R15W, R14W, R13W, R12W]>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// MSP430 Argument Calling Conventions
+//===----------------------------------------------------------------------===//
+def CC_MSP430 : CallingConv<[
+ // Promote i8 arguments to i16.
+ CCIfType<[i8], CCPromoteToType<i16>>,
+
+ // The first 4 integer arguments of non-varargs functions are passed in
+ // integer registers.
+ CCIfNotVarArg<CCIfType<[i16], CCAssignToReg<[R15W, R14W, R13W, R12W]>>>,
+
+ // Integer values get stored in stack slots that are 2 bytes in
+ // size and 2-byte aligned.
+ CCIfType<[i16], CCAssignToStack<2, 2>>
+]>;
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430FrameLowering.cpp b/contrib/llvm/lib/Target/MSP430/MSP430FrameLowering.cpp
new file mode 100644
index 0000000..c99f4ab
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430FrameLowering.cpp
@@ -0,0 +1,223 @@
+//======-- MSP430FrameLowering.cpp - MSP430 Frame Information -------=========//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MSP430 implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MSP430FrameLowering.h"
+#include "MSP430InstrInfo.h"
+#include "MSP430MachineFunctionInfo.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+bool MSP430FrameLowering::hasFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ return (DisableFramePointerElim(MF) ||
+ MF.getFrameInfo()->hasVarSizedObjects() ||
+ MFI->isFrameAddressTaken());
+}
+
+bool MSP430FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
+ return !MF.getFrameInfo()->hasVarSizedObjects();
+}
+
+void MSP430FrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MSP430MachineFunctionInfo *MSP430FI = MF.getInfo<MSP430MachineFunctionInfo>();
+ const MSP430InstrInfo &TII =
+ *static_cast<const MSP430InstrInfo*>(MF.getTarget().getInstrInfo());
+
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+ // Get the number of bytes to allocate from the FrameInfo.
+ uint64_t StackSize = MFI->getStackSize();
+
+ uint64_t NumBytes = 0;
+ if (hasFP(MF)) {
+ // Calculate required stack adjustment
+ uint64_t FrameSize = StackSize - 2;
+ NumBytes = FrameSize - MSP430FI->getCalleeSavedFrameSize();
+
+ // Get the offset of the stack slot for the EBP register... which is
+ // guaranteed to be the last slot by processFunctionBeforeFrameFinalized.
+ // Update the frame offset adjustment.
+ MFI->setOffsetAdjustment(-NumBytes);
+
+ // Save FPW into the appropriate stack slot...
+ BuildMI(MBB, MBBI, DL, TII.get(MSP430::PUSH16r))
+ .addReg(MSP430::FPW, RegState::Kill);
+
+ // Update FPW with the new base value...
+ BuildMI(MBB, MBBI, DL, TII.get(MSP430::MOV16rr), MSP430::FPW)
+ .addReg(MSP430::SPW);
+
+ // Mark the FramePtr as live-in in every block except the entry.
+ for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
+ I != E; ++I)
+ I->addLiveIn(MSP430::FPW);
+
+ } else
+ NumBytes = StackSize - MSP430FI->getCalleeSavedFrameSize();
+
+ // Skip the callee-saved push instructions.
+ while (MBBI != MBB.end() && (MBBI->getOpcode() == MSP430::PUSH16r))
+ ++MBBI;
+
+ if (MBBI != MBB.end())
+ DL = MBBI->getDebugLoc();
+
+ if (NumBytes) { // adjust stack pointer: SPW -= numbytes
+ // If there is an SUB16ri of SPW immediately before this instruction, merge
+ // the two.
+ //NumBytes -= mergeSPUpdates(MBB, MBBI, true);
+ // If there is an ADD16ri or SUB16ri of SPW immediately after this
+ // instruction, merge the two instructions.
+ // mergeSPUpdatesDown(MBB, MBBI, &NumBytes);
+
+ if (NumBytes) {
+ MachineInstr *MI =
+ BuildMI(MBB, MBBI, DL, TII.get(MSP430::SUB16ri), MSP430::SPW)
+ .addReg(MSP430::SPW).addImm(NumBytes);
+ // The SRW implicit def is dead.
+ MI->getOperand(3).setIsDead();
+ }
+ }
+}
+
+void MSP430FrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ MSP430MachineFunctionInfo *MSP430FI = MF.getInfo<MSP430MachineFunctionInfo>();
+ const MSP430InstrInfo &TII =
+ *static_cast<const MSP430InstrInfo*>(MF.getTarget().getInstrInfo());
+
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ unsigned RetOpcode = MBBI->getOpcode();
+ DebugLoc DL = MBBI->getDebugLoc();
+
+ switch (RetOpcode) {
+ case MSP430::RET:
+ case MSP430::RETI: break; // These are ok
+ default:
+ llvm_unreachable("Can only insert epilog into returning blocks");
+ }
+
+ // Get the number of bytes to allocate from the FrameInfo
+ uint64_t StackSize = MFI->getStackSize();
+ unsigned CSSize = MSP430FI->getCalleeSavedFrameSize();
+ uint64_t NumBytes = 0;
+
+ if (hasFP(MF)) {
+ // Calculate required stack adjustment
+ uint64_t FrameSize = StackSize - 2;
+ NumBytes = FrameSize - CSSize;
+
+ // pop FPW.
+ BuildMI(MBB, MBBI, DL, TII.get(MSP430::POP16r), MSP430::FPW);
+ } else
+ NumBytes = StackSize - CSSize;
+
+ // Skip the callee-saved pop instructions.
+ while (MBBI != MBB.begin()) {
+ MachineBasicBlock::iterator PI = prior(MBBI);
+ unsigned Opc = PI->getOpcode();
+ if (Opc != MSP430::POP16r && !PI->getDesc().isTerminator())
+ break;
+ --MBBI;
+ }
+
+ DL = MBBI->getDebugLoc();
+
+ // If there is an ADD16ri or SUB16ri of SPW immediately before this
+ // instruction, merge the two instructions.
+ //if (NumBytes || MFI->hasVarSizedObjects())
+ // mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes);
+
+ if (MFI->hasVarSizedObjects()) {
+ BuildMI(MBB, MBBI, DL,
+ TII.get(MSP430::MOV16rr), MSP430::SPW).addReg(MSP430::FPW);
+ if (CSSize) {
+ MachineInstr *MI =
+ BuildMI(MBB, MBBI, DL,
+ TII.get(MSP430::SUB16ri), MSP430::SPW)
+ .addReg(MSP430::SPW).addImm(CSSize);
+ // The SRW implicit def is dead.
+ MI->getOperand(3).setIsDead();
+ }
+ } else {
+ // adjust stack pointer back: SPW += numbytes
+ if (NumBytes) {
+ MachineInstr *MI =
+ BuildMI(MBB, MBBI, DL, TII.get(MSP430::ADD16ri), MSP430::SPW)
+ .addReg(MSP430::SPW).addImm(NumBytes);
+ // The SRW implicit def is dead.
+ MI->getOperand(3).setIsDead();
+ }
+ }
+}
+
+// FIXME: Can we eleminate these in favour of generic code?
+bool
+MSP430FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return false;
+
+ DebugLoc DL;
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ MSP430MachineFunctionInfo *MFI = MF.getInfo<MSP430MachineFunctionInfo>();
+ MFI->setCalleeSavedFrameSize(CSI.size() * 2);
+
+ for (unsigned i = CSI.size(); i != 0; --i) {
+ unsigned Reg = CSI[i-1].getReg();
+ // Add the callee-saved register as live-in. It's killed at the spill.
+ MBB.addLiveIn(Reg);
+ BuildMI(MBB, MI, DL, TII.get(MSP430::PUSH16r))
+ .addReg(Reg, RegState::Kill);
+ }
+ return true;
+}
+
+bool
+MSP430FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return false;
+
+ DebugLoc DL;
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i)
+ BuildMI(MBB, MI, DL, TII.get(MSP430::POP16r), CSI[i].getReg());
+
+ return true;
+}
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430FrameLowering.h b/contrib/llvm/lib/Target/MSP430/MSP430FrameLowering.h
new file mode 100644
index 0000000..b636827
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430FrameLowering.h
@@ -0,0 +1,53 @@
+//==- MSP430FrameLowering.h - Define frame lowering for MSP430 --*- C++ -*--==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MSP430_FRAMEINFO_H
+#define MSP430_FRAMEINFO_H
+
+#include "MSP430.h"
+#include "MSP430Subtarget.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+ class MSP430Subtarget;
+
+class MSP430FrameLowering : public TargetFrameLowering {
+protected:
+ const MSP430Subtarget &STI;
+
+public:
+ explicit MSP430FrameLowering(const MSP430Subtarget &sti)
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 2, -2), STI(sti) {
+ }
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+ bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+
+ bool hasFP(const MachineFunction &MF) const;
+ bool hasReservedCallFrame(const MachineFunction &MF) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/contrib/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
new file mode 100644
index 0000000..5430d43
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
@@ -0,0 +1,492 @@
+//===-- MSP430ISelDAGToDAG.cpp - A dag to dag inst selector for MSP430 ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the MSP430 target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MSP430.h"
+#include "MSP430TargetMachine.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+ struct MSP430ISelAddressMode {
+ enum {
+ RegBase,
+ FrameIndexBase
+ } BaseType;
+
+ struct { // This is really a union, discriminated by BaseType!
+ SDValue Reg;
+ int FrameIndex;
+ } Base;
+
+ int16_t Disp;
+ const GlobalValue *GV;
+ const Constant *CP;
+ const BlockAddress *BlockAddr;
+ const char *ES;
+ int JT;
+ unsigned Align; // CP alignment.
+
+ MSP430ISelAddressMode()
+ : BaseType(RegBase), Disp(0), GV(0), CP(0), BlockAddr(0),
+ ES(0), JT(-1), Align(0) {
+ }
+
+ bool hasSymbolicDisplacement() const {
+ return GV != 0 || CP != 0 || ES != 0 || JT != -1;
+ }
+
+ void dump() {
+ errs() << "MSP430ISelAddressMode " << this << '\n';
+ if (BaseType == RegBase && Base.Reg.getNode() != 0) {
+ errs() << "Base.Reg ";
+ Base.Reg.getNode()->dump();
+ } else if (BaseType == FrameIndexBase) {
+ errs() << " Base.FrameIndex " << Base.FrameIndex << '\n';
+ }
+ errs() << " Disp " << Disp << '\n';
+ if (GV) {
+ errs() << "GV ";
+ GV->dump();
+ } else if (CP) {
+ errs() << " CP ";
+ CP->dump();
+ errs() << " Align" << Align << '\n';
+ } else if (ES) {
+ errs() << "ES ";
+ errs() << ES << '\n';
+ } else if (JT != -1)
+ errs() << " JT" << JT << " Align" << Align << '\n';
+ }
+ };
+}
+
+/// MSP430DAGToDAGISel - MSP430 specific code to select MSP430 machine
+/// instructions for SelectionDAG operations.
+///
+namespace {
+ class MSP430DAGToDAGISel : public SelectionDAGISel {
+ const MSP430TargetLowering &Lowering;
+ const MSP430Subtarget &Subtarget;
+
+ public:
+ MSP430DAGToDAGISel(MSP430TargetMachine &TM, CodeGenOpt::Level OptLevel)
+ : SelectionDAGISel(TM, OptLevel),
+ Lowering(*TM.getTargetLowering()),
+ Subtarget(*TM.getSubtargetImpl()) { }
+
+ virtual const char *getPassName() const {
+ return "MSP430 DAG->DAG Pattern Instruction Selection";
+ }
+
+ bool MatchAddress(SDValue N, MSP430ISelAddressMode &AM);
+ bool MatchWrapper(SDValue N, MSP430ISelAddressMode &AM);
+ bool MatchAddressBase(SDValue N, MSP430ISelAddressMode &AM);
+
+ virtual bool
+ SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
+ std::vector<SDValue> &OutOps);
+
+ // Include the pieces autogenerated from the target description.
+ #include "MSP430GenDAGISel.inc"
+
+ private:
+ SDNode *Select(SDNode *N);
+ SDNode *SelectIndexedLoad(SDNode *Op);
+ SDNode *SelectIndexedBinOp(SDNode *Op, SDValue N1, SDValue N2,
+ unsigned Opc8, unsigned Opc16);
+
+ bool SelectAddr(SDValue Addr, SDValue &Base, SDValue &Disp);
+ };
+} // end anonymous namespace
+
+/// createMSP430ISelDag - This pass converts a legalized DAG into a
+/// MSP430-specific DAG, ready for instruction scheduling.
+///
+FunctionPass *llvm::createMSP430ISelDag(MSP430TargetMachine &TM,
+ CodeGenOpt::Level OptLevel) {
+ return new MSP430DAGToDAGISel(TM, OptLevel);
+}
+
+
+/// MatchWrapper - Try to match MSP430ISD::Wrapper node into an addressing mode.
+/// These wrap things that will resolve down into a symbol reference. If no
+/// match is possible, this returns true, otherwise it returns false.
+bool MSP430DAGToDAGISel::MatchWrapper(SDValue N, MSP430ISelAddressMode &AM) {
+ // If the addressing mode already has a symbol as the displacement, we can
+ // never match another symbol.
+ if (AM.hasSymbolicDisplacement())
+ return true;
+
+ SDValue N0 = N.getOperand(0);
+
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
+ AM.GV = G->getGlobal();
+ AM.Disp += G->getOffset();
+ //AM.SymbolFlags = G->getTargetFlags();
+ } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
+ AM.CP = CP->getConstVal();
+ AM.Align = CP->getAlignment();
+ AM.Disp += CP->getOffset();
+ //AM.SymbolFlags = CP->getTargetFlags();
+ } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
+ AM.ES = S->getSymbol();
+ //AM.SymbolFlags = S->getTargetFlags();
+ } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
+ AM.JT = J->getIndex();
+ //AM.SymbolFlags = J->getTargetFlags();
+ } else {
+ AM.BlockAddr = cast<BlockAddressSDNode>(N0)->getBlockAddress();
+ //AM.SymbolFlags = cast<BlockAddressSDNode>(N0)->getTargetFlags();
+ }
+ return false;
+}
+
+/// MatchAddressBase - Helper for MatchAddress. Add the specified node to the
+/// specified addressing mode without any further recursion.
+bool MSP430DAGToDAGISel::MatchAddressBase(SDValue N, MSP430ISelAddressMode &AM) {
+ // Is the base register already occupied?
+ if (AM.BaseType != MSP430ISelAddressMode::RegBase || AM.Base.Reg.getNode()) {
+ // If so, we cannot select it.
+ return true;
+ }
+
+ // Default, generate it as a register.
+ AM.BaseType = MSP430ISelAddressMode::RegBase;
+ AM.Base.Reg = N;
+ return false;
+}
+
+bool MSP430DAGToDAGISel::MatchAddress(SDValue N, MSP430ISelAddressMode &AM) {
+ DEBUG(errs() << "MatchAddress: "; AM.dump());
+
+ switch (N.getOpcode()) {
+ default: break;
+ case ISD::Constant: {
+ uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
+ AM.Disp += Val;
+ return false;
+ }
+
+ case MSP430ISD::Wrapper:
+ if (!MatchWrapper(N, AM))
+ return false;
+ break;
+
+ case ISD::FrameIndex:
+ if (AM.BaseType == MSP430ISelAddressMode::RegBase
+ && AM.Base.Reg.getNode() == 0) {
+ AM.BaseType = MSP430ISelAddressMode::FrameIndexBase;
+ AM.Base.FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
+ return false;
+ }
+ break;
+
+ case ISD::ADD: {
+ MSP430ISelAddressMode Backup = AM;
+ if (!MatchAddress(N.getNode()->getOperand(0), AM) &&
+ !MatchAddress(N.getNode()->getOperand(1), AM))
+ return false;
+ AM = Backup;
+ if (!MatchAddress(N.getNode()->getOperand(1), AM) &&
+ !MatchAddress(N.getNode()->getOperand(0), AM))
+ return false;
+ AM = Backup;
+
+ break;
+ }
+
+ case ISD::OR:
+ // Handle "X | C" as "X + C" iff X is known to have C bits clear.
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ MSP430ISelAddressMode Backup = AM;
+ uint64_t Offset = CN->getSExtValue();
+ // Start with the LHS as an addr mode.
+ if (!MatchAddress(N.getOperand(0), AM) &&
+ // Address could not have picked a GV address for the displacement.
+ AM.GV == NULL &&
+ // Check to see if the LHS & C is zero.
+ CurDAG->MaskedValueIsZero(N.getOperand(0), CN->getAPIntValue())) {
+ AM.Disp += Offset;
+ return false;
+ }
+ AM = Backup;
+ }
+ break;
+ }
+
+ return MatchAddressBase(N, AM);
+}
+
+/// SelectAddr - returns true if it is able pattern match an addressing mode.
+/// It returns the operands which make up the maximal addressing mode it can
+/// match by reference.
+bool MSP430DAGToDAGISel::SelectAddr(SDValue N,
+ SDValue &Base, SDValue &Disp) {
+ MSP430ISelAddressMode AM;
+
+ if (MatchAddress(N, AM))
+ return false;
+
+ EVT VT = N.getValueType();
+ if (AM.BaseType == MSP430ISelAddressMode::RegBase) {
+ if (!AM.Base.Reg.getNode())
+ AM.Base.Reg = CurDAG->getRegister(0, VT);
+ }
+
+ Base = (AM.BaseType == MSP430ISelAddressMode::FrameIndexBase) ?
+ CurDAG->getTargetFrameIndex(AM.Base.FrameIndex, TLI.getPointerTy()) :
+ AM.Base.Reg;
+
+ if (AM.GV)
+ Disp = CurDAG->getTargetGlobalAddress(AM.GV, N->getDebugLoc(),
+ MVT::i16, AM.Disp,
+ 0/*AM.SymbolFlags*/);
+ else if (AM.CP)
+ Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i16,
+ AM.Align, AM.Disp, 0/*AM.SymbolFlags*/);
+ else if (AM.ES)
+ Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i16, 0/*AM.SymbolFlags*/);
+ else if (AM.JT != -1)
+ Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i16, 0/*AM.SymbolFlags*/);
+ else if (AM.BlockAddr)
+ Disp = CurDAG->getBlockAddress(AM.BlockAddr, MVT::i32,
+ true, 0/*AM.SymbolFlags*/);
+ else
+ Disp = CurDAG->getTargetConstant(AM.Disp, MVT::i16);
+
+ return true;
+}
+
+bool MSP430DAGToDAGISel::
+SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
+ std::vector<SDValue> &OutOps) {
+ SDValue Op0, Op1;
+ switch (ConstraintCode) {
+ default: return true;
+ case 'm': // memory
+ if (!SelectAddr(Op, Op0, Op1))
+ return true;
+ break;
+ }
+
+ OutOps.push_back(Op0);
+ OutOps.push_back(Op1);
+ return false;
+}
+
+static bool isValidIndexedLoad(const LoadSDNode *LD) {
+ ISD::MemIndexedMode AM = LD->getAddressingMode();
+ if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD)
+ return false;
+
+ EVT VT = LD->getMemoryVT();
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ case MVT::i8:
+ // Sanity check
+ if (cast<ConstantSDNode>(LD->getOffset())->getZExtValue() != 1)
+ return false;
+
+ break;
+ case MVT::i16:
+ // Sanity check
+ if (cast<ConstantSDNode>(LD->getOffset())->getZExtValue() != 2)
+ return false;
+
+ break;
+ default:
+ return false;
+ }
+
+ return true;
+}
+
+SDNode *MSP430DAGToDAGISel::SelectIndexedLoad(SDNode *N) {
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ if (!isValidIndexedLoad(LD))
+ return NULL;
+
+ MVT VT = LD->getMemoryVT().getSimpleVT();
+
+ unsigned Opcode = 0;
+ switch (VT.SimpleTy) {
+ case MVT::i8:
+ Opcode = MSP430::MOV8rm_POST;
+ break;
+ case MVT::i16:
+ Opcode = MSP430::MOV16rm_POST;
+ break;
+ default:
+ return NULL;
+ }
+
+ return CurDAG->getMachineNode(Opcode, N->getDebugLoc(),
+ VT, MVT::i16, MVT::Other,
+ LD->getBasePtr(), LD->getChain());
+}
+
+SDNode *MSP430DAGToDAGISel::SelectIndexedBinOp(SDNode *Op,
+ SDValue N1, SDValue N2,
+ unsigned Opc8, unsigned Opc16) {
+ if (N1.getOpcode() == ISD::LOAD &&
+ N1.hasOneUse() &&
+ IsLegalToFold(N1, Op, Op, OptLevel)) {
+ LoadSDNode *LD = cast<LoadSDNode>(N1);
+ if (!isValidIndexedLoad(LD))
+ return NULL;
+
+ MVT VT = LD->getMemoryVT().getSimpleVT();
+ unsigned Opc = (VT == MVT::i16 ? Opc16 : Opc8);
+ MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
+ MemRefs0[0] = cast<MemSDNode>(N1)->getMemOperand();
+ SDValue Ops0[] = { N2, LD->getBasePtr(), LD->getChain() };
+ SDNode *ResNode =
+ CurDAG->SelectNodeTo(Op, Opc,
+ VT, MVT::i16, MVT::Other,
+ Ops0, 3);
+ cast<MachineSDNode>(ResNode)->setMemRefs(MemRefs0, MemRefs0 + 1);
+ // Transfer chain.
+ ReplaceUses(SDValue(N1.getNode(), 2), SDValue(ResNode, 2));
+ // Transfer writeback.
+ ReplaceUses(SDValue(N1.getNode(), 1), SDValue(ResNode, 1));
+ return ResNode;
+ }
+
+ return NULL;
+}
+
+
+SDNode *MSP430DAGToDAGISel::Select(SDNode *Node) {
+ DebugLoc dl = Node->getDebugLoc();
+
+ // Dump information about the Node being selected
+ DEBUG(errs() << "Selecting: ");
+ DEBUG(Node->dump(CurDAG));
+ DEBUG(errs() << "\n");
+
+ // If we have a custom node, we already have selected!
+ if (Node->isMachineOpcode()) {
+ DEBUG(errs() << "== ";
+ Node->dump(CurDAG);
+ errs() << "\n");
+ return NULL;
+ }
+
+ // Few custom selection stuff.
+ switch (Node->getOpcode()) {
+ default: break;
+ case ISD::FrameIndex: {
+ assert(Node->getValueType(0) == MVT::i16);
+ int FI = cast<FrameIndexSDNode>(Node)->getIndex();
+ SDValue TFI = CurDAG->getTargetFrameIndex(FI, MVT::i16);
+ if (Node->hasOneUse())
+ return CurDAG->SelectNodeTo(Node, MSP430::ADD16ri, MVT::i16,
+ TFI, CurDAG->getTargetConstant(0, MVT::i16));
+ return CurDAG->getMachineNode(MSP430::ADD16ri, dl, MVT::i16,
+ TFI, CurDAG->getTargetConstant(0, MVT::i16));
+ }
+ case ISD::LOAD:
+ if (SDNode *ResNode = SelectIndexedLoad(Node))
+ return ResNode;
+ // Other cases are autogenerated.
+ break;
+ case ISD::ADD:
+ if (SDNode *ResNode =
+ SelectIndexedBinOp(Node,
+ Node->getOperand(0), Node->getOperand(1),
+ MSP430::ADD8rm_POST, MSP430::ADD16rm_POST))
+ return ResNode;
+ else if (SDNode *ResNode =
+ SelectIndexedBinOp(Node, Node->getOperand(1), Node->getOperand(0),
+ MSP430::ADD8rm_POST, MSP430::ADD16rm_POST))
+ return ResNode;
+
+ // Other cases are autogenerated.
+ break;
+ case ISD::SUB:
+ if (SDNode *ResNode =
+ SelectIndexedBinOp(Node,
+ Node->getOperand(0), Node->getOperand(1),
+ MSP430::SUB8rm_POST, MSP430::SUB16rm_POST))
+ return ResNode;
+
+ // Other cases are autogenerated.
+ break;
+ case ISD::AND:
+ if (SDNode *ResNode =
+ SelectIndexedBinOp(Node,
+ Node->getOperand(0), Node->getOperand(1),
+ MSP430::AND8rm_POST, MSP430::AND16rm_POST))
+ return ResNode;
+ else if (SDNode *ResNode =
+ SelectIndexedBinOp(Node, Node->getOperand(1), Node->getOperand(0),
+ MSP430::AND8rm_POST, MSP430::AND16rm_POST))
+ return ResNode;
+
+ // Other cases are autogenerated.
+ break;
+ case ISD::OR:
+ if (SDNode *ResNode =
+ SelectIndexedBinOp(Node,
+ Node->getOperand(0), Node->getOperand(1),
+ MSP430::OR8rm_POST, MSP430::OR16rm_POST))
+ return ResNode;
+ else if (SDNode *ResNode =
+ SelectIndexedBinOp(Node, Node->getOperand(1), Node->getOperand(0),
+ MSP430::OR8rm_POST, MSP430::OR16rm_POST))
+ return ResNode;
+
+ // Other cases are autogenerated.
+ break;
+ case ISD::XOR:
+ if (SDNode *ResNode =
+ SelectIndexedBinOp(Node,
+ Node->getOperand(0), Node->getOperand(1),
+ MSP430::XOR8rm_POST, MSP430::XOR16rm_POST))
+ return ResNode;
+ else if (SDNode *ResNode =
+ SelectIndexedBinOp(Node, Node->getOperand(1), Node->getOperand(0),
+ MSP430::XOR8rm_POST, MSP430::XOR16rm_POST))
+ return ResNode;
+
+ // Other cases are autogenerated.
+ break;
+ }
+
+ // Select the default instruction
+ SDNode *ResNode = SelectCode(Node);
+
+ DEBUG(errs() << "=> ");
+ if (ResNode == NULL || ResNode == Node)
+ DEBUG(Node->dump(CurDAG));
+ else
+ DEBUG(ResNode->dump(CurDAG));
+ DEBUG(errs() << "\n");
+
+ return ResNode;
+}
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp b/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp
new file mode 100644
index 0000000..dc37431
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -0,0 +1,1198 @@
+//===-- MSP430ISelLowering.cpp - MSP430 DAG Lowering Implementation ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MSP430TargetLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "msp430-lower"
+
+#include "MSP430ISelLowering.h"
+#include "MSP430.h"
+#include "MSP430MachineFunctionInfo.h"
+#include "MSP430TargetMachine.h"
+#include "MSP430Subtarget.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/CallingConv.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/GlobalAlias.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/VectorExtras.h"
+using namespace llvm;
+
+typedef enum {
+ NoHWMult,
+ HWMultIntr,
+ HWMultNoIntr
+} HWMultUseMode;
+
+static cl::opt<HWMultUseMode>
+HWMultMode("msp430-hwmult-mode",
+ cl::desc("Hardware multiplier use mode"),
+ cl::init(HWMultNoIntr),
+ cl::values(
+ clEnumValN(NoHWMult, "no",
+ "Do not use hardware multiplier"),
+ clEnumValN(HWMultIntr, "interrupts",
+ "Assume hardware multiplier can be used inside interrupts"),
+ clEnumValN(HWMultNoIntr, "use",
+ "Assume hardware multiplier cannot be used inside interrupts"),
+ clEnumValEnd));
+
+MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) :
+ TargetLowering(tm, new TargetLoweringObjectFileELF()),
+ Subtarget(*tm.getSubtargetImpl()), TM(tm) {
+
+ TD = getTargetData();
+
+ // Set up the register classes.
+ addRegisterClass(MVT::i8, MSP430::GR8RegisterClass);
+ addRegisterClass(MVT::i16, MSP430::GR16RegisterClass);
+
+ // Compute derived properties from the register classes
+ computeRegisterProperties();
+
+ // Provide all sorts of operation actions
+
+ // Division is expensive
+ setIntDivIsCheap(false);
+
+ setStackPointerRegisterToSaveRestore(MSP430::SPW);
+ setBooleanContents(ZeroOrOneBooleanContent);
+ setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
+ setSchedulingPreference(Sched::Latency);
+
+ // We have post-incremented loads / stores.
+ setIndexedLoadAction(ISD::POST_INC, MVT::i8, Legal);
+ setIndexedLoadAction(ISD::POST_INC, MVT::i16, Legal);
+
+ setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Expand);
+
+ // We don't have any truncstores
+ setTruncStoreAction(MVT::i16, MVT::i8, Expand);
+
+ setOperationAction(ISD::SRA, MVT::i8, Custom);
+ setOperationAction(ISD::SHL, MVT::i8, Custom);
+ setOperationAction(ISD::SRL, MVT::i8, Custom);
+ setOperationAction(ISD::SRA, MVT::i16, Custom);
+ setOperationAction(ISD::SHL, MVT::i16, Custom);
+ setOperationAction(ISD::SRL, MVT::i16, Custom);
+ setOperationAction(ISD::ROTL, MVT::i8, Expand);
+ setOperationAction(ISD::ROTR, MVT::i8, Expand);
+ setOperationAction(ISD::ROTL, MVT::i16, Expand);
+ setOperationAction(ISD::ROTR, MVT::i16, Expand);
+ setOperationAction(ISD::GlobalAddress, MVT::i16, Custom);
+ setOperationAction(ISD::ExternalSymbol, MVT::i16, Custom);
+ setOperationAction(ISD::BlockAddress, MVT::i16, Custom);
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i8, Custom);
+ setOperationAction(ISD::BR_CC, MVT::i16, Custom);
+ setOperationAction(ISD::BRCOND, MVT::Other, Expand);
+ setOperationAction(ISD::SETCC, MVT::i8, Custom);
+ setOperationAction(ISD::SETCC, MVT::i16, Custom);
+ setOperationAction(ISD::SELECT, MVT::i8, Expand);
+ setOperationAction(ISD::SELECT, MVT::i16, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::i8, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::i16, Custom);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::i16, Custom);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i8, Expand);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i16, Expand);
+
+ setOperationAction(ISD::CTTZ, MVT::i8, Expand);
+ setOperationAction(ISD::CTTZ, MVT::i16, Expand);
+ setOperationAction(ISD::CTLZ, MVT::i8, Expand);
+ setOperationAction(ISD::CTLZ, MVT::i16, Expand);
+ setOperationAction(ISD::CTPOP, MVT::i8, Expand);
+ setOperationAction(ISD::CTPOP, MVT::i16, Expand);
+
+ setOperationAction(ISD::SHL_PARTS, MVT::i8, Expand);
+ setOperationAction(ISD::SHL_PARTS, MVT::i16, Expand);
+ setOperationAction(ISD::SRL_PARTS, MVT::i8, Expand);
+ setOperationAction(ISD::SRL_PARTS, MVT::i16, Expand);
+ setOperationAction(ISD::SRA_PARTS, MVT::i8, Expand);
+ setOperationAction(ISD::SRA_PARTS, MVT::i16, Expand);
+
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+
+ // FIXME: Implement efficiently multiplication by a constant
+ setOperationAction(ISD::MUL, MVT::i8, Expand);
+ setOperationAction(ISD::MULHS, MVT::i8, Expand);
+ setOperationAction(ISD::MULHU, MVT::i8, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i8, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i8, Expand);
+ setOperationAction(ISD::MUL, MVT::i16, Expand);
+ setOperationAction(ISD::MULHS, MVT::i16, Expand);
+ setOperationAction(ISD::MULHU, MVT::i16, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand);
+
+ setOperationAction(ISD::UDIV, MVT::i8, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i8, Expand);
+ setOperationAction(ISD::UREM, MVT::i8, Expand);
+ setOperationAction(ISD::SDIV, MVT::i8, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i8, Expand);
+ setOperationAction(ISD::SREM, MVT::i8, Expand);
+ setOperationAction(ISD::UDIV, MVT::i16, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i16, Expand);
+ setOperationAction(ISD::UREM, MVT::i16, Expand);
+ setOperationAction(ISD::SDIV, MVT::i16, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i16, Expand);
+ setOperationAction(ISD::SREM, MVT::i16, Expand);
+
+ // Libcalls names.
+ if (HWMultMode == HWMultIntr) {
+ setLibcallName(RTLIB::MUL_I8, "__mulqi3hw");
+ setLibcallName(RTLIB::MUL_I16, "__mulhi3hw");
+ } else if (HWMultMode == HWMultNoIntr) {
+ setLibcallName(RTLIB::MUL_I8, "__mulqi3hw_noint");
+ setLibcallName(RTLIB::MUL_I16, "__mulhi3hw_noint");
+ }
+
+ setMinFunctionAlignment(1);
+ setPrefFunctionAlignment(2);
+}
+
+SDValue MSP430TargetLowering::LowerOperation(SDValue Op,
+ SelectionDAG &DAG) const {
+ switch (Op.getOpcode()) {
+ case ISD::SHL: // FALLTHROUGH
+ case ISD::SRL:
+ case ISD::SRA: return LowerShifts(Op, DAG);
+ case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
+ case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
+ case ISD::ExternalSymbol: return LowerExternalSymbol(Op, DAG);
+ case ISD::SETCC: return LowerSETCC(Op, DAG);
+ case ISD::BR_CC: return LowerBR_CC(Op, DAG);
+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ case ISD::SIGN_EXTEND: return LowerSIGN_EXTEND(Op, DAG);
+ case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
+ case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
+ default:
+ llvm_unreachable("unimplemented operand");
+ return SDValue();
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// MSP430 Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+/// getConstraintType - Given a constraint letter, return the type of
+/// constraint it is for this target.
+TargetLowering::ConstraintType
+MSP430TargetLowering::getConstraintType(const std::string &Constraint) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ case 'r':
+ return C_RegisterClass;
+ default:
+ break;
+ }
+ }
+ return TargetLowering::getConstraintType(Constraint);
+}
+
+std::pair<unsigned, const TargetRegisterClass*>
+MSP430TargetLowering::
+getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const {
+ if (Constraint.size() == 1) {
+ // GCC Constraint Letters
+ switch (Constraint[0]) {
+ default: break;
+ case 'r': // GENERAL_REGS
+ if (VT == MVT::i8)
+ return std::make_pair(0U, MSP430::GR8RegisterClass);
+
+ return std::make_pair(0U, MSP430::GR16RegisterClass);
+ }
+ }
+
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
+
+//===----------------------------------------------------------------------===//
+// Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+#include "MSP430GenCallingConv.inc"
+
+SDValue
+MSP430TargetLowering::LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ DebugLoc dl,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals)
+ const {
+
+ switch (CallConv) {
+ default:
+ llvm_unreachable("Unsupported calling convention");
+ case CallingConv::C:
+ case CallingConv::Fast:
+ return LowerCCCArguments(Chain, CallConv, isVarArg, Ins, dl, DAG, InVals);
+ case CallingConv::MSP430_INTR:
+ if (Ins.empty())
+ return Chain;
+ else {
+ report_fatal_error("ISRs cannot have arguments");
+ return SDValue();
+ }
+ }
+}
+
+SDValue
+MSP430TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ // MSP430 target does not yet support tail call optimization.
+ isTailCall = false;
+
+ switch (CallConv) {
+ default:
+ llvm_unreachable("Unsupported calling convention");
+ case CallingConv::Fast:
+ case CallingConv::C:
+ return LowerCCCCallTo(Chain, Callee, CallConv, isVarArg, isTailCall,
+ Outs, OutVals, Ins, dl, DAG, InVals);
+ case CallingConv::MSP430_INTR:
+ report_fatal_error("ISRs cannot be called directly");
+ return SDValue();
+ }
+}
+
+/// LowerCCCArguments - transform physical registers into virtual registers and
+/// generate load operations for arguments places on the stack.
+// FIXME: struct return stuff
+// FIXME: varargs
+SDValue
+MSP430TargetLowering::LowerCCCArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ DebugLoc dl,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals)
+ const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+
+ // Assign locations to all of the incoming arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+ CCInfo.AnalyzeFormalArguments(Ins, CC_MSP430);
+
+ assert(!isVarArg && "Varargs not supported yet");
+
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ if (VA.isRegLoc()) {
+ // Arguments passed in registers
+ EVT RegVT = VA.getLocVT();
+ switch (RegVT.getSimpleVT().SimpleTy) {
+ default:
+ {
+#ifndef NDEBUG
+ errs() << "LowerFormalArguments Unhandled argument type: "
+ << RegVT.getSimpleVT().SimpleTy << "\n";
+#endif
+ llvm_unreachable(0);
+ }
+ case MVT::i16:
+ unsigned VReg =
+ RegInfo.createVirtualRegister(MSP430::GR16RegisterClass);
+ RegInfo.addLiveIn(VA.getLocReg(), VReg);
+ SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, RegVT);
+
+ // If this is an 8-bit value, it is really passed promoted to 16
+ // bits. Insert an assert[sz]ext to capture this, then truncate to the
+ // right size.
+ if (VA.getLocInfo() == CCValAssign::SExt)
+ ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+ else if (VA.getLocInfo() == CCValAssign::ZExt)
+ ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+
+ if (VA.getLocInfo() != CCValAssign::Full)
+ ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
+
+ InVals.push_back(ArgValue);
+ }
+ } else {
+ // Sanity check
+ assert(VA.isMemLoc());
+ // Load the argument to a virtual register
+ unsigned ObjSize = VA.getLocVT().getSizeInBits()/8;
+ if (ObjSize > 2) {
+ errs() << "LowerFormalArguments Unhandled argument type: "
+ << EVT(VA.getLocVT()).getEVTString()
+ << "\n";
+ }
+ // Create the frame index object for this incoming parameter...
+ int FI = MFI->CreateFixedObject(ObjSize, VA.getLocMemOffset(), true);
+
+ // Create the SelectionDAG nodes corresponding to a load
+ //from this parameter
+ SDValue FIN = DAG.getFrameIndex(FI, MVT::i16);
+ InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN,
+ MachinePointerInfo::getFixedStack(FI),
+ false, false, 0));
+ }
+ }
+
+ return Chain;
+}
+
+SDValue
+MSP430TargetLowering::LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const {
+
+ // CCValAssign - represent the assignment of the return value to a location
+ SmallVector<CCValAssign, 16> RVLocs;
+
+ // ISRs cannot return any value.
+ if (CallConv == CallingConv::MSP430_INTR && !Outs.empty()) {
+ report_fatal_error("ISRs cannot return any value");
+ return SDValue();
+ }
+
+ // CCState - Info about the registers and stack slot.
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+
+ // Analize return values.
+ CCInfo.AnalyzeReturn(Outs, RetCC_MSP430);
+
+ // If this is the first return lowered for this function, add the regs to the
+ // liveout set for the function.
+ if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
+ for (unsigned i = 0; i != RVLocs.size(); ++i)
+ if (RVLocs[i].isRegLoc())
+ DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
+ }
+
+ SDValue Flag;
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
+ OutVals[i], Flag);
+
+ // Guarantee that all emitted copies are stuck together,
+ // avoiding something bad.
+ Flag = Chain.getValue(1);
+ }
+
+ unsigned Opc = (CallConv == CallingConv::MSP430_INTR ?
+ MSP430ISD::RETI_FLAG : MSP430ISD::RET_FLAG);
+
+ if (Flag.getNode())
+ return DAG.getNode(Opc, dl, MVT::Other, Chain, Flag);
+
+ // Return Void
+ return DAG.getNode(Opc, dl, MVT::Other, Chain);
+}
+
+/// LowerCCCCallTo - functions arguments are copied from virtual regs to
+/// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted.
+/// TODO: sret.
+SDValue
+MSP430TargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg>
+ &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+
+ CCInfo.AnalyzeCallOperands(Outs, CC_MSP430);
+
+ // Get a count of how many bytes are to be pushed on the stack.
+ unsigned NumBytes = CCInfo.getNextStackOffset();
+
+ Chain = DAG.getCALLSEQ_START(Chain ,DAG.getConstant(NumBytes,
+ getPointerTy(), true));
+
+ SmallVector<std::pair<unsigned, SDValue>, 4> RegsToPass;
+ SmallVector<SDValue, 12> MemOpChains;
+ SDValue StackPtr;
+
+ // Walk the register/memloc assignments, inserting copies/loads.
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+
+ SDValue Arg = OutVals[i];
+
+ // Promote the value if needed.
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::AExt:
+ Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ }
+
+ // Arguments that can be passed on register must be kept at RegsToPass
+ // vector
+ if (VA.isRegLoc()) {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ } else {
+ assert(VA.isMemLoc());
+
+ if (StackPtr.getNode() == 0)
+ StackPtr = DAG.getCopyFromReg(Chain, dl, MSP430::SPW, getPointerTy());
+
+ SDValue PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(),
+ StackPtr,
+ DAG.getIntPtrConstant(VA.getLocMemOffset()));
+
+
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(),false, false, 0));
+ }
+ }
+
+ // Transform all store nodes into one single node because all store nodes are
+ // independent of each other.
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+ // Build a sequence of copy-to-reg nodes chained together with token chain and
+ // flag operands which copy the outgoing args into registers. The InFlag in
+ // necessary since all emitted instructions must be stuck together.
+ SDValue InFlag;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // If the callee is a GlobalAddress node (quite common, every direct call is)
+ // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
+ // Likewise ExternalSymbol -> TargetExternalSymbol.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, MVT::i16);
+ else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee))
+ Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i16);
+
+ // Returns a chain & a flag for retval copy to use.
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ // Add argument registers to the end of the list so that they are
+ // known live into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+ if (InFlag.getNode())
+ Ops.push_back(InFlag);
+
+ Chain = DAG.getNode(MSP430ISD::CALL, dl, NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ // Create the CALLSEQ_END node.
+ Chain = DAG.getCALLSEQ_END(Chain,
+ DAG.getConstant(NumBytes, getPointerTy(), true),
+ DAG.getConstant(0, getPointerTy(), true),
+ InFlag);
+ InFlag = Chain.getValue(1);
+
+ // Handle result values, copying them out of physregs into vregs that we
+ // return.
+ return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl,
+ DAG, InVals);
+}
+
+/// LowerCallResult - Lower the result values of a call into the
+/// appropriate copies out of appropriate physical registers.
+///
+SDValue
+MSP430TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+
+ CCInfo.AnalyzeCallResult(Ins, RetCC_MSP430);
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ Chain = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(),
+ RVLocs[i].getValVT(), InFlag).getValue(1);
+ InFlag = Chain.getValue(2);
+ InVals.push_back(Chain.getValue(0));
+ }
+
+ return Chain;
+}
+
+SDValue MSP430TargetLowering::LowerShifts(SDValue Op,
+ SelectionDAG &DAG) const {
+ unsigned Opc = Op.getOpcode();
+ SDNode* N = Op.getNode();
+ EVT VT = Op.getValueType();
+ DebugLoc dl = N->getDebugLoc();
+
+ // Expand non-constant shifts to loops:
+ if (!isa<ConstantSDNode>(N->getOperand(1)))
+ switch (Opc) {
+ default:
+ assert(0 && "Invalid shift opcode!");
+ case ISD::SHL:
+ return DAG.getNode(MSP430ISD::SHL, dl,
+ VT, N->getOperand(0), N->getOperand(1));
+ case ISD::SRA:
+ return DAG.getNode(MSP430ISD::SRA, dl,
+ VT, N->getOperand(0), N->getOperand(1));
+ case ISD::SRL:
+ return DAG.getNode(MSP430ISD::SRL, dl,
+ VT, N->getOperand(0), N->getOperand(1));
+ }
+
+ uint64_t ShiftAmount = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+
+ // Expand the stuff into sequence of shifts.
+ // FIXME: for some shift amounts this might be done better!
+ // E.g.: foo >> (8 + N) => sxt(swpb(foo)) >> N
+ SDValue Victim = N->getOperand(0);
+
+ if (Opc == ISD::SRL && ShiftAmount) {
+ // Emit a special goodness here:
+ // srl A, 1 => clrc; rrc A
+ Victim = DAG.getNode(MSP430ISD::RRC, dl, VT, Victim);
+ ShiftAmount -= 1;
+ }
+
+ while (ShiftAmount--)
+ Victim = DAG.getNode((Opc == ISD::SHL ? MSP430ISD::RLA : MSP430ISD::RRA),
+ dl, VT, Victim);
+
+ return Victim;
+}
+
+SDValue MSP430TargetLowering::LowerGlobalAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset();
+
+ // Create the TargetGlobalAddress node, folding in the constant offset.
+ SDValue Result = DAG.getTargetGlobalAddress(GV, Op.getDebugLoc(),
+ getPointerTy(), Offset);
+ return DAG.getNode(MSP430ISD::Wrapper, Op.getDebugLoc(),
+ getPointerTy(), Result);
+}
+
+SDValue MSP430TargetLowering::LowerExternalSymbol(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
+ SDValue Result = DAG.getTargetExternalSymbol(Sym, getPointerTy());
+
+ return DAG.getNode(MSP430ISD::Wrapper, dl, getPointerTy(), Result);;
+}
+
+SDValue MSP430TargetLowering::LowerBlockAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
+ SDValue Result = DAG.getBlockAddress(BA, getPointerTy(), /*isTarget=*/true);
+
+ return DAG.getNode(MSP430ISD::Wrapper, dl, getPointerTy(), Result);;
+}
+
+static SDValue EmitCMP(SDValue &LHS, SDValue &RHS, SDValue &TargetCC,
+ ISD::CondCode CC,
+ DebugLoc dl, SelectionDAG &DAG) {
+ // FIXME: Handle bittests someday
+ assert(!LHS.getValueType().isFloatingPoint() && "We don't handle FP yet");
+
+ // FIXME: Handle jump negative someday
+ MSP430CC::CondCodes TCC = MSP430CC::COND_INVALID;
+ switch (CC) {
+ default: llvm_unreachable("Invalid integer condition!");
+ case ISD::SETEQ:
+ TCC = MSP430CC::COND_E; // aka COND_Z
+ // Minor optimization: if LHS is a constant, swap operands, then the
+ // constant can be folded into comparison.
+ if (LHS.getOpcode() == ISD::Constant)
+ std::swap(LHS, RHS);
+ break;
+ case ISD::SETNE:
+ TCC = MSP430CC::COND_NE; // aka COND_NZ
+ // Minor optimization: if LHS is a constant, swap operands, then the
+ // constant can be folded into comparison.
+ if (LHS.getOpcode() == ISD::Constant)
+ std::swap(LHS, RHS);
+ break;
+ case ISD::SETULE:
+ std::swap(LHS, RHS); // FALLTHROUGH
+ case ISD::SETUGE:
+ // Turn lhs u>= rhs with lhs constant into rhs u< lhs+1, this allows us to
+ // fold constant into instruction.
+ if (const ConstantSDNode * C = dyn_cast<ConstantSDNode>(LHS)) {
+ LHS = RHS;
+ RHS = DAG.getConstant(C->getSExtValue() + 1, C->getValueType(0));
+ TCC = MSP430CC::COND_LO;
+ break;
+ }
+ TCC = MSP430CC::COND_HS; // aka COND_C
+ break;
+ case ISD::SETUGT:
+ std::swap(LHS, RHS); // FALLTHROUGH
+ case ISD::SETULT:
+ // Turn lhs u< rhs with lhs constant into rhs u>= lhs+1, this allows us to
+ // fold constant into instruction.
+ if (const ConstantSDNode * C = dyn_cast<ConstantSDNode>(LHS)) {
+ LHS = RHS;
+ RHS = DAG.getConstant(C->getSExtValue() + 1, C->getValueType(0));
+ TCC = MSP430CC::COND_HS;
+ break;
+ }
+ TCC = MSP430CC::COND_LO; // aka COND_NC
+ break;
+ case ISD::SETLE:
+ std::swap(LHS, RHS); // FALLTHROUGH
+ case ISD::SETGE:
+ // Turn lhs >= rhs with lhs constant into rhs < lhs+1, this allows us to
+ // fold constant into instruction.
+ if (const ConstantSDNode * C = dyn_cast<ConstantSDNode>(LHS)) {
+ LHS = RHS;
+ RHS = DAG.getConstant(C->getSExtValue() + 1, C->getValueType(0));
+ TCC = MSP430CC::COND_L;
+ break;
+ }
+ TCC = MSP430CC::COND_GE;
+ break;
+ case ISD::SETGT:
+ std::swap(LHS, RHS); // FALLTHROUGH
+ case ISD::SETLT:
+ // Turn lhs < rhs with lhs constant into rhs >= lhs+1, this allows us to
+ // fold constant into instruction.
+ if (const ConstantSDNode * C = dyn_cast<ConstantSDNode>(LHS)) {
+ LHS = RHS;
+ RHS = DAG.getConstant(C->getSExtValue() + 1, C->getValueType(0));
+ TCC = MSP430CC::COND_GE;
+ break;
+ }
+ TCC = MSP430CC::COND_L;
+ break;
+ }
+
+ TargetCC = DAG.getConstant(TCC, MVT::i8);
+ return DAG.getNode(MSP430ISD::CMP, dl, MVT::Glue, LHS, RHS);
+}
+
+
+SDValue MSP430TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Chain = Op.getOperand(0);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
+ SDValue LHS = Op.getOperand(2);
+ SDValue RHS = Op.getOperand(3);
+ SDValue Dest = Op.getOperand(4);
+ DebugLoc dl = Op.getDebugLoc();
+
+ SDValue TargetCC;
+ SDValue Flag = EmitCMP(LHS, RHS, TargetCC, CC, dl, DAG);
+
+ return DAG.getNode(MSP430ISD::BR_CC, dl, Op.getValueType(),
+ Chain, Dest, TargetCC, Flag);
+}
+
+SDValue MSP430TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ DebugLoc dl = Op.getDebugLoc();
+
+ // If we are doing an AND and testing against zero, then the CMP
+ // will not be generated. The AND (or BIT) will generate the condition codes,
+ // but they are different from CMP.
+ // FIXME: since we're doing a post-processing, use a pseudoinstr here, so
+ // lowering & isel wouldn't diverge.
+ bool andCC = false;
+ if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
+ if (RHSC->isNullValue() && LHS.hasOneUse() &&
+ (LHS.getOpcode() == ISD::AND ||
+ (LHS.getOpcode() == ISD::TRUNCATE &&
+ LHS.getOperand(0).getOpcode() == ISD::AND))) {
+ andCC = true;
+ }
+ }
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+ SDValue TargetCC;
+ SDValue Flag = EmitCMP(LHS, RHS, TargetCC, CC, dl, DAG);
+
+ // Get the condition codes directly from the status register, if its easy.
+ // Otherwise a branch will be generated. Note that the AND and BIT
+ // instructions generate different flags than CMP, the carry bit can be used
+ // for NE/EQ.
+ bool Invert = false;
+ bool Shift = false;
+ bool Convert = true;
+ switch (cast<ConstantSDNode>(TargetCC)->getZExtValue()) {
+ default:
+ Convert = false;
+ break;
+ case MSP430CC::COND_HS:
+ // Res = SRW & 1, no processing is required
+ break;
+ case MSP430CC::COND_LO:
+ // Res = ~(SRW & 1)
+ Invert = true;
+ break;
+ case MSP430CC::COND_NE:
+ if (andCC) {
+ // C = ~Z, thus Res = SRW & 1, no processing is required
+ } else {
+ // Res = ~((SRW >> 1) & 1)
+ Shift = true;
+ Invert = true;
+ }
+ break;
+ case MSP430CC::COND_E:
+ Shift = true;
+ // C = ~Z for AND instruction, thus we can put Res = ~(SRW & 1), however,
+ // Res = (SRW >> 1) & 1 is 1 word shorter.
+ break;
+ }
+ EVT VT = Op.getValueType();
+ SDValue One = DAG.getConstant(1, VT);
+ if (Convert) {
+ SDValue SR = DAG.getCopyFromReg(DAG.getEntryNode(), dl, MSP430::SRW,
+ MVT::i16, Flag);
+ if (Shift)
+ // FIXME: somewhere this is turned into a SRL, lower it MSP specific?
+ SR = DAG.getNode(ISD::SRA, dl, MVT::i16, SR, One);
+ SR = DAG.getNode(ISD::AND, dl, MVT::i16, SR, One);
+ if (Invert)
+ SR = DAG.getNode(ISD::XOR, dl, MVT::i16, SR, One);
+ return SR;
+ } else {
+ SDValue Zero = DAG.getConstant(0, VT);
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
+ SmallVector<SDValue, 4> Ops;
+ Ops.push_back(One);
+ Ops.push_back(Zero);
+ Ops.push_back(TargetCC);
+ Ops.push_back(Flag);
+ return DAG.getNode(MSP430ISD::SELECT_CC, dl, VTs, &Ops[0], Ops.size());
+ }
+}
+
+SDValue MSP430TargetLowering::LowerSELECT_CC(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ SDValue TrueV = Op.getOperand(2);
+ SDValue FalseV = Op.getOperand(3);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+ DebugLoc dl = Op.getDebugLoc();
+
+ SDValue TargetCC;
+ SDValue Flag = EmitCMP(LHS, RHS, TargetCC, CC, dl, DAG);
+
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
+ SmallVector<SDValue, 4> Ops;
+ Ops.push_back(TrueV);
+ Ops.push_back(FalseV);
+ Ops.push_back(TargetCC);
+ Ops.push_back(Flag);
+
+ return DAG.getNode(MSP430ISD::SELECT_CC, dl, VTs, &Ops[0], Ops.size());
+}
+
+SDValue MSP430TargetLowering::LowerSIGN_EXTEND(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue Val = Op.getOperand(0);
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+
+ assert(VT == MVT::i16 && "Only support i16 for now!");
+
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT,
+ DAG.getNode(ISD::ANY_EXTEND, dl, VT, Val),
+ DAG.getValueType(Val.getValueType()));
+}
+
+SDValue
+MSP430TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MSP430MachineFunctionInfo *FuncInfo = MF.getInfo<MSP430MachineFunctionInfo>();
+ int ReturnAddrIndex = FuncInfo->getRAIndex();
+
+ if (ReturnAddrIndex == 0) {
+ // Set up a frame object for the return address.
+ uint64_t SlotSize = TD->getPointerSize();
+ ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize, -SlotSize,
+ true);
+ FuncInfo->setRAIndex(ReturnAddrIndex);
+ }
+
+ return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy());
+}
+
+SDValue MSP430TargetLowering::LowerRETURNADDR(SDValue Op,
+ SelectionDAG &DAG) const {
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI->setReturnAddressIsTaken(true);
+
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (Depth > 0) {
+ SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
+ SDValue Offset =
+ DAG.getConstant(TD->getPointerSize(), MVT::i16);
+ return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
+ DAG.getNode(ISD::ADD, dl, getPointerTy(),
+ FrameAddr, Offset),
+ MachinePointerInfo(), false, false, 0);
+ }
+
+ // Just load the return address.
+ SDValue RetAddrFI = getReturnAddressFrameIndex(DAG);
+ return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
+ RetAddrFI, MachinePointerInfo(), false, false, 0);
+}
+
+SDValue MSP430TargetLowering::LowerFRAMEADDR(SDValue Op,
+ SelectionDAG &DAG) const {
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI->setFrameAddressIsTaken(true);
+
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl,
+ MSP430::FPW, VT);
+ while (Depth--)
+ FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
+ MachinePointerInfo(),
+ false, false, 0);
+ return FrameAddr;
+}
+
+/// getPostIndexedAddressParts - returns true by value, base pointer and
+/// offset pointer and addressing mode by reference if this node can be
+/// combined with a load / store to form a post-indexed load / store.
+bool MSP430TargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
+ SDValue &Base,
+ SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const {
+
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ if (LD->getExtensionType() != ISD::NON_EXTLOAD)
+ return false;
+
+ EVT VT = LD->getMemoryVT();
+ if (VT != MVT::i8 && VT != MVT::i16)
+ return false;
+
+ if (Op->getOpcode() != ISD::ADD)
+ return false;
+
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
+ uint64_t RHSC = RHS->getZExtValue();
+ if ((VT == MVT::i16 && RHSC != 2) ||
+ (VT == MVT::i8 && RHSC != 1))
+ return false;
+
+ Base = Op->getOperand(0);
+ Offset = DAG.getConstant(RHSC, VT);
+ AM = ISD::POST_INC;
+ return true;
+ }
+
+ return false;
+}
+
+
+const char *MSP430TargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ default: return NULL;
+ case MSP430ISD::RET_FLAG: return "MSP430ISD::RET_FLAG";
+ case MSP430ISD::RETI_FLAG: return "MSP430ISD::RETI_FLAG";
+ case MSP430ISD::RRA: return "MSP430ISD::RRA";
+ case MSP430ISD::RLA: return "MSP430ISD::RLA";
+ case MSP430ISD::RRC: return "MSP430ISD::RRC";
+ case MSP430ISD::CALL: return "MSP430ISD::CALL";
+ case MSP430ISD::Wrapper: return "MSP430ISD::Wrapper";
+ case MSP430ISD::BR_CC: return "MSP430ISD::BR_CC";
+ case MSP430ISD::CMP: return "MSP430ISD::CMP";
+ case MSP430ISD::SELECT_CC: return "MSP430ISD::SELECT_CC";
+ case MSP430ISD::SHL: return "MSP430ISD::SHL";
+ case MSP430ISD::SRA: return "MSP430ISD::SRA";
+ }
+}
+
+bool MSP430TargetLowering::isTruncateFree(Type *Ty1,
+ Type *Ty2) const {
+ if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
+ return false;
+
+ return (Ty1->getPrimitiveSizeInBits() > Ty2->getPrimitiveSizeInBits());
+}
+
+bool MSP430TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
+ if (!VT1.isInteger() || !VT2.isInteger())
+ return false;
+
+ return (VT1.getSizeInBits() > VT2.getSizeInBits());
+}
+
+bool MSP430TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const {
+ // MSP430 implicitly zero-extends 8-bit results in 16-bit registers.
+ return 0 && Ty1->isIntegerTy(8) && Ty2->isIntegerTy(16);
+}
+
+bool MSP430TargetLowering::isZExtFree(EVT VT1, EVT VT2) const {
+ // MSP430 implicitly zero-extends 8-bit results in 16-bit registers.
+ return 0 && VT1 == MVT::i8 && VT2 == MVT::i16;
+}
+
+//===----------------------------------------------------------------------===//
+// Other Lowering Code
+//===----------------------------------------------------------------------===//
+
+MachineBasicBlock*
+MSP430TargetLowering::EmitShiftInstr(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ MachineFunction *F = BB->getParent();
+ MachineRegisterInfo &RI = F->getRegInfo();
+ DebugLoc dl = MI->getDebugLoc();
+ const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
+
+ unsigned Opc;
+ const TargetRegisterClass * RC;
+ switch (MI->getOpcode()) {
+ default:
+ assert(0 && "Invalid shift opcode!");
+ case MSP430::Shl8:
+ Opc = MSP430::SHL8r1;
+ RC = MSP430::GR8RegisterClass;
+ break;
+ case MSP430::Shl16:
+ Opc = MSP430::SHL16r1;
+ RC = MSP430::GR16RegisterClass;
+ break;
+ case MSP430::Sra8:
+ Opc = MSP430::SAR8r1;
+ RC = MSP430::GR8RegisterClass;
+ break;
+ case MSP430::Sra16:
+ Opc = MSP430::SAR16r1;
+ RC = MSP430::GR16RegisterClass;
+ break;
+ case MSP430::Srl8:
+ Opc = MSP430::SAR8r1c;
+ RC = MSP430::GR8RegisterClass;
+ break;
+ case MSP430::Srl16:
+ Opc = MSP430::SAR16r1c;
+ RC = MSP430::GR16RegisterClass;
+ break;
+ }
+
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator I = BB;
+ ++I;
+
+ // Create loop block
+ MachineBasicBlock *LoopBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *RemBB = F->CreateMachineBasicBlock(LLVM_BB);
+
+ F->insert(I, LoopBB);
+ F->insert(I, RemBB);
+
+ // Update machine-CFG edges by transferring all successors of the current
+ // block to the block containing instructions after shift.
+ RemBB->splice(RemBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ RemBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Add adges BB => LoopBB => RemBB, BB => RemBB, LoopBB => LoopBB
+ BB->addSuccessor(LoopBB);
+ BB->addSuccessor(RemBB);
+ LoopBB->addSuccessor(RemBB);
+ LoopBB->addSuccessor(LoopBB);
+
+ unsigned ShiftAmtReg = RI.createVirtualRegister(MSP430::GR8RegisterClass);
+ unsigned ShiftAmtReg2 = RI.createVirtualRegister(MSP430::GR8RegisterClass);
+ unsigned ShiftReg = RI.createVirtualRegister(RC);
+ unsigned ShiftReg2 = RI.createVirtualRegister(RC);
+ unsigned ShiftAmtSrcReg = MI->getOperand(2).getReg();
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ unsigned DstReg = MI->getOperand(0).getReg();
+
+ // BB:
+ // cmp 0, N
+ // je RemBB
+ BuildMI(BB, dl, TII.get(MSP430::CMP8ri))
+ .addReg(ShiftAmtSrcReg).addImm(0);
+ BuildMI(BB, dl, TII.get(MSP430::JCC))
+ .addMBB(RemBB)
+ .addImm(MSP430CC::COND_E);
+
+ // LoopBB:
+ // ShiftReg = phi [%SrcReg, BB], [%ShiftReg2, LoopBB]
+ // ShiftAmt = phi [%N, BB], [%ShiftAmt2, LoopBB]
+ // ShiftReg2 = shift ShiftReg
+ // ShiftAmt2 = ShiftAmt - 1;
+ BuildMI(LoopBB, dl, TII.get(MSP430::PHI), ShiftReg)
+ .addReg(SrcReg).addMBB(BB)
+ .addReg(ShiftReg2).addMBB(LoopBB);
+ BuildMI(LoopBB, dl, TII.get(MSP430::PHI), ShiftAmtReg)
+ .addReg(ShiftAmtSrcReg).addMBB(BB)
+ .addReg(ShiftAmtReg2).addMBB(LoopBB);
+ BuildMI(LoopBB, dl, TII.get(Opc), ShiftReg2)
+ .addReg(ShiftReg);
+ BuildMI(LoopBB, dl, TII.get(MSP430::SUB8ri), ShiftAmtReg2)
+ .addReg(ShiftAmtReg).addImm(1);
+ BuildMI(LoopBB, dl, TII.get(MSP430::JCC))
+ .addMBB(LoopBB)
+ .addImm(MSP430CC::COND_NE);
+
+ // RemBB:
+ // DestReg = phi [%SrcReg, BB], [%ShiftReg, LoopBB]
+ BuildMI(*RemBB, RemBB->begin(), dl, TII.get(MSP430::PHI), DstReg)
+ .addReg(SrcReg).addMBB(BB)
+ .addReg(ShiftReg2).addMBB(LoopBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return RemBB;
+}
+
+MachineBasicBlock*
+MSP430TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ unsigned Opc = MI->getOpcode();
+
+ if (Opc == MSP430::Shl8 || Opc == MSP430::Shl16 ||
+ Opc == MSP430::Sra8 || Opc == MSP430::Sra16 ||
+ Opc == MSP430::Srl8 || Opc == MSP430::Srl16)
+ return EmitShiftInstr(MI, BB);
+
+ const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+
+ assert((Opc == MSP430::Select16 || Opc == MSP430::Select8) &&
+ "Unexpected instr type to insert");
+
+ // To "insert" a SELECT instruction, we actually have to insert the diamond
+ // control-flow pattern. The incoming instruction knows the destination vreg
+ // to set, the condition code register to branch on, the true/false values to
+ // select between, and a branch opcode to use.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator I = BB;
+ ++I;
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // cmpTY ccX, r1, r2
+ // jCC copy1MBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *copy1MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(I, copy0MBB);
+ F->insert(I, copy1MBB);
+ // Update machine-CFG edges by transferring all successors of the current
+ // block to the new block which will contain the Phi node for the select.
+ copy1MBB->splice(copy1MBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ copy1MBB->transferSuccessorsAndUpdatePHIs(BB);
+ // Next, add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(copy1MBB);
+
+ BuildMI(BB, dl, TII.get(MSP430::JCC))
+ .addMBB(copy1MBB)
+ .addImm(MI->getOperand(3).getImm());
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to copy1MBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(copy1MBB);
+
+ // copy1MBB:
+ // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
+ // ...
+ BB = copy1MBB;
+ BuildMI(*BB, BB->begin(), dl, TII.get(MSP430::PHI),
+ MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB)
+ .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.h b/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.h
new file mode 100644
index 0000000..237f604
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.h
@@ -0,0 +1,182 @@
+//==-- MSP430ISelLowering.h - MSP430 DAG Lowering Interface ------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that MSP430 uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_MSP430_ISELLOWERING_H
+#define LLVM_TARGET_MSP430_ISELLOWERING_H
+
+#include "MSP430.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
+
+namespace llvm {
+ namespace MSP430ISD {
+ enum {
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+ /// Return with a flag operand. Operand 0 is the chain operand.
+ RET_FLAG,
+
+ /// Same as RET_FLAG, but used for returning from ISRs.
+ RETI_FLAG,
+
+ /// Y = R{R,L}A X, rotate right (left) arithmetically
+ RRA, RLA,
+
+ /// Y = RRC X, rotate right via carry
+ RRC,
+
+ /// CALL - These operations represent an abstract call
+ /// instruction, which includes a bunch of information.
+ CALL,
+
+ /// Wrapper - A wrapper node for TargetConstantPool, TargetExternalSymbol,
+ /// and TargetGlobalAddress.
+ Wrapper,
+
+ /// CMP - Compare instruction.
+ CMP,
+
+ /// SetCC - Operand 0 is condition code, and operand 1 is the flag
+ /// operand produced by a CMP instruction.
+ SETCC,
+
+ /// MSP430 conditional branches. Operand 0 is the chain operand, operand 1
+ /// is the block to branch if condition is true, operand 2 is the
+ /// condition code, and operand 3 is the flag operand produced by a CMP
+ /// instruction.
+ BR_CC,
+
+ /// SELECT_CC - Operand 0 and operand 1 are selection variable, operand 3
+ /// is condition code and operand 4 is flag operand.
+ SELECT_CC,
+
+ /// SHL, SRA, SRL - Non-constant shifts.
+ SHL, SRA, SRL
+ };
+ }
+
+ class MSP430Subtarget;
+ class MSP430TargetMachine;
+
+ class MSP430TargetLowering : public TargetLowering {
+ public:
+ explicit MSP430TargetLowering(MSP430TargetMachine &TM);
+
+ virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i8; }
+
+ /// LowerOperation - Provide custom lowering hooks for some operations.
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+
+ /// getTargetNodeName - This method returns the name of a target specific
+ /// DAG node.
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+ SDValue LowerShifts(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;
+
+ TargetLowering::ConstraintType
+ getConstraintType(const std::string &Constraint) const;
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const;
+
+ /// isTruncateFree - Return true if it's free to truncate a value of type
+ /// Ty1 to type Ty2. e.g. On msp430 it's free to truncate a i16 value in
+ /// register R15W to i8 by referencing its sub-register R15B.
+ virtual bool isTruncateFree(Type *Ty1, Type *Ty2) const;
+ virtual bool isTruncateFree(EVT VT1, EVT VT2) const;
+
+ /// isZExtFree - Return true if any actual instruction that defines a value
+ /// of type Ty1 implicit zero-extends the value to Ty2 in the result
+ /// register. This does not necessarily include registers defined in unknown
+ /// ways, such as incoming arguments, or copies from unknown virtual
+ /// registers. Also, if isTruncateFree(Ty2, Ty1) is true, this does not
+ /// necessarily apply to truncate instructions. e.g. on msp430, all
+ /// instructions that define 8-bit values implicit zero-extend the result
+ /// out to 16 bits.
+ virtual bool isZExtFree(Type *Ty1, Type *Ty2) const;
+ virtual bool isZExtFree(EVT VT1, EVT VT2) const;
+
+ MachineBasicBlock* EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+ MachineBasicBlock* EmitShiftInstr(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+
+ private:
+ SDValue LowerCCCCallTo(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ SDValue LowerCCCArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+ virtual SDValue
+ LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg, bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const;
+
+ virtual bool getPostIndexedAddressParts(SDNode *N, SDNode *Op,
+ SDValue &Base,
+ SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const;
+
+ const MSP430Subtarget &Subtarget;
+ const MSP430TargetMachine &TM;
+ const TargetData *TD;
+ };
+} // namespace llvm
+
+#endif // LLVM_TARGET_MSP430_ISELLOWERING_H
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430InstrFormats.td b/contrib/llvm/lib/Target/MSP430/MSP430InstrFormats.td
new file mode 100644
index 0000000..73aef1f
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430InstrFormats.td
@@ -0,0 +1,211 @@
+//===- MSP430InstrFormats.td - MSP430 Instruction Formats-----*- tblgen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Describe MSP430 instructions format here
+//
+
+// Format specifies the encoding used by the instruction. This is part of the
+// ad-hoc solution used to emit machine instruction encodings by our machine
+// code emitter.
+class Format<bits<2> val> {
+ bits<2> Value = val;
+}
+
+def PseudoFrm : Format<0>;
+def SingleOpFrm : Format<1>;
+def DoubleOpFrm : Format<2>;
+def CondJumpFrm : Format<3>;
+
+class SourceMode<bits<2> val> {
+ bits<2> Value = val;
+}
+
+def SrcReg : SourceMode<0>;
+def SrcMem : SourceMode<1>;
+def SrcIndReg : SourceMode<2>;
+def SrcPostInc : SourceMode<3>;
+def SrcImm : SourceMode<3>;
+
+class DestMode<bit val> {
+ bit Value = val;
+}
+
+def DstReg : DestMode<0>;
+def DstMem : DestMode<1>;
+
+class SizeVal<bits<3> val> {
+ bits<3> Value = val;
+}
+
+def SizeUnknown : SizeVal<0>; // Unknown / unset size
+def SizeSpecial : SizeVal<1>; // Special instruction, e.g. pseudo
+def Size2Bytes : SizeVal<2>;
+def Size4Bytes : SizeVal<3>;
+def Size6Bytes : SizeVal<4>;
+
+// Generic MSP430 Format
+class MSP430Inst<dag outs, dag ins, SizeVal sz, Format f,
+ string asmstr> : Instruction {
+ field bits<16> Inst;
+
+ let Namespace = "MSP430";
+
+ dag OutOperandList = outs;
+ dag InOperandList = ins;
+
+ Format Form = f;
+ SizeVal Sz = sz;
+
+ // Define how we want to layout our TargetSpecific information field... This
+ // should be kept up-to-date with the fields in the MSP430InstrInfo.h file.
+ let TSFlags{1-0} = Form.Value;
+ let TSFlags{4-2} = Sz.Value;
+
+ let AsmString = asmstr;
+}
+
+// FIXME: Create different classes for different addressing modes.
+
+// MSP430 Double Operand (Format I) Instructions
+class IForm<bits<4> opcode, DestMode dest, bit bw, SourceMode src, SizeVal sz,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : MSP430Inst<outs, ins, sz, DoubleOpFrm, asmstr> {
+ let Pattern = pattern;
+
+ DestMode ad = dest;
+ SourceMode as = src;
+
+ let Inst{12-15} = opcode;
+ let Inst{7} = ad.Value;
+ let Inst{6} = bw;
+ let Inst{4-5} = as.Value;
+}
+
+// 8 bit IForm instructions
+class IForm8<bits<4> opcode, DestMode dest, SourceMode src, SizeVal sz,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IForm<opcode, dest, 1, src, sz, outs, ins, asmstr, pattern>;
+
+class I8rr<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IForm8<opcode, DstReg, SrcReg, Size2Bytes, outs, ins, asmstr, pattern>;
+
+class I8ri<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IForm8<opcode, DstReg, SrcImm, Size4Bytes, outs, ins, asmstr, pattern>;
+
+class I8rm<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IForm8<opcode, DstReg, SrcMem, Size4Bytes, outs, ins, asmstr, pattern>;
+
+class I8mr<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IForm8<opcode, DstMem, SrcReg, Size4Bytes, outs, ins, asmstr, pattern>;
+
+class I8mi<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IForm8<opcode, DstMem, SrcImm, Size6Bytes, outs, ins, asmstr, pattern>;
+
+class I8mm<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IForm8<opcode, DstMem, SrcMem, Size6Bytes, outs, ins, asmstr, pattern>;
+
+// 16 bit IForm instructions
+class IForm16<bits<4> opcode, DestMode dest, SourceMode src, SizeVal sz,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IForm<opcode, dest, 0, src, sz, outs, ins, asmstr, pattern>;
+
+class I16rr<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IForm16<opcode, DstReg, SrcReg, Size2Bytes, outs, ins, asmstr, pattern>;
+
+class I16ri<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IForm16<opcode, DstReg, SrcImm, Size4Bytes, outs, ins, asmstr, pattern>;
+
+class I16rm<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IForm16<opcode, DstReg, SrcMem, Size4Bytes, outs, ins, asmstr, pattern>;
+
+class I16mr<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IForm16<opcode, DstMem, SrcReg, Size4Bytes, outs, ins, asmstr, pattern>;
+
+class I16mi<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IForm16<opcode, DstMem, SrcImm, Size6Bytes, outs, ins, asmstr, pattern>;
+
+class I16mm<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IForm16<opcode, DstMem, SrcMem, Size6Bytes, outs, ins, asmstr, pattern>;
+
+// MSP430 Single Operand (Format II) Instructions
+class IIForm<bits<9> opcode, bit bw, SourceMode src, SizeVal sz,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : MSP430Inst<outs, ins, sz, SingleOpFrm, asmstr> {
+ let Pattern = pattern;
+
+ SourceMode as = src;
+
+ let Inst{7-15} = opcode;
+ let Inst{6} = bw;
+ let Inst{4-5} = as.Value;
+}
+
+// 8 bit IIForm instructions
+class IIForm8<bits<9> opcode, SourceMode src, SizeVal sz,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IIForm<opcode, 1, src, sz, outs, ins, asmstr, pattern>;
+
+class II8r<bits<9> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IIForm8<opcode, SrcReg, Size2Bytes, outs, ins, asmstr, pattern>;
+
+class II8m<bits<9> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IIForm8<opcode, SrcMem, Size4Bytes, outs, ins, asmstr, pattern>;
+
+class II8i<bits<9> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IIForm8<opcode, SrcImm, Size4Bytes, outs, ins, asmstr, pattern>;
+
+// 16 bit IIForm instructions
+class IIForm16<bits<9> opcode, SourceMode src, SizeVal sz,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IIForm<opcode, 0, src, sz, outs, ins, asmstr, pattern>;
+
+class II16r<bits<9> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IIForm16<opcode, SrcReg, Size2Bytes, outs, ins, asmstr, pattern>;
+
+class II16m<bits<9> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IIForm16<opcode, SrcMem, Size4Bytes, outs, ins, asmstr, pattern>;
+
+class II16i<bits<9> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IIForm16<opcode, SrcImm, Size4Bytes, outs, ins, asmstr, pattern>;
+
+// MSP430 Conditional Jumps Instructions
+class CJForm<bits<3> opcode, bits<3> cond,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : MSP430Inst<outs, ins, Size2Bytes, CondJumpFrm, asmstr> {
+ let Pattern = pattern;
+
+ let Inst{13-15} = opcode;
+ let Inst{10-12} = cond;
+}
+
+// Pseudo instructions
+class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : MSP430Inst<outs, ins, SizeSpecial, PseudoFrm, asmstr> {
+ let Pattern = pattern;
+ let Inst{15-0} = 0;
+}
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp b/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp
new file mode 100644
index 0000000..ffd4318
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp
@@ -0,0 +1,336 @@
+//===- MSP430InstrInfo.cpp - MSP430 Instruction Information ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MSP430 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MSP430.h"
+#include "MSP430InstrInfo.h"
+#include "MSP430MachineFunctionInfo.h"
+#include "MSP430TargetMachine.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#define GET_INSTRINFO_CTOR
+#include "MSP430GenInstrInfo.inc"
+
+using namespace llvm;
+
+MSP430InstrInfo::MSP430InstrInfo(MSP430TargetMachine &tm)
+ : MSP430GenInstrInfo(MSP430::ADJCALLSTACKDOWN, MSP430::ADJCALLSTACKUP),
+ RI(tm, *this), TM(tm) {}
+
+void MSP430InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL;
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(
+ MachinePointerInfo(PseudoSourceValue::getFixedStack(FrameIdx)),
+ MachineMemOperand::MOStore,
+ MFI.getObjectSize(FrameIdx),
+ MFI.getObjectAlignment(FrameIdx));
+
+ if (RC == &MSP430::GR16RegClass)
+ BuildMI(MBB, MI, DL, get(MSP430::MOV16mr))
+ .addFrameIndex(FrameIdx).addImm(0)
+ .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
+ else if (RC == &MSP430::GR8RegClass)
+ BuildMI(MBB, MI, DL, get(MSP430::MOV8mr))
+ .addFrameIndex(FrameIdx).addImm(0)
+ .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
+ else
+ llvm_unreachable("Cannot store this register to stack slot!");
+}
+
+void MSP430InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const{
+ DebugLoc DL;
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(
+ MachinePointerInfo(PseudoSourceValue::getFixedStack(FrameIdx)),
+ MachineMemOperand::MOLoad,
+ MFI.getObjectSize(FrameIdx),
+ MFI.getObjectAlignment(FrameIdx));
+
+ if (RC == &MSP430::GR16RegClass)
+ BuildMI(MBB, MI, DL, get(MSP430::MOV16rm))
+ .addReg(DestReg).addFrameIndex(FrameIdx).addImm(0).addMemOperand(MMO);
+ else if (RC == &MSP430::GR8RegClass)
+ BuildMI(MBB, MI, DL, get(MSP430::MOV8rm))
+ .addReg(DestReg).addFrameIndex(FrameIdx).addImm(0).addMemOperand(MMO);
+ else
+ llvm_unreachable("Cannot store this register to stack slot!");
+}
+
+void MSP430InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ unsigned Opc;
+ if (MSP430::GR16RegClass.contains(DestReg, SrcReg))
+ Opc = MSP430::MOV16rr;
+ else if (MSP430::GR8RegClass.contains(DestReg, SrcReg))
+ Opc = MSP430::MOV8rr;
+ else
+ llvm_unreachable("Impossible reg-to-reg copy");
+
+ BuildMI(MBB, I, DL, get(Opc), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+}
+
+unsigned MSP430InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator I = MBB.end();
+ unsigned Count = 0;
+
+ while (I != MBB.begin()) {
+ --I;
+ if (I->isDebugValue())
+ continue;
+ if (I->getOpcode() != MSP430::JMP &&
+ I->getOpcode() != MSP430::JCC &&
+ I->getOpcode() != MSP430::Br &&
+ I->getOpcode() != MSP430::Bm)
+ break;
+ // Remove the branch.
+ I->eraseFromParent();
+ I = MBB.end();
+ ++Count;
+ }
+
+ return Count;
+}
+
+bool MSP430InstrInfo::
+ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
+ assert(Cond.size() == 1 && "Invalid Xbranch condition!");
+
+ MSP430CC::CondCodes CC = static_cast<MSP430CC::CondCodes>(Cond[0].getImm());
+
+ switch (CC) {
+ default:
+ assert(0 && "Invalid branch condition!");
+ break;
+ case MSP430CC::COND_E:
+ CC = MSP430CC::COND_NE;
+ break;
+ case MSP430CC::COND_NE:
+ CC = MSP430CC::COND_E;
+ break;
+ case MSP430CC::COND_L:
+ CC = MSP430CC::COND_GE;
+ break;
+ case MSP430CC::COND_GE:
+ CC = MSP430CC::COND_L;
+ break;
+ case MSP430CC::COND_HS:
+ CC = MSP430CC::COND_LO;
+ break;
+ case MSP430CC::COND_LO:
+ CC = MSP430CC::COND_HS;
+ break;
+ }
+
+ Cond[0].setImm(CC);
+ return false;
+}
+
+bool MSP430InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (!MCID.isTerminator()) return false;
+
+ // Conditional branch is a special case.
+ if (MCID.isBranch() && !MCID.isBarrier())
+ return true;
+ if (!MCID.isPredicable())
+ return true;
+ return !isPredicated(MI);
+}
+
+bool MSP430InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ // Start from the bottom of the block and work up, examining the
+ // terminator instructions.
+ MachineBasicBlock::iterator I = MBB.end();
+ while (I != MBB.begin()) {
+ --I;
+ if (I->isDebugValue())
+ continue;
+
+ // Working from the bottom, when we see a non-terminator
+ // instruction, we're done.
+ if (!isUnpredicatedTerminator(I))
+ break;
+
+ // A terminator that isn't a branch can't easily be handled
+ // by this analysis.
+ if (!I->getDesc().isBranch())
+ return true;
+
+ // Cannot handle indirect branches.
+ if (I->getOpcode() == MSP430::Br ||
+ I->getOpcode() == MSP430::Bm)
+ return true;
+
+ // Handle unconditional branches.
+ if (I->getOpcode() == MSP430::JMP) {
+ if (!AllowModify) {
+ TBB = I->getOperand(0).getMBB();
+ continue;
+ }
+
+ // If the block has any instructions after a JMP, delete them.
+ while (llvm::next(I) != MBB.end())
+ llvm::next(I)->eraseFromParent();
+ Cond.clear();
+ FBB = 0;
+
+ // Delete the JMP if it's equivalent to a fall-through.
+ if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
+ TBB = 0;
+ I->eraseFromParent();
+ I = MBB.end();
+ continue;
+ }
+
+ // TBB is used to indicate the unconditinal destination.
+ TBB = I->getOperand(0).getMBB();
+ continue;
+ }
+
+ // Handle conditional branches.
+ assert(I->getOpcode() == MSP430::JCC && "Invalid conditional branch");
+ MSP430CC::CondCodes BranchCode =
+ static_cast<MSP430CC::CondCodes>(I->getOperand(1).getImm());
+ if (BranchCode == MSP430CC::COND_INVALID)
+ return true; // Can't handle weird stuff.
+
+ // Working from the bottom, handle the first conditional branch.
+ if (Cond.empty()) {
+ FBB = TBB;
+ TBB = I->getOperand(0).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(BranchCode));
+ continue;
+ }
+
+ // Handle subsequent conditional branches. Only handle the case where all
+ // conditional branches branch to the same destination.
+ assert(Cond.size() == 1);
+ assert(TBB);
+
+ // Only handle the case where all conditional branches branch to
+ // the same destination.
+ if (TBB != I->getOperand(0).getMBB())
+ return true;
+
+ MSP430CC::CondCodes OldBranchCode = (MSP430CC::CondCodes)Cond[0].getImm();
+ // If the conditions are the same, we can leave them alone.
+ if (OldBranchCode == BranchCode)
+ continue;
+
+ return true;
+ }
+
+ return false;
+}
+
+unsigned
+MSP430InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
+ // Shouldn't be a fall through.
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert((Cond.size() == 1 || Cond.size() == 0) &&
+ "MSP430 branch conditions have one component!");
+
+ if (Cond.empty()) {
+ // Unconditional branch?
+ assert(!FBB && "Unconditional branch with multiple successors!");
+ BuildMI(&MBB, DL, get(MSP430::JMP)).addMBB(TBB);
+ return 1;
+ }
+
+ // Conditional branch.
+ unsigned Count = 0;
+ BuildMI(&MBB, DL, get(MSP430::JCC)).addMBB(TBB).addImm(Cond[0].getImm());
+ ++Count;
+
+ if (FBB) {
+ // Two-way Conditional branch. Insert the second branch.
+ BuildMI(&MBB, DL, get(MSP430::JMP)).addMBB(FBB);
+ ++Count;
+ }
+ return Count;
+}
+
+/// GetInstSize - Return the number of bytes of code the specified
+/// instruction may be. This returns the maximum number of bytes.
+///
+unsigned MSP430InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
+ const MCInstrDesc &Desc = MI->getDesc();
+
+ switch (Desc.TSFlags & MSP430II::SizeMask) {
+ default:
+ switch (Desc.getOpcode()) {
+ default:
+ assert(0 && "Unknown instruction size!");
+ case TargetOpcode::PROLOG_LABEL:
+ case TargetOpcode::EH_LABEL:
+ case TargetOpcode::IMPLICIT_DEF:
+ case TargetOpcode::KILL:
+ case TargetOpcode::DBG_VALUE:
+ return 0;
+ case TargetOpcode::INLINEASM: {
+ const MachineFunction *MF = MI->getParent()->getParent();
+ const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo();
+ return TII.getInlineAsmLength(MI->getOperand(0).getSymbolName(),
+ *MF->getTarget().getMCAsmInfo());
+ }
+ }
+ case MSP430II::SizeSpecial:
+ switch (MI->getOpcode()) {
+ default:
+ assert(0 && "Unknown instruction size!");
+ case MSP430::SAR8r1c:
+ case MSP430::SAR16r1c:
+ return 4;
+ }
+ case MSP430II::Size2Bytes:
+ return 2;
+ case MSP430II::Size4Bytes:
+ return 4;
+ case MSP430II::Size6Bytes:
+ return 6;
+ }
+
+ return 6;
+}
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.h b/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.h
new file mode 100644
index 0000000..90013f5
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.h
@@ -0,0 +1,92 @@
+//===- MSP430InstrInfo.h - MSP430 Instruction Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MSP430 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_MSP430INSTRINFO_H
+#define LLVM_TARGET_MSP430INSTRINFO_H
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "MSP430RegisterInfo.h"
+
+#define GET_INSTRINFO_HEADER
+#include "MSP430GenInstrInfo.inc"
+
+namespace llvm {
+
+class MSP430TargetMachine;
+
+/// MSP430II - This namespace holds all of the target specific flags that
+/// instruction info tracks.
+///
+namespace MSP430II {
+ enum {
+ SizeShift = 2,
+ SizeMask = 7 << SizeShift,
+
+ SizeUnknown = 0 << SizeShift,
+ SizeSpecial = 1 << SizeShift,
+ Size2Bytes = 2 << SizeShift,
+ Size4Bytes = 3 << SizeShift,
+ Size6Bytes = 4 << SizeShift
+ };
+}
+
+class MSP430InstrInfo : public MSP430GenInstrInfo {
+ const MSP430RegisterInfo RI;
+ MSP430TargetMachine &TM;
+public:
+ explicit MSP430InstrInfo(MSP430TargetMachine &TM);
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ virtual const TargetRegisterInfo &getRegisterInfo() const { return RI; }
+
+ void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill,
+ int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ unsigned GetInstSizeInBytes(const MachineInstr *MI) const;
+
+ // Branch folding goodness
+ bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+ bool isUnpredicatedTerminator(const MachineInstr *MI) const;
+ bool AnalyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB, MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const;
+
+ unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+ unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
+
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.td b/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.td
new file mode 100644
index 0000000..59cb598
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.td
@@ -0,0 +1,1211 @@
+//===- MSP430InstrInfo.td - MSP430 Instruction defs -----------*- tblgen-*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the MSP430 instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+include "MSP430InstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// Type Constraints.
+//===----------------------------------------------------------------------===//
+class SDTCisI8<int OpNum> : SDTCisVT<OpNum, i8>;
+class SDTCisI16<int OpNum> : SDTCisVT<OpNum, i16>;
+
+//===----------------------------------------------------------------------===//
+// Type Profiles.
+//===----------------------------------------------------------------------===//
+def SDT_MSP430Call : SDTypeProfile<0, -1, [SDTCisVT<0, iPTR>]>;
+def SDT_MSP430CallSeqStart : SDCallSeqStart<[SDTCisVT<0, i16>]>;
+def SDT_MSP430CallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i16>, SDTCisVT<1, i16>]>;
+def SDT_MSP430Wrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
+ SDTCisPtrTy<0>]>;
+def SDT_MSP430Cmp : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>;
+def SDT_MSP430BrCC : SDTypeProfile<0, 2, [SDTCisVT<0, OtherVT>,
+ SDTCisVT<1, i8>]>;
+def SDT_MSP430SelectCC : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>,
+ SDTCisSameAs<1, 2>,
+ SDTCisVT<3, i8>]>;
+def SDT_MSP430Shift : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>,
+ SDTCisI8<2>]>;
+
+//===----------------------------------------------------------------------===//
+// MSP430 Specific Node Definitions.
+//===----------------------------------------------------------------------===//
+def MSP430retflag : SDNode<"MSP430ISD::RET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue]>;
+def MSP430retiflag : SDNode<"MSP430ISD::RETI_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue]>;
+
+def MSP430rra : SDNode<"MSP430ISD::RRA", SDTIntUnaryOp, []>;
+def MSP430rla : SDNode<"MSP430ISD::RLA", SDTIntUnaryOp, []>;
+def MSP430rrc : SDNode<"MSP430ISD::RRC", SDTIntUnaryOp, []>;
+
+def MSP430call : SDNode<"MSP430ISD::CALL", SDT_MSP430Call,
+ [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue, SDNPVariadic]>;
+def MSP430callseq_start :
+ SDNode<"ISD::CALLSEQ_START", SDT_MSP430CallSeqStart,
+ [SDNPHasChain, SDNPOutGlue]>;
+def MSP430callseq_end :
+ SDNode<"ISD::CALLSEQ_END", SDT_MSP430CallSeqEnd,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+def MSP430Wrapper : SDNode<"MSP430ISD::Wrapper", SDT_MSP430Wrapper>;
+def MSP430cmp : SDNode<"MSP430ISD::CMP", SDT_MSP430Cmp, [SDNPOutGlue]>;
+def MSP430brcc : SDNode<"MSP430ISD::BR_CC", SDT_MSP430BrCC,
+ [SDNPHasChain, SDNPInGlue]>;
+def MSP430selectcc: SDNode<"MSP430ISD::SELECT_CC", SDT_MSP430SelectCC,
+ [SDNPInGlue]>;
+def MSP430shl : SDNode<"MSP430ISD::SHL", SDT_MSP430Shift, []>;
+def MSP430sra : SDNode<"MSP430ISD::SRA", SDT_MSP430Shift, []>;
+def MSP430srl : SDNode<"MSP430ISD::SRL", SDT_MSP430Shift, []>;
+
+//===----------------------------------------------------------------------===//
+// MSP430 Operand Definitions.
+//===----------------------------------------------------------------------===//
+
+// Address operands
+def memsrc : Operand<i16> {
+ let PrintMethod = "printSrcMemOperand";
+ let MIOperandInfo = (ops GR16, i16imm);
+}
+
+def memdst : Operand<i16> {
+ let PrintMethod = "printSrcMemOperand";
+ let MIOperandInfo = (ops GR16, i16imm);
+}
+
+// Short jump targets have OtherVT type and are printed as pcrel imm values.
+def jmptarget : Operand<OtherVT> {
+ let PrintMethod = "printPCRelImmOperand";
+}
+
+// Operand for printing out a condition code.
+def cc : Operand<i8> {
+ let PrintMethod = "printCCOperand";
+}
+
+//===----------------------------------------------------------------------===//
+// MSP430 Complex Pattern Definitions.
+//===----------------------------------------------------------------------===//
+
+def addr : ComplexPattern<iPTR, 2, "SelectAddr", [], []>;
+
+//===----------------------------------------------------------------------===//
+// Pattern Fragments
+def zextloadi16i8 : PatFrag<(ops node:$ptr), (i16 (zextloadi8 node:$ptr))>;
+def extloadi16i8 : PatFrag<(ops node:$ptr), (i16 ( extloadi8 node:$ptr))>;
+def and_su : PatFrag<(ops node:$lhs, node:$rhs), (and node:$lhs, node:$rhs), [{
+ return N->hasOneUse();
+}]>;
+//===----------------------------------------------------------------------===//
+// Instruction list..
+
+// ADJCALLSTACKDOWN/UP implicitly use/def SP because they may be expanded into
+// a stack adjustment and the codegen must know that they may modify the stack
+// pointer before prolog-epilog rewriting occurs.
+// Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become
+// sub / add which can clobber SRW.
+let Defs = [SPW, SRW], Uses = [SPW] in {
+def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i16imm:$amt),
+ "#ADJCALLSTACKDOWN",
+ [(MSP430callseq_start timm:$amt)]>;
+def ADJCALLSTACKUP : Pseudo<(outs), (ins i16imm:$amt1, i16imm:$amt2),
+ "#ADJCALLSTACKUP",
+ [(MSP430callseq_end timm:$amt1, timm:$amt2)]>;
+}
+
+let usesCustomInserter = 1 in {
+ def Select8 : Pseudo<(outs GR8:$dst), (ins GR8:$src, GR8:$src2, i8imm:$cc),
+ "# Select8 PSEUDO",
+ [(set GR8:$dst,
+ (MSP430selectcc GR8:$src, GR8:$src2, imm:$cc))]>;
+ def Select16 : Pseudo<(outs GR16:$dst), (ins GR16:$src, GR16:$src2, i8imm:$cc),
+ "# Select16 PSEUDO",
+ [(set GR16:$dst,
+ (MSP430selectcc GR16:$src, GR16:$src2, imm:$cc))]>;
+ let Defs = [SRW] in {
+ def Shl8 : Pseudo<(outs GR8:$dst), (ins GR8:$src, GR8:$cnt),
+ "# Shl8 PSEUDO",
+ [(set GR8:$dst, (MSP430shl GR8:$src, GR8:$cnt))]>;
+ def Shl16 : Pseudo<(outs GR16:$dst), (ins GR16:$src, GR8:$cnt),
+ "# Shl16 PSEUDO",
+ [(set GR16:$dst, (MSP430shl GR16:$src, GR8:$cnt))]>;
+ def Sra8 : Pseudo<(outs GR8:$dst), (ins GR8:$src, GR8:$cnt),
+ "# Sra8 PSEUDO",
+ [(set GR8:$dst, (MSP430sra GR8:$src, GR8:$cnt))]>;
+ def Sra16 : Pseudo<(outs GR16:$dst), (ins GR16:$src, GR8:$cnt),
+ "# Sra16 PSEUDO",
+ [(set GR16:$dst, (MSP430sra GR16:$src, GR8:$cnt))]>;
+ def Srl8 : Pseudo<(outs GR8:$dst), (ins GR8:$src, GR8:$cnt),
+ "# Srl8 PSEUDO",
+ [(set GR8:$dst, (MSP430srl GR8:$src, GR8:$cnt))]>;
+ def Srl16 : Pseudo<(outs GR16:$dst), (ins GR16:$src, GR8:$cnt),
+ "# Srl16 PSEUDO",
+ [(set GR16:$dst, (MSP430srl GR16:$src, GR8:$cnt))]>;
+
+ }
+}
+
+let neverHasSideEffects = 1 in
+def NOP : Pseudo<(outs), (ins), "nop", []>;
+
+//===----------------------------------------------------------------------===//
+// Control Flow Instructions...
+//
+
+// FIXME: Provide proper encoding!
+let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
+ def RET : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes,
+ (outs), (ins), "ret", [(MSP430retflag)]>;
+ def RETI : II16r<0x0, (outs), (ins), "reti", [(MSP430retiflag)]>;
+}
+
+let isBranch = 1, isTerminator = 1 in {
+
+// FIXME: expand opcode & cond field for branches!
+
+// Direct branch
+let isBarrier = 1 in {
+ // Short branch
+ def JMP : CJForm<0, 0, (outs), (ins jmptarget:$dst),
+ "jmp\t$dst",
+ [(br bb:$dst)]>;
+ let isIndirectBranch = 1 in {
+ // Long branches
+ def Bi : I16ri<0, (outs), (ins i16imm:$brdst),
+ "br\t$brdst",
+ [(brind tblockaddress:$brdst)]>;
+ def Br : I16rr<0, (outs), (ins GR16:$brdst),
+ "mov.w\t{$brdst, pc}",
+ [(brind GR16:$brdst)]>;
+ def Bm : I16rm<0, (outs), (ins memsrc:$brdst),
+ "mov.w\t{$brdst, pc}",
+ [(brind (load addr:$brdst))]>;
+ }
+}
+
+// Conditional branches
+let Uses = [SRW] in
+ def JCC : CJForm<0, 0,
+ (outs), (ins jmptarget:$dst, cc:$cc),
+ "j$cc\t$dst",
+ [(MSP430brcc bb:$dst, imm:$cc)]>;
+} // isBranch, isTerminator
+
+//===----------------------------------------------------------------------===//
+// Call Instructions...
+//
+let isCall = 1 in
+ // All calls clobber the non-callee saved registers. SPW is marked as
+ // a use to prevent stack-pointer assignments that appear immediately
+ // before calls from potentially appearing dead. Uses for argument
+ // registers are added manually.
+ let Defs = [R12W, R13W, R14W, R15W, SRW],
+ Uses = [SPW] in {
+ def CALLi : II16i<0x0,
+ (outs), (ins i16imm:$dst, variable_ops),
+ "call\t$dst", [(MSP430call imm:$dst)]>;
+ def CALLr : II16r<0x0,
+ (outs), (ins GR16:$dst, variable_ops),
+ "call\t$dst", [(MSP430call GR16:$dst)]>;
+ def CALLm : II16m<0x0,
+ (outs), (ins memsrc:$dst, variable_ops),
+ "call\t${dst:mem}", [(MSP430call (load addr:$dst))]>;
+ }
+
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous Instructions...
+//
+let Defs = [SPW], Uses = [SPW], neverHasSideEffects=1 in {
+let mayLoad = 1 in
+def POP16r : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes,
+ (outs GR16:$reg), (ins), "pop.w\t$reg", []>;
+
+let mayStore = 1 in
+def PUSH16r : II16r<0x0,
+ (outs), (ins GR16:$reg), "push.w\t$reg",[]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Move Instructions
+
+// FIXME: Provide proper encoding!
+let neverHasSideEffects = 1 in {
+def MOV8rr : I8rr<0x0,
+ (outs GR8:$dst), (ins GR8:$src),
+ "mov.b\t{$src, $dst}",
+ []>;
+def MOV16rr : I16rr<0x0,
+ (outs GR16:$dst), (ins GR16:$src),
+ "mov.w\t{$src, $dst}",
+ []>;
+}
+
+// FIXME: Provide proper encoding!
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+def MOV8ri : I8ri<0x0,
+ (outs GR8:$dst), (ins i8imm:$src),
+ "mov.b\t{$src, $dst}",
+ [(set GR8:$dst, imm:$src)]>;
+def MOV16ri : I16ri<0x0,
+ (outs GR16:$dst), (ins i16imm:$src),
+ "mov.w\t{$src, $dst}",
+ [(set GR16:$dst, imm:$src)]>;
+}
+
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
+def MOV8rm : I8rm<0x0,
+ (outs GR8:$dst), (ins memsrc:$src),
+ "mov.b\t{$src, $dst}",
+ [(set GR8:$dst, (load addr:$src))]>;
+def MOV16rm : I16rm<0x0,
+ (outs GR16:$dst), (ins memsrc:$src),
+ "mov.w\t{$src, $dst}",
+ [(set GR16:$dst, (load addr:$src))]>;
+}
+
+def MOVZX16rr8 : I8rr<0x0,
+ (outs GR16:$dst), (ins GR8:$src),
+ "mov.b\t{$src, $dst}",
+ [(set GR16:$dst, (zext GR8:$src))]>;
+def MOVZX16rm8 : I8rm<0x0,
+ (outs GR16:$dst), (ins memsrc:$src),
+ "mov.b\t{$src, $dst}",
+ [(set GR16:$dst, (zextloadi16i8 addr:$src))]>;
+
+let mayLoad = 1, hasExtraDefRegAllocReq = 1, Constraints = "$base = $base_wb" in {
+def MOV8rm_POST : IForm8<0x0, DstReg, SrcPostInc, Size2Bytes,
+ (outs GR8:$dst, GR16:$base_wb), (ins GR16:$base),
+ "mov.b\t{@$base+, $dst}", []>;
+def MOV16rm_POST : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes,
+ (outs GR16:$dst, GR16:$base_wb), (ins GR16:$base),
+ "mov.w\t{@$base+, $dst}", []>;
+}
+
+// Any instruction that defines a 8-bit result leaves the high half of the
+// register. Truncate can be lowered to EXTRACT_SUBREG, and CopyFromReg may
+// be copying from a truncate, but any other 8-bit operation will zero-extend
+// up to 16 bits.
+def def8 : PatLeaf<(i8 GR8:$src), [{
+ return N->getOpcode() != ISD::TRUNCATE &&
+ N->getOpcode() != TargetOpcode::EXTRACT_SUBREG &&
+ N->getOpcode() != ISD::CopyFromReg;
+}]>;
+
+// In the case of a 8-bit def that is known to implicitly zero-extend,
+// we can use a SUBREG_TO_REG.
+def : Pat<(i16 (zext def8:$src)),
+ (SUBREG_TO_REG (i16 0), GR8:$src, subreg_8bit)>;
+
+def MOV8mi : I8mi<0x0,
+ (outs), (ins memdst:$dst, i8imm:$src),
+ "mov.b\t{$src, $dst}",
+ [(store (i8 imm:$src), addr:$dst)]>;
+def MOV16mi : I16mi<0x0,
+ (outs), (ins memdst:$dst, i16imm:$src),
+ "mov.w\t{$src, $dst}",
+ [(store (i16 imm:$src), addr:$dst)]>;
+
+def MOV8mr : I8mr<0x0,
+ (outs), (ins memdst:$dst, GR8:$src),
+ "mov.b\t{$src, $dst}",
+ [(store GR8:$src, addr:$dst)]>;
+def MOV16mr : I16mr<0x0,
+ (outs), (ins memdst:$dst, GR16:$src),
+ "mov.w\t{$src, $dst}",
+ [(store GR16:$src, addr:$dst)]>;
+
+def MOV8mm : I8mm<0x0,
+ (outs), (ins memdst:$dst, memsrc:$src),
+ "mov.b\t{$src, $dst}",
+ [(store (i8 (load addr:$src)), addr:$dst)]>;
+def MOV16mm : I16mm<0x0,
+ (outs), (ins memdst:$dst, memsrc:$src),
+ "mov.w\t{$src, $dst}",
+ [(store (i16 (load addr:$src)), addr:$dst)]>;
+
+//===----------------------------------------------------------------------===//
+// Arithmetic Instructions
+
+let Constraints = "$src = $dst" in {
+
+let Defs = [SRW] in {
+
+let isCommutable = 1 in { // X = ADD Y, Z == X = ADD Z, Y
+
+def ADD8rr : I8rr<0x0,
+ (outs GR8:$dst), (ins GR8:$src, GR8:$src2),
+ "add.b\t{$src2, $dst}",
+ [(set GR8:$dst, (add GR8:$src, GR8:$src2)),
+ (implicit SRW)]>;
+def ADD16rr : I16rr<0x0,
+ (outs GR16:$dst), (ins GR16:$src, GR16:$src2),
+ "add.w\t{$src2, $dst}",
+ [(set GR16:$dst, (add GR16:$src, GR16:$src2)),
+ (implicit SRW)]>;
+}
+
+def ADD8rm : I8rm<0x0,
+ (outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
+ "add.b\t{$src2, $dst}",
+ [(set GR8:$dst, (add GR8:$src, (load addr:$src2))),
+ (implicit SRW)]>;
+def ADD16rm : I16rm<0x0,
+ (outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
+ "add.w\t{$src2, $dst}",
+ [(set GR16:$dst, (add GR16:$src, (load addr:$src2))),
+ (implicit SRW)]>;
+
+let mayLoad = 1, hasExtraDefRegAllocReq = 1,
+Constraints = "$base = $base_wb, $src = $dst" in {
+def ADD8rm_POST : IForm8<0x0, DstReg, SrcPostInc, Size2Bytes,
+ (outs GR8:$dst, GR16:$base_wb),
+ (ins GR8:$src, GR16:$base),
+ "add.b\t{@$base+, $dst}", []>;
+def ADD16rm_POST : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes,
+ (outs GR16:$dst, GR16:$base_wb),
+ (ins GR16:$src, GR16:$base),
+ "add.w\t{@$base+, $dst}", []>;
+}
+
+
+def ADD8ri : I8ri<0x0,
+ (outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
+ "add.b\t{$src2, $dst}",
+ [(set GR8:$dst, (add GR8:$src, imm:$src2)),
+ (implicit SRW)]>;
+def ADD16ri : I16ri<0x0,
+ (outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
+ "add.w\t{$src2, $dst}",
+ [(set GR16:$dst, (add GR16:$src, imm:$src2)),
+ (implicit SRW)]>;
+
+let Constraints = "" in {
+def ADD8mr : I8mr<0x0,
+ (outs), (ins memdst:$dst, GR8:$src),
+ "add.b\t{$src, $dst}",
+ [(store (add (load addr:$dst), GR8:$src), addr:$dst),
+ (implicit SRW)]>;
+def ADD16mr : I16mr<0x0,
+ (outs), (ins memdst:$dst, GR16:$src),
+ "add.w\t{$src, $dst}",
+ [(store (add (load addr:$dst), GR16:$src), addr:$dst),
+ (implicit SRW)]>;
+
+def ADD8mi : I8mi<0x0,
+ (outs), (ins memdst:$dst, i8imm:$src),
+ "add.b\t{$src, $dst}",
+ [(store (add (load addr:$dst), (i8 imm:$src)), addr:$dst),
+ (implicit SRW)]>;
+def ADD16mi : I16mi<0x0,
+ (outs), (ins memdst:$dst, i16imm:$src),
+ "add.w\t{$src, $dst}",
+ [(store (add (load addr:$dst), (i16 imm:$src)), addr:$dst),
+ (implicit SRW)]>;
+
+def ADD8mm : I8mm<0x0,
+ (outs), (ins memdst:$dst, memsrc:$src),
+ "add.b\t{$src, $dst}",
+ [(store (add (load addr:$dst),
+ (i8 (load addr:$src))), addr:$dst),
+ (implicit SRW)]>;
+def ADD16mm : I16mm<0x0,
+ (outs), (ins memdst:$dst, memsrc:$src),
+ "add.w\t{$src, $dst}",
+ [(store (add (load addr:$dst),
+ (i16 (load addr:$src))), addr:$dst),
+ (implicit SRW)]>;
+}
+
+let Uses = [SRW] in {
+
+let isCommutable = 1 in { // X = ADDC Y, Z == X = ADDC Z, Y
+def ADC8rr : I8rr<0x0,
+ (outs GR8:$dst), (ins GR8:$src, GR8:$src2),
+ "addc.b\t{$src2, $dst}",
+ [(set GR8:$dst, (adde GR8:$src, GR8:$src2)),
+ (implicit SRW)]>;
+def ADC16rr : I16rr<0x0,
+ (outs GR16:$dst), (ins GR16:$src, GR16:$src2),
+ "addc.w\t{$src2, $dst}",
+ [(set GR16:$dst, (adde GR16:$src, GR16:$src2)),
+ (implicit SRW)]>;
+} // isCommutable
+
+def ADC8ri : I8ri<0x0,
+ (outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
+ "addc.b\t{$src2, $dst}",
+ [(set GR8:$dst, (adde GR8:$src, imm:$src2)),
+ (implicit SRW)]>;
+def ADC16ri : I16ri<0x0,
+ (outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
+ "addc.w\t{$src2, $dst}",
+ [(set GR16:$dst, (adde GR16:$src, imm:$src2)),
+ (implicit SRW)]>;
+
+def ADC8rm : I8rm<0x0,
+ (outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
+ "addc.b\t{$src2, $dst}",
+ [(set GR8:$dst, (adde GR8:$src, (load addr:$src2))),
+ (implicit SRW)]>;
+def ADC16rm : I16rm<0x0,
+ (outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
+ "addc.w\t{$src2, $dst}",
+ [(set GR16:$dst, (adde GR16:$src, (load addr:$src2))),
+ (implicit SRW)]>;
+
+let Constraints = "" in {
+def ADC8mr : I8mr<0x0,
+ (outs), (ins memdst:$dst, GR8:$src),
+ "addc.b\t{$src, $dst}",
+ [(store (adde (load addr:$dst), GR8:$src), addr:$dst),
+ (implicit SRW)]>;
+def ADC16mr : I16mr<0x0,
+ (outs), (ins memdst:$dst, GR16:$src),
+ "addc.w\t{$src, $dst}",
+ [(store (adde (load addr:$dst), GR16:$src), addr:$dst),
+ (implicit SRW)]>;
+
+def ADC8mi : I8mi<0x0,
+ (outs), (ins memdst:$dst, i8imm:$src),
+ "addc.b\t{$src, $dst}",
+ [(store (adde (load addr:$dst), (i8 imm:$src)), addr:$dst),
+ (implicit SRW)]>;
+def ADC16mi : I16mi<0x0,
+ (outs), (ins memdst:$dst, i16imm:$src),
+ "addc.w\t{$src, $dst}",
+ [(store (adde (load addr:$dst), (i16 imm:$src)), addr:$dst),
+ (implicit SRW)]>;
+
+def ADC8mm : I8mm<0x0,
+ (outs), (ins memdst:$dst, memsrc:$src),
+ "addc.b\t{$src, $dst}",
+ [(store (adde (load addr:$dst),
+ (i8 (load addr:$src))), addr:$dst),
+ (implicit SRW)]>;
+def ADC16mm : I8mm<0x0,
+ (outs), (ins memdst:$dst, memsrc:$src),
+ "addc.w\t{$src, $dst}",
+ [(store (adde (load addr:$dst),
+ (i16 (load addr:$src))), addr:$dst),
+ (implicit SRW)]>;
+}
+
+} // Uses = [SRW]
+
+let isCommutable = 1 in { // X = AND Y, Z == X = AND Z, Y
+def AND8rr : I8rr<0x0,
+ (outs GR8:$dst), (ins GR8:$src, GR8:$src2),
+ "and.b\t{$src2, $dst}",
+ [(set GR8:$dst, (and GR8:$src, GR8:$src2)),
+ (implicit SRW)]>;
+def AND16rr : I16rr<0x0,
+ (outs GR16:$dst), (ins GR16:$src, GR16:$src2),
+ "and.w\t{$src2, $dst}",
+ [(set GR16:$dst, (and GR16:$src, GR16:$src2)),
+ (implicit SRW)]>;
+}
+
+def AND8ri : I8ri<0x0,
+ (outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
+ "and.b\t{$src2, $dst}",
+ [(set GR8:$dst, (and GR8:$src, imm:$src2)),
+ (implicit SRW)]>;
+def AND16ri : I16ri<0x0,
+ (outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
+ "and.w\t{$src2, $dst}",
+ [(set GR16:$dst, (and GR16:$src, imm:$src2)),
+ (implicit SRW)]>;
+
+def AND8rm : I8rm<0x0,
+ (outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
+ "and.b\t{$src2, $dst}",
+ [(set GR8:$dst, (and GR8:$src, (load addr:$src2))),
+ (implicit SRW)]>;
+def AND16rm : I16rm<0x0,
+ (outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
+ "and.w\t{$src2, $dst}",
+ [(set GR16:$dst, (and GR16:$src, (load addr:$src2))),
+ (implicit SRW)]>;
+
+let mayLoad = 1, hasExtraDefRegAllocReq = 1,
+Constraints = "$base = $base_wb, $src = $dst" in {
+def AND8rm_POST : IForm8<0x0, DstReg, SrcPostInc, Size2Bytes,
+ (outs GR8:$dst, GR16:$base_wb),
+ (ins GR8:$src, GR16:$base),
+ "and.b\t{@$base+, $dst}", []>;
+def AND16rm_POST : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes,
+ (outs GR16:$dst, GR16:$base_wb),
+ (ins GR16:$src, GR16:$base),
+ "and.w\t{@$base+, $dst}", []>;
+}
+
+let Constraints = "" in {
+def AND8mr : I8mr<0x0,
+ (outs), (ins memdst:$dst, GR8:$src),
+ "and.b\t{$src, $dst}",
+ [(store (and (load addr:$dst), GR8:$src), addr:$dst),
+ (implicit SRW)]>;
+def AND16mr : I16mr<0x0,
+ (outs), (ins memdst:$dst, GR16:$src),
+ "and.w\t{$src, $dst}",
+ [(store (and (load addr:$dst), GR16:$src), addr:$dst),
+ (implicit SRW)]>;
+
+def AND8mi : I8mi<0x0,
+ (outs), (ins memdst:$dst, i8imm:$src),
+ "and.b\t{$src, $dst}",
+ [(store (and (load addr:$dst), (i8 imm:$src)), addr:$dst),
+ (implicit SRW)]>;
+def AND16mi : I16mi<0x0,
+ (outs), (ins memdst:$dst, i16imm:$src),
+ "and.w\t{$src, $dst}",
+ [(store (and (load addr:$dst), (i16 imm:$src)), addr:$dst),
+ (implicit SRW)]>;
+
+def AND8mm : I8mm<0x0,
+ (outs), (ins memdst:$dst, memsrc:$src),
+ "and.b\t{$src, $dst}",
+ [(store (and (load addr:$dst),
+ (i8 (load addr:$src))), addr:$dst),
+ (implicit SRW)]>;
+def AND16mm : I16mm<0x0,
+ (outs), (ins memdst:$dst, memsrc:$src),
+ "and.w\t{$src, $dst}",
+ [(store (and (load addr:$dst),
+ (i16 (load addr:$src))), addr:$dst),
+ (implicit SRW)]>;
+}
+
+let isCommutable = 1 in { // X = OR Y, Z == X = OR Z, Y
+def OR8rr : I8rr<0x0,
+ (outs GR8:$dst), (ins GR8:$src, GR8:$src2),
+ "bis.b\t{$src2, $dst}",
+ [(set GR8:$dst, (or GR8:$src, GR8:$src2))]>;
+def OR16rr : I16rr<0x0,
+ (outs GR16:$dst), (ins GR16:$src, GR16:$src2),
+ "bis.w\t{$src2, $dst}",
+ [(set GR16:$dst, (or GR16:$src, GR16:$src2))]>;
+}
+
+def OR8ri : I8ri<0x0,
+ (outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
+ "bis.b\t{$src2, $dst}",
+ [(set GR8:$dst, (or GR8:$src, imm:$src2))]>;
+def OR16ri : I16ri<0x0,
+ (outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
+ "bis.w\t{$src2, $dst}",
+ [(set GR16:$dst, (or GR16:$src, imm:$src2))]>;
+
+def OR8rm : I8rm<0x0,
+ (outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
+ "bis.b\t{$src2, $dst}",
+ [(set GR8:$dst, (or GR8:$src, (load addr:$src2)))]>;
+def OR16rm : I16rm<0x0,
+ (outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
+ "bis.w\t{$src2, $dst}",
+ [(set GR16:$dst, (or GR16:$src, (load addr:$src2)))]>;
+
+let mayLoad = 1, hasExtraDefRegAllocReq = 1,
+Constraints = "$base = $base_wb, $src = $dst" in {
+def OR8rm_POST : IForm8<0x0, DstReg, SrcPostInc, Size2Bytes,
+ (outs GR8:$dst, GR16:$base_wb),
+ (ins GR8:$src, GR16:$base),
+ "bis.b\t{@$base+, $dst}", []>;
+def OR16rm_POST : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes,
+ (outs GR16:$dst, GR16:$base_wb),
+ (ins GR16:$src, GR16:$base),
+ "bis.w\t{@$base+, $dst}", []>;
+}
+
+let Constraints = "" in {
+def OR8mr : I8mr<0x0,
+ (outs), (ins memdst:$dst, GR8:$src),
+ "bis.b\t{$src, $dst}",
+ [(store (or (load addr:$dst), GR8:$src), addr:$dst)]>;
+def OR16mr : I16mr<0x0,
+ (outs), (ins memdst:$dst, GR16:$src),
+ "bis.w\t{$src, $dst}",
+ [(store (or (load addr:$dst), GR16:$src), addr:$dst)]>;
+
+def OR8mi : I8mi<0x0,
+ (outs), (ins memdst:$dst, i8imm:$src),
+ "bis.b\t{$src, $dst}",
+ [(store (or (load addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+def OR16mi : I16mi<0x0,
+ (outs), (ins memdst:$dst, i16imm:$src),
+ "bis.w\t{$src, $dst}",
+ [(store (or (load addr:$dst), (i16 imm:$src)), addr:$dst)]>;
+
+def OR8mm : I8mm<0x0,
+ (outs), (ins memdst:$dst, memsrc:$src),
+ "bis.b\t{$src, $dst}",
+ [(store (or (i8 (load addr:$dst)),
+ (i8 (load addr:$src))), addr:$dst)]>;
+def OR16mm : I16mm<0x0,
+ (outs), (ins memdst:$dst, memsrc:$src),
+ "bis.w\t{$src, $dst}",
+ [(store (or (i16 (load addr:$dst)),
+ (i16 (load addr:$src))), addr:$dst)]>;
+}
+
+// bic does not modify condition codes
+def BIC8rr : I8rr<0x0,
+ (outs GR8:$dst), (ins GR8:$src, GR8:$src2),
+ "bic.b\t{$src2, $dst}",
+ [(set GR8:$dst, (and GR8:$src, (not GR8:$src2)))]>;
+def BIC16rr : I16rr<0x0,
+ (outs GR16:$dst), (ins GR16:$src, GR16:$src2),
+ "bic.w\t{$src2, $dst}",
+ [(set GR16:$dst, (and GR16:$src, (not GR16:$src2)))]>;
+
+def BIC8rm : I8rm<0x0,
+ (outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
+ "bic.b\t{$src2, $dst}",
+ [(set GR8:$dst, (and GR8:$src, (not (i8 (load addr:$src2)))))]>;
+def BIC16rm : I16rm<0x0,
+ (outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
+ "bic.w\t{$src2, $dst}",
+ [(set GR16:$dst, (and GR16:$src, (not (i16 (load addr:$src2)))))]>;
+
+let Constraints = "" in {
+def BIC8mr : I8mr<0x0,
+ (outs), (ins memdst:$dst, GR8:$src),
+ "bic.b\t{$src, $dst}",
+ [(store (and (load addr:$dst), (not GR8:$src)), addr:$dst)]>;
+def BIC16mr : I16mr<0x0,
+ (outs), (ins memdst:$dst, GR16:$src),
+ "bic.w\t{$src, $dst}",
+ [(store (and (load addr:$dst), (not GR16:$src)), addr:$dst)]>;
+
+def BIC8mm : I8mm<0x0,
+ (outs), (ins memdst:$dst, memsrc:$src),
+ "bic.b\t{$src, $dst}",
+ [(store (and (load addr:$dst),
+ (not (i8 (load addr:$src)))), addr:$dst)]>;
+def BIC16mm : I16mm<0x0,
+ (outs), (ins memdst:$dst, memsrc:$src),
+ "bic.w\t{$src, $dst}",
+ [(store (and (load addr:$dst),
+ (not (i16 (load addr:$src)))), addr:$dst)]>;
+}
+
+let isCommutable = 1 in { // X = XOR Y, Z == X = XOR Z, Y
+def XOR8rr : I8rr<0x0,
+ (outs GR8:$dst), (ins GR8:$src, GR8:$src2),
+ "xor.b\t{$src2, $dst}",
+ [(set GR8:$dst, (xor GR8:$src, GR8:$src2)),
+ (implicit SRW)]>;
+def XOR16rr : I16rr<0x0,
+ (outs GR16:$dst), (ins GR16:$src, GR16:$src2),
+ "xor.w\t{$src2, $dst}",
+ [(set GR16:$dst, (xor GR16:$src, GR16:$src2)),
+ (implicit SRW)]>;
+}
+
+def XOR8ri : I8ri<0x0,
+ (outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
+ "xor.b\t{$src2, $dst}",
+ [(set GR8:$dst, (xor GR8:$src, imm:$src2)),
+ (implicit SRW)]>;
+def XOR16ri : I16ri<0x0,
+ (outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
+ "xor.w\t{$src2, $dst}",
+ [(set GR16:$dst, (xor GR16:$src, imm:$src2)),
+ (implicit SRW)]>;
+
+def XOR8rm : I8rm<0x0,
+ (outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
+ "xor.b\t{$src2, $dst}",
+ [(set GR8:$dst, (xor GR8:$src, (load addr:$src2))),
+ (implicit SRW)]>;
+def XOR16rm : I16rm<0x0,
+ (outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
+ "xor.w\t{$src2, $dst}",
+ [(set GR16:$dst, (xor GR16:$src, (load addr:$src2))),
+ (implicit SRW)]>;
+
+let mayLoad = 1, hasExtraDefRegAllocReq = 1,
+Constraints = "$base = $base_wb, $src = $dst" in {
+def XOR8rm_POST : IForm8<0x0, DstReg, SrcPostInc, Size2Bytes,
+ (outs GR8:$dst, GR16:$base_wb),
+ (ins GR8:$src, GR16:$base),
+ "xor.b\t{@$base+, $dst}", []>;
+def XOR16rm_POST : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes,
+ (outs GR16:$dst, GR16:$base_wb),
+ (ins GR16:$src, GR16:$base),
+ "xor.w\t{@$base+, $dst}", []>;
+}
+
+let Constraints = "" in {
+def XOR8mr : I8mr<0x0,
+ (outs), (ins memdst:$dst, GR8:$src),
+ "xor.b\t{$src, $dst}",
+ [(store (xor (load addr:$dst), GR8:$src), addr:$dst),
+ (implicit SRW)]>;
+def XOR16mr : I16mr<0x0,
+ (outs), (ins memdst:$dst, GR16:$src),
+ "xor.w\t{$src, $dst}",
+ [(store (xor (load addr:$dst), GR16:$src), addr:$dst),
+ (implicit SRW)]>;
+
+def XOR8mi : I8mi<0x0,
+ (outs), (ins memdst:$dst, i8imm:$src),
+ "xor.b\t{$src, $dst}",
+ [(store (xor (load addr:$dst), (i8 imm:$src)), addr:$dst),
+ (implicit SRW)]>;
+def XOR16mi : I16mi<0x0,
+ (outs), (ins memdst:$dst, i16imm:$src),
+ "xor.w\t{$src, $dst}",
+ [(store (xor (load addr:$dst), (i16 imm:$src)), addr:$dst),
+ (implicit SRW)]>;
+
+def XOR8mm : I8mm<0x0,
+ (outs), (ins memdst:$dst, memsrc:$src),
+ "xor.b\t{$src, $dst}",
+ [(store (xor (load addr:$dst), (i8 (load addr:$src))), addr:$dst),
+ (implicit SRW)]>;
+def XOR16mm : I16mm<0x0,
+ (outs), (ins memdst:$dst, memsrc:$src),
+ "xor.w\t{$src, $dst}",
+ [(store (xor (load addr:$dst), (i16 (load addr:$src))), addr:$dst),
+ (implicit SRW)]>;
+}
+
+
+def SUB8rr : I8rr<0x0,
+ (outs GR8:$dst), (ins GR8:$src, GR8:$src2),
+ "sub.b\t{$src2, $dst}",
+ [(set GR8:$dst, (sub GR8:$src, GR8:$src2)),
+ (implicit SRW)]>;
+def SUB16rr : I16rr<0x0,
+ (outs GR16:$dst), (ins GR16:$src, GR16:$src2),
+ "sub.w\t{$src2, $dst}",
+ [(set GR16:$dst, (sub GR16:$src, GR16:$src2)),
+ (implicit SRW)]>;
+
+def SUB8ri : I8ri<0x0,
+ (outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
+ "sub.b\t{$src2, $dst}",
+ [(set GR8:$dst, (sub GR8:$src, imm:$src2)),
+ (implicit SRW)]>;
+def SUB16ri : I16ri<0x0,
+ (outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
+ "sub.w\t{$src2, $dst}",
+ [(set GR16:$dst, (sub GR16:$src, imm:$src2)),
+ (implicit SRW)]>;
+
+def SUB8rm : I8rm<0x0,
+ (outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
+ "sub.b\t{$src2, $dst}",
+ [(set GR8:$dst, (sub GR8:$src, (load addr:$src2))),
+ (implicit SRW)]>;
+def SUB16rm : I16rm<0x0,
+ (outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
+ "sub.w\t{$src2, $dst}",
+ [(set GR16:$dst, (sub GR16:$src, (load addr:$src2))),
+ (implicit SRW)]>;
+
+let mayLoad = 1, hasExtraDefRegAllocReq = 1,
+Constraints = "$base = $base_wb, $src = $dst" in {
+def SUB8rm_POST : IForm8<0x0, DstReg, SrcPostInc, Size2Bytes,
+ (outs GR8:$dst, GR16:$base_wb),
+ (ins GR8:$src, GR16:$base),
+ "sub.b\t{@$base+, $dst}", []>;
+def SUB16rm_POST : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes,
+ (outs GR16:$dst, GR16:$base_wb),
+ (ins GR16:$src, GR16:$base),
+ "sub.w\t{@$base+, $dst}", []>;
+}
+
+let Constraints = "" in {
+def SUB8mr : I8mr<0x0,
+ (outs), (ins memdst:$dst, GR8:$src),
+ "sub.b\t{$src, $dst}",
+ [(store (sub (load addr:$dst), GR8:$src), addr:$dst),
+ (implicit SRW)]>;
+def SUB16mr : I16mr<0x0,
+ (outs), (ins memdst:$dst, GR16:$src),
+ "sub.w\t{$src, $dst}",
+ [(store (sub (load addr:$dst), GR16:$src), addr:$dst),
+ (implicit SRW)]>;
+
+def SUB8mi : I8mi<0x0,
+ (outs), (ins memdst:$dst, i8imm:$src),
+ "sub.b\t{$src, $dst}",
+ [(store (sub (load addr:$dst), (i8 imm:$src)), addr:$dst),
+ (implicit SRW)]>;
+def SUB16mi : I16mi<0x0,
+ (outs), (ins memdst:$dst, i16imm:$src),
+ "sub.w\t{$src, $dst}",
+ [(store (sub (load addr:$dst), (i16 imm:$src)), addr:$dst),
+ (implicit SRW)]>;
+
+def SUB8mm : I8mm<0x0,
+ (outs), (ins memdst:$dst, memsrc:$src),
+ "sub.b\t{$src, $dst}",
+ [(store (sub (load addr:$dst),
+ (i8 (load addr:$src))), addr:$dst),
+ (implicit SRW)]>;
+def SUB16mm : I16mm<0x0,
+ (outs), (ins memdst:$dst, memsrc:$src),
+ "sub.w\t{$src, $dst}",
+ [(store (sub (load addr:$dst),
+ (i16 (load addr:$src))), addr:$dst),
+ (implicit SRW)]>;
+}
+
+let Uses = [SRW] in {
+def SBC8rr : I8rr<0x0,
+ (outs GR8:$dst), (ins GR8:$src, GR8:$src2),
+ "subc.b\t{$src2, $dst}",
+ [(set GR8:$dst, (sube GR8:$src, GR8:$src2)),
+ (implicit SRW)]>;
+def SBC16rr : I16rr<0x0,
+ (outs GR16:$dst), (ins GR16:$src, GR16:$src2),
+ "subc.w\t{$src2, $dst}",
+ [(set GR16:$dst, (sube GR16:$src, GR16:$src2)),
+ (implicit SRW)]>;
+
+def SBC8ri : I8ri<0x0,
+ (outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
+ "subc.b\t{$src2, $dst}",
+ [(set GR8:$dst, (sube GR8:$src, imm:$src2)),
+ (implicit SRW)]>;
+def SBC16ri : I16ri<0x0,
+ (outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
+ "subc.w\t{$src2, $dst}",
+ [(set GR16:$dst, (sube GR16:$src, imm:$src2)),
+ (implicit SRW)]>;
+
+def SBC8rm : I8rm<0x0,
+ (outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
+ "subc.b\t{$src2, $dst}",
+ [(set GR8:$dst, (sube GR8:$src, (load addr:$src2))),
+ (implicit SRW)]>;
+def SBC16rm : I16rm<0x0,
+ (outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
+ "subc.w\t{$src2, $dst}",
+ [(set GR16:$dst, (sube GR16:$src, (load addr:$src2))),
+ (implicit SRW)]>;
+
+let Constraints = "" in {
+def SBC8mr : I8mr<0x0,
+ (outs), (ins memdst:$dst, GR8:$src),
+ "subc.b\t{$src, $dst}",
+ [(store (sube (load addr:$dst), GR8:$src), addr:$dst),
+ (implicit SRW)]>;
+def SBC16mr : I16mr<0x0,
+ (outs), (ins memdst:$dst, GR16:$src),
+ "subc.w\t{$src, $dst}",
+ [(store (sube (load addr:$dst), GR16:$src), addr:$dst),
+ (implicit SRW)]>;
+
+def SBC8mi : I8mi<0x0,
+ (outs), (ins memdst:$dst, i8imm:$src),
+ "subc.b\t{$src, $dst}",
+ [(store (sube (load addr:$dst), (i8 imm:$src)), addr:$dst),
+ (implicit SRW)]>;
+def SBC16mi : I16mi<0x0,
+ (outs), (ins memdst:$dst, i16imm:$src),
+ "subc.w\t{$src, $dst}",
+ [(store (sube (load addr:$dst), (i16 imm:$src)), addr:$dst),
+ (implicit SRW)]>;
+
+def SBC8mm : I8mm<0x0,
+ (outs), (ins memdst:$dst, memsrc:$src),
+ "subc.b\t{$src, $dst}",
+ [(store (sube (load addr:$dst),
+ (i8 (load addr:$src))), addr:$dst),
+ (implicit SRW)]>;
+def SBC16mm : I16mm<0x0,
+ (outs), (ins memdst:$dst, memsrc:$src),
+ "subc.w\t{$src, $dst}",
+ [(store (sube (load addr:$dst),
+ (i16 (load addr:$src))), addr:$dst),
+ (implicit SRW)]>;
+}
+
+} // Uses = [SRW]
+
+// FIXME: memory variant!
+def SAR8r1 : II8r<0x0,
+ (outs GR8:$dst), (ins GR8:$src),
+ "rra.b\t$dst",
+ [(set GR8:$dst, (MSP430rra GR8:$src)),
+ (implicit SRW)]>;
+def SAR16r1 : II16r<0x0,
+ (outs GR16:$dst), (ins GR16:$src),
+ "rra.w\t$dst",
+ [(set GR16:$dst, (MSP430rra GR16:$src)),
+ (implicit SRW)]>;
+
+def SHL8r1 : I8rr<0x0,
+ (outs GR8:$dst), (ins GR8:$src),
+ "rla.b\t$dst",
+ [(set GR8:$dst, (MSP430rla GR8:$src)),
+ (implicit SRW)]>;
+def SHL16r1 : I16rr<0x0,
+ (outs GR16:$dst), (ins GR16:$src),
+ "rla.w\t$dst",
+ [(set GR16:$dst, (MSP430rla GR16:$src)),
+ (implicit SRW)]>;
+
+def SAR8r1c : Pseudo<(outs GR8:$dst), (ins GR8:$src),
+ "clrc\n\t"
+ "rrc.b\t$dst",
+ [(set GR8:$dst, (MSP430rrc GR8:$src)),
+ (implicit SRW)]>;
+def SAR16r1c : Pseudo<(outs GR16:$dst), (ins GR16:$src),
+ "clrc\n\t"
+ "rrc.w\t$dst",
+ [(set GR16:$dst, (MSP430rrc GR16:$src)),
+ (implicit SRW)]>;
+
+// FIXME: Memory sext's ?
+def SEXT16r : II16r<0x0,
+ (outs GR16:$dst), (ins GR16:$src),
+ "sxt\t$dst",
+ [(set GR16:$dst, (sext_inreg GR16:$src, i8)),
+ (implicit SRW)]>;
+
+} // Defs = [SRW]
+
+def ZEXT16r : I8rr<0x0,
+ (outs GR16:$dst), (ins GR16:$src),
+ "mov.b\t{$src, $dst}",
+ [(set GR16:$dst, (zext (trunc GR16:$src)))]>;
+
+// FIXME: Memory bitswaps?
+def SWPB16r : II16r<0x0,
+ (outs GR16:$dst), (ins GR16:$src),
+ "swpb\t$dst",
+ [(set GR16:$dst, (bswap GR16:$src))]>;
+
+} // Constraints = "$src = $dst"
+
+// Integer comparisons
+let Defs = [SRW] in {
+def CMP8rr : I8rr<0x0,
+ (outs), (ins GR8:$src, GR8:$src2),
+ "cmp.b\t{$src2, $src}",
+ [(MSP430cmp GR8:$src, GR8:$src2), (implicit SRW)]>;
+def CMP16rr : I16rr<0x0,
+ (outs), (ins GR16:$src, GR16:$src2),
+ "cmp.w\t{$src2, $src}",
+ [(MSP430cmp GR16:$src, GR16:$src2), (implicit SRW)]>;
+
+def CMP8ri : I8ri<0x0,
+ (outs), (ins GR8:$src, i8imm:$src2),
+ "cmp.b\t{$src2, $src}",
+ [(MSP430cmp GR8:$src, imm:$src2), (implicit SRW)]>;
+def CMP16ri : I16ri<0x0,
+ (outs), (ins GR16:$src, i16imm:$src2),
+ "cmp.w\t{$src2, $src}",
+ [(MSP430cmp GR16:$src, imm:$src2), (implicit SRW)]>;
+
+def CMP8mi : I8mi<0x0,
+ (outs), (ins memsrc:$src, i8imm:$src2),
+ "cmp.b\t{$src2, $src}",
+ [(MSP430cmp (load addr:$src),
+ (i8 imm:$src2)), (implicit SRW)]>;
+def CMP16mi : I16mi<0x0,
+ (outs), (ins memsrc:$src, i16imm:$src2),
+ "cmp.w\t{$src2, $src}",
+ [(MSP430cmp (load addr:$src),
+ (i16 imm:$src2)), (implicit SRW)]>;
+
+def CMP8rm : I8rm<0x0,
+ (outs), (ins GR8:$src, memsrc:$src2),
+ "cmp.b\t{$src2, $src}",
+ [(MSP430cmp GR8:$src, (load addr:$src2)),
+ (implicit SRW)]>;
+def CMP16rm : I16rm<0x0,
+ (outs), (ins GR16:$src, memsrc:$src2),
+ "cmp.w\t{$src2, $src}",
+ [(MSP430cmp GR16:$src, (load addr:$src2)),
+ (implicit SRW)]>;
+
+def CMP8mr : I8mr<0x0,
+ (outs), (ins memsrc:$src, GR8:$src2),
+ "cmp.b\t{$src2, $src}",
+ [(MSP430cmp (load addr:$src), GR8:$src2),
+ (implicit SRW)]>;
+def CMP16mr : I16mr<0x0,
+ (outs), (ins memsrc:$src, GR16:$src2),
+ "cmp.w\t{$src2, $src}",
+ [(MSP430cmp (load addr:$src), GR16:$src2),
+ (implicit SRW)]>;
+
+
+// BIT TESTS, just sets condition codes
+// Note that the C condition is set differently than when using CMP.
+let isCommutable = 1 in {
+def BIT8rr : I8rr<0x0,
+ (outs), (ins GR8:$src, GR8:$src2),
+ "bit.b\t{$src2, $src}",
+ [(MSP430cmp (and_su GR8:$src, GR8:$src2), 0),
+ (implicit SRW)]>;
+def BIT16rr : I16rr<0x0,
+ (outs), (ins GR16:$src, GR16:$src2),
+ "bit.w\t{$src2, $src}",
+ [(MSP430cmp (and_su GR16:$src, GR16:$src2), 0),
+ (implicit SRW)]>;
+}
+def BIT8ri : I8ri<0x0,
+ (outs), (ins GR8:$src, i8imm:$src2),
+ "bit.b\t{$src2, $src}",
+ [(MSP430cmp (and_su GR8:$src, imm:$src2), 0),
+ (implicit SRW)]>;
+def BIT16ri : I16ri<0x0,
+ (outs), (ins GR16:$src, i16imm:$src2),
+ "bit.w\t{$src2, $src}",
+ [(MSP430cmp (and_su GR16:$src, imm:$src2), 0),
+ (implicit SRW)]>;
+
+def BIT8rm : I8rm<0x0,
+ (outs), (ins GR8:$src, memdst:$src2),
+ "bit.b\t{$src2, $src}",
+ [(MSP430cmp (and_su GR8:$src, (load addr:$src2)), 0),
+ (implicit SRW)]>;
+def BIT16rm : I16rm<0x0,
+ (outs), (ins GR16:$src, memdst:$src2),
+ "bit.w\t{$src2, $src}",
+ [(MSP430cmp (and_su GR16:$src, (load addr:$src2)), 0),
+ (implicit SRW)]>;
+
+def BIT8mr : I8mr<0x0,
+ (outs), (ins memsrc:$src, GR8:$src2),
+ "bit.b\t{$src2, $src}",
+ [(MSP430cmp (and_su (load addr:$src), GR8:$src2), 0),
+ (implicit SRW)]>;
+def BIT16mr : I16mr<0x0,
+ (outs), (ins memsrc:$src, GR16:$src2),
+ "bit.w\t{$src2, $src}",
+ [(MSP430cmp (and_su (load addr:$src), GR16:$src2), 0),
+ (implicit SRW)]>;
+
+def BIT8mi : I8mi<0x0,
+ (outs), (ins memsrc:$src, i8imm:$src2),
+ "bit.b\t{$src2, $src}",
+ [(MSP430cmp (and_su (load addr:$src), (i8 imm:$src2)), 0),
+ (implicit SRW)]>;
+def BIT16mi : I16mi<0x0,
+ (outs), (ins memsrc:$src, i16imm:$src2),
+ "bit.w\t{$src2, $src}",
+ [(MSP430cmp (and_su (load addr:$src), (i16 imm:$src2)), 0),
+ (implicit SRW)]>;
+
+def BIT8mm : I8mm<0x0,
+ (outs), (ins memsrc:$src, memsrc:$src2),
+ "bit.b\t{$src2, $src}",
+ [(MSP430cmp (and_su (i8 (load addr:$src)),
+ (load addr:$src2)),
+ 0),
+ (implicit SRW)]>;
+def BIT16mm : I16mm<0x0,
+ (outs), (ins memsrc:$src, memsrc:$src2),
+ "bit.w\t{$src2, $src}",
+ [(MSP430cmp (and_su (i16 (load addr:$src)),
+ (load addr:$src2)),
+ 0),
+ (implicit SRW)]>;
+} // Defs = [SRW]
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+
+// extload
+def : Pat<(extloadi16i8 addr:$src), (MOVZX16rm8 addr:$src)>;
+
+// anyext
+def : Pat<(i16 (anyext GR8:$src)),
+ (SUBREG_TO_REG (i16 0), GR8:$src, subreg_8bit)>;
+
+// truncs
+def : Pat<(i8 (trunc GR16:$src)),
+ (EXTRACT_SUBREG GR16:$src, subreg_8bit)>;
+
+// GlobalAddress, ExternalSymbol
+def : Pat<(i16 (MSP430Wrapper tglobaladdr:$dst)), (MOV16ri tglobaladdr:$dst)>;
+def : Pat<(i16 (MSP430Wrapper texternalsym:$dst)), (MOV16ri texternalsym:$dst)>;
+def : Pat<(i16 (MSP430Wrapper tblockaddress:$dst)), (MOV16ri tblockaddress:$dst)>;
+
+def : Pat<(add GR16:$src, (MSP430Wrapper tglobaladdr :$src2)),
+ (ADD16ri GR16:$src, tglobaladdr:$src2)>;
+def : Pat<(add GR16:$src, (MSP430Wrapper texternalsym:$src2)),
+ (ADD16ri GR16:$src, texternalsym:$src2)>;
+def : Pat<(add GR16:$src, (MSP430Wrapper tblockaddress:$src2)),
+ (ADD16ri GR16:$src, tblockaddress:$src2)>;
+
+def : Pat<(store (i16 (MSP430Wrapper tglobaladdr:$src)), addr:$dst),
+ (MOV16mi addr:$dst, tglobaladdr:$src)>;
+def : Pat<(store (i16 (MSP430Wrapper texternalsym:$src)), addr:$dst),
+ (MOV16mi addr:$dst, texternalsym:$src)>;
+def : Pat<(store (i16 (MSP430Wrapper tblockaddress:$src)), addr:$dst),
+ (MOV16mi addr:$dst, tblockaddress:$src)>;
+
+// calls
+def : Pat<(MSP430call (i16 tglobaladdr:$dst)),
+ (CALLi tglobaladdr:$dst)>;
+def : Pat<(MSP430call (i16 texternalsym:$dst)),
+ (CALLi texternalsym:$dst)>;
+
+// add and sub always produce carry
+def : Pat<(addc GR16:$src, GR16:$src2),
+ (ADD16rr GR16:$src, GR16:$src2)>;
+def : Pat<(addc GR16:$src, (load addr:$src2)),
+ (ADD16rm GR16:$src, addr:$src2)>;
+def : Pat<(addc GR16:$src, imm:$src2),
+ (ADD16ri GR16:$src, imm:$src2)>;
+def : Pat<(store (addc (load addr:$dst), GR16:$src), addr:$dst),
+ (ADD16mr addr:$dst, GR16:$src)>;
+def : Pat<(store (addc (load addr:$dst), (i16 (load addr:$src))), addr:$dst),
+ (ADD16mm addr:$dst, addr:$src)>;
+
+def : Pat<(addc GR8:$src, GR8:$src2),
+ (ADD8rr GR8:$src, GR8:$src2)>;
+def : Pat<(addc GR8:$src, (load addr:$src2)),
+ (ADD8rm GR8:$src, addr:$src2)>;
+def : Pat<(addc GR8:$src, imm:$src2),
+ (ADD8ri GR8:$src, imm:$src2)>;
+def : Pat<(store (addc (load addr:$dst), GR8:$src), addr:$dst),
+ (ADD8mr addr:$dst, GR8:$src)>;
+def : Pat<(store (addc (load addr:$dst), (i8 (load addr:$src))), addr:$dst),
+ (ADD8mm addr:$dst, addr:$src)>;
+
+def : Pat<(subc GR16:$src, GR16:$src2),
+ (SUB16rr GR16:$src, GR16:$src2)>;
+def : Pat<(subc GR16:$src, (load addr:$src2)),
+ (SUB16rm GR16:$src, addr:$src2)>;
+def : Pat<(subc GR16:$src, imm:$src2),
+ (SUB16ri GR16:$src, imm:$src2)>;
+def : Pat<(store (subc (load addr:$dst), GR16:$src), addr:$dst),
+ (SUB16mr addr:$dst, GR16:$src)>;
+def : Pat<(store (subc (load addr:$dst), (i16 (load addr:$src))), addr:$dst),
+ (SUB16mm addr:$dst, addr:$src)>;
+
+def : Pat<(subc GR8:$src, GR8:$src2),
+ (SUB8rr GR8:$src, GR8:$src2)>;
+def : Pat<(subc GR8:$src, (load addr:$src2)),
+ (SUB8rm GR8:$src, addr:$src2)>;
+def : Pat<(subc GR8:$src, imm:$src2),
+ (SUB8ri GR8:$src, imm:$src2)>;
+def : Pat<(store (subc (load addr:$dst), GR8:$src), addr:$dst),
+ (SUB8mr addr:$dst, GR8:$src)>;
+def : Pat<(store (subc (load addr:$dst), (i8 (load addr:$src))), addr:$dst),
+ (SUB8mm addr:$dst, addr:$src)>;
+
+// peephole patterns
+def : Pat<(and GR16:$src, 255), (ZEXT16r GR16:$src)>;
+def : Pat<(MSP430cmp (trunc (and_su GR16:$src, GR16:$src2)), 0),
+ (BIT8rr (EXTRACT_SUBREG GR16:$src, subreg_8bit),
+ (EXTRACT_SUBREG GR16:$src2, subreg_8bit))>;
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430MCInstLower.cpp b/contrib/llvm/lib/Target/MSP430/MSP430MCInstLower.cpp
new file mode 100644
index 0000000..d1d9a11
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430MCInstLower.cpp
@@ -0,0 +1,150 @@
+//===-- MSP430MCInstLower.cpp - Convert MSP430 MachineInstr to an MCInst---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains code to lower MSP430 MachineInstrs to their corresponding
+// MCInst records.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MSP430MCInstLower.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/ADT/SmallString.h"
+using namespace llvm;
+
+MCSymbol *MSP430MCInstLower::
+GetGlobalAddressSymbol(const MachineOperand &MO) const {
+ switch (MO.getTargetFlags()) {
+ default: llvm_unreachable("Unknown target flag on GV operand");
+ case 0: break;
+ }
+
+ return Printer.Mang->getSymbol(MO.getGlobal());
+}
+
+MCSymbol *MSP430MCInstLower::
+GetExternalSymbolSymbol(const MachineOperand &MO) const {
+ switch (MO.getTargetFlags()) {
+ default: assert(0 && "Unknown target flag on GV operand");
+ case 0: break;
+ }
+
+ return Printer.GetExternalSymbolSymbol(MO.getSymbolName());
+}
+
+MCSymbol *MSP430MCInstLower::
+GetJumpTableSymbol(const MachineOperand &MO) const {
+ SmallString<256> Name;
+ raw_svector_ostream(Name) << Printer.MAI->getPrivateGlobalPrefix() << "JTI"
+ << Printer.getFunctionNumber() << '_'
+ << MO.getIndex();
+
+ switch (MO.getTargetFlags()) {
+ default: llvm_unreachable("Unknown target flag on GV operand");
+ case 0: break;
+ }
+
+ // Create a symbol for the name.
+ return Ctx.GetOrCreateSymbol(Name.str());
+}
+
+MCSymbol *MSP430MCInstLower::
+GetConstantPoolIndexSymbol(const MachineOperand &MO) const {
+ SmallString<256> Name;
+ raw_svector_ostream(Name) << Printer.MAI->getPrivateGlobalPrefix() << "CPI"
+ << Printer.getFunctionNumber() << '_'
+ << MO.getIndex();
+
+ switch (MO.getTargetFlags()) {
+ default: llvm_unreachable("Unknown target flag on GV operand");
+ case 0: break;
+ }
+
+ // Create a symbol for the name.
+ return Ctx.GetOrCreateSymbol(Name.str());
+}
+
+MCSymbol *MSP430MCInstLower::
+GetBlockAddressSymbol(const MachineOperand &MO) const {
+ switch (MO.getTargetFlags()) {
+ default: assert(0 && "Unknown target flag on GV operand");
+ case 0: break;
+ }
+
+ return Printer.GetBlockAddressSymbol(MO.getBlockAddress());
+}
+
+MCOperand MSP430MCInstLower::
+LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const {
+ // FIXME: We would like an efficient form for this, so we don't have to do a
+ // lot of extra uniquing.
+ const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, Ctx);
+
+ switch (MO.getTargetFlags()) {
+ default: llvm_unreachable("Unknown target flag on GV operand");
+ case 0: break;
+ }
+
+ if (!MO.isJTI() && MO.getOffset())
+ Expr = MCBinaryExpr::CreateAdd(Expr,
+ MCConstantExpr::Create(MO.getOffset(), Ctx),
+ Ctx);
+ return MCOperand::CreateExpr(Expr);
+}
+
+void MSP430MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
+ OutMI.setOpcode(MI->getOpcode());
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+
+ MCOperand MCOp;
+ switch (MO.getType()) {
+ default:
+ MI->dump();
+ assert(0 && "unknown operand type");
+ case MachineOperand::MO_Register:
+ // Ignore all implicit register operands.
+ if (MO.isImplicit()) continue;
+ MCOp = MCOperand::CreateReg(MO.getReg());
+ break;
+ case MachineOperand::MO_Immediate:
+ MCOp = MCOperand::CreateImm(MO.getImm());
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
+ MO.getMBB()->getSymbol(), Ctx));
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ MCOp = LowerSymbolOperand(MO, GetGlobalAddressSymbol(MO));
+ break;
+ case MachineOperand::MO_ExternalSymbol:
+ MCOp = LowerSymbolOperand(MO, GetExternalSymbolSymbol(MO));
+ break;
+ case MachineOperand::MO_JumpTableIndex:
+ MCOp = LowerSymbolOperand(MO, GetJumpTableSymbol(MO));
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ MCOp = LowerSymbolOperand(MO, GetConstantPoolIndexSymbol(MO));
+ break;
+ case MachineOperand::MO_BlockAddress:
+ MCOp = LowerSymbolOperand(MO, GetBlockAddressSymbol(MO));
+ }
+
+ OutMI.addOperand(MCOp);
+ }
+}
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430MCInstLower.h b/contrib/llvm/lib/Target/MSP430/MSP430MCInstLower.h
new file mode 100644
index 0000000..e937696
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430MCInstLower.h
@@ -0,0 +1,50 @@
+//===-- MSP430MCInstLower.h - Lower MachineInstr to MCInst ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MSP430_MCINSTLOWER_H
+#define MSP430_MCINSTLOWER_H
+
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+ class AsmPrinter;
+ class MCAsmInfo;
+ class MCContext;
+ class MCInst;
+ class MCOperand;
+ class MCSymbol;
+ class MachineInstr;
+ class MachineModuleInfoMachO;
+ class MachineOperand;
+ class Mangler;
+
+ /// MSP430MCInstLower - This class is used to lower an MachineInstr
+ /// into an MCInst.
+class LLVM_LIBRARY_VISIBILITY MSP430MCInstLower {
+ MCContext &Ctx;
+ Mangler &Mang;
+
+ AsmPrinter &Printer;
+public:
+ MSP430MCInstLower(MCContext &ctx, Mangler &mang, AsmPrinter &printer)
+ : Ctx(ctx), Mang(mang), Printer(printer) {}
+ void Lower(const MachineInstr *MI, MCInst &OutMI) const;
+
+ MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;
+
+ MCSymbol *GetGlobalAddressSymbol(const MachineOperand &MO) const;
+ MCSymbol *GetExternalSymbolSymbol(const MachineOperand &MO) const;
+ MCSymbol *GetJumpTableSymbol(const MachineOperand &MO) const;
+ MCSymbol *GetConstantPoolIndexSymbol(const MachineOperand &MO) const;
+ MCSymbol *GetBlockAddressSymbol(const MachineOperand &MO) const;
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430MachineFunctionInfo.h b/contrib/llvm/lib/Target/MSP430/MSP430MachineFunctionInfo.h
new file mode 100644
index 0000000..383fd2e
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430MachineFunctionInfo.h
@@ -0,0 +1,46 @@
+//===- MSP430MachineFuctionInfo.h - MSP430 machine function info -*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares MSP430-specific per-machine-function information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MSP430MACHINEFUNCTIONINFO_H
+#define MSP430MACHINEFUNCTIONINFO_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+/// MSP430MachineFunctionInfo - This class is derived from MachineFunction and
+/// contains private MSP430 target-specific information for each MachineFunction.
+class MSP430MachineFunctionInfo : public MachineFunctionInfo {
+ /// CalleeSavedFrameSize - Size of the callee-saved register portion of the
+ /// stack frame in bytes.
+ unsigned CalleeSavedFrameSize;
+
+ /// ReturnAddrIndex - FrameIndex for return slot.
+ int ReturnAddrIndex;
+
+public:
+ MSP430MachineFunctionInfo() : CalleeSavedFrameSize(0) {}
+
+ explicit MSP430MachineFunctionInfo(MachineFunction &MF)
+ : CalleeSavedFrameSize(0), ReturnAddrIndex(0) {}
+
+ unsigned getCalleeSavedFrameSize() const { return CalleeSavedFrameSize; }
+ void setCalleeSavedFrameSize(unsigned bytes) { CalleeSavedFrameSize = bytes; }
+
+ int getRAIndex() const { return ReturnAddrIndex; }
+ void setRAIndex(int Index) { ReturnAddrIndex = Index; }
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.cpp b/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.cpp
new file mode 100644
index 0000000..9049c4b
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.cpp
@@ -0,0 +1,240 @@
+//===- MSP430RegisterInfo.cpp - MSP430 Register Information ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MSP430 implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "msp430-reg-info"
+
+#include "MSP430.h"
+#include "MSP430MachineFunctionInfo.h"
+#include "MSP430RegisterInfo.h"
+#include "MSP430TargetMachine.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/Support/ErrorHandling.h"
+
+#define GET_REGINFO_TARGET_DESC
+#include "MSP430GenRegisterInfo.inc"
+
+using namespace llvm;
+
+// FIXME: Provide proper call frame setup / destroy opcodes.
+MSP430RegisterInfo::MSP430RegisterInfo(MSP430TargetMachine &tm,
+ const TargetInstrInfo &tii)
+ : MSP430GenRegisterInfo(MSP430::PCW), TM(tm), TII(tii) {
+ StackAlign = TM.getFrameLowering()->getStackAlignment();
+}
+
+const unsigned*
+MSP430RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+ const TargetFrameLowering *TFI = MF->getTarget().getFrameLowering();
+ const Function* F = MF->getFunction();
+ static const unsigned CalleeSavedRegs[] = {
+ MSP430::FPW, MSP430::R5W, MSP430::R6W, MSP430::R7W,
+ MSP430::R8W, MSP430::R9W, MSP430::R10W, MSP430::R11W,
+ 0
+ };
+ static const unsigned CalleeSavedRegsFP[] = {
+ MSP430::R5W, MSP430::R6W, MSP430::R7W,
+ MSP430::R8W, MSP430::R9W, MSP430::R10W, MSP430::R11W,
+ 0
+ };
+ static const unsigned CalleeSavedRegsIntr[] = {
+ MSP430::FPW, MSP430::R5W, MSP430::R6W, MSP430::R7W,
+ MSP430::R8W, MSP430::R9W, MSP430::R10W, MSP430::R11W,
+ MSP430::R12W, MSP430::R13W, MSP430::R14W, MSP430::R15W,
+ 0
+ };
+ static const unsigned CalleeSavedRegsIntrFP[] = {
+ MSP430::R5W, MSP430::R6W, MSP430::R7W,
+ MSP430::R8W, MSP430::R9W, MSP430::R10W, MSP430::R11W,
+ MSP430::R12W, MSP430::R13W, MSP430::R14W, MSP430::R15W,
+ 0
+ };
+
+ if (TFI->hasFP(*MF))
+ return (F->getCallingConv() == CallingConv::MSP430_INTR ?
+ CalleeSavedRegsIntrFP : CalleeSavedRegsFP);
+ else
+ return (F->getCallingConv() == CallingConv::MSP430_INTR ?
+ CalleeSavedRegsIntr : CalleeSavedRegs);
+
+}
+
+BitVector MSP430RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ // Mark 4 special registers with subregisters as reserved.
+ Reserved.set(MSP430::PCB);
+ Reserved.set(MSP430::SPB);
+ Reserved.set(MSP430::SRB);
+ Reserved.set(MSP430::CGB);
+ Reserved.set(MSP430::PCW);
+ Reserved.set(MSP430::SPW);
+ Reserved.set(MSP430::SRW);
+ Reserved.set(MSP430::CGW);
+
+ // Mark frame pointer as reserved if needed.
+ if (TFI->hasFP(MF))
+ Reserved.set(MSP430::FPW);
+
+ return Reserved;
+}
+
+const TargetRegisterClass *
+MSP430RegisterInfo::getPointerRegClass(unsigned Kind) const {
+ return &MSP430::GR16RegClass;
+}
+
+void MSP430RegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ if (!TFI->hasReservedCallFrame(MF)) {
+ // If the stack pointer can be changed after prologue, turn the
+ // adjcallstackup instruction into a 'sub SPW, <amt>' and the
+ // adjcallstackdown instruction into 'add SPW, <amt>'
+ // TODO: consider using push / pop instead of sub + store / add
+ MachineInstr *Old = I;
+ uint64_t Amount = Old->getOperand(0).getImm();
+ if (Amount != 0) {
+ // We need to keep the stack aligned properly. To do this, we round the
+ // amount of space needed for the outgoing arguments up to the next
+ // alignment boundary.
+ Amount = (Amount+StackAlign-1)/StackAlign*StackAlign;
+
+ MachineInstr *New = 0;
+ if (Old->getOpcode() == TII.getCallFrameSetupOpcode()) {
+ New = BuildMI(MF, Old->getDebugLoc(),
+ TII.get(MSP430::SUB16ri), MSP430::SPW)
+ .addReg(MSP430::SPW).addImm(Amount);
+ } else {
+ assert(Old->getOpcode() == TII.getCallFrameDestroyOpcode());
+ // factor out the amount the callee already popped.
+ uint64_t CalleeAmt = Old->getOperand(1).getImm();
+ Amount -= CalleeAmt;
+ if (Amount)
+ New = BuildMI(MF, Old->getDebugLoc(),
+ TII.get(MSP430::ADD16ri), MSP430::SPW)
+ .addReg(MSP430::SPW).addImm(Amount);
+ }
+
+ if (New) {
+ // The SRW implicit def is dead.
+ New->getOperand(3).setIsDead();
+
+ // Replace the pseudo instruction with a new instruction...
+ MBB.insert(I, New);
+ }
+ }
+ } else if (I->getOpcode() == TII.getCallFrameDestroyOpcode()) {
+ // If we are performing frame pointer elimination and if the callee pops
+ // something off the stack pointer, add it back.
+ if (uint64_t CalleeAmt = I->getOperand(1).getImm()) {
+ MachineInstr *Old = I;
+ MachineInstr *New =
+ BuildMI(MF, Old->getDebugLoc(), TII.get(MSP430::SUB16ri),
+ MSP430::SPW).addReg(MSP430::SPW).addImm(CalleeAmt);
+ // The SRW implicit def is dead.
+ New->getOperand(3).setIsDead();
+
+ MBB.insert(I, New);
+ }
+ }
+
+ MBB.erase(I);
+}
+
+void
+MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS) const {
+ assert(SPAdj == 0 && "Unexpected");
+
+ unsigned i = 0;
+ MachineInstr &MI = *II;
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ DebugLoc dl = MI.getDebugLoc();
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+
+ int FrameIndex = MI.getOperand(i).getIndex();
+
+ unsigned BasePtr = (TFI->hasFP(MF) ? MSP430::FPW : MSP430::SPW);
+ int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex);
+
+ // Skip the saved PC
+ Offset += 2;
+
+ if (!TFI->hasFP(MF))
+ Offset += MF.getFrameInfo()->getStackSize();
+ else
+ Offset += 2; // Skip the saved FPW
+
+ // Fold imm into offset
+ Offset += MI.getOperand(i+1).getImm();
+
+ if (MI.getOpcode() == MSP430::ADD16ri) {
+ // This is actually "load effective address" of the stack slot
+ // instruction. We have only two-address instructions, thus we need to
+ // expand it into mov + add
+
+ MI.setDesc(TII.get(MSP430::MOV16rr));
+ MI.getOperand(i).ChangeToRegister(BasePtr, false);
+
+ if (Offset == 0)
+ return;
+
+ // We need to materialize the offset via add instruction.
+ unsigned DstReg = MI.getOperand(0).getReg();
+ if (Offset < 0)
+ BuildMI(MBB, llvm::next(II), dl, TII.get(MSP430::SUB16ri), DstReg)
+ .addReg(DstReg).addImm(-Offset);
+ else
+ BuildMI(MBB, llvm::next(II), dl, TII.get(MSP430::ADD16ri), DstReg)
+ .addReg(DstReg).addImm(Offset);
+
+ return;
+ }
+
+ MI.getOperand(i).ChangeToRegister(BasePtr, false);
+ MI.getOperand(i+1).ChangeToImmediate(Offset);
+}
+
+void
+MSP430RegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF)
+ const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ // Create a frame entry for the FPW register that must be saved.
+ if (TFI->hasFP(MF)) {
+ int FrameIdx = MF.getFrameInfo()->CreateFixedObject(2, -4, true);
+ (void)FrameIdx;
+ assert(FrameIdx == MF.getFrameInfo()->getObjectIndexBegin() &&
+ "Slot for FPW register must be last in order to be found!");
+ }
+}
+
+unsigned MSP430RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ return TFI->hasFP(MF) ? MSP430::FPW : MSP430::SPW;
+}
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.h b/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.h
new file mode 100644
index 0000000..10a3d53
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.h
@@ -0,0 +1,66 @@
+//===- MSP430RegisterInfo.h - MSP430 Register Information Impl --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MSP430 implementation of the MRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_MSP430REGISTERINFO_H
+#define LLVM_TARGET_MSP430REGISTERINFO_H
+
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#define GET_REGINFO_HEADER
+#include "MSP430GenRegisterInfo.inc"
+
+namespace llvm {
+
+class TargetInstrInfo;
+class MSP430TargetMachine;
+
+struct MSP430RegisterInfo : public MSP430GenRegisterInfo {
+private:
+ MSP430TargetMachine &TM;
+ const TargetInstrInfo &TII;
+
+ /// StackAlign - Default stack alignment.
+ ///
+ unsigned StackAlign;
+public:
+ MSP430RegisterInfo(MSP430TargetMachine &tm, const TargetInstrInfo &tii);
+
+ /// Code Generation virtual methods...
+ const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+ const TargetRegisterClass* getPointerRegClass(unsigned Kind = 0) const;
+
+ const TargetRegisterClass *
+ getMatchingSuperRegClass(const TargetRegisterClass *A,
+ const TargetRegisterClass *B, unsigned Idx) const {
+ // No sub-classes makes this really easy.
+ return A;
+ }
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
+
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+
+ // Debug information queries.
+ unsigned getFrameRegister(const MachineFunction &MF) const;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_TARGET_MSP430REGISTERINFO_H
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.td b/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.td
new file mode 100644
index 0000000..d1c2e3f
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.td
@@ -0,0 +1,85 @@
+//===- MSP430RegisterInfo.td - MSP430 Register defs ----------*- tblgen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the MSP430 register file
+//===----------------------------------------------------------------------===//
+
+class MSP430Reg<bits<4> num, string n> : Register<n> {
+ field bits<4> Num = num;
+ let Namespace = "MSP430";
+}
+
+class MSP430RegWithSubregs<bits<4> num, string n, list<Register> subregs>
+ : RegisterWithSubRegs<n, subregs> {
+ field bits<4> Num = num;
+ let Namespace = "MSP430";
+}
+
+//===----------------------------------------------------------------------===//
+// Registers
+//===----------------------------------------------------------------------===//
+
+def PCB : MSP430Reg<0, "r0">;
+def SPB : MSP430Reg<1, "r1">;
+def SRB : MSP430Reg<2, "r2">;
+def CGB : MSP430Reg<3, "r3">;
+def FPB : MSP430Reg<4, "r4">;
+def R5B : MSP430Reg<5, "r5">;
+def R6B : MSP430Reg<6, "r6">;
+def R7B : MSP430Reg<7, "r7">;
+def R8B : MSP430Reg<8, "r8">;
+def R9B : MSP430Reg<9, "r9">;
+def R10B : MSP430Reg<10, "r10">;
+def R11B : MSP430Reg<11, "r11">;
+def R12B : MSP430Reg<12, "r12">;
+def R13B : MSP430Reg<13, "r13">;
+def R14B : MSP430Reg<14, "r14">;
+def R15B : MSP430Reg<15, "r15">;
+
+def subreg_8bit : SubRegIndex { let Namespace = "MSP430"; }
+
+let SubRegIndices = [subreg_8bit] in {
+def PCW : MSP430RegWithSubregs<0, "r0", [PCB]>;
+def SPW : MSP430RegWithSubregs<1, "r1", [SPB]>;
+def SRW : MSP430RegWithSubregs<2, "r2", [SRB]>;
+def CGW : MSP430RegWithSubregs<3, "r3", [CGB]>;
+def FPW : MSP430RegWithSubregs<4, "r4", [FPB]>;
+def R5W : MSP430RegWithSubregs<5, "r5", [R5B]>;
+def R6W : MSP430RegWithSubregs<6, "r6", [R6B]>;
+def R7W : MSP430RegWithSubregs<7, "r7", [R7B]>;
+def R8W : MSP430RegWithSubregs<8, "r8", [R8B]>;
+def R9W : MSP430RegWithSubregs<9, "r9", [R9B]>;
+def R10W : MSP430RegWithSubregs<10, "r10", [R10B]>;
+def R11W : MSP430RegWithSubregs<11, "r11", [R11B]>;
+def R12W : MSP430RegWithSubregs<12, "r12", [R12B]>;
+def R13W : MSP430RegWithSubregs<13, "r13", [R13B]>;
+def R14W : MSP430RegWithSubregs<14, "r14", [R14B]>;
+def R15W : MSP430RegWithSubregs<15, "r15", [R15B]>;
+}
+
+def GR8 : RegisterClass<"MSP430", [i8], 8,
+ // Volatile registers
+ (add R12B, R13B, R14B, R15B, R11B, R10B, R9B, R8B, R7B, R6B, R5B,
+ // Frame pointer, sometimes allocable
+ FPB,
+ // Volatile, but not allocable
+ PCB, SPB, SRB, CGB)>;
+
+def GR16 : RegisterClass<"MSP430", [i16], 16,
+ // Volatile registers
+ (add R12W, R13W, R14W, R15W, R11W, R10W, R9W, R8W, R7W, R6W, R5W,
+ // Frame pointer, sometimes allocable
+ FPW,
+ // Volatile, but not allocable
+ PCW, SPW, SRW, CGW)>
+{
+ let SubRegClasses = [(GR8 subreg_8bit)];
+}
+
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp b/contrib/llvm/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp
new file mode 100644
index 0000000..24f45fa
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp
@@ -0,0 +1,23 @@
+//===-- MSP430SelectionDAGInfo.cpp - MSP430 SelectionDAG Info -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MSP430SelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "msp430-selectiondag-info"
+#include "MSP430TargetMachine.h"
+using namespace llvm;
+
+MSP430SelectionDAGInfo::MSP430SelectionDAGInfo(const MSP430TargetMachine &TM)
+ : TargetSelectionDAGInfo(TM) {
+}
+
+MSP430SelectionDAGInfo::~MSP430SelectionDAGInfo() {
+}
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430SelectionDAGInfo.h b/contrib/llvm/lib/Target/MSP430/MSP430SelectionDAGInfo.h
new file mode 100644
index 0000000..fa81948
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430SelectionDAGInfo.h
@@ -0,0 +1,31 @@
+//===-- MSP430SelectionDAGInfo.h - MSP430 SelectionDAG Info -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the MSP430 subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MSP430SELECTIONDAGINFO_H
+#define MSP430SELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class MSP430TargetMachine;
+
+class MSP430SelectionDAGInfo : public TargetSelectionDAGInfo {
+public:
+ explicit MSP430SelectionDAGInfo(const MSP430TargetMachine &TM);
+ ~MSP430SelectionDAGInfo();
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430Subtarget.cpp b/contrib/llvm/lib/Target/MSP430/MSP430Subtarget.cpp
new file mode 100644
index 0000000..3ee14d9
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430Subtarget.cpp
@@ -0,0 +1,32 @@
+//===- MSP430Subtarget.cpp - MSP430 Subtarget Information ---------*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MSP430 specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MSP430Subtarget.h"
+#include "MSP430.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "MSP430GenSubtargetInfo.inc"
+
+using namespace llvm;
+
+MSP430Subtarget::MSP430Subtarget(const std::string &TT,
+ const std::string &CPU,
+ const std::string &FS) :
+ MSP430GenSubtargetInfo(TT, CPU, FS) {
+ std::string CPUName = "generic";
+
+ // Parse features string.
+ ParseSubtargetFeatures(CPUName, FS);
+}
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430Subtarget.h b/contrib/llvm/lib/Target/MSP430/MSP430Subtarget.h
new file mode 100644
index 0000000..1ce5f11
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430Subtarget.h
@@ -0,0 +1,42 @@
+//====-- MSP430Subtarget.h - Define Subtarget for the MSP430 ---*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the MSP430 specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_MSP430_SUBTARGET_H
+#define LLVM_TARGET_MSP430_SUBTARGET_H
+
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+#define GET_SUBTARGETINFO_HEADER
+#include "MSP430GenSubtargetInfo.inc"
+
+#include <string>
+
+namespace llvm {
+class StringRef;
+
+class MSP430Subtarget : public MSP430GenSubtargetInfo {
+ bool ExtendedInsts;
+public:
+ /// This constructor initializes the data members to match that
+ /// of the specified triple.
+ ///
+ MSP430Subtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS);
+
+ /// ParseSubtargetFeatures - Parses features string setting specified
+ /// subtarget options. Definition of function is auto generated by tblgen.
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+};
+} // End llvm namespace
+
+#endif // LLVM_TARGET_MSP430_SUBTARGET_H
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp b/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp
new file mode 100644
index 0000000..4dd8933
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp
@@ -0,0 +1,52 @@
+//===-- MSP430TargetMachine.cpp - Define TargetMachine for MSP430 ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Top-level implementation for the MSP430 target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MSP430.h"
+#include "MSP430TargetMachine.h"
+#include "llvm/PassManager.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+extern "C" void LLVMInitializeMSP430Target() {
+ // Register the target.
+ RegisterTargetMachine<MSP430TargetMachine> X(TheMSP430Target);
+}
+
+MSP430TargetMachine::MSP430TargetMachine(const Target &T,
+ StringRef TT,
+ StringRef CPU,
+ StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM)
+ : LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
+ Subtarget(TT, CPU, FS),
+ // FIXME: Check TargetData string.
+ DataLayout("e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16"),
+ InstrInfo(*this), TLInfo(*this), TSInfo(*this),
+ FrameLowering(Subtarget) { }
+
+
+bool MSP430TargetMachine::addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ // Install an instruction selector.
+ PM.add(createMSP430ISelDag(*this, OptLevel));
+ return false;
+}
+
+bool MSP430TargetMachine::addPreEmitPass(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ // Must run branch selection immediately preceding the asm printer.
+ PM.add(createMSP430BranchSelectionPass());
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.h b/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.h
new file mode 100644
index 0000000..eb483dc
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.h
@@ -0,0 +1,70 @@
+//==-- MSP430TargetMachine.h - Define TargetMachine for MSP430 ---*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the MSP430 specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef LLVM_TARGET_MSP430_TARGETMACHINE_H
+#define LLVM_TARGET_MSP430_TARGETMACHINE_H
+
+#include "MSP430InstrInfo.h"
+#include "MSP430ISelLowering.h"
+#include "MSP430FrameLowering.h"
+#include "MSP430SelectionDAGInfo.h"
+#include "MSP430RegisterInfo.h"
+#include "MSP430Subtarget.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+/// MSP430TargetMachine
+///
+class MSP430TargetMachine : public LLVMTargetMachine {
+ MSP430Subtarget Subtarget;
+ const TargetData DataLayout; // Calculates type size & alignment
+ MSP430InstrInfo InstrInfo;
+ MSP430TargetLowering TLInfo;
+ MSP430SelectionDAGInfo TSInfo;
+ MSP430FrameLowering FrameLowering;
+
+public:
+ MSP430TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM);
+
+ virtual const TargetFrameLowering *getFrameLowering() const {
+ return &FrameLowering;
+ }
+ virtual const MSP430InstrInfo *getInstrInfo() const { return &InstrInfo; }
+ virtual const TargetData *getTargetData() const { return &DataLayout;}
+ virtual const MSP430Subtarget *getSubtargetImpl() const { return &Subtarget; }
+
+ virtual const TargetRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+
+ virtual const MSP430TargetLowering *getTargetLowering() const {
+ return &TLInfo;
+ }
+
+ virtual const MSP430SelectionDAGInfo* getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+
+ virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+}; // MSP430TargetMachine.
+
+} // end namespace llvm
+
+#endif // LLVM_TARGET_MSP430_TARGETMACHINE_H
diff --git a/contrib/llvm/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp b/contrib/llvm/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp
new file mode 100644
index 0000000..8b3e01e
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp
@@ -0,0 +1,20 @@
+//===-- MSP430TargetInfo.cpp - MSP430 Target Implementation ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MSP430.h"
+#include "llvm/Module.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::TheMSP430Target;
+
+extern "C" void LLVMInitializeMSP430TargetInfo() {
+ RegisterTarget<Triple::msp430>
+ X(TheMSP430Target, "msp430", "MSP430 [experimental]");
+}
diff --git a/contrib/llvm/lib/Target/Mangler.cpp b/contrib/llvm/lib/Target/Mangler.cpp
new file mode 100644
index 0000000..53ad155f
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mangler.cpp
@@ -0,0 +1,235 @@
+//===-- Mangler.cpp - Self-contained c/asm llvm name mangler --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Unified name mangler for assembly backends.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/Mangler.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Twine.h"
+using namespace llvm;
+
+static bool isAcceptableChar(char C, bool AllowPeriod) {
+ if ((C < 'a' || C > 'z') &&
+ (C < 'A' || C > 'Z') &&
+ (C < '0' || C > '9') &&
+ C != '_' && C != '$' && C != '@' &&
+ !(AllowPeriod && C == '.'))
+ return false;
+ return true;
+}
+
+static char HexDigit(int V) {
+ return V < 10 ? V+'0' : V+'A'-10;
+}
+
+static void MangleLetter(SmallVectorImpl<char> &OutName, unsigned char C) {
+ OutName.push_back('_');
+ OutName.push_back(HexDigit(C >> 4));
+ OutName.push_back(HexDigit(C & 15));
+ OutName.push_back('_');
+}
+
+/// NameNeedsEscaping - Return true if the identifier \arg Str needs quotes
+/// for this assembler.
+static bool NameNeedsEscaping(StringRef Str, const MCAsmInfo &MAI) {
+ assert(!Str.empty() && "Cannot create an empty MCSymbol");
+
+ // If the first character is a number and the target does not allow this, we
+ // need quotes.
+ if (!MAI.doesAllowNameToStartWithDigit() && Str[0] >= '0' && Str[0] <= '9')
+ return true;
+
+ // If any of the characters in the string is an unacceptable character, force
+ // quotes.
+ bool AllowPeriod = MAI.doesAllowPeriodsInName();
+ for (unsigned i = 0, e = Str.size(); i != e; ++i)
+ if (!isAcceptableChar(Str[i], AllowPeriod))
+ return true;
+ return false;
+}
+
+/// appendMangledName - Add the specified string in mangled form if it uses
+/// any unusual characters.
+static void appendMangledName(SmallVectorImpl<char> &OutName, StringRef Str,
+ const MCAsmInfo &MAI) {
+ // The first character is not allowed to be a number unless the target
+ // explicitly allows it.
+ if (!MAI.doesAllowNameToStartWithDigit() && Str[0] >= '0' && Str[0] <= '9') {
+ MangleLetter(OutName, Str[0]);
+ Str = Str.substr(1);
+ }
+
+ bool AllowPeriod = MAI.doesAllowPeriodsInName();
+ for (unsigned i = 0, e = Str.size(); i != e; ++i) {
+ if (!isAcceptableChar(Str[i], AllowPeriod))
+ MangleLetter(OutName, Str[i]);
+ else
+ OutName.push_back(Str[i]);
+ }
+}
+
+
+/// appendMangledQuotedName - On systems that support quoted symbols, we still
+/// have to escape some (obscure) characters like " and \n which would break the
+/// assembler's lexing.
+static void appendMangledQuotedName(SmallVectorImpl<char> &OutName,
+ StringRef Str) {
+ for (unsigned i = 0, e = Str.size(); i != e; ++i) {
+ if (Str[i] == '"' || Str[i] == '\n')
+ MangleLetter(OutName, Str[i]);
+ else
+ OutName.push_back(Str[i]);
+ }
+}
+
+
+/// getNameWithPrefix - Fill OutName with the name of the appropriate prefix
+/// and the specified name as the global variable name. GVName must not be
+/// empty.
+void Mangler::getNameWithPrefix(SmallVectorImpl<char> &OutName,
+ const Twine &GVName, ManglerPrefixTy PrefixTy) {
+ SmallString<256> TmpData;
+ StringRef Name = GVName.toStringRef(TmpData);
+ assert(!Name.empty() && "getNameWithPrefix requires non-empty name");
+
+ const MCAsmInfo &MAI = Context.getAsmInfo();
+
+ // If the global name is not led with \1, add the appropriate prefixes.
+ if (Name[0] == '\1') {
+ Name = Name.substr(1);
+ } else {
+ if (PrefixTy == Mangler::Private) {
+ const char *Prefix = MAI.getPrivateGlobalPrefix();
+ OutName.append(Prefix, Prefix+strlen(Prefix));
+ } else if (PrefixTy == Mangler::LinkerPrivate) {
+ const char *Prefix = MAI.getLinkerPrivateGlobalPrefix();
+ OutName.append(Prefix, Prefix+strlen(Prefix));
+ }
+
+ const char *Prefix = MAI.getGlobalPrefix();
+ if (Prefix[0] == 0)
+ ; // Common noop, no prefix.
+ else if (Prefix[1] == 0)
+ OutName.push_back(Prefix[0]); // Common, one character prefix.
+ else
+ OutName.append(Prefix, Prefix+strlen(Prefix)); // Arbitrary length prefix.
+ }
+
+ // If this is a simple string that doesn't need escaping, just append it.
+ if (!NameNeedsEscaping(Name, MAI) ||
+ // If quotes are supported, they can be used unless the string contains
+ // a quote or newline.
+ (MAI.doesAllowQuotesInName() &&
+ Name.find_first_of("\n\"") == StringRef::npos)) {
+ OutName.append(Name.begin(), Name.end());
+ return;
+ }
+
+ // On systems that do not allow quoted names, we need to mangle most
+ // strange characters.
+ if (!MAI.doesAllowQuotesInName())
+ return appendMangledName(OutName, Name, MAI);
+
+ // Okay, the system allows quoted strings. We can quote most anything, the
+ // only characters that need escaping are " and \n.
+ assert(Name.find_first_of("\n\"") != StringRef::npos);
+ return appendMangledQuotedName(OutName, Name);
+}
+
+/// AddFastCallStdCallSuffix - Microsoft fastcall and stdcall functions require
+/// a suffix on their name indicating the number of words of arguments they
+/// take.
+static void AddFastCallStdCallSuffix(SmallVectorImpl<char> &OutName,
+ const Function *F, const TargetData &TD) {
+ // Calculate arguments size total.
+ unsigned ArgWords = 0;
+ for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
+ AI != AE; ++AI) {
+ Type *Ty = AI->getType();
+ // 'Dereference' type in case of byval parameter attribute
+ if (AI->hasByValAttr())
+ Ty = cast<PointerType>(Ty)->getElementType();
+ // Size should be aligned to DWORD boundary
+ ArgWords += ((TD.getTypeAllocSize(Ty) + 3)/4)*4;
+ }
+
+ raw_svector_ostream(OutName) << '@' << ArgWords;
+}
+
+
+/// getNameWithPrefix - Fill OutName with the name of the appropriate prefix
+/// and the specified global variable's name. If the global variable doesn't
+/// have a name, this fills in a unique name for the global.
+void Mangler::getNameWithPrefix(SmallVectorImpl<char> &OutName,
+ const GlobalValue *GV,
+ bool isImplicitlyPrivate) {
+ ManglerPrefixTy PrefixTy = Mangler::Default;
+ if (GV->hasPrivateLinkage() || isImplicitlyPrivate)
+ PrefixTy = Mangler::Private;
+ else if (GV->hasLinkerPrivateLinkage() || GV->hasLinkerPrivateWeakLinkage() ||
+ GV->hasLinkerPrivateWeakDefAutoLinkage())
+ PrefixTy = Mangler::LinkerPrivate;
+
+ // If this global has a name, handle it simply.
+ if (GV->hasName()) {
+ getNameWithPrefix(OutName, GV->getName(), PrefixTy);
+ } else {
+ // Get the ID for the global, assigning a new one if we haven't got one
+ // already.
+ unsigned &ID = AnonGlobalIDs[GV];
+ if (ID == 0) ID = NextAnonGlobalID++;
+
+ // Must mangle the global into a unique ID.
+ getNameWithPrefix(OutName, "__unnamed_" + Twine(ID), PrefixTy);
+ }
+
+ // If we are supposed to add a microsoft-style suffix for stdcall/fastcall,
+ // add it.
+ if (Context.getAsmInfo().hasMicrosoftFastStdCallMangling()) {
+ if (const Function *F = dyn_cast<Function>(GV)) {
+ CallingConv::ID CC = F->getCallingConv();
+
+ // fastcall functions need to start with @.
+ // FIXME: This logic seems unlikely to be right.
+ if (CC == CallingConv::X86_FastCall) {
+ if (OutName[0] == '_')
+ OutName[0] = '@';
+ else
+ OutName.insert(OutName.begin(), '@');
+ }
+
+ // fastcall and stdcall functions usually need @42 at the end to specify
+ // the argument info.
+ FunctionType *FT = F->getFunctionType();
+ if ((CC == CallingConv::X86_FastCall || CC == CallingConv::X86_StdCall) &&
+ // "Pure" variadic functions do not receive @0 suffix.
+ (!FT->isVarArg() || FT->getNumParams() == 0 ||
+ (FT->getNumParams() == 1 && F->hasStructRetAttr())))
+ AddFastCallStdCallSuffix(OutName, F, TD);
+ }
+ }
+}
+
+/// getSymbol - Return the MCSymbol for the specified global value. This
+/// symbol is the main label that is the address of the global.
+MCSymbol *Mangler::getSymbol(const GlobalValue *GV) {
+ SmallString<60> NameStr;
+ getNameWithPrefix(NameStr, GV, false);
+ return Context.GetOrCreateSymbol(NameStr.str());
+}
+
+
diff --git a/contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp b/contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
new file mode 100644
index 0000000..3dafc61
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
@@ -0,0 +1,129 @@
+//===-- MipsInstPrinter.cpp - Convert Mips MCInst to assembly syntax ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an Mips MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "MipsInstPrinter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/StringExtras.h"
+using namespace llvm;
+
+#define GET_INSTRUCTION_NAME
+#include "MipsGenAsmWriter.inc"
+
+const char* Mips::MipsFCCToString(Mips::CondCode CC) {
+ switch (CC) {
+ case FCOND_F:
+ case FCOND_T: return "f";
+ case FCOND_UN:
+ case FCOND_OR: return "un";
+ case FCOND_OEQ:
+ case FCOND_UNE: return "eq";
+ case FCOND_UEQ:
+ case FCOND_ONE: return "ueq";
+ case FCOND_OLT:
+ case FCOND_UGE: return "olt";
+ case FCOND_ULT:
+ case FCOND_OGE: return "ult";
+ case FCOND_OLE:
+ case FCOND_UGT: return "ole";
+ case FCOND_ULE:
+ case FCOND_OGT: return "ule";
+ case FCOND_SF:
+ case FCOND_ST: return "sf";
+ case FCOND_NGLE:
+ case FCOND_GLE: return "ngle";
+ case FCOND_SEQ:
+ case FCOND_SNE: return "seq";
+ case FCOND_NGL:
+ case FCOND_GL: return "ngl";
+ case FCOND_LT:
+ case FCOND_NLT: return "lt";
+ case FCOND_NGE:
+ case FCOND_GE: return "nge";
+ case FCOND_LE:
+ case FCOND_NLE: return "le";
+ case FCOND_NGT:
+ case FCOND_GT: return "ngt";
+ }
+ llvm_unreachable("Impossible condition code!");
+}
+
+StringRef MipsInstPrinter::getOpcodeName(unsigned Opcode) const {
+ return getInstructionName(Opcode);
+}
+
+void MipsInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
+ OS << '$' << LowercaseString(getRegisterName(RegNo));
+}
+
+void MipsInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
+ StringRef Annot) {
+ printInstruction(MI, O);
+ printAnnotation(O, Annot);
+}
+
+void MipsInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isReg()) {
+ printRegName(O, Op.getReg());
+ return;
+ }
+
+ if (Op.isImm()) {
+ O << Op.getImm();
+ return;
+ }
+
+ assert(Op.isExpr() && "unknown operand kind in printOperand");
+ O << *Op.getExpr();
+}
+
+void MipsInstPrinter::printUnsignedImm(const MCInst *MI, int opNum,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(opNum);
+ if (MO.isImm())
+ O << (unsigned short int)MO.getImm();
+ else
+ printOperand(MI, opNum, O);
+}
+
+void MipsInstPrinter::
+printMemOperand(const MCInst *MI, int opNum, raw_ostream &O) {
+ // Load/Store memory operands -- imm($reg)
+ // If PIC target the target is loaded as the
+ // pattern lw $25,%call16($28)
+ printOperand(MI, opNum+1, O);
+ O << "(";
+ printOperand(MI, opNum, O);
+ O << ")";
+}
+
+void MipsInstPrinter::
+printMemOperandEA(const MCInst *MI, int opNum, raw_ostream &O) {
+ // when using stack locations for not load/store instructions
+ // print the same way as all normal 3 operand instructions.
+ printOperand(MI, opNum, O);
+ O << ", ";
+ printOperand(MI, opNum+1, O);
+ return;
+}
+
+void MipsInstPrinter::
+printFCCOperand(const MCInst *MI, int opNum, raw_ostream &O) {
+ const MCOperand& MO = MI->getOperand(opNum);
+ O << MipsFCCToString((Mips::CondCode)MO.getImm());
+}
diff --git a/contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.h b/contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
new file mode 100644
index 0000000..5c11165
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
@@ -0,0 +1,100 @@
+//===-- MipsInstPrinter.h - Convert Mips MCInst to assembly syntax --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints a Mips MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSINSTPRINTER_H
+#define MIPSINSTPRINTER_H
+#include "llvm/MC/MCInstPrinter.h"
+
+namespace llvm {
+// These enumeration declarations were orignally in MipsInstrInfo.h but
+// had to be moved here to avoid circular dependencies between
+// LLVMMipsCodeGen and LLVMMipsAsmPrinter.
+namespace Mips {
+// Mips Branch Codes
+enum FPBranchCode {
+ BRANCH_F,
+ BRANCH_T,
+ BRANCH_FL,
+ BRANCH_TL,
+ BRANCH_INVALID
+};
+
+// Mips Condition Codes
+enum CondCode {
+ // To be used with float branch True
+ FCOND_F,
+ FCOND_UN,
+ FCOND_OEQ,
+ FCOND_UEQ,
+ FCOND_OLT,
+ FCOND_ULT,
+ FCOND_OLE,
+ FCOND_ULE,
+ FCOND_SF,
+ FCOND_NGLE,
+ FCOND_SEQ,
+ FCOND_NGL,
+ FCOND_LT,
+ FCOND_NGE,
+ FCOND_LE,
+ FCOND_NGT,
+
+ // To be used with float branch False
+ // This conditions have the same mnemonic as the
+ // above ones, but are used with a branch False;
+ FCOND_T,
+ FCOND_OR,
+ FCOND_UNE,
+ FCOND_ONE,
+ FCOND_UGE,
+ FCOND_OGE,
+ FCOND_UGT,
+ FCOND_OGT,
+ FCOND_ST,
+ FCOND_GLE,
+ FCOND_SNE,
+ FCOND_GL,
+ FCOND_NLT,
+ FCOND_GE,
+ FCOND_NLE,
+ FCOND_GT
+};
+
+const char *MipsFCCToString(Mips::CondCode CC);
+} // end namespace Mips
+
+class TargetMachine;
+
+class MipsInstPrinter : public MCInstPrinter {
+public:
+ MipsInstPrinter(const MCAsmInfo &MAI) : MCInstPrinter(MAI) {}
+
+ // Autogenerated by tblgen.
+ void printInstruction(const MCInst *MI, raw_ostream &O);
+ static const char *getInstructionName(unsigned Opcode);
+ static const char *getRegisterName(unsigned RegNo);
+
+ virtual StringRef getOpcodeName(unsigned Opcode) const;
+ virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
+ virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
+
+private:
+ void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printUnsignedImm(const MCInst *MI, int opNum, raw_ostream &O);
+ void printMemOperand(const MCInst *MI, int opNum, raw_ostream &O);
+ void printMemOperandEA(const MCInst *MI, int opNum, raw_ostream &O);
+ void printFCCOperand(const MCInst *MI, int opNum, raw_ostream &O);
+};
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
new file mode 100644
index 0000000..f190ec4
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
@@ -0,0 +1,117 @@
+#include "MCTargetDesc/MipsMCTargetDesc.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCDirectives.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCMachObjectWriter.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Object/MachOFormat.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+class MipsELFObjectWriter : public MCELFObjectTargetWriter {
+public:
+ MipsELFObjectWriter(bool is64Bit, Triple::OSType OSType, uint16_t EMachine,
+ bool HasRelocationAddend)
+ : MCELFObjectTargetWriter(is64Bit, OSType, EMachine,
+ HasRelocationAddend) {}
+};
+
+class MipsAsmBackend : public MCAsmBackend {
+public:
+ MipsAsmBackend(const Target &T)
+ : MCAsmBackend() {}
+
+ unsigned getNumFixupKinds() const {
+ return 1; //tbd
+ }
+
+ /// ApplyFixup - Apply the \arg Value for given \arg Fixup into the provided
+ /// data fragment, at the offset specified by the fixup and following the
+ /// fixup kind as appropriate.
+ void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ uint64_t Value) const {
+ }
+
+ /// @name Target Relaxation Interfaces
+ /// @{
+
+ /// MayNeedRelaxation - Check whether the given instruction may need
+ /// relaxation.
+ ///
+ /// \param Inst - The instruction to test.
+ bool MayNeedRelaxation(const MCInst &Inst) const {
+ return false;
+ }
+
+ /// RelaxInstruction - Relax the instruction in the given fragment to the next
+ /// wider instruction.
+ ///
+ /// \param Inst - The instruction to relax, which may be the same as the
+ /// output.
+ /// \parm Res [output] - On return, the relaxed instruction.
+ void RelaxInstruction(const MCInst &Inst, MCInst &Res) const {
+ }
+
+ /// @}
+
+ /// WriteNopData - Write an (optimal) nop sequence of Count bytes to the given
+ /// output. If the target cannot generate such a sequence, it should return an
+ /// error.
+ ///
+ /// \return - True on success.
+ bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const {
+ return false;
+ }
+};
+
+class MipsEB_AsmBackend : public MipsAsmBackend {
+public:
+ Triple::OSType OSType;
+
+ MipsEB_AsmBackend(const Target &T, Triple::OSType _OSType)
+ : MipsAsmBackend(T), OSType(_OSType) {}
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ return createELFObjectWriter(createELFObjectTargetWriter(),
+ OS, /*IsLittleEndian*/ false);
+ }
+
+ MCELFObjectTargetWriter *createELFObjectTargetWriter() const {
+ return new MipsELFObjectWriter(false, OSType, ELF::EM_MIPS, false);
+ }
+};
+
+class MipsEL_AsmBackend : public MipsAsmBackend {
+public:
+ Triple::OSType OSType;
+
+ MipsEL_AsmBackend(const Target &T, Triple::OSType _OSType)
+ : MipsAsmBackend(T), OSType(_OSType) {}
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ return createELFObjectWriter(createELFObjectTargetWriter(),
+ OS, /*IsLittleEndian*/ true);
+ }
+
+ MCELFObjectTargetWriter *createELFObjectTargetWriter() const {
+ return new MipsELFObjectWriter(false, OSType, ELF::EM_MIPS, false);
+ }
+};
+}
+
+MCAsmBackend *llvm::createMipsAsmBackend(const Target &T, StringRef TT) {
+ Triple TheTriple(TT);
+
+ // just return little endian for now
+ //
+ return new MipsEL_AsmBackend(T, Triple(TT).getOS());
+}
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
new file mode 100644
index 0000000..f7a6fa9
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
@@ -0,0 +1,113 @@
+//===-- MipsBaseInfo.h - Top level definitions for ARM ------- --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains small standalone helper functions and enum definitions for
+// the Mips target useful for the compiler back-end and the MC libraries.
+//
+//===----------------------------------------------------------------------===//
+#ifndef MIPSBASEINFO_H
+#define MIPSBASEINFO_H
+
+#include "MipsMCTargetDesc.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/ErrorHandling.h"
+
+namespace llvm {
+/// getMipsRegisterNumbering - Given the enum value for some register,
+/// return the number that it corresponds to.
+inline static unsigned getMipsRegisterNumbering(unsigned RegEnum)
+{
+ switch (RegEnum) {
+ case Mips::ZERO: case Mips::ZERO_64: case Mips::F0: case Mips::D0_64:
+ case Mips::D0:
+ return 0;
+ case Mips::AT: case Mips::AT_64: case Mips::F1: case Mips::D1_64:
+ return 1;
+ case Mips::V0: case Mips::V0_64: case Mips::F2: case Mips::D2_64:
+ case Mips::D1:
+ return 2;
+ case Mips::V1: case Mips::V1_64: case Mips::F3: case Mips::D3_64:
+ return 3;
+ case Mips::A0: case Mips::A0_64: case Mips::F4: case Mips::D4_64:
+ case Mips::D2:
+ return 4;
+ case Mips::A1: case Mips::A1_64: case Mips::F5: case Mips::D5_64:
+ return 5;
+ case Mips::A2: case Mips::A2_64: case Mips::F6: case Mips::D6_64:
+ case Mips::D3:
+ return 6;
+ case Mips::A3: case Mips::A3_64: case Mips::F7: case Mips::D7_64:
+ return 7;
+ case Mips::T0: case Mips::T0_64: case Mips::F8: case Mips::D8_64:
+ case Mips::D4:
+ return 8;
+ case Mips::T1: case Mips::T1_64: case Mips::F9: case Mips::D9_64:
+ return 9;
+ case Mips::T2: case Mips::T2_64: case Mips::F10: case Mips::D10_64:
+ case Mips::D5:
+ return 10;
+ case Mips::T3: case Mips::T3_64: case Mips::F11: case Mips::D11_64:
+ return 11;
+ case Mips::T4: case Mips::T4_64: case Mips::F12: case Mips::D12_64:
+ case Mips::D6:
+ return 12;
+ case Mips::T5: case Mips::T5_64: case Mips::F13: case Mips::D13_64:
+ return 13;
+ case Mips::T6: case Mips::T6_64: case Mips::F14: case Mips::D14_64:
+ case Mips::D7:
+ return 14;
+ case Mips::T7: case Mips::T7_64: case Mips::F15: case Mips::D15_64:
+ return 15;
+ case Mips::S0: case Mips::S0_64: case Mips::F16: case Mips::D16_64:
+ case Mips::D8:
+ return 16;
+ case Mips::S1: case Mips::S1_64: case Mips::F17: case Mips::D17_64:
+ return 17;
+ case Mips::S2: case Mips::S2_64: case Mips::F18: case Mips::D18_64:
+ case Mips::D9:
+ return 18;
+ case Mips::S3: case Mips::S3_64: case Mips::F19: case Mips::D19_64:
+ return 19;
+ case Mips::S4: case Mips::S4_64: case Mips::F20: case Mips::D20_64:
+ case Mips::D10:
+ return 20;
+ case Mips::S5: case Mips::S5_64: case Mips::F21: case Mips::D21_64:
+ return 21;
+ case Mips::S6: case Mips::S6_64: case Mips::F22: case Mips::D22_64:
+ case Mips::D11:
+ return 22;
+ case Mips::S7: case Mips::S7_64: case Mips::F23: case Mips::D23_64:
+ return 23;
+ case Mips::T8: case Mips::T8_64: case Mips::F24: case Mips::D24_64:
+ case Mips::D12:
+ return 24;
+ case Mips::T9: case Mips::T9_64: case Mips::F25: case Mips::D25_64:
+ return 25;
+ case Mips::K0: case Mips::K0_64: case Mips::F26: case Mips::D26_64:
+ case Mips::D13:
+ return 26;
+ case Mips::K1: case Mips::K1_64: case Mips::F27: case Mips::D27_64:
+ return 27;
+ case Mips::GP: case Mips::GP_64: case Mips::F28: case Mips::D28_64:
+ case Mips::D14:
+ return 28;
+ case Mips::SP: case Mips::SP_64: case Mips::F29: case Mips::D29_64:
+ return 29;
+ case Mips::FP: case Mips::FP_64: case Mips::F30: case Mips::D30_64:
+ case Mips::D15:
+ return 30;
+ case Mips::RA: case Mips::RA_64: case Mips::F31: case Mips::D31_64:
+ return 31;
+ default: llvm_unreachable("Unknown register number!");
+ }
+ return 0; // Not reached
+}
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
new file mode 100644
index 0000000..8b099ea
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
@@ -0,0 +1,90 @@
+#ifndef LLVM_Mips_MipsFIXUPKINDS_H
+#define LLVM_Mips_MipsFIXUPKINDS_H
+
+//===-- Mips/MipsFixupKinds.h - Mips Specific Fixup Entries --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "llvm/MC/MCFixup.h"
+
+namespace llvm {
+namespace Mips {
+ enum Fixups {
+ // fixup_Mips_xxx - R_MIPS_NONE
+ fixup_Mips_NONE = FirstTargetFixupKind,
+
+ // fixup_Mips_xxx - R_MIPS_16.
+ fixup_Mips_16,
+
+ // fixup_Mips_xxx - R_MIPS_32.
+ fixup_Mips_32,
+
+ // fixup_Mips_xxx - R_MIPS_REL32.
+ fixup_Mips_REL32,
+
+ // fixup_Mips_xxx - R_MIPS_26.
+ fixup_Mips_26,
+
+ // fixup_Mips_xxx - R_MIPS_HI16.
+ fixup_Mips_HI16,
+
+ // fixup_Mips_xxx - R_MIPS_LO16.
+ fixup_Mips_LO16,
+
+ // fixup_Mips_xxx - R_MIPS_GPREL16.
+ fixup_Mips_GPREL16,
+
+ // fixup_Mips_xxx - R_MIPS_LITERAL.
+ fixup_Mips_LITERAL,
+
+ // fixup_Mips_xxx - R_MIPS_GOT16.
+ fixup_Mips_GOT16,
+
+ // fixup_Mips_xxx - R_MIPS_PC16.
+ fixup_Mips_PC16,
+
+ // fixup_Mips_xxx - R_MIPS_CALL16.
+ fixup_Mips_CALL16,
+
+ // fixup_Mips_xxx - R_MIPS_GPREL32.
+ fixup_Mips_GPREL32,
+
+ // fixup_Mips_xxx - R_MIPS_SHIFT5.
+ fixup_Mips_SHIFT5,
+
+ // fixup_Mips_xxx - R_MIPS_SHIFT6.
+ fixup_Mips_SHIFT6,
+
+ // fixup_Mips_xxx - R_MIPS_64.
+ fixup_Mips_64,
+
+ // fixup_Mips_xxx - R_MIPS_TLS_GD.
+ fixup_Mips_TLSGD,
+
+ // fixup_Mips_xxx - R_MIPS_TLS_GOTTPREL.
+ fixup_Mips_GOTTPREL,
+
+ // fixup_Mips_xxx - R_MIPS_TLS_TPREL_HI16.
+ fixup_Mips_TPREL_HI,
+
+ // fixup_Mips_xxx - R_MIPS_TLS_TPREL_LO16.
+ fixup_Mips_TPREL_LO,
+
+ // fixup_Mips_xxx - yyy. // This should become R_MIPS_PC16
+ fixup_Mips_Branch_PCRel,
+
+ // Marker
+ LastTargetFixupKind,
+ NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
+ };
+} // namespace llvm
+} // namespace Mips
+
+
+#endif /* LLVM_Mips_MipsFIXUPKINDS_H */
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
new file mode 100644
index 0000000..71ae804
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
@@ -0,0 +1,39 @@
+//===-- MipsMCAsmInfo.cpp - Mips asm properties ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the MipsMCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsMCAsmInfo.h"
+#include "llvm/ADT/Triple.h"
+
+using namespace llvm;
+
+MipsMCAsmInfo::MipsMCAsmInfo(const Target &T, StringRef TT) {
+ Triple TheTriple(TT);
+ if ((TheTriple.getArch() == Triple::mips) ||
+ (TheTriple.getArch() == Triple::mips64))
+ IsLittleEndian = false;
+
+ AlignmentIsInBytes = false;
+ Data16bitsDirective = "\t.2byte\t";
+ Data32bitsDirective = "\t.4byte\t";
+ Data64bitsDirective = 0;
+ PrivateGlobalPrefix = "$";
+ CommentString = "#";
+ ZeroDirective = "\t.space\t";
+ GPRel32Directive = "\t.gpword\t";
+ WeakRefDirective = "\t.weak\t";
+
+ SupportsDebugInformation = true;
+ ExceptionsType = ExceptionHandling::DwarfCFI;
+ HasLEB128 = true;
+ DwarfRegNumForCFI = true;
+}
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h
new file mode 100644
index 0000000..41b7192
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h
@@ -0,0 +1,30 @@
+//=====-- MipsMCAsmInfo.h - Mips asm properties ---------------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the MipsMCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSTARGETASMINFO_H
+#define MIPSTARGETASMINFO_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCAsmInfo.h"
+
+namespace llvm {
+ class Target;
+
+ class MipsMCAsmInfo : public MCAsmInfo {
+ public:
+ explicit MipsMCAsmInfo(const Target &T, StringRef TT);
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
new file mode 100644
index 0000000..d66de23
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
@@ -0,0 +1,52 @@
+//===-- MipsMCCodeEmitter.cpp - Convert Mips code to machine code ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MipsMCCodeEmitter class.
+//
+//===----------------------------------------------------------------------===//
+//
+#define DEBUG_TYPE "mccodeemitter"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/raw_ostream.h"
+#include "MCTargetDesc/MipsMCTargetDesc.h"
+
+using namespace llvm;
+
+namespace {
+class MipsMCCodeEmitter : public MCCodeEmitter {
+ MipsMCCodeEmitter(const MipsMCCodeEmitter &); // DO NOT IMPLEMENT
+ void operator=(const MipsMCCodeEmitter &); // DO NOT IMPLEMENT
+ const MCInstrInfo &MCII;
+ const MCSubtargetInfo &STI;
+
+public:
+ MipsMCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti,
+ MCContext &ctx)
+ : MCII(mcii), STI(sti) {}
+
+ ~MipsMCCodeEmitter() {}
+
+ void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ }
+}; // class MipsMCCodeEmitter
+} // namespace
+
+MCCodeEmitter *llvm::createMipsMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
+ return new MipsMCCodeEmitter(MCII, STI, Ctx);
+}
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
new file mode 100644
index 0000000..1f9e3dd
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
@@ -0,0 +1,162 @@
+//===-- MipsMCTargetDesc.cpp - Mips Target Descriptions ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides Mips specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsMCTargetDesc.h"
+#include "MipsMCAsmInfo.h"
+#include "InstPrinter/MipsInstPrinter.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/MC/MCCodeGenInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "MipsGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "MipsGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "MipsGenRegisterInfo.inc"
+
+using namespace llvm;
+
+static MCInstrInfo *createMipsMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitMipsMCInstrInfo(X);
+ return X;
+}
+
+static MCRegisterInfo *createMipsMCRegisterInfo(StringRef TT) {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitMipsMCRegisterInfo(X, Mips::RA);
+ return X;
+}
+
+static MCSubtargetInfo *createMipsMCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS) {
+ MCSubtargetInfo *X = new MCSubtargetInfo();
+ InitMipsMCSubtargetInfo(X, TT, CPU, FS);
+ return X;
+}
+
+static MCAsmInfo *createMipsMCAsmInfo(const Target &T, StringRef TT) {
+ MCAsmInfo *MAI = new MipsMCAsmInfo(T, TT);
+
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(Mips::SP, 0);
+ MAI->addInitialFrameState(0, Dst, Src);
+
+ return MAI;
+}
+
+static MCCodeGenInfo *createMipsMCCodeGenInfo(StringRef TT, Reloc::Model RM,
+ CodeModel::Model CM) {
+ MCCodeGenInfo *X = new MCCodeGenInfo();
+ if (RM == Reloc::Default)
+ RM = Reloc::PIC_;
+ X->InitMCCodeGenInfo(RM, CM);
+ return X;
+}
+
+static MCInstPrinter *createMipsMCInstPrinter(const Target &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI,
+ const MCSubtargetInfo &STI) {
+ return new MipsInstPrinter(MAI);
+}
+
+static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
+ MCContext &Ctx, MCAsmBackend &MAB,
+ raw_ostream &_OS,
+ MCCodeEmitter *_Emitter,
+ bool RelaxAll,
+ bool NoExecStack) {
+ Triple TheTriple(TT);
+
+ return createELFStreamer(Ctx, MAB, _OS, _Emitter, RelaxAll, NoExecStack);
+}
+
+extern "C" void LLVMInitializeMipsTargetMC() {
+ // Register the MC asm info.
+ RegisterMCAsmInfoFn X(TheMipsTarget, createMipsMCAsmInfo);
+ RegisterMCAsmInfoFn Y(TheMipselTarget, createMipsMCAsmInfo);
+ RegisterMCAsmInfoFn A(TheMips64Target, createMipsMCAsmInfo);
+ RegisterMCAsmInfoFn B(TheMips64elTarget, createMipsMCAsmInfo);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(TheMipsTarget,
+ createMipsMCCodeGenInfo);
+ TargetRegistry::RegisterMCCodeGenInfo(TheMipselTarget,
+ createMipsMCCodeGenInfo);
+ TargetRegistry::RegisterMCCodeGenInfo(TheMips64Target,
+ createMipsMCCodeGenInfo);
+ TargetRegistry::RegisterMCCodeGenInfo(TheMips64elTarget,
+ createMipsMCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(TheMipsTarget, createMipsMCInstrInfo);
+ TargetRegistry::RegisterMCInstrInfo(TheMipselTarget, createMipsMCInstrInfo);
+ TargetRegistry::RegisterMCInstrInfo(TheMips64Target, createMipsMCInstrInfo);
+ TargetRegistry::RegisterMCInstrInfo(TheMips64elTarget, createMipsMCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(TheMipsTarget, createMipsMCRegisterInfo);
+ TargetRegistry::RegisterMCRegInfo(TheMipselTarget, createMipsMCRegisterInfo);
+ TargetRegistry::RegisterMCRegInfo(TheMips64Target, createMipsMCRegisterInfo);
+ TargetRegistry::RegisterMCRegInfo(TheMips64elTarget,
+ createMipsMCRegisterInfo);
+
+ // Register the MC Code Emitter
+ TargetRegistry::RegisterMCCodeEmitter(TheMipsTarget, createMipsMCCodeEmitter);
+ TargetRegistry::RegisterMCCodeEmitter(TheMipselTarget,
+ createMipsMCCodeEmitter);
+ TargetRegistry::RegisterMCCodeEmitter(TheMips64Target,
+ createMipsMCCodeEmitter);
+ TargetRegistry::RegisterMCCodeEmitter(TheMips64elTarget,
+ createMipsMCCodeEmitter);
+
+ // Register the object streamer.
+ TargetRegistry::RegisterMCObjectStreamer(TheMipsTarget, createMCStreamer);
+ TargetRegistry::RegisterMCObjectStreamer(TheMipselTarget, createMCStreamer);
+ TargetRegistry::RegisterMCObjectStreamer(TheMips64Target, createMCStreamer);
+ TargetRegistry::RegisterMCObjectStreamer(TheMips64elTarget, createMCStreamer);
+
+ // Register the asm backend.
+ TargetRegistry::RegisterMCAsmBackend(TheMipsTarget, createMipsAsmBackend);
+ TargetRegistry::RegisterMCAsmBackend(TheMipselTarget, createMipsAsmBackend);
+ TargetRegistry::RegisterMCAsmBackend(TheMips64Target, createMipsAsmBackend);
+ TargetRegistry::RegisterMCAsmBackend(TheMips64elTarget, createMipsAsmBackend);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(TheMipsTarget,
+ createMipsMCSubtargetInfo);
+ TargetRegistry::RegisterMCSubtargetInfo(TheMipselTarget,
+ createMipsMCSubtargetInfo);
+ TargetRegistry::RegisterMCSubtargetInfo(TheMips64Target,
+ createMipsMCSubtargetInfo);
+ TargetRegistry::RegisterMCSubtargetInfo(TheMips64elTarget,
+ createMipsMCSubtargetInfo);
+
+ // Register the MCInstPrinter.
+ TargetRegistry::RegisterMCInstPrinter(TheMipsTarget,
+ createMipsMCInstPrinter);
+ TargetRegistry::RegisterMCInstPrinter(TheMipselTarget,
+ createMipsMCInstPrinter);
+ TargetRegistry::RegisterMCInstPrinter(TheMips64Target,
+ createMipsMCInstPrinter);
+ TargetRegistry::RegisterMCInstPrinter(TheMips64elTarget,
+ createMipsMCInstPrinter);
+}
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
new file mode 100644
index 0000000..7a0042a
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
@@ -0,0 +1,50 @@
+//===-- MipsMCTargetDesc.h - Mips Target Descriptions -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides Mips specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSMCTARGETDESC_H
+#define MIPSMCTARGETDESC_H
+
+namespace llvm {
+class MCAsmBackend;
+class MCInstrInfo;
+class MCCodeEmitter;
+class MCContext;
+class MCSubtargetInfo;
+class StringRef;
+class Target;
+
+extern Target TheMipsTarget;
+extern Target TheMipselTarget;
+extern Target TheMips64Target;
+extern Target TheMips64elTarget;
+
+MCCodeEmitter *createMipsMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx);
+
+MCAsmBackend *createMipsAsmBackend(const Target &T, StringRef TT);
+} // End llvm namespace
+
+// Defines symbolic names for Mips registers. This defines a mapping from
+// register name to register number.
+#define GET_REGINFO_ENUM
+#include "MipsGenRegisterInfo.inc"
+
+// Defines symbolic names for the Mips instructions.
+#define GET_INSTRINFO_ENUM
+#include "MipsGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "MipsGenSubtargetInfo.inc"
+
+#endif
diff --git a/contrib/llvm/lib/Target/Mips/Mips.h b/contrib/llvm/lib/Target/Mips/Mips.h
new file mode 100644
index 0000000..bacecf2
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/Mips.h
@@ -0,0 +1,37 @@
+//===-- Mips.h - Top-level interface for Mips representation ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in
+// the LLVM Mips back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TARGET_MIPS_H
+#define TARGET_MIPS_H
+
+#include "MCTargetDesc/MipsMCTargetDesc.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+ class MipsTargetMachine;
+ class FunctionPass;
+ class MachineCodeEmitter;
+ class formatted_raw_ostream;
+
+ FunctionPass *createMipsISelDag(MipsTargetMachine &TM);
+ FunctionPass *createMipsDelaySlotFillerPass(MipsTargetMachine &TM);
+ FunctionPass *createMipsExpandPseudoPass(MipsTargetMachine &TM);
+ FunctionPass *createMipsEmitGPRestorePass(MipsTargetMachine &TM);
+
+ FunctionPass *createMipsJITCodeEmitterPass(MipsTargetMachine &TM,
+ JITCodeEmitter &JCE);
+
+} // end namespace llvm;
+
+#endif
diff --git a/contrib/llvm/lib/Target/Mips/Mips.td b/contrib/llvm/lib/Target/Mips/Mips.td
new file mode 100644
index 0000000..39c2c16
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/Mips.td
@@ -0,0 +1,97 @@
+//===- Mips.td - Describe the Mips Target Machine ----------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This is the top level entry point for the Mips target.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// Register File, Calling Conv, Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "MipsRegisterInfo.td"
+include "MipsSchedule.td"
+include "MipsInstrInfo.td"
+include "MipsCallingConv.td"
+
+def MipsInstrInfo : InstrInfo;
+
+//===----------------------------------------------------------------------===//
+// Mips Subtarget features //
+//===----------------------------------------------------------------------===//
+
+def FeatureGP64Bit : SubtargetFeature<"gp64", "IsGP64bit", "true",
+ "General Purpose Registers are 64-bit wide.">;
+def FeatureFP64Bit : SubtargetFeature<"fp64", "IsFP64bit", "true",
+ "Support 64-bit FP registers.">;
+def FeatureSingleFloat : SubtargetFeature<"single-float", "IsSingleFloat",
+ "true", "Only supports single precision float">;
+def FeatureO32 : SubtargetFeature<"o32", "MipsABI", "O32",
+ "Enable o32 ABI">;
+def FeatureN32 : SubtargetFeature<"n32", "MipsABI", "N32",
+ "Enable n32 ABI">;
+def FeatureN64 : SubtargetFeature<"n64", "MipsABI", "N64",
+ "Enable n64 ABI">;
+def FeatureEABI : SubtargetFeature<"eabi", "MipsABI", "EABI",
+ "Enable eabi ABI">;
+def FeatureVFPU : SubtargetFeature<"vfpu", "HasVFPU",
+ "true", "Enable vector FPU instructions.">;
+def FeatureSEInReg : SubtargetFeature<"seinreg", "HasSEInReg", "true",
+ "Enable 'signext in register' instructions.">;
+def FeatureCondMov : SubtargetFeature<"condmov", "HasCondMov", "true",
+ "Enable 'conditional move' instructions.">;
+def FeatureMulDivAdd : SubtargetFeature<"muldivadd", "HasMulDivAdd", "true",
+ "Enable 'multiply add/sub' instructions.">;
+def FeatureMinMax : SubtargetFeature<"minmax", "HasMinMax", "true",
+ "Enable 'min/max' instructions.">;
+def FeatureSwap : SubtargetFeature<"swap", "HasSwap", "true",
+ "Enable 'byte/half swap' instructions.">;
+def FeatureBitCount : SubtargetFeature<"bitcount", "HasBitCount", "true",
+ "Enable 'count leading bits' instructions.">;
+def FeatureMips32 : SubtargetFeature<"mips32", "MipsArchVersion", "Mips32",
+ "Mips32 ISA Support",
+ [FeatureCondMov, FeatureBitCount]>;
+def FeatureMips32r2 : SubtargetFeature<"mips32r2", "MipsArchVersion",
+ "Mips32r2", "Mips32r2 ISA Support",
+ [FeatureMips32, FeatureSEInReg]>;
+def FeatureMips64 : SubtargetFeature<"mips64", "MipsArchVersion",
+ "Mips64", "Mips64 ISA Support",
+ [FeatureGP64Bit, FeatureFP64Bit,
+ FeatureMips32]>;
+def FeatureMips64r2 : SubtargetFeature<"mips64r2", "MipsArchVersion",
+ "Mips64r2", "Mips64r2 ISA Support",
+ [FeatureMips64, FeatureMips32r2]>;
+
+//===----------------------------------------------------------------------===//
+// Mips processors supported.
+//===----------------------------------------------------------------------===//
+
+class Proc<string Name, list<SubtargetFeature> Features>
+ : Processor<Name, MipsGenericItineraries, Features>;
+
+def : Proc<"mips32r1", [FeatureMips32]>;
+def : Proc<"4ke", [FeatureMips32r2]>;
+def : Proc<"mips64r1", [FeatureMips64]>;
+def : Proc<"mips64r2", [FeatureMips64r2]>;
+
+def MipsAsmWriter : AsmWriter {
+ string AsmWriterClassName = "InstPrinter";
+ bit isMCAsmWriter = 1;
+}
+
+def Mips : Target {
+ let InstructionSet = MipsInstrInfo;
+
+ let AssemblyWriters = [MipsAsmWriter];
+}
+
diff --git a/contrib/llvm/lib/Target/Mips/Mips64InstrInfo.td b/contrib/llvm/lib/Target/Mips/Mips64InstrInfo.td
new file mode 100644
index 0000000..49b0223
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/Mips64InstrInfo.td
@@ -0,0 +1,214 @@
+//===- Mips64InstrInfo.td - Mips64 Instruction Information -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes Mips64 instructions.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Mips Operand, Complex Patterns and Transformations Definitions.
+//===----------------------------------------------------------------------===//
+
+// Instruction operand types
+def shamt_64 : Operand<i64>;
+
+// Unsigned Operand
+def uimm16_64 : Operand<i64> {
+ let PrintMethod = "printUnsignedImm";
+}
+
+// Transformation Function - get Imm - 32.
+def Subtract32 : SDNodeXForm<imm, [{
+ return getI32Imm((unsigned)N->getZExtValue() - 32);
+}]>;
+
+// imm32_63 predicate - True if imm is in range [32, 63].
+def imm32_63 : ImmLeaf<i64,
+ [{return (int32_t)Imm >= 32 && (int32_t)Imm < 64;}],
+ Subtract32>;
+
+//===----------------------------------------------------------------------===//
+// Instructions specific format
+//===----------------------------------------------------------------------===//
+// Shifts
+class LogicR_shift_rotate_imm64<bits<6> func, bits<5> _rs, string instr_asm,
+ SDNode OpNode, PatFrag PF>:
+ FR<0x00, func, (outs CPU64Regs:$dst), (ins CPU64Regs:$b, shamt_64:$c),
+ !strconcat(instr_asm, "\t$dst, $b, $c"),
+ [(set CPU64Regs:$dst, (OpNode CPU64Regs:$b, (i64 PF:$c)))],
+ IIAlu> {
+ let rs = _rs;
+}
+
+class LogicR_shift_rotate_reg64<bits<6> func, bits<5> _shamt, string instr_asm,
+ SDNode OpNode>:
+ FR<0x00, func, (outs CPU64Regs:$dst), (ins CPU64Regs:$c, CPU64Regs:$b),
+ !strconcat(instr_asm, "\t$dst, $b, $c"),
+ [(set CPU64Regs:$dst, (OpNode CPU64Regs:$b, CPU64Regs:$c))], IIAlu> {
+ let shamt = _shamt;
+}
+
+// Mul, Div
+let Defs = [HI64, LO64] in {
+ let isCommutable = 1 in
+ class Mul64<bits<6> func, string instr_asm, InstrItinClass itin>:
+ FR<0x00, func, (outs), (ins CPU64Regs:$a, CPU64Regs:$b),
+ !strconcat(instr_asm, "\t$a, $b"), [], itin>;
+
+ class Div64<SDNode op, bits<6> func, string instr_asm, InstrItinClass itin>:
+ FR<0x00, func, (outs), (ins CPU64Regs:$a, CPU64Regs:$b),
+ !strconcat(instr_asm, "\t$$zero, $a, $b"),
+ [(op CPU64Regs:$a, CPU64Regs:$b)], itin>;
+}
+
+// Move from Hi/Lo
+let shamt = 0 in {
+let rs = 0, rt = 0 in
+class MoveFromLOHI64<bits<6> func, string instr_asm>:
+ FR<0x00, func, (outs CPU64Regs:$dst), (ins),
+ !strconcat(instr_asm, "\t$dst"), [], IIHiLo>;
+
+let rt = 0, rd = 0 in
+class MoveToLOHI64<bits<6> func, string instr_asm>:
+ FR<0x00, func, (outs), (ins CPU64Regs:$src),
+ !strconcat(instr_asm, "\t$src"), [], IIHiLo>;
+}
+
+// Count Leading Ones/Zeros in Word
+class CountLeading64<bits<6> func, string instr_asm, list<dag> pattern>:
+ FR<0x1c, func, (outs CPU64Regs:$dst), (ins CPU64Regs:$src),
+ !strconcat(instr_asm, "\t$dst, $src"), pattern, IIAlu>,
+ Requires<[HasBitCount]> {
+ let shamt = 0;
+ let rt = rd;
+}
+
+//===----------------------------------------------------------------------===//
+// Instruction definition
+//===----------------------------------------------------------------------===//
+
+/// Arithmetic Instructions (ALU Immediate)
+def DADDiu : ArithLogicI<0x19, "daddiu", add, simm16_64, immSExt16,
+ CPU64Regs>;
+def DANDi : ArithLogicI<0x0c, "andi", and, uimm16_64, immZExt16, CPU64Regs>;
+def SLTi64 : SetCC_I<0x0a, "slti", setlt, simm16_64, immSExt16, CPU64Regs>;
+def SLTiu64 : SetCC_I<0x0b, "sltiu", setult, simm16_64, immSExt16, CPU64Regs>;
+def ORi64 : ArithLogicI<0x0d, "ori", or, uimm16_64, immZExt16, CPU64Regs>;
+def XORi64 : ArithLogicI<0x0e, "xori", xor, uimm16_64, immZExt16, CPU64Regs>;
+
+/// Arithmetic Instructions (3-Operand, R-Type)
+def DADDu : ArithLogicR<0x00, 0x2d, "daddu", add, IIAlu, CPU64Regs, 1>;
+def DSUBu : ArithLogicR<0x00, 0x2f, "dsubu", sub, IIAlu, CPU64Regs>;
+def SLT64 : SetCC_R<0x00, 0x2a, "slt", setlt, CPU64Regs>;
+def SLTu64 : SetCC_R<0x00, 0x2b, "sltu", setult, CPU64Regs>;
+def AND64 : ArithLogicR<0x00, 0x24, "and", and, IIAlu, CPU64Regs, 1>;
+def OR64 : ArithLogicR<0x00, 0x25, "or", or, IIAlu, CPU64Regs, 1>;
+def XOR64 : ArithLogicR<0x00, 0x26, "xor", xor, IIAlu, CPU64Regs, 1>;
+def NOR64 : LogicNOR<0x00, 0x27, "nor", CPU64Regs>;
+
+/// Shift Instructions
+def DSLL : LogicR_shift_rotate_imm64<0x38, 0x00, "dsll", shl, immZExt5>;
+def DSRL : LogicR_shift_rotate_imm64<0x3a, 0x00, "dsrl", srl, immZExt5>;
+def DSRA : LogicR_shift_rotate_imm64<0x3b, 0x00, "dsra", sra, immZExt5>;
+def DSLL32 : LogicR_shift_rotate_imm64<0x3c, 0x00, "dsll32", shl, imm32_63>;
+def DSRL32 : LogicR_shift_rotate_imm64<0x3e, 0x00, "dsrl32", srl, imm32_63>;
+def DSRA32 : LogicR_shift_rotate_imm64<0x3f, 0x00, "dsra32", sra, imm32_63>;
+def DSLLV : LogicR_shift_rotate_reg64<0x24, 0x00, "dsllv", shl>;
+def DSRLV : LogicR_shift_rotate_reg64<0x26, 0x00, "dsrlv", srl>;
+def DSRAV : LogicR_shift_rotate_reg64<0x27, 0x00, "dsrav", sra>;
+
+// Rotate Instructions
+let Predicates = [HasMips64r2] in {
+ def DROTR : LogicR_shift_rotate_imm64<0x3a, 0x01, "drotr", rotr, immZExt5>;
+ def DROTR32 : LogicR_shift_rotate_imm64<0x3e, 0x01, "drotr32", rotr,
+ imm32_63>;
+ def DROTRV : LogicR_shift_rotate_reg64<0x16, 0x01, "drotrv", rotr>;
+}
+
+/// Load and Store Instructions
+/// aligned
+defm LB64 : LoadM64<0x20, "lb", sextloadi8>;
+defm LBu64 : LoadM64<0x24, "lbu", zextloadi8>;
+defm LH64 : LoadM64<0x21, "lh", sextloadi16_a>;
+defm LHu64 : LoadM64<0x25, "lhu", zextloadi16_a>;
+defm LW64 : LoadM64<0x23, "lw", sextloadi32_a>;
+defm LWu64 : LoadM64<0x27, "lwu", zextloadi32_a>;
+defm SB64 : StoreM64<0x28, "sb", truncstorei8>;
+defm SH64 : StoreM64<0x29, "sh", truncstorei16_a>;
+defm SW64 : StoreM64<0x2b, "sw", truncstorei32_a>;
+defm LD : LoadM64<0x37, "ld", load_a>;
+defm SD : StoreM64<0x3f, "sd", store_a>;
+
+/// unaligned
+defm ULH64 : LoadM64<0x21, "ulh", sextloadi16_u, 1>;
+defm ULHu64 : LoadM64<0x25, "ulhu", zextloadi16_u, 1>;
+defm ULW64 : LoadM64<0x23, "ulw", sextloadi32_u, 1>;
+defm USH64 : StoreM64<0x29, "ush", truncstorei16_u, 1>;
+defm USW64 : StoreM64<0x2b, "usw", truncstorei32_u, 1>;
+defm ULD : LoadM64<0x37, "uld", load_u, 1>;
+defm USD : StoreM64<0x3f, "usd", store_u, 1>;
+
+/// Jump and Branch Instructions
+def BEQ64 : CBranch<0x04, "beq", seteq, CPU64Regs>;
+def BNE64 : CBranch<0x05, "bne", setne, CPU64Regs>;
+def BGEZ64 : CBranchZero<0x01, 1, "bgez", setge, CPU64Regs>;
+def BGTZ64 : CBranchZero<0x07, 0, "bgtz", setgt, CPU64Regs>;
+def BLEZ64 : CBranchZero<0x07, 0, "blez", setle, CPU64Regs>;
+def BLTZ64 : CBranchZero<0x01, 0, "bltz", setlt, CPU64Regs>;
+
+/// Multiply and Divide Instructions.
+def DMULT : Mul64<0x1c, "dmult", IIImul>;
+def DMULTu : Mul64<0x1d, "dmultu", IIImul>;
+def DSDIV : Div64<MipsDivRem, 0x1e, "ddiv", IIIdiv>;
+def DUDIV : Div64<MipsDivRemU, 0x1f, "ddivu", IIIdiv>;
+
+let Defs = [HI64] in
+ def MTHI64 : MoveToLOHI64<0x11, "mthi">;
+let Defs = [LO64] in
+ def MTLO64 : MoveToLOHI64<0x13, "mtlo">;
+
+let Uses = [HI64] in
+ def MFHI64 : MoveFromLOHI64<0x10, "mfhi">;
+let Uses = [LO64] in
+ def MFLO64 : MoveFromLOHI64<0x12, "mflo">;
+
+/// Count Leading
+def DCLZ : CountLeading64<0x24, "dclz",
+ [(set CPU64Regs:$dst, (ctlz CPU64Regs:$src))]>;
+def DCLO : CountLeading64<0x25, "dclo",
+ [(set CPU64Regs:$dst, (ctlz (not CPU64Regs:$src)))]>;
+
+//===----------------------------------------------------------------------===//
+// Arbitrary patterns that map to one or more instructions
+//===----------------------------------------------------------------------===//
+
+// Small immediates
+def : Pat<(i64 immSExt16:$in),
+ (DADDiu ZERO_64, imm:$in)>;
+def : Pat<(i64 immZExt16:$in),
+ (ORi64 ZERO_64, imm:$in)>;
+
+// zextloadi32_u
+def : Pat<(zextloadi32_u addr:$a), (DSRL (DSLL (ULW64_P8 addr:$a), 32), 32)>,
+ Requires<[IsN64]>;
+def : Pat<(zextloadi32_u addr:$a), (DSRL (DSLL (ULW64 addr:$a), 32), 32)>,
+ Requires<[NotN64]>;
+
+// hi/lo relocs
+def : Pat<(i64 (MipsLo tglobaladdr:$in)), (DADDiu ZERO_64, tglobaladdr:$in)>;
+
+defm : BrcondPats<CPU64Regs, BEQ64, BNE64, SLT64, SLTu64, SLTi64, SLTiu64,
+ ZERO_64>;
+
+// setcc patterns
+defm : SeteqPats<CPU64Regs, SLTiu64, XOR64, SLTu64, ZERO_64>;
+defm : SetlePats<CPU64Regs, SLT64, SLTu64>;
+defm : SetgtPats<CPU64Regs, SLT64, SLTu64>;
+defm : SetgePats<CPU64Regs, SLT64, SLTu64>;
+defm : SetgeImmPats<CPU64Regs, SLTi64, SLTiu64>;
diff --git a/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp b/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
new file mode 100644
index 0000000..0e82681
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -0,0 +1,458 @@
+//===-- MipsAsmPrinter.cpp - Mips LLVM assembly writer --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to GAS-format MIPS assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mips-asm-printer"
+#include "MipsAsmPrinter.h"
+#include "Mips.h"
+#include "MipsInstrInfo.h"
+#include "MipsMachineFunction.h"
+#include "MipsMCInstLower.h"
+#include "MipsMCSymbolRefExpr.h"
+#include "InstPrinter/MipsInstPrinter.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/Instructions.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Analysis/DebugInfo.h"
+
+using namespace llvm;
+
+static bool isUnalignedLoadStore(unsigned Opc) {
+ return Opc == Mips::ULW || Opc == Mips::ULH || Opc == Mips::ULHu ||
+ Opc == Mips::USW || Opc == Mips::USH ||
+ Opc == Mips::ULW_P8 || Opc == Mips::ULH_P8 || Opc == Mips::ULHu_P8 ||
+ Opc == Mips::USW_P8 || Opc == Mips::USH_P8;
+}
+
+void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ SmallString<128> Str;
+ raw_svector_ostream OS(Str);
+
+ if (MI->isDebugValue()) {
+ PrintDebugValueComment(MI, OS);
+ return;
+ }
+
+ MipsMCInstLower MCInstLowering(Mang, *MF, *this);
+ unsigned Opc = MI->getOpcode();
+ MCInst TmpInst0;
+ MCInstLowering.Lower(MI, TmpInst0);
+
+ // Enclose unaligned load or store with .macro & .nomacro directives.
+ if (isUnalignedLoadStore(Opc)) {
+ MCInst Directive;
+ Directive.setOpcode(Mips::MACRO);
+ OutStreamer.EmitInstruction(Directive);
+ OutStreamer.EmitInstruction(TmpInst0);
+ Directive.setOpcode(Mips::NOMACRO);
+ OutStreamer.EmitInstruction(Directive);
+ return;
+ }
+
+ OutStreamer.EmitInstruction(TmpInst0);
+}
+
+//===----------------------------------------------------------------------===//
+//
+// Mips Asm Directives
+//
+// -- Frame directive "frame Stackpointer, Stacksize, RARegister"
+// Describe the stack frame.
+//
+// -- Mask directives "(f)mask bitmask, offset"
+// Tells the assembler which registers are saved and where.
+// bitmask - contain a little endian bitset indicating which registers are
+// saved on function prologue (e.g. with a 0x80000000 mask, the
+// assembler knows the register 31 (RA) is saved at prologue.
+// offset - the position before stack pointer subtraction indicating where
+// the first saved register on prologue is located. (e.g. with a
+//
+// Consider the following function prologue:
+//
+// .frame $fp,48,$ra
+// .mask 0xc0000000,-8
+// addiu $sp, $sp, -48
+// sw $ra, 40($sp)
+// sw $fp, 36($sp)
+//
+// With a 0xc0000000 mask, the assembler knows the register 31 (RA) and
+// 30 (FP) are saved at prologue. As the save order on prologue is from
+// left to right, RA is saved first. A -8 offset means that after the
+// stack pointer subtration, the first register in the mask (RA) will be
+// saved at address 48-8=40.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Mask directives
+//===----------------------------------------------------------------------===//
+
+// Create a bitmask with all callee saved registers for CPU or Floating Point
+// registers. For CPU registers consider RA, GP and FP for saving if necessary.
+void MipsAsmPrinter::printSavedRegsBitmask(raw_ostream &O) {
+ // CPU and FPU Saved Registers Bitmasks
+ unsigned CPUBitmask = 0, FPUBitmask = 0;
+ int CPUTopSavedRegOff, FPUTopSavedRegOff;
+
+ // Set the CPU and FPU Bitmasks
+ const MachineFrameInfo *MFI = MF->getFrameInfo();
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ // size of stack area to which FP callee-saved regs are saved.
+ unsigned CPURegSize = Mips::CPURegsRegisterClass->getSize();
+ unsigned FGR32RegSize = Mips::FGR32RegisterClass->getSize();
+ unsigned AFGR64RegSize = Mips::AFGR64RegisterClass->getSize();
+ bool HasAFGR64Reg = false;
+ unsigned CSFPRegsSize = 0;
+ unsigned i, e = CSI.size();
+
+ // Set FPU Bitmask.
+ for (i = 0; i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ if (Mips::CPURegsRegisterClass->contains(Reg))
+ break;
+
+ unsigned RegNum = MipsRegisterInfo::getRegisterNumbering(Reg);
+ if (Mips::AFGR64RegisterClass->contains(Reg)) {
+ FPUBitmask |= (3 << RegNum);
+ CSFPRegsSize += AFGR64RegSize;
+ HasAFGR64Reg = true;
+ continue;
+ }
+
+ FPUBitmask |= (1 << RegNum);
+ CSFPRegsSize += FGR32RegSize;
+ }
+
+ // Set CPU Bitmask.
+ for (; i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ unsigned RegNum = MipsRegisterInfo::getRegisterNumbering(Reg);
+ CPUBitmask |= (1 << RegNum);
+ }
+
+ // FP Regs are saved right below where the virtual frame pointer points to.
+ FPUTopSavedRegOff = FPUBitmask ?
+ (HasAFGR64Reg ? -AFGR64RegSize : -FGR32RegSize) : 0;
+
+ // CPU Regs are saved below FP Regs.
+ CPUTopSavedRegOff = CPUBitmask ? -CSFPRegsSize - CPURegSize : 0;
+
+ // Print CPUBitmask
+ O << "\t.mask \t"; printHex32(CPUBitmask, O);
+ O << ',' << CPUTopSavedRegOff << '\n';
+
+ // Print FPUBitmask
+ O << "\t.fmask\t"; printHex32(FPUBitmask, O);
+ O << "," << FPUTopSavedRegOff << '\n';
+}
+
+// Print a 32 bit hex number with all numbers.
+void MipsAsmPrinter::printHex32(unsigned Value, raw_ostream &O) {
+ O << "0x";
+ for (int i = 7; i >= 0; i--)
+ O << utohexstr((Value & (0xF << (i*4))) >> (i*4));
+}
+
+//===----------------------------------------------------------------------===//
+// Frame and Set directives
+//===----------------------------------------------------------------------===//
+
+/// Frame Directive
+void MipsAsmPrinter::emitFrameDirective() {
+ const TargetRegisterInfo &RI = *TM.getRegisterInfo();
+
+ unsigned stackReg = RI.getFrameRegister(*MF);
+ unsigned returnReg = RI.getRARegister();
+ unsigned stackSize = MF->getFrameInfo()->getStackSize();
+
+ OutStreamer.EmitRawText("\t.frame\t$" +
+ Twine(LowercaseString(MipsInstPrinter::getRegisterName(stackReg))) +
+ "," + Twine(stackSize) + ",$" +
+ Twine(LowercaseString(MipsInstPrinter::getRegisterName(returnReg))));
+}
+
+/// Emit Set directives.
+const char *MipsAsmPrinter::getCurrentABIString() const {
+ switch (Subtarget->getTargetABI()) {
+ case MipsSubtarget::O32: return "abi32";
+ case MipsSubtarget::N32: return "abiN32";
+ case MipsSubtarget::N64: return "abi64";
+ case MipsSubtarget::EABI: return "eabi32"; // TODO: handle eabi64
+ default: break;
+ }
+
+ llvm_unreachable("Unknown Mips ABI");
+ return NULL;
+}
+
+void MipsAsmPrinter::EmitFunctionEntryLabel() {
+ OutStreamer.EmitRawText("\t.ent\t" + Twine(CurrentFnSym->getName()));
+ OutStreamer.EmitLabel(CurrentFnSym);
+}
+
+/// EmitFunctionBodyStart - Targets can override this to emit stuff before
+/// the first basic block in the function.
+void MipsAsmPrinter::EmitFunctionBodyStart() {
+ emitFrameDirective();
+
+ SmallString<128> Str;
+ raw_svector_ostream OS(Str);
+ printSavedRegsBitmask(OS);
+ OutStreamer.EmitRawText(OS.str());
+}
+
+/// EmitFunctionBodyEnd - Targets can override this to emit stuff after
+/// the last basic block in the function.
+void MipsAsmPrinter::EmitFunctionBodyEnd() {
+ // There are instruction for this macros, but they must
+ // always be at the function end, and we can't emit and
+ // break with BB logic.
+ OutStreamer.EmitRawText(StringRef("\t.set\tmacro"));
+ OutStreamer.EmitRawText(StringRef("\t.set\treorder"));
+ OutStreamer.EmitRawText("\t.end\t" + Twine(CurrentFnSym->getName()));
+}
+
+
+/// isBlockOnlyReachableByFallthough - Return true if the basic block has
+/// exactly one predecessor and the control transfer mechanism between
+/// the predecessor and this block is a fall-through.
+bool MipsAsmPrinter::isBlockOnlyReachableByFallthrough(const MachineBasicBlock*
+ MBB) const {
+ // The predecessor has to be immediately before this block.
+ const MachineBasicBlock *Pred = *MBB->pred_begin();
+
+ // If the predecessor is a switch statement, assume a jump table
+ // implementation, so it is not a fall through.
+ if (const BasicBlock *bb = Pred->getBasicBlock())
+ if (isa<SwitchInst>(bb->getTerminator()))
+ return false;
+
+ // If this is a landing pad, it isn't a fall through. If it has no preds,
+ // then nothing falls through to it.
+ if (MBB->isLandingPad() || MBB->pred_empty())
+ return false;
+
+ // If there isn't exactly one predecessor, it can't be a fall through.
+ MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), PI2 = PI;
+ ++PI2;
+
+ if (PI2 != MBB->pred_end())
+ return false;
+
+ // The predecessor has to be immediately before this block.
+ if (!Pred->isLayoutSuccessor(MBB))
+ return false;
+
+ // If the block is completely empty, then it definitely does fall through.
+ if (Pred->empty())
+ return true;
+
+ // Otherwise, check the last instruction.
+ // Check if the last terminator is an unconditional branch.
+ MachineBasicBlock::const_iterator I = Pred->end();
+ while (I != Pred->begin() && !(--I)->getDesc().isTerminator()) ;
+
+ return !I->getDesc().isBarrier();
+}
+
+// Print out an operand for an inline asm expression.
+bool MipsAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,const char *ExtraCode,
+ raw_ostream &O) {
+ // Does this asm operand have a single letter operand modifier?
+ if (ExtraCode && ExtraCode[0])
+ return true; // Unknown modifier.
+
+ printOperand(MI, OpNo, O);
+ return false;
+}
+
+bool MipsAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNum, unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &O) {
+ if (ExtraCode && ExtraCode[0])
+ return true; // Unknown modifier.
+
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ assert(MO.isReg() && "unexpected inline asm memory operand");
+ O << "0($" << MipsInstPrinter::getRegisterName(MO.getReg()) << ")";
+ return false;
+}
+
+void MipsAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
+ raw_ostream &O) {
+ const MachineOperand &MO = MI->getOperand(opNum);
+ bool closeP = false;
+
+ if (MO.getTargetFlags())
+ closeP = true;
+
+ switch(MO.getTargetFlags()) {
+ case MipsII::MO_GPREL: O << "%gp_rel("; break;
+ case MipsII::MO_GOT_CALL: O << "%call16("; break;
+ case MipsII::MO_GOT: O << "%got("; break;
+ case MipsII::MO_ABS_HI: O << "%hi("; break;
+ case MipsII::MO_ABS_LO: O << "%lo("; break;
+ case MipsII::MO_TLSGD: O << "%tlsgd("; break;
+ case MipsII::MO_GOTTPREL: O << "%gottprel("; break;
+ case MipsII::MO_TPREL_HI: O << "%tprel_hi("; break;
+ case MipsII::MO_TPREL_LO: O << "%tprel_lo("; break;
+ case MipsII::MO_GPOFF_HI: O << "%hi(%neg(%gp_rel("; break;
+ case MipsII::MO_GPOFF_LO: O << "%lo(%neg(%gp_rel("; break;
+ case MipsII::MO_GOT_DISP: O << "%got_disp("; break;
+ case MipsII::MO_GOT_PAGE: O << "%got_page("; break;
+ case MipsII::MO_GOT_OFST: O << "%got_ofst("; break;
+ }
+
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register:
+ O << '$'
+ << LowercaseString(MipsInstPrinter::getRegisterName(MO.getReg()));
+ break;
+
+ case MachineOperand::MO_Immediate:
+ O << MO.getImm();
+ break;
+
+ case MachineOperand::MO_MachineBasicBlock:
+ O << *MO.getMBB()->getSymbol();
+ return;
+
+ case MachineOperand::MO_GlobalAddress:
+ O << *Mang->getSymbol(MO.getGlobal());
+ break;
+
+ case MachineOperand::MO_BlockAddress: {
+ MCSymbol* BA = GetBlockAddressSymbol(MO.getBlockAddress());
+ O << BA->getName();
+ break;
+ }
+
+ case MachineOperand::MO_ExternalSymbol:
+ O << *GetExternalSymbolSymbol(MO.getSymbolName());
+ break;
+
+ case MachineOperand::MO_JumpTableIndex:
+ O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+ << '_' << MO.getIndex();
+ break;
+
+ case MachineOperand::MO_ConstantPoolIndex:
+ O << MAI->getPrivateGlobalPrefix() << "CPI"
+ << getFunctionNumber() << "_" << MO.getIndex();
+ if (MO.getOffset())
+ O << "+" << MO.getOffset();
+ break;
+
+ default:
+ llvm_unreachable("<unknown operand type>");
+ }
+
+ if (closeP) O << ")";
+}
+
+void MipsAsmPrinter::printUnsignedImm(const MachineInstr *MI, int opNum,
+ raw_ostream &O) {
+ const MachineOperand &MO = MI->getOperand(opNum);
+ if (MO.isImm())
+ O << (unsigned short int)MO.getImm();
+ else
+ printOperand(MI, opNum, O);
+}
+
+void MipsAsmPrinter::
+printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O) {
+ // Load/Store memory operands -- imm($reg)
+ // If PIC target the target is loaded as the
+ // pattern lw $25,%call16($28)
+ printOperand(MI, opNum+1, O);
+ O << "(";
+ printOperand(MI, opNum, O);
+ O << ")";
+}
+
+void MipsAsmPrinter::
+printMemOperandEA(const MachineInstr *MI, int opNum, raw_ostream &O) {
+ // when using stack locations for not load/store instructions
+ // print the same way as all normal 3 operand instructions.
+ printOperand(MI, opNum, O);
+ O << ", ";
+ printOperand(MI, opNum+1, O);
+ return;
+}
+
+void MipsAsmPrinter::
+printFCCOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
+ const char *Modifier) {
+ const MachineOperand& MO = MI->getOperand(opNum);
+ O << Mips::MipsFCCToString((Mips::CondCode)MO.getImm());
+}
+
+void MipsAsmPrinter::EmitStartOfAsmFile(Module &M) {
+ // FIXME: Use SwitchSection.
+
+ // Tell the assembler which ABI we are using
+ OutStreamer.EmitRawText("\t.section .mdebug." + Twine(getCurrentABIString()));
+
+ // TODO: handle O64 ABI
+ if (Subtarget->isABI_EABI()) {
+ if (Subtarget->isGP32bit())
+ OutStreamer.EmitRawText(StringRef("\t.section .gcc_compiled_long32"));
+ else
+ OutStreamer.EmitRawText(StringRef("\t.section .gcc_compiled_long64"));
+ }
+
+ // return to previous section
+ OutStreamer.EmitRawText(StringRef("\t.previous"));
+}
+
+MachineLocation
+MipsAsmPrinter::getDebugValueLocation(const MachineInstr *MI) const {
+ // Handles frame addresses emitted in MipsInstrInfo::emitFrameIndexDebugValue.
+ assert(MI->getNumOperands() == 4 && "Invalid no. of machine operands!");
+ assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm() &&
+ "Unexpected MachineOperand types");
+ return MachineLocation(MI->getOperand(0).getReg(),
+ MI->getOperand(1).getImm());
+}
+
+void MipsAsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
+ raw_ostream &OS) {
+ // TODO: implement
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeMipsAsmPrinter() {
+ RegisterAsmPrinter<MipsAsmPrinter> X(TheMipsTarget);
+ RegisterAsmPrinter<MipsAsmPrinter> Y(TheMipselTarget);
+ RegisterAsmPrinter<MipsAsmPrinter> A(TheMips64Target);
+ RegisterAsmPrinter<MipsAsmPrinter> B(TheMips64elTarget);
+}
diff --git a/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.h b/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.h
new file mode 100644
index 0000000..16461ff
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.h
@@ -0,0 +1,71 @@
+//===-- MipsAsmPrinter.h - Mips LLVM assembly writer ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Mips Assembly printer class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSASMPRINTER_H
+#define MIPSASMPRINTER_H
+
+#include "MipsSubtarget.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+class MCStreamer;
+class MachineInstr;
+class raw_ostream;
+class MachineBasicBlock;
+class Module;
+
+class LLVM_LIBRARY_VISIBILITY MipsAsmPrinter : public AsmPrinter {
+ const MipsSubtarget *Subtarget;
+
+public:
+ explicit MipsAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : AsmPrinter(TM, Streamer) {
+ Subtarget = &TM.getSubtarget<MipsSubtarget>();
+ }
+
+ virtual const char *getPassName() const {
+ return "Mips Assembly Printer";
+ }
+
+ void EmitInstruction(const MachineInstr *MI);
+ void printSavedRegsBitmask(raw_ostream &O);
+ void printHex32(unsigned int Value, raw_ostream &O);
+ void emitFrameDirective();
+ const char *getCurrentABIString() const;
+ virtual void EmitFunctionEntryLabel();
+ virtual void EmitFunctionBodyStart();
+ virtual void EmitFunctionBodyEnd();
+ virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock*
+ MBB) const;
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O);
+ bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O);
+ void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
+ void printUnsignedImm(const MachineInstr *MI, int opNum, raw_ostream &O);
+ void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
+ void printMemOperandEA(const MachineInstr *MI, int opNum, raw_ostream &O);
+ void printFCCOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
+ const char *Modifier = 0);
+ void EmitStartOfAsmFile(Module &M);
+ virtual MachineLocation getDebugValueLocation(const MachineInstr *MI) const;
+ void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS);
+};
+}
+
+#endif
+
diff --git a/contrib/llvm/lib/Target/Mips/MipsCallingConv.td b/contrib/llvm/lib/Target/Mips/MipsCallingConv.td
new file mode 100644
index 0000000..0ae4ef6
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsCallingConv.td
@@ -0,0 +1,139 @@
+//===- MipsCallingConv.td - Calling Conventions for Mips ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This describes the calling conventions for Mips architecture.
+//===----------------------------------------------------------------------===//
+
+/// CCIfSubtarget - Match if the current subtarget has a feature F.
+class CCIfSubtarget<string F, CCAction A>:
+ CCIf<!strconcat("State.getTarget().getSubtarget<MipsSubtarget>().", F), A>;
+
+//===----------------------------------------------------------------------===//
+// Mips O32 Calling Convention
+//===----------------------------------------------------------------------===//
+
+// Only the return rules are defined here for O32. The rules for argument
+// passing are defined in MipsISelLowering.cpp.
+def RetCC_MipsO32 : CallingConv<[
+ // i32 are returned in registers V0, V1, A0, A1
+ CCIfType<[i32], CCAssignToReg<[V0, V1, A0, A1]>>,
+
+ // f32 are returned in registers F0, F2
+ CCIfType<[f32], CCAssignToReg<[F0, F2]>>,
+
+ // f64 are returned in register D0, D1
+ CCIfType<[f64], CCIfSubtarget<"isNotSingleFloat()", CCAssignToReg<[D0, D1]>>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// Mips N32/64 Calling Convention
+//===----------------------------------------------------------------------===//
+
+def CC_MipsN : CallingConv<[
+ // FIXME: Handle byval, complex and float double parameters.
+
+ // Promote i8/i16/i32 arguments to i64.
+ CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
+
+ // Integer arguments are passed in integer registers.
+ CCIfType<[i64], CCAssignToRegWithShadow<[A0_64, A1_64, A2_64, A3_64,
+ T0_64, T1_64, T2_64, T3_64],
+ [D12_64, D13_64, D14_64, D15_64,
+ D16_64, D17_64, D18_64, D19_64]>>,
+
+ // f32 arguments are passed in single precision FP registers.
+ CCIfType<[f32], CCAssignToRegWithShadow<[F12, F13, F14, F15,
+ F16, F17, F18, F19],
+ [A0_64, A1_64, A2_64, A3_64,
+ T0_64, T1_64, T2_64, T3_64]>>,
+
+ // f64 arguments are passed in double precision FP registers.
+ CCIfType<[f64], CCAssignToRegWithShadow<[D12_64, D13_64, D14_64, D15_64,
+ D16_64, D17_64, D18_64, D19_64],
+ [A0_64, A1_64, A2_64, A3_64,
+ T0_64, T1_64, T2_64, T3_64]>>,
+
+ // All stack parameter slots become 64-bit doublewords and are 8-byte aligned.
+ CCIfType<[i64, f64], CCAssignToStack<8, 8>>,
+ CCIfType<[f32], CCAssignToStack<4, 8>>
+]>;
+
+def RetCC_MipsN : CallingConv<[
+ // FIXME: Handle complex and float double return values.
+
+ // i32 are returned in registers V0, V1
+ CCIfType<[i32], CCAssignToReg<[V0, V1]>>,
+
+ // i64 are returned in registers V0_64, V1_64
+ CCIfType<[i64], CCAssignToReg<[V0_64, V1_64]>>,
+
+ // f32 are returned in registers F0, F2
+ CCIfType<[f32], CCAssignToReg<[F0, F2]>>,
+
+ // f64 are returned in registers D0, D2
+ CCIfType<[f64], CCAssignToReg<[D0_64, D2_64]>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// Mips EABI Calling Convention
+//===----------------------------------------------------------------------===//
+
+def CC_MipsEABI : CallingConv<[
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // Integer arguments are passed in integer registers.
+ CCIfType<[i32], CCAssignToReg<[A0, A1, A2, A3, T0, T1, T2, T3]>>,
+
+ // Single fp arguments are passed in pairs within 32-bit mode
+ CCIfType<[f32], CCIfSubtarget<"isSingleFloat()",
+ CCAssignToReg<[F12, F13, F14, F15, F16, F17, F18, F19]>>>,
+
+ CCIfType<[f32], CCIfSubtarget<"isNotSingleFloat()",
+ CCAssignToReg<[F12, F14, F16, F18]>>>,
+
+ // The first 4 double fp arguments are passed in single fp registers.
+ CCIfType<[f64], CCIfSubtarget<"isNotSingleFloat()",
+ CCAssignToReg<[D6, D7, D8, D9]>>>,
+
+ // Integer values get stored in stack slots that are 4 bytes in
+ // size and 4-byte aligned.
+ CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
+
+ // Integer values get stored in stack slots that are 8 bytes in
+ // size and 8-byte aligned.
+ CCIfType<[f64], CCIfSubtarget<"isNotSingleFloat()", CCAssignToStack<8, 8>>>
+]>;
+
+def RetCC_MipsEABI : CallingConv<[
+ // i32 are returned in registers V0, V1
+ CCIfType<[i32], CCAssignToReg<[V0, V1]>>,
+
+ // f32 are returned in registers F0, F1
+ CCIfType<[f32], CCAssignToReg<[F0, F1]>>,
+
+ // f64 are returned in register D0
+ CCIfType<[f64], CCIfSubtarget<"isNotSingleFloat()", CCAssignToReg<[D0]>>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// Mips Calling Convention Dispatch
+//===----------------------------------------------------------------------===//
+
+def CC_Mips : CallingConv<[
+ CCIfSubtarget<"isABI_EABI()", CCDelegateTo<CC_MipsEABI>>,
+ CCIfSubtarget<"isABI_N32()", CCDelegateTo<CC_MipsN>>,
+ CCIfSubtarget<"isABI_N64()", CCDelegateTo<CC_MipsN>>
+]>;
+
+def RetCC_Mips : CallingConv<[
+ CCIfSubtarget<"isABI_EABI()", CCDelegateTo<RetCC_MipsEABI>>,
+ CCIfSubtarget<"isABI_N32()", CCDelegateTo<RetCC_MipsN>>,
+ CCIfSubtarget<"isABI_N64()", CCDelegateTo<RetCC_MipsN>>,
+ CCDelegateTo<RetCC_MipsO32>
+]>;
diff --git a/contrib/llvm/lib/Target/Mips/MipsCodeEmitter.cpp b/contrib/llvm/lib/Target/Mips/MipsCodeEmitter.cpp
new file mode 100644
index 0000000..9220d9c
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsCodeEmitter.cpp
@@ -0,0 +1,245 @@
+//===-- Mips/MipsCodeEmitter.cpp - Convert Mips code to machine code -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===---------------------------------------------------------------------===//
+//
+// This file contains the pass that transforms the Mips machine instructions
+// into relocatable machine code.
+//
+//===---------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jit"
+#include "Mips.h"
+#include "MipsInstrInfo.h"
+#include "MipsRelocations.h"
+#include "MipsSubtarget.h"
+#include "MipsTargetMachine.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/PassManager.h"
+#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#ifndef NDEBUG
+#include <iomanip>
+#endif
+
+#include "llvm/CodeGen/MachineOperand.h"
+
+using namespace llvm;
+
+STATISTIC(NumEmitted, "Number of machine instructions emitted");
+
+namespace {
+
+class MipsCodeEmitter : public MachineFunctionPass {
+ MipsJITInfo *JTI;
+ const MipsInstrInfo *II;
+ const TargetData *TD;
+ const MipsSubtarget *Subtarget;
+ TargetMachine &TM;
+ JITCodeEmitter &MCE;
+ const std::vector<MachineConstantPoolEntry> *MCPEs;
+ const std::vector<MachineJumpTableEntry> *MJTEs;
+ bool IsPIC;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineModuleInfo> ();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ static char ID;
+
+ public:
+ MipsCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce) :
+ MachineFunctionPass(ID), JTI(0),
+ II((const MipsInstrInfo *) tm.getInstrInfo()),
+ TD(tm.getTargetData()), TM(tm), MCE(mce), MCPEs(0), MJTEs(0),
+ IsPIC(TM.getRelocationModel() == Reloc::PIC_) {
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const {
+ return "Mips Machine Code Emitter";
+ }
+
+ /// getBinaryCodeForInstr - This function, generated by the
+ /// CodeEmitterGenerator using TableGen, produces the binary encoding for
+ /// machine instructions.
+ unsigned getBinaryCodeForInstr(const MachineInstr &MI) const;
+
+ void emitInstruction(const MachineInstr &MI);
+
+ private:
+
+ void emitWordLE(unsigned Word);
+
+ /// Routines that handle operands which add machine relocations which are
+ /// fixed up by the relocation stage.
+ void emitGlobalAddress(const GlobalValue *GV, unsigned Reloc,
+ bool MayNeedFarStub) const;
+ void emitExternalSymbolAddress(const char *ES, unsigned Reloc) const;
+ void emitConstPoolAddress(unsigned CPI, unsigned Reloc) const;
+ void emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) const;
+ void emitMachineBasicBlock(MachineBasicBlock *BB, unsigned Reloc) const;
+
+ /// getMachineOpValue - Return binary encoding of operand. If the machine
+ /// operand requires relocation, record the relocation and return zero.
+ unsigned getMachineOpValue(const MachineInstr &MI,
+ const MachineOperand &MO) const;
+
+ unsigned getRelocation(const MachineInstr &MI,
+ const MachineOperand &MO) const;
+
+ };
+}
+
+char MipsCodeEmitter::ID = 0;
+
+bool MipsCodeEmitter::runOnMachineFunction(MachineFunction &MF) {
+ JTI = ((MipsTargetMachine&) MF.getTarget()).getJITInfo();
+ II = ((const MipsTargetMachine&) MF.getTarget()).getInstrInfo();
+ TD = ((const MipsTargetMachine&) MF.getTarget()).getTargetData();
+ Subtarget = &TM.getSubtarget<MipsSubtarget> ();
+ MCPEs = &MF.getConstantPool()->getConstants();
+ MJTEs = 0;
+ if (MF.getJumpTableInfo()) MJTEs = &MF.getJumpTableInfo()->getJumpTables();
+ JTI->Initialize(MF, IsPIC);
+ MCE.setModuleInfo(&getAnalysis<MachineModuleInfo> ());
+
+ do {
+ DEBUG(errs() << "JITTing function '"
+ << MF.getFunction()->getName() << "'\n");
+ MCE.startFunction(MF);
+
+ for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
+ MBB != E; ++MBB){
+ MCE.StartMachineBasicBlock(MBB);
+ for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I)
+ emitInstruction(*I);
+ }
+ } while (MCE.finishFunction(MF));
+
+ return false;
+}
+
+unsigned MipsCodeEmitter::getRelocation(const MachineInstr &MI,
+ const MachineOperand &MO) const {
+ // NOTE: This relocations are for static.
+ uint64_t TSFlags = MI.getDesc().TSFlags;
+ uint64_t Form = TSFlags & MipsII::FormMask;
+ if (Form == MipsII::FrmJ)
+ return Mips::reloc_mips_26;
+ if ((Form == MipsII::FrmI || Form == MipsII::FrmFI)
+ && MI.getDesc().isBranch())
+ return Mips::reloc_mips_branch;
+ if (Form == MipsII::FrmI && MI.getOpcode() == Mips::LUi)
+ return Mips::reloc_mips_hi;
+ return Mips::reloc_mips_lo;
+}
+
+/// getMachineOpValue - Return binary encoding of operand. If the machine
+/// operand requires relocation, record the relocation and return zero.
+unsigned MipsCodeEmitter::getMachineOpValue(const MachineInstr &MI,
+ const MachineOperand &MO) const {
+ if (MO.isReg())
+ return MipsRegisterInfo::getRegisterNumbering(MO.getReg());
+ else if (MO.isImm())
+ return static_cast<unsigned>(MO.getImm());
+ else if (MO.isGlobal())
+ emitGlobalAddress(MO.getGlobal(), getRelocation(MI, MO), true);
+ else if (MO.isSymbol())
+ emitExternalSymbolAddress(MO.getSymbolName(), getRelocation(MI, MO));
+ else if (MO.isCPI())
+ emitConstPoolAddress(MO.getIndex(), getRelocation(MI, MO));
+ else if (MO.isJTI())
+ emitJumpTableAddress(MO.getIndex(), getRelocation(MI, MO));
+ else if (MO.isMBB())
+ emitMachineBasicBlock(MO.getMBB(), getRelocation(MI, MO));
+ else
+ llvm_unreachable("Unable to encode MachineOperand!");
+ return 0;
+}
+
+void MipsCodeEmitter::emitGlobalAddress(const GlobalValue *GV, unsigned Reloc,
+ bool MayNeedFarStub) const {
+ MCE.addRelocation(MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc,
+ const_cast<GlobalValue *>(GV), 0, MayNeedFarStub));
+}
+
+void MipsCodeEmitter::
+emitExternalSymbolAddress(const char *ES, unsigned Reloc) const {
+ MCE.addRelocation(MachineRelocation::getExtSym(MCE.getCurrentPCOffset(),
+ Reloc, ES, 0, 0, false));
+}
+
+void MipsCodeEmitter::emitConstPoolAddress(unsigned CPI, unsigned Reloc) const {
+ MCE.addRelocation(MachineRelocation::getConstPool(MCE.getCurrentPCOffset(),
+ Reloc, CPI, 0, false));
+}
+
+void MipsCodeEmitter::
+emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) const {
+ MCE.addRelocation(MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(),
+ Reloc, JTIndex, 0, false));
+}
+
+void MipsCodeEmitter::emitMachineBasicBlock(MachineBasicBlock *BB,
+ unsigned Reloc) const {
+ MCE.addRelocation(MachineRelocation::getBB(MCE.getCurrentPCOffset(),
+ Reloc, BB));
+}
+
+void MipsCodeEmitter::emitInstruction(const MachineInstr &MI) {
+ DEBUG(errs() << "JIT: " << (void*)MCE.getCurrentPCValue() << ":\t" << MI);
+
+ MCE.processDebugLoc(MI.getDebugLoc(), true);
+
+ // Skip pseudo instructions.
+ if ((MI.getDesc().TSFlags & MipsII::FormMask) == MipsII::Pseudo)
+ return;
+
+ ++NumEmitted; // Keep track of the # of mi's emitted
+
+ switch (MI.getOpcode()) {
+ default:
+ emitWordLE(getBinaryCodeForInstr(MI));
+ break;
+ }
+
+ MCE.processDebugLoc(MI.getDebugLoc(), false);
+}
+
+void MipsCodeEmitter::emitWordLE(unsigned Word) {
+ DEBUG(errs() << " 0x";
+ errs().write_hex(Word) << "\n");
+ MCE.emitWordLE(Word);
+}
+
+/// createMipsJITCodeEmitterPass - Return a pass that emits the collected Mips
+/// code to the specified MCE object.
+FunctionPass *llvm::createMipsJITCodeEmitterPass(MipsTargetMachine &TM,
+ JITCodeEmitter &JCE) {
+ return new MipsCodeEmitter(TM, JCE);
+}
+
+unsigned MipsCodeEmitter::getBinaryCodeForInstr(const MachineInstr &MI) const {
+ // this function will be automatically generated by the CodeEmitterGenerator
+ // using TableGen
+ return 0;
+}
diff --git a/contrib/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp b/contrib/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
new file mode 100644
index 0000000..be3b7a0
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -0,0 +1,256 @@
+//===-- DelaySlotFiller.cpp - Mips delay slot filler ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Simple pass to fills delay slots with useful instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "delay-slot-filler"
+
+#include "Mips.h"
+#include "MipsTargetMachine.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+
+using namespace llvm;
+
+STATISTIC(FilledSlots, "Number of delay slots filled");
+STATISTIC(UsefulSlots, "Number of delay slots filled with instructions that"
+ " are not NOP.");
+
+static cl::opt<bool> EnableDelaySlotFiller(
+ "enable-mips-delay-filler",
+ cl::init(false),
+ cl::desc("Fill the Mips delay slots useful instructions."),
+ cl::Hidden);
+
+namespace {
+ struct Filler : public MachineFunctionPass {
+
+ TargetMachine &TM;
+ const TargetInstrInfo *TII;
+ MachineBasicBlock::iterator LastFiller;
+
+ static char ID;
+ Filler(TargetMachine &tm)
+ : MachineFunctionPass(ID), TM(tm), TII(tm.getInstrInfo()) { }
+
+ virtual const char *getPassName() const {
+ return "Mips Delay Slot Filler";
+ }
+
+ bool runOnMachineBasicBlock(MachineBasicBlock &MBB);
+ bool runOnMachineFunction(MachineFunction &F) {
+ bool Changed = false;
+ for (MachineFunction::iterator FI = F.begin(), FE = F.end();
+ FI != FE; ++FI)
+ Changed |= runOnMachineBasicBlock(*FI);
+ return Changed;
+ }
+
+ bool isDelayFiller(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator candidate);
+
+ void insertCallUses(MachineBasicBlock::iterator MI,
+ SmallSet<unsigned, 32>& RegDefs,
+ SmallSet<unsigned, 32>& RegUses);
+
+ void insertDefsUses(MachineBasicBlock::iterator MI,
+ SmallSet<unsigned, 32>& RegDefs,
+ SmallSet<unsigned, 32>& RegUses);
+
+ bool IsRegInSet(SmallSet<unsigned, 32>& RegSet,
+ unsigned Reg);
+
+ bool delayHasHazard(MachineBasicBlock::iterator candidate,
+ bool &sawLoad, bool &sawStore,
+ SmallSet<unsigned, 32> &RegDefs,
+ SmallSet<unsigned, 32> &RegUses);
+
+ bool
+ findDelayInstr(MachineBasicBlock &MBB, MachineBasicBlock::iterator slot,
+ MachineBasicBlock::iterator &Filler);
+
+
+ };
+ char Filler::ID = 0;
+} // end of anonymous namespace
+
+/// runOnMachineBasicBlock - Fill in delay slots for the given basic block.
+/// We assume there is only one delay slot per delayed instruction.
+bool Filler::
+runOnMachineBasicBlock(MachineBasicBlock &MBB) {
+ bool Changed = false;
+ LastFiller = MBB.end();
+
+ for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
+ if (I->getDesc().hasDelaySlot()) {
+ ++FilledSlots;
+ Changed = true;
+
+ MachineBasicBlock::iterator D;
+
+ if (EnableDelaySlotFiller && findDelayInstr(MBB, I, D)) {
+ MBB.splice(llvm::next(I), &MBB, D);
+ ++UsefulSlots;
+ }
+ else
+ BuildMI(MBB, llvm::next(I), I->getDebugLoc(), TII->get(Mips::NOP));
+
+ // Record the filler instruction that filled the delay slot.
+ // The instruction after it will be visited in the next iteration.
+ LastFiller = ++I;
+ }
+ return Changed;
+
+}
+
+/// createMipsDelaySlotFillerPass - Returns a pass that fills in delay
+/// slots in Mips MachineFunctions
+FunctionPass *llvm::createMipsDelaySlotFillerPass(MipsTargetMachine &tm) {
+ return new Filler(tm);
+}
+
+bool Filler::findDelayInstr(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator slot,
+ MachineBasicBlock::iterator &Filler) {
+ SmallSet<unsigned, 32> RegDefs;
+ SmallSet<unsigned, 32> RegUses;
+
+ insertDefsUses(slot, RegDefs, RegUses);
+
+ bool sawLoad = false;
+ bool sawStore = false;
+
+ for (MachineBasicBlock::reverse_iterator I(slot); I != MBB.rend(); ++I) {
+ // skip debug value
+ if (I->isDebugValue())
+ continue;
+
+ // Convert to forward iterator.
+ MachineBasicBlock::iterator FI(llvm::next(I).base());
+
+ if (I->hasUnmodeledSideEffects()
+ || I->isInlineAsm()
+ || I->isLabel()
+ || FI == LastFiller
+ || I->getDesc().isPseudo()
+ //
+ // Should not allow:
+ // ERET, DERET or WAIT, PAUSE. Need to add these to instruction
+ // list. TBD.
+ )
+ break;
+
+ if (delayHasHazard(FI, sawLoad, sawStore, RegDefs, RegUses)) {
+ insertDefsUses(FI, RegDefs, RegUses);
+ continue;
+ }
+
+ Filler = FI;
+ return true;
+ }
+
+ return false;
+}
+
+bool Filler::delayHasHazard(MachineBasicBlock::iterator candidate,
+ bool &sawLoad,
+ bool &sawStore,
+ SmallSet<unsigned, 32> &RegDefs,
+ SmallSet<unsigned, 32> &RegUses) {
+ if (candidate->isImplicitDef() || candidate->isKill())
+ return true;
+
+ MCInstrDesc MCID = candidate->getDesc();
+ // Loads or stores cannot be moved past a store to the delay slot
+ // and stores cannot be moved past a load.
+ if (MCID.mayLoad()) {
+ if (sawStore)
+ return true;
+ sawLoad = true;
+ }
+
+ if (MCID.mayStore()) {
+ if (sawStore)
+ return true;
+ sawStore = true;
+ if (sawLoad)
+ return true;
+ }
+
+ assert((!MCID.isCall() && !MCID.isReturn()) &&
+ "Cannot put calls or returns in delay slot.");
+
+ for (unsigned i = 0, e = candidate->getNumOperands(); i!= e; ++i) {
+ const MachineOperand &MO = candidate->getOperand(i);
+ unsigned Reg;
+
+ if (!MO.isReg() || !(Reg = MO.getReg()))
+ continue; // skip
+
+ if (MO.isDef()) {
+ // check whether Reg is defined or used before delay slot.
+ if (IsRegInSet(RegDefs, Reg) || IsRegInSet(RegUses, Reg))
+ return true;
+ }
+ if (MO.isUse()) {
+ // check whether Reg is defined before delay slot.
+ if (IsRegInSet(RegDefs, Reg))
+ return true;
+ }
+ }
+ return false;
+}
+
+// Insert Defs and Uses of MI into the sets RegDefs and RegUses.
+void Filler::insertDefsUses(MachineBasicBlock::iterator MI,
+ SmallSet<unsigned, 32>& RegDefs,
+ SmallSet<unsigned, 32>& RegUses) {
+ // If MI is a call or return, just examine the explicit non-variadic operands.
+ MCInstrDesc MCID = MI->getDesc();
+ unsigned e = MCID.isCall() || MCID.isReturn() ? MCID.getNumOperands() :
+ MI->getNumOperands();
+
+ // Add RA to RegDefs to prevent users of RA from going into delay slot.
+ if (MCID.isCall())
+ RegDefs.insert(Mips::RA);
+
+ for (unsigned i = 0; i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ unsigned Reg;
+
+ if (!MO.isReg() || !(Reg = MO.getReg()))
+ continue;
+
+ if (MO.isDef())
+ RegDefs.insert(Reg);
+ else if (MO.isUse())
+ RegUses.insert(Reg);
+ }
+}
+
+//returns true if the Reg or its alias is in the RegSet.
+bool Filler::IsRegInSet(SmallSet<unsigned, 32>& RegSet, unsigned Reg) {
+ if (RegSet.count(Reg))
+ return true;
+ // check Aliased Registers
+ for (const unsigned *Alias = TM.getRegisterInfo()->getAliasSet(Reg);
+ *Alias; ++Alias)
+ if (RegSet.count(*Alias))
+ return true;
+
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/Mips/MipsEmitGPRestore.cpp b/contrib/llvm/lib/Target/Mips/MipsEmitGPRestore.cpp
new file mode 100644
index 0000000..03d922f
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsEmitGPRestore.cpp
@@ -0,0 +1,94 @@
+//===-- MipsEmitGPRestore.cpp - Emit GP restore instruction----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass emits instructions that restore $gp right
+// after jalr instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "emit-gp-restore"
+
+#include "Mips.h"
+#include "MipsTargetMachine.h"
+#include "MipsMachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/Statistic.h"
+
+using namespace llvm;
+
+namespace {
+ struct Inserter : public MachineFunctionPass {
+
+ TargetMachine &TM;
+ const TargetInstrInfo *TII;
+
+ static char ID;
+ Inserter(TargetMachine &tm)
+ : MachineFunctionPass(ID), TM(tm), TII(tm.getInstrInfo()) { }
+
+ virtual const char *getPassName() const {
+ return "Mips Emit GP Restore";
+ }
+
+ bool runOnMachineFunction(MachineFunction &F);
+ };
+ char Inserter::ID = 0;
+} // end of anonymous namespace
+
+bool Inserter::runOnMachineFunction(MachineFunction &F) {
+ if (TM.getRelocationModel() != Reloc::PIC_)
+ return false;
+
+ bool Changed = false;
+ int FI = F.getInfo<MipsFunctionInfo>()->getGPFI();
+
+ for (MachineFunction::iterator MFI = F.begin(), MFE = F.end();
+ MFI != MFE; ++MFI) {
+ MachineBasicBlock& MBB = *MFI;
+ MachineBasicBlock::iterator I = MFI->begin();
+
+ // If MBB is a landing pad, insert instruction that restores $gp after
+ // EH_LABEL.
+ if (MBB.isLandingPad()) {
+ // Find EH_LABEL first.
+ for (; I->getOpcode() != TargetOpcode::EH_LABEL; ++I) ;
+
+ // Insert lw.
+ ++I;
+ DebugLoc dl = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
+ BuildMI(MBB, I, dl, TII->get(Mips::LW), Mips::GP).addFrameIndex(FI)
+ .addImm(0);
+ Changed = true;
+ }
+
+ while (I != MFI->end()) {
+ if (I->getOpcode() != Mips::JALR) {
+ ++I;
+ continue;
+ }
+
+ DebugLoc dl = I->getDebugLoc();
+ // emit lw $gp, ($gp save slot on stack) after jalr
+ BuildMI(MBB, ++I, dl, TII->get(Mips::LW), Mips::GP).addFrameIndex(FI)
+ .addImm(0);
+ Changed = true;
+ }
+ }
+
+ return Changed;
+}
+
+/// createMipsEmitGPRestorePass - Returns a pass that emits instructions that
+/// restores $gp clobbered by jalr instructions.
+FunctionPass *llvm::createMipsEmitGPRestorePass(MipsTargetMachine &tm) {
+ return new Inserter(tm);
+}
+
diff --git a/contrib/llvm/lib/Target/Mips/MipsExpandPseudo.cpp b/contrib/llvm/lib/Target/Mips/MipsExpandPseudo.cpp
new file mode 100644
index 0000000..a622258
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsExpandPseudo.cpp
@@ -0,0 +1,117 @@
+//===-- MipsExpandPseudo.cpp - Expand pseudo instructions ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass expands pseudo instructions into target instructions after register
+// allocation but before post-RA scheduling.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mips-expand-pseudo"
+
+#include "Mips.h"
+#include "MipsTargetMachine.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/Statistic.h"
+
+using namespace llvm;
+
+namespace {
+ struct MipsExpandPseudo : public MachineFunctionPass {
+
+ TargetMachine &TM;
+ const TargetInstrInfo *TII;
+
+ static char ID;
+ MipsExpandPseudo(TargetMachine &tm)
+ : MachineFunctionPass(ID), TM(tm), TII(tm.getInstrInfo()) { }
+
+ virtual const char *getPassName() const {
+ return "Mips PseudoInstrs Expansion";
+ }
+
+ bool runOnMachineFunction(MachineFunction &F);
+ bool runOnMachineBasicBlock(MachineBasicBlock &MBB);
+
+ private:
+ void ExpandBuildPairF64(MachineBasicBlock&, MachineBasicBlock::iterator);
+ void ExpandExtractElementF64(MachineBasicBlock&,
+ MachineBasicBlock::iterator);
+ };
+ char MipsExpandPseudo::ID = 0;
+} // end of anonymous namespace
+
+bool MipsExpandPseudo::runOnMachineFunction(MachineFunction& F) {
+ bool Changed = false;
+
+ for (MachineFunction::iterator I = F.begin(); I != F.end(); ++I)
+ Changed |= runOnMachineBasicBlock(*I);
+
+ return Changed;
+}
+
+bool MipsExpandPseudo::runOnMachineBasicBlock(MachineBasicBlock& MBB) {
+
+ bool Changed = false;
+ for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end();) {
+ const MCInstrDesc& MCid = I->getDesc();
+
+ switch(MCid.getOpcode()) {
+ default:
+ ++I;
+ continue;
+ case Mips::BuildPairF64:
+ ExpandBuildPairF64(MBB, I);
+ break;
+ case Mips::ExtractElementF64:
+ ExpandExtractElementF64(MBB, I);
+ break;
+ }
+
+ // delete original instr
+ MBB.erase(I++);
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+void MipsExpandPseudo::ExpandBuildPairF64(MachineBasicBlock& MBB,
+ MachineBasicBlock::iterator I) {
+ unsigned DstReg = I->getOperand(0).getReg();
+ unsigned LoReg = I->getOperand(1).getReg(), HiReg = I->getOperand(2).getReg();
+ const MCInstrDesc& Mtc1Tdd = TII->get(Mips::MTC1);
+ DebugLoc dl = I->getDebugLoc();
+ const unsigned* SubReg =
+ TM.getRegisterInfo()->getSubRegisters(DstReg);
+
+ // mtc1 Lo, $fp
+ // mtc1 Hi, $fp + 1
+ BuildMI(MBB, I, dl, Mtc1Tdd, *SubReg).addReg(LoReg);
+ BuildMI(MBB, I, dl, Mtc1Tdd, *(SubReg + 1)).addReg(HiReg);
+}
+
+void MipsExpandPseudo::ExpandExtractElementF64(MachineBasicBlock& MBB,
+ MachineBasicBlock::iterator I) {
+ unsigned DstReg = I->getOperand(0).getReg();
+ unsigned SrcReg = I->getOperand(1).getReg();
+ unsigned N = I->getOperand(2).getImm();
+ const MCInstrDesc& Mfc1Tdd = TII->get(Mips::MFC1);
+ DebugLoc dl = I->getDebugLoc();
+ const unsigned* SubReg = TM.getRegisterInfo()->getSubRegisters(SrcReg);
+
+ BuildMI(MBB, I, dl, Mfc1Tdd, DstReg).addReg(*(SubReg + N));
+}
+
+/// createMipsMipsExpandPseudoPass - Returns a pass that expands pseudo
+/// instrs into real instrs
+FunctionPass *llvm::createMipsExpandPseudoPass(MipsTargetMachine &tm) {
+ return new MipsExpandPseudo(tm);
+}
diff --git a/contrib/llvm/lib/Target/Mips/MipsFrameLowering.cpp b/contrib/llvm/lib/Target/Mips/MipsFrameLowering.cpp
new file mode 100644
index 0000000..22d1e47
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsFrameLowering.cpp
@@ -0,0 +1,330 @@
+//=======- MipsFrameLowering.cpp - Mips Frame Information ------*- C++ -*-====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Mips implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsFrameLowering.h"
+#include "MipsInstrInfo.h"
+#include "MipsMachineFunction.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+
+//===----------------------------------------------------------------------===//
+//
+// Stack Frame Processing methods
+// +----------------------------+
+//
+// The stack is allocated decrementing the stack pointer on
+// the first instruction of a function prologue. Once decremented,
+// all stack references are done thought a positive offset
+// from the stack/frame pointer, so the stack is considering
+// to grow up! Otherwise terrible hacks would have to be made
+// to get this stack ABI compliant :)
+//
+// The stack frame required by the ABI (after call):
+// Offset
+//
+// 0 ----------
+// 4 Args to pass
+// . saved $GP (used in PIC)
+// . Alloca allocations
+// . Local Area
+// . CPU "Callee Saved" Registers
+// . saved FP
+// . saved RA
+// . FPU "Callee Saved" Registers
+// StackSize -----------
+//
+// Offset - offset from sp after stack allocation on function prologue
+//
+// The sp is the stack pointer subtracted/added from the stack size
+// at the Prologue/Epilogue
+//
+// References to the previous stack (to obtain arguments) are done
+// with offsets that exceeds the stack size: (stacksize+(4*(num_arg-1))
+//
+// Examples:
+// - reference to the actual stack frame
+// for any local area var there is smt like : FI >= 0, StackOffset: 4
+// sw REGX, 4(SP)
+//
+// - reference to previous stack frame
+// suppose there's a load to the 5th arguments : FI < 0, StackOffset: 16.
+// The emitted instruction will be something like:
+// lw REGX, 16+StackSize(SP)
+//
+// Since the total stack size is unknown on LowerFormalArguments, all
+// stack references (ObjectOffset) created to reference the function
+// arguments, are negative numbers. This way, on eliminateFrameIndex it's
+// possible to detect those references and the offsets are adjusted to
+// their real location.
+//
+//===----------------------------------------------------------------------===//
+
+// hasFP - Return true if the specified function should have a dedicated frame
+// pointer register. This is true if the function has variable sized allocas or
+// if frame pointer elimination is disabled.
+bool MipsFrameLowering::hasFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects()
+ || MFI->isFrameAddressTaken();
+}
+
+bool MipsFrameLowering::targetHandlesStackFrameRounding() const {
+ return true;
+}
+
+static unsigned AlignOffset(unsigned Offset, unsigned Align) {
+ return (Offset + Align - 1) / Align * Align;
+}
+
+// expand pair of register and immediate if the immediate doesn't fit in the
+// 16-bit offset field.
+// e.g.
+// if OrigImm = 0x10000, OrigReg = $sp:
+// generate the following sequence of instrs:
+// lui $at, hi(0x10000)
+// addu $at, $sp, $at
+//
+// (NewReg, NewImm) = ($at, lo(Ox10000))
+// return true
+static bool expandRegLargeImmPair(unsigned OrigReg, int OrigImm,
+ unsigned& NewReg, int& NewImm,
+ MachineBasicBlock& MBB,
+ MachineBasicBlock::iterator I) {
+ // OrigImm fits in the 16-bit field
+ if (OrigImm < 0x8000 && OrigImm >= -0x8000) {
+ NewReg = OrigReg;
+ NewImm = OrigImm;
+ return false;
+ }
+
+ MachineFunction* MF = MBB.getParent();
+ const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+ DebugLoc DL = I->getDebugLoc();
+ int ImmLo = (short)(OrigImm & 0xffff);
+ int ImmHi = (((unsigned)OrigImm & 0xffff0000) >> 16) +
+ ((OrigImm & 0x8000) != 0);
+
+ // FIXME: change this when mips goes MC".
+ BuildMI(MBB, I, DL, TII->get(Mips::NOAT));
+ BuildMI(MBB, I, DL, TII->get(Mips::LUi), Mips::AT).addImm(ImmHi);
+ BuildMI(MBB, I, DL, TII->get(Mips::ADDu), Mips::AT).addReg(OrigReg)
+ .addReg(Mips::AT);
+ NewReg = Mips::AT;
+ NewImm = ImmLo;
+
+ return true;
+}
+
+void MipsFrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+ const MipsRegisterInfo *RegInfo =
+ static_cast<const MipsRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ const MipsInstrInfo &TII =
+ *static_cast<const MipsInstrInfo*>(MF.getTarget().getInstrInfo());
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+ bool isPIC = (MF.getTarget().getRelocationModel() == Reloc::PIC_);
+ unsigned NewReg = 0;
+ int NewImm = 0;
+ bool ATUsed;
+
+ // First, compute final stack size.
+ unsigned RegSize = STI.isGP32bit() ? 4 : 8;
+ unsigned StackAlign = getStackAlignment();
+ unsigned LocalVarAreaOffset = MipsFI->needGPSaveRestore() ?
+ (MFI->getObjectOffset(MipsFI->getGPFI()) + RegSize) :
+ MipsFI->getMaxCallFrameSize();
+ unsigned StackSize = AlignOffset(LocalVarAreaOffset, StackAlign) +
+ AlignOffset(MFI->getStackSize(), StackAlign);
+
+ // Update stack size
+ MFI->setStackSize(StackSize);
+
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::NOREORDER));
+
+ // TODO: check need from GP here.
+ if (isPIC && STI.isABI_O32())
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::CPLOAD))
+ .addReg(RegInfo->getPICCallReg());
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::NOMACRO));
+
+ // No need to allocate space on the stack.
+ if (StackSize == 0 && !MFI->adjustsStack()) return;
+
+ MachineModuleInfo &MMI = MF.getMMI();
+ std::vector<MachineMove> &Moves = MMI.getFrameMoves();
+ MachineLocation DstML, SrcML;
+
+ // Adjust stack : addi sp, sp, (-imm)
+ ATUsed = expandRegLargeImmPair(Mips::SP, -StackSize, NewReg, NewImm, MBB,
+ MBBI);
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::ADDiu), Mips::SP)
+ .addReg(NewReg).addImm(NewImm);
+
+ // FIXME: change this when mips goes MC".
+ if (ATUsed)
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::ATMACRO));
+
+ // emit ".cfi_def_cfa_offset StackSize"
+ MCSymbol *AdjustSPLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, dl,
+ TII.get(TargetOpcode::PROLOG_LABEL)).addSym(AdjustSPLabel);
+ DstML = MachineLocation(MachineLocation::VirtualFP);
+ SrcML = MachineLocation(MachineLocation::VirtualFP, -StackSize);
+ Moves.push_back(MachineMove(AdjustSPLabel, DstML, SrcML));
+
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+
+ if (CSI.size()) {
+ // Find the instruction past the last instruction that saves a callee-saved
+ // register to the stack.
+ for (unsigned i = 0; i < CSI.size(); ++i)
+ ++MBBI;
+
+ // Iterate over list of callee-saved registers and emit .cfi_offset
+ // directives.
+ MCSymbol *CSLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, dl,
+ TII.get(TargetOpcode::PROLOG_LABEL)).addSym(CSLabel);
+
+ for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(),
+ E = CSI.end(); I != E; ++I) {
+ int64_t Offset = MFI->getObjectOffset(I->getFrameIdx());
+ unsigned Reg = I->getReg();
+
+ // If Reg is a double precision register, emit two cfa_offsets,
+ // one for each of the paired single precision registers.
+ if (Mips::AFGR64RegisterClass->contains(Reg)) {
+ const unsigned *SubRegs = RegInfo->getSubRegisters(Reg);
+ MachineLocation DstML0(MachineLocation::VirtualFP, Offset);
+ MachineLocation DstML1(MachineLocation::VirtualFP, Offset + 4);
+ MachineLocation SrcML0(*SubRegs);
+ MachineLocation SrcML1(*(SubRegs + 1));
+
+ if (!STI.isLittle())
+ std::swap(SrcML0, SrcML1);
+
+ Moves.push_back(MachineMove(CSLabel, DstML0, SrcML0));
+ Moves.push_back(MachineMove(CSLabel, DstML1, SrcML1));
+ }
+ else {
+ // Reg is either in CPURegs or FGR32.
+ DstML = MachineLocation(MachineLocation::VirtualFP, Offset);
+ SrcML = MachineLocation(Reg);
+ Moves.push_back(MachineMove(CSLabel, DstML, SrcML));
+ }
+ }
+ }
+
+ // if framepointer enabled, set it to point to the stack pointer.
+ if (hasFP(MF)) {
+ // Insert instruction "move $fp, $sp" at this location.
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::ADDu), Mips::FP)
+ .addReg(Mips::SP).addReg(Mips::ZERO);
+
+ // emit ".cfi_def_cfa_register $fp"
+ MCSymbol *SetFPLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, dl,
+ TII.get(TargetOpcode::PROLOG_LABEL)).addSym(SetFPLabel);
+ DstML = MachineLocation(Mips::FP);
+ SrcML = MachineLocation(MachineLocation::VirtualFP);
+ Moves.push_back(MachineMove(SetFPLabel, DstML, SrcML));
+ }
+
+ // Restore GP from the saved stack location
+ if (MipsFI->needGPSaveRestore()) {
+ unsigned Offset = MFI->getObjectOffset(MipsFI->getGPFI());
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::CPRESTORE)).addImm(Offset);
+
+ if (Offset >= 0x8000) {
+ BuildMI(MBB, llvm::prior(MBBI), dl, TII.get(Mips::MACRO));
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::NOMACRO));
+ }
+ }
+}
+
+void MipsFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MipsInstrInfo &TII =
+ *static_cast<const MipsInstrInfo*>(MF.getTarget().getInstrInfo());
+ DebugLoc dl = MBBI->getDebugLoc();
+
+ // Get the number of bytes from FrameInfo
+ unsigned StackSize = MFI->getStackSize();
+
+ unsigned NewReg = 0;
+ int NewImm = 0;
+ bool ATUsed = false;
+
+ // if framepointer enabled, restore the stack pointer.
+ if (hasFP(MF)) {
+ // Find the first instruction that restores a callee-saved register.
+ MachineBasicBlock::iterator I = MBBI;
+
+ for (unsigned i = 0; i < MFI->getCalleeSavedInfo().size(); ++i)
+ --I;
+
+ // Insert instruction "move $sp, $fp" at this location.
+ BuildMI(MBB, I, dl, TII.get(Mips::ADDu), Mips::SP)
+ .addReg(Mips::FP).addReg(Mips::ZERO);
+ }
+
+ // adjust stack : insert addi sp, sp, (imm)
+ if (StackSize) {
+ ATUsed = expandRegLargeImmPair(Mips::SP, StackSize, NewReg, NewImm, MBB,
+ MBBI);
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::ADDiu), Mips::SP)
+ .addReg(NewReg).addImm(NewImm);
+
+ // FIXME: change this when mips goes MC".
+ if (ATUsed)
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::ATMACRO));
+ }
+}
+
+void MipsFrameLowering::
+processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const {
+ MachineRegisterInfo& MRI = MF.getRegInfo();
+
+ // FIXME: remove this code if register allocator can correctly mark
+ // $fp and $ra used or unused.
+
+ // Mark $fp and $ra as used or unused.
+ if (hasFP(MF))
+ MRI.setPhysRegUsed(Mips::FP);
+
+ // The register allocator might determine $ra is used after seeing
+ // instruction "jr $ra", but we do not want PrologEpilogInserter to insert
+ // instructions to save/restore $ra unless there is a function call.
+ // To correct this, $ra is explicitly marked unused if there is no
+ // function call.
+ if (MF.getFrameInfo()->hasCalls())
+ MRI.setPhysRegUsed(Mips::RA);
+ else
+ MRI.setPhysRegUnused(Mips::RA);
+}
diff --git a/contrib/llvm/lib/Target/Mips/MipsFrameLowering.h b/contrib/llvm/lib/Target/Mips/MipsFrameLowering.h
new file mode 100644
index 0000000..c249756
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsFrameLowering.h
@@ -0,0 +1,49 @@
+//==--- MipsFrameLowering.h - Define frame lowering for Mips --*- C++ -*---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPS_FRAMEINFO_H
+#define MIPS_FRAMEINFO_H
+
+#include "Mips.h"
+#include "MipsSubtarget.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+ class MipsSubtarget;
+
+class MipsFrameLowering : public TargetFrameLowering {
+protected:
+ const MipsSubtarget &STI;
+
+public:
+ explicit MipsFrameLowering(const MipsSubtarget &sti)
+ : TargetFrameLowering(StackGrowsDown, sti.hasMips64() ? 16 : 8, 0),
+ STI(sti) {
+ }
+
+ bool targetHandlesStackFrameRounding() const;
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ bool hasFP(const MachineFunction &MF) const;
+
+ void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp
new file mode 100644
index 0000000..9c831ed
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -0,0 +1,349 @@
+//===-- MipsISelDAGToDAG.cpp - A dag to dag inst selector for Mips --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the MIPS target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mips-isel"
+#include "Mips.h"
+#include "MipsMachineFunction.h"
+#include "MipsRegisterInfo.h"
+#include "MipsSubtarget.h"
+#include "MipsTargetMachine.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Instruction Selector Implementation
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MipsDAGToDAGISel - MIPS specific code to select MIPS machine
+// instructions for SelectionDAG operations.
+//===----------------------------------------------------------------------===//
+namespace {
+
+class MipsDAGToDAGISel : public SelectionDAGISel {
+
+ /// TM - Keep a reference to MipsTargetMachine.
+ MipsTargetMachine &TM;
+
+ /// Subtarget - Keep a pointer to the MipsSubtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const MipsSubtarget &Subtarget;
+
+public:
+ explicit MipsDAGToDAGISel(MipsTargetMachine &tm) :
+ SelectionDAGISel(tm),
+ TM(tm), Subtarget(tm.getSubtarget<MipsSubtarget>()) {}
+
+ // Pass Name
+ virtual const char *getPassName() const {
+ return "MIPS DAG->DAG Pattern Instruction Selection";
+ }
+
+
+private:
+ // Include the pieces autogenerated from the target description.
+ #include "MipsGenDAGISel.inc"
+
+ /// getTargetMachine - Return a reference to the TargetMachine, casted
+ /// to the target-specific type.
+ const MipsTargetMachine &getTargetMachine() {
+ return static_cast<const MipsTargetMachine &>(TM);
+ }
+
+ /// getInstrInfo - Return a reference to the TargetInstrInfo, casted
+ /// to the target-specific type.
+ const MipsInstrInfo *getInstrInfo() {
+ return getTargetMachine().getInstrInfo();
+ }
+
+ SDNode *getGlobalBaseReg();
+ SDNode *Select(SDNode *N);
+
+ // Complex Pattern.
+ bool SelectAddr(SDValue N, SDValue &Base, SDValue &Offset);
+
+ // getI32Imm - Return a target constant with the specified
+ // value, of type i32.
+ inline SDValue getI32Imm(unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i32);
+ }
+
+ virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps);
+};
+
+}
+
+
+/// getGlobalBaseReg - Output the instructions required to put the
+/// GOT address into a register.
+SDNode *MipsDAGToDAGISel::getGlobalBaseReg() {
+ unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF);
+ return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode();
+}
+
+/// ComplexPattern used on MipsInstrInfo
+/// Used on Mips Load/Store instructions
+bool MipsDAGToDAGISel::
+SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset) {
+ EVT ValTy = Addr.getValueType();
+ unsigned GPReg = ValTy == MVT::i32 ? Mips::GP : Mips::GP_64;
+
+ // if Address is FI, get the TargetFrameIndex.
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
+ Offset = CurDAG->getTargetConstant(0, ValTy);
+ return true;
+ }
+
+ // on PIC code Load GA
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ if (Addr.getOpcode() == MipsISD::WrapperPIC) {
+ Base = CurDAG->getRegister(GPReg, ValTy);
+ Offset = Addr.getOperand(0);
+ return true;
+ }
+ } else {
+ if ((Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress))
+ return false;
+ else if (Addr.getOpcode() == ISD::TargetGlobalTLSAddress) {
+ Base = CurDAG->getRegister(GPReg, ValTy);
+ Offset = Addr;
+ return true;
+ }
+ }
+
+ // Addresses of the form FI+const or FI|const
+ if (CurDAG->isBaseWithConstantOffset(Addr)) {
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
+ if (isInt<16>(CN->getSExtValue())) {
+
+ // If the first operand is a FI, get the TargetFI Node
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>
+ (Addr.getOperand(0)))
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
+ else
+ Base = Addr.getOperand(0);
+
+ Offset = CurDAG->getTargetConstant(CN->getZExtValue(), ValTy);
+ return true;
+ }
+ }
+
+ // Operand is a result from an ADD.
+ if (Addr.getOpcode() == ISD::ADD) {
+ // When loading from constant pools, load the lower address part in
+ // the instruction itself. Example, instead of:
+ // lui $2, %hi($CPI1_0)
+ // addiu $2, $2, %lo($CPI1_0)
+ // lwc1 $f0, 0($2)
+ // Generate:
+ // lui $2, %hi($CPI1_0)
+ // lwc1 $f0, %lo($CPI1_0)($2)
+ if ((Addr.getOperand(0).getOpcode() == MipsISD::Hi ||
+ Addr.getOperand(0).getOpcode() == ISD::LOAD) &&
+ Addr.getOperand(1).getOpcode() == MipsISD::Lo) {
+ SDValue LoVal = Addr.getOperand(1);
+ if (isa<ConstantPoolSDNode>(LoVal.getOperand(0)) ||
+ isa<GlobalAddressSDNode>(LoVal.getOperand(0))) {
+ Base = Addr.getOperand(0);
+ Offset = LoVal.getOperand(0);
+ return true;
+ }
+ }
+ }
+
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, ValTy);
+ return true;
+}
+
+/// Select instructions not customized! Used for
+/// expanded, promoted and normal instructions
+SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
+ unsigned Opcode = Node->getOpcode();
+ DebugLoc dl = Node->getDebugLoc();
+
+ // Dump information about the Node being selected
+ DEBUG(errs() << "Selecting: "; Node->dump(CurDAG); errs() << "\n");
+
+ // If we have a custom node, we already have selected!
+ if (Node->isMachineOpcode()) {
+ DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
+ return NULL;
+ }
+
+ ///
+ // Instruction Selection not handled by the auto-generated
+ // tablegen selection should be handled here.
+ ///
+ switch(Opcode) {
+ default: break;
+
+ case ISD::SUBE:
+ case ISD::ADDE: {
+ SDValue InFlag = Node->getOperand(2), CmpLHS;
+ unsigned Opc = InFlag.getOpcode(); (void)Opc;
+ assert(((Opc == ISD::ADDC || Opc == ISD::ADDE) ||
+ (Opc == ISD::SUBC || Opc == ISD::SUBE)) &&
+ "(ADD|SUB)E flag operand must come from (ADD|SUB)C/E insn");
+
+ unsigned MOp;
+ if (Opcode == ISD::ADDE) {
+ CmpLHS = InFlag.getValue(0);
+ MOp = Mips::ADDu;
+ } else {
+ CmpLHS = InFlag.getOperand(0);
+ MOp = Mips::SUBu;
+ }
+
+ SDValue Ops[] = { CmpLHS, InFlag.getOperand(1) };
+
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+
+ EVT VT = LHS.getValueType();
+ SDNode *Carry = CurDAG->getMachineNode(Mips::SLTu, dl, VT, Ops, 2);
+ SDNode *AddCarry = CurDAG->getMachineNode(Mips::ADDu, dl, VT,
+ SDValue(Carry,0), RHS);
+
+ return CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Glue,
+ LHS, SDValue(AddCarry,0));
+ }
+
+ /// Mul with two results
+ case ISD::SMUL_LOHI:
+ case ISD::UMUL_LOHI: {
+ assert(Node->getValueType(0) != MVT::i64 &&
+ "64-bit multiplication with two results not handled.");
+ SDValue Op1 = Node->getOperand(0);
+ SDValue Op2 = Node->getOperand(1);
+
+ unsigned Op;
+ Op = (Opcode == ISD::UMUL_LOHI ? Mips::MULTu : Mips::MULT);
+
+ SDNode *Mul = CurDAG->getMachineNode(Op, dl, MVT::Glue, Op1, Op2);
+
+ SDValue InFlag = SDValue(Mul, 0);
+ SDNode *Lo = CurDAG->getMachineNode(Mips::MFLO, dl, MVT::i32,
+ MVT::Glue, InFlag);
+ InFlag = SDValue(Lo,1);
+ SDNode *Hi = CurDAG->getMachineNode(Mips::MFHI, dl, MVT::i32, InFlag);
+
+ if (!SDValue(Node, 0).use_empty())
+ ReplaceUses(SDValue(Node, 0), SDValue(Lo,0));
+
+ if (!SDValue(Node, 1).use_empty())
+ ReplaceUses(SDValue(Node, 1), SDValue(Hi,0));
+
+ return NULL;
+ }
+
+ /// Special Muls
+ case ISD::MUL:
+ // Mips32 has a 32-bit three operand mul instruction.
+ if (Subtarget.hasMips32() && Node->getValueType(0) == MVT::i32)
+ break;
+ case ISD::MULHS:
+ case ISD::MULHU: {
+ assert((Opcode == ISD::MUL || Node->getValueType(0) != MVT::i64) &&
+ "64-bit MULH* not handled.");
+ EVT Ty = Node->getValueType(0);
+ SDValue MulOp1 = Node->getOperand(0);
+ SDValue MulOp2 = Node->getOperand(1);
+
+ unsigned MulOp = (Opcode == ISD::MULHU ?
+ Mips::MULTu :
+ (Ty == MVT::i32 ? Mips::MULT : Mips::DMULT));
+ SDNode *MulNode = CurDAG->getMachineNode(MulOp, dl,
+ MVT::Glue, MulOp1, MulOp2);
+
+ SDValue InFlag = SDValue(MulNode, 0);
+
+ if (Opcode == ISD::MUL) {
+ unsigned Opc = (Ty == MVT::i32 ? Mips::MFLO : Mips::MFLO64);
+ return CurDAG->getMachineNode(Opc, dl, Ty, InFlag);
+ }
+ else
+ return CurDAG->getMachineNode(Mips::MFHI, dl, MVT::i32, InFlag);
+ }
+
+ // Get target GOT address.
+ case ISD::GLOBAL_OFFSET_TABLE:
+ return getGlobalBaseReg();
+
+ case ISD::ConstantFP: {
+ ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(Node);
+ if (Node->getValueType(0) == MVT::f64 && CN->isExactlyValue(+0.0)) {
+ SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ Mips::ZERO, MVT::i32);
+ return CurDAG->getMachineNode(Mips::BuildPairF64, dl, MVT::f64, Zero,
+ Zero);
+ }
+ break;
+ }
+
+ case MipsISD::ThreadPointer: {
+ unsigned SrcReg = Mips::HWR29;
+ unsigned DestReg = Mips::V1;
+ SDNode *Rdhwr = CurDAG->getMachineNode(Mips::RDHWR, Node->getDebugLoc(),
+ Node->getValueType(0), CurDAG->getRegister(SrcReg, MVT::i32));
+ SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, DestReg,
+ SDValue(Rdhwr, 0));
+ SDValue ResNode = CurDAG->getCopyFromReg(Chain, dl, DestReg, MVT::i32);
+ ReplaceUses(SDValue(Node, 0), ResNode);
+ return ResNode.getNode();
+ }
+ }
+
+ // Select the default instruction
+ SDNode *ResNode = SelectCode(Node);
+
+ DEBUG(errs() << "=> ");
+ if (ResNode == NULL || ResNode == Node)
+ DEBUG(Node->dump(CurDAG));
+ else
+ DEBUG(ResNode->dump(CurDAG));
+ DEBUG(errs() << "\n");
+ return ResNode;
+}
+
+bool MipsDAGToDAGISel::
+SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
+ std::vector<SDValue> &OutOps) {
+ assert(ConstraintCode == 'm' && "unexpected asm memory constraint");
+ OutOps.push_back(Op);
+ return false;
+}
+
+/// createMipsISelDag - This pass converts a legalized DAG into a
+/// MIPS-specific DAG, ready for instruction scheduling.
+FunctionPass *llvm::createMipsISelDag(MipsTargetMachine &TM) {
+ return new MipsDAGToDAGISel(TM);
+}
diff --git a/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp b/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp
new file mode 100644
index 0000000..1932e74
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp
@@ -0,0 +1,2581 @@
+//===-- MipsISelLowering.cpp - Mips DAG Lowering Implementation -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that Mips uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mips-lower"
+#include "MipsISelLowering.h"
+#include "MipsMachineFunction.h"
+#include "MipsTargetMachine.h"
+#include "MipsTargetObjectFile.h"
+#include "MipsSubtarget.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/CallingConv.h"
+#include "InstPrinter/MipsInstPrinter.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+using namespace llvm;
+
+// If I is a shifted mask, set the size (Size) and the first bit of the
+// mask (Pos), and return true.
+// For example, if I is 0x003ff800, (Pos, Size) = (11, 11).
+static bool IsShiftedMask(uint64_t I, uint64_t &Pos, uint64_t &Size) {
+ if (!isUInt<32>(I) || !isShiftedMask_32(I))
+ return false;
+
+ Size = CountPopulation_32(I);
+ Pos = CountTrailingZeros_32(I);
+ return true;
+}
+
+const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ case MipsISD::JmpLink: return "MipsISD::JmpLink";
+ case MipsISD::Hi: return "MipsISD::Hi";
+ case MipsISD::Lo: return "MipsISD::Lo";
+ case MipsISD::GPRel: return "MipsISD::GPRel";
+ case MipsISD::TlsGd: return "MipsISD::TlsGd";
+ case MipsISD::TprelHi: return "MipsISD::TprelHi";
+ case MipsISD::TprelLo: return "MipsISD::TprelLo";
+ case MipsISD::ThreadPointer: return "MipsISD::ThreadPointer";
+ case MipsISD::Ret: return "MipsISD::Ret";
+ case MipsISD::FPBrcond: return "MipsISD::FPBrcond";
+ case MipsISD::FPCmp: return "MipsISD::FPCmp";
+ case MipsISD::CMovFP_T: return "MipsISD::CMovFP_T";
+ case MipsISD::CMovFP_F: return "MipsISD::CMovFP_F";
+ case MipsISD::FPRound: return "MipsISD::FPRound";
+ case MipsISD::MAdd: return "MipsISD::MAdd";
+ case MipsISD::MAddu: return "MipsISD::MAddu";
+ case MipsISD::MSub: return "MipsISD::MSub";
+ case MipsISD::MSubu: return "MipsISD::MSubu";
+ case MipsISD::DivRem: return "MipsISD::DivRem";
+ case MipsISD::DivRemU: return "MipsISD::DivRemU";
+ case MipsISD::BuildPairF64: return "MipsISD::BuildPairF64";
+ case MipsISD::ExtractElementF64: return "MipsISD::ExtractElementF64";
+ case MipsISD::WrapperPIC: return "MipsISD::WrapperPIC";
+ case MipsISD::DynAlloc: return "MipsISD::DynAlloc";
+ case MipsISD::Sync: return "MipsISD::Sync";
+ case MipsISD::Ext: return "MipsISD::Ext";
+ case MipsISD::Ins: return "MipsISD::Ins";
+ default: return NULL;
+ }
+}
+
+MipsTargetLowering::
+MipsTargetLowering(MipsTargetMachine &TM)
+ : TargetLowering(TM, new MipsTargetObjectFile()),
+ Subtarget(&TM.getSubtarget<MipsSubtarget>()),
+ HasMips64(Subtarget->hasMips64()), IsN64(Subtarget->isABI_N64()) {
+
+ // Mips does not have i1 type, so use i32 for
+ // setcc operations results (slt, sgt, ...).
+ setBooleanContents(ZeroOrOneBooleanContent);
+ setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
+
+ // Set up the register classes
+ addRegisterClass(MVT::i32, Mips::CPURegsRegisterClass);
+ addRegisterClass(MVT::f32, Mips::FGR32RegisterClass);
+
+ if (HasMips64)
+ addRegisterClass(MVT::i64, Mips::CPU64RegsRegisterClass);
+
+ // When dealing with single precision only, use libcalls
+ if (!Subtarget->isSingleFloat()) {
+ if (HasMips64)
+ addRegisterClass(MVT::f64, Mips::FGR64RegisterClass);
+ else
+ addRegisterClass(MVT::f64, Mips::AFGR64RegisterClass);
+ }
+
+ // Load extented operations for i1 types must be promoted
+ setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+
+ // MIPS doesn't have extending float->double load/store
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
+ // Used by legalize types to correctly generate the setcc result.
+ // Without this, every float setcc comes with a AND/OR with the result,
+ // we don't want this, since the fpcmp result goes to a flag register,
+ // which is used implicitly by brcond and select operations.
+ AddPromotedToType(ISD::SETCC, MVT::i1, MVT::i32);
+
+ // Mips Custom Operations
+ setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
+ setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
+ setOperationAction(ISD::JumpTable, MVT::i32, Custom);
+ setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
+ setOperationAction(ISD::SELECT, MVT::f32, Custom);
+ setOperationAction(ISD::SELECT, MVT::f64, Custom);
+ setOperationAction(ISD::SELECT, MVT::i32, Custom);
+ setOperationAction(ISD::BRCOND, MVT::Other, Custom);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
+ setOperationAction(ISD::VASTART, MVT::Other, Custom);
+
+ setOperationAction(ISD::SDIV, MVT::i32, Expand);
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+ setOperationAction(ISD::UDIV, MVT::i32, Expand);
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+ setOperationAction(ISD::SDIV, MVT::i64, Expand);
+ setOperationAction(ISD::SREM, MVT::i64, Expand);
+ setOperationAction(ISD::UDIV, MVT::i64, Expand);
+ setOperationAction(ISD::UREM, MVT::i64, Expand);
+
+ // Operations not directly supported by Mips.
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC, MVT::Other, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+ setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ, MVT::i32, Expand);
+ setOperationAction(ISD::ROTL, MVT::i32, Expand);
+ setOperationAction(ISD::ROTL, MVT::i64, Expand);
+
+ if (!Subtarget->hasMips32r2())
+ setOperationAction(ISD::ROTR, MVT::i32, Expand);
+
+ if (!Subtarget->hasMips64r2())
+ setOperationAction(ISD::ROTR, MVT::i64, Expand);
+
+ setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
+ setOperationAction(ISD::FSIN, MVT::f32, Expand);
+ setOperationAction(ISD::FSIN, MVT::f64, Expand);
+ setOperationAction(ISD::FCOS, MVT::f32, Expand);
+ setOperationAction(ISD::FCOS, MVT::f64, Expand);
+ setOperationAction(ISD::FPOWI, MVT::f32, Expand);
+ setOperationAction(ISD::FPOW, MVT::f32, Expand);
+ setOperationAction(ISD::FPOW, MVT::f64, Expand);
+ setOperationAction(ISD::FLOG, MVT::f32, Expand);
+ setOperationAction(ISD::FLOG2, MVT::f32, Expand);
+ setOperationAction(ISD::FLOG10, MVT::f32, Expand);
+ setOperationAction(ISD::FEXP, MVT::f32, Expand);
+ setOperationAction(ISD::FMA, MVT::f32, Expand);
+ setOperationAction(ISD::FMA, MVT::f64, Expand);
+
+ setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
+ setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
+
+ setOperationAction(ISD::VAARG, MVT::Other, Expand);
+ setOperationAction(ISD::VACOPY, MVT::Other, Expand);
+ setOperationAction(ISD::VAEND, MVT::Other, Expand);
+
+ // Use the default for now
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+
+ setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
+
+ setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand);
+
+ setInsertFencesForAtomic(true);
+
+ if (Subtarget->isSingleFloat())
+ setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
+
+ if (!Subtarget->hasSEInReg()) {
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
+ }
+
+ if (!Subtarget->hasBitCount())
+ setOperationAction(ISD::CTLZ, MVT::i32, Expand);
+
+ if (!Subtarget->hasSwap())
+ setOperationAction(ISD::BSWAP, MVT::i32, Expand);
+
+ setTargetDAGCombine(ISD::ADDE);
+ setTargetDAGCombine(ISD::SUBE);
+ setTargetDAGCombine(ISD::SDIVREM);
+ setTargetDAGCombine(ISD::UDIVREM);
+ setTargetDAGCombine(ISD::SETCC);
+ setTargetDAGCombine(ISD::AND);
+ setTargetDAGCombine(ISD::OR);
+
+ setMinFunctionAlignment(2);
+
+ setStackPointerRegisterToSaveRestore(Mips::SP);
+ computeRegisterProperties();
+
+ setExceptionPointerRegister(Mips::A0);
+ setExceptionSelectorRegister(Mips::A1);
+}
+
+bool MipsTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
+ MVT::SimpleValueType SVT = VT.getSimpleVT().SimpleTy;
+ return SVT == MVT::i64 || SVT == MVT::i32 || SVT == MVT::i16;
+}
+
+EVT MipsTargetLowering::getSetCCResultType(EVT VT) const {
+ return MVT::i32;
+}
+
+// SelectMadd -
+// Transforms a subgraph in CurDAG if the following pattern is found:
+// (addc multLo, Lo0), (adde multHi, Hi0),
+// where,
+// multHi/Lo: product of multiplication
+// Lo0: initial value of Lo register
+// Hi0: initial value of Hi register
+// Return true if pattern matching was successful.
+static bool SelectMadd(SDNode* ADDENode, SelectionDAG* CurDAG) {
+ // ADDENode's second operand must be a flag output of an ADDC node in order
+ // for the matching to be successful.
+ SDNode* ADDCNode = ADDENode->getOperand(2).getNode();
+
+ if (ADDCNode->getOpcode() != ISD::ADDC)
+ return false;
+
+ SDValue MultHi = ADDENode->getOperand(0);
+ SDValue MultLo = ADDCNode->getOperand(0);
+ SDNode* MultNode = MultHi.getNode();
+ unsigned MultOpc = MultHi.getOpcode();
+
+ // MultHi and MultLo must be generated by the same node,
+ if (MultLo.getNode() != MultNode)
+ return false;
+
+ // and it must be a multiplication.
+ if (MultOpc != ISD::SMUL_LOHI && MultOpc != ISD::UMUL_LOHI)
+ return false;
+
+ // MultLo amd MultHi must be the first and second output of MultNode
+ // respectively.
+ if (MultHi.getResNo() != 1 || MultLo.getResNo() != 0)
+ return false;
+
+ // Transform this to a MADD only if ADDENode and ADDCNode are the only users
+ // of the values of MultNode, in which case MultNode will be removed in later
+ // phases.
+ // If there exist users other than ADDENode or ADDCNode, this function returns
+ // here, which will result in MultNode being mapped to a single MULT
+ // instruction node rather than a pair of MULT and MADD instructions being
+ // produced.
+ if (!MultHi.hasOneUse() || !MultLo.hasOneUse())
+ return false;
+
+ SDValue Chain = CurDAG->getEntryNode();
+ DebugLoc dl = ADDENode->getDebugLoc();
+
+ // create MipsMAdd(u) node
+ MultOpc = MultOpc == ISD::UMUL_LOHI ? MipsISD::MAddu : MipsISD::MAdd;
+
+ SDValue MAdd = CurDAG->getNode(MultOpc, dl,
+ MVT::Glue,
+ MultNode->getOperand(0),// Factor 0
+ MultNode->getOperand(1),// Factor 1
+ ADDCNode->getOperand(1),// Lo0
+ ADDENode->getOperand(1));// Hi0
+
+ // create CopyFromReg nodes
+ SDValue CopyFromLo = CurDAG->getCopyFromReg(Chain, dl, Mips::LO, MVT::i32,
+ MAdd);
+ SDValue CopyFromHi = CurDAG->getCopyFromReg(CopyFromLo.getValue(1), dl,
+ Mips::HI, MVT::i32,
+ CopyFromLo.getValue(2));
+
+ // replace uses of adde and addc here
+ if (!SDValue(ADDCNode, 0).use_empty())
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(ADDCNode, 0), CopyFromLo);
+
+ if (!SDValue(ADDENode, 0).use_empty())
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(ADDENode, 0), CopyFromHi);
+
+ return true;
+}
+
+// SelectMsub -
+// Transforms a subgraph in CurDAG if the following pattern is found:
+// (addc Lo0, multLo), (sube Hi0, multHi),
+// where,
+// multHi/Lo: product of multiplication
+// Lo0: initial value of Lo register
+// Hi0: initial value of Hi register
+// Return true if pattern matching was successful.
+static bool SelectMsub(SDNode* SUBENode, SelectionDAG* CurDAG) {
+ // SUBENode's second operand must be a flag output of an SUBC node in order
+ // for the matching to be successful.
+ SDNode* SUBCNode = SUBENode->getOperand(2).getNode();
+
+ if (SUBCNode->getOpcode() != ISD::SUBC)
+ return false;
+
+ SDValue MultHi = SUBENode->getOperand(1);
+ SDValue MultLo = SUBCNode->getOperand(1);
+ SDNode* MultNode = MultHi.getNode();
+ unsigned MultOpc = MultHi.getOpcode();
+
+ // MultHi and MultLo must be generated by the same node,
+ if (MultLo.getNode() != MultNode)
+ return false;
+
+ // and it must be a multiplication.
+ if (MultOpc != ISD::SMUL_LOHI && MultOpc != ISD::UMUL_LOHI)
+ return false;
+
+ // MultLo amd MultHi must be the first and second output of MultNode
+ // respectively.
+ if (MultHi.getResNo() != 1 || MultLo.getResNo() != 0)
+ return false;
+
+ // Transform this to a MSUB only if SUBENode and SUBCNode are the only users
+ // of the values of MultNode, in which case MultNode will be removed in later
+ // phases.
+ // If there exist users other than SUBENode or SUBCNode, this function returns
+ // here, which will result in MultNode being mapped to a single MULT
+ // instruction node rather than a pair of MULT and MSUB instructions being
+ // produced.
+ if (!MultHi.hasOneUse() || !MultLo.hasOneUse())
+ return false;
+
+ SDValue Chain = CurDAG->getEntryNode();
+ DebugLoc dl = SUBENode->getDebugLoc();
+
+ // create MipsSub(u) node
+ MultOpc = MultOpc == ISD::UMUL_LOHI ? MipsISD::MSubu : MipsISD::MSub;
+
+ SDValue MSub = CurDAG->getNode(MultOpc, dl,
+ MVT::Glue,
+ MultNode->getOperand(0),// Factor 0
+ MultNode->getOperand(1),// Factor 1
+ SUBCNode->getOperand(0),// Lo0
+ SUBENode->getOperand(0));// Hi0
+
+ // create CopyFromReg nodes
+ SDValue CopyFromLo = CurDAG->getCopyFromReg(Chain, dl, Mips::LO, MVT::i32,
+ MSub);
+ SDValue CopyFromHi = CurDAG->getCopyFromReg(CopyFromLo.getValue(1), dl,
+ Mips::HI, MVT::i32,
+ CopyFromLo.getValue(2));
+
+ // replace uses of sube and subc here
+ if (!SDValue(SUBCNode, 0).use_empty())
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(SUBCNode, 0), CopyFromLo);
+
+ if (!SDValue(SUBENode, 0).use_empty())
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(SUBENode, 0), CopyFromHi);
+
+ return true;
+}
+
+static SDValue PerformADDECombine(SDNode *N, SelectionDAG& DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const MipsSubtarget* Subtarget) {
+ if (DCI.isBeforeLegalize())
+ return SDValue();
+
+ if (Subtarget->hasMips32() && SelectMadd(N, &DAG))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
+static SDValue PerformSUBECombine(SDNode *N, SelectionDAG& DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const MipsSubtarget* Subtarget) {
+ if (DCI.isBeforeLegalize())
+ return SDValue();
+
+ if (Subtarget->hasMips32() && SelectMsub(N, &DAG))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
+static SDValue PerformDivRemCombine(SDNode *N, SelectionDAG& DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const MipsSubtarget* Subtarget) {
+ if (DCI.isBeforeLegalizeOps())
+ return SDValue();
+
+ EVT Ty = N->getValueType(0);
+ unsigned LO = (Ty == MVT::i32) ? Mips::LO : Mips::LO64;
+ unsigned HI = (Ty == MVT::i32) ? Mips::HI : Mips::HI64;
+ unsigned opc = N->getOpcode() == ISD::SDIVREM ? MipsISD::DivRem :
+ MipsISD::DivRemU;
+ DebugLoc dl = N->getDebugLoc();
+
+ SDValue DivRem = DAG.getNode(opc, dl, MVT::Glue,
+ N->getOperand(0), N->getOperand(1));
+ SDValue InChain = DAG.getEntryNode();
+ SDValue InGlue = DivRem;
+
+ // insert MFLO
+ if (N->hasAnyUseOfValue(0)) {
+ SDValue CopyFromLo = DAG.getCopyFromReg(InChain, dl, LO, Ty,
+ InGlue);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), CopyFromLo);
+ InChain = CopyFromLo.getValue(1);
+ InGlue = CopyFromLo.getValue(2);
+ }
+
+ // insert MFHI
+ if (N->hasAnyUseOfValue(1)) {
+ SDValue CopyFromHi = DAG.getCopyFromReg(InChain, dl,
+ HI, Ty, InGlue);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), CopyFromHi);
+ }
+
+ return SDValue();
+}
+
+static Mips::CondCode FPCondCCodeToFCC(ISD::CondCode CC) {
+ switch (CC) {
+ default: llvm_unreachable("Unknown fp condition code!");
+ case ISD::SETEQ:
+ case ISD::SETOEQ: return Mips::FCOND_OEQ;
+ case ISD::SETUNE: return Mips::FCOND_UNE;
+ case ISD::SETLT:
+ case ISD::SETOLT: return Mips::FCOND_OLT;
+ case ISD::SETGT:
+ case ISD::SETOGT: return Mips::FCOND_OGT;
+ case ISD::SETLE:
+ case ISD::SETOLE: return Mips::FCOND_OLE;
+ case ISD::SETGE:
+ case ISD::SETOGE: return Mips::FCOND_OGE;
+ case ISD::SETULT: return Mips::FCOND_ULT;
+ case ISD::SETULE: return Mips::FCOND_ULE;
+ case ISD::SETUGT: return Mips::FCOND_UGT;
+ case ISD::SETUGE: return Mips::FCOND_UGE;
+ case ISD::SETUO: return Mips::FCOND_UN;
+ case ISD::SETO: return Mips::FCOND_OR;
+ case ISD::SETNE:
+ case ISD::SETONE: return Mips::FCOND_ONE;
+ case ISD::SETUEQ: return Mips::FCOND_UEQ;
+ }
+}
+
+
+// Returns true if condition code has to be inverted.
+static bool InvertFPCondCode(Mips::CondCode CC) {
+ if (CC >= Mips::FCOND_F && CC <= Mips::FCOND_NGT)
+ return false;
+
+ if (CC >= Mips::FCOND_T && CC <= Mips::FCOND_GT)
+ return true;
+
+ assert(false && "Illegal Condition Code");
+ return false;
+}
+
+// Creates and returns an FPCmp node from a setcc node.
+// Returns Op if setcc is not a floating point comparison.
+static SDValue CreateFPCmp(SelectionDAG& DAG, const SDValue& Op) {
+ // must be a SETCC node
+ if (Op.getOpcode() != ISD::SETCC)
+ return Op;
+
+ SDValue LHS = Op.getOperand(0);
+
+ if (!LHS.getValueType().isFloatingPoint())
+ return Op;
+
+ SDValue RHS = Op.getOperand(1);
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Assume the 3rd operand is a CondCodeSDNode. Add code to check the type of
+ // node if necessary.
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+
+ return DAG.getNode(MipsISD::FPCmp, dl, MVT::Glue, LHS, RHS,
+ DAG.getConstant(FPCondCCodeToFCC(CC), MVT::i32));
+}
+
+// Creates and returns a CMovFPT/F node.
+static SDValue CreateCMovFP(SelectionDAG& DAG, SDValue Cond, SDValue True,
+ SDValue False, DebugLoc DL) {
+ bool invert = InvertFPCondCode((Mips::CondCode)
+ cast<ConstantSDNode>(Cond.getOperand(2))
+ ->getSExtValue());
+
+ return DAG.getNode((invert ? MipsISD::CMovFP_F : MipsISD::CMovFP_T), DL,
+ True.getValueType(), True, False, Cond);
+}
+
+static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG& DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const MipsSubtarget* Subtarget) {
+ if (DCI.isBeforeLegalizeOps())
+ return SDValue();
+
+ SDValue Cond = CreateFPCmp(DAG, SDValue(N, 0));
+
+ if (Cond.getOpcode() != MipsISD::FPCmp)
+ return SDValue();
+
+ SDValue True = DAG.getConstant(1, MVT::i32);
+ SDValue False = DAG.getConstant(0, MVT::i32);
+
+ return CreateCMovFP(DAG, Cond, True, False, N->getDebugLoc());
+}
+
+static SDValue PerformANDCombine(SDNode *N, SelectionDAG& DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const MipsSubtarget* Subtarget) {
+ // Pattern match EXT.
+ // $dst = and ((sra or srl) $src , pos), (2**size - 1)
+ // => ext $dst, $src, size, pos
+ if (DCI.isBeforeLegalizeOps() || !Subtarget->hasMips32r2())
+ return SDValue();
+
+ SDValue ShiftRight = N->getOperand(0), Mask = N->getOperand(1);
+
+ // Op's first operand must be a shift right.
+ if (ShiftRight.getOpcode() != ISD::SRA && ShiftRight.getOpcode() != ISD::SRL)
+ return SDValue();
+
+ // The second operand of the shift must be an immediate.
+ uint64_t Pos;
+ ConstantSDNode *CN;
+ if (!(CN = dyn_cast<ConstantSDNode>(ShiftRight.getOperand(1))))
+ return SDValue();
+
+ Pos = CN->getZExtValue();
+
+ uint64_t SMPos, SMSize;
+ // Op's second operand must be a shifted mask.
+ if (!(CN = dyn_cast<ConstantSDNode>(Mask)) ||
+ !IsShiftedMask(CN->getZExtValue(), SMPos, SMSize))
+ return SDValue();
+
+ // Return if the shifted mask does not start at bit 0 or the sum of its size
+ // and Pos exceeds the word's size.
+ if (SMPos != 0 || Pos + SMSize > 32)
+ return SDValue();
+
+ return DAG.getNode(MipsISD::Ext, N->getDebugLoc(), MVT::i32,
+ ShiftRight.getOperand(0),
+ DAG.getConstant(Pos, MVT::i32),
+ DAG.getConstant(SMSize, MVT::i32));
+}
+
+static SDValue PerformORCombine(SDNode *N, SelectionDAG& DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const MipsSubtarget* Subtarget) {
+ // Pattern match INS.
+ // $dst = or (and $src1 , mask0), (and (shl $src, pos), mask1),
+ // where mask1 = (2**size - 1) << pos, mask0 = ~mask1
+ // => ins $dst, $src, size, pos, $src1
+ if (DCI.isBeforeLegalizeOps() || !Subtarget->hasMips32r2())
+ return SDValue();
+
+ SDValue And0 = N->getOperand(0), And1 = N->getOperand(1);
+ uint64_t SMPos0, SMSize0, SMPos1, SMSize1;
+ ConstantSDNode *CN;
+
+ // See if Op's first operand matches (and $src1 , mask0).
+ if (And0.getOpcode() != ISD::AND)
+ return SDValue();
+
+ if (!(CN = dyn_cast<ConstantSDNode>(And0.getOperand(1))) ||
+ !IsShiftedMask(~CN->getSExtValue(), SMPos0, SMSize0))
+ return SDValue();
+
+ // See if Op's second operand matches (and (shl $src, pos), mask1).
+ if (And1.getOpcode() != ISD::AND)
+ return SDValue();
+
+ if (!(CN = dyn_cast<ConstantSDNode>(And1.getOperand(1))) ||
+ !IsShiftedMask(CN->getZExtValue(), SMPos1, SMSize1))
+ return SDValue();
+
+ // The shift masks must have the same position and size.
+ if (SMPos0 != SMPos1 || SMSize0 != SMSize1)
+ return SDValue();
+
+ SDValue Shl = And1.getOperand(0);
+ if (Shl.getOpcode() != ISD::SHL)
+ return SDValue();
+
+ if (!(CN = dyn_cast<ConstantSDNode>(Shl.getOperand(1))))
+ return SDValue();
+
+ unsigned Shamt = CN->getZExtValue();
+
+ // Return if the shift amount and the first bit position of mask are not the
+ // same.
+ if (Shamt != SMPos0)
+ return SDValue();
+
+ return DAG.getNode(MipsISD::Ins, N->getDebugLoc(), MVT::i32,
+ Shl.getOperand(0),
+ DAG.getConstant(SMPos0, MVT::i32),
+ DAG.getConstant(SMSize0, MVT::i32),
+ And0.getOperand(0));
+}
+
+SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
+ const {
+ SelectionDAG &DAG = DCI.DAG;
+ unsigned opc = N->getOpcode();
+
+ switch (opc) {
+ default: break;
+ case ISD::ADDE:
+ return PerformADDECombine(N, DAG, DCI, Subtarget);
+ case ISD::SUBE:
+ return PerformSUBECombine(N, DAG, DCI, Subtarget);
+ case ISD::SDIVREM:
+ case ISD::UDIVREM:
+ return PerformDivRemCombine(N, DAG, DCI, Subtarget);
+ case ISD::SETCC:
+ return PerformSETCCCombine(N, DAG, DCI, Subtarget);
+ case ISD::AND:
+ return PerformANDCombine(N, DAG, DCI, Subtarget);
+ case ISD::OR:
+ return PerformORCombine(N, DAG, DCI, Subtarget);
+ }
+
+ return SDValue();
+}
+
+SDValue MipsTargetLowering::
+LowerOperation(SDValue Op, SelectionDAG &DAG) const
+{
+ switch (Op.getOpcode())
+ {
+ case ISD::BRCOND: return LowerBRCOND(Op, DAG);
+ case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
+ case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
+ case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
+ case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
+ case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
+ case ISD::JumpTable: return LowerJumpTable(Op, DAG);
+ case ISD::SELECT: return LowerSELECT(Op, DAG);
+ case ISD::VASTART: return LowerVASTART(Op, DAG);
+ case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
+ case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
+ case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG);
+ case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG);
+ }
+ return SDValue();
+}
+
+//===----------------------------------------------------------------------===//
+// Lower helper functions
+//===----------------------------------------------------------------------===//
+
+// AddLiveIn - This helper function adds the specified physical register to the
+// MachineFunction as a live in value. It also creates a corresponding
+// virtual register for it.
+static unsigned
+AddLiveIn(MachineFunction &MF, unsigned PReg, TargetRegisterClass *RC)
+{
+ assert(RC->contains(PReg) && "Not the correct regclass!");
+ unsigned VReg = MF.getRegInfo().createVirtualRegister(RC);
+ MF.getRegInfo().addLiveIn(PReg, VReg);
+ return VReg;
+}
+
+// Get fp branch code (not opcode) from condition code.
+static Mips::FPBranchCode GetFPBranchCodeFromCond(Mips::CondCode CC) {
+ if (CC >= Mips::FCOND_F && CC <= Mips::FCOND_NGT)
+ return Mips::BRANCH_T;
+
+ if (CC >= Mips::FCOND_T && CC <= Mips::FCOND_GT)
+ return Mips::BRANCH_F;
+
+ return Mips::BRANCH_INVALID;
+}
+
+static MachineBasicBlock* ExpandCondMov(MachineInstr *MI, MachineBasicBlock *BB,
+ DebugLoc dl,
+ const MipsSubtarget* Subtarget,
+ const TargetInstrInfo *TII,
+ bool isFPCmp, unsigned Opc) {
+ // There is no need to expand CMov instructions if target has
+ // conditional moves.
+ if (Subtarget->hasCondMov())
+ return BB;
+
+ // To "insert" a SELECT_CC instruction, we actually have to insert the
+ // diamond control-flow pattern. The incoming instruction knows the
+ // destination vreg to set, the condition code register to branch on, the
+ // true/false values to select between, and a branch opcode to use.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // setcc r1, r2, r3
+ // bNE r1, r0, copy1MBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, copy0MBB);
+ F->insert(It, sinkMBB);
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Next, add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
+ // Emit the right instruction according to the type of the operands compared
+ if (isFPCmp)
+ BuildMI(BB, dl, TII->get(Opc)).addMBB(sinkMBB);
+ else
+ BuildMI(BB, dl, TII->get(Opc)).addReg(MI->getOperand(2).getReg())
+ .addReg(Mips::ZERO).addMBB(sinkMBB);
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to sinkMBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ]
+ // ...
+ BB = sinkMBB;
+
+ if (isFPCmp)
+ BuildMI(*BB, BB->begin(), dl,
+ TII->get(Mips::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB)
+ .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB);
+ else
+ BuildMI(*BB, BB->begin(), dl,
+ TII->get(Mips::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(3).getReg()).addMBB(thisMBB)
+ .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+MachineBasicBlock *
+MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+
+ switch (MI->getOpcode()) {
+ default:
+ assert(false && "Unexpected instr type to insert");
+ return NULL;
+ case Mips::MOVT:
+ case Mips::MOVT_S:
+ case Mips::MOVT_D:
+ return ExpandCondMov(MI, BB, dl, Subtarget, TII, true, Mips::BC1F);
+ case Mips::MOVF:
+ case Mips::MOVF_S:
+ case Mips::MOVF_D:
+ return ExpandCondMov(MI, BB, dl, Subtarget, TII, true, Mips::BC1T);
+ case Mips::MOVZ_I:
+ case Mips::MOVZ_S:
+ case Mips::MOVZ_D:
+ return ExpandCondMov(MI, BB, dl, Subtarget, TII, false, Mips::BNE);
+ case Mips::MOVN_I:
+ case Mips::MOVN_S:
+ case Mips::MOVN_D:
+ return ExpandCondMov(MI, BB, dl, Subtarget, TII, false, Mips::BEQ);
+
+ case Mips::ATOMIC_LOAD_ADD_I8:
+ return EmitAtomicBinaryPartword(MI, BB, 1, Mips::ADDu);
+ case Mips::ATOMIC_LOAD_ADD_I16:
+ return EmitAtomicBinaryPartword(MI, BB, 2, Mips::ADDu);
+ case Mips::ATOMIC_LOAD_ADD_I32:
+ return EmitAtomicBinary(MI, BB, 4, Mips::ADDu);
+
+ case Mips::ATOMIC_LOAD_AND_I8:
+ return EmitAtomicBinaryPartword(MI, BB, 1, Mips::AND);
+ case Mips::ATOMIC_LOAD_AND_I16:
+ return EmitAtomicBinaryPartword(MI, BB, 2, Mips::AND);
+ case Mips::ATOMIC_LOAD_AND_I32:
+ return EmitAtomicBinary(MI, BB, 4, Mips::AND);
+
+ case Mips::ATOMIC_LOAD_OR_I8:
+ return EmitAtomicBinaryPartword(MI, BB, 1, Mips::OR);
+ case Mips::ATOMIC_LOAD_OR_I16:
+ return EmitAtomicBinaryPartword(MI, BB, 2, Mips::OR);
+ case Mips::ATOMIC_LOAD_OR_I32:
+ return EmitAtomicBinary(MI, BB, 4, Mips::OR);
+
+ case Mips::ATOMIC_LOAD_XOR_I8:
+ return EmitAtomicBinaryPartword(MI, BB, 1, Mips::XOR);
+ case Mips::ATOMIC_LOAD_XOR_I16:
+ return EmitAtomicBinaryPartword(MI, BB, 2, Mips::XOR);
+ case Mips::ATOMIC_LOAD_XOR_I32:
+ return EmitAtomicBinary(MI, BB, 4, Mips::XOR);
+
+ case Mips::ATOMIC_LOAD_NAND_I8:
+ return EmitAtomicBinaryPartword(MI, BB, 1, 0, true);
+ case Mips::ATOMIC_LOAD_NAND_I16:
+ return EmitAtomicBinaryPartword(MI, BB, 2, 0, true);
+ case Mips::ATOMIC_LOAD_NAND_I32:
+ return EmitAtomicBinary(MI, BB, 4, 0, true);
+
+ case Mips::ATOMIC_LOAD_SUB_I8:
+ return EmitAtomicBinaryPartword(MI, BB, 1, Mips::SUBu);
+ case Mips::ATOMIC_LOAD_SUB_I16:
+ return EmitAtomicBinaryPartword(MI, BB, 2, Mips::SUBu);
+ case Mips::ATOMIC_LOAD_SUB_I32:
+ return EmitAtomicBinary(MI, BB, 4, Mips::SUBu);
+
+ case Mips::ATOMIC_SWAP_I8:
+ return EmitAtomicBinaryPartword(MI, BB, 1, 0);
+ case Mips::ATOMIC_SWAP_I16:
+ return EmitAtomicBinaryPartword(MI, BB, 2, 0);
+ case Mips::ATOMIC_SWAP_I32:
+ return EmitAtomicBinary(MI, BB, 4, 0);
+
+ case Mips::ATOMIC_CMP_SWAP_I8:
+ return EmitAtomicCmpSwapPartword(MI, BB, 1);
+ case Mips::ATOMIC_CMP_SWAP_I16:
+ return EmitAtomicCmpSwapPartword(MI, BB, 2);
+ case Mips::ATOMIC_CMP_SWAP_I32:
+ return EmitAtomicCmpSwap(MI, BB, 4);
+ }
+}
+
+// This function also handles Mips::ATOMIC_SWAP_I32 (when BinOpcode == 0), and
+// Mips::ATOMIC_LOAD_NAND_I32 (when Nand == true)
+MachineBasicBlock *
+MipsTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
+ unsigned Size, unsigned BinOpcode,
+ bool Nand) const {
+ assert(Size == 4 && "Unsupported size for EmitAtomicBinary.");
+
+ MachineFunction *MF = BB->getParent();
+ MachineRegisterInfo &RegInfo = MF->getRegInfo();
+ const TargetRegisterClass *RC = getRegClassFor(MVT::i32);
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+
+ unsigned OldVal = MI->getOperand(0).getReg();
+ unsigned Ptr = MI->getOperand(1).getReg();
+ unsigned Incr = MI->getOperand(2).getReg();
+
+ unsigned StoreVal = RegInfo.createVirtualRegister(RC);
+ unsigned AndRes = RegInfo.createVirtualRegister(RC);
+ unsigned Success = RegInfo.createVirtualRegister(RC);
+
+ // insert new blocks after the current block
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineFunction::iterator It = BB;
+ ++It;
+ MF->insert(It, loopMBB);
+ MF->insert(It, exitMBB);
+
+ // Transfer the remainder of BB and its successor edges to exitMBB.
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // thisMBB:
+ // ...
+ // fallthrough --> loopMBB
+ BB->addSuccessor(loopMBB);
+ loopMBB->addSuccessor(loopMBB);
+ loopMBB->addSuccessor(exitMBB);
+
+ // loopMBB:
+ // ll oldval, 0(ptr)
+ // <binop> storeval, oldval, incr
+ // sc success, storeval, 0(ptr)
+ // beq success, $0, loopMBB
+ BB = loopMBB;
+ BuildMI(BB, dl, TII->get(Mips::LL), OldVal).addReg(Ptr).addImm(0);
+ if (Nand) {
+ // and andres, oldval, incr
+ // nor storeval, $0, andres
+ BuildMI(BB, dl, TII->get(Mips::AND), AndRes).addReg(OldVal).addReg(Incr);
+ BuildMI(BB, dl, TII->get(Mips::NOR), StoreVal)
+ .addReg(Mips::ZERO).addReg(AndRes);
+ } else if (BinOpcode) {
+ // <binop> storeval, oldval, incr
+ BuildMI(BB, dl, TII->get(BinOpcode), StoreVal).addReg(OldVal).addReg(Incr);
+ } else {
+ StoreVal = Incr;
+ }
+ BuildMI(BB, dl, TII->get(Mips::SC), Success)
+ .addReg(StoreVal).addReg(Ptr).addImm(0);
+ BuildMI(BB, dl, TII->get(Mips::BEQ))
+ .addReg(Success).addReg(Mips::ZERO).addMBB(loopMBB);
+
+ MI->eraseFromParent(); // The instruction is gone now.
+
+ return exitMBB;
+}
+
+MachineBasicBlock *
+MipsTargetLowering::EmitAtomicBinaryPartword(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ unsigned Size, unsigned BinOpcode,
+ bool Nand) const {
+ assert((Size == 1 || Size == 2) &&
+ "Unsupported size for EmitAtomicBinaryPartial.");
+
+ MachineFunction *MF = BB->getParent();
+ MachineRegisterInfo &RegInfo = MF->getRegInfo();
+ const TargetRegisterClass *RC = getRegClassFor(MVT::i32);
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+
+ unsigned Dest = MI->getOperand(0).getReg();
+ unsigned Ptr = MI->getOperand(1).getReg();
+ unsigned Incr = MI->getOperand(2).getReg();
+
+ unsigned AlignedAddr = RegInfo.createVirtualRegister(RC);
+ unsigned ShiftAmt = RegInfo.createVirtualRegister(RC);
+ unsigned Mask = RegInfo.createVirtualRegister(RC);
+ unsigned Mask2 = RegInfo.createVirtualRegister(RC);
+ unsigned NewVal = RegInfo.createVirtualRegister(RC);
+ unsigned OldVal = RegInfo.createVirtualRegister(RC);
+ unsigned Incr2 = RegInfo.createVirtualRegister(RC);
+ unsigned MaskLSB2 = RegInfo.createVirtualRegister(RC);
+ unsigned PtrLSB2 = RegInfo.createVirtualRegister(RC);
+ unsigned MaskUpper = RegInfo.createVirtualRegister(RC);
+ unsigned AndRes = RegInfo.createVirtualRegister(RC);
+ unsigned BinOpRes = RegInfo.createVirtualRegister(RC);
+ unsigned MaskedOldVal0 = RegInfo.createVirtualRegister(RC);
+ unsigned StoreVal = RegInfo.createVirtualRegister(RC);
+ unsigned MaskedOldVal1 = RegInfo.createVirtualRegister(RC);
+ unsigned SrlRes = RegInfo.createVirtualRegister(RC);
+ unsigned SllRes = RegInfo.createVirtualRegister(RC);
+ unsigned Success = RegInfo.createVirtualRegister(RC);
+
+ // insert new blocks after the current block
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineFunction::iterator It = BB;
+ ++It;
+ MF->insert(It, loopMBB);
+ MF->insert(It, sinkMBB);
+ MF->insert(It, exitMBB);
+
+ // Transfer the remainder of BB and its successor edges to exitMBB.
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ BB->addSuccessor(loopMBB);
+ loopMBB->addSuccessor(loopMBB);
+ loopMBB->addSuccessor(sinkMBB);
+ sinkMBB->addSuccessor(exitMBB);
+
+ // thisMBB:
+ // addiu masklsb2,$0,-4 # 0xfffffffc
+ // and alignedaddr,ptr,masklsb2
+ // andi ptrlsb2,ptr,3
+ // sll shiftamt,ptrlsb2,3
+ // ori maskupper,$0,255 # 0xff
+ // sll mask,maskupper,shiftamt
+ // nor mask2,$0,mask
+ // sll incr2,incr,shiftamt
+
+ int64_t MaskImm = (Size == 1) ? 255 : 65535;
+ BuildMI(BB, dl, TII->get(Mips::ADDiu), MaskLSB2)
+ .addReg(Mips::ZERO).addImm(-4);
+ BuildMI(BB, dl, TII->get(Mips::AND), AlignedAddr)
+ .addReg(Ptr).addReg(MaskLSB2);
+ BuildMI(BB, dl, TII->get(Mips::ANDi), PtrLSB2).addReg(Ptr).addImm(3);
+ BuildMI(BB, dl, TII->get(Mips::SLL), ShiftAmt).addReg(PtrLSB2).addImm(3);
+ BuildMI(BB, dl, TII->get(Mips::ORi), MaskUpper)
+ .addReg(Mips::ZERO).addImm(MaskImm);
+ BuildMI(BB, dl, TII->get(Mips::SLLV), Mask)
+ .addReg(ShiftAmt).addReg(MaskUpper);
+ BuildMI(BB, dl, TII->get(Mips::NOR), Mask2).addReg(Mips::ZERO).addReg(Mask);
+ BuildMI(BB, dl, TII->get(Mips::SLLV), Incr2).addReg(ShiftAmt).addReg(Incr);
+
+
+ // atomic.load.binop
+ // loopMBB:
+ // ll oldval,0(alignedaddr)
+ // binop binopres,oldval,incr2
+ // and newval,binopres,mask
+ // and maskedoldval0,oldval,mask2
+ // or storeval,maskedoldval0,newval
+ // sc success,storeval,0(alignedaddr)
+ // beq success,$0,loopMBB
+
+ // atomic.swap
+ // loopMBB:
+ // ll oldval,0(alignedaddr)
+ // and newval,incr2,mask
+ // and maskedoldval0,oldval,mask2
+ // or storeval,maskedoldval0,newval
+ // sc success,storeval,0(alignedaddr)
+ // beq success,$0,loopMBB
+
+ BB = loopMBB;
+ BuildMI(BB, dl, TII->get(Mips::LL), OldVal).addReg(AlignedAddr).addImm(0);
+ if (Nand) {
+ // and andres, oldval, incr2
+ // nor binopres, $0, andres
+ // and newval, binopres, mask
+ BuildMI(BB, dl, TII->get(Mips::AND), AndRes).addReg(OldVal).addReg(Incr2);
+ BuildMI(BB, dl, TII->get(Mips::NOR), BinOpRes)
+ .addReg(Mips::ZERO).addReg(AndRes);
+ BuildMI(BB, dl, TII->get(Mips::AND), NewVal).addReg(BinOpRes).addReg(Mask);
+ } else if (BinOpcode) {
+ // <binop> binopres, oldval, incr2
+ // and newval, binopres, mask
+ BuildMI(BB, dl, TII->get(BinOpcode), BinOpRes).addReg(OldVal).addReg(Incr2);
+ BuildMI(BB, dl, TII->get(Mips::AND), NewVal).addReg(BinOpRes).addReg(Mask);
+ } else {// atomic.swap
+ // and newval, incr2, mask
+ BuildMI(BB, dl, TII->get(Mips::AND), NewVal).addReg(Incr2).addReg(Mask);
+ }
+
+ BuildMI(BB, dl, TII->get(Mips::AND), MaskedOldVal0)
+ .addReg(OldVal).addReg(Mask2);
+ BuildMI(BB, dl, TII->get(Mips::OR), StoreVal)
+ .addReg(MaskedOldVal0).addReg(NewVal);
+ BuildMI(BB, dl, TII->get(Mips::SC), Success)
+ .addReg(StoreVal).addReg(AlignedAddr).addImm(0);
+ BuildMI(BB, dl, TII->get(Mips::BEQ))
+ .addReg(Success).addReg(Mips::ZERO).addMBB(loopMBB);
+
+ // sinkMBB:
+ // and maskedoldval1,oldval,mask
+ // srl srlres,maskedoldval1,shiftamt
+ // sll sllres,srlres,24
+ // sra dest,sllres,24
+ BB = sinkMBB;
+ int64_t ShiftImm = (Size == 1) ? 24 : 16;
+
+ BuildMI(BB, dl, TII->get(Mips::AND), MaskedOldVal1)
+ .addReg(OldVal).addReg(Mask);
+ BuildMI(BB, dl, TII->get(Mips::SRLV), SrlRes)
+ .addReg(ShiftAmt).addReg(MaskedOldVal1);
+ BuildMI(BB, dl, TII->get(Mips::SLL), SllRes)
+ .addReg(SrlRes).addImm(ShiftImm);
+ BuildMI(BB, dl, TII->get(Mips::SRA), Dest)
+ .addReg(SllRes).addImm(ShiftImm);
+
+ MI->eraseFromParent(); // The instruction is gone now.
+
+ return exitMBB;
+}
+
+MachineBasicBlock *
+MipsTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ unsigned Size) const {
+ assert(Size == 4 && "Unsupported size for EmitAtomicCmpSwap.");
+
+ MachineFunction *MF = BB->getParent();
+ MachineRegisterInfo &RegInfo = MF->getRegInfo();
+ const TargetRegisterClass *RC = getRegClassFor(MVT::i32);
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+
+ unsigned Dest = MI->getOperand(0).getReg();
+ unsigned Ptr = MI->getOperand(1).getReg();
+ unsigned OldVal = MI->getOperand(2).getReg();
+ unsigned NewVal = MI->getOperand(3).getReg();
+
+ unsigned Success = RegInfo.createVirtualRegister(RC);
+
+ // insert new blocks after the current block
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineFunction::iterator It = BB;
+ ++It;
+ MF->insert(It, loop1MBB);
+ MF->insert(It, loop2MBB);
+ MF->insert(It, exitMBB);
+
+ // Transfer the remainder of BB and its successor edges to exitMBB.
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // thisMBB:
+ // ...
+ // fallthrough --> loop1MBB
+ BB->addSuccessor(loop1MBB);
+ loop1MBB->addSuccessor(exitMBB);
+ loop1MBB->addSuccessor(loop2MBB);
+ loop2MBB->addSuccessor(loop1MBB);
+ loop2MBB->addSuccessor(exitMBB);
+
+ // loop1MBB:
+ // ll dest, 0(ptr)
+ // bne dest, oldval, exitMBB
+ BB = loop1MBB;
+ BuildMI(BB, dl, TII->get(Mips::LL), Dest).addReg(Ptr).addImm(0);
+ BuildMI(BB, dl, TII->get(Mips::BNE))
+ .addReg(Dest).addReg(OldVal).addMBB(exitMBB);
+
+ // loop2MBB:
+ // sc success, newval, 0(ptr)
+ // beq success, $0, loop1MBB
+ BB = loop2MBB;
+ BuildMI(BB, dl, TII->get(Mips::SC), Success)
+ .addReg(NewVal).addReg(Ptr).addImm(0);
+ BuildMI(BB, dl, TII->get(Mips::BEQ))
+ .addReg(Success).addReg(Mips::ZERO).addMBB(loop1MBB);
+
+ MI->eraseFromParent(); // The instruction is gone now.
+
+ return exitMBB;
+}
+
+MachineBasicBlock *
+MipsTargetLowering::EmitAtomicCmpSwapPartword(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ unsigned Size) const {
+ assert((Size == 1 || Size == 2) &&
+ "Unsupported size for EmitAtomicCmpSwapPartial.");
+
+ MachineFunction *MF = BB->getParent();
+ MachineRegisterInfo &RegInfo = MF->getRegInfo();
+ const TargetRegisterClass *RC = getRegClassFor(MVT::i32);
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+
+ unsigned Dest = MI->getOperand(0).getReg();
+ unsigned Ptr = MI->getOperand(1).getReg();
+ unsigned CmpVal = MI->getOperand(2).getReg();
+ unsigned NewVal = MI->getOperand(3).getReg();
+
+ unsigned AlignedAddr = RegInfo.createVirtualRegister(RC);
+ unsigned ShiftAmt = RegInfo.createVirtualRegister(RC);
+ unsigned Mask = RegInfo.createVirtualRegister(RC);
+ unsigned Mask2 = RegInfo.createVirtualRegister(RC);
+ unsigned ShiftedCmpVal = RegInfo.createVirtualRegister(RC);
+ unsigned OldVal = RegInfo.createVirtualRegister(RC);
+ unsigned MaskedOldVal0 = RegInfo.createVirtualRegister(RC);
+ unsigned ShiftedNewVal = RegInfo.createVirtualRegister(RC);
+ unsigned MaskLSB2 = RegInfo.createVirtualRegister(RC);
+ unsigned PtrLSB2 = RegInfo.createVirtualRegister(RC);
+ unsigned MaskUpper = RegInfo.createVirtualRegister(RC);
+ unsigned MaskedCmpVal = RegInfo.createVirtualRegister(RC);
+ unsigned MaskedNewVal = RegInfo.createVirtualRegister(RC);
+ unsigned MaskedOldVal1 = RegInfo.createVirtualRegister(RC);
+ unsigned StoreVal = RegInfo.createVirtualRegister(RC);
+ unsigned SrlRes = RegInfo.createVirtualRegister(RC);
+ unsigned SllRes = RegInfo.createVirtualRegister(RC);
+ unsigned Success = RegInfo.createVirtualRegister(RC);
+
+ // insert new blocks after the current block
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineFunction::iterator It = BB;
+ ++It;
+ MF->insert(It, loop1MBB);
+ MF->insert(It, loop2MBB);
+ MF->insert(It, sinkMBB);
+ MF->insert(It, exitMBB);
+
+ // Transfer the remainder of BB and its successor edges to exitMBB.
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ BB->addSuccessor(loop1MBB);
+ loop1MBB->addSuccessor(sinkMBB);
+ loop1MBB->addSuccessor(loop2MBB);
+ loop2MBB->addSuccessor(loop1MBB);
+ loop2MBB->addSuccessor(sinkMBB);
+ sinkMBB->addSuccessor(exitMBB);
+
+ // FIXME: computation of newval2 can be moved to loop2MBB.
+ // thisMBB:
+ // addiu masklsb2,$0,-4 # 0xfffffffc
+ // and alignedaddr,ptr,masklsb2
+ // andi ptrlsb2,ptr,3
+ // sll shiftamt,ptrlsb2,3
+ // ori maskupper,$0,255 # 0xff
+ // sll mask,maskupper,shiftamt
+ // nor mask2,$0,mask
+ // andi maskedcmpval,cmpval,255
+ // sll shiftedcmpval,maskedcmpval,shiftamt
+ // andi maskednewval,newval,255
+ // sll shiftednewval,maskednewval,shiftamt
+ int64_t MaskImm = (Size == 1) ? 255 : 65535;
+ BuildMI(BB, dl, TII->get(Mips::ADDiu), MaskLSB2)
+ .addReg(Mips::ZERO).addImm(-4);
+ BuildMI(BB, dl, TII->get(Mips::AND), AlignedAddr)
+ .addReg(Ptr).addReg(MaskLSB2);
+ BuildMI(BB, dl, TII->get(Mips::ANDi), PtrLSB2).addReg(Ptr).addImm(3);
+ BuildMI(BB, dl, TII->get(Mips::SLL), ShiftAmt).addReg(PtrLSB2).addImm(3);
+ BuildMI(BB, dl, TII->get(Mips::ORi), MaskUpper)
+ .addReg(Mips::ZERO).addImm(MaskImm);
+ BuildMI(BB, dl, TII->get(Mips::SLLV), Mask)
+ .addReg(ShiftAmt).addReg(MaskUpper);
+ BuildMI(BB, dl, TII->get(Mips::NOR), Mask2).addReg(Mips::ZERO).addReg(Mask);
+ BuildMI(BB, dl, TII->get(Mips::ANDi), MaskedCmpVal)
+ .addReg(CmpVal).addImm(MaskImm);
+ BuildMI(BB, dl, TII->get(Mips::SLLV), ShiftedCmpVal)
+ .addReg(ShiftAmt).addReg(MaskedCmpVal);
+ BuildMI(BB, dl, TII->get(Mips::ANDi), MaskedNewVal)
+ .addReg(NewVal).addImm(MaskImm);
+ BuildMI(BB, dl, TII->get(Mips::SLLV), ShiftedNewVal)
+ .addReg(ShiftAmt).addReg(MaskedNewVal);
+
+ // loop1MBB:
+ // ll oldval,0(alginedaddr)
+ // and maskedoldval0,oldval,mask
+ // bne maskedoldval0,shiftedcmpval,sinkMBB
+ BB = loop1MBB;
+ BuildMI(BB, dl, TII->get(Mips::LL), OldVal).addReg(AlignedAddr).addImm(0);
+ BuildMI(BB, dl, TII->get(Mips::AND), MaskedOldVal0)
+ .addReg(OldVal).addReg(Mask);
+ BuildMI(BB, dl, TII->get(Mips::BNE))
+ .addReg(MaskedOldVal0).addReg(ShiftedCmpVal).addMBB(sinkMBB);
+
+ // loop2MBB:
+ // and maskedoldval1,oldval,mask2
+ // or storeval,maskedoldval1,shiftednewval
+ // sc success,storeval,0(alignedaddr)
+ // beq success,$0,loop1MBB
+ BB = loop2MBB;
+ BuildMI(BB, dl, TII->get(Mips::AND), MaskedOldVal1)
+ .addReg(OldVal).addReg(Mask2);
+ BuildMI(BB, dl, TII->get(Mips::OR), StoreVal)
+ .addReg(MaskedOldVal1).addReg(ShiftedNewVal);
+ BuildMI(BB, dl, TII->get(Mips::SC), Success)
+ .addReg(StoreVal).addReg(AlignedAddr).addImm(0);
+ BuildMI(BB, dl, TII->get(Mips::BEQ))
+ .addReg(Success).addReg(Mips::ZERO).addMBB(loop1MBB);
+
+ // sinkMBB:
+ // srl srlres,maskedoldval0,shiftamt
+ // sll sllres,srlres,24
+ // sra dest,sllres,24
+ BB = sinkMBB;
+ int64_t ShiftImm = (Size == 1) ? 24 : 16;
+
+ BuildMI(BB, dl, TII->get(Mips::SRLV), SrlRes)
+ .addReg(ShiftAmt).addReg(MaskedOldVal0);
+ BuildMI(BB, dl, TII->get(Mips::SLL), SllRes)
+ .addReg(SrlRes).addImm(ShiftImm);
+ BuildMI(BB, dl, TII->get(Mips::SRA), Dest)
+ .addReg(SllRes).addImm(ShiftImm);
+
+ MI->eraseFromParent(); // The instruction is gone now.
+
+ return exitMBB;
+}
+
+//===----------------------------------------------------------------------===//
+// Misc Lower Operation implementation
+//===----------------------------------------------------------------------===//
+SDValue MipsTargetLowering::
+LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const
+{
+ MachineFunction &MF = DAG.getMachineFunction();
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+
+ assert(getTargetMachine().getFrameLowering()->getStackAlignment() >=
+ cast<ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue() &&
+ "Cannot lower if the alignment of the allocated space is larger than \
+ that of the stack.");
+
+ SDValue Chain = Op.getOperand(0);
+ SDValue Size = Op.getOperand(1);
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Get a reference from Mips stack pointer
+ SDValue StackPointer = DAG.getCopyFromReg(Chain, dl, Mips::SP, MVT::i32);
+
+ // Subtract the dynamic size from the actual stack size to
+ // obtain the new stack size.
+ SDValue Sub = DAG.getNode(ISD::SUB, dl, MVT::i32, StackPointer, Size);
+
+ // The Sub result contains the new stack start address, so it
+ // must be placed in the stack pointer register.
+ Chain = DAG.getCopyToReg(StackPointer.getValue(1), dl, Mips::SP, Sub,
+ SDValue());
+
+ // This node always has two return values: a new stack pointer
+ // value and a chain
+ SDVTList VTLs = DAG.getVTList(MVT::i32, MVT::Other);
+ SDValue Ptr = DAG.getFrameIndex(MipsFI->getDynAllocFI(), getPointerTy());
+ SDValue Ops[] = { Chain, Ptr, Chain.getValue(1) };
+
+ return DAG.getNode(MipsISD::DynAlloc, dl, VTLs, Ops, 3);
+}
+
+SDValue MipsTargetLowering::
+LowerBRCOND(SDValue Op, SelectionDAG &DAG) const
+{
+ // The first operand is the chain, the second is the condition, the third is
+ // the block to branch to if the condition is true.
+ SDValue Chain = Op.getOperand(0);
+ SDValue Dest = Op.getOperand(2);
+ DebugLoc dl = Op.getDebugLoc();
+
+ SDValue CondRes = CreateFPCmp(DAG, Op.getOperand(1));
+
+ // Return if flag is not set by a floating point comparison.
+ if (CondRes.getOpcode() != MipsISD::FPCmp)
+ return Op;
+
+ SDValue CCNode = CondRes.getOperand(2);
+ Mips::CondCode CC =
+ (Mips::CondCode)cast<ConstantSDNode>(CCNode)->getZExtValue();
+ SDValue BrCode = DAG.getConstant(GetFPBranchCodeFromCond(CC), MVT::i32);
+
+ return DAG.getNode(MipsISD::FPBrcond, dl, Op.getValueType(), Chain, BrCode,
+ Dest, CondRes);
+}
+
+SDValue MipsTargetLowering::
+LowerSELECT(SDValue Op, SelectionDAG &DAG) const
+{
+ SDValue Cond = CreateFPCmp(DAG, Op.getOperand(0));
+
+ // Return if flag is not set by a floating point comparison.
+ if (Cond.getOpcode() != MipsISD::FPCmp)
+ return Op;
+
+ return CreateCMovFP(DAG, Cond, Op.getOperand(1), Op.getOperand(2),
+ Op.getDebugLoc());
+}
+
+SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+ // FIXME there isn't actually debug info here
+ DebugLoc dl = Op.getDebugLoc();
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+
+ if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && !IsN64) {
+ SDVTList VTs = DAG.getVTList(MVT::i32);
+
+ MipsTargetObjectFile &TLOF = (MipsTargetObjectFile&)getObjFileLowering();
+
+ // %gp_rel relocation
+ if (TLOF.IsGlobalInSmallSection(GV, getTargetMachine())) {
+ SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
+ MipsII::MO_GPREL);
+ SDValue GPRelNode = DAG.getNode(MipsISD::GPRel, dl, VTs, &GA, 1);
+ SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(MVT::i32);
+ return DAG.getNode(ISD::ADD, dl, MVT::i32, GOT, GPRelNode);
+ }
+ // %hi/%lo relocation
+ SDValue GAHi = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
+ MipsII::MO_ABS_HI);
+ SDValue GALo = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
+ MipsII::MO_ABS_LO);
+ SDValue HiPart = DAG.getNode(MipsISD::Hi, dl, VTs, &GAHi, 1);
+ SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, GALo);
+ return DAG.getNode(ISD::ADD, dl, MVT::i32, HiPart, Lo);
+ }
+
+ EVT ValTy = Op.getValueType();
+ bool HasGotOfst = (GV->hasInternalLinkage() ||
+ (GV->hasLocalLinkage() && !isa<Function>(GV)));
+ unsigned GotFlag = IsN64 ?
+ (HasGotOfst ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT_DISP) :
+ MipsII::MO_GOT;
+ SDValue GA = DAG.getTargetGlobalAddress(GV, dl, ValTy, 0, GotFlag);
+ GA = DAG.getNode(MipsISD::WrapperPIC, dl, ValTy, GA);
+ SDValue ResNode = DAG.getLoad(ValTy, dl,
+ DAG.getEntryNode(), GA, MachinePointerInfo(),
+ false, false, 0);
+ // On functions and global targets not internal linked only
+ // a load from got/GP is necessary for PIC to work.
+ if (!HasGotOfst)
+ return ResNode;
+ SDValue GALo = DAG.getTargetGlobalAddress(GV, dl, ValTy, 0,
+ IsN64 ? MipsII::MO_GOT_OFST :
+ MipsII::MO_ABS_LO);
+ SDValue Lo = DAG.getNode(MipsISD::Lo, dl, ValTy, GALo);
+ return DAG.getNode(ISD::ADD, dl, ValTy, ResNode, Lo);
+}
+
+SDValue MipsTargetLowering::LowerBlockAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+ const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
+ // FIXME there isn't actually debug info here
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (getTargetMachine().getRelocationModel() != Reloc::PIC_) {
+ // %hi/%lo relocation
+ SDValue BAHi = DAG.getBlockAddress(BA, MVT::i32, true,
+ MipsII::MO_ABS_HI);
+ SDValue BALo = DAG.getBlockAddress(BA, MVT::i32, true,
+ MipsII::MO_ABS_LO);
+ SDValue Hi = DAG.getNode(MipsISD::Hi, dl, MVT::i32, BAHi);
+ SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, BALo);
+ return DAG.getNode(ISD::ADD, dl, MVT::i32, Hi, Lo);
+ }
+
+ SDValue BAGOTOffset = DAG.getBlockAddress(BA, MVT::i32, true,
+ MipsII::MO_GOT);
+ BAGOTOffset = DAG.getNode(MipsISD::WrapperPIC, dl, MVT::i32, BAGOTOffset);
+ SDValue BALOOffset = DAG.getBlockAddress(BA, MVT::i32, true,
+ MipsII::MO_ABS_LO);
+ SDValue Load = DAG.getLoad(MVT::i32, dl,
+ DAG.getEntryNode(), BAGOTOffset,
+ MachinePointerInfo(), false, false, 0);
+ SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, BALOOffset);
+ return DAG.getNode(ISD::ADD, dl, MVT::i32, Load, Lo);
+}
+
+SDValue MipsTargetLowering::
+LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
+{
+ // If the relocation model is PIC, use the General Dynamic TLS Model,
+ // otherwise use the Initial Exec or Local Exec TLS Model.
+ // TODO: implement Local Dynamic TLS model
+
+ GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
+ DebugLoc dl = GA->getDebugLoc();
+ const GlobalValue *GV = GA->getGlobal();
+ EVT PtrVT = getPointerTy();
+
+ if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
+ // General Dynamic TLS Model
+ SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32,
+ 0, MipsII::MO_TLSGD);
+ SDValue Tlsgd = DAG.getNode(MipsISD::TlsGd, dl, MVT::i32, TGA);
+ SDValue GP = DAG.getRegister(Mips::GP, MVT::i32);
+ SDValue Argument = DAG.getNode(ISD::ADD, dl, MVT::i32, GP, Tlsgd);
+
+ ArgListTy Args;
+ ArgListEntry Entry;
+ Entry.Node = Argument;
+ Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
+ Args.push_back(Entry);
+ std::pair<SDValue, SDValue> CallResult =
+ LowerCallTo(DAG.getEntryNode(),
+ (Type *) Type::getInt32Ty(*DAG.getContext()),
+ false, false, false, false, 0, CallingConv::C, false, true,
+ DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG,
+ dl);
+
+ return CallResult.first;
+ }
+
+ SDValue Offset;
+ if (GV->isDeclaration()) {
+ // Initial Exec TLS Model
+ SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
+ MipsII::MO_GOTTPREL);
+ Offset = DAG.getLoad(MVT::i32, dl,
+ DAG.getEntryNode(), TGA, MachinePointerInfo(),
+ false, false, 0);
+ } else {
+ // Local Exec TLS Model
+ SDVTList VTs = DAG.getVTList(MVT::i32);
+ SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
+ MipsII::MO_TPREL_HI);
+ SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
+ MipsII::MO_TPREL_LO);
+ SDValue Hi = DAG.getNode(MipsISD::TprelHi, dl, VTs, &TGAHi, 1);
+ SDValue Lo = DAG.getNode(MipsISD::TprelLo, dl, MVT::i32, TGALo);
+ Offset = DAG.getNode(ISD::ADD, dl, MVT::i32, Hi, Lo);
+ }
+
+ SDValue ThreadPointer = DAG.getNode(MipsISD::ThreadPointer, dl, PtrVT);
+ return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
+}
+
+SDValue MipsTargetLowering::
+LowerJumpTable(SDValue Op, SelectionDAG &DAG) const
+{
+ SDValue ResNode;
+ SDValue HiPart;
+ // FIXME there isn't actually debug info here
+ DebugLoc dl = Op.getDebugLoc();
+ bool IsPIC = getTargetMachine().getRelocationModel() == Reloc::PIC_;
+ unsigned char OpFlag = IsPIC ? MipsII::MO_GOT : MipsII::MO_ABS_HI;
+
+ EVT PtrVT = Op.getValueType();
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+
+ SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, OpFlag);
+
+ if (!IsPIC) {
+ SDValue Ops[] = { JTI };
+ HiPart = DAG.getNode(MipsISD::Hi, dl, DAG.getVTList(MVT::i32), Ops, 1);
+ } else {// Emit Load from Global Pointer
+ JTI = DAG.getNode(MipsISD::WrapperPIC, dl, MVT::i32, JTI);
+ HiPart = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(), JTI,
+ MachinePointerInfo(),
+ false, false, 0);
+ }
+
+ SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
+ MipsII::MO_ABS_LO);
+ SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, JTILo);
+ ResNode = DAG.getNode(ISD::ADD, dl, MVT::i32, HiPart, Lo);
+
+ return ResNode;
+}
+
+SDValue MipsTargetLowering::
+LowerConstantPool(SDValue Op, SelectionDAG &DAG) const
+{
+ SDValue ResNode;
+ ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
+ const Constant *C = N->getConstVal();
+ // FIXME there isn't actually debug info here
+ DebugLoc dl = Op.getDebugLoc();
+
+ // gp_rel relocation
+ // FIXME: we should reference the constant pool using small data sections,
+ // but the asm printer currently doesn't support this feature without
+ // hacking it. This feature should come soon so we can uncomment the
+ // stuff below.
+ //if (IsInSmallSection(C->getType())) {
+ // SDValue GPRelNode = DAG.getNode(MipsISD::GPRel, MVT::i32, CP);
+ // SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(MVT::i32);
+ // ResNode = DAG.getNode(ISD::ADD, MVT::i32, GOT, GPRelNode);
+
+ if (getTargetMachine().getRelocationModel() != Reloc::PIC_) {
+ SDValue CPHi = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(),
+ N->getOffset(), MipsII::MO_ABS_HI);
+ SDValue CPLo = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(),
+ N->getOffset(), MipsII::MO_ABS_LO);
+ SDValue HiPart = DAG.getNode(MipsISD::Hi, dl, MVT::i32, CPHi);
+ SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, CPLo);
+ ResNode = DAG.getNode(ISD::ADD, dl, MVT::i32, HiPart, Lo);
+ } else {
+ SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(),
+ N->getOffset(), MipsII::MO_GOT);
+ CP = DAG.getNode(MipsISD::WrapperPIC, dl, MVT::i32, CP);
+ SDValue Load = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(),
+ CP, MachinePointerInfo::getConstantPool(),
+ false, false, 0);
+ SDValue CPLo = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(),
+ N->getOffset(), MipsII::MO_ABS_LO);
+ SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, CPLo);
+ ResNode = DAG.getNode(ISD::ADD, dl, MVT::i32, Load, Lo);
+ }
+
+ return ResNode;
+}
+
+SDValue MipsTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MipsFunctionInfo *FuncInfo = MF.getInfo<MipsFunctionInfo>();
+
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
+ getPointerTy());
+
+ // vastart just stores the address of the VarArgsFrameIndex slot into the
+ // memory location argument.
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+ return DAG.getStore(Op.getOperand(0), dl, FI, Op.getOperand(1),
+ MachinePointerInfo(SV),
+ false, false, 0);
+}
+
+static SDValue LowerFCOPYSIGN32(SDValue Op, SelectionDAG &DAG) {
+ // FIXME: Use ext/ins instructions if target architecture is Mips32r2.
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue Op0 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op.getOperand(0));
+ SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op.getOperand(1));
+ SDValue And0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op0,
+ DAG.getConstant(0x7fffffff, MVT::i32));
+ SDValue And1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op1,
+ DAG.getConstant(0x80000000, MVT::i32));
+ SDValue Result = DAG.getNode(ISD::OR, dl, MVT::i32, And0, And1);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::f32, Result);
+}
+
+static SDValue LowerFCOPYSIGN64(SDValue Op, SelectionDAG &DAG, bool isLittle) {
+ // FIXME:
+ // Use ext/ins instructions if target architecture is Mips32r2.
+ // Eliminate redundant mfc1 and mtc1 instructions.
+ unsigned LoIdx = 0, HiIdx = 1;
+
+ if (!isLittle)
+ std::swap(LoIdx, HiIdx);
+
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue Word0 = DAG.getNode(MipsISD::ExtractElementF64, dl, MVT::i32,
+ Op.getOperand(0),
+ DAG.getConstant(LoIdx, MVT::i32));
+ SDValue Hi0 = DAG.getNode(MipsISD::ExtractElementF64, dl, MVT::i32,
+ Op.getOperand(0), DAG.getConstant(HiIdx, MVT::i32));
+ SDValue Hi1 = DAG.getNode(MipsISD::ExtractElementF64, dl, MVT::i32,
+ Op.getOperand(1), DAG.getConstant(HiIdx, MVT::i32));
+ SDValue And0 = DAG.getNode(ISD::AND, dl, MVT::i32, Hi0,
+ DAG.getConstant(0x7fffffff, MVT::i32));
+ SDValue And1 = DAG.getNode(ISD::AND, dl, MVT::i32, Hi1,
+ DAG.getConstant(0x80000000, MVT::i32));
+ SDValue Word1 = DAG.getNode(ISD::OR, dl, MVT::i32, And0, And1);
+
+ if (!isLittle)
+ std::swap(Word0, Word1);
+
+ return DAG.getNode(MipsISD::BuildPairF64, dl, MVT::f64, Word0, Word1);
+}
+
+SDValue MipsTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG)
+ const {
+ EVT Ty = Op.getValueType();
+
+ assert(Ty == MVT::f32 || Ty == MVT::f64);
+
+ if (Ty == MVT::f32)
+ return LowerFCOPYSIGN32(Op, DAG);
+ else
+ return LowerFCOPYSIGN64(Op, DAG, Subtarget->isLittle());
+}
+
+SDValue MipsTargetLowering::
+LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
+ // check the depth
+ assert((cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() == 0) &&
+ "Frame address can only be determined for current frame.");
+
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI->setFrameAddressIsTaken(true);
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, Mips::FP, VT);
+ return FrameAddr;
+}
+
+// TODO: set SType according to the desired memory barrier behavior.
+SDValue MipsTargetLowering::LowerMEMBARRIER(SDValue Op,
+ SelectionDAG& DAG) const {
+ unsigned SType = 0;
+ DebugLoc dl = Op.getDebugLoc();
+ return DAG.getNode(MipsISD::Sync, dl, MVT::Other, Op.getOperand(0),
+ DAG.getConstant(SType, MVT::i32));
+}
+
+SDValue MipsTargetLowering::LowerATOMIC_FENCE(SDValue Op,
+ SelectionDAG& DAG) const {
+ // FIXME: Need pseudo-fence for 'singlethread' fences
+ // FIXME: Set SType for weaker fences where supported/appropriate.
+ unsigned SType = 0;
+ DebugLoc dl = Op.getDebugLoc();
+ return DAG.getNode(MipsISD::Sync, dl, MVT::Other, Op.getOperand(0),
+ DAG.getConstant(SType, MVT::i32));
+}
+
+//===----------------------------------------------------------------------===//
+// Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+#include "MipsGenCallingConv.inc"
+
+//===----------------------------------------------------------------------===//
+// TODO: Implement a generic logic using tblgen that can support this.
+// Mips O32 ABI rules:
+// ---
+// i32 - Passed in A0, A1, A2, A3 and stack
+// f32 - Only passed in f32 registers if no int reg has been used yet to hold
+// an argument. Otherwise, passed in A1, A2, A3 and stack.
+// f64 - Only passed in two aliased f32 registers if no int reg has been used
+// yet to hold an argument. Otherwise, use A2, A3 and stack. If A1 is
+// not used, it must be shadowed. If only A3 is avaiable, shadow it and
+// go to stack.
+//
+// For vararg functions, all arguments are passed in A0, A1, A2, A3 and stack.
+//===----------------------------------------------------------------------===//
+
+static bool CC_MipsO32(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+
+ static const unsigned IntRegsSize=4, FloatRegsSize=2;
+
+ static const unsigned IntRegs[] = {
+ Mips::A0, Mips::A1, Mips::A2, Mips::A3
+ };
+ static const unsigned F32Regs[] = {
+ Mips::F12, Mips::F14
+ };
+ static const unsigned F64Regs[] = {
+ Mips::D6, Mips::D7
+ };
+
+ // ByVal Args
+ if (ArgFlags.isByVal()) {
+ State.HandleByVal(ValNo, ValVT, LocVT, LocInfo,
+ 1 /*MinSize*/, 4 /*MinAlign*/, ArgFlags);
+ unsigned NextReg = (State.getNextStackOffset() + 3) / 4;
+ for (unsigned r = State.getFirstUnallocated(IntRegs, IntRegsSize);
+ r < std::min(IntRegsSize, NextReg); ++r)
+ State.AllocateReg(IntRegs[r]);
+ return false;
+ }
+
+ // Promote i8 and i16
+ if (LocVT == MVT::i8 || LocVT == MVT::i16) {
+ LocVT = MVT::i32;
+ if (ArgFlags.isSExt())
+ LocInfo = CCValAssign::SExt;
+ else if (ArgFlags.isZExt())
+ LocInfo = CCValAssign::ZExt;
+ else
+ LocInfo = CCValAssign::AExt;
+ }
+
+ unsigned Reg;
+
+ // f32 and f64 are allocated in A0, A1, A2, A3 when either of the following
+ // is true: function is vararg, argument is 3rd or higher, there is previous
+ // argument which is not f32 or f64.
+ bool AllocateFloatsInIntReg = State.isVarArg() || ValNo > 1
+ || State.getFirstUnallocated(F32Regs, FloatRegsSize) != ValNo;
+ unsigned OrigAlign = ArgFlags.getOrigAlign();
+ bool isI64 = (ValVT == MVT::i32 && OrigAlign == 8);
+
+ if (ValVT == MVT::i32 || (ValVT == MVT::f32 && AllocateFloatsInIntReg)) {
+ Reg = State.AllocateReg(IntRegs, IntRegsSize);
+ // If this is the first part of an i64 arg,
+ // the allocated register must be either A0 or A2.
+ if (isI64 && (Reg == Mips::A1 || Reg == Mips::A3))
+ Reg = State.AllocateReg(IntRegs, IntRegsSize);
+ LocVT = MVT::i32;
+ } else if (ValVT == MVT::f64 && AllocateFloatsInIntReg) {
+ // Allocate int register and shadow next int register. If first
+ // available register is Mips::A1 or Mips::A3, shadow it too.
+ Reg = State.AllocateReg(IntRegs, IntRegsSize);
+ if (Reg == Mips::A1 || Reg == Mips::A3)
+ Reg = State.AllocateReg(IntRegs, IntRegsSize);
+ State.AllocateReg(IntRegs, IntRegsSize);
+ LocVT = MVT::i32;
+ } else if (ValVT.isFloatingPoint() && !AllocateFloatsInIntReg) {
+ // we are guaranteed to find an available float register
+ if (ValVT == MVT::f32) {
+ Reg = State.AllocateReg(F32Regs, FloatRegsSize);
+ // Shadow int register
+ State.AllocateReg(IntRegs, IntRegsSize);
+ } else {
+ Reg = State.AllocateReg(F64Regs, FloatRegsSize);
+ // Shadow int registers
+ unsigned Reg2 = State.AllocateReg(IntRegs, IntRegsSize);
+ if (Reg2 == Mips::A1 || Reg2 == Mips::A3)
+ State.AllocateReg(IntRegs, IntRegsSize);
+ State.AllocateReg(IntRegs, IntRegsSize);
+ }
+ } else
+ llvm_unreachable("Cannot handle this ValVT.");
+
+ unsigned SizeInBytes = ValVT.getSizeInBits() >> 3;
+ unsigned Offset = State.AllocateStack(SizeInBytes, OrigAlign);
+
+ if (!Reg)
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+ else
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+
+ return false; // CC must always match
+}
+
+//===----------------------------------------------------------------------===//
+// Call Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+static const unsigned O32IntRegsSize = 4;
+
+static const unsigned O32IntRegs[] = {
+ Mips::A0, Mips::A1, Mips::A2, Mips::A3
+};
+
+// Return next O32 integer argument register.
+static unsigned getNextIntArgReg(unsigned Reg) {
+ assert((Reg == Mips::A0) || (Reg == Mips::A2));
+ return (Reg == Mips::A0) ? Mips::A1 : Mips::A3;
+}
+
+// Write ByVal Arg to arg registers and stack.
+static void
+WriteByValArg(SDValue& ByValChain, SDValue Chain, DebugLoc dl,
+ SmallVector<std::pair<unsigned, SDValue>, 16>& RegsToPass,
+ SmallVector<SDValue, 8>& MemOpChains, int& LastFI,
+ MachineFrameInfo *MFI, SelectionDAG &DAG, SDValue Arg,
+ const CCValAssign &VA, const ISD::ArgFlagsTy& Flags,
+ MVT PtrType, bool isLittle) {
+ unsigned LocMemOffset = VA.getLocMemOffset();
+ unsigned Offset = 0;
+ uint32_t RemainingSize = Flags.getByValSize();
+ unsigned ByValAlign = Flags.getByValAlign();
+
+ // Copy the first 4 words of byval arg to registers A0 - A3.
+ // FIXME: Use a stricter alignment if it enables better optimization in passes
+ // run later.
+ for (; RemainingSize >= 4 && LocMemOffset < 4 * 4;
+ Offset += 4, RemainingSize -= 4, LocMemOffset += 4) {
+ SDValue LoadPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg,
+ DAG.getConstant(Offset, MVT::i32));
+ SDValue LoadVal = DAG.getLoad(MVT::i32, dl, Chain, LoadPtr,
+ MachinePointerInfo(),
+ false, false, std::min(ByValAlign,
+ (unsigned )4));
+ MemOpChains.push_back(LoadVal.getValue(1));
+ unsigned DstReg = O32IntRegs[LocMemOffset / 4];
+ RegsToPass.push_back(std::make_pair(DstReg, LoadVal));
+ }
+
+ if (RemainingSize == 0)
+ return;
+
+ // If there still is a register available for argument passing, write the
+ // remaining part of the structure to it using subword loads and shifts.
+ if (LocMemOffset < 4 * 4) {
+ assert(RemainingSize <= 3 && RemainingSize >= 1 &&
+ "There must be one to three bytes remaining.");
+ unsigned LoadSize = (RemainingSize == 3 ? 2 : RemainingSize);
+ SDValue LoadPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg,
+ DAG.getConstant(Offset, MVT::i32));
+ unsigned Alignment = std::min(ByValAlign, (unsigned )4);
+ SDValue LoadVal = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, Chain,
+ LoadPtr, MachinePointerInfo(),
+ MVT::getIntegerVT(LoadSize * 8), false,
+ false, Alignment);
+ MemOpChains.push_back(LoadVal.getValue(1));
+
+ // If target is big endian, shift it to the most significant half-word or
+ // byte.
+ if (!isLittle)
+ LoadVal = DAG.getNode(ISD::SHL, dl, MVT::i32, LoadVal,
+ DAG.getConstant(32 - LoadSize * 8, MVT::i32));
+
+ Offset += LoadSize;
+ RemainingSize -= LoadSize;
+
+ // Read second subword if necessary.
+ if (RemainingSize != 0) {
+ assert(RemainingSize == 1 && "There must be one byte remaining.");
+ LoadPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg,
+ DAG.getConstant(Offset, MVT::i32));
+ unsigned Alignment = std::min(ByValAlign, (unsigned )2);
+ SDValue Subword = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, Chain,
+ LoadPtr, MachinePointerInfo(),
+ MVT::i8, false, false, Alignment);
+ MemOpChains.push_back(Subword.getValue(1));
+ // Insert the loaded byte to LoadVal.
+ // FIXME: Use INS if supported by target.
+ unsigned ShiftAmt = isLittle ? 16 : 8;
+ SDValue Shift = DAG.getNode(ISD::SHL, dl, MVT::i32, Subword,
+ DAG.getConstant(ShiftAmt, MVT::i32));
+ LoadVal = DAG.getNode(ISD::OR, dl, MVT::i32, LoadVal, Shift);
+ }
+
+ unsigned DstReg = O32IntRegs[LocMemOffset / 4];
+ RegsToPass.push_back(std::make_pair(DstReg, LoadVal));
+ return;
+ }
+
+ // Create a fixed object on stack at offset LocMemOffset and copy
+ // remaining part of byval arg to it using memcpy.
+ SDValue Src = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg,
+ DAG.getConstant(Offset, MVT::i32));
+ LastFI = MFI->CreateFixedObject(RemainingSize, LocMemOffset, true);
+ SDValue Dst = DAG.getFrameIndex(LastFI, PtrType);
+ ByValChain = DAG.getMemcpy(ByValChain, dl, Dst, Src,
+ DAG.getConstant(RemainingSize, MVT::i32),
+ std::min(ByValAlign, (unsigned)4),
+ /*isVolatile=*/false, /*AlwaysInline=*/false,
+ MachinePointerInfo(0), MachinePointerInfo(0));
+}
+
+/// LowerCall - functions arguments are copied from virtual regs to
+/// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted.
+/// TODO: isTailCall.
+SDValue
+MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ // MIPs target does not yet support tail call optimization.
+ isTailCall = false;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const TargetFrameLowering *TFL = MF.getTarget().getFrameLowering();
+ bool IsPIC = getTargetMachine().getRelocationModel() == Reloc::PIC_;
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+
+ if (Subtarget->isABI_O32())
+ CCInfo.AnalyzeCallOperands(Outs, CC_MipsO32);
+ else
+ CCInfo.AnalyzeCallOperands(Outs, CC_Mips);
+
+ // Get a count of how many bytes are to be pushed on the stack.
+ unsigned NextStackOffset = CCInfo.getNextStackOffset();
+
+ // Chain is the output chain of the last Load/Store or CopyToReg node.
+ // ByValChain is the output chain of the last Memcpy node created for copying
+ // byval arguments to the stack.
+ SDValue Chain, CallSeqStart, ByValChain;
+ SDValue NextStackOffsetVal = DAG.getIntPtrConstant(NextStackOffset, true);
+ Chain = CallSeqStart = DAG.getCALLSEQ_START(InChain, NextStackOffsetVal);
+ ByValChain = InChain;
+
+ // If this is the first call, create a stack frame object that points to
+ // a location to which .cprestore saves $gp.
+ if (IsPIC && !MipsFI->getGPFI())
+ MipsFI->setGPFI(MFI->CreateFixedObject(4, 0, true));
+
+ // Get the frame index of the stack frame object that points to the location
+ // of dynamically allocated area on the stack.
+ int DynAllocFI = MipsFI->getDynAllocFI();
+
+ // Update size of the maximum argument space.
+ // For O32, a minimum of four words (16 bytes) of argument space is
+ // allocated.
+ if (Subtarget->isABI_O32())
+ NextStackOffset = std::max(NextStackOffset, (unsigned)16);
+
+ unsigned MaxCallFrameSize = MipsFI->getMaxCallFrameSize();
+
+ if (MaxCallFrameSize < NextStackOffset) {
+ MipsFI->setMaxCallFrameSize(NextStackOffset);
+
+ // Set the offsets relative to $sp of the $gp restore slot and dynamically
+ // allocated stack space. These offsets must be aligned to a boundary
+ // determined by the stack alignment of the ABI.
+ unsigned StackAlignment = TFL->getStackAlignment();
+ NextStackOffset = (NextStackOffset + StackAlignment - 1) /
+ StackAlignment * StackAlignment;
+
+ if (IsPIC)
+ MFI->setObjectOffset(MipsFI->getGPFI(), NextStackOffset);
+
+ MFI->setObjectOffset(DynAllocFI, NextStackOffset);
+ }
+
+ // With EABI is it possible to have 16 args on registers.
+ SmallVector<std::pair<unsigned, SDValue>, 16> RegsToPass;
+ SmallVector<SDValue, 8> MemOpChains;
+
+ int FirstFI = -MFI->getNumFixedObjects() - 1, LastFI = 0;
+
+ // Walk the register/memloc assignments, inserting copies/loads.
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ SDValue Arg = OutVals[i];
+ CCValAssign &VA = ArgLocs[i];
+
+ // Promote the value if needed.
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full:
+ if (Subtarget->isABI_O32() && VA.isRegLoc()) {
+ if (VA.getValVT() == MVT::f32 && VA.getLocVT() == MVT::i32)
+ Arg = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
+ if (VA.getValVT() == MVT::f64 && VA.getLocVT() == MVT::i32) {
+ SDValue Lo = DAG.getNode(MipsISD::ExtractElementF64, dl, MVT::i32,
+ Arg, DAG.getConstant(0, MVT::i32));
+ SDValue Hi = DAG.getNode(MipsISD::ExtractElementF64, dl, MVT::i32,
+ Arg, DAG.getConstant(1, MVT::i32));
+ if (!Subtarget->isLittle())
+ std::swap(Lo, Hi);
+ unsigned LocRegLo = VA.getLocReg();
+ unsigned LocRegHigh = getNextIntArgReg(LocRegLo);
+ RegsToPass.push_back(std::make_pair(LocRegLo, Lo));
+ RegsToPass.push_back(std::make_pair(LocRegHigh, Hi));
+ continue;
+ }
+ }
+ break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::AExt:
+ Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ }
+
+ // Arguments that can be passed on register must be kept at
+ // RegsToPass vector
+ if (VA.isRegLoc()) {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ continue;
+ }
+
+ // Register can't get to this point...
+ assert(VA.isMemLoc());
+
+ // ByVal Arg.
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+ if (Flags.isByVal()) {
+ assert(Subtarget->isABI_O32() &&
+ "No support for ByVal args by ABIs other than O32 yet.");
+ assert(Flags.getByValSize() &&
+ "ByVal args of size 0 should have been ignored by front-end.");
+ WriteByValArg(ByValChain, Chain, dl, RegsToPass, MemOpChains, LastFI, MFI,
+ DAG, Arg, VA, Flags, getPointerTy(), Subtarget->isLittle());
+ continue;
+ }
+
+ // Create the frame index object for this incoming parameter
+ LastFI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
+ VA.getLocMemOffset(), true);
+ SDValue PtrOff = DAG.getFrameIndex(LastFI, getPointerTy());
+
+ // emit ISD::STORE whichs stores the
+ // parameter value to a stack Location
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(),
+ false, false, 0));
+ }
+
+ // Extend range of indices of frame objects for outgoing arguments that were
+ // created during this function call. Skip this step if no such objects were
+ // created.
+ if (LastFI)
+ MipsFI->extendOutArgFIRange(FirstFI, LastFI);
+
+ // If a memcpy has been created to copy a byval arg to a stack, replace the
+ // chain input of CallSeqStart with ByValChain.
+ if (InChain != ByValChain)
+ DAG.UpdateNodeOperands(CallSeqStart.getNode(), ByValChain,
+ NextStackOffsetVal);
+
+ // Transform all store nodes into one single node because all store
+ // nodes are independent of each other.
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+ // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
+ // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
+ // node so that legalize doesn't hack it.
+ unsigned char OpFlag = IsPIC ? MipsII::MO_GOT_CALL : MipsII::MO_NO_FLAG;
+ bool LoadSymAddr = false;
+ SDValue CalleeLo;
+
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ if (IsPIC && G->getGlobal()->hasInternalLinkage()) {
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
+ getPointerTy(), 0,MipsII:: MO_GOT);
+ CalleeLo = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy(),
+ 0, MipsII::MO_ABS_LO);
+ } else {
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
+ getPointerTy(), 0, OpFlag);
+ }
+
+ LoadSymAddr = true;
+ }
+ else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ Callee = DAG.getTargetExternalSymbol(S->getSymbol(),
+ getPointerTy(), OpFlag);
+ LoadSymAddr = true;
+ }
+
+ SDValue InFlag;
+
+ // Create nodes that load address of callee and copy it to T9
+ if (IsPIC) {
+ if (LoadSymAddr) {
+ // Load callee address
+ Callee = DAG.getNode(MipsISD::WrapperPIC, dl, MVT::i32, Callee);
+ SDValue LoadValue = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(), Callee,
+ MachinePointerInfo::getGOT(),
+ false, false, 0);
+
+ // Use GOT+LO if callee has internal linkage.
+ if (CalleeLo.getNode()) {
+ SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, CalleeLo);
+ Callee = DAG.getNode(ISD::ADD, dl, MVT::i32, LoadValue, Lo);
+ } else
+ Callee = LoadValue;
+ }
+
+ // copy to T9
+ Chain = DAG.getCopyToReg(Chain, dl, Mips::T9, Callee, SDValue(0, 0));
+ InFlag = Chain.getValue(1);
+ Callee = DAG.getRegister(Mips::T9, MVT::i32);
+ }
+
+ // Build a sequence of copy-to-reg nodes chained together with token
+ // chain and flag operands which copy the outgoing args into registers.
+ // The InFlag in necessary since all emitted instructions must be
+ // stuck together.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // MipsJmpLink = #chain, #target_address, #opt_in_flags...
+ // = Chain, Callee, Reg#1, Reg#2, ...
+ //
+ // Returns a chain & a flag for retval copy to use.
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ // Add argument registers to the end of the list so that they are
+ // known live into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+ if (InFlag.getNode())
+ Ops.push_back(InFlag);
+
+ Chain = DAG.getNode(MipsISD::JmpLink, dl, NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ // Create the CALLSEQ_END node.
+ Chain = DAG.getCALLSEQ_END(Chain,
+ DAG.getIntPtrConstant(NextStackOffset, true),
+ DAG.getIntPtrConstant(0, true), InFlag);
+ InFlag = Chain.getValue(1);
+
+ // Handle result values, copying them out of physregs into vregs that we
+ // return.
+ return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
+ Ins, dl, DAG, InVals);
+}
+
+/// LowerCallResult - Lower the result values of a call into the
+/// appropriate copies out of appropriate physical registers.
+SDValue
+MipsTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+
+ CCInfo.AnalyzeCallResult(Ins, RetCC_Mips);
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ Chain = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(),
+ RVLocs[i].getValVT(), InFlag).getValue(1);
+ InFlag = Chain.getValue(2);
+ InVals.push_back(Chain.getValue(0));
+ }
+
+ return Chain;
+}
+
+//===----------------------------------------------------------------------===//
+// Formal Arguments Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+static void ReadByValArg(MachineFunction &MF, SDValue Chain, DebugLoc dl,
+ std::vector<SDValue>& OutChains,
+ SelectionDAG &DAG, unsigned NumWords, SDValue FIN,
+ const CCValAssign &VA, const ISD::ArgFlagsTy& Flags) {
+ unsigned LocMem = VA.getLocMemOffset();
+ unsigned FirstWord = LocMem / 4;
+
+ // copy register A0 - A3 to frame object
+ for (unsigned i = 0; i < NumWords; ++i) {
+ unsigned CurWord = FirstWord + i;
+ if (CurWord >= O32IntRegsSize)
+ break;
+
+ unsigned SrcReg = O32IntRegs[CurWord];
+ unsigned Reg = AddLiveIn(MF, SrcReg, Mips::CPURegsRegisterClass);
+ SDValue StorePtr = DAG.getNode(ISD::ADD, dl, MVT::i32, FIN,
+ DAG.getConstant(i * 4, MVT::i32));
+ SDValue Store = DAG.getStore(Chain, dl, DAG.getRegister(Reg, MVT::i32),
+ StorePtr, MachinePointerInfo(), false,
+ false, 0);
+ OutChains.push_back(Store);
+ }
+}
+
+/// LowerFormalArguments - transform physical registers into virtual registers
+/// and generate load operations for arguments places on the stack.
+SDValue
+MipsTargetLowering::LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals)
+ const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+
+ MipsFI->setVarArgsFrameIndex(0);
+
+ // Used with vargs to acumulate store chains.
+ std::vector<SDValue> OutChains;
+
+ // Assign locations to all of the incoming arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+
+ if (Subtarget->isABI_O32())
+ CCInfo.AnalyzeFormalArguments(Ins, CC_MipsO32);
+ else
+ CCInfo.AnalyzeFormalArguments(Ins, CC_Mips);
+
+ int LastFI = 0;// MipsFI->LastInArgFI is 0 at the entry of this function.
+
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+
+ // Arguments stored on registers
+ if (VA.isRegLoc()) {
+ EVT RegVT = VA.getLocVT();
+ unsigned ArgReg = VA.getLocReg();
+ TargetRegisterClass *RC = 0;
+
+ if (RegVT == MVT::i32)
+ RC = Mips::CPURegsRegisterClass;
+ else if (RegVT == MVT::i64)
+ RC = Mips::CPU64RegsRegisterClass;
+ else if (RegVT == MVT::f32)
+ RC = Mips::FGR32RegisterClass;
+ else if (RegVT == MVT::f64)
+ RC = HasMips64 ? Mips::FGR64RegisterClass : Mips::AFGR64RegisterClass;
+ else
+ llvm_unreachable("RegVT not supported by FormalArguments Lowering");
+
+ // Transform the arguments stored on
+ // physical registers into virtual ones
+ unsigned Reg = AddLiveIn(DAG.getMachineFunction(), ArgReg, RC);
+ SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
+
+ // If this is an 8 or 16-bit value, it has been passed promoted
+ // to 32 bits. Insert an assert[sz]ext to capture this, then
+ // truncate to the right size.
+ if (VA.getLocInfo() != CCValAssign::Full) {
+ unsigned Opcode = 0;
+ if (VA.getLocInfo() == CCValAssign::SExt)
+ Opcode = ISD::AssertSext;
+ else if (VA.getLocInfo() == CCValAssign::ZExt)
+ Opcode = ISD::AssertZext;
+ if (Opcode)
+ ArgValue = DAG.getNode(Opcode, dl, RegVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+ ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
+ }
+
+ // Handle O32 ABI cases: i32->f32 and (i32,i32)->f64
+ if (Subtarget->isABI_O32()) {
+ if (RegVT == MVT::i32 && VA.getValVT() == MVT::f32)
+ ArgValue = DAG.getNode(ISD::BITCAST, dl, MVT::f32, ArgValue);
+ if (RegVT == MVT::i32 && VA.getValVT() == MVT::f64) {
+ unsigned Reg2 = AddLiveIn(DAG.getMachineFunction(),
+ getNextIntArgReg(ArgReg), RC);
+ SDValue ArgValue2 = DAG.getCopyFromReg(Chain, dl, Reg2, RegVT);
+ if (!Subtarget->isLittle())
+ std::swap(ArgValue, ArgValue2);
+ ArgValue = DAG.getNode(MipsISD::BuildPairF64, dl, MVT::f64,
+ ArgValue, ArgValue2);
+ }
+ }
+
+ InVals.push_back(ArgValue);
+ } else { // VA.isRegLoc()
+
+ // sanity check
+ assert(VA.isMemLoc());
+
+ ISD::ArgFlagsTy Flags = Ins[i].Flags;
+
+ if (Flags.isByVal()) {
+ assert(Subtarget->isABI_O32() &&
+ "No support for ByVal args by ABIs other than O32 yet.");
+ assert(Flags.getByValSize() &&
+ "ByVal args of size 0 should have been ignored by front-end.");
+ unsigned NumWords = (Flags.getByValSize() + 3) / 4;
+ LastFI = MFI->CreateFixedObject(NumWords * 4, VA.getLocMemOffset(),
+ true);
+ SDValue FIN = DAG.getFrameIndex(LastFI, getPointerTy());
+ InVals.push_back(FIN);
+ ReadByValArg(MF, Chain, dl, OutChains, DAG, NumWords, FIN, VA, Flags);
+
+ continue;
+ }
+
+ // The stack pointer offset is relative to the caller stack frame.
+ LastFI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
+ VA.getLocMemOffset(), true);
+
+ // Create load nodes to retrieve arguments from the stack
+ SDValue FIN = DAG.getFrameIndex(LastFI, getPointerTy());
+ InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
+ MachinePointerInfo::getFixedStack(LastFI),
+ false, false, 0));
+ }
+ }
+
+ // The mips ABIs for returning structs by value requires that we copy
+ // the sret argument into $v0 for the return. Save the argument into
+ // a virtual register so that we can access it from the return points.
+ if (DAG.getMachineFunction().getFunction()->hasStructRetAttr()) {
+ unsigned Reg = MipsFI->getSRetReturnReg();
+ if (!Reg) {
+ Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i32));
+ MipsFI->setSRetReturnReg(Reg);
+ }
+ SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[0]);
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
+ }
+
+ if (isVarArg && Subtarget->isABI_O32()) {
+ // Record the frame index of the first variable argument
+ // which is a value necessary to VASTART.
+ unsigned NextStackOffset = CCInfo.getNextStackOffset();
+ assert(NextStackOffset % 4 == 0 &&
+ "NextStackOffset must be aligned to 4-byte boundaries.");
+ LastFI = MFI->CreateFixedObject(4, NextStackOffset, true);
+ MipsFI->setVarArgsFrameIndex(LastFI);
+
+ // If NextStackOffset is smaller than o32's 16-byte reserved argument area,
+ // copy the integer registers that have not been used for argument passing
+ // to the caller's stack frame.
+ for (; NextStackOffset < 16; NextStackOffset += 4) {
+ TargetRegisterClass *RC = Mips::CPURegsRegisterClass;
+ unsigned Idx = NextStackOffset / 4;
+ unsigned Reg = AddLiveIn(DAG.getMachineFunction(), O32IntRegs[Idx], RC);
+ SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, MVT::i32);
+ LastFI = MFI->CreateFixedObject(4, NextStackOffset, true);
+ SDValue PtrOff = DAG.getFrameIndex(LastFI, getPointerTy());
+ OutChains.push_back(DAG.getStore(Chain, dl, ArgValue, PtrOff,
+ MachinePointerInfo(),
+ false, false, 0));
+ }
+ }
+
+ MipsFI->setLastInArgFI(LastFI);
+
+ // All stores are grouped in one node to allow the matching between
+ // the size of Ins and InVals. This only happens when on varg functions
+ if (!OutChains.empty()) {
+ OutChains.push_back(Chain);
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &OutChains[0], OutChains.size());
+ }
+
+ return Chain;
+}
+
+//===----------------------------------------------------------------------===//
+// Return Value Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+SDValue
+MipsTargetLowering::LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const {
+
+ // CCValAssign - represent the assignment of
+ // the return value to a location
+ SmallVector<CCValAssign, 16> RVLocs;
+
+ // CCState - Info about the registers and stack slot.
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+
+ // Analize return values.
+ CCInfo.AnalyzeReturn(Outs, RetCC_Mips);
+
+ // If this is the first return lowered for this function, add
+ // the regs to the liveout set for the function.
+ if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
+ for (unsigned i = 0; i != RVLocs.size(); ++i)
+ if (RVLocs[i].isRegLoc())
+ DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
+ }
+
+ SDValue Flag;
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
+ OutVals[i], Flag);
+
+ // guarantee that all emitted copies are
+ // stuck together, avoiding something bad
+ Flag = Chain.getValue(1);
+ }
+
+ // The mips ABIs for returning structs by value requires that we copy
+ // the sret argument into $v0 for the return. We saved the argument into
+ // a virtual register in the entry block, so now we copy the value out
+ // and into $v0.
+ if (DAG.getMachineFunction().getFunction()->hasStructRetAttr()) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+ unsigned Reg = MipsFI->getSRetReturnReg();
+
+ if (!Reg)
+ llvm_unreachable("sret virtual register not created in the entry block");
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy());
+
+ Chain = DAG.getCopyToReg(Chain, dl, Mips::V0, Val, Flag);
+ Flag = Chain.getValue(1);
+ }
+
+ // Return on Mips is always a "jr $ra"
+ if (Flag.getNode())
+ return DAG.getNode(MipsISD::Ret, dl, MVT::Other,
+ Chain, DAG.getRegister(Mips::RA, MVT::i32), Flag);
+ else // Return Void
+ return DAG.getNode(MipsISD::Ret, dl, MVT::Other,
+ Chain, DAG.getRegister(Mips::RA, MVT::i32));
+}
+
+//===----------------------------------------------------------------------===//
+// Mips Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+/// getConstraintType - Given a constraint letter, return the type of
+/// constraint it is for this target.
+MipsTargetLowering::ConstraintType MipsTargetLowering::
+getConstraintType(const std::string &Constraint) const
+{
+ // Mips specific constrainy
+ // GCC config/mips/constraints.md
+ //
+ // 'd' : An address register. Equivalent to r
+ // unless generating MIPS16 code.
+ // 'y' : Equivalent to r; retained for
+ // backwards compatibility.
+ // 'f' : Floating Point registers.
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ default : break;
+ case 'd':
+ case 'y':
+ case 'f':
+ return C_RegisterClass;
+ break;
+ }
+ }
+ return TargetLowering::getConstraintType(Constraint);
+}
+
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+MipsTargetLowering::getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const {
+ ConstraintWeight weight = CW_Invalid;
+ Value *CallOperandVal = info.CallOperandVal;
+ // If we don't have a value, we can't do a match,
+ // but allow it at the lowest weight.
+ if (CallOperandVal == NULL)
+ return CW_Default;
+ Type *type = CallOperandVal->getType();
+ // Look at the constraint type.
+ switch (*constraint) {
+ default:
+ weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
+ break;
+ case 'd':
+ case 'y':
+ if (type->isIntegerTy())
+ weight = CW_Register;
+ break;
+ case 'f':
+ if (type->isFloatTy())
+ weight = CW_Register;
+ break;
+ }
+ return weight;
+}
+
+/// Given a register class constraint, like 'r', if this corresponds directly
+/// to an LLVM register class, return a register of 0 and the register class
+/// pointer.
+std::pair<unsigned, const TargetRegisterClass*> MipsTargetLowering::
+getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const
+{
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ case 'd': // Address register. Same as 'r' unless generating MIPS16 code.
+ case 'y': // Same as 'r'. Exists for compatibility.
+ case 'r':
+ return std::make_pair(0U, Mips::CPURegsRegisterClass);
+ case 'f':
+ if (VT == MVT::f32)
+ return std::make_pair(0U, Mips::FGR32RegisterClass);
+ if (VT == MVT::f64)
+ if ((!Subtarget->isSingleFloat()) && (!Subtarget->isFP64bit()))
+ return std::make_pair(0U, Mips::AFGR64RegisterClass);
+ break;
+ }
+ }
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
+
+bool
+MipsTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+ // The Mips target isn't yet aware of offsets.
+ return false;
+}
+
+bool MipsTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
+ if (VT != MVT::f32 && VT != MVT::f64)
+ return false;
+ if (Imm.isNegZero())
+ return false;
+ return Imm.isZero();
+}
diff --git a/contrib/llvm/lib/Target/Mips/MipsISelLowering.h b/contrib/llvm/lib/Target/Mips/MipsISelLowering.h
new file mode 100644
index 0000000..4be3fed5
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsISelLowering.h
@@ -0,0 +1,201 @@
+//===-- MipsISelLowering.h - Mips DAG Lowering Interface --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that Mips uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MipsISELLOWERING_H
+#define MipsISELLOWERING_H
+
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
+#include "Mips.h"
+#include "MipsSubtarget.h"
+
+namespace llvm {
+ namespace MipsISD {
+ enum NodeType {
+ // Start the numbering from where ISD NodeType finishes.
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+ // Jump and link (call)
+ JmpLink,
+
+ // Get the Higher 16 bits from a 32-bit immediate
+ // No relation with Mips Hi register
+ Hi,
+
+ // Get the Lower 16 bits from a 32-bit immediate
+ // No relation with Mips Lo register
+ Lo,
+
+ // Handle gp_rel (small data/bss sections) relocation.
+ GPRel,
+
+ // General Dynamic TLS
+ TlsGd,
+
+ // Local Exec TLS
+ TprelHi,
+ TprelLo,
+
+ // Thread Pointer
+ ThreadPointer,
+
+ // Floating Point Branch Conditional
+ FPBrcond,
+
+ // Floating Point Compare
+ FPCmp,
+
+ // Floating Point Conditional Moves
+ CMovFP_T,
+ CMovFP_F,
+
+ // Floating Point Rounding
+ FPRound,
+
+ // Return
+ Ret,
+
+ // MAdd/Sub nodes
+ MAdd,
+ MAddu,
+ MSub,
+ MSubu,
+
+ // DivRem(u)
+ DivRem,
+ DivRemU,
+
+ BuildPairF64,
+ ExtractElementF64,
+
+ WrapperPIC,
+
+ DynAlloc,
+
+ Sync,
+
+ Ext,
+ Ins
+ };
+ }
+
+ //===--------------------------------------------------------------------===//
+ // TargetLowering Implementation
+ //===--------------------------------------------------------------------===//
+
+ class MipsTargetLowering : public TargetLowering {
+ public:
+ explicit MipsTargetLowering(MipsTargetMachine &TM);
+
+ virtual bool allowsUnalignedMemoryAccesses (EVT VT) const;
+
+ /// LowerOperation - Provide custom lowering hooks for some operations.
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+
+ /// getTargetNodeName - This method returns the name of a target specific
+ // DAG node.
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+ /// getSetCCResultType - get the ISD::SETCC result ValueType
+ EVT getSetCCResultType(EVT VT) const;
+
+ virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ private:
+ // Subtarget Info
+ const MipsSubtarget *Subtarget;
+
+ bool HasMips64, IsN64;
+
+ // Lower Operand helpers
+ SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ // Lower Operand specifics
+ SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG& DAG) const;
+ SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const;
+
+ virtual SDValue
+ LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const;
+
+ virtual MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+
+ // Inline asm support
+ ConstraintType getConstraintType(const std::string &Constraint) const;
+
+ /// Examine constraint string and operand type and determine a weight value.
+ /// The operand object must already have been set up with the operand type.
+ ConstraintWeight getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const;
+
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const;
+
+ virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
+
+ /// isFPImmLegal - Returns true if the target can instruction select the
+ /// specified FP immediate natively. If false, the legalizer will
+ /// materialize the FP immediate as a load from a constant pool.
+ virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
+
+ MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
+ unsigned Size, unsigned BinOpcode, bool Nand = false) const;
+ MachineBasicBlock *EmitAtomicBinaryPartword(MachineInstr *MI,
+ MachineBasicBlock *BB, unsigned Size, unsigned BinOpcode,
+ bool Nand = false) const;
+ MachineBasicBlock *EmitAtomicCmpSwap(MachineInstr *MI,
+ MachineBasicBlock *BB, unsigned Size) const;
+ MachineBasicBlock *EmitAtomicCmpSwapPartword(MachineInstr *MI,
+ MachineBasicBlock *BB, unsigned Size) const;
+ };
+}
+
+#endif // MipsISELLOWERING_H
diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrFPU.td b/contrib/llvm/lib/Target/Mips/MipsInstrFPU.td
new file mode 100644
index 0000000..2fb9d18
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsInstrFPU.td
@@ -0,0 +1,362 @@
+//===- MipsInstrFPU.td - Mips FPU Instruction Information --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Mips FPU instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Floating Point Instructions
+// ------------------------
+// * 64bit fp:
+// - 32 64-bit registers (default mode)
+// - 16 even 32-bit registers (32-bit compatible mode) for
+// single and double access.
+// * 32bit fp:
+// - 16 even 32-bit registers - single and double (aliased)
+// - 32 32-bit registers (within single-only mode)
+//===----------------------------------------------------------------------===//
+
+// Floating Point Compare and Branch
+def SDT_MipsFPBrcond : SDTypeProfile<0, 2, [SDTCisInt<0>,
+ SDTCisVT<1, OtherVT>]>;
+def SDT_MipsFPCmp : SDTypeProfile<0, 3, [SDTCisSameAs<0, 1>, SDTCisFP<1>,
+ SDTCisVT<2, i32>]>;
+def SDT_MipsCMovFP : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>,
+ SDTCisSameAs<1, 2>]>;
+def SDT_MipsBuildPairF64 : SDTypeProfile<1, 2, [SDTCisVT<0, f64>,
+ SDTCisVT<1, i32>,
+ SDTCisSameAs<1, 2>]>;
+def SDT_MipsExtractElementF64 : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
+ SDTCisVT<1, f64>,
+ SDTCisVT<2, i32>]>;
+
+def MipsFPCmp : SDNode<"MipsISD::FPCmp", SDT_MipsFPCmp, [SDNPOutGlue]>;
+def MipsCMovFP_T : SDNode<"MipsISD::CMovFP_T", SDT_MipsCMovFP, [SDNPInGlue]>;
+def MipsCMovFP_F : SDNode<"MipsISD::CMovFP_F", SDT_MipsCMovFP, [SDNPInGlue]>;
+def MipsFPBrcond : SDNode<"MipsISD::FPBrcond", SDT_MipsFPBrcond,
+ [SDNPHasChain, SDNPOptInGlue]>;
+def MipsBuildPairF64 : SDNode<"MipsISD::BuildPairF64", SDT_MipsBuildPairF64>;
+def MipsExtractElementF64 : SDNode<"MipsISD::ExtractElementF64",
+ SDT_MipsExtractElementF64>;
+
+// Operand for printing out a condition code.
+let PrintMethod = "printFCCOperand" in
+ def condcode : Operand<i32>;
+
+//===----------------------------------------------------------------------===//
+// Feature predicates.
+//===----------------------------------------------------------------------===//
+
+def IsFP64bit : Predicate<"Subtarget.isFP64bit()">;
+def NotFP64bit : Predicate<"!Subtarget.isFP64bit()">;
+def IsSingleFloat : Predicate<"Subtarget.isSingleFloat()">;
+def IsNotSingleFloat : Predicate<"!Subtarget.isSingleFloat()">;
+
+//===----------------------------------------------------------------------===//
+// Instruction Class Templates
+//
+// A set of multiclasses is used to address the register usage.
+//
+// S32 - single precision in 16 32bit even fp registers
+// single precision in 32 32bit fp registers in SingleOnly mode
+// S64 - single precision in 32 64bit fp registers (In64BitMode)
+// D32 - double precision in 16 32bit even fp registers
+// D64 - double precision in 32 64bit fp registers (In64BitMode)
+//
+// Only S32 and D32 are supported right now.
+//===----------------------------------------------------------------------===//
+
+// FP load.
+class FPLoad<bits<6> op, string opstr, PatFrag FOp, RegisterClass RC,
+ Operand MemOpnd>:
+ FFI<op, (outs RC:$ft), (ins MemOpnd:$base),
+ !strconcat(opstr, "\t$ft, $base"), [(set RC:$ft, (FOp addr:$base))]>;
+
+// FP store.
+class FPStore<bits<6> op, string opstr, PatFrag FOp, RegisterClass RC,
+ Operand MemOpnd>:
+ FFI<op, (outs), (ins RC:$ft, MemOpnd:$base),
+ !strconcat(opstr, "\t$ft, $base"), [(store RC:$ft, addr:$base)]>;
+
+// Instructions that convert an FP value to 32-bit fixed point.
+multiclass FFR1_W_M<bits<6> funct, string opstr> {
+ def _S : FFR1<funct, 16, opstr, "w.s", FGR32, FGR32>;
+ def _D32 : FFR1<funct, 17, opstr, "w.d", FGR32, AFGR64>,
+ Requires<[NotFP64bit]>;
+ def _D64 : FFR1<funct, 17, opstr, "w.d", FGR32, FGR64>,
+ Requires<[IsFP64bit]>;
+}
+
+// Instructions that convert an FP value to 64-bit fixed point.
+let Predicates = [IsFP64bit] in
+multiclass FFR1_L_M<bits<6> funct, string opstr> {
+ def _S : FFR1<funct, 16, opstr, "l.s", FGR64, FGR32>;
+ def _D64 : FFR1<funct, 17, opstr, "l.d", FGR64, FGR64>;
+}
+
+// FP-to-FP conversion instructions.
+multiclass FFR1P_M<bits<6> funct, string opstr, SDNode OpNode> {
+ def _S : FFR1P<funct, 16, opstr, "s", FGR32, FGR32, OpNode>;
+ def _D32 : FFR1P<funct, 17, opstr, "d", AFGR64, AFGR64, OpNode>,
+ Requires<[NotFP64bit]>;
+ def _D64 : FFR1P<funct, 17, opstr, "d", FGR64, FGR64, OpNode>,
+ Requires<[IsFP64bit]>;
+}
+
+multiclass FFR2P_M<bits<6> funct, string opstr, SDNode OpNode, bit isComm = 0> {
+ let isCommutable = isComm in {
+ def _S : FFR2P<funct, 16, opstr, "s", FGR32, OpNode>;
+ def _D32 : FFR2P<funct, 17, opstr, "d", AFGR64, OpNode>,
+ Requires<[NotFP64bit]>;
+ def _D64 : FFR2P<funct, 17, opstr, "d", FGR64, OpNode>,
+ Requires<[IsFP64bit]>;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Floating Point Instructions
+//===----------------------------------------------------------------------===//
+defm ROUND_W : FFR1_W_M<0xc, "round">;
+defm ROUND_L : FFR1_L_M<0x8, "round">;
+defm TRUNC_W : FFR1_W_M<0xd, "trunc">;
+defm TRUNC_L : FFR1_L_M<0x9, "trunc">;
+defm CEIL_W : FFR1_W_M<0xe, "ceil">;
+defm CEIL_L : FFR1_L_M<0xa, "ceil">;
+defm FLOOR_W : FFR1_W_M<0xf, "floor">;
+defm FLOOR_L : FFR1_L_M<0xb, "floor">;
+defm CVT_W : FFR1_W_M<0x24, "cvt">;
+defm CVT_L : FFR1_L_M<0x25, "cvt">;
+
+def CVT_S_W : FFR1<0x20, 20, "cvt", "s.w", FGR32, FGR32>;
+
+let Predicates = [NotFP64bit] in {
+ def CVT_S_D32 : FFR1<0x20, 17, "cvt", "s.d", FGR32, AFGR64>;
+ def CVT_D32_W : FFR1<0x21, 20, "cvt", "d.w", AFGR64, FGR32>;
+ def CVT_D32_S : FFR1<0x21, 16, "cvt", "d.s", AFGR64, FGR32>;
+}
+
+let Predicates = [IsFP64bit] in {
+ def CVT_S_D64 : FFR1<0x20, 17, "cvt", "s.d", FGR32, FGR64>;
+ def CVT_S_L : FFR1<0x20, 21, "cvt", "s.l", FGR32, FGR64>;
+ def CVT_D64_W : FFR1<0x21, 20, "cvt", "d.w", FGR64, FGR32>;
+ def CVT_D64_S : FFR1<0x21, 16, "cvt", "d.s", FGR64, FGR32>;
+ def CVT_D64_L : FFR1<0x21, 21, "cvt", "d.l", FGR64, FGR64>;
+}
+
+defm FABS : FFR1P_M<0x5, "abs", fabs>;
+defm FNEG : FFR1P_M<0x7, "neg", fneg>;
+defm FSQRT : FFR1P_M<0x4, "sqrt", fsqrt>;
+
+// The odd-numbered registers are only referenced when doing loads,
+// stores, and moves between floating-point and integer registers.
+// When defining instructions, we reference all 32-bit registers,
+// regardless of register aliasing.
+let fd = 0 in {
+ /// Move Control Registers From/To CPU Registers
+ def CFC1 : FFR<0x11, 0x0, 0x2, (outs CPURegs:$rt), (ins CCR:$fs),
+ "cfc1\t$rt, $fs", []>;
+
+ def CTC1 : FFR<0x11, 0x0, 0x6, (outs CCR:$rt), (ins CPURegs:$fs),
+ "ctc1\t$fs, $rt", []>;
+
+ def MFC1 : FFR<0x11, 0x00, 0x00, (outs CPURegs:$rt), (ins FGR32:$fs),
+ "mfc1\t$rt, $fs",
+ [(set CPURegs:$rt, (bitconvert FGR32:$fs))]>;
+
+ def MTC1 : FFR<0x11, 0x00, 0x04, (outs FGR32:$fs), (ins CPURegs:$rt),
+ "mtc1\t$rt, $fs",
+ [(set FGR32:$fs, (bitconvert CPURegs:$rt))]>;
+}
+
+def FMOV_S : FFR1<0x6, 16, "mov", "s", FGR32, FGR32>;
+def FMOV_D32 : FFR1<0x6, 17, "mov", "d", AFGR64, AFGR64>,
+ Requires<[NotFP64bit]>;
+def FMOV_D64 : FFR1<0x6, 17, "mov", "d", FGR64, FGR64>,
+ Requires<[IsFP64bit]>;
+
+/// Floating Point Memory Instructions
+let Predicates = [IsN64] in {
+ def LWC1_P8 : FPLoad<0x31, "lwc1", load, FGR32, mem64>;
+ def SWC1_P8 : FPStore<0x39, "swc1", store, FGR32, mem64>;
+ def LDC164_P8 : FPLoad<0x35, "ldc1", load, FGR64, mem64>;
+ def SDC164_P8 : FPStore<0x3d, "sdc1", store, FGR64, mem64>;
+}
+
+let Predicates = [NotN64] in {
+ def LWC1 : FPLoad<0x31, "lwc1", load, FGR32, mem>;
+ def SWC1 : FPStore<0x39, "swc1", store, FGR32, mem>;
+ let Predicates = [HasMips64] in {
+ def LDC164 : FPLoad<0x35, "ldc1", load, FGR64, mem>;
+ def SDC164 : FPStore<0x3d, "sdc1", store, FGR64, mem>;
+ }
+ let Predicates = [NotMips64] in {
+ def LDC1 : FPLoad<0x35, "ldc1", load, AFGR64, mem>;
+ def SDC1 : FPStore<0x3d, "sdc1", store, AFGR64, mem>;
+ }
+}
+
+/// Floating-point Aritmetic
+defm FADD : FFR2P_M<0x10, "add", fadd, 1>;
+defm FDIV : FFR2P_M<0x03, "div", fdiv>;
+defm FMUL : FFR2P_M<0x02, "mul", fmul, 1>;
+defm FSUB : FFR2P_M<0x01, "sub", fsub>;
+
+//===----------------------------------------------------------------------===//
+// Floating Point Branch Codes
+//===----------------------------------------------------------------------===//
+// Mips branch codes. These correspond to condcode in MipsInstrInfo.h.
+// They must be kept in synch.
+def MIPS_BRANCH_F : PatLeaf<(i32 0)>;
+def MIPS_BRANCH_T : PatLeaf<(i32 1)>;
+
+/// Floating Point Branch of False/True (Likely)
+let isBranch=1, isTerminator=1, hasDelaySlot=1, base=0x8, Uses=[FCR31] in
+ class FBRANCH<PatLeaf op, string asmstr> : FFI<0x11, (outs),
+ (ins brtarget:$dst), !strconcat(asmstr, "\t$dst"),
+ [(MipsFPBrcond op, bb:$dst)]>;
+
+def BC1F : FBRANCH<MIPS_BRANCH_F, "bc1f">;
+def BC1T : FBRANCH<MIPS_BRANCH_T, "bc1t">;
+
+//===----------------------------------------------------------------------===//
+// Floating Point Flag Conditions
+//===----------------------------------------------------------------------===//
+// Mips condition codes. They must correspond to condcode in MipsInstrInfo.h.
+// They must be kept in synch.
+def MIPS_FCOND_F : PatLeaf<(i32 0)>;
+def MIPS_FCOND_UN : PatLeaf<(i32 1)>;
+def MIPS_FCOND_OEQ : PatLeaf<(i32 2)>;
+def MIPS_FCOND_UEQ : PatLeaf<(i32 3)>;
+def MIPS_FCOND_OLT : PatLeaf<(i32 4)>;
+def MIPS_FCOND_ULT : PatLeaf<(i32 5)>;
+def MIPS_FCOND_OLE : PatLeaf<(i32 6)>;
+def MIPS_FCOND_ULE : PatLeaf<(i32 7)>;
+def MIPS_FCOND_SF : PatLeaf<(i32 8)>;
+def MIPS_FCOND_NGLE : PatLeaf<(i32 9)>;
+def MIPS_FCOND_SEQ : PatLeaf<(i32 10)>;
+def MIPS_FCOND_NGL : PatLeaf<(i32 11)>;
+def MIPS_FCOND_LT : PatLeaf<(i32 12)>;
+def MIPS_FCOND_NGE : PatLeaf<(i32 13)>;
+def MIPS_FCOND_LE : PatLeaf<(i32 14)>;
+def MIPS_FCOND_NGT : PatLeaf<(i32 15)>;
+
+/// Floating Point Compare
+let Defs=[FCR31] in {
+ def FCMP_S32 : FCC<0x0, (outs), (ins FGR32:$fs, FGR32:$ft, condcode:$cc),
+ "c.$cc.s\t$fs, $ft",
+ [(MipsFPCmp FGR32:$fs, FGR32:$ft, imm:$cc)]>;
+
+ def FCMP_D32 : FCC<0x1, (outs), (ins AFGR64:$fs, AFGR64:$ft, condcode:$cc),
+ "c.$cc.d\t$fs, $ft",
+ [(MipsFPCmp AFGR64:$fs, AFGR64:$ft, imm:$cc)]>,
+ Requires<[NotFP64bit]>;
+}
+
+
+// Conditional moves:
+// These instructions are expanded in
+// MipsISelLowering::EmitInstrWithCustomInserter if target does not have
+// conditional move instructions.
+// flag:int, data:float
+let usesCustomInserter = 1, Constraints = "$F = $dst" in
+class CondMovIntFP<RegisterClass RC, bits<5> fmt, bits<6> func,
+ string instr_asm> :
+ FFR<0x11, func, fmt, (outs RC:$dst), (ins RC:$T, CPURegs:$cond, RC:$F),
+ !strconcat(instr_asm, "\t$dst, $T, $cond"), []>;
+
+def MOVZ_S : CondMovIntFP<FGR32, 16, 18, "movz.s">;
+def MOVN_S : CondMovIntFP<FGR32, 16, 19, "movn.s">;
+
+let Predicates = [NotFP64bit] in {
+ def MOVZ_D : CondMovIntFP<AFGR64, 17, 18, "movz.d">;
+ def MOVN_D : CondMovIntFP<AFGR64, 17, 19, "movn.d">;
+}
+
+defm : MovzPats<FGR32, MOVZ_S>;
+defm : MovnPats<FGR32, MOVN_S>;
+
+let Predicates = [NotFP64bit] in {
+ defm : MovzPats<AFGR64, MOVZ_D>;
+ defm : MovnPats<AFGR64, MOVN_D>;
+}
+
+let usesCustomInserter = 1, Uses = [FCR31], Constraints = "$F = $dst" in {
+// flag:float, data:int
+class CondMovFPInt<SDNode cmov, bits<1> tf, string instr_asm> :
+ FCMOV<tf, (outs CPURegs:$dst), (ins CPURegs:$T, CPURegs:$F),
+ !strconcat(instr_asm, "\t$dst, $T, $$fcc0"),
+ [(set CPURegs:$dst, (cmov CPURegs:$T, CPURegs:$F))]>;
+
+// flag:float, data:float
+class CondMovFPFP<RegisterClass RC, SDNode cmov, bits<5> fmt, bits<1> tf,
+ string instr_asm> :
+ FFCMOV<fmt, tf, (outs RC:$dst), (ins RC:$T, RC:$F),
+ !strconcat(instr_asm, "\t$dst, $T, $$fcc0"),
+ [(set RC:$dst, (cmov RC:$T, RC:$F))]>;
+}
+
+def MOVT : CondMovFPInt<MipsCMovFP_T, 1, "movt">;
+def MOVF : CondMovFPInt<MipsCMovFP_F, 0, "movf">;
+def MOVT_S : CondMovFPFP<FGR32, MipsCMovFP_T, 16, 1, "movt.s">;
+def MOVF_S : CondMovFPFP<FGR32, MipsCMovFP_F, 16, 0, "movf.s">;
+
+let Predicates = [NotFP64bit] in {
+ def MOVT_D : CondMovFPFP<AFGR64, MipsCMovFP_T, 17, 1, "movt.d">;
+ def MOVF_D : CondMovFPFP<AFGR64, MipsCMovFP_F, 17, 0, "movf.d">;
+}
+
+//===----------------------------------------------------------------------===//
+// Floating Point Pseudo-Instructions
+//===----------------------------------------------------------------------===//
+def MOVCCRToCCR : MipsPseudo<(outs CCR:$dst), (ins CCR:$src),
+ "# MOVCCRToCCR", []>;
+
+// This pseudo instr gets expanded into 2 mtc1 instrs after register
+// allocation.
+def BuildPairF64 :
+ MipsPseudo<(outs AFGR64:$dst),
+ (ins CPURegs:$lo, CPURegs:$hi), "",
+ [(set AFGR64:$dst, (MipsBuildPairF64 CPURegs:$lo, CPURegs:$hi))]>;
+
+// This pseudo instr gets expanded into 2 mfc1 instrs after register
+// allocation.
+// if n is 0, lower part of src is extracted.
+// if n is 1, higher part of src is extracted.
+def ExtractElementF64 :
+ MipsPseudo<(outs CPURegs:$dst),
+ (ins AFGR64:$src, i32imm:$n), "",
+ [(set CPURegs:$dst,
+ (MipsExtractElementF64 AFGR64:$src, imm:$n))]>;
+
+//===----------------------------------------------------------------------===//
+// Floating Point Patterns
+//===----------------------------------------------------------------------===//
+def fpimm0 : PatLeaf<(fpimm), [{
+ return N->isExactlyValue(+0.0);
+}]>;
+
+def fpimm0neg : PatLeaf<(fpimm), [{
+ return N->isExactlyValue(-0.0);
+}]>;
+
+def : Pat<(f32 fpimm0), (MTC1 ZERO)>;
+def : Pat<(f32 fpimm0neg), (FNEG_S (MTC1 ZERO))>;
+
+def : Pat<(f32 (sint_to_fp CPURegs:$src)), (CVT_S_W (MTC1 CPURegs:$src))>;
+def : Pat<(f64 (sint_to_fp CPURegs:$src)), (CVT_D32_W (MTC1 CPURegs:$src))>;
+
+def : Pat<(i32 (fp_to_sint FGR32:$src)), (MFC1 (TRUNC_W_S FGR32:$src))>;
+def : Pat<(i32 (fp_to_sint AFGR64:$src)), (MFC1 (TRUNC_W_D32 AFGR64:$src))>;
+
+let Predicates = [NotFP64bit] in {
+ def : Pat<(f32 (fround AFGR64:$src)), (CVT_S_D32 AFGR64:$src)>;
+ def : Pat<(f64 (fextend FGR32:$src)), (CVT_D32_S FGR32:$src)>;
+}
+
diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrFormats.td b/contrib/llvm/lib/Target/Mips/MipsInstrFormats.td
new file mode 100644
index 0000000..d246a26
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsInstrFormats.td
@@ -0,0 +1,267 @@
+//===- MipsInstrFormats.td - Mips Instruction Formats ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Describe MIPS instructions format
+//
+// CPU INSTRUCTION FORMATS
+//
+// opcode - operation code.
+// rs - src reg.
+// rt - dst reg (on a 2 regs instr) or src reg (on a 3 reg instr).
+// rd - dst reg, only used on 3 regs instr.
+// shamt - only used on shift instructions, contains the shift amount.
+// funct - combined with opcode field give us an operation code.
+//
+//===----------------------------------------------------------------------===//
+
+// Generic Mips Format
+class MipsInst<dag outs, dag ins, string asmstr, list<dag> pattern,
+ InstrItinClass itin>: Instruction
+{
+ field bits<32> Inst;
+
+ let Namespace = "Mips";
+
+ bits<6> opcode;
+
+ // Top 5 bits are the 'opcode' field
+ let Inst{31-26} = opcode;
+
+ dag OutOperandList = outs;
+ dag InOperandList = ins;
+
+ let AsmString = asmstr;
+ let Pattern = pattern;
+ let Itinerary = itin;
+}
+
+// Mips Pseudo Instructions Format
+class MipsPseudo<dag outs, dag ins, string asmstr, list<dag> pattern>:
+ MipsInst<outs, ins, asmstr, pattern, IIPseudo> {
+ let isPseudo = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Format R instruction class in Mips : <|opcode|rs|rt|rd|shamt|funct|>
+//===----------------------------------------------------------------------===//
+
+class FR<bits<6> op, bits<6> _funct, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin>:
+ MipsInst<outs, ins, asmstr, pattern, itin>
+{
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+ bits<5> shamt;
+ bits<6> funct;
+
+ let opcode = op;
+ let funct = _funct;
+
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = rd;
+ let Inst{10-6} = shamt;
+ let Inst{5-0} = funct;
+}
+
+//===----------------------------------------------------------------------===//
+// Format I instruction class in Mips : <|opcode|rs|rt|immediate|>
+//===----------------------------------------------------------------------===//
+
+class FI<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
+ InstrItinClass itin>: MipsInst<outs, ins, asmstr, pattern, itin>
+{
+ bits<5> rt;
+ bits<5> rs;
+ bits<16> imm16;
+
+ let opcode = op;
+
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-0} = imm16;
+}
+
+class CBranchBase<bits<6> op, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin>:
+ MipsInst<outs, ins, asmstr, pattern, itin>
+{
+ bits<5> rs;
+ bits<5> rt;
+ bits<16> imm16;
+
+ let opcode = op;
+
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-0} = imm16;
+}
+
+//===----------------------------------------------------------------------===//
+// Format J instruction class in Mips : <|opcode|address|>
+//===----------------------------------------------------------------------===//
+
+class FJ<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
+ InstrItinClass itin>: MipsInst<outs, ins, asmstr, pattern, itin>
+{
+ bits<26> addr;
+
+ let opcode = op;
+
+ let Inst{25-0} = addr;
+}
+
+//===----------------------------------------------------------------------===//
+//
+// FLOATING POINT INSTRUCTION FORMATS
+//
+// opcode - operation code.
+// fs - src reg.
+// ft - dst reg (on a 2 regs instr) or src reg (on a 3 reg instr).
+// fd - dst reg, only used on 3 regs instr.
+// fmt - double or single precision.
+// funct - combined with opcode field give us an operation code.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Format FR instruction class in Mips : <|opcode|fmt|ft|fs|fd|funct|>
+//===----------------------------------------------------------------------===//
+
+class FFR<bits<6> op, bits<6> _funct, bits<5> _fmt, dag outs, dag ins,
+ string asmstr, list<dag> pattern> :
+ MipsInst<outs, ins, asmstr, pattern, NoItinerary>
+{
+ bits<5> fd;
+ bits<5> fs;
+ bits<5> ft;
+ bits<5> fmt;
+ bits<6> funct;
+
+ let opcode = op;
+ let funct = _funct;
+ let fmt = _fmt;
+
+ let Inst{25-21} = fmt;
+ let Inst{20-16} = ft;
+ let Inst{15-11} = fs;
+ let Inst{10-6} = fd;
+ let Inst{5-0} = funct;
+}
+
+//===----------------------------------------------------------------------===//
+// Format FI instruction class in Mips : <|opcode|base|ft|immediate|>
+//===----------------------------------------------------------------------===//
+
+class FFI<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern>:
+ MipsInst<outs, ins, asmstr, pattern, NoItinerary>
+{
+ bits<5> ft;
+ bits<5> base;
+ bits<16> imm16;
+
+ let opcode = op;
+
+ let Inst{25-21} = base;
+ let Inst{20-16} = ft;
+ let Inst{15-0} = imm16;
+}
+
+//===----------------------------------------------------------------------===//
+// Compare instruction class in Mips : <|010001|fmt|ft|fs|0000011|condcode|>
+//===----------------------------------------------------------------------===//
+
+class FCC<bits<5> _fmt, dag outs, dag ins, string asmstr, list<dag> pattern> :
+ MipsInst<outs, ins, asmstr, pattern, NoItinerary>
+{
+ bits<5> fs;
+ bits<5> ft;
+ bits<4> cc;
+ bits<5> fmt;
+
+ let opcode = 0x11;
+ let fmt = _fmt;
+
+ let Inst{25-21} = fmt;
+ let Inst{20-16} = ft;
+ let Inst{15-11} = fs;
+ let Inst{10-6} = 0;
+ let Inst{5-4} = 0b11;
+ let Inst{3-0} = cc;
+}
+
+
+class FCMOV<bits<1> _tf, dag outs, dag ins, string asmstr,
+ list<dag> pattern> :
+ MipsInst<outs, ins, asmstr, pattern, NoItinerary>
+{
+ bits<5> rd;
+ bits<5> rs;
+ bits<3> N;
+ bits<1> tf;
+
+ let opcode = 0;
+ let tf = _tf;
+
+ let Inst{25-21} = rs;
+ let Inst{20-18} = N;
+ let Inst{17} = 0;
+ let Inst{16} = tf;
+ let Inst{15-11} = rd;
+ let Inst{10-6} = 0;
+ let Inst{5-0} = 1;
+}
+
+class FFCMOV<bits<5> _fmt, bits<1> _tf, dag outs, dag ins, string asmstr,
+ list<dag> pattern> :
+ MipsInst<outs, ins, asmstr, pattern, NoItinerary>
+{
+ bits<5> fd;
+ bits<5> fs;
+ bits<3> N;
+ bits<5> fmt;
+ bits<1> tf;
+
+ let opcode = 17;
+ let fmt = _fmt;
+ let tf = _tf;
+
+ let Inst{25-21} = fmt;
+ let Inst{20-18} = N;
+ let Inst{17} = 0;
+ let Inst{16} = tf;
+ let Inst{15-11} = fs;
+ let Inst{10-6} = fd;
+ let Inst{5-0} = 17;
+}
+
+// FP unary instructions without patterns.
+class FFR1<bits<6> funct, bits<5> fmt, string opstr, string fmtstr,
+ RegisterClass DstRC, RegisterClass SrcRC> :
+ FFR<0x11, funct, fmt, (outs DstRC:$fd), (ins SrcRC:$fs),
+ !strconcat(opstr, ".", fmtstr, "\t$fd, $fs"), []> {
+ let ft = 0;
+}
+
+// FP unary instructions with patterns.
+class FFR1P<bits<6> funct, bits<5> fmt, string opstr, string fmtstr,
+ RegisterClass DstRC, RegisterClass SrcRC, SDNode OpNode> :
+ FFR<0x11, funct, fmt, (outs DstRC:$fd), (ins SrcRC:$fs),
+ !strconcat(opstr, ".", fmtstr, "\t$fd, $fs"),
+ [(set DstRC:$fd, (OpNode SrcRC:$fs))]> {
+ let ft = 0;
+}
+
+class FFR2P<bits<6> funct, bits<5> fmt, string opstr,
+ string fmtstr, RegisterClass RC, SDNode OpNode> :
+ FFR<0x11, funct, fmt, (outs RC:$fd), (ins RC:$fs, RC:$ft),
+ !strconcat(opstr, ".", fmtstr, "\t$fd, $fs, $ft"),
+ [(set RC:$fd, (OpNode RC:$fs, RC:$ft))]>;
diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.cpp b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.cpp
new file mode 100644
index 0000000..559943a
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.cpp
@@ -0,0 +1,459 @@
+//===- MipsInstrInfo.cpp - Mips Instruction Information ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Mips implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsInstrInfo.h"
+#include "MipsTargetMachine.h"
+#include "MipsMachineFunction.h"
+#include "InstPrinter/MipsInstPrinter.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/ADT/STLExtras.h"
+
+#define GET_INSTRINFO_CTOR
+#include "MipsGenInstrInfo.inc"
+
+using namespace llvm;
+
+MipsInstrInfo::MipsInstrInfo(MipsTargetMachine &tm)
+ : MipsGenInstrInfo(Mips::ADJCALLSTACKDOWN, Mips::ADJCALLSTACKUP),
+ TM(tm), IsN64(TM.getSubtarget<MipsSubtarget>().isABI_N64()),
+ RI(*TM.getSubtargetImpl(), *this) {}
+
+
+const MipsRegisterInfo &MipsInstrInfo::getRegisterInfo() const {
+ return RI;
+}
+
+static bool isZeroImm(const MachineOperand &op) {
+ return op.isImm() && op.getImm() == 0;
+}
+
+/// isLoadFromStackSlot - If the specified machine instruction is a direct
+/// load from a stack slot, return the virtual or physical register number of
+/// the destination along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than loading from the stack slot.
+unsigned MipsInstrInfo::
+isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const
+{
+ unsigned Opc = MI->getOpcode();
+
+ if ((Opc == Mips::LW) || (Opc == Mips::LW_P8) || (Opc == Mips::LD) ||
+ (Opc == Mips::LD_P8) || (Opc == Mips::LWC1) || (Opc == Mips::LWC1_P8) ||
+ (Opc == Mips::LDC1) || (Opc == Mips::LDC164) ||
+ (Opc == Mips::LDC164_P8)) {
+ if ((MI->getOperand(1).isFI()) && // is a stack slot
+ (MI->getOperand(2).isImm()) && // the imm is zero
+ (isZeroImm(MI->getOperand(2)))) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ }
+
+ return 0;
+}
+
+/// isStoreToStackSlot - If the specified machine instruction is a direct
+/// store to a stack slot, return the virtual or physical register number of
+/// the source reg along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than storing to the stack slot.
+unsigned MipsInstrInfo::
+isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const
+{
+ unsigned Opc = MI->getOpcode();
+
+ if ((Opc == Mips::SW) || (Opc == Mips::SW_P8) || (Opc == Mips::SD) ||
+ (Opc == Mips::SD_P8) || (Opc == Mips::SWC1) || (Opc == Mips::SWC1_P8) ||
+ (Opc == Mips::SDC1) || (Opc == Mips::SDC164) ||
+ (Opc == Mips::SDC164_P8)) {
+ if ((MI->getOperand(1).isFI()) && // is a stack slot
+ (MI->getOperand(2).isImm()) && // the imm is zero
+ (isZeroImm(MI->getOperand(2)))) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ }
+ return 0;
+}
+
+/// insertNoop - If data hazard condition is found insert the target nop
+/// instruction.
+void MipsInstrInfo::
+insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const
+{
+ DebugLoc DL;
+ BuildMI(MBB, MI, DL, get(Mips::NOP));
+}
+
+void MipsInstrInfo::
+copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ unsigned Opc = 0, ZeroReg = 0;
+
+ if (Mips::CPURegsRegClass.contains(DestReg)) { // Copy to CPU Reg.
+ if (Mips::CPURegsRegClass.contains(SrcReg))
+ Opc = Mips::ADDu, ZeroReg = Mips::ZERO;
+ else if (Mips::CCRRegClass.contains(SrcReg))
+ Opc = Mips::CFC1;
+ else if (Mips::FGR32RegClass.contains(SrcReg))
+ Opc = Mips::MFC1;
+ else if (SrcReg == Mips::HI)
+ Opc = Mips::MFHI, SrcReg = 0;
+ else if (SrcReg == Mips::LO)
+ Opc = Mips::MFLO, SrcReg = 0;
+ }
+ else if (Mips::CPURegsRegClass.contains(SrcReg)) { // Copy from CPU Reg.
+ if (Mips::CCRRegClass.contains(DestReg))
+ Opc = Mips::CTC1;
+ else if (Mips::FGR32RegClass.contains(DestReg))
+ Opc = Mips::MTC1;
+ else if (DestReg == Mips::HI)
+ Opc = Mips::MTHI, DestReg = 0;
+ else if (DestReg == Mips::LO)
+ Opc = Mips::MTLO, DestReg = 0;
+ }
+ else if (Mips::FGR32RegClass.contains(DestReg, SrcReg))
+ Opc = Mips::FMOV_S;
+ else if (Mips::AFGR64RegClass.contains(DestReg, SrcReg))
+ Opc = Mips::FMOV_D32;
+ else if (Mips::CCRRegClass.contains(DestReg, SrcReg))
+ Opc = Mips::MOVCCRToCCR;
+ else if (Mips::CPU64RegsRegClass.contains(DestReg)) { // Copy to CPU64 Reg.
+ if (Mips::CPU64RegsRegClass.contains(SrcReg))
+ Opc = Mips::DADDu, ZeroReg = Mips::ZERO_64;
+ else if (SrcReg == Mips::HI64)
+ Opc = Mips::MFHI64, SrcReg = 0;
+ else if (SrcReg == Mips::LO64)
+ Opc = Mips::MFLO64, SrcReg = 0;
+ }
+ else if (Mips::CPU64RegsRegClass.contains(SrcReg)) { // Copy from CPU64 Reg.
+ if (DestReg == Mips::HI64)
+ Opc = Mips::MTHI64, DestReg = 0;
+ else if (DestReg == Mips::LO64)
+ Opc = Mips::MTLO64, DestReg = 0;
+ }
+
+ assert(Opc && "Cannot copy registers");
+
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc));
+
+ if (DestReg)
+ MIB.addReg(DestReg, RegState::Define);
+
+ if (ZeroReg)
+ MIB.addReg(ZeroReg);
+
+ if (SrcReg)
+ MIB.addReg(SrcReg, getKillRegState(KillSrc));
+}
+
+void MipsInstrInfo::
+storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned SrcReg, bool isKill, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+ unsigned Opc = 0;
+
+ if (RC == Mips::CPURegsRegisterClass)
+ Opc = IsN64 ? Mips::SW_P8 : Mips::SW;
+ else if (RC == Mips::CPU64RegsRegisterClass)
+ Opc = IsN64 ? Mips::SD_P8 : Mips::SD;
+ else if (RC == Mips::FGR32RegisterClass)
+ Opc = IsN64 ? Mips::SWC1_P8 : Mips::SWC1;
+ else if (RC == Mips::AFGR64RegisterClass)
+ Opc = Mips::SDC1;
+ else if (RC == Mips::FGR64RegisterClass)
+ Opc = IsN64 ? Mips::SDC164_P8 : Mips::SDC164;
+
+ assert(Opc && "Register class not handled!");
+ BuildMI(MBB, I, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI).addImm(0);
+}
+
+void MipsInstrInfo::
+loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DestReg, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const
+{
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+ unsigned Opc = 0;
+
+ if (RC == Mips::CPURegsRegisterClass)
+ Opc = IsN64 ? Mips::LW_P8 : Mips::LW;
+ else if (RC == Mips::CPU64RegsRegisterClass)
+ Opc = IsN64 ? Mips::LD_P8 : Mips::LD;
+ else if (RC == Mips::FGR32RegisterClass)
+ Opc = IsN64 ? Mips::LWC1_P8 : Mips::LWC1;
+ else if (RC == Mips::AFGR64RegisterClass)
+ Opc = Mips::LDC1;
+ else if (RC == Mips::FGR64RegisterClass)
+ Opc = IsN64 ? Mips::LDC164_P8 : Mips::LDC164;
+
+ assert(Opc && "Register class not handled!");
+ BuildMI(MBB, I, DL, get(Opc), DestReg).addFrameIndex(FI).addImm(0);
+}
+
+MachineInstr*
+MipsInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx,
+ uint64_t Offset, const MDNode *MDPtr,
+ DebugLoc DL) const {
+ MachineInstrBuilder MIB = BuildMI(MF, DL, get(Mips::DBG_VALUE))
+ .addFrameIndex(FrameIx).addImm(0).addImm(Offset).addMetadata(MDPtr);
+ return &*MIB;
+}
+
+//===----------------------------------------------------------------------===//
+// Branch Analysis
+//===----------------------------------------------------------------------===//
+
+static unsigned GetAnalyzableBrOpc(unsigned Opc) {
+ return (Opc == Mips::BEQ || Opc == Mips::BNE || Opc == Mips::BGTZ ||
+ Opc == Mips::BGEZ || Opc == Mips::BLTZ || Opc == Mips::BLEZ ||
+ Opc == Mips::BEQ64 || Opc == Mips::BNE64 || Opc == Mips::BGTZ64 ||
+ Opc == Mips::BGEZ64 || Opc == Mips::BLTZ64 || Opc == Mips::BLEZ64 ||
+ Opc == Mips::BC1T || Opc == Mips::BC1F || Opc == Mips::J) ?
+ Opc : 0;
+}
+
+/// GetOppositeBranchOpc - Return the inverse of the specified
+/// opcode, e.g. turning BEQ to BNE.
+unsigned Mips::GetOppositeBranchOpc(unsigned Opc)
+{
+ switch (Opc) {
+ default: llvm_unreachable("Illegal opcode!");
+ case Mips::BEQ : return Mips::BNE;
+ case Mips::BNE : return Mips::BEQ;
+ case Mips::BGTZ : return Mips::BLEZ;
+ case Mips::BGEZ : return Mips::BLTZ;
+ case Mips::BLTZ : return Mips::BGEZ;
+ case Mips::BLEZ : return Mips::BGTZ;
+ case Mips::BEQ64 : return Mips::BNE64;
+ case Mips::BNE64 : return Mips::BEQ64;
+ case Mips::BGTZ64 : return Mips::BLEZ64;
+ case Mips::BGEZ64 : return Mips::BLTZ64;
+ case Mips::BLTZ64 : return Mips::BGEZ64;
+ case Mips::BLEZ64 : return Mips::BGTZ64;
+ case Mips::BC1T : return Mips::BC1F;
+ case Mips::BC1F : return Mips::BC1T;
+ }
+}
+
+static void AnalyzeCondBr(const MachineInstr* Inst, unsigned Opc,
+ MachineBasicBlock *&BB,
+ SmallVectorImpl<MachineOperand>& Cond) {
+ assert(GetAnalyzableBrOpc(Opc) && "Not an analyzable branch");
+ int NumOp = Inst->getNumExplicitOperands();
+
+ // for both int and fp branches, the last explicit operand is the
+ // MBB.
+ BB = Inst->getOperand(NumOp-1).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(Opc));
+
+ for (int i=0; i<NumOp-1; i++)
+ Cond.push_back(Inst->getOperand(i));
+}
+
+bool MipsInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const
+{
+ MachineBasicBlock::reverse_iterator I = MBB.rbegin(), REnd = MBB.rend();
+
+ // Skip all the debug instructions.
+ while (I != REnd && I->isDebugValue())
+ ++I;
+
+ if (I == REnd || !isUnpredicatedTerminator(&*I)) {
+ // If this block ends with no branches (it just falls through to its succ)
+ // just return false, leaving TBB/FBB null.
+ TBB = FBB = NULL;
+ return false;
+ }
+
+ MachineInstr *LastInst = &*I;
+ unsigned LastOpc = LastInst->getOpcode();
+
+ // Not an analyzable branch (must be an indirect jump).
+ if (!GetAnalyzableBrOpc(LastOpc))
+ return true;
+
+ // Get the second to last instruction in the block.
+ unsigned SecondLastOpc = 0;
+ MachineInstr *SecondLastInst = NULL;
+
+ if (++I != REnd) {
+ SecondLastInst = &*I;
+ SecondLastOpc = GetAnalyzableBrOpc(SecondLastInst->getOpcode());
+
+ // Not an analyzable branch (must be an indirect jump).
+ if (isUnpredicatedTerminator(SecondLastInst) && !SecondLastOpc)
+ return true;
+ }
+
+ // If there is only one terminator instruction, process it.
+ if (!SecondLastOpc) {
+ // Unconditional branch
+ if (LastOpc == Mips::J) {
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+
+ // Conditional branch
+ AnalyzeCondBr(LastInst, LastOpc, TBB, Cond);
+ return false;
+ }
+
+ // If we reached here, there are two branches.
+ // If there are three terminators, we don't know what sort of block this is.
+ if (++I != REnd && isUnpredicatedTerminator(&*I))
+ return true;
+
+ // If second to last instruction is an unconditional branch,
+ // analyze it and remove the last instruction.
+ if (SecondLastOpc == Mips::J) {
+ // Return if the last instruction cannot be removed.
+ if (!AllowModify)
+ return true;
+
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ LastInst->eraseFromParent();
+ return false;
+ }
+
+ // Conditional branch followed by an unconditional branch.
+ // The last one must be unconditional.
+ if (LastOpc != Mips::J)
+ return true;
+
+ AnalyzeCondBr(SecondLastInst, SecondLastOpc, TBB, Cond);
+ FBB = LastInst->getOperand(0).getMBB();
+
+ return false;
+}
+
+void MipsInstrInfo::BuildCondBr(MachineBasicBlock &MBB,
+ MachineBasicBlock *TBB, DebugLoc DL,
+ const SmallVectorImpl<MachineOperand>& Cond)
+ const {
+ unsigned Opc = Cond[0].getImm();
+ const MCInstrDesc &MCID = get(Opc);
+ MachineInstrBuilder MIB = BuildMI(&MBB, DL, MCID);
+
+ for (unsigned i = 1; i < Cond.size(); ++i)
+ MIB.addReg(Cond[i].getReg());
+
+ MIB.addMBB(TBB);
+}
+
+unsigned MipsInstrInfo::
+InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
+ // Shouldn't be a fall through.
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+
+ // # of condition operands:
+ // Unconditional branches: 0
+ // Floating point branches: 1 (opc)
+ // Int BranchZero: 2 (opc, reg)
+ // Int Branch: 3 (opc, reg0, reg1)
+ assert((Cond.size() <= 3) &&
+ "# of Mips branch conditions must be <= 3!");
+
+ // Two-way Conditional branch.
+ if (FBB) {
+ BuildCondBr(MBB, TBB, DL, Cond);
+ BuildMI(&MBB, DL, get(Mips::J)).addMBB(FBB);
+ return 2;
+ }
+
+ // One way branch.
+ // Unconditional branch.
+ if (Cond.empty())
+ BuildMI(&MBB, DL, get(Mips::J)).addMBB(TBB);
+ else // Conditional branch.
+ BuildCondBr(MBB, TBB, DL, Cond);
+ return 1;
+}
+
+unsigned MipsInstrInfo::
+RemoveBranch(MachineBasicBlock &MBB) const
+{
+ MachineBasicBlock::reverse_iterator I = MBB.rbegin(), REnd = MBB.rend();
+ MachineBasicBlock::reverse_iterator FirstBr;
+ unsigned removed;
+
+ // Skip all the debug instructions.
+ while (I != REnd && I->isDebugValue())
+ ++I;
+
+ FirstBr = I;
+
+ // Up to 2 branches are removed.
+ // Note that indirect branches are not removed.
+ for(removed = 0; I != REnd && removed < 2; ++I, ++removed)
+ if (!GetAnalyzableBrOpc(I->getOpcode()))
+ break;
+
+ MBB.erase(I.base(), FirstBr.base());
+
+ return removed;
+}
+
+/// ReverseBranchCondition - Return the inverse opcode of the
+/// specified Branch instruction.
+bool MipsInstrInfo::
+ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const
+{
+ assert( (Cond.size() && Cond.size() <= 3) &&
+ "Invalid Mips branch condition!");
+ Cond[0].setImm(Mips::GetOppositeBranchOpc(Cond[0].getImm()));
+ return false;
+}
+
+/// getGlobalBaseReg - Return a virtual register initialized with the
+/// the global base register value. Output instructions required to
+/// initialize the register in the function entry block, if necessary.
+///
+unsigned MipsInstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
+ MipsFunctionInfo *MipsFI = MF->getInfo<MipsFunctionInfo>();
+ unsigned GlobalBaseReg = MipsFI->getGlobalBaseReg();
+ if (GlobalBaseReg != 0)
+ return GlobalBaseReg;
+
+ // Insert the set of GlobalBaseReg into the first MBB of the function
+ MachineBasicBlock &FirstMBB = MF->front();
+ MachineBasicBlock::iterator MBBI = FirstMBB.begin();
+ MachineRegisterInfo &RegInfo = MF->getRegInfo();
+ const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+
+ GlobalBaseReg = RegInfo.createVirtualRegister(Mips::CPURegsRegisterClass);
+ BuildMI(FirstMBB, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY),
+ GlobalBaseReg).addReg(Mips::GP);
+ RegInfo.addLiveIn(Mips::GP);
+
+ MipsFI->setGlobalBaseReg(GlobalBaseReg);
+ return GlobalBaseReg;
+}
diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.h b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.h
new file mode 100644
index 0000000..271d248
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.h
@@ -0,0 +1,195 @@
+//===- MipsInstrInfo.h - Mips Instruction Information -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Mips implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSINSTRUCTIONINFO_H
+#define MIPSINSTRUCTIONINFO_H
+
+#include "Mips.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "MipsRegisterInfo.h"
+
+#define GET_INSTRINFO_HEADER
+#include "MipsGenInstrInfo.inc"
+
+namespace llvm {
+
+namespace Mips {
+ /// GetOppositeBranchOpc - Return the inverse of the specified
+ /// opcode, e.g. turning BEQ to BNE.
+ unsigned GetOppositeBranchOpc(unsigned Opc);
+}
+
+/// MipsII - This namespace holds all of the target specific flags that
+/// instruction info tracks.
+///
+namespace MipsII {
+ /// Target Operand Flag enum.
+ enum TOF {
+ //===------------------------------------------------------------------===//
+ // Mips Specific MachineOperand flags.
+
+ MO_NO_FLAG,
+
+ /// MO_GOT - Represents the offset into the global offset table at which
+ /// the address the relocation entry symbol resides during execution.
+ MO_GOT,
+
+ /// MO_GOT_CALL - Represents the offset into the global offset table at
+ /// which the address of a call site relocation entry symbol resides
+ /// during execution. This is different from the above since this flag
+ /// can only be present in call instructions.
+ MO_GOT_CALL,
+
+ /// MO_GPREL - Represents the offset from the current gp value to be used
+ /// for the relocatable object file being produced.
+ MO_GPREL,
+
+ /// MO_ABS_HI/LO - Represents the hi or low part of an absolute symbol
+ /// address.
+ MO_ABS_HI,
+ MO_ABS_LO,
+
+ /// MO_TLSGD - Represents the offset into the global offset table at which
+ // the module ID and TSL block offset reside during execution (General
+ // Dynamic TLS).
+ MO_TLSGD,
+
+ /// MO_GOTTPREL - Represents the offset from the thread pointer (Initial
+ // Exec TLS).
+ MO_GOTTPREL,
+
+ /// MO_TPREL_HI/LO - Represents the hi and low part of the offset from
+ // the thread pointer (Local Exec TLS).
+ MO_TPREL_HI,
+ MO_TPREL_LO,
+
+ // N32/64 Flags.
+ MO_GPOFF_HI,
+ MO_GPOFF_LO,
+ MO_GOT_DISP,
+ MO_GOT_PAGE,
+ MO_GOT_OFST
+ };
+
+ enum {
+ //===------------------------------------------------------------------===//
+ // Instruction encodings. These are the standard/most common forms for
+ // Mips instructions.
+ //
+
+ // Pseudo - This represents an instruction that is a pseudo instruction
+ // or one that has not been implemented yet. It is illegal to code generate
+ // it, but tolerated for intermediate implementation stages.
+ Pseudo = 0,
+
+ /// FrmR - This form is for instructions of the format R.
+ FrmR = 1,
+ /// FrmI - This form is for instructions of the format I.
+ FrmI = 2,
+ /// FrmJ - This form is for instructions of the format J.
+ FrmJ = 3,
+ /// FrmFR - This form is for instructions of the format FR.
+ FrmFR = 4,
+ /// FrmFI - This form is for instructions of the format FI.
+ FrmFI = 5,
+ /// FrmOther - This form is for instructions that have no specific format.
+ FrmOther = 6,
+
+ FormMask = 15
+ };
+}
+
+class MipsInstrInfo : public MipsGenInstrInfo {
+ MipsTargetMachine &TM;
+ bool IsN64;
+ const MipsRegisterInfo RI;
+public:
+ explicit MipsInstrInfo(MipsTargetMachine &TM);
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ virtual const MipsRegisterInfo &getRegisterInfo() const;
+
+ /// isLoadFromStackSlot - If the specified machine instruction is a direct
+ /// load from a stack slot, return the virtual or physical register number of
+ /// the destination along with the FrameIndex of the loaded stack slot. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than loading from the stack slot.
+ virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ /// isStoreToStackSlot - If the specified machine instruction is a direct
+ /// store to a stack slot, return the virtual or physical register number of
+ /// the source reg along with the FrameIndex of the loaded stack slot. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than storing to the stack slot.
+ virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ /// Branch Analysis
+ virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const;
+ virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+
+private:
+ void BuildCondBr(MachineBasicBlock &MBB, MachineBasicBlock *TBB, DebugLoc DL,
+ const SmallVectorImpl<MachineOperand>& Cond) const;
+
+public:
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual MachineInstr* emitFrameIndexDebugValue(MachineFunction &MF,
+ int FrameIx, uint64_t Offset,
+ const MDNode *MDPtr,
+ DebugLoc DL) const;
+
+ virtual
+ bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+
+ /// Insert nop instruction when hazard condition is found
+ virtual void insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const;
+
+ /// getGlobalBaseReg - Return a virtual register initialized with the
+ /// the global base register value. Output instructions required to
+ /// initialize the register in the function entry block, if necessary.
+ ///
+ unsigned getGlobalBaseReg(MachineFunction *MF) const;
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td
new file mode 100644
index 0000000..06b7de7
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td
@@ -0,0 +1,1017 @@
+//===- MipsInstrInfo.td - Target Description for Mips Target -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Mips implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instruction format superclass
+//===----------------------------------------------------------------------===//
+
+include "MipsInstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// Mips profiles and nodes
+//===----------------------------------------------------------------------===//
+
+def SDT_MipsRet : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+def SDT_MipsJmpLink : SDTypeProfile<0, 1, [SDTCisVT<0, iPTR>]>;
+def SDT_MipsCMov : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>,
+ SDTCisSameAs<1, 2>,
+ SDTCisSameAs<3, 4>,
+ SDTCisInt<4>]>;
+def SDT_MipsCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>]>;
+def SDT_MipsCallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
+def SDT_MipsMAddMSub : SDTypeProfile<0, 4,
+ [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>,
+ SDTCisSameAs<1, 2>,
+ SDTCisSameAs<2, 3>]>;
+def SDT_MipsDivRem : SDTypeProfile<0, 2,
+ [SDTCisInt<0>,
+ SDTCisSameAs<0, 1>]>;
+
+def SDT_MipsThreadPointer : SDTypeProfile<1, 0, [SDTCisPtrTy<0>]>;
+
+def SDT_MipsDynAlloc : SDTypeProfile<1, 1, [SDTCisVT<0, i32>,
+ SDTCisVT<1, iPTR>]>;
+def SDT_Sync : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
+
+def SDT_Ext : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
+ SDTCisVT<2, i32>, SDTCisSameAs<2, 3>]>;
+def SDT_Ins : SDTypeProfile<1, 4, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
+ SDTCisVT<2, i32>, SDTCisSameAs<2, 3>,
+ SDTCisSameAs<0, 4>]>;
+
+// Call
+def MipsJmpLink : SDNode<"MipsISD::JmpLink",SDT_MipsJmpLink,
+ [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue,
+ SDNPVariadic]>;
+
+// Hi and Lo nodes are used to handle global addresses. Used on
+// MipsISelLowering to lower stuff like GlobalAddress, ExternalSymbol
+// static model. (nothing to do with Mips Registers Hi and Lo)
+def MipsHi : SDNode<"MipsISD::Hi", SDTIntUnaryOp>;
+def MipsLo : SDNode<"MipsISD::Lo", SDTIntUnaryOp>;
+def MipsGPRel : SDNode<"MipsISD::GPRel", SDTIntUnaryOp>;
+
+// TlsGd node is used to handle General Dynamic TLS
+def MipsTlsGd : SDNode<"MipsISD::TlsGd", SDTIntUnaryOp>;
+
+// TprelHi and TprelLo nodes are used to handle Local Exec TLS
+def MipsTprelHi : SDNode<"MipsISD::TprelHi", SDTIntUnaryOp>;
+def MipsTprelLo : SDNode<"MipsISD::TprelLo", SDTIntUnaryOp>;
+
+// Thread pointer
+def MipsThreadPointer: SDNode<"MipsISD::ThreadPointer", SDT_MipsThreadPointer>;
+
+// Return
+def MipsRet : SDNode<"MipsISD::Ret", SDT_MipsRet, [SDNPHasChain,
+ SDNPOptInGlue]>;
+
+// These are target-independent nodes, but have target-specific formats.
+def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_MipsCallSeqStart,
+ [SDNPHasChain, SDNPOutGlue]>;
+def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_MipsCallSeqEnd,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+// MAdd*/MSub* nodes
+def MipsMAdd : SDNode<"MipsISD::MAdd", SDT_MipsMAddMSub,
+ [SDNPOptInGlue, SDNPOutGlue]>;
+def MipsMAddu : SDNode<"MipsISD::MAddu", SDT_MipsMAddMSub,
+ [SDNPOptInGlue, SDNPOutGlue]>;
+def MipsMSub : SDNode<"MipsISD::MSub", SDT_MipsMAddMSub,
+ [SDNPOptInGlue, SDNPOutGlue]>;
+def MipsMSubu : SDNode<"MipsISD::MSubu", SDT_MipsMAddMSub,
+ [SDNPOptInGlue, SDNPOutGlue]>;
+
+// DivRem(u) nodes
+def MipsDivRem : SDNode<"MipsISD::DivRem", SDT_MipsDivRem,
+ [SDNPOutGlue]>;
+def MipsDivRemU : SDNode<"MipsISD::DivRemU", SDT_MipsDivRem,
+ [SDNPOutGlue]>;
+
+// Target constant nodes that are not part of any isel patterns and remain
+// unchanged can cause instructions with illegal operands to be emitted.
+// Wrapper node patterns give the instruction selector a chance to replace
+// target constant nodes that would otherwise remain unchanged with ADDiu
+// nodes. Without these wrapper node patterns, the following conditional move
+// instrucion is emitted when function cmov2 in test/CodeGen/Mips/cmov.ll is
+// compiled:
+// movn %got(d)($gp), %got(c)($gp), $4
+// This instruction is illegal since movn can take only register operands.
+
+def MipsWrapperPIC : SDNode<"MipsISD::WrapperPIC", SDTIntUnaryOp>;
+
+// Pointer to dynamically allocated stack area.
+def MipsDynAlloc : SDNode<"MipsISD::DynAlloc", SDT_MipsDynAlloc,
+ [SDNPHasChain, SDNPInGlue]>;
+
+def MipsSync : SDNode<"MipsISD::Sync", SDT_Sync, [SDNPHasChain]>;
+
+def MipsExt : SDNode<"MipsISD::Ext", SDT_Ext>;
+def MipsIns : SDNode<"MipsISD::Ins", SDT_Ins>;
+
+//===----------------------------------------------------------------------===//
+// Mips Instruction Predicate Definitions.
+//===----------------------------------------------------------------------===//
+def HasSEInReg : Predicate<"Subtarget.hasSEInReg()">;
+def HasBitCount : Predicate<"Subtarget.hasBitCount()">;
+def HasSwap : Predicate<"Subtarget.hasSwap()">;
+def HasCondMov : Predicate<"Subtarget.hasCondMov()">;
+def HasMips32 : Predicate<"Subtarget.hasMips32()">;
+def HasMips32r2 : Predicate<"Subtarget.hasMips32r2()">;
+def HasMips64 : Predicate<"Subtarget.hasMips64()">;
+def NotMips64 : Predicate<"!Subtarget.hasMips64()">;
+def HasMips64r2 : Predicate<"Subtarget.hasMips64r2()">;
+def IsN64 : Predicate<"Subtarget.isABI_N64()">;
+def NotN64 : Predicate<"!Subtarget.isABI_N64()">;
+
+//===----------------------------------------------------------------------===//
+// Mips Operand, Complex Patterns and Transformations Definitions.
+//===----------------------------------------------------------------------===//
+
+// Instruction operand types
+def brtarget : Operand<OtherVT>;
+def calltarget : Operand<i32>;
+def simm16 : Operand<i32>;
+def simm16_64 : Operand<i64>;
+def shamt : Operand<i32>;
+
+// Unsigned Operand
+def uimm16 : Operand<i32> {
+ let PrintMethod = "printUnsignedImm";
+}
+
+// Address operand
+def mem : Operand<i32> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops CPURegs, simm16);
+}
+
+def mem64 : Operand<i64> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops CPU64Regs, simm16_64);
+}
+
+def mem_ea : Operand<i32> {
+ let PrintMethod = "printMemOperandEA";
+ let MIOperandInfo = (ops CPURegs, simm16);
+}
+
+// Transformation Function - get the lower 16 bits.
+def LO16 : SDNodeXForm<imm, [{
+ return getI32Imm((unsigned)N->getZExtValue() & 0xFFFF);
+}]>;
+
+// Transformation Function - get the higher 16 bits.
+def HI16 : SDNodeXForm<imm, [{
+ return getI32Imm((unsigned)N->getZExtValue() >> 16);
+}]>;
+
+// Node immediate fits as 16-bit sign extended on target immediate.
+// e.g. addi, andi
+def immSExt16 : PatLeaf<(imm), [{ return isInt<16>(N->getSExtValue()); }]>;
+
+// Node immediate fits as 16-bit zero extended on target immediate.
+// The LO16 param means that only the lower 16 bits of the node
+// immediate are caught.
+// e.g. addiu, sltiu
+def immZExt16 : PatLeaf<(imm), [{
+ if (N->getValueType(0) == MVT::i32)
+ return (uint32_t)N->getZExtValue() == (unsigned short)N->getZExtValue();
+ else
+ return (uint64_t)N->getZExtValue() == (unsigned short)N->getZExtValue();
+}], LO16>;
+
+// shamt field must fit in 5 bits.
+def immZExt5 : PatLeaf<(imm), [{
+ return N->getZExtValue() == ((N->getZExtValue()) & 0x1f) ;
+}]>;
+
+// Mips Address Mode! SDNode frameindex could possibily be a match
+// since load and store instructions from stack used it.
+def addr : ComplexPattern<iPTR, 2, "SelectAddr", [frameindex], []>;
+
+//===----------------------------------------------------------------------===//
+// Pattern fragment for load/store
+//===----------------------------------------------------------------------===//
+class UnalignedLoad<PatFrag Node> : PatFrag<(ops node:$ptr), (Node node:$ptr), [{
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ return LD->getMemoryVT().getSizeInBits()/8 > LD->getAlignment();
+}]>;
+
+class AlignedLoad<PatFrag Node> : PatFrag<(ops node:$ptr), (Node node:$ptr), [{
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ return LD->getMemoryVT().getSizeInBits()/8 <= LD->getAlignment();
+}]>;
+
+class UnalignedStore<PatFrag Node> : PatFrag<(ops node:$val, node:$ptr),
+ (Node node:$val, node:$ptr), [{
+ StoreSDNode *SD = cast<StoreSDNode>(N);
+ return SD->getMemoryVT().getSizeInBits()/8 > SD->getAlignment();
+}]>;
+
+class AlignedStore<PatFrag Node> : PatFrag<(ops node:$val, node:$ptr),
+ (Node node:$val, node:$ptr), [{
+ StoreSDNode *SD = cast<StoreSDNode>(N);
+ return SD->getMemoryVT().getSizeInBits()/8 <= SD->getAlignment();
+}]>;
+
+// Load/Store PatFrags.
+def sextloadi16_a : AlignedLoad<sextloadi16>;
+def zextloadi16_a : AlignedLoad<zextloadi16>;
+def extloadi16_a : AlignedLoad<extloadi16>;
+def load_a : AlignedLoad<load>;
+def sextloadi32_a : AlignedLoad<sextloadi32>;
+def zextloadi32_a : AlignedLoad<zextloadi32>;
+def extloadi32_a : AlignedLoad<extloadi32>;
+def truncstorei16_a : AlignedStore<truncstorei16>;
+def store_a : AlignedStore<store>;
+def truncstorei32_a : AlignedStore<truncstorei32>;
+def sextloadi16_u : UnalignedLoad<sextloadi16>;
+def zextloadi16_u : UnalignedLoad<zextloadi16>;
+def extloadi16_u : UnalignedLoad<extloadi16>;
+def load_u : UnalignedLoad<load>;
+def sextloadi32_u : UnalignedLoad<sextloadi32>;
+def zextloadi32_u : UnalignedLoad<zextloadi32>;
+def extloadi32_u : UnalignedLoad<extloadi32>;
+def truncstorei16_u : UnalignedStore<truncstorei16>;
+def store_u : UnalignedStore<store>;
+def truncstorei32_u : UnalignedStore<truncstorei32>;
+
+//===----------------------------------------------------------------------===//
+// Instructions specific format
+//===----------------------------------------------------------------------===//
+
+// Arithmetic and logical instructions with 3 register operands.
+class ArithLogicR<bits<6> op, bits<6> func, string instr_asm, SDNode OpNode,
+ InstrItinClass itin, RegisterClass RC, bit isComm = 0>:
+ FR<op, func, (outs RC:$rd), (ins RC:$rs, RC:$rt),
+ !strconcat(instr_asm, "\t$rd, $rs, $rt"),
+ [(set RC:$rd, (OpNode RC:$rs, RC:$rt))], itin> {
+ let shamt = 0;
+ let isCommutable = isComm;
+}
+
+class ArithOverflowR<bits<6> op, bits<6> func, string instr_asm,
+ InstrItinClass itin, RegisterClass RC, bit isComm = 0>:
+ FR<op, func, (outs RC:$rd), (ins RC:$rs, RC:$rt),
+ !strconcat(instr_asm, "\t$rd, $rs, $rt"), [], itin> {
+ let shamt = 0;
+ let isCommutable = isComm;
+}
+
+// Arithmetic and logical instructions with 2 register operands.
+class ArithLogicI<bits<6> op, string instr_asm, SDNode OpNode,
+ Operand Od, PatLeaf imm_type, RegisterClass RC> :
+ FI<op, (outs RC:$rt), (ins RC:$rs, Od:$i),
+ !strconcat(instr_asm, "\t$rt, $rs, $i"),
+ [(set RC:$rt, (OpNode RC:$rs, imm_type:$i))], IIAlu>;
+
+class ArithOverflowI<bits<6> op, string instr_asm, SDNode OpNode,
+ Operand Od, PatLeaf imm_type, RegisterClass RC> :
+ FI<op, (outs RC:$rt), (ins RC:$rs, Od:$i),
+ !strconcat(instr_asm, "\t$rt, $rs, $i"), [], IIAlu>;
+
+// Arithmetic Multiply ADD/SUB
+let rd = 0, shamt = 0, Defs = [HI, LO], Uses = [HI, LO] in
+class MArithR<bits<6> func, string instr_asm, SDNode op, bit isComm = 0> :
+ FR<0x1c, func, (outs), (ins CPURegs:$rs, CPURegs:$rt),
+ !strconcat(instr_asm, "\t$rs, $rt"),
+ [(op CPURegs:$rs, CPURegs:$rt, LO, HI)], IIImul> {
+ let rd = 0;
+ let shamt = 0;
+ let isCommutable = isComm;
+}
+
+// Logical
+class LogicNOR<bits<6> op, bits<6> func, string instr_asm, RegisterClass RC>:
+ FR<op, func, (outs RC:$rd), (ins RC:$rs, RC:$rt),
+ !strconcat(instr_asm, "\t$rd, $rs, $rt"),
+ [(set RC:$rd, (not (or RC:$rs, RC:$rt)))], IIAlu> {
+ let shamt = 0;
+ let isCommutable = 1;
+}
+
+// Shifts
+class LogicR_shift_rotate_imm<bits<6> func, bits<5> _rs, string instr_asm,
+ SDNode OpNode>:
+ FR<0x00, func, (outs CPURegs:$rd), (ins CPURegs:$rt, shamt:$shamt),
+ !strconcat(instr_asm, "\t$rd, $rt, $shamt"),
+ [(set CPURegs:$rd, (OpNode CPURegs:$rt, (i32 immZExt5:$shamt)))], IIAlu> {
+ let rs = _rs;
+}
+
+class LogicR_shift_rotate_reg<bits<6> func, bits<5> isRotate, string instr_asm,
+ SDNode OpNode>:
+ FR<0x00, func, (outs CPURegs:$rd), (ins CPURegs:$rs, CPURegs:$rt),
+ !strconcat(instr_asm, "\t$rd, $rt, $rs"),
+ [(set CPURegs:$rd, (OpNode CPURegs:$rt, CPURegs:$rs))], IIAlu> {
+ let shamt = isRotate;
+}
+
+// Load Upper Imediate
+class LoadUpper<bits<6> op, string instr_asm>:
+ FI<op, (outs CPURegs:$rt), (ins uimm16:$imm),
+ !strconcat(instr_asm, "\t$rt, $imm"), [], IIAlu> {
+ let rs = 0;
+}
+
+// Memory Load/Store
+let canFoldAsLoad = 1 in
+class LoadM<bits<6> op, string instr_asm, PatFrag OpNode, RegisterClass RC,
+ Operand MemOpnd, bit Pseudo>:
+ FI<op, (outs RC:$rt), (ins MemOpnd:$addr),
+ !strconcat(instr_asm, "\t$rt, $addr"),
+ [(set RC:$rt, (OpNode addr:$addr))], IILoad> {
+ let isPseudo = Pseudo;
+}
+
+class StoreM<bits<6> op, string instr_asm, PatFrag OpNode, RegisterClass RC,
+ Operand MemOpnd, bit Pseudo>:
+ FI<op, (outs), (ins RC:$rt, MemOpnd:$addr),
+ !strconcat(instr_asm, "\t$rt, $addr"),
+ [(OpNode RC:$rt, addr:$addr)], IIStore> {
+ let isPseudo = Pseudo;
+}
+
+// 32-bit load.
+multiclass LoadM32<bits<6> op, string instr_asm, PatFrag OpNode,
+ bit Pseudo = 0> {
+ def #NAME# : LoadM<op, instr_asm, OpNode, CPURegs, mem, Pseudo>,
+ Requires<[NotN64]>;
+ def _P8 : LoadM<op, instr_asm, OpNode, CPURegs, mem64, Pseudo>,
+ Requires<[IsN64]>;
+}
+
+// 64-bit load.
+multiclass LoadM64<bits<6> op, string instr_asm, PatFrag OpNode,
+ bit Pseudo = 0> {
+ def #NAME# : LoadM<op, instr_asm, OpNode, CPU64Regs, mem, Pseudo>,
+ Requires<[NotN64]>;
+ def _P8 : LoadM<op, instr_asm, OpNode, CPU64Regs, mem64, Pseudo>,
+ Requires<[IsN64]>;
+}
+
+// 32-bit store.
+multiclass StoreM32<bits<6> op, string instr_asm, PatFrag OpNode,
+ bit Pseudo = 0> {
+ def #NAME# : StoreM<op, instr_asm, OpNode, CPURegs, mem, Pseudo>,
+ Requires<[NotN64]>;
+ def _P8 : StoreM<op, instr_asm, OpNode, CPURegs, mem64, Pseudo>,
+ Requires<[IsN64]>;
+}
+
+// 64-bit store.
+multiclass StoreM64<bits<6> op, string instr_asm, PatFrag OpNode,
+ bit Pseudo = 0> {
+ def #NAME# : StoreM<op, instr_asm, OpNode, CPU64Regs, mem, Pseudo>,
+ Requires<[NotN64]>;
+ def _P8 : StoreM<op, instr_asm, OpNode, CPU64Regs, mem64, Pseudo>,
+ Requires<[IsN64]>;
+}
+
+// Conditional Branch
+class CBranch<bits<6> op, string instr_asm, PatFrag cond_op, RegisterClass RC>:
+ CBranchBase<op, (outs), (ins RC:$rs, RC:$rt, brtarget:$offset),
+ !strconcat(instr_asm, "\t$rs, $rt, $offset"),
+ [(brcond (i32 (cond_op RC:$rs, RC:$rt)), bb:$offset)], IIBranch> {
+ let isBranch = 1;
+ let isTerminator = 1;
+ let hasDelaySlot = 1;
+}
+
+class CBranchZero<bits<6> op, bits<5> _rt, string instr_asm, PatFrag cond_op,
+ RegisterClass RC>:
+ CBranchBase<op, (outs), (ins RC:$rs, brtarget:$offset),
+ !strconcat(instr_asm, "\t$rs, $offset"),
+ [(brcond (i32 (cond_op RC:$rs, 0)), bb:$offset)], IIBranch> {
+ let rt = _rt;
+ let isBranch = 1;
+ let isTerminator = 1;
+ let hasDelaySlot = 1;
+}
+
+// SetCC
+class SetCC_R<bits<6> op, bits<6> func, string instr_asm, PatFrag cond_op,
+ RegisterClass RC>:
+ FR<op, func, (outs CPURegs:$rd), (ins RC:$rs, RC:$rt),
+ !strconcat(instr_asm, "\t$rd, $rs, $rt"),
+ [(set CPURegs:$rd, (cond_op RC:$rs, RC:$rt))],
+ IIAlu> {
+ let shamt = 0;
+}
+
+class SetCC_I<bits<6> op, string instr_asm, PatFrag cond_op, Operand Od,
+ PatLeaf imm_type, RegisterClass RC>:
+ FI<op, (outs CPURegs:$rd), (ins RC:$rs, Od:$i),
+ !strconcat(instr_asm, "\t$rd, $rs, $i"),
+ [(set CPURegs:$rd, (cond_op RC:$rs, imm_type:$i))],
+ IIAlu>;
+
+// Unconditional branch
+let isBranch=1, isTerminator=1, isBarrier=1, hasDelaySlot = 1 in
+class JumpFJ<bits<6> op, string instr_asm>:
+ FJ<op, (outs), (ins brtarget:$target),
+ !strconcat(instr_asm, "\t$target"), [(br bb:$target)], IIBranch>;
+
+let isBranch=1, isTerminator=1, isBarrier=1, rd=0, hasDelaySlot = 1 in
+class JumpFR<bits<6> op, bits<6> func, string instr_asm>:
+ FR<op, func, (outs), (ins CPURegs:$rs),
+ !strconcat(instr_asm, "\t$rs"), [(brind CPURegs:$rs)], IIBranch> {
+ let rt = 0;
+ let rd = 0;
+ let shamt = 0;
+}
+
+// Jump and Link (Call)
+let isCall=1, hasDelaySlot=1,
+ // All calls clobber the non-callee saved registers...
+ Defs = [AT, V0, V1, A0, A1, A2, A3, T0, T1, T2, T3, T4, T5, T6, T7, T8, T9,
+ K0, K1, D0, D1, D2, D3, D4, D5, D6, D7, D8, D9], Uses = [GP] in {
+ class JumpLink<bits<6> op, string instr_asm>:
+ FJ<op, (outs), (ins calltarget:$target, variable_ops),
+ !strconcat(instr_asm, "\t$target"), [(MipsJmpLink imm:$target)],
+ IIBranch>;
+
+ class JumpLinkReg<bits<6> op, bits<6> func, string instr_asm>:
+ FR<op, func, (outs), (ins CPURegs:$rs, variable_ops),
+ !strconcat(instr_asm, "\t$rs"), [(MipsJmpLink CPURegs:$rs)], IIBranch> {
+ let rt = 0;
+ let rd = 31;
+ let shamt = 0;
+ }
+
+ class BranchLink<string instr_asm>:
+ FI<0x1, (outs), (ins CPURegs:$rs, brtarget:$target, variable_ops),
+ !strconcat(instr_asm, "\t$rs, $target"), [], IIBranch> {
+ let rt = 0;
+ }
+}
+
+// Mul, Div
+class Mul<bits<6> func, string instr_asm, InstrItinClass itin>:
+ FR<0x00, func, (outs), (ins CPURegs:$rs, CPURegs:$rt),
+ !strconcat(instr_asm, "\t$rs, $rt"), [], itin> {
+ let rd = 0;
+ let shamt = 0;
+ let isCommutable = 1;
+ let Defs = [HI, LO];
+}
+
+class Div<SDNode op, bits<6> func, string instr_asm, InstrItinClass itin>:
+ FR<0x00, func, (outs), (ins CPURegs:$rs, CPURegs:$rt),
+ !strconcat(instr_asm, "\t$$zero, $rs, $rt"),
+ [(op CPURegs:$rs, CPURegs:$rt)], itin> {
+ let rd = 0;
+ let shamt = 0;
+ let Defs = [HI, LO];
+}
+
+// Move from Hi/Lo
+class MoveFromLOHI<bits<6> func, string instr_asm>:
+ FR<0x00, func, (outs CPURegs:$rd), (ins),
+ !strconcat(instr_asm, "\t$rd"), [], IIHiLo> {
+ let rs = 0;
+ let rt = 0;
+ let shamt = 0;
+}
+
+class MoveToLOHI<bits<6> func, string instr_asm>:
+ FR<0x00, func, (outs), (ins CPURegs:$rs),
+ !strconcat(instr_asm, "\t$rs"), [], IIHiLo> {
+ let rt = 0;
+ let rd = 0;
+ let shamt = 0;
+}
+
+class EffectiveAddress<string instr_asm> :
+ FI<0x09, (outs CPURegs:$rt), (ins mem_ea:$addr),
+ instr_asm, [(set CPURegs:$rt, addr:$addr)], IIAlu>;
+
+// Count Leading Ones/Zeros in Word
+class CountLeading<bits<6> func, string instr_asm, list<dag> pattern>:
+ FR<0x1c, func, (outs CPURegs:$rd), (ins CPURegs:$rs),
+ !strconcat(instr_asm, "\t$rd, $rs"), pattern, IIAlu>,
+ Requires<[HasBitCount]> {
+ let shamt = 0;
+ let rt = rd;
+}
+
+// Sign Extend in Register.
+class SignExtInReg<bits<5> sa, string instr_asm, ValueType vt>:
+ FR<0x3f, 0x20, (outs CPURegs:$rd), (ins CPURegs:$rt),
+ !strconcat(instr_asm, "\t$rd, $rt"),
+ [(set CPURegs:$rd, (sext_inreg CPURegs:$rt, vt))], NoItinerary> {
+ let rs = 0;
+ let shamt = sa;
+ let Predicates = [HasSEInReg];
+}
+
+// Byte Swap
+class ByteSwap<bits<6> func, bits<5> sa, string instr_asm>:
+ FR<0x1f, func, (outs CPURegs:$rd), (ins CPURegs:$rt),
+ !strconcat(instr_asm, "\t$rd, $rt"),
+ [(set CPURegs:$rd, (bswap CPURegs:$rt))], NoItinerary> {
+ let rs = 0;
+ let shamt = sa;
+ let Predicates = [HasSwap];
+}
+
+// Read Hardware
+class ReadHardware: FR<0x1f, 0x3b, (outs CPURegs:$rt), (ins HWRegs:$rd),
+ "rdhwr\t$rt, $rd", [], IIAlu> {
+ let rs = 0;
+ let shamt = 0;
+}
+
+// Ext and Ins
+class ExtIns<bits<6> _funct, string instr_asm, dag outs, dag ins,
+ list<dag> pattern, InstrItinClass itin>:
+ FR<0x1f, _funct, outs, ins, !strconcat(instr_asm, " $rt, $rs, $pos, $sz"),
+ pattern, itin>, Requires<[HasMips32r2]> {
+ bits<5> pos;
+ bits<5> sz;
+ let rd = sz;
+ let shamt = pos;
+}
+
+// Atomic instructions with 2 source operands (ATOMIC_SWAP & ATOMIC_LOAD_*).
+class Atomic2Ops<PatFrag Op, string Opstr> :
+ MipsPseudo<(outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
+ !strconcat("atomic_", Opstr, "\t$dst, $ptr, $incr"),
+ [(set CPURegs:$dst,
+ (Op CPURegs:$ptr, CPURegs:$incr))]>;
+
+// Atomic Compare & Swap.
+class AtomicCmpSwap<PatFrag Op, string Width> :
+ MipsPseudo<(outs CPURegs:$dst),
+ (ins CPURegs:$ptr, CPURegs:$cmp, CPURegs:$swap),
+ !strconcat("atomic_cmp_swap_", Width,
+ "\t$dst, $ptr, $cmp, $swap"),
+ [(set CPURegs:$dst,
+ (Op CPURegs:$ptr, CPURegs:$cmp, CPURegs:$swap))]>;
+
+//===----------------------------------------------------------------------===//
+// Pseudo instructions
+//===----------------------------------------------------------------------===//
+
+// As stack alignment is always done with addiu, we need a 16-bit immediate
+let Defs = [SP], Uses = [SP] in {
+def ADJCALLSTACKDOWN : MipsPseudo<(outs), (ins uimm16:$amt),
+ "!ADJCALLSTACKDOWN $amt",
+ [(callseq_start timm:$amt)]>;
+def ADJCALLSTACKUP : MipsPseudo<(outs), (ins uimm16:$amt1, uimm16:$amt2),
+ "!ADJCALLSTACKUP $amt1",
+ [(callseq_end timm:$amt1, timm:$amt2)]>;
+}
+
+// Some assembly macros need to avoid pseudoinstructions and assembler
+// automatic reodering, we should reorder ourselves.
+def MACRO : MipsPseudo<(outs), (ins), ".set\tmacro", []>;
+def REORDER : MipsPseudo<(outs), (ins), ".set\treorder", []>;
+def NOMACRO : MipsPseudo<(outs), (ins), ".set\tnomacro", []>;
+def NOREORDER : MipsPseudo<(outs), (ins), ".set\tnoreorder", []>;
+
+// These macros are inserted to prevent GAS from complaining
+// when using the AT register.
+def NOAT : MipsPseudo<(outs), (ins), ".set\tnoat", []>;
+def ATMACRO : MipsPseudo<(outs), (ins), ".set\tat", []>;
+
+// When handling PIC code the assembler needs .cpload and .cprestore
+// directives. If the real instructions corresponding these directives
+// are used, we have the same behavior, but get also a bunch of warnings
+// from the assembler.
+def CPLOAD : MipsPseudo<(outs), (ins CPURegs:$picreg), ".cpload\t$picreg", []>;
+def CPRESTORE : MipsPseudo<(outs), (ins i32imm:$loc), ".cprestore\t$loc", []>;
+
+let usesCustomInserter = 1 in {
+ def ATOMIC_LOAD_ADD_I8 : Atomic2Ops<atomic_load_add_8, "load_add_8">;
+ def ATOMIC_LOAD_ADD_I16 : Atomic2Ops<atomic_load_add_16, "load_add_16">;
+ def ATOMIC_LOAD_ADD_I32 : Atomic2Ops<atomic_load_add_32, "load_add_32">;
+ def ATOMIC_LOAD_SUB_I8 : Atomic2Ops<atomic_load_sub_8, "load_sub_8">;
+ def ATOMIC_LOAD_SUB_I16 : Atomic2Ops<atomic_load_sub_16, "load_sub_16">;
+ def ATOMIC_LOAD_SUB_I32 : Atomic2Ops<atomic_load_sub_32, "load_sub_32">;
+ def ATOMIC_LOAD_AND_I8 : Atomic2Ops<atomic_load_and_8, "load_and_8">;
+ def ATOMIC_LOAD_AND_I16 : Atomic2Ops<atomic_load_and_16, "load_and_16">;
+ def ATOMIC_LOAD_AND_I32 : Atomic2Ops<atomic_load_and_32, "load_and_32">;
+ def ATOMIC_LOAD_OR_I8 : Atomic2Ops<atomic_load_or_8, "load_or_8">;
+ def ATOMIC_LOAD_OR_I16 : Atomic2Ops<atomic_load_or_16, "load_or_16">;
+ def ATOMIC_LOAD_OR_I32 : Atomic2Ops<atomic_load_or_32, "load_or_32">;
+ def ATOMIC_LOAD_XOR_I8 : Atomic2Ops<atomic_load_xor_8, "load_xor_8">;
+ def ATOMIC_LOAD_XOR_I16 : Atomic2Ops<atomic_load_xor_16, "load_xor_16">;
+ def ATOMIC_LOAD_XOR_I32 : Atomic2Ops<atomic_load_xor_32, "load_xor_32">;
+ def ATOMIC_LOAD_NAND_I8 : Atomic2Ops<atomic_load_nand_8, "load_nand_8">;
+ def ATOMIC_LOAD_NAND_I16 : Atomic2Ops<atomic_load_nand_16, "load_nand_16">;
+ def ATOMIC_LOAD_NAND_I32 : Atomic2Ops<atomic_load_nand_32, "load_nand_32">;
+
+ def ATOMIC_SWAP_I8 : Atomic2Ops<atomic_swap_8, "swap_8">;
+ def ATOMIC_SWAP_I16 : Atomic2Ops<atomic_swap_16, "swap_16">;
+ def ATOMIC_SWAP_I32 : Atomic2Ops<atomic_swap_32, "swap_32">;
+
+ def ATOMIC_CMP_SWAP_I8 : AtomicCmpSwap<atomic_cmp_swap_8, "8">;
+ def ATOMIC_CMP_SWAP_I16 : AtomicCmpSwap<atomic_cmp_swap_16, "16">;
+ def ATOMIC_CMP_SWAP_I32 : AtomicCmpSwap<atomic_cmp_swap_32, "32">;
+}
+
+//===----------------------------------------------------------------------===//
+// Instruction definition
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MipsI Instructions
+//===----------------------------------------------------------------------===//
+
+/// Arithmetic Instructions (ALU Immediate)
+def ADDiu : ArithLogicI<0x09, "addiu", add, simm16, immSExt16, CPURegs>;
+def ADDi : ArithOverflowI<0x08, "addi", add, simm16, immSExt16, CPURegs>;
+def SLTi : SetCC_I<0x0a, "slti", setlt, simm16, immSExt16, CPURegs>;
+def SLTiu : SetCC_I<0x0b, "sltiu", setult, simm16, immSExt16, CPURegs>;
+def ANDi : ArithLogicI<0x0c, "andi", and, uimm16, immZExt16, CPURegs>;
+def ORi : ArithLogicI<0x0d, "ori", or, uimm16, immZExt16, CPURegs>;
+def XORi : ArithLogicI<0x0e, "xori", xor, uimm16, immZExt16, CPURegs>;
+def LUi : LoadUpper<0x0f, "lui">;
+
+/// Arithmetic Instructions (3-Operand, R-Type)
+def ADDu : ArithLogicR<0x00, 0x21, "addu", add, IIAlu, CPURegs, 1>;
+def SUBu : ArithLogicR<0x00, 0x23, "subu", sub, IIAlu, CPURegs>;
+def ADD : ArithOverflowR<0x00, 0x20, "add", IIAlu, CPURegs, 1>;
+def SUB : ArithOverflowR<0x00, 0x22, "sub", IIAlu, CPURegs>;
+def SLT : SetCC_R<0x00, 0x2a, "slt", setlt, CPURegs>;
+def SLTu : SetCC_R<0x00, 0x2b, "sltu", setult, CPURegs>;
+def AND : ArithLogicR<0x00, 0x24, "and", and, IIAlu, CPURegs, 1>;
+def OR : ArithLogicR<0x00, 0x25, "or", or, IIAlu, CPURegs, 1>;
+def XOR : ArithLogicR<0x00, 0x26, "xor", xor, IIAlu, CPURegs, 1>;
+def NOR : LogicNOR<0x00, 0x27, "nor", CPURegs>;
+
+/// Shift Instructions
+def SLL : LogicR_shift_rotate_imm<0x00, 0x00, "sll", shl>;
+def SRL : LogicR_shift_rotate_imm<0x02, 0x00, "srl", srl>;
+def SRA : LogicR_shift_rotate_imm<0x03, 0x00, "sra", sra>;
+def SLLV : LogicR_shift_rotate_reg<0x04, 0x00, "sllv", shl>;
+def SRLV : LogicR_shift_rotate_reg<0x06, 0x00, "srlv", srl>;
+def SRAV : LogicR_shift_rotate_reg<0x07, 0x00, "srav", sra>;
+
+// Rotate Instructions
+let Predicates = [HasMips32r2] in {
+ def ROTR : LogicR_shift_rotate_imm<0x02, 0x01, "rotr", rotr>;
+ def ROTRV : LogicR_shift_rotate_reg<0x06, 0x01, "rotrv", rotr>;
+}
+
+/// Load and Store Instructions
+/// aligned
+defm LB : LoadM32<0x20, "lb", sextloadi8>;
+defm LBu : LoadM32<0x24, "lbu", zextloadi8>;
+defm LH : LoadM32<0x21, "lh", sextloadi16_a>;
+defm LHu : LoadM32<0x25, "lhu", zextloadi16_a>;
+defm LW : LoadM32<0x23, "lw", load_a>;
+defm SB : StoreM32<0x28, "sb", truncstorei8>;
+defm SH : StoreM32<0x29, "sh", truncstorei16_a>;
+defm SW : StoreM32<0x2b, "sw", store_a>;
+
+/// unaligned
+defm ULH : LoadM32<0x21, "ulh", sextloadi16_u, 1>;
+defm ULHu : LoadM32<0x25, "ulhu", zextloadi16_u, 1>;
+defm ULW : LoadM32<0x23, "ulw", load_u, 1>;
+defm USH : StoreM32<0x29, "ush", truncstorei16_u, 1>;
+defm USW : StoreM32<0x2b, "usw", store_u, 1>;
+
+let hasSideEffects = 1 in
+def SYNC : MipsInst<(outs), (ins i32imm:$stype), "sync $stype",
+ [(MipsSync imm:$stype)], NoItinerary>
+{
+ let opcode = 0;
+ let Inst{25-11} = 0;
+ let Inst{5-0} = 15;
+}
+
+/// Load-linked, Store-conditional
+let mayLoad = 1 in
+ def LL : FI<0x30, (outs CPURegs:$dst), (ins mem:$addr),
+ "ll\t$dst, $addr", [], IILoad>;
+let mayStore = 1, Constraints = "$src = $dst" in
+ def SC : FI<0x38, (outs CPURegs:$dst), (ins CPURegs:$src, mem:$addr),
+ "sc\t$src, $addr", [], IIStore>;
+
+/// Jump and Branch Instructions
+def J : JumpFJ<0x02, "j">;
+let isIndirectBranch = 1 in
+ def JR : JumpFR<0x00, 0x08, "jr">;
+def JAL : JumpLink<0x03, "jal">;
+def JALR : JumpLinkReg<0x00, 0x09, "jalr">;
+def BEQ : CBranch<0x04, "beq", seteq, CPURegs>;
+def BNE : CBranch<0x05, "bne", setne, CPURegs>;
+def BGEZ : CBranchZero<0x01, 1, "bgez", setge, CPURegs>;
+def BGTZ : CBranchZero<0x07, 0, "bgtz", setgt, CPURegs>;
+def BLEZ : CBranchZero<0x07, 0, "blez", setle, CPURegs>;
+def BLTZ : CBranchZero<0x01, 0, "bltz", setlt, CPURegs>;
+
+def BGEZAL : BranchLink<"bgezal">;
+def BLTZAL : BranchLink<"bltzal">;
+
+let isReturn=1, isTerminator=1, hasDelaySlot=1,
+ isBarrier=1, hasCtrlDep=1, rs=0, rt=0, shamt=0 in
+ def RET : FR <0x00, 0x02, (outs), (ins CPURegs:$target),
+ "jr\t$target", [(MipsRet CPURegs:$target)], IIBranch>;
+
+/// Multiply and Divide Instructions.
+def MULT : Mul<0x18, "mult", IIImul>;
+def MULTu : Mul<0x19, "multu", IIImul>;
+def SDIV : Div<MipsDivRem, 0x1a, "div", IIIdiv>;
+def UDIV : Div<MipsDivRemU, 0x1b, "divu", IIIdiv>;
+
+let Defs = [HI] in
+ def MTHI : MoveToLOHI<0x11, "mthi">;
+let Defs = [LO] in
+ def MTLO : MoveToLOHI<0x13, "mtlo">;
+
+let Uses = [HI] in
+ def MFHI : MoveFromLOHI<0x10, "mfhi">;
+let Uses = [LO] in
+ def MFLO : MoveFromLOHI<0x12, "mflo">;
+
+/// Sign Ext In Register Instructions.
+def SEB : SignExtInReg<0x10, "seb", i8>;
+def SEH : SignExtInReg<0x18, "seh", i16>;
+
+/// Count Leading
+def CLZ : CountLeading<0x20, "clz",
+ [(set CPURegs:$rd, (ctlz CPURegs:$rs))]>;
+def CLO : CountLeading<0x21, "clo",
+ [(set CPURegs:$rd, (ctlz (not CPURegs:$rs)))]>;
+
+/// Byte Swap
+def WSBW : ByteSwap<0x20, 0x2, "wsbw">;
+
+// Conditional moves:
+// These instructions are expanded in
+// MipsISelLowering::EmitInstrWithCustomInserter if target does not have
+// conditional move instructions.
+// flag:int, data:int
+class CondMovIntInt<bits<6> funct, string instr_asm> :
+ FR<0, funct, (outs CPURegs:$rd),
+ (ins CPURegs:$rs, CPURegs:$rt, CPURegs:$F),
+ !strconcat(instr_asm, "\t$rd, $rs, $rt"), [], NoItinerary> {
+ let shamt = 0;
+ let usesCustomInserter = 1;
+ let Constraints = "$F = $rd";
+}
+
+def MOVZ_I : CondMovIntInt<0x0a, "movz">;
+def MOVN_I : CondMovIntInt<0x0b, "movn">;
+
+/// No operation
+let addr=0 in
+ def NOP : FJ<0, (outs), (ins), "nop", [], IIAlu>;
+
+// FrameIndexes are legalized when they are operands from load/store
+// instructions. The same not happens for stack address copies, so an
+// add op with mem ComplexPattern is used and the stack address copy
+// can be matched. It's similar to Sparc LEA_ADDRi
+def LEA_ADDiu : EffectiveAddress<"addiu\t$rt, $addr">;
+
+// DynAlloc node points to dynamically allocated stack space.
+// $sp is added to the list of implicitly used registers to prevent dead code
+// elimination from removing instructions that modify $sp.
+let Uses = [SP] in
+def DynAlloc : EffectiveAddress<"addiu\t$rt, $addr">;
+
+// MADD*/MSUB*
+def MADD : MArithR<0, "madd", MipsMAdd, 1>;
+def MADDU : MArithR<1, "maddu", MipsMAddu, 1>;
+def MSUB : MArithR<4, "msub", MipsMSub>;
+def MSUBU : MArithR<5, "msubu", MipsMSubu>;
+
+// MUL is a assembly macro in the current used ISAs. In recent ISA's
+// it is a real instruction.
+def MUL : ArithLogicR<0x1c, 0x02, "mul", mul, IIImul, CPURegs, 1>,
+ Requires<[HasMips32]>;
+
+def RDHWR : ReadHardware;
+
+def EXT : ExtIns<0, "ext", (outs CPURegs:$rt),
+ (ins CPURegs:$rs, uimm16:$pos, uimm16:$sz),
+ [(set CPURegs:$rt,
+ (MipsExt CPURegs:$rs, immZExt5:$pos, immZExt5:$sz))],
+ NoItinerary>;
+
+let Constraints = "$src = $rt" in
+def INS : ExtIns<4, "ins", (outs CPURegs:$rt),
+ (ins CPURegs:$rs, uimm16:$pos, uimm16:$sz, CPURegs:$src),
+ [(set CPURegs:$rt,
+ (MipsIns CPURegs:$rs, immZExt5:$pos, immZExt5:$sz,
+ CPURegs:$src))],
+ NoItinerary>;
+
+//===----------------------------------------------------------------------===//
+// Arbitrary patterns that map to one or more instructions
+//===----------------------------------------------------------------------===//
+
+// Small immediates
+def : Pat<(i32 immSExt16:$in),
+ (ADDiu ZERO, imm:$in)>;
+def : Pat<(i32 immZExt16:$in),
+ (ORi ZERO, imm:$in)>;
+
+// Arbitrary immediates
+def : Pat<(i32 imm:$imm),
+ (ORi (LUi (HI16 imm:$imm)), (LO16 imm:$imm))>;
+
+// Carry patterns
+def : Pat<(subc CPURegs:$lhs, CPURegs:$rhs),
+ (SUBu CPURegs:$lhs, CPURegs:$rhs)>;
+def : Pat<(addc CPURegs:$lhs, CPURegs:$rhs),
+ (ADDu CPURegs:$lhs, CPURegs:$rhs)>;
+def : Pat<(addc CPURegs:$src, immSExt16:$imm),
+ (ADDiu CPURegs:$src, imm:$imm)>;
+
+// Call
+def : Pat<(MipsJmpLink (i32 tglobaladdr:$dst)),
+ (JAL tglobaladdr:$dst)>;
+def : Pat<(MipsJmpLink (i32 texternalsym:$dst)),
+ (JAL texternalsym:$dst)>;
+//def : Pat<(MipsJmpLink CPURegs:$dst),
+// (JALR CPURegs:$dst)>;
+
+// hi/lo relocs
+def : Pat<(MipsHi tglobaladdr:$in), (LUi tglobaladdr:$in)>;
+def : Pat<(MipsHi tblockaddress:$in), (LUi tblockaddress:$in)>;
+def : Pat<(MipsLo tglobaladdr:$in), (ADDiu ZERO, tglobaladdr:$in)>;
+def : Pat<(MipsLo tblockaddress:$in), (ADDiu ZERO, tblockaddress:$in)>;
+def : Pat<(add CPURegs:$hi, (MipsLo tglobaladdr:$lo)),
+ (ADDiu CPURegs:$hi, tglobaladdr:$lo)>;
+def : Pat<(add CPURegs:$hi, (MipsLo tblockaddress:$lo)),
+ (ADDiu CPURegs:$hi, tblockaddress:$lo)>;
+
+def : Pat<(MipsHi tjumptable:$in), (LUi tjumptable:$in)>;
+def : Pat<(MipsLo tjumptable:$in), (ADDiu ZERO, tjumptable:$in)>;
+def : Pat<(add CPURegs:$hi, (MipsLo tjumptable:$lo)),
+ (ADDiu CPURegs:$hi, tjumptable:$lo)>;
+
+def : Pat<(MipsHi tconstpool:$in), (LUi tconstpool:$in)>;
+def : Pat<(MipsLo tconstpool:$in), (ADDiu ZERO, tconstpool:$in)>;
+def : Pat<(add CPURegs:$hi, (MipsLo tconstpool:$lo)),
+ (ADDiu CPURegs:$hi, tconstpool:$lo)>;
+
+// gp_rel relocs
+def : Pat<(add CPURegs:$gp, (MipsGPRel tglobaladdr:$in)),
+ (ADDiu CPURegs:$gp, tglobaladdr:$in)>;
+def : Pat<(add CPURegs:$gp, (MipsGPRel tconstpool:$in)),
+ (ADDiu CPURegs:$gp, tconstpool:$in)>;
+
+// tlsgd
+def : Pat<(add CPURegs:$gp, (MipsTlsGd tglobaltlsaddr:$in)),
+ (ADDiu CPURegs:$gp, tglobaltlsaddr:$in)>;
+
+// tprel hi/lo
+def : Pat<(MipsTprelHi tglobaltlsaddr:$in), (LUi tglobaltlsaddr:$in)>;
+def : Pat<(MipsTprelLo tglobaltlsaddr:$in), (ADDiu ZERO, tglobaltlsaddr:$in)>;
+def : Pat<(add CPURegs:$hi, (MipsTprelLo tglobaltlsaddr:$lo)),
+ (ADDiu CPURegs:$hi, tglobaltlsaddr:$lo)>;
+
+// wrapper_pic
+class WrapperPICPat<SDNode node>:
+ Pat<(MipsWrapperPIC node:$in),
+ (ADDiu GP, node:$in)>;
+
+def : WrapperPICPat<tglobaladdr>;
+def : WrapperPICPat<tconstpool>;
+def : WrapperPICPat<texternalsym>;
+def : WrapperPICPat<tblockaddress>;
+def : WrapperPICPat<tjumptable>;
+
+// Mips does not have "not", so we expand our way
+def : Pat<(not CPURegs:$in),
+ (NOR CPURegs:$in, ZERO)>;
+
+// extended load and stores
+def : Pat<(extloadi1 addr:$src), (LBu addr:$src)>;
+def : Pat<(extloadi8 addr:$src), (LBu addr:$src)>;
+def : Pat<(extloadi16_a addr:$src), (LHu addr:$src)>;
+def : Pat<(extloadi16_u addr:$src), (ULHu addr:$src)>;
+
+// peepholes
+def : Pat<(store (i32 0), addr:$dst), (SW ZERO, addr:$dst)>;
+
+// brcond patterns
+multiclass BrcondPats<RegisterClass RC, Instruction BEQOp, Instruction BNEOp,
+ Instruction SLTOp, Instruction SLTuOp, Instruction SLTiOp,
+ Instruction SLTiuOp, Register ZEROReg> {
+def : Pat<(brcond (i32 (setne RC:$lhs, 0)), bb:$dst),
+ (BNEOp RC:$lhs, ZEROReg, bb:$dst)>;
+def : Pat<(brcond (i32 (seteq RC:$lhs, 0)), bb:$dst),
+ (BEQOp RC:$lhs, ZEROReg, bb:$dst)>;
+
+def : Pat<(brcond (i32 (setge RC:$lhs, RC:$rhs)), bb:$dst),
+ (BEQ (SLTOp RC:$lhs, RC:$rhs), ZERO, bb:$dst)>;
+def : Pat<(brcond (i32 (setuge RC:$lhs, RC:$rhs)), bb:$dst),
+ (BEQ (SLTuOp RC:$lhs, RC:$rhs), ZERO, bb:$dst)>;
+def : Pat<(brcond (i32 (setge RC:$lhs, immSExt16:$rhs)), bb:$dst),
+ (BEQ (SLTiOp RC:$lhs, immSExt16:$rhs), ZERO, bb:$dst)>;
+def : Pat<(brcond (i32 (setuge RC:$lhs, immSExt16:$rhs)), bb:$dst),
+ (BEQ (SLTiuOp RC:$lhs, immSExt16:$rhs), ZERO, bb:$dst)>;
+
+def : Pat<(brcond (i32 (setle RC:$lhs, RC:$rhs)), bb:$dst),
+ (BEQ (SLTOp RC:$rhs, RC:$lhs), ZERO, bb:$dst)>;
+def : Pat<(brcond (i32 (setule RC:$lhs, RC:$rhs)), bb:$dst),
+ (BEQ (SLTuOp RC:$rhs, RC:$lhs), ZERO, bb:$dst)>;
+
+def : Pat<(brcond RC:$cond, bb:$dst),
+ (BNEOp RC:$cond, ZEROReg, bb:$dst)>;
+}
+
+defm : BrcondPats<CPURegs, BEQ, BNE, SLT, SLTu, SLTi, SLTiu, ZERO>;
+
+// select patterns
+multiclass MovzPats<RegisterClass RC, Instruction MOVZInst> {
+ def : Pat<(select (i32 (setge CPURegs:$lhs, CPURegs:$rhs)), RC:$T, RC:$F),
+ (MOVZInst RC:$T, (SLT CPURegs:$lhs, CPURegs:$rhs), RC:$F)>;
+ def : Pat<(select (i32 (setuge CPURegs:$lhs, CPURegs:$rhs)), RC:$T, RC:$F),
+ (MOVZInst RC:$T, (SLTu CPURegs:$lhs, CPURegs:$rhs), RC:$F)>;
+ def : Pat<(select (i32 (setge CPURegs:$lhs, immSExt16:$rhs)), RC:$T, RC:$F),
+ (MOVZInst RC:$T, (SLTi CPURegs:$lhs, immSExt16:$rhs), RC:$F)>;
+ def : Pat<(select (i32 (setuge CPURegs:$lh, immSExt16:$rh)), RC:$T, RC:$F),
+ (MOVZInst RC:$T, (SLTiu CPURegs:$lh, immSExt16:$rh), RC:$F)>;
+ def : Pat<(select (i32 (setle CPURegs:$lhs, CPURegs:$rhs)), RC:$T, RC:$F),
+ (MOVZInst RC:$T, (SLT CPURegs:$rhs, CPURegs:$lhs), RC:$F)>;
+ def : Pat<(select (i32 (setule CPURegs:$lhs, CPURegs:$rhs)), RC:$T, RC:$F),
+ (MOVZInst RC:$T, (SLTu CPURegs:$rhs, CPURegs:$lhs), RC:$F)>;
+ def : Pat<(select (i32 (seteq CPURegs:$lhs, CPURegs:$rhs)), RC:$T, RC:$F),
+ (MOVZInst RC:$T, (XOR CPURegs:$lhs, CPURegs:$rhs), RC:$F)>;
+ def : Pat<(select (i32 (seteq CPURegs:$lhs, 0)), RC:$T, RC:$F),
+ (MOVZInst RC:$T, CPURegs:$lhs, RC:$F)>;
+}
+
+multiclass MovnPats<RegisterClass RC, Instruction MOVNInst> {
+ def : Pat<(select (i32 (setne CPURegs:$lhs, CPURegs:$rhs)), RC:$T, RC:$F),
+ (MOVNInst RC:$T, (XOR CPURegs:$lhs, CPURegs:$rhs), RC:$F)>;
+ def : Pat<(select CPURegs:$cond, RC:$T, RC:$F),
+ (MOVNInst RC:$T, CPURegs:$cond, RC:$F)>;
+ def : Pat<(select (i32 (setne CPURegs:$lhs, 0)), RC:$T, RC:$F),
+ (MOVNInst RC:$T, CPURegs:$lhs, RC:$F)>;
+}
+
+defm : MovzPats<CPURegs, MOVZ_I>;
+defm : MovnPats<CPURegs, MOVN_I>;
+
+// setcc patterns
+multiclass SeteqPats<RegisterClass RC, Instruction SLTiuOp, Instruction XOROp,
+ Instruction SLTuOp, Register ZEROReg> {
+ def : Pat<(seteq RC:$lhs, RC:$rhs),
+ (SLTiuOp (XOROp RC:$lhs, RC:$rhs), 1)>;
+ def : Pat<(setne RC:$lhs, RC:$rhs),
+ (SLTuOp ZEROReg, (XOROp RC:$lhs, RC:$rhs))>;
+}
+
+multiclass SetlePats<RegisterClass RC, Instruction SLTOp, Instruction SLTuOp> {
+ def : Pat<(setle RC:$lhs, RC:$rhs),
+ (XORi (SLTOp RC:$rhs, RC:$lhs), 1)>;
+ def : Pat<(setule RC:$lhs, RC:$rhs),
+ (XORi (SLTuOp RC:$rhs, RC:$lhs), 1)>;
+}
+
+multiclass SetgtPats<RegisterClass RC, Instruction SLTOp, Instruction SLTuOp> {
+ def : Pat<(setgt RC:$lhs, RC:$rhs),
+ (SLTOp RC:$rhs, RC:$lhs)>;
+ def : Pat<(setugt RC:$lhs, RC:$rhs),
+ (SLTuOp RC:$rhs, RC:$lhs)>;
+}
+
+multiclass SetgePats<RegisterClass RC, Instruction SLTOp, Instruction SLTuOp> {
+ def : Pat<(setge RC:$lhs, RC:$rhs),
+ (XORi (SLTOp RC:$lhs, RC:$rhs), 1)>;
+ def : Pat<(setuge RC:$lhs, RC:$rhs),
+ (XORi (SLTuOp RC:$lhs, RC:$rhs), 1)>;
+}
+
+multiclass SetgeImmPats<RegisterClass RC, Instruction SLTiOp,
+ Instruction SLTiuOp> {
+ def : Pat<(setge RC:$lhs, immSExt16:$rhs),
+ (XORi (SLTiOp RC:$lhs, immSExt16:$rhs), 1)>;
+ def : Pat<(setuge RC:$lhs, immSExt16:$rhs),
+ (XORi (SLTiuOp RC:$lhs, immSExt16:$rhs), 1)>;
+}
+
+defm : SeteqPats<CPURegs, SLTiu, XOR, SLTu, ZERO>;
+defm : SetlePats<CPURegs, SLT, SLTu>;
+defm : SetgtPats<CPURegs, SLT, SLTu>;
+defm : SetgePats<CPURegs, SLT, SLTu>;
+defm : SetgeImmPats<CPURegs, SLTi, SLTiu>;
+
+// select MipsDynAlloc
+def : Pat<(MipsDynAlloc addr:$f), (DynAlloc addr:$f)>;
+
+//===----------------------------------------------------------------------===//
+// Floating Point Support
+//===----------------------------------------------------------------------===//
+
+include "MipsInstrFPU.td"
+include "Mips64InstrInfo.td"
+
diff --git a/contrib/llvm/lib/Target/Mips/MipsJITInfo.cpp b/contrib/llvm/lib/Target/Mips/MipsJITInfo.cpp
new file mode 100644
index 0000000..28c2b48
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsJITInfo.cpp
@@ -0,0 +1,230 @@
+//===- MipsJITInfo.cpp - Implement the JIT interfaces for the Mips target -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the JIT interfaces for the Mips target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jit"
+#include "MipsJITInfo.h"
+#include "MipsInstrInfo.h"
+#include "MipsRelocations.h"
+#include "MipsSubtarget.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Memory.h"
+#include <cstdlib>
+using namespace llvm;
+
+
+void MipsJITInfo::replaceMachineCodeForFunction(void *Old, void *New) {
+ report_fatal_error("MipsJITInfo::replaceMachineCodeForFunction");
+}
+
+/// JITCompilerFunction - This contains the address of the JIT function used to
+/// compile a function lazily.
+static TargetJITInfo::JITCompilerFn JITCompilerFunction;
+
+// Get the ASMPREFIX for the current host. This is often '_'.
+#ifndef __USER_LABEL_PREFIX__
+#define __USER_LABEL_PREFIX__
+#endif
+#define GETASMPREFIX2(X) #X
+#define GETASMPREFIX(X) GETASMPREFIX2(X)
+#define ASMPREFIX GETASMPREFIX(__USER_LABEL_PREFIX__)
+
+// CompilationCallback stub - We can't use a C function with inline assembly in
+// it, because the prolog/epilog inserted by GCC won't work for us. Instead,
+// write our own wrapper, which does things our way, so we have complete control
+// over register saving and restoring. This code saves registers, calls
+// MipsCompilationCallbackC and restores registers.
+extern "C" {
+#if defined (__mips__)
+void MipsCompilationCallback();
+
+ asm(
+ ".text\n"
+ ".align 2\n"
+ ".globl " ASMPREFIX "MipsCompilationCallback\n"
+ ASMPREFIX "MipsCompilationCallback:\n"
+ ".ent " ASMPREFIX "MipsCompilationCallback\n"
+ ".frame $29, 32, $31\n"
+ ".set noreorder\n"
+ ".cpload $t9\n"
+
+ "addiu $sp, $sp, -60\n"
+ ".cprestore 16\n"
+
+ // Save argument registers a0, a1, a2, a3, f12, f14 since they may contain
+ // stuff for the real target function right now. We have to act as if this
+ // whole compilation callback doesn't exist as far as the caller is
+ // concerned. We also need to save the ra register since it contains the
+ // original return address, and t8 register since it contains the address
+ // of the end of function stub.
+ "sw $a0, 20($sp)\n"
+ "sw $a1, 24($sp)\n"
+ "sw $a2, 28($sp)\n"
+ "sw $a3, 32($sp)\n"
+ "sw $ra, 36($sp)\n"
+ "sw $t8, 40($sp)\n"
+ "sdc1 $f12, 44($sp)\n"
+ "sdc1 $f14, 52($sp)\n"
+
+ // t8 points at the end of function stub. Pass the beginning of the stub
+ // to the MipsCompilationCallbackC.
+ "addiu $a0, $t8, -16\n"
+ "jal " ASMPREFIX "MipsCompilationCallbackC\n"
+ "nop\n"
+
+ // Restore registers.
+ "lw $a0, 20($sp)\n"
+ "lw $a1, 24($sp)\n"
+ "lw $a2, 28($sp)\n"
+ "lw $a3, 32($sp)\n"
+ "lw $ra, 36($sp)\n"
+ "lw $t8, 40($sp)\n"
+ "ldc1 $f12, 44($sp)\n"
+ "ldc1 $f14, 52($sp)\n"
+ "addiu $sp, $sp, 60\n"
+
+ // Jump to the (newly modified) stub to invoke the real function.
+ "addiu $t8, $t8, -16\n"
+ "jr $t8\n"
+ "nop\n"
+
+ ".set reorder\n"
+ ".end " ASMPREFIX "MipsCompilationCallback\n"
+ );
+#else // host != Mips
+ void MipsCompilationCallback() {
+ llvm_unreachable(
+ "Cannot call MipsCompilationCallback() on a non-Mips arch!");
+ }
+#endif
+}
+
+/// MipsCompilationCallbackC - This is the target-specific function invoked
+/// by the function stub when we did not know the real target of a call.
+/// This function must locate the start of the stub or call site and pass
+/// it into the JIT compiler function.
+extern "C" void MipsCompilationCallbackC(intptr_t StubAddr) {
+ // Get the address of the compiled code for this function.
+ intptr_t NewVal = (intptr_t) JITCompilerFunction((void*) StubAddr);
+
+ // Rewrite the function stub so that we don't end up here every time we
+ // execute the call. We're replacing the first four instructions of the
+ // stub with code that jumps to the compiled function:
+ // lui $t9, %hi(NewVal)
+ // addiu $t9, $t9, %lo(NewVal)
+ // jr $t9
+ // nop
+
+ int Hi = ((unsigned)NewVal & 0xffff0000) >> 16;
+ if ((NewVal & 0x8000) != 0)
+ Hi++;
+ int Lo = (int)(NewVal & 0xffff);
+
+ *(intptr_t *)(StubAddr) = 0xf << 26 | 25 << 16 | Hi;
+ *(intptr_t *)(StubAddr + 4) = 9 << 26 | 25 << 21 | 25 << 16 | Lo;
+ *(intptr_t *)(StubAddr + 8) = 25 << 21 | 8;
+ *(intptr_t *)(StubAddr + 12) = 0;
+
+ sys::Memory::InvalidateInstructionCache((void*) StubAddr, 16);
+}
+
+TargetJITInfo::LazyResolverFn MipsJITInfo::getLazyResolverFunction(
+ JITCompilerFn F) {
+ JITCompilerFunction = F;
+ return MipsCompilationCallback;
+}
+
+TargetJITInfo::StubLayout MipsJITInfo::getStubLayout() {
+ // The stub contains 4 4-byte instructions, aligned at 4 bytes. See
+ // emitFunctionStub for details.
+ StubLayout Result = { 4*4, 4 };
+ return Result;
+}
+
+void *MipsJITInfo::emitFunctionStub(const Function* F, void *Fn,
+ JITCodeEmitter &JCE) {
+ JCE.emitAlignment(4);
+ void *Addr = (void*) (JCE.getCurrentPCValue());
+ if (!sys::Memory::setRangeWritable(Addr, 16))
+ llvm_unreachable("ERROR: Unable to mark stub writable.");
+
+ intptr_t EmittedAddr;
+ if (Fn != (void*)(intptr_t)MipsCompilationCallback)
+ EmittedAddr = (intptr_t)Fn;
+ else
+ EmittedAddr = (intptr_t)MipsCompilationCallback;
+
+
+ int Hi = ((unsigned)EmittedAddr & 0xffff0000) >> 16;
+ if ((EmittedAddr & 0x8000) != 0)
+ Hi++;
+ int Lo = (int)(EmittedAddr & 0xffff);
+
+ // lui t9, %hi(EmittedAddr)
+ // addiu t9, t9, %lo(EmittedAddr)
+ // jalr t8, t9
+ // nop
+ JCE.emitWordLE(0xf << 26 | 25 << 16 | Hi);
+ JCE.emitWordLE(9 << 26 | 25 << 21 | 25 << 16 | Lo);
+ JCE.emitWordLE(25 << 21 | 24 << 11 | 9);
+ JCE.emitWordLE(0);
+
+ sys::Memory::InvalidateInstructionCache(Addr, 16);
+ if (!sys::Memory::setRangeExecutable(Addr, 16))
+ llvm_unreachable("ERROR: Unable to mark stub executable.");
+
+ return Addr;
+}
+
+/// relocate - Before the JIT can run a block of code that has been emitted,
+/// it must rewrite the code to contain the actual addresses of any
+/// referenced global symbols.
+void MipsJITInfo::relocate(void *Function, MachineRelocation *MR,
+ unsigned NumRelocs, unsigned char* GOTBase) {
+ for (unsigned i = 0; i != NumRelocs; ++i, ++MR) {
+
+ void *RelocPos = (char*) Function + MR->getMachineCodeOffset();
+ intptr_t ResultPtr = (intptr_t) MR->getResultPointer();
+
+ switch ((Mips::RelocationType) MR->getRelocationType()) {
+ case Mips::reloc_mips_branch:
+ ResultPtr = (((ResultPtr - (intptr_t) RelocPos) - 4) >> 2) & 0xffff;
+ *((unsigned*) RelocPos) |= (unsigned) ResultPtr;
+ break;
+
+ case Mips::reloc_mips_26:
+ ResultPtr = (ResultPtr & 0x0fffffff) >> 2;
+ *((unsigned*) RelocPos) |= (unsigned) ResultPtr;
+ break;
+
+ case Mips::reloc_mips_hi:
+ ResultPtr = ResultPtr >> 16;
+ if ((((intptr_t) (MR->getResultPointer()) & 0xffff) >> 15) == 1) {
+ ResultPtr += 1;
+ }
+ *((unsigned*) RelocPos) |= (unsigned) ResultPtr;
+ break;
+
+ case Mips::reloc_mips_lo:
+ ResultPtr = ResultPtr & 0xffff;
+ *((unsigned*) RelocPos) |= (unsigned) ResultPtr;
+ break;
+
+ default:
+ llvm_unreachable("ERROR: Unknown Mips relocation.");
+ }
+ }
+}
diff --git a/contrib/llvm/lib/Target/Mips/MipsJITInfo.h b/contrib/llvm/lib/Target/Mips/MipsJITInfo.h
new file mode 100644
index 0000000..41f32a3
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsJITInfo.h
@@ -0,0 +1,70 @@
+//===- MipsJITInfo.h - Mips implementation of the JIT interface -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the MipsJITInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSJITINFO_H
+#define MIPSJITINFO_H
+
+#include "MipsMachineFunction.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/Target/TargetJITInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+
+namespace llvm {
+class MipsTargetMachine;
+
+class MipsJITInfo : public TargetJITInfo {
+
+ bool IsPIC;
+
+ public:
+ explicit MipsJITInfo() :
+ IsPIC(false) {}
+
+ /// replaceMachineCodeForFunction - Make it so that calling the function
+ /// whose machine code is at OLD turns into a call to NEW, perhaps by
+ /// overwriting OLD with a branch to NEW. This is used for self-modifying
+ /// code.
+ ///
+ virtual void replaceMachineCodeForFunction(void *Old, void *New);
+
+ // getStubLayout - Returns the size and alignment of the largest call stub
+ // on Mips.
+ virtual StubLayout getStubLayout();
+
+ /// emitFunctionStub - Use the specified JITCodeEmitter object to emit a
+ /// small native function that simply calls the function at the specified
+ /// address.
+ virtual void *emitFunctionStub(const Function* F, void *Fn,
+ JITCodeEmitter &JCE);
+
+ /// getLazyResolverFunction - Expose the lazy resolver to the JIT.
+ virtual LazyResolverFn getLazyResolverFunction(JITCompilerFn);
+
+ /// relocate - Before the JIT can run a block of code that has been emitted,
+ /// it must rewrite the code to contain the actual addresses of any
+ /// referenced global symbols.
+ virtual void relocate(void *Function, MachineRelocation *MR,
+ unsigned NumRelocs, unsigned char* GOTBase);
+
+ /// Initialize - Initialize internal stage for the function being JITted.
+ void Initialize(const MachineFunction &MF, bool isPIC) {
+ IsPIC = isPIC;
+ }
+
+};
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/Mips/MipsMCInstLower.cpp b/contrib/llvm/lib/Target/Mips/MipsMCInstLower.cpp
new file mode 100644
index 0000000..608a7d2
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsMCInstLower.cpp
@@ -0,0 +1,126 @@
+//===-- MipsMCInstLower.cpp - Convert Mips MachineInstr to MCInst ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains code to lower Mips MachineInstrs to their corresponding
+// MCInst records.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsMCInstLower.h"
+#include "MipsAsmPrinter.h"
+#include "MipsInstrInfo.h"
+#include "MipsMCSymbolRefExpr.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Target/Mangler.h"
+using namespace llvm;
+
+MipsMCInstLower::MipsMCInstLower(Mangler *mang, const MachineFunction &mf,
+ MipsAsmPrinter &asmprinter)
+ : Ctx(mf.getContext()), Mang(mang), AsmPrinter(asmprinter) {}
+
+MCOperand MipsMCInstLower::LowerSymbolOperand(const MachineOperand &MO,
+ MachineOperandType MOTy,
+ unsigned Offset) const {
+ MipsMCSymbolRefExpr::VariantKind Kind;
+ const MCSymbol *Symbol;
+
+ switch(MO.getTargetFlags()) {
+ default: assert(0 && "Invalid target flag!");
+ case MipsII::MO_NO_FLAG: Kind = MipsMCSymbolRefExpr::VK_Mips_None; break;
+ case MipsII::MO_GPREL: Kind = MipsMCSymbolRefExpr::VK_Mips_GPREL; break;
+ case MipsII::MO_GOT_CALL: Kind = MipsMCSymbolRefExpr::VK_Mips_GOT_CALL; break;
+ case MipsII::MO_GOT: Kind = MipsMCSymbolRefExpr::VK_Mips_GOT; break;
+ case MipsII::MO_ABS_HI: Kind = MipsMCSymbolRefExpr::VK_Mips_ABS_HI; break;
+ case MipsII::MO_ABS_LO: Kind = MipsMCSymbolRefExpr::VK_Mips_ABS_LO; break;
+ case MipsII::MO_TLSGD: Kind = MipsMCSymbolRefExpr::VK_Mips_TLSGD; break;
+ case MipsII::MO_GOTTPREL: Kind = MipsMCSymbolRefExpr::VK_Mips_GOTTPREL; break;
+ case MipsII::MO_TPREL_HI: Kind = MipsMCSymbolRefExpr::VK_Mips_TPREL_HI; break;
+ case MipsII::MO_TPREL_LO: Kind = MipsMCSymbolRefExpr::VK_Mips_TPREL_LO; break;
+ case MipsII::MO_GPOFF_HI: Kind = MipsMCSymbolRefExpr::VK_Mips_GPOFF_HI; break;
+ case MipsII::MO_GPOFF_LO: Kind = MipsMCSymbolRefExpr::VK_Mips_GPOFF_LO; break;
+ case MipsII::MO_GOT_DISP: Kind = MipsMCSymbolRefExpr::VK_Mips_GOT_DISP; break;
+ case MipsII::MO_GOT_PAGE: Kind = MipsMCSymbolRefExpr::VK_Mips_GOT_PAGE; break;
+ case MipsII::MO_GOT_OFST: Kind = MipsMCSymbolRefExpr::VK_Mips_GOT_OFST; break;
+ }
+
+ switch (MOTy) {
+ case MachineOperand::MO_MachineBasicBlock:
+ Symbol = MO.getMBB()->getSymbol();
+ break;
+
+ case MachineOperand::MO_GlobalAddress:
+ Symbol = Mang->getSymbol(MO.getGlobal());
+ break;
+
+ case MachineOperand::MO_BlockAddress:
+ Symbol = AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress());
+ break;
+
+ case MachineOperand::MO_ExternalSymbol:
+ Symbol = AsmPrinter.GetExternalSymbolSymbol(MO.getSymbolName());
+ break;
+
+ case MachineOperand::MO_JumpTableIndex:
+ Symbol = AsmPrinter.GetJTISymbol(MO.getIndex());
+ break;
+
+ case MachineOperand::MO_ConstantPoolIndex:
+ Symbol = AsmPrinter.GetCPISymbol(MO.getIndex());
+ if (MO.getOffset())
+ Offset += MO.getOffset();
+ break;
+
+ default:
+ llvm_unreachable("<unknown operand type>");
+ }
+
+ return MCOperand::CreateExpr(MipsMCSymbolRefExpr::Create(Kind, Symbol, Offset,
+ Ctx));
+}
+
+MCOperand MipsMCInstLower::LowerOperand(const MachineOperand& MO) const {
+ MachineOperandType MOTy = MO.getType();
+
+ switch (MOTy) {
+ default:
+ assert(0 && "unknown operand type");
+ break;
+ case MachineOperand::MO_Register:
+ // Ignore all implicit register operands.
+ if (MO.isImplicit()) break;
+ return MCOperand::CreateReg(MO.getReg());
+ case MachineOperand::MO_Immediate:
+ return MCOperand::CreateImm(MO.getImm());
+ case MachineOperand::MO_MachineBasicBlock:
+ case MachineOperand::MO_GlobalAddress:
+ case MachineOperand::MO_ExternalSymbol:
+ case MachineOperand::MO_JumpTableIndex:
+ case MachineOperand::MO_ConstantPoolIndex:
+ case MachineOperand::MO_BlockAddress:
+ return LowerSymbolOperand(MO, MOTy, 0);
+ }
+
+ return MCOperand();
+}
+
+void MipsMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
+ OutMI.setOpcode(MI->getOpcode());
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ MCOperand MCOp = LowerOperand(MO);
+
+ if (MCOp.isValid())
+ OutMI.addOperand(MCOp);
+ }
+}
diff --git a/contrib/llvm/lib/Target/Mips/MipsMCInstLower.h b/contrib/llvm/lib/Target/Mips/MipsMCInstLower.h
new file mode 100644
index 0000000..223f23a
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsMCInstLower.h
@@ -0,0 +1,44 @@
+//===-- MipsMCInstLower.h - Lower MachineInstr to MCInst -------------------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSMCINSTLOWER_H
+#define MIPSMCINSTLOWER_H
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+ class MCAsmInfo;
+ class MCContext;
+ class MCInst;
+ class MCOperand;
+ class MCSymbol;
+ class MachineInstr;
+ class MachineFunction;
+ class Mangler;
+ class MipsAsmPrinter;
+
+/// MipsMCInstLower - This class is used to lower an MachineInstr into an
+// MCInst.
+class LLVM_LIBRARY_VISIBILITY MipsMCInstLower {
+ typedef MachineOperand::MachineOperandType MachineOperandType;
+ MCContext &Ctx;
+ Mangler *Mang;
+ MipsAsmPrinter &AsmPrinter;
+public:
+ MipsMCInstLower(Mangler *mang, const MachineFunction &MF,
+ MipsAsmPrinter &asmprinter);
+ void Lower(const MachineInstr *MI, MCInst &OutMI) const;
+private:
+ MCOperand LowerSymbolOperand(const MachineOperand &MO,
+ MachineOperandType MOTy, unsigned Offset) const;
+ MCOperand LowerOperand(const MachineOperand& MO) const;
+};
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/Mips/MipsMCSymbolRefExpr.cpp b/contrib/llvm/lib/Target/Mips/MipsMCSymbolRefExpr.cpp
new file mode 100644
index 0000000..a0a242c
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsMCSymbolRefExpr.cpp
@@ -0,0 +1,70 @@
+//===-- MipsMCSymbolRefExpr.cpp - Mips specific MC expression classes -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mipsmcsymbolrefexpr"
+#include "MipsMCSymbolRefExpr.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSymbol.h"
+using namespace llvm;
+
+const MipsMCSymbolRefExpr*
+MipsMCSymbolRefExpr::Create(VariantKind Kind, const MCSymbol *Symbol,
+ int Offset, MCContext &Ctx) {
+ return new (Ctx) MipsMCSymbolRefExpr(Kind, Symbol, Offset);
+}
+
+void MipsMCSymbolRefExpr::PrintImpl(raw_ostream &OS) const {
+ switch (Kind) {
+ default: assert(0 && "Invalid kind!");
+ case VK_Mips_None: break;
+ case VK_Mips_GPREL: OS << "%gp_rel("; break;
+ case VK_Mips_GOT_CALL: OS << "%call16("; break;
+ case VK_Mips_GOT: OS << "%got("; break;
+ case VK_Mips_ABS_HI: OS << "%hi("; break;
+ case VK_Mips_ABS_LO: OS << "%lo("; break;
+ case VK_Mips_TLSGD: OS << "%tlsgd("; break;
+ case VK_Mips_GOTTPREL: OS << "%gottprel("; break;
+ case VK_Mips_TPREL_HI: OS << "%tprel_hi("; break;
+ case VK_Mips_TPREL_LO: OS << "%tprel_lo("; break;
+ case VK_Mips_GPOFF_HI: OS << "%hi(%neg(%gp_rel("; break;
+ case VK_Mips_GPOFF_LO: OS << "%lo(%neg(%gp_rel("; break;
+ case VK_Mips_GOT_DISP: OS << "%got_disp("; break;
+ case VK_Mips_GOT_PAGE: OS << "%got_page("; break;
+ case VK_Mips_GOT_OFST: OS << "%got_ofst("; break;
+ }
+
+ OS << *Symbol;
+
+ if (Offset) {
+ if (Offset > 0)
+ OS << '+';
+ OS << Offset;
+ }
+
+ if (Kind == VK_Mips_GPOFF_HI || Kind == VK_Mips_GPOFF_LO)
+ OS << ")))";
+ else if (Kind != VK_Mips_None)
+ OS << ')';
+}
+
+bool
+MipsMCSymbolRefExpr::EvaluateAsRelocatableImpl(MCValue &Res,
+ const MCAsmLayout *Layout) const {
+ return false;
+}
+
+void MipsMCSymbolRefExpr::AddValueSymbols(MCAssembler *Asm) const {
+ Asm->getOrCreateSymbolData(*Symbol);
+}
+
+const MCSection *MipsMCSymbolRefExpr::FindAssociatedSection() const {
+ return Symbol->isDefined() ? &Symbol->getSection() : NULL;
+}
+
diff --git a/contrib/llvm/lib/Target/Mips/MipsMCSymbolRefExpr.h b/contrib/llvm/lib/Target/Mips/MipsMCSymbolRefExpr.h
new file mode 100644
index 0000000..55e85a7
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsMCSymbolRefExpr.h
@@ -0,0 +1,67 @@
+//===-- MipsMCSymbolRefExpr.h - Mips specific MCSymbolRefExpr class -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSMCSYMBOLREFEXPR_H
+#define MIPSMCSYMBOLREFEXPR_H
+#include "llvm/MC/MCExpr.h"
+
+namespace llvm {
+
+class MipsMCSymbolRefExpr : public MCTargetExpr {
+public:
+ enum VariantKind {
+ VK_Mips_None,
+ VK_Mips_GPREL,
+ VK_Mips_GOT_CALL,
+ VK_Mips_GOT,
+ VK_Mips_ABS_HI,
+ VK_Mips_ABS_LO,
+ VK_Mips_TLSGD,
+ VK_Mips_GOTTPREL,
+ VK_Mips_TPREL_HI,
+ VK_Mips_TPREL_LO,
+ VK_Mips_GPOFF_HI,
+ VK_Mips_GPOFF_LO,
+ VK_Mips_GOT_DISP,
+ VK_Mips_GOT_PAGE,
+ VK_Mips_GOT_OFST
+ };
+
+private:
+ const VariantKind Kind;
+ const MCSymbol *Symbol;
+ int Offset;
+
+ explicit MipsMCSymbolRefExpr(VariantKind _Kind, const MCSymbol *_Symbol,
+ int _Offset)
+ : Kind(_Kind), Symbol(_Symbol), Offset(_Offset) {}
+
+public:
+ static const MipsMCSymbolRefExpr *Create(VariantKind Kind,
+ const MCSymbol *Symbol, int Offset,
+ MCContext &Ctx);
+
+ void PrintImpl(raw_ostream &OS) const;
+ bool EvaluateAsRelocatableImpl(MCValue &Res,
+ const MCAsmLayout *Layout) const;
+ void AddValueSymbols(MCAssembler *) const;
+ const MCSection *FindAssociatedSection() const;
+
+ static bool classof(const MCExpr *E) {
+ return E->getKind() == MCExpr::Target;
+ }
+
+ static bool classof(const MipsMCSymbolRefExpr *) { return true; }
+
+ int getOffset() const { return Offset; }
+ void setOffset(int O) { Offset = O; }
+};
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Mips/MipsMachineFunction.h b/contrib/llvm/lib/Target/Mips/MipsMachineFunction.h
new file mode 100644
index 0000000..bc30b6b
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsMachineFunction.h
@@ -0,0 +1,107 @@
+//===-- MipsMachineFunctionInfo.h - Private data used for Mips ----*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Mips specific subclass of MachineFunctionInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPS_MACHINE_FUNCTION_INFO_H
+#define MIPS_MACHINE_FUNCTION_INFO_H
+
+#include <utility>
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/VectorExtras.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+
+namespace llvm {
+
+/// MipsFunctionInfo - This class is derived from MachineFunction private
+/// Mips target-specific information for each MachineFunction.
+class MipsFunctionInfo : public MachineFunctionInfo {
+
+private:
+ MachineFunction& MF;
+ /// SRetReturnReg - Some subtargets require that sret lowering includes
+ /// returning the value of the returned struct in a register. This field
+ /// holds the virtual register into which the sret argument is passed.
+ unsigned SRetReturnReg;
+
+ /// GlobalBaseReg - keeps track of the virtual register initialized for
+ /// use as the global base register. This is used for PIC in some PIC
+ /// relocation models.
+ unsigned GlobalBaseReg;
+
+ /// VarArgsFrameIndex - FrameIndex for start of varargs area.
+ int VarArgsFrameIndex;
+
+ // Range of frame object indices.
+ // InArgFIRange: Range of indices of all frame objects created during call to
+ // LowerFormalArguments.
+ // OutArgFIRange: Range of indices of all frame objects created during call to
+ // LowerCall except for the frame object for restoring $gp.
+ std::pair<int, int> InArgFIRange, OutArgFIRange;
+ int GPFI; // Index of the frame object for restoring $gp
+ mutable int DynAllocFI; // Frame index of dynamically allocated stack area.
+ unsigned MaxCallFrameSize;
+
+public:
+ MipsFunctionInfo(MachineFunction& MF)
+ : MF(MF), SRetReturnReg(0), GlobalBaseReg(0),
+ VarArgsFrameIndex(0), InArgFIRange(std::make_pair(-1, 0)),
+ OutArgFIRange(std::make_pair(-1, 0)), GPFI(0), DynAllocFI(0),
+ MaxCallFrameSize(0)
+ {}
+
+ bool isInArgFI(int FI) const {
+ return FI <= InArgFIRange.first && FI >= InArgFIRange.second;
+ }
+ void setLastInArgFI(int FI) { InArgFIRange.second = FI; }
+
+ bool isOutArgFI(int FI) const {
+ return FI <= OutArgFIRange.first && FI >= OutArgFIRange.second;
+ }
+ void extendOutArgFIRange(int FirstFI, int LastFI) {
+ if (!OutArgFIRange.second)
+ // this must be the first time this function was called.
+ OutArgFIRange.first = FirstFI;
+ OutArgFIRange.second = LastFI;
+ }
+
+ int getGPFI() const { return GPFI; }
+ void setGPFI(int FI) { GPFI = FI; }
+ bool needGPSaveRestore() const { return getGPFI(); }
+ bool isGPFI(int FI) const { return GPFI && GPFI == FI; }
+
+ // The first call to this function creates a frame object for dynamically
+ // allocated stack area.
+ int getDynAllocFI() const {
+ if (!DynAllocFI)
+ DynAllocFI = MF.getFrameInfo()->CreateFixedObject(4, 0, true);
+
+ return DynAllocFI;
+ }
+ bool isDynAllocFI(int FI) const { return DynAllocFI && DynAllocFI == FI; }
+
+ unsigned getSRetReturnReg() const { return SRetReturnReg; }
+ void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; }
+
+ unsigned getGlobalBaseReg() const { return GlobalBaseReg; }
+ void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; }
+
+ int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
+ void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; }
+
+ unsigned getMaxCallFrameSize() const { return MaxCallFrameSize; }
+ void setMaxCallFrameSize(unsigned S) { MaxCallFrameSize = S; }
+};
+
+} // end of namespace llvm
+
+#endif // MIPS_MACHINE_FUNCTION_INFO_H
diff --git a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.cpp b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.cpp
new file mode 100644
index 0000000..f8c0fda
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -0,0 +1,351 @@
+//===- MipsRegisterInfo.cpp - MIPS Register Information -== -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MIPS implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mips-reg-info"
+
+#include "Mips.h"
+#include "MipsSubtarget.h"
+#include "MipsRegisterInfo.h"
+#include "MipsMachineFunction.h"
+#include "llvm/Constants.h"
+#include "llvm/Type.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/DebugInfo.h"
+
+#define GET_REGINFO_TARGET_DESC
+#include "MipsGenRegisterInfo.inc"
+
+using namespace llvm;
+
+MipsRegisterInfo::MipsRegisterInfo(const MipsSubtarget &ST,
+ const TargetInstrInfo &tii)
+ : MipsGenRegisterInfo(Mips::RA), Subtarget(ST), TII(tii) {}
+
+/// getRegisterNumbering - Given the enum value for some register, e.g.
+/// Mips::RA, return the number that it corresponds to (e.g. 31).
+unsigned MipsRegisterInfo::
+getRegisterNumbering(unsigned RegEnum)
+{
+ switch (RegEnum) {
+ case Mips::ZERO: case Mips::ZERO_64: case Mips::F0: case Mips::D0_64:
+ case Mips::D0:
+ return 0;
+ case Mips::AT: case Mips::AT_64: case Mips::F1: case Mips::D1_64:
+ return 1;
+ case Mips::V0: case Mips::V0_64: case Mips::F2: case Mips::D2_64:
+ case Mips::D1:
+ return 2;
+ case Mips::V1: case Mips::V1_64: case Mips::F3: case Mips::D3_64:
+ return 3;
+ case Mips::A0: case Mips::A0_64: case Mips::F4: case Mips::D4_64:
+ case Mips::D2:
+ return 4;
+ case Mips::A1: case Mips::A1_64: case Mips::F5: case Mips::D5_64:
+ return 5;
+ case Mips::A2: case Mips::A2_64: case Mips::F6: case Mips::D6_64:
+ case Mips::D3:
+ return 6;
+ case Mips::A3: case Mips::A3_64: case Mips::F7: case Mips::D7_64:
+ return 7;
+ case Mips::T0: case Mips::T0_64: case Mips::F8: case Mips::D8_64:
+ case Mips::D4:
+ return 8;
+ case Mips::T1: case Mips::T1_64: case Mips::F9: case Mips::D9_64:
+ return 9;
+ case Mips::T2: case Mips::T2_64: case Mips::F10: case Mips::D10_64:
+ case Mips::D5:
+ return 10;
+ case Mips::T3: case Mips::T3_64: case Mips::F11: case Mips::D11_64:
+ return 11;
+ case Mips::T4: case Mips::T4_64: case Mips::F12: case Mips::D12_64:
+ case Mips::D6:
+ return 12;
+ case Mips::T5: case Mips::T5_64: case Mips::F13: case Mips::D13_64:
+ return 13;
+ case Mips::T6: case Mips::T6_64: case Mips::F14: case Mips::D14_64:
+ case Mips::D7:
+ return 14;
+ case Mips::T7: case Mips::T7_64: case Mips::F15: case Mips::D15_64:
+ return 15;
+ case Mips::S0: case Mips::S0_64: case Mips::F16: case Mips::D16_64:
+ case Mips::D8:
+ return 16;
+ case Mips::S1: case Mips::S1_64: case Mips::F17: case Mips::D17_64:
+ return 17;
+ case Mips::S2: case Mips::S2_64: case Mips::F18: case Mips::D18_64:
+ case Mips::D9:
+ return 18;
+ case Mips::S3: case Mips::S3_64: case Mips::F19: case Mips::D19_64:
+ return 19;
+ case Mips::S4: case Mips::S4_64: case Mips::F20: case Mips::D20_64:
+ case Mips::D10:
+ return 20;
+ case Mips::S5: case Mips::S5_64: case Mips::F21: case Mips::D21_64:
+ return 21;
+ case Mips::S6: case Mips::S6_64: case Mips::F22: case Mips::D22_64:
+ case Mips::D11:
+ return 22;
+ case Mips::S7: case Mips::S7_64: case Mips::F23: case Mips::D23_64:
+ return 23;
+ case Mips::T8: case Mips::T8_64: case Mips::F24: case Mips::D24_64:
+ case Mips::D12:
+ return 24;
+ case Mips::T9: case Mips::T9_64: case Mips::F25: case Mips::D25_64:
+ return 25;
+ case Mips::K0: case Mips::K0_64: case Mips::F26: case Mips::D26_64:
+ case Mips::D13:
+ return 26;
+ case Mips::K1: case Mips::K1_64: case Mips::F27: case Mips::D27_64:
+ return 27;
+ case Mips::GP: case Mips::GP_64: case Mips::F28: case Mips::D28_64:
+ case Mips::D14:
+ return 28;
+ case Mips::SP: case Mips::SP_64: case Mips::F29: case Mips::D29_64:
+ return 29;
+ case Mips::FP: case Mips::FP_64: case Mips::F30: case Mips::D30_64:
+ case Mips::D15:
+ return 30;
+ case Mips::RA: case Mips::RA_64: case Mips::F31: case Mips::D31_64:
+ return 31;
+ default: llvm_unreachable("Unknown register number!");
+ }
+ return 0; // Not reached
+}
+
+unsigned MipsRegisterInfo::getPICCallReg() { return Mips::T9; }
+
+//===----------------------------------------------------------------------===//
+// Callee Saved Registers methods
+//===----------------------------------------------------------------------===//
+
+/// Mips Callee Saved Registers
+const unsigned* MipsRegisterInfo::
+getCalleeSavedRegs(const MachineFunction *MF) const
+{
+ // Mips callee-save register range is $16-$23, $f20-$f30
+ static const unsigned SingleFloatOnlyCalleeSavedRegs[] = {
+ Mips::F31, Mips::F30, Mips::F29, Mips::F28, Mips::F27, Mips::F26,
+ Mips::F25, Mips::F24, Mips::F23, Mips::F22, Mips::F21, Mips::F20,
+ Mips::RA, Mips::FP, Mips::S7, Mips::S6, Mips::S5, Mips::S4,
+ Mips::S3, Mips::S2, Mips::S1, Mips::S0, 0
+ };
+
+ static const unsigned Mips32CalleeSavedRegs[] = {
+ Mips::D15, Mips::D14, Mips::D13, Mips::D12, Mips::D11, Mips::D10,
+ Mips::RA, Mips::FP, Mips::S7, Mips::S6, Mips::S5, Mips::S4,
+ Mips::S3, Mips::S2, Mips::S1, Mips::S0, 0
+ };
+
+ static const unsigned N32CalleeSavedRegs[] = {
+ Mips::D31_64, Mips::D29_64, Mips::D27_64, Mips::D25_64, Mips::D23_64,
+ Mips::D21_64,
+ Mips::RA_64, Mips::FP_64, Mips::GP_64, Mips::S7_64, Mips::S6_64,
+ Mips::S5_64, Mips::S4_64, Mips::S3_64, Mips::S2_64, Mips::S1_64,
+ Mips::S0_64, 0
+ };
+
+ static const unsigned N64CalleeSavedRegs[] = {
+ Mips::D31_64, Mips::D30_64, Mips::D29_64, Mips::D28_64, Mips::D27_64,
+ Mips::D26_64, Mips::D25_64, Mips::D24_64,
+ Mips::RA_64, Mips::FP_64, Mips::GP_64, Mips::S7_64, Mips::S6_64,
+ Mips::S5_64, Mips::S4_64, Mips::S3_64, Mips::S2_64, Mips::S1_64,
+ Mips::S0_64, 0
+ };
+
+ if (Subtarget.isSingleFloat())
+ return SingleFloatOnlyCalleeSavedRegs;
+ else if (!Subtarget.hasMips64())
+ return Mips32CalleeSavedRegs;
+ else if (Subtarget.isABI_N32())
+ return N32CalleeSavedRegs;
+
+ assert(Subtarget.isABI_N64());
+ return N64CalleeSavedRegs;
+}
+
+BitVector MipsRegisterInfo::
+getReservedRegs(const MachineFunction &MF) const {
+ static const unsigned ReservedCPURegs[] = {
+ Mips::ZERO, Mips::AT, Mips::K0, Mips::K1,
+ Mips::GP, Mips::SP, Mips::FP, Mips::RA, 0
+ };
+
+ static const unsigned ReservedCPU64Regs[] = {
+ Mips::ZERO_64, Mips::AT_64, Mips::K0_64, Mips::K1_64,
+ Mips::GP_64, Mips::SP_64, Mips::FP_64, Mips::RA_64, 0
+ };
+
+ BitVector Reserved(getNumRegs());
+ typedef TargetRegisterClass::iterator RegIter;
+
+ for (const unsigned *Reg = ReservedCPURegs; *Reg; ++Reg)
+ Reserved.set(*Reg);
+
+ if (Subtarget.hasMips64()) {
+ for (const unsigned *Reg = ReservedCPU64Regs; *Reg; ++Reg)
+ Reserved.set(*Reg);
+
+ // Reserve all registers in AFGR64.
+ for (RegIter Reg = Mips::AFGR64RegisterClass->begin();
+ Reg != Mips::AFGR64RegisterClass->end(); ++Reg)
+ Reserved.set(*Reg);
+ }
+ else {
+ // Reserve all registers in CPU64Regs & FGR64.
+ for (RegIter Reg = Mips::CPU64RegsRegisterClass->begin();
+ Reg != Mips::CPU64RegsRegisterClass->end(); ++Reg)
+ Reserved.set(*Reg);
+
+ for (RegIter Reg = Mips::FGR64RegisterClass->begin();
+ Reg != Mips::FGR64RegisterClass->end(); ++Reg)
+ Reserved.set(*Reg);
+ }
+
+ return Reserved;
+}
+
+// This function eliminate ADJCALLSTACKDOWN,
+// ADJCALLSTACKUP pseudo instructions
+void MipsRegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
+ MBB.erase(I);
+}
+
+// FrameIndex represent objects inside a abstract stack.
+// We must replace FrameIndex with an stack/frame pointer
+// direct reference.
+void MipsRegisterInfo::
+eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
+ RegScavenger *RS) const {
+ MachineInstr &MI = *II;
+ MachineFunction &MF = *MI.getParent()->getParent();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+
+ unsigned i = 0;
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() &&
+ "Instr doesn't have FrameIndex operand!");
+ }
+
+ DEBUG(errs() << "\nFunction : " << MF.getFunction()->getName() << "\n";
+ errs() << "<--------->\n" << MI);
+
+ int FrameIndex = MI.getOperand(i).getIndex();
+ int stackSize = MF.getFrameInfo()->getStackSize();
+ int spOffset = MF.getFrameInfo()->getObjectOffset(FrameIndex);
+
+ DEBUG(errs() << "FrameIndex : " << FrameIndex << "\n"
+ << "spOffset : " << spOffset << "\n"
+ << "stackSize : " << stackSize << "\n");
+
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ int MinCSFI = 0;
+ int MaxCSFI = -1;
+
+ if (CSI.size()) {
+ MinCSFI = CSI[0].getFrameIdx();
+ MaxCSFI = CSI[CSI.size() - 1].getFrameIdx();
+ }
+
+ // The following stack frame objects are always referenced relative to $sp:
+ // 1. Outgoing arguments.
+ // 2. Pointer to dynamically allocated stack space.
+ // 3. Locations for callee-saved registers.
+ // Everything else is referenced relative to whatever register
+ // getFrameRegister() returns.
+ unsigned FrameReg;
+
+ if (MipsFI->isOutArgFI(FrameIndex) || MipsFI->isDynAllocFI(FrameIndex) ||
+ (FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI))
+ FrameReg = Mips::SP;
+ else
+ FrameReg = getFrameRegister(MF);
+
+ // Calculate final offset.
+ // - There is no need to change the offset if the frame object is one of the
+ // following: an outgoing argument, pointer to a dynamically allocated
+ // stack space or a $gp restore location,
+ // - If the frame object is any of the following, its offset must be adjusted
+ // by adding the size of the stack:
+ // incoming argument, callee-saved register location or local variable.
+ int Offset;
+
+ if (MipsFI->isOutArgFI(FrameIndex) || MipsFI->isGPFI(FrameIndex) ||
+ MipsFI->isDynAllocFI(FrameIndex))
+ Offset = spOffset;
+ else
+ Offset = spOffset + stackSize;
+
+ Offset += MI.getOperand(i+1).getImm();
+
+ DEBUG(errs() << "Offset : " << Offset << "\n" << "<--------->\n");
+
+ // If MI is not a debug value, make sure Offset fits in the 16-bit immediate
+ // field.
+ if (!MI.isDebugValue() && (Offset >= 0x8000 || Offset < -0x8000)) {
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = II->getDebugLoc();
+ int ImmHi = (((unsigned)Offset & 0xffff0000) >> 16) +
+ ((Offset & 0x8000) != 0);
+
+ // FIXME: change this when mips goes MC".
+ BuildMI(MBB, II, DL, TII.get(Mips::NOAT));
+ BuildMI(MBB, II, DL, TII.get(Mips::LUi), Mips::AT).addImm(ImmHi);
+ BuildMI(MBB, II, DL, TII.get(Mips::ADDu), Mips::AT).addReg(FrameReg)
+ .addReg(Mips::AT);
+ FrameReg = Mips::AT;
+ Offset = (short)(Offset & 0xffff);
+
+ BuildMI(MBB, ++II, MI.getDebugLoc(), TII.get(Mips::ATMACRO));
+ }
+
+ MI.getOperand(i).ChangeToRegister(FrameReg, false);
+ MI.getOperand(i+1).ChangeToImmediate(Offset);
+}
+
+unsigned MipsRegisterInfo::
+getFrameRegister(const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ return TFI->hasFP(MF) ? Mips::FP : Mips::SP;
+}
+
+unsigned MipsRegisterInfo::
+getEHExceptionRegister() const {
+ llvm_unreachable("What is the exception register");
+ return 0;
+}
+
+unsigned MipsRegisterInfo::
+getEHHandlerRegister() const {
+ llvm_unreachable("What is the exception handler register");
+ return 0;
+}
diff --git a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.h b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.h
new file mode 100644
index 0000000..67e57dd
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.h
@@ -0,0 +1,69 @@
+//===- MipsRegisterInfo.h - Mips Register Information Impl ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Mips implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSREGISTERINFO_H
+#define MIPSREGISTERINFO_H
+
+#include "Mips.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#define GET_REGINFO_HEADER
+#include "MipsGenRegisterInfo.inc"
+
+namespace llvm {
+class MipsSubtarget;
+class TargetInstrInfo;
+class Type;
+
+struct MipsRegisterInfo : public MipsGenRegisterInfo {
+ const MipsSubtarget &Subtarget;
+ const TargetInstrInfo &TII;
+
+ MipsRegisterInfo(const MipsSubtarget &Subtarget, const TargetInstrInfo &tii);
+
+ /// getRegisterNumbering - Given the enum value for some register, e.g.
+ /// Mips::RA, return the number that it corresponds to (e.g. 31).
+ static unsigned getRegisterNumbering(unsigned RegEnum);
+
+ /// Get PIC indirect call register
+ static unsigned getPICCallReg();
+
+ /// Adjust the Mips stack frame.
+ void adjustMipsStackFrame(MachineFunction &MF) const;
+
+ /// Code Generation virtual methods...
+ const unsigned *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ /// Stack Frame Processing Methods
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
+
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+
+ /// Debug information queries.
+ unsigned getFrameRegister(const MachineFunction &MF) const;
+
+ /// Exception handling queries.
+ unsigned getEHExceptionRegister() const;
+ unsigned getEHHandlerRegister() const;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.td b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.td
new file mode 100644
index 0000000..925ad9e
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.td
@@ -0,0 +1,303 @@
+//===- MipsRegisterInfo.td - Mips Register defs ------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the MIPS register file
+//===----------------------------------------------------------------------===//
+let Namespace = "Mips" in {
+def sub_fpeven : SubRegIndex;
+def sub_fpodd : SubRegIndex;
+def sub_32 : SubRegIndex;
+}
+
+// We have banks of 32 registers each.
+class MipsReg<string n> : Register<n> {
+ field bits<5> Num;
+ let Namespace = "Mips";
+}
+
+class MipsRegWithSubRegs<string n, list<Register> subregs>
+ : RegisterWithSubRegs<n, subregs> {
+ field bits<5> Num;
+ let Namespace = "Mips";
+}
+
+// Mips CPU Registers
+class MipsGPRReg<bits<5> num, string n> : MipsReg<n> {
+ let Num = num;
+}
+
+// Mips 64-bit CPU Registers
+class Mips64GPRReg<bits<5> num, string n, list<Register> subregs>
+ : MipsRegWithSubRegs<n, subregs> {
+ let Num = num;
+ let SubRegIndices = [sub_32];
+}
+
+// Mips 32-bit FPU Registers
+class FPR<bits<5> num, string n> : MipsReg<n> {
+ let Num = num;
+}
+
+// Mips 64-bit (aliased) FPU Registers
+class AFPR<bits<5> num, string n, list<Register> subregs>
+ : MipsRegWithSubRegs<n, subregs> {
+ let Num = num;
+ let SubRegIndices = [sub_fpeven, sub_fpodd];
+}
+
+class AFPR64<bits<5> num, string n, list<Register> subregs>
+ : MipsRegWithSubRegs<n, subregs> {
+ let Num = num;
+ let SubRegIndices = [sub_32];
+}
+
+// Mips Hardware Registers
+class HWR<bits<5> num, string n> : MipsReg<n> {
+ let Num = num;
+}
+
+//===----------------------------------------------------------------------===//
+// Registers
+//===----------------------------------------------------------------------===//
+
+let Namespace = "Mips" in {
+ // FIXME: Fix DwarfRegNum.
+
+ // General Purpose Registers
+ def ZERO : MipsGPRReg< 0, "ZERO">, DwarfRegNum<[0]>;
+ def AT : MipsGPRReg< 1, "AT">, DwarfRegNum<[1]>;
+ def V0 : MipsGPRReg< 2, "2">, DwarfRegNum<[2]>;
+ def V1 : MipsGPRReg< 3, "3">, DwarfRegNum<[3]>;
+ def A0 : MipsGPRReg< 4, "4">, DwarfRegNum<[4]>;
+ def A1 : MipsGPRReg< 5, "5">, DwarfRegNum<[5]>;
+ def A2 : MipsGPRReg< 6, "6">, DwarfRegNum<[6]>;
+ def A3 : MipsGPRReg< 7, "7">, DwarfRegNum<[7]>;
+ def T0 : MipsGPRReg< 8, "8">, DwarfRegNum<[8]>;
+ def T1 : MipsGPRReg< 9, "9">, DwarfRegNum<[9]>;
+ def T2 : MipsGPRReg< 10, "10">, DwarfRegNum<[10]>;
+ def T3 : MipsGPRReg< 11, "11">, DwarfRegNum<[11]>;
+ def T4 : MipsGPRReg< 12, "12">, DwarfRegNum<[12]>;
+ def T5 : MipsGPRReg< 13, "13">, DwarfRegNum<[13]>;
+ def T6 : MipsGPRReg< 14, "14">, DwarfRegNum<[14]>;
+ def T7 : MipsGPRReg< 15, "15">, DwarfRegNum<[15]>;
+ def S0 : MipsGPRReg< 16, "16">, DwarfRegNum<[16]>;
+ def S1 : MipsGPRReg< 17, "17">, DwarfRegNum<[17]>;
+ def S2 : MipsGPRReg< 18, "18">, DwarfRegNum<[18]>;
+ def S3 : MipsGPRReg< 19, "19">, DwarfRegNum<[19]>;
+ def S4 : MipsGPRReg< 20, "20">, DwarfRegNum<[20]>;
+ def S5 : MipsGPRReg< 21, "21">, DwarfRegNum<[21]>;
+ def S6 : MipsGPRReg< 22, "22">, DwarfRegNum<[22]>;
+ def S7 : MipsGPRReg< 23, "23">, DwarfRegNum<[23]>;
+ def T8 : MipsGPRReg< 24, "24">, DwarfRegNum<[24]>;
+ def T9 : MipsGPRReg< 25, "25">, DwarfRegNum<[25]>;
+ def K0 : MipsGPRReg< 26, "26">, DwarfRegNum<[26]>;
+ def K1 : MipsGPRReg< 27, "27">, DwarfRegNum<[27]>;
+ def GP : MipsGPRReg< 28, "GP">, DwarfRegNum<[28]>;
+ def SP : MipsGPRReg< 29, "SP">, DwarfRegNum<[29]>;
+ def FP : MipsGPRReg< 30, "FP">, DwarfRegNum<[30]>;
+ def RA : MipsGPRReg< 31, "RA">, DwarfRegNum<[31]>;
+
+ // General Purpose 64-bit Registers
+ def ZERO_64 : Mips64GPRReg< 0, "ZERO", [ZERO]>;
+ def AT_64 : Mips64GPRReg< 1, "AT", [AT]>;
+ def V0_64 : Mips64GPRReg< 2, "2", [V0]>;
+ def V1_64 : Mips64GPRReg< 3, "3", [V1]>;
+ def A0_64 : Mips64GPRReg< 4, "4", [A0]>;
+ def A1_64 : Mips64GPRReg< 5, "5", [A1]>;
+ def A2_64 : Mips64GPRReg< 6, "6", [A2]>;
+ def A3_64 : Mips64GPRReg< 7, "7", [A3]>;
+ def T0_64 : Mips64GPRReg< 8, "8", [T0]>;
+ def T1_64 : Mips64GPRReg< 9, "9", [T1]>;
+ def T2_64 : Mips64GPRReg< 10, "10", [T2]>;
+ def T3_64 : Mips64GPRReg< 11, "11", [T3]>;
+ def T4_64 : Mips64GPRReg< 12, "12", [T4]>;
+ def T5_64 : Mips64GPRReg< 13, "13", [T5]>;
+ def T6_64 : Mips64GPRReg< 14, "14", [T6]>;
+ def T7_64 : Mips64GPRReg< 15, "15", [T7]>;
+ def S0_64 : Mips64GPRReg< 16, "16", [S0]>;
+ def S1_64 : Mips64GPRReg< 17, "17", [S1]>;
+ def S2_64 : Mips64GPRReg< 18, "18", [S2]>;
+ def S3_64 : Mips64GPRReg< 19, "19", [S3]>;
+ def S4_64 : Mips64GPRReg< 20, "20", [S4]>;
+ def S5_64 : Mips64GPRReg< 21, "21", [S5]>;
+ def S6_64 : Mips64GPRReg< 22, "22", [S6]>;
+ def S7_64 : Mips64GPRReg< 23, "23", [S7]>;
+ def T8_64 : Mips64GPRReg< 24, "24", [T8]>;
+ def T9_64 : Mips64GPRReg< 25, "25", [T9]>;
+ def K0_64 : Mips64GPRReg< 26, "26", [K0]>;
+ def K1_64 : Mips64GPRReg< 27, "27", [K1]>;
+ def GP_64 : Mips64GPRReg< 28, "GP", [GP]>;
+ def SP_64 : Mips64GPRReg< 29, "SP", [SP]>;
+ def FP_64 : Mips64GPRReg< 30, "FP", [FP]>;
+ def RA_64 : Mips64GPRReg< 31, "RA", [RA]>;
+
+ /// Mips Single point precision FPU Registers
+ def F0 : FPR< 0, "F0">, DwarfRegNum<[32]>;
+ def F1 : FPR< 1, "F1">, DwarfRegNum<[33]>;
+ def F2 : FPR< 2, "F2">, DwarfRegNum<[34]>;
+ def F3 : FPR< 3, "F3">, DwarfRegNum<[35]>;
+ def F4 : FPR< 4, "F4">, DwarfRegNum<[36]>;
+ def F5 : FPR< 5, "F5">, DwarfRegNum<[37]>;
+ def F6 : FPR< 6, "F6">, DwarfRegNum<[38]>;
+ def F7 : FPR< 7, "F7">, DwarfRegNum<[39]>;
+ def F8 : FPR< 8, "F8">, DwarfRegNum<[40]>;
+ def F9 : FPR< 9, "F9">, DwarfRegNum<[41]>;
+ def F10 : FPR<10, "F10">, DwarfRegNum<[42]>;
+ def F11 : FPR<11, "F11">, DwarfRegNum<[43]>;
+ def F12 : FPR<12, "F12">, DwarfRegNum<[44]>;
+ def F13 : FPR<13, "F13">, DwarfRegNum<[45]>;
+ def F14 : FPR<14, "F14">, DwarfRegNum<[46]>;
+ def F15 : FPR<15, "F15">, DwarfRegNum<[47]>;
+ def F16 : FPR<16, "F16">, DwarfRegNum<[48]>;
+ def F17 : FPR<17, "F17">, DwarfRegNum<[49]>;
+ def F18 : FPR<18, "F18">, DwarfRegNum<[50]>;
+ def F19 : FPR<19, "F19">, DwarfRegNum<[51]>;
+ def F20 : FPR<20, "F20">, DwarfRegNum<[52]>;
+ def F21 : FPR<21, "F21">, DwarfRegNum<[53]>;
+ def F22 : FPR<22, "F22">, DwarfRegNum<[54]>;
+ def F23 : FPR<23, "F23">, DwarfRegNum<[55]>;
+ def F24 : FPR<24, "F24">, DwarfRegNum<[56]>;
+ def F25 : FPR<25, "F25">, DwarfRegNum<[57]>;
+ def F26 : FPR<26, "F26">, DwarfRegNum<[58]>;
+ def F27 : FPR<27, "F27">, DwarfRegNum<[59]>;
+ def F28 : FPR<28, "F28">, DwarfRegNum<[60]>;
+ def F29 : FPR<29, "F29">, DwarfRegNum<[61]>;
+ def F30 : FPR<30, "F30">, DwarfRegNum<[62]>;
+ def F31 : FPR<31, "F31">, DwarfRegNum<[63]>;
+
+ /// Mips Double point precision FPU Registers (aliased
+ /// with the single precision to hold 64 bit values)
+ def D0 : AFPR< 0, "F0", [F0, F1]>;
+ def D1 : AFPR< 2, "F2", [F2, F3]>;
+ def D2 : AFPR< 4, "F4", [F4, F5]>;
+ def D3 : AFPR< 6, "F6", [F6, F7]>;
+ def D4 : AFPR< 8, "F8", [F8, F9]>;
+ def D5 : AFPR<10, "F10", [F10, F11]>;
+ def D6 : AFPR<12, "F12", [F12, F13]>;
+ def D7 : AFPR<14, "F14", [F14, F15]>;
+ def D8 : AFPR<16, "F16", [F16, F17]>;
+ def D9 : AFPR<18, "F18", [F18, F19]>;
+ def D10 : AFPR<20, "F20", [F20, F21]>;
+ def D11 : AFPR<22, "F22", [F22, F23]>;
+ def D12 : AFPR<24, "F24", [F24, F25]>;
+ def D13 : AFPR<26, "F26", [F26, F27]>;
+ def D14 : AFPR<28, "F28", [F28, F29]>;
+ def D15 : AFPR<30, "F30", [F30, F31]>;
+
+ /// Mips Double point precision FPU Registers in MFP64 mode.
+ def D0_64 : AFPR64<0, "F0", [F0]>;
+ def D1_64 : AFPR64<1, "F1", [F1]>;
+ def D2_64 : AFPR64<2, "F2", [F2]>;
+ def D3_64 : AFPR64<3, "F3", [F3]>;
+ def D4_64 : AFPR64<4, "F4", [F4]>;
+ def D5_64 : AFPR64<5, "F5", [F5]>;
+ def D6_64 : AFPR64<6, "F6", [F6]>;
+ def D7_64 : AFPR64<7, "F7", [F7]>;
+ def D8_64 : AFPR64<8, "F8", [F8]>;
+ def D9_64 : AFPR64<9, "F9", [F9]>;
+ def D10_64 : AFPR64<10, "F10", [F10]>;
+ def D11_64 : AFPR64<11, "F11", [F11]>;
+ def D12_64 : AFPR64<12, "F12", [F12]>;
+ def D13_64 : AFPR64<13, "F13", [F13]>;
+ def D14_64 : AFPR64<14, "F14", [F14]>;
+ def D15_64 : AFPR64<15, "F15", [F15]>;
+ def D16_64 : AFPR64<16, "F16", [F16]>;
+ def D17_64 : AFPR64<17, "F17", [F17]>;
+ def D18_64 : AFPR64<18, "F18", [F18]>;
+ def D19_64 : AFPR64<19, "F19", [F19]>;
+ def D20_64 : AFPR64<20, "F20", [F20]>;
+ def D21_64 : AFPR64<21, "F21", [F21]>;
+ def D22_64 : AFPR64<22, "F22", [F22]>;
+ def D23_64 : AFPR64<23, "F23", [F23]>;
+ def D24_64 : AFPR64<24, "F24", [F24]>;
+ def D25_64 : AFPR64<25, "F25", [F25]>;
+ def D26_64 : AFPR64<26, "F26", [F26]>;
+ def D27_64 : AFPR64<27, "F27", [F27]>;
+ def D28_64 : AFPR64<28, "F28", [F28]>;
+ def D29_64 : AFPR64<29, "F29", [F29]>;
+ def D30_64 : AFPR64<30, "F30", [F30]>;
+ def D31_64 : AFPR64<31, "F31", [F31]>;
+
+ // Hi/Lo registers
+ def HI : Register<"hi">, DwarfRegNum<[64]>;
+ def LO : Register<"lo">, DwarfRegNum<[65]>;
+
+ let SubRegIndices = [sub_32] in {
+ def HI64 : RegisterWithSubRegs<"hi", [HI]>;
+ def LO64 : RegisterWithSubRegs<"lo", [LO]>;
+ }
+
+ // Status flags register
+ def FCR31 : Register<"31">;
+
+ // Hardware register $29
+ def HWR29 : Register<"29">;
+}
+
+//===----------------------------------------------------------------------===//
+// Register Classes
+//===----------------------------------------------------------------------===//
+
+def CPURegs : RegisterClass<"Mips", [i32], 32, (add
+ // Return Values and Arguments
+ V0, V1, A0, A1, A2, A3,
+ // Not preserved across procedure calls
+ T0, T1, T2, T3, T4, T5, T6, T7, T8, T9,
+ // Callee save
+ S0, S1, S2, S3, S4, S5, S6, S7,
+ // Reserved
+ ZERO, AT, K0, K1, GP, SP, FP, RA)>;
+
+def CPU64Regs : RegisterClass<"Mips", [i64], 64, (add
+ // Return Values and Arguments
+ V0_64, V1_64, A0_64, A1_64, A2_64, A3_64,
+ // Not preserved across procedure calls
+ T0_64, T1_64, T2_64, T3_64, T4_64, T5_64, T6_64, T7_64, T8_64, T9_64,
+ // Callee save
+ S0_64, S1_64, S2_64, S3_64, S4_64, S5_64, S6_64, S7_64,
+ // Reserved
+ ZERO_64, AT_64, K0_64, K1_64, GP_64, SP_64, FP_64, RA_64)> {
+ let SubRegClasses = [(CPURegs sub_32)];
+}
+
+// 64bit fp:
+// * FGR64 - 32 64-bit registers
+// * AFGR64 - 16 32-bit even registers (32-bit FP Mode)
+//
+// 32bit fp:
+// * FGR32 - 16 32-bit even registers
+// * FGR32 - 32 32-bit registers (single float only mode)
+def FGR32 : RegisterClass<"Mips", [f32], 32, (sequence "F%u", 0, 31)>;
+
+def AFGR64 : RegisterClass<"Mips", [f64], 64, (add
+ // Return Values and Arguments
+ D0, D1, D6, D7,
+ // Not preserved across procedure calls
+ D2, D3, D4, D5, D8, D9,
+ // Callee save
+ D10, D11, D12, D13, D14, D15)> {
+ let SubRegClasses = [(FGR32 sub_fpeven, sub_fpodd)];
+}
+
+def FGR64 : RegisterClass<"Mips", [f64], 64, (sequence "D%u_64", 0, 31)> {
+ let SubRegClasses = [(FGR32 sub_32)];
+}
+
+// Condition Register for floating point operations
+def CCR : RegisterClass<"Mips", [i32], 32, (add FCR31)>;
+
+// Hi/Lo Registers
+def HILO : RegisterClass<"Mips", [i32], 32, (add HI, LO)>;
+def HILO64 : RegisterClass<"Mips", [i64], 64, (add HI64, LO64)> {
+ let SubRegClasses = [(HILO sub_32)];
+}
+
+// Hardware registers
+def HWRegs : RegisterClass<"Mips", [i32], 32, (add HWR29)>;
diff --git a/contrib/llvm/lib/Target/Mips/MipsRelocations.h b/contrib/llvm/lib/Target/Mips/MipsRelocations.h
new file mode 100644
index 0000000..66d1bfd
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsRelocations.h
@@ -0,0 +1,41 @@
+//===- MipsRelocations.h - Mips Code Relocations ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===---------------------------------------------------------------------===//
+//
+// This file defines the Mips target-specific relocation types
+// (for relocation-model=static).
+//
+//===---------------------------------------------------------------------===//
+
+#ifndef MIPSRELOCATIONS_H_
+#define MIPSRELOCATIONS_H_
+
+#include "llvm/CodeGen/MachineRelocation.h"
+
+namespace llvm {
+ namespace Mips{
+ enum RelocationType {
+ // reloc_mips_branch - pc relative relocation for branches. The lower 18
+ // bits of the difference between the branch target and the branch
+ // instruction, shifted right by 2.
+ reloc_mips_branch = 1,
+
+ // reloc_mips_hi - upper 16 bits of the address (modified by +1 if the
+ // lower 16 bits of the address is negative).
+ reloc_mips_hi = 2,
+
+ // reloc_mips_lo - lower 16 bits of the address.
+ reloc_mips_lo = 3,
+
+ // reloc_mips_26 - lower 28 bits of the address, shifted right by 2.
+ reloc_mips_26 = 4
+ };
+ }
+}
+
+#endif /* MIPSRELOCATIONS_H_ */
diff --git a/contrib/llvm/lib/Target/Mips/MipsSchedule.td b/contrib/llvm/lib/Target/Mips/MipsSchedule.td
new file mode 100644
index 0000000..00be8ee
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsSchedule.td
@@ -0,0 +1,63 @@
+//===- MipsSchedule.td - Mips Scheduling Definitions -------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Functional units across Mips chips sets. Based on GCC/Mips backend files.
+//===----------------------------------------------------------------------===//
+def ALU : FuncUnit;
+def IMULDIV : FuncUnit;
+
+//===----------------------------------------------------------------------===//
+// Instruction Itinerary classes used for Mips
+//===----------------------------------------------------------------------===//
+def IIAlu : InstrItinClass;
+def IILoad : InstrItinClass;
+def IIStore : InstrItinClass;
+def IIXfer : InstrItinClass;
+def IIBranch : InstrItinClass;
+def IIHiLo : InstrItinClass;
+def IIImul : InstrItinClass;
+def IIIdiv : InstrItinClass;
+def IIFcvt : InstrItinClass;
+def IIFmove : InstrItinClass;
+def IIFcmp : InstrItinClass;
+def IIFadd : InstrItinClass;
+def IIFmulSingle : InstrItinClass;
+def IIFmulDouble : InstrItinClass;
+def IIFdivSingle : InstrItinClass;
+def IIFdivDouble : InstrItinClass;
+def IIFsqrtSingle : InstrItinClass;
+def IIFsqrtDouble : InstrItinClass;
+def IIFrecipFsqrtStep : InstrItinClass;
+def IIPseudo : InstrItinClass;
+
+//===----------------------------------------------------------------------===//
+// Mips Generic instruction itineraries.
+//===----------------------------------------------------------------------===//
+def MipsGenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [], [
+ InstrItinData<IIAlu , [InstrStage<1, [ALU]>]>,
+ InstrItinData<IILoad , [InstrStage<3, [ALU]>]>,
+ InstrItinData<IIStore , [InstrStage<1, [ALU]>]>,
+ InstrItinData<IIXfer , [InstrStage<2, [ALU]>]>,
+ InstrItinData<IIBranch , [InstrStage<1, [ALU]>]>,
+ InstrItinData<IIHiLo , [InstrStage<1, [IMULDIV]>]>,
+ InstrItinData<IIImul , [InstrStage<17, [IMULDIV]>]>,
+ InstrItinData<IIIdiv , [InstrStage<38, [IMULDIV]>]>,
+ InstrItinData<IIFcvt , [InstrStage<1, [ALU]>]>,
+ InstrItinData<IIFmove , [InstrStage<2, [ALU]>]>,
+ InstrItinData<IIFcmp , [InstrStage<3, [ALU]>]>,
+ InstrItinData<IIFadd , [InstrStage<4, [ALU]>]>,
+ InstrItinData<IIFmulSingle , [InstrStage<7, [ALU]>]>,
+ InstrItinData<IIFmulDouble , [InstrStage<8, [ALU]>]>,
+ InstrItinData<IIFdivSingle , [InstrStage<23, [ALU]>]>,
+ InstrItinData<IIFdivDouble , [InstrStage<36, [ALU]>]>,
+ InstrItinData<IIFsqrtSingle , [InstrStage<54, [ALU]>]>,
+ InstrItinData<IIFsqrtDouble , [InstrStage<12, [ALU]>]>,
+ InstrItinData<IIFrecipFsqrtStep , [InstrStage<5, [ALU]>]>
+]>;
diff --git a/contrib/llvm/lib/Target/Mips/MipsSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/Mips/MipsSelectionDAGInfo.cpp
new file mode 100644
index 0000000..e4d70fc
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsSelectionDAGInfo.cpp
@@ -0,0 +1,23 @@
+//===-- MipsSelectionDAGInfo.cpp - Mips SelectionDAG Info -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MipsSelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mips-selectiondag-info"
+#include "MipsTargetMachine.h"
+using namespace llvm;
+
+MipsSelectionDAGInfo::MipsSelectionDAGInfo(const MipsTargetMachine &TM)
+ : TargetSelectionDAGInfo(TM) {
+}
+
+MipsSelectionDAGInfo::~MipsSelectionDAGInfo() {
+}
diff --git a/contrib/llvm/lib/Target/Mips/MipsSelectionDAGInfo.h b/contrib/llvm/lib/Target/Mips/MipsSelectionDAGInfo.h
new file mode 100644
index 0000000..6cafb55
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsSelectionDAGInfo.h
@@ -0,0 +1,31 @@
+//===-- MipsSelectionDAGInfo.h - Mips SelectionDAG Info ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the Mips subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSSELECTIONDAGINFO_H
+#define MIPSSELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class MipsTargetMachine;
+
+class MipsSelectionDAGInfo : public TargetSelectionDAGInfo {
+public:
+ explicit MipsSelectionDAGInfo(const MipsTargetMachine &TM);
+ ~MipsSelectionDAGInfo();
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/Mips/MipsSubtarget.cpp b/contrib/llvm/lib/Target/Mips/MipsSubtarget.cpp
new file mode 100644
index 0000000..016d449
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsSubtarget.cpp
@@ -0,0 +1,54 @@
+//===- MipsSubtarget.cpp - Mips Subtarget Information -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Mips specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsSubtarget.h"
+#include "Mips.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "MipsGenSubtargetInfo.inc"
+
+using namespace llvm;
+
+MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS, bool little) :
+ MipsGenSubtargetInfo(TT, CPU, FS),
+ MipsArchVersion(Mips32), MipsABI(UnknownABI), IsLittle(little),
+ IsSingleFloat(false), IsFP64bit(false), IsGP64bit(false), HasVFPU(false),
+ IsLinux(true), HasSEInReg(false), HasCondMov(false), HasMulDivAdd(false),
+ HasMinMax(false), HasSwap(false), HasBitCount(false)
+{
+ std::string CPUName = CPU;
+ if (CPUName.empty())
+ CPUName = "mips32r1";
+
+ // Parse features string.
+ ParseSubtargetFeatures(CPUName, FS);
+
+ // Initialize scheduling itinerary for the specified CPU.
+ InstrItins = getInstrItineraryForCPU(CPUName);
+
+ // Set MipsABI if it hasn't been set yet.
+ if (MipsABI == UnknownABI)
+ MipsABI = hasMips64() ? N64 : O32;
+
+ // Check if Architecture and ABI are compatible.
+ assert(((!hasMips64() && (isABI_O32() || isABI_EABI())) ||
+ (hasMips64() && (isABI_N32() || isABI_N64()))) &&
+ "Invalid Arch & ABI pair.");
+
+ // Is the target system Linux ?
+ if (TT.find("linux") == std::string::npos)
+ IsLinux = false;
+}
diff --git a/contrib/llvm/lib/Target/Mips/MipsSubtarget.h b/contrib/llvm/lib/Target/Mips/MipsSubtarget.h
new file mode 100644
index 0000000..d9dddad
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsSubtarget.h
@@ -0,0 +1,133 @@
+//=====-- MipsSubtarget.h - Define Subtarget for the Mips -----*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Mips specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSSUBTARGET_H
+#define MIPSSUBTARGET_H
+
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include <string>
+
+#define GET_SUBTARGETINFO_HEADER
+#include "MipsGenSubtargetInfo.inc"
+
+namespace llvm {
+class StringRef;
+
+class MipsSubtarget : public MipsGenSubtargetInfo {
+
+public:
+ // NOTE: O64 will not be supported.
+ enum MipsABIEnum {
+ UnknownABI, O32, N32, N64, EABI
+ };
+
+protected:
+
+ enum MipsArchEnum {
+ Mips32, Mips32r2, Mips64, Mips64r2
+ };
+
+ // Mips architecture version
+ MipsArchEnum MipsArchVersion;
+
+ // Mips supported ABIs
+ MipsABIEnum MipsABI;
+
+ // IsLittle - The target is Little Endian
+ bool IsLittle;
+
+ // IsSingleFloat - The target only supports single precision float
+ // point operations. This enable the target to use all 32 32-bit
+ // floating point registers instead of only using even ones.
+ bool IsSingleFloat;
+
+ // IsFP64bit - The target processor has 64-bit floating point registers.
+ bool IsFP64bit;
+
+ // IsFP64bit - General-purpose registers are 64 bits wide
+ bool IsGP64bit;
+
+ // HasVFPU - Processor has a vector floating point unit.
+ bool HasVFPU;
+
+ // isLinux - Target system is Linux. Is false we consider ELFOS for now.
+ bool IsLinux;
+
+ /// Features related to the presence of specific instructions.
+
+ // HasSEInReg - SEB and SEH (signext in register) instructions.
+ bool HasSEInReg;
+
+ // HasCondMov - Conditional mov (MOVZ, MOVN) instructions.
+ bool HasCondMov;
+
+ // HasMulDivAdd - Multiply add and sub (MADD, MADDu, MSUB, MSUBu)
+ // instructions.
+ bool HasMulDivAdd;
+
+ // HasMinMax - MIN and MAX instructions.
+ bool HasMinMax;
+
+ // HasSwap - Byte and half swap instructions.
+ bool HasSwap;
+
+ // HasBitCount - Count leading '1' and '0' bits.
+ bool HasBitCount;
+
+ InstrItineraryData InstrItins;
+
+public:
+
+ /// Only O32 and EABI supported right now.
+ bool isABI_EABI() const { return MipsABI == EABI; }
+ bool isABI_N64() const { return MipsABI == N64; }
+ bool isABI_N32() const { return MipsABI == N32; }
+ bool isABI_O32() const { return MipsABI == O32; }
+ unsigned getTargetABI() const { return MipsABI; }
+
+ /// This constructor initializes the data members to match that
+ /// of the specified triple.
+ MipsSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS, bool little);
+
+ /// ParseSubtargetFeatures - Parses features string setting specified
+ /// subtarget options. Definition of function is auto generated by tblgen.
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+
+ bool hasMips32() const { return MipsArchVersion >= Mips32; }
+ bool hasMips32r2() const { return MipsArchVersion == Mips32r2 ||
+ MipsArchVersion == Mips64r2; }
+ bool hasMips64() const { return MipsArchVersion >= Mips64; }
+ bool hasMips64r2() const { return MipsArchVersion == Mips64r2; }
+
+ bool isLittle() const { return IsLittle; }
+ bool isFP64bit() const { return IsFP64bit; }
+ bool isGP64bit() const { return IsGP64bit; }
+ bool isGP32bit() const { return !IsGP64bit; }
+ bool isSingleFloat() const { return IsSingleFloat; }
+ bool isNotSingleFloat() const { return !IsSingleFloat; }
+ bool hasVFPU() const { return HasVFPU; }
+ bool isLinux() const { return IsLinux; }
+
+ /// Features related to the presence of specific instructions.
+ bool hasSEInReg() const { return HasSEInReg; }
+ bool hasCondMov() const { return HasCondMov; }
+ bool hasMulDivAdd() const { return HasMulDivAdd; }
+ bool hasMinMax() const { return HasMinMax; }
+ bool hasSwap() const { return HasSwap; }
+ bool hasBitCount() const { return HasBitCount; }
+};
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp b/contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp
new file mode 100644
index 0000000..6480da3
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp
@@ -0,0 +1,119 @@
+//===-- MipsTargetMachine.cpp - Define TargetMachine for Mips -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements the info about Mips target spec.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Mips.h"
+#include "MipsTargetMachine.h"
+#include "llvm/PassManager.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+extern "C" void LLVMInitializeMipsTarget() {
+ // Register the target.
+ RegisterTargetMachine<MipsebTargetMachine> X(TheMipsTarget);
+ RegisterTargetMachine<MipselTargetMachine> Y(TheMipselTarget);
+ RegisterTargetMachine<Mips64ebTargetMachine> A(TheMips64Target);
+ RegisterTargetMachine<Mips64elTargetMachine> B(TheMips64elTarget);
+}
+
+// DataLayout --> Big-endian, 32-bit pointer/ABI/alignment
+// The stack is always 8 byte aligned
+// On function prologue, the stack is created by decrementing
+// its pointer. Once decremented, all references are done with positive
+// offset from the stack/frame pointer, using StackGrowsUp enables
+// an easier handling.
+// Using CodeModel::Large enables different CALL behavior.
+MipsTargetMachine::
+MipsTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM,
+ bool isLittle):
+ LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
+ Subtarget(TT, CPU, FS, isLittle),
+ DataLayout(isLittle ?
+ (Subtarget.isABI_N64() ?
+ "e-p:64:64:64-i8:8:32-i16:16:32-i64:64:64-f128:128:128-n32" :
+ "e-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32") :
+ (Subtarget.isABI_N64() ?
+ "E-p:64:64:64-i8:8:32-i16:16:32-i64:64:64-f128:128:128-n32" :
+ "E-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32")),
+ InstrInfo(*this),
+ FrameLowering(Subtarget),
+ TLInfo(*this), TSInfo(*this), JITInfo() {
+}
+
+MipsebTargetMachine::
+MipsebTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM) :
+ MipsTargetMachine(T, TT, CPU, FS, RM, CM, false) {}
+
+MipselTargetMachine::
+MipselTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM) :
+ MipsTargetMachine(T, TT, CPU, FS, RM, CM, true) {}
+
+Mips64ebTargetMachine::
+Mips64ebTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM) :
+ MipsTargetMachine(T, TT, CPU, FS, RM, CM, false) {}
+
+Mips64elTargetMachine::
+Mips64elTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM) :
+ MipsTargetMachine(T, TT, CPU, FS, RM, CM, true) {}
+
+// Install an instruction selector pass using
+// the ISelDag to gen Mips code.
+bool MipsTargetMachine::
+addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel)
+{
+ PM.add(createMipsISelDag(*this));
+ return false;
+}
+
+// Implemented by targets that want to run passes immediately before
+// machine code is emitted. return true if -print-machineinstrs should
+// print out the code after the passes.
+bool MipsTargetMachine::
+addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel)
+{
+ PM.add(createMipsDelaySlotFillerPass(*this));
+ return true;
+}
+
+bool MipsTargetMachine::
+addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel) {
+ // Do not restore $gp if target is Mips64.
+ // In N32/64, $gp is a callee-saved register.
+ if (!Subtarget.hasMips64())
+ PM.add(createMipsEmitGPRestorePass(*this));
+ return true;
+}
+
+bool MipsTargetMachine::
+addPostRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel) {
+ PM.add(createMipsExpandPseudoPass(*this));
+ return true;
+}
+
+bool MipsTargetMachine::addCodeEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ JITCodeEmitter &JCE) {
+ // Machine code emitter pass for Mips.
+ PM.add(createMipsJITCodeEmitterPass(*this, JCE));
+ return false;
+}
+
diff --git a/contrib/llvm/lib/Target/Mips/MipsTargetMachine.h b/contrib/llvm/lib/Target/Mips/MipsTargetMachine.h
new file mode 100644
index 0000000..118ed10
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsTargetMachine.h
@@ -0,0 +1,120 @@
+//===-- MipsTargetMachine.h - Define TargetMachine for Mips -00--*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Mips specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSTARGETMACHINE_H
+#define MIPSTARGETMACHINE_H
+
+#include "MipsSubtarget.h"
+#include "MipsInstrInfo.h"
+#include "MipsISelLowering.h"
+#include "MipsFrameLowering.h"
+#include "MipsSelectionDAGInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "MipsJITInfo.h"
+
+namespace llvm {
+ class formatted_raw_ostream;
+
+ class MipsTargetMachine : public LLVMTargetMachine {
+ MipsSubtarget Subtarget;
+ const TargetData DataLayout; // Calculates type size & alignment
+ MipsInstrInfo InstrInfo;
+ MipsFrameLowering FrameLowering;
+ MipsTargetLowering TLInfo;
+ MipsSelectionDAGInfo TSInfo;
+ MipsJITInfo JITInfo;
+
+ public:
+ MipsTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM,
+ bool isLittle);
+
+ virtual const MipsInstrInfo *getInstrInfo() const
+ { return &InstrInfo; }
+ virtual const TargetFrameLowering *getFrameLowering() const
+ { return &FrameLowering; }
+ virtual const MipsSubtarget *getSubtargetImpl() const
+ { return &Subtarget; }
+ virtual const TargetData *getTargetData() const
+ { return &DataLayout;}
+ virtual MipsJITInfo *getJITInfo()
+ { return &JITInfo; }
+
+
+ virtual const MipsRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+
+ virtual const MipsTargetLowering *getTargetLowering() const {
+ return &TLInfo;
+ }
+
+ virtual const MipsSelectionDAGInfo* getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+
+ // Pass Pipeline Configuration
+ virtual bool addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel);
+ virtual bool addPreEmitPass(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel);
+ virtual bool addPreRegAlloc(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel);
+ virtual bool addPostRegAlloc(PassManagerBase &, CodeGenOpt::Level);
+ virtual bool addCodeEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ JITCodeEmitter &JCE);
+
+ };
+
+/// MipsebTargetMachine - Mips32 big endian target machine.
+///
+class MipsebTargetMachine : public MipsTargetMachine {
+public:
+ MipsebTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM);
+};
+
+/// MipselTargetMachine - Mips32 little endian target machine.
+///
+class MipselTargetMachine : public MipsTargetMachine {
+public:
+ MipselTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM);
+};
+
+/// Mips64ebTargetMachine - Mips64 big endian target machine.
+///
+class Mips64ebTargetMachine : public MipsTargetMachine {
+public:
+ Mips64ebTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM);
+};
+
+/// Mips64elTargetMachine - Mips64 little endian target machine.
+///
+class Mips64elTargetMachine : public MipsTargetMachine {
+public:
+ Mips64elTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM);
+};
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp b/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp
new file mode 100644
index 0000000..05c46f5
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp
@@ -0,0 +1,102 @@
+//===-- MipsTargetObjectFile.cpp - Mips object files ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsTargetObjectFile.h"
+#include "MipsSubtarget.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ELF.h"
+using namespace llvm;
+
+static cl::opt<unsigned>
+SSThreshold("mips-ssection-threshold", cl::Hidden,
+ cl::desc("Small data and bss section threshold size (default=8)"),
+ cl::init(8));
+
+void MipsTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM){
+ TargetLoweringObjectFileELF::Initialize(Ctx, TM);
+
+ SmallDataSection =
+ getContext().getELFSection(".sdata", ELF::SHT_PROGBITS,
+ ELF::SHF_WRITE |ELF::SHF_ALLOC,
+ SectionKind::getDataRel());
+
+ SmallBSSSection =
+ getContext().getELFSection(".sbss", ELF::SHT_NOBITS,
+ ELF::SHF_WRITE |ELF::SHF_ALLOC,
+ SectionKind::getBSS());
+
+}
+
+// A address must be loaded from a small section if its size is less than the
+// small section size threshold. Data in this section must be addressed using
+// gp_rel operator.
+static bool IsInSmallSection(uint64_t Size) {
+ return Size > 0 && Size <= SSThreshold;
+}
+
+bool MipsTargetObjectFile::IsGlobalInSmallSection(const GlobalValue *GV,
+ const TargetMachine &TM) const {
+ if (GV->isDeclaration() || GV->hasAvailableExternallyLinkage())
+ return false;
+
+ return IsGlobalInSmallSection(GV, TM, getKindForGlobal(GV, TM));
+}
+
+/// IsGlobalInSmallSection - Return true if this global address should be
+/// placed into small data/bss section.
+bool MipsTargetObjectFile::
+IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM,
+ SectionKind Kind) const {
+
+ // Only use small section for non linux targets.
+ const MipsSubtarget &Subtarget = TM.getSubtarget<MipsSubtarget>();
+ if (Subtarget.isLinux())
+ return false;
+
+ // Only global variables, not functions.
+ const GlobalVariable *GVA = dyn_cast<GlobalVariable>(GV);
+ if (!GVA)
+ return false;
+
+ // We can only do this for datarel or BSS objects for now.
+ if (!Kind.isBSS() && !Kind.isDataRel())
+ return false;
+
+ // If this is a internal constant string, there is a special
+ // section for it, but not in small data/bss.
+ if (Kind.isMergeable1ByteCString())
+ return false;
+
+ Type *Ty = GV->getType()->getElementType();
+ return IsInSmallSection(TM.getTargetData()->getTypeAllocSize(Ty));
+}
+
+
+
+const MCSection *MipsTargetObjectFile::
+SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler *Mang, const TargetMachine &TM) const {
+ // TODO: Could also support "weak" symbols as well with ".gnu.linkonce.s.*"
+ // sections?
+
+ // Handle Small Section classification here.
+ if (Kind.isBSS() && IsGlobalInSmallSection(GV, TM, Kind))
+ return SmallBSSSection;
+ if (Kind.isDataNoRel() && IsGlobalInSmallSection(GV, TM, Kind))
+ return SmallDataSection;
+
+ // Otherwise, we work the same as ELF.
+ return TargetLoweringObjectFileELF::SelectSectionForGlobal(GV, Kind, Mang,TM);
+}
diff --git a/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.h b/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.h
new file mode 100644
index 0000000..c394a9d
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.h
@@ -0,0 +1,41 @@
+//===-- llvm/Target/MipsTargetObjectFile.h - Mips Object Info ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_MIPS_TARGETOBJECTFILE_H
+#define LLVM_TARGET_MIPS_TARGETOBJECTFILE_H
+
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+
+namespace llvm {
+
+ class MipsTargetObjectFile : public TargetLoweringObjectFileELF {
+ const MCSection *SmallDataSection;
+ const MCSection *SmallBSSSection;
+ public:
+
+ void Initialize(MCContext &Ctx, const TargetMachine &TM);
+
+
+ /// IsGlobalInSmallSection - Return true if this global address should be
+ /// placed into small data/bss section.
+ bool IsGlobalInSmallSection(const GlobalValue *GV,
+ const TargetMachine &TM, SectionKind Kind)const;
+ bool IsGlobalInSmallSection(const GlobalValue *GV,
+ const TargetMachine &TM) const;
+
+ const MCSection *SelectSectionForGlobal(const GlobalValue *GV,
+ SectionKind Kind,
+ Mangler *Mang,
+ const TargetMachine &TM) const;
+
+ // TODO: Classify globals as mips wishes.
+ };
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp b/contrib/llvm/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp
new file mode 100644
index 0000000..243632b
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp
@@ -0,0 +1,31 @@
+//===-- MipsTargetInfo.cpp - Mips Target Implementation -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Mips.h"
+#include "llvm/Module.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::TheMipsTarget, llvm::TheMipselTarget;
+Target llvm::TheMips64Target, llvm::TheMips64elTarget;
+
+extern "C" void LLVMInitializeMipsTargetInfo() {
+ RegisterTarget<Triple::mips,
+ /*HasJIT=*/true> X(TheMipsTarget, "mips", "Mips");
+
+ RegisterTarget<Triple::mipsel,
+ /*HasJIT=*/true> Y(TheMipselTarget, "mipsel", "Mipsel");
+
+ RegisterTarget<Triple::mips64,
+ /*HasJIT=*/false> A(TheMips64Target, "mips64", "Mips64 [experimental]");
+
+ RegisterTarget<Triple::mips64el,
+ /*HasJIT=*/false> B(TheMips64elTarget,
+ "mips64el", "Mips64el [experimental]");
+}
diff --git a/contrib/llvm/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp b/contrib/llvm/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp
new file mode 100644
index 0000000..aabb404
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp
@@ -0,0 +1,192 @@
+//===-- PTXInstPrinter.cpp - Convert PTX MCInst to assembly syntax --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints a PTX MCInst to a .ptx file.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "PTXInstPrinter.h"
+#include "MCTargetDesc/PTXBaseInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define GET_INSTRUCTION_NAME
+#include "PTXGenAsmWriter.inc"
+
+PTXInstPrinter::PTXInstPrinter(const MCAsmInfo &MAI,
+ const MCSubtargetInfo &STI) :
+ MCInstPrinter(MAI) {
+ // Initialize the set of available features.
+ setAvailableFeatures(STI.getFeatureBits());
+}
+
+StringRef PTXInstPrinter::getOpcodeName(unsigned Opcode) const {
+ return getInstructionName(Opcode);
+}
+
+void PTXInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
+ OS << getRegisterName(RegNo);
+}
+
+void PTXInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
+ StringRef Annot) {
+ printPredicate(MI, O);
+ switch (MI->getOpcode()) {
+ default:
+ printInstruction(MI, O);
+ break;
+ case PTX::CALL:
+ printCall(MI, O);
+ }
+ O << ";";
+ printAnnotation(O, Annot);
+}
+
+void PTXInstPrinter::printPredicate(const MCInst *MI, raw_ostream &O) {
+ // The last two operands are the predicate operands
+ int RegIndex;
+ int OpIndex;
+
+ if (MI->getOpcode() == PTX::CALL) {
+ RegIndex = 0;
+ OpIndex = 1;
+ } else {
+ RegIndex = MI->getNumOperands()-2;
+ OpIndex = MI->getNumOperands()-1;
+ }
+
+ int PredOp = MI->getOperand(OpIndex).getImm();
+ if (PredOp == PTXPredicate::None)
+ return;
+
+ if (PredOp == PTXPredicate::Negate)
+ O << '!';
+ else
+ O << '@';
+
+ printOperand(MI, RegIndex, O);
+}
+
+void PTXInstPrinter::printCall(const MCInst *MI, raw_ostream &O) {
+ O << "\tcall.uni\t";
+ // The first two operands are the predicate slot
+ unsigned Index = 2;
+ unsigned NumRets = MI->getOperand(Index++).getImm();
+
+ if (NumRets > 0) {
+ O << "(";
+ printOperand(MI, Index++, O);
+ for (unsigned i = 1; i < NumRets; ++i) {
+ O << ", ";
+ printOperand(MI, Index++, O);
+ }
+ O << "), ";
+ }
+
+ O << *(MI->getOperand(Index++).getExpr()) << ", (";
+
+ unsigned NumArgs = MI->getOperand(Index++).getImm();
+ if (NumArgs > 0) {
+ printOperand(MI, Index++, O);
+ for (unsigned i = 1; i < NumArgs; ++i) {
+ O << ", ";
+ printOperand(MI, Index++, O);
+ }
+ }
+ O << ")";
+}
+
+void PTXInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isImm()) {
+ O << Op.getImm();
+ } else if (Op.isFPImm()) {
+ double Imm = Op.getFPImm();
+ APFloat FPImm(Imm);
+ APInt FPIntImm = FPImm.bitcastToAPInt();
+ O << "0D";
+ // PTX requires us to output the full 64 bits, even if the number is zero
+ if (FPIntImm.getZExtValue() > 0) {
+ O << FPIntImm.toString(16, false);
+ } else {
+ O << "0000000000000000";
+ }
+ } else {
+ assert(Op.isExpr() && "unknown operand kind in printOperand");
+ const MCExpr *Expr = Op.getExpr();
+ if (const MCSymbolRefExpr *SymRefExpr = dyn_cast<MCSymbolRefExpr>(Expr)) {
+ const MCSymbol &Sym = SymRefExpr->getSymbol();
+ O << Sym.getName();
+ } else {
+ O << *Op.getExpr();
+ }
+ }
+}
+
+void PTXInstPrinter::printMemOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ // By definition, operand OpNo+1 is an i32imm
+ const MCOperand &Op2 = MI->getOperand(OpNo+1);
+ printOperand(MI, OpNo, O);
+ if (Op2.getImm() == 0)
+ return; // don't print "+0"
+ O << "+" << Op2.getImm();
+}
+
+void PTXInstPrinter::printRoundingMode(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ assert (Op.isImm() && "Rounding modes must be immediate values");
+ switch (Op.getImm()) {
+ default:
+ llvm_unreachable("Unknown rounding mode!");
+ case PTXRoundingMode::RndDefault:
+ llvm_unreachable("FP rounding-mode pass did not handle instruction!");
+ break;
+ case PTXRoundingMode::RndNone:
+ // Do not print anything.
+ break;
+ case PTXRoundingMode::RndNearestEven:
+ O << ".rn";
+ break;
+ case PTXRoundingMode::RndTowardsZero:
+ O << ".rz";
+ break;
+ case PTXRoundingMode::RndNegInf:
+ O << ".rm";
+ break;
+ case PTXRoundingMode::RndPosInf:
+ O << ".rp";
+ break;
+ case PTXRoundingMode::RndApprox:
+ O << ".approx";
+ break;
+ case PTXRoundingMode::RndNearestEvenInt:
+ O << ".rni";
+ break;
+ case PTXRoundingMode::RndTowardsZeroInt:
+ O << ".rzi";
+ break;
+ case PTXRoundingMode::RndNegInfInt:
+ O << ".rmi";
+ break;
+ case PTXRoundingMode::RndPosInfInt:
+ O << ".rpi";
+ break;
+ }
+}
+
diff --git a/contrib/llvm/lib/Target/PTX/InstPrinter/PTXInstPrinter.h b/contrib/llvm/lib/Target/PTX/InstPrinter/PTXInstPrinter.h
new file mode 100644
index 0000000..86dfd48
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/InstPrinter/PTXInstPrinter.h
@@ -0,0 +1,47 @@
+//===-- PTXInstPrinter.h - Convert PTX MCInst to assembly syntax ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints n PTX MCInst to a .ptx file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTXINSTPRINTER_H
+#define PTXINSTPRINTER_H
+
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+
+namespace llvm {
+
+class MCOperand;
+
+class PTXInstPrinter : public MCInstPrinter {
+public:
+ PTXInstPrinter(const MCAsmInfo &MAI, const MCSubtargetInfo &STI);
+
+ virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
+ virtual StringRef getOpcodeName(unsigned Opcode) const;
+ virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
+
+ static const char *getInstructionName(unsigned Opcode);
+
+ // Autogenerated by tblgen.
+ void printInstruction(const MCInst *MI, raw_ostream &O);
+ static const char *getRegisterName(unsigned RegNo);
+
+ void printPredicate(const MCInst *MI, raw_ostream &O);
+ void printCall(const MCInst *MI, raw_ostream &O);
+ void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printRoundingMode(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+};
+}
+
+#endif
+
diff --git a/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h b/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h
new file mode 100644
index 0000000..c6094be
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h
@@ -0,0 +1,63 @@
+//===-- PTXBaseInfo.h - Top level definitions for PTX -------- --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains small standalone helper functions and enum definitions for
+// the PTX target useful for the compiler back-end and the MC libraries.
+// As such, it deliberately does not include references to LLVM core
+// code gen types, passes, etc..
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTXBASEINFO_H
+#define PTXBASEINFO_H
+
+#include "PTXMCTargetDesc.h"
+
+namespace llvm {
+ namespace PTXStateSpace {
+ enum {
+ Global = 0, // default to global state space
+ Constant = 1,
+ Local = 2,
+ Parameter = 3,
+ Shared = 4
+ };
+ } // namespace PTXStateSpace
+
+ namespace PTXPredicate {
+ enum {
+ Normal = 0,
+ Negate = 1,
+ None = 2
+ };
+ } // namespace PTXPredicate
+
+ /// Namespace to hold all target-specific flags.
+ namespace PTXRoundingMode {
+ // Instruction Flags
+ enum {
+ // Rounding Mode Flags
+ RndMask = 15,
+ RndDefault = 0, // ---
+ RndNone = 1, // <NONE>
+ RndNearestEven = 2, // .rn
+ RndTowardsZero = 3, // .rz
+ RndNegInf = 4, // .rm
+ RndPosInf = 5, // .rp
+ RndApprox = 6, // .approx
+ RndNearestEvenInt = 7, // .rni
+ RndTowardsZeroInt = 8, // .rzi
+ RndNegInfInt = 9, // .rmi
+ RndPosInfInt = 10 // .rpi
+ };
+ } // namespace PTXII
+} // namespace llvm
+
+#endif
+
diff --git a/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.cpp b/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.cpp
new file mode 100644
index 0000000..efefead
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.cpp
@@ -0,0 +1,35 @@
+//===-- PTXMCAsmInfo.cpp - PTX asm properties -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the PTXMCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PTXMCAsmInfo.h"
+#include "llvm/ADT/Triple.h"
+
+using namespace llvm;
+
+PTXMCAsmInfo::PTXMCAsmInfo(const Target &T, const StringRef &TT) {
+ Triple TheTriple(TT);
+ if (TheTriple.getArch() == Triple::ptx64)
+ PointerSize = 8;
+
+ CommentString = "//";
+
+ PrivateGlobalPrefix = "$L__";
+
+ AllowPeriodsInName = false;
+
+ HasSetDirective = false;
+
+ HasDotTypeDotSizeDirective = false;
+
+ HasSingleParameterDotFile = false;
+}
diff --git a/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.h b/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.h
new file mode 100644
index 0000000..03f5d66
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.h
@@ -0,0 +1,28 @@
+//=====-- PTXMCAsmInfo.h - PTX asm properties -----------------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the PTXMCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTX_MCASM_INFO_H
+#define PTX_MCASM_INFO_H
+
+#include "llvm/MC/MCAsmInfo.h"
+
+namespace llvm {
+ class Target;
+ class StringRef;
+
+ struct PTXMCAsmInfo : public MCAsmInfo {
+ explicit PTXMCAsmInfo(const Target &T, const StringRef &TT);
+ };
+} // namespace llvm
+
+#endif // PTX_MCASM_INFO_H
diff --git a/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp b/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp
new file mode 100644
index 0000000..a5af3b8
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp
@@ -0,0 +1,95 @@
+//===-- PTXMCTargetDesc.cpp - PTX Target Descriptions -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides PTX specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PTXMCTargetDesc.h"
+#include "PTXMCAsmInfo.h"
+#include "InstPrinter/PTXInstPrinter.h"
+#include "llvm/MC/MCCodeGenInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "PTXGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "PTXGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "PTXGenRegisterInfo.inc"
+
+using namespace llvm;
+
+static MCInstrInfo *createPTXMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitPTXMCInstrInfo(X);
+ return X;
+}
+
+static MCRegisterInfo *createPTXMCRegisterInfo(StringRef TT) {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ // PTX does not have a return address register.
+ InitPTXMCRegisterInfo(X, 0);
+ return X;
+}
+
+static MCSubtargetInfo *createPTXMCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS) {
+ MCSubtargetInfo *X = new MCSubtargetInfo();
+ InitPTXMCSubtargetInfo(X, TT, CPU, FS);
+ return X;
+}
+
+static MCCodeGenInfo *createPTXMCCodeGenInfo(StringRef TT, Reloc::Model RM,
+ CodeModel::Model CM) {
+ MCCodeGenInfo *X = new MCCodeGenInfo();
+ X->InitMCCodeGenInfo(RM, CM);
+ return X;
+}
+
+static MCInstPrinter *createPTXMCInstPrinter(const Target &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI,
+ const MCSubtargetInfo &STI) {
+ assert(SyntaxVariant == 0 && "We only have one syntax variant");
+ return new PTXInstPrinter(MAI, STI);
+}
+
+extern "C" void LLVMInitializePTXTargetMC() {
+ // Register the MC asm info.
+ RegisterMCAsmInfo<PTXMCAsmInfo> X(ThePTX32Target);
+ RegisterMCAsmInfo<PTXMCAsmInfo> Y(ThePTX64Target);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(ThePTX32Target, createPTXMCCodeGenInfo);
+ TargetRegistry::RegisterMCCodeGenInfo(ThePTX64Target, createPTXMCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(ThePTX32Target, createPTXMCInstrInfo);
+ TargetRegistry::RegisterMCInstrInfo(ThePTX64Target, createPTXMCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(ThePTX32Target, createPTXMCRegisterInfo);
+ TargetRegistry::RegisterMCRegInfo(ThePTX64Target, createPTXMCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(ThePTX32Target,
+ createPTXMCSubtargetInfo);
+ TargetRegistry::RegisterMCSubtargetInfo(ThePTX64Target,
+ createPTXMCSubtargetInfo);
+
+ // Register the MCInstPrinter.
+ TargetRegistry::RegisterMCInstPrinter(ThePTX32Target, createPTXMCInstPrinter);
+ TargetRegistry::RegisterMCInstPrinter(ThePTX64Target, createPTXMCInstPrinter);
+}
diff --git a/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.h b/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.h
new file mode 100644
index 0000000..1003b0b
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.h
@@ -0,0 +1,38 @@
+//===-- PTXMCTargetDesc.h - PTX Target Descriptions ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides PTX specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTXMCTARGETDESC_H
+#define PTXMCTARGETDESC_H
+
+namespace llvm {
+class MCSubtargetInfo;
+class Target;
+class StringRef;
+
+extern Target ThePTX32Target;
+extern Target ThePTX64Target;
+
+} // End llvm namespace
+
+// Defines symbolic names for PTX registers.
+#define GET_REGINFO_ENUM
+#include "PTXGenRegisterInfo.inc"
+
+// Defines symbolic names for the PTX instructions.
+#define GET_INSTRINFO_ENUM
+#include "PTXGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "PTXGenSubtargetInfo.inc"
+
+#endif
diff --git a/contrib/llvm/lib/Target/PTX/PTX.h b/contrib/llvm/lib/Target/PTX/PTX.h
new file mode 100644
index 0000000..7d46cce
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTX.h
@@ -0,0 +1,44 @@
+//===-- PTX.h - Top-level interface for PTX representation ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// PTX back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTX_H
+#define PTX_H
+
+#include "MCTargetDesc/PTXBaseInfo.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+ class MachineInstr;
+ class MCInst;
+ class PTXAsmPrinter;
+ class PTXTargetMachine;
+ class FunctionPass;
+
+ FunctionPass *createPTXISelDag(PTXTargetMachine &TM,
+ CodeGenOpt::Level OptLevel);
+
+ FunctionPass *createPTXMFInfoExtract(PTXTargetMachine &TM,
+ CodeGenOpt::Level OptLevel);
+
+ FunctionPass *createPTXFPRoundingModePass(PTXTargetMachine &TM,
+ CodeGenOpt::Level OptLevel);
+
+ FunctionPass *createPTXRegisterAllocator();
+
+ void LowerPTXMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
+ PTXAsmPrinter &AP);
+
+} // namespace llvm;
+
+#endif // PTX_H
diff --git a/contrib/llvm/lib/Target/PTX/PTX.td b/contrib/llvm/lib/Target/PTX/PTX.td
new file mode 100644
index 0000000..693bb9c
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTX.td
@@ -0,0 +1,141 @@
+//===- PTX.td - Describe the PTX Target Machine ---------------*- tblgen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This is the top level entry point for the PTX target.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// Subtarget Features
+//===----------------------------------------------------------------------===//
+
+//===- Architectural Features ---------------------------------------------===//
+
+def FeatureDouble : SubtargetFeature<"double", "SupportsDouble", "true",
+ "Do not demote .f64 to .f32">;
+
+def FeatureNoFMA : SubtargetFeature<"no-fma","SupportsFMA", "false",
+ "Disable Fused-Multiply Add">;
+
+//===- PTX Version --------------------------------------------------------===//
+
+def FeaturePTX20 : SubtargetFeature<"ptx20", "PTXVersion", "PTX_VERSION_2_0",
+ "Use PTX Language Version 2.0">;
+
+def FeaturePTX21 : SubtargetFeature<"ptx21", "PTXVersion", "PTX_VERSION_2_1",
+ "Use PTX Language Version 2.1">;
+
+def FeaturePTX22 : SubtargetFeature<"ptx22", "PTXVersion", "PTX_VERSION_2_2",
+ "Use PTX Language Version 2.2">;
+
+def FeaturePTX23 : SubtargetFeature<"ptx23", "PTXVersion", "PTX_VERSION_2_3",
+ "Use PTX Language Version 2.3">;
+
+//===- PTX Target ---------------------------------------------------------===//
+
+def FeatureSM10 : SubtargetFeature<"sm10", "PTXTarget", "PTX_SM_1_0",
+ "Use Shader Model 1.0">;
+def FeatureSM11 : SubtargetFeature<"sm11", "PTXTarget", "PTX_SM_1_1",
+ "Use Shader Model 1.1">;
+def FeatureSM12 : SubtargetFeature<"sm12", "PTXTarget", "PTX_SM_1_2",
+ "Use Shader Model 1.2">;
+def FeatureSM13 : SubtargetFeature<"sm13", "PTXTarget", "PTX_SM_1_3",
+ "Use Shader Model 1.3">;
+def FeatureSM20 : SubtargetFeature<"sm20", "PTXTarget", "PTX_SM_2_0",
+ "Use Shader Model 2.0", [FeatureDouble]>;
+def FeatureSM21 : SubtargetFeature<"sm21", "PTXTarget", "PTX_SM_2_1",
+ "Use Shader Model 2.1", [FeatureDouble]>;
+def FeatureSM22 : SubtargetFeature<"sm22", "PTXTarget", "PTX_SM_2_2",
+ "Use Shader Model 2.2", [FeatureDouble]>;
+def FeatureSM23 : SubtargetFeature<"sm23", "PTXTarget", "PTX_SM_2_3",
+ "Use Shader Model 2.3", [FeatureDouble]>;
+
+def FeatureCOMPUTE10 : SubtargetFeature<"compute10", "PTXTarget",
+ "PTX_COMPUTE_1_0",
+ "Use Compute Compatibility 1.0">;
+def FeatureCOMPUTE11 : SubtargetFeature<"compute11", "PTXTarget",
+ "PTX_COMPUTE_1_1",
+ "Use Compute Compatibility 1.1">;
+def FeatureCOMPUTE12 : SubtargetFeature<"compute12", "PTXTarget",
+ "PTX_COMPUTE_1_2",
+ "Use Compute Compatibility 1.2">;
+def FeatureCOMPUTE13 : SubtargetFeature<"compute13", "PTXTarget",
+ "PTX_COMPUTE_1_3",
+ "Use Compute Compatibility 1.3">;
+def FeatureCOMPUTE20 : SubtargetFeature<"compute20", "PTXTarget",
+ "PTX_COMPUTE_2_0",
+ "Use Compute Compatibility 2.0",
+ [FeatureDouble]>;
+
+//===----------------------------------------------------------------------===//
+// PTX supported processors
+//===----------------------------------------------------------------------===//
+
+class Proc<string Name, list<SubtargetFeature> Features>
+ : Processor<Name, NoItineraries, Features>;
+
+def : Proc<"generic", []>;
+
+// Processor definitions for compute/shader models
+def : Proc<"compute_10", [FeatureCOMPUTE10]>;
+def : Proc<"compute_11", [FeatureCOMPUTE11]>;
+def : Proc<"compute_12", [FeatureCOMPUTE12]>;
+def : Proc<"compute_13", [FeatureCOMPUTE13]>;
+def : Proc<"compute_20", [FeatureCOMPUTE20]>;
+def : Proc<"sm_10", [FeatureSM10]>;
+def : Proc<"sm_11", [FeatureSM11]>;
+def : Proc<"sm_12", [FeatureSM12]>;
+def : Proc<"sm_13", [FeatureSM13]>;
+def : Proc<"sm_20", [FeatureSM20]>;
+def : Proc<"sm_21", [FeatureSM21]>;
+def : Proc<"sm_22", [FeatureSM22]>;
+def : Proc<"sm_23", [FeatureSM23]>;
+
+// Processor definitions for common GPU architectures
+def : Proc<"g80", [FeatureSM10]>;
+def : Proc<"gt200", [FeatureSM13]>;
+def : Proc<"gf100", [FeatureSM20, FeatureDouble]>;
+def : Proc<"fermi", [FeatureSM20, FeatureDouble]>;
+
+//===----------------------------------------------------------------------===//
+// Register File Description
+//===----------------------------------------------------------------------===//
+
+include "PTXRegisterInfo.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "PTXInstrInfo.td"
+
+def PTXInstrInfo : InstrInfo;
+
+//===----------------------------------------------------------------------===//
+// Assembly printer
+//===----------------------------------------------------------------------===//
+// PTX uses the MC printer for asm output, so make sure the TableGen
+// AsmWriter bits get associated with the correct class.
+def PTXAsmWriter : AsmWriter {
+ string AsmWriterClassName = "InstPrinter";
+ bit isMCAsmWriter = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Target Declaration
+//===----------------------------------------------------------------------===//
+
+def PTX : Target {
+ let InstructionSet = PTXInstrInfo;
+ let AssemblyWriters = [PTXAsmWriter];
+}
diff --git a/contrib/llvm/lib/Target/PTX/PTXAsmPrinter.cpp b/contrib/llvm/lib/Target/PTX/PTXAsmPrinter.cpp
new file mode 100644
index 0000000..733744b
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXAsmPrinter.cpp
@@ -0,0 +1,597 @@
+//===-- PTXAsmPrinter.cpp - PTX LLVM assembly writer ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to PTX assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ptx-asm-printer"
+
+#include "PTX.h"
+#include "PTXAsmPrinter.h"
+#include "PTXMachineFunctionInfo.h"
+#include "PTXParamManager.h"
+#include "PTXRegisterInfo.h"
+#include "PTXTargetMachine.h"
+#include "llvm/Argument.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Module.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+static const char PARAM_PREFIX[] = "__param_";
+static const char RETURN_PREFIX[] = "__ret_";
+
+static const char *getRegisterTypeName(unsigned RegNo,
+ const MachineRegisterInfo& MRI) {
+ const TargetRegisterClass *TRC = MRI.getRegClass(RegNo);
+
+#define TEST_REGCLS(cls, clsstr) \
+ if (PTX::cls ## RegisterClass == TRC) return # clsstr;
+
+ TEST_REGCLS(RegPred, pred);
+ TEST_REGCLS(RegI16, b16);
+ TEST_REGCLS(RegI32, b32);
+ TEST_REGCLS(RegI64, b64);
+ TEST_REGCLS(RegF32, b32);
+ TEST_REGCLS(RegF64, b64);
+#undef TEST_REGCLS
+
+ llvm_unreachable("Not in any register class!");
+ return NULL;
+}
+
+static const char *getStateSpaceName(unsigned addressSpace) {
+ switch (addressSpace) {
+ default: llvm_unreachable("Unknown state space");
+ case PTXStateSpace::Global: return "global";
+ case PTXStateSpace::Constant: return "const";
+ case PTXStateSpace::Local: return "local";
+ case PTXStateSpace::Parameter: return "param";
+ case PTXStateSpace::Shared: return "shared";
+ }
+ return NULL;
+}
+
+static const char *getTypeName(Type* type) {
+ while (true) {
+ switch (type->getTypeID()) {
+ default: llvm_unreachable("Unknown type");
+ case Type::FloatTyID: return ".f32";
+ case Type::DoubleTyID: return ".f64";
+ case Type::IntegerTyID:
+ switch (type->getPrimitiveSizeInBits()) {
+ default: llvm_unreachable("Unknown integer bit-width");
+ case 16: return ".u16";
+ case 32: return ".u32";
+ case 64: return ".u64";
+ }
+ case Type::ArrayTyID:
+ case Type::PointerTyID:
+ type = dyn_cast<SequentialType>(type)->getElementType();
+ break;
+ }
+ }
+ return NULL;
+}
+
+bool PTXAsmPrinter::doFinalization(Module &M) {
+ // XXX Temproarily remove global variables so that doFinalization() will not
+ // emit them again (global variables are emitted at beginning).
+
+ Module::GlobalListType &global_list = M.getGlobalList();
+ int i, n = global_list.size();
+ GlobalVariable **gv_array = new GlobalVariable* [n];
+
+ // first, back-up GlobalVariable in gv_array
+ i = 0;
+ for (Module::global_iterator I = global_list.begin(), E = global_list.end();
+ I != E; ++I)
+ gv_array[i++] = &*I;
+
+ // second, empty global_list
+ while (!global_list.empty())
+ global_list.remove(global_list.begin());
+
+ // call doFinalization
+ bool ret = AsmPrinter::doFinalization(M);
+
+ // now we restore global variables
+ for (i = 0; i < n; i ++)
+ global_list.insert(global_list.end(), gv_array[i]);
+
+ delete[] gv_array;
+ return ret;
+}
+
+void PTXAsmPrinter::EmitStartOfAsmFile(Module &M)
+{
+ const PTXSubtarget& ST = TM.getSubtarget<PTXSubtarget>();
+
+ // Emit the PTX .version and .target attributes
+ OutStreamer.EmitRawText(Twine("\t.version " + ST.getPTXVersionString()));
+ OutStreamer.EmitRawText(Twine("\t.target " + ST.getTargetString() +
+ (ST.supportsDouble() ? ""
+ : ", map_f64_to_f32")));
+ // .address_size directive is optional, but it must immediately follow
+ // the .target directive if present within a module
+ if (ST.supportsPTX23()) {
+ std::string addrSize = ST.is64Bit() ? "64" : "32";
+ OutStreamer.EmitRawText(Twine("\t.address_size " + addrSize));
+ }
+
+ OutStreamer.AddBlankLine();
+
+ // Define any .file directives
+ DebugInfoFinder DbgFinder;
+ DbgFinder.processModule(M);
+
+ for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(),
+ E = DbgFinder.compile_unit_end(); I != E; ++I) {
+ DICompileUnit DIUnit(*I);
+ StringRef FN = DIUnit.getFilename();
+ StringRef Dir = DIUnit.getDirectory();
+ GetOrCreateSourceID(FN, Dir);
+ }
+
+ OutStreamer.AddBlankLine();
+
+ // declare global variables
+ for (Module::const_global_iterator i = M.global_begin(), e = M.global_end();
+ i != e; ++i)
+ EmitVariableDeclaration(i);
+}
+
+void PTXAsmPrinter::EmitFunctionBodyStart() {
+ OutStreamer.EmitRawText(Twine("{"));
+
+ const PTXMachineFunctionInfo *MFI = MF->getInfo<PTXMachineFunctionInfo>();
+ const PTXParamManager &PM = MFI->getParamManager();
+
+ // Print register definitions
+ std::string regDefs;
+ unsigned numRegs;
+
+ // pred
+ numRegs = MFI->getNumRegistersForClass(PTX::RegPredRegisterClass);
+ if(numRegs > 0) {
+ regDefs += "\t.reg .pred %p<";
+ regDefs += utostr(numRegs);
+ regDefs += ">;\n";
+ }
+
+ // i16
+ numRegs = MFI->getNumRegistersForClass(PTX::RegI16RegisterClass);
+ if(numRegs > 0) {
+ regDefs += "\t.reg .b16 %rh<";
+ regDefs += utostr(numRegs);
+ regDefs += ">;\n";
+ }
+
+ // i32
+ numRegs = MFI->getNumRegistersForClass(PTX::RegI32RegisterClass);
+ if(numRegs > 0) {
+ regDefs += "\t.reg .b32 %r<";
+ regDefs += utostr(numRegs);
+ regDefs += ">;\n";
+ }
+
+ // i64
+ numRegs = MFI->getNumRegistersForClass(PTX::RegI64RegisterClass);
+ if(numRegs > 0) {
+ regDefs += "\t.reg .b64 %rd<";
+ regDefs += utostr(numRegs);
+ regDefs += ">;\n";
+ }
+
+ // f32
+ numRegs = MFI->getNumRegistersForClass(PTX::RegF32RegisterClass);
+ if(numRegs > 0) {
+ regDefs += "\t.reg .f32 %f<";
+ regDefs += utostr(numRegs);
+ regDefs += ">;\n";
+ }
+
+ // f64
+ numRegs = MFI->getNumRegistersForClass(PTX::RegF64RegisterClass);
+ if(numRegs > 0) {
+ regDefs += "\t.reg .f64 %fd<";
+ regDefs += utostr(numRegs);
+ regDefs += ">;\n";
+ }
+
+ // Local params
+ for (PTXParamManager::param_iterator i = PM.local_begin(), e = PM.local_end();
+ i != e; ++i) {
+ regDefs += "\t.param .b";
+ regDefs += utostr(PM.getParamSize(*i));
+ regDefs += " ";
+ regDefs += PM.getParamName(*i);
+ regDefs += ";\n";
+ }
+
+ OutStreamer.EmitRawText(Twine(regDefs));
+
+
+ const MachineFrameInfo* FrameInfo = MF->getFrameInfo();
+ DEBUG(dbgs() << "Have " << FrameInfo->getNumObjects()
+ << " frame object(s)\n");
+ for (unsigned i = 0, e = FrameInfo->getNumObjects(); i != e; ++i) {
+ DEBUG(dbgs() << "Size of object: " << FrameInfo->getObjectSize(i) << "\n");
+ if (FrameInfo->getObjectSize(i) > 0) {
+ std::string def = "\t.local .align ";
+ def += utostr(FrameInfo->getObjectAlignment(i));
+ def += " .b8";
+ def += " __local";
+ def += utostr(i);
+ def += "[";
+ def += utostr(FrameInfo->getObjectSize(i)); // Convert to bits
+ def += "]";
+ def += ";";
+ OutStreamer.EmitRawText(Twine(def));
+ }
+ }
+
+ //unsigned Index = 1;
+ // Print parameter passing params
+ //for (PTXMachineFunctionInfo::param_iterator
+ // i = MFI->paramBegin(), e = MFI->paramEnd(); i != e; ++i) {
+ // std::string def = "\t.param .b";
+ // def += utostr(*i);
+ // def += " __ret_";
+ // def += utostr(Index);
+ // Index++;
+ // def += ";";
+ // OutStreamer.EmitRawText(Twine(def));
+ //}
+}
+
+void PTXAsmPrinter::EmitFunctionBodyEnd() {
+ OutStreamer.EmitRawText(Twine("}"));
+}
+
+void PTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ MCInst TmpInst;
+ LowerPTXMachineInstrToMCInst(MI, TmpInst, *this);
+ OutStreamer.EmitInstruction(TmpInst);
+}
+
+void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) {
+ // Check to see if this is a special global used by LLVM, if so, emit it.
+ if (EmitSpecialLLVMGlobal(gv))
+ return;
+
+ MCSymbol *gvsym = Mang->getSymbol(gv);
+
+ assert(gvsym->isUndefined() && "Cannot define a symbol twice!");
+
+ std::string decl;
+
+ // check if it is defined in some other translation unit
+ if (gv->isDeclaration())
+ decl += ".extern ";
+
+ // state space: e.g., .global
+ decl += ".";
+ decl += getStateSpaceName(gv->getType()->getAddressSpace());
+ decl += " ";
+
+ // alignment (optional)
+ unsigned alignment = gv->getAlignment();
+ if (alignment != 0) {
+ decl += ".align ";
+ decl += utostr(gv->getAlignment());
+ decl += " ";
+ }
+
+
+ if (PointerType::classof(gv->getType())) {
+ PointerType* pointerTy = dyn_cast<PointerType>(gv->getType());
+ Type* elementTy = pointerTy->getElementType();
+
+ decl += ".b8 ";
+ decl += gvsym->getName();
+ decl += "[";
+
+ if (elementTy->isArrayTy())
+ {
+ assert(elementTy->isArrayTy() && "Only pointers to arrays are supported");
+
+ ArrayType* arrayTy = dyn_cast<ArrayType>(elementTy);
+ elementTy = arrayTy->getElementType();
+
+ unsigned numElements = arrayTy->getNumElements();
+
+ while (elementTy->isArrayTy()) {
+
+ arrayTy = dyn_cast<ArrayType>(elementTy);
+ elementTy = arrayTy->getElementType();
+
+ numElements *= arrayTy->getNumElements();
+ }
+
+ // FIXME: isPrimitiveType() == false for i16?
+ assert(elementTy->isSingleValueType() &&
+ "Non-primitive types are not handled");
+
+ // Compute the size of the array, in bytes.
+ uint64_t arraySize = (elementTy->getPrimitiveSizeInBits() >> 3)
+ * numElements;
+
+ decl += utostr(arraySize);
+ }
+
+ decl += "]";
+
+ // handle string constants (assume ConstantArray means string)
+
+ if (gv->hasInitializer())
+ {
+ const Constant *C = gv->getInitializer();
+ if (const ConstantArray *CA = dyn_cast<ConstantArray>(C))
+ {
+ decl += " = {";
+
+ for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i)
+ {
+ if (i > 0) decl += ",";
+
+ decl += "0x" +
+ utohexstr(cast<ConstantInt>(CA->getOperand(i))->getZExtValue());
+ }
+
+ decl += "}";
+ }
+ }
+ }
+ else {
+ // Note: this is currently the fall-through case and most likely generates
+ // incorrect code.
+ decl += getTypeName(gv->getType());
+ decl += " ";
+
+ decl += gvsym->getName();
+
+ if (ArrayType::classof(gv->getType()) ||
+ PointerType::classof(gv->getType()))
+ decl += "[]";
+ }
+
+ decl += ";";
+
+ OutStreamer.EmitRawText(Twine(decl));
+
+ OutStreamer.AddBlankLine();
+}
+
+void PTXAsmPrinter::EmitFunctionEntryLabel() {
+ // The function label could have already been emitted if two symbols end up
+ // conflicting due to asm renaming. Detect this and emit an error.
+ if (!CurrentFnSym->isUndefined()) {
+ report_fatal_error("'" + Twine(CurrentFnSym->getName()) +
+ "' label emitted multiple times to assembly file");
+ return;
+ }
+
+ const PTXMachineFunctionInfo *MFI = MF->getInfo<PTXMachineFunctionInfo>();
+ const PTXParamManager &PM = MFI->getParamManager();
+ const bool isKernel = MFI->isKernel();
+ const PTXSubtarget& ST = TM.getSubtarget<PTXSubtarget>();
+ const MachineRegisterInfo& MRI = MF->getRegInfo();
+
+ std::string decl = isKernel ? ".entry" : ".func";
+
+ if (!isKernel) {
+ decl += " (";
+ if (ST.useParamSpaceForDeviceArgs()) {
+ for (PTXParamManager::param_iterator i = PM.ret_begin(), e = PM.ret_end(),
+ b = i; i != e; ++i) {
+ if (i != b) {
+ decl += ", ";
+ }
+
+ decl += ".param .b";
+ decl += utostr(PM.getParamSize(*i));
+ decl += " ";
+ decl += PM.getParamName(*i);
+ }
+ } else {
+ for (PTXMachineFunctionInfo::reg_iterator
+ i = MFI->retreg_begin(), e = MFI->retreg_end(), b = i;
+ i != e; ++i) {
+ if (i != b) {
+ decl += ", ";
+ }
+ decl += ".reg .";
+ decl += getRegisterTypeName(*i, MRI);
+ decl += " ";
+ decl += MFI->getRegisterName(*i);
+ }
+ }
+ decl += ")";
+ }
+
+ // Print function name
+ decl += " ";
+ decl += CurrentFnSym->getName().str();
+
+ decl += " (";
+
+ const Function *F = MF->getFunction();
+
+ // Print parameters
+ if (isKernel || ST.useParamSpaceForDeviceArgs()) {
+ /*for (PTXParamManager::param_iterator i = PM.arg_begin(), e = PM.arg_end(),
+ b = i; i != e; ++i) {
+ if (i != b) {
+ decl += ", ";
+ }
+
+ decl += ".param .b";
+ decl += utostr(PM.getParamSize(*i));
+ decl += " ";
+ decl += PM.getParamName(*i);
+ }*/
+ int Counter = 1;
+ for (Function::const_arg_iterator i = F->arg_begin(), e = F->arg_end(),
+ b = i; i != e; ++i) {
+ if (i != b)
+ decl += ", ";
+ const Type *ArgType = (*i).getType();
+ decl += ".param .b";
+ if (ArgType->isPointerTy()) {
+ if (ST.is64Bit())
+ decl += "64";
+ else
+ decl += "32";
+ } else {
+ decl += utostr(ArgType->getPrimitiveSizeInBits());
+ }
+ if (ArgType->isPointerTy() && ST.emitPtrAttribute()) {
+ const PointerType *PtrType = dyn_cast<const PointerType>(ArgType);
+ decl += " .ptr";
+ switch (PtrType->getAddressSpace()) {
+ default:
+ llvm_unreachable("Unknown address space in argument");
+ case PTXStateSpace::Global:
+ decl += " .global";
+ break;
+ case PTXStateSpace::Shared:
+ decl += " .shared";
+ break;
+ }
+ }
+ decl += " __param_";
+ decl += utostr(Counter++);
+ }
+ } else {
+ for (PTXMachineFunctionInfo::reg_iterator
+ i = MFI->argreg_begin(), e = MFI->argreg_end(), b = i;
+ i != e; ++i) {
+ if (i != b) {
+ decl += ", ";
+ }
+
+ decl += ".reg .";
+ decl += getRegisterTypeName(*i, MRI);
+ decl += " ";
+ decl += MFI->getRegisterName(*i);
+ }
+ }
+ decl += ")";
+
+ OutStreamer.EmitRawText(Twine(decl));
+}
+
+unsigned PTXAsmPrinter::GetOrCreateSourceID(StringRef FileName,
+ StringRef DirName) {
+ // If FE did not provide a file name, then assume stdin.
+ if (FileName.empty())
+ return GetOrCreateSourceID("<stdin>", StringRef());
+
+ // MCStream expects full path name as filename.
+ if (!DirName.empty() && !sys::path::is_absolute(FileName)) {
+ SmallString<128> FullPathName = DirName;
+ sys::path::append(FullPathName, FileName);
+ // Here FullPathName will be copied into StringMap by GetOrCreateSourceID.
+ return GetOrCreateSourceID(StringRef(FullPathName), StringRef());
+ }
+
+ StringMapEntry<unsigned> &Entry = SourceIdMap.GetOrCreateValue(FileName);
+ if (Entry.getValue())
+ return Entry.getValue();
+
+ unsigned SrcId = SourceIdMap.size();
+ Entry.setValue(SrcId);
+
+ // Print out a .file directive to specify files for .loc directives.
+ OutStreamer.EmitDwarfFileDirective(SrcId, Entry.getKey());
+
+ return SrcId;
+}
+
+MCOperand PTXAsmPrinter::GetSymbolRef(const MachineOperand &MO,
+ const MCSymbol *Symbol) {
+ const MCExpr *Expr;
+ Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None, OutContext);
+ return MCOperand::CreateExpr(Expr);
+}
+
+MCOperand PTXAsmPrinter::lowerOperand(const MachineOperand &MO) {
+ MCOperand MCOp;
+ const PTXMachineFunctionInfo *MFI = MF->getInfo<PTXMachineFunctionInfo>();
+ const MCExpr *Expr;
+ const char *RegSymbolName;
+ switch (MO.getType()) {
+ default:
+ llvm_unreachable("Unknown operand type");
+ case MachineOperand::MO_Register:
+ // We create register operands as symbols, since the PTXInstPrinter class
+ // has no way to map virtual registers back to a name without some ugly
+ // hacks.
+ // FIXME: Figure out a better way to handle virtual register naming.
+ RegSymbolName = MFI->getRegisterName(MO.getReg());
+ Expr = MCSymbolRefExpr::Create(RegSymbolName, MCSymbolRefExpr::VK_None,
+ OutContext);
+ MCOp = MCOperand::CreateExpr(Expr);
+ break;
+ case MachineOperand::MO_Immediate:
+ MCOp = MCOperand::CreateImm(MO.getImm());
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
+ MO.getMBB()->getSymbol(), OutContext));
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ MCOp = GetSymbolRef(MO, Mang->getSymbol(MO.getGlobal()));
+ break;
+ case MachineOperand::MO_ExternalSymbol:
+ MCOp = GetSymbolRef(MO, GetExternalSymbolSymbol(MO.getSymbolName()));
+ break;
+ case MachineOperand::MO_FPImmediate:
+ APFloat Val = MO.getFPImm()->getValueAPF();
+ bool ignored;
+ Val.convert(APFloat::IEEEdouble, APFloat::rmTowardZero, &ignored);
+ MCOp = MCOperand::CreateFPImm(Val.convertToDouble());
+ break;
+ }
+
+ return MCOp;
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializePTXAsmPrinter() {
+ RegisterAsmPrinter<PTXAsmPrinter> X(ThePTX32Target);
+ RegisterAsmPrinter<PTXAsmPrinter> Y(ThePTX64Target);
+}
+
diff --git a/contrib/llvm/lib/Target/PTX/PTXAsmPrinter.h b/contrib/llvm/lib/Target/PTX/PTXAsmPrinter.h
new file mode 100644
index 0000000..538c080
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXAsmPrinter.h
@@ -0,0 +1,57 @@
+//===-- PTXAsmPrinter.h - Print machine code to a PTX file ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// PTX Assembly printer class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTXASMPRINTER_H
+#define PTXASMPRINTER_H
+
+#include "PTX.h"
+#include "PTXTargetMachine.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+
+class MCOperand;
+
+class LLVM_LIBRARY_VISIBILITY PTXAsmPrinter : public AsmPrinter {
+public:
+ explicit PTXAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : AsmPrinter(TM, Streamer) {}
+
+ const char *getPassName() const { return "PTX Assembly Printer"; }
+
+ bool doFinalization(Module &M);
+
+ virtual void EmitStartOfAsmFile(Module &M);
+ virtual void EmitFunctionBodyStart();
+ virtual void EmitFunctionBodyEnd();
+ virtual void EmitFunctionEntryLabel();
+ virtual void EmitInstruction(const MachineInstr *MI);
+
+ unsigned GetOrCreateSourceID(StringRef FileName,
+ StringRef DirName);
+
+ MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol);
+ MCOperand lowerOperand(const MachineOperand &MO);
+
+private:
+ void EmitVariableDeclaration(const GlobalVariable *gv);
+ void EmitFunctionDeclaration();
+
+ StringMap<unsigned> SourceIdMap;
+}; // class PTXAsmPrinter
+} // namespace llvm
+
+#endif
+
diff --git a/contrib/llvm/lib/Target/PTX/PTXFPRoundingModePass.cpp b/contrib/llvm/lib/Target/PTX/PTXFPRoundingModePass.cpp
new file mode 100644
index 0000000..0b653e0
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXFPRoundingModePass.cpp
@@ -0,0 +1,179 @@
+//===-- PTXFPRoundingModePass.cpp - Assign rounding modes pass ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a machine function pass that sets appropriate FP rounding
+// modes for all relevant instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ptx-fp-rounding-mode"
+
+#include "PTX.h"
+#include "PTXTargetMachine.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+// NOTE: PTXFPRoundingModePass should be executed just before emission.
+
+namespace llvm {
+ /// PTXFPRoundingModePass - Pass to assign appropriate FP rounding modes to
+ /// all FP instructions. Essentially, this pass just looks for all FP
+ /// instructions that have a rounding mode set to RndDefault, and sets an
+ /// appropriate rounding mode based on the target device.
+ ///
+ class PTXFPRoundingModePass : public MachineFunctionPass {
+ private:
+ static char ID;
+
+ typedef std::pair<unsigned, unsigned> RndModeDesc;
+
+ PTXTargetMachine& TargetMachine;
+ DenseMap<unsigned, RndModeDesc> Instrs;
+
+ public:
+ PTXFPRoundingModePass(PTXTargetMachine &TM, CodeGenOpt::Level OptLevel)
+ : MachineFunctionPass(ID),
+ TargetMachine(TM) {
+ initializeMap();
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const {
+ return "PTX FP Rounding Mode Pass";
+ }
+
+ private:
+
+ void initializeMap();
+ void processInstruction(MachineInstr &MI);
+ }; // class PTXFPRoundingModePass
+} // namespace llvm
+
+using namespace llvm;
+
+char PTXFPRoundingModePass::ID = 0;
+
+bool PTXFPRoundingModePass::runOnMachineFunction(MachineFunction &MF) {
+ // Look at each basic block
+ for (MachineFunction::iterator bbi = MF.begin(), bbe = MF.end(); bbi != bbe;
+ ++bbi) {
+ MachineBasicBlock &MBB = *bbi;
+ // Look at each instruction
+ for (MachineBasicBlock::iterator ii = MBB.begin(), ie = MBB.end();
+ ii != ie; ++ii) {
+ MachineInstr &MI = *ii;
+ processInstruction(MI);
+ }
+ }
+ return false;
+}
+
+void PTXFPRoundingModePass::initializeMap() {
+ using namespace PTXRoundingMode;
+ const PTXSubtarget& ST = TargetMachine.getSubtarget<PTXSubtarget>();
+
+ // Build a map of default rounding mode for all instructions that need a
+ // rounding mode.
+ Instrs[PTX::FADDrr32] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::FADDri32] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::FADDrr64] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::FADDri64] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::FSUBrr32] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::FSUBri32] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::FSUBrr64] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::FSUBri64] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::FMULrr32] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::FMULri32] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::FMULrr64] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::FMULri64] = std::make_pair(1U, (unsigned)RndNearestEven);
+
+ Instrs[PTX::FNEGrr32] = std::make_pair(1U, (unsigned)RndNone);
+ Instrs[PTX::FNEGri32] = std::make_pair(1U, (unsigned)RndNone);
+ Instrs[PTX::FNEGrr64] = std::make_pair(1U, (unsigned)RndNone);
+ Instrs[PTX::FNEGri64] = std::make_pair(1U, (unsigned)RndNone);
+
+ unsigned FDivRndMode = ST.fdivNeedsRoundingMode() ? RndNearestEven : RndNone;
+ Instrs[PTX::FDIVrr32] = std::make_pair(1U, FDivRndMode);
+ Instrs[PTX::FDIVri32] = std::make_pair(1U, FDivRndMode);
+ Instrs[PTX::FDIVrr64] = std::make_pair(1U, FDivRndMode);
+ Instrs[PTX::FDIVri64] = std::make_pair(1U, FDivRndMode);
+
+ unsigned FMADRndMode = ST.fmadNeedsRoundingMode() ? RndNearestEven : RndNone;
+ Instrs[PTX::FMADrrr32] = std::make_pair(1U, FMADRndMode);
+ Instrs[PTX::FMADrri32] = std::make_pair(1U, FMADRndMode);
+ Instrs[PTX::FMADrii32] = std::make_pair(1U, FMADRndMode);
+ Instrs[PTX::FMADrrr64] = std::make_pair(1U, FMADRndMode);
+ Instrs[PTX::FMADrri64] = std::make_pair(1U, FMADRndMode);
+ Instrs[PTX::FMADrii64] = std::make_pair(1U, FMADRndMode);
+
+ Instrs[PTX::FSQRTrr32] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::FSQRTri32] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::FSQRTrr64] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::FSQRTri64] = std::make_pair(1U, (unsigned)RndNearestEven);
+
+ Instrs[PTX::FSINrr32] = std::make_pair(1U, (unsigned)RndApprox);
+ Instrs[PTX::FSINri32] = std::make_pair(1U, (unsigned)RndApprox);
+ Instrs[PTX::FSINrr64] = std::make_pair(1U, (unsigned)RndApprox);
+ Instrs[PTX::FSINri64] = std::make_pair(1U, (unsigned)RndApprox);
+ Instrs[PTX::FCOSrr32] = std::make_pair(1U, (unsigned)RndApprox);
+ Instrs[PTX::FCOSri32] = std::make_pair(1U, (unsigned)RndApprox);
+ Instrs[PTX::FCOSrr64] = std::make_pair(1U, (unsigned)RndApprox);
+ Instrs[PTX::FCOSri64] = std::make_pair(1U, (unsigned)RndApprox);
+
+ Instrs[PTX::CVTu16f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
+ Instrs[PTX::CVTs16f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
+ Instrs[PTX::CVTu16f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
+ Instrs[PTX::CVTs16f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
+ Instrs[PTX::CVTu32f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
+ Instrs[PTX::CVTs32f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
+ Instrs[PTX::CVTu32f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
+ Instrs[PTX::CVTs32f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
+ Instrs[PTX::CVTu64f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
+ Instrs[PTX::CVTs64f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
+ Instrs[PTX::CVTu64f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
+ Instrs[PTX::CVTs64f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
+
+ Instrs[PTX::CVTf32u16] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::CVTf32s16] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::CVTf32u32] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::CVTf32s32] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::CVTf32u64] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::CVTf32s64] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::CVTf32f64] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::CVTf64u16] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::CVTf64s16] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::CVTf64u32] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::CVTf64s32] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::CVTf64u64] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::CVTf64s64] = std::make_pair(1U, (unsigned)RndNearestEven);
+}
+
+void PTXFPRoundingModePass::processInstruction(MachineInstr &MI) {
+ // Is this an instruction that needs a rounding mode?
+ if (Instrs.count(MI.getOpcode())) {
+ const RndModeDesc &Desc = Instrs[MI.getOpcode()];
+ // Get the rounding mode operand
+ MachineOperand &Op = MI.getOperand(Desc.first);
+ // Update the rounding mode if needed
+ if (Op.getImm() == PTXRoundingMode::RndDefault) {
+ Op.setImm(Desc.second);
+ }
+ }
+}
+
+FunctionPass *llvm::createPTXFPRoundingModePass(PTXTargetMachine &TM,
+ CodeGenOpt::Level OptLevel) {
+ return new PTXFPRoundingModePass(TM, OptLevel);
+}
+
diff --git a/contrib/llvm/lib/Target/PTX/PTXFrameLowering.cpp b/contrib/llvm/lib/Target/PTX/PTXFrameLowering.cpp
new file mode 100644
index 0000000..b621b9d
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXFrameLowering.cpp
@@ -0,0 +1,24 @@
+//=======- PTXFrameLowering.cpp - PTX Frame Information -------*- C++ -*-=====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PTX implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PTXFrameLowering.h"
+#include "llvm/CodeGen/MachineFunction.h"
+
+using namespace llvm;
+
+void PTXFrameLowering::emitPrologue(MachineFunction &MF) const {
+}
+
+void PTXFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+}
diff --git a/contrib/llvm/lib/Target/PTX/PTXFrameLowering.h b/contrib/llvm/lib/Target/PTX/PTXFrameLowering.h
new file mode 100644
index 0000000..9320676
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXFrameLowering.h
@@ -0,0 +1,44 @@
+//===--- PTXFrameLowering.h - Define frame lowering for PTX --*- C++ -*----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTX_FRAMEINFO_H
+#define PTX_FRAMEINFO_H
+
+#include "PTX.h"
+#include "PTXSubtarget.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+ class PTXSubtarget;
+
+class PTXFrameLowering : public TargetFrameLowering {
+protected:
+ const PTXSubtarget &STI;
+
+public:
+ explicit PTXFrameLowering(const PTXSubtarget &sti)
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 2, -2),
+ STI(sti) {
+ }
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ bool hasFP(const MachineFunction &MF) const { return false; }
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/PTX/PTXISelDAGToDAG.cpp b/contrib/llvm/lib/Target/PTX/PTXISelDAGToDAG.cpp
new file mode 100644
index 0000000..5c7ee29
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXISelDAGToDAG.cpp
@@ -0,0 +1,356 @@
+//===-- PTXISelDAGToDAG.cpp - A dag to dag inst selector for PTX ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the PTX target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PTX.h"
+#include "PTXMachineFunctionInfo.h"
+#include "PTXTargetMachine.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+// PTXDAGToDAGISel - PTX specific code to select PTX machine
+// instructions for SelectionDAG operations.
+class PTXDAGToDAGISel : public SelectionDAGISel {
+ public:
+ PTXDAGToDAGISel(PTXTargetMachine &TM, CodeGenOpt::Level OptLevel);
+
+ virtual const char *getPassName() const {
+ return "PTX DAG->DAG Pattern Instruction Selection";
+ }
+
+ SDNode *Select(SDNode *Node);
+
+ // Complex Pattern Selectors.
+ bool SelectADDRrr(SDValue &Addr, SDValue &R1, SDValue &R2);
+ bool SelectADDRri(SDValue &Addr, SDValue &Base, SDValue &Offset);
+ bool SelectADDRii(SDValue &Addr, SDValue &Base, SDValue &Offset);
+ bool SelectADDRlocal(SDValue &Addr, SDValue &Base, SDValue &Offset);
+
+ // Include the pieces auto'gened from the target description
+#include "PTXGenDAGISel.inc"
+
+ private:
+ // We need this only because we can't match intruction BRAdp
+ // pattern (PTXbrcond bb:$d, ...) in PTXInstrInfo.td
+ SDNode *SelectBRCOND(SDNode *Node);
+
+ SDNode *SelectREADPARAM(SDNode *Node);
+ SDNode *SelectWRITEPARAM(SDNode *Node);
+ SDNode *SelectFrameIndex(SDNode *Node);
+
+ bool isImm(const SDValue &operand);
+ bool SelectImm(const SDValue &operand, SDValue &imm);
+
+ const PTXSubtarget& getSubtarget() const;
+}; // class PTXDAGToDAGISel
+} // namespace
+
+// createPTXISelDag - This pass converts a legalized DAG into a
+// PTX-specific DAG, ready for instruction scheduling
+FunctionPass *llvm::createPTXISelDag(PTXTargetMachine &TM,
+ CodeGenOpt::Level OptLevel) {
+ return new PTXDAGToDAGISel(TM, OptLevel);
+}
+
+PTXDAGToDAGISel::PTXDAGToDAGISel(PTXTargetMachine &TM,
+ CodeGenOpt::Level OptLevel)
+ : SelectionDAGISel(TM, OptLevel) {}
+
+SDNode *PTXDAGToDAGISel::Select(SDNode *Node) {
+ switch (Node->getOpcode()) {
+ case ISD::BRCOND:
+ return SelectBRCOND(Node);
+ case PTXISD::READ_PARAM:
+ return SelectREADPARAM(Node);
+ case PTXISD::WRITE_PARAM:
+ return SelectWRITEPARAM(Node);
+ case ISD::FrameIndex:
+ return SelectFrameIndex(Node);
+ default:
+ return SelectCode(Node);
+ }
+}
+
+SDNode *PTXDAGToDAGISel::SelectBRCOND(SDNode *Node) {
+ assert(Node->getNumOperands() >= 3);
+
+ SDValue Chain = Node->getOperand(0);
+ SDValue Pred = Node->getOperand(1);
+ SDValue Target = Node->getOperand(2); // branch target
+ SDValue PredOp = CurDAG->getTargetConstant(PTXPredicate::Normal, MVT::i32);
+ DebugLoc dl = Node->getDebugLoc();
+
+ assert(Target.getOpcode() == ISD::BasicBlock);
+ assert(Pred.getValueType() == MVT::i1);
+
+ // Emit BRAdp
+ SDValue Ops[] = { Target, Pred, PredOp, Chain };
+ return CurDAG->getMachineNode(PTX::BRAdp, dl, MVT::Other, Ops, 4);
+}
+
+SDNode *PTXDAGToDAGISel::SelectREADPARAM(SDNode *Node) {
+ SDValue Chain = Node->getOperand(0);
+ SDValue Index = Node->getOperand(1);
+
+ int OpCode;
+
+ // Get the type of parameter we are reading
+ EVT VT = Node->getValueType(0);
+ assert(VT.isSimple() && "READ_PARAM only implemented for MVT types");
+
+ MVT Type = VT.getSimpleVT();
+
+ if (Type == MVT::i1)
+ OpCode = PTX::READPARAMPRED;
+ else if (Type == MVT::i16)
+ OpCode = PTX::READPARAMI16;
+ else if (Type == MVT::i32)
+ OpCode = PTX::READPARAMI32;
+ else if (Type == MVT::i64)
+ OpCode = PTX::READPARAMI64;
+ else if (Type == MVT::f32)
+ OpCode = PTX::READPARAMF32;
+ else {
+ assert(Type == MVT::f64 && "Unexpected type!");
+ OpCode = PTX::READPARAMF64;
+ }
+
+ SDValue Pred = CurDAG->getRegister(PTX::NoRegister, MVT::i1);
+ SDValue PredOp = CurDAG->getTargetConstant(PTXPredicate::None, MVT::i32);
+ DebugLoc dl = Node->getDebugLoc();
+
+ SDValue Ops[] = { Index, Pred, PredOp, Chain };
+ return CurDAG->getMachineNode(OpCode, dl, VT, Ops, 4);
+}
+
+SDNode *PTXDAGToDAGISel::SelectWRITEPARAM(SDNode *Node) {
+
+ SDValue Chain = Node->getOperand(0);
+ SDValue Value = Node->getOperand(1);
+
+ int OpCode;
+
+ //Node->dumpr(CurDAG);
+
+ // Get the type of parameter we are writing
+ EVT VT = Value->getValueType(0);
+ assert(VT.isSimple() && "WRITE_PARAM only implemented for MVT types");
+
+ MVT Type = VT.getSimpleVT();
+
+ if (Type == MVT::i1)
+ OpCode = PTX::WRITEPARAMPRED;
+ else if (Type == MVT::i16)
+ OpCode = PTX::WRITEPARAMI16;
+ else if (Type == MVT::i32)
+ OpCode = PTX::WRITEPARAMI32;
+ else if (Type == MVT::i64)
+ OpCode = PTX::WRITEPARAMI64;
+ else if (Type == MVT::f32)
+ OpCode = PTX::WRITEPARAMF32;
+ else if (Type == MVT::f64)
+ OpCode = PTX::WRITEPARAMF64;
+ else
+ llvm_unreachable("Invalid type in SelectWRITEPARAM");
+
+ SDValue Pred = CurDAG->getRegister(PTX::NoRegister, MVT::i1);
+ SDValue PredOp = CurDAG->getTargetConstant(PTXPredicate::None, MVT::i32);
+ DebugLoc dl = Node->getDebugLoc();
+
+ SDValue Ops[] = { Value, Pred, PredOp, Chain };
+ SDNode* Ret = CurDAG->getMachineNode(OpCode, dl, MVT::Other, Ops, 4);
+
+ //dbgs() << "SelectWRITEPARAM produced:\n\t";
+ //Ret->dumpr(CurDAG);
+
+ return Ret;
+}
+
+SDNode *PTXDAGToDAGISel::SelectFrameIndex(SDNode *Node) {
+ int FI = cast<FrameIndexSDNode>(Node)->getIndex();
+ //dbgs() << "Selecting FrameIndex at index " << FI << "\n";
+ //SDValue TFI = CurDAG->getTargetFrameIndex(FI, Node->getValueType(0));
+
+ PTXMachineFunctionInfo *MFI = MF->getInfo<PTXMachineFunctionInfo>();
+
+ SDValue FrameSymbol = CurDAG->getTargetExternalSymbol(MFI->getFrameSymbol(FI),
+ Node->getValueType(0));
+
+ return FrameSymbol.getNode();
+}
+
+// Match memory operand of the form [reg+reg]
+bool PTXDAGToDAGISel::SelectADDRrr(SDValue &Addr, SDValue &R1, SDValue &R2) {
+ if (Addr.getOpcode() != ISD::ADD || Addr.getNumOperands() < 2 ||
+ isImm(Addr.getOperand(0)) || isImm(Addr.getOperand(1)))
+ return false;
+
+ assert(Addr.getValueType().isSimple() && "Type must be simple");
+
+ R1 = Addr;
+ R2 = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT());
+
+ return true;
+}
+
+// Match memory operand of the form [reg], [imm+reg], and [reg+imm]
+bool PTXDAGToDAGISel::SelectADDRri(SDValue &Addr, SDValue &Base,
+ SDValue &Offset) {
+ // FrameIndex addresses are handled separately
+ //errs() << "SelectADDRri: ";
+ //Addr.getNode()->dumpr();
+ if (isa<FrameIndexSDNode>(Addr)) {
+ //errs() << "Failure\n";
+ return false;
+ }
+
+ if (CurDAG->isBaseWithConstantOffset(Addr)) {
+ Base = Addr.getOperand(0);
+ if (isa<FrameIndexSDNode>(Base)) {
+ //errs() << "Failure\n";
+ return false;
+ }
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
+ Offset = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32);
+ //errs() << "Success\n";
+ return true;
+ }
+
+ /*if (Addr.getNumOperands() == 1) {
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT());
+ errs() << "Success\n";
+ return true;
+ }*/
+
+ //errs() << "SelectADDRri fails on: ";
+ //Addr.getNode()->dumpr();
+
+ if (isImm(Addr)) {
+ //errs() << "Failure\n";
+ return false;
+ }
+
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT());
+
+ //errs() << "Success\n";
+ return true;
+
+ /*if (Addr.getOpcode() != ISD::ADD) {
+ // let SelectADDRii handle the [imm] case
+ if (isImm(Addr))
+ return false;
+ // it is [reg]
+
+ assert(Addr.getValueType().isSimple() && "Type must be simple");
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT());
+
+ return true;
+ }
+
+ if (Addr.getNumOperands() < 2)
+ return false;
+
+ // let SelectADDRii handle the [imm+imm] case
+ if (isImm(Addr.getOperand(0)) && isImm(Addr.getOperand(1)))
+ return false;
+
+ // try [reg+imm] and [imm+reg]
+ for (int i = 0; i < 2; i ++)
+ if (SelectImm(Addr.getOperand(1-i), Offset)) {
+ Base = Addr.getOperand(i);
+ return true;
+ }
+
+ // neither [reg+imm] nor [imm+reg]
+ return false;*/
+}
+
+// Match memory operand of the form [imm+imm] and [imm]
+bool PTXDAGToDAGISel::SelectADDRii(SDValue &Addr, SDValue &Base,
+ SDValue &Offset) {
+ // is [imm+imm]?
+ if (Addr.getOpcode() == ISD::ADD) {
+ return SelectImm(Addr.getOperand(0), Base) &&
+ SelectImm(Addr.getOperand(1), Offset);
+ }
+
+ // is [imm]?
+ if (SelectImm(Addr, Base)) {
+ assert(Addr.getValueType().isSimple() && "Type must be simple");
+
+ Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT());
+
+ return true;
+ }
+
+ return false;
+}
+
+// Match memory operand of the form [reg], [imm+reg], and [reg+imm]
+bool PTXDAGToDAGISel::SelectADDRlocal(SDValue &Addr, SDValue &Base,
+ SDValue &Offset) {
+ //errs() << "SelectADDRlocal: ";
+ //Addr.getNode()->dumpr();
+ if (isa<FrameIndexSDNode>(Addr)) {
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT());
+ //errs() << "Success\n";
+ return true;
+ }
+
+ if (CurDAG->isBaseWithConstantOffset(Addr)) {
+ Base = Addr.getOperand(0);
+ if (!isa<FrameIndexSDNode>(Base)) {
+ //errs() << "Failure\n";
+ return false;
+ }
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
+ Offset = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32);
+ //errs() << "Offset: ";
+ //Offset.getNode()->dumpr();
+ //errs() << "Success\n";
+ return true;
+ }
+
+ //errs() << "Failure\n";
+ return false;
+}
+
+bool PTXDAGToDAGISel::isImm(const SDValue &operand) {
+ return ConstantSDNode::classof(operand.getNode());
+}
+
+bool PTXDAGToDAGISel::SelectImm(const SDValue &operand, SDValue &imm) {
+ SDNode *node = operand.getNode();
+ if (!ConstantSDNode::classof(node))
+ return false;
+
+ ConstantSDNode *CN = cast<ConstantSDNode>(node);
+ imm = CurDAG->getTargetConstant(*CN->getConstantIntValue(),
+ operand.getValueType());
+ return true;
+}
+
+const PTXSubtarget& PTXDAGToDAGISel::getSubtarget() const
+{
+ return TM.getSubtarget<PTXSubtarget>();
+}
+
diff --git a/contrib/llvm/lib/Target/PTX/PTXISelLowering.cpp b/contrib/llvm/lib/Target/PTX/PTXISelLowering.cpp
new file mode 100644
index 0000000..3307d91
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXISelLowering.cpp
@@ -0,0 +1,422 @@
+//===-- PTXISelLowering.cpp - PTX DAG Lowering Implementation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PTXTargetLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PTX.h"
+#include "PTXISelLowering.h"
+#include "PTXMachineFunctionInfo.h"
+#include "PTXRegisterInfo.h"
+#include "PTXSubtarget.h"
+#include "llvm/Function.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// TargetLowering Implementation
+//===----------------------------------------------------------------------===//
+
+PTXTargetLowering::PTXTargetLowering(TargetMachine &TM)
+ : TargetLowering(TM, new TargetLoweringObjectFileELF()) {
+ // Set up the register classes.
+ addRegisterClass(MVT::i1, PTX::RegPredRegisterClass);
+ addRegisterClass(MVT::i16, PTX::RegI16RegisterClass);
+ addRegisterClass(MVT::i32, PTX::RegI32RegisterClass);
+ addRegisterClass(MVT::i64, PTX::RegI64RegisterClass);
+ addRegisterClass(MVT::f32, PTX::RegF32RegisterClass);
+ addRegisterClass(MVT::f64, PTX::RegF64RegisterClass);
+
+ setBooleanContents(ZeroOrOneBooleanContent);
+ setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
+ setMinFunctionAlignment(2);
+
+ ////////////////////////////////////
+ /////////// Expansion //////////////
+ ////////////////////////////////////
+
+ // (any/zero/sign) extload => load + (any/zero/sign) extend
+
+ setLoadExtAction(ISD::EXTLOAD, MVT::i16, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Expand);
+
+ // f32 extload => load + fextend
+
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+
+ // f64 truncstore => trunc + store
+
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
+ // sign_extend_inreg => sign_extend
+
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+
+ // br_cc => brcond
+
+ setOperationAction(ISD::BR_CC, MVT::Other, Expand);
+
+ // select_cc => setcc
+
+ setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
+
+ ////////////////////////////////////
+ //////////// Legal /////////////////
+ ////////////////////////////////////
+
+ setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
+ setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
+
+ ////////////////////////////////////
+ //////////// Custom ////////////////
+ ////////////////////////////////////
+
+ // customise setcc to use bitwise logic if possible
+
+ setOperationAction(ISD::SETCC, MVT::i1, Custom);
+
+ // customize translation of memory addresses
+
+ setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
+
+ // Compute derived properties from the register classes
+ computeRegisterProperties();
+}
+
+EVT PTXTargetLowering::getSetCCResultType(EVT VT) const {
+ return MVT::i1;
+}
+
+SDValue PTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+ switch (Op.getOpcode()) {
+ default:
+ llvm_unreachable("Unimplemented operand");
+ case ISD::SETCC:
+ return LowerSETCC(Op, DAG);
+ case ISD::GlobalAddress:
+ return LowerGlobalAddress(Op, DAG);
+ }
+}
+
+const char *PTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ default:
+ llvm_unreachable("Unknown opcode");
+ case PTXISD::COPY_ADDRESS:
+ return "PTXISD::COPY_ADDRESS";
+ case PTXISD::LOAD_PARAM:
+ return "PTXISD::LOAD_PARAM";
+ case PTXISD::STORE_PARAM:
+ return "PTXISD::STORE_PARAM";
+ case PTXISD::READ_PARAM:
+ return "PTXISD::READ_PARAM";
+ case PTXISD::WRITE_PARAM:
+ return "PTXISD::WRITE_PARAM";
+ case PTXISD::EXIT:
+ return "PTXISD::EXIT";
+ case PTXISD::RET:
+ return "PTXISD::RET";
+ case PTXISD::CALL:
+ return "PTXISD::CALL";
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Custom Lower Operation
+//===----------------------------------------------------------------------===//
+
+SDValue PTXTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
+ assert(Op.getValueType() == MVT::i1 && "SetCC type must be 1-bit integer");
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ SDValue Op2 = Op.getOperand(2);
+ DebugLoc dl = Op.getDebugLoc();
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+
+ // Look for X == 0, X == 1, X != 0, or X != 1
+ // We can simplify these to bitwise logic
+
+ if (Op1.getOpcode() == ISD::Constant &&
+ (cast<ConstantSDNode>(Op1)->getZExtValue() == 1 ||
+ cast<ConstantSDNode>(Op1)->isNullValue()) &&
+ (CC == ISD::SETEQ || CC == ISD::SETNE)) {
+
+ return DAG.getNode(ISD::AND, dl, MVT::i1, Op0, Op1);
+ }
+
+ return DAG.getNode(ISD::SETCC, dl, MVT::i1, Op0, Op1, Op2);
+}
+
+SDValue PTXTargetLowering::
+LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
+ EVT PtrVT = getPointerTy();
+ DebugLoc dl = Op.getDebugLoc();
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+
+ assert(PtrVT.isSimple() && "Pointer must be to primitive type.");
+
+ SDValue targetGlobal = DAG.getTargetGlobalAddress(GV, dl, PtrVT);
+ SDValue movInstr = DAG.getNode(PTXISD::COPY_ADDRESS,
+ dl,
+ PtrVT.getSimpleVT(),
+ targetGlobal);
+
+ return movInstr;
+}
+
+//===----------------------------------------------------------------------===//
+// Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+SDValue PTXTargetLowering::
+ LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ if (isVarArg) llvm_unreachable("PTX does not support varargs");
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ const PTXSubtarget& ST = getTargetMachine().getSubtarget<PTXSubtarget>();
+ PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>();
+ PTXParamManager &PM = MFI->getParamManager();
+
+ switch (CallConv) {
+ default:
+ llvm_unreachable("Unsupported calling convention");
+ break;
+ case CallingConv::PTX_Kernel:
+ MFI->setKernel(true);
+ break;
+ case CallingConv::PTX_Device:
+ MFI->setKernel(false);
+ break;
+ }
+
+ // We do one of two things here:
+ // IsKernel || SM >= 2.0 -> Use param space for arguments
+ // SM < 2.0 -> Use registers for arguments
+ if (MFI->isKernel() || ST.useParamSpaceForDeviceArgs()) {
+ // We just need to emit the proper LOAD_PARAM ISDs
+ for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
+ assert((!MFI->isKernel() || Ins[i].VT != MVT::i1) &&
+ "Kernels cannot take pred operands");
+
+ unsigned ParamSize = Ins[i].VT.getStoreSizeInBits();
+ unsigned Param = PM.addArgumentParam(ParamSize);
+ const std::string &ParamName = PM.getParamName(Param);
+ SDValue ParamValue = DAG.getTargetExternalSymbol(ParamName.c_str(),
+ MVT::Other);
+ SDValue ArgValue = DAG.getNode(PTXISD::LOAD_PARAM, dl, Ins[i].VT, Chain,
+ ParamValue);
+ InVals.push_back(ArgValue);
+ }
+ }
+ else {
+ for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
+ EVT RegVT = Ins[i].VT;
+ TargetRegisterClass* TRC = getRegClassFor(RegVT);
+
+ // Use a unique index in the instruction to prevent instruction folding.
+ // Yes, this is a hack.
+ SDValue Index = DAG.getTargetConstant(i, MVT::i32);
+ unsigned Reg = MF.getRegInfo().createVirtualRegister(TRC);
+ SDValue ArgValue = DAG.getNode(PTXISD::READ_PARAM, dl, RegVT, Chain,
+ Index);
+
+ InVals.push_back(ArgValue);
+
+ MFI->addArgReg(Reg);
+ }
+ }
+
+ return Chain;
+}
+
+SDValue PTXTargetLowering::
+ LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl,
+ SelectionDAG &DAG) const {
+ if (isVarArg) llvm_unreachable("PTX does not support varargs");
+
+ switch (CallConv) {
+ default:
+ llvm_unreachable("Unsupported calling convention.");
+ case CallingConv::PTX_Kernel:
+ assert(Outs.size() == 0 && "Kernel must return void.");
+ return DAG.getNode(PTXISD::EXIT, dl, MVT::Other, Chain);
+ case CallingConv::PTX_Device:
+ assert(Outs.size() <= 1 && "Can at most return one value.");
+ break;
+ }
+
+ MachineFunction& MF = DAG.getMachineFunction();
+ PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>();
+ PTXParamManager &PM = MFI->getParamManager();
+
+ SDValue Flag;
+ const PTXSubtarget& ST = getTargetMachine().getSubtarget<PTXSubtarget>();
+
+ if (ST.useParamSpaceForDeviceArgs()) {
+ assert(Outs.size() < 2 && "Device functions can return at most one value");
+
+ if (Outs.size() == 1) {
+ unsigned ParamSize = OutVals[0].getValueType().getSizeInBits();
+ unsigned Param = PM.addReturnParam(ParamSize);
+ const std::string &ParamName = PM.getParamName(Param);
+ SDValue ParamValue = DAG.getTargetExternalSymbol(ParamName.c_str(),
+ MVT::Other);
+ Chain = DAG.getNode(PTXISD::STORE_PARAM, dl, MVT::Other, Chain,
+ ParamValue, OutVals[0]);
+ }
+ } else {
+ for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
+ EVT RegVT = Outs[i].VT;
+ TargetRegisterClass* TRC = 0;
+
+ // Determine which register class we need
+ if (RegVT == MVT::i1) {
+ TRC = PTX::RegPredRegisterClass;
+ }
+ else if (RegVT == MVT::i16) {
+ TRC = PTX::RegI16RegisterClass;
+ }
+ else if (RegVT == MVT::i32) {
+ TRC = PTX::RegI32RegisterClass;
+ }
+ else if (RegVT == MVT::i64) {
+ TRC = PTX::RegI64RegisterClass;
+ }
+ else if (RegVT == MVT::f32) {
+ TRC = PTX::RegF32RegisterClass;
+ }
+ else if (RegVT == MVT::f64) {
+ TRC = PTX::RegF64RegisterClass;
+ }
+ else {
+ llvm_unreachable("Unknown parameter type");
+ }
+
+ unsigned Reg = MF.getRegInfo().createVirtualRegister(TRC);
+
+ SDValue Copy = DAG.getCopyToReg(Chain, dl, Reg, OutVals[i]/*, Flag*/);
+ SDValue OutReg = DAG.getRegister(Reg, RegVT);
+
+ Chain = DAG.getNode(PTXISD::WRITE_PARAM, dl, MVT::Other, Copy, OutReg);
+
+ MFI->addRetReg(Reg);
+ }
+ }
+
+ if (Flag.getNode() == 0) {
+ return DAG.getNode(PTXISD::RET, dl, MVT::Other, Chain);
+ }
+ else {
+ return DAG.getNode(PTXISD::RET, dl, MVT::Other, Chain, Flag);
+ }
+}
+
+SDValue
+PTXTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+
+ MachineFunction& MF = DAG.getMachineFunction();
+ PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>();
+ PTXParamManager &PM = MFI->getParamManager();
+
+ assert(getTargetMachine().getSubtarget<PTXSubtarget>().callsAreHandled() &&
+ "Calls are not handled for the target device");
+
+ std::vector<SDValue> Ops;
+ // The layout of the ops will be [Chain, #Ins, Ins, Callee, #Outs, Outs]
+ Ops.resize(Outs.size() + Ins.size() + 4);
+
+ Ops[0] = Chain;
+
+ // Identify the callee function
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
+ assert(cast<Function>(GV)->getCallingConv() == CallingConv::PTX_Device &&
+ "PTX function calls must be to PTX device functions");
+ Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy());
+ Ops[Ins.size()+2] = Callee;
+
+ // Generate STORE_PARAM nodes for each function argument. In PTX, function
+ // arguments are explicitly stored into .param variables and passed as
+ // arguments. There is no register/stack-based calling convention in PTX.
+ Ops[Ins.size()+3] = DAG.getTargetConstant(OutVals.size(), MVT::i32);
+ for (unsigned i = 0; i != OutVals.size(); ++i) {
+ unsigned Size = OutVals[i].getValueType().getSizeInBits();
+ unsigned Param = PM.addLocalParam(Size);
+ const std::string &ParamName = PM.getParamName(Param);
+ SDValue ParamValue = DAG.getTargetExternalSymbol(ParamName.c_str(),
+ MVT::Other);
+ Chain = DAG.getNode(PTXISD::STORE_PARAM, dl, MVT::Other, Chain,
+ ParamValue, OutVals[i]);
+ Ops[i+Ins.size()+4] = ParamValue;
+ }
+
+ std::vector<SDValue> InParams;
+
+ // Generate list of .param variables to hold the return value(s).
+ Ops[1] = DAG.getTargetConstant(Ins.size(), MVT::i32);
+ for (unsigned i = 0; i < Ins.size(); ++i) {
+ unsigned Size = Ins[i].VT.getStoreSizeInBits();
+ unsigned Param = PM.addLocalParam(Size);
+ const std::string &ParamName = PM.getParamName(Param);
+ SDValue ParamValue = DAG.getTargetExternalSymbol(ParamName.c_str(),
+ MVT::Other);
+ Ops[i+2] = ParamValue;
+ InParams.push_back(ParamValue);
+ }
+
+ Ops[0] = Chain;
+
+ // Create the CALL node.
+ Chain = DAG.getNode(PTXISD::CALL, dl, MVT::Other, &Ops[0], Ops.size());
+
+ // Create the LOAD_PARAM nodes that retrieve the function return value(s).
+ for (unsigned i = 0; i < Ins.size(); ++i) {
+ SDValue Load = DAG.getNode(PTXISD::LOAD_PARAM, dl, Ins[i].VT, Chain,
+ InParams[i]);
+ InVals.push_back(Load);
+ }
+
+ return Chain;
+}
+
+unsigned PTXTargetLowering::getNumRegisters(LLVMContext &Context, EVT VT) {
+ // All arguments consist of one "register," regardless of the type.
+ return 1;
+}
+
diff --git a/contrib/llvm/lib/Target/PTX/PTXISelLowering.h b/contrib/llvm/lib/Target/PTX/PTXISelLowering.h
new file mode 100644
index 0000000..4d25665
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXISelLowering.h
@@ -0,0 +1,85 @@
+//==-- PTXISelLowering.h - PTX DAG Lowering Interface ------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that PTX uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTX_ISEL_LOWERING_H
+#define PTX_ISEL_LOWERING_H
+
+#include "llvm/Target/TargetLowering.h"
+
+namespace llvm {
+class PTXSubtarget;
+class PTXTargetMachine;
+
+namespace PTXISD {
+ enum NodeType {
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+ LOAD_PARAM,
+ STORE_PARAM,
+ READ_PARAM,
+ WRITE_PARAM,
+ EXIT,
+ RET,
+ COPY_ADDRESS,
+ CALL
+ };
+} // namespace PTXISD
+
+class PTXTargetLowering : public TargetLowering {
+ public:
+ explicit PTXTargetLowering(TargetMachine &TM);
+
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+
+ virtual SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
+
+ virtual SDValue
+ LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl,
+ SelectionDAG &DAG) const;
+
+ virtual SDValue
+ LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual EVT getSetCCResultType(EVT VT) const;
+
+ virtual unsigned getNumRegisters(LLVMContext &Context, EVT VT);
+
+ private:
+ SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+}; // class PTXTargetLowering
+} // namespace llvm
+
+#endif // PTX_ISEL_LOWERING_H
diff --git a/contrib/llvm/lib/Target/PTX/PTXInstrFormats.td b/contrib/llvm/lib/Target/PTX/PTXInstrFormats.td
new file mode 100644
index 0000000..397fdc3
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXInstrFormats.td
@@ -0,0 +1,51 @@
+//===- PTXInstrFormats.td - PTX Instruction Formats ----------*- tblgen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+// Rounding Mode Specifier
+/*class RoundingMode<bits<3> val> {
+ bits<3> Value = val;
+}
+
+def RndDefault : RoundingMode<0>;
+def RndNearestEven : RoundingMode<1>;
+def RndNearestZero : RoundingMode<2>;
+def RndNegInf : RoundingMode<3>;
+def RndPosInf : RoundingMode<4>;
+def RndApprox : RoundingMode<5>;*/
+
+
+// Rounding Mode Operand
+def RndMode : Operand<i32> {
+ let PrintMethod = "printRoundingMode";
+}
+
+def RndDefault : PatLeaf<(i32 0)>;
+
+// PTX Predicate operand, default to (0, 0) = (zero-reg, none).
+// Leave PrintMethod empty; predicate printing is defined elsewhere.
+def pred : PredicateOperand<OtherVT, (ops RegPred, i32imm),
+ (ops (i1 zero_reg), (i32 2))>;
+
+def RndModeOperand : Operand<OtherVT> {
+ let MIOperandInfo = (ops i32imm);
+}
+
+// Instruction Types
+let Namespace = "PTX" in {
+
+ class InstPTX<dag oops, dag iops, string asmstr, list<dag> pattern>
+ : Instruction {
+ dag OutOperandList = oops;
+ dag InOperandList = !con(iops, (ins pred:$_p));
+ let AsmString = asmstr; // Predicate printing is defined elsewhere.
+ let Pattern = pattern;
+ let isPredicable = 1;
+ }
+}
diff --git a/contrib/llvm/lib/Target/PTX/PTXInstrInfo.cpp b/contrib/llvm/lib/Target/PTX/PTXInstrInfo.cpp
new file mode 100644
index 0000000..1b947a5
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXInstrInfo.cpp
@@ -0,0 +1,362 @@
+//===- PTXInstrInfo.cpp - PTX Instruction Information ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PTX implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ptx-instrinfo"
+
+#include "PTX.h"
+#include "PTXInstrInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define GET_INSTRINFO_CTOR
+#include "PTXGenInstrInfo.inc"
+
+using namespace llvm;
+
+PTXInstrInfo::PTXInstrInfo(PTXTargetMachine &_TM)
+ : PTXGenInstrInfo(),
+ RI(_TM, *this), TM(_TM) {}
+
+static const struct map_entry {
+ const TargetRegisterClass *cls;
+ const int opcode;
+} map[] = {
+ { &PTX::RegI16RegClass, PTX::MOVU16rr },
+ { &PTX::RegI32RegClass, PTX::MOVU32rr },
+ { &PTX::RegI64RegClass, PTX::MOVU64rr },
+ { &PTX::RegF32RegClass, PTX::MOVF32rr },
+ { &PTX::RegF64RegClass, PTX::MOVF64rr },
+ { &PTX::RegPredRegClass, PTX::MOVPREDrr }
+};
+
+void PTXInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DstReg, unsigned SrcReg,
+ bool KillSrc) const {
+
+ const MachineRegisterInfo& MRI = MBB.getParent()->getRegInfo();
+ //assert(MRI.getRegClass(SrcReg) == MRI.getRegClass(DstReg) &&
+ // "Invalid register copy between two register classes");
+
+ for (int i = 0, e = sizeof(map)/sizeof(map[0]); i != e; ++i) {
+ if (map[i].cls == MRI.getRegClass(DstReg)) {
+ const MCInstrDesc &MCID = get(map[i].opcode);
+ MachineInstr *MI = BuildMI(MBB, I, DL, MCID, DstReg).
+ addReg(SrcReg, getKillRegState(KillSrc));
+ AddDefaultPredicate(MI);
+ return;
+ }
+ }
+
+ llvm_unreachable("Impossible reg-to-reg copy");
+}
+
+bool PTXInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DstReg, unsigned SrcReg,
+ const TargetRegisterClass *DstRC,
+ const TargetRegisterClass *SrcRC,
+ DebugLoc DL) const {
+ if (DstRC != SrcRC)
+ return false;
+
+ for (int i = 0, e = sizeof(map)/sizeof(map[0]); i != e; ++ i)
+ if (DstRC == map[i].cls) {
+ const MCInstrDesc &MCID = get(map[i].opcode);
+ MachineInstr *MI = BuildMI(MBB, I, DL, MCID, DstReg).addReg(SrcReg);
+ AddDefaultPredicate(MI);
+ return true;
+ }
+
+ return false;
+}
+
+bool PTXInstrInfo::isMoveInstr(const MachineInstr& MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SrcSubIdx, unsigned &DstSubIdx) const {
+ switch (MI.getOpcode()) {
+ default:
+ return false;
+ case PTX::MOVU16rr:
+ case PTX::MOVU32rr:
+ case PTX::MOVU64rr:
+ case PTX::MOVF32rr:
+ case PTX::MOVF64rr:
+ case PTX::MOVPREDrr:
+ assert(MI.getNumOperands() >= 2 &&
+ MI.getOperand(0).isReg() && MI.getOperand(1).isReg() &&
+ "Invalid register-register move instruction");
+ SrcSubIdx = DstSubIdx = 0; // No sub-registers
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(1).getReg();
+ return true;
+ }
+}
+
+// predicate support
+
+bool PTXInstrInfo::isPredicated(const MachineInstr *MI) const {
+ int i = MI->findFirstPredOperandIdx();
+ return i != -1 && MI->getOperand(i).getReg() != PTX::NoRegister;
+}
+
+bool PTXInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
+ return !isPredicated(MI) && get(MI->getOpcode()).isTerminator();
+}
+
+bool PTXInstrInfo::
+PredicateInstruction(MachineInstr *MI,
+ const SmallVectorImpl<MachineOperand> &Pred) const {
+ if (Pred.size() < 2)
+ llvm_unreachable("lesser than 2 predicate operands are provided");
+
+ int i = MI->findFirstPredOperandIdx();
+ if (i == -1)
+ llvm_unreachable("missing predicate operand");
+
+ MI->getOperand(i).setReg(Pred[0].getReg());
+ MI->getOperand(i+1).setImm(Pred[1].getImm());
+
+ return true;
+}
+
+bool PTXInstrInfo::
+SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+ const SmallVectorImpl<MachineOperand> &Pred2) const {
+ const MachineOperand &PredReg1 = Pred1[0];
+ const MachineOperand &PredReg2 = Pred2[0];
+ if (PredReg1.getReg() != PredReg2.getReg())
+ return false;
+
+ const MachineOperand &PredOp1 = Pred1[1];
+ const MachineOperand &PredOp2 = Pred2[1];
+ if (PredOp1.getImm() != PredOp2.getImm())
+ return false;
+
+ return true;
+}
+
+bool PTXInstrInfo::
+DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const {
+ // If an instruction sets a predicate register, it defines a predicate.
+
+ // TODO supprot 5-operand format of setp instruction
+
+ if (MI->getNumOperands() < 1)
+ return false;
+
+ const MachineOperand &MO = MI->getOperand(0);
+
+ if (!MO.isReg() || RI.getRegClass(MO.getReg()) != &PTX::RegPredRegClass)
+ return false;
+
+ Pred.push_back(MO);
+ Pred.push_back(MachineOperand::CreateImm(PTXPredicate::None));
+ return true;
+}
+
+// branch support
+
+bool PTXInstrInfo::
+AnalyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ // TODO implement cases when AllowModify is true
+
+ if (MBB.empty())
+ return true;
+
+ MachineBasicBlock::const_iterator iter = MBB.end();
+ const MachineInstr& instLast1 = *--iter;
+ const MCInstrDesc &desc1 = instLast1.getDesc();
+ // for special case that MBB has only 1 instruction
+ const bool IsSizeOne = MBB.size() == 1;
+ // if IsSizeOne is true, *--iter and instLast2 are invalid
+ // we put a dummy value in instLast2 and desc2 since they are used
+ const MachineInstr& instLast2 = IsSizeOne ? instLast1 : *--iter;
+ const MCInstrDesc &desc2 = IsSizeOne ? desc1 : instLast2.getDesc();
+
+ DEBUG(dbgs() << "\n");
+ DEBUG(dbgs() << "AnalyzeBranch: opcode: " << instLast1.getOpcode() << "\n");
+ DEBUG(dbgs() << "AnalyzeBranch: MBB: " << MBB.getName().str() << "\n");
+ DEBUG(dbgs() << "AnalyzeBranch: TBB: " << TBB << "\n");
+ DEBUG(dbgs() << "AnalyzeBranch: FBB: " << FBB << "\n");
+
+ // this block ends with no branches
+ if (!IsAnyKindOfBranch(instLast1)) {
+ DEBUG(dbgs() << "AnalyzeBranch: ends with no branch\n");
+ return false;
+ }
+
+ // this block ends with only an unconditional branch
+ if (desc1.isUnconditionalBranch() &&
+ // when IsSizeOne is true, it "absorbs" the evaluation of instLast2
+ (IsSizeOne || !IsAnyKindOfBranch(instLast2))) {
+ DEBUG(dbgs() << "AnalyzeBranch: ends with only uncond branch\n");
+ TBB = GetBranchTarget(instLast1);
+ return false;
+ }
+
+ // this block ends with a conditional branch and
+ // it falls through to a successor block
+ if (desc1.isConditionalBranch() &&
+ IsAnySuccessorAlsoLayoutSuccessor(MBB)) {
+ DEBUG(dbgs() << "AnalyzeBranch: ends with cond branch and fall through\n");
+ TBB = GetBranchTarget(instLast1);
+ int i = instLast1.findFirstPredOperandIdx();
+ Cond.push_back(instLast1.getOperand(i));
+ Cond.push_back(instLast1.getOperand(i+1));
+ return false;
+ }
+
+ // when IsSizeOne is true, we are done
+ if (IsSizeOne)
+ return true;
+
+ // this block ends with a conditional branch
+ // followed by an unconditional branch
+ if (desc2.isConditionalBranch() &&
+ desc1.isUnconditionalBranch()) {
+ DEBUG(dbgs() << "AnalyzeBranch: ends with cond and uncond branch\n");
+ TBB = GetBranchTarget(instLast2);
+ FBB = GetBranchTarget(instLast1);
+ int i = instLast2.findFirstPredOperandIdx();
+ Cond.push_back(instLast2.getOperand(i));
+ Cond.push_back(instLast2.getOperand(i+1));
+ return false;
+ }
+
+ // branch cannot be understood
+ DEBUG(dbgs() << "AnalyzeBranch: cannot be understood\n");
+ return true;
+}
+
+unsigned PTXInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ unsigned count = 0;
+ while (!MBB.empty())
+ if (IsAnyKindOfBranch(MBB.back())) {
+ MBB.pop_back();
+ ++count;
+ } else
+ break;
+ DEBUG(dbgs() << "RemoveBranch: MBB: " << MBB.getName().str() << "\n");
+ DEBUG(dbgs() << "RemoveBranch: remove " << count << " branch inst\n");
+ return count;
+}
+
+unsigned PTXInstrInfo::
+InsertBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
+ DEBUG(dbgs() << "InsertBranch: MBB: " << MBB.getName().str() << "\n");
+ DEBUG(if (TBB) dbgs() << "InsertBranch: TBB: " << TBB->getName().str()
+ << "\n";
+ else dbgs() << "InsertBranch: TBB: (NULL)\n");
+ DEBUG(if (FBB) dbgs() << "InsertBranch: FBB: " << FBB->getName().str()
+ << "\n";
+ else dbgs() << "InsertBranch: FBB: (NULL)\n");
+ DEBUG(dbgs() << "InsertBranch: Cond size: " << Cond.size() << "\n");
+
+ assert(TBB && "TBB is NULL");
+
+ if (FBB) {
+ BuildMI(&MBB, DL, get(PTX::BRAdp))
+ .addMBB(TBB).addReg(Cond[0].getReg()).addImm(Cond[1].getImm());
+ BuildMI(&MBB, DL, get(PTX::BRAd))
+ .addMBB(FBB).addReg(PTX::NoRegister).addImm(PTXPredicate::None);
+ return 2;
+ } else if (Cond.size()) {
+ BuildMI(&MBB, DL, get(PTX::BRAdp))
+ .addMBB(TBB).addReg(Cond[0].getReg()).addImm(Cond[1].getImm());
+ return 1;
+ } else {
+ BuildMI(&MBB, DL, get(PTX::BRAd))
+ .addMBB(TBB).addReg(PTX::NoRegister).addImm(PTXPredicate::None);
+ return 1;
+ }
+}
+
+// Memory operand folding for spills
+void PTXInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MII,
+ unsigned SrcReg, bool isKill, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ assert(false && "storeRegToStackSlot should not be called for PTX");
+}
+
+void PTXInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MII,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ assert(false && "loadRegFromStackSlot should not be called for PTX");
+}
+
+// static helper routines
+
+MachineSDNode *PTXInstrInfo::
+GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode,
+ DebugLoc dl, EVT VT, SDValue Op1) {
+ SDValue predReg = DAG->getRegister(PTX::NoRegister, MVT::i1);
+ SDValue predOp = DAG->getTargetConstant(PTXPredicate::None, MVT::i32);
+ SDValue ops[] = { Op1, predReg, predOp };
+ return DAG->getMachineNode(Opcode, dl, VT, ops, array_lengthof(ops));
+}
+
+MachineSDNode *PTXInstrInfo::
+GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode,
+ DebugLoc dl, EVT VT, SDValue Op1, SDValue Op2) {
+ SDValue predReg = DAG->getRegister(PTX::NoRegister, MVT::i1);
+ SDValue predOp = DAG->getTargetConstant(PTXPredicate::None, MVT::i32);
+ SDValue ops[] = { Op1, Op2, predReg, predOp };
+ return DAG->getMachineNode(Opcode, dl, VT, ops, array_lengthof(ops));
+}
+
+void PTXInstrInfo::AddDefaultPredicate(MachineInstr *MI) {
+ if (MI->findFirstPredOperandIdx() == -1) {
+ MI->addOperand(MachineOperand::CreateReg(PTX::NoRegister, /*IsDef=*/false));
+ MI->addOperand(MachineOperand::CreateImm(PTXPredicate::None));
+ }
+}
+
+bool PTXInstrInfo::IsAnyKindOfBranch(const MachineInstr& inst) {
+ const MCInstrDesc &desc = inst.getDesc();
+ return desc.isTerminator() || desc.isBranch() || desc.isIndirectBranch();
+}
+
+bool PTXInstrInfo::
+IsAnySuccessorAlsoLayoutSuccessor(const MachineBasicBlock& MBB) {
+ for (MachineBasicBlock::const_succ_iterator
+ i = MBB.succ_begin(), e = MBB.succ_end(); i != e; ++i)
+ if (MBB.isLayoutSuccessor((const MachineBasicBlock*) &*i))
+ return true;
+ return false;
+}
+
+MachineBasicBlock *PTXInstrInfo::GetBranchTarget(const MachineInstr& inst) {
+ // FIXME So far all branch instructions put destination in 1st operand
+ const MachineOperand& target = inst.getOperand(0);
+ assert(target.isMBB() && "FIXME: detect branch target operand");
+ return target.getMBB();
+}
diff --git a/contrib/llvm/lib/Target/PTX/PTXInstrInfo.h b/contrib/llvm/lib/Target/PTX/PTXInstrInfo.h
new file mode 100644
index 0000000..871f1ac
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXInstrInfo.h
@@ -0,0 +1,133 @@
+//===- PTXInstrInfo.h - PTX Instruction Information -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PTX implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTX_INSTR_INFO_H
+#define PTX_INSTR_INFO_H
+
+#include "PTXRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+#define GET_INSTRINFO_HEADER
+#include "PTXGenInstrInfo.inc"
+
+namespace llvm {
+class PTXTargetMachine;
+
+class MachineSDNode;
+class SDValue;
+class SelectionDAG;
+
+class PTXInstrInfo : public PTXGenInstrInfo {
+private:
+ const PTXRegisterInfo RI;
+ PTXTargetMachine &TM;
+
+public:
+ explicit PTXInstrInfo(PTXTargetMachine &_TM);
+
+ virtual const PTXRegisterInfo &getRegisterInfo() const { return RI; }
+
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DstReg, unsigned SrcReg,
+ bool KillSrc) const;
+
+ virtual bool copyRegToReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DstReg, unsigned SrcReg,
+ const TargetRegisterClass *DstRC,
+ const TargetRegisterClass *SrcRC,
+ DebugLoc DL) const;
+
+ virtual bool isMoveInstr(const MachineInstr& MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
+
+ // predicate support
+
+ virtual bool isPredicated(const MachineInstr *MI) const;
+
+ virtual bool isUnpredicatedTerminator(const MachineInstr *MI) const;
+
+ virtual
+ bool PredicateInstruction(MachineInstr *MI,
+ const SmallVectorImpl<MachineOperand> &Pred) const;
+
+ virtual
+ bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+ const SmallVectorImpl<MachineOperand> &Pred2) const;
+
+ virtual bool DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const;
+
+ // PTX is fully-predicable
+ virtual bool isPredicable(MachineInstr *MI) const { return true; }
+
+ // branch support
+
+ virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify = false) const;
+
+ virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
+
+ // Memory operand folding for spills
+ // TODO: Implement this eventually and get rid of storeRegToStackSlot and
+ // loadRegFromStackSlot. Doing so will get rid of the "stack" registers
+ // we currently use to spill, though I doubt the overall effect on ptxas
+ // output will be large. I have yet to see a case where ptxas is unable
+ // to see through the "stack" register usage and hence generates
+ // efficient code anyway.
+ // virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+ // MachineInstr* MI,
+ // const SmallVectorImpl<unsigned> &Ops,
+ // int FrameIndex) const;
+
+ virtual void storeRegToStackSlot(MachineBasicBlock& MBB,
+ MachineBasicBlock::iterator MII,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass* RC,
+ const TargetRegisterInfo* TRI) const;
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MII,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ // static helper routines
+
+ static MachineSDNode *GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode,
+ DebugLoc dl, EVT VT,
+ SDValue Op1);
+
+ static MachineSDNode *GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode,
+ DebugLoc dl, EVT VT,
+ SDValue Op1, SDValue Op2);
+
+ static void AddDefaultPredicate(MachineInstr *MI);
+
+ static bool IsAnyKindOfBranch(const MachineInstr& inst);
+
+ static bool IsAnySuccessorAlsoLayoutSuccessor(const MachineBasicBlock& MBB);
+
+ static MachineBasicBlock *GetBranchTarget(const MachineInstr& inst);
+}; // class PTXInstrInfo
+} // namespace llvm
+
+#endif // PTX_INSTR_INFO_H
diff --git a/contrib/llvm/lib/Target/PTX/PTXInstrInfo.td b/contrib/llvm/lib/Target/PTX/PTXInstrInfo.td
new file mode 100644
index 0000000..a3fcea9
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXInstrInfo.td
@@ -0,0 +1,1013 @@
+//===- PTXInstrInfo.td - PTX Instruction defs -----------------*- tblgen-*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the PTX instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instruction format superclass
+//===----------------------------------------------------------------------===//
+
+include "PTXInstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// Code Generation Predicates
+//===----------------------------------------------------------------------===//
+
+// Shader Model Support
+def FDivNeedsRoundingMode : Predicate<"getSubtarget().fdivNeedsRoundingMode()">;
+def FDivNoRoundingMode : Predicate<"!getSubtarget().fdivNeedsRoundingMode()">;
+def FMadNeedsRoundingMode : Predicate<"getSubtarget().fmadNeedsRoundingMode()">;
+def FMadNoRoundingMode : Predicate<"!getSubtarget().fmadNeedsRoundingMode()">;
+
+// PTX Version Support
+def SupportsPTX21 : Predicate<"getSubtarget().supportsPTX21()">;
+def DoesNotSupportPTX21 : Predicate<"!getSubtarget().supportsPTX21()">;
+def SupportsPTX22 : Predicate<"getSubtarget().supportsPTX22()">;
+def DoesNotSupportPTX22 : Predicate<"!getSubtarget().supportsPTX22()">;
+def SupportsPTX23 : Predicate<"getSubtarget().supportsPTX23()">;
+def DoesNotSupportPTX23 : Predicate<"!getSubtarget().supportsPTX23()">;
+
+// Fused-Multiply Add
+def SupportsFMA : Predicate<"getSubtarget().supportsFMA()">;
+def DoesNotSupportFMA : Predicate<"!getSubtarget().supportsFMA()">;
+
+
+
+// def SDT_PTXCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>]>;
+// def SDT_PTXCallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
+
+// def PTXcallseq_start : SDNode<"ISD::CALLSEQ_START", SDT_PTXCallSeqStart,
+// [SDNPHasChain, SDNPOutGlue]>;
+// def PTXcallseq_end : SDNode<"ISD::CALLSEQ_END", SDT_PTXCallSeqEnd,
+// [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+def PTXcall : SDNode<"PTXISD::CALL", SDTNone,
+ [SDNPHasChain, SDNPVariadic, SDNPOptInGlue, SDNPOutGlue]>;
+
+
+// Branch & call targets have OtherVT type.
+def brtarget : Operand<OtherVT>;
+def calltarget : Operand<i32>;
+
+//===----------------------------------------------------------------------===//
+// PTX Specific Node Definitions
+//===----------------------------------------------------------------------===//
+
+// PTX allow generic 3-reg shifts like shl r0, r1, r2
+def PTXshl : SDNode<"ISD::SHL", SDTIntBinOp>;
+def PTXsrl : SDNode<"ISD::SRL", SDTIntBinOp>;
+def PTXsra : SDNode<"ISD::SRA", SDTIntBinOp>;
+
+def PTXexit
+ : SDNode<"PTXISD::EXIT", SDTNone, [SDNPHasChain]>;
+def PTXret
+ : SDNode<"PTXISD::RET", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+def PTXcopyaddress
+ : SDNode<"PTXISD::COPY_ADDRESS", SDTypeProfile<1, 1, []>, []>;
+
+
+
+//===----------------------------------------------------------------------===//
+// Instruction Class Templates
+//===----------------------------------------------------------------------===//
+
+// For floating-point instructions, we cannot just embed the pattern into the
+// instruction definition since we need to muck around with the rounding mode,
+// and I do not know how to insert constants into instructions directly from
+// pattern matches.
+
+//===- Floating-Point Instructions - 2 Operand Form -----------------------===//
+multiclass PTX_FLOAT_2OP<string opcstr> {
+ def rr32 : InstPTX<(outs RegF32:$d),
+ (ins RndMode:$r, RegF32:$a),
+ !strconcat(opcstr, "$r.f32\t$d, $a"), []>;
+ def ri32 : InstPTX<(outs RegF32:$d),
+ (ins RndMode:$r, f32imm:$a),
+ !strconcat(opcstr, "$r.f32\t$d, $a"), []>;
+ def rr64 : InstPTX<(outs RegF64:$d),
+ (ins RndMode:$r, RegF64:$a),
+ !strconcat(opcstr, "$r.f64\t$d, $a"), []>;
+ def ri64 : InstPTX<(outs RegF64:$d),
+ (ins RndMode:$r, f64imm:$a),
+ !strconcat(opcstr, "$r.f64\t$d, $a"), []>;
+}
+
+//===- Floating-Point Instructions - 3 Operand Form -----------------------===//
+multiclass PTX_FLOAT_3OP<string opcstr> {
+ def rr32 : InstPTX<(outs RegF32:$d),
+ (ins RndMode:$r, RegF32:$a, RegF32:$b),
+ !strconcat(opcstr, "$r.f32\t$d, $a, $b"), []>;
+ def ri32 : InstPTX<(outs RegF32:$d),
+ (ins RndMode:$r, RegF32:$a, f32imm:$b),
+ !strconcat(opcstr, "$r.f32\t$d, $a, $b"), []>;
+ def rr64 : InstPTX<(outs RegF64:$d),
+ (ins RndMode:$r, RegF64:$a, RegF64:$b),
+ !strconcat(opcstr, "$r.f64\t$d, $a, $b"), []>;
+ def ri64 : InstPTX<(outs RegF64:$d),
+ (ins RndMode:$r, RegF64:$a, f64imm:$b),
+ !strconcat(opcstr, "$r.f64\t$d, $a, $b"), []>;
+}
+
+//===- Floating-Point Instructions - 4 Operand Form -----------------------===//
+multiclass PTX_FLOAT_4OP<string opcstr> {
+ def rrr32 : InstPTX<(outs RegF32:$d),
+ (ins RndMode:$r, RegF32:$a, RegF32:$b, RegF32:$c),
+ !strconcat(opcstr, "$r.f32\t$d, $a, $b, $c"), []>;
+ def rri32 : InstPTX<(outs RegF32:$d),
+ (ins RndMode:$r, RegF32:$a, RegF32:$b, f32imm:$c),
+ !strconcat(opcstr, "$r.f32\t$d, $a, $b, $c"), []>;
+ def rii32 : InstPTX<(outs RegF32:$d),
+ (ins RndMode:$r, RegF32:$a, f32imm:$b, f32imm:$c),
+ !strconcat(opcstr, "$r.f32\t$d, $a, $b, $c"), []>;
+ def rrr64 : InstPTX<(outs RegF64:$d),
+ (ins RndMode:$r, RegF64:$a, RegF64:$b, RegF64:$c),
+ !strconcat(opcstr, "$r.f64\t$d, $a, $b, $c"), []>;
+ def rri64 : InstPTX<(outs RegF64:$d),
+ (ins RndMode:$r, RegF64:$a, RegF64:$b, f64imm:$c),
+ !strconcat(opcstr, "$r.f64\t$d, $a, $b, $c"), []>;
+ def rii64 : InstPTX<(outs RegF64:$d),
+ (ins RndMode:$r, RegF64:$a, f64imm:$b, f64imm:$c),
+ !strconcat(opcstr, "$r.f64\t$d, $a, $b, $c"), []>;
+}
+
+//===- Integer Instructions - 3 Operand Form ------------------------------===//
+multiclass PTX_INT3<string opcstr, SDNode opnode> {
+ def rr16 : InstPTX<(outs RegI16:$d),
+ (ins RegI16:$a, RegI16:$b),
+ !strconcat(opcstr, ".u16\t$d, $a, $b"),
+ [(set RegI16:$d, (opnode RegI16:$a, RegI16:$b))]>;
+ def ri16 : InstPTX<(outs RegI16:$d),
+ (ins RegI16:$a, i16imm:$b),
+ !strconcat(opcstr, ".u16\t$d, $a, $b"),
+ [(set RegI16:$d, (opnode RegI16:$a, imm:$b))]>;
+ def rr32 : InstPTX<(outs RegI32:$d),
+ (ins RegI32:$a, RegI32:$b),
+ !strconcat(opcstr, ".u32\t$d, $a, $b"),
+ [(set RegI32:$d, (opnode RegI32:$a, RegI32:$b))]>;
+ def ri32 : InstPTX<(outs RegI32:$d),
+ (ins RegI32:$a, i32imm:$b),
+ !strconcat(opcstr, ".u32\t$d, $a, $b"),
+ [(set RegI32:$d, (opnode RegI32:$a, imm:$b))]>;
+ def rr64 : InstPTX<(outs RegI64:$d),
+ (ins RegI64:$a, RegI64:$b),
+ !strconcat(opcstr, ".u64\t$d, $a, $b"),
+ [(set RegI64:$d, (opnode RegI64:$a, RegI64:$b))]>;
+ def ri64 : InstPTX<(outs RegI64:$d),
+ (ins RegI64:$a, i64imm:$b),
+ !strconcat(opcstr, ".u64\t$d, $a, $b"),
+ [(set RegI64:$d, (opnode RegI64:$a, imm:$b))]>;
+}
+
+//===- Integer Instructions - 3 Operand Form (Signed) ---------------------===//
+multiclass PTX_INT3_SIGNED<string opcstr, SDNode opnode> {
+ def rr16 : InstPTX<(outs RegI16:$d),
+ (ins RegI16:$a, RegI16:$b),
+ !strconcat(opcstr, ".s16\t$d, $a, $b"),
+ [(set RegI16:$d, (opnode RegI16:$a, RegI16:$b))]>;
+ def ri16 : InstPTX<(outs RegI16:$d),
+ (ins RegI16:$a, i16imm:$b),
+ !strconcat(opcstr, ".s16\t$d, $a, $b"),
+ [(set RegI16:$d, (opnode RegI16:$a, imm:$b))]>;
+ def rr32 : InstPTX<(outs RegI32:$d),
+ (ins RegI32:$a, RegI32:$b),
+ !strconcat(opcstr, ".s32\t$d, $a, $b"),
+ [(set RegI32:$d, (opnode RegI32:$a, RegI32:$b))]>;
+ def ri32 : InstPTX<(outs RegI32:$d),
+ (ins RegI32:$a, i32imm:$b),
+ !strconcat(opcstr, ".s32\t$d, $a, $b"),
+ [(set RegI32:$d, (opnode RegI32:$a, imm:$b))]>;
+ def rr64 : InstPTX<(outs RegI64:$d),
+ (ins RegI64:$a, RegI64:$b),
+ !strconcat(opcstr, ".s64\t$d, $a, $b"),
+ [(set RegI64:$d, (opnode RegI64:$a, RegI64:$b))]>;
+ def ri64 : InstPTX<(outs RegI64:$d),
+ (ins RegI64:$a, i64imm:$b),
+ !strconcat(opcstr, ".s64\t$d, $a, $b"),
+ [(set RegI64:$d, (opnode RegI64:$a, imm:$b))]>;
+}
+
+//===- Bitwise Logic Instructions - 3 Operand Form ------------------------===//
+multiclass PTX_LOGIC<string opcstr, SDNode opnode> {
+ def ripreds : InstPTX<(outs RegPred:$d),
+ (ins RegPred:$a, i1imm:$b),
+ !strconcat(opcstr, ".pred\t$d, $a, $b"),
+ [(set RegPred:$d, (opnode RegPred:$a, imm:$b))]>;
+ def rrpreds : InstPTX<(outs RegPred:$d),
+ (ins RegPred:$a, RegPred:$b),
+ !strconcat(opcstr, ".pred\t$d, $a, $b"),
+ [(set RegPred:$d, (opnode RegPred:$a, RegPred:$b))]>;
+ def rr16 : InstPTX<(outs RegI16:$d),
+ (ins RegI16:$a, RegI16:$b),
+ !strconcat(opcstr, ".b16\t$d, $a, $b"),
+ [(set RegI16:$d, (opnode RegI16:$a, RegI16:$b))]>;
+ def ri16 : InstPTX<(outs RegI16:$d),
+ (ins RegI16:$a, i16imm:$b),
+ !strconcat(opcstr, ".b16\t$d, $a, $b"),
+ [(set RegI16:$d, (opnode RegI16:$a, imm:$b))]>;
+ def rr32 : InstPTX<(outs RegI32:$d),
+ (ins RegI32:$a, RegI32:$b),
+ !strconcat(opcstr, ".b32\t$d, $a, $b"),
+ [(set RegI32:$d, (opnode RegI32:$a, RegI32:$b))]>;
+ def ri32 : InstPTX<(outs RegI32:$d),
+ (ins RegI32:$a, i32imm:$b),
+ !strconcat(opcstr, ".b32\t$d, $a, $b"),
+ [(set RegI32:$d, (opnode RegI32:$a, imm:$b))]>;
+ def rr64 : InstPTX<(outs RegI64:$d),
+ (ins RegI64:$a, RegI64:$b),
+ !strconcat(opcstr, ".b64\t$d, $a, $b"),
+ [(set RegI64:$d, (opnode RegI64:$a, RegI64:$b))]>;
+ def ri64 : InstPTX<(outs RegI64:$d),
+ (ins RegI64:$a, i64imm:$b),
+ !strconcat(opcstr, ".b64\t$d, $a, $b"),
+ [(set RegI64:$d, (opnode RegI64:$a, imm:$b))]>;
+}
+
+//===- Integer Shift Instructions - 3 Operand Form ------------------------===//
+multiclass PTX_INT3ntnc<string opcstr, SDNode opnode> {
+ def rr16 : InstPTX<(outs RegI16:$d),
+ (ins RegI16:$a, RegI16:$b),
+ !strconcat(opcstr, "16\t$d, $a, $b"),
+ [(set RegI16:$d, (opnode RegI16:$a, RegI16:$b))]>;
+ def rr32 : InstPTX<(outs RegI32:$d),
+ (ins RegI32:$a, RegI32:$b),
+ !strconcat(opcstr, "32\t$d, $a, $b"),
+ [(set RegI32:$d, (opnode RegI32:$a, RegI32:$b))]>;
+ def rr64 : InstPTX<(outs RegI64:$d),
+ (ins RegI64:$a, RegI64:$b),
+ !strconcat(opcstr, "64\t$d, $a, $b"),
+ [(set RegI64:$d, (opnode RegI64:$a, RegI64:$b))]>;
+ def ri16 : InstPTX<(outs RegI16:$d),
+ (ins RegI16:$a, i16imm:$b),
+ !strconcat(opcstr, "16\t$d, $a, $b"),
+ [(set RegI16:$d, (opnode RegI16:$a, imm:$b))]>;
+ def ri32 : InstPTX<(outs RegI32:$d),
+ (ins RegI32:$a, i32imm:$b),
+ !strconcat(opcstr, "32\t$d, $a, $b"),
+ [(set RegI32:$d, (opnode RegI32:$a, imm:$b))]>;
+ def ri64 : InstPTX<(outs RegI64:$d),
+ (ins RegI64:$a, i64imm:$b),
+ !strconcat(opcstr, "64\t$d, $a, $b"),
+ [(set RegI64:$d, (opnode RegI64:$a, imm:$b))]>;
+ def ir16 : InstPTX<(outs RegI16:$d),
+ (ins i16imm:$a, RegI16:$b),
+ !strconcat(opcstr, "16\t$d, $a, $b"),
+ [(set RegI16:$d, (opnode imm:$a, RegI16:$b))]>;
+ def ir32 : InstPTX<(outs RegI32:$d),
+ (ins i32imm:$a, RegI32:$b),
+ !strconcat(opcstr, "32\t$d, $a, $b"),
+ [(set RegI32:$d, (opnode imm:$a, RegI32:$b))]>;
+ def ir64 : InstPTX<(outs RegI64:$d),
+ (ins i64imm:$a, RegI64:$b),
+ !strconcat(opcstr, "64\t$d, $a, $b"),
+ [(set RegI64:$d, (opnode imm:$a, RegI64:$b))]>;
+}
+
+//===- Set Predicate Instructions (Int) - 3/4 Operand Forms ---------------===//
+multiclass PTX_SETP_I<RegisterClass RC, string regclsname, Operand immcls,
+ CondCode cmp, string cmpstr> {
+ // TODO support 5-operand format: p|q, a, b, c
+
+ def rr
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b),
+ !strconcat("setp.", cmpstr, ".", regclsname, "\t$p, $a, $b"),
+ [(set RegPred:$p, (setcc RC:$a, RC:$b, cmp))]>;
+ def ri
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b),
+ !strconcat("setp.", cmpstr, ".", regclsname, "\t$p, $a, $b"),
+ [(set RegPred:$p, (setcc RC:$a, imm:$b, cmp))]>;
+
+ def rr_and_r
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, ".and.", regclsname,
+ "\t$p, $a, $b, $c"),
+ [(set RegPred:$p, (and (setcc RC:$a, RC:$b, cmp), RegPred:$c))]>;
+ def ri_and_r
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, ".and.", regclsname,
+ "\t$p, $a, $b, $c"),
+ [(set RegPred:$p, (and (setcc RC:$a, imm:$b, cmp),
+ RegPred:$c))]>;
+ def rr_or_r
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, ".or.", regclsname,
+ "\t$p, $a, $b, $c"),
+ [(set RegPred:$p, (or (setcc RC:$a, RC:$b, cmp), RegPred:$c))]>;
+ def ri_or_r
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, ".or.", regclsname,
+ "\t$p, $a, $b, $c"),
+ [(set RegPred:$p, (or (setcc RC:$a, imm:$b, cmp), RegPred:$c))]>;
+ def rr_xor_r
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, ".xor.", regclsname,
+ "\t$p, $a, $b, $c"),
+ [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, cmp), RegPred:$c))]>;
+ def ri_xor_r
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, ".xor.", regclsname,
+ "\t$p, $a, $b, $c"),
+ [(set RegPred:$p, (xor (setcc RC:$a, imm:$b, cmp),
+ RegPred:$c))]>;
+
+ def rr_and_not_r
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, ".and.", regclsname,
+ "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (and (setcc RC:$a, RC:$b, cmp),
+ (not RegPred:$c)))]>;
+ def ri_and_not_r
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, ".and.", regclsname,
+ "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (and (setcc RC:$a, imm:$b, cmp),
+ (not RegPred:$c)))]>;
+ def rr_or_not_r
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, ".or.", regclsname,
+ "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (or (setcc RC:$a, RC:$b, cmp),
+ (not RegPred:$c)))]>;
+ def ri_or_not_r
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, ".or.", regclsname,
+ "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (or (setcc RC:$a, imm:$b, cmp),
+ (not RegPred:$c)))]>;
+ def rr_xor_not_r
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, ".xor.", regclsname,
+ "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, cmp),
+ (not RegPred:$c)))]>;
+ def ri_xor_not_r
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, ".xor.", regclsname,
+ "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (xor (setcc RC:$a, imm:$b, cmp),
+ (not RegPred:$c)))]>;
+}
+
+//===- Set Predicate Instructions (FP) - 3/4 Operand Form -----------------===//
+multiclass PTX_SETP_FP<RegisterClass RC, string regclsname, Operand immcls,
+ CondCode ucmp, CondCode ocmp, string cmpstr> {
+ // TODO support 5-operand format: p|q, a, b, c
+
+ def rr_u
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b),
+ !strconcat("setp.", cmpstr, "u.", regclsname, "\t$p, $a, $b"),
+ [(set RegPred:$p, (setcc RC:$a, RC:$b, ucmp))]>;
+ def rr_o
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b),
+ !strconcat("setp.", cmpstr, ".", regclsname, "\t$p, $a, $b"),
+ [(set RegPred:$p, (setcc RC:$a, RC:$b, ocmp))]>;
+
+ def ri_u
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b),
+ !strconcat("setp.", cmpstr, "u.", regclsname, "\t$p, $a, $b"),
+ [(set RegPred:$p, (setcc RC:$a, fpimm:$b, ucmp))]>;
+ def ri_o
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b),
+ !strconcat("setp.", cmpstr, ".", regclsname, "\t$p, $a, $b"),
+ [(set RegPred:$p, (setcc RC:$a, fpimm:$b, ocmp))]>;
+
+ def rr_and_r_u
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, "u.and.", regclsname,
+ "\t$p, $a, $b, $c"),
+ [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ucmp),
+ RegPred:$c))]>;
+ def rr_and_r_o
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, ".and.", regclsname,
+ "\t$p, $a, $b, $c"),
+ [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ocmp),
+ RegPred:$c))]>;
+
+ def rr_or_r_u
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, "u.or.", regclsname,
+ "\t$p, $a, $b, $c"),
+ [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ucmp), RegPred:$c))]>;
+ def rr_or_r_o
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, ".or.", regclsname,
+ "\t$p, $a, $b, $c"),
+ [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ocmp), RegPred:$c))]>;
+
+ def rr_xor_r_u
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, "u.xor.", regclsname,
+ "\t$p, $a, $b, $c"),
+ [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ucmp),
+ RegPred:$c))]>;
+ def rr_xor_r_o
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, ".xor.", regclsname,
+ "\t$p, $a, $b, $c"),
+ [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ocmp),
+ RegPred:$c))]>;
+
+ def rr_and_not_r_u
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, "u.and.", regclsname,
+ "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ucmp),
+ (not RegPred:$c)))]>;
+ def rr_and_not_r_o
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, ".and.", regclsname,
+ "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ocmp),
+ (not RegPred:$c)))]>;
+
+ def rr_or_not_r_u
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, "u.or.", regclsname,
+ "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ucmp),
+ (not RegPred:$c)))]>;
+ def rr_or_not_r_o
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, ".or.", regclsname,
+ "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ocmp),
+ (not RegPred:$c)))]>;
+
+ def rr_xor_not_r_u
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, "u.xor.", regclsname,
+ "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ucmp),
+ (not RegPred:$c)))]>;
+ def rr_xor_not_r_o
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, ".xor.", regclsname,
+ "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ocmp),
+ (not RegPred:$c)))]>;
+}
+
+//===- Select Predicate Instructions - 4 Operand Form ---------------------===//
+multiclass PTX_SELP<RegisterClass RC, string regclsname, Operand immcls,
+ SDNode immnode> {
+ def rr
+ : InstPTX<(outs RC:$r), (ins RegPred:$a, RC:$b, RC:$c),
+ !strconcat("selp.", regclsname, "\t$r, $b, $c, $a"),
+ [(set RC:$r, (select RegPred:$a, RC:$b, RC:$c))]>;
+ def ri
+ : InstPTX<(outs RC:$r), (ins RegPred:$a, RC:$b, immcls:$c),
+ !strconcat("selp.", regclsname, "\t$r, $b, $c, $a"),
+ [(set RC:$r, (select RegPred:$a, RC:$b, immnode:$c))]>;
+ def ii
+ : InstPTX<(outs RC:$r), (ins RegPred:$a, immcls:$b, immcls:$c),
+ !strconcat("selp.", regclsname, "\t$r, $b, $c, $a"),
+ [(set RC:$r, (select RegPred:$a, immnode:$b, immnode:$c))]>;
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+///===- Integer Arithmetic Instructions -----------------------------------===//
+
+defm ADD : PTX_INT3<"add", add>;
+defm SUB : PTX_INT3<"sub", sub>;
+defm MUL : PTX_INT3<"mul.lo", mul>; // FIXME: Allow 32x32 -> 64 multiplies
+defm DIV : PTX_INT3<"div", udiv>;
+defm SDIV : PTX_INT3_SIGNED<"div", sdiv>;
+defm REM : PTX_INT3<"rem", urem>;
+
+///===- Floating-Point Arithmetic Instructions ----------------------------===//
+
+// FNEG
+defm FNEG : PTX_FLOAT_2OP<"neg">;
+
+// Standard Binary Operations
+defm FADD : PTX_FLOAT_3OP<"add">;
+defm FSUB : PTX_FLOAT_3OP<"sub">;
+defm FMUL : PTX_FLOAT_3OP<"mul">;
+defm FDIV : PTX_FLOAT_3OP<"div">;
+
+// Multi-operation hybrid instructions
+defm FMAD : PTX_FLOAT_4OP<"mad">, Requires<[SupportsFMA]>;
+
+
+///===- Floating-Point Intrinsic Instructions -----------------------------===//
+
+// SQRT
+def FSQRTrr32 : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegF32:$a),
+ "sqrt$r.f32\t$d, $a", []>;
+def FSQRTri32 : InstPTX<(outs RegF32:$d), (ins RndMode:$r, f32imm:$a),
+ "sqrt$r.f32\t$d, $a", []>;
+def FSQRTrr64 : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegF64:$a),
+ "sqrt$r.f64\t$d, $a", []>;
+def FSQRTri64 : InstPTX<(outs RegF64:$d), (ins RndMode:$r, f64imm:$a),
+ "sqrt$r.f64\t$d, $a", []>;
+
+// SIN
+def FSINrr32 : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegF32:$a),
+ "sin$r.f32\t$d, $a", []>;
+def FSINri32 : InstPTX<(outs RegF32:$d), (ins RndMode:$r, f32imm:$a),
+ "sin$r.f32\t$d, $a", []>;
+def FSINrr64 : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegF64:$a),
+ "sin$r.f64\t$d, $a", []>;
+def FSINri64 : InstPTX<(outs RegF64:$d), (ins RndMode:$r, f64imm:$a),
+ "sin$r.f64\t$d, $a", []>;
+
+// COS
+def FCOSrr32 : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegF32:$a),
+ "cos$r.f32\t$d, $a", []>;
+def FCOSri32 : InstPTX<(outs RegF32:$d), (ins RndMode:$r, f32imm:$a),
+ "cos$r.f32\t$d, $a", []>;
+def FCOSrr64 : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegF64:$a),
+ "cos$r.f64\t$d, $a", []>;
+def FCOSri64 : InstPTX<(outs RegF64:$d), (ins RndMode:$r, f64imm:$a),
+ "cos$r.f64\t$d, $a", []>;
+
+
+
+
+///===- Comparison and Selection Instructions -----------------------------===//
+
+// .setp
+
+// Compare u16
+
+defm SETPEQu16 : PTX_SETP_I<RegI16, "u16", i16imm, SETEQ, "eq">;
+defm SETPNEu16 : PTX_SETP_I<RegI16, "u16", i16imm, SETNE, "ne">;
+defm SETPLTu16 : PTX_SETP_I<RegI16, "u16", i16imm, SETULT, "lt">;
+defm SETPLEu16 : PTX_SETP_I<RegI16, "u16", i16imm, SETULE, "le">;
+defm SETPGTu16 : PTX_SETP_I<RegI16, "u16", i16imm, SETUGT, "gt">;
+defm SETPGEu16 : PTX_SETP_I<RegI16, "u16", i16imm, SETUGE, "ge">;
+defm SETPLTs16 : PTX_SETP_I<RegI16, "s16", i16imm, SETLT, "lt">;
+defm SETPLEs16 : PTX_SETP_I<RegI16, "s16", i16imm, SETLE, "le">;
+defm SETPGTs16 : PTX_SETP_I<RegI16, "s16", i16imm, SETGT, "gt">;
+defm SETPGEs16 : PTX_SETP_I<RegI16, "s16", i16imm, SETGE, "ge">;
+
+// Compare u32
+
+defm SETPEQu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETEQ, "eq">;
+defm SETPNEu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETNE, "ne">;
+defm SETPLTu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETULT, "lt">;
+defm SETPLEu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETULE, "le">;
+defm SETPGTu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETUGT, "gt">;
+defm SETPGEu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETUGE, "ge">;
+defm SETPLTs32 : PTX_SETP_I<RegI32, "s32", i32imm, SETLT, "lt">;
+defm SETPLEs32 : PTX_SETP_I<RegI32, "s32", i32imm, SETLE, "le">;
+defm SETPGTs32 : PTX_SETP_I<RegI32, "s32", i32imm, SETGT, "gt">;
+defm SETPGEs32 : PTX_SETP_I<RegI32, "s32", i32imm, SETGE, "ge">;
+
+// Compare u64
+
+defm SETPEQu64 : PTX_SETP_I<RegI64, "u64", i64imm, SETEQ, "eq">;
+defm SETPNEu64 : PTX_SETP_I<RegI64, "u64", i64imm, SETNE, "ne">;
+defm SETPLTu64 : PTX_SETP_I<RegI64, "u64", i64imm, SETULT, "lt">;
+defm SETPLEu64 : PTX_SETP_I<RegI64, "u64", i64imm, SETULE, "le">;
+defm SETPGTu64 : PTX_SETP_I<RegI64, "u64", i64imm, SETUGT, "gt">;
+defm SETPGEu64 : PTX_SETP_I<RegI64, "u64", i64imm, SETUGE, "ge">;
+defm SETPLTs64 : PTX_SETP_I<RegI64, "s64", i64imm, SETLT, "lt">;
+defm SETPLEs64 : PTX_SETP_I<RegI64, "s64", i64imm, SETLE, "le">;
+defm SETPGTs64 : PTX_SETP_I<RegI64, "s64", i64imm, SETGT, "gt">;
+defm SETPGEs64 : PTX_SETP_I<RegI64, "s64", i64imm, SETGE, "ge">;
+
+// Compare f32
+
+defm SETPEQf32 : PTX_SETP_FP<RegF32, "f32", f32imm, SETUEQ, SETOEQ, "eq">;
+defm SETPNEf32 : PTX_SETP_FP<RegF32, "f32", f32imm, SETUNE, SETONE, "ne">;
+defm SETPLTf32 : PTX_SETP_FP<RegF32, "f32", f32imm, SETULT, SETOLT, "lt">;
+defm SETPLEf32 : PTX_SETP_FP<RegF32, "f32", f32imm, SETULE, SETOLE, "le">;
+defm SETPGTf32 : PTX_SETP_FP<RegF32, "f32", f32imm, SETUGT, SETOGT, "gt">;
+defm SETPGEf32 : PTX_SETP_FP<RegF32, "f32", f32imm, SETUGE, SETOGE, "ge">;
+
+// Compare f64
+
+defm SETPEQf64 : PTX_SETP_FP<RegF64, "f64", f64imm, SETUEQ, SETOEQ, "eq">;
+defm SETPNEf64 : PTX_SETP_FP<RegF64, "f64", f64imm, SETUNE, SETONE, "ne">;
+defm SETPLTf64 : PTX_SETP_FP<RegF64, "f64", f64imm, SETULT, SETOLT, "lt">;
+defm SETPLEf64 : PTX_SETP_FP<RegF64, "f64", f64imm, SETULE, SETOLE, "le">;
+defm SETPGTf64 : PTX_SETP_FP<RegF64, "f64", f64imm, SETUGT, SETOGT, "gt">;
+defm SETPGEf64 : PTX_SETP_FP<RegF64, "f64", f64imm, SETUGE, SETOGE, "ge">;
+
+// .selp
+
+defm SELPi16 : PTX_SELP<RegI16, "u16", i16imm, imm>;
+defm SELPi32 : PTX_SELP<RegI32, "u32", i32imm, imm>;
+defm SELPi64 : PTX_SELP<RegI64, "u64", i64imm, imm>;
+defm SELPf32 : PTX_SELP<RegF32, "f32", f32imm, fpimm>;
+defm SELPf64 : PTX_SELP<RegF64, "f64", f64imm, fpimm>;
+
+///===- Logic and Shift Instructions --------------------------------------===//
+
+defm SHL : PTX_INT3ntnc<"shl.b", PTXshl>;
+defm SRL : PTX_INT3ntnc<"shr.u", PTXsrl>;
+defm SRA : PTX_INT3ntnc<"shr.s", PTXsra>;
+
+defm AND : PTX_LOGIC<"and", and>;
+defm OR : PTX_LOGIC<"or", or>;
+defm XOR : PTX_LOGIC<"xor", xor>;
+
+///===- Data Movement and Conversion Instructions -------------------------===//
+
+// any_extend
+// Implement the anyext instruction in terms of the PTX cvt instructions.
+//def : Pat<(i32 (anyext RegI16:$a)), (CVT_u32_u16 RegI16:$a)>;
+//def : Pat<(i64 (anyext RegI16:$a)), (CVT_u64_u16 RegI16:$a)>;
+//def : Pat<(i64 (anyext RegI32:$a)), (CVT_u64_u32 RegI32:$a)>;
+
+// bitconvert
+// These instructions implement the bit-wise conversion between integer and
+// floating-point types.
+def MOVi32f32
+ : InstPTX<(outs RegI32:$d), (ins RegF32:$a), "mov.b32\t$d, $a", []>;
+def MOVf32i32
+ : InstPTX<(outs RegF32:$d), (ins RegI32:$a), "mov.b32\t$d, $a", []>;
+def MOVi64f64
+ : InstPTX<(outs RegI64:$d), (ins RegF64:$a), "mov.b64\t$d, $a", []>;
+def MOVf64i64
+ : InstPTX<(outs RegF64:$d), (ins RegI64:$a), "mov.b64\t$d, $a", []>;
+
+let neverHasSideEffects = 1 in {
+ def MOVPREDrr
+ : InstPTX<(outs RegPred:$d), (ins RegPred:$a), "mov.pred\t$d, $a", []>;
+ def MOVU16rr
+ : InstPTX<(outs RegI16:$d), (ins RegI16:$a), "mov.u16\t$d, $a", []>;
+ def MOVU32rr
+ : InstPTX<(outs RegI32:$d), (ins RegI32:$a), "mov.u32\t$d, $a", []>;
+ def MOVU64rr
+ : InstPTX<(outs RegI64:$d), (ins RegI64:$a), "mov.u64\t$d, $a", []>;
+ def MOVF32rr
+ : InstPTX<(outs RegF32:$d), (ins RegF32:$a), "mov.f32\t$d, $a", []>;
+ def MOVF64rr
+ : InstPTX<(outs RegF64:$d), (ins RegF64:$a), "mov.f64\t$d, $a", []>;
+}
+
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+ def MOVPREDri
+ : InstPTX<(outs RegPred:$d), (ins i1imm:$a), "mov.pred\t$d, $a",
+ [(set RegPred:$d, imm:$a)]>;
+ def MOVU16ri
+ : InstPTX<(outs RegI16:$d), (ins i16imm:$a), "mov.u16\t$d, $a",
+ [(set RegI16:$d, imm:$a)]>;
+ def MOVU32ri
+ : InstPTX<(outs RegI32:$d), (ins i32imm:$a), "mov.u32\t$d, $a",
+ [(set RegI32:$d, imm:$a)]>;
+ def MOVU64ri
+ : InstPTX<(outs RegI64:$d), (ins i64imm:$a), "mov.u64\t$d, $a",
+ [(set RegI64:$d, imm:$a)]>;
+ def MOVF32ri
+ : InstPTX<(outs RegF32:$d), (ins f32imm:$a), "mov.f32\t$d, $a",
+ [(set RegF32:$d, fpimm:$a)]>;
+ def MOVF64ri
+ : InstPTX<(outs RegF64:$d), (ins f64imm:$a), "mov.f64\t$d, $a",
+ [(set RegF64:$d, fpimm:$a)]>;
+}
+
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+ def MOVaddr32
+ : InstPTX<(outs RegI32:$d), (ins i32imm:$a), "mov.u32\t$d, $a",
+ [(set RegI32:$d, (PTXcopyaddress tglobaladdr:$a))]>;
+ def MOVaddr64
+ : InstPTX<(outs RegI64:$d), (ins i64imm:$a), "mov.u64\t$d, $a",
+ [(set RegI64:$d, (PTXcopyaddress tglobaladdr:$a))]>;
+}
+
+// PTX cvt instructions
+// Note all of these may actually be used, we just define all possible patterns
+// here (that make sense).
+// FIXME: Can we collapse this somehow into a multiclass def?
+
+// To i16
+def CVTu16u32
+ : InstPTX<(outs RegI16:$d), (ins RegI32:$a), "cvt.u16.u32\t$d, $a", []>;
+def CVTu16u64
+ : InstPTX<(outs RegI16:$d), (ins RegI64:$a), "cvt.u16.u64\t$d, $a", []>;
+def CVTu16f32
+ : InstPTX<(outs RegI16:$d), (ins RndMode:$r, RegF32:$a),
+ "cvt$r.u16.f32\t$d, $a", []>;
+def CVTs16f32
+ : InstPTX<(outs RegI16:$d), (ins RndMode:$r, RegF32:$a),
+ "cvt$r.s16.f32\t$d, $a", []>;
+def CVTu16f64
+ : InstPTX<(outs RegI16:$d), (ins RndMode:$r, RegF64:$a),
+ "cvt$r.u16.f64\t$d, $a", []>;
+def CVTs16f64
+ : InstPTX<(outs RegI16:$d), (ins RndMode:$r, RegF64:$a),
+ "cvt$r.s16.f64\t$d, $a", []>;
+
+// To i32
+def CVTu32u16
+ : InstPTX<(outs RegI32:$d), (ins RegI16:$a), "cvt.u32.u16\t$d, $a", []>;
+def CVTs32s16
+ : InstPTX<(outs RegI32:$d), (ins RegI16:$a), "cvt.s32.s16\t$d, $a", []>;
+def CVTu32u64
+ : InstPTX<(outs RegI32:$d), (ins RegI64:$a), "cvt.u32.u64\t$d, $a", []>;
+def CVTu32f32
+ : InstPTX<(outs RegI32:$d), (ins RndMode:$r, RegF32:$a),
+ "cvt$r.u32.f32\t$d, $a", []>;
+def CVTs32f32
+ : InstPTX<(outs RegI32:$d), (ins RndMode:$r, RegF32:$a),
+ "cvt$r.s32.f32\t$d, $a", []>;
+def CVTu32f64
+ : InstPTX<(outs RegI32:$d), (ins RndMode:$r, RegF64:$a),
+ "cvt$r.u32.f64\t$d, $a", []>;
+def CVTs32f64
+ : InstPTX<(outs RegI32:$d), (ins RndMode:$r, RegF64:$a),
+ "cvt$r.s32.f64\t$d, $a", []>;
+
+// To i64
+def CVTu64u16
+ : InstPTX<(outs RegI64:$d), (ins RegI16:$a), "cvt.u64.u16\t$d, $a", []>;
+def CVTs64s16
+ : InstPTX<(outs RegI64:$d), (ins RegI16:$a), "cvt.s64.s16\t$d, $a", []>;
+def CVTu64u32
+ : InstPTX<(outs RegI64:$d), (ins RegI32:$a), "cvt.u64.u32\t$d, $a", []>;
+def CVTs64s32
+ : InstPTX<(outs RegI64:$d), (ins RegI32:$a), "cvt.s64.s32\t$d, $a", []>;
+def CVTu64f32
+ : InstPTX<(outs RegI64:$d), (ins RndMode:$r, RegF32:$a),
+ "cvt$r.u64.f32\t$d, $a", []>;
+def CVTs64f32
+ : InstPTX<(outs RegI64:$d), (ins RndMode:$r, RegF32:$a),
+ "cvt$r.s64.f32\t$d, $a", []>;
+def CVTu64f64
+ : InstPTX<(outs RegI64:$d), (ins RndMode:$r, RegF64:$a),
+ "cvt$r.u64.f64\t$d, $a", []>;
+def CVTs64f64
+ : InstPTX<(outs RegI64:$d), (ins RndMode:$r, RegF64:$a),
+ "cvt$r.s64.f64\t$d, $a", []>;
+
+// To f32
+def CVTf32u16
+ : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI16:$a),
+ "cvt$r.f32.u16\t$d, $a", []>;
+def CVTf32s16
+ : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI16:$a),
+ "cvt$r.f32.s16\t$d, $a", []>;
+def CVTf32u32
+ : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI32:$a),
+ "cvt$r.f32.u32\t$d, $a", []>;
+def CVTf32s32
+ : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI32:$a),
+ "cvt$r.f32.s32\t$d, $a", []>;
+def CVTf32u64
+ : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI64:$a),
+ "cvt$r.f32.u64\t$d, $a", []>;
+def CVTf32s64
+ : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI64:$a),
+ "cvt$r.f32.s64\t$d, $a", []>;
+def CVTf32f64
+ : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegF64:$a),
+ "cvt$r.f32.f64\t$d, $a", []>;
+
+// To f64
+def CVTf64u16
+ : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI16:$a),
+ "cvt$r.f64.u16\t$d, $a", []>;
+def CVTf64s16
+ : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI16:$a),
+ "cvt$r.f64.s16\t$d, $a", []>;
+def CVTf64u32
+ : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI32:$a),
+ "cvt$r.f64.u32\t$d, $a", []>;
+def CVTf64s32
+ : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI32:$a),
+ "cvt$r.f64.s32\t$d, $a", []>;
+def CVTf64u64
+ : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI64:$a),
+ "cvt$r.f64.u64\t$d, $a", []>;
+def CVTf64s64
+ : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI64:$a),
+ "cvt$r.f64.s64\t$d, $a", []>;
+def CVTf64f32
+ : InstPTX<(outs RegF64:$d), (ins RegF32:$a), "cvt.f64.f32\t$d, $a", []>;
+
+ ///===- Control Flow Instructions -----------------------------------------===//
+
+let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
+ def BRAd
+ : InstPTX<(outs), (ins brtarget:$d), "bra\t$d", [(br bb:$d)]>;
+}
+
+let isBranch = 1, isTerminator = 1 in {
+ // FIXME: The pattern part is blank because I cannot (or do not yet know
+ // how to) use the first operand of PredicateOperand (a RegPred register) here
+ def BRAdp
+ : InstPTX<(outs), (ins brtarget:$d), "bra\t$d",
+ [/*(brcond pred:$_p, bb:$d)*/]>;
+}
+
+let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
+ def EXIT : InstPTX<(outs), (ins), "exit", [(PTXexit)]>;
+ def RET : InstPTX<(outs), (ins), "ret", [(PTXret)]>;
+}
+
+let hasSideEffects = 1 in {
+ def CALL : InstPTX<(outs), (ins), "call", [(PTXcall)]>;
+}
+
+///===- Parameter Passing Pseudo-Instructions -----------------------------===//
+
+def READPARAMPRED : InstPTX<(outs RegPred:$a), (ins i32imm:$b),
+ "mov.pred\t$a, %param$b", []>;
+def READPARAMI16 : InstPTX<(outs RegI16:$a), (ins i32imm:$b),
+ "mov.b16\t$a, %param$b", []>;
+def READPARAMI32 : InstPTX<(outs RegI32:$a), (ins i32imm:$b),
+ "mov.b32\t$a, %param$b", []>;
+def READPARAMI64 : InstPTX<(outs RegI64:$a), (ins i32imm:$b),
+ "mov.b64\t$a, %param$b", []>;
+def READPARAMF32 : InstPTX<(outs RegF32:$a), (ins i32imm:$b),
+ "mov.f32\t$a, %param$b", []>;
+def READPARAMF64 : InstPTX<(outs RegF64:$a), (ins i32imm:$b),
+ "mov.f64\t$a, %param$b", []>;
+
+def WRITEPARAMPRED : InstPTX<(outs), (ins RegPred:$a), "//w", []>;
+def WRITEPARAMI16 : InstPTX<(outs), (ins RegI16:$a), "//w", []>;
+def WRITEPARAMI32 : InstPTX<(outs), (ins RegI32:$a), "//w", []>;
+def WRITEPARAMI64 : InstPTX<(outs), (ins RegI64:$a), "//w", []>;
+def WRITEPARAMF32 : InstPTX<(outs), (ins RegF32:$a), "//w", []>;
+def WRITEPARAMF64 : InstPTX<(outs), (ins RegF64:$a), "//w", []>;
+
+
+//===----------------------------------------------------------------------===//
+// Instruction Selection Patterns
+//===----------------------------------------------------------------------===//
+
+// FADD
+def : Pat<(f32 (fadd RegF32:$a, RegF32:$b)),
+ (FADDrr32 RndDefault, RegF32:$a, RegF32:$b)>;
+def : Pat<(f32 (fadd RegF32:$a, fpimm:$b)),
+ (FADDri32 RndDefault, RegF32:$a, fpimm:$b)>;
+def : Pat<(f64 (fadd RegF64:$a, RegF64:$b)),
+ (FADDrr64 RndDefault, RegF64:$a, RegF64:$b)>;
+def : Pat<(f64 (fadd RegF64:$a, fpimm:$b)),
+ (FADDri64 RndDefault, RegF64:$a, fpimm:$b)>;
+
+// FSUB
+def : Pat<(f32 (fsub RegF32:$a, RegF32:$b)),
+ (FSUBrr32 RndDefault, RegF32:$a, RegF32:$b)>;
+def : Pat<(f32 (fsub RegF32:$a, fpimm:$b)),
+ (FSUBri32 RndDefault, RegF32:$a, fpimm:$b)>;
+def : Pat<(f64 (fsub RegF64:$a, RegF64:$b)),
+ (FSUBrr64 RndDefault, RegF64:$a, RegF64:$b)>;
+def : Pat<(f64 (fsub RegF64:$a, fpimm:$b)),
+ (FSUBri64 RndDefault, RegF64:$a, fpimm:$b)>;
+
+// FMUL
+def : Pat<(f32 (fmul RegF32:$a, RegF32:$b)),
+ (FMULrr32 RndDefault, RegF32:$a, RegF32:$b)>;
+def : Pat<(f32 (fmul RegF32:$a, fpimm:$b)),
+ (FMULri32 RndDefault, RegF32:$a, fpimm:$b)>;
+def : Pat<(f64 (fmul RegF64:$a, RegF64:$b)),
+ (FMULrr64 RndDefault, RegF64:$a, RegF64:$b)>;
+def : Pat<(f64 (fmul RegF64:$a, fpimm:$b)),
+ (FMULri64 RndDefault, RegF64:$a, fpimm:$b)>;
+
+// FDIV
+def : Pat<(f32 (fdiv RegF32:$a, RegF32:$b)),
+ (FDIVrr32 RndDefault, RegF32:$a, RegF32:$b)>;
+def : Pat<(f32 (fdiv RegF32:$a, fpimm:$b)),
+ (FDIVri32 RndDefault, RegF32:$a, fpimm:$b)>;
+def : Pat<(f64 (fdiv RegF64:$a, RegF64:$b)),
+ (FDIVrr64 RndDefault, RegF64:$a, RegF64:$b)>;
+def : Pat<(f64 (fdiv RegF64:$a, fpimm:$b)),
+ (FDIVri64 RndDefault, RegF64:$a, fpimm:$b)>;
+
+// FMUL+FADD
+def : Pat<(f32 (fadd (fmul RegF32:$a, RegF32:$b), RegF32:$c)),
+ (FMADrrr32 RndDefault, RegF32:$a, RegF32:$b, RegF32:$c)>;
+def : Pat<(f32 (fadd (fmul RegF32:$a, RegF32:$b), fpimm:$c)),
+ (FMADrri32 RndDefault, RegF32:$a, RegF32:$b, fpimm:$c)>;
+def : Pat<(f32 (fadd (fmul RegF32:$a, fpimm:$b), fpimm:$c)),
+ (FMADrrr32 RndDefault, RegF32:$a, fpimm:$b, fpimm:$c)>;
+def : Pat<(f32 (fadd (fmul RegF32:$a, RegF32:$b), fpimm:$c)),
+ (FMADrri32 RndDefault, RegF32:$a, RegF32:$b, fpimm:$c)>;
+def : Pat<(f64 (fadd (fmul RegF64:$a, RegF64:$b), RegF64:$c)),
+ (FMADrrr64 RndDefault, RegF64:$a, RegF64:$b, RegF64:$c)>;
+def : Pat<(f64 (fadd (fmul RegF64:$a, RegF64:$b), fpimm:$c)),
+ (FMADrri64 RndDefault, RegF64:$a, RegF64:$b, fpimm:$c)>;
+def : Pat<(f64 (fadd (fmul RegF64:$a, fpimm:$b), fpimm:$c)),
+ (FMADrri64 RndDefault, RegF64:$a, fpimm:$b, fpimm:$c)>;
+
+// FNEG
+def : Pat<(f32 (fneg RegF32:$a)), (FNEGrr32 RndDefault, RegF32:$a)>;
+def : Pat<(f32 (fneg fpimm:$a)), (FNEGri32 RndDefault, fpimm:$a)>;
+def : Pat<(f64 (fneg RegF64:$a)), (FNEGrr64 RndDefault, RegF64:$a)>;
+def : Pat<(f64 (fneg fpimm:$a)), (FNEGri64 RndDefault, fpimm:$a)>;
+
+// FSQRT
+def : Pat<(f32 (fsqrt RegF32:$a)), (FSQRTrr32 RndDefault, RegF32:$a)>;
+def : Pat<(f32 (fsqrt fpimm:$a)), (FSQRTri32 RndDefault, fpimm:$a)>;
+def : Pat<(f64 (fsqrt RegF64:$a)), (FSQRTrr64 RndDefault, RegF64:$a)>;
+def : Pat<(f64 (fsqrt fpimm:$a)), (FSQRTri64 RndDefault, fpimm:$a)>;
+
+// FSIN
+def : Pat<(f32 (fsin RegF32:$a)), (FSINrr32 RndDefault, RegF32:$a)>;
+def : Pat<(f32 (fsin fpimm:$a)), (FSINri32 RndDefault, fpimm:$a)>;
+def : Pat<(f64 (fsin RegF64:$a)), (FSINrr64 RndDefault, RegF64:$a)>;
+def : Pat<(f64 (fsin fpimm:$a)), (FSINri64 RndDefault, fpimm:$a)>;
+
+// FCOS
+def : Pat<(f32 (fcos RegF32:$a)), (FCOSrr32 RndDefault, RegF32:$a)>;
+def : Pat<(f32 (fcos fpimm:$a)), (FCOSri32 RndDefault, fpimm:$a)>;
+def : Pat<(f64 (fcos RegF64:$a)), (FCOSrr64 RndDefault, RegF64:$a)>;
+def : Pat<(f64 (fcos fpimm:$a)), (FCOSri64 RndDefault, fpimm:$a)>;
+
+// Type conversion notes:
+// - PTX does not directly support converting a predicate to a value, so we
+// use a select instruction to select either 0 or 1 (integer or fp) based
+// on the truth value of the predicate.
+// - PTX does not directly support converting to a predicate type, so we fake it
+// by performing a greater-than test between the value and zero. This follows
+// the C convention that any non-zero value is equivalent to 'true'.
+
+// Conversion to pred
+def : Pat<(i1 (trunc RegI16:$a)), (SETPGTu16ri RegI16:$a, 0)>;
+def : Pat<(i1 (trunc RegI32:$a)), (SETPGTu32ri RegI32:$a, 0)>;
+def : Pat<(i1 (trunc RegI64:$a)), (SETPGTu64ri RegI64:$a, 0)>;
+def : Pat<(i1 (fp_to_uint RegF32:$a)), (SETPGTu32ri (MOVi32f32 RegF32:$a), 0)>;
+def : Pat<(i1 (fp_to_uint RegF64:$a)), (SETPGTu64ri (MOVi64f64 RegF64:$a), 0)>;
+
+// Conversion to u16
+def : Pat<(i16 (anyext RegPred:$a)), (SELPi16ii RegPred:$a, 1, 0)>;
+def : Pat<(i16 (sext RegPred:$a)), (SELPi16ii RegPred:$a, 0xFFFF, 0)>;
+def : Pat<(i16 (zext RegPred:$a)), (SELPi16ii RegPred:$a, 1, 0)>;
+def : Pat<(i16 (trunc RegI32:$a)), (CVTu16u32 RegI32:$a)>;
+def : Pat<(i16 (trunc RegI64:$a)), (CVTu16u64 RegI64:$a)>;
+def : Pat<(i16 (fp_to_uint RegF32:$a)), (CVTu16f32 RndDefault, RegF32:$a)>;
+def : Pat<(i16 (fp_to_sint RegF32:$a)), (CVTs16f32 RndDefault, RegF32:$a)>;
+def : Pat<(i16 (fp_to_uint RegF64:$a)), (CVTu16f64 RndDefault, RegF64:$a)>;
+def : Pat<(i16 (fp_to_sint RegF64:$a)), (CVTs16f64 RndDefault, RegF64:$a)>;
+
+// Conversion to u32
+def : Pat<(i32 (anyext RegPred:$a)), (SELPi32ii RegPred:$a, 1, 0)>;
+def : Pat<(i32 (sext RegPred:$a)), (SELPi32ii RegPred:$a, 0xFFFFFFFF, 0)>;
+def : Pat<(i32 (zext RegPred:$a)), (SELPi32ii RegPred:$a, 1, 0)>;
+def : Pat<(i32 (anyext RegI16:$a)), (CVTu32u16 RegI16:$a)>;
+def : Pat<(i32 (sext RegI16:$a)), (CVTs32s16 RegI16:$a)>;
+def : Pat<(i32 (zext RegI16:$a)), (CVTu32u16 RegI16:$a)>;
+def : Pat<(i32 (trunc RegI64:$a)), (CVTu32u64 RegI64:$a)>;
+def : Pat<(i32 (fp_to_uint RegF32:$a)), (CVTu32f32 RndDefault, RegF32:$a)>;
+def : Pat<(i32 (fp_to_sint RegF32:$a)), (CVTs32f32 RndDefault, RegF32:$a)>;
+def : Pat<(i32 (fp_to_uint RegF64:$a)), (CVTu32f64 RndDefault, RegF64:$a)>;
+def : Pat<(i32 (fp_to_sint RegF64:$a)), (CVTs32f64 RndDefault, RegF64:$a)>;
+def : Pat<(i32 (bitconvert RegF32:$a)), (MOVi32f32 RegF32:$a)>;
+
+// Conversion to u64
+def : Pat<(i64 (anyext RegPred:$a)), (SELPi64ii RegPred:$a, 1, 0)>;
+def : Pat<(i64 (sext RegPred:$a)), (SELPi64ii RegPred:$a,
+ 0xFFFFFFFFFFFFFFFF, 0)>;
+def : Pat<(i64 (zext RegPred:$a)), (SELPi64ii RegPred:$a, 1, 0)>;
+def : Pat<(i64 (anyext RegI16:$a)), (CVTu64u16 RegI16:$a)>;
+def : Pat<(i64 (sext RegI16:$a)), (CVTs64s16 RegI16:$a)>;
+def : Pat<(i64 (zext RegI16:$a)), (CVTu64u16 RegI16:$a)>;
+def : Pat<(i64 (anyext RegI32:$a)), (CVTu64u32 RegI32:$a)>;
+def : Pat<(i64 (sext RegI32:$a)), (CVTs64s32 RegI32:$a)>;
+def : Pat<(i64 (zext RegI32:$a)), (CVTu64u32 RegI32:$a)>;
+def : Pat<(i64 (fp_to_uint RegF32:$a)), (CVTu64f32 RndDefault, RegF32:$a)>;
+def : Pat<(i64 (fp_to_sint RegF32:$a)), (CVTs64f32 RndDefault, RegF32:$a)>;
+def : Pat<(i64 (fp_to_uint RegF64:$a)), (CVTu64f64 RndDefault, RegF64:$a)>;
+def : Pat<(i64 (fp_to_sint RegF64:$a)), (CVTs64f64 RndDefault, RegF64:$a)>;
+def : Pat<(i64 (bitconvert RegF64:$a)), (MOVi64f64 RegF64:$a)>;
+
+// Conversion to f32
+def : Pat<(f32 (uint_to_fp RegPred:$a)), (SELPf32rr RegPred:$a,
+ (MOVf32i32 0x3F800000), (MOVf32i32 0))>;
+def : Pat<(f32 (uint_to_fp RegI16:$a)), (CVTf32u16 RndDefault, RegI16:$a)>;
+def : Pat<(f32 (sint_to_fp RegI16:$a)), (CVTf32s16 RndDefault, RegI16:$a)>;
+def : Pat<(f32 (uint_to_fp RegI32:$a)), (CVTf32u32 RndDefault, RegI32:$a)>;
+def : Pat<(f32 (sint_to_fp RegI32:$a)), (CVTf32s32 RndDefault, RegI32:$a)>;
+def : Pat<(f32 (uint_to_fp RegI64:$a)), (CVTf32u64 RndDefault, RegI64:$a)>;
+def : Pat<(f32 (sint_to_fp RegI64:$a)), (CVTf32s64 RndDefault, RegI64:$a)>;
+def : Pat<(f32 (fround RegF64:$a)), (CVTf32f64 RndDefault, RegF64:$a)>;
+def : Pat<(f32 (bitconvert RegI32:$a)), (MOVf32i32 RegI32:$a)>;
+
+// Conversion to f64
+def : Pat<(f64 (uint_to_fp RegPred:$a)), (SELPf64rr RegPred:$a,
+ (MOVf64i64 0x3F80000000000000), (MOVf64i64 0))>;
+def : Pat<(f64 (uint_to_fp RegI16:$a)), (CVTf64u16 RndDefault, RegI16:$a)>;
+def : Pat<(f64 (sint_to_fp RegI16:$a)), (CVTf64s16 RndDefault, RegI16:$a)>;
+def : Pat<(f64 (uint_to_fp RegI32:$a)), (CVTf64u32 RndDefault, RegI32:$a)>;
+def : Pat<(f64 (sint_to_fp RegI32:$a)), (CVTf64s32 RndDefault, RegI32:$a)>;
+def : Pat<(f64 (uint_to_fp RegI64:$a)), (CVTf64u64 RndDefault, RegI64:$a)>;
+def : Pat<(f64 (sint_to_fp RegI64:$a)), (CVTf64s64 RndDefault, RegI64:$a)>;
+def : Pat<(f64 (fextend RegF32:$a)), (CVTf64f32 RegF32:$a)>;
+def : Pat<(f64 (bitconvert RegI64:$a)), (MOVf64i64 RegI64:$a)>;
+
+
+///===- Intrinsic Instructions --------------------------------------------===//
+include "PTXIntrinsicInstrInfo.td"
+
+///===- Load/Store Instructions -------------------------------------------===//
+include "PTXInstrLoadStore.td"
+
diff --git a/contrib/llvm/lib/Target/PTX/PTXInstrLoadStore.td b/contrib/llvm/lib/Target/PTX/PTXInstrLoadStore.td
new file mode 100644
index 0000000..9b4f56c
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXInstrLoadStore.td
@@ -0,0 +1,278 @@
+//===- PTXInstrLoadStore.td - PTX Load/Store Instruction Defs -*- tblgen-*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the PTX load/store instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+
+// Addressing Predicates
+// We have to differentiate between 32- and 64-bit pointer types
+def Use32BitAddresses : Predicate<"!getSubtarget().is64Bit()">;
+def Use64BitAddresses : Predicate<"getSubtarget().is64Bit()">;
+
+//===----------------------------------------------------------------------===//
+// Pattern Fragments for Loads/Stores
+//===----------------------------------------------------------------------===//
+
+def load_global : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ const Value *Src;
+ const PointerType *PT;
+ if ((Src = cast<LoadSDNode>(N)->getSrcValue()) &&
+ (PT = dyn_cast<PointerType>(Src->getType())))
+ return PT->getAddressSpace() == PTXStateSpace::Global;
+ return false;
+}]>;
+
+def load_constant : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ const Value *Src;
+ const PointerType *PT;
+ if ((Src = cast<LoadSDNode>(N)->getSrcValue()) &&
+ (PT = dyn_cast<PointerType>(Src->getType())))
+ return PT->getAddressSpace() == PTXStateSpace::Constant;
+ return false;
+}]>;
+
+def load_shared : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ const Value *Src;
+ const PointerType *PT;
+ if ((Src = cast<LoadSDNode>(N)->getSrcValue()) &&
+ (PT = dyn_cast<PointerType>(Src->getType())))
+ return PT->getAddressSpace() == PTXStateSpace::Shared;
+ return false;
+}]>;
+
+def store_global
+ : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{
+ const Value *Src;
+ const PointerType *PT;
+ if ((Src = cast<StoreSDNode>(N)->getSrcValue()) &&
+ (PT = dyn_cast<PointerType>(Src->getType())))
+ return PT->getAddressSpace() == PTXStateSpace::Global;
+ return false;
+}]>;
+
+def store_shared
+ : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{
+ const Value *Src;
+ const PointerType *PT;
+ if ((Src = cast<StoreSDNode>(N)->getSrcValue()) &&
+ (PT = dyn_cast<PointerType>(Src->getType())))
+ return PT->getAddressSpace() == PTXStateSpace::Shared;
+ return false;
+}]>;
+
+// Addressing modes.
+def ADDRrr32 : ComplexPattern<i32, 2, "SelectADDRrr", [], []>;
+def ADDRrr64 : ComplexPattern<i64, 2, "SelectADDRrr", [], []>;
+def ADDRri32 : ComplexPattern<i32, 2, "SelectADDRri", [], []>;
+def ADDRri64 : ComplexPattern<i64, 2, "SelectADDRri", [], []>;
+def ADDRii32 : ComplexPattern<i32, 2, "SelectADDRii", [], []>;
+def ADDRii64 : ComplexPattern<i64, 2, "SelectADDRii", [], []>;
+def ADDRlocal32 : ComplexPattern<i32, 2, "SelectADDRlocal", [], []>;
+def ADDRlocal64 : ComplexPattern<i64, 2, "SelectADDRlocal", [], []>;
+
+// Address operands
+def MEMri32 : Operand<i32> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops RegI32, i32imm);
+}
+def MEMri64 : Operand<i64> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops RegI64, i64imm);
+}
+def LOCALri32 : Operand<i32> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops i32imm, i32imm);
+}
+def LOCALri64 : Operand<i64> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops i64imm, i64imm);
+}
+def MEMii32 : Operand<i32> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops i32imm, i32imm);
+}
+def MEMii64 : Operand<i64> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops i64imm, i64imm);
+}
+// The operand here does not correspond to an actual address, so we
+// can use i32 in 64-bit address modes.
+def MEMpi : Operand<i32> {
+ let PrintMethod = "printParamOperand";
+ let MIOperandInfo = (ops i32imm);
+}
+def MEMret : Operand<i32> {
+ let PrintMethod = "printReturnOperand";
+ let MIOperandInfo = (ops i32imm);
+}
+
+
+// Load/store .param space
+def PTXloadparam
+ : SDNode<"PTXISD::LOAD_PARAM", SDTypeProfile<1, 1, [SDTCisPtrTy<1>]>,
+ [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>;
+def PTXstoreparam
+ : SDNode<"PTXISD::STORE_PARAM", SDTypeProfile<0, 2, [SDTCisVT<0, i32>]>,
+ [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>;
+
+def PTXreadparam
+ : SDNode<"PTXISD::READ_PARAM", SDTypeProfile<1, 1, [SDTCisVT<1, i32>]>,
+ [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>;
+def PTXwriteparam
+ : SDNode<"PTXISD::WRITE_PARAM", SDTypeProfile<0, 1, []>,
+ [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>;
+
+
+
+//===----------------------------------------------------------------------===//
+// Classes for loads/stores
+//===----------------------------------------------------------------------===//
+multiclass PTX_LD<string opstr, string typestr,
+ RegisterClass RC, PatFrag pat_load> {
+ def rr32 : InstPTX<(outs RC:$d),
+ (ins MEMri32:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
+ [(set RC:$d, (pat_load ADDRrr32:$a))]>,
+ Requires<[Use32BitAddresses]>;
+ def rr64 : InstPTX<(outs RC:$d),
+ (ins MEMri64:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
+ [(set RC:$d, (pat_load ADDRrr64:$a))]>,
+ Requires<[Use64BitAddresses]>;
+ def ri32 : InstPTX<(outs RC:$d),
+ (ins MEMri32:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
+ [(set RC:$d, (pat_load ADDRri32:$a))]>,
+ Requires<[Use32BitAddresses]>;
+ def ri64 : InstPTX<(outs RC:$d),
+ (ins MEMri64:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
+ [(set RC:$d, (pat_load ADDRri64:$a))]>,
+ Requires<[Use64BitAddresses]>;
+ def ii32 : InstPTX<(outs RC:$d),
+ (ins MEMii32:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
+ [(set RC:$d, (pat_load ADDRii32:$a))]>,
+ Requires<[Use32BitAddresses]>;
+ def ii64 : InstPTX<(outs RC:$d),
+ (ins MEMii64:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
+ [(set RC:$d, (pat_load ADDRii64:$a))]>,
+ Requires<[Use64BitAddresses]>;
+}
+
+multiclass PTX_ST<string opstr, string typestr, RegisterClass RC,
+ PatFrag pat_store> {
+ def rr32 : InstPTX<(outs),
+ (ins RC:$d, MEMri32:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
+ [(pat_store RC:$d, ADDRrr32:$a)]>,
+ Requires<[Use32BitAddresses]>;
+ def rr64 : InstPTX<(outs),
+ (ins RC:$d, MEMri64:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
+ [(pat_store RC:$d, ADDRrr64:$a)]>,
+ Requires<[Use64BitAddresses]>;
+ def ri32 : InstPTX<(outs),
+ (ins RC:$d, MEMri32:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
+ [(pat_store RC:$d, ADDRri32:$a)]>,
+ Requires<[Use32BitAddresses]>;
+ def ri64 : InstPTX<(outs),
+ (ins RC:$d, MEMri64:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
+ [(pat_store RC:$d, ADDRri64:$a)]>,
+ Requires<[Use64BitAddresses]>;
+ def ii32 : InstPTX<(outs),
+ (ins RC:$d, MEMii32:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
+ [(pat_store RC:$d, ADDRii32:$a)]>,
+ Requires<[Use32BitAddresses]>;
+ def ii64 : InstPTX<(outs),
+ (ins RC:$d, MEMii64:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
+ [(pat_store RC:$d, ADDRii64:$a)]>,
+ Requires<[Use64BitAddresses]>;
+}
+
+multiclass PTX_LOCAL_LD_ST<string typestr, RegisterClass RC> {
+ def LDri32 : InstPTX<(outs RC:$d), (ins LOCALri32:$a),
+ !strconcat("ld.local", !strconcat(typestr, "\t$d, [$a]")),
+ [(set RC:$d, (load_global ADDRlocal32:$a))]>;
+ def LDri64 : InstPTX<(outs RC:$d), (ins LOCALri64:$a),
+ !strconcat("ld.local", !strconcat(typestr, "\t$d, [$a]")),
+ [(set RC:$d, (load_global ADDRlocal64:$a))]>;
+ def STri32 : InstPTX<(outs), (ins RC:$d, LOCALri32:$a),
+ !strconcat("st.local", !strconcat(typestr, "\t[$a], $d")),
+ [(store_global RC:$d, ADDRlocal32:$a)]>;
+ def STri64 : InstPTX<(outs), (ins RC:$d, LOCALri64:$a),
+ !strconcat("st.local", !strconcat(typestr, "\t[$a], $d")),
+ [(store_global RC:$d, ADDRlocal64:$a)]>;
+}
+
+multiclass PTX_PARAM_LD_ST<string typestr, RegisterClass RC> {
+ let hasSideEffects = 1 in {
+ def LDpi : InstPTX<(outs RC:$d), (ins i32imm:$a),
+ !strconcat("ld.param", !strconcat(typestr, "\t$d, [$a]")),
+ [(set RC:$d, (PTXloadparam texternalsym:$a))]>;
+ def STpi : InstPTX<(outs), (ins i32imm:$d, RC:$a),
+ !strconcat("st.param", !strconcat(typestr, "\t[$d], $a")),
+ [(PTXstoreparam texternalsym:$d, RC:$a)]>;
+ }
+}
+
+multiclass PTX_LD_ALL<string opstr, PatFrag pat_load> {
+ defm u16 : PTX_LD<opstr, ".u16", RegI16, pat_load>;
+ defm u32 : PTX_LD<opstr, ".u32", RegI32, pat_load>;
+ defm u64 : PTX_LD<opstr, ".u64", RegI64, pat_load>;
+ defm f32 : PTX_LD<opstr, ".f32", RegF32, pat_load>;
+ defm f64 : PTX_LD<opstr, ".f64", RegF64, pat_load>;
+}
+
+multiclass PTX_ST_ALL<string opstr, PatFrag pat_store> {
+ defm u16 : PTX_ST<opstr, ".u16", RegI16, pat_store>;
+ defm u32 : PTX_ST<opstr, ".u32", RegI32, pat_store>;
+ defm u64 : PTX_ST<opstr, ".u64", RegI64, pat_store>;
+ defm f32 : PTX_ST<opstr, ".f32", RegF32, pat_store>;
+ defm f64 : PTX_ST<opstr, ".f64", RegF64, pat_store>;
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// Instruction definitions for loads/stores
+//===----------------------------------------------------------------------===//
+
+// Global/shared stores
+defm STg : PTX_ST_ALL<"st.global", store_global>;
+defm STs : PTX_ST_ALL<"st.shared", store_shared>;
+
+// Global/shared/constant loads
+defm LDg : PTX_LD_ALL<"ld.global", load_global>;
+defm LDc : PTX_LD_ALL<"ld.const", load_constant>;
+defm LDs : PTX_LD_ALL<"ld.shared", load_shared>;
+
+// Param loads/stores
+defm PARAMPRED : PTX_PARAM_LD_ST<".pred", RegPred>;
+defm PARAMU16 : PTX_PARAM_LD_ST<".u16", RegI16>;
+defm PARAMU32 : PTX_PARAM_LD_ST<".u32", RegI32>;
+defm PARAMU64 : PTX_PARAM_LD_ST<".u64", RegI64>;
+defm PARAMF32 : PTX_PARAM_LD_ST<".f32", RegF32>;
+defm PARAMF64 : PTX_PARAM_LD_ST<".f64", RegF64>;
+
+// Local loads/stores
+defm LOCALPRED : PTX_LOCAL_LD_ST<".pred", RegPred>;
+defm LOCALU16 : PTX_LOCAL_LD_ST<".u16", RegI16>;
+defm LOCALU32 : PTX_LOCAL_LD_ST<".u32", RegI32>;
+defm LOCALU64 : PTX_LOCAL_LD_ST<".u64", RegI64>;
+defm LOCALF32 : PTX_LOCAL_LD_ST<".f32", RegF32>;
+defm LOCALF64 : PTX_LOCAL_LD_ST<".f64", RegF64>;
+
diff --git a/contrib/llvm/lib/Target/PTX/PTXIntrinsicInstrInfo.td b/contrib/llvm/lib/Target/PTX/PTXIntrinsicInstrInfo.td
new file mode 100644
index 0000000..9de1cb6
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXIntrinsicInstrInfo.td
@@ -0,0 +1,110 @@
+//===- PTXIntrinsicInstrInfo.td - Defines PTX intrinsics ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines all of the PTX-specific intrinsic instructions.
+//
+//===----------------------------------------------------------------------===//
+
+// PTX Special Purpose Register Accessor Intrinsics
+
+class PTX_READ_SPECIAL_REGISTER_R64<string regname, Intrinsic intop>
+ : InstPTX<(outs RegI64:$d), (ins),
+ !strconcat("mov.u64\t$d, %", regname),
+ [(set RegI64:$d, (intop))]>;
+
+class PTX_READ_SPECIAL_REGISTER_R32<string regname, Intrinsic intop>
+ : InstPTX<(outs RegI32:$d), (ins),
+ !strconcat("mov.u32\t$d, %", regname),
+ [(set RegI32:$d, (intop))]>;
+
+// TODO Add read vector-version of special registers
+
+//def PTX_READ_TID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"tid",
+// int_ptx_read_tid_r64>;
+def PTX_READ_TID_X : PTX_READ_SPECIAL_REGISTER_R32<"tid.x",
+ int_ptx_read_tid_x>;
+def PTX_READ_TID_Y : PTX_READ_SPECIAL_REGISTER_R32<"tid.y",
+ int_ptx_read_tid_y>;
+def PTX_READ_TID_Z : PTX_READ_SPECIAL_REGISTER_R32<"tid.z",
+ int_ptx_read_tid_z>;
+def PTX_READ_TID_W : PTX_READ_SPECIAL_REGISTER_R32<"tid.w",
+ int_ptx_read_tid_w>;
+
+//def PTX_READ_NTID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"ntid",
+// int_ptx_read_ntid_r64>;
+def PTX_READ_NTID_X : PTX_READ_SPECIAL_REGISTER_R32<"ntid.x",
+ int_ptx_read_ntid_x>;
+def PTX_READ_NTID_Y : PTX_READ_SPECIAL_REGISTER_R32<"ntid.y",
+ int_ptx_read_ntid_y>;
+def PTX_READ_NTID_Z : PTX_READ_SPECIAL_REGISTER_R32<"ntid.z",
+ int_ptx_read_ntid_z>;
+def PTX_READ_NTID_W : PTX_READ_SPECIAL_REGISTER_R32<"ntid.w",
+ int_ptx_read_ntid_w>;
+
+def PTX_READ_LANEID : PTX_READ_SPECIAL_REGISTER_R32<"laneid",
+ int_ptx_read_laneid>;
+def PTX_READ_WARPID : PTX_READ_SPECIAL_REGISTER_R32<"warpid",
+ int_ptx_read_warpid>;
+def PTX_READ_NWARPID : PTX_READ_SPECIAL_REGISTER_R32<"nwarpid",
+ int_ptx_read_nwarpid>;
+
+//def PTX_READ_CTAID_R64 :
+//PTX_READ_SPECIAL_REGISTER_R64<"ctaid", int_ptx_read_ctaid_r64>;
+def PTX_READ_CTAID_X : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.x",
+ int_ptx_read_ctaid_x>;
+def PTX_READ_CTAID_Y : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.y",
+ int_ptx_read_ctaid_y>;
+def PTX_READ_CTAID_Z : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.z",
+ int_ptx_read_ctaid_z>;
+def PTX_READ_CTAID_W : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.w",
+ int_ptx_read_ctaid_w>;
+
+//def PTX_READ_NCTAID_R64 :
+//PTX_READ_SPECIAL_REGISTER_R64<"nctaid", int_ptx_read_nctaid_r64>;
+def PTX_READ_NCTAID_X : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.x",
+ int_ptx_read_nctaid_x>;
+def PTX_READ_NCTAID_Y : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.y",
+ int_ptx_read_nctaid_y>;
+def PTX_READ_NCTAID_Z : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.z",
+ int_ptx_read_nctaid_z>;
+def PTX_READ_NCTAID_W : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.w",
+ int_ptx_read_nctaid_w>;
+
+def PTX_READ_SMID : PTX_READ_SPECIAL_REGISTER_R32<"smid",
+ int_ptx_read_smid>;
+def PTX_READ_NSMID : PTX_READ_SPECIAL_REGISTER_R32<"nsmid",
+ int_ptx_read_nsmid>;
+def PTX_READ_GRIDID : PTX_READ_SPECIAL_REGISTER_R32<"gridid",
+ int_ptx_read_gridid>;
+
+def PTX_READ_LANEMASK_EQ
+ : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_eq", int_ptx_read_lanemask_eq>;
+def PTX_READ_LANEMASK_LE
+ : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_le", int_ptx_read_lanemask_le>;
+def PTX_READ_LANEMASK_LT
+ : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_lt", int_ptx_read_lanemask_lt>;
+def PTX_READ_LANEMASK_GE
+ : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_ge", int_ptx_read_lanemask_ge>;
+def PTX_READ_LANEMASK_GT
+ : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_gt", int_ptx_read_lanemask_gt>;
+
+def PTX_READ_CLOCK
+ : PTX_READ_SPECIAL_REGISTER_R32<"clock", int_ptx_read_clock>;
+def PTX_READ_CLOCK64
+ : PTX_READ_SPECIAL_REGISTER_R64<"clock64", int_ptx_read_clock64>;
+
+def PTX_READ_PM0 : PTX_READ_SPECIAL_REGISTER_R32<"pm0", int_ptx_read_pm0>;
+def PTX_READ_PM1 : PTX_READ_SPECIAL_REGISTER_R32<"pm1", int_ptx_read_pm1>;
+def PTX_READ_PM2 : PTX_READ_SPECIAL_REGISTER_R32<"pm2", int_ptx_read_pm2>;
+def PTX_READ_PM3 : PTX_READ_SPECIAL_REGISTER_R32<"pm3", int_ptx_read_pm3>;
+
+// PTX Parallel Synchronization and Communication Intrinsics
+
+def PTX_BAR_SYNC : InstPTX<(outs), (ins i32imm:$i), "bar.sync\t$i",
+ [(int_ptx_bar_sync imm:$i)]>;
diff --git a/contrib/llvm/lib/Target/PTX/PTXMCAsmStreamer.cpp b/contrib/llvm/lib/Target/PTX/PTXMCAsmStreamer.cpp
new file mode 100644
index 0000000..468ce93
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXMCAsmStreamer.cpp
@@ -0,0 +1,544 @@
+//===- lib/Target/PTX/PTXMCAsmStreamer.cpp - PTX Text Assembly Output -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+class PTXMCAsmStreamer : public MCStreamer {
+ formatted_raw_ostream &OS;
+ const MCAsmInfo &MAI;
+ OwningPtr<MCInstPrinter> InstPrinter;
+ OwningPtr<MCCodeEmitter> Emitter;
+
+ SmallString<128> CommentToEmit;
+ raw_svector_ostream CommentStream;
+
+ unsigned IsVerboseAsm : 1;
+ unsigned ShowInst : 1;
+
+public:
+ PTXMCAsmStreamer(MCContext &Context,
+ formatted_raw_ostream &os,
+ bool isVerboseAsm, bool useLoc,
+ MCInstPrinter *printer,
+ MCCodeEmitter *emitter,
+ bool showInst)
+ : MCStreamer(Context), OS(os), MAI(Context.getAsmInfo()),
+ InstPrinter(printer), Emitter(emitter), CommentStream(CommentToEmit),
+ IsVerboseAsm(isVerboseAsm),
+ ShowInst(showInst) {
+ if (InstPrinter && IsVerboseAsm)
+ InstPrinter->setCommentStream(CommentStream);
+ }
+
+ ~PTXMCAsmStreamer() {}
+
+ inline void EmitEOL() {
+ // If we don't have any comments, just emit a \n.
+ if (!IsVerboseAsm) {
+ OS << '\n';
+ return;
+ }
+ EmitCommentsAndEOL();
+ }
+ void EmitCommentsAndEOL();
+
+ /// isVerboseAsm - Return true if this streamer supports verbose assembly at
+ /// all.
+ virtual bool isVerboseAsm() const { return IsVerboseAsm; }
+
+ /// hasRawTextSupport - We support EmitRawText.
+ virtual bool hasRawTextSupport() const { return true; }
+
+ /// AddComment - Add a comment that can be emitted to the generated .s
+ /// file if applicable as a QoI issue to make the output of the compiler
+ /// more readable. This only affects the MCAsmStreamer, and only when
+ /// verbose assembly output is enabled.
+ virtual void AddComment(const Twine &T);
+
+ /// AddEncodingComment - Add a comment showing the encoding of an instruction.
+ virtual void AddEncodingComment(const MCInst &Inst);
+
+ /// GetCommentOS - Return a raw_ostream that comments can be written to.
+ /// Unlike AddComment, you are required to terminate comments with \n if you
+ /// use this method.
+ virtual raw_ostream &GetCommentOS() {
+ if (!IsVerboseAsm)
+ return nulls(); // Discard comments unless in verbose asm mode.
+ return CommentStream;
+ }
+
+ /// AddBlankLine - Emit a blank line to a .s file to pretty it up.
+ virtual void AddBlankLine() {
+ EmitEOL();
+ }
+
+ /// @name MCStreamer Interface
+ /// @{
+
+ virtual void ChangeSection(const MCSection *Section);
+ virtual void InitSections() { /* PTX does not use sections */ }
+
+ virtual void EmitLabel(MCSymbol *Symbol);
+
+ virtual void EmitAssemblerFlag(MCAssemblerFlag Flag);
+
+ virtual void EmitThumbFunc(MCSymbol *Func);
+
+ virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value);
+
+ virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol);
+
+ virtual void EmitDwarfAdvanceLineAddr(int64_t LineDelta,
+ const MCSymbol *LastLabel,
+ const MCSymbol *Label,
+ unsigned PointerSize);
+
+ virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute);
+
+ virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue);
+ virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol);
+ virtual void EmitCOFFSymbolStorageClass(int StorageClass);
+ virtual void EmitCOFFSymbolType(int Type);
+ virtual void EndCOFFSymbolDef();
+ virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value);
+ virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment);
+
+ /// EmitLocalCommonSymbol - Emit a local common (.lcomm) symbol.
+ ///
+ /// @param Symbol - The common symbol to emit.
+ /// @param Size - The size of the common symbol.
+ /// @param ByteAlignment - The alignment of the common symbol in bytes.
+ virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment);
+
+ virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
+ unsigned Size = 0, unsigned ByteAlignment = 0);
+
+ virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
+ uint64_t Size, unsigned ByteAlignment = 0);
+
+ virtual void EmitBytes(StringRef Data, unsigned AddrSpace);
+
+ virtual void EmitValueImpl(const MCExpr *Value, unsigned Size,
+ unsigned AddrSpace);
+ virtual void EmitULEB128Value(const MCExpr *Value);
+ virtual void EmitSLEB128Value(const MCExpr *Value);
+ virtual void EmitGPRel32Value(const MCExpr *Value);
+
+
+ virtual void EmitFill(uint64_t NumBytes, uint8_t FillValue,
+ unsigned AddrSpace);
+
+ virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0,
+ unsigned ValueSize = 1,
+ unsigned MaxBytesToEmit = 0);
+
+ virtual void EmitCodeAlignment(unsigned ByteAlignment,
+ unsigned MaxBytesToEmit = 0);
+
+ virtual void EmitValueToOffset(const MCExpr *Offset,
+ unsigned char Value = 0);
+
+ virtual void EmitFileDirective(StringRef Filename);
+ virtual bool EmitDwarfFileDirective(unsigned FileNo, StringRef Filename);
+
+ virtual void EmitInstruction(const MCInst &Inst);
+
+ /// EmitRawText - If this file is backed by an assembly streamer, this dumps
+ /// the specified string in the output .s file. This capability is
+ /// indicated by the hasRawTextSupport() predicate.
+ virtual void EmitRawText(StringRef String);
+
+ virtual void Finish();
+
+ /// @}
+
+}; // class PTXMCAsmStreamer
+
+}
+
+/// TODO: Add appropriate implementation of Emit*() methods when needed
+
+void PTXMCAsmStreamer::AddComment(const Twine &T) {
+ if (!IsVerboseAsm) return;
+
+ // Make sure that CommentStream is flushed.
+ CommentStream.flush();
+
+ T.toVector(CommentToEmit);
+ // Each comment goes on its own line.
+ CommentToEmit.push_back('\n');
+
+ // Tell the comment stream that the vector changed underneath it.
+ CommentStream.resync();
+}
+
+void PTXMCAsmStreamer::EmitCommentsAndEOL() {
+ if (CommentToEmit.empty() && CommentStream.GetNumBytesInBuffer() == 0) {
+ OS << '\n';
+ return;
+ }
+
+ CommentStream.flush();
+ StringRef Comments = CommentToEmit.str();
+
+ assert(Comments.back() == '\n' &&
+ "Comment array not newline terminated");
+ do {
+ // Emit a line of comments.
+ OS.PadToColumn(MAI.getCommentColumn());
+ size_t Position = Comments.find('\n');
+ OS << MAI.getCommentString() << ' ' << Comments.substr(0, Position) << '\n';
+
+ Comments = Comments.substr(Position+1);
+ } while (!Comments.empty());
+
+ CommentToEmit.clear();
+ // Tell the comment stream that the vector changed underneath it.
+ CommentStream.resync();
+}
+
+static inline int64_t truncateToSize(int64_t Value, unsigned Bytes) {
+ assert(Bytes && "Invalid size!");
+ return Value & ((uint64_t) (int64_t) -1 >> (64 - Bytes * 8));
+}
+
+void PTXMCAsmStreamer::ChangeSection(const MCSection *Section) {
+ assert(Section && "Cannot switch to a null section!");
+}
+
+void PTXMCAsmStreamer::EmitLabel(MCSymbol *Symbol) {
+ assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
+ assert(!Symbol->isVariable() && "Cannot emit a variable symbol!");
+ assert(getCurrentSection() && "Cannot emit before setting section!");
+
+ OS << *Symbol << MAI.getLabelSuffix();
+ EmitEOL();
+ Symbol->setSection(*getCurrentSection());
+}
+
+void PTXMCAsmStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {}
+
+void PTXMCAsmStreamer::EmitThumbFunc(MCSymbol *Func) {}
+
+void PTXMCAsmStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
+ OS << *Symbol << " = " << *Value;
+ EmitEOL();
+
+ // FIXME: Lift context changes into super class.
+ Symbol->setVariableValue(Value);
+}
+
+void PTXMCAsmStreamer::EmitWeakReference(MCSymbol *Alias,
+ const MCSymbol *Symbol) {
+ OS << ".weakref " << *Alias << ", " << *Symbol;
+ EmitEOL();
+}
+
+void PTXMCAsmStreamer::EmitDwarfAdvanceLineAddr(int64_t LineDelta,
+ const MCSymbol *LastLabel,
+ const MCSymbol *Label,
+ unsigned PointerSize) {
+ report_fatal_error("Unimplemented.");
+}
+
+void PTXMCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
+ MCSymbolAttr Attribute) {}
+
+void PTXMCAsmStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {}
+
+void PTXMCAsmStreamer::BeginCOFFSymbolDef(const MCSymbol *Symbol) {}
+
+void PTXMCAsmStreamer::EmitCOFFSymbolStorageClass (int StorageClass) {}
+
+void PTXMCAsmStreamer::EmitCOFFSymbolType (int Type) {}
+
+void PTXMCAsmStreamer::EndCOFFSymbolDef() {}
+
+void PTXMCAsmStreamer::EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {}
+
+void PTXMCAsmStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment) {}
+
+void PTXMCAsmStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment) {}
+
+void PTXMCAsmStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
+ unsigned Size, unsigned ByteAlignment) {}
+
+void PTXMCAsmStreamer::EmitTBSSSymbol(const MCSection *Section,
+ MCSymbol *Symbol,
+ uint64_t Size, unsigned ByteAlignment) {}
+
+static inline char toOctal(int X) { return (X&7)+'0'; }
+
+static void PrintQuotedString(StringRef Data, raw_ostream &OS) {
+ OS << '"';
+
+ for (unsigned i = 0, e = Data.size(); i != e; ++i) {
+ unsigned char C = Data[i];
+ if (C == '"' || C == '\\') {
+ OS << '\\' << (char)C;
+ continue;
+ }
+
+ if (isprint((unsigned char)C)) {
+ OS << (char)C;
+ continue;
+ }
+
+ switch (C) {
+ case '\b': OS << "\\b"; break;
+ case '\f': OS << "\\f"; break;
+ case '\n': OS << "\\n"; break;
+ case '\r': OS << "\\r"; break;
+ case '\t': OS << "\\t"; break;
+ default:
+ OS << '\\';
+ OS << toOctal(C >> 6);
+ OS << toOctal(C >> 3);
+ OS << toOctal(C >> 0);
+ break;
+ }
+ }
+
+ OS << '"';
+}
+
+void PTXMCAsmStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) {
+ assert(getCurrentSection() && "Cannot emit contents before setting section!");
+ if (Data.empty()) return;
+
+ if (Data.size() == 1) {
+ OS << MAI.getData8bitsDirective(AddrSpace);
+ OS << (unsigned)(unsigned char)Data[0];
+ EmitEOL();
+ return;
+ }
+
+ // If the data ends with 0 and the target supports .asciz, use it, otherwise
+ // use .ascii
+ if (MAI.getAscizDirective() && Data.back() == 0) {
+ OS << MAI.getAscizDirective();
+ Data = Data.substr(0, Data.size()-1);
+ } else {
+ OS << MAI.getAsciiDirective();
+ }
+
+ OS << ' ';
+ PrintQuotedString(Data, OS);
+ EmitEOL();
+}
+
+void PTXMCAsmStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size,
+ unsigned AddrSpace) {
+ assert(getCurrentSection() && "Cannot emit contents before setting section!");
+ const char *Directive = 0;
+ switch (Size) {
+ default: break;
+ case 1: Directive = MAI.getData8bitsDirective(AddrSpace); break;
+ case 2: Directive = MAI.getData16bitsDirective(AddrSpace); break;
+ case 4: Directive = MAI.getData32bitsDirective(AddrSpace); break;
+ case 8:
+ Directive = MAI.getData64bitsDirective(AddrSpace);
+ // If the target doesn't support 64-bit data, emit as two 32-bit halves.
+ if (Directive) break;
+ int64_t IntValue;
+ if (!Value->EvaluateAsAbsolute(IntValue))
+ report_fatal_error("Don't know how to emit this value.");
+ if (getContext().getAsmInfo().isLittleEndian()) {
+ EmitIntValue((uint32_t)(IntValue >> 0 ), 4, AddrSpace);
+ EmitIntValue((uint32_t)(IntValue >> 32), 4, AddrSpace);
+ } else {
+ EmitIntValue((uint32_t)(IntValue >> 32), 4, AddrSpace);
+ EmitIntValue((uint32_t)(IntValue >> 0 ), 4, AddrSpace);
+ }
+ return;
+ }
+
+ assert(Directive && "Invalid size for machine code value!");
+ OS << Directive << *Value;
+ EmitEOL();
+}
+
+void PTXMCAsmStreamer::EmitULEB128Value(const MCExpr *Value) {
+ assert(MAI.hasLEB128() && "Cannot print a .uleb");
+ OS << ".uleb128 " << *Value;
+ EmitEOL();
+}
+
+void PTXMCAsmStreamer::EmitSLEB128Value(const MCExpr *Value) {
+ assert(MAI.hasLEB128() && "Cannot print a .sleb");
+ OS << ".sleb128 " << *Value;
+ EmitEOL();
+}
+
+void PTXMCAsmStreamer::EmitGPRel32Value(const MCExpr *Value) {
+ assert(MAI.getGPRel32Directive() != 0);
+ OS << MAI.getGPRel32Directive() << *Value;
+ EmitEOL();
+}
+
+
+/// EmitFill - Emit NumBytes bytes worth of the value specified by
+/// FillValue. This implements directives such as '.space'.
+void PTXMCAsmStreamer::EmitFill(uint64_t NumBytes, uint8_t FillValue,
+ unsigned AddrSpace) {
+ if (NumBytes == 0) return;
+
+ if (AddrSpace == 0)
+ if (const char *ZeroDirective = MAI.getZeroDirective()) {
+ OS << ZeroDirective << NumBytes;
+ if (FillValue != 0)
+ OS << ',' << (int)FillValue;
+ EmitEOL();
+ return;
+ }
+
+ // Emit a byte at a time.
+ MCStreamer::EmitFill(NumBytes, FillValue, AddrSpace);
+}
+
+void PTXMCAsmStreamer::EmitValueToAlignment(unsigned ByteAlignment,
+ int64_t Value,
+ unsigned ValueSize,
+ unsigned MaxBytesToEmit) {
+ // Some assemblers don't support non-power of two alignments, so we always
+ // emit alignments as a power of two if possible.
+ if (isPowerOf2_32(ByteAlignment)) {
+ switch (ValueSize) {
+ default: llvm_unreachable("Invalid size for machine code value!");
+ case 1: OS << MAI.getAlignDirective(); break;
+ // FIXME: use MAI for this!
+ case 2: OS << ".p2alignw "; break;
+ case 4: OS << ".p2alignl "; break;
+ case 8: llvm_unreachable("Unsupported alignment size!");
+ }
+
+ if (MAI.getAlignmentIsInBytes())
+ OS << ByteAlignment;
+ else
+ OS << Log2_32(ByteAlignment);
+
+ if (Value || MaxBytesToEmit) {
+ OS << ", 0x";
+ OS.write_hex(truncateToSize(Value, ValueSize));
+
+ if (MaxBytesToEmit)
+ OS << ", " << MaxBytesToEmit;
+ }
+ EmitEOL();
+ return;
+ }
+
+ // Non-power of two alignment. This is not widely supported by assemblers.
+ // FIXME: Parameterize this based on MAI.
+ switch (ValueSize) {
+ default: llvm_unreachable("Invalid size for machine code value!");
+ case 1: OS << ".balign"; break;
+ case 2: OS << ".balignw"; break;
+ case 4: OS << ".balignl"; break;
+ case 8: llvm_unreachable("Unsupported alignment size!");
+ }
+
+ OS << ' ' << ByteAlignment;
+ OS << ", " << truncateToSize(Value, ValueSize);
+ if (MaxBytesToEmit)
+ OS << ", " << MaxBytesToEmit;
+ EmitEOL();
+}
+
+void PTXMCAsmStreamer::EmitCodeAlignment(unsigned ByteAlignment,
+ unsigned MaxBytesToEmit) {}
+
+void PTXMCAsmStreamer::EmitValueToOffset(const MCExpr *Offset,
+ unsigned char Value) {}
+
+
+void PTXMCAsmStreamer::EmitFileDirective(StringRef Filename) {
+ assert(MAI.hasSingleParameterDotFile());
+ OS << "\t.file\t";
+ PrintQuotedString(Filename, OS);
+ EmitEOL();
+}
+
+// FIXME: should we inherit from MCAsmStreamer?
+bool PTXMCAsmStreamer::EmitDwarfFileDirective(unsigned FileNo,
+ StringRef Filename){
+ OS << "\t.file\t" << FileNo << ' ';
+ PrintQuotedString(Filename, OS);
+ EmitEOL();
+ return this->MCStreamer::EmitDwarfFileDirective(FileNo, Filename);
+}
+
+void PTXMCAsmStreamer::AddEncodingComment(const MCInst &Inst) {}
+
+void PTXMCAsmStreamer::EmitInstruction(const MCInst &Inst) {
+ assert(getCurrentSection() && "Cannot emit contents before setting section!");
+
+ // Show the encoding in a comment if we have a code emitter.
+ if (Emitter)
+ AddEncodingComment(Inst);
+
+ // Show the MCInst if enabled.
+ if (ShowInst) {
+ Inst.dump_pretty(GetCommentOS(), &MAI, InstPrinter.get(), "\n ");
+ GetCommentOS() << "\n";
+ }
+
+ // If we have an AsmPrinter, use that to print, otherwise print the MCInst.
+ if (InstPrinter)
+ InstPrinter->printInst(&Inst, OS, "");
+ else
+ Inst.print(OS, &MAI);
+ EmitEOL();
+}
+
+/// EmitRawText - If this file is backed by an assembly streamer, this dumps
+/// the specified string in the output .s file. This capability is
+/// indicated by the hasRawTextSupport() predicate.
+void PTXMCAsmStreamer::EmitRawText(StringRef String) {
+ if (!String.empty() && String.back() == '\n')
+ String = String.substr(0, String.size()-1);
+ OS << String;
+ EmitEOL();
+}
+
+void PTXMCAsmStreamer::Finish() {}
+
+namespace llvm {
+ MCStreamer *createPTXAsmStreamer(MCContext &Context,
+ formatted_raw_ostream &OS,
+ bool isVerboseAsm, bool useLoc, bool useCFI,
+ MCInstPrinter *IP,
+ MCCodeEmitter *CE, MCAsmBackend *MAB,
+ bool ShowInst) {
+ return new PTXMCAsmStreamer(Context, OS, isVerboseAsm, useLoc,
+ IP, CE, ShowInst);
+ }
+}
diff --git a/contrib/llvm/lib/Target/PTX/PTXMCInstLower.cpp b/contrib/llvm/lib/Target/PTX/PTXMCInstLower.cpp
new file mode 100644
index 0000000..142e639
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXMCInstLower.cpp
@@ -0,0 +1,32 @@
+//===-- PTXMCInstLower.cpp - Convert PTX MachineInstr to an MCInst --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains code to lower PTX MachineInstrs to their corresponding
+// MCInst records.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PTX.h"
+#include "PTXAsmPrinter.h"
+#include "llvm/Constants.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Target/Mangler.h"
+
+void llvm::LowerPTXMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
+ PTXAsmPrinter &AP) {
+ OutMI.setOpcode(MI->getOpcode());
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ MCOperand MCOp;
+ OutMI.addOperand(AP.lowerOperand(MO));
+ }
+}
+
diff --git a/contrib/llvm/lib/Target/PTX/PTXMFInfoExtract.cpp b/contrib/llvm/lib/Target/PTX/PTXMFInfoExtract.cpp
new file mode 100644
index 0000000..b33a273
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXMFInfoExtract.cpp
@@ -0,0 +1,68 @@
+//===-- PTXMFInfoExtract.cpp - Extract PTX machine function info ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an information extractor for PTX machine functions.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ptx-mf-info-extract"
+
+#include "PTX.h"
+#include "PTXTargetMachine.h"
+#include "PTXMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+// NOTE: PTXMFInfoExtract must after register allocation!
+
+namespace llvm {
+ /// PTXMFInfoExtract - PTX specific code to extract of PTX machine
+ /// function information for PTXAsmPrinter
+ ///
+ class PTXMFInfoExtract : public MachineFunctionPass {
+ private:
+ static char ID;
+
+ public:
+ PTXMFInfoExtract(PTXTargetMachine &TM, CodeGenOpt::Level OptLevel)
+ : MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const {
+ return "PTX Machine Function Info Extractor";
+ }
+ }; // class PTXMFInfoExtract
+} // namespace llvm
+
+using namespace llvm;
+
+char PTXMFInfoExtract::ID = 0;
+
+bool PTXMFInfoExtract::runOnMachineFunction(MachineFunction &MF) {
+ PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ // Generate list of all virtual registers used in this function
+ for (unsigned i = 0; i < MRI.getNumVirtRegs(); ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ const TargetRegisterClass *TRC = MRI.getRegClass(Reg);
+ MFI->addVirtualRegister(TRC, Reg);
+ }
+
+ return false;
+}
+
+FunctionPass *llvm::createPTXMFInfoExtract(PTXTargetMachine &TM,
+ CodeGenOpt::Level OptLevel) {
+ return new PTXMFInfoExtract(TM, OptLevel);
+}
diff --git a/contrib/llvm/lib/Target/PTX/PTXMachineFunctionInfo.h b/contrib/llvm/lib/Target/PTX/PTXMachineFunctionInfo.h
new file mode 100644
index 0000000..3b985f7
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXMachineFunctionInfo.h
@@ -0,0 +1,164 @@
+//===- PTXMachineFuctionInfo.h - PTX machine function info -------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares PTX-specific per-machine-function information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTX_MACHINE_FUNCTION_INFO_H
+#define PTX_MACHINE_FUNCTION_INFO_H
+
+#include "PTX.h"
+#include "PTXParamManager.h"
+#include "PTXRegisterInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+
+/// PTXMachineFunctionInfo - This class is derived from MachineFunction and
+/// contains private PTX target-specific information for each MachineFunction.
+///
+class PTXMachineFunctionInfo : public MachineFunctionInfo {
+private:
+ bool IsKernel;
+ DenseSet<unsigned> RegArgs;
+ DenseSet<unsigned> RegRets;
+
+ typedef std::vector<unsigned> RegisterList;
+ typedef DenseMap<const TargetRegisterClass*, RegisterList> RegisterMap;
+ typedef DenseMap<unsigned, std::string> RegisterNameMap;
+ typedef DenseMap<int, std::string> FrameMap;
+
+ RegisterMap UsedRegs;
+ RegisterNameMap RegNames;
+ FrameMap FrameSymbols;
+
+ PTXParamManager ParamManager;
+
+public:
+ typedef DenseSet<unsigned>::const_iterator reg_iterator;
+
+ PTXMachineFunctionInfo(MachineFunction &MF)
+ : IsKernel(false) {
+ UsedRegs[PTX::RegPredRegisterClass] = RegisterList();
+ UsedRegs[PTX::RegI16RegisterClass] = RegisterList();
+ UsedRegs[PTX::RegI32RegisterClass] = RegisterList();
+ UsedRegs[PTX::RegI64RegisterClass] = RegisterList();
+ UsedRegs[PTX::RegF32RegisterClass] = RegisterList();
+ UsedRegs[PTX::RegF64RegisterClass] = RegisterList();
+ }
+
+ /// getParamManager - Returns the PTXParamManager instance for this function.
+ PTXParamManager& getParamManager() { return ParamManager; }
+ const PTXParamManager& getParamManager() const { return ParamManager; }
+
+ /// setKernel/isKernel - Gets/sets a flag that indicates if this function is
+ /// a PTX kernel function.
+ void setKernel(bool _IsKernel=true) { IsKernel = _IsKernel; }
+ bool isKernel() const { return IsKernel; }
+
+ /// argreg_begin/argreg_end - Returns iterators to the set of registers
+ /// containing function arguments.
+ reg_iterator argreg_begin() const { return RegArgs.begin(); }
+ reg_iterator argreg_end() const { return RegArgs.end(); }
+
+ /// retreg_begin/retreg_end - Returns iterators to the set of registers
+ /// containing the function return values.
+ reg_iterator retreg_begin() const { return RegRets.begin(); }
+ reg_iterator retreg_end() const { return RegRets.end(); }
+
+ /// addRetReg - Adds a register to the set of return-value registers.
+ void addRetReg(unsigned Reg) {
+ if (!RegRets.count(Reg)) {
+ RegRets.insert(Reg);
+ std::string name;
+ name = "%ret";
+ name += utostr(RegRets.size() - 1);
+ RegNames[Reg] = name;
+ }
+ }
+
+ /// addArgReg - Adds a register to the set of function argument registers.
+ void addArgReg(unsigned Reg) {
+ RegArgs.insert(Reg);
+ std::string name;
+ name = "%param";
+ name += utostr(RegArgs.size() - 1);
+ RegNames[Reg] = name;
+ }
+
+ /// addVirtualRegister - Adds a virtual register to the set of all used
+ /// registers in the function.
+ void addVirtualRegister(const TargetRegisterClass *TRC, unsigned Reg) {
+ std::string name;
+
+ // Do not count registers that are argument/return registers.
+ if (!RegRets.count(Reg) && !RegArgs.count(Reg)) {
+ UsedRegs[TRC].push_back(Reg);
+ if (TRC == PTX::RegPredRegisterClass)
+ name = "%p";
+ else if (TRC == PTX::RegI16RegisterClass)
+ name = "%rh";
+ else if (TRC == PTX::RegI32RegisterClass)
+ name = "%r";
+ else if (TRC == PTX::RegI64RegisterClass)
+ name = "%rd";
+ else if (TRC == PTX::RegF32RegisterClass)
+ name = "%f";
+ else if (TRC == PTX::RegF64RegisterClass)
+ name = "%fd";
+ else
+ llvm_unreachable("Invalid register class");
+
+ name += utostr(UsedRegs[TRC].size() - 1);
+ RegNames[Reg] = name;
+ }
+ }
+
+ /// getRegisterName - Returns the name of the specified virtual register. This
+ /// name is used during PTX emission.
+ const char *getRegisterName(unsigned Reg) const {
+ if (RegNames.count(Reg))
+ return RegNames.find(Reg)->second.c_str();
+ else if (Reg == PTX::NoRegister)
+ return "%noreg";
+ else
+ llvm_unreachable("Register not in register name map");
+ }
+
+ /// getNumRegistersForClass - Returns the number of virtual registers that are
+ /// used for the specified register class.
+ unsigned getNumRegistersForClass(const TargetRegisterClass *TRC) const {
+ return UsedRegs.lookup(TRC).size();
+ }
+
+ /// getFrameSymbol - Returns the symbol name for the given FrameIndex.
+ const char* getFrameSymbol(int FrameIndex) {
+ if (FrameSymbols.count(FrameIndex)) {
+ return FrameSymbols.lookup(FrameIndex).c_str();
+ } else {
+ std::string Name = "__local";
+ Name += utostr(FrameIndex);
+ // The whole point of caching this name is to ensure the pointer we pass
+ // to any getExternalSymbol() calls will remain valid for the lifetime of
+ // the back-end instance. This is to work around an issue in SelectionDAG
+ // where symbol names are expected to be life-long strings.
+ FrameSymbols[FrameIndex] = Name;
+ return FrameSymbols[FrameIndex].c_str();
+ }
+ }
+}; // class PTXMachineFunctionInfo
+} // namespace llvm
+
+#endif // PTX_MACHINE_FUNCTION_INFO_H
diff --git a/contrib/llvm/lib/Target/PTX/PTXParamManager.cpp b/contrib/llvm/lib/Target/PTX/PTXParamManager.cpp
new file mode 100644
index 0000000..7753787
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXParamManager.cpp
@@ -0,0 +1,73 @@
+//===- PTXParamManager.cpp - Manager for .param variables -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PTXParamManager class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PTX.h"
+#include "PTXParamManager.h"
+#include "llvm/ADT/StringExtras.h"
+
+using namespace llvm;
+
+PTXParamManager::PTXParamManager() {
+}
+
+unsigned PTXParamManager::addArgumentParam(unsigned Size) {
+ PTXParam Param;
+ Param.Type = PTX_PARAM_TYPE_ARGUMENT;
+ Param.Size = Size;
+
+ std::string Name;
+ Name = "__param_";
+ Name += utostr(ArgumentParams.size()+1);
+ Param.Name = Name;
+
+ unsigned Index = AllParams.size();
+ AllParams[Index] = Param;
+ ArgumentParams.push_back(Index);
+
+ return Index;
+}
+
+unsigned PTXParamManager::addReturnParam(unsigned Size) {
+ PTXParam Param;
+ Param.Type = PTX_PARAM_TYPE_RETURN;
+ Param.Size = Size;
+
+ std::string Name;
+ Name = "__ret_";
+ Name += utostr(ReturnParams.size()+1);
+ Param.Name = Name;
+
+ unsigned Index = AllParams.size();
+ AllParams[Index] = Param;
+ ReturnParams.push_back(Index);
+
+ return Index;
+}
+
+unsigned PTXParamManager::addLocalParam(unsigned Size) {
+ PTXParam Param;
+ Param.Type = PTX_PARAM_TYPE_LOCAL;
+ Param.Size = Size;
+
+ std::string Name;
+ Name = "__localparam_";
+ Name += utostr(LocalParams.size()+1);
+ Param.Name = Name;
+
+ unsigned Index = AllParams.size();
+ AllParams[Index] = Param;
+ LocalParams.push_back(Index);
+
+ return Index;
+}
+
diff --git a/contrib/llvm/lib/Target/PTX/PTXParamManager.h b/contrib/llvm/lib/Target/PTX/PTXParamManager.h
new file mode 100644
index 0000000..9fd2de5
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXParamManager.h
@@ -0,0 +1,86 @@
+//===- PTXParamManager.h - Manager for .param variables ----------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the PTXParamManager class, which manages all defined .param
+// variables for a particular function.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTX_PARAM_MANAGER_H
+#define PTX_PARAM_MANAGER_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+
+namespace llvm {
+
+/// PTXParamManager - This class manages all .param variables defined for a
+/// particular function.
+class PTXParamManager {
+private:
+
+ /// PTXParamType - Type of a .param variable
+ enum PTXParamType {
+ PTX_PARAM_TYPE_ARGUMENT,
+ PTX_PARAM_TYPE_RETURN,
+ PTX_PARAM_TYPE_LOCAL
+ };
+
+ /// PTXParam - Definition of a PTX .param variable
+ struct PTXParam {
+ PTXParamType Type;
+ unsigned Size;
+ std::string Name;
+ };
+
+ DenseMap<unsigned, PTXParam> AllParams;
+ SmallVector<unsigned, 4> ArgumentParams;
+ SmallVector<unsigned, 4> ReturnParams;
+ SmallVector<unsigned, 4> LocalParams;
+
+public:
+
+ typedef SmallVector<unsigned, 4>::const_iterator param_iterator;
+
+ PTXParamManager();
+
+ param_iterator arg_begin() const { return ArgumentParams.begin(); }
+ param_iterator arg_end() const { return ArgumentParams.end(); }
+ param_iterator ret_begin() const { return ReturnParams.begin(); }
+ param_iterator ret_end() const { return ReturnParams.end(); }
+ param_iterator local_begin() const { return LocalParams.begin(); }
+ param_iterator local_end() const { return LocalParams.end(); }
+
+ /// addArgumentParam - Returns a new .param used as an argument.
+ unsigned addArgumentParam(unsigned Size);
+
+ /// addReturnParam - Returns a new .param used as a return argument.
+ unsigned addReturnParam(unsigned Size);
+
+ /// addLocalParam - Returns a new .param used as a local .param variable.
+ unsigned addLocalParam(unsigned Size);
+
+ /// getParamName - Returns the name of the parameter as a string.
+ const std::string &getParamName(unsigned Param) const {
+ assert(AllParams.count(Param) == 1 && "Param has not been defined!");
+ return AllParams.find(Param)->second.Name;
+ }
+
+ /// getParamSize - Returns the size of the parameter in bits.
+ unsigned getParamSize(unsigned Param) const {
+ assert(AllParams.count(Param) == 1 && "Param has not been defined!");
+ return AllParams.find(Param)->second.Size;
+ }
+
+};
+
+}
+
+#endif
+
diff --git a/contrib/llvm/lib/Target/PTX/PTXRegAlloc.cpp b/contrib/llvm/lib/Target/PTX/PTXRegAlloc.cpp
new file mode 100644
index 0000000..2d2d5c3
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXRegAlloc.cpp
@@ -0,0 +1,58 @@
+//===-- PTXRegAlloc.cpp - PTX Register Allocator --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a register allocator for PTX code.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ptx-reg-alloc"
+
+#include "PTX.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+
+using namespace llvm;
+
+namespace {
+ // Special register allocator for PTX.
+ class PTXRegAlloc : public MachineFunctionPass {
+ public:
+ static char ID;
+ PTXRegAlloc() : MachineFunctionPass(ID) {
+ initializePHIEliminationPass(*PassRegistry::getPassRegistry());
+ initializeTwoAddressInstructionPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual const char* getPassName() const {
+ return "PTX Register Allocator";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequiredID(PHIEliminationID);
+ AU.addRequiredID(TwoAddressInstructionPassID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF) {
+ // We do not actually do anything (at least not yet).
+ return false;
+ }
+ };
+
+ char PTXRegAlloc::ID = 0;
+
+ static RegisterRegAlloc
+ ptxRegAlloc("ptx", "PTX register allocator", createPTXRegisterAllocator);
+}
+
+FunctionPass *llvm::createPTXRegisterAllocator() {
+ return new PTXRegAlloc();
+}
+
diff --git a/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.cpp b/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.cpp
new file mode 100644
index 0000000..c806266
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.cpp
@@ -0,0 +1,74 @@
+//===- PTXRegisterInfo.cpp - PTX Register Information ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PTX implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PTX.h"
+#include "PTXRegisterInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define GET_REGINFO_TARGET_DESC
+#include "PTXGenRegisterInfo.inc"
+
+using namespace llvm;
+
+PTXRegisterInfo::PTXRegisterInfo(PTXTargetMachine &TM,
+ const TargetInstrInfo &tii)
+ // PTX does not have a return address register.
+ : PTXGenRegisterInfo(0), TII(tii) {
+}
+
+void PTXRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj,
+ RegScavenger *RS) const {
+ unsigned Index;
+ MachineInstr &MI = *II;
+ //MachineBasicBlock &MBB = *MI.getParent();
+ //DebugLoc dl = MI.getDebugLoc();
+ //MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
+
+ //unsigned Reg = MRI.createVirtualRegister(PTX::RegF32RegisterClass);
+
+ llvm_unreachable("FrameIndex should have been previously eliminated!");
+
+ Index = 0;
+ while (!MI.getOperand(Index).isFI()) {
+ ++Index;
+ assert(Index < MI.getNumOperands() &&
+ "Instr does not have a FrameIndex operand!");
+ }
+
+ int FrameIndex = MI.getOperand(Index).getIndex();
+
+ DEBUG(dbgs() << "eliminateFrameIndex: " << MI);
+ DEBUG(dbgs() << "- SPAdj: " << SPAdj << "\n");
+ DEBUG(dbgs() << "- FrameIndex: " << FrameIndex << "\n");
+
+ //MachineInstr* MI2 = BuildMI(MBB, II, dl, TII.get(PTX::LOAD_LOCAL_F32))
+ //.addReg(Reg, RegState::Define).addImm(FrameIndex);
+ //if (MI2->findFirstPredOperandIdx() == -1) {
+ // MI2->addOperand(MachineOperand::CreateReg(PTX::NoRegister, /*IsDef=*/false));
+ // MI2->addOperand(MachineOperand::CreateImm(PTX::PRED_NORMAL));
+ //}
+ //MI2->dump();
+
+ //MachineOperand ESOp = MachineOperand::CreateES("__local__");
+
+ // This frame index is post stack slot re-use assignments
+ //MI.getOperand(Index).ChangeToRegister(Reg, false);
+ MI.getOperand(Index).ChangeToImmediate(FrameIndex);
+ //MI.getOperand(Index) = ESOp;
+}
diff --git a/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.h b/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.h
new file mode 100644
index 0000000..55fafe4
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.h
@@ -0,0 +1,57 @@
+//===- PTXRegisterInfo.h - PTX Register Information Impl --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PTX implementation of the MRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTX_REGISTER_INFO_H
+#define PTX_REGISTER_INFO_H
+
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/ADT/BitVector.h"
+
+#define GET_REGINFO_HEADER
+#include "PTXGenRegisterInfo.inc"
+
+namespace llvm {
+class PTXTargetMachine;
+class MachineFunction;
+
+struct PTXRegisterInfo : public PTXGenRegisterInfo {
+private:
+ const TargetInstrInfo &TII;
+
+public:
+ PTXRegisterInfo(PTXTargetMachine &TM,
+ const TargetInstrInfo &tii);
+
+ virtual const unsigned
+ *getCalleeSavedRegs(const MachineFunction *MF = 0) const {
+ static const unsigned CalleeSavedRegs[] = { 0 };
+ return CalleeSavedRegs; // save nothing
+ }
+
+ virtual BitVector getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+ return Reserved; // reserve no regs
+ }
+
+ virtual void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj,
+ RegScavenger *RS = NULL) const;
+
+ virtual unsigned getFrameRegister(const MachineFunction &MF) const {
+ llvm_unreachable("PTX does not have a frame register");
+ return 0;
+ }
+}; // struct PTXRegisterInfo
+} // namespace llvm
+
+#endif // PTX_REGISTER_INFO_H
diff --git a/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.td b/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.td
new file mode 100644
index 0000000..6ed6d3f
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.td
@@ -0,0 +1,37 @@
+
+//===- PTXRegisterInfo.td - PTX Register defs ----------------*- tblgen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the PTX register file
+//===----------------------------------------------------------------------===//
+
+class PTXReg<string n> : Register<n> {
+ let Namespace = "PTX";
+}
+
+//===----------------------------------------------------------------------===//
+// Registers
+//===----------------------------------------------------------------------===//
+
+// The generated register info code throws warnings for empty register classes
+// (e.g. zero-length arrays), so we use a dummy register here just to prevent
+// these warnings.
+def DUMMY_REG : PTXReg<"R0">;
+
+//===----------------------------------------------------------------------===//
+// Register classes
+//===----------------------------------------------------------------------===//
+def RegPred : RegisterClass<"PTX", [i1], 8, (add DUMMY_REG)>;
+def RegI16 : RegisterClass<"PTX", [i16], 16, (add DUMMY_REG)>;
+def RegI32 : RegisterClass<"PTX", [i32], 32, (add DUMMY_REG)>;
+def RegI64 : RegisterClass<"PTX", [i64], 64, (add DUMMY_REG)>;
+def RegF32 : RegisterClass<"PTX", [f32], 32, (add DUMMY_REG)>;
+def RegF64 : RegisterClass<"PTX", [f64], 64, (add DUMMY_REG)>;
+
diff --git a/contrib/llvm/lib/Target/PTX/PTXSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/PTX/PTXSelectionDAGInfo.cpp
new file mode 100644
index 0000000..50ef14a
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXSelectionDAGInfo.cpp
@@ -0,0 +1,149 @@
+//===-- PTXSelectionDAGInfo.cpp - PTX SelectionDAG Info -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PTXSelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ptx-selectiondag-info"
+#include "PTXTargetMachine.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+using namespace llvm;
+
+PTXSelectionDAGInfo::PTXSelectionDAGInfo(const TargetMachine &TM)
+ : TargetSelectionDAGInfo(TM),
+ Subtarget(&TM.getSubtarget<PTXSubtarget>()) {
+}
+
+PTXSelectionDAGInfo::~PTXSelectionDAGInfo() {
+}
+
+SDValue
+PTXSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain,
+ SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ bool isVolatile, bool AlwaysInline,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) const {
+ // Do repeated 4-byte loads and stores. To be improved.
+ // This requires 4-byte alignment.
+ if ((Align & 3) != 0)
+ return SDValue();
+ // This requires the copy size to be a constant, preferably
+ // within a subtarget-specific limit.
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ if (!ConstantSize)
+ return SDValue();
+ uint64_t SizeVal = ConstantSize->getZExtValue();
+ // Always inline memcpys. In PTX, we do not have a C library that provides
+ // a memcpy function.
+ //if (!AlwaysInline)
+ // return SDValue();
+
+ unsigned BytesLeft = SizeVal & 3;
+ unsigned NumMemOps = SizeVal >> 2;
+ unsigned EmittedNumMemOps = 0;
+ EVT VT = MVT::i32;
+ unsigned VTSize = 4;
+ unsigned i = 0;
+ const unsigned MAX_LOADS_IN_LDM = 6;
+ SDValue TFOps[MAX_LOADS_IN_LDM];
+ SDValue Loads[MAX_LOADS_IN_LDM];
+ uint64_t SrcOff = 0, DstOff = 0;
+ EVT PointerType = Subtarget->is64Bit() ? MVT::i64 : MVT::i32;
+
+ // Emit up to MAX_LOADS_IN_LDM loads, then a TokenFactor barrier, then the
+ // same number of stores. The loads and stores will get combined into
+ // ldm/stm later on.
+ while (EmittedNumMemOps < NumMemOps) {
+ for (i = 0;
+ i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
+ Loads[i] = DAG.getLoad(VT, dl, Chain,
+ DAG.getNode(ISD::ADD, dl, PointerType, Src,
+ DAG.getConstant(SrcOff, PointerType)),
+ SrcPtrInfo.getWithOffset(SrcOff), isVolatile,
+ false, 0);
+ TFOps[i] = Loads[i].getValue(1);
+ SrcOff += VTSize;
+ }
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
+
+ for (i = 0;
+ i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
+ TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
+ DAG.getNode(ISD::ADD, dl, PointerType, Dst,
+ DAG.getConstant(DstOff, PointerType)),
+ DstPtrInfo.getWithOffset(DstOff),
+ isVolatile, false, 0);
+ DstOff += VTSize;
+ }
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
+
+ EmittedNumMemOps += i;
+ }
+
+ if (BytesLeft == 0)
+ return Chain;
+
+ // Issue loads / stores for the trailing (1 - 3) bytes.
+ unsigned BytesLeftSave = BytesLeft;
+ i = 0;
+ while (BytesLeft) {
+ if (BytesLeft >= 2) {
+ VT = MVT::i16;
+ VTSize = 2;
+ } else {
+ VT = MVT::i8;
+ VTSize = 1;
+ }
+
+ Loads[i] = DAG.getLoad(VT, dl, Chain,
+ DAG.getNode(ISD::ADD, dl, PointerType, Src,
+ DAG.getConstant(SrcOff, PointerType)),
+ SrcPtrInfo.getWithOffset(SrcOff), false, false, 0);
+ TFOps[i] = Loads[i].getValue(1);
+ ++i;
+ SrcOff += VTSize;
+ BytesLeft -= VTSize;
+ }
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
+
+ i = 0;
+ BytesLeft = BytesLeftSave;
+ while (BytesLeft) {
+ if (BytesLeft >= 2) {
+ VT = MVT::i16;
+ VTSize = 2;
+ } else {
+ VT = MVT::i8;
+ VTSize = 1;
+ }
+
+ TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
+ DAG.getNode(ISD::ADD, dl, PointerType, Dst,
+ DAG.getConstant(DstOff, PointerType)),
+ DstPtrInfo.getWithOffset(DstOff), false, false, 0);
+ ++i;
+ DstOff += VTSize;
+ BytesLeft -= VTSize;
+ }
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
+}
+
+SDValue PTXSelectionDAGInfo::
+EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain, SDValue Dst,
+ SDValue Src, SDValue Size,
+ unsigned Align, bool isVolatile,
+ MachinePointerInfo DstPtrInfo) const {
+ llvm_unreachable("memset lowering not implemented for PTX yet");
+}
+
diff --git a/contrib/llvm/lib/Target/PTX/PTXSelectionDAGInfo.h b/contrib/llvm/lib/Target/PTX/PTXSelectionDAGInfo.h
new file mode 100644
index 0000000..e0c7167
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXSelectionDAGInfo.h
@@ -0,0 +1,53 @@
+//===-- PTXSelectionDAGInfo.h - PTX SelectionDAG Info -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the PTX subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTXSELECTIONDAGINFO_H
+#define PTXSELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+/// PTXSelectionDAGInfo - TargetSelectionDAGInfo sub-class for the PTX target.
+/// At the moment, this is mostly just a copy of ARMSelectionDAGInfo.
+class PTXSelectionDAGInfo : public TargetSelectionDAGInfo {
+ /// Subtarget - Keep a pointer to the PTXSubtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const PTXSubtarget *Subtarget;
+
+public:
+ explicit PTXSelectionDAGInfo(const TargetMachine &TM);
+ ~PTXSelectionDAGInfo();
+
+ virtual
+ SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain,
+ SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ bool isVolatile, bool AlwaysInline,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) const;
+
+ virtual
+ SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain,
+ SDValue Op1, SDValue Op2,
+ SDValue Op3, unsigned Align,
+ bool isVolatile,
+ MachinePointerInfo DstPtrInfo) const;
+};
+
+}
+
+#endif
+
diff --git a/contrib/llvm/lib/Target/PTX/PTXSubtarget.cpp b/contrib/llvm/lib/Target/PTX/PTXSubtarget.cpp
new file mode 100644
index 0000000..1eb57d2
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXSubtarget.cpp
@@ -0,0 +1,66 @@
+//===- PTXSubtarget.cpp - PTX Subtarget Information ---------------*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PTX specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PTXSubtarget.h"
+#include "PTX.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "PTXGenSubtargetInfo.inc"
+
+using namespace llvm;
+
+PTXSubtarget::PTXSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS, bool is64Bit)
+ : PTXGenSubtargetInfo(TT, CPU, FS),
+ PTXTarget(PTX_COMPUTE_1_0),
+ PTXVersion(PTX_VERSION_2_0),
+ SupportsDouble(false),
+ SupportsFMA(true),
+ Is64Bit(is64Bit) {
+ std::string TARGET = CPU;
+ if (TARGET.empty())
+ TARGET = "generic";
+ ParseSubtargetFeatures(TARGET, FS);
+}
+
+std::string PTXSubtarget::getTargetString() const {
+ switch(PTXTarget) {
+ default: llvm_unreachable("Unknown PTX target");
+ case PTX_SM_1_0: return "sm_10";
+ case PTX_SM_1_1: return "sm_11";
+ case PTX_SM_1_2: return "sm_12";
+ case PTX_SM_1_3: return "sm_13";
+ case PTX_SM_2_0: return "sm_20";
+ case PTX_SM_2_1: return "sm_21";
+ case PTX_SM_2_2: return "sm_22";
+ case PTX_SM_2_3: return "sm_23";
+ case PTX_COMPUTE_1_0: return "compute_10";
+ case PTX_COMPUTE_1_1: return "compute_11";
+ case PTX_COMPUTE_1_2: return "compute_12";
+ case PTX_COMPUTE_1_3: return "compute_13";
+ case PTX_COMPUTE_2_0: return "compute_20";
+ }
+}
+
+std::string PTXSubtarget::getPTXVersionString() const {
+ switch(PTXVersion) {
+ default: llvm_unreachable("Unknown PTX version");
+ case PTX_VERSION_2_0: return "2.0";
+ case PTX_VERSION_2_1: return "2.1";
+ case PTX_VERSION_2_2: return "2.2";
+ case PTX_VERSION_2_3: return "2.3";
+ }
+}
diff --git a/contrib/llvm/lib/Target/PTX/PTXSubtarget.h b/contrib/llvm/lib/Target/PTX/PTXSubtarget.h
new file mode 100644
index 0000000..b946d7c
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXSubtarget.h
@@ -0,0 +1,130 @@
+//====-- PTXSubtarget.h - Define Subtarget for the PTX ---------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the PTX specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTX_SUBTARGET_H
+#define PTX_SUBTARGET_H
+
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+#define GET_SUBTARGETINFO_HEADER
+#include "PTXGenSubtargetInfo.inc"
+
+namespace llvm {
+class StringRef;
+
+ class PTXSubtarget : public PTXGenSubtargetInfo {
+ public:
+
+ /**
+ * Enumeration of Shader Models supported by the back-end.
+ */
+ enum PTXTargetEnum {
+ PTX_COMPUTE_1_0, /*< Compute Compatibility 1.0 */
+ PTX_COMPUTE_1_1, /*< Compute Compatibility 1.1 */
+ PTX_COMPUTE_1_2, /*< Compute Compatibility 1.2 */
+ PTX_COMPUTE_1_3, /*< Compute Compatibility 1.3 */
+ PTX_COMPUTE_2_0, /*< Compute Compatibility 2.0 */
+ PTX_LAST_COMPUTE,
+
+ PTX_SM_1_0, /*< Shader Model 1.0 */
+ PTX_SM_1_1, /*< Shader Model 1.1 */
+ PTX_SM_1_2, /*< Shader Model 1.2 */
+ PTX_SM_1_3, /*< Shader Model 1.3 */
+ PTX_SM_2_0, /*< Shader Model 2.0 */
+ PTX_SM_2_1, /*< Shader Model 2.1 */
+ PTX_SM_2_2, /*< Shader Model 2.2 */
+ PTX_SM_2_3, /*< Shader Model 2.3 */
+ PTX_LAST_SM
+ };
+
+ /**
+ * Enumeration of PTX versions supported by the back-end.
+ *
+ * Currently, PTX 2.0 is the minimum supported version.
+ */
+ enum PTXVersionEnum {
+ PTX_VERSION_2_0, /*< PTX Version 2.0 */
+ PTX_VERSION_2_1, /*< PTX Version 2.1 */
+ PTX_VERSION_2_2, /*< PTX Version 2.2 */
+ PTX_VERSION_2_3 /*< PTX Version 2.3 */
+ };
+
+ private:
+
+ /// Shader Model supported on the target GPU.
+ PTXTargetEnum PTXTarget;
+
+ /// PTX Language Version.
+ PTXVersionEnum PTXVersion;
+
+ // The native .f64 type is supported on the hardware.
+ bool SupportsDouble;
+
+ // Support the fused-multiply add (FMA) and multiply-add (MAD)
+ // instructions
+ bool SupportsFMA;
+
+ // Use .u64 instead of .u32 for addresses.
+ bool Is64Bit;
+
+ public:
+
+ PTXSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS, bool is64Bit);
+
+ // Target architecture accessors
+ std::string getTargetString() const;
+
+ std::string getPTXVersionString() const;
+
+ bool supportsDouble() const { return SupportsDouble; }
+
+ bool is64Bit() const { return Is64Bit; }
+
+ bool supportsFMA() const { return SupportsFMA; }
+
+ bool supportsPTX21() const { return PTXVersion >= PTX_VERSION_2_1; }
+
+ bool supportsPTX22() const { return PTXVersion >= PTX_VERSION_2_2; }
+
+ bool supportsPTX23() const { return PTXVersion >= PTX_VERSION_2_3; }
+
+ bool fdivNeedsRoundingMode() const {
+ return (PTXTarget >= PTX_SM_1_3 && PTXTarget < PTX_LAST_SM) ||
+ (PTXTarget >= PTX_COMPUTE_1_3 && PTXTarget < PTX_LAST_COMPUTE);
+ }
+
+ bool fmadNeedsRoundingMode() const {
+ return (PTXTarget >= PTX_SM_1_3 && PTXTarget < PTX_LAST_SM) ||
+ (PTXTarget >= PTX_COMPUTE_1_3 && PTXTarget < PTX_LAST_COMPUTE);
+ }
+
+ bool useParamSpaceForDeviceArgs() const {
+ return (PTXTarget >= PTX_SM_2_0 && PTXTarget < PTX_LAST_SM) ||
+ (PTXTarget >= PTX_COMPUTE_2_0 && PTXTarget < PTX_LAST_COMPUTE);
+ }
+
+ bool callsAreHandled() const {
+ return (PTXTarget >= PTX_SM_2_0 && PTXTarget < PTX_LAST_SM) ||
+ (PTXTarget >= PTX_COMPUTE_2_0 && PTXTarget < PTX_LAST_COMPUTE);
+ }
+
+ bool emitPtrAttribute() const {
+ return PTXVersion >= PTX_VERSION_2_2;
+ }
+
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+ }; // class PTXSubtarget
+} // namespace llvm
+
+#endif // PTX_SUBTARGET_H
diff --git a/contrib/llvm/lib/Target/PTX/PTXTargetMachine.cpp b/contrib/llvm/lib/Target/PTX/PTXTargetMachine.cpp
new file mode 100644
index 0000000..449a3d9
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXTargetMachine.cpp
@@ -0,0 +1,373 @@
+//===-- PTXTargetMachine.cpp - Define TargetMachine for PTX ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Top-level implementation for the PTX target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PTX.h"
+#include "PTXTargetMachine.h"
+#include "llvm/PassManager.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Assembly/PrintModulePass.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/TargetRegistry.h"
+
+
+using namespace llvm;
+
+namespace llvm {
+ MCStreamer *createPTXAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
+ bool isVerboseAsm, bool useLoc,
+ bool useCFI,
+ MCInstPrinter *InstPrint,
+ MCCodeEmitter *CE,
+ MCAsmBackend *MAB,
+ bool ShowInst);
+}
+
+extern "C" void LLVMInitializePTXTarget() {
+
+ RegisterTargetMachine<PTX32TargetMachine> X(ThePTX32Target);
+ RegisterTargetMachine<PTX64TargetMachine> Y(ThePTX64Target);
+
+ TargetRegistry::RegisterAsmStreamer(ThePTX32Target, createPTXAsmStreamer);
+ TargetRegistry::RegisterAsmStreamer(ThePTX64Target, createPTXAsmStreamer);
+}
+
+namespace {
+ const char* DataLayout32 =
+ "e-p:32:32-i64:32:32-f64:32:32-v128:32:128-v64:32:64-n32:64";
+ const char* DataLayout64 =
+ "e-p:64:64-i64:32:32-f64:32:32-v128:32:128-v64:32:64-n32:64";
+
+ // Copied from LLVMTargetMachine.cpp
+ void printNoVerify(PassManagerBase &PM, const char *Banner) {
+ if (PrintMachineCode)
+ PM.add(createMachineFunctionPrinterPass(dbgs(), Banner));
+ }
+
+ void printAndVerify(PassManagerBase &PM,
+ const char *Banner) {
+ if (PrintMachineCode)
+ PM.add(createMachineFunctionPrinterPass(dbgs(), Banner));
+
+ //if (VerifyMachineCode)
+ // PM.add(createMachineVerifierPass(Banner));
+ }
+}
+
+// DataLayout and FrameLowering are filled with dummy data
+PTXTargetMachine::PTXTargetMachine(const Target &T,
+ StringRef TT, StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM,
+ bool is64Bit)
+ : LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
+ DataLayout(is64Bit ? DataLayout64 : DataLayout32),
+ Subtarget(TT, CPU, FS, is64Bit),
+ FrameLowering(Subtarget),
+ InstrInfo(*this),
+ TSInfo(*this),
+ TLInfo(*this) {
+}
+
+PTX32TargetMachine::PTX32TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM)
+ : PTXTargetMachine(T, TT, CPU, FS, RM, CM, false) {
+}
+
+PTX64TargetMachine::PTX64TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM)
+ : PTXTargetMachine(T, TT, CPU, FS, RM, CM, true) {
+}
+
+bool PTXTargetMachine::addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ PM.add(createPTXISelDag(*this, OptLevel));
+ return false;
+}
+
+bool PTXTargetMachine::addPostRegAlloc(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ // PTXMFInfoExtract must after register allocation!
+ //PM.add(createPTXMFInfoExtract(*this, OptLevel));
+ return false;
+}
+
+bool PTXTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
+ formatted_raw_ostream &Out,
+ CodeGenFileType FileType,
+ CodeGenOpt::Level OptLevel,
+ bool DisableVerify) {
+ // This is mostly based on LLVMTargetMachine::addPassesToEmitFile
+
+ // Add common CodeGen passes.
+ MCContext *Context = 0;
+ if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify, Context))
+ return true;
+ assert(Context != 0 && "Failed to get MCContext");
+
+ if (hasMCSaveTempLabels())
+ Context->setAllowTemporaryLabels(false);
+
+ const MCAsmInfo &MAI = *getMCAsmInfo();
+ const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>();
+ OwningPtr<MCStreamer> AsmStreamer;
+
+ switch (FileType) {
+ default: return true;
+ case CGFT_AssemblyFile: {
+ MCInstPrinter *InstPrinter =
+ getTarget().createMCInstPrinter(MAI.getAssemblerDialect(), MAI, STI);
+
+ // Create a code emitter if asked to show the encoding.
+ MCCodeEmitter *MCE = 0;
+ MCAsmBackend *MAB = 0;
+
+ MCStreamer *S = getTarget().createAsmStreamer(*Context, Out,
+ true, /* verbose asm */
+ hasMCUseLoc(),
+ hasMCUseCFI(),
+ InstPrinter,
+ MCE, MAB,
+ false /* show MC encoding */);
+ AsmStreamer.reset(S);
+ break;
+ }
+ case CGFT_ObjectFile: {
+ llvm_unreachable("Object file emission is not supported with PTX");
+ }
+ case CGFT_Null:
+ // The Null output is intended for use for performance analysis and testing,
+ // not real users.
+ AsmStreamer.reset(createNullStreamer(*Context));
+ break;
+ }
+
+ // MC Logging
+ //AsmStreamer.reset(createLoggingStreamer(AsmStreamer.take(), errs()));
+
+ // Create the AsmPrinter, which takes ownership of AsmStreamer if successful.
+ FunctionPass *Printer = getTarget().createAsmPrinter(*this, *AsmStreamer);
+ if (Printer == 0)
+ return true;
+
+ // If successful, createAsmPrinter took ownership of AsmStreamer.
+ AsmStreamer.take();
+
+ PM.add(Printer);
+
+ PM.add(createGCInfoDeleter());
+ return false;
+}
+
+bool PTXTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool DisableVerify,
+ MCContext *&OutContext) {
+ // Add standard LLVM codegen passes.
+ // This is derived from LLVMTargetMachine::addCommonCodeGenPasses, with some
+ // modifications for the PTX target.
+
+ // Standard LLVM-Level Passes.
+
+ // Basic AliasAnalysis support.
+ // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that
+ // BasicAliasAnalysis wins if they disagree. This is intended to help
+ // support "obvious" type-punning idioms.
+ PM.add(createTypeBasedAliasAnalysisPass());
+ PM.add(createBasicAliasAnalysisPass());
+
+ // Before running any passes, run the verifier to determine if the input
+ // coming from the front-end and/or optimizer is valid.
+ if (!DisableVerify)
+ PM.add(createVerifierPass());
+
+ // Run loop strength reduction before anything else.
+ if (OptLevel != CodeGenOpt::None) {
+ PM.add(createLoopStrengthReducePass(getTargetLowering()));
+ //PM.add(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &dbgs()));
+ }
+
+ PM.add(createGCLoweringPass());
+
+ // Make sure that no unreachable blocks are instruction selected.
+ PM.add(createUnreachableBlockEliminationPass());
+
+ PM.add(createLowerInvokePass(getTargetLowering()));
+ // The lower invoke pass may create unreachable code. Remove it.
+ PM.add(createUnreachableBlockEliminationPass());
+
+ if (OptLevel != CodeGenOpt::None)
+ PM.add(createCodeGenPreparePass(getTargetLowering()));
+
+ PM.add(createStackProtectorPass(getTargetLowering()));
+
+ addPreISel(PM, OptLevel);
+
+ //PM.add(createPrintFunctionPass("\n\n"
+ // "*** Final LLVM Code input to ISel ***\n",
+ // &dbgs()));
+
+ // All passes which modify the LLVM IR are now complete; run the verifier
+ // to ensure that the IR is valid.
+ if (!DisableVerify)
+ PM.add(createVerifierPass());
+
+ // Standard Lower-Level Passes.
+
+ // Install a MachineModuleInfo class, which is an immutable pass that holds
+ // all the per-module stuff we're generating, including MCContext.
+ MachineModuleInfo *MMI = new MachineModuleInfo(*getMCAsmInfo(),
+ *getRegisterInfo(),
+ &getTargetLowering()->getObjFileLowering());
+ PM.add(MMI);
+ OutContext = &MMI->getContext(); // Return the MCContext specifically by-ref.
+
+ // Set up a MachineFunction for the rest of CodeGen to work on.
+ PM.add(new MachineFunctionAnalysis(*this, OptLevel));
+
+ // Ask the target for an isel.
+ if (addInstSelector(PM, OptLevel))
+ return true;
+
+ // Print the instruction selected machine code...
+ printAndVerify(PM, "After Instruction Selection");
+
+ // Expand pseudo-instructions emitted by ISel.
+ PM.add(createExpandISelPseudosPass());
+
+ // Pre-ra tail duplication.
+ if (OptLevel != CodeGenOpt::None) {
+ PM.add(createTailDuplicatePass(true));
+ printAndVerify(PM, "After Pre-RegAlloc TailDuplicate");
+ }
+
+ // Optimize PHIs before DCE: removing dead PHI cycles may make more
+ // instructions dead.
+ if (OptLevel != CodeGenOpt::None)
+ PM.add(createOptimizePHIsPass());
+
+ // If the target requests it, assign local variables to stack slots relative
+ // to one another and simplify frame index references where possible.
+ PM.add(createLocalStackSlotAllocationPass());
+
+ if (OptLevel != CodeGenOpt::None) {
+ // With optimization, dead code should already be eliminated. However
+ // there is one known exception: lowered code for arguments that are only
+ // used by tail calls, where the tail calls reuse the incoming stack
+ // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll).
+ PM.add(createDeadMachineInstructionElimPass());
+ printAndVerify(PM, "After codegen DCE pass");
+
+ PM.add(createMachineLICMPass());
+ PM.add(createMachineCSEPass());
+ PM.add(createMachineSinkingPass());
+ printAndVerify(PM, "After Machine LICM, CSE and Sinking passes");
+
+ PM.add(createPeepholeOptimizerPass());
+ printAndVerify(PM, "After codegen peephole optimization pass");
+ }
+
+ // Run pre-ra passes.
+ if (addPreRegAlloc(PM, OptLevel))
+ printAndVerify(PM, "After PreRegAlloc passes");
+
+ // Perform register allocation.
+ PM.add(createPTXRegisterAllocator());
+ printAndVerify(PM, "After Register Allocation");
+
+ // Perform stack slot coloring and post-ra machine LICM.
+ if (OptLevel != CodeGenOpt::None) {
+ // FIXME: Re-enable coloring with register when it's capable of adding
+ // kill markers.
+ PM.add(createStackSlotColoringPass(false));
+
+ // FIXME: Post-RA LICM has asserts that fire on virtual registers.
+ // Run post-ra machine LICM to hoist reloads / remats.
+ //if (!DisablePostRAMachineLICM)
+ // PM.add(createMachineLICMPass(false));
+
+ printAndVerify(PM, "After StackSlotColoring and postra Machine LICM");
+ }
+
+ // Run post-ra passes.
+ if (addPostRegAlloc(PM, OptLevel))
+ printAndVerify(PM, "After PostRegAlloc passes");
+
+ PM.add(createExpandPostRAPseudosPass());
+ printAndVerify(PM, "After ExpandPostRAPseudos");
+
+ // Insert prolog/epilog code. Eliminate abstract frame index references...
+ PM.add(createPrologEpilogCodeInserter());
+ printAndVerify(PM, "After PrologEpilogCodeInserter");
+
+ // Run pre-sched2 passes.
+ if (addPreSched2(PM, OptLevel))
+ printAndVerify(PM, "After PreSched2 passes");
+
+ // Second pass scheduler.
+ if (OptLevel != CodeGenOpt::None) {
+ PM.add(createPostRAScheduler(OptLevel));
+ printAndVerify(PM, "After PostRAScheduler");
+ }
+
+ // Branch folding must be run after regalloc and prolog/epilog insertion.
+ if (OptLevel != CodeGenOpt::None) {
+ PM.add(createBranchFoldingPass(getEnableTailMergeDefault()));
+ printNoVerify(PM, "After BranchFolding");
+ }
+
+ // Tail duplication.
+ if (OptLevel != CodeGenOpt::None) {
+ PM.add(createTailDuplicatePass(false));
+ printNoVerify(PM, "After TailDuplicate");
+ }
+
+ PM.add(createGCMachineCodeAnalysisPass());
+
+ //if (PrintGCInfo)
+ // PM.add(createGCInfoPrinter(dbgs()));
+
+ if (OptLevel != CodeGenOpt::None) {
+ PM.add(createCodePlacementOptPass());
+ printNoVerify(PM, "After CodePlacementOpt");
+ }
+
+ if (addPreEmitPass(PM, OptLevel))
+ printNoVerify(PM, "After PreEmit passes");
+
+ PM.add(createPTXMFInfoExtract(*this, OptLevel));
+ PM.add(createPTXFPRoundingModePass(*this, OptLevel));
+
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/PTX/PTXTargetMachine.h b/contrib/llvm/lib/Target/PTX/PTXTargetMachine.h
new file mode 100644
index 0000000..5b7c82b
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXTargetMachine.h
@@ -0,0 +1,115 @@
+//===-- PTXTargetMachine.h - Define TargetMachine for PTX -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the PTX specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTX_TARGET_MACHINE_H
+#define PTX_TARGET_MACHINE_H
+
+#include "PTXISelLowering.h"
+#include "PTXInstrInfo.h"
+#include "PTXFrameLowering.h"
+#include "PTXSelectionDAGInfo.h"
+#include "PTXSubtarget.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+class PTXTargetMachine : public LLVMTargetMachine {
+ private:
+ const TargetData DataLayout;
+ PTXSubtarget Subtarget; // has to be initialized before FrameLowering
+ PTXFrameLowering FrameLowering;
+ PTXInstrInfo InstrInfo;
+ PTXSelectionDAGInfo TSInfo;
+ PTXTargetLowering TLInfo;
+
+ public:
+ PTXTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM,
+ bool is64Bit);
+
+ virtual const TargetData *getTargetData() const { return &DataLayout; }
+
+ virtual const TargetFrameLowering *getFrameLowering() const {
+ return &FrameLowering;
+ }
+
+ virtual const PTXInstrInfo *getInstrInfo() const { return &InstrInfo; }
+ virtual const TargetRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo(); }
+
+ virtual const PTXTargetLowering *getTargetLowering() const {
+ return &TLInfo; }
+
+ virtual const PTXSelectionDAGInfo* getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+
+ virtual const PTXSubtarget *getSubtargetImpl() const { return &Subtarget; }
+
+ virtual bool addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel);
+ virtual bool addPostRegAlloc(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel);
+
+ // We override this method to supply our own set of codegen passes.
+ virtual bool addPassesToEmitFile(PassManagerBase &,
+ formatted_raw_ostream &,
+ CodeGenFileType,
+ CodeGenOpt::Level,
+ bool = true);
+
+ // Emission of machine code through JITCodeEmitter is not supported.
+ virtual bool addPassesToEmitMachineCode(PassManagerBase &,
+ JITCodeEmitter &,
+ CodeGenOpt::Level,
+ bool = true) {
+ return true;
+ }
+
+ // Emission of machine code through MCJIT is not supported.
+ virtual bool addPassesToEmitMC(PassManagerBase &,
+ MCContext *&,
+ raw_ostream &,
+ CodeGenOpt::Level,
+ bool = true) {
+ return true;
+ }
+
+ private:
+
+ bool addCommonCodeGenPasses(PassManagerBase &, CodeGenOpt::Level,
+ bool DisableVerify, MCContext *&OutCtx);
+}; // class PTXTargetMachine
+
+
+class PTX32TargetMachine : public PTXTargetMachine {
+public:
+
+ PTX32TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM);
+}; // class PTX32TargetMachine
+
+class PTX64TargetMachine : public PTXTargetMachine {
+public:
+
+ PTX64TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM);
+}; // class PTX32TargetMachine
+
+} // namespace llvm
+
+#endif // PTX_TARGET_MACHINE_H
diff --git a/contrib/llvm/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp b/contrib/llvm/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp
new file mode 100644
index 0000000..09a2735
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp
@@ -0,0 +1,25 @@
+//===-- PTXTargetInfo.cpp - PTX Target Implementation ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PTX.h"
+#include "llvm/Module.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+Target llvm::ThePTX32Target;
+Target llvm::ThePTX64Target;
+
+extern "C" void LLVMInitializePTXTargetInfo() {
+ // see llvm/ADT/Triple.h
+ RegisterTarget<Triple::ptx32> X32(ThePTX32Target, "ptx32",
+ "PTX (32-bit) [Experimental]");
+ RegisterTarget<Triple::ptx64> X64(ThePTX64Target, "ptx64",
+ "PTX (64-bit) [Experimental]");
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
new file mode 100644
index 0000000..b6a0835
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
@@ -0,0 +1,302 @@
+//===-- PPCInstPrinter.cpp - Convert PPC MCInst to assembly syntax --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an PPC MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "PPCInstPrinter.h"
+#include "MCTargetDesc/PPCBaseInfo.h"
+#include "MCTargetDesc/PPCPredicates.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define GET_INSTRUCTION_NAME
+#include "PPCGenAsmWriter.inc"
+
+StringRef PPCInstPrinter::getOpcodeName(unsigned Opcode) const {
+ return getInstructionName(Opcode);
+}
+
+void PPCInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
+ OS << getRegisterName(RegNo);
+}
+
+void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
+ StringRef Annot) {
+ // Check for slwi/srwi mnemonics.
+ if (MI->getOpcode() == PPC::RLWINM) {
+ unsigned char SH = MI->getOperand(2).getImm();
+ unsigned char MB = MI->getOperand(3).getImm();
+ unsigned char ME = MI->getOperand(4).getImm();
+ bool useSubstituteMnemonic = false;
+ if (SH <= 31 && MB == 0 && ME == (31-SH)) {
+ O << "\tslwi "; useSubstituteMnemonic = true;
+ }
+ if (SH <= 31 && MB == (32-SH) && ME == 31) {
+ O << "\tsrwi "; useSubstituteMnemonic = true;
+ SH = 32-SH;
+ }
+ if (useSubstituteMnemonic) {
+ printOperand(MI, 0, O);
+ O << ", ";
+ printOperand(MI, 1, O);
+ O << ", " << (unsigned int)SH;
+
+ printAnnotation(O, Annot);
+ return;
+ }
+ }
+
+ if ((MI->getOpcode() == PPC::OR || MI->getOpcode() == PPC::OR8) &&
+ MI->getOperand(1).getReg() == MI->getOperand(2).getReg()) {
+ O << "\tmr ";
+ printOperand(MI, 0, O);
+ O << ", ";
+ printOperand(MI, 1, O);
+ printAnnotation(O, Annot);
+ return;
+ }
+
+ if (MI->getOpcode() == PPC::RLDICR) {
+ unsigned char SH = MI->getOperand(2).getImm();
+ unsigned char ME = MI->getOperand(3).getImm();
+ // rldicr RA, RS, SH, 63-SH == sldi RA, RS, SH
+ if (63-SH == ME) {
+ O << "\tsldi ";
+ printOperand(MI, 0, O);
+ O << ", ";
+ printOperand(MI, 1, O);
+ O << ", " << (unsigned int)SH;
+ printAnnotation(O, Annot);
+ return;
+ }
+ }
+
+ printInstruction(MI, O);
+ printAnnotation(O, Annot);
+}
+
+
+void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O,
+ const char *Modifier) {
+ assert(Modifier && "Must specify 'cc' or 'reg' as predicate op modifier!");
+ unsigned Code = MI->getOperand(OpNo).getImm();
+ if (StringRef(Modifier) == "cc") {
+ switch ((PPC::Predicate)Code) {
+ default: assert(0 && "Invalid predicate");
+ case PPC::PRED_ALWAYS: return; // Don't print anything for always.
+ case PPC::PRED_LT: O << "lt"; return;
+ case PPC::PRED_LE: O << "le"; return;
+ case PPC::PRED_EQ: O << "eq"; return;
+ case PPC::PRED_GE: O << "ge"; return;
+ case PPC::PRED_GT: O << "gt"; return;
+ case PPC::PRED_NE: O << "ne"; return;
+ case PPC::PRED_UN: O << "un"; return;
+ case PPC::PRED_NU: O << "nu"; return;
+ }
+ }
+
+ assert(StringRef(Modifier) == "reg" &&
+ "Need to specify 'cc' or 'reg' as predicate op modifier!");
+ // Don't print the register for 'always'.
+ if (Code == PPC::PRED_ALWAYS) return;
+ printOperand(MI, OpNo+1, O);
+}
+
+void PPCInstPrinter::printS5ImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ char Value = MI->getOperand(OpNo).getImm();
+ Value = (Value << (32-5)) >> (32-5);
+ O << (int)Value;
+}
+
+void PPCInstPrinter::printU5ImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ unsigned char Value = MI->getOperand(OpNo).getImm();
+ assert(Value <= 31 && "Invalid u5imm argument!");
+ O << (unsigned int)Value;
+}
+
+void PPCInstPrinter::printU6ImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ unsigned char Value = MI->getOperand(OpNo).getImm();
+ assert(Value <= 63 && "Invalid u6imm argument!");
+ O << (unsigned int)Value;
+}
+
+void PPCInstPrinter::printS16ImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ O << (short)MI->getOperand(OpNo).getImm();
+}
+
+void PPCInstPrinter::printU16ImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ O << (unsigned short)MI->getOperand(OpNo).getImm();
+}
+
+void PPCInstPrinter::printS16X4ImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (MI->getOperand(OpNo).isImm())
+ O << (short)(MI->getOperand(OpNo).getImm()*4);
+ else
+ printOperand(MI, OpNo, O);
+}
+
+void PPCInstPrinter::printBranchOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (!MI->getOperand(OpNo).isImm())
+ return printOperand(MI, OpNo, O);
+
+ // Branches can take an immediate operand. This is used by the branch
+ // selection pass to print $+8, an eight byte displacement from the PC.
+ O << "$+";
+ printAbsAddrOperand(MI, OpNo, O);
+}
+
+void PPCInstPrinter::printAbsAddrOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ O << (int)MI->getOperand(OpNo).getImm()*4;
+}
+
+
+void PPCInstPrinter::printcrbitm(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ unsigned CCReg = MI->getOperand(OpNo).getReg();
+ unsigned RegNo;
+ switch (CCReg) {
+ default: assert(0 && "Unknown CR register");
+ case PPC::CR0: RegNo = 0; break;
+ case PPC::CR1: RegNo = 1; break;
+ case PPC::CR2: RegNo = 2; break;
+ case PPC::CR3: RegNo = 3; break;
+ case PPC::CR4: RegNo = 4; break;
+ case PPC::CR5: RegNo = 5; break;
+ case PPC::CR6: RegNo = 6; break;
+ case PPC::CR7: RegNo = 7; break;
+ }
+ O << (0x80 >> RegNo);
+}
+
+void PPCInstPrinter::printMemRegImm(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ printSymbolLo(MI, OpNo, O);
+ O << '(';
+ if (MI->getOperand(OpNo+1).getReg() == PPC::R0)
+ O << "0";
+ else
+ printOperand(MI, OpNo+1, O);
+ O << ')';
+}
+
+void PPCInstPrinter::printMemRegImmShifted(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (MI->getOperand(OpNo).isImm())
+ printS16X4ImmOperand(MI, OpNo, O);
+ else
+ printSymbolLo(MI, OpNo, O);
+ O << '(';
+
+ if (MI->getOperand(OpNo+1).getReg() == PPC::R0)
+ O << "0";
+ else
+ printOperand(MI, OpNo+1, O);
+ O << ')';
+}
+
+
+void PPCInstPrinter::printMemRegReg(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ // When used as the base register, r0 reads constant zero rather than
+ // the value contained in the register. For this reason, the darwin
+ // assembler requires that we print r0 as 0 (no r) when used as the base.
+ if (MI->getOperand(OpNo).getReg() == PPC::R0)
+ O << "0";
+ else
+ printOperand(MI, OpNo, O);
+ O << ", ";
+ printOperand(MI, OpNo+1, O);
+}
+
+
+
+/// stripRegisterPrefix - This method strips the character prefix from a
+/// register name so that only the number is left. Used by for linux asm.
+static const char *stripRegisterPrefix(const char *RegName) {
+ switch (RegName[0]) {
+ case 'r':
+ case 'f':
+ case 'v': return RegName + 1;
+ case 'c': if (RegName[1] == 'r') return RegName + 2;
+ }
+
+ return RegName;
+}
+
+void PPCInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isReg()) {
+ const char *RegName = getRegisterName(Op.getReg());
+ // The linux and AIX assembler does not take register prefixes.
+ if (!isDarwinSyntax())
+ RegName = stripRegisterPrefix(RegName);
+
+ O << RegName;
+ return;
+ }
+
+ if (Op.isImm()) {
+ O << Op.getImm();
+ return;
+ }
+
+ assert(Op.isExpr() && "unknown operand kind in printOperand");
+ O << *Op.getExpr();
+}
+
+void PPCInstPrinter::printSymbolLo(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (MI->getOperand(OpNo).isImm())
+ return printS16ImmOperand(MI, OpNo, O);
+
+ // FIXME: This is a terrible hack because we can't encode lo16() as an operand
+ // flag of a subtraction. See the FIXME in GetSymbolRef in PPCMCInstLower.
+ if (MI->getOperand(OpNo).isExpr() &&
+ isa<MCBinaryExpr>(MI->getOperand(OpNo).getExpr())) {
+ O << "lo16(";
+ printOperand(MI, OpNo, O);
+ O << ')';
+ } else {
+ printOperand(MI, OpNo, O);
+ }
+}
+
+void PPCInstPrinter::printSymbolHi(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (MI->getOperand(OpNo).isImm())
+ return printS16ImmOperand(MI, OpNo, O);
+
+ // FIXME: This is a terrible hack because we can't encode lo16() as an operand
+ // flag of a subtraction. See the FIXME in GetSymbolRef in PPCMCInstLower.
+ if (MI->getOperand(OpNo).isExpr() &&
+ isa<MCBinaryExpr>(MI->getOperand(OpNo).getExpr())) {
+ O << "ha16(";
+ printOperand(MI, OpNo, O);
+ O << ')';
+ } else {
+ printOperand(MI, OpNo, O);
+ }
+}
+
+
diff --git a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
new file mode 100644
index 0000000..4ed4b76
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
@@ -0,0 +1,71 @@
+//===-- PPCInstPrinter.h - Convert PPC MCInst to assembly syntax ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an PPC MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PPCINSTPRINTER_H
+#define PPCINSTPRINTER_H
+
+#include "llvm/MC/MCInstPrinter.h"
+
+namespace llvm {
+
+class MCOperand;
+
+class PPCInstPrinter : public MCInstPrinter {
+ // 0 -> AIX, 1 -> Darwin.
+ unsigned SyntaxVariant;
+public:
+ PPCInstPrinter(const MCAsmInfo &MAI, unsigned syntaxVariant)
+ : MCInstPrinter(MAI), SyntaxVariant(syntaxVariant) {}
+
+ bool isDarwinSyntax() const {
+ return SyntaxVariant == 1;
+ }
+
+ virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
+ virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
+ virtual StringRef getOpcodeName(unsigned Opcode) const;
+
+ static const char *getInstructionName(unsigned Opcode);
+
+ // Autogenerated by tblgen.
+ void printInstruction(const MCInst *MI, raw_ostream &O);
+ static const char *getRegisterName(unsigned RegNo);
+
+
+ void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printPredicateOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O, const char *Modifier);
+
+
+ void printS5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printU5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printU6ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printS16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printU16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printS16X4ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printBranchOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printAbsAddrOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+
+ void printcrbitm(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+
+ void printMemRegImm(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printMemRegImmShifted(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printMemRegReg(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+
+ // FIXME: Remove
+ void printSymbolLo(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printSymbolHi(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+};
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
new file mode 100644
index 0000000..9f2fd6d
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
@@ -0,0 +1,191 @@
+//===-- PPCAsmBackend.cpp - PPC Assembler Backend -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCAsmBackend.h"
+#include "MCTargetDesc/PPCMCTargetDesc.h"
+#include "MCTargetDesc/PPCFixupKinds.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCMachObjectWriter.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Object/MachOFormat.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
+ switch (Kind) {
+ default:
+ llvm_unreachable("Unknown fixup kind!");
+ case FK_Data_1:
+ case FK_Data_2:
+ case FK_Data_4:
+ return Value;
+ case PPC::fixup_ppc_brcond14:
+ return Value & 0x3ffc;
+ case PPC::fixup_ppc_br24:
+ return Value & 0x3fffffc;
+#if 0
+ case PPC::fixup_ppc_hi16:
+ return (Value >> 16) & 0xffff;
+#endif
+ case PPC::fixup_ppc_ha16:
+ return ((Value >> 16) + ((Value & 0x8000) ? 1 : 0)) & 0xffff;
+ case PPC::fixup_ppc_lo16:
+ return Value & 0xffff;
+ }
+}
+
+namespace {
+class PPCMachObjectWriter : public MCMachObjectTargetWriter {
+public:
+ PPCMachObjectWriter(bool Is64Bit, uint32_t CPUType,
+ uint32_t CPUSubtype)
+ : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype) {}
+
+ void RecordRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm, const MCAsmLayout &Layout,
+ const MCFragment *Fragment, const MCFixup &Fixup,
+ MCValue Target, uint64_t &FixedValue) {}
+};
+
+class PPCELFObjectWriter : public MCELFObjectTargetWriter {
+public:
+ PPCELFObjectWriter(bool Is64Bit, Triple::OSType OSType, uint16_t EMachine,
+ bool HasRelocationAddend, bool isLittleEndian)
+ : MCELFObjectTargetWriter(Is64Bit, OSType, EMachine, HasRelocationAddend) {}
+};
+
+class PPCAsmBackend : public MCAsmBackend {
+const Target &TheTarget;
+public:
+ PPCAsmBackend(const Target &T) : MCAsmBackend(), TheTarget(T) {}
+
+ unsigned getNumFixupKinds() const { return PPC::NumTargetFixupKinds; }
+
+ const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const {
+ const static MCFixupKindInfo Infos[PPC::NumTargetFixupKinds] = {
+ // name offset bits flags
+ { "fixup_ppc_br24", 6, 24, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_ppc_brcond14", 16, 14, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_ppc_lo16", 16, 16, 0 },
+ { "fixup_ppc_ha16", 16, 16, 0 },
+ { "fixup_ppc_lo14", 16, 14, 0 }
+ };
+
+ if (Kind < FirstTargetFixupKind)
+ return MCAsmBackend::getFixupKindInfo(Kind);
+
+ assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
+ "Invalid kind!");
+ return Infos[Kind - FirstTargetFixupKind];
+ }
+
+ bool MayNeedRelaxation(const MCInst &Inst) const {
+ // FIXME.
+ return false;
+ }
+
+ void RelaxInstruction(const MCInst &Inst, MCInst &Res) const {
+ // FIXME.
+ assert(0 && "RelaxInstruction() unimplemented");
+ }
+
+ bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const {
+ // FIXME: Zero fill for now. That's not right, but at least will get the
+ // section size right.
+ for (uint64_t i = 0; i != Count; ++i)
+ OW->Write8(0);
+ return true;
+ }
+
+ unsigned getPointerSize() const {
+ StringRef Name = TheTarget.getName();
+ if (Name == "ppc64") return 8;
+ assert(Name == "ppc32" && "Unknown target name!");
+ return 4;
+ }
+};
+} // end anonymous namespace
+
+
+// FIXME: This should be in a separate file.
+namespace {
+ class DarwinPPCAsmBackend : public PPCAsmBackend {
+ public:
+ DarwinPPCAsmBackend(const Target &T) : PPCAsmBackend(T) { }
+
+ void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ uint64_t Value) const {
+ assert(0 && "UNIMP");
+ }
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ bool is64 = getPointerSize() == 8;
+ return createMachObjectWriter(new PPCMachObjectWriter(
+ /*Is64Bit=*/is64,
+ (is64 ? object::mach::CTM_PowerPC64 :
+ object::mach::CTM_PowerPC),
+ object::mach::CSPPC_ALL),
+ OS, /*IsLittleEndian=*/false);
+ }
+
+ virtual bool doesSectionRequireSymbols(const MCSection &Section) const {
+ return false;
+ }
+ };
+
+ class ELFPPCAsmBackend : public PPCAsmBackend {
+ Triple::OSType OSType;
+ public:
+ ELFPPCAsmBackend(const Target &T, Triple::OSType OSType) :
+ PPCAsmBackend(T), OSType(OSType) { }
+
+ void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ uint64_t Value) const {
+ Value = adjustFixupValue(Fixup.getKind(), Value);
+ if (!Value) return; // Doesn't change encoding.
+
+ unsigned Offset = Fixup.getOffset();
+
+ // For each byte of the fragment that the fixup touches, mask in the bits from
+ // the fixup value. The Value has been "split up" into the appropriate
+ // bitfields above.
+ for (unsigned i = 0; i != 4; ++i)
+ Data[Offset + i] |= uint8_t((Value >> ((4 - i - 1)*8)) & 0xff);
+ }
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ bool is64 = getPointerSize() == 8;
+ return createELFObjectWriter(new PPCELFObjectWriter(
+ /*Is64Bit=*/is64,
+ OSType,
+ is64 ? ELF::EM_PPC64 : ELF::EM_PPC,
+ /*addend*/ true, /*isLittleEndian*/ false),
+ OS, /*IsLittleEndian=*/false);
+ }
+
+ virtual bool doesSectionRequireSymbols(const MCSection &Section) const {
+ return false;
+ }
+ };
+
+} // end anonymous namespace
+
+
+
+
+MCAsmBackend *llvm::createPPCAsmBackend(const Target &T, StringRef TT) {
+ if (Triple(TT).isOSDarwin())
+ return new DarwinPPCAsmBackend(T);
+
+ return new ELFPPCAsmBackend(T, Triple(TT).getOS());
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCBaseInfo.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCBaseInfo.h
new file mode 100644
index 0000000..369bbdc
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCBaseInfo.h
@@ -0,0 +1,70 @@
+//===-- PPCBaseInfo.h - Top level definitions for PPC -------- --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains small standalone helper functions and enum definitions for
+// the PPC target useful for the compiler back-end and the MC libraries.
+// As such, it deliberately does not include references to LLVM core
+// code gen types, passes, etc..
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PPCBASEINFO_H
+#define PPCBASEINFO_H
+
+#include "PPCMCTargetDesc.h"
+#include "llvm/Support/ErrorHandling.h"
+
+namespace llvm {
+
+/// getPPCRegisterNumbering - Given the enum value for some register, e.g.
+/// PPC::F14, return the number that it corresponds to (e.g. 14).
+inline static unsigned getPPCRegisterNumbering(unsigned RegEnum) {
+ using namespace PPC;
+ switch (RegEnum) {
+ case 0: return 0;
+ case R0 : case X0 : case F0 : case V0 : case CR0: case CR0LT: return 0;
+ case R1 : case X1 : case F1 : case V1 : case CR1: case CR0GT: return 1;
+ case R2 : case X2 : case F2 : case V2 : case CR2: case CR0EQ: return 2;
+ case R3 : case X3 : case F3 : case V3 : case CR3: case CR0UN: return 3;
+ case R4 : case X4 : case F4 : case V4 : case CR4: case CR1LT: return 4;
+ case R5 : case X5 : case F5 : case V5 : case CR5: case CR1GT: return 5;
+ case R6 : case X6 : case F6 : case V6 : case CR6: case CR1EQ: return 6;
+ case R7 : case X7 : case F7 : case V7 : case CR7: case CR1UN: return 7;
+ case R8 : case X8 : case F8 : case V8 : case CR2LT: return 8;
+ case R9 : case X9 : case F9 : case V9 : case CR2GT: return 9;
+ case R10: case X10: case F10: case V10: case CR2EQ: return 10;
+ case R11: case X11: case F11: case V11: case CR2UN: return 11;
+ case R12: case X12: case F12: case V12: case CR3LT: return 12;
+ case R13: case X13: case F13: case V13: case CR3GT: return 13;
+ case R14: case X14: case F14: case V14: case CR3EQ: return 14;
+ case R15: case X15: case F15: case V15: case CR3UN: return 15;
+ case R16: case X16: case F16: case V16: case CR4LT: return 16;
+ case R17: case X17: case F17: case V17: case CR4GT: return 17;
+ case R18: case X18: case F18: case V18: case CR4EQ: return 18;
+ case R19: case X19: case F19: case V19: case CR4UN: return 19;
+ case R20: case X20: case F20: case V20: case CR5LT: return 20;
+ case R21: case X21: case F21: case V21: case CR5GT: return 21;
+ case R22: case X22: case F22: case V22: case CR5EQ: return 22;
+ case R23: case X23: case F23: case V23: case CR5UN: return 23;
+ case R24: case X24: case F24: case V24: case CR6LT: return 24;
+ case R25: case X25: case F25: case V25: case CR6GT: return 25;
+ case R26: case X26: case F26: case V26: case CR6EQ: return 26;
+ case R27: case X27: case F27: case V27: case CR6UN: return 27;
+ case R28: case X28: case F28: case V28: case CR7LT: return 28;
+ case R29: case X29: case F29: case V29: case CR7GT: return 29;
+ case R30: case X30: case F30: case V30: case CR7EQ: return 30;
+ case R31: case X31: case F31: case V31: case CR7UN: return 31;
+ default:
+ llvm_unreachable("Unhandled reg in PPCRegisterInfo::getRegisterNumbering!");
+ }
+}
+
+} // end namespace llvm;
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
new file mode 100644
index 0000000..b3c889e
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
@@ -0,0 +1,45 @@
+//===-- PPCFixupKinds.h - PPC Specific Fixup Entries ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_PPC_PPCFIXUPKINDS_H
+#define LLVM_PPC_PPCFIXUPKINDS_H
+
+#include "llvm/MC/MCFixup.h"
+
+namespace llvm {
+namespace PPC {
+enum Fixups {
+ // fixup_ppc_br24 - 24-bit PC relative relocation for direct branches like 'b'
+ // and 'bl'.
+ fixup_ppc_br24 = FirstTargetFixupKind,
+
+ /// fixup_ppc_brcond14 - 14-bit PC relative relocation for conditional
+ /// branches.
+ fixup_ppc_brcond14,
+
+ /// fixup_ppc_lo16 - A 16-bit fixup corresponding to lo16(_foo) for instrs
+ /// like 'li'.
+ fixup_ppc_lo16,
+
+ /// fixup_ppc_ha16 - A 16-bit fixup corresponding to ha16(_foo) for instrs
+ /// like 'lis'.
+ fixup_ppc_ha16,
+
+ /// fixup_ppc_lo14 - A 14-bit fixup corresponding to lo16(_foo) for instrs
+ /// like 'std'.
+ fixup_ppc_lo14,
+
+ // Marker
+ LastTargetFixupKind,
+ NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
+};
+}
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
new file mode 100644
index 0000000..e9424d8
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
@@ -0,0 +1,66 @@
+//===-- PPCMCAsmInfo.cpp - PPC asm properties -------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the MCAsmInfoDarwin properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCMCAsmInfo.h"
+using namespace llvm;
+
+PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit) {
+ if (is64Bit)
+ PointerSize = 8;
+ IsLittleEndian = false;
+
+ PCSymbol = ".";
+ CommentString = ";";
+ ExceptionsType = ExceptionHandling::DwarfCFI;
+
+ if (!is64Bit)
+ Data64bitsDirective = 0; // We can't emit a 64-bit unit in PPC32 mode.
+
+ AssemblerDialect = 1; // New-Style mnemonics.
+ SupportsDebugInformation= true; // Debug information.
+}
+
+PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit) {
+ if (is64Bit)
+ PointerSize = 8;
+ IsLittleEndian = false;
+
+ // ".comm align is in bytes but .align is pow-2."
+ AlignmentIsInBytes = false;
+
+ CommentString = "#";
+ GlobalPrefix = "";
+ PrivateGlobalPrefix = ".L";
+ WeakRefDirective = "\t.weak\t";
+
+ // Uses '.section' before '.bss' directive
+ UsesELFSectionDirectiveForBSS = true;
+
+ // Debug Information
+ SupportsDebugInformation = true;
+
+ PCSymbol = ".";
+
+ // Set up DWARF directives
+ HasLEB128 = true; // Target asm supports leb128 directives (little-endian)
+
+ // Exceptions handling
+ if (!is64Bit)
+ ExceptionsType = ExceptionHandling::DwarfCFI;
+
+ ZeroDirective = "\t.space\t";
+ Data64bitsDirective = is64Bit ? "\t.quad\t" : 0;
+ LCOMMDirectiveType = LCOMM::NoAlignment;
+ AssemblerDialect = 0; // Old-Style mnemonics.
+}
+
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
new file mode 100644
index 0000000..96ae6fb
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
@@ -0,0 +1,31 @@
+//=====-- PPCMCAsmInfo.h - PPC asm properties -----------------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the MCAsmInfoDarwin class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PPCTARGETASMINFO_H
+#define PPCTARGETASMINFO_H
+
+#include "llvm/MC/MCAsmInfoDarwin.h"
+
+namespace llvm {
+
+ struct PPCMCAsmInfoDarwin : public MCAsmInfoDarwin {
+ explicit PPCMCAsmInfoDarwin(bool is64Bit);
+ };
+
+ struct PPCLinuxMCAsmInfo : public MCAsmInfo {
+ explicit PPCLinuxMCAsmInfo(bool is64Bit);
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
new file mode 100644
index 0000000..262f97c3
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
@@ -0,0 +1,193 @@
+//===-- PPCMCCodeEmitter.cpp - Convert PPC code to machine code -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PPCMCCodeEmitter class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mccodeemitter"
+#include "MCTargetDesc/PPCBaseInfo.h"
+#include "MCTargetDesc/PPCFixupKinds.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/ErrorHandling.h"
+using namespace llvm;
+
+STATISTIC(MCNumEmitted, "Number of MC instructions emitted");
+
+namespace {
+class PPCMCCodeEmitter : public MCCodeEmitter {
+ PPCMCCodeEmitter(const PPCMCCodeEmitter &); // DO NOT IMPLEMENT
+ void operator=(const PPCMCCodeEmitter &); // DO NOT IMPLEMENT
+
+public:
+ PPCMCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti,
+ MCContext &ctx) {
+ }
+
+ ~PPCMCCodeEmitter() {}
+
+ unsigned getDirectBrEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getCondBrEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getHA16Encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getLO16Encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getMemRIEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getMemRIXEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned get_crbitm_encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getMachineOpValue - Return binary encoding of operand. If the machine
+ /// operand requires relocation, record the relocation and return zero.
+ unsigned getMachineOpValue(const MCInst &MI,const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ // getBinaryCodeForInstr - TableGen'erated function for getting the
+ // binary encoding for an instruction.
+ unsigned getBinaryCodeForInstr(const MCInst &MI,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ unsigned Bits = getBinaryCodeForInstr(MI, Fixups);
+
+ // Output the constant in big endian byte order.
+ for (unsigned i = 0; i != 4; ++i) {
+ OS << (char)(Bits >> 24);
+ Bits <<= 8;
+ }
+
+ ++MCNumEmitted; // Keep track of the # of mi's emitted.
+ }
+
+};
+
+} // end anonymous namespace
+
+MCCodeEmitter *llvm::createPPCMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
+ return new PPCMCCodeEmitter(MCII, STI, Ctx);
+}
+
+unsigned PPCMCCodeEmitter::
+getDirectBrEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO = MI.getOperand(OpNo);
+ if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups);
+
+ // Add a fixup for the branch target.
+ Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_br24));
+ return 0;
+}
+
+unsigned PPCMCCodeEmitter::getCondBrEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO = MI.getOperand(OpNo);
+ if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups);
+
+ // Add a fixup for the branch target.
+ Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_brcond14));
+ return 0;
+}
+
+unsigned PPCMCCodeEmitter::getHA16Encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO = MI.getOperand(OpNo);
+ if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups);
+
+ // Add a fixup for the branch target.
+ Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_ha16));
+ return 0;
+}
+
+unsigned PPCMCCodeEmitter::getLO16Encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO = MI.getOperand(OpNo);
+ if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups);
+
+ // Add a fixup for the branch target.
+ Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_lo16));
+ return 0;
+}
+
+unsigned PPCMCCodeEmitter::getMemRIEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // Encode (imm, reg) as a memri, which has the low 16-bits as the
+ // displacement and the next 5 bits as the register #.
+ assert(MI.getOperand(OpNo+1).isReg());
+ unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups) << 16;
+
+ const MCOperand &MO = MI.getOperand(OpNo);
+ if (MO.isImm())
+ return (getMachineOpValue(MI, MO, Fixups) & 0xFFFF) | RegBits;
+
+ // Add a fixup for the displacement field.
+ Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_lo16));
+ return RegBits;
+}
+
+
+unsigned PPCMCCodeEmitter::getMemRIXEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // Encode (imm, reg) as a memrix, which has the low 14-bits as the
+ // displacement and the next 5 bits as the register #.
+ assert(MI.getOperand(OpNo+1).isReg());
+ unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups) << 14;
+
+ const MCOperand &MO = MI.getOperand(OpNo);
+ if (MO.isImm())
+ return (getMachineOpValue(MI, MO, Fixups) & 0x3FFF) | RegBits;
+
+ // Add a fixup for the branch target.
+ Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_lo14));
+ return RegBits;
+}
+
+
+unsigned PPCMCCodeEmitter::
+get_crbitm_encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO = MI.getOperand(OpNo);
+ assert((MI.getOpcode() == PPC::MTCRF || MI.getOpcode() == PPC::MFOCRF) &&
+ (MO.getReg() >= PPC::CR0 && MO.getReg() <= PPC::CR7));
+ return 0x80 >> getPPCRegisterNumbering(MO.getReg());
+}
+
+
+unsigned PPCMCCodeEmitter::
+getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ if (MO.isReg()) {
+ // MTCRF/MFOCRF should go through get_crbitm_encoding for the CR operand.
+ // The GPR operand should come through here though.
+ assert((MI.getOpcode() != PPC::MTCRF && MI.getOpcode() != PPC::MFOCRF) ||
+ MO.getReg() < PPC::CR0 || MO.getReg() > PPC::CR7);
+ return getPPCRegisterNumbering(MO.getReg());
+ }
+
+ assert(MO.isImm() &&
+ "Relocation required in an instruction that we cannot encode!");
+ return MO.getImm();
+}
+
+
+#include "PPCGenMCCodeEmitter.inc"
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
new file mode 100644
index 0000000..d5c8a9e
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -0,0 +1,151 @@
+//===-- PPCMCTargetDesc.cpp - PowerPC Target Descriptions -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides PowerPC specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCMCTargetDesc.h"
+#include "PPCMCAsmInfo.h"
+#include "InstPrinter/PPCInstPrinter.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/MC/MCCodeGenInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "PPCGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "PPCGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "PPCGenRegisterInfo.inc"
+
+using namespace llvm;
+
+static MCInstrInfo *createPPCMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitPPCMCInstrInfo(X);
+ return X;
+}
+
+static MCRegisterInfo *createPPCMCRegisterInfo(StringRef TT) {
+ Triple TheTriple(TT);
+ bool isPPC64 = (TheTriple.getArch() == Triple::ppc64);
+ unsigned Flavour = isPPC64 ? 0 : 1;
+ unsigned RA = isPPC64 ? PPC::LR8 : PPC::LR;
+
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitPPCMCRegisterInfo(X, RA, Flavour, Flavour);
+ return X;
+}
+
+static MCSubtargetInfo *createPPCMCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS) {
+ MCSubtargetInfo *X = new MCSubtargetInfo();
+ InitPPCMCSubtargetInfo(X, TT, CPU, FS);
+ return X;
+}
+
+static MCAsmInfo *createPPCMCAsmInfo(const Target &T, StringRef TT) {
+ Triple TheTriple(TT);
+ bool isPPC64 = TheTriple.getArch() == Triple::ppc64;
+
+ MCAsmInfo *MAI;
+ if (TheTriple.isOSDarwin())
+ MAI = new PPCMCAsmInfoDarwin(isPPC64);
+ else
+ MAI = new PPCLinuxMCAsmInfo(isPPC64);
+
+ // Initial state of the frame pointer is R1.
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(PPC::R1, 0);
+ MAI->addInitialFrameState(0, Dst, Src);
+
+ return MAI;
+}
+
+static MCCodeGenInfo *createPPCMCCodeGenInfo(StringRef TT, Reloc::Model RM,
+ CodeModel::Model CM) {
+ MCCodeGenInfo *X = new MCCodeGenInfo();
+
+ if (RM == Reloc::Default) {
+ Triple T(TT);
+ if (T.isOSDarwin())
+ RM = Reloc::DynamicNoPIC;
+ else
+ RM = Reloc::Static;
+ }
+ X->InitMCCodeGenInfo(RM, CM);
+ return X;
+}
+
+// This is duplicated code. Refactor this.
+static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
+ MCContext &Ctx, MCAsmBackend &MAB,
+ raw_ostream &OS,
+ MCCodeEmitter *Emitter,
+ bool RelaxAll,
+ bool NoExecStack) {
+ if (Triple(TT).isOSDarwin())
+ return createMachOStreamer(Ctx, MAB, OS, Emitter, RelaxAll);
+
+ return createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll, NoExecStack);
+}
+
+static MCInstPrinter *createPPCMCInstPrinter(const Target &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI,
+ const MCSubtargetInfo &STI) {
+ return new PPCInstPrinter(MAI, SyntaxVariant);
+}
+
+extern "C" void LLVMInitializePowerPCTargetMC() {
+ // Register the MC asm info.
+ RegisterMCAsmInfoFn C(ThePPC32Target, createPPCMCAsmInfo);
+ RegisterMCAsmInfoFn D(ThePPC64Target, createPPCMCAsmInfo);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(ThePPC32Target, createPPCMCCodeGenInfo);
+ TargetRegistry::RegisterMCCodeGenInfo(ThePPC64Target, createPPCMCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(ThePPC32Target, createPPCMCInstrInfo);
+ TargetRegistry::RegisterMCInstrInfo(ThePPC64Target, createPPCMCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(ThePPC32Target, createPPCMCRegisterInfo);
+ TargetRegistry::RegisterMCRegInfo(ThePPC64Target, createPPCMCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(ThePPC32Target,
+ createPPCMCSubtargetInfo);
+ TargetRegistry::RegisterMCSubtargetInfo(ThePPC64Target,
+ createPPCMCSubtargetInfo);
+
+ // Register the MC Code Emitter
+ TargetRegistry::RegisterMCCodeEmitter(ThePPC32Target, createPPCMCCodeEmitter);
+ TargetRegistry::RegisterMCCodeEmitter(ThePPC64Target, createPPCMCCodeEmitter);
+
+ // Register the asm backend.
+ TargetRegistry::RegisterMCAsmBackend(ThePPC32Target, createPPCAsmBackend);
+ TargetRegistry::RegisterMCAsmBackend(ThePPC64Target, createPPCAsmBackend);
+
+ // Register the object streamer.
+ TargetRegistry::RegisterMCObjectStreamer(ThePPC32Target, createMCStreamer);
+ TargetRegistry::RegisterMCObjectStreamer(ThePPC64Target, createMCStreamer);
+
+ // Register the MCInstPrinter.
+ TargetRegistry::RegisterMCInstPrinter(ThePPC32Target, createPPCMCInstPrinter);
+ TargetRegistry::RegisterMCInstPrinter(ThePPC64Target, createPPCMCInstPrinter);
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
new file mode 100644
index 0000000..e5bf2a9
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
@@ -0,0 +1,51 @@
+//===-- PPCMCTargetDesc.h - PowerPC Target Descriptions ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides PowerPC specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PPCMCTARGETDESC_H
+#define PPCMCTARGETDESC_H
+
+namespace llvm {
+class MCAsmBackend;
+class MCCodeEmitter;
+class MCContext;
+class MCInstrInfo;
+class MCSubtargetInfo;
+class Target;
+class StringRef;
+
+extern Target ThePPC32Target;
+extern Target ThePPC64Target;
+
+MCCodeEmitter *createPPCMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx);
+
+MCAsmBackend *createPPCAsmBackend(const Target &T, StringRef TT);
+
+} // End llvm namespace
+
+// Defines symbolic names for PowerPC registers. This defines a mapping from
+// register name to register number.
+//
+#define GET_REGINFO_ENUM
+#include "PPCGenRegisterInfo.inc"
+
+// Defines symbolic names for the PowerPC instructions.
+//
+#define GET_INSTRINFO_ENUM
+#include "PPCGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "PPCGenSubtargetInfo.inc"
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp
new file mode 100644
index 0000000..12bb0a1
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp
@@ -0,0 +1,31 @@
+//===-- PPCPredicates.cpp - PPC Branch Predicate Information --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PowerPC branch predicates.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCPredicates.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cassert>
+using namespace llvm;
+
+PPC::Predicate PPC::InvertPredicate(PPC::Predicate Opcode) {
+ switch (Opcode) {
+ default: llvm_unreachable("Unknown PPC branch opcode!");
+ case PPC::PRED_EQ: return PPC::PRED_NE;
+ case PPC::PRED_NE: return PPC::PRED_EQ;
+ case PPC::PRED_LT: return PPC::PRED_GE;
+ case PPC::PRED_GE: return PPC::PRED_LT;
+ case PPC::PRED_GT: return PPC::PRED_LE;
+ case PPC::PRED_LE: return PPC::PRED_GT;
+ case PPC::PRED_NU: return PPC::PRED_UN;
+ case PPC::PRED_UN: return PPC::PRED_NU;
+ }
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
new file mode 100644
index 0000000..f872e86
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
@@ -0,0 +1,37 @@
+//===-- PPCPredicates.h - PPC Branch Predicate Information ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the PowerPC branch predicates.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_POWERPC_PPCPREDICATES_H
+#define LLVM_TARGET_POWERPC_PPCPREDICATES_H
+
+namespace llvm {
+namespace PPC {
+ /// Predicate - These are "(BI << 5) | BO" for various predicates.
+ enum Predicate {
+ PRED_ALWAYS = (0 << 5) | 20,
+ PRED_LT = (0 << 5) | 12,
+ PRED_LE = (1 << 5) | 4,
+ PRED_EQ = (2 << 5) | 12,
+ PRED_GE = (0 << 5) | 4,
+ PRED_GT = (1 << 5) | 12,
+ PRED_NE = (2 << 5) | 4,
+ PRED_UN = (3 << 5) | 12,
+ PRED_NU = (3 << 5) | 4
+ };
+
+ /// Invert the specified predicate. != -> ==, < -> >=.
+ Predicate InvertPredicate(Predicate Opcode);
+}
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPC.h b/contrib/llvm/lib/Target/PowerPC/PPC.h
new file mode 100644
index 0000000..5dc1863
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPC.h
@@ -0,0 +1,76 @@
+//===-- PPC.h - Top-level interface for PowerPC Target ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// PowerPC back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_POWERPC_H
+#define LLVM_TARGET_POWERPC_H
+
+#include "MCTargetDesc/PPCBaseInfo.h"
+#include "MCTargetDesc/PPCMCTargetDesc.h"
+#include <string>
+
+// GCC #defines PPC on Linux but we use it as our namespace name
+#undef PPC
+
+namespace llvm {
+ class PPCTargetMachine;
+ class FunctionPass;
+ class formatted_raw_ostream;
+ class JITCodeEmitter;
+ class Target;
+ class MachineInstr;
+ class AsmPrinter;
+ class MCInst;
+ class TargetMachine;
+
+ FunctionPass *createPPCBranchSelectionPass();
+ FunctionPass *createPPCISelDag(PPCTargetMachine &TM);
+ FunctionPass *createPPCJITCodeEmitterPass(PPCTargetMachine &TM,
+ JITCodeEmitter &MCE);
+ void LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
+ AsmPrinter &AP, bool isDarwin);
+
+ namespace PPCII {
+
+ /// Target Operand Flag enum.
+ enum TOF {
+ //===------------------------------------------------------------------===//
+ // PPC Specific MachineOperand flags.
+ MO_NO_FLAG,
+
+ /// MO_DARWIN_STUB - On a symbol operand "FOO", this indicates that the
+ /// reference is actually to the "FOO$stub" symbol. This is used for calls
+ /// and jumps to external functions on Tiger and earlier.
+ MO_DARWIN_STUB = 1,
+
+ /// MO_LO16, MO_HA16 - lo16(symbol) and ha16(symbol)
+ MO_LO16 = 4, MO_HA16 = 8,
+
+ /// MO_PIC_FLAG - If this bit is set, the symbol reference is relative to
+ /// the function's picbase, e.g. lo16(symbol-picbase).
+ MO_PIC_FLAG = 16,
+
+ /// MO_NLP_FLAG - If this bit is set, the symbol reference is actually to
+ /// the non_lazy_ptr for the global, e.g. lo16(symbol$non_lazy_ptr-picbase).
+ MO_NLP_FLAG = 32,
+
+ /// MO_NLP_HIDDEN_FLAG - If this bit is set, the symbol reference is to a
+ /// symbol with hidden visibility. This causes a different kind of
+ /// non-lazy-pointer to be generated.
+ MO_NLP_HIDDEN_FLAG = 64
+ };
+ } // end namespace PPCII
+
+} // end namespace llvm;
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPC.td b/contrib/llvm/lib/Target/PowerPC/PPC.td
new file mode 100644
index 0000000..2d5d302
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPC.td
@@ -0,0 +1,112 @@
+//===- PPC.td - Describe the PowerPC Target Machine --------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is the top level entry point for the PowerPC target.
+//
+//===----------------------------------------------------------------------===//
+
+// Get the target-independent interfaces which we are implementing.
+//
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// PowerPC Subtarget features.
+//
+
+//===----------------------------------------------------------------------===//
+// CPU Directives //
+//===----------------------------------------------------------------------===//
+
+def Directive601 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_601", "">;
+def Directive602 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_602", "">;
+def Directive603 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_603", "">;
+def Directive604 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_603", "">;
+def Directive620 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_603", "">;
+def Directive7400: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_7400", "">;
+def Directive750 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_750", "">;
+def Directive970 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_970", "">;
+def Directive32 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_32", "">;
+def Directive64 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_64", "">;
+
+def Feature64Bit : SubtargetFeature<"64bit","Has64BitSupport", "true",
+ "Enable 64-bit instructions">;
+def Feature64BitRegs : SubtargetFeature<"64bitregs","Use64BitRegs", "true",
+ "Enable 64-bit registers usage for ppc32 [beta]">;
+def FeatureAltivec : SubtargetFeature<"altivec","HasAltivec", "true",
+ "Enable Altivec instructions">;
+def FeatureGPUL : SubtargetFeature<"gpul","IsGigaProcessor", "true",
+ "Enable GPUL instructions">;
+def FeatureFSqrt : SubtargetFeature<"fsqrt","HasFSQRT", "true",
+ "Enable the fsqrt instruction">;
+def FeatureSTFIWX : SubtargetFeature<"stfiwx","HasSTFIWX", "true",
+ "Enable the stfiwx instruction">;
+
+//===----------------------------------------------------------------------===//
+// Register File Description
+//===----------------------------------------------------------------------===//
+
+include "PPCRegisterInfo.td"
+include "PPCSchedule.td"
+include "PPCInstrInfo.td"
+
+//===----------------------------------------------------------------------===//
+// PowerPC processors supported.
+//
+
+def : Processor<"generic", G3Itineraries, [Directive32]>;
+def : Processor<"601", G3Itineraries, [Directive601]>;
+def : Processor<"602", G3Itineraries, [Directive602]>;
+def : Processor<"603", G3Itineraries, [Directive603]>;
+def : Processor<"603e", G3Itineraries, [Directive603]>;
+def : Processor<"603ev", G3Itineraries, [Directive603]>;
+def : Processor<"604", G3Itineraries, [Directive604]>;
+def : Processor<"604e", G3Itineraries, [Directive604]>;
+def : Processor<"620", G3Itineraries, [Directive620]>;
+def : Processor<"g3", G3Itineraries, [Directive7400]>;
+def : Processor<"7400", G4Itineraries, [Directive7400, FeatureAltivec]>;
+def : Processor<"g4", G4Itineraries, [Directive7400, FeatureAltivec]>;
+def : Processor<"7450", G4PlusItineraries, [Directive7400, FeatureAltivec]>;
+def : Processor<"g4+", G4PlusItineraries, [Directive750, FeatureAltivec]>;
+def : Processor<"750", G4Itineraries, [Directive750, FeatureAltivec]>;
+def : Processor<"970", G5Itineraries,
+ [Directive970, FeatureAltivec,
+ FeatureGPUL, FeatureFSqrt, FeatureSTFIWX,
+ Feature64Bit /*, Feature64BitRegs */]>;
+def : Processor<"g5", G5Itineraries,
+ [Directive970, FeatureAltivec,
+ FeatureGPUL, FeatureFSqrt, FeatureSTFIWX,
+ Feature64Bit /*, Feature64BitRegs */]>;
+def : Processor<"ppc", G3Itineraries, [Directive32]>;
+def : Processor<"ppc64", G5Itineraries,
+ [Directive64, FeatureAltivec,
+ FeatureGPUL, FeatureFSqrt, FeatureSTFIWX,
+ Feature64Bit /*, Feature64BitRegs */]>;
+
+
+//===----------------------------------------------------------------------===//
+// Calling Conventions
+//===----------------------------------------------------------------------===//
+
+include "PPCCallingConv.td"
+
+def PPCInstrInfo : InstrInfo {
+ let isLittleEndianEncoding = 1;
+}
+
+def PPCAsmWriter : AsmWriter {
+ string AsmWriterClassName = "InstPrinter";
+ bit isMCAsmWriter = 1;
+}
+
+def PPC : Target {
+ // Information about the instructions.
+ let InstructionSet = PPCInstrInfo;
+
+ let AssemblyWriters = [PPCAsmWriter];
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
new file mode 100644
index 0000000..9528459
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -0,0 +1,686 @@
+//===-- PPCAsmPrinter.cpp - Print machine instrs to PowerPC assembly --------=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to PowerPC assembly language. This printer is
+// the output mechanism used by `llc'.
+//
+// Documentation at http://developer.apple.com/documentation/DeveloperTools/
+// Reference/Assembler/ASMIntroduction/chapter_1_section_1.html
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asmprinter"
+#include "PPC.h"
+#include "PPCTargetMachine.h"
+#include "PPCSubtarget.h"
+#include "MCTargetDesc/PPCPredicates.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "InstPrinter/PPCInstPrinter.h"
+using namespace llvm;
+
+namespace {
+ class PPCAsmPrinter : public AsmPrinter {
+ protected:
+ DenseMap<MCSymbol*, MCSymbol*> TOC;
+ const PPCSubtarget &Subtarget;
+ uint64_t TOCLabelID;
+ public:
+ explicit PPCAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : AsmPrinter(TM, Streamer),
+ Subtarget(TM.getSubtarget<PPCSubtarget>()), TOCLabelID(0) {}
+
+ virtual const char *getPassName() const {
+ return "PowerPC Assembly Printer";
+ }
+
+
+ virtual void EmitInstruction(const MachineInstr *MI);
+
+ void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O);
+
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O);
+ bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O);
+
+ MachineLocation getDebugValueLocation(const MachineInstr *MI) const {
+ MachineLocation Location;
+ assert(MI->getNumOperands() == 4 && "Invalid no. of machine operands!");
+ // Frame address. Currently handles register +- offset only.
+ if (MI->getOperand(0).isReg() && MI->getOperand(2).isImm())
+ Location.set(MI->getOperand(0).getReg(), MI->getOperand(2).getImm());
+ else {
+ DEBUG(dbgs() << "DBG_VALUE instruction ignored! " << *MI << "\n");
+ }
+ return Location;
+ }
+ };
+
+ /// PPCLinuxAsmPrinter - PowerPC assembly printer, customized for Linux
+ class PPCLinuxAsmPrinter : public PPCAsmPrinter {
+ public:
+ explicit PPCLinuxAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : PPCAsmPrinter(TM, Streamer) {}
+
+ virtual const char *getPassName() const {
+ return "Linux PPC Assembly Printer";
+ }
+
+ bool doFinalization(Module &M);
+
+ virtual void EmitFunctionEntryLabel();
+ };
+
+ /// PPCDarwinAsmPrinter - PowerPC assembly printer, customized for Darwin/Mac
+ /// OS X
+ class PPCDarwinAsmPrinter : public PPCAsmPrinter {
+ public:
+ explicit PPCDarwinAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : PPCAsmPrinter(TM, Streamer) {}
+
+ virtual const char *getPassName() const {
+ return "Darwin PPC Assembly Printer";
+ }
+
+ bool doFinalization(Module &M);
+ void EmitStartOfAsmFile(Module &M);
+
+ void EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs);
+ };
+} // end of anonymous namespace
+
+/// stripRegisterPrefix - This method strips the character prefix from a
+/// register name so that only the number is left. Used by for linux asm.
+static const char *stripRegisterPrefix(const char *RegName) {
+ switch (RegName[0]) {
+ case 'r':
+ case 'f':
+ case 'v': return RegName + 1;
+ case 'c': if (RegName[1] == 'r') return RegName + 2;
+ }
+
+ return RegName;
+}
+
+void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MachineOperand &MO = MI->getOperand(OpNo);
+
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register: {
+ const char *RegName = PPCInstPrinter::getRegisterName(MO.getReg());
+ // Linux assembler (Others?) does not take register mnemonics.
+ // FIXME - What about special registers used in mfspr/mtspr?
+ if (!Subtarget.isDarwin()) RegName = stripRegisterPrefix(RegName);
+ O << RegName;
+ return;
+ }
+ case MachineOperand::MO_Immediate:
+ O << MO.getImm();
+ return;
+
+ case MachineOperand::MO_MachineBasicBlock:
+ O << *MO.getMBB()->getSymbol();
+ return;
+ case MachineOperand::MO_JumpTableIndex:
+ O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+ << '_' << MO.getIndex();
+ // FIXME: PIC relocation model
+ return;
+ case MachineOperand::MO_ConstantPoolIndex:
+ O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber()
+ << '_' << MO.getIndex();
+ return;
+ case MachineOperand::MO_BlockAddress:
+ O << *GetBlockAddressSymbol(MO.getBlockAddress());
+ return;
+ case MachineOperand::MO_ExternalSymbol: {
+ // Computing the address of an external symbol, not calling it.
+ if (TM.getRelocationModel() == Reloc::Static) {
+ O << *GetExternalSymbolSymbol(MO.getSymbolName());
+ return;
+ }
+
+ MCSymbol *NLPSym =
+ OutContext.GetOrCreateSymbol(StringRef(MAI->getGlobalPrefix())+
+ MO.getSymbolName()+"$non_lazy_ptr");
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>().getGVStubEntry(NLPSym);
+ if (StubSym.getPointer() == 0)
+ StubSym = MachineModuleInfoImpl::
+ StubValueTy(GetExternalSymbolSymbol(MO.getSymbolName()), true);
+
+ O << *NLPSym;
+ return;
+ }
+ case MachineOperand::MO_GlobalAddress: {
+ // Computing the address of a global symbol, not calling it.
+ const GlobalValue *GV = MO.getGlobal();
+ MCSymbol *SymToPrint;
+
+ // External or weakly linked global variables need non-lazily-resolved stubs
+ if (TM.getRelocationModel() != Reloc::Static &&
+ (GV->isDeclaration() || GV->isWeakForLinker())) {
+ if (!GV->hasHiddenVisibility()) {
+ SymToPrint = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>()
+ .getGVStubEntry(SymToPrint);
+ if (StubSym.getPointer() == 0)
+ StubSym = MachineModuleInfoImpl::
+ StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage());
+ } else if (GV->isDeclaration() || GV->hasCommonLinkage() ||
+ GV->hasAvailableExternallyLinkage()) {
+ SymToPrint = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
+
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>().
+ getHiddenGVStubEntry(SymToPrint);
+ if (StubSym.getPointer() == 0)
+ StubSym = MachineModuleInfoImpl::
+ StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage());
+ } else {
+ SymToPrint = Mang->getSymbol(GV);
+ }
+ } else {
+ SymToPrint = Mang->getSymbol(GV);
+ }
+
+ O << *SymToPrint;
+
+ printOffset(MO.getOffset(), O);
+ return;
+ }
+
+ default:
+ O << "<unknown operand type: " << MO.getType() << ">";
+ return;
+ }
+}
+
+/// PrintAsmOperand - Print out an operand for an inline asm expression.
+///
+bool PPCAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode, raw_ostream &O) {
+ // Does this asm operand have a single letter operand modifier?
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ default: return true; // Unknown modifier.
+ case 'c': // Don't print "$" before a global var name or constant.
+ break; // PPC never has a prefix.
+ case 'L': // Write second word of DImode reference.
+ // Verify that this operand has two consecutive registers.
+ if (!MI->getOperand(OpNo).isReg() ||
+ OpNo+1 == MI->getNumOperands() ||
+ !MI->getOperand(OpNo+1).isReg())
+ return true;
+ ++OpNo; // Return the high-part.
+ break;
+ case 'I':
+ // Write 'i' if an integer constant, otherwise nothing. Used to print
+ // addi vs add, etc.
+ if (MI->getOperand(OpNo).isImm())
+ O << "i";
+ return false;
+ }
+ }
+
+ printOperand(MI, OpNo, O);
+ return false;
+}
+
+// At the moment, all inline asm memory operands are a single register.
+// In any case, the output of this routine should always be just one
+// assembler operand.
+
+bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &O) {
+ if (ExtraCode && ExtraCode[0])
+ return true; // Unknown modifier.
+ assert(MI->getOperand(OpNo).isReg());
+ O << "0(";
+ printOperand(MI, OpNo, O);
+ O << ")";
+ return false;
+}
+
+
+/// EmitInstruction -- Print out a single PowerPC MI in Darwin syntax to
+/// the current output stream.
+///
+void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ MCInst TmpInst;
+
+ // Lower multi-instruction pseudo operations.
+ switch (MI->getOpcode()) {
+ default: break;
+ case TargetOpcode::DBG_VALUE: {
+ if (!isVerbose() || !OutStreamer.hasRawTextSupport()) return;
+
+ SmallString<32> Str;
+ raw_svector_ostream O(Str);
+ unsigned NOps = MI->getNumOperands();
+ assert(NOps==4);
+ O << '\t' << MAI->getCommentString() << "DEBUG_VALUE: ";
+ // cast away const; DIetc do not take const operands for some reason.
+ DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps-1).getMetadata()));
+ O << V.getName();
+ O << " <- ";
+ // Frame address. Currently handles register +- offset only.
+ assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm());
+ O << '['; printOperand(MI, 0, O); O << '+'; printOperand(MI, 1, O);
+ O << ']';
+ O << "+";
+ printOperand(MI, NOps-2, O);
+ OutStreamer.EmitRawText(O.str());
+ return;
+ }
+
+ case PPC::MovePCtoLR:
+ case PPC::MovePCtoLR8: {
+ // Transform %LR = MovePCtoLR
+ // Into this, where the label is the PIC base:
+ // bl L1$pb
+ // L1$pb:
+ MCSymbol *PICBase = MF->getPICBaseSymbol();
+
+ // Emit the 'bl'.
+ TmpInst.setOpcode(PPC::BL_Darwin); // Darwin vs SVR4 doesn't matter here.
+
+
+ // FIXME: We would like an efficient form for this, so we don't have to do
+ // a lot of extra uniquing.
+ TmpInst.addOperand(MCOperand::CreateExpr(MCSymbolRefExpr::
+ Create(PICBase, OutContext)));
+ OutStreamer.EmitInstruction(TmpInst);
+
+ // Emit the label.
+ OutStreamer.EmitLabel(PICBase);
+ return;
+ }
+ case PPC::LDtoc: {
+ // Transform %X3 = LDtoc <ga:@min1>, %X2
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
+
+ // Change the opcode to LD, and the global address operand to be a
+ // reference to the TOC entry we will synthesize later.
+ TmpInst.setOpcode(PPC::LD);
+ const MachineOperand &MO = MI->getOperand(1);
+ assert(MO.isGlobal());
+
+ // Map symbol -> label of TOC entry.
+ MCSymbol *&TOCEntry = TOC[Mang->getSymbol(MO.getGlobal())];
+ if (TOCEntry == 0)
+ TOCEntry = GetTempSymbol("C", TOCLabelID++);
+
+ const MCExpr *Exp =
+ MCSymbolRefExpr::Create(TOCEntry, MCSymbolRefExpr::VK_PPC_TOC,
+ OutContext);
+ TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp);
+ OutStreamer.EmitInstruction(TmpInst);
+ return;
+ }
+
+ case PPC::MFCRpseud:
+ // Transform: %R3 = MFCRpseud %CR7
+ // Into: %R3 = MFCR ;; cr7
+ OutStreamer.AddComment(PPCInstPrinter::
+ getRegisterName(MI->getOperand(1).getReg()));
+ TmpInst.setOpcode(PPC::MFCR);
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ OutStreamer.EmitInstruction(TmpInst);
+ return;
+ }
+
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
+ OutStreamer.EmitInstruction(TmpInst);
+}
+
+void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() {
+ if (!Subtarget.isPPC64()) // linux/ppc32 - Normal entry label.
+ return AsmPrinter::EmitFunctionEntryLabel();
+
+ // Emit an official procedure descriptor.
+ // FIXME 64-bit SVR4: Use MCSection here!
+ OutStreamer.EmitRawText(StringRef("\t.section\t\".opd\",\"aw\""));
+ OutStreamer.EmitRawText(StringRef("\t.align 3"));
+ OutStreamer.EmitLabel(CurrentFnSym);
+ OutStreamer.EmitRawText("\t.quad .L." + Twine(CurrentFnSym->getName()) +
+ ",.TOC.@tocbase");
+ OutStreamer.EmitRawText(StringRef("\t.previous"));
+ OutStreamer.EmitRawText(".L." + Twine(CurrentFnSym->getName()) + ":");
+}
+
+
+bool PPCLinuxAsmPrinter::doFinalization(Module &M) {
+ const TargetData *TD = TM.getTargetData();
+
+ bool isPPC64 = TD->getPointerSizeInBits() == 64;
+
+ if (isPPC64 && !TOC.empty()) {
+ // FIXME 64-bit SVR4: Use MCSection here?
+ OutStreamer.EmitRawText(StringRef("\t.section\t\".toc\",\"aw\""));
+
+ // FIXME: This is nondeterminstic!
+ for (DenseMap<MCSymbol*, MCSymbol*>::iterator I = TOC.begin(),
+ E = TOC.end(); I != E; ++I) {
+ OutStreamer.EmitLabel(I->second);
+ OutStreamer.EmitRawText("\t.tc " + Twine(I->first->getName()) +
+ "[TC]," + I->first->getName());
+ }
+ }
+
+ return AsmPrinter::doFinalization(M);
+}
+
+void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) {
+ static const char *const CPUDirectives[] = {
+ "",
+ "ppc",
+ "ppc601",
+ "ppc602",
+ "ppc603",
+ "ppc7400",
+ "ppc750",
+ "ppc970",
+ "ppc64"
+ };
+
+ unsigned Directive = Subtarget.getDarwinDirective();
+ if (Subtarget.isGigaProcessor() && Directive < PPC::DIR_970)
+ Directive = PPC::DIR_970;
+ if (Subtarget.hasAltivec() && Directive < PPC::DIR_7400)
+ Directive = PPC::DIR_7400;
+ if (Subtarget.isPPC64() && Directive < PPC::DIR_970)
+ Directive = PPC::DIR_64;
+ assert(Directive <= PPC::DIR_64 && "Directive out of range.");
+
+ // FIXME: This is a total hack, finish mc'izing the PPC backend.
+ if (OutStreamer.hasRawTextSupport())
+ OutStreamer.EmitRawText("\t.machine " + Twine(CPUDirectives[Directive]));
+
+ // Prime text sections so they are adjacent. This reduces the likelihood a
+ // large data or debug section causes a branch to exceed 16M limit.
+ const TargetLoweringObjectFileMachO &TLOFMacho =
+ static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering());
+ OutStreamer.SwitchSection(TLOFMacho.getTextCoalSection());
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ OutStreamer.SwitchSection(
+ OutContext.getMachOSection("__TEXT", "__picsymbolstub1",
+ MCSectionMachO::S_SYMBOL_STUBS |
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ 32, SectionKind::getText()));
+ } else if (TM.getRelocationModel() == Reloc::DynamicNoPIC) {
+ OutStreamer.SwitchSection(
+ OutContext.getMachOSection("__TEXT","__symbol_stub1",
+ MCSectionMachO::S_SYMBOL_STUBS |
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ 16, SectionKind::getText()));
+ }
+ OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
+}
+
+static MCSymbol *GetLazyPtr(MCSymbol *Sym, MCContext &Ctx) {
+ // Remove $stub suffix, add $lazy_ptr.
+ SmallString<128> TmpStr(Sym->getName().begin(), Sym->getName().end()-5);
+ TmpStr += "$lazy_ptr";
+ return Ctx.GetOrCreateSymbol(TmpStr.str());
+}
+
+static MCSymbol *GetAnonSym(MCSymbol *Sym, MCContext &Ctx) {
+ // Add $tmp suffix to $stub, yielding $stub$tmp.
+ SmallString<128> TmpStr(Sym->getName().begin(), Sym->getName().end());
+ TmpStr += "$tmp";
+ return Ctx.GetOrCreateSymbol(TmpStr.str());
+}
+
+void PPCDarwinAsmPrinter::
+EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
+ bool isPPC64 = TM.getTargetData()->getPointerSizeInBits() == 64;
+
+ const TargetLoweringObjectFileMachO &TLOFMacho =
+ static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering());
+
+ // .lazy_symbol_pointer
+ const MCSection *LSPSection = TLOFMacho.getLazySymbolPointerSection();
+
+ // Output stubs for dynamically-linked functions
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ const MCSection *StubSection =
+ OutContext.getMachOSection("__TEXT", "__picsymbolstub1",
+ MCSectionMachO::S_SYMBOL_STUBS |
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ 32, SectionKind::getText());
+ for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
+ OutStreamer.SwitchSection(StubSection);
+ EmitAlignment(4);
+
+ MCSymbol *Stub = Stubs[i].first;
+ MCSymbol *RawSym = Stubs[i].second.getPointer();
+ MCSymbol *LazyPtr = GetLazyPtr(Stub, OutContext);
+ MCSymbol *AnonSymbol = GetAnonSym(Stub, OutContext);
+
+ OutStreamer.EmitLabel(Stub);
+ OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
+ // FIXME: MCize this.
+ OutStreamer.EmitRawText(StringRef("\tmflr r0"));
+ OutStreamer.EmitRawText("\tbcl 20,31," + Twine(AnonSymbol->getName()));
+ OutStreamer.EmitLabel(AnonSymbol);
+ OutStreamer.EmitRawText(StringRef("\tmflr r11"));
+ OutStreamer.EmitRawText("\taddis r11,r11,ha16("+Twine(LazyPtr->getName())+
+ "-" + AnonSymbol->getName() + ")");
+ OutStreamer.EmitRawText(StringRef("\tmtlr r0"));
+
+ if (isPPC64)
+ OutStreamer.EmitRawText("\tldu r12,lo16(" + Twine(LazyPtr->getName()) +
+ "-" + AnonSymbol->getName() + ")(r11)");
+ else
+ OutStreamer.EmitRawText("\tlwzu r12,lo16(" + Twine(LazyPtr->getName()) +
+ "-" + AnonSymbol->getName() + ")(r11)");
+ OutStreamer.EmitRawText(StringRef("\tmtctr r12"));
+ OutStreamer.EmitRawText(StringRef("\tbctr"));
+
+ OutStreamer.SwitchSection(LSPSection);
+ OutStreamer.EmitLabel(LazyPtr);
+ OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
+
+ if (isPPC64)
+ OutStreamer.EmitRawText(StringRef("\t.quad dyld_stub_binding_helper"));
+ else
+ OutStreamer.EmitRawText(StringRef("\t.long dyld_stub_binding_helper"));
+ }
+ OutStreamer.AddBlankLine();
+ return;
+ }
+
+ const MCSection *StubSection =
+ OutContext.getMachOSection("__TEXT","__symbol_stub1",
+ MCSectionMachO::S_SYMBOL_STUBS |
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ 16, SectionKind::getText());
+ for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
+ MCSymbol *Stub = Stubs[i].first;
+ MCSymbol *RawSym = Stubs[i].second.getPointer();
+ MCSymbol *LazyPtr = GetLazyPtr(Stub, OutContext);
+
+ OutStreamer.SwitchSection(StubSection);
+ EmitAlignment(4);
+ OutStreamer.EmitLabel(Stub);
+ OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
+ OutStreamer.EmitRawText("\tlis r11,ha16(" + Twine(LazyPtr->getName()) +")");
+ if (isPPC64)
+ OutStreamer.EmitRawText("\tldu r12,lo16(" + Twine(LazyPtr->getName()) +
+ ")(r11)");
+ else
+ OutStreamer.EmitRawText("\tlwzu r12,lo16(" + Twine(LazyPtr->getName()) +
+ ")(r11)");
+ OutStreamer.EmitRawText(StringRef("\tmtctr r12"));
+ OutStreamer.EmitRawText(StringRef("\tbctr"));
+ OutStreamer.SwitchSection(LSPSection);
+ OutStreamer.EmitLabel(LazyPtr);
+ OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
+
+ if (isPPC64)
+ OutStreamer.EmitRawText(StringRef("\t.quad dyld_stub_binding_helper"));
+ else
+ OutStreamer.EmitRawText(StringRef("\t.long dyld_stub_binding_helper"));
+ }
+
+ OutStreamer.AddBlankLine();
+}
+
+
+bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
+ bool isPPC64 = TM.getTargetData()->getPointerSizeInBits() == 64;
+
+ // Darwin/PPC always uses mach-o.
+ const TargetLoweringObjectFileMachO &TLOFMacho =
+ static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering());
+ MachineModuleInfoMachO &MMIMacho =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>();
+
+ MachineModuleInfoMachO::SymbolListTy Stubs = MMIMacho.GetFnStubList();
+ if (!Stubs.empty())
+ EmitFunctionStubs(Stubs);
+
+ if (MAI->doesSupportExceptionHandling() && MMI) {
+ // Add the (possibly multiple) personalities to the set of global values.
+ // Only referenced functions get into the Personalities list.
+ const std::vector<const Function*> &Personalities = MMI->getPersonalities();
+ for (std::vector<const Function*>::const_iterator I = Personalities.begin(),
+ E = Personalities.end(); I != E; ++I) {
+ if (*I) {
+ MCSymbol *NLPSym = GetSymbolWithGlobalValueBase(*I, "$non_lazy_ptr");
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ MMIMacho.getGVStubEntry(NLPSym);
+ StubSym = MachineModuleInfoImpl::StubValueTy(Mang->getSymbol(*I), true);
+ }
+ }
+ }
+
+ // Output stubs for dynamically-linked functions.
+ Stubs = MMIMacho.GetGVStubList();
+
+ // Output macho stubs for external and common global variables.
+ if (!Stubs.empty()) {
+ // Switch with ".non_lazy_symbol_pointer" directive.
+ OutStreamer.SwitchSection(TLOFMacho.getNonLazySymbolPointerSection());
+ EmitAlignment(isPPC64 ? 3 : 2);
+
+ for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
+ // L_foo$stub:
+ OutStreamer.EmitLabel(Stubs[i].first);
+ // .indirect_symbol _foo
+ MachineModuleInfoImpl::StubValueTy &MCSym = Stubs[i].second;
+ OutStreamer.EmitSymbolAttribute(MCSym.getPointer(), MCSA_IndirectSymbol);
+
+ if (MCSym.getInt())
+ // External to current translation unit.
+ OutStreamer.EmitIntValue(0, isPPC64 ? 8 : 4/*size*/, 0/*addrspace*/);
+ else
+ // Internal to current translation unit.
+ //
+ // When we place the LSDA into the TEXT section, the type info pointers
+ // need to be indirect and pc-rel. We accomplish this by using NLPs.
+ // However, sometimes the types are local to the file. So we need to
+ // fill in the value for the NLP in those cases.
+ OutStreamer.EmitValue(MCSymbolRefExpr::Create(MCSym.getPointer(),
+ OutContext),
+ isPPC64 ? 8 : 4/*size*/, 0/*addrspace*/);
+ }
+
+ Stubs.clear();
+ OutStreamer.AddBlankLine();
+ }
+
+ Stubs = MMIMacho.GetHiddenGVStubList();
+ if (!Stubs.empty()) {
+ OutStreamer.SwitchSection(getObjFileLowering().getDataSection());
+ EmitAlignment(isPPC64 ? 3 : 2);
+
+ for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
+ // L_foo$stub:
+ OutStreamer.EmitLabel(Stubs[i].first);
+ // .long _foo
+ OutStreamer.EmitValue(MCSymbolRefExpr::
+ Create(Stubs[i].second.getPointer(),
+ OutContext),
+ isPPC64 ? 8 : 4/*size*/, 0/*addrspace*/);
+ }
+
+ Stubs.clear();
+ OutStreamer.AddBlankLine();
+ }
+
+ // Funny Darwin hack: This flag tells the linker that no global symbols
+ // contain code that falls through to other global symbols (e.g. the obvious
+ // implementation of multiple entry points). If this doesn't occur, the
+ // linker can safely perform dead code stripping. Since LLVM never generates
+ // code that does this, it is always safe to set.
+ OutStreamer.EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
+
+ return AsmPrinter::doFinalization(M);
+}
+
+/// createPPCAsmPrinterPass - Returns a pass that prints the PPC assembly code
+/// for a MachineFunction to the given output stream, in a format that the
+/// Darwin assembler can deal with.
+///
+static AsmPrinter *createPPCAsmPrinterPass(TargetMachine &tm,
+ MCStreamer &Streamer) {
+ const PPCSubtarget *Subtarget = &tm.getSubtarget<PPCSubtarget>();
+
+ if (Subtarget->isDarwin())
+ return new PPCDarwinAsmPrinter(tm, Streamer);
+ return new PPCLinuxAsmPrinter(tm, Streamer);
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializePowerPCAsmPrinter() {
+ TargetRegistry::RegisterAsmPrinter(ThePPC32Target, createPPCAsmPrinterPass);
+ TargetRegistry::RegisterAsmPrinter(ThePPC64Target, createPPCAsmPrinterPass);
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp b/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp
new file mode 100644
index 0000000..475edf3
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp
@@ -0,0 +1,174 @@
+//===-- PPCBranchSelector.cpp - Emit long conditional branches-----*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that scans a machine function to determine which
+// conditional branches need more than 16 bits of displacement to reach their
+// target basic block. It does this in two passes; a calculation of basic block
+// positions pass, and a branch pseudo op to machine branch opcode pass. This
+// pass should be run last, just before the assembly printer.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ppc-branch-select"
+#include "PPC.h"
+#include "PPCInstrBuilder.h"
+#include "PPCInstrInfo.h"
+#include "MCTargetDesc/PPCPredicates.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/MathExtras.h"
+using namespace llvm;
+
+STATISTIC(NumExpanded, "Number of branches expanded to long format");
+
+namespace {
+ struct PPCBSel : public MachineFunctionPass {
+ static char ID;
+ PPCBSel() : MachineFunctionPass(ID) {}
+
+ /// BlockSizes - The sizes of the basic blocks in the function.
+ std::vector<unsigned> BlockSizes;
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual const char *getPassName() const {
+ return "PowerPC Branch Selector";
+ }
+ };
+ char PPCBSel::ID = 0;
+}
+
+/// createPPCBranchSelectionPass - returns an instance of the Branch Selection
+/// Pass
+///
+FunctionPass *llvm::createPPCBranchSelectionPass() {
+ return new PPCBSel();
+}
+
+bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
+ const PPCInstrInfo *TII =
+ static_cast<const PPCInstrInfo*>(Fn.getTarget().getInstrInfo());
+ // Give the blocks of the function a dense, in-order, numbering.
+ Fn.RenumberBlocks();
+ BlockSizes.resize(Fn.getNumBlockIDs());
+
+ // Measure each MBB and compute a size for the entire function.
+ unsigned FuncSize = 0;
+ for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
+ ++MFI) {
+ MachineBasicBlock *MBB = MFI;
+
+ unsigned BlockSize = 0;
+ for (MachineBasicBlock::iterator MBBI = MBB->begin(), EE = MBB->end();
+ MBBI != EE; ++MBBI)
+ BlockSize += TII->GetInstSizeInBytes(MBBI);
+
+ BlockSizes[MBB->getNumber()] = BlockSize;
+ FuncSize += BlockSize;
+ }
+
+ // If the entire function is smaller than the displacement of a branch field,
+ // we know we don't need to shrink any branches in this function. This is a
+ // common case.
+ if (FuncSize < (1 << 15)) {
+ BlockSizes.clear();
+ return false;
+ }
+
+ // For each conditional branch, if the offset to its destination is larger
+ // than the offset field allows, transform it into a long branch sequence
+ // like this:
+ // short branch:
+ // bCC MBB
+ // long branch:
+ // b!CC $PC+8
+ // b MBB
+ //
+ bool MadeChange = true;
+ bool EverMadeChange = false;
+ while (MadeChange) {
+ // Iteratively expand branches until we reach a fixed point.
+ MadeChange = false;
+
+ for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
+ ++MFI) {
+ MachineBasicBlock &MBB = *MFI;
+ unsigned MBBStartOffset = 0;
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I) {
+ if (I->getOpcode() != PPC::BCC || I->getOperand(2).isImm()) {
+ MBBStartOffset += TII->GetInstSizeInBytes(I);
+ continue;
+ }
+
+ // Determine the offset from the current branch to the destination
+ // block.
+ MachineBasicBlock *Dest = I->getOperand(2).getMBB();
+
+ int BranchSize;
+ if (Dest->getNumber() <= MBB.getNumber()) {
+ // If this is a backwards branch, the delta is the offset from the
+ // start of this block to this branch, plus the sizes of all blocks
+ // from this block to the dest.
+ BranchSize = MBBStartOffset;
+
+ for (unsigned i = Dest->getNumber(), e = MBB.getNumber(); i != e; ++i)
+ BranchSize += BlockSizes[i];
+ } else {
+ // Otherwise, add the size of the blocks between this block and the
+ // dest to the number of bytes left in this block.
+ BranchSize = -MBBStartOffset;
+
+ for (unsigned i = MBB.getNumber(), e = Dest->getNumber(); i != e; ++i)
+ BranchSize += BlockSizes[i];
+ }
+
+ // If this branch is in range, ignore it.
+ if (isInt<16>(BranchSize)) {
+ MBBStartOffset += 4;
+ continue;
+ }
+
+ // Otherwise, we have to expand it to a long branch.
+ // The BCC operands are:
+ // 0. PPC branch predicate
+ // 1. CR register
+ // 2. Target MBB
+ PPC::Predicate Pred = (PPC::Predicate)I->getOperand(0).getImm();
+ unsigned CRReg = I->getOperand(1).getReg();
+
+ MachineInstr *OldBranch = I;
+ DebugLoc dl = OldBranch->getDebugLoc();
+
+ // Jump over the uncond branch inst (i.e. $PC+8) on opposite condition.
+ BuildMI(MBB, I, dl, TII->get(PPC::BCC))
+ .addImm(PPC::InvertPredicate(Pred)).addReg(CRReg).addImm(2);
+
+ // Uncond branch to the real destination.
+ I = BuildMI(MBB, I, dl, TII->get(PPC::B)).addMBB(Dest);
+
+ // Remove the old branch from the function.
+ OldBranch->eraseFromParent();
+
+ // Remember that this instruction is 8-bytes, increase the size of the
+ // block by 4, remember to iterate.
+ BlockSizes[MBB.getNumber()] += 4;
+ MBBStartOffset += 8;
+ ++NumExpanded;
+ MadeChange = true;
+ }
+ }
+ EverMadeChange |= MadeChange;
+ }
+
+ BlockSizes.clear();
+ return true;
+}
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td b/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td
new file mode 100644
index 0000000..441db94
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td
@@ -0,0 +1,132 @@
+//===- PPCCallingConv.td - Calling Conventions for PowerPC -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This describes the calling conventions for the PowerPC 32- and 64-bit
+// architectures.
+//
+//===----------------------------------------------------------------------===//
+
+/// CCIfSubtarget - Match if the current subtarget has a feature F.
+class CCIfSubtarget<string F, CCAction A>
+ : CCIf<!strconcat("State.getTarget().getSubtarget<PPCSubtarget>().", F), A>;
+
+//===----------------------------------------------------------------------===//
+// Return Value Calling Convention
+//===----------------------------------------------------------------------===//
+
+// Return-value convention for PowerPC
+def RetCC_PPC : CallingConv<[
+ CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>,
+ CCIfType<[i64], CCAssignToReg<[X3, X4, X5, X6]>>,
+
+ CCIfType<[f32], CCAssignToReg<[F1]>>,
+ CCIfType<[f64], CCAssignToReg<[F1, F2]>>,
+
+ // Vector types are always returned in V2.
+ CCIfType<[v16i8, v8i16, v4i32, v4f32], CCAssignToReg<[V2]>>
+]>;
+
+
+//===----------------------------------------------------------------------===//
+// PowerPC Argument Calling Conventions
+//===----------------------------------------------------------------------===//
+/*
+def CC_PPC : CallingConv<[
+ // The first 8 integer arguments are passed in integer registers.
+ CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>,
+ CCIfType<[i64], CCAssignToReg<[X3, X4, X5, X6, X7, X8, X9, X10]>>,
+
+ // Common sub-targets passes FP values in F1 - F13
+ CCIfType<[f32, f64],
+ CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8,F9,F10,F11,F12,F13]>>,
+
+ // The first 12 Vector arguments are passed in altivec registers.
+ CCIfType<[v16i8, v8i16, v4i32, v4f32],
+ CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9, V10,V11,V12,V13]>>
+
+/*
+ // Integer/FP values get stored in stack slots that are 8 bytes in size and
+ // 8-byte aligned if there are no more registers to hold them.
+ CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>,
+
+ // Vectors get 16-byte stack slots that are 16-byte aligned.
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCAssignToStack<16, 16>>*/
+]>;
+
+*/
+
+//===----------------------------------------------------------------------===//
+// PowerPC System V Release 4 ABI
+//===----------------------------------------------------------------------===//
+
+def CC_PPC_SVR4_Common : CallingConv<[
+ // The ABI requires i64 to be passed in two adjacent registers with the first
+ // register having an odd register number.
+ CCIfType<[i32], CCIfSplit<CCCustom<"CC_PPC_SVR4_Custom_AlignArgRegs">>>,
+
+ // The first 8 integer arguments are passed in integer registers.
+ CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>,
+
+ // Make sure the i64 words from a long double are either both passed in
+ // registers or both passed on the stack.
+ CCIfType<[f64], CCIfSplit<CCCustom<"CC_PPC_SVR4_Custom_AlignFPArgRegs">>>,
+
+ // FP values are passed in F1 - F8.
+ CCIfType<[f32, f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,
+
+ // Split arguments have an alignment of 8 bytes on the stack.
+ CCIfType<[i32], CCIfSplit<CCAssignToStack<4, 8>>>,
+
+ CCIfType<[i32], CCAssignToStack<4, 4>>,
+
+ // Floats are stored in double precision format, thus they have the same
+ // alignment and size as doubles.
+ CCIfType<[f32,f64], CCAssignToStack<8, 8>>,
+
+ // Vectors get 16-byte stack slots that are 16-byte aligned.
+ CCIfType<[v16i8, v8i16, v4i32, v4f32], CCAssignToStack<16, 16>>
+]>;
+
+// This calling convention puts vector arguments always on the stack. It is used
+// to assign vector arguments which belong to the variable portion of the
+// parameter list of a variable argument function.
+def CC_PPC_SVR4_VarArg : CallingConv<[
+ CCDelegateTo<CC_PPC_SVR4_Common>
+]>;
+
+// In contrast to CC_PPC_SVR4_VarArg, this calling convention first tries to put
+// vector arguments in vector registers before putting them on the stack.
+def CC_PPC_SVR4 : CallingConv<[
+ // The first 12 Vector arguments are passed in AltiVec registers.
+ CCIfType<[v16i8, v8i16, v4i32, v4f32],
+ CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9, V10, V11, V12, V13]>>,
+
+ CCDelegateTo<CC_PPC_SVR4_Common>
+]>;
+
+// Helper "calling convention" to handle aggregate by value arguments.
+// Aggregate by value arguments are always placed in the local variable space
+// of the caller. This calling convention is only used to assign those stack
+// offsets in the callers stack frame.
+//
+// Still, the address of the aggregate copy in the callers stack frame is passed
+// in a GPR (or in the parameter list area if all GPRs are allocated) from the
+// caller to the callee. The location for the address argument is assigned by
+// the CC_PPC_SVR4 calling convention.
+//
+// The only purpose of CC_PPC_SVR4_Custom_Dummy is to skip arguments which are
+// not passed by value.
+
+def CC_PPC_SVR4_ByVal : CallingConv<[
+ CCIfByVal<CCPassByVal<4, 4>>,
+
+ CCCustom<"CC_PPC_SVR4_Custom_Dummy">
+]>;
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCCodeEmitter.cpp b/contrib/llvm/lib/Target/PowerPC/PPCCodeEmitter.cpp
new file mode 100644
index 0000000..4a1f182
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCCodeEmitter.cpp
@@ -0,0 +1,261 @@
+//===-- PPCCodeEmitter.cpp - JIT Code Emitter for PowerPC32 -------*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the PowerPC 32-bit CodeEmitter and associated machinery to
+// JIT-compile bitcode to native PowerPC.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCTargetMachine.h"
+#include "PPCRelocations.h"
+#include "PPC.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+namespace {
+ class PPCCodeEmitter : public MachineFunctionPass {
+ TargetMachine &TM;
+ JITCodeEmitter &MCE;
+ MachineModuleInfo *MMI;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineModuleInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ static char ID;
+
+ /// MovePCtoLROffset - When/if we see a MovePCtoLR instruction, we record
+ /// its address in the function into this pointer.
+ void *MovePCtoLROffset;
+ public:
+
+ PPCCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce)
+ : MachineFunctionPass(ID), TM(tm), MCE(mce) {}
+
+ /// getBinaryCodeForInstr - This function, generated by the
+ /// CodeEmitterGenerator using TableGen, produces the binary encoding for
+ /// machine instructions.
+ unsigned getBinaryCodeForInstr(const MachineInstr &MI) const;
+
+
+ MachineRelocation GetRelocation(const MachineOperand &MO,
+ unsigned RelocID) const;
+
+ /// getMachineOpValue - evaluates the MachineOperand of a given MachineInstr
+ unsigned getMachineOpValue(const MachineInstr &MI,
+ const MachineOperand &MO) const;
+
+ unsigned get_crbitm_encoding(const MachineInstr &MI, unsigned OpNo) const;
+ unsigned getDirectBrEncoding(const MachineInstr &MI, unsigned OpNo) const;
+ unsigned getCondBrEncoding(const MachineInstr &MI, unsigned OpNo) const;
+
+ unsigned getHA16Encoding(const MachineInstr &MI, unsigned OpNo) const;
+ unsigned getLO16Encoding(const MachineInstr &MI, unsigned OpNo) const;
+ unsigned getMemRIEncoding(const MachineInstr &MI, unsigned OpNo) const;
+ unsigned getMemRIXEncoding(const MachineInstr &MI, unsigned OpNo) const;
+
+ const char *getPassName() const { return "PowerPC Machine Code Emitter"; }
+
+ /// runOnMachineFunction - emits the given MachineFunction to memory
+ ///
+ bool runOnMachineFunction(MachineFunction &MF);
+
+ /// emitBasicBlock - emits the given MachineBasicBlock to memory
+ ///
+ void emitBasicBlock(MachineBasicBlock &MBB);
+ };
+}
+
+char PPCCodeEmitter::ID = 0;
+
+/// createPPCCodeEmitterPass - Return a pass that emits the collected PPC code
+/// to the specified MCE object.
+FunctionPass *llvm::createPPCJITCodeEmitterPass(PPCTargetMachine &TM,
+ JITCodeEmitter &JCE) {
+ return new PPCCodeEmitter(TM, JCE);
+}
+
+bool PPCCodeEmitter::runOnMachineFunction(MachineFunction &MF) {
+ assert((MF.getTarget().getRelocationModel() != Reloc::Default ||
+ MF.getTarget().getRelocationModel() != Reloc::Static) &&
+ "JIT relocation model must be set to static or default!");
+
+ MMI = &getAnalysis<MachineModuleInfo>();
+ MCE.setModuleInfo(MMI);
+ do {
+ MovePCtoLROffset = 0;
+ MCE.startFunction(MF);
+ for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB)
+ emitBasicBlock(*BB);
+ } while (MCE.finishFunction(MF));
+
+ return false;
+}
+
+void PPCCodeEmitter::emitBasicBlock(MachineBasicBlock &MBB) {
+ MCE.StartMachineBasicBlock(&MBB);
+
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ++I){
+ const MachineInstr &MI = *I;
+ MCE.processDebugLoc(MI.getDebugLoc(), true);
+ switch (MI.getOpcode()) {
+ default:
+ MCE.emitWordBE(getBinaryCodeForInstr(MI));
+ break;
+ case TargetOpcode::PROLOG_LABEL:
+ case TargetOpcode::EH_LABEL:
+ MCE.emitLabel(MI.getOperand(0).getMCSymbol());
+ break;
+ case TargetOpcode::IMPLICIT_DEF:
+ case TargetOpcode::KILL:
+ break; // pseudo opcode, no side effects
+ case PPC::MovePCtoLR:
+ case PPC::MovePCtoLR8:
+ assert(TM.getRelocationModel() == Reloc::PIC_);
+ MovePCtoLROffset = (void*)MCE.getCurrentPCValue();
+ MCE.emitWordBE(0x48000005); // bl 1
+ break;
+ }
+ MCE.processDebugLoc(MI.getDebugLoc(), false);
+ }
+}
+
+unsigned PPCCodeEmitter::get_crbitm_encoding(const MachineInstr &MI,
+ unsigned OpNo) const {
+ const MachineOperand &MO = MI.getOperand(OpNo);
+ assert((MI.getOpcode() == PPC::MTCRF || MI.getOpcode() == PPC::MFOCRF) &&
+ (MO.getReg() >= PPC::CR0 && MO.getReg() <= PPC::CR7));
+ return 0x80 >> getPPCRegisterNumbering(MO.getReg());
+}
+
+MachineRelocation PPCCodeEmitter::GetRelocation(const MachineOperand &MO,
+ unsigned RelocID) const {
+ // If in PIC mode, we need to encode the negated address of the
+ // 'movepctolr' into the unrelocated field. After relocation, we'll have
+ // &gv-&movepctolr-4 in the imm field. Once &movepctolr is added to the imm
+ // field, we get &gv. This doesn't happen for branch relocations, which are
+ // always implicitly pc relative.
+ intptr_t Cst = 0;
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ assert(MovePCtoLROffset && "MovePCtoLR not seen yet?");
+ Cst = -(intptr_t)MovePCtoLROffset - 4;
+ }
+
+ if (MO.isGlobal())
+ return MachineRelocation::getGV(MCE.getCurrentPCOffset(), RelocID,
+ const_cast<GlobalValue *>(MO.getGlobal()),
+ Cst, isa<Function>(MO.getGlobal()));
+ if (MO.isSymbol())
+ return MachineRelocation::getExtSym(MCE.getCurrentPCOffset(),
+ RelocID, MO.getSymbolName(), Cst);
+ if (MO.isCPI())
+ return MachineRelocation::getConstPool(MCE.getCurrentPCOffset(),
+ RelocID, MO.getIndex(), Cst);
+
+ if (MO.isMBB())
+ return MachineRelocation::getBB(MCE.getCurrentPCOffset(),
+ RelocID, MO.getMBB());
+
+ assert(MO.isJTI());
+ return MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(),
+ RelocID, MO.getIndex(), Cst);
+}
+
+unsigned PPCCodeEmitter::getDirectBrEncoding(const MachineInstr &MI,
+ unsigned OpNo) const {
+ const MachineOperand &MO = MI.getOperand(OpNo);
+ if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO);
+
+ MCE.addRelocation(GetRelocation(MO, PPC::reloc_pcrel_bx));
+ return 0;
+}
+
+unsigned PPCCodeEmitter::getCondBrEncoding(const MachineInstr &MI,
+ unsigned OpNo) const {
+ const MachineOperand &MO = MI.getOperand(OpNo);
+ MCE.addRelocation(GetRelocation(MO, PPC::reloc_pcrel_bcx));
+ return 0;
+}
+
+unsigned PPCCodeEmitter::getHA16Encoding(const MachineInstr &MI,
+ unsigned OpNo) const {
+ const MachineOperand &MO = MI.getOperand(OpNo);
+ if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO);
+
+ MCE.addRelocation(GetRelocation(MO, PPC::reloc_absolute_high));
+ return 0;
+}
+
+unsigned PPCCodeEmitter::getLO16Encoding(const MachineInstr &MI,
+ unsigned OpNo) const {
+ const MachineOperand &MO = MI.getOperand(OpNo);
+ if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO);
+
+ MCE.addRelocation(GetRelocation(MO, PPC::reloc_absolute_low));
+ return 0;
+}
+
+unsigned PPCCodeEmitter::getMemRIEncoding(const MachineInstr &MI,
+ unsigned OpNo) const {
+ // Encode (imm, reg) as a memri, which has the low 16-bits as the
+ // displacement and the next 5 bits as the register #.
+ assert(MI.getOperand(OpNo+1).isReg());
+ unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1)) << 16;
+
+ const MachineOperand &MO = MI.getOperand(OpNo);
+ if (MO.isImm())
+ return (getMachineOpValue(MI, MO) & 0xFFFF) | RegBits;
+
+ // Add a fixup for the displacement field.
+ MCE.addRelocation(GetRelocation(MO, PPC::reloc_absolute_low));
+ return RegBits;
+}
+
+unsigned PPCCodeEmitter::getMemRIXEncoding(const MachineInstr &MI,
+ unsigned OpNo) const {
+ // Encode (imm, reg) as a memrix, which has the low 14-bits as the
+ // displacement and the next 5 bits as the register #.
+ assert(MI.getOperand(OpNo+1).isReg());
+ unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1)) << 14;
+
+ const MachineOperand &MO = MI.getOperand(OpNo);
+ if (MO.isImm())
+ return (getMachineOpValue(MI, MO) & 0x3FFF) | RegBits;
+
+ MCE.addRelocation(GetRelocation(MO, PPC::reloc_absolute_low_ix));
+ return RegBits;
+}
+
+
+unsigned PPCCodeEmitter::getMachineOpValue(const MachineInstr &MI,
+ const MachineOperand &MO) const {
+
+ if (MO.isReg()) {
+ // MTCRF/MFOCRF should go through get_crbitm_encoding for the CR operand.
+ // The GPR operand should come through here though.
+ assert((MI.getOpcode() != PPC::MTCRF && MI.getOpcode() != PPC::MFOCRF) ||
+ MO.getReg() < PPC::CR0 || MO.getReg() > PPC::CR7);
+ return getPPCRegisterNumbering(MO.getReg());
+ }
+
+ assert(MO.isImm() &&
+ "Relocation required in an instruction that we cannot encode!");
+ return MO.getImm();
+}
+
+#include "PPCGenCodeEmitter.inc"
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
new file mode 100644
index 0000000..7dead10
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -0,0 +1,971 @@
+//=====- PPCFrameLowering.cpp - PPC Frame Information -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PPC implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCFrameLowering.h"
+#include "PPCInstrInfo.h"
+#include "PPCMachineFunctionInfo.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Target/TargetOptions.h"
+
+using namespace llvm;
+
+// FIXME This disables some code that aligns the stack to a boundary bigger than
+// the default (16 bytes on Darwin) when there is a stack local of greater
+// alignment. This does not currently work, because the delta between old and
+// new stack pointers is added to offsets that reference incoming parameters
+// after the prolog is generated, and the code that does that doesn't handle a
+// variable delta. You don't want to do that anyway; a better approach is to
+// reserve another register that retains to the incoming stack pointer, and
+// reference parameters relative to that.
+#define ALIGN_STACK 0
+
+
+/// VRRegNo - Map from a numbered VR register to its enum value.
+///
+static const unsigned short VRRegNo[] = {
+ PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 , PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 ,
+ PPC::V8 , PPC::V9 , PPC::V10, PPC::V11, PPC::V12, PPC::V13, PPC::V14, PPC::V15,
+ PPC::V16, PPC::V17, PPC::V18, PPC::V19, PPC::V20, PPC::V21, PPC::V22, PPC::V23,
+ PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V28, PPC::V29, PPC::V30, PPC::V31
+};
+
+/// RemoveVRSaveCode - We have found that this function does not need any code
+/// to manipulate the VRSAVE register, even though it uses vector registers.
+/// This can happen when the only registers used are known to be live in or out
+/// of the function. Remove all of the VRSAVE related code from the function.
+static void RemoveVRSaveCode(MachineInstr *MI) {
+ MachineBasicBlock *Entry = MI->getParent();
+ MachineFunction *MF = Entry->getParent();
+
+ // We know that the MTVRSAVE instruction immediately follows MI. Remove it.
+ MachineBasicBlock::iterator MBBI = MI;
+ ++MBBI;
+ assert(MBBI != Entry->end() && MBBI->getOpcode() == PPC::MTVRSAVE);
+ MBBI->eraseFromParent();
+
+ bool RemovedAllMTVRSAVEs = true;
+ // See if we can find and remove the MTVRSAVE instruction from all of the
+ // epilog blocks.
+ for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) {
+ // If last instruction is a return instruction, add an epilogue
+ if (!I->empty() && I->back().getDesc().isReturn()) {
+ bool FoundIt = false;
+ for (MBBI = I->end(); MBBI != I->begin(); ) {
+ --MBBI;
+ if (MBBI->getOpcode() == PPC::MTVRSAVE) {
+ MBBI->eraseFromParent(); // remove it.
+ FoundIt = true;
+ break;
+ }
+ }
+ RemovedAllMTVRSAVEs &= FoundIt;
+ }
+ }
+
+ // If we found and removed all MTVRSAVE instructions, remove the read of
+ // VRSAVE as well.
+ if (RemovedAllMTVRSAVEs) {
+ MBBI = MI;
+ assert(MBBI != Entry->begin() && "UPDATE_VRSAVE is first instr in block?");
+ --MBBI;
+ assert(MBBI->getOpcode() == PPC::MFVRSAVE && "VRSAVE instrs wandered?");
+ MBBI->eraseFromParent();
+ }
+
+ // Finally, nuke the UPDATE_VRSAVE.
+ MI->eraseFromParent();
+}
+
+// HandleVRSaveUpdate - MI is the UPDATE_VRSAVE instruction introduced by the
+// instruction selector. Based on the vector registers that have been used,
+// transform this into the appropriate ORI instruction.
+static void HandleVRSaveUpdate(MachineInstr *MI, const TargetInstrInfo &TII) {
+ MachineFunction *MF = MI->getParent()->getParent();
+ DebugLoc dl = MI->getDebugLoc();
+
+ unsigned UsedRegMask = 0;
+ for (unsigned i = 0; i != 32; ++i)
+ if (MF->getRegInfo().isPhysRegUsed(VRRegNo[i]))
+ UsedRegMask |= 1 << (31-i);
+
+ // Live in and live out values already must be in the mask, so don't bother
+ // marking them.
+ for (MachineRegisterInfo::livein_iterator
+ I = MF->getRegInfo().livein_begin(),
+ E = MF->getRegInfo().livein_end(); I != E; ++I) {
+ unsigned RegNo = getPPCRegisterNumbering(I->first);
+ if (VRRegNo[RegNo] == I->first) // If this really is a vector reg.
+ UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked.
+ }
+ for (MachineRegisterInfo::liveout_iterator
+ I = MF->getRegInfo().liveout_begin(),
+ E = MF->getRegInfo().liveout_end(); I != E; ++I) {
+ unsigned RegNo = getPPCRegisterNumbering(*I);
+ if (VRRegNo[RegNo] == *I) // If this really is a vector reg.
+ UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked.
+ }
+
+ // If no registers are used, turn this into a copy.
+ if (UsedRegMask == 0) {
+ // Remove all VRSAVE code.
+ RemoveVRSaveCode(MI);
+ return;
+ }
+
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ unsigned DstReg = MI->getOperand(0).getReg();
+
+ if ((UsedRegMask & 0xFFFF) == UsedRegMask) {
+ if (DstReg != SrcReg)
+ BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
+ .addReg(SrcReg)
+ .addImm(UsedRegMask);
+ else
+ BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
+ .addReg(SrcReg, RegState::Kill)
+ .addImm(UsedRegMask);
+ } else if ((UsedRegMask & 0xFFFF0000) == UsedRegMask) {
+ if (DstReg != SrcReg)
+ BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
+ .addReg(SrcReg)
+ .addImm(UsedRegMask >> 16);
+ else
+ BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
+ .addReg(SrcReg, RegState::Kill)
+ .addImm(UsedRegMask >> 16);
+ } else {
+ if (DstReg != SrcReg)
+ BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
+ .addReg(SrcReg)
+ .addImm(UsedRegMask >> 16);
+ else
+ BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
+ .addReg(SrcReg, RegState::Kill)
+ .addImm(UsedRegMask >> 16);
+
+ BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
+ .addReg(DstReg, RegState::Kill)
+ .addImm(UsedRegMask & 0xFFFF);
+ }
+
+ // Remove the old UPDATE_VRSAVE instruction.
+ MI->eraseFromParent();
+}
+
+/// determineFrameLayout - Determine the size of the frame and maximum call
+/// frame size.
+void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ // Get the number of bytes to allocate from the FrameInfo
+ unsigned FrameSize = MFI->getStackSize();
+
+ // Get the alignments provided by the target, and the maximum alignment
+ // (if any) of the fixed frame objects.
+ unsigned MaxAlign = MFI->getMaxAlignment();
+ unsigned TargetAlign = getStackAlignment();
+ unsigned AlignMask = TargetAlign - 1; //
+
+ // If we are a leaf function, and use up to 224 bytes of stack space,
+ // don't have a frame pointer, calls, or dynamic alloca then we do not need
+ // to adjust the stack pointer (we fit in the Red Zone).
+ bool DisableRedZone = MF.getFunction()->hasFnAttr(Attribute::NoRedZone);
+ // FIXME SVR4 The 32-bit SVR4 ABI has no red zone.
+ if (!DisableRedZone &&
+ FrameSize <= 224 && // Fits in red zone.
+ !MFI->hasVarSizedObjects() && // No dynamic alloca.
+ !MFI->adjustsStack() && // No calls.
+ (!ALIGN_STACK || MaxAlign <= TargetAlign)) { // No special alignment.
+ // No need for frame
+ MFI->setStackSize(0);
+ return;
+ }
+
+ // Get the maximum call frame size of all the calls.
+ unsigned maxCallFrameSize = MFI->getMaxCallFrameSize();
+
+ // Maximum call frame needs to be at least big enough for linkage and 8 args.
+ unsigned minCallFrameSize = getMinCallFrameSize(Subtarget.isPPC64(),
+ Subtarget.isDarwinABI());
+ maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize);
+
+ // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
+ // that allocations will be aligned.
+ if (MFI->hasVarSizedObjects())
+ maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask;
+
+ // Update maximum call frame size.
+ MFI->setMaxCallFrameSize(maxCallFrameSize);
+
+ // Include call frame size in total.
+ FrameSize += maxCallFrameSize;
+
+ // Make sure the frame is aligned.
+ FrameSize = (FrameSize + AlignMask) & ~AlignMask;
+
+ // Update frame info.
+ MFI->setStackSize(FrameSize);
+}
+
+// hasFP - Return true if the specified function actually has a dedicated frame
+// pointer register.
+bool PPCFrameLowering::hasFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ // FIXME: This is pretty much broken by design: hasFP() might be called really
+ // early, before the stack layout was calculated and thus hasFP() might return
+ // true or false here depending on the time of call.
+ return (MFI->getStackSize()) && needsFP(MF);
+}
+
+// needsFP - Return true if the specified function should have a dedicated frame
+// pointer register. This is true if the function has variable sized allocas or
+// if frame pointer elimination is disabled.
+bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ // Naked functions have no stack frame pushed, so we don't have a frame
+ // pointer.
+ if (MF.getFunction()->hasFnAttr(Attribute::Naked))
+ return false;
+
+ return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects() ||
+ (GuaranteedTailCallOpt && MF.getInfo<PPCFunctionInfo>()->hasFastCall());
+}
+
+
+void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const PPCInstrInfo &TII =
+ *static_cast<const PPCInstrInfo*>(MF.getTarget().getInstrInfo());
+
+ MachineModuleInfo &MMI = MF.getMMI();
+ DebugLoc dl;
+ bool needsFrameMoves = MMI.hasDebugInfo() ||
+ MF.getFunction()->needsUnwindTableEntry();
+
+ // Prepare for frame info.
+ MCSymbol *FrameLabel = 0;
+
+ // Scan the prolog, looking for an UPDATE_VRSAVE instruction. If we find it,
+ // process it.
+ for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) {
+ if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) {
+ HandleVRSaveUpdate(MBBI, TII);
+ break;
+ }
+ }
+
+ // Move MBBI back to the beginning of the function.
+ MBBI = MBB.begin();
+
+ // Work out frame sizes.
+ // FIXME: determineFrameLayout() may change the frame size. This should be
+ // moved upper, to some hook.
+ determineFrameLayout(MF);
+ unsigned FrameSize = MFI->getStackSize();
+
+ int NegFrameSize = -FrameSize;
+
+ // Get processor type.
+ bool isPPC64 = Subtarget.isPPC64();
+ // Get operating system
+ bool isDarwinABI = Subtarget.isDarwinABI();
+ // Check if the link register (LR) must be saved.
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ bool MustSaveLR = FI->mustSaveLR();
+ // Do we have a frame pointer for this function?
+ bool HasFP = hasFP(MF);
+
+ int LROffset = PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI);
+
+ int FPOffset = 0;
+ if (HasFP) {
+ if (Subtarget.isSVR4ABI()) {
+ MachineFrameInfo *FFI = MF.getFrameInfo();
+ int FPIndex = FI->getFramePointerSaveIndex();
+ assert(FPIndex && "No Frame Pointer Save Slot!");
+ FPOffset = FFI->getObjectOffset(FPIndex);
+ } else {
+ FPOffset = PPCFrameLowering::getFramePointerSaveOffset(isPPC64, isDarwinABI);
+ }
+ }
+
+ if (isPPC64) {
+ if (MustSaveLR)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::MFLR8), PPC::X0);
+
+ if (HasFP)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STD))
+ .addReg(PPC::X31)
+ .addImm(FPOffset/4)
+ .addReg(PPC::X1);
+
+ if (MustSaveLR)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STD))
+ .addReg(PPC::X0)
+ .addImm(LROffset / 4)
+ .addReg(PPC::X1);
+ } else {
+ if (MustSaveLR)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::MFLR), PPC::R0);
+
+ if (HasFP)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STW))
+ .addReg(PPC::R31)
+ .addImm(FPOffset)
+ .addReg(PPC::R1);
+
+ if (MustSaveLR)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STW))
+ .addReg(PPC::R0)
+ .addImm(LROffset)
+ .addReg(PPC::R1);
+ }
+
+ // Skip if a leaf routine.
+ if (!FrameSize) return;
+
+ // Get stack alignments.
+ unsigned TargetAlign = getStackAlignment();
+ unsigned MaxAlign = MFI->getMaxAlignment();
+
+ // Adjust stack pointer: r1 += NegFrameSize.
+ // If there is a preferred stack alignment, align R1 now
+ if (!isPPC64) {
+ // PPC32.
+ if (ALIGN_STACK && MaxAlign > TargetAlign) {
+ assert(isPowerOf2_32(MaxAlign) && isInt<16>(MaxAlign) &&
+ "Invalid alignment!");
+ assert(isInt<16>(NegFrameSize) && "Unhandled stack size and alignment!");
+
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), PPC::R0)
+ .addReg(PPC::R1)
+ .addImm(0)
+ .addImm(32 - Log2_32(MaxAlign))
+ .addImm(31);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBFIC) ,PPC::R0)
+ .addReg(PPC::R0, RegState::Kill)
+ .addImm(NegFrameSize);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STWUX))
+ .addReg(PPC::R1)
+ .addReg(PPC::R1)
+ .addReg(PPC::R0);
+ } else if (isInt<16>(NegFrameSize)) {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STWU), PPC::R1)
+ .addReg(PPC::R1)
+ .addImm(NegFrameSize)
+ .addReg(PPC::R1);
+ } else {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS), PPC::R0)
+ .addImm(NegFrameSize >> 16);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI), PPC::R0)
+ .addReg(PPC::R0, RegState::Kill)
+ .addImm(NegFrameSize & 0xFFFF);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STWUX))
+ .addReg(PPC::R1)
+ .addReg(PPC::R1)
+ .addReg(PPC::R0);
+ }
+ } else { // PPC64.
+ if (ALIGN_STACK && MaxAlign > TargetAlign) {
+ assert(isPowerOf2_32(MaxAlign) && isInt<16>(MaxAlign) &&
+ "Invalid alignment!");
+ assert(isInt<16>(NegFrameSize) && "Unhandled stack size and alignment!");
+
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), PPC::X0)
+ .addReg(PPC::X1)
+ .addImm(0)
+ .addImm(64 - Log2_32(MaxAlign));
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBFIC8), PPC::X0)
+ .addReg(PPC::X0)
+ .addImm(NegFrameSize);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STDUX))
+ .addReg(PPC::X1)
+ .addReg(PPC::X1)
+ .addReg(PPC::X0);
+ } else if (isInt<16>(NegFrameSize)) {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STDU), PPC::X1)
+ .addReg(PPC::X1)
+ .addImm(NegFrameSize / 4)
+ .addReg(PPC::X1);
+ } else {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS8), PPC::X0)
+ .addImm(NegFrameSize >> 16);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI8), PPC::X0)
+ .addReg(PPC::X0, RegState::Kill)
+ .addImm(NegFrameSize & 0xFFFF);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STDUX))
+ .addReg(PPC::X1)
+ .addReg(PPC::X1)
+ .addReg(PPC::X0);
+ }
+ }
+
+ std::vector<MachineMove> &Moves = MMI.getFrameMoves();
+
+ // Add the "machine moves" for the instructions we generated above, but in
+ // reverse order.
+ if (needsFrameMoves) {
+ // Mark effective beginning of when frame pointer becomes valid.
+ FrameLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::PROLOG_LABEL)).addSym(FrameLabel);
+
+ // Show update of SP.
+ if (NegFrameSize) {
+ MachineLocation SPDst(MachineLocation::VirtualFP);
+ MachineLocation SPSrc(MachineLocation::VirtualFP, NegFrameSize);
+ Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc));
+ } else {
+ MachineLocation SP(isPPC64 ? PPC::X31 : PPC::R31);
+ Moves.push_back(MachineMove(FrameLabel, SP, SP));
+ }
+
+ if (HasFP) {
+ MachineLocation FPDst(MachineLocation::VirtualFP, FPOffset);
+ MachineLocation FPSrc(isPPC64 ? PPC::X31 : PPC::R31);
+ Moves.push_back(MachineMove(FrameLabel, FPDst, FPSrc));
+ }
+
+ if (MustSaveLR) {
+ MachineLocation LRDst(MachineLocation::VirtualFP, LROffset);
+ MachineLocation LRSrc(isPPC64 ? PPC::LR8 : PPC::LR);
+ Moves.push_back(MachineMove(FrameLabel, LRDst, LRSrc));
+ }
+ }
+
+ MCSymbol *ReadyLabel = 0;
+
+ // If there is a frame pointer, copy R1 into R31
+ if (HasFP) {
+ if (!isPPC64) {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::OR), PPC::R31)
+ .addReg(PPC::R1)
+ .addReg(PPC::R1);
+ } else {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::OR8), PPC::X31)
+ .addReg(PPC::X1)
+ .addReg(PPC::X1);
+ }
+
+ if (needsFrameMoves) {
+ ReadyLabel = MMI.getContext().CreateTempSymbol();
+
+ // Mark effective beginning of when frame pointer is ready.
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::PROLOG_LABEL)).addSym(ReadyLabel);
+
+ MachineLocation FPDst(HasFP ? (isPPC64 ? PPC::X31 : PPC::R31) :
+ (isPPC64 ? PPC::X1 : PPC::R1));
+ MachineLocation FPSrc(MachineLocation::VirtualFP);
+ Moves.push_back(MachineMove(ReadyLabel, FPDst, FPSrc));
+ }
+ }
+
+ if (needsFrameMoves) {
+ MCSymbol *Label = HasFP ? ReadyLabel : FrameLabel;
+
+ // Add callee saved registers to move list.
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
+ int Offset = MFI->getObjectOffset(CSI[I].getFrameIdx());
+ unsigned Reg = CSI[I].getReg();
+ if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue;
+
+ // This is a bit of a hack: CR2LT, CR2GT, CR2EQ and CR2UN are just
+ // subregisters of CR2. We just need to emit a move of CR2.
+ if (Reg == PPC::CR2LT || Reg == PPC::CR2GT || Reg == PPC::CR2EQ)
+ continue;
+ if (Reg == PPC::CR2UN)
+ Reg = PPC::CR2;
+
+ MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
+ MachineLocation CSSrc(Reg);
+ Moves.push_back(MachineMove(Label, CSDst, CSSrc));
+ }
+ }
+}
+
+void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ assert(MBBI != MBB.end() && "Returning block has no terminator");
+ const PPCInstrInfo &TII =
+ *static_cast<const PPCInstrInfo*>(MF.getTarget().getInstrInfo());
+
+ unsigned RetOpcode = MBBI->getOpcode();
+ DebugLoc dl;
+
+ assert((RetOpcode == PPC::BLR ||
+ RetOpcode == PPC::TCRETURNri ||
+ RetOpcode == PPC::TCRETURNdi ||
+ RetOpcode == PPC::TCRETURNai ||
+ RetOpcode == PPC::TCRETURNri8 ||
+ RetOpcode == PPC::TCRETURNdi8 ||
+ RetOpcode == PPC::TCRETURNai8) &&
+ "Can only insert epilog into returning blocks");
+
+ // Get alignment info so we know how to restore r1
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ unsigned TargetAlign = getStackAlignment();
+ unsigned MaxAlign = MFI->getMaxAlignment();
+
+ // Get the number of bytes allocated from the FrameInfo.
+ int FrameSize = MFI->getStackSize();
+
+ // Get processor type.
+ bool isPPC64 = Subtarget.isPPC64();
+ // Get operating system
+ bool isDarwinABI = Subtarget.isDarwinABI();
+ // Check if the link register (LR) has been saved.
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ bool MustSaveLR = FI->mustSaveLR();
+ // Do we have a frame pointer for this function?
+ bool HasFP = hasFP(MF);
+
+ int LROffset = PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI);
+
+ int FPOffset = 0;
+ if (HasFP) {
+ if (Subtarget.isSVR4ABI()) {
+ MachineFrameInfo *FFI = MF.getFrameInfo();
+ int FPIndex = FI->getFramePointerSaveIndex();
+ assert(FPIndex && "No Frame Pointer Save Slot!");
+ FPOffset = FFI->getObjectOffset(FPIndex);
+ } else {
+ FPOffset = PPCFrameLowering::getFramePointerSaveOffset(isPPC64, isDarwinABI);
+ }
+ }
+
+ bool UsesTCRet = RetOpcode == PPC::TCRETURNri ||
+ RetOpcode == PPC::TCRETURNdi ||
+ RetOpcode == PPC::TCRETURNai ||
+ RetOpcode == PPC::TCRETURNri8 ||
+ RetOpcode == PPC::TCRETURNdi8 ||
+ RetOpcode == PPC::TCRETURNai8;
+
+ if (UsesTCRet) {
+ int MaxTCRetDelta = FI->getTailCallSPDelta();
+ MachineOperand &StackAdjust = MBBI->getOperand(1);
+ assert(StackAdjust.isImm() && "Expecting immediate value.");
+ // Adjust stack pointer.
+ int StackAdj = StackAdjust.getImm();
+ int Delta = StackAdj - MaxTCRetDelta;
+ assert((Delta >= 0) && "Delta must be positive");
+ if (MaxTCRetDelta>0)
+ FrameSize += (StackAdj +Delta);
+ else
+ FrameSize += StackAdj;
+ }
+
+ if (FrameSize) {
+ // The loaded (or persistent) stack pointer value is offset by the 'stwu'
+ // on entry to the function. Add this offset back now.
+ if (!isPPC64) {
+ // If this function contained a fastcc call and GuaranteedTailCallOpt is
+ // enabled (=> hasFastCall()==true) the fastcc call might contain a tail
+ // call which invalidates the stack pointer value in SP(0). So we use the
+ // value of R31 in this case.
+ if (FI->hasFastCall() && isInt<16>(FrameSize)) {
+ assert(hasFP(MF) && "Expecting a valid the frame pointer.");
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), PPC::R1)
+ .addReg(PPC::R31).addImm(FrameSize);
+ } else if(FI->hasFastCall()) {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS), PPC::R0)
+ .addImm(FrameSize >> 16);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI), PPC::R0)
+ .addReg(PPC::R0, RegState::Kill)
+ .addImm(FrameSize & 0xFFFF);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::ADD4))
+ .addReg(PPC::R1)
+ .addReg(PPC::R31)
+ .addReg(PPC::R0);
+ } else if (isInt<16>(FrameSize) &&
+ (!ALIGN_STACK || TargetAlign >= MaxAlign) &&
+ !MFI->hasVarSizedObjects()) {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), PPC::R1)
+ .addReg(PPC::R1).addImm(FrameSize);
+ } else {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ),PPC::R1)
+ .addImm(0).addReg(PPC::R1);
+ }
+ } else {
+ if (FI->hasFastCall() && isInt<16>(FrameSize)) {
+ assert(hasFP(MF) && "Expecting a valid the frame pointer.");
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI8), PPC::X1)
+ .addReg(PPC::X31).addImm(FrameSize);
+ } else if(FI->hasFastCall()) {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS8), PPC::X0)
+ .addImm(FrameSize >> 16);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI8), PPC::X0)
+ .addReg(PPC::X0, RegState::Kill)
+ .addImm(FrameSize & 0xFFFF);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::ADD8))
+ .addReg(PPC::X1)
+ .addReg(PPC::X31)
+ .addReg(PPC::X0);
+ } else if (isInt<16>(FrameSize) && TargetAlign >= MaxAlign &&
+ !MFI->hasVarSizedObjects()) {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI8), PPC::X1)
+ .addReg(PPC::X1).addImm(FrameSize);
+ } else {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X1)
+ .addImm(0).addReg(PPC::X1);
+ }
+ }
+ }
+
+ if (isPPC64) {
+ if (MustSaveLR)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X0)
+ .addImm(LROffset/4).addReg(PPC::X1);
+
+ if (HasFP)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X31)
+ .addImm(FPOffset/4).addReg(PPC::X1);
+
+ if (MustSaveLR)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::MTLR8)).addReg(PPC::X0);
+ } else {
+ if (MustSaveLR)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ), PPC::R0)
+ .addImm(LROffset).addReg(PPC::R1);
+
+ if (HasFP)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ), PPC::R31)
+ .addImm(FPOffset).addReg(PPC::R1);
+
+ if (MustSaveLR)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::MTLR)).addReg(PPC::R0);
+ }
+
+ // Callee pop calling convention. Pop parameter/linkage area. Used for tail
+ // call optimization
+ if (GuaranteedTailCallOpt && RetOpcode == PPC::BLR &&
+ MF.getFunction()->getCallingConv() == CallingConv::Fast) {
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ unsigned CallerAllocatedAmt = FI->getMinReservedArea();
+ unsigned StackReg = isPPC64 ? PPC::X1 : PPC::R1;
+ unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31;
+ unsigned TmpReg = isPPC64 ? PPC::X0 : PPC::R0;
+ unsigned ADDIInstr = isPPC64 ? PPC::ADDI8 : PPC::ADDI;
+ unsigned ADDInstr = isPPC64 ? PPC::ADD8 : PPC::ADD4;
+ unsigned LISInstr = isPPC64 ? PPC::LIS8 : PPC::LIS;
+ unsigned ORIInstr = isPPC64 ? PPC::ORI8 : PPC::ORI;
+
+ if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) {
+ BuildMI(MBB, MBBI, dl, TII.get(ADDIInstr), StackReg)
+ .addReg(StackReg).addImm(CallerAllocatedAmt);
+ } else {
+ BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg)
+ .addImm(CallerAllocatedAmt >> 16);
+ BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg)
+ .addReg(TmpReg, RegState::Kill)
+ .addImm(CallerAllocatedAmt & 0xFFFF);
+ BuildMI(MBB, MBBI, dl, TII.get(ADDInstr))
+ .addReg(StackReg)
+ .addReg(FPReg)
+ .addReg(TmpReg);
+ }
+ } else if (RetOpcode == PPC::TCRETURNdi) {
+ MBBI = MBB.getLastNonDebugInstr();
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
+ addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
+ } else if (RetOpcode == PPC::TCRETURNri) {
+ MBBI = MBB.getLastNonDebugInstr();
+ assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR));
+ } else if (RetOpcode == PPC::TCRETURNai) {
+ MBBI = MBB.getLastNonDebugInstr();
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm());
+ } else if (RetOpcode == PPC::TCRETURNdi8) {
+ MBBI = MBB.getLastNonDebugInstr();
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
+ addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
+ } else if (RetOpcode == PPC::TCRETURNri8) {
+ MBBI = MBB.getLastNonDebugInstr();
+ assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8));
+ } else if (RetOpcode == PPC::TCRETURNai8) {
+ MBBI = MBB.getLastNonDebugInstr();
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm());
+ }
+}
+
+static bool spillsCR(const MachineFunction &MF) {
+ const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ return FuncInfo->isCRSpilled();
+}
+
+/// MustSaveLR - Return true if this function requires that we save the LR
+/// register onto the stack in the prolog and restore it in the epilog of the
+/// function.
+static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
+ const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>();
+
+ // We need a save/restore of LR if there is any def of LR (which is
+ // defined by calls, including the PIC setup sequence), or if there is
+ // some use of the LR stack slot (e.g. for builtin_return_address).
+ // (LR comes in 32 and 64 bit versions.)
+ MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR);
+ return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired();
+}
+
+void
+PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const {
+ const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
+
+ // Save and clear the LR state.
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ unsigned LR = RegInfo->getRARegister();
+ FI->setMustSaveLR(MustSaveLR(MF, LR));
+ MF.getRegInfo().setPhysRegUnused(LR);
+
+ // Save R31 if necessary
+ int FPSI = FI->getFramePointerSaveIndex();
+ bool isPPC64 = Subtarget.isPPC64();
+ bool isDarwinABI = Subtarget.isDarwinABI();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ // If the frame pointer save index hasn't been defined yet.
+ if (!FPSI && needsFP(MF)) {
+ // Find out what the fix offset of the frame pointer save area.
+ int FPOffset = getFramePointerSaveOffset(isPPC64, isDarwinABI);
+ // Allocate the frame index for frame pointer save area.
+ FPSI = MFI->CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
+ // Save the result.
+ FI->setFramePointerSaveIndex(FPSI);
+ }
+
+ // Reserve stack space to move the linkage area to in case of a tail call.
+ int TCSPDelta = 0;
+ if (GuaranteedTailCallOpt && (TCSPDelta = FI->getTailCallSPDelta()) < 0) {
+ MFI->CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true);
+ }
+
+ // Reserve a slot closest to SP or frame pointer if we have a dynalloc or
+ // a large stack, which will require scavenging a register to materialize a
+ // large offset.
+ // FIXME: this doesn't actually check stack size, so is a bit pessimistic
+ // FIXME: doesn't detect whether or not we need to spill vXX, which requires
+ // r0 for now.
+
+ if (RegInfo->requiresRegisterScavenging(MF)) // FIXME (64-bit): Enable.
+ if (needsFP(MF) || spillsCR(MF)) {
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+ const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
+ const TargetRegisterClass *RC = isPPC64 ? G8RC : GPRC;
+ RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+ RC->getAlignment(),
+ false));
+ }
+}
+
+void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF)
+ const {
+ // Early exit if not using the SVR4 ABI.
+ if (!Subtarget.isSVR4ABI())
+ return;
+
+ // Get callee saved register information.
+ MachineFrameInfo *FFI = MF.getFrameInfo();
+ const std::vector<CalleeSavedInfo> &CSI = FFI->getCalleeSavedInfo();
+
+ // Early exit if no callee saved registers are modified!
+ if (CSI.empty() && !needsFP(MF)) {
+ return;
+ }
+
+ unsigned MinGPR = PPC::R31;
+ unsigned MinG8R = PPC::X31;
+ unsigned MinFPR = PPC::F31;
+ unsigned MinVR = PPC::V31;
+
+ bool HasGPSaveArea = false;
+ bool HasG8SaveArea = false;
+ bool HasFPSaveArea = false;
+ bool HasCRSaveArea = false;
+ bool HasVRSAVESaveArea = false;
+ bool HasVRSaveArea = false;
+
+ SmallVector<CalleeSavedInfo, 18> GPRegs;
+ SmallVector<CalleeSavedInfo, 18> G8Regs;
+ SmallVector<CalleeSavedInfo, 18> FPRegs;
+ SmallVector<CalleeSavedInfo, 18> VRegs;
+
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ if (PPC::GPRCRegisterClass->contains(Reg)) {
+ HasGPSaveArea = true;
+
+ GPRegs.push_back(CSI[i]);
+
+ if (Reg < MinGPR) {
+ MinGPR = Reg;
+ }
+ } else if (PPC::G8RCRegisterClass->contains(Reg)) {
+ HasG8SaveArea = true;
+
+ G8Regs.push_back(CSI[i]);
+
+ if (Reg < MinG8R) {
+ MinG8R = Reg;
+ }
+ } else if (PPC::F8RCRegisterClass->contains(Reg)) {
+ HasFPSaveArea = true;
+
+ FPRegs.push_back(CSI[i]);
+
+ if (Reg < MinFPR) {
+ MinFPR = Reg;
+ }
+// FIXME SVR4: Disable CR save area for now.
+ } else if (PPC::CRBITRCRegisterClass->contains(Reg)
+ || PPC::CRRCRegisterClass->contains(Reg)) {
+// HasCRSaveArea = true;
+ } else if (PPC::VRSAVERCRegisterClass->contains(Reg)) {
+ HasVRSAVESaveArea = true;
+ } else if (PPC::VRRCRegisterClass->contains(Reg)) {
+ HasVRSaveArea = true;
+
+ VRegs.push_back(CSI[i]);
+
+ if (Reg < MinVR) {
+ MinVR = Reg;
+ }
+ } else {
+ llvm_unreachable("Unknown RegisterClass!");
+ }
+ }
+
+ PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>();
+
+ int64_t LowerBound = 0;
+
+ // Take into account stack space reserved for tail calls.
+ int TCSPDelta = 0;
+ if (GuaranteedTailCallOpt && (TCSPDelta = PFI->getTailCallSPDelta()) < 0) {
+ LowerBound = TCSPDelta;
+ }
+
+ // The Floating-point register save area is right below the back chain word
+ // of the previous stack frame.
+ if (HasFPSaveArea) {
+ for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) {
+ int FI = FPRegs[i].getFrameIdx();
+
+ FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+ }
+
+ LowerBound -= (31 - getPPCRegisterNumbering(MinFPR) + 1) * 8;
+ }
+
+ // Check whether the frame pointer register is allocated. If so, make sure it
+ // is spilled to the correct offset.
+ if (needsFP(MF)) {
+ HasGPSaveArea = true;
+
+ int FI = PFI->getFramePointerSaveIndex();
+ assert(FI && "No Frame Pointer Save Slot!");
+
+ FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+ }
+
+ // General register save area starts right below the Floating-point
+ // register save area.
+ if (HasGPSaveArea || HasG8SaveArea) {
+ // Move general register save area spill slots down, taking into account
+ // the size of the Floating-point register save area.
+ for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) {
+ int FI = GPRegs[i].getFrameIdx();
+
+ FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+ }
+
+ // Move general register save area spill slots down, taking into account
+ // the size of the Floating-point register save area.
+ for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) {
+ int FI = G8Regs[i].getFrameIdx();
+
+ FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+ }
+
+ unsigned MinReg =
+ std::min<unsigned>(getPPCRegisterNumbering(MinGPR),
+ getPPCRegisterNumbering(MinG8R));
+
+ if (Subtarget.isPPC64()) {
+ LowerBound -= (31 - MinReg + 1) * 8;
+ } else {
+ LowerBound -= (31 - MinReg + 1) * 4;
+ }
+ }
+
+ // The CR save area is below the general register save area.
+ if (HasCRSaveArea) {
+ // FIXME SVR4: Is it actually possible to have multiple elements in CSI
+ // which have the CR/CRBIT register class?
+ // Adjust the frame index of the CR spill slot.
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+
+ if (PPC::CRBITRCRegisterClass->contains(Reg) ||
+ PPC::CRRCRegisterClass->contains(Reg)) {
+ int FI = CSI[i].getFrameIdx();
+
+ FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+ }
+ }
+
+ LowerBound -= 4; // The CR save area is always 4 bytes long.
+ }
+
+ if (HasVRSAVESaveArea) {
+ // FIXME SVR4: Is it actually possible to have multiple elements in CSI
+ // which have the VRSAVE register class?
+ // Adjust the frame index of the VRSAVE spill slot.
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+
+ if (PPC::VRSAVERCRegisterClass->contains(Reg)) {
+ int FI = CSI[i].getFrameIdx();
+
+ FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+ }
+ }
+
+ LowerBound -= 4; // The VRSAVE save area is always 4 bytes long.
+ }
+
+ if (HasVRSaveArea) {
+ // Insert alignment padding, we need 16-byte alignment.
+ LowerBound = (LowerBound - 15) & ~(15);
+
+ for (unsigned i = 0, e = VRegs.size(); i != e; ++i) {
+ int FI = VRegs[i].getFrameIdx();
+
+ FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+ }
+ }
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h
new file mode 100644
index 0000000..20faa71
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h
@@ -0,0 +1,321 @@
+//==-- PPCFrameLowering.h - Define frame lowering for PowerPC ----*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef POWERPC_FRAMEINFO_H
+#define POWERPC_FRAMEINFO_H
+
+#include "PPC.h"
+#include "PPCSubtarget.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/STLExtras.h"
+
+namespace llvm {
+ class PPCSubtarget;
+
+class PPCFrameLowering: public TargetFrameLowering {
+ const PPCSubtarget &Subtarget;
+
+public:
+ PPCFrameLowering(const PPCSubtarget &sti)
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 16, 0),
+ Subtarget(sti) {
+ }
+
+ void determineFrameLayout(MachineFunction &MF) const;
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ bool hasFP(const MachineFunction &MF) const;
+ bool needsFP(const MachineFunction &MF) const;
+
+ void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS = NULL) const;
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+
+ /// targetHandlesStackFrameRounding - Returns true if the target is
+ /// responsible for rounding up the stack frame (probably at emitPrologue
+ /// time).
+ bool targetHandlesStackFrameRounding() const { return true; }
+
+ /// getReturnSaveOffset - Return the previous frame offset to save the
+ /// return address.
+ static unsigned getReturnSaveOffset(bool isPPC64, bool isDarwinABI) {
+ if (isDarwinABI)
+ return isPPC64 ? 16 : 8;
+ // SVR4 ABI:
+ return isPPC64 ? 16 : 4;
+ }
+
+ /// getFramePointerSaveOffset - Return the previous frame offset to save the
+ /// frame pointer.
+ static unsigned getFramePointerSaveOffset(bool isPPC64, bool isDarwinABI) {
+ // For the Darwin ABI:
+ // We cannot use the TOC save slot (offset +20) in the PowerPC linkage area
+ // for saving the frame pointer (if needed.) While the published ABI has
+ // not used this slot since at least MacOSX 10.2, there is older code
+ // around that does use it, and that needs to continue to work.
+ if (isDarwinABI)
+ return isPPC64 ? -8U : -4U;
+
+ // SVR4 ABI: First slot in the general register save area.
+ return isPPC64 ? -8U : -4U;
+ }
+
+ /// getLinkageSize - Return the size of the PowerPC ABI linkage area.
+ ///
+ static unsigned getLinkageSize(bool isPPC64, bool isDarwinABI) {
+ if (isDarwinABI || isPPC64)
+ return 6 * (isPPC64 ? 8 : 4);
+
+ // SVR4 ABI:
+ return 8;
+ }
+
+ /// getMinCallArgumentsSize - Return the size of the minium PowerPC ABI
+ /// argument area.
+ static unsigned getMinCallArgumentsSize(bool isPPC64, bool isDarwinABI) {
+ // For the Darwin ABI / 64-bit SVR4 ABI:
+ // The prolog code of the callee may store up to 8 GPR argument registers to
+ // the stack, allowing va_start to index over them in memory if its varargs.
+ // Because we cannot tell if this is needed on the caller side, we have to
+ // conservatively assume that it is needed. As such, make sure we have at
+ // least enough stack space for the caller to store the 8 GPRs.
+ if (isDarwinABI || isPPC64)
+ return 8 * (isPPC64 ? 8 : 4);
+
+ // 32-bit SVR4 ABI:
+ // There is no default stack allocated for the 8 first GPR arguments.
+ return 0;
+ }
+
+ /// getMinCallFrameSize - Return the minimum size a call frame can be using
+ /// the PowerPC ABI.
+ static unsigned getMinCallFrameSize(bool isPPC64, bool isDarwinABI) {
+ // The call frame needs to be at least big enough for linkage and 8 args.
+ return getLinkageSize(isPPC64, isDarwinABI) +
+ getMinCallArgumentsSize(isPPC64, isDarwinABI);
+ }
+
+ // With the SVR4 ABI, callee-saved registers have fixed offsets on the stack.
+ const SpillSlot *
+ getCalleeSavedSpillSlots(unsigned &NumEntries) const {
+ if (Subtarget.isDarwinABI()) {
+ NumEntries = 1;
+ if (Subtarget.isPPC64()) {
+ static const SpillSlot darwin64Offsets = {PPC::X31, -8};
+ return &darwin64Offsets;
+ } else {
+ static const SpillSlot darwinOffsets = {PPC::R31, -4};
+ return &darwinOffsets;
+ }
+ }
+
+ // Early exit if not using the SVR4 ABI.
+ if (!Subtarget.isSVR4ABI()) {
+ NumEntries = 0;
+ return 0;
+ }
+
+ static const SpillSlot Offsets[] = {
+ // Floating-point register save area offsets.
+ {PPC::F31, -8},
+ {PPC::F30, -16},
+ {PPC::F29, -24},
+ {PPC::F28, -32},
+ {PPC::F27, -40},
+ {PPC::F26, -48},
+ {PPC::F25, -56},
+ {PPC::F24, -64},
+ {PPC::F23, -72},
+ {PPC::F22, -80},
+ {PPC::F21, -88},
+ {PPC::F20, -96},
+ {PPC::F19, -104},
+ {PPC::F18, -112},
+ {PPC::F17, -120},
+ {PPC::F16, -128},
+ {PPC::F15, -136},
+ {PPC::F14, -144},
+
+ // General register save area offsets.
+ {PPC::R31, -4},
+ {PPC::R30, -8},
+ {PPC::R29, -12},
+ {PPC::R28, -16},
+ {PPC::R27, -20},
+ {PPC::R26, -24},
+ {PPC::R25, -28},
+ {PPC::R24, -32},
+ {PPC::R23, -36},
+ {PPC::R22, -40},
+ {PPC::R21, -44},
+ {PPC::R20, -48},
+ {PPC::R19, -52},
+ {PPC::R18, -56},
+ {PPC::R17, -60},
+ {PPC::R16, -64},
+ {PPC::R15, -68},
+ {PPC::R14, -72},
+
+ // CR save area offset.
+ // FIXME SVR4: Disable CR save area for now.
+// {PPC::CR2, -4},
+// {PPC::CR3, -4},
+// {PPC::CR4, -4},
+// {PPC::CR2LT, -4},
+// {PPC::CR2GT, -4},
+// {PPC::CR2EQ, -4},
+// {PPC::CR2UN, -4},
+// {PPC::CR3LT, -4},
+// {PPC::CR3GT, -4},
+// {PPC::CR3EQ, -4},
+// {PPC::CR3UN, -4},
+// {PPC::CR4LT, -4},
+// {PPC::CR4GT, -4},
+// {PPC::CR4EQ, -4},
+// {PPC::CR4UN, -4},
+
+ // VRSAVE save area offset.
+ {PPC::VRSAVE, -4},
+
+ // Vector register save area
+ {PPC::V31, -16},
+ {PPC::V30, -32},
+ {PPC::V29, -48},
+ {PPC::V28, -64},
+ {PPC::V27, -80},
+ {PPC::V26, -96},
+ {PPC::V25, -112},
+ {PPC::V24, -128},
+ {PPC::V23, -144},
+ {PPC::V22, -160},
+ {PPC::V21, -176},
+ {PPC::V20, -192}
+ };
+
+ static const SpillSlot Offsets64[] = {
+ // Floating-point register save area offsets.
+ {PPC::F31, -8},
+ {PPC::F30, -16},
+ {PPC::F29, -24},
+ {PPC::F28, -32},
+ {PPC::F27, -40},
+ {PPC::F26, -48},
+ {PPC::F25, -56},
+ {PPC::F24, -64},
+ {PPC::F23, -72},
+ {PPC::F22, -80},
+ {PPC::F21, -88},
+ {PPC::F20, -96},
+ {PPC::F19, -104},
+ {PPC::F18, -112},
+ {PPC::F17, -120},
+ {PPC::F16, -128},
+ {PPC::F15, -136},
+ {PPC::F14, -144},
+
+ // General register save area offsets.
+ // FIXME 64-bit SVR4: Are 32-bit registers actually allocated in 64-bit
+ // mode?
+ {PPC::R31, -4},
+ {PPC::R30, -12},
+ {PPC::R29, -20},
+ {PPC::R28, -28},
+ {PPC::R27, -36},
+ {PPC::R26, -44},
+ {PPC::R25, -52},
+ {PPC::R24, -60},
+ {PPC::R23, -68},
+ {PPC::R22, -76},
+ {PPC::R21, -84},
+ {PPC::R20, -92},
+ {PPC::R19, -100},
+ {PPC::R18, -108},
+ {PPC::R17, -116},
+ {PPC::R16, -124},
+ {PPC::R15, -132},
+ {PPC::R14, -140},
+
+ {PPC::X31, -8},
+ {PPC::X30, -16},
+ {PPC::X29, -24},
+ {PPC::X28, -32},
+ {PPC::X27, -40},
+ {PPC::X26, -48},
+ {PPC::X25, -56},
+ {PPC::X24, -64},
+ {PPC::X23, -72},
+ {PPC::X22, -80},
+ {PPC::X21, -88},
+ {PPC::X20, -96},
+ {PPC::X19, -104},
+ {PPC::X18, -112},
+ {PPC::X17, -120},
+ {PPC::X16, -128},
+ {PPC::X15, -136},
+ {PPC::X14, -144},
+
+ // CR save area offset.
+ // FIXME SVR4: Disable CR save area for now.
+// {PPC::CR2, -4},
+// {PPC::CR3, -4},
+// {PPC::CR4, -4},
+// {PPC::CR2LT, -4},
+// {PPC::CR2GT, -4},
+// {PPC::CR2EQ, -4},
+// {PPC::CR2UN, -4},
+// {PPC::CR3LT, -4},
+// {PPC::CR3GT, -4},
+// {PPC::CR3EQ, -4},
+// {PPC::CR3UN, -4},
+// {PPC::CR4LT, -4},
+// {PPC::CR4GT, -4},
+// {PPC::CR4EQ, -4},
+// {PPC::CR4UN, -4},
+
+ // VRSAVE save area offset.
+ {PPC::VRSAVE, -4},
+
+ // Vector register save area
+ {PPC::V31, -16},
+ {PPC::V30, -32},
+ {PPC::V29, -48},
+ {PPC::V28, -64},
+ {PPC::V27, -80},
+ {PPC::V26, -96},
+ {PPC::V25, -112},
+ {PPC::V24, -128},
+ {PPC::V23, -144},
+ {PPC::V22, -160},
+ {PPC::V21, -176},
+ {PPC::V20, -192}
+ };
+
+ if (Subtarget.isPPC64()) {
+ NumEntries = array_lengthof(Offsets64);
+
+ return Offsets64;
+ } else {
+ NumEntries = array_lengthof(Offsets);
+
+ return Offsets;
+ }
+ }
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp
new file mode 100644
index 0000000..cddc9d8
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp
@@ -0,0 +1,308 @@
+//===-- PPCHazardRecognizers.cpp - PowerPC Hazard Recognizer Impls --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements hazard recognizers for scheduling on PowerPC processors.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "PPCHazardRecognizers.h"
+#include "PPC.h"
+#include "PPCInstrInfo.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// PowerPC 970 Hazard Recognizer
+//
+// This models the dispatch group formation of the PPC970 processor. Dispatch
+// groups are bundles of up to five instructions that can contain various mixes
+// of instructions. The PPC970 can dispatch a peak of 4 non-branch and one
+// branch instruction per-cycle.
+//
+// There are a number of restrictions to dispatch group formation: some
+// instructions can only be issued in the first slot of a dispatch group, & some
+// instructions fill an entire dispatch group. Additionally, only branches can
+// issue in the 5th (last) slot.
+//
+// Finally, there are a number of "structural" hazards on the PPC970. These
+// conditions cause large performance penalties due to misprediction, recovery,
+// and replay logic that has to happen. These cases include setting a CTR and
+// branching through it in the same dispatch group, and storing to an address,
+// then loading from the same address within a dispatch group. To avoid these
+// conditions, we insert no-op instructions when appropriate.
+//
+// FIXME: This is missing some significant cases:
+// 1. Modeling of microcoded instructions.
+// 2. Handling of serialized operations.
+// 3. Handling of the esoteric cases in "Resource-based Instruction Grouping".
+//
+
+PPCHazardRecognizer970::PPCHazardRecognizer970(const TargetInstrInfo &tii)
+ : TII(tii) {
+ EndDispatchGroup();
+}
+
+void PPCHazardRecognizer970::EndDispatchGroup() {
+ DEBUG(errs() << "=== Start of dispatch group\n");
+ NumIssued = 0;
+
+ // Structural hazard info.
+ HasCTRSet = false;
+ NumStores = 0;
+}
+
+
+PPCII::PPC970_Unit
+PPCHazardRecognizer970::GetInstrType(unsigned Opcode,
+ bool &isFirst, bool &isSingle,
+ bool &isCracked,
+ bool &isLoad, bool &isStore) {
+ if ((int)Opcode >= 0) {
+ isFirst = isSingle = isCracked = isLoad = isStore = false;
+ return PPCII::PPC970_Pseudo;
+ }
+ Opcode = ~Opcode;
+
+ const MCInstrDesc &MCID = TII.get(Opcode);
+
+ isLoad = MCID.mayLoad();
+ isStore = MCID.mayStore();
+
+ uint64_t TSFlags = MCID.TSFlags;
+
+ isFirst = TSFlags & PPCII::PPC970_First;
+ isSingle = TSFlags & PPCII::PPC970_Single;
+ isCracked = TSFlags & PPCII::PPC970_Cracked;
+ return (PPCII::PPC970_Unit)(TSFlags & PPCII::PPC970_Mask);
+}
+
+/// isLoadOfStoredAddress - If we have a load from the previously stored pointer
+/// as indicated by StorePtr1/StorePtr2/StoreSize, return true.
+bool PPCHazardRecognizer970::
+isLoadOfStoredAddress(unsigned LoadSize, SDValue Ptr1, SDValue Ptr2) const {
+ for (unsigned i = 0, e = NumStores; i != e; ++i) {
+ // Handle exact and commuted addresses.
+ if (Ptr1 == StorePtr1[i] && Ptr2 == StorePtr2[i])
+ return true;
+ if (Ptr2 == StorePtr1[i] && Ptr1 == StorePtr2[i])
+ return true;
+
+ // Okay, we don't have an exact match, if this is an indexed offset, see if
+ // we have overlap (which happens during fp->int conversion for example).
+ if (StorePtr2[i] == Ptr2) {
+ if (ConstantSDNode *StoreOffset = dyn_cast<ConstantSDNode>(StorePtr1[i]))
+ if (ConstantSDNode *LoadOffset = dyn_cast<ConstantSDNode>(Ptr1)) {
+ // Okay the base pointers match, so we have [c1+r] vs [c2+r]. Check
+ // to see if the load and store actually overlap.
+ int StoreOffs = StoreOffset->getZExtValue();
+ int LoadOffs = LoadOffset->getZExtValue();
+ if (StoreOffs < LoadOffs) {
+ if (int(StoreOffs+StoreSize[i]) > LoadOffs) return true;
+ } else {
+ if (int(LoadOffs+LoadSize) > StoreOffs) return true;
+ }
+ }
+ }
+ }
+ return false;
+}
+
+/// getHazardType - We return hazard for any non-branch instruction that would
+/// terminate the dispatch group. We turn NoopHazard for any
+/// instructions that wouldn't terminate the dispatch group that would cause a
+/// pipeline flush.
+ScheduleHazardRecognizer::HazardType PPCHazardRecognizer970::
+getHazardType(SUnit *SU, int Stalls) {
+ assert(Stalls == 0 && "PPC hazards don't support scoreboard lookahead");
+
+ const SDNode *Node = SU->getNode()->getGluedMachineNode();
+ bool isFirst, isSingle, isCracked, isLoad, isStore;
+ PPCII::PPC970_Unit InstrType =
+ GetInstrType(Node->getOpcode(), isFirst, isSingle, isCracked,
+ isLoad, isStore);
+ if (InstrType == PPCII::PPC970_Pseudo) return NoHazard;
+ unsigned Opcode = Node->getMachineOpcode();
+
+ // We can only issue a PPC970_First/PPC970_Single instruction (such as
+ // crand/mtspr/etc) if this is the first cycle of the dispatch group.
+ if (NumIssued != 0 && (isFirst || isSingle))
+ return Hazard;
+
+ // If this instruction is cracked into two ops by the decoder, we know that
+ // it is not a branch and that it cannot issue if 3 other instructions are
+ // already in the dispatch group.
+ if (isCracked && NumIssued > 2)
+ return Hazard;
+
+ switch (InstrType) {
+ default: llvm_unreachable("Unknown instruction type!");
+ case PPCII::PPC970_FXU:
+ case PPCII::PPC970_LSU:
+ case PPCII::PPC970_FPU:
+ case PPCII::PPC970_VALU:
+ case PPCII::PPC970_VPERM:
+ // We can only issue a branch as the last instruction in a group.
+ if (NumIssued == 4) return Hazard;
+ break;
+ case PPCII::PPC970_CRU:
+ // We can only issue a CR instruction in the first two slots.
+ if (NumIssued >= 2) return Hazard;
+ break;
+ case PPCII::PPC970_BRU:
+ break;
+ }
+
+ // Do not allow MTCTR and BCTRL to be in the same dispatch group.
+ if (HasCTRSet && (Opcode == PPC::BCTRL_Darwin || Opcode == PPC::BCTRL_SVR4))
+ return NoopHazard;
+
+ // If this is a load following a store, make sure it's not to the same or
+ // overlapping address.
+ if (isLoad && NumStores) {
+ unsigned LoadSize;
+ switch (Opcode) {
+ default: llvm_unreachable("Unknown load!");
+ case PPC::LBZ: case PPC::LBZU:
+ case PPC::LBZX:
+ case PPC::LBZ8: case PPC::LBZU8:
+ case PPC::LBZX8:
+ case PPC::LVEBX:
+ LoadSize = 1;
+ break;
+ case PPC::LHA: case PPC::LHAU:
+ case PPC::LHAX:
+ case PPC::LHZ: case PPC::LHZU:
+ case PPC::LHZX:
+ case PPC::LVEHX:
+ case PPC::LHBRX:
+ case PPC::LHA8: case PPC::LHAU8:
+ case PPC::LHAX8:
+ case PPC::LHZ8: case PPC::LHZU8:
+ case PPC::LHZX8:
+ LoadSize = 2;
+ break;
+ case PPC::LFS: case PPC::LFSU:
+ case PPC::LFSX:
+ case PPC::LWZ: case PPC::LWZU:
+ case PPC::LWZX:
+ case PPC::LWA:
+ case PPC::LWAX:
+ case PPC::LVEWX:
+ case PPC::LWBRX:
+ case PPC::LWZ8:
+ case PPC::LWZX8:
+ LoadSize = 4;
+ break;
+ case PPC::LFD: case PPC::LFDU:
+ case PPC::LFDX:
+ case PPC::LD: case PPC::LDU:
+ case PPC::LDX:
+ LoadSize = 8;
+ break;
+ case PPC::LVX:
+ case PPC::LVXL:
+ LoadSize = 16;
+ break;
+ }
+
+ if (isLoadOfStoredAddress(LoadSize,
+ Node->getOperand(0), Node->getOperand(1)))
+ return NoopHazard;
+ }
+
+ return NoHazard;
+}
+
+void PPCHazardRecognizer970::EmitInstruction(SUnit *SU) {
+ const SDNode *Node = SU->getNode()->getGluedMachineNode();
+ bool isFirst, isSingle, isCracked, isLoad, isStore;
+ PPCII::PPC970_Unit InstrType =
+ GetInstrType(Node->getOpcode(), isFirst, isSingle, isCracked,
+ isLoad, isStore);
+ if (InstrType == PPCII::PPC970_Pseudo) return;
+ unsigned Opcode = Node->getMachineOpcode();
+
+ // Update structural hazard information.
+ if (Opcode == PPC::MTCTR || Opcode == PPC::MTCTR8) HasCTRSet = true;
+
+ // Track the address stored to.
+ if (isStore) {
+ unsigned ThisStoreSize;
+ switch (Opcode) {
+ default: llvm_unreachable("Unknown store instruction!");
+ case PPC::STB: case PPC::STB8:
+ case PPC::STBU: case PPC::STBU8:
+ case PPC::STBX: case PPC::STBX8:
+ case PPC::STVEBX:
+ ThisStoreSize = 1;
+ break;
+ case PPC::STH: case PPC::STH8:
+ case PPC::STHU: case PPC::STHU8:
+ case PPC::STHX: case PPC::STHX8:
+ case PPC::STVEHX:
+ case PPC::STHBRX:
+ ThisStoreSize = 2;
+ break;
+ case PPC::STFS:
+ case PPC::STFSU:
+ case PPC::STFSX:
+ case PPC::STWX: case PPC::STWX8:
+ case PPC::STWUX:
+ case PPC::STW: case PPC::STW8:
+ case PPC::STWU:
+ case PPC::STVEWX:
+ case PPC::STFIWX:
+ case PPC::STWBRX:
+ ThisStoreSize = 4;
+ break;
+ case PPC::STD_32:
+ case PPC::STDX_32:
+ case PPC::STD:
+ case PPC::STDU:
+ case PPC::STFD:
+ case PPC::STFDX:
+ case PPC::STDX:
+ case PPC::STDUX:
+ ThisStoreSize = 8;
+ break;
+ case PPC::STVX:
+ case PPC::STVXL:
+ ThisStoreSize = 16;
+ break;
+ }
+
+ StoreSize[NumStores] = ThisStoreSize;
+ StorePtr1[NumStores] = Node->getOperand(1);
+ StorePtr2[NumStores] = Node->getOperand(2);
+ ++NumStores;
+ }
+
+ if (InstrType == PPCII::PPC970_BRU || isSingle)
+ NumIssued = 4; // Terminate a d-group.
+ ++NumIssued;
+
+ // If this instruction is cracked into two ops by the decoder, remember that
+ // we issued two pieces.
+ if (isCracked)
+ ++NumIssued;
+
+ if (NumIssued == 5)
+ EndDispatchGroup();
+}
+
+void PPCHazardRecognizer970::AdvanceCycle() {
+ assert(NumIssued < 5 && "Illegal dispatch group!");
+ ++NumIssued;
+ if (NumIssued == 5)
+ EndDispatchGroup();
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.h b/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.h
new file mode 100644
index 0000000..2f81f0f
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.h
@@ -0,0 +1,73 @@
+//===-- PPCHazardRecognizers.h - PowerPC Hazard Recognizers -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines hazard recognizers for scheduling on PowerPC processors.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PPCHAZRECS_H
+#define PPCHAZRECS_H
+
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "PPCInstrInfo.h"
+
+namespace llvm {
+
+/// PPCHazardRecognizer970 - This class defines a finite state automata that
+/// models the dispatch logic on the PowerPC 970 (aka G5) processor. This
+/// promotes good dispatch group formation and implements noop insertion to
+/// avoid structural hazards that cause significant performance penalties (e.g.
+/// setting the CTR register then branching through it within a dispatch group),
+/// or storing then loading from the same address within a dispatch group.
+class PPCHazardRecognizer970 : public ScheduleHazardRecognizer {
+ const TargetInstrInfo &TII;
+
+ unsigned NumIssued; // Number of insts issued, including advanced cycles.
+
+ // Various things that can cause a structural hazard.
+
+ // HasCTRSet - If the CTR register is set in this group, disallow BCTRL.
+ bool HasCTRSet;
+
+ // StoredPtr - Keep track of the address of any store. If we see a load from
+ // the same address (or one that aliases it), disallow the store. We can have
+ // up to four stores in one dispatch group, hence we track up to 4.
+ //
+ // This is null if we haven't seen a store yet. We keep track of both
+ // operands of the store here, since we support [r+r] and [r+i] addressing.
+ SDValue StorePtr1[4], StorePtr2[4];
+ unsigned StoreSize[4];
+ unsigned NumStores;
+
+public:
+ PPCHazardRecognizer970(const TargetInstrInfo &TII);
+ virtual HazardType getHazardType(SUnit *SU, int Stalls);
+ virtual void EmitInstruction(SUnit *SU);
+ virtual void AdvanceCycle();
+
+private:
+ /// EndDispatchGroup - Called when we are finishing a new dispatch group.
+ ///
+ void EndDispatchGroup();
+
+ /// GetInstrType - Classify the specified powerpc opcode according to its
+ /// pipeline.
+ PPCII::PPC970_Unit GetInstrType(unsigned Opcode,
+ bool &isFirst, bool &isSingle,bool &isCracked,
+ bool &isLoad, bool &isStore);
+
+ bool isLoadOfStoredAddress(unsigned LoadSize,
+ SDValue Ptr1, SDValue Ptr2) const;
+};
+
+} // end namespace llvm
+
+#endif
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
new file mode 100644
index 0000000..6f204cc
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -0,0 +1,1087 @@
+//===-- PPCISelDAGToDAG.cpp - PPC --pattern matching inst selector --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a pattern matching instruction selector for PowerPC,
+// converting from a legalized dag to a PPC dag.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ppc-codegen"
+#include "PPC.h"
+#include "PPCTargetMachine.h"
+#include "MCTargetDesc/PPCPredicates.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+ //===--------------------------------------------------------------------===//
+ /// PPCDAGToDAGISel - PPC specific code to select PPC machine
+ /// instructions for SelectionDAG operations.
+ ///
+ class PPCDAGToDAGISel : public SelectionDAGISel {
+ const PPCTargetMachine &TM;
+ const PPCTargetLowering &PPCLowering;
+ const PPCSubtarget &PPCSubTarget;
+ unsigned GlobalBaseReg;
+ public:
+ explicit PPCDAGToDAGISel(PPCTargetMachine &tm)
+ : SelectionDAGISel(tm), TM(tm),
+ PPCLowering(*TM.getTargetLowering()),
+ PPCSubTarget(*TM.getSubtargetImpl()) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF) {
+ // Make sure we re-emit a set of the global base reg if necessary
+ GlobalBaseReg = 0;
+ SelectionDAGISel::runOnMachineFunction(MF);
+
+ InsertVRSaveCode(MF);
+ return true;
+ }
+
+ /// getI32Imm - Return a target constant with the specified value, of type
+ /// i32.
+ inline SDValue getI32Imm(unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i32);
+ }
+
+ /// getI64Imm - Return a target constant with the specified value, of type
+ /// i64.
+ inline SDValue getI64Imm(uint64_t Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i64);
+ }
+
+ /// getSmallIPtrImm - Return a target constant of pointer type.
+ inline SDValue getSmallIPtrImm(unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, PPCLowering.getPointerTy());
+ }
+
+ /// isRunOfOnes - Returns true iff Val consists of one contiguous run of 1s
+ /// with any number of 0s on either side. The 1s are allowed to wrap from
+ /// LSB to MSB, so 0x000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs.
+ /// 0x0F0F0000 is not, since all 1s are not contiguous.
+ static bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME);
+
+
+ /// isRotateAndMask - Returns true if Mask and Shift can be folded into a
+ /// rotate and mask opcode and mask operation.
+ static bool isRotateAndMask(SDNode *N, unsigned Mask, bool isShiftMask,
+ unsigned &SH, unsigned &MB, unsigned &ME);
+
+ /// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC
+ /// base register. Return the virtual register that holds this value.
+ SDNode *getGlobalBaseReg();
+
+ // Select - Convert the specified operand from a target-independent to a
+ // target-specific node if it hasn't already been changed.
+ SDNode *Select(SDNode *N);
+
+ SDNode *SelectBitfieldInsert(SDNode *N);
+
+ /// SelectCC - Select a comparison of the specified values with the
+ /// specified condition code, returning the CR# of the expression.
+ SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, DebugLoc dl);
+
+ /// SelectAddrImm - Returns true if the address N can be represented by
+ /// a base register plus a signed 16-bit displacement [r+imm].
+ bool SelectAddrImm(SDValue N, SDValue &Disp,
+ SDValue &Base) {
+ return PPCLowering.SelectAddressRegImm(N, Disp, Base, *CurDAG);
+ }
+
+ /// SelectAddrImmOffs - Return true if the operand is valid for a preinc
+ /// immediate field. Because preinc imms have already been validated, just
+ /// accept it.
+ bool SelectAddrImmOffs(SDValue N, SDValue &Out) const {
+ Out = N;
+ return true;
+ }
+
+ /// SelectAddrIdx - Given the specified addressed, check to see if it can be
+ /// represented as an indexed [r+r] operation. Returns false if it can
+ /// be represented by [r+imm], which are preferred.
+ bool SelectAddrIdx(SDValue N, SDValue &Base, SDValue &Index) {
+ return PPCLowering.SelectAddressRegReg(N, Base, Index, *CurDAG);
+ }
+
+ /// SelectAddrIdxOnly - Given the specified addressed, force it to be
+ /// represented as an indexed [r+r] operation.
+ bool SelectAddrIdxOnly(SDValue N, SDValue &Base, SDValue &Index) {
+ return PPCLowering.SelectAddressRegRegOnly(N, Base, Index, *CurDAG);
+ }
+
+ /// SelectAddrImmShift - Returns true if the address N can be represented by
+ /// a base register plus a signed 14-bit displacement [r+imm*4]. Suitable
+ /// for use by STD and friends.
+ bool SelectAddrImmShift(SDValue N, SDValue &Disp, SDValue &Base) {
+ return PPCLowering.SelectAddressRegImmShift(N, Disp, Base, *CurDAG);
+ }
+
+ /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
+ /// inline asm expressions. It is always correct to compute the value into
+ /// a register. The case of adding a (possibly relocatable) constant to a
+ /// register can be improved, but it is wrong to substitute Reg+Reg for
+ /// Reg in an asm, because the load or store opcode would have to change.
+ virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps) {
+ OutOps.push_back(Op);
+ return false;
+ }
+
+ void InsertVRSaveCode(MachineFunction &MF);
+
+ virtual const char *getPassName() const {
+ return "PowerPC DAG->DAG Pattern Instruction Selection";
+ }
+
+// Include the pieces autogenerated from the target description.
+#include "PPCGenDAGISel.inc"
+
+private:
+ SDNode *SelectSETCC(SDNode *N);
+ };
+}
+
+/// InsertVRSaveCode - Once the entire function has been instruction selected,
+/// all virtual registers are created and all machine instructions are built,
+/// check to see if we need to save/restore VRSAVE. If so, do it.
+void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) {
+ // Check to see if this function uses vector registers, which means we have to
+ // save and restore the VRSAVE register and update it with the regs we use.
+ //
+ // In this case, there will be virtual registers of vector type created
+ // by the scheduler. Detect them now.
+ bool HasVectorVReg = false;
+ for (unsigned i = 0, e = RegInfo->getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ if (RegInfo->getRegClass(Reg) == &PPC::VRRCRegClass) {
+ HasVectorVReg = true;
+ break;
+ }
+ }
+ if (!HasVectorVReg) return; // nothing to do.
+
+ // If we have a vector register, we want to emit code into the entry and exit
+ // blocks to save and restore the VRSAVE register. We do this here (instead
+ // of marking all vector instructions as clobbering VRSAVE) for two reasons:
+ //
+ // 1. This (trivially) reduces the load on the register allocator, by not
+ // having to represent the live range of the VRSAVE register.
+ // 2. This (more significantly) allows us to create a temporary virtual
+ // register to hold the saved VRSAVE value, allowing this temporary to be
+ // register allocated, instead of forcing it to be spilled to the stack.
+
+ // Create two vregs - one to hold the VRSAVE register that is live-in to the
+ // function and one for the value after having bits or'd into it.
+ unsigned InVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
+ unsigned UpdatedVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
+
+ const TargetInstrInfo &TII = *TM.getInstrInfo();
+ MachineBasicBlock &EntryBB = *Fn.begin();
+ DebugLoc dl;
+ // Emit the following code into the entry block:
+ // InVRSAVE = MFVRSAVE
+ // UpdatedVRSAVE = UPDATE_VRSAVE InVRSAVE
+ // MTVRSAVE UpdatedVRSAVE
+ MachineBasicBlock::iterator IP = EntryBB.begin(); // Insert Point
+ BuildMI(EntryBB, IP, dl, TII.get(PPC::MFVRSAVE), InVRSAVE);
+ BuildMI(EntryBB, IP, dl, TII.get(PPC::UPDATE_VRSAVE),
+ UpdatedVRSAVE).addReg(InVRSAVE);
+ BuildMI(EntryBB, IP, dl, TII.get(PPC::MTVRSAVE)).addReg(UpdatedVRSAVE);
+
+ // Find all return blocks, outputting a restore in each epilog.
+ for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
+ if (!BB->empty() && BB->back().getDesc().isReturn()) {
+ IP = BB->end(); --IP;
+
+ // Skip over all terminator instructions, which are part of the return
+ // sequence.
+ MachineBasicBlock::iterator I2 = IP;
+ while (I2 != BB->begin() && (--I2)->getDesc().isTerminator())
+ IP = I2;
+
+ // Emit: MTVRSAVE InVRSave
+ BuildMI(*BB, IP, dl, TII.get(PPC::MTVRSAVE)).addReg(InVRSAVE);
+ }
+ }
+}
+
+
+/// getGlobalBaseReg - Output the instructions required to put the
+/// base address to use for accessing globals into a register.
+///
+SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
+ if (!GlobalBaseReg) {
+ const TargetInstrInfo &TII = *TM.getInstrInfo();
+ // Insert the set of GlobalBaseReg into the first MBB of the function
+ MachineBasicBlock &FirstMBB = MF->front();
+ MachineBasicBlock::iterator MBBI = FirstMBB.begin();
+ DebugLoc dl;
+
+ if (PPCLowering.getPointerTy() == MVT::i32) {
+ GlobalBaseReg = RegInfo->createVirtualRegister(PPC::GPRCRegisterClass);
+ BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
+ BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
+ } else {
+ GlobalBaseReg = RegInfo->createVirtualRegister(PPC::G8RCRegisterClass);
+ BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR8));
+ BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR8), GlobalBaseReg);
+ }
+ }
+ return CurDAG->getRegister(GlobalBaseReg,
+ PPCLowering.getPointerTy()).getNode();
+}
+
+/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
+/// or 64-bit immediate, and if the value can be accurately represented as a
+/// sign extension from a 16-bit value. If so, this returns true and the
+/// immediate.
+static bool isIntS16Immediate(SDNode *N, short &Imm) {
+ if (N->getOpcode() != ISD::Constant)
+ return false;
+
+ Imm = (short)cast<ConstantSDNode>(N)->getZExtValue();
+ if (N->getValueType(0) == MVT::i32)
+ return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
+ else
+ return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
+}
+
+static bool isIntS16Immediate(SDValue Op, short &Imm) {
+ return isIntS16Immediate(Op.getNode(), Imm);
+}
+
+
+/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
+/// operand. If so Imm will receive the 32-bit value.
+static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
+ if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
+ Imm = cast<ConstantSDNode>(N)->getZExtValue();
+ return true;
+ }
+ return false;
+}
+
+/// isInt64Immediate - This method tests to see if the node is a 64-bit constant
+/// operand. If so Imm will receive the 64-bit value.
+static bool isInt64Immediate(SDNode *N, uint64_t &Imm) {
+ if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i64) {
+ Imm = cast<ConstantSDNode>(N)->getZExtValue();
+ return true;
+ }
+ return false;
+}
+
+// isInt32Immediate - This method tests to see if a constant operand.
+// If so Imm will receive the 32 bit value.
+static bool isInt32Immediate(SDValue N, unsigned &Imm) {
+ return isInt32Immediate(N.getNode(), Imm);
+}
+
+
+// isOpcWithIntImmediate - This method tests to see if the node is a specific
+// opcode and that it has a immediate integer right operand.
+// If so Imm will receive the 32 bit value.
+static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
+ return N->getOpcode() == Opc
+ && isInt32Immediate(N->getOperand(1).getNode(), Imm);
+}
+
+bool PPCDAGToDAGISel::isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME) {
+ if (isShiftedMask_32(Val)) {
+ // look for the first non-zero bit
+ MB = CountLeadingZeros_32(Val);
+ // look for the first zero bit after the run of ones
+ ME = CountLeadingZeros_32((Val - 1) ^ Val);
+ return true;
+ } else {
+ Val = ~Val; // invert mask
+ if (isShiftedMask_32(Val)) {
+ // effectively look for the first zero bit
+ ME = CountLeadingZeros_32(Val) - 1;
+ // effectively look for the first one bit after the run of zeros
+ MB = CountLeadingZeros_32((Val - 1) ^ Val) + 1;
+ return true;
+ }
+ }
+ // no run present
+ return false;
+}
+
+bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask,
+ bool isShiftMask, unsigned &SH,
+ unsigned &MB, unsigned &ME) {
+ // Don't even go down this path for i64, since different logic will be
+ // necessary for rldicl/rldicr/rldimi.
+ if (N->getValueType(0) != MVT::i32)
+ return false;
+
+ unsigned Shift = 32;
+ unsigned Indeterminant = ~0; // bit mask marking indeterminant results
+ unsigned Opcode = N->getOpcode();
+ if (N->getNumOperands() != 2 ||
+ !isInt32Immediate(N->getOperand(1).getNode(), Shift) || (Shift > 31))
+ return false;
+
+ if (Opcode == ISD::SHL) {
+ // apply shift left to mask if it comes first
+ if (isShiftMask) Mask = Mask << Shift;
+ // determine which bits are made indeterminant by shift
+ Indeterminant = ~(0xFFFFFFFFu << Shift);
+ } else if (Opcode == ISD::SRL) {
+ // apply shift right to mask if it comes first
+ if (isShiftMask) Mask = Mask >> Shift;
+ // determine which bits are made indeterminant by shift
+ Indeterminant = ~(0xFFFFFFFFu >> Shift);
+ // adjust for the left rotate
+ Shift = 32 - Shift;
+ } else if (Opcode == ISD::ROTL) {
+ Indeterminant = 0;
+ } else {
+ return false;
+ }
+
+ // if the mask doesn't intersect any Indeterminant bits
+ if (Mask && !(Mask & Indeterminant)) {
+ SH = Shift & 31;
+ // make sure the mask is still a mask (wrap arounds may not be)
+ return isRunOfOnes(Mask, MB, ME);
+ }
+ return false;
+}
+
+/// SelectBitfieldInsert - turn an or of two masked values into
+/// the rotate left word immediate then mask insert (rlwimi) instruction.
+SDNode *PPCDAGToDAGISel::SelectBitfieldInsert(SDNode *N) {
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ DebugLoc dl = N->getDebugLoc();
+
+ APInt LKZ, LKO, RKZ, RKO;
+ CurDAG->ComputeMaskedBits(Op0, APInt::getAllOnesValue(32), LKZ, LKO);
+ CurDAG->ComputeMaskedBits(Op1, APInt::getAllOnesValue(32), RKZ, RKO);
+
+ unsigned TargetMask = LKZ.getZExtValue();
+ unsigned InsertMask = RKZ.getZExtValue();
+
+ if ((TargetMask | InsertMask) == 0xFFFFFFFF) {
+ unsigned Op0Opc = Op0.getOpcode();
+ unsigned Op1Opc = Op1.getOpcode();
+ unsigned Value, SH = 0;
+ TargetMask = ~TargetMask;
+ InsertMask = ~InsertMask;
+
+ // If the LHS has a foldable shift and the RHS does not, then swap it to the
+ // RHS so that we can fold the shift into the insert.
+ if (Op0Opc == ISD::AND && Op1Opc == ISD::AND) {
+ if (Op0.getOperand(0).getOpcode() == ISD::SHL ||
+ Op0.getOperand(0).getOpcode() == ISD::SRL) {
+ if (Op1.getOperand(0).getOpcode() != ISD::SHL &&
+ Op1.getOperand(0).getOpcode() != ISD::SRL) {
+ std::swap(Op0, Op1);
+ std::swap(Op0Opc, Op1Opc);
+ std::swap(TargetMask, InsertMask);
+ }
+ }
+ } else if (Op0Opc == ISD::SHL || Op0Opc == ISD::SRL) {
+ if (Op1Opc == ISD::AND && Op1.getOperand(0).getOpcode() != ISD::SHL &&
+ Op1.getOperand(0).getOpcode() != ISD::SRL) {
+ std::swap(Op0, Op1);
+ std::swap(Op0Opc, Op1Opc);
+ std::swap(TargetMask, InsertMask);
+ }
+ }
+
+ unsigned MB, ME;
+ if (InsertMask && isRunOfOnes(InsertMask, MB, ME)) {
+ SDValue Tmp1, Tmp2;
+
+ if ((Op1Opc == ISD::SHL || Op1Opc == ISD::SRL) &&
+ isInt32Immediate(Op1.getOperand(1), Value)) {
+ Op1 = Op1.getOperand(0);
+ SH = (Op1Opc == ISD::SHL) ? Value : 32 - Value;
+ }
+ if (Op1Opc == ISD::AND) {
+ unsigned SHOpc = Op1.getOperand(0).getOpcode();
+ if ((SHOpc == ISD::SHL || SHOpc == ISD::SRL) &&
+ isInt32Immediate(Op1.getOperand(0).getOperand(1), Value)) {
+ Op1 = Op1.getOperand(0).getOperand(0);
+ SH = (SHOpc == ISD::SHL) ? Value : 32 - Value;
+ } else {
+ Op1 = Op1.getOperand(0);
+ }
+ }
+
+ SH &= 31;
+ SDValue Ops[] = { Op0, Op1, getI32Imm(SH), getI32Imm(MB),
+ getI32Imm(ME) };
+ return CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops, 5);
+ }
+ }
+ return 0;
+}
+
+/// SelectCC - Select a comparison of the specified values with the specified
+/// condition code, returning the CR# of the expression.
+SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS,
+ ISD::CondCode CC, DebugLoc dl) {
+ // Always select the LHS.
+ unsigned Opc;
+
+ if (LHS.getValueType() == MVT::i32) {
+ unsigned Imm;
+ if (CC == ISD::SETEQ || CC == ISD::SETNE) {
+ if (isInt32Immediate(RHS, Imm)) {
+ // SETEQ/SETNE comparison with 16-bit immediate, fold it.
+ if (isUInt<16>(Imm))
+ return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
+ getI32Imm(Imm & 0xFFFF)), 0);
+ // If this is a 16-bit signed immediate, fold it.
+ if (isInt<16>((int)Imm))
+ return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
+ getI32Imm(Imm & 0xFFFF)), 0);
+
+ // For non-equality comparisons, the default code would materialize the
+ // constant, then compare against it, like this:
+ // lis r2, 4660
+ // ori r2, r2, 22136
+ // cmpw cr0, r3, r2
+ // Since we are just comparing for equality, we can emit this instead:
+ // xoris r0,r3,0x1234
+ // cmplwi cr0,r0,0x5678
+ // beq cr0,L6
+ SDValue Xor(CurDAG->getMachineNode(PPC::XORIS, dl, MVT::i32, LHS,
+ getI32Imm(Imm >> 16)), 0);
+ return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, Xor,
+ getI32Imm(Imm & 0xFFFF)), 0);
+ }
+ Opc = PPC::CMPLW;
+ } else if (ISD::isUnsignedIntSetCC(CC)) {
+ if (isInt32Immediate(RHS, Imm) && isUInt<16>(Imm))
+ return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
+ getI32Imm(Imm & 0xFFFF)), 0);
+ Opc = PPC::CMPLW;
+ } else {
+ short SImm;
+ if (isIntS16Immediate(RHS, SImm))
+ return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
+ getI32Imm((int)SImm & 0xFFFF)),
+ 0);
+ Opc = PPC::CMPW;
+ }
+ } else if (LHS.getValueType() == MVT::i64) {
+ uint64_t Imm;
+ if (CC == ISD::SETEQ || CC == ISD::SETNE) {
+ if (isInt64Immediate(RHS.getNode(), Imm)) {
+ // SETEQ/SETNE comparison with 16-bit immediate, fold it.
+ if (isUInt<16>(Imm))
+ return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
+ getI32Imm(Imm & 0xFFFF)), 0);
+ // If this is a 16-bit signed immediate, fold it.
+ if (isInt<16>(Imm))
+ return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
+ getI32Imm(Imm & 0xFFFF)), 0);
+
+ // For non-equality comparisons, the default code would materialize the
+ // constant, then compare against it, like this:
+ // lis r2, 4660
+ // ori r2, r2, 22136
+ // cmpd cr0, r3, r2
+ // Since we are just comparing for equality, we can emit this instead:
+ // xoris r0,r3,0x1234
+ // cmpldi cr0,r0,0x5678
+ // beq cr0,L6
+ if (isUInt<32>(Imm)) {
+ SDValue Xor(CurDAG->getMachineNode(PPC::XORIS8, dl, MVT::i64, LHS,
+ getI64Imm(Imm >> 16)), 0);
+ return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, Xor,
+ getI64Imm(Imm & 0xFFFF)), 0);
+ }
+ }
+ Opc = PPC::CMPLD;
+ } else if (ISD::isUnsignedIntSetCC(CC)) {
+ if (isInt64Immediate(RHS.getNode(), Imm) && isUInt<16>(Imm))
+ return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
+ getI64Imm(Imm & 0xFFFF)), 0);
+ Opc = PPC::CMPLD;
+ } else {
+ short SImm;
+ if (isIntS16Immediate(RHS, SImm))
+ return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
+ getI64Imm(SImm & 0xFFFF)),
+ 0);
+ Opc = PPC::CMPD;
+ }
+ } else if (LHS.getValueType() == MVT::f32) {
+ Opc = PPC::FCMPUS;
+ } else {
+ assert(LHS.getValueType() == MVT::f64 && "Unknown vt!");
+ Opc = PPC::FCMPUD;
+ }
+ return SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i32, LHS, RHS), 0);
+}
+
+static PPC::Predicate getPredicateForSetCC(ISD::CondCode CC) {
+ switch (CC) {
+ case ISD::SETUEQ:
+ case ISD::SETONE:
+ case ISD::SETOLE:
+ case ISD::SETOGE:
+ llvm_unreachable("Should be lowered by legalize!");
+ default: llvm_unreachable("Unknown condition!");
+ case ISD::SETOEQ:
+ case ISD::SETEQ: return PPC::PRED_EQ;
+ case ISD::SETUNE:
+ case ISD::SETNE: return PPC::PRED_NE;
+ case ISD::SETOLT:
+ case ISD::SETLT: return PPC::PRED_LT;
+ case ISD::SETULE:
+ case ISD::SETLE: return PPC::PRED_LE;
+ case ISD::SETOGT:
+ case ISD::SETGT: return PPC::PRED_GT;
+ case ISD::SETUGE:
+ case ISD::SETGE: return PPC::PRED_GE;
+ case ISD::SETO: return PPC::PRED_NU;
+ case ISD::SETUO: return PPC::PRED_UN;
+ // These two are invalid for floating point. Assume we have int.
+ case ISD::SETULT: return PPC::PRED_LT;
+ case ISD::SETUGT: return PPC::PRED_GT;
+ }
+}
+
+/// getCRIdxForSetCC - Return the index of the condition register field
+/// associated with the SetCC condition, and whether or not the field is
+/// treated as inverted. That is, lt = 0; ge = 0 inverted.
+///
+/// If this returns with Other != -1, then the returned comparison is an or of
+/// two simpler comparisons. In this case, Invert is guaranteed to be false.
+static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert, int &Other) {
+ Invert = false;
+ Other = -1;
+ switch (CC) {
+ default: llvm_unreachable("Unknown condition!");
+ case ISD::SETOLT:
+ case ISD::SETLT: return 0; // Bit #0 = SETOLT
+ case ISD::SETOGT:
+ case ISD::SETGT: return 1; // Bit #1 = SETOGT
+ case ISD::SETOEQ:
+ case ISD::SETEQ: return 2; // Bit #2 = SETOEQ
+ case ISD::SETUO: return 3; // Bit #3 = SETUO
+ case ISD::SETUGE:
+ case ISD::SETGE: Invert = true; return 0; // !Bit #0 = SETUGE
+ case ISD::SETULE:
+ case ISD::SETLE: Invert = true; return 1; // !Bit #1 = SETULE
+ case ISD::SETUNE:
+ case ISD::SETNE: Invert = true; return 2; // !Bit #2 = SETUNE
+ case ISD::SETO: Invert = true; return 3; // !Bit #3 = SETO
+ case ISD::SETUEQ:
+ case ISD::SETOGE:
+ case ISD::SETOLE:
+ case ISD::SETONE:
+ llvm_unreachable("Invalid branch code: should be expanded by legalize");
+ // These are invalid for floating point. Assume integer.
+ case ISD::SETULT: return 0;
+ case ISD::SETUGT: return 1;
+ }
+ return 0;
+}
+
+SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ unsigned Imm;
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ EVT PtrVT = CurDAG->getTargetLoweringInfo().getPointerTy();
+ bool isPPC64 = (PtrVT == MVT::i64);
+
+ if (isInt32Immediate(N->getOperand(1), Imm)) {
+ // We can codegen setcc op, imm very efficiently compared to a brcond.
+ // Check for those cases here.
+ // setcc op, 0
+ if (Imm == 0) {
+ SDValue Op = N->getOperand(0);
+ switch (CC) {
+ default: break;
+ case ISD::SETEQ: {
+ Op = SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Op), 0);
+ SDValue Ops[] = { Op, getI32Imm(27), getI32Imm(5), getI32Imm(31) };
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+ }
+ case ISD::SETNE: {
+ if (isPPC64) break;
+ SDValue AD =
+ SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
+ Op, getI32Imm(~0U)), 0);
+ return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, AD, Op,
+ AD.getValue(1));
+ }
+ case ISD::SETLT: {
+ SDValue Ops[] = { Op, getI32Imm(1), getI32Imm(31), getI32Imm(31) };
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+ }
+ case ISD::SETGT: {
+ SDValue T =
+ SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Op), 0);
+ T = SDValue(CurDAG->getMachineNode(PPC::ANDC, dl, MVT::i32, T, Op), 0);
+ SDValue Ops[] = { T, getI32Imm(1), getI32Imm(31), getI32Imm(31) };
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+ }
+ }
+ } else if (Imm == ~0U) { // setcc op, -1
+ SDValue Op = N->getOperand(0);
+ switch (CC) {
+ default: break;
+ case ISD::SETEQ:
+ if (isPPC64) break;
+ Op = SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
+ Op, getI32Imm(1)), 0);
+ return CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32,
+ SDValue(CurDAG->getMachineNode(PPC::LI, dl,
+ MVT::i32,
+ getI32Imm(0)), 0),
+ Op.getValue(1));
+ case ISD::SETNE: {
+ if (isPPC64) break;
+ Op = SDValue(CurDAG->getMachineNode(PPC::NOR, dl, MVT::i32, Op, Op), 0);
+ SDNode *AD = CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
+ Op, getI32Imm(~0U));
+ return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(AD, 0),
+ Op, SDValue(AD, 1));
+ }
+ case ISD::SETLT: {
+ SDValue AD = SDValue(CurDAG->getMachineNode(PPC::ADDI, dl, MVT::i32, Op,
+ getI32Imm(1)), 0);
+ SDValue AN = SDValue(CurDAG->getMachineNode(PPC::AND, dl, MVT::i32, AD,
+ Op), 0);
+ SDValue Ops[] = { AN, getI32Imm(1), getI32Imm(31), getI32Imm(31) };
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+ }
+ case ISD::SETGT: {
+ SDValue Ops[] = { Op, getI32Imm(1), getI32Imm(31), getI32Imm(31) };
+ Op = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops, 4),
+ 0);
+ return CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Op,
+ getI32Imm(1));
+ }
+ }
+ }
+ }
+
+ bool Inv;
+ int OtherCondIdx;
+ unsigned Idx = getCRIdxForSetCC(CC, Inv, OtherCondIdx);
+ SDValue CCReg = SelectCC(N->getOperand(0), N->getOperand(1), CC, dl);
+ SDValue IntCR;
+
+ // Force the ccreg into CR7.
+ SDValue CR7Reg = CurDAG->getRegister(PPC::CR7, MVT::i32);
+
+ SDValue InFlag(0, 0); // Null incoming flag value.
+ CCReg = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, CR7Reg, CCReg,
+ InFlag).getValue(1);
+
+ if (PPCSubTarget.isGigaProcessor() && OtherCondIdx == -1)
+ IntCR = SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR7Reg,
+ CCReg), 0);
+ else
+ IntCR = SDValue(CurDAG->getMachineNode(PPC::MFCRpseud, dl, MVT::i32,
+ CR7Reg, CCReg), 0);
+
+ SDValue Ops[] = { IntCR, getI32Imm((32-(3-Idx)) & 31),
+ getI32Imm(31), getI32Imm(31) };
+ if (OtherCondIdx == -1 && !Inv)
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+
+ // Get the specified bit.
+ SDValue Tmp =
+ SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops, 4), 0);
+ if (Inv) {
+ assert(OtherCondIdx == -1 && "Can't have split plus negation");
+ return CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Tmp, getI32Imm(1));
+ }
+
+ // Otherwise, we have to turn an operation like SETONE -> SETOLT | SETOGT.
+ // We already got the bit for the first part of the comparison (e.g. SETULE).
+
+ // Get the other bit of the comparison.
+ Ops[1] = getI32Imm((32-(3-OtherCondIdx)) & 31);
+ SDValue OtherCond =
+ SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops, 4), 0);
+
+ return CurDAG->SelectNodeTo(N, PPC::OR, MVT::i32, Tmp, OtherCond);
+}
+
+
+// Select - Convert the specified operand from a target-independent to a
+// target-specific node if it hasn't already been changed.
+SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ if (N->isMachineOpcode())
+ return NULL; // Already selected.
+
+ switch (N->getOpcode()) {
+ default: break;
+
+ case ISD::Constant: {
+ if (N->getValueType(0) == MVT::i64) {
+ // Get 64 bit value.
+ int64_t Imm = cast<ConstantSDNode>(N)->getZExtValue();
+ // Assume no remaining bits.
+ unsigned Remainder = 0;
+ // Assume no shift required.
+ unsigned Shift = 0;
+
+ // If it can't be represented as a 32 bit value.
+ if (!isInt<32>(Imm)) {
+ Shift = CountTrailingZeros_64(Imm);
+ int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;
+
+ // If the shifted value fits 32 bits.
+ if (isInt<32>(ImmSh)) {
+ // Go with the shifted value.
+ Imm = ImmSh;
+ } else {
+ // Still stuck with a 64 bit value.
+ Remainder = Imm;
+ Shift = 32;
+ Imm >>= 32;
+ }
+ }
+
+ // Intermediate operand.
+ SDNode *Result;
+
+ // Handle first 32 bits.
+ unsigned Lo = Imm & 0xFFFF;
+ unsigned Hi = (Imm >> 16) & 0xFFFF;
+
+ // Simple value.
+ if (isInt<16>(Imm)) {
+ // Just the Lo bits.
+ Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, getI32Imm(Lo));
+ } else if (Lo) {
+ // Handle the Hi bits.
+ unsigned OpC = Hi ? PPC::LIS8 : PPC::LI8;
+ Result = CurDAG->getMachineNode(OpC, dl, MVT::i64, getI32Imm(Hi));
+ // And Lo bits.
+ Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
+ SDValue(Result, 0), getI32Imm(Lo));
+ } else {
+ // Just the Hi bits.
+ Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm(Hi));
+ }
+
+ // If no shift, we're done.
+ if (!Shift) return Result;
+
+ // Shift for next step if the upper 32-bits were not zero.
+ if (Imm) {
+ Result = CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64,
+ SDValue(Result, 0),
+ getI32Imm(Shift),
+ getI32Imm(63 - Shift));
+ }
+
+ // Add in the last bits as required.
+ if ((Hi = (Remainder >> 16) & 0xFFFF)) {
+ Result = CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64,
+ SDValue(Result, 0), getI32Imm(Hi));
+ }
+ if ((Lo = Remainder & 0xFFFF)) {
+ Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
+ SDValue(Result, 0), getI32Imm(Lo));
+ }
+
+ return Result;
+ }
+ break;
+ }
+
+ case ISD::SETCC:
+ return SelectSETCC(N);
+ case PPCISD::GlobalBaseReg:
+ return getGlobalBaseReg();
+
+ case ISD::FrameIndex: {
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ SDValue TFI = CurDAG->getTargetFrameIndex(FI, N->getValueType(0));
+ unsigned Opc = N->getValueType(0) == MVT::i32 ? PPC::ADDI : PPC::ADDI8;
+ if (N->hasOneUse())
+ return CurDAG->SelectNodeTo(N, Opc, N->getValueType(0), TFI,
+ getSmallIPtrImm(0));
+ return CurDAG->getMachineNode(Opc, dl, N->getValueType(0), TFI,
+ getSmallIPtrImm(0));
+ }
+
+ case PPCISD::MFCR: {
+ SDValue InFlag = N->getOperand(1);
+ // Use MFOCRF if supported.
+ if (PPCSubTarget.isGigaProcessor())
+ return CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32,
+ N->getOperand(0), InFlag);
+ else
+ return CurDAG->getMachineNode(PPC::MFCRpseud, dl, MVT::i32,
+ N->getOperand(0), InFlag);
+ }
+
+ case ISD::SDIV: {
+ // FIXME: since this depends on the setting of the carry flag from the srawi
+ // we should really be making notes about that for the scheduler.
+ // FIXME: It sure would be nice if we could cheaply recognize the
+ // srl/add/sra pattern the dag combiner will generate for this as
+ // sra/addze rather than having to handle sdiv ourselves. oh well.
+ unsigned Imm;
+ if (isInt32Immediate(N->getOperand(1), Imm)) {
+ SDValue N0 = N->getOperand(0);
+ if ((signed)Imm > 0 && isPowerOf2_32(Imm)) {
+ SDNode *Op =
+ CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Glue,
+ N0, getI32Imm(Log2_32(Imm)));
+ return CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32,
+ SDValue(Op, 0), SDValue(Op, 1));
+ } else if ((signed)Imm < 0 && isPowerOf2_32(-Imm)) {
+ SDNode *Op =
+ CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Glue,
+ N0, getI32Imm(Log2_32(-Imm)));
+ SDValue PT =
+ SDValue(CurDAG->getMachineNode(PPC::ADDZE, dl, MVT::i32,
+ SDValue(Op, 0), SDValue(Op, 1)),
+ 0);
+ return CurDAG->SelectNodeTo(N, PPC::NEG, MVT::i32, PT);
+ }
+ }
+
+ // Other cases are autogenerated.
+ break;
+ }
+
+ case ISD::LOAD: {
+ // Handle preincrement loads.
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ EVT LoadedVT = LD->getMemoryVT();
+
+ // Normal loads are handled by code generated from the .td file.
+ if (LD->getAddressingMode() != ISD::PRE_INC)
+ break;
+
+ SDValue Offset = LD->getOffset();
+ if (isa<ConstantSDNode>(Offset) ||
+ Offset.getOpcode() == ISD::TargetGlobalAddress) {
+
+ unsigned Opcode;
+ bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
+ if (LD->getValueType(0) != MVT::i64) {
+ // Handle PPC32 integer and normal FP loads.
+ assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
+ switch (LoadedVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Invalid PPC load type!");
+ case MVT::f64: Opcode = PPC::LFDU; break;
+ case MVT::f32: Opcode = PPC::LFSU; break;
+ case MVT::i32: Opcode = PPC::LWZU; break;
+ case MVT::i16: Opcode = isSExt ? PPC::LHAU : PPC::LHZU; break;
+ case MVT::i1:
+ case MVT::i8: Opcode = PPC::LBZU; break;
+ }
+ } else {
+ assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!");
+ assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
+ switch (LoadedVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Invalid PPC load type!");
+ case MVT::i64: Opcode = PPC::LDU; break;
+ case MVT::i32: Opcode = PPC::LWZU8; break;
+ case MVT::i16: Opcode = isSExt ? PPC::LHAU8 : PPC::LHZU8; break;
+ case MVT::i1:
+ case MVT::i8: Opcode = PPC::LBZU8; break;
+ }
+ }
+
+ SDValue Chain = LD->getChain();
+ SDValue Base = LD->getBasePtr();
+ SDValue Ops[] = { Offset, Base, Chain };
+ // FIXME: PPC64
+ return CurDAG->getMachineNode(Opcode, dl, LD->getValueType(0),
+ PPCLowering.getPointerTy(),
+ MVT::Other, Ops, 3);
+ } else {
+ llvm_unreachable("R+R preindex loads not supported yet!");
+ }
+ }
+
+ case ISD::AND: {
+ unsigned Imm, Imm2, SH, MB, ME;
+
+ // If this is an and of a value rotated between 0 and 31 bits and then and'd
+ // with a mask, emit rlwinm
+ if (isInt32Immediate(N->getOperand(1), Imm) &&
+ isRotateAndMask(N->getOperand(0).getNode(), Imm, false, SH, MB, ME)) {
+ SDValue Val = N->getOperand(0).getOperand(0);
+ SDValue Ops[] = { Val, getI32Imm(SH), getI32Imm(MB), getI32Imm(ME) };
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+ }
+ // If this is just a masked value where the input is not handled above, and
+ // is not a rotate-left (handled by a pattern in the .td file), emit rlwinm
+ if (isInt32Immediate(N->getOperand(1), Imm) &&
+ isRunOfOnes(Imm, MB, ME) &&
+ N->getOperand(0).getOpcode() != ISD::ROTL) {
+ SDValue Val = N->getOperand(0);
+ SDValue Ops[] = { Val, getI32Imm(0), getI32Imm(MB), getI32Imm(ME) };
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+ }
+ // AND X, 0 -> 0, not "rlwinm 32".
+ if (isInt32Immediate(N->getOperand(1), Imm) && (Imm == 0)) {
+ ReplaceUses(SDValue(N, 0), N->getOperand(1));
+ return NULL;
+ }
+ // ISD::OR doesn't get all the bitfield insertion fun.
+ // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) is a bitfield insert
+ if (isInt32Immediate(N->getOperand(1), Imm) &&
+ N->getOperand(0).getOpcode() == ISD::OR &&
+ isInt32Immediate(N->getOperand(0).getOperand(1), Imm2)) {
+ unsigned MB, ME;
+ Imm = ~(Imm^Imm2);
+ if (isRunOfOnes(Imm, MB, ME)) {
+ SDValue Ops[] = { N->getOperand(0).getOperand(0),
+ N->getOperand(0).getOperand(1),
+ getI32Imm(0), getI32Imm(MB),getI32Imm(ME) };
+ return CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops, 5);
+ }
+ }
+
+ // Other cases are autogenerated.
+ break;
+ }
+ case ISD::OR:
+ if (N->getValueType(0) == MVT::i32)
+ if (SDNode *I = SelectBitfieldInsert(N))
+ return I;
+
+ // Other cases are autogenerated.
+ break;
+ case ISD::SHL: {
+ unsigned Imm, SH, MB, ME;
+ if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
+ isRotateAndMask(N, Imm, true, SH, MB, ME)) {
+ SDValue Ops[] = { N->getOperand(0).getOperand(0),
+ getI32Imm(SH), getI32Imm(MB), getI32Imm(ME) };
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+ }
+
+ // Other cases are autogenerated.
+ break;
+ }
+ case ISD::SRL: {
+ unsigned Imm, SH, MB, ME;
+ if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
+ isRotateAndMask(N, Imm, true, SH, MB, ME)) {
+ SDValue Ops[] = { N->getOperand(0).getOperand(0),
+ getI32Imm(SH), getI32Imm(MB), getI32Imm(ME) };
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+ }
+
+ // Other cases are autogenerated.
+ break;
+ }
+ case ISD::SELECT_CC: {
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
+ EVT PtrVT = CurDAG->getTargetLoweringInfo().getPointerTy();
+ bool isPPC64 = (PtrVT == MVT::i64);
+
+ // Handle the setcc cases here. select_cc lhs, 0, 1, 0, cc
+ if (!isPPC64)
+ if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
+ if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N->getOperand(2)))
+ if (ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N->getOperand(3)))
+ if (N1C->isNullValue() && N3C->isNullValue() &&
+ N2C->getZExtValue() == 1ULL && CC == ISD::SETNE &&
+ // FIXME: Implement this optzn for PPC64.
+ N->getValueType(0) == MVT::i32) {
+ SDNode *Tmp =
+ CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
+ N->getOperand(0), getI32Imm(~0U));
+ return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32,
+ SDValue(Tmp, 0), N->getOperand(0),
+ SDValue(Tmp, 1));
+ }
+
+ SDValue CCReg = SelectCC(N->getOperand(0), N->getOperand(1), CC, dl);
+ unsigned BROpc = getPredicateForSetCC(CC);
+
+ unsigned SelectCCOp;
+ if (N->getValueType(0) == MVT::i32)
+ SelectCCOp = PPC::SELECT_CC_I4;
+ else if (N->getValueType(0) == MVT::i64)
+ SelectCCOp = PPC::SELECT_CC_I8;
+ else if (N->getValueType(0) == MVT::f32)
+ SelectCCOp = PPC::SELECT_CC_F4;
+ else if (N->getValueType(0) == MVT::f64)
+ SelectCCOp = PPC::SELECT_CC_F8;
+ else
+ SelectCCOp = PPC::SELECT_CC_VRRC;
+
+ SDValue Ops[] = { CCReg, N->getOperand(2), N->getOperand(3),
+ getI32Imm(BROpc) };
+ return CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops, 4);
+ }
+ case PPCISD::COND_BRANCH: {
+ // Op #0 is the Chain.
+ // Op #1 is the PPC::PRED_* number.
+ // Op #2 is the CR#
+ // Op #3 is the Dest MBB
+ // Op #4 is the Flag.
+ // Prevent PPC::PRED_* from being selected into LI.
+ SDValue Pred =
+ getI32Imm(cast<ConstantSDNode>(N->getOperand(1))->getZExtValue());
+ SDValue Ops[] = { Pred, N->getOperand(2), N->getOperand(3),
+ N->getOperand(0), N->getOperand(4) };
+ return CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops, 5);
+ }
+ case ISD::BR_CC: {
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
+ SDValue CondCode = SelectCC(N->getOperand(2), N->getOperand(3), CC, dl);
+ SDValue Ops[] = { getI32Imm(getPredicateForSetCC(CC)), CondCode,
+ N->getOperand(4), N->getOperand(0) };
+ return CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops, 4);
+ }
+ case ISD::BRIND: {
+ // FIXME: Should custom lower this.
+ SDValue Chain = N->getOperand(0);
+ SDValue Target = N->getOperand(1);
+ unsigned Opc = Target.getValueType() == MVT::i32 ? PPC::MTCTR : PPC::MTCTR8;
+ unsigned Reg = Target.getValueType() == MVT::i32 ? PPC::BCTR : PPC::BCTR8;
+ Chain = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Target,
+ Chain), 0);
+ return CurDAG->SelectNodeTo(N, Reg, MVT::Other, Chain);
+ }
+ }
+
+ return SelectCode(N);
+}
+
+
+
+/// createPPCISelDag - This pass converts a legalized DAG into a
+/// PowerPC-specific DAG, ready for instruction scheduling.
+///
+FunctionPass *llvm::createPPCISelDag(PPCTargetMachine &TM) {
+ return new PPCDAGToDAGISel(TM);
+}
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
new file mode 100644
index 0000000..d6b8a9e
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -0,0 +1,5793 @@
+//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PPCISelLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCISelLowering.h"
+#include "PPCMachineFunctionInfo.h"
+#include "PPCPerfectShuffle.h"
+#include "PPCTargetMachine.h"
+#include "MCTargetDesc/PPCPredicates.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/VectorExtras.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/DerivedTypes.h"
+using namespace llvm;
+
+static bool CC_PPC_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State);
+static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State);
+static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State);
+
+static cl::opt<bool> EnablePPCPreinc("enable-ppc-preinc",
+cl::desc("enable preincrement load/store generation on PPC (experimental)"),
+ cl::Hidden);
+
+static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) {
+ if (TM.getSubtargetImpl()->isDarwin())
+ return new TargetLoweringObjectFileMachO();
+
+ return new TargetLoweringObjectFileELF();
+}
+
+PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
+ : TargetLowering(TM, CreateTLOF(TM)), PPCSubTarget(*TM.getSubtargetImpl()) {
+
+ setPow2DivIsCheap();
+
+ // Use _setjmp/_longjmp instead of setjmp/longjmp.
+ setUseUnderscoreSetJmp(true);
+ setUseUnderscoreLongJmp(true);
+
+ // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
+ // arguments are at least 4/8 bytes aligned.
+ setMinStackArgumentAlignment(TM.getSubtarget<PPCSubtarget>().isPPC64() ? 8:4);
+
+ // Set up the register classes.
+ addRegisterClass(MVT::i32, PPC::GPRCRegisterClass);
+ addRegisterClass(MVT::f32, PPC::F4RCRegisterClass);
+ addRegisterClass(MVT::f64, PPC::F8RCRegisterClass);
+
+ // PowerPC has an i16 but no i8 (or i1) SEXTLOAD
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Expand);
+
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
+ // PowerPC has pre-inc load and store's.
+ setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
+ setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
+ setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
+ setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
+ setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
+ setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
+ setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
+ setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
+ setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
+ setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
+
+ // This is used in the ppcf128->int sequence. Note it has different semantics
+ // from FP_ROUND: that rounds to nearest, this rounds to zero.
+ setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom);
+
+ // PowerPC has no SREM/UREM instructions
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+ setOperationAction(ISD::SREM, MVT::i64, Expand);
+ setOperationAction(ISD::UREM, MVT::i64, Expand);
+
+ // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
+ setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
+
+ // We don't support sin/cos/sqrt/fmod/pow
+ setOperationAction(ISD::FSIN , MVT::f64, Expand);
+ setOperationAction(ISD::FCOS , MVT::f64, Expand);
+ setOperationAction(ISD::FREM , MVT::f64, Expand);
+ setOperationAction(ISD::FPOW , MVT::f64, Expand);
+ setOperationAction(ISD::FMA , MVT::f64, Expand);
+ setOperationAction(ISD::FSIN , MVT::f32, Expand);
+ setOperationAction(ISD::FCOS , MVT::f32, Expand);
+ setOperationAction(ISD::FREM , MVT::f32, Expand);
+ setOperationAction(ISD::FPOW , MVT::f32, Expand);
+ setOperationAction(ISD::FMA , MVT::f32, Expand);
+
+ setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
+
+ // If we're enabling GP optimizations, use hardware square root
+ if (!TM.getSubtarget<PPCSubtarget>().hasFSQRT()) {
+ setOperationAction(ISD::FSQRT, MVT::f64, Expand);
+ setOperationAction(ISD::FSQRT, MVT::f32, Expand);
+ }
+
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+
+ // PowerPC does not have BSWAP, CTPOP or CTTZ
+ setOperationAction(ISD::BSWAP, MVT::i32 , Expand);
+ setOperationAction(ISD::CTPOP, MVT::i32 , Expand);
+ setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
+ setOperationAction(ISD::BSWAP, MVT::i64 , Expand);
+ setOperationAction(ISD::CTPOP, MVT::i64 , Expand);
+ setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
+
+ // PowerPC does not have ROTR
+ setOperationAction(ISD::ROTR, MVT::i32 , Expand);
+ setOperationAction(ISD::ROTR, MVT::i64 , Expand);
+
+ // PowerPC does not have Select
+ setOperationAction(ISD::SELECT, MVT::i32, Expand);
+ setOperationAction(ISD::SELECT, MVT::i64, Expand);
+ setOperationAction(ISD::SELECT, MVT::f32, Expand);
+ setOperationAction(ISD::SELECT, MVT::f64, Expand);
+
+ // PowerPC wants to turn select_cc of FP into fsel when possible.
+ setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
+
+ // PowerPC wants to optimize integer setcc a bit
+ setOperationAction(ISD::SETCC, MVT::i32, Custom);
+
+ // PowerPC does not have BRCOND which requires SetCC
+ setOperationAction(ISD::BRCOND, MVT::Other, Expand);
+
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+
+ // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
+
+ // PowerPC does not have [U|S]INT_TO_FP
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
+
+ setOperationAction(ISD::BITCAST, MVT::f32, Expand);
+ setOperationAction(ISD::BITCAST, MVT::i32, Expand);
+ setOperationAction(ISD::BITCAST, MVT::i64, Expand);
+ setOperationAction(ISD::BITCAST, MVT::f64, Expand);
+
+ // We cannot sextinreg(i1). Expand to shifts.
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+
+ setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
+ setOperationAction(ISD::EHSELECTION, MVT::i64, Expand);
+ setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
+ setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
+
+
+ // We want to legalize GlobalAddress and ConstantPool nodes into the
+ // appropriate instructions to materialize the address.
+ setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
+ setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
+ setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
+ setOperationAction(ISD::JumpTable, MVT::i32, Custom);
+ setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
+ setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
+ setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
+ setOperationAction(ISD::JumpTable, MVT::i64, Custom);
+
+ // TRAP is legal.
+ setOperationAction(ISD::TRAP, MVT::Other, Legal);
+
+ // TRAMPOLINE is custom lowered.
+ setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
+ setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
+
+ // VASTART needs to be custom lowered to use the VarArgsFrameIndex
+ setOperationAction(ISD::VASTART , MVT::Other, Custom);
+
+ // VAARG is custom lowered with the 32-bit SVR4 ABI.
+ if (TM.getSubtarget<PPCSubtarget>().isSVR4ABI()
+ && !TM.getSubtarget<PPCSubtarget>().isPPC64()) {
+ setOperationAction(ISD::VAARG, MVT::Other, Custom);
+ setOperationAction(ISD::VAARG, MVT::i64, Custom);
+ } else
+ setOperationAction(ISD::VAARG, MVT::Other, Expand);
+
+ // Use the default implementation.
+ setOperationAction(ISD::VACOPY , MVT::Other, Expand);
+ setOperationAction(ISD::VAEND , MVT::Other, Expand);
+ setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE , MVT::Other, Custom);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Custom);
+
+ // We want to custom lower some of our intrinsics.
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+
+ // Comparisons that require checking two conditions.
+ setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
+ setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
+ setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
+ setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
+ setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
+ setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETONE, MVT::f64, Expand);
+
+ if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) {
+ // They also have instructions for converting between i64 and fp.
+ setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
+ // This is just the low 32 bits of a (signed) fp->i64 conversion.
+ // We cannot do this with Promote because i64 is not a legal type.
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
+
+ // FIXME: disable this lowered code. This generates 64-bit register values,
+ // and we don't model the fact that the top part is clobbered by calls. We
+ // need to flag these together so that the value isn't live across a call.
+ //setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
+ } else {
+ // PowerPC does not have FP_TO_UINT on 32-bit implementations.
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
+ }
+
+ if (TM.getSubtarget<PPCSubtarget>().use64BitRegs()) {
+ // 64-bit PowerPC implementations can support i64 types directly
+ addRegisterClass(MVT::i64, PPC::G8RCRegisterClass);
+ // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
+ setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
+ // 64-bit PowerPC wants to expand i128 shifts itself.
+ setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
+ setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
+ setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
+ } else {
+ // 32-bit PowerPC wants to expand i64 shifts itself.
+ setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
+ setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
+ setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
+ }
+
+ if (TM.getSubtarget<PPCSubtarget>().hasAltivec()) {
+ // First set operation action for all vector types to expand. Then we
+ // will selectively turn on ones that can be effectively codegen'd.
+ for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+ i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)i;
+
+ // add/sub are legal for all supported vector VT's.
+ setOperationAction(ISD::ADD , VT, Legal);
+ setOperationAction(ISD::SUB , VT, Legal);
+
+ // We promote all shuffles to v16i8.
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
+ AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
+
+ // We promote all non-typed operations to v4i32.
+ setOperationAction(ISD::AND , VT, Promote);
+ AddPromotedToType (ISD::AND , VT, MVT::v4i32);
+ setOperationAction(ISD::OR , VT, Promote);
+ AddPromotedToType (ISD::OR , VT, MVT::v4i32);
+ setOperationAction(ISD::XOR , VT, Promote);
+ AddPromotedToType (ISD::XOR , VT, MVT::v4i32);
+ setOperationAction(ISD::LOAD , VT, Promote);
+ AddPromotedToType (ISD::LOAD , VT, MVT::v4i32);
+ setOperationAction(ISD::SELECT, VT, Promote);
+ AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
+ setOperationAction(ISD::STORE, VT, Promote);
+ AddPromotedToType (ISD::STORE, VT, MVT::v4i32);
+
+ // No other operations are legal.
+ setOperationAction(ISD::MUL , VT, Expand);
+ setOperationAction(ISD::SDIV, VT, Expand);
+ setOperationAction(ISD::SREM, VT, Expand);
+ setOperationAction(ISD::UDIV, VT, Expand);
+ setOperationAction(ISD::UREM, VT, Expand);
+ setOperationAction(ISD::FDIV, VT, Expand);
+ setOperationAction(ISD::FNEG, VT, Expand);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
+ setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
+ setOperationAction(ISD::UMUL_LOHI, VT, Expand);
+ setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+ setOperationAction(ISD::UDIVREM, VT, Expand);
+ setOperationAction(ISD::SDIVREM, VT, Expand);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
+ setOperationAction(ISD::FPOW, VT, Expand);
+ setOperationAction(ISD::CTPOP, VT, Expand);
+ setOperationAction(ISD::CTLZ, VT, Expand);
+ setOperationAction(ISD::CTTZ, VT, Expand);
+ }
+
+ // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
+ // with merges, splats, etc.
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);
+
+ setOperationAction(ISD::AND , MVT::v4i32, Legal);
+ setOperationAction(ISD::OR , MVT::v4i32, Legal);
+ setOperationAction(ISD::XOR , MVT::v4i32, Legal);
+ setOperationAction(ISD::LOAD , MVT::v4i32, Legal);
+ setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
+ setOperationAction(ISD::STORE , MVT::v4i32, Legal);
+
+ addRegisterClass(MVT::v4f32, PPC::VRRCRegisterClass);
+ addRegisterClass(MVT::v4i32, PPC::VRRCRegisterClass);
+ addRegisterClass(MVT::v8i16, PPC::VRRCRegisterClass);
+ addRegisterClass(MVT::v16i8, PPC::VRRCRegisterClass);
+
+ setOperationAction(ISD::MUL, MVT::v4f32, Legal);
+ setOperationAction(ISD::MUL, MVT::v4i32, Custom);
+ setOperationAction(ISD::MUL, MVT::v8i16, Custom);
+ setOperationAction(ISD::MUL, MVT::v16i8, Custom);
+
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);
+
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
+ }
+
+ setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand);
+
+ setBooleanContents(ZeroOrOneBooleanContent);
+ setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
+
+ if (TM.getSubtarget<PPCSubtarget>().isPPC64()) {
+ setStackPointerRegisterToSaveRestore(PPC::X1);
+ setExceptionPointerRegister(PPC::X3);
+ setExceptionSelectorRegister(PPC::X4);
+ } else {
+ setStackPointerRegisterToSaveRestore(PPC::R1);
+ setExceptionPointerRegister(PPC::R3);
+ setExceptionSelectorRegister(PPC::R4);
+ }
+
+ // We have target-specific dag combine patterns for the following nodes:
+ setTargetDAGCombine(ISD::SINT_TO_FP);
+ setTargetDAGCombine(ISD::STORE);
+ setTargetDAGCombine(ISD::BR_CC);
+ setTargetDAGCombine(ISD::BSWAP);
+
+ // Darwin long double math library functions have $LDBL128 appended.
+ if (TM.getSubtarget<PPCSubtarget>().isDarwin()) {
+ setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
+ setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
+ setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
+ setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128");
+ setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128");
+ setLibcallName(RTLIB::LOG_PPCF128, "logl$LDBL128");
+ setLibcallName(RTLIB::LOG2_PPCF128, "log2l$LDBL128");
+ setLibcallName(RTLIB::LOG10_PPCF128, "log10l$LDBL128");
+ setLibcallName(RTLIB::EXP_PPCF128, "expl$LDBL128");
+ setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128");
+ }
+
+ setMinFunctionAlignment(2);
+ if (PPCSubTarget.isDarwin())
+ setPrefFunctionAlignment(4);
+
+ setInsertFencesForAtomic(true);
+
+ computeRegisterProperties();
+}
+
+/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
+/// function arguments in the caller parameter area.
+unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty) const {
+ const TargetMachine &TM = getTargetMachine();
+ // Darwin passes everything on 4 byte boundary.
+ if (TM.getSubtarget<PPCSubtarget>().isDarwin())
+ return 4;
+ // FIXME SVR4 TBD
+ return 4;
+}
+
+const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ default: return 0;
+ case PPCISD::FSEL: return "PPCISD::FSEL";
+ case PPCISD::FCFID: return "PPCISD::FCFID";
+ case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
+ case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
+ case PPCISD::STFIWX: return "PPCISD::STFIWX";
+ case PPCISD::VMADDFP: return "PPCISD::VMADDFP";
+ case PPCISD::VNMSUBFP: return "PPCISD::VNMSUBFP";
+ case PPCISD::VPERM: return "PPCISD::VPERM";
+ case PPCISD::Hi: return "PPCISD::Hi";
+ case PPCISD::Lo: return "PPCISD::Lo";
+ case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
+ case PPCISD::TOC_RESTORE: return "PPCISD::TOC_RESTORE";
+ case PPCISD::LOAD: return "PPCISD::LOAD";
+ case PPCISD::LOAD_TOC: return "PPCISD::LOAD_TOC";
+ case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
+ case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
+ case PPCISD::SRL: return "PPCISD::SRL";
+ case PPCISD::SRA: return "PPCISD::SRA";
+ case PPCISD::SHL: return "PPCISD::SHL";
+ case PPCISD::EXTSW_32: return "PPCISD::EXTSW_32";
+ case PPCISD::STD_32: return "PPCISD::STD_32";
+ case PPCISD::CALL_SVR4: return "PPCISD::CALL_SVR4";
+ case PPCISD::CALL_Darwin: return "PPCISD::CALL_Darwin";
+ case PPCISD::NOP: return "PPCISD::NOP";
+ case PPCISD::MTCTR: return "PPCISD::MTCTR";
+ case PPCISD::BCTRL_Darwin: return "PPCISD::BCTRL_Darwin";
+ case PPCISD::BCTRL_SVR4: return "PPCISD::BCTRL_SVR4";
+ case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG";
+ case PPCISD::MFCR: return "PPCISD::MFCR";
+ case PPCISD::VCMP: return "PPCISD::VCMP";
+ case PPCISD::VCMPo: return "PPCISD::VCMPo";
+ case PPCISD::LBRX: return "PPCISD::LBRX";
+ case PPCISD::STBRX: return "PPCISD::STBRX";
+ case PPCISD::LARX: return "PPCISD::LARX";
+ case PPCISD::STCX: return "PPCISD::STCX";
+ case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
+ case PPCISD::MFFS: return "PPCISD::MFFS";
+ case PPCISD::MTFSB0: return "PPCISD::MTFSB0";
+ case PPCISD::MTFSB1: return "PPCISD::MTFSB1";
+ case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
+ case PPCISD::MTFSF: return "PPCISD::MTFSF";
+ case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
+ }
+}
+
+EVT PPCTargetLowering::getSetCCResultType(EVT VT) const {
+ return MVT::i32;
+}
+
+//===----------------------------------------------------------------------===//
+// Node matching predicates, for use by the tblgen matching code.
+//===----------------------------------------------------------------------===//
+
+/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
+static bool isFloatingPointZero(SDValue Op) {
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
+ return CFP->getValueAPF().isZero();
+ else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
+ // Maybe this has already been legalized into the constant pool?
+ if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
+ return CFP->getValueAPF().isZero();
+ }
+ return false;
+}
+
+/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return
+/// true if Op is undef or if it matches the specified value.
+static bool isConstantOrUndef(int Op, int Val) {
+ return Op < 0 || Op == Val;
+}
+
+/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
+/// VPKUHUM instruction.
+bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) {
+ if (!isUnary) {
+ for (unsigned i = 0; i != 16; ++i)
+ if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
+ return false;
+ } else {
+ for (unsigned i = 0; i != 8; ++i)
+ if (!isConstantOrUndef(N->getMaskElt(i), i*2+1) ||
+ !isConstantOrUndef(N->getMaskElt(i+8), i*2+1))
+ return false;
+ }
+ return true;
+}
+
+/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
+/// VPKUWUM instruction.
+bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) {
+ if (!isUnary) {
+ for (unsigned i = 0; i != 16; i += 2)
+ if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
+ !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
+ return false;
+ } else {
+ for (unsigned i = 0; i != 8; i += 2)
+ if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
+ !isConstantOrUndef(N->getMaskElt(i+1), i*2+3) ||
+ !isConstantOrUndef(N->getMaskElt(i+8), i*2+2) ||
+ !isConstantOrUndef(N->getMaskElt(i+9), i*2+3))
+ return false;
+ }
+ return true;
+}
+
+/// isVMerge - Common function, used to match vmrg* shuffles.
+///
+static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
+ unsigned LHSStart, unsigned RHSStart) {
+ assert(N->getValueType(0) == MVT::v16i8 &&
+ "PPC only supports shuffles by bytes!");
+ assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
+ "Unsupported merge size!");
+
+ for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units
+ for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit
+ if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
+ LHSStart+j+i*UnitSize) ||
+ !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
+ RHSStart+j+i*UnitSize))
+ return false;
+ }
+ return true;
+}
+
+/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
+/// a VRGL* instruction with the specified unit size (1,2 or 4 bytes).
+bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
+ bool isUnary) {
+ if (!isUnary)
+ return isVMerge(N, UnitSize, 8, 24);
+ return isVMerge(N, UnitSize, 8, 8);
+}
+
+/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
+/// a VRGH* instruction with the specified unit size (1,2 or 4 bytes).
+bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
+ bool isUnary) {
+ if (!isUnary)
+ return isVMerge(N, UnitSize, 0, 16);
+ return isVMerge(N, UnitSize, 0, 0);
+}
+
+
+/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
+/// amount, otherwise return -1.
+int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary) {
+ assert(N->getValueType(0) == MVT::v16i8 &&
+ "PPC only supports shuffles by bytes!");
+
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+
+ // Find the first non-undef value in the shuffle mask.
+ unsigned i;
+ for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
+ /*search*/;
+
+ if (i == 16) return -1; // all undef.
+
+ // Otherwise, check to see if the rest of the elements are consecutively
+ // numbered from this value.
+ unsigned ShiftAmt = SVOp->getMaskElt(i);
+ if (ShiftAmt < i) return -1;
+ ShiftAmt -= i;
+
+ if (!isUnary) {
+ // Check the rest of the elements to see if they are consecutive.
+ for (++i; i != 16; ++i)
+ if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
+ return -1;
+ } else {
+ // Check the rest of the elements to see if they are consecutive.
+ for (++i; i != 16; ++i)
+ if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
+ return -1;
+ }
+ return ShiftAmt;
+}
+
+/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a splat of a single element that is suitable for input to
+/// VSPLTB/VSPLTH/VSPLTW.
+bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
+ assert(N->getValueType(0) == MVT::v16i8 &&
+ (EltSize == 1 || EltSize == 2 || EltSize == 4));
+
+ // This is a splat operation if each element of the permute is the same, and
+ // if the value doesn't reference the second vector.
+ unsigned ElementBase = N->getMaskElt(0);
+
+ // FIXME: Handle UNDEF elements too!
+ if (ElementBase >= 16)
+ return false;
+
+ // Check that the indices are consecutive, in the case of a multi-byte element
+ // splatted with a v16i8 mask.
+ for (unsigned i = 1; i != EltSize; ++i)
+ if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
+ return false;
+
+ for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
+ if (N->getMaskElt(i) < 0) continue;
+ for (unsigned j = 0; j != EltSize; ++j)
+ if (N->getMaskElt(i+j) != N->getMaskElt(j))
+ return false;
+ }
+ return true;
+}
+
+/// isAllNegativeZeroVector - Returns true if all elements of build_vector
+/// are -0.0.
+bool PPC::isAllNegativeZeroVector(SDNode *N) {
+ BuildVectorSDNode *BV = cast<BuildVectorSDNode>(N);
+
+ APInt APVal, APUndef;
+ unsigned BitSize;
+ bool HasAnyUndefs;
+
+ if (BV->isConstantSplat(APVal, APUndef, BitSize, HasAnyUndefs, 32, true))
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
+ return CFP->getValueAPF().isNegZero();
+
+ return false;
+}
+
+/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
+/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
+unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize) {
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+ assert(isSplatShuffleMask(SVOp, EltSize));
+ return SVOp->getMaskElt(0) / EltSize;
+}
+
+/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
+/// by using a vspltis[bhw] instruction of the specified element size, return
+/// the constant being splatted. The ByteSize field indicates the number of
+/// bytes of each element [124] -> [bhw].
+SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
+ SDValue OpVal(0, 0);
+
+ // If ByteSize of the splat is bigger than the element size of the
+ // build_vector, then we have a case where we are checking for a splat where
+ // multiple elements of the buildvector are folded together into a single
+ // logical element of the splat (e.g. "vsplish 1" to splat {0,1}*8).
+ unsigned EltSize = 16/N->getNumOperands();
+ if (EltSize < ByteSize) {
+ unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval.
+ SDValue UniquedVals[4];
+ assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
+
+ // See if all of the elements in the buildvector agree across.
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+ // If the element isn't a constant, bail fully out.
+ if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
+
+
+ if (UniquedVals[i&(Multiple-1)].getNode() == 0)
+ UniquedVals[i&(Multiple-1)] = N->getOperand(i);
+ else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
+ return SDValue(); // no match.
+ }
+
+ // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
+ // either constant or undef values that are identical for each chunk. See
+ // if these chunks can form into a larger vspltis*.
+
+ // Check to see if all of the leading entries are either 0 or -1. If
+ // neither, then this won't fit into the immediate field.
+ bool LeadingZero = true;
+ bool LeadingOnes = true;
+ for (unsigned i = 0; i != Multiple-1; ++i) {
+ if (UniquedVals[i].getNode() == 0) continue; // Must have been undefs.
+
+ LeadingZero &= cast<ConstantSDNode>(UniquedVals[i])->isNullValue();
+ LeadingOnes &= cast<ConstantSDNode>(UniquedVals[i])->isAllOnesValue();
+ }
+ // Finally, check the least significant entry.
+ if (LeadingZero) {
+ if (UniquedVals[Multiple-1].getNode() == 0)
+ return DAG.getTargetConstant(0, MVT::i32); // 0,0,0,undef
+ int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
+ if (Val < 16)
+ return DAG.getTargetConstant(Val, MVT::i32); // 0,0,0,4 -> vspltisw(4)
+ }
+ if (LeadingOnes) {
+ if (UniquedVals[Multiple-1].getNode() == 0)
+ return DAG.getTargetConstant(~0U, MVT::i32); // -1,-1,-1,undef
+ int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
+ if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2)
+ return DAG.getTargetConstant(Val, MVT::i32);
+ }
+
+ return SDValue();
+ }
+
+ // Check to see if this buildvec has a single non-undef value in its elements.
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+ if (OpVal.getNode() == 0)
+ OpVal = N->getOperand(i);
+ else if (OpVal != N->getOperand(i))
+ return SDValue();
+ }
+
+ if (OpVal.getNode() == 0) return SDValue(); // All UNDEF: use implicit def.
+
+ unsigned ValSizeInBytes = EltSize;
+ uint64_t Value = 0;
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
+ Value = CN->getZExtValue();
+ } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
+ assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
+ Value = FloatToBits(CN->getValueAPF().convertToFloat());
+ }
+
+ // If the splat value is larger than the element value, then we can never do
+ // this splat. The only case that we could fit the replicated bits into our
+ // immediate field for would be zero, and we prefer to use vxor for it.
+ if (ValSizeInBytes < ByteSize) return SDValue();
+
+ // If the element value is larger than the splat value, cut it in half and
+ // check to see if the two halves are equal. Continue doing this until we
+ // get to ByteSize. This allows us to handle 0x01010101 as 0x01.
+ while (ValSizeInBytes > ByteSize) {
+ ValSizeInBytes >>= 1;
+
+ // If the top half equals the bottom half, we're still ok.
+ if (((Value >> (ValSizeInBytes*8)) & ((1 << (8*ValSizeInBytes))-1)) !=
+ (Value & ((1 << (8*ValSizeInBytes))-1)))
+ return SDValue();
+ }
+
+ // Properly sign extend the value.
+ int ShAmt = (4-ByteSize)*8;
+ int MaskVal = ((int)Value << ShAmt) >> ShAmt;
+
+ // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
+ if (MaskVal == 0) return SDValue();
+
+ // Finally, if this value fits in a 5 bit sext field, return it
+ if (((MaskVal << (32-5)) >> (32-5)) == MaskVal)
+ return DAG.getTargetConstant(MaskVal, MVT::i32);
+ return SDValue();
+}
+
+//===----------------------------------------------------------------------===//
+// Addressing Mode Selection
+//===----------------------------------------------------------------------===//
+
+/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
+/// or 64-bit immediate, and if the value can be accurately represented as a
+/// sign extension from a 16-bit value. If so, this returns true and the
+/// immediate.
+static bool isIntS16Immediate(SDNode *N, short &Imm) {
+ if (N->getOpcode() != ISD::Constant)
+ return false;
+
+ Imm = (short)cast<ConstantSDNode>(N)->getZExtValue();
+ if (N->getValueType(0) == MVT::i32)
+ return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
+ else
+ return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
+}
+static bool isIntS16Immediate(SDValue Op, short &Imm) {
+ return isIntS16Immediate(Op.getNode(), Imm);
+}
+
+
+/// SelectAddressRegReg - Given the specified addressed, check to see if it
+/// can be represented as an indexed [r+r] operation. Returns false if it
+/// can be more efficiently represented with [r+imm].
+bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
+ SDValue &Index,
+ SelectionDAG &DAG) const {
+ short imm = 0;
+ if (N.getOpcode() == ISD::ADD) {
+ if (isIntS16Immediate(N.getOperand(1), imm))
+ return false; // r+i
+ if (N.getOperand(1).getOpcode() == PPCISD::Lo)
+ return false; // r+i
+
+ Base = N.getOperand(0);
+ Index = N.getOperand(1);
+ return true;
+ } else if (N.getOpcode() == ISD::OR) {
+ if (isIntS16Immediate(N.getOperand(1), imm))
+ return false; // r+i can fold it if we can.
+
+ // If this is an or of disjoint bitfields, we can codegen this as an add
+ // (for better address arithmetic) if the LHS and RHS of the OR are provably
+ // disjoint.
+ APInt LHSKnownZero, LHSKnownOne;
+ APInt RHSKnownZero, RHSKnownOne;
+ DAG.ComputeMaskedBits(N.getOperand(0),
+ APInt::getAllOnesValue(N.getOperand(0)
+ .getValueSizeInBits()),
+ LHSKnownZero, LHSKnownOne);
+
+ if (LHSKnownZero.getBoolValue()) {
+ DAG.ComputeMaskedBits(N.getOperand(1),
+ APInt::getAllOnesValue(N.getOperand(1)
+ .getValueSizeInBits()),
+ RHSKnownZero, RHSKnownOne);
+ // If all of the bits are known zero on the LHS or RHS, the add won't
+ // carry.
+ if (~(LHSKnownZero | RHSKnownZero) == 0) {
+ Base = N.getOperand(0);
+ Index = N.getOperand(1);
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+/// Returns true if the address N can be represented by a base register plus
+/// a signed 16-bit displacement [r+imm], and if it is not better
+/// represented as reg+reg.
+bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
+ SDValue &Base,
+ SelectionDAG &DAG) const {
+ // FIXME dl should come from parent load or store, not from address
+ DebugLoc dl = N.getDebugLoc();
+ // If this can be more profitably realized as r+r, fail.
+ if (SelectAddressRegReg(N, Disp, Base, DAG))
+ return false;
+
+ if (N.getOpcode() == ISD::ADD) {
+ short imm = 0;
+ if (isIntS16Immediate(N.getOperand(1), imm)) {
+ Disp = DAG.getTargetConstant((int)imm & 0xFFFF, MVT::i32);
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
+ Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
+ } else {
+ Base = N.getOperand(0);
+ }
+ return true; // [r+i]
+ } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
+ // Match LOAD (ADD (X, Lo(G))).
+ assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
+ && "Cannot handle constant offsets yet!");
+ Disp = N.getOperand(1).getOperand(0); // The global address.
+ assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
+ Disp.getOpcode() == ISD::TargetConstantPool ||
+ Disp.getOpcode() == ISD::TargetJumpTable);
+ Base = N.getOperand(0);
+ return true; // [&g+r]
+ }
+ } else if (N.getOpcode() == ISD::OR) {
+ short imm = 0;
+ if (isIntS16Immediate(N.getOperand(1), imm)) {
+ // If this is an or of disjoint bitfields, we can codegen this as an add
+ // (for better address arithmetic) if the LHS and RHS of the OR are
+ // provably disjoint.
+ APInt LHSKnownZero, LHSKnownOne;
+ DAG.ComputeMaskedBits(N.getOperand(0),
+ APInt::getAllOnesValue(N.getOperand(0)
+ .getValueSizeInBits()),
+ LHSKnownZero, LHSKnownOne);
+
+ if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
+ // If all of the bits are known zero on the LHS or RHS, the add won't
+ // carry.
+ Base = N.getOperand(0);
+ Disp = DAG.getTargetConstant((int)imm & 0xFFFF, MVT::i32);
+ return true;
+ }
+ }
+ } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
+ // Loading from a constant address.
+
+ // If this address fits entirely in a 16-bit sext immediate field, codegen
+ // this as "d, 0"
+ short Imm;
+ if (isIntS16Immediate(CN, Imm)) {
+ Disp = DAG.getTargetConstant(Imm, CN->getValueType(0));
+ Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::X0 : PPC::R0,
+ CN->getValueType(0));
+ return true;
+ }
+
+ // Handle 32-bit sext immediates with LIS + addr mode.
+ if (CN->getValueType(0) == MVT::i32 ||
+ (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) {
+ int Addr = (int)CN->getZExtValue();
+
+ // Otherwise, break this down into an LIS + disp.
+ Disp = DAG.getTargetConstant((short)Addr, MVT::i32);
+
+ Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, MVT::i32);
+ unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
+ Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
+ return true;
+ }
+ }
+
+ Disp = DAG.getTargetConstant(0, getPointerTy());
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N))
+ Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
+ else
+ Base = N;
+ return true; // [r+0]
+}
+
+/// SelectAddressRegRegOnly - Given the specified addressed, force it to be
+/// represented as an indexed [r+r] operation.
+bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
+ SDValue &Index,
+ SelectionDAG &DAG) const {
+ // Check to see if we can easily represent this as an [r+r] address. This
+ // will fail if it thinks that the address is more profitably represented as
+ // reg+imm, e.g. where imm = 0.
+ if (SelectAddressRegReg(N, Base, Index, DAG))
+ return true;
+
+ // If the operand is an addition, always emit this as [r+r], since this is
+ // better (for code size, and execution, as the memop does the add for free)
+ // than emitting an explicit add.
+ if (N.getOpcode() == ISD::ADD) {
+ Base = N.getOperand(0);
+ Index = N.getOperand(1);
+ return true;
+ }
+
+ // Otherwise, do it the hard way, using R0 as the base register.
+ Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::X0 : PPC::R0,
+ N.getValueType());
+ Index = N;
+ return true;
+}
+
+/// SelectAddressRegImmShift - Returns true if the address N can be
+/// represented by a base register plus a signed 14-bit displacement
+/// [r+imm*4]. Suitable for use by STD and friends.
+bool PPCTargetLowering::SelectAddressRegImmShift(SDValue N, SDValue &Disp,
+ SDValue &Base,
+ SelectionDAG &DAG) const {
+ // FIXME dl should come from the parent load or store, not the address
+ DebugLoc dl = N.getDebugLoc();
+ // If this can be more profitably realized as r+r, fail.
+ if (SelectAddressRegReg(N, Disp, Base, DAG))
+ return false;
+
+ if (N.getOpcode() == ISD::ADD) {
+ short imm = 0;
+ if (isIntS16Immediate(N.getOperand(1), imm) && (imm & 3) == 0) {
+ Disp = DAG.getTargetConstant(((int)imm & 0xFFFF) >> 2, MVT::i32);
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
+ Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
+ } else {
+ Base = N.getOperand(0);
+ }
+ return true; // [r+i]
+ } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
+ // Match LOAD (ADD (X, Lo(G))).
+ assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
+ && "Cannot handle constant offsets yet!");
+ Disp = N.getOperand(1).getOperand(0); // The global address.
+ assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
+ Disp.getOpcode() == ISD::TargetConstantPool ||
+ Disp.getOpcode() == ISD::TargetJumpTable);
+ Base = N.getOperand(0);
+ return true; // [&g+r]
+ }
+ } else if (N.getOpcode() == ISD::OR) {
+ short imm = 0;
+ if (isIntS16Immediate(N.getOperand(1), imm) && (imm & 3) == 0) {
+ // If this is an or of disjoint bitfields, we can codegen this as an add
+ // (for better address arithmetic) if the LHS and RHS of the OR are
+ // provably disjoint.
+ APInt LHSKnownZero, LHSKnownOne;
+ DAG.ComputeMaskedBits(N.getOperand(0),
+ APInt::getAllOnesValue(N.getOperand(0)
+ .getValueSizeInBits()),
+ LHSKnownZero, LHSKnownOne);
+ if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
+ // If all of the bits are known zero on the LHS or RHS, the add won't
+ // carry.
+ Base = N.getOperand(0);
+ Disp = DAG.getTargetConstant(((int)imm & 0xFFFF) >> 2, MVT::i32);
+ return true;
+ }
+ }
+ } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
+ // Loading from a constant address. Verify low two bits are clear.
+ if ((CN->getZExtValue() & 3) == 0) {
+ // If this address fits entirely in a 14-bit sext immediate field, codegen
+ // this as "d, 0"
+ short Imm;
+ if (isIntS16Immediate(CN, Imm)) {
+ Disp = DAG.getTargetConstant((unsigned short)Imm >> 2, getPointerTy());
+ Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::X0 : PPC::R0,
+ CN->getValueType(0));
+ return true;
+ }
+
+ // Fold the low-part of 32-bit absolute addresses into addr mode.
+ if (CN->getValueType(0) == MVT::i32 ||
+ (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) {
+ int Addr = (int)CN->getZExtValue();
+
+ // Otherwise, break this down into an LIS + disp.
+ Disp = DAG.getTargetConstant((short)Addr >> 2, MVT::i32);
+ Base = DAG.getTargetConstant((Addr-(signed short)Addr) >> 16, MVT::i32);
+ unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
+ Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base),0);
+ return true;
+ }
+ }
+ }
+
+ Disp = DAG.getTargetConstant(0, getPointerTy());
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N))
+ Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
+ else
+ Base = N;
+ return true; // [r+0]
+}
+
+
+/// getPreIndexedAddressParts - returns true by value, base pointer and
+/// offset pointer and addressing mode by reference if the node's address
+/// can be legally represented as pre-indexed load / store address.
+bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
+ SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const {
+ // Disabled by default for now.
+ if (!EnablePPCPreinc) return false;
+
+ SDValue Ptr;
+ EVT VT;
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ Ptr = LD->getBasePtr();
+ VT = LD->getMemoryVT();
+
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ Ptr = ST->getBasePtr();
+ VT = ST->getMemoryVT();
+ } else
+ return false;
+
+ // PowerPC doesn't have preinc load/store instructions for vectors.
+ if (VT.isVector())
+ return false;
+
+ // TODO: Check reg+reg first.
+
+ // LDU/STU use reg+imm*4, others use reg+imm.
+ if (VT != MVT::i64) {
+ // reg + imm
+ if (!SelectAddressRegImm(Ptr, Offset, Base, DAG))
+ return false;
+ } else {
+ // reg + imm * 4.
+ if (!SelectAddressRegImmShift(Ptr, Offset, Base, DAG))
+ return false;
+ }
+
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of
+ // sext i32 to i64 when addr mode is r+i.
+ if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
+ LD->getExtensionType() == ISD::SEXTLOAD &&
+ isa<ConstantSDNode>(Offset))
+ return false;
+ }
+
+ AM = ISD::PRE_INC;
+ return true;
+}
+
+//===----------------------------------------------------------------------===//
+// LowerOperation implementation
+//===----------------------------------------------------------------------===//
+
+/// GetLabelAccessInfo - Return true if we should reference labels using a
+/// PICBase, set the HiOpFlags and LoOpFlags to the target MO flags.
+static bool GetLabelAccessInfo(const TargetMachine &TM, unsigned &HiOpFlags,
+ unsigned &LoOpFlags, const GlobalValue *GV = 0) {
+ HiOpFlags = PPCII::MO_HA16;
+ LoOpFlags = PPCII::MO_LO16;
+
+ // Don't use the pic base if not in PIC relocation model. Or if we are on a
+ // non-darwin platform. We don't support PIC on other platforms yet.
+ bool isPIC = TM.getRelocationModel() == Reloc::PIC_ &&
+ TM.getSubtarget<PPCSubtarget>().isDarwin();
+ if (isPIC) {
+ HiOpFlags |= PPCII::MO_PIC_FLAG;
+ LoOpFlags |= PPCII::MO_PIC_FLAG;
+ }
+
+ // If this is a reference to a global value that requires a non-lazy-ptr, make
+ // sure that instruction lowering adds it.
+ if (GV && TM.getSubtarget<PPCSubtarget>().hasLazyResolverStub(GV, TM)) {
+ HiOpFlags |= PPCII::MO_NLP_FLAG;
+ LoOpFlags |= PPCII::MO_NLP_FLAG;
+
+ if (GV->hasHiddenVisibility()) {
+ HiOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
+ LoOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
+ }
+ }
+
+ return isPIC;
+}
+
+static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
+ SelectionDAG &DAG) {
+ EVT PtrVT = HiPart.getValueType();
+ SDValue Zero = DAG.getConstant(0, PtrVT);
+ DebugLoc DL = HiPart.getDebugLoc();
+
+ SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
+ SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
+
+ // With PIC, the first instruction is actually "GR+hi(&G)".
+ if (isPIC)
+ Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
+ DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
+
+ // Generate non-pic code that has direct accesses to the constant pool.
+ // The address of the global is just (hi(&g)+lo(&g)).
+ return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
+}
+
+SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT PtrVT = Op.getValueType();
+ ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
+ const Constant *C = CP->getConstVal();
+
+ unsigned MOHiFlag, MOLoFlag;
+ bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
+ SDValue CPIHi =
+ DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOHiFlag);
+ SDValue CPILo =
+ DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOLoFlag);
+ return LowerLabelRef(CPIHi, CPILo, isPIC, DAG);
+}
+
+SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
+ EVT PtrVT = Op.getValueType();
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+
+ unsigned MOHiFlag, MOLoFlag;
+ bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
+ SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
+ SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
+ return LowerLabelRef(JTIHi, JTILo, isPIC, DAG);
+}
+
+SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT PtrVT = Op.getValueType();
+
+ const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
+
+ unsigned MOHiFlag, MOLoFlag;
+ bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
+ SDValue TgtBAHi = DAG.getBlockAddress(BA, PtrVT, /*isTarget=*/true, MOHiFlag);
+ SDValue TgtBALo = DAG.getBlockAddress(BA, PtrVT, /*isTarget=*/true, MOLoFlag);
+ return LowerLabelRef(TgtBAHi, TgtBALo, isPIC, DAG);
+}
+
+SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT PtrVT = Op.getValueType();
+ GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
+ DebugLoc DL = GSDN->getDebugLoc();
+ const GlobalValue *GV = GSDN->getGlobal();
+
+ // 64-bit SVR4 ABI code is always position-independent.
+ // The actual address of the GlobalValue is stored in the TOC.
+ if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) {
+ SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
+ return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i64, GA,
+ DAG.getRegister(PPC::X2, MVT::i64));
+ }
+
+ unsigned MOHiFlag, MOLoFlag;
+ bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag, GV);
+
+ SDValue GAHi =
+ DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
+ SDValue GALo =
+ DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
+
+ SDValue Ptr = LowerLabelRef(GAHi, GALo, isPIC, DAG);
+
+ // If the global reference is actually to a non-lazy-pointer, we have to do an
+ // extra load to get the address of the global.
+ if (MOHiFlag & PPCII::MO_NLP_FLAG)
+ Ptr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo(),
+ false, false, 0);
+ return Ptr;
+}
+
+SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+ DebugLoc dl = Op.getDebugLoc();
+
+ // If we're comparing for equality to zero, expose the fact that this is
+ // implented as a ctlz/srl pair on ppc, so that the dag combiner can
+ // fold the new nodes.
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ if (C->isNullValue() && CC == ISD::SETEQ) {
+ EVT VT = Op.getOperand(0).getValueType();
+ SDValue Zext = Op.getOperand(0);
+ if (VT.bitsLT(MVT::i32)) {
+ VT = MVT::i32;
+ Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
+ }
+ unsigned Log2b = Log2_32(VT.getSizeInBits());
+ SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
+ SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
+ DAG.getConstant(Log2b, MVT::i32));
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
+ }
+ // Leave comparisons against 0 and -1 alone for now, since they're usually
+ // optimized. FIXME: revisit this when we can custom lower all setcc
+ // optimizations.
+ if (C->isAllOnesValue() || C->isNullValue())
+ return SDValue();
+ }
+
+ // If we have an integer seteq/setne, turn it into a compare against zero
+ // by xor'ing the rhs with the lhs, which is faster than setting a
+ // condition register, reading it back out, and masking the correct bit. The
+ // normal approach here uses sub to do this instead of xor. Using xor exposes
+ // the result to other bit-twiddling opportunities.
+ EVT LHSVT = Op.getOperand(0).getValueType();
+ if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
+ EVT VT = Op.getValueType();
+ SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0),
+ Op.getOperand(1));
+ return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, LHSVT), CC);
+ }
+ return SDValue();
+}
+
+SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) const {
+ SDNode *Node = Op.getNode();
+ EVT VT = Node->getValueType(0);
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ SDValue InChain = Node->getOperand(0);
+ SDValue VAListPtr = Node->getOperand(1);
+ const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
+ DebugLoc dl = Node->getDebugLoc();
+
+ assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
+
+ // gpr_index
+ SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
+ VAListPtr, MachinePointerInfo(SV), MVT::i8,
+ false, false, 0);
+ InChain = GprIndex.getValue(1);
+
+ if (VT == MVT::i64) {
+ // Check if GprIndex is even
+ SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
+ DAG.getConstant(1, MVT::i32));
+ SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
+ DAG.getConstant(0, MVT::i32), ISD::SETNE);
+ SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
+ DAG.getConstant(1, MVT::i32));
+ // Align GprIndex to be even if it isn't
+ GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
+ GprIndex);
+ }
+
+ // fpr index is 1 byte after gpr
+ SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
+ DAG.getConstant(1, MVT::i32));
+
+ // fpr
+ SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
+ FprPtr, MachinePointerInfo(SV), MVT::i8,
+ false, false, 0);
+ InChain = FprIndex.getValue(1);
+
+ SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
+ DAG.getConstant(8, MVT::i32));
+
+ SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
+ DAG.getConstant(4, MVT::i32));
+
+ // areas
+ SDValue OverflowArea = DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr,
+ MachinePointerInfo(), false, false, 0);
+ InChain = OverflowArea.getValue(1);
+
+ SDValue RegSaveArea = DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr,
+ MachinePointerInfo(), false, false, 0);
+ InChain = RegSaveArea.getValue(1);
+
+ // select overflow_area if index > 8
+ SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
+ DAG.getConstant(8, MVT::i32), ISD::SETLT);
+
+ // adjustment constant gpr_index * 4/8
+ SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
+ VT.isInteger() ? GprIndex : FprIndex,
+ DAG.getConstant(VT.isInteger() ? 4 : 8,
+ MVT::i32));
+
+ // OurReg = RegSaveArea + RegConstant
+ SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
+ RegConstant);
+
+ // Floating types are 32 bytes into RegSaveArea
+ if (VT.isFloatingPoint())
+ OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
+ DAG.getConstant(32, MVT::i32));
+
+ // increase {f,g}pr_index by 1 (or 2 if VT is i64)
+ SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
+ VT.isInteger() ? GprIndex : FprIndex,
+ DAG.getConstant(VT == MVT::i64 ? 2 : 1,
+ MVT::i32));
+
+ InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
+ VT.isInteger() ? VAListPtr : FprPtr,
+ MachinePointerInfo(SV),
+ MVT::i8, false, false, 0);
+
+ // determine if we should load from reg_save_area or overflow_area
+ SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
+
+ // increase overflow_area by 4/8 if gpr/fpr > 8
+ SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
+ DAG.getConstant(VT.isInteger() ? 4 : 8,
+ MVT::i32));
+
+ OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
+ OverflowAreaPlusN);
+
+ InChain = DAG.getTruncStore(InChain, dl, OverflowArea,
+ OverflowAreaPtr,
+ MachinePointerInfo(),
+ MVT::i32, false, false, 0);
+
+ return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo(), false, false, 0);
+}
+
+SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
+ SelectionDAG &DAG) const {
+ return Op.getOperand(0);
+}
+
+SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue Chain = Op.getOperand(0);
+ SDValue Trmp = Op.getOperand(1); // trampoline
+ SDValue FPtr = Op.getOperand(2); // nested function
+ SDValue Nest = Op.getOperand(3); // 'nest' parameter value
+ DebugLoc dl = Op.getDebugLoc();
+
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ bool isPPC64 = (PtrVT == MVT::i64);
+ Type *IntPtrTy =
+ DAG.getTargetLoweringInfo().getTargetData()->getIntPtrType(
+ *DAG.getContext());
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+
+ Entry.Ty = IntPtrTy;
+ Entry.Node = Trmp; Args.push_back(Entry);
+
+ // TrampSize == (isPPC64 ? 48 : 40);
+ Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40,
+ isPPC64 ? MVT::i64 : MVT::i32);
+ Args.push_back(Entry);
+
+ Entry.Node = FPtr; Args.push_back(Entry);
+ Entry.Node = Nest; Args.push_back(Entry);
+
+ // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
+ std::pair<SDValue, SDValue> CallResult =
+ LowerCallTo(Chain, Type::getVoidTy(*DAG.getContext()),
+ false, false, false, false, 0, CallingConv::C, false,
+ /*isReturnValueUsed=*/true,
+ DAG.getExternalSymbol("__trampoline_setup", PtrVT),
+ Args, DAG, dl);
+
+ return CallResult.second;
+}
+
+SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) {
+ // vastart just stores the address of the VarArgsFrameIndex slot into the
+ // memory location argument.
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+ return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
+ MachinePointerInfo(SV),
+ false, false, 0);
+ }
+
+ // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
+ // We suppose the given va_list is already allocated.
+ //
+ // typedef struct {
+ // char gpr; /* index into the array of 8 GPRs
+ // * stored in the register save area
+ // * gpr=0 corresponds to r3,
+ // * gpr=1 to r4, etc.
+ // */
+ // char fpr; /* index into the array of 8 FPRs
+ // * stored in the register save area
+ // * fpr=0 corresponds to f1,
+ // * fpr=1 to f2, etc.
+ // */
+ // char *overflow_arg_area;
+ // /* location on stack that holds
+ // * the next overflow argument
+ // */
+ // char *reg_save_area;
+ // /* where r3:r10 and f1:f8 (if saved)
+ // * are stored
+ // */
+ // } va_list[1];
+
+
+ SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), MVT::i32);
+ SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), MVT::i32);
+
+
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+
+ SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
+ PtrVT);
+ SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
+ PtrVT);
+
+ uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
+ SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, PtrVT);
+
+ uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
+ SDValue ConstStackOffset = DAG.getConstant(StackOffset, PtrVT);
+
+ uint64_t FPROffset = 1;
+ SDValue ConstFPROffset = DAG.getConstant(FPROffset, PtrVT);
+
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+
+ // Store first byte : number of int regs
+ SDValue firstStore = DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR,
+ Op.getOperand(1),
+ MachinePointerInfo(SV),
+ MVT::i8, false, false, 0);
+ uint64_t nextOffset = FPROffset;
+ SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
+ ConstFPROffset);
+
+ // Store second byte : number of float regs
+ SDValue secondStore =
+ DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
+ MachinePointerInfo(SV, nextOffset), MVT::i8,
+ false, false, 0);
+ nextOffset += StackOffset;
+ nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
+
+ // Store second word : arguments given on stack
+ SDValue thirdStore =
+ DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
+ MachinePointerInfo(SV, nextOffset),
+ false, false, 0);
+ nextOffset += FrameOffset;
+ nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
+
+ // Store third word : arguments given in registers
+ return DAG.getStore(thirdStore, dl, FR, nextPtr,
+ MachinePointerInfo(SV, nextOffset),
+ false, false, 0);
+
+}
+
+#include "PPCGenCallingConv.inc"
+
+static bool CC_PPC_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ return true;
+}
+
+static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ static const unsigned ArgRegs[] = {
+ PPC::R3, PPC::R4, PPC::R5, PPC::R6,
+ PPC::R7, PPC::R8, PPC::R9, PPC::R10,
+ };
+ const unsigned NumArgRegs = array_lengthof(ArgRegs);
+
+ unsigned RegNum = State.getFirstUnallocated(ArgRegs, NumArgRegs);
+
+ // Skip one register if the first unallocated register has an even register
+ // number and there are still argument registers available which have not been
+ // allocated yet. RegNum is actually an index into ArgRegs, which means we
+ // need to skip a register if RegNum is odd.
+ if (RegNum != NumArgRegs && RegNum % 2 == 1) {
+ State.AllocateReg(ArgRegs[RegNum]);
+ }
+
+ // Always return false here, as this function only makes sure that the first
+ // unallocated register has an odd register number and does not actually
+ // allocate a register for the current argument.
+ return false;
+}
+
+static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ static const unsigned ArgRegs[] = {
+ PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
+ PPC::F8
+ };
+
+ const unsigned NumArgRegs = array_lengthof(ArgRegs);
+
+ unsigned RegNum = State.getFirstUnallocated(ArgRegs, NumArgRegs);
+
+ // If there is only one Floating-point register left we need to put both f64
+ // values of a split ppc_fp128 value on the stack.
+ if (RegNum != NumArgRegs && ArgRegs[RegNum] == PPC::F8) {
+ State.AllocateReg(ArgRegs[RegNum]);
+ }
+
+ // Always return false here, as this function only makes sure that the two f64
+ // values a ppc_fp128 value is split into are both passed in registers or both
+ // passed on the stack and does not actually allocate a register for the
+ // current argument.
+ return false;
+}
+
+/// GetFPR - Get the set of FP registers that should be allocated for arguments,
+/// on Darwin.
+static const unsigned *GetFPR() {
+ static const unsigned FPR[] = {
+ PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
+ PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13
+ };
+
+ return FPR;
+}
+
+/// CalculateStackSlotSize - Calculates the size reserved for this argument on
+/// the stack.
+static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
+ unsigned PtrByteSize) {
+ unsigned ArgSize = ArgVT.getSizeInBits()/8;
+ if (Flags.isByVal())
+ ArgSize = Flags.getByValSize();
+ ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+
+ return ArgSize;
+}
+
+SDValue
+PPCTargetLowering::LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals)
+ const {
+ if (PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64()) {
+ return LowerFormalArguments_SVR4(Chain, CallConv, isVarArg, Ins,
+ dl, DAG, InVals);
+ } else {
+ return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins,
+ dl, DAG, InVals);
+ }
+}
+
+SDValue
+PPCTargetLowering::LowerFormalArguments_SVR4(
+ SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+
+ // 32-bit SVR4 ABI Stack Frame Layout:
+ // +-----------------------------------+
+ // +--> | Back chain |
+ // | +-----------------------------------+
+ // | | Floating-point register save area |
+ // | +-----------------------------------+
+ // | | General register save area |
+ // | +-----------------------------------+
+ // | | CR save word |
+ // | +-----------------------------------+
+ // | | VRSAVE save word |
+ // | +-----------------------------------+
+ // | | Alignment padding |
+ // | +-----------------------------------+
+ // | | Vector register save area |
+ // | +-----------------------------------+
+ // | | Local variable space |
+ // | +-----------------------------------+
+ // | | Parameter list area |
+ // | +-----------------------------------+
+ // | | LR save word |
+ // | +-----------------------------------+
+ // SP--> +--- | Back chain |
+ // +-----------------------------------+
+ //
+ // Specifications:
+ // System V Application Binary Interface PowerPC Processor Supplement
+ // AltiVec Technology Programming Interface Manual
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ // Potential tail calls could cause overwriting of argument stack slots.
+ bool isImmutable = !(GuaranteedTailCallOpt && (CallConv==CallingConv::Fast));
+ unsigned PtrByteSize = 4;
+
+ // Assign locations to all of the incoming arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+
+ // Reserve space for the linkage area on the stack.
+ CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false), PtrByteSize);
+
+ CCInfo.AnalyzeFormalArguments(Ins, CC_PPC_SVR4);
+
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+
+ // Arguments stored in registers.
+ if (VA.isRegLoc()) {
+ TargetRegisterClass *RC;
+ EVT ValVT = VA.getValVT();
+
+ switch (ValVT.getSimpleVT().SimpleTy) {
+ default:
+ llvm_unreachable("ValVT not supported by formal arguments Lowering");
+ case MVT::i32:
+ RC = PPC::GPRCRegisterClass;
+ break;
+ case MVT::f32:
+ RC = PPC::F4RCRegisterClass;
+ break;
+ case MVT::f64:
+ RC = PPC::F8RCRegisterClass;
+ break;
+ case MVT::v16i8:
+ case MVT::v8i16:
+ case MVT::v4i32:
+ case MVT::v4f32:
+ RC = PPC::VRRCRegisterClass;
+ break;
+ }
+
+ // Transform the arguments stored in physical registers into virtual ones.
+ unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
+ SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, ValVT);
+
+ InVals.push_back(ArgValue);
+ } else {
+ // Argument stored in memory.
+ assert(VA.isMemLoc());
+
+ unsigned ArgSize = VA.getLocVT().getSizeInBits() / 8;
+ int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(),
+ isImmutable);
+
+ // Create load nodes to retrieve arguments from the stack.
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+ InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
+ MachinePointerInfo(),
+ false, false, 0));
+ }
+ }
+
+ // Assign locations to all of the incoming aggregate by value arguments.
+ // Aggregates passed by value are stored in the local variable space of the
+ // caller's stack frame, right above the parameter list area.
+ SmallVector<CCValAssign, 16> ByValArgLocs;
+ CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ByValArgLocs, *DAG.getContext());
+
+ // Reserve stack space for the allocations in CCInfo.
+ CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
+
+ CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC_SVR4_ByVal);
+
+ // Area that is at least reserved in the caller of this function.
+ unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
+
+ // Set the size that is at least reserved in caller of this function. Tail
+ // call optimized function's reserved stack space needs to be aligned so that
+ // taking the difference between two stack areas will result in an aligned
+ // stack.
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+
+ MinReservedArea =
+ std::max(MinReservedArea,
+ PPCFrameLowering::getMinCallFrameSize(false, false));
+
+ unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameLowering()->
+ getStackAlignment();
+ unsigned AlignMask = TargetAlign-1;
+ MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask;
+
+ FI->setMinReservedArea(MinReservedArea);
+
+ SmallVector<SDValue, 8> MemOps;
+
+ // If the function takes variable number of arguments, make a frame index for
+ // the start of the first vararg value... for expansion of llvm.va_start.
+ if (isVarArg) {
+ static const unsigned GPArgRegs[] = {
+ PPC::R3, PPC::R4, PPC::R5, PPC::R6,
+ PPC::R7, PPC::R8, PPC::R9, PPC::R10,
+ };
+ const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);
+
+ static const unsigned FPArgRegs[] = {
+ PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
+ PPC::F8
+ };
+ const unsigned NumFPArgRegs = array_lengthof(FPArgRegs);
+
+ FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs,
+ NumGPArgRegs));
+ FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs,
+ NumFPArgRegs));
+
+ // Make room for NumGPArgRegs and NumFPArgRegs.
+ int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
+ NumFPArgRegs * EVT(MVT::f64).getSizeInBits()/8;
+
+ FuncInfo->setVarArgsStackOffset(
+ MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
+ CCInfo.getNextStackOffset(), true));
+
+ FuncInfo->setVarArgsFrameIndex(MFI->CreateStackObject(Depth, 8, false));
+ SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
+
+ // The fixed integer arguments of a variadic function are stored to the
+ // VarArgsFrameIndex on the stack so that they may be loaded by deferencing
+ // the result of va_next.
+ for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
+ // Get an existing live-in vreg, or add a new one.
+ unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
+ if (!VReg)
+ VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
+
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
+ SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo(), false, false, 0);
+ MemOps.push_back(Store);
+ // Increment the address by four for the next argument to store
+ SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
+ FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
+ }
+
+ // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
+ // is set.
+ // The double arguments are stored to the VarArgsFrameIndex
+ // on the stack.
+ for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
+ // Get an existing live-in vreg, or add a new one.
+ unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
+ if (!VReg)
+ VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
+
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
+ SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo(), false, false, 0);
+ MemOps.push_back(Store);
+ // Increment the address by eight for the next argument to store
+ SDValue PtrOff = DAG.getConstant(EVT(MVT::f64).getSizeInBits()/8,
+ PtrVT);
+ FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
+ }
+ }
+
+ if (!MemOps.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl,
+ MVT::Other, &MemOps[0], MemOps.size());
+
+ return Chain;
+}
+
+SDValue
+PPCTargetLowering::LowerFormalArguments_Darwin(
+ SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ // TODO: add description of PPC stack frame format, or at least some docs.
+ //
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ bool isPPC64 = PtrVT == MVT::i64;
+ // Potential tail calls could cause overwriting of argument stack slots.
+ bool isImmutable = !(GuaranteedTailCallOpt && (CallConv==CallingConv::Fast));
+ unsigned PtrByteSize = isPPC64 ? 8 : 4;
+
+ unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true);
+ // Area that is at least reserved in caller of this function.
+ unsigned MinReservedArea = ArgOffset;
+
+ static const unsigned GPR_32[] = { // 32-bit registers.
+ PPC::R3, PPC::R4, PPC::R5, PPC::R6,
+ PPC::R7, PPC::R8, PPC::R9, PPC::R10,
+ };
+ static const unsigned GPR_64[] = { // 64-bit registers.
+ PPC::X3, PPC::X4, PPC::X5, PPC::X6,
+ PPC::X7, PPC::X8, PPC::X9, PPC::X10,
+ };
+
+ static const unsigned *FPR = GetFPR();
+
+ static const unsigned VR[] = {
+ PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
+ PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
+ };
+
+ const unsigned Num_GPR_Regs = array_lengthof(GPR_32);
+ const unsigned Num_FPR_Regs = 13;
+ const unsigned Num_VR_Regs = array_lengthof( VR);
+
+ unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
+
+ const unsigned *GPR = isPPC64 ? GPR_64 : GPR_32;
+
+ // In 32-bit non-varargs functions, the stack space for vectors is after the
+ // stack space for non-vectors. We do not use this space unless we have
+ // too many vectors to fit in registers, something that only occurs in
+ // constructed examples:), but we have to walk the arglist to figure
+ // that out...for the pathological case, compute VecArgOffset as the
+ // start of the vector parameter area. Computing VecArgOffset is the
+ // entire point of the following loop.
+ unsigned VecArgOffset = ArgOffset;
+ if (!isVarArg && !isPPC64) {
+ for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e;
+ ++ArgNo) {
+ EVT ObjectVT = Ins[ArgNo].VT;
+ unsigned ObjSize = ObjectVT.getSizeInBits()/8;
+ ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
+
+ if (Flags.isByVal()) {
+ // ObjSize is the true size, ArgSize rounded up to multiple of regs.
+ ObjSize = Flags.getByValSize();
+ unsigned ArgSize =
+ ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+ VecArgOffset += ArgSize;
+ continue;
+ }
+
+ switch(ObjectVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unhandled argument type!");
+ case MVT::i32:
+ case MVT::f32:
+ VecArgOffset += isPPC64 ? 8 : 4;
+ break;
+ case MVT::i64: // PPC64
+ case MVT::f64:
+ VecArgOffset += 8;
+ break;
+ case MVT::v4f32:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v16i8:
+ // Nothing to do, we're only looking at Nonvector args here.
+ break;
+ }
+ }
+ }
+ // We've found where the vector parameter area in memory is. Skip the
+ // first 12 parameters; these don't use that memory.
+ VecArgOffset = ((VecArgOffset+15)/16)*16;
+ VecArgOffset += 12*16;
+
+ // Add DAG nodes to load the arguments or copy them out of registers. On
+ // entry to a function on PPC, the arguments start after the linkage area,
+ // although the first ones are often in registers.
+
+ SmallVector<SDValue, 8> MemOps;
+ unsigned nAltivecParamsAtEnd = 0;
+ for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
+ SDValue ArgVal;
+ bool needsLoad = false;
+ EVT ObjectVT = Ins[ArgNo].VT;
+ unsigned ObjSize = ObjectVT.getSizeInBits()/8;
+ unsigned ArgSize = ObjSize;
+ ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
+
+ unsigned CurArgOffset = ArgOffset;
+
+ // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
+ if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
+ ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
+ if (isVarArg || isPPC64) {
+ MinReservedArea = ((MinReservedArea+15)/16)*16;
+ MinReservedArea += CalculateStackSlotSize(ObjectVT,
+ Flags,
+ PtrByteSize);
+ } else nAltivecParamsAtEnd++;
+ } else
+ // Calculate min reserved area.
+ MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT,
+ Flags,
+ PtrByteSize);
+
+ // FIXME the codegen can be much improved in some cases.
+ // We do not have to keep everything in memory.
+ if (Flags.isByVal()) {
+ // ObjSize is the true size, ArgSize rounded up to multiple of registers.
+ ObjSize = Flags.getByValSize();
+ ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+ // Objects of size 1 and 2 are right justified, everything else is
+ // left justified. This means the memory address is adjusted forwards.
+ if (ObjSize==1 || ObjSize==2) {
+ CurArgOffset = CurArgOffset + (4 - ObjSize);
+ }
+ // The value of the object is its address.
+ int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, true);
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+ InVals.push_back(FIN);
+ if (ObjSize==1 || ObjSize==2) {
+ if (GPR_idx != Num_GPR_Regs) {
+ unsigned VReg;
+ if (isPPC64)
+ VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+ else
+ VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
+ SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo(),
+ ObjSize==1 ? MVT::i8 : MVT::i16,
+ false, false, 0);
+ MemOps.push_back(Store);
+ ++GPR_idx;
+ }
+
+ ArgOffset += PtrByteSize;
+
+ continue;
+ }
+ for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
+ // Store whatever pieces of the object are in registers
+ // to memory. ArgVal will be address of the beginning of
+ // the object.
+ if (GPR_idx != Num_GPR_Regs) {
+ unsigned VReg;
+ if (isPPC64)
+ VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+ else
+ VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
+ int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
+ SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo(),
+ false, false, 0);
+ MemOps.push_back(Store);
+ ++GPR_idx;
+ ArgOffset += PtrByteSize;
+ } else {
+ ArgOffset += ArgSize - (ArgOffset-CurArgOffset);
+ break;
+ }
+ }
+ continue;
+ }
+
+ switch (ObjectVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unhandled argument type!");
+ case MVT::i32:
+ if (!isPPC64) {
+ if (GPR_idx != Num_GPR_Regs) {
+ unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
+ ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
+ ++GPR_idx;
+ } else {
+ needsLoad = true;
+ ArgSize = PtrByteSize;
+ }
+ // All int arguments reserve stack space in the Darwin ABI.
+ ArgOffset += PtrByteSize;
+ break;
+ }
+ // FALLTHROUGH
+ case MVT::i64: // PPC64
+ if (GPR_idx != Num_GPR_Regs) {
+ unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+ ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
+
+ if (ObjectVT == MVT::i32) {
+ // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
+ // value to MVT::i64 and then truncate to the correct register size.
+ if (Flags.isSExt())
+ ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
+ DAG.getValueType(ObjectVT));
+ else if (Flags.isZExt())
+ ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
+ DAG.getValueType(ObjectVT));
+
+ ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
+ }
+
+ ++GPR_idx;
+ } else {
+ needsLoad = true;
+ ArgSize = PtrByteSize;
+ }
+ // All int arguments reserve stack space in the Darwin ABI.
+ ArgOffset += 8;
+ break;
+
+ case MVT::f32:
+ case MVT::f64:
+ // Every 4 bytes of argument space consumes one of the GPRs available for
+ // argument passing.
+ if (GPR_idx != Num_GPR_Regs) {
+ ++GPR_idx;
+ if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64)
+ ++GPR_idx;
+ }
+ if (FPR_idx != Num_FPR_Regs) {
+ unsigned VReg;
+
+ if (ObjectVT == MVT::f32)
+ VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
+ else
+ VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);
+
+ ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
+ ++FPR_idx;
+ } else {
+ needsLoad = true;
+ }
+
+ // All FP arguments reserve stack space in the Darwin ABI.
+ ArgOffset += isPPC64 ? 8 : ObjSize;
+ break;
+ case MVT::v4f32:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v16i8:
+ // Note that vector arguments in registers don't reserve stack space,
+ // except in varargs functions.
+ if (VR_idx != Num_VR_Regs) {
+ unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
+ ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
+ if (isVarArg) {
+ while ((ArgOffset % 16) != 0) {
+ ArgOffset += PtrByteSize;
+ if (GPR_idx != Num_GPR_Regs)
+ GPR_idx++;
+ }
+ ArgOffset += 16;
+ GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64?
+ }
+ ++VR_idx;
+ } else {
+ if (!isVarArg && !isPPC64) {
+ // Vectors go after all the nonvectors.
+ CurArgOffset = VecArgOffset;
+ VecArgOffset += 16;
+ } else {
+ // Vectors are aligned.
+ ArgOffset = ((ArgOffset+15)/16)*16;
+ CurArgOffset = ArgOffset;
+ ArgOffset += 16;
+ }
+ needsLoad = true;
+ }
+ break;
+ }
+
+ // We need to load the argument to a virtual register if we determined above
+ // that we ran out of physical registers of the appropriate type.
+ if (needsLoad) {
+ int FI = MFI->CreateFixedObject(ObjSize,
+ CurArgOffset + (ArgSize - ObjSize),
+ isImmutable);
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+ ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
+ false, false, 0);
+ }
+
+ InVals.push_back(ArgVal);
+ }
+
+ // Set the size that is at least reserved in caller of this function. Tail
+ // call optimized function's reserved stack space needs to be aligned so that
+ // taking the difference between two stack areas will result in an aligned
+ // stack.
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ // Add the Altivec parameters at the end, if needed.
+ if (nAltivecParamsAtEnd) {
+ MinReservedArea = ((MinReservedArea+15)/16)*16;
+ MinReservedArea += 16*nAltivecParamsAtEnd;
+ }
+ MinReservedArea =
+ std::max(MinReservedArea,
+ PPCFrameLowering::getMinCallFrameSize(isPPC64, true));
+ unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameLowering()->
+ getStackAlignment();
+ unsigned AlignMask = TargetAlign-1;
+ MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask;
+ FI->setMinReservedArea(MinReservedArea);
+
+ // If the function takes variable number of arguments, make a frame index for
+ // the start of the first vararg value... for expansion of llvm.va_start.
+ if (isVarArg) {
+ int Depth = ArgOffset;
+
+ FuncInfo->setVarArgsFrameIndex(
+ MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
+ Depth, true));
+ SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
+
+ // If this function is vararg, store any remaining integer argument regs
+ // to their spots on the stack so that they may be loaded by deferencing the
+ // result of va_next.
+ for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
+ unsigned VReg;
+
+ if (isPPC64)
+ VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+ else
+ VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
+
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
+ SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo(), false, false, 0);
+ MemOps.push_back(Store);
+ // Increment the address by four for the next argument to store
+ SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
+ FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
+ }
+ }
+
+ if (!MemOps.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl,
+ MVT::Other, &MemOps[0], MemOps.size());
+
+ return Chain;
+}
+
+/// CalculateParameterAndLinkageAreaSize - Get the size of the paramter plus
+/// linkage area for the Darwin ABI.
+static unsigned
+CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG,
+ bool isPPC64,
+ bool isVarArg,
+ unsigned CC,
+ const SmallVectorImpl<ISD::OutputArg>
+ &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ unsigned &nAltivecParamsAtEnd) {
+ // Count how many bytes are to be pushed on the stack, including the linkage
+ // area, and parameter passing area. We start with 24/48 bytes, which is
+ // prereserved space for [SP][CR][LR][3 x unused].
+ unsigned NumBytes = PPCFrameLowering::getLinkageSize(isPPC64, true);
+ unsigned NumOps = Outs.size();
+ unsigned PtrByteSize = isPPC64 ? 8 : 4;
+
+ // Add up all the space actually used.
+ // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually
+ // they all go in registers, but we must reserve stack space for them for
+ // possible use by the caller. In varargs or 64-bit calls, parameters are
+ // assigned stack space in order, with padding so Altivec parameters are
+ // 16-byte aligned.
+ nAltivecParamsAtEnd = 0;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+ EVT ArgVT = Outs[i].VT;
+ // Varargs Altivec parameters are padded to a 16 byte boundary.
+ if (ArgVT==MVT::v4f32 || ArgVT==MVT::v4i32 ||
+ ArgVT==MVT::v8i16 || ArgVT==MVT::v16i8) {
+ if (!isVarArg && !isPPC64) {
+ // Non-varargs Altivec parameters go after all the non-Altivec
+ // parameters; handle those later so we know how much padding we need.
+ nAltivecParamsAtEnd++;
+ continue;
+ }
+ // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary.
+ NumBytes = ((NumBytes+15)/16)*16;
+ }
+ NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
+ }
+
+ // Allow for Altivec parameters at the end, if needed.
+ if (nAltivecParamsAtEnd) {
+ NumBytes = ((NumBytes+15)/16)*16;
+ NumBytes += 16*nAltivecParamsAtEnd;
+ }
+
+ // The prolog code of the callee may store up to 8 GPR argument registers to
+ // the stack, allowing va_start to index over them in memory if its varargs.
+ // Because we cannot tell if this is needed on the caller side, we have to
+ // conservatively assume that it is needed. As such, make sure we have at
+ // least enough stack space for the caller to store the 8 GPRs.
+ NumBytes = std::max(NumBytes,
+ PPCFrameLowering::getMinCallFrameSize(isPPC64, true));
+
+ // Tail call needs the stack to be aligned.
+ if (CC==CallingConv::Fast && GuaranteedTailCallOpt) {
+ unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameLowering()->
+ getStackAlignment();
+ unsigned AlignMask = TargetAlign-1;
+ NumBytes = (NumBytes + AlignMask) & ~AlignMask;
+ }
+
+ return NumBytes;
+}
+
+/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
+/// adjusted to accommodate the arguments for the tailcall.
+static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
+ unsigned ParamSize) {
+
+ if (!isTailCall) return 0;
+
+ PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
+ unsigned CallerMinReservedArea = FI->getMinReservedArea();
+ int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
+ // Remember only if the new adjustement is bigger.
+ if (SPDiff < FI->getTailCallSPDelta())
+ FI->setTailCallSPDelta(SPDiff);
+
+ return SPDiff;
+}
+
+/// IsEligibleForTailCallOptimization - Check whether the call is eligible
+/// for tail call optimization. Targets which want to do tail call
+/// optimization should implement this function.
+bool
+PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
+ CallingConv::ID CalleeCC,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SelectionDAG& DAG) const {
+ if (!GuaranteedTailCallOpt)
+ return false;
+
+ // Variable argument functions are not supported.
+ if (isVarArg)
+ return false;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ CallingConv::ID CallerCC = MF.getFunction()->getCallingConv();
+ if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
+ // Functions containing by val parameters are not supported.
+ for (unsigned i = 0; i != Ins.size(); i++) {
+ ISD::ArgFlagsTy Flags = Ins[i].Flags;
+ if (Flags.isByVal()) return false;
+ }
+
+ // Non PIC/GOT tail calls are supported.
+ if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
+ return true;
+
+ // At the moment we can only do local tail calls (in same module, hidden
+ // or protected) if we are generating PIC.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+ return G->getGlobal()->hasHiddenVisibility()
+ || G->getGlobal()->hasProtectedVisibility();
+ }
+
+ return false;
+}
+
+/// isCallCompatibleAddress - Return the immediate to use if the specified
+/// 32-bit value is representable in the immediate field of a BxA instruction.
+static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
+ if (!C) return 0;
+
+ int Addr = C->getZExtValue();
+ if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
+ (Addr << 6 >> 6) != Addr)
+ return 0; // Top 6 bits have to be sext of immediate.
+
+ return DAG.getConstant((int)C->getZExtValue() >> 2,
+ DAG.getTargetLoweringInfo().getPointerTy()).getNode();
+}
+
+namespace {
+
+struct TailCallArgumentInfo {
+ SDValue Arg;
+ SDValue FrameIdxOp;
+ int FrameIdx;
+
+ TailCallArgumentInfo() : FrameIdx(0) {}
+};
+
+}
+
+/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
+static void
+StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG,
+ SDValue Chain,
+ const SmallVector<TailCallArgumentInfo, 8> &TailCallArgs,
+ SmallVector<SDValue, 8> &MemOpChains,
+ DebugLoc dl) {
+ for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
+ SDValue Arg = TailCallArgs[i].Arg;
+ SDValue FIN = TailCallArgs[i].FrameIdxOp;
+ int FI = TailCallArgs[i].FrameIdx;
+ // Store relative to framepointer.
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, FIN,
+ MachinePointerInfo::getFixedStack(FI),
+ false, false, 0));
+ }
+}
+
+/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
+/// the appropriate stack slot for the tail call optimized function call.
+static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG,
+ MachineFunction &MF,
+ SDValue Chain,
+ SDValue OldRetAddr,
+ SDValue OldFP,
+ int SPDiff,
+ bool isPPC64,
+ bool isDarwinABI,
+ DebugLoc dl) {
+ if (SPDiff) {
+ // Calculate the new stack slot for the return address.
+ int SlotSize = isPPC64 ? 8 : 4;
+ int NewRetAddrLoc = SPDiff + PPCFrameLowering::getReturnSaveOffset(isPPC64,
+ isDarwinABI);
+ int NewRetAddr = MF.getFrameInfo()->CreateFixedObject(SlotSize,
+ NewRetAddrLoc, true);
+ EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
+ SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
+ Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
+ MachinePointerInfo::getFixedStack(NewRetAddr),
+ false, false, 0);
+
+ // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack
+ // slot as the FP is never overwritten.
+ if (isDarwinABI) {
+ int NewFPLoc =
+ SPDiff + PPCFrameLowering::getFramePointerSaveOffset(isPPC64, isDarwinABI);
+ int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc,
+ true);
+ SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT);
+ Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx,
+ MachinePointerInfo::getFixedStack(NewFPIdx),
+ false, false, 0);
+ }
+ }
+ return Chain;
+}
+
+/// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
+/// the position of the argument.
+static void
+CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
+ SDValue Arg, int SPDiff, unsigned ArgOffset,
+ SmallVector<TailCallArgumentInfo, 8>& TailCallArguments) {
+ int Offset = ArgOffset + SPDiff;
+ uint32_t OpSize = (Arg.getValueType().getSizeInBits()+7)/8;
+ int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
+ EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
+ SDValue FIN = DAG.getFrameIndex(FI, VT);
+ TailCallArgumentInfo Info;
+ Info.Arg = Arg;
+ Info.FrameIdxOp = FIN;
+ Info.FrameIdx = FI;
+ TailCallArguments.push_back(Info);
+}
+
+/// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address
+/// stack slot. Returns the chain as result and the loaded frame pointers in
+/// LROpOut/FPOpout. Used when tail calling.
+SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG,
+ int SPDiff,
+ SDValue Chain,
+ SDValue &LROpOut,
+ SDValue &FPOpOut,
+ bool isDarwinABI,
+ DebugLoc dl) const {
+ if (SPDiff) {
+ // Load the LR and FP stack slot for later adjusting.
+ EVT VT = PPCSubTarget.isPPC64() ? MVT::i64 : MVT::i32;
+ LROpOut = getReturnAddrFrameIndex(DAG);
+ LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo(),
+ false, false, 0);
+ Chain = SDValue(LROpOut.getNode(), 1);
+
+ // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack
+ // slot as the FP is never overwritten.
+ if (isDarwinABI) {
+ FPOpOut = getFramePointerFrameIndex(DAG);
+ FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, MachinePointerInfo(),
+ false, false, 0);
+ Chain = SDValue(FPOpOut.getNode(), 1);
+ }
+ }
+ return Chain;
+}
+
+/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
+/// by "Src" to address "Dst" of size "Size". Alignment information is
+/// specified by the specific parameter attribute. The copy will be passed as
+/// a byval function parameter.
+/// Sometimes what we are copying is the end of a larger object, the part that
+/// does not fit in registers.
+static SDValue
+CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
+ ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
+ DebugLoc dl) {
+ SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
+ return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
+ false, false, MachinePointerInfo(0),
+ MachinePointerInfo(0));
+}
+
+/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
+/// tail calls.
+static void
+LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain,
+ SDValue Arg, SDValue PtrOff, int SPDiff,
+ unsigned ArgOffset, bool isPPC64, bool isTailCall,
+ bool isVector, SmallVector<SDValue, 8> &MemOpChains,
+ SmallVector<TailCallArgumentInfo, 8> &TailCallArguments,
+ DebugLoc dl) {
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ if (!isTailCall) {
+ if (isVector) {
+ SDValue StackPtr;
+ if (isPPC64)
+ StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
+ else
+ StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
+ PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
+ DAG.getConstant(ArgOffset, PtrVT));
+ }
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(), false, false, 0));
+ // Calculate and remember argument location.
+ } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
+ TailCallArguments);
+}
+
+static
+void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
+ DebugLoc dl, bool isPPC64, int SPDiff, unsigned NumBytes,
+ SDValue LROp, SDValue FPOp, bool isDarwinABI,
+ SmallVector<TailCallArgumentInfo, 8> &TailCallArguments) {
+ MachineFunction &MF = DAG.getMachineFunction();
+
+ // Emit a sequence of copyto/copyfrom virtual registers for arguments that
+ // might overwrite each other in case of tail call optimization.
+ SmallVector<SDValue, 8> MemOpChains2;
+ // Do not flag preceding copytoreg stuff together with the following stuff.
+ InFlag = SDValue();
+ StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
+ MemOpChains2, dl);
+ if (!MemOpChains2.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains2[0], MemOpChains2.size());
+
+ // Store the return address to the appropriate stack slot.
+ Chain = EmitTailCallStoreFPAndRetAddr(DAG, MF, Chain, LROp, FPOp, SPDiff,
+ isPPC64, isDarwinABI, dl);
+
+ // Emit callseq_end just before tailcall node.
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
+ DAG.getIntPtrConstant(0, true), InFlag);
+ InFlag = Chain.getValue(1);
+}
+
+static
+unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
+ SDValue &Chain, DebugLoc dl, int SPDiff, bool isTailCall,
+ SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
+ SmallVector<SDValue, 8> &Ops, std::vector<EVT> &NodeTys,
+ const PPCSubtarget &PPCSubTarget) {
+
+ bool isPPC64 = PPCSubTarget.isPPC64();
+ bool isSVR4ABI = PPCSubTarget.isSVR4ABI();
+
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ NodeTys.push_back(MVT::Other); // Returns a chain
+ NodeTys.push_back(MVT::Glue); // Returns a flag for retval copy to use.
+
+ unsigned CallOpc = isSVR4ABI ? PPCISD::CALL_SVR4 : PPCISD::CALL_Darwin;
+
+ bool needIndirectCall = true;
+ if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) {
+ // If this is an absolute destination address, use the munged value.
+ Callee = SDValue(Dest, 0);
+ needIndirectCall = false;
+ }
+
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ // XXX Work around for http://llvm.org/bugs/show_bug.cgi?id=5201
+ // Use indirect calls for ALL functions calls in JIT mode, since the
+ // far-call stubs may be outside relocation limits for a BL instruction.
+ if (!DAG.getTarget().getSubtarget<PPCSubtarget>().isJITCodeModel()) {
+ unsigned OpFlags = 0;
+ if (DAG.getTarget().getRelocationModel() != Reloc::Static &&
+ (PPCSubTarget.getTargetTriple().isMacOSX() &&
+ PPCSubTarget.getTargetTriple().isMacOSXVersionLT(10, 5)) &&
+ (G->getGlobal()->isDeclaration() ||
+ G->getGlobal()->isWeakForLinker())) {
+ // PC-relative references to external symbols should go through $stub,
+ // unless we're building with the leopard linker or later, which
+ // automatically synthesizes these stubs.
+ OpFlags = PPCII::MO_DARWIN_STUB;
+ }
+
+ // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
+ // every direct call is) turn it into a TargetGlobalAddress /
+ // TargetExternalSymbol node so that legalize doesn't hack it.
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
+ Callee.getValueType(),
+ 0, OpFlags);
+ needIndirectCall = false;
+ }
+ }
+
+ if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ unsigned char OpFlags = 0;
+
+ if (DAG.getTarget().getRelocationModel() != Reloc::Static &&
+ (PPCSubTarget.getTargetTriple().isMacOSX() &&
+ PPCSubTarget.getTargetTriple().isMacOSXVersionLT(10, 5))) {
+ // PC-relative references to external symbols should go through $stub,
+ // unless we're building with the leopard linker or later, which
+ // automatically synthesizes these stubs.
+ OpFlags = PPCII::MO_DARWIN_STUB;
+ }
+
+ Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(),
+ OpFlags);
+ needIndirectCall = false;
+ }
+
+ if (needIndirectCall) {
+ // Otherwise, this is an indirect call. We have to use a MTCTR/BCTRL pair
+ // to do the call, we can't use PPCISD::CALL.
+ SDValue MTCTROps[] = {Chain, Callee, InFlag};
+
+ if (isSVR4ABI && isPPC64) {
+ // Function pointers in the 64-bit SVR4 ABI do not point to the function
+ // entry point, but to the function descriptor (the function entry point
+ // address is part of the function descriptor though).
+ // The function descriptor is a three doubleword structure with the
+ // following fields: function entry point, TOC base address and
+ // environment pointer.
+ // Thus for a call through a function pointer, the following actions need
+ // to be performed:
+ // 1. Save the TOC of the caller in the TOC save area of its stack
+ // frame (this is done in LowerCall_Darwin()).
+ // 2. Load the address of the function entry point from the function
+ // descriptor.
+ // 3. Load the TOC of the callee from the function descriptor into r2.
+ // 4. Load the environment pointer from the function descriptor into
+ // r11.
+ // 5. Branch to the function entry point address.
+ // 6. On return of the callee, the TOC of the caller needs to be
+ // restored (this is done in FinishCall()).
+ //
+ // All those operations are flagged together to ensure that no other
+ // operations can be scheduled in between. E.g. without flagging the
+ // operations together, a TOC access in the caller could be scheduled
+ // between the load of the callee TOC and the branch to the callee, which
+ // results in the TOC access going through the TOC of the callee instead
+ // of going through the TOC of the caller, which leads to incorrect code.
+
+ // Load the address of the function entry point from the function
+ // descriptor.
+ SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other, MVT::Glue);
+ SDValue LoadFuncPtr = DAG.getNode(PPCISD::LOAD, dl, VTs, MTCTROps,
+ InFlag.getNode() ? 3 : 2);
+ Chain = LoadFuncPtr.getValue(1);
+ InFlag = LoadFuncPtr.getValue(2);
+
+ // Load environment pointer into r11.
+ // Offset of the environment pointer within the function descriptor.
+ SDValue PtrOff = DAG.getIntPtrConstant(16);
+
+ SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff);
+ SDValue LoadEnvPtr = DAG.getNode(PPCISD::LOAD, dl, VTs, Chain, AddPtr,
+ InFlag);
+ Chain = LoadEnvPtr.getValue(1);
+ InFlag = LoadEnvPtr.getValue(2);
+
+ SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr,
+ InFlag);
+ Chain = EnvVal.getValue(0);
+ InFlag = EnvVal.getValue(1);
+
+ // Load TOC of the callee into r2. We are using a target-specific load
+ // with r2 hard coded, because the result of a target-independent load
+ // would never go directly into r2, since r2 is a reserved register (which
+ // prevents the register allocator from allocating it), resulting in an
+ // additional register being allocated and an unnecessary move instruction
+ // being generated.
+ VTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue LoadTOCPtr = DAG.getNode(PPCISD::LOAD_TOC, dl, VTs, Chain,
+ Callee, InFlag);
+ Chain = LoadTOCPtr.getValue(0);
+ InFlag = LoadTOCPtr.getValue(1);
+
+ MTCTROps[0] = Chain;
+ MTCTROps[1] = LoadFuncPtr;
+ MTCTROps[2] = InFlag;
+ }
+
+ Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys, MTCTROps,
+ 2 + (InFlag.getNode() != 0));
+ InFlag = Chain.getValue(1);
+
+ NodeTys.clear();
+ NodeTys.push_back(MVT::Other);
+ NodeTys.push_back(MVT::Glue);
+ Ops.push_back(Chain);
+ CallOpc = isSVR4ABI ? PPCISD::BCTRL_SVR4 : PPCISD::BCTRL_Darwin;
+ Callee.setNode(0);
+ // Add CTR register as callee so a bctr can be emitted later.
+ if (isTailCall)
+ Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT));
+ }
+
+ // If this is a direct call, pass the chain and the callee.
+ if (Callee.getNode()) {
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+ }
+ // If this is a tail call add stack pointer delta.
+ if (isTailCall)
+ Ops.push_back(DAG.getConstant(SPDiff, MVT::i32));
+
+ // Add argument registers to the end of the list so that they are known live
+ // into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+ return CallOpc;
+}
+
+SDValue
+PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+ CCRetInfo.AnalyzeCallResult(Ins, RetCC_PPC);
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
+ CCValAssign &VA = RVLocs[i];
+ EVT VT = VA.getValVT();
+ assert(VA.isRegLoc() && "Can only return in registers!");
+ Chain = DAG.getCopyFromReg(Chain, dl,
+ VA.getLocReg(), VT, InFlag).getValue(1);
+ InVals.push_back(Chain.getValue(0));
+ InFlag = Chain.getValue(2);
+ }
+
+ return Chain;
+}
+
+SDValue
+PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
+ bool isTailCall, bool isVarArg,
+ SelectionDAG &DAG,
+ SmallVector<std::pair<unsigned, SDValue>, 8>
+ &RegsToPass,
+ SDValue InFlag, SDValue Chain,
+ SDValue &Callee,
+ int SPDiff, unsigned NumBytes,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SmallVectorImpl<SDValue> &InVals) const {
+ std::vector<EVT> NodeTys;
+ SmallVector<SDValue, 8> Ops;
+ unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, dl, SPDiff,
+ isTailCall, RegsToPass, Ops, NodeTys,
+ PPCSubTarget);
+
+ // When performing tail call optimization the callee pops its arguments off
+ // the stack. Account for this here so these bytes can be pushed back on in
+ // PPCRegisterInfo::eliminateCallFramePseudoInstr.
+ int BytesCalleePops =
+ (CallConv==CallingConv::Fast && GuaranteedTailCallOpt) ? NumBytes : 0;
+
+ if (InFlag.getNode())
+ Ops.push_back(InFlag);
+
+ // Emit tail call.
+ if (isTailCall) {
+ // If this is the first return lowered for this function, add the regs
+ // to the liveout set for the function.
+ if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+ CCInfo.AnalyzeCallResult(Ins, RetCC_PPC);
+ for (unsigned i = 0; i != RVLocs.size(); ++i)
+ DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
+ }
+
+ assert(((Callee.getOpcode() == ISD::Register &&
+ cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
+ Callee.getOpcode() == ISD::TargetExternalSymbol ||
+ Callee.getOpcode() == ISD::TargetGlobalAddress ||
+ isa<ConstantSDNode>(Callee)) &&
+ "Expecting an global address, external symbol, absolute value or register");
+
+ return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, &Ops[0], Ops.size());
+ }
+
+ Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ // Add a NOP immediately after the branch instruction when using the 64-bit
+ // SVR4 ABI. At link time, if caller and callee are in a different module and
+ // thus have a different TOC, the call will be replaced with a call to a stub
+ // function which saves the current TOC, loads the TOC of the callee and
+ // branches to the callee. The NOP will be replaced with a load instruction
+ // which restores the TOC of the caller from the TOC save slot of the current
+ // stack frame. If caller and callee belong to the same module (and have the
+ // same TOC), the NOP will remain unchanged.
+ if (!isTailCall && PPCSubTarget.isSVR4ABI()&& PPCSubTarget.isPPC64()) {
+ SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ if (CallOpc == PPCISD::BCTRL_SVR4) {
+ // This is a call through a function pointer.
+ // Restore the caller TOC from the save area into R2.
+ // See PrepareCall() for more information about calls through function
+ // pointers in the 64-bit SVR4 ABI.
+ // We are using a target-specific load with r2 hard coded, because the
+ // result of a target-independent load would never go directly into r2,
+ // since r2 is a reserved register (which prevents the register allocator
+ // from allocating it), resulting in an additional register being
+ // allocated and an unnecessary move instruction being generated.
+ Chain = DAG.getNode(PPCISD::TOC_RESTORE, dl, VTs, Chain, InFlag);
+ InFlag = Chain.getValue(1);
+ } else {
+ // Otherwise insert NOP.
+ InFlag = DAG.getNode(PPCISD::NOP, dl, MVT::Glue, InFlag);
+ }
+ }
+
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
+ DAG.getIntPtrConstant(BytesCalleePops, true),
+ InFlag);
+ if (!Ins.empty())
+ InFlag = Chain.getValue(1);
+
+ return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
+ Ins, dl, DAG, InVals);
+}
+
+SDValue
+PPCTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ if (isTailCall)
+ isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
+ Ins, DAG);
+
+ if (PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64())
+ return LowerCall_SVR4(Chain, Callee, CallConv, isVarArg,
+ isTailCall, Outs, OutVals, Ins,
+ dl, DAG, InVals);
+
+ return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg,
+ isTailCall, Outs, OutVals, Ins,
+ dl, DAG, InVals);
+}
+
+SDValue
+PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ // See PPCTargetLowering::LowerFormalArguments_SVR4() for a description
+ // of the 32-bit SVR4 ABI stack frame layout.
+
+ assert((CallConv == CallingConv::C ||
+ CallConv == CallingConv::Fast) && "Unknown calling convention!");
+
+ unsigned PtrByteSize = 4;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+
+ // Mark this function as potentially containing a function that contains a
+ // tail call. As a consequence the frame pointer will be used for dynamicalloc
+ // and restoring the callers stack pointer in this functions epilog. This is
+ // done because by tail calling the called function might overwrite the value
+ // in this function's (MF) stack pointer stack slot 0(SP).
+ if (GuaranteedTailCallOpt && CallConv==CallingConv::Fast)
+ MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
+
+ // Count how many bytes are to be pushed on the stack, including the linkage
+ // area, parameter list area and the part of the local variable space which
+ // contains copies of aggregates which are passed by value.
+
+ // Assign locations to all of the outgoing arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+
+ // Reserve space for the linkage area on the stack.
+ CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false), PtrByteSize);
+
+ if (isVarArg) {
+ // Handle fixed and variable vector arguments differently.
+ // Fixed vector arguments go into registers as long as registers are
+ // available. Variable vector arguments always go into memory.
+ unsigned NumArgs = Outs.size();
+
+ for (unsigned i = 0; i != NumArgs; ++i) {
+ MVT ArgVT = Outs[i].VT;
+ ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
+ bool Result;
+
+ if (Outs[i].IsFixed) {
+ Result = CC_PPC_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
+ CCInfo);
+ } else {
+ Result = CC_PPC_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
+ ArgFlags, CCInfo);
+ }
+
+ if (Result) {
+#ifndef NDEBUG
+ errs() << "Call operand #" << i << " has unhandled type "
+ << EVT(ArgVT).getEVTString() << "\n";
+#endif
+ llvm_unreachable(0);
+ }
+ }
+ } else {
+ // All arguments are treated the same.
+ CCInfo.AnalyzeCallOperands(Outs, CC_PPC_SVR4);
+ }
+
+ // Assign locations to all of the outgoing aggregate by value arguments.
+ SmallVector<CCValAssign, 16> ByValArgLocs;
+ CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ByValArgLocs, *DAG.getContext());
+
+ // Reserve stack space for the allocations in CCInfo.
+ CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
+
+ CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC_SVR4_ByVal);
+
+ // Size of the linkage area, parameter list area and the part of the local
+ // space variable where copies of aggregates which are passed by value are
+ // stored.
+ unsigned NumBytes = CCByValInfo.getNextStackOffset();
+
+ // Calculate by how many bytes the stack has to be adjusted in case of tail
+ // call optimization.
+ int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
+
+ // Adjust the stack pointer for the new arguments...
+ // These operations are automatically eliminated by the prolog/epilog pass
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+ SDValue CallSeqStart = Chain;
+
+ // Load the return address and frame pointer so it can be moved somewhere else
+ // later.
+ SDValue LROp, FPOp;
+ Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, false,
+ dl);
+
+ // Set up a copy of the stack pointer for use loading and storing any
+ // arguments that may not fit in the registers available for argument
+ // passing.
+ SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
+
+ SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+ SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
+ SmallVector<SDValue, 8> MemOpChains;
+
+ bool seenFloatArg = false;
+ // Walk the register/memloc assignments, inserting copies/loads.
+ for (unsigned i = 0, j = 0, e = ArgLocs.size();
+ i != e;
+ ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ SDValue Arg = OutVals[i];
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+
+ if (Flags.isByVal()) {
+ // Argument is an aggregate which is passed by value, thus we need to
+ // create a copy of it in the local variable space of the current stack
+ // frame (which is the stack frame of the caller) and pass the address of
+ // this copy to the callee.
+ assert((j < ByValArgLocs.size()) && "Index out of bounds!");
+ CCValAssign &ByValVA = ByValArgLocs[j++];
+ assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
+
+ // Memory reserved in the local variable space of the callers stack frame.
+ unsigned LocMemOffset = ByValVA.getLocMemOffset();
+
+ SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
+ PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
+
+ // Create a copy of the argument in the local area of the current
+ // stack frame.
+ SDValue MemcpyCall =
+ CreateCopyOfByValArgument(Arg, PtrOff,
+ CallSeqStart.getNode()->getOperand(0),
+ Flags, DAG, dl);
+
+ // This must go outside the CALLSEQ_START..END.
+ SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
+ CallSeqStart.getNode()->getOperand(1));
+ DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
+ NewCallSeqStart.getNode());
+ Chain = CallSeqStart = NewCallSeqStart;
+
+ // Pass the address of the aggregate copy on the stack either in a
+ // physical register or in the parameter list area of the current stack
+ // frame to the callee.
+ Arg = PtrOff;
+ }
+
+ if (VA.isRegLoc()) {
+ seenFloatArg |= VA.getLocVT().isFloatingPoint();
+ // Put argument in a physical register.
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ } else {
+ // Put argument in the parameter list area of the current stack frame.
+ assert(VA.isMemLoc());
+ unsigned LocMemOffset = VA.getLocMemOffset();
+
+ if (!isTailCall) {
+ SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
+ PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
+
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(),
+ false, false, 0));
+ } else {
+ // Calculate and remember argument location.
+ CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
+ TailCallArguments);
+ }
+ }
+ }
+
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+ // Set CR6 to true if this is a vararg call with floating args passed in
+ // registers.
+ if (isVarArg) {
+ SDValue SetCR(DAG.getMachineNode(seenFloatArg ? PPC::CRSET : PPC::CRUNSET,
+ dl, MVT::i32), 0);
+ RegsToPass.push_back(std::make_pair(unsigned(PPC::CR1EQ), SetCR));
+ }
+
+ // Build a sequence of copy-to-reg nodes chained together with token chain
+ // and flag operands which copy the outgoing args into the appropriate regs.
+ SDValue InFlag;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ if (isTailCall)
+ PrepareTailCall(DAG, InFlag, Chain, dl, false, SPDiff, NumBytes, LROp, FPOp,
+ false, TailCallArguments);
+
+ return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG,
+ RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
+ Ins, InVals);
+}
+
+SDValue
+PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+
+ unsigned NumOps = Outs.size();
+
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ bool isPPC64 = PtrVT == MVT::i64;
+ unsigned PtrByteSize = isPPC64 ? 8 : 4;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+
+ // Mark this function as potentially containing a function that contains a
+ // tail call. As a consequence the frame pointer will be used for dynamicalloc
+ // and restoring the callers stack pointer in this functions epilog. This is
+ // done because by tail calling the called function might overwrite the value
+ // in this function's (MF) stack pointer stack slot 0(SP).
+ if (GuaranteedTailCallOpt && CallConv==CallingConv::Fast)
+ MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
+
+ unsigned nAltivecParamsAtEnd = 0;
+
+ // Count how many bytes are to be pushed on the stack, including the linkage
+ // area, and parameter passing area. We start with 24/48 bytes, which is
+ // prereserved space for [SP][CR][LR][3 x unused].
+ unsigned NumBytes =
+ CalculateParameterAndLinkageAreaSize(DAG, isPPC64, isVarArg, CallConv,
+ Outs, OutVals,
+ nAltivecParamsAtEnd);
+
+ // Calculate by how many bytes the stack has to be adjusted in case of tail
+ // call optimization.
+ int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
+
+ // To protect arguments on the stack from being clobbered in a tail call,
+ // force all the loads to happen before doing any other lowering.
+ if (isTailCall)
+ Chain = DAG.getStackArgumentTokenFactor(Chain);
+
+ // Adjust the stack pointer for the new arguments...
+ // These operations are automatically eliminated by the prolog/epilog pass
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+ SDValue CallSeqStart = Chain;
+
+ // Load the return address and frame pointer so it can be move somewhere else
+ // later.
+ SDValue LROp, FPOp;
+ Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, true,
+ dl);
+
+ // Set up a copy of the stack pointer for use loading and storing any
+ // arguments that may not fit in the registers available for argument
+ // passing.
+ SDValue StackPtr;
+ if (isPPC64)
+ StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
+ else
+ StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
+
+ // Figure out which arguments are going to go in registers, and which in
+ // memory. Also, if this is a vararg function, floating point operations
+ // must be stored to our stack, and loaded into integer regs as well, if
+ // any integer regs are available for argument passing.
+ unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true);
+ unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
+
+ static const unsigned GPR_32[] = { // 32-bit registers.
+ PPC::R3, PPC::R4, PPC::R5, PPC::R6,
+ PPC::R7, PPC::R8, PPC::R9, PPC::R10,
+ };
+ static const unsigned GPR_64[] = { // 64-bit registers.
+ PPC::X3, PPC::X4, PPC::X5, PPC::X6,
+ PPC::X7, PPC::X8, PPC::X9, PPC::X10,
+ };
+ static const unsigned *FPR = GetFPR();
+
+ static const unsigned VR[] = {
+ PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
+ PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
+ };
+ const unsigned NumGPRs = array_lengthof(GPR_32);
+ const unsigned NumFPRs = 13;
+ const unsigned NumVRs = array_lengthof(VR);
+
+ const unsigned *GPR = isPPC64 ? GPR_64 : GPR_32;
+
+ SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+ SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
+
+ SmallVector<SDValue, 8> MemOpChains;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ SDValue Arg = OutVals[i];
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+
+ // PtrOff will be used to store the current argument to the stack if a
+ // register cannot be found for it.
+ SDValue PtrOff;
+
+ PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
+
+ PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
+
+ // On PPC64, promote integers to 64-bit values.
+ if (isPPC64 && Arg.getValueType() == MVT::i32) {
+ // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
+ unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
+ }
+
+ // FIXME memcpy is used way more than necessary. Correctness first.
+ if (Flags.isByVal()) {
+ unsigned Size = Flags.getByValSize();
+ if (Size==1 || Size==2) {
+ // Very small objects are passed right-justified.
+ // Everything else is passed left-justified.
+ EVT VT = (Size==1) ? MVT::i8 : MVT::i16;
+ if (GPR_idx != NumGPRs) {
+ SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
+ MachinePointerInfo(), VT,
+ false, false, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+
+ ArgOffset += PtrByteSize;
+ } else {
+ SDValue Const = DAG.getConstant(4 - Size, PtrOff.getValueType());
+ SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
+ SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, AddPtr,
+ CallSeqStart.getNode()->getOperand(0),
+ Flags, DAG, dl);
+ // This must go outside the CALLSEQ_START..END.
+ SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
+ CallSeqStart.getNode()->getOperand(1));
+ DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
+ NewCallSeqStart.getNode());
+ Chain = CallSeqStart = NewCallSeqStart;
+ ArgOffset += PtrByteSize;
+ }
+ continue;
+ }
+ // Copy entire object into memory. There are cases where gcc-generated
+ // code assumes it is there, even if it could be put entirely into
+ // registers. (This is not what the doc says.)
+ SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
+ CallSeqStart.getNode()->getOperand(0),
+ Flags, DAG, dl);
+ // This must go outside the CALLSEQ_START..END.
+ SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
+ CallSeqStart.getNode()->getOperand(1));
+ DAG.ReplaceAllUsesWith(CallSeqStart.getNode(), NewCallSeqStart.getNode());
+ Chain = CallSeqStart = NewCallSeqStart;
+ // And copy the pieces of it that fit into registers.
+ for (unsigned j=0; j<Size; j+=PtrByteSize) {
+ SDValue Const = DAG.getConstant(j, PtrOff.getValueType());
+ SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
+ if (GPR_idx != NumGPRs) {
+ SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
+ MachinePointerInfo(),
+ false, false, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+ ArgOffset += PtrByteSize;
+ } else {
+ ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
+ break;
+ }
+ }
+ continue;
+ }
+
+ switch (Arg.getValueType().getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unexpected ValueType for argument!");
+ case MVT::i32:
+ case MVT::i64:
+ if (GPR_idx != NumGPRs) {
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
+ } else {
+ LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
+ isPPC64, isTailCall, false, MemOpChains,
+ TailCallArguments, dl);
+ }
+ ArgOffset += PtrByteSize;
+ break;
+ case MVT::f32:
+ case MVT::f64:
+ if (FPR_idx != NumFPRs) {
+ RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
+
+ if (isVarArg) {
+ SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(), false, false, 0);
+ MemOpChains.push_back(Store);
+
+ // Float varargs are always shadowed in available integer registers
+ if (GPR_idx != NumGPRs) {
+ SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff,
+ MachinePointerInfo(), false, false, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+ }
+ if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){
+ SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
+ PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
+ SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff,
+ MachinePointerInfo(),
+ false, false, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+ }
+ } else {
+ // If we have any FPRs remaining, we may also have GPRs remaining.
+ // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
+ // GPRs.
+ if (GPR_idx != NumGPRs)
+ ++GPR_idx;
+ if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&
+ !isPPC64) // PPC64 has 64-bit GPR's obviously :)
+ ++GPR_idx;
+ }
+ } else {
+ LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
+ isPPC64, isTailCall, false, MemOpChains,
+ TailCallArguments, dl);
+ }
+ if (isPPC64)
+ ArgOffset += 8;
+ else
+ ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8;
+ break;
+ case MVT::v4f32:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v16i8:
+ if (isVarArg) {
+ // These go aligned on the stack, or in the corresponding R registers
+ // when within range. The Darwin PPC ABI doc claims they also go in
+ // V registers; in fact gcc does this only for arguments that are
+ // prototyped, not for those that match the ... We do it for all
+ // arguments, seems to work.
+ while (ArgOffset % 16 !=0) {
+ ArgOffset += PtrByteSize;
+ if (GPR_idx != NumGPRs)
+ GPR_idx++;
+ }
+ // We could elide this store in the case where the object fits
+ // entirely in R registers. Maybe later.
+ PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
+ DAG.getConstant(ArgOffset, PtrVT));
+ SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(), false, false, 0);
+ MemOpChains.push_back(Store);
+ if (VR_idx != NumVRs) {
+ SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff,
+ MachinePointerInfo(),
+ false, false, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
+ }
+ ArgOffset += 16;
+ for (unsigned i=0; i<16; i+=PtrByteSize) {
+ if (GPR_idx == NumGPRs)
+ break;
+ SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
+ DAG.getConstant(i, PtrVT));
+ SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(),
+ false, false, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+ }
+ break;
+ }
+
+ // Non-varargs Altivec params generally go in registers, but have
+ // stack space allocated at the end.
+ if (VR_idx != NumVRs) {
+ // Doesn't have GPR space allocated.
+ RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
+ } else if (nAltivecParamsAtEnd==0) {
+ // We are emitting Altivec params in order.
+ LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
+ isPPC64, isTailCall, true, MemOpChains,
+ TailCallArguments, dl);
+ ArgOffset += 16;
+ }
+ break;
+ }
+ }
+ // If all Altivec parameters fit in registers, as they usually do,
+ // they get stack space following the non-Altivec parameters. We
+ // don't track this here because nobody below needs it.
+ // If there are more Altivec parameters than fit in registers emit
+ // the stores here.
+ if (!isVarArg && nAltivecParamsAtEnd > NumVRs) {
+ unsigned j = 0;
+ // Offset is aligned; skip 1st 12 params which go in V registers.
+ ArgOffset = ((ArgOffset+15)/16)*16;
+ ArgOffset += 12*16;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ SDValue Arg = OutVals[i];
+ EVT ArgType = Outs[i].VT;
+ if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 ||
+ ArgType==MVT::v8i16 || ArgType==MVT::v16i8) {
+ if (++j > NumVRs) {
+ SDValue PtrOff;
+ // We are emitting Altivec params in order.
+ LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
+ isPPC64, isTailCall, true, MemOpChains,
+ TailCallArguments, dl);
+ ArgOffset += 16;
+ }
+ }
+ }
+ }
+
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+ // Check if this is an indirect call (MTCTR/BCTRL).
+ // See PrepareCall() for more information about calls through function
+ // pointers in the 64-bit SVR4 ABI.
+ if (!isTailCall && isPPC64 && PPCSubTarget.isSVR4ABI() &&
+ !dyn_cast<GlobalAddressSDNode>(Callee) &&
+ !dyn_cast<ExternalSymbolSDNode>(Callee) &&
+ !isBLACompatibleAddress(Callee, DAG)) {
+ // Load r2 into a virtual register and store it to the TOC save area.
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
+ // TOC save area offset.
+ SDValue PtrOff = DAG.getIntPtrConstant(40);
+ SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
+ Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, MachinePointerInfo(),
+ false, false, 0);
+ }
+
+ // On Darwin, R12 must contain the address of an indirect callee. This does
+ // not mean the MTCTR instruction must use R12; it's easier to model this as
+ // an extra parameter, so do that.
+ if (!isTailCall &&
+ !dyn_cast<GlobalAddressSDNode>(Callee) &&
+ !dyn_cast<ExternalSymbolSDNode>(Callee) &&
+ !isBLACompatibleAddress(Callee, DAG))
+ RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? PPC::X12 :
+ PPC::R12), Callee));
+
+ // Build a sequence of copy-to-reg nodes chained together with token chain
+ // and flag operands which copy the outgoing args into the appropriate regs.
+ SDValue InFlag;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ if (isTailCall)
+ PrepareTailCall(DAG, InFlag, Chain, dl, isPPC64, SPDiff, NumBytes, LROp,
+ FPOp, true, TailCallArguments);
+
+ return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG,
+ RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
+ Ins, InVals);
+}
+
+bool
+PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
+ MachineFunction &MF, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext &Context) const {
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(),
+ RVLocs, Context);
+ return CCInfo.CheckReturn(Outs, RetCC_PPC);
+}
+
+SDValue
+PPCTargetLowering::LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const {
+
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+ CCInfo.AnalyzeReturn(Outs, RetCC_PPC);
+
+ // If this is the first return lowered for this function, add the regs to the
+ // liveout set for the function.
+ if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
+ for (unsigned i = 0; i != RVLocs.size(); ++i)
+ DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
+ }
+
+ SDValue Flag;
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
+ OutVals[i], Flag);
+ Flag = Chain.getValue(1);
+ }
+
+ if (Flag.getNode())
+ return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
+ else
+ return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, Chain);
+}
+
+SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) const {
+ // When we pop the dynamic allocation we need to restore the SP link.
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Get the corect type for pointers.
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+
+ // Construct the stack pointer operand.
+ bool isPPC64 = Subtarget.isPPC64();
+ unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
+ SDValue StackPtr = DAG.getRegister(SP, PtrVT);
+
+ // Get the operands for the STACKRESTORE.
+ SDValue Chain = Op.getOperand(0);
+ SDValue SaveSP = Op.getOperand(1);
+
+ // Load the old link SP.
+ SDValue LoadLinkSP = DAG.getLoad(PtrVT, dl, Chain, StackPtr,
+ MachinePointerInfo(),
+ false, false, 0);
+
+ // Restore the stack pointer.
+ Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
+
+ // Store the old link SP.
+ return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo(),
+ false, false, 0);
+}
+
+
+
+SDValue
+PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ bool isPPC64 = PPCSubTarget.isPPC64();
+ bool isDarwinABI = PPCSubTarget.isDarwinABI();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+
+ // Get current frame pointer save index. The users of this index will be
+ // primarily DYNALLOC instructions.
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ int RASI = FI->getReturnAddrSaveIndex();
+
+ // If the frame pointer save index hasn't been defined yet.
+ if (!RASI) {
+ // Find out what the fix offset of the frame pointer save area.
+ int LROffset = PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI);
+ // Allocate the frame index for frame pointer save area.
+ RASI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, LROffset, true);
+ // Save the result.
+ FI->setReturnAddrSaveIndex(RASI);
+ }
+ return DAG.getFrameIndex(RASI, PtrVT);
+}
+
+SDValue
+PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ bool isPPC64 = PPCSubTarget.isPPC64();
+ bool isDarwinABI = PPCSubTarget.isDarwinABI();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+
+ // Get current frame pointer save index. The users of this index will be
+ // primarily DYNALLOC instructions.
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ int FPSI = FI->getFramePointerSaveIndex();
+
+ // If the frame pointer save index hasn't been defined yet.
+ if (!FPSI) {
+ // Find out what the fix offset of the frame pointer save area.
+ int FPOffset = PPCFrameLowering::getFramePointerSaveOffset(isPPC64,
+ isDarwinABI);
+
+ // Allocate the frame index for frame pointer save area.
+ FPSI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
+ // Save the result.
+ FI->setFramePointerSaveIndex(FPSI);
+ }
+ return DAG.getFrameIndex(FPSI, PtrVT);
+}
+
+SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
+ SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) const {
+ // Get the inputs.
+ SDValue Chain = Op.getOperand(0);
+ SDValue Size = Op.getOperand(1);
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Get the corect type for pointers.
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ // Negate the size.
+ SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
+ DAG.getConstant(0, PtrVT), Size);
+ // Construct a node for the frame pointer save index.
+ SDValue FPSIdx = getFramePointerFrameIndex(DAG);
+ // Build a DYNALLOC node.
+ SDValue Ops[3] = { Chain, NegSize, FPSIdx };
+ SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
+ return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops, 3);
+}
+
+/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
+/// possible.
+SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
+ // Not FP? Not a fsel.
+ if (!Op.getOperand(0).getValueType().isFloatingPoint() ||
+ !Op.getOperand(2).getValueType().isFloatingPoint())
+ return Op;
+
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+
+ // Cannot handle SETEQ/SETNE.
+ if (CC == ISD::SETEQ || CC == ISD::SETNE) return Op;
+
+ EVT ResVT = Op.getValueType();
+ EVT CmpVT = Op.getOperand(0).getValueType();
+ SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
+ SDValue TV = Op.getOperand(2), FV = Op.getOperand(3);
+ DebugLoc dl = Op.getDebugLoc();
+
+ // If the RHS of the comparison is a 0.0, we don't need to do the
+ // subtraction at all.
+ if (isFloatingPointZero(RHS))
+ switch (CC) {
+ default: break; // SETUO etc aren't handled by fsel.
+ case ISD::SETULT:
+ case ISD::SETLT:
+ std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
+ case ISD::SETOGE:
+ case ISD::SETGE:
+ if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
+ LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
+ return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
+ case ISD::SETUGT:
+ case ISD::SETGT:
+ std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
+ case ISD::SETOLE:
+ case ISD::SETLE:
+ if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
+ LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
+ return DAG.getNode(PPCISD::FSEL, dl, ResVT,
+ DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
+ }
+
+ SDValue Cmp;
+ switch (CC) {
+ default: break; // SETUO etc aren't handled by fsel.
+ case ISD::SETULT:
+ case ISD::SETLT:
+ Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
+ if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
+ Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
+ return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
+ case ISD::SETOGE:
+ case ISD::SETGE:
+ Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
+ if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
+ Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
+ return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
+ case ISD::SETUGT:
+ case ISD::SETGT:
+ Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS);
+ if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
+ Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
+ return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
+ case ISD::SETOLE:
+ case ISD::SETLE:
+ Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS);
+ if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
+ Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
+ return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
+ }
+ return Op;
+}
+
+// FIXME: Split this code up when LegalizeDAGTypes lands.
+SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
+ DebugLoc dl) const {
+ assert(Op.getOperand(0).getValueType().isFloatingPoint());
+ SDValue Src = Op.getOperand(0);
+ if (Src.getValueType() == MVT::f32)
+ Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
+
+ SDValue Tmp;
+ switch (Op.getValueType().getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
+ case MVT::i32:
+ Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIWZ :
+ PPCISD::FCTIDZ,
+ dl, MVT::f64, Src);
+ break;
+ case MVT::i64:
+ Tmp = DAG.getNode(PPCISD::FCTIDZ, dl, MVT::f64, Src);
+ break;
+ }
+
+ // Convert the FP value to an int value through memory.
+ SDValue FIPtr = DAG.CreateStackTemporary(MVT::f64);
+
+ // Emit a store to the stack slot.
+ SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr,
+ MachinePointerInfo(), false, false, 0);
+
+ // Result is a load from the stack slot. If loading 4 bytes, make sure to
+ // add in a bias.
+ if (Op.getValueType() == MVT::i32)
+ FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
+ DAG.getConstant(4, FIPtr.getValueType()));
+ return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, MachinePointerInfo(),
+ false, false, 0);
+}
+
+SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ // Don't handle ppc_fp128 here; let it be lowered to a libcall.
+ if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
+ return SDValue();
+
+ if (Op.getOperand(0).getValueType() == MVT::i64) {
+ SDValue Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op.getOperand(0));
+ SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Bits);
+ if (Op.getValueType() == MVT::f32)
+ FP = DAG.getNode(ISD::FP_ROUND, dl,
+ MVT::f32, FP, DAG.getIntPtrConstant(0));
+ return FP;
+ }
+
+ assert(Op.getOperand(0).getValueType() == MVT::i32 &&
+ "Unhandled SINT_TO_FP type in custom expander!");
+ // Since we only generate this in 64-bit mode, we can take advantage of
+ // 64-bit registers. In particular, sign extend the input value into the
+ // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
+ // then lfd it and fcfid it.
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *FrameInfo = MF.getFrameInfo();
+ int FrameIdx = FrameInfo->CreateStackObject(8, 8, false);
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
+
+ SDValue Ext64 = DAG.getNode(PPCISD::EXTSW_32, dl, MVT::i32,
+ Op.getOperand(0));
+
+ // STD the extended value into the stack slot.
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
+ MachineMemOperand::MOStore, 8, 8);
+ SDValue Ops[] = { DAG.getEntryNode(), Ext64, FIdx };
+ SDValue Store =
+ DAG.getMemIntrinsicNode(PPCISD::STD_32, dl, DAG.getVTList(MVT::Other),
+ Ops, 4, MVT::i64, MMO);
+ // Load the value as a double.
+ SDValue Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx, MachinePointerInfo(),
+ false, false, 0);
+
+ // FCFID it and return it.
+ SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Ld);
+ if (Op.getValueType() == MVT::f32)
+ FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, DAG.getIntPtrConstant(0));
+ return FP;
+}
+
+SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ /*
+ The rounding mode is in bits 30:31 of FPSR, and has the following
+ settings:
+ 00 Round to nearest
+ 01 Round to 0
+ 10 Round to +inf
+ 11 Round to -inf
+
+ FLT_ROUNDS, on the other hand, expects the following:
+ -1 Undefined
+ 0 Round to 0
+ 1 Round to nearest
+ 2 Round to +inf
+ 3 Round to -inf
+
+ To perform the conversion, we do:
+ ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
+ */
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ EVT VT = Op.getValueType();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ std::vector<EVT> NodeTys;
+ SDValue MFFSreg, InFlag;
+
+ // Save FP Control Word to register
+ NodeTys.push_back(MVT::f64); // return register
+ NodeTys.push_back(MVT::Glue); // unused in this context
+ SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, &InFlag, 0);
+
+ // Save FP register to stack slot
+ int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8, false);
+ SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain,
+ StackSlot, MachinePointerInfo(), false, false,0);
+
+ // Load FP Control Word from low 32 bits of stack slot.
+ SDValue Four = DAG.getConstant(4, PtrVT);
+ SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
+ SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, MachinePointerInfo(),
+ false, false, 0);
+
+ // Transform as necessary
+ SDValue CWD1 =
+ DAG.getNode(ISD::AND, dl, MVT::i32,
+ CWD, DAG.getConstant(3, MVT::i32));
+ SDValue CWD2 =
+ DAG.getNode(ISD::SRL, dl, MVT::i32,
+ DAG.getNode(ISD::AND, dl, MVT::i32,
+ DAG.getNode(ISD::XOR, dl, MVT::i32,
+ CWD, DAG.getConstant(3, MVT::i32)),
+ DAG.getConstant(3, MVT::i32)),
+ DAG.getConstant(1, MVT::i32));
+
+ SDValue RetVal =
+ DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
+
+ return DAG.getNode((VT.getSizeInBits() < 16 ?
+ ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal);
+}
+
+SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ unsigned BitWidth = VT.getSizeInBits();
+ DebugLoc dl = Op.getDebugLoc();
+ assert(Op.getNumOperands() == 3 &&
+ VT == Op.getOperand(1).getValueType() &&
+ "Unexpected SHL!");
+
+ // Expand into a bunch of logical ops. Note that these ops
+ // depend on the PPC behavior for oversized shift amounts.
+ SDValue Lo = Op.getOperand(0);
+ SDValue Hi = Op.getOperand(1);
+ SDValue Amt = Op.getOperand(2);
+ EVT AmtVT = Amt.getValueType();
+
+ SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
+ DAG.getConstant(BitWidth, AmtVT), Amt);
+ SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
+ SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
+ SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
+ SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
+ DAG.getConstant(-BitWidth, AmtVT));
+ SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
+ SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
+ SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
+ SDValue OutOps[] = { OutLo, OutHi };
+ return DAG.getMergeValues(OutOps, 2, dl);
+}
+
+SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned BitWidth = VT.getSizeInBits();
+ assert(Op.getNumOperands() == 3 &&
+ VT == Op.getOperand(1).getValueType() &&
+ "Unexpected SRL!");
+
+ // Expand into a bunch of logical ops. Note that these ops
+ // depend on the PPC behavior for oversized shift amounts.
+ SDValue Lo = Op.getOperand(0);
+ SDValue Hi = Op.getOperand(1);
+ SDValue Amt = Op.getOperand(2);
+ EVT AmtVT = Amt.getValueType();
+
+ SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
+ DAG.getConstant(BitWidth, AmtVT), Amt);
+ SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
+ SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
+ SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
+ SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
+ DAG.getConstant(-BitWidth, AmtVT));
+ SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
+ SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
+ SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
+ SDValue OutOps[] = { OutLo, OutHi };
+ return DAG.getMergeValues(OutOps, 2, dl);
+}
+
+SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+ unsigned BitWidth = VT.getSizeInBits();
+ assert(Op.getNumOperands() == 3 &&
+ VT == Op.getOperand(1).getValueType() &&
+ "Unexpected SRA!");
+
+ // Expand into a bunch of logical ops, followed by a select_cc.
+ SDValue Lo = Op.getOperand(0);
+ SDValue Hi = Op.getOperand(1);
+ SDValue Amt = Op.getOperand(2);
+ EVT AmtVT = Amt.getValueType();
+
+ SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
+ DAG.getConstant(BitWidth, AmtVT), Amt);
+ SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
+ SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
+ SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
+ SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
+ DAG.getConstant(-BitWidth, AmtVT));
+ SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
+ SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
+ SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, AmtVT),
+ Tmp4, Tmp6, ISD::SETLE);
+ SDValue OutOps[] = { OutLo, OutHi };
+ return DAG.getMergeValues(OutOps, 2, dl);
+}
+
+//===----------------------------------------------------------------------===//
+// Vector related lowering.
+//
+
+/// BuildSplatI - Build a canonical splati of Val with an element size of
+/// SplatSize. Cast the result to VT.
+static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT,
+ SelectionDAG &DAG, DebugLoc dl) {
+ assert(Val >= -16 && Val <= 15 && "vsplti is out of range!");
+
+ static const EVT VTys[] = { // canonical VT to use for each size.
+ MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
+ };
+
+ EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
+
+ // Force vspltis[hw] -1 to vspltisb -1 to canonicalize.
+ if (Val == -1)
+ SplatSize = 1;
+
+ EVT CanonicalVT = VTys[SplatSize-1];
+
+ // Build a canonical splat for this value.
+ SDValue Elt = DAG.getConstant(Val, MVT::i32);
+ SmallVector<SDValue, 8> Ops;
+ Ops.assign(CanonicalVT.getVectorNumElements(), Elt);
+ SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, dl, CanonicalVT,
+ &Ops[0], Ops.size());
+ return DAG.getNode(ISD::BITCAST, dl, ReqVT, Res);
+}
+
+/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
+/// specified intrinsic ID.
+static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
+ SelectionDAG &DAG, DebugLoc dl,
+ EVT DestVT = MVT::Other) {
+ if (DestVT == MVT::Other) DestVT = LHS.getValueType();
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
+ DAG.getConstant(IID, MVT::i32), LHS, RHS);
+}
+
+/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
+/// specified intrinsic ID.
+static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
+ SDValue Op2, SelectionDAG &DAG,
+ DebugLoc dl, EVT DestVT = MVT::Other) {
+ if (DestVT == MVT::Other) DestVT = Op0.getValueType();
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
+ DAG.getConstant(IID, MVT::i32), Op0, Op1, Op2);
+}
+
+
+/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
+/// amount. The result has the specified value type.
+static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt,
+ EVT VT, SelectionDAG &DAG, DebugLoc dl) {
+ // Force LHS/RHS to be the right type.
+ LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
+ RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
+
+ int Ops[16];
+ for (unsigned i = 0; i != 16; ++i)
+ Ops[i] = i + Amt;
+ SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
+ return DAG.getNode(ISD::BITCAST, dl, VT, T);
+}
+
+// If this is a case we can't handle, return null and let the default
+// expansion code take care of it. If we CAN select this case, and if it
+// selects to a single instruction, return Op. Otherwise, if we can codegen
+// this case more efficiently than a constant pool load, lower it to the
+// sequence of ops that should be used.
+SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
+ assert(BVN != 0 && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
+
+ // Check if this is a splat of a constant value.
+ APInt APSplatBits, APSplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
+ HasAnyUndefs, 0, true) || SplatBitSize > 32)
+ return SDValue();
+
+ unsigned SplatBits = APSplatBits.getZExtValue();
+ unsigned SplatUndef = APSplatUndef.getZExtValue();
+ unsigned SplatSize = SplatBitSize / 8;
+
+ // First, handle single instruction cases.
+
+ // All zeros?
+ if (SplatBits == 0) {
+ // Canonicalize all zero vectors to be v4i32.
+ if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
+ SDValue Z = DAG.getConstant(0, MVT::i32);
+ Z = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Z, Z, Z, Z);
+ Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
+ }
+ return Op;
+ }
+
+ // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
+ int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
+ (32-SplatBitSize));
+ if (SextVal >= -16 && SextVal <= 15)
+ return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl);
+
+
+ // Two instruction sequences.
+
+ // If this value is in the range [-32,30] and is even, use:
+ // tmp = VSPLTI[bhw], result = add tmp, tmp
+ if (SextVal >= -32 && SextVal <= 30 && (SextVal & 1) == 0) {
+ SDValue Res = BuildSplatI(SextVal >> 1, SplatSize, MVT::Other, DAG, dl);
+ Res = DAG.getNode(ISD::ADD, dl, Res.getValueType(), Res, Res);
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
+ }
+
+ // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
+ // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important
+ // for fneg/fabs.
+ if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
+ // Make -1 and vspltisw -1:
+ SDValue OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG, dl);
+
+ // Make the VSLW intrinsic, computing 0x8000_0000.
+ SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
+ OnesV, DAG, dl);
+
+ // xor by OnesV to invert it.
+ Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
+ }
+
+ // Check to see if this is a wide variety of vsplti*, binop self cases.
+ static const signed char SplatCsts[] = {
+ -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
+ -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
+ };
+
+ for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) {
+ // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
+ // cases which are ambiguous (e.g. formation of 0x8000_0000). 'vsplti -1'
+ int i = SplatCsts[idx];
+
+ // Figure out what shift amount will be used by altivec if shifted by i in
+ // this splat size.
+ unsigned TypeShiftAmt = i & (SplatBitSize-1);
+
+ // vsplti + shl self.
+ if (SextVal == (i << (int)TypeShiftAmt)) {
+ SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
+ static const unsigned IIDs[] = { // Intrinsic to use for each size.
+ Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
+ Intrinsic::ppc_altivec_vslw
+ };
+ Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
+ }
+
+ // vsplti + srl self.
+ if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
+ SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
+ static const unsigned IIDs[] = { // Intrinsic to use for each size.
+ Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
+ Intrinsic::ppc_altivec_vsrw
+ };
+ Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
+ }
+
+ // vsplti + sra self.
+ if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
+ SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
+ static const unsigned IIDs[] = { // Intrinsic to use for each size.
+ Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,
+ Intrinsic::ppc_altivec_vsraw
+ };
+ Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
+ }
+
+ // vsplti + rol self.
+ if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
+ ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
+ SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
+ static const unsigned IIDs[] = { // Intrinsic to use for each size.
+ Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
+ Intrinsic::ppc_altivec_vrlw
+ };
+ Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
+ }
+
+ // t = vsplti c, result = vsldoi t, t, 1
+ if (SextVal == ((i << 8) | (i < 0 ? 0xFF : 0))) {
+ SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
+ return BuildVSLDOI(T, T, 1, Op.getValueType(), DAG, dl);
+ }
+ // t = vsplti c, result = vsldoi t, t, 2
+ if (SextVal == ((i << 16) | (i < 0 ? 0xFFFF : 0))) {
+ SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
+ return BuildVSLDOI(T, T, 2, Op.getValueType(), DAG, dl);
+ }
+ // t = vsplti c, result = vsldoi t, t, 3
+ if (SextVal == ((i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
+ SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
+ return BuildVSLDOI(T, T, 3, Op.getValueType(), DAG, dl);
+ }
+ }
+
+ // Three instruction sequences.
+
+ // Odd, in range [17,31]: (vsplti C)-(vsplti -16).
+ if (SextVal >= 0 && SextVal <= 31) {
+ SDValue LHS = BuildSplatI(SextVal-16, SplatSize, MVT::Other, DAG, dl);
+ SDValue RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG, dl);
+ LHS = DAG.getNode(ISD::SUB, dl, LHS.getValueType(), LHS, RHS);
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), LHS);
+ }
+ // Odd, in range [-31,-17]: (vsplti C)+(vsplti -16).
+ if (SextVal >= -31 && SextVal <= 0) {
+ SDValue LHS = BuildSplatI(SextVal+16, SplatSize, MVT::Other, DAG, dl);
+ SDValue RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG, dl);
+ LHS = DAG.getNode(ISD::ADD, dl, LHS.getValueType(), LHS, RHS);
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), LHS);
+ }
+
+ return SDValue();
+}
+
+/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
+/// the specified operations to build the shuffle.
+static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
+ SDValue RHS, SelectionDAG &DAG,
+ DebugLoc dl) {
+ unsigned OpNum = (PFEntry >> 26) & 0x0F;
+ unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
+ unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
+
+ enum {
+ OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
+ OP_VMRGHW,
+ OP_VMRGLW,
+ OP_VSPLTISW0,
+ OP_VSPLTISW1,
+ OP_VSPLTISW2,
+ OP_VSPLTISW3,
+ OP_VSLDOI4,
+ OP_VSLDOI8,
+ OP_VSLDOI12
+ };
+
+ if (OpNum == OP_COPY) {
+ if (LHSID == (1*9+2)*9+3) return LHS;
+ assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
+ return RHS;
+ }
+
+ SDValue OpLHS, OpRHS;
+ OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
+ OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
+
+ int ShufIdxs[16];
+ switch (OpNum) {
+ default: llvm_unreachable("Unknown i32 permute!");
+ case OP_VMRGHW:
+ ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3;
+ ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
+ ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7;
+ ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
+ break;
+ case OP_VMRGLW:
+ ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
+ ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
+ ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
+ ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
+ break;
+ case OP_VSPLTISW0:
+ for (unsigned i = 0; i != 16; ++i)
+ ShufIdxs[i] = (i&3)+0;
+ break;
+ case OP_VSPLTISW1:
+ for (unsigned i = 0; i != 16; ++i)
+ ShufIdxs[i] = (i&3)+4;
+ break;
+ case OP_VSPLTISW2:
+ for (unsigned i = 0; i != 16; ++i)
+ ShufIdxs[i] = (i&3)+8;
+ break;
+ case OP_VSPLTISW3:
+ for (unsigned i = 0; i != 16; ++i)
+ ShufIdxs[i] = (i&3)+12;
+ break;
+ case OP_VSLDOI4:
+ return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
+ case OP_VSLDOI8:
+ return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
+ case OP_VSLDOI12:
+ return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
+ }
+ EVT VT = OpLHS.getValueType();
+ OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
+ OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
+ SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
+ return DAG.getNode(ISD::BITCAST, dl, VT, T);
+}
+
+/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
+/// is a shuffle we can handle in a single instruction, return it. Otherwise,
+/// return the code it can be lowered into. Worst case, it can always be
+/// lowered into a vperm.
+SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ EVT VT = Op.getValueType();
+
+ // Cases that are handled by instructions that take permute immediates
+ // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
+ // selected by the instruction selector.
+ if (V2.getOpcode() == ISD::UNDEF) {
+ if (PPC::isSplatShuffleMask(SVOp, 1) ||
+ PPC::isSplatShuffleMask(SVOp, 2) ||
+ PPC::isSplatShuffleMask(SVOp, 4) ||
+ PPC::isVPKUWUMShuffleMask(SVOp, true) ||
+ PPC::isVPKUHUMShuffleMask(SVOp, true) ||
+ PPC::isVSLDOIShuffleMask(SVOp, true) != -1 ||
+ PPC::isVMRGLShuffleMask(SVOp, 1, true) ||
+ PPC::isVMRGLShuffleMask(SVOp, 2, true) ||
+ PPC::isVMRGLShuffleMask(SVOp, 4, true) ||
+ PPC::isVMRGHShuffleMask(SVOp, 1, true) ||
+ PPC::isVMRGHShuffleMask(SVOp, 2, true) ||
+ PPC::isVMRGHShuffleMask(SVOp, 4, true)) {
+ return Op;
+ }
+ }
+
+ // Altivec has a variety of "shuffle immediates" that take two vector inputs
+ // and produce a fixed permutation. If any of these match, do not lower to
+ // VPERM.
+ if (PPC::isVPKUWUMShuffleMask(SVOp, false) ||
+ PPC::isVPKUHUMShuffleMask(SVOp, false) ||
+ PPC::isVSLDOIShuffleMask(SVOp, false) != -1 ||
+ PPC::isVMRGLShuffleMask(SVOp, 1, false) ||
+ PPC::isVMRGLShuffleMask(SVOp, 2, false) ||
+ PPC::isVMRGLShuffleMask(SVOp, 4, false) ||
+ PPC::isVMRGHShuffleMask(SVOp, 1, false) ||
+ PPC::isVMRGHShuffleMask(SVOp, 2, false) ||
+ PPC::isVMRGHShuffleMask(SVOp, 4, false))
+ return Op;
+
+ // Check to see if this is a shuffle of 4-byte values. If so, we can use our
+ // perfect shuffle table to emit an optimal matching sequence.
+ SmallVector<int, 16> PermMask;
+ SVOp->getMask(PermMask);
+
+ unsigned PFIndexes[4];
+ bool isFourElementShuffle = true;
+ for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
+ unsigned EltNo = 8; // Start out undef.
+ for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
+ if (PermMask[i*4+j] < 0)
+ continue; // Undef, ignore it.
+
+ unsigned ByteSource = PermMask[i*4+j];
+ if ((ByteSource & 3) != j) {
+ isFourElementShuffle = false;
+ break;
+ }
+
+ if (EltNo == 8) {
+ EltNo = ByteSource/4;
+ } else if (EltNo != ByteSource/4) {
+ isFourElementShuffle = false;
+ break;
+ }
+ }
+ PFIndexes[i] = EltNo;
+ }
+
+ // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
+ // perfect shuffle vector to determine if it is cost effective to do this as
+ // discrete instructions, or whether we should use a vperm.
+ if (isFourElementShuffle) {
+ // Compute the index in the perfect shuffle table.
+ unsigned PFTableIndex =
+ PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
+
+ unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
+ unsigned Cost = (PFEntry >> 30);
+
+ // Determining when to avoid vperm is tricky. Many things affect the cost
+ // of vperm, particularly how many times the perm mask needs to be computed.
+ // For example, if the perm mask can be hoisted out of a loop or is already
+ // used (perhaps because there are multiple permutes with the same shuffle
+ // mask?) the vperm has a cost of 1. OTOH, hoisting the permute mask out of
+ // the loop requires an extra register.
+ //
+ // As a compromise, we only emit discrete instructions if the shuffle can be
+ // generated in 3 or fewer operations. When we have loop information
+ // available, if this block is within a loop, we should avoid using vperm
+ // for 3-operation perms and use a constant pool load instead.
+ if (Cost < 3)
+ return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
+ }
+
+ // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
+ // vector that will get spilled to the constant pool.
+ if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
+
+ // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
+ // that it is in input element units, not in bytes. Convert now.
+ EVT EltVT = V1.getValueType().getVectorElementType();
+ unsigned BytesPerElement = EltVT.getSizeInBits()/8;
+
+ SmallVector<SDValue, 16> ResultMask;
+ for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
+ unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
+
+ for (unsigned j = 0; j != BytesPerElement; ++j)
+ ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
+ MVT::i32));
+ }
+
+ SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
+ &ResultMask[0], ResultMask.size());
+ return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(), V1, V2, VPermMask);
+}
+
+/// getAltivecCompareInfo - Given an intrinsic, return false if it is not an
+/// altivec comparison. If it is, return true and fill in Opc/isDot with
+/// information about the intrinsic.
+static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc,
+ bool &isDot) {
+ unsigned IntrinsicID =
+ cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
+ CompareOpc = -1;
+ isDot = false;
+ switch (IntrinsicID) {
+ default: return false;
+ // Comparison predicates.
+ case Intrinsic::ppc_altivec_vcmpbfp_p: CompareOpc = 966; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpeqfp_p: CompareOpc = 198; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc = 6; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc = 70; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break;
+
+ // Normal Comparisons.
+ case Intrinsic::ppc_altivec_vcmpbfp: CompareOpc = 966; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpeqfp: CompareOpc = 198; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpequb: CompareOpc = 6; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpequh: CompareOpc = 70; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpequw: CompareOpc = 134; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgefp: CompareOpc = 454; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgtfp: CompareOpc = 710; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgtsb: CompareOpc = 774; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgtsh: CompareOpc = 838; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgtsw: CompareOpc = 902; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgtub: CompareOpc = 518; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgtuh: CompareOpc = 582; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgtuw: CompareOpc = 646; isDot = 0; break;
+ }
+ return true;
+}
+
+/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
+/// lower, do it, otherwise return null.
+SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
+ SelectionDAG &DAG) const {
+ // If this is a lowered altivec predicate compare, CompareOpc is set to the
+ // opcode number of the comparison.
+ DebugLoc dl = Op.getDebugLoc();
+ int CompareOpc;
+ bool isDot;
+ if (!getAltivecCompareInfo(Op, CompareOpc, isDot))
+ return SDValue(); // Don't custom lower most intrinsics.
+
+ // If this is a non-dot comparison, make the VCMP node and we are done.
+ if (!isDot) {
+ SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
+ Op.getOperand(1), Op.getOperand(2),
+ DAG.getConstant(CompareOpc, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
+ }
+
+ // Create the PPCISD altivec 'dot' comparison node.
+ SDValue Ops[] = {
+ Op.getOperand(2), // LHS
+ Op.getOperand(3), // RHS
+ DAG.getConstant(CompareOpc, MVT::i32)
+ };
+ std::vector<EVT> VTs;
+ VTs.push_back(Op.getOperand(2).getValueType());
+ VTs.push_back(MVT::Glue);
+ SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3);
+
+ // Now that we have the comparison, emit a copy from the CR to a GPR.
+ // This is flagged to the above dot comparison.
+ SDValue Flags = DAG.getNode(PPCISD::MFCR, dl, MVT::i32,
+ DAG.getRegister(PPC::CR6, MVT::i32),
+ CompNode.getValue(1));
+
+ // Unpack the result based on how the target uses it.
+ unsigned BitNo; // Bit # of CR6.
+ bool InvertBit; // Invert result?
+ switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
+ default: // Can't happen, don't crash on invalid number though.
+ case 0: // Return the value of the EQ bit of CR6.
+ BitNo = 0; InvertBit = false;
+ break;
+ case 1: // Return the inverted value of the EQ bit of CR6.
+ BitNo = 0; InvertBit = true;
+ break;
+ case 2: // Return the value of the LT bit of CR6.
+ BitNo = 2; InvertBit = false;
+ break;
+ case 3: // Return the inverted value of the LT bit of CR6.
+ BitNo = 2; InvertBit = true;
+ break;
+ }
+
+ // Shift the bit into the low position.
+ Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
+ DAG.getConstant(8-(3-BitNo), MVT::i32));
+ // Isolate the bit.
+ Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
+ DAG.getConstant(1, MVT::i32));
+
+ // If we are supposed to, toggle the bit.
+ if (InvertBit)
+ Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
+ DAG.getConstant(1, MVT::i32));
+ return Flags;
+}
+
+SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ // Create a stack slot that is 16-byte aligned.
+ MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
+ int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
+ EVT PtrVT = getPointerTy();
+ SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
+
+ // Store the input value into Value#0 of the stack slot.
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl,
+ Op.getOperand(0), FIdx, MachinePointerInfo(),
+ false, false, 0);
+ // Load it out.
+ return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo(),
+ false, false, 0);
+}
+
+SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ if (Op.getValueType() == MVT::v4i32) {
+ SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
+
+ SDValue Zero = BuildSplatI( 0, 1, MVT::v4i32, DAG, dl);
+ SDValue Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG, dl);//+16 as shift amt.
+
+ SDValue RHSSwap = // = vrlw RHS, 16
+ BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
+
+ // Shrinkify inputs to v8i16.
+ LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
+ RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
+ RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);
+
+ // Low parts multiplied together, generating 32-bit results (we ignore the
+ // top parts).
+ SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
+ LHS, RHS, DAG, dl, MVT::v4i32);
+
+ SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
+ LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
+ // Shift the high parts up 16 bits.
+ HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
+ Neg16, DAG, dl);
+ return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
+ } else if (Op.getValueType() == MVT::v8i16) {
+ SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
+
+ SDValue Zero = BuildSplatI(0, 1, MVT::v8i16, DAG, dl);
+
+ return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm,
+ LHS, RHS, Zero, DAG, dl);
+ } else if (Op.getValueType() == MVT::v16i8) {
+ SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
+
+ // Multiply the even 8-bit parts, producing 16-bit sums.
+ SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
+ LHS, RHS, DAG, dl, MVT::v8i16);
+ EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);
+
+ // Multiply the odd 8-bit parts, producing 16-bit sums.
+ SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
+ LHS, RHS, DAG, dl, MVT::v8i16);
+ OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);
+
+ // Merge the results together.
+ int Ops[16];
+ for (unsigned i = 0; i != 8; ++i) {
+ Ops[i*2 ] = 2*i+1;
+ Ops[i*2+1] = 2*i+1+16;
+ }
+ return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
+ } else {
+ llvm_unreachable("Unknown mul to lower!");
+ }
+}
+
+/// LowerOperation - Provide custom lowering hooks for some operations.
+///
+SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+ switch (Op.getOpcode()) {
+ default: llvm_unreachable("Wasn't expecting to be able to lower this!");
+ case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
+ case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
+ case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
+ case ISD::GlobalTLSAddress: llvm_unreachable("TLS not implemented for PPC");
+ case ISD::JumpTable: return LowerJumpTable(Op, DAG);
+ case ISD::SETCC: return LowerSETCC(Op, DAG);
+ case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
+ case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
+ case ISD::VASTART:
+ return LowerVASTART(Op, DAG, PPCSubTarget);
+
+ case ISD::VAARG:
+ return LowerVAARG(Op, DAG, PPCSubTarget);
+
+ case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG, PPCSubTarget);
+ case ISD::DYNAMIC_STACKALLOC:
+ return LowerDYNAMIC_STACKALLOC(Op, DAG, PPCSubTarget);
+
+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ case ISD::FP_TO_UINT:
+ case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG,
+ Op.getDebugLoc());
+ case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
+ case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
+
+ // Lower 64-bit shifts.
+ case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG);
+ case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG);
+ case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG);
+
+ // Vector-related lowering.
+ case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
+ case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
+ case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
+ case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
+ case ISD::MUL: return LowerMUL(Op, DAG);
+
+ // Frame & Return address.
+ case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
+ case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
+ }
+ return SDValue();
+}
+
+void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG) const {
+ const TargetMachine &TM = getTargetMachine();
+ DebugLoc dl = N->getDebugLoc();
+ switch (N->getOpcode()) {
+ default:
+ assert(false && "Do not know how to custom type legalize this operation!");
+ return;
+ case ISD::VAARG: {
+ if (!TM.getSubtarget<PPCSubtarget>().isSVR4ABI()
+ || TM.getSubtarget<PPCSubtarget>().isPPC64())
+ return;
+
+ EVT VT = N->getValueType(0);
+
+ if (VT == MVT::i64) {
+ SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG, PPCSubTarget);
+
+ Results.push_back(NewNode);
+ Results.push_back(NewNode.getValue(1));
+ }
+ return;
+ }
+ case ISD::FP_ROUND_INREG: {
+ assert(N->getValueType(0) == MVT::ppcf128);
+ assert(N->getOperand(0).getValueType() == MVT::ppcf128);
+ SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
+ MVT::f64, N->getOperand(0),
+ DAG.getIntPtrConstant(0));
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
+ MVT::f64, N->getOperand(0),
+ DAG.getIntPtrConstant(1));
+
+ // This sequence changes FPSCR to do round-to-zero, adds the two halves
+ // of the long double, and puts FPSCR back the way it was. We do not
+ // actually model FPSCR.
+ std::vector<EVT> NodeTys;
+ SDValue Ops[4], Result, MFFSreg, InFlag, FPreg;
+
+ NodeTys.push_back(MVT::f64); // Return register
+ NodeTys.push_back(MVT::Glue); // Returns a flag for later insns
+ Result = DAG.getNode(PPCISD::MFFS, dl, NodeTys, &InFlag, 0);
+ MFFSreg = Result.getValue(0);
+ InFlag = Result.getValue(1);
+
+ NodeTys.clear();
+ NodeTys.push_back(MVT::Glue); // Returns a flag
+ Ops[0] = DAG.getConstant(31, MVT::i32);
+ Ops[1] = InFlag;
+ Result = DAG.getNode(PPCISD::MTFSB1, dl, NodeTys, Ops, 2);
+ InFlag = Result.getValue(0);
+
+ NodeTys.clear();
+ NodeTys.push_back(MVT::Glue); // Returns a flag
+ Ops[0] = DAG.getConstant(30, MVT::i32);
+ Ops[1] = InFlag;
+ Result = DAG.getNode(PPCISD::MTFSB0, dl, NodeTys, Ops, 2);
+ InFlag = Result.getValue(0);
+
+ NodeTys.clear();
+ NodeTys.push_back(MVT::f64); // result of add
+ NodeTys.push_back(MVT::Glue); // Returns a flag
+ Ops[0] = Lo;
+ Ops[1] = Hi;
+ Ops[2] = InFlag;
+ Result = DAG.getNode(PPCISD::FADDRTZ, dl, NodeTys, Ops, 3);
+ FPreg = Result.getValue(0);
+ InFlag = Result.getValue(1);
+
+ NodeTys.clear();
+ NodeTys.push_back(MVT::f64);
+ Ops[0] = DAG.getConstant(1, MVT::i32);
+ Ops[1] = MFFSreg;
+ Ops[2] = FPreg;
+ Ops[3] = InFlag;
+ Result = DAG.getNode(PPCISD::MTFSF, dl, NodeTys, Ops, 4);
+ FPreg = Result.getValue(0);
+
+ // We know the low half is about to be thrown away, so just use something
+ // convenient.
+ Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
+ FPreg, FPreg));
+ return;
+ }
+ case ISD::FP_TO_SINT:
+ Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
+ return;
+ }
+}
+
+
+//===----------------------------------------------------------------------===//
+// Other Lowering Code
+//===----------------------------------------------------------------------===//
+
+MachineBasicBlock *
+PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
+ bool is64bit, unsigned BinOpcode) const {
+ // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction *F = BB->getParent();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ unsigned dest = MI->getOperand(0).getReg();
+ unsigned ptrA = MI->getOperand(1).getReg();
+ unsigned ptrB = MI->getOperand(2).getReg();
+ unsigned incr = MI->getOperand(3).getReg();
+ DebugLoc dl = MI->getDebugLoc();
+
+ MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, loopMBB);
+ F->insert(It, exitMBB);
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ MachineRegisterInfo &RegInfo = F->getRegInfo();
+ unsigned TmpReg = (!BinOpcode) ? incr :
+ RegInfo.createVirtualRegister(
+ is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass :
+ (const TargetRegisterClass *) &PPC::GPRCRegClass);
+
+ // thisMBB:
+ // ...
+ // fallthrough --> loopMBB
+ BB->addSuccessor(loopMBB);
+
+ // loopMBB:
+ // l[wd]arx dest, ptr
+ // add r0, dest, incr
+ // st[wd]cx. r0, ptr
+ // bne- loopMBB
+ // fallthrough --> exitMBB
+ BB = loopMBB;
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest)
+ .addReg(ptrA).addReg(ptrB);
+ if (BinOpcode)
+ BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
+ .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
+ BuildMI(BB, dl, TII->get(PPC::BCC))
+ .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
+ BB->addSuccessor(loopMBB);
+ BB->addSuccessor(exitMBB);
+
+ // exitMBB:
+ // ...
+ BB = exitMBB;
+ return BB;
+}
+
+MachineBasicBlock *
+PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ bool is8bit, // operation
+ unsigned BinOpcode) const {
+ // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ // In 64 bit mode we have to use 64 bits for addresses, even though the
+ // lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address
+ // registers without caring whether they're 32 or 64, but here we're
+ // doing actual arithmetic on the addresses.
+ bool is64bit = PPCSubTarget.isPPC64();
+ unsigned ZeroReg = is64bit ? PPC::X0 : PPC::R0;
+
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction *F = BB->getParent();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ unsigned dest = MI->getOperand(0).getReg();
+ unsigned ptrA = MI->getOperand(1).getReg();
+ unsigned ptrB = MI->getOperand(2).getReg();
+ unsigned incr = MI->getOperand(3).getReg();
+ DebugLoc dl = MI->getDebugLoc();
+
+ MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, loopMBB);
+ F->insert(It, exitMBB);
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ MachineRegisterInfo &RegInfo = F->getRegInfo();
+ const TargetRegisterClass *RC =
+ is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass :
+ (const TargetRegisterClass *) &PPC::GPRCRegClass;
+ unsigned PtrReg = RegInfo.createVirtualRegister(RC);
+ unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
+ unsigned ShiftReg = RegInfo.createVirtualRegister(RC);
+ unsigned Incr2Reg = RegInfo.createVirtualRegister(RC);
+ unsigned MaskReg = RegInfo.createVirtualRegister(RC);
+ unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
+ unsigned Mask3Reg = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp3Reg = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC);
+ unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
+ unsigned Ptr1Reg;
+ unsigned TmpReg = (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(RC);
+
+ // thisMBB:
+ // ...
+ // fallthrough --> loopMBB
+ BB->addSuccessor(loopMBB);
+
+ // The 4-byte load must be aligned, while a char or short may be
+ // anywhere in the word. Hence all this nasty bookkeeping code.
+ // add ptr1, ptrA, ptrB [copy if ptrA==0]
+ // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
+ // xori shift, shift1, 24 [16]
+ // rlwinm ptr, ptr1, 0, 0, 29
+ // slw incr2, incr, shift
+ // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
+ // slw mask, mask2, shift
+ // loopMBB:
+ // lwarx tmpDest, ptr
+ // add tmp, tmpDest, incr2
+ // andc tmp2, tmpDest, mask
+ // and tmp3, tmp, mask
+ // or tmp4, tmp3, tmp2
+ // stwcx. tmp4, ptr
+ // bne- loopMBB
+ // fallthrough --> exitMBB
+ // srw dest, tmpDest, shift
+ if (ptrA != ZeroReg) {
+ Ptr1Reg = RegInfo.createVirtualRegister(RC);
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
+ .addReg(ptrA).addReg(ptrB);
+ } else {
+ Ptr1Reg = ptrB;
+ }
+ BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
+ .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
+ .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
+ if (is64bit)
+ BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
+ .addReg(Ptr1Reg).addImm(0).addImm(61);
+ else
+ BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
+ .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
+ BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg)
+ .addReg(incr).addReg(ShiftReg);
+ if (is8bit)
+ BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
+ else {
+ BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
+ BuildMI(BB, dl, TII->get(PPC::ORI),Mask2Reg).addReg(Mask3Reg).addImm(65535);
+ }
+ BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
+ .addReg(Mask2Reg).addReg(ShiftReg);
+
+ BB = loopMBB;
+ BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
+ .addReg(ZeroReg).addReg(PtrReg);
+ if (BinOpcode)
+ BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
+ .addReg(Incr2Reg).addReg(TmpDestReg);
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::ANDC8 : PPC::ANDC), Tmp2Reg)
+ .addReg(TmpDestReg).addReg(MaskReg);
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), Tmp3Reg)
+ .addReg(TmpReg).addReg(MaskReg);
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg)
+ .addReg(Tmp3Reg).addReg(Tmp2Reg);
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
+ .addReg(Tmp4Reg).addReg(ZeroReg).addReg(PtrReg);
+ BuildMI(BB, dl, TII->get(PPC::BCC))
+ .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
+ BB->addSuccessor(loopMBB);
+ BB->addSuccessor(exitMBB);
+
+ // exitMBB:
+ // ...
+ BB = exitMBB;
+ BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest).addReg(TmpDestReg)
+ .addReg(ShiftReg);
+ return BB;
+}
+
+MachineBasicBlock *
+PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ // To "insert" these instructions we actually have to insert their
+ // control-flow patterns.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ MachineFunction *F = BB->getParent();
+
+ if (MI->getOpcode() == PPC::SELECT_CC_I4 ||
+ MI->getOpcode() == PPC::SELECT_CC_I8 ||
+ MI->getOpcode() == PPC::SELECT_CC_F4 ||
+ MI->getOpcode() == PPC::SELECT_CC_F8 ||
+ MI->getOpcode() == PPC::SELECT_CC_VRRC) {
+
+ // The incoming instruction knows the destination vreg to set, the
+ // condition code register to branch on, the true/false values to
+ // select between, and a branch opcode to use.
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // cmpTY ccX, r1, r2
+ // bCC copy1MBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ unsigned SelectPred = MI->getOperand(4).getImm();
+ DebugLoc dl = MI->getDebugLoc();
+ F->insert(It, copy0MBB);
+ F->insert(It, sinkMBB);
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Next, add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
+ BuildMI(BB, dl, TII->get(PPC::BCC))
+ .addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to sinkMBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
+ // ...
+ BB = sinkMBB;
+ BuildMI(*BB, BB->begin(), dl,
+ TII->get(PPC::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB)
+ .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
+ }
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
+ BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
+ BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
+ BB = EmitAtomicBinary(MI, BB, false, PPC::ADD4);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
+ BB = EmitAtomicBinary(MI, BB, true, PPC::ADD8);
+
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
+ BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
+ BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
+ BB = EmitAtomicBinary(MI, BB, false, PPC::AND);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
+ BB = EmitAtomicBinary(MI, BB, true, PPC::AND8);
+
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
+ BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
+ BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
+ BB = EmitAtomicBinary(MI, BB, false, PPC::OR);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
+ BB = EmitAtomicBinary(MI, BB, true, PPC::OR8);
+
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
+ BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
+ BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
+ BB = EmitAtomicBinary(MI, BB, false, PPC::XOR);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
+ BB = EmitAtomicBinary(MI, BB, true, PPC::XOR8);
+
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
+ BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ANDC);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
+ BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ANDC);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
+ BB = EmitAtomicBinary(MI, BB, false, PPC::ANDC);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
+ BB = EmitAtomicBinary(MI, BB, true, PPC::ANDC8);
+
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
+ BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
+ BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
+ BB = EmitAtomicBinary(MI, BB, false, PPC::SUBF);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
+ BB = EmitAtomicBinary(MI, BB, true, PPC::SUBF8);
+
+ else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I8)
+ BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
+ else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I16)
+ BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
+ else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I32)
+ BB = EmitAtomicBinary(MI, BB, false, 0);
+ else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I64)
+ BB = EmitAtomicBinary(MI, BB, true, 0);
+
+ else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
+ MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64) {
+ bool is64bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
+
+ unsigned dest = MI->getOperand(0).getReg();
+ unsigned ptrA = MI->getOperand(1).getReg();
+ unsigned ptrB = MI->getOperand(2).getReg();
+ unsigned oldval = MI->getOperand(3).getReg();
+ unsigned newval = MI->getOperand(4).getReg();
+ DebugLoc dl = MI->getDebugLoc();
+
+ MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, loop1MBB);
+ F->insert(It, loop2MBB);
+ F->insert(It, midMBB);
+ F->insert(It, exitMBB);
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // thisMBB:
+ // ...
+ // fallthrough --> loopMBB
+ BB->addSuccessor(loop1MBB);
+
+ // loop1MBB:
+ // l[wd]arx dest, ptr
+ // cmp[wd] dest, oldval
+ // bne- midMBB
+ // loop2MBB:
+ // st[wd]cx. newval, ptr
+ // bne- loopMBB
+ // b exitBB
+ // midMBB:
+ // st[wd]cx. dest, ptr
+ // exitBB:
+ BB = loop1MBB;
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest)
+ .addReg(ptrA).addReg(ptrB);
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
+ .addReg(oldval).addReg(dest);
+ BuildMI(BB, dl, TII->get(PPC::BCC))
+ .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
+ BB->addSuccessor(loop2MBB);
+ BB->addSuccessor(midMBB);
+
+ BB = loop2MBB;
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
+ .addReg(newval).addReg(ptrA).addReg(ptrB);
+ BuildMI(BB, dl, TII->get(PPC::BCC))
+ .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
+ BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
+ BB->addSuccessor(loop1MBB);
+ BB->addSuccessor(exitMBB);
+
+ BB = midMBB;
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
+ .addReg(dest).addReg(ptrA).addReg(ptrB);
+ BB->addSuccessor(exitMBB);
+
+ // exitMBB:
+ // ...
+ BB = exitMBB;
+ } else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
+ MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
+ // We must use 64-bit registers for addresses when targeting 64-bit,
+ // since we're actually doing arithmetic on them. Other registers
+ // can be 32-bit.
+ bool is64bit = PPCSubTarget.isPPC64();
+ bool is8bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
+
+ unsigned dest = MI->getOperand(0).getReg();
+ unsigned ptrA = MI->getOperand(1).getReg();
+ unsigned ptrB = MI->getOperand(2).getReg();
+ unsigned oldval = MI->getOperand(3).getReg();
+ unsigned newval = MI->getOperand(4).getReg();
+ DebugLoc dl = MI->getDebugLoc();
+
+ MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, loop1MBB);
+ F->insert(It, loop2MBB);
+ F->insert(It, midMBB);
+ F->insert(It, exitMBB);
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ MachineRegisterInfo &RegInfo = F->getRegInfo();
+ const TargetRegisterClass *RC =
+ is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass :
+ (const TargetRegisterClass *) &PPC::GPRCRegClass;
+ unsigned PtrReg = RegInfo.createVirtualRegister(RC);
+ unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
+ unsigned ShiftReg = RegInfo.createVirtualRegister(RC);
+ unsigned NewVal2Reg = RegInfo.createVirtualRegister(RC);
+ unsigned NewVal3Reg = RegInfo.createVirtualRegister(RC);
+ unsigned OldVal2Reg = RegInfo.createVirtualRegister(RC);
+ unsigned OldVal3Reg = RegInfo.createVirtualRegister(RC);
+ unsigned MaskReg = RegInfo.createVirtualRegister(RC);
+ unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
+ unsigned Mask3Reg = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC);
+ unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
+ unsigned Ptr1Reg;
+ unsigned TmpReg = RegInfo.createVirtualRegister(RC);
+ unsigned ZeroReg = is64bit ? PPC::X0 : PPC::R0;
+ // thisMBB:
+ // ...
+ // fallthrough --> loopMBB
+ BB->addSuccessor(loop1MBB);
+
+ // The 4-byte load must be aligned, while a char or short may be
+ // anywhere in the word. Hence all this nasty bookkeeping code.
+ // add ptr1, ptrA, ptrB [copy if ptrA==0]
+ // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
+ // xori shift, shift1, 24 [16]
+ // rlwinm ptr, ptr1, 0, 0, 29
+ // slw newval2, newval, shift
+ // slw oldval2, oldval,shift
+ // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
+ // slw mask, mask2, shift
+ // and newval3, newval2, mask
+ // and oldval3, oldval2, mask
+ // loop1MBB:
+ // lwarx tmpDest, ptr
+ // and tmp, tmpDest, mask
+ // cmpw tmp, oldval3
+ // bne- midMBB
+ // loop2MBB:
+ // andc tmp2, tmpDest, mask
+ // or tmp4, tmp2, newval3
+ // stwcx. tmp4, ptr
+ // bne- loop1MBB
+ // b exitBB
+ // midMBB:
+ // stwcx. tmpDest, ptr
+ // exitBB:
+ // srw dest, tmpDest, shift
+ if (ptrA != ZeroReg) {
+ Ptr1Reg = RegInfo.createVirtualRegister(RC);
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
+ .addReg(ptrA).addReg(ptrB);
+ } else {
+ Ptr1Reg = ptrB;
+ }
+ BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
+ .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
+ .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
+ if (is64bit)
+ BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
+ .addReg(Ptr1Reg).addImm(0).addImm(61);
+ else
+ BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
+ .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
+ BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
+ .addReg(newval).addReg(ShiftReg);
+ BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
+ .addReg(oldval).addReg(ShiftReg);
+ if (is8bit)
+ BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
+ else {
+ BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
+ BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
+ .addReg(Mask3Reg).addImm(65535);
+ }
+ BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
+ .addReg(Mask2Reg).addReg(ShiftReg);
+ BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
+ .addReg(NewVal2Reg).addReg(MaskReg);
+ BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
+ .addReg(OldVal2Reg).addReg(MaskReg);
+
+ BB = loop1MBB;
+ BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
+ .addReg(ZeroReg).addReg(PtrReg);
+ BuildMI(BB, dl, TII->get(PPC::AND),TmpReg)
+ .addReg(TmpDestReg).addReg(MaskReg);
+ BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0)
+ .addReg(TmpReg).addReg(OldVal3Reg);
+ BuildMI(BB, dl, TII->get(PPC::BCC))
+ .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
+ BB->addSuccessor(loop2MBB);
+ BB->addSuccessor(midMBB);
+
+ BB = loop2MBB;
+ BuildMI(BB, dl, TII->get(PPC::ANDC),Tmp2Reg)
+ .addReg(TmpDestReg).addReg(MaskReg);
+ BuildMI(BB, dl, TII->get(PPC::OR),Tmp4Reg)
+ .addReg(Tmp2Reg).addReg(NewVal3Reg);
+ BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(Tmp4Reg)
+ .addReg(ZeroReg).addReg(PtrReg);
+ BuildMI(BB, dl, TII->get(PPC::BCC))
+ .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
+ BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
+ BB->addSuccessor(loop1MBB);
+ BB->addSuccessor(exitMBB);
+
+ BB = midMBB;
+ BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(TmpDestReg)
+ .addReg(ZeroReg).addReg(PtrReg);
+ BB->addSuccessor(exitMBB);
+
+ // exitMBB:
+ // ...
+ BB = exitMBB;
+ BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW),dest).addReg(TmpReg)
+ .addReg(ShiftReg);
+ } else {
+ llvm_unreachable("Unexpected instr type to insert");
+ }
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+//===----------------------------------------------------------------------===//
+// Target Optimization Hooks
+//===----------------------------------------------------------------------===//
+
+SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ const TargetMachine &TM = getTargetMachine();
+ SelectionDAG &DAG = DCI.DAG;
+ DebugLoc dl = N->getDebugLoc();
+ switch (N->getOpcode()) {
+ default: break;
+ case PPCISD::SHL:
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
+ if (C->isNullValue()) // 0 << V -> 0.
+ return N->getOperand(0);
+ }
+ break;
+ case PPCISD::SRL:
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
+ if (C->isNullValue()) // 0 >>u V -> 0.
+ return N->getOperand(0);
+ }
+ break;
+ case PPCISD::SRA:
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
+ if (C->isNullValue() || // 0 >>s V -> 0.
+ C->isAllOnesValue()) // -1 >>s V -> -1.
+ return N->getOperand(0);
+ }
+ break;
+
+ case ISD::SINT_TO_FP:
+ if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) {
+ if (N->getOperand(0).getOpcode() == ISD::FP_TO_SINT) {
+ // Turn (sint_to_fp (fp_to_sint X)) -> fctidz/fcfid without load/stores.
+ // We allow the src/dst to be either f32/f64, but the intermediate
+ // type must be i64.
+ if (N->getOperand(0).getValueType() == MVT::i64 &&
+ N->getOperand(0).getOperand(0).getValueType() != MVT::ppcf128) {
+ SDValue Val = N->getOperand(0).getOperand(0);
+ if (Val.getValueType() == MVT::f32) {
+ Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
+ DCI.AddToWorklist(Val.getNode());
+ }
+
+ Val = DAG.getNode(PPCISD::FCTIDZ, dl, MVT::f64, Val);
+ DCI.AddToWorklist(Val.getNode());
+ Val = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Val);
+ DCI.AddToWorklist(Val.getNode());
+ if (N->getValueType(0) == MVT::f32) {
+ Val = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Val,
+ DAG.getIntPtrConstant(0));
+ DCI.AddToWorklist(Val.getNode());
+ }
+ return Val;
+ } else if (N->getOperand(0).getValueType() == MVT::i32) {
+ // If the intermediate type is i32, we can avoid the load/store here
+ // too.
+ }
+ }
+ }
+ break;
+ case ISD::STORE:
+ // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)).
+ if (TM.getSubtarget<PPCSubtarget>().hasSTFIWX() &&
+ !cast<StoreSDNode>(N)->isTruncatingStore() &&
+ N->getOperand(1).getOpcode() == ISD::FP_TO_SINT &&
+ N->getOperand(1).getValueType() == MVT::i32 &&
+ N->getOperand(1).getOperand(0).getValueType() != MVT::ppcf128) {
+ SDValue Val = N->getOperand(1).getOperand(0);
+ if (Val.getValueType() == MVT::f32) {
+ Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
+ DCI.AddToWorklist(Val.getNode());
+ }
+ Val = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Val);
+ DCI.AddToWorklist(Val.getNode());
+
+ Val = DAG.getNode(PPCISD::STFIWX, dl, MVT::Other, N->getOperand(0), Val,
+ N->getOperand(2), N->getOperand(3));
+ DCI.AddToWorklist(Val.getNode());
+ return Val;
+ }
+
+ // Turn STORE (BSWAP) -> sthbrx/stwbrx.
+ if (cast<StoreSDNode>(N)->isUnindexed() &&
+ N->getOperand(1).getOpcode() == ISD::BSWAP &&
+ N->getOperand(1).getNode()->hasOneUse() &&
+ (N->getOperand(1).getValueType() == MVT::i32 ||
+ N->getOperand(1).getValueType() == MVT::i16)) {
+ SDValue BSwapOp = N->getOperand(1).getOperand(0);
+ // Do an any-extend to 32-bits if this is a half-word input.
+ if (BSwapOp.getValueType() == MVT::i16)
+ BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
+
+ SDValue Ops[] = {
+ N->getOperand(0), BSwapOp, N->getOperand(2),
+ DAG.getValueType(N->getOperand(1).getValueType())
+ };
+ return
+ DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
+ Ops, array_lengthof(Ops),
+ cast<StoreSDNode>(N)->getMemoryVT(),
+ cast<StoreSDNode>(N)->getMemOperand());
+ }
+ break;
+ case ISD::BSWAP:
+ // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
+ if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
+ N->getOperand(0).hasOneUse() &&
+ (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16)) {
+ SDValue Load = N->getOperand(0);
+ LoadSDNode *LD = cast<LoadSDNode>(Load);
+ // Create the byte-swapping load.
+ SDValue Ops[] = {
+ LD->getChain(), // Chain
+ LD->getBasePtr(), // Ptr
+ DAG.getValueType(N->getValueType(0)) // VT
+ };
+ SDValue BSLoad =
+ DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
+ DAG.getVTList(MVT::i32, MVT::Other), Ops, 3,
+ LD->getMemoryVT(), LD->getMemOperand());
+
+ // If this is an i16 load, insert the truncate.
+ SDValue ResVal = BSLoad;
+ if (N->getValueType(0) == MVT::i16)
+ ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
+
+ // First, combine the bswap away. This makes the value produced by the
+ // load dead.
+ DCI.CombineTo(N, ResVal);
+
+ // Next, combine the load away, we give it a bogus result value but a real
+ // chain result. The result value is dead because the bswap is dead.
+ DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
+
+ // Return N so it doesn't get rechecked!
+ return SDValue(N, 0);
+ }
+
+ break;
+ case PPCISD::VCMP: {
+ // If a VCMPo node already exists with exactly the same operands as this
+ // node, use its result instead of this node (VCMPo computes both a CR6 and
+ // a normal output).
+ //
+ if (!N->getOperand(0).hasOneUse() &&
+ !N->getOperand(1).hasOneUse() &&
+ !N->getOperand(2).hasOneUse()) {
+
+ // Scan all of the users of the LHS, looking for VCMPo's that match.
+ SDNode *VCMPoNode = 0;
+
+ SDNode *LHSN = N->getOperand(0).getNode();
+ for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
+ UI != E; ++UI)
+ if (UI->getOpcode() == PPCISD::VCMPo &&
+ UI->getOperand(1) == N->getOperand(1) &&
+ UI->getOperand(2) == N->getOperand(2) &&
+ UI->getOperand(0) == N->getOperand(0)) {
+ VCMPoNode = *UI;
+ break;
+ }
+
+ // If there is no VCMPo node, or if the flag value has a single use, don't
+ // transform this.
+ if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1))
+ break;
+
+ // Look at the (necessarily single) use of the flag value. If it has a
+ // chain, this transformation is more complex. Note that multiple things
+ // could use the value result, which we should ignore.
+ SDNode *FlagUser = 0;
+ for (SDNode::use_iterator UI = VCMPoNode->use_begin();
+ FlagUser == 0; ++UI) {
+ assert(UI != VCMPoNode->use_end() && "Didn't find user!");
+ SDNode *User = *UI;
+ for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
+ if (User->getOperand(i) == SDValue(VCMPoNode, 1)) {
+ FlagUser = User;
+ break;
+ }
+ }
+ }
+
+ // If the user is a MFCR instruction, we know this is safe. Otherwise we
+ // give up for right now.
+ if (FlagUser->getOpcode() == PPCISD::MFCR)
+ return SDValue(VCMPoNode, 0);
+ }
+ break;
+ }
+ case ISD::BR_CC: {
+ // If this is a branch on an altivec predicate comparison, lower this so
+ // that we don't have to do a MFCR: instead, branch directly on CR6. This
+ // lowering is done pre-legalize, because the legalizer lowers the predicate
+ // compare down to code that is difficult to reassemble.
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
+ SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
+ int CompareOpc;
+ bool isDot;
+
+ if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
+ isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
+ getAltivecCompareInfo(LHS, CompareOpc, isDot)) {
+ assert(isDot && "Can't compare against a vector result!");
+
+ // If this is a comparison against something other than 0/1, then we know
+ // that the condition is never/always true.
+ unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
+ if (Val != 0 && Val != 1) {
+ if (CC == ISD::SETEQ) // Cond never true, remove branch.
+ return N->getOperand(0);
+ // Always !=, turn it into an unconditional branch.
+ return DAG.getNode(ISD::BR, dl, MVT::Other,
+ N->getOperand(0), N->getOperand(4));
+ }
+
+ bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
+
+ // Create the PPCISD altivec 'dot' comparison node.
+ std::vector<EVT> VTs;
+ SDValue Ops[] = {
+ LHS.getOperand(2), // LHS of compare
+ LHS.getOperand(3), // RHS of compare
+ DAG.getConstant(CompareOpc, MVT::i32)
+ };
+ VTs.push_back(LHS.getOperand(2).getValueType());
+ VTs.push_back(MVT::Glue);
+ SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3);
+
+ // Unpack the result based on how the target uses it.
+ PPC::Predicate CompOpc;
+ switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) {
+ default: // Can't happen, don't crash on invalid number though.
+ case 0: // Branch on the value of the EQ bit of CR6.
+ CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
+ break;
+ case 1: // Branch on the inverted value of the EQ bit of CR6.
+ CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
+ break;
+ case 2: // Branch on the value of the LT bit of CR6.
+ CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
+ break;
+ case 3: // Branch on the inverted value of the LT bit of CR6.
+ CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
+ break;
+ }
+
+ return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
+ DAG.getConstant(CompOpc, MVT::i32),
+ DAG.getRegister(PPC::CR6, MVT::i32),
+ N->getOperand(4), CompNode.getValue(1));
+ }
+ break;
+ }
+ }
+
+ return SDValue();
+}
+
+//===----------------------------------------------------------------------===//
+// Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+void PPCTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
+ const APInt &Mask,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth) const {
+ KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
+ switch (Op.getOpcode()) {
+ default: break;
+ case PPCISD::LBRX: {
+ // lhbrx is known to have the top bits cleared out.
+ if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
+ KnownZero = 0xFFFF0000;
+ break;
+ }
+ case ISD::INTRINSIC_WO_CHAIN: {
+ switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
+ default: break;
+ case Intrinsic::ppc_altivec_vcmpbfp_p:
+ case Intrinsic::ppc_altivec_vcmpeqfp_p:
+ case Intrinsic::ppc_altivec_vcmpequb_p:
+ case Intrinsic::ppc_altivec_vcmpequh_p:
+ case Intrinsic::ppc_altivec_vcmpequw_p:
+ case Intrinsic::ppc_altivec_vcmpgefp_p:
+ case Intrinsic::ppc_altivec_vcmpgtfp_p:
+ case Intrinsic::ppc_altivec_vcmpgtsb_p:
+ case Intrinsic::ppc_altivec_vcmpgtsh_p:
+ case Intrinsic::ppc_altivec_vcmpgtsw_p:
+ case Intrinsic::ppc_altivec_vcmpgtub_p:
+ case Intrinsic::ppc_altivec_vcmpgtuh_p:
+ case Intrinsic::ppc_altivec_vcmpgtuw_p:
+ KnownZero = ~1U; // All bits but the low one are known to be zero.
+ break;
+ }
+ }
+ }
+}
+
+
+/// getConstraintType - Given a constraint, return the type of
+/// constraint it is for this target.
+PPCTargetLowering::ConstraintType
+PPCTargetLowering::getConstraintType(const std::string &Constraint) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ default: break;
+ case 'b':
+ case 'r':
+ case 'f':
+ case 'v':
+ case 'y':
+ return C_RegisterClass;
+ }
+ }
+ return TargetLowering::getConstraintType(Constraint);
+}
+
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+PPCTargetLowering::getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const {
+ ConstraintWeight weight = CW_Invalid;
+ Value *CallOperandVal = info.CallOperandVal;
+ // If we don't have a value, we can't do a match,
+ // but allow it at the lowest weight.
+ if (CallOperandVal == NULL)
+ return CW_Default;
+ Type *type = CallOperandVal->getType();
+ // Look at the constraint type.
+ switch (*constraint) {
+ default:
+ weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
+ break;
+ case 'b':
+ if (type->isIntegerTy())
+ weight = CW_Register;
+ break;
+ case 'f':
+ if (type->isFloatTy())
+ weight = CW_Register;
+ break;
+ case 'd':
+ if (type->isDoubleTy())
+ weight = CW_Register;
+ break;
+ case 'v':
+ if (type->isVectorTy())
+ weight = CW_Register;
+ break;
+ case 'y':
+ weight = CW_Register;
+ break;
+ }
+ return weight;
+}
+
+std::pair<unsigned, const TargetRegisterClass*>
+PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const {
+ if (Constraint.size() == 1) {
+ // GCC RS6000 Constraint Letters
+ switch (Constraint[0]) {
+ case 'b': // R1-R31
+ case 'r': // R0-R31
+ if (VT == MVT::i64 && PPCSubTarget.isPPC64())
+ return std::make_pair(0U, PPC::G8RCRegisterClass);
+ return std::make_pair(0U, PPC::GPRCRegisterClass);
+ case 'f':
+ if (VT == MVT::f32)
+ return std::make_pair(0U, PPC::F4RCRegisterClass);
+ else if (VT == MVT::f64)
+ return std::make_pair(0U, PPC::F8RCRegisterClass);
+ break;
+ case 'v':
+ return std::make_pair(0U, PPC::VRRCRegisterClass);
+ case 'y': // crrc
+ return std::make_pair(0U, PPC::CRRCRegisterClass);
+ }
+ }
+
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
+
+
+/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
+/// vector. If it is invalid, don't add anything to Ops.
+void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
+ std::string &Constraint,
+ std::vector<SDValue>&Ops,
+ SelectionDAG &DAG) const {
+ SDValue Result(0,0);
+
+ // Only support length 1 constraints.
+ if (Constraint.length() > 1) return;
+
+ char Letter = Constraint[0];
+ switch (Letter) {
+ default: break;
+ case 'I':
+ case 'J':
+ case 'K':
+ case 'L':
+ case 'M':
+ case 'N':
+ case 'O':
+ case 'P': {
+ ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
+ if (!CST) return; // Must be an immediate to match.
+ unsigned Value = CST->getZExtValue();
+ switch (Letter) {
+ default: llvm_unreachable("Unknown constraint letter!");
+ case 'I': // "I" is a signed 16-bit constant.
+ if ((short)Value == (int)Value)
+ Result = DAG.getTargetConstant(Value, Op.getValueType());
+ break;
+ case 'J': // "J" is a constant with only the high-order 16 bits nonzero.
+ case 'L': // "L" is a signed 16-bit constant shifted left 16 bits.
+ if ((short)Value == 0)
+ Result = DAG.getTargetConstant(Value, Op.getValueType());
+ break;
+ case 'K': // "K" is a constant with only the low-order 16 bits nonzero.
+ if ((Value >> 16) == 0)
+ Result = DAG.getTargetConstant(Value, Op.getValueType());
+ break;
+ case 'M': // "M" is a constant that is greater than 31.
+ if (Value > 31)
+ Result = DAG.getTargetConstant(Value, Op.getValueType());
+ break;
+ case 'N': // "N" is a positive constant that is an exact power of two.
+ if ((int)Value > 0 && isPowerOf2_32(Value))
+ Result = DAG.getTargetConstant(Value, Op.getValueType());
+ break;
+ case 'O': // "O" is the constant zero.
+ if (Value == 0)
+ Result = DAG.getTargetConstant(Value, Op.getValueType());
+ break;
+ case 'P': // "P" is a constant whose negation is a signed 16-bit constant.
+ if ((short)-Value == (int)-Value)
+ Result = DAG.getTargetConstant(Value, Op.getValueType());
+ break;
+ }
+ break;
+ }
+ }
+
+ if (Result.getNode()) {
+ Ops.push_back(Result);
+ return;
+ }
+
+ // Handle standard constraint letters.
+ TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
+}
+
+// isLegalAddressingMode - Return true if the addressing mode represented
+// by AM is legal for this target, for a load/store of the specified type.
+bool PPCTargetLowering::isLegalAddressingMode(const AddrMode &AM,
+ Type *Ty) const {
+ // FIXME: PPC does not allow r+i addressing modes for vectors!
+
+ // PPC allows a sign-extended 16-bit immediate field.
+ if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
+ return false;
+
+ // No global is ever allowed as a base.
+ if (AM.BaseGV)
+ return false;
+
+ // PPC only support r+r,
+ switch (AM.Scale) {
+ case 0: // "r+i" or just "i", depending on HasBaseReg.
+ break;
+ case 1:
+ if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
+ return false;
+ // Otherwise we have r+r or r+i.
+ break;
+ case 2:
+ if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
+ return false;
+ // Allow 2*r as r+r.
+ break;
+ default:
+ // No other scales are supported.
+ return false;
+ }
+
+ return true;
+}
+
+/// isLegalAddressImmediate - Return true if the integer value can be used
+/// as the offset of the target addressing mode for load / store of the
+/// given type.
+bool PPCTargetLowering::isLegalAddressImmediate(int64_t V,Type *Ty) const{
+ // PPC allows a sign-extended 16-bit immediate field.
+ return (V > -(1 << 16) && V < (1 << 16)-1);
+}
+
+bool PPCTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
+ return false;
+}
+
+SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
+ SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MFI->setReturnAddressIsTaken(true);
+
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+
+ // Make sure the function does not optimize away the store of the RA to
+ // the stack.
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ FuncInfo->setLRStoreRequired();
+ bool isPPC64 = PPCSubTarget.isPPC64();
+ bool isDarwinABI = PPCSubTarget.isDarwinABI();
+
+ if (Depth > 0) {
+ SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
+ SDValue Offset =
+
+ DAG.getConstant(PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI),
+ isPPC64? MVT::i64 : MVT::i32);
+ return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
+ DAG.getNode(ISD::ADD, dl, getPointerTy(),
+ FrameAddr, Offset),
+ MachinePointerInfo(), false, false, 0);
+ }
+
+ // Just load the return address off the stack.
+ SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
+ return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
+ RetAddrFI, MachinePointerInfo(), false, false, 0);
+}
+
+SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ bool isPPC64 = PtrVT == MVT::i64;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MFI->setFrameAddressIsTaken(true);
+ bool is31 = (DisableFramePointerElim(MF) || MFI->hasVarSizedObjects()) &&
+ MFI->getStackSize() &&
+ !MF.getFunction()->hasFnAttr(Attribute::Naked);
+ unsigned FrameReg = isPPC64 ? (is31 ? PPC::X31 : PPC::X1) :
+ (is31 ? PPC::R31 : PPC::R1);
+ SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
+ PtrVT);
+ while (Depth--)
+ FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
+ FrameAddr, MachinePointerInfo(), false, false, 0);
+ return FrameAddr;
+}
+
+bool
+PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+ // The PowerPC target isn't yet aware of offsets.
+ return false;
+}
+
+/// getOptimalMemOpType - Returns the target specific optimal type for load
+/// and store operations as a result of memset, memcpy, and memmove
+/// lowering. If DstAlign is zero that means it's safe to destination
+/// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
+/// means there isn't a need to check it against alignment requirement,
+/// probably because the source does not need to be loaded. If
+/// 'NonScalarIntSafe' is true, that means it's safe to return a
+/// non-scalar-integer type, e.g. empty string source, constant, or loaded
+/// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is
+/// constant so it does not need to be loaded.
+/// It returns EVT::Other if the type should be determined using generic
+/// target-independent logic.
+EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
+ unsigned DstAlign, unsigned SrcAlign,
+ bool NonScalarIntSafe,
+ bool MemcpyStrSrc,
+ MachineFunction &MF) const {
+ if (this->PPCSubTarget.isPPC64()) {
+ return MVT::i64;
+ } else {
+ return MVT::i32;
+ }
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h
new file mode 100644
index 0000000..430e45e
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -0,0 +1,493 @@
+//===-- PPCISelLowering.h - PPC32 DAG Lowering Interface --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that PPC uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H
+#define LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H
+
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "PPC.h"
+#include "PPCSubtarget.h"
+
+namespace llvm {
+ namespace PPCISD {
+ enum NodeType {
+ // Start the numbering where the builtin ops and target ops leave off.
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+ /// FSEL - Traditional three-operand fsel node.
+ ///
+ FSEL,
+
+ /// FCFID - The FCFID instruction, taking an f64 operand and producing
+ /// and f64 value containing the FP representation of the integer that
+ /// was temporarily in the f64 operand.
+ FCFID,
+
+ /// FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64
+ /// operand, producing an f64 value containing the integer representation
+ /// of that FP value.
+ FCTIDZ, FCTIWZ,
+
+ /// STFIWX - The STFIWX instruction. The first operand is an input token
+ /// chain, then an f64 value to store, then an address to store it to.
+ STFIWX,
+
+ // VMADDFP, VNMSUBFP - The VMADDFP and VNMSUBFP instructions, taking
+ // three v4f32 operands and producing a v4f32 result.
+ VMADDFP, VNMSUBFP,
+
+ /// VPERM - The PPC VPERM Instruction.
+ ///
+ VPERM,
+
+ /// Hi/Lo - These represent the high and low 16-bit parts of a global
+ /// address respectively. These nodes have two operands, the first of
+ /// which must be a TargetGlobalAddress, and the second of which must be a
+ /// Constant. Selected naively, these turn into 'lis G+C' and 'li G+C',
+ /// though these are usually folded into other nodes.
+ Hi, Lo,
+
+ TOC_ENTRY,
+
+ /// The following three target-specific nodes are used for calls through
+ /// function pointers in the 64-bit SVR4 ABI.
+
+ /// Restore the TOC from the TOC save area of the current stack frame.
+ /// This is basically a hard coded load instruction which additionally
+ /// takes/produces a flag.
+ TOC_RESTORE,
+
+ /// Like a regular LOAD but additionally taking/producing a flag.
+ LOAD,
+
+ /// LOAD into r2 (also taking/producing a flag). Like TOC_RESTORE, this is
+ /// a hard coded load instruction.
+ LOAD_TOC,
+
+ /// OPRC, CHAIN = DYNALLOC(CHAIN, NEGSIZE, FRAME_INDEX)
+ /// This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to
+ /// compute an allocation on the stack.
+ DYNALLOC,
+
+ /// GlobalBaseReg - On Darwin, this node represents the result of the mflr
+ /// at function entry, used for PIC code.
+ GlobalBaseReg,
+
+ /// These nodes represent the 32-bit PPC shifts that operate on 6-bit
+ /// shift amounts. These nodes are generated by the multi-precision shift
+ /// code.
+ SRL, SRA, SHL,
+
+ /// EXTSW_32 - This is the EXTSW instruction for use with "32-bit"
+ /// registers.
+ EXTSW_32,
+
+ /// CALL - A direct function call.
+ CALL_Darwin, CALL_SVR4,
+
+ /// NOP - Special NOP which follows 64-bit SVR4 calls.
+ NOP,
+
+ /// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a
+ /// MTCTR instruction.
+ MTCTR,
+
+ /// CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a
+ /// BCTRL instruction.
+ BCTRL_Darwin, BCTRL_SVR4,
+
+ /// Return with a flag operand, matched by 'blr'
+ RET_FLAG,
+
+ /// R32 = MFCR(CRREG, INFLAG) - Represents the MFCRpseud/MFOCRF
+ /// instructions. This copies the bits corresponding to the specified
+ /// CRREG into the resultant GPR. Bits corresponding to other CR regs
+ /// are undefined.
+ MFCR,
+
+ /// RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP*
+ /// instructions. For lack of better number, we use the opcode number
+ /// encoding for the OPC field to identify the compare. For example, 838
+ /// is VCMPGTSH.
+ VCMP,
+
+ /// RESVEC, OUTFLAG = VCMPo(LHS, RHS, OPC) - Represents one of the
+ /// altivec VCMP*o instructions. For lack of better number, we use the
+ /// opcode number encoding for the OPC field to identify the compare. For
+ /// example, 838 is VCMPGTSH.
+ VCMPo,
+
+ /// CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This
+ /// corresponds to the COND_BRANCH pseudo instruction. CRRC is the
+ /// condition register to branch on, OPC is the branch opcode to use (e.g.
+ /// PPC::BLE), DESTBB is the destination block to branch to, and INFLAG is
+ /// an optional input flag argument.
+ COND_BRANCH,
+
+ // The following 5 instructions are used only as part of the
+ // long double-to-int conversion sequence.
+
+ /// OUTFLAG = MFFS F8RC - This moves the FPSCR (not modelled) into the
+ /// register.
+ MFFS,
+
+ /// OUTFLAG = MTFSB0 INFLAG - This clears a bit in the FPSCR.
+ MTFSB0,
+
+ /// OUTFLAG = MTFSB1 INFLAG - This sets a bit in the FPSCR.
+ MTFSB1,
+
+ /// F8RC, OUTFLAG = FADDRTZ F8RC, F8RC, INFLAG - This is an FADD done with
+ /// rounding towards zero. It has flags added so it won't move past the
+ /// FPSCR-setting instructions.
+ FADDRTZ,
+
+ /// MTFSF = F8RC, INFLAG - This moves the register into the FPSCR.
+ MTFSF,
+
+ /// LARX = This corresponds to PPC l{w|d}arx instrcution: load and
+ /// reserve indexed. This is used to implement atomic operations.
+ LARX,
+
+ /// STCX = This corresponds to PPC stcx. instrcution: store conditional
+ /// indexed. This is used to implement atomic operations.
+ STCX,
+
+ /// TC_RETURN - A tail call return.
+ /// operand #0 chain
+ /// operand #1 callee (register or absolute)
+ /// operand #2 stack adjustment
+ /// operand #3 optional in flag
+ TC_RETURN,
+
+ /// STD_32 - This is the STD instruction for use with "32-bit" registers.
+ STD_32 = ISD::FIRST_TARGET_MEMORY_OPCODE,
+
+ /// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a
+ /// byte-swapping store instruction. It byte-swaps the low "Type" bits of
+ /// the GPRC input, then stores it through Ptr. Type can be either i16 or
+ /// i32.
+ STBRX,
+
+ /// GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a
+ /// byte-swapping load instruction. It loads "Type" bits, byte swaps it,
+ /// then puts it in the bottom bits of the GPRC. TYPE can be either i16
+ /// or i32.
+ LBRX
+ };
+ }
+
+ /// Define some predicates that are used for node matching.
+ namespace PPC {
+ /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
+ /// VPKUHUM instruction.
+ bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary);
+
+ /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
+ /// VPKUWUM instruction.
+ bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary);
+
+ /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
+ /// a VRGL* instruction with the specified unit size (1,2 or 4 bytes).
+ bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
+ bool isUnary);
+
+ /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
+ /// a VRGH* instruction with the specified unit size (1,2 or 4 bytes).
+ bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
+ bool isUnary);
+
+ /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
+ /// amount, otherwise return -1.
+ int isVSLDOIShuffleMask(SDNode *N, bool isUnary);
+
+ /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a splat of a single element that is suitable for input to
+ /// VSPLTB/VSPLTH/VSPLTW.
+ bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize);
+
+ /// isAllNegativeZeroVector - Returns true if all elements of build_vector
+ /// are -0.0.
+ bool isAllNegativeZeroVector(SDNode *N);
+
+ /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
+ /// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
+ unsigned getVSPLTImmediate(SDNode *N, unsigned EltSize);
+
+ /// get_VSPLTI_elt - If this is a build_vector of constants which can be
+ /// formed by using a vspltis[bhw] instruction of the specified element
+ /// size, return the constant being splatted. The ByteSize field indicates
+ /// the number of bytes of each element [124] -> [bhw].
+ SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG);
+ }
+
+ class PPCTargetLowering : public TargetLowering {
+ const PPCSubtarget &PPCSubTarget;
+
+ public:
+ explicit PPCTargetLowering(PPCTargetMachine &TM);
+
+ /// getTargetNodeName() - This method returns the name of a target specific
+ /// DAG node.
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+ virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
+
+ /// getSetCCResultType - Return the ISD::SETCC ValueType
+ virtual EVT getSetCCResultType(EVT VT) const;
+
+ /// getPreIndexedAddressParts - returns true by value, base pointer and
+ /// offset pointer and addressing mode by reference if the node's address
+ /// can be legally represented as pre-indexed load / store address.
+ virtual bool getPreIndexedAddressParts(SDNode *N, SDValue &Base,
+ SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const;
+
+ /// SelectAddressRegReg - Given the specified addressed, check to see if it
+ /// can be represented as an indexed [r+r] operation. Returns false if it
+ /// can be more efficiently represented with [r+imm].
+ bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index,
+ SelectionDAG &DAG) const;
+
+ /// SelectAddressRegImm - Returns true if the address N can be represented
+ /// by a base register plus a signed 16-bit displacement [r+imm], and if it
+ /// is not better represented as reg+reg.
+ bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base,
+ SelectionDAG &DAG) const;
+
+ /// SelectAddressRegRegOnly - Given the specified addressed, force it to be
+ /// represented as an indexed [r+r] operation.
+ bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index,
+ SelectionDAG &DAG) const;
+
+ /// SelectAddressRegImmShift - Returns true if the address N can be
+ /// represented by a base register plus a signed 14-bit displacement
+ /// [r+imm*4]. Suitable for use by STD and friends.
+ bool SelectAddressRegImmShift(SDValue N, SDValue &Disp, SDValue &Base,
+ SelectionDAG &DAG) const;
+
+
+ /// LowerOperation - Provide custom lowering hooks for some operations.
+ ///
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+
+ /// ReplaceNodeResults - Replace the results of node with an illegal result
+ /// type with new values built out of custom code.
+ ///
+ virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG) const;
+
+ virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+
+ virtual void computeMaskedBitsForTargetNode(const SDValue Op,
+ const APInt &Mask,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth = 0) const;
+
+ virtual MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+ MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI,
+ MachineBasicBlock *MBB, bool is64Bit,
+ unsigned BinOpcode) const;
+ MachineBasicBlock *EmitPartwordAtomicBinary(MachineInstr *MI,
+ MachineBasicBlock *MBB,
+ bool is8bit, unsigned Opcode) const;
+
+ ConstraintType getConstraintType(const std::string &Constraint) const;
+
+ /// Examine constraint string and operand type and determine a weight value.
+ /// The operand object must already have been set up with the operand type.
+ ConstraintWeight getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const;
+
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const;
+
+ /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
+ /// function arguments in the caller parameter area. This is the actual
+ /// alignment, not its logarithm.
+ unsigned getByValTypeAlignment(Type *Ty) const;
+
+ /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
+ /// vector. If it is invalid, don't add anything to Ops.
+ virtual void LowerAsmOperandForConstraint(SDValue Op,
+ std::string &Constraint,
+ std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const;
+
+ /// isLegalAddressingMode - Return true if the addressing mode represented
+ /// by AM is legal for this target, for a load/store of the specified type.
+ virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty)const;
+
+ /// isLegalAddressImmediate - Return true if the integer value can be used
+ /// as the offset of the target addressing mode for load / store of the
+ /// given type.
+ virtual bool isLegalAddressImmediate(int64_t V, Type *Ty) const;
+
+ /// isLegalAddressImmediate - Return true if the GlobalValue can be used as
+ /// the offset of the target addressing mode.
+ virtual bool isLegalAddressImmediate(GlobalValue *GV) const;
+
+ virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
+
+ /// getOptimalMemOpType - Returns the target specific optimal type for load
+ /// and store operations as a result of memset, memcpy, and memmove
+ /// lowering. If DstAlign is zero that means it's safe to destination
+ /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
+ /// means there isn't a need to check it against alignment requirement,
+ /// probably because the source does not need to be loaded. If
+ /// 'NonScalarIntSafe' is true, that means it's safe to return a
+ /// non-scalar-integer type, e.g. empty string source, constant, or loaded
+ /// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is
+ /// constant so it does not need to be loaded.
+ /// It returns EVT::Other if the type should be determined using generic
+ /// target-independent logic.
+ virtual EVT
+ getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
+ bool NonScalarIntSafe, bool MemcpyStrSrc,
+ MachineFunction &MF) const;
+
+ private:
+ SDValue getFramePointerFrameIndex(SelectionDAG & DAG) const;
+ SDValue getReturnAddrFrameIndex(SelectionDAG & DAG) const;
+
+ bool
+ IsEligibleForTailCallOptimization(SDValue Callee,
+ CallingConv::ID CalleeCC,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SelectionDAG& DAG) const;
+
+ SDValue EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG,
+ int SPDiff,
+ SDValue Chain,
+ SDValue &LROpOut,
+ SDValue &FPOpOut,
+ bool isDarwinABI,
+ DebugLoc dl) const;
+
+ SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) const;
+ SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) const;
+ SDValue LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) const;
+ SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) const;
+ SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, DebugLoc dl) const;
+ SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+ SDValue FinishCall(CallingConv::ID CallConv, DebugLoc dl, bool isTailCall,
+ bool isVarArg,
+ SelectionDAG &DAG,
+ SmallVector<std::pair<unsigned, SDValue>, 8>
+ &RegsToPass,
+ SDValue InFlag, SDValue Chain,
+ SDValue &Callee,
+ int SPDiff, unsigned NumBytes,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg, bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual bool
+ CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext &Context) const;
+
+ virtual SDValue
+ LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerFormalArguments_Darwin(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+ SDValue
+ LowerFormalArguments_SVR4(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ SDValue
+ LowerCall_Darwin(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg, bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+ SDValue
+ LowerCall_SVR4(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg, bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+ };
+}
+
+#endif // LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
new file mode 100644
index 0000000..e88ad37
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -0,0 +1,749 @@
+//===- PPCInstr64Bit.td - The PowerPC 64-bit Support -------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the PowerPC 64-bit instructions. These patterns are used
+// both when in ppc64 mode and when in "use 64-bit extensions in 32-bit" mode.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// 64-bit operands.
+//
+def s16imm64 : Operand<i64> {
+ let PrintMethod = "printS16ImmOperand";
+}
+def u16imm64 : Operand<i64> {
+ let PrintMethod = "printU16ImmOperand";
+}
+def symbolHi64 : Operand<i64> {
+ let PrintMethod = "printSymbolHi";
+ let EncoderMethod = "getHA16Encoding";
+}
+def symbolLo64 : Operand<i64> {
+ let PrintMethod = "printSymbolLo";
+ let EncoderMethod = "getLO16Encoding";
+}
+
+//===----------------------------------------------------------------------===//
+// 64-bit transformation functions.
+//
+
+def SHL64 : SDNodeXForm<imm, [{
+ // Transformation function: 63 - imm
+ return getI32Imm(63 - N->getZExtValue());
+}]>;
+
+def SRL64 : SDNodeXForm<imm, [{
+ // Transformation function: 64 - imm
+ return N->getZExtValue() ? getI32Imm(64 - N->getZExtValue()) : getI32Imm(0);
+}]>;
+
+def HI32_48 : SDNodeXForm<imm, [{
+ // Transformation function: shift the immediate value down into the low bits.
+ return getI32Imm((unsigned short)(N->getZExtValue() >> 32));
+}]>;
+
+def HI48_64 : SDNodeXForm<imm, [{
+ // Transformation function: shift the immediate value down into the low bits.
+ return getI32Imm((unsigned short)(N->getZExtValue() >> 48));
+}]>;
+
+
+//===----------------------------------------------------------------------===//
+// Calls.
+//
+
+let Defs = [LR8] in
+ def MovePCtoLR8 : Pseudo<(outs), (ins), "", []>,
+ PPC970_Unit_BRU;
+
+// Darwin ABI Calls.
+let isCall = 1, PPC970_Unit = 7,
+ // All calls clobber the PPC64 non-callee saved registers.
+ Defs = [X0,X2,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12,
+ F0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13,
+ V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,
+ LR8,CTR8,
+ CR0,CR1,CR5,CR6,CR7,CARRY] in {
+ // Convenient aliases for call instructions
+ let Uses = [RM] in {
+ def BL8_Darwin : IForm<18, 0, 1,
+ (outs), (ins calltarget:$func, variable_ops),
+ "bl $func", BrB, []>; // See Pat patterns below.
+ def BLA8_Darwin : IForm<18, 1, 1,
+ (outs), (ins aaddr:$func, variable_ops),
+ "bla $func", BrB, [(PPCcall_Darwin (i64 imm:$func))]>;
+ }
+ let Uses = [CTR8, RM] in {
+ def BCTRL8_Darwin : XLForm_2_ext<19, 528, 20, 0, 1,
+ (outs), (ins variable_ops),
+ "bctrl", BrB,
+ [(PPCbctrl_Darwin)]>, Requires<[In64BitMode]>;
+ }
+}
+
+// ELF 64 ABI Calls = Darwin ABI Calls
+// Used to define BL8_ELF and BLA8_ELF
+let isCall = 1, PPC970_Unit = 7,
+ // All calls clobber the PPC64 non-callee saved registers.
+ Defs = [X0,X2,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12,
+ F0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13,
+ V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,
+ LR8,CTR8,
+ CR0,CR1,CR5,CR6,CR7,CARRY] in {
+ // Convenient aliases for call instructions
+ let Uses = [RM] in {
+ def BL8_ELF : IForm<18, 0, 1,
+ (outs), (ins calltarget:$func, variable_ops),
+ "bl $func", BrB, []>; // See Pat patterns below.
+ def BLA8_ELF : IForm<18, 1, 1,
+ (outs), (ins aaddr:$func, variable_ops),
+ "bla $func", BrB, [(PPCcall_SVR4 (i64 imm:$func))]>;
+ }
+ let Uses = [CTR8, RM] in {
+ def BCTRL8_ELF : XLForm_2_ext<19, 528, 20, 0, 1,
+ (outs), (ins variable_ops),
+ "bctrl", BrB,
+ [(PPCbctrl_SVR4)]>, Requires<[In64BitMode]>;
+ }
+}
+
+
+// Calls
+def : Pat<(PPCcall_Darwin (i64 tglobaladdr:$dst)),
+ (BL8_Darwin tglobaladdr:$dst)>;
+def : Pat<(PPCcall_Darwin (i64 texternalsym:$dst)),
+ (BL8_Darwin texternalsym:$dst)>;
+
+def : Pat<(PPCcall_SVR4 (i64 tglobaladdr:$dst)),
+ (BL8_ELF tglobaladdr:$dst)>;
+def : Pat<(PPCcall_SVR4 (i64 texternalsym:$dst)),
+ (BL8_ELF texternalsym:$dst)>;
+def : Pat<(PPCnop),
+ (NOP)>;
+
+// Atomic operations
+let usesCustomInserter = 1 in {
+ let Defs = [CR0] in {
+ def ATOMIC_LOAD_ADD_I64 : Pseudo<
+ (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "",
+ [(set G8RC:$dst, (atomic_load_add_64 xoaddr:$ptr, G8RC:$incr))]>;
+ def ATOMIC_LOAD_SUB_I64 : Pseudo<
+ (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "",
+ [(set G8RC:$dst, (atomic_load_sub_64 xoaddr:$ptr, G8RC:$incr))]>;
+ def ATOMIC_LOAD_OR_I64 : Pseudo<
+ (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "",
+ [(set G8RC:$dst, (atomic_load_or_64 xoaddr:$ptr, G8RC:$incr))]>;
+ def ATOMIC_LOAD_XOR_I64 : Pseudo<
+ (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "",
+ [(set G8RC:$dst, (atomic_load_xor_64 xoaddr:$ptr, G8RC:$incr))]>;
+ def ATOMIC_LOAD_AND_I64 : Pseudo<
+ (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "",
+ [(set G8RC:$dst, (atomic_load_and_64 xoaddr:$ptr, G8RC:$incr))]>;
+ def ATOMIC_LOAD_NAND_I64 : Pseudo<
+ (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "",
+ [(set G8RC:$dst, (atomic_load_nand_64 xoaddr:$ptr, G8RC:$incr))]>;
+
+ def ATOMIC_CMP_SWAP_I64 : Pseudo<
+ (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$old, G8RC:$new), "",
+ [(set G8RC:$dst,
+ (atomic_cmp_swap_64 xoaddr:$ptr, G8RC:$old, G8RC:$new))]>;
+
+ def ATOMIC_SWAP_I64 : Pseudo<
+ (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$new), "",
+ [(set G8RC:$dst, (atomic_swap_64 xoaddr:$ptr, G8RC:$new))]>;
+ }
+}
+
+// Instructions to support atomic operations
+def LDARX : XForm_1<31, 84, (outs G8RC:$rD), (ins memrr:$ptr),
+ "ldarx $rD, $ptr", LdStLDARX,
+ [(set G8RC:$rD, (PPClarx xoaddr:$ptr))]>;
+
+let Defs = [CR0] in
+def STDCX : XForm_1<31, 214, (outs), (ins G8RC:$rS, memrr:$dst),
+ "stdcx. $rS, $dst", LdStSTDCX,
+ [(PPCstcx G8RC:$rS, xoaddr:$dst)]>,
+ isDOT;
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
+def TCRETURNdi8 :Pseudo< (outs),
+ (ins calltarget:$dst, i32imm:$offset, variable_ops),
+ "#TC_RETURNd8 $dst $offset",
+ []>;
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
+def TCRETURNai8 :Pseudo<(outs), (ins aaddr:$func, i32imm:$offset, variable_ops),
+ "#TC_RETURNa8 $func $offset",
+ [(PPCtc_return (i64 imm:$func), imm:$offset)]>;
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
+def TCRETURNri8 : Pseudo<(outs), (ins CTRRC8:$dst, i32imm:$offset, variable_ops),
+ "#TC_RETURNr8 $dst $offset",
+ []>;
+
+
+let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7, isBranch = 1,
+ isIndirectBranch = 1, isCall = 1, Uses = [CTR8, RM] in {
+ let isReturn = 1 in {
+ def TAILBCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>,
+ Requires<[In64BitMode]>;
+ }
+
+ def BCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>,
+ Requires<[In64BitMode]>;
+}
+
+
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7,
+ isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in
+def TAILB8 : IForm<18, 0, 0, (outs), (ins calltarget:$dst),
+ "b $dst", BrB,
+ []>;
+
+
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7,
+ isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in
+def TAILBA8 : IForm<18, 0, 0, (outs), (ins aaddr:$dst),
+ "ba $dst", BrB,
+ []>;
+
+def : Pat<(PPCtc_return (i64 tglobaladdr:$dst), imm:$imm),
+ (TCRETURNdi8 tglobaladdr:$dst, imm:$imm)>;
+
+def : Pat<(PPCtc_return (i64 texternalsym:$dst), imm:$imm),
+ (TCRETURNdi8 texternalsym:$dst, imm:$imm)>;
+
+def : Pat<(PPCtc_return CTRRC8:$dst, imm:$imm),
+ (TCRETURNri8 CTRRC8:$dst, imm:$imm)>;
+
+
+//===----------------------------------------------------------------------===//
+// 64-bit SPR manipulation instrs.
+
+let Uses = [CTR8] in {
+def MFCTR8 : XFXForm_1_ext<31, 339, 9, (outs G8RC:$rT), (ins),
+ "mfctr $rT", SprMFSPR>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+let Pattern = [(PPCmtctr G8RC:$rS)], Defs = [CTR8] in {
+def MTCTR8 : XFXForm_7_ext<31, 467, 9, (outs), (ins G8RC:$rS),
+ "mtctr $rS", SprMTSPR>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+
+let Defs = [X1], Uses = [X1] in
+def DYNALLOC8 : Pseudo<(outs G8RC:$result), (ins G8RC:$negsize, memri:$fpsi),"",
+ [(set G8RC:$result,
+ (PPCdynalloc G8RC:$negsize, iaddr:$fpsi))]>;
+
+let Defs = [LR8] in {
+def MTLR8 : XFXForm_7_ext<31, 467, 8, (outs), (ins G8RC:$rS),
+ "mtlr $rS", SprMTSPR>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+let Uses = [LR8] in {
+def MFLR8 : XFXForm_1_ext<31, 339, 8, (outs G8RC:$rT), (ins),
+ "mflr $rT", SprMFSPR>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+
+//===----------------------------------------------------------------------===//
+// Fixed point instructions.
+//
+
+let PPC970_Unit = 1 in { // FXU Operations.
+
+// Copies, extends, truncates.
+def OR4To8 : XForm_6<31, 444, (outs G8RC:$rA), (ins GPRC:$rS, GPRC:$rB),
+ "or $rA, $rS, $rB", IntGeneral,
+ []>;
+def OR8To4 : XForm_6<31, 444, (outs GPRC:$rA), (ins G8RC:$rS, G8RC:$rB),
+ "or $rA, $rS, $rB", IntGeneral,
+ []>;
+
+def LI8 : DForm_2_r0<14, (outs G8RC:$rD), (ins symbolLo64:$imm),
+ "li $rD, $imm", IntGeneral,
+ [(set G8RC:$rD, immSExt16:$imm)]>;
+def LIS8 : DForm_2_r0<15, (outs G8RC:$rD), (ins symbolHi64:$imm),
+ "lis $rD, $imm", IntGeneral,
+ [(set G8RC:$rD, imm16ShiftedSExt:$imm)]>;
+
+// Logical ops.
+def NAND8: XForm_6<31, 476, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
+ "nand $rA, $rS, $rB", IntGeneral,
+ [(set G8RC:$rA, (not (and G8RC:$rS, G8RC:$rB)))]>;
+def AND8 : XForm_6<31, 28, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
+ "and $rA, $rS, $rB", IntGeneral,
+ [(set G8RC:$rA, (and G8RC:$rS, G8RC:$rB))]>;
+def ANDC8: XForm_6<31, 60, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
+ "andc $rA, $rS, $rB", IntGeneral,
+ [(set G8RC:$rA, (and G8RC:$rS, (not G8RC:$rB)))]>;
+def OR8 : XForm_6<31, 444, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
+ "or $rA, $rS, $rB", IntGeneral,
+ [(set G8RC:$rA, (or G8RC:$rS, G8RC:$rB))]>;
+def NOR8 : XForm_6<31, 124, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
+ "nor $rA, $rS, $rB", IntGeneral,
+ [(set G8RC:$rA, (not (or G8RC:$rS, G8RC:$rB)))]>;
+def ORC8 : XForm_6<31, 412, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
+ "orc $rA, $rS, $rB", IntGeneral,
+ [(set G8RC:$rA, (or G8RC:$rS, (not G8RC:$rB)))]>;
+def EQV8 : XForm_6<31, 284, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
+ "eqv $rA, $rS, $rB", IntGeneral,
+ [(set G8RC:$rA, (not (xor G8RC:$rS, G8RC:$rB)))]>;
+def XOR8 : XForm_6<31, 316, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
+ "xor $rA, $rS, $rB", IntGeneral,
+ [(set G8RC:$rA, (xor G8RC:$rS, G8RC:$rB))]>;
+
+// Logical ops with immediate.
+def ANDIo8 : DForm_4<28, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
+ "andi. $dst, $src1, $src2", IntGeneral,
+ [(set G8RC:$dst, (and G8RC:$src1, immZExt16:$src2))]>,
+ isDOT;
+def ANDISo8 : DForm_4<29, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
+ "andis. $dst, $src1, $src2", IntGeneral,
+ [(set G8RC:$dst, (and G8RC:$src1,imm16ShiftedZExt:$src2))]>,
+ isDOT;
+def ORI8 : DForm_4<24, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
+ "ori $dst, $src1, $src2", IntGeneral,
+ [(set G8RC:$dst, (or G8RC:$src1, immZExt16:$src2))]>;
+def ORIS8 : DForm_4<25, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
+ "oris $dst, $src1, $src2", IntGeneral,
+ [(set G8RC:$dst, (or G8RC:$src1, imm16ShiftedZExt:$src2))]>;
+def XORI8 : DForm_4<26, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
+ "xori $dst, $src1, $src2", IntGeneral,
+ [(set G8RC:$dst, (xor G8RC:$src1, immZExt16:$src2))]>;
+def XORIS8 : DForm_4<27, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
+ "xoris $dst, $src1, $src2", IntGeneral,
+ [(set G8RC:$dst, (xor G8RC:$src1, imm16ShiftedZExt:$src2))]>;
+
+def ADD8 : XOForm_1<31, 266, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
+ "add $rT, $rA, $rB", IntGeneral,
+ [(set G8RC:$rT, (add G8RC:$rA, G8RC:$rB))]>;
+
+let Defs = [CARRY] in {
+def ADDC8 : XOForm_1<31, 10, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
+ "addc $rT, $rA, $rB", IntGeneral,
+ [(set G8RC:$rT, (addc G8RC:$rA, G8RC:$rB))]>,
+ PPC970_DGroup_Cracked;
+def ADDIC8 : DForm_2<12, (outs G8RC:$rD), (ins G8RC:$rA, s16imm64:$imm),
+ "addic $rD, $rA, $imm", IntGeneral,
+ [(set G8RC:$rD, (addc G8RC:$rA, immSExt16:$imm))]>;
+}
+def ADDI8 : DForm_2<14, (outs G8RC:$rD), (ins G8RC:$rA, s16imm64:$imm),
+ "addi $rD, $rA, $imm", IntGeneral,
+ [(set G8RC:$rD, (add G8RC:$rA, immSExt16:$imm))]>;
+def ADDIS8 : DForm_2<15, (outs G8RC:$rD), (ins G8RC:$rA, symbolHi64:$imm),
+ "addis $rD, $rA, $imm", IntGeneral,
+ [(set G8RC:$rD, (add G8RC:$rA, imm16ShiftedSExt:$imm))]>;
+
+let Defs = [CARRY] in {
+def SUBFIC8: DForm_2< 8, (outs G8RC:$rD), (ins G8RC:$rA, s16imm64:$imm),
+ "subfic $rD, $rA, $imm", IntGeneral,
+ [(set G8RC:$rD, (subc immSExt16:$imm, G8RC:$rA))]>;
+def SUBFC8 : XOForm_1<31, 8, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
+ "subfc $rT, $rA, $rB", IntGeneral,
+ [(set G8RC:$rT, (subc G8RC:$rB, G8RC:$rA))]>,
+ PPC970_DGroup_Cracked;
+}
+def SUBF8 : XOForm_1<31, 40, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
+ "subf $rT, $rA, $rB", IntGeneral,
+ [(set G8RC:$rT, (sub G8RC:$rB, G8RC:$rA))]>;
+def NEG8 : XOForm_3<31, 104, 0, (outs G8RC:$rT), (ins G8RC:$rA),
+ "neg $rT, $rA", IntGeneral,
+ [(set G8RC:$rT, (ineg G8RC:$rA))]>;
+let Uses = [CARRY], Defs = [CARRY] in {
+def ADDE8 : XOForm_1<31, 138, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
+ "adde $rT, $rA, $rB", IntGeneral,
+ [(set G8RC:$rT, (adde G8RC:$rA, G8RC:$rB))]>;
+def ADDME8 : XOForm_3<31, 234, 0, (outs G8RC:$rT), (ins G8RC:$rA),
+ "addme $rT, $rA", IntGeneral,
+ [(set G8RC:$rT, (adde G8RC:$rA, -1))]>;
+def ADDZE8 : XOForm_3<31, 202, 0, (outs G8RC:$rT), (ins G8RC:$rA),
+ "addze $rT, $rA", IntGeneral,
+ [(set G8RC:$rT, (adde G8RC:$rA, 0))]>;
+def SUBFE8 : XOForm_1<31, 136, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
+ "subfe $rT, $rA, $rB", IntGeneral,
+ [(set G8RC:$rT, (sube G8RC:$rB, G8RC:$rA))]>;
+def SUBFME8 : XOForm_3<31, 232, 0, (outs G8RC:$rT), (ins G8RC:$rA),
+ "subfme $rT, $rA", IntGeneral,
+ [(set G8RC:$rT, (sube -1, G8RC:$rA))]>;
+def SUBFZE8 : XOForm_3<31, 200, 0, (outs G8RC:$rT), (ins G8RC:$rA),
+ "subfze $rT, $rA", IntGeneral,
+ [(set G8RC:$rT, (sube 0, G8RC:$rA))]>;
+}
+
+
+def MULHD : XOForm_1<31, 73, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
+ "mulhd $rT, $rA, $rB", IntMulHW,
+ [(set G8RC:$rT, (mulhs G8RC:$rA, G8RC:$rB))]>;
+def MULHDU : XOForm_1<31, 9, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
+ "mulhdu $rT, $rA, $rB", IntMulHWU,
+ [(set G8RC:$rT, (mulhu G8RC:$rA, G8RC:$rB))]>;
+
+def CMPD : XForm_16_ext<31, 0, (outs CRRC:$crD), (ins G8RC:$rA, G8RC:$rB),
+ "cmpd $crD, $rA, $rB", IntCompare>, isPPC64;
+def CMPLD : XForm_16_ext<31, 32, (outs CRRC:$crD), (ins G8RC:$rA, G8RC:$rB),
+ "cmpld $crD, $rA, $rB", IntCompare>, isPPC64;
+def CMPDI : DForm_5_ext<11, (outs CRRC:$crD), (ins G8RC:$rA, s16imm:$imm),
+ "cmpdi $crD, $rA, $imm", IntCompare>, isPPC64;
+def CMPLDI : DForm_6_ext<10, (outs CRRC:$dst), (ins G8RC:$src1, u16imm:$src2),
+ "cmpldi $dst, $src1, $src2", IntCompare>, isPPC64;
+
+def SLD : XForm_6<31, 27, (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB),
+ "sld $rA, $rS, $rB", IntRotateD,
+ [(set G8RC:$rA, (PPCshl G8RC:$rS, GPRC:$rB))]>, isPPC64;
+def SRD : XForm_6<31, 539, (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB),
+ "srd $rA, $rS, $rB", IntRotateD,
+ [(set G8RC:$rA, (PPCsrl G8RC:$rS, GPRC:$rB))]>, isPPC64;
+let Defs = [CARRY] in {
+def SRAD : XForm_6<31, 794, (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB),
+ "srad $rA, $rS, $rB", IntRotateD,
+ [(set G8RC:$rA, (PPCsra G8RC:$rS, GPRC:$rB))]>, isPPC64;
+}
+
+def EXTSB8 : XForm_11<31, 954, (outs G8RC:$rA), (ins G8RC:$rS),
+ "extsb $rA, $rS", IntGeneral,
+ [(set G8RC:$rA, (sext_inreg G8RC:$rS, i8))]>;
+def EXTSH8 : XForm_11<31, 922, (outs G8RC:$rA), (ins G8RC:$rS),
+ "extsh $rA, $rS", IntGeneral,
+ [(set G8RC:$rA, (sext_inreg G8RC:$rS, i16))]>;
+
+def EXTSW : XForm_11<31, 986, (outs G8RC:$rA), (ins G8RC:$rS),
+ "extsw $rA, $rS", IntGeneral,
+ [(set G8RC:$rA, (sext_inreg G8RC:$rS, i32))]>, isPPC64;
+/// EXTSW_32 - Just like EXTSW, but works on '32-bit' registers.
+def EXTSW_32 : XForm_11<31, 986, (outs GPRC:$rA), (ins GPRC:$rS),
+ "extsw $rA, $rS", IntGeneral,
+ [(set GPRC:$rA, (PPCextsw_32 GPRC:$rS))]>, isPPC64;
+def EXTSW_32_64 : XForm_11<31, 986, (outs G8RC:$rA), (ins GPRC:$rS),
+ "extsw $rA, $rS", IntGeneral,
+ [(set G8RC:$rA, (sext GPRC:$rS))]>, isPPC64;
+
+let Defs = [CARRY] in {
+def SRADI : XSForm_1<31, 413, (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH),
+ "sradi $rA, $rS, $SH", IntRotateD,
+ [(set G8RC:$rA, (sra G8RC:$rS, (i32 imm:$SH)))]>, isPPC64;
+}
+def CNTLZD : XForm_11<31, 58, (outs G8RC:$rA), (ins G8RC:$rS),
+ "cntlzd $rA, $rS", IntGeneral,
+ [(set G8RC:$rA, (ctlz G8RC:$rS))]>;
+
+def DIVD : XOForm_1<31, 489, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
+ "divd $rT, $rA, $rB", IntDivD,
+ [(set G8RC:$rT, (sdiv G8RC:$rA, G8RC:$rB))]>, isPPC64,
+ PPC970_DGroup_First, PPC970_DGroup_Cracked;
+def DIVDU : XOForm_1<31, 457, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
+ "divdu $rT, $rA, $rB", IntDivD,
+ [(set G8RC:$rT, (udiv G8RC:$rA, G8RC:$rB))]>, isPPC64,
+ PPC970_DGroup_First, PPC970_DGroup_Cracked;
+def MULLD : XOForm_1<31, 233, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
+ "mulld $rT, $rA, $rB", IntMulHD,
+ [(set G8RC:$rT, (mul G8RC:$rA, G8RC:$rB))]>, isPPC64;
+
+
+let isCommutable = 1 in {
+def RLDIMI : MDForm_1<30, 3,
+ (outs G8RC:$rA), (ins G8RC:$rSi, G8RC:$rS, u6imm:$SH, u6imm:$MB),
+ "rldimi $rA, $rS, $SH, $MB", IntRotateD,
+ []>, isPPC64, RegConstraint<"$rSi = $rA">,
+ NoEncode<"$rSi">;
+}
+
+// Rotate instructions.
+def RLDCL : MDForm_1<30, 0,
+ (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB, u6imm:$MB),
+ "rldcl $rA, $rS, $rB, $MB", IntRotateD,
+ []>, isPPC64;
+def RLDICL : MDForm_1<30, 0,
+ (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH, u6imm:$MB),
+ "rldicl $rA, $rS, $SH, $MB", IntRotateD,
+ []>, isPPC64;
+def RLDICR : MDForm_1<30, 1,
+ (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH, u6imm:$ME),
+ "rldicr $rA, $rS, $SH, $ME", IntRotateD,
+ []>, isPPC64;
+} // End FXU Operations.
+
+
+//===----------------------------------------------------------------------===//
+// Load/Store instructions.
+//
+
+
+// Sign extending loads.
+let canFoldAsLoad = 1, PPC970_Unit = 2 in {
+def LHA8: DForm_1<42, (outs G8RC:$rD), (ins memri:$src),
+ "lha $rD, $src", LdStLHA,
+ [(set G8RC:$rD, (sextloadi16 iaddr:$src))]>,
+ PPC970_DGroup_Cracked;
+def LWA : DSForm_1<58, 2, (outs G8RC:$rD), (ins memrix:$src),
+ "lwa $rD, $src", LdStLWA,
+ [(set G8RC:$rD, (sextloadi32 ixaddr:$src))]>, isPPC64,
+ PPC970_DGroup_Cracked;
+def LHAX8: XForm_1<31, 343, (outs G8RC:$rD), (ins memrr:$src),
+ "lhax $rD, $src", LdStLHA,
+ [(set G8RC:$rD, (sextloadi16 xaddr:$src))]>,
+ PPC970_DGroup_Cracked;
+def LWAX : XForm_1<31, 341, (outs G8RC:$rD), (ins memrr:$src),
+ "lwax $rD, $src", LdStLHA,
+ [(set G8RC:$rD, (sextloadi32 xaddr:$src))]>, isPPC64,
+ PPC970_DGroup_Cracked;
+
+// Update forms.
+let mayLoad = 1 in
+def LHAU8 : DForm_1a<43, (outs G8RC:$rD, ptr_rc:$ea_result), (ins symbolLo:$disp,
+ ptr_rc:$rA),
+ "lhau $rD, $disp($rA)", LdStGeneral,
+ []>, RegConstraint<"$rA = $ea_result">,
+ NoEncode<"$ea_result">;
+// NO LWAU!
+
+}
+
+// Zero extending loads.
+let canFoldAsLoad = 1, PPC970_Unit = 2 in {
+def LBZ8 : DForm_1<34, (outs G8RC:$rD), (ins memri:$src),
+ "lbz $rD, $src", LdStGeneral,
+ [(set G8RC:$rD, (zextloadi8 iaddr:$src))]>;
+def LHZ8 : DForm_1<40, (outs G8RC:$rD), (ins memri:$src),
+ "lhz $rD, $src", LdStGeneral,
+ [(set G8RC:$rD, (zextloadi16 iaddr:$src))]>;
+def LWZ8 : DForm_1<32, (outs G8RC:$rD), (ins memri:$src),
+ "lwz $rD, $src", LdStGeneral,
+ [(set G8RC:$rD, (zextloadi32 iaddr:$src))]>, isPPC64;
+
+def LBZX8 : XForm_1<31, 87, (outs G8RC:$rD), (ins memrr:$src),
+ "lbzx $rD, $src", LdStGeneral,
+ [(set G8RC:$rD, (zextloadi8 xaddr:$src))]>;
+def LHZX8 : XForm_1<31, 279, (outs G8RC:$rD), (ins memrr:$src),
+ "lhzx $rD, $src", LdStGeneral,
+ [(set G8RC:$rD, (zextloadi16 xaddr:$src))]>;
+def LWZX8 : XForm_1<31, 23, (outs G8RC:$rD), (ins memrr:$src),
+ "lwzx $rD, $src", LdStGeneral,
+ [(set G8RC:$rD, (zextloadi32 xaddr:$src))]>;
+
+
+// Update forms.
+let mayLoad = 1 in {
+def LBZU8 : DForm_1<35, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+ "lbzu $rD, $addr", LdStGeneral,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+def LHZU8 : DForm_1<41, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+ "lhzu $rD, $addr", LdStGeneral,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+def LWZU8 : DForm_1<33, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+ "lwzu $rD, $addr", LdStGeneral,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+}
+}
+
+
+// Full 8-byte loads.
+let canFoldAsLoad = 1, PPC970_Unit = 2 in {
+def LD : DSForm_1<58, 0, (outs G8RC:$rD), (ins memrix:$src),
+ "ld $rD, $src", LdStLD,
+ [(set G8RC:$rD, (load ixaddr:$src))]>, isPPC64;
+def LDtoc: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg),
+ "",
+ [(set G8RC:$rD,
+ (PPCtoc_entry tglobaladdr:$disp, G8RC:$reg))]>, isPPC64;
+
+let RST = 2, DS_RA = 0 in // FIXME: Should be a pseudo.
+def LDinto_toc: DSForm_1<58, 0, (outs), (ins G8RC:$reg),
+ "ld 2, 8($reg)", LdStLD,
+ [(PPCload_toc G8RC:$reg)]>, isPPC64;
+
+let RST = 2, DS_RA = 0 in // FIXME: Should be a pseudo.
+def LDtoc_restore : DSForm_1<58, 0, (outs), (ins),
+ "ld 2, 40(1)", LdStLD,
+ [(PPCtoc_restore)]>, isPPC64;
+def LDX : XForm_1<31, 21, (outs G8RC:$rD), (ins memrr:$src),
+ "ldx $rD, $src", LdStLD,
+ [(set G8RC:$rD, (load xaddr:$src))]>, isPPC64;
+
+let mayLoad = 1 in
+def LDU : DSForm_1<58, 1, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memrix:$addr),
+ "ldu $rD, $addr", LdStLD,
+ []>, RegConstraint<"$addr.reg = $ea_result">, isPPC64,
+ NoEncode<"$ea_result">;
+
+}
+
+def : Pat<(PPCload ixaddr:$src),
+ (LD ixaddr:$src)>;
+def : Pat<(PPCload xaddr:$src),
+ (LDX xaddr:$src)>;
+
+let PPC970_Unit = 2 in {
+// Truncating stores.
+def STB8 : DForm_1<38, (outs), (ins G8RC:$rS, memri:$src),
+ "stb $rS, $src", LdStGeneral,
+ [(truncstorei8 G8RC:$rS, iaddr:$src)]>;
+def STH8 : DForm_1<44, (outs), (ins G8RC:$rS, memri:$src),
+ "sth $rS, $src", LdStGeneral,
+ [(truncstorei16 G8RC:$rS, iaddr:$src)]>;
+def STW8 : DForm_1<36, (outs), (ins G8RC:$rS, memri:$src),
+ "stw $rS, $src", LdStGeneral,
+ [(truncstorei32 G8RC:$rS, iaddr:$src)]>;
+def STBX8 : XForm_8<31, 215, (outs), (ins G8RC:$rS, memrr:$dst),
+ "stbx $rS, $dst", LdStGeneral,
+ [(truncstorei8 G8RC:$rS, xaddr:$dst)]>,
+ PPC970_DGroup_Cracked;
+def STHX8 : XForm_8<31, 407, (outs), (ins G8RC:$rS, memrr:$dst),
+ "sthx $rS, $dst", LdStGeneral,
+ [(truncstorei16 G8RC:$rS, xaddr:$dst)]>,
+ PPC970_DGroup_Cracked;
+def STWX8 : XForm_8<31, 151, (outs), (ins G8RC:$rS, memrr:$dst),
+ "stwx $rS, $dst", LdStGeneral,
+ [(truncstorei32 G8RC:$rS, xaddr:$dst)]>,
+ PPC970_DGroup_Cracked;
+// Normal 8-byte stores.
+def STD : DSForm_1<62, 0, (outs), (ins G8RC:$rS, memrix:$dst),
+ "std $rS, $dst", LdStSTD,
+ [(store G8RC:$rS, ixaddr:$dst)]>, isPPC64;
+def STDX : XForm_8<31, 149, (outs), (ins G8RC:$rS, memrr:$dst),
+ "stdx $rS, $dst", LdStSTD,
+ [(store G8RC:$rS, xaddr:$dst)]>, isPPC64,
+ PPC970_DGroup_Cracked;
+}
+
+let PPC970_Unit = 2 in {
+
+def STBU8 : DForm_1a<38, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
+ symbolLo:$ptroff, ptr_rc:$ptrreg),
+ "stbu $rS, $ptroff($ptrreg)", LdStGeneral,
+ [(set ptr_rc:$ea_res,
+ (pre_truncsti8 G8RC:$rS, ptr_rc:$ptrreg,
+ iaddroff:$ptroff))]>,
+ RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
+def STHU8 : DForm_1a<45, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
+ symbolLo:$ptroff, ptr_rc:$ptrreg),
+ "sthu $rS, $ptroff($ptrreg)", LdStGeneral,
+ [(set ptr_rc:$ea_res,
+ (pre_truncsti16 G8RC:$rS, ptr_rc:$ptrreg,
+ iaddroff:$ptroff))]>,
+ RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
+
+def STDU : DSForm_1a<62, 1, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
+ s16immX4:$ptroff, ptr_rc:$ptrreg),
+ "stdu $rS, $ptroff($ptrreg)", LdStSTD,
+ [(set ptr_rc:$ea_res, (pre_store G8RC:$rS, ptr_rc:$ptrreg,
+ iaddroff:$ptroff))]>,
+ RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">,
+ isPPC64;
+
+let mayStore = 1 in
+def STDUX : XForm_8<31, 181, (outs), (ins G8RC:$rS, memrr:$dst),
+ "stdux $rS, $dst", LdStSTD,
+ []>, isPPC64;
+
+// STD_32/STDX_32 - Just like STD/STDX, but uses a '32-bit' input register.
+def STD_32 : DSForm_1<62, 0, (outs), (ins GPRC:$rT, memrix:$dst),
+ "std $rT, $dst", LdStSTD,
+ [(PPCstd_32 GPRC:$rT, ixaddr:$dst)]>, isPPC64;
+def STDX_32 : XForm_8<31, 149, (outs), (ins GPRC:$rT, memrr:$dst),
+ "stdx $rT, $dst", LdStSTD,
+ [(PPCstd_32 GPRC:$rT, xaddr:$dst)]>, isPPC64,
+ PPC970_DGroup_Cracked;
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// Floating point instructions.
+//
+
+
+let PPC970_Unit = 3, Uses = [RM] in { // FPU Operations.
+def FCFID : XForm_26<63, 846, (outs F8RC:$frD), (ins F8RC:$frB),
+ "fcfid $frD, $frB", FPGeneral,
+ [(set F8RC:$frD, (PPCfcfid F8RC:$frB))]>, isPPC64;
+def FCTIDZ : XForm_26<63, 815, (outs F8RC:$frD), (ins F8RC:$frB),
+ "fctidz $frD, $frB", FPGeneral,
+ [(set F8RC:$frD, (PPCfctidz F8RC:$frB))]>, isPPC64;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Instruction Patterns
+//
+
+// Extensions and truncates to/from 32-bit regs.
+def : Pat<(i64 (zext GPRC:$in)),
+ (RLDICL (OR4To8 GPRC:$in, GPRC:$in), 0, 32)>;
+def : Pat<(i64 (anyext GPRC:$in)),
+ (OR4To8 GPRC:$in, GPRC:$in)>;
+def : Pat<(i32 (trunc G8RC:$in)),
+ (OR8To4 G8RC:$in, G8RC:$in)>;
+
+// Extending loads with i64 targets.
+def : Pat<(zextloadi1 iaddr:$src),
+ (LBZ8 iaddr:$src)>;
+def : Pat<(zextloadi1 xaddr:$src),
+ (LBZX8 xaddr:$src)>;
+def : Pat<(extloadi1 iaddr:$src),
+ (LBZ8 iaddr:$src)>;
+def : Pat<(extloadi1 xaddr:$src),
+ (LBZX8 xaddr:$src)>;
+def : Pat<(extloadi8 iaddr:$src),
+ (LBZ8 iaddr:$src)>;
+def : Pat<(extloadi8 xaddr:$src),
+ (LBZX8 xaddr:$src)>;
+def : Pat<(extloadi16 iaddr:$src),
+ (LHZ8 iaddr:$src)>;
+def : Pat<(extloadi16 xaddr:$src),
+ (LHZX8 xaddr:$src)>;
+def : Pat<(extloadi32 iaddr:$src),
+ (LWZ8 iaddr:$src)>;
+def : Pat<(extloadi32 xaddr:$src),
+ (LWZX8 xaddr:$src)>;
+
+// Standard shifts. These are represented separately from the real shifts above
+// so that we can distinguish between shifts that allow 6-bit and 7-bit shift
+// amounts.
+def : Pat<(sra G8RC:$rS, GPRC:$rB),
+ (SRAD G8RC:$rS, GPRC:$rB)>;
+def : Pat<(srl G8RC:$rS, GPRC:$rB),
+ (SRD G8RC:$rS, GPRC:$rB)>;
+def : Pat<(shl G8RC:$rS, GPRC:$rB),
+ (SLD G8RC:$rS, GPRC:$rB)>;
+
+// SHL/SRL
+def : Pat<(shl G8RC:$in, (i32 imm:$imm)),
+ (RLDICR G8RC:$in, imm:$imm, (SHL64 imm:$imm))>;
+def : Pat<(srl G8RC:$in, (i32 imm:$imm)),
+ (RLDICL G8RC:$in, (SRL64 imm:$imm), imm:$imm)>;
+
+// ROTL
+def : Pat<(rotl G8RC:$in, GPRC:$sh),
+ (RLDCL G8RC:$in, GPRC:$sh, 0)>;
+def : Pat<(rotl G8RC:$in, (i32 imm:$imm)),
+ (RLDICL G8RC:$in, imm:$imm, 0)>;
+
+// Hi and Lo for Darwin Global Addresses.
+def : Pat<(PPChi tglobaladdr:$in, 0), (LIS8 tglobaladdr:$in)>;
+def : Pat<(PPClo tglobaladdr:$in, 0), (LI8 tglobaladdr:$in)>;
+def : Pat<(PPChi tconstpool:$in , 0), (LIS8 tconstpool:$in)>;
+def : Pat<(PPClo tconstpool:$in , 0), (LI8 tconstpool:$in)>;
+def : Pat<(PPChi tjumptable:$in , 0), (LIS8 tjumptable:$in)>;
+def : Pat<(PPClo tjumptable:$in , 0), (LI8 tjumptable:$in)>;
+def : Pat<(PPChi tblockaddress:$in, 0), (LIS8 tblockaddress:$in)>;
+def : Pat<(PPClo tblockaddress:$in, 0), (LI8 tblockaddress:$in)>;
+def : Pat<(add G8RC:$in, (PPChi tglobaladdr:$g, 0)),
+ (ADDIS8 G8RC:$in, tglobaladdr:$g)>;
+def : Pat<(add G8RC:$in, (PPChi tconstpool:$g, 0)),
+ (ADDIS8 G8RC:$in, tconstpool:$g)>;
+def : Pat<(add G8RC:$in, (PPChi tjumptable:$g, 0)),
+ (ADDIS8 G8RC:$in, tjumptable:$g)>;
+def : Pat<(add G8RC:$in, (PPChi tblockaddress:$g, 0)),
+ (ADDIS8 G8RC:$in, tblockaddress:$g)>;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
new file mode 100644
index 0000000..256370f
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -0,0 +1,695 @@
+//===- PPCInstrAltivec.td - The PowerPC Altivec Extension --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Altivec extension to the PowerPC instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Altivec transformation functions and pattern fragments.
+//
+
+// Since we canonicalize buildvectors to v16i8, all vnots "-1" operands will be
+// of that type.
+def vnot_ppc : PatFrag<(ops node:$in),
+ (xor node:$in, (bitconvert (v16i8 immAllOnesV)))>;
+
+def vpkuhum_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), false);
+}]>;
+def vpkuwum_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), false);
+}]>;
+def vpkuhum_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), true);
+}]>;
+def vpkuwum_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), true);
+}]>;
+
+
+def vmrglb_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 1, false);
+}]>;
+def vmrglh_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 2, false);
+}]>;
+def vmrglw_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 4, false);
+}]>;
+def vmrghb_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 1, false);
+}]>;
+def vmrghh_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 2, false);
+}]>;
+def vmrghw_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 4, false);
+}]>;
+
+
+def vmrglb_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 1, true);
+}]>;
+def vmrglh_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 2, true);
+}]>;
+def vmrglw_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 4, true);
+}]>;
+def vmrghb_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 1, true);
+}]>;
+def vmrghh_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 2, true);
+}]>;
+def vmrghw_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 4, true);
+}]>;
+
+
+def VSLDOI_get_imm : SDNodeXForm<vector_shuffle, [{
+ return getI32Imm(PPC::isVSLDOIShuffleMask(N, false));
+}]>;
+def vsldoi_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVSLDOIShuffleMask(N, false) != -1;
+}], VSLDOI_get_imm>;
+
+
+/// VSLDOI_unary* - These are used to match vsldoi(X,X), which is turned into
+/// vector_shuffle(X,undef,mask) by the dag combiner.
+def VSLDOI_unary_get_imm : SDNodeXForm<vector_shuffle, [{
+ return getI32Imm(PPC::isVSLDOIShuffleMask(N, true));
+}]>;
+def vsldoi_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVSLDOIShuffleMask(N, true) != -1;
+}], VSLDOI_unary_get_imm>;
+
+
+// VSPLT*_get_imm xform function: convert vector_shuffle mask to VSPLT* imm.
+def VSPLTB_get_imm : SDNodeXForm<vector_shuffle, [{
+ return getI32Imm(PPC::getVSPLTImmediate(N, 1));
+}]>;
+def vspltb_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isSplatShuffleMask(cast<ShuffleVectorSDNode>(N), 1);
+}], VSPLTB_get_imm>;
+def VSPLTH_get_imm : SDNodeXForm<vector_shuffle, [{
+ return getI32Imm(PPC::getVSPLTImmediate(N, 2));
+}]>;
+def vsplth_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isSplatShuffleMask(cast<ShuffleVectorSDNode>(N), 2);
+}], VSPLTH_get_imm>;
+def VSPLTW_get_imm : SDNodeXForm<vector_shuffle, [{
+ return getI32Imm(PPC::getVSPLTImmediate(N, 4));
+}]>;
+def vspltw_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isSplatShuffleMask(cast<ShuffleVectorSDNode>(N), 4);
+}], VSPLTW_get_imm>;
+
+
+// VSPLTISB_get_imm xform function: convert build_vector to VSPLTISB imm.
+def VSPLTISB_get_imm : SDNodeXForm<build_vector, [{
+ return PPC::get_VSPLTI_elt(N, 1, *CurDAG);
+}]>;
+def vecspltisb : PatLeaf<(build_vector), [{
+ return PPC::get_VSPLTI_elt(N, 1, *CurDAG).getNode() != 0;
+}], VSPLTISB_get_imm>;
+
+// VSPLTISH_get_imm xform function: convert build_vector to VSPLTISH imm.
+def VSPLTISH_get_imm : SDNodeXForm<build_vector, [{
+ return PPC::get_VSPLTI_elt(N, 2, *CurDAG);
+}]>;
+def vecspltish : PatLeaf<(build_vector), [{
+ return PPC::get_VSPLTI_elt(N, 2, *CurDAG).getNode() != 0;
+}], VSPLTISH_get_imm>;
+
+// VSPLTISW_get_imm xform function: convert build_vector to VSPLTISW imm.
+def VSPLTISW_get_imm : SDNodeXForm<build_vector, [{
+ return PPC::get_VSPLTI_elt(N, 4, *CurDAG);
+}]>;
+def vecspltisw : PatLeaf<(build_vector), [{
+ return PPC::get_VSPLTI_elt(N, 4, *CurDAG).getNode() != 0;
+}], VSPLTISW_get_imm>;
+
+def V_immneg0 : PatLeaf<(build_vector), [{
+ return PPC::isAllNegativeZeroVector(N);
+}]>;
+
+//===----------------------------------------------------------------------===//
+// Helpers for defining instructions that directly correspond to intrinsics.
+
+// VA1a_Int - A VAForm_1a intrinsic definition.
+class VA1a_Int<bits<6> xo, string opc, Intrinsic IntID>
+ : VAForm_1a<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB, VRRC:$vC),
+ !strconcat(opc, " $vD, $vA, $vB, $vC"), VecFP,
+ [(set VRRC:$vD, (IntID VRRC:$vA, VRRC:$vB, VRRC:$vC))]>;
+
+// VX1_Int - A VXForm_1 intrinsic definition.
+class VX1_Int<bits<11> xo, string opc, Intrinsic IntID>
+ : VXForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ !strconcat(opc, " $vD, $vA, $vB"), VecFP,
+ [(set VRRC:$vD, (IntID VRRC:$vA, VRRC:$vB))]>;
+
+// VX2_Int - A VXForm_2 intrinsic definition.
+class VX2_Int<bits<11> xo, string opc, Intrinsic IntID>
+ : VXForm_2<xo, (outs VRRC:$vD), (ins VRRC:$vB),
+ !strconcat(opc, " $vD, $vB"), VecFP,
+ [(set VRRC:$vD, (IntID VRRC:$vB))]>;
+
+//===----------------------------------------------------------------------===//
+// Instruction Definitions.
+
+def DSS : DSS_Form<822, (outs),
+ (ins u5imm:$ZERO0, u5imm:$STRM,u5imm:$ZERO1,u5imm:$ZERO2),
+ "dss $STRM", LdStGeneral /*FIXME*/, []>;
+def DSSALL : DSS_Form<822, (outs),
+ (ins u5imm:$ONE, u5imm:$ZERO0,u5imm:$ZERO1,u5imm:$ZERO2),
+ "dssall", LdStGeneral /*FIXME*/, []>;
+def DST : DSS_Form<342, (outs),
+ (ins u5imm:$ZERO, u5imm:$STRM, GPRC:$rA, GPRC:$rB),
+ "dst $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>;
+def DSTT : DSS_Form<342, (outs),
+ (ins u5imm:$ONE, u5imm:$STRM, GPRC:$rA, GPRC:$rB),
+ "dstt $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>;
+def DSTST : DSS_Form<374, (outs),
+ (ins u5imm:$ZERO, u5imm:$STRM, GPRC:$rA, GPRC:$rB),
+ "dstst $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>;
+def DSTSTT : DSS_Form<374, (outs),
+ (ins u5imm:$ONE, u5imm:$STRM, GPRC:$rA, GPRC:$rB),
+ "dststt $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>;
+
+def DST64 : DSS_Form<342, (outs),
+ (ins u5imm:$ZERO, u5imm:$STRM, G8RC:$rA, GPRC:$rB),
+ "dst $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>;
+def DSTT64 : DSS_Form<342, (outs),
+ (ins u5imm:$ONE, u5imm:$STRM, G8RC:$rA, GPRC:$rB),
+ "dstt $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>;
+def DSTST64 : DSS_Form<374, (outs),
+ (ins u5imm:$ZERO, u5imm:$STRM, G8RC:$rA, GPRC:$rB),
+ "dstst $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>;
+def DSTSTT64 : DSS_Form<374, (outs),
+ (ins u5imm:$ONE, u5imm:$STRM, G8RC:$rA, GPRC:$rB),
+ "dststt $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>;
+
+def MFVSCR : VXForm_4<1540, (outs VRRC:$vD), (ins),
+ "mfvscr $vD", LdStGeneral,
+ [(set VRRC:$vD, (int_ppc_altivec_mfvscr))]>;
+def MTVSCR : VXForm_5<1604, (outs), (ins VRRC:$vB),
+ "mtvscr $vB", LdStGeneral,
+ [(int_ppc_altivec_mtvscr VRRC:$vB)]>;
+
+let canFoldAsLoad = 1, PPC970_Unit = 2 in { // Loads.
+def LVEBX: XForm_1<31, 7, (outs VRRC:$vD), (ins memrr:$src),
+ "lvebx $vD, $src", LdStGeneral,
+ [(set VRRC:$vD, (int_ppc_altivec_lvebx xoaddr:$src))]>;
+def LVEHX: XForm_1<31, 39, (outs VRRC:$vD), (ins memrr:$src),
+ "lvehx $vD, $src", LdStGeneral,
+ [(set VRRC:$vD, (int_ppc_altivec_lvehx xoaddr:$src))]>;
+def LVEWX: XForm_1<31, 71, (outs VRRC:$vD), (ins memrr:$src),
+ "lvewx $vD, $src", LdStGeneral,
+ [(set VRRC:$vD, (int_ppc_altivec_lvewx xoaddr:$src))]>;
+def LVX : XForm_1<31, 103, (outs VRRC:$vD), (ins memrr:$src),
+ "lvx $vD, $src", LdStGeneral,
+ [(set VRRC:$vD, (int_ppc_altivec_lvx xoaddr:$src))]>;
+def LVXL : XForm_1<31, 359, (outs VRRC:$vD), (ins memrr:$src),
+ "lvxl $vD, $src", LdStGeneral,
+ [(set VRRC:$vD, (int_ppc_altivec_lvxl xoaddr:$src))]>;
+}
+
+def LVSL : XForm_1<31, 6, (outs VRRC:$vD), (ins memrr:$src),
+ "lvsl $vD, $src", LdStGeneral,
+ [(set VRRC:$vD, (int_ppc_altivec_lvsl xoaddr:$src))]>,
+ PPC970_Unit_LSU;
+def LVSR : XForm_1<31, 38, (outs VRRC:$vD), (ins memrr:$src),
+ "lvsr $vD, $src", LdStGeneral,
+ [(set VRRC:$vD, (int_ppc_altivec_lvsr xoaddr:$src))]>,
+ PPC970_Unit_LSU;
+
+let PPC970_Unit = 2 in { // Stores.
+def STVEBX: XForm_8<31, 135, (outs), (ins VRRC:$rS, memrr:$dst),
+ "stvebx $rS, $dst", LdStGeneral,
+ [(int_ppc_altivec_stvebx VRRC:$rS, xoaddr:$dst)]>;
+def STVEHX: XForm_8<31, 167, (outs), (ins VRRC:$rS, memrr:$dst),
+ "stvehx $rS, $dst", LdStGeneral,
+ [(int_ppc_altivec_stvehx VRRC:$rS, xoaddr:$dst)]>;
+def STVEWX: XForm_8<31, 199, (outs), (ins VRRC:$rS, memrr:$dst),
+ "stvewx $rS, $dst", LdStGeneral,
+ [(int_ppc_altivec_stvewx VRRC:$rS, xoaddr:$dst)]>;
+def STVX : XForm_8<31, 231, (outs), (ins VRRC:$rS, memrr:$dst),
+ "stvx $rS, $dst", LdStGeneral,
+ [(int_ppc_altivec_stvx VRRC:$rS, xoaddr:$dst)]>;
+def STVXL : XForm_8<31, 487, (outs), (ins VRRC:$rS, memrr:$dst),
+ "stvxl $rS, $dst", LdStGeneral,
+ [(int_ppc_altivec_stvxl VRRC:$rS, xoaddr:$dst)]>;
+}
+
+let PPC970_Unit = 5 in { // VALU Operations.
+// VA-Form instructions. 3-input AltiVec ops.
+def VMADDFP : VAForm_1<46, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vC, VRRC:$vB),
+ "vmaddfp $vD, $vA, $vC, $vB", VecFP,
+ [(set VRRC:$vD, (fadd (fmul VRRC:$vA, VRRC:$vC),
+ VRRC:$vB))]>,
+ Requires<[FPContractions]>;
+def VNMSUBFP: VAForm_1<47, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vC, VRRC:$vB),
+ "vnmsubfp $vD, $vA, $vC, $vB", VecFP,
+ [(set VRRC:$vD, (fsub V_immneg0,
+ (fsub (fmul VRRC:$vA, VRRC:$vC),
+ VRRC:$vB)))]>,
+ Requires<[FPContractions]>;
+
+def VMHADDSHS : VA1a_Int<32, "vmhaddshs", int_ppc_altivec_vmhaddshs>;
+def VMHRADDSHS : VA1a_Int<33, "vmhraddshs", int_ppc_altivec_vmhraddshs>;
+def VMLADDUHM : VA1a_Int<34, "vmladduhm", int_ppc_altivec_vmladduhm>;
+def VPERM : VA1a_Int<43, "vperm", int_ppc_altivec_vperm>;
+def VSEL : VA1a_Int<42, "vsel", int_ppc_altivec_vsel>;
+
+// Shuffles.
+def VSLDOI : VAForm_2<44, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB, u5imm:$SH),
+ "vsldoi $vD, $vA, $vB, $SH", VecFP,
+ [(set VRRC:$vD,
+ (vsldoi_shuffle:$SH (v16i8 VRRC:$vA), VRRC:$vB))]>;
+
+// VX-Form instructions. AltiVec arithmetic ops.
+def VADDFP : VXForm_1<10, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vaddfp $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD, (fadd VRRC:$vA, VRRC:$vB))]>;
+
+def VADDUBM : VXForm_1<0, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vaddubm $vD, $vA, $vB", VecGeneral,
+ [(set VRRC:$vD, (add (v16i8 VRRC:$vA), VRRC:$vB))]>;
+def VADDUHM : VXForm_1<64, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vadduhm $vD, $vA, $vB", VecGeneral,
+ [(set VRRC:$vD, (add (v8i16 VRRC:$vA), VRRC:$vB))]>;
+def VADDUWM : VXForm_1<128, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vadduwm $vD, $vA, $vB", VecGeneral,
+ [(set VRRC:$vD, (add (v4i32 VRRC:$vA), VRRC:$vB))]>;
+
+def VADDCUW : VX1_Int<384, "vaddcuw", int_ppc_altivec_vaddcuw>;
+def VADDSBS : VX1_Int<768, "vaddsbs", int_ppc_altivec_vaddsbs>;
+def VADDSHS : VX1_Int<832, "vaddshs", int_ppc_altivec_vaddshs>;
+def VADDSWS : VX1_Int<896, "vaddsws", int_ppc_altivec_vaddsws>;
+def VADDUBS : VX1_Int<512, "vaddubs", int_ppc_altivec_vaddubs>;
+def VADDUHS : VX1_Int<576, "vadduhs", int_ppc_altivec_vadduhs>;
+def VADDUWS : VX1_Int<640, "vadduws", int_ppc_altivec_vadduws>;
+
+
+def VAND : VXForm_1<1028, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vand $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD, (and (v4i32 VRRC:$vA), VRRC:$vB))]>;
+def VANDC : VXForm_1<1092, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vandc $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD, (and (v4i32 VRRC:$vA),
+ (vnot_ppc VRRC:$vB)))]>;
+
+def VCFSX : VXForm_1<842, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
+ "vcfsx $vD, $vB, $UIMM", VecFP,
+ [(set VRRC:$vD,
+ (int_ppc_altivec_vcfsx VRRC:$vB, imm:$UIMM))]>;
+def VCFUX : VXForm_1<778, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
+ "vcfux $vD, $vB, $UIMM", VecFP,
+ [(set VRRC:$vD,
+ (int_ppc_altivec_vcfux VRRC:$vB, imm:$UIMM))]>;
+def VCTSXS : VXForm_1<970, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
+ "vctsxs $vD, $vB, $UIMM", VecFP,
+ [(set VRRC:$vD,
+ (int_ppc_altivec_vctsxs VRRC:$vB, imm:$UIMM))]>;
+def VCTUXS : VXForm_1<906, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
+ "vctuxs $vD, $vB, $UIMM", VecFP,
+ [(set VRRC:$vD,
+ (int_ppc_altivec_vctuxs VRRC:$vB, imm:$UIMM))]>;
+def VEXPTEFP : VX2_Int<394, "vexptefp", int_ppc_altivec_vexptefp>;
+def VLOGEFP : VX2_Int<458, "vlogefp", int_ppc_altivec_vlogefp>;
+
+def VAVGSB : VX1_Int<1282, "vavgsb", int_ppc_altivec_vavgsb>;
+def VAVGSH : VX1_Int<1346, "vavgsh", int_ppc_altivec_vavgsh>;
+def VAVGSW : VX1_Int<1410, "vavgsw", int_ppc_altivec_vavgsw>;
+def VAVGUB : VX1_Int<1026, "vavgub", int_ppc_altivec_vavgub>;
+def VAVGUH : VX1_Int<1090, "vavguh", int_ppc_altivec_vavguh>;
+def VAVGUW : VX1_Int<1154, "vavguw", int_ppc_altivec_vavguw>;
+
+def VMAXFP : VX1_Int<1034, "vmaxfp", int_ppc_altivec_vmaxfp>;
+def VMAXSB : VX1_Int< 258, "vmaxsb", int_ppc_altivec_vmaxsb>;
+def VMAXSH : VX1_Int< 322, "vmaxsh", int_ppc_altivec_vmaxsh>;
+def VMAXSW : VX1_Int< 386, "vmaxsw", int_ppc_altivec_vmaxsw>;
+def VMAXUB : VX1_Int< 2, "vmaxub", int_ppc_altivec_vmaxub>;
+def VMAXUH : VX1_Int< 66, "vmaxuh", int_ppc_altivec_vmaxuh>;
+def VMAXUW : VX1_Int< 130, "vmaxuw", int_ppc_altivec_vmaxuw>;
+def VMINFP : VX1_Int<1098, "vminfp", int_ppc_altivec_vminfp>;
+def VMINSB : VX1_Int< 770, "vminsb", int_ppc_altivec_vminsb>;
+def VMINSH : VX1_Int< 834, "vminsh", int_ppc_altivec_vminsh>;
+def VMINSW : VX1_Int< 898, "vminsw", int_ppc_altivec_vminsw>;
+def VMINUB : VX1_Int< 514, "vminub", int_ppc_altivec_vminub>;
+def VMINUH : VX1_Int< 578, "vminuh", int_ppc_altivec_vminuh>;
+def VMINUW : VX1_Int< 642, "vminuw", int_ppc_altivec_vminuw>;
+
+def VMRGHB : VXForm_1< 12, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vmrghb $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD, (vmrghb_shuffle VRRC:$vA, VRRC:$vB))]>;
+def VMRGHH : VXForm_1< 76, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vmrghh $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD, (vmrghh_shuffle VRRC:$vA, VRRC:$vB))]>;
+def VMRGHW : VXForm_1<140, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vmrghw $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD, (vmrghw_shuffle VRRC:$vA, VRRC:$vB))]>;
+def VMRGLB : VXForm_1<268, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vmrglb $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD, (vmrglb_shuffle VRRC:$vA, VRRC:$vB))]>;
+def VMRGLH : VXForm_1<332, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vmrglh $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD, (vmrglh_shuffle VRRC:$vA, VRRC:$vB))]>;
+def VMRGLW : VXForm_1<396, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vmrglw $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD, (vmrglw_shuffle VRRC:$vA, VRRC:$vB))]>;
+
+def VMSUMMBM : VA1a_Int<37, "vmsummbm", int_ppc_altivec_vmsummbm>;
+def VMSUMSHM : VA1a_Int<40, "vmsumshm", int_ppc_altivec_vmsumshm>;
+def VMSUMSHS : VA1a_Int<41, "vmsumshs", int_ppc_altivec_vmsumshs>;
+def VMSUMUBM : VA1a_Int<36, "vmsumubm", int_ppc_altivec_vmsumubm>;
+def VMSUMUHM : VA1a_Int<38, "vmsumuhm", int_ppc_altivec_vmsumuhm>;
+def VMSUMUHS : VA1a_Int<39, "vmsumuhs", int_ppc_altivec_vmsumuhs>;
+
+def VMULESB : VX1_Int<776, "vmulesb", int_ppc_altivec_vmulesb>;
+def VMULESH : VX1_Int<840, "vmulesh", int_ppc_altivec_vmulesh>;
+def VMULEUB : VX1_Int<520, "vmuleub", int_ppc_altivec_vmuleub>;
+def VMULEUH : VX1_Int<584, "vmuleuh", int_ppc_altivec_vmuleuh>;
+def VMULOSB : VX1_Int<264, "vmulosb", int_ppc_altivec_vmulosb>;
+def VMULOSH : VX1_Int<328, "vmulosh", int_ppc_altivec_vmulosh>;
+def VMULOUB : VX1_Int< 8, "vmuloub", int_ppc_altivec_vmuloub>;
+def VMULOUH : VX1_Int< 72, "vmulouh", int_ppc_altivec_vmulouh>;
+
+def VREFP : VX2_Int<266, "vrefp", int_ppc_altivec_vrefp>;
+def VRFIM : VX2_Int<714, "vrfim", int_ppc_altivec_vrfim>;
+def VRFIN : VX2_Int<522, "vrfin", int_ppc_altivec_vrfin>;
+def VRFIP : VX2_Int<650, "vrfip", int_ppc_altivec_vrfip>;
+def VRFIZ : VX2_Int<586, "vrfiz", int_ppc_altivec_vrfiz>;
+def VRSQRTEFP : VX2_Int<330, "vrsqrtefp", int_ppc_altivec_vrsqrtefp>;
+
+def VSUBCUW : VX1_Int<74, "vsubcuw", int_ppc_altivec_vsubcuw>;
+
+def VSUBFP : VXForm_1<74, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vsubfp $vD, $vA, $vB", VecGeneral,
+ [(set VRRC:$vD, (fsub VRRC:$vA, VRRC:$vB))]>;
+def VSUBUBM : VXForm_1<1024, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vsububm $vD, $vA, $vB", VecGeneral,
+ [(set VRRC:$vD, (sub (v16i8 VRRC:$vA), VRRC:$vB))]>;
+def VSUBUHM : VXForm_1<1088, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vsubuhm $vD, $vA, $vB", VecGeneral,
+ [(set VRRC:$vD, (sub (v8i16 VRRC:$vA), VRRC:$vB))]>;
+def VSUBUWM : VXForm_1<1152, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vsubuwm $vD, $vA, $vB", VecGeneral,
+ [(set VRRC:$vD, (sub (v4i32 VRRC:$vA), VRRC:$vB))]>;
+
+def VSUBSBS : VX1_Int<1792, "vsubsbs" , int_ppc_altivec_vsubsbs>;
+def VSUBSHS : VX1_Int<1856, "vsubshs" , int_ppc_altivec_vsubshs>;
+def VSUBSWS : VX1_Int<1920, "vsubsws" , int_ppc_altivec_vsubsws>;
+def VSUBUBS : VX1_Int<1536, "vsububs" , int_ppc_altivec_vsububs>;
+def VSUBUHS : VX1_Int<1600, "vsubuhs" , int_ppc_altivec_vsubuhs>;
+def VSUBUWS : VX1_Int<1664, "vsubuws" , int_ppc_altivec_vsubuws>;
+def VSUMSWS : VX1_Int<1928, "vsumsws" , int_ppc_altivec_vsumsws>;
+def VSUM2SWS: VX1_Int<1672, "vsum2sws", int_ppc_altivec_vsum2sws>;
+def VSUM4SBS: VX1_Int<1672, "vsum4sbs", int_ppc_altivec_vsum4sbs>;
+def VSUM4SHS: VX1_Int<1608, "vsum4shs", int_ppc_altivec_vsum4shs>;
+def VSUM4UBS: VX1_Int<1544, "vsum4ubs", int_ppc_altivec_vsum4ubs>;
+
+def VNOR : VXForm_1<1284, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vnor $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD, (vnot_ppc (or (v4i32 VRRC:$vA),
+ VRRC:$vB)))]>;
+def VOR : VXForm_1<1156, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vor $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD, (or (v4i32 VRRC:$vA), VRRC:$vB))]>;
+def VXOR : VXForm_1<1220, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vxor $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD, (xor (v4i32 VRRC:$vA), VRRC:$vB))]>;
+
+def VRLB : VX1_Int< 4, "vrlb", int_ppc_altivec_vrlb>;
+def VRLH : VX1_Int< 68, "vrlh", int_ppc_altivec_vrlh>;
+def VRLW : VX1_Int< 132, "vrlw", int_ppc_altivec_vrlw>;
+
+def VSL : VX1_Int< 452, "vsl" , int_ppc_altivec_vsl >;
+def VSLO : VX1_Int<1036, "vslo", int_ppc_altivec_vslo>;
+def VSLB : VX1_Int< 260, "vslb", int_ppc_altivec_vslb>;
+def VSLH : VX1_Int< 324, "vslh", int_ppc_altivec_vslh>;
+def VSLW : VX1_Int< 388, "vslw", int_ppc_altivec_vslw>;
+
+def VSPLTB : VXForm_1<524, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
+ "vspltb $vD, $vB, $UIMM", VecPerm,
+ [(set VRRC:$vD,
+ (vspltb_shuffle:$UIMM (v16i8 VRRC:$vB), (undef)))]>;
+def VSPLTH : VXForm_1<588, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
+ "vsplth $vD, $vB, $UIMM", VecPerm,
+ [(set VRRC:$vD,
+ (vsplth_shuffle:$UIMM (v16i8 VRRC:$vB), (undef)))]>;
+def VSPLTW : VXForm_1<652, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
+ "vspltw $vD, $vB, $UIMM", VecPerm,
+ [(set VRRC:$vD,
+ (vspltw_shuffle:$UIMM (v16i8 VRRC:$vB), (undef)))]>;
+
+def VSR : VX1_Int< 708, "vsr" , int_ppc_altivec_vsr>;
+def VSRO : VX1_Int<1100, "vsro" , int_ppc_altivec_vsro>;
+def VSRAB : VX1_Int< 772, "vsrab", int_ppc_altivec_vsrab>;
+def VSRAH : VX1_Int< 836, "vsrah", int_ppc_altivec_vsrah>;
+def VSRAW : VX1_Int< 900, "vsraw", int_ppc_altivec_vsraw>;
+def VSRB : VX1_Int< 516, "vsrb" , int_ppc_altivec_vsrb>;
+def VSRH : VX1_Int< 580, "vsrh" , int_ppc_altivec_vsrh>;
+def VSRW : VX1_Int< 644, "vsrw" , int_ppc_altivec_vsrw>;
+
+
+def VSPLTISB : VXForm_3<780, (outs VRRC:$vD), (ins s5imm:$SIMM),
+ "vspltisb $vD, $SIMM", VecPerm,
+ [(set VRRC:$vD, (v16i8 vecspltisb:$SIMM))]>;
+def VSPLTISH : VXForm_3<844, (outs VRRC:$vD), (ins s5imm:$SIMM),
+ "vspltish $vD, $SIMM", VecPerm,
+ [(set VRRC:$vD, (v8i16 vecspltish:$SIMM))]>;
+def VSPLTISW : VXForm_3<908, (outs VRRC:$vD), (ins s5imm:$SIMM),
+ "vspltisw $vD, $SIMM", VecPerm,
+ [(set VRRC:$vD, (v4i32 vecspltisw:$SIMM))]>;
+
+// Vector Pack.
+def VPKPX : VX1_Int<782, "vpkpx", int_ppc_altivec_vpkpx>;
+def VPKSHSS : VX1_Int<398, "vpkshss", int_ppc_altivec_vpkshss>;
+def VPKSHUS : VX1_Int<270, "vpkshus", int_ppc_altivec_vpkshus>;
+def VPKSWSS : VX1_Int<462, "vpkswss", int_ppc_altivec_vpkswss>;
+def VPKSWUS : VX1_Int<334, "vpkswus", int_ppc_altivec_vpkswus>;
+def VPKUHUM : VXForm_1<14, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vpkuhum $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD,
+ (vpkuhum_shuffle (v16i8 VRRC:$vA), VRRC:$vB))]>;
+def VPKUHUS : VX1_Int<142, "vpkuhus", int_ppc_altivec_vpkuhus>;
+def VPKUWUM : VXForm_1<78, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vpkuwum $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD,
+ (vpkuwum_shuffle (v16i8 VRRC:$vA), VRRC:$vB))]>;
+def VPKUWUS : VX1_Int<206, "vpkuwus", int_ppc_altivec_vpkuwus>;
+
+// Vector Unpack.
+def VUPKHPX : VX2_Int<846, "vupkhpx", int_ppc_altivec_vupkhpx>;
+def VUPKHSB : VX2_Int<526, "vupkhsb", int_ppc_altivec_vupkhsb>;
+def VUPKHSH : VX2_Int<590, "vupkhsh", int_ppc_altivec_vupkhsh>;
+def VUPKLPX : VX2_Int<974, "vupklpx", int_ppc_altivec_vupklpx>;
+def VUPKLSB : VX2_Int<654, "vupklsb", int_ppc_altivec_vupklsb>;
+def VUPKLSH : VX2_Int<718, "vupklsh", int_ppc_altivec_vupklsh>;
+
+
+// Altivec Comparisons.
+
+class VCMP<bits<10> xo, string asmstr, ValueType Ty>
+ : VXRForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),asmstr,VecFPCompare,
+ [(set VRRC:$vD, (Ty (PPCvcmp VRRC:$vA, VRRC:$vB, xo)))]>;
+class VCMPo<bits<10> xo, string asmstr, ValueType Ty>
+ : VXRForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),asmstr,VecFPCompare,
+ [(set VRRC:$vD, (Ty (PPCvcmp_o VRRC:$vA, VRRC:$vB, xo)))]> {
+ let Defs = [CR6];
+ let RC = 1;
+}
+
+// f32 element comparisons.0
+def VCMPBFP : VCMP <966, "vcmpbfp $vD, $vA, $vB" , v4f32>;
+def VCMPBFPo : VCMPo<966, "vcmpbfp. $vD, $vA, $vB" , v4f32>;
+def VCMPEQFP : VCMP <198, "vcmpeqfp $vD, $vA, $vB" , v4f32>;
+def VCMPEQFPo : VCMPo<198, "vcmpeqfp. $vD, $vA, $vB", v4f32>;
+def VCMPGEFP : VCMP <454, "vcmpgefp $vD, $vA, $vB" , v4f32>;
+def VCMPGEFPo : VCMPo<454, "vcmpgefp. $vD, $vA, $vB", v4f32>;
+def VCMPGTFP : VCMP <710, "vcmpgtfp $vD, $vA, $vB" , v4f32>;
+def VCMPGTFPo : VCMPo<710, "vcmpgtfp. $vD, $vA, $vB", v4f32>;
+
+// i8 element comparisons.
+def VCMPEQUB : VCMP < 6, "vcmpequb $vD, $vA, $vB" , v16i8>;
+def VCMPEQUBo : VCMPo< 6, "vcmpequb. $vD, $vA, $vB", v16i8>;
+def VCMPGTSB : VCMP <774, "vcmpgtsb $vD, $vA, $vB" , v16i8>;
+def VCMPGTSBo : VCMPo<774, "vcmpgtsb. $vD, $vA, $vB", v16i8>;
+def VCMPGTUB : VCMP <518, "vcmpgtub $vD, $vA, $vB" , v16i8>;
+def VCMPGTUBo : VCMPo<518, "vcmpgtub. $vD, $vA, $vB", v16i8>;
+
+// i16 element comparisons.
+def VCMPEQUH : VCMP < 70, "vcmpequh $vD, $vA, $vB" , v8i16>;
+def VCMPEQUHo : VCMPo< 70, "vcmpequh. $vD, $vA, $vB", v8i16>;
+def VCMPGTSH : VCMP <838, "vcmpgtsh $vD, $vA, $vB" , v8i16>;
+def VCMPGTSHo : VCMPo<838, "vcmpgtsh. $vD, $vA, $vB", v8i16>;
+def VCMPGTUH : VCMP <582, "vcmpgtuh $vD, $vA, $vB" , v8i16>;
+def VCMPGTUHo : VCMPo<582, "vcmpgtuh. $vD, $vA, $vB", v8i16>;
+
+// i32 element comparisons.
+def VCMPEQUW : VCMP <134, "vcmpequw $vD, $vA, $vB" , v4i32>;
+def VCMPEQUWo : VCMPo<134, "vcmpequw. $vD, $vA, $vB", v4i32>;
+def VCMPGTSW : VCMP <902, "vcmpgtsw $vD, $vA, $vB" , v4i32>;
+def VCMPGTSWo : VCMPo<902, "vcmpgtsw. $vD, $vA, $vB", v4i32>;
+def VCMPGTUW : VCMP <646, "vcmpgtuw $vD, $vA, $vB" , v4i32>;
+def VCMPGTUWo : VCMPo<646, "vcmpgtuw. $vD, $vA, $vB", v4i32>;
+
+def V_SET0 : VXForm_setzero<1220, (outs VRRC:$vD), (ins),
+ "vxor $vD, $vD, $vD", VecFP,
+ [(set VRRC:$vD, (v4i32 immAllZerosV))]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Additional Altivec Patterns
+//
+
+// DS* intrinsics
+def : Pat<(int_ppc_altivec_dssall), (DSSALL 1, 0, 0, 0)>;
+def : Pat<(int_ppc_altivec_dss imm:$STRM), (DSS 0, imm:$STRM, 0, 0)>;
+
+// * 32-bit
+def : Pat<(int_ppc_altivec_dst GPRC:$rA, GPRC:$rB, imm:$STRM),
+ (DST 0, imm:$STRM, GPRC:$rA, GPRC:$rB)>;
+def : Pat<(int_ppc_altivec_dstt GPRC:$rA, GPRC:$rB, imm:$STRM),
+ (DSTT 1, imm:$STRM, GPRC:$rA, GPRC:$rB)>;
+def : Pat<(int_ppc_altivec_dstst GPRC:$rA, GPRC:$rB, imm:$STRM),
+ (DSTST 0, imm:$STRM, GPRC:$rA, GPRC:$rB)>;
+def : Pat<(int_ppc_altivec_dststt GPRC:$rA, GPRC:$rB, imm:$STRM),
+ (DSTSTT 1, imm:$STRM, GPRC:$rA, GPRC:$rB)>;
+
+// * 64-bit
+def : Pat<(int_ppc_altivec_dst G8RC:$rA, GPRC:$rB, imm:$STRM),
+ (DST64 0, imm:$STRM, (i64 G8RC:$rA), GPRC:$rB)>;
+def : Pat<(int_ppc_altivec_dstt G8RC:$rA, GPRC:$rB, imm:$STRM),
+ (DSTT64 1, imm:$STRM, (i64 G8RC:$rA), GPRC:$rB)>;
+def : Pat<(int_ppc_altivec_dstst G8RC:$rA, GPRC:$rB, imm:$STRM),
+ (DSTST64 0, imm:$STRM, (i64 G8RC:$rA), GPRC:$rB)>;
+def : Pat<(int_ppc_altivec_dststt G8RC:$rA, GPRC:$rB, imm:$STRM),
+ (DSTSTT64 1, imm:$STRM, (i64 G8RC:$rA), GPRC:$rB)>;
+
+// Loads.
+def : Pat<(v4i32 (load xoaddr:$src)), (LVX xoaddr:$src)>;
+
+// Stores.
+def : Pat<(store (v4i32 VRRC:$rS), xoaddr:$dst),
+ (STVX (v4i32 VRRC:$rS), xoaddr:$dst)>;
+
+// Bit conversions.
+def : Pat<(v16i8 (bitconvert (v8i16 VRRC:$src))), (v16i8 VRRC:$src)>;
+def : Pat<(v16i8 (bitconvert (v4i32 VRRC:$src))), (v16i8 VRRC:$src)>;
+def : Pat<(v16i8 (bitconvert (v4f32 VRRC:$src))), (v16i8 VRRC:$src)>;
+
+def : Pat<(v8i16 (bitconvert (v16i8 VRRC:$src))), (v8i16 VRRC:$src)>;
+def : Pat<(v8i16 (bitconvert (v4i32 VRRC:$src))), (v8i16 VRRC:$src)>;
+def : Pat<(v8i16 (bitconvert (v4f32 VRRC:$src))), (v8i16 VRRC:$src)>;
+
+def : Pat<(v4i32 (bitconvert (v16i8 VRRC:$src))), (v4i32 VRRC:$src)>;
+def : Pat<(v4i32 (bitconvert (v8i16 VRRC:$src))), (v4i32 VRRC:$src)>;
+def : Pat<(v4i32 (bitconvert (v4f32 VRRC:$src))), (v4i32 VRRC:$src)>;
+
+def : Pat<(v4f32 (bitconvert (v16i8 VRRC:$src))), (v4f32 VRRC:$src)>;
+def : Pat<(v4f32 (bitconvert (v8i16 VRRC:$src))), (v4f32 VRRC:$src)>;
+def : Pat<(v4f32 (bitconvert (v4i32 VRRC:$src))), (v4f32 VRRC:$src)>;
+
+// Shuffles.
+
+// Match vsldoi(x,x), vpkuwum(x,x), vpkuhum(x,x)
+def:Pat<(vsldoi_unary_shuffle:$in (v16i8 VRRC:$vA), undef),
+ (VSLDOI VRRC:$vA, VRRC:$vA, (VSLDOI_unary_get_imm VRRC:$in))>;
+def:Pat<(vpkuwum_unary_shuffle (v16i8 VRRC:$vA), undef),
+ (VPKUWUM VRRC:$vA, VRRC:$vA)>;
+def:Pat<(vpkuhum_unary_shuffle (v16i8 VRRC:$vA), undef),
+ (VPKUHUM VRRC:$vA, VRRC:$vA)>;
+
+// Match vmrg*(x,x)
+def:Pat<(vmrglb_unary_shuffle (v16i8 VRRC:$vA), undef),
+ (VMRGLB VRRC:$vA, VRRC:$vA)>;
+def:Pat<(vmrglh_unary_shuffle (v16i8 VRRC:$vA), undef),
+ (VMRGLH VRRC:$vA, VRRC:$vA)>;
+def:Pat<(vmrglw_unary_shuffle (v16i8 VRRC:$vA), undef),
+ (VMRGLW VRRC:$vA, VRRC:$vA)>;
+def:Pat<(vmrghb_unary_shuffle (v16i8 VRRC:$vA), undef),
+ (VMRGHB VRRC:$vA, VRRC:$vA)>;
+def:Pat<(vmrghh_unary_shuffle (v16i8 VRRC:$vA), undef),
+ (VMRGHH VRRC:$vA, VRRC:$vA)>;
+def:Pat<(vmrghw_unary_shuffle (v16i8 VRRC:$vA), undef),
+ (VMRGHW VRRC:$vA, VRRC:$vA)>;
+
+// Logical Operations
+def : Pat<(v4i32 (vnot_ppc VRRC:$vA)), (VNOR VRRC:$vA, VRRC:$vA)>;
+
+def : Pat<(v4i32 (vnot_ppc (or VRRC:$A, VRRC:$B))),
+ (VNOR VRRC:$A, VRRC:$B)>;
+def : Pat<(v4i32 (and VRRC:$A, (vnot_ppc VRRC:$B))),
+ (VANDC VRRC:$A, VRRC:$B)>;
+
+def : Pat<(fmul VRRC:$vA, VRRC:$vB),
+ (VMADDFP VRRC:$vA, VRRC:$vB, (v4i32 (V_SET0)))>;
+
+// Fused multiply add and multiply sub for packed float. These are represented
+// separately from the real instructions above, for operations that must have
+// the additional precision, such as Newton-Rhapson (used by divide, sqrt)
+def : Pat<(PPCvmaddfp VRRC:$A, VRRC:$B, VRRC:$C),
+ (VMADDFP VRRC:$A, VRRC:$B, VRRC:$C)>;
+def : Pat<(PPCvnmsubfp VRRC:$A, VRRC:$B, VRRC:$C),
+ (VNMSUBFP VRRC:$A, VRRC:$B, VRRC:$C)>;
+
+def : Pat<(int_ppc_altivec_vmaddfp VRRC:$A, VRRC:$B, VRRC:$C),
+ (VMADDFP VRRC:$A, VRRC:$B, VRRC:$C)>;
+def : Pat<(int_ppc_altivec_vnmsubfp VRRC:$A, VRRC:$B, VRRC:$C),
+ (VNMSUBFP VRRC:$A, VRRC:$B, VRRC:$C)>;
+
+def : Pat<(PPCvperm (v16i8 VRRC:$vA), VRRC:$vB, VRRC:$vC),
+ (VPERM VRRC:$vA, VRRC:$vB, VRRC:$vC)>;
+
+// Vector shifts
+def : Pat<(v16i8 (shl (v16i8 VRRC:$vA), (v16i8 VRRC:$vB))),
+ (v16i8 (VSLB VRRC:$vA, VRRC:$vB))>;
+def : Pat<(v8i16 (shl (v8i16 VRRC:$vA), (v8i16 VRRC:$vB))),
+ (v8i16 (VSLH VRRC:$vA, VRRC:$vB))>;
+def : Pat<(v4i32 (shl (v4i32 VRRC:$vA), (v4i32 VRRC:$vB))),
+ (v4i32 (VSLW VRRC:$vA, VRRC:$vB))>;
+
+def : Pat<(v16i8 (srl (v16i8 VRRC:$vA), (v16i8 VRRC:$vB))),
+ (v16i8 (VSRB VRRC:$vA, VRRC:$vB))>;
+def : Pat<(v8i16 (srl (v8i16 VRRC:$vA), (v8i16 VRRC:$vB))),
+ (v8i16 (VSRH VRRC:$vA, VRRC:$vB))>;
+def : Pat<(v4i32 (srl (v4i32 VRRC:$vA), (v4i32 VRRC:$vB))),
+ (v4i32 (VSRW VRRC:$vA, VRRC:$vB))>;
+
+def : Pat<(v16i8 (sra (v16i8 VRRC:$vA), (v16i8 VRRC:$vB))),
+ (v16i8 (VSRAB VRRC:$vA, VRRC:$vB))>;
+def : Pat<(v8i16 (sra (v8i16 VRRC:$vA), (v8i16 VRRC:$vB))),
+ (v8i16 (VSRAH VRRC:$vA, VRRC:$vB))>;
+def : Pat<(v4i32 (sra (v4i32 VRRC:$vA), (v4i32 VRRC:$vB))),
+ (v4i32 (VSRAW VRRC:$vA, VRRC:$vB))>;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrBuilder.h b/contrib/llvm/lib/Target/PowerPC/PPCInstrBuilder.h
new file mode 100644
index 0000000..b424d11
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrBuilder.h
@@ -0,0 +1,43 @@
+//===-- PPCInstrBuilder.h - Aides for building PPC insts --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file exposes functions that may be used with BuildMI from the
+// MachineInstrBuilder.h file to simplify generating frame and constant pool
+// references.
+//
+// For reference, the order of operands for memory references is:
+// (Operand), Dest Reg, Base Reg, and either Reg Index or Immediate
+// Displacement.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef POWERPC_INSTRBUILDER_H
+#define POWERPC_INSTRBUILDER_H
+
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+
+namespace llvm {
+
+/// addFrameReference - This function is used to add a reference to the base of
+/// an abstract object on the stack frame of the current function. This
+/// reference has base register as the FrameIndex offset until it is resolved.
+/// This allows a constant offset to be specified as well...
+///
+static inline const MachineInstrBuilder&
+addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0,
+ bool mem = true) {
+ if (mem)
+ return MIB.addImm(Offset).addFrameIndex(FI);
+ else
+ return MIB.addFrameIndex(FI).addImm(Offset);
+}
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td
new file mode 100644
index 0000000..84a15b1
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td
@@ -0,0 +1,907 @@
+//===- PowerPCInstrFormats.td - PowerPC Instruction Formats --*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//
+// PowerPC instruction formats
+
+class I<bits<6> opcode, dag OOL, dag IOL, string asmstr, InstrItinClass itin>
+ : Instruction {
+ field bits<32> Inst;
+
+ bit PPC64 = 0; // Default value, override with isPPC64
+
+ let Namespace = "PPC";
+ let Inst{0-5} = opcode;
+ let OutOperandList = OOL;
+ let InOperandList = IOL;
+ let AsmString = asmstr;
+ let Itinerary = itin;
+
+ bits<1> PPC970_First = 0;
+ bits<1> PPC970_Single = 0;
+ bits<1> PPC970_Cracked = 0;
+ bits<3> PPC970_Unit = 0;
+
+ /// These fields correspond to the fields in PPCInstrInfo.h. Any changes to
+ /// these must be reflected there! See comments there for what these are.
+ let TSFlags{0} = PPC970_First;
+ let TSFlags{1} = PPC970_Single;
+ let TSFlags{2} = PPC970_Cracked;
+ let TSFlags{5-3} = PPC970_Unit;
+}
+
+class PPC970_DGroup_First { bits<1> PPC970_First = 1; }
+class PPC970_DGroup_Single { bits<1> PPC970_Single = 1; }
+class PPC970_DGroup_Cracked { bits<1> PPC970_Cracked = 1; }
+class PPC970_MicroCode;
+
+class PPC970_Unit_Pseudo { bits<3> PPC970_Unit = 0; }
+class PPC970_Unit_FXU { bits<3> PPC970_Unit = 1; }
+class PPC970_Unit_LSU { bits<3> PPC970_Unit = 2; }
+class PPC970_Unit_FPU { bits<3> PPC970_Unit = 3; }
+class PPC970_Unit_CRU { bits<3> PPC970_Unit = 4; }
+class PPC970_Unit_VALU { bits<3> PPC970_Unit = 5; }
+class PPC970_Unit_VPERM { bits<3> PPC970_Unit = 6; }
+class PPC970_Unit_BRU { bits<3> PPC970_Unit = 7; }
+
+
+// 1.7.1 I-Form
+class IForm<bits<6> opcode, bit aa, bit lk, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ let Pattern = pattern;
+ bits<24> LI;
+
+ let Inst{6-29} = LI;
+ let Inst{30} = aa;
+ let Inst{31} = lk;
+}
+
+// 1.7.2 B-Form
+class BForm<bits<6> opcode, bit aa, bit lk, dag OOL, dag IOL, string asmstr>
+ : I<opcode, OOL, IOL, asmstr, BrB> {
+ bits<7> BIBO; // 2 bits of BI and 5 bits of BO.
+ bits<3> CR;
+ bits<14> BD;
+
+ bits<5> BI;
+ let BI{0-1} = BIBO{5-6};
+ let BI{2-4} = CR{0-2};
+
+ let Inst{6-10} = BIBO{4-0};
+ let Inst{11-15} = BI;
+ let Inst{16-29} = BD;
+ let Inst{30} = aa;
+ let Inst{31} = lk;
+}
+
+
+// 1.7.4 D-Form
+class DForm_base<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> A;
+ bits<5> B;
+ bits<16> C;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = A;
+ let Inst{11-15} = B;
+ let Inst{16-31} = C;
+}
+
+class DForm_1<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> A;
+ bits<21> Addr;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = A;
+ let Inst{11-15} = Addr{20-16}; // Base Reg
+ let Inst{16-31} = Addr{15-0}; // Displacement
+}
+
+class DForm_1a<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> A;
+ bits<16> C;
+ bits<5> B;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = A;
+ let Inst{11-15} = B;
+ let Inst{16-31} = C;
+}
+
+
+class DForm_2<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : DForm_base<opcode, OOL, IOL, asmstr, itin, pattern>;
+
+class DForm_2_r0<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> A;
+ bits<16> B;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = A;
+ let Inst{11-15} = 0;
+ let Inst{16-31} = B;
+}
+
+class DForm_4<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> B;
+ bits<5> A;
+ bits<16> C;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = A;
+ let Inst{11-15} = B;
+ let Inst{16-31} = C;
+}
+
+class DForm_4_zero<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : DForm_1<opcode, OOL, IOL, asmstr, itin, pattern> {
+ let A = 0;
+ let Addr = 0;
+}
+
+class DForm_5<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<3> BF;
+ bits<1> L;
+ bits<5> RA;
+ bits<16> I;
+
+ let Inst{6-8} = BF;
+ let Inst{9} = 0;
+ let Inst{10} = L;
+ let Inst{11-15} = RA;
+ let Inst{16-31} = I;
+}
+
+class DForm_5_ext<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : DForm_5<opcode, OOL, IOL, asmstr, itin> {
+ let L = PPC64;
+}
+
+class DForm_6<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : DForm_5<opcode, OOL, IOL, asmstr, itin>;
+
+class DForm_6_ext<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : DForm_6<opcode, OOL, IOL, asmstr, itin> {
+ let L = PPC64;
+}
+
+
+// 1.7.5 DS-Form
+class DSForm_1<bits<6> opcode, bits<2> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RST;
+ bits<19> DS_RA;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = RST;
+ let Inst{11-15} = DS_RA{18-14}; // Register #
+ let Inst{16-29} = DS_RA{13-0}; // Displacement.
+ let Inst{30-31} = xo;
+}
+
+class DSForm_1a<bits<6> opcode, bits<2> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RST;
+ bits<14> DS;
+ bits<5> RA;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = RST;
+ let Inst{11-15} = RA;
+ let Inst{16-29} = DS;
+ let Inst{30-31} = xo;
+}
+
+// 1.7.6 X-Form
+class XForm_base_r3xo<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RST;
+ bits<5> A;
+ bits<5> B;
+
+ let Pattern = pattern;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = RST;
+ let Inst{11-15} = A;
+ let Inst{16-20} = B;
+ let Inst{21-30} = xo;
+ let Inst{31} = RC;
+}
+
+// This is the same as XForm_base_r3xo, but the first two operands are swapped
+// when code is emitted.
+class XForm_base_r3xo_swapped
+ <bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> A;
+ bits<5> RST;
+ bits<5> B;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = RST;
+ let Inst{11-15} = A;
+ let Inst{16-20} = B;
+ let Inst{21-30} = xo;
+ let Inst{31} = RC;
+}
+
+
+class XForm_1<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern>;
+
+class XForm_6<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo_swapped<opcode, xo, OOL, IOL, asmstr, itin> {
+ let Pattern = pattern;
+}
+
+class XForm_8<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern>;
+
+class XForm_10<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo_swapped<opcode, xo, OOL, IOL, asmstr, itin> {
+ let Pattern = pattern;
+}
+
+class XForm_11<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo_swapped<opcode, xo, OOL, IOL, asmstr, itin> {
+ let B = 0;
+ let Pattern = pattern;
+}
+
+class XForm_16<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<3> BF;
+ bits<1> L;
+ bits<5> RA;
+ bits<5> RB;
+
+ let Inst{6-8} = BF;
+ let Inst{9} = 0;
+ let Inst{10} = L;
+ let Inst{11-15} = RA;
+ let Inst{16-20} = RB;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XForm_16_ext<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : XForm_16<opcode, xo, OOL, IOL, asmstr, itin> {
+ let L = PPC64;
+}
+
+class XForm_17<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<3> BF;
+ bits<5> FRA;
+ bits<5> FRB;
+
+ let Inst{6-8} = BF;
+ let Inst{9-10} = 0;
+ let Inst{11-15} = FRA;
+ let Inst{16-20} = FRB;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XForm_24<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ let Pattern = pattern;
+ let Inst{6-10} = 31;
+ let Inst{11-15} = 0;
+ let Inst{16-20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XForm_24_sync<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+ string asmstr, InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ let Pattern = pattern;
+ let Inst{6-10} = 0;
+ let Inst{11-15} = 0;
+ let Inst{16-20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XForm_25<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+}
+
+class XForm_26<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let A = 0;
+}
+
+class XForm_28<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+}
+
+// This is used for MFFS, MTFSB0, MTFSB1. 42 is arbitrary; this series of
+// numbers presumably relates to some document, but I haven't found it.
+class XForm_42<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let Pattern = pattern;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = RST;
+ let Inst{11-20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = RC;
+}
+class XForm_43<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let Pattern = pattern;
+ bits<5> FM;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = FM;
+ let Inst{11-20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = RC;
+}
+
+// DCB_Form - Form X instruction, used for dcb* instructions.
+class DCB_Form<bits<10> xo, bits<5> immfield, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<31, OOL, IOL, asmstr, itin> {
+ bits<5> A;
+ bits<5> B;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = immfield;
+ let Inst{11-15} = A;
+ let Inst{16-20} = B;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+
+// DSS_Form - Form X instruction, used for altivec dss* instructions.
+class DSS_Form<bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<31, OOL, IOL, asmstr, itin> {
+ bits<1> T;
+ bits<2> STRM;
+ bits<5> A;
+ bits<5> B;
+
+ let Pattern = pattern;
+
+ let Inst{6} = T;
+ let Inst{7-8} = 0;
+ let Inst{9-10} = STRM;
+ let Inst{11-15} = A;
+ let Inst{16-20} = B;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+// 1.7.7 XL-Form
+class XLForm_1<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> CRD;
+ bits<5> CRA;
+ bits<5> CRB;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = CRD;
+ let Inst{11-15} = CRA;
+ let Inst{16-20} = CRB;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XLForm_1_ext<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> CRD;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = CRD;
+ let Inst{11-15} = CRD;
+ let Inst{16-20} = CRD;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XLForm_2<bits<6> opcode, bits<10> xo, bit lk, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> BO;
+ bits<5> BI;
+ bits<2> BH;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = BO;
+ let Inst{11-15} = BI;
+ let Inst{16-18} = 0;
+ let Inst{19-20} = BH;
+ let Inst{21-30} = xo;
+ let Inst{31} = lk;
+}
+
+class XLForm_2_br<bits<6> opcode, bits<10> xo, bit lk,
+ dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern>
+ : XLForm_2<opcode, xo, lk, OOL, IOL, asmstr, itin, pattern> {
+ bits<7> BIBO; // 2 bits of BI and 5 bits of BO.
+ bits<3> CR;
+
+ let BO = BIBO{2-6};
+ let BI{0-1} = BIBO{0-1};
+ let BI{2-4} = CR;
+ let BH = 0;
+}
+
+
+class XLForm_2_ext<bits<6> opcode, bits<10> xo, bits<5> bo, bits<5> bi, bit lk,
+ dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern>
+ : XLForm_2<opcode, xo, lk, OOL, IOL, asmstr, itin, pattern> {
+ let BO = bo;
+ let BI = bi;
+ let BH = 0;
+}
+
+class XLForm_3<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<3> BF;
+ bits<3> BFA;
+
+ let Inst{6-8} = BF;
+ let Inst{9-10} = 0;
+ let Inst{11-13} = BFA;
+ let Inst{14-15} = 0;
+ let Inst{16-20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+// 1.7.8 XFX-Form
+class XFXForm_1<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RT;
+ bits<10> SPR;
+
+ let Inst{6-10} = RT;
+ let Inst{11} = SPR{4};
+ let Inst{12} = SPR{3};
+ let Inst{13} = SPR{2};
+ let Inst{14} = SPR{1};
+ let Inst{15} = SPR{0};
+ let Inst{16} = SPR{9};
+ let Inst{17} = SPR{8};
+ let Inst{18} = SPR{7};
+ let Inst{19} = SPR{6};
+ let Inst{20} = SPR{5};
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XFXForm_1_ext<bits<6> opcode, bits<10> xo, bits<10> spr,
+ dag OOL, dag IOL, string asmstr, InstrItinClass itin>
+ : XFXForm_1<opcode, xo, OOL, IOL, asmstr, itin> {
+ let SPR = spr;
+}
+
+class XFXForm_3<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RT;
+
+ let Inst{6-10} = RT;
+ let Inst{11-20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XFXForm_5<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<8> FXM;
+ bits<5> ST;
+
+ let Inst{6-10} = ST;
+ let Inst{11} = 0;
+ let Inst{12-19} = FXM;
+ let Inst{20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XFXForm_5a<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> ST;
+ bits<8> FXM;
+
+ let Inst{6-10} = ST;
+ let Inst{11} = 1;
+ let Inst{12-19} = FXM;
+ let Inst{20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XFXForm_7<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : XFXForm_1<opcode, xo, OOL, IOL, asmstr, itin>;
+
+class XFXForm_7_ext<bits<6> opcode, bits<10> xo, bits<10> spr,
+ dag OOL, dag IOL, string asmstr, InstrItinClass itin>
+ : XFXForm_7<opcode, xo, OOL, IOL, asmstr, itin> {
+ let SPR = spr;
+}
+
+// XFL-Form - MTFSF
+// This is probably 1.7.9, but I don't have the reference that uses this
+// numbering scheme...
+class XFLForm<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ string cstr, InstrItinClass itin, list<dag>pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<8> FM;
+ bits<5> RT;
+
+ bit RC = 0; // set by isDOT
+ let Pattern = pattern;
+ let Constraints = cstr;
+
+ let Inst{6} = 0;
+ let Inst{7-14} = FM;
+ let Inst{15} = 0;
+ let Inst{16-20} = RT;
+ let Inst{21-30} = xo;
+ let Inst{31} = RC;
+}
+
+// 1.7.10 XS-Form - SRADI.
+class XSForm_1<bits<6> opcode, bits<9> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> A;
+ bits<5> RS;
+ bits<6> SH;
+
+ bit RC = 0; // set by isDOT
+ let Pattern = pattern;
+
+ let Inst{6-10} = RS;
+ let Inst{11-15} = A;
+ let Inst{16-20} = SH{4,3,2,1,0};
+ let Inst{21-29} = xo;
+ let Inst{30} = SH{5};
+ let Inst{31} = RC;
+}
+
+// 1.7.11 XO-Form
+class XOForm_1<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RT;
+ bits<5> RA;
+ bits<5> RB;
+
+ let Pattern = pattern;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = RT;
+ let Inst{11-15} = RA;
+ let Inst{16-20} = RB;
+ let Inst{21} = oe;
+ let Inst{22-30} = xo;
+ let Inst{31} = RC;
+}
+
+class XOForm_3<bits<6> opcode, bits<9> xo, bit oe,
+ dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern>
+ : XOForm_1<opcode, xo, oe, OOL, IOL, asmstr, itin, pattern> {
+ let RB = 0;
+}
+
+// 1.7.12 A-Form
+class AForm_1<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> FRT;
+ bits<5> FRA;
+ bits<5> FRC;
+ bits<5> FRB;
+
+ let Pattern = pattern;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = FRT;
+ let Inst{11-15} = FRA;
+ let Inst{16-20} = FRB;
+ let Inst{21-25} = FRC;
+ let Inst{26-30} = xo;
+ let Inst{31} = RC;
+}
+
+class AForm_2<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : AForm_1<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let FRC = 0;
+}
+
+class AForm_3<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : AForm_1<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let FRB = 0;
+}
+
+// 1.7.13 M-Form
+class MForm_1<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RA;
+ bits<5> RS;
+ bits<5> RB;
+ bits<5> MB;
+ bits<5> ME;
+
+ let Pattern = pattern;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = RS;
+ let Inst{11-15} = RA;
+ let Inst{16-20} = RB;
+ let Inst{21-25} = MB;
+ let Inst{26-30} = ME;
+ let Inst{31} = RC;
+}
+
+class MForm_2<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : MForm_1<opcode, OOL, IOL, asmstr, itin, pattern> {
+}
+
+// 1.7.14 MD-Form
+class MDForm_1<bits<6> opcode, bits<3> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RA;
+ bits<5> RS;
+ bits<6> SH;
+ bits<6> MBE;
+
+ let Pattern = pattern;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = RS;
+ let Inst{11-15} = RA;
+ let Inst{16-20} = SH{4,3,2,1,0};
+ let Inst{21-26} = MBE{4,3,2,1,0,5};
+ let Inst{27-29} = xo;
+ let Inst{30} = SH{5};
+ let Inst{31} = RC;
+}
+
+
+
+// E-1 VA-Form
+
+// VAForm_1 - DACB ordering.
+class VAForm_1<bits<6> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VD;
+ bits<5> VA;
+ bits<5> VC;
+ bits<5> VB;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = VA;
+ let Inst{16-20} = VB;
+ let Inst{21-25} = VC;
+ let Inst{26-31} = xo;
+}
+
+// VAForm_1a - DABC ordering.
+class VAForm_1a<bits<6> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VD;
+ bits<5> VA;
+ bits<5> VB;
+ bits<5> VC;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = VA;
+ let Inst{16-20} = VB;
+ let Inst{21-25} = VC;
+ let Inst{26-31} = xo;
+}
+
+class VAForm_2<bits<6> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VD;
+ bits<5> VA;
+ bits<5> VB;
+ bits<4> SH;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = VA;
+ let Inst{16-20} = VB;
+ let Inst{21} = 0;
+ let Inst{22-25} = SH;
+ let Inst{26-31} = xo;
+}
+
+// E-2 VX-Form
+class VXForm_1<bits<11> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VD;
+ bits<5> VA;
+ bits<5> VB;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = VA;
+ let Inst{16-20} = VB;
+ let Inst{21-31} = xo;
+}
+
+class VXForm_setzero<bits<11> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : VXForm_1<xo, OOL, IOL, asmstr, itin, pattern> {
+ let VA = VD;
+ let VB = VD;
+}
+
+
+class VXForm_2<bits<11> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VD;
+ bits<5> VB;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = 0;
+ let Inst{16-20} = VB;
+ let Inst{21-31} = xo;
+}
+
+class VXForm_3<bits<11> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VD;
+ bits<5> IMM;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = IMM;
+ let Inst{16-20} = 0;
+ let Inst{21-31} = xo;
+}
+
+/// VXForm_4 - VX instructions with "VD,0,0" register fields, like mfvscr.
+class VXForm_4<bits<11> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VD;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = 0;
+ let Inst{16-20} = 0;
+ let Inst{21-31} = xo;
+}
+
+/// VXForm_5 - VX instructions with "0,0,VB" register fields, like mtvscr.
+class VXForm_5<bits<11> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VB;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = 0;
+ let Inst{11-15} = 0;
+ let Inst{16-20} = VB;
+ let Inst{21-31} = xo;
+}
+
+// E-4 VXR-Form
+class VXRForm_1<bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VD;
+ bits<5> VA;
+ bits<5> VB;
+ bit RC = 0;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = VA;
+ let Inst{16-20} = VB;
+ let Inst{21} = RC;
+ let Inst{22-31} = xo;
+}
+
+//===----------------------------------------------------------------------===//
+class Pseudo<dag OOL, dag IOL, string asmstr, list<dag> pattern>
+ : I<0, OOL, IOL, asmstr, NoItinerary> {
+ let PPC64 = 0;
+ let Pattern = pattern;
+ let Inst{31-0} = 0;
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
new file mode 100644
index 0000000..2bc109c
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -0,0 +1,655 @@
+//===- PPCInstrInfo.cpp - PowerPC32 Instruction Information -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PowerPC implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCInstrInfo.h"
+#include "PPC.h"
+#include "PPCInstrBuilder.h"
+#include "PPCMachineFunctionInfo.h"
+#include "PPCTargetMachine.h"
+#include "PPCHazardRecognizers.h"
+#include "MCTargetDesc/PPCPredicates.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/STLExtras.h"
+
+#define GET_INSTRINFO_CTOR
+#include "PPCGenInstrInfo.inc"
+
+namespace llvm {
+extern cl::opt<bool> EnablePPC32RS; // FIXME (64-bit): See PPCRegisterInfo.cpp.
+extern cl::opt<bool> EnablePPC64RS; // FIXME (64-bit): See PPCRegisterInfo.cpp.
+}
+
+using namespace llvm;
+
+PPCInstrInfo::PPCInstrInfo(PPCTargetMachine &tm)
+ : PPCGenInstrInfo(PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP),
+ TM(tm), RI(*TM.getSubtargetImpl(), *this) {}
+
+/// CreateTargetHazardRecognizer - Return the hazard recognizer to use for
+/// this target when scheduling the DAG.
+ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetHazardRecognizer(
+ const TargetMachine *TM,
+ const ScheduleDAG *DAG) const {
+ // Should use subtarget info to pick the right hazard recognizer. For
+ // now, always return a PPC970 recognizer.
+ const TargetInstrInfo *TII = TM->getInstrInfo();
+ assert(TII && "No InstrInfo?");
+ return new PPCHazardRecognizer970(*TII);
+}
+
+unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case PPC::LD:
+ case PPC::LWZ:
+ case PPC::LFS:
+ case PPC::LFD:
+ if (MI->getOperand(1).isImm() && !MI->getOperand(1).getImm() &&
+ MI->getOperand(2).isFI()) {
+ FrameIndex = MI->getOperand(2).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+ return 0;
+}
+
+unsigned PPCInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case PPC::STD:
+ case PPC::STW:
+ case PPC::STFS:
+ case PPC::STFD:
+ if (MI->getOperand(1).isImm() && !MI->getOperand(1).getImm() &&
+ MI->getOperand(2).isFI()) {
+ FrameIndex = MI->getOperand(2).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+ return 0;
+}
+
+// commuteInstruction - We can commute rlwimi instructions, but only if the
+// rotate amt is zero. We also have to munge the immediates a bit.
+MachineInstr *
+PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
+ MachineFunction &MF = *MI->getParent()->getParent();
+
+ // Normal instructions can be commuted the obvious way.
+ if (MI->getOpcode() != PPC::RLWIMI)
+ return TargetInstrInfoImpl::commuteInstruction(MI, NewMI);
+
+ // Cannot commute if it has a non-zero rotate count.
+ if (MI->getOperand(3).getImm() != 0)
+ return 0;
+
+ // If we have a zero rotate count, we have:
+ // M = mask(MB,ME)
+ // Op0 = (Op1 & ~M) | (Op2 & M)
+ // Change this to:
+ // M = mask((ME+1)&31, (MB-1)&31)
+ // Op0 = (Op2 & ~M) | (Op1 & M)
+
+ // Swap op1/op2
+ unsigned Reg0 = MI->getOperand(0).getReg();
+ unsigned Reg1 = MI->getOperand(1).getReg();
+ unsigned Reg2 = MI->getOperand(2).getReg();
+ bool Reg1IsKill = MI->getOperand(1).isKill();
+ bool Reg2IsKill = MI->getOperand(2).isKill();
+ bool ChangeReg0 = false;
+ // If machine instrs are no longer in two-address forms, update
+ // destination register as well.
+ if (Reg0 == Reg1) {
+ // Must be two address instruction!
+ assert(MI->getDesc().getOperandConstraint(0, MCOI::TIED_TO) &&
+ "Expecting a two-address instruction!");
+ Reg2IsKill = false;
+ ChangeReg0 = true;
+ }
+
+ // Masks.
+ unsigned MB = MI->getOperand(4).getImm();
+ unsigned ME = MI->getOperand(5).getImm();
+
+ if (NewMI) {
+ // Create a new instruction.
+ unsigned Reg0 = ChangeReg0 ? Reg2 : MI->getOperand(0).getReg();
+ bool Reg0IsDead = MI->getOperand(0).isDead();
+ return BuildMI(MF, MI->getDebugLoc(), MI->getDesc())
+ .addReg(Reg0, RegState::Define | getDeadRegState(Reg0IsDead))
+ .addReg(Reg2, getKillRegState(Reg2IsKill))
+ .addReg(Reg1, getKillRegState(Reg1IsKill))
+ .addImm((ME+1) & 31)
+ .addImm((MB-1) & 31);
+ }
+
+ if (ChangeReg0)
+ MI->getOperand(0).setReg(Reg2);
+ MI->getOperand(2).setReg(Reg1);
+ MI->getOperand(1).setReg(Reg2);
+ MI->getOperand(2).setIsKill(Reg1IsKill);
+ MI->getOperand(1).setIsKill(Reg2IsKill);
+
+ // Swap the mask around.
+ MI->getOperand(4).setImm((ME+1) & 31);
+ MI->getOperand(5).setImm((MB-1) & 31);
+ return MI;
+}
+
+void PPCInstrInfo::insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const {
+ DebugLoc DL;
+ BuildMI(MBB, MI, DL, get(PPC::NOP));
+}
+
+
+// Branch analysis.
+bool PPCInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ // If the block has no terminators, it just falls into the block after it.
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin())
+ return false;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return false;
+ --I;
+ }
+ if (!isUnpredicatedTerminator(I))
+ return false;
+
+ // Get the last instruction in the block.
+ MachineInstr *LastInst = I;
+
+ // If there is only one terminator instruction, process it.
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+ if (LastInst->getOpcode() == PPC::B) {
+ if (!LastInst->getOperand(0).isMBB())
+ return true;
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ } else if (LastInst->getOpcode() == PPC::BCC) {
+ if (!LastInst->getOperand(2).isMBB())
+ return true;
+ // Block ends with fall-through condbranch.
+ TBB = LastInst->getOperand(2).getMBB();
+ Cond.push_back(LastInst->getOperand(0));
+ Cond.push_back(LastInst->getOperand(1));
+ return false;
+ }
+ // Otherwise, don't know what this is.
+ return true;
+ }
+
+ // Get the instruction before it if it's a terminator.
+ MachineInstr *SecondLastInst = I;
+
+ // If there are three terminators, we don't know what sort of block this is.
+ if (SecondLastInst && I != MBB.begin() &&
+ isUnpredicatedTerminator(--I))
+ return true;
+
+ // If the block ends with PPC::B and PPC:BCC, handle it.
+ if (SecondLastInst->getOpcode() == PPC::BCC &&
+ LastInst->getOpcode() == PPC::B) {
+ if (!SecondLastInst->getOperand(2).isMBB() ||
+ !LastInst->getOperand(0).isMBB())
+ return true;
+ TBB = SecondLastInst->getOperand(2).getMBB();
+ Cond.push_back(SecondLastInst->getOperand(0));
+ Cond.push_back(SecondLastInst->getOperand(1));
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+
+ // If the block ends with two PPC:Bs, handle it. The second one is not
+ // executed, so remove it.
+ if (SecondLastInst->getOpcode() == PPC::B &&
+ LastInst->getOpcode() == PPC::B) {
+ if (!SecondLastInst->getOperand(0).isMBB())
+ return true;
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ I = LastInst;
+ if (AllowModify)
+ I->eraseFromParent();
+ return false;
+ }
+
+ // Otherwise, can't handle this.
+ return true;
+}
+
+unsigned PPCInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin()) return 0;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return 0;
+ --I;
+ }
+ if (I->getOpcode() != PPC::B && I->getOpcode() != PPC::BCC)
+ return 0;
+
+ // Remove the branch.
+ I->eraseFromParent();
+
+ I = MBB.end();
+
+ if (I == MBB.begin()) return 1;
+ --I;
+ if (I->getOpcode() != PPC::BCC)
+ return 1;
+
+ // Remove the branch.
+ I->eraseFromParent();
+ return 2;
+}
+
+unsigned
+PPCInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
+ // Shouldn't be a fall through.
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert((Cond.size() == 2 || Cond.size() == 0) &&
+ "PPC branch conditions have two components!");
+
+ // One-way branch.
+ if (FBB == 0) {
+ if (Cond.empty()) // Unconditional branch
+ BuildMI(&MBB, DL, get(PPC::B)).addMBB(TBB);
+ else // Conditional branch
+ BuildMI(&MBB, DL, get(PPC::BCC))
+ .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB);
+ return 1;
+ }
+
+ // Two-way Conditional Branch.
+ BuildMI(&MBB, DL, get(PPC::BCC))
+ .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB);
+ BuildMI(&MBB, DL, get(PPC::B)).addMBB(FBB);
+ return 2;
+}
+
+void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ unsigned Opc;
+ if (PPC::GPRCRegClass.contains(DestReg, SrcReg))
+ Opc = PPC::OR;
+ else if (PPC::G8RCRegClass.contains(DestReg, SrcReg))
+ Opc = PPC::OR8;
+ else if (PPC::F4RCRegClass.contains(DestReg, SrcReg))
+ Opc = PPC::FMR;
+ else if (PPC::CRRCRegClass.contains(DestReg, SrcReg))
+ Opc = PPC::MCRF;
+ else if (PPC::VRRCRegClass.contains(DestReg, SrcReg))
+ Opc = PPC::VOR;
+ else if (PPC::CRBITRCRegClass.contains(DestReg, SrcReg))
+ Opc = PPC::CROR;
+ else
+ llvm_unreachable("Impossible reg-to-reg copy");
+
+ const MCInstrDesc &MCID = get(Opc);
+ if (MCID.getNumOperands() == 3)
+ BuildMI(MBB, I, DL, MCID, DestReg)
+ .addReg(SrcReg).addReg(SrcReg, getKillRegState(KillSrc));
+ else
+ BuildMI(MBB, I, DL, MCID, DestReg).addReg(SrcReg, getKillRegState(KillSrc));
+}
+
+bool
+PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
+ unsigned SrcReg, bool isKill,
+ int FrameIdx,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const{
+ DebugLoc DL;
+ if (PPC::GPRCRegisterClass->hasSubClassEq(RC)) {
+ if (SrcReg != PPC::LR) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ } else {
+ // FIXME: this spills LR immediately to memory in one step. To do this,
+ // we use R11, which we know cannot be used in the prolog/epilog. This is
+ // a hack.
+ NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFLR), PPC::R11));
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW))
+ .addReg(PPC::R11,
+ getKillRegState(isKill)),
+ FrameIdx));
+ }
+ } else if (PPC::G8RCRegisterClass->hasSubClassEq(RC)) {
+ if (SrcReg != PPC::LR8) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STD))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ } else {
+ // FIXME: this spills LR immediately to memory in one step. To do this,
+ // we use R11, which we know cannot be used in the prolog/epilog. This is
+ // a hack.
+ NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFLR8), PPC::X11));
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STD))
+ .addReg(PPC::X11,
+ getKillRegState(isKill)),
+ FrameIdx));
+ }
+ } else if (PPC::F8RCRegisterClass->hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STFD))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ } else if (PPC::F4RCRegisterClass->hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STFS))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ } else if (PPC::CRRCRegisterClass->hasSubClassEq(RC)) {
+ if ((EnablePPC32RS && !TM.getSubtargetImpl()->isPPC64()) ||
+ (EnablePPC64RS && TM.getSubtargetImpl()->isPPC64())) {
+ // FIXME (64-bit): Enable
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_CR))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ return true;
+ } else {
+ // FIXME: We need a scatch reg here. The trouble with using R0 is that
+ // it's possible for the stack frame to be so big the save location is
+ // out of range of immediate offsets, necessitating another register.
+ // We hack this on Darwin by reserving R2. It's probably broken on Linux
+ // at the moment.
+
+ // We need to store the CR in the low 4-bits of the saved value. First,
+ // issue a MFCR to save all of the CRBits.
+ unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ?
+ PPC::R2 : PPC::R0;
+ NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFCRpseud), ScratchReg)
+ .addReg(SrcReg, getKillRegState(isKill)));
+
+ // If the saved register wasn't CR0, shift the bits left so that they are
+ // in CR0's slot.
+ if (SrcReg != PPC::CR0) {
+ unsigned ShiftBits = getPPCRegisterNumbering(SrcReg)*4;
+ // rlwinm scratch, scratch, ShiftBits, 0, 31.
+ NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), ScratchReg)
+ .addReg(ScratchReg).addImm(ShiftBits)
+ .addImm(0).addImm(31));
+ }
+
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW))
+ .addReg(ScratchReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ }
+ } else if (PPC::CRBITRCRegisterClass->hasSubClassEq(RC)) {
+ // FIXME: We use CRi here because there is no mtcrf on a bit. Since the
+ // backend currently only uses CR1EQ as an individual bit, this should
+ // not cause any bug. If we need other uses of CR bits, the following
+ // code may be invalid.
+ unsigned Reg = 0;
+ if (SrcReg == PPC::CR0LT || SrcReg == PPC::CR0GT ||
+ SrcReg == PPC::CR0EQ || SrcReg == PPC::CR0UN)
+ Reg = PPC::CR0;
+ else if (SrcReg == PPC::CR1LT || SrcReg == PPC::CR1GT ||
+ SrcReg == PPC::CR1EQ || SrcReg == PPC::CR1UN)
+ Reg = PPC::CR1;
+ else if (SrcReg == PPC::CR2LT || SrcReg == PPC::CR2GT ||
+ SrcReg == PPC::CR2EQ || SrcReg == PPC::CR2UN)
+ Reg = PPC::CR2;
+ else if (SrcReg == PPC::CR3LT || SrcReg == PPC::CR3GT ||
+ SrcReg == PPC::CR3EQ || SrcReg == PPC::CR3UN)
+ Reg = PPC::CR3;
+ else if (SrcReg == PPC::CR4LT || SrcReg == PPC::CR4GT ||
+ SrcReg == PPC::CR4EQ || SrcReg == PPC::CR4UN)
+ Reg = PPC::CR4;
+ else if (SrcReg == PPC::CR5LT || SrcReg == PPC::CR5GT ||
+ SrcReg == PPC::CR5EQ || SrcReg == PPC::CR5UN)
+ Reg = PPC::CR5;
+ else if (SrcReg == PPC::CR6LT || SrcReg == PPC::CR6GT ||
+ SrcReg == PPC::CR6EQ || SrcReg == PPC::CR6UN)
+ Reg = PPC::CR6;
+ else if (SrcReg == PPC::CR7LT || SrcReg == PPC::CR7GT ||
+ SrcReg == PPC::CR7EQ || SrcReg == PPC::CR7UN)
+ Reg = PPC::CR7;
+
+ return StoreRegToStackSlot(MF, Reg, isKill, FrameIdx,
+ PPC::CRRCRegisterClass, NewMIs);
+
+ } else if (PPC::VRRCRegisterClass->hasSubClassEq(RC)) {
+ // We don't have indexed addressing for vector loads. Emit:
+ // R0 = ADDI FI#
+ // STVX VAL, 0, R0
+ //
+ // FIXME: We use R0 here, because it isn't available for RA.
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::ADDI), PPC::R0),
+ FrameIdx, 0, 0));
+ NewMIs.push_back(BuildMI(MF, DL, get(PPC::STVX))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addReg(PPC::R0)
+ .addReg(PPC::R0));
+ } else {
+ llvm_unreachable("Unknown regclass!");
+ }
+
+ return false;
+}
+
+void
+PPCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ MachineFunction &MF = *MBB.getParent();
+ SmallVector<MachineInstr*, 4> NewMIs;
+
+ if (StoreRegToStackSlot(MF, SrcReg, isKill, FrameIdx, RC, NewMIs)) {
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ FuncInfo->setSpillsCR();
+ }
+
+ for (unsigned i = 0, e = NewMIs.size(); i != e; ++i)
+ MBB.insert(MI, NewMIs[i]);
+
+ const MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(
+ MachinePointerInfo(PseudoSourceValue::getFixedStack(FrameIdx)),
+ MachineMemOperand::MOStore,
+ MFI.getObjectSize(FrameIdx),
+ MFI.getObjectAlignment(FrameIdx));
+ NewMIs.back()->addMemOperand(MF, MMO);
+}
+
+void
+PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs)const{
+ if (PPC::GPRCRegisterClass->hasSubClassEq(RC)) {
+ if (DestReg != PPC::LR) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ),
+ DestReg), FrameIdx));
+ } else {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ),
+ PPC::R11), FrameIdx));
+ NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTLR)).addReg(PPC::R11));
+ }
+ } else if (PPC::G8RCRegisterClass->hasSubClassEq(RC)) {
+ if (DestReg != PPC::LR8) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LD), DestReg),
+ FrameIdx));
+ } else {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LD),
+ PPC::R11), FrameIdx));
+ NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTLR8)).addReg(PPC::R11));
+ }
+ } else if (PPC::F8RCRegisterClass->hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFD), DestReg),
+ FrameIdx));
+ } else if (PPC::F4RCRegisterClass->hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFS), DestReg),
+ FrameIdx));
+ } else if (PPC::CRRCRegisterClass->hasSubClassEq(RC)) {
+ // FIXME: We need a scatch reg here. The trouble with using R0 is that
+ // it's possible for the stack frame to be so big the save location is
+ // out of range of immediate offsets, necessitating another register.
+ // We hack this on Darwin by reserving R2. It's probably broken on Linux
+ // at the moment.
+ unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ?
+ PPC::R2 : PPC::R0;
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ),
+ ScratchReg), FrameIdx));
+
+ // If the reloaded register isn't CR0, shift the bits right so that they are
+ // in the right CR's slot.
+ if (DestReg != PPC::CR0) {
+ unsigned ShiftBits = getPPCRegisterNumbering(DestReg)*4;
+ // rlwinm r11, r11, 32-ShiftBits, 0, 31.
+ NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), ScratchReg)
+ .addReg(ScratchReg).addImm(32-ShiftBits).addImm(0)
+ .addImm(31));
+ }
+
+ NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTCRF), DestReg)
+ .addReg(ScratchReg));
+ } else if (PPC::CRBITRCRegisterClass->hasSubClassEq(RC)) {
+
+ unsigned Reg = 0;
+ if (DestReg == PPC::CR0LT || DestReg == PPC::CR0GT ||
+ DestReg == PPC::CR0EQ || DestReg == PPC::CR0UN)
+ Reg = PPC::CR0;
+ else if (DestReg == PPC::CR1LT || DestReg == PPC::CR1GT ||
+ DestReg == PPC::CR1EQ || DestReg == PPC::CR1UN)
+ Reg = PPC::CR1;
+ else if (DestReg == PPC::CR2LT || DestReg == PPC::CR2GT ||
+ DestReg == PPC::CR2EQ || DestReg == PPC::CR2UN)
+ Reg = PPC::CR2;
+ else if (DestReg == PPC::CR3LT || DestReg == PPC::CR3GT ||
+ DestReg == PPC::CR3EQ || DestReg == PPC::CR3UN)
+ Reg = PPC::CR3;
+ else if (DestReg == PPC::CR4LT || DestReg == PPC::CR4GT ||
+ DestReg == PPC::CR4EQ || DestReg == PPC::CR4UN)
+ Reg = PPC::CR4;
+ else if (DestReg == PPC::CR5LT || DestReg == PPC::CR5GT ||
+ DestReg == PPC::CR5EQ || DestReg == PPC::CR5UN)
+ Reg = PPC::CR5;
+ else if (DestReg == PPC::CR6LT || DestReg == PPC::CR6GT ||
+ DestReg == PPC::CR6EQ || DestReg == PPC::CR6UN)
+ Reg = PPC::CR6;
+ else if (DestReg == PPC::CR7LT || DestReg == PPC::CR7GT ||
+ DestReg == PPC::CR7EQ || DestReg == PPC::CR7UN)
+ Reg = PPC::CR7;
+
+ return LoadRegFromStackSlot(MF, DL, Reg, FrameIdx,
+ PPC::CRRCRegisterClass, NewMIs);
+
+ } else if (PPC::VRRCRegisterClass->hasSubClassEq(RC)) {
+ // We don't have indexed addressing for vector loads. Emit:
+ // R0 = ADDI FI#
+ // Dest = LVX 0, R0
+ //
+ // FIXME: We use R0 here, because it isn't available for RA.
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::ADDI), PPC::R0),
+ FrameIdx, 0, 0));
+ NewMIs.push_back(BuildMI(MF, DL, get(PPC::LVX),DestReg).addReg(PPC::R0)
+ .addReg(PPC::R0));
+ } else {
+ llvm_unreachable("Unknown regclass!");
+ }
+}
+
+void
+PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ MachineFunction &MF = *MBB.getParent();
+ SmallVector<MachineInstr*, 4> NewMIs;
+ DebugLoc DL;
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+ LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs);
+ for (unsigned i = 0, e = NewMIs.size(); i != e; ++i)
+ MBB.insert(MI, NewMIs[i]);
+
+ const MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(
+ MachinePointerInfo(PseudoSourceValue::getFixedStack(FrameIdx)),
+ MachineMemOperand::MOLoad,
+ MFI.getObjectSize(FrameIdx),
+ MFI.getObjectAlignment(FrameIdx));
+ NewMIs.back()->addMemOperand(MF, MMO);
+}
+
+MachineInstr*
+PPCInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF,
+ int FrameIx, uint64_t Offset,
+ const MDNode *MDPtr,
+ DebugLoc DL) const {
+ MachineInstrBuilder MIB = BuildMI(MF, DL, get(PPC::DBG_VALUE));
+ addFrameReference(MIB, FrameIx, 0, false).addImm(Offset).addMetadata(MDPtr);
+ return &*MIB;
+}
+
+bool PPCInstrInfo::
+ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
+ assert(Cond.size() == 2 && "Invalid PPC branch opcode!");
+ // Leave the CR# the same, but invert the condition.
+ Cond[0].setImm(PPC::InvertPredicate((PPC::Predicate)Cond[0].getImm()));
+ return false;
+}
+
+/// GetInstSize - Return the number of bytes of code the specified
+/// instruction may be. This returns the maximum number of bytes.
+///
+unsigned PPCInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ case PPC::INLINEASM: { // Inline Asm: Variable size.
+ const MachineFunction *MF = MI->getParent()->getParent();
+ const char *AsmStr = MI->getOperand(0).getSymbolName();
+ return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo());
+ }
+ case PPC::PROLOG_LABEL:
+ case PPC::EH_LABEL:
+ case PPC::GC_LABEL:
+ case PPC::DBG_VALUE:
+ return 0;
+ default:
+ return 4; // PowerPC instructions are all 4 bytes
+ }
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h
new file mode 100644
index 0000000..90bacc9
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -0,0 +1,149 @@
+//===- PPCInstrInfo.h - PowerPC Instruction Information ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PowerPC implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef POWERPC32_INSTRUCTIONINFO_H
+#define POWERPC32_INSTRUCTIONINFO_H
+
+#include "PPC.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "PPCRegisterInfo.h"
+
+#define GET_INSTRINFO_HEADER
+#include "PPCGenInstrInfo.inc"
+
+namespace llvm {
+
+/// PPCII - This namespace holds all of the PowerPC target-specific
+/// per-instruction flags. These must match the corresponding definitions in
+/// PPC.td and PPCInstrFormats.td.
+namespace PPCII {
+enum {
+ // PPC970 Instruction Flags. These flags describe the characteristics of the
+ // PowerPC 970 (aka G5) dispatch groups and how they are formed out of
+ // raw machine instructions.
+
+ /// PPC970_First - This instruction starts a new dispatch group, so it will
+ /// always be the first one in the group.
+ PPC970_First = 0x1,
+
+ /// PPC970_Single - This instruction starts a new dispatch group and
+ /// terminates it, so it will be the sole instruction in the group.
+ PPC970_Single = 0x2,
+
+ /// PPC970_Cracked - This instruction is cracked into two pieces, requiring
+ /// two dispatch pipes to be available to issue.
+ PPC970_Cracked = 0x4,
+
+ /// PPC970_Mask/Shift - This is a bitmask that selects the pipeline type that
+ /// an instruction is issued to.
+ PPC970_Shift = 3,
+ PPC970_Mask = 0x07 << PPC970_Shift
+};
+enum PPC970_Unit {
+ /// These are the various PPC970 execution unit pipelines. Each instruction
+ /// is one of these.
+ PPC970_Pseudo = 0 << PPC970_Shift, // Pseudo instruction
+ PPC970_FXU = 1 << PPC970_Shift, // Fixed Point (aka Integer/ALU) Unit
+ PPC970_LSU = 2 << PPC970_Shift, // Load Store Unit
+ PPC970_FPU = 3 << PPC970_Shift, // Floating Point Unit
+ PPC970_CRU = 4 << PPC970_Shift, // Control Register Unit
+ PPC970_VALU = 5 << PPC970_Shift, // Vector ALU
+ PPC970_VPERM = 6 << PPC970_Shift, // Vector Permute Unit
+ PPC970_BRU = 7 << PPC970_Shift // Branch Unit
+};
+} // end namespace PPCII
+
+
+class PPCInstrInfo : public PPCGenInstrInfo {
+ PPCTargetMachine &TM;
+ const PPCRegisterInfo RI;
+
+ bool StoreRegToStackSlot(MachineFunction &MF,
+ unsigned SrcReg, bool isKill, int FrameIdx,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+ void LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+public:
+ explicit PPCInstrInfo(PPCTargetMachine &TM);
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ virtual const PPCRegisterInfo &getRegisterInfo() const { return RI; }
+
+ ScheduleHazardRecognizer *
+ CreateTargetHazardRecognizer(const TargetMachine *TM,
+ const ScheduleDAG *DAG) const;
+
+ unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+ unsigned isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ // commuteInstruction - We can commute rlwimi instructions, but only if the
+ // rotate amt is zero. We also have to munge the immediates a bit.
+ virtual MachineInstr *commuteInstruction(MachineInstr *MI, bool NewMI) const;
+
+ virtual void insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const;
+
+
+ // Branch analysis.
+ virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const;
+ virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF,
+ int FrameIx,
+ uint64_t Offset,
+ const MDNode *MDPtr,
+ DebugLoc DL) const;
+
+ virtual
+ bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+
+ /// GetInstSize - Return the number of bytes of code the specified
+ /// instruction may be. This returns the maximum number of bytes.
+ ///
+ virtual unsigned GetInstSizeInBytes(const MachineInstr *MI) const;
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
new file mode 100644
index 0000000..f248b5b
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -0,0 +1,1482 @@
+//===- PPCInstrInfo.td - The PowerPC Instruction Set -------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the subset of the 32-bit PowerPC instruction set, as used
+// by the PowerPC instruction selector.
+//
+//===----------------------------------------------------------------------===//
+
+include "PPCInstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// PowerPC specific type constraints.
+//
+def SDT_PPCstfiwx : SDTypeProfile<0, 2, [ // stfiwx
+ SDTCisVT<0, f64>, SDTCisPtrTy<1>
+]>;
+def SDT_PPCCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
+def SDT_PPCCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>,
+ SDTCisVT<1, i32> ]>;
+def SDT_PPCvperm : SDTypeProfile<1, 3, [
+ SDTCisVT<3, v16i8>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>
+]>;
+
+def SDT_PPCvcmp : SDTypeProfile<1, 3, [
+ SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisVT<3, i32>
+]>;
+
+def SDT_PPCcondbr : SDTypeProfile<0, 3, [
+ SDTCisVT<0, i32>, SDTCisVT<2, OtherVT>
+]>;
+
+def SDT_PPClbrx : SDTypeProfile<1, 2, [
+ SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>
+]>;
+def SDT_PPCstbrx : SDTypeProfile<0, 3, [
+ SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>
+]>;
+
+def SDT_PPClarx : SDTypeProfile<1, 1, [
+ SDTCisInt<0>, SDTCisPtrTy<1>
+]>;
+def SDT_PPCstcx : SDTypeProfile<0, 2, [
+ SDTCisInt<0>, SDTCisPtrTy<1>
+]>;
+
+def SDT_PPCTC_ret : SDTypeProfile<0, 2, [
+ SDTCisPtrTy<0>, SDTCisVT<1, i32>
+]>;
+
+def SDT_PPCnop : SDTypeProfile<0, 0, []>;
+
+//===----------------------------------------------------------------------===//
+// PowerPC specific DAG Nodes.
+//
+
+def PPCfcfid : SDNode<"PPCISD::FCFID" , SDTFPUnaryOp, []>;
+def PPCfctidz : SDNode<"PPCISD::FCTIDZ", SDTFPUnaryOp, []>;
+def PPCfctiwz : SDNode<"PPCISD::FCTIWZ", SDTFPUnaryOp, []>;
+def PPCstfiwx : SDNode<"PPCISD::STFIWX", SDT_PPCstfiwx,
+ [SDNPHasChain, SDNPMayStore]>;
+
+// This sequence is used for long double->int conversions. It changes the
+// bits in the FPSCR which is not modelled.
+def PPCmffs : SDNode<"PPCISD::MFFS", SDTypeProfile<1, 0, [SDTCisVT<0, f64>]>,
+ [SDNPOutGlue]>;
+def PPCmtfsb0 : SDNode<"PPCISD::MTFSB0", SDTypeProfile<0, 1, [SDTCisInt<0>]>,
+ [SDNPInGlue, SDNPOutGlue]>;
+def PPCmtfsb1 : SDNode<"PPCISD::MTFSB1", SDTypeProfile<0, 1, [SDTCisInt<0>]>,
+ [SDNPInGlue, SDNPOutGlue]>;
+def PPCfaddrtz: SDNode<"PPCISD::FADDRTZ", SDTFPBinOp,
+ [SDNPInGlue, SDNPOutGlue]>;
+def PPCmtfsf : SDNode<"PPCISD::MTFSF", SDTypeProfile<1, 3,
+ [SDTCisVT<0, f64>, SDTCisInt<1>, SDTCisVT<2, f64>,
+ SDTCisVT<3, f64>]>,
+ [SDNPInGlue]>;
+
+def PPCfsel : SDNode<"PPCISD::FSEL",
+ // Type constraint for fsel.
+ SDTypeProfile<1, 3, [SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>,
+ SDTCisFP<0>, SDTCisVT<1, f64>]>, []>;
+
+def PPChi : SDNode<"PPCISD::Hi", SDTIntBinOp, []>;
+def PPClo : SDNode<"PPCISD::Lo", SDTIntBinOp, []>;
+def PPCtoc_entry: SDNode<"PPCISD::TOC_ENTRY", SDTIntBinOp, [SDNPMayLoad]>;
+def PPCvmaddfp : SDNode<"PPCISD::VMADDFP", SDTFPTernaryOp, []>;
+def PPCvnmsubfp : SDNode<"PPCISD::VNMSUBFP", SDTFPTernaryOp, []>;
+
+def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>;
+
+// These nodes represent the 32-bit PPC shifts that operate on 6-bit shift
+// amounts. These nodes are generated by the multi-precision shift code.
+def PPCsrl : SDNode<"PPCISD::SRL" , SDTIntShiftOp>;
+def PPCsra : SDNode<"PPCISD::SRA" , SDTIntShiftOp>;
+def PPCshl : SDNode<"PPCISD::SHL" , SDTIntShiftOp>;
+
+def PPCextsw_32 : SDNode<"PPCISD::EXTSW_32" , SDTIntUnaryOp>;
+def PPCstd_32 : SDNode<"PPCISD::STD_32" , SDTStore,
+ [SDNPHasChain, SDNPMayStore]>;
+
+// These are target-independent nodes, but have target-specific formats.
+def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_PPCCallSeqStart,
+ [SDNPHasChain, SDNPOutGlue]>;
+def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_PPCCallSeqEnd,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+def SDT_PPCCall : SDTypeProfile<0, -1, [SDTCisInt<0>]>;
+def PPCcall_Darwin : SDNode<"PPCISD::CALL_Darwin", SDT_PPCCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+def PPCcall_SVR4 : SDNode<"PPCISD::CALL_SVR4", SDT_PPCCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+def PPCnop : SDNode<"PPCISD::NOP", SDT_PPCnop, [SDNPInGlue, SDNPOutGlue]>;
+def PPCload : SDNode<"PPCISD::LOAD", SDTypeProfile<1, 1, []>,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+def PPCload_toc : SDNode<"PPCISD::LOAD_TOC", SDTypeProfile<0, 1, []>,
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>;
+def PPCtoc_restore : SDNode<"PPCISD::TOC_RESTORE", SDTypeProfile<0, 0, []>,
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>;
+def PPCmtctr : SDNode<"PPCISD::MTCTR", SDT_PPCCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+def PPCbctrl_Darwin : SDNode<"PPCISD::BCTRL_Darwin", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+
+def PPCbctrl_SVR4 : SDNode<"PPCISD::BCTRL_SVR4", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+
+def retflag : SDNode<"PPCISD::RET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+
+def PPCtc_return : SDNode<"PPCISD::TC_RETURN", SDT_PPCTC_ret,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+
+def PPCvcmp : SDNode<"PPCISD::VCMP" , SDT_PPCvcmp, []>;
+def PPCvcmp_o : SDNode<"PPCISD::VCMPo", SDT_PPCvcmp, [SDNPOutGlue]>;
+
+def PPCcondbranch : SDNode<"PPCISD::COND_BRANCH", SDT_PPCcondbr,
+ [SDNPHasChain, SDNPOptInGlue]>;
+
+def PPClbrx : SDNode<"PPCISD::LBRX", SDT_PPClbrx,
+ [SDNPHasChain, SDNPMayLoad]>;
+def PPCstbrx : SDNode<"PPCISD::STBRX", SDT_PPCstbrx,
+ [SDNPHasChain, SDNPMayStore]>;
+
+// Instructions to support atomic operations
+def PPClarx : SDNode<"PPCISD::LARX", SDT_PPClarx,
+ [SDNPHasChain, SDNPMayLoad]>;
+def PPCstcx : SDNode<"PPCISD::STCX", SDT_PPCstcx,
+ [SDNPHasChain, SDNPMayStore]>;
+
+// Instructions to support dynamic alloca.
+def SDTDynOp : SDTypeProfile<1, 2, []>;
+def PPCdynalloc : SDNode<"PPCISD::DYNALLOC", SDTDynOp, [SDNPHasChain]>;
+
+//===----------------------------------------------------------------------===//
+// PowerPC specific transformation functions and pattern fragments.
+//
+
+def SHL32 : SDNodeXForm<imm, [{
+ // Transformation function: 31 - imm
+ return getI32Imm(31 - N->getZExtValue());
+}]>;
+
+def SRL32 : SDNodeXForm<imm, [{
+ // Transformation function: 32 - imm
+ return N->getZExtValue() ? getI32Imm(32 - N->getZExtValue()) : getI32Imm(0);
+}]>;
+
+def LO16 : SDNodeXForm<imm, [{
+ // Transformation function: get the low 16 bits.
+ return getI32Imm((unsigned short)N->getZExtValue());
+}]>;
+
+def HI16 : SDNodeXForm<imm, [{
+ // Transformation function: shift the immediate value down into the low bits.
+ return getI32Imm((unsigned)N->getZExtValue() >> 16);
+}]>;
+
+def HA16 : SDNodeXForm<imm, [{
+ // Transformation function: shift the immediate value down into the low bits.
+ signed int Val = N->getZExtValue();
+ return getI32Imm((Val - (signed short)Val) >> 16);
+}]>;
+def MB : SDNodeXForm<imm, [{
+ // Transformation function: get the start bit of a mask
+ unsigned mb = 0, me;
+ (void)isRunOfOnes((unsigned)N->getZExtValue(), mb, me);
+ return getI32Imm(mb);
+}]>;
+
+def ME : SDNodeXForm<imm, [{
+ // Transformation function: get the end bit of a mask
+ unsigned mb, me = 0;
+ (void)isRunOfOnes((unsigned)N->getZExtValue(), mb, me);
+ return getI32Imm(me);
+}]>;
+def maskimm32 : PatLeaf<(imm), [{
+ // maskImm predicate - True if immediate is a run of ones.
+ unsigned mb, me;
+ if (N->getValueType(0) == MVT::i32)
+ return isRunOfOnes((unsigned)N->getZExtValue(), mb, me);
+ else
+ return false;
+}]>;
+
+def immSExt16 : PatLeaf<(imm), [{
+ // immSExt16 predicate - True if the immediate fits in a 16-bit sign extended
+ // field. Used by instructions like 'addi'.
+ if (N->getValueType(0) == MVT::i32)
+ return (int32_t)N->getZExtValue() == (short)N->getZExtValue();
+ else
+ return (int64_t)N->getZExtValue() == (short)N->getZExtValue();
+}]>;
+def immZExt16 : PatLeaf<(imm), [{
+ // immZExt16 predicate - True if the immediate fits in a 16-bit zero extended
+ // field. Used by instructions like 'ori'.
+ return (uint64_t)N->getZExtValue() == (unsigned short)N->getZExtValue();
+}], LO16>;
+
+// imm16Shifted* - These match immediates where the low 16-bits are zero. There
+// are two forms: imm16ShiftedSExt and imm16ShiftedZExt. These two forms are
+// identical in 32-bit mode, but in 64-bit mode, they return true if the
+// immediate fits into a sign/zero extended 32-bit immediate (with the low bits
+// clear).
+def imm16ShiftedZExt : PatLeaf<(imm), [{
+ // imm16ShiftedZExt predicate - True if only bits in the top 16-bits of the
+ // immediate are set. Used by instructions like 'xoris'.
+ return (N->getZExtValue() & ~uint64_t(0xFFFF0000)) == 0;
+}], HI16>;
+
+def imm16ShiftedSExt : PatLeaf<(imm), [{
+ // imm16ShiftedSExt predicate - True if only bits in the top 16-bits of the
+ // immediate are set. Used by instructions like 'addis'. Identical to
+ // imm16ShiftedZExt in 32-bit mode.
+ if (N->getZExtValue() & 0xFFFF) return false;
+ if (N->getValueType(0) == MVT::i32)
+ return true;
+ // For 64-bit, make sure it is sext right.
+ return N->getZExtValue() == (uint64_t)(int)N->getZExtValue();
+}], HI16>;
+
+
+//===----------------------------------------------------------------------===//
+// PowerPC Flag Definitions.
+
+class isPPC64 { bit PPC64 = 1; }
+class isDOT {
+ list<Register> Defs = [CR0];
+ bit RC = 1;
+}
+
+class RegConstraint<string C> {
+ string Constraints = C;
+}
+class NoEncode<string E> {
+ string DisableEncoding = E;
+}
+
+
+//===----------------------------------------------------------------------===//
+// PowerPC Operand Definitions.
+
+def s5imm : Operand<i32> {
+ let PrintMethod = "printS5ImmOperand";
+}
+def u5imm : Operand<i32> {
+ let PrintMethod = "printU5ImmOperand";
+}
+def u6imm : Operand<i32> {
+ let PrintMethod = "printU6ImmOperand";
+}
+def s16imm : Operand<i32> {
+ let PrintMethod = "printS16ImmOperand";
+}
+def u16imm : Operand<i32> {
+ let PrintMethod = "printU16ImmOperand";
+}
+def s16immX4 : Operand<i32> { // Multiply imm by 4 before printing.
+ let PrintMethod = "printS16X4ImmOperand";
+}
+def directbrtarget : Operand<OtherVT> {
+ let PrintMethod = "printBranchOperand";
+ let EncoderMethod = "getDirectBrEncoding";
+}
+def condbrtarget : Operand<OtherVT> {
+ let PrintMethod = "printBranchOperand";
+ let EncoderMethod = "getCondBrEncoding";
+}
+def calltarget : Operand<iPTR> {
+ let EncoderMethod = "getDirectBrEncoding";
+}
+def aaddr : Operand<iPTR> {
+ let PrintMethod = "printAbsAddrOperand";
+}
+def symbolHi: Operand<i32> {
+ let PrintMethod = "printSymbolHi";
+ let EncoderMethod = "getHA16Encoding";
+}
+def symbolLo: Operand<i32> {
+ let PrintMethod = "printSymbolLo";
+ let EncoderMethod = "getLO16Encoding";
+}
+def crbitm: Operand<i8> {
+ let PrintMethod = "printcrbitm";
+ let EncoderMethod = "get_crbitm_encoding";
+}
+// Address operands
+def memri : Operand<iPTR> {
+ let PrintMethod = "printMemRegImm";
+ let MIOperandInfo = (ops i32imm:$imm, ptr_rc:$reg);
+ let EncoderMethod = "getMemRIEncoding";
+}
+def memrr : Operand<iPTR> {
+ let PrintMethod = "printMemRegReg";
+ let MIOperandInfo = (ops ptr_rc, ptr_rc);
+}
+def memrix : Operand<iPTR> { // memri where the imm is shifted 2 bits.
+ let PrintMethod = "printMemRegImmShifted";
+ let MIOperandInfo = (ops i32imm:$imm, ptr_rc:$reg);
+ let EncoderMethod = "getMemRIXEncoding";
+}
+def tocentry : Operand<iPTR> {
+ let MIOperandInfo = (ops i32imm:$imm);
+}
+
+// PowerPC Predicate operand. 20 = (0<<5)|20 = always, CR0 is a dummy reg
+// that doesn't matter.
+def pred : PredicateOperand<OtherVT, (ops imm, CRRC),
+ (ops (i32 20), (i32 zero_reg))> {
+ let PrintMethod = "printPredicateOperand";
+}
+
+// Define PowerPC specific addressing mode.
+def iaddr : ComplexPattern<iPTR, 2, "SelectAddrImm", [], []>;
+def xaddr : ComplexPattern<iPTR, 2, "SelectAddrIdx", [], []>;
+def xoaddr : ComplexPattern<iPTR, 2, "SelectAddrIdxOnly",[], []>;
+def ixaddr : ComplexPattern<iPTR, 2, "SelectAddrImmShift", [], []>; // "std"
+
+/// This is just the offset part of iaddr, used for preinc.
+def iaddroff : ComplexPattern<iPTR, 1, "SelectAddrImmOffs", [], []>;
+
+//===----------------------------------------------------------------------===//
+// PowerPC Instruction Predicate Definitions.
+def FPContractions : Predicate<"!NoExcessFPPrecision">;
+def In32BitMode : Predicate<"!PPCSubTarget.isPPC64()">;
+def In64BitMode : Predicate<"PPCSubTarget.isPPC64()">;
+
+
+//===----------------------------------------------------------------------===//
+// PowerPC Instruction Definitions.
+
+// Pseudo-instructions:
+
+let hasCtrlDep = 1 in {
+let Defs = [R1], Uses = [R1] in {
+def ADJCALLSTACKDOWN : Pseudo<(outs), (ins u16imm:$amt), "",
+ [(callseq_start timm:$amt)]>;
+def ADJCALLSTACKUP : Pseudo<(outs), (ins u16imm:$amt1, u16imm:$amt2), "",
+ [(callseq_end timm:$amt1, timm:$amt2)]>;
+}
+
+def UPDATE_VRSAVE : Pseudo<(outs GPRC:$rD), (ins GPRC:$rS),
+ "UPDATE_VRSAVE $rD, $rS", []>;
+}
+
+let Defs = [R1], Uses = [R1] in
+def DYNALLOC : Pseudo<(outs GPRC:$result), (ins GPRC:$negsize, memri:$fpsi), "",
+ [(set GPRC:$result,
+ (PPCdynalloc GPRC:$negsize, iaddr:$fpsi))]>;
+
+// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after
+// instruction selection into a branch sequence.
+let usesCustomInserter = 1, // Expanded after instruction selection.
+ PPC970_Single = 1 in {
+ def SELECT_CC_I4 : Pseudo<(outs GPRC:$dst), (ins CRRC:$cond, GPRC:$T, GPRC:$F,
+ i32imm:$BROPC), "",
+ []>;
+ def SELECT_CC_I8 : Pseudo<(outs G8RC:$dst), (ins CRRC:$cond, G8RC:$T, G8RC:$F,
+ i32imm:$BROPC), "",
+ []>;
+ def SELECT_CC_F4 : Pseudo<(outs F4RC:$dst), (ins CRRC:$cond, F4RC:$T, F4RC:$F,
+ i32imm:$BROPC), "",
+ []>;
+ def SELECT_CC_F8 : Pseudo<(outs F8RC:$dst), (ins CRRC:$cond, F8RC:$T, F8RC:$F,
+ i32imm:$BROPC), "",
+ []>;
+ def SELECT_CC_VRRC: Pseudo<(outs VRRC:$dst), (ins CRRC:$cond, VRRC:$T, VRRC:$F,
+ i32imm:$BROPC), "",
+ []>;
+}
+
+// SPILL_CR - Indicate that we're dumping the CR register, so we'll need to
+// scavenge a register for it.
+def SPILL_CR : Pseudo<(outs), (ins GPRC:$cond, memri:$F),
+ "", []>;
+
+let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in {
+ let isReturn = 1, Uses = [LR, RM] in
+ def BLR : XLForm_2_br<19, 16, 0, (outs), (ins pred:$p),
+ "b${p:cc}lr ${p:reg}", BrB,
+ [(retflag)]>;
+ let isBranch = 1, isIndirectBranch = 1, Uses = [CTR] in
+ def BCTR : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>;
+}
+
+let Defs = [LR] in
+ def MovePCtoLR : Pseudo<(outs), (ins), "", []>,
+ PPC970_Unit_BRU;
+
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
+ let isBarrier = 1 in {
+ def B : IForm<18, 0, 0, (outs), (ins directbrtarget:$dst),
+ "b $dst", BrB,
+ [(br bb:$dst)]>;
+ }
+
+ // BCC represents an arbitrary conditional branch on a predicate.
+ // FIXME: should be able to write a pattern for PPCcondbranch, but can't use
+ // a two-value operand where a dag node expects two operands. :(
+ def BCC : BForm<16, 0, 0, (outs), (ins pred:$cond, condbrtarget:$dst),
+ "b${cond:cc} ${cond:reg}, $dst"
+ /*[(PPCcondbranch CRRC:$crS, imm:$opc, bb:$dst)]*/>;
+}
+
+// Darwin ABI Calls.
+let isCall = 1, PPC970_Unit = 7,
+ // All calls clobber the non-callee saved registers...
+ Defs = [R0,R2,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12,
+ F0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13,
+ V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,
+ LR,CTR,
+ CR0,CR1,CR5,CR6,CR7,CARRY] in {
+ // Convenient aliases for call instructions
+ let Uses = [RM] in {
+ def BL_Darwin : IForm<18, 0, 1,
+ (outs), (ins calltarget:$func, variable_ops),
+ "bl $func", BrB, []>; // See Pat patterns below.
+ def BLA_Darwin : IForm<18, 1, 1,
+ (outs), (ins aaddr:$func, variable_ops),
+ "bla $func", BrB, [(PPCcall_Darwin (i32 imm:$func))]>;
+ }
+ let Uses = [CTR, RM] in {
+ def BCTRL_Darwin : XLForm_2_ext<19, 528, 20, 0, 1,
+ (outs), (ins variable_ops),
+ "bctrl", BrB,
+ [(PPCbctrl_Darwin)]>, Requires<[In32BitMode]>;
+ }
+}
+
+// SVR4 ABI Calls.
+let isCall = 1, PPC970_Unit = 7,
+ // All calls clobber the non-callee saved registers...
+ Defs = [R0,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12,
+ F0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13,
+ V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,
+ LR,CTR,
+ CR0,CR1,CR5,CR6,CR7,CARRY] in {
+ // Convenient aliases for call instructions
+ let Uses = [RM] in {
+ def BL_SVR4 : IForm<18, 0, 1,
+ (outs), (ins calltarget:$func, variable_ops),
+ "bl $func", BrB, []>; // See Pat patterns below.
+ def BLA_SVR4 : IForm<18, 1, 1,
+ (outs), (ins aaddr:$func, variable_ops),
+ "bla $func", BrB,
+ [(PPCcall_SVR4 (i32 imm:$func))]>;
+ }
+ let Uses = [CTR, RM] in {
+ def BCTRL_SVR4 : XLForm_2_ext<19, 528, 20, 0, 1,
+ (outs), (ins variable_ops),
+ "bctrl", BrB,
+ [(PPCbctrl_SVR4)]>, Requires<[In32BitMode]>;
+ }
+}
+
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
+def TCRETURNdi :Pseudo< (outs),
+ (ins calltarget:$dst, i32imm:$offset, variable_ops),
+ "#TC_RETURNd $dst $offset",
+ []>;
+
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
+def TCRETURNai :Pseudo<(outs), (ins aaddr:$func, i32imm:$offset, variable_ops),
+ "#TC_RETURNa $func $offset",
+ [(PPCtc_return (i32 imm:$func), imm:$offset)]>;
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
+def TCRETURNri : Pseudo<(outs), (ins CTRRC:$dst, i32imm:$offset, variable_ops),
+ "#TC_RETURNr $dst $offset",
+ []>;
+
+
+let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7, isBranch = 1,
+ isIndirectBranch = 1, isCall = 1, isReturn = 1, Uses = [CTR, RM] in
+def TAILBCTR : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>,
+ Requires<[In32BitMode]>;
+
+
+
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7,
+ isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in
+def TAILB : IForm<18, 0, 0, (outs), (ins calltarget:$dst),
+ "b $dst", BrB,
+ []>;
+
+
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7,
+ isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in
+def TAILBA : IForm<18, 0, 0, (outs), (ins aaddr:$dst),
+ "ba $dst", BrB,
+ []>;
+
+
+// DCB* instructions.
+def DCBA : DCB_Form<758, 0, (outs), (ins memrr:$dst),
+ "dcba $dst", LdStDCBF, [(int_ppc_dcba xoaddr:$dst)]>,
+ PPC970_DGroup_Single;
+def DCBF : DCB_Form<86, 0, (outs), (ins memrr:$dst),
+ "dcbf $dst", LdStDCBF, [(int_ppc_dcbf xoaddr:$dst)]>,
+ PPC970_DGroup_Single;
+def DCBI : DCB_Form<470, 0, (outs), (ins memrr:$dst),
+ "dcbi $dst", LdStDCBF, [(int_ppc_dcbi xoaddr:$dst)]>,
+ PPC970_DGroup_Single;
+def DCBST : DCB_Form<54, 0, (outs), (ins memrr:$dst),
+ "dcbst $dst", LdStDCBF, [(int_ppc_dcbst xoaddr:$dst)]>,
+ PPC970_DGroup_Single;
+def DCBT : DCB_Form<278, 0, (outs), (ins memrr:$dst),
+ "dcbt $dst", LdStDCBF, [(int_ppc_dcbt xoaddr:$dst)]>,
+ PPC970_DGroup_Single;
+def DCBTST : DCB_Form<246, 0, (outs), (ins memrr:$dst),
+ "dcbtst $dst", LdStDCBF, [(int_ppc_dcbtst xoaddr:$dst)]>,
+ PPC970_DGroup_Single;
+def DCBZ : DCB_Form<1014, 0, (outs), (ins memrr:$dst),
+ "dcbz $dst", LdStDCBF, [(int_ppc_dcbz xoaddr:$dst)]>,
+ PPC970_DGroup_Single;
+def DCBZL : DCB_Form<1014, 1, (outs), (ins memrr:$dst),
+ "dcbzl $dst", LdStDCBF, [(int_ppc_dcbzl xoaddr:$dst)]>,
+ PPC970_DGroup_Single;
+
+// Atomic operations
+let usesCustomInserter = 1 in {
+ let Defs = [CR0] in {
+ def ATOMIC_LOAD_ADD_I8 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+ [(set GPRC:$dst, (atomic_load_add_8 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_SUB_I8 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+ [(set GPRC:$dst, (atomic_load_sub_8 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_AND_I8 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+ [(set GPRC:$dst, (atomic_load_and_8 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_OR_I8 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+ [(set GPRC:$dst, (atomic_load_or_8 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_XOR_I8 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+ [(set GPRC:$dst, (atomic_load_xor_8 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_NAND_I8 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+ [(set GPRC:$dst, (atomic_load_nand_8 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_ADD_I16 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+ [(set GPRC:$dst, (atomic_load_add_16 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_SUB_I16 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+ [(set GPRC:$dst, (atomic_load_sub_16 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_AND_I16 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+ [(set GPRC:$dst, (atomic_load_and_16 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_OR_I16 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+ [(set GPRC:$dst, (atomic_load_or_16 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_XOR_I16 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+ [(set GPRC:$dst, (atomic_load_xor_16 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_NAND_I16 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+ [(set GPRC:$dst, (atomic_load_nand_16 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_ADD_I32 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+ [(set GPRC:$dst, (atomic_load_add_32 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_SUB_I32 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+ [(set GPRC:$dst, (atomic_load_sub_32 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_AND_I32 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+ [(set GPRC:$dst, (atomic_load_and_32 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_OR_I32 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+ [(set GPRC:$dst, (atomic_load_or_32 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_XOR_I32 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+ [(set GPRC:$dst, (atomic_load_xor_32 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_NAND_I32 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+ [(set GPRC:$dst, (atomic_load_nand_32 xoaddr:$ptr, GPRC:$incr))]>;
+
+ def ATOMIC_CMP_SWAP_I8 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "",
+ [(set GPRC:$dst,
+ (atomic_cmp_swap_8 xoaddr:$ptr, GPRC:$old, GPRC:$new))]>;
+ def ATOMIC_CMP_SWAP_I16 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "",
+ [(set GPRC:$dst,
+ (atomic_cmp_swap_16 xoaddr:$ptr, GPRC:$old, GPRC:$new))]>;
+ def ATOMIC_CMP_SWAP_I32 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "",
+ [(set GPRC:$dst,
+ (atomic_cmp_swap_32 xoaddr:$ptr, GPRC:$old, GPRC:$new))]>;
+
+ def ATOMIC_SWAP_I8 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "",
+ [(set GPRC:$dst, (atomic_swap_8 xoaddr:$ptr, GPRC:$new))]>;
+ def ATOMIC_SWAP_I16 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "",
+ [(set GPRC:$dst, (atomic_swap_16 xoaddr:$ptr, GPRC:$new))]>;
+ def ATOMIC_SWAP_I32 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "",
+ [(set GPRC:$dst, (atomic_swap_32 xoaddr:$ptr, GPRC:$new))]>;
+ }
+}
+
+// Instructions to support atomic operations
+def LWARX : XForm_1<31, 20, (outs GPRC:$rD), (ins memrr:$src),
+ "lwarx $rD, $src", LdStLWARX,
+ [(set GPRC:$rD, (PPClarx xoaddr:$src))]>;
+
+let Defs = [CR0] in
+def STWCX : XForm_1<31, 150, (outs), (ins GPRC:$rS, memrr:$dst),
+ "stwcx. $rS, $dst", LdStSTWCX,
+ [(PPCstcx GPRC:$rS, xoaddr:$dst)]>,
+ isDOT;
+
+let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in
+def TRAP : XForm_24<31, 4, (outs), (ins), "trap", LdStGeneral, [(trap)]>;
+
+//===----------------------------------------------------------------------===//
+// PPC32 Load Instructions.
+//
+
+// Unindexed (r+i) Loads.
+let canFoldAsLoad = 1, PPC970_Unit = 2 in {
+def LBZ : DForm_1<34, (outs GPRC:$rD), (ins memri:$src),
+ "lbz $rD, $src", LdStGeneral,
+ [(set GPRC:$rD, (zextloadi8 iaddr:$src))]>;
+def LHA : DForm_1<42, (outs GPRC:$rD), (ins memri:$src),
+ "lha $rD, $src", LdStLHA,
+ [(set GPRC:$rD, (sextloadi16 iaddr:$src))]>,
+ PPC970_DGroup_Cracked;
+def LHZ : DForm_1<40, (outs GPRC:$rD), (ins memri:$src),
+ "lhz $rD, $src", LdStGeneral,
+ [(set GPRC:$rD, (zextloadi16 iaddr:$src))]>;
+def LWZ : DForm_1<32, (outs GPRC:$rD), (ins memri:$src),
+ "lwz $rD, $src", LdStGeneral,
+ [(set GPRC:$rD, (load iaddr:$src))]>;
+
+def LFS : DForm_1<48, (outs F4RC:$rD), (ins memri:$src),
+ "lfs $rD, $src", LdStLFDU,
+ [(set F4RC:$rD, (load iaddr:$src))]>;
+def LFD : DForm_1<50, (outs F8RC:$rD), (ins memri:$src),
+ "lfd $rD, $src", LdStLFD,
+ [(set F8RC:$rD, (load iaddr:$src))]>;
+
+
+// Unindexed (r+i) Loads with Update (preinc).
+let mayLoad = 1 in {
+def LBZU : DForm_1<35, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+ "lbzu $rD, $addr", LdStGeneral,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+def LHAU : DForm_1<43, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+ "lhau $rD, $addr", LdStGeneral,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+def LHZU : DForm_1<41, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+ "lhzu $rD, $addr", LdStGeneral,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+def LWZU : DForm_1<33, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+ "lwzu $rD, $addr", LdStGeneral,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+def LFSU : DForm_1<49, (outs F4RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+ "lfs $rD, $addr", LdStLFDU,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+def LFDU : DForm_1<51, (outs F8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+ "lfd $rD, $addr", LdStLFD,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+}
+}
+
+// Indexed (r+r) Loads.
+//
+let canFoldAsLoad = 1, PPC970_Unit = 2 in {
+def LBZX : XForm_1<31, 87, (outs GPRC:$rD), (ins memrr:$src),
+ "lbzx $rD, $src", LdStGeneral,
+ [(set GPRC:$rD, (zextloadi8 xaddr:$src))]>;
+def LHAX : XForm_1<31, 343, (outs GPRC:$rD), (ins memrr:$src),
+ "lhax $rD, $src", LdStLHA,
+ [(set GPRC:$rD, (sextloadi16 xaddr:$src))]>,
+ PPC970_DGroup_Cracked;
+def LHZX : XForm_1<31, 279, (outs GPRC:$rD), (ins memrr:$src),
+ "lhzx $rD, $src", LdStGeneral,
+ [(set GPRC:$rD, (zextloadi16 xaddr:$src))]>;
+def LWZX : XForm_1<31, 23, (outs GPRC:$rD), (ins memrr:$src),
+ "lwzx $rD, $src", LdStGeneral,
+ [(set GPRC:$rD, (load xaddr:$src))]>;
+
+
+def LHBRX : XForm_1<31, 790, (outs GPRC:$rD), (ins memrr:$src),
+ "lhbrx $rD, $src", LdStGeneral,
+ [(set GPRC:$rD, (PPClbrx xoaddr:$src, i16))]>;
+def LWBRX : XForm_1<31, 534, (outs GPRC:$rD), (ins memrr:$src),
+ "lwbrx $rD, $src", LdStGeneral,
+ [(set GPRC:$rD, (PPClbrx xoaddr:$src, i32))]>;
+
+def LFSX : XForm_25<31, 535, (outs F4RC:$frD), (ins memrr:$src),
+ "lfsx $frD, $src", LdStLFDU,
+ [(set F4RC:$frD, (load xaddr:$src))]>;
+def LFDX : XForm_25<31, 599, (outs F8RC:$frD), (ins memrr:$src),
+ "lfdx $frD, $src", LdStLFDU,
+ [(set F8RC:$frD, (load xaddr:$src))]>;
+}
+
+//===----------------------------------------------------------------------===//
+// PPC32 Store Instructions.
+//
+
+// Unindexed (r+i) Stores.
+let PPC970_Unit = 2 in {
+def STB : DForm_1<38, (outs), (ins GPRC:$rS, memri:$src),
+ "stb $rS, $src", LdStGeneral,
+ [(truncstorei8 GPRC:$rS, iaddr:$src)]>;
+def STH : DForm_1<44, (outs), (ins GPRC:$rS, memri:$src),
+ "sth $rS, $src", LdStGeneral,
+ [(truncstorei16 GPRC:$rS, iaddr:$src)]>;
+def STW : DForm_1<36, (outs), (ins GPRC:$rS, memri:$src),
+ "stw $rS, $src", LdStGeneral,
+ [(store GPRC:$rS, iaddr:$src)]>;
+def STFS : DForm_1<52, (outs), (ins F4RC:$rS, memri:$dst),
+ "stfs $rS, $dst", LdStUX,
+ [(store F4RC:$rS, iaddr:$dst)]>;
+def STFD : DForm_1<54, (outs), (ins F8RC:$rS, memri:$dst),
+ "stfd $rS, $dst", LdStUX,
+ [(store F8RC:$rS, iaddr:$dst)]>;
+}
+
+// Unindexed (r+i) Stores with Update (preinc).
+let PPC970_Unit = 2 in {
+def STBU : DForm_1a<39, (outs ptr_rc:$ea_res), (ins GPRC:$rS,
+ symbolLo:$ptroff, ptr_rc:$ptrreg),
+ "stbu $rS, $ptroff($ptrreg)", LdStGeneral,
+ [(set ptr_rc:$ea_res,
+ (pre_truncsti8 GPRC:$rS, ptr_rc:$ptrreg,
+ iaddroff:$ptroff))]>,
+ RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
+def STHU : DForm_1a<45, (outs ptr_rc:$ea_res), (ins GPRC:$rS,
+ symbolLo:$ptroff, ptr_rc:$ptrreg),
+ "sthu $rS, $ptroff($ptrreg)", LdStGeneral,
+ [(set ptr_rc:$ea_res,
+ (pre_truncsti16 GPRC:$rS, ptr_rc:$ptrreg,
+ iaddroff:$ptroff))]>,
+ RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
+def STWU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins GPRC:$rS,
+ symbolLo:$ptroff, ptr_rc:$ptrreg),
+ "stwu $rS, $ptroff($ptrreg)", LdStGeneral,
+ [(set ptr_rc:$ea_res, (pre_store GPRC:$rS, ptr_rc:$ptrreg,
+ iaddroff:$ptroff))]>,
+ RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
+def STFSU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins F4RC:$rS,
+ symbolLo:$ptroff, ptr_rc:$ptrreg),
+ "stfsu $rS, $ptroff($ptrreg)", LdStGeneral,
+ [(set ptr_rc:$ea_res, (pre_store F4RC:$rS, ptr_rc:$ptrreg,
+ iaddroff:$ptroff))]>,
+ RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
+def STFDU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins F8RC:$rS,
+ symbolLo:$ptroff, ptr_rc:$ptrreg),
+ "stfdu $rS, $ptroff($ptrreg)", LdStGeneral,
+ [(set ptr_rc:$ea_res, (pre_store F8RC:$rS, ptr_rc:$ptrreg,
+ iaddroff:$ptroff))]>,
+ RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
+}
+
+
+// Indexed (r+r) Stores.
+//
+let PPC970_Unit = 2 in {
+def STBX : XForm_8<31, 215, (outs), (ins GPRC:$rS, memrr:$dst),
+ "stbx $rS, $dst", LdStGeneral,
+ [(truncstorei8 GPRC:$rS, xaddr:$dst)]>,
+ PPC970_DGroup_Cracked;
+def STHX : XForm_8<31, 407, (outs), (ins GPRC:$rS, memrr:$dst),
+ "sthx $rS, $dst", LdStGeneral,
+ [(truncstorei16 GPRC:$rS, xaddr:$dst)]>,
+ PPC970_DGroup_Cracked;
+def STWX : XForm_8<31, 151, (outs), (ins GPRC:$rS, memrr:$dst),
+ "stwx $rS, $dst", LdStGeneral,
+ [(store GPRC:$rS, xaddr:$dst)]>,
+ PPC970_DGroup_Cracked;
+
+let mayStore = 1 in {
+def STWUX : XForm_8<31, 183, (outs), (ins GPRC:$rS, GPRC:$rA, GPRC:$rB),
+ "stwux $rS, $rA, $rB", LdStGeneral,
+ []>;
+}
+def STHBRX: XForm_8<31, 918, (outs), (ins GPRC:$rS, memrr:$dst),
+ "sthbrx $rS, $dst", LdStGeneral,
+ [(PPCstbrx GPRC:$rS, xoaddr:$dst, i16)]>,
+ PPC970_DGroup_Cracked;
+def STWBRX: XForm_8<31, 662, (outs), (ins GPRC:$rS, memrr:$dst),
+ "stwbrx $rS, $dst", LdStGeneral,
+ [(PPCstbrx GPRC:$rS, xoaddr:$dst, i32)]>,
+ PPC970_DGroup_Cracked;
+
+def STFIWX: XForm_28<31, 983, (outs), (ins F8RC:$frS, memrr:$dst),
+ "stfiwx $frS, $dst", LdStUX,
+ [(PPCstfiwx F8RC:$frS, xoaddr:$dst)]>;
+
+def STFSX : XForm_28<31, 663, (outs), (ins F4RC:$frS, memrr:$dst),
+ "stfsx $frS, $dst", LdStUX,
+ [(store F4RC:$frS, xaddr:$dst)]>;
+def STFDX : XForm_28<31, 727, (outs), (ins F8RC:$frS, memrr:$dst),
+ "stfdx $frS, $dst", LdStUX,
+ [(store F8RC:$frS, xaddr:$dst)]>;
+}
+
+def SYNC : XForm_24_sync<31, 598, (outs), (ins),
+ "sync", LdStSync,
+ [(int_ppc_sync)]>;
+
+//===----------------------------------------------------------------------===//
+// PPC32 Arithmetic Instructions.
+//
+
+let PPC970_Unit = 1 in { // FXU Operations.
+def ADDI : DForm_2<14, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
+ "addi $rD, $rA, $imm", IntGeneral,
+ [(set GPRC:$rD, (add GPRC:$rA, immSExt16:$imm))]>;
+let Defs = [CARRY] in {
+def ADDIC : DForm_2<12, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
+ "addic $rD, $rA, $imm", IntGeneral,
+ [(set GPRC:$rD, (addc GPRC:$rA, immSExt16:$imm))]>,
+ PPC970_DGroup_Cracked;
+def ADDICo : DForm_2<13, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
+ "addic. $rD, $rA, $imm", IntGeneral,
+ []>;
+}
+def ADDIS : DForm_2<15, (outs GPRC:$rD), (ins GPRC:$rA, symbolHi:$imm),
+ "addis $rD, $rA, $imm", IntGeneral,
+ [(set GPRC:$rD, (add GPRC:$rA, imm16ShiftedSExt:$imm))]>;
+def LA : DForm_2<14, (outs GPRC:$rD), (ins GPRC:$rA, symbolLo:$sym),
+ "la $rD, $sym($rA)", IntGeneral,
+ [(set GPRC:$rD, (add GPRC:$rA,
+ (PPClo tglobaladdr:$sym, 0)))]>;
+def MULLI : DForm_2< 7, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
+ "mulli $rD, $rA, $imm", IntMulLI,
+ [(set GPRC:$rD, (mul GPRC:$rA, immSExt16:$imm))]>;
+let Defs = [CARRY] in {
+def SUBFIC : DForm_2< 8, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
+ "subfic $rD, $rA, $imm", IntGeneral,
+ [(set GPRC:$rD, (subc immSExt16:$imm, GPRC:$rA))]>;
+}
+
+let isReMaterializable = 1 in {
+ def LI : DForm_2_r0<14, (outs GPRC:$rD), (ins symbolLo:$imm),
+ "li $rD, $imm", IntGeneral,
+ [(set GPRC:$rD, immSExt16:$imm)]>;
+ def LIS : DForm_2_r0<15, (outs GPRC:$rD), (ins symbolHi:$imm),
+ "lis $rD, $imm", IntGeneral,
+ [(set GPRC:$rD, imm16ShiftedSExt:$imm)]>;
+}
+}
+
+let PPC970_Unit = 1 in { // FXU Operations.
+def ANDIo : DForm_4<28, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
+ "andi. $dst, $src1, $src2", IntGeneral,
+ [(set GPRC:$dst, (and GPRC:$src1, immZExt16:$src2))]>,
+ isDOT;
+def ANDISo : DForm_4<29, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
+ "andis. $dst, $src1, $src2", IntGeneral,
+ [(set GPRC:$dst, (and GPRC:$src1,imm16ShiftedZExt:$src2))]>,
+ isDOT;
+def ORI : DForm_4<24, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
+ "ori $dst, $src1, $src2", IntGeneral,
+ [(set GPRC:$dst, (or GPRC:$src1, immZExt16:$src2))]>;
+def ORIS : DForm_4<25, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
+ "oris $dst, $src1, $src2", IntGeneral,
+ [(set GPRC:$dst, (or GPRC:$src1, imm16ShiftedZExt:$src2))]>;
+def XORI : DForm_4<26, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
+ "xori $dst, $src1, $src2", IntGeneral,
+ [(set GPRC:$dst, (xor GPRC:$src1, immZExt16:$src2))]>;
+def XORIS : DForm_4<27, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
+ "xoris $dst, $src1, $src2", IntGeneral,
+ [(set GPRC:$dst, (xor GPRC:$src1,imm16ShiftedZExt:$src2))]>;
+def NOP : DForm_4_zero<24, (outs), (ins), "nop", IntGeneral,
+ []>;
+def CMPWI : DForm_5_ext<11, (outs CRRC:$crD), (ins GPRC:$rA, s16imm:$imm),
+ "cmpwi $crD, $rA, $imm", IntCompare>;
+def CMPLWI : DForm_6_ext<10, (outs CRRC:$dst), (ins GPRC:$src1, u16imm:$src2),
+ "cmplwi $dst, $src1, $src2", IntCompare>;
+}
+
+
+let PPC970_Unit = 1 in { // FXU Operations.
+def NAND : XForm_6<31, 476, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+ "nand $rA, $rS, $rB", IntGeneral,
+ [(set GPRC:$rA, (not (and GPRC:$rS, GPRC:$rB)))]>;
+def AND : XForm_6<31, 28, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+ "and $rA, $rS, $rB", IntGeneral,
+ [(set GPRC:$rA, (and GPRC:$rS, GPRC:$rB))]>;
+def ANDC : XForm_6<31, 60, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+ "andc $rA, $rS, $rB", IntGeneral,
+ [(set GPRC:$rA, (and GPRC:$rS, (not GPRC:$rB)))]>;
+def OR : XForm_6<31, 444, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+ "or $rA, $rS, $rB", IntGeneral,
+ [(set GPRC:$rA, (or GPRC:$rS, GPRC:$rB))]>;
+def NOR : XForm_6<31, 124, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+ "nor $rA, $rS, $rB", IntGeneral,
+ [(set GPRC:$rA, (not (or GPRC:$rS, GPRC:$rB)))]>;
+def ORC : XForm_6<31, 412, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+ "orc $rA, $rS, $rB", IntGeneral,
+ [(set GPRC:$rA, (or GPRC:$rS, (not GPRC:$rB)))]>;
+def EQV : XForm_6<31, 284, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+ "eqv $rA, $rS, $rB", IntGeneral,
+ [(set GPRC:$rA, (not (xor GPRC:$rS, GPRC:$rB)))]>;
+def XOR : XForm_6<31, 316, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+ "xor $rA, $rS, $rB", IntGeneral,
+ [(set GPRC:$rA, (xor GPRC:$rS, GPRC:$rB))]>;
+def SLW : XForm_6<31, 24, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+ "slw $rA, $rS, $rB", IntGeneral,
+ [(set GPRC:$rA, (PPCshl GPRC:$rS, GPRC:$rB))]>;
+def SRW : XForm_6<31, 536, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+ "srw $rA, $rS, $rB", IntGeneral,
+ [(set GPRC:$rA, (PPCsrl GPRC:$rS, GPRC:$rB))]>;
+let Defs = [CARRY] in {
+def SRAW : XForm_6<31, 792, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+ "sraw $rA, $rS, $rB", IntShift,
+ [(set GPRC:$rA, (PPCsra GPRC:$rS, GPRC:$rB))]>;
+}
+}
+
+let PPC970_Unit = 1 in { // FXU Operations.
+let Defs = [CARRY] in {
+def SRAWI : XForm_10<31, 824, (outs GPRC:$rA), (ins GPRC:$rS, u5imm:$SH),
+ "srawi $rA, $rS, $SH", IntShift,
+ [(set GPRC:$rA, (sra GPRC:$rS, (i32 imm:$SH)))]>;
+}
+def CNTLZW : XForm_11<31, 26, (outs GPRC:$rA), (ins GPRC:$rS),
+ "cntlzw $rA, $rS", IntGeneral,
+ [(set GPRC:$rA, (ctlz GPRC:$rS))]>;
+def EXTSB : XForm_11<31, 954, (outs GPRC:$rA), (ins GPRC:$rS),
+ "extsb $rA, $rS", IntGeneral,
+ [(set GPRC:$rA, (sext_inreg GPRC:$rS, i8))]>;
+def EXTSH : XForm_11<31, 922, (outs GPRC:$rA), (ins GPRC:$rS),
+ "extsh $rA, $rS", IntGeneral,
+ [(set GPRC:$rA, (sext_inreg GPRC:$rS, i16))]>;
+
+def CMPW : XForm_16_ext<31, 0, (outs CRRC:$crD), (ins GPRC:$rA, GPRC:$rB),
+ "cmpw $crD, $rA, $rB", IntCompare>;
+def CMPLW : XForm_16_ext<31, 32, (outs CRRC:$crD), (ins GPRC:$rA, GPRC:$rB),
+ "cmplw $crD, $rA, $rB", IntCompare>;
+}
+let PPC970_Unit = 3 in { // FPU Operations.
+//def FCMPO : XForm_17<63, 32, (outs CRRC:$crD), (ins FPRC:$fA, FPRC:$fB),
+// "fcmpo $crD, $fA, $fB", FPCompare>;
+def FCMPUS : XForm_17<63, 0, (outs CRRC:$crD), (ins F4RC:$fA, F4RC:$fB),
+ "fcmpu $crD, $fA, $fB", FPCompare>;
+def FCMPUD : XForm_17<63, 0, (outs CRRC:$crD), (ins F8RC:$fA, F8RC:$fB),
+ "fcmpu $crD, $fA, $fB", FPCompare>;
+
+let Uses = [RM] in {
+ def FCTIWZ : XForm_26<63, 15, (outs F8RC:$frD), (ins F8RC:$frB),
+ "fctiwz $frD, $frB", FPGeneral,
+ [(set F8RC:$frD, (PPCfctiwz F8RC:$frB))]>;
+ def FRSP : XForm_26<63, 12, (outs F4RC:$frD), (ins F8RC:$frB),
+ "frsp $frD, $frB", FPGeneral,
+ [(set F4RC:$frD, (fround F8RC:$frB))]>;
+ def FSQRT : XForm_26<63, 22, (outs F8RC:$frD), (ins F8RC:$frB),
+ "fsqrt $frD, $frB", FPSqrt,
+ [(set F8RC:$frD, (fsqrt F8RC:$frB))]>;
+ def FSQRTS : XForm_26<59, 22, (outs F4RC:$frD), (ins F4RC:$frB),
+ "fsqrts $frD, $frB", FPSqrt,
+ [(set F4RC:$frD, (fsqrt F4RC:$frB))]>;
+ }
+}
+
+/// Note that FMR is defined as pseudo-ops on the PPC970 because they are
+/// often coalesced away and we don't want the dispatch group builder to think
+/// that they will fill slots (which could cause the load of a LSU reject to
+/// sneak into a d-group with a store).
+def FMR : XForm_26<63, 72, (outs F4RC:$frD), (ins F4RC:$frB),
+ "fmr $frD, $frB", FPGeneral,
+ []>, // (set F4RC:$frD, F4RC:$frB)
+ PPC970_Unit_Pseudo;
+
+let PPC970_Unit = 3 in { // FPU Operations.
+// These are artificially split into two different forms, for 4/8 byte FP.
+def FABSS : XForm_26<63, 264, (outs F4RC:$frD), (ins F4RC:$frB),
+ "fabs $frD, $frB", FPGeneral,
+ [(set F4RC:$frD, (fabs F4RC:$frB))]>;
+def FABSD : XForm_26<63, 264, (outs F8RC:$frD), (ins F8RC:$frB),
+ "fabs $frD, $frB", FPGeneral,
+ [(set F8RC:$frD, (fabs F8RC:$frB))]>;
+def FNABSS : XForm_26<63, 136, (outs F4RC:$frD), (ins F4RC:$frB),
+ "fnabs $frD, $frB", FPGeneral,
+ [(set F4RC:$frD, (fneg (fabs F4RC:$frB)))]>;
+def FNABSD : XForm_26<63, 136, (outs F8RC:$frD), (ins F8RC:$frB),
+ "fnabs $frD, $frB", FPGeneral,
+ [(set F8RC:$frD, (fneg (fabs F8RC:$frB)))]>;
+def FNEGS : XForm_26<63, 40, (outs F4RC:$frD), (ins F4RC:$frB),
+ "fneg $frD, $frB", FPGeneral,
+ [(set F4RC:$frD, (fneg F4RC:$frB))]>;
+def FNEGD : XForm_26<63, 40, (outs F8RC:$frD), (ins F8RC:$frB),
+ "fneg $frD, $frB", FPGeneral,
+ [(set F8RC:$frD, (fneg F8RC:$frB))]>;
+}
+
+
+// XL-Form instructions. condition register logical ops.
+//
+def MCRF : XLForm_3<19, 0, (outs CRRC:$BF), (ins CRRC:$BFA),
+ "mcrf $BF, $BFA", BrMCR>,
+ PPC970_DGroup_First, PPC970_Unit_CRU;
+
+def CREQV : XLForm_1<19, 289, (outs CRBITRC:$CRD),
+ (ins CRBITRC:$CRA, CRBITRC:$CRB),
+ "creqv $CRD, $CRA, $CRB", BrCR,
+ []>;
+
+def CROR : XLForm_1<19, 449, (outs CRBITRC:$CRD),
+ (ins CRBITRC:$CRA, CRBITRC:$CRB),
+ "cror $CRD, $CRA, $CRB", BrCR,
+ []>;
+
+def CRSET : XLForm_1_ext<19, 289, (outs CRBITRC:$dst), (ins),
+ "creqv $dst, $dst, $dst", BrCR,
+ []>;
+
+def CRUNSET: XLForm_1_ext<19, 193, (outs CRBITRC:$dst), (ins),
+ "crxor $dst, $dst, $dst", BrCR,
+ []>;
+
+// XFX-Form instructions. Instructions that deal with SPRs.
+//
+let Uses = [CTR] in {
+def MFCTR : XFXForm_1_ext<31, 339, 9, (outs GPRC:$rT), (ins),
+ "mfctr $rT", SprMFSPR>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+let Defs = [CTR], Pattern = [(PPCmtctr GPRC:$rS)] in {
+def MTCTR : XFXForm_7_ext<31, 467, 9, (outs), (ins GPRC:$rS),
+ "mtctr $rS", SprMTSPR>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+
+let Defs = [LR] in {
+def MTLR : XFXForm_7_ext<31, 467, 8, (outs), (ins GPRC:$rS),
+ "mtlr $rS", SprMTSPR>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+let Uses = [LR] in {
+def MFLR : XFXForm_1_ext<31, 339, 8, (outs GPRC:$rT), (ins),
+ "mflr $rT", SprMFSPR>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+
+// Move to/from VRSAVE: despite being a SPR, the VRSAVE register is renamed like
+// a GPR on the PPC970. As such, copies in and out have the same performance
+// characteristics as an OR instruction.
+def MTVRSAVE : XFXForm_7_ext<31, 467, 256, (outs), (ins GPRC:$rS),
+ "mtspr 256, $rS", IntGeneral>,
+ PPC970_DGroup_Single, PPC970_Unit_FXU;
+def MFVRSAVE : XFXForm_1_ext<31, 339, 256, (outs GPRC:$rT), (ins),
+ "mfspr $rT, 256", IntGeneral>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+
+def MTCRF : XFXForm_5<31, 144, (outs), (ins crbitm:$FXM, GPRC:$rS),
+ "mtcrf $FXM, $rS", BrMCRX>,
+ PPC970_MicroCode, PPC970_Unit_CRU;
+
+// This is a pseudo for MFCR, which implicitly uses all 8 of its subregisters;
+// declaring that here gives the local register allocator problems with this:
+// vreg = MCRF CR0
+// MFCR <kill of whatever preg got assigned to vreg>
+// while not declaring it breaks DeadMachineInstructionElimination.
+// As it turns out, in all cases where we currently use this,
+// we're only interested in one subregister of it. Represent this in the
+// instruction to keep the register allocator from becoming confused.
+//
+// FIXME: Make this a real Pseudo instruction when the JIT switches to MC.
+def MFCRpseud: XFXForm_3<31, 19, (outs GPRC:$rT), (ins crbitm:$FXM),
+ "", SprMFCR>,
+ PPC970_MicroCode, PPC970_Unit_CRU;
+
+def MFCR : XFXForm_3<31, 19, (outs GPRC:$rT), (ins),
+ "mfcr $rT", SprMFCR>,
+ PPC970_MicroCode, PPC970_Unit_CRU;
+
+def MFOCRF: XFXForm_5a<31, 19, (outs GPRC:$rT), (ins crbitm:$FXM),
+ "mfcr $rT, $FXM", SprMFCR>,
+ PPC970_DGroup_First, PPC970_Unit_CRU;
+
+// Instructions to manipulate FPSCR. Only long double handling uses these.
+// FPSCR is not modelled; we use the SDNode Flag to keep things in order.
+
+let Uses = [RM], Defs = [RM] in {
+ def MTFSB0 : XForm_43<63, 70, (outs), (ins u5imm:$FM),
+ "mtfsb0 $FM", IntMTFSB0,
+ [(PPCmtfsb0 (i32 imm:$FM))]>,
+ PPC970_DGroup_Single, PPC970_Unit_FPU;
+ def MTFSB1 : XForm_43<63, 38, (outs), (ins u5imm:$FM),
+ "mtfsb1 $FM", IntMTFSB0,
+ [(PPCmtfsb1 (i32 imm:$FM))]>,
+ PPC970_DGroup_Single, PPC970_Unit_FPU;
+ // MTFSF does not actually produce an FP result. We pretend it copies
+ // input reg B to the output. If we didn't do this it would look like the
+ // instruction had no outputs (because we aren't modelling the FPSCR) and
+ // it would be deleted.
+ def MTFSF : XFLForm<63, 711, (outs F8RC:$FRA),
+ (ins i32imm:$FM, F8RC:$rT, F8RC:$FRB),
+ "mtfsf $FM, $rT", "$FRB = $FRA", IntMTFSB0,
+ [(set F8RC:$FRA, (PPCmtfsf (i32 imm:$FM),
+ F8RC:$rT, F8RC:$FRB))]>,
+ PPC970_DGroup_Single, PPC970_Unit_FPU;
+}
+let Uses = [RM] in {
+ def MFFS : XForm_42<63, 583, (outs F8RC:$rT), (ins),
+ "mffs $rT", IntMFFS,
+ [(set F8RC:$rT, (PPCmffs))]>,
+ PPC970_DGroup_Single, PPC970_Unit_FPU;
+ def FADDrtz: AForm_2<63, 21,
+ (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB),
+ "fadd $FRT, $FRA, $FRB", FPGeneral,
+ [(set F8RC:$FRT, (PPCfaddrtz F8RC:$FRA, F8RC:$FRB))]>,
+ PPC970_DGroup_Single, PPC970_Unit_FPU;
+}
+
+
+let PPC970_Unit = 1 in { // FXU Operations.
+
+// XO-Form instructions. Arithmetic instructions that can set overflow bit
+//
+def ADD4 : XOForm_1<31, 266, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+ "add $rT, $rA, $rB", IntGeneral,
+ [(set GPRC:$rT, (add GPRC:$rA, GPRC:$rB))]>;
+let Defs = [CARRY] in {
+def ADDC : XOForm_1<31, 10, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+ "addc $rT, $rA, $rB", IntGeneral,
+ [(set GPRC:$rT, (addc GPRC:$rA, GPRC:$rB))]>,
+ PPC970_DGroup_Cracked;
+}
+def DIVW : XOForm_1<31, 491, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+ "divw $rT, $rA, $rB", IntDivW,
+ [(set GPRC:$rT, (sdiv GPRC:$rA, GPRC:$rB))]>,
+ PPC970_DGroup_First, PPC970_DGroup_Cracked;
+def DIVWU : XOForm_1<31, 459, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+ "divwu $rT, $rA, $rB", IntDivW,
+ [(set GPRC:$rT, (udiv GPRC:$rA, GPRC:$rB))]>,
+ PPC970_DGroup_First, PPC970_DGroup_Cracked;
+def MULHW : XOForm_1<31, 75, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+ "mulhw $rT, $rA, $rB", IntMulHW,
+ [(set GPRC:$rT, (mulhs GPRC:$rA, GPRC:$rB))]>;
+def MULHWU : XOForm_1<31, 11, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+ "mulhwu $rT, $rA, $rB", IntMulHWU,
+ [(set GPRC:$rT, (mulhu GPRC:$rA, GPRC:$rB))]>;
+def MULLW : XOForm_1<31, 235, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+ "mullw $rT, $rA, $rB", IntMulHW,
+ [(set GPRC:$rT, (mul GPRC:$rA, GPRC:$rB))]>;
+def SUBF : XOForm_1<31, 40, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+ "subf $rT, $rA, $rB", IntGeneral,
+ [(set GPRC:$rT, (sub GPRC:$rB, GPRC:$rA))]>;
+let Defs = [CARRY] in {
+def SUBFC : XOForm_1<31, 8, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+ "subfc $rT, $rA, $rB", IntGeneral,
+ [(set GPRC:$rT, (subc GPRC:$rB, GPRC:$rA))]>,
+ PPC970_DGroup_Cracked;
+}
+def NEG : XOForm_3<31, 104, 0, (outs GPRC:$rT), (ins GPRC:$rA),
+ "neg $rT, $rA", IntGeneral,
+ [(set GPRC:$rT, (ineg GPRC:$rA))]>;
+let Uses = [CARRY], Defs = [CARRY] in {
+def ADDE : XOForm_1<31, 138, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+ "adde $rT, $rA, $rB", IntGeneral,
+ [(set GPRC:$rT, (adde GPRC:$rA, GPRC:$rB))]>;
+def ADDME : XOForm_3<31, 234, 0, (outs GPRC:$rT), (ins GPRC:$rA),
+ "addme $rT, $rA", IntGeneral,
+ [(set GPRC:$rT, (adde GPRC:$rA, -1))]>;
+def ADDZE : XOForm_3<31, 202, 0, (outs GPRC:$rT), (ins GPRC:$rA),
+ "addze $rT, $rA", IntGeneral,
+ [(set GPRC:$rT, (adde GPRC:$rA, 0))]>;
+def SUBFE : XOForm_1<31, 136, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+ "subfe $rT, $rA, $rB", IntGeneral,
+ [(set GPRC:$rT, (sube GPRC:$rB, GPRC:$rA))]>;
+def SUBFME : XOForm_3<31, 232, 0, (outs GPRC:$rT), (ins GPRC:$rA),
+ "subfme $rT, $rA", IntGeneral,
+ [(set GPRC:$rT, (sube -1, GPRC:$rA))]>;
+def SUBFZE : XOForm_3<31, 200, 0, (outs GPRC:$rT), (ins GPRC:$rA),
+ "subfze $rT, $rA", IntGeneral,
+ [(set GPRC:$rT, (sube 0, GPRC:$rA))]>;
+}
+}
+
+// A-Form instructions. Most of the instructions executed in the FPU are of
+// this type.
+//
+let PPC970_Unit = 3 in { // FPU Operations.
+let Uses = [RM] in {
+ def FMADD : AForm_1<63, 29,
+ (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
+ "fmadd $FRT, $FRA, $FRC, $FRB", FPFused,
+ [(set F8RC:$FRT, (fadd (fmul F8RC:$FRA, F8RC:$FRC),
+ F8RC:$FRB))]>,
+ Requires<[FPContractions]>;
+ def FMADDS : AForm_1<59, 29,
+ (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
+ "fmadds $FRT, $FRA, $FRC, $FRB", FPGeneral,
+ [(set F4RC:$FRT, (fadd (fmul F4RC:$FRA, F4RC:$FRC),
+ F4RC:$FRB))]>,
+ Requires<[FPContractions]>;
+ def FMSUB : AForm_1<63, 28,
+ (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
+ "fmsub $FRT, $FRA, $FRC, $FRB", FPFused,
+ [(set F8RC:$FRT, (fsub (fmul F8RC:$FRA, F8RC:$FRC),
+ F8RC:$FRB))]>,
+ Requires<[FPContractions]>;
+ def FMSUBS : AForm_1<59, 28,
+ (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
+ "fmsubs $FRT, $FRA, $FRC, $FRB", FPGeneral,
+ [(set F4RC:$FRT, (fsub (fmul F4RC:$FRA, F4RC:$FRC),
+ F4RC:$FRB))]>,
+ Requires<[FPContractions]>;
+ def FNMADD : AForm_1<63, 31,
+ (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
+ "fnmadd $FRT, $FRA, $FRC, $FRB", FPFused,
+ [(set F8RC:$FRT, (fneg (fadd (fmul F8RC:$FRA, F8RC:$FRC),
+ F8RC:$FRB)))]>,
+ Requires<[FPContractions]>;
+ def FNMADDS : AForm_1<59, 31,
+ (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
+ "fnmadds $FRT, $FRA, $FRC, $FRB", FPGeneral,
+ [(set F4RC:$FRT, (fneg (fadd (fmul F4RC:$FRA, F4RC:$FRC),
+ F4RC:$FRB)))]>,
+ Requires<[FPContractions]>;
+ def FNMSUB : AForm_1<63, 30,
+ (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
+ "fnmsub $FRT, $FRA, $FRC, $FRB", FPFused,
+ [(set F8RC:$FRT, (fneg (fsub (fmul F8RC:$FRA, F8RC:$FRC),
+ F8RC:$FRB)))]>,
+ Requires<[FPContractions]>;
+ def FNMSUBS : AForm_1<59, 30,
+ (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
+ "fnmsubs $FRT, $FRA, $FRC, $FRB", FPGeneral,
+ [(set F4RC:$FRT, (fneg (fsub (fmul F4RC:$FRA, F4RC:$FRC),
+ F4RC:$FRB)))]>,
+ Requires<[FPContractions]>;
+}
+// FSEL is artificially split into 4 and 8-byte forms for the result. To avoid
+// having 4 of these, force the comparison to always be an 8-byte double (code
+// should use an FMRSD if the input comparison value really wants to be a float)
+// and 4/8 byte forms for the result and operand type..
+def FSELD : AForm_1<63, 23,
+ (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
+ "fsel $FRT, $FRA, $FRC, $FRB", FPGeneral,
+ [(set F8RC:$FRT, (PPCfsel F8RC:$FRA,F8RC:$FRC,F8RC:$FRB))]>;
+def FSELS : AForm_1<63, 23,
+ (outs F4RC:$FRT), (ins F8RC:$FRA, F4RC:$FRC, F4RC:$FRB),
+ "fsel $FRT, $FRA, $FRC, $FRB", FPGeneral,
+ [(set F4RC:$FRT, (PPCfsel F8RC:$FRA,F4RC:$FRC,F4RC:$FRB))]>;
+let Uses = [RM] in {
+ def FADD : AForm_2<63, 21,
+ (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB),
+ "fadd $FRT, $FRA, $FRB", FPGeneral,
+ [(set F8RC:$FRT, (fadd F8RC:$FRA, F8RC:$FRB))]>;
+ def FADDS : AForm_2<59, 21,
+ (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB),
+ "fadds $FRT, $FRA, $FRB", FPGeneral,
+ [(set F4RC:$FRT, (fadd F4RC:$FRA, F4RC:$FRB))]>;
+ def FDIV : AForm_2<63, 18,
+ (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB),
+ "fdiv $FRT, $FRA, $FRB", FPDivD,
+ [(set F8RC:$FRT, (fdiv F8RC:$FRA, F8RC:$FRB))]>;
+ def FDIVS : AForm_2<59, 18,
+ (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB),
+ "fdivs $FRT, $FRA, $FRB", FPDivS,
+ [(set F4RC:$FRT, (fdiv F4RC:$FRA, F4RC:$FRB))]>;
+ def FMUL : AForm_3<63, 25,
+ (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB),
+ "fmul $FRT, $FRA, $FRB", FPFused,
+ [(set F8RC:$FRT, (fmul F8RC:$FRA, F8RC:$FRB))]>;
+ def FMULS : AForm_3<59, 25,
+ (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB),
+ "fmuls $FRT, $FRA, $FRB", FPGeneral,
+ [(set F4RC:$FRT, (fmul F4RC:$FRA, F4RC:$FRB))]>;
+ def FSUB : AForm_2<63, 20,
+ (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB),
+ "fsub $FRT, $FRA, $FRB", FPGeneral,
+ [(set F8RC:$FRT, (fsub F8RC:$FRA, F8RC:$FRB))]>;
+ def FSUBS : AForm_2<59, 20,
+ (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB),
+ "fsubs $FRT, $FRA, $FRB", FPGeneral,
+ [(set F4RC:$FRT, (fsub F4RC:$FRA, F4RC:$FRB))]>;
+ }
+}
+
+let PPC970_Unit = 1 in { // FXU Operations.
+// M-Form instructions. rotate and mask instructions.
+//
+let isCommutable = 1 in {
+// RLWIMI can be commuted if the rotate amount is zero.
+def RLWIMI : MForm_2<20,
+ (outs GPRC:$rA), (ins GPRC:$rSi, GPRC:$rS, u5imm:$SH, u5imm:$MB,
+ u5imm:$ME), "rlwimi $rA, $rS, $SH, $MB, $ME", IntRotate,
+ []>, PPC970_DGroup_Cracked, RegConstraint<"$rSi = $rA">,
+ NoEncode<"$rSi">;
+}
+def RLWINM : MForm_2<21,
+ (outs GPRC:$rA), (ins GPRC:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME),
+ "rlwinm $rA, $rS, $SH, $MB, $ME", IntGeneral,
+ []>;
+def RLWINMo : MForm_2<21,
+ (outs GPRC:$rA), (ins GPRC:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME),
+ "rlwinm. $rA, $rS, $SH, $MB, $ME", IntGeneral,
+ []>, isDOT, PPC970_DGroup_Cracked;
+def RLWNM : MForm_2<23,
+ (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB, u5imm:$MB, u5imm:$ME),
+ "rlwnm $rA, $rS, $rB, $MB, $ME", IntGeneral,
+ []>;
+}
+
+
+//===----------------------------------------------------------------------===//
+// PowerPC Instruction Patterns
+//
+
+// Arbitrary immediate support. Implement in terms of LIS/ORI.
+def : Pat<(i32 imm:$imm),
+ (ORI (LIS (HI16 imm:$imm)), (LO16 imm:$imm))>;
+
+// Implement the 'not' operation with the NOR instruction.
+def NOT : Pat<(not GPRC:$in),
+ (NOR GPRC:$in, GPRC:$in)>;
+
+// ADD an arbitrary immediate.
+def : Pat<(add GPRC:$in, imm:$imm),
+ (ADDIS (ADDI GPRC:$in, (LO16 imm:$imm)), (HA16 imm:$imm))>;
+// OR an arbitrary immediate.
+def : Pat<(or GPRC:$in, imm:$imm),
+ (ORIS (ORI GPRC:$in, (LO16 imm:$imm)), (HI16 imm:$imm))>;
+// XOR an arbitrary immediate.
+def : Pat<(xor GPRC:$in, imm:$imm),
+ (XORIS (XORI GPRC:$in, (LO16 imm:$imm)), (HI16 imm:$imm))>;
+// SUBFIC
+def : Pat<(sub immSExt16:$imm, GPRC:$in),
+ (SUBFIC GPRC:$in, imm:$imm)>;
+
+// SHL/SRL
+def : Pat<(shl GPRC:$in, (i32 imm:$imm)),
+ (RLWINM GPRC:$in, imm:$imm, 0, (SHL32 imm:$imm))>;
+def : Pat<(srl GPRC:$in, (i32 imm:$imm)),
+ (RLWINM GPRC:$in, (SRL32 imm:$imm), imm:$imm, 31)>;
+
+// ROTL
+def : Pat<(rotl GPRC:$in, GPRC:$sh),
+ (RLWNM GPRC:$in, GPRC:$sh, 0, 31)>;
+def : Pat<(rotl GPRC:$in, (i32 imm:$imm)),
+ (RLWINM GPRC:$in, imm:$imm, 0, 31)>;
+
+// RLWNM
+def : Pat<(and (rotl GPRC:$in, GPRC:$sh), maskimm32:$imm),
+ (RLWNM GPRC:$in, GPRC:$sh, (MB maskimm32:$imm), (ME maskimm32:$imm))>;
+
+// Calls
+def : Pat<(PPCcall_Darwin (i32 tglobaladdr:$dst)),
+ (BL_Darwin tglobaladdr:$dst)>;
+def : Pat<(PPCcall_Darwin (i32 texternalsym:$dst)),
+ (BL_Darwin texternalsym:$dst)>;
+def : Pat<(PPCcall_SVR4 (i32 tglobaladdr:$dst)),
+ (BL_SVR4 tglobaladdr:$dst)>;
+def : Pat<(PPCcall_SVR4 (i32 texternalsym:$dst)),
+ (BL_SVR4 texternalsym:$dst)>;
+
+
+def : Pat<(PPCtc_return (i32 tglobaladdr:$dst), imm:$imm),
+ (TCRETURNdi tglobaladdr:$dst, imm:$imm)>;
+
+def : Pat<(PPCtc_return (i32 texternalsym:$dst), imm:$imm),
+ (TCRETURNdi texternalsym:$dst, imm:$imm)>;
+
+def : Pat<(PPCtc_return CTRRC:$dst, imm:$imm),
+ (TCRETURNri CTRRC:$dst, imm:$imm)>;
+
+
+
+// Hi and Lo for Darwin Global Addresses.
+def : Pat<(PPChi tglobaladdr:$in, 0), (LIS tglobaladdr:$in)>;
+def : Pat<(PPClo tglobaladdr:$in, 0), (LI tglobaladdr:$in)>;
+def : Pat<(PPChi tconstpool:$in, 0), (LIS tconstpool:$in)>;
+def : Pat<(PPClo tconstpool:$in, 0), (LI tconstpool:$in)>;
+def : Pat<(PPChi tjumptable:$in, 0), (LIS tjumptable:$in)>;
+def : Pat<(PPClo tjumptable:$in, 0), (LI tjumptable:$in)>;
+def : Pat<(PPChi tblockaddress:$in, 0), (LIS tblockaddress:$in)>;
+def : Pat<(PPClo tblockaddress:$in, 0), (LI tblockaddress:$in)>;
+def : Pat<(add GPRC:$in, (PPChi tglobaladdr:$g, 0)),
+ (ADDIS GPRC:$in, tglobaladdr:$g)>;
+def : Pat<(add GPRC:$in, (PPChi tconstpool:$g, 0)),
+ (ADDIS GPRC:$in, tconstpool:$g)>;
+def : Pat<(add GPRC:$in, (PPChi tjumptable:$g, 0)),
+ (ADDIS GPRC:$in, tjumptable:$g)>;
+def : Pat<(add GPRC:$in, (PPChi tblockaddress:$g, 0)),
+ (ADDIS GPRC:$in, tblockaddress:$g)>;
+
+// Fused negative multiply subtract, alternate pattern
+def : Pat<(fsub F8RC:$B, (fmul F8RC:$A, F8RC:$C)),
+ (FNMSUB F8RC:$A, F8RC:$C, F8RC:$B)>,
+ Requires<[FPContractions]>;
+def : Pat<(fsub F4RC:$B, (fmul F4RC:$A, F4RC:$C)),
+ (FNMSUBS F4RC:$A, F4RC:$C, F4RC:$B)>,
+ Requires<[FPContractions]>;
+
+// Standard shifts. These are represented separately from the real shifts above
+// so that we can distinguish between shifts that allow 5-bit and 6-bit shift
+// amounts.
+def : Pat<(sra GPRC:$rS, GPRC:$rB),
+ (SRAW GPRC:$rS, GPRC:$rB)>;
+def : Pat<(srl GPRC:$rS, GPRC:$rB),
+ (SRW GPRC:$rS, GPRC:$rB)>;
+def : Pat<(shl GPRC:$rS, GPRC:$rB),
+ (SLW GPRC:$rS, GPRC:$rB)>;
+
+def : Pat<(zextloadi1 iaddr:$src),
+ (LBZ iaddr:$src)>;
+def : Pat<(zextloadi1 xaddr:$src),
+ (LBZX xaddr:$src)>;
+def : Pat<(extloadi1 iaddr:$src),
+ (LBZ iaddr:$src)>;
+def : Pat<(extloadi1 xaddr:$src),
+ (LBZX xaddr:$src)>;
+def : Pat<(extloadi8 iaddr:$src),
+ (LBZ iaddr:$src)>;
+def : Pat<(extloadi8 xaddr:$src),
+ (LBZX xaddr:$src)>;
+def : Pat<(extloadi16 iaddr:$src),
+ (LHZ iaddr:$src)>;
+def : Pat<(extloadi16 xaddr:$src),
+ (LHZX xaddr:$src)>;
+def : Pat<(f64 (extloadf32 iaddr:$src)),
+ (COPY_TO_REGCLASS (LFS iaddr:$src), F8RC)>;
+def : Pat<(f64 (extloadf32 xaddr:$src)),
+ (COPY_TO_REGCLASS (LFSX xaddr:$src), F8RC)>;
+
+def : Pat<(f64 (fextend F4RC:$src)),
+ (COPY_TO_REGCLASS F4RC:$src, F8RC)>;
+
+// Memory barriers
+def : Pat<(membarrier (i32 imm /*ll*/),
+ (i32 imm /*ls*/),
+ (i32 imm /*sl*/),
+ (i32 imm /*ss*/),
+ (i32 imm /*device*/)),
+ (SYNC)>;
+
+def : Pat<(atomic_fence (imm), (imm)), (SYNC)>;
+
+include "PPCInstrAltivec.td"
+include "PPCInstr64Bit.td"
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCJITInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCJITInfo.cpp
new file mode 100644
index 0000000..4590f00
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCJITInfo.cpp
@@ -0,0 +1,468 @@
+//===-- PPCJITInfo.cpp - Implement the JIT interfaces for the PowerPC -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the JIT interfaces for the 32-bit PowerPC target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jit"
+#include "PPCJITInfo.h"
+#include "PPCRelocations.h"
+#include "PPCTargetMachine.h"
+#include "llvm/Function.h"
+#include "llvm/Support/Memory.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+static TargetJITInfo::JITCompilerFn JITCompilerFunction;
+
+#define BUILD_ADDIS(RD,RS,IMM16) \
+ ((15 << 26) | ((RD) << 21) | ((RS) << 16) | ((IMM16) & 65535))
+#define BUILD_ORI(RD,RS,UIMM16) \
+ ((24 << 26) | ((RS) << 21) | ((RD) << 16) | ((UIMM16) & 65535))
+#define BUILD_ORIS(RD,RS,UIMM16) \
+ ((25 << 26) | ((RS) << 21) | ((RD) << 16) | ((UIMM16) & 65535))
+#define BUILD_RLDICR(RD,RS,SH,ME) \
+ ((30 << 26) | ((RS) << 21) | ((RD) << 16) | (((SH) & 31) << 11) | \
+ (((ME) & 63) << 6) | (1 << 2) | ((((SH) >> 5) & 1) << 1))
+#define BUILD_MTSPR(RS,SPR) \
+ ((31 << 26) | ((RS) << 21) | ((SPR) << 16) | (467 << 1))
+#define BUILD_BCCTRx(BO,BI,LINK) \
+ ((19 << 26) | ((BO) << 21) | ((BI) << 16) | (528 << 1) | ((LINK) & 1))
+#define BUILD_B(TARGET, LINK) \
+ ((18 << 26) | (((TARGET) & 0x00FFFFFF) << 2) | ((LINK) & 1))
+
+// Pseudo-ops
+#define BUILD_LIS(RD,IMM16) BUILD_ADDIS(RD,0,IMM16)
+#define BUILD_SLDI(RD,RS,IMM6) BUILD_RLDICR(RD,RS,IMM6,63-IMM6)
+#define BUILD_MTCTR(RS) BUILD_MTSPR(RS,9)
+#define BUILD_BCTR(LINK) BUILD_BCCTRx(20,0,LINK)
+
+static void EmitBranchToAt(uint64_t At, uint64_t To, bool isCall, bool is64Bit){
+ intptr_t Offset = ((intptr_t)To - (intptr_t)At) >> 2;
+ unsigned *AtI = (unsigned*)(intptr_t)At;
+
+ if (Offset >= -(1 << 23) && Offset < (1 << 23)) { // In range?
+ AtI[0] = BUILD_B(Offset, isCall); // b/bl target
+ } else if (!is64Bit) {
+ AtI[0] = BUILD_LIS(12, To >> 16); // lis r12, hi16(address)
+ AtI[1] = BUILD_ORI(12, 12, To); // ori r12, r12, lo16(address)
+ AtI[2] = BUILD_MTCTR(12); // mtctr r12
+ AtI[3] = BUILD_BCTR(isCall); // bctr/bctrl
+ } else {
+ AtI[0] = BUILD_LIS(12, To >> 48); // lis r12, hi16(address)
+ AtI[1] = BUILD_ORI(12, 12, To >> 32); // ori r12, r12, lo16(address)
+ AtI[2] = BUILD_SLDI(12, 12, 32); // sldi r12, r12, 32
+ AtI[3] = BUILD_ORIS(12, 12, To >> 16); // oris r12, r12, hi16(address)
+ AtI[4] = BUILD_ORI(12, 12, To); // ori r12, r12, lo16(address)
+ AtI[5] = BUILD_MTCTR(12); // mtctr r12
+ AtI[6] = BUILD_BCTR(isCall); // bctr/bctrl
+ }
+}
+
+extern "C" void PPC32CompilationCallback();
+extern "C" void PPC64CompilationCallback();
+
+#if (defined(__POWERPC__) || defined (__ppc__) || defined(_POWER)) && \
+ !(defined(__ppc64__) || defined(__FreeBSD__))
+// CompilationCallback stub - We can't use a C function with inline assembly in
+// it, because we the prolog/epilog inserted by GCC won't work for us. Instead,
+// write our own wrapper, which does things our way, so we have complete control
+// over register saving and restoring.
+asm(
+ ".text\n"
+ ".align 2\n"
+ ".globl _PPC32CompilationCallback\n"
+"_PPC32CompilationCallback:\n"
+ // Make space for 8 ints r[3-10] and 13 doubles f[1-13] and the
+ // FIXME: need to save v[0-19] for altivec?
+ // FIXME: could shrink frame
+ // Set up a proper stack frame
+ // FIXME Layout
+ // PowerPC32 ABI linkage - 24 bytes
+ // parameters - 32 bytes
+ // 13 double registers - 104 bytes
+ // 8 int registers - 32 bytes
+ "mflr r0\n"
+ "stw r0, 8(r1)\n"
+ "stwu r1, -208(r1)\n"
+ // Save all int arg registers
+ "stw r10, 204(r1)\n" "stw r9, 200(r1)\n"
+ "stw r8, 196(r1)\n" "stw r7, 192(r1)\n"
+ "stw r6, 188(r1)\n" "stw r5, 184(r1)\n"
+ "stw r4, 180(r1)\n" "stw r3, 176(r1)\n"
+ // Save all call-clobbered FP regs.
+ "stfd f13, 168(r1)\n" "stfd f12, 160(r1)\n"
+ "stfd f11, 152(r1)\n" "stfd f10, 144(r1)\n"
+ "stfd f9, 136(r1)\n" "stfd f8, 128(r1)\n"
+ "stfd f7, 120(r1)\n" "stfd f6, 112(r1)\n"
+ "stfd f5, 104(r1)\n" "stfd f4, 96(r1)\n"
+ "stfd f3, 88(r1)\n" "stfd f2, 80(r1)\n"
+ "stfd f1, 72(r1)\n"
+ // Arguments to Compilation Callback:
+ // r3 - our lr (address of the call instruction in stub plus 4)
+ // r4 - stub's lr (address of instruction that called the stub plus 4)
+ // r5 - is64Bit - always 0.
+ "mr r3, r0\n"
+ "lwz r2, 208(r1)\n" // stub's frame
+ "lwz r4, 8(r2)\n" // stub's lr
+ "li r5, 0\n" // 0 == 32 bit
+ "bl _PPCCompilationCallbackC\n"
+ "mtctr r3\n"
+ // Restore all int arg registers
+ "lwz r10, 204(r1)\n" "lwz r9, 200(r1)\n"
+ "lwz r8, 196(r1)\n" "lwz r7, 192(r1)\n"
+ "lwz r6, 188(r1)\n" "lwz r5, 184(r1)\n"
+ "lwz r4, 180(r1)\n" "lwz r3, 176(r1)\n"
+ // Restore all FP arg registers
+ "lfd f13, 168(r1)\n" "lfd f12, 160(r1)\n"
+ "lfd f11, 152(r1)\n" "lfd f10, 144(r1)\n"
+ "lfd f9, 136(r1)\n" "lfd f8, 128(r1)\n"
+ "lfd f7, 120(r1)\n" "lfd f6, 112(r1)\n"
+ "lfd f5, 104(r1)\n" "lfd f4, 96(r1)\n"
+ "lfd f3, 88(r1)\n" "lfd f2, 80(r1)\n"
+ "lfd f1, 72(r1)\n"
+ // Pop 3 frames off the stack and branch to target
+ "lwz r1, 208(r1)\n"
+ "lwz r2, 8(r1)\n"
+ "mtlr r2\n"
+ "bctr\n"
+ );
+
+#elif defined(__PPC__) && !defined(__ppc64__)
+// Linux & FreeBSD / PPC 32 support
+
+// CompilationCallback stub - We can't use a C function with inline assembly in
+// it, because we the prolog/epilog inserted by GCC won't work for us. Instead,
+// write our own wrapper, which does things our way, so we have complete control
+// over register saving and restoring.
+asm(
+ ".text\n"
+ ".align 2\n"
+ ".globl PPC32CompilationCallback\n"
+"PPC32CompilationCallback:\n"
+ // Make space for 8 ints r[3-10] and 8 doubles f[1-8] and the
+ // FIXME: need to save v[0-19] for altivec?
+ // FIXME: could shrink frame
+ // Set up a proper stack frame
+ // FIXME Layout
+ // 8 double registers - 64 bytes
+ // 8 int registers - 32 bytes
+ "mflr 0\n"
+ "stw 0, 4(1)\n"
+ "stwu 1, -104(1)\n"
+ // Save all int arg registers
+ "stw 10, 100(1)\n" "stw 9, 96(1)\n"
+ "stw 8, 92(1)\n" "stw 7, 88(1)\n"
+ "stw 6, 84(1)\n" "stw 5, 80(1)\n"
+ "stw 4, 76(1)\n" "stw 3, 72(1)\n"
+ // Save all call-clobbered FP regs.
+ "stfd 8, 64(1)\n"
+ "stfd 7, 56(1)\n" "stfd 6, 48(1)\n"
+ "stfd 5, 40(1)\n" "stfd 4, 32(1)\n"
+ "stfd 3, 24(1)\n" "stfd 2, 16(1)\n"
+ "stfd 1, 8(1)\n"
+ // Arguments to Compilation Callback:
+ // r3 - our lr (address of the call instruction in stub plus 4)
+ // r4 - stub's lr (address of instruction that called the stub plus 4)
+ // r5 - is64Bit - always 0.
+ "mr 3, 0\n"
+ "lwz 5, 104(1)\n" // stub's frame
+ "lwz 4, 4(5)\n" // stub's lr
+ "li 5, 0\n" // 0 == 32 bit
+ "bl PPCCompilationCallbackC\n"
+ "mtctr 3\n"
+ // Restore all int arg registers
+ "lwz 10, 100(1)\n" "lwz 9, 96(1)\n"
+ "lwz 8, 92(1)\n" "lwz 7, 88(1)\n"
+ "lwz 6, 84(1)\n" "lwz 5, 80(1)\n"
+ "lwz 4, 76(1)\n" "lwz 3, 72(1)\n"
+ // Restore all FP arg registers
+ "lfd 8, 64(1)\n"
+ "lfd 7, 56(1)\n" "lfd 6, 48(1)\n"
+ "lfd 5, 40(1)\n" "lfd 4, 32(1)\n"
+ "lfd 3, 24(1)\n" "lfd 2, 16(1)\n"
+ "lfd 1, 8(1)\n"
+ // Pop 3 frames off the stack and branch to target
+ "lwz 1, 104(1)\n"
+ "lwz 0, 4(1)\n"
+ "mtlr 0\n"
+ "bctr\n"
+ );
+#else
+void PPC32CompilationCallback() {
+ llvm_unreachable("This is not a power pc, you can't execute this!");
+}
+#endif
+
+#if (defined(__POWERPC__) || defined (__ppc__) || defined(_POWER)) && \
+ defined(__ppc64__)
+#ifdef __ELF__
+asm(
+ ".text\n"
+ ".align 2\n"
+ ".globl PPC64CompilationCallback\n"
+ ".section \".opd\",\"aw\"\n"
+ ".align 3\n"
+"PPC64CompilationCallback:\n"
+ ".quad .L.PPC64CompilationCallback,.TOC.@tocbase,0\n"
+ ".size PPC64CompilationCallback,24\n"
+ ".previous\n"
+ ".align 4\n"
+ ".type PPC64CompilationCallback,@function\n"
+".L.PPC64CompilationCallback:\n"
+#else
+asm(
+ ".text\n"
+ ".align 2\n"
+ ".globl _PPC64CompilationCallback\n"
+"_PPC64CompilationCallback:\n"
+#endif
+ // Make space for 8 ints r[3-10] and 13 doubles f[1-13] and the
+ // FIXME: need to save v[0-19] for altivec?
+ // Set up a proper stack frame
+ // Layout
+ // PowerPC64 ABI linkage - 48 bytes
+ // parameters - 64 bytes
+ // 13 double registers - 104 bytes
+ // 8 int registers - 64 bytes
+ "mflr 0\n"
+ "std 0, 16(1)\n"
+ "stdu 1, -280(1)\n"
+ // Save all int arg registers
+ "std 10, 272(1)\n" "std 9, 264(1)\n"
+ "std 8, 256(1)\n" "std 7, 248(1)\n"
+ "std 6, 240(1)\n" "std 5, 232(1)\n"
+ "std 4, 224(1)\n" "std 3, 216(1)\n"
+ // Save all call-clobbered FP regs.
+ "stfd 13, 208(1)\n" "stfd 12, 200(1)\n"
+ "stfd 11, 192(1)\n" "stfd 10, 184(1)\n"
+ "stfd 9, 176(1)\n" "stfd 8, 168(1)\n"
+ "stfd 7, 160(1)\n" "stfd 6, 152(1)\n"
+ "stfd 5, 144(1)\n" "stfd 4, 136(1)\n"
+ "stfd 3, 128(1)\n" "stfd 2, 120(1)\n"
+ "stfd 1, 112(1)\n"
+ // Arguments to Compilation Callback:
+ // r3 - our lr (address of the call instruction in stub plus 4)
+ // r4 - stub's lr (address of instruction that called the stub plus 4)
+ // r5 - is64Bit - always 1.
+ "mr 3, 0\n" // return address (still in r0)
+ "ld 5, 280(1)\n" // stub's frame
+ "ld 4, 16(5)\n" // stub's lr
+ "li 5, 1\n" // 1 == 64 bit
+#ifdef __ELF__
+ "bl PPCCompilationCallbackC\n"
+ "nop\n"
+#else
+ "bl _PPCCompilationCallbackC\n"
+#endif
+ "mtctr 3\n"
+ // Restore all int arg registers
+ "ld 10, 272(1)\n" "ld 9, 264(1)\n"
+ "ld 8, 256(1)\n" "ld 7, 248(1)\n"
+ "ld 6, 240(1)\n" "ld 5, 232(1)\n"
+ "ld 4, 224(1)\n" "ld 3, 216(1)\n"
+ // Restore all FP arg registers
+ "lfd 13, 208(1)\n" "lfd 12, 200(1)\n"
+ "lfd 11, 192(1)\n" "lfd 10, 184(1)\n"
+ "lfd 9, 176(1)\n" "lfd 8, 168(1)\n"
+ "lfd 7, 160(1)\n" "lfd 6, 152(1)\n"
+ "lfd 5, 144(1)\n" "lfd 4, 136(1)\n"
+ "lfd 3, 128(1)\n" "lfd 2, 120(1)\n"
+ "lfd 1, 112(1)\n"
+ // Pop 3 frames off the stack and branch to target
+ "ld 1, 280(1)\n"
+ "ld 0, 16(1)\n"
+ "mtlr 0\n"
+ // XXX: any special TOC handling in the ELF case for JIT?
+ "bctr\n"
+ );
+#else
+void PPC64CompilationCallback() {
+ llvm_unreachable("This is not a power pc, you can't execute this!");
+}
+#endif
+
+extern "C" void *PPCCompilationCallbackC(unsigned *StubCallAddrPlus4,
+ unsigned *OrigCallAddrPlus4,
+ bool is64Bit) {
+ // Adjust the pointer to the address of the call instruction in the stub
+ // emitted by emitFunctionStub, rather than the instruction after it.
+ unsigned *StubCallAddr = StubCallAddrPlus4 - 1;
+ unsigned *OrigCallAddr = OrigCallAddrPlus4 - 1;
+
+ void *Target = JITCompilerFunction(StubCallAddr);
+
+ // Check to see if *OrigCallAddr is a 'bl' instruction, and if we can rewrite
+ // it to branch directly to the destination. If so, rewrite it so it does not
+ // need to go through the stub anymore.
+ unsigned OrigCallInst = *OrigCallAddr;
+ if ((OrigCallInst >> 26) == 18) { // Direct call.
+ intptr_t Offset = ((intptr_t)Target - (intptr_t)OrigCallAddr) >> 2;
+
+ if (Offset >= -(1 << 23) && Offset < (1 << 23)) { // In range?
+ // Clear the original target out.
+ OrigCallInst &= (63 << 26) | 3;
+ // Fill in the new target.
+ OrigCallInst |= (Offset & ((1 << 24)-1)) << 2;
+ // Replace the call.
+ *OrigCallAddr = OrigCallInst;
+ }
+ }
+
+ // Assert that we are coming from a stub that was created with our
+ // emitFunctionStub.
+ if ((*StubCallAddr >> 26) == 18)
+ StubCallAddr -= 3;
+ else {
+ assert((*StubCallAddr >> 26) == 19 && "Call in stub is not indirect!");
+ StubCallAddr -= is64Bit ? 9 : 6;
+ }
+
+ // Rewrite the stub with an unconditional branch to the target, for any users
+ // who took the address of the stub.
+ EmitBranchToAt((intptr_t)StubCallAddr, (intptr_t)Target, false, is64Bit);
+ sys::Memory::InvalidateInstructionCache(StubCallAddr, 7*4);
+
+ // Put the address of the target function to call and the address to return to
+ // after calling the target function in a place that is easy to get on the
+ // stack after we restore all regs.
+ return Target;
+}
+
+
+
+TargetJITInfo::LazyResolverFn
+PPCJITInfo::getLazyResolverFunction(JITCompilerFn Fn) {
+ JITCompilerFunction = Fn;
+ return is64Bit ? PPC64CompilationCallback : PPC32CompilationCallback;
+}
+
+TargetJITInfo::StubLayout PPCJITInfo::getStubLayout() {
+ // The stub contains up to 10 4-byte instructions, aligned at 4 bytes: 3
+ // instructions to save the caller's address if this is a lazy-compilation
+ // stub, plus a 1-, 4-, or 7-instruction sequence to load an arbitrary address
+ // into a register and jump through it.
+ StubLayout Result = {10*4, 4};
+ return Result;
+}
+
+#if (defined(__POWERPC__) || defined (__ppc__) || defined(_POWER)) && \
+defined(__APPLE__)
+extern "C" void sys_icache_invalidate(const void *Addr, size_t len);
+#endif
+
+void *PPCJITInfo::emitFunctionStub(const Function* F, void *Fn,
+ JITCodeEmitter &JCE) {
+ // If this is just a call to an external function, emit a branch instead of a
+ // call. The code is the same except for one bit of the last instruction.
+ if (Fn != (void*)(intptr_t)PPC32CompilationCallback &&
+ Fn != (void*)(intptr_t)PPC64CompilationCallback) {
+ void *Addr = (void*)JCE.getCurrentPCValue();
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ EmitBranchToAt((intptr_t)Addr, (intptr_t)Fn, false, is64Bit);
+ sys::Memory::InvalidateInstructionCache(Addr, 7*4);
+ return Addr;
+ }
+
+ void *Addr = (void*)JCE.getCurrentPCValue();
+ if (is64Bit) {
+ JCE.emitWordBE(0xf821ffb1); // stdu r1,-80(r1)
+ JCE.emitWordBE(0x7d6802a6); // mflr r11
+ JCE.emitWordBE(0xf9610060); // std r11, 96(r1)
+ } else if (TM.getSubtargetImpl()->isDarwinABI()){
+ JCE.emitWordBE(0x9421ffe0); // stwu r1,-32(r1)
+ JCE.emitWordBE(0x7d6802a6); // mflr r11
+ JCE.emitWordBE(0x91610028); // stw r11, 40(r1)
+ } else {
+ JCE.emitWordBE(0x9421ffe0); // stwu r1,-32(r1)
+ JCE.emitWordBE(0x7d6802a6); // mflr r11
+ JCE.emitWordBE(0x91610024); // stw r11, 36(r1)
+ }
+ intptr_t BranchAddr = (intptr_t)JCE.getCurrentPCValue();
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ EmitBranchToAt(BranchAddr, (intptr_t)Fn, true, is64Bit);
+ sys::Memory::InvalidateInstructionCache(Addr, 10*4);
+ return Addr;
+}
+
+
+void PPCJITInfo::relocate(void *Function, MachineRelocation *MR,
+ unsigned NumRelocs, unsigned char* GOTBase) {
+ for (unsigned i = 0; i != NumRelocs; ++i, ++MR) {
+ unsigned *RelocPos = (unsigned*)Function + MR->getMachineCodeOffset()/4;
+ intptr_t ResultPtr = (intptr_t)MR->getResultPointer();
+ switch ((PPC::RelocationType)MR->getRelocationType()) {
+ default: llvm_unreachable("Unknown relocation type!");
+ case PPC::reloc_pcrel_bx:
+ // PC-relative relocation for b and bl instructions.
+ ResultPtr = (ResultPtr-(intptr_t)RelocPos) >> 2;
+ assert(ResultPtr >= -(1 << 23) && ResultPtr < (1 << 23) &&
+ "Relocation out of range!");
+ *RelocPos |= (ResultPtr & ((1 << 24)-1)) << 2;
+ break;
+ case PPC::reloc_pcrel_bcx:
+ // PC-relative relocation for BLT,BLE,BEQ,BGE,BGT,BNE, or other
+ // bcx instructions.
+ ResultPtr = (ResultPtr-(intptr_t)RelocPos) >> 2;
+ assert(ResultPtr >= -(1 << 13) && ResultPtr < (1 << 13) &&
+ "Relocation out of range!");
+ *RelocPos |= (ResultPtr & ((1 << 14)-1)) << 2;
+ break;
+ case PPC::reloc_absolute_high: // high bits of ref -> low 16 of instr
+ case PPC::reloc_absolute_low: { // low bits of ref -> low 16 of instr
+ ResultPtr += MR->getConstantVal();
+
+ // If this is a high-part access, get the high-part.
+ if (MR->getRelocationType() == PPC::reloc_absolute_high) {
+ // If the low part will have a carry (really a borrow) from the low
+ // 16-bits into the high 16, add a bit to borrow from.
+ if (((int)ResultPtr << 16) < 0)
+ ResultPtr += 1 << 16;
+ ResultPtr >>= 16;
+ }
+
+ // Do the addition then mask, so the addition does not overflow the 16-bit
+ // immediate section of the instruction.
+ unsigned LowBits = (*RelocPos + ResultPtr) & 65535;
+ unsigned HighBits = *RelocPos & ~65535;
+ *RelocPos = LowBits | HighBits; // Slam into low 16-bits
+ break;
+ }
+ case PPC::reloc_absolute_low_ix: { // low bits of ref -> low 14 of instr
+ ResultPtr += MR->getConstantVal();
+ // Do the addition then mask, so the addition does not overflow the 16-bit
+ // immediate section of the instruction.
+ unsigned LowBits = (*RelocPos + ResultPtr) & 0xFFFC;
+ unsigned HighBits = *RelocPos & 0xFFFF0003;
+ *RelocPos = LowBits | HighBits; // Slam into low 14-bits.
+ break;
+ }
+ }
+ }
+}
+
+void PPCJITInfo::replaceMachineCodeForFunction(void *Old, void *New) {
+ EmitBranchToAt((intptr_t)Old, (intptr_t)New, false, is64Bit);
+ sys::Memory::InvalidateInstructionCache(Old, 7*4);
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCJITInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCJITInfo.h
new file mode 100644
index 0000000..47ead59
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCJITInfo.h
@@ -0,0 +1,49 @@
+//===- PPCJITInfo.h - PowerPC impl. of the JIT interface --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PowerPC implementation of the TargetJITInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef POWERPC_JITINFO_H
+#define POWERPC_JITINFO_H
+
+#include "llvm/Target/TargetJITInfo.h"
+#include "llvm/CodeGen/JITCodeEmitter.h"
+
+namespace llvm {
+ class PPCTargetMachine;
+
+ class PPCJITInfo : public TargetJITInfo {
+ protected:
+ PPCTargetMachine &TM;
+ bool is64Bit;
+ public:
+ PPCJITInfo(PPCTargetMachine &tm, bool tmIs64Bit) : TM(tm) {
+ useGOT = 0;
+ is64Bit = tmIs64Bit;
+ }
+
+ virtual StubLayout getStubLayout();
+ virtual void *emitFunctionStub(const Function* F, void *Fn,
+ JITCodeEmitter &JCE);
+ virtual LazyResolverFn getLazyResolverFunction(JITCompilerFn);
+ virtual void relocate(void *Function, MachineRelocation *MR,
+ unsigned NumRelocs, unsigned char* GOTBase);
+
+ /// replaceMachineCodeForFunction - Make it so that calling the function
+ /// whose machine code is at OLD turns into a call to NEW, perhaps by
+ /// overwriting OLD with a branch to NEW. This is used for self-modifying
+ /// code.
+ ///
+ virtual void replaceMachineCodeForFunction(void *Old, void *New);
+ };
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp b/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
new file mode 100644
index 0000000..33af426
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -0,0 +1,173 @@
+//===-- PPCMCInstLower.cpp - Convert PPC MachineInstr to an MCInst --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains code to lower PPC MachineInstrs to their corresponding
+// MCInst records.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPC.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/ADT/SmallString.h"
+using namespace llvm;
+
+static MachineModuleInfoMachO &getMachOMMI(AsmPrinter &AP) {
+ return AP.MMI->getObjFileInfo<MachineModuleInfoMachO>();
+}
+
+
+static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){
+ MCContext &Ctx = AP.OutContext;
+
+ SmallString<128> Name;
+ if (!MO.isGlobal()) {
+ assert(MO.isSymbol() && "Isn't a symbol reference");
+ Name += AP.MAI->getGlobalPrefix();
+ Name += MO.getSymbolName();
+ } else {
+ const GlobalValue *GV = MO.getGlobal();
+ bool isImplicitlyPrivate = false;
+ if (MO.getTargetFlags() == PPCII::MO_DARWIN_STUB ||
+ (MO.getTargetFlags() & PPCII::MO_NLP_FLAG))
+ isImplicitlyPrivate = true;
+
+ AP.Mang->getNameWithPrefix(Name, GV, isImplicitlyPrivate);
+ }
+
+ // If the target flags on the operand changes the name of the symbol, do that
+ // before we return the symbol.
+ if (MO.getTargetFlags() == PPCII::MO_DARWIN_STUB) {
+ Name += "$stub";
+ MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str());
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ getMachOMMI(AP).getFnStubEntry(Sym);
+ if (StubSym.getPointer())
+ return Sym;
+
+ if (MO.isGlobal()) {
+ StubSym =
+ MachineModuleInfoImpl::
+ StubValueTy(AP.Mang->getSymbol(MO.getGlobal()),
+ !MO.getGlobal()->hasInternalLinkage());
+ } else {
+ Name.erase(Name.end()-5, Name.end());
+ StubSym =
+ MachineModuleInfoImpl::
+ StubValueTy(Ctx.GetOrCreateSymbol(Name.str()), false);
+ }
+ return Sym;
+ }
+
+ // If the symbol reference is actually to a non_lazy_ptr, not to the symbol,
+ // then add the suffix.
+ if (MO.getTargetFlags() & PPCII::MO_NLP_FLAG) {
+ Name += "$non_lazy_ptr";
+ MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str());
+
+ MachineModuleInfoMachO &MachO = getMachOMMI(AP);
+
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ (MO.getTargetFlags() & PPCII::MO_NLP_HIDDEN_FLAG) ?
+ MachO.getHiddenGVStubEntry(Sym) : MachO.getGVStubEntry(Sym);
+
+ if (StubSym.getPointer() == 0) {
+ assert(MO.isGlobal() && "Extern symbol not handled yet");
+ StubSym = MachineModuleInfoImpl::
+ StubValueTy(AP.Mang->getSymbol(MO.getGlobal()),
+ !MO.getGlobal()->hasInternalLinkage());
+ }
+ return Sym;
+ }
+
+ return Ctx.GetOrCreateSymbol(Name.str());
+}
+
+static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
+ AsmPrinter &Printer, bool isDarwin) {
+ MCContext &Ctx = Printer.OutContext;
+ MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None;
+
+ if (MO.getTargetFlags() & PPCII::MO_LO16)
+ RefKind = isDarwin ? MCSymbolRefExpr::VK_PPC_DARWIN_LO16 : MCSymbolRefExpr::VK_PPC_GAS_LO16;
+ else if (MO.getTargetFlags() & PPCII::MO_HA16)
+ RefKind = isDarwin ? MCSymbolRefExpr::VK_PPC_DARWIN_HA16 : MCSymbolRefExpr::VK_PPC_GAS_HA16;
+
+ // FIXME: This isn't right, but we don't have a good way to express this in
+ // the MC Level, see below.
+ if (MO.getTargetFlags() & PPCII::MO_PIC_FLAG)
+ RefKind = MCSymbolRefExpr::VK_None;
+
+ const MCExpr *Expr = MCSymbolRefExpr::Create(Symbol, RefKind, Ctx);
+
+ if (!MO.isJTI() && MO.getOffset())
+ Expr = MCBinaryExpr::CreateAdd(Expr,
+ MCConstantExpr::Create(MO.getOffset(), Ctx),
+ Ctx);
+
+ // Subtract off the PIC base if required.
+ if (MO.getTargetFlags() & PPCII::MO_PIC_FLAG) {
+ const MachineFunction *MF = MO.getParent()->getParent()->getParent();
+
+ const MCExpr *PB = MCSymbolRefExpr::Create(MF->getPICBaseSymbol(), Ctx);
+ Expr = MCBinaryExpr::CreateSub(Expr, PB, Ctx);
+ // FIXME: We have no way to make the result be VK_PPC_LO16/VK_PPC_HA16,
+ // since it is not a symbol!
+ }
+
+ return MCOperand::CreateExpr(Expr);
+}
+
+void llvm::LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
+ AsmPrinter &AP, bool isDarwin) {
+ OutMI.setOpcode(MI->getOpcode());
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+
+ MCOperand MCOp;
+ switch (MO.getType()) {
+ default:
+ MI->dump();
+ assert(0 && "unknown operand type");
+ case MachineOperand::MO_Register:
+ assert(!MO.getSubReg() && "Subregs should be eliminated!");
+ MCOp = MCOperand::CreateReg(MO.getReg());
+ break;
+ case MachineOperand::MO_Immediate:
+ MCOp = MCOperand::CreateImm(MO.getImm());
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
+ MO.getMBB()->getSymbol(), AP.OutContext));
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ case MachineOperand::MO_ExternalSymbol:
+ MCOp = GetSymbolRef(MO, GetSymbolFromOperand(MO, AP), AP, isDarwin);
+ break;
+ case MachineOperand::MO_JumpTableIndex:
+ MCOp = GetSymbolRef(MO, AP.GetJTISymbol(MO.getIndex()), AP, isDarwin);
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ MCOp = GetSymbolRef(MO, AP.GetCPISymbol(MO.getIndex()), AP, isDarwin);
+ break;
+ case MachineOperand::MO_BlockAddress:
+ MCOp = GetSymbolRef(MO,AP.GetBlockAddressSymbol(MO.getBlockAddress()),AP,
+ isDarwin);
+ break;
+ }
+
+ OutMI.addOperand(MCOp);
+ }
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h
new file mode 100644
index 0000000..e2649c8
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h
@@ -0,0 +1,132 @@
+//===-- PPCMachineFunctionInfo.h - Private data used for PowerPC --*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the PowerPC specific subclass of MachineFunctionInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PPC_MACHINE_FUNCTION_INFO_H
+#define PPC_MACHINE_FUNCTION_INFO_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+/// PPCFunctionInfo - This class is derived from MachineFunction private
+/// PowerPC target-specific information for each MachineFunction.
+class PPCFunctionInfo : public MachineFunctionInfo {
+private:
+ /// FramePointerSaveIndex - Frame index of where the old frame pointer is
+ /// stored. Also used as an anchor for instructions that need to be altered
+ /// when using frame pointers (dyna_add, dyna_sub.)
+ int FramePointerSaveIndex;
+
+ /// ReturnAddrSaveIndex - Frame index of where the return address is stored.
+ ///
+ int ReturnAddrSaveIndex;
+
+ /// MustSaveLR - Indicates whether LR is defined (or clobbered) in the current
+ /// function. This is only valid after the initial scan of the function by
+ /// PEI.
+ bool MustSaveLR;
+
+ /// SpillsCR - Indicates whether CR is spilled in the current function.
+ bool SpillsCR;
+
+ /// LRStoreRequired - The bool indicates whether there is some explicit use of
+ /// the LR/LR8 stack slot that is not obvious from scanning the code. This
+ /// requires that the code generator produce a store of LR to the stack on
+ /// entry, even though LR may otherwise apparently not be used.
+ bool LRStoreRequired;
+
+ /// MinReservedArea - This is the frame size that is at least reserved in a
+ /// potential caller (parameter+linkage area).
+ unsigned MinReservedArea;
+
+ /// TailCallSPDelta - Stack pointer delta used when tail calling. Maximum
+ /// amount the stack pointer is adjusted to make the frame bigger for tail
+ /// calls. Used for creating an area before the register spill area.
+ int TailCallSPDelta;
+
+ /// HasFastCall - Does this function contain a fast call. Used to determine
+ /// how the caller's stack pointer should be calculated (epilog/dynamicalloc).
+ bool HasFastCall;
+
+ /// VarArgsFrameIndex - FrameIndex for start of varargs area.
+ int VarArgsFrameIndex;
+ /// VarArgsStackOffset - StackOffset for start of stack
+ /// arguments.
+ int VarArgsStackOffset;
+ /// VarArgsNumGPR - Index of the first unused integer
+ /// register for parameter passing.
+ unsigned VarArgsNumGPR;
+ /// VarArgsNumFPR - Index of the first unused double
+ /// register for parameter passing.
+ unsigned VarArgsNumFPR;
+
+public:
+ explicit PPCFunctionInfo(MachineFunction &MF)
+ : FramePointerSaveIndex(0),
+ ReturnAddrSaveIndex(0),
+ SpillsCR(false),
+ LRStoreRequired(false),
+ MinReservedArea(0),
+ TailCallSPDelta(0),
+ HasFastCall(false),
+ VarArgsFrameIndex(0),
+ VarArgsStackOffset(0),
+ VarArgsNumGPR(0),
+ VarArgsNumFPR(0) {}
+
+ int getFramePointerSaveIndex() const { return FramePointerSaveIndex; }
+ void setFramePointerSaveIndex(int Idx) { FramePointerSaveIndex = Idx; }
+
+ int getReturnAddrSaveIndex() const { return ReturnAddrSaveIndex; }
+ void setReturnAddrSaveIndex(int idx) { ReturnAddrSaveIndex = idx; }
+
+ unsigned getMinReservedArea() const { return MinReservedArea; }
+ void setMinReservedArea(unsigned size) { MinReservedArea = size; }
+
+ int getTailCallSPDelta() const { return TailCallSPDelta; }
+ void setTailCallSPDelta(int size) { TailCallSPDelta = size; }
+
+ /// MustSaveLR - This is set when the prolog/epilog inserter does its initial
+ /// scan of the function. It is true if the LR/LR8 register is ever explicitly
+ /// defined/clobbered in the machine function (e.g. by calls and movpctolr,
+ /// which is used in PIC generation), or if the LR stack slot is explicitly
+ /// referenced by builtin_return_address.
+ void setMustSaveLR(bool U) { MustSaveLR = U; }
+ bool mustSaveLR() const { return MustSaveLR; }
+
+ void setSpillsCR() { SpillsCR = true; }
+ bool isCRSpilled() const { return SpillsCR; }
+
+ void setLRStoreRequired() { LRStoreRequired = true; }
+ bool isLRStoreRequired() const { return LRStoreRequired; }
+
+ void setHasFastCall() { HasFastCall = true; }
+ bool hasFastCall() const { return HasFastCall;}
+
+ int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
+ void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; }
+
+ int getVarArgsStackOffset() const { return VarArgsStackOffset; }
+ void setVarArgsStackOffset(int Offset) { VarArgsStackOffset = Offset; }
+
+ unsigned getVarArgsNumGPR() const { return VarArgsNumGPR; }
+ void setVarArgsNumGPR(unsigned Num) { VarArgsNumGPR = Num; }
+
+ unsigned getVarArgsNumFPR() const { return VarArgsNumFPR; }
+ void setVarArgsNumFPR(unsigned Num) { VarArgsNumFPR = Num; }
+};
+
+} // end of namespace llvm
+
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCPerfectShuffle.h b/contrib/llvm/lib/Target/PowerPC/PPCPerfectShuffle.h
new file mode 100644
index 0000000..3164e33
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCPerfectShuffle.h
@@ -0,0 +1,6586 @@
+//===-- PPCPerfectShuffle.h - Altivec Perfect Shuffle Table ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file, which was autogenerated by llvm-PerfectShuffle, contains data
+// for the optimal way to build a perfect shuffle without using vperm.
+//
+//===----------------------------------------------------------------------===//
+
+// 31 entries have cost 0
+// 292 entries have cost 1
+// 1384 entries have cost 2
+// 3061 entries have cost 3
+// 1733 entries have cost 4
+// 60 entries have cost 5
+
+// This table is 6561*4 = 26244 bytes in size.
+static const unsigned PerfectShuffleTable[6561+1] = {
+ 202162278U, // <0,0,0,0>: Cost 1 vspltisw0 LHS
+ 1140850790U, // <0,0,0,1>: Cost 2 vmrghw <0,0,0,0>, LHS
+ 2617247181U, // <0,0,0,2>: Cost 3 vsldoi4 <0,0,0,0>, <2,0,3,0>
+ 2635163787U, // <0,0,0,3>: Cost 3 vsldoi4 <3,0,0,0>, <3,0,0,0>
+ 1543507254U, // <0,0,0,4>: Cost 2 vsldoi4 <0,0,0,0>, RHS
+ 2281701705U, // <0,0,0,5>: Cost 3 vmrglw <0,0,0,0>, <0,4,0,5>
+ 2617250133U, // <0,0,0,6>: Cost 3 vsldoi4 <0,0,0,0>, <6,0,7,0>
+ 2659054575U, // <0,0,0,7>: Cost 3 vsldoi4 <7,0,0,0>, <7,0,0,0>
+ 202162278U, // <0,0,0,u>: Cost 1 vspltisw0 LHS
+ 1141686282U, // <0,0,1,0>: Cost 2 vmrghw LHS, <0,0,1,1>
+ 67944550U, // <0,0,1,1>: Cost 1 vmrghw LHS, LHS
+ 1685241958U, // <0,0,1,2>: Cost 2 vsldoi12 <1,2,3,0>, LHS
+ 2215870716U, // <0,0,1,3>: Cost 3 vmrghw LHS, <0,3,1,0>
+ 1141727570U, // <0,0,1,4>: Cost 2 vmrghw LHS, <0,4,1,5>
+ 2215428562U, // <0,0,1,5>: Cost 3 vmrghw LHS, <0,5,6,7>
+ 2215428589U, // <0,0,1,6>: Cost 3 vmrghw LHS, <0,6,0,7>
+ 2659062768U, // <0,0,1,7>: Cost 3 vsldoi4 <7,0,0,1>, <7,0,0,1>
+ 67945117U, // <0,0,1,u>: Cost 1 vmrghw LHS, LHS
+ 2684356045U, // <0,0,2,0>: Cost 3 vsldoi8 <0,0,0,0>, <2,0,3,0>
+ 2216009830U, // <0,0,2,1>: Cost 3 vmrghw <0,2,1,2>, LHS
+ 2216009901U, // <0,0,2,2>: Cost 3 vmrghw <0,2,1,2>, <0,2,1,2>
+ 2698290853U, // <0,0,2,3>: Cost 3 vsldoi8 <2,3,0,0>, <2,3,0,0>
+ 3289751890U, // <0,0,2,4>: Cost 4 vmrghw <0,2,1,2>, <0,4,1,5>
+ 3758098275U, // <0,0,2,5>: Cost 4 vsldoi8 <0,0,0,0>, <2,5,3,1>
+ 2684356538U, // <0,0,2,6>: Cost 3 vsldoi8 <0,0,0,0>, <2,6,3,7>
+ 3758098410U, // <0,0,2,7>: Cost 4 vsldoi8 <0,0,0,0>, <2,7,0,1>
+ 2216010397U, // <0,0,2,u>: Cost 3 vmrghw <0,2,1,2>, LHS
+ 2702272651U, // <0,0,3,0>: Cost 3 vsldoi8 <3,0,0,0>, <3,0,0,0>
+ 2216656998U, // <0,0,3,1>: Cost 3 vmrghw <0,3,1,0>, LHS
+ 3844669704U, // <0,0,3,2>: Cost 4 vsldoi12 <3,2,3,0>, <0,3,2,3>
+ 2216657148U, // <0,0,3,3>: Cost 3 vmrghw <0,3,1,0>, <0,3,1,0>
+ 2684357122U, // <0,0,3,4>: Cost 3 vsldoi8 <0,0,0,0>, <3,4,5,6>
+ 3732820066U, // <0,0,3,5>: Cost 4 vsldoi4 <7,0,0,3>, <5,6,7,0>
+ 3778005624U, // <0,0,3,6>: Cost 4 vsldoi8 <3,3,0,0>, <3,6,0,7>
+ 3374713464U, // <0,0,3,7>: Cost 4 vmrglw <3,2,0,3>, <3,6,0,7>
+ 2216657565U, // <0,0,3,u>: Cost 3 vmrghw <0,3,1,0>, LHS
+ 2217361408U, // <0,0,4,0>: Cost 3 vmrghw <0,4,1,5>, <0,0,0,0>
+ 1143619686U, // <0,0,4,1>: Cost 2 vmrghw <0,4,1,5>, LHS
+ 3291103405U, // <0,0,4,2>: Cost 4 vmrghw <0,4,1,5>, <0,2,1,2>
+ 3827269988U, // <0,0,4,3>: Cost 4 vsldoi12 <0,3,1,0>, <0,4,3,5>
+ 1143619922U, // <0,0,4,4>: Cost 2 vmrghw <0,4,1,5>, <0,4,1,5>
+ 1610616118U, // <0,0,4,5>: Cost 2 vsldoi8 <0,0,0,0>, RHS
+ 3758099833U, // <0,0,4,6>: Cost 4 vsldoi8 <0,0,0,0>, <4,6,5,2>
+ 3854107016U, // <0,0,4,7>: Cost 4 vsldoi12 <4,7,5,0>, <0,4,7,5>
+ 1143620253U, // <0,0,4,u>: Cost 2 vmrghw <0,4,1,5>, LHS
+ 2284396544U, // <0,0,5,0>: Cost 3 vmrglw <0,4,0,5>, <0,0,0,0>
+ 2218025062U, // <0,0,5,1>: Cost 3 vmrghw <0,5,1,5>, LHS
+ 3758100203U, // <0,0,5,2>: Cost 4 vsldoi8 <0,0,0,0>, <5,2,1,3>
+ 3395966100U, // <0,0,5,3>: Cost 4 vmrglw <6,7,0,5>, <7,2,0,3>
+ 3804549052U, // <0,0,5,4>: Cost 4 vsldoi8 <7,7,0,0>, <5,4,6,5>
+ 2302314964U, // <0,0,5,5>: Cost 3 vmrglw <3,4,0,5>, <3,4,0,5>
+ 2785821138U, // <0,0,5,6>: Cost 3 vsldoi12 <5,6,7,0>, <0,5,6,7>
+ 3395966428U, // <0,0,5,7>: Cost 4 vmrglw <6,7,0,5>, <7,6,0,7>
+ 2787148260U, // <0,0,5,u>: Cost 3 vsldoi12 <5,u,7,0>, <0,5,u,7>
+ 2684358997U, // <0,0,6,0>: Cost 3 vsldoi8 <0,0,0,0>, <6,0,7,0>
+ 2218631270U, // <0,0,6,1>: Cost 3 vmrghw <0,6,0,7>, LHS
+ 2684359162U, // <0,0,6,2>: Cost 3 vsldoi8 <0,0,0,0>, <6,2,7,3>
+ 3758101042U, // <0,0,6,3>: Cost 4 vsldoi8 <0,0,0,0>, <6,3,4,5>
+ 3732843830U, // <0,0,6,4>: Cost 4 vsldoi4 <7,0,0,6>, RHS
+ 3758101227U, // <0,0,6,5>: Cost 4 vsldoi8 <0,0,0,0>, <6,5,7,1>
+ 2684359480U, // <0,0,6,6>: Cost 3 vsldoi8 <0,0,0,0>, <6,6,6,6>
+ 2724836173U, // <0,0,6,7>: Cost 3 vsldoi8 <6,7,0,0>, <6,7,0,0>
+ 2725499806U, // <0,0,6,u>: Cost 3 vsldoi8 <6,u,0,0>, <6,u,0,0>
+ 2726163439U, // <0,0,7,0>: Cost 3 vsldoi8 <7,0,0,0>, <7,0,0,0>
+ 2219311206U, // <0,0,7,1>: Cost 3 vmrghw <0,7,1,0>, LHS
+ 3868557900U, // <0,0,7,2>: Cost 4 vsldoi12 <7,2,3,0>, <0,7,2,3>
+ 3377400112U, // <0,0,7,3>: Cost 4 vmrglw <3,6,0,7>, <3,2,0,3>
+ 2684360038U, // <0,0,7,4>: Cost 3 vsldoi8 <0,0,0,0>, <7,4,5,6>
+ 3732852834U, // <0,0,7,5>: Cost 4 vsldoi4 <7,0,0,7>, <5,6,7,0>
+ 3871507060U, // <0,0,7,6>: Cost 4 vsldoi12 <7,6,7,0>, <0,7,6,7>
+ 2303658616U, // <0,0,7,7>: Cost 3 vmrglw <3,6,0,7>, <3,6,0,7>
+ 2726163439U, // <0,0,7,u>: Cost 3 vsldoi8 <7,0,0,0>, <7,0,0,0>
+ 202162278U, // <0,0,u,0>: Cost 1 vspltisw0 LHS
+ 72589414U, // <0,0,u,1>: Cost 1 vmrghw LHS, LHS
+ 1685242525U, // <0,0,u,2>: Cost 2 vsldoi12 <1,2,3,0>, LHS
+ 2220073212U, // <0,0,u,3>: Cost 3 vmrghw LHS, <0,3,1,0>
+ 1146331474U, // <0,0,u,4>: Cost 2 vmrghw LHS, <0,4,1,5>
+ 1610619034U, // <0,0,u,5>: Cost 2 vsldoi8 <0,0,0,0>, RHS
+ 2785821138U, // <0,0,u,6>: Cost 3 vsldoi12 <5,6,7,0>, <0,5,6,7>
+ 2659120119U, // <0,0,u,7>: Cost 3 vsldoi4 <7,0,0,u>, <7,0,0,u>
+ 72589981U, // <0,0,u,u>: Cost 1 vmrghw LHS, LHS
+ 2698297344U, // <0,1,0,0>: Cost 3 vsldoi8 <2,3,0,1>, <0,0,0,0>
+ 1624555622U, // <0,1,0,1>: Cost 2 vsldoi8 <2,3,0,1>, LHS
+ 2758984428U, // <0,1,0,2>: Cost 3 vsldoi12 <1,2,3,0>, <1,0,2,1>
+ 2635237524U, // <0,1,0,3>: Cost 3 vsldoi4 <3,0,1,0>, <3,0,1,0>
+ 2693652818U, // <0,1,0,4>: Cost 3 vsldoi8 <1,5,0,1>, <0,4,1,5>
+ 2281701714U, // <0,1,0,5>: Cost 3 vmrglw <0,0,0,0>, <0,4,1,5>
+ 2698297846U, // <0,1,0,6>: Cost 3 vsldoi8 <2,3,0,1>, <0,6,1,7>
+ 2659128312U, // <0,1,0,7>: Cost 3 vsldoi4 <7,0,1,0>, <7,0,1,0>
+ 1624556189U, // <0,1,0,u>: Cost 2 vsldoi8 <2,3,0,1>, LHS
+ 1543585802U, // <0,1,1,0>: Cost 2 vsldoi4 <0,0,1,1>, <0,0,1,1>
+ 1141728052U, // <0,1,1,1>: Cost 2 vmrghw LHS, <1,1,1,1>
+ 1141728150U, // <0,1,1,2>: Cost 2 vmrghw LHS, <1,2,3,0>
+ 2295644334U, // <0,1,1,3>: Cost 3 vmrglw <2,3,0,1>, <0,2,1,3>
+ 1543589174U, // <0,1,1,4>: Cost 2 vsldoi4 <0,0,1,1>, RHS
+ 2290999634U, // <0,1,1,5>: Cost 3 vmrglw <1,5,0,1>, <0,4,1,5>
+ 2617332135U, // <0,1,1,6>: Cost 3 vsldoi4 <0,0,1,1>, <6,1,7,1>
+ 2617332720U, // <0,1,1,7>: Cost 3 vsldoi4 <0,0,1,1>, <7,0,0,1>
+ 1142171004U, // <0,1,1,u>: Cost 2 vmrghw LHS, <1,u,3,0>
+ 1561509990U, // <0,1,2,0>: Cost 2 vsldoi4 <3,0,1,2>, LHS
+ 2623308516U, // <0,1,2,1>: Cost 3 vsldoi4 <1,0,1,2>, <1,0,1,2>
+ 2698298984U, // <0,1,2,2>: Cost 3 vsldoi8 <2,3,0,1>, <2,2,2,2>
+ 835584U, // <0,1,2,3>: Cost 0 copy LHS
+ 1561513270U, // <0,1,2,4>: Cost 2 vsldoi4 <3,0,1,2>, RHS
+ 2647199304U, // <0,1,2,5>: Cost 3 vsldoi4 <5,0,1,2>, <5,0,1,2>
+ 2698299322U, // <0,1,2,6>: Cost 3 vsldoi8 <2,3,0,1>, <2,6,3,7>
+ 1585402874U, // <0,1,2,7>: Cost 2 vsldoi4 <7,0,1,2>, <7,0,1,2>
+ 835584U, // <0,1,2,u>: Cost 0 copy LHS
+ 2698299540U, // <0,1,3,0>: Cost 3 vsldoi8 <2,3,0,1>, <3,0,1,0>
+ 3290399540U, // <0,1,3,1>: Cost 4 vmrghw <0,3,1,0>, <1,1,1,1>
+ 2698299720U, // <0,1,3,2>: Cost 3 vsldoi8 <2,3,0,1>, <3,2,3,0>
+ 2698299804U, // <0,1,3,3>: Cost 3 vsldoi8 <2,3,0,1>, <3,3,3,3>
+ 2698299906U, // <0,1,3,4>: Cost 3 vsldoi8 <2,3,0,1>, <3,4,5,6>
+ 3832726521U, // <0,1,3,5>: Cost 4 vsldoi12 <1,2,3,0>, <1,3,5,0>
+ 2724842160U, // <0,1,3,6>: Cost 3 vsldoi8 <6,7,0,1>, <3,6,7,0>
+ 2706926275U, // <0,1,3,7>: Cost 3 vsldoi8 <3,7,0,1>, <3,7,0,1>
+ 2698300190U, // <0,1,3,u>: Cost 3 vsldoi8 <2,3,0,1>, <3,u,1,2>
+ 2635268198U, // <0,1,4,0>: Cost 3 vsldoi4 <3,0,1,4>, LHS
+ 2217362228U, // <0,1,4,1>: Cost 3 vmrghw <0,4,1,5>, <1,1,1,1>
+ 2217362326U, // <0,1,4,2>: Cost 3 vmrghw <0,4,1,5>, <1,2,3,0>
+ 2635270296U, // <0,1,4,3>: Cost 3 vsldoi4 <3,0,1,4>, <3,0,1,4>
+ 2635271478U, // <0,1,4,4>: Cost 3 vsldoi4 <3,0,1,4>, RHS
+ 1624558902U, // <0,1,4,5>: Cost 2 vsldoi8 <2,3,0,1>, RHS
+ 2659160910U, // <0,1,4,6>: Cost 3 vsldoi4 <7,0,1,4>, <6,7,0,1>
+ 2659161084U, // <0,1,4,7>: Cost 3 vsldoi4 <7,0,1,4>, <7,0,1,4>
+ 1624559145U, // <0,1,4,u>: Cost 2 vsldoi8 <2,3,0,1>, RHS
+ 3832726639U, // <0,1,5,0>: Cost 4 vsldoi12 <1,2,3,0>, <1,5,0,1>
+ 2714889871U, // <0,1,5,1>: Cost 3 vsldoi8 <5,1,0,1>, <5,1,0,1>
+ 2302314646U, // <0,1,5,2>: Cost 3 vmrglw <3,4,0,5>, <3,0,1,2>
+ 3834717321U, // <0,1,5,3>: Cost 4 vsldoi12 <1,5,3,0>, <1,5,3,0>
+ 3832726679U, // <0,1,5,4>: Cost 4 vsldoi12 <1,2,3,0>, <1,5,4,5>
+ 2717544403U, // <0,1,5,5>: Cost 3 vsldoi8 <5,5,0,1>, <5,5,0,1>
+ 2718208036U, // <0,1,5,6>: Cost 3 vsldoi8 <5,6,0,1>, <5,6,0,1>
+ 3792613493U, // <0,1,5,7>: Cost 4 vsldoi8 <5,7,0,1>, <5,7,0,1>
+ 2719535302U, // <0,1,5,u>: Cost 3 vsldoi8 <5,u,0,1>, <5,u,0,1>
+ 2659172454U, // <0,1,6,0>: Cost 3 vsldoi4 <7,0,1,6>, LHS
+ 3832726735U, // <0,1,6,1>: Cost 4 vsldoi12 <1,2,3,0>, <1,6,1,7>
+ 2724844026U, // <0,1,6,2>: Cost 3 vsldoi8 <6,7,0,1>, <6,2,7,3>
+ 3775361608U, // <0,1,6,3>: Cost 4 vsldoi8 <2,u,0,1>, <6,3,7,0>
+ 2659175734U, // <0,1,6,4>: Cost 3 vsldoi4 <7,0,1,6>, RHS
+ 3832726771U, // <0,1,6,5>: Cost 4 vsldoi12 <1,2,3,0>, <1,6,5,7>
+ 2724844344U, // <0,1,6,6>: Cost 3 vsldoi8 <6,7,0,1>, <6,6,6,6>
+ 1651102542U, // <0,1,6,7>: Cost 2 vsldoi8 <6,7,0,1>, <6,7,0,1>
+ 1651766175U, // <0,1,6,u>: Cost 2 vsldoi8 <6,u,0,1>, <6,u,0,1>
+ 2724844536U, // <0,1,7,0>: Cost 3 vsldoi8 <6,7,0,1>, <7,0,1,0>
+ 3377397770U, // <0,1,7,1>: Cost 4 vmrglw <3,6,0,7>, <0,0,1,1>
+ 2698302636U, // <0,1,7,2>: Cost 3 vsldoi8 <2,3,0,1>, <7,2,3,0>
+ 2728162531U, // <0,1,7,3>: Cost 3 vsldoi8 <7,3,0,1>, <7,3,0,1>
+ 2724844902U, // <0,1,7,4>: Cost 3 vsldoi8 <6,7,0,1>, <7,4,5,6>
+ 3377398098U, // <0,1,7,5>: Cost 4 vmrglw <3,6,0,7>, <0,4,1,5>
+ 2724845076U, // <0,1,7,6>: Cost 3 vsldoi8 <6,7,0,1>, <7,6,7,0>
+ 2724845164U, // <0,1,7,7>: Cost 3 vsldoi8 <6,7,0,1>, <7,7,7,7>
+ 2724845186U, // <0,1,7,u>: Cost 3 vsldoi8 <6,7,0,1>, <7,u,1,2>
+ 1561559142U, // <0,1,u,0>: Cost 2 vsldoi4 <3,0,1,u>, LHS
+ 1146331956U, // <0,1,u,1>: Cost 2 vmrghw LHS, <1,1,1,1>
+ 1146332054U, // <0,1,u,2>: Cost 2 vmrghw LHS, <1,2,3,0>
+ 835584U, // <0,1,u,3>: Cost 0 copy LHS
+ 1561562422U, // <0,1,u,4>: Cost 2 vsldoi4 <3,0,1,u>, RHS
+ 1624561818U, // <0,1,u,5>: Cost 2 vsldoi8 <2,3,0,1>, RHS
+ 2220074191U, // <0,1,u,6>: Cost 3 vmrghw LHS, <1,6,1,7>
+ 1585452032U, // <0,1,u,7>: Cost 2 vsldoi4 <7,0,1,u>, <7,0,1,u>
+ 835584U, // <0,1,u,u>: Cost 0 copy LHS
+ 2214593997U, // <0,2,0,0>: Cost 3 vmrghw <0,0,0,0>, <2,0,3,0>
+ 2214675999U, // <0,2,0,1>: Cost 3 vmrghw <0,0,1,1>, <2,1,3,1>
+ 2214594152U, // <0,2,0,2>: Cost 3 vmrghw <0,0,0,0>, <2,2,2,2>
+ 1207959654U, // <0,2,0,3>: Cost 2 vmrglw <0,0,0,0>, LHS
+ 3709054262U, // <0,2,0,4>: Cost 4 vsldoi4 <3,0,2,0>, RHS
+ 3375350836U, // <0,2,0,5>: Cost 4 vmrglw <3,3,0,0>, <1,4,2,5>
+ 2214594490U, // <0,2,0,6>: Cost 3 vmrghw <0,0,0,0>, <2,6,3,7>
+ 3288336362U, // <0,2,0,7>: Cost 4 vmrghw <0,0,0,0>, <2,7,0,1>
+ 1207959659U, // <0,2,0,u>: Cost 2 vmrglw <0,0,0,0>, LHS
+ 2215871994U, // <0,2,1,0>: Cost 3 vmrghw LHS, <2,0,u,0>
+ 2215470623U, // <0,2,1,1>: Cost 3 vmrghw LHS, <2,1,3,1>
+ 1141728872U, // <0,2,1,2>: Cost 2 vmrghw LHS, <2,2,2,2>
+ 1141728934U, // <0,2,1,3>: Cost 2 vmrghw LHS, <2,3,0,1>
+ 2215872323U, // <0,2,1,4>: Cost 3 vmrghw LHS, <2,4,u,5>
+ 2215872405U, // <0,2,1,5>: Cost 3 vmrghw LHS, <2,5,u,6>
+ 1141729210U, // <0,2,1,6>: Cost 2 vmrghw LHS, <2,6,3,7>
+ 2215430122U, // <0,2,1,7>: Cost 3 vmrghw LHS, <2,7,0,1>
+ 1141729368U, // <0,2,1,u>: Cost 2 vmrghw LHS, <2,u,3,3>
+ 3289736698U, // <0,2,2,0>: Cost 4 vmrghw <0,2,1,0>, <2,0,u,0>
+ 3289744927U, // <0,2,2,1>: Cost 4 vmrghw <0,2,1,1>, <2,1,3,1>
+ 2216011368U, // <0,2,2,2>: Cost 3 vmrghw <0,2,1,2>, <2,2,2,2>
+ 2216019622U, // <0,2,2,3>: Cost 3 vmrghw <0,2,1,3>, <2,3,0,1>
+ 3289769795U, // <0,2,2,4>: Cost 4 vmrghw <0,2,1,4>, <2,4,u,5>
+ 3289778069U, // <0,2,2,5>: Cost 4 vmrghw <0,2,1,5>, <2,5,u,6>
+ 2216044474U, // <0,2,2,6>: Cost 3 vmrghw <0,2,1,6>, <2,6,3,7>
+ 3732960259U, // <0,2,2,7>: Cost 4 vsldoi4 <7,0,2,2>, <7,0,2,2>
+ 2216061016U, // <0,2,2,u>: Cost 3 vmrghw <0,2,1,u>, <2,u,3,3>
+ 2758985382U, // <0,2,3,0>: Cost 3 vsldoi12 <1,2,3,0>, <2,3,0,1>
+ 2758985392U, // <0,2,3,1>: Cost 3 vsldoi12 <1,2,3,0>, <2,3,1,2>
+ 3290400360U, // <0,2,3,2>: Cost 4 vmrghw <0,3,1,0>, <2,2,2,2>
+ 2758985408U, // <0,2,3,3>: Cost 3 vsldoi12 <1,2,3,0>, <2,3,3,0>
+ 2758985422U, // <0,2,3,4>: Cost 3 vsldoi12 <1,2,3,0>, <2,3,4,5>
+ 2785822424U, // <0,2,3,5>: Cost 3 vsldoi12 <5,6,7,0>, <2,3,5,6>
+ 3290400698U, // <0,2,3,6>: Cost 4 vmrghw <0,3,1,0>, <2,6,3,7>
+ 2765915876U, // <0,2,3,7>: Cost 3 vsldoi12 <2,3,7,0>, <2,3,7,0>
+ 2758985453U, // <0,2,3,u>: Cost 3 vsldoi12 <1,2,3,0>, <2,3,u,0>
+ 3291104762U, // <0,2,4,0>: Cost 4 vmrghw <0,4,1,5>, <2,0,u,0>
+ 2217362979U, // <0,2,4,1>: Cost 3 vmrghw <0,4,1,5>, <2,1,3,5>
+ 2217363048U, // <0,2,4,2>: Cost 3 vmrghw <0,4,1,5>, <2,2,2,2>
+ 2217363110U, // <0,2,4,3>: Cost 3 vmrghw <0,4,1,5>, <2,3,0,1>
+ 3291105087U, // <0,2,4,4>: Cost 4 vmrghw <0,4,1,5>, <2,4,u,1>
+ 3291105173U, // <0,2,4,5>: Cost 4 vmrghw <0,4,1,5>, <2,5,u,6>
+ 2217363386U, // <0,2,4,6>: Cost 3 vmrghw <0,4,1,5>, <2,6,3,7>
+ 3788639688U, // <0,2,4,7>: Cost 4 vsldoi8 <5,1,0,2>, <4,7,5,0>
+ 2217363515U, // <0,2,4,u>: Cost 3 vmrghw <0,4,1,5>, <2,u,0,1>
+ 3376054371U, // <0,2,5,0>: Cost 4 vmrglw <3,4,0,5>, <0,1,2,0>
+ 3788639888U, // <0,2,5,1>: Cost 4 vsldoi8 <5,1,0,2>, <5,1,0,2>
+ 3376055912U, // <0,2,5,2>: Cost 4 vmrglw <3,4,0,5>, <2,2,2,2>
+ 2302312550U, // <0,2,5,3>: Cost 3 vmrglw <3,4,0,5>, LHS
+ 3376054375U, // <0,2,5,4>: Cost 4 vmrglw <3,4,0,5>, <0,1,2,4>
+ 3374728244U, // <0,2,5,5>: Cost 4 vmrglw <3,2,0,5>, <1,4,2,5>
+ 3805229154U, // <0,2,5,6>: Cost 4 vsldoi8 <7,u,0,2>, <5,6,7,0>
+ 3376055512U, // <0,2,5,7>: Cost 4 vmrglw <3,4,0,5>, <1,6,2,7>
+ 2302312555U, // <0,2,5,u>: Cost 3 vmrglw <3,4,0,5>, LHS
+ 3709100134U, // <0,2,6,0>: Cost 4 vsldoi4 <3,0,2,6>, LHS
+ 3709100950U, // <0,2,6,1>: Cost 4 vsldoi4 <3,0,2,6>, <1,2,3,0>
+ 3709102010U, // <0,2,6,2>: Cost 4 vsldoi4 <3,0,2,6>, <2,6,3,7>
+ 2758985658U, // <0,2,6,3>: Cost 3 vsldoi12 <1,2,3,0>, <2,6,3,7>
+ 3709103414U, // <0,2,6,4>: Cost 4 vsldoi4 <3,0,2,6>, RHS
+ 3732992098U, // <0,2,6,5>: Cost 4 vsldoi4 <7,0,2,6>, <5,6,7,0>
+ 3292374970U, // <0,2,6,6>: Cost 4 vmrghw <0,6,0,7>, <2,6,3,7>
+ 3798594383U, // <0,2,6,7>: Cost 4 vsldoi8 <6,7,0,2>, <6,7,0,2>
+ 2758985703U, // <0,2,6,u>: Cost 3 vsldoi12 <1,2,3,0>, <2,6,u,7>
+ 3788641274U, // <0,2,7,0>: Cost 4 vsldoi8 <5,1,0,2>, <7,0,1,2>
+ 3377398508U, // <0,2,7,1>: Cost 4 vmrglw <3,6,0,7>, <1,0,2,1>
+ 3377398590U, // <0,2,7,2>: Cost 4 vmrglw <3,6,0,7>, <1,1,2,2>
+ 2303656038U, // <0,2,7,3>: Cost 3 vmrglw <3,6,0,7>, LHS
+ 3709111606U, // <0,2,7,4>: Cost 4 vsldoi4 <3,0,2,7>, RHS
+ 3377398836U, // <0,2,7,5>: Cost 4 vmrglw <3,6,0,7>, <1,4,2,5>
+ 3803903447U, // <0,2,7,6>: Cost 4 vsldoi8 <7,6,0,2>, <7,6,0,2>
+ 3293054954U, // <0,2,7,7>: Cost 4 vmrghw <0,7,1,0>, <2,7,0,1>
+ 2303656043U, // <0,2,7,u>: Cost 3 vmrglw <3,6,0,7>, LHS
+ 2220074490U, // <0,2,u,0>: Cost 3 vmrghw LHS, <2,0,u,0>
+ 2220074527U, // <0,2,u,1>: Cost 3 vmrghw LHS, <2,1,3,1>
+ 1146332776U, // <0,2,u,2>: Cost 2 vmrghw LHS, <2,2,2,2>
+ 1146332838U, // <0,2,u,3>: Cost 2 vmrghw LHS, <2,3,0,1>
+ 2220074819U, // <0,2,u,4>: Cost 3 vmrghw LHS, <2,4,u,5>
+ 2220074901U, // <0,2,u,5>: Cost 3 vmrghw LHS, <2,5,u,6>
+ 1146333114U, // <0,2,u,6>: Cost 2 vmrghw LHS, <2,6,3,7>
+ 2220074986U, // <0,2,u,7>: Cost 3 vmrghw LHS, <2,7,0,1>
+ 1146333243U, // <0,2,u,u>: Cost 2 vmrghw LHS, <2,u,0,1>
+ 2629410816U, // <0,3,0,0>: Cost 3 vsldoi4 <2,0,3,0>, <0,0,0,0>
+ 2753530006U, // <0,3,0,1>: Cost 3 vsldoi12 <0,3,1,0>, <3,0,1,2>
+ 2629412301U, // <0,3,0,2>: Cost 3 vsldoi4 <2,0,3,0>, <2,0,3,0>
+ 2214594972U, // <0,3,0,3>: Cost 3 vmrghw <0,0,0,0>, <3,3,3,3>
+ 2758985908U, // <0,3,0,4>: Cost 3 vsldoi12 <1,2,3,0>, <3,0,4,5>
+ 3733016674U, // <0,3,0,5>: Cost 4 vsldoi4 <7,0,3,0>, <5,6,7,0>
+ 3777364488U, // <0,3,0,6>: Cost 4 vsldoi8 <3,2,0,3>, <0,6,3,7>
+ 2281703354U, // <0,3,0,7>: Cost 3 vmrglw <0,0,0,0>, <2,6,3,7>
+ 2758985941U, // <0,3,0,u>: Cost 3 vsldoi12 <1,2,3,0>, <3,0,u,2>
+ 1141729430U, // <0,3,1,0>: Cost 2 vmrghw LHS, <3,0,1,2>
+ 2215471334U, // <0,3,1,1>: Cost 3 vmrghw LHS, <3,1,1,1>
+ 2215471425U, // <0,3,1,2>: Cost 3 vmrghw LHS, <3,2,2,2>
+ 1141729692U, // <0,3,1,3>: Cost 2 vmrghw LHS, <3,3,3,3>
+ 1141729794U, // <0,3,1,4>: Cost 2 vmrghw LHS, <3,4,5,6>
+ 2215430738U, // <0,3,1,5>: Cost 3 vmrghw LHS, <3,5,5,5>
+ 2215430776U, // <0,3,1,6>: Cost 3 vmrghw LHS, <3,6,0,7>
+ 2295646138U, // <0,3,1,7>: Cost 3 vmrglw <2,3,0,1>, <2,6,3,7>
+ 1141730078U, // <0,3,1,u>: Cost 2 vmrghw LHS, <3,u,1,2>
+ 2758986032U, // <0,3,2,0>: Cost 3 vsldoi12 <1,2,3,0>, <3,2,0,3>
+ 3709141910U, // <0,3,2,1>: Cost 4 vsldoi4 <3,0,3,2>, <1,2,3,0>
+ 3289753921U, // <0,3,2,2>: Cost 4 vmrghw <0,2,1,2>, <3,2,2,2>
+ 2770929992U, // <0,3,2,3>: Cost 3 vsldoi12 <3,2,3,0>, <3,2,3,0>
+ 3289754114U, // <0,3,2,4>: Cost 4 vmrghw <0,2,1,2>, <3,4,5,6>
+ 3362095460U, // <0,3,2,5>: Cost 5 vmrglw <1,1,0,2>, <0,4,3,5>
+ 3832727910U, // <0,3,2,6>: Cost 4 vsldoi12 <1,2,3,0>, <3,2,6,3>
+ 3365414842U, // <0,3,2,7>: Cost 4 vmrglw <1,6,0,2>, <2,6,3,7>
+ 2771298677U, // <0,3,2,u>: Cost 3 vsldoi12 <3,2,u,0>, <3,2,u,0>
+ 2216659094U, // <0,3,3,0>: Cost 3 vmrghw <0,3,1,0>, <3,0,1,2>
+ 3290409190U, // <0,3,3,1>: Cost 4 vmrghw <0,3,1,1>, <3,1,1,1>
+ 2703624496U, // <0,3,3,2>: Cost 3 vsldoi8 <3,2,0,3>, <3,2,0,3>
+ 2216683932U, // <0,3,3,3>: Cost 3 vmrghw <0,3,1,3>, <3,3,3,3>
+ 2216692226U, // <0,3,3,4>: Cost 3 vmrghw <0,3,1,4>, <3,4,5,6>
+ 3733041250U, // <0,3,3,5>: Cost 4 vsldoi4 <7,0,3,3>, <5,6,7,0>
+ 3832727988U, // <0,3,3,6>: Cost 4 vsldoi12 <1,2,3,0>, <3,3,6,0>
+ 3374712762U, // <0,3,3,7>: Cost 4 vmrglw <3,2,0,3>, <2,6,3,7>
+ 2216725278U, // <0,3,3,u>: Cost 3 vmrghw <0,3,1,u>, <3,u,1,2>
+ 2217363606U, // <0,3,4,0>: Cost 3 vmrghw <0,4,1,5>, <3,0,1,2>
+ 3291105510U, // <0,3,4,1>: Cost 4 vmrghw <0,4,1,5>, <3,1,1,1>
+ 3291105601U, // <0,3,4,2>: Cost 4 vmrghw <0,4,1,5>, <3,2,2,2>
+ 2217363868U, // <0,3,4,3>: Cost 3 vmrghw <0,4,1,5>, <3,3,3,3>
+ 2217363970U, // <0,3,4,4>: Cost 3 vmrghw <0,4,1,5>, <3,4,5,6>
+ 2758986242U, // <0,3,4,5>: Cost 3 vsldoi12 <1,2,3,0>, <3,4,5,6>
+ 3727077685U, // <0,3,4,6>: Cost 4 vsldoi4 <6,0,3,4>, <6,0,3,4>
+ 3364767674U, // <0,3,4,7>: Cost 4 vmrglw <1,5,0,4>, <2,6,3,7>
+ 2217364254U, // <0,3,4,u>: Cost 3 vmrghw <0,4,1,5>, <3,u,1,2>
+ 3832728102U, // <0,3,5,0>: Cost 4 vsldoi12 <1,2,3,0>, <3,5,0,6>
+ 3405916003U, // <0,3,5,1>: Cost 4 vmrglw <u,4,0,5>, <2,5,3,1>
+ 3376055840U, // <0,3,5,2>: Cost 4 vmrglw <3,4,0,5>, <2,1,3,2>
+ 3376055679U, // <0,3,5,3>: Cost 4 vmrglw <3,4,0,5>, <1,u,3,3>
+ 3376055194U, // <0,3,5,4>: Cost 4 vmrglw <3,4,0,5>, <1,2,3,4>
+ 3859565138U, // <0,3,5,5>: Cost 4 vsldoi12 <5,6,7,0>, <3,5,5,5>
+ 2727514210U, // <0,3,5,6>: Cost 3 vsldoi8 <7,2,0,3>, <5,6,7,0>
+ 3376056250U, // <0,3,5,7>: Cost 4 vmrglw <3,4,0,5>, <2,6,3,7>
+ 2727514210U, // <0,3,5,u>: Cost 3 vsldoi8 <7,2,0,3>, <5,6,7,0>
+ 2758986360U, // <0,3,6,0>: Cost 3 vsldoi12 <1,2,3,0>, <3,6,0,7>
+ 3709174678U, // <0,3,6,1>: Cost 4 vsldoi4 <3,0,3,6>, <1,2,3,0>
+ 3795284411U, // <0,3,6,2>: Cost 4 vsldoi8 <6,2,0,3>, <6,2,0,3>
+ 3709175980U, // <0,3,6,3>: Cost 4 vsldoi4 <3,0,3,6>, <3,0,3,6>
+ 3833096860U, // <0,3,6,4>: Cost 4 vsldoi12 <1,2,u,0>, <3,6,4,7>
+ 3376728235U, // <0,3,6,5>: Cost 5 vmrglw <3,5,0,6>, <3,0,3,5>
+ 3859565229U, // <0,3,6,6>: Cost 4 vsldoi12 <5,6,7,0>, <3,6,6,6>
+ 2773879472U, // <0,3,6,7>: Cost 3 vsldoi12 <3,6,7,0>, <3,6,7,0>
+ 2758986360U, // <0,3,6,u>: Cost 3 vsldoi12 <1,2,3,0>, <3,6,0,7>
+ 2303656854U, // <0,3,7,0>: Cost 3 vmrglw <3,6,0,7>, <1,2,3,0>
+ 3807229018U, // <0,3,7,1>: Cost 4 vsldoi8 <u,2,0,3>, <7,1,2,u>
+ 2727515284U, // <0,3,7,2>: Cost 3 vsldoi8 <7,2,0,3>, <7,2,0,3>
+ 3377399410U, // <0,3,7,3>: Cost 4 vmrglw <3,6,0,7>, <2,2,3,3>
+ 3377398682U, // <0,3,7,4>: Cost 4 vmrglw <3,6,0,7>, <1,2,3,4>
+ 3801257409U, // <0,3,7,5>: Cost 4 vsldoi8 <7,2,0,3>, <7,5,6,7>
+ 3377399980U, // <0,3,7,6>: Cost 4 vmrglw <3,6,0,7>, <3,0,3,6>
+ 3375409082U, // <0,3,7,7>: Cost 4 vmrglw <3,3,0,7>, <2,6,3,7>
+ 2731497082U, // <0,3,7,u>: Cost 3 vsldoi8 <7,u,0,3>, <7,u,0,3>
+ 1146333334U, // <0,3,u,0>: Cost 2 vmrghw LHS, <3,0,1,2>
+ 2220075238U, // <0,3,u,1>: Cost 3 vmrghw LHS, <3,1,1,1>
+ 2220075329U, // <0,3,u,2>: Cost 3 vmrghw LHS, <3,2,2,2>
+ 1146333596U, // <0,3,u,3>: Cost 2 vmrghw LHS, <3,3,3,3>
+ 1146333698U, // <0,3,u,4>: Cost 2 vmrghw LHS, <3,4,5,6>
+ 2758986566U, // <0,3,u,5>: Cost 3 vsldoi12 <1,2,3,0>, <3,u,5,6>
+ 2803739472U, // <0,3,u,6>: Cost 3 vsldoi12 <u,6,7,0>, <3,u,6,7>
+ 2295703482U, // <0,3,u,7>: Cost 3 vmrglw <2,3,0,u>, <2,6,3,7>
+ 1146333982U, // <0,3,u,u>: Cost 2 vmrghw LHS, <3,u,1,2>
+ 2214595473U, // <0,4,0,0>: Cost 3 vmrghw <0,0,0,0>, <4,0,5,0>
+ 2693677158U, // <0,4,0,1>: Cost 3 vsldoi8 <1,5,0,4>, LHS
+ 3839437689U, // <0,4,0,2>: Cost 4 vsldoi12 <2,3,4,0>, <4,0,2,3>
+ 3709200559U, // <0,4,0,3>: Cost 4 vsldoi4 <3,0,4,0>, <3,0,4,0>
+ 2693677394U, // <0,4,0,4>: Cost 3 vsldoi8 <1,5,0,4>, <0,4,1,5>
+ 1140854070U, // <0,4,0,5>: Cost 2 vmrghw <0,0,0,0>, RHS
+ 3767419409U, // <0,4,0,6>: Cost 4 vsldoi8 <1,5,0,4>, <0,6,4,7>
+ 3854109604U, // <0,4,0,7>: Cost 4 vsldoi12 <4,7,5,0>, <4,0,7,1>
+ 1140854313U, // <0,4,0,u>: Cost 2 vmrghw <0,0,0,0>, RHS
+ 1141689234U, // <0,4,1,0>: Cost 2 vmrghw LHS, <4,0,5,1>
+ 2215431114U, // <0,4,1,1>: Cost 3 vmrghw LHS, <4,1,2,3>
+ 2215431221U, // <0,4,1,2>: Cost 3 vmrghw LHS, <4,2,5,2>
+ 2635466928U, // <0,4,1,3>: Cost 3 vsldoi4 <3,0,4,1>, <3,0,4,1>
+ 1141689552U, // <0,4,1,4>: Cost 2 vmrghw LHS, <4,4,4,4>
+ 67947830U, // <0,4,1,5>: Cost 1 vmrghw LHS, RHS
+ 2215431545U, // <0,4,1,6>: Cost 3 vmrghw LHS, <4,6,5,2>
+ 2659357716U, // <0,4,1,7>: Cost 3 vsldoi4 <7,0,4,1>, <7,0,4,1>
+ 67948073U, // <0,4,1,u>: Cost 1 vmrghw LHS, RHS
+ 3767420369U, // <0,4,2,0>: Cost 4 vsldoi8 <1,5,0,4>, <2,0,3,4>
+ 3767420451U, // <0,4,2,1>: Cost 4 vsldoi8 <1,5,0,4>, <2,1,3,5>
+ 3767420520U, // <0,4,2,2>: Cost 4 vsldoi8 <1,5,0,4>, <2,2,2,2>
+ 2698323625U, // <0,4,2,3>: Cost 3 vsldoi8 <2,3,0,4>, <2,3,0,4>
+ 3709218102U, // <0,4,2,4>: Cost 4 vsldoi4 <3,0,4,2>, RHS
+ 2216013110U, // <0,4,2,5>: Cost 3 vmrghw <0,2,1,2>, RHS
+ 3767420858U, // <0,4,2,6>: Cost 4 vsldoi8 <1,5,0,4>, <2,6,3,7>
+ 3774719981U, // <0,4,2,7>: Cost 4 vsldoi8 <2,7,0,4>, <2,7,0,4>
+ 2216013353U, // <0,4,2,u>: Cost 3 vmrghw <0,2,1,2>, RHS
+ 3767421078U, // <0,4,3,0>: Cost 4 vsldoi8 <1,5,0,4>, <3,0,1,2>
+ 3776710880U, // <0,4,3,1>: Cost 4 vsldoi8 <3,1,0,4>, <3,1,0,4>
+ 3833097325U, // <0,4,3,2>: Cost 5 vsldoi12 <1,2,u,0>, <4,3,2,4>
+ 3767421340U, // <0,4,3,3>: Cost 4 vsldoi8 <1,5,0,4>, <3,3,3,3>
+ 3767421442U, // <0,4,3,4>: Cost 4 vsldoi8 <1,5,0,4>, <3,4,5,6>
+ 2216660278U, // <0,4,3,5>: Cost 3 vmrghw <0,3,1,0>, RHS
+ 3833097361U, // <0,4,3,6>: Cost 5 vsldoi12 <1,2,u,0>, <4,3,6,4>
+ 3780692678U, // <0,4,3,7>: Cost 4 vsldoi8 <3,7,0,4>, <3,7,0,4>
+ 2216660521U, // <0,4,3,u>: Cost 3 vmrghw <0,3,1,0>, RHS
+ 2617573416U, // <0,4,4,0>: Cost 3 vsldoi4 <0,0,4,4>, <0,0,4,4>
+ 2217364450U, // <0,4,4,1>: Cost 3 vmrghw <0,4,1,5>, <4,1,5,0>
+ 3691316771U, // <0,4,4,2>: Cost 4 vsldoi4 <0,0,4,4>, <2,1,3,5>
+ 3709233331U, // <0,4,4,3>: Cost 4 vsldoi4 <3,0,4,4>, <3,0,4,4>
+ 2785823952U, // <0,4,4,4>: Cost 3 vsldoi12 <5,6,7,0>, <4,4,4,4>
+ 1143622966U, // <0,4,4,5>: Cost 2 vmrghw <0,4,1,5>, RHS
+ 3691319723U, // <0,4,4,6>: Cost 4 vsldoi4 <0,0,4,4>, <6,1,7,5>
+ 3854109932U, // <0,4,4,7>: Cost 4 vsldoi12 <4,7,5,0>, <4,4,7,5>
+ 1143623209U, // <0,4,4,u>: Cost 2 vmrghw <0,4,1,5>, RHS
+ 2635497574U, // <0,4,5,0>: Cost 3 vsldoi4 <3,0,4,5>, LHS
+ 2635498390U, // <0,4,5,1>: Cost 3 vsldoi4 <3,0,4,5>, <1,2,3,0>
+ 3709240936U, // <0,4,5,2>: Cost 4 vsldoi4 <3,0,4,5>, <2,2,2,2>
+ 2635499700U, // <0,4,5,3>: Cost 3 vsldoi4 <3,0,4,5>, <3,0,4,5>
+ 2635500854U, // <0,4,5,4>: Cost 3 vsldoi4 <3,0,4,5>, RHS
+ 2785824044U, // <0,4,5,5>: Cost 3 vsldoi12 <5,6,7,0>, <4,5,5,6>
+ 1685245238U, // <0,4,5,6>: Cost 2 vsldoi12 <1,2,3,0>, RHS
+ 2659390488U, // <0,4,5,7>: Cost 3 vsldoi4 <7,0,4,5>, <7,0,4,5>
+ 1685245256U, // <0,4,5,u>: Cost 2 vsldoi12 <1,2,3,0>, RHS
+ 3839438161U, // <0,4,6,0>: Cost 4 vsldoi12 <2,3,4,0>, <4,6,0,7>
+ 3798610347U, // <0,4,6,1>: Cost 4 vsldoi8 <6,7,0,4>, <6,1,7,5>
+ 3798610426U, // <0,4,6,2>: Cost 4 vsldoi8 <6,7,0,4>, <6,2,7,3>
+ 3795956237U, // <0,4,6,3>: Cost 4 vsldoi8 <6,3,0,4>, <6,3,0,4>
+ 3733138742U, // <0,4,6,4>: Cost 4 vsldoi4 <7,0,4,6>, RHS
+ 2218634550U, // <0,4,6,5>: Cost 3 vmrghw <0,6,0,7>, RHS
+ 3798610744U, // <0,4,6,6>: Cost 4 vsldoi8 <6,7,0,4>, <6,6,6,6>
+ 2724868945U, // <0,4,6,7>: Cost 3 vsldoi8 <6,7,0,4>, <6,7,0,4>
+ 2725532578U, // <0,4,6,u>: Cost 3 vsldoi8 <6,u,0,4>, <6,u,0,4>
+ 3383371465U, // <0,4,7,0>: Cost 4 vmrglw <4,6,0,7>, <2,3,4,0>
+ 3800601668U, // <0,4,7,1>: Cost 4 vsldoi8 <7,1,0,4>, <7,1,0,4>
+ 3775386826U, // <0,4,7,2>: Cost 5 vsldoi8 <2,u,0,4>, <7,2,6,3>
+ 3801928934U, // <0,4,7,3>: Cost 4 vsldoi8 <7,3,0,4>, <7,3,0,4>
+ 3721202998U, // <0,4,7,4>: Cost 4 vsldoi4 <5,0,4,7>, RHS
+ 2780368328U, // <0,4,7,5>: Cost 3 vsldoi12 <4,7,5,0>, <4,7,5,0>
+ 3383372686U, // <0,4,7,6>: Cost 5 vmrglw <4,6,0,7>, <4,0,4,6>
+ 3854110170U, // <0,4,7,7>: Cost 4 vsldoi12 <4,7,5,0>, <4,7,7,0>
+ 2780368328U, // <0,4,7,u>: Cost 3 vsldoi12 <4,7,5,0>, <4,7,5,0>
+ 1146334098U, // <0,4,u,0>: Cost 2 vmrghw LHS, <4,0,5,1>
+ 2220076002U, // <0,4,u,1>: Cost 3 vmrghw LHS, <4,1,5,0>
+ 2220076085U, // <0,4,u,2>: Cost 3 vmrghw LHS, <4,2,5,2>
+ 2635524279U, // <0,4,u,3>: Cost 3 vsldoi4 <3,0,4,u>, <3,0,4,u>
+ 1146334416U, // <0,4,u,4>: Cost 2 vmrghw LHS, <4,4,4,4>
+ 72592694U, // <0,4,u,5>: Cost 1 vmrghw LHS, RHS
+ 1685245481U, // <0,4,u,6>: Cost 2 vsldoi12 <1,2,3,0>, RHS
+ 2659415067U, // <0,4,u,7>: Cost 3 vsldoi4 <7,0,4,u>, <7,0,4,u>
+ 72592937U, // <0,4,u,u>: Cost 1 vmrghw LHS, RHS
+ 2281704337U, // <0,5,0,0>: Cost 3 vmrglw <0,0,0,0>, <4,0,5,0>
+ 2704965734U, // <0,5,0,1>: Cost 3 vsldoi8 <3,4,0,5>, LHS
+ 3778707666U, // <0,5,0,2>: Cost 4 vsldoi8 <3,4,0,5>, <0,2,5,3>
+ 3778707708U, // <0,5,0,3>: Cost 4 vsldoi8 <3,4,0,5>, <0,3,1,0>
+ 2687050057U, // <0,5,0,4>: Cost 3 vsldoi8 <0,4,0,5>, <0,4,0,5>
+ 2214596612U, // <0,5,0,5>: Cost 3 vmrghw <0,0,0,0>, <5,5,5,5>
+ 2785824372U, // <0,5,0,6>: Cost 3 vsldoi12 <5,6,7,0>, <5,0,6,1>
+ 3854110332U, // <0,5,0,7>: Cost 4 vsldoi12 <4,7,5,0>, <5,0,7,0>
+ 2704966301U, // <0,5,0,u>: Cost 3 vsldoi8 <3,4,0,5>, LHS
+ 1567768678U, // <0,5,1,0>: Cost 2 vsldoi4 <4,0,5,1>, LHS
+ 2312236570U, // <0,5,1,1>: Cost 3 vmrglw <5,1,0,1>, <4,u,5,1>
+ 2215431915U, // <0,5,1,2>: Cost 3 vmrghw LHS, <5,2,1,3>
+ 2641512598U, // <0,5,1,3>: Cost 3 vsldoi4 <4,0,5,1>, <3,0,1,2>
+ 1567771538U, // <0,5,1,4>: Cost 2 vsldoi4 <4,0,5,1>, <4,0,5,1>
+ 1141690372U, // <0,5,1,5>: Cost 2 vmrghw LHS, <5,5,5,5>
+ 1141690466U, // <0,5,1,6>: Cost 2 vmrghw LHS, <5,6,7,0>
+ 2641515514U, // <0,5,1,7>: Cost 3 vsldoi4 <4,0,5,1>, <7,0,1,2>
+ 1141690615U, // <0,5,1,u>: Cost 2 vmrghw LHS, <5,u,5,5>
+ 3772736973U, // <0,5,2,0>: Cost 4 vsldoi8 <2,4,0,5>, <2,0,3,0>
+ 3778709024U, // <0,5,2,1>: Cost 4 vsldoi8 <3,4,0,5>, <2,1,3,2>
+ 3778709096U, // <0,5,2,2>: Cost 4 vsldoi8 <3,4,0,5>, <2,2,2,2>
+ 3778709158U, // <0,5,2,3>: Cost 4 vsldoi8 <3,4,0,5>, <2,3,0,1>
+ 3772737275U, // <0,5,2,4>: Cost 4 vsldoi8 <2,4,0,5>, <2,4,0,5>
+ 3859566351U, // <0,5,2,5>: Cost 4 vsldoi12 <5,6,7,0>, <5,2,5,3>
+ 3778709434U, // <0,5,2,6>: Cost 4 vsldoi8 <3,4,0,5>, <2,6,3,7>
+ 3805251562U, // <0,5,2,7>: Cost 4 vsldoi8 <7,u,0,5>, <2,7,0,1>
+ 3775391807U, // <0,5,2,u>: Cost 4 vsldoi8 <2,u,0,5>, <2,u,0,5>
+ 2704967830U, // <0,5,3,0>: Cost 3 vsldoi8 <3,4,0,5>, <3,0,1,2>
+ 3776719073U, // <0,5,3,1>: Cost 4 vsldoi8 <3,1,0,5>, <3,1,0,5>
+ 3777382706U, // <0,5,3,2>: Cost 4 vsldoi8 <3,2,0,5>, <3,2,0,5>
+ 3778709887U, // <0,5,3,3>: Cost 4 vsldoi8 <3,4,0,5>, <3,3,0,1>
+ 2704968148U, // <0,5,3,4>: Cost 3 vsldoi8 <3,4,0,5>, <3,4,0,5>
+ 3857428317U, // <0,5,3,5>: Cost 4 vsldoi12 <5,3,5,0>, <5,3,5,0>
+ 3364096514U, // <0,5,3,6>: Cost 4 vmrglw <1,4,0,3>, <3,4,5,6>
+ 3780700871U, // <0,5,3,7>: Cost 4 vsldoi8 <3,7,0,5>, <3,7,0,5>
+ 2707622680U, // <0,5,3,u>: Cost 3 vsldoi8 <3,u,0,5>, <3,u,0,5>
+ 2728856466U, // <0,5,4,0>: Cost 3 vsldoi8 <7,4,0,5>, <4,0,5,1>
+ 3697361674U, // <0,5,4,1>: Cost 4 vsldoi4 <1,0,5,4>, <1,0,5,4>
+ 3697362601U, // <0,5,4,2>: Cost 4 vsldoi4 <1,0,5,4>, <2,3,0,4>
+ 3364766635U, // <0,5,4,3>: Cost 4 vmrglw <1,5,0,4>, <1,2,5,3>
+ 2217365428U, // <0,5,4,4>: Cost 3 vmrghw <0,4,1,5>, <5,4,5,6>
+ 2704969014U, // <0,5,4,5>: Cost 3 vsldoi8 <3,4,0,5>, RHS
+ 2785824700U, // <0,5,4,6>: Cost 3 vsldoi12 <5,6,7,0>, <5,4,6,5>
+ 3364766963U, // <0,5,4,7>: Cost 4 vmrglw <1,5,0,4>, <1,6,5,7>
+ 2704969257U, // <0,5,4,u>: Cost 3 vsldoi8 <3,4,0,5>, RHS
+ 3846148050U, // <0,5,5,0>: Cost 4 vsldoi12 <3,4,5,0>, <5,5,0,0>
+ 2326203282U, // <0,5,5,1>: Cost 3 vmrglw <7,4,0,5>, <4,0,5,1>
+ 3291746027U, // <0,5,5,2>: Cost 4 vmrghw <0,5,1,2>, <5,2,1,3>
+ 3376054482U, // <0,5,5,3>: Cost 4 vmrglw <3,4,0,5>, <0,2,5,3>
+ 3790655366U, // <0,5,5,4>: Cost 4 vsldoi8 <5,4,0,5>, <5,4,0,5>
+ 2785824772U, // <0,5,5,5>: Cost 3 vsldoi12 <5,6,7,0>, <5,5,5,5>
+ 2724876386U, // <0,5,5,6>: Cost 3 vsldoi8 <6,7,0,5>, <5,6,7,0>
+ 3858903057U, // <0,5,5,7>: Cost 4 vsldoi12 <5,5,7,0>, <5,5,7,0>
+ 2736820484U, // <0,5,5,u>: Cost 3 vsldoi8 <u,7,0,5>, <5,u,7,0>
+ 2659467366U, // <0,5,6,0>: Cost 3 vsldoi4 <7,0,5,6>, LHS
+ 3859566643U, // <0,5,6,1>: Cost 4 vsldoi12 <5,6,7,0>, <5,6,1,7>
+ 3798618618U, // <0,5,6,2>: Cost 4 vsldoi8 <6,7,0,5>, <6,2,7,3>
+ 3852857410U, // <0,5,6,3>: Cost 4 vsldoi12 <4,5,6,0>, <5,6,3,4>
+ 2659470646U, // <0,5,6,4>: Cost 3 vsldoi4 <7,0,5,6>, RHS
+ 2659471458U, // <0,5,6,5>: Cost 3 vsldoi4 <7,0,5,6>, <5,6,7,0>
+ 3832729696U, // <0,5,6,6>: Cost 4 vsldoi12 <1,2,3,0>, <5,6,6,7>
+ 1712083042U, // <0,5,6,7>: Cost 2 vsldoi12 <5,6,7,0>, <5,6,7,0>
+ 1712156779U, // <0,5,6,u>: Cost 2 vsldoi12 <5,6,u,0>, <5,6,u,0>
+ 2731512826U, // <0,5,7,0>: Cost 3 vsldoi8 <7,u,0,5>, <7,0,1,2>
+ 3859566717U, // <0,5,7,1>: Cost 4 vsldoi12 <5,6,7,0>, <5,7,1,0>
+ 3798619284U, // <0,5,7,2>: Cost 4 vsldoi8 <6,7,0,5>, <7,2,0,3>
+ 3778712803U, // <0,5,7,3>: Cost 4 vsldoi8 <3,4,0,5>, <7,3,0,1>
+ 2728858936U, // <0,5,7,4>: Cost 3 vsldoi8 <7,4,0,5>, <7,4,0,5>
+ 3859566753U, // <0,5,7,5>: Cost 4 vsldoi12 <5,6,7,0>, <5,7,5,0>
+ 3377398135U, // <0,5,7,6>: Cost 4 vmrglw <3,6,0,7>, <0,4,5,6>
+ 3798619686U, // <0,5,7,7>: Cost 4 vsldoi8 <6,7,0,5>, <7,7,0,0>
+ 2731513468U, // <0,5,7,u>: Cost 3 vsldoi8 <7,u,0,5>, <7,u,0,5>
+ 1567826022U, // <0,5,u,0>: Cost 2 vsldoi4 <4,0,5,u>, LHS
+ 2704971566U, // <0,5,u,1>: Cost 3 vsldoi8 <3,4,0,5>, LHS
+ 2220076779U, // <0,5,u,2>: Cost 3 vmrghw LHS, <5,2,1,3>
+ 2641569942U, // <0,5,u,3>: Cost 3 vsldoi4 <4,0,5,u>, <3,0,1,2>
+ 1567828889U, // <0,5,u,4>: Cost 2 vsldoi4 <4,0,5,u>, <4,0,5,u>
+ 1146335236U, // <0,5,u,5>: Cost 2 vmrghw LHS, <5,5,5,5>
+ 1146335330U, // <0,5,u,6>: Cost 2 vmrghw LHS, <5,6,7,0>
+ 1713410308U, // <0,5,u,7>: Cost 2 vsldoi12 <5,u,7,0>, <5,u,7,0>
+ 1713484045U, // <0,5,u,u>: Cost 2 vsldoi12 <5,u,u,0>, <5,u,u,0>
+ 2214596949U, // <0,6,0,0>: Cost 3 vmrghw <0,0,0,0>, <6,0,7,0>
+ 2214678951U, // <0,6,0,1>: Cost 3 vmrghw <0,0,1,1>, <6,1,7,1>
+ 2214597114U, // <0,6,0,2>: Cost 3 vmrghw <0,0,0,0>, <6,2,7,3>
+ 3852857653U, // <0,6,0,3>: Cost 4 vsldoi12 <4,5,6,0>, <6,0,3,4>
+ 3832729919U, // <0,6,0,4>: Cost 4 vsldoi12 <1,2,3,0>, <6,0,4,5>
+ 3721293427U, // <0,6,0,5>: Cost 4 vsldoi4 <5,0,6,0>, <5,0,6,0>
+ 2214597432U, // <0,6,0,6>: Cost 3 vmrghw <0,0,0,0>, <6,6,6,6>
+ 1207962934U, // <0,6,0,7>: Cost 2 vmrglw <0,0,0,0>, RHS
+ 1207962935U, // <0,6,0,u>: Cost 2 vmrglw <0,0,0,0>, RHS
+ 2215432481U, // <0,6,1,0>: Cost 3 vmrghw LHS, <6,0,1,2>
+ 2215432615U, // <0,6,1,1>: Cost 3 vmrghw LHS, <6,1,7,1>
+ 1141690874U, // <0,6,1,2>: Cost 2 vmrghw LHS, <6,2,7,3>
+ 2215432754U, // <0,6,1,3>: Cost 3 vmrghw LHS, <6,3,4,5>
+ 2215432817U, // <0,6,1,4>: Cost 3 vmrghw LHS, <6,4,2,5>
+ 2215432939U, // <0,6,1,5>: Cost 3 vmrghw LHS, <6,5,7,1>
+ 1141691192U, // <0,6,1,6>: Cost 2 vmrghw LHS, <6,6,6,6>
+ 1221905718U, // <0,6,1,7>: Cost 2 vmrglw <2,3,0,1>, RHS
+ 1221905719U, // <0,6,1,u>: Cost 2 vmrglw <2,3,0,1>, RHS
+ 3852857787U, // <0,6,2,0>: Cost 4 vsldoi12 <4,5,6,0>, <6,2,0,3>
+ 3289764265U, // <0,6,2,1>: Cost 4 vmrghw <0,2,1,3>, <6,1,7,3>
+ 3289690618U, // <0,6,2,2>: Cost 4 vmrghw <0,2,0,3>, <6,2,7,3>
+ 3862589907U, // <0,6,2,3>: Cost 4 vsldoi12 <6,2,3,0>, <6,2,3,0>
+ 3733253430U, // <0,6,2,4>: Cost 4 vsldoi4 <7,0,6,2>, RHS
+ 3733254242U, // <0,6,2,5>: Cost 4 vsldoi4 <7,0,6,2>, <5,6,7,0>
+ 3777390522U, // <0,6,2,6>: Cost 4 vsldoi8 <3,2,0,6>, <2,6,3,7>
+ 2785825274U, // <0,6,2,7>: Cost 3 vsldoi12 <5,6,7,0>, <6,2,7,3>
+ 2785825283U, // <0,6,2,u>: Cost 3 vsldoi12 <5,6,7,0>, <6,2,u,3>
+ 3777390742U, // <0,6,3,0>: Cost 4 vsldoi8 <3,2,0,6>, <3,0,1,2>
+ 3863106066U, // <0,6,3,1>: Cost 4 vsldoi12 <6,3,1,0>, <6,3,1,0>
+ 3777390899U, // <0,6,3,2>: Cost 4 vsldoi8 <3,2,0,6>, <3,2,0,6>
+ 3290436146U, // <0,6,3,3>: Cost 4 vmrghw <0,3,1,4>, <6,3,4,5>
+ 3779381762U, // <0,6,3,4>: Cost 4 vsldoi8 <3,5,0,6>, <3,4,5,6>
+ 3779381798U, // <0,6,3,5>: Cost 4 vsldoi8 <3,5,0,6>, <3,5,0,6>
+ 3733262920U, // <0,6,3,6>: Cost 4 vsldoi4 <7,0,6,3>, <6,3,7,0>
+ 2300972342U, // <0,6,3,7>: Cost 3 vmrglw <3,2,0,3>, RHS
+ 2300972343U, // <0,6,3,u>: Cost 3 vmrglw <3,2,0,3>, RHS
+ 3802606482U, // <0,6,4,0>: Cost 4 vsldoi8 <7,4,0,6>, <4,0,5,1>
+ 2217365931U, // <0,6,4,1>: Cost 3 vmrghw <0,4,1,5>, <6,1,7,5>
+ 2217366010U, // <0,6,4,2>: Cost 3 vmrghw <0,4,1,5>, <6,2,7,3>
+ 3291107890U, // <0,6,4,3>: Cost 4 vmrghw <0,4,1,5>, <6,3,4,5>
+ 3291099805U, // <0,6,4,4>: Cost 4 vmrghw <0,4,1,4>, <6,4,7,4>
+ 3777391926U, // <0,6,4,5>: Cost 4 vsldoi8 <3,2,0,6>, RHS
+ 2217366328U, // <0,6,4,6>: Cost 3 vmrghw <0,4,1,5>, <6,6,6,6>
+ 2291027254U, // <0,6,4,7>: Cost 3 vmrglw <1,5,0,4>, RHS
+ 2291027255U, // <0,6,4,u>: Cost 3 vmrglw <1,5,0,4>, RHS
+ 3852858033U, // <0,6,5,0>: Cost 4 vsldoi12 <4,5,6,0>, <6,5,0,6>
+ 3395964532U, // <0,6,5,1>: Cost 4 vmrglw <6,7,0,5>, <5,0,6,1>
+ 3864507069U, // <0,6,5,2>: Cost 4 vsldoi12 <6,5,2,0>, <6,5,2,0>
+ 3376056678U, // <0,6,5,3>: Cost 5 vmrglw <3,4,0,5>, <3,2,6,3>
+ 3721334070U, // <0,6,5,4>: Cost 4 vsldoi4 <5,0,6,5>, RHS
+ 3395964860U, // <0,6,5,5>: Cost 4 vmrglw <6,7,0,5>, <5,4,6,5>
+ 3864802017U, // <0,6,5,6>: Cost 4 vsldoi12 <6,5,6,0>, <6,5,6,0>
+ 2302315830U, // <0,6,5,7>: Cost 3 vmrglw <3,4,0,5>, RHS
+ 2302315831U, // <0,6,5,u>: Cost 3 vmrglw <3,4,0,5>, RHS
+ 3852858108U, // <0,6,6,0>: Cost 4 vsldoi12 <4,5,6,0>, <6,6,0,0>
+ 3398624745U, // <0,6,6,1>: Cost 4 vmrglw <7,2,0,6>, <2,0,6,1>
+ 2218668538U, // <0,6,6,2>: Cost 3 vmrghw <0,6,1,2>, <6,2,7,3>
+ 3292418610U, // <0,6,6,3>: Cost 4 vmrghw <0,6,1,3>, <6,3,4,5>
+ 3733286198U, // <0,6,6,4>: Cost 4 vsldoi4 <7,0,6,6>, RHS
+ 3797299889U, // <0,6,6,5>: Cost 4 vsldoi8 <6,5,0,6>, <6,5,0,6>
+ 2785825592U, // <0,6,6,6>: Cost 3 vsldoi12 <5,6,7,0>, <6,6,6,6>
+ 2785825602U, // <0,6,6,7>: Cost 3 vsldoi12 <5,6,7,0>, <6,6,7,7>
+ 2785825611U, // <0,6,6,u>: Cost 3 vsldoi12 <5,6,7,0>, <6,6,u,7>
+ 2785825614U, // <0,6,7,0>: Cost 3 vsldoi12 <5,6,7,0>, <6,7,0,1>
+ 2758988632U, // <0,6,7,1>: Cost 3 vsldoi12 <1,2,3,0>, <6,7,1,2>
+ 3377400084U, // <0,6,7,2>: Cost 4 vmrglw <3,6,0,7>, <3,1,6,2>
+ 2792166248U, // <0,6,7,3>: Cost 3 vsldoi12 <6,7,3,0>, <6,7,3,0>
+ 2785825654U, // <0,6,7,4>: Cost 3 vsldoi12 <5,6,7,0>, <6,7,4,5>
+ 2785825664U, // <0,6,7,5>: Cost 3 vsldoi12 <5,6,7,0>, <6,7,5,6>
+ 3859567493U, // <0,6,7,6>: Cost 4 vsldoi12 <5,6,7,0>, <6,7,6,2>
+ 2303659318U, // <0,6,7,7>: Cost 3 vmrglw <3,6,0,7>, RHS
+ 2303659319U, // <0,6,7,u>: Cost 3 vmrglw <3,6,0,7>, RHS
+ 2785825695U, // <0,6,u,0>: Cost 3 vsldoi12 <5,6,7,0>, <6,u,0,1>
+ 2220077479U, // <0,6,u,1>: Cost 3 vmrghw LHS, <6,1,7,1>
+ 1146335738U, // <0,6,u,2>: Cost 2 vmrghw LHS, <6,2,7,3>
+ 2792829881U, // <0,6,u,3>: Cost 3 vsldoi12 <6,u,3,0>, <6,u,3,0>
+ 2785825735U, // <0,6,u,4>: Cost 3 vsldoi12 <5,6,7,0>, <6,u,4,5>
+ 2785825664U, // <0,6,u,5>: Cost 3 vsldoi12 <5,6,7,0>, <6,7,5,6>
+ 1146336056U, // <0,6,u,6>: Cost 2 vmrghw LHS, <6,6,6,6>
+ 1221963062U, // <0,6,u,7>: Cost 2 vmrglw <2,3,0,u>, RHS
+ 1221963063U, // <0,6,u,u>: Cost 2 vmrglw <2,3,0,u>, RHS
+ 2653593600U, // <0,7,0,0>: Cost 3 vsldoi4 <6,0,7,0>, <0,0,0,0>
+ 2706309222U, // <0,7,0,1>: Cost 3 vsldoi8 <3,6,0,7>, LHS
+ 3709421498U, // <0,7,0,2>: Cost 4 vsldoi4 <3,0,7,0>, <2,6,3,7>
+ 2281705978U, // <0,7,0,3>: Cost 3 vmrglw <0,0,0,0>, <6,2,7,3>
+ 2785825816U, // <0,7,0,4>: Cost 3 vsldoi12 <5,6,7,0>, <7,0,4,5>
+ 2785825826U, // <0,7,0,5>: Cost 3 vsldoi12 <5,6,7,0>, <7,0,5,6>
+ 2653598037U, // <0,7,0,6>: Cost 3 vsldoi4 <6,0,7,0>, <6,0,7,0>
+ 2214598252U, // <0,7,0,7>: Cost 3 vmrghw <0,0,0,0>, <7,7,7,7>
+ 2706309789U, // <0,7,0,u>: Cost 3 vsldoi8 <3,6,0,7>, LHS
+ 1141691386U, // <0,7,1,0>: Cost 2 vmrghw LHS, <7,0,1,2>
+ 2215433290U, // <0,7,1,1>: Cost 3 vmrghw LHS, <7,1,1,1>
+ 2706310038U, // <0,7,1,2>: Cost 3 vsldoi8 <3,6,0,7>, <1,2,3,0>
+ 2322190842U, // <0,7,1,3>: Cost 3 vmrglw <6,7,0,1>, <6,2,7,3>
+ 1141691750U, // <0,7,1,4>: Cost 2 vmrghw LHS, <7,4,5,6>
+ 2215433654U, // <0,7,1,5>: Cost 3 vmrghw LHS, <7,5,5,5>
+ 2653606230U, // <0,7,1,6>: Cost 3 vsldoi4 <6,0,7,1>, <6,0,7,1>
+ 1141692012U, // <0,7,1,7>: Cost 2 vmrghw LHS, <7,7,7,7>
+ 1141692034U, // <0,7,1,u>: Cost 2 vmrghw LHS, <7,u,1,2>
+ 2785825940U, // <0,7,2,0>: Cost 3 vsldoi12 <5,6,7,0>, <7,2,0,3>
+ 3768108576U, // <0,7,2,1>: Cost 5 vsldoi8 <1,6,0,7>, <2,1,3,2>
+ 3780052584U, // <0,7,2,2>: Cost 4 vsldoi8 <3,6,0,7>, <2,2,2,2>
+ 2794820780U, // <0,7,2,3>: Cost 3 vsldoi12 <7,2,3,0>, <7,2,3,0>
+ 3859641528U, // <0,7,2,4>: Cost 4 vsldoi12 <5,6,u,0>, <7,2,4,3>
+ 3733327970U, // <0,7,2,5>: Cost 4 vsldoi4 <7,0,7,2>, <5,6,7,0>
+ 3778062266U, // <0,7,2,6>: Cost 4 vsldoi8 <3,3,0,7>, <2,6,3,7>
+ 3733328944U, // <0,7,2,7>: Cost 4 vsldoi4 <7,0,7,2>, <7,0,7,2>
+ 2795189465U, // <0,7,2,u>: Cost 3 vsldoi12 <7,2,u,0>, <7,2,u,0>
+ 2324861026U, // <0,7,3,0>: Cost 3 vmrglw <7,2,0,3>, <5,6,7,0>
+ 3780053233U, // <0,7,3,1>: Cost 4 vsldoi8 <3,6,0,7>, <3,1,2,3>
+ 3780053296U, // <0,7,3,2>: Cost 4 vsldoi8 <3,6,0,7>, <3,2,0,3>
+ 3778062725U, // <0,7,3,3>: Cost 4 vsldoi8 <3,3,0,7>, <3,3,0,7>
+ 3780053506U, // <0,7,3,4>: Cost 4 vsldoi8 <3,6,0,7>, <3,4,5,6>
+ 3803941469U, // <0,7,3,5>: Cost 4 vsldoi8 <7,6,0,7>, <3,5,6,7>
+ 2706311800U, // <0,7,3,6>: Cost 3 vsldoi8 <3,6,0,7>, <3,6,0,7>
+ 3398603586U, // <0,7,3,7>: Cost 4 vmrglw <7,2,0,3>, <6,6,7,7>
+ 2707639066U, // <0,7,3,u>: Cost 3 vsldoi8 <3,u,0,7>, <3,u,0,7>
+ 2217366522U, // <0,7,4,0>: Cost 3 vmrghw <0,4,1,5>, <7,0,1,2>
+ 3727369110U, // <0,7,4,1>: Cost 4 vsldoi4 <6,0,7,4>, <1,2,3,0>
+ 3291108500U, // <0,7,4,2>: Cost 4 vmrghw <0,4,1,5>, <7,2,0,3>
+ 3727370872U, // <0,7,4,3>: Cost 4 vsldoi4 <6,0,7,4>, <3,6,0,7>
+ 2217366886U, // <0,7,4,4>: Cost 3 vmrghw <0,4,1,5>, <7,4,5,6>
+ 2706312502U, // <0,7,4,5>: Cost 3 vsldoi8 <3,6,0,7>, RHS
+ 3786026321U, // <0,7,4,6>: Cost 4 vsldoi8 <4,6,0,7>, <4,6,0,7>
+ 2217367148U, // <0,7,4,7>: Cost 3 vmrghw <0,4,1,5>, <7,7,7,7>
+ 2706312745U, // <0,7,4,u>: Cost 3 vsldoi8 <3,6,0,7>, RHS
+ 2322223202U, // <0,7,5,0>: Cost 3 vmrglw <6,7,0,5>, <5,6,7,0>
+ 3399946987U, // <0,7,5,1>: Cost 4 vmrglw <7,4,0,5>, <6,5,7,1>
+ 3291780244U, // <0,7,5,2>: Cost 4 vmrghw <0,5,1,6>, <7,2,0,3>
+ 3727378582U, // <0,7,5,3>: Cost 4 vsldoi4 <6,0,7,5>, <3,0,1,2>
+ 3727379766U, // <0,7,5,4>: Cost 4 vsldoi4 <6,0,7,5>, RHS
+ 3859568054U, // <0,7,5,5>: Cost 4 vsldoi12 <5,6,7,0>, <7,5,5,5>
+ 2785826241U, // <0,7,5,6>: Cost 3 vsldoi12 <5,6,7,0>, <7,5,6,7>
+ 3395965762U, // <0,7,5,7>: Cost 4 vmrglw <6,7,0,5>, <6,6,7,7>
+ 2787153363U, // <0,7,5,u>: Cost 3 vsldoi12 <5,u,7,0>, <7,5,u,7>
+ 2785826268U, // <0,7,6,0>: Cost 3 vsldoi12 <5,6,7,0>, <7,6,0,7>
+ 3780055420U, // <0,7,6,1>: Cost 5 vsldoi8 <3,6,0,7>, <6,1,2,3>
+ 3859568110U, // <0,7,6,2>: Cost 4 vsldoi12 <5,6,7,0>, <7,6,2,7>
+ 3874534903U, // <0,7,6,3>: Cost 4 vsldoi12 <u,2,3,0>, <7,6,3,7>
+ 3859641856U, // <0,7,6,4>: Cost 4 vsldoi12 <5,6,u,0>, <7,6,4,7>
+ 3733360738U, // <0,7,6,5>: Cost 4 vsldoi4 <7,0,7,6>, <5,6,7,0>
+ 3859568145U, // <0,7,6,6>: Cost 4 vsldoi12 <5,6,7,0>, <7,6,6,6>
+ 2797770260U, // <0,7,6,7>: Cost 3 vsldoi12 <7,6,7,0>, <7,6,7,0>
+ 2797843997U, // <0,7,6,u>: Cost 3 vsldoi12 <7,6,u,0>, <7,6,u,0>
+ 2785826342U, // <0,7,7,0>: Cost 3 vsldoi12 <5,6,7,0>, <7,7,0,0>
+ 3727393686U, // <0,7,7,1>: Cost 4 vsldoi4 <6,0,7,7>, <1,2,3,0>
+ 3868563003U, // <0,7,7,2>: Cost 4 vsldoi12 <7,2,3,0>, <7,7,2,3>
+ 3377397988U, // <0,7,7,3>: Cost 4 vmrglw <3,6,0,7>, <0,2,7,3>
+ 2219349350U, // <0,7,7,4>: Cost 3 vmrghw <0,7,1,4>, <7,4,5,6>
+ 3859568217U, // <0,7,7,5>: Cost 4 vsldoi12 <5,6,7,0>, <7,7,5,6>
+ 2730202588U, // <0,7,7,6>: Cost 3 vsldoi8 <7,6,0,7>, <7,6,0,7>
+ 2785826412U, // <0,7,7,7>: Cost 3 vsldoi12 <5,6,7,0>, <7,7,7,7>
+ 2731529854U, // <0,7,7,u>: Cost 3 vsldoi8 <7,u,0,7>, <7,u,0,7>
+ 1146336250U, // <0,7,u,0>: Cost 2 vmrghw LHS, <7,0,1,2>
+ 2706315054U, // <0,7,u,1>: Cost 3 vsldoi8 <3,6,0,7>, LHS
+ 2653660845U, // <0,7,u,2>: Cost 3 vsldoi4 <6,0,7,u>, <2,3,0,u>
+ 2322248186U, // <0,7,u,3>: Cost 3 vmrglw <6,7,0,u>, <6,2,7,3>
+ 1146336614U, // <0,7,u,4>: Cost 2 vmrghw LHS, <7,4,5,6>
+ 2706315418U, // <0,7,u,5>: Cost 3 vsldoi8 <3,6,0,7>, RHS
+ 2653663581U, // <0,7,u,6>: Cost 3 vsldoi4 <6,0,7,u>, <6,0,7,u>
+ 1146336876U, // <0,7,u,7>: Cost 2 vmrghw LHS, <7,7,7,7>
+ 1146336898U, // <0,7,u,u>: Cost 2 vmrghw LHS, <7,u,1,2>
+ 202162278U, // <0,u,0,0>: Cost 1 vspltisw0 LHS
+ 1624612966U, // <0,u,0,1>: Cost 2 vsldoi8 <2,3,0,u>, LHS
+ 2629780986U, // <0,u,0,2>: Cost 3 vsldoi4 <2,0,u,0>, <2,0,u,0>
+ 1207959708U, // <0,u,0,3>: Cost 2 vmrglw <0,0,0,0>, LHS
+ 1544097078U, // <0,u,0,4>: Cost 2 vsldoi4 <0,0,u,0>, RHS
+ 1140856986U, // <0,u,0,5>: Cost 2 vmrghw <0,0,0,0>, RHS
+ 2698355253U, // <0,u,0,6>: Cost 3 vsldoi8 <2,3,0,u>, <0,6,u,7>
+ 1207962952U, // <0,u,0,7>: Cost 2 vmrglw <0,0,0,0>, RHS
+ 202162278U, // <0,u,0,u>: Cost 1 vspltisw0 LHS
+ 1142134483U, // <0,u,1,0>: Cost 2 vmrghw LHS, <u,0,1,2>
+ 67950382U, // <0,u,1,1>: Cost 1 vmrghw LHS, LHS
+ 1142175624U, // <0,u,1,2>: Cost 2 vmrghw LHS, <u,2,3,3>
+ 1142175676U, // <0,u,1,3>: Cost 2 vmrghw LHS, <u,3,0,1>
+ 1142134847U, // <0,u,1,4>: Cost 2 vmrghw LHS, <u,4,5,6>
+ 67950746U, // <0,u,1,5>: Cost 1 vmrghw LHS, RHS
+ 1142175952U, // <0,u,1,6>: Cost 2 vmrghw LHS, <u,6,3,7>
+ 1221905736U, // <0,u,1,7>: Cost 2 vmrglw <2,3,0,1>, RHS
+ 67950949U, // <0,u,1,u>: Cost 1 vmrghw LHS, LHS
+ 1562026086U, // <0,u,2,0>: Cost 2 vsldoi4 <3,0,u,2>, LHS
+ 2216015662U, // <0,u,2,1>: Cost 3 vmrghw <0,2,1,2>, LHS
+ 2698356328U, // <0,u,2,2>: Cost 3 vsldoi8 <2,3,0,u>, <2,2,2,2>
+ 835584U, // <0,u,2,3>: Cost 0 copy LHS
+ 1562029366U, // <0,u,2,4>: Cost 2 vsldoi4 <3,0,u,2>, RHS
+ 2216016026U, // <0,u,2,5>: Cost 3 vmrghw <0,2,1,2>, RHS
+ 2698356666U, // <0,u,2,6>: Cost 3 vsldoi8 <2,3,0,u>, <2,6,3,7>
+ 1585919033U, // <0,u,2,7>: Cost 2 vsldoi4 <7,0,u,2>, <7,0,u,2>
+ 835584U, // <0,u,2,u>: Cost 0 copy LHS
+ 2758989756U, // <0,u,3,0>: Cost 3 vsldoi12 <1,2,3,0>, <u,3,0,1>
+ 2216662830U, // <0,u,3,1>: Cost 3 vmrghw <0,3,1,0>, LHS
+ 2703665461U, // <0,u,3,2>: Cost 3 vsldoi8 <3,2,0,u>, <3,2,0,u>
+ 2758989782U, // <0,u,3,3>: Cost 3 vsldoi12 <1,2,3,0>, <u,3,3,0>
+ 2758989796U, // <0,u,3,4>: Cost 3 vsldoi12 <1,2,3,0>, <u,3,4,5>
+ 2216663194U, // <0,u,3,5>: Cost 3 vmrghw <0,3,1,0>, RHS
+ 2706319993U, // <0,u,3,6>: Cost 3 vsldoi8 <3,6,0,u>, <3,6,0,u>
+ 2300972360U, // <0,u,3,7>: Cost 3 vmrglw <3,2,0,3>, RHS
+ 2216663397U, // <0,u,3,u>: Cost 3 vmrghw <0,3,1,0>, LHS
+ 2217367251U, // <0,u,4,0>: Cost 3 vmrghw <0,4,1,5>, <u,0,1,2>
+ 1143625518U, // <0,u,4,1>: Cost 2 vmrghw <0,4,1,5>, LHS
+ 2217367432U, // <0,u,4,2>: Cost 3 vmrghw <0,4,1,5>, <u,2,3,3>
+ 2217367484U, // <0,u,4,3>: Cost 3 vmrghw <0,4,1,5>, <u,3,0,1>
+ 1143619922U, // <0,u,4,4>: Cost 2 vmrghw <0,4,1,5>, <0,4,1,5>
+ 1143625882U, // <0,u,4,5>: Cost 2 vmrghw <0,4,1,5>, RHS
+ 2217367760U, // <0,u,4,6>: Cost 3 vmrghw <0,4,1,5>, <u,6,3,7>
+ 2291027272U, // <0,u,4,7>: Cost 3 vmrglw <1,5,0,4>, RHS
+ 1143626085U, // <0,u,4,u>: Cost 2 vmrghw <0,4,1,5>, LHS
+ 2635792486U, // <0,u,5,0>: Cost 3 vsldoi4 <3,0,u,5>, LHS
+ 2635793302U, // <0,u,5,1>: Cost 3 vsldoi4 <3,0,u,5>, <1,2,3,0>
+ 2302314646U, // <0,u,5,2>: Cost 3 vmrglw <3,4,0,5>, <3,0,1,2>
+ 2635794648U, // <0,u,5,3>: Cost 3 vsldoi4 <3,0,u,5>, <3,0,u,5>
+ 2635795766U, // <0,u,5,4>: Cost 3 vsldoi4 <3,0,u,5>, RHS
+ 2717601754U, // <0,u,5,5>: Cost 3 vsldoi8 <5,5,0,u>, <5,5,0,u>
+ 1685248154U, // <0,u,5,6>: Cost 2 vsldoi12 <1,2,3,0>, RHS
+ 2302315848U, // <0,u,5,7>: Cost 3 vmrglw <3,4,0,5>, RHS
+ 1685248172U, // <0,u,5,u>: Cost 2 vsldoi12 <1,2,3,0>, RHS
+ 2759358645U, // <0,u,6,0>: Cost 3 vsldoi12 <1,2,u,0>, <u,6,0,7>
+ 2218637102U, // <0,u,6,1>: Cost 3 vmrghw <0,6,0,7>, LHS
+ 2724901370U, // <0,u,6,2>: Cost 3 vsldoi8 <6,7,0,u>, <6,2,7,3>
+ 2758990032U, // <0,u,6,3>: Cost 3 vsldoi12 <1,2,3,0>, <u,6,3,7>
+ 2659691830U, // <0,u,6,4>: Cost 3 vsldoi4 <7,0,u,6>, RHS
+ 2659471458U, // <0,u,6,5>: Cost 3 vsldoi4 <7,0,5,6>, <5,6,7,0>
+ 2724901688U, // <0,u,6,6>: Cost 3 vsldoi8 <6,7,0,u>, <6,6,6,6>
+ 1651159893U, // <0,u,6,7>: Cost 2 vsldoi8 <6,7,0,u>, <6,7,0,u>
+ 1651823526U, // <0,u,6,u>: Cost 2 vsldoi8 <6,u,0,u>, <6,u,0,u>
+ 2785827072U, // <0,u,7,0>: Cost 3 vsldoi12 <5,6,7,0>, <u,7,0,1>
+ 2803964168U, // <0,u,7,1>: Cost 3 vsldoi12 <u,7,1,0>, <u,7,1,0>
+ 2727556249U, // <0,u,7,2>: Cost 3 vsldoi8 <7,2,0,u>, <7,2,0,u>
+ 2303656092U, // <0,u,7,3>: Cost 3 vmrglw <3,6,0,7>, LHS
+ 2785827112U, // <0,u,7,4>: Cost 3 vsldoi12 <5,6,7,0>, <u,7,4,5>
+ 2785827122U, // <0,u,7,5>: Cost 3 vsldoi12 <5,6,7,0>, <u,7,5,6>
+ 2730210781U, // <0,u,7,6>: Cost 3 vsldoi8 <7,6,0,u>, <7,6,0,u>
+ 2303659336U, // <0,u,7,7>: Cost 3 vmrglw <3,6,0,7>, RHS
+ 2303656097U, // <0,u,7,u>: Cost 3 vmrglw <3,6,0,7>, LHS
+ 202162278U, // <0,u,u,0>: Cost 1 vspltisw0 LHS
+ 72595246U, // <0,u,u,1>: Cost 1 vmrghw LHS, LHS
+ 1146337160U, // <0,u,u,2>: Cost 2 vmrghw LHS, <u,2,3,3>
+ 835584U, // <0,u,u,3>: Cost 0 copy LHS
+ 1146337343U, // <0,u,u,4>: Cost 2 vmrghw LHS, <u,4,5,6>
+ 72595610U, // <0,u,u,5>: Cost 1 vmrghw LHS, RHS
+ 1146337488U, // <0,u,u,6>: Cost 2 vmrghw LHS, <u,6,3,7>
+ 1221963080U, // <0,u,u,7>: Cost 2 vmrglw <2,3,0,u>, RHS
+ 835584U, // <0,u,u,u>: Cost 0 copy LHS
+ 2756853760U, // <1,0,0,0>: Cost 3 vsldoi12 <0,u,1,1>, <0,0,0,0>
+ 1677803530U, // <1,0,0,1>: Cost 2 vsldoi12 <0,0,1,1>, <0,0,1,1>
+ 3759497387U, // <1,0,0,2>: Cost 4 vsldoi8 <0,2,1,0>, <0,2,1,0>
+ 2686419196U, // <1,0,0,3>: Cost 3 vsldoi8 <0,3,1,0>, <0,3,1,0>
+ 2751766565U, // <1,0,0,4>: Cost 3 vsldoi12 <0,0,4,1>, <0,0,4,1>
+ 2687746462U, // <1,0,0,5>: Cost 3 vsldoi8 <0,5,1,0>, <0,5,1,0>
+ 3776086518U, // <1,0,0,6>: Cost 4 vsldoi8 <3,0,1,0>, <0,6,1,7>
+ 2689073728U, // <1,0,0,7>: Cost 3 vsldoi8 <0,7,1,0>, <0,7,1,0>
+ 1678319689U, // <1,0,0,u>: Cost 2 vsldoi12 <0,0,u,1>, <0,0,u,1>
+ 2287091712U, // <1,0,1,0>: Cost 3 vmrglw <0,u,1,1>, <0,0,0,0>
+ 1147568230U, // <1,0,1,1>: Cost 2 vmrghw <1,1,1,1>, LHS
+ 1683112038U, // <1,0,1,2>: Cost 2 vsldoi12 <0,u,1,1>, LHS
+ 3294970108U, // <1,0,1,3>: Cost 4 vmrghw <1,1,0,0>, <0,3,1,0>
+ 2623892790U, // <1,0,1,4>: Cost 3 vsldoi4 <1,1,0,1>, RHS
+ 2647781007U, // <1,0,1,5>: Cost 3 vsldoi4 <5,1,0,1>, <5,1,0,1>
+ 2791948430U, // <1,0,1,6>: Cost 3 vsldoi12 <6,7,0,1>, <0,1,6,7>
+ 3721524218U, // <1,0,1,7>: Cost 4 vsldoi4 <5,1,0,1>, <7,0,1,2>
+ 1683112092U, // <1,0,1,u>: Cost 2 vsldoi12 <0,u,1,1>, LHS
+ 2222112768U, // <1,0,2,0>: Cost 3 vmrghw <1,2,3,0>, <0,0,0,0>
+ 1148371046U, // <1,0,2,1>: Cost 2 vmrghw <1,2,3,0>, LHS
+ 3356862524U, // <1,0,2,2>: Cost 4 vmrglw <0,2,1,2>, <2,u,0,2>
+ 2702345894U, // <1,0,2,3>: Cost 3 vsldoi8 <3,0,1,0>, <2,3,0,1>
+ 2222113106U, // <1,0,2,4>: Cost 3 vmrghw <1,2,3,0>, <0,4,1,5>
+ 2299709908U, // <1,0,2,5>: Cost 3 vmrglw <3,0,1,2>, <3,4,0,5>
+ 3760162746U, // <1,0,2,6>: Cost 4 vsldoi8 <0,3,1,0>, <2,6,3,7>
+ 3369470584U, // <1,0,2,7>: Cost 4 vmrglw <2,3,1,2>, <3,6,0,7>
+ 1148371613U, // <1,0,2,u>: Cost 2 vmrghw <1,2,3,0>, LHS
+ 2686421142U, // <1,0,3,0>: Cost 3 vsldoi8 <0,3,1,0>, <3,0,1,2>
+ 2283128486U, // <1,0,3,1>: Cost 3 vmrglw <0,2,1,3>, <2,3,0,1>
+ 3296305326U, // <1,0,3,2>: Cost 4 vmrghw <1,3,0,1>, <0,2,1,3>
+ 3760163199U, // <1,0,3,3>: Cost 4 vsldoi8 <0,3,1,0>, <3,3,0,1>
+ 3760163330U, // <1,0,3,4>: Cost 4 vsldoi8 <0,3,1,0>, <3,4,5,6>
+ 3779406377U, // <1,0,3,5>: Cost 4 vsldoi8 <3,5,1,0>, <3,5,1,0>
+ 3865690416U, // <1,0,3,6>: Cost 4 vsldoi12 <6,7,0,1>, <0,3,6,7>
+ 3366824568U, // <1,0,3,7>: Cost 5 vmrglw <1,u,1,3>, <3,6,0,7>
+ 2707655452U, // <1,0,3,u>: Cost 3 vsldoi8 <3,u,1,0>, <3,u,1,0>
+ 2734861202U, // <1,0,4,0>: Cost 3 vsldoi8 <u,4,1,0>, <4,0,5,1>
+ 2756854098U, // <1,0,4,1>: Cost 3 vsldoi12 <0,u,1,1>, <0,4,1,5>
+ 3830595931U, // <1,0,4,2>: Cost 5 vsldoi12 <0,u,1,1>, <0,4,2,5>
+ 3296968960U, // <1,0,4,3>: Cost 4 vmrghw <1,4,0,1>, <0,3,1,4>
+ 3830595949U, // <1,0,4,4>: Cost 4 vsldoi12 <0,u,1,1>, <0,4,4,5>
+ 2686422326U, // <1,0,4,5>: Cost 3 vsldoi8 <0,3,1,0>, RHS
+ 3297378806U, // <1,0,4,6>: Cost 5 vmrghw <1,4,5,6>, <0,6,1,7>
+ 3810594248U, // <1,0,4,7>: Cost 4 vsldoi8 <u,7,1,0>, <4,7,5,0>
+ 2686422569U, // <1,0,4,u>: Cost 3 vsldoi8 <0,3,1,0>, RHS
+ 2284470272U, // <1,0,5,0>: Cost 3 vmrglw <0,4,1,5>, <0,0,0,0>
+ 2284471974U, // <1,0,5,1>: Cost 3 vmrglw <0,4,1,5>, <2,3,0,1>
+ 3809267435U, // <1,0,5,2>: Cost 4 vsldoi8 <u,5,1,0>, <5,2,1,3>
+ 3297968384U, // <1,0,5,3>: Cost 4 vmrghw <1,5,4,6>, <0,3,1,4>
+ 2284471977U, // <1,0,5,4>: Cost 3 vmrglw <0,4,1,5>, <2,3,0,4>
+ 3721555603U, // <1,0,5,5>: Cost 4 vsldoi4 <5,1,0,5>, <5,1,0,5>
+ 3792679010U, // <1,0,5,6>: Cost 4 vsldoi8 <5,7,1,0>, <5,6,7,0>
+ 3792679037U, // <1,0,5,7>: Cost 4 vsldoi8 <5,7,1,0>, <5,7,1,0>
+ 2284471981U, // <1,0,5,u>: Cost 3 vmrglw <0,4,1,5>, <2,3,0,u>
+ 3356893184U, // <1,0,6,0>: Cost 4 vmrglw <0,2,1,6>, <0,0,0,0>
+ 2224676966U, // <1,0,6,1>: Cost 3 vmrghw <1,6,1,7>, LHS
+ 3298295985U, // <1,0,6,2>: Cost 4 vmrghw <1,6,0,1>, <0,2,1,6>
+ 3298345212U, // <1,0,6,3>: Cost 4 vmrghw <1,6,0,7>, <0,3,1,0>
+ 2224972114U, // <1,0,6,4>: Cost 3 vmrghw <1,6,5,7>, <0,4,1,5>
+ 3808604907U, // <1,0,6,5>: Cost 4 vsldoi8 <u,4,1,0>, <6,5,7,1>
+ 3799978808U, // <1,0,6,6>: Cost 4 vsldoi8 <7,0,1,0>, <6,6,6,6>
+ 2726237006U, // <1,0,6,7>: Cost 3 vsldoi8 <7,0,1,0>, <6,7,0,1>
+ 2224677522U, // <1,0,6,u>: Cost 3 vmrghw <1,6,1,7>, <0,u,1,1>
+ 2726237176U, // <1,0,7,0>: Cost 3 vsldoi8 <7,0,1,0>, <7,0,1,0>
+ 2285815462U, // <1,0,7,1>: Cost 3 vmrglw <0,6,1,7>, <2,3,0,1>
+ 3805951193U, // <1,0,7,2>: Cost 4 vsldoi8 <u,0,1,0>, <7,2,u,0>
+ 3807941859U, // <1,0,7,3>: Cost 4 vsldoi8 <u,3,1,0>, <7,3,0,1>
+ 3799979366U, // <1,0,7,4>: Cost 4 vsldoi8 <7,0,1,0>, <7,4,5,6>
+ 3803297165U, // <1,0,7,5>: Cost 4 vsldoi8 <7,5,1,0>, <7,5,1,0>
+ 3799979540U, // <1,0,7,6>: Cost 4 vsldoi8 <7,0,1,0>, <7,6,7,0>
+ 3799979628U, // <1,0,7,7>: Cost 4 vsldoi8 <7,0,1,0>, <7,7,7,7>
+ 2731546240U, // <1,0,7,u>: Cost 3 vsldoi8 <7,u,1,0>, <7,u,1,0>
+ 2284494848U, // <1,0,u,0>: Cost 3 vmrglw <0,4,1,u>, <0,0,0,0>
+ 1683112594U, // <1,0,u,1>: Cost 2 vsldoi12 <0,u,1,1>, <0,u,1,1>
+ 1683112605U, // <1,0,u,2>: Cost 2 vsldoi12 <0,u,1,1>, LHS
+ 2734200772U, // <1,0,u,3>: Cost 3 vsldoi8 <u,3,1,0>, <u,3,1,0>
+ 2757075629U, // <1,0,u,4>: Cost 3 vsldoi12 <0,u,4,1>, <0,u,4,1>
+ 2686425242U, // <1,0,u,5>: Cost 3 vsldoi8 <0,3,1,0>, RHS
+ 2791948430U, // <1,0,u,6>: Cost 3 vsldoi12 <6,7,0,1>, <0,1,6,7>
+ 2736855304U, // <1,0,u,7>: Cost 3 vsldoi8 <u,7,1,0>, <u,7,1,0>
+ 1683112659U, // <1,0,u,u>: Cost 2 vsldoi12 <0,u,1,1>, LHS
+ 1610694666U, // <1,1,0,0>: Cost 2 vsldoi8 <0,0,1,1>, <0,0,1,1>
+ 1616003174U, // <1,1,0,1>: Cost 2 vsldoi8 <0,u,1,1>, LHS
+ 2283767958U, // <1,1,0,2>: Cost 3 vmrglw <0,3,1,0>, <3,0,1,2>
+ 3357507596U, // <1,1,0,3>: Cost 4 vmrglw <0,3,1,0>, <0,0,1,3>
+ 2689745234U, // <1,1,0,4>: Cost 3 vsldoi8 <0,u,1,1>, <0,4,1,5>
+ 3357507922U, // <1,1,0,5>: Cost 4 vmrglw <0,3,1,0>, <0,4,1,5>
+ 3294397647U, // <1,1,0,6>: Cost 4 vmrghw <1,0,1,2>, <1,6,1,7>
+ 3373433334U, // <1,1,0,7>: Cost 4 vmrglw <3,0,1,0>, <0,6,1,7>
+ 1616003730U, // <1,1,0,u>: Cost 2 vsldoi8 <0,u,1,1>, <0,u,1,1>
+ 1550221414U, // <1,1,1,0>: Cost 2 vsldoi4 <1,1,1,1>, LHS
+ 269271142U, // <1,1,1,1>: Cost 1 vspltisw1 LHS
+ 2287093910U, // <1,1,1,2>: Cost 3 vmrglw <0,u,1,1>, <3,0,1,2>
+ 2287092615U, // <1,1,1,3>: Cost 3 vmrglw <0,u,1,1>, <1,2,1,3>
+ 1550224694U, // <1,1,1,4>: Cost 2 vsldoi4 <1,1,1,1>, RHS
+ 2287092050U, // <1,1,1,5>: Cost 3 vmrglw <0,u,1,1>, <0,4,1,5>
+ 2689746127U, // <1,1,1,6>: Cost 3 vsldoi8 <0,u,1,1>, <1,6,1,7>
+ 2659800138U, // <1,1,1,7>: Cost 3 vsldoi4 <7,1,1,1>, <7,1,1,1>
+ 269271142U, // <1,1,1,u>: Cost 1 vspltisw1 LHS
+ 2222113516U, // <1,1,2,0>: Cost 3 vmrghw <1,2,3,0>, <1,0,2,1>
+ 2756854663U, // <1,1,2,1>: Cost 3 vsldoi12 <0,u,1,1>, <1,2,1,3>
+ 1148371862U, // <1,1,2,2>: Cost 2 vmrghw <1,2,3,0>, <1,2,3,0>
+ 2689746598U, // <1,1,2,3>: Cost 3 vsldoi8 <0,u,1,1>, <2,3,0,1>
+ 2618002742U, // <1,1,2,4>: Cost 3 vsldoi4 <0,1,1,2>, RHS
+ 2299707730U, // <1,1,2,5>: Cost 3 vmrglw <3,0,1,2>, <0,4,1,5>
+ 2689746874U, // <1,1,2,6>: Cost 3 vsldoi8 <0,u,1,1>, <2,6,3,7>
+ 3361506511U, // <1,1,2,7>: Cost 4 vmrglw <1,0,1,2>, <1,6,1,7>
+ 1148371862U, // <1,1,2,u>: Cost 2 vmrghw <1,2,3,0>, <1,2,3,0>
+ 2689747094U, // <1,1,3,0>: Cost 3 vsldoi8 <0,u,1,1>, <3,0,1,2>
+ 2691074278U, // <1,1,3,1>: Cost 3 vsldoi8 <1,1,1,1>, <3,1,1,1>
+ 3356870806U, // <1,1,3,2>: Cost 4 vmrglw <0,2,1,3>, <3,0,1,2>
+ 2283126958U, // <1,1,3,3>: Cost 3 vmrglw <0,2,1,3>, <0,2,1,3>
+ 2689747458U, // <1,1,3,4>: Cost 3 vsldoi8 <0,u,1,1>, <3,4,5,6>
+ 3356868946U, // <1,1,3,5>: Cost 4 vmrglw <0,2,1,3>, <0,4,1,5>
+ 3811265144U, // <1,1,3,6>: Cost 4 vsldoi8 <u,u,1,1>, <3,6,0,7>
+ 3362841807U, // <1,1,3,7>: Cost 4 vmrglw <1,2,1,3>, <1,6,1,7>
+ 2689747742U, // <1,1,3,u>: Cost 3 vsldoi8 <0,u,1,1>, <3,u,1,2>
+ 2623987814U, // <1,1,4,0>: Cost 3 vsldoi4 <1,1,1,4>, LHS
+ 2758181931U, // <1,1,4,1>: Cost 3 vsldoi12 <1,1,1,1>, <1,4,1,5>
+ 2223408022U, // <1,1,4,2>: Cost 3 vmrghw <1,4,2,5>, <1,2,3,0>
+ 3697731734U, // <1,1,4,3>: Cost 4 vsldoi4 <1,1,1,4>, <3,0,1,2>
+ 2283798784U, // <1,1,4,4>: Cost 3 vmrglw <0,3,1,4>, <0,3,1,4>
+ 1616006454U, // <1,1,4,5>: Cost 2 vsldoi8 <0,u,1,1>, RHS
+ 3297379535U, // <1,1,4,6>: Cost 4 vmrghw <1,4,5,6>, <1,6,1,7>
+ 3373466102U, // <1,1,4,7>: Cost 4 vmrglw <3,0,1,4>, <0,6,1,7>
+ 1616006697U, // <1,1,4,u>: Cost 2 vsldoi8 <0,u,1,1>, RHS
+ 2760762479U, // <1,1,5,0>: Cost 3 vsldoi12 <1,5,0,1>, <1,5,0,1>
+ 2284470282U, // <1,1,5,1>: Cost 3 vmrglw <0,4,1,5>, <0,0,1,1>
+ 2284472470U, // <1,1,5,2>: Cost 3 vmrglw <0,4,1,5>, <3,0,1,2>
+ 3358212270U, // <1,1,5,3>: Cost 4 vmrglw <0,4,1,5>, <0,2,1,3>
+ 2284470285U, // <1,1,5,4>: Cost 3 vmrglw <0,4,1,5>, <0,0,1,4>
+ 1210728786U, // <1,1,5,5>: Cost 2 vmrglw <0,4,1,5>, <0,4,1,5>
+ 2737524834U, // <1,1,5,6>: Cost 3 vsldoi8 <u,u,1,1>, <5,6,7,0>
+ 3360867535U, // <1,1,5,7>: Cost 4 vmrglw <0,u,1,5>, <1,6,1,7>
+ 1210728786U, // <1,1,5,u>: Cost 2 vmrglw <0,4,1,5>, <0,4,1,5>
+ 3697746022U, // <1,1,6,0>: Cost 4 vsldoi4 <1,1,1,6>, LHS
+ 2756854991U, // <1,1,6,1>: Cost 3 vsldoi12 <0,u,1,1>, <1,6,1,7>
+ 2737525242U, // <1,1,6,2>: Cost 3 vsldoi8 <u,u,1,1>, <6,2,7,3>
+ 3839149281U, // <1,1,6,3>: Cost 4 vsldoi12 <2,3,0,1>, <1,6,3,7>
+ 3697749302U, // <1,1,6,4>: Cost 4 vsldoi4 <1,1,1,6>, RHS
+ 3356893522U, // <1,1,6,5>: Cost 4 vmrglw <0,2,1,6>, <0,4,1,5>
+ 2283151537U, // <1,1,6,6>: Cost 3 vmrglw <0,2,1,6>, <0,2,1,6>
+ 2791949566U, // <1,1,6,7>: Cost 3 vsldoi12 <6,7,0,1>, <1,6,7,0>
+ 2792613127U, // <1,1,6,u>: Cost 3 vsldoi12 <6,u,0,1>, <1,6,u,0>
+ 2737525754U, // <1,1,7,0>: Cost 3 vsldoi8 <u,u,1,1>, <7,0,1,2>
+ 2291786386U, // <1,1,7,1>: Cost 3 vmrglw <1,6,1,7>, <0,u,1,1>
+ 3365528292U, // <1,1,7,2>: Cost 4 vmrglw <1,6,1,7>, <1,0,1,2>
+ 3365528455U, // <1,1,7,3>: Cost 4 vmrglw <1,6,1,7>, <1,2,1,3>
+ 2737526118U, // <1,1,7,4>: Cost 3 vsldoi8 <u,u,1,1>, <7,4,5,6>
+ 3365527890U, // <1,1,7,5>: Cost 4 vmrglw <1,6,1,7>, <0,4,1,5>
+ 3365528377U, // <1,1,7,6>: Cost 4 vmrglw <1,6,1,7>, <1,1,1,6>
+ 2291786959U, // <1,1,7,7>: Cost 3 vmrglw <1,6,1,7>, <1,6,1,7>
+ 2737526402U, // <1,1,7,u>: Cost 3 vsldoi8 <u,u,1,1>, <7,u,1,2>
+ 1550221414U, // <1,1,u,0>: Cost 2 vsldoi4 <1,1,1,1>, LHS
+ 269271142U, // <1,1,u,1>: Cost 1 vspltisw1 LHS
+ 1148371862U, // <1,1,u,2>: Cost 2 vmrghw <1,2,3,0>, <1,2,3,0>
+ 2689750972U, // <1,1,u,3>: Cost 3 vsldoi8 <0,u,1,1>, <u,3,0,1>
+ 1550224694U, // <1,1,u,4>: Cost 2 vsldoi4 <1,1,1,1>, RHS
+ 1616009370U, // <1,1,u,5>: Cost 2 vsldoi8 <0,u,1,1>, RHS
+ 2689751248U, // <1,1,u,6>: Cost 3 vsldoi8 <0,u,1,1>, <u,6,3,7>
+ 2736863497U, // <1,1,u,7>: Cost 3 vsldoi8 <u,7,1,1>, <u,7,1,1>
+ 269271142U, // <1,1,u,u>: Cost 1 vspltisw1 LHS
+ 2702360576U, // <1,2,0,0>: Cost 3 vsldoi8 <3,0,1,2>, <0,0,0,0>
+ 1628618854U, // <1,2,0,1>: Cost 2 vsldoi8 <3,0,1,2>, LHS
+ 2685771949U, // <1,2,0,2>: Cost 3 vsldoi8 <0,2,1,2>, <0,2,1,2>
+ 2283765862U, // <1,2,0,3>: Cost 3 vmrglw <0,3,1,0>, LHS
+ 2702360914U, // <1,2,0,4>: Cost 3 vsldoi8 <3,0,1,2>, <0,4,1,5>
+ 3788046813U, // <1,2,0,5>: Cost 4 vsldoi8 <5,0,1,2>, <0,5,u,0>
+ 2688426481U, // <1,2,0,6>: Cost 3 vsldoi8 <0,6,1,2>, <0,6,1,2>
+ 2726249024U, // <1,2,0,7>: Cost 3 vsldoi8 <7,0,1,2>, <0,7,1,0>
+ 1628619421U, // <1,2,0,u>: Cost 2 vsldoi8 <3,0,1,2>, LHS
+ 2690417380U, // <1,2,1,0>: Cost 3 vsldoi8 <1,0,1,2>, <1,0,1,2>
+ 2702361396U, // <1,2,1,1>: Cost 3 vsldoi8 <3,0,1,2>, <1,1,1,1>
+ 2287093352U, // <1,2,1,2>: Cost 3 vmrglw <0,u,1,1>, <2,2,2,2>
+ 1213349990U, // <1,2,1,3>: Cost 2 vmrglw <0,u,1,1>, LHS
+ 3764159522U, // <1,2,1,4>: Cost 4 vsldoi8 <1,0,1,2>, <1,4,0,5>
+ 3295053672U, // <1,2,1,5>: Cost 4 vmrghw <1,1,1,1>, <2,5,3,6>
+ 2221311930U, // <1,2,1,6>: Cost 3 vmrghw <1,1,1,1>, <2,6,3,7>
+ 3799991593U, // <1,2,1,7>: Cost 4 vsldoi8 <7,0,1,2>, <1,7,2,7>
+ 1213349995U, // <1,2,1,u>: Cost 2 vmrglw <0,u,1,1>, LHS
+ 2624045158U, // <1,2,2,0>: Cost 3 vsldoi4 <1,1,2,2>, LHS
+ 2702362144U, // <1,2,2,1>: Cost 3 vsldoi8 <3,0,1,2>, <2,1,3,2>
+ 2283120232U, // <1,2,2,2>: Cost 3 vmrglw <0,2,1,2>, <2,2,2,2>
+ 1225965670U, // <1,2,2,3>: Cost 2 vmrglw <3,0,1,2>, LHS
+ 2624048438U, // <1,2,2,4>: Cost 3 vsldoi4 <1,1,2,2>, RHS
+ 3356860763U, // <1,2,2,5>: Cost 4 vmrglw <0,2,1,2>, <0,4,2,5>
+ 2222114746U, // <1,2,2,6>: Cost 3 vmrghw <1,2,3,0>, <2,6,3,7>
+ 2299708632U, // <1,2,2,7>: Cost 3 vmrglw <3,0,1,2>, <1,6,2,7>
+ 1225965675U, // <1,2,2,u>: Cost 2 vmrglw <3,0,1,2>, LHS
+ 470597734U, // <1,2,3,0>: Cost 1 vsldoi4 LHS, LHS
+ 1544340276U, // <1,2,3,1>: Cost 2 vsldoi4 LHS, <1,1,1,1>
+ 1544341096U, // <1,2,3,2>: Cost 2 vsldoi4 LHS, <2,2,2,2>
+ 1544341916U, // <1,2,3,3>: Cost 2 vsldoi4 LHS, <3,3,3,3>
+ 470601014U, // <1,2,3,4>: Cost 1 vsldoi4 LHS, RHS
+ 1592119300U, // <1,2,3,5>: Cost 2 vsldoi4 LHS, <5,5,5,5>
+ 1592119802U, // <1,2,3,6>: Cost 2 vsldoi4 LHS, <6,2,7,3>
+ 1592120314U, // <1,2,3,7>: Cost 2 vsldoi4 LHS, <7,0,1,2>
+ 470603566U, // <1,2,3,u>: Cost 1 vsldoi4 LHS, LHS
+ 2708335471U, // <1,2,4,0>: Cost 3 vsldoi8 <4,0,1,2>, <4,0,1,2>
+ 3838043908U, // <1,2,4,1>: Cost 4 vsldoi12 <2,1,3,1>, <2,4,1,5>
+ 3357541992U, // <1,2,4,2>: Cost 4 vmrglw <0,3,1,4>, <2,2,2,2>
+ 2283798630U, // <1,2,4,3>: Cost 3 vmrglw <0,3,1,4>, LHS
+ 2726251728U, // <1,2,4,4>: Cost 3 vsldoi8 <7,0,1,2>, <4,4,4,4>
+ 1628622134U, // <1,2,4,5>: Cost 2 vsldoi8 <3,0,1,2>, RHS
+ 3297077178U, // <1,2,4,6>: Cost 4 vmrghw <1,4,1,5>, <2,6,3,7>
+ 2726251976U, // <1,2,4,7>: Cost 3 vsldoi8 <7,0,1,2>, <4,7,5,0>
+ 1628622377U, // <1,2,4,u>: Cost 2 vsldoi8 <3,0,1,2>, RHS
+ 2714308168U, // <1,2,5,0>: Cost 3 vsldoi8 <5,0,1,2>, <5,0,1,2>
+ 3297633827U, // <1,2,5,1>: Cost 4 vmrghw <1,5,0,1>, <2,1,3,5>
+ 2284471912U, // <1,2,5,2>: Cost 3 vmrglw <0,4,1,5>, <2,2,2,2>
+ 1210728550U, // <1,2,5,3>: Cost 2 vmrglw <0,4,1,5>, LHS
+ 3776106420U, // <1,2,5,4>: Cost 4 vsldoi8 <3,0,1,2>, <5,4,5,6>
+ 2726252548U, // <1,2,5,5>: Cost 3 vsldoi8 <7,0,1,2>, <5,5,5,5>
+ 2726252642U, // <1,2,5,6>: Cost 3 vsldoi8 <7,0,1,2>, <5,6,7,0>
+ 3799994538U, // <1,2,5,7>: Cost 4 vsldoi8 <7,0,1,2>, <5,7,6,0>
+ 1210728555U, // <1,2,5,u>: Cost 2 vmrglw <0,4,1,5>, LHS
+ 2720280865U, // <1,2,6,0>: Cost 3 vsldoi8 <6,0,1,2>, <6,0,1,2>
+ 2702365096U, // <1,2,6,1>: Cost 3 vsldoi8 <3,0,1,2>, <6,1,7,2>
+ 2726253050U, // <1,2,6,2>: Cost 3 vsldoi8 <7,0,1,2>, <6,2,7,3>
+ 2283151462U, // <1,2,6,3>: Cost 3 vmrglw <0,2,1,6>, LHS
+ 3697823030U, // <1,2,6,4>: Cost 4 vsldoi4 <1,1,2,6>, RHS
+ 3298715497U, // <1,2,6,5>: Cost 4 vmrghw <1,6,5,7>, <2,5,3,7>
+ 2726253368U, // <1,2,6,6>: Cost 3 vsldoi8 <7,0,1,2>, <6,6,6,6>
+ 2724926296U, // <1,2,6,7>: Cost 3 vsldoi8 <6,7,1,2>, <6,7,1,2>
+ 2283151467U, // <1,2,6,u>: Cost 3 vmrglw <0,2,1,6>, LHS
+ 1652511738U, // <1,2,7,0>: Cost 2 vsldoi8 <7,0,1,2>, <7,0,1,2>
+ 3371500916U, // <1,2,7,1>: Cost 4 vmrglw <2,6,1,7>, <1,u,2,1>
+ 3365529192U, // <1,2,7,2>: Cost 4 vmrglw <1,6,1,7>, <2,2,2,2>
+ 2291785830U, // <1,2,7,3>: Cost 3 vmrglw <1,6,1,7>, LHS
+ 2726253926U, // <1,2,7,4>: Cost 3 vsldoi8 <7,0,1,2>, <7,4,5,6>
+ 3788051845U, // <1,2,7,5>: Cost 4 vsldoi8 <5,0,1,2>, <7,5,0,1>
+ 3794023894U, // <1,2,7,6>: Cost 4 vsldoi8 <6,0,1,2>, <7,6,0,1>
+ 2726254119U, // <1,2,7,7>: Cost 3 vsldoi8 <7,0,1,2>, <7,7,0,1>
+ 1657820802U, // <1,2,7,u>: Cost 2 vsldoi8 <7,u,1,2>, <7,u,1,2>
+ 470638699U, // <1,2,u,0>: Cost 1 vsldoi4 LHS, LHS
+ 1544381236U, // <1,2,u,1>: Cost 2 vsldoi4 LHS, <1,1,1,1>
+ 1544382056U, // <1,2,u,2>: Cost 2 vsldoi4 LHS, <2,2,2,2>
+ 1544382614U, // <1,2,u,3>: Cost 2 vsldoi4 LHS, <3,0,1,2>
+ 470641974U, // <1,2,u,4>: Cost 1 vsldoi4 LHS, RHS
+ 1628625050U, // <1,2,u,5>: Cost 2 vsldoi8 <3,0,1,2>, RHS
+ 1592160762U, // <1,2,u,6>: Cost 2 vsldoi4 LHS, <6,2,7,3>
+ 1592161274U, // <1,2,u,7>: Cost 2 vsldoi4 LHS, <7,0,1,2>
+ 470644526U, // <1,2,u,u>: Cost 1 vsldoi4 LHS, LHS
+ 2769389708U, // <1,3,0,0>: Cost 3 vsldoi12 <3,0,0,1>, <3,0,0,1>
+ 2685780070U, // <1,3,0,1>: Cost 3 vsldoi8 <0,2,1,3>, LHS
+ 2685780142U, // <1,3,0,2>: Cost 3 vsldoi8 <0,2,1,3>, <0,2,1,3>
+ 2686443775U, // <1,3,0,3>: Cost 3 vsldoi8 <0,3,1,3>, <0,3,1,3>
+ 2769684656U, // <1,3,0,4>: Cost 3 vsldoi12 <3,0,4,1>, <3,0,4,1>
+ 3357507940U, // <1,3,0,5>: Cost 4 vmrglw <0,3,1,0>, <0,4,3,5>
+ 3759522294U, // <1,3,0,6>: Cost 4 vsldoi8 <0,2,1,3>, <0,6,1,7>
+ 3357509562U, // <1,3,0,7>: Cost 4 vmrglw <0,3,1,0>, <2,6,3,7>
+ 2685780637U, // <1,3,0,u>: Cost 3 vsldoi8 <0,2,1,3>, LHS
+ 2287092630U, // <1,3,1,0>: Cost 3 vmrglw <0,u,1,1>, <1,2,3,0>
+ 2221312230U, // <1,3,1,1>: Cost 3 vmrghw <1,1,1,1>, <3,1,1,1>
+ 2691752839U, // <1,3,1,2>: Cost 3 vsldoi8 <1,2,1,3>, <1,2,1,3>
+ 2287093362U, // <1,3,1,3>: Cost 3 vmrglw <0,u,1,1>, <2,2,3,3>
+ 2287092634U, // <1,3,1,4>: Cost 3 vmrglw <0,u,1,1>, <1,2,3,4>
+ 3360835107U, // <1,3,1,5>: Cost 4 vmrglw <0,u,1,1>, <2,1,3,5>
+ 3759523041U, // <1,3,1,6>: Cost 4 vsldoi8 <0,2,1,3>, <1,6,3,7>
+ 2287093690U, // <1,3,1,7>: Cost 3 vmrglw <0,u,1,1>, <2,6,3,7>
+ 2287092638U, // <1,3,1,u>: Cost 3 vmrglw <0,u,1,1>, <1,2,3,u>
+ 2222114966U, // <1,3,2,0>: Cost 3 vmrghw <1,2,3,0>, <3,0,1,2>
+ 2222115057U, // <1,3,2,1>: Cost 3 vmrghw <1,2,3,0>, <3,1,2,3>
+ 2630092320U, // <1,3,2,2>: Cost 3 vsldoi4 <2,1,3,2>, <2,1,3,2>
+ 2685781670U, // <1,3,2,3>: Cost 3 vsldoi8 <0,2,1,3>, <2,3,0,1>
+ 2222115330U, // <1,3,2,4>: Cost 3 vmrghw <1,2,3,0>, <3,4,5,6>
+ 3373449572U, // <1,3,2,5>: Cost 4 vmrglw <3,0,1,2>, <0,4,3,5>
+ 2222115448U, // <1,3,2,6>: Cost 3 vmrghw <1,2,3,0>, <3,6,0,7>
+ 2299709370U, // <1,3,2,7>: Cost 3 vmrglw <3,0,1,2>, <2,6,3,7>
+ 2222115614U, // <1,3,2,u>: Cost 3 vmrghw <1,2,3,0>, <3,u,1,2>
+ 2771380607U, // <1,3,3,0>: Cost 3 vsldoi12 <3,3,0,1>, <3,3,0,1>
+ 3356874468U, // <1,3,3,1>: Cost 4 vmrglw <0,2,1,3>, <u,0,3,1>
+ 3759524168U, // <1,3,3,2>: Cost 4 vsldoi8 <0,2,1,3>, <3,2,3,0>
+ 2283792796U, // <1,3,3,3>: Cost 3 vmrglw <0,3,1,3>, <3,3,3,3>
+ 3356869530U, // <1,3,3,4>: Cost 4 vmrglw <0,2,1,3>, <1,2,3,4>
+ 3721760428U, // <1,3,3,5>: Cost 4 vsldoi4 <5,1,3,3>, <5,1,3,3>
+ 3296496248U, // <1,3,3,6>: Cost 4 vmrghw <1,3,2,6>, <3,6,0,7>
+ 3356870586U, // <1,3,3,7>: Cost 4 vmrglw <0,2,1,3>, <2,6,3,7>
+ 2771970503U, // <1,3,3,u>: Cost 3 vsldoi12 <3,3,u,1>, <3,3,u,1>
+ 2772044240U, // <1,3,4,0>: Cost 3 vsldoi12 <3,4,0,1>, <3,4,0,1>
+ 3362186135U, // <1,3,4,1>: Cost 4 vmrglw <1,1,1,4>, <1,2,3,1>
+ 3297151280U, // <1,3,4,2>: Cost 4 vmrghw <1,4,2,5>, <3,2,0,3>
+ 3357542002U, // <1,3,4,3>: Cost 4 vmrglw <0,3,1,4>, <2,2,3,3>
+ 3357540626U, // <1,3,4,4>: Cost 4 vmrglw <0,3,1,4>, <0,3,3,4>
+ 2685783350U, // <1,3,4,5>: Cost 3 vsldoi8 <0,2,1,3>, RHS
+ 3357546622U, // <1,3,4,6>: Cost 4 vmrglw <0,3,1,4>, <u,5,3,6>
+ 3357542330U, // <1,3,4,7>: Cost 4 vmrglw <0,3,1,4>, <2,6,3,7>
+ 2685783593U, // <1,3,4,u>: Cost 3 vsldoi8 <0,2,1,3>, RHS
+ 2284471190U, // <1,3,5,0>: Cost 3 vmrglw <0,4,1,5>, <1,2,3,0>
+ 3358213015U, // <1,3,5,1>: Cost 4 vmrglw <0,4,1,5>, <1,2,3,1>
+ 2630116899U, // <1,3,5,2>: Cost 3 vsldoi4 <2,1,3,5>, <2,1,3,5>
+ 2284471922U, // <1,3,5,3>: Cost 3 vmrglw <0,4,1,5>, <2,2,3,3>
+ 2284471194U, // <1,3,5,4>: Cost 3 vmrglw <0,4,1,5>, <1,2,3,4>
+ 2284471843U, // <1,3,5,5>: Cost 3 vmrglw <0,4,1,5>, <2,1,3,5>
+ 3358218366U, // <1,3,5,6>: Cost 4 vmrglw <0,4,1,5>, <u,5,3,6>
+ 2284472250U, // <1,3,5,7>: Cost 3 vmrglw <0,4,1,5>, <2,6,3,7>
+ 2284471198U, // <1,3,5,u>: Cost 3 vmrglw <0,4,1,5>, <1,2,3,u>
+ 2224752790U, // <1,3,6,0>: Cost 3 vmrghw <1,6,2,7>, <3,0,1,2>
+ 3832736385U, // <1,3,6,1>: Cost 4 vsldoi12 <1,2,3,1>, <3,6,1,7>
+ 3703866916U, // <1,3,6,2>: Cost 4 vsldoi4 <2,1,3,6>, <2,1,3,6>
+ 3356894834U, // <1,3,6,3>: Cost 4 vmrglw <0,2,1,6>, <2,2,3,3>
+ 3356894106U, // <1,3,6,4>: Cost 4 vmrglw <0,2,1,6>, <1,2,3,4>
+ 3356894755U, // <1,3,6,5>: Cost 5 vmrglw <0,2,1,6>, <2,1,3,5>
+ 3356899130U, // <1,3,6,6>: Cost 4 vmrglw <0,2,1,6>, <u,1,3,6>
+ 2283153338U, // <1,3,6,7>: Cost 3 vmrglw <0,2,1,6>, <2,6,3,7>
+ 2283153338U, // <1,3,6,u>: Cost 3 vmrglw <0,2,1,6>, <2,6,3,7>
+ 2774035139U, // <1,3,7,0>: Cost 3 vsldoi12 <3,7,0,1>, <3,7,0,1>
+ 3703874767U, // <1,3,7,1>: Cost 4 vsldoi4 <2,1,3,7>, <1,6,1,7>
+ 3703875109U, // <1,3,7,2>: Cost 4 vsldoi4 <2,1,3,7>, <2,1,3,7>
+ 3365529202U, // <1,3,7,3>: Cost 4 vmrglw <1,6,1,7>, <2,2,3,3>
+ 3365528474U, // <1,3,7,4>: Cost 4 vmrglw <1,6,1,7>, <1,2,3,4>
+ 3789387159U, // <1,3,7,5>: Cost 4 vsldoi8 <5,2,1,3>, <7,5,2,1>
+ 3865692927U, // <1,3,7,6>: Cost 4 vsldoi12 <6,7,0,1>, <3,7,6,7>
+ 3363538874U, // <1,3,7,7>: Cost 4 vmrglw <1,3,1,7>, <2,6,3,7>
+ 2774625035U, // <1,3,7,u>: Cost 3 vsldoi12 <3,7,u,1>, <3,7,u,1>
+ 2284495766U, // <1,3,u,0>: Cost 3 vmrglw <0,4,1,u>, <1,2,3,0>
+ 2685785902U, // <1,3,u,1>: Cost 3 vsldoi8 <0,2,1,3>, LHS
+ 2630141478U, // <1,3,u,2>: Cost 3 vsldoi4 <2,1,3,u>, <2,1,3,u>
+ 2283169880U, // <1,3,u,3>: Cost 3 vmrglw <0,2,1,u>, <2,u,3,3>
+ 2284495770U, // <1,3,u,4>: Cost 3 vmrglw <0,4,1,u>, <1,2,3,4>
+ 2685786266U, // <1,3,u,5>: Cost 3 vsldoi8 <0,2,1,3>, RHS
+ 2222115448U, // <1,3,u,6>: Cost 3 vmrghw <1,2,3,0>, <3,6,0,7>
+ 2284496826U, // <1,3,u,7>: Cost 3 vmrglw <0,4,1,u>, <2,6,3,7>
+ 2685786469U, // <1,3,u,u>: Cost 3 vsldoi8 <0,2,1,3>, LHS
+ 2684461069U, // <1,4,0,0>: Cost 3 vsldoi8 <0,0,1,4>, <0,0,1,4>
+ 2686451814U, // <1,4,0,1>: Cost 3 vsldoi8 <0,3,1,4>, LHS
+ 3759530159U, // <1,4,0,2>: Cost 4 vsldoi8 <0,2,1,4>, <0,2,1,4>
+ 2686451968U, // <1,4,0,3>: Cost 3 vsldoi8 <0,3,1,4>, <0,3,1,4>
+ 2684461394U, // <1,4,0,4>: Cost 3 vsldoi8 <0,0,1,4>, <0,4,1,5>
+ 1701989266U, // <1,4,0,5>: Cost 2 vsldoi12 <4,0,5,1>, <4,0,5,1>
+ 3776119286U, // <1,4,0,6>: Cost 4 vsldoi8 <3,0,1,4>, <0,6,1,7>
+ 2689106500U, // <1,4,0,7>: Cost 3 vsldoi8 <0,7,1,4>, <0,7,1,4>
+ 1702210477U, // <1,4,0,u>: Cost 2 vsldoi12 <4,0,u,1>, <4,0,u,1>
+ 2221312914U, // <1,4,1,0>: Cost 3 vmrghw <1,1,1,1>, <4,0,5,1>
+ 2691097399U, // <1,4,1,1>: Cost 3 vsldoi8 <1,1,1,4>, <1,1,1,4>
+ 3760194454U, // <1,4,1,2>: Cost 4 vsldoi8 <0,3,1,4>, <1,2,3,0>
+ 3766166489U, // <1,4,1,3>: Cost 4 vsldoi8 <1,3,1,4>, <1,3,1,4>
+ 2334870736U, // <1,4,1,4>: Cost 3 vmrglw <u,u,1,1>, <4,4,4,4>
+ 1147571510U, // <1,4,1,5>: Cost 2 vmrghw <1,1,1,1>, RHS
+ 3760194794U, // <1,4,1,6>: Cost 4 vsldoi8 <0,3,1,4>, <1,6,4,7>
+ 3867315188U, // <1,4,1,7>: Cost 4 vsldoi12 <7,0,4,1>, <4,1,7,0>
+ 1147571753U, // <1,4,1,u>: Cost 2 vmrghw <1,1,1,1>, RHS
+ 2222115730U, // <1,4,2,0>: Cost 3 vmrghw <1,2,3,0>, <4,0,5,1>
+ 2222115812U, // <1,4,2,1>: Cost 3 vmrghw <1,2,3,0>, <4,1,5,2>
+ 3760195176U, // <1,4,2,2>: Cost 4 vsldoi8 <0,3,1,4>, <2,2,2,2>
+ 2702378662U, // <1,4,2,3>: Cost 3 vsldoi8 <3,0,1,4>, <2,3,0,1>
+ 2323598544U, // <1,4,2,4>: Cost 3 vmrglw <7,0,1,2>, <4,4,4,4>
+ 1148374326U, // <1,4,2,5>: Cost 2 vmrghw <1,2,3,0>, RHS
+ 3760195514U, // <1,4,2,6>: Cost 4 vsldoi8 <0,3,1,4>, <2,6,3,7>
+ 3373451932U, // <1,4,2,7>: Cost 4 vmrglw <3,0,1,2>, <3,6,4,7>
+ 1148374569U, // <1,4,2,u>: Cost 2 vmrghw <1,2,3,0>, RHS
+ 2702379160U, // <1,4,3,0>: Cost 3 vsldoi8 <3,0,1,4>, <3,0,1,4>
+ 3760195840U, // <1,4,3,1>: Cost 4 vsldoi8 <0,3,1,4>, <3,1,4,0>
+ 3776121160U, // <1,4,3,2>: Cost 4 vsldoi8 <3,0,1,4>, <3,2,3,0>
+ 3760195996U, // <1,4,3,3>: Cost 4 vsldoi8 <0,3,1,4>, <3,3,3,3>
+ 2686454274U, // <1,4,3,4>: Cost 3 vsldoi8 <0,3,1,4>, <3,4,5,6>
+ 3356870350U, // <1,4,3,5>: Cost 4 vmrglw <0,2,1,3>, <2,3,4,5>
+ 3800009392U, // <1,4,3,6>: Cost 4 vsldoi8 <7,0,1,4>, <3,6,7,0>
+ 3366824604U, // <1,4,3,7>: Cost 5 vmrglw <1,u,1,3>, <3,6,4,7>
+ 2707688224U, // <1,4,3,u>: Cost 3 vsldoi8 <3,u,1,4>, <3,u,1,4>
+ 2775731368U, // <1,4,4,0>: Cost 3 vsldoi12 <4,0,5,1>, <4,4,0,0>
+ 3830820018U, // <1,4,4,1>: Cost 4 vsldoi12 <0,u,4,1>, <4,4,1,1>
+ 3691980454U, // <1,4,4,2>: Cost 4 vsldoi4 <0,1,4,4>, <2,3,0,1>
+ 3357541282U, // <1,4,4,3>: Cost 4 vmrglw <0,3,1,4>, <1,2,4,3>
+ 2781039824U, // <1,4,4,4>: Cost 3 vsldoi12 <4,u,5,1>, <4,4,4,4>
+ 2686455094U, // <1,4,4,5>: Cost 3 vsldoi8 <0,3,1,4>, RHS
+ 3357541528U, // <1,4,4,6>: Cost 4 vmrglw <0,3,1,4>, <1,5,4,6>
+ 3810627020U, // <1,4,4,7>: Cost 4 vsldoi8 <u,7,1,4>, <4,7,5,4>
+ 2686455337U, // <1,4,4,u>: Cost 3 vsldoi8 <0,3,1,4>, RHS
+ 2624217190U, // <1,4,5,0>: Cost 3 vsldoi4 <1,1,4,5>, LHS
+ 2284470309U, // <1,4,5,1>: Cost 3 vmrglw <0,4,1,5>, <0,0,4,1>
+ 2618246822U, // <1,4,5,2>: Cost 3 vsldoi4 <0,1,4,5>, <2,3,0,1>
+ 3358212297U, // <1,4,5,3>: Cost 4 vmrglw <0,4,1,5>, <0,2,4,3>
+ 2284470312U, // <1,4,5,4>: Cost 3 vmrglw <0,4,1,5>, <0,0,4,4>
+ 2284470637U, // <1,4,5,5>: Cost 3 vmrglw <0,4,1,5>, <0,4,4,5>
+ 1683115318U, // <1,4,5,6>: Cost 2 vsldoi12 <0,u,1,1>, RHS
+ 3721851898U, // <1,4,5,7>: Cost 4 vsldoi4 <5,1,4,5>, <7,0,1,2>
+ 1683115336U, // <1,4,5,u>: Cost 2 vsldoi12 <0,u,1,1>, RHS
+ 3794039075U, // <1,4,6,0>: Cost 4 vsldoi8 <6,0,1,4>, <6,0,1,4>
+ 3830820186U, // <1,4,6,1>: Cost 4 vsldoi12 <0,u,4,1>, <4,6,1,7>
+ 3800011258U, // <1,4,6,2>: Cost 4 vsldoi8 <7,0,1,4>, <6,2,7,3>
+ 3807973938U, // <1,4,6,3>: Cost 4 vsldoi8 <u,3,1,4>, <6,3,4,5>
+ 3298716880U, // <1,4,6,4>: Cost 4 vmrghw <1,6,5,7>, <4,4,4,4>
+ 2224680246U, // <1,4,6,5>: Cost 3 vmrghw <1,6,1,7>, RHS
+ 3800011576U, // <1,4,6,6>: Cost 4 vsldoi8 <7,0,1,4>, <6,6,6,6>
+ 2726269774U, // <1,4,6,7>: Cost 3 vsldoi8 <7,0,1,4>, <6,7,0,1>
+ 2224680489U, // <1,4,6,u>: Cost 3 vmrghw <1,6,1,7>, RHS
+ 2726269948U, // <1,4,7,0>: Cost 3 vsldoi8 <7,0,1,4>, <7,0,1,4>
+ 3383444141U, // <1,4,7,1>: Cost 4 vmrglw <4,6,1,7>, <0,u,4,1>
+ 3805983961U, // <1,4,7,2>: Cost 4 vsldoi8 <u,0,1,4>, <7,2,u,0>
+ 3807974667U, // <1,4,7,3>: Cost 4 vsldoi8 <u,3,1,4>, <7,3,4,5>
+ 2736887142U, // <1,4,7,4>: Cost 3 vsldoi8 <u,7,1,4>, <7,4,5,6>
+ 3365528403U, // <1,4,7,5>: Cost 4 vmrglw <1,6,1,7>, <1,1,4,5>
+ 3800012308U, // <1,4,7,6>: Cost 4 vsldoi8 <7,0,1,4>, <7,6,7,0>
+ 3800012396U, // <1,4,7,7>: Cost 4 vsldoi8 <7,0,1,4>, <7,7,7,7>
+ 2731579012U, // <1,4,7,u>: Cost 3 vsldoi8 <7,u,1,4>, <7,u,1,4>
+ 2624241766U, // <1,4,u,0>: Cost 3 vsldoi4 <1,1,4,u>, LHS
+ 2686457646U, // <1,4,u,1>: Cost 3 vsldoi8 <0,3,1,4>, LHS
+ 2618271398U, // <1,4,u,2>: Cost 3 vsldoi4 <0,1,4,u>, <2,3,0,1>
+ 2734233544U, // <1,4,u,3>: Cost 3 vsldoi8 <u,3,1,4>, <u,3,1,4>
+ 2689775679U, // <1,4,u,4>: Cost 3 vsldoi8 <0,u,1,4>, <u,4,5,6>
+ 1152355638U, // <1,4,u,5>: Cost 2 vmrghw <1,u,3,0>, RHS
+ 1683115561U, // <1,4,u,6>: Cost 2 vsldoi12 <0,u,1,1>, RHS
+ 2736888076U, // <1,4,u,7>: Cost 3 vsldoi8 <u,7,1,4>, <u,7,1,4>
+ 1683115579U, // <1,4,u,u>: Cost 2 vsldoi12 <0,u,1,1>, RHS
+ 2687123456U, // <1,5,0,0>: Cost 3 vsldoi8 <0,4,1,5>, <0,0,0,0>
+ 1613381734U, // <1,5,0,1>: Cost 2 vsldoi8 <0,4,1,5>, LHS
+ 3759538352U, // <1,5,0,2>: Cost 4 vsldoi8 <0,2,1,5>, <0,2,1,5>
+ 3760865532U, // <1,5,0,3>: Cost 4 vsldoi8 <0,4,1,5>, <0,3,1,0>
+ 1613381970U, // <1,5,0,4>: Cost 2 vsldoi8 <0,4,1,5>, <0,4,1,5>
+ 2687787427U, // <1,5,0,5>: Cost 3 vsldoi8 <0,5,1,5>, <0,5,1,5>
+ 2781777524U, // <1,5,0,6>: Cost 3 vsldoi12 <5,0,6,1>, <5,0,6,1>
+ 3733828717U, // <1,5,0,7>: Cost 4 vsldoi4 <7,1,5,0>, <7,1,5,0>
+ 1613382301U, // <1,5,0,u>: Cost 2 vsldoi8 <0,4,1,5>, LHS
+ 2781040271U, // <1,5,1,0>: Cost 3 vsldoi12 <4,u,5,1>, <5,1,0,1>
+ 2687124276U, // <1,5,1,1>: Cost 3 vsldoi8 <0,4,1,5>, <1,1,1,1>
+ 2687124374U, // <1,5,1,2>: Cost 3 vsldoi8 <0,4,1,5>, <1,2,3,0>
+ 3760866297U, // <1,5,1,3>: Cost 4 vsldoi8 <0,4,1,5>, <1,3,5,0>
+ 2693096491U, // <1,5,1,4>: Cost 3 vsldoi8 <1,4,1,5>, <1,4,1,5>
+ 2687124591U, // <1,5,1,5>: Cost 3 vsldoi8 <0,4,1,5>, <1,5,0,1>
+ 2687124723U, // <1,5,1,6>: Cost 3 vsldoi8 <0,4,1,5>, <1,6,5,7>
+ 3360834803U, // <1,5,1,7>: Cost 4 vmrglw <0,u,1,1>, <1,6,5,7>
+ 2687124860U, // <1,5,1,u>: Cost 3 vsldoi8 <0,4,1,5>, <1,u,3,0>
+ 2323598792U, // <1,5,2,0>: Cost 3 vmrglw <7,0,1,2>, <4,7,5,0>
+ 2687125027U, // <1,5,2,1>: Cost 3 vsldoi8 <0,4,1,5>, <2,1,3,5>
+ 2687125096U, // <1,5,2,2>: Cost 3 vsldoi8 <0,4,1,5>, <2,2,2,2>
+ 2687125158U, // <1,5,2,3>: Cost 3 vsldoi8 <0,4,1,5>, <2,3,0,1>
+ 2642185188U, // <1,5,2,4>: Cost 3 vsldoi4 <4,1,5,2>, <4,1,5,2>
+ 2323598554U, // <1,5,2,5>: Cost 3 vmrglw <7,0,1,2>, <4,4,5,5>
+ 2687125434U, // <1,5,2,6>: Cost 3 vsldoi8 <0,4,1,5>, <2,6,3,7>
+ 3373450483U, // <1,5,2,7>: Cost 4 vmrglw <3,0,1,2>, <1,6,5,7>
+ 2687125563U, // <1,5,2,u>: Cost 3 vsldoi8 <0,4,1,5>, <2,u,0,1>
+ 2687125654U, // <1,5,3,0>: Cost 3 vsldoi8 <0,4,1,5>, <3,0,1,2>
+ 2312990234U, // <1,5,3,1>: Cost 3 vmrglw <5,2,1,3>, <4,u,5,1>
+ 3760867649U, // <1,5,3,2>: Cost 4 vsldoi8 <0,4,1,5>, <3,2,2,2>
+ 2687125916U, // <1,5,3,3>: Cost 3 vsldoi8 <0,4,1,5>, <3,3,3,3>
+ 2687126018U, // <1,5,3,4>: Cost 3 vsldoi8 <0,4,1,5>, <3,4,5,6>
+ 3386731738U, // <1,5,3,5>: Cost 4 vmrglw <5,2,1,3>, <4,4,5,5>
+ 3356871170U, // <1,5,3,6>: Cost 4 vmrglw <0,2,1,3>, <3,4,5,6>
+ 3808643779U, // <1,5,3,7>: Cost 4 vsldoi8 <u,4,1,5>, <3,7,0,1>
+ 2687126302U, // <1,5,3,u>: Cost 3 vsldoi8 <0,4,1,5>, <3,u,1,2>
+ 2642198630U, // <1,5,4,0>: Cost 3 vsldoi4 <4,1,5,4>, LHS
+ 2687126498U, // <1,5,4,1>: Cost 3 vsldoi8 <0,4,1,5>, <4,1,5,0>
+ 3715941923U, // <1,5,4,2>: Cost 4 vsldoi4 <4,1,5,4>, <2,1,3,5>
+ 3709970701U, // <1,5,4,3>: Cost 4 vsldoi4 <3,1,5,4>, <3,1,5,4>
+ 2687126736U, // <1,5,4,4>: Cost 3 vsldoi8 <0,4,1,5>, <4,4,4,4>
+ 1613385014U, // <1,5,4,5>: Cost 2 vsldoi8 <0,4,1,5>, RHS
+ 2283801090U, // <1,5,4,6>: Cost 3 vmrglw <0,3,1,4>, <3,4,5,6>
+ 3733861489U, // <1,5,4,7>: Cost 4 vsldoi4 <7,1,5,4>, <7,1,5,4>
+ 1613385257U, // <1,5,4,u>: Cost 2 vsldoi8 <0,4,1,5>, RHS
+ 2624290918U, // <1,5,5,0>: Cost 3 vsldoi4 <1,1,5,5>, LHS
+ 2624291676U, // <1,5,5,1>: Cost 3 vsldoi4 <1,1,5,5>, <1,1,5,5>
+ 3698034211U, // <1,5,5,2>: Cost 4 vsldoi4 <1,1,5,5>, <2,1,3,5>
+ 2284471211U, // <1,5,5,3>: Cost 3 vmrglw <0,4,1,5>, <1,2,5,3>
+ 2624294198U, // <1,5,5,4>: Cost 3 vsldoi4 <1,1,5,5>, RHS
+ 2284471132U, // <1,5,5,5>: Cost 3 vmrglw <0,4,1,5>, <1,1,5,5>
+ 2284472834U, // <1,5,5,6>: Cost 3 vmrglw <0,4,1,5>, <3,4,5,6>
+ 2284471539U, // <1,5,5,7>: Cost 3 vmrglw <0,4,1,5>, <1,6,5,7>
+ 2284471216U, // <1,5,5,u>: Cost 3 vmrglw <0,4,1,5>, <1,2,5,u>
+ 2785316900U, // <1,5,6,0>: Cost 3 vsldoi12 <5,6,0,1>, <5,6,0,1>
+ 2781040691U, // <1,5,6,1>: Cost 3 vsldoi12 <4,u,5,1>, <5,6,1,7>
+ 2734903802U, // <1,5,6,2>: Cost 3 vsldoi8 <u,4,1,5>, <6,2,7,3>
+ 3848736834U, // <1,5,6,3>: Cost 4 vsldoi12 <3,u,4,1>, <5,6,3,4>
+ 3298717620U, // <1,5,6,4>: Cost 4 vmrghw <1,6,5,7>, <5,4,5,6>
+ 3298717700U, // <1,5,6,5>: Cost 4 vmrghw <1,6,5,7>, <5,5,5,5>
+ 2734904120U, // <1,5,6,6>: Cost 3 vsldoi8 <u,4,1,5>, <6,6,6,6>
+ 2781040738U, // <1,5,6,7>: Cost 3 vsldoi12 <4,u,5,1>, <5,6,7,0>
+ 2781040747U, // <1,5,6,u>: Cost 3 vsldoi12 <4,u,5,1>, <5,6,u,0>
+ 2734904314U, // <1,5,7,0>: Cost 3 vsldoi8 <u,4,1,5>, <7,0,1,2>
+ 2315677210U, // <1,5,7,1>: Cost 3 vmrglw <5,6,1,7>, <4,u,5,1>
+ 3808646292U, // <1,5,7,2>: Cost 4 vsldoi8 <u,4,1,5>, <7,2,0,3>
+ 3808646371U, // <1,5,7,3>: Cost 4 vsldoi8 <u,4,1,5>, <7,3,0,1>
+ 2734904678U, // <1,5,7,4>: Cost 3 vsldoi8 <u,4,1,5>, <7,4,5,6>
+ 3389418714U, // <1,5,7,5>: Cost 4 vmrglw <5,6,1,7>, <4,4,5,5>
+ 3365528656U, // <1,5,7,6>: Cost 4 vmrglw <1,6,1,7>, <1,4,5,6>
+ 2734904940U, // <1,5,7,7>: Cost 3 vsldoi8 <u,4,1,5>, <7,7,7,7>
+ 2734904962U, // <1,5,7,u>: Cost 3 vsldoi8 <u,4,1,5>, <7,u,1,2>
+ 2687129299U, // <1,5,u,0>: Cost 3 vsldoi8 <0,4,1,5>, <u,0,1,2>
+ 1613387566U, // <1,5,u,1>: Cost 2 vsldoi8 <0,4,1,5>, LHS
+ 2687129480U, // <1,5,u,2>: Cost 3 vsldoi8 <0,4,1,5>, <u,2,3,3>
+ 2687129532U, // <1,5,u,3>: Cost 3 vsldoi8 <0,4,1,5>, <u,3,0,1>
+ 1661163546U, // <1,5,u,4>: Cost 2 vsldoi8 <u,4,1,5>, <u,4,1,5>
+ 1613387930U, // <1,5,u,5>: Cost 2 vsldoi8 <0,4,1,5>, RHS
+ 2687129808U, // <1,5,u,6>: Cost 3 vsldoi8 <0,4,1,5>, <u,6,3,7>
+ 2781040900U, // <1,5,u,7>: Cost 3 vsldoi12 <4,u,5,1>, <5,u,7,0>
+ 1613388133U, // <1,5,u,u>: Cost 2 vsldoi8 <0,4,1,5>, LHS
+ 3759546368U, // <1,6,0,0>: Cost 4 vsldoi8 <0,2,1,6>, <0,0,0,0>
+ 2685804646U, // <1,6,0,1>: Cost 3 vsldoi8 <0,2,1,6>, LHS
+ 2685804721U, // <1,6,0,2>: Cost 3 vsldoi8 <0,2,1,6>, <0,2,1,6>
+ 3861270834U, // <1,6,0,3>: Cost 4 vsldoi12 <6,0,3,1>, <6,0,3,1>
+ 3759546706U, // <1,6,0,4>: Cost 4 vsldoi8 <0,2,1,6>, <0,4,1,5>
+ 2687795620U, // <1,6,0,5>: Cost 3 vsldoi8 <0,5,1,6>, <0,5,1,6>
+ 2688459253U, // <1,6,0,6>: Cost 3 vsldoi8 <0,6,1,6>, <0,6,1,6>
+ 2283769142U, // <1,6,0,7>: Cost 3 vmrglw <0,3,1,0>, RHS
+ 2685805213U, // <1,6,0,u>: Cost 3 vsldoi8 <0,2,1,6>, LHS
+ 3698073702U, // <1,6,1,0>: Cost 4 vsldoi4 <1,1,6,1>, LHS
+ 3759547188U, // <1,6,1,1>: Cost 4 vsldoi8 <0,2,1,6>, <1,1,1,1>
+ 2221314554U, // <1,6,1,2>: Cost 3 vmrghw <1,1,1,1>, <6,2,7,3>
+ 3759547401U, // <1,6,1,3>: Cost 4 vsldoi8 <0,2,1,6>, <1,3,6,7>
+ 3698076982U, // <1,6,1,4>: Cost 4 vsldoi4 <1,1,6,1>, RHS
+ 3767510141U, // <1,6,1,5>: Cost 4 vsldoi8 <1,5,1,6>, <1,5,1,6>
+ 2334872376U, // <1,6,1,6>: Cost 3 vmrglw <u,u,1,1>, <6,6,6,6>
+ 1213353270U, // <1,6,1,7>: Cost 2 vmrglw <0,u,1,1>, RHS
+ 1213353271U, // <1,6,1,u>: Cost 2 vmrglw <0,u,1,1>, RHS
+ 3704053862U, // <1,6,2,0>: Cost 4 vsldoi4 <2,1,6,2>, LHS
+ 3759547961U, // <1,6,2,1>: Cost 4 vsldoi8 <0,2,1,6>, <2,1,6,0>
+ 2222117370U, // <1,6,2,2>: Cost 3 vmrghw <1,2,3,0>, <6,2,7,3>
+ 3759548070U, // <1,6,2,3>: Cost 4 vsldoi8 <0,2,1,6>, <2,3,0,1>
+ 3704057142U, // <1,6,2,4>: Cost 4 vsldoi4 <2,1,6,2>, RHS
+ 3373451057U, // <1,6,2,5>: Cost 4 vmrglw <3,0,1,2>, <2,4,6,5>
+ 2685806522U, // <1,6,2,6>: Cost 3 vsldoi8 <0,2,1,6>, <2,6,3,7>
+ 1225968950U, // <1,6,2,7>: Cost 2 vmrglw <3,0,1,2>, RHS
+ 1225968951U, // <1,6,2,u>: Cost 2 vmrglw <3,0,1,2>, RHS
+ 3759548566U, // <1,6,3,0>: Cost 4 vsldoi8 <0,2,1,6>, <3,0,1,2>
+ 3842912793U, // <1,6,3,1>: Cost 4 vsldoi12 <2,u,6,1>, <6,3,1,7>
+ 3759548774U, // <1,6,3,2>: Cost 4 vsldoi8 <0,2,1,6>, <3,2,6,3>
+ 3759548828U, // <1,6,3,3>: Cost 4 vsldoi8 <0,2,1,6>, <3,3,3,3>
+ 3759548930U, // <1,6,3,4>: Cost 4 vsldoi8 <0,2,1,6>, <3,4,5,6>
+ 3809315421U, // <1,6,3,5>: Cost 4 vsldoi8 <u,5,1,6>, <3,5,6,7>
+ 3386733368U, // <1,6,3,6>: Cost 4 vmrglw <5,2,1,3>, <6,6,6,6>
+ 2283130166U, // <1,6,3,7>: Cost 3 vmrglw <0,2,1,3>, RHS
+ 2283130167U, // <1,6,3,u>: Cost 3 vmrglw <0,2,1,3>, RHS
+ 3704070246U, // <1,6,4,0>: Cost 4 vsldoi4 <2,1,6,4>, LHS
+ 3862229608U, // <1,6,4,1>: Cost 4 vsldoi12 <6,1,7,1>, <6,4,1,5>
+ 3704071741U, // <1,6,4,2>: Cost 4 vsldoi4 <2,1,6,4>, <2,1,6,4>
+ 3721988610U, // <1,6,4,3>: Cost 4 vsldoi4 <5,1,6,4>, <3,4,5,6>
+ 3704073526U, // <1,6,4,4>: Cost 4 vsldoi4 <2,1,6,4>, RHS
+ 2685807926U, // <1,6,4,5>: Cost 3 vsldoi8 <0,2,1,6>, RHS
+ 3865621141U, // <1,6,4,6>: Cost 4 vsldoi12 <6,6,u,1>, <6,4,6,5>
+ 2283801910U, // <1,6,4,7>: Cost 3 vmrglw <0,3,1,4>, RHS
+ 2685808169U, // <1,6,4,u>: Cost 3 vsldoi8 <0,2,1,6>, RHS
+ 3710050406U, // <1,6,5,0>: Cost 4 vsldoi4 <3,1,6,5>, LHS
+ 3710051571U, // <1,6,5,1>: Cost 4 vsldoi4 <3,1,6,5>, <1,6,5,7>
+ 3405989597U, // <1,6,5,2>: Cost 4 vmrglw <u,4,1,5>, <2,3,6,2>
+ 3358214502U, // <1,6,5,3>: Cost 4 vmrglw <0,4,1,5>, <3,2,6,3>
+ 3710053686U, // <1,6,5,4>: Cost 4 vsldoi4 <3,1,6,5>, RHS
+ 3721998025U, // <1,6,5,5>: Cost 4 vsldoi4 <5,1,6,5>, <5,1,6,5>
+ 2332250936U, // <1,6,5,6>: Cost 3 vmrglw <u,4,1,5>, <6,6,6,6>
+ 1210731830U, // <1,6,5,7>: Cost 2 vmrglw <0,4,1,5>, RHS
+ 1210731831U, // <1,6,5,u>: Cost 2 vmrglw <0,4,1,5>, RHS
+ 2791289597U, // <1,6,6,0>: Cost 3 vsldoi12 <6,6,0,1>, <6,6,0,1>
+ 3698115430U, // <1,6,6,1>: Cost 4 vsldoi4 <1,1,6,6>, <1,1,6,6>
+ 3698116538U, // <1,6,6,2>: Cost 4 vsldoi4 <1,1,6,6>, <2,6,3,7>
+ 3356894132U, // <1,6,6,3>: Cost 4 vmrglw <0,2,1,6>, <1,2,6,3>
+ 3698117942U, // <1,6,6,4>: Cost 4 vsldoi4 <1,1,6,6>, RHS
+ 3722006218U, // <1,6,6,5>: Cost 4 vsldoi4 <5,1,6,6>, <5,1,6,6>
+ 2781041464U, // <1,6,6,6>: Cost 3 vsldoi12 <4,u,5,1>, <6,6,6,6>
+ 2283154742U, // <1,6,6,7>: Cost 3 vmrglw <0,2,1,6>, RHS
+ 2283154743U, // <1,6,6,u>: Cost 3 vmrglw <0,2,1,6>, RHS
+ 1718211406U, // <1,6,7,0>: Cost 2 vsldoi12 <6,7,0,1>, <6,7,0,1>
+ 2792026967U, // <1,6,7,1>: Cost 3 vsldoi12 <6,7,1,1>, <6,7,1,1>
+ 2765411170U, // <1,6,7,2>: Cost 3 vsldoi12 <2,3,0,1>, <6,7,2,3>
+ 3854783336U, // <1,6,7,3>: Cost 4 vsldoi12 <4,u,5,1>, <6,7,3,0>
+ 2781041526U, // <1,6,7,4>: Cost 3 vsldoi12 <4,u,5,1>, <6,7,4,5>
+ 3365528664U, // <1,6,7,5>: Cost 4 vmrglw <1,6,1,7>, <1,4,6,5>
+ 2791953290U, // <1,6,7,6>: Cost 3 vsldoi12 <6,7,0,1>, <6,7,6,7>
+ 2291789110U, // <1,6,7,7>: Cost 3 vmrglw <1,6,1,7>, RHS
+ 1718801302U, // <1,6,7,u>: Cost 2 vsldoi12 <6,7,u,1>, <6,7,u,1>
+ 1718875039U, // <1,6,u,0>: Cost 2 vsldoi12 <6,u,0,1>, <6,u,0,1>
+ 2685810478U, // <1,6,u,1>: Cost 3 vsldoi8 <0,2,1,6>, LHS
+ 2792764337U, // <1,6,u,2>: Cost 3 vsldoi12 <6,u,2,1>, <6,u,2,1>
+ 3759552444U, // <1,6,u,3>: Cost 4 vsldoi8 <0,2,1,6>, <u,3,0,1>
+ 2781041607U, // <1,6,u,4>: Cost 3 vsldoi12 <4,u,5,1>, <6,u,4,5>
+ 2685810842U, // <1,6,u,5>: Cost 3 vsldoi8 <0,2,1,6>, RHS
+ 2689792208U, // <1,6,u,6>: Cost 3 vsldoi8 <0,u,1,6>, <u,6,3,7>
+ 1210756406U, // <1,6,u,7>: Cost 2 vmrglw <0,4,1,u>, RHS
+ 1210756407U, // <1,6,u,u>: Cost 2 vmrglw <0,4,1,u>, RHS
+ 2793280496U, // <1,7,0,0>: Cost 3 vsldoi12 <7,0,0,1>, <7,0,0,1>
+ 2694439014U, // <1,7,0,1>: Cost 3 vsldoi8 <1,6,1,7>, LHS
+ 3393343912U, // <1,7,0,2>: Cost 4 vmrglw <6,3,1,0>, <6,1,7,2>
+ 3397325306U, // <1,7,0,3>: Cost 4 vmrglw <7,0,1,0>, <6,2,7,3>
+ 2793575444U, // <1,7,0,4>: Cost 3 vsldoi12 <7,0,4,1>, <7,0,4,1>
+ 3722030797U, // <1,7,0,5>: Cost 4 vsldoi4 <5,1,7,0>, <5,1,7,0>
+ 2688467446U, // <1,7,0,6>: Cost 3 vsldoi8 <0,6,1,7>, <0,6,1,7>
+ 2689131079U, // <1,7,0,7>: Cost 3 vsldoi8 <0,7,1,7>, <0,7,1,7>
+ 2694439570U, // <1,7,0,u>: Cost 3 vsldoi8 <1,6,1,7>, <0,u,1,1>
+ 2654265354U, // <1,7,1,0>: Cost 3 vsldoi4 <6,1,7,1>, <0,0,1,1>
+ 2794017866U, // <1,7,1,1>: Cost 3 vsldoi12 <7,1,1,1>, <7,1,1,1>
+ 3768181639U, // <1,7,1,2>: Cost 4 vsldoi8 <1,6,1,7>, <1,2,1,3>
+ 2334872058U, // <1,7,1,3>: Cost 3 vmrglw <u,u,1,1>, <6,2,7,3>
+ 2654268726U, // <1,7,1,4>: Cost 3 vsldoi4 <6,1,7,1>, RHS
+ 3792069797U, // <1,7,1,5>: Cost 4 vsldoi8 <5,6,1,7>, <1,5,6,1>
+ 2694440143U, // <1,7,1,6>: Cost 3 vsldoi8 <1,6,1,7>, <1,6,1,7>
+ 2334872386U, // <1,7,1,7>: Cost 3 vmrglw <u,u,1,1>, <6,6,7,7>
+ 2695767409U, // <1,7,1,u>: Cost 3 vsldoi8 <1,u,1,7>, <1,u,1,7>
+ 2654273638U, // <1,7,2,0>: Cost 3 vsldoi4 <6,1,7,2>, LHS
+ 2222117973U, // <1,7,2,1>: Cost 3 vmrghw <1,2,3,0>, <7,1,2,3>
+ 2299711912U, // <1,7,2,2>: Cost 3 vmrglw <3,0,1,2>, <6,1,7,2>
+ 2654275734U, // <1,7,2,3>: Cost 3 vsldoi4 <6,1,7,2>, <3,0,1,2>
+ 2654276918U, // <1,7,2,4>: Cost 3 vsldoi4 <6,1,7,2>, RHS
+ 3385397675U, // <1,7,2,5>: Cost 4 vmrglw <5,0,1,2>, <6,1,7,5>
+ 2654278056U, // <1,7,2,6>: Cost 3 vsldoi4 <6,1,7,2>, <6,1,7,2>
+ 2323599627U, // <1,7,2,7>: Cost 3 vmrglw <7,0,1,2>, <5,u,7,7>
+ 2654279470U, // <1,7,2,u>: Cost 3 vsldoi4 <6,1,7,2>, LHS
+ 2795271395U, // <1,7,3,0>: Cost 3 vsldoi12 <7,3,0,1>, <7,3,0,1>
+ 3768183059U, // <1,7,3,1>: Cost 4 vsldoi8 <1,6,1,7>, <3,1,6,1>
+ 3728025254U, // <1,7,3,2>: Cost 4 vsldoi4 <6,1,7,3>, <2,3,0,1>
+ 3768183196U, // <1,7,3,3>: Cost 4 vsldoi8 <1,6,1,7>, <3,3,3,3>
+ 3768183298U, // <1,7,3,4>: Cost 4 vsldoi8 <1,6,1,7>, <3,4,5,6>
+ 3792071255U, // <1,7,3,5>: Cost 4 vsldoi8 <5,6,1,7>, <3,5,6,1>
+ 3780127361U, // <1,7,3,6>: Cost 4 vsldoi8 <3,6,1,7>, <3,6,1,7>
+ 3847779617U, // <1,7,3,7>: Cost 4 vsldoi12 <3,7,0,1>, <7,3,7,0>
+ 2795861291U, // <1,7,3,u>: Cost 3 vsldoi12 <7,3,u,1>, <7,3,u,1>
+ 2795935028U, // <1,7,4,0>: Cost 3 vsldoi12 <7,4,0,1>, <7,4,0,1>
+ 3728032975U, // <1,7,4,1>: Cost 4 vsldoi4 <6,1,7,4>, <1,6,1,7>
+ 3839153480U, // <1,7,4,2>: Cost 4 vsldoi12 <2,3,0,1>, <7,4,2,3>
+ 3397358074U, // <1,7,4,3>: Cost 4 vmrglw <7,0,1,4>, <6,2,7,3>
+ 3854783835U, // <1,7,4,4>: Cost 4 vsldoi12 <4,u,5,1>, <7,4,4,4>
+ 2694442294U, // <1,7,4,5>: Cost 3 vsldoi8 <1,6,1,7>, RHS
+ 3786100058U, // <1,7,4,6>: Cost 4 vsldoi8 <4,6,1,7>, <4,6,1,7>
+ 3722065254U, // <1,7,4,7>: Cost 4 vsldoi4 <5,1,7,4>, <7,4,5,6>
+ 2694442537U, // <1,7,4,u>: Cost 3 vsldoi8 <1,6,1,7>, RHS
+ 2654298214U, // <1,7,5,0>: Cost 3 vsldoi4 <6,1,7,5>, LHS
+ 3854783893U, // <1,7,5,1>: Cost 4 vsldoi12 <4,u,5,1>, <7,5,1,u>
+ 3710126010U, // <1,7,5,2>: Cost 4 vsldoi4 <3,1,7,5>, <2,6,3,7>
+ 2332250618U, // <1,7,5,3>: Cost 3 vmrglw <u,4,1,5>, <6,2,7,3>
+ 2654301494U, // <1,7,5,4>: Cost 3 vsldoi4 <6,1,7,5>, RHS
+ 2284474795U, // <1,7,5,5>: Cost 3 vmrglw <0,4,1,5>, <6,1,7,5>
+ 2718330931U, // <1,7,5,6>: Cost 3 vsldoi8 <5,6,1,7>, <5,6,1,7>
+ 2332250946U, // <1,7,5,7>: Cost 3 vmrglw <u,4,1,5>, <6,6,7,7>
+ 2719658197U, // <1,7,5,u>: Cost 3 vsldoi8 <5,u,1,7>, <5,u,1,7>
+ 2332921954U, // <1,7,6,0>: Cost 3 vmrglw <u,5,1,6>, <5,6,7,0>
+ 3768185254U, // <1,7,6,1>: Cost 4 vsldoi8 <1,6,1,7>, <6,1,7,0>
+ 3710134202U, // <1,7,6,2>: Cost 4 vsldoi4 <3,1,7,6>, <2,6,3,7>
+ 3710134561U, // <1,7,6,3>: Cost 4 vsldoi4 <3,1,7,6>, <3,1,7,6>
+ 3710135606U, // <1,7,6,4>: Cost 4 vsldoi4 <3,1,7,6>, RHS
+ 3864884745U, // <1,7,6,5>: Cost 4 vsldoi12 <6,5,7,1>, <7,6,5,7>
+ 3854784017U, // <1,7,6,6>: Cost 4 vsldoi12 <4,u,5,1>, <7,6,6,6>
+ 2791953940U, // <1,7,6,7>: Cost 3 vsldoi12 <6,7,0,1>, <7,6,7,0>
+ 2792617501U, // <1,7,6,u>: Cost 3 vsldoi12 <6,u,0,1>, <7,6,u,0>
+ 2797925927U, // <1,7,7,0>: Cost 3 vsldoi12 <7,7,0,1>, <7,7,0,1>
+ 3365528426U, // <1,7,7,1>: Cost 4 vmrglw <1,6,1,7>, <1,1,7,1>
+ 3728058022U, // <1,7,7,2>: Cost 4 vsldoi4 <6,1,7,7>, <2,3,0,1>
+ 3365528509U, // <1,7,7,3>: Cost 4 vmrglw <1,6,1,7>, <1,2,7,3>
+ 3854784079U, // <1,7,7,4>: Cost 4 vsldoi12 <4,u,5,1>, <7,7,4,5>
+ 3722088148U, // <1,7,7,5>: Cost 4 vsldoi4 <5,1,7,7>, <5,1,7,7>
+ 3728060845U, // <1,7,7,6>: Cost 4 vsldoi4 <6,1,7,7>, <6,1,7,7>
+ 2781042284U, // <1,7,7,7>: Cost 3 vsldoi12 <4,u,5,1>, <7,7,7,7>
+ 2798515823U, // <1,7,7,u>: Cost 3 vsldoi12 <7,7,u,1>, <7,7,u,1>
+ 2654322705U, // <1,7,u,0>: Cost 3 vsldoi4 <6,1,7,u>, <0,0,1,u>
+ 2694444846U, // <1,7,u,1>: Cost 3 vsldoi8 <1,6,1,7>, LHS
+ 2299711912U, // <1,7,u,2>: Cost 3 vmrglw <3,0,1,2>, <6,1,7,2>
+ 2323649018U, // <1,7,u,3>: Cost 3 vmrglw <7,0,1,u>, <6,2,7,3>
+ 2654326070U, // <1,7,u,4>: Cost 3 vsldoi4 <6,1,7,u>, RHS
+ 2694445210U, // <1,7,u,5>: Cost 3 vsldoi8 <1,6,1,7>, RHS
+ 2654327214U, // <1,7,u,6>: Cost 3 vsldoi4 <6,1,7,u>, <6,1,7,u>
+ 2323649346U, // <1,7,u,7>: Cost 3 vmrglw <7,0,1,u>, <6,6,7,7>
+ 2694445413U, // <1,7,u,u>: Cost 3 vsldoi8 <1,6,1,7>, LHS
+ 1610752017U, // <1,u,0,0>: Cost 2 vsldoi8 <0,0,1,u>, <0,0,1,u>
+ 1613406310U, // <1,u,0,1>: Cost 2 vsldoi8 <0,4,1,u>, LHS
+ 2685821107U, // <1,u,0,2>: Cost 3 vsldoi8 <0,2,1,u>, <0,2,1,u>
+ 2283765916U, // <1,u,0,3>: Cost 3 vmrglw <0,3,1,0>, LHS
+ 1613406549U, // <1,u,0,4>: Cost 2 vsldoi8 <0,4,1,u>, <0,4,1,u>
+ 1725880054U, // <1,u,0,5>: Cost 2 vsldoi12 <u,0,5,1>, <u,0,5,1>
+ 2688475639U, // <1,u,0,6>: Cost 3 vsldoi8 <0,6,1,u>, <0,6,1,u>
+ 2283769160U, // <1,u,0,7>: Cost 3 vmrglw <0,3,1,0>, RHS
+ 1613406877U, // <1,u,0,u>: Cost 2 vsldoi8 <0,4,1,u>, LHS
+ 1550221414U, // <1,u,1,0>: Cost 2 vsldoi4 <1,1,1,1>, LHS
+ 269271142U, // <1,u,1,1>: Cost 1 vspltisw1 LHS
+ 1683117870U, // <1,u,1,2>: Cost 2 vsldoi12 <0,u,1,1>, LHS
+ 1213350044U, // <1,u,1,3>: Cost 2 vmrglw <0,u,1,1>, LHS
+ 1550224694U, // <1,u,1,4>: Cost 2 vsldoi4 <1,1,1,1>, RHS
+ 1147574426U, // <1,u,1,5>: Cost 2 vmrghw <1,1,1,1>, RHS
+ 2687149326U, // <1,u,1,6>: Cost 3 vsldoi8 <0,4,1,u>, <1,6,u,7>
+ 1213353288U, // <1,u,1,7>: Cost 2 vmrglw <0,u,1,1>, RHS
+ 269271142U, // <1,u,1,u>: Cost 1 vspltisw1 LHS
+ 2222118611U, // <1,u,2,0>: Cost 3 vmrghw <1,2,3,0>, <u,0,1,2>
+ 1148376878U, // <1,u,2,1>: Cost 2 vmrghw <1,2,3,0>, LHS
+ 1148371862U, // <1,u,2,2>: Cost 2 vmrghw <1,2,3,0>, <1,2,3,0>
+ 1225965724U, // <1,u,2,3>: Cost 2 vmrglw <3,0,1,2>, LHS
+ 2222118975U, // <1,u,2,4>: Cost 3 vmrghw <1,2,3,0>, <u,4,5,6>
+ 1148377242U, // <1,u,2,5>: Cost 2 vmrghw <1,2,3,0>, RHS
+ 2687150010U, // <1,u,2,6>: Cost 3 vsldoi8 <0,4,1,u>, <2,6,3,7>
+ 1225968968U, // <1,u,2,7>: Cost 2 vmrglw <3,0,1,2>, RHS
+ 1148377445U, // <1,u,2,u>: Cost 2 vmrghw <1,2,3,0>, LHS
+ 471040156U, // <1,u,3,0>: Cost 1 vsldoi4 LHS, LHS
+ 1544782644U, // <1,u,3,1>: Cost 2 vsldoi4 LHS, <1,1,1,1>
+ 1544783464U, // <1,u,3,2>: Cost 2 vsldoi4 LHS, <2,2,2,2>
+ 1544784022U, // <1,u,3,3>: Cost 2 vsldoi4 LHS, <3,0,1,2>
+ 471043382U, // <1,u,3,4>: Cost 1 vsldoi4 LHS, RHS
+ 1592561668U, // <1,u,3,5>: Cost 2 vsldoi4 LHS, <5,5,5,5>
+ 1592562170U, // <1,u,3,6>: Cost 2 vsldoi4 LHS, <6,2,7,3>
+ 1592562682U, // <1,u,3,7>: Cost 2 vsldoi4 LHS, <7,0,1,2>
+ 471045934U, // <1,u,3,u>: Cost 1 vsldoi4 LHS, LHS
+ 2708384629U, // <1,u,4,0>: Cost 3 vsldoi8 <4,0,1,u>, <4,0,1,u>
+ 2687151101U, // <1,u,4,1>: Cost 3 vsldoi8 <0,4,1,u>, <4,1,u,0>
+ 2223408022U, // <1,u,4,2>: Cost 3 vmrghw <1,4,2,5>, <1,2,3,0>
+ 2283798684U, // <1,u,4,3>: Cost 3 vmrglw <0,3,1,4>, LHS
+ 2642422785U, // <1,u,4,4>: Cost 3 vsldoi4 <4,1,u,4>, <4,1,u,4>
+ 1613409590U, // <1,u,4,5>: Cost 2 vsldoi8 <0,4,1,u>, RHS
+ 2283801090U, // <1,u,4,6>: Cost 3 vmrglw <0,3,1,4>, <3,4,5,6>
+ 2283801928U, // <1,u,4,7>: Cost 3 vmrglw <0,3,1,4>, RHS
+ 1613409833U, // <1,u,4,u>: Cost 2 vsldoi8 <0,4,1,u>, RHS
+ 2284471235U, // <1,u,5,0>: Cost 3 vmrglw <0,4,1,5>, <1,2,u,0>
+ 2284472046U, // <1,u,5,1>: Cost 3 vmrglw <0,4,1,5>, <2,3,u,1>
+ 2284472533U, // <1,u,5,2>: Cost 3 vmrglw <0,4,1,5>, <3,0,u,2>
+ 1210728604U, // <1,u,5,3>: Cost 2 vmrglw <0,4,1,5>, LHS
+ 2284471239U, // <1,u,5,4>: Cost 3 vmrglw <0,4,1,5>, <1,2,u,4>
+ 1210728786U, // <1,u,5,5>: Cost 2 vmrglw <0,4,1,5>, <0,4,1,5>
+ 1683118234U, // <1,u,5,6>: Cost 2 vsldoi12 <0,u,1,1>, RHS
+ 1210731848U, // <1,u,5,7>: Cost 2 vmrglw <0,4,1,5>, RHS
+ 1210728609U, // <1,u,5,u>: Cost 2 vmrglw <0,4,1,5>, LHS
+ 2720330023U, // <1,u,6,0>: Cost 3 vsldoi8 <6,0,1,u>, <6,0,1,u>
+ 2757376190U, // <1,u,6,1>: Cost 3 vsldoi12 <0,u,u,1>, <u,6,1,7>
+ 2726302202U, // <1,u,6,2>: Cost 3 vsldoi8 <7,0,1,u>, <6,2,7,3>
+ 2283151516U, // <1,u,6,3>: Cost 3 vmrglw <0,2,1,6>, LHS
+ 2224972114U, // <1,u,6,4>: Cost 3 vmrghw <1,6,5,7>, <0,4,1,5>
+ 2224683162U, // <1,u,6,5>: Cost 3 vmrghw <1,6,1,7>, RHS
+ 2726302520U, // <1,u,6,6>: Cost 3 vsldoi8 <7,0,1,u>, <6,6,6,6>
+ 2283154760U, // <1,u,6,7>: Cost 3 vmrglw <0,2,1,6>, RHS
+ 2283151521U, // <1,u,6,u>: Cost 3 vmrglw <0,2,1,6>, LHS
+ 1652560896U, // <1,u,7,0>: Cost 2 vsldoi8 <7,0,1,u>, <7,0,1,u>
+ 2333590225U, // <1,u,7,1>: Cost 3 vmrglw <u,6,1,7>, <0,u,u,1>
+ 2765412628U, // <1,u,7,2>: Cost 3 vsldoi12 <2,3,0,1>, <u,7,2,3>
+ 2291785884U, // <1,u,7,3>: Cost 3 vmrglw <1,6,1,7>, LHS
+ 2781042984U, // <1,u,7,4>: Cost 3 vsldoi12 <4,u,5,1>, <u,7,4,5>
+ 3365527953U, // <1,u,7,5>: Cost 4 vmrglw <1,6,1,7>, <0,4,u,5>
+ 2791954748U, // <1,u,7,6>: Cost 3 vsldoi12 <6,7,0,1>, <u,7,6,7>
+ 2291789128U, // <1,u,7,7>: Cost 3 vmrglw <1,6,1,7>, RHS
+ 1657869960U, // <1,u,7,u>: Cost 2 vsldoi8 <7,u,1,u>, <7,u,1,u>
+ 471081121U, // <1,u,u,0>: Cost 1 vsldoi4 LHS, LHS
+ 269271142U, // <1,u,u,1>: Cost 1 vspltisw1 LHS
+ 1544824424U, // <1,u,u,2>: Cost 2 vsldoi4 LHS, <2,2,2,2>
+ 1544824982U, // <1,u,u,3>: Cost 2 vsldoi4 LHS, <3,0,1,2>
+ 471084342U, // <1,u,u,4>: Cost 1 vsldoi4 LHS, RHS
+ 1613412506U, // <1,u,u,5>: Cost 2 vsldoi8 <0,4,1,u>, RHS
+ 1683118477U, // <1,u,u,6>: Cost 2 vsldoi12 <0,u,1,1>, RHS
+ 1210756424U, // <1,u,u,7>: Cost 2 vmrglw <0,4,1,u>, RHS
+ 471086894U, // <1,u,u,u>: Cost 1 vsldoi4 LHS, LHS
+ 2226757632U, // <2,0,0,0>: Cost 3 vmrghw <2,0,3,0>, <0,0,0,0>
+ 2226757734U, // <2,0,0,1>: Cost 3 vmrghw <2,0,3,0>, LHS
+ 3826622483U, // <2,0,0,2>: Cost 4 vsldoi12 <0,2,1,2>, <0,0,2,1>
+ 3843211292U, // <2,0,0,3>: Cost 4 vsldoi12 <3,0,1,2>, <0,0,3,1>
+ 3300499794U, // <2,0,0,4>: Cost 4 vmrghw <2,0,3,0>, <0,4,1,5>
+ 3356256724U, // <2,0,0,5>: Cost 4 vmrglw <0,1,2,0>, <3,4,0,5>
+ 3825664056U, // <2,0,0,6>: Cost 4 vsldoi12 <0,0,6,2>, <0,0,6,2>
+ 3762889289U, // <2,0,0,7>: Cost 4 vsldoi8 <0,7,2,0>, <0,7,2,0>
+ 2226758301U, // <2,0,0,u>: Cost 3 vmrghw <2,0,3,0>, LHS
+ 2227429386U, // <2,0,1,0>: Cost 3 vmrghw <2,1,3,1>, <0,0,1,1>
+ 2227429478U, // <2,0,1,1>: Cost 3 vmrghw <2,1,3,1>, LHS
+ 1691156582U, // <2,0,1,2>: Cost 2 vsldoi12 <2,2,2,2>, LHS
+ 2666358997U, // <2,0,1,3>: Cost 3 vsldoi4 <u,2,0,1>, <3,0,u,2>
+ 2227462482U, // <2,0,1,4>: Cost 3 vmrghw <2,1,3,5>, <0,4,1,5>
+ 3722186464U, // <2,0,1,5>: Cost 4 vsldoi4 <5,2,0,1>, <5,2,0,1>
+ 3867099278U, // <2,0,1,6>: Cost 4 vsldoi12 <7,0,1,2>, <0,1,6,7>
+ 3366881912U, // <2,0,1,7>: Cost 4 vmrglw <1,u,2,1>, <3,6,0,7>
+ 1691156636U, // <2,0,1,u>: Cost 2 vsldoi12 <2,2,2,2>, LHS
+ 2228027392U, // <2,0,2,0>: Cost 3 vmrghw <2,2,2,2>, <0,0,0,0>
+ 1154285670U, // <2,0,2,1>: Cost 2 vmrghw <2,2,2,2>, LHS
+ 2228027565U, // <2,0,2,2>: Cost 3 vmrghw <2,2,2,2>, <0,2,1,2>
+ 3301769468U, // <2,0,2,3>: Cost 4 vmrghw <2,2,2,2>, <0,3,1,0>
+ 2228027730U, // <2,0,2,4>: Cost 3 vmrghw <2,2,2,2>, <0,4,1,5>
+ 3301769635U, // <2,0,2,5>: Cost 4 vmrghw <2,2,2,2>, <0,5,1,5>
+ 3780806586U, // <2,0,2,6>: Cost 4 vsldoi8 <3,7,2,0>, <2,6,3,7>
+ 3368880760U, // <2,0,2,7>: Cost 4 vmrglw <2,2,2,2>, <3,6,0,7>
+ 1154286237U, // <2,0,2,u>: Cost 2 vmrghw <2,2,2,2>, LHS
+ 1213440000U, // <2,0,3,0>: Cost 2 vmrglw LHS, <0,0,0,0>
+ 1213441702U, // <2,0,3,1>: Cost 2 vmrglw LHS, <2,3,0,1>
+ 2228535470U, // <2,0,3,2>: Cost 3 vmrghw <2,3,0,1>, <0,2,1,3>
+ 2636515632U, // <2,0,3,3>: Cost 3 vsldoi4 <3,2,0,3>, <3,2,0,3>
+ 2287182962U, // <2,0,3,4>: Cost 3 vmrglw LHS, <1,5,0,4>
+ 2660405346U, // <2,0,3,5>: Cost 3 vsldoi4 <7,2,0,3>, <5,6,7,0>
+ 2228535798U, // <2,0,3,6>: Cost 3 vmrghw <2,3,0,1>, <0,6,1,7>
+ 2660406420U, // <2,0,3,7>: Cost 3 vsldoi4 <7,2,0,3>, <7,2,0,3>
+ 1213441709U, // <2,0,3,u>: Cost 2 vmrglw LHS, <2,3,0,u>
+ 3368894464U, // <2,0,4,0>: Cost 4 vmrglw <2,2,2,4>, <0,0,0,0>
+ 2764898642U, // <2,0,4,1>: Cost 3 vsldoi12 <2,2,2,2>, <0,4,1,5>
+ 3826622811U, // <2,0,4,2>: Cost 4 vsldoi12 <0,2,1,2>, <0,4,2,5>
+ 3843211620U, // <2,0,4,3>: Cost 4 vsldoi12 <3,0,1,2>, <0,4,3,5>
+ 3838640493U, // <2,0,4,4>: Cost 4 vsldoi12 <2,2,2,2>, <0,4,4,5>
+ 2732944694U, // <2,0,4,5>: Cost 3 vsldoi8 <u,1,2,0>, RHS
+ 3797396857U, // <2,0,4,6>: Cost 4 vsldoi8 <6,5,2,0>, <4,6,5,2>
+ 3867099528U, // <2,0,4,7>: Cost 4 vsldoi12 <7,0,1,2>, <0,4,7,5>
+ 2764898705U, // <2,0,4,u>: Cost 3 vsldoi12 <2,2,2,2>, <0,4,u,5>
+ 3364257792U, // <2,0,5,0>: Cost 4 vmrglw <1,4,2,5>, <0,0,0,0>
+ 2230124646U, // <2,0,5,1>: Cost 3 vmrghw <2,5,3,6>, LHS
+ 3304235184U, // <2,0,5,2>: Cost 4 vmrghw <2,5,u,6>, <0,2,1,5>
+ 3364260144U, // <2,0,5,3>: Cost 4 vmrglw <1,4,2,5>, <3,2,0,3>
+ 3303817554U, // <2,0,5,4>: Cost 4 vmrghw <2,5,3,0>, <0,4,1,5>
+ 3364260146U, // <2,0,5,5>: Cost 4 vmrglw <1,4,2,5>, <3,2,0,5>
+ 3867099602U, // <2,0,5,6>: Cost 4 vsldoi12 <7,0,1,2>, <0,5,6,7>
+ 3364260472U, // <2,0,5,7>: Cost 4 vmrglw <1,4,2,5>, <3,6,0,7>
+ 2230125213U, // <2,0,5,u>: Cost 3 vmrghw <2,5,3,6>, LHS
+ 2230796288U, // <2,0,6,0>: Cost 3 vmrghw <2,6,3,7>, <0,0,0,0>
+ 1157054566U, // <2,0,6,1>: Cost 2 vmrghw <2,6,3,7>, LHS
+ 2230796465U, // <2,0,6,2>: Cost 3 vmrghw <2,6,3,7>, <0,2,1,6>
+ 3304538364U, // <2,0,6,3>: Cost 4 vmrghw <2,6,3,7>, <0,3,1,0>
+ 2230796626U, // <2,0,6,4>: Cost 3 vmrghw <2,6,3,7>, <0,4,1,5>
+ 3797398205U, // <2,0,6,5>: Cost 4 vsldoi8 <6,5,2,0>, <6,5,2,0>
+ 3304538614U, // <2,0,6,6>: Cost 4 vmrghw <2,6,3,7>, <0,6,1,7>
+ 3798725471U, // <2,0,6,7>: Cost 4 vsldoi8 <6,7,2,0>, <6,7,2,0>
+ 1157055133U, // <2,0,6,u>: Cost 2 vmrghw <2,6,3,7>, LHS
+ 3371573248U, // <2,0,7,0>: Cost 4 vmrglw <2,6,2,7>, <0,0,0,0>
+ 2231189606U, // <2,0,7,1>: Cost 3 vmrghw <2,7,0,1>, LHS
+ 3801380003U, // <2,0,7,2>: Cost 4 vsldoi8 <7,2,2,0>, <7,2,2,0>
+ 3802043636U, // <2,0,7,3>: Cost 4 vsldoi8 <7,3,2,0>, <7,3,2,0>
+ 3806688614U, // <2,0,7,4>: Cost 4 vsldoi8 <u,1,2,0>, <7,4,5,6>
+ 3356317308U, // <2,0,7,5>: Cost 4 vmrglw <0,1,2,7>, <7,u,0,5>
+ 3804034535U, // <2,0,7,6>: Cost 4 vsldoi8 <7,6,2,0>, <7,6,2,0>
+ 3806688876U, // <2,0,7,7>: Cost 4 vsldoi8 <u,1,2,0>, <7,7,7,7>
+ 2231190173U, // <2,0,7,u>: Cost 3 vmrghw <2,7,0,1>, LHS
+ 1208836096U, // <2,0,u,0>: Cost 2 vmrglw LHS, <0,0,0,0>
+ 1208837798U, // <2,0,u,1>: Cost 2 vmrglw LHS, <2,3,0,1>
+ 1691157149U, // <2,0,u,2>: Cost 2 vsldoi12 <2,2,2,2>, LHS
+ 2636556597U, // <2,0,u,3>: Cost 3 vsldoi4 <3,2,0,u>, <3,2,0,u>
+ 2282579625U, // <2,0,u,4>: Cost 3 vmrglw LHS, <2,3,0,4>
+ 2660446306U, // <2,0,u,5>: Cost 3 vsldoi4 <7,2,0,u>, <5,6,7,0>
+ 2228535798U, // <2,0,u,6>: Cost 3 vmrghw <2,3,0,1>, <0,6,1,7>
+ 2660447385U, // <2,0,u,7>: Cost 3 vsldoi4 <7,2,0,u>, <7,2,0,u>
+ 1208837805U, // <2,0,u,u>: Cost 2 vmrglw LHS, <2,3,0,u>
+ 3692388523U, // <2,1,0,0>: Cost 4 vsldoi4 <0,2,1,0>, <0,2,1,0>
+ 2757526244U, // <2,1,0,1>: Cost 3 vsldoi12 <1,0,1,2>, <1,0,1,2>
+ 2330290974U, // <2,1,0,2>: Cost 3 vmrglw <u,1,2,0>, <3,u,1,2>
+ 3843212020U, // <2,1,0,3>: Cost 4 vsldoi12 <3,0,1,2>, <1,0,3,0>
+ 3692391734U, // <2,1,0,4>: Cost 4 vsldoi4 <0,2,1,0>, RHS
+ 3300533362U, // <2,1,0,5>: Cost 4 vmrghw <2,0,3,4>, <1,5,0,4>
+ 3794084337U, // <2,1,0,6>: Cost 4 vsldoi8 <6,0,2,1>, <0,6,1,2>
+ 3374170614U, // <2,1,0,7>: Cost 5 vmrglw <3,1,2,0>, <0,6,1,7>
+ 2758042403U, // <2,1,0,u>: Cost 3 vsldoi12 <1,0,u,2>, <1,0,u,2>
+ 2690482924U, // <2,1,1,0>: Cost 3 vsldoi8 <1,0,2,1>, <1,0,2,1>
+ 2764899124U, // <2,1,1,1>: Cost 3 vsldoi12 <2,2,2,2>, <1,1,1,1>
+ 2695791510U, // <2,1,1,2>: Cost 3 vsldoi8 <1,u,2,1>, <1,2,3,0>
+ 3362235271U, // <2,1,1,3>: Cost 4 vmrglw <1,1,2,1>, <1,2,1,3>
+ 3692399926U, // <2,1,1,4>: Cost 4 vsldoi4 <0,2,1,1>, RHS
+ 3832226649U, // <2,1,1,5>: Cost 4 vsldoi12 <1,1,5,2>, <1,1,5,2>
+ 3301205235U, // <2,1,1,6>: Cost 4 vmrghw <2,1,3,5>, <1,6,5,7>
+ 3768870179U, // <2,1,1,7>: Cost 4 vsldoi8 <1,7,2,1>, <1,7,2,1>
+ 2695791988U, // <2,1,1,u>: Cost 3 vsldoi8 <1,u,2,1>, <1,u,2,1>
+ 2618663085U, // <2,1,2,0>: Cost 3 vsldoi4 <0,2,1,2>, <0,2,1,2>
+ 2228028212U, // <2,1,2,1>: Cost 3 vmrghw <2,2,2,2>, <1,1,1,1>
+ 2618664552U, // <2,1,2,2>: Cost 3 vsldoi4 <0,2,1,2>, <2,2,2,2>
+ 2759000984U, // <2,1,2,3>: Cost 3 vsldoi12 <1,2,3,2>, <1,2,3,2>
+ 2618666294U, // <2,1,2,4>: Cost 3 vsldoi4 <0,2,1,2>, RHS
+ 2295136594U, // <2,1,2,5>: Cost 3 vmrglw <2,2,2,2>, <0,4,1,5>
+ 3769534376U, // <2,1,2,6>: Cost 4 vsldoi8 <1,u,2,1>, <2,6,1,7>
+ 2793358266U, // <2,1,2,7>: Cost 3 vsldoi12 <7,0,1,2>, <1,2,7,0>
+ 2618668846U, // <2,1,2,u>: Cost 3 vsldoi4 <0,2,1,2>, LHS
+ 2282536969U, // <2,1,3,0>: Cost 3 vmrglw LHS, <0,0,1,0>
+ 1208795146U, // <2,1,3,1>: Cost 2 vmrglw LHS, <0,0,1,1>
+ 1213442198U, // <2,1,3,2>: Cost 2 vmrglw LHS, <3,0,1,2>
+ 2287181998U, // <2,1,3,3>: Cost 3 vmrglw LHS, <0,2,1,3>
+ 2618674486U, // <2,1,3,4>: Cost 3 vsldoi4 <0,2,1,3>, RHS
+ 1208795474U, // <2,1,3,5>: Cost 2 vmrglw LHS, <0,4,1,5>
+ 2287182001U, // <2,1,3,6>: Cost 3 vmrglw LHS, <0,2,1,6>
+ 2287183055U, // <2,1,3,7>: Cost 3 vmrglw LHS, <1,6,1,7>
+ 1208795153U, // <2,1,3,u>: Cost 2 vmrglw LHS, <0,0,1,u>
+ 3692421295U, // <2,1,4,0>: Cost 4 vsldoi4 <0,2,1,4>, <0,2,1,4>
+ 3838641195U, // <2,1,4,1>: Cost 4 vsldoi12 <2,2,2,2>, <1,4,1,5>
+ 2330323742U, // <2,1,4,2>: Cost 3 vmrglw <u,1,2,4>, <3,u,1,2>
+ 3692423318U, // <2,1,4,3>: Cost 5 vsldoi4 <0,2,1,4>, <3,0,1,2>
+ 3692424502U, // <2,1,4,4>: Cost 4 vsldoi4 <0,2,1,4>, RHS
+ 2695793974U, // <2,1,4,5>: Cost 3 vsldoi8 <1,u,2,1>, RHS
+ 3799395705U, // <2,1,4,6>: Cost 4 vsldoi8 <6,u,2,1>, <4,6,5,2>
+ 3368895695U, // <2,1,4,7>: Cost 5 vmrglw <2,2,2,4>, <1,6,1,7>
+ 2695794217U, // <2,1,4,u>: Cost 3 vsldoi8 <1,u,2,1>, RHS
+ 3692429488U, // <2,1,5,0>: Cost 4 vsldoi4 <0,2,1,5>, <0,2,1,5>
+ 3364257802U, // <2,1,5,1>: Cost 4 vmrglw <1,4,2,5>, <0,0,1,1>
+ 3692431253U, // <2,1,5,2>: Cost 4 vsldoi4 <0,2,1,5>, <2,5,u,6>
+ 3692431874U, // <2,1,5,3>: Cost 4 vsldoi4 <0,2,1,5>, <3,4,5,6>
+ 3692432694U, // <2,1,5,4>: Cost 4 vsldoi4 <0,2,1,5>, RHS
+ 3364258130U, // <2,1,5,5>: Cost 4 vmrglw <1,4,2,5>, <0,4,1,5>
+ 3303875827U, // <2,1,5,6>: Cost 4 vmrghw <2,5,3,7>, <1,6,5,7>
+ 3867100333U, // <2,1,5,7>: Cost 4 vsldoi12 <7,0,1,2>, <1,5,7,0>
+ 3692435246U, // <2,1,5,u>: Cost 4 vsldoi4 <0,2,1,5>, LHS
+ 2618695857U, // <2,1,6,0>: Cost 3 vsldoi4 <0,2,1,6>, <0,2,1,6>
+ 2230797108U, // <2,1,6,1>: Cost 3 vmrghw <2,6,3,7>, <1,1,1,1>
+ 2618697658U, // <2,1,6,2>: Cost 3 vsldoi4 <0,2,1,6>, <2,6,3,7>
+ 3692439702U, // <2,1,6,3>: Cost 4 vsldoi4 <0,2,1,6>, <3,0,1,2>
+ 2618699062U, // <2,1,6,4>: Cost 3 vsldoi4 <0,2,1,6>, RHS
+ 3364929874U, // <2,1,6,5>: Cost 4 vmrglw <1,5,2,6>, <0,4,1,5>
+ 3692442424U, // <2,1,6,6>: Cost 4 vsldoi4 <0,2,1,6>, <6,6,6,6>
+ 3798733664U, // <2,1,6,7>: Cost 4 vsldoi8 <6,7,2,1>, <6,7,2,1>
+ 2618701614U, // <2,1,6,u>: Cost 3 vsldoi4 <0,2,1,6>, LHS
+ 3799397370U, // <2,1,7,0>: Cost 4 vsldoi8 <6,u,2,1>, <7,0,1,2>
+ 3371573258U, // <2,1,7,1>: Cost 4 vmrglw <2,6,2,7>, <0,0,1,1>
+ 2330351234U, // <2,1,7,2>: Cost 3 vmrglw <u,1,2,7>, <7,u,1,2>
+ 3799397658U, // <2,1,7,3>: Cost 4 vsldoi8 <6,u,2,1>, <7,3,6,2>
+ 3799397734U, // <2,1,7,4>: Cost 4 vsldoi8 <6,u,2,1>, <7,4,5,6>
+ 3371573586U, // <2,1,7,5>: Cost 4 vmrglw <2,6,2,7>, <0,4,1,5>
+ 3799397870U, // <2,1,7,6>: Cost 4 vsldoi8 <6,u,2,1>, <7,6,2,7>
+ 3799397956U, // <2,1,7,7>: Cost 4 vsldoi8 <6,u,2,1>, <7,7,3,3>
+ 2330351234U, // <2,1,7,u>: Cost 3 vmrglw <u,1,2,7>, <7,u,1,2>
+ 2282577929U, // <2,1,u,0>: Cost 3 vmrglw LHS, <0,0,1,0>
+ 1208836106U, // <2,1,u,1>: Cost 2 vmrglw LHS, <0,0,1,1>
+ 1208838294U, // <2,1,u,2>: Cost 2 vmrglw LHS, <3,0,1,2>
+ 2282578094U, // <2,1,u,3>: Cost 3 vmrglw LHS, <0,2,1,3>
+ 2282577933U, // <2,1,u,4>: Cost 3 vmrglw LHS, <0,0,1,4>
+ 1208836434U, // <2,1,u,5>: Cost 2 vmrglw LHS, <0,4,1,5>
+ 2282578097U, // <2,1,u,6>: Cost 3 vmrglw LHS, <0,2,1,6>
+ 2287224015U, // <2,1,u,7>: Cost 3 vmrglw LHS, <1,6,1,7>
+ 1208836113U, // <2,1,u,u>: Cost 2 vmrglw LHS, <0,0,1,u>
+ 2226759117U, // <2,2,0,0>: Cost 3 vmrghw <2,0,3,0>, <2,0,3,0>
+ 1624047718U, // <2,2,0,1>: Cost 2 vsldoi8 <2,2,2,2>, LHS
+ 2697789613U, // <2,2,0,2>: Cost 3 vsldoi8 <2,2,2,2>, <0,2,1,2>
+ 2226767526U, // <2,2,0,3>: Cost 3 vmrghw <2,0,3,1>, <2,3,0,1>
+ 2697789778U, // <2,2,0,4>: Cost 3 vsldoi8 <2,2,2,2>, <0,4,1,5>
+ 3300657000U, // <2,2,0,5>: Cost 4 vmrghw <2,0,5,1>, <2,5,3,6>
+ 2226988986U, // <2,2,0,6>: Cost 3 vmrghw <2,0,6,1>, <2,6,3,7>
+ 3734271139U, // <2,2,0,7>: Cost 4 vsldoi4 <7,2,2,0>, <7,2,2,0>
+ 1624048285U, // <2,2,0,u>: Cost 2 vsldoi8 <2,2,2,2>, LHS
+ 3831268868U, // <2,2,1,0>: Cost 4 vsldoi12 <1,0,1,2>, <2,1,0,1>
+ 2293138804U, // <2,2,1,1>: Cost 3 vmrglw <1,u,2,1>, <1,u,2,1>
+ 2697790358U, // <2,2,1,2>: Cost 3 vsldoi8 <2,2,2,2>, <1,2,3,0>
+ 2293137510U, // <2,2,1,3>: Cost 3 vmrglw <1,u,2,1>, LHS
+ 3771532331U, // <2,2,1,4>: Cost 4 vsldoi8 <2,2,2,2>, <1,4,1,5>
+ 3767551106U, // <2,2,1,5>: Cost 4 vsldoi8 <1,5,2,2>, <1,5,2,2>
+ 3301173178U, // <2,2,1,6>: Cost 4 vmrghw <2,1,3,1>, <2,6,3,7>
+ 3372853169U, // <2,2,1,7>: Cost 4 vmrglw <2,u,2,1>, <2,6,2,7>
+ 2293137515U, // <2,2,1,u>: Cost 3 vmrglw <1,u,2,1>, LHS
+ 1556938854U, // <2,2,2,0>: Cost 2 vsldoi4 <2,2,2,2>, LHS
+ 2295137733U, // <2,2,2,1>: Cost 3 vmrglw <2,2,2,2>, <2,0,2,1>
+ 336380006U, // <2,2,2,2>: Cost 1 vspltisw2 LHS
+ 1221394534U, // <2,2,2,3>: Cost 2 vmrglw <2,2,2,2>, LHS
+ 1556942134U, // <2,2,2,4>: Cost 2 vsldoi4 <2,2,2,2>, RHS
+ 2295138061U, // <2,2,2,5>: Cost 3 vmrglw <2,2,2,2>, <2,4,2,5>
+ 2228029370U, // <2,2,2,6>: Cost 3 vmrghw <2,2,2,2>, <2,6,3,7>
+ 2660545701U, // <2,2,2,7>: Cost 3 vsldoi4 <7,2,2,2>, <7,2,2,2>
+ 336380006U, // <2,2,2,u>: Cost 1 vspltisw2 LHS
+ 2697791638U, // <2,2,3,0>: Cost 3 vsldoi8 <2,2,2,2>, <3,0,1,2>
+ 2765489840U, // <2,2,3,1>: Cost 3 vsldoi12 <2,3,1,2>, <2,3,1,2>
+ 1213441640U, // <2,2,3,2>: Cost 2 vmrglw LHS, <2,2,2,2>
+ 135053414U, // <2,2,3,3>: Cost 1 vmrglw LHS, LHS
+ 2697792002U, // <2,2,3,4>: Cost 3 vsldoi8 <2,2,2,2>, <3,4,5,6>
+ 2330313780U, // <2,2,3,5>: Cost 3 vmrglw LHS, <1,4,2,5>
+ 2287183549U, // <2,2,3,6>: Cost 3 vmrglw LHS, <2,3,2,6>
+ 2660553894U, // <2,2,3,7>: Cost 3 vsldoi4 <7,2,2,3>, <7,2,2,3>
+ 135053419U, // <2,2,3,u>: Cost 1 vmrglw LHS, LHS
+ 2630697062U, // <2,2,4,0>: Cost 3 vsldoi4 <2,2,2,4>, LHS
+ 3771534282U, // <2,2,4,1>: Cost 4 vsldoi8 <2,2,2,2>, <4,1,2,3>
+ 2764900109U, // <2,2,4,2>: Cost 3 vsldoi12 <2,2,2,2>, <2,4,2,5>
+ 2295152742U, // <2,2,4,3>: Cost 3 vmrglw <2,2,2,4>, LHS
+ 2295154282U, // <2,2,4,4>: Cost 3 vmrglw <2,2,2,4>, <2,2,2,4>
+ 1624050998U, // <2,2,4,5>: Cost 2 vsldoi8 <2,2,2,2>, RHS
+ 2229675962U, // <2,2,4,6>: Cost 3 vmrghw <2,4,6,5>, <2,6,3,7>
+ 3368896433U, // <2,2,4,7>: Cost 4 vmrglw <2,2,2,4>, <2,6,2,7>
+ 1624051241U, // <2,2,4,u>: Cost 2 vsldoi8 <2,2,2,2>, RHS
+ 3771534920U, // <2,2,5,0>: Cost 4 vsldoi8 <2,2,2,2>, <5,0,1,2>
+ 3364258540U, // <2,2,5,1>: Cost 4 vmrglw <1,4,2,5>, <1,0,2,1>
+ 2296489576U, // <2,2,5,2>: Cost 3 vmrglw <2,4,2,5>, <2,2,2,2>
+ 2290516070U, // <2,2,5,3>: Cost 3 vmrglw <1,4,2,5>, LHS
+ 3771535284U, // <2,2,5,4>: Cost 4 vsldoi8 <2,2,2,2>, <5,4,5,6>
+ 2290517044U, // <2,2,5,5>: Cost 3 vmrglw <1,4,2,5>, <1,4,2,5>
+ 2697793634U, // <2,2,5,6>: Cost 3 vsldoi8 <2,2,2,2>, <5,6,7,0>
+ 3370231729U, // <2,2,5,7>: Cost 4 vmrglw <2,4,2,5>, <2,6,2,7>
+ 2290516075U, // <2,2,5,u>: Cost 3 vmrglw <1,4,2,5>, LHS
+ 2230797801U, // <2,2,6,0>: Cost 3 vmrghw <2,6,3,7>, <2,0,6,1>
+ 3304539679U, // <2,2,6,1>: Cost 4 vmrghw <2,6,3,7>, <2,1,3,1>
+ 2764900273U, // <2,2,6,2>: Cost 3 vsldoi12 <2,2,2,2>, <2,6,2,7>
+ 2764900282U, // <2,2,6,3>: Cost 3 vsldoi12 <2,2,2,2>, <2,6,3,7>
+ 2230798129U, // <2,2,6,4>: Cost 3 vmrghw <2,6,3,7>, <2,4,6,5>
+ 3304540008U, // <2,2,6,5>: Cost 4 vmrghw <2,6,3,7>, <2,5,3,6>
+ 1157056442U, // <2,2,6,6>: Cost 2 vmrghw <2,6,3,7>, <2,6,3,7>
+ 2725000033U, // <2,2,6,7>: Cost 3 vsldoi8 <6,7,2,2>, <6,7,2,2>
+ 1157056442U, // <2,2,6,u>: Cost 2 vmrghw <2,6,3,7>, <2,6,3,7>
+ 2793359338U, // <2,2,7,0>: Cost 3 vsldoi12 <7,0,1,2>, <2,7,0,1>
+ 3371574725U, // <2,2,7,1>: Cost 4 vmrglw <2,6,2,7>, <2,0,2,1>
+ 2297833064U, // <2,2,7,2>: Cost 3 vmrglw <2,6,2,7>, <2,2,2,2>
+ 2297831526U, // <2,2,7,3>: Cost 3 vmrglw <2,6,2,7>, LHS
+ 2697794918U, // <2,2,7,4>: Cost 3 vsldoi8 <2,2,2,2>, <7,4,5,6>
+ 3371575053U, // <2,2,7,5>: Cost 4 vmrglw <2,6,2,7>, <2,4,2,5>
+ 3304933297U, // <2,2,7,6>: Cost 4 vmrghw <2,7,0,1>, <2,6,2,7>
+ 2297833393U, // <2,2,7,7>: Cost 3 vmrglw <2,6,2,7>, <2,6,2,7>
+ 2297831531U, // <2,2,7,u>: Cost 3 vmrglw <2,6,2,7>, LHS
+ 1556938854U, // <2,2,u,0>: Cost 2 vsldoi4 <2,2,2,2>, LHS
+ 1624053550U, // <2,2,u,1>: Cost 2 vsldoi8 <2,2,2,2>, LHS
+ 336380006U, // <2,2,u,2>: Cost 1 vspltisw2 LHS
+ 135094374U, // <2,2,u,3>: Cost 1 vmrglw LHS, LHS
+ 1556942134U, // <2,2,u,4>: Cost 2 vsldoi4 <2,2,2,2>, RHS
+ 1624053914U, // <2,2,u,5>: Cost 2 vsldoi8 <2,2,2,2>, RHS
+ 1157056442U, // <2,2,u,6>: Cost 2 vmrghw <2,6,3,7>, <2,6,3,7>
+ 2660594859U, // <2,2,u,7>: Cost 3 vsldoi4 <7,2,2,u>, <7,2,2,u>
+ 135094379U, // <2,2,u,u>: Cost 1 vmrglw LHS, LHS
+ 1611448320U, // <2,3,0,0>: Cost 2 vsldoi8 LHS, <0,0,0,0>
+ 537706598U, // <2,3,0,1>: Cost 1 vsldoi8 LHS, LHS
+ 2689835181U, // <2,3,0,2>: Cost 3 vsldoi8 LHS, <0,2,1,2>
+ 2689835260U, // <2,3,0,3>: Cost 3 vsldoi8 LHS, <0,3,1,0>
+ 1611448658U, // <2,3,0,4>: Cost 2 vsldoi8 LHS, <0,4,1,5>
+ 2732966354U, // <2,3,0,5>: Cost 3 vsldoi8 LHS, <0,5,6,7>
+ 2732966390U, // <2,3,0,6>: Cost 3 vsldoi8 LHS, <0,6,1,7>
+ 2660603052U, // <2,3,0,7>: Cost 3 vsldoi4 <7,2,3,0>, <7,2,3,0>
+ 537707165U, // <2,3,0,u>: Cost 1 vsldoi8 LHS, LHS
+ 2689835748U, // <2,3,1,0>: Cost 3 vsldoi8 LHS, <1,0,1,2>
+ 1611449140U, // <2,3,1,1>: Cost 2 vsldoi8 LHS, <1,1,1,1>
+ 1611449238U, // <2,3,1,2>: Cost 2 vsldoi8 LHS, <1,2,3,0>
+ 3763577805U, // <2,3,1,3>: Cost 4 vsldoi8 LHS, <1,3,0,1>
+ 2689836112U, // <2,3,1,4>: Cost 3 vsldoi8 LHS, <1,4,5,6>
+ 2689836143U, // <2,3,1,5>: Cost 3 vsldoi8 LHS, <1,5,0,1>
+ 2689836239U, // <2,3,1,6>: Cost 3 vsldoi8 LHS, <1,6,1,7>
+ 3366881210U, // <2,3,1,7>: Cost 4 vmrglw <1,u,2,1>, <2,6,3,7>
+ 1616094588U, // <2,3,1,u>: Cost 2 vsldoi8 LHS, <1,u,3,0>
+ 2689836493U, // <2,3,2,0>: Cost 3 vsldoi8 LHS, <2,0,3,0>
+ 2685191711U, // <2,3,2,1>: Cost 3 vsldoi8 LHS, <2,1,3,1>
+ 1611449960U, // <2,3,2,2>: Cost 2 vsldoi8 LHS, <2,2,2,2>
+ 1611450022U, // <2,3,2,3>: Cost 2 vsldoi8 LHS, <2,3,0,1>
+ 2689836822U, // <2,3,2,4>: Cost 3 vsldoi8 LHS, <2,4,3,5>
+ 2689836904U, // <2,3,2,5>: Cost 3 vsldoi8 LHS, <2,5,3,6>
+ 1611450298U, // <2,3,2,6>: Cost 2 vsldoi8 LHS, <2,6,3,7>
+ 2295138234U, // <2,3,2,7>: Cost 3 vmrglw <2,2,2,2>, <2,6,3,7>
+ 1611450456U, // <2,3,2,u>: Cost 2 vsldoi8 LHS, <2,u,3,3>
+ 1213440918U, // <2,3,3,0>: Cost 2 vmrglw LHS, <1,2,3,0>
+ 2282538527U, // <2,3,3,1>: Cost 3 vmrglw LHS, <2,1,3,1>
+ 1557022322U, // <2,3,3,2>: Cost 2 vsldoi4 <2,2,3,3>, <2,2,3,3>
+ 1208796786U, // <2,3,3,3>: Cost 2 vmrglw LHS, <2,2,3,3>
+ 1213440922U, // <2,3,3,4>: Cost 2 vmrglw LHS, <1,2,3,4>
+ 2282538531U, // <2,3,3,5>: Cost 3 vmrglw LHS, <2,1,3,5>
+ 2287188094U, // <2,3,3,6>: Cost 3 vmrglw LHS, <u,5,3,6>
+ 1213441978U, // <2,3,3,7>: Cost 2 vmrglw LHS, <2,6,3,7>
+ 1208796791U, // <2,3,3,u>: Cost 2 vmrglw LHS, <2,2,3,u>
+ 1551056998U, // <2,3,4,0>: Cost 2 vsldoi4 <1,2,3,4>, LHS
+ 1551057818U, // <2,3,4,1>: Cost 2 vsldoi4 <1,2,3,4>, <1,2,3,4>
+ 2624800360U, // <2,3,4,2>: Cost 3 vsldoi4 <1,2,3,4>, <2,2,2,2>
+ 2624800918U, // <2,3,4,3>: Cost 3 vsldoi4 <1,2,3,4>, <3,0,1,2>
+ 1551060278U, // <2,3,4,4>: Cost 2 vsldoi4 <1,2,3,4>, RHS
+ 537709878U, // <2,3,4,5>: Cost 1 vsldoi8 LHS, RHS
+ 2732969337U, // <2,3,4,6>: Cost 3 vsldoi8 LHS, <4,6,5,2>
+ 2660635824U, // <2,3,4,7>: Cost 3 vsldoi4 <7,2,3,4>, <7,2,3,4>
+ 537710121U, // <2,3,4,u>: Cost 1 vsldoi8 LHS, RHS
+ 2689838664U, // <2,3,5,0>: Cost 3 vsldoi8 LHS, <5,0,1,2>
+ 2732969615U, // <2,3,5,1>: Cost 3 vsldoi8 LHS, <5,1,0,1>
+ 2732969707U, // <2,3,5,2>: Cost 3 vsldoi8 LHS, <5,2,1,3>
+ 3763580721U, // <2,3,5,3>: Cost 4 vsldoi8 LHS, <5,3,0,1>
+ 2689839028U, // <2,3,5,4>: Cost 3 vsldoi8 LHS, <5,4,5,6>
+ 1659228164U, // <2,3,5,5>: Cost 2 vsldoi8 LHS, <5,5,5,5>
+ 1659228258U, // <2,3,5,6>: Cost 2 vsldoi8 LHS, <5,6,7,0>
+ 3364259770U, // <2,3,5,7>: Cost 4 vmrglw <1,4,2,5>, <2,6,3,7>
+ 1659228420U, // <2,3,5,u>: Cost 2 vsldoi8 LHS, <5,u,7,0>
+ 2230798486U, // <2,3,6,0>: Cost 3 vmrghw <2,6,3,7>, <3,0,1,2>
+ 2732970407U, // <2,3,6,1>: Cost 3 vsldoi8 LHS, <6,1,7,1>
+ 1659228666U, // <2,3,6,2>: Cost 2 vsldoi8 LHS, <6,2,7,3>
+ 2230798748U, // <2,3,6,3>: Cost 3 vmrghw <2,6,3,7>, <3,3,3,3>
+ 2230798850U, // <2,3,6,4>: Cost 3 vmrghw <2,6,3,7>, <3,4,5,6>
+ 2732970731U, // <2,3,6,5>: Cost 3 vsldoi8 LHS, <6,5,7,1>
+ 1659228984U, // <2,3,6,6>: Cost 2 vsldoi8 LHS, <6,6,6,6>
+ 1659229006U, // <2,3,6,7>: Cost 2 vsldoi8 LHS, <6,7,0,1>
+ 1659229087U, // <2,3,6,u>: Cost 2 vsldoi8 LHS, <6,u,0,1>
+ 1659229178U, // <2,3,7,0>: Cost 2 vsldoi8 LHS, <7,0,1,2>
+ 2726999125U, // <2,3,7,1>: Cost 3 vsldoi8 <7,1,2,3>, <7,1,2,3>
+ 2727662758U, // <2,3,7,2>: Cost 3 vsldoi8 <7,2,2,3>, <7,2,2,3>
+ 2732971235U, // <2,3,7,3>: Cost 3 vsldoi8 LHS, <7,3,0,1>
+ 1659229542U, // <2,3,7,4>: Cost 2 vsldoi8 LHS, <7,4,5,6>
+ 2732971446U, // <2,3,7,5>: Cost 3 vsldoi8 LHS, <7,5,5,5>
+ 2732971484U, // <2,3,7,6>: Cost 3 vsldoi8 LHS, <7,6,0,7>
+ 1659229804U, // <2,3,7,7>: Cost 2 vsldoi8 LHS, <7,7,7,7>
+ 1659229826U, // <2,3,7,u>: Cost 2 vsldoi8 LHS, <7,u,1,2>
+ 1208837014U, // <2,3,u,0>: Cost 2 vmrglw LHS, <1,2,3,0>
+ 537712430U, // <2,3,u,1>: Cost 1 vsldoi8 LHS, LHS
+ 1616099205U, // <2,3,u,2>: Cost 2 vsldoi8 LHS, <u,2,3,0>
+ 1208837746U, // <2,3,u,3>: Cost 2 vmrglw LHS, <2,2,3,3>
+ 1208837018U, // <2,3,u,4>: Cost 2 vmrglw LHS, <1,2,3,4>
+ 537712794U, // <2,3,u,5>: Cost 1 vsldoi8 LHS, RHS
+ 1616099536U, // <2,3,u,6>: Cost 2 vsldoi8 LHS, <u,6,3,7>
+ 1208838074U, // <2,3,u,7>: Cost 2 vmrglw LHS, <2,6,3,7>
+ 537712997U, // <2,3,u,u>: Cost 1 vsldoi8 LHS, LHS
+ 3771547648U, // <2,4,0,0>: Cost 4 vsldoi8 <2,2,2,4>, <0,0,0,0>
+ 2697805926U, // <2,4,0,1>: Cost 3 vsldoi8 <2,2,2,4>, LHS
+ 3770884269U, // <2,4,0,2>: Cost 4 vsldoi8 <2,1,2,4>, <0,2,1,2>
+ 3806716164U, // <2,4,0,3>: Cost 4 vsldoi8 <u,1,2,4>, <0,3,1,u>
+ 3771547986U, // <2,4,0,4>: Cost 4 vsldoi8 <2,2,2,4>, <0,4,1,5>
+ 2226761014U, // <2,4,0,5>: Cost 3 vmrghw <2,0,3,0>, RHS
+ 3853462427U, // <2,4,0,6>: Cost 4 vsldoi12 <4,6,5,2>, <4,0,6,1>
+ 3867102116U, // <2,4,0,7>: Cost 4 vsldoi12 <7,0,1,2>, <4,0,7,1>
+ 2226761257U, // <2,4,0,u>: Cost 3 vmrghw <2,0,3,0>, RHS
+ 3849186231U, // <2,4,1,0>: Cost 4 vsldoi12 <4,0,1,2>, <4,1,0,2>
+ 3301207010U, // <2,4,1,1>: Cost 4 vmrghw <2,1,3,5>, <4,1,5,0>
+ 3766240150U, // <2,4,1,2>: Cost 4 vsldoi8 <1,3,2,4>, <1,2,3,0>
+ 3766240226U, // <2,4,1,3>: Cost 4 vsldoi8 <1,3,2,4>, <1,3,2,4>
+ 3301207248U, // <2,4,1,4>: Cost 4 vmrghw <2,1,3,5>, <4,4,4,4>
+ 2227432758U, // <2,4,1,5>: Cost 3 vmrghw <2,1,3,1>, RHS
+ 3758941400U, // <2,4,1,6>: Cost 4 vsldoi8 <0,1,2,4>, <1,6,2,7>
+ 3768894758U, // <2,4,1,7>: Cost 4 vsldoi8 <1,7,2,4>, <1,7,2,4>
+ 2227433001U, // <2,4,1,u>: Cost 3 vmrghw <2,1,3,1>, RHS
+ 2228030354U, // <2,4,2,0>: Cost 3 vmrghw <2,2,2,2>, <4,0,5,1>
+ 3770885657U, // <2,4,2,1>: Cost 4 vsldoi8 <2,1,2,4>, <2,1,2,4>
+ 2697807466U, // <2,4,2,2>: Cost 3 vsldoi8 <2,2,2,4>, <2,2,2,4>
+ 3368880468U, // <2,4,2,3>: Cost 4 vmrglw <2,2,2,2>, <3,2,4,3>
+ 2228030672U, // <2,4,2,4>: Cost 3 vmrghw <2,2,2,2>, <4,4,4,4>
+ 1154288950U, // <2,4,2,5>: Cost 2 vmrghw <2,2,2,2>, RHS
+ 3771549617U, // <2,4,2,6>: Cost 4 vsldoi8 <2,2,2,4>, <2,6,2,7>
+ 3368880796U, // <2,4,2,7>: Cost 4 vmrglw <2,2,2,2>, <3,6,4,7>
+ 1154289193U, // <2,4,2,u>: Cost 2 vmrghw <2,2,2,2>, RHS
+ 2636808294U, // <2,4,3,0>: Cost 3 vsldoi4 <3,2,4,3>, LHS
+ 2287181861U, // <2,4,3,1>: Cost 3 vmrglw LHS, <0,0,4,1>
+ 2228866102U, // <2,4,3,2>: Cost 3 vmrghw <2,3,4,5>, <4,2,5,3>
+ 2636810580U, // <2,4,3,3>: Cost 3 vsldoi4 <3,2,4,3>, <3,2,4,3>
+ 1256574160U, // <2,4,3,4>: Cost 2 vmrglw LHS, <4,4,4,4>
+ 1213441742U, // <2,4,3,5>: Cost 2 vmrglw LHS, <2,3,4,5>
+ 2228866430U, // <2,4,3,6>: Cost 3 vmrghw <2,3,4,5>, <4,6,5,7>
+ 2660701368U, // <2,4,3,7>: Cost 3 vsldoi4 <7,2,4,3>, <7,2,4,3>
+ 1213441745U, // <2,4,3,u>: Cost 2 vmrglw LHS, <2,3,4,u>
+ 3704586342U, // <2,4,4,0>: Cost 4 vsldoi4 <2,2,4,4>, LHS
+ 3782831051U, // <2,4,4,1>: Cost 4 vsldoi8 <4,1,2,4>, <4,1,2,4>
+ 3704587900U, // <2,4,4,2>: Cost 4 vsldoi4 <2,2,4,4>, <2,2,4,4>
+ 3368896123U, // <2,4,4,3>: Cost 4 vmrglw <2,2,2,4>, <2,2,4,3>
+ 2793360592U, // <2,4,4,4>: Cost 3 vsldoi12 <7,0,1,2>, <4,4,4,4>
+ 2697809206U, // <2,4,4,5>: Cost 3 vsldoi8 <2,2,2,4>, RHS
+ 3303198078U, // <2,4,4,6>: Cost 4 vmrghw <2,4,3,5>, <4,6,5,7>
+ 3867102444U, // <2,4,4,7>: Cost 4 vsldoi12 <7,0,1,2>, <4,4,7,5>
+ 2697809449U, // <2,4,4,u>: Cost 3 vsldoi8 <2,2,2,4>, RHS
+ 2630852710U, // <2,4,5,0>: Cost 3 vsldoi4 <2,2,4,5>, LHS
+ 2624881572U, // <2,4,5,1>: Cost 3 vsldoi4 <1,2,4,5>, <1,2,4,5>
+ 2630854269U, // <2,4,5,2>: Cost 3 vsldoi4 <2,2,4,5>, <2,2,4,5>
+ 2666686677U, // <2,4,5,3>: Cost 3 vsldoi4 <u,2,4,5>, <3,0,u,2>
+ 2630855990U, // <2,4,5,4>: Cost 3 vsldoi4 <2,2,4,5>, RHS
+ 2230127926U, // <2,4,5,5>: Cost 3 vmrghw <2,5,3,6>, RHS
+ 1691159862U, // <2,4,5,6>: Cost 2 vsldoi12 <2,2,2,2>, RHS
+ 3867102520U, // <2,4,5,7>: Cost 4 vsldoi12 <7,0,1,2>, <4,5,7,0>
+ 1691159880U, // <2,4,5,u>: Cost 2 vsldoi12 <2,2,2,2>, RHS
+ 2230799250U, // <2,4,6,0>: Cost 3 vmrghw <2,6,3,7>, <4,0,5,1>
+ 3304541130U, // <2,4,6,1>: Cost 4 vmrghw <2,6,3,7>, <4,1,2,3>
+ 2230799417U, // <2,4,6,2>: Cost 3 vmrghw <2,6,3,7>, <4,2,5,6>
+ 3304541323U, // <2,4,6,3>: Cost 4 vmrghw <2,6,3,7>, <4,3,5,7>
+ 2230799568U, // <2,4,6,4>: Cost 3 vmrghw <2,6,3,7>, <4,4,4,4>
+ 1157057846U, // <2,4,6,5>: Cost 2 vmrghw <2,6,3,7>, RHS
+ 3304541566U, // <2,4,6,6>: Cost 4 vmrghw <2,6,3,7>, <4,6,5,7>
+ 3798758243U, // <2,4,6,7>: Cost 4 vsldoi8 <6,7,2,4>, <6,7,2,4>
+ 1157058089U, // <2,4,6,u>: Cost 2 vmrghw <2,6,3,7>, RHS
+ 3806721018U, // <2,4,7,0>: Cost 4 vsldoi8 <u,1,2,4>, <7,0,1,2>
+ 3853831590U, // <2,4,7,1>: Cost 4 vsldoi12 <4,7,1,2>, <4,7,1,2>
+ 3801412775U, // <2,4,7,2>: Cost 4 vsldoi8 <7,2,2,4>, <7,2,2,4>
+ 3802076408U, // <2,4,7,3>: Cost 4 vsldoi8 <7,3,2,4>, <7,3,2,4>
+ 3401436368U, // <2,4,7,4>: Cost 4 vmrglw <7,6,2,7>, <4,4,4,4>
+ 2793360840U, // <2,4,7,5>: Cost 3 vsldoi12 <7,0,1,2>, <4,7,5,0>
+ 3804067307U, // <2,4,7,6>: Cost 4 vsldoi8 <7,6,2,4>, <7,6,2,4>
+ 3867102682U, // <2,4,7,7>: Cost 4 vsldoi12 <7,0,1,2>, <4,7,7,0>
+ 2793360867U, // <2,4,7,u>: Cost 3 vsldoi12 <7,0,1,2>, <4,7,u,0>
+ 2630877286U, // <2,4,u,0>: Cost 3 vsldoi4 <2,2,4,u>, LHS
+ 2282580144U, // <2,4,u,1>: Cost 3 vmrglw LHS, <3,0,4,1>
+ 2630878848U, // <2,4,u,2>: Cost 3 vsldoi4 <2,2,4,u>, <2,2,4,u>
+ 2636851545U, // <2,4,u,3>: Cost 3 vsldoi4 <3,2,4,u>, <3,2,4,u>
+ 1256615120U, // <2,4,u,4>: Cost 2 vmrglw LHS, <4,4,4,4>
+ 1208837838U, // <2,4,u,5>: Cost 2 vmrglw LHS, <2,3,4,5>
+ 1691160105U, // <2,4,u,6>: Cost 2 vsldoi12 <2,2,2,2>, RHS
+ 2660742333U, // <2,4,u,7>: Cost 3 vsldoi4 <7,2,4,u>, <7,2,4,u>
+ 1208837841U, // <2,4,u,u>: Cost 2 vmrglw LHS, <2,3,4,u>
+ 3766910976U, // <2,5,0,0>: Cost 4 vsldoi8 <1,4,2,5>, <0,0,0,0>
+ 2693169254U, // <2,5,0,1>: Cost 3 vsldoi8 <1,4,2,5>, LHS
+ 3760939181U, // <2,5,0,2>: Cost 4 vsldoi8 <0,4,2,5>, <0,2,1,2>
+ 3843214936U, // <2,5,0,3>: Cost 4 vsldoi12 <3,0,1,2>, <5,0,3,0>
+ 3760939355U, // <2,5,0,4>: Cost 4 vsldoi8 <0,4,2,5>, <0,4,2,5>
+ 3867102827U, // <2,5,0,5>: Cost 4 vsldoi12 <7,0,1,2>, <5,0,5,1>
+ 3867102836U, // <2,5,0,6>: Cost 4 vsldoi12 <7,0,1,2>, <5,0,6,1>
+ 3867102844U, // <2,5,0,7>: Cost 4 vsldoi12 <7,0,1,2>, <5,0,7,0>
+ 2693169821U, // <2,5,0,u>: Cost 3 vsldoi8 <1,4,2,5>, LHS
+ 3766911724U, // <2,5,1,0>: Cost 4 vsldoi8 <1,4,2,5>, <1,0,2,1>
+ 3766911796U, // <2,5,1,1>: Cost 4 vsldoi8 <1,4,2,5>, <1,1,1,1>
+ 2693170070U, // <2,5,1,2>: Cost 3 vsldoi8 <1,4,2,5>, <1,2,3,0>
+ 3384798262U, // <2,5,1,3>: Cost 4 vmrglw <4,u,2,1>, <4,2,5,3>
+ 2693170228U, // <2,5,1,4>: Cost 3 vsldoi8 <1,4,2,5>, <1,4,2,5>
+ 3301208068U, // <2,5,1,5>: Cost 4 vmrghw <2,1,3,5>, <5,5,5,5>
+ 3366879607U, // <2,5,1,6>: Cost 4 vmrglw <1,u,2,1>, <0,4,5,6>
+ 3867102925U, // <2,5,1,7>: Cost 4 vsldoi12 <7,0,1,2>, <5,1,7,0>
+ 2695824760U, // <2,5,1,u>: Cost 3 vsldoi8 <1,u,2,5>, <1,u,2,5>
+ 2642845798U, // <2,5,2,0>: Cost 3 vsldoi4 <4,2,5,2>, LHS
+ 2295139218U, // <2,5,2,1>: Cost 3 vmrglw <2,2,2,2>, <4,0,5,1>
+ 2699142760U, // <2,5,2,2>: Cost 3 vsldoi8 <2,4,2,5>, <2,2,2,2>
+ 3766912678U, // <2,5,2,3>: Cost 4 vsldoi8 <1,4,2,5>, <2,3,0,1>
+ 2699142925U, // <2,5,2,4>: Cost 3 vsldoi8 <2,4,2,5>, <2,4,2,5>
+ 2228031492U, // <2,5,2,5>: Cost 3 vmrghw <2,2,2,2>, <5,5,5,5>
+ 2295138818U, // <2,5,2,6>: Cost 3 vmrglw <2,2,2,2>, <3,4,5,6>
+ 3368879347U, // <2,5,2,7>: Cost 4 vmrglw <2,2,2,2>, <1,6,5,7>
+ 2295138820U, // <2,5,2,u>: Cost 3 vmrglw <2,2,2,2>, <3,4,5,u>
+ 2287184866U, // <2,5,3,0>: Cost 3 vmrglw LHS, <4,1,5,0>
+ 1256573842U, // <2,5,3,1>: Cost 2 vmrglw LHS, <4,0,5,1>
+ 2642855630U, // <2,5,3,2>: Cost 3 vsldoi4 <4,2,5,3>, <2,3,4,5>
+ 2287182763U, // <2,5,3,3>: Cost 3 vmrglw LHS, <1,2,5,3>
+ 2287184870U, // <2,5,3,4>: Cost 3 vmrglw LHS, <4,1,5,4>
+ 1256574170U, // <2,5,3,5>: Cost 2 vmrglw LHS, <4,4,5,5>
+ 1213442562U, // <2,5,3,6>: Cost 2 vmrglw LHS, <3,4,5,6>
+ 2287183091U, // <2,5,3,7>: Cost 3 vmrglw LHS, <1,6,5,7>
+ 1213442564U, // <2,5,3,u>: Cost 2 vmrglw LHS, <3,4,5,u>
+ 3716604006U, // <2,5,4,0>: Cost 4 vsldoi4 <4,2,5,4>, LHS
+ 3716604822U, // <2,5,4,1>: Cost 4 vsldoi4 <4,2,5,4>, <1,2,3,0>
+ 3766914099U, // <2,5,4,2>: Cost 4 vsldoi8 <1,4,2,5>, <4,2,5,0>
+ 3368895403U, // <2,5,4,3>: Cost 5 vmrglw <2,2,2,4>, <1,2,5,3>
+ 3716607031U, // <2,5,4,4>: Cost 4 vsldoi4 <4,2,5,4>, <4,2,5,4>
+ 2693172534U, // <2,5,4,5>: Cost 3 vsldoi8 <1,4,2,5>, RHS
+ 3363588610U, // <2,5,4,6>: Cost 4 vmrglw <1,3,2,4>, <3,4,5,6>
+ 3368895731U, // <2,5,4,7>: Cost 5 vmrglw <2,2,2,4>, <1,6,5,7>
+ 2693172777U, // <2,5,4,u>: Cost 3 vsldoi8 <1,4,2,5>, RHS
+ 3704668262U, // <2,5,5,0>: Cost 4 vsldoi4 <2,2,5,5>, LHS
+ 3704669078U, // <2,5,5,1>: Cost 4 vsldoi4 <2,2,5,5>, <1,2,3,0>
+ 3704669830U, // <2,5,5,2>: Cost 4 vsldoi4 <2,2,5,5>, <2,2,5,5>
+ 3364259460U, // <2,5,5,3>: Cost 4 vmrglw <1,4,2,5>, <2,2,5,3>
+ 3704671542U, // <2,5,5,4>: Cost 4 vsldoi4 <2,2,5,5>, RHS
+ 2793361412U, // <2,5,5,5>: Cost 3 vsldoi12 <7,0,1,2>, <5,5,5,5>
+ 3364258167U, // <2,5,5,6>: Cost 4 vmrglw <1,4,2,5>, <0,4,5,6>
+ 3867103249U, // <2,5,5,7>: Cost 4 vsldoi12 <7,0,1,2>, <5,5,7,0>
+ 2793361412U, // <2,5,5,u>: Cost 3 vsldoi12 <7,0,1,2>, <5,5,5,5>
+ 2642878566U, // <2,5,6,0>: Cost 3 vsldoi4 <4,2,5,6>, LHS
+ 3386166810U, // <2,5,6,1>: Cost 4 vmrglw <5,1,2,6>, <4,u,5,1>
+ 2723033594U, // <2,5,6,2>: Cost 3 vsldoi8 <6,4,2,5>, <6,2,7,3>
+ 3848523842U, // <2,5,6,3>: Cost 4 vsldoi12 <3,u,1,2>, <5,6,3,4>
+ 2723033713U, // <2,5,6,4>: Cost 3 vsldoi8 <6,4,2,5>, <6,4,2,5>
+ 2230800388U, // <2,5,6,5>: Cost 3 vmrghw <2,6,3,7>, <5,5,5,5>
+ 2230800482U, // <2,5,6,6>: Cost 3 vmrghw <2,6,3,7>, <5,6,7,0>
+ 2785841252U, // <2,5,6,7>: Cost 3 vsldoi12 <5,6,7,2>, <5,6,7,2>
+ 2785914989U, // <2,5,6,u>: Cost 3 vsldoi12 <5,6,u,2>, <5,6,u,2>
+ 3796775930U, // <2,5,7,0>: Cost 4 vsldoi8 <6,4,2,5>, <7,0,1,2>
+ 3800757335U, // <2,5,7,1>: Cost 4 vsldoi8 <7,1,2,5>, <7,1,2,5>
+ 3853463689U, // <2,5,7,2>: Cost 4 vsldoi12 <4,6,5,2>, <5,7,2,3>
+ 3796776218U, // <2,5,7,3>: Cost 4 vsldoi8 <6,4,2,5>, <7,3,6,2>
+ 3796776294U, // <2,5,7,4>: Cost 4 vsldoi8 <6,4,2,5>, <7,4,5,6>
+ 3803411867U, // <2,5,7,5>: Cost 4 vsldoi8 <7,5,2,5>, <7,5,2,5>
+ 3371575081U, // <2,5,7,6>: Cost 4 vmrglw <2,6,2,7>, <2,4,5,6>
+ 3796776516U, // <2,5,7,7>: Cost 4 vsldoi8 <6,4,2,5>, <7,7,3,3>
+ 3371575083U, // <2,5,7,u>: Cost 4 vmrglw <2,6,2,7>, <2,4,5,u>
+ 2287225826U, // <2,5,u,0>: Cost 3 vmrglw LHS, <4,1,5,0>
+ 1256614802U, // <2,5,u,1>: Cost 2 vmrglw LHS, <4,0,5,1>
+ 2642896590U, // <2,5,u,2>: Cost 3 vsldoi4 <4,2,5,u>, <2,3,4,5>
+ 2287223723U, // <2,5,u,3>: Cost 3 vmrglw LHS, <1,2,5,3>
+ 2287225830U, // <2,5,u,4>: Cost 3 vmrglw LHS, <4,1,5,4>
+ 1256615130U, // <2,5,u,5>: Cost 2 vmrglw LHS, <4,4,5,5>
+ 1208838658U, // <2,5,u,6>: Cost 2 vmrglw LHS, <3,4,5,6>
+ 2287224051U, // <2,5,u,7>: Cost 3 vmrglw LHS, <1,6,5,7>
+ 1208838660U, // <2,5,u,u>: Cost 2 vmrglw LHS, <3,4,5,u>
+ 3772227584U, // <2,6,0,0>: Cost 4 vsldoi8 <2,3,2,6>, <0,0,0,0>
+ 2698485862U, // <2,6,0,1>: Cost 3 vsldoi8 <2,3,2,6>, LHS
+ 3759620282U, // <2,6,0,2>: Cost 4 vsldoi8 <0,2,2,6>, <0,2,2,6>
+ 3710675299U, // <2,6,0,3>: Cost 4 vsldoi4 <3,2,6,0>, <3,2,6,0>
+ 3767583058U, // <2,6,0,4>: Cost 4 vsldoi8 <1,5,2,6>, <0,4,1,5>
+ 3378153265U, // <2,6,0,5>: Cost 5 vmrglw <3,7,2,0>, <2,4,6,5>
+ 3865186637U, // <2,6,0,6>: Cost 4 vsldoi12 <6,6,2,2>, <6,0,6,1>
+ 2330291510U, // <2,6,0,7>: Cost 3 vmrglw <u,1,2,0>, RHS
+ 2698486429U, // <2,6,0,u>: Cost 3 vsldoi8 <2,3,2,6>, LHS
+ 3734569062U, // <2,6,1,0>: Cost 4 vsldoi4 <7,2,6,1>, LHS
+ 3764929346U, // <2,6,1,1>: Cost 4 vsldoi8 <1,1,2,6>, <1,1,2,6>
+ 3772228502U, // <2,6,1,2>: Cost 4 vsldoi8 <2,3,2,6>, <1,2,3,0>
+ 3734571158U, // <2,6,1,3>: Cost 4 vsldoi4 <7,2,6,1>, <3,0,1,2>
+ 3734572342U, // <2,6,1,4>: Cost 4 vsldoi4 <7,2,6,1>, RHS
+ 3767583878U, // <2,6,1,5>: Cost 4 vsldoi8 <1,5,2,6>, <1,5,2,6>
+ 3768247511U, // <2,6,1,6>: Cost 4 vsldoi8 <1,6,2,6>, <1,6,2,6>
+ 2293140790U, // <2,6,1,7>: Cost 3 vmrglw <1,u,2,1>, RHS
+ 2293140791U, // <2,6,1,u>: Cost 3 vmrglw <1,u,2,1>, RHS
+ 3704717414U, // <2,6,2,0>: Cost 4 vsldoi4 <2,2,6,2>, LHS
+ 3395424589U, // <2,6,2,1>: Cost 4 vmrglw <6,6,2,2>, <6,0,6,1>
+ 2228031993U, // <2,6,2,2>: Cost 3 vmrghw <2,2,2,2>, <6,2,7,2>
+ 2698487485U, // <2,6,2,3>: Cost 3 vsldoi8 <2,3,2,6>, <2,3,2,6>
+ 3704720694U, // <2,6,2,4>: Cost 4 vsldoi4 <2,2,6,2>, RHS
+ 3773556575U, // <2,6,2,5>: Cost 4 vsldoi8 <2,5,2,6>, <2,5,2,6>
+ 2698487738U, // <2,6,2,6>: Cost 3 vsldoi8 <2,3,2,6>, <2,6,3,7>
+ 1221397814U, // <2,6,2,7>: Cost 2 vmrglw <2,2,2,2>, RHS
+ 1221397815U, // <2,6,2,u>: Cost 2 vmrglw <2,2,2,2>, RHS
+ 2636955750U, // <2,6,3,0>: Cost 3 vsldoi4 <3,2,6,3>, LHS
+ 2330314217U, // <2,6,3,1>: Cost 3 vmrglw LHS, <2,0,6,1>
+ 2636957626U, // <2,6,3,2>: Cost 3 vsldoi4 <3,2,6,3>, <2,6,3,7>
+ 2287184230U, // <2,6,3,3>: Cost 3 vmrglw LHS, <3,2,6,3>
+ 2636959030U, // <2,6,3,4>: Cost 3 vsldoi4 <3,2,6,3>, RHS
+ 2648903448U, // <2,6,3,5>: Cost 3 vsldoi4 <5,2,6,3>, <5,2,6,3>
+ 1256575800U, // <2,6,3,6>: Cost 2 vmrglw LHS, <6,6,6,6>
+ 135056694U, // <2,6,3,7>: Cost 1 vmrglw LHS, RHS
+ 135056695U, // <2,6,3,u>: Cost 1 vmrglw LHS, RHS
+ 3710705766U, // <2,6,4,0>: Cost 4 vsldoi4 <3,2,6,4>, LHS
+ 3698762677U, // <2,6,4,1>: Cost 5 vsldoi4 <1,2,6,4>, <1,2,6,4>
+ 3710707389U, // <2,6,4,2>: Cost 4 vsldoi4 <3,2,6,4>, <2,3,2,6>
+ 3710708071U, // <2,6,4,3>: Cost 4 vsldoi4 <3,2,6,4>, <3,2,6,4>
+ 3710709046U, // <2,6,4,4>: Cost 4 vsldoi4 <3,2,6,4>, RHS
+ 2698489142U, // <2,6,4,5>: Cost 3 vsldoi8 <2,3,2,6>, RHS
+ 3796782457U, // <2,6,4,6>: Cost 4 vsldoi8 <6,4,2,6>, <4,6,5,2>
+ 2295156022U, // <2,6,4,7>: Cost 3 vmrglw <2,2,2,4>, RHS
+ 2295156023U, // <2,6,4,u>: Cost 3 vmrglw <2,2,2,4>, RHS
+ 3303870753U, // <2,6,5,0>: Cost 4 vmrghw <2,5,3,6>, <6,0,1,2>
+ 3788820134U, // <2,6,5,1>: Cost 4 vsldoi8 <5,1,2,6>, <5,1,2,6>
+ 3779530520U, // <2,6,5,2>: Cost 4 vsldoi8 <3,5,2,6>, <5,2,6,3>
+ 3303871026U, // <2,6,5,3>: Cost 4 vmrghw <2,5,3,6>, <6,3,4,5>
+ 3303871117U, // <2,6,5,4>: Cost 4 vmrghw <2,5,3,6>, <6,4,5,6>
+ 3791474666U, // <2,6,5,5>: Cost 4 vsldoi8 <5,5,2,6>, <5,5,2,6>
+ 3792138299U, // <2,6,5,6>: Cost 4 vsldoi8 <5,6,2,6>, <5,6,2,6>
+ 2290519350U, // <2,6,5,7>: Cost 3 vmrglw <1,4,2,5>, RHS
+ 2290519351U, // <2,6,5,u>: Cost 3 vmrglw <1,4,2,5>, RHS
+ 2631008358U, // <2,6,6,0>: Cost 3 vsldoi4 <2,2,6,6>, LHS
+ 3372893673U, // <2,6,6,1>: Cost 4 vmrglw <2,u,2,6>, <2,0,6,1>
+ 2791445264U, // <2,6,6,2>: Cost 3 vsldoi12 <6,6,2,2>, <6,6,2,2>
+ 2230800968U, // <2,6,6,3>: Cost 3 vmrghw <2,6,3,7>, <6,3,7,0>
+ 2631011638U, // <2,6,6,4>: Cost 3 vsldoi4 <2,2,6,6>, RHS
+ 3372894001U, // <2,6,6,5>: Cost 4 vmrglw <2,u,2,6>, <2,4,6,5>
+ 2793362232U, // <2,6,6,6>: Cost 3 vsldoi12 <7,0,1,2>, <6,6,6,6>
+ 2295835958U, // <2,6,6,7>: Cost 3 vmrglw <2,3,2,6>, RHS
+ 2295835959U, // <2,6,6,u>: Cost 3 vmrglw <2,3,2,6>, RHS
+ 2793362254U, // <2,6,7,0>: Cost 3 vsldoi12 <7,0,1,2>, <6,7,0,1>
+ 2792035160U, // <2,6,7,1>: Cost 3 vsldoi12 <6,7,1,2>, <6,7,1,2>
+ 2792108897U, // <2,6,7,2>: Cost 3 vsldoi12 <6,7,2,2>, <6,7,2,2>
+ 2769474408U, // <2,6,7,3>: Cost 3 vsldoi12 <3,0,1,2>, <6,7,3,0>
+ 2793362294U, // <2,6,7,4>: Cost 3 vsldoi12 <7,0,1,2>, <6,7,4,5>
+ 3371575089U, // <2,6,7,5>: Cost 4 vmrglw <2,6,2,7>, <2,4,6,5>
+ 2792403845U, // <2,6,7,6>: Cost 3 vsldoi12 <6,7,6,2>, <6,7,6,2>
+ 2297834806U, // <2,6,7,7>: Cost 3 vmrglw <2,6,2,7>, RHS
+ 2297834807U, // <2,6,7,u>: Cost 3 vmrglw <2,6,2,7>, RHS
+ 2636996710U, // <2,6,u,0>: Cost 3 vsldoi4 <3,2,6,u>, LHS
+ 2698491694U, // <2,6,u,1>: Cost 3 vsldoi8 <2,3,2,6>, LHS
+ 2636998631U, // <2,6,u,2>: Cost 3 vsldoi4 <3,2,6,u>, <2,6,u,7>
+ 2282580326U, // <2,6,u,3>: Cost 3 vmrglw LHS, <3,2,6,3>
+ 2636999990U, // <2,6,u,4>: Cost 3 vsldoi4 <3,2,6,u>, RHS
+ 2698492058U, // <2,6,u,5>: Cost 3 vsldoi8 <2,3,2,6>, RHS
+ 1256616760U, // <2,6,u,6>: Cost 2 vmrglw LHS, <6,6,6,6>
+ 135097654U, // <2,6,u,7>: Cost 1 vmrglw LHS, RHS
+ 135097655U, // <2,6,u,u>: Cost 1 vmrglw LHS, RHS
+ 2666864742U, // <2,7,0,0>: Cost 3 vsldoi4 <u,2,7,0>, LHS
+ 1719620602U, // <2,7,0,1>: Cost 2 vsldoi12 <7,0,1,2>, <7,0,1,2>
+ 3768254637U, // <2,7,0,2>: Cost 4 vsldoi8 <1,6,2,7>, <0,2,1,2>
+ 3393417722U, // <2,7,0,3>: Cost 4 vmrglw <6,3,2,0>, <6,2,7,3>
+ 2666868022U, // <2,7,0,4>: Cost 3 vsldoi4 <u,2,7,0>, RHS
+ 3867104290U, // <2,7,0,5>: Cost 4 vsldoi12 <7,0,1,2>, <7,0,5,6>
+ 3728667127U, // <2,7,0,6>: Cost 4 vsldoi4 <6,2,7,0>, <6,2,7,0>
+ 2666869817U, // <2,7,0,7>: Cost 3 vsldoi4 <u,2,7,0>, <7,0,u,2>
+ 1720136761U, // <2,7,0,u>: Cost 2 vsldoi12 <7,0,u,2>, <7,0,u,2>
+ 3728670822U, // <2,7,1,0>: Cost 4 vsldoi4 <6,2,7,1>, LHS
+ 3774227252U, // <2,7,1,1>: Cost 4 vsldoi8 <2,6,2,7>, <1,1,1,1>
+ 3774227350U, // <2,7,1,2>: Cost 4 vsldoi8 <2,6,2,7>, <1,2,3,0>
+ 2323001850U, // <2,7,1,3>: Cost 3 vmrglw <6,u,2,1>, <6,2,7,3>
+ 3728674102U, // <2,7,1,4>: Cost 4 vsldoi4 <6,2,7,1>, RHS
+ 3774227567U, // <2,7,1,5>: Cost 5 vsldoi8 <2,6,2,7>, <1,5,0,1>
+ 2694513880U, // <2,7,1,6>: Cost 3 vsldoi8 <1,6,2,7>, <1,6,2,7>
+ 3396744002U, // <2,7,1,7>: Cost 4 vmrglw <6,u,2,1>, <6,6,7,7>
+ 2323001850U, // <2,7,1,u>: Cost 3 vmrglw <6,u,2,1>, <6,2,7,3>
+ 2654937190U, // <2,7,2,0>: Cost 3 vsldoi4 <6,2,7,2>, LHS
+ 3728679732U, // <2,7,2,1>: Cost 4 vsldoi4 <6,2,7,2>, <1,1,1,1>
+ 2700486248U, // <2,7,2,2>: Cost 3 vsldoi8 <2,6,2,7>, <2,2,2,2>
+ 2321682938U, // <2,7,2,3>: Cost 3 vmrglw <6,6,2,2>, <6,2,7,3>
+ 2654940470U, // <2,7,2,4>: Cost 3 vsldoi4 <6,2,7,2>, RHS
+ 3859584196U, // <2,7,2,5>: Cost 4 vsldoi12 <5,6,7,2>, <7,2,5,6>
+ 2700486577U, // <2,7,2,6>: Cost 3 vsldoi8 <2,6,2,7>, <2,6,2,7>
+ 2228033132U, // <2,7,2,7>: Cost 3 vmrghw <2,2,2,2>, <7,7,7,7>
+ 2701813843U, // <2,7,2,u>: Cost 3 vsldoi8 <2,u,2,7>, <2,u,2,7>
+ 1581203558U, // <2,7,3,0>: Cost 2 vsldoi4 <6,2,7,3>, LHS
+ 2654946100U, // <2,7,3,1>: Cost 3 vsldoi4 <6,2,7,3>, <1,1,1,1>
+ 2637031354U, // <2,7,3,2>: Cost 3 vsldoi4 <3,2,7,3>, <2,6,3,7>
+ 1256575482U, // <2,7,3,3>: Cost 2 vmrglw LHS, <6,2,7,3>
+ 1581206838U, // <2,7,3,4>: Cost 2 vsldoi4 <6,2,7,3>, RHS
+ 2654949380U, // <2,7,3,5>: Cost 3 vsldoi4 <6,2,7,3>, <5,5,5,5>
+ 1581208058U, // <2,7,3,6>: Cost 2 vsldoi4 <6,2,7,3>, <6,2,7,3>
+ 1256575810U, // <2,7,3,7>: Cost 2 vmrglw LHS, <6,6,7,7>
+ 1581209390U, // <2,7,3,u>: Cost 2 vsldoi4 <6,2,7,3>, LHS
+ 3728695398U, // <2,7,4,0>: Cost 4 vsldoi4 <6,2,7,4>, LHS
+ 3869758782U, // <2,7,4,1>: Cost 4 vsldoi12 <7,4,1,2>, <7,4,1,2>
+ 3728696936U, // <2,7,4,2>: Cost 4 vsldoi4 <6,2,7,4>, <2,2,2,2>
+ 3393450490U, // <2,7,4,3>: Cost 4 vmrglw <6,3,2,4>, <6,2,7,3>
+ 3728698678U, // <2,7,4,4>: Cost 4 vsldoi4 <6,2,7,4>, RHS
+ 2700487990U, // <2,7,4,5>: Cost 3 vsldoi8 <2,6,2,7>, RHS
+ 3728699899U, // <2,7,4,6>: Cost 4 vsldoi4 <6,2,7,4>, <6,2,7,4>
+ 3867104626U, // <2,7,4,7>: Cost 4 vsldoi12 <7,0,1,2>, <7,4,7,0>
+ 2700488233U, // <2,7,4,u>: Cost 3 vsldoi8 <2,6,2,7>, RHS
+ 3855160709U, // <2,7,5,0>: Cost 4 vsldoi12 <5,0,1,2>, <7,5,0,1>
+ 3728704406U, // <2,7,5,1>: Cost 4 vsldoi4 <6,2,7,5>, <1,2,3,0>
+ 3370233956U, // <2,7,5,2>: Cost 4 vmrglw <2,4,2,5>, <5,6,7,2>
+ 2320380410U, // <2,7,5,3>: Cost 3 vmrglw <6,4,2,5>, <6,2,7,3>
+ 3728706870U, // <2,7,5,4>: Cost 4 vsldoi4 <6,2,7,5>, RHS
+ 3867104694U, // <2,7,5,5>: Cost 4 vsldoi12 <7,0,1,2>, <7,5,5,5>
+ 3792146492U, // <2,7,5,6>: Cost 4 vsldoi8 <5,6,2,7>, <5,6,2,7>
+ 3394122562U, // <2,7,5,7>: Cost 4 vmrglw <6,4,2,5>, <6,6,7,7>
+ 2320380410U, // <2,7,5,u>: Cost 3 vmrglw <6,4,2,5>, <6,2,7,3>
+ 2230801402U, // <2,7,6,0>: Cost 3 vmrghw <2,6,3,7>, <7,0,1,2>
+ 3768258984U, // <2,7,6,1>: Cost 4 vsldoi8 <1,6,2,7>, <6,1,7,2>
+ 2730349050U, // <2,7,6,2>: Cost 3 vsldoi8 <7,6,2,7>, <6,2,7,3>
+ 3372894575U, // <2,7,6,3>: Cost 4 vmrglw <2,u,2,6>, <3,2,7,3>
+ 2230801766U, // <2,7,6,4>: Cost 3 vmrghw <2,6,3,7>, <7,4,5,6>
+ 3304543670U, // <2,7,6,5>: Cost 4 vmrghw <2,6,3,7>, <7,5,5,5>
+ 3728716285U, // <2,7,6,6>: Cost 4 vsldoi4 <6,2,7,6>, <6,2,7,6>
+ 2230802028U, // <2,7,6,7>: Cost 3 vmrghw <2,6,3,7>, <7,7,7,7>
+ 2730349050U, // <2,7,6,u>: Cost 3 vsldoi8 <7,6,2,7>, <6,2,7,3>
+ 2793362983U, // <2,7,7,0>: Cost 3 vsldoi12 <7,0,1,2>, <7,7,0,1>
+ 3728721112U, // <2,7,7,1>: Cost 4 vsldoi4 <6,2,7,7>, <1,6,2,7>
+ 3371574933U, // <2,7,7,2>: Cost 4 vmrglw <2,6,2,7>, <2,2,7,2>
+ 2327695866U, // <2,7,7,3>: Cost 3 vmrglw <7,6,2,7>, <6,2,7,3>
+ 3728723254U, // <2,7,7,4>: Cost 4 vsldoi4 <6,2,7,7>, RHS
+ 3371574855U, // <2,7,7,5>: Cost 5 vmrglw <2,6,2,7>, <2,1,7,5>
+ 2730350062U, // <2,7,7,6>: Cost 3 vsldoi8 <7,6,2,7>, <7,6,2,7>
+ 2793363052U, // <2,7,7,7>: Cost 3 vsldoi12 <7,0,1,2>, <7,7,7,7>
+ 2798671471U, // <2,7,7,u>: Cost 3 vsldoi12 <7,u,1,2>, <7,7,u,1>
+ 1581244518U, // <2,7,u,0>: Cost 2 vsldoi4 <6,2,7,u>, LHS
+ 1724929666U, // <2,7,u,1>: Cost 2 vsldoi12 <7,u,1,2>, <7,u,1,2>
+ 2637072314U, // <2,7,u,2>: Cost 3 vsldoi4 <3,2,7,u>, <2,6,3,7>
+ 1256616442U, // <2,7,u,3>: Cost 2 vmrglw LHS, <6,2,7,3>
+ 1581247798U, // <2,7,u,4>: Cost 2 vsldoi4 <6,2,7,u>, RHS
+ 2700490906U, // <2,7,u,5>: Cost 3 vsldoi8 <2,6,2,7>, RHS
+ 1581249023U, // <2,7,u,6>: Cost 2 vsldoi4 <6,2,7,u>, <6,2,7,u>
+ 1256616770U, // <2,7,u,7>: Cost 2 vmrglw LHS, <6,6,7,7>
+ 1581250350U, // <2,7,u,u>: Cost 2 vsldoi4 <6,2,7,u>, LHS
+ 1611489280U, // <2,u,0,0>: Cost 2 vsldoi8 LHS, <0,0,0,0>
+ 537747563U, // <2,u,0,1>: Cost 1 vsldoi8 LHS, LHS
+ 2685231277U, // <2,u,0,2>: Cost 3 vsldoi8 LHS, <0,2,1,2>
+ 2685231356U, // <2,u,0,3>: Cost 3 vsldoi8 LHS, <0,3,1,0>
+ 1611489618U, // <2,u,0,4>: Cost 2 vsldoi8 LHS, <0,4,1,5>
+ 2226763930U, // <2,u,0,5>: Cost 3 vmrghw <2,0,3,0>, RHS
+ 2733007350U, // <2,u,0,6>: Cost 3 vsldoi8 LHS, <0,6,1,7>
+ 2660971737U, // <2,u,0,7>: Cost 3 vsldoi4 <7,2,u,0>, <7,2,u,0>
+ 537748125U, // <2,u,0,u>: Cost 1 vsldoi8 LHS, LHS
+ 2689876708U, // <2,u,1,0>: Cost 3 vsldoi8 LHS, <1,0,1,2>
+ 1611490100U, // <2,u,1,1>: Cost 2 vsldoi8 LHS, <1,1,1,1>
+ 1611490198U, // <2,u,1,2>: Cost 2 vsldoi8 LHS, <1,2,3,0>
+ 2293137564U, // <2,u,1,3>: Cost 3 vmrglw <1,u,2,1>, LHS
+ 2689877072U, // <2,u,1,4>: Cost 3 vsldoi8 LHS, <1,4,5,6>
+ 2689877103U, // <2,u,1,5>: Cost 3 vsldoi8 LHS, <1,5,0,1>
+ 2689877199U, // <2,u,1,6>: Cost 3 vsldoi8 LHS, <1,6,1,7>
+ 2293140808U, // <2,u,1,7>: Cost 3 vmrglw <1,u,2,1>, RHS
+ 1616135548U, // <2,u,1,u>: Cost 2 vsldoi8 LHS, <1,u,3,0>
+ 1556938854U, // <2,u,2,0>: Cost 2 vsldoi4 <2,2,2,2>, LHS
+ 1154291502U, // <2,u,2,1>: Cost 2 vmrghw <2,2,2,2>, LHS
+ 336380006U, // <2,u,2,2>: Cost 1 vspltisw2 LHS
+ 1611490982U, // <2,u,2,3>: Cost 2 vsldoi8 LHS, <2,3,0,1>
+ 1556942134U, // <2,u,2,4>: Cost 2 vsldoi4 <2,2,2,2>, RHS
+ 1154291866U, // <2,u,2,5>: Cost 2 vmrghw <2,2,2,2>, RHS
+ 1611491258U, // <2,u,2,6>: Cost 2 vsldoi8 LHS, <2,6,3,7>
+ 1221397832U, // <2,u,2,7>: Cost 2 vmrglw <2,2,2,2>, RHS
+ 336380006U, // <2,u,2,u>: Cost 1 vspltisw2 LHS
+ 1611491478U, // <2,u,3,0>: Cost 2 vsldoi8 LHS, <3,0,1,2>
+ 1213440073U, // <2,u,3,1>: Cost 2 vmrglw LHS, <0,0,u,1>
+ 1213442261U, // <2,u,3,2>: Cost 2 vmrglw LHS, <3,0,u,2>
+ 135053468U, // <2,u,3,3>: Cost 1 vmrglw LHS, LHS
+ 1611491842U, // <2,u,3,4>: Cost 2 vsldoi8 LHS, <3,4,5,6>
+ 1213440401U, // <2,u,3,5>: Cost 2 vmrglw LHS, <0,4,u,5>
+ 1213442589U, // <2,u,3,6>: Cost 2 vmrglw LHS, <3,4,u,6>
+ 135056712U, // <2,u,3,7>: Cost 1 vmrglw LHS, RHS
+ 135053473U, // <2,u,3,u>: Cost 1 vmrglw LHS, LHS
+ 1551425638U, // <2,u,4,0>: Cost 2 vsldoi4 <1,2,u,4>, LHS
+ 1551426503U, // <2,u,4,1>: Cost 2 vsldoi4 <1,2,u,4>, <1,2,u,4>
+ 2625169000U, // <2,u,4,2>: Cost 3 vsldoi4 <1,2,u,4>, <2,2,2,2>
+ 2625169558U, // <2,u,4,3>: Cost 3 vsldoi4 <1,2,u,4>, <3,0,1,2>
+ 1551428918U, // <2,u,4,4>: Cost 2 vsldoi4 <1,2,u,4>, RHS
+ 537750838U, // <2,u,4,5>: Cost 1 vsldoi8 LHS, RHS
+ 2733010297U, // <2,u,4,6>: Cost 3 vsldoi8 LHS, <4,6,5,2>
+ 2295156040U, // <2,u,4,7>: Cost 3 vmrglw <2,2,2,4>, RHS
+ 537751081U, // <2,u,4,u>: Cost 1 vsldoi8 LHS, RHS
+ 2689879624U, // <2,u,5,0>: Cost 3 vsldoi8 LHS, <5,0,1,2>
+ 2230130478U, // <2,u,5,1>: Cost 3 vmrghw <2,5,3,6>, LHS
+ 2631149217U, // <2,u,5,2>: Cost 3 vsldoi4 <2,2,u,5>, <2,2,u,5>
+ 2290516124U, // <2,u,5,3>: Cost 3 vmrglw <1,4,2,5>, LHS
+ 2689879988U, // <2,u,5,4>: Cost 3 vsldoi8 LHS, <5,4,5,6>
+ 1659269124U, // <2,u,5,5>: Cost 2 vsldoi8 LHS, <5,5,5,5>
+ 1691162778U, // <2,u,5,6>: Cost 2 vsldoi12 <2,2,2,2>, RHS
+ 2290519368U, // <2,u,5,7>: Cost 3 vmrglw <1,4,2,5>, RHS
+ 1691162796U, // <2,u,5,u>: Cost 2 vsldoi12 <2,2,2,2>, RHS
+ 2230802131U, // <2,u,6,0>: Cost 3 vmrghw <2,6,3,7>, <u,0,1,2>
+ 1157060398U, // <2,u,6,1>: Cost 2 vmrghw <2,6,3,7>, LHS
+ 1659269626U, // <2,u,6,2>: Cost 2 vsldoi8 LHS, <6,2,7,3>
+ 2764904656U, // <2,u,6,3>: Cost 3 vsldoi12 <2,2,2,2>, <u,6,3,7>
+ 2230802495U, // <2,u,6,4>: Cost 3 vmrghw <2,6,3,7>, <u,4,5,6>
+ 1157060762U, // <2,u,6,5>: Cost 2 vmrghw <2,6,3,7>, RHS
+ 1659269944U, // <2,u,6,6>: Cost 2 vsldoi8 LHS, <6,6,6,6>
+ 1659269966U, // <2,u,6,7>: Cost 2 vsldoi8 LHS, <6,7,0,1>
+ 1157060965U, // <2,u,6,u>: Cost 2 vmrghw <2,6,3,7>, LHS
+ 1659270138U, // <2,u,7,0>: Cost 2 vsldoi8 LHS, <7,0,1,2>
+ 2727040090U, // <2,u,7,1>: Cost 3 vsldoi8 <7,1,2,u>, <7,1,2,u>
+ 2727703723U, // <2,u,7,2>: Cost 3 vsldoi8 <7,2,2,u>, <7,2,2,u>
+ 2297831580U, // <2,u,7,3>: Cost 3 vmrglw <2,6,2,7>, LHS
+ 1659270502U, // <2,u,7,4>: Cost 2 vsldoi8 LHS, <7,4,5,6>
+ 2733012406U, // <2,u,7,5>: Cost 3 vsldoi8 LHS, <7,5,5,5>
+ 2730358255U, // <2,u,7,6>: Cost 3 vsldoi8 <7,6,2,u>, <7,6,2,u>
+ 1659270764U, // <2,u,7,7>: Cost 2 vsldoi8 LHS, <7,7,7,7>
+ 1659270786U, // <2,u,7,u>: Cost 2 vsldoi8 LHS, <7,u,1,2>
+ 1213481923U, // <2,u,u,0>: Cost 2 vmrglw LHS, <1,2,u,0>
+ 537753390U, // <2,u,u,1>: Cost 1 vsldoi8 LHS, LHS
+ 336380006U, // <2,u,u,2>: Cost 1 vspltisw2 LHS
+ 135094428U, // <2,u,u,3>: Cost 1 vmrglw LHS, LHS
+ 1213481927U, // <2,u,u,4>: Cost 2 vmrglw LHS, <1,2,u,4>
+ 537753754U, // <2,u,u,5>: Cost 1 vsldoi8 LHS, RHS
+ 1208838685U, // <2,u,u,6>: Cost 2 vmrglw LHS, <3,4,u,6>
+ 135097672U, // <2,u,u,7>: Cost 1 vmrglw LHS, RHS
+ 135094433U, // <2,u,u,u>: Cost 1 vmrglw LHS, LHS
+ 1678557184U, // <3,0,0,0>: Cost 2 vsldoi12 LHS, <0,0,0,0>
+ 1678557194U, // <3,0,0,1>: Cost 2 vsldoi12 LHS, <0,0,1,1>
+ 2631181989U, // <3,0,0,2>: Cost 3 vsldoi4 <2,3,0,0>, <2,3,0,0>
+ 2289223984U, // <3,0,0,3>: Cost 3 vmrglw <1,2,3,0>, <3,2,0,3>
+ 2756943909U, // <3,0,0,4>: Cost 3 vsldoi12 LHS, <0,0,4,1>
+ 3362965729U, // <3,0,0,5>: Cost 4 vmrglw <1,2,3,0>, <3,1,0,5>
+ 3362966054U, // <3,0,0,6>: Cost 4 vmrglw <1,2,3,0>, <3,5,0,6>
+ 2289224312U, // <3,0,0,7>: Cost 3 vmrglw <1,2,3,0>, <3,6,0,7>
+ 1683202121U, // <3,0,0,u>: Cost 2 vsldoi12 LHS, <0,0,u,1>
+ 1557446758U, // <3,0,1,0>: Cost 2 vsldoi4 <2,3,0,1>, LHS
+ 2752741467U, // <3,0,1,1>: Cost 3 vsldoi12 LHS, <0,1,1,1>
+ 604815462U, // <3,0,1,2>: Cost 1 vsldoi12 LHS, LHS
+ 2631190676U, // <3,0,1,3>: Cost 3 vsldoi4 <2,3,0,1>, <3,0,1,0>
+ 1557450038U, // <3,0,1,4>: Cost 2 vsldoi4 <2,3,0,1>, RHS
+ 2667024388U, // <3,0,1,5>: Cost 3 vsldoi4 <u,3,0,1>, <5,5,5,5>
+ 2800074894U, // <3,0,1,6>: Cost 3 vsldoi12 LHS, <0,1,6,7>
+ 2661053667U, // <3,0,1,7>: Cost 3 vsldoi4 <7,3,0,1>, <7,3,0,1>
+ 604815516U, // <3,0,1,u>: Cost 1 vsldoi12 LHS, LHS
+ 2696521165U, // <3,0,2,0>: Cost 3 vsldoi8 <2,0,3,0>, <2,0,3,0>
+ 2752741549U, // <3,0,2,1>: Cost 3 vsldoi12 LHS, <0,2,1,2>
+ 2691876456U, // <3,0,2,2>: Cost 3 vsldoi8 <1,2,3,0>, <2,2,2,2>
+ 2691876518U, // <3,0,2,3>: Cost 3 vsldoi8 <1,2,3,0>, <2,3,0,1>
+ 3830685895U, // <3,0,2,4>: Cost 4 vsldoi12 LHS, <0,2,4,1>
+ 3765618536U, // <3,0,2,5>: Cost 4 vsldoi8 <1,2,3,0>, <2,5,3,6>
+ 2691876794U, // <3,0,2,6>: Cost 3 vsldoi8 <1,2,3,0>, <2,6,3,7>
+ 2701166596U, // <3,0,2,7>: Cost 3 vsldoi8 <2,7,3,0>, <2,7,3,0>
+ 2756944108U, // <3,0,2,u>: Cost 3 vsldoi12 LHS, <0,2,u,2>
+ 2691877014U, // <3,0,3,0>: Cost 3 vsldoi8 <1,2,3,0>, <3,0,1,2>
+ 1161003110U, // <3,0,3,1>: Cost 2 vmrghw <3,3,3,3>, LHS
+ 2691877168U, // <3,0,3,2>: Cost 3 vsldoi8 <1,2,3,0>, <3,2,0,3>
+ 2691877246U, // <3,0,3,3>: Cost 3 vsldoi8 <1,2,3,0>, <3,3,0,0>
+ 2691877378U, // <3,0,3,4>: Cost 3 vsldoi8 <1,2,3,0>, <3,4,5,6>
+ 3765619238U, // <3,0,3,5>: Cost 4 vsldoi8 <1,2,3,0>, <3,5,0,6>
+ 2691877496U, // <3,0,3,6>: Cost 3 vsldoi8 <1,2,3,0>, <3,6,0,7>
+ 3368962680U, // <3,0,3,7>: Cost 4 vmrglw <2,2,3,3>, <3,6,0,7>
+ 1161003677U, // <3,0,3,u>: Cost 2 vmrghw <3,3,3,3>, LHS
+ 2289254400U, // <3,0,4,0>: Cost 3 vmrglw <1,2,3,4>, <0,0,0,0>
+ 1678557522U, // <3,0,4,1>: Cost 2 vsldoi12 LHS, <0,4,1,5>
+ 2631214761U, // <3,0,4,2>: Cost 3 vsldoi4 <2,3,0,4>, <2,3,0,4>
+ 2235580672U, // <3,0,4,3>: Cost 3 vmrghw <3,4,5,6>, <0,3,1,4>
+ 2756944237U, // <3,0,4,4>: Cost 3 vsldoi12 LHS, <0,4,4,5>
+ 1618136374U, // <3,0,4,5>: Cost 2 vsldoi8 <1,2,3,0>, RHS
+ 3309322742U, // <3,0,4,6>: Cost 4 vmrghw <3,4,5,6>, <0,6,1,7>
+ 3362998904U, // <3,0,4,7>: Cost 4 vmrglw <1,2,3,4>, <3,6,0,7>
+ 1683202449U, // <3,0,4,u>: Cost 2 vsldoi12 LHS, <0,4,u,5>
+ 3765620296U, // <3,0,5,0>: Cost 4 vsldoi8 <1,2,3,0>, <5,0,1,2>
+ 2752299427U, // <3,0,5,1>: Cost 3 vsldoi12 LHS, <0,5,1,5>
+ 3789508346U, // <3,0,5,2>: Cost 4 vsldoi8 <5,2,3,0>, <5,2,3,0>
+ 3403486842U, // <3,0,5,3>: Cost 4 vmrglw <u,0,3,5>, <7,u,0,3>
+ 3765620660U, // <3,0,5,4>: Cost 4 vsldoi8 <1,2,3,0>, <5,4,5,6>
+ 2733682692U, // <3,0,5,5>: Cost 3 vsldoi8 <u,2,3,0>, <5,5,5,5>
+ 2800075218U, // <3,0,5,6>: Cost 3 vsldoi12 LHS, <0,5,6,7>
+ 3873817044U, // <3,0,5,7>: Cost 4 vsldoi12 LHS, <0,5,7,0>
+ 2800075234U, // <3,0,5,u>: Cost 3 vsldoi12 LHS, <0,5,u,5>
+ 2752299501U, // <3,0,6,0>: Cost 3 vsldoi12 LHS, <0,6,0,7>
+ 2236547174U, // <3,0,6,1>: Cost 3 vmrghw <3,6,0,7>, LHS
+ 2733683194U, // <3,0,6,2>: Cost 3 vsldoi8 <u,2,3,0>, <6,2,7,3>
+ 3844473352U, // <3,0,6,3>: Cost 4 vsldoi12 <3,2,0,3>, <0,6,3,7>
+ 3310289234U, // <3,0,6,4>: Cost 4 vmrghw <3,6,0,7>, <0,4,1,5>
+ 3873817114U, // <3,0,6,5>: Cost 4 vsldoi12 LHS, <0,6,5,7>
+ 2733683512U, // <3,0,6,6>: Cost 3 vsldoi8 <u,2,3,0>, <6,6,6,6>
+ 2725057384U, // <3,0,6,7>: Cost 3 vsldoi8 <6,7,3,0>, <6,7,3,0>
+ 2236547741U, // <3,0,6,u>: Cost 3 vmrghw <3,6,0,7>, LHS
+ 2297905152U, // <3,0,7,0>: Cost 3 vmrglw <2,6,3,7>, <0,0,0,0>
+ 2297906854U, // <3,0,7,1>: Cost 3 vmrglw <2,6,3,7>, <2,3,0,1>
+ 2727711916U, // <3,0,7,2>: Cost 3 vsldoi8 <7,2,3,0>, <7,2,3,0>
+ 3371649328U, // <3,0,7,3>: Cost 4 vmrglw <2,6,3,7>, <3,2,0,3>
+ 2733684070U, // <3,0,7,4>: Cost 3 vsldoi8 <u,2,3,0>, <7,4,5,6>
+ 3734843490U, // <3,0,7,5>: Cost 4 vsldoi4 <7,3,0,7>, <5,6,7,0>
+ 3798799895U, // <3,0,7,6>: Cost 4 vsldoi8 <6,7,3,0>, <7,6,7,3>
+ 2733684332U, // <3,0,7,7>: Cost 3 vsldoi8 <u,2,3,0>, <7,7,7,7>
+ 2297906861U, // <3,0,7,u>: Cost 3 vmrglw <2,6,3,7>, <2,3,0,u>
+ 1557504102U, // <3,0,u,0>: Cost 2 vsldoi4 <2,3,0,u>, LHS
+ 1678557842U, // <3,0,u,1>: Cost 2 vsldoi12 LHS, <0,u,1,1>
+ 604816029U, // <3,0,u,2>: Cost 1 vsldoi12 LHS, LHS
+ 2691880892U, // <3,0,u,3>: Cost 3 vsldoi8 <1,2,3,0>, <u,3,0,1>
+ 1557507382U, // <3,0,u,4>: Cost 2 vsldoi4 <2,3,0,u>, RHS
+ 1618139290U, // <3,0,u,5>: Cost 2 vsldoi8 <1,2,3,0>, RHS
+ 2691881168U, // <3,0,u,6>: Cost 3 vsldoi8 <1,2,3,0>, <u,6,3,7>
+ 2661111018U, // <3,0,u,7>: Cost 3 vsldoi4 <7,3,0,u>, <7,3,0,u>
+ 604816083U, // <3,0,u,u>: Cost 1 vsldoi12 LHS, LHS
+ 2619310332U, // <3,1,0,0>: Cost 3 vsldoi4 <0,3,1,0>, <0,3,1,0>
+ 2756944612U, // <3,1,0,1>: Cost 3 vsldoi12 LHS, <1,0,1,2>
+ 2289221724U, // <3,1,0,2>: Cost 3 vmrglw <1,2,3,0>, <0,1,1,2>
+ 2619312278U, // <3,1,0,3>: Cost 3 vsldoi4 <0,3,1,0>, <3,0,1,2>
+ 2619313462U, // <3,1,0,4>: Cost 3 vsldoi4 <0,3,1,0>, RHS
+ 2289221970U, // <3,1,0,5>: Cost 3 vmrglw <1,2,3,0>, <0,4,1,5>
+ 2232599768U, // <3,1,0,6>: Cost 3 vmrghw <3,0,1,2>, <1,6,2,7>
+ 3362964687U, // <3,1,0,7>: Cost 4 vmrglw <1,2,3,0>, <1,6,1,7>
+ 2619316014U, // <3,1,0,u>: Cost 3 vsldoi4 <0,3,1,0>, LHS
+ 2756944683U, // <3,1,1,0>: Cost 3 vsldoi12 LHS, <1,1,0,1>
+ 1678558004U, // <3,1,1,1>: Cost 2 vsldoi12 LHS, <1,1,1,1>
+ 2691883927U, // <3,1,1,2>: Cost 3 vsldoi8 <1,2,3,1>, <1,2,3,1>
+ 3826631496U, // <3,1,1,3>: Cost 4 vsldoi12 <0,2,1,3>, <1,1,3,3>
+ 2756944723U, // <3,1,1,4>: Cost 3 vsldoi12 LHS, <1,1,4,5>
+ 2756944732U, // <3,1,1,5>: Cost 3 vsldoi12 LHS, <1,1,5,5>
+ 3830686561U, // <3,1,1,6>: Cost 4 vsldoi12 LHS, <1,1,6,1>
+ 3734869228U, // <3,1,1,7>: Cost 4 vsldoi4 <7,3,1,1>, <7,3,1,1>
+ 1678558004U, // <3,1,1,u>: Cost 2 vsldoi12 LHS, <1,1,1,1>
+ 2696529358U, // <3,1,2,0>: Cost 3 vsldoi8 <2,0,3,1>, <2,0,3,1>
+ 2756944775U, // <3,1,2,1>: Cost 3 vsldoi12 LHS, <1,2,1,3>
+ 2294548630U, // <3,1,2,2>: Cost 3 vmrglw <2,1,3,2>, <3,0,1,2>
+ 1678558102U, // <3,1,2,3>: Cost 2 vsldoi12 LHS, <1,2,3,0>
+ 2631273782U, // <3,1,2,4>: Cost 3 vsldoi4 <2,3,1,2>, RHS
+ 2756944811U, // <3,1,2,5>: Cost 3 vsldoi12 LHS, <1,2,5,3>
+ 3830686644U, // <3,1,2,6>: Cost 4 vsldoi12 LHS, <1,2,6,3>
+ 2800075706U, // <3,1,2,7>: Cost 3 vsldoi12 LHS, <1,2,7,0>
+ 1679000515U, // <3,1,2,u>: Cost 2 vsldoi12 LHS, <1,2,u,0>
+ 2619334911U, // <3,1,3,0>: Cost 3 vsldoi4 <0,3,1,3>, <0,3,1,3>
+ 2295218186U, // <3,1,3,1>: Cost 3 vmrglw <2,2,3,3>, <0,0,1,1>
+ 2293229718U, // <3,1,3,2>: Cost 3 vmrglw <1,u,3,3>, <3,0,1,2>
+ 2619337116U, // <3,1,3,3>: Cost 3 vsldoi4 <0,3,1,3>, <3,3,3,3>
+ 2619338038U, // <3,1,3,4>: Cost 3 vsldoi4 <0,3,1,3>, RHS
+ 2295218514U, // <3,1,3,5>: Cost 3 vmrglw <2,2,3,3>, <0,4,1,5>
+ 3830686729U, // <3,1,3,6>: Cost 4 vsldoi12 LHS, <1,3,6,7>
+ 3368961231U, // <3,1,3,7>: Cost 4 vmrglw <2,2,3,3>, <1,6,1,7>
+ 2619340590U, // <3,1,3,u>: Cost 3 vsldoi4 <0,3,1,3>, LHS
+ 2619343104U, // <3,1,4,0>: Cost 3 vsldoi4 <0,3,1,4>, <0,3,1,4>
+ 2289254410U, // <3,1,4,1>: Cost 3 vmrglw <1,2,3,4>, <0,0,1,1>
+ 2289256598U, // <3,1,4,2>: Cost 3 vmrglw <1,2,3,4>, <3,0,1,2>
+ 2619345410U, // <3,1,4,3>: Cost 3 vsldoi4 <0,3,1,4>, <3,4,5,6>
+ 2619346230U, // <3,1,4,4>: Cost 3 vsldoi4 <0,3,1,4>, RHS
+ 2756944976U, // <3,1,4,5>: Cost 3 vsldoi12 LHS, <1,4,5,6>
+ 3362996401U, // <3,1,4,6>: Cost 4 vmrglw <1,2,3,4>, <0,2,1,6>
+ 3362997455U, // <3,1,4,7>: Cost 4 vmrglw <1,2,3,4>, <1,6,1,7>
+ 2619348782U, // <3,1,4,u>: Cost 3 vsldoi4 <0,3,1,4>, LHS
+ 2756945007U, // <3,1,5,0>: Cost 3 vsldoi12 LHS, <1,5,0,1>
+ 3830686840U, // <3,1,5,1>: Cost 4 vsldoi12 LHS, <1,5,1,1>
+ 3358361750U, // <3,1,5,2>: Cost 4 vmrglw <0,4,3,5>, <3,0,1,2>
+ 3830686857U, // <3,1,5,3>: Cost 4 vsldoi12 LHS, <1,5,3,0>
+ 2756945047U, // <3,1,5,4>: Cost 3 vsldoi12 LHS, <1,5,4,5>
+ 2294571346U, // <3,1,5,5>: Cost 3 vmrglw <2,1,3,5>, <0,4,1,5>
+ 3806105698U, // <3,1,5,6>: Cost 4 vsldoi8 <u,0,3,1>, <5,6,7,0>
+ 3873817774U, // <3,1,5,7>: Cost 4 vsldoi12 LHS, <1,5,7,1>
+ 2756945079U, // <3,1,5,u>: Cost 3 vsldoi12 LHS, <1,5,u,1>
+ 3830686912U, // <3,1,6,0>: Cost 4 vsldoi12 LHS, <1,6,0,1>
+ 2756945103U, // <3,1,6,1>: Cost 3 vsldoi12 LHS, <1,6,1,7>
+ 2236547990U, // <3,1,6,2>: Cost 3 vmrghw <3,6,0,7>, <1,2,3,0>
+ 3826631905U, // <3,1,6,3>: Cost 4 vsldoi12 <0,2,1,3>, <1,6,3,7>
+ 3830686952U, // <3,1,6,4>: Cost 4 vsldoi12 LHS, <1,6,4,5>
+ 2756945139U, // <3,1,6,5>: Cost 3 vsldoi12 LHS, <1,6,5,7>
+ 3830686972U, // <3,1,6,6>: Cost 4 vsldoi12 LHS, <1,6,6,7>
+ 2800076030U, // <3,1,6,7>: Cost 3 vsldoi12 LHS, <1,6,7,0>
+ 2756945166U, // <3,1,6,u>: Cost 3 vsldoi12 LHS, <1,6,u,7>
+ 3699081318U, // <3,1,7,0>: Cost 4 vsldoi4 <1,3,1,7>, LHS
+ 2297905162U, // <3,1,7,1>: Cost 3 vmrglw <2,6,3,7>, <0,0,1,1>
+ 2297907350U, // <3,1,7,2>: Cost 3 vmrglw <2,6,3,7>, <3,0,1,2>
+ 3365675182U, // <3,1,7,3>: Cost 4 vmrglw <1,6,3,7>, <0,2,1,3>
+ 3699084598U, // <3,1,7,4>: Cost 4 vsldoi4 <1,3,1,7>, RHS
+ 2297905490U, // <3,1,7,5>: Cost 3 vmrglw <2,6,3,7>, <0,4,1,5>
+ 2297905329U, // <3,1,7,6>: Cost 3 vmrglw <2,6,3,7>, <0,2,1,6>
+ 3368330447U, // <3,1,7,7>: Cost 4 vmrglw <2,1,3,7>, <1,6,1,7>
+ 2297905169U, // <3,1,7,u>: Cost 3 vmrglw <2,6,3,7>, <0,0,1,u>
+ 2619375876U, // <3,1,u,0>: Cost 3 vsldoi4 <0,3,1,u>, <0,3,1,u>
+ 1678558004U, // <3,1,u,1>: Cost 2 vsldoi12 LHS, <1,1,1,1>
+ 2289289366U, // <3,1,u,2>: Cost 3 vmrglw <1,2,3,u>, <3,0,1,2>
+ 1679000956U, // <3,1,u,3>: Cost 2 vsldoi12 LHS, <1,u,3,0>
+ 2619378998U, // <3,1,u,4>: Cost 3 vsldoi4 <0,3,1,u>, RHS
+ 2756945297U, // <3,1,u,5>: Cost 3 vsldoi12 LHS, <1,u,5,3>
+ 2297905329U, // <3,1,u,6>: Cost 3 vmrglw <2,6,3,7>, <0,2,1,6>
+ 2800076192U, // <3,1,u,7>: Cost 3 vsldoi12 LHS, <1,u,7,0>
+ 1683203497U, // <3,1,u,u>: Cost 2 vsldoi12 LHS, <1,u,u,0>
+ 3362964203U, // <3,2,0,0>: Cost 4 vmrglw <1,2,3,0>, <1,0,2,0>
+ 2289222380U, // <3,2,0,1>: Cost 3 vmrglw <1,2,3,0>, <1,0,2,1>
+ 2289222462U, // <3,2,0,2>: Cost 3 vmrglw <1,2,3,0>, <1,1,2,2>
+ 1215479910U, // <3,2,0,3>: Cost 2 vmrglw <1,2,3,0>, LHS
+ 3362964207U, // <3,2,0,4>: Cost 4 vmrglw <1,2,3,0>, <1,0,2,4>
+ 2289222708U, // <3,2,0,5>: Cost 3 vmrglw <1,2,3,0>, <1,4,2,5>
+ 2232600506U, // <3,2,0,6>: Cost 3 vmrghw <3,0,1,2>, <2,6,3,7>
+ 3396142296U, // <3,2,0,7>: Cost 4 vmrglw <6,7,3,0>, <1,6,2,7>
+ 1215479915U, // <3,2,0,u>: Cost 2 vmrglw <1,2,3,0>, LHS
+ 3699105894U, // <3,2,1,0>: Cost 4 vsldoi4 <1,3,2,1>, LHS
+ 3765633844U, // <3,2,1,1>: Cost 4 vsldoi8 <1,2,3,2>, <1,1,1,1>
+ 2691892120U, // <3,2,1,2>: Cost 3 vsldoi8 <1,2,3,2>, <1,2,3,2>
+ 2752300575U, // <3,2,1,3>: Cost 3 vsldoi12 LHS, <2,1,3,1>
+ 3699109174U, // <3,2,1,4>: Cost 4 vsldoi4 <1,3,2,1>, RHS
+ 3830687280U, // <3,2,1,5>: Cost 5 vsldoi12 LHS, <2,1,5,0>
+ 3830687289U, // <3,2,1,6>: Cost 4 vsldoi12 LHS, <2,1,6,0>
+ 3874260548U, // <3,2,1,7>: Cost 4 vsldoi12 LHS, <2,1,7,2>
+ 2752742988U, // <3,2,1,u>: Cost 3 vsldoi12 LHS, <2,1,u,1>
+ 2631344230U, // <3,2,2,0>: Cost 3 vsldoi4 <2,3,2,2>, LHS
+ 2697201184U, // <3,2,2,1>: Cost 3 vsldoi8 <2,1,3,2>, <2,1,3,2>
+ 1678558824U, // <3,2,2,2>: Cost 2 vsldoi12 LHS, <2,2,2,2>
+ 1678558834U, // <3,2,2,3>: Cost 2 vsldoi12 LHS, <2,2,3,3>
+ 2631347510U, // <3,2,2,4>: Cost 3 vsldoi4 <2,3,2,2>, RHS
+ 3368953613U, // <3,2,2,5>: Cost 4 vmrglw <2,2,3,2>, <2,4,2,5>
+ 2234304442U, // <3,2,2,6>: Cost 3 vmrghw <3,2,6,3>, <2,6,3,7>
+ 3368953777U, // <3,2,2,7>: Cost 4 vmrglw <2,2,3,2>, <2,6,2,7>
+ 1679001247U, // <3,2,2,u>: Cost 2 vsldoi12 LHS, <2,2,u,3>
+ 1678558886U, // <3,2,3,0>: Cost 2 vsldoi12 LHS, <2,3,0,1>
+ 2752300719U, // <3,2,3,1>: Cost 3 vsldoi12 LHS, <2,3,1,1>
+ 2752300729U, // <3,2,3,2>: Cost 3 vsldoi12 LHS, <2,3,2,2>
+ 1221476454U, // <3,2,3,3>: Cost 2 vmrglw <2,2,3,3>, LHS
+ 1678558926U, // <3,2,3,4>: Cost 2 vsldoi12 LHS, <2,3,4,5>
+ 2800076503U, // <3,2,3,5>: Cost 3 vsldoi12 LHS, <2,3,5,5>
+ 2234746810U, // <3,2,3,6>: Cost 3 vmrghw <3,3,3,3>, <2,6,3,7>
+ 2800076516U, // <3,2,3,7>: Cost 3 vsldoi12 LHS, <2,3,7,0>
+ 1678558958U, // <3,2,3,u>: Cost 2 vsldoi12 LHS, <2,3,u,1>
+ 3699130470U, // <3,2,4,0>: Cost 4 vsldoi4 <1,3,2,4>, LHS
+ 3362996972U, // <3,2,4,1>: Cost 4 vmrglw <1,2,3,4>, <1,0,2,1>
+ 2289256040U, // <3,2,4,2>: Cost 3 vmrglw <1,2,3,4>, <2,2,2,2>
+ 1215512678U, // <3,2,4,3>: Cost 2 vmrglw <1,2,3,4>, LHS
+ 3362998676U, // <3,2,4,4>: Cost 4 vmrglw <1,2,3,4>, <3,3,2,4>
+ 2691894582U, // <3,2,4,5>: Cost 3 vsldoi8 <1,2,3,2>, RHS
+ 2235582394U, // <3,2,4,6>: Cost 3 vmrghw <3,4,5,6>, <2,6,3,7>
+ 3734967544U, // <3,2,4,7>: Cost 4 vsldoi4 <7,3,2,4>, <7,3,2,4>
+ 1215512683U, // <3,2,4,u>: Cost 2 vmrglw <1,2,3,4>, LHS
+ 3705110630U, // <3,2,5,0>: Cost 4 vsldoi4 <2,3,2,5>, LHS
+ 3368313985U, // <3,2,5,1>: Cost 4 vmrglw <2,1,3,5>, <1,5,2,1>
+ 3368314472U, // <3,2,5,2>: Cost 4 vmrglw <2,1,3,5>, <2,2,2,2>
+ 2756945768U, // <3,2,5,3>: Cost 3 vsldoi12 LHS, <2,5,3,6>
+ 3705113910U, // <3,2,5,4>: Cost 4 vsldoi4 <2,3,2,5>, RHS
+ 3310061416U, // <3,2,5,5>: Cost 4 vmrghw <3,5,6,6>, <2,5,3,6>
+ 3310135226U, // <3,2,5,6>: Cost 4 vmrghw <3,5,7,6>, <2,6,3,7>
+ 3370305457U, // <3,2,5,7>: Cost 5 vmrglw <2,4,3,5>, <2,6,2,7>
+ 2752743317U, // <3,2,5,u>: Cost 3 vsldoi12 LHS, <2,5,u,6>
+ 2631376998U, // <3,2,6,0>: Cost 3 vsldoi4 <2,3,2,6>, LHS
+ 3705119540U, // <3,2,6,1>: Cost 4 vsldoi4 <2,3,2,6>, <1,1,1,1>
+ 2631378621U, // <3,2,6,2>: Cost 3 vsldoi4 <2,3,2,6>, <2,3,2,6>
+ 1678559162U, // <3,2,6,3>: Cost 2 vsldoi12 LHS, <2,6,3,7>
+ 2631380278U, // <3,2,6,4>: Cost 3 vsldoi4 <2,3,2,6>, RHS
+ 3370976956U, // <3,2,6,5>: Cost 4 vmrglw <2,5,3,6>, <2,3,2,5>
+ 2237065146U, // <3,2,6,6>: Cost 3 vmrghw <3,6,7,7>, <2,6,3,7>
+ 3798815594U, // <3,2,6,7>: Cost 4 vsldoi8 <6,7,3,2>, <6,7,3,2>
+ 1679001575U, // <3,2,6,u>: Cost 2 vsldoi12 LHS, <2,6,u,7>
+ 2800076778U, // <3,2,7,0>: Cost 3 vsldoi12 LHS, <2,7,0,1>
+ 3371647724U, // <3,2,7,1>: Cost 4 vmrglw <2,6,3,7>, <1,0,2,1>
+ 2297906792U, // <3,2,7,2>: Cost 3 vmrglw <2,6,3,7>, <2,2,2,2>
+ 1224163430U, // <3,2,7,3>: Cost 2 vmrglw <2,6,3,7>, LHS
+ 3705130294U, // <3,2,7,4>: Cost 4 vsldoi4 <2,3,2,7>, RHS
+ 3371648052U, // <3,2,7,5>: Cost 4 vmrglw <2,6,3,7>, <1,4,2,5>
+ 2297906877U, // <3,2,7,6>: Cost 3 vmrglw <2,6,3,7>, <2,3,2,6>
+ 3371648702U, // <3,2,7,7>: Cost 4 vmrglw <2,6,3,7>, <2,3,2,7>
+ 1224163435U, // <3,2,7,u>: Cost 2 vmrglw <2,6,3,7>, LHS
+ 1679001659U, // <3,2,u,0>: Cost 2 vsldoi12 LHS, <2,u,0,1>
+ 2752743492U, // <3,2,u,1>: Cost 3 vsldoi12 LHS, <2,u,1,1>
+ 1678558824U, // <3,2,u,2>: Cost 2 vsldoi12 LHS, <2,2,2,2>
+ 1678559320U, // <3,2,u,3>: Cost 2 vsldoi12 LHS, <2,u,3,3>
+ 1679001699U, // <3,2,u,4>: Cost 2 vsldoi12 LHS, <2,u,4,5>
+ 2691897498U, // <3,2,u,5>: Cost 3 vsldoi8 <1,2,3,2>, RHS
+ 2237908922U, // <3,2,u,6>: Cost 3 vmrghw <3,u,1,2>, <2,6,3,7>
+ 2800519289U, // <3,2,u,7>: Cost 3 vsldoi12 LHS, <2,u,7,0>
+ 1679001731U, // <3,2,u,u>: Cost 2 vsldoi12 LHS, <2,u,u,1>
+ 1215480726U, // <3,3,0,0>: Cost 2 vmrglw <1,2,3,0>, <1,2,3,0>
+ 1678559382U, // <3,3,0,1>: Cost 2 vsldoi12 LHS, <3,0,1,2>
+ 2631403200U, // <3,3,0,2>: Cost 3 vsldoi4 <2,3,3,0>, <2,3,3,0>
+ 2289223282U, // <3,3,0,3>: Cost 3 vmrglw <1,2,3,0>, <2,2,3,3>
+ 2752301232U, // <3,3,0,4>: Cost 3 vsldoi12 LHS, <3,0,4,1>
+ 3362965027U, // <3,3,0,5>: Cost 4 vmrglw <1,2,3,0>, <2,1,3,5>
+ 3362965352U, // <3,3,0,6>: Cost 4 vmrglw <1,2,3,0>, <2,5,3,6>
+ 2289223610U, // <3,3,0,7>: Cost 3 vmrglw <1,2,3,0>, <2,6,3,7>
+ 1678559445U, // <3,3,0,u>: Cost 2 vsldoi12 LHS, <3,0,u,2>
+ 3830687964U, // <3,3,1,0>: Cost 4 vsldoi12 LHS, <3,1,0,0>
+ 2752301286U, // <3,3,1,1>: Cost 3 vsldoi12 LHS, <3,1,1,1>
+ 2752301297U, // <3,3,1,2>: Cost 3 vsldoi12 LHS, <3,1,2,3>
+ 2305157532U, // <3,3,1,3>: Cost 3 vmrglw <3,u,3,1>, <3,3,3,3>
+ 3830688000U, // <3,3,1,4>: Cost 4 vsldoi12 LHS, <3,1,4,0>
+ 3830688009U, // <3,3,1,5>: Cost 4 vsldoi12 LHS, <3,1,5,0>
+ 3830688019U, // <3,3,1,6>: Cost 4 vsldoi12 LHS, <3,1,6,1>
+ 3362973626U, // <3,3,1,7>: Cost 4 vmrglw <1,2,3,1>, <2,6,3,7>
+ 2752743719U, // <3,3,1,u>: Cost 3 vsldoi12 LHS, <3,1,u,3>
+ 2631417958U, // <3,3,2,0>: Cost 3 vsldoi4 <2,3,3,2>, LHS
+ 3826043193U, // <3,3,2,1>: Cost 4 vsldoi12 LHS, <3,2,1,3>
+ 1624131186U, // <3,3,2,2>: Cost 2 vsldoi8 <2,2,3,3>, <2,2,3,3>
+ 2752301384U, // <3,3,2,3>: Cost 3 vsldoi12 LHS, <3,2,3,0>
+ 2631421238U, // <3,3,2,4>: Cost 3 vsldoi4 <2,3,3,2>, RHS
+ 3826485602U, // <3,3,2,5>: Cost 4 vsldoi12 LHS, <3,2,5,u>
+ 2752301414U, // <3,3,2,6>: Cost 3 vsldoi12 LHS, <3,2,6,3>
+ 2771249519U, // <3,3,2,7>: Cost 3 vsldoi12 <3,2,7,3>, <3,2,7,3>
+ 1628112984U, // <3,3,2,u>: Cost 2 vsldoi8 <2,u,3,3>, <2,u,3,3>
+ 1563656294U, // <3,3,3,0>: Cost 2 vsldoi4 <3,3,3,3>, LHS
+ 2301855911U, // <3,3,3,1>: Cost 3 vmrglw <3,3,3,3>, <3,0,3,1>
+ 2697873730U, // <3,3,3,2>: Cost 3 vsldoi8 <2,2,3,3>, <3,2,2,3>
+ 403488870U, // <3,3,3,3>: Cost 1 vspltisw3 LHS
+ 1563659574U, // <3,3,3,4>: Cost 2 vsldoi4 <3,3,3,3>, RHS
+ 2301856239U, // <3,3,3,5>: Cost 3 vmrglw <3,3,3,3>, <3,4,3,5>
+ 2697874067U, // <3,3,3,6>: Cost 3 vsldoi8 <2,2,3,3>, <3,6,3,7>
+ 2295220154U, // <3,3,3,7>: Cost 3 vmrglw <2,2,3,3>, <2,6,3,7>
+ 403488870U, // <3,3,3,u>: Cost 1 vspltisw3 LHS
+ 2289255318U, // <3,3,4,0>: Cost 3 vmrglw <1,2,3,4>, <1,2,3,0>
+ 2631435162U, // <3,3,4,1>: Cost 3 vsldoi4 <2,3,3,4>, <1,2,3,4>
+ 2631435972U, // <3,3,4,2>: Cost 3 vsldoi4 <2,3,3,4>, <2,3,3,4>
+ 2289256050U, // <3,3,4,3>: Cost 3 vmrglw <1,2,3,4>, <2,2,3,3>
+ 1215513498U, // <3,3,4,4>: Cost 2 vmrglw <1,2,3,4>, <1,2,3,4>
+ 1679002114U, // <3,3,4,5>: Cost 2 vsldoi12 LHS, <3,4,5,6>
+ 3362998120U, // <3,3,4,6>: Cost 4 vmrglw <1,2,3,4>, <2,5,3,6>
+ 2289256378U, // <3,3,4,7>: Cost 3 vmrglw <1,2,3,4>, <2,6,3,7>
+ 1679002141U, // <3,3,4,u>: Cost 2 vsldoi12 LHS, <3,4,u,6>
+ 3831130657U, // <3,3,5,0>: Cost 4 vsldoi12 LHS, <3,5,0,1>
+ 3376277671U, // <3,3,5,1>: Cost 4 vmrglw <3,4,3,5>, <3,0,3,1>
+ 3771617012U, // <3,3,5,2>: Cost 4 vsldoi8 <2,2,3,3>, <5,2,2,3>
+ 2302536092U, // <3,3,5,3>: Cost 3 vmrglw <3,4,3,5>, <3,3,3,3>
+ 3831130697U, // <3,3,5,4>: Cost 4 vsldoi12 LHS, <3,5,4,5>
+ 2294572579U, // <3,3,5,5>: Cost 3 vmrglw <2,1,3,5>, <2,1,3,5>
+ 2800519773U, // <3,3,5,6>: Cost 3 vsldoi12 LHS, <3,5,6,7>
+ 3368314810U, // <3,3,5,7>: Cost 4 vmrglw <2,1,3,5>, <2,6,3,7>
+ 2800519791U, // <3,3,5,u>: Cost 3 vsldoi12 LHS, <3,5,u,7>
+ 2800077432U, // <3,3,6,0>: Cost 3 vsldoi12 LHS, <3,6,0,7>
+ 3310291185U, // <3,3,6,1>: Cost 4 vmrghw <3,6,0,7>, <3,1,2,3>
+ 2789165706U, // <3,3,6,2>: Cost 3 vsldoi12 <6,2,7,3>, <3,6,2,7>
+ 2764982931U, // <3,3,6,3>: Cost 3 vsldoi12 <2,2,3,3>, <3,6,3,7>
+ 2800077468U, // <3,3,6,4>: Cost 3 vsldoi12 LHS, <3,6,4,7>
+ 3873819301U, // <3,3,6,5>: Cost 4 vsldoi12 LHS, <3,6,5,7>
+ 2297235304U, // <3,3,6,6>: Cost 3 vmrglw <2,5,3,6>, <2,5,3,6>
+ 2725081963U, // <3,3,6,7>: Cost 3 vsldoi8 <6,7,3,3>, <6,7,3,3>
+ 2725745596U, // <3,3,6,u>: Cost 3 vsldoi8 <6,u,3,3>, <6,u,3,3>
+ 2631458918U, // <3,3,7,0>: Cost 3 vsldoi4 <2,3,3,7>, LHS
+ 3705201460U, // <3,3,7,1>: Cost 4 vsldoi4 <2,3,3,7>, <1,1,1,1>
+ 2631460551U, // <3,3,7,2>: Cost 3 vsldoi4 <2,3,3,7>, <2,3,3,7>
+ 2297906802U, // <3,3,7,3>: Cost 3 vmrglw <2,6,3,7>, <2,2,3,3>
+ 2631462198U, // <3,3,7,4>: Cost 3 vsldoi4 <2,3,3,7>, RHS
+ 3371648547U, // <3,3,7,5>: Cost 4 vmrglw <2,6,3,7>, <2,1,3,5>
+ 3371648548U, // <3,3,7,6>: Cost 4 vmrglw <2,6,3,7>, <2,1,3,6>
+ 1224165306U, // <3,3,7,7>: Cost 2 vmrglw <2,6,3,7>, <2,6,3,7>
+ 1224165306U, // <3,3,7,u>: Cost 2 vmrglw <2,6,3,7>, <2,6,3,7>
+ 1215480726U, // <3,3,u,0>: Cost 2 vmrglw <1,2,3,0>, <1,2,3,0>
+ 1679002398U, // <3,3,u,1>: Cost 2 vsldoi12 LHS, <3,u,1,2>
+ 1659967368U, // <3,3,u,2>: Cost 2 vsldoi8 <u,2,3,3>, <u,2,3,3>
+ 403488870U, // <3,3,u,3>: Cost 1 vspltisw3 LHS
+ 1563659574U, // <3,3,u,4>: Cost 2 vsldoi4 <3,3,3,3>, RHS
+ 1679002438U, // <3,3,u,5>: Cost 2 vsldoi12 LHS, <3,u,5,6>
+ 2756946764U, // <3,3,u,6>: Cost 3 vsldoi12 LHS, <3,u,6,3>
+ 1224165306U, // <3,3,u,7>: Cost 2 vmrglw <2,6,3,7>, <2,6,3,7>
+ 403488870U, // <3,3,u,u>: Cost 1 vspltisw3 LHS
+ 2691907584U, // <3,4,0,0>: Cost 3 vsldoi8 <1,2,3,4>, <0,0,0,0>
+ 1618165862U, // <3,4,0,1>: Cost 2 vsldoi8 <1,2,3,4>, LHS
+ 2631476937U, // <3,4,0,2>: Cost 3 vsldoi4 <2,3,4,0>, <2,3,4,0>
+ 2232601732U, // <3,4,0,3>: Cost 3 vmrghw <3,0,1,2>, <4,3,5,0>
+ 2691907922U, // <3,4,0,4>: Cost 3 vsldoi8 <1,2,3,4>, <0,4,1,5>
+ 1158860086U, // <3,4,0,5>: Cost 2 vmrghw <3,0,1,2>, RHS
+ 3306343806U, // <3,4,0,6>: Cost 4 vmrghw <3,0,1,2>, <4,6,5,7>
+ 3366947484U, // <3,4,0,7>: Cost 4 vmrglw <1,u,3,0>, <3,6,4,7>
+ 1618166429U, // <3,4,0,u>: Cost 2 vsldoi8 <1,2,3,4>, LHS
+ 2631483494U, // <3,4,1,0>: Cost 3 vsldoi4 <2,3,4,1>, LHS
+ 2691908404U, // <3,4,1,1>: Cost 3 vsldoi8 <1,2,3,4>, <1,1,1,1>
+ 1618166682U, // <3,4,1,2>: Cost 2 vsldoi8 <1,2,3,4>, <1,2,3,4>
+ 3765650393U, // <3,4,1,3>: Cost 4 vsldoi8 <1,2,3,4>, <1,3,1,4>
+ 2631486774U, // <3,4,1,4>: Cost 3 vsldoi4 <2,3,4,1>, RHS
+ 2756946914U, // <3,4,1,5>: Cost 3 vsldoi12 LHS, <4,1,5,0>
+ 3765650639U, // <3,4,1,6>: Cost 4 vsldoi8 <1,2,3,4>, <1,6,1,7>
+ 3735090439U, // <3,4,1,7>: Cost 4 vsldoi4 <7,3,4,1>, <7,3,4,1>
+ 1622148480U, // <3,4,1,u>: Cost 2 vsldoi8 <1,u,3,4>, <1,u,3,4>
+ 3765650893U, // <3,4,2,0>: Cost 4 vsldoi8 <1,2,3,4>, <2,0,3,0>
+ 3831131154U, // <3,4,2,1>: Cost 4 vsldoi12 LHS, <4,2,1,3>
+ 2691909224U, // <3,4,2,2>: Cost 3 vsldoi8 <1,2,3,4>, <2,2,2,2>
+ 2691909286U, // <3,4,2,3>: Cost 3 vsldoi8 <1,2,3,4>, <2,3,0,1>
+ 2699208469U, // <3,4,2,4>: Cost 3 vsldoi8 <2,4,3,4>, <2,4,3,4>
+ 2233863478U, // <3,4,2,5>: Cost 3 vmrghw <3,2,0,3>, RHS
+ 2691909562U, // <3,4,2,6>: Cost 3 vsldoi8 <1,2,3,4>, <2,6,3,7>
+ 2701199368U, // <3,4,2,7>: Cost 3 vsldoi8 <2,7,3,4>, <2,7,3,4>
+ 2691909691U, // <3,4,2,u>: Cost 3 vsldoi8 <1,2,3,4>, <2,u,0,1>
+ 2691909782U, // <3,4,3,0>: Cost 3 vsldoi8 <1,2,3,4>, <3,0,1,2>
+ 3765651686U, // <3,4,3,1>: Cost 4 vsldoi8 <1,2,3,4>, <3,1,1,1>
+ 2691909972U, // <3,4,3,2>: Cost 3 vsldoi8 <1,2,3,4>, <3,2,4,3>
+ 2691910044U, // <3,4,3,3>: Cost 3 vsldoi8 <1,2,3,4>, <3,3,3,3>
+ 2691910096U, // <3,4,3,4>: Cost 3 vsldoi8 <1,2,3,4>, <3,4,0,1>
+ 1161006390U, // <3,4,3,5>: Cost 2 vmrghw <3,3,3,3>, RHS
+ 2691910300U, // <3,4,3,6>: Cost 3 vsldoi8 <1,2,3,4>, <3,6,4,7>
+ 3368962716U, // <3,4,3,7>: Cost 4 vmrglw <2,2,3,3>, <3,6,4,7>
+ 1161006633U, // <3,4,3,u>: Cost 2 vmrghw <3,3,3,3>, RHS
+ 2631508070U, // <3,4,4,0>: Cost 3 vsldoi4 <2,3,4,4>, LHS
+ 2631508890U, // <3,4,4,1>: Cost 3 vsldoi4 <2,3,4,4>, <1,2,3,4>
+ 2631509709U, // <3,4,4,2>: Cost 3 vsldoi4 <2,3,4,4>, <2,3,4,4>
+ 2289256788U, // <3,4,4,3>: Cost 3 vmrglw <1,2,3,4>, <3,2,4,3>
+ 1726336208U, // <3,4,4,4>: Cost 2 vsldoi12 LHS, <4,4,4,4>
+ 1618169142U, // <3,4,4,5>: Cost 2 vsldoi8 <1,2,3,4>, RHS
+ 3362998858U, // <3,4,4,6>: Cost 4 vmrglw <1,2,3,4>, <3,5,4,6>
+ 2289257116U, // <3,4,4,7>: Cost 3 vmrglw <1,2,3,4>, <3,6,4,7>
+ 1618169385U, // <3,4,4,u>: Cost 2 vsldoi8 <1,2,3,4>, RHS
+ 1557774438U, // <3,4,5,0>: Cost 2 vsldoi4 <2,3,4,5>, LHS
+ 2631516980U, // <3,4,5,1>: Cost 3 vsldoi4 <2,3,4,5>, <1,1,1,1>
+ 1557776078U, // <3,4,5,2>: Cost 2 vsldoi4 <2,3,4,5>, <2,3,4,5>
+ 2631518358U, // <3,4,5,3>: Cost 3 vsldoi4 <2,3,4,5>, <3,0,1,2>
+ 1557777718U, // <3,4,5,4>: Cost 2 vsldoi4 <2,3,4,5>, RHS
+ 2296563406U, // <3,4,5,5>: Cost 3 vmrglw <2,4,3,5>, <2,3,4,5>
+ 604818742U, // <3,4,5,6>: Cost 1 vsldoi12 LHS, RHS
+ 2661381387U, // <3,4,5,7>: Cost 3 vsldoi4 <7,3,4,5>, <7,3,4,5>
+ 604818760U, // <3,4,5,u>: Cost 1 vsldoi12 LHS, RHS
+ 3705266278U, // <3,4,6,0>: Cost 4 vsldoi4 <2,3,4,6>, LHS
+ 3831131482U, // <3,4,6,1>: Cost 4 vsldoi12 LHS, <4,6,1,7>
+ 2733715962U, // <3,4,6,2>: Cost 3 vsldoi8 <u,2,3,4>, <6,2,7,3>
+ 3844771180U, // <3,4,6,3>: Cost 4 vsldoi12 <3,2,4,3>, <4,6,3,7>
+ 2800078197U, // <3,4,6,4>: Cost 3 vsldoi12 LHS, <4,6,4,7>
+ 2236550454U, // <3,4,6,5>: Cost 3 vmrghw <3,6,0,7>, RHS
+ 2733716280U, // <3,4,6,6>: Cost 3 vsldoi8 <u,2,3,4>, <6,6,6,6>
+ 2725090156U, // <3,4,6,7>: Cost 3 vsldoi8 <6,7,3,4>, <6,7,3,4>
+ 2236550697U, // <3,4,6,u>: Cost 3 vmrghw <3,6,0,7>, RHS
+ 2733716474U, // <3,4,7,0>: Cost 3 vsldoi8 <u,2,3,4>, <7,0,1,2>
+ 3371647013U, // <3,4,7,1>: Cost 4 vmrglw <2,6,3,7>, <0,0,4,1>
+ 2727744688U, // <3,4,7,2>: Cost 3 vsldoi8 <7,2,3,4>, <7,2,3,4>
+ 3371649364U, // <3,4,7,3>: Cost 4 vmrglw <2,6,3,7>, <3,2,4,3>
+ 2733716838U, // <3,4,7,4>: Cost 3 vsldoi8 <u,2,3,4>, <7,4,5,6>
+ 2297906894U, // <3,4,7,5>: Cost 3 vmrglw <2,6,3,7>, <2,3,4,5>
+ 3371647180U, // <3,4,7,6>: Cost 4 vmrglw <2,6,3,7>, <0,2,4,6>
+ 2733717100U, // <3,4,7,7>: Cost 3 vsldoi8 <u,2,3,4>, <7,7,7,7>
+ 2297906897U, // <3,4,7,u>: Cost 3 vmrglw <2,6,3,7>, <2,3,4,u>
+ 1557799014U, // <3,4,u,0>: Cost 2 vsldoi4 <2,3,4,u>, LHS
+ 1618171694U, // <3,4,u,1>: Cost 2 vsldoi8 <1,2,3,4>, LHS
+ 1557800657U, // <3,4,u,2>: Cost 2 vsldoi4 <2,3,4,u>, <2,3,4,u>
+ 2691913660U, // <3,4,u,3>: Cost 3 vsldoi8 <1,2,3,4>, <u,3,0,1>
+ 1557802294U, // <3,4,u,4>: Cost 2 vsldoi4 <2,3,4,u>, RHS
+ 1618172058U, // <3,4,u,5>: Cost 2 vsldoi8 <1,2,3,4>, RHS
+ 604818985U, // <3,4,u,6>: Cost 1 vsldoi12 LHS, RHS
+ 2661405966U, // <3,4,u,7>: Cost 3 vsldoi4 <7,3,4,u>, <7,3,4,u>
+ 604819003U, // <3,4,u,u>: Cost 1 vsldoi12 LHS, RHS
+ 2643492966U, // <3,5,0,0>: Cost 3 vsldoi4 <4,3,5,0>, LHS
+ 2756947528U, // <3,5,0,1>: Cost 3 vsldoi12 LHS, <5,0,1,2>
+ 2331029019U, // <3,5,0,2>: Cost 3 vmrglw <u,2,3,0>, <4,u,5,2>
+ 2643495062U, // <3,5,0,3>: Cost 3 vsldoi4 <4,3,5,0>, <3,0,1,2>
+ 2756947554U, // <3,5,0,4>: Cost 3 vsldoi12 LHS, <5,0,4,1>
+ 2800078443U, // <3,5,0,5>: Cost 3 vsldoi12 LHS, <5,0,5,1>
+ 2289224194U, // <3,5,0,6>: Cost 3 vmrglw <1,2,3,0>, <3,4,5,6>
+ 3362964723U, // <3,5,0,7>: Cost 4 vmrglw <1,2,3,0>, <1,6,5,7>
+ 2756947590U, // <3,5,0,u>: Cost 3 vsldoi12 LHS, <5,0,u,1>
+ 2800078479U, // <3,5,1,0>: Cost 3 vsldoi12 LHS, <5,1,0,1>
+ 2333027218U, // <3,5,1,1>: Cost 3 vmrglw <u,5,3,1>, <4,0,5,1>
+ 2691916699U, // <3,5,1,2>: Cost 3 vsldoi8 <1,2,3,5>, <1,2,3,5>
+ 3832901294U, // <3,5,1,3>: Cost 4 vsldoi12 <1,2,5,3>, <5,1,3,5>
+ 2800078519U, // <3,5,1,4>: Cost 3 vsldoi12 LHS, <5,1,4,5>
+ 3830689467U, // <3,5,1,5>: Cost 4 vsldoi12 LHS, <5,1,5,0>
+ 3830689481U, // <3,5,1,6>: Cost 4 vsldoi12 LHS, <5,1,6,5>
+ 3873820365U, // <3,5,1,7>: Cost 4 vsldoi12 LHS, <5,1,7,0>
+ 2800078551U, // <3,5,1,u>: Cost 3 vsldoi12 LHS, <5,1,u,1>
+ 3770967487U, // <3,5,2,0>: Cost 4 vsldoi8 <2,1,3,5>, <2,0,1,4>
+ 2697225763U, // <3,5,2,1>: Cost 3 vsldoi8 <2,1,3,5>, <2,1,3,5>
+ 3830689523U, // <3,5,2,2>: Cost 4 vsldoi12 LHS, <5,2,2,2>
+ 2699216590U, // <3,5,2,3>: Cost 3 vsldoi8 <2,4,3,5>, <2,3,4,5>
+ 2699216662U, // <3,5,2,4>: Cost 3 vsldoi8 <2,4,3,5>, <2,4,3,5>
+ 2783047439U, // <3,5,2,5>: Cost 3 vsldoi12 <5,2,5,3>, <5,2,5,3>
+ 2783121176U, // <3,5,2,6>: Cost 3 vsldoi12 <5,2,6,3>, <5,2,6,3>
+ 3856936737U, // <3,5,2,7>: Cost 4 vsldoi12 <5,2,7,3>, <5,2,7,3>
+ 2701871194U, // <3,5,2,u>: Cost 3 vsldoi8 <2,u,3,5>, <2,u,3,5>
+ 2643517542U, // <3,5,3,0>: Cost 3 vsldoi4 <4,3,5,3>, LHS
+ 2331052946U, // <3,5,3,1>: Cost 3 vmrglw <u,2,3,3>, <4,0,5,1>
+ 3699345010U, // <3,5,3,2>: Cost 4 vsldoi4 <1,3,5,3>, <2,2,3,3>
+ 2705189276U, // <3,5,3,3>: Cost 3 vsldoi8 <3,4,3,5>, <3,3,3,3>
+ 2705189359U, // <3,5,3,4>: Cost 3 vsldoi8 <3,4,3,5>, <3,4,3,5>
+ 2331053274U, // <3,5,3,5>: Cost 3 vmrglw <u,2,3,3>, <4,4,5,5>
+ 2295220738U, // <3,5,3,6>: Cost 3 vmrglw <2,2,3,3>, <3,4,5,6>
+ 3368961267U, // <3,5,3,7>: Cost 4 vmrglw <2,2,3,3>, <1,6,5,7>
+ 2295220740U, // <3,5,3,u>: Cost 3 vmrglw <2,2,3,3>, <3,4,5,u>
+ 2643525734U, // <3,5,4,0>: Cost 3 vsldoi4 <4,3,5,4>, LHS
+ 2331061138U, // <3,5,4,1>: Cost 3 vmrglw <u,2,3,4>, <4,0,5,1>
+ 2235584280U, // <3,5,4,2>: Cost 3 vmrghw <3,4,5,6>, <5,2,6,3>
+ 2643528194U, // <3,5,4,3>: Cost 3 vsldoi4 <4,3,5,4>, <3,4,5,6>
+ 2735713498U, // <3,5,4,4>: Cost 3 vsldoi8 <u,5,3,5>, <4,4,5,5>
+ 2756947892U, // <3,5,4,5>: Cost 3 vsldoi12 LHS, <5,4,5,6>
+ 2289256962U, // <3,5,4,6>: Cost 3 vmrglw <1,2,3,4>, <3,4,5,6>
+ 3362997491U, // <3,5,4,7>: Cost 4 vmrglw <1,2,3,4>, <1,6,5,7>
+ 2756947919U, // <3,5,4,u>: Cost 3 vsldoi12 LHS, <5,4,u,6>
+ 2800078803U, // <3,5,5,0>: Cost 3 vsldoi12 LHS, <5,5,0,1>
+ 2800078812U, // <3,5,5,1>: Cost 3 vsldoi12 LHS, <5,5,1,1>
+ 2631591639U, // <3,5,5,2>: Cost 3 vsldoi4 <2,3,5,5>, <2,3,5,5>
+ 3832901616U, // <3,5,5,3>: Cost 4 vsldoi12 <1,2,5,3>, <5,5,3,3>
+ 2800078843U, // <3,5,5,4>: Cost 3 vsldoi12 LHS, <5,5,4,5>
+ 1726337028U, // <3,5,5,5>: Cost 2 vsldoi12 LHS, <5,5,5,5>
+ 2800078862U, // <3,5,5,6>: Cost 3 vsldoi12 LHS, <5,5,6,6>
+ 3368314099U, // <3,5,5,7>: Cost 4 vmrglw <2,1,3,5>, <1,6,5,7>
+ 1726337028U, // <3,5,5,u>: Cost 2 vsldoi12 LHS, <5,5,5,5>
+ 2800078884U, // <3,5,6,0>: Cost 3 vsldoi12 LHS, <5,6,0,1>
+ 2800078899U, // <3,5,6,1>: Cost 3 vsldoi12 LHS, <5,6,1,7>
+ 2631599832U, // <3,5,6,2>: Cost 3 vsldoi4 <2,3,5,6>, <2,3,5,6>
+ 2800078914U, // <3,5,6,3>: Cost 3 vsldoi12 LHS, <5,6,3,4>
+ 2800078924U, // <3,5,6,4>: Cost 3 vsldoi12 LHS, <5,6,4,5>
+ 2800078935U, // <3,5,6,5>: Cost 3 vsldoi12 LHS, <5,6,5,7>
+ 2297235970U, // <3,5,6,6>: Cost 3 vmrglw <2,5,3,6>, <3,4,5,6>
+ 1726337122U, // <3,5,6,7>: Cost 2 vsldoi12 LHS, <5,6,7,0>
+ 1726337131U, // <3,5,6,u>: Cost 2 vsldoi12 LHS, <5,6,u,0>
+ 3699376230U, // <3,5,7,0>: Cost 4 vsldoi4 <1,3,5,7>, LHS
+ 2333739922U, // <3,5,7,1>: Cost 3 vmrglw <u,6,3,7>, <4,0,5,1>
+ 3699378106U, // <3,5,7,2>: Cost 4 vsldoi4 <1,3,5,7>, <2,6,3,7>
+ 3371647915U, // <3,5,7,3>: Cost 4 vmrglw <2,6,3,7>, <1,2,5,3>
+ 3699379510U, // <3,5,7,4>: Cost 4 vsldoi4 <1,3,5,7>, RHS
+ 2333740250U, // <3,5,7,5>: Cost 3 vmrglw <u,6,3,7>, <4,4,5,5>
+ 2297907714U, // <3,5,7,6>: Cost 3 vmrglw <2,6,3,7>, <3,4,5,6>
+ 3370984691U, // <3,5,7,7>: Cost 4 vmrglw <2,5,3,7>, <1,6,5,7>
+ 2297907716U, // <3,5,7,u>: Cost 3 vmrglw <2,6,3,7>, <3,4,5,u>
+ 2800079046U, // <3,5,u,0>: Cost 3 vsldoi12 LHS, <5,u,0,1>
+ 2756948176U, // <3,5,u,1>: Cost 3 vsldoi12 LHS, <5,u,1,2>
+ 2331029019U, // <3,5,u,2>: Cost 3 vmrglw <u,2,3,0>, <4,u,5,2>
+ 2800079076U, // <3,5,u,3>: Cost 3 vsldoi12 LHS, <5,u,3,4>
+ 2800079085U, // <3,5,u,4>: Cost 3 vsldoi12 LHS, <5,u,4,4>
+ 1726337028U, // <3,5,u,5>: Cost 2 vsldoi12 LHS, <5,5,5,5>
+ 2289289730U, // <3,5,u,6>: Cost 3 vmrglw <1,2,3,u>, <3,4,5,6>
+ 1726337284U, // <3,5,u,7>: Cost 2 vsldoi12 LHS, <5,u,7,0>
+ 1726337293U, // <3,5,u,u>: Cost 2 vsldoi12 LHS, <5,u,u,0>
+ 3773628416U, // <3,6,0,0>: Cost 4 vsldoi8 <2,5,3,6>, <0,0,0,0>
+ 2699886694U, // <3,6,0,1>: Cost 3 vsldoi8 <2,5,3,6>, LHS
+ 2789167401U, // <3,6,0,2>: Cost 3 vsldoi12 <6,2,7,3>, <6,0,2,1>
+ 3362965862U, // <3,6,0,3>: Cost 4 vmrglw <1,2,3,0>, <3,2,6,3>
+ 3773628754U, // <3,6,0,4>: Cost 4 vsldoi8 <2,5,3,6>, <0,4,1,5>
+ 3723284326U, // <3,6,0,5>: Cost 4 vsldoi4 <5,3,6,0>, <5,3,6,0>
+ 2800079181U, // <3,6,0,6>: Cost 3 vsldoi12 LHS, <6,0,6,1>
+ 1215483190U, // <3,6,0,7>: Cost 2 vmrglw <1,2,3,0>, RHS
+ 1215483191U, // <3,6,0,u>: Cost 2 vmrglw <1,2,3,0>, RHS
+ 3873821032U, // <3,6,1,0>: Cost 4 vsldoi12 LHS, <6,1,0,1>
+ 3773629236U, // <3,6,1,1>: Cost 4 vsldoi8 <2,5,3,6>, <1,1,1,1>
+ 2691924892U, // <3,6,1,2>: Cost 3 vsldoi8 <1,2,3,6>, <1,2,3,6>
+ 3830690184U, // <3,6,1,3>: Cost 5 vsldoi12 LHS, <6,1,3,6>
+ 3873821072U, // <3,6,1,4>: Cost 4 vsldoi12 LHS, <6,1,4,5>
+ 3873821082U, // <3,6,1,5>: Cost 4 vsldoi12 LHS, <6,1,5,6>
+ 3403453240U, // <3,6,1,6>: Cost 4 vmrglw <u,0,3,1>, <6,6,6,6>
+ 2289233206U, // <3,6,1,7>: Cost 3 vmrglw <1,2,3,1>, RHS
+ 2289233207U, // <3,6,1,u>: Cost 3 vmrglw <1,2,3,1>, RHS
+ 2661498982U, // <3,6,2,0>: Cost 3 vsldoi4 <7,3,6,2>, LHS
+ 3770975780U, // <3,6,2,1>: Cost 4 vsldoi8 <2,1,3,6>, <2,1,3,6>
+ 2631640797U, // <3,6,2,2>: Cost 3 vsldoi4 <2,3,6,2>, <2,3,6,2>
+ 3771639485U, // <3,6,2,3>: Cost 4 vsldoi8 <2,2,3,6>, <2,3,2,6>
+ 2661502262U, // <3,6,2,4>: Cost 3 vsldoi4 <7,3,6,2>, RHS
+ 2699888488U, // <3,6,2,5>: Cost 3 vsldoi8 <2,5,3,6>, <2,5,3,6>
+ 2661503482U, // <3,6,2,6>: Cost 3 vsldoi4 <7,3,6,2>, <6,2,7,3>
+ 1715425786U, // <3,6,2,7>: Cost 2 vsldoi12 <6,2,7,3>, <6,2,7,3>
+ 1715499523U, // <3,6,2,u>: Cost 2 vsldoi12 <6,2,u,3>, <6,2,u,3>
+ 3773630614U, // <3,6,3,0>: Cost 4 vsldoi8 <2,5,3,6>, <3,0,1,2>
+ 3372942825U, // <3,6,3,1>: Cost 4 vmrglw <2,u,3,3>, <2,0,6,1>
+ 2234749434U, // <3,6,3,2>: Cost 3 vmrghw <3,3,3,3>, <6,2,7,3>
+ 3368962406U, // <3,6,3,3>: Cost 4 vmrglw <2,2,3,3>, <3,2,6,3>
+ 2699889154U, // <3,6,3,4>: Cost 3 vsldoi8 <2,5,3,6>, <3,4,5,6>
+ 3773631068U, // <3,6,3,5>: Cost 4 vsldoi8 <2,5,3,6>, <3,5,6,6>
+ 2331054904U, // <3,6,3,6>: Cost 3 vmrglw <u,2,3,3>, <6,6,6,6>
+ 1221479734U, // <3,6,3,7>: Cost 2 vmrglw <2,2,3,3>, RHS
+ 1221479735U, // <3,6,3,u>: Cost 2 vmrglw <2,2,3,3>, RHS
+ 2235584801U, // <3,6,4,0>: Cost 3 vmrghw <3,4,5,6>, <6,0,1,2>
+ 3717342106U, // <3,6,4,1>: Cost 4 vsldoi4 <4,3,6,4>, <1,2,3,4>
+ 2789167729U, // <3,6,4,2>: Cost 3 vsldoi12 <6,2,7,3>, <6,4,2,5>
+ 2235585074U, // <3,6,4,3>: Cost 3 vmrghw <3,4,5,6>, <6,3,4,5>
+ 2235585165U, // <3,6,4,4>: Cost 3 vmrghw <3,4,5,6>, <6,4,5,6>
+ 2699889974U, // <3,6,4,5>: Cost 3 vsldoi8 <2,5,3,6>, RHS
+ 2800079509U, // <3,6,4,6>: Cost 3 vsldoi12 LHS, <6,4,6,5>
+ 1215515958U, // <3,6,4,7>: Cost 2 vmrglw <1,2,3,4>, RHS
+ 1215515959U, // <3,6,4,u>: Cost 2 vmrglw <1,2,3,4>, RHS
+ 3873821356U, // <3,6,5,0>: Cost 4 vsldoi12 LHS, <6,5,0,1>
+ 3372959209U, // <3,6,5,1>: Cost 5 vmrglw <2,u,3,5>, <2,0,6,1>
+ 3862909629U, // <3,6,5,2>: Cost 4 vsldoi12 <6,2,7,3>, <6,5,2,0>
+ 3773632358U, // <3,6,5,3>: Cost 4 vsldoi8 <2,5,3,6>, <5,3,6,0>
+ 3873821396U, // <3,6,5,4>: Cost 4 vsldoi12 LHS, <6,5,4,5>
+ 3873821405U, // <3,6,5,5>: Cost 4 vsldoi12 LHS, <6,5,5,5>
+ 3862909672U, // <3,6,5,6>: Cost 4 vsldoi12 <6,2,7,3>, <6,5,6,7>
+ 2294574390U, // <3,6,5,7>: Cost 3 vmrglw <2,1,3,5>, RHS
+ 2294574391U, // <3,6,5,u>: Cost 3 vmrglw <2,1,3,5>, RHS
+ 2800079613U, // <3,6,6,0>: Cost 3 vsldoi12 LHS, <6,6,0,1>
+ 3873821446U, // <3,6,6,1>: Cost 4 vsldoi12 LHS, <6,6,1,1>
+ 2789167888U, // <3,6,6,2>: Cost 3 vsldoi12 <6,2,7,3>, <6,6,2,2>
+ 3844920090U, // <3,6,6,3>: Cost 4 vsldoi12 <3,2,6,3>, <6,6,3,3>
+ 2800079653U, // <3,6,6,4>: Cost 3 vsldoi12 LHS, <6,6,4,5>
+ 3723333484U, // <3,6,6,5>: Cost 4 vsldoi4 <5,3,6,6>, <5,3,6,6>
+ 1726337848U, // <3,6,6,6>: Cost 2 vsldoi12 LHS, <6,6,6,6>
+ 1726337858U, // <3,6,6,7>: Cost 2 vsldoi12 LHS, <6,6,7,7>
+ 1726337867U, // <3,6,6,u>: Cost 2 vsldoi12 LHS, <6,6,u,7>
+ 1726337870U, // <3,6,7,0>: Cost 2 vsldoi12 LHS, <6,7,0,1>
+ 2297906665U, // <3,6,7,1>: Cost 3 vmrglw <2,6,3,7>, <2,0,6,1>
+ 2792117090U, // <3,6,7,2>: Cost 3 vsldoi12 <6,7,2,3>, <6,7,2,3>
+ 2297907558U, // <3,6,7,3>: Cost 3 vmrglw <2,6,3,7>, <3,2,6,3>
+ 1726337910U, // <3,6,7,4>: Cost 2 vsldoi12 LHS, <6,7,4,5>
+ 2297906993U, // <3,6,7,5>: Cost 3 vmrglw <2,6,3,7>, <2,4,6,5>
+ 2297906832U, // <3,6,7,6>: Cost 3 vmrglw <2,6,3,7>, <2,2,6,6>
+ 1224166710U, // <3,6,7,7>: Cost 2 vmrglw <2,6,3,7>, RHS
+ 1224166711U, // <3,6,7,u>: Cost 2 vmrglw <2,6,3,7>, RHS
+ 1726337951U, // <3,6,u,0>: Cost 2 vsldoi12 LHS, <6,u,0,1>
+ 2699892526U, // <3,6,u,1>: Cost 3 vsldoi8 <2,5,3,6>, LHS
+ 2789168049U, // <3,6,u,2>: Cost 3 vsldoi12 <6,2,7,3>, <6,u,2,1>
+ 2792854460U, // <3,6,u,3>: Cost 3 vsldoi12 <6,u,3,3>, <6,u,3,3>
+ 1726337991U, // <3,6,u,4>: Cost 2 vsldoi12 LHS, <6,u,4,5>
+ 2699892890U, // <3,6,u,5>: Cost 3 vsldoi8 <2,5,3,6>, RHS
+ 1726337848U, // <3,6,u,6>: Cost 2 vsldoi12 LHS, <6,6,6,6>
+ 1215548726U, // <3,6,u,7>: Cost 2 vmrglw <1,2,3,u>, RHS
+ 1215548727U, // <3,6,u,u>: Cost 2 vmrglw <1,2,3,u>, RHS
+ 2700558336U, // <3,7,0,0>: Cost 3 vsldoi8 <2,6,3,7>, <0,0,0,0>
+ 1626816614U, // <3,7,0,1>: Cost 2 vsldoi8 <2,6,3,7>, LHS
+ 2700558513U, // <3,7,0,2>: Cost 3 vsldoi8 <2,6,3,7>, <0,2,1,6>
+ 2331030010U, // <3,7,0,3>: Cost 3 vmrglw <u,2,3,0>, <6,2,7,3>
+ 2700558674U, // <3,7,0,4>: Cost 3 vsldoi8 <2,6,3,7>, <0,4,1,5>
+ 2800079906U, // <3,7,0,5>: Cost 3 vsldoi12 LHS, <7,0,5,6>
+ 2655588936U, // <3,7,0,6>: Cost 3 vsldoi4 <6,3,7,0>, <6,3,7,0>
+ 2800079919U, // <3,7,0,7>: Cost 3 vsldoi12 LHS, <7,0,7,1>
+ 1626817181U, // <3,7,0,u>: Cost 2 vsldoi8 <2,6,3,7>, LHS
+ 3774300899U, // <3,7,1,0>: Cost 4 vsldoi8 <2,6,3,7>, <1,0,1,1>
+ 2700559156U, // <3,7,1,1>: Cost 3 vsldoi8 <2,6,3,7>, <1,1,1,1>
+ 2700559254U, // <3,7,1,2>: Cost 3 vsldoi8 <2,6,3,7>, <1,2,3,0>
+ 3774301148U, // <3,7,1,3>: Cost 4 vsldoi8 <2,6,3,7>, <1,3,1,7>
+ 3774301227U, // <3,7,1,4>: Cost 4 vsldoi8 <2,6,3,7>, <1,4,1,5>
+ 3774301295U, // <3,7,1,5>: Cost 4 vsldoi8 <2,6,3,7>, <1,5,0,1>
+ 3768329441U, // <3,7,1,6>: Cost 4 vsldoi8 <1,6,3,7>, <1,6,3,7>
+ 3403453250U, // <3,7,1,7>: Cost 4 vmrglw <u,0,3,1>, <6,6,7,7>
+ 2700559740U, // <3,7,1,u>: Cost 3 vsldoi8 <2,6,3,7>, <1,u,3,0>
+ 2700559849U, // <3,7,2,0>: Cost 3 vsldoi8 <2,6,3,7>, <2,0,6,1>
+ 3770983973U, // <3,7,2,1>: Cost 4 vsldoi8 <2,1,3,7>, <2,1,3,7>
+ 2700559976U, // <3,7,2,2>: Cost 3 vsldoi8 <2,6,3,7>, <2,2,2,2>
+ 2698569415U, // <3,7,2,3>: Cost 3 vsldoi8 <2,3,3,7>, <2,3,3,7>
+ 2700560177U, // <3,7,2,4>: Cost 3 vsldoi8 <2,6,3,7>, <2,4,6,5>
+ 3773638505U, // <3,7,2,5>: Cost 4 vsldoi8 <2,5,3,7>, <2,5,3,7>
+ 1626818490U, // <3,7,2,6>: Cost 2 vsldoi8 <2,6,3,7>, <2,6,3,7>
+ 2795140307U, // <3,7,2,7>: Cost 3 vsldoi12 <7,2,7,3>, <7,2,7,3>
+ 1628145756U, // <3,7,2,u>: Cost 2 vsldoi8 <2,u,3,7>, <2,u,3,7>
+ 2700560534U, // <3,7,3,0>: Cost 3 vsldoi8 <2,6,3,7>, <3,0,1,2>
+ 3774302438U, // <3,7,3,1>: Cost 4 vsldoi8 <2,6,3,7>, <3,1,1,1>
+ 2700560742U, // <3,7,3,2>: Cost 3 vsldoi8 <2,6,3,7>, <3,2,6,3>
+ 2700560796U, // <3,7,3,3>: Cost 3 vsldoi8 <2,6,3,7>, <3,3,3,3>
+ 2700560898U, // <3,7,3,4>: Cost 3 vsldoi8 <2,6,3,7>, <3,4,5,6>
+ 3774302821U, // <3,7,3,5>: Cost 4 vsldoi8 <2,6,3,7>, <3,5,7,6>
+ 2700561079U, // <3,7,3,6>: Cost 3 vsldoi8 <2,6,3,7>, <3,6,7,7>
+ 2700561091U, // <3,7,3,7>: Cost 3 vsldoi8 <2,6,3,7>, <3,7,0,1>
+ 2700561182U, // <3,7,3,u>: Cost 3 vsldoi8 <2,6,3,7>, <3,u,1,2>
+ 2655617126U, // <3,7,4,0>: Cost 3 vsldoi4 <6,3,7,4>, LHS
+ 3774303178U, // <3,7,4,1>: Cost 4 vsldoi8 <2,6,3,7>, <4,1,2,3>
+ 2655619002U, // <3,7,4,2>: Cost 3 vsldoi4 <6,3,7,4>, <2,6,3,7>
+ 2331062778U, // <3,7,4,3>: Cost 3 vmrglw <u,2,3,4>, <6,2,7,3>
+ 2655620406U, // <3,7,4,4>: Cost 3 vsldoi4 <6,3,7,4>, RHS
+ 1626819894U, // <3,7,4,5>: Cost 2 vsldoi8 <2,6,3,7>, RHS
+ 2655621708U, // <3,7,4,6>: Cost 3 vsldoi4 <6,3,7,4>, <6,3,7,4>
+ 2800080247U, // <3,7,4,7>: Cost 3 vsldoi12 LHS, <7,4,7,5>
+ 1626820137U, // <3,7,4,u>: Cost 2 vsldoi8 <2,6,3,7>, RHS
+ 3774303816U, // <3,7,5,0>: Cost 4 vsldoi8 <2,6,3,7>, <5,0,1,2>
+ 3873822093U, // <3,7,5,1>: Cost 4 vsldoi12 LHS, <7,5,1,0>
+ 3774303998U, // <3,7,5,2>: Cost 4 vsldoi8 <2,6,3,7>, <5,2,3,4>
+ 3862910368U, // <3,7,5,3>: Cost 4 vsldoi12 <6,2,7,3>, <7,5,3,1>
+ 3774304180U, // <3,7,5,4>: Cost 4 vsldoi8 <2,6,3,7>, <5,4,5,6>
+ 2800080310U, // <3,7,5,5>: Cost 3 vsldoi12 LHS, <7,5,5,5>
+ 2800080321U, // <3,7,5,6>: Cost 3 vsldoi12 LHS, <7,5,6,7>
+ 3873822147U, // <3,7,5,7>: Cost 4 vsldoi12 LHS, <7,5,7,0>
+ 2800080339U, // <3,7,5,u>: Cost 3 vsldoi12 LHS, <7,5,u,7>
+ 2800080348U, // <3,7,6,0>: Cost 3 vsldoi12 LHS, <7,6,0,7>
+ 3873822181U, // <3,7,6,1>: Cost 4 vsldoi12 LHS, <7,6,1,7>
+ 2789168622U, // <3,7,6,2>: Cost 3 vsldoi12 <6,2,7,3>, <7,6,2,7>
+ 2700563016U, // <3,7,6,3>: Cost 3 vsldoi8 <2,6,3,7>, <6,3,7,0>
+ 2800080384U, // <3,7,6,4>: Cost 3 vsldoi12 LHS, <7,6,4,7>
+ 3862910472U, // <3,7,6,5>: Cost 4 vsldoi12 <6,2,7,3>, <7,6,5,6>
+ 2700563256U, // <3,7,6,6>: Cost 3 vsldoi8 <2,6,3,7>, <6,6,6,6>
+ 2800080404U, // <3,7,6,7>: Cost 3 vsldoi12 LHS, <7,6,7,0>
+ 2793149988U, // <3,7,6,u>: Cost 3 vsldoi12 <6,u,7,3>, <7,6,u,7>
+ 2637725798U, // <3,7,7,0>: Cost 3 vsldoi4 <3,3,7,7>, LHS
+ 3371649227U, // <3,7,7,1>: Cost 4 vmrglw <2,6,3,7>, <3,0,7,1>
+ 2637727674U, // <3,7,7,2>: Cost 3 vsldoi4 <3,3,7,7>, <2,6,3,7>
+ 2297907567U, // <3,7,7,3>: Cost 3 vmrglw <2,6,3,7>, <3,2,7,3>
+ 2637729078U, // <3,7,7,4>: Cost 3 vsldoi4 <3,3,7,7>, RHS
+ 3371649312U, // <3,7,7,5>: Cost 4 vmrglw <2,6,3,7>, <3,1,7,5>
+ 2655646287U, // <3,7,7,6>: Cost 3 vsldoi4 <6,3,7,7>, <6,3,7,7>
+ 1726338668U, // <3,7,7,7>: Cost 2 vsldoi12 LHS, <7,7,7,7>
+ 1726338668U, // <3,7,7,u>: Cost 2 vsldoi12 LHS, <7,7,7,7>
+ 2700564179U, // <3,7,u,0>: Cost 3 vsldoi8 <2,6,3,7>, <u,0,1,2>
+ 1626822446U, // <3,7,u,1>: Cost 2 vsldoi8 <2,6,3,7>, LHS
+ 2700564357U, // <3,7,u,2>: Cost 3 vsldoi8 <2,6,3,7>, <u,2,3,0>
+ 2700564412U, // <3,7,u,3>: Cost 3 vsldoi8 <2,6,3,7>, <u,3,0,1>
+ 2700564543U, // <3,7,u,4>: Cost 3 vsldoi8 <2,6,3,7>, <u,4,5,6>
+ 1626822810U, // <3,7,u,5>: Cost 2 vsldoi8 <2,6,3,7>, RHS
+ 1662654672U, // <3,7,u,6>: Cost 2 vsldoi8 <u,6,3,7>, <u,6,3,7>
+ 1726338668U, // <3,7,u,7>: Cost 2 vsldoi12 LHS, <7,7,7,7>
+ 1626823013U, // <3,7,u,u>: Cost 2 vsldoi8 <2,6,3,7>, LHS
+ 1678557184U, // <3,u,0,0>: Cost 2 vsldoi12 LHS, <0,0,0,0>
+ 1679005395U, // <3,u,0,1>: Cost 2 vsldoi12 LHS, <u,0,1,2>
+ 2289221787U, // <3,u,0,2>: Cost 3 vmrglw <1,2,3,0>, <0,1,u,2>
+ 1215479964U, // <3,u,0,3>: Cost 2 vmrglw <1,2,3,0>, LHS
+ 2752747245U, // <3,u,0,4>: Cost 3 vsldoi12 LHS, <u,0,4,1>
+ 1158863002U, // <3,u,0,5>: Cost 2 vmrghw <3,0,1,2>, RHS
+ 2289224221U, // <3,u,0,6>: Cost 3 vmrglw <1,2,3,0>, <3,4,u,6>
+ 1215483208U, // <3,u,0,7>: Cost 2 vmrglw <1,2,3,0>, RHS
+ 1679005458U, // <3,u,0,u>: Cost 2 vsldoi12 LHS, <u,0,u,2>
+ 1558036582U, // <3,u,1,0>: Cost 2 vsldoi4 <2,3,u,1>, LHS
+ 1678558004U, // <3,u,1,1>: Cost 2 vsldoi12 LHS, <1,1,1,1>
+ 604821294U, // <3,u,1,2>: Cost 1 vsldoi12 LHS, LHS
+ 2752747317U, // <3,u,1,3>: Cost 3 vsldoi12 LHS, <u,1,3,1>
+ 1558039862U, // <3,u,1,4>: Cost 2 vsldoi4 <2,3,u,1>, RHS
+ 2756949830U, // <3,u,1,5>: Cost 3 vsldoi12 LHS, <u,1,5,0>
+ 2800080726U, // <3,u,1,6>: Cost 3 vsldoi12 LHS, <u,1,6,7>
+ 2289233224U, // <3,u,1,7>: Cost 3 vmrglw <1,2,3,1>, RHS
+ 604821348U, // <3,u,1,u>: Cost 1 vsldoi12 LHS, LHS
+ 2696586709U, // <3,u,2,0>: Cost 3 vsldoi8 <2,0,3,u>, <2,0,3,u>
+ 2757392246U, // <3,u,2,1>: Cost 3 vsldoi12 LHS, <u,2,1,3>
+ 1624172151U, // <3,u,2,2>: Cost 2 vsldoi8 <2,2,3,u>, <2,2,3,u>
+ 1679005576U, // <3,u,2,3>: Cost 2 vsldoi12 LHS, <u,2,3,3>
+ 2631789878U, // <3,u,2,4>: Cost 3 vsldoi4 <2,3,u,2>, RHS
+ 2699904874U, // <3,u,2,5>: Cost 3 vsldoi8 <2,5,3,u>, <2,5,3,u>
+ 1626826683U, // <3,u,2,6>: Cost 2 vsldoi8 <2,6,3,u>, <2,6,3,u>
+ 1726338988U, // <3,u,2,7>: Cost 2 vsldoi12 LHS, <u,2,7,3>
+ 1683208117U, // <3,u,2,u>: Cost 2 vsldoi12 LHS, <u,2,u,3>
+ 1679005628U, // <3,u,3,0>: Cost 2 vsldoi12 LHS, <u,3,0,1>
+ 1161008942U, // <3,u,3,1>: Cost 2 vmrghw <3,3,3,3>, LHS
+ 2752747471U, // <3,u,3,2>: Cost 3 vsldoi12 LHS, <u,3,2,2>
+ 403488870U, // <3,u,3,3>: Cost 1 vspltisw3 LHS
+ 1679005668U, // <3,u,3,4>: Cost 2 vsldoi12 LHS, <u,3,4,5>
+ 1161009306U, // <3,u,3,5>: Cost 2 vmrghw <3,3,3,3>, RHS
+ 2691943104U, // <3,u,3,6>: Cost 3 vsldoi8 <1,2,3,u>, <3,6,u,7>
+ 1221479752U, // <3,u,3,7>: Cost 2 vmrglw <2,2,3,3>, RHS
+ 403488870U, // <3,u,3,u>: Cost 1 vspltisw3 LHS
+ 2289255363U, // <3,u,4,0>: Cost 3 vmrglw <1,2,3,4>, <1,2,u,0>
+ 1161844526U, // <3,u,4,1>: Cost 2 vmrghw <3,4,5,6>, LHS
+ 2289256661U, // <3,u,4,2>: Cost 3 vmrglw <1,2,3,4>, <3,0,u,2>
+ 1215512732U, // <3,u,4,3>: Cost 2 vmrglw <1,2,3,4>, LHS
+ 1215513498U, // <3,u,4,4>: Cost 2 vmrglw <1,2,3,4>, <1,2,3,4>
+ 1679005759U, // <3,u,4,5>: Cost 2 vsldoi12 LHS, <u,4,5,6>
+ 2289256989U, // <3,u,4,6>: Cost 3 vmrglw <1,2,3,4>, <3,4,u,6>
+ 1215515976U, // <3,u,4,7>: Cost 2 vmrglw <1,2,3,4>, RHS
+ 1679005786U, // <3,u,4,u>: Cost 2 vsldoi12 LHS, <u,4,u,6>
+ 1558069350U, // <3,u,5,0>: Cost 2 vsldoi4 <2,3,u,5>, LHS
+ 2631811892U, // <3,u,5,1>: Cost 3 vsldoi4 <2,3,u,5>, <1,1,1,1>
+ 1558071026U, // <3,u,5,2>: Cost 2 vsldoi4 <2,3,u,5>, <2,3,u,5>
+ 2752747646U, // <3,u,5,3>: Cost 3 vsldoi12 LHS, <u,5,3,6>
+ 1558072630U, // <3,u,5,4>: Cost 2 vsldoi4 <2,3,u,5>, RHS
+ 1726337028U, // <3,u,5,5>: Cost 2 vsldoi12 LHS, <5,5,5,5>
+ 604821658U, // <3,u,5,6>: Cost 1 vsldoi12 LHS, RHS
+ 2294574408U, // <3,u,5,7>: Cost 3 vmrglw <2,1,3,5>, RHS
+ 604821676U, // <3,u,5,u>: Cost 1 vsldoi12 LHS, RHS
+ 2631819366U, // <3,u,6,0>: Cost 3 vsldoi4 <2,3,u,6>, LHS
+ 2757392574U, // <3,u,6,1>: Cost 3 vsldoi12 LHS, <u,6,1,7>
+ 2631821043U, // <3,u,6,2>: Cost 3 vsldoi4 <2,3,u,6>, <2,3,u,6>
+ 1679005904U, // <3,u,6,3>: Cost 2 vsldoi12 LHS, <u,6,3,7>
+ 2631822646U, // <3,u,6,4>: Cost 3 vsldoi4 <2,3,u,6>, RHS
+ 2236553370U, // <3,u,6,5>: Cost 3 vmrghw <3,6,0,7>, RHS
+ 1726337848U, // <3,u,6,6>: Cost 2 vsldoi12 LHS, <6,6,6,6>
+ 1726339309U, // <3,u,6,7>: Cost 2 vsldoi12 LHS, <u,6,7,0>
+ 1683208445U, // <3,u,6,u>: Cost 2 vsldoi12 LHS, <u,6,u,7>
+ 1726339328U, // <3,u,7,0>: Cost 2 vsldoi12 LHS, <u,7,0,1>
+ 2297905225U, // <3,u,7,1>: Cost 3 vmrglw <2,6,3,7>, <0,0,u,1>
+ 2631829236U, // <3,u,7,2>: Cost 3 vsldoi4 <2,3,u,7>, <2,3,u,7>
+ 1224163484U, // <3,u,7,3>: Cost 2 vmrglw <2,6,3,7>, LHS
+ 1726339368U, // <3,u,7,4>: Cost 2 vsldoi12 LHS, <u,7,4,5>
+ 2297905553U, // <3,u,7,5>: Cost 3 vmrglw <2,6,3,7>, <0,4,u,5>
+ 2297905392U, // <3,u,7,6>: Cost 3 vmrglw <2,6,3,7>, <0,2,u,6>
+ 1224166728U, // <3,u,7,7>: Cost 2 vmrglw <2,6,3,7>, RHS
+ 1224163489U, // <3,u,7,u>: Cost 2 vmrglw <2,6,3,7>, LHS
+ 1683208529U, // <3,u,u,0>: Cost 2 vsldoi12 LHS, <u,u,0,1>
+ 1679006043U, // <3,u,u,1>: Cost 2 vsldoi12 LHS, <u,u,1,2>
+ 604821861U, // <3,u,u,2>: Cost 1 vsldoi12 LHS, LHS
+ 403488870U, // <3,u,u,3>: Cost 1 vspltisw3 LHS
+ 1683208569U, // <3,u,u,4>: Cost 2 vsldoi12 LHS, <u,u,4,5>
+ 1679006083U, // <3,u,u,5>: Cost 2 vsldoi12 LHS, <u,u,5,6>
+ 604821901U, // <3,u,u,6>: Cost 1 vsldoi12 LHS, RHS
+ 1215548744U, // <3,u,u,7>: Cost 2 vmrglw <1,2,3,u>, RHS
+ 604821915U, // <3,u,u,u>: Cost 1 vsldoi12 LHS, LHS
+ 2759016448U, // <4,0,0,0>: Cost 3 vsldoi12 <1,2,3,4>, <0,0,0,0>
+ 1165115494U, // <4,0,0,1>: Cost 2 vmrghw <4,0,5,1>, LHS
+ 3717531337U, // <4,0,0,2>: Cost 4 vsldoi4 <4,4,0,0>, <2,3,4,0>
+ 3369675785U, // <4,0,0,3>: Cost 4 vmrglw <2,3,4,0>, <4,2,0,3>
+ 2751791144U, // <4,0,0,4>: Cost 3 vsldoi12 <0,0,4,4>, <0,0,4,4>
+ 2238857630U, // <4,0,0,5>: Cost 3 vmrghw <4,0,5,1>, <0,5,1,0>
+ 3312591341U, // <4,0,0,6>: Cost 4 vmrghw <4,0,5,0>, <0,6,0,7>
+ 3369676113U, // <4,0,0,7>: Cost 4 vmrglw <2,3,4,0>, <4,6,0,7>
+ 1165116061U, // <4,0,0,u>: Cost 2 vmrghw <4,0,5,1>, LHS
+ 2637824102U, // <4,0,1,0>: Cost 3 vsldoi4 <3,4,0,1>, LHS
+ 2637824922U, // <4,0,1,1>: Cost 3 vsldoi4 <3,4,0,1>, <1,2,3,4>
+ 1685274726U, // <4,0,1,2>: Cost 2 vsldoi12 <1,2,3,4>, LHS
+ 2637826512U, // <4,0,1,3>: Cost 3 vsldoi4 <3,4,0,1>, <3,4,0,1>
+ 2637827382U, // <4,0,1,4>: Cost 3 vsldoi4 <3,4,0,1>, RHS
+ 2661716070U, // <4,0,1,5>: Cost 3 vsldoi4 <7,4,0,1>, <5,6,7,4>
+ 3729486427U, // <4,0,1,6>: Cost 4 vsldoi4 <6,4,0,1>, <6,4,0,1>
+ 2661717300U, // <4,0,1,7>: Cost 3 vsldoi4 <7,4,0,1>, <7,4,0,1>
+ 1685274780U, // <4,0,1,u>: Cost 2 vsldoi12 <1,2,3,4>, LHS
+ 3711574118U, // <4,0,2,0>: Cost 4 vsldoi4 <3,4,0,2>, LHS
+ 2240200806U, // <4,0,2,1>: Cost 3 vmrghw <4,2,5,3>, LHS
+ 3771663992U, // <4,0,2,2>: Cost 4 vsldoi8 <2,2,4,0>, <2,2,4,0>
+ 2698585801U, // <4,0,2,3>: Cost 3 vsldoi8 <2,3,4,0>, <2,3,4,0>
+ 3373672105U, // <4,0,2,4>: Cost 4 vmrglw <3,0,4,2>, <2,3,0,4>
+ 3810813795U, // <4,0,2,5>: Cost 4 vsldoi8 <u,7,4,0>, <2,5,3,1>
+ 3772327866U, // <4,0,2,6>: Cost 4 vsldoi8 <2,3,4,0>, <2,6,3,7>
+ 3386280568U, // <4,0,2,7>: Cost 5 vmrglw <5,1,4,2>, <3,6,0,7>
+ 2701903966U, // <4,0,2,u>: Cost 3 vsldoi8 <2,u,4,0>, <2,u,4,0>
+ 3699638374U, // <4,0,3,0>: Cost 4 vsldoi4 <1,4,0,3>, LHS
+ 2753560832U, // <4,0,3,1>: Cost 3 vsldoi12 <0,3,1,4>, <0,3,1,4>
+ 3772328276U, // <4,0,3,2>: Cost 4 vsldoi8 <2,3,4,0>, <3,2,4,3>
+ 3827302674U, // <4,0,3,3>: Cost 4 vsldoi12 <0,3,1,4>, <0,3,3,4>
+ 3699641654U, // <4,0,3,4>: Cost 4 vsldoi4 <1,4,0,3>, RHS
+ 3779627588U, // <4,0,3,5>: Cost 4 vsldoi8 <3,5,4,0>, <3,5,4,0>
+ 3772328604U, // <4,0,3,6>: Cost 4 vsldoi8 <2,3,4,0>, <3,6,4,7>
+ 3780954854U, // <4,0,3,7>: Cost 4 vsldoi8 <3,7,4,0>, <3,7,4,0>
+ 2753560832U, // <4,0,3,u>: Cost 3 vsldoi12 <0,3,1,4>, <0,3,1,4>
+ 2725129106U, // <4,0,4,0>: Cost 3 vsldoi8 <6,7,4,0>, <4,0,5,1>
+ 1167720550U, // <4,0,4,1>: Cost 2 vmrghw <4,4,4,4>, LHS
+ 3839172953U, // <4,0,4,2>: Cost 4 vsldoi12 <2,3,0,4>, <0,4,2,3>
+ 3772329051U, // <4,0,4,3>: Cost 4 vsldoi8 <2,3,4,0>, <4,3,0,4>
+ 2241462610U, // <4,0,4,4>: Cost 3 vmrghw <4,4,4,4>, <0,4,1,5>
+ 2698587446U, // <4,0,4,5>: Cost 3 vsldoi8 <2,3,4,0>, RHS
+ 3772329297U, // <4,0,4,6>: Cost 4 vsldoi8 <2,3,4,0>, <4,6,0,7>
+ 3735483703U, // <4,0,4,7>: Cost 4 vsldoi4 <7,4,0,4>, <7,4,0,4>
+ 1167721117U, // <4,0,4,u>: Cost 2 vmrghw <4,4,4,4>, LHS
+ 1168556032U, // <4,0,5,0>: Cost 2 vmrghw RHS, <0,0,0,0>
+ 94814310U, // <4,0,5,1>: Cost 1 vmrghw RHS, LHS
+ 2242298029U, // <4,0,5,2>: Cost 3 vmrghw RHS, <0,2,1,2>
+ 2637859284U, // <4,0,5,3>: Cost 3 vsldoi4 <3,4,0,5>, <3,4,0,5>
+ 1168556370U, // <4,0,5,4>: Cost 2 vmrghw RHS, <0,4,1,5>
+ 2242306530U, // <4,0,5,5>: Cost 3 vmrghw RHS, <0,5,u,5>
+ 2242298358U, // <4,0,5,6>: Cost 3 vmrghw RHS, <0,6,1,7>
+ 2661750072U, // <4,0,5,7>: Cost 3 vsldoi4 <7,4,0,5>, <7,4,0,5>
+ 94814877U, // <4,0,5,u>: Cost 1 vmrghw RHS, LHS
+ 3316580362U, // <4,0,6,0>: Cost 4 vmrghw <4,6,5,1>, <0,0,1,1>
+ 2242846822U, // <4,0,6,1>: Cost 3 vmrghw <4,6,5,2>, LHS
+ 3798872570U, // <4,0,6,2>: Cost 4 vsldoi8 <6,7,4,0>, <6,2,7,3>
+ 3796218413U, // <4,0,6,3>: Cost 4 vsldoi8 <6,3,4,0>, <6,3,4,0>
+ 3834528273U, // <4,0,6,4>: Cost 4 vsldoi12 <1,5,0,4>, <0,6,4,7>
+ 3798872811U, // <4,0,6,5>: Cost 4 vsldoi8 <6,7,4,0>, <6,5,7,1>
+ 3316621876U, // <4,0,6,6>: Cost 4 vmrghw <4,6,5,6>, <0,6,u,6>
+ 2725131121U, // <4,0,6,7>: Cost 3 vsldoi8 <6,7,4,0>, <6,7,4,0>
+ 2242847389U, // <4,0,6,u>: Cost 3 vmrghw <4,6,5,2>, LHS
+ 3377692672U, // <4,0,7,0>: Cost 4 vmrglw <3,6,4,7>, <0,0,0,0>
+ 2243493990U, // <4,0,7,1>: Cost 3 vmrghw <4,7,5,0>, LHS
+ 3775648970U, // <4,0,7,2>: Cost 5 vsldoi8 <2,u,4,0>, <7,2,6,3>
+ 3802191110U, // <4,0,7,3>: Cost 4 vsldoi8 <7,3,4,0>, <7,3,4,0>
+ 3317236050U, // <4,0,7,4>: Cost 4 vmrghw <4,7,5,0>, <0,4,1,5>
+ 3803518376U, // <4,0,7,5>: Cost 4 vsldoi8 <7,5,4,0>, <7,5,4,0>
+ 3317236214U, // <4,0,7,6>: Cost 5 vmrghw <4,7,5,0>, <0,6,1,7>
+ 3798873708U, // <4,0,7,7>: Cost 4 vsldoi8 <6,7,4,0>, <7,7,7,7>
+ 2243494557U, // <4,0,7,u>: Cost 3 vmrghw <4,7,5,0>, LHS
+ 1170546688U, // <4,0,u,0>: Cost 2 vmrghw RHS, <0,0,0,0>
+ 96804966U, // <4,0,u,1>: Cost 1 vmrghw RHS, LHS
+ 1685275293U, // <4,0,u,2>: Cost 2 vsldoi12 <1,2,3,4>, LHS
+ 2637883863U, // <4,0,u,3>: Cost 3 vsldoi4 <3,4,0,u>, <3,4,0,u>
+ 1170547026U, // <4,0,u,4>: Cost 2 vmrghw RHS, <0,4,1,5>
+ 2698590362U, // <4,0,u,5>: Cost 3 vsldoi8 <2,3,4,0>, RHS
+ 2244289014U, // <4,0,u,6>: Cost 3 vmrghw RHS, <0,6,1,7>
+ 2661774651U, // <4,0,u,7>: Cost 3 vsldoi4 <7,4,0,u>, <7,4,0,u>
+ 96805533U, // <4,0,u,u>: Cost 1 vmrghw RHS, LHS
+ 2667749478U, // <4,1,0,0>: Cost 3 vsldoi4 <u,4,1,0>, LHS
+ 2689966182U, // <4,1,0,1>: Cost 3 vsldoi8 <0,u,4,1>, LHS
+ 2238571418U, // <4,1,0,2>: Cost 3 vmrghw <4,0,1,2>, <1,2,3,4>
+ 3711633880U, // <4,1,0,3>: Cost 4 vsldoi4 <3,4,1,0>, <3,4,1,0>
+ 2689966418U, // <4,1,0,4>: Cost 3 vsldoi8 <0,u,4,1>, <0,4,1,5>
+ 3361046866U, // <4,1,0,5>: Cost 4 vmrglw <0,u,4,0>, <0,4,1,5>
+ 3741495802U, // <4,1,0,6>: Cost 4 vsldoi4 <u,4,1,0>, <6,2,7,3>
+ 3741496314U, // <4,1,0,7>: Cost 4 vsldoi4 <u,4,1,0>, <7,0,1,2>
+ 2689966765U, // <4,1,0,u>: Cost 3 vsldoi8 <0,u,4,1>, <0,u,4,1>
+ 3764372222U, // <4,1,1,0>: Cost 4 vsldoi8 <1,0,4,1>, <1,0,4,1>
+ 2758206263U, // <4,1,1,1>: Cost 3 vsldoi12 <1,1,1,4>, <1,1,1,4>
+ 2698593178U, // <4,1,1,2>: Cost 3 vsldoi8 <2,3,4,1>, <1,2,3,4>
+ 3361057810U, // <4,1,1,3>: Cost 4 vmrglw <0,u,4,1>, <4,2,1,3>
+ 3827303250U, // <4,1,1,4>: Cost 4 vsldoi12 <0,3,1,4>, <1,1,4,4>
+ 2287313234U, // <4,1,1,5>: Cost 3 vmrglw <0,u,4,1>, <0,4,1,5>
+ 3763709171U, // <4,1,1,6>: Cost 4 vsldoi8 <0,u,4,1>, <1,6,5,7>
+ 3361058138U, // <4,1,1,7>: Cost 4 vmrglw <0,u,4,1>, <4,6,1,7>
+ 2239759744U, // <4,1,1,u>: Cost 3 vmrghw <4,1,u,3>, <1,u,3,4>
+ 2637906022U, // <4,1,2,0>: Cost 3 vsldoi4 <3,4,1,2>, LHS
+ 2637906842U, // <4,1,2,1>: Cost 3 vsldoi4 <3,4,1,2>, <1,2,3,4>
+ 3763709544U, // <4,1,2,2>: Cost 4 vsldoi8 <0,u,4,1>, <2,2,2,2>
+ 1685275546U, // <4,1,2,3>: Cost 2 vsldoi12 <1,2,3,4>, <1,2,3,4>
+ 2637909302U, // <4,1,2,4>: Cost 3 vsldoi4 <3,4,1,2>, RHS
+ 3361063250U, // <4,1,2,5>: Cost 4 vmrglw <0,u,4,2>, <0,4,1,5>
+ 3763709882U, // <4,1,2,6>: Cost 4 vsldoi8 <0,u,4,1>, <2,6,3,7>
+ 3735541054U, // <4,1,2,7>: Cost 4 vsldoi4 <7,4,1,2>, <7,4,1,2>
+ 1685644231U, // <4,1,2,u>: Cost 2 vsldoi12 <1,2,u,4>, <1,2,u,4>
+ 2702575792U, // <4,1,3,0>: Cost 3 vsldoi8 <3,0,4,1>, <3,0,4,1>
+ 3832759257U, // <4,1,3,1>: Cost 4 vsldoi12 <1,2,3,4>, <1,3,1,4>
+ 3833349090U, // <4,1,3,2>: Cost 4 vsldoi12 <1,3,2,4>, <1,3,2,4>
+ 3763710364U, // <4,1,3,3>: Cost 4 vsldoi8 <0,u,4,1>, <3,3,3,3>
+ 2707884546U, // <4,1,3,4>: Cost 3 vsldoi8 <3,u,4,1>, <3,4,5,6>
+ 3361071442U, // <4,1,3,5>: Cost 4 vmrglw <0,u,4,3>, <0,4,1,5>
+ 3772336796U, // <4,1,3,6>: Cost 4 vsldoi8 <2,3,4,1>, <3,6,4,7>
+ 3775654595U, // <4,1,3,7>: Cost 5 vsldoi8 <2,u,4,1>, <3,7,0,1>
+ 2707884856U, // <4,1,3,u>: Cost 3 vsldoi8 <3,u,4,1>, <3,u,4,1>
+ 2667782246U, // <4,1,4,0>: Cost 3 vsldoi4 <u,4,1,4>, LHS
+ 2241463092U, // <4,1,4,1>: Cost 3 vmrghw <4,4,4,4>, <1,1,1,1>
+ 2241553306U, // <4,1,4,2>: Cost 3 vmrghw <4,4,5,6>, <1,2,3,4>
+ 3827303484U, // <4,1,4,3>: Cost 4 vsldoi12 <0,3,1,4>, <1,4,3,4>
+ 2667785424U, // <4,1,4,4>: Cost 3 vsldoi4 <u,4,1,4>, <4,4,4,4>
+ 2689969462U, // <4,1,4,5>: Cost 3 vsldoi8 <0,u,4,1>, RHS
+ 3763711322U, // <4,1,4,6>: Cost 4 vsldoi8 <0,u,4,1>, <4,6,1,7>
+ 3867116636U, // <4,1,4,7>: Cost 4 vsldoi12 <7,0,1,4>, <1,4,7,0>
+ 2689969705U, // <4,1,4,u>: Cost 3 vsldoi8 <0,u,4,1>, RHS
+ 1546273106U, // <4,1,5,0>: Cost 2 vsldoi4 <0,4,1,5>, <0,4,1,5>
+ 1168556852U, // <4,1,5,1>: Cost 2 vmrghw RHS, <1,1,1,1>
+ 1168556950U, // <4,1,5,2>: Cost 2 vmrghw RHS, <1,2,3,0>
+ 2620016790U, // <4,1,5,3>: Cost 3 vsldoi4 <0,4,1,5>, <3,0,1,2>
+ 1546276150U, // <4,1,5,4>: Cost 2 vsldoi4 <0,4,1,5>, RHS
+ 2620018692U, // <4,1,5,5>: Cost 3 vsldoi4 <0,4,1,5>, <5,5,5,5>
+ 2242299087U, // <4,1,5,6>: Cost 3 vmrghw RHS, <1,6,1,7>
+ 2667795450U, // <4,1,5,7>: Cost 3 vsldoi4 <u,4,1,5>, <7,0,1,2>
+ 1546278702U, // <4,1,5,u>: Cost 2 vsldoi4 <0,4,1,5>, LHS
+ 3781628193U, // <4,1,6,0>: Cost 4 vsldoi8 <3,u,4,1>, <6,0,1,2>
+ 3832759503U, // <4,1,6,1>: Cost 4 vsldoi12 <1,2,3,4>, <1,6,1,7>
+ 3316261786U, // <4,1,6,2>: Cost 4 vmrghw <4,6,0,7>, <1,2,3,4>
+ 3781628466U, // <4,1,6,3>: Cost 4 vsldoi8 <3,u,4,1>, <6,3,4,5>
+ 3827303658U, // <4,1,6,4>: Cost 4 vsldoi12 <0,3,1,4>, <1,6,4,7>
+ 3361096018U, // <4,1,6,5>: Cost 4 vmrglw <0,u,4,6>, <0,4,1,5>
+ 3788264248U, // <4,1,6,6>: Cost 4 vsldoi8 <5,0,4,1>, <6,6,6,6>
+ 3788264270U, // <4,1,6,7>: Cost 4 vsldoi8 <5,0,4,1>, <6,7,0,1>
+ 3832759566U, // <4,1,6,u>: Cost 4 vsldoi12 <1,2,3,4>, <1,6,u,7>
+ 2726466580U, // <4,1,7,0>: Cost 3 vsldoi8 <7,0,4,1>, <7,0,4,1>
+ 3377692682U, // <4,1,7,1>: Cost 4 vmrglw <3,6,4,7>, <0,0,1,1>
+ 3377694870U, // <4,1,7,2>: Cost 4 vmrglw <3,6,4,7>, <3,0,1,2>
+ 3802199303U, // <4,1,7,3>: Cost 4 vsldoi8 <7,3,4,1>, <7,3,4,1>
+ 2731775334U, // <4,1,7,4>: Cost 3 vsldoi8 <7,u,4,1>, <7,4,5,6>
+ 3377693010U, // <4,1,7,5>: Cost 4 vmrglw <3,6,4,7>, <0,4,1,5>
+ 3365749804U, // <4,1,7,6>: Cost 5 vmrglw <1,6,4,7>, <1,4,1,6>
+ 3788265068U, // <4,1,7,7>: Cost 4 vsldoi8 <5,0,4,1>, <7,7,7,7>
+ 2731775644U, // <4,1,7,u>: Cost 3 vsldoi8 <7,u,4,1>, <7,u,4,1>
+ 1546297685U, // <4,1,u,0>: Cost 2 vsldoi4 <0,4,1,u>, <0,4,1,u>
+ 1170547508U, // <4,1,u,1>: Cost 2 vmrghw RHS, <1,1,1,1>
+ 1170547606U, // <4,1,u,2>: Cost 2 vmrghw RHS, <1,2,3,0>
+ 1689257344U, // <4,1,u,3>: Cost 2 vsldoi12 <1,u,3,4>, <1,u,3,4>
+ 1546300726U, // <4,1,u,4>: Cost 2 vsldoi4 <0,4,1,u>, RHS
+ 2284716370U, // <4,1,u,5>: Cost 3 vmrglw <0,4,4,u>, <0,4,1,5>
+ 2244289743U, // <4,1,u,6>: Cost 3 vmrghw RHS, <1,6,1,7>
+ 2667820026U, // <4,1,u,7>: Cost 3 vsldoi4 <u,4,1,u>, <7,0,1,2>
+ 1546303278U, // <4,1,u,u>: Cost 2 vsldoi4 <0,4,1,u>, LHS
+ 3729621094U, // <4,2,0,0>: Cost 4 vsldoi4 <6,4,2,0>, LHS
+ 3763716198U, // <4,2,0,1>: Cost 4 vsldoi8 <0,u,4,2>, LHS
+ 2238858856U, // <4,2,0,2>: Cost 3 vmrghw <4,0,5,1>, <2,2,2,2>
+ 2295930982U, // <4,2,0,3>: Cost 3 vmrglw <2,3,4,0>, LHS
+ 3763716434U, // <4,2,0,4>: Cost 4 vsldoi8 <0,u,4,2>, <0,4,1,5>
+ 2238859107U, // <4,2,0,5>: Cost 3 vmrghw <4,0,5,1>, <2,5,3,1>
+ 2238859194U, // <4,2,0,6>: Cost 3 vmrghw <4,0,5,1>, <2,6,3,7>
+ 3312601066U, // <4,2,0,7>: Cost 4 vmrghw <4,0,5,1>, <2,7,0,1>
+ 2295930987U, // <4,2,0,u>: Cost 3 vmrglw <2,3,4,0>, LHS
+ 3699769446U, // <4,2,1,0>: Cost 4 vsldoi4 <1,4,2,1>, LHS
+ 3313255971U, // <4,2,1,1>: Cost 4 vmrghw <4,1,5,0>, <2,1,3,5>
+ 3361056360U, // <4,2,1,2>: Cost 4 vmrglw <0,u,4,1>, <2,2,2,2>
+ 2287312998U, // <4,2,1,3>: Cost 3 vmrglw <0,u,4,1>, LHS
+ 3788932148U, // <4,2,1,4>: Cost 4 vsldoi8 <5,1,4,2>, <1,4,2,5>
+ 3313256290U, // <4,2,1,5>: Cost 4 vmrghw <4,1,5,0>, <2,5,3,0>
+ 3838289469U, // <4,2,1,6>: Cost 4 vsldoi12 <2,1,6,4>, <2,1,6,4>
+ 3369682865U, // <4,2,1,7>: Cost 5 vmrglw <2,3,4,1>, <2,6,2,7>
+ 2287313003U, // <4,2,1,u>: Cost 3 vmrglw <0,u,4,1>, LHS
+ 3838658133U, // <4,2,2,0>: Cost 4 vsldoi12 <2,2,2,4>, <2,2,0,1>
+ 3711722394U, // <4,2,2,1>: Cost 4 vsldoi4 <3,4,2,2>, <1,2,3,4>
+ 2759018088U, // <4,2,2,2>: Cost 3 vsldoi12 <1,2,3,4>, <2,2,2,2>
+ 2759018098U, // <4,2,2,3>: Cost 3 vsldoi12 <1,2,3,4>, <2,2,3,3>
+ 3838658168U, // <4,2,2,4>: Cost 4 vsldoi12 <2,2,2,4>, <2,2,4,0>
+ 3369027341U, // <4,2,2,5>: Cost 4 vmrglw <2,2,4,2>, <2,4,2,5>
+ 2240227258U, // <4,2,2,6>: Cost 3 vmrghw <4,2,5,6>, <2,6,3,7>
+ 3735614791U, // <4,2,2,7>: Cost 4 vsldoi4 <7,4,2,2>, <7,4,2,2>
+ 2759018143U, // <4,2,2,u>: Cost 3 vsldoi12 <1,2,3,4>, <2,2,u,3>
+ 2759018150U, // <4,2,3,0>: Cost 3 vsldoi12 <1,2,3,4>, <2,3,0,1>
+ 3831948975U, // <4,2,3,1>: Cost 4 vsldoi12 <1,1,1,4>, <2,3,1,1>
+ 3832759993U, // <4,2,3,2>: Cost 4 vsldoi12 <1,2,3,4>, <2,3,2,2>
+ 2759018180U, // <4,2,3,3>: Cost 3 vsldoi12 <1,2,3,4>, <2,3,3,4>
+ 2759018185U, // <4,2,3,4>: Cost 3 vsldoi12 <1,2,3,4>, <2,3,4,0>
+ 3839542998U, // <4,2,3,5>: Cost 4 vsldoi12 <2,3,5,4>, <2,3,5,4>
+ 3314640826U, // <4,2,3,6>: Cost 4 vmrghw <4,3,5,7>, <2,6,3,7>
+ 2765948648U, // <4,2,3,7>: Cost 3 vsldoi12 <2,3,7,4>, <2,3,7,4>
+ 2759018222U, // <4,2,3,u>: Cost 3 vsldoi12 <1,2,3,4>, <2,3,u,1>
+ 3838658295U, // <4,2,4,0>: Cost 4 vsldoi12 <2,2,2,4>, <2,4,0,1>
+ 3315205667U, // <4,2,4,1>: Cost 4 vmrghw <4,4,4,4>, <2,1,3,5>
+ 2241463912U, // <4,2,4,2>: Cost 3 vmrghw <4,4,4,4>, <2,2,2,2>
+ 1234829414U, // <4,2,4,3>: Cost 2 vmrglw <4,4,4,4>, LHS
+ 2241464085U, // <4,2,4,4>: Cost 3 vmrghw <4,4,4,4>, <2,4,3,4>
+ 2241546087U, // <4,2,4,5>: Cost 3 vmrghw <4,4,5,5>, <2,5,3,5>
+ 2241464250U, // <4,2,4,6>: Cost 3 vmrghw <4,4,4,4>, <2,6,3,7>
+ 3741602873U, // <4,2,4,7>: Cost 4 vsldoi4 <u,4,2,4>, <7,0,u,2>
+ 1234829419U, // <4,2,4,u>: Cost 2 vmrglw <4,4,4,4>, LHS
+ 2626060390U, // <4,2,5,0>: Cost 3 vsldoi4 <1,4,2,5>, LHS
+ 2626061364U, // <4,2,5,1>: Cost 3 vsldoi4 <1,4,2,5>, <1,4,2,5>
+ 1168557672U, // <4,2,5,2>: Cost 2 vmrghw RHS, <2,2,2,2>
+ 1222230118U, // <4,2,5,3>: Cost 2 vmrglw <2,3,4,5>, LHS
+ 2626063670U, // <4,2,5,4>: Cost 3 vsldoi4 <1,4,2,5>, RHS
+ 2242299752U, // <4,2,5,5>: Cost 3 vmrghw RHS, <2,5,3,6>
+ 1168558010U, // <4,2,5,6>: Cost 2 vmrghw RHS, <2,6,3,7>
+ 2242299882U, // <4,2,5,7>: Cost 3 vmrghw RHS, <2,7,0,1>
+ 1222230123U, // <4,2,5,u>: Cost 2 vmrglw <2,3,4,5>, LHS
+ 3711754342U, // <4,2,6,0>: Cost 4 vsldoi4 <3,4,2,6>, LHS
+ 3711755162U, // <4,2,6,1>: Cost 4 vsldoi4 <3,4,2,6>, <1,2,3,4>
+ 3838658481U, // <4,2,6,2>: Cost 4 vsldoi12 <2,2,2,4>, <2,6,2,7>
+ 2759018426U, // <4,2,6,3>: Cost 3 vsldoi12 <1,2,3,4>, <2,6,3,7>
+ 3838658499U, // <4,2,6,4>: Cost 4 vsldoi12 <2,2,2,4>, <2,6,4,7>
+ 3735646310U, // <4,2,6,5>: Cost 4 vsldoi4 <7,4,2,6>, <5,6,7,4>
+ 3316590522U, // <4,2,6,6>: Cost 4 vmrghw <4,6,5,2>, <2,6,3,7>
+ 3798889331U, // <4,2,6,7>: Cost 4 vsldoi8 <6,7,4,2>, <6,7,4,2>
+ 2759018471U, // <4,2,6,u>: Cost 3 vsldoi12 <1,2,3,4>, <2,6,u,7>
+ 3874564074U, // <4,2,7,0>: Cost 4 vsldoi12 <u,2,3,4>, <2,7,0,1>
+ 3800880230U, // <4,2,7,1>: Cost 4 vsldoi8 <7,1,4,2>, <7,1,4,2>
+ 3371722344U, // <4,2,7,2>: Cost 4 vmrglw <2,6,4,7>, <2,2,2,2>
+ 2303950950U, // <4,2,7,3>: Cost 3 vmrglw <3,6,4,7>, LHS
+ 3371722346U, // <4,2,7,4>: Cost 4 vmrglw <2,6,4,7>, <2,2,2,4>
+ 3371722509U, // <4,2,7,5>: Cost 5 vmrglw <2,6,4,7>, <2,4,2,5>
+ 3317237690U, // <4,2,7,6>: Cost 4 vmrghw <4,7,5,0>, <2,6,3,7>
+ 3317237738U, // <4,2,7,7>: Cost 4 vmrghw <4,7,5,0>, <2,7,0,1>
+ 2303950955U, // <4,2,7,u>: Cost 3 vmrglw <3,6,4,7>, LHS
+ 2759018555U, // <4,2,u,0>: Cost 3 vsldoi12 <1,2,3,4>, <2,u,0,1>
+ 2626085943U, // <4,2,u,1>: Cost 3 vsldoi4 <1,4,2,u>, <1,4,2,u>
+ 1170548328U, // <4,2,u,2>: Cost 2 vmrghw RHS, <2,2,2,2>
+ 1222254694U, // <4,2,u,3>: Cost 2 vmrglw <2,3,4,u>, LHS
+ 2759018595U, // <4,2,u,4>: Cost 3 vsldoi12 <1,2,3,4>, <2,u,4,5>
+ 2244290408U, // <4,2,u,5>: Cost 3 vmrghw RHS, <2,5,3,6>
+ 1170548666U, // <4,2,u,6>: Cost 2 vmrghw RHS, <2,6,3,7>
+ 2769266813U, // <4,2,u,7>: Cost 3 vsldoi12 <2,u,7,4>, <2,u,7,4>
+ 1222254699U, // <4,2,u,u>: Cost 2 vmrglw <2,3,4,u>, LHS
+ 2238859414U, // <4,3,0,0>: Cost 3 vmrghw <4,0,5,1>, <3,0,1,2>
+ 2759018646U, // <4,3,0,1>: Cost 3 vsldoi12 <1,2,3,4>, <3,0,1,2>
+ 3312314708U, // <4,3,0,2>: Cost 4 vmrghw <4,0,1,2>, <3,2,4,3>
+ 2238859676U, // <4,3,0,3>: Cost 3 vmrghw <4,0,5,1>, <3,3,3,3>
+ 2295931802U, // <4,3,0,4>: Cost 3 vmrglw <2,3,4,0>, <1,2,3,4>
+ 3735670886U, // <4,3,0,5>: Cost 4 vsldoi4 <7,4,3,0>, <5,6,7,4>
+ 3312315036U, // <4,3,0,6>: Cost 4 vmrghw <4,0,1,2>, <3,6,4,7>
+ 3369674682U, // <4,3,0,7>: Cost 4 vmrglw <2,3,4,0>, <2,6,3,7>
+ 2759018709U, // <4,3,0,u>: Cost 3 vsldoi12 <1,2,3,4>, <3,0,u,2>
+ 3361055638U, // <4,3,1,0>: Cost 4 vmrglw <0,u,4,1>, <1,2,3,0>
+ 3831949542U, // <4,3,1,1>: Cost 4 vsldoi12 <1,1,1,4>, <3,1,1,1>
+ 2703917978U, // <4,3,1,2>: Cost 3 vsldoi8 <3,2,4,3>, <1,2,3,4>
+ 3361056370U, // <4,3,1,3>: Cost 4 vmrglw <0,u,4,1>, <2,2,3,3>
+ 2295939994U, // <4,3,1,4>: Cost 3 vmrglw <2,3,4,1>, <1,2,3,4>
+ 3361056291U, // <4,3,1,5>: Cost 4 vmrglw <0,u,4,1>, <2,1,3,5>
+ 3378972520U, // <4,3,1,6>: Cost 4 vmrglw <3,u,4,1>, <2,5,3,6>
+ 3361056698U, // <4,3,1,7>: Cost 4 vmrglw <0,u,4,1>, <2,6,3,7>
+ 2703917978U, // <4,3,1,u>: Cost 3 vsldoi8 <3,2,4,3>, <1,2,3,4>
+ 3832760624U, // <4,3,2,0>: Cost 4 vsldoi12 <1,2,3,4>, <3,2,0,3>
+ 3711796122U, // <4,3,2,1>: Cost 4 vsldoi4 <3,4,3,2>, <1,2,3,4>
+ 3832760641U, // <4,3,2,2>: Cost 4 vsldoi12 <1,2,3,4>, <3,2,2,2>
+ 2770962764U, // <4,3,2,3>: Cost 3 vsldoi12 <3,2,3,4>, <3,2,3,4>
+ 2759018836U, // <4,3,2,4>: Cost 3 vsldoi12 <1,2,3,4>, <3,2,4,3>
+ 3827304802U, // <4,3,2,5>: Cost 5 vsldoi12 <0,3,1,4>, <3,2,5,u>
+ 3832760678U, // <4,3,2,6>: Cost 4 vsldoi12 <1,2,3,4>, <3,2,6,3>
+ 3859597679U, // <4,3,2,7>: Cost 4 vsldoi12 <5,6,7,4>, <3,2,7,3>
+ 2771331449U, // <4,3,2,u>: Cost 3 vsldoi12 <3,2,u,4>, <3,2,u,4>
+ 2240841878U, // <4,3,3,0>: Cost 3 vmrghw <4,3,5,0>, <3,0,1,2>
+ 3776997635U, // <4,3,3,1>: Cost 4 vsldoi8 <3,1,4,3>, <3,1,4,3>
+ 2703919444U, // <4,3,3,2>: Cost 3 vsldoi8 <3,2,4,3>, <3,2,4,3>
+ 2759018908U, // <4,3,3,3>: Cost 3 vsldoi12 <1,2,3,4>, <3,3,3,3>
+ 2759018918U, // <4,3,3,4>: Cost 3 vsldoi12 <1,2,3,4>, <3,3,4,4>
+ 3386951446U, // <4,3,3,5>: Cost 4 vmrglw <5,2,4,3>, <2,4,3,5>
+ 3777661596U, // <4,3,3,6>: Cost 4 vsldoi8 <3,2,4,3>, <3,6,4,7>
+ 3375007674U, // <4,3,3,7>: Cost 4 vmrglw <3,2,4,3>, <2,6,3,7>
+ 2707901242U, // <4,3,3,u>: Cost 3 vsldoi8 <3,u,4,3>, <3,u,4,3>
+ 2759018960U, // <4,3,4,0>: Cost 3 vsldoi12 <1,2,3,4>, <3,4,0,1>
+ 2759018970U, // <4,3,4,1>: Cost 3 vsldoi12 <1,2,3,4>, <3,4,1,2>
+ 2632099605U, // <4,3,4,2>: Cost 3 vsldoi4 <2,4,3,4>, <2,4,3,4>
+ 2241464732U, // <4,3,4,3>: Cost 3 vmrghw <4,4,4,4>, <3,3,3,3>
+ 2759019000U, // <4,3,4,4>: Cost 3 vsldoi12 <1,2,3,4>, <3,4,4,5>
+ 2753563138U, // <4,3,4,5>: Cost 3 vsldoi12 <0,3,1,4>, <3,4,5,6>
+ 3777662316U, // <4,3,4,6>: Cost 4 vsldoi8 <3,2,4,3>, <4,6,3,7>
+ 2308573114U, // <4,3,4,7>: Cost 3 vmrglw <4,4,4,4>, <2,6,3,7>
+ 2759019032U, // <4,3,4,u>: Cost 3 vsldoi12 <1,2,3,4>, <3,4,u,1>
+ 1168558230U, // <4,3,5,0>: Cost 2 vmrghw RHS, <3,0,1,2>
+ 2242300134U, // <4,3,5,1>: Cost 3 vmrghw RHS, <3,1,1,1>
+ 2632107798U, // <4,3,5,2>: Cost 3 vsldoi4 <2,4,3,5>, <2,4,3,5>
+ 1168558492U, // <4,3,5,3>: Cost 2 vmrghw RHS, <3,3,3,3>
+ 1168558594U, // <4,3,5,4>: Cost 2 vmrghw RHS, <3,4,5,6>
+ 2295973654U, // <4,3,5,5>: Cost 3 vmrglw <2,3,4,5>, <2,4,3,5>
+ 2242300536U, // <4,3,5,6>: Cost 3 vmrghw RHS, <3,6,0,7>
+ 2295973818U, // <4,3,5,7>: Cost 3 vmrglw <2,3,4,5>, <2,6,3,7>
+ 1168558878U, // <4,3,5,u>: Cost 2 vmrghw RHS, <3,u,1,2>
+ 3832760952U, // <4,3,6,0>: Cost 4 vsldoi12 <1,2,3,4>, <3,6,0,7>
+ 3711828890U, // <4,3,6,1>: Cost 4 vsldoi4 <3,4,3,6>, <1,2,3,4>
+ 3316484436U, // <4,3,6,2>: Cost 4 vmrghw <4,6,3,7>, <3,2,4,3>
+ 3711830512U, // <4,3,6,3>: Cost 4 vsldoi4 <3,4,3,6>, <3,4,3,6>
+ 2759019164U, // <4,3,6,4>: Cost 3 vsldoi12 <1,2,3,4>, <3,6,4,7>
+ 3361097251U, // <4,3,6,5>: Cost 5 vmrglw <0,u,4,6>, <2,1,3,5>
+ 3316624045U, // <4,3,6,6>: Cost 4 vmrghw <4,6,5,6>, <3,6,6,6>
+ 2773912244U, // <4,3,6,7>: Cost 3 vsldoi12 <3,6,7,4>, <3,6,7,4>
+ 2759019164U, // <4,3,6,u>: Cost 3 vsldoi12 <1,2,3,4>, <3,6,4,7>
+ 3377693590U, // <4,3,7,0>: Cost 4 vmrglw <3,6,4,7>, <1,2,3,0>
+ 3365751680U, // <4,3,7,1>: Cost 5 vmrglw <1,6,4,7>, <4,0,3,1>
+ 2727810232U, // <4,3,7,2>: Cost 3 vsldoi8 <7,2,4,3>, <7,2,4,3>
+ 3377694322U, // <4,3,7,3>: Cost 4 vmrglw <3,6,4,7>, <2,2,3,3>
+ 2303951770U, // <4,3,7,4>: Cost 3 vmrglw <3,6,4,7>, <1,2,3,4>
+ 3741700198U, // <4,3,7,5>: Cost 4 vsldoi4 <u,4,3,7>, <5,6,7,4>
+ 3377695216U, // <4,3,7,6>: Cost 4 vmrglw <3,6,4,7>, <3,4,3,6>
+ 3375703994U, // <4,3,7,7>: Cost 4 vmrglw <3,3,4,7>, <2,6,3,7>
+ 2731792030U, // <4,3,7,u>: Cost 3 vsldoi8 <7,u,4,3>, <7,u,4,3>
+ 1170548886U, // <4,3,u,0>: Cost 2 vmrghw RHS, <3,0,1,2>
+ 2759019294U, // <4,3,u,1>: Cost 3 vsldoi12 <1,2,3,4>, <3,u,1,2>
+ 2632132377U, // <4,3,u,2>: Cost 3 vsldoi4 <2,4,3,u>, <2,4,3,u>
+ 1170549148U, // <4,3,u,3>: Cost 2 vmrghw RHS, <3,3,3,3>
+ 1170549250U, // <4,3,u,4>: Cost 2 vmrghw RHS, <3,4,5,6>
+ 2759019334U, // <4,3,u,5>: Cost 3 vsldoi12 <1,2,3,4>, <3,u,5,6>
+ 2244291192U, // <4,3,u,6>: Cost 3 vmrghw RHS, <3,6,0,7>
+ 2295998394U, // <4,3,u,7>: Cost 3 vmrglw <2,3,4,u>, <2,6,3,7>
+ 1170549534U, // <4,3,u,u>: Cost 2 vmrghw RHS, <3,u,1,2>
+ 1165118354U, // <4,4,0,0>: Cost 2 vmrghw <4,0,5,1>, <4,0,5,1>
+ 1637482598U, // <4,4,0,1>: Cost 2 vsldoi8 <4,4,4,4>, LHS
+ 3711854285U, // <4,4,0,2>: Cost 4 vsldoi4 <3,4,4,0>, <2,3,4,4>
+ 3827305344U, // <4,4,0,3>: Cost 4 vsldoi12 <0,3,1,4>, <4,0,3,1>
+ 2711224658U, // <4,4,0,4>: Cost 3 vsldoi8 <4,4,4,4>, <0,4,1,5>
+ 1165118774U, // <4,4,0,5>: Cost 2 vmrghw <4,0,5,1>, RHS
+ 3312602489U, // <4,4,0,6>: Cost 4 vmrghw <4,0,5,1>, <4,6,5,2>
+ 3369675420U, // <4,4,0,7>: Cost 4 vmrglw <2,3,4,0>, <3,6,4,7>
+ 1165119017U, // <4,4,0,u>: Cost 2 vmrghw <4,0,5,1>, RHS
+ 3369682633U, // <4,4,1,0>: Cost 4 vmrglw <2,3,4,1>, <2,3,4,0>
+ 2287313581U, // <4,4,1,1>: Cost 3 vmrglw <0,u,4,1>, <0,u,4,1>
+ 2759019466U, // <4,4,1,2>: Cost 3 vsldoi12 <1,2,3,4>, <4,1,2,3>
+ 3369683284U, // <4,4,1,3>: Cost 4 vmrglw <2,3,4,1>, <3,2,4,3>
+ 2311204048U, // <4,4,1,4>: Cost 3 vmrglw <4,u,4,1>, <4,4,4,4>
+ 2239319350U, // <4,4,1,5>: Cost 3 vmrghw <4,1,2,3>, RHS
+ 3784967411U, // <4,4,1,6>: Cost 4 vsldoi8 <4,4,4,4>, <1,6,5,7>
+ 3369683612U, // <4,4,1,7>: Cost 4 vmrglw <2,3,4,1>, <3,6,4,7>
+ 2763000832U, // <4,4,1,u>: Cost 3 vsldoi12 <1,u,3,4>, <4,1,u,3>
+ 3711869030U, // <4,4,2,0>: Cost 4 vsldoi4 <3,4,4,2>, LHS
+ 3711869850U, // <4,4,2,1>: Cost 4 vsldoi4 <3,4,4,2>, <1,2,3,4>
+ 2240203830U, // <4,4,2,2>: Cost 3 vmrghw <4,2,5,3>, <4,2,5,3>
+ 2698618573U, // <4,4,2,3>: Cost 3 vsldoi8 <2,3,4,4>, <2,3,4,4>
+ 2711226133U, // <4,4,2,4>: Cost 3 vsldoi8 <4,4,4,4>, <2,4,3,4>
+ 2240204086U, // <4,4,2,5>: Cost 3 vmrghw <4,2,5,3>, RHS
+ 2711226298U, // <4,4,2,6>: Cost 3 vsldoi8 <4,4,4,4>, <2,6,3,7>
+ 3832761416U, // <4,4,2,7>: Cost 4 vsldoi12 <1,2,3,4>, <4,2,7,3>
+ 2701936738U, // <4,4,2,u>: Cost 3 vsldoi8 <2,u,4,4>, <2,u,4,4>
+ 2711226518U, // <4,4,3,0>: Cost 3 vsldoi8 <4,4,4,4>, <3,0,1,2>
+ 3777005828U, // <4,4,3,1>: Cost 4 vsldoi8 <3,1,4,4>, <3,1,4,4>
+ 3832761453U, // <4,4,3,2>: Cost 4 vsldoi12 <1,2,3,4>, <4,3,2,4>
+ 2301266260U, // <4,4,3,3>: Cost 3 vmrglw <3,2,4,3>, <3,2,4,3>
+ 2705254903U, // <4,4,3,4>: Cost 3 vsldoi8 <3,4,4,4>, <3,4,4,4>
+ 2240843062U, // <4,4,3,5>: Cost 3 vmrghw <4,3,5,0>, RHS
+ 3832761489U, // <4,4,3,6>: Cost 4 vsldoi12 <1,2,3,4>, <4,3,6,4>
+ 3375008412U, // <4,4,3,7>: Cost 4 vmrglw <3,2,4,3>, <3,6,4,7>
+ 2301266260U, // <4,4,3,u>: Cost 3 vmrglw <3,2,4,3>, <3,2,4,3>
+ 1570373734U, // <4,4,4,0>: Cost 2 vsldoi4 <4,4,4,4>, LHS
+ 2308574089U, // <4,4,4,1>: Cost 3 vmrglw <4,4,4,4>, <4,0,4,1>
+ 2644117096U, // <4,4,4,2>: Cost 3 vsldoi4 <4,4,4,4>, <2,2,2,2>
+ 2638146039U, // <4,4,4,3>: Cost 3 vsldoi4 <3,4,4,4>, <3,4,4,4>
+ 229035318U, // <4,4,4,4>: Cost 1 vspltisw0 RHS
+ 1167723830U, // <4,4,4,5>: Cost 2 vmrghw <4,4,4,4>, RHS
+ 2644120058U, // <4,4,4,6>: Cost 3 vsldoi4 <4,4,4,4>, <6,2,7,3>
+ 2662036827U, // <4,4,4,7>: Cost 3 vsldoi4 <7,4,4,4>, <7,4,4,4>
+ 229035318U, // <4,4,4,u>: Cost 1 vspltisw0 RHS
+ 1168558994U, // <4,4,5,0>: Cost 2 vmrghw RHS, <4,0,5,1>
+ 2638152602U, // <4,4,5,1>: Cost 3 vsldoi4 <3,4,4,5>, <1,2,3,4>
+ 2242300981U, // <4,4,5,2>: Cost 3 vmrghw RHS, <4,2,5,2>
+ 2638154232U, // <4,4,5,3>: Cost 3 vsldoi4 <3,4,4,5>, <3,4,4,5>
+ 1168559322U, // <4,4,5,4>: Cost 2 vmrghw RHS, <4,4,5,5>
+ 94817590U, // <4,4,5,5>: Cost 1 vmrghw RHS, RHS
+ 1685278006U, // <4,4,5,6>: Cost 2 vsldoi12 <1,2,3,4>, RHS
+ 2242309576U, // <4,4,5,7>: Cost 3 vmrghw RHS, <4,7,5,0>
+ 94817833U, // <4,4,5,u>: Cost 1 vmrghw RHS, RHS
+ 3316591506U, // <4,4,6,0>: Cost 4 vmrghw <4,6,5,2>, <4,0,5,1>
+ 3758428587U, // <4,4,6,1>: Cost 4 vsldoi8 <0,0,4,4>, <6,1,7,5>
+ 2711228922U, // <4,4,6,2>: Cost 3 vsldoi8 <4,4,4,4>, <6,2,7,3>
+ 3796251185U, // <4,4,6,3>: Cost 4 vsldoi8 <6,3,4,4>, <6,3,4,4>
+ 2711229085U, // <4,4,6,4>: Cost 3 vsldoi8 <4,4,4,4>, <6,4,7,4>
+ 2242850102U, // <4,4,6,5>: Cost 3 vmrghw <4,6,5,2>, RHS
+ 2242850169U, // <4,4,6,6>: Cost 3 vmrghw <4,6,5,2>, <4,6,5,2>
+ 2725163893U, // <4,4,6,7>: Cost 3 vsldoi8 <6,7,4,4>, <6,7,4,4>
+ 2242850345U, // <4,4,6,u>: Cost 3 vmrghw <4,6,5,2>, RHS
+ 2711229434U, // <4,4,7,0>: Cost 3 vsldoi8 <4,4,4,4>, <7,0,1,2>
+ 3377694410U, // <4,4,7,1>: Cost 4 vmrglw <3,6,4,7>, <2,3,4,1>
+ 3868593584U, // <4,4,7,2>: Cost 4 vsldoi12 <7,2,3,4>, <4,7,2,3>
+ 3377695060U, // <4,4,7,3>: Cost 4 vmrglw <3,6,4,7>, <3,2,4,3>
+ 2729145691U, // <4,4,7,4>: Cost 3 vsldoi8 <7,4,4,4>, <7,4,4,4>
+ 2243497270U, // <4,4,7,5>: Cost 3 vmrghw <4,7,5,0>, RHS
+ 3871542744U, // <4,4,7,6>: Cost 4 vsldoi12 <7,6,7,4>, <4,7,6,7>
+ 2303953564U, // <4,4,7,7>: Cost 3 vmrglw <3,6,4,7>, <3,6,4,7>
+ 2243497513U, // <4,4,7,u>: Cost 3 vmrghw <4,7,5,0>, RHS
+ 1170549650U, // <4,4,u,0>: Cost 2 vmrghw RHS, <4,0,5,1>
+ 1637488430U, // <4,4,u,1>: Cost 2 vsldoi8 <4,4,4,4>, LHS
+ 2244291637U, // <4,4,u,2>: Cost 3 vmrghw RHS, <4,2,5,2>
+ 2638178811U, // <4,4,u,3>: Cost 3 vsldoi4 <3,4,4,u>, <3,4,4,u>
+ 229035318U, // <4,4,u,4>: Cost 1 vspltisw0 RHS
+ 96808246U, // <4,4,u,5>: Cost 1 vmrghw RHS, RHS
+ 1685278249U, // <4,4,u,6>: Cost 2 vsldoi12 <1,2,3,4>, RHS
+ 2244292040U, // <4,4,u,7>: Cost 3 vmrghw RHS, <4,7,5,0>
+ 96808489U, // <4,4,u,u>: Cost 1 vmrghw RHS, RHS
+ 2698625024U, // <4,5,0,0>: Cost 3 vsldoi8 <2,3,4,5>, <0,0,0,0>
+ 1624883302U, // <4,5,0,1>: Cost 2 vsldoi8 <2,3,4,5>, LHS
+ 2638186190U, // <4,5,0,2>: Cost 3 vsldoi4 <3,4,5,0>, <2,3,4,5>
+ 2638187004U, // <4,5,0,3>: Cost 3 vsldoi4 <3,4,5,0>, <3,4,5,0>
+ 2687345005U, // <4,5,0,4>: Cost 3 vsldoi8 <0,4,4,5>, <0,4,4,5>
+ 2238861316U, // <4,5,0,5>: Cost 3 vmrghw <4,0,5,1>, <5,5,5,5>
+ 2662077302U, // <4,5,0,6>: Cost 3 vsldoi4 <7,4,5,0>, <6,7,4,5>
+ 2662077792U, // <4,5,0,7>: Cost 3 vsldoi4 <7,4,5,0>, <7,4,5,0>
+ 1624883869U, // <4,5,0,u>: Cost 2 vsldoi8 <2,3,4,5>, LHS
+ 3361057762U, // <4,5,1,0>: Cost 4 vmrglw <0,u,4,1>, <4,1,5,0>
+ 2691326803U, // <4,5,1,1>: Cost 3 vsldoi8 <1,1,4,5>, <1,1,4,5>
+ 2698625942U, // <4,5,1,2>: Cost 3 vsldoi8 <2,3,4,5>, <1,2,3,0>
+ 3361055659U, // <4,5,1,3>: Cost 4 vmrglw <0,u,4,1>, <1,2,5,3>
+ 3761087567U, // <4,5,1,4>: Cost 4 vsldoi8 <0,4,4,5>, <1,4,5,5>
+ 2693981335U, // <4,5,1,5>: Cost 3 vsldoi8 <1,5,4,5>, <1,5,4,5>
+ 2305231362U, // <4,5,1,6>: Cost 3 vmrglw <3,u,4,1>, <3,4,5,6>
+ 3361055987U, // <4,5,1,7>: Cost 4 vmrglw <0,u,4,1>, <1,6,5,7>
+ 2695972234U, // <4,5,1,u>: Cost 3 vsldoi8 <1,u,4,5>, <1,u,4,5>
+ 2638200934U, // <4,5,2,0>: Cost 3 vsldoi4 <3,4,5,2>, LHS
+ 3761088035U, // <4,5,2,1>: Cost 4 vsldoi8 <0,4,4,5>, <2,1,3,5>
+ 2697963133U, // <4,5,2,2>: Cost 3 vsldoi8 <2,2,4,5>, <2,2,4,5>
+ 1624884942U, // <4,5,2,3>: Cost 2 vsldoi8 <2,3,4,5>, <2,3,4,5>
+ 2698626838U, // <4,5,2,4>: Cost 3 vsldoi8 <2,3,4,5>, <2,4,3,5>
+ 3772368744U, // <4,5,2,5>: Cost 4 vsldoi8 <2,3,4,5>, <2,5,3,6>
+ 2698627002U, // <4,5,2,6>: Cost 3 vsldoi8 <2,3,4,5>, <2,6,3,7>
+ 3775023122U, // <4,5,2,7>: Cost 4 vsldoi8 <2,7,4,5>, <2,7,4,5>
+ 1628203107U, // <4,5,2,u>: Cost 2 vsldoi8 <2,u,4,5>, <2,u,4,5>
+ 2698627222U, // <4,5,3,0>: Cost 3 vsldoi8 <2,3,4,5>, <3,0,1,2>
+ 3765070057U, // <4,5,3,1>: Cost 4 vsldoi8 <1,1,4,5>, <3,1,1,4>
+ 2698627404U, // <4,5,3,2>: Cost 3 vsldoi8 <2,3,4,5>, <3,2,3,4>
+ 2698627484U, // <4,5,3,3>: Cost 3 vsldoi8 <2,3,4,5>, <3,3,3,3>
+ 2698627580U, // <4,5,3,4>: Cost 3 vsldoi8 <2,3,4,5>, <3,4,5,0>
+ 3779668553U, // <4,5,3,5>: Cost 4 vsldoi8 <3,5,4,5>, <3,5,4,5>
+ 2725169844U, // <4,5,3,6>: Cost 3 vsldoi8 <6,7,4,5>, <3,6,7,4>
+ 2707253995U, // <4,5,3,7>: Cost 3 vsldoi8 <3,7,4,5>, <3,7,4,5>
+ 2698627870U, // <4,5,3,u>: Cost 3 vsldoi8 <2,3,4,5>, <3,u,1,2>
+ 2638217318U, // <4,5,4,0>: Cost 3 vsldoi4 <3,4,5,4>, LHS
+ 2308574098U, // <4,5,4,1>: Cost 3 vmrglw <4,4,4,4>, <4,0,5,1>
+ 2698628150U, // <4,5,4,2>: Cost 3 vsldoi8 <2,3,4,5>, <4,2,5,3>
+ 2638219776U, // <4,5,4,3>: Cost 3 vsldoi4 <3,4,5,4>, <3,4,5,4>
+ 2698628314U, // <4,5,4,4>: Cost 3 vsldoi8 <2,3,4,5>, <4,4,5,5>
+ 1624886582U, // <4,5,4,5>: Cost 2 vsldoi8 <2,3,4,5>, RHS
+ 2698628478U, // <4,5,4,6>: Cost 3 vsldoi8 <2,3,4,5>, <4,6,5,7>
+ 2662110564U, // <4,5,4,7>: Cost 3 vsldoi4 <7,4,5,4>, <7,4,5,4>
+ 1624886825U, // <4,5,4,u>: Cost 2 vsldoi8 <2,3,4,5>, RHS
+ 1570455654U, // <4,5,5,0>: Cost 2 vsldoi4 <4,4,5,5>, LHS
+ 2312564250U, // <4,5,5,1>: Cost 3 vmrglw <5,1,4,5>, <4,u,5,1>
+ 2644199118U, // <4,5,5,2>: Cost 3 vsldoi4 <4,4,5,5>, <2,3,4,5>
+ 2295974966U, // <4,5,5,3>: Cost 3 vmrglw <2,3,4,5>, <4,2,5,3>
+ 1570458842U, // <4,5,5,4>: Cost 2 vsldoi4 <4,4,5,5>, <4,4,5,5>
+ 1168568324U, // <4,5,5,5>: Cost 2 vmrghw RHS, <5,5,5,5>
+ 1168568418U, // <4,5,5,6>: Cost 2 vmrghw RHS, <5,6,7,0>
+ 2295975294U, // <4,5,5,7>: Cost 3 vmrglw <2,3,4,5>, <4,6,5,7>
+ 1168716036U, // <4,5,5,u>: Cost 2 vmrghw RHS, <5,u,7,0>
+ 1564491878U, // <4,5,6,0>: Cost 2 vsldoi4 <3,4,5,6>, LHS
+ 2626290768U, // <4,5,6,1>: Cost 3 vsldoi4 <1,4,5,6>, <1,4,5,6>
+ 2632263465U, // <4,5,6,2>: Cost 3 vsldoi4 <2,4,5,6>, <2,4,5,6>
+ 1564494338U, // <4,5,6,3>: Cost 2 vsldoi4 <3,4,5,6>, <3,4,5,6>
+ 1564495158U, // <4,5,6,4>: Cost 2 vsldoi4 <3,4,5,6>, RHS
+ 2638237464U, // <4,5,6,5>: Cost 3 vsldoi4 <3,4,5,6>, <5,2,6,3>
+ 2656154253U, // <4,5,6,6>: Cost 3 vsldoi4 <6,4,5,6>, <6,4,5,6>
+ 27705344U, // <4,5,6,7>: Cost 0 copy RHS
+ 27705344U, // <4,5,6,u>: Cost 0 copy RHS
+ 2725172218U, // <4,5,7,0>: Cost 3 vsldoi8 <6,7,4,5>, <7,0,1,2>
+ 3859599489U, // <4,5,7,1>: Cost 4 vsldoi12 <5,6,7,4>, <5,7,1,4>
+ 2698630320U, // <4,5,7,2>: Cost 3 vsldoi8 <2,3,4,5>, <7,2,3,4>
+ 2728490251U, // <4,5,7,3>: Cost 3 vsldoi8 <7,3,4,5>, <7,3,4,5>
+ 2725172576U, // <4,5,7,4>: Cost 3 vsldoi8 <6,7,4,5>, <7,4,5,0>
+ 3317239812U, // <4,5,7,5>: Cost 4 vmrghw <4,7,5,0>, <5,5,5,5>
+ 2725172760U, // <4,5,7,6>: Cost 3 vsldoi8 <6,7,4,5>, <7,6,7,4>
+ 2725172844U, // <4,5,7,7>: Cost 3 vsldoi8 <6,7,4,5>, <7,7,7,7>
+ 2725172866U, // <4,5,7,u>: Cost 3 vsldoi8 <6,7,4,5>, <7,u,1,2>
+ 1564508262U, // <4,5,u,0>: Cost 2 vsldoi4 <3,4,5,u>, LHS
+ 1624889134U, // <4,5,u,1>: Cost 2 vsldoi8 <2,3,4,5>, LHS
+ 2698631045U, // <4,5,u,2>: Cost 3 vsldoi8 <2,3,4,5>, <u,2,3,0>
+ 1564510724U, // <4,5,u,3>: Cost 2 vsldoi4 <3,4,5,u>, <3,4,5,u>
+ 1564511542U, // <4,5,u,4>: Cost 2 vsldoi4 <3,4,5,u>, RHS
+ 1624889498U, // <4,5,u,5>: Cost 2 vsldoi8 <2,3,4,5>, RHS
+ 1170550882U, // <4,5,u,6>: Cost 2 vmrghw RHS, <5,6,7,0>
+ 27705344U, // <4,5,u,7>: Cost 0 copy RHS
+ 27705344U, // <4,5,u,u>: Cost 0 copy RHS
+ 3312595285U, // <4,6,0,0>: Cost 4 vmrghw <4,0,5,0>, <6,0,7,0>
+ 3763748966U, // <4,6,0,1>: Cost 4 vsldoi8 <0,u,4,6>, LHS
+ 2238861818U, // <4,6,0,2>: Cost 3 vmrghw <4,0,5,1>, <6,2,7,3>
+ 3767730432U, // <4,6,0,3>: Cost 4 vsldoi8 <1,5,4,6>, <0,3,1,4>
+ 3763749202U, // <4,6,0,4>: Cost 4 vsldoi8 <0,u,4,6>, <0,4,1,5>
+ 2238862059U, // <4,6,0,5>: Cost 3 vmrghw <4,0,5,1>, <6,5,7,1>
+ 2238862136U, // <4,6,0,6>: Cost 3 vmrghw <4,0,5,1>, <6,6,6,6>
+ 2295934262U, // <4,6,0,7>: Cost 3 vmrglw <2,3,4,0>, RHS
+ 2295934263U, // <4,6,0,u>: Cost 3 vmrglw <2,3,4,0>, RHS
+ 3378973999U, // <4,6,1,0>: Cost 4 vmrglw <3,u,4,1>, <4,5,6,0>
+ 3378974648U, // <4,6,1,1>: Cost 4 vmrglw <3,u,4,1>, <5,4,6,1>
+ 3779675034U, // <4,6,1,2>: Cost 4 vsldoi8 <3,5,4,6>, <1,2,3,4>
+ 3378974002U, // <4,6,1,3>: Cost 4 vmrglw <3,u,4,1>, <4,5,6,3>
+ 3378974003U, // <4,6,1,4>: Cost 4 vmrglw <3,u,4,1>, <4,5,6,4>
+ 3767731352U, // <4,6,1,5>: Cost 4 vsldoi8 <1,5,4,6>, <1,5,4,6>
+ 3378974734U, // <4,6,1,6>: Cost 4 vmrglw <3,u,4,1>, <5,5,6,6>
+ 2287316278U, // <4,6,1,7>: Cost 3 vmrglw <0,u,4,1>, RHS
+ 2287316279U, // <4,6,1,u>: Cost 3 vmrglw <0,u,4,1>, RHS
+ 3735904358U, // <4,6,2,0>: Cost 4 vsldoi4 <7,4,6,2>, LHS
+ 3763750435U, // <4,6,2,1>: Cost 5 vsldoi8 <0,u,4,6>, <2,1,3,5>
+ 3313938937U, // <4,6,2,2>: Cost 4 vmrghw <4,2,5,2>, <6,2,7,2>
+ 3772376782U, // <4,6,2,3>: Cost 4 vsldoi8 <2,3,4,6>, <2,3,4,5>
+ 3852890591U, // <4,6,2,4>: Cost 4 vsldoi12 <4,5,6,4>, <6,2,4,3>
+ 3735908454U, // <4,6,2,5>: Cost 4 vsldoi4 <7,4,6,2>, <5,6,7,4>
+ 3801573306U, // <4,6,2,6>: Cost 4 vsldoi8 <7,2,4,6>, <2,6,3,7>
+ 2785858042U, // <4,6,2,7>: Cost 3 vsldoi12 <5,6,7,4>, <6,2,7,3>
+ 2785858051U, // <4,6,2,u>: Cost 3 vsldoi12 <5,6,7,4>, <6,2,u,3>
+ 3863065101U, // <4,6,3,0>: Cost 4 vsldoi12 <6,3,0,4>, <6,3,0,4>
+ 3314586024U, // <4,6,3,1>: Cost 4 vmrghw <4,3,5,0>, <6,1,7,2>
+ 3863212575U, // <4,6,3,2>: Cost 4 vsldoi12 <6,3,2,4>, <6,3,2,4>
+ 3863286312U, // <4,6,3,3>: Cost 4 vsldoi12 <6,3,3,4>, <6,3,3,4>
+ 3767732738U, // <4,6,3,4>: Cost 4 vsldoi8 <1,5,4,6>, <3,4,5,6>
+ 3779676746U, // <4,6,3,5>: Cost 4 vsldoi8 <3,5,4,6>, <3,5,4,6>
+ 3398898488U, // <4,6,3,6>: Cost 4 vmrglw <7,2,4,3>, <6,6,6,6>
+ 2301267254U, // <4,6,3,7>: Cost 3 vmrglw <3,2,4,3>, RHS
+ 2301267255U, // <4,6,3,u>: Cost 3 vmrglw <3,2,4,3>, RHS
+ 3852890715U, // <4,6,4,0>: Cost 4 vsldoi12 <4,5,6,4>, <6,4,0,1>
+ 3315208615U, // <4,6,4,1>: Cost 4 vmrghw <4,4,4,4>, <6,1,7,1>
+ 2241466874U, // <4,6,4,2>: Cost 3 vmrghw <4,4,4,4>, <6,2,7,3>
+ 3852890745U, // <4,6,4,3>: Cost 4 vsldoi12 <4,5,6,4>, <6,4,3,4>
+ 2241467037U, // <4,6,4,4>: Cost 3 vmrghw <4,4,4,4>, <6,4,7,4>
+ 2241549039U, // <4,6,4,5>: Cost 3 vmrghw <4,4,5,5>, <6,5,7,5>
+ 2241467192U, // <4,6,4,6>: Cost 3 vmrghw <4,4,4,4>, <6,6,6,6>
+ 1234832694U, // <4,6,4,7>: Cost 2 vmrglw <4,4,4,4>, RHS
+ 1234832695U, // <4,6,4,u>: Cost 2 vmrglw <4,4,4,4>, RHS
+ 2242302241U, // <4,6,5,0>: Cost 3 vmrghw RHS, <6,0,1,2>
+ 2242310567U, // <4,6,5,1>: Cost 3 vmrghw RHS, <6,1,7,1>
+ 1168568826U, // <4,6,5,2>: Cost 2 vmrghw RHS, <6,2,7,3>
+ 2242302514U, // <4,6,5,3>: Cost 3 vmrghw RHS, <6,3,4,5>
+ 2242302605U, // <4,6,5,4>: Cost 3 vmrghw RHS, <6,4,5,6>
+ 2242310891U, // <4,6,5,5>: Cost 3 vmrghw RHS, <6,5,7,1>
+ 1168569144U, // <4,6,5,6>: Cost 2 vmrghw RHS, <6,6,6,6>
+ 1222233398U, // <4,6,5,7>: Cost 2 vmrglw <2,3,4,5>, RHS
+ 1222233399U, // <4,6,5,u>: Cost 2 vmrglw <2,3,4,5>, RHS
+ 3316576545U, // <4,6,6,0>: Cost 4 vmrghw <4,6,5,0>, <6,0,1,2>
+ 3316584871U, // <4,6,6,1>: Cost 4 vmrghw <4,6,5,1>, <6,1,7,1>
+ 2242851322U, // <4,6,6,2>: Cost 3 vmrghw <4,6,5,2>, <6,2,7,3>
+ 3316601394U, // <4,6,6,3>: Cost 4 vmrghw <4,6,5,3>, <6,3,4,5>
+ 3852890916U, // <4,6,6,4>: Cost 4 vsldoi12 <4,5,6,4>, <6,6,4,4>
+ 3316617963U, // <4,6,6,5>: Cost 4 vmrghw <4,6,5,5>, <6,5,7,1>
+ 2242884408U, // <4,6,6,6>: Cost 3 vmrghw <4,6,5,6>, <6,6,6,6>
+ 2785858370U, // <4,6,6,7>: Cost 3 vsldoi12 <5,6,7,4>, <6,6,7,7>
+ 2785858379U, // <4,6,6,u>: Cost 3 vsldoi12 <5,6,7,4>, <6,6,u,7>
+ 2785858382U, // <4,6,7,0>: Cost 3 vsldoi12 <5,6,7,4>, <6,7,0,1>
+ 3859600215U, // <4,6,7,1>: Cost 4 vsldoi12 <5,6,7,4>, <6,7,1,1>
+ 3317240314U, // <4,6,7,2>: Cost 4 vmrghw <4,7,5,0>, <6,2,7,3>
+ 2792199020U, // <4,6,7,3>: Cost 3 vsldoi12 <6,7,3,4>, <6,7,3,4>
+ 2785858422U, // <4,6,7,4>: Cost 3 vsldoi12 <5,6,7,4>, <6,7,4,5>
+ 3856651132U, // <4,6,7,5>: Cost 4 vsldoi12 <5,2,3,4>, <6,7,5,2>
+ 3317240632U, // <4,6,7,6>: Cost 4 vmrghw <4,7,5,0>, <6,6,6,6>
+ 2303954230U, // <4,6,7,7>: Cost 3 vmrglw <3,6,4,7>, RHS
+ 2303954231U, // <4,6,7,u>: Cost 3 vmrglw <3,6,4,7>, RHS
+ 2244292897U, // <4,6,u,0>: Cost 3 vmrghw RHS, <6,0,1,2>
+ 2244293031U, // <4,6,u,1>: Cost 3 vmrghw RHS, <6,1,7,1>
+ 1170551290U, // <4,6,u,2>: Cost 2 vmrghw RHS, <6,2,7,3>
+ 2244293170U, // <4,6,u,3>: Cost 3 vmrghw RHS, <6,3,4,5>
+ 2244293261U, // <4,6,u,4>: Cost 3 vmrghw RHS, <6,4,5,6>
+ 2244293355U, // <4,6,u,5>: Cost 3 vmrghw RHS, <6,5,7,1>
+ 1170551608U, // <4,6,u,6>: Cost 2 vmrghw RHS, <6,6,6,6>
+ 1222257974U, // <4,6,u,7>: Cost 2 vmrglw <2,3,4,u>, RHS
+ 1222257975U, // <4,6,u,u>: Cost 2 vmrglw <2,3,4,u>, RHS
+ 2238862330U, // <4,7,0,0>: Cost 3 vmrghw <4,0,5,1>, <7,0,1,2>
+ 2706604134U, // <4,7,0,1>: Cost 3 vsldoi8 <3,6,4,7>, LHS
+ 3312604308U, // <4,7,0,2>: Cost 4 vmrghw <4,0,5,1>, <7,2,0,3>
+ 3768402176U, // <4,7,0,3>: Cost 4 vsldoi8 <1,6,4,7>, <0,3,1,4>
+ 2238862648U, // <4,7,0,4>: Cost 3 vmrghw <4,0,5,1>, <7,4,0,5>
+ 3859600418U, // <4,7,0,5>: Cost 4 vsldoi12 <5,6,7,4>, <7,0,5,6>
+ 3729994393U, // <4,7,0,6>: Cost 4 vsldoi4 <6,4,7,0>, <6,4,7,0>
+ 2238862956U, // <4,7,0,7>: Cost 3 vmrghw <4,0,5,1>, <7,7,7,7>
+ 2706604701U, // <4,7,0,u>: Cost 3 vsldoi8 <3,6,4,7>, LHS
+ 3385610338U, // <4,7,1,0>: Cost 4 vmrglw <5,0,4,1>, <5,6,7,0>
+ 3780346676U, // <4,7,1,1>: Cost 4 vsldoi8 <3,6,4,7>, <1,1,1,1>
+ 2706604954U, // <4,7,1,2>: Cost 3 vsldoi8 <3,6,4,7>, <1,2,3,4>
+ 3385610746U, // <4,7,1,3>: Cost 4 vmrglw <5,0,4,1>, <6,2,7,3>
+ 3385610342U, // <4,7,1,4>: Cost 4 vmrglw <5,0,4,1>, <5,6,7,4>
+ 3385610667U, // <4,7,1,5>: Cost 4 vmrglw <5,0,4,1>, <6,1,7,5>
+ 3768403178U, // <4,7,1,6>: Cost 4 vsldoi8 <1,6,4,7>, <1,6,4,7>
+ 3385611074U, // <4,7,1,7>: Cost 4 vmrglw <5,0,4,1>, <6,6,7,7>
+ 2706604954U, // <4,7,1,u>: Cost 3 vsldoi8 <3,6,4,7>, <1,2,3,4>
+ 3859600532U, // <4,7,2,0>: Cost 4 vsldoi12 <5,6,7,4>, <7,2,0,3>
+ 3712091034U, // <4,7,2,1>: Cost 5 vsldoi4 <3,4,7,2>, <1,2,3,4>
+ 3774375528U, // <4,7,2,2>: Cost 4 vsldoi8 <2,6,4,7>, <2,2,2,2>
+ 2794853552U, // <4,7,2,3>: Cost 3 vsldoi12 <7,2,3,4>, <7,2,3,4>
+ 2785858744U, // <4,7,2,4>: Cost 3 vsldoi12 <5,6,7,4>, <7,2,4,3>
+ 3735982182U, // <4,7,2,5>: Cost 4 vsldoi4 <7,4,7,2>, <5,6,7,4>
+ 3774375875U, // <4,7,2,6>: Cost 4 vsldoi8 <2,6,4,7>, <2,6,4,7>
+ 3735983476U, // <4,7,2,7>: Cost 4 vsldoi4 <7,4,7,2>, <7,4,7,2>
+ 2795222237U, // <4,7,2,u>: Cost 3 vsldoi12 <7,2,u,4>, <7,2,u,4>
+ 3780348054U, // <4,7,3,0>: Cost 4 vsldoi8 <3,6,4,7>, <3,0,1,2>
+ 3730015130U, // <4,7,3,1>: Cost 4 vsldoi4 <6,4,7,3>, <1,2,3,4>
+ 3780348244U, // <4,7,3,2>: Cost 4 vsldoi8 <3,6,4,7>, <3,2,4,3>
+ 3778357673U, // <4,7,3,3>: Cost 4 vsldoi8 <3,3,4,7>, <3,3,4,7>
+ 2325155942U, // <4,7,3,4>: Cost 3 vmrglw <7,2,4,3>, <5,6,7,4>
+ 3779684939U, // <4,7,3,5>: Cost 5 vsldoi8 <3,5,4,7>, <3,5,4,7>
+ 2706606748U, // <4,7,3,6>: Cost 3 vsldoi8 <3,6,4,7>, <3,6,4,7>
+ 3398898498U, // <4,7,3,7>: Cost 4 vmrglw <7,2,4,3>, <6,6,7,7>
+ 2707934014U, // <4,7,3,u>: Cost 3 vsldoi8 <3,u,4,7>, <3,u,4,7>
+ 2785858868U, // <4,7,4,0>: Cost 3 vsldoi12 <5,6,7,4>, <7,4,0,1>
+ 3780348874U, // <4,7,4,1>: Cost 4 vsldoi8 <3,6,4,7>, <4,1,2,3>
+ 3780349000U, // <4,7,4,2>: Cost 4 vsldoi8 <3,6,4,7>, <4,2,7,3>
+ 2308575738U, // <4,7,4,3>: Cost 3 vmrglw <4,4,4,4>, <6,2,7,3>
+ 2656283856U, // <4,7,4,4>: Cost 3 vsldoi4 <6,4,7,4>, <4,4,4,4>
+ 2706607414U, // <4,7,4,5>: Cost 3 vsldoi8 <3,6,4,7>, RHS
+ 2656285341U, // <4,7,4,6>: Cost 3 vsldoi4 <6,4,7,4>, <6,4,7,4>
+ 2241468012U, // <4,7,4,7>: Cost 3 vmrghw <4,4,4,4>, <7,7,7,7>
+ 2706607657U, // <4,7,4,u>: Cost 3 vsldoi8 <3,6,4,7>, RHS
+ 1168569338U, // <4,7,5,0>: Cost 2 vmrghw RHS, <7,0,1,2>
+ 2242311242U, // <4,7,5,1>: Cost 3 vmrghw RHS, <7,1,1,1>
+ 2242303178U, // <4,7,5,2>: Cost 3 vmrghw RHS, <7,2,6,3>
+ 2242311395U, // <4,7,5,3>: Cost 3 vmrghw RHS, <7,3,0,1>
+ 1168569702U, // <4,7,5,4>: Cost 2 vmrghw RHS, <7,4,5,6>
+ 2242311606U, // <4,7,5,5>: Cost 3 vmrghw RHS, <7,5,5,5>
+ 2242311662U, // <4,7,5,6>: Cost 3 vmrghw RHS, <7,6,2,7>
+ 1168569964U, // <4,7,5,7>: Cost 2 vmrghw RHS, <7,7,7,7>
+ 1168569986U, // <4,7,5,u>: Cost 2 vmrghw RHS, <7,u,1,2>
+ 3316593658U, // <4,7,6,0>: Cost 4 vmrghw <4,6,5,2>, <7,0,1,2>
+ 3316593738U, // <4,7,6,1>: Cost 5 vmrghw <4,6,5,2>, <7,1,1,1>
+ 3316634800U, // <4,7,6,2>: Cost 4 vmrghw <4,6,5,7>, <7,2,3,4>
+ 3386978810U, // <4,7,6,3>: Cost 4 vmrglw <5,2,4,6>, <6,2,7,3>
+ 2785859072U, // <4,7,6,4>: Cost 3 vsldoi12 <5,6,7,4>, <7,6,4,7>
+ 3736014950U, // <4,7,6,5>: Cost 4 vsldoi4 <7,4,7,6>, <5,6,7,4>
+ 3316594158U, // <4,7,6,6>: Cost 4 vmrghw <4,6,5,2>, <7,6,2,7>
+ 2797803032U, // <4,7,6,7>: Cost 3 vsldoi12 <7,6,7,4>, <7,6,7,4>
+ 2797876769U, // <4,7,6,u>: Cost 3 vsldoi12 <7,6,u,4>, <7,6,u,4>
+ 2243499002U, // <4,7,7,0>: Cost 3 vmrghw <4,7,5,0>, <7,0,1,2>
+ 3718103962U, // <4,7,7,1>: Cost 4 vsldoi4 <4,4,7,7>, <1,2,3,4>
+ 3317257418U, // <4,7,7,2>: Cost 4 vmrghw <4,7,5,2>, <7,2,6,3>
+ 3377695816U, // <4,7,7,3>: Cost 4 vmrglw <3,6,4,7>, <4,2,7,3>
+ 2243532134U, // <4,7,7,4>: Cost 3 vmrghw <4,7,5,4>, <7,4,5,6>
+ 3317282230U, // <4,7,7,5>: Cost 4 vmrghw <4,7,5,5>, <7,5,5,5>
+ 2730497536U, // <4,7,7,6>: Cost 3 vsldoi8 <7,6,4,7>, <7,6,4,7>
+ 2243556972U, // <4,7,7,7>: Cost 3 vmrghw <4,7,5,7>, <7,7,7,7>
+ 2243565186U, // <4,7,7,u>: Cost 3 vmrghw <4,7,5,u>, <7,u,1,2>
+ 1170551802U, // <4,7,u,0>: Cost 2 vmrghw RHS, <7,0,1,2>
+ 2706609966U, // <4,7,u,1>: Cost 3 vsldoi8 <3,6,4,7>, LHS
+ 2244293797U, // <4,7,u,2>: Cost 3 vmrghw RHS, <7,2,2,2>
+ 2244293859U, // <4,7,u,3>: Cost 3 vmrghw RHS, <7,3,0,1>
+ 1170552166U, // <4,7,u,4>: Cost 2 vmrghw RHS, <7,4,5,6>
+ 2706610330U, // <4,7,u,5>: Cost 3 vsldoi8 <3,6,4,7>, RHS
+ 2244294126U, // <4,7,u,6>: Cost 3 vmrghw RHS, <7,6,2,7>
+ 1170552428U, // <4,7,u,7>: Cost 2 vmrghw RHS, <7,7,7,7>
+ 1170552450U, // <4,7,u,u>: Cost 2 vmrghw RHS, <7,u,1,2>
+ 1165118354U, // <4,u,0,0>: Cost 2 vmrghw <4,0,5,1>, <4,0,5,1>
+ 1624907878U, // <4,u,0,1>: Cost 2 vsldoi8 <2,3,4,u>, LHS
+ 2638407377U, // <4,u,0,2>: Cost 3 vsldoi4 <3,4,u,0>, <2,3,4,u>
+ 2295931036U, // <4,u,0,3>: Cost 3 vmrglw <2,3,4,0>, LHS
+ 2687369584U, // <4,u,0,4>: Cost 3 vsldoi8 <0,4,4,u>, <0,4,4,u>
+ 1165121690U, // <4,u,0,5>: Cost 2 vmrghw <4,0,5,1>, RHS
+ 2662298489U, // <4,u,0,6>: Cost 3 vsldoi4 <7,4,u,0>, <6,7,4,u>
+ 2295934280U, // <4,u,0,7>: Cost 3 vmrglw <2,3,4,0>, RHS
+ 1624908445U, // <4,u,0,u>: Cost 2 vsldoi8 <2,3,4,u>, LHS
+ 2638413926U, // <4,u,1,0>: Cost 3 vsldoi4 <3,4,u,1>, LHS
+ 2691351382U, // <4,u,1,1>: Cost 3 vsldoi8 <1,1,4,u>, <1,1,4,u>
+ 1685280558U, // <4,u,1,2>: Cost 2 vsldoi12 <1,2,3,4>, LHS
+ 2287313052U, // <4,u,1,3>: Cost 3 vmrglw <0,u,4,1>, LHS
+ 2299257799U, // <4,u,1,4>: Cost 3 vmrglw <2,u,4,1>, <1,2,u,4>
+ 2694005914U, // <4,u,1,5>: Cost 3 vsldoi8 <1,5,4,u>, <1,5,4,u>
+ 2305231362U, // <4,u,1,6>: Cost 3 vmrglw <3,u,4,1>, <3,4,5,6>
+ 2287316296U, // <4,u,1,7>: Cost 3 vmrglw <0,u,4,1>, RHS
+ 1685280612U, // <4,u,1,u>: Cost 2 vsldoi12 <1,2,3,4>, LHS
+ 2638422118U, // <4,u,2,0>: Cost 3 vsldoi4 <3,4,u,2>, LHS
+ 2240206638U, // <4,u,2,1>: Cost 3 vmrghw <4,2,5,3>, LHS
+ 2697987712U, // <4,u,2,2>: Cost 3 vsldoi8 <2,2,4,u>, <2,2,4,u>
+ 1624909521U, // <4,u,2,3>: Cost 2 vsldoi8 <2,3,4,u>, <2,3,4,u>
+ 2759391121U, // <4,u,2,4>: Cost 3 vsldoi12 <1,2,u,4>, <u,2,4,3>
+ 2240207002U, // <4,u,2,5>: Cost 3 vmrghw <4,2,5,3>, RHS
+ 2698651578U, // <4,u,2,6>: Cost 3 vsldoi8 <2,3,4,u>, <2,6,3,7>
+ 2785859500U, // <4,u,2,7>: Cost 3 vsldoi12 <5,6,7,4>, <u,2,7,3>
+ 1628227686U, // <4,u,2,u>: Cost 2 vsldoi8 <2,u,4,u>, <2,u,4,u>
+ 2759022524U, // <4,u,3,0>: Cost 3 vsldoi12 <1,2,3,4>, <u,3,0,1>
+ 2801342408U, // <4,u,3,1>: Cost 3 vsldoi12 <u,3,1,4>, <u,3,1,4>
+ 2703960409U, // <4,u,3,2>: Cost 3 vsldoi8 <3,2,4,u>, <3,2,4,u>
+ 2759022554U, // <4,u,3,3>: Cost 3 vsldoi12 <1,2,3,4>, <u,3,3,4>
+ 2759022564U, // <4,u,3,4>: Cost 3 vsldoi12 <1,2,3,4>, <u,3,4,5>
+ 2240845978U, // <4,u,3,5>: Cost 3 vmrghw <4,3,5,0>, RHS
+ 2706614941U, // <4,u,3,6>: Cost 3 vsldoi8 <3,6,4,u>, <3,6,4,u>
+ 2301267272U, // <4,u,3,7>: Cost 3 vmrglw <3,2,4,3>, RHS
+ 2759022596U, // <4,u,3,u>: Cost 3 vsldoi12 <1,2,3,4>, <u,3,u,1>
+ 1570668646U, // <4,u,4,0>: Cost 2 vsldoi4 <4,4,u,4>, LHS
+ 1167726382U, // <4,u,4,1>: Cost 2 vmrghw <4,4,4,4>, LHS
+ 2698652753U, // <4,u,4,2>: Cost 3 vsldoi8 <2,3,4,u>, <4,2,u,3>
+ 1234829468U, // <4,u,4,3>: Cost 2 vmrglw <4,4,4,4>, LHS
+ 229035318U, // <4,u,4,4>: Cost 1 vspltisw0 RHS
+ 1624911158U, // <4,u,4,5>: Cost 2 vsldoi8 <2,3,4,u>, RHS
+ 2698653081U, // <4,u,4,6>: Cost 3 vsldoi8 <2,3,4,u>, <4,6,u,7>
+ 1234832712U, // <4,u,4,7>: Cost 2 vmrglw <4,4,4,4>, RHS
+ 229035318U, // <4,u,4,u>: Cost 1 vspltisw0 RHS
+ 1168561875U, // <4,u,5,0>: Cost 2 vmrghw RHS, <u,0,1,2>
+ 94820142U, // <4,u,5,1>: Cost 1 vmrghw RHS, LHS
+ 1168562053U, // <4,u,5,2>: Cost 2 vmrghw RHS, <u,2,3,0>
+ 1222230172U, // <4,u,5,3>: Cost 2 vmrglw <2,3,4,5>, LHS
+ 1168562239U, // <4,u,5,4>: Cost 2 vmrghw RHS, <u,4,5,6>
+ 94820506U, // <4,u,5,5>: Cost 1 vmrghw RHS, RHS
+ 1685280922U, // <4,u,5,6>: Cost 2 vsldoi12 <1,2,3,4>, RHS
+ 1222233416U, // <4,u,5,7>: Cost 2 vmrglw <2,3,4,5>, RHS
+ 94820709U, // <4,u,5,u>: Cost 1 vmrghw RHS, LHS
+ 1564713062U, // <4,u,6,0>: Cost 2 vsldoi4 <3,4,u,6>, LHS
+ 2626511979U, // <4,u,6,1>: Cost 3 vsldoi4 <1,4,u,6>, <1,4,u,6>
+ 2632484676U, // <4,u,6,2>: Cost 3 vsldoi4 <2,4,u,6>, <2,4,u,6>
+ 1564715549U, // <4,u,6,3>: Cost 2 vsldoi4 <3,4,u,6>, <3,4,u,6>
+ 1564716342U, // <4,u,6,4>: Cost 2 vsldoi4 <3,4,u,6>, RHS
+ 2242853018U, // <4,u,6,5>: Cost 3 vmrghw <4,6,5,2>, RHS
+ 2656375464U, // <4,u,6,6>: Cost 3 vsldoi4 <6,4,u,6>, <6,4,u,6>
+ 27705344U, // <4,u,6,7>: Cost 0 copy RHS
+ 27705344U, // <4,u,6,u>: Cost 0 copy RHS
+ 2785859840U, // <4,u,7,0>: Cost 3 vsldoi12 <5,6,7,4>, <u,7,0,1>
+ 2243499822U, // <4,u,7,1>: Cost 3 vmrghw <4,7,5,0>, LHS
+ 2727851197U, // <4,u,7,2>: Cost 3 vsldoi8 <7,2,4,u>, <7,2,4,u>
+ 2303951004U, // <4,u,7,3>: Cost 3 vmrglw <3,6,4,7>, LHS
+ 2785859880U, // <4,u,7,4>: Cost 3 vsldoi12 <5,6,7,4>, <u,7,4,5>
+ 2243500186U, // <4,u,7,5>: Cost 3 vmrghw <4,7,5,0>, RHS
+ 2730505729U, // <4,u,7,6>: Cost 3 vsldoi8 <7,6,4,u>, <7,6,4,u>
+ 2303954248U, // <4,u,7,7>: Cost 3 vmrglw <3,6,4,7>, RHS
+ 2303951009U, // <4,u,7,u>: Cost 3 vmrglw <3,6,4,7>, LHS
+ 1564729446U, // <4,u,u,0>: Cost 2 vsldoi4 <3,4,u,u>, LHS
+ 96810798U, // <4,u,u,1>: Cost 1 vmrghw RHS, LHS
+ 1685281125U, // <4,u,u,2>: Cost 2 vsldoi12 <1,2,3,4>, LHS
+ 1222254748U, // <4,u,u,3>: Cost 2 vmrglw <2,3,4,u>, LHS
+ 229035318U, // <4,u,u,4>: Cost 1 vspltisw0 RHS
+ 96811162U, // <4,u,u,5>: Cost 1 vmrghw RHS, RHS
+ 1685281165U, // <4,u,u,6>: Cost 2 vsldoi12 <1,2,3,4>, RHS
+ 27705344U, // <4,u,u,7>: Cost 0 copy RHS
+ 27705344U, // <4,u,u,u>: Cost 0 copy RHS
+ 2754232320U, // <5,0,0,0>: Cost 3 vsldoi12 <0,4,1,5>, <0,0,0,0>
+ 2754232330U, // <5,0,0,1>: Cost 3 vsldoi12 <0,4,1,5>, <0,0,1,1>
+ 3718194894U, // <5,0,0,2>: Cost 4 vsldoi4 <4,5,0,0>, <2,3,4,5>
+ 3376385762U, // <5,0,0,3>: Cost 4 vmrglw <3,4,5,0>, <5,2,0,3>
+ 2754232357U, // <5,0,0,4>: Cost 3 vsldoi12 <0,4,1,5>, <0,0,4,1>
+ 3845816370U, // <5,0,0,5>: Cost 4 vsldoi12 <3,4,0,5>, <0,0,5,5>
+ 3782353389U, // <5,0,0,6>: Cost 4 vsldoi8 <4,0,5,0>, <0,6,0,7>
+ 3376386090U, // <5,0,0,7>: Cost 4 vmrglw <3,4,5,0>, <5,6,0,7>
+ 2757402697U, // <5,0,0,u>: Cost 3 vsldoi12 <0,u,u,5>, <0,0,u,1>
+ 2626543718U, // <5,0,1,0>: Cost 3 vsldoi4 <1,5,0,1>, LHS
+ 2626544751U, // <5,0,1,1>: Cost 3 vsldoi4 <1,5,0,1>, <1,5,0,1>
+ 1680490598U, // <5,0,1,2>: Cost 2 vsldoi12 <0,4,1,5>, LHS
+ 3766428665U, // <5,0,1,3>: Cost 4 vsldoi8 <1,3,5,0>, <1,3,5,0>
+ 2626546998U, // <5,0,1,4>: Cost 3 vsldoi4 <1,5,0,1>, RHS
+ 2650435539U, // <5,0,1,5>: Cost 3 vsldoi4 <5,5,0,1>, <5,5,0,1>
+ 3783017715U, // <5,0,1,6>: Cost 4 vsldoi8 <4,1,5,0>, <1,6,5,7>
+ 3385019000U, // <5,0,1,7>: Cost 4 vmrglw <4,u,5,1>, <3,6,0,7>
+ 1680490652U, // <5,0,1,u>: Cost 2 vsldoi12 <0,4,1,5>, LHS
+ 3376398336U, // <5,0,2,0>: Cost 4 vmrglw <3,4,5,2>, <0,0,0,0>
+ 2245877862U, // <5,0,2,1>: Cost 3 vmrghw <5,2,1,3>, LHS
+ 3773064808U, // <5,0,2,2>: Cost 4 vsldoi8 <2,4,5,0>, <2,2,2,2>
+ 2705295054U, // <5,0,2,3>: Cost 3 vsldoi8 <3,4,5,0>, <2,3,4,5>
+ 3827974343U, // <5,0,2,4>: Cost 4 vsldoi12 <0,4,1,5>, <0,2,4,1>
+ 3845816530U, // <5,0,2,5>: Cost 4 vsldoi12 <3,4,0,5>, <0,2,5,3>
+ 3779037114U, // <5,0,2,6>: Cost 4 vsldoi8 <3,4,5,0>, <2,6,3,7>
+ 3810887658U, // <5,0,2,7>: Cost 4 vsldoi8 <u,7,5,0>, <2,7,0,1>
+ 2245878429U, // <5,0,2,u>: Cost 3 vmrghw <5,2,1,3>, LHS
+ 2710603926U, // <5,0,3,0>: Cost 3 vsldoi8 <4,3,5,0>, <3,0,1,2>
+ 3827974396U, // <5,0,3,1>: Cost 4 vsldoi12 <0,4,1,5>, <0,3,1,0>
+ 3779037516U, // <5,0,3,2>: Cost 4 vsldoi8 <3,4,5,0>, <3,2,3,4>
+ 3779037596U, // <5,0,3,3>: Cost 4 vsldoi8 <3,4,5,0>, <3,3,3,3>
+ 2705295868U, // <5,0,3,4>: Cost 3 vsldoi8 <3,4,5,0>, <3,4,5,0>
+ 3379726804U, // <5,0,3,5>: Cost 4 vmrglw <4,0,5,3>, <3,4,0,5>
+ 3802925748U, // <5,0,3,6>: Cost 4 vsldoi8 <7,4,5,0>, <3,6,7,4>
+ 3363138168U, // <5,0,3,7>: Cost 5 vmrglw <1,2,5,3>, <3,6,0,7>
+ 2707950400U, // <5,0,3,u>: Cost 3 vsldoi8 <3,u,5,0>, <3,u,5,0>
+ 2626568294U, // <5,0,4,0>: Cost 3 vsldoi4 <1,5,0,4>, LHS
+ 1680490834U, // <5,0,4,1>: Cost 2 vsldoi12 <0,4,1,5>, <0,4,1,5>
+ 3828048219U, // <5,0,4,2>: Cost 4 vsldoi12 <0,4,2,5>, <0,4,2,5>
+ 2710604932U, // <5,0,4,3>: Cost 3 vsldoi8 <4,3,5,0>, <4,3,5,0>
+ 2754232685U, // <5,0,4,4>: Cost 3 vsldoi12 <0,4,1,5>, <0,4,4,5>
+ 2705296694U, // <5,0,4,5>: Cost 3 vsldoi8 <3,4,5,0>, RHS
+ 3779038590U, // <5,0,4,6>: Cost 4 vsldoi8 <3,4,5,0>, <4,6,5,7>
+ 2713259464U, // <5,0,4,7>: Cost 3 vsldoi8 <4,7,5,0>, <4,7,5,0>
+ 1680490834U, // <5,0,4,u>: Cost 2 vsldoi12 <0,4,1,5>, <0,4,1,5>
+ 2311307264U, // <5,0,5,0>: Cost 3 vmrglw <4,u,5,5>, <0,0,0,0>
+ 1174437990U, // <5,0,5,1>: Cost 2 vmrghw <5,5,5,5>, LHS
+ 3779038946U, // <5,0,5,2>: Cost 4 vsldoi8 <3,4,5,0>, <5,2,0,3>
+ 3845816752U, // <5,0,5,3>: Cost 4 vsldoi12 <3,4,0,5>, <0,5,3,0>
+ 2248180050U, // <5,0,5,4>: Cost 3 vmrghw <5,5,5,5>, <0,4,1,5>
+ 2248180194U, // <5,0,5,5>: Cost 3 vmrghw <5,5,5,5>, <0,5,u,5>
+ 3779039274U, // <5,0,5,6>: Cost 4 vsldoi8 <3,4,5,0>, <5,6,0,7>
+ 3385051768U, // <5,0,5,7>: Cost 4 vmrglw <4,u,5,5>, <3,6,0,7>
+ 1174438557U, // <5,0,5,u>: Cost 2 vmrghw <5,5,5,5>, LHS
+ 2302689280U, // <5,0,6,0>: Cost 3 vmrglw <3,4,5,6>, <0,0,0,0>
+ 1175208038U, // <5,0,6,1>: Cost 2 vmrghw <5,6,7,0>, LHS
+ 3787002362U, // <5,0,6,2>: Cost 4 vsldoi8 <4,7,5,0>, <6,2,7,3>
+ 3376432160U, // <5,0,6,3>: Cost 4 vmrglw <3,4,5,6>, <1,4,0,3>
+ 2248950098U, // <5,0,6,4>: Cost 3 vmrghw <5,6,7,0>, <0,4,1,5>
+ 2248950180U, // <5,0,6,5>: Cost 3 vmrghw <5,6,7,0>, <0,5,1,6>
+ 3376433702U, // <5,0,6,6>: Cost 4 vmrglw <3,4,5,6>, <3,5,0,6>
+ 2729186166U, // <5,0,6,7>: Cost 3 vsldoi8 <7,4,5,0>, <6,7,4,5>
+ 1175208605U, // <5,0,6,u>: Cost 2 vmrghw <5,6,7,0>, LHS
+ 2713261050U, // <5,0,7,0>: Cost 3 vsldoi8 <4,7,5,0>, <7,0,1,2>
+ 3365823599U, // <5,0,7,1>: Cost 4 vmrglw <1,6,5,7>, <1,5,0,1>
+ 3808900317U, // <5,0,7,2>: Cost 4 vsldoi8 <u,4,5,0>, <7,2,u,4>
+ 3784348899U, // <5,0,7,3>: Cost 4 vsldoi8 <4,3,5,0>, <7,3,0,1>
+ 2729186656U, // <5,0,7,4>: Cost 3 vsldoi8 <7,4,5,0>, <7,4,5,0>
+ 3787003268U, // <5,0,7,5>: Cost 4 vsldoi8 <4,7,5,0>, <7,5,0,0>
+ 3802928664U, // <5,0,7,6>: Cost 4 vsldoi8 <7,4,5,0>, <7,6,7,4>
+ 3787003431U, // <5,0,7,7>: Cost 4 vsldoi8 <4,7,5,0>, <7,7,0,1>
+ 2731841188U, // <5,0,7,u>: Cost 3 vsldoi8 <7,u,5,0>, <7,u,5,0>
+ 2626601062U, // <5,0,u,0>: Cost 3 vsldoi4 <1,5,0,u>, LHS
+ 1683145366U, // <5,0,u,1>: Cost 2 vsldoi12 <0,u,1,5>, <0,u,1,5>
+ 1680491165U, // <5,0,u,2>: Cost 2 vsldoi12 <0,4,1,5>, LHS
+ 2705295054U, // <5,0,u,3>: Cost 3 vsldoi8 <3,4,5,0>, <2,3,4,5>
+ 2754233005U, // <5,0,u,4>: Cost 3 vsldoi12 <0,4,1,5>, <0,u,4,1>
+ 2705299610U, // <5,0,u,5>: Cost 3 vsldoi8 <3,4,5,0>, RHS
+ 3779041488U, // <5,0,u,6>: Cost 4 vsldoi8 <3,4,5,0>, <u,6,3,7>
+ 2737150252U, // <5,0,u,7>: Cost 3 vsldoi8 <u,7,5,0>, <u,7,5,0>
+ 1680491219U, // <5,0,u,u>: Cost 2 vsldoi12 <0,4,1,5>, LHS
+ 2713927680U, // <5,1,0,0>: Cost 3 vsldoi8 <4,u,5,1>, <0,0,0,0>
+ 1640185958U, // <5,1,0,1>: Cost 2 vsldoi8 <4,u,5,1>, LHS
+ 2310607866U, // <5,1,0,2>: Cost 3 vmrglw <4,7,5,0>, <7,0,1,2>
+ 3787669756U, // <5,1,0,3>: Cost 4 vsldoi8 <4,u,5,1>, <0,3,1,0>
+ 2713928018U, // <5,1,0,4>: Cost 3 vsldoi8 <4,u,5,1>, <0,4,1,5>
+ 2306621778U, // <5,1,0,5>: Cost 3 vmrglw <4,1,5,0>, <0,4,1,5>
+ 3787670006U, // <5,1,0,6>: Cost 4 vsldoi8 <4,u,5,1>, <0,6,1,7>
+ 3736188301U, // <5,1,0,7>: Cost 4 vsldoi4 <7,5,1,0>, <7,5,1,0>
+ 1640186525U, // <5,1,0,u>: Cost 2 vsldoi8 <4,u,5,1>, LHS
+ 2650505318U, // <5,1,1,0>: Cost 3 vsldoi4 <5,5,1,1>, LHS
+ 2754233140U, // <5,1,1,1>: Cost 3 vsldoi12 <0,4,1,5>, <1,1,1,1>
+ 2311276694U, // <5,1,1,2>: Cost 3 vmrglw <4,u,5,1>, <3,0,1,2>
+ 2311278315U, // <5,1,1,3>: Cost 3 vmrglw <4,u,5,1>, <5,2,1,3>
+ 2758435667U, // <5,1,1,4>: Cost 3 vsldoi12 <1,1,4,5>, <1,1,4,5>
+ 2754233180U, // <5,1,1,5>: Cost 3 vsldoi12 <0,4,1,5>, <1,1,5,5>
+ 3385016497U, // <5,1,1,6>: Cost 4 vmrglw <4,u,5,1>, <0,2,1,6>
+ 2311278643U, // <5,1,1,7>: Cost 3 vmrglw <4,u,5,1>, <5,6,1,7>
+ 2758730615U, // <5,1,1,u>: Cost 3 vsldoi12 <1,1,u,5>, <1,1,u,5>
+ 3700367462U, // <5,1,2,0>: Cost 4 vsldoi4 <1,5,1,2>, LHS
+ 3830629255U, // <5,1,2,1>: Cost 4 vsldoi12 <0,u,1,5>, <1,2,1,3>
+ 2713929320U, // <5,1,2,2>: Cost 3 vsldoi8 <4,u,5,1>, <2,2,2,2>
+ 2754233238U, // <5,1,2,3>: Cost 3 vsldoi12 <0,4,1,5>, <1,2,3,0>
+ 2759099300U, // <5,1,2,4>: Cost 3 vsldoi12 <1,2,4,5>, <1,2,4,5>
+ 2754233259U, // <5,1,2,5>: Cost 3 vsldoi12 <0,4,1,5>, <1,2,5,3>
+ 2713929658U, // <5,1,2,6>: Cost 3 vsldoi8 <4,u,5,1>, <2,6,3,7>
+ 3872359354U, // <5,1,2,7>: Cost 4 vsldoi12 <7,u,0,5>, <1,2,7,0>
+ 2754233283U, // <5,1,2,u>: Cost 3 vsldoi12 <0,4,1,5>, <1,2,u,0>
+ 2713929878U, // <5,1,3,0>: Cost 3 vsldoi8 <4,u,5,1>, <3,0,1,2>
+ 3363135498U, // <5,1,3,1>: Cost 4 vmrglw <1,2,5,3>, <0,0,1,1>
+ 3363137686U, // <5,1,3,2>: Cost 4 vmrglw <1,2,5,3>, <3,0,1,2>
+ 2713930140U, // <5,1,3,3>: Cost 3 vsldoi8 <4,u,5,1>, <3,3,3,3>
+ 2713930242U, // <5,1,3,4>: Cost 3 vsldoi8 <4,u,5,1>, <3,4,5,6>
+ 2289394002U, // <5,1,3,5>: Cost 3 vmrglw <1,2,5,3>, <0,4,1,5>
+ 3787672184U, // <5,1,3,6>: Cost 4 vsldoi8 <4,u,5,1>, <3,6,0,7>
+ 3787672259U, // <5,1,3,7>: Cost 4 vsldoi8 <4,u,5,1>, <3,7,0,1>
+ 2713930526U, // <5,1,3,u>: Cost 3 vsldoi8 <4,u,5,1>, <3,u,1,2>
+ 1634880402U, // <5,1,4,0>: Cost 2 vsldoi8 <4,0,5,1>, <4,0,5,1>
+ 2760205355U, // <5,1,4,1>: Cost 3 vsldoi12 <1,4,1,5>, <1,4,1,5>
+ 2760279092U, // <5,1,4,2>: Cost 3 vsldoi12 <1,4,2,5>, <1,4,2,5>
+ 3787672708U, // <5,1,4,3>: Cost 4 vsldoi8 <4,u,5,1>, <4,3,5,0>
+ 2713930960U, // <5,1,4,4>: Cost 3 vsldoi8 <4,u,5,1>, <4,4,4,4>
+ 1640189238U, // <5,1,4,5>: Cost 2 vsldoi8 <4,u,5,1>, RHS
+ 3786345848U, // <5,1,4,6>: Cost 4 vsldoi8 <4,6,5,1>, <4,6,5,1>
+ 3787009481U, // <5,1,4,7>: Cost 4 vsldoi8 <4,7,5,1>, <4,7,5,1>
+ 1640189466U, // <5,1,4,u>: Cost 2 vsldoi8 <4,u,5,1>, <4,u,5,1>
+ 2754233455U, // <5,1,5,0>: Cost 3 vsldoi12 <0,4,1,5>, <1,5,0,1>
+ 2713931407U, // <5,1,5,1>: Cost 3 vsldoi8 <4,u,5,1>, <5,1,0,1>
+ 2713931499U, // <5,1,5,2>: Cost 3 vsldoi8 <4,u,5,1>, <5,2,1,3>
+ 3827975305U, // <5,1,5,3>: Cost 4 vsldoi12 <0,4,1,5>, <1,5,3,0>
+ 2754233495U, // <5,1,5,4>: Cost 3 vsldoi12 <0,4,1,5>, <1,5,4,5>
+ 2288746834U, // <5,1,5,5>: Cost 3 vmrglw <1,1,5,5>, <0,4,1,5>
+ 2713931827U, // <5,1,5,6>: Cost 3 vsldoi8 <4,u,5,1>, <5,6,1,7>
+ 3787673725U, // <5,1,5,7>: Cost 4 vsldoi8 <4,u,5,1>, <5,7,1,0>
+ 2754233527U, // <5,1,5,u>: Cost 3 vsldoi12 <0,4,1,5>, <1,5,u,1>
+ 2668462182U, // <5,1,6,0>: Cost 3 vsldoi4 <u,5,1,6>, LHS
+ 2290746002U, // <5,1,6,1>: Cost 3 vmrglw <1,4,5,6>, <0,u,1,1>
+ 2302691478U, // <5,1,6,2>: Cost 3 vmrglw <3,4,5,6>, <3,0,1,2>
+ 3364488071U, // <5,1,6,3>: Cost 4 vmrglw <1,4,5,6>, <1,2,1,3>
+ 2302689536U, // <5,1,6,4>: Cost 3 vmrglw <3,4,5,6>, <0,3,1,4>
+ 2754233587U, // <5,1,6,5>: Cost 3 vsldoi12 <0,4,1,5>, <1,6,5,7>
+ 2713932600U, // <5,1,6,6>: Cost 3 vsldoi8 <4,u,5,1>, <6,6,6,6>
+ 2713932622U, // <5,1,6,7>: Cost 3 vsldoi8 <4,u,5,1>, <6,7,0,1>
+ 2302689297U, // <5,1,6,u>: Cost 3 vmrglw <3,4,5,6>, <0,0,1,u>
+ 2713932794U, // <5,1,7,0>: Cost 3 vsldoi8 <4,u,5,1>, <7,0,1,2>
+ 3365822474U, // <5,1,7,1>: Cost 4 vmrglw <1,6,5,7>, <0,0,1,1>
+ 3365824662U, // <5,1,7,2>: Cost 4 vmrglw <1,6,5,7>, <3,0,1,2>
+ 3787674851U, // <5,1,7,3>: Cost 4 vsldoi8 <4,u,5,1>, <7,3,0,1>
+ 2713933158U, // <5,1,7,4>: Cost 3 vsldoi8 <4,u,5,1>, <7,4,5,6>
+ 2292080978U, // <5,1,7,5>: Cost 3 vmrglw <1,6,5,7>, <0,4,1,5>
+ 3365823613U, // <5,1,7,6>: Cost 4 vmrglw <1,6,5,7>, <1,5,1,6>
+ 2713933420U, // <5,1,7,7>: Cost 3 vsldoi8 <4,u,5,1>, <7,7,7,7>
+ 2713933442U, // <5,1,7,u>: Cost 3 vsldoi8 <4,u,5,1>, <7,u,1,2>
+ 1658771190U, // <5,1,u,0>: Cost 2 vsldoi8 <u,0,5,1>, <u,0,5,1>
+ 1640191790U, // <5,1,u,1>: Cost 2 vsldoi8 <4,u,5,1>, LHS
+ 2762933624U, // <5,1,u,2>: Cost 3 vsldoi12 <1,u,2,5>, <1,u,2,5>
+ 2754233724U, // <5,1,u,3>: Cost 3 vsldoi12 <0,4,1,5>, <1,u,3,0>
+ 2763081098U, // <5,1,u,4>: Cost 3 vsldoi12 <1,u,4,5>, <1,u,4,5>
+ 1640192154U, // <5,1,u,5>: Cost 2 vsldoi8 <4,u,5,1>, RHS
+ 2713934032U, // <5,1,u,6>: Cost 3 vsldoi8 <4,u,5,1>, <u,6,3,7>
+ 2713934080U, // <5,1,u,7>: Cost 3 vsldoi8 <4,u,5,1>, <u,7,0,1>
+ 1640192357U, // <5,1,u,u>: Cost 2 vsldoi8 <4,u,5,1>, LHS
+ 3779051520U, // <5,2,0,0>: Cost 4 vsldoi8 <3,4,5,2>, <0,0,0,0>
+ 2705309798U, // <5,2,0,1>: Cost 3 vsldoi8 <3,4,5,2>, LHS
+ 3838813637U, // <5,2,0,2>: Cost 4 vsldoi12 <2,2,4,5>, <2,0,2,1>
+ 2302640230U, // <5,2,0,3>: Cost 3 vmrglw <3,4,5,0>, LHS
+ 3765117266U, // <5,2,0,4>: Cost 4 vsldoi8 <1,1,5,2>, <0,4,1,5>
+ 3381027892U, // <5,2,0,5>: Cost 4 vmrglw <4,2,5,0>, <1,4,2,5>
+ 3842794985U, // <5,2,0,6>: Cost 4 vsldoi12 <2,u,4,5>, <2,0,6,1>
+ 3408232554U, // <5,2,0,7>: Cost 4 vmrglw <u,7,5,0>, <0,1,2,7>
+ 2302640235U, // <5,2,0,u>: Cost 3 vmrglw <3,4,5,0>, LHS
+ 3700432998U, // <5,2,1,0>: Cost 4 vsldoi4 <1,5,2,1>, LHS
+ 3765117785U, // <5,2,1,1>: Cost 4 vsldoi8 <1,1,5,2>, <1,1,5,2>
+ 2311276136U, // <5,2,1,2>: Cost 3 vmrglw <4,u,5,1>, <2,2,2,2>
+ 1237532774U, // <5,2,1,3>: Cost 2 vmrglw <4,u,5,1>, LHS
+ 3700436278U, // <5,2,1,4>: Cost 4 vsldoi4 <1,5,2,1>, RHS
+ 3381036084U, // <5,2,1,5>: Cost 4 vmrglw <4,2,5,1>, <1,4,2,5>
+ 3385018045U, // <5,2,1,6>: Cost 4 vmrglw <4,u,5,1>, <2,3,2,6>
+ 3385017560U, // <5,2,1,7>: Cost 4 vmrglw <4,u,5,1>, <1,6,2,7>
+ 1237532779U, // <5,2,1,u>: Cost 2 vmrglw <4,u,5,1>, LHS
+ 3700441190U, // <5,2,2,0>: Cost 4 vsldoi4 <1,5,2,2>, LHS
+ 3700442242U, // <5,2,2,1>: Cost 4 vsldoi4 <1,5,2,2>, <1,5,2,2>
+ 2754233960U, // <5,2,2,2>: Cost 3 vsldoi12 <0,4,1,5>, <2,2,2,2>
+ 2754233970U, // <5,2,2,3>: Cost 3 vsldoi12 <0,4,1,5>, <2,2,3,3>
+ 2765071997U, // <5,2,2,4>: Cost 3 vsldoi12 <2,2,4,5>, <2,2,4,5>
+ 3834021508U, // <5,2,2,5>: Cost 4 vsldoi12 <1,4,2,5>, <2,2,5,3>
+ 3842795152U, // <5,2,2,6>: Cost 4 vsldoi12 <2,u,4,5>, <2,2,6,6>
+ 3376402492U, // <5,2,2,7>: Cost 4 vmrglw <3,4,5,2>, <5,6,2,7>
+ 2754234015U, // <5,2,2,u>: Cost 3 vsldoi12 <0,4,1,5>, <2,2,u,3>
+ 2754234022U, // <5,2,3,0>: Cost 3 vsldoi12 <0,4,1,5>, <2,3,0,1>
+ 3827975855U, // <5,2,3,1>: Cost 4 vsldoi12 <0,4,1,5>, <2,3,1,1>
+ 2644625102U, // <5,2,3,2>: Cost 3 vsldoi4 <4,5,2,3>, <2,3,4,5>
+ 2289393766U, // <5,2,3,3>: Cost 3 vmrglw <1,2,5,3>, LHS
+ 1691993806U, // <5,2,3,4>: Cost 2 vsldoi12 <2,3,4,5>, <2,3,4,5>
+ 2785052375U, // <5,2,3,5>: Cost 3 vsldoi12 <5,5,5,5>, <2,3,5,5>
+ 3854812897U, // <5,2,3,6>: Cost 4 vsldoi12 <4,u,5,5>, <2,3,6,6>
+ 3802942187U, // <5,2,3,7>: Cost 4 vsldoi8 <7,4,5,2>, <3,7,4,5>
+ 1692288754U, // <5,2,3,u>: Cost 2 vsldoi12 <2,3,u,5>, <2,3,u,5>
+ 3839846139U, // <5,2,4,0>: Cost 4 vsldoi12 <2,4,0,5>, <2,4,0,5>
+ 2709294052U, // <5,2,4,1>: Cost 3 vsldoi8 <4,1,5,2>, <4,1,5,2>
+ 2766251789U, // <5,2,4,2>: Cost 3 vsldoi12 <2,4,2,5>, <2,4,2,5>
+ 2765735702U, // <5,2,4,3>: Cost 3 vsldoi12 <2,3,4,5>, <2,4,3,5>
+ 3840141087U, // <5,2,4,4>: Cost 4 vsldoi12 <2,4,4,5>, <2,4,4,5>
+ 2705313078U, // <5,2,4,5>: Cost 3 vsldoi8 <3,4,5,2>, RHS
+ 2712612217U, // <5,2,4,6>: Cost 3 vsldoi8 <4,6,5,2>, <4,6,5,2>
+ 3787017674U, // <5,2,4,7>: Cost 4 vsldoi8 <4,7,5,2>, <4,7,5,2>
+ 2765735747U, // <5,2,4,u>: Cost 3 vsldoi12 <2,3,4,5>, <2,4,u,5>
+ 3834021704U, // <5,2,5,0>: Cost 4 vsldoi12 <1,4,2,5>, <2,5,0,1>
+ 3834021714U, // <5,2,5,1>: Cost 4 vsldoi12 <1,4,2,5>, <2,5,1,2>
+ 2311308904U, // <5,2,5,2>: Cost 3 vmrglw <4,u,5,5>, <2,2,2,2>
+ 1237565542U, // <5,2,5,3>: Cost 2 vmrglw <4,u,5,5>, LHS
+ 3834021744U, // <5,2,5,4>: Cost 4 vsldoi12 <1,4,2,5>, <2,5,4,5>
+ 3369124916U, // <5,2,5,5>: Cost 4 vmrglw <2,2,5,5>, <1,4,2,5>
+ 2248181690U, // <5,2,5,6>: Cost 3 vmrghw <5,5,5,5>, <2,6,3,7>
+ 3786354825U, // <5,2,5,7>: Cost 4 vsldoi8 <4,6,5,2>, <5,7,2,3>
+ 1237565547U, // <5,2,5,u>: Cost 2 vmrglw <4,u,5,5>, LHS
+ 3700473958U, // <5,2,6,0>: Cost 4 vsldoi4 <1,5,2,6>, LHS
+ 3700475014U, // <5,2,6,1>: Cost 4 vsldoi4 <1,5,2,6>, <1,5,2,6>
+ 2296718952U, // <5,2,6,2>: Cost 3 vmrglw <2,4,5,6>, <2,2,2,2>
+ 1228947558U, // <5,2,6,3>: Cost 2 vmrglw <3,4,5,6>, LHS
+ 3700477238U, // <5,2,6,4>: Cost 4 vsldoi4 <1,5,2,6>, RHS
+ 3834021836U, // <5,2,6,5>: Cost 4 vsldoi12 <1,4,2,5>, <2,6,5,7>
+ 2248951738U, // <5,2,6,6>: Cost 3 vmrghw <5,6,7,0>, <2,6,3,7>
+ 3370461105U, // <5,2,6,7>: Cost 4 vmrglw <2,4,5,6>, <2,6,2,7>
+ 1228947563U, // <5,2,6,u>: Cost 2 vmrglw <3,4,5,6>, LHS
+ 3786355706U, // <5,2,7,0>: Cost 4 vsldoi8 <4,6,5,2>, <7,0,1,2>
+ 3783038037U, // <5,2,7,1>: Cost 4 vsldoi8 <4,1,5,2>, <7,1,2,3>
+ 3365824104U, // <5,2,7,2>: Cost 4 vmrglw <1,6,5,7>, <2,2,2,2>
+ 2292080742U, // <5,2,7,3>: Cost 3 vmrglw <1,6,5,7>, LHS
+ 3842131986U, // <5,2,7,4>: Cost 4 vsldoi12 <2,7,4,5>, <2,7,4,5>
+ 3371795508U, // <5,2,7,5>: Cost 4 vmrglw <2,6,5,7>, <1,4,2,5>
+ 3786356206U, // <5,2,7,6>: Cost 4 vsldoi8 <4,6,5,2>, <7,6,2,7>
+ 3786356332U, // <5,2,7,7>: Cost 4 vsldoi8 <4,6,5,2>, <7,7,7,7>
+ 2292080747U, // <5,2,7,u>: Cost 3 vmrglw <1,6,5,7>, LHS
+ 2754234427U, // <5,2,u,0>: Cost 3 vsldoi12 <0,4,1,5>, <2,u,0,1>
+ 2705315630U, // <5,2,u,1>: Cost 3 vsldoi8 <3,4,5,2>, LHS
+ 2296735336U, // <5,2,u,2>: Cost 3 vmrglw <2,4,5,u>, <2,2,2,2>
+ 1228963942U, // <5,2,u,3>: Cost 2 vmrglw <3,4,5,u>, LHS
+ 1695311971U, // <5,2,u,4>: Cost 2 vsldoi12 <2,u,4,5>, <2,u,4,5>
+ 2705315994U, // <5,2,u,5>: Cost 3 vsldoi8 <3,4,5,2>, RHS
+ 2769201269U, // <5,2,u,6>: Cost 3 vsldoi12 <2,u,6,5>, <2,u,6,5>
+ 3370477489U, // <5,2,u,7>: Cost 4 vmrglw <2,4,5,u>, <2,6,2,7>
+ 1695606919U, // <5,2,u,u>: Cost 2 vsldoi12 <2,u,u,5>, <2,u,u,5>
+ 3827976331U, // <5,3,0,0>: Cost 4 vsldoi12 <0,4,1,5>, <3,0,0,0>
+ 2754234518U, // <5,3,0,1>: Cost 3 vsldoi12 <0,4,1,5>, <3,0,1,2>
+ 3706472290U, // <5,3,0,2>: Cost 4 vsldoi4 <2,5,3,0>, <2,5,3,0>
+ 3700500630U, // <5,3,0,3>: Cost 4 vsldoi4 <1,5,3,0>, <3,0,1,2>
+ 2754234544U, // <5,3,0,4>: Cost 3 vsldoi12 <0,4,1,5>, <3,0,4,1>
+ 3376383766U, // <5,3,0,5>: Cost 4 vmrglw <3,4,5,0>, <2,4,3,5>
+ 3769770513U, // <5,3,0,6>: Cost 5 vsldoi8 <1,u,5,3>, <0,6,4,7>
+ 3376383930U, // <5,3,0,7>: Cost 4 vmrglw <3,4,5,0>, <2,6,3,7>
+ 2754234581U, // <5,3,0,u>: Cost 3 vsldoi12 <0,4,1,5>, <3,0,u,2>
+ 2311275414U, // <5,3,1,0>: Cost 3 vmrglw <4,u,5,1>, <1,2,3,0>
+ 2305967971U, // <5,3,1,1>: Cost 3 vmrglw <4,0,5,1>, <2,5,3,1>
+ 2692047787U, // <5,3,1,2>: Cost 3 vsldoi8 <1,2,5,3>, <1,2,5,3>
+ 2311276146U, // <5,3,1,3>: Cost 3 vmrglw <4,u,5,1>, <2,2,3,3>
+ 2311275418U, // <5,3,1,4>: Cost 3 vmrglw <4,u,5,1>, <1,2,3,4>
+ 3765789807U, // <5,3,1,5>: Cost 4 vsldoi8 <1,2,5,3>, <1,5,0,1>
+ 3765789939U, // <5,3,1,6>: Cost 4 vsldoi8 <1,2,5,3>, <1,6,5,7>
+ 2311276474U, // <5,3,1,7>: Cost 3 vmrglw <4,u,5,1>, <2,6,3,7>
+ 2696029585U, // <5,3,1,u>: Cost 3 vsldoi8 <1,u,5,3>, <1,u,5,3>
+ 2311288709U, // <5,3,2,0>: Cost 3 vmrglw <4,u,5,2>, <u,2,3,0>
+ 3765790243U, // <5,3,2,1>: Cost 4 vsldoi8 <1,2,5,3>, <2,1,3,5>
+ 3827976513U, // <5,3,2,2>: Cost 4 vsldoi12 <0,4,1,5>, <3,2,2,2>
+ 2765736268U, // <5,3,2,3>: Cost 3 vsldoi12 <2,3,4,5>, <3,2,3,4>
+ 2246248962U, // <5,3,2,4>: Cost 3 vmrghw <5,2,6,3>, <3,4,5,6>
+ 3765790563U, // <5,3,2,5>: Cost 4 vsldoi8 <1,2,5,3>, <2,5,3,1>
+ 3827976550U, // <5,3,2,6>: Cost 4 vsldoi12 <0,4,1,5>, <3,2,6,3>
+ 3842795887U, // <5,3,2,7>: Cost 4 vsldoi12 <2,u,4,5>, <3,2,7,3>
+ 2769054073U, // <5,3,2,u>: Cost 3 vsldoi12 <2,u,4,5>, <3,2,u,4>
+ 3827976575U, // <5,3,3,0>: Cost 4 vsldoi12 <0,4,1,5>, <3,3,0,1>
+ 3765790963U, // <5,3,3,1>: Cost 4 vsldoi8 <1,2,5,3>, <3,1,2,5>
+ 3839478162U, // <5,3,3,2>: Cost 4 vsldoi12 <2,3,4,5>, <3,3,2,2>
+ 2754234780U, // <5,3,3,3>: Cost 3 vsldoi12 <0,4,1,5>, <3,3,3,3>
+ 2771708327U, // <5,3,3,4>: Cost 3 vsldoi12 <3,3,4,5>, <3,3,4,5>
+ 3363137059U, // <5,3,3,5>: Cost 4 vmrglw <1,2,5,3>, <2,1,3,5>
+ 3375081320U, // <5,3,3,6>: Cost 4 vmrglw <3,2,5,3>, <2,5,3,6>
+ 3363137466U, // <5,3,3,7>: Cost 4 vmrglw <1,2,5,3>, <2,6,3,7>
+ 2772003275U, // <5,3,3,u>: Cost 3 vsldoi12 <3,3,u,5>, <3,3,u,5>
+ 2772077012U, // <5,3,4,0>: Cost 3 vsldoi12 <3,4,0,5>, <3,4,0,5>
+ 3765791714U, // <5,3,4,1>: Cost 4 vsldoi8 <1,2,5,3>, <4,1,5,0>
+ 2709965878U, // <5,3,4,2>: Cost 3 vsldoi8 <4,2,5,3>, <4,2,5,3>
+ 2772298223U, // <5,3,4,3>: Cost 3 vsldoi12 <3,4,3,5>, <3,4,3,5>
+ 2772371960U, // <5,3,4,4>: Cost 3 vsldoi12 <3,4,4,5>, <3,4,4,5>
+ 2754234882U, // <5,3,4,5>: Cost 3 vsldoi12 <0,4,1,5>, <3,4,5,6>
+ 3839478282U, // <5,3,4,6>: Cost 4 vsldoi12 <2,3,4,5>, <3,4,6,5>
+ 3376416698U, // <5,3,4,7>: Cost 4 vmrglw <3,4,5,4>, <2,6,3,7>
+ 2754234909U, // <5,3,4,u>: Cost 3 vsldoi12 <0,4,1,5>, <3,4,u,6>
+ 2311308182U, // <5,3,5,0>: Cost 3 vmrglw <4,u,5,5>, <1,2,3,0>
+ 3765792421U, // <5,3,5,1>: Cost 4 vsldoi8 <1,2,5,3>, <5,1,2,5>
+ 2715938575U, // <5,3,5,2>: Cost 3 vsldoi8 <5,2,5,3>, <5,2,5,3>
+ 2311308914U, // <5,3,5,3>: Cost 3 vmrglw <4,u,5,5>, <2,2,3,3>
+ 2311308186U, // <5,3,5,4>: Cost 3 vmrglw <4,u,5,5>, <1,2,3,4>
+ 2248182354U, // <5,3,5,5>: Cost 3 vmrghw <5,5,5,5>, <3,5,5,5>
+ 3765792837U, // <5,3,5,6>: Cost 4 vsldoi8 <1,2,5,3>, <5,6,3,7>
+ 2311309242U, // <5,3,5,7>: Cost 3 vmrglw <4,u,5,5>, <2,6,3,7>
+ 2311308190U, // <5,3,5,u>: Cost 3 vmrglw <4,u,5,5>, <1,2,3,u>
+ 2632777830U, // <5,3,6,0>: Cost 3 vsldoi4 <2,5,3,6>, LHS
+ 3706520372U, // <5,3,6,1>: Cost 4 vsldoi4 <2,5,3,6>, <1,1,1,1>
+ 2632779624U, // <5,3,6,2>: Cost 3 vsldoi4 <2,5,3,6>, <2,5,3,6>
+ 2632780290U, // <5,3,6,3>: Cost 3 vsldoi4 <2,5,3,6>, <3,4,5,6>
+ 2632781110U, // <5,3,6,4>: Cost 3 vsldoi4 <2,5,3,6>, RHS
+ 2248952413U, // <5,3,6,5>: Cost 3 vmrghw <5,6,7,0>, <3,5,6,7>
+ 2302691176U, // <5,3,6,6>: Cost 3 vmrglw <3,4,5,6>, <2,5,3,6>
+ 2302691258U, // <5,3,6,7>: Cost 3 vmrglw <3,4,5,6>, <2,6,3,7>
+ 2632783662U, // <5,3,6,u>: Cost 3 vsldoi4 <2,5,3,6>, LHS
+ 3365823382U, // <5,3,7,0>: Cost 4 vmrglw <1,6,5,7>, <1,2,3,0>
+ 3706529011U, // <5,3,7,1>: Cost 4 vsldoi4 <2,5,3,7>, <1,6,5,7>
+ 3706529641U, // <5,3,7,2>: Cost 4 vsldoi4 <2,5,3,7>, <2,5,3,7>
+ 3365824114U, // <5,3,7,3>: Cost 4 vmrglw <1,6,5,7>, <2,2,3,3>
+ 2774362859U, // <5,3,7,4>: Cost 3 vsldoi12 <3,7,4,5>, <3,7,4,5>
+ 3365824035U, // <5,3,7,5>: Cost 4 vmrglw <1,6,5,7>, <2,1,3,5>
+ 3383740183U, // <5,3,7,6>: Cost 4 vmrglw <4,6,5,7>, <2,4,3,6>
+ 3363833786U, // <5,3,7,7>: Cost 4 vmrglw <1,3,5,7>, <2,6,3,7>
+ 2774657807U, // <5,3,7,u>: Cost 3 vsldoi12 <3,7,u,5>, <3,7,u,5>
+ 2632794214U, // <5,3,u,0>: Cost 3 vsldoi4 <2,5,3,u>, LHS
+ 2754235166U, // <5,3,u,1>: Cost 3 vsldoi12 <0,4,1,5>, <3,u,1,2>
+ 2632796010U, // <5,3,u,2>: Cost 3 vsldoi4 <2,5,3,u>, <2,5,3,u>
+ 2632796676U, // <5,3,u,3>: Cost 3 vsldoi4 <2,5,3,u>, <3,4,5,u>
+ 2632797494U, // <5,3,u,4>: Cost 3 vsldoi4 <2,5,3,u>, RHS
+ 2754235206U, // <5,3,u,5>: Cost 3 vsldoi12 <0,4,1,5>, <3,u,5,6>
+ 2302691176U, // <5,3,u,6>: Cost 3 vmrglw <3,4,5,6>, <2,5,3,6>
+ 2302707642U, // <5,3,u,7>: Cost 3 vmrglw <3,4,5,u>, <2,6,3,7>
+ 2754235229U, // <5,3,u,u>: Cost 3 vsldoi12 <0,4,1,5>, <3,u,u,2>
+ 3765133325U, // <5,4,0,0>: Cost 4 vsldoi8 <1,1,5,4>, <0,0,1,4>
+ 2705326182U, // <5,4,0,1>: Cost 3 vsldoi8 <3,4,5,4>, LHS
+ 3718489806U, // <5,4,0,2>: Cost 4 vsldoi4 <4,5,4,0>, <2,3,4,5>
+ 3718490624U, // <5,4,0,3>: Cost 4 vsldoi4 <4,5,4,0>, <3,4,5,4>
+ 2709307730U, // <5,4,0,4>: Cost 3 vsldoi8 <4,1,5,4>, <0,4,1,5>
+ 2302641870U, // <5,4,0,5>: Cost 3 vmrglw <3,4,5,0>, <2,3,4,5>
+ 3376383695U, // <5,4,0,6>: Cost 5 vmrglw <3,4,5,0>, <2,3,4,6>
+ 3384351018U, // <5,4,0,7>: Cost 4 vmrglw <4,7,5,0>, <u,7,4,7>
+ 2705326749U, // <5,4,0,u>: Cost 3 vsldoi8 <3,4,5,4>, LHS
+ 2305971057U, // <5,4,1,0>: Cost 3 vmrglw <4,0,5,1>, <6,7,4,0>
+ 3765134171U, // <5,4,1,1>: Cost 4 vsldoi8 <1,1,5,4>, <1,1,5,4>
+ 3766461338U, // <5,4,1,2>: Cost 4 vsldoi8 <1,3,5,4>, <1,2,3,4>
+ 3766461437U, // <5,4,1,3>: Cost 4 vsldoi8 <1,3,5,4>, <1,3,5,4>
+ 2311277776U, // <5,4,1,4>: Cost 3 vmrglw <4,u,5,1>, <4,4,4,4>
+ 2754235362U, // <5,4,1,5>: Cost 3 vsldoi12 <0,4,1,5>, <4,1,5,0>
+ 3783050483U, // <5,4,1,6>: Cost 4 vsldoi8 <4,1,5,4>, <1,6,5,7>
+ 3385019036U, // <5,4,1,7>: Cost 4 vmrglw <4,u,5,1>, <3,6,4,7>
+ 2311276241U, // <5,4,1,u>: Cost 3 vmrglw <4,u,5,1>, <2,3,4,u>
+ 3718504550U, // <5,4,2,0>: Cost 4 vsldoi4 <4,5,4,2>, LHS
+ 3783050787U, // <5,4,2,1>: Cost 4 vsldoi8 <4,1,5,4>, <2,1,3,5>
+ 3773097576U, // <5,4,2,2>: Cost 4 vsldoi8 <2,4,5,4>, <2,2,2,2>
+ 2705327822U, // <5,4,2,3>: Cost 3 vsldoi8 <3,4,5,4>, <2,3,4,5>
+ 3773097767U, // <5,4,2,4>: Cost 4 vsldoi8 <2,4,5,4>, <2,4,5,4>
+ 2765737014U, // <5,4,2,5>: Cost 3 vsldoi12 <2,3,4,5>, <4,2,5,3>
+ 3779069882U, // <5,4,2,6>: Cost 4 vsldoi8 <3,4,5,4>, <2,6,3,7>
+ 3376401052U, // <5,4,2,7>: Cost 5 vmrglw <3,4,5,2>, <3,6,4,7>
+ 2245881370U, // <5,4,2,u>: Cost 3 vmrghw <5,2,1,3>, <4,u,5,1>
+ 3779070102U, // <5,4,3,0>: Cost 4 vsldoi8 <3,4,5,4>, <3,0,1,2>
+ 3363135525U, // <5,4,3,1>: Cost 4 vmrglw <1,2,5,3>, <0,0,4,1>
+ 3779070284U, // <5,4,3,2>: Cost 4 vsldoi8 <3,4,5,4>, <3,2,3,4>
+ 3779070364U, // <5,4,3,3>: Cost 4 vsldoi8 <3,4,5,4>, <3,3,3,3>
+ 2705328640U, // <5,4,3,4>: Cost 3 vsldoi8 <3,4,5,4>, <3,4,5,4>
+ 2307311310U, // <5,4,3,5>: Cost 3 vmrglw <4,2,5,3>, <2,3,4,5>
+ 3866021012U, // <5,4,3,6>: Cost 4 vsldoi12 <6,7,4,5>, <4,3,6,7>
+ 3363138204U, // <5,4,3,7>: Cost 5 vmrglw <1,2,5,3>, <3,6,4,7>
+ 2707983172U, // <5,4,3,u>: Cost 3 vsldoi8 <3,u,5,4>, <3,u,5,4>
+ 2708646805U, // <5,4,4,0>: Cost 3 vsldoi8 <4,0,5,4>, <4,0,5,4>
+ 2709310438U, // <5,4,4,1>: Cost 3 vsldoi8 <4,1,5,4>, <4,1,5,4>
+ 3779071030U, // <5,4,4,2>: Cost 4 vsldoi8 <3,4,5,4>, <4,2,5,3>
+ 2710637704U, // <5,4,4,3>: Cost 3 vsldoi8 <4,3,5,4>, <4,3,5,4>
+ 2754235600U, // <5,4,4,4>: Cost 3 vsldoi12 <0,4,1,5>, <4,4,4,4>
+ 1704676570U, // <5,4,4,5>: Cost 2 vsldoi12 <4,4,5,5>, <4,4,5,5>
+ 3779071358U, // <5,4,4,6>: Cost 4 vsldoi8 <3,4,5,4>, <4,6,5,7>
+ 2713292236U, // <5,4,4,7>: Cost 3 vsldoi8 <4,7,5,4>, <4,7,5,4>
+ 1704897781U, // <5,4,4,u>: Cost 2 vsldoi12 <4,4,u,5>, <4,4,u,5>
+ 2626871398U, // <5,4,5,0>: Cost 3 vsldoi4 <1,5,4,5>, LHS
+ 2626872471U, // <5,4,5,1>: Cost 3 vsldoi4 <1,5,4,5>, <1,5,4,5>
+ 2765737230U, // <5,4,5,2>: Cost 3 vsldoi12 <2,3,4,5>, <4,5,2,3>
+ 3700615318U, // <5,4,5,3>: Cost 4 vsldoi4 <1,5,4,5>, <3,0,1,2>
+ 2626874678U, // <5,4,5,4>: Cost 3 vsldoi4 <1,5,4,5>, RHS
+ 1174441270U, // <5,4,5,5>: Cost 2 vmrghw <5,5,5,5>, RHS
+ 1680493878U, // <5,4,5,6>: Cost 2 vsldoi12 <0,4,1,5>, RHS
+ 3385051804U, // <5,4,5,7>: Cost 4 vmrglw <4,u,5,5>, <3,6,4,7>
+ 1680493896U, // <5,4,5,u>: Cost 2 vsldoi12 <0,4,1,5>, RHS
+ 2248952722U, // <5,4,6,0>: Cost 3 vmrghw <5,6,7,0>, <4,0,5,1>
+ 2302692152U, // <5,4,6,1>: Cost 3 vmrglw <3,4,5,6>, <3,u,4,1>
+ 3382406107U, // <5,4,6,2>: Cost 4 vmrglw <4,4,5,6>, <4,1,4,2>
+ 3700623874U, // <5,4,6,3>: Cost 4 vsldoi4 <1,5,4,6>, <3,4,5,6>
+ 2248953040U, // <5,4,6,4>: Cost 3 vmrghw <5,6,7,0>, <4,4,4,4>
+ 1175211318U, // <5,4,6,5>: Cost 2 vmrghw <5,6,7,0>, RHS
+ 3376432280U, // <5,4,6,6>: Cost 4 vmrglw <3,4,5,6>, <1,5,4,6>
+ 2729218934U, // <5,4,6,7>: Cost 3 vsldoi8 <7,4,5,4>, <6,7,4,5>
+ 1175211561U, // <5,4,6,u>: Cost 2 vmrghw <5,6,7,0>, RHS
+ 3787035642U, // <5,4,7,0>: Cost 4 vsldoi8 <4,7,5,4>, <7,0,1,2>
+ 3365822501U, // <5,4,7,1>: Cost 4 vmrglw <1,6,5,7>, <0,0,4,1>
+ 3808933085U, // <5,4,7,2>: Cost 4 vsldoi8 <u,4,5,4>, <7,2,u,4>
+ 3784381707U, // <5,4,7,3>: Cost 4 vsldoi8 <4,3,5,4>, <7,3,4,5>
+ 2713294182U, // <5,4,7,4>: Cost 3 vsldoi8 <4,7,5,4>, <7,4,5,6>
+ 2309998286U, // <5,4,7,5>: Cost 3 vmrglw <4,6,5,7>, <2,3,4,5>
+ 3383740111U, // <5,4,7,6>: Cost 4 vmrglw <4,6,5,7>, <2,3,4,6>
+ 3787036239U, // <5,4,7,7>: Cost 4 vsldoi8 <4,7,5,4>, <7,7,4,5>
+ 2731873960U, // <5,4,7,u>: Cost 3 vsldoi8 <7,u,5,4>, <7,u,5,4>
+ 2626895974U, // <5,4,u,0>: Cost 3 vsldoi4 <1,5,4,u>, LHS
+ 2626897050U, // <5,4,u,1>: Cost 3 vsldoi4 <1,5,4,u>, <1,5,4,u>
+ 2644813518U, // <5,4,u,2>: Cost 3 vsldoi4 <4,5,4,u>, <2,3,4,5>
+ 2705327822U, // <5,4,u,3>: Cost 3 vsldoi8 <3,4,5,4>, <2,3,4,5>
+ 2626899254U, // <5,4,u,4>: Cost 3 vsldoi4 <1,5,4,u>, RHS
+ 1707331102U, // <5,4,u,5>: Cost 2 vsldoi12 <4,u,5,5>, <4,u,5,5>
+ 1680494121U, // <5,4,u,6>: Cost 2 vsldoi12 <0,4,1,5>, RHS
+ 2737183024U, // <5,4,u,7>: Cost 3 vsldoi8 <u,7,5,4>, <u,7,5,4>
+ 1680494139U, // <5,4,u,u>: Cost 2 vsldoi12 <0,4,1,5>, RHS
+ 2302642684U, // <5,5,0,0>: Cost 3 vmrglw <3,4,5,0>, <3,4,5,0>
+ 1640218726U, // <5,5,0,1>: Cost 2 vsldoi8 <4,u,5,5>, LHS
+ 3376384510U, // <5,5,0,2>: Cost 4 vmrglw <3,4,5,0>, <3,4,5,2>
+ 3376385078U, // <5,5,0,3>: Cost 4 vmrglw <3,4,5,0>, <4,2,5,3>
+ 2754236002U, // <5,5,0,4>: Cost 3 vsldoi12 <0,4,1,5>, <5,0,4,1>
+ 2717942242U, // <5,5,0,5>: Cost 3 vsldoi8 <5,5,5,5>, <0,5,u,5>
+ 2244907106U, // <5,5,0,6>: Cost 3 vmrghw <5,0,6,1>, <5,6,7,0>
+ 3376385406U, // <5,5,0,7>: Cost 4 vmrglw <3,4,5,0>, <4,6,5,7>
+ 1640219293U, // <5,5,0,u>: Cost 2 vsldoi8 <4,u,5,5>, LHS
+ 2305969365U, // <5,5,1,0>: Cost 3 vmrglw <4,0,5,1>, <4,4,5,0>
+ 1237536282U, // <5,5,1,1>: Cost 2 vmrglw <4,u,5,1>, <4,u,5,1>
+ 2713961366U, // <5,5,1,2>: Cost 3 vsldoi8 <4,u,5,5>, <1,2,3,0>
+ 3766469630U, // <5,5,1,3>: Cost 4 vsldoi8 <1,3,5,5>, <1,3,5,5>
+ 2782326455U, // <5,5,1,4>: Cost 3 vsldoi12 <5,1,4,5>, <5,1,4,5>
+ 2311277786U, // <5,5,1,5>: Cost 3 vmrglw <4,u,5,1>, <4,4,5,5>
+ 2311277058U, // <5,5,1,6>: Cost 3 vmrglw <4,u,5,1>, <3,4,5,6>
+ 3385017587U, // <5,5,1,7>: Cost 4 vmrglw <4,u,5,1>, <1,6,5,7>
+ 1237536282U, // <5,5,1,u>: Cost 2 vmrglw <4,u,5,1>, <4,u,5,1>
+ 3376400892U, // <5,5,2,0>: Cost 4 vmrglw <3,4,5,2>, <3,4,5,0>
+ 3827977963U, // <5,5,2,1>: Cost 4 vsldoi12 <0,4,1,5>, <5,2,1,3>
+ 2302659070U, // <5,5,2,2>: Cost 3 vmrglw <3,4,5,2>, <3,4,5,2>
+ 2765737726U, // <5,5,2,3>: Cost 3 vsldoi12 <2,3,4,5>, <5,2,3,4>
+ 3839479558U, // <5,5,2,4>: Cost 4 vsldoi12 <2,3,4,5>, <5,2,4,3>
+ 2781073167U, // <5,5,2,5>: Cost 3 vsldoi12 <4,u,5,5>, <5,2,5,3>
+ 2713962426U, // <5,5,2,6>: Cost 3 vsldoi8 <4,u,5,5>, <2,6,3,7>
+ 3376401790U, // <5,5,2,7>: Cost 4 vmrglw <3,4,5,2>, <4,6,5,7>
+ 2769055531U, // <5,5,2,u>: Cost 3 vsldoi12 <2,u,4,5>, <5,2,u,4>
+ 2713962646U, // <5,5,3,0>: Cost 3 vsldoi8 <4,u,5,5>, <3,0,1,2>
+ 3765143786U, // <5,5,3,1>: Cost 4 vsldoi8 <1,1,5,5>, <3,1,1,5>
+ 3839479621U, // <5,5,3,2>: Cost 4 vsldoi12 <2,3,4,5>, <5,3,2,3>
+ 2289394603U, // <5,5,3,3>: Cost 3 vmrglw <1,2,5,3>, <1,2,5,3>
+ 2713963010U, // <5,5,3,4>: Cost 3 vsldoi8 <4,u,5,5>, <3,4,5,6>
+ 2313285150U, // <5,5,3,5>: Cost 3 vmrglw <5,2,5,3>, <4,u,5,5>
+ 3363138050U, // <5,5,3,6>: Cost 4 vmrglw <1,2,5,3>, <3,4,5,6>
+ 3363136755U, // <5,5,3,7>: Cost 4 vmrglw <1,2,5,3>, <1,6,5,7>
+ 2713963294U, // <5,5,3,u>: Cost 3 vsldoi8 <4,u,5,5>, <3,u,1,2>
+ 2713963410U, // <5,5,4,0>: Cost 3 vsldoi8 <4,u,5,5>, <4,0,5,1>
+ 3827978127U, // <5,5,4,1>: Cost 4 vsldoi12 <0,4,1,5>, <5,4,1,5>
+ 3839479704U, // <5,5,4,2>: Cost 4 vsldoi12 <2,3,4,5>, <5,4,2,5>
+ 3376417846U, // <5,5,4,3>: Cost 4 vmrglw <3,4,5,4>, <4,2,5,3>
+ 1637567706U, // <5,5,4,4>: Cost 2 vsldoi8 <4,4,5,5>, <4,4,5,5>
+ 1640222006U, // <5,5,4,5>: Cost 2 vsldoi8 <4,u,5,5>, RHS
+ 2310640998U, // <5,5,4,6>: Cost 3 vmrglw <4,7,5,4>, <7,4,5,6>
+ 3376418174U, // <5,5,4,7>: Cost 4 vmrglw <3,4,5,4>, <4,6,5,7>
+ 1640222238U, // <5,5,4,u>: Cost 2 vsldoi8 <4,u,5,5>, <4,u,5,5>
+ 1577091174U, // <5,5,5,0>: Cost 2 vsldoi4 <5,5,5,5>, LHS
+ 2311310226U, // <5,5,5,1>: Cost 3 vmrglw <4,u,5,5>, <4,0,5,1>
+ 2713964303U, // <5,5,5,2>: Cost 3 vsldoi8 <4,u,5,5>, <5,2,5,3>
+ 2311311119U, // <5,5,5,3>: Cost 3 vmrglw <4,u,5,5>, <5,2,5,3>
+ 1577094454U, // <5,5,5,4>: Cost 2 vsldoi4 <5,5,5,5>, RHS
+ 296144182U, // <5,5,5,5>: Cost 1 vspltisw1 RHS
+ 2311309826U, // <5,5,5,6>: Cost 3 vmrglw <4,u,5,5>, <3,4,5,6>
+ 2311311447U, // <5,5,5,7>: Cost 3 vmrglw <4,u,5,5>, <5,6,5,7>
+ 296144182U, // <5,5,5,u>: Cost 1 vspltisw1 RHS
+ 2248953460U, // <5,5,6,0>: Cost 3 vmrghw <5,6,7,0>, <5,0,6,1>
+ 2326580114U, // <5,5,6,1>: Cost 3 vmrglw <7,4,5,6>, <4,0,5,1>
+ 2713965050U, // <5,5,6,2>: Cost 3 vsldoi8 <4,u,5,5>, <6,2,7,3>
+ 3700697602U, // <5,5,6,3>: Cost 4 vsldoi4 <1,5,5,6>, <3,4,5,6>
+ 2785644620U, // <5,5,6,4>: Cost 3 vsldoi12 <5,6,4,5>, <5,6,4,5>
+ 2781073495U, // <5,5,6,5>: Cost 3 vsldoi12 <4,u,5,5>, <5,6,5,7>
+ 1228950018U, // <5,5,6,6>: Cost 2 vmrglw <3,4,5,6>, <3,4,5,6>
+ 2713965390U, // <5,5,6,7>: Cost 3 vsldoi8 <4,u,5,5>, <6,7,0,1>
+ 1228950018U, // <5,5,6,u>: Cost 2 vmrglw <3,4,5,6>, <3,4,5,6>
+ 2713965562U, // <5,5,7,0>: Cost 3 vsldoi8 <4,u,5,5>, <7,0,1,2>
+ 3383741330U, // <5,5,7,1>: Cost 4 vmrglw <4,6,5,7>, <4,0,5,1>
+ 3718620878U, // <5,5,7,2>: Cost 4 vsldoi4 <4,5,5,7>, <2,3,4,5>
+ 3365823403U, // <5,5,7,3>: Cost 4 vmrglw <1,6,5,7>, <1,2,5,3>
+ 2713965926U, // <5,5,7,4>: Cost 3 vsldoi8 <4,u,5,5>, <7,4,5,6>
+ 2717947318U, // <5,5,7,5>: Cost 3 vsldoi8 <5,5,5,5>, <7,5,5,5>
+ 3365825026U, // <5,5,7,6>: Cost 4 vmrglw <1,6,5,7>, <3,4,5,6>
+ 2292081907U, // <5,5,7,7>: Cost 3 vmrglw <1,6,5,7>, <1,6,5,7>
+ 2713966210U, // <5,5,7,u>: Cost 3 vsldoi8 <4,u,5,5>, <7,u,1,2>
+ 1577091174U, // <5,5,u,0>: Cost 2 vsldoi4 <5,5,5,5>, LHS
+ 1640224558U, // <5,5,u,1>: Cost 2 vsldoi8 <4,u,5,5>, LHS
+ 2713966469U, // <5,5,u,2>: Cost 3 vsldoi8 <4,u,5,5>, <u,2,3,0>
+ 2713966524U, // <5,5,u,3>: Cost 3 vsldoi8 <4,u,5,5>, <u,3,0,1>
+ 1577094454U, // <5,5,u,4>: Cost 2 vsldoi4 <5,5,5,5>, RHS
+ 296144182U, // <5,5,u,5>: Cost 1 vspltisw1 RHS
+ 1228950018U, // <5,5,u,6>: Cost 2 vmrglw <3,4,5,6>, <3,4,5,6>
+ 2713966848U, // <5,5,u,7>: Cost 3 vsldoi8 <4,u,5,5>, <u,7,0,1>
+ 296144182U, // <5,5,u,u>: Cost 1 vspltisw1 RHS
+ 2705342464U, // <5,6,0,0>: Cost 3 vsldoi8 <3,4,5,6>, <0,0,0,0>
+ 1631600742U, // <5,6,0,1>: Cost 2 vsldoi8 <3,4,5,6>, LHS
+ 3773112493U, // <5,6,0,2>: Cost 4 vsldoi8 <2,4,5,6>, <0,2,1,2>
+ 2705342720U, // <5,6,0,3>: Cost 3 vsldoi8 <3,4,5,6>, <0,3,1,4>
+ 2705342802U, // <5,6,0,4>: Cost 3 vsldoi8 <3,4,5,6>, <0,4,1,5>
+ 3779084708U, // <5,6,0,5>: Cost 4 vsldoi8 <3,4,5,6>, <0,5,1,6>
+ 3779084790U, // <5,6,0,6>: Cost 4 vsldoi8 <3,4,5,6>, <0,6,1,7>
+ 2302643510U, // <5,6,0,7>: Cost 3 vmrglw <3,4,5,0>, RHS
+ 1631601309U, // <5,6,0,u>: Cost 2 vsldoi8 <3,4,5,6>, LHS
+ 3767141092U, // <5,6,1,0>: Cost 4 vsldoi8 <1,4,5,6>, <1,0,1,2>
+ 2705343284U, // <5,6,1,1>: Cost 3 vsldoi8 <3,4,5,6>, <1,1,1,1>
+ 2705343382U, // <5,6,1,2>: Cost 3 vsldoi8 <3,4,5,6>, <1,2,3,0>
+ 3779085282U, // <5,6,1,3>: Cost 4 vsldoi8 <3,4,5,6>, <1,3,2,4>
+ 2693399632U, // <5,6,1,4>: Cost 3 vsldoi8 <1,4,5,6>, <1,4,5,6>
+ 3767805089U, // <5,6,1,5>: Cost 4 vsldoi8 <1,5,5,6>, <1,5,5,6>
+ 2311279416U, // <5,6,1,6>: Cost 3 vmrglw <4,u,5,1>, <6,6,6,6>
+ 1237536054U, // <5,6,1,7>: Cost 2 vmrglw <4,u,5,1>, RHS
+ 1237536055U, // <5,6,1,u>: Cost 2 vmrglw <4,u,5,1>, RHS
+ 3773113789U, // <5,6,2,0>: Cost 4 vsldoi8 <2,4,5,6>, <2,0,1,2>
+ 3779085855U, // <5,6,2,1>: Cost 4 vsldoi8 <3,4,5,6>, <2,1,3,1>
+ 2699372136U, // <5,6,2,2>: Cost 3 vsldoi8 <2,4,5,6>, <2,2,2,2>
+ 2705344166U, // <5,6,2,3>: Cost 3 vsldoi8 <3,4,5,6>, <2,3,0,1>
+ 2699372329U, // <5,6,2,4>: Cost 3 vsldoi8 <2,4,5,6>, <2,4,5,6>
+ 2705344360U, // <5,6,2,5>: Cost 3 vsldoi8 <3,4,5,6>, <2,5,3,6>
+ 2705344442U, // <5,6,2,6>: Cost 3 vsldoi8 <3,4,5,6>, <2,6,3,7>
+ 2302659894U, // <5,6,2,7>: Cost 3 vmrglw <3,4,5,2>, RHS
+ 2702026861U, // <5,6,2,u>: Cost 3 vsldoi8 <2,u,5,6>, <2,u,5,6>
+ 2705344662U, // <5,6,3,0>: Cost 3 vsldoi8 <3,4,5,6>, <3,0,1,2>
+ 3767142661U, // <5,6,3,1>: Cost 4 vsldoi8 <1,4,5,6>, <3,1,4,5>
+ 3773114689U, // <5,6,3,2>: Cost 4 vsldoi8 <2,4,5,6>, <3,2,2,2>
+ 2705344924U, // <5,6,3,3>: Cost 3 vsldoi8 <3,4,5,6>, <3,3,3,3>
+ 1631603202U, // <5,6,3,4>: Cost 2 vsldoi8 <3,4,5,6>, <3,4,5,6>
+ 3842945597U, // <5,6,3,5>: Cost 4 vsldoi12 <2,u,6,5>, <6,3,5,7>
+ 3779086962U, // <5,6,3,6>: Cost 4 vsldoi8 <3,4,5,6>, <3,6,0,1>
+ 2289397046U, // <5,6,3,7>: Cost 3 vmrglw <1,2,5,3>, RHS
+ 1634257734U, // <5,6,3,u>: Cost 2 vsldoi8 <3,u,5,6>, <3,u,5,6>
+ 2644926566U, // <5,6,4,0>: Cost 3 vsldoi4 <4,5,6,4>, LHS
+ 3779087306U, // <5,6,4,1>: Cost 4 vsldoi8 <3,4,5,6>, <4,1,2,3>
+ 2790142577U, // <5,6,4,2>: Cost 3 vsldoi12 <6,4,2,5>, <6,4,2,5>
+ 2644929026U, // <5,6,4,3>: Cost 3 vsldoi4 <4,5,6,4>, <3,4,5,6>
+ 2711317723U, // <5,6,4,4>: Cost 3 vsldoi8 <4,4,5,6>, <4,4,5,6>
+ 1631604022U, // <5,6,4,5>: Cost 2 vsldoi8 <3,4,5,6>, RHS
+ 2712644989U, // <5,6,4,6>: Cost 3 vsldoi8 <4,6,5,6>, <4,6,5,6>
+ 2302676278U, // <5,6,4,7>: Cost 3 vmrglw <3,4,5,4>, RHS
+ 1631604265U, // <5,6,4,u>: Cost 2 vsldoi8 <3,4,5,6>, RHS
+ 3842945708U, // <5,6,5,0>: Cost 4 vsldoi12 <2,u,6,5>, <6,5,0,1>
+ 3767144133U, // <5,6,5,1>: Cost 4 vsldoi8 <1,4,5,6>, <5,1,6,1>
+ 2705346328U, // <5,6,5,2>: Cost 3 vsldoi8 <3,4,5,6>, <5,2,6,3>
+ 3779088207U, // <5,6,5,3>: Cost 4 vsldoi8 <3,4,5,6>, <5,3,3,4>
+ 2717290420U, // <5,6,5,4>: Cost 3 vsldoi8 <5,4,5,6>, <5,4,5,6>
+ 2705346574U, // <5,6,5,5>: Cost 3 vsldoi8 <3,4,5,6>, <5,5,6,6>
+ 2705346596U, // <5,6,5,6>: Cost 3 vsldoi8 <3,4,5,6>, <5,6,0,1>
+ 1237568822U, // <5,6,5,7>: Cost 2 vmrglw <4,u,5,5>, RHS
+ 1237568823U, // <5,6,5,u>: Cost 2 vmrglw <4,u,5,5>, RHS
+ 2650914918U, // <5,6,6,0>: Cost 3 vsldoi4 <5,5,6,6>, LHS
+ 3364490949U, // <5,6,6,1>: Cost 4 vmrglw <1,4,5,6>, <5,1,6,1>
+ 2248954362U, // <5,6,6,2>: Cost 3 vmrghw <5,6,7,0>, <6,2,7,3>
+ 2302693144U, // <5,6,6,3>: Cost 3 vmrglw <3,4,5,6>, <5,2,6,3>
+ 2650918198U, // <5,6,6,4>: Cost 3 vsldoi4 <5,5,6,6>, RHS
+ 2650918926U, // <5,6,6,5>: Cost 3 vsldoi4 <5,5,6,6>, <5,5,6,6>
+ 2302693390U, // <5,6,6,6>: Cost 3 vmrglw <3,4,5,6>, <5,5,6,6>
+ 1228950838U, // <5,6,6,7>: Cost 2 vmrglw <3,4,5,6>, RHS
+ 1228950839U, // <5,6,6,u>: Cost 2 vmrglw <3,4,5,6>, RHS
+ 497467494U, // <5,6,7,0>: Cost 1 vsldoi4 RHS, LHS
+ 1571210036U, // <5,6,7,1>: Cost 2 vsldoi4 RHS, <1,1,1,1>
+ 1571210856U, // <5,6,7,2>: Cost 2 vsldoi4 RHS, <2,2,2,2>
+ 1571211414U, // <5,6,7,3>: Cost 2 vsldoi4 RHS, <3,0,1,2>
+ 497470774U, // <5,6,7,4>: Cost 1 vsldoi4 RHS, RHS
+ 1571213316U, // <5,6,7,5>: Cost 2 vsldoi4 RHS, <5,5,5,5>
+ 1571213818U, // <5,6,7,6>: Cost 2 vsldoi4 RHS, <6,2,7,3>
+ 1571214956U, // <5,6,7,7>: Cost 2 vsldoi4 RHS, <7,7,7,7>
+ 497473326U, // <5,6,7,u>: Cost 1 vsldoi4 RHS, LHS
+ 497475686U, // <5,6,u,0>: Cost 1 vsldoi4 RHS, LHS
+ 1631606574U, // <5,6,u,1>: Cost 2 vsldoi8 <3,4,5,6>, LHS
+ 1571219048U, // <5,6,u,2>: Cost 2 vsldoi4 RHS, <2,2,2,2>
+ 1571219606U, // <5,6,u,3>: Cost 2 vsldoi4 RHS, <3,0,1,2>
+ 497478967U, // <5,6,u,4>: Cost 1 vsldoi4 RHS, RHS
+ 1631606938U, // <5,6,u,5>: Cost 2 vsldoi8 <3,4,5,6>, RHS
+ 1571222010U, // <5,6,u,6>: Cost 2 vsldoi4 RHS, <6,2,7,3>
+ 1228967222U, // <5,6,u,7>: Cost 2 vmrglw <3,4,5,u>, RHS
+ 497481518U, // <5,6,u,u>: Cost 1 vsldoi4 RHS, LHS
+ 3768475648U, // <5,7,0,0>: Cost 4 vsldoi8 <1,6,5,7>, <0,0,0,0>
+ 2694733926U, // <5,7,0,1>: Cost 3 vsldoi8 <1,6,5,7>, LHS
+ 3718711395U, // <5,7,0,2>: Cost 4 vsldoi4 <4,5,7,0>, <2,u,4,5>
+ 3384349178U, // <5,7,0,3>: Cost 4 vmrglw <4,7,5,0>, <6,2,7,3>
+ 2694734162U, // <5,7,0,4>: Cost 3 vsldoi8 <1,6,5,7>, <0,4,1,5>
+ 3384347884U, // <5,7,0,5>: Cost 4 vmrglw <4,7,5,0>, <4,4,7,5>
+ 3730658026U, // <5,7,0,6>: Cost 4 vsldoi4 <6,5,7,0>, <6,5,7,0>
+ 3718714362U, // <5,7,0,7>: Cost 4 vsldoi4 <4,5,7,0>, <7,0,1,2>
+ 2694734493U, // <5,7,0,u>: Cost 3 vsldoi8 <1,6,5,7>, LHS
+ 2311278690U, // <5,7,1,0>: Cost 3 vmrglw <4,u,5,1>, <5,6,7,0>
+ 2305970923U, // <5,7,1,1>: Cost 3 vmrglw <4,0,5,1>, <6,5,7,1>
+ 3768476566U, // <5,7,1,2>: Cost 4 vsldoi8 <1,6,5,7>, <1,2,3,0>
+ 2311279098U, // <5,7,1,3>: Cost 3 vmrglw <4,u,5,1>, <6,2,7,3>
+ 2311278694U, // <5,7,1,4>: Cost 3 vmrglw <4,u,5,1>, <5,6,7,4>
+ 3768476783U, // <5,7,1,5>: Cost 4 vsldoi8 <1,6,5,7>, <1,5,0,1>
+ 2694735091U, // <5,7,1,6>: Cost 3 vsldoi8 <1,6,5,7>, <1,6,5,7>
+ 2311279426U, // <5,7,1,7>: Cost 3 vmrglw <4,u,5,1>, <6,6,7,7>
+ 2696062357U, // <5,7,1,u>: Cost 3 vsldoi8 <1,u,5,7>, <1,u,5,7>
+ 3383701602U, // <5,7,2,0>: Cost 4 vmrglw <4,6,5,2>, <5,6,7,0>
+ 3768477219U, // <5,7,2,1>: Cost 4 vsldoi8 <1,6,5,7>, <2,1,3,5>
+ 3768477288U, // <5,7,2,2>: Cost 4 vsldoi8 <1,6,5,7>, <2,2,2,2>
+ 2309960186U, // <5,7,2,3>: Cost 3 vmrglw <4,6,5,2>, <6,2,7,3>
+ 3383701606U, // <5,7,2,4>: Cost 4 vmrglw <4,6,5,2>, <5,6,7,4>
+ 3768477545U, // <5,7,2,5>: Cost 4 vsldoi8 <1,6,5,7>, <2,5,3,7>
+ 3766486970U, // <5,7,2,6>: Cost 4 vsldoi8 <1,3,5,7>, <2,6,3,7>
+ 3383702338U, // <5,7,2,7>: Cost 4 vmrglw <4,6,5,2>, <6,6,7,7>
+ 2309960186U, // <5,7,2,u>: Cost 3 vmrglw <4,6,5,2>, <6,2,7,3>
+ 3768477846U, // <5,7,3,0>: Cost 4 vsldoi8 <1,6,5,7>, <3,0,1,2>
+ 3768477975U, // <5,7,3,1>: Cost 4 vsldoi8 <1,6,5,7>, <3,1,6,5>
+ 3786393932U, // <5,7,3,2>: Cost 4 vsldoi8 <4,6,5,7>, <3,2,3,4>
+ 3768478108U, // <5,7,3,3>: Cost 4 vsldoi8 <1,6,5,7>, <3,3,3,3>
+ 2795599115U, // <5,7,3,4>: Cost 3 vsldoi12 <7,3,4,5>, <7,3,4,5>
+ 3385037470U, // <5,7,3,5>: Cost 4 vmrglw <4,u,5,3>, <6,4,7,5>
+ 3780422309U, // <5,7,3,6>: Cost 4 vsldoi8 <3,6,5,7>, <3,6,5,7>
+ 3848107301U, // <5,7,3,7>: Cost 4 vsldoi12 <3,7,4,5>, <7,3,7,4>
+ 2795894063U, // <5,7,3,u>: Cost 3 vsldoi12 <7,3,u,5>, <7,3,u,5>
+ 2795967800U, // <5,7,4,0>: Cost 3 vsldoi12 <7,4,0,5>, <7,4,0,5>
+ 3768478690U, // <5,7,4,1>: Cost 4 vsldoi8 <1,6,5,7>, <4,1,5,0>
+ 3718744163U, // <5,7,4,2>: Cost 4 vsldoi4 <4,5,7,4>, <2,u,4,5>
+ 3784404107U, // <5,7,4,3>: Cost 4 vsldoi8 <4,3,5,7>, <4,3,5,7>
+ 2796262748U, // <5,7,4,4>: Cost 3 vsldoi12 <7,4,4,5>, <7,4,4,5>
+ 2694737206U, // <5,7,4,5>: Cost 3 vsldoi8 <1,6,5,7>, RHS
+ 2712653182U, // <5,7,4,6>: Cost 3 vsldoi8 <4,6,5,7>, <4,6,5,7>
+ 2713316815U, // <5,7,4,7>: Cost 3 vsldoi8 <4,7,5,7>, <4,7,5,7>
+ 2694737449U, // <5,7,4,u>: Cost 3 vsldoi8 <1,6,5,7>, RHS
+ 2311311458U, // <5,7,5,0>: Cost 3 vmrglw <4,u,5,5>, <5,6,7,0>
+ 3768479433U, // <5,7,5,1>: Cost 4 vsldoi8 <1,6,5,7>, <5,1,6,5>
+ 3768479521U, // <5,7,5,2>: Cost 4 vsldoi8 <1,6,5,7>, <5,2,7,3>
+ 2311311866U, // <5,7,5,3>: Cost 3 vmrglw <4,u,5,5>, <6,2,7,3>
+ 2311311462U, // <5,7,5,4>: Cost 3 vmrglw <4,u,5,5>, <5,6,7,4>
+ 2248185270U, // <5,7,5,5>: Cost 3 vmrghw <5,5,5,5>, <7,5,5,5>
+ 2718625879U, // <5,7,5,6>: Cost 3 vsldoi8 <5,6,5,7>, <5,6,5,7>
+ 2311312194U, // <5,7,5,7>: Cost 3 vmrglw <4,u,5,5>, <6,6,7,7>
+ 2311311466U, // <5,7,5,u>: Cost 3 vmrglw <4,u,5,5>, <5,6,7,u>
+ 2248954874U, // <5,7,6,0>: Cost 3 vmrghw <5,6,7,0>, <7,0,1,2>
+ 3322696778U, // <5,7,6,1>: Cost 4 vmrghw <5,6,7,0>, <7,1,1,1>
+ 2248955028U, // <5,7,6,2>: Cost 3 vmrghw <5,6,7,0>, <7,2,0,3>
+ 2656963074U, // <5,7,6,3>: Cost 3 vsldoi4 <6,5,7,6>, <3,4,5,6>
+ 2248955238U, // <5,7,6,4>: Cost 3 vmrghw <5,6,7,0>, <7,4,5,6>
+ 2248955329U, // <5,7,6,5>: Cost 3 vmrghw <5,6,7,0>, <7,5,6,7>
+ 2656965360U, // <5,7,6,6>: Cost 3 vsldoi4 <6,5,7,6>, <6,5,7,6>
+ 2248955500U, // <5,7,6,7>: Cost 3 vmrghw <5,6,7,0>, <7,7,7,7>
+ 2248955522U, // <5,7,6,u>: Cost 3 vmrghw <5,6,7,0>, <7,u,1,2>
+ 3718766694U, // <5,7,7,0>: Cost 4 vsldoi4 <4,5,7,7>, LHS
+ 3724739827U, // <5,7,7,1>: Cost 4 vsldoi4 <5,5,7,7>, <1,6,5,7>
+ 3718768739U, // <5,7,7,2>: Cost 4 vsldoi4 <4,5,7,7>, <2,u,4,5>
+ 3365826337U, // <5,7,7,3>: Cost 4 vmrglw <1,6,5,7>, <5,2,7,3>
+ 2798253647U, // <5,7,7,4>: Cost 3 vsldoi12 <7,7,4,5>, <7,7,4,5>
+ 3365826258U, // <5,7,7,5>: Cost 4 vmrglw <1,6,5,7>, <5,1,7,5>
+ 3730715377U, // <5,7,7,6>: Cost 4 vsldoi4 <6,5,7,7>, <6,5,7,7>
+ 2310665836U, // <5,7,7,7>: Cost 3 vmrglw <4,7,5,7>, <7,7,7,7>
+ 2798548595U, // <5,7,7,u>: Cost 3 vsldoi12 <7,7,u,5>, <7,7,u,5>
+ 2311336034U, // <5,7,u,0>: Cost 3 vmrglw <4,u,5,u>, <5,6,7,0>
+ 2694739758U, // <5,7,u,1>: Cost 3 vsldoi8 <1,6,5,7>, LHS
+ 2248955028U, // <5,7,u,2>: Cost 3 vmrghw <5,6,7,0>, <7,2,0,3>
+ 2311336442U, // <5,7,u,3>: Cost 3 vmrglw <4,u,5,u>, <6,2,7,3>
+ 2311336038U, // <5,7,u,4>: Cost 3 vmrglw <4,u,5,u>, <5,6,7,4>
+ 2694740122U, // <5,7,u,5>: Cost 3 vsldoi8 <1,6,5,7>, RHS
+ 2656981746U, // <5,7,u,6>: Cost 3 vsldoi4 <6,5,7,u>, <6,5,7,u>
+ 2311336770U, // <5,7,u,7>: Cost 3 vmrglw <4,u,5,u>, <6,6,7,7>
+ 2694740325U, // <5,7,u,u>: Cost 3 vsldoi8 <1,6,5,7>, LHS
+ 2705358848U, // <5,u,0,0>: Cost 3 vsldoi8 <3,4,5,u>, <0,0,0,0>
+ 1631617126U, // <5,u,0,1>: Cost 2 vsldoi8 <3,4,5,u>, LHS
+ 2310607866U, // <5,u,0,2>: Cost 3 vmrglw <4,7,5,0>, <7,0,1,2>
+ 2302640284U, // <5,u,0,3>: Cost 3 vmrglw <3,4,5,0>, LHS
+ 2754238189U, // <5,u,0,4>: Cost 3 vsldoi12 <0,4,1,5>, <u,0,4,1>
+ 2305296114U, // <5,u,0,5>: Cost 3 vmrglw <3,u,5,0>, <2,3,u,5>
+ 2244907106U, // <5,u,0,6>: Cost 3 vmrghw <5,0,6,1>, <5,6,7,0>
+ 2302643528U, // <5,u,0,7>: Cost 3 vmrglw <3,4,5,0>, RHS
+ 1631617693U, // <5,u,0,u>: Cost 2 vsldoi8 <3,4,5,u>, LHS
+ 2627133542U, // <5,u,1,0>: Cost 3 vsldoi4 <1,5,u,1>, LHS
+ 1237536282U, // <5,u,1,1>: Cost 2 vmrglw <4,u,5,1>, <4,u,5,1>
+ 1680496430U, // <5,u,1,2>: Cost 2 vsldoi12 <0,4,1,5>, LHS
+ 1237532828U, // <5,u,1,3>: Cost 2 vmrglw <4,u,5,1>, LHS
+ 2693416018U, // <5,u,1,4>: Cost 3 vsldoi8 <1,4,5,u>, <1,4,5,u>
+ 2756892486U, // <5,u,1,5>: Cost 3 vsldoi12 <0,u,1,5>, <u,1,5,0>
+ 2694743284U, // <5,u,1,6>: Cost 3 vsldoi8 <1,6,5,u>, <1,6,5,u>
+ 1237536072U, // <5,u,1,7>: Cost 2 vmrglw <4,u,5,1>, RHS
+ 1680496484U, // <5,u,1,u>: Cost 2 vsldoi12 <0,4,1,5>, LHS
+ 2311288709U, // <5,u,2,0>: Cost 3 vmrglw <4,u,5,2>, <u,2,3,0>
+ 2245883694U, // <5,u,2,1>: Cost 3 vmrghw <5,2,1,3>, LHS
+ 2699388520U, // <5,u,2,2>: Cost 3 vsldoi8 <2,4,5,u>, <2,2,2,2>
+ 2754238344U, // <5,u,2,3>: Cost 3 vsldoi12 <0,4,1,5>, <u,2,3,3>
+ 2699388715U, // <5,u,2,4>: Cost 3 vsldoi8 <2,4,5,u>, <2,4,5,u>
+ 2757408666U, // <5,u,2,5>: Cost 3 vsldoi12 <0,u,u,5>, <u,2,5,3>
+ 2705360826U, // <5,u,2,6>: Cost 3 vsldoi8 <3,4,5,u>, <2,6,3,7>
+ 2302659912U, // <5,u,2,7>: Cost 3 vmrglw <3,4,5,2>, RHS
+ 2754238389U, // <5,u,2,u>: Cost 3 vsldoi12 <0,4,1,5>, <u,2,u,3>
+ 2754238396U, // <5,u,3,0>: Cost 3 vsldoi12 <0,4,1,5>, <u,3,0,1>
+ 3827980229U, // <5,u,3,1>: Cost 4 vsldoi12 <0,4,1,5>, <u,3,1,1>
+ 2644625102U, // <5,u,3,2>: Cost 3 vsldoi4 <4,5,2,3>, <2,3,4,5>
+ 2289393820U, // <5,u,3,3>: Cost 3 vmrglw <1,2,5,3>, LHS
+ 1631619588U, // <5,u,3,4>: Cost 2 vsldoi8 <3,4,5,u>, <3,4,5,u>
+ 2785056749U, // <5,u,3,5>: Cost 3 vsldoi12 <5,5,5,5>, <u,3,5,5>
+ 3363138077U, // <5,u,3,6>: Cost 4 vmrglw <1,2,5,3>, <3,4,u,6>
+ 2289397064U, // <5,u,3,7>: Cost 3 vmrglw <1,2,5,3>, RHS
+ 1634274120U, // <5,u,3,u>: Cost 2 vsldoi8 <3,u,5,u>, <3,u,5,u>
+ 1634937753U, // <5,u,4,0>: Cost 2 vsldoi8 <4,0,5,u>, <4,0,5,u>
+ 1728272410U, // <5,u,4,1>: Cost 2 vsldoi12 <u,4,1,5>, <u,4,1,5>
+ 2710006843U, // <5,u,4,2>: Cost 3 vsldoi8 <4,2,5,u>, <4,2,5,u>
+ 2765740076U, // <5,u,4,3>: Cost 3 vsldoi12 <2,3,4,5>, <u,4,3,5>
+ 1637592285U, // <5,u,4,4>: Cost 2 vsldoi8 <4,4,5,u>, <4,4,5,u>
+ 1631620406U, // <5,u,4,5>: Cost 2 vsldoi8 <3,4,5,u>, RHS
+ 2712661375U, // <5,u,4,6>: Cost 3 vsldoi8 <4,6,5,u>, <4,6,5,u>
+ 2302676296U, // <5,u,4,7>: Cost 3 vmrglw <3,4,5,4>, RHS
+ 1631620649U, // <5,u,4,u>: Cost 2 vsldoi8 <3,4,5,u>, RHS
+ 1577091174U, // <5,u,5,0>: Cost 2 vsldoi4 <5,5,5,5>, LHS
+ 1174443822U, // <5,u,5,1>: Cost 2 vmrghw <5,5,5,5>, LHS
+ 2766035058U, // <5,u,5,2>: Cost 3 vsldoi12 <2,3,u,5>, <u,5,2,3>
+ 1237565596U, // <5,u,5,3>: Cost 2 vmrglw <4,u,5,5>, LHS
+ 1577094454U, // <5,u,5,4>: Cost 2 vsldoi4 <5,5,5,5>, RHS
+ 296144182U, // <5,u,5,5>: Cost 1 vspltisw1 RHS
+ 1680496794U, // <5,u,5,6>: Cost 2 vsldoi12 <0,4,1,5>, RHS
+ 1237568840U, // <5,u,5,7>: Cost 2 vmrglw <4,u,5,5>, RHS
+ 296144182U, // <5,u,5,u>: Cost 1 vspltisw1 RHS
+ 2633146470U, // <5,u,6,0>: Cost 3 vsldoi4 <2,5,u,6>, LHS
+ 1175213870U, // <5,u,6,1>: Cost 2 vmrghw <5,6,7,0>, LHS
+ 2633148309U, // <5,u,6,2>: Cost 3 vsldoi4 <2,5,u,6>, <2,5,u,6>
+ 1228947612U, // <5,u,6,3>: Cost 2 vmrglw <3,4,5,6>, LHS
+ 2633149750U, // <5,u,6,4>: Cost 3 vsldoi4 <2,5,u,6>, RHS
+ 1175214234U, // <5,u,6,5>: Cost 2 vmrghw <5,6,7,0>, RHS
+ 1228950018U, // <5,u,6,6>: Cost 2 vmrglw <3,4,5,6>, <3,4,5,6>
+ 1228950856U, // <5,u,6,7>: Cost 2 vmrglw <3,4,5,6>, RHS
+ 1228947617U, // <5,u,6,u>: Cost 2 vmrglw <3,4,5,6>, LHS
+ 497614950U, // <5,u,7,0>: Cost 1 vsldoi4 RHS, LHS
+ 1571357492U, // <5,u,7,1>: Cost 2 vsldoi4 RHS, <1,1,1,1>
+ 1571358312U, // <5,u,7,2>: Cost 2 vsldoi4 RHS, <2,2,2,2>
+ 1571358870U, // <5,u,7,3>: Cost 2 vsldoi4 RHS, <3,0,1,2>
+ 497618248U, // <5,u,7,4>: Cost 1 vsldoi4 RHS, RHS
+ 1571360772U, // <5,u,7,5>: Cost 2 vsldoi4 RHS, <5,5,5,5>
+ 1571361274U, // <5,u,7,6>: Cost 2 vsldoi4 RHS, <6,2,7,3>
+ 1571361786U, // <5,u,7,7>: Cost 2 vsldoi4 RHS, <7,0,1,2>
+ 497620782U, // <5,u,7,u>: Cost 1 vsldoi4 RHS, LHS
+ 497623142U, // <5,u,u,0>: Cost 1 vsldoi4 RHS, LHS
+ 1631622958U, // <5,u,u,1>: Cost 2 vsldoi8 <3,4,5,u>, LHS
+ 1680496997U, // <5,u,u,2>: Cost 2 vsldoi12 <0,4,1,5>, LHS
+ 1228963996U, // <5,u,u,3>: Cost 2 vmrglw <3,4,5,u>, LHS
+ 497626441U, // <5,u,u,4>: Cost 1 vsldoi4 RHS, RHS
+ 296144182U, // <5,u,u,5>: Cost 1 vspltisw1 RHS
+ 1680497037U, // <5,u,u,6>: Cost 2 vsldoi12 <0,4,1,5>, RHS
+ 1228967240U, // <5,u,u,7>: Cost 2 vmrglw <3,4,5,u>, RHS
+ 497628974U, // <5,u,u,u>: Cost 1 vsldoi4 RHS, LHS
+ 2772451328U, // <6,0,0,0>: Cost 3 vsldoi12 <3,4,5,6>, <0,0,0,0>
+ 2772451338U, // <6,0,0,1>: Cost 3 vsldoi12 <3,4,5,6>, <0,0,1,1>
+ 3771146417U, // <6,0,0,2>: Cost 4 vsldoi8 <2,1,6,0>, <0,2,1,6>
+ 3383095739U, // <6,0,0,3>: Cost 4 vmrglw <4,5,6,0>, <6,2,0,3>
+ 3846193189U, // <6,0,0,4>: Cost 4 vsldoi12 <3,4,5,6>, <0,0,4,1>
+ 3724832803U, // <6,0,0,5>: Cost 4 vsldoi4 <5,6,0,0>, <5,6,0,0>
+ 3383095985U, // <6,0,0,6>: Cost 4 vmrglw <4,5,6,0>, <6,5,0,6>
+ 3383096067U, // <6,0,0,7>: Cost 4 vmrglw <4,5,6,0>, <6,6,0,7>
+ 2772451401U, // <6,0,0,u>: Cost 3 vsldoi12 <3,4,5,6>, <0,0,u,1>
+ 2651095142U, // <6,0,1,0>: Cost 3 vsldoi4 <5,6,0,1>, LHS
+ 2251612262U, // <6,0,1,1>: Cost 3 vmrghw <6,1,7,1>, LHS
+ 1698709606U, // <6,0,1,2>: Cost 2 vsldoi12 <3,4,5,6>, LHS
+ 2651097602U, // <6,0,1,3>: Cost 3 vsldoi4 <5,6,0,1>, <3,4,5,6>
+ 2651098422U, // <6,0,1,4>: Cost 3 vsldoi4 <5,6,0,1>, RHS
+ 2651099172U, // <6,0,1,5>: Cost 3 vsldoi4 <5,6,0,1>, <5,6,0,1>
+ 2657071869U, // <6,0,1,6>: Cost 3 vsldoi4 <6,6,0,1>, <6,6,0,1>
+ 3724841978U, // <6,0,1,7>: Cost 4 vsldoi4 <5,6,0,1>, <7,0,1,2>
+ 1698709660U, // <6,0,1,u>: Cost 2 vsldoi12 <3,4,5,6>, LHS
+ 2252292096U, // <6,0,2,0>: Cost 3 vmrghw <6,2,7,3>, <0,0,0,0>
+ 1178550374U, // <6,0,2,1>: Cost 2 vmrghw <6,2,7,3>, LHS
+ 3826655418U, // <6,0,2,2>: Cost 4 vsldoi12 <0,2,1,6>, <0,2,2,6>
+ 3777783485U, // <6,0,2,3>: Cost 4 vsldoi8 <3,2,6,0>, <2,3,2,6>
+ 2252292434U, // <6,0,2,4>: Cost 3 vmrghw <6,2,7,3>, <0,4,1,5>
+ 3785746280U, // <6,0,2,5>: Cost 4 vsldoi8 <4,5,6,0>, <2,5,3,6>
+ 2252292593U, // <6,0,2,6>: Cost 3 vmrghw <6,2,7,3>, <0,6,1,2>
+ 3736794583U, // <6,0,2,7>: Cost 4 vsldoi4 <7,6,0,2>, <7,6,0,2>
+ 1178550941U, // <6,0,2,u>: Cost 2 vmrghw <6,2,7,3>, LHS
+ 3375153152U, // <6,0,3,0>: Cost 4 vmrglw <3,2,6,3>, <0,0,0,0>
+ 2772451584U, // <6,0,3,1>: Cost 3 vsldoi12 <3,4,5,6>, <0,3,1,4>
+ 3777784163U, // <6,0,3,2>: Cost 4 vsldoi8 <3,2,6,0>, <3,2,6,0>
+ 3846193426U, // <6,0,3,3>: Cost 4 vsldoi12 <3,4,5,6>, <0,3,3,4>
+ 2712005122U, // <6,0,3,4>: Cost 3 vsldoi8 <4,5,6,0>, <3,4,5,6>
+ 3724857382U, // <6,0,3,5>: Cost 4 vsldoi4 <5,6,0,3>, <5,6,0,3>
+ 3802335864U, // <6,0,3,6>: Cost 4 vsldoi8 <7,3,6,0>, <3,6,0,7>
+ 3801672410U, // <6,0,3,7>: Cost 4 vsldoi8 <7,2,6,0>, <3,7,2,6>
+ 2772451647U, // <6,0,3,u>: Cost 3 vsldoi12 <3,4,5,6>, <0,3,u,4>
+ 3383123968U, // <6,0,4,0>: Cost 4 vmrglw <4,5,6,4>, <0,0,0,0>
+ 2772451666U, // <6,0,4,1>: Cost 3 vsldoi12 <3,4,5,6>, <0,4,1,5>
+ 3773803577U, // <6,0,4,2>: Cost 4 vsldoi8 <2,5,6,0>, <4,2,5,6>
+ 3724864002U, // <6,0,4,3>: Cost 4 vsldoi4 <5,6,0,4>, <3,4,5,6>
+ 3846193517U, // <6,0,4,4>: Cost 4 vsldoi12 <3,4,5,6>, <0,4,4,5>
+ 2712005935U, // <6,0,4,5>: Cost 3 vsldoi8 <4,5,6,0>, <4,5,6,0>
+ 3327009265U, // <6,0,4,6>: Cost 4 vmrghw <6,4,2,5>, <0,6,1,2>
+ 3383126648U, // <6,0,4,7>: Cost 5 vmrglw <4,5,6,4>, <3,6,0,7>
+ 2772451729U, // <6,0,4,u>: Cost 3 vsldoi12 <3,4,5,6>, <0,4,u,5>
+ 3373178880U, // <6,0,5,0>: Cost 4 vmrglw <2,u,6,5>, <0,0,0,0>
+ 2254266470U, // <6,0,5,1>: Cost 3 vmrghw <6,5,7,1>, LHS
+ 3785748248U, // <6,0,5,2>: Cost 4 vsldoi8 <4,5,6,0>, <5,2,6,3>
+ 3790393190U, // <6,0,5,3>: Cost 4 vsldoi8 <5,3,6,0>, <5,3,6,0>
+ 3328000338U, // <6,0,5,4>: Cost 4 vmrghw <6,5,7,0>, <0,4,1,5>
+ 3785748494U, // <6,0,5,5>: Cost 4 vsldoi8 <4,5,6,0>, <5,5,6,6>
+ 3785748516U, // <6,0,5,6>: Cost 4 vsldoi8 <4,5,6,0>, <5,6,0,1>
+ 3379153528U, // <6,0,5,7>: Cost 4 vmrglw <3,u,6,5>, <3,6,0,7>
+ 2254267037U, // <6,0,5,u>: Cost 3 vmrghw <6,5,7,1>, LHS
+ 2254897152U, // <6,0,6,0>: Cost 3 vmrghw <6,6,6,6>, <0,0,0,0>
+ 1181155430U, // <6,0,6,1>: Cost 2 vmrghw <6,6,6,6>, LHS
+ 3785748923U, // <6,0,6,2>: Cost 4 vsldoi8 <4,5,6,0>, <6,2,0,3>
+ 3785749042U, // <6,0,6,3>: Cost 4 vsldoi8 <4,5,6,0>, <6,3,4,5>
+ 2254897490U, // <6,0,6,4>: Cost 3 vmrghw <6,6,6,6>, <0,4,1,5>
+ 3785749169U, // <6,0,6,5>: Cost 4 vsldoi8 <4,5,6,0>, <6,5,0,6>
+ 2724614962U, // <6,0,6,6>: Cost 3 vsldoi8 <6,6,6,0>, <6,6,6,0>
+ 3787739982U, // <6,0,6,7>: Cost 4 vsldoi8 <4,u,6,0>, <6,7,0,1>
+ 1181155997U, // <6,0,6,u>: Cost 2 vmrghw <6,6,6,6>, LHS
+ 1235664896U, // <6,0,7,0>: Cost 2 vmrglw RHS, <0,0,0,0>
+ 1235666598U, // <6,0,7,1>: Cost 2 vmrglw RHS, <2,3,0,1>
+ 3712943720U, // <6,0,7,2>: Cost 4 vsldoi4 <3,6,0,7>, <2,2,2,2>
+ 2639202936U, // <6,0,7,3>: Cost 3 vsldoi4 <3,6,0,7>, <3,6,0,7>
+ 2639203638U, // <6,0,7,4>: Cost 3 vsldoi4 <3,6,0,7>, RHS
+ 2309409236U, // <6,0,7,5>: Cost 3 vmrglw RHS, <3,4,0,5>
+ 3712946517U, // <6,0,7,6>: Cost 4 vsldoi4 <3,6,0,7>, <6,0,7,0>
+ 2309409400U, // <6,0,7,7>: Cost 3 vmrglw RHS, <3,6,0,7>
+ 1235666605U, // <6,0,7,u>: Cost 2 vmrglw RHS, <2,3,0,u>
+ 1235673088U, // <6,0,u,0>: Cost 2 vmrglw RHS, <0,0,0,0>
+ 1235674790U, // <6,0,u,1>: Cost 2 vmrglw RHS, <2,3,0,1>
+ 1698710173U, // <6,0,u,2>: Cost 2 vsldoi12 <3,4,5,6>, LHS
+ 2639211129U, // <6,0,u,3>: Cost 3 vsldoi4 <3,6,0,u>, <3,6,0,u>
+ 2639211830U, // <6,0,u,4>: Cost 3 vsldoi4 <3,6,0,u>, RHS
+ 2712008858U, // <6,0,u,5>: Cost 3 vsldoi8 <4,5,6,0>, RHS
+ 2657129220U, // <6,0,u,6>: Cost 3 vsldoi4 <6,6,0,u>, <6,6,0,u>
+ 2309417592U, // <6,0,u,7>: Cost 3 vmrglw RHS, <3,6,0,7>
+ 1698710227U, // <6,0,u,u>: Cost 2 vsldoi12 <3,4,5,6>, LHS
+ 3775799296U, // <6,1,0,0>: Cost 4 vsldoi8 <2,u,6,1>, <0,0,0,0>
+ 2702057574U, // <6,1,0,1>: Cost 3 vsldoi8 <2,u,6,1>, LHS
+ 3373143763U, // <6,1,0,2>: Cost 4 vmrglw <2,u,6,0>, <u,0,1,2>
+ 3695045122U, // <6,1,0,3>: Cost 4 vsldoi4 <0,6,1,0>, <3,4,5,6>
+ 3775799634U, // <6,1,0,4>: Cost 4 vsldoi8 <2,u,6,1>, <0,4,1,5>
+ 3383091538U, // <6,1,0,5>: Cost 4 vmrglw <4,5,6,0>, <0,4,1,5>
+ 3368493233U, // <6,1,0,6>: Cost 4 vmrglw <2,1,6,0>, <0,2,1,6>
+ 3362522319U, // <6,1,0,7>: Cost 5 vmrglw <1,1,6,0>, <1,6,1,7>
+ 2702058141U, // <6,1,0,u>: Cost 3 vsldoi8 <2,u,6,1>, LHS
+ 3834250027U, // <6,1,1,0>: Cost 4 vsldoi12 <1,4,5,6>, <1,1,0,1>
+ 2772452148U, // <6,1,1,1>: Cost 3 vsldoi12 <3,4,5,6>, <1,1,1,1>
+ 3832038210U, // <6,1,1,2>: Cost 4 vsldoi12 <1,1,2,6>, <1,1,2,6>
+ 3373150660U, // <6,1,1,3>: Cost 4 vmrglw <2,u,6,1>, <6,2,1,3>
+ 3834250067U, // <6,1,1,4>: Cost 4 vsldoi12 <1,4,5,6>, <1,1,4,5>
+ 3373146450U, // <6,1,1,5>: Cost 4 vmrglw <2,u,6,1>, <0,4,1,5>
+ 3826656102U, // <6,1,1,6>: Cost 4 vsldoi12 <0,2,1,6>, <1,1,6,6>
+ 3362530511U, // <6,1,1,7>: Cost 4 vmrglw <1,1,6,1>, <1,6,1,7>
+ 2772452148U, // <6,1,1,u>: Cost 3 vsldoi12 <3,4,5,6>, <1,1,1,1>
+ 2669092966U, // <6,1,2,0>: Cost 3 vsldoi4 <u,6,1,2>, LHS
+ 2252292916U, // <6,1,2,1>: Cost 3 vmrghw <6,2,7,3>, <1,1,1,1>
+ 2252293014U, // <6,1,2,2>: Cost 3 vmrghw <6,2,7,3>, <1,2,3,0>
+ 2772452246U, // <6,1,2,3>: Cost 3 vsldoi12 <3,4,5,6>, <1,2,3,0>
+ 2669096246U, // <6,1,2,4>: Cost 3 vsldoi4 <u,6,1,2>, RHS
+ 3846194091U, // <6,1,2,5>: Cost 4 vsldoi12 <3,4,5,6>, <1,2,5,3>
+ 2702059450U, // <6,1,2,6>: Cost 3 vsldoi8 <2,u,6,1>, <2,6,3,7>
+ 3870081978U, // <6,1,2,7>: Cost 4 vsldoi12 <7,4,5,6>, <1,2,7,0>
+ 2702059633U, // <6,1,2,u>: Cost 3 vsldoi8 <2,u,6,1>, <2,u,6,1>
+ 3775801494U, // <6,1,3,0>: Cost 4 vsldoi8 <2,u,6,1>, <3,0,1,2>
+ 3777128723U, // <6,1,3,1>: Cost 4 vsldoi8 <3,1,6,1>, <3,1,6,1>
+ 3775801702U, // <6,1,3,2>: Cost 4 vsldoi8 <2,u,6,1>, <3,2,6,3>
+ 3775801756U, // <6,1,3,3>: Cost 4 vsldoi8 <2,u,6,1>, <3,3,3,3>
+ 3775801858U, // <6,1,3,4>: Cost 4 vsldoi8 <2,u,6,1>, <3,4,5,6>
+ 3375153490U, // <6,1,3,5>: Cost 4 vmrglw <3,2,6,3>, <0,4,1,5>
+ 3826656265U, // <6,1,3,6>: Cost 4 vsldoi12 <0,2,1,6>, <1,3,6,7>
+ 3775802051U, // <6,1,3,7>: Cost 4 vsldoi8 <2,u,6,1>, <3,7,0,1>
+ 3775802142U, // <6,1,3,u>: Cost 4 vsldoi8 <2,u,6,1>, <3,u,1,2>
+ 3846194206U, // <6,1,4,0>: Cost 4 vsldoi12 <3,4,5,6>, <1,4,0,1>
+ 3846194219U, // <6,1,4,1>: Cost 4 vsldoi12 <3,4,5,6>, <1,4,1,5>
+ 3846194228U, // <6,1,4,2>: Cost 4 vsldoi12 <3,4,5,6>, <1,4,2,5>
+ 3846194236U, // <6,1,4,3>: Cost 4 vsldoi12 <3,4,5,6>, <1,4,3,4>
+ 3846194246U, // <6,1,4,4>: Cost 4 vsldoi12 <3,4,5,6>, <1,4,4,5>
+ 2760508496U, // <6,1,4,5>: Cost 3 vsldoi12 <1,4,5,6>, <1,4,5,6>
+ 3368526001U, // <6,1,4,6>: Cost 4 vmrglw <2,1,6,4>, <0,2,1,6>
+ 3870082144U, // <6,1,4,7>: Cost 4 vsldoi12 <7,4,5,6>, <1,4,7,4>
+ 2760729707U, // <6,1,4,u>: Cost 3 vsldoi12 <1,4,u,6>, <1,4,u,6>
+ 2714668660U, // <6,1,5,0>: Cost 3 vsldoi8 <5,0,6,1>, <5,0,6,1>
+ 3834619005U, // <6,1,5,1>: Cost 4 vsldoi12 <1,5,1,6>, <1,5,1,6>
+ 3834692742U, // <6,1,5,2>: Cost 4 vsldoi12 <1,5,2,6>, <1,5,2,6>
+ 3846194317U, // <6,1,5,3>: Cost 4 vsldoi12 <3,4,5,6>, <1,5,3,4>
+ 3834840216U, // <6,1,5,4>: Cost 4 vsldoi12 <1,5,4,6>, <1,5,4,6>
+ 3834913953U, // <6,1,5,5>: Cost 4 vsldoi12 <1,5,5,6>, <1,5,5,6>
+ 2719977570U, // <6,1,5,6>: Cost 3 vsldoi8 <5,u,6,1>, <5,6,7,0>
+ 3367208143U, // <6,1,5,7>: Cost 4 vmrglw <1,u,6,5>, <1,6,1,7>
+ 2719977724U, // <6,1,5,u>: Cost 3 vsldoi8 <5,u,6,1>, <5,u,6,1>
+ 2669125734U, // <6,1,6,0>: Cost 3 vsldoi4 <u,6,1,6>, LHS
+ 2254897972U, // <6,1,6,1>: Cost 3 vmrghw <6,6,6,6>, <1,1,1,1>
+ 2254898070U, // <6,1,6,2>: Cost 3 vmrghw <6,6,6,6>, <1,2,3,0>
+ 3775803929U, // <6,1,6,3>: Cost 4 vsldoi8 <2,u,6,1>, <6,3,1,7>
+ 2669129014U, // <6,1,6,4>: Cost 3 vsldoi4 <u,6,1,6>, RHS
+ 2322006354U, // <6,1,6,5>: Cost 3 vmrglw <6,6,6,6>, <0,4,1,5>
+ 2725950264U, // <6,1,6,6>: Cost 3 vsldoi8 <6,u,6,1>, <6,6,6,6>
+ 3793720142U, // <6,1,6,7>: Cost 4 vsldoi8 <5,u,6,1>, <6,7,0,1>
+ 2254898556U, // <6,1,6,u>: Cost 3 vmrghw <6,6,6,6>, <1,u,3,0>
+ 2627330150U, // <6,1,7,0>: Cost 3 vsldoi4 <1,6,1,7>, LHS
+ 1235664906U, // <6,1,7,1>: Cost 2 vmrglw RHS, <0,0,1,1>
+ 1235667094U, // <6,1,7,2>: Cost 2 vmrglw RHS, <3,0,1,2>
+ 2309406894U, // <6,1,7,3>: Cost 3 vmrglw RHS, <0,2,1,3>
+ 2627333430U, // <6,1,7,4>: Cost 3 vsldoi4 <1,6,1,7>, RHS
+ 1235665234U, // <6,1,7,5>: Cost 2 vmrglw RHS, <0,4,1,5>
+ 2309406897U, // <6,1,7,6>: Cost 3 vmrglw RHS, <0,2,1,6>
+ 2309407222U, // <6,1,7,7>: Cost 3 vmrglw RHS, <0,6,1,7>
+ 1235664913U, // <6,1,7,u>: Cost 2 vmrglw RHS, <0,0,1,u>
+ 2627338342U, // <6,1,u,0>: Cost 3 vsldoi4 <1,6,1,u>, LHS
+ 1235673098U, // <6,1,u,1>: Cost 2 vmrglw RHS, <0,0,1,1>
+ 1235675286U, // <6,1,u,2>: Cost 2 vmrglw RHS, <3,0,1,2>
+ 2772452732U, // <6,1,u,3>: Cost 3 vsldoi12 <3,4,5,6>, <1,u,3,0>
+ 2627341622U, // <6,1,u,4>: Cost 3 vsldoi4 <1,6,1,u>, RHS
+ 1235673426U, // <6,1,u,5>: Cost 2 vmrglw RHS, <0,4,1,5>
+ 2309415089U, // <6,1,u,6>: Cost 3 vmrglw RHS, <0,2,1,6>
+ 2309415414U, // <6,1,u,7>: Cost 3 vmrglw RHS, <0,6,1,7>
+ 1235673105U, // <6,1,u,u>: Cost 2 vmrglw RHS, <0,0,1,u>
+ 3324683725U, // <6,2,0,0>: Cost 4 vmrghw <6,0,7,0>, <2,0,3,0>
+ 2725290086U, // <6,2,0,1>: Cost 3 vsldoi8 <6,7,6,2>, LHS
+ 3771162801U, // <6,2,0,2>: Cost 4 vsldoi8 <2,1,6,2>, <0,2,1,6>
+ 2309349478U, // <6,2,0,3>: Cost 3 vmrglw <4,5,6,0>, LHS
+ 3730951478U, // <6,2,0,4>: Cost 4 vsldoi4 <6,6,2,0>, RHS
+ 3840738784U, // <6,2,0,5>: Cost 4 vsldoi12 <2,5,3,6>, <2,0,5,1>
+ 3842655721U, // <6,2,0,6>: Cost 4 vsldoi12 <2,u,2,6>, <2,0,6,1>
+ 3736925671U, // <6,2,0,7>: Cost 4 vsldoi4 <7,6,2,0>, <7,6,2,0>
+ 2309349483U, // <6,2,0,u>: Cost 3 vmrglw <4,5,6,0>, LHS
+ 3367840468U, // <6,2,1,0>: Cost 4 vmrglw <2,0,6,1>, <3,7,2,0>
+ 3325355551U, // <6,2,1,1>: Cost 4 vmrghw <6,1,7,1>, <2,1,3,1>
+ 3373147752U, // <6,2,1,2>: Cost 4 vmrglw <2,u,6,1>, <2,2,2,2>
+ 2299404390U, // <6,2,1,3>: Cost 3 vmrglw <2,u,6,1>, LHS
+ 3701099830U, // <6,2,1,4>: Cost 5 vsldoi4 <1,6,2,1>, RHS
+ 3767846054U, // <6,2,1,5>: Cost 4 vsldoi8 <1,5,6,2>, <1,5,6,2>
+ 3826656825U, // <6,2,1,6>: Cost 4 vsldoi12 <0,2,1,6>, <2,1,6,0>
+ 3373147838U, // <6,2,1,7>: Cost 5 vmrglw <2,u,6,1>, <2,3,2,7>
+ 2299404395U, // <6,2,1,u>: Cost 3 vmrglw <2,u,6,1>, LHS
+ 2657222758U, // <6,2,2,0>: Cost 3 vsldoi4 <6,6,2,2>, LHS
+ 3771164219U, // <6,2,2,1>: Cost 4 vsldoi8 <2,1,6,2>, <2,1,6,2>
+ 2766481000U, // <6,2,2,2>: Cost 3 vsldoi12 <2,4,5,6>, <2,2,2,2>
+ 2772452978U, // <6,2,2,3>: Cost 3 vsldoi12 <3,4,5,6>, <2,2,3,3>
+ 2657226038U, // <6,2,2,4>: Cost 3 vsldoi4 <6,6,2,2>, RHS
+ 3790407528U, // <6,2,2,5>: Cost 4 vsldoi8 <5,3,6,2>, <2,5,3,6>
+ 2252294074U, // <6,2,2,6>: Cost 3 vmrghw <6,2,7,3>, <2,6,3,7>
+ 2252294148U, // <6,2,2,7>: Cost 3 vmrghw <6,2,7,3>, <2,7,3,0>
+ 2772453023U, // <6,2,2,u>: Cost 3 vsldoi12 <3,4,5,6>, <2,2,u,3>
+ 2772453030U, // <6,2,3,0>: Cost 3 vsldoi12 <3,4,5,6>, <2,3,0,1>
+ 3834250930U, // <6,2,3,1>: Cost 4 vsldoi12 <1,4,5,6>, <2,3,1,4>
+ 2765596349U, // <6,2,3,2>: Cost 3 vsldoi12 <2,3,2,6>, <2,3,2,6>
+ 2301411430U, // <6,2,3,3>: Cost 3 vmrglw <3,2,6,3>, LHS
+ 2772453070U, // <6,2,3,4>: Cost 3 vsldoi12 <3,4,5,6>, <2,3,4,5>
+ 2765817560U, // <6,2,3,5>: Cost 3 vsldoi12 <2,3,5,6>, <2,3,5,6>
+ 2252933050U, // <6,2,3,6>: Cost 3 vmrghw <6,3,7,0>, <2,6,3,7>
+ 2796340968U, // <6,2,3,7>: Cost 3 vsldoi12 <7,4,5,6>, <2,3,7,4>
+ 2766038771U, // <6,2,3,u>: Cost 3 vsldoi12 <2,3,u,6>, <2,3,u,6>
+ 3725008998U, // <6,2,4,0>: Cost 4 vsldoi4 <5,6,2,4>, LHS
+ 3368530217U, // <6,2,4,1>: Cost 5 vmrglw <2,1,6,4>, <6,0,2,1>
+ 3840222989U, // <6,2,4,2>: Cost 4 vsldoi12 <2,4,5,6>, <2,4,2,5>
+ 2309382246U, // <6,2,4,3>: Cost 3 vmrglw <4,5,6,4>, LHS
+ 3725012278U, // <6,2,4,4>: Cost 4 vsldoi4 <5,6,2,4>, RHS
+ 2766481193U, // <6,2,4,5>: Cost 3 vsldoi12 <2,4,5,6>, <2,4,5,6>
+ 3842656049U, // <6,2,4,6>: Cost 4 vsldoi12 <2,u,2,6>, <2,4,6,5>
+ 3327010820U, // <6,2,4,7>: Cost 4 vmrghw <6,4,2,5>, <2,7,3,0>
+ 2766702404U, // <6,2,4,u>: Cost 3 vsldoi12 <2,4,u,6>, <2,4,u,6>
+ 3713073254U, // <6,2,5,0>: Cost 4 vsldoi4 <3,6,2,5>, LHS
+ 3789082310U, // <6,2,5,1>: Cost 4 vsldoi8 <5,1,6,2>, <5,1,6,2>
+ 3840665439U, // <6,2,5,2>: Cost 4 vsldoi12 <2,5,2,6>, <2,5,2,6>
+ 2766997352U, // <6,2,5,3>: Cost 3 vsldoi12 <2,5,3,6>, <2,5,3,6>
+ 3713076534U, // <6,2,5,4>: Cost 4 vsldoi4 <3,6,2,5>, RHS
+ 3791736842U, // <6,2,5,5>: Cost 4 vsldoi8 <5,5,6,2>, <5,5,6,2>
+ 3373180605U, // <6,2,5,6>: Cost 4 vmrglw <2,u,6,5>, <2,3,2,6>
+ 3793064108U, // <6,2,5,7>: Cost 4 vsldoi8 <5,7,6,2>, <5,7,6,2>
+ 2767366037U, // <6,2,5,u>: Cost 3 vsldoi12 <2,5,u,6>, <2,5,u,6>
+ 3701137510U, // <6,2,6,0>: Cost 4 vsldoi4 <1,6,2,6>, LHS
+ 3701138647U, // <6,2,6,1>: Cost 4 vsldoi4 <1,6,2,6>, <1,6,2,6>
+ 2254898792U, // <6,2,6,2>: Cost 3 vmrghw <6,6,6,6>, <2,2,2,2>
+ 1248264294U, // <6,2,6,3>: Cost 2 vmrglw <6,6,6,6>, LHS
+ 3701140790U, // <6,2,6,4>: Cost 4 vsldoi4 <1,6,2,6>, RHS
+ 3725029435U, // <6,2,6,5>: Cost 4 vsldoi4 <5,6,2,6>, <5,6,2,6>
+ 2254899130U, // <6,2,6,6>: Cost 3 vmrghw <6,6,6,6>, <2,6,3,7>
+ 2725294981U, // <6,2,6,7>: Cost 3 vsldoi8 <6,7,6,2>, <6,7,6,2>
+ 1248264299U, // <6,2,6,u>: Cost 2 vmrglw <6,6,6,6>, LHS
+ 2633375846U, // <6,2,7,0>: Cost 3 vsldoi4 <2,6,2,7>, LHS
+ 2309407468U, // <6,2,7,1>: Cost 3 vmrglw RHS, <1,0,2,1>
+ 1235666536U, // <6,2,7,2>: Cost 2 vmrglw RHS, <2,2,2,2>
+ 161923174U, // <6,2,7,3>: Cost 1 vmrglw RHS, LHS
+ 2633379126U, // <6,2,7,4>: Cost 3 vsldoi4 <2,6,2,7>, RHS
+ 2309407796U, // <6,2,7,5>: Cost 3 vmrglw RHS, <1,4,2,5>
+ 2309408445U, // <6,2,7,6>: Cost 3 vmrglw RHS, <2,3,2,6>
+ 2309407960U, // <6,2,7,7>: Cost 3 vmrglw RHS, <1,6,2,7>
+ 161923179U, // <6,2,7,u>: Cost 1 vmrglw RHS, LHS
+ 2633384038U, // <6,2,u,0>: Cost 3 vsldoi4 <2,6,2,u>, LHS
+ 2309415660U, // <6,2,u,1>: Cost 3 vmrglw RHS, <1,0,2,1>
+ 1235674728U, // <6,2,u,2>: Cost 2 vmrglw RHS, <2,2,2,2>
+ 161931366U, // <6,2,u,3>: Cost 1 vmrglw RHS, LHS
+ 2633387318U, // <6,2,u,4>: Cost 3 vsldoi4 <2,6,2,u>, RHS
+ 2769135725U, // <6,2,u,5>: Cost 3 vsldoi12 <2,u,5,6>, <2,u,5,6>
+ 2309416637U, // <6,2,u,6>: Cost 3 vmrglw RHS, <2,3,2,6>
+ 2309416152U, // <6,2,u,7>: Cost 3 vmrglw RHS, <1,6,2,7>
+ 161931371U, // <6,2,u,u>: Cost 1 vmrglw RHS, LHS
+ 3777806336U, // <6,3,0,0>: Cost 4 vsldoi8 <3,2,6,3>, <0,0,0,0>
+ 2704064614U, // <6,3,0,1>: Cost 3 vsldoi8 <3,2,6,3>, LHS
+ 3765862577U, // <6,3,0,2>: Cost 4 vsldoi8 <1,2,6,3>, <0,2,1,6>
+ 3843393708U, // <6,3,0,3>: Cost 4 vsldoi12 <3,0,3,6>, <3,0,3,6>
+ 2250516994U, // <6,3,0,4>: Cost 3 vmrghw <6,0,1,2>, <3,4,5,6>
+ 3725054014U, // <6,3,0,5>: Cost 4 vsldoi4 <5,6,3,0>, <5,6,3,0>
+ 3383093096U, // <6,3,0,6>: Cost 4 vmrglw <4,5,6,0>, <2,5,3,6>
+ 3368495034U, // <6,3,0,7>: Cost 4 vmrglw <2,1,6,0>, <2,6,3,7>
+ 2704065181U, // <6,3,0,u>: Cost 3 vsldoi8 <3,2,6,3>, LHS
+ 2251622550U, // <6,3,1,0>: Cost 3 vmrghw <6,1,7,2>, <3,0,1,2>
+ 3777807156U, // <6,3,1,1>: Cost 4 vsldoi8 <3,2,6,3>, <1,1,1,1>
+ 3765863348U, // <6,3,1,2>: Cost 4 vsldoi8 <1,2,6,3>, <1,2,6,3>
+ 3373147762U, // <6,3,1,3>: Cost 4 vmrglw <2,u,6,1>, <2,2,3,3>
+ 3834251525U, // <6,3,1,4>: Cost 4 vsldoi12 <1,4,5,6>, <3,1,4,5>
+ 3373147683U, // <6,3,1,5>: Cost 5 vmrglw <2,u,6,1>, <2,1,3,5>
+ 3391727545U, // <6,3,1,6>: Cost 4 vmrglw <6,0,6,1>, <2,6,3,6>
+ 2299406266U, // <6,3,1,7>: Cost 3 vmrglw <2,u,6,1>, <2,6,3,7>
+ 2251622550U, // <6,3,1,u>: Cost 3 vmrghw <6,1,7,2>, <3,0,1,2>
+ 2252294294U, // <6,3,2,0>: Cost 3 vmrghw <6,2,7,3>, <3,0,1,2>
+ 3326036198U, // <6,3,2,1>: Cost 4 vmrghw <6,2,7,3>, <3,1,1,1>
+ 3771836045U, // <6,3,2,2>: Cost 4 vsldoi8 <2,2,6,3>, <2,2,6,3>
+ 2252294556U, // <6,3,2,3>: Cost 3 vmrghw <6,2,7,3>, <3,3,3,3>
+ 2252294658U, // <6,3,2,4>: Cost 3 vmrghw <6,2,7,3>, <3,4,5,6>
+ 3840739677U, // <6,3,2,5>: Cost 4 vsldoi12 <2,5,3,6>, <3,2,5,3>
+ 2704066490U, // <6,3,2,6>: Cost 3 vsldoi8 <3,2,6,3>, <2,6,3,7>
+ 3368511418U, // <6,3,2,7>: Cost 4 vmrglw <2,1,6,2>, <2,6,3,7>
+ 2252294942U, // <6,3,2,u>: Cost 3 vmrghw <6,2,7,3>, <3,u,1,2>
+ 3707158630U, // <6,3,3,0>: Cost 4 vsldoi4 <2,6,3,3>, LHS
+ 3765864692U, // <6,3,3,1>: Cost 5 vsldoi8 <1,2,6,3>, <3,1,2,6>
+ 2704066918U, // <6,3,3,2>: Cost 3 vsldoi8 <3,2,6,3>, <3,2,6,3>
+ 2772453788U, // <6,3,3,3>: Cost 3 vsldoi12 <3,4,5,6>, <3,3,3,3>
+ 2772453799U, // <6,3,3,4>: Cost 3 vsldoi12 <3,4,5,6>, <3,3,4,5>
+ 3789752888U, // <6,3,3,5>: Cost 4 vsldoi8 <5,2,6,3>, <3,5,2,6>
+ 3840739770U, // <6,3,3,6>: Cost 4 vsldoi12 <2,5,3,6>, <3,3,6,6>
+ 2301413306U, // <6,3,3,7>: Cost 3 vmrglw <3,2,6,3>, <2,6,3,7>
+ 2775108043U, // <6,3,3,u>: Cost 3 vsldoi12 <3,u,5,6>, <3,3,u,5>
+ 2651340902U, // <6,3,4,0>: Cost 3 vsldoi4 <5,6,3,4>, LHS
+ 3846195674U, // <6,3,4,1>: Cost 4 vsldoi12 <3,4,5,6>, <3,4,1,2>
+ 3845974503U, // <6,3,4,2>: Cost 4 vsldoi12 <3,4,2,6>, <3,4,2,6>
+ 2651343362U, // <6,3,4,3>: Cost 3 vsldoi4 <5,6,3,4>, <3,4,5,6>
+ 2651344182U, // <6,3,4,4>: Cost 3 vsldoi4 <5,6,3,4>, RHS
+ 1698712066U, // <6,3,4,5>: Cost 2 vsldoi12 <3,4,5,6>, <3,4,5,6>
+ 3383125864U, // <6,3,4,6>: Cost 4 vmrglw <4,5,6,4>, <2,5,3,6>
+ 3368527802U, // <6,3,4,7>: Cost 4 vmrglw <2,1,6,4>, <2,6,3,7>
+ 1698933277U, // <6,3,4,u>: Cost 2 vsldoi12 <3,4,u,6>, <3,4,u,6>
+ 3373179798U, // <6,3,5,0>: Cost 4 vmrglw <2,u,6,5>, <1,2,3,0>
+ 3707176179U, // <6,3,5,1>: Cost 5 vsldoi4 <2,6,3,5>, <1,6,5,7>
+ 2716012312U, // <6,3,5,2>: Cost 3 vsldoi8 <5,2,6,3>, <5,2,6,3>
+ 3373180530U, // <6,3,5,3>: Cost 4 vmrglw <2,u,6,5>, <2,2,3,3>
+ 2254309890U, // <6,3,5,4>: Cost 3 vmrghw <6,5,7,6>, <3,4,5,6>
+ 3785773070U, // <6,3,5,5>: Cost 4 vsldoi8 <4,5,6,3>, <5,5,6,6>
+ 3840739932U, // <6,3,5,6>: Cost 4 vsldoi12 <2,5,3,6>, <3,5,6,6>
+ 2299439034U, // <6,3,5,7>: Cost 3 vmrglw <2,u,6,5>, <2,6,3,7>
+ 2719994110U, // <6,3,5,u>: Cost 3 vsldoi8 <5,u,6,3>, <5,u,6,3>
+ 2254899350U, // <6,3,6,0>: Cost 3 vmrghw <6,6,6,6>, <3,0,1,2>
+ 3328641254U, // <6,3,6,1>: Cost 4 vmrghw <6,6,6,6>, <3,1,1,1>
+ 2633443257U, // <6,3,6,2>: Cost 3 vsldoi4 <2,6,3,6>, <2,6,3,6>
+ 2254899612U, // <6,3,6,3>: Cost 3 vmrghw <6,6,6,6>, <3,3,3,3>
+ 2254899714U, // <6,3,6,4>: Cost 3 vmrghw <6,6,6,6>, <3,4,5,6>
+ 3785773772U, // <6,3,6,5>: Cost 4 vsldoi8 <4,5,6,3>, <6,5,3,6>
+ 2725966648U, // <6,3,6,6>: Cost 3 vsldoi8 <6,u,6,3>, <6,6,6,6>
+ 2322007994U, // <6,3,6,7>: Cost 3 vmrglw <6,6,6,6>, <2,6,3,7>
+ 2254899998U, // <6,3,6,u>: Cost 3 vmrghw <6,6,6,6>, <3,u,1,2>
+ 1559707750U, // <6,3,7,0>: Cost 2 vsldoi4 <2,6,3,7>, LHS
+ 2633450292U, // <6,3,7,1>: Cost 3 vsldoi4 <2,6,3,7>, <1,1,1,1>
+ 1559709626U, // <6,3,7,2>: Cost 2 vsldoi4 <2,6,3,7>, <2,6,3,7>
+ 1235666546U, // <6,3,7,3>: Cost 2 vmrglw RHS, <2,2,3,3>
+ 1559711030U, // <6,3,7,4>: Cost 2 vsldoi4 <2,6,3,7>, RHS
+ 2309408291U, // <6,3,7,5>: Cost 3 vmrglw RHS, <2,1,3,5>
+ 2633454152U, // <6,3,7,6>: Cost 3 vsldoi4 <2,6,3,7>, <6,3,7,0>
+ 1235666874U, // <6,3,7,7>: Cost 2 vmrglw RHS, <2,6,3,7>
+ 1559713582U, // <6,3,7,u>: Cost 2 vsldoi4 <2,6,3,7>, LHS
+ 1559715942U, // <6,3,u,0>: Cost 2 vsldoi4 <2,6,3,u>, LHS
+ 2633458484U, // <6,3,u,1>: Cost 3 vsldoi4 <2,6,3,u>, <1,1,1,1>
+ 1559717819U, // <6,3,u,2>: Cost 2 vsldoi4 <2,6,3,u>, <2,6,3,u>
+ 1235674738U, // <6,3,u,3>: Cost 2 vmrglw RHS, <2,2,3,3>
+ 1559719222U, // <6,3,u,4>: Cost 2 vsldoi4 <2,6,3,u>, RHS
+ 1701366598U, // <6,3,u,5>: Cost 2 vsldoi12 <3,u,5,6>, <3,u,5,6>
+ 2633462353U, // <6,3,u,6>: Cost 3 vsldoi4 <2,6,3,u>, <6,3,u,0>
+ 1235675066U, // <6,3,u,7>: Cost 2 vmrglw RHS, <2,6,3,7>
+ 1559721774U, // <6,3,u,u>: Cost 2 vsldoi4 <2,6,3,u>, LHS
+ 3785777152U, // <6,4,0,0>: Cost 4 vsldoi8 <4,5,6,4>, <0,0,0,0>
+ 2712035430U, // <6,4,0,1>: Cost 3 vsldoi8 <4,5,6,4>, LHS
+ 3771179185U, // <6,4,0,2>: Cost 4 vsldoi8 <2,1,6,4>, <0,2,1,6>
+ 3846196096U, // <6,4,0,3>: Cost 4 vsldoi12 <3,4,5,6>, <4,0,3,1>
+ 3785777490U, // <6,4,0,4>: Cost 4 vsldoi8 <4,5,6,4>, <0,4,1,5>
+ 2250517814U, // <6,4,0,5>: Cost 3 vmrghw <6,0,1,2>, RHS
+ 3324259703U, // <6,4,0,6>: Cost 4 vmrghw <6,0,1,2>, <4,6,5,0>
+ 3383092458U, // <6,4,0,7>: Cost 5 vmrglw <4,5,6,0>, <1,6,4,7>
+ 2712035997U, // <6,4,0,u>: Cost 3 vsldoi8 <4,5,6,4>, LHS
+ 3325356946U, // <6,4,1,0>: Cost 4 vmrghw <6,1,7,1>, <4,0,5,1>
+ 3785777972U, // <6,4,1,1>: Cost 4 vsldoi8 <4,5,6,4>, <1,1,1,1>
+ 3846196170U, // <6,4,1,2>: Cost 4 vsldoi12 <3,4,5,6>, <4,1,2,3>
+ 3325365380U, // <6,4,1,3>: Cost 4 vmrghw <6,1,7,2>, <4,3,5,0>
+ 3852168155U, // <6,4,1,4>: Cost 4 vsldoi12 <4,4,5,6>, <4,1,4,2>
+ 2251615542U, // <6,4,1,5>: Cost 3 vmrghw <6,1,7,1>, RHS
+ 3325357432U, // <6,4,1,6>: Cost 4 vmrghw <6,1,7,1>, <4,6,5,1>
+ 3870084088U, // <6,4,1,7>: Cost 4 vsldoi12 <7,4,5,6>, <4,1,7,4>
+ 2251615785U, // <6,4,1,u>: Cost 3 vmrghw <6,1,7,1>, RHS
+ 2252295058U, // <6,4,2,0>: Cost 3 vmrghw <6,2,7,3>, <4,0,5,1>
+ 3771180605U, // <6,4,2,1>: Cost 4 vsldoi8 <2,1,6,4>, <2,1,6,4>
+ 3785778792U, // <6,4,2,2>: Cost 4 vsldoi8 <4,5,6,4>, <2,2,2,2>
+ 3777816253U, // <6,4,2,3>: Cost 4 vsldoi8 <3,2,6,4>, <2,3,2,6>
+ 2252295376U, // <6,4,2,4>: Cost 3 vmrghw <6,2,7,3>, <4,4,4,4>
+ 1178553654U, // <6,4,2,5>: Cost 2 vmrghw <6,2,7,3>, RHS
+ 2252295545U, // <6,4,2,6>: Cost 3 vmrghw <6,2,7,3>, <4,6,5,2>
+ 3326037448U, // <6,4,2,7>: Cost 4 vmrghw <6,2,7,3>, <4,7,5,0>
+ 1178553897U, // <6,4,2,u>: Cost 2 vmrghw <6,2,7,3>, RHS
+ 3785779350U, // <6,4,3,0>: Cost 4 vsldoi8 <4,5,6,4>, <3,0,1,2>
+ 3383118648U, // <6,4,3,1>: Cost 4 vmrglw <4,5,6,3>, <3,u,4,1>
+ 3777816935U, // <6,4,3,2>: Cost 4 vsldoi8 <3,2,6,4>, <3,2,6,4>
+ 3785779612U, // <6,4,3,3>: Cost 4 vsldoi8 <4,5,6,4>, <3,3,3,3>
+ 2712037890U, // <6,4,3,4>: Cost 3 vsldoi8 <4,5,6,4>, <3,4,5,6>
+ 2252754230U, // <6,4,3,5>: Cost 3 vmrghw <6,3,4,5>, RHS
+ 3784452764U, // <6,4,3,6>: Cost 4 vsldoi8 <4,3,6,4>, <3,6,4,7>
+ 3801705178U, // <6,4,3,7>: Cost 4 vsldoi8 <7,2,6,4>, <3,7,2,6>
+ 2252754473U, // <6,4,3,u>: Cost 3 vmrghw <6,3,4,5>, RHS
+ 3787770770U, // <6,4,4,0>: Cost 4 vsldoi8 <4,u,6,4>, <4,0,5,1>
+ 3383126840U, // <6,4,4,1>: Cost 4 vmrglw <4,5,6,4>, <3,u,4,1>
+ 3327380534U, // <6,4,4,2>: Cost 4 vmrghw <6,4,7,5>, <4,2,5,3>
+ 3784453265U, // <6,4,4,3>: Cost 4 vsldoi8 <4,3,6,4>, <4,3,6,4>
+ 2253630672U, // <6,4,4,4>: Cost 3 vmrghw <6,4,7,4>, <4,4,4,4>
+ 2778426587U, // <6,4,4,5>: Cost 3 vsldoi12 <4,4,5,6>, <4,4,5,6>
+ 3383128789U, // <6,4,4,6>: Cost 4 vmrglw <4,5,6,4>, <6,5,4,6>
+ 3381799580U, // <6,4,4,7>: Cost 4 vmrglw <4,3,6,4>, <3,6,4,7>
+ 2778647798U, // <6,4,4,u>: Cost 3 vsldoi12 <4,4,u,6>, <4,4,u,6>
+ 2651422822U, // <6,4,5,0>: Cost 3 vsldoi4 <5,6,4,5>, LHS
+ 3701277928U, // <6,4,5,1>: Cost 4 vsldoi4 <1,6,4,5>, <1,6,4,5>
+ 3701278650U, // <6,4,5,2>: Cost 4 vsldoi4 <1,6,4,5>, <2,6,3,7>
+ 2651425282U, // <6,4,5,3>: Cost 3 vsldoi4 <5,6,4,5>, <3,4,5,6>
+ 2651426102U, // <6,4,5,4>: Cost 3 vsldoi4 <5,6,4,5>, RHS
+ 2651426892U, // <6,4,5,5>: Cost 3 vsldoi4 <5,6,4,5>, <5,6,4,5>
+ 1698712886U, // <6,4,5,6>: Cost 2 vsldoi12 <3,4,5,6>, RHS
+ 3725169658U, // <6,4,5,7>: Cost 4 vsldoi4 <5,6,4,5>, <7,0,1,2>
+ 1698712904U, // <6,4,5,u>: Cost 2 vsldoi12 <3,4,5,6>, RHS
+ 2254900114U, // <6,4,6,0>: Cost 3 vmrghw <6,6,6,6>, <4,0,5,1>
+ 3389115192U, // <6,4,6,1>: Cost 4 vmrglw <5,5,6,6>, <3,u,4,1>
+ 3785781727U, // <6,4,6,2>: Cost 4 vsldoi8 <4,5,6,4>, <6,2,4,3>
+ 3785781810U, // <6,4,6,3>: Cost 4 vsldoi8 <4,5,6,4>, <6,3,4,5>
+ 2254900432U, // <6,4,6,4>: Cost 3 vmrghw <6,6,6,6>, <4,4,4,4>
+ 1181158710U, // <6,4,6,5>: Cost 2 vmrghw <6,6,6,6>, RHS
+ 2254900605U, // <6,4,6,6>: Cost 3 vmrghw <6,6,6,6>, <4,6,5,6>
+ 3787772750U, // <6,4,6,7>: Cost 4 vsldoi8 <4,u,6,4>, <6,7,0,1>
+ 1181158953U, // <6,4,6,u>: Cost 2 vmrghw <6,6,6,6>, RHS
+ 2639495270U, // <6,4,7,0>: Cost 3 vsldoi4 <3,6,4,7>, LHS
+ 2639496090U, // <6,4,7,1>: Cost 3 vsldoi4 <3,6,4,7>, <1,2,3,4>
+ 3707267011U, // <6,4,7,2>: Cost 4 vsldoi4 <2,6,4,7>, <2,6,4,7>
+ 2639497884U, // <6,4,7,3>: Cost 3 vsldoi4 <3,6,4,7>, <3,6,4,7>
+ 1237658832U, // <6,4,7,4>: Cost 2 vmrglw RHS, <4,4,4,4>
+ 1235666638U, // <6,4,7,5>: Cost 2 vmrglw RHS, <2,3,4,5>
+ 3713241753U, // <6,4,7,6>: Cost 4 vsldoi4 <3,6,4,7>, <6,4,7,0>
+ 2309409436U, // <6,4,7,7>: Cost 3 vmrglw RHS, <3,6,4,7>
+ 1235666641U, // <6,4,7,u>: Cost 2 vmrglw RHS, <2,3,4,u>
+ 2639503462U, // <6,4,u,0>: Cost 3 vsldoi4 <3,6,4,u>, LHS
+ 2639504282U, // <6,4,u,1>: Cost 3 vsldoi4 <3,6,4,u>, <1,2,3,4>
+ 3701303226U, // <6,4,u,2>: Cost 4 vsldoi4 <1,6,4,u>, <2,6,3,7>
+ 2639506077U, // <6,4,u,3>: Cost 3 vsldoi4 <3,6,4,u>, <3,6,4,u>
+ 1235676368U, // <6,4,u,4>: Cost 2 vmrglw RHS, <4,4,4,4>
+ 1235674830U, // <6,4,u,5>: Cost 2 vmrglw RHS, <2,3,4,5>
+ 1698713129U, // <6,4,u,6>: Cost 2 vsldoi12 <3,4,5,6>, RHS
+ 2309417628U, // <6,4,u,7>: Cost 3 vmrglw RHS, <3,6,4,7>
+ 1698713147U, // <6,4,u,u>: Cost 2 vsldoi12 <3,4,5,6>, RHS
+ 3775832064U, // <6,5,0,0>: Cost 4 vsldoi8 <2,u,6,5>, <0,0,0,0>
+ 2702090342U, // <6,5,0,1>: Cost 3 vsldoi8 <2,u,6,5>, LHS
+ 3775832241U, // <6,5,0,2>: Cost 4 vsldoi8 <2,u,6,5>, <0,2,1,6>
+ 3719227906U, // <6,5,0,3>: Cost 4 vsldoi4 <4,6,5,0>, <3,4,5,6>
+ 3775832402U, // <6,5,0,4>: Cost 4 vsldoi8 <2,u,6,5>, <0,4,1,5>
+ 3385085146U, // <6,5,0,5>: Cost 4 vmrglw <4,u,6,0>, <4,4,5,5>
+ 2309351938U, // <6,5,0,6>: Cost 3 vmrglw <4,5,6,0>, <3,4,5,6>
+ 3376459134U, // <6,5,0,7>: Cost 5 vmrglw <3,4,6,0>, <4,6,5,7>
+ 2702090909U, // <6,5,0,u>: Cost 3 vsldoi8 <2,u,6,5>, LHS
+ 3719233546U, // <6,5,1,0>: Cost 4 vsldoi4 <4,6,5,1>, <0,0,1,1>
+ 3775832884U, // <6,5,1,1>: Cost 4 vsldoi8 <2,u,6,5>, <1,1,1,1>
+ 3775832982U, // <6,5,1,2>: Cost 4 vsldoi8 <2,u,6,5>, <1,2,3,0>
+ 3846196909U, // <6,5,1,3>: Cost 4 vsldoi12 <3,4,5,6>, <5,1,3,4>
+ 3719236984U, // <6,5,1,4>: Cost 4 vsldoi4 <4,6,5,1>, <4,6,5,1>
+ 3856150209U, // <6,5,1,5>: Cost 4 vsldoi12 <5,1,5,6>, <5,1,5,6>
+ 3834252997U, // <6,5,1,6>: Cost 4 vsldoi12 <1,4,5,6>, <5,1,6,1>
+ 3870084817U, // <6,5,1,7>: Cost 4 vsldoi12 <7,4,5,6>, <5,1,7,4>
+ 3769861532U, // <6,5,1,u>: Cost 4 vsldoi8 <1,u,6,5>, <1,u,6,5>
+ 2645500006U, // <6,5,2,0>: Cost 3 vsldoi4 <4,6,5,2>, LHS
+ 3719242548U, // <6,5,2,1>: Cost 4 vsldoi4 <4,6,5,2>, <1,1,1,1>
+ 3775833704U, // <6,5,2,2>: Cost 4 vsldoi8 <2,u,6,5>, <2,2,2,2>
+ 3775833766U, // <6,5,2,3>: Cost 4 vsldoi8 <2,u,6,5>, <2,3,0,1>
+ 2645503353U, // <6,5,2,4>: Cost 3 vsldoi4 <4,6,5,2>, <4,6,5,2>
+ 2252296196U, // <6,5,2,5>: Cost 3 vmrghw <6,2,7,3>, <5,5,5,5>
+ 2702092218U, // <6,5,2,6>: Cost 3 vsldoi8 <2,u,6,5>, <2,6,3,7>
+ 3719246842U, // <6,5,2,7>: Cost 4 vsldoi4 <4,6,5,2>, <7,0,1,2>
+ 2702092405U, // <6,5,2,u>: Cost 3 vsldoi8 <2,u,6,5>, <2,u,6,5>
+ 3775834262U, // <6,5,3,0>: Cost 4 vsldoi8 <2,u,6,5>, <3,0,1,2>
+ 3777161495U, // <6,5,3,1>: Cost 4 vsldoi8 <3,1,6,5>, <3,1,6,5>
+ 3775834470U, // <6,5,3,2>: Cost 4 vsldoi8 <2,u,6,5>, <3,2,6,3>
+ 3775834524U, // <6,5,3,3>: Cost 4 vsldoi8 <2,u,6,5>, <3,3,3,3>
+ 3775834626U, // <6,5,3,4>: Cost 4 vsldoi8 <2,u,6,5>, <3,4,5,6>
+ 3385109722U, // <6,5,3,5>: Cost 4 vmrglw <4,u,6,3>, <4,4,5,5>
+ 2309376514U, // <6,5,3,6>: Cost 3 vmrglw <4,5,6,3>, <3,4,5,6>
+ 3775834819U, // <6,5,3,7>: Cost 4 vsldoi8 <2,u,6,5>, <3,7,0,1>
+ 2309376514U, // <6,5,3,u>: Cost 3 vmrglw <4,5,6,3>, <3,4,5,6>
+ 3719258214U, // <6,5,4,0>: Cost 4 vsldoi4 <4,6,5,4>, LHS
+ 3385117586U, // <6,5,4,1>: Cost 4 vmrglw <4,u,6,4>, <4,0,5,1>
+ 3327242008U, // <6,5,4,2>: Cost 4 vmrghw <6,4,5,6>, <5,2,6,3>
+ 3719260674U, // <6,5,4,3>: Cost 4 vsldoi4 <4,6,5,4>, <3,4,5,6>
+ 3719261563U, // <6,5,4,4>: Cost 4 vsldoi4 <4,6,5,4>, <4,6,5,4>
+ 2702093622U, // <6,5,4,5>: Cost 3 vsldoi8 <2,u,6,5>, RHS
+ 2309384706U, // <6,5,4,6>: Cost 3 vmrglw <4,5,6,4>, <3,4,5,6>
+ 3870085060U, // <6,5,4,7>: Cost 4 vsldoi12 <7,4,5,6>, <5,4,7,4>
+ 2702093865U, // <6,5,4,u>: Cost 3 vsldoi8 <2,u,6,5>, RHS
+ 3719266406U, // <6,5,5,0>: Cost 4 vsldoi4 <4,6,5,5>, LHS
+ 3789106889U, // <6,5,5,1>: Cost 4 vsldoi8 <5,1,6,5>, <5,1,6,5>
+ 3785789208U, // <6,5,5,2>: Cost 4 vsldoi8 <4,5,6,5>, <5,2,6,3>
+ 3373183950U, // <6,5,5,3>: Cost 4 vmrglw <2,u,6,5>, <6,u,5,3>
+ 2717355964U, // <6,5,5,4>: Cost 3 vsldoi8 <5,4,6,5>, <5,4,6,5>
+ 2791772164U, // <6,5,5,5>: Cost 3 vsldoi12 <6,6,6,6>, <5,5,5,5>
+ 2772455438U, // <6,5,5,6>: Cost 3 vsldoi12 <3,4,5,6>, <5,5,6,6>
+ 3373183549U, // <6,5,5,7>: Cost 4 vmrglw <2,u,6,5>, <6,3,5,7>
+ 2720010496U, // <6,5,5,u>: Cost 3 vsldoi8 <5,u,6,5>, <5,u,6,5>
+ 2772455460U, // <6,5,6,0>: Cost 3 vsldoi12 <3,4,5,6>, <5,6,0,1>
+ 2322008978U, // <6,5,6,1>: Cost 3 vmrglw <6,6,6,6>, <4,0,5,1>
+ 3840225335U, // <6,5,6,2>: Cost 4 vsldoi12 <2,4,5,6>, <5,6,2,2>
+ 2772455490U, // <6,5,6,3>: Cost 3 vsldoi12 <3,4,5,6>, <5,6,3,4>
+ 2772455500U, // <6,5,6,4>: Cost 3 vsldoi12 <3,4,5,6>, <5,6,4,5>
+ 2254901252U, // <6,5,6,5>: Cost 3 vmrghw <6,6,6,6>, <5,5,5,5>
+ 2772455520U, // <6,5,6,6>: Cost 3 vsldoi12 <3,4,5,6>, <5,6,6,7>
+ 2785874024U, // <6,5,6,7>: Cost 3 vsldoi12 <5,6,7,6>, <5,6,7,6>
+ 2772455532U, // <6,5,6,u>: Cost 3 vsldoi12 <3,4,5,6>, <5,6,u,1>
+ 2627625062U, // <6,5,7,0>: Cost 3 vsldoi4 <1,6,5,7>, LHS
+ 1235667858U, // <6,5,7,1>: Cost 2 vmrglw RHS, <4,0,5,1>
+ 2309409278U, // <6,5,7,2>: Cost 3 vmrglw RHS, <3,4,5,2>
+ 2309407659U, // <6,5,7,3>: Cost 3 vmrglw RHS, <1,2,5,3>
+ 2627628342U, // <6,5,7,4>: Cost 3 vsldoi4 <1,6,5,7>, RHS
+ 1235668186U, // <6,5,7,5>: Cost 2 vmrglw RHS, <4,4,5,5>
+ 1235667458U, // <6,5,7,6>: Cost 2 vmrglw RHS, <3,4,5,6>
+ 2309407987U, // <6,5,7,7>: Cost 3 vmrglw RHS, <1,6,5,7>
+ 1235667460U, // <6,5,7,u>: Cost 2 vmrglw RHS, <3,4,5,u>
+ 2627633254U, // <6,5,u,0>: Cost 3 vsldoi4 <1,6,5,u>, LHS
+ 1235676050U, // <6,5,u,1>: Cost 2 vmrglw RHS, <4,0,5,1>
+ 2309417470U, // <6,5,u,2>: Cost 3 vmrglw RHS, <3,4,5,2>
+ 2309415851U, // <6,5,u,3>: Cost 3 vmrglw RHS, <1,2,5,3>
+ 2627636534U, // <6,5,u,4>: Cost 3 vsldoi4 <1,6,5,u>, RHS
+ 1235676378U, // <6,5,u,5>: Cost 2 vmrglw RHS, <4,4,5,5>
+ 1235675650U, // <6,5,u,6>: Cost 2 vmrglw RHS, <3,4,5,6>
+ 2309416179U, // <6,5,u,7>: Cost 3 vmrglw RHS, <1,6,5,7>
+ 1235675652U, // <6,5,u,u>: Cost 2 vmrglw RHS, <3,4,5,u>
+ 2309352751U, // <6,6,0,0>: Cost 3 vmrglw <4,5,6,0>, <4,5,6,0>
+ 1650917478U, // <6,6,0,1>: Cost 2 vsldoi8 <6,6,6,6>, LHS
+ 2250584570U, // <6,6,0,2>: Cost 3 vmrghw <6,0,2,1>, <6,2,7,3>
+ 3846197554U, // <6,6,0,3>: Cost 4 vsldoi12 <3,4,5,6>, <6,0,3,1>
+ 2724659538U, // <6,6,0,4>: Cost 3 vsldoi8 <6,6,6,6>, <0,4,1,5>
+ 3725275225U, // <6,6,0,5>: Cost 4 vsldoi4 <5,6,6,0>, <5,6,6,0>
+ 2791772493U, // <6,6,0,6>: Cost 3 vsldoi12 <6,6,6,6>, <6,0,6,1>
+ 2309352758U, // <6,6,0,7>: Cost 3 vmrglw <4,5,6,0>, RHS
+ 1650918045U, // <6,6,0,u>: Cost 2 vsldoi8 <6,6,6,6>, LHS
+ 3325358368U, // <6,6,1,0>: Cost 4 vmrghw <6,1,7,1>, <6,0,1,1>
+ 2299406449U, // <6,6,1,1>: Cost 3 vmrglw <2,u,6,1>, <2,u,6,1>
+ 2724660118U, // <6,6,1,2>: Cost 3 vsldoi8 <6,6,6,6>, <1,2,3,0>
+ 3373148518U, // <6,6,1,3>: Cost 4 vmrglw <2,u,6,1>, <3,2,6,3>
+ 3834253712U, // <6,6,1,4>: Cost 4 vsldoi12 <1,4,5,6>, <6,1,4,5>
+ 3373147953U, // <6,6,1,5>: Cost 4 vmrglw <2,u,6,1>, <2,4,6,5>
+ 2323297080U, // <6,6,1,6>: Cost 3 vmrglw <6,u,6,1>, <6,6,6,6>
+ 2299407670U, // <6,6,1,7>: Cost 3 vmrglw <2,u,6,1>, RHS
+ 2299407671U, // <6,6,1,u>: Cost 3 vmrglw <2,u,6,1>, RHS
+ 2252296489U, // <6,6,2,0>: Cost 3 vmrghw <6,2,7,3>, <6,0,2,1>
+ 3326038394U, // <6,6,2,1>: Cost 4 vmrghw <6,2,7,3>, <6,1,2,1>
+ 1178554874U, // <6,6,2,2>: Cost 2 vmrghw <6,2,7,3>, <6,2,7,3>
+ 2724660902U, // <6,6,2,3>: Cost 3 vsldoi8 <6,6,6,6>, <2,3,0,1>
+ 2252296817U, // <6,6,2,4>: Cost 3 vmrghw <6,2,7,3>, <6,4,2,5>
+ 3840741864U, // <6,6,2,5>: Cost 4 vsldoi12 <2,5,3,6>, <6,2,5,3>
+ 2252296976U, // <6,6,2,6>: Cost 3 vmrghw <6,2,7,3>, <6,6,2,2>
+ 2785874426U, // <6,6,2,7>: Cost 3 vsldoi12 <5,6,7,6>, <6,2,7,3>
+ 1178554874U, // <6,6,2,u>: Cost 2 vmrghw <6,2,7,3>, <6,2,7,3>
+ 2724661398U, // <6,6,3,0>: Cost 3 vsldoi8 <6,6,6,6>, <3,0,1,2>
+ 3375154665U, // <6,6,3,1>: Cost 4 vmrglw <3,2,6,3>, <2,0,6,1>
+ 3375154909U, // <6,6,3,2>: Cost 4 vmrglw <3,2,6,3>, <2,3,6,2>
+ 2301413734U, // <6,6,3,3>: Cost 3 vmrglw <3,2,6,3>, <3,2,6,3>
+ 2772455986U, // <6,6,3,4>: Cost 3 vsldoi12 <3,4,5,6>, <6,3,4,5>
+ 3375154993U, // <6,6,3,5>: Cost 4 vmrglw <3,2,6,3>, <2,4,6,5>
+ 2323313464U, // <6,6,3,6>: Cost 3 vmrglw <6,u,6,3>, <6,6,6,6>
+ 2301414710U, // <6,6,3,7>: Cost 3 vmrglw <3,2,6,3>, RHS
+ 2301414711U, // <6,6,3,u>: Cost 3 vmrglw <3,2,6,3>, RHS
+ 2724662162U, // <6,6,4,0>: Cost 3 vsldoi8 <6,6,6,6>, <4,0,5,1>
+ 3326939559U, // <6,6,4,1>: Cost 4 vmrghw <6,4,1,5>, <6,1,7,1>
+ 2253271546U, // <6,6,4,2>: Cost 3 vmrghw <6,4,2,5>, <6,2,7,3>
+ 3383127346U, // <6,6,4,3>: Cost 4 vmrglw <4,5,6,4>, <4,5,6,3>
+ 2309385523U, // <6,6,4,4>: Cost 3 vmrglw <4,5,6,4>, <4,5,6,4>
+ 1650920758U, // <6,6,4,5>: Cost 2 vsldoi8 <6,6,6,6>, RHS
+ 2724662653U, // <6,6,4,6>: Cost 3 vsldoi8 <6,6,6,6>, <4,6,5,6>
+ 2309385526U, // <6,6,4,7>: Cost 3 vmrglw <4,5,6,4>, RHS
+ 1650921001U, // <6,6,4,u>: Cost 2 vsldoi8 <6,6,6,6>, RHS
+ 3725312102U, // <6,6,5,0>: Cost 4 vsldoi4 <5,6,6,5>, LHS
+ 3373180393U, // <6,6,5,1>: Cost 4 vmrglw <2,u,6,5>, <2,0,6,1>
+ 3791769368U, // <6,6,5,2>: Cost 4 vsldoi8 <5,5,6,6>, <5,2,6,3>
+ 3373181286U, // <6,6,5,3>: Cost 4 vmrglw <2,u,6,5>, <3,2,6,3>
+ 3725315382U, // <6,6,5,4>: Cost 4 vsldoi4 <5,6,6,5>, RHS
+ 2299439221U, // <6,6,5,5>: Cost 3 vmrglw <2,u,6,5>, <2,u,6,5>
+ 2724663394U, // <6,6,5,6>: Cost 3 vsldoi8 <6,6,6,6>, <5,6,7,0>
+ 2299440438U, // <6,6,5,7>: Cost 3 vmrglw <2,u,6,5>, RHS
+ 2299440439U, // <6,6,5,u>: Cost 3 vmrglw <2,u,6,5>, RHS
+ 1583808614U, // <6,6,6,0>: Cost 2 vsldoi4 <6,6,6,6>, LHS
+ 2322010445U, // <6,6,6,1>: Cost 3 vmrglw <6,6,6,6>, <6,0,6,1>
+ 2254574074U, // <6,6,6,2>: Cost 3 vmrghw <6,6,2,2>, <6,2,7,3>
+ 2322010609U, // <6,6,6,3>: Cost 3 vmrglw <6,6,6,6>, <6,2,6,3>
+ 1583811894U, // <6,6,6,4>: Cost 2 vsldoi4 <6,6,6,6>, RHS
+ 2322010773U, // <6,6,6,5>: Cost 3 vmrglw <6,6,6,6>, <6,4,6,5>
+ 363253046U, // <6,6,6,6>: Cost 1 vspltisw2 RHS
+ 1248267574U, // <6,6,6,7>: Cost 2 vmrglw <6,6,6,6>, RHS
+ 363253046U, // <6,6,6,u>: Cost 1 vspltisw2 RHS
+ 2309410095U, // <6,6,7,0>: Cost 3 vmrglw RHS, <4,5,6,0>
+ 2309408233U, // <6,6,7,1>: Cost 3 vmrglw RHS, <2,0,6,1>
+ 2311402373U, // <6,6,7,2>: Cost 3 vmrglw RHS, <6,7,6,2>
+ 2309409126U, // <6,6,7,3>: Cost 3 vmrglw RHS, <3,2,6,3>
+ 2309410099U, // <6,6,7,4>: Cost 3 vmrglw RHS, <4,5,6,4>
+ 2309408561U, // <6,6,7,5>: Cost 3 vmrglw RHS, <2,4,6,5>
+ 1237660472U, // <6,6,7,6>: Cost 2 vmrglw RHS, <6,6,6,6>
+ 161926454U, // <6,6,7,7>: Cost 1 vmrglw RHS, RHS
+ 161926455U, // <6,6,7,u>: Cost 1 vmrglw RHS, RHS
+ 1583808614U, // <6,6,u,0>: Cost 2 vsldoi4 <6,6,6,6>, LHS
+ 1650923310U, // <6,6,u,1>: Cost 2 vsldoi8 <6,6,6,6>, LHS
+ 1178554874U, // <6,6,u,2>: Cost 2 vmrghw <6,2,7,3>, <6,2,7,3>
+ 2309417318U, // <6,6,u,3>: Cost 3 vmrglw RHS, <3,2,6,3>
+ 1583811894U, // <6,6,u,4>: Cost 2 vsldoi4 <6,6,6,6>, RHS
+ 1650923674U, // <6,6,u,5>: Cost 2 vsldoi8 <6,6,6,6>, RHS
+ 363253046U, // <6,6,u,6>: Cost 1 vspltisw2 RHS
+ 161934646U, // <6,6,u,7>: Cost 1 vmrglw RHS, RHS
+ 161934647U, // <6,6,u,u>: Cost 1 vmrglw RHS, RHS
+ 1638318080U, // <6,7,0,0>: Cost 2 vsldoi8 RHS, <0,0,0,0>
+ 564576358U, // <6,7,0,1>: Cost 1 vsldoi8 RHS, LHS
+ 2712060077U, // <6,7,0,2>: Cost 3 vsldoi8 RHS, <0,2,1,2>
+ 2712060156U, // <6,7,0,3>: Cost 3 vsldoi8 RHS, <0,3,1,0>
+ 1638318418U, // <6,7,0,4>: Cost 2 vsldoi8 RHS, <0,4,1,5>
+ 1577865314U, // <6,7,0,5>: Cost 2 vsldoi4 <5,6,7,0>, <5,6,7,0>
+ 2712060406U, // <6,7,0,6>: Cost 3 vsldoi8 RHS, <0,6,1,7>
+ 2651608058U, // <6,7,0,7>: Cost 3 vsldoi4 <5,6,7,0>, <7,0,1,2>
+ 564576925U, // <6,7,0,u>: Cost 1 vsldoi8 RHS, LHS
+ 2712060643U, // <6,7,1,0>: Cost 3 vsldoi8 RHS, <1,0,1,1>
+ 1638318900U, // <6,7,1,1>: Cost 2 vsldoi8 RHS, <1,1,1,1>
+ 1638318998U, // <6,7,1,2>: Cost 2 vsldoi8 RHS, <1,2,3,0>
+ 3766559753U, // <6,7,1,3>: Cost 4 vsldoi8 <1,3,6,7>, <1,3,6,7>
+ 2712060971U, // <6,7,1,4>: Cost 3 vsldoi8 RHS, <1,4,1,5>
+ 2712061039U, // <6,7,1,5>: Cost 3 vsldoi8 RHS, <1,5,0,1>
+ 2712061135U, // <6,7,1,6>: Cost 3 vsldoi8 RHS, <1,6,1,7>
+ 3373148612U, // <6,7,1,7>: Cost 4 vmrglw <2,u,6,1>, <3,3,7,7>
+ 1638319484U, // <6,7,1,u>: Cost 2 vsldoi8 RHS, <1,u,3,0>
+ 2712061373U, // <6,7,2,0>: Cost 3 vsldoi8 RHS, <2,0,1,2>
+ 2712061471U, // <6,7,2,1>: Cost 3 vsldoi8 RHS, <2,1,3,1>
+ 1638319720U, // <6,7,2,2>: Cost 2 vsldoi8 RHS, <2,2,2,2>
+ 1638319782U, // <6,7,2,3>: Cost 2 vsldoi8 RHS, <2,3,0,1>
+ 2712061709U, // <6,7,2,4>: Cost 3 vsldoi8 RHS, <2,4,2,5>
+ 2712061800U, // <6,7,2,5>: Cost 3 vsldoi8 RHS, <2,5,3,6>
+ 1638320058U, // <6,7,2,6>: Cost 2 vsldoi8 RHS, <2,6,3,7>
+ 2252297836U, // <6,7,2,7>: Cost 3 vmrghw <6,2,7,3>, <7,7,7,7>
+ 1638320187U, // <6,7,2,u>: Cost 2 vsldoi8 RHS, <2,u,0,1>
+ 1638320278U, // <6,7,3,0>: Cost 2 vsldoi8 RHS, <3,0,1,2>
+ 2712062182U, // <6,7,3,1>: Cost 3 vsldoi8 RHS, <3,1,1,1>
+ 2712062256U, // <6,7,3,2>: Cost 3 vsldoi8 RHS, <3,2,0,3>
+ 1638320540U, // <6,7,3,3>: Cost 2 vsldoi8 RHS, <3,3,3,3>
+ 1638320642U, // <6,7,3,4>: Cost 2 vsldoi8 RHS, <3,4,5,6>
+ 2712062546U, // <6,7,3,5>: Cost 3 vsldoi8 RHS, <3,5,5,5>
+ 2712062584U, // <6,7,3,6>: Cost 3 vsldoi8 RHS, <3,6,0,7>
+ 2712062659U, // <6,7,3,7>: Cost 3 vsldoi8 RHS, <3,7,0,1>
+ 1638320926U, // <6,7,3,u>: Cost 2 vsldoi8 RHS, <3,u,1,2>
+ 1638321042U, // <6,7,4,0>: Cost 2 vsldoi8 RHS, <4,0,5,1>
+ 2712062922U, // <6,7,4,1>: Cost 3 vsldoi8 RHS, <4,1,2,3>
+ 2712063029U, // <6,7,4,2>: Cost 3 vsldoi8 RHS, <4,2,5,2>
+ 2712063108U, // <6,7,4,3>: Cost 3 vsldoi8 RHS, <4,3,5,0>
+ 1638321360U, // <6,7,4,4>: Cost 2 vsldoi8 RHS, <4,4,4,4>
+ 564579638U, // <6,7,4,5>: Cost 1 vsldoi8 RHS, RHS
+ 2712063357U, // <6,7,4,6>: Cost 3 vsldoi8 RHS, <4,6,5,6>
+ 2712063439U, // <6,7,4,7>: Cost 3 vsldoi8 RHS, <4,7,5,7>
+ 564579881U, // <6,7,4,u>: Cost 1 vsldoi8 RHS, RHS
+ 2712063560U, // <6,7,5,0>: Cost 3 vsldoi8 RHS, <5,0,1,2>
+ 2714054287U, // <6,7,5,1>: Cost 3 vsldoi8 RHS, <5,1,0,1>
+ 2712063742U, // <6,7,5,2>: Cost 3 vsldoi8 RHS, <5,2,3,4>
+ 3373181295U, // <6,7,5,3>: Cost 4 vmrglw <2,u,6,5>, <3,2,7,3>
+ 2712063924U, // <6,7,5,4>: Cost 3 vsldoi8 RHS, <5,4,5,6>
+ 1638322180U, // <6,7,5,5>: Cost 2 vsldoi8 RHS, <5,5,5,5>
+ 1638322274U, // <6,7,5,6>: Cost 2 vsldoi8 RHS, <5,6,7,0>
+ 3373181380U, // <6,7,5,7>: Cost 4 vmrglw <2,u,6,5>, <3,3,7,7>
+ 1640313092U, // <6,7,5,u>: Cost 2 vsldoi8 RHS, <5,u,7,0>
+ 2712064289U, // <6,7,6,0>: Cost 3 vsldoi8 RHS, <6,0,1,2>
+ 2712064423U, // <6,7,6,1>: Cost 3 vsldoi8 RHS, <6,1,7,1>
+ 1638322682U, // <6,7,6,2>: Cost 2 vsldoi8 RHS, <6,2,7,3>
+ 2712064562U, // <6,7,6,3>: Cost 3 vsldoi8 RHS, <6,3,4,5>
+ 2712064653U, // <6,7,6,4>: Cost 3 vsldoi8 RHS, <6,4,5,6>
+ 2712064747U, // <6,7,6,5>: Cost 3 vsldoi8 RHS, <6,5,7,1>
+ 1638323000U, // <6,7,6,6>: Cost 2 vsldoi8 RHS, <6,6,6,6>
+ 1638323022U, // <6,7,6,7>: Cost 2 vsldoi8 RHS, <6,7,0,1>
+ 1638323168U, // <6,7,6,u>: Cost 2 vsldoi8 RHS, <6,u,7,3>
+ 1237659746U, // <6,7,7,0>: Cost 2 vmrglw RHS, <5,6,7,0>
+ 2309411158U, // <6,7,7,1>: Cost 3 vmrglw RHS, <6,0,7,1>
+ 2639718330U, // <6,7,7,2>: Cost 3 vsldoi4 <3,6,7,7>, <2,6,3,7>
+ 1235669498U, // <6,7,7,3>: Cost 2 vmrglw RHS, <6,2,7,3>
+ 1237659750U, // <6,7,7,4>: Cost 2 vmrglw RHS, <5,6,7,4>
+ 2309411243U, // <6,7,7,5>: Cost 3 vmrglw RHS, <6,1,7,5>
+ 1583895362U, // <6,7,7,6>: Cost 2 vsldoi4 <6,6,7,7>, <6,6,7,7>
+ 1235669826U, // <6,7,7,7>: Cost 2 vmrglw RHS, <6,6,7,7>
+ 1235669503U, // <6,7,7,u>: Cost 2 vmrglw RHS, <6,2,7,u>
+ 1638323923U, // <6,7,u,0>: Cost 2 vsldoi8 RHS, <u,0,1,2>
+ 564582190U, // <6,7,u,1>: Cost 1 vsldoi8 RHS, LHS
+ 1638324101U, // <6,7,u,2>: Cost 2 vsldoi8 RHS, <u,2,3,0>
+ 1638324156U, // <6,7,u,3>: Cost 2 vsldoi8 RHS, <u,3,0,1>
+ 1638324287U, // <6,7,u,4>: Cost 2 vsldoi8 RHS, <u,4,5,6>
+ 564582554U, // <6,7,u,5>: Cost 1 vsldoi8 RHS, RHS
+ 1638324432U, // <6,7,u,6>: Cost 2 vsldoi8 RHS, <u,6,3,7>
+ 1235678018U, // <6,7,u,7>: Cost 2 vmrglw RHS, <6,6,7,7>
+ 564582757U, // <6,7,u,u>: Cost 1 vsldoi8 RHS, LHS
+ 1638326272U, // <6,u,0,0>: Cost 2 vsldoi8 RHS, <0,0,0,0>
+ 564584550U, // <6,u,0,1>: Cost 1 vsldoi8 RHS, LHS
+ 2712068269U, // <6,u,0,2>: Cost 3 vsldoi8 RHS, <0,2,1,2>
+ 2309349532U, // <6,u,0,3>: Cost 3 vmrglw <4,5,6,0>, LHS
+ 1638326610U, // <6,u,0,4>: Cost 2 vsldoi8 RHS, <0,4,1,5>
+ 1577939051U, // <6,u,0,5>: Cost 2 vsldoi4 <5,6,u,0>, <5,6,u,0>
+ 2712068598U, // <6,u,0,6>: Cost 3 vsldoi8 RHS, <0,6,1,7>
+ 2309352776U, // <6,u,0,7>: Cost 3 vmrglw <4,5,6,0>, RHS
+ 564585117U, // <6,u,0,u>: Cost 1 vsldoi8 RHS, LHS
+ 2712068835U, // <6,u,1,0>: Cost 3 vsldoi8 RHS, <1,0,1,1>
+ 1638327092U, // <6,u,1,1>: Cost 2 vsldoi8 RHS, <1,1,1,1>
+ 1698715438U, // <6,u,1,2>: Cost 2 vsldoi12 <3,4,5,6>, LHS
+ 2299404444U, // <6,u,1,3>: Cost 3 vmrglw <2,u,6,1>, LHS
+ 2712069163U, // <6,u,1,4>: Cost 3 vsldoi8 RHS, <1,4,1,5>
+ 2712069231U, // <6,u,1,5>: Cost 3 vsldoi8 RHS, <1,5,0,1>
+ 2712069327U, // <6,u,1,6>: Cost 3 vsldoi8 RHS, <1,6,1,7>
+ 2299407688U, // <6,u,1,7>: Cost 3 vmrglw <2,u,6,1>, RHS
+ 1698715492U, // <6,u,1,u>: Cost 2 vsldoi12 <3,4,5,6>, LHS
+ 2712069565U, // <6,u,2,0>: Cost 3 vsldoi8 RHS, <2,0,1,2>
+ 1178556206U, // <6,u,2,1>: Cost 2 vmrghw <6,2,7,3>, LHS
+ 1638327912U, // <6,u,2,2>: Cost 2 vsldoi8 RHS, <2,2,2,2>
+ 1638327974U, // <6,u,2,3>: Cost 2 vsldoi8 RHS, <2,3,0,1>
+ 2712069901U, // <6,u,2,4>: Cost 3 vsldoi8 RHS, <2,4,2,5>
+ 1178556570U, // <6,u,2,5>: Cost 2 vmrghw <6,2,7,3>, RHS
+ 1638328250U, // <6,u,2,6>: Cost 2 vsldoi8 RHS, <2,6,3,7>
+ 2252298496U, // <6,u,2,7>: Cost 3 vmrghw <6,2,7,3>, <u,7,0,1>
+ 1638328379U, // <6,u,2,u>: Cost 2 vsldoi8 RHS, <2,u,0,1>
+ 1638328470U, // <6,u,3,0>: Cost 2 vsldoi8 RHS, <3,0,1,2>
+ 2712070374U, // <6,u,3,1>: Cost 3 vsldoi8 RHS, <3,1,1,1>
+ 2704107883U, // <6,u,3,2>: Cost 3 vsldoi8 <3,2,6,u>, <3,2,6,u>
+ 1638328732U, // <6,u,3,3>: Cost 2 vsldoi8 RHS, <3,3,3,3>
+ 1638328834U, // <6,u,3,4>: Cost 2 vsldoi8 RHS, <3,4,5,6>
+ 2712070738U, // <6,u,3,5>: Cost 3 vsldoi8 RHS, <3,5,5,5>
+ 2712070776U, // <6,u,3,6>: Cost 3 vsldoi8 RHS, <3,6,0,7>
+ 2301414728U, // <6,u,3,7>: Cost 3 vmrglw <3,2,6,3>, RHS
+ 1638329118U, // <6,u,3,u>: Cost 2 vsldoi8 RHS, <3,u,1,2>
+ 1638329234U, // <6,u,4,0>: Cost 2 vsldoi8 RHS, <4,0,5,1>
+ 2712071114U, // <6,u,4,1>: Cost 3 vsldoi8 RHS, <4,1,2,3>
+ 2712071221U, // <6,u,4,2>: Cost 3 vsldoi8 RHS, <4,2,5,2>
+ 2309382300U, // <6,u,4,3>: Cost 3 vmrglw <4,5,6,4>, LHS
+ 1638329552U, // <6,u,4,4>: Cost 2 vsldoi8 RHS, <4,4,4,4>
+ 564587831U, // <6,u,4,5>: Cost 1 vsldoi8 RHS, RHS
+ 2712071545U, // <6,u,4,6>: Cost 3 vsldoi8 RHS, <4,6,5,2>
+ 2309385544U, // <6,u,4,7>: Cost 3 vmrglw <4,5,6,4>, RHS
+ 564588073U, // <6,u,4,u>: Cost 1 vsldoi8 RHS, RHS
+ 2712071752U, // <6,u,5,0>: Cost 3 vsldoi8 RHS, <5,0,1,2>
+ 2714062479U, // <6,u,5,1>: Cost 3 vsldoi8 RHS, <5,1,0,1>
+ 2712071934U, // <6,u,5,2>: Cost 3 vsldoi8 RHS, <5,2,3,4>
+ 2299437212U, // <6,u,5,3>: Cost 3 vmrglw <2,u,6,5>, LHS
+ 2712072116U, // <6,u,5,4>: Cost 3 vsldoi8 RHS, <5,4,5,6>
+ 1638330372U, // <6,u,5,5>: Cost 2 vsldoi8 RHS, <5,5,5,5>
+ 1698715802U, // <6,u,5,6>: Cost 2 vsldoi12 <3,4,5,6>, RHS
+ 2299440456U, // <6,u,5,7>: Cost 3 vmrglw <2,u,6,5>, RHS
+ 1698715820U, // <6,u,5,u>: Cost 2 vsldoi12 <3,4,5,6>, RHS
+ 1583808614U, // <6,u,6,0>: Cost 2 vsldoi4 <6,6,6,6>, LHS
+ 1181161262U, // <6,u,6,1>: Cost 2 vmrghw <6,6,6,6>, LHS
+ 1638330874U, // <6,u,6,2>: Cost 2 vsldoi8 RHS, <6,2,7,3>
+ 1248264348U, // <6,u,6,3>: Cost 2 vmrglw <6,6,6,6>, LHS
+ 1583811894U, // <6,u,6,4>: Cost 2 vsldoi4 <6,6,6,6>, RHS
+ 1181161626U, // <6,u,6,5>: Cost 2 vmrghw <6,6,6,6>, RHS
+ 363253046U, // <6,u,6,6>: Cost 1 vspltisw2 RHS
+ 1638331214U, // <6,u,6,7>: Cost 2 vsldoi8 RHS, <6,7,0,1>
+ 363253046U, // <6,u,6,u>: Cost 1 vspltisw2 RHS
+ 1560076390U, // <6,u,7,0>: Cost 2 vsldoi4 <2,6,u,7>, LHS
+ 1235664969U, // <6,u,7,1>: Cost 2 vmrglw RHS, <0,0,u,1>
+ 1560078311U, // <6,u,7,2>: Cost 2 vsldoi4 <2,6,u,7>, <2,6,u,7>
+ 161923228U, // <6,u,7,3>: Cost 1 vmrglw RHS, LHS
+ 1560079670U, // <6,u,7,4>: Cost 2 vsldoi4 <2,6,u,7>, RHS
+ 1235665297U, // <6,u,7,5>: Cost 2 vmrglw RHS, <0,4,u,5>
+ 1235667485U, // <6,u,7,6>: Cost 2 vmrglw RHS, <3,4,u,6>
+ 161926472U, // <6,u,7,7>: Cost 1 vmrglw RHS, RHS
+ 161923233U, // <6,u,7,u>: Cost 1 vmrglw RHS, LHS
+ 1560084582U, // <6,u,u,0>: Cost 2 vsldoi4 <2,6,u,u>, LHS
+ 564590382U, // <6,u,u,1>: Cost 1 vsldoi8 RHS, LHS
+ 1560086504U, // <6,u,u,2>: Cost 2 vsldoi4 <2,6,u,u>, <2,6,u,u>
+ 161931420U, // <6,u,u,3>: Cost 1 vmrglw RHS, LHS
+ 1560087862U, // <6,u,u,4>: Cost 2 vsldoi4 <2,6,u,u>, RHS
+ 564590746U, // <6,u,u,5>: Cost 1 vsldoi8 RHS, RHS
+ 363253046U, // <6,u,u,6>: Cost 1 vspltisw2 RHS
+ 161934664U, // <6,u,u,7>: Cost 1 vmrglw RHS, RHS
+ 161931425U, // <6,u,u,u>: Cost 1 vmrglw RHS, LHS
+ 1705426944U, // <7,0,0,0>: Cost 2 vsldoi12 RHS, <0,0,0,0>
+ 1705426954U, // <7,0,0,1>: Cost 2 vsldoi12 RHS, <0,0,1,1>
+ 3713550266U, // <7,0,0,2>: Cost 4 vsldoi4 <3,7,0,0>, <2,6,3,7>
+ 2316063892U, // <7,0,0,3>: Cost 3 vmrglw <5,6,7,0>, <7,2,0,3>
+ 2779168805U, // <7,0,0,4>: Cost 3 vsldoi12 RHS, <0,0,4,1>
+ 2663698530U, // <7,0,0,5>: Cost 3 vsldoi4 <7,7,0,0>, <5,6,7,0>
+ 2657727309U, // <7,0,0,6>: Cost 3 vsldoi4 <6,7,0,0>, <6,7,0,0>
+ 2316064220U, // <7,0,0,7>: Cost 3 vmrglw <5,6,7,0>, <7,6,0,7>
+ 1705427017U, // <7,0,0,u>: Cost 2 vsldoi12 RHS, <0,0,u,1>
+ 1583988838U, // <7,0,1,0>: Cost 2 vsldoi4 <6,7,0,1>, LHS
+ 2779168859U, // <7,0,1,1>: Cost 3 vsldoi12 RHS, <0,1,1,1>
+ 631685222U, // <7,0,1,2>: Cost 1 vsldoi12 RHS, LHS
+ 2639817411U, // <7,0,1,3>: Cost 3 vsldoi4 <3,7,0,1>, <3,7,0,1>
+ 1583992118U, // <7,0,1,4>: Cost 2 vsldoi4 <6,7,0,1>, RHS
+ 2657734660U, // <7,0,1,5>: Cost 3 vsldoi4 <6,7,0,1>, <5,5,5,5>
+ 1583993678U, // <7,0,1,6>: Cost 2 vsldoi4 <6,7,0,1>, <6,7,0,1>
+ 2657735672U, // <7,0,1,7>: Cost 3 vsldoi4 <6,7,0,1>, <7,0,1,0>
+ 631685276U, // <7,0,1,u>: Cost 1 vsldoi12 RHS, LHS
+ 2779168933U, // <7,0,2,0>: Cost 3 vsldoi12 RHS, <0,2,0,3>
+ 2767667377U, // <7,0,2,1>: Cost 3 vsldoi12 <2,6,3,7>, <0,2,1,6>
+ 2718713448U, // <7,0,2,2>: Cost 3 vsldoi8 <5,6,7,0>, <2,2,2,2>
+ 2718713510U, // <7,0,2,3>: Cost 3 vsldoi8 <5,6,7,0>, <2,3,0,1>
+ 3841409228U, // <7,0,2,4>: Cost 4 vsldoi12 <2,6,3,7>, <0,2,4,6>
+ 3852910802U, // <7,0,2,5>: Cost 4 vsldoi12 RHS, <0,2,5,3>
+ 2718713786U, // <7,0,2,6>: Cost 3 vsldoi8 <5,6,7,0>, <2,6,3,7>
+ 3847160036U, // <7,0,2,7>: Cost 4 vsldoi12 <3,6,0,7>, <0,2,7,3>
+ 2767667440U, // <7,0,2,u>: Cost 3 vsldoi12 <2,6,3,7>, <0,2,u,6>
+ 2718714006U, // <7,0,3,0>: Cost 3 vsldoi8 <5,6,7,0>, <3,0,1,2>
+ 2779169020U, // <7,0,3,1>: Cost 3 vsldoi12 RHS, <0,3,1,0>
+ 3852910853U, // <7,0,3,2>: Cost 4 vsldoi12 RHS, <0,3,2,0>
+ 2718714268U, // <7,0,3,3>: Cost 3 vsldoi8 <5,6,7,0>, <3,3,3,3>
+ 2718714370U, // <7,0,3,4>: Cost 3 vsldoi8 <5,6,7,0>, <3,4,5,6>
+ 2718714461U, // <7,0,3,5>: Cost 3 vsldoi8 <5,6,7,0>, <3,5,6,7>
+ 2706770608U, // <7,0,3,6>: Cost 3 vsldoi8 <3,6,7,0>, <3,6,7,0>
+ 3847160114U, // <7,0,3,7>: Cost 4 vsldoi12 <3,6,0,7>, <0,3,7,0>
+ 2779169083U, // <7,0,3,u>: Cost 3 vsldoi12 RHS, <0,3,u,0>
+ 2718714770U, // <7,0,4,0>: Cost 3 vsldoi8 <5,6,7,0>, <4,0,5,1>
+ 1705427282U, // <7,0,4,1>: Cost 2 vsldoi12 RHS, <0,4,1,5>
+ 3713583034U, // <7,0,4,2>: Cost 4 vsldoi4 <3,7,0,4>, <2,6,3,7>
+ 3713583814U, // <7,0,4,3>: Cost 4 vsldoi4 <3,7,0,4>, <3,7,0,4>
+ 2779169133U, // <7,0,4,4>: Cost 3 vsldoi12 RHS, <0,4,4,5>
+ 1644973366U, // <7,0,4,5>: Cost 2 vsldoi8 <5,6,7,0>, RHS
+ 2657760081U, // <7,0,4,6>: Cost 3 vsldoi4 <6,7,0,4>, <6,7,0,4>
+ 2259468868U, // <7,0,4,7>: Cost 3 vmrghw <7,4,5,6>, <0,7,1,4>
+ 1705427345U, // <7,0,4,u>: Cost 2 vsldoi12 RHS, <0,4,u,5>
+ 2718715508U, // <7,0,5,0>: Cost 3 vsldoi8 <5,6,7,0>, <5,0,6,1>
+ 2260123750U, // <7,0,5,1>: Cost 3 vmrghw <7,5,5,5>, LHS
+ 3792457451U, // <7,0,5,2>: Cost 4 vsldoi8 <5,6,7,0>, <5,2,1,3>
+ 3852911024U, // <7,0,5,3>: Cost 4 vsldoi12 RHS, <0,5,3,0>
+ 2718715836U, // <7,0,5,4>: Cost 3 vsldoi8 <5,6,7,0>, <5,4,6,5>
+ 2718715908U, // <7,0,5,5>: Cost 3 vsldoi8 <5,6,7,0>, <5,5,5,5>
+ 1644974178U, // <7,0,5,6>: Cost 2 vsldoi8 <5,6,7,0>, <5,6,7,0>
+ 3792457853U, // <7,0,5,7>: Cost 4 vsldoi8 <5,6,7,0>, <5,7,1,0>
+ 1646301444U, // <7,0,5,u>: Cost 2 vsldoi8 <5,u,7,0>, <5,u,7,0>
+ 2720706901U, // <7,0,6,0>: Cost 3 vsldoi8 <6,0,7,0>, <6,0,7,0>
+ 2779169270U, // <7,0,6,1>: Cost 3 vsldoi12 RHS, <0,6,1,7>
+ 2718716410U, // <7,0,6,2>: Cost 3 vsldoi8 <5,6,7,0>, <6,2,7,3>
+ 2722697800U, // <7,0,6,3>: Cost 3 vsldoi8 <6,3,7,0>, <6,3,7,0>
+ 3852911121U, // <7,0,6,4>: Cost 4 vsldoi12 RHS, <0,6,4,7>
+ 3852911130U, // <7,0,6,5>: Cost 4 vsldoi12 RHS, <0,6,5,7>
+ 2718716728U, // <7,0,6,6>: Cost 3 vsldoi8 <5,6,7,0>, <6,6,6,6>
+ 2718716750U, // <7,0,6,7>: Cost 3 vsldoi8 <5,6,7,0>, <6,7,0,1>
+ 2779169333U, // <7,0,6,u>: Cost 3 vsldoi12 RHS, <0,6,u,7>
+ 2718716922U, // <7,0,7,0>: Cost 3 vsldoi8 <5,6,7,0>, <7,0,1,2>
+ 1187872870U, // <7,0,7,1>: Cost 2 vmrghw <7,7,7,7>, LHS
+ 2718717076U, // <7,0,7,2>: Cost 3 vsldoi8 <5,6,7,0>, <7,2,0,3>
+ 3847160408U, // <7,0,7,3>: Cost 4 vsldoi12 <3,6,0,7>, <0,7,3,6>
+ 2718717286U, // <7,0,7,4>: Cost 3 vsldoi8 <5,6,7,0>, <7,4,5,6>
+ 2718717377U, // <7,0,7,5>: Cost 3 vsldoi8 <5,6,7,0>, <7,5,6,7>
+ 2718717404U, // <7,0,7,6>: Cost 3 vsldoi8 <5,6,7,0>, <7,6,0,7>
+ 2718717478U, // <7,0,7,7>: Cost 3 vsldoi8 <5,6,7,0>, <7,7,0,0>
+ 1187873437U, // <7,0,7,u>: Cost 2 vmrghw <7,7,7,7>, LHS
+ 1584046182U, // <7,0,u,0>: Cost 2 vsldoi4 <6,7,0,u>, LHS
+ 1705427602U, // <7,0,u,1>: Cost 2 vsldoi12 RHS, <0,u,1,1>
+ 631685789U, // <7,0,u,2>: Cost 1 vsldoi12 RHS, LHS
+ 2639874762U, // <7,0,u,3>: Cost 3 vsldoi4 <3,7,0,u>, <3,7,0,u>
+ 1584049462U, // <7,0,u,4>: Cost 2 vsldoi4 <6,7,0,u>, RHS
+ 1644976282U, // <7,0,u,5>: Cost 2 vsldoi8 <5,6,7,0>, RHS
+ 1584051029U, // <7,0,u,6>: Cost 2 vsldoi4 <6,7,0,u>, <6,7,0,u>
+ 2718718208U, // <7,0,u,7>: Cost 3 vsldoi8 <5,6,7,0>, <u,7,0,1>
+ 631685843U, // <7,0,u,u>: Cost 1 vsldoi12 RHS, LHS
+ 2721374218U, // <7,1,0,0>: Cost 3 vsldoi8 <6,1,7,1>, <0,0,1,1>
+ 2779169507U, // <7,1,0,1>: Cost 3 vsldoi12 RHS, <1,0,1,1>
+ 2779169516U, // <7,1,0,2>: Cost 3 vsldoi12 RHS, <1,0,2,1>
+ 3852911348U, // <7,1,0,3>: Cost 4 vsldoi12 RHS, <1,0,3,0>
+ 2669743414U, // <7,1,0,4>: Cost 3 vsldoi4 <u,7,1,0>, RHS
+ 2316058962U, // <7,1,0,5>: Cost 3 vmrglw <5,6,7,0>, <0,4,1,5>
+ 2316059044U, // <7,1,0,6>: Cost 3 vmrglw <5,6,7,0>, <0,5,1,6>
+ 2669745146U, // <7,1,0,7>: Cost 3 vsldoi4 <u,7,1,0>, <7,0,1,2>
+ 2779169570U, // <7,1,0,u>: Cost 3 vsldoi12 RHS, <1,0,u,1>
+ 2779169579U, // <7,1,1,0>: Cost 3 vsldoi12 RHS, <1,1,0,1>
+ 1705427764U, // <7,1,1,1>: Cost 2 vsldoi12 RHS, <1,1,1,1>
+ 2779169598U, // <7,1,1,2>: Cost 3 vsldoi12 RHS, <1,1,2,2>
+ 3713632972U, // <7,1,1,3>: Cost 4 vsldoi4 <3,7,1,1>, <3,7,1,1>
+ 2779169619U, // <7,1,1,4>: Cost 3 vsldoi12 RHS, <1,1,4,5>
+ 2779169628U, // <7,1,1,5>: Cost 3 vsldoi12 RHS, <1,1,5,5>
+ 2657809239U, // <7,1,1,6>: Cost 3 vsldoi4 <6,7,1,1>, <6,7,1,1>
+ 3835290474U, // <7,1,1,7>: Cost 4 vsldoi12 <1,6,1,7>, <1,1,7,1>
+ 1705427764U, // <7,1,1,u>: Cost 2 vsldoi12 RHS, <1,1,1,1>
+ 2779169660U, // <7,1,2,0>: Cost 3 vsldoi12 RHS, <1,2,0,1>
+ 2779169671U, // <7,1,2,1>: Cost 3 vsldoi12 RHS, <1,2,1,3>
+ 2779169680U, // <7,1,2,2>: Cost 3 vsldoi12 RHS, <1,2,2,3>
+ 1705427862U, // <7,1,2,3>: Cost 2 vsldoi12 RHS, <1,2,3,0>
+ 2779169700U, // <7,1,2,4>: Cost 3 vsldoi12 RHS, <1,2,4,5>
+ 2779169707U, // <7,1,2,5>: Cost 3 vsldoi12 RHS, <1,2,5,3>
+ 2657817432U, // <7,1,2,6>: Cost 3 vsldoi4 <6,7,1,2>, <6,7,1,2>
+ 2803057594U, // <7,1,2,7>: Cost 3 vsldoi12 RHS, <1,2,7,0>
+ 1705427907U, // <7,1,2,u>: Cost 2 vsldoi12 RHS, <1,2,u,0>
+ 3776538827U, // <7,1,3,0>: Cost 4 vsldoi8 <3,0,7,1>, <3,0,7,1>
+ 2319400970U, // <7,1,3,1>: Cost 3 vmrglw <6,2,7,3>, <0,0,1,1>
+ 2316085398U, // <7,1,3,2>: Cost 3 vmrglw <5,6,7,3>, <3,0,1,2>
+ 3852911591U, // <7,1,3,3>: Cost 4 vsldoi12 RHS, <1,3,3,0>
+ 3852911600U, // <7,1,3,4>: Cost 4 vsldoi12 RHS, <1,3,4,0>
+ 2319401298U, // <7,1,3,5>: Cost 3 vmrglw <6,2,7,3>, <0,4,1,5>
+ 3833668617U, // <7,1,3,6>: Cost 4 vsldoi12 <1,3,6,7>, <1,3,6,7>
+ 3367265487U, // <7,1,3,7>: Cost 4 vmrglw <1,u,7,3>, <1,6,1,7>
+ 2319400977U, // <7,1,3,u>: Cost 3 vmrglw <6,2,7,3>, <0,0,1,u>
+ 2724031378U, // <7,1,4,0>: Cost 3 vsldoi8 <6,5,7,1>, <4,0,5,1>
+ 2779169835U, // <7,1,4,1>: Cost 3 vsldoi12 RHS, <1,4,1,5>
+ 2779169844U, // <7,1,4,2>: Cost 3 vsldoi12 RHS, <1,4,2,5>
+ 3852911672U, // <7,1,4,3>: Cost 4 vsldoi12 RHS, <1,4,3,0>
+ 2669776182U, // <7,1,4,4>: Cost 3 vsldoi4 <u,7,1,4>, RHS
+ 2779169872U, // <7,1,4,5>: Cost 3 vsldoi12 RHS, <1,4,5,6>
+ 3835290712U, // <7,1,4,6>: Cost 4 vsldoi12 <1,6,1,7>, <1,4,6,5>
+ 2669778278U, // <7,1,4,7>: Cost 3 vsldoi4 <u,7,1,4>, <7,4,5,6>
+ 2779169898U, // <7,1,4,u>: Cost 3 vsldoi12 RHS, <1,4,u,5>
+ 2779169903U, // <7,1,5,0>: Cost 3 vsldoi12 RHS, <1,5,0,1>
+ 3835585661U, // <7,1,5,1>: Cost 4 vsldoi12 <1,6,5,7>, <1,5,1,6>
+ 3841410182U, // <7,1,5,2>: Cost 4 vsldoi12 <2,6,3,7>, <1,5,2,6>
+ 3852911753U, // <7,1,5,3>: Cost 4 vsldoi12 RHS, <1,5,3,0>
+ 2779169943U, // <7,1,5,4>: Cost 3 vsldoi12 RHS, <1,5,4,5>
+ 2318754130U, // <7,1,5,5>: Cost 3 vmrglw <6,1,7,5>, <0,4,1,5>
+ 2718724195U, // <7,1,5,6>: Cost 3 vsldoi8 <5,6,7,1>, <5,6,7,1>
+ 3859178670U, // <7,1,5,7>: Cost 4 vsldoi12 <5,6,1,7>, <1,5,7,1>
+ 2779169975U, // <7,1,5,u>: Cost 3 vsldoi12 RHS, <1,5,u,1>
+ 2720715094U, // <7,1,6,0>: Cost 3 vsldoi8 <6,0,7,1>, <6,0,7,1>
+ 2761549007U, // <7,1,6,1>: Cost 3 vsldoi12 <1,6,1,7>, <1,6,1,7>
+ 2779170008U, // <7,1,6,2>: Cost 3 vsldoi12 RHS, <1,6,2,7>
+ 3835438305U, // <7,1,6,3>: Cost 4 vsldoi12 <1,6,3,7>, <1,6,3,7>
+ 3835512042U, // <7,1,6,4>: Cost 4 vsldoi12 <1,6,4,7>, <1,6,4,7>
+ 2761843955U, // <7,1,6,5>: Cost 3 vsldoi12 <1,6,5,7>, <1,6,5,7>
+ 3835659516U, // <7,1,6,6>: Cost 4 vsldoi12 <1,6,6,7>, <1,6,6,7>
+ 2803057918U, // <7,1,6,7>: Cost 3 vsldoi12 RHS, <1,6,7,0>
+ 2762065166U, // <7,1,6,u>: Cost 3 vsldoi12 <1,6,u,7>, <1,6,u,7>
+ 2669797478U, // <7,1,7,0>: Cost 3 vsldoi4 <u,7,1,7>, LHS
+ 2322087946U, // <7,1,7,1>: Cost 3 vmrglw <6,6,7,7>, <0,0,1,1>
+ 2317448186U, // <7,1,7,2>: Cost 3 vmrglw <5,u,7,7>, <7,0,1,2>
+ 3395829934U, // <7,1,7,3>: Cost 4 vmrglw <6,6,7,7>, <0,2,1,3>
+ 2669800758U, // <7,1,7,4>: Cost 3 vsldoi4 <u,7,1,7>, RHS
+ 2322088274U, // <7,1,7,5>: Cost 3 vmrglw <6,6,7,7>, <0,4,1,5>
+ 3375923377U, // <7,1,7,6>: Cost 4 vmrglw <3,3,7,7>, <0,2,1,6>
+ 2731996780U, // <7,1,7,7>: Cost 3 vsldoi8 <7,u,7,1>, <7,7,7,7>
+ 2322087953U, // <7,1,7,u>: Cost 3 vmrglw <6,6,7,7>, <0,0,1,u>
+ 2779170146U, // <7,1,u,0>: Cost 3 vsldoi12 RHS, <1,u,0,1>
+ 1705427764U, // <7,1,u,1>: Cost 2 vsldoi12 RHS, <1,1,1,1>
+ 2779170164U, // <7,1,u,2>: Cost 3 vsldoi12 RHS, <1,u,2,1>
+ 1705428348U, // <7,1,u,3>: Cost 2 vsldoi12 RHS, <1,u,3,0>
+ 2779170186U, // <7,1,u,4>: Cost 3 vsldoi12 RHS, <1,u,4,5>
+ 2763171221U, // <7,1,u,5>: Cost 3 vsldoi12 <1,u,5,7>, <1,u,5,7>
+ 2657866590U, // <7,1,u,6>: Cost 3 vsldoi4 <6,7,1,u>, <6,7,1,u>
+ 2803058080U, // <7,1,u,7>: Cost 3 vsldoi12 RHS, <1,u,7,0>
+ 1705428393U, // <7,1,u,u>: Cost 2 vsldoi12 RHS, <1,u,u,0>
+ 3713695846U, // <7,2,0,0>: Cost 4 vsldoi4 <3,7,2,0>, LHS
+ 2779170237U, // <7,2,0,1>: Cost 3 vsldoi12 RHS, <2,0,1,2>
+ 2779170245U, // <7,2,0,2>: Cost 3 vsldoi12 RHS, <2,0,2,1>
+ 1242316902U, // <7,2,0,3>: Cost 2 vmrglw <5,6,7,0>, LHS
+ 3713699126U, // <7,2,0,4>: Cost 4 vsldoi4 <3,7,2,0>, RHS
+ 3852912096U, // <7,2,0,5>: Cost 4 vsldoi12 RHS, <2,0,5,1>
+ 2767668713U, // <7,2,0,6>: Cost 3 vsldoi12 <2,6,3,7>, <2,0,6,1>
+ 2256488426U, // <7,2,0,7>: Cost 3 vmrghw <7,0,1,2>, <2,7,0,1>
+ 1242316907U, // <7,2,0,u>: Cost 2 vmrglw <5,6,7,0>, LHS
+ 3852912132U, // <7,2,1,0>: Cost 4 vsldoi12 RHS, <2,1,0,1>
+ 3852912141U, // <7,2,1,1>: Cost 4 vsldoi12 RHS, <2,1,1,1>
+ 3852912149U, // <7,2,1,2>: Cost 4 vsldoi12 RHS, <2,1,2,0>
+ 2779170335U, // <7,2,1,3>: Cost 3 vsldoi12 RHS, <2,1,3,1>
+ 3852912172U, // <7,2,1,4>: Cost 4 vsldoi12 RHS, <2,1,4,5>
+ 3840747062U, // <7,2,1,5>: Cost 5 vsldoi12 <2,5,3,7>, <2,1,5,6>
+ 3841410617U, // <7,2,1,6>: Cost 4 vsldoi12 <2,6,3,7>, <2,1,6,0>
+ 3795125538U, // <7,2,1,7>: Cost 4 vsldoi8 <6,1,7,2>, <1,7,2,0>
+ 2779170380U, // <7,2,1,u>: Cost 3 vsldoi12 RHS, <2,1,u,1>
+ 2779170389U, // <7,2,2,0>: Cost 3 vsldoi12 RHS, <2,2,0,1>
+ 3852912222U, // <7,2,2,1>: Cost 4 vsldoi12 RHS, <2,2,1,1>
+ 1705428584U, // <7,2,2,2>: Cost 2 vsldoi12 RHS, <2,2,2,2>
+ 1705428594U, // <7,2,2,3>: Cost 2 vsldoi12 RHS, <2,2,3,3>
+ 2779170429U, // <7,2,2,4>: Cost 3 vsldoi12 RHS, <2,2,4,5>
+ 3852912259U, // <7,2,2,5>: Cost 4 vsldoi12 RHS, <2,2,5,2>
+ 2767668880U, // <7,2,2,6>: Cost 3 vsldoi12 <2,6,3,7>, <2,2,6,6>
+ 3841336981U, // <7,2,2,7>: Cost 4 vsldoi12 <2,6,2,7>, <2,2,7,2>
+ 1705428639U, // <7,2,2,u>: Cost 2 vsldoi12 RHS, <2,2,u,3>
+ 1705428646U, // <7,2,3,0>: Cost 2 vsldoi12 RHS, <2,3,0,1>
+ 2779170479U, // <7,2,3,1>: Cost 3 vsldoi12 RHS, <2,3,1,1>
+ 2767668925U, // <7,2,3,2>: Cost 3 vsldoi12 <2,6,3,7>, <2,3,2,6>
+ 1245659238U, // <7,2,3,3>: Cost 2 vmrglw <6,2,7,3>, LHS
+ 1705428686U, // <7,2,3,4>: Cost 2 vsldoi12 RHS, <2,3,4,5>
+ 2779170519U, // <7,2,3,5>: Cost 3 vsldoi12 RHS, <2,3,5,5>
+ 2657899362U, // <7,2,3,6>: Cost 3 vsldoi4 <6,7,2,3>, <6,7,2,3>
+ 2319406574U, // <7,2,3,7>: Cost 3 vmrglw <6,2,7,3>, <7,6,2,7>
+ 1705428718U, // <7,2,3,u>: Cost 2 vsldoi12 RHS, <2,3,u,1>
+ 3713728614U, // <7,2,4,0>: Cost 4 vsldoi4 <3,7,2,4>, LHS
+ 3852912388U, // <7,2,4,1>: Cost 4 vsldoi12 RHS, <2,4,1,5>
+ 2779170573U, // <7,2,4,2>: Cost 3 vsldoi12 RHS, <2,4,2,5>
+ 1242349670U, // <7,2,4,3>: Cost 2 vmrglw <5,6,7,4>, LHS
+ 3713731894U, // <7,2,4,4>: Cost 4 vsldoi4 <3,7,2,4>, RHS
+ 2779170601U, // <7,2,4,5>: Cost 3 vsldoi12 RHS, <2,4,5,6>
+ 2767669041U, // <7,2,4,6>: Cost 3 vsldoi12 <2,6,3,7>, <2,4,6,5>
+ 3389834456U, // <7,2,4,7>: Cost 4 vmrglw <5,6,7,4>, <1,6,2,7>
+ 1242349675U, // <7,2,4,u>: Cost 2 vmrglw <5,6,7,4>, LHS
+ 3852912456U, // <7,2,5,0>: Cost 4 vsldoi12 RHS, <2,5,0,1>
+ 3852912466U, // <7,2,5,1>: Cost 4 vsldoi12 RHS, <2,5,1,2>
+ 3852912475U, // <7,2,5,2>: Cost 4 vsldoi12 RHS, <2,5,2,2>
+ 2779170664U, // <7,2,5,3>: Cost 3 vsldoi12 RHS, <2,5,3,6>
+ 3852912496U, // <7,2,5,4>: Cost 4 vsldoi12 RHS, <2,5,4,5>
+ 3792474116U, // <7,2,5,5>: Cost 4 vsldoi8 <5,6,7,2>, <5,5,5,5>
+ 2718732388U, // <7,2,5,6>: Cost 3 vsldoi8 <5,6,7,2>, <5,6,7,2>
+ 3841337228U, // <7,2,5,7>: Cost 5 vsldoi12 <2,6,2,7>, <2,5,7,6>
+ 2779170709U, // <7,2,5,u>: Cost 3 vsldoi12 RHS, <2,5,u,6>
+ 2640003174U, // <7,2,6,0>: Cost 3 vsldoi4 <3,7,2,6>, LHS
+ 2721386920U, // <7,2,6,1>: Cost 3 vsldoi8 <6,1,7,2>, <6,1,7,2>
+ 2767595441U, // <7,2,6,2>: Cost 3 vsldoi12 <2,6,2,7>, <2,6,2,7>
+ 1693927354U, // <7,2,6,3>: Cost 2 vsldoi12 <2,6,3,7>, <2,6,3,7>
+ 2640006454U, // <7,2,6,4>: Cost 3 vsldoi4 <3,7,2,6>, RHS
+ 3841558476U, // <7,2,6,5>: Cost 4 vsldoi12 <2,6,5,7>, <2,6,5,7>
+ 2657923941U, // <7,2,6,6>: Cost 3 vsldoi4 <6,7,2,6>, <6,7,2,6>
+ 3841337310U, // <7,2,6,7>: Cost 4 vsldoi12 <2,6,2,7>, <2,6,7,7>
+ 1694296039U, // <7,2,6,u>: Cost 2 vsldoi12 <2,6,u,7>, <2,6,u,7>
+ 2803058666U, // <7,2,7,0>: Cost 3 vsldoi12 RHS, <2,7,0,1>
+ 3852912632U, // <7,2,7,1>: Cost 4 vsldoi12 RHS, <2,7,1,6>
+ 2322089576U, // <7,2,7,2>: Cost 3 vmrglw <6,6,7,7>, <2,2,2,2>
+ 1248346214U, // <7,2,7,3>: Cost 2 vmrglw <6,6,7,7>, LHS
+ 3841337362U, // <7,2,7,4>: Cost 4 vsldoi12 <2,6,2,7>, <2,7,4,5>
+ 3395830836U, // <7,2,7,5>: Cost 4 vmrglw <6,6,7,7>, <1,4,2,5>
+ 2261616570U, // <7,2,7,6>: Cost 3 vmrghw <7,7,7,7>, <2,6,3,7>
+ 3371943857U, // <7,2,7,7>: Cost 4 vmrglw <2,6,7,7>, <2,6,2,7>
+ 1248346219U, // <7,2,7,u>: Cost 2 vmrglw <6,6,7,7>, LHS
+ 1705429051U, // <7,2,u,0>: Cost 2 vsldoi12 RHS, <2,u,0,1>
+ 2779170884U, // <7,2,u,1>: Cost 3 vsldoi12 RHS, <2,u,1,1>
+ 1705428584U, // <7,2,u,2>: Cost 2 vsldoi12 RHS, <2,2,2,2>
+ 1695254620U, // <7,2,u,3>: Cost 2 vsldoi12 <2,u,3,7>, <2,u,3,7>
+ 1705429091U, // <7,2,u,4>: Cost 2 vsldoi12 RHS, <2,u,4,5>
+ 2779170924U, // <7,2,u,5>: Cost 3 vsldoi12 RHS, <2,u,5,5>
+ 2767669361U, // <7,2,u,6>: Cost 3 vsldoi12 <2,6,3,7>, <2,u,6,1>
+ 2803058809U, // <7,2,u,7>: Cost 3 vsldoi12 RHS, <2,u,7,0>
+ 1695623305U, // <7,2,u,u>: Cost 2 vsldoi12 <2,u,u,7>, <2,u,u,7>
+ 2779170955U, // <7,3,0,0>: Cost 3 vsldoi12 RHS, <3,0,0,0>
+ 1705429142U, // <7,3,0,1>: Cost 2 vsldoi12 RHS, <3,0,1,2>
+ 2634057732U, // <7,3,0,2>: Cost 3 vsldoi4 <2,7,3,0>, <2,7,3,0>
+ 2779170983U, // <7,3,0,3>: Cost 3 vsldoi12 RHS, <3,0,3,1>
+ 2779170992U, // <7,3,0,4>: Cost 3 vsldoi12 RHS, <3,0,4,1>
+ 3852912829U, // <7,3,0,5>: Cost 4 vsldoi12 RHS, <3,0,5,5>
+ 2657948520U, // <7,3,0,6>: Cost 3 vsldoi4 <6,7,3,0>, <6,7,3,0>
+ 2316060602U, // <7,3,0,7>: Cost 3 vmrglw <5,6,7,0>, <2,6,3,7>
+ 1705429205U, // <7,3,0,u>: Cost 2 vsldoi12 RHS, <3,0,u,2>
+ 3852912860U, // <7,3,1,0>: Cost 4 vsldoi12 RHS, <3,1,0,0>
+ 2779171046U, // <7,3,1,1>: Cost 3 vsldoi12 RHS, <3,1,1,1>
+ 2779171057U, // <7,3,1,2>: Cost 3 vsldoi12 RHS, <3,1,2,3>
+ 3852912887U, // <7,3,1,3>: Cost 4 vsldoi12 RHS, <3,1,3,0>
+ 3852912896U, // <7,3,1,4>: Cost 4 vsldoi12 RHS, <3,1,4,0>
+ 3852912905U, // <7,3,1,5>: Cost 4 vsldoi12 RHS, <3,1,5,0>
+ 3835291923U, // <7,3,1,6>: Cost 4 vsldoi12 <1,6,1,7>, <3,1,6,1>
+ 3841411356U, // <7,3,1,7>: Cost 4 vsldoi12 <2,6,3,7>, <3,1,7,1>
+ 2779171111U, // <7,3,1,u>: Cost 3 vsldoi12 RHS, <3,1,u,3>
+ 2779171120U, // <7,3,2,0>: Cost 3 vsldoi12 RHS, <3,2,0,3>
+ 3852912952U, // <7,3,2,1>: Cost 4 vsldoi12 RHS, <3,2,1,2>
+ 2779171137U, // <7,3,2,2>: Cost 3 vsldoi12 RHS, <3,2,2,2>
+ 2779171144U, // <7,3,2,3>: Cost 3 vsldoi12 RHS, <3,2,3,0>
+ 2779171156U, // <7,3,2,4>: Cost 3 vsldoi12 RHS, <3,2,4,3>
+ 3852912989U, // <7,3,2,5>: Cost 4 vsldoi12 RHS, <3,2,5,3>
+ 2767669606U, // <7,3,2,6>: Cost 3 vsldoi12 <2,6,3,7>, <3,2,6,3>
+ 2767669615U, // <7,3,2,7>: Cost 3 vsldoi12 <2,6,3,7>, <3,2,7,3>
+ 2779171189U, // <7,3,2,u>: Cost 3 vsldoi12 RHS, <3,2,u,0>
+ 2779171198U, // <7,3,3,0>: Cost 3 vsldoi12 RHS, <3,3,0,0>
+ 3852913032U, // <7,3,3,1>: Cost 4 vsldoi12 RHS, <3,3,1,1>
+ 2704140655U, // <7,3,3,2>: Cost 3 vsldoi8 <3,2,7,3>, <3,2,7,3>
+ 1705429404U, // <7,3,3,3>: Cost 2 vsldoi12 RHS, <3,3,3,3>
+ 2779171238U, // <7,3,3,4>: Cost 3 vsldoi12 RHS, <3,3,4,4>
+ 3852913070U, // <7,3,3,5>: Cost 4 vsldoi12 RHS, <3,3,5,3>
+ 2657973099U, // <7,3,3,6>: Cost 3 vsldoi4 <6,7,3,3>, <6,7,3,3>
+ 2767669700U, // <7,3,3,7>: Cost 3 vsldoi12 <2,6,3,7>, <3,3,7,7>
+ 1705429404U, // <7,3,3,u>: Cost 2 vsldoi12 RHS, <3,3,3,3>
+ 2779171280U, // <7,3,4,0>: Cost 3 vsldoi12 RHS, <3,4,0,1>
+ 2779171290U, // <7,3,4,1>: Cost 3 vsldoi12 RHS, <3,4,1,2>
+ 2634090504U, // <7,3,4,2>: Cost 3 vsldoi4 <2,7,3,4>, <2,7,3,4>
+ 2779171311U, // <7,3,4,3>: Cost 3 vsldoi12 RHS, <3,4,3,5>
+ 2779171319U, // <7,3,4,4>: Cost 3 vsldoi12 RHS, <3,4,4,4>
+ 1705429506U, // <7,3,4,5>: Cost 2 vsldoi12 RHS, <3,4,5,6>
+ 2722057593U, // <7,3,4,6>: Cost 3 vsldoi8 <6,2,7,3>, <4,6,5,2>
+ 2316093370U, // <7,3,4,7>: Cost 3 vmrglw <5,6,7,4>, <2,6,3,7>
+ 1705429533U, // <7,3,4,u>: Cost 2 vsldoi12 RHS, <3,4,u,6>
+ 3852913185U, // <7,3,5,0>: Cost 4 vsldoi12 RHS, <3,5,0,1>
+ 3795799695U, // <7,3,5,1>: Cost 4 vsldoi8 <6,2,7,3>, <5,1,0,1>
+ 3852913203U, // <7,3,5,2>: Cost 4 vsldoi12 RHS, <3,5,2,1>
+ 3852913214U, // <7,3,5,3>: Cost 4 vsldoi12 RHS, <3,5,3,3>
+ 3852913225U, // <7,3,5,4>: Cost 4 vsldoi12 RHS, <3,5,4,5>
+ 2779171410U, // <7,3,5,5>: Cost 3 vsldoi12 RHS, <3,5,5,5>
+ 2718740581U, // <7,3,5,6>: Cost 3 vsldoi8 <5,6,7,3>, <5,6,7,3>
+ 3841411685U, // <7,3,5,7>: Cost 4 vsldoi12 <2,6,3,7>, <3,5,7,6>
+ 2720067847U, // <7,3,5,u>: Cost 3 vsldoi8 <5,u,7,3>, <5,u,7,3>
+ 2773420664U, // <7,3,6,0>: Cost 3 vsldoi12 <3,6,0,7>, <3,6,0,7>
+ 3847236225U, // <7,3,6,1>: Cost 4 vsldoi12 <3,6,1,7>, <3,6,1,7>
+ 1648316922U, // <7,3,6,2>: Cost 2 vsldoi8 <6,2,7,3>, <6,2,7,3>
+ 2773641875U, // <7,3,6,3>: Cost 3 vsldoi12 <3,6,3,7>, <3,6,3,7>
+ 2773715612U, // <7,3,6,4>: Cost 3 vsldoi12 <3,6,4,7>, <3,6,4,7>
+ 3847531173U, // <7,3,6,5>: Cost 4 vsldoi12 <3,6,5,7>, <3,6,5,7>
+ 2722059024U, // <7,3,6,6>: Cost 3 vsldoi8 <6,2,7,3>, <6,6,2,2>
+ 2767669943U, // <7,3,6,7>: Cost 3 vsldoi12 <2,6,3,7>, <3,6,7,7>
+ 1652298720U, // <7,3,6,u>: Cost 2 vsldoi8 <6,u,7,3>, <6,u,7,3>
+ 2767669955U, // <7,3,7,0>: Cost 3 vsldoi12 <2,6,3,7>, <3,7,0,1>
+ 3841411788U, // <7,3,7,1>: Cost 4 vsldoi12 <2,6,3,7>, <3,7,1,1>
+ 2767669978U, // <7,3,7,2>: Cost 3 vsldoi12 <2,6,3,7>, <3,7,2,6>
+ 2722059546U, // <7,3,7,3>: Cost 3 vsldoi8 <6,2,7,3>, <7,3,6,2>
+ 2767669995U, // <7,3,7,4>: Cost 3 vsldoi12 <2,6,3,7>, <3,7,4,5>
+ 3852913396U, // <7,3,7,5>: Cost 4 vsldoi12 RHS, <3,7,5,5>
+ 2722059758U, // <7,3,7,6>: Cost 3 vsldoi8 <6,2,7,3>, <7,6,2,7>
+ 2302183354U, // <7,3,7,7>: Cost 3 vmrglw <3,3,7,7>, <2,6,3,7>
+ 2767670027U, // <7,3,7,u>: Cost 3 vsldoi12 <2,6,3,7>, <3,7,u,1>
+ 2774747930U, // <7,3,u,0>: Cost 3 vsldoi12 <3,u,0,7>, <3,u,0,7>
+ 1705429790U, // <7,3,u,1>: Cost 2 vsldoi12 RHS, <3,u,1,2>
+ 1660262316U, // <7,3,u,2>: Cost 2 vsldoi8 <u,2,7,3>, <u,2,7,3>
+ 1705429404U, // <7,3,u,3>: Cost 2 vsldoi12 RHS, <3,3,3,3>
+ 2775042878U, // <7,3,u,4>: Cost 3 vsldoi12 <3,u,4,7>, <3,u,4,7>
+ 1705429830U, // <7,3,u,5>: Cost 2 vsldoi12 RHS, <3,u,5,6>
+ 2779171660U, // <7,3,u,6>: Cost 3 vsldoi12 RHS, <3,u,6,3>
+ 2767670101U, // <7,3,u,7>: Cost 3 vsldoi12 <2,6,3,7>, <3,u,7,3>
+ 1705429853U, // <7,3,u,u>: Cost 2 vsldoi12 RHS, <3,u,u,2>
+ 2718744576U, // <7,4,0,0>: Cost 3 vsldoi8 <5,6,7,4>, <0,0,0,0>
+ 1645002854U, // <7,4,0,1>: Cost 2 vsldoi8 <5,6,7,4>, LHS
+ 3852913527U, // <7,4,0,2>: Cost 4 vsldoi12 RHS, <4,0,2,1>
+ 3852913536U, // <7,4,0,3>: Cost 4 vsldoi12 RHS, <4,0,3,1>
+ 2316061904U, // <7,4,0,4>: Cost 3 vmrglw <5,6,7,0>, <4,4,4,4>
+ 1705429906U, // <7,4,0,5>: Cost 2 vsldoi12 RHS, <4,0,5,1>
+ 2658022257U, // <7,4,0,6>: Cost 3 vsldoi4 <6,7,4,0>, <6,7,4,0>
+ 2256489928U, // <7,4,0,7>: Cost 3 vmrghw <7,0,1,2>, <4,7,5,0>
+ 1707420589U, // <7,4,0,u>: Cost 2 vsldoi12 RHS, <4,0,u,1>
+ 3852913590U, // <7,4,1,0>: Cost 4 vsldoi12 RHS, <4,1,0,1>
+ 2718745396U, // <7,4,1,1>: Cost 3 vsldoi8 <5,6,7,4>, <1,1,1,1>
+ 2779171786U, // <7,4,1,2>: Cost 3 vsldoi12 RHS, <4,1,2,3>
+ 3852913616U, // <7,4,1,3>: Cost 4 vsldoi12 RHS, <4,1,3,0>
+ 3852913627U, // <7,4,1,4>: Cost 4 vsldoi12 RHS, <4,1,4,2>
+ 2779171810U, // <7,4,1,5>: Cost 3 vsldoi12 RHS, <4,1,5,0>
+ 3792487631U, // <7,4,1,6>: Cost 4 vsldoi8 <5,6,7,4>, <1,6,1,7>
+ 3394456220U, // <7,4,1,7>: Cost 4 vmrglw <6,4,7,1>, <3,6,4,7>
+ 2779171837U, // <7,4,1,u>: Cost 3 vsldoi12 RHS, <4,1,u,0>
+ 3852913673U, // <7,4,2,0>: Cost 4 vsldoi12 RHS, <4,2,0,3>
+ 3852913682U, // <7,4,2,1>: Cost 4 vsldoi12 RHS, <4,2,1,3>
+ 2718746216U, // <7,4,2,2>: Cost 3 vsldoi8 <5,6,7,4>, <2,2,2,2>
+ 2718746278U, // <7,4,2,3>: Cost 3 vsldoi8 <5,6,7,4>, <2,3,0,1>
+ 2779171885U, // <7,4,2,4>: Cost 3 vsldoi12 RHS, <4,2,4,3>
+ 2779171893U, // <7,4,2,5>: Cost 3 vsldoi12 RHS, <4,2,5,2>
+ 2718746554U, // <7,4,2,6>: Cost 3 vsldoi8 <5,6,7,4>, <2,6,3,7>
+ 3847457864U, // <7,4,2,7>: Cost 4 vsldoi12 <3,6,4,7>, <4,2,7,3>
+ 2779171921U, // <7,4,2,u>: Cost 3 vsldoi12 RHS, <4,2,u,3>
+ 2718746774U, // <7,4,3,0>: Cost 3 vsldoi8 <5,6,7,4>, <3,0,1,2>
+ 3852913762U, // <7,4,3,1>: Cost 4 vsldoi12 RHS, <4,3,1,2>
+ 3852913772U, // <7,4,3,2>: Cost 4 vsldoi12 RHS, <4,3,2,3>
+ 2718747036U, // <7,4,3,3>: Cost 3 vsldoi8 <5,6,7,4>, <3,3,3,3>
+ 2718747138U, // <7,4,3,4>: Cost 3 vsldoi8 <5,6,7,4>, <3,4,5,6>
+ 2779171972U, // <7,4,3,5>: Cost 3 vsldoi12 RHS, <4,3,5,0>
+ 2706803380U, // <7,4,3,6>: Cost 3 vsldoi8 <3,6,7,4>, <3,6,7,4>
+ 3847457946U, // <7,4,3,7>: Cost 4 vsldoi12 <3,6,4,7>, <4,3,7,4>
+ 2781162655U, // <7,4,3,u>: Cost 3 vsldoi12 RHS, <4,3,u,0>
+ 2718747538U, // <7,4,4,0>: Cost 3 vsldoi8 <5,6,7,4>, <4,0,5,1>
+ 3852913842U, // <7,4,4,1>: Cost 4 vsldoi12 RHS, <4,4,1,1>
+ 3852913852U, // <7,4,4,2>: Cost 4 vsldoi12 RHS, <4,4,2,2>
+ 2316096696U, // <7,4,4,3>: Cost 3 vmrglw <5,6,7,4>, <7,2,4,3>
+ 1705430224U, // <7,4,4,4>: Cost 2 vsldoi12 RHS, <4,4,4,4>
+ 1705430234U, // <7,4,4,5>: Cost 2 vsldoi12 RHS, <4,4,5,5>
+ 2658055029U, // <7,4,4,6>: Cost 3 vsldoi4 <6,7,4,4>, <6,7,4,4>
+ 2316097024U, // <7,4,4,7>: Cost 3 vmrglw <5,6,7,4>, <7,6,4,7>
+ 1707420917U, // <7,4,4,u>: Cost 2 vsldoi12 RHS, <4,4,u,5>
+ 1584316518U, // <7,4,5,0>: Cost 2 vsldoi4 <6,7,4,5>, LHS
+ 2658059060U, // <7,4,5,1>: Cost 3 vsldoi4 <6,7,4,5>, <1,1,1,1>
+ 2640144314U, // <7,4,5,2>: Cost 3 vsldoi4 <3,7,4,5>, <2,6,3,7>
+ 2640145131U, // <7,4,5,3>: Cost 3 vsldoi4 <3,7,4,5>, <3,7,4,5>
+ 1584319798U, // <7,4,5,4>: Cost 2 vsldoi4 <6,7,4,5>, RHS
+ 2779172134U, // <7,4,5,5>: Cost 3 vsldoi12 RHS, <4,5,5,0>
+ 631688502U, // <7,4,5,6>: Cost 1 vsldoi12 RHS, RHS
+ 2658063354U, // <7,4,5,7>: Cost 3 vsldoi4 <6,7,4,5>, <7,0,1,2>
+ 631688520U, // <7,4,5,u>: Cost 1 vsldoi12 RHS, RHS
+ 3852914001U, // <7,4,6,0>: Cost 4 vsldoi12 RHS, <4,6,0,7>
+ 3852914010U, // <7,4,6,1>: Cost 4 vsldoi12 RHS, <4,6,1,7>
+ 2718749178U, // <7,4,6,2>: Cost 3 vsldoi8 <5,6,7,4>, <6,2,7,3>
+ 2722730572U, // <7,4,6,3>: Cost 3 vsldoi8 <6,3,7,4>, <6,3,7,4>
+ 2723394205U, // <7,4,6,4>: Cost 3 vsldoi8 <6,4,7,4>, <6,4,7,4>
+ 2779172221U, // <7,4,6,5>: Cost 3 vsldoi12 RHS, <4,6,5,6>
+ 2718749496U, // <7,4,6,6>: Cost 3 vsldoi8 <5,6,7,4>, <6,6,6,6>
+ 2718749518U, // <7,4,6,7>: Cost 3 vsldoi8 <5,6,7,4>, <6,7,0,1>
+ 2779172249U, // <7,4,6,u>: Cost 3 vsldoi12 RHS, <4,6,u,7>
+ 2718749690U, // <7,4,7,0>: Cost 3 vsldoi8 <5,6,7,4>, <7,0,1,2>
+ 3847458214U, // <7,4,7,1>: Cost 4 vsldoi12 <3,6,4,7>, <4,7,1,2>
+ 2718749880U, // <7,4,7,2>: Cost 3 vsldoi8 <5,6,7,4>, <7,2,4,3>
+ 3847458236U, // <7,4,7,3>: Cost 4 vsldoi12 <3,6,4,7>, <4,7,3,6>
+ 2718750004U, // <7,4,7,4>: Cost 3 vsldoi8 <5,6,7,4>, <7,4,0,1>
+ 1187876150U, // <7,4,7,5>: Cost 2 vmrghw <7,7,7,7>, RHS
+ 2718750208U, // <7,4,7,6>: Cost 3 vsldoi8 <5,6,7,4>, <7,6,4,7>
+ 2718750286U, // <7,4,7,7>: Cost 3 vsldoi8 <5,6,7,4>, <7,7,4,4>
+ 1187876393U, // <7,4,7,u>: Cost 2 vmrghw <7,7,7,7>, RHS
+ 1584341094U, // <7,4,u,0>: Cost 2 vsldoi4 <6,7,4,u>, LHS
+ 1645008686U, // <7,4,u,1>: Cost 2 vsldoi8 <5,6,7,4>, LHS
+ 2640168890U, // <7,4,u,2>: Cost 3 vsldoi4 <3,7,4,u>, <2,6,3,7>
+ 2640169710U, // <7,4,u,3>: Cost 3 vsldoi4 <3,7,4,u>, <3,7,4,u>
+ 1584344374U, // <7,4,u,4>: Cost 2 vsldoi4 <6,7,4,u>, RHS
+ 1705430554U, // <7,4,u,5>: Cost 2 vsldoi12 RHS, <4,u,5,1>
+ 631688745U, // <7,4,u,6>: Cost 1 vsldoi12 RHS, RHS
+ 2718750976U, // <7,4,u,7>: Cost 3 vsldoi8 <5,6,7,4>, <u,7,0,1>
+ 631688763U, // <7,4,u,u>: Cost 1 vsldoi12 RHS, RHS
+ 2646147174U, // <7,5,0,0>: Cost 3 vsldoi4 <4,7,5,0>, LHS
+ 2779172424U, // <7,5,0,1>: Cost 3 vsldoi12 RHS, <5,0,1,2>
+ 3852914258U, // <7,5,0,2>: Cost 4 vsldoi12 RHS, <5,0,2,3>
+ 3852914268U, // <7,5,0,3>: Cost 4 vsldoi12 RHS, <5,0,3,4>
+ 2779172450U, // <7,5,0,4>: Cost 3 vsldoi12 RHS, <5,0,4,1>
+ 2316061914U, // <7,5,0,5>: Cost 3 vmrglw <5,6,7,0>, <4,4,5,5>
+ 2316061186U, // <7,5,0,6>: Cost 3 vmrglw <5,6,7,0>, <3,4,5,6>
+ 2646152186U, // <7,5,0,7>: Cost 3 vsldoi4 <4,7,5,0>, <7,0,1,2>
+ 2779172486U, // <7,5,0,u>: Cost 3 vsldoi12 RHS, <5,0,u,1>
+ 2781163151U, // <7,5,1,0>: Cost 3 vsldoi12 RHS, <5,1,0,1>
+ 2321378194U, // <7,5,1,1>: Cost 3 vmrglw <6,5,7,1>, <4,0,5,1>
+ 3852914339U, // <7,5,1,2>: Cost 4 vsldoi12 RHS, <5,1,2,3>
+ 3852914350U, // <7,5,1,3>: Cost 4 vsldoi12 RHS, <5,1,3,5>
+ 2781163191U, // <7,5,1,4>: Cost 3 vsldoi12 RHS, <5,1,4,5>
+ 3852914363U, // <7,5,1,5>: Cost 4 vsldoi12 RHS, <5,1,5,0>
+ 3835588297U, // <7,5,1,6>: Cost 4 vsldoi12 <1,6,5,7>, <5,1,6,5>
+ 3835588306U, // <7,5,1,7>: Cost 4 vsldoi12 <1,6,5,7>, <5,1,7,5>
+ 2781163223U, // <7,5,1,u>: Cost 3 vsldoi12 RHS, <5,1,u,1>
+ 3852914400U, // <7,5,2,0>: Cost 4 vsldoi12 RHS, <5,2,0,1>
+ 2781163243U, // <7,5,2,1>: Cost 3 vsldoi12 RHS, <5,2,1,3>
+ 3852914419U, // <7,5,2,2>: Cost 4 vsldoi12 RHS, <5,2,2,2>
+ 2779172606U, // <7,5,2,3>: Cost 3 vsldoi12 RHS, <5,2,3,4>
+ 3780552497U, // <7,5,2,4>: Cost 4 vsldoi8 <3,6,7,5>, <2,4,6,5>
+ 2781163279U, // <7,5,2,5>: Cost 3 vsldoi12 RHS, <5,2,5,3>
+ 2779172632U, // <7,5,2,6>: Cost 3 vsldoi12 RHS, <5,2,6,3>
+ 3835588385U, // <7,5,2,7>: Cost 4 vsldoi12 <1,6,5,7>, <5,2,7,3>
+ 2779172650U, // <7,5,2,u>: Cost 3 vsldoi12 RHS, <5,2,u,3>
+ 3852914481U, // <7,5,3,0>: Cost 4 vsldoi12 RHS, <5,3,0,1>
+ 2319403922U, // <7,5,3,1>: Cost 3 vmrglw <6,2,7,3>, <4,0,5,1>
+ 2319404409U, // <7,5,3,2>: Cost 3 vmrglw <6,2,7,3>, <4,6,5,2>
+ 3852914510U, // <7,5,3,3>: Cost 4 vsldoi12 RHS, <5,3,3,3>
+ 3779226131U, // <7,5,3,4>: Cost 4 vsldoi8 <3,4,7,5>, <3,4,7,5>
+ 2319404250U, // <7,5,3,5>: Cost 3 vmrglw <6,2,7,3>, <4,4,5,5>
+ 2319403522U, // <7,5,3,6>: Cost 3 vmrglw <6,2,7,3>, <3,4,5,6>
+ 3852914547U, // <7,5,3,7>: Cost 4 vsldoi12 RHS, <5,3,7,4>
+ 2319403524U, // <7,5,3,u>: Cost 3 vmrglw <6,2,7,3>, <3,4,5,u>
+ 2646179942U, // <7,5,4,0>: Cost 3 vsldoi4 <4,7,5,4>, LHS
+ 2316094354U, // <7,5,4,1>: Cost 3 vmrglw <5,6,7,4>, <4,0,5,1>
+ 3852914582U, // <7,5,4,2>: Cost 4 vsldoi12 RHS, <5,4,2,3>
+ 3852914592U, // <7,5,4,3>: Cost 4 vsldoi12 RHS, <5,4,3,4>
+ 2646183372U, // <7,5,4,4>: Cost 3 vsldoi4 <4,7,5,4>, <4,7,5,4>
+ 2779172788U, // <7,5,4,5>: Cost 3 vsldoi12 RHS, <5,4,5,6>
+ 2316093954U, // <7,5,4,6>: Cost 3 vmrglw <5,6,7,4>, <3,4,5,6>
+ 2646185318U, // <7,5,4,7>: Cost 3 vsldoi4 <4,7,5,4>, <7,4,5,6>
+ 2779172815U, // <7,5,4,u>: Cost 3 vsldoi12 RHS, <5,4,u,6>
+ 2781163475U, // <7,5,5,0>: Cost 3 vsldoi12 RHS, <5,5,0,1>
+ 2781163484U, // <7,5,5,1>: Cost 3 vsldoi12 RHS, <5,5,1,1>
+ 3852914662U, // <7,5,5,2>: Cost 4 vsldoi12 RHS, <5,5,2,2>
+ 3852914672U, // <7,5,5,3>: Cost 4 vsldoi12 RHS, <5,5,3,3>
+ 2781163515U, // <7,5,5,4>: Cost 3 vsldoi12 RHS, <5,5,4,5>
+ 1705431044U, // <7,5,5,5>: Cost 2 vsldoi12 RHS, <5,5,5,5>
+ 2779172878U, // <7,5,5,6>: Cost 3 vsldoi12 RHS, <5,5,6,6>
+ 3835588632U, // <7,5,5,7>: Cost 4 vsldoi12 <1,6,5,7>, <5,5,7,7>
+ 1705431044U, // <7,5,5,u>: Cost 2 vsldoi12 RHS, <5,5,5,5>
+ 2779172900U, // <7,5,6,0>: Cost 3 vsldoi12 RHS, <5,6,0,1>
+ 2781163571U, // <7,5,6,1>: Cost 3 vsldoi12 RHS, <5,6,1,7>
+ 3852914743U, // <7,5,6,2>: Cost 4 vsldoi12 RHS, <5,6,2,2>
+ 2779172930U, // <7,5,6,3>: Cost 3 vsldoi12 RHS, <5,6,3,4>
+ 2779172940U, // <7,5,6,4>: Cost 3 vsldoi12 RHS, <5,6,4,5>
+ 2781163607U, // <7,5,6,5>: Cost 3 vsldoi12 RHS, <5,6,5,7>
+ 2779172960U, // <7,5,6,6>: Cost 3 vsldoi12 RHS, <5,6,6,7>
+ 1705431138U, // <7,5,6,7>: Cost 2 vsldoi12 RHS, <5,6,7,0>
+ 1705578603U, // <7,5,6,u>: Cost 2 vsldoi12 RHS, <5,6,u,0>
+ 2646204518U, // <7,5,7,0>: Cost 3 vsldoi4 <4,7,5,7>, LHS
+ 2322090898U, // <7,5,7,1>: Cost 3 vmrglw <6,6,7,7>, <4,0,5,1>
+ 3719947880U, // <7,5,7,2>: Cost 4 vsldoi4 <4,7,5,7>, <2,2,2,2>
+ 3719948438U, // <7,5,7,3>: Cost 4 vsldoi4 <4,7,5,7>, <3,0,1,2>
+ 2646207951U, // <7,5,7,4>: Cost 3 vsldoi4 <4,7,5,7>, <4,7,5,7>
+ 2322091226U, // <7,5,7,5>: Cost 3 vmrglw <6,6,7,7>, <4,4,5,5>
+ 2322090498U, // <7,5,7,6>: Cost 3 vmrglw <6,6,7,7>, <3,4,5,6>
+ 2646210156U, // <7,5,7,7>: Cost 3 vsldoi4 <4,7,5,7>, <7,7,7,7>
+ 2646210350U, // <7,5,7,u>: Cost 3 vsldoi4 <4,7,5,7>, LHS
+ 2779173062U, // <7,5,u,0>: Cost 3 vsldoi12 RHS, <5,u,0,1>
+ 2779173072U, // <7,5,u,1>: Cost 3 vsldoi12 RHS, <5,u,1,2>
+ 2319404409U, // <7,5,u,2>: Cost 3 vmrglw <6,2,7,3>, <4,6,5,2>
+ 2779173092U, // <7,5,u,3>: Cost 3 vsldoi12 RHS, <5,u,3,4>
+ 2779173101U, // <7,5,u,4>: Cost 3 vsldoi12 RHS, <5,u,4,4>
+ 1705431044U, // <7,5,u,5>: Cost 2 vsldoi12 RHS, <5,5,5,5>
+ 2779173118U, // <7,5,u,6>: Cost 3 vsldoi12 RHS, <5,u,6,3>
+ 1705578756U, // <7,5,u,7>: Cost 2 vsldoi12 RHS, <5,u,7,0>
+ 1707421965U, // <7,5,u,u>: Cost 2 vsldoi12 RHS, <5,u,u,0>
+ 3852914966U, // <7,6,0,0>: Cost 4 vsldoi12 RHS, <6,0,0,0>
+ 2779173153U, // <7,6,0,1>: Cost 3 vsldoi12 RHS, <6,0,1,2>
+ 2256491002U, // <7,6,0,2>: Cost 3 vmrghw <7,0,1,2>, <6,2,7,3>
+ 3852914994U, // <7,6,0,3>: Cost 4 vsldoi12 RHS, <6,0,3,1>
+ 3852915003U, // <7,6,0,4>: Cost 4 vsldoi12 RHS, <6,0,4,1>
+ 2316062652U, // <7,6,0,5>: Cost 3 vmrglw <5,6,7,0>, <5,4,6,5>
+ 2316063544U, // <7,6,0,6>: Cost 3 vmrglw <5,6,7,0>, <6,6,6,6>
+ 1242320182U, // <7,6,0,7>: Cost 2 vmrglw <5,6,7,0>, RHS
+ 1242320183U, // <7,6,0,u>: Cost 2 vmrglw <5,6,7,0>, RHS
+ 3852915048U, // <7,6,1,0>: Cost 4 vsldoi12 RHS, <6,1,0,1>
+ 3377866217U, // <7,6,1,1>: Cost 4 vmrglw <3,6,7,1>, <2,0,6,1>
+ 3852915068U, // <7,6,1,2>: Cost 4 vsldoi12 RHS, <6,1,2,3>
+ 3833672072U, // <7,6,1,3>: Cost 5 vsldoi12 <1,3,6,7>, <6,1,3,6>
+ 3852915088U, // <7,6,1,4>: Cost 4 vsldoi12 RHS, <6,1,4,5>
+ 3395122056U, // <7,6,1,5>: Cost 4 vmrglw <6,5,7,1>, <6,7,6,5>
+ 3389813560U, // <7,6,1,6>: Cost 4 vmrglw <5,6,7,1>, <6,6,6,6>
+ 2779173287U, // <7,6,1,7>: Cost 3 vsldoi12 RHS, <6,1,7,1>
+ 2779320752U, // <7,6,1,u>: Cost 3 vsldoi12 RHS, <6,1,u,1>
+ 2658181222U, // <7,6,2,0>: Cost 3 vsldoi4 <6,7,6,2>, LHS
+ 3852915140U, // <7,6,2,1>: Cost 4 vsldoi12 RHS, <6,2,1,3>
+ 2257973754U, // <7,6,2,2>: Cost 3 vmrghw <7,2,3,3>, <6,2,7,3>
+ 3841413589U, // <7,6,2,3>: Cost 4 vsldoi12 <2,6,3,7>, <6,2,3,2>
+ 2658184502U, // <7,6,2,4>: Cost 3 vsldoi4 <6,7,6,2>, RHS
+ 3852915176U, // <7,6,2,5>: Cost 4 vsldoi12 RHS, <6,2,5,3>
+ 2658186117U, // <7,6,2,6>: Cost 3 vsldoi4 <6,7,6,2>, <6,7,6,2>
+ 1705431546U, // <7,6,2,7>: Cost 2 vsldoi12 RHS, <6,2,7,3>
+ 1705579011U, // <7,6,2,u>: Cost 2 vsldoi12 RHS, <6,2,u,3>
+ 3714015334U, // <7,6,3,0>: Cost 4 vsldoi4 <3,7,6,3>, LHS
+ 3777243425U, // <7,6,3,1>: Cost 4 vsldoi8 <3,1,7,6>, <3,1,7,6>
+ 2319405957U, // <7,6,3,2>: Cost 3 vmrglw <6,2,7,3>, <6,7,6,2>
+ 3375229286U, // <7,6,3,3>: Cost 4 vmrglw <3,2,7,3>, <3,2,6,3>
+ 2779173426U, // <7,6,3,4>: Cost 3 vsldoi12 RHS, <6,3,4,5>
+ 3375228721U, // <7,6,3,5>: Cost 4 vmrglw <3,2,7,3>, <2,4,6,5>
+ 2319405880U, // <7,6,3,6>: Cost 3 vmrglw <6,2,7,3>, <6,6,6,6>
+ 1245662518U, // <7,6,3,7>: Cost 2 vmrglw <6,2,7,3>, RHS
+ 1245662519U, // <7,6,3,u>: Cost 2 vmrglw <6,2,7,3>, RHS
+ 3852915291U, // <7,6,4,0>: Cost 4 vsldoi12 RHS, <6,4,0,1>
+ 3389834729U, // <7,6,4,1>: Cost 4 vmrglw <5,6,7,4>, <2,0,6,1>
+ 2259472890U, // <7,6,4,2>: Cost 3 vmrghw <7,4,5,6>, <6,2,7,3>
+ 3852915321U, // <7,6,4,3>: Cost 4 vsldoi12 RHS, <6,4,3,4>
+ 3852915330U, // <7,6,4,4>: Cost 4 vsldoi12 RHS, <6,4,4,4>
+ 2779173517U, // <7,6,4,5>: Cost 3 vsldoi12 RHS, <6,4,5,6>
+ 2316096312U, // <7,6,4,6>: Cost 3 vmrglw <5,6,7,4>, <6,6,6,6>
+ 1242352950U, // <7,6,4,7>: Cost 2 vmrglw <5,6,7,4>, RHS
+ 1242352951U, // <7,6,4,u>: Cost 2 vmrglw <5,6,7,4>, RHS
+ 3852915372U, // <7,6,5,0>: Cost 4 vsldoi12 RHS, <6,5,0,1>
+ 3835294392U, // <7,6,5,1>: Cost 5 vsldoi12 <1,6,1,7>, <6,5,1,4>
+ 3852915395U, // <7,6,5,2>: Cost 4 vsldoi12 RHS, <6,5,2,6>
+ 3852915404U, // <7,6,5,3>: Cost 4 vsldoi12 RHS, <6,5,3,6>
+ 3852915412U, // <7,6,5,4>: Cost 4 vsldoi12 RHS, <6,5,4,5>
+ 3377899313U, // <7,6,5,5>: Cost 4 vmrglw <3,6,7,5>, <2,4,6,5>
+ 2718765160U, // <7,6,5,6>: Cost 3 vsldoi8 <5,6,7,6>, <5,6,7,6>
+ 2779173611U, // <7,6,5,7>: Cost 3 vsldoi12 RHS, <6,5,7,1>
+ 2779321076U, // <7,6,5,u>: Cost 3 vsldoi12 RHS, <6,5,u,1>
+ 2658213990U, // <7,6,6,0>: Cost 3 vsldoi4 <6,7,6,6>, LHS
+ 3852915462U, // <7,6,6,1>: Cost 4 vsldoi12 RHS, <6,6,1,1>
+ 2718765562U, // <7,6,6,2>: Cost 3 vsldoi8 <5,6,7,6>, <6,2,7,3>
+ 3714042622U, // <7,6,6,3>: Cost 4 vsldoi4 <3,7,6,6>, <3,7,6,6>
+ 2658217270U, // <7,6,6,4>: Cost 3 vsldoi4 <6,7,6,6>, RHS
+ 2724074224U, // <7,6,6,5>: Cost 3 vsldoi8 <6,5,7,6>, <6,5,7,6>
+ 1705431864U, // <7,6,6,6>: Cost 2 vsldoi12 RHS, <6,6,6,6>
+ 1705431874U, // <7,6,6,7>: Cost 2 vsldoi12 RHS, <6,6,7,7>
+ 1705579339U, // <7,6,6,u>: Cost 2 vsldoi12 RHS, <6,6,u,7>
+ 1705431886U, // <7,6,7,0>: Cost 2 vsldoi12 RHS, <6,7,0,1>
+ 2779173719U, // <7,6,7,1>: Cost 3 vsldoi12 RHS, <6,7,1,1>
+ 2779173729U, // <7,6,7,2>: Cost 3 vsldoi12 RHS, <6,7,2,2>
+ 2779173736U, // <7,6,7,3>: Cost 3 vsldoi12 RHS, <6,7,3,0>
+ 1705431926U, // <7,6,7,4>: Cost 2 vsldoi12 RHS, <6,7,4,5>
+ 2779173759U, // <7,6,7,5>: Cost 3 vsldoi12 RHS, <6,7,5,5>
+ 2779173765U, // <7,6,7,6>: Cost 3 vsldoi12 RHS, <6,7,6,2>
+ 1248349494U, // <7,6,7,7>: Cost 2 vmrglw <6,6,7,7>, RHS
+ 1705431958U, // <7,6,7,u>: Cost 2 vsldoi12 RHS, <6,7,u,1>
+ 1705579423U, // <7,6,u,0>: Cost 2 vsldoi12 RHS, <6,u,0,1>
+ 2779173801U, // <7,6,u,1>: Cost 3 vsldoi12 RHS, <6,u,1,2>
+ 2779321266U, // <7,6,u,2>: Cost 3 vsldoi12 RHS, <6,u,2,2>
+ 2779321273U, // <7,6,u,3>: Cost 3 vsldoi12 RHS, <6,u,3,0>
+ 1705579463U, // <7,6,u,4>: Cost 2 vsldoi12 RHS, <6,u,4,5>
+ 2779173841U, // <7,6,u,5>: Cost 3 vsldoi12 RHS, <6,u,5,6>
+ 1705431864U, // <7,6,u,6>: Cost 2 vsldoi12 RHS, <6,6,6,6>
+ 1705432032U, // <7,6,u,7>: Cost 2 vsldoi12 RHS, <6,u,7,3>
+ 1705579495U, // <7,6,u,u>: Cost 2 vsldoi12 RHS, <6,u,u,1>
+ 1242320994U, // <7,7,0,0>: Cost 2 vmrglw <5,6,7,0>, <5,6,7,0>
+ 1705432058U, // <7,7,0,1>: Cost 2 vsldoi12 RHS, <7,0,1,2>
+ 3841414146U, // <7,7,0,2>: Cost 4 vsldoi12 <2,6,3,7>, <7,0,2,1>
+ 2316063226U, // <7,7,0,3>: Cost 3 vmrglw <5,6,7,0>, <6,2,7,3>
+ 2779173908U, // <7,7,0,4>: Cost 3 vsldoi12 RHS, <7,0,4,1>
+ 2658242658U, // <7,7,0,5>: Cost 3 vsldoi4 <6,7,7,0>, <5,6,7,0>
+ 2658243468U, // <7,7,0,6>: Cost 3 vsldoi4 <6,7,7,0>, <6,7,7,0>
+ 2316063554U, // <7,7,0,7>: Cost 3 vmrglw <5,6,7,0>, <6,6,7,7>
+ 1705432121U, // <7,7,0,u>: Cost 2 vsldoi12 RHS, <7,0,u,2>
+ 3852915777U, // <7,7,1,0>: Cost 4 vsldoi12 RHS, <7,1,0,1>
+ 2779173962U, // <7,7,1,1>: Cost 3 vsldoi12 RHS, <7,1,1,1>
+ 2779173973U, // <7,7,1,2>: Cost 3 vsldoi12 RHS, <7,1,2,3>
+ 3389813242U, // <7,7,1,3>: Cost 4 vmrglw <5,6,7,1>, <6,2,7,3>
+ 3852915813U, // <7,7,1,4>: Cost 4 vsldoi12 RHS, <7,1,4,1>
+ 3852915821U, // <7,7,1,5>: Cost 4 vsldoi12 RHS, <7,1,5,0>
+ 3835294839U, // <7,7,1,6>: Cost 4 vsldoi12 <1,6,1,7>, <7,1,6,1>
+ 2329343596U, // <7,7,1,7>: Cost 3 vmrglw <7,u,7,1>, <7,7,7,7>
+ 2779174027U, // <7,7,1,u>: Cost 3 vsldoi12 RHS, <7,1,u,3>
+ 2803061908U, // <7,7,2,0>: Cost 3 vsldoi12 RHS, <7,2,0,3>
+ 3852915869U, // <7,7,2,1>: Cost 4 vsldoi12 RHS, <7,2,1,3>
+ 2779174053U, // <7,7,2,2>: Cost 3 vsldoi12 RHS, <7,2,2,2>
+ 2779174060U, // <7,7,2,3>: Cost 3 vsldoi12 RHS, <7,2,3,0>
+ 2803061944U, // <7,7,2,4>: Cost 3 vsldoi12 RHS, <7,2,4,3>
+ 3852915905U, // <7,7,2,5>: Cost 4 vsldoi12 RHS, <7,2,5,3>
+ 2767672522U, // <7,7,2,6>: Cost 3 vsldoi12 <2,6,3,7>, <7,2,6,3>
+ 2791855315U, // <7,7,2,7>: Cost 3 vsldoi12 <6,6,7,7>, <7,2,7,3>
+ 2768999644U, // <7,7,2,u>: Cost 3 vsldoi12 <2,u,3,7>, <7,2,u,3>
+ 2779174115U, // <7,7,3,0>: Cost 3 vsldoi12 RHS, <7,3,0,1>
+ 3852915948U, // <7,7,3,1>: Cost 4 vsldoi12 RHS, <7,3,1,1>
+ 3841414394U, // <7,7,3,2>: Cost 4 vsldoi12 <2,6,3,7>, <7,3,2,6>
+ 1245663738U, // <7,7,3,3>: Cost 2 vmrglw <6,2,7,3>, <6,2,7,3>
+ 2779174155U, // <7,7,3,4>: Cost 3 vsldoi12 RHS, <7,3,4,5>
+ 3852915988U, // <7,7,3,5>: Cost 4 vsldoi12 RHS, <7,3,5,5>
+ 2706827959U, // <7,7,3,6>: Cost 3 vsldoi8 <3,6,7,7>, <3,6,7,7>
+ 2319405890U, // <7,7,3,7>: Cost 3 vmrglw <6,2,7,3>, <6,6,7,7>
+ 1245663738U, // <7,7,3,u>: Cost 2 vmrglw <6,2,7,3>, <6,2,7,3>
+ 2779174200U, // <7,7,4,0>: Cost 3 vsldoi12 RHS, <7,4,0,5>
+ 3852916030U, // <7,7,4,1>: Cost 4 vsldoi12 RHS, <7,4,1,2>
+ 3714099130U, // <7,7,4,2>: Cost 4 vsldoi4 <3,7,7,4>, <2,6,3,7>
+ 2316095994U, // <7,7,4,3>: Cost 3 vmrglw <5,6,7,4>, <6,2,7,3>
+ 1242353766U, // <7,7,4,4>: Cost 2 vmrglw <5,6,7,4>, <5,6,7,4>
+ 1705432422U, // <7,7,4,5>: Cost 2 vsldoi12 RHS, <7,4,5,6>
+ 2658276240U, // <7,7,4,6>: Cost 3 vsldoi4 <6,7,7,4>, <6,7,7,4>
+ 2316096322U, // <7,7,4,7>: Cost 3 vmrglw <5,6,7,4>, <6,6,7,7>
+ 1705432449U, // <7,7,4,u>: Cost 2 vsldoi12 RHS, <7,4,u,6>
+ 3852916101U, // <7,7,5,0>: Cost 4 vsldoi12 RHS, <7,5,0,1>
+ 3854906765U, // <7,7,5,1>: Cost 4 vsldoi12 RHS, <7,5,1,0>
+ 3852916121U, // <7,7,5,2>: Cost 4 vsldoi12 RHS, <7,5,2,3>
+ 3389846010U, // <7,7,5,3>: Cost 4 vmrglw <5,6,7,5>, <6,2,7,3>
+ 3852916141U, // <7,7,5,4>: Cost 4 vsldoi12 RHS, <7,5,4,5>
+ 2779174326U, // <7,7,5,5>: Cost 3 vsldoi12 RHS, <7,5,5,5>
+ 2779174337U, // <7,7,5,6>: Cost 3 vsldoi12 RHS, <7,5,6,7>
+ 2329376364U, // <7,7,5,7>: Cost 3 vmrglw <7,u,7,5>, <7,7,7,7>
+ 2779321811U, // <7,7,5,u>: Cost 3 vsldoi12 RHS, <7,5,u,7>
+ 2658287718U, // <7,7,6,0>: Cost 3 vsldoi4 <6,7,7,6>, LHS
+ 3852916197U, // <7,7,6,1>: Cost 4 vsldoi12 RHS, <7,6,1,7>
+ 2779174382U, // <7,7,6,2>: Cost 3 vsldoi12 RHS, <7,6,2,7>
+ 2316112378U, // <7,7,6,3>: Cost 3 vmrglw <5,6,7,6>, <6,2,7,3>
+ 2658290998U, // <7,7,6,4>: Cost 3 vsldoi4 <6,7,7,6>, RHS
+ 3852916233U, // <7,7,6,5>: Cost 4 vsldoi12 RHS, <7,6,5,7>
+ 1651004226U, // <7,7,6,6>: Cost 2 vsldoi8 <6,6,7,7>, <6,6,7,7>
+ 2779174420U, // <7,7,6,7>: Cost 3 vsldoi12 RHS, <7,6,7,0>
+ 1652331492U, // <7,7,6,u>: Cost 2 vsldoi8 <6,u,7,7>, <6,u,7,7>
+ 1590526054U, // <7,7,7,0>: Cost 2 vsldoi4 <7,7,7,7>, LHS
+ 2328728623U, // <7,7,7,1>: Cost 3 vmrglw <7,7,7,7>, <7,0,7,1>
+ 2724746451U, // <7,7,7,2>: Cost 3 vsldoi8 <6,6,7,7>, <7,2,7,3>
+ 2322092538U, // <7,7,7,3>: Cost 3 vmrglw <6,6,7,7>, <6,2,7,3>
+ 1590529334U, // <7,7,7,4>: Cost 2 vsldoi4 <7,7,7,7>, RHS
+ 2328728951U, // <7,7,7,5>: Cost 3 vmrglw <7,7,7,7>, <7,4,7,5>
+ 2724746770U, // <7,7,7,6>: Cost 3 vsldoi8 <6,6,7,7>, <7,6,6,7>
+ 430361910U, // <7,7,7,7>: Cost 1 vspltisw3 RHS
+ 430361910U, // <7,7,7,u>: Cost 1 vspltisw3 RHS
+ 1242320994U, // <7,7,u,0>: Cost 2 vmrglw <5,6,7,0>, <5,6,7,0>
+ 1705580162U, // <7,7,u,1>: Cost 2 vsldoi12 RHS, <7,u,1,2>
+ 2779321996U, // <7,7,u,2>: Cost 3 vsldoi12 RHS, <7,u,2,3>
+ 1245663738U, // <7,7,u,3>: Cost 2 vmrglw <6,2,7,3>, <6,2,7,3>
+ 1242353766U, // <7,7,u,4>: Cost 2 vmrglw <5,6,7,4>, <5,6,7,4>
+ 1705580202U, // <7,7,u,5>: Cost 2 vsldoi12 RHS, <7,u,5,6>
+ 1662949620U, // <7,7,u,6>: Cost 2 vsldoi8 <u,6,7,7>, <u,6,7,7>
+ 430361910U, // <7,7,u,7>: Cost 1 vspltisw3 RHS
+ 430361910U, // <7,7,u,u>: Cost 1 vspltisw3 RHS
+ 1705426944U, // <7,u,0,0>: Cost 2 vsldoi12 RHS, <0,0,0,0>
+ 1705432787U, // <7,u,0,1>: Cost 2 vsldoi12 RHS, <u,0,1,2>
+ 2316060885U, // <7,u,0,2>: Cost 3 vmrglw <5,6,7,0>, <3,0,u,2>
+ 1242316956U, // <7,u,0,3>: Cost 2 vmrglw <5,6,7,0>, LHS
+ 2779174637U, // <7,u,0,4>: Cost 3 vsldoi12 RHS, <u,0,4,1>
+ 1182750874U, // <7,u,0,5>: Cost 2 vmrghw <7,0,1,2>, RHS
+ 2316061213U, // <7,u,0,6>: Cost 3 vmrglw <5,6,7,0>, <3,4,u,6>
+ 1242320200U, // <7,u,0,7>: Cost 2 vmrglw <5,6,7,0>, RHS
+ 1705432850U, // <7,u,0,u>: Cost 2 vsldoi12 RHS, <u,0,u,2>
+ 1584578662U, // <7,u,1,0>: Cost 2 vsldoi4 <6,7,u,1>, LHS
+ 1705427764U, // <7,u,1,1>: Cost 2 vsldoi12 RHS, <1,1,1,1>
+ 631691054U, // <7,u,1,2>: Cost 1 vsldoi12 RHS, LHS
+ 2640407307U, // <7,u,1,3>: Cost 3 vsldoi4 <3,7,u,1>, <3,7,u,1>
+ 1584581942U, // <7,u,1,4>: Cost 2 vsldoi4 <6,7,u,1>, RHS
+ 2779174726U, // <7,u,1,5>: Cost 3 vsldoi12 RHS, <u,1,5,0>
+ 1584583574U, // <7,u,1,6>: Cost 2 vsldoi4 <6,7,u,1>, <6,7,u,1>
+ 2779322201U, // <7,u,1,7>: Cost 3 vsldoi12 RHS, <u,1,7,1>
+ 631691108U, // <7,u,1,u>: Cost 1 vsldoi12 RHS, LHS
+ 2779174763U, // <7,u,2,0>: Cost 3 vsldoi12 RHS, <u,2,0,1>
+ 2779174774U, // <7,u,2,1>: Cost 3 vsldoi12 RHS, <u,2,1,3>
+ 1705428584U, // <7,u,2,2>: Cost 2 vsldoi12 RHS, <2,2,2,2>
+ 1705432965U, // <7,u,2,3>: Cost 2 vsldoi12 RHS, <u,2,3,0>
+ 2779174801U, // <7,u,2,4>: Cost 3 vsldoi12 RHS, <u,2,4,3>
+ 2779174810U, // <7,u,2,5>: Cost 3 vsldoi12 RHS, <u,2,5,3>
+ 2767673251U, // <7,u,2,6>: Cost 3 vsldoi12 <2,6,3,7>, <u,2,6,3>
+ 1705580460U, // <7,u,2,7>: Cost 2 vsldoi12 RHS, <u,2,7,3>
+ 1705433010U, // <7,u,2,u>: Cost 2 vsldoi12 RHS, <u,2,u,0>
+ 1705433020U, // <7,u,3,0>: Cost 2 vsldoi12 RHS, <u,3,0,1>
+ 2779174853U, // <7,u,3,1>: Cost 3 vsldoi12 RHS, <u,3,1,1>
+ 2767673299U, // <7,u,3,2>: Cost 3 vsldoi12 <2,6,3,7>, <u,3,2,6>
+ 1245659292U, // <7,u,3,3>: Cost 2 vmrglw <6,2,7,3>, LHS
+ 1705433060U, // <7,u,3,4>: Cost 2 vsldoi12 RHS, <u,3,4,5>
+ 2779174893U, // <7,u,3,5>: Cost 3 vsldoi12 RHS, <u,3,5,5>
+ 2706836152U, // <7,u,3,6>: Cost 3 vsldoi8 <3,6,7,u>, <3,6,7,u>
+ 1245662536U, // <7,u,3,7>: Cost 2 vmrglw <6,2,7,3>, RHS
+ 1705433092U, // <7,u,3,u>: Cost 2 vsldoi12 RHS, <u,3,u,1>
+ 2779174925U, // <7,u,4,0>: Cost 3 vsldoi12 RHS, <u,4,0,1>
+ 1185732398U, // <7,u,4,1>: Cost 2 vmrghw <7,4,5,6>, LHS
+ 2316093653U, // <7,u,4,2>: Cost 3 vmrglw <5,6,7,4>, <3,0,u,2>
+ 1242349724U, // <7,u,4,3>: Cost 2 vmrglw <5,6,7,4>, LHS
+ 1705430224U, // <7,u,4,4>: Cost 2 vsldoi12 RHS, <4,4,4,4>
+ 1705433151U, // <7,u,4,5>: Cost 2 vsldoi12 RHS, <u,4,5,6>
+ 2316093981U, // <7,u,4,6>: Cost 3 vmrglw <5,6,7,4>, <3,4,u,6>
+ 1242352968U, // <7,u,4,7>: Cost 2 vmrglw <5,6,7,4>, RHS
+ 1705433178U, // <7,u,4,u>: Cost 2 vsldoi12 RHS, <u,4,u,6>
+ 1584611430U, // <7,u,5,0>: Cost 2 vsldoi4 <6,7,u,5>, LHS
+ 2781165670U, // <7,u,5,1>: Cost 3 vsldoi12 RHS, <u,5,1,0>
+ 2640439226U, // <7,u,5,2>: Cost 3 vsldoi4 <3,7,u,5>, <2,6,3,7>
+ 2640440079U, // <7,u,5,3>: Cost 3 vsldoi4 <3,7,u,5>, <3,7,u,5>
+ 1584614710U, // <7,u,5,4>: Cost 2 vsldoi4 <6,7,u,5>, RHS
+ 1705431044U, // <7,u,5,5>: Cost 2 vsldoi12 RHS, <5,5,5,5>
+ 631691418U, // <7,u,5,6>: Cost 1 vsldoi12 RHS, RHS
+ 2779322525U, // <7,u,5,7>: Cost 3 vsldoi12 RHS, <u,5,7,1>
+ 631691436U, // <7,u,5,u>: Cost 1 vsldoi12 RHS, RHS
+ 2779175087U, // <7,u,6,0>: Cost 3 vsldoi12 RHS, <u,6,0,1>
+ 2779175102U, // <7,u,6,1>: Cost 3 vsldoi12 RHS, <u,6,1,7>
+ 1648357887U, // <7,u,6,2>: Cost 2 vsldoi8 <6,2,7,u>, <6,2,7,u>
+ 1705433296U, // <7,u,6,3>: Cost 2 vsldoi12 RHS, <u,6,3,7>
+ 2779175127U, // <7,u,6,4>: Cost 3 vsldoi12 RHS, <u,6,4,5>
+ 2779175138U, // <7,u,6,5>: Cost 3 vsldoi12 RHS, <u,6,5,7>
+ 1651012419U, // <7,u,6,6>: Cost 2 vsldoi8 <6,6,7,u>, <6,6,7,u>
+ 1705580788U, // <7,u,6,7>: Cost 2 vsldoi12 RHS, <u,6,7,7>
+ 1705433341U, // <7,u,6,u>: Cost 2 vsldoi12 RHS, <u,6,u,7>
+ 1705580800U, // <7,u,7,0>: Cost 2 vsldoi12 RHS, <u,7,0,1>
+ 1187878702U, // <7,u,7,1>: Cost 2 vmrghw <7,7,7,7>, LHS
+ 2768042263U, // <7,u,7,2>: Cost 3 vsldoi12 <2,6,u,7>, <u,7,2,6>
+ 1248346268U, // <7,u,7,3>: Cost 2 vmrglw <6,6,7,7>, LHS
+ 1705580840U, // <7,u,7,4>: Cost 2 vsldoi12 RHS, <u,7,4,5>
+ 1187879066U, // <7,u,7,5>: Cost 2 vmrghw <7,7,7,7>, RHS
+ 2779322679U, // <7,u,7,6>: Cost 3 vsldoi12 RHS, <u,7,6,2>
+ 430361910U, // <7,u,7,7>: Cost 1 vspltisw3 RHS
+ 430361910U, // <7,u,7,u>: Cost 1 vspltisw3 RHS
+ 1705433425U, // <7,u,u,0>: Cost 2 vsldoi12 RHS, <u,u,0,1>
+ 1705433435U, // <7,u,u,1>: Cost 2 vsldoi12 RHS, <u,u,1,2>
+ 631691621U, // <7,u,u,2>: Cost 1 vsldoi12 RHS, LHS
+ 1705433451U, // <7,u,u,3>: Cost 2 vsldoi12 RHS, <u,u,3,0>
+ 1705433465U, // <7,u,u,4>: Cost 2 vsldoi12 RHS, <u,u,4,5>
+ 1705433475U, // <7,u,u,5>: Cost 2 vsldoi12 RHS, <u,u,5,6>
+ 631691661U, // <7,u,u,6>: Cost 1 vsldoi12 RHS, RHS
+ 430361910U, // <7,u,u,7>: Cost 1 vspltisw3 RHS
+ 631691675U, // <7,u,u,u>: Cost 1 vsldoi12 RHS, LHS
+ 202162278U, // <u,0,0,0>: Cost 1 vspltisw0 LHS
+ 1678598154U, // <u,0,0,1>: Cost 2 vsldoi12 LHS, <0,0,1,1>
+ 2634500154U, // <u,0,0,2>: Cost 3 vsldoi4 <2,u,0,0>, <2,u,0,0>
+ 2289596269U, // <u,0,0,3>: Cost 3 vmrglw <1,2,u,0>, <u,2,0,3>
+ 1548815670U, // <u,0,0,4>: Cost 2 vsldoi4 <0,u,0,0>, RHS
+ 2663698530U, // <u,0,0,5>: Cost 3 vsldoi4 <7,7,0,0>, <5,6,7,0>
+ 2658390942U, // <u,0,0,6>: Cost 3 vsldoi4 <6,u,0,0>, <6,u,0,0>
+ 2289596597U, // <u,0,0,7>: Cost 3 vmrglw <1,2,u,0>, <u,6,0,7>
+ 202162278U, // <u,0,0,u>: Cost 1 vspltisw0 LHS
+ 1560764518U, // <u,0,1,0>: Cost 2 vsldoi4 <2,u,0,1>, LHS
+ 115720294U, // <u,0,1,1>: Cost 1 vmrghw LHS, LHS
+ 604856427U, // <u,0,1,2>: Cost 1 vsldoi12 LHS, LHS
+ 2634508438U, // <u,0,1,3>: Cost 3 vsldoi4 <2,u,0,1>, <3,0,1,2>
+ 1560767798U, // <u,0,1,4>: Cost 2 vsldoi4 <2,u,0,1>, RHS
+ 2652426438U, // <u,0,1,5>: Cost 3 vsldoi4 <5,u,0,1>, <5,u,0,1>
+ 1584657311U, // <u,0,1,6>: Cost 2 vsldoi4 <6,u,0,1>, <6,u,0,1>
+ 2658399226U, // <u,0,1,7>: Cost 3 vsldoi4 <6,u,0,1>, <7,0,1,2>
+ 604856476U, // <u,0,1,u>: Cost 1 vsldoi12 LHS, LHS
+ 2696889850U, // <u,0,2,0>: Cost 3 vsldoi8 <2,0,u,0>, <2,0,u,0>
+ 1190174822U, // <u,0,2,1>: Cost 2 vmrghw <u,2,3,0>, LHS
+ 2692245096U, // <u,0,2,2>: Cost 3 vsldoi8 <1,2,u,0>, <2,2,2,2>
+ 2692245158U, // <u,0,2,3>: Cost 3 vsldoi8 <1,2,u,0>, <2,3,0,1>
+ 2263916882U, // <u,0,2,4>: Cost 3 vmrghw <u,2,3,0>, <0,4,1,5>
+ 2299709908U, // <u,0,2,5>: Cost 3 vmrglw <3,0,1,2>, <3,4,0,5>
+ 2692245434U, // <u,0,2,6>: Cost 3 vsldoi8 <1,2,u,0>, <2,6,3,7>
+ 2701535281U, // <u,0,2,7>: Cost 3 vsldoi8 <2,7,u,0>, <2,7,u,0>
+ 1190175389U, // <u,0,2,u>: Cost 2 vmrghw <u,2,3,0>, LHS
+ 1209237504U, // <u,0,3,0>: Cost 2 vmrglw LHS, <0,0,0,0>
+ 1209239206U, // <u,0,3,1>: Cost 2 vmrglw LHS, <2,3,0,1>
+ 2704189813U, // <u,0,3,2>: Cost 3 vsldoi8 <3,2,u,0>, <3,2,u,0>
+ 2692245916U, // <u,0,3,3>: Cost 3 vsldoi8 <1,2,u,0>, <3,3,3,3>
+ 2282981033U, // <u,0,3,4>: Cost 3 vmrglw LHS, <2,3,0,4>
+ 2664386658U, // <u,0,3,5>: Cost 3 vsldoi4 <7,u,0,3>, <5,6,7,0>
+ 2691877496U, // <u,0,3,6>: Cost 3 vsldoi8 <1,2,3,0>, <3,6,0,7>
+ 2664388218U, // <u,0,3,7>: Cost 3 vsldoi4 <7,u,0,3>, <7,u,0,3>
+ 1209239213U, // <u,0,3,u>: Cost 2 vmrglw LHS, <2,3,0,u>
+ 2289623040U, // <u,0,4,0>: Cost 3 vmrglw <1,2,u,4>, <0,0,0,0>
+ 1678598482U, // <u,0,4,1>: Cost 2 vsldoi12 LHS, <0,4,1,5>
+ 2634532926U, // <u,0,4,2>: Cost 3 vsldoi4 <2,u,0,4>, <2,u,0,4>
+ 2235580672U, // <u,0,4,3>: Cost 3 vmrghw <3,4,5,6>, <0,3,1,4>
+ 1143619922U, // <u,0,4,4>: Cost 2 vmrghw <0,4,1,5>, <0,4,1,5>
+ 1618505014U, // <u,0,4,5>: Cost 2 vsldoi8 <1,2,u,0>, RHS
+ 2658423714U, // <u,0,4,6>: Cost 3 vsldoi4 <6,u,0,4>, <6,u,0,4>
+ 2713259464U, // <u,0,4,7>: Cost 3 vsldoi8 <4,7,5,0>, <4,7,5,0>
+ 1683243409U, // <u,0,4,u>: Cost 2 vsldoi12 LHS, <0,4,u,5>
+ 1192443904U, // <u,0,5,0>: Cost 2 vmrghw RHS, <0,0,0,0>
+ 118702182U, // <u,0,5,1>: Cost 1 vmrghw RHS, LHS
+ 2266185901U, // <u,0,5,2>: Cost 3 vmrghw RHS, <0,2,1,2>
+ 2640513816U, // <u,0,5,3>: Cost 3 vsldoi4 <3,u,0,5>, <3,u,0,5>
+ 1192444242U, // <u,0,5,4>: Cost 2 vmrghw RHS, <0,4,1,5>
+ 2718789636U, // <u,0,5,5>: Cost 3 vsldoi8 <5,6,u,0>, <5,5,5,5>
+ 1645047915U, // <u,0,5,6>: Cost 2 vsldoi8 <5,6,u,0>, <5,6,u,0>
+ 2664404604U, // <u,0,5,7>: Cost 3 vsldoi4 <7,u,0,5>, <7,u,0,5>
+ 118702749U, // <u,0,5,u>: Cost 1 vmrghw RHS, LHS
+ 2302910464U, // <u,0,6,0>: Cost 3 vmrglw <3,4,u,6>, <0,0,0,0>
+ 1192886374U, // <u,0,6,1>: Cost 2 vmrghw <u,6,3,7>, LHS
+ 2718790138U, // <u,0,6,2>: Cost 3 vsldoi8 <5,6,u,0>, <6,2,7,3>
+ 2722771537U, // <u,0,6,3>: Cost 3 vsldoi8 <6,3,u,0>, <6,3,u,0>
+ 2266628434U, // <u,0,6,4>: Cost 3 vmrghw <u,6,3,7>, <0,4,1,5>
+ 2248950180U, // <u,0,6,5>: Cost 3 vmrghw <5,6,7,0>, <0,5,1,6>
+ 2718790456U, // <u,0,6,6>: Cost 3 vsldoi8 <5,6,u,0>, <6,6,6,6>
+ 2718790478U, // <u,0,6,7>: Cost 3 vsldoi8 <5,6,u,0>, <6,7,0,1>
+ 1192886941U, // <u,0,6,u>: Cost 2 vmrghw <u,6,3,7>, LHS
+ 1235812352U, // <u,0,7,0>: Cost 2 vmrglw RHS, <0,0,0,0>
+ 1235814054U, // <u,0,7,1>: Cost 2 vmrglw RHS, <2,3,0,1>
+ 2728080601U, // <u,0,7,2>: Cost 3 vsldoi8 <7,2,u,0>, <7,2,u,0>
+ 2640530202U, // <u,0,7,3>: Cost 3 vsldoi4 <3,u,0,7>, <3,u,0,7>
+ 2640530742U, // <u,0,7,4>: Cost 3 vsldoi4 <3,u,0,7>, RHS
+ 2309556692U, // <u,0,7,5>: Cost 3 vmrglw RHS, <3,4,0,5>
+ 2730735133U, // <u,0,7,6>: Cost 3 vsldoi8 <7,6,u,0>, <7,6,u,0>
+ 2309556856U, // <u,0,7,7>: Cost 3 vmrglw RHS, <3,6,0,7>
+ 1235814061U, // <u,0,7,u>: Cost 2 vmrglw RHS, <2,3,0,u>
+ 202162278U, // <u,0,u,0>: Cost 1 vspltisw0 LHS
+ 120365158U, // <u,0,u,1>: Cost 1 vmrghw LHS, LHS
+ 604856989U, // <u,0,u,2>: Cost 1 vsldoi12 LHS, LHS
+ 2692249532U, // <u,0,u,3>: Cost 3 vsldoi8 <1,2,u,0>, <u,3,0,1>
+ 1560825142U, // <u,0,u,4>: Cost 2 vsldoi4 <2,u,0,u>, RHS
+ 1618507930U, // <u,0,u,5>: Cost 2 vsldoi8 <1,2,u,0>, RHS
+ 1584714662U, // <u,0,u,6>: Cost 2 vsldoi4 <6,u,0,u>, <6,u,0,u>
+ 2309565048U, // <u,0,u,7>: Cost 3 vmrglw RHS, <3,6,0,7>
+ 604857043U, // <u,0,u,u>: Cost 1 vsldoi12 LHS, LHS
+ 1611210825U, // <u,1,0,0>: Cost 2 vsldoi8 <0,0,u,1>, <0,0,u,1>
+ 1616519270U, // <u,1,0,1>: Cost 2 vsldoi8 <0,u,u,1>, LHS
+ 2287605459U, // <u,1,0,2>: Cost 3 vmrglw <0,u,u,0>, <u,0,1,2>
+ 2640546588U, // <u,1,0,3>: Cost 3 vsldoi4 <3,u,1,0>, <3,u,1,0>
+ 2622631222U, // <u,1,0,4>: Cost 3 vsldoi4 <0,u,1,0>, RHS
+ 2289590610U, // <u,1,0,5>: Cost 3 vmrglw <1,2,u,0>, <0,4,1,5>
+ 2664436630U, // <u,1,0,6>: Cost 3 vsldoi4 <7,u,1,0>, <6,7,u,1>
+ 2664437376U, // <u,1,0,7>: Cost 3 vsldoi4 <7,u,1,0>, <7,u,1,0>
+ 1616519889U, // <u,1,0,u>: Cost 2 vsldoi8 <0,u,u,1>, <0,u,u,1>
+ 1548894866U, // <u,1,1,0>: Cost 2 vsldoi4 <0,u,1,1>, <0,u,1,1>
+ 269271142U, // <u,1,1,1>: Cost 1 vspltisw1 LHS
+ 1189462934U, // <u,1,1,2>: Cost 2 vmrghw LHS, <1,2,3,0>
+ 2622638230U, // <u,1,1,3>: Cost 3 vsldoi4 <0,u,1,1>, <3,0,1,2>
+ 1548897590U, // <u,1,1,4>: Cost 2 vsldoi4 <0,u,1,1>, RHS
+ 2756985692U, // <u,1,1,5>: Cost 3 vsldoi12 LHS, <1,1,5,5>
+ 2658472872U, // <u,1,1,6>: Cost 3 vsldoi4 <6,u,1,1>, <6,u,1,1>
+ 2287614142U, // <u,1,1,7>: Cost 3 vmrglw <0,u,u,1>, <u,6,1,7>
+ 269271142U, // <u,1,1,u>: Cost 1 vspltisw1 LHS
+ 1566818406U, // <u,1,2,0>: Cost 2 vsldoi4 <3,u,1,2>, LHS
+ 2756985735U, // <u,1,2,1>: Cost 3 vsldoi12 LHS, <1,2,1,3>
+ 1148371862U, // <u,1,2,2>: Cost 2 vmrghw <1,2,3,0>, <1,2,3,0>
+ 835584U, // <u,1,2,3>: Cost 0 copy LHS
+ 1566821686U, // <u,1,2,4>: Cost 2 vsldoi4 <3,u,1,2>, RHS
+ 2756985771U, // <u,1,2,5>: Cost 3 vsldoi12 LHS, <1,2,5,3>
+ 2690262970U, // <u,1,2,6>: Cost 3 vsldoi8 <0,u,u,1>, <2,6,3,7>
+ 1590711938U, // <u,1,2,7>: Cost 2 vsldoi4 <7,u,1,2>, <7,u,1,2>
+ 835584U, // <u,1,2,u>: Cost 0 copy LHS
+ 2282979337U, // <u,1,3,0>: Cost 3 vmrglw LHS, <0,0,1,0>
+ 1209237514U, // <u,1,3,1>: Cost 2 vmrglw LHS, <0,0,1,1>
+ 1209239702U, // <u,1,3,2>: Cost 2 vmrglw LHS, <3,0,1,2>
+ 2282979502U, // <u,1,3,3>: Cost 3 vmrglw LHS, <0,2,1,3>
+ 2282979341U, // <u,1,3,4>: Cost 3 vmrglw LHS, <0,0,1,4>
+ 1209237842U, // <u,1,3,5>: Cost 2 vmrglw LHS, <0,4,1,5>
+ 2282979505U, // <u,1,3,6>: Cost 3 vmrglw LHS, <0,2,1,6>
+ 2287625423U, // <u,1,3,7>: Cost 3 vmrglw LHS, <1,6,1,7>
+ 1209237521U, // <u,1,3,u>: Cost 2 vmrglw LHS, <0,0,1,u>
+ 1635101613U, // <u,1,4,0>: Cost 2 vsldoi8 <4,0,u,1>, <4,0,u,1>
+ 2289623050U, // <u,1,4,1>: Cost 3 vmrglw <1,2,u,4>, <0,0,1,1>
+ 2289625238U, // <u,1,4,2>: Cost 3 vmrglw <1,2,u,4>, <3,0,1,2>
+ 2640579360U, // <u,1,4,3>: Cost 3 vsldoi4 <3,u,1,4>, <3,u,1,4>
+ 2622663990U, // <u,1,4,4>: Cost 3 vsldoi4 <0,u,1,4>, RHS
+ 1616522550U, // <u,1,4,5>: Cost 2 vsldoi8 <0,u,u,1>, RHS
+ 2664469398U, // <u,1,4,6>: Cost 3 vsldoi4 <7,u,1,4>, <6,7,u,1>
+ 2664470148U, // <u,1,4,7>: Cost 3 vsldoi4 <7,u,1,4>, <7,u,1,4>
+ 1616522793U, // <u,1,4,u>: Cost 2 vsldoi8 <0,u,u,1>, RHS
+ 1548927638U, // <u,1,5,0>: Cost 2 vsldoi4 <0,u,1,5>, <0,u,1,5>
+ 1192444724U, // <u,1,5,1>: Cost 2 vmrghw RHS, <1,1,1,1>
+ 1192444822U, // <u,1,5,2>: Cost 2 vmrghw RHS, <1,2,3,0>
+ 2622670998U, // <u,1,5,3>: Cost 3 vsldoi4 <0,u,1,5>, <3,0,1,2>
+ 1548930358U, // <u,1,5,4>: Cost 2 vsldoi4 <0,u,1,5>, RHS
+ 1210728786U, // <u,1,5,5>: Cost 2 vmrglw <0,4,1,5>, <0,4,1,5>
+ 2714153058U, // <u,1,5,6>: Cost 3 vsldoi8 <4,u,u,1>, <5,6,7,0>
+ 2670449658U, // <u,1,5,7>: Cost 3 vsldoi4 <u,u,1,5>, <7,0,1,2>
+ 1548932910U, // <u,1,5,u>: Cost 2 vsldoi4 <0,u,1,5>, LHS
+ 2622677655U, // <u,1,6,0>: Cost 3 vsldoi4 <0,u,1,6>, <0,u,1,6>
+ 2756986063U, // <u,1,6,1>: Cost 3 vsldoi12 LHS, <1,6,1,7>
+ 2302912662U, // <u,1,6,2>: Cost 3 vmrglw <3,4,u,6>, <3,0,1,2>
+ 3696421014U, // <u,1,6,3>: Cost 4 vsldoi4 <0,u,1,6>, <3,0,1,2>
+ 2622680374U, // <u,1,6,4>: Cost 3 vsldoi4 <0,u,1,6>, RHS
+ 2756986099U, // <u,1,6,5>: Cost 3 vsldoi12 LHS, <1,6,5,7>
+ 2714153784U, // <u,1,6,6>: Cost 3 vsldoi8 <4,u,u,1>, <6,6,6,6>
+ 1651692438U, // <u,1,6,7>: Cost 2 vsldoi8 <6,7,u,1>, <6,7,u,1>
+ 1652356071U, // <u,1,6,u>: Cost 2 vsldoi8 <6,u,u,1>, <6,u,u,1>
+ 2628657254U, // <u,1,7,0>: Cost 3 vsldoi4 <1,u,1,7>, LHS
+ 1235812362U, // <u,1,7,1>: Cost 2 vmrglw RHS, <0,0,1,1>
+ 1235814550U, // <u,1,7,2>: Cost 2 vmrglw RHS, <3,0,1,2>
+ 2309554350U, // <u,1,7,3>: Cost 3 vmrglw RHS, <0,2,1,3>
+ 2628660534U, // <u,1,7,4>: Cost 3 vsldoi4 <1,u,1,7>, RHS
+ 1235812690U, // <u,1,7,5>: Cost 2 vmrglw RHS, <0,4,1,5>
+ 2309554353U, // <u,1,7,6>: Cost 3 vmrglw RHS, <0,2,1,6>
+ 2309554678U, // <u,1,7,7>: Cost 3 vmrglw RHS, <0,6,1,7>
+ 1235812369U, // <u,1,7,u>: Cost 2 vmrglw RHS, <0,0,1,u>
+ 1548952217U, // <u,1,u,0>: Cost 2 vsldoi4 <0,u,1,u>, <0,u,1,u>
+ 269271142U, // <u,1,u,1>: Cost 1 vspltisw1 LHS
+ 1209280662U, // <u,1,u,2>: Cost 2 vmrglw LHS, <3,0,1,2>
+ 835584U, // <u,1,u,3>: Cost 0 copy LHS
+ 1548954934U, // <u,1,u,4>: Cost 2 vsldoi4 <0,u,1,u>, RHS
+ 1209278802U, // <u,1,u,5>: Cost 2 vmrglw LHS, <0,4,1,5>
+ 2283020465U, // <u,1,u,6>: Cost 3 vmrglw LHS, <0,2,1,6>
+ 1590761096U, // <u,1,u,7>: Cost 2 vsldoi4 <7,u,1,u>, <7,u,1,u>
+ 835584U, // <u,1,u,u>: Cost 0 copy LHS
+ 2702876672U, // <u,2,0,0>: Cost 3 vsldoi8 <3,0,u,2>, <0,0,0,0>
+ 1629134950U, // <u,2,0,1>: Cost 2 vsldoi8 <3,0,u,2>, LHS
+ 2289591912U, // <u,2,0,2>: Cost 3 vmrglw <1,2,u,0>, <2,2,2,2>
+ 1215848550U, // <u,2,0,3>: Cost 2 vmrglw <1,2,u,0>, LHS
+ 2702877010U, // <u,2,0,4>: Cost 3 vsldoi8 <3,0,u,2>, <0,4,1,5>
+ 2289222708U, // <u,2,0,5>: Cost 3 vmrglw <1,2,3,0>, <1,4,2,5>
+ 2779178473U, // <u,2,0,6>: Cost 3 vsldoi12 RHS, <2,0,6,1>
+ 2726249024U, // <u,2,0,7>: Cost 3 vsldoi8 <7,0,1,2>, <0,7,1,0>
+ 1215848555U, // <u,2,0,u>: Cost 2 vmrglw <1,2,u,0>, LHS
+ 2690933539U, // <u,2,1,0>: Cost 3 vsldoi8 <1,0,u,2>, <1,0,u,2>
+ 2628683124U, // <u,2,1,1>: Cost 3 vsldoi4 <1,u,2,1>, <1,u,2,1>
+ 1189463656U, // <u,2,1,2>: Cost 2 vmrghw LHS, <2,2,2,2>
+ 1213866086U, // <u,2,1,3>: Cost 2 vmrglw <0,u,u,1>, LHS
+ 2628685110U, // <u,2,1,4>: Cost 3 vsldoi4 <1,u,2,1>, RHS
+ 2263205736U, // <u,2,1,5>: Cost 3 vmrghw LHS, <2,5,3,6>
+ 1189463994U, // <u,2,1,6>: Cost 2 vmrghw LHS, <2,6,3,7>
+ 2263205866U, // <u,2,1,7>: Cost 3 vmrghw LHS, <2,7,0,1>
+ 1213866091U, // <u,2,1,u>: Cost 2 vmrglw <0,u,u,1>, LHS
+ 1556938854U, // <u,2,2,0>: Cost 2 vsldoi4 <2,2,2,2>, LHS
+ 2697569869U, // <u,2,2,1>: Cost 3 vsldoi8 <2,1,u,2>, <2,1,u,2>
+ 336380006U, // <u,2,2,2>: Cost 1 vspltisw2 LHS
+ 1678599794U, // <u,2,2,3>: Cost 2 vsldoi12 LHS, <2,2,3,3>
+ 1556942134U, // <u,2,2,4>: Cost 2 vsldoi4 <2,2,2,2>, RHS
+ 2295138061U, // <u,2,2,5>: Cost 3 vmrglw <2,2,2,2>, <2,4,2,5>
+ 2702878650U, // <u,2,2,6>: Cost 3 vsldoi8 <3,0,u,2>, <2,6,3,7>
+ 2300229831U, // <u,2,2,7>: Cost 3 vmrglw <3,0,u,2>, <u,6,2,7>
+ 336380006U, // <u,2,2,u>: Cost 1 vspltisw2 LHS
+ 475243165U, // <u,2,3,0>: Cost 1 vsldoi4 LHS, LHS
+ 1548985140U, // <u,2,3,1>: Cost 2 vsldoi4 LHS, <1,1,1,1>
+ 1209239144U, // <u,2,3,2>: Cost 2 vmrglw LHS, <2,2,2,2>
+ 135495782U, // <u,2,3,3>: Cost 1 vmrglw LHS, LHS
+ 475245878U, // <u,2,3,4>: Cost 1 vsldoi4 LHS, RHS
+ 1596764164U, // <u,2,3,5>: Cost 2 vsldoi4 LHS, <5,5,5,5>
+ 1596764666U, // <u,2,3,6>: Cost 2 vsldoi4 LHS, <6,2,7,3>
+ 1596765178U, // <u,2,3,7>: Cost 2 vsldoi4 LHS, <7,0,1,2>
+ 135495787U, // <u,2,3,u>: Cost 1 vmrglw LHS, LHS
+ 2708851630U, // <u,2,4,0>: Cost 3 vsldoi8 <4,0,u,2>, <4,0,u,2>
+ 2217362979U, // <u,2,4,1>: Cost 3 vmrghw <0,4,1,5>, <2,1,3,5>
+ 2289624680U, // <u,2,4,2>: Cost 3 vmrglw <1,2,u,4>, <2,2,2,2>
+ 1215881318U, // <u,2,4,3>: Cost 2 vmrglw <1,2,u,4>, LHS
+ 2726767824U, // <u,2,4,4>: Cost 3 vsldoi8 <7,0,u,2>, <4,4,4,4>
+ 1629138230U, // <u,2,4,5>: Cost 2 vsldoi8 <3,0,u,2>, RHS
+ 2779178801U, // <u,2,4,6>: Cost 3 vsldoi12 RHS, <2,4,6,5>
+ 2726251976U, // <u,2,4,7>: Cost 3 vsldoi8 <7,0,1,2>, <4,7,5,0>
+ 1215881323U, // <u,2,4,u>: Cost 2 vmrglw <1,2,u,4>, LHS
+ 2628714598U, // <u,2,5,0>: Cost 3 vsldoi4 <1,u,2,5>, LHS
+ 2628715896U, // <u,2,5,1>: Cost 3 vsldoi4 <1,u,2,5>, <1,u,2,5>
+ 1192445544U, // <u,2,5,2>: Cost 2 vmrghw RHS, <2,2,2,2>
+ 1213898854U, // <u,2,5,3>: Cost 2 vmrglw <0,u,u,5>, LHS
+ 2628717878U, // <u,2,5,4>: Cost 3 vsldoi4 <1,u,2,5>, RHS
+ 2726768644U, // <u,2,5,5>: Cost 3 vsldoi8 <7,0,u,2>, <5,5,5,5>
+ 1192445882U, // <u,2,5,6>: Cost 2 vmrghw RHS, <2,6,3,7>
+ 2266187754U, // <u,2,5,7>: Cost 3 vmrghw RHS, <2,7,0,1>
+ 1213898859U, // <u,2,5,u>: Cost 2 vmrglw <0,u,u,5>, LHS
+ 2634694758U, // <u,2,6,0>: Cost 3 vsldoi4 <2,u,2,6>, LHS
+ 2721460657U, // <u,2,6,1>: Cost 3 vsldoi8 <6,1,u,2>, <6,1,u,2>
+ 2296940136U, // <u,2,6,2>: Cost 3 vmrglw <2,4,u,6>, <2,2,2,2>
+ 1678600122U, // <u,2,6,3>: Cost 2 vsldoi12 LHS, <2,6,3,7>
+ 2634698038U, // <u,2,6,4>: Cost 3 vsldoi4 <2,u,2,6>, RHS
+ 3370682125U, // <u,2,6,5>: Cost 4 vmrglw <2,4,u,6>, <2,4,2,5>
+ 1157056442U, // <u,2,6,6>: Cost 2 vmrghw <2,6,3,7>, <2,6,3,7>
+ 2725442455U, // <u,2,6,7>: Cost 3 vsldoi8 <6,7,u,2>, <6,7,u,2>
+ 1678600167U, // <u,2,6,u>: Cost 2 vsldoi12 LHS, <2,6,u,7>
+ 1653027897U, // <u,2,7,0>: Cost 2 vsldoi8 <7,0,u,2>, <7,0,u,2>
+ 2309554924U, // <u,2,7,1>: Cost 3 vmrglw RHS, <1,0,2,1>
+ 1235813992U, // <u,2,7,2>: Cost 2 vmrglw RHS, <2,2,2,2>
+ 162070630U, // <u,2,7,3>: Cost 1 vmrglw RHS, LHS
+ 2634706230U, // <u,2,7,4>: Cost 3 vsldoi4 <2,u,2,7>, RHS
+ 2309555252U, // <u,2,7,5>: Cost 3 vmrglw RHS, <1,4,2,5>
+ 2309555901U, // <u,2,7,6>: Cost 3 vmrglw RHS, <2,3,2,6>
+ 2309555416U, // <u,2,7,7>: Cost 3 vmrglw RHS, <1,6,2,7>
+ 162070635U, // <u,2,7,u>: Cost 1 vmrglw RHS, LHS
+ 475284130U, // <u,2,u,0>: Cost 1 vsldoi4 LHS, LHS
+ 1549026100U, // <u,2,u,1>: Cost 2 vsldoi4 LHS, <1,1,1,1>
+ 336380006U, // <u,2,u,2>: Cost 1 vspltisw2 LHS
+ 135536742U, // <u,2,u,3>: Cost 1 vmrglw LHS, LHS
+ 475286838U, // <u,2,u,4>: Cost 1 vsldoi4 LHS, RHS
+ 1629141146U, // <u,2,u,5>: Cost 2 vsldoi8 <3,0,u,2>, RHS
+ 1194108858U, // <u,2,u,6>: Cost 2 vmrghw LHS, <2,6,3,7>
+ 1596806138U, // <u,2,u,7>: Cost 2 vsldoi4 LHS, <7,0,1,2>
+ 135536747U, // <u,2,u,u>: Cost 1 vmrglw LHS, LHS
+ 1611890688U, // <u,3,0,0>: Cost 2 vsldoi8 LHS, <0,0,0,0>
+ 538149020U, // <u,3,0,1>: Cost 1 vsldoi8 LHS, LHS
+ 2685632685U, // <u,3,0,2>: Cost 3 vsldoi8 LHS, <0,2,1,2>
+ 2685632764U, // <u,3,0,3>: Cost 3 vsldoi8 LHS, <0,3,1,0>
+ 1611891026U, // <u,3,0,4>: Cost 2 vsldoi8 LHS, <0,4,1,5>
+ 2733408722U, // <u,3,0,5>: Cost 3 vsldoi8 LHS, <0,5,6,7>
+ 2658612153U, // <u,3,0,6>: Cost 3 vsldoi4 <6,u,3,0>, <6,u,3,0>
+ 2289592250U, // <u,3,0,7>: Cost 3 vmrglw <1,2,u,0>, <2,6,3,7>
+ 538149533U, // <u,3,0,u>: Cost 1 vsldoi8 LHS, LHS
+ 1189464214U, // <u,3,1,0>: Cost 2 vmrghw LHS, <3,0,1,2>
+ 1611891508U, // <u,3,1,1>: Cost 2 vsldoi8 LHS, <1,1,1,1>
+ 1611891606U, // <u,3,1,2>: Cost 2 vsldoi8 LHS, <1,2,3,0>
+ 1189464476U, // <u,3,1,3>: Cost 2 vmrghw LHS, <3,3,3,3>
+ 1189464578U, // <u,3,1,4>: Cost 2 vmrghw LHS, <3,4,5,6>
+ 2690278511U, // <u,3,1,5>: Cost 3 vsldoi8 LHS, <1,5,0,1>
+ 2690278607U, // <u,3,1,6>: Cost 3 vsldoi8 LHS, <1,6,1,7>
+ 2287609786U, // <u,3,1,7>: Cost 3 vmrglw <0,u,u,1>, <2,6,3,7>
+ 1611892092U, // <u,3,1,u>: Cost 2 vsldoi8 LHS, <1,u,3,0>
+ 2685634042U, // <u,3,2,0>: Cost 3 vsldoi8 LHS, <2,0,u,0>
+ 2685634079U, // <u,3,2,1>: Cost 3 vsldoi8 LHS, <2,1,3,1>
+ 1611892328U, // <u,3,2,2>: Cost 2 vsldoi8 LHS, <2,2,2,2>
+ 1611892390U, // <u,3,2,3>: Cost 2 vsldoi8 LHS, <2,3,0,1>
+ 2685634371U, // <u,3,2,4>: Cost 3 vsldoi8 LHS, <2,4,u,5>
+ 2685634453U, // <u,3,2,5>: Cost 3 vsldoi8 LHS, <2,5,u,6>
+ 1611892666U, // <u,3,2,6>: Cost 2 vsldoi8 LHS, <2,6,3,7>
+ 2300225466U, // <u,3,2,7>: Cost 3 vmrglw <3,0,u,2>, <2,6,3,7>
+ 1611892795U, // <u,3,2,u>: Cost 2 vsldoi8 LHS, <2,u,0,1>
+ 1209238422U, // <u,3,3,0>: Cost 2 vmrglw LHS, <1,2,3,0>
+ 2282980247U, // <u,3,3,1>: Cost 3 vmrglw LHS, <1,2,3,1>
+ 1561004120U, // <u,3,3,2>: Cost 2 vsldoi4 <2,u,3,3>, <2,u,3,3>
+ 403488870U, // <u,3,3,3>: Cost 1 vspltisw3 LHS
+ 1209238426U, // <u,3,3,4>: Cost 2 vmrglw LHS, <1,2,3,4>
+ 2282980899U, // <u,3,3,5>: Cost 3 vmrglw LHS, <2,1,3,5>
+ 2282985598U, // <u,3,3,6>: Cost 3 vmrglw LHS, <u,5,3,6>
+ 1209239482U, // <u,3,3,7>: Cost 2 vmrglw LHS, <2,6,3,7>
+ 403488870U, // <u,3,3,u>: Cost 1 vspltisw3 LHS
+ 1555038310U, // <u,3,4,0>: Cost 2 vsldoi4 <1,u,3,4>, LHS
+ 1555039616U, // <u,3,4,1>: Cost 2 vsldoi4 <1,u,3,4>, <1,u,3,4>
+ 2628781672U, // <u,3,4,2>: Cost 3 vsldoi4 <1,u,3,4>, <2,2,2,2>
+ 2289624690U, // <u,3,4,3>: Cost 3 vmrglw <1,2,u,4>, <2,2,3,3>
+ 1555041590U, // <u,3,4,4>: Cost 2 vsldoi4 <1,u,3,4>, RHS
+ 538152246U, // <u,3,4,5>: Cost 1 vsldoi8 LHS, RHS
+ 2658644925U, // <u,3,4,6>: Cost 3 vsldoi4 <6,u,3,4>, <6,u,3,4>
+ 2289625018U, // <u,3,4,7>: Cost 3 vmrglw <1,2,u,4>, <2,6,3,7>
+ 538152489U, // <u,3,4,u>: Cost 1 vsldoi8 LHS, RHS
+ 1192446102U, // <u,3,5,0>: Cost 2 vmrghw RHS, <3,0,1,2>
+ 2733411983U, // <u,3,5,1>: Cost 3 vsldoi8 LHS, <5,1,0,1>
+ 2634762330U, // <u,3,5,2>: Cost 3 vsldoi4 <2,u,3,5>, <2,u,3,5>
+ 1192446364U, // <u,3,5,3>: Cost 2 vmrghw RHS, <3,3,3,3>
+ 1192446466U, // <u,3,5,4>: Cost 2 vmrghw RHS, <3,4,5,6>
+ 1659670532U, // <u,3,5,5>: Cost 2 vsldoi8 LHS, <5,5,5,5>
+ 1659670626U, // <u,3,5,6>: Cost 2 vsldoi8 LHS, <5,6,7,0>
+ 2287642554U, // <u,3,5,7>: Cost 3 vmrglw <0,u,u,5>, <2,6,3,7>
+ 1659670788U, // <u,3,5,u>: Cost 2 vsldoi8 LHS, <5,u,7,0>
+ 2634768486U, // <u,3,6,0>: Cost 3 vsldoi4 <2,u,3,6>, LHS
+ 2733412775U, // <u,3,6,1>: Cost 3 vsldoi8 LHS, <6,1,7,1>
+ 1648390659U, // <u,3,6,2>: Cost 2 vsldoi8 <6,2,u,3>, <6,2,u,3>
+ 2634770973U, // <u,3,6,3>: Cost 3 vsldoi4 <2,u,3,6>, <3,4,u,6>
+ 2634771766U, // <u,3,6,4>: Cost 3 vsldoi4 <2,u,3,6>, RHS
+ 2733413099U, // <u,3,6,5>: Cost 3 vsldoi8 LHS, <6,5,7,1>
+ 1659671352U, // <u,3,6,6>: Cost 2 vsldoi8 LHS, <6,6,6,6>
+ 1659671374U, // <u,3,6,7>: Cost 2 vsldoi8 LHS, <6,7,0,1>
+ 1652372457U, // <u,3,6,u>: Cost 2 vsldoi8 <6,u,u,3>, <6,u,u,3>
+ 1561034854U, // <u,3,7,0>: Cost 2 vsldoi4 <2,u,3,7>, LHS
+ 2634777396U, // <u,3,7,1>: Cost 3 vsldoi4 <2,u,3,7>, <1,1,1,1>
+ 1561036892U, // <u,3,7,2>: Cost 2 vsldoi4 <2,u,3,7>, <2,u,3,7>
+ 1235814002U, // <u,3,7,3>: Cost 2 vmrglw RHS, <2,2,3,3>
+ 1561038134U, // <u,3,7,4>: Cost 2 vsldoi4 <2,u,3,7>, RHS
+ 2309555747U, // <u,3,7,5>: Cost 3 vmrglw RHS, <2,1,3,5>
+ 2309556072U, // <u,3,7,6>: Cost 3 vmrglw RHS, <2,5,3,6>
+ 1235814330U, // <u,3,7,7>: Cost 2 vmrglw RHS, <2,6,3,7>
+ 1561040686U, // <u,3,7,u>: Cost 2 vsldoi4 <2,u,3,7>, LHS
+ 1611896531U, // <u,3,u,0>: Cost 2 vsldoi8 LHS, <u,0,1,2>
+ 538154798U, // <u,3,u,1>: Cost 1 vsldoi8 LHS, LHS
+ 1611896712U, // <u,3,u,2>: Cost 2 vsldoi8 LHS, <u,2,3,3>
+ 403488870U, // <u,3,u,3>: Cost 1 vspltisw3 LHS
+ 1611896895U, // <u,3,u,4>: Cost 2 vsldoi8 LHS, <u,4,5,6>
+ 538155162U, // <u,3,u,5>: Cost 1 vsldoi8 LHS, RHS
+ 1611897040U, // <u,3,u,6>: Cost 2 vsldoi8 LHS, <u,6,3,7>
+ 1209280442U, // <u,3,u,7>: Cost 2 vmrglw LHS, <2,6,3,7>
+ 538155365U, // <u,3,u,u>: Cost 1 vsldoi8 LHS, LHS
+ 1165118354U, // <u,4,0,0>: Cost 2 vmrghw <4,0,5,1>, <4,0,5,1>
+ 1618534502U, // <u,4,0,1>: Cost 2 vsldoi8 <1,2,u,4>, LHS
+ 2634795102U, // <u,4,0,2>: Cost 3 vsldoi4 <2,u,4,0>, <2,u,4,0>
+ 2686451968U, // <u,4,0,3>: Cost 3 vsldoi8 <0,3,1,4>, <0,3,1,4>
+ 2692276562U, // <u,4,0,4>: Cost 3 vsldoi8 <1,2,u,4>, <0,4,1,5>
+ 1705438098U, // <u,4,0,5>: Cost 2 vsldoi12 RHS, <4,0,5,1>
+ 2658685890U, // <u,4,0,6>: Cost 3 vsldoi4 <6,u,4,0>, <6,u,4,0>
+ 2256489928U, // <u,4,0,7>: Cost 3 vmrghw <7,0,1,2>, <4,7,5,0>
+ 1618535069U, // <u,4,0,u>: Cost 2 vsldoi8 <1,2,u,4>, LHS
+ 1189464978U, // <u,4,1,0>: Cost 2 vmrghw LHS, <4,0,5,1>
+ 2692277044U, // <u,4,1,1>: Cost 3 vsldoi8 <1,2,u,4>, <1,1,1,1>
+ 1618535367U, // <u,4,1,2>: Cost 2 vsldoi8 <1,2,u,4>, <1,2,u,4>
+ 2640775992U, // <u,4,1,3>: Cost 3 vsldoi4 <3,u,4,1>, <3,u,4,1>
+ 1189465296U, // <u,4,1,4>: Cost 2 vmrghw LHS, <4,4,4,4>
+ 115723574U, // <u,4,1,5>: Cost 1 vmrghw LHS, RHS
+ 2263207289U, // <u,4,1,6>: Cost 3 vmrghw LHS, <4,6,5,2>
+ 2664666780U, // <u,4,1,7>: Cost 3 vsldoi4 <7,u,4,1>, <7,u,4,1>
+ 115723817U, // <u,4,1,u>: Cost 1 vmrghw LHS, RHS
+ 2263919506U, // <u,4,2,0>: Cost 3 vmrghw <u,2,3,0>, <4,0,5,1>
+ 2222115812U, // <u,4,2,1>: Cost 3 vmrghw <1,2,3,0>, <4,1,5,2>
+ 2692277864U, // <u,4,2,2>: Cost 3 vsldoi8 <1,2,u,4>, <2,2,2,2>
+ 2692277926U, // <u,4,2,3>: Cost 3 vsldoi8 <1,2,u,4>, <2,3,0,1>
+ 2324114640U, // <u,4,2,4>: Cost 3 vmrglw <7,0,u,2>, <4,4,4,4>
+ 1190178102U, // <u,4,2,5>: Cost 2 vmrghw <u,2,3,0>, RHS
+ 2692278202U, // <u,4,2,6>: Cost 3 vsldoi8 <1,2,u,4>, <2,6,3,7>
+ 2701568053U, // <u,4,2,7>: Cost 3 vsldoi8 <2,7,u,4>, <2,7,u,4>
+ 1190178345U, // <u,4,2,u>: Cost 2 vmrghw <u,2,3,0>, RHS
+ 2692278422U, // <u,4,3,0>: Cost 3 vsldoi8 <1,2,u,4>, <3,0,1,2>
+ 2282981552U, // <u,4,3,1>: Cost 3 vmrglw LHS, <3,0,4,1>
+ 2704222585U, // <u,4,3,2>: Cost 3 vsldoi8 <3,2,u,4>, <3,2,u,4>
+ 2692278684U, // <u,4,3,3>: Cost 3 vsldoi8 <1,2,u,4>, <3,3,3,3>
+ 1257016528U, // <u,4,3,4>: Cost 2 vmrglw LHS, <4,4,4,4>
+ 1209239246U, // <u,4,3,5>: Cost 2 vmrglw LHS, <2,3,4,5>
+ 2691910300U, // <u,4,3,6>: Cost 3 vsldoi8 <1,2,3,4>, <3,6,4,7>
+ 2664683166U, // <u,4,3,7>: Cost 3 vsldoi4 <7,u,4,3>, <7,u,4,3>
+ 1209239249U, // <u,4,3,u>: Cost 2 vmrglw LHS, <2,3,4,u>
+ 1573027942U, // <u,4,4,0>: Cost 2 vsldoi4 <4,u,4,4>, LHS
+ 2634826695U, // <u,4,4,1>: Cost 3 vsldoi4 <2,u,4,4>, <1,2,u,4>
+ 2634827874U, // <u,4,4,2>: Cost 3 vsldoi4 <2,u,4,4>, <2,u,4,4>
+ 2289629073U, // <u,4,4,3>: Cost 3 vmrglw <1,2,u,4>, <u,2,4,3>
+ 229035318U, // <u,4,4,4>: Cost 1 vspltisw0 RHS
+ 1618537782U, // <u,4,4,5>: Cost 2 vsldoi8 <1,2,u,4>, RHS
+ 2658718662U, // <u,4,4,6>: Cost 3 vsldoi4 <6,u,4,4>, <6,u,4,4>
+ 2289629401U, // <u,4,4,7>: Cost 3 vmrglw <1,2,u,4>, <u,6,4,7>
+ 229035318U, // <u,4,4,u>: Cost 1 vspltisw0 RHS
+ 1561092198U, // <u,4,5,0>: Cost 2 vsldoi4 <2,u,4,5>, LHS
+ 2628863370U, // <u,4,5,1>: Cost 3 vsldoi4 <1,u,4,5>, <1,u,4,5>
+ 1561094243U, // <u,4,5,2>: Cost 2 vsldoi4 <2,u,4,5>, <2,u,4,5>
+ 2634836118U, // <u,4,5,3>: Cost 3 vsldoi4 <2,u,4,5>, <3,0,1,2>
+ 1561095478U, // <u,4,5,4>: Cost 2 vsldoi4 <2,u,4,5>, RHS
+ 118705462U, // <u,4,5,5>: Cost 1 vmrghw RHS, RHS
+ 604859702U, // <u,4,5,6>: Cost 1 vsldoi12 LHS, RHS
+ 2658726906U, // <u,4,5,7>: Cost 3 vsldoi4 <6,u,4,5>, <7,0,1,2>
+ 604859720U, // <u,4,5,u>: Cost 1 vsldoi12 LHS, RHS
+ 2266631058U, // <u,4,6,0>: Cost 3 vmrghw <u,6,3,7>, <4,0,5,1>
+ 2302692152U, // <u,4,6,1>: Cost 3 vmrglw <3,4,5,6>, <3,u,4,1>
+ 2718822906U, // <u,4,6,2>: Cost 3 vsldoi8 <5,6,u,4>, <6,2,7,3>
+ 2722804309U, // <u,4,6,3>: Cost 3 vsldoi8 <6,3,u,4>, <6,3,u,4>
+ 2723467942U, // <u,4,6,4>: Cost 3 vsldoi8 <6,4,u,4>, <6,4,u,4>
+ 1192889654U, // <u,4,6,5>: Cost 2 vmrghw <u,6,3,7>, RHS
+ 2718823224U, // <u,4,6,6>: Cost 3 vsldoi8 <5,6,u,4>, <6,6,6,6>
+ 2718823246U, // <u,4,6,7>: Cost 3 vsldoi8 <5,6,u,4>, <6,7,0,1>
+ 1192889897U, // <u,4,6,u>: Cost 2 vmrghw <u,6,3,7>, RHS
+ 2640822374U, // <u,4,7,0>: Cost 3 vsldoi4 <3,u,4,7>, LHS
+ 2640823194U, // <u,4,7,1>: Cost 3 vsldoi4 <3,u,4,7>, <1,2,3,4>
+ 2728113373U, // <u,4,7,2>: Cost 3 vsldoi8 <7,2,u,4>, <7,2,u,4>
+ 2640825150U, // <u,4,7,3>: Cost 3 vsldoi4 <3,u,4,7>, <3,u,4,7>
+ 1235815632U, // <u,4,7,4>: Cost 2 vmrglw RHS, <4,4,4,4>
+ 1235814094U, // <u,4,7,5>: Cost 2 vmrglw RHS, <2,3,4,5>
+ 2730767905U, // <u,4,7,6>: Cost 3 vsldoi8 <7,6,u,4>, <7,6,u,4>
+ 2309556892U, // <u,4,7,7>: Cost 3 vmrglw RHS, <3,6,4,7>
+ 1235814097U, // <u,4,7,u>: Cost 2 vmrglw RHS, <2,3,4,u>
+ 1561116774U, // <u,4,u,0>: Cost 2 vsldoi4 <2,u,4,u>, LHS
+ 1618540334U, // <u,4,u,1>: Cost 2 vsldoi8 <1,2,u,4>, LHS
+ 1561118822U, // <u,4,u,2>: Cost 2 vsldoi4 <2,u,4,u>, <2,u,4,u>
+ 2692282300U, // <u,4,u,3>: Cost 3 vsldoi8 <1,2,u,4>, <u,3,0,1>
+ 229035318U, // <u,4,u,4>: Cost 1 vspltisw0 RHS
+ 120368438U, // <u,4,u,5>: Cost 1 vmrghw LHS, RHS
+ 604859945U, // <u,4,u,6>: Cost 1 vsldoi12 LHS, RHS
+ 2309565084U, // <u,4,u,7>: Cost 3 vmrglw RHS, <3,6,4,7>
+ 604859963U, // <u,4,u,u>: Cost 1 vsldoi12 LHS, RHS
+ 2690293760U, // <u,5,0,0>: Cost 3 vsldoi8 <0,u,u,5>, <0,0,0,0>
+ 1616552038U, // <u,5,0,1>: Cost 2 vsldoi8 <0,u,u,5>, LHS
+ 2640840434U, // <u,5,0,2>: Cost 3 vsldoi4 <3,u,5,0>, <2,3,u,5>
+ 2640841536U, // <u,5,0,3>: Cost 3 vsldoi4 <3,u,5,0>, <3,u,5,0>
+ 1613381970U, // <u,5,0,4>: Cost 2 vsldoi8 <0,4,1,5>, <0,4,1,5>
+ 2316135642U, // <u,5,0,5>: Cost 3 vmrglw <5,6,u,0>, <4,4,5,5>
+ 2289592834U, // <u,5,0,6>: Cost 3 vmrglw <1,2,u,0>, <3,4,5,6>
+ 2664732324U, // <u,5,0,7>: Cost 3 vsldoi4 <7,u,5,0>, <7,u,5,0>
+ 1616552661U, // <u,5,0,u>: Cost 2 vsldoi8 <0,u,u,5>, <0,u,u,5>
+ 1573077094U, // <u,5,1,0>: Cost 2 vsldoi4 <4,u,5,1>, LHS
+ 1237536282U, // <u,5,1,1>: Cost 2 vmrglw <4,u,5,1>, <4,u,5,1>
+ 2690294678U, // <u,5,1,2>: Cost 3 vsldoi8 <0,u,u,5>, <1,2,3,0>
+ 2646821014U, // <u,5,1,3>: Cost 3 vsldoi4 <4,u,5,1>, <3,0,1,2>
+ 1573080602U, // <u,5,1,4>: Cost 2 vsldoi4 <4,u,5,1>, <4,u,5,1>
+ 1189466116U, // <u,5,1,5>: Cost 2 vmrghw LHS, <5,5,5,5>
+ 1189466210U, // <u,5,1,6>: Cost 2 vmrghw LHS, <5,6,7,0>
+ 2646823930U, // <u,5,1,7>: Cost 3 vsldoi4 <4,u,5,1>, <7,0,1,2>
+ 1573082926U, // <u,5,1,u>: Cost 2 vsldoi4 <4,u,5,1>, LHS
+ 2640855142U, // <u,5,2,0>: Cost 3 vsldoi4 <3,u,5,2>, LHS
+ 2697594448U, // <u,5,2,1>: Cost 3 vsldoi8 <2,1,u,5>, <2,1,u,5>
+ 2690295400U, // <u,5,2,2>: Cost 3 vsldoi8 <0,u,u,5>, <2,2,2,2>
+ 1625179890U, // <u,5,2,3>: Cost 2 vsldoi8 <2,3,u,5>, <2,3,u,5>
+ 2699585347U, // <u,5,2,4>: Cost 3 vsldoi8 <2,4,u,5>, <2,4,u,5>
+ 2781171471U, // <u,5,2,5>: Cost 3 vsldoi12 RHS, <5,2,5,3>
+ 2690295738U, // <u,5,2,6>: Cost 3 vsldoi8 <0,u,u,5>, <2,6,3,7>
+ 3775318070U, // <u,5,2,7>: Cost 4 vsldoi8 <2,7,u,5>, <2,7,u,5>
+ 1628498055U, // <u,5,2,u>: Cost 2 vsldoi8 <2,u,u,5>, <2,u,u,5>
+ 2287627234U, // <u,5,3,0>: Cost 3 vmrglw LHS, <4,1,5,0>
+ 1257016210U, // <u,5,3,1>: Cost 2 vmrglw LHS, <4,0,5,1>
+ 2646836942U, // <u,5,3,2>: Cost 3 vsldoi4 <4,u,5,3>, <2,3,4,5>
+ 2287625131U, // <u,5,3,3>: Cost 3 vmrglw LHS, <1,2,5,3>
+ 2287627238U, // <u,5,3,4>: Cost 3 vmrglw LHS, <4,1,5,4>
+ 1257016538U, // <u,5,3,5>: Cost 2 vmrglw LHS, <4,4,5,5>
+ 1209240066U, // <u,5,3,6>: Cost 2 vmrglw LHS, <3,4,5,6>
+ 2287625459U, // <u,5,3,7>: Cost 3 vmrglw LHS, <1,6,5,7>
+ 1209240068U, // <u,5,3,u>: Cost 2 vmrglw LHS, <3,4,5,u>
+ 2640871526U, // <u,5,4,0>: Cost 3 vsldoi4 <3,u,5,4>, LHS
+ 2316168082U, // <u,5,4,1>: Cost 3 vmrglw <5,6,u,4>, <4,0,5,1>
+ 2640873202U, // <u,5,4,2>: Cost 3 vsldoi4 <3,u,5,4>, <2,3,u,5>
+ 2640874308U, // <u,5,4,3>: Cost 3 vsldoi4 <3,u,5,4>, <3,u,5,4>
+ 1637788917U, // <u,5,4,4>: Cost 2 vsldoi8 <4,4,u,5>, <4,4,u,5>
+ 1616555318U, // <u,5,4,5>: Cost 2 vsldoi8 <0,u,u,5>, RHS
+ 2287638591U, // <u,5,4,6>: Cost 3 vmrglw <0,u,u,4>, <u,4,5,6>
+ 2664765096U, // <u,5,4,7>: Cost 3 vsldoi4 <7,u,5,4>, <7,u,5,4>
+ 1616555561U, // <u,5,4,u>: Cost 2 vsldoi8 <0,u,u,5>, RHS
+ 1573109862U, // <u,5,5,0>: Cost 2 vsldoi4 <4,u,5,5>, LHS
+ 2646852404U, // <u,5,5,1>: Cost 3 vsldoi4 <4,u,5,5>, <1,1,1,1>
+ 2646853224U, // <u,5,5,2>: Cost 3 vsldoi4 <4,u,5,5>, <2,2,2,2>
+ 2287646618U, // <u,5,5,3>: Cost 3 vmrglw <0,u,u,5>, <u,2,5,3>
+ 1573113374U, // <u,5,5,4>: Cost 2 vsldoi4 <4,u,5,5>, <4,u,5,5>
+ 296144182U, // <u,5,5,5>: Cost 1 vspltisw1 RHS
+ 1192448098U, // <u,5,5,6>: Cost 2 vmrghw RHS, <5,6,7,0>
+ 2287646946U, // <u,5,5,7>: Cost 3 vmrglw <0,u,u,5>, <u,6,5,7>
+ 296144182U, // <u,5,5,u>: Cost 1 vspltisw1 RHS
+ 1567146086U, // <u,5,6,0>: Cost 2 vsldoi4 <3,u,5,6>, LHS
+ 2628945300U, // <u,5,6,1>: Cost 3 vsldoi4 <1,u,5,6>, <1,u,5,6>
+ 2634917997U, // <u,5,6,2>: Cost 3 vsldoi4 <2,u,5,6>, <2,u,5,6>
+ 1567148870U, // <u,5,6,3>: Cost 2 vsldoi4 <3,u,5,6>, <3,u,5,6>
+ 1567149366U, // <u,5,6,4>: Cost 2 vsldoi4 <3,u,5,6>, RHS
+ 2781171799U, // <u,5,6,5>: Cost 3 vsldoi12 RHS, <5,6,5,7>
+ 1228950018U, // <u,5,6,6>: Cost 2 vmrglw <3,4,5,6>, <3,4,5,6>
+ 27705344U, // <u,5,6,7>: Cost 0 copy RHS
+ 27705344U, // <u,5,6,u>: Cost 0 copy RHS
+ 2628952166U, // <u,5,7,0>: Cost 3 vsldoi4 <1,u,5,7>, LHS
+ 1235815314U, // <u,5,7,1>: Cost 2 vmrglw RHS, <4,0,5,1>
+ 2309556734U, // <u,5,7,2>: Cost 3 vmrglw RHS, <3,4,5,2>
+ 2309555115U, // <u,5,7,3>: Cost 3 vmrglw RHS, <1,2,5,3>
+ 2628955446U, // <u,5,7,4>: Cost 3 vsldoi4 <1,u,5,7>, RHS
+ 1235815642U, // <u,5,7,5>: Cost 2 vmrglw RHS, <4,4,5,5>
+ 1235814914U, // <u,5,7,6>: Cost 2 vmrglw RHS, <3,4,5,6>
+ 2309555443U, // <u,5,7,7>: Cost 3 vmrglw RHS, <1,6,5,7>
+ 1235814916U, // <u,5,7,u>: Cost 2 vmrglw RHS, <3,4,5,u>
+ 1567162470U, // <u,5,u,0>: Cost 2 vsldoi4 <3,u,5,u>, LHS
+ 1616557870U, // <u,5,u,1>: Cost 2 vsldoi8 <0,u,u,5>, LHS
+ 2690299781U, // <u,5,u,2>: Cost 3 vsldoi8 <0,u,u,5>, <u,2,3,0>
+ 1567165256U, // <u,5,u,3>: Cost 2 vsldoi4 <3,u,5,u>, <3,u,5,u>
+ 1567165750U, // <u,5,u,4>: Cost 2 vsldoi4 <3,u,5,u>, RHS
+ 296144182U, // <u,5,u,5>: Cost 1 vspltisw1 RHS
+ 1209281026U, // <u,5,u,6>: Cost 2 vmrglw LHS, <3,4,5,6>
+ 27705344U, // <u,5,u,7>: Cost 0 copy RHS
+ 27705344U, // <u,5,u,u>: Cost 0 copy RHS
+ 2705563648U, // <u,6,0,0>: Cost 3 vsldoi8 <3,4,u,6>, <0,0,0,0>
+ 1631821926U, // <u,6,0,1>: Cost 2 vsldoi8 <3,4,u,6>, LHS
+ 2262462970U, // <u,6,0,2>: Cost 3 vmrghw <u,0,1,2>, <6,2,7,3>
+ 2646886941U, // <u,6,0,3>: Cost 3 vsldoi4 <4,u,6,0>, <3,4,u,6>
+ 2705563986U, // <u,6,0,4>: Cost 3 vsldoi8 <3,4,u,6>, <0,4,1,5>
+ 2316062652U, // <u,6,0,5>: Cost 3 vmrglw <5,6,7,0>, <5,4,6,5>
+ 2316137272U, // <u,6,0,6>: Cost 3 vmrglw <5,6,u,0>, <6,6,6,6>
+ 1215851830U, // <u,6,0,7>: Cost 2 vmrglw <1,2,u,0>, RHS
+ 1215851831U, // <u,6,0,u>: Cost 2 vmrglw <1,2,u,0>, RHS
+ 2634948710U, // <u,6,1,0>: Cost 3 vsldoi4 <2,u,6,1>, LHS
+ 2705564468U, // <u,6,1,1>: Cost 3 vsldoi8 <3,4,u,6>, <1,1,1,1>
+ 1189466618U, // <u,6,1,2>: Cost 2 vmrghw LHS, <6,2,7,3>
+ 2263208498U, // <u,6,1,3>: Cost 3 vmrghw LHS, <6,3,4,5>
+ 2693620843U, // <u,6,1,4>: Cost 3 vsldoi8 <1,4,u,6>, <1,4,u,6>
+ 2652868860U, // <u,6,1,5>: Cost 3 vsldoi4 <5,u,6,1>, <5,u,6,1>
+ 1189466936U, // <u,6,1,6>: Cost 2 vmrghw LHS, <6,6,6,6>
+ 1213869366U, // <u,6,1,7>: Cost 2 vmrglw <0,u,u,1>, RHS
+ 1213869367U, // <u,6,1,u>: Cost 2 vmrglw <0,u,u,1>, RHS
+ 2658844774U, // <u,6,2,0>: Cost 3 vsldoi4 <6,u,6,2>, LHS
+ 3771344465U, // <u,6,2,1>: Cost 4 vsldoi8 <2,1,u,6>, <2,1,u,6>
+ 1178554874U, // <u,6,2,2>: Cost 2 vmrghw <6,2,7,3>, <6,2,7,3>
+ 2698929907U, // <u,6,2,3>: Cost 3 vsldoi8 <2,3,u,6>, <2,3,u,6>
+ 2699593540U, // <u,6,2,4>: Cost 3 vsldoi8 <2,4,u,6>, <2,4,u,6>
+ 2700257173U, // <u,6,2,5>: Cost 3 vsldoi8 <2,5,u,6>, <2,5,u,6>
+ 2705565626U, // <u,6,2,6>: Cost 3 vsldoi8 <3,4,u,6>, <2,6,3,7>
+ 1226485046U, // <u,6,2,7>: Cost 2 vmrglw <3,0,u,2>, RHS
+ 1226485047U, // <u,6,2,u>: Cost 2 vmrglw <3,0,u,2>, RHS
+ 2705565846U, // <u,6,3,0>: Cost 3 vsldoi8 <3,4,u,6>, <3,0,1,2>
+ 2330756585U, // <u,6,3,1>: Cost 3 vmrglw LHS, <2,0,6,1>
+ 2330756829U, // <u,6,3,2>: Cost 3 vmrglw LHS, <2,3,6,2>
+ 2282981734U, // <u,6,3,3>: Cost 3 vmrglw LHS, <3,2,6,3>
+ 1631824413U, // <u,6,3,4>: Cost 2 vsldoi8 <3,4,u,6>, <3,4,u,6>
+ 2652885246U, // <u,6,3,5>: Cost 3 vsldoi4 <5,u,6,3>, <5,u,6,3>
+ 1257018168U, // <u,6,3,6>: Cost 2 vmrglw LHS, <6,6,6,6>
+ 135499062U, // <u,6,3,7>: Cost 1 vmrglw LHS, RHS
+ 135499063U, // <u,6,3,u>: Cost 1 vmrglw LHS, RHS
+ 2646917222U, // <u,6,4,0>: Cost 3 vsldoi4 <4,u,6,4>, LHS
+ 2217365931U, // <u,6,4,1>: Cost 3 vmrghw <0,4,1,5>, <6,1,7,5>
+ 2790167156U, // <u,6,4,2>: Cost 3 vsldoi12 <6,4,2,u>, <6,4,2,u>
+ 2646919709U, // <u,6,4,3>: Cost 3 vsldoi4 <4,u,6,4>, <3,4,u,6>
+ 2711538934U, // <u,6,4,4>: Cost 3 vsldoi8 <4,4,u,6>, <4,4,u,6>
+ 1631825206U, // <u,6,4,5>: Cost 2 vsldoi8 <3,4,u,6>, RHS
+ 2316170040U, // <u,6,4,6>: Cost 3 vmrglw <5,6,u,4>, <6,6,6,6>
+ 1215884598U, // <u,6,4,7>: Cost 2 vmrglw <1,2,u,4>, RHS
+ 1215884599U, // <u,6,4,u>: Cost 2 vmrglw <1,2,u,4>, RHS
+ 2634981478U, // <u,6,5,0>: Cost 3 vsldoi4 <2,u,6,5>, LHS
+ 2266190247U, // <u,6,5,1>: Cost 3 vmrghw RHS, <6,1,7,1>
+ 1192448506U, // <u,6,5,2>: Cost 2 vmrghw RHS, <6,2,7,3>
+ 2266190386U, // <u,6,5,3>: Cost 3 vmrghw RHS, <6,3,4,5>
+ 2634984758U, // <u,6,5,4>: Cost 3 vsldoi4 <2,u,6,5>, RHS
+ 2652901632U, // <u,6,5,5>: Cost 3 vsldoi4 <5,u,6,5>, <5,u,6,5>
+ 1192448824U, // <u,6,5,6>: Cost 2 vmrghw RHS, <6,6,6,6>
+ 1213902134U, // <u,6,5,7>: Cost 2 vmrglw <0,u,u,5>, RHS
+ 1213902135U, // <u,6,5,u>: Cost 2 vmrglw <0,u,u,5>, RHS
+ 1583808614U, // <u,6,6,0>: Cost 2 vsldoi4 <6,6,6,6>, LHS
+ 2322010445U, // <u,6,6,1>: Cost 3 vmrglw <6,6,6,6>, <6,0,6,1>
+ 2718839290U, // <u,6,6,2>: Cost 3 vsldoi8 <5,6,u,6>, <6,2,7,3>
+ 2670823965U, // <u,6,6,3>: Cost 3 vsldoi4 <u,u,6,6>, <3,4,u,6>
+ 1583811894U, // <u,6,6,4>: Cost 2 vsldoi4 <6,6,6,6>, RHS
+ 2724147961U, // <u,6,6,5>: Cost 3 vsldoi8 <6,5,u,6>, <6,5,u,6>
+ 363253046U, // <u,6,6,6>: Cost 1 vspltisw2 RHS
+ 1229172022U, // <u,6,6,7>: Cost 2 vmrglw <3,4,u,6>, RHS
+ 363253046U, // <u,6,6,u>: Cost 1 vspltisw2 RHS
+ 499458150U, // <u,6,7,0>: Cost 1 vsldoi4 RHS, LHS
+ 1573200692U, // <u,6,7,1>: Cost 2 vsldoi4 RHS, <1,1,1,1>
+ 1573201512U, // <u,6,7,2>: Cost 2 vsldoi4 RHS, <2,2,2,2>
+ 1573202070U, // <u,6,7,3>: Cost 2 vsldoi4 RHS, <3,0,1,2>
+ 499461673U, // <u,6,7,4>: Cost 1 vsldoi4 RHS, RHS
+ 1573203972U, // <u,6,7,5>: Cost 2 vsldoi4 RHS, <5,5,5,5>
+ 1235817272U, // <u,6,7,6>: Cost 2 vmrglw RHS, <6,6,6,6>
+ 162073910U, // <u,6,7,7>: Cost 1 vmrglw RHS, RHS
+ 162073911U, // <u,6,7,u>: Cost 1 vmrglw RHS, RHS
+ 499466342U, // <u,6,u,0>: Cost 1 vsldoi4 RHS, LHS
+ 1631827758U, // <u,6,u,1>: Cost 2 vsldoi8 <3,4,u,6>, LHS
+ 1573209704U, // <u,6,u,2>: Cost 2 vsldoi4 RHS, <2,2,2,2>
+ 1573210262U, // <u,6,u,3>: Cost 2 vsldoi4 RHS, <3,0,1,2>
+ 499469866U, // <u,6,u,4>: Cost 1 vsldoi4 RHS, RHS
+ 1631828122U, // <u,6,u,5>: Cost 2 vsldoi8 <3,4,u,6>, RHS
+ 363253046U, // <u,6,u,6>: Cost 1 vspltisw2 RHS
+ 135540022U, // <u,6,u,7>: Cost 1 vmrglw LHS, RHS
+ 135540023U, // <u,6,u,u>: Cost 1 vmrglw LHS, RHS
+ 1638465536U, // <u,7,0,0>: Cost 2 vsldoi8 RHS, <0,0,0,0>
+ 564723814U, // <u,7,0,1>: Cost 1 vsldoi8 RHS, LHS
+ 2712207533U, // <u,7,0,2>: Cost 3 vsldoi8 RHS, <0,2,1,2>
+ 2712207612U, // <u,7,0,3>: Cost 3 vsldoi8 RHS, <0,3,1,0>
+ 1638465874U, // <u,7,0,4>: Cost 2 vsldoi8 RHS, <0,4,1,5>
+ 1579192580U, // <u,7,0,5>: Cost 2 vsldoi4 <5,u,7,0>, <5,u,7,0>
+ 2712207862U, // <u,7,0,6>: Cost 3 vsldoi8 RHS, <0,6,1,7>
+ 2316137282U, // <u,7,0,7>: Cost 3 vmrglw <5,6,u,0>, <6,6,7,7>
+ 564724381U, // <u,7,0,u>: Cost 1 vsldoi8 RHS, LHS
+ 1189467130U, // <u,7,1,0>: Cost 2 vmrghw LHS, <7,0,1,2>
+ 1638466356U, // <u,7,1,1>: Cost 2 vsldoi8 RHS, <1,1,1,1>
+ 1638466454U, // <u,7,1,2>: Cost 2 vsldoi8 RHS, <1,2,3,0>
+ 2311500282U, // <u,7,1,3>: Cost 3 vmrglw <4,u,u,1>, <6,2,7,3>
+ 1189467494U, // <u,7,1,4>: Cost 2 vmrghw LHS, <7,4,5,6>
+ 2712208495U, // <u,7,1,5>: Cost 3 vsldoi8 RHS, <1,5,0,1>
+ 2694956302U, // <u,7,1,6>: Cost 3 vsldoi8 <1,6,u,7>, <1,6,u,7>
+ 1189467756U, // <u,7,1,7>: Cost 2 vmrghw LHS, <7,7,7,7>
+ 1638466940U, // <u,7,1,u>: Cost 2 vsldoi8 RHS, <1,u,3,0>
+ 2712208829U, // <u,7,2,0>: Cost 3 vsldoi8 RHS, <2,0,1,2>
+ 2712208927U, // <u,7,2,1>: Cost 3 vsldoi8 RHS, <2,1,3,1>
+ 1638467176U, // <u,7,2,2>: Cost 2 vsldoi8 RHS, <2,2,2,2>
+ 1638467238U, // <u,7,2,3>: Cost 2 vsldoi8 RHS, <2,3,0,1>
+ 2712209165U, // <u,7,2,4>: Cost 3 vsldoi8 RHS, <2,4,2,5>
+ 2712209256U, // <u,7,2,5>: Cost 3 vsldoi8 RHS, <2,5,3,6>
+ 1627187175U, // <u,7,2,6>: Cost 2 vsldoi8 <2,6,u,7>, <2,6,u,7>
+ 2324116290U, // <u,7,2,7>: Cost 3 vmrglw <7,0,u,2>, <6,6,7,7>
+ 1628514441U, // <u,7,2,u>: Cost 2 vsldoi8 <2,u,u,7>, <2,u,u,7>
+ 1638467734U, // <u,7,3,0>: Cost 2 vsldoi8 RHS, <3,0,1,2>
+ 2712209638U, // <u,7,3,1>: Cost 3 vsldoi8 RHS, <3,1,1,1>
+ 2700929387U, // <u,7,3,2>: Cost 3 vsldoi8 <2,6,u,7>, <3,2,6,u>
+ 1638467996U, // <u,7,3,3>: Cost 2 vsldoi8 RHS, <3,3,3,3>
+ 1638468098U, // <u,7,3,4>: Cost 2 vsldoi8 RHS, <3,4,5,6>
+ 2712210002U, // <u,7,3,5>: Cost 3 vsldoi8 RHS, <3,5,5,5>
+ 1585189856U, // <u,7,3,6>: Cost 2 vsldoi4 <6,u,7,3>, <6,u,7,3>
+ 1257018178U, // <u,7,3,7>: Cost 2 vmrglw LHS, <6,6,7,7>
+ 1638468382U, // <u,7,3,u>: Cost 2 vsldoi8 RHS, <3,u,1,2>
+ 1638468498U, // <u,7,4,0>: Cost 2 vsldoi8 RHS, <4,0,5,1>
+ 2712210378U, // <u,7,4,1>: Cost 3 vsldoi8 RHS, <4,1,2,3>
+ 2712210485U, // <u,7,4,2>: Cost 3 vsldoi8 RHS, <4,2,5,2>
+ 2712210564U, // <u,7,4,3>: Cost 3 vsldoi8 RHS, <4,3,5,0>
+ 1638468816U, // <u,7,4,4>: Cost 2 vsldoi8 RHS, <4,4,4,4>
+ 564727112U, // <u,7,4,5>: Cost 1 vsldoi8 RHS, RHS
+ 2712210809U, // <u,7,4,6>: Cost 3 vsldoi8 RHS, <4,6,5,2>
+ 2712210888U, // <u,7,4,7>: Cost 3 vsldoi8 RHS, <4,7,5,0>
+ 564727337U, // <u,7,4,u>: Cost 1 vsldoi8 RHS, RHS
+ 1192449018U, // <u,7,5,0>: Cost 2 vmrghw RHS, <7,0,1,2>
+ 2714201743U, // <u,7,5,1>: Cost 3 vsldoi8 RHS, <5,1,0,1>
+ 2712211198U, // <u,7,5,2>: Cost 3 vsldoi8 RHS, <5,2,3,4>
+ 2311533050U, // <u,7,5,3>: Cost 3 vmrglw <4,u,u,5>, <6,2,7,3>
+ 1192449382U, // <u,7,5,4>: Cost 2 vmrghw RHS, <7,4,5,6>
+ 1638469636U, // <u,7,5,5>: Cost 2 vsldoi8 RHS, <5,5,5,5>
+ 1638469730U, // <u,7,5,6>: Cost 2 vsldoi8 RHS, <5,6,7,0>
+ 1192449644U, // <u,7,5,7>: Cost 2 vmrghw RHS, <7,7,7,7>
+ 1638469892U, // <u,7,5,u>: Cost 2 vsldoi8 RHS, <5,u,7,0>
+ 2712211745U, // <u,7,6,0>: Cost 3 vsldoi8 RHS, <6,0,1,2>
+ 2712211879U, // <u,7,6,1>: Cost 3 vsldoi8 RHS, <6,1,7,1>
+ 1638470138U, // <u,7,6,2>: Cost 2 vsldoi8 RHS, <6,2,7,3>
+ 2712212018U, // <u,7,6,3>: Cost 3 vsldoi8 RHS, <6,3,4,5>
+ 2712212109U, // <u,7,6,4>: Cost 3 vsldoi8 RHS, <6,4,5,6>
+ 2712212203U, // <u,7,6,5>: Cost 3 vsldoi8 RHS, <6,5,7,1>
+ 1638470456U, // <u,7,6,6>: Cost 2 vsldoi8 RHS, <6,6,6,6>
+ 1638470478U, // <u,7,6,7>: Cost 2 vsldoi8 RHS, <6,7,0,1>
+ 1638470559U, // <u,7,6,u>: Cost 2 vsldoi8 RHS, <6,u,0,1>
+ 1235816546U, // <u,7,7,0>: Cost 2 vmrglw RHS, <5,6,7,0>
+ 2309558371U, // <u,7,7,1>: Cost 3 vmrglw RHS, <5,6,7,1>
+ 2641045434U, // <u,7,7,2>: Cost 3 vsldoi4 <3,u,7,7>, <2,6,3,7>
+ 1235816954U, // <u,7,7,3>: Cost 2 vmrglw RHS, <6,2,7,3>
+ 1235816550U, // <u,7,7,4>: Cost 2 vmrglw RHS, <5,6,7,4>
+ 2309558375U, // <u,7,7,5>: Cost 3 vmrglw RHS, <5,6,7,5>
+ 1585222628U, // <u,7,7,6>: Cost 2 vsldoi4 <6,u,7,7>, <6,u,7,7>
+ 430361910U, // <u,7,7,7>: Cost 1 vspltisw3 RHS
+ 430361910U, // <u,7,7,u>: Cost 1 vspltisw3 RHS
+ 1638471379U, // <u,7,u,0>: Cost 2 vsldoi8 RHS, <u,0,1,2>
+ 564729646U, // <u,7,u,1>: Cost 1 vsldoi8 RHS, LHS
+ 1638471557U, // <u,7,u,2>: Cost 2 vsldoi8 RHS, <u,2,3,0>
+ 1638471612U, // <u,7,u,3>: Cost 2 vsldoi8 RHS, <u,3,0,1>
+ 1638471743U, // <u,7,u,4>: Cost 2 vsldoi8 RHS, <u,4,5,6>
+ 564730010U, // <u,7,u,5>: Cost 1 vsldoi8 RHS, RHS
+ 1638471888U, // <u,7,u,6>: Cost 2 vsldoi8 RHS, <u,6,3,7>
+ 430361910U, // <u,7,u,7>: Cost 1 vspltisw3 RHS
+ 564730213U, // <u,7,u,u>: Cost 1 vsldoi8 RHS, LHS
+ 202162278U, // <u,u,0,0>: Cost 1 vspltisw0 LHS
+ 538189985U, // <u,u,0,1>: Cost 1 vsldoi8 LHS, LHS
+ 2685673645U, // <u,u,0,2>: Cost 3 vsldoi8 LHS, <0,2,1,2>
+ 1215848604U, // <u,u,0,3>: Cost 2 vmrglw <1,2,u,0>, LHS
+ 1611931986U, // <u,u,0,4>: Cost 2 vsldoi8 LHS, <0,4,1,5>
+ 1579266317U, // <u,u,0,5>: Cost 2 vsldoi4 <5,u,u,0>, <5,u,u,0>
+ 2289592861U, // <u,u,0,6>: Cost 3 vmrglw <1,2,u,0>, <3,4,u,6>
+ 1215851848U, // <u,u,0,7>: Cost 2 vmrglw <1,2,u,0>, RHS
+ 538190493U, // <u,u,0,u>: Cost 1 vsldoi8 LHS, LHS
+ 1549411025U, // <u,u,1,0>: Cost 2 vsldoi4 <0,u,u,1>, <0,u,u,1>
+ 115726126U, // <u,u,1,1>: Cost 1 vmrghw LHS, LHS
+ 604862254U, // <u,u,1,2>: Cost 1 vsldoi12 LHS, LHS
+ 1213866140U, // <u,u,1,3>: Cost 2 vmrglw <0,u,u,1>, LHS
+ 1549413686U, // <u,u,1,4>: Cost 2 vsldoi4 <0,u,u,1>, RHS
+ 115726490U, // <u,u,1,5>: Cost 1 vmrghw LHS, RHS
+ 1585247207U, // <u,u,1,6>: Cost 2 vsldoi4 <6,u,u,1>, <6,u,u,1>
+ 1213869384U, // <u,u,1,7>: Cost 2 vmrglw <0,u,u,1>, RHS
+ 604862308U, // <u,u,1,u>: Cost 1 vsldoi12 LHS, LHS
+ 1567334502U, // <u,u,2,0>: Cost 2 vsldoi4 <3,u,u,2>, LHS
+ 1190180654U, // <u,u,2,1>: Cost 2 vmrghw <u,2,3,0>, LHS
+ 336380006U, // <u,u,2,2>: Cost 1 vspltisw2 LHS
+ 835584U, // <u,u,2,3>: Cost 0 copy LHS
+ 1567337782U, // <u,u,2,4>: Cost 2 vsldoi4 <3,u,u,2>, RHS
+ 1190181018U, // <u,u,2,5>: Cost 2 vmrghw <u,2,3,0>, RHS
+ 1611933626U, // <u,u,2,6>: Cost 2 vsldoi8 LHS, <2,6,3,7>
+ 1226485064U, // <u,u,2,7>: Cost 2 vmrglw <3,0,u,2>, RHS
+ 835584U, // <u,u,2,u>: Cost 0 copy LHS
+ 475685587U, // <u,u,3,0>: Cost 1 vsldoi4 LHS, LHS
+ 1209239278U, // <u,u,3,1>: Cost 2 vmrglw LHS, <2,3,u,1>
+ 1209239765U, // <u,u,3,2>: Cost 2 vmrglw LHS, <3,0,u,2>
+ 135495836U, // <u,u,3,3>: Cost 1 vmrglw LHS, LHS
+ 475688246U, // <u,u,3,4>: Cost 1 vsldoi4 LHS, RHS
+ 1209239282U, // <u,u,3,5>: Cost 2 vmrglw LHS, <2,3,u,5>
+ 1209240093U, // <u,u,3,6>: Cost 2 vmrglw LHS, <3,4,u,6>
+ 135499080U, // <u,u,3,7>: Cost 1 vmrglw LHS, RHS
+ 135495841U, // <u,u,3,u>: Cost 1 vmrglw LHS, LHS
+ 1555406950U, // <u,u,4,0>: Cost 2 vsldoi4 <1,u,u,4>, LHS
+ 1555408301U, // <u,u,4,1>: Cost 2 vsldoi4 <1,u,u,4>, <1,u,u,4>
+ 2289625301U, // <u,u,4,2>: Cost 3 vmrglw <1,2,u,4>, <3,0,u,2>
+ 1215881372U, // <u,u,4,3>: Cost 2 vmrglw <1,2,u,4>, LHS
+ 229035318U, // <u,u,4,4>: Cost 1 vspltisw0 RHS
+ 538193206U, // <u,u,4,5>: Cost 1 vsldoi8 LHS, RHS
+ 2289625629U, // <u,u,4,6>: Cost 3 vmrglw <1,2,u,4>, <3,4,u,6>
+ 1215884616U, // <u,u,4,7>: Cost 2 vmrglw <1,2,u,4>, RHS
+ 538193449U, // <u,u,4,u>: Cost 1 vsldoi8 LHS, RHS
+ 1549443797U, // <u,u,5,0>: Cost 2 vsldoi4 <0,u,u,5>, <0,u,u,5>
+ 118708014U, // <u,u,5,1>: Cost 1 vmrghw RHS, LHS
+ 1561389191U, // <u,u,5,2>: Cost 2 vsldoi4 <2,u,u,5>, <2,u,u,5>
+ 1213898908U, // <u,u,5,3>: Cost 2 vmrglw <0,u,u,5>, LHS
+ 1549446454U, // <u,u,5,4>: Cost 2 vsldoi4 <0,u,u,5>, RHS
+ 118708378U, // <u,u,5,5>: Cost 1 vmrghw RHS, RHS
+ 604862618U, // <u,u,5,6>: Cost 1 vsldoi12 LHS, RHS
+ 1213902152U, // <u,u,5,7>: Cost 2 vmrglw <0,u,u,5>, RHS
+ 604862636U, // <u,u,5,u>: Cost 1 vsldoi12 LHS, RHS
+ 1567367270U, // <u,u,6,0>: Cost 2 vsldoi4 <3,u,u,6>, LHS
+ 1192892206U, // <u,u,6,1>: Cost 2 vmrghw <u,6,3,7>, LHS
+ 1638478330U, // <u,u,6,2>: Cost 2 vsldoi8 RHS, <6,2,7,3>
+ 1679046864U, // <u,u,6,3>: Cost 2 vsldoi12 LHS, <u,6,3,7>
+ 1567370550U, // <u,u,6,4>: Cost 2 vsldoi4 <3,u,u,6>, RHS
+ 1192892570U, // <u,u,6,5>: Cost 2 vmrghw <u,6,3,7>, RHS
+ 363253046U, // <u,u,6,6>: Cost 1 vspltisw2 RHS
+ 27705344U, // <u,u,6,7>: Cost 0 copy RHS
+ 27705344U, // <u,u,6,u>: Cost 0 copy RHS
+ 499605606U, // <u,u,7,0>: Cost 1 vsldoi4 RHS, LHS
+ 1235812425U, // <u,u,7,1>: Cost 2 vmrglw RHS, <0,0,u,1>
+ 1561405577U, // <u,u,7,2>: Cost 2 vsldoi4 <2,u,u,7>, <2,u,u,7>
+ 162070684U, // <u,u,7,3>: Cost 1 vmrglw RHS, LHS
+ 499609147U, // <u,u,7,4>: Cost 1 vsldoi4 RHS, RHS
+ 1235812753U, // <u,u,7,5>: Cost 2 vmrglw RHS, <0,4,u,5>
+ 1235814941U, // <u,u,7,6>: Cost 2 vmrglw RHS, <3,4,u,6>
+ 162073928U, // <u,u,7,7>: Cost 1 vmrglw RHS, RHS
+ 162070689U, // <u,u,7,u>: Cost 1 vmrglw RHS, LHS
+ 475726552U, // <u,u,u,0>: Cost 1 vsldoi4 LHS, LHS
+ 538195758U, // <u,u,u,1>: Cost 1 vsldoi8 LHS, LHS
+ 604862821U, // <u,u,u,2>: Cost 1 vsldoi12 LHS, LHS
+ 835584U, // <u,u,u,3>: Cost 0 copy LHS
+ 475729206U, // <u,u,u,4>: Cost 1 vsldoi4 LHS, RHS
+ 538196122U, // <u,u,u,5>: Cost 1 vsldoi8 LHS, RHS
+ 604862861U, // <u,u,u,6>: Cost 1 vsldoi12 LHS, RHS
+ 27705344U, // <u,u,u,7>: Cost 0 copy RHS
+ 835584U, // <u,u,u,u>: Cost 0 copy LHS
+ 0
+};
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
new file mode 100644
index 0000000..2e90b7a
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -0,0 +1,645 @@
+//===- PPCRegisterInfo.cpp - PowerPC Register Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PowerPC implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "reginfo"
+#include "PPC.h"
+#include "PPCInstrBuilder.h"
+#include "PPCMachineFunctionInfo.h"
+#include "PPCRegisterInfo.h"
+#include "PPCFrameLowering.h"
+#include "PPCSubtarget.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include <cstdlib>
+
+#define GET_REGINFO_TARGET_DESC
+#include "PPCGenRegisterInfo.inc"
+
+// FIXME (64-bit): Eventually enable by default.
+namespace llvm {
+cl::opt<bool> EnablePPC32RS("enable-ppc32-regscavenger",
+ cl::init(false),
+ cl::desc("Enable PPC32 register scavenger"),
+ cl::Hidden);
+cl::opt<bool> EnablePPC64RS("enable-ppc64-regscavenger",
+ cl::init(false),
+ cl::desc("Enable PPC64 register scavenger"),
+ cl::Hidden);
+}
+
+using namespace llvm;
+
+// FIXME (64-bit): Should be inlined.
+bool
+PPCRegisterInfo::requiresRegisterScavenging(const MachineFunction &) const {
+ return ((EnablePPC32RS && !Subtarget.isPPC64()) ||
+ (EnablePPC64RS && Subtarget.isPPC64()));
+}
+
+PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST,
+ const TargetInstrInfo &tii)
+ : PPCGenRegisterInfo(ST.isPPC64() ? PPC::LR8 : PPC::LR,
+ ST.isPPC64() ? 0 : 1,
+ ST.isPPC64() ? 0 : 1),
+ Subtarget(ST), TII(tii) {
+ ImmToIdxMap[PPC::LD] = PPC::LDX; ImmToIdxMap[PPC::STD] = PPC::STDX;
+ ImmToIdxMap[PPC::LBZ] = PPC::LBZX; ImmToIdxMap[PPC::STB] = PPC::STBX;
+ ImmToIdxMap[PPC::LHZ] = PPC::LHZX; ImmToIdxMap[PPC::LHA] = PPC::LHAX;
+ ImmToIdxMap[PPC::LWZ] = PPC::LWZX; ImmToIdxMap[PPC::LWA] = PPC::LWAX;
+ ImmToIdxMap[PPC::LFS] = PPC::LFSX; ImmToIdxMap[PPC::LFD] = PPC::LFDX;
+ ImmToIdxMap[PPC::STH] = PPC::STHX; ImmToIdxMap[PPC::STW] = PPC::STWX;
+ ImmToIdxMap[PPC::STFS] = PPC::STFSX; ImmToIdxMap[PPC::STFD] = PPC::STFDX;
+ ImmToIdxMap[PPC::ADDI] = PPC::ADD4;
+
+ // 64-bit
+ ImmToIdxMap[PPC::LHA8] = PPC::LHAX8; ImmToIdxMap[PPC::LBZ8] = PPC::LBZX8;
+ ImmToIdxMap[PPC::LHZ8] = PPC::LHZX8; ImmToIdxMap[PPC::LWZ8] = PPC::LWZX8;
+ ImmToIdxMap[PPC::STB8] = PPC::STBX8; ImmToIdxMap[PPC::STH8] = PPC::STHX8;
+ ImmToIdxMap[PPC::STW8] = PPC::STWX8; ImmToIdxMap[PPC::STDU] = PPC::STDUX;
+ ImmToIdxMap[PPC::ADDI8] = PPC::ADD8; ImmToIdxMap[PPC::STD_32] = PPC::STDX_32;
+}
+
+/// getPointerRegClass - Return the register class to use to hold pointers.
+/// This is used for addressing modes.
+const TargetRegisterClass *
+PPCRegisterInfo::getPointerRegClass(unsigned Kind) const {
+ if (Subtarget.isPPC64())
+ return &PPC::G8RCRegClass;
+ return &PPC::GPRCRegClass;
+}
+
+const unsigned*
+PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+ // 32-bit Darwin calling convention.
+ static const unsigned Darwin32_CalleeSavedRegs[] = {
+ PPC::R13, PPC::R14, PPC::R15,
+ PPC::R16, PPC::R17, PPC::R18, PPC::R19,
+ PPC::R20, PPC::R21, PPC::R22, PPC::R23,
+ PPC::R24, PPC::R25, PPC::R26, PPC::R27,
+ PPC::R28, PPC::R29, PPC::R30, PPC::R31,
+
+ PPC::F14, PPC::F15, PPC::F16, PPC::F17,
+ PPC::F18, PPC::F19, PPC::F20, PPC::F21,
+ PPC::F22, PPC::F23, PPC::F24, PPC::F25,
+ PPC::F26, PPC::F27, PPC::F28, PPC::F29,
+ PPC::F30, PPC::F31,
+
+ PPC::CR2, PPC::CR3, PPC::CR4,
+ PPC::V20, PPC::V21, PPC::V22, PPC::V23,
+ PPC::V24, PPC::V25, PPC::V26, PPC::V27,
+ PPC::V28, PPC::V29, PPC::V30, PPC::V31,
+
+ PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN,
+ PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN,
+ PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN,
+
+ PPC::LR, 0
+ };
+
+ // 32-bit SVR4 calling convention.
+ static const unsigned SVR4_CalleeSavedRegs[] = {
+ PPC::R14, PPC::R15,
+ PPC::R16, PPC::R17, PPC::R18, PPC::R19,
+ PPC::R20, PPC::R21, PPC::R22, PPC::R23,
+ PPC::R24, PPC::R25, PPC::R26, PPC::R27,
+ PPC::R28, PPC::R29, PPC::R30, PPC::R31,
+
+ PPC::F14, PPC::F15, PPC::F16, PPC::F17,
+ PPC::F18, PPC::F19, PPC::F20, PPC::F21,
+ PPC::F22, PPC::F23, PPC::F24, PPC::F25,
+ PPC::F26, PPC::F27, PPC::F28, PPC::F29,
+ PPC::F30, PPC::F31,
+
+ PPC::CR2, PPC::CR3, PPC::CR4,
+
+ PPC::VRSAVE,
+
+ PPC::V20, PPC::V21, PPC::V22, PPC::V23,
+ PPC::V24, PPC::V25, PPC::V26, PPC::V27,
+ PPC::V28, PPC::V29, PPC::V30, PPC::V31,
+
+ PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN,
+ PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN,
+ PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN,
+
+ 0
+ };
+ // 64-bit Darwin calling convention.
+ static const unsigned Darwin64_CalleeSavedRegs[] = {
+ PPC::X14, PPC::X15,
+ PPC::X16, PPC::X17, PPC::X18, PPC::X19,
+ PPC::X20, PPC::X21, PPC::X22, PPC::X23,
+ PPC::X24, PPC::X25, PPC::X26, PPC::X27,
+ PPC::X28, PPC::X29, PPC::X30, PPC::X31,
+
+ PPC::F14, PPC::F15, PPC::F16, PPC::F17,
+ PPC::F18, PPC::F19, PPC::F20, PPC::F21,
+ PPC::F22, PPC::F23, PPC::F24, PPC::F25,
+ PPC::F26, PPC::F27, PPC::F28, PPC::F29,
+ PPC::F30, PPC::F31,
+
+ PPC::CR2, PPC::CR3, PPC::CR4,
+ PPC::V20, PPC::V21, PPC::V22, PPC::V23,
+ PPC::V24, PPC::V25, PPC::V26, PPC::V27,
+ PPC::V28, PPC::V29, PPC::V30, PPC::V31,
+
+ PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN,
+ PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN,
+ PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN,
+
+ PPC::LR8, 0
+ };
+
+ // 64-bit SVR4 calling convention.
+ static const unsigned SVR4_64_CalleeSavedRegs[] = {
+ PPC::X14, PPC::X15,
+ PPC::X16, PPC::X17, PPC::X18, PPC::X19,
+ PPC::X20, PPC::X21, PPC::X22, PPC::X23,
+ PPC::X24, PPC::X25, PPC::X26, PPC::X27,
+ PPC::X28, PPC::X29, PPC::X30, PPC::X31,
+
+ PPC::F14, PPC::F15, PPC::F16, PPC::F17,
+ PPC::F18, PPC::F19, PPC::F20, PPC::F21,
+ PPC::F22, PPC::F23, PPC::F24, PPC::F25,
+ PPC::F26, PPC::F27, PPC::F28, PPC::F29,
+ PPC::F30, PPC::F31,
+
+ PPC::CR2, PPC::CR3, PPC::CR4,
+
+ PPC::VRSAVE,
+
+ PPC::V20, PPC::V21, PPC::V22, PPC::V23,
+ PPC::V24, PPC::V25, PPC::V26, PPC::V27,
+ PPC::V28, PPC::V29, PPC::V30, PPC::V31,
+
+ PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN,
+ PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN,
+ PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN,
+
+ 0
+ };
+
+ if (Subtarget.isDarwinABI())
+ return Subtarget.isPPC64() ? Darwin64_CalleeSavedRegs :
+ Darwin32_CalleeSavedRegs;
+
+ return Subtarget.isPPC64() ? SVR4_64_CalleeSavedRegs : SVR4_CalleeSavedRegs;
+}
+
+BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+ const PPCFrameLowering *PPCFI =
+ static_cast<const PPCFrameLowering*>(MF.getTarget().getFrameLowering());
+
+ Reserved.set(PPC::R0);
+ Reserved.set(PPC::R1);
+ Reserved.set(PPC::LR);
+ Reserved.set(PPC::LR8);
+ Reserved.set(PPC::RM);
+
+ // The SVR4 ABI reserves r2 and r13
+ if (Subtarget.isSVR4ABI()) {
+ Reserved.set(PPC::R2); // System-reserved register
+ Reserved.set(PPC::R13); // Small Data Area pointer register
+ }
+ // Reserve R2 on Darwin to hack around the problem of save/restore of CR
+ // when the stack frame is too big to address directly; we need two regs.
+ // This is a hack.
+ if (Subtarget.isDarwinABI()) {
+ Reserved.set(PPC::R2);
+ }
+
+ // On PPC64, r13 is the thread pointer. Never allocate this register.
+ // Note that this is over conservative, as it also prevents allocation of R31
+ // when the FP is not needed.
+ if (Subtarget.isPPC64()) {
+ Reserved.set(PPC::R13);
+ Reserved.set(PPC::R31);
+
+ if (!requiresRegisterScavenging(MF))
+ Reserved.set(PPC::R0); // FIXME (64-bit): Remove
+
+ Reserved.set(PPC::X0);
+ Reserved.set(PPC::X1);
+ Reserved.set(PPC::X13);
+ Reserved.set(PPC::X31);
+
+ // The 64-bit SVR4 ABI reserves r2 for the TOC pointer.
+ if (Subtarget.isSVR4ABI()) {
+ Reserved.set(PPC::X2);
+ }
+ // Reserve R2 on Darwin to hack around the problem of save/restore of CR
+ // when the stack frame is too big to address directly; we need two regs.
+ // This is a hack.
+ if (Subtarget.isDarwinABI()) {
+ Reserved.set(PPC::X2);
+ }
+ }
+
+ if (PPCFI->needsFP(MF))
+ Reserved.set(PPC::R31);
+
+ return Reserved;
+}
+
+//===----------------------------------------------------------------------===//
+// Stack Frame Processing methods
+//===----------------------------------------------------------------------===//
+
+void PPCRegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ if (GuaranteedTailCallOpt && I->getOpcode() == PPC::ADJCALLSTACKUP) {
+ // Add (actually subtract) back the amount the callee popped on return.
+ if (int CalleeAmt = I->getOperand(1).getImm()) {
+ bool is64Bit = Subtarget.isPPC64();
+ CalleeAmt *= -1;
+ unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1;
+ unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0;
+ unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI;
+ unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4;
+ unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS;
+ unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI;
+ MachineInstr *MI = I;
+ DebugLoc dl = MI->getDebugLoc();
+
+ if (isInt<16>(CalleeAmt)) {
+ BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg).addReg(StackReg).
+ addImm(CalleeAmt);
+ } else {
+ MachineBasicBlock::iterator MBBI = I;
+ BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg)
+ .addImm(CalleeAmt >> 16);
+ BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg)
+ .addReg(TmpReg, RegState::Kill)
+ .addImm(CalleeAmt & 0xFFFF);
+ BuildMI(MBB, MBBI, dl, TII.get(ADDInstr))
+ .addReg(StackReg)
+ .addReg(StackReg)
+ .addReg(TmpReg);
+ }
+ }
+ }
+ // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
+ MBB.erase(I);
+}
+
+/// findScratchRegister - Find a 'free' PPC register. Try for a call-clobbered
+/// register first and then a spilled callee-saved register if that fails.
+static
+unsigned findScratchRegister(MachineBasicBlock::iterator II, RegScavenger *RS,
+ const TargetRegisterClass *RC, int SPAdj) {
+ assert(RS && "Register scavenging must be on");
+ unsigned Reg = RS->FindUnusedReg(RC);
+ // FIXME: move ARM callee-saved reg scan to target independent code, then
+ // search for already spilled CS register here.
+ if (Reg == 0)
+ Reg = RS->scavengeRegister(RC, II, SPAdj);
+ return Reg;
+}
+
+/// lowerDynamicAlloc - Generate the code for allocating an object in the
+/// current frame. The sequence of code with be in the general form
+///
+/// addi R0, SP, \#frameSize ; get the address of the previous frame
+/// stwxu R0, SP, Rnegsize ; add and update the SP with the negated size
+/// addi Rnew, SP, \#maxCalFrameSize ; get the top of the allocation
+///
+void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS) const {
+ // Get the instruction.
+ MachineInstr &MI = *II;
+ // Get the instruction's basic block.
+ MachineBasicBlock &MBB = *MI.getParent();
+ // Get the basic block's function.
+ MachineFunction &MF = *MBB.getParent();
+ // Get the frame info.
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ // Determine whether 64-bit pointers are used.
+ bool LP64 = Subtarget.isPPC64();
+ DebugLoc dl = MI.getDebugLoc();
+
+ // Get the maximum call stack size.
+ unsigned maxCallFrameSize = MFI->getMaxCallFrameSize();
+ // Get the total frame size.
+ unsigned FrameSize = MFI->getStackSize();
+
+ // Get stack alignments.
+ unsigned TargetAlign = MF.getTarget().getFrameLowering()->getStackAlignment();
+ unsigned MaxAlign = MFI->getMaxAlignment();
+ if (MaxAlign > TargetAlign)
+ report_fatal_error("Dynamic alloca with large aligns not supported");
+
+ // Determine the previous frame's address. If FrameSize can't be
+ // represented as 16 bits or we need special alignment, then we load the
+ // previous frame's address from 0(SP). Why not do an addis of the hi?
+ // Because R0 is our only safe tmp register and addi/addis treat R0 as zero.
+ // Constructing the constant and adding would take 3 instructions.
+ // Fortunately, a frame greater than 32K is rare.
+ const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+ const TargetRegisterClass *RC = LP64 ? G8RC : GPRC;
+
+ // FIXME (64-bit): Use "findScratchRegister"
+ unsigned Reg;
+ if (requiresRegisterScavenging(MF))
+ Reg = findScratchRegister(II, RS, RC, SPAdj);
+ else
+ Reg = PPC::R0;
+
+ if (MaxAlign < TargetAlign && isInt<16>(FrameSize)) {
+ BuildMI(MBB, II, dl, TII.get(PPC::ADDI), Reg)
+ .addReg(PPC::R31)
+ .addImm(FrameSize);
+ } else if (LP64) {
+ if (requiresRegisterScavenging(MF)) // FIXME (64-bit): Use "true" part.
+ BuildMI(MBB, II, dl, TII.get(PPC::LD), Reg)
+ .addImm(0)
+ .addReg(PPC::X1);
+ else
+ BuildMI(MBB, II, dl, TII.get(PPC::LD), PPC::X0)
+ .addImm(0)
+ .addReg(PPC::X1);
+ } else {
+ BuildMI(MBB, II, dl, TII.get(PPC::LWZ), Reg)
+ .addImm(0)
+ .addReg(PPC::R1);
+ }
+
+ // Grow the stack and update the stack pointer link, then determine the
+ // address of new allocated space.
+ if (LP64) {
+ if (requiresRegisterScavenging(MF)) // FIXME (64-bit): Use "true" part.
+ BuildMI(MBB, II, dl, TII.get(PPC::STDUX))
+ .addReg(Reg, RegState::Kill)
+ .addReg(PPC::X1)
+ .addReg(MI.getOperand(1).getReg());
+ else
+ BuildMI(MBB, II, dl, TII.get(PPC::STDUX))
+ .addReg(PPC::X0, RegState::Kill)
+ .addReg(PPC::X1)
+ .addReg(MI.getOperand(1).getReg());
+
+ if (!MI.getOperand(1).isKill())
+ BuildMI(MBB, II, dl, TII.get(PPC::ADDI8), MI.getOperand(0).getReg())
+ .addReg(PPC::X1)
+ .addImm(maxCallFrameSize);
+ else
+ // Implicitly kill the register.
+ BuildMI(MBB, II, dl, TII.get(PPC::ADDI8), MI.getOperand(0).getReg())
+ .addReg(PPC::X1)
+ .addImm(maxCallFrameSize)
+ .addReg(MI.getOperand(1).getReg(), RegState::ImplicitKill);
+ } else {
+ BuildMI(MBB, II, dl, TII.get(PPC::STWUX))
+ .addReg(Reg, RegState::Kill)
+ .addReg(PPC::R1)
+ .addReg(MI.getOperand(1).getReg());
+
+ if (!MI.getOperand(1).isKill())
+ BuildMI(MBB, II, dl, TII.get(PPC::ADDI), MI.getOperand(0).getReg())
+ .addReg(PPC::R1)
+ .addImm(maxCallFrameSize);
+ else
+ // Implicitly kill the register.
+ BuildMI(MBB, II, dl, TII.get(PPC::ADDI), MI.getOperand(0).getReg())
+ .addReg(PPC::R1)
+ .addImm(maxCallFrameSize)
+ .addReg(MI.getOperand(1).getReg(), RegState::ImplicitKill);
+ }
+
+ // Discard the DYNALLOC instruction.
+ MBB.erase(II);
+}
+
+/// lowerCRSpilling - Generate the code for spilling a CR register. Instead of
+/// reserving a whole register (R0), we scrounge for one here. This generates
+/// code like this:
+///
+/// mfcr rA ; Move the conditional register into GPR rA.
+/// rlwinm rA, rA, SB, 0, 31 ; Shift the bits left so they are in CR0's slot.
+/// stw rA, FI ; Store rA to the frame.
+///
+void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II,
+ unsigned FrameIndex, int SPAdj,
+ RegScavenger *RS) const {
+ // Get the instruction.
+ MachineInstr &MI = *II; // ; SPILL_CR <SrcReg>, <offset>, <FI>
+ // Get the instruction's basic block.
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc dl = MI.getDebugLoc();
+
+ const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+ const TargetRegisterClass *RC = Subtarget.isPPC64() ? G8RC : GPRC;
+ unsigned Reg = findScratchRegister(II, RS, RC, SPAdj);
+ unsigned SrcReg = MI.getOperand(0).getReg();
+ bool LP64 = Subtarget.isPPC64();
+
+ // We need to store the CR in the low 4-bits of the saved value. First, issue
+ // an MFCRpsued to save all of the CRBits and, if needed, kill the SrcReg.
+ BuildMI(MBB, II, dl, TII.get(PPC::MFCRpseud), Reg)
+ .addReg(SrcReg, getKillRegState(MI.getOperand(0).isKill()));
+
+ // If the saved register wasn't CR0, shift the bits left so that they are in
+ // CR0's slot.
+ if (SrcReg != PPC::CR0)
+ // rlwinm rA, rA, ShiftBits, 0, 31.
+ BuildMI(MBB, II, dl, TII.get(PPC::RLWINM), Reg)
+ .addReg(Reg, RegState::Kill)
+ .addImm(getPPCRegisterNumbering(SrcReg) * 4)
+ .addImm(0)
+ .addImm(31);
+
+ addFrameReference(BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::STW8 : PPC::STW))
+ .addReg(Reg, getKillRegState(MI.getOperand(1).getImm())),
+ FrameIndex);
+
+ // Discard the pseudo instruction.
+ MBB.erase(II);
+}
+
+void
+PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS) const {
+ assert(SPAdj == 0 && "Unexpected");
+
+ // Get the instruction.
+ MachineInstr &MI = *II;
+ // Get the instruction's basic block.
+ MachineBasicBlock &MBB = *MI.getParent();
+ // Get the basic block's function.
+ MachineFunction &MF = *MBB.getParent();
+ // Get the frame info.
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ DebugLoc dl = MI.getDebugLoc();
+
+ // Find out which operand is the frame index.
+ unsigned FIOperandNo = 0;
+ while (!MI.getOperand(FIOperandNo).isFI()) {
+ ++FIOperandNo;
+ assert(FIOperandNo != MI.getNumOperands() &&
+ "Instr doesn't have FrameIndex operand!");
+ }
+ // Take into account whether it's an add or mem instruction
+ unsigned OffsetOperandNo = (FIOperandNo == 2) ? 1 : 2;
+ if (MI.isInlineAsm())
+ OffsetOperandNo = FIOperandNo-1;
+
+ // Get the frame index.
+ int FrameIndex = MI.getOperand(FIOperandNo).getIndex();
+
+ // Get the frame pointer save index. Users of this index are primarily
+ // DYNALLOC instructions.
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ int FPSI = FI->getFramePointerSaveIndex();
+ // Get the instruction opcode.
+ unsigned OpC = MI.getOpcode();
+
+ // Special case for dynamic alloca.
+ if (FPSI && FrameIndex == FPSI &&
+ (OpC == PPC::DYNALLOC || OpC == PPC::DYNALLOC8)) {
+ lowerDynamicAlloc(II, SPAdj, RS);
+ return;
+ }
+
+ // Special case for pseudo-op SPILL_CR.
+ if (requiresRegisterScavenging(MF)) // FIXME (64-bit): Enable by default.
+ if (OpC == PPC::SPILL_CR) {
+ lowerCRSpilling(II, FrameIndex, SPAdj, RS);
+ return;
+ }
+
+ // Replace the FrameIndex with base register with GPR1 (SP) or GPR31 (FP).
+ MI.getOperand(FIOperandNo).ChangeToRegister(TFI->hasFP(MF) ?
+ PPC::R31 : PPC::R1,
+ false);
+
+ // Figure out if the offset in the instruction is shifted right two bits. This
+ // is true for instructions like "STD", which the machine implicitly adds two
+ // low zeros to.
+ bool isIXAddr = false;
+ switch (OpC) {
+ case PPC::LWA:
+ case PPC::LD:
+ case PPC::STD:
+ case PPC::STD_32:
+ isIXAddr = true;
+ break;
+ }
+
+ // Now add the frame object offset to the offset from r1.
+ int Offset = MFI->getObjectOffset(FrameIndex);
+ if (!isIXAddr)
+ Offset += MI.getOperand(OffsetOperandNo).getImm();
+ else
+ Offset += MI.getOperand(OffsetOperandNo).getImm() << 2;
+
+ // If we're not using a Frame Pointer that has been set to the value of the
+ // SP before having the stack size subtracted from it, then add the stack size
+ // to Offset to get the correct offset.
+ // Naked functions have stack size 0, although getStackSize may not reflect that
+ // because we didn't call all the pieces that compute it for naked functions.
+ if (!MF.getFunction()->hasFnAttr(Attribute::Naked))
+ Offset += MFI->getStackSize();
+
+ // If we can, encode the offset directly into the instruction. If this is a
+ // normal PPC "ri" instruction, any 16-bit value can be safely encoded. If
+ // this is a PPC64 "ix" instruction, only a 16-bit value with the low two bits
+ // clear can be encoded. This is extremely uncommon, because normally you
+ // only "std" to a stack slot that is at least 4-byte aligned, but it can
+ // happen in invalid code.
+ if (isInt<16>(Offset) && (!isIXAddr || (Offset & 3) == 0)) {
+ if (isIXAddr)
+ Offset >>= 2; // The actual encoded value has the low two bits zero.
+ MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset);
+ return;
+ }
+
+ // The offset doesn't fit into a single register, scavenge one to build the
+ // offset in.
+ // FIXME: figure out what SPAdj is doing here.
+
+ // FIXME (64-bit): Use "findScratchRegister".
+ unsigned SReg;
+ if (requiresRegisterScavenging(MF))
+ SReg = findScratchRegister(II, RS, &PPC::GPRCRegClass, SPAdj);
+ else
+ SReg = PPC::R0;
+
+ // Insert a set of rA with the full offset value before the ld, st, or add
+ BuildMI(MBB, II, dl, TII.get(PPC::LIS), SReg)
+ .addImm(Offset >> 16);
+ BuildMI(MBB, II, dl, TII.get(PPC::ORI), SReg)
+ .addReg(SReg, RegState::Kill)
+ .addImm(Offset);
+
+ // Convert into indexed form of the instruction:
+ //
+ // sth 0:rA, 1:imm 2:(rB) ==> sthx 0:rA, 2:rB, 1:r0
+ // addi 0:rA 1:rB, 2, imm ==> add 0:rA, 1:rB, 2:r0
+ unsigned OperandBase;
+
+ if (OpC != TargetOpcode::INLINEASM) {
+ assert(ImmToIdxMap.count(OpC) &&
+ "No indexed form of load or store available!");
+ unsigned NewOpcode = ImmToIdxMap.find(OpC)->second;
+ MI.setDesc(TII.get(NewOpcode));
+ OperandBase = 1;
+ } else {
+ OperandBase = OffsetOperandNo;
+ }
+
+ unsigned StackReg = MI.getOperand(FIOperandNo).getReg();
+ MI.getOperand(OperandBase).ChangeToRegister(StackReg, false);
+ MI.getOperand(OperandBase + 1).ChangeToRegister(SReg, false);
+}
+
+unsigned PPCRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ if (!Subtarget.isPPC64())
+ return TFI->hasFP(MF) ? PPC::R31 : PPC::R1;
+ else
+ return TFI->hasFP(MF) ? PPC::X31 : PPC::X1;
+}
+
+unsigned PPCRegisterInfo::getEHExceptionRegister() const {
+ return !Subtarget.isPPC64() ? PPC::R3 : PPC::X3;
+}
+
+unsigned PPCRegisterInfo::getEHHandlerRegister() const {
+ return !Subtarget.isPPC64() ? PPC::R4 : PPC::X4;
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
new file mode 100644
index 0000000..1cc7213
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -0,0 +1,70 @@
+//===- PPCRegisterInfo.h - PowerPC Register Information Impl -----*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PowerPC implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef POWERPC32_REGISTERINFO_H
+#define POWERPC32_REGISTERINFO_H
+
+#include "PPC.h"
+#include <map>
+
+#define GET_REGINFO_HEADER
+#include "PPCGenRegisterInfo.inc"
+
+namespace llvm {
+class PPCSubtarget;
+class TargetInstrInfo;
+class Type;
+
+class PPCRegisterInfo : public PPCGenRegisterInfo {
+ std::map<unsigned, unsigned> ImmToIdxMap;
+ const PPCSubtarget &Subtarget;
+ const TargetInstrInfo &TII;
+public:
+ PPCRegisterInfo(const PPCSubtarget &SubTarget, const TargetInstrInfo &tii);
+
+ /// getPointerRegClass - Return the register class to use to hold pointers.
+ /// This is used for addressing modes.
+ virtual const TargetRegisterClass *getPointerRegClass(unsigned Kind=0) const;
+
+ /// Code Generation virtual methods...
+ const unsigned *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ /// requiresRegisterScavenging - We require a register scavenger.
+ /// FIXME (64-bit): Should be inlined.
+ bool requiresRegisterScavenging(const MachineFunction &MF) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ void lowerDynamicAlloc(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS) const;
+ void lowerCRSpilling(MachineBasicBlock::iterator II, unsigned FrameIndex,
+ int SPAdj, RegScavenger *RS) const;
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
+
+ // Debug information queries.
+ unsigned getFrameRegister(const MachineFunction &MF) const;
+
+ // Exception handling queries.
+ unsigned getEHExceptionRegister() const;
+ unsigned getEHHandlerRegister() const;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
new file mode 100644
index 0000000..1acdf4e
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -0,0 +1,326 @@
+//===- PPCRegisterInfo.td - The PowerPC Register File ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+let Namespace = "PPC" in {
+def sub_lt : SubRegIndex;
+def sub_gt : SubRegIndex;
+def sub_eq : SubRegIndex;
+def sub_un : SubRegIndex;
+def sub_32 : SubRegIndex;
+}
+
+
+class PPCReg<string n> : Register<n> {
+ let Namespace = "PPC";
+}
+
+// We identify all our registers with a 5-bit ID, for consistency's sake.
+
+// GPR - One of the 32 32-bit general-purpose registers
+class GPR<bits<5> num, string n> : PPCReg<n> {
+ field bits<5> Num = num;
+}
+
+// GP8 - One of the 32 64-bit general-purpose registers
+class GP8<GPR SubReg, string n> : PPCReg<n> {
+ field bits<5> Num = SubReg.Num;
+ let SubRegs = [SubReg];
+ let SubRegIndices = [sub_32];
+}
+
+// SPR - One of the 32-bit special-purpose registers
+class SPR<bits<10> num, string n> : PPCReg<n> {
+ field bits<10> Num = num;
+}
+
+// FPR - One of the 32 64-bit floating-point registers
+class FPR<bits<5> num, string n> : PPCReg<n> {
+ field bits<5> Num = num;
+}
+
+// VR - One of the 32 128-bit vector registers
+class VR<bits<5> num, string n> : PPCReg<n> {
+ field bits<5> Num = num;
+}
+
+// CR - One of the 8 4-bit condition registers
+class CR<bits<3> num, string n, list<Register> subregs> : PPCReg<n> {
+ field bits<3> Num = num;
+ let SubRegs = subregs;
+}
+
+// CRBIT - One of the 32 1-bit condition register fields
+class CRBIT<bits<5> num, string n> : PPCReg<n> {
+ field bits<5> Num = num;
+}
+
+
+// General-purpose registers
+def R0 : GPR< 0, "r0">, DwarfRegNum<[-2, 0]>;
+def R1 : GPR< 1, "r1">, DwarfRegNum<[-2, 1]>;
+def R2 : GPR< 2, "r2">, DwarfRegNum<[-2, 2]>;
+def R3 : GPR< 3, "r3">, DwarfRegNum<[-2, 3]>;
+def R4 : GPR< 4, "r4">, DwarfRegNum<[-2, 4]>;
+def R5 : GPR< 5, "r5">, DwarfRegNum<[-2, 5]>;
+def R6 : GPR< 6, "r6">, DwarfRegNum<[-2, 6]>;
+def R7 : GPR< 7, "r7">, DwarfRegNum<[-2, 7]>;
+def R8 : GPR< 8, "r8">, DwarfRegNum<[-2, 8]>;
+def R9 : GPR< 9, "r9">, DwarfRegNum<[-2, 9]>;
+def R10 : GPR<10, "r10">, DwarfRegNum<[-2, 10]>;
+def R11 : GPR<11, "r11">, DwarfRegNum<[-2, 11]>;
+def R12 : GPR<12, "r12">, DwarfRegNum<[-2, 12]>;
+def R13 : GPR<13, "r13">, DwarfRegNum<[-2, 13]>;
+def R14 : GPR<14, "r14">, DwarfRegNum<[-2, 14]>;
+def R15 : GPR<15, "r15">, DwarfRegNum<[-2, 15]>;
+def R16 : GPR<16, "r16">, DwarfRegNum<[-2, 16]>;
+def R17 : GPR<17, "r17">, DwarfRegNum<[-2, 17]>;
+def R18 : GPR<18, "r18">, DwarfRegNum<[-2, 18]>;
+def R19 : GPR<19, "r19">, DwarfRegNum<[-2, 19]>;
+def R20 : GPR<20, "r20">, DwarfRegNum<[-2, 20]>;
+def R21 : GPR<21, "r21">, DwarfRegNum<[-2, 21]>;
+def R22 : GPR<22, "r22">, DwarfRegNum<[-2, 22]>;
+def R23 : GPR<23, "r23">, DwarfRegNum<[-2, 23]>;
+def R24 : GPR<24, "r24">, DwarfRegNum<[-2, 24]>;
+def R25 : GPR<25, "r25">, DwarfRegNum<[-2, 25]>;
+def R26 : GPR<26, "r26">, DwarfRegNum<[-2, 26]>;
+def R27 : GPR<27, "r27">, DwarfRegNum<[-2, 27]>;
+def R28 : GPR<28, "r28">, DwarfRegNum<[-2, 28]>;
+def R29 : GPR<29, "r29">, DwarfRegNum<[-2, 29]>;
+def R30 : GPR<30, "r30">, DwarfRegNum<[-2, 30]>;
+def R31 : GPR<31, "r31">, DwarfRegNum<[-2, 31]>;
+
+// 64-bit General-purpose registers
+def X0 : GP8< R0, "r0">, DwarfRegNum<[0, -2]>;
+def X1 : GP8< R1, "r1">, DwarfRegNum<[1, -2]>;
+def X2 : GP8< R2, "r2">, DwarfRegNum<[2, -2]>;
+def X3 : GP8< R3, "r3">, DwarfRegNum<[3, -2]>;
+def X4 : GP8< R4, "r4">, DwarfRegNum<[4, -2]>;
+def X5 : GP8< R5, "r5">, DwarfRegNum<[5, -2]>;
+def X6 : GP8< R6, "r6">, DwarfRegNum<[6, -2]>;
+def X7 : GP8< R7, "r7">, DwarfRegNum<[7, -2]>;
+def X8 : GP8< R8, "r8">, DwarfRegNum<[8, -2]>;
+def X9 : GP8< R9, "r9">, DwarfRegNum<[9, -2]>;
+def X10 : GP8<R10, "r10">, DwarfRegNum<[10, -2]>;
+def X11 : GP8<R11, "r11">, DwarfRegNum<[11, -2]>;
+def X12 : GP8<R12, "r12">, DwarfRegNum<[12, -2]>;
+def X13 : GP8<R13, "r13">, DwarfRegNum<[13, -2]>;
+def X14 : GP8<R14, "r14">, DwarfRegNum<[14, -2]>;
+def X15 : GP8<R15, "r15">, DwarfRegNum<[15, -2]>;
+def X16 : GP8<R16, "r16">, DwarfRegNum<[16, -2]>;
+def X17 : GP8<R17, "r17">, DwarfRegNum<[17, -2]>;
+def X18 : GP8<R18, "r18">, DwarfRegNum<[18, -2]>;
+def X19 : GP8<R19, "r19">, DwarfRegNum<[19, -2]>;
+def X20 : GP8<R20, "r20">, DwarfRegNum<[20, -2]>;
+def X21 : GP8<R21, "r21">, DwarfRegNum<[21, -2]>;
+def X22 : GP8<R22, "r22">, DwarfRegNum<[22, -2]>;
+def X23 : GP8<R23, "r23">, DwarfRegNum<[23, -2]>;
+def X24 : GP8<R24, "r24">, DwarfRegNum<[24, -2]>;
+def X25 : GP8<R25, "r25">, DwarfRegNum<[25, -2]>;
+def X26 : GP8<R26, "r26">, DwarfRegNum<[26, -2]>;
+def X27 : GP8<R27, "r27">, DwarfRegNum<[27, -2]>;
+def X28 : GP8<R28, "r28">, DwarfRegNum<[28, -2]>;
+def X29 : GP8<R29, "r29">, DwarfRegNum<[29, -2]>;
+def X30 : GP8<R30, "r30">, DwarfRegNum<[30, -2]>;
+def X31 : GP8<R31, "r31">, DwarfRegNum<[31, -2]>;
+
+// Floating-point registers
+def F0 : FPR< 0, "f0">, DwarfRegNum<[32, 32]>;
+def F1 : FPR< 1, "f1">, DwarfRegNum<[33, 33]>;
+def F2 : FPR< 2, "f2">, DwarfRegNum<[34, 34]>;
+def F3 : FPR< 3, "f3">, DwarfRegNum<[35, 35]>;
+def F4 : FPR< 4, "f4">, DwarfRegNum<[36, 36]>;
+def F5 : FPR< 5, "f5">, DwarfRegNum<[37, 37]>;
+def F6 : FPR< 6, "f6">, DwarfRegNum<[38, 38]>;
+def F7 : FPR< 7, "f7">, DwarfRegNum<[39, 39]>;
+def F8 : FPR< 8, "f8">, DwarfRegNum<[40, 40]>;
+def F9 : FPR< 9, "f9">, DwarfRegNum<[41, 41]>;
+def F10 : FPR<10, "f10">, DwarfRegNum<[42, 42]>;
+def F11 : FPR<11, "f11">, DwarfRegNum<[43, 43]>;
+def F12 : FPR<12, "f12">, DwarfRegNum<[44, 44]>;
+def F13 : FPR<13, "f13">, DwarfRegNum<[45, 45]>;
+def F14 : FPR<14, "f14">, DwarfRegNum<[46, 46]>;
+def F15 : FPR<15, "f15">, DwarfRegNum<[47, 47]>;
+def F16 : FPR<16, "f16">, DwarfRegNum<[48, 48]>;
+def F17 : FPR<17, "f17">, DwarfRegNum<[49, 49]>;
+def F18 : FPR<18, "f18">, DwarfRegNum<[50, 50]>;
+def F19 : FPR<19, "f19">, DwarfRegNum<[51, 51]>;
+def F20 : FPR<20, "f20">, DwarfRegNum<[52, 52]>;
+def F21 : FPR<21, "f21">, DwarfRegNum<[53, 53]>;
+def F22 : FPR<22, "f22">, DwarfRegNum<[54, 54]>;
+def F23 : FPR<23, "f23">, DwarfRegNum<[55, 55]>;
+def F24 : FPR<24, "f24">, DwarfRegNum<[56, 56]>;
+def F25 : FPR<25, "f25">, DwarfRegNum<[57, 57]>;
+def F26 : FPR<26, "f26">, DwarfRegNum<[58, 58]>;
+def F27 : FPR<27, "f27">, DwarfRegNum<[59, 59]>;
+def F28 : FPR<28, "f28">, DwarfRegNum<[60, 60]>;
+def F29 : FPR<29, "f29">, DwarfRegNum<[61, 61]>;
+def F30 : FPR<30, "f30">, DwarfRegNum<[62, 62]>;
+def F31 : FPR<31, "f31">, DwarfRegNum<[63, 63]>;
+
+// Vector registers
+def V0 : VR< 0, "v0">, DwarfRegNum<[77, 77]>;
+def V1 : VR< 1, "v1">, DwarfRegNum<[78, 78]>;
+def V2 : VR< 2, "v2">, DwarfRegNum<[79, 79]>;
+def V3 : VR< 3, "v3">, DwarfRegNum<[80, 80]>;
+def V4 : VR< 4, "v4">, DwarfRegNum<[81, 81]>;
+def V5 : VR< 5, "v5">, DwarfRegNum<[82, 82]>;
+def V6 : VR< 6, "v6">, DwarfRegNum<[83, 83]>;
+def V7 : VR< 7, "v7">, DwarfRegNum<[84, 84]>;
+def V8 : VR< 8, "v8">, DwarfRegNum<[85, 85]>;
+def V9 : VR< 9, "v9">, DwarfRegNum<[86, 86]>;
+def V10 : VR<10, "v10">, DwarfRegNum<[87, 87]>;
+def V11 : VR<11, "v11">, DwarfRegNum<[88, 88]>;
+def V12 : VR<12, "v12">, DwarfRegNum<[89, 89]>;
+def V13 : VR<13, "v13">, DwarfRegNum<[90, 90]>;
+def V14 : VR<14, "v14">, DwarfRegNum<[91, 91]>;
+def V15 : VR<15, "v15">, DwarfRegNum<[92, 92]>;
+def V16 : VR<16, "v16">, DwarfRegNum<[93, 93]>;
+def V17 : VR<17, "v17">, DwarfRegNum<[94, 94]>;
+def V18 : VR<18, "v18">, DwarfRegNum<[95, 95]>;
+def V19 : VR<19, "v19">, DwarfRegNum<[96, 96]>;
+def V20 : VR<20, "v20">, DwarfRegNum<[97, 97]>;
+def V21 : VR<21, "v21">, DwarfRegNum<[98, 98]>;
+def V22 : VR<22, "v22">, DwarfRegNum<[99, 99]>;
+def V23 : VR<23, "v23">, DwarfRegNum<[100, 100]>;
+def V24 : VR<24, "v24">, DwarfRegNum<[101, 101]>;
+def V25 : VR<25, "v25">, DwarfRegNum<[102, 102]>;
+def V26 : VR<26, "v26">, DwarfRegNum<[103, 103]>;
+def V27 : VR<27, "v27">, DwarfRegNum<[104, 104]>;
+def V28 : VR<28, "v28">, DwarfRegNum<[105, 105]>;
+def V29 : VR<29, "v29">, DwarfRegNum<[106, 106]>;
+def V30 : VR<30, "v30">, DwarfRegNum<[107, 107]>;
+def V31 : VR<31, "v31">, DwarfRegNum<[108, 108]>;
+
+// Condition register bits
+def CR0LT : CRBIT< 0, "0">;
+def CR0GT : CRBIT< 1, "1">;
+def CR0EQ : CRBIT< 2, "2">;
+def CR0UN : CRBIT< 3, "3">;
+def CR1LT : CRBIT< 4, "4">;
+def CR1GT : CRBIT< 5, "5">;
+def CR1EQ : CRBIT< 6, "6">;
+def CR1UN : CRBIT< 7, "7">;
+def CR2LT : CRBIT< 8, "8">;
+def CR2GT : CRBIT< 9, "9">;
+def CR2EQ : CRBIT<10, "10">;
+def CR2UN : CRBIT<11, "11">;
+def CR3LT : CRBIT<12, "12">;
+def CR3GT : CRBIT<13, "13">;
+def CR3EQ : CRBIT<14, "14">;
+def CR3UN : CRBIT<15, "15">;
+def CR4LT : CRBIT<16, "16">;
+def CR4GT : CRBIT<17, "17">;
+def CR4EQ : CRBIT<18, "18">;
+def CR4UN : CRBIT<19, "19">;
+def CR5LT : CRBIT<20, "20">;
+def CR5GT : CRBIT<21, "21">;
+def CR5EQ : CRBIT<22, "22">;
+def CR5UN : CRBIT<23, "23">;
+def CR6LT : CRBIT<24, "24">;
+def CR6GT : CRBIT<25, "25">;
+def CR6EQ : CRBIT<26, "26">;
+def CR6UN : CRBIT<27, "27">;
+def CR7LT : CRBIT<28, "28">;
+def CR7GT : CRBIT<29, "29">;
+def CR7EQ : CRBIT<30, "30">;
+def CR7UN : CRBIT<31, "31">;
+
+// Condition registers
+let SubRegIndices = [sub_lt, sub_gt, sub_eq, sub_un] in {
+def CR0 : CR<0, "cr0", [CR0LT, CR0GT, CR0EQ, CR0UN]>, DwarfRegNum<[68, 68]>;
+def CR1 : CR<1, "cr1", [CR1LT, CR1GT, CR1EQ, CR1UN]>, DwarfRegNum<[69, 69]>;
+def CR2 : CR<2, "cr2", [CR2LT, CR2GT, CR2EQ, CR2UN]>, DwarfRegNum<[70, 70]>;
+def CR3 : CR<3, "cr3", [CR3LT, CR3GT, CR3EQ, CR3UN]>, DwarfRegNum<[71, 71]>;
+def CR4 : CR<4, "cr4", [CR4LT, CR4GT, CR4EQ, CR4UN]>, DwarfRegNum<[72, 72]>;
+def CR5 : CR<5, "cr5", [CR5LT, CR5GT, CR5EQ, CR5UN]>, DwarfRegNum<[73, 73]>;
+def CR6 : CR<6, "cr6", [CR6LT, CR6GT, CR6EQ, CR6UN]>, DwarfRegNum<[74, 74]>;
+def CR7 : CR<7, "cr7", [CR7LT, CR7GT, CR7EQ, CR7UN]>, DwarfRegNum<[75, 75]>;
+}
+
+// Link register
+def LR : SPR<8, "lr">, DwarfRegNum<[-2, 65]>;
+//let Aliases = [LR] in
+def LR8 : SPR<8, "lr">, DwarfRegNum<[65, -2]>;
+
+// Count register
+def CTR : SPR<9, "ctr">, DwarfRegNum<[-2, 66]>;
+def CTR8 : SPR<9, "ctr">, DwarfRegNum<[66, -2]>;
+
+// VRsave register
+def VRSAVE: SPR<256, "VRsave">, DwarfRegNum<[109]>;
+
+// Carry bit. In the architecture this is really bit 0 of the XER register
+// (which really is SPR register 1); this is the only bit interesting to a
+// compiler.
+def CARRY: SPR<1, "ca">;
+
+// FP rounding mode: bits 30 and 31 of the FP status and control register
+// This is not allocated as a normal register; it appears only in
+// Uses and Defs. The ABI says it needs to be preserved by a function,
+// but this is not achieved by saving and restoring it as with
+// most registers, it has to be done in code; to make this work all the
+// return and call instructions are described as Uses of RM, so instructions
+// that do nothing but change RM will not get deleted.
+// Also, in the architecture it is not really a SPR; 512 is arbitrary.
+def RM: SPR<512, "**ROUNDING MODE**">;
+
+/// Register classes
+// Allocate volatiles first
+// then nonvolatiles in reverse order since stmw/lmw save from rN to r31
+def GPRC : RegisterClass<"PPC", [i32], 32, (add (sequence "R%u", 2, 12),
+ (sequence "R%u", 30, 13),
+ R31, R0, R1, LR)>;
+
+def G8RC : RegisterClass<"PPC", [i64], 64, (add (sequence "X%u", 2, 12),
+ (sequence "X%u", 30, 14),
+ X31, X13, X0, X1, LR8)>;
+
+// Allocate volatiles first, then non-volatiles in reverse order. With the SVR4
+// ABI the size of the Floating-point register save area is determined by the
+// allocated non-volatile register with the lowest register number, as FP
+// register N is spilled to offset 8 * (32 - N) below the back chain word of the
+// previous stack frame. By allocating non-volatiles in reverse order we make
+// sure that the Floating-point register save area is always as small as
+// possible because there aren't any unused spill slots.
+def F8RC : RegisterClass<"PPC", [f64], 64, (add (sequence "F%u", 0, 13),
+ (sequence "F%u", 31, 14))>;
+def F4RC : RegisterClass<"PPC", [f32], 32, (add F8RC)>;
+
+def VRRC : RegisterClass<"PPC", [v16i8,v8i16,v4i32,v4f32], 128,
+ (add V2, V3, V4, V5, V0, V1, V6, V7, V8, V9, V10, V11,
+ V12, V13, V14, V15, V16, V17, V18, V19, V31, V30,
+ V29, V28, V27, V26, V25, V24, V23, V22, V21, V20)>;
+
+def CRBITRC : RegisterClass<"PPC", [i32], 32,
+ (add CR0LT, CR0GT, CR0EQ, CR0UN,
+ CR1LT, CR1GT, CR1EQ, CR1UN,
+ CR2LT, CR2GT, CR2EQ, CR2UN,
+ CR3LT, CR3GT, CR3EQ, CR3UN,
+ CR4LT, CR4GT, CR4EQ, CR4UN,
+ CR5LT, CR5GT, CR5EQ, CR5UN,
+ CR6LT, CR6GT, CR6EQ, CR6UN,
+ CR7LT, CR7GT, CR7EQ, CR7UN)>
+{
+ let CopyCost = -1;
+}
+
+def CRRC : RegisterClass<"PPC", [i32], 32, (add CR0, CR1, CR5, CR6,
+ CR7, CR2, CR3, CR4)> {
+ let SubRegClasses = [(CRBITRC sub_lt, sub_gt, sub_eq, sub_un)];
+}
+
+def CTRRC : RegisterClass<"PPC", [i32], 32, (add CTR)>;
+def CTRRC8 : RegisterClass<"PPC", [i64], 64, (add CTR8)>;
+def VRSAVERC : RegisterClass<"PPC", [i32], 32, (add VRSAVE)>;
+def CARRYRC : RegisterClass<"PPC", [i32], 32, (add CARRY)> {
+ let CopyCost = -1;
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRelocations.h b/contrib/llvm/lib/Target/PowerPC/PPCRelocations.h
new file mode 100644
index 0000000..a33e7e0
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCRelocations.h
@@ -0,0 +1,56 @@
+//===- PPCRelocations.h - PPC32 Code Relocations ----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the PowerPC 32-bit target-specific relocation types.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PPC32RELOCATIONS_H
+#define PPC32RELOCATIONS_H
+
+#include "llvm/CodeGen/MachineRelocation.h"
+
+// Hack to rid us of a PPC pre-processor symbol which is erroneously
+// defined in a PowerPC header file (bug in Linux/PPC)
+#ifdef PPC
+#undef PPC
+#endif
+
+namespace llvm {
+ namespace PPC {
+ enum RelocationType {
+ // reloc_vanilla - A standard relocation, where the address of the
+ // relocated object completely overwrites the address of the relocation.
+ reloc_vanilla,
+
+ // reloc_pcrel_bx - PC relative relocation, for the b or bl instructions.
+ reloc_pcrel_bx,
+
+ // reloc_pcrel_bcx - PC relative relocation, for BLT,BLE,BEQ,BGE,BGT,BNE,
+ // and other bcx instructions.
+ reloc_pcrel_bcx,
+
+ // reloc_absolute_high - Absolute relocation, for the loadhi instruction
+ // (which is really addis). Add the high 16-bits of the specified global
+ // address into the low 16-bits of the instruction.
+ reloc_absolute_high,
+
+ // reloc_absolute_low - Absolute relocation, for the la instruction (which
+ // is really an addi). Add the low 16-bits of the specified global
+ // address into the low 16-bits of the instruction.
+ reloc_absolute_low,
+
+ // reloc_absolute_low_ix - Absolute relocation for the 64-bit load/store
+ // instruction which have two implicit zero bits.
+ reloc_absolute_low_ix
+ };
+ }
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSchedule.td b/contrib/llvm/lib/Target/PowerPC/PPCSchedule.td
new file mode 100644
index 0000000..9664f14
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCSchedule.td
@@ -0,0 +1,505 @@
+//===- PPCSchedule.td - PowerPC Scheduling Definitions -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Functional units across PowerPC chips sets
+//
+def BPU : FuncUnit; // Branch unit
+def SLU : FuncUnit; // Store/load unit
+def SRU : FuncUnit; // special register unit
+def IU1 : FuncUnit; // integer unit 1 (simple)
+def IU2 : FuncUnit; // integer unit 2 (complex)
+def FPU1 : FuncUnit; // floating point unit 1
+def FPU2 : FuncUnit; // floating point unit 2
+def VPU : FuncUnit; // vector permutation unit
+def VIU1 : FuncUnit; // vector integer unit 1 (simple)
+def VIU2 : FuncUnit; // vector integer unit 2 (complex)
+def VFPU : FuncUnit; // vector floating point unit
+
+//===----------------------------------------------------------------------===//
+// Instruction Itinerary classes used for PowerPC
+//
+def IntGeneral : InstrItinClass;
+def IntCompare : InstrItinClass;
+def IntDivD : InstrItinClass;
+def IntDivW : InstrItinClass;
+def IntMFFS : InstrItinClass;
+def IntMFVSCR : InstrItinClass;
+def IntMTFSB0 : InstrItinClass;
+def IntMTSRD : InstrItinClass;
+def IntMulHD : InstrItinClass;
+def IntMulHW : InstrItinClass;
+def IntMulHWU : InstrItinClass;
+def IntMulLI : InstrItinClass;
+def IntRFID : InstrItinClass;
+def IntRotateD : InstrItinClass;
+def IntRotate : InstrItinClass;
+def IntShift : InstrItinClass;
+def IntTrapD : InstrItinClass;
+def IntTrapW : InstrItinClass;
+def BrB : InstrItinClass;
+def BrCR : InstrItinClass;
+def BrMCR : InstrItinClass;
+def BrMCRX : InstrItinClass;
+def LdStDCBA : InstrItinClass;
+def LdStDCBF : InstrItinClass;
+def LdStDCBI : InstrItinClass;
+def LdStGeneral : InstrItinClass;
+def LdStDSS : InstrItinClass;
+def LdStICBI : InstrItinClass;
+def LdStUX : InstrItinClass;
+def LdStLD : InstrItinClass;
+def LdStLDARX : InstrItinClass;
+def LdStLFD : InstrItinClass;
+def LdStLFDU : InstrItinClass;
+def LdStLHA : InstrItinClass;
+def LdStLMW : InstrItinClass;
+def LdStLVecX : InstrItinClass;
+def LdStLWA : InstrItinClass;
+def LdStLWARX : InstrItinClass;
+def LdStSLBIA : InstrItinClass;
+def LdStSLBIE : InstrItinClass;
+def LdStSTD : InstrItinClass;
+def LdStSTDCX : InstrItinClass;
+def LdStSTVEBX : InstrItinClass;
+def LdStSTWCX : InstrItinClass;
+def LdStSync : InstrItinClass;
+def SprISYNC : InstrItinClass;
+def SprMFSR : InstrItinClass;
+def SprMTMSR : InstrItinClass;
+def SprMTSR : InstrItinClass;
+def SprTLBSYNC : InstrItinClass;
+def SprMFCR : InstrItinClass;
+def SprMFMSR : InstrItinClass;
+def SprMFSPR : InstrItinClass;
+def SprMFTB : InstrItinClass;
+def SprMTSPR : InstrItinClass;
+def SprMTSRIN : InstrItinClass;
+def SprRFI : InstrItinClass;
+def SprSC : InstrItinClass;
+def FPGeneral : InstrItinClass;
+def FPCompare : InstrItinClass;
+def FPDivD : InstrItinClass;
+def FPDivS : InstrItinClass;
+def FPFused : InstrItinClass;
+def FPRes : InstrItinClass;
+def FPSqrt : InstrItinClass;
+def VecGeneral : InstrItinClass;
+def VecFP : InstrItinClass;
+def VecFPCompare : InstrItinClass;
+def VecComplex : InstrItinClass;
+def VecPerm : InstrItinClass;
+def VecFPRound : InstrItinClass;
+def VecVSL : InstrItinClass;
+def VecVSR : InstrItinClass;
+
+//===----------------------------------------------------------------------===//
+// Processor instruction itineraries.
+
+include "PPCScheduleG3.td"
+include "PPCScheduleG4.td"
+include "PPCScheduleG4Plus.td"
+include "PPCScheduleG5.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction to itinerary class map - When add new opcodes to the supported
+// set, refer to the following table to determine which itinerary class the
+// opcode belongs.
+//
+// opcode itinerary class
+// ====== ===============
+// add IntGeneral
+// addc IntGeneral
+// adde IntGeneral
+// addi IntGeneral
+// addic IntGeneral
+// addic. IntGeneral
+// addis IntGeneral
+// addme IntGeneral
+// addze IntGeneral
+// and IntGeneral
+// andc IntGeneral
+// andi. IntGeneral
+// andis. IntGeneral
+// b BrB
+// bc BrB
+// bcctr BrB
+// bclr BrB
+// cmp IntCompare
+// cmpi IntCompare
+// cmpl IntCompare
+// cmpli IntCompare
+// cntlzd IntRotateD
+// cntlzw IntGeneral
+// crand BrCR
+// crandc BrCR
+// creqv BrCR
+// crnand BrCR
+// crnor BrCR
+// cror BrCR
+// crorc BrCR
+// crxor BrCR
+// dcba LdStDCBA
+// dcbf LdStDCBF
+// dcbi LdStDCBI
+// dcbst LdStDCBF
+// dcbt LdStGeneral
+// dcbtst LdStGeneral
+// dcbz LdStDCBF
+// divd IntDivD
+// divdu IntDivD
+// divw IntDivW
+// divwu IntDivW
+// dss LdStDSS
+// dst LdStDSS
+// dstst LdStDSS
+// eciwx LdStGeneral
+// ecowx LdStGeneral
+// eieio LdStGeneral
+// eqv IntGeneral
+// extsb IntGeneral
+// extsh IntGeneral
+// extsw IntRotateD
+// fabs FPGeneral
+// fadd FPGeneral
+// fadds FPGeneral
+// fcfid FPGeneral
+// fcmpo FPCompare
+// fcmpu FPCompare
+// fctid FPGeneral
+// fctidz FPGeneral
+// fctiw FPGeneral
+// fctiwz FPGeneral
+// fdiv FPDivD
+// fdivs FPDivS
+// fmadd FPFused
+// fmadds FPGeneral
+// fmr FPGeneral
+// fmsub FPFused
+// fmsubs FPGeneral
+// fmul FPFused
+// fmuls FPGeneral
+// fnabs FPGeneral
+// fneg FPGeneral
+// fnmadd FPFused
+// fnmadds FPGeneral
+// fnmsub FPFused
+// fnmsubs FPGeneral
+// fres FPRes
+// frsp FPGeneral
+// frsqrte FPGeneral
+// fsel FPGeneral
+// fsqrt FPSqrt
+// fsqrts FPSqrt
+// fsub FPGeneral
+// fsubs FPGeneral
+// icbi LdStICBI
+// isync SprISYNC
+// lbz LdStGeneral
+// lbzu LdStGeneral
+// lbzux LdStUX
+// lbzx LdStGeneral
+// ld LdStLD
+// ldarx LdStLDARX
+// ldu LdStLD
+// ldux LdStLD
+// ldx LdStLD
+// lfd LdStLFD
+// lfdu LdStLFDU
+// lfdux LdStLFDU
+// lfdx LdStLFDU
+// lfs LdStLFDU
+// lfsu LdStLFDU
+// lfsux LdStLFDU
+// lfsx LdStLFDU
+// lha LdStLHA
+// lhau LdStLHA
+// lhaux LdStLHA
+// lhax LdStLHA
+// lhbrx LdStGeneral
+// lhz LdStGeneral
+// lhzu LdStGeneral
+// lhzux LdStUX
+// lhzx LdStGeneral
+// lmw LdStLMW
+// lswi LdStLMW
+// lswx LdStLMW
+// lvebx LdStLVecX
+// lvehx LdStLVecX
+// lvewx LdStLVecX
+// lvsl LdStLVecX
+// lvsr LdStLVecX
+// lvx LdStLVecX
+// lvxl LdStLVecX
+// lwa LdStLWA
+// lwarx LdStLWARX
+// lwaux LdStLHA
+// lwax LdStLHA
+// lwbrx LdStGeneral
+// lwz LdStGeneral
+// lwzu LdStGeneral
+// lwzux LdStUX
+// lwzx LdStGeneral
+// mcrf BrMCR
+// mcrfs FPGeneral
+// mcrxr BrMCRX
+// mfcr SprMFCR
+// mffs IntMFFS
+// mfmsr SprMFMSR
+// mfspr SprMFSPR
+// mfsr SprMFSR
+// mfsrin SprMFSR
+// mftb SprMFTB
+// mfvscr IntMFVSCR
+// mtcrf BrMCRX
+// mtfsb0 IntMTFSB0
+// mtfsb1 IntMTFSB0
+// mtfsf IntMTFSB0
+// mtfsfi IntMTFSB0
+// mtmsr SprMTMSR
+// mtmsrd LdStLD
+// mtspr SprMTSPR
+// mtsr SprMTSR
+// mtsrd IntMTSRD
+// mtsrdin IntMTSRD
+// mtsrin SprMTSRIN
+// mtvscr IntMFVSCR
+// mulhd IntMulHD
+// mulhdu IntMulHD
+// mulhw IntMulHW
+// mulhwu IntMulHWU
+// mulld IntMulHD
+// mulli IntMulLI
+// mullw IntMulHW
+// nand IntGeneral
+// neg IntGeneral
+// nor IntGeneral
+// or IntGeneral
+// orc IntGeneral
+// ori IntGeneral
+// oris IntGeneral
+// rfi SprRFI
+// rfid IntRFID
+// rldcl IntRotateD
+// rldcr IntRotateD
+// rldic IntRotateD
+// rldicl IntRotateD
+// rldicr IntRotateD
+// rldimi IntRotateD
+// rlwimi IntRotate
+// rlwinm IntGeneral
+// rlwnm IntGeneral
+// sc SprSC
+// slbia LdStSLBIA
+// slbie LdStSLBIE
+// sld IntRotateD
+// slw IntGeneral
+// srad IntRotateD
+// sradi IntRotateD
+// sraw IntShift
+// srawi IntShift
+// srd IntRotateD
+// srw IntGeneral
+// stb LdStGeneral
+// stbu LdStGeneral
+// stbux LdStGeneral
+// stbx LdStGeneral
+// std LdStSTD
+// stdcx. LdStSTDCX
+// stdu LdStSTD
+// stdux LdStSTD
+// stdx LdStSTD
+// stfd LdStUX
+// stfdu LdStUX
+// stfdux LdStUX
+// stfdx LdStUX
+// stfiwx LdStUX
+// stfs LdStUX
+// stfsu LdStUX
+// stfsux LdStUX
+// stfsx LdStUX
+// sth LdStGeneral
+// sthbrx LdStGeneral
+// sthu LdStGeneral
+// sthux LdStGeneral
+// sthx LdStGeneral
+// stmw LdStLMW
+// stswi LdStLMW
+// stswx LdStLMW
+// stvebx LdStSTVEBX
+// stvehx LdStSTVEBX
+// stvewx LdStSTVEBX
+// stvx LdStSTVEBX
+// stvxl LdStSTVEBX
+// stw LdStGeneral
+// stwbrx LdStGeneral
+// stwcx. LdStSTWCX
+// stwu LdStGeneral
+// stwux LdStGeneral
+// stwx LdStGeneral
+// subf IntGeneral
+// subfc IntGeneral
+// subfe IntGeneral
+// subfic IntGeneral
+// subfme IntGeneral
+// subfze IntGeneral
+// sync LdStSync
+// td IntTrapD
+// tdi IntTrapD
+// tlbia LdStSLBIA
+// tlbie LdStDCBF
+// tlbsync SprTLBSYNC
+// tw IntTrapW
+// twi IntTrapW
+// vaddcuw VecGeneral
+// vaddfp VecFP
+// vaddsbs VecGeneral
+// vaddshs VecGeneral
+// vaddsws VecGeneral
+// vaddubm VecGeneral
+// vaddubs VecGeneral
+// vadduhm VecGeneral
+// vadduhs VecGeneral
+// vadduwm VecGeneral
+// vadduws VecGeneral
+// vand VecGeneral
+// vandc VecGeneral
+// vavgsb VecGeneral
+// vavgsh VecGeneral
+// vavgsw VecGeneral
+// vavgub VecGeneral
+// vavguh VecGeneral
+// vavguw VecGeneral
+// vcfsx VecFP
+// vcfux VecFP
+// vcmpbfp VecFPCompare
+// vcmpeqfp VecFPCompare
+// vcmpequb VecGeneral
+// vcmpequh VecGeneral
+// vcmpequw VecGeneral
+// vcmpgefp VecFPCompare
+// vcmpgtfp VecFPCompare
+// vcmpgtsb VecGeneral
+// vcmpgtsh VecGeneral
+// vcmpgtsw VecGeneral
+// vcmpgtub VecGeneral
+// vcmpgtuh VecGeneral
+// vcmpgtuw VecGeneral
+// vctsxs VecFP
+// vctuxs VecFP
+// vexptefp VecFP
+// vlogefp VecFP
+// vmaddfp VecFP
+// vmaxfp VecFPCompare
+// vmaxsb VecGeneral
+// vmaxsh VecGeneral
+// vmaxsw VecGeneral
+// vmaxub VecGeneral
+// vmaxuh VecGeneral
+// vmaxuw VecGeneral
+// vmhaddshs VecComplex
+// vmhraddshs VecComplex
+// vminfp VecFPCompare
+// vminsb VecGeneral
+// vminsh VecGeneral
+// vminsw VecGeneral
+// vminub VecGeneral
+// vminuh VecGeneral
+// vminuw VecGeneral
+// vmladduhm VecComplex
+// vmrghb VecPerm
+// vmrghh VecPerm
+// vmrghw VecPerm
+// vmrglb VecPerm
+// vmrglh VecPerm
+// vmrglw VecPerm
+// vmsubfp VecFP
+// vmsummbm VecComplex
+// vmsumshm VecComplex
+// vmsumshs VecComplex
+// vmsumubm VecComplex
+// vmsumuhm VecComplex
+// vmsumuhs VecComplex
+// vmulesb VecComplex
+// vmulesh VecComplex
+// vmuleub VecComplex
+// vmuleuh VecComplex
+// vmulosb VecComplex
+// vmulosh VecComplex
+// vmuloub VecComplex
+// vmulouh VecComplex
+// vnor VecGeneral
+// vor VecGeneral
+// vperm VecPerm
+// vpkpx VecPerm
+// vpkshss VecPerm
+// vpkshus VecPerm
+// vpkswss VecPerm
+// vpkswus VecPerm
+// vpkuhum VecPerm
+// vpkuhus VecPerm
+// vpkuwum VecPerm
+// vpkuwus VecPerm
+// vrefp VecFPRound
+// vrfim VecFPRound
+// vrfin VecFPRound
+// vrfip VecFPRound
+// vrfiz VecFPRound
+// vrlb VecGeneral
+// vrlh VecGeneral
+// vrlw VecGeneral
+// vrsqrtefp VecFP
+// vsel VecGeneral
+// vsl VecVSL
+// vslb VecGeneral
+// vsldoi VecPerm
+// vslh VecGeneral
+// vslo VecPerm
+// vslw VecGeneral
+// vspltb VecPerm
+// vsplth VecPerm
+// vspltisb VecPerm
+// vspltish VecPerm
+// vspltisw VecPerm
+// vspltw VecPerm
+// vsr VecVSR
+// vsrab VecGeneral
+// vsrah VecGeneral
+// vsraw VecGeneral
+// vsrb VecGeneral
+// vsrh VecGeneral
+// vsro VecPerm
+// vsrw VecGeneral
+// vsubcuw VecGeneral
+// vsubfp VecFP
+// vsubsbs VecGeneral
+// vsubshs VecGeneral
+// vsubsws VecGeneral
+// vsububm VecGeneral
+// vsububs VecGeneral
+// vsubuhm VecGeneral
+// vsubuhs VecGeneral
+// vsubuwm VecGeneral
+// vsubuws VecGeneral
+// vsum2sws VecComplex
+// vsum4sbs VecComplex
+// vsum4shs VecComplex
+// vsum4ubs VecComplex
+// vsumsws VecComplex
+// vupkhpx VecPerm
+// vupkhsb VecPerm
+// vupkhsh VecPerm
+// vupklpx VecPerm
+// vupklsb VecPerm
+// vupklsh VecPerm
+// vxor VecGeneral
+// xor IntGeneral
+// xori IntGeneral
+// xoris IntGeneral
+//
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG3.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG3.td
new file mode 100644
index 0000000..ad4da1f
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG3.td
@@ -0,0 +1,64 @@
+//===- PPCScheduleG3.td - PPC G3 Scheduling Definitions ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the G3 (750) processor.
+//
+//===----------------------------------------------------------------------===//
+
+
+def G3Itineraries : ProcessorItineraries<
+ [IU1, IU2, FPU1, BPU, SRU, SLU], [], [
+ InstrItinData<IntGeneral , [InstrStage<1, [IU1, IU2]>]>,
+ InstrItinData<IntCompare , [InstrStage<1, [IU1, IU2]>]>,
+ InstrItinData<IntDivW , [InstrStage<19, [IU1]>]>,
+ InstrItinData<IntMFFS , [InstrStage<1, [FPU1]>]>,
+ InstrItinData<IntMTFSB0 , [InstrStage<3, [FPU1]>]>,
+ InstrItinData<IntMulHW , [InstrStage<5, [IU1]>]>,
+ InstrItinData<IntMulHWU , [InstrStage<6, [IU1]>]>,
+ InstrItinData<IntMulLI , [InstrStage<3, [IU1]>]>,
+ InstrItinData<IntRotate , [InstrStage<1, [IU1, IU2]>]>,
+ InstrItinData<IntShift , [InstrStage<1, [IU1, IU2]>]>,
+ InstrItinData<IntTrapW , [InstrStage<2, [IU1, IU2]>]>,
+ InstrItinData<BrB , [InstrStage<1, [BPU]>]>,
+ InstrItinData<BrCR , [InstrStage<1, [SRU]>]>,
+ InstrItinData<BrMCR , [InstrStage<1, [SRU]>]>,
+ InstrItinData<BrMCRX , [InstrStage<1, [SRU]>]>,
+ InstrItinData<LdStDCBA , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStDCBF , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStDCBI , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStGeneral , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStICBI , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStUX , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLFD , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLFDU , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLHA , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLMW , [InstrStage<34, [SLU]>]>,
+ InstrItinData<LdStLWARX , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStSTWCX , [InstrStage<8, [SLU]>]>,
+ InstrItinData<LdStSync , [InstrStage<3, [SLU]>]>,
+ InstrItinData<SprISYNC , [InstrStage<2, [SRU]>]>,
+ InstrItinData<SprMFSR , [InstrStage<3, [SRU]>]>,
+ InstrItinData<SprMTMSR , [InstrStage<1, [SRU]>]>,
+ InstrItinData<SprMTSR , [InstrStage<2, [SRU]>]>,
+ InstrItinData<SprTLBSYNC , [InstrStage<3, [SRU]>]>,
+ InstrItinData<SprMFCR , [InstrStage<1, [SRU]>]>,
+ InstrItinData<SprMFMSR , [InstrStage<1, [SRU]>]>,
+ InstrItinData<SprMFSPR , [InstrStage<3, [SRU]>]>,
+ InstrItinData<SprMFTB , [InstrStage<3, [SRU]>]>,
+ InstrItinData<SprMTSPR , [InstrStage<2, [SRU]>]>,
+ InstrItinData<SprMTSRIN , [InstrStage<2, [SRU]>]>,
+ InstrItinData<SprRFI , [InstrStage<2, [SRU]>]>,
+ InstrItinData<SprSC , [InstrStage<2, [SRU]>]>,
+ InstrItinData<FPGeneral , [InstrStage<1, [FPU1]>]>,
+ InstrItinData<FPCompare , [InstrStage<1, [FPU1]>]>,
+ InstrItinData<FPDivD , [InstrStage<31, [FPU1]>]>,
+ InstrItinData<FPDivS , [InstrStage<17, [FPU1]>]>,
+ InstrItinData<FPFused , [InstrStage<2, [FPU1]>]>,
+ InstrItinData<FPRes , [InstrStage<10, [FPU1]>]>
+]>;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4.td
new file mode 100644
index 0000000..03c3b29
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4.td
@@ -0,0 +1,74 @@
+//===- PPCScheduleG4.td - PPC G4 Scheduling Definitions ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the G4 (7400) processor.
+//
+//===----------------------------------------------------------------------===//
+
+def G4Itineraries : ProcessorItineraries<
+ [IU1, IU2, SLU, SRU, BPU, FPU1, VIU1, VIU2, VPU, VFPU], [], [
+ InstrItinData<IntGeneral , [InstrStage<1, [IU1, IU2]>]>,
+ InstrItinData<IntCompare , [InstrStage<1, [IU1, IU2]>]>,
+ InstrItinData<IntDivW , [InstrStage<19, [IU1]>]>,
+ InstrItinData<IntMFFS , [InstrStage<3, [FPU1]>]>,
+ InstrItinData<IntMFVSCR , [InstrStage<1, [VIU1]>]>,
+ InstrItinData<IntMTFSB0 , [InstrStage<3, [FPU1]>]>,
+ InstrItinData<IntMulHW , [InstrStage<5, [IU1]>]>,
+ InstrItinData<IntMulHWU , [InstrStage<6, [IU1]>]>,
+ InstrItinData<IntMulLI , [InstrStage<3, [IU1]>]>,
+ InstrItinData<IntRotate , [InstrStage<1, [IU1, IU2]>]>,
+ InstrItinData<IntShift , [InstrStage<1, [IU1, IU2]>]>,
+ InstrItinData<IntTrapW , [InstrStage<2, [IU1, IU2]>]>,
+ InstrItinData<BrB , [InstrStage<1, [BPU]>]>,
+ InstrItinData<BrCR , [InstrStage<1, [SRU]>]>,
+ InstrItinData<BrMCR , [InstrStage<1, [SRU]>]>,
+ InstrItinData<BrMCRX , [InstrStage<1, [SRU]>]>,
+ InstrItinData<LdStDCBF , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStDCBI , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStGeneral , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStDSS , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStICBI , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStUX , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLFD , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLFDU , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLHA , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLMW , [InstrStage<34, [SLU]>]>,
+ InstrItinData<LdStLVecX , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLWARX , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStSTVEBX , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStSTWCX , [InstrStage<5, [SLU]>]>,
+ InstrItinData<LdStSync , [InstrStage<8, [SLU]>]>,
+ InstrItinData<SprISYNC , [InstrStage<2, [SRU]>]>,
+ InstrItinData<SprMFSR , [InstrStage<3, [SRU]>]>,
+ InstrItinData<SprMTMSR , [InstrStage<1, [SRU]>]>,
+ InstrItinData<SprMTSR , [InstrStage<2, [SRU]>]>,
+ InstrItinData<SprTLBSYNC , [InstrStage<8, [SRU]>]>,
+ InstrItinData<SprMFCR , [InstrStage<1, [SRU]>]>,
+ InstrItinData<SprMFMSR , [InstrStage<1, [SRU]>]>,
+ InstrItinData<SprMFSPR , [InstrStage<3, [SRU]>]>,
+ InstrItinData<SprMFTB , [InstrStage<1, [SRU]>]>,
+ InstrItinData<SprMTSPR , [InstrStage<2, [SRU]>]>,
+ InstrItinData<SprMTSRIN , [InstrStage<2, [SRU]>]>,
+ InstrItinData<SprRFI , [InstrStage<2, [SRU]>]>,
+ InstrItinData<SprSC , [InstrStage<2, [SRU]>]>,
+ InstrItinData<FPGeneral , [InstrStage<1, [FPU1]>]>,
+ InstrItinData<FPCompare , [InstrStage<1, [FPU1]>]>,
+ InstrItinData<FPDivD , [InstrStage<31, [FPU1]>]>,
+ InstrItinData<FPDivS , [InstrStage<17, [FPU1]>]>,
+ InstrItinData<FPFused , [InstrStage<1, [FPU1]>]>,
+ InstrItinData<FPRes , [InstrStage<10, [FPU1]>]>,
+ InstrItinData<VecGeneral , [InstrStage<1, [VIU1]>]>,
+ InstrItinData<VecFP , [InstrStage<4, [VFPU]>]>,
+ InstrItinData<VecFPCompare, [InstrStage<1, [VIU1]>]>,
+ InstrItinData<VecComplex , [InstrStage<3, [VIU2]>]>,
+ InstrItinData<VecPerm , [InstrStage<1, [VPU]>]>,
+ InstrItinData<VecFPRound , [InstrStage<4, [VFPU]>]>,
+ InstrItinData<VecVSL , [InstrStage<1, [VIU1]>]>,
+ InstrItinData<VecVSR , [InstrStage<1, [VIU1]>]>
+]>;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td
new file mode 100644
index 0000000..00cac3c
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td
@@ -0,0 +1,80 @@
+//===- PPCScheduleG4Plus.td - PPC G4+ Scheduling Defs. -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the G4+ (7450) processor.
+//
+//===----------------------------------------------------------------------===//
+
+def IU3 : FuncUnit; // integer unit 3 (7450 simple)
+def IU4 : FuncUnit; // integer unit 4 (7450 simple)
+
+def G4PlusItineraries : ProcessorItineraries<
+ [IU1, IU2, IU3, IU4, BPU, SLU, FPU1, VFPU, VIU1, VIU2, VPU], [], [
+ InstrItinData<IntGeneral , [InstrStage<1, [IU1, IU2, IU3, IU4]>]>,
+ InstrItinData<IntCompare , [InstrStage<1, [IU1, IU2, IU3, IU4]>]>,
+ InstrItinData<IntDivW , [InstrStage<23, [IU2]>]>,
+ InstrItinData<IntMFFS , [InstrStage<5, [FPU1]>]>,
+ InstrItinData<IntMFVSCR , [InstrStage<2, [VFPU]>]>,
+ InstrItinData<IntMTFSB0 , [InstrStage<5, [FPU1]>]>,
+ InstrItinData<IntMulHW , [InstrStage<4, [IU2]>]>,
+ InstrItinData<IntMulHWU , [InstrStage<4, [IU2]>]>,
+ InstrItinData<IntMulLI , [InstrStage<3, [IU2]>]>,
+ InstrItinData<IntRotate , [InstrStage<1, [IU1, IU2, IU3, IU4]>]>,
+ InstrItinData<IntShift , [InstrStage<2, [IU1, IU2, IU3, IU4]>]>,
+ InstrItinData<IntTrapW , [InstrStage<2, [IU1, IU2, IU3, IU4]>]>,
+ InstrItinData<BrB , [InstrStage<1, [BPU]>]>,
+ InstrItinData<BrCR , [InstrStage<2, [IU2]>]>,
+ InstrItinData<BrMCR , [InstrStage<2, [IU2]>]>,
+ InstrItinData<BrMCRX , [InstrStage<2, [IU2]>]>,
+ InstrItinData<LdStDCBF , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStDCBI , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStGeneral , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStDSS , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStICBI , [InstrStage<3, [IU2]>]>,
+ InstrItinData<LdStUX , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStLFD , [InstrStage<4, [SLU]>]>,
+ InstrItinData<LdStLFDU , [InstrStage<4, [SLU]>]>,
+ InstrItinData<LdStLHA , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStLMW , [InstrStage<37, [SLU]>]>,
+ InstrItinData<LdStLVecX , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStLWA , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStLWARX , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStSTD , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStSTDCX , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStSTVEBX , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStSTWCX , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStSync , [InstrStage<35, [SLU]>]>,
+ InstrItinData<SprISYNC , [InstrStage<0, [IU1, IU2, IU3, IU4]>]>,
+ InstrItinData<SprMFSR , [InstrStage<4, [IU2]>]>,
+ InstrItinData<SprMTMSR , [InstrStage<2, [IU2]>]>,
+ InstrItinData<SprMTSR , [InstrStage<2, [IU2]>]>,
+ InstrItinData<SprTLBSYNC , [InstrStage<3, [SLU]>]>,
+ InstrItinData<SprMFCR , [InstrStage<2, [IU2]>]>,
+ InstrItinData<SprMFMSR , [InstrStage<3, [IU2]>]>,
+ InstrItinData<SprMFSPR , [InstrStage<4, [IU2]>]>,
+ InstrItinData<SprMFTB , [InstrStage<5, [IU2]>]>,
+ InstrItinData<SprMTSPR , [InstrStage<2, [IU2]>]>,
+ InstrItinData<SprMTSRIN , [InstrStage<2, [IU2]>]>,
+ InstrItinData<SprRFI , [InstrStage<1, [IU1, IU2, IU3, IU4]>]>,
+ InstrItinData<SprSC , [InstrStage<0, [IU1, IU2, IU3, IU4]>]>,
+ InstrItinData<FPGeneral , [InstrStage<5, [FPU1]>]>,
+ InstrItinData<FPCompare , [InstrStage<5, [FPU1]>]>,
+ InstrItinData<FPDivD , [InstrStage<35, [FPU1]>]>,
+ InstrItinData<FPDivS , [InstrStage<21, [FPU1]>]>,
+ InstrItinData<FPFused , [InstrStage<5, [FPU1]>]>,
+ InstrItinData<FPRes , [InstrStage<14, [FPU1]>]>,
+ InstrItinData<VecGeneral , [InstrStage<1, [VIU1]>]>,
+ InstrItinData<VecFP , [InstrStage<4, [VFPU]>]>,
+ InstrItinData<VecFPCompare, [InstrStage<2, [VFPU]>]>,
+ InstrItinData<VecComplex , [InstrStage<4, [VIU2]>]>,
+ InstrItinData<VecPerm , [InstrStage<2, [VPU]>]>,
+ InstrItinData<VecFPRound , [InstrStage<4, [VIU1]>]>,
+ InstrItinData<VecVSL , [InstrStage<2, [VPU]>]>,
+ InstrItinData<VecVSR , [InstrStage<2, [VPU]>]>
+]>;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td
new file mode 100644
index 0000000..1671f22
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td
@@ -0,0 +1,84 @@
+//===- PPCScheduleG5.td - PPC G5 Scheduling Definitions ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the G5 (970) processor.
+//
+//===----------------------------------------------------------------------===//
+
+def G5Itineraries : ProcessorItineraries<
+ [IU1, IU2, SLU, BPU, FPU1, FPU2, VFPU, VIU1, VIU2, VPU], [], [
+ InstrItinData<IntGeneral , [InstrStage<2, [IU1, IU2]>]>,
+ InstrItinData<IntCompare , [InstrStage<3, [IU1, IU2]>]>,
+ InstrItinData<IntDivD , [InstrStage<68, [IU1]>]>,
+ InstrItinData<IntDivW , [InstrStage<36, [IU1]>]>,
+ InstrItinData<IntMFFS , [InstrStage<6, [IU2]>]>,
+ InstrItinData<IntMFVSCR , [InstrStage<1, [VFPU]>]>,
+ InstrItinData<IntMTFSB0 , [InstrStage<6, [FPU1, FPU2]>]>,
+ InstrItinData<IntMulHD , [InstrStage<7, [IU1, IU2]>]>,
+ InstrItinData<IntMulHW , [InstrStage<5, [IU1, IU2]>]>,
+ InstrItinData<IntMulHWU , [InstrStage<5, [IU1, IU2]>]>,
+ InstrItinData<IntMulLI , [InstrStage<4, [IU1, IU2]>]>,
+ InstrItinData<IntRFID , [InstrStage<1, [IU2]>]>,
+ InstrItinData<IntRotateD , [InstrStage<2, [IU1, IU2]>]>,
+ InstrItinData<IntRotate , [InstrStage<4, [IU1, IU2]>]>,
+ InstrItinData<IntShift , [InstrStage<2, [IU1, IU2]>]>,
+ InstrItinData<IntTrapD , [InstrStage<1, [IU1, IU2]>]>,
+ InstrItinData<IntTrapW , [InstrStage<1, [IU1, IU2]>]>,
+ InstrItinData<BrB , [InstrStage<1, [BPU]>]>,
+ InstrItinData<BrCR , [InstrStage<4, [BPU]>]>,
+ InstrItinData<BrMCR , [InstrStage<2, [BPU]>]>,
+ InstrItinData<BrMCRX , [InstrStage<3, [BPU]>]>,
+ InstrItinData<LdStDCBF , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStGeneral , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStDSS , [InstrStage<10, [SLU]>]>,
+ InstrItinData<LdStICBI , [InstrStage<40, [SLU]>]>,
+ InstrItinData<LdStUX , [InstrStage<4, [SLU]>]>,
+ InstrItinData<LdStLD , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStLDARX , [InstrStage<11, [SLU]>]>,
+ InstrItinData<LdStLFD , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStLFDU , [InstrStage<5, [SLU]>]>,
+ InstrItinData<LdStLHA , [InstrStage<5, [SLU]>]>,
+ InstrItinData<LdStLMW , [InstrStage<64, [SLU]>]>,
+ InstrItinData<LdStLVecX , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStLWA , [InstrStage<5, [SLU]>]>,
+ InstrItinData<LdStLWARX , [InstrStage<11, [SLU]>]>,
+ InstrItinData<LdStSLBIA , [InstrStage<40, [SLU]>]>, // needs work
+ InstrItinData<LdStSLBIE , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStSTD , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStSTDCX , [InstrStage<11, [SLU]>]>,
+ InstrItinData<LdStSTVEBX , [InstrStage<5, [SLU]>]>,
+ InstrItinData<LdStSTWCX , [InstrStage<11, [SLU]>]>,
+ InstrItinData<LdStSync , [InstrStage<35, [SLU]>]>,
+ InstrItinData<SprISYNC , [InstrStage<40, [SLU]>]>, // needs work
+ InstrItinData<SprMFSR , [InstrStage<3, [SLU]>]>,
+ InstrItinData<SprMTMSR , [InstrStage<3, [SLU]>]>,
+ InstrItinData<SprMTSR , [InstrStage<3, [SLU]>]>,
+ InstrItinData<SprTLBSYNC , [InstrStage<3, [SLU]>]>,
+ InstrItinData<SprMFCR , [InstrStage<2, [IU2]>]>,
+ InstrItinData<SprMFMSR , [InstrStage<3, [IU2]>]>,
+ InstrItinData<SprMFSPR , [InstrStage<3, [IU2]>]>,
+ InstrItinData<SprMFTB , [InstrStage<10, [IU2]>]>,
+ InstrItinData<SprMTSPR , [InstrStage<8, [IU2]>]>,
+ InstrItinData<SprSC , [InstrStage<1, [IU2]>]>,
+ InstrItinData<FPGeneral , [InstrStage<6, [FPU1, FPU2]>]>,
+ InstrItinData<FPCompare , [InstrStage<8, [FPU1, FPU2]>]>,
+ InstrItinData<FPDivD , [InstrStage<33, [FPU1, FPU2]>]>,
+ InstrItinData<FPDivS , [InstrStage<33, [FPU1, FPU2]>]>,
+ InstrItinData<FPFused , [InstrStage<6, [FPU1, FPU2]>]>,
+ InstrItinData<FPRes , [InstrStage<6, [FPU1, FPU2]>]>,
+ InstrItinData<FPSqrt , [InstrStage<40, [FPU1, FPU2]>]>,
+ InstrItinData<VecGeneral , [InstrStage<2, [VIU1]>]>,
+ InstrItinData<VecFP , [InstrStage<8, [VFPU]>]>,
+ InstrItinData<VecFPCompare, [InstrStage<2, [VFPU]>]>,
+ InstrItinData<VecComplex , [InstrStage<5, [VIU2]>]>,
+ InstrItinData<VecPerm , [InstrStage<3, [VPU]>]>,
+ InstrItinData<VecFPRound , [InstrStage<8, [VFPU]>]>,
+ InstrItinData<VecVSL , [InstrStage<2, [VIU1]>]>,
+ InstrItinData<VecVSR , [InstrStage<3, [VPU]>]>
+]>;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp
new file mode 100644
index 0000000..d4258b4
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp
@@ -0,0 +1,23 @@
+//===-- PPCSelectionDAGInfo.cpp - PowerPC SelectionDAG Info ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PPCSelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "powerpc-selectiondag-info"
+#include "PPCTargetMachine.h"
+using namespace llvm;
+
+PPCSelectionDAGInfo::PPCSelectionDAGInfo(const PPCTargetMachine &TM)
+ : TargetSelectionDAGInfo(TM) {
+}
+
+PPCSelectionDAGInfo::~PPCSelectionDAGInfo() {
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.h
new file mode 100644
index 0000000..341b69c
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.h
@@ -0,0 +1,31 @@
+//===-- PPCSelectionDAGInfo.h - PowerPC SelectionDAG Info -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the PowerPC subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef POWERPCCSELECTIONDAGINFO_H
+#define POWERPCCSELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class PPCTargetMachine;
+
+class PPCSelectionDAGInfo : public TargetSelectionDAGInfo {
+public:
+ explicit PPCSelectionDAGInfo(const PPCTargetMachine &TM);
+ ~PPCSelectionDAGInfo();
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
new file mode 100644
index 0000000..cf194de
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -0,0 +1,141 @@
+//===- PowerPCSubtarget.cpp - PPC Subtarget Information -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PPC specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCSubtarget.h"
+#include "PPC.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/TargetRegistry.h"
+#include <cstdlib>
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "PPCGenSubtargetInfo.inc"
+
+using namespace llvm;
+
+#if defined(__APPLE__)
+#include <mach/mach.h>
+#include <mach/mach_host.h>
+#include <mach/host_info.h>
+#include <mach/machine.h>
+
+/// GetCurrentPowerPCFeatures - Returns the current CPUs features.
+static const char *GetCurrentPowerPCCPU() {
+ host_basic_info_data_t hostInfo;
+ mach_msg_type_number_t infoCount;
+
+ infoCount = HOST_BASIC_INFO_COUNT;
+ host_info(mach_host_self(), HOST_BASIC_INFO, (host_info_t)&hostInfo,
+ &infoCount);
+
+ if (hostInfo.cpu_type != CPU_TYPE_POWERPC) return "generic";
+
+ switch(hostInfo.cpu_subtype) {
+ case CPU_SUBTYPE_POWERPC_601: return "601";
+ case CPU_SUBTYPE_POWERPC_602: return "602";
+ case CPU_SUBTYPE_POWERPC_603: return "603";
+ case CPU_SUBTYPE_POWERPC_603e: return "603e";
+ case CPU_SUBTYPE_POWERPC_603ev: return "603ev";
+ case CPU_SUBTYPE_POWERPC_604: return "604";
+ case CPU_SUBTYPE_POWERPC_604e: return "604e";
+ case CPU_SUBTYPE_POWERPC_620: return "620";
+ case CPU_SUBTYPE_POWERPC_750: return "750";
+ case CPU_SUBTYPE_POWERPC_7400: return "7400";
+ case CPU_SUBTYPE_POWERPC_7450: return "7450";
+ case CPU_SUBTYPE_POWERPC_970: return "970";
+ default: ;
+ }
+
+ return "generic";
+}
+#endif
+
+
+PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS, bool is64Bit)
+ : PPCGenSubtargetInfo(TT, CPU, FS)
+ , StackAlignment(16)
+ , DarwinDirective(PPC::DIR_NONE)
+ , IsGigaProcessor(false)
+ , Has64BitSupport(false)
+ , Use64BitRegs(false)
+ , IsPPC64(is64Bit)
+ , HasAltivec(false)
+ , HasFSQRT(false)
+ , HasSTFIWX(false)
+ , HasLazyResolverStubs(false)
+ , IsJITCodeModel(false)
+ , TargetTriple(TT) {
+
+ // Determine default and user specified characteristics
+ std::string CPUName = CPU;
+ if (CPUName.empty())
+ CPUName = "generic";
+#if defined(__APPLE__)
+ if (CPUName == "generic")
+ CPUName = GetCurrentPowerPCCPU();
+#endif
+
+ // Parse features string.
+ ParseSubtargetFeatures(CPUName, FS);
+
+ // Initialize scheduling itinerary for the specified CPU.
+ InstrItins = getInstrItineraryForCPU(CPUName);
+
+ // If we are generating code for ppc64, verify that options make sense.
+ if (is64Bit) {
+ Has64BitSupport = true;
+ // Silently force 64-bit register use on ppc64.
+ Use64BitRegs = true;
+ }
+
+ // If the user requested use of 64-bit regs, but the cpu selected doesn't
+ // support it, ignore.
+ if (use64BitRegs() && !has64BitSupport())
+ Use64BitRegs = false;
+
+ // Set up darwin-specific properties.
+ if (isDarwin())
+ HasLazyResolverStubs = true;
+}
+
+/// SetJITMode - This is called to inform the subtarget info that we are
+/// producing code for the JIT.
+void PPCSubtarget::SetJITMode() {
+ // JIT mode doesn't want lazy resolver stubs, it knows exactly where
+ // everything is. This matters for PPC64, which codegens in PIC mode without
+ // stubs.
+ HasLazyResolverStubs = false;
+
+ // Calls to external functions need to use indirect calls
+ IsJITCodeModel = true;
+}
+
+
+/// hasLazyResolverStub - Return true if accesses to the specified global have
+/// to go through a dyld lazy resolution stub. This means that an extra load
+/// is required to get the address of the global.
+bool PPCSubtarget::hasLazyResolverStub(const GlobalValue *GV,
+ const TargetMachine &TM) const {
+ // We never have stubs if HasLazyResolverStubs=false or if in static mode.
+ if (!HasLazyResolverStubs || TM.getRelocationModel() == Reloc::Static)
+ return false;
+ // If symbol visibility is hidden, the extra load is not needed if
+ // the symbol is definitely defined in the current translation unit.
+ bool isDecl = GV->isDeclaration() && !GV->isMaterializable();
+ if (GV->hasHiddenVisibility() && !isDecl && !GV->hasCommonLinkage())
+ return false;
+ return GV->hasWeakLinkage() || GV->hasLinkOnceLinkage() ||
+ GV->hasCommonLinkage() || isDecl;
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h
new file mode 100644
index 0000000..e028de6
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h
@@ -0,0 +1,151 @@
+//=====-- PPCSubtarget.h - Define Subtarget for the PPC -------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the PowerPC specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef POWERPCSUBTARGET_H
+#define POWERPCSUBTARGET_H
+
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/ADT/Triple.h"
+#include <string>
+
+#define GET_SUBTARGETINFO_HEADER
+#include "PPCGenSubtargetInfo.inc"
+
+// GCC #defines PPC on Linux but we use it as our namespace name
+#undef PPC
+
+namespace llvm {
+class StringRef;
+
+namespace PPC {
+ // -m directive values.
+ enum {
+ DIR_NONE,
+ DIR_32,
+ DIR_601,
+ DIR_602,
+ DIR_603,
+ DIR_7400,
+ DIR_750,
+ DIR_970,
+ DIR_64
+ };
+}
+
+class GlobalValue;
+class TargetMachine;
+
+class PPCSubtarget : public PPCGenSubtargetInfo {
+protected:
+ /// stackAlignment - The minimum alignment known to hold of the stack frame on
+ /// entry to the function and which must be maintained by every function.
+ unsigned StackAlignment;
+
+ /// Selected instruction itineraries (one entry per itinerary class.)
+ InstrItineraryData InstrItins;
+
+ /// Which cpu directive was used.
+ unsigned DarwinDirective;
+
+ /// Used by the ISel to turn in optimizations for POWER4-derived architectures
+ bool IsGigaProcessor;
+ bool Has64BitSupport;
+ bool Use64BitRegs;
+ bool IsPPC64;
+ bool HasAltivec;
+ bool HasFSQRT;
+ bool HasSTFIWX;
+ bool HasLazyResolverStubs;
+ bool IsJITCodeModel;
+
+ /// TargetTriple - What processor and OS we're targeting.
+ Triple TargetTriple;
+
+public:
+ /// This constructor initializes the data members to match that
+ /// of the specified triple.
+ ///
+ PPCSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS, bool is64Bit);
+
+ /// ParseSubtargetFeatures - Parses features string setting specified
+ /// subtarget options. Definition of function is auto generated by tblgen.
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+
+ /// SetJITMode - This is called to inform the subtarget info that we are
+ /// producing code for the JIT.
+ void SetJITMode();
+
+ /// getStackAlignment - Returns the minimum alignment known to hold of the
+ /// stack frame on entry to the function and which must be maintained by every
+ /// function for this subtarget.
+ unsigned getStackAlignment() const { return StackAlignment; }
+
+ /// getDarwinDirective - Returns the -m directive specified for the cpu.
+ ///
+ unsigned getDarwinDirective() const { return DarwinDirective; }
+
+ /// getInstrItins - Return the instruction itineraies based on subtarget
+ /// selection.
+ const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
+
+ /// getTargetDataString - Return the pointer size and type alignment
+ /// properties of this subtarget.
+ const char *getTargetDataString() const {
+ // Note, the alignment values for f64 and i64 on ppc64 in Darwin
+ // documentation are wrong; these are correct (i.e. "what gcc does").
+ return isPPC64() ? "E-p:64:64-f64:64:64-i64:64:64-f128:64:128-n32:64"
+ : "E-p:32:32-f64:64:64-i64:64:64-f128:64:128-n32";
+ }
+
+ /// isPPC64 - Return true if we are generating code for 64-bit pointer mode.
+ ///
+ bool isPPC64() const { return IsPPC64; }
+
+ /// has64BitSupport - Return true if the selected CPU supports 64-bit
+ /// instructions, regardless of whether we are in 32-bit or 64-bit mode.
+ bool has64BitSupport() const { return Has64BitSupport; }
+
+ /// use64BitRegs - Return true if in 64-bit mode or if we should use 64-bit
+ /// registers in 32-bit mode when possible. This can only true if
+ /// has64BitSupport() returns true.
+ bool use64BitRegs() const { return Use64BitRegs; }
+
+ /// hasLazyResolverStub - Return true if accesses to the specified global have
+ /// to go through a dyld lazy resolution stub. This means that an extra load
+ /// is required to get the address of the global.
+ bool hasLazyResolverStub(const GlobalValue *GV,
+ const TargetMachine &TM) const;
+
+ // isJITCodeModel - True if we're generating code for the JIT
+ bool isJITCodeModel() const { return IsJITCodeModel; }
+
+ // Specific obvious features.
+ bool hasFSQRT() const { return HasFSQRT; }
+ bool hasSTFIWX() const { return HasSTFIWX; }
+ bool hasAltivec() const { return HasAltivec; }
+ bool isGigaProcessor() const { return IsGigaProcessor; }
+
+ const Triple &getTargetTriple() const { return TargetTriple; }
+
+ /// isDarwin - True if this is any darwin platform.
+ bool isDarwin() const { return TargetTriple.isMacOSX(); }
+
+ bool isDarwinABI() const { return isDarwin(); }
+ bool isSVR4ABI() const { return !isDarwin(); }
+
+};
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
new file mode 100644
index 0000000..f5744b8
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -0,0 +1,94 @@
+//===-- PPCTargetMachine.cpp - Define TargetMachine for PowerPC -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Top-level implementation for the PowerPC target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPC.h"
+#include "PPCTargetMachine.h"
+#include "llvm/PassManager.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+extern "C" void LLVMInitializePowerPCTarget() {
+ // Register the targets
+ RegisterTargetMachine<PPC32TargetMachine> A(ThePPC32Target);
+ RegisterTargetMachine<PPC64TargetMachine> B(ThePPC64Target);
+}
+
+PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM,
+ bool is64Bit)
+ : LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
+ Subtarget(TT, CPU, FS, is64Bit),
+ DataLayout(Subtarget.getTargetDataString()), InstrInfo(*this),
+ FrameLowering(Subtarget), JITInfo(*this, is64Bit),
+ TLInfo(*this), TSInfo(*this),
+ InstrItins(Subtarget.getInstrItineraryData()) {
+}
+
+/// Override this for PowerPC. Tail merging happily breaks up instruction issue
+/// groups, which typically degrades performance.
+bool PPCTargetMachine::getEnableTailMergeDefault() const { return false; }
+
+PPC32TargetMachine::PPC32TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM)
+ : PPCTargetMachine(T, TT, CPU, FS, RM, CM, false) {
+}
+
+
+PPC64TargetMachine::PPC64TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM)
+ : PPCTargetMachine(T, TT, CPU, FS, RM, CM, true) {
+}
+
+
+//===----------------------------------------------------------------------===//
+// Pass Pipeline Configuration
+//===----------------------------------------------------------------------===//
+
+bool PPCTargetMachine::addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ // Install an instruction selector.
+ PM.add(createPPCISelDag(*this));
+ return false;
+}
+
+bool PPCTargetMachine::addPreEmitPass(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ // Must run branch selection immediately preceding the asm printer.
+ PM.add(createPPCBranchSelectionPass());
+ return false;
+}
+
+bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ JITCodeEmitter &JCE) {
+ // FIXME: This should be moved to TargetJITInfo!!
+ if (Subtarget.isPPC64())
+ // Temporary workaround for the inability of PPC64 JIT to handle jump
+ // tables.
+ DisableJumpTables = true;
+
+ // Inform the subtarget that we are in JIT mode. FIXME: does this break macho
+ // writing?
+ Subtarget.SetJITMode();
+
+ // Machine code emitter pass for PowerPC.
+ PM.add(createPPCJITCodeEmitterPass(*this, JCE));
+
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h
new file mode 100644
index 0000000..d06f084
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h
@@ -0,0 +1,96 @@
+//===-- PPCTargetMachine.h - Define TargetMachine for PowerPC -----*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the PowerPC specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PPC_TARGETMACHINE_H
+#define PPC_TARGETMACHINE_H
+
+#include "PPCFrameLowering.h"
+#include "PPCSubtarget.h"
+#include "PPCJITInfo.h"
+#include "PPCInstrInfo.h"
+#include "PPCISelLowering.h"
+#include "PPCSelectionDAGInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+
+namespace llvm {
+class PassManager;
+class GlobalValue;
+
+/// PPCTargetMachine - Common code between 32-bit and 64-bit PowerPC targets.
+///
+class PPCTargetMachine : public LLVMTargetMachine {
+ PPCSubtarget Subtarget;
+ const TargetData DataLayout; // Calculates type size & alignment
+ PPCInstrInfo InstrInfo;
+ PPCFrameLowering FrameLowering;
+ PPCJITInfo JITInfo;
+ PPCTargetLowering TLInfo;
+ PPCSelectionDAGInfo TSInfo;
+ InstrItineraryData InstrItins;
+
+public:
+ PPCTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM, bool is64Bit);
+
+ virtual const PPCInstrInfo *getInstrInfo() const { return &InstrInfo; }
+ virtual const PPCFrameLowering *getFrameLowering() const {
+ return &FrameLowering;
+ }
+ virtual PPCJITInfo *getJITInfo() { return &JITInfo; }
+ virtual const PPCTargetLowering *getTargetLowering() const {
+ return &TLInfo;
+ }
+ virtual const PPCSelectionDAGInfo* getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+ virtual const PPCRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+
+ virtual const TargetData *getTargetData() const { return &DataLayout; }
+ virtual const PPCSubtarget *getSubtargetImpl() const { return &Subtarget; }
+ virtual const InstrItineraryData *getInstrItineraryData() const {
+ return &InstrItins;
+ }
+
+ // Pass Pipeline Configuration
+ virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
+ JITCodeEmitter &JCE);
+ virtual bool getEnableTailMergeDefault() const;
+};
+
+/// PPC32TargetMachine - PowerPC 32-bit target machine.
+///
+class PPC32TargetMachine : public PPCTargetMachine {
+public:
+ PPC32TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM);
+};
+
+/// PPC64TargetMachine - PowerPC 64-bit target machine.
+///
+class PPC64TargetMachine : public PPCTargetMachine {
+public:
+ PPC64TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp b/contrib/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
new file mode 100644
index 0000000..5dc8568
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
@@ -0,0 +1,23 @@
+//===-- PowerPCTargetInfo.cpp - PowerPC Target Implementation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPC.h"
+#include "llvm/Module.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::ThePPC32Target, llvm::ThePPC64Target;
+
+extern "C" void LLVMInitializePowerPCTargetInfo() {
+ RegisterTarget<Triple::ppc, /*HasJIT=*/true>
+ X(ThePPC32Target, "ppc32", "PowerPC 32");
+
+ RegisterTarget<Triple::ppc64, /*HasJIT=*/true>
+ Y(ThePPC64Target, "ppc64", "PowerPC 64");
+}
diff --git a/contrib/llvm/lib/Target/Sparc/DelaySlotFiller.cpp b/contrib/llvm/lib/Target/Sparc/DelaySlotFiller.cpp
new file mode 100644
index 0000000..dab35e5
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/DelaySlotFiller.cpp
@@ -0,0 +1,323 @@
+//===-- DelaySlotFiller.cpp - SPARC delay slot filler ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a simple local pass that attempts to fill delay slots with useful
+// instructions. If no instructions can be moved into the delay slot, then a
+// NOP is placed.
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "delay-slot-filler"
+#include "Sparc.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+
+using namespace llvm;
+
+STATISTIC(FilledSlots, "Number of delay slots filled");
+
+static cl::opt<bool> DisableDelaySlotFiller(
+ "disable-sparc-delay-filler",
+ cl::init(false),
+ cl::desc("Disable the Sparc delay slot filler."),
+ cl::Hidden);
+
+namespace {
+ struct Filler : public MachineFunctionPass {
+ /// Target machine description which we query for reg. names, data
+ /// layout, etc.
+ ///
+ TargetMachine &TM;
+ const TargetInstrInfo *TII;
+
+ static char ID;
+ Filler(TargetMachine &tm)
+ : MachineFunctionPass(ID), TM(tm), TII(tm.getInstrInfo()) { }
+
+ virtual const char *getPassName() const {
+ return "SPARC Delay Slot Filler";
+ }
+
+ bool runOnMachineBasicBlock(MachineBasicBlock &MBB);
+ bool runOnMachineFunction(MachineFunction &F) {
+ bool Changed = false;
+ for (MachineFunction::iterator FI = F.begin(), FE = F.end();
+ FI != FE; ++FI)
+ Changed |= runOnMachineBasicBlock(*FI);
+ return Changed;
+ }
+
+ bool isDelayFiller(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator candidate);
+
+ void insertCallUses(MachineBasicBlock::iterator MI,
+ SmallSet<unsigned, 32>& RegUses);
+
+ void insertDefsUses(MachineBasicBlock::iterator MI,
+ SmallSet<unsigned, 32>& RegDefs,
+ SmallSet<unsigned, 32>& RegUses);
+
+ bool IsRegInSet(SmallSet<unsigned, 32>& RegSet,
+ unsigned Reg);
+
+ bool delayHasHazard(MachineBasicBlock::iterator candidate,
+ bool &sawLoad, bool &sawStore,
+ SmallSet<unsigned, 32> &RegDefs,
+ SmallSet<unsigned, 32> &RegUses);
+
+ MachineBasicBlock::iterator
+ findDelayInstr(MachineBasicBlock &MBB, MachineBasicBlock::iterator slot);
+
+ bool needsUnimp(MachineBasicBlock::iterator I, unsigned &StructSize);
+
+ };
+ char Filler::ID = 0;
+} // end of anonymous namespace
+
+/// createSparcDelaySlotFillerPass - Returns a pass that fills in delay
+/// slots in Sparc MachineFunctions
+///
+FunctionPass *llvm::createSparcDelaySlotFillerPass(TargetMachine &tm) {
+ return new Filler(tm);
+}
+
+
+/// runOnMachineBasicBlock - Fill in delay slots for the given basic block.
+/// We assume there is only one delay slot per delayed instruction.
+///
+bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
+ bool Changed = false;
+
+ for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
+ if (I->getDesc().hasDelaySlot()) {
+ MachineBasicBlock::iterator D = MBB.end();
+ MachineBasicBlock::iterator J = I;
+
+ if (!DisableDelaySlotFiller)
+ D = findDelayInstr(MBB, I);
+
+ ++FilledSlots;
+ Changed = true;
+
+ if (D == MBB.end())
+ BuildMI(MBB, ++J, I->getDebugLoc(), TII->get(SP::NOP));
+ else
+ MBB.splice(++J, &MBB, D);
+ unsigned structSize = 0;
+ if (needsUnimp(I, structSize)) {
+ MachineBasicBlock::iterator J = I;
+ ++J; //skip the delay filler.
+ BuildMI(MBB, ++J, I->getDebugLoc(),
+ TII->get(SP::UNIMP)).addImm(structSize);
+ }
+ }
+ return Changed;
+}
+
+MachineBasicBlock::iterator
+Filler::findDelayInstr(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator slot)
+{
+ SmallSet<unsigned, 32> RegDefs;
+ SmallSet<unsigned, 32> RegUses;
+ bool sawLoad = false;
+ bool sawStore = false;
+
+ MachineBasicBlock::iterator I = slot;
+
+ if (slot->getOpcode() == SP::RET)
+ return MBB.end();
+
+ if (slot->getOpcode() == SP::RETL) {
+ --I;
+ if (I->getOpcode() != SP::RESTORErr)
+ return MBB.end();
+ //change retl to ret
+ slot->setDesc(TII->get(SP::RET));
+ return I;
+ }
+
+ //Call's delay filler can def some of call's uses.
+ if (slot->getDesc().isCall())
+ insertCallUses(slot, RegUses);
+ else
+ insertDefsUses(slot, RegDefs, RegUses);
+
+ bool done = false;
+
+ while (!done) {
+ done = (I == MBB.begin());
+
+ if (!done)
+ --I;
+
+ // skip debug value
+ if (I->isDebugValue())
+ continue;
+
+
+ if (I->hasUnmodeledSideEffects()
+ || I->isInlineAsm()
+ || I->isLabel()
+ || I->getDesc().hasDelaySlot()
+ || isDelayFiller(MBB, I))
+ break;
+
+ if (delayHasHazard(I, sawLoad, sawStore, RegDefs, RegUses)) {
+ insertDefsUses(I, RegDefs, RegUses);
+ continue;
+ }
+
+ return I;
+ }
+ return MBB.end();
+}
+
+bool Filler::delayHasHazard(MachineBasicBlock::iterator candidate,
+ bool &sawLoad,
+ bool &sawStore,
+ SmallSet<unsigned, 32> &RegDefs,
+ SmallSet<unsigned, 32> &RegUses)
+{
+
+ if (candidate->isImplicitDef() || candidate->isKill())
+ return true;
+
+ if (candidate->getDesc().mayLoad()) {
+ sawLoad = true;
+ if (sawStore)
+ return true;
+ }
+
+ if (candidate->getDesc().mayStore()) {
+ if (sawStore)
+ return true;
+ sawStore = true;
+ if (sawLoad)
+ return true;
+ }
+
+ for (unsigned i = 0, e = candidate->getNumOperands(); i!= e; ++i) {
+ const MachineOperand &MO = candidate->getOperand(i);
+ if (!MO.isReg())
+ continue; // skip
+
+ unsigned Reg = MO.getReg();
+
+ if (MO.isDef()) {
+ //check whether Reg is defined or used before delay slot.
+ if (IsRegInSet(RegDefs, Reg) || IsRegInSet(RegUses, Reg))
+ return true;
+ }
+ if (MO.isUse()) {
+ //check whether Reg is defined before delay slot.
+ if (IsRegInSet(RegDefs, Reg))
+ return true;
+ }
+ }
+ return false;
+}
+
+
+void Filler::insertCallUses(MachineBasicBlock::iterator MI,
+ SmallSet<unsigned, 32>& RegUses)
+{
+
+ switch(MI->getOpcode()) {
+ default: llvm_unreachable("Unknown opcode.");
+ case SP::CALL: break;
+ case SP::JMPLrr:
+ case SP::JMPLri:
+ assert(MI->getNumOperands() >= 2);
+ const MachineOperand &Reg = MI->getOperand(0);
+ assert(Reg.isReg() && "JMPL first operand is not a register.");
+ assert(Reg.isUse() && "JMPL first operand is not a use.");
+ RegUses.insert(Reg.getReg());
+
+ const MachineOperand &RegOrImm = MI->getOperand(1);
+ if (RegOrImm.isImm())
+ break;
+ assert(RegOrImm.isReg() && "JMPLrr second operand is not a register.");
+ assert(RegOrImm.isUse() && "JMPLrr second operand is not a use.");
+ RegUses.insert(RegOrImm.getReg());
+ break;
+ }
+}
+
+//Insert Defs and Uses of MI into the sets RegDefs and RegUses.
+void Filler::insertDefsUses(MachineBasicBlock::iterator MI,
+ SmallSet<unsigned, 32>& RegDefs,
+ SmallSet<unsigned, 32>& RegUses)
+{
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+
+ unsigned Reg = MO.getReg();
+ if (Reg == 0)
+ continue;
+ if (MO.isDef())
+ RegDefs.insert(Reg);
+ if (MO.isUse())
+ RegUses.insert(Reg);
+
+ }
+}
+
+//returns true if the Reg or its alias is in the RegSet.
+bool Filler::IsRegInSet(SmallSet<unsigned, 32>& RegSet, unsigned Reg)
+{
+ if (RegSet.count(Reg))
+ return true;
+ // check Aliased Registers
+ for (const unsigned *Alias = TM.getRegisterInfo()->getAliasSet(Reg);
+ *Alias; ++ Alias)
+ if (RegSet.count(*Alias))
+ return true;
+
+ return false;
+}
+
+// return true if the candidate is a delay filler.
+bool Filler::isDelayFiller(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator candidate)
+{
+ if (candidate == MBB.begin())
+ return false;
+ if (candidate->getOpcode() == SP::UNIMP)
+ return true;
+ const MCInstrDesc &prevdesc = (--candidate)->getDesc();
+ return prevdesc.hasDelaySlot();
+}
+
+bool Filler::needsUnimp(MachineBasicBlock::iterator I, unsigned &StructSize)
+{
+ if (!I->getDesc().isCall())
+ return false;
+
+ unsigned structSizeOpNum = 0;
+ switch (I->getOpcode()) {
+ default: llvm_unreachable("Unknown call opcode.");
+ case SP::CALL: structSizeOpNum = 1; break;
+ case SP::JMPLrr:
+ case SP::JMPLri: structSizeOpNum = 2; break;
+ }
+
+ const MachineOperand &MO = I->getOperand(structSizeOpNum);
+ if (!MO.isImm())
+ return false;
+ StructSize = MO.getImm();
+ return true;
+}
diff --git a/contrib/llvm/lib/Target/Sparc/FPMover.cpp b/contrib/llvm/lib/Target/Sparc/FPMover.cpp
new file mode 100644
index 0000000..1423b1e
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/FPMover.cpp
@@ -0,0 +1,141 @@
+//===-- FPMover.cpp - Sparc double-precision floating point move fixer ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Expand FpMOVD/FpABSD/FpNEGD instructions into their single-precision pieces.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "fpmover"
+#include "Sparc.h"
+#include "SparcSubtarget.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+STATISTIC(NumFpDs , "Number of instructions translated");
+STATISTIC(NoopFpDs, "Number of noop instructions removed");
+
+namespace {
+ struct FPMover : public MachineFunctionPass {
+ /// Target machine description which we query for reg. names, data
+ /// layout, etc.
+ ///
+ TargetMachine &TM;
+
+ static char ID;
+ explicit FPMover(TargetMachine &tm)
+ : MachineFunctionPass(ID), TM(tm) { }
+
+ virtual const char *getPassName() const {
+ return "Sparc Double-FP Move Fixer";
+ }
+
+ bool runOnMachineBasicBlock(MachineBasicBlock &MBB);
+ bool runOnMachineFunction(MachineFunction &F);
+ };
+ char FPMover::ID = 0;
+} // end of anonymous namespace
+
+/// createSparcFPMoverPass - Returns a pass that turns FpMOVD
+/// instructions into FMOVS instructions
+///
+FunctionPass *llvm::createSparcFPMoverPass(TargetMachine &tm) {
+ return new FPMover(tm);
+}
+
+/// getDoubleRegPair - Given a DFP register, return the even and odd FP
+/// registers that correspond to it.
+static void getDoubleRegPair(unsigned DoubleReg, unsigned &EvenReg,
+ unsigned &OddReg) {
+ static const unsigned EvenHalvesOfPairs[] = {
+ SP::F0, SP::F2, SP::F4, SP::F6, SP::F8, SP::F10, SP::F12, SP::F14,
+ SP::F16, SP::F18, SP::F20, SP::F22, SP::F24, SP::F26, SP::F28, SP::F30
+ };
+ static const unsigned OddHalvesOfPairs[] = {
+ SP::F1, SP::F3, SP::F5, SP::F7, SP::F9, SP::F11, SP::F13, SP::F15,
+ SP::F17, SP::F19, SP::F21, SP::F23, SP::F25, SP::F27, SP::F29, SP::F31
+ };
+ static const unsigned DoubleRegsInOrder[] = {
+ SP::D0, SP::D1, SP::D2, SP::D3, SP::D4, SP::D5, SP::D6, SP::D7, SP::D8,
+ SP::D9, SP::D10, SP::D11, SP::D12, SP::D13, SP::D14, SP::D15
+ };
+ for (unsigned i = 0; i < sizeof(DoubleRegsInOrder)/sizeof(unsigned); ++i)
+ if (DoubleRegsInOrder[i] == DoubleReg) {
+ EvenReg = EvenHalvesOfPairs[i];
+ OddReg = OddHalvesOfPairs[i];
+ return;
+ }
+ llvm_unreachable("Can't find reg");
+}
+
+/// runOnMachineBasicBlock - Fixup FpMOVD instructions in this MBB.
+///
+bool FPMover::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
+ bool Changed = false;
+ for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ) {
+ MachineInstr *MI = I++;
+ DebugLoc dl = MI->getDebugLoc();
+ if (MI->getOpcode() == SP::FpMOVD || MI->getOpcode() == SP::FpABSD ||
+ MI->getOpcode() == SP::FpNEGD) {
+ Changed = true;
+ unsigned DestDReg = MI->getOperand(0).getReg();
+ unsigned SrcDReg = MI->getOperand(1).getReg();
+ if (DestDReg == SrcDReg && MI->getOpcode() == SP::FpMOVD) {
+ MBB.erase(MI); // Eliminate the noop copy.
+ ++NoopFpDs;
+ continue;
+ }
+
+ unsigned EvenSrcReg = 0, OddSrcReg = 0, EvenDestReg = 0, OddDestReg = 0;
+ getDoubleRegPair(DestDReg, EvenDestReg, OddDestReg);
+ getDoubleRegPair(SrcDReg, EvenSrcReg, OddSrcReg);
+
+ const TargetInstrInfo *TII = TM.getInstrInfo();
+ if (MI->getOpcode() == SP::FpMOVD)
+ MI->setDesc(TII->get(SP::FMOVS));
+ else if (MI->getOpcode() == SP::FpNEGD)
+ MI->setDesc(TII->get(SP::FNEGS));
+ else if (MI->getOpcode() == SP::FpABSD)
+ MI->setDesc(TII->get(SP::FABSS));
+ else
+ llvm_unreachable("Unknown opcode!");
+
+ MI->getOperand(0).setReg(EvenDestReg);
+ MI->getOperand(1).setReg(EvenSrcReg);
+ DEBUG(errs() << "FPMover: the modified instr is: " << *MI);
+ // Insert copy for the other half of the double.
+ if (DestDReg != SrcDReg) {
+ MI = BuildMI(MBB, I, dl, TM.getInstrInfo()->get(SP::FMOVS), OddDestReg)
+ .addReg(OddSrcReg);
+ DEBUG(errs() << "FPMover: the inserted instr is: " << *MI);
+ }
+ ++NumFpDs;
+ }
+ }
+ return Changed;
+}
+
+bool FPMover::runOnMachineFunction(MachineFunction &F) {
+ // If the target has V9 instructions, the fp-mover pseudos will never be
+ // emitted. Avoid a scan of the instructions to improve compile time.
+ if (TM.getSubtarget<SparcSubtarget>().isV9())
+ return false;
+
+ bool Changed = false;
+ for (MachineFunction::iterator FI = F.begin(), FE = F.end();
+ FI != FE; ++FI)
+ Changed |= runOnMachineBasicBlock(*FI);
+ return Changed;
+}
diff --git a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
new file mode 100644
index 0000000..6a7e090
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
@@ -0,0 +1,41 @@
+//===-- SparcMCAsmInfo.cpp - Sparc asm properties -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the SparcMCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SparcMCAsmInfo.h"
+#include "llvm/ADT/Triple.h"
+
+using namespace llvm;
+
+SparcELFMCAsmInfo::SparcELFMCAsmInfo(const Target &T, StringRef TT) {
+ IsLittleEndian = false;
+ Triple TheTriple(TT);
+ if (TheTriple.getArch() == Triple::sparcv9)
+ PointerSize = 8;
+
+ Data16bitsDirective = "\t.half\t";
+ Data32bitsDirective = "\t.word\t";
+ Data64bitsDirective = 0; // .xword is only supported by V9.
+ ZeroDirective = "\t.skip\t";
+ CommentString = "!";
+ HasLEB128 = true;
+ SupportsDebugInformation = true;
+
+ SunStyleELFSectionSwitchSyntax = true;
+ UsesELFSectionDirectiveForBSS = true;
+
+ WeakRefDirective = "\t.weak\t";
+
+ PrivateGlobalPrefix = ".L";
+}
+
+
diff --git a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h
new file mode 100644
index 0000000..0cb6827
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h
@@ -0,0 +1,29 @@
+//=====-- SparcMCAsmInfo.h - Sparc asm properties -------------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the SparcMCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPARCTARGETASMINFO_H
+#define SPARCTARGETASMINFO_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCAsmInfo.h"
+
+namespace llvm {
+ class Target;
+
+ struct SparcELFMCAsmInfo : public MCAsmInfo {
+ explicit SparcELFMCAsmInfo(const Target &T, StringRef TT);
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
new file mode 100644
index 0000000..cb2a7df
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
@@ -0,0 +1,79 @@
+//===-- SparcMCTargetDesc.cpp - Sparc Target Descriptions --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides Sparc specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SparcMCTargetDesc.h"
+#include "SparcMCAsmInfo.h"
+#include "llvm/MC/MCCodeGenInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "SparcGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "SparcGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "SparcGenRegisterInfo.inc"
+
+using namespace llvm;
+
+static MCInstrInfo *createSparcMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitSparcMCInstrInfo(X);
+ return X;
+}
+
+static MCRegisterInfo *createSparcMCRegisterInfo(StringRef TT) {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitSparcMCRegisterInfo(X, SP::I7);
+ return X;
+}
+
+static MCSubtargetInfo *createSparcMCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS) {
+ MCSubtargetInfo *X = new MCSubtargetInfo();
+ InitSparcMCSubtargetInfo(X, TT, CPU, FS);
+ return X;
+}
+
+static MCCodeGenInfo *createSparcMCCodeGenInfo(StringRef TT, Reloc::Model RM,
+ CodeModel::Model CM) {
+ MCCodeGenInfo *X = new MCCodeGenInfo();
+ X->InitMCCodeGenInfo(RM, CM);
+ return X;
+}
+
+extern "C" void LLVMInitializeSparcTargetMC() {
+ // Register the MC asm info.
+ RegisterMCAsmInfo<SparcELFMCAsmInfo> X(TheSparcTarget);
+ RegisterMCAsmInfo<SparcELFMCAsmInfo> Y(TheSparcV9Target);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(TheSparcTarget,
+ createSparcMCCodeGenInfo);
+ TargetRegistry::RegisterMCCodeGenInfo(TheSparcV9Target,
+ createSparcMCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(TheSparcTarget, createSparcMCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(TheSparcTarget, createSparcMCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(TheSparcTarget,
+ createSparcMCSubtargetInfo);
+}
diff --git a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h
new file mode 100644
index 0000000..2fd9e3f
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h
@@ -0,0 +1,41 @@
+//===-- SparcMCTargetDesc.h - Sparc Target Descriptions ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides Sparc specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPARCMCTARGETDESC_H
+#define SPARCMCTARGETDESC_H
+
+namespace llvm {
+class MCSubtargetInfo;
+class Target;
+class StringRef;
+
+extern Target TheSparcTarget;
+extern Target TheSparcV9Target;
+
+} // End llvm namespace
+
+// Defines symbolic names for Sparc registers. This defines a mapping from
+// register name to register number.
+//
+#define GET_REGINFO_ENUM
+#include "SparcGenRegisterInfo.inc"
+
+// Defines symbolic names for the Sparc instructions.
+//
+#define GET_INSTRINFO_ENUM
+#include "SparcGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "SparcGenSubtargetInfo.inc"
+
+#endif
diff --git a/contrib/llvm/lib/Target/Sparc/Sparc.h b/contrib/llvm/lib/Target/Sparc/Sparc.h
new file mode 100644
index 0000000..7b2c614
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/Sparc.h
@@ -0,0 +1,109 @@
+//===-- Sparc.h - Top-level interface for Sparc representation --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// Sparc back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TARGET_SPARC_H
+#define TARGET_SPARC_H
+
+#include "MCTargetDesc/SparcMCTargetDesc.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetMachine.h"
+#include <cassert>
+
+namespace llvm {
+ class FunctionPass;
+ class SparcTargetMachine;
+ class formatted_raw_ostream;
+
+ FunctionPass *createSparcISelDag(SparcTargetMachine &TM);
+ FunctionPass *createSparcDelaySlotFillerPass(TargetMachine &TM);
+ FunctionPass *createSparcFPMoverPass(TargetMachine &TM);
+
+} // end namespace llvm;
+
+namespace llvm {
+ // Enums corresponding to Sparc condition codes, both icc's and fcc's. These
+ // values must be kept in sync with the ones in the .td file.
+ namespace SPCC {
+ enum CondCodes {
+ //ICC_A = 8 , // Always
+ //ICC_N = 0 , // Never
+ ICC_NE = 9 , // Not Equal
+ ICC_E = 1 , // Equal
+ ICC_G = 10 , // Greater
+ ICC_LE = 2 , // Less or Equal
+ ICC_GE = 11 , // Greater or Equal
+ ICC_L = 3 , // Less
+ ICC_GU = 12 , // Greater Unsigned
+ ICC_LEU = 4 , // Less or Equal Unsigned
+ ICC_CC = 13 , // Carry Clear/Great or Equal Unsigned
+ ICC_CS = 5 , // Carry Set/Less Unsigned
+ ICC_POS = 14 , // Positive
+ ICC_NEG = 6 , // Negative
+ ICC_VC = 15 , // Overflow Clear
+ ICC_VS = 7 , // Overflow Set
+
+ //FCC_A = 8+16, // Always
+ //FCC_N = 0+16, // Never
+ FCC_U = 7+16, // Unordered
+ FCC_G = 6+16, // Greater
+ FCC_UG = 5+16, // Unordered or Greater
+ FCC_L = 4+16, // Less
+ FCC_UL = 3+16, // Unordered or Less
+ FCC_LG = 2+16, // Less or Greater
+ FCC_NE = 1+16, // Not Equal
+ FCC_E = 9+16, // Equal
+ FCC_UE = 10+16, // Unordered or Equal
+ FCC_GE = 11+16, // Greater or Equal
+ FCC_UGE = 12+16, // Unordered or Greater or Equal
+ FCC_LE = 13+16, // Less or Equal
+ FCC_ULE = 14+16, // Unordered or Less or Equal
+ FCC_O = 15+16 // Ordered
+ };
+ }
+
+ inline static const char *SPARCCondCodeToString(SPCC::CondCodes CC) {
+ switch (CC) {
+ default: llvm_unreachable("Unknown condition code");
+ case SPCC::ICC_NE: return "ne";
+ case SPCC::ICC_E: return "e";
+ case SPCC::ICC_G: return "g";
+ case SPCC::ICC_LE: return "le";
+ case SPCC::ICC_GE: return "ge";
+ case SPCC::ICC_L: return "l";
+ case SPCC::ICC_GU: return "gu";
+ case SPCC::ICC_LEU: return "leu";
+ case SPCC::ICC_CC: return "cc";
+ case SPCC::ICC_CS: return "cs";
+ case SPCC::ICC_POS: return "pos";
+ case SPCC::ICC_NEG: return "neg";
+ case SPCC::ICC_VC: return "vc";
+ case SPCC::ICC_VS: return "vs";
+ case SPCC::FCC_U: return "u";
+ case SPCC::FCC_G: return "g";
+ case SPCC::FCC_UG: return "ug";
+ case SPCC::FCC_L: return "l";
+ case SPCC::FCC_UL: return "ul";
+ case SPCC::FCC_LG: return "lg";
+ case SPCC::FCC_NE: return "ne";
+ case SPCC::FCC_E: return "e";
+ case SPCC::FCC_UE: return "ue";
+ case SPCC::FCC_GE: return "ge";
+ case SPCC::FCC_UGE: return "uge";
+ case SPCC::FCC_LE: return "le";
+ case SPCC::FCC_ULE: return "ule";
+ case SPCC::FCC_O: return "o";
+ }
+ }
+} // end namespace llvm
+#endif
diff --git a/contrib/llvm/lib/Target/Sparc/Sparc.td b/contrib/llvm/lib/Target/Sparc/Sparc.td
new file mode 100644
index 0000000..7643366
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/Sparc.td
@@ -0,0 +1,72 @@
+//===- Sparc.td - Describe the Sparc Target Machine --------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces which we are implementing
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// SPARC Subtarget features.
+//
+
+def FeatureV9
+ : SubtargetFeature<"v9", "IsV9", "true",
+ "Enable SPARC-V9 instructions">;
+def FeatureV8Deprecated
+ : SubtargetFeature<"deprecated-v8", "V8DeprecatedInsts", "true",
+ "Enable deprecated V8 instructions in V9 mode">;
+def FeatureVIS
+ : SubtargetFeature<"vis", "IsVIS", "true",
+ "Enable UltraSPARC Visual Instruction Set extensions">;
+
+//===----------------------------------------------------------------------===//
+// Register File, Calling Conv, Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "SparcRegisterInfo.td"
+include "SparcCallingConv.td"
+include "SparcInstrInfo.td"
+
+def SparcInstrInfo : InstrInfo;
+
+//===----------------------------------------------------------------------===//
+// SPARC processors supported.
+//===----------------------------------------------------------------------===//
+
+class Proc<string Name, list<SubtargetFeature> Features>
+ : Processor<Name, NoItineraries, Features>;
+
+def : Proc<"generic", []>;
+def : Proc<"v8", []>;
+def : Proc<"supersparc", []>;
+def : Proc<"sparclite", []>;
+def : Proc<"f934", []>;
+def : Proc<"hypersparc", []>;
+def : Proc<"sparclite86x", []>;
+def : Proc<"sparclet", []>;
+def : Proc<"tsc701", []>;
+def : Proc<"v9", [FeatureV9]>;
+def : Proc<"ultrasparc", [FeatureV9, FeatureV8Deprecated]>;
+def : Proc<"ultrasparc3", [FeatureV9, FeatureV8Deprecated]>;
+def : Proc<"ultrasparc3-vis", [FeatureV9, FeatureV8Deprecated, FeatureVIS]>;
+
+
+//===----------------------------------------------------------------------===//
+// Declare the target which we are implementing
+//===----------------------------------------------------------------------===//
+
+def Sparc : Target {
+ // Pull in Instruction Info:
+ let InstructionSet = SparcInstrInfo;
+}
diff --git a/contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp b/contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
new file mode 100644
index 0000000..345e1bc
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
@@ -0,0 +1,251 @@
+//===-- SparcAsmPrinter.cpp - Sparc LLVM assembly writer ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to GAS-format SPARC assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "Sparc.h"
+#include "SparcInstrInfo.h"
+#include "SparcTargetMachine.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+ class SparcAsmPrinter : public AsmPrinter {
+ public:
+ explicit SparcAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : AsmPrinter(TM, Streamer) {}
+
+ virtual const char *getPassName() const {
+ return "Sparc Assembly Printer";
+ }
+
+ void printOperand(const MachineInstr *MI, int opNum, raw_ostream &OS);
+ void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &OS,
+ const char *Modifier = 0);
+ void printCCOperand(const MachineInstr *MI, int opNum, raw_ostream &OS);
+
+ virtual void EmitInstruction(const MachineInstr *MI) {
+ SmallString<128> Str;
+ raw_svector_ostream OS(Str);
+ printInstruction(MI, OS);
+ OutStreamer.EmitRawText(OS.str());
+ }
+ void printInstruction(const MachineInstr *MI, raw_ostream &OS);// autogen'd.
+ static const char *getRegisterName(unsigned RegNo);
+
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O);
+ bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O);
+
+ bool printGetPCX(const MachineInstr *MI, unsigned OpNo, raw_ostream &OS);
+
+ virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB)
+ const;
+ };
+} // end of anonymous namespace
+
+#include "SparcGenAsmWriter.inc"
+
+void SparcAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
+ raw_ostream &O) {
+ const MachineOperand &MO = MI->getOperand (opNum);
+ bool CloseParen = false;
+ if (MI->getOpcode() == SP::SETHIi && !MO.isReg() && !MO.isImm()) {
+ O << "%hi(";
+ CloseParen = true;
+ } else if ((MI->getOpcode() == SP::ORri || MI->getOpcode() == SP::ADDri) &&
+ !MO.isReg() && !MO.isImm()) {
+ O << "%lo(";
+ CloseParen = true;
+ }
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register:
+ O << "%" << LowercaseString(getRegisterName(MO.getReg()));
+ break;
+
+ case MachineOperand::MO_Immediate:
+ O << (int)MO.getImm();
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ O << *MO.getMBB()->getSymbol();
+ return;
+ case MachineOperand::MO_GlobalAddress:
+ O << *Mang->getSymbol(MO.getGlobal());
+ break;
+ case MachineOperand::MO_ExternalSymbol:
+ O << MO.getSymbolName();
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << "_"
+ << MO.getIndex();
+ break;
+ default:
+ llvm_unreachable("<unknown operand type>");
+ }
+ if (CloseParen) O << ")";
+}
+
+void SparcAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
+ raw_ostream &O, const char *Modifier) {
+ printOperand(MI, opNum, O);
+
+ // If this is an ADD operand, emit it like normal operands.
+ if (Modifier && !strcmp(Modifier, "arith")) {
+ O << ", ";
+ printOperand(MI, opNum+1, O);
+ return;
+ }
+
+ if (MI->getOperand(opNum+1).isReg() &&
+ MI->getOperand(opNum+1).getReg() == SP::G0)
+ return; // don't print "+%g0"
+ if (MI->getOperand(opNum+1).isImm() &&
+ MI->getOperand(opNum+1).getImm() == 0)
+ return; // don't print "+0"
+
+ O << "+";
+ if (MI->getOperand(opNum+1).isGlobal() ||
+ MI->getOperand(opNum+1).isCPI()) {
+ O << "%lo(";
+ printOperand(MI, opNum+1, O);
+ O << ")";
+ } else {
+ printOperand(MI, opNum+1, O);
+ }
+}
+
+bool SparcAsmPrinter::printGetPCX(const MachineInstr *MI, unsigned opNum,
+ raw_ostream &O) {
+ std::string operand = "";
+ const MachineOperand &MO = MI->getOperand(opNum);
+ switch (MO.getType()) {
+ default: assert(0 && "Operand is not a register ");
+ case MachineOperand::MO_Register:
+ assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) &&
+ "Operand is not a physical register ");
+ assert(MO.getReg() != SP::O7 &&
+ "%o7 is assigned as destination for getpcx!");
+ operand = "%" + LowercaseString(getRegisterName(MO.getReg()));
+ break;
+ }
+
+ unsigned mfNum = MI->getParent()->getParent()->getFunctionNumber();
+ unsigned bbNum = MI->getParent()->getNumber();
+
+ O << '\n' << ".LLGETPCH" << mfNum << '_' << bbNum << ":\n";
+ O << "\tcall\t.LLGETPC" << mfNum << '_' << bbNum << '\n' ;
+
+ O << "\t sethi\t"
+ << "%hi(_GLOBAL_OFFSET_TABLE_+(.-.LLGETPCH" << mfNum << '_' << bbNum
+ << ")), " << operand << '\n' ;
+
+ O << ".LLGETPC" << mfNum << '_' << bbNum << ":\n" ;
+ O << "\tor\t" << operand
+ << ", %lo(_GLOBAL_OFFSET_TABLE_+(.-.LLGETPCH" << mfNum << '_' << bbNum
+ << ")), " << operand << '\n';
+ O << "\tadd\t" << operand << ", %o7, " << operand << '\n';
+
+ return true;
+}
+
+void SparcAsmPrinter::printCCOperand(const MachineInstr *MI, int opNum,
+ raw_ostream &O) {
+ int CC = (int)MI->getOperand(opNum).getImm();
+ O << SPARCCondCodeToString((SPCC::CondCodes)CC);
+}
+
+/// PrintAsmOperand - Print out an operand for an inline asm expression.
+///
+bool SparcAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &O) {
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ default: return true; // Unknown modifier.
+ case 'r':
+ break;
+ }
+ }
+
+ printOperand(MI, OpNo, O);
+
+ return false;
+}
+
+bool SparcAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNo, unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &O) {
+ if (ExtraCode && ExtraCode[0])
+ return true; // Unknown modifier
+
+ O << '[';
+ printMemOperand(MI, OpNo, O);
+ O << ']';
+
+ return false;
+}
+
+/// isBlockOnlyReachableByFallthough - Return true if the basic block has
+/// exactly one predecessor and the control transfer mechanism between
+/// the predecessor and this block is a fall-through.
+///
+/// This overrides AsmPrinter's implementation to handle delay slots.
+bool SparcAsmPrinter::
+isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
+ // If this is a landing pad, it isn't a fall through. If it has no preds,
+ // then nothing falls through to it.
+ if (MBB->isLandingPad() || MBB->pred_empty())
+ return false;
+
+ // If there isn't exactly one predecessor, it can't be a fall through.
+ MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), PI2 = PI;
+ ++PI2;
+ if (PI2 != MBB->pred_end())
+ return false;
+
+ // The predecessor has to be immediately before this block.
+ const MachineBasicBlock *Pred = *PI;
+
+ if (!Pred->isLayoutSuccessor(MBB))
+ return false;
+
+ // Check if the last terminator is an unconditional branch.
+ MachineBasicBlock::const_iterator I = Pred->end();
+ while (I != Pred->begin() && !(--I)->getDesc().isTerminator())
+ ; // Noop
+ return I == Pred->end() || !I->getDesc().isBarrier();
+}
+
+
+
+// Force static initialization.
+extern "C" void LLVMInitializeSparcAsmPrinter() {
+ RegisterAsmPrinter<SparcAsmPrinter> X(TheSparcTarget);
+ RegisterAsmPrinter<SparcAsmPrinter> Y(TheSparcV9Target);
+}
diff --git a/contrib/llvm/lib/Target/Sparc/SparcCallingConv.td b/contrib/llvm/lib/Target/Sparc/SparcCallingConv.td
new file mode 100644
index 0000000..856f87a
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcCallingConv.td
@@ -0,0 +1,36 @@
+//===- SparcCallingConv.td - Calling Conventions Sparc -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This describes the calling conventions for the Sparc architectures.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Return Value Calling Conventions
+//===----------------------------------------------------------------------===//
+
+// Sparc 32-bit C return-value convention.
+def RetCC_Sparc32 : CallingConv<[
+ CCIfType<[i32], CCAssignToReg<[I0, I1, I2, I3, I4, I5]>>,
+ CCIfType<[f32], CCAssignToReg<[F0, F1, F2, F3]>>,
+ CCIfType<[f64], CCAssignToReg<[D0, D1]>>
+]>;
+
+// Sparc 32-bit C Calling convention.
+def CC_Sparc32 : CallingConv<[
+ //Custom assign SRet to [sp+64].
+ CCIfSRet<CCCustom<"CC_Sparc_Assign_SRet">>,
+ // i32 f32 arguments get passed in integer registers if there is space.
+ CCIfType<[i32, f32], CCAssignToReg<[I0, I1, I2, I3, I4, I5]>>,
+ // f64 arguments are split and passed through registers or through stack.
+ CCIfType<[f64], CCCustom<"CC_Sparc_Assign_f64">>,
+
+ // Alternatively, they are assigned to the stack in 4-byte aligned units.
+ CCAssignToStack<4, 4>
+]>;
diff --git a/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.cpp b/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.cpp
new file mode 100644
index 0000000..320c8ca
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.cpp
@@ -0,0 +1,80 @@
+//====- SparcFrameLowering.cpp - Sparc Frame Information -------*- C++ -*-====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Sparc implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SparcFrameLowering.h"
+#include "SparcInstrInfo.h"
+#include "SparcMachineFunctionInfo.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+void SparcFrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const SparcInstrInfo &TII =
+ *static_cast<const SparcInstrInfo*>(MF.getTarget().getInstrInfo());
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+ // Get the number of bytes to allocate from the FrameInfo
+ int NumBytes = (int) MFI->getStackSize();
+
+ // Emit the correct save instruction based on the number of bytes in
+ // the frame. Minimum stack frame size according to V8 ABI is:
+ // 16 words for register window spill
+ // 1 word for address of returned aggregate-value
+ // + 6 words for passing parameters on the stack
+ // ----------
+ // 23 words * 4 bytes per word = 92 bytes
+ NumBytes += 92;
+
+ // Round up to next doubleword boundary -- a double-word boundary
+ // is required by the ABI.
+ NumBytes = (NumBytes + 7) & ~7;
+ NumBytes = -NumBytes;
+
+ if (NumBytes >= -4096) {
+ BuildMI(MBB, MBBI, dl, TII.get(SP::SAVEri), SP::O6)
+ .addReg(SP::O6).addImm(NumBytes);
+ } else {
+ // Emit this the hard way. This clobbers G1 which we always know is
+ // available here.
+ unsigned OffHi = (unsigned)NumBytes >> 10U;
+ BuildMI(MBB, MBBI, dl, TII.get(SP::SETHIi), SP::G1).addImm(OffHi);
+ // Emit G1 = G1 + I6
+ BuildMI(MBB, MBBI, dl, TII.get(SP::ORri), SP::G1)
+ .addReg(SP::G1).addImm(NumBytes & ((1 << 10)-1));
+ BuildMI(MBB, MBBI, dl, TII.get(SP::SAVErr), SP::O6)
+ .addReg(SP::O6).addReg(SP::G1);
+ }
+}
+
+void SparcFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ const SparcInstrInfo &TII =
+ *static_cast<const SparcInstrInfo*>(MF.getTarget().getInstrInfo());
+ DebugLoc dl = MBBI->getDebugLoc();
+ assert(MBBI->getOpcode() == SP::RETL &&
+ "Can only put epilog before 'retl' instruction!");
+ BuildMI(MBB, MBBI, dl, TII.get(SP::RESTORErr), SP::G0).addReg(SP::G0)
+ .addReg(SP::G0);
+}
diff --git a/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.h b/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.h
new file mode 100644
index 0000000..9a2ddc8
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.h
@@ -0,0 +1,41 @@
+//===- SparcFrameLowering.h - Define frame lowering for Sparc --*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPARC_FRAMEINFO_H
+#define SPARC_FRAMEINFO_H
+
+#include "Sparc.h"
+#include "SparcSubtarget.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+ class SparcSubtarget;
+
+class SparcFrameLowering : public TargetFrameLowering {
+ const SparcSubtarget &STI;
+public:
+ explicit SparcFrameLowering(const SparcSubtarget &sti)
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 8, 0), STI(sti) {
+ }
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ bool hasFP(const MachineFunction &MF) const { return false; }
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp
new file mode 100644
index 0000000..8c6103d
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp
@@ -0,0 +1,212 @@
+//===-- SparcISelDAGToDAG.cpp - A dag to dag inst selector for Sparc ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the SPARC target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SparcTargetMachine.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Instruction Selector Implementation
+//===----------------------------------------------------------------------===//
+
+//===--------------------------------------------------------------------===//
+/// SparcDAGToDAGISel - SPARC specific code to select SPARC machine
+/// instructions for SelectionDAG operations.
+///
+namespace {
+class SparcDAGToDAGISel : public SelectionDAGISel {
+ /// Subtarget - Keep a pointer to the Sparc Subtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const SparcSubtarget &Subtarget;
+ SparcTargetMachine& TM;
+public:
+ explicit SparcDAGToDAGISel(SparcTargetMachine &tm)
+ : SelectionDAGISel(tm),
+ Subtarget(tm.getSubtarget<SparcSubtarget>()),
+ TM(tm) {
+ }
+
+ SDNode *Select(SDNode *N);
+
+ // Complex Pattern Selectors.
+ bool SelectADDRrr(SDValue N, SDValue &R1, SDValue &R2);
+ bool SelectADDRri(SDValue N, SDValue &Base, SDValue &Offset);
+
+ /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
+ /// inline asm expressions.
+ virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps);
+
+ virtual const char *getPassName() const {
+ return "SPARC DAG->DAG Pattern Instruction Selection";
+ }
+
+ // Include the pieces autogenerated from the target description.
+#include "SparcGenDAGISel.inc"
+
+private:
+ SDNode* getGlobalBaseReg();
+};
+} // end anonymous namespace
+
+SDNode* SparcDAGToDAGISel::getGlobalBaseReg() {
+ unsigned GlobalBaseReg = TM.getInstrInfo()->getGlobalBaseReg(MF);
+ return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode();
+}
+
+bool SparcDAGToDAGISel::SelectADDRri(SDValue Addr,
+ SDValue &Base, SDValue &Offset) {
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress)
+ return false; // direct calls.
+
+ if (Addr.getOpcode() == ISD::ADD) {
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
+ if (isInt<13>(CN->getSExtValue())) {
+ if (FrameIndexSDNode *FIN =
+ dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) {
+ // Constant offset from frame ref.
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ } else {
+ Base = Addr.getOperand(0);
+ }
+ Offset = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32);
+ return true;
+ }
+ }
+ if (Addr.getOperand(0).getOpcode() == SPISD::Lo) {
+ Base = Addr.getOperand(1);
+ Offset = Addr.getOperand(0).getOperand(0);
+ return true;
+ }
+ if (Addr.getOperand(1).getOpcode() == SPISD::Lo) {
+ Base = Addr.getOperand(0);
+ Offset = Addr.getOperand(1).getOperand(0);
+ return true;
+ }
+ }
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+}
+
+bool SparcDAGToDAGISel::SelectADDRrr(SDValue Addr, SDValue &R1, SDValue &R2) {
+ if (Addr.getOpcode() == ISD::FrameIndex) return false;
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress)
+ return false; // direct calls.
+
+ if (Addr.getOpcode() == ISD::ADD) {
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
+ if (isInt<13>(CN->getSExtValue()))
+ return false; // Let the reg+imm pattern catch this!
+ if (Addr.getOperand(0).getOpcode() == SPISD::Lo ||
+ Addr.getOperand(1).getOpcode() == SPISD::Lo)
+ return false; // Let the reg+imm pattern catch this!
+ R1 = Addr.getOperand(0);
+ R2 = Addr.getOperand(1);
+ return true;
+ }
+
+ R1 = Addr;
+ R2 = CurDAG->getRegister(SP::G0, MVT::i32);
+ return true;
+}
+
+SDNode *SparcDAGToDAGISel::Select(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ if (N->isMachineOpcode())
+ return NULL; // Already selected.
+
+ switch (N->getOpcode()) {
+ default: break;
+ case SPISD::GLOBAL_BASE_REG:
+ return getGlobalBaseReg();
+
+ case ISD::SDIV:
+ case ISD::UDIV: {
+ // FIXME: should use a custom expander to expose the SRA to the dag.
+ SDValue DivLHS = N->getOperand(0);
+ SDValue DivRHS = N->getOperand(1);
+
+ // Set the Y register to the high-part.
+ SDValue TopPart;
+ if (N->getOpcode() == ISD::SDIV) {
+ TopPart = SDValue(CurDAG->getMachineNode(SP::SRAri, dl, MVT::i32, DivLHS,
+ CurDAG->getTargetConstant(31, MVT::i32)), 0);
+ } else {
+ TopPart = CurDAG->getRegister(SP::G0, MVT::i32);
+ }
+ TopPart = SDValue(CurDAG->getMachineNode(SP::WRYrr, dl, MVT::Glue, TopPart,
+ CurDAG->getRegister(SP::G0, MVT::i32)), 0);
+
+ // FIXME: Handle div by immediate.
+ unsigned Opcode = N->getOpcode() == ISD::SDIV ? SP::SDIVrr : SP::UDIVrr;
+ return CurDAG->SelectNodeTo(N, Opcode, MVT::i32, DivLHS, DivRHS,
+ TopPart);
+ }
+ case ISD::MULHU:
+ case ISD::MULHS: {
+ // FIXME: Handle mul by immediate.
+ SDValue MulLHS = N->getOperand(0);
+ SDValue MulRHS = N->getOperand(1);
+ unsigned Opcode = N->getOpcode() == ISD::MULHU ? SP::UMULrr : SP::SMULrr;
+ SDNode *Mul = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::Glue,
+ MulLHS, MulRHS);
+ // The high part is in the Y register.
+ return CurDAG->SelectNodeTo(N, SP::RDY, MVT::i32, SDValue(Mul, 1));
+ return NULL;
+ }
+ }
+
+ return SelectCode(N);
+}
+
+
+/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
+/// inline asm expressions.
+bool
+SparcDAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps) {
+ SDValue Op0, Op1;
+ switch (ConstraintCode) {
+ default: return true;
+ case 'm': // memory
+ if (!SelectADDRrr(Op, Op0, Op1))
+ SelectADDRri(Op, Op0, Op1);
+ break;
+ }
+
+ OutOps.push_back(Op0);
+ OutOps.push_back(Op1);
+ return false;
+}
+
+/// createSparcISelDag - This pass converts a legalized DAG into a
+/// SPARC-specific DAG, ready for instruction scheduling.
+///
+FunctionPass *llvm::createSparcISelDag(SparcTargetMachine &TM) {
+ return new SparcDAGToDAGISel(TM);
+}
diff --git a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp
new file mode 100644
index 0000000..d70b163
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp
@@ -0,0 +1,1275 @@
+//===-- SparcISelLowering.cpp - Sparc DAG Lowering Implementation ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the interfaces that Sparc uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SparcISelLowering.h"
+#include "SparcTargetMachine.h"
+#include "SparcMachineFunctionInfo.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/ADT/VectorExtras.h"
+#include "llvm/Support/ErrorHandling.h"
+using namespace llvm;
+
+
+//===----------------------------------------------------------------------===//
+// Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+static bool CC_Sparc_Assign_SRet(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT, CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags, CCState &State)
+{
+ assert (ArgFlags.isSRet());
+
+ //Assign SRet argument
+ State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
+ 0,
+ LocVT, LocInfo));
+ return true;
+}
+
+static bool CC_Sparc_Assign_f64(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT, CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags, CCState &State)
+{
+ static const unsigned RegList[] = {
+ SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5
+ };
+ //Try to get first reg
+ if (unsigned Reg = State.AllocateReg(RegList, 6)) {
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ } else {
+ //Assign whole thing in stack
+ State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
+ State.AllocateStack(8,4),
+ LocVT, LocInfo));
+ return true;
+ }
+
+ //Try to get second reg
+ if (unsigned Reg = State.AllocateReg(RegList, 6))
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ else
+ State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
+ State.AllocateStack(4,4),
+ LocVT, LocInfo));
+ return true;
+}
+
+#include "SparcGenCallingConv.inc"
+
+SDValue
+SparcTargetLowering::LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const {
+
+ MachineFunction &MF = DAG.getMachineFunction();
+
+ // CCValAssign - represent the assignment of the return value to locations.
+ SmallVector<CCValAssign, 16> RVLocs;
+
+ // CCState - Info about the registers and stack slot.
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ DAG.getTarget(), RVLocs, *DAG.getContext());
+
+ // Analize return values.
+ CCInfo.AnalyzeReturn(Outs, RetCC_Sparc32);
+
+ // If this is the first return lowered for this function, add the regs to the
+ // liveout set for the function.
+ if (MF.getRegInfo().liveout_empty()) {
+ for (unsigned i = 0; i != RVLocs.size(); ++i)
+ if (RVLocs[i].isRegLoc())
+ MF.getRegInfo().addLiveOut(RVLocs[i].getLocReg());
+ }
+
+ SDValue Flag;
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
+ OutVals[i], Flag);
+
+ // Guarantee that all emitted copies are stuck together with flags.
+ Flag = Chain.getValue(1);
+ }
+
+ unsigned RetAddrOffset = 8; //Call Inst + Delay Slot
+ // If the function returns a struct, copy the SRetReturnReg to I0
+ if (MF.getFunction()->hasStructRetAttr()) {
+ SparcMachineFunctionInfo *SFI = MF.getInfo<SparcMachineFunctionInfo>();
+ unsigned Reg = SFI->getSRetReturnReg();
+ if (!Reg)
+ llvm_unreachable("sret virtual register not created in the entry block");
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy());
+ Chain = DAG.getCopyToReg(Chain, dl, SP::I0, Val, Flag);
+ Flag = Chain.getValue(1);
+ if (MF.getRegInfo().liveout_empty())
+ MF.getRegInfo().addLiveOut(SP::I0);
+ RetAddrOffset = 12; // CallInst + Delay Slot + Unimp
+ }
+
+ SDValue RetAddrOffsetNode = DAG.getConstant(RetAddrOffset, MVT::i32);
+
+ if (Flag.getNode())
+ return DAG.getNode(SPISD::RET_FLAG, dl, MVT::Other, Chain,
+ RetAddrOffsetNode, Flag);
+ return DAG.getNode(SPISD::RET_FLAG, dl, MVT::Other, Chain,
+ RetAddrOffsetNode);
+}
+
+/// LowerFormalArguments - V8 uses a very simple ABI, where all values are
+/// passed in either one or two GPRs, including FP values. TODO: we should
+/// pass FP values in FP registers for fastcc functions.
+SDValue
+SparcTargetLowering::LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals)
+ const {
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ SparcMachineFunctionInfo *FuncInfo = MF.getInfo<SparcMachineFunctionInfo>();
+
+ // Assign locations to all of the incoming arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+ CCInfo.AnalyzeFormalArguments(Ins, CC_Sparc32);
+
+ const unsigned StackOffset = 92;
+
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+
+ if (i == 0 && Ins[i].Flags.isSRet()) {
+ //Get SRet from [%fp+64]
+ int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, 64, true);
+ SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
+ SDValue Arg = DAG.getLoad(MVT::i32, dl, Chain, FIPtr,
+ MachinePointerInfo(),
+ false, false, 0);
+ InVals.push_back(Arg);
+ continue;
+ }
+
+ if (VA.isRegLoc()) {
+ if (VA.needsCustom()) {
+ assert(VA.getLocVT() == MVT::f64);
+ unsigned VRegHi = RegInfo.createVirtualRegister(&SP::IntRegsRegClass);
+ MF.getRegInfo().addLiveIn(VA.getLocReg(), VRegHi);
+ SDValue HiVal = DAG.getCopyFromReg(Chain, dl, VRegHi, MVT::i32);
+
+ assert(i+1 < e);
+ CCValAssign &NextVA = ArgLocs[++i];
+
+ SDValue LoVal;
+ if (NextVA.isMemLoc()) {
+ int FrameIdx = MF.getFrameInfo()->
+ CreateFixedObject(4, StackOffset+NextVA.getLocMemOffset(),true);
+ SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
+ LoVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr,
+ MachinePointerInfo(),
+ false, false, 0);
+ } else {
+ unsigned loReg = MF.addLiveIn(NextVA.getLocReg(),
+ &SP::IntRegsRegClass);
+ LoVal = DAG.getCopyFromReg(Chain, dl, loReg, MVT::i32);
+ }
+ SDValue WholeValue =
+ DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, LoVal, HiVal);
+ WholeValue = DAG.getNode(ISD::BITCAST, dl, MVT::f64, WholeValue);
+ InVals.push_back(WholeValue);
+ continue;
+ }
+ unsigned VReg = RegInfo.createVirtualRegister(&SP::IntRegsRegClass);
+ MF.getRegInfo().addLiveIn(VA.getLocReg(), VReg);
+ SDValue Arg = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
+ if (VA.getLocVT() == MVT::f32)
+ Arg = DAG.getNode(ISD::BITCAST, dl, MVT::f32, Arg);
+ else if (VA.getLocVT() != MVT::i32) {
+ Arg = DAG.getNode(ISD::AssertSext, dl, MVT::i32, Arg,
+ DAG.getValueType(VA.getLocVT()));
+ Arg = DAG.getNode(ISD::TRUNCATE, dl, VA.getLocVT(), Arg);
+ }
+ InVals.push_back(Arg);
+ continue;
+ }
+
+ assert(VA.isMemLoc());
+
+ unsigned Offset = VA.getLocMemOffset()+StackOffset;
+
+ if (VA.needsCustom()) {
+ assert(VA.getValVT() == MVT::f64);
+ //If it is double-word aligned, just load.
+ if (Offset % 8 == 0) {
+ int FI = MF.getFrameInfo()->CreateFixedObject(8,
+ Offset,
+ true);
+ SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy());
+ SDValue Load = DAG.getLoad(VA.getValVT(), dl, Chain, FIPtr,
+ MachinePointerInfo(),
+ false,false, 0);
+ InVals.push_back(Load);
+ continue;
+ }
+
+ int FI = MF.getFrameInfo()->CreateFixedObject(4,
+ Offset,
+ true);
+ SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy());
+ SDValue HiVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr,
+ MachinePointerInfo(),
+ false, false, 0);
+ int FI2 = MF.getFrameInfo()->CreateFixedObject(4,
+ Offset+4,
+ true);
+ SDValue FIPtr2 = DAG.getFrameIndex(FI2, getPointerTy());
+
+ SDValue LoVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr2,
+ MachinePointerInfo(),
+ false, false, 0);
+
+ SDValue WholeValue =
+ DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, LoVal, HiVal);
+ WholeValue = DAG.getNode(ISD::BITCAST, dl, MVT::f64, WholeValue);
+ InVals.push_back(WholeValue);
+ continue;
+ }
+
+ int FI = MF.getFrameInfo()->CreateFixedObject(4,
+ Offset,
+ true);
+ SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy());
+ SDValue Load ;
+ if (VA.getValVT() == MVT::i32 || VA.getValVT() == MVT::f32) {
+ Load = DAG.getLoad(VA.getValVT(), dl, Chain, FIPtr,
+ MachinePointerInfo(),
+ false, false, 0);
+ } else {
+ ISD::LoadExtType LoadOp = ISD::SEXTLOAD;
+ // Sparc is big endian, so add an offset based on the ObjectVT.
+ unsigned Offset = 4-std::max(1U, VA.getValVT().getSizeInBits()/8);
+ FIPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, FIPtr,
+ DAG.getConstant(Offset, MVT::i32));
+ Load = DAG.getExtLoad(LoadOp, dl, MVT::i32, Chain, FIPtr,
+ MachinePointerInfo(),
+ VA.getValVT(), false, false,0);
+ Load = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Load);
+ }
+ InVals.push_back(Load);
+ }
+
+ if (MF.getFunction()->hasStructRetAttr()) {
+ //Copy the SRet Argument to SRetReturnReg
+ SparcMachineFunctionInfo *SFI = MF.getInfo<SparcMachineFunctionInfo>();
+ unsigned Reg = SFI->getSRetReturnReg();
+ if (!Reg) {
+ Reg = MF.getRegInfo().createVirtualRegister(&SP::IntRegsRegClass);
+ SFI->setSRetReturnReg(Reg);
+ }
+ SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[0]);
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
+ }
+
+ // Store remaining ArgRegs to the stack if this is a varargs function.
+ if (isVarArg) {
+ static const unsigned ArgRegs[] = {
+ SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5
+ };
+ unsigned NumAllocated = CCInfo.getFirstUnallocated(ArgRegs, 6);
+ const unsigned *CurArgReg = ArgRegs+NumAllocated, *ArgRegEnd = ArgRegs+6;
+ unsigned ArgOffset = CCInfo.getNextStackOffset();
+ if (NumAllocated == 6)
+ ArgOffset += StackOffset;
+ else {
+ assert(!ArgOffset);
+ ArgOffset = 68+4*NumAllocated;
+ }
+
+ // Remember the vararg offset for the va_start implementation.
+ FuncInfo->setVarArgsFrameOffset(ArgOffset);
+
+ std::vector<SDValue> OutChains;
+
+ for (; CurArgReg != ArgRegEnd; ++CurArgReg) {
+ unsigned VReg = RegInfo.createVirtualRegister(&SP::IntRegsRegClass);
+ MF.getRegInfo().addLiveIn(*CurArgReg, VReg);
+ SDValue Arg = DAG.getCopyFromReg(DAG.getRoot(), dl, VReg, MVT::i32);
+
+ int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset,
+ true);
+ SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
+
+ OutChains.push_back(DAG.getStore(DAG.getRoot(), dl, Arg, FIPtr,
+ MachinePointerInfo(),
+ false, false, 0));
+ ArgOffset += 4;
+ }
+
+ if (!OutChains.empty()) {
+ OutChains.push_back(Chain);
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &OutChains[0], OutChains.size());
+ }
+ }
+
+ return Chain;
+}
+
+SDValue
+SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ // Sparc target does not yet support tail call optimization.
+ isTailCall = false;
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ DAG.getTarget(), ArgLocs, *DAG.getContext());
+ CCInfo.AnalyzeCallOperands(Outs, CC_Sparc32);
+
+ // Get the size of the outgoing arguments stack space requirement.
+ unsigned ArgsSize = CCInfo.getNextStackOffset();
+
+ // Keep stack frames 8-byte aligned.
+ ArgsSize = (ArgsSize+7) & ~7;
+
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+
+ //Create local copies for byval args.
+ SmallVector<SDValue, 8> ByValArgs;
+ for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+ if (!Flags.isByVal())
+ continue;
+
+ SDValue Arg = OutVals[i];
+ unsigned Size = Flags.getByValSize();
+ unsigned Align = Flags.getByValAlign();
+
+ int FI = MFI->CreateStackObject(Size, Align, false);
+ SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy());
+ SDValue SizeNode = DAG.getConstant(Size, MVT::i32);
+
+ Chain = DAG.getMemcpy(Chain, dl, FIPtr, Arg, SizeNode, Align,
+ false, //isVolatile,
+ (Size <= 32), //AlwaysInline if size <= 32
+ MachinePointerInfo(), MachinePointerInfo());
+ ByValArgs.push_back(FIPtr);
+ }
+
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(ArgsSize, true));
+
+ SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+ SmallVector<SDValue, 8> MemOpChains;
+
+ const unsigned StackOffset = 92;
+ bool hasStructRetAttr = false;
+ // Walk the register/memloc assignments, inserting copies/loads.
+ for (unsigned i = 0, realArgIdx = 0, byvalArgIdx = 0, e = ArgLocs.size();
+ i != e;
+ ++i, ++realArgIdx) {
+ CCValAssign &VA = ArgLocs[i];
+ SDValue Arg = OutVals[realArgIdx];
+
+ ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
+
+ //Use local copy if it is a byval arg.
+ if (Flags.isByVal())
+ Arg = ByValArgs[byvalArgIdx++];
+
+ // Promote the value if needed.
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::AExt:
+ Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::BCvt:
+ Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
+ break;
+ }
+
+ if (Flags.isSRet()) {
+ assert(VA.needsCustom());
+ // store SRet argument in %sp+64
+ SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
+ SDValue PtrOff = DAG.getIntPtrConstant(64);
+ PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(),
+ false, false, 0));
+ hasStructRetAttr = true;
+ continue;
+ }
+
+ if (VA.needsCustom()) {
+ assert(VA.getLocVT() == MVT::f64);
+
+ if (VA.isMemLoc()) {
+ unsigned Offset = VA.getLocMemOffset() + StackOffset;
+ //if it is double-word aligned, just store.
+ if (Offset % 8 == 0) {
+ SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
+ SDValue PtrOff = DAG.getIntPtrConstant(Offset);
+ PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(),
+ false, false, 0));
+ continue;
+ }
+ }
+
+ SDValue StackPtr = DAG.CreateStackTemporary(MVT::f64, MVT::i32);
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl,
+ Arg, StackPtr, MachinePointerInfo(),
+ false, false, 0);
+ // Sparc is big-endian, so the high part comes first.
+ SDValue Hi = DAG.getLoad(MVT::i32, dl, Store, StackPtr,
+ MachinePointerInfo(), false, false, 0);
+ // Increment the pointer to the other half.
+ StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
+ DAG.getIntPtrConstant(4));
+ // Load the low part.
+ SDValue Lo = DAG.getLoad(MVT::i32, dl, Store, StackPtr,
+ MachinePointerInfo(), false, false, 0);
+
+ if (VA.isRegLoc()) {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Hi));
+ assert(i+1 != e);
+ CCValAssign &NextVA = ArgLocs[++i];
+ if (NextVA.isRegLoc()) {
+ RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Lo));
+ } else {
+ //Store the low part in stack.
+ unsigned Offset = NextVA.getLocMemOffset() + StackOffset;
+ SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
+ SDValue PtrOff = DAG.getIntPtrConstant(Offset);
+ PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Lo, PtrOff,
+ MachinePointerInfo(),
+ false, false, 0));
+ }
+ } else {
+ unsigned Offset = VA.getLocMemOffset() + StackOffset;
+ // Store the high part.
+ SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
+ SDValue PtrOff = DAG.getIntPtrConstant(Offset);
+ PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Hi, PtrOff,
+ MachinePointerInfo(),
+ false, false, 0));
+ // Store the low part.
+ PtrOff = DAG.getIntPtrConstant(Offset+4);
+ PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Lo, PtrOff,
+ MachinePointerInfo(),
+ false, false, 0));
+ }
+ continue;
+ }
+
+ // Arguments that can be passed on register must be kept at
+ // RegsToPass vector
+ if (VA.isRegLoc()) {
+ if (VA.getLocVT() != MVT::f32) {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ continue;
+ }
+ Arg = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ continue;
+ }
+
+ assert(VA.isMemLoc());
+
+ // Create a store off the stack pointer for this argument.
+ SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
+ SDValue PtrOff = DAG.getIntPtrConstant(VA.getLocMemOffset()+StackOffset);
+ PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(),
+ false, false, 0));
+ }
+
+
+ // Emit all stores, make sure the occur before any copies into physregs.
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+ // Build a sequence of copy-to-reg nodes chained together with token
+ // chain and flag operands which copy the outgoing args into registers.
+ // The InFlag in necessary since all emitted instructions must be
+ // stuck together.
+ SDValue InFlag;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ unsigned Reg = RegsToPass[i].first;
+ // Remap I0->I7 -> O0->O7.
+ if (Reg >= SP::I0 && Reg <= SP::I7)
+ Reg = Reg-SP::I0+SP::O0;
+
+ Chain = DAG.getCopyToReg(Chain, dl, Reg, RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ unsigned SRetArgSize = (hasStructRetAttr)? getSRetArgSize(DAG, Callee):0;
+
+ // If the callee is a GlobalAddress node (quite common, every direct call is)
+ // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
+ // Likewise ExternalSymbol -> TargetExternalSymbol.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, MVT::i32);
+ else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee))
+ Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i32);
+
+ // Returns a chain & a flag for retval copy to use
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+ if (hasStructRetAttr)
+ Ops.push_back(DAG.getTargetConstant(SRetArgSize, MVT::i32));
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ unsigned Reg = RegsToPass[i].first;
+ if (Reg >= SP::I0 && Reg <= SP::I7)
+ Reg = Reg-SP::I0+SP::O0;
+
+ Ops.push_back(DAG.getRegister(Reg, RegsToPass[i].second.getValueType()));
+ }
+ if (InFlag.getNode())
+ Ops.push_back(InFlag);
+
+ Chain = DAG.getNode(SPISD::CALL, dl, NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(ArgsSize, true),
+ DAG.getIntPtrConstant(0, true), InFlag);
+ InFlag = Chain.getValue(1);
+
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState RVInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ DAG.getTarget(), RVLocs, *DAG.getContext());
+
+ RVInfo.AnalyzeCallResult(Ins, RetCC_Sparc32);
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ unsigned Reg = RVLocs[i].getLocReg();
+
+ // Remap I0->I7 -> O0->O7.
+ if (Reg >= SP::I0 && Reg <= SP::I7)
+ Reg = Reg-SP::I0+SP::O0;
+
+ Chain = DAG.getCopyFromReg(Chain, dl, Reg,
+ RVLocs[i].getValVT(), InFlag).getValue(1);
+ InFlag = Chain.getValue(2);
+ InVals.push_back(Chain.getValue(0));
+ }
+
+ return Chain;
+}
+
+unsigned
+SparcTargetLowering::getSRetArgSize(SelectionDAG &DAG, SDValue Callee) const
+{
+ const Function *CalleeFn = 0;
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ CalleeFn = dyn_cast<Function>(G->getGlobal());
+ } else if (ExternalSymbolSDNode *E =
+ dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ const Function *Fn = DAG.getMachineFunction().getFunction();
+ const Module *M = Fn->getParent();
+ CalleeFn = M->getFunction(E->getSymbol());
+ }
+
+ if (!CalleeFn)
+ return 0;
+
+ assert(CalleeFn->hasStructRetAttr() &&
+ "Callee does not have the StructRet attribute.");
+
+ PointerType *Ty = cast<PointerType>(CalleeFn->arg_begin()->getType());
+ Type *ElementTy = Ty->getElementType();
+ return getTargetData()->getTypeAllocSize(ElementTy);
+}
+
+//===----------------------------------------------------------------------===//
+// TargetLowering Implementation
+//===----------------------------------------------------------------------===//
+
+/// IntCondCCodeToICC - Convert a DAG integer condition code to a SPARC ICC
+/// condition.
+static SPCC::CondCodes IntCondCCodeToICC(ISD::CondCode CC) {
+ switch (CC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ISD::SETEQ: return SPCC::ICC_E;
+ case ISD::SETNE: return SPCC::ICC_NE;
+ case ISD::SETLT: return SPCC::ICC_L;
+ case ISD::SETGT: return SPCC::ICC_G;
+ case ISD::SETLE: return SPCC::ICC_LE;
+ case ISD::SETGE: return SPCC::ICC_GE;
+ case ISD::SETULT: return SPCC::ICC_CS;
+ case ISD::SETULE: return SPCC::ICC_LEU;
+ case ISD::SETUGT: return SPCC::ICC_GU;
+ case ISD::SETUGE: return SPCC::ICC_CC;
+ }
+}
+
+/// FPCondCCodeToFCC - Convert a DAG floatingp oint condition code to a SPARC
+/// FCC condition.
+static SPCC::CondCodes FPCondCCodeToFCC(ISD::CondCode CC) {
+ switch (CC) {
+ default: llvm_unreachable("Unknown fp condition code!");
+ case ISD::SETEQ:
+ case ISD::SETOEQ: return SPCC::FCC_E;
+ case ISD::SETNE:
+ case ISD::SETUNE: return SPCC::FCC_NE;
+ case ISD::SETLT:
+ case ISD::SETOLT: return SPCC::FCC_L;
+ case ISD::SETGT:
+ case ISD::SETOGT: return SPCC::FCC_G;
+ case ISD::SETLE:
+ case ISD::SETOLE: return SPCC::FCC_LE;
+ case ISD::SETGE:
+ case ISD::SETOGE: return SPCC::FCC_GE;
+ case ISD::SETULT: return SPCC::FCC_UL;
+ case ISD::SETULE: return SPCC::FCC_ULE;
+ case ISD::SETUGT: return SPCC::FCC_UG;
+ case ISD::SETUGE: return SPCC::FCC_UGE;
+ case ISD::SETUO: return SPCC::FCC_U;
+ case ISD::SETO: return SPCC::FCC_O;
+ case ISD::SETONE: return SPCC::FCC_LG;
+ case ISD::SETUEQ: return SPCC::FCC_UE;
+ }
+}
+
+SparcTargetLowering::SparcTargetLowering(TargetMachine &TM)
+ : TargetLowering(TM, new TargetLoweringObjectFileELF()) {
+
+ // Set up the register classes.
+ addRegisterClass(MVT::i32, SP::IntRegsRegisterClass);
+ addRegisterClass(MVT::f32, SP::FPRegsRegisterClass);
+ addRegisterClass(MVT::f64, SP::DFPRegsRegisterClass);
+
+ // Turn FP extload into load/fextend
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+ // Sparc doesn't have i1 sign extending load
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+ // Turn FP truncstore into trunc + store.
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
+ // Custom legalize GlobalAddress nodes into LO/HI parts.
+ setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
+ setOperationAction(ISD::ConstantPool , MVT::i32, Custom);
+
+ // Sparc doesn't have sext_inreg, replace them with shl/sra
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
+
+ // Sparc has no REM or DIVREM operations.
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
+
+ // Custom expand fp<->sint
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
+
+ // Expand fp<->uint
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
+
+ setOperationAction(ISD::BITCAST, MVT::f32, Expand);
+ setOperationAction(ISD::BITCAST, MVT::i32, Expand);
+
+ // Sparc has no select or setcc: expand to SELECT_CC.
+ setOperationAction(ISD::SELECT, MVT::i32, Expand);
+ setOperationAction(ISD::SELECT, MVT::f32, Expand);
+ setOperationAction(ISD::SELECT, MVT::f64, Expand);
+ setOperationAction(ISD::SETCC, MVT::i32, Expand);
+ setOperationAction(ISD::SETCC, MVT::f32, Expand);
+ setOperationAction(ISD::SETCC, MVT::f64, Expand);
+
+ // Sparc doesn't have BRCOND either, it has BR_CC.
+ setOperationAction(ISD::BRCOND, MVT::Other, Expand);
+ setOperationAction(ISD::BRIND, MVT::Other, Expand);
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i32, Custom);
+ setOperationAction(ISD::BR_CC, MVT::f32, Custom);
+ setOperationAction(ISD::BR_CC, MVT::f64, Custom);
+
+ setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
+
+ // FIXME: There are instructions available for ATOMIC_FENCE
+ // on SparcV8 and later.
+ setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);
+
+ setOperationAction(ISD::FSIN , MVT::f64, Expand);
+ setOperationAction(ISD::FCOS , MVT::f64, Expand);
+ setOperationAction(ISD::FREM , MVT::f64, Expand);
+ setOperationAction(ISD::FMA , MVT::f64, Expand);
+ setOperationAction(ISD::FSIN , MVT::f32, Expand);
+ setOperationAction(ISD::FCOS , MVT::f32, Expand);
+ setOperationAction(ISD::FREM , MVT::f32, Expand);
+ setOperationAction(ISD::FMA , MVT::f32, Expand);
+ setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ , MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ , MVT::i32, Expand);
+ setOperationAction(ISD::ROTL , MVT::i32, Expand);
+ setOperationAction(ISD::ROTR , MVT::i32, Expand);
+ setOperationAction(ISD::BSWAP, MVT::i32, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+ setOperationAction(ISD::FPOW , MVT::f64, Expand);
+ setOperationAction(ISD::FPOW , MVT::f32, Expand);
+
+ setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
+
+ // FIXME: Sparc provides these multiplies, but we don't have them yet.
+ setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
+
+ setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
+
+ // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
+ setOperationAction(ISD::VASTART , MVT::Other, Custom);
+ // VAARG needs to be lowered to not do unaligned accesses for doubles.
+ setOperationAction(ISD::VAARG , MVT::Other, Custom);
+
+ // Use the default implementation.
+ setOperationAction(ISD::VACOPY , MVT::Other, Expand);
+ setOperationAction(ISD::VAEND , MVT::Other, Expand);
+ setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom);
+
+ // No debug info support yet.
+ setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
+
+ setStackPointerRegisterToSaveRestore(SP::O6);
+
+ if (TM.getSubtarget<SparcSubtarget>().isV9())
+ setOperationAction(ISD::CTPOP, MVT::i32, Legal);
+
+ setMinFunctionAlignment(2);
+
+ computeRegisterProperties();
+}
+
+const char *SparcTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ default: return 0;
+ case SPISD::CMPICC: return "SPISD::CMPICC";
+ case SPISD::CMPFCC: return "SPISD::CMPFCC";
+ case SPISD::BRICC: return "SPISD::BRICC";
+ case SPISD::BRFCC: return "SPISD::BRFCC";
+ case SPISD::SELECT_ICC: return "SPISD::SELECT_ICC";
+ case SPISD::SELECT_FCC: return "SPISD::SELECT_FCC";
+ case SPISD::Hi: return "SPISD::Hi";
+ case SPISD::Lo: return "SPISD::Lo";
+ case SPISD::FTOI: return "SPISD::FTOI";
+ case SPISD::ITOF: return "SPISD::ITOF";
+ case SPISD::CALL: return "SPISD::CALL";
+ case SPISD::RET_FLAG: return "SPISD::RET_FLAG";
+ case SPISD::GLOBAL_BASE_REG: return "SPISD::GLOBAL_BASE_REG";
+ case SPISD::FLUSHW: return "SPISD::FLUSHW";
+ }
+}
+
+/// isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to
+/// be zero. Op is expected to be a target specific node. Used by DAG
+/// combiner.
+void SparcTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
+ const APInt &Mask,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth) const {
+ APInt KnownZero2, KnownOne2;
+ KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0); // Don't know anything.
+
+ switch (Op.getOpcode()) {
+ default: break;
+ case SPISD::SELECT_ICC:
+ case SPISD::SELECT_FCC:
+ DAG.ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne,
+ Depth+1);
+ DAG.ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2,
+ Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // Only known if known in both the LHS and RHS.
+ KnownOne &= KnownOne2;
+ KnownZero &= KnownZero2;
+ break;
+ }
+}
+
+// Look at LHS/RHS/CC and see if they are a lowered setcc instruction. If so
+// set LHS/RHS and SPCC to the LHS/RHS of the setcc and SPCC to the condition.
+static void LookThroughSetCC(SDValue &LHS, SDValue &RHS,
+ ISD::CondCode CC, unsigned &SPCC) {
+ if (isa<ConstantSDNode>(RHS) &&
+ cast<ConstantSDNode>(RHS)->isNullValue() &&
+ CC == ISD::SETNE &&
+ ((LHS.getOpcode() == SPISD::SELECT_ICC &&
+ LHS.getOperand(3).getOpcode() == SPISD::CMPICC) ||
+ (LHS.getOpcode() == SPISD::SELECT_FCC &&
+ LHS.getOperand(3).getOpcode() == SPISD::CMPFCC)) &&
+ isa<ConstantSDNode>(LHS.getOperand(0)) &&
+ isa<ConstantSDNode>(LHS.getOperand(1)) &&
+ cast<ConstantSDNode>(LHS.getOperand(0))->isOne() &&
+ cast<ConstantSDNode>(LHS.getOperand(1))->isNullValue()) {
+ SDValue CMPCC = LHS.getOperand(3);
+ SPCC = cast<ConstantSDNode>(LHS.getOperand(2))->getZExtValue();
+ LHS = CMPCC.getOperand(0);
+ RHS = CMPCC.getOperand(1);
+ }
+}
+
+SDValue SparcTargetLowering::LowerGlobalAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ // FIXME there isn't really any debug info here
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32);
+ SDValue Hi = DAG.getNode(SPISD::Hi, dl, MVT::i32, GA);
+ SDValue Lo = DAG.getNode(SPISD::Lo, dl, MVT::i32, GA);
+
+ if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
+ return DAG.getNode(ISD::ADD, dl, MVT::i32, Lo, Hi);
+
+ SDValue GlobalBase = DAG.getNode(SPISD::GLOBAL_BASE_REG, dl,
+ getPointerTy());
+ SDValue RelAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, Lo, Hi);
+ SDValue AbsAddr = DAG.getNode(ISD::ADD, dl, MVT::i32,
+ GlobalBase, RelAddr);
+ return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
+ AbsAddr, MachinePointerInfo(), false, false, 0);
+}
+
+SDValue SparcTargetLowering::LowerConstantPool(SDValue Op,
+ SelectionDAG &DAG) const {
+ ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
+ // FIXME there isn't really any debug info here
+ DebugLoc dl = Op.getDebugLoc();
+ const Constant *C = N->getConstVal();
+ SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment());
+ SDValue Hi = DAG.getNode(SPISD::Hi, dl, MVT::i32, CP);
+ SDValue Lo = DAG.getNode(SPISD::Lo, dl, MVT::i32, CP);
+ if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
+ return DAG.getNode(ISD::ADD, dl, MVT::i32, Lo, Hi);
+
+ SDValue GlobalBase = DAG.getNode(SPISD::GLOBAL_BASE_REG, dl,
+ getPointerTy());
+ SDValue RelAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, Lo, Hi);
+ SDValue AbsAddr = DAG.getNode(ISD::ADD, dl, MVT::i32,
+ GlobalBase, RelAddr);
+ return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
+ AbsAddr, MachinePointerInfo(), false, false, 0);
+}
+
+static SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+ // Convert the fp value to integer in an FP register.
+ assert(Op.getValueType() == MVT::i32);
+ Op = DAG.getNode(SPISD::FTOI, dl, MVT::f32, Op.getOperand(0));
+ return DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
+}
+
+static SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+ assert(Op.getOperand(0).getValueType() == MVT::i32);
+ SDValue Tmp = DAG.getNode(ISD::BITCAST, dl, MVT::f32, Op.getOperand(0));
+ // Convert the int value to FP in an FP register.
+ return DAG.getNode(SPISD::ITOF, dl, Op.getValueType(), Tmp);
+}
+
+static SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) {
+ SDValue Chain = Op.getOperand(0);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
+ SDValue LHS = Op.getOperand(2);
+ SDValue RHS = Op.getOperand(3);
+ SDValue Dest = Op.getOperand(4);
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned Opc, SPCC = ~0U;
+
+ // If this is a br_cc of a "setcc", and if the setcc got lowered into
+ // an CMP[IF]CC/SELECT_[IF]CC pair, find the original compared values.
+ LookThroughSetCC(LHS, RHS, CC, SPCC);
+
+ // Get the condition flag.
+ SDValue CompareFlag;
+ if (LHS.getValueType() == MVT::i32) {
+ std::vector<EVT> VTs;
+ VTs.push_back(MVT::i32);
+ VTs.push_back(MVT::Glue);
+ SDValue Ops[2] = { LHS, RHS };
+ CompareFlag = DAG.getNode(SPISD::CMPICC, dl, VTs, Ops, 2).getValue(1);
+ if (SPCC == ~0U) SPCC = IntCondCCodeToICC(CC);
+ Opc = SPISD::BRICC;
+ } else {
+ CompareFlag = DAG.getNode(SPISD::CMPFCC, dl, MVT::Glue, LHS, RHS);
+ if (SPCC == ~0U) SPCC = FPCondCCodeToFCC(CC);
+ Opc = SPISD::BRFCC;
+ }
+ return DAG.getNode(Opc, dl, MVT::Other, Chain, Dest,
+ DAG.getConstant(SPCC, MVT::i32), CompareFlag);
+}
+
+static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+ SDValue TrueVal = Op.getOperand(2);
+ SDValue FalseVal = Op.getOperand(3);
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned Opc, SPCC = ~0U;
+
+ // If this is a select_cc of a "setcc", and if the setcc got lowered into
+ // an CMP[IF]CC/SELECT_[IF]CC pair, find the original compared values.
+ LookThroughSetCC(LHS, RHS, CC, SPCC);
+
+ SDValue CompareFlag;
+ if (LHS.getValueType() == MVT::i32) {
+ std::vector<EVT> VTs;
+ VTs.push_back(LHS.getValueType()); // subcc returns a value
+ VTs.push_back(MVT::Glue);
+ SDValue Ops[2] = { LHS, RHS };
+ CompareFlag = DAG.getNode(SPISD::CMPICC, dl, VTs, Ops, 2).getValue(1);
+ Opc = SPISD::SELECT_ICC;
+ if (SPCC == ~0U) SPCC = IntCondCCodeToICC(CC);
+ } else {
+ CompareFlag = DAG.getNode(SPISD::CMPFCC, dl, MVT::Glue, LHS, RHS);
+ Opc = SPISD::SELECT_FCC;
+ if (SPCC == ~0U) SPCC = FPCondCCodeToFCC(CC);
+ }
+ return DAG.getNode(Opc, dl, TrueVal.getValueType(), TrueVal, FalseVal,
+ DAG.getConstant(SPCC, MVT::i32), CompareFlag);
+}
+
+static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG,
+ const SparcTargetLowering &TLI) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ SparcMachineFunctionInfo *FuncInfo = MF.getInfo<SparcMachineFunctionInfo>();
+
+ // vastart just stores the address of the VarArgsFrameIndex slot into the
+ // memory location argument.
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue Offset =
+ DAG.getNode(ISD::ADD, dl, MVT::i32,
+ DAG.getRegister(SP::I6, MVT::i32),
+ DAG.getConstant(FuncInfo->getVarArgsFrameOffset(),
+ MVT::i32));
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+ return DAG.getStore(Op.getOperand(0), dl, Offset, Op.getOperand(1),
+ MachinePointerInfo(SV), false, false, 0);
+}
+
+static SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) {
+ SDNode *Node = Op.getNode();
+ EVT VT = Node->getValueType(0);
+ SDValue InChain = Node->getOperand(0);
+ SDValue VAListPtr = Node->getOperand(1);
+ const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
+ DebugLoc dl = Node->getDebugLoc();
+ SDValue VAList = DAG.getLoad(MVT::i32, dl, InChain, VAListPtr,
+ MachinePointerInfo(SV), false, false, 0);
+ // Increment the pointer, VAList, to the next vaarg
+ SDValue NextPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, VAList,
+ DAG.getConstant(VT.getSizeInBits()/8,
+ MVT::i32));
+ // Store the incremented VAList to the legalized pointer
+ InChain = DAG.getStore(VAList.getValue(1), dl, NextPtr,
+ VAListPtr, MachinePointerInfo(SV), false, false, 0);
+ // Load the actual argument out of the pointer VAList, unless this is an
+ // f64 load.
+ if (VT != MVT::f64)
+ return DAG.getLoad(VT, dl, InChain, VAList, MachinePointerInfo(),
+ false, false, 0);
+
+ // Otherwise, load it as i64, then do a bitconvert.
+ SDValue V = DAG.getLoad(MVT::i64, dl, InChain, VAList, MachinePointerInfo(),
+ false, false, 0);
+
+ // Bit-Convert the value to f64.
+ SDValue Ops[2] = {
+ DAG.getNode(ISD::BITCAST, dl, MVT::f64, V),
+ V.getValue(1)
+ };
+ return DAG.getMergeValues(Ops, 2, dl);
+}
+
+static SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) {
+ SDValue Chain = Op.getOperand(0); // Legalize the chain.
+ SDValue Size = Op.getOperand(1); // Legalize the size.
+ DebugLoc dl = Op.getDebugLoc();
+
+ unsigned SPReg = SP::O6;
+ SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, MVT::i32);
+ SDValue NewSP = DAG.getNode(ISD::SUB, dl, MVT::i32, SP, Size); // Value
+ Chain = DAG.getCopyToReg(SP.getValue(1), dl, SPReg, NewSP); // Output chain
+
+ // The resultant pointer is actually 16 words from the bottom of the stack,
+ // to provide a register spill area.
+ SDValue NewVal = DAG.getNode(ISD::ADD, dl, MVT::i32, NewSP,
+ DAG.getConstant(96, MVT::i32));
+ SDValue Ops[2] = { NewVal, Chain };
+ return DAG.getMergeValues(Ops, 2, dl);
+}
+
+
+static SDValue getFLUSHW(SDValue Op, SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue Chain = DAG.getNode(SPISD::FLUSHW,
+ dl, MVT::Other, DAG.getEntryNode());
+ return Chain;
+}
+
+static SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) {
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI->setFrameAddressIsTaken(true);
+
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned FrameReg = SP::I6;
+
+ uint64_t depth = Op.getConstantOperandVal(0);
+
+ SDValue FrameAddr;
+ if (depth == 0)
+ FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
+ else {
+ // flush first to make sure the windowed registers' values are in stack
+ SDValue Chain = getFLUSHW(Op, DAG);
+ FrameAddr = DAG.getCopyFromReg(Chain, dl, FrameReg, VT);
+
+ for (uint64_t i = 0; i != depth; ++i) {
+ SDValue Ptr = DAG.getNode(ISD::ADD,
+ dl, MVT::i32,
+ FrameAddr, DAG.getIntPtrConstant(56));
+ FrameAddr = DAG.getLoad(MVT::i32, dl,
+ Chain,
+ Ptr,
+ MachinePointerInfo(), false, false, 0);
+ }
+ }
+ return FrameAddr;
+}
+
+static SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) {
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI->setReturnAddressIsTaken(true);
+
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned RetReg = SP::I7;
+
+ uint64_t depth = Op.getConstantOperandVal(0);
+
+ SDValue RetAddr;
+ if (depth == 0)
+ RetAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, RetReg, VT);
+ else {
+ // flush first to make sure the windowed registers' values are in stack
+ SDValue Chain = getFLUSHW(Op, DAG);
+ RetAddr = DAG.getCopyFromReg(Chain, dl, SP::I6, VT);
+
+ for (uint64_t i = 0; i != depth; ++i) {
+ SDValue Ptr = DAG.getNode(ISD::ADD,
+ dl, MVT::i32,
+ RetAddr,
+ DAG.getIntPtrConstant((i == depth-1)?60:56));
+ RetAddr = DAG.getLoad(MVT::i32, dl,
+ Chain,
+ Ptr,
+ MachinePointerInfo(), false, false, 0);
+ }
+ }
+ return RetAddr;
+}
+
+SDValue SparcTargetLowering::
+LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+ switch (Op.getOpcode()) {
+ default: llvm_unreachable("Should not custom lower this!");
+ case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
+ case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
+ case ISD::GlobalTLSAddress:
+ llvm_unreachable("TLS not implemented for Sparc.");
+ case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
+ case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
+ case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
+ case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
+ case ISD::BR_CC: return LowerBR_CC(Op, DAG);
+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ case ISD::VASTART: return LowerVASTART(Op, DAG, *this);
+ case ISD::VAARG: return LowerVAARG(Op, DAG);
+ case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
+ }
+}
+
+MachineBasicBlock *
+SparcTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
+ unsigned BROpcode;
+ unsigned CC;
+ DebugLoc dl = MI->getDebugLoc();
+ // Figure out the conditional branch opcode to use for this select_cc.
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("Unknown SELECT_CC!");
+ case SP::SELECT_CC_Int_ICC:
+ case SP::SELECT_CC_FP_ICC:
+ case SP::SELECT_CC_DFP_ICC:
+ BROpcode = SP::BCOND;
+ break;
+ case SP::SELECT_CC_Int_FCC:
+ case SP::SELECT_CC_FP_FCC:
+ case SP::SELECT_CC_DFP_FCC:
+ BROpcode = SP::FBCOND;
+ break;
+ }
+
+ CC = (SPCC::CondCodes)MI->getOperand(3).getImm();
+
+ // To "insert" a SELECT_CC instruction, we actually have to insert the diamond
+ // control-flow pattern. The incoming instruction knows the destination vreg
+ // to set, the condition code register to branch on, the true/false values to
+ // select between, and a branch opcode to use.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // [f]bCC copy1MBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, copy0MBB);
+ F->insert(It, sinkMBB);
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
+ BuildMI(BB, dl, TII.get(BROpcode)).addMBB(sinkMBB).addImm(CC);
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to sinkMBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
+ // ...
+ BB = sinkMBB;
+ BuildMI(*BB, BB->begin(), dl, TII.get(SP::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB)
+ .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+//===----------------------------------------------------------------------===//
+// Sparc Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+/// getConstraintType - Given a constraint letter, return the type of
+/// constraint it is for this target.
+SparcTargetLowering::ConstraintType
+SparcTargetLowering::getConstraintType(const std::string &Constraint) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ default: break;
+ case 'r': return C_RegisterClass;
+ }
+ }
+
+ return TargetLowering::getConstraintType(Constraint);
+}
+
+std::pair<unsigned, const TargetRegisterClass*>
+SparcTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ case 'r':
+ return std::make_pair(0U, SP::IntRegsRegisterClass);
+ }
+ }
+
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
+
+bool
+SparcTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+ // The Sparc target isn't yet aware of offsets.
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.h b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.h
new file mode 100644
index 0000000..8a1886a
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.h
@@ -0,0 +1,103 @@
+//===-- SparcISelLowering.h - Sparc DAG Lowering Interface ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that Sparc uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPARC_ISELLOWERING_H
+#define SPARC_ISELLOWERING_H
+
+#include "llvm/Target/TargetLowering.h"
+#include "Sparc.h"
+
+namespace llvm {
+ namespace SPISD {
+ enum {
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+ CMPICC, // Compare two GPR operands, set icc.
+ CMPFCC, // Compare two FP operands, set fcc.
+ BRICC, // Branch to dest on icc condition
+ BRFCC, // Branch to dest on fcc condition
+ SELECT_ICC, // Select between two values using the current ICC flags.
+ SELECT_FCC, // Select between two values using the current FCC flags.
+
+ Hi, Lo, // Hi/Lo operations, typically on a global address.
+
+ FTOI, // FP to Int within a FP register.
+ ITOF, // Int to FP within a FP register.
+
+ CALL, // A call instruction.
+ RET_FLAG, // Return with a flag operand.
+ GLOBAL_BASE_REG, // Global base reg for PIC
+ FLUSHW // FLUSH register windows to stack
+ };
+ }
+
+ class SparcTargetLowering : public TargetLowering {
+ public:
+ SparcTargetLowering(TargetMachine &TM);
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+
+ /// computeMaskedBitsForTargetNode - Determine which of the bits specified
+ /// in Mask are known to be either zero or one and return them in the
+ /// KnownZero/KnownOne bitsets.
+ virtual void computeMaskedBitsForTargetNode(const SDValue Op,
+ const APInt &Mask,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth = 0) const;
+
+ virtual MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+ ConstraintType getConstraintType(const std::string &Constraint) const;
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const;
+
+ virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
+
+ virtual SDValue
+ LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const;
+
+ SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
+
+ unsigned getSRetArgSize(SelectionDAG &DAG, SDValue Callee) const;
+ };
+} // end namespace llvm
+
+#endif // SPARC_ISELLOWERING_H
diff --git a/contrib/llvm/lib/Target/Sparc/SparcInstrFormats.td b/contrib/llvm/lib/Target/Sparc/SparcInstrFormats.td
new file mode 100644
index 0000000..6535259
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcInstrFormats.td
@@ -0,0 +1,114 @@
+//===- SparcInstrFormats.td - Sparc Instruction Formats ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+class InstSP<dag outs, dag ins, string asmstr, list<dag> pattern> : Instruction {
+ field bits<32> Inst;
+
+ let Namespace = "SP";
+
+ bits<2> op;
+ let Inst{31-30} = op; // Top two bits are the 'op' field
+
+ dag OutOperandList = outs;
+ dag InOperandList = ins;
+ let AsmString = asmstr;
+ let Pattern = pattern;
+}
+
+//===----------------------------------------------------------------------===//
+// Format #2 instruction classes in the Sparc
+//===----------------------------------------------------------------------===//
+
+// Format 2 instructions
+class F2<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSP<outs, ins, asmstr, pattern> {
+ bits<3> op2;
+ bits<22> imm22;
+ let op = 0; // op = 0
+ let Inst{24-22} = op2;
+ let Inst{21-0} = imm22;
+}
+
+// Specific F2 classes: SparcV8 manual, page 44
+//
+class F2_1<bits<3> op2Val, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : F2<outs, ins, asmstr, pattern> {
+ bits<5> rd;
+
+ let op2 = op2Val;
+
+ let Inst{29-25} = rd;
+}
+
+class F2_2<bits<4> condVal, bits<3> op2Val, dag outs, dag ins, string asmstr,
+ list<dag> pattern> : F2<outs, ins, asmstr, pattern> {
+ bits<4> cond;
+ bit annul = 0; // currently unused
+
+ let cond = condVal;
+ let op2 = op2Val;
+
+ let Inst{29} = annul;
+ let Inst{28-25} = cond;
+}
+
+//===----------------------------------------------------------------------===//
+// Format #3 instruction classes in the Sparc
+//===----------------------------------------------------------------------===//
+
+class F3<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSP<outs, ins, asmstr, pattern> {
+ bits<5> rd;
+ bits<6> op3;
+ bits<5> rs1;
+ let op{1} = 1; // Op = 2 or 3
+ let Inst{29-25} = rd;
+ let Inst{24-19} = op3;
+ let Inst{18-14} = rs1;
+}
+
+// Specific F3 classes: SparcV8 manual, page 44
+//
+class F3_1<bits<2> opVal, bits<6> op3val, dag outs, dag ins,
+ string asmstr, list<dag> pattern> : F3<outs, ins, asmstr, pattern> {
+ bits<8> asi = 0; // asi not currently used
+ bits<5> rs2;
+
+ let op = opVal;
+ let op3 = op3val;
+
+ let Inst{13} = 0; // i field = 0
+ let Inst{12-5} = asi; // address space identifier
+ let Inst{4-0} = rs2;
+}
+
+class F3_2<bits<2> opVal, bits<6> op3val, dag outs, dag ins,
+ string asmstr, list<dag> pattern> : F3<outs, ins, asmstr, pattern> {
+ bits<13> simm13;
+
+ let op = opVal;
+ let op3 = op3val;
+
+ let Inst{13} = 1; // i field = 1
+ let Inst{12-0} = simm13;
+}
+
+// floating-point
+class F3_3<bits<2> opVal, bits<6> op3val, bits<9> opfval, dag outs, dag ins,
+ string asmstr, list<dag> pattern> : F3<outs, ins, asmstr, pattern> {
+ bits<5> rs2;
+
+ let op = opVal;
+ let op3 = op3val;
+
+ let Inst{13-5} = opfval; // fp opcode
+ let Inst{4-0} = rs2;
+}
+
+
diff --git a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp
new file mode 100644
index 0000000..7a6bf50
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp
@@ -0,0 +1,346 @@
+//===- SparcInstrInfo.cpp - Sparc Instruction Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Sparc implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SparcInstrInfo.h"
+#include "Sparc.h"
+#include "SparcMachineFunctionInfo.h"
+#include "SparcSubtarget.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+
+#define GET_INSTRINFO_CTOR
+#include "SparcGenInstrInfo.inc"
+
+using namespace llvm;
+
+SparcInstrInfo::SparcInstrInfo(SparcSubtarget &ST)
+ : SparcGenInstrInfo(SP::ADJCALLSTACKDOWN, SP::ADJCALLSTACKUP),
+ RI(ST, *this), Subtarget(ST) {
+}
+
+/// isLoadFromStackSlot - If the specified machine instruction is a direct
+/// load from a stack slot, return the virtual or physical register number of
+/// the destination along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than loading from the stack slot.
+unsigned SparcInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ if (MI->getOpcode() == SP::LDri ||
+ MI->getOpcode() == SP::LDFri ||
+ MI->getOpcode() == SP::LDDFri) {
+ if (MI->getOperand(1).isFI() && MI->getOperand(2).isImm() &&
+ MI->getOperand(2).getImm() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ }
+ return 0;
+}
+
+/// isStoreToStackSlot - If the specified machine instruction is a direct
+/// store to a stack slot, return the virtual or physical register number of
+/// the source reg along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than storing to the stack slot.
+unsigned SparcInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ if (MI->getOpcode() == SP::STri ||
+ MI->getOpcode() == SP::STFri ||
+ MI->getOpcode() == SP::STDFri) {
+ if (MI->getOperand(0).isFI() && MI->getOperand(1).isImm() &&
+ MI->getOperand(1).getImm() == 0) {
+ FrameIndex = MI->getOperand(0).getIndex();
+ return MI->getOperand(2).getReg();
+ }
+ }
+ return 0;
+}
+
+static bool IsIntegerCC(unsigned CC)
+{
+ return (CC <= SPCC::ICC_VC);
+}
+
+
+static SPCC::CondCodes GetOppositeBranchCondition(SPCC::CondCodes CC)
+{
+ switch(CC) {
+ default: llvm_unreachable("Unknown condition code");
+ case SPCC::ICC_NE: return SPCC::ICC_E;
+ case SPCC::ICC_E: return SPCC::ICC_NE;
+ case SPCC::ICC_G: return SPCC::ICC_LE;
+ case SPCC::ICC_LE: return SPCC::ICC_G;
+ case SPCC::ICC_GE: return SPCC::ICC_L;
+ case SPCC::ICC_L: return SPCC::ICC_GE;
+ case SPCC::ICC_GU: return SPCC::ICC_LEU;
+ case SPCC::ICC_LEU: return SPCC::ICC_GU;
+ case SPCC::ICC_CC: return SPCC::ICC_CS;
+ case SPCC::ICC_CS: return SPCC::ICC_CC;
+ case SPCC::ICC_POS: return SPCC::ICC_NEG;
+ case SPCC::ICC_NEG: return SPCC::ICC_POS;
+ case SPCC::ICC_VC: return SPCC::ICC_VS;
+ case SPCC::ICC_VS: return SPCC::ICC_VC;
+
+ case SPCC::FCC_U: return SPCC::FCC_O;
+ case SPCC::FCC_O: return SPCC::FCC_U;
+ case SPCC::FCC_G: return SPCC::FCC_LE;
+ case SPCC::FCC_LE: return SPCC::FCC_G;
+ case SPCC::FCC_UG: return SPCC::FCC_ULE;
+ case SPCC::FCC_ULE: return SPCC::FCC_UG;
+ case SPCC::FCC_L: return SPCC::FCC_GE;
+ case SPCC::FCC_GE: return SPCC::FCC_L;
+ case SPCC::FCC_UL: return SPCC::FCC_UGE;
+ case SPCC::FCC_UGE: return SPCC::FCC_UL;
+ case SPCC::FCC_LG: return SPCC::FCC_UE;
+ case SPCC::FCC_UE: return SPCC::FCC_LG;
+ case SPCC::FCC_NE: return SPCC::FCC_E;
+ case SPCC::FCC_E: return SPCC::FCC_NE;
+ }
+}
+
+
+bool SparcInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const
+{
+
+ MachineBasicBlock::iterator I = MBB.end();
+ MachineBasicBlock::iterator UnCondBrIter = MBB.end();
+ while (I != MBB.begin()) {
+ --I;
+
+ if (I->isDebugValue())
+ continue;
+
+ //When we see a non-terminator, we are done
+ if (!isUnpredicatedTerminator(I))
+ break;
+
+ //Terminator is not a branch
+ if (!I->getDesc().isBranch())
+ return true;
+
+ //Handle Unconditional branches
+ if (I->getOpcode() == SP::BA) {
+ UnCondBrIter = I;
+
+ if (!AllowModify) {
+ TBB = I->getOperand(0).getMBB();
+ continue;
+ }
+
+ while (llvm::next(I) != MBB.end())
+ llvm::next(I)->eraseFromParent();
+
+ Cond.clear();
+ FBB = 0;
+
+ if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
+ TBB = 0;
+ I->eraseFromParent();
+ I = MBB.end();
+ UnCondBrIter = MBB.end();
+ continue;
+ }
+
+ TBB = I->getOperand(0).getMBB();
+ continue;
+ }
+
+ unsigned Opcode = I->getOpcode();
+ if (Opcode != SP::BCOND && Opcode != SP::FBCOND)
+ return true; //Unknown Opcode
+
+ SPCC::CondCodes BranchCode = (SPCC::CondCodes)I->getOperand(1).getImm();
+
+ if (Cond.empty()) {
+ MachineBasicBlock *TargetBB = I->getOperand(0).getMBB();
+ if (AllowModify && UnCondBrIter != MBB.end() &&
+ MBB.isLayoutSuccessor(TargetBB)) {
+
+ //Transform the code
+ //
+ // brCC L1
+ // ba L2
+ // L1:
+ // ..
+ // L2:
+ //
+ // into
+ //
+ // brnCC L2
+ // L1:
+ // ...
+ // L2:
+ //
+ BranchCode = GetOppositeBranchCondition(BranchCode);
+ MachineBasicBlock::iterator OldInst = I;
+ BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(Opcode))
+ .addMBB(UnCondBrIter->getOperand(0).getMBB()).addImm(BranchCode);
+ BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(SP::BA))
+ .addMBB(TargetBB);
+ MBB.addSuccessor(TargetBB);
+ OldInst->eraseFromParent();
+ UnCondBrIter->eraseFromParent();
+
+ UnCondBrIter = MBB.end();
+ I = MBB.end();
+ continue;
+ }
+ FBB = TBB;
+ TBB = I->getOperand(0).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(BranchCode));
+ continue;
+ }
+ //FIXME: Handle subsequent conditional branches
+ //For now, we can't handle multiple conditional branches
+ return true;
+ }
+ return false;
+}
+
+unsigned
+SparcInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert((Cond.size() == 1 || Cond.size() == 0) &&
+ "Sparc branch conditions should have one component!");
+
+ if (Cond.empty()) {
+ assert(!FBB && "Unconditional branch with multiple successors!");
+ BuildMI(&MBB, DL, get(SP::BA)).addMBB(TBB);
+ return 1;
+ }
+
+ //Conditional branch
+ unsigned CC = Cond[0].getImm();
+
+ if (IsIntegerCC(CC))
+ BuildMI(&MBB, DL, get(SP::BCOND)).addMBB(TBB).addImm(CC);
+ else
+ BuildMI(&MBB, DL, get(SP::FBCOND)).addMBB(TBB).addImm(CC);
+ if (!FBB)
+ return 1;
+
+ BuildMI(&MBB, DL, get(SP::BA)).addMBB(FBB);
+ return 2;
+}
+
+unsigned SparcInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const
+{
+ MachineBasicBlock::iterator I = MBB.end();
+ unsigned Count = 0;
+ while (I != MBB.begin()) {
+ --I;
+
+ if (I->isDebugValue())
+ continue;
+
+ if (I->getOpcode() != SP::BA
+ && I->getOpcode() != SP::BCOND
+ && I->getOpcode() != SP::FBCOND)
+ break; // Not a branch
+
+ I->eraseFromParent();
+ I = MBB.end();
+ ++Count;
+ }
+ return Count;
+}
+
+void SparcInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ if (SP::IntRegsRegClass.contains(DestReg, SrcReg))
+ BuildMI(MBB, I, DL, get(SP::ORrr), DestReg).addReg(SP::G0)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else if (SP::FPRegsRegClass.contains(DestReg, SrcReg))
+ BuildMI(MBB, I, DL, get(SP::FMOVS), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else if (SP::DFPRegsRegClass.contains(DestReg, SrcReg))
+ BuildMI(MBB, I, DL, get(Subtarget.isV9() ? SP::FMOVD : SP::FpMOVD), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else
+ llvm_unreachable("Impossible reg-to-reg copy");
+}
+
+void SparcInstrInfo::
+storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned SrcReg, bool isKill, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
+ // On the order of operands here: think "[FrameIdx + 0] = SrcReg".
+ if (RC == SP::IntRegsRegisterClass)
+ BuildMI(MBB, I, DL, get(SP::STri)).addFrameIndex(FI).addImm(0)
+ .addReg(SrcReg, getKillRegState(isKill));
+ else if (RC == SP::FPRegsRegisterClass)
+ BuildMI(MBB, I, DL, get(SP::STFri)).addFrameIndex(FI).addImm(0)
+ .addReg(SrcReg, getKillRegState(isKill));
+ else if (RC == SP::DFPRegsRegisterClass)
+ BuildMI(MBB, I, DL, get(SP::STDFri)).addFrameIndex(FI).addImm(0)
+ .addReg(SrcReg, getKillRegState(isKill));
+ else
+ llvm_unreachable("Can't store this register to stack slot");
+}
+
+void SparcInstrInfo::
+loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DestReg, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
+ if (RC == SP::IntRegsRegisterClass)
+ BuildMI(MBB, I, DL, get(SP::LDri), DestReg).addFrameIndex(FI).addImm(0);
+ else if (RC == SP::FPRegsRegisterClass)
+ BuildMI(MBB, I, DL, get(SP::LDFri), DestReg).addFrameIndex(FI).addImm(0);
+ else if (RC == SP::DFPRegsRegisterClass)
+ BuildMI(MBB, I, DL, get(SP::LDDFri), DestReg).addFrameIndex(FI).addImm(0);
+ else
+ llvm_unreachable("Can't load this register from stack slot");
+}
+
+unsigned SparcInstrInfo::getGlobalBaseReg(MachineFunction *MF) const
+{
+ SparcMachineFunctionInfo *SparcFI = MF->getInfo<SparcMachineFunctionInfo>();
+ unsigned GlobalBaseReg = SparcFI->getGlobalBaseReg();
+ if (GlobalBaseReg != 0)
+ return GlobalBaseReg;
+
+ // Insert the set of GlobalBaseReg into the first MBB of the function
+ MachineBasicBlock &FirstMBB = MF->front();
+ MachineBasicBlock::iterator MBBI = FirstMBB.begin();
+ MachineRegisterInfo &RegInfo = MF->getRegInfo();
+
+ GlobalBaseReg = RegInfo.createVirtualRegister(&SP::IntRegsRegClass);
+
+
+ DebugLoc dl;
+
+ BuildMI(FirstMBB, MBBI, dl, get(SP::GETPCX), GlobalBaseReg);
+ SparcFI->setGlobalBaseReg(GlobalBaseReg);
+ return GlobalBaseReg;
+}
diff --git a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.h b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.h
new file mode 100644
index 0000000..eda64ef
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.h
@@ -0,0 +1,100 @@
+//===- SparcInstrInfo.h - Sparc Instruction Information ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Sparc implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPARCINSTRUCTIONINFO_H
+#define SPARCINSTRUCTIONINFO_H
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "SparcRegisterInfo.h"
+
+#define GET_INSTRINFO_HEADER
+#include "SparcGenInstrInfo.inc"
+
+namespace llvm {
+
+/// SPII - This namespace holds all of the target specific flags that
+/// instruction info tracks.
+///
+namespace SPII {
+ enum {
+ Pseudo = (1<<0),
+ Load = (1<<1),
+ Store = (1<<2),
+ DelaySlot = (1<<3)
+ };
+}
+
+class SparcInstrInfo : public SparcGenInstrInfo {
+ const SparcRegisterInfo RI;
+ const SparcSubtarget& Subtarget;
+public:
+ explicit SparcInstrInfo(SparcSubtarget &ST);
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ virtual const SparcRegisterInfo &getRegisterInfo() const { return RI; }
+
+ /// isLoadFromStackSlot - If the specified machine instruction is a direct
+ /// load from a stack slot, return the virtual or physical register number of
+ /// the destination along with the FrameIndex of the loaded stack slot. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than loading from the stack slot.
+ virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ /// isStoreToStackSlot - If the specified machine instruction is a direct
+ /// store to a stack slot, return the virtual or physical register number of
+ /// the source reg along with the FrameIndex of the loaded stack slot. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than storing to the stack slot.
+ virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+
+ virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify = false) const ;
+
+ virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
+
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ unsigned getGlobalBaseReg(MachineFunction *MF) const;
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td
new file mode 100644
index 0000000..cf5c48f
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td
@@ -0,0 +1,825 @@
+//===- SparcInstrInfo.td - Target Description for Sparc Target ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Sparc instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instruction format superclass
+//===----------------------------------------------------------------------===//
+
+include "SparcInstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// Feature predicates.
+//===----------------------------------------------------------------------===//
+
+// HasV9 - This predicate is true when the target processor supports V9
+// instructions. Note that the machine may be running in 32-bit mode.
+def HasV9 : Predicate<"Subtarget.isV9()">;
+
+// HasNoV9 - This predicate is true when the target doesn't have V9
+// instructions. Use of this is just a hack for the isel not having proper
+// costs for V8 instructions that are more expensive than their V9 ones.
+def HasNoV9 : Predicate<"!Subtarget.isV9()">;
+
+// HasVIS - This is true when the target processor has VIS extensions.
+def HasVIS : Predicate<"Subtarget.isVIS()">;
+
+// UseDeprecatedInsts - This predicate is true when the target processor is a
+// V8, or when it is V9 but the V8 deprecated instructions are efficient enough
+// to use when appropriate. In either of these cases, the instruction selector
+// will pick deprecated instructions.
+def UseDeprecatedInsts : Predicate<"Subtarget.useDeprecatedV8Instructions()">;
+
+//===----------------------------------------------------------------------===//
+// Instruction Pattern Stuff
+//===----------------------------------------------------------------------===//
+
+def simm11 : PatLeaf<(imm), [{ return isInt<11>(N->getSExtValue()); }]>;
+
+def simm13 : PatLeaf<(imm), [{ return isInt<13>(N->getSExtValue()); }]>;
+
+def LO10 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant((unsigned)N->getZExtValue() & 1023,
+ MVT::i32);
+}]>;
+
+def HI22 : SDNodeXForm<imm, [{
+ // Transformation function: shift the immediate value down into the low bits.
+ return CurDAG->getTargetConstant((unsigned)N->getZExtValue() >> 10, MVT::i32);
+}]>;
+
+def SETHIimm : PatLeaf<(imm), [{
+ return (((unsigned)N->getZExtValue() >> 10) << 10) ==
+ (unsigned)N->getZExtValue();
+}], HI22>;
+
+// Addressing modes.
+def ADDRrr : ComplexPattern<i32, 2, "SelectADDRrr", [], []>;
+def ADDRri : ComplexPattern<i32, 2, "SelectADDRri", [frameindex], []>;
+
+// Address operands
+def MEMrr : Operand<i32> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops IntRegs, IntRegs);
+}
+def MEMri : Operand<i32> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops IntRegs, i32imm);
+}
+
+// Branch targets have OtherVT type.
+def brtarget : Operand<OtherVT>;
+def calltarget : Operand<i32>;
+
+// Operand for printing out a condition code.
+let PrintMethod = "printCCOperand" in
+ def CCOp : Operand<i32>;
+
+def SDTSPcmpfcc :
+SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisSameAs<0, 1>]>;
+def SDTSPbrcc :
+SDTypeProfile<0, 2, [SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>]>;
+def SDTSPselectcc :
+SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisVT<3, i32>]>;
+def SDTSPFTOI :
+SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisFP<1>]>;
+def SDTSPITOF :
+SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVT<1, f32>]>;
+
+def SPcmpicc : SDNode<"SPISD::CMPICC", SDTIntBinOp, [SDNPOutGlue]>;
+def SPcmpfcc : SDNode<"SPISD::CMPFCC", SDTSPcmpfcc, [SDNPOutGlue]>;
+def SPbricc : SDNode<"SPISD::BRICC", SDTSPbrcc, [SDNPHasChain, SDNPInGlue]>;
+def SPbrfcc : SDNode<"SPISD::BRFCC", SDTSPbrcc, [SDNPHasChain, SDNPInGlue]>;
+
+def SPhi : SDNode<"SPISD::Hi", SDTIntUnaryOp>;
+def SPlo : SDNode<"SPISD::Lo", SDTIntUnaryOp>;
+
+def SPftoi : SDNode<"SPISD::FTOI", SDTSPFTOI>;
+def SPitof : SDNode<"SPISD::ITOF", SDTSPITOF>;
+
+def SPselecticc : SDNode<"SPISD::SELECT_ICC", SDTSPselectcc, [SDNPInGlue]>;
+def SPselectfcc : SDNode<"SPISD::SELECT_FCC", SDTSPselectcc, [SDNPInGlue]>;
+
+// These are target-independent nodes, but have target-specific formats.
+def SDT_SPCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
+def SDT_SPCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>,
+ SDTCisVT<1, i32> ]>;
+
+def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_SPCallSeqStart,
+ [SDNPHasChain, SDNPOutGlue]>;
+def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_SPCallSeqEnd,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+def SDT_SPCall : SDTypeProfile<0, -1, [SDTCisVT<0, i32>]>;
+def call : SDNode<"SPISD::CALL", SDT_SPCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+
+def SDT_SPRet : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
+def retflag : SDNode<"SPISD::RET_FLAG", SDT_SPRet,
+ [SDNPHasChain, SDNPOptInGlue]>;
+
+def flushw : SDNode<"SPISD::FLUSHW", SDTNone,
+ [SDNPHasChain]>;
+
+def getPCX : Operand<i32> {
+ let PrintMethod = "printGetPCX";
+}
+
+//===----------------------------------------------------------------------===//
+// SPARC Flag Conditions
+//===----------------------------------------------------------------------===//
+
+// Note that these values must be kept in sync with the CCOp::CondCode enum
+// values.
+class ICC_VAL<int N> : PatLeaf<(i32 N)>;
+def ICC_NE : ICC_VAL< 9>; // Not Equal
+def ICC_E : ICC_VAL< 1>; // Equal
+def ICC_G : ICC_VAL<10>; // Greater
+def ICC_LE : ICC_VAL< 2>; // Less or Equal
+def ICC_GE : ICC_VAL<11>; // Greater or Equal
+def ICC_L : ICC_VAL< 3>; // Less
+def ICC_GU : ICC_VAL<12>; // Greater Unsigned
+def ICC_LEU : ICC_VAL< 4>; // Less or Equal Unsigned
+def ICC_CC : ICC_VAL<13>; // Carry Clear/Great or Equal Unsigned
+def ICC_CS : ICC_VAL< 5>; // Carry Set/Less Unsigned
+def ICC_POS : ICC_VAL<14>; // Positive
+def ICC_NEG : ICC_VAL< 6>; // Negative
+def ICC_VC : ICC_VAL<15>; // Overflow Clear
+def ICC_VS : ICC_VAL< 7>; // Overflow Set
+
+class FCC_VAL<int N> : PatLeaf<(i32 N)>;
+def FCC_U : FCC_VAL<23>; // Unordered
+def FCC_G : FCC_VAL<22>; // Greater
+def FCC_UG : FCC_VAL<21>; // Unordered or Greater
+def FCC_L : FCC_VAL<20>; // Less
+def FCC_UL : FCC_VAL<19>; // Unordered or Less
+def FCC_LG : FCC_VAL<18>; // Less or Greater
+def FCC_NE : FCC_VAL<17>; // Not Equal
+def FCC_E : FCC_VAL<25>; // Equal
+def FCC_UE : FCC_VAL<24>; // Unordered or Equal
+def FCC_GE : FCC_VAL<25>; // Greater or Equal
+def FCC_UGE : FCC_VAL<26>; // Unordered or Greater or Equal
+def FCC_LE : FCC_VAL<27>; // Less or Equal
+def FCC_ULE : FCC_VAL<28>; // Unordered or Less or Equal
+def FCC_O : FCC_VAL<29>; // Ordered
+
+//===----------------------------------------------------------------------===//
+// Instruction Class Templates
+//===----------------------------------------------------------------------===//
+
+/// F3_12 multiclass - Define a normal F3_1/F3_2 pattern in one shot.
+multiclass F3_12<string OpcStr, bits<6> Op3Val, SDNode OpNode> {
+ def rr : F3_1<2, Op3Val,
+ (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+ !strconcat(OpcStr, " $b, $c, $dst"),
+ [(set IntRegs:$dst, (OpNode IntRegs:$b, IntRegs:$c))]>;
+ def ri : F3_2<2, Op3Val,
+ (outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c),
+ !strconcat(OpcStr, " $b, $c, $dst"),
+ [(set IntRegs:$dst, (OpNode IntRegs:$b, simm13:$c))]>;
+}
+
+/// F3_12np multiclass - Define a normal F3_1/F3_2 pattern in one shot, with no
+/// pattern.
+multiclass F3_12np<string OpcStr, bits<6> Op3Val> {
+ def rr : F3_1<2, Op3Val,
+ (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+ !strconcat(OpcStr, " $b, $c, $dst"), []>;
+ def ri : F3_2<2, Op3Val,
+ (outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c),
+ !strconcat(OpcStr, " $b, $c, $dst"), []>;
+}
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+// Pseudo instructions.
+class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSP<outs, ins, asmstr, pattern>;
+
+// GETPCX for PIC
+let Defs = [O7] in {
+ def GETPCX : Pseudo<(outs getPCX:$getpcseq), (ins), "$getpcseq", [] >;
+}
+
+let Defs = [O6], Uses = [O6] in {
+def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt),
+ "!ADJCALLSTACKDOWN $amt",
+ [(callseq_start timm:$amt)]>;
+def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
+ "!ADJCALLSTACKUP $amt1",
+ [(callseq_end timm:$amt1, timm:$amt2)]>;
+}
+
+let hasSideEffects = 1, mayStore = 1 in {
+ let rd = 0, rs1 = 0, rs2 = 0 in
+ def FLUSHW : F3_1<0b10, 0b101011, (outs), (ins),
+ "flushw",
+ [(flushw)]>, Requires<[HasV9]>;
+ let rd = 0, rs1 = 1, simm13 = 3 in
+ def TA3 : F3_2<0b10, 0b111010, (outs), (ins),
+ "ta 3",
+ [(flushw)]>;
+}
+
+def UNIMP : F2_1<0b000, (outs), (ins i32imm:$val),
+ "unimp $val", []>;
+
+// FpMOVD/FpNEGD/FpABSD - These are lowered to single-precision ops by the
+// fpmover pass.
+let Predicates = [HasNoV9] in { // Only emit these in V8 mode.
+ def FpMOVD : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$src),
+ "!FpMOVD $src, $dst", []>;
+ def FpNEGD : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$src),
+ "!FpNEGD $src, $dst",
+ [(set DFPRegs:$dst, (fneg DFPRegs:$src))]>;
+ def FpABSD : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$src),
+ "!FpABSD $src, $dst",
+ [(set DFPRegs:$dst, (fabs DFPRegs:$src))]>;
+}
+
+// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after
+// instruction selection into a branch sequence. This has to handle all
+// permutations of selection between i32/f32/f64 on ICC and FCC.
+ // Expanded after instruction selection.
+let Uses = [ICC], usesCustomInserter = 1 in {
+ def SELECT_CC_Int_ICC
+ : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, i32imm:$Cond),
+ "; SELECT_CC_Int_ICC PSEUDO!",
+ [(set IntRegs:$dst, (SPselecticc IntRegs:$T, IntRegs:$F,
+ imm:$Cond))]>;
+ def SELECT_CC_FP_ICC
+ : Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, i32imm:$Cond),
+ "; SELECT_CC_FP_ICC PSEUDO!",
+ [(set FPRegs:$dst, (SPselecticc FPRegs:$T, FPRegs:$F,
+ imm:$Cond))]>;
+
+ def SELECT_CC_DFP_ICC
+ : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, i32imm:$Cond),
+ "; SELECT_CC_DFP_ICC PSEUDO!",
+ [(set DFPRegs:$dst, (SPselecticc DFPRegs:$T, DFPRegs:$F,
+ imm:$Cond))]>;
+}
+
+let usesCustomInserter = 1, Uses = [FCC] in {
+
+ def SELECT_CC_Int_FCC
+ : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, i32imm:$Cond),
+ "; SELECT_CC_Int_FCC PSEUDO!",
+ [(set IntRegs:$dst, (SPselectfcc IntRegs:$T, IntRegs:$F,
+ imm:$Cond))]>;
+
+ def SELECT_CC_FP_FCC
+ : Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, i32imm:$Cond),
+ "; SELECT_CC_FP_FCC PSEUDO!",
+ [(set FPRegs:$dst, (SPselectfcc FPRegs:$T, FPRegs:$F,
+ imm:$Cond))]>;
+ def SELECT_CC_DFP_FCC
+ : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, i32imm:$Cond),
+ "; SELECT_CC_DFP_FCC PSEUDO!",
+ [(set DFPRegs:$dst, (SPselectfcc DFPRegs:$T, DFPRegs:$F,
+ imm:$Cond))]>;
+}
+
+
+// Section A.3 - Synthetic Instructions, p. 85
+// special cases of JMPL:
+let isReturn = 1, isTerminator = 1, hasDelaySlot = 1, isBarrier = 1 in {
+ let rd = O7.Num, rs1 = G0.Num in
+ def RETL: F3_2<2, 0b111000, (outs), (ins i32imm:$val),
+ "jmp %o7+$val", [(retflag simm13:$val)]>;
+
+ let rd = I7.Num, rs1 = G0.Num in
+ def RET: F3_2<2, 0b111000, (outs), (ins i32imm:$val),
+ "jmp %i7+$val", []>;
+}
+
+// Section B.1 - Load Integer Instructions, p. 90
+def LDSBrr : F3_1<3, 0b001001,
+ (outs IntRegs:$dst), (ins MEMrr:$addr),
+ "ldsb [$addr], $dst",
+ [(set IntRegs:$dst, (sextloadi8 ADDRrr:$addr))]>;
+def LDSBri : F3_2<3, 0b001001,
+ (outs IntRegs:$dst), (ins MEMri:$addr),
+ "ldsb [$addr], $dst",
+ [(set IntRegs:$dst, (sextloadi8 ADDRri:$addr))]>;
+def LDSHrr : F3_1<3, 0b001010,
+ (outs IntRegs:$dst), (ins MEMrr:$addr),
+ "ldsh [$addr], $dst",
+ [(set IntRegs:$dst, (sextloadi16 ADDRrr:$addr))]>;
+def LDSHri : F3_2<3, 0b001010,
+ (outs IntRegs:$dst), (ins MEMri:$addr),
+ "ldsh [$addr], $dst",
+ [(set IntRegs:$dst, (sextloadi16 ADDRri:$addr))]>;
+def LDUBrr : F3_1<3, 0b000001,
+ (outs IntRegs:$dst), (ins MEMrr:$addr),
+ "ldub [$addr], $dst",
+ [(set IntRegs:$dst, (zextloadi8 ADDRrr:$addr))]>;
+def LDUBri : F3_2<3, 0b000001,
+ (outs IntRegs:$dst), (ins MEMri:$addr),
+ "ldub [$addr], $dst",
+ [(set IntRegs:$dst, (zextloadi8 ADDRri:$addr))]>;
+def LDUHrr : F3_1<3, 0b000010,
+ (outs IntRegs:$dst), (ins MEMrr:$addr),
+ "lduh [$addr], $dst",
+ [(set IntRegs:$dst, (zextloadi16 ADDRrr:$addr))]>;
+def LDUHri : F3_2<3, 0b000010,
+ (outs IntRegs:$dst), (ins MEMri:$addr),
+ "lduh [$addr], $dst",
+ [(set IntRegs:$dst, (zextloadi16 ADDRri:$addr))]>;
+def LDrr : F3_1<3, 0b000000,
+ (outs IntRegs:$dst), (ins MEMrr:$addr),
+ "ld [$addr], $dst",
+ [(set IntRegs:$dst, (load ADDRrr:$addr))]>;
+def LDri : F3_2<3, 0b000000,
+ (outs IntRegs:$dst), (ins MEMri:$addr),
+ "ld [$addr], $dst",
+ [(set IntRegs:$dst, (load ADDRri:$addr))]>;
+
+// Section B.2 - Load Floating-point Instructions, p. 92
+def LDFrr : F3_1<3, 0b100000,
+ (outs FPRegs:$dst), (ins MEMrr:$addr),
+ "ld [$addr], $dst",
+ [(set FPRegs:$dst, (load ADDRrr:$addr))]>;
+def LDFri : F3_2<3, 0b100000,
+ (outs FPRegs:$dst), (ins MEMri:$addr),
+ "ld [$addr], $dst",
+ [(set FPRegs:$dst, (load ADDRri:$addr))]>;
+def LDDFrr : F3_1<3, 0b100011,
+ (outs DFPRegs:$dst), (ins MEMrr:$addr),
+ "ldd [$addr], $dst",
+ [(set DFPRegs:$dst, (load ADDRrr:$addr))]>;
+def LDDFri : F3_2<3, 0b100011,
+ (outs DFPRegs:$dst), (ins MEMri:$addr),
+ "ldd [$addr], $dst",
+ [(set DFPRegs:$dst, (load ADDRri:$addr))]>;
+
+// Section B.4 - Store Integer Instructions, p. 95
+def STBrr : F3_1<3, 0b000101,
+ (outs), (ins MEMrr:$addr, IntRegs:$src),
+ "stb $src, [$addr]",
+ [(truncstorei8 IntRegs:$src, ADDRrr:$addr)]>;
+def STBri : F3_2<3, 0b000101,
+ (outs), (ins MEMri:$addr, IntRegs:$src),
+ "stb $src, [$addr]",
+ [(truncstorei8 IntRegs:$src, ADDRri:$addr)]>;
+def STHrr : F3_1<3, 0b000110,
+ (outs), (ins MEMrr:$addr, IntRegs:$src),
+ "sth $src, [$addr]",
+ [(truncstorei16 IntRegs:$src, ADDRrr:$addr)]>;
+def STHri : F3_2<3, 0b000110,
+ (outs), (ins MEMri:$addr, IntRegs:$src),
+ "sth $src, [$addr]",
+ [(truncstorei16 IntRegs:$src, ADDRri:$addr)]>;
+def STrr : F3_1<3, 0b000100,
+ (outs), (ins MEMrr:$addr, IntRegs:$src),
+ "st $src, [$addr]",
+ [(store IntRegs:$src, ADDRrr:$addr)]>;
+def STri : F3_2<3, 0b000100,
+ (outs), (ins MEMri:$addr, IntRegs:$src),
+ "st $src, [$addr]",
+ [(store IntRegs:$src, ADDRri:$addr)]>;
+
+// Section B.5 - Store Floating-point Instructions, p. 97
+def STFrr : F3_1<3, 0b100100,
+ (outs), (ins MEMrr:$addr, FPRegs:$src),
+ "st $src, [$addr]",
+ [(store FPRegs:$src, ADDRrr:$addr)]>;
+def STFri : F3_2<3, 0b100100,
+ (outs), (ins MEMri:$addr, FPRegs:$src),
+ "st $src, [$addr]",
+ [(store FPRegs:$src, ADDRri:$addr)]>;
+def STDFrr : F3_1<3, 0b100111,
+ (outs), (ins MEMrr:$addr, DFPRegs:$src),
+ "std $src, [$addr]",
+ [(store DFPRegs:$src, ADDRrr:$addr)]>;
+def STDFri : F3_2<3, 0b100111,
+ (outs), (ins MEMri:$addr, DFPRegs:$src),
+ "std $src, [$addr]",
+ [(store DFPRegs:$src, ADDRri:$addr)]>;
+
+// Section B.9 - SETHI Instruction, p. 104
+def SETHIi: F2_1<0b100,
+ (outs IntRegs:$dst), (ins i32imm:$src),
+ "sethi $src, $dst",
+ [(set IntRegs:$dst, SETHIimm:$src)]>;
+
+// Section B.10 - NOP Instruction, p. 105
+// (It's a special case of SETHI)
+let rd = 0, imm22 = 0 in
+ def NOP : F2_1<0b100, (outs), (ins), "nop", []>;
+
+// Section B.11 - Logical Instructions, p. 106
+defm AND : F3_12<"and", 0b000001, and>;
+
+def ANDNrr : F3_1<2, 0b000101,
+ (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+ "andn $b, $c, $dst",
+ [(set IntRegs:$dst, (and IntRegs:$b, (not IntRegs:$c)))]>;
+def ANDNri : F3_2<2, 0b000101,
+ (outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c),
+ "andn $b, $c, $dst", []>;
+
+defm OR : F3_12<"or", 0b000010, or>;
+
+def ORNrr : F3_1<2, 0b000110,
+ (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+ "orn $b, $c, $dst",
+ [(set IntRegs:$dst, (or IntRegs:$b, (not IntRegs:$c)))]>;
+def ORNri : F3_2<2, 0b000110,
+ (outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c),
+ "orn $b, $c, $dst", []>;
+defm XOR : F3_12<"xor", 0b000011, xor>;
+
+def XNORrr : F3_1<2, 0b000111,
+ (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+ "xnor $b, $c, $dst",
+ [(set IntRegs:$dst, (not (xor IntRegs:$b, IntRegs:$c)))]>;
+def XNORri : F3_2<2, 0b000111,
+ (outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c),
+ "xnor $b, $c, $dst", []>;
+
+// Section B.12 - Shift Instructions, p. 107
+defm SLL : F3_12<"sll", 0b100101, shl>;
+defm SRL : F3_12<"srl", 0b100110, srl>;
+defm SRA : F3_12<"sra", 0b100111, sra>;
+
+// Section B.13 - Add Instructions, p. 108
+defm ADD : F3_12<"add", 0b000000, add>;
+
+// "LEA" forms of add (patterns to make tblgen happy)
+def LEA_ADDri : F3_2<2, 0b000000,
+ (outs IntRegs:$dst), (ins MEMri:$addr),
+ "add ${addr:arith}, $dst",
+ [(set IntRegs:$dst, ADDRri:$addr)]>;
+
+let Defs = [ICC] in
+ defm ADDCC : F3_12<"addcc", 0b010000, addc>;
+
+let Uses = [ICC] in
+ defm ADDX : F3_12<"addx", 0b001000, adde>;
+
+// Section B.15 - Subtract Instructions, p. 110
+defm SUB : F3_12 <"sub" , 0b000100, sub>;
+let Uses = [ICC] in
+ defm SUBX : F3_12 <"subx" , 0b001100, sube>;
+
+let Defs = [ICC] in
+ defm SUBCC : F3_12 <"subcc", 0b010100, SPcmpicc>;
+
+let Uses = [ICC], Defs = [ICC] in
+ def SUBXCCrr: F3_1<2, 0b011100,
+ (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+ "subxcc $b, $c, $dst", []>;
+
+
+// Section B.18 - Multiply Instructions, p. 113
+let Defs = [Y] in {
+ defm UMUL : F3_12np<"umul", 0b001010>;
+ defm SMUL : F3_12 <"smul", 0b001011, mul>;
+}
+
+// Section B.19 - Divide Instructions, p. 115
+let Defs = [Y] in {
+ defm UDIV : F3_12np<"udiv", 0b001110>;
+ defm SDIV : F3_12np<"sdiv", 0b001111>;
+}
+
+// Section B.20 - SAVE and RESTORE, p. 117
+defm SAVE : F3_12np<"save" , 0b111100>;
+defm RESTORE : F3_12np<"restore", 0b111101>;
+
+// Section B.21 - Branch on Integer Condition Codes Instructions, p. 119
+
+// conditional branch class:
+class BranchSP<bits<4> cc, dag ins, string asmstr, list<dag> pattern>
+ : F2_2<cc, 0b010, (outs), ins, asmstr, pattern> {
+ let isBranch = 1;
+ let isTerminator = 1;
+ let hasDelaySlot = 1;
+}
+
+let isBarrier = 1 in
+ def BA : BranchSP<0b1000, (ins brtarget:$dst),
+ "ba $dst",
+ [(br bb:$dst)]>;
+
+// FIXME: the encoding for the JIT should look at the condition field.
+let Uses = [ICC] in
+ def BCOND : BranchSP<0, (ins brtarget:$dst, CCOp:$cc),
+ "b$cc $dst",
+ [(SPbricc bb:$dst, imm:$cc)]>;
+
+
+// Section B.22 - Branch on Floating-point Condition Codes Instructions, p. 121
+
+// floating-point conditional branch class:
+class FPBranchSP<bits<4> cc, dag ins, string asmstr, list<dag> pattern>
+ : F2_2<cc, 0b110, (outs), ins, asmstr, pattern> {
+ let isBranch = 1;
+ let isTerminator = 1;
+ let hasDelaySlot = 1;
+}
+
+// FIXME: the encoding for the JIT should look at the condition field.
+let Uses = [FCC] in
+ def FBCOND : FPBranchSP<0, (ins brtarget:$dst, CCOp:$cc),
+ "fb$cc $dst",
+ [(SPbrfcc bb:$dst, imm:$cc)]>;
+
+
+// Section B.24 - Call and Link Instruction, p. 125
+// This is the only Format 1 instruction
+let Uses = [O6],
+ hasDelaySlot = 1, isCall = 1,
+ Defs = [O0, O1, O2, O3, O4, O5, O7, G1, G2, G3, G4, G5, G6, G7,
+ D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15,
+ ICC, FCC, Y] in {
+ def CALL : InstSP<(outs), (ins calltarget:$dst, variable_ops),
+ "call $dst", []> {
+ bits<30> disp;
+ let op = 1;
+ let Inst{29-0} = disp;
+ }
+
+ // indirect calls
+ def JMPLrr : F3_1<2, 0b111000,
+ (outs), (ins MEMrr:$ptr, variable_ops),
+ "call $ptr",
+ [(call ADDRrr:$ptr)]>;
+ def JMPLri : F3_2<2, 0b111000,
+ (outs), (ins MEMri:$ptr, variable_ops),
+ "call $ptr",
+ [(call ADDRri:$ptr)]>;
+}
+
+// Section B.28 - Read State Register Instructions
+let Uses = [Y] in
+ def RDY : F3_1<2, 0b101000,
+ (outs IntRegs:$dst), (ins),
+ "rd %y, $dst", []>;
+
+// Section B.29 - Write State Register Instructions
+let Defs = [Y] in {
+ def WRYrr : F3_1<2, 0b110000,
+ (outs), (ins IntRegs:$b, IntRegs:$c),
+ "wr $b, $c, %y", []>;
+ def WRYri : F3_2<2, 0b110000,
+ (outs), (ins IntRegs:$b, i32imm:$c),
+ "wr $b, $c, %y", []>;
+}
+// Convert Integer to Floating-point Instructions, p. 141
+def FITOS : F3_3<2, 0b110100, 0b011000100,
+ (outs FPRegs:$dst), (ins FPRegs:$src),
+ "fitos $src, $dst",
+ [(set FPRegs:$dst, (SPitof FPRegs:$src))]>;
+def FITOD : F3_3<2, 0b110100, 0b011001000,
+ (outs DFPRegs:$dst), (ins FPRegs:$src),
+ "fitod $src, $dst",
+ [(set DFPRegs:$dst, (SPitof FPRegs:$src))]>;
+
+// Convert Floating-point to Integer Instructions, p. 142
+def FSTOI : F3_3<2, 0b110100, 0b011010001,
+ (outs FPRegs:$dst), (ins FPRegs:$src),
+ "fstoi $src, $dst",
+ [(set FPRegs:$dst, (SPftoi FPRegs:$src))]>;
+def FDTOI : F3_3<2, 0b110100, 0b011010010,
+ (outs FPRegs:$dst), (ins DFPRegs:$src),
+ "fdtoi $src, $dst",
+ [(set FPRegs:$dst, (SPftoi DFPRegs:$src))]>;
+
+// Convert between Floating-point Formats Instructions, p. 143
+def FSTOD : F3_3<2, 0b110100, 0b011001001,
+ (outs DFPRegs:$dst), (ins FPRegs:$src),
+ "fstod $src, $dst",
+ [(set DFPRegs:$dst, (fextend FPRegs:$src))]>;
+def FDTOS : F3_3<2, 0b110100, 0b011000110,
+ (outs FPRegs:$dst), (ins DFPRegs:$src),
+ "fdtos $src, $dst",
+ [(set FPRegs:$dst, (fround DFPRegs:$src))]>;
+
+// Floating-point Move Instructions, p. 144
+def FMOVS : F3_3<2, 0b110100, 0b000000001,
+ (outs FPRegs:$dst), (ins FPRegs:$src),
+ "fmovs $src, $dst", []>;
+def FNEGS : F3_3<2, 0b110100, 0b000000101,
+ (outs FPRegs:$dst), (ins FPRegs:$src),
+ "fnegs $src, $dst",
+ [(set FPRegs:$dst, (fneg FPRegs:$src))]>;
+def FABSS : F3_3<2, 0b110100, 0b000001001,
+ (outs FPRegs:$dst), (ins FPRegs:$src),
+ "fabss $src, $dst",
+ [(set FPRegs:$dst, (fabs FPRegs:$src))]>;
+
+
+// Floating-point Square Root Instructions, p.145
+def FSQRTS : F3_3<2, 0b110100, 0b000101001,
+ (outs FPRegs:$dst), (ins FPRegs:$src),
+ "fsqrts $src, $dst",
+ [(set FPRegs:$dst, (fsqrt FPRegs:$src))]>;
+def FSQRTD : F3_3<2, 0b110100, 0b000101010,
+ (outs DFPRegs:$dst), (ins DFPRegs:$src),
+ "fsqrtd $src, $dst",
+ [(set DFPRegs:$dst, (fsqrt DFPRegs:$src))]>;
+
+
+
+// Floating-point Add and Subtract Instructions, p. 146
+def FADDS : F3_3<2, 0b110100, 0b001000001,
+ (outs FPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2),
+ "fadds $src1, $src2, $dst",
+ [(set FPRegs:$dst, (fadd FPRegs:$src1, FPRegs:$src2))]>;
+def FADDD : F3_3<2, 0b110100, 0b001000010,
+ (outs DFPRegs:$dst), (ins DFPRegs:$src1, DFPRegs:$src2),
+ "faddd $src1, $src2, $dst",
+ [(set DFPRegs:$dst, (fadd DFPRegs:$src1, DFPRegs:$src2))]>;
+def FSUBS : F3_3<2, 0b110100, 0b001000101,
+ (outs FPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2),
+ "fsubs $src1, $src2, $dst",
+ [(set FPRegs:$dst, (fsub FPRegs:$src1, FPRegs:$src2))]>;
+def FSUBD : F3_3<2, 0b110100, 0b001000110,
+ (outs DFPRegs:$dst), (ins DFPRegs:$src1, DFPRegs:$src2),
+ "fsubd $src1, $src2, $dst",
+ [(set DFPRegs:$dst, (fsub DFPRegs:$src1, DFPRegs:$src2))]>;
+
+// Floating-point Multiply and Divide Instructions, p. 147
+def FMULS : F3_3<2, 0b110100, 0b001001001,
+ (outs FPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2),
+ "fmuls $src1, $src2, $dst",
+ [(set FPRegs:$dst, (fmul FPRegs:$src1, FPRegs:$src2))]>;
+def FMULD : F3_3<2, 0b110100, 0b001001010,
+ (outs DFPRegs:$dst), (ins DFPRegs:$src1, DFPRegs:$src2),
+ "fmuld $src1, $src2, $dst",
+ [(set DFPRegs:$dst, (fmul DFPRegs:$src1, DFPRegs:$src2))]>;
+def FSMULD : F3_3<2, 0b110100, 0b001101001,
+ (outs DFPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2),
+ "fsmuld $src1, $src2, $dst",
+ [(set DFPRegs:$dst, (fmul (fextend FPRegs:$src1),
+ (fextend FPRegs:$src2)))]>;
+def FDIVS : F3_3<2, 0b110100, 0b001001101,
+ (outs FPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2),
+ "fdivs $src1, $src2, $dst",
+ [(set FPRegs:$dst, (fdiv FPRegs:$src1, FPRegs:$src2))]>;
+def FDIVD : F3_3<2, 0b110100, 0b001001110,
+ (outs DFPRegs:$dst), (ins DFPRegs:$src1, DFPRegs:$src2),
+ "fdivd $src1, $src2, $dst",
+ [(set DFPRegs:$dst, (fdiv DFPRegs:$src1, DFPRegs:$src2))]>;
+
+// Floating-point Compare Instructions, p. 148
+// Note: the 2nd template arg is different for these guys.
+// Note 2: the result of a FCMP is not available until the 2nd cycle
+// after the instr is retired, but there is no interlock. This behavior
+// is modelled with a forced noop after the instruction.
+let Defs = [FCC] in {
+ def FCMPS : F3_3<2, 0b110101, 0b001010001,
+ (outs), (ins FPRegs:$src1, FPRegs:$src2),
+ "fcmps $src1, $src2\n\tnop",
+ [(SPcmpfcc FPRegs:$src1, FPRegs:$src2)]>;
+ def FCMPD : F3_3<2, 0b110101, 0b001010010,
+ (outs), (ins DFPRegs:$src1, DFPRegs:$src2),
+ "fcmpd $src1, $src2\n\tnop",
+ [(SPcmpfcc DFPRegs:$src1, DFPRegs:$src2)]>;
+}
+
+//===----------------------------------------------------------------------===//
+// V9 Instructions
+//===----------------------------------------------------------------------===//
+
+// V9 Conditional Moves.
+let Predicates = [HasV9], Constraints = "$T = $dst" in {
+ // Move Integer Register on Condition (MOVcc) p. 194 of the V9 manual.
+ // FIXME: Add instruction encodings for the JIT some day.
+ let Uses = [ICC] in {
+ def MOVICCrr
+ : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, CCOp:$cc),
+ "mov$cc %icc, $F, $dst",
+ [(set IntRegs:$dst,
+ (SPselecticc IntRegs:$F, IntRegs:$T, imm:$cc))]>;
+ def MOVICCri
+ : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, i32imm:$F, CCOp:$cc),
+ "mov$cc %icc, $F, $dst",
+ [(set IntRegs:$dst,
+ (SPselecticc simm11:$F, IntRegs:$T, imm:$cc))]>;
+ }
+
+ let Uses = [FCC] in {
+ def MOVFCCrr
+ : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, CCOp:$cc),
+ "mov$cc %fcc0, $F, $dst",
+ [(set IntRegs:$dst,
+ (SPselectfcc IntRegs:$F, IntRegs:$T, imm:$cc))]>;
+ def MOVFCCri
+ : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, i32imm:$F, CCOp:$cc),
+ "mov$cc %fcc0, $F, $dst",
+ [(set IntRegs:$dst,
+ (SPselectfcc simm11:$F, IntRegs:$T, imm:$cc))]>;
+ }
+
+ let Uses = [ICC] in {
+ def FMOVS_ICC
+ : Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, CCOp:$cc),
+ "fmovs$cc %icc, $F, $dst",
+ [(set FPRegs:$dst,
+ (SPselecticc FPRegs:$F, FPRegs:$T, imm:$cc))]>;
+ def FMOVD_ICC
+ : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, CCOp:$cc),
+ "fmovd$cc %icc, $F, $dst",
+ [(set DFPRegs:$dst,
+ (SPselecticc DFPRegs:$F, DFPRegs:$T, imm:$cc))]>;
+ }
+
+ let Uses = [FCC] in {
+ def FMOVS_FCC
+ : Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, CCOp:$cc),
+ "fmovs$cc %fcc0, $F, $dst",
+ [(set FPRegs:$dst,
+ (SPselectfcc FPRegs:$F, FPRegs:$T, imm:$cc))]>;
+ def FMOVD_FCC
+ : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, CCOp:$cc),
+ "fmovd$cc %fcc0, $F, $dst",
+ [(set DFPRegs:$dst,
+ (SPselectfcc DFPRegs:$F, DFPRegs:$T, imm:$cc))]>;
+ }
+
+}
+
+// Floating-Point Move Instructions, p. 164 of the V9 manual.
+let Predicates = [HasV9] in {
+ def FMOVD : F3_3<2, 0b110100, 0b000000010,
+ (outs DFPRegs:$dst), (ins DFPRegs:$src),
+ "fmovd $src, $dst", []>;
+ def FNEGD : F3_3<2, 0b110100, 0b000000110,
+ (outs DFPRegs:$dst), (ins DFPRegs:$src),
+ "fnegd $src, $dst",
+ [(set DFPRegs:$dst, (fneg DFPRegs:$src))]>;
+ def FABSD : F3_3<2, 0b110100, 0b000001010,
+ (outs DFPRegs:$dst), (ins DFPRegs:$src),
+ "fabsd $src, $dst",
+ [(set DFPRegs:$dst, (fabs DFPRegs:$src))]>;
+}
+
+// POPCrr - This does a ctpop of a 64-bit register. As such, we have to clear
+// the top 32-bits before using it. To do this clearing, we use a SLLri X,0.
+def POPCrr : F3_1<2, 0b101110,
+ (outs IntRegs:$dst), (ins IntRegs:$src),
+ "popc $src, $dst", []>, Requires<[HasV9]>;
+def : Pat<(ctpop IntRegs:$src),
+ (POPCrr (SLLri IntRegs:$src, 0))>;
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//===----------------------------------------------------------------------===//
+
+// Small immediates.
+def : Pat<(i32 simm13:$val),
+ (ORri G0, imm:$val)>;
+// Arbitrary immediates.
+def : Pat<(i32 imm:$val),
+ (ORri (SETHIi (HI22 imm:$val)), (LO10 imm:$val))>;
+
+// subc
+def : Pat<(subc IntRegs:$b, IntRegs:$c),
+ (SUBCCrr IntRegs:$b, IntRegs:$c)>;
+def : Pat<(subc IntRegs:$b, simm13:$val),
+ (SUBCCri IntRegs:$b, imm:$val)>;
+
+// Global addresses, constant pool entries
+def : Pat<(SPhi tglobaladdr:$in), (SETHIi tglobaladdr:$in)>;
+def : Pat<(SPlo tglobaladdr:$in), (ORri G0, tglobaladdr:$in)>;
+def : Pat<(SPhi tconstpool:$in), (SETHIi tconstpool:$in)>;
+def : Pat<(SPlo tconstpool:$in), (ORri G0, tconstpool:$in)>;
+
+// Add reg, lo. This is used when taking the addr of a global/constpool entry.
+def : Pat<(add IntRegs:$r, (SPlo tglobaladdr:$in)),
+ (ADDri IntRegs:$r, tglobaladdr:$in)>;
+def : Pat<(add IntRegs:$r, (SPlo tconstpool:$in)),
+ (ADDri IntRegs:$r, tconstpool:$in)>;
+
+// Calls:
+def : Pat<(call tglobaladdr:$dst),
+ (CALL tglobaladdr:$dst)>;
+def : Pat<(call texternalsym:$dst),
+ (CALL texternalsym:$dst)>;
+
+// Map integer extload's to zextloads.
+def : Pat<(i32 (extloadi1 ADDRrr:$src)), (LDUBrr ADDRrr:$src)>;
+def : Pat<(i32 (extloadi1 ADDRri:$src)), (LDUBri ADDRri:$src)>;
+def : Pat<(i32 (extloadi8 ADDRrr:$src)), (LDUBrr ADDRrr:$src)>;
+def : Pat<(i32 (extloadi8 ADDRri:$src)), (LDUBri ADDRri:$src)>;
+def : Pat<(i32 (extloadi16 ADDRrr:$src)), (LDUHrr ADDRrr:$src)>;
+def : Pat<(i32 (extloadi16 ADDRri:$src)), (LDUHri ADDRri:$src)>;
+
+// zextload bool -> zextload byte
+def : Pat<(i32 (zextloadi1 ADDRrr:$src)), (LDUBrr ADDRrr:$src)>;
+def : Pat<(i32 (zextloadi1 ADDRri:$src)), (LDUBri ADDRri:$src)>;
diff --git a/contrib/llvm/lib/Target/Sparc/SparcMachineFunctionInfo.h b/contrib/llvm/lib/Target/Sparc/SparcMachineFunctionInfo.h
new file mode 100644
index 0000000..0b74308
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcMachineFunctionInfo.h
@@ -0,0 +1,47 @@
+//===- SparcMachineFunctionInfo.h - Sparc Machine Function Info -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares Sparc specific per-machine-function information.
+//
+//===----------------------------------------------------------------------===//
+#ifndef SPARCMACHINEFUNCTIONINFO_H
+#define SPARCMACHINEFUNCTIONINFO_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+ class SparcMachineFunctionInfo : public MachineFunctionInfo {
+ private:
+ unsigned GlobalBaseReg;
+
+ /// VarArgsFrameOffset - Frame offset to start of varargs area.
+ int VarArgsFrameOffset;
+
+ /// SRetReturnReg - Holds the virtual register into which the sret
+ /// argument is passed.
+ unsigned SRetReturnReg;
+ public:
+ SparcMachineFunctionInfo()
+ : GlobalBaseReg(0), VarArgsFrameOffset(0), SRetReturnReg(0) {}
+ explicit SparcMachineFunctionInfo(MachineFunction &MF)
+ : GlobalBaseReg(0), VarArgsFrameOffset(0), SRetReturnReg(0) {}
+
+ unsigned getGlobalBaseReg() const { return GlobalBaseReg; }
+ void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; }
+
+ int getVarArgsFrameOffset() const { return VarArgsFrameOffset; }
+ void setVarArgsFrameOffset(int Offset) { VarArgsFrameOffset = Offset; }
+
+ unsigned getSRetReturnReg() const { return SRetReturnReg; }
+ void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; }
+ };
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp
new file mode 100644
index 0000000..8c16251
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp
@@ -0,0 +1,127 @@
+//===- SparcRegisterInfo.cpp - SPARC Register Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the SPARC implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Sparc.h"
+#include "SparcRegisterInfo.h"
+#include "SparcSubtarget.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Type.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+
+#define GET_REGINFO_TARGET_DESC
+#include "SparcGenRegisterInfo.inc"
+
+using namespace llvm;
+
+SparcRegisterInfo::SparcRegisterInfo(SparcSubtarget &st,
+ const TargetInstrInfo &tii)
+ : SparcGenRegisterInfo(SP::I7), Subtarget(st), TII(tii) {
+}
+
+const unsigned* SparcRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF)
+ const {
+ static const unsigned CalleeSavedRegs[] = { 0 };
+ return CalleeSavedRegs;
+}
+
+BitVector SparcRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+ // FIXME: G1 reserved for now for large imm generation by frame code.
+ Reserved.set(SP::G1);
+ Reserved.set(SP::G2);
+ Reserved.set(SP::G3);
+ Reserved.set(SP::G4);
+ Reserved.set(SP::O6);
+ Reserved.set(SP::I6);
+ Reserved.set(SP::I7);
+ Reserved.set(SP::G0);
+ Reserved.set(SP::G5);
+ Reserved.set(SP::G6);
+ Reserved.set(SP::G7);
+ return Reserved;
+}
+
+void SparcRegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ MachineInstr &MI = *I;
+ DebugLoc dl = MI.getDebugLoc();
+ int Size = MI.getOperand(0).getImm();
+ if (MI.getOpcode() == SP::ADJCALLSTACKDOWN)
+ Size = -Size;
+ if (Size)
+ BuildMI(MBB, I, dl, TII.get(SP::ADDri), SP::O6).addReg(SP::O6).addImm(Size);
+ MBB.erase(I);
+}
+
+void
+SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS) const {
+ assert(SPAdj == 0 && "Unexpected");
+
+ unsigned i = 0;
+ MachineInstr &MI = *II;
+ DebugLoc dl = MI.getDebugLoc();
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+
+ int FrameIndex = MI.getOperand(i).getIndex();
+
+ // Addressable stack objects are accessed using neg. offsets from %fp
+ MachineFunction &MF = *MI.getParent()->getParent();
+ int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
+ MI.getOperand(i+1).getImm();
+
+ // Replace frame index with a frame pointer reference.
+ if (Offset >= -4096 && Offset <= 4095) {
+ // If the offset is small enough to fit in the immediate field, directly
+ // encode it.
+ MI.getOperand(i).ChangeToRegister(SP::I6, false);
+ MI.getOperand(i+1).ChangeToImmediate(Offset);
+ } else {
+ // Otherwise, emit a G1 = SETHI %hi(offset). FIXME: it would be better to
+ // scavenge a register here instead of reserving G1 all of the time.
+ unsigned OffHi = (unsigned)Offset >> 10U;
+ BuildMI(*MI.getParent(), II, dl, TII.get(SP::SETHIi), SP::G1).addImm(OffHi);
+ // Emit G1 = G1 + I6
+ BuildMI(*MI.getParent(), II, dl, TII.get(SP::ADDrr), SP::G1).addReg(SP::G1)
+ .addReg(SP::I6);
+ // Insert: G1+%lo(offset) into the user.
+ MI.getOperand(i).ChangeToRegister(SP::G1, false);
+ MI.getOperand(i+1).ChangeToImmediate(Offset & ((1 << 10)-1));
+ }
+}
+
+void SparcRegisterInfo::
+processFunctionBeforeFrameFinalized(MachineFunction &MF) const {}
+
+unsigned SparcRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+ return SP::I6;
+}
+
+unsigned SparcRegisterInfo::getEHExceptionRegister() const {
+ llvm_unreachable("What is the exception register");
+ return 0;
+}
+
+unsigned SparcRegisterInfo::getEHHandlerRegister() const {
+ llvm_unreachable("What is the exception handler register");
+ return 0;
+}
diff --git a/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.h b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.h
new file mode 100644
index 0000000..f845667
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.h
@@ -0,0 +1,58 @@
+//===- SparcRegisterInfo.h - Sparc Register Information Impl ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Sparc implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPARCREGISTERINFO_H
+#define SPARCREGISTERINFO_H
+
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#define GET_REGINFO_HEADER
+#include "SparcGenRegisterInfo.inc"
+
+namespace llvm {
+
+class SparcSubtarget;
+class TargetInstrInfo;
+class Type;
+
+struct SparcRegisterInfo : public SparcGenRegisterInfo {
+ SparcSubtarget &Subtarget;
+ const TargetInstrInfo &TII;
+
+ SparcRegisterInfo(SparcSubtarget &st, const TargetInstrInfo &tii);
+
+ /// Code Generation virtual methods...
+ const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
+
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+
+ // Debug information queries.
+ unsigned getFrameRegister(const MachineFunction &MF) const;
+
+ // Exception handling queries.
+ unsigned getEHExceptionRegister() const;
+ unsigned getEHHandlerRegister() const;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.td b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.td
new file mode 100644
index 0000000..cf92829
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.td
@@ -0,0 +1,159 @@
+//===- SparcRegisterInfo.td - Sparc Register defs ----------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the Sparc register file
+//===----------------------------------------------------------------------===//
+
+class SparcReg<string n> : Register<n> {
+ field bits<5> Num;
+ let Namespace = "SP";
+}
+
+class SparcCtrlReg<string n>: Register<n> {
+ let Namespace = "SP";
+}
+
+let Namespace = "SP" in {
+def sub_even : SubRegIndex;
+def sub_odd : SubRegIndex;
+}
+
+// Registers are identified with 5-bit ID numbers.
+// Ri - 32-bit integer registers
+class Ri<bits<5> num, string n> : SparcReg<n> {
+ let Num = num;
+}
+// Rf - 32-bit floating-point registers
+class Rf<bits<5> num, string n> : SparcReg<n> {
+ let Num = num;
+}
+// Rd - Slots in the FP register file for 64-bit floating-point values.
+class Rd<bits<5> num, string n, list<Register> subregs> : SparcReg<n> {
+ let Num = num;
+ let SubRegs = subregs;
+ let SubRegIndices = [sub_even, sub_odd];
+}
+
+// Control Registers
+def ICC : SparcCtrlReg<"ICC">;
+def FCC : SparcCtrlReg<"FCC">;
+
+// Y register
+def Y : SparcCtrlReg<"Y">;
+
+// Integer registers
+def G0 : Ri< 0, "G0">, DwarfRegNum<[0]>;
+def G1 : Ri< 1, "G1">, DwarfRegNum<[1]>;
+def G2 : Ri< 2, "G2">, DwarfRegNum<[2]>;
+def G3 : Ri< 3, "G3">, DwarfRegNum<[3]>;
+def G4 : Ri< 4, "G4">, DwarfRegNum<[4]>;
+def G5 : Ri< 5, "G5">, DwarfRegNum<[5]>;
+def G6 : Ri< 6, "G6">, DwarfRegNum<[6]>;
+def G7 : Ri< 7, "G7">, DwarfRegNum<[7]>;
+def O0 : Ri< 8, "O0">, DwarfRegNum<[8]>;
+def O1 : Ri< 9, "O1">, DwarfRegNum<[9]>;
+def O2 : Ri<10, "O2">, DwarfRegNum<[10]>;
+def O3 : Ri<11, "O3">, DwarfRegNum<[11]>;
+def O4 : Ri<12, "O4">, DwarfRegNum<[12]>;
+def O5 : Ri<13, "O5">, DwarfRegNum<[13]>;
+def O6 : Ri<14, "SP">, DwarfRegNum<[14]>;
+def O7 : Ri<15, "O7">, DwarfRegNum<[15]>;
+def L0 : Ri<16, "L0">, DwarfRegNum<[16]>;
+def L1 : Ri<17, "L1">, DwarfRegNum<[17]>;
+def L2 : Ri<18, "L2">, DwarfRegNum<[18]>;
+def L3 : Ri<19, "L3">, DwarfRegNum<[19]>;
+def L4 : Ri<20, "L4">, DwarfRegNum<[20]>;
+def L5 : Ri<21, "L5">, DwarfRegNum<[21]>;
+def L6 : Ri<22, "L6">, DwarfRegNum<[22]>;
+def L7 : Ri<23, "L7">, DwarfRegNum<[23]>;
+def I0 : Ri<24, "I0">, DwarfRegNum<[24]>;
+def I1 : Ri<25, "I1">, DwarfRegNum<[25]>;
+def I2 : Ri<26, "I2">, DwarfRegNum<[26]>;
+def I3 : Ri<27, "I3">, DwarfRegNum<[27]>;
+def I4 : Ri<28, "I4">, DwarfRegNum<[28]>;
+def I5 : Ri<29, "I5">, DwarfRegNum<[29]>;
+def I6 : Ri<30, "FP">, DwarfRegNum<[30]>;
+def I7 : Ri<31, "I7">, DwarfRegNum<[31]>;
+
+// Floating-point registers
+def F0 : Rf< 0, "F0">, DwarfRegNum<[32]>;
+def F1 : Rf< 1, "F1">, DwarfRegNum<[33]>;
+def F2 : Rf< 2, "F2">, DwarfRegNum<[34]>;
+def F3 : Rf< 3, "F3">, DwarfRegNum<[35]>;
+def F4 : Rf< 4, "F4">, DwarfRegNum<[36]>;
+def F5 : Rf< 5, "F5">, DwarfRegNum<[37]>;
+def F6 : Rf< 6, "F6">, DwarfRegNum<[38]>;
+def F7 : Rf< 7, "F7">, DwarfRegNum<[39]>;
+def F8 : Rf< 8, "F8">, DwarfRegNum<[40]>;
+def F9 : Rf< 9, "F9">, DwarfRegNum<[41]>;
+def F10 : Rf<10, "F10">, DwarfRegNum<[42]>;
+def F11 : Rf<11, "F11">, DwarfRegNum<[43]>;
+def F12 : Rf<12, "F12">, DwarfRegNum<[44]>;
+def F13 : Rf<13, "F13">, DwarfRegNum<[45]>;
+def F14 : Rf<14, "F14">, DwarfRegNum<[46]>;
+def F15 : Rf<15, "F15">, DwarfRegNum<[47]>;
+def F16 : Rf<16, "F16">, DwarfRegNum<[48]>;
+def F17 : Rf<17, "F17">, DwarfRegNum<[49]>;
+def F18 : Rf<18, "F18">, DwarfRegNum<[50]>;
+def F19 : Rf<19, "F19">, DwarfRegNum<[51]>;
+def F20 : Rf<20, "F20">, DwarfRegNum<[52]>;
+def F21 : Rf<21, "F21">, DwarfRegNum<[53]>;
+def F22 : Rf<22, "F22">, DwarfRegNum<[54]>;
+def F23 : Rf<23, "F23">, DwarfRegNum<[55]>;
+def F24 : Rf<24, "F24">, DwarfRegNum<[56]>;
+def F25 : Rf<25, "F25">, DwarfRegNum<[57]>;
+def F26 : Rf<26, "F26">, DwarfRegNum<[58]>;
+def F27 : Rf<27, "F27">, DwarfRegNum<[59]>;
+def F28 : Rf<28, "F28">, DwarfRegNum<[60]>;
+def F29 : Rf<29, "F29">, DwarfRegNum<[61]>;
+def F30 : Rf<30, "F30">, DwarfRegNum<[62]>;
+def F31 : Rf<31, "F31">, DwarfRegNum<[63]>;
+
+// Aliases of the F* registers used to hold 64-bit fp values (doubles)
+def D0 : Rd< 0, "F0", [F0, F1]>, DwarfRegNum<[72]>;
+def D1 : Rd< 2, "F2", [F2, F3]>, DwarfRegNum<[73]>;
+def D2 : Rd< 4, "F4", [F4, F5]>, DwarfRegNum<[74]>;
+def D3 : Rd< 6, "F6", [F6, F7]>, DwarfRegNum<[75]>;
+def D4 : Rd< 8, "F8", [F8, F9]>, DwarfRegNum<[76]>;
+def D5 : Rd<10, "F10", [F10, F11]>, DwarfRegNum<[77]>;
+def D6 : Rd<12, "F12", [F12, F13]>, DwarfRegNum<[78]>;
+def D7 : Rd<14, "F14", [F14, F15]>, DwarfRegNum<[79]>;
+def D8 : Rd<16, "F16", [F16, F17]>, DwarfRegNum<[80]>;
+def D9 : Rd<18, "F18", [F18, F19]>, DwarfRegNum<[81]>;
+def D10 : Rd<20, "F20", [F20, F21]>, DwarfRegNum<[82]>;
+def D11 : Rd<22, "F22", [F22, F23]>, DwarfRegNum<[83]>;
+def D12 : Rd<24, "F24", [F24, F25]>, DwarfRegNum<[84]>;
+def D13 : Rd<26, "F26", [F26, F27]>, DwarfRegNum<[85]>;
+def D14 : Rd<28, "F28", [F28, F29]>, DwarfRegNum<[86]>;
+def D15 : Rd<30, "F30", [F30, F31]>, DwarfRegNum<[87]>;
+
+// Register classes.
+//
+// FIXME: the register order should be defined in terms of the preferred
+// allocation order...
+//
+def IntRegs : RegisterClass<"SP", [i32], 32,
+ (add L0, L1, L2, L3, L4, L5, L6,
+ L7, I0, I1, I2, I3, I4, I5,
+ O0, O1, O2, O3, O4, O5, O7,
+ G1,
+ // Non-allocatable regs:
+ G2, G3, G4, // FIXME: OK for use only in
+ // applications, not libraries.
+ O6, // stack ptr
+ I6, // frame ptr
+ I7, // return address
+ G0, // constant zero
+ G5, G6, G7 // reserved for kernel
+ )>;
+
+def FPRegs : RegisterClass<"SP", [f32], 32, (sequence "F%u", 0, 31)>;
+
+def DFPRegs : RegisterClass<"SP", [f64], 64, (sequence "D%u", 0, 15)>;
diff --git a/contrib/llvm/lib/Target/Sparc/SparcSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/Sparc/SparcSelectionDAGInfo.cpp
new file mode 100644
index 0000000..190c575
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcSelectionDAGInfo.cpp
@@ -0,0 +1,23 @@
+//===-- SparcSelectionDAGInfo.cpp - Sparc SelectionDAG Info ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SparcSelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "sparc-selectiondag-info"
+#include "SparcTargetMachine.h"
+using namespace llvm;
+
+SparcSelectionDAGInfo::SparcSelectionDAGInfo(const SparcTargetMachine &TM)
+ : TargetSelectionDAGInfo(TM) {
+}
+
+SparcSelectionDAGInfo::~SparcSelectionDAGInfo() {
+}
diff --git a/contrib/llvm/lib/Target/Sparc/SparcSelectionDAGInfo.h b/contrib/llvm/lib/Target/Sparc/SparcSelectionDAGInfo.h
new file mode 100644
index 0000000..dcd4203
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcSelectionDAGInfo.h
@@ -0,0 +1,31 @@
+//===-- SparcSelectionDAGInfo.h - Sparc SelectionDAG Info -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the Sparc subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPARCSELECTIONDAGINFO_H
+#define SPARCSELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class SparcTargetMachine;
+
+class SparcSelectionDAGInfo : public TargetSelectionDAGInfo {
+public:
+ explicit SparcSelectionDAGInfo(const SparcTargetMachine &TM);
+ ~SparcSelectionDAGInfo();
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/Sparc/SparcSubtarget.cpp b/contrib/llvm/lib/Target/Sparc/SparcSubtarget.cpp
new file mode 100644
index 0000000..6c501cf
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcSubtarget.cpp
@@ -0,0 +1,44 @@
+//===- SparcSubtarget.cpp - SPARC Subtarget Information -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SPARC specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SparcSubtarget.h"
+#include "Sparc.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "SparcGenSubtargetInfo.inc"
+
+using namespace llvm;
+
+SparcSubtarget::SparcSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS, bool is64Bit) :
+ SparcGenSubtargetInfo(TT, CPU, FS),
+ IsV9(false),
+ V8DeprecatedInsts(false),
+ IsVIS(false),
+ Is64Bit(is64Bit) {
+
+ // Determine default and user specified characteristics
+ std::string CPUName = CPU;
+ if (CPUName.empty()) {
+ if (is64Bit)
+ CPUName = "v9";
+ else
+ CPUName = "v8";
+ }
+ IsV9 = CPUName == "v9";
+
+ // Parse features string.
+ ParseSubtargetFeatures(CPUName, FS);
+}
diff --git a/contrib/llvm/lib/Target/Sparc/SparcSubtarget.h b/contrib/llvm/lib/Target/Sparc/SparcSubtarget.h
new file mode 100644
index 0000000..00a04c3
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcSubtarget.h
@@ -0,0 +1,58 @@
+//=====-- SparcSubtarget.h - Define Subtarget for the SPARC ----*- C++ -*-====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the SPARC specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPARC_SUBTARGET_H
+#define SPARC_SUBTARGET_H
+
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <string>
+
+#define GET_SUBTARGETINFO_HEADER
+#include "SparcGenSubtargetInfo.inc"
+
+namespace llvm {
+class StringRef;
+
+class SparcSubtarget : public SparcGenSubtargetInfo {
+ bool IsV9;
+ bool V8DeprecatedInsts;
+ bool IsVIS;
+ bool Is64Bit;
+
+public:
+ SparcSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS, bool is64bit);
+
+ bool isV9() const { return IsV9; }
+ bool isVIS() const { return IsVIS; }
+ bool useDeprecatedV8Instructions() const { return V8DeprecatedInsts; }
+
+ /// ParseSubtargetFeatures - Parses features string setting specified
+ /// subtarget options. Definition of function is auto generated by tblgen.
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+
+ bool is64Bit() const { return Is64Bit; }
+ std::string getDataLayout() const {
+ const char *p;
+ if (is64Bit()) {
+ p = "E-p:64:64:64-i64:64:64-f64:64:64-f128:128:128-n32:64";
+ } else {
+ p = "E-p:32:32:32-i64:64:64-f64:64:64-f128:64:64-n32";
+ }
+ return std::string(p);
+ }
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp b/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp
new file mode 100644
index 0000000..3d7b4a4
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -0,0 +1,66 @@
+//===-- SparcTargetMachine.cpp - Define TargetMachine for Sparc -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "Sparc.h"
+#include "SparcTargetMachine.h"
+#include "llvm/PassManager.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+extern "C" void LLVMInitializeSparcTarget() {
+ // Register the target.
+ RegisterTargetMachine<SparcV8TargetMachine> X(TheSparcTarget);
+ RegisterTargetMachine<SparcV9TargetMachine> Y(TheSparcV9Target);
+}
+
+/// SparcTargetMachine ctor - Create an ILP32 architecture model
+///
+SparcTargetMachine::SparcTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM,
+ bool is64bit)
+ : LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
+ Subtarget(TT, CPU, FS, is64bit),
+ DataLayout(Subtarget.getDataLayout()),
+ TLInfo(*this), TSInfo(*this), InstrInfo(Subtarget),
+ FrameLowering(Subtarget) {
+}
+
+bool SparcTargetMachine::addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ PM.add(createSparcISelDag(*this));
+ return false;
+}
+
+/// addPreEmitPass - This pass may be implemented by targets that want to run
+/// passes immediately before machine code is emitted. This should return
+/// true if -print-machineinstrs should print out the code after the passes.
+bool SparcTargetMachine::addPreEmitPass(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel){
+ PM.add(createSparcFPMoverPass(*this));
+ PM.add(createSparcDelaySlotFillerPass(*this));
+ return true;
+}
+
+SparcV8TargetMachine::SparcV8TargetMachine(const Target &T,
+ StringRef TT, StringRef CPU,
+ StringRef FS, Reloc::Model RM,
+ CodeModel::Model CM)
+ : SparcTargetMachine(T, TT, CPU, FS, RM, CM, false) {
+}
+
+SparcV9TargetMachine::SparcV9TargetMachine(const Target &T,
+ StringRef TT, StringRef CPU,
+ StringRef FS, Reloc::Model RM,
+ CodeModel::Model CM)
+ : SparcTargetMachine(T, TT, CPU, FS, RM, CM, true) {
+}
diff --git a/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.h b/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.h
new file mode 100644
index 0000000..3c907dd
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.h
@@ -0,0 +1,81 @@
+//===-- SparcTargetMachine.h - Define TargetMachine for Sparc ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Sparc specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPARCTARGETMACHINE_H
+#define SPARCTARGETMACHINE_H
+
+#include "SparcInstrInfo.h"
+#include "SparcISelLowering.h"
+#include "SparcFrameLowering.h"
+#include "SparcSelectionDAGInfo.h"
+#include "SparcSubtarget.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+
+class SparcTargetMachine : public LLVMTargetMachine {
+ SparcSubtarget Subtarget;
+ const TargetData DataLayout; // Calculates type size & alignment
+ SparcTargetLowering TLInfo;
+ SparcSelectionDAGInfo TSInfo;
+ SparcInstrInfo InstrInfo;
+ SparcFrameLowering FrameLowering;
+public:
+ SparcTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM, bool is64bit);
+
+ virtual const SparcInstrInfo *getInstrInfo() const { return &InstrInfo; }
+ virtual const TargetFrameLowering *getFrameLowering() const {
+ return &FrameLowering;
+ }
+ virtual const SparcSubtarget *getSubtargetImpl() const{ return &Subtarget; }
+ virtual const SparcRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+ virtual const SparcTargetLowering* getTargetLowering() const {
+ return &TLInfo;
+ }
+ virtual const SparcSelectionDAGInfo* getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+ virtual const TargetData *getTargetData() const { return &DataLayout; }
+
+ // Pass Pipeline Configuration
+ virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+};
+
+/// SparcV8TargetMachine - Sparc 32-bit target machine
+///
+class SparcV8TargetMachine : public SparcTargetMachine {
+public:
+ SparcV8TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM);
+};
+
+/// SparcV9TargetMachine - Sparc 64-bit target machine
+///
+class SparcV9TargetMachine : public SparcTargetMachine {
+public:
+ SparcV9TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp b/contrib/llvm/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp
new file mode 100644
index 0000000..c9d5b7b
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp
@@ -0,0 +1,21 @@
+//===-- SparcTargetInfo.cpp - Sparc Target Implementation -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Sparc.h"
+#include "llvm/Module.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::TheSparcTarget;
+Target llvm::TheSparcV9Target;
+
+extern "C" void LLVMInitializeSparcTargetInfo() {
+ RegisterTarget<Triple::sparc> X(TheSparcTarget, "sparc", "Sparc");
+ RegisterTarget<Triple::sparcv9> Y(TheSparcV9Target, "sparcv9", "Sparc V9");
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
new file mode 100644
index 0000000..8540546
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
@@ -0,0 +1,32 @@
+//===-- SystemZMCAsmInfo.cpp - SystemZ asm properties ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the SystemZMCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZMCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/Support/ELF.h"
+using namespace llvm;
+
+SystemZMCAsmInfo::SystemZMCAsmInfo(const Target &T, StringRef TT) {
+ IsLittleEndian = false;
+ PointerSize = 8;
+ PrivateGlobalPrefix = ".L";
+ WeakRefDirective = "\t.weak\t";
+ PCSymbol = ".";
+}
+
+const MCSection *SystemZMCAsmInfo::
+getNonexecutableStackSection(MCContext &Ctx) const{
+ return Ctx.getELFSection(".note.GNU-stack", ELF::SHT_PROGBITS,
+ 0, SectionKind::getMetadata());
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h
new file mode 100644
index 0000000..a6a27e2
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h
@@ -0,0 +1,30 @@
+//====-- SystemZMCAsmInfo.h - SystemZ asm properties -----------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the SystemZMCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SystemZTARGETASMINFO_H
+#define SystemZTARGETASMINFO_H
+
+#include "llvm/MC/MCAsmInfo.h"
+
+namespace llvm {
+ class Target;
+ class StringRef;
+
+ struct SystemZMCAsmInfo : public MCAsmInfo {
+ explicit SystemZMCAsmInfo(const Target &T, StringRef TT);
+ virtual const MCSection *getNonexecutableStackSection(MCContext &Ctx) const;
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
new file mode 100644
index 0000000..23fb1e0
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
@@ -0,0 +1,81 @@
+//===-- SystemZMCTargetDesc.cpp - SystemZ Target Descriptions ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides SystemZ specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZMCTargetDesc.h"
+#include "SystemZMCAsmInfo.h"
+#include "llvm/MC/MCCodeGenInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "SystemZGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "SystemZGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "SystemZGenRegisterInfo.inc"
+
+using namespace llvm;
+
+static MCInstrInfo *createSystemZMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitSystemZMCInstrInfo(X);
+ return X;
+}
+
+static MCRegisterInfo *createSystemZMCRegisterInfo(StringRef TT) {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitSystemZMCRegisterInfo(X, 0);
+ return X;
+}
+
+static MCSubtargetInfo *createSystemZMCSubtargetInfo(StringRef TT,
+ StringRef CPU,
+ StringRef FS) {
+ MCSubtargetInfo *X = new MCSubtargetInfo();
+ InitSystemZMCSubtargetInfo(X, TT, CPU, FS);
+ return X;
+}
+
+static MCCodeGenInfo *createSystemZMCCodeGenInfo(StringRef TT, Reloc::Model RM,
+ CodeModel::Model CM) {
+ MCCodeGenInfo *X = new MCCodeGenInfo();
+ if (RM == Reloc::Default)
+ RM = Reloc::Static;
+ X->InitMCCodeGenInfo(RM, CM);
+ return X;
+}
+
+extern "C" void LLVMInitializeSystemZTargetMC() {
+ // Register the MC asm info.
+ RegisterMCAsmInfo<SystemZMCAsmInfo> X(TheSystemZTarget);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(TheSystemZTarget,
+ createSystemZMCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(TheSystemZTarget,
+ createSystemZMCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(TheSystemZTarget,
+ createSystemZMCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(TheSystemZTarget,
+ createSystemZMCSubtargetInfo);
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
new file mode 100644
index 0000000..e2ad5af
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
@@ -0,0 +1,38 @@
+//===-- SystemZMCTargetDesc.h - SystemZ Target Descriptions -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides SystemZ specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SYSTEMZMCTARGETDESC_H
+#define SYSTEMZMCTARGETDESC_H
+
+namespace llvm {
+class MCSubtargetInfo;
+class Target;
+class StringRef;
+
+extern Target TheSystemZTarget;
+
+} // End llvm namespace
+
+// Defines symbolic names for SystemZ registers.
+// This defines a mapping from register name to register number.
+#define GET_REGINFO_ENUM
+#include "SystemZGenRegisterInfo.inc"
+
+// Defines symbolic names for the SystemZ instructions.
+#define GET_INSTRINFO_ENUM
+#include "SystemZGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "SystemZGenSubtargetInfo.inc"
+
+#endif
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZ.h b/contrib/llvm/lib/Target/SystemZ/SystemZ.h
new file mode 100644
index 0000000..88960b9
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZ.h
@@ -0,0 +1,52 @@
+//=-- SystemZ.h - Top-level interface for SystemZ representation -*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in
+// the LLVM SystemZ backend.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_SystemZ_H
+#define LLVM_TARGET_SystemZ_H
+
+#include "MCTargetDesc/SystemZMCTargetDesc.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+ class SystemZTargetMachine;
+ class FunctionPass;
+ class formatted_raw_ostream;
+
+ namespace SystemZCC {
+ // SystemZ specific condition code. These correspond to SYSTEMZ_*_COND in
+ // SystemZInstrInfo.td. They must be kept in synch.
+ enum CondCodes {
+ O = 0,
+ H = 1,
+ NLE = 2,
+ L = 3,
+ NHE = 4,
+ LH = 5,
+ NE = 6,
+ E = 7,
+ NLH = 8,
+ HE = 9,
+ NL = 10,
+ LE = 11,
+ NH = 12,
+ NO = 13,
+ INVALID = -1
+ };
+ }
+
+ FunctionPass *createSystemZISelDag(SystemZTargetMachine &TM,
+ CodeGenOpt::Level OptLevel);
+
+} // end namespace llvm;
+#endif
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZ.td b/contrib/llvm/lib/Target/SystemZ/SystemZ.td
new file mode 100644
index 0000000..4c08c08
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZ.td
@@ -0,0 +1,61 @@
+//===- SystemZ.td - Describe the SystemZ Target Machine ------*- tblgen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This is the top level entry point for the SystemZ target.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// Subtarget Features.
+//===----------------------------------------------------------------------===//
+def FeatureZ10 : SubtargetFeature<"z10", "HasZ10Insts", "true",
+ "Support Z10 instructions">;
+
+//===----------------------------------------------------------------------===//
+// SystemZ supported processors.
+//===----------------------------------------------------------------------===//
+class Proc<string Name, list<SubtargetFeature> Features>
+ : Processor<Name, NoItineraries, Features>;
+
+def : Proc<"z9", []>;
+def : Proc<"z10", [FeatureZ10]>;
+
+//===----------------------------------------------------------------------===//
+// Register File Description
+//===----------------------------------------------------------------------===//
+
+include "SystemZRegisterInfo.td"
+
+//===----------------------------------------------------------------------===//
+// Calling Convention Description
+//===----------------------------------------------------------------------===//
+
+include "SystemZCallingConv.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "SystemZInstrInfo.td"
+include "SystemZInstrFP.td"
+
+def SystemZInstrInfo : InstrInfo {}
+
+//===----------------------------------------------------------------------===//
+// Target Declaration
+//===----------------------------------------------------------------------===//
+
+def SystemZ : Target {
+ let InstructionSet = SystemZInstrInfo;
+}
+
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
new file mode 100644
index 0000000..43dcdfc
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
@@ -0,0 +1,221 @@
+//===-- SystemZAsmPrinter.cpp - SystemZ LLVM assembly writer ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to the SystemZ assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "SystemZ.h"
+#include "SystemZInstrInfo.h"
+#include "SystemZTargetMachine.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+ class SystemZAsmPrinter : public AsmPrinter {
+ public:
+ SystemZAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : AsmPrinter(TM, Streamer) {}
+
+ virtual const char *getPassName() const {
+ return "SystemZ Assembly Printer";
+ }
+
+ void printOperand(const MachineInstr *MI, int OpNum, raw_ostream &O,
+ const char* Modifier = 0);
+ void printPCRelImmOperand(const MachineInstr *MI, int OpNum, raw_ostream &O);
+ void printRIAddrOperand(const MachineInstr *MI, int OpNum, raw_ostream &O,
+ const char* Modifier = 0);
+ void printRRIAddrOperand(const MachineInstr *MI, int OpNum, raw_ostream &O,
+ const char* Modifier = 0);
+ void printS16ImmOperand(const MachineInstr *MI, int OpNum, raw_ostream &O) {
+ O << (int16_t)MI->getOperand(OpNum).getImm();
+ }
+ void printU16ImmOperand(const MachineInstr *MI, int OpNum, raw_ostream &O) {
+ O << (uint16_t)MI->getOperand(OpNum).getImm();
+ }
+ void printS32ImmOperand(const MachineInstr *MI, int OpNum, raw_ostream &O) {
+ O << (int32_t)MI->getOperand(OpNum).getImm();
+ }
+ void printU32ImmOperand(const MachineInstr *MI, int OpNum, raw_ostream &O) {
+ O << (uint32_t)MI->getOperand(OpNum).getImm();
+ }
+
+ void printInstruction(const MachineInstr *MI, raw_ostream &O);
+ static const char *getRegisterName(unsigned RegNo);
+
+ void EmitInstruction(const MachineInstr *MI);
+ };
+} // end of anonymous namespace
+
+#include "SystemZGenAsmWriter.inc"
+
+void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ SmallString<128> Str;
+ raw_svector_ostream OS(Str);
+ printInstruction(MI, OS);
+ OutStreamer.EmitRawText(OS.str());
+}
+
+void SystemZAsmPrinter::printPCRelImmOperand(const MachineInstr *MI, int OpNum,
+ raw_ostream &O) {
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ switch (MO.getType()) {
+ case MachineOperand::MO_Immediate:
+ O << MO.getImm();
+ return;
+ case MachineOperand::MO_MachineBasicBlock:
+ O << *MO.getMBB()->getSymbol();
+ return;
+ case MachineOperand::MO_GlobalAddress: {
+ const GlobalValue *GV = MO.getGlobal();
+ O << *Mang->getSymbol(GV);
+
+ // Assemble calls via PLT for externally visible symbols if PIC.
+ if (TM.getRelocationModel() == Reloc::PIC_ &&
+ !GV->hasHiddenVisibility() && !GV->hasProtectedVisibility() &&
+ !GV->hasLocalLinkage())
+ O << "@PLT";
+
+ printOffset(MO.getOffset(), O);
+ return;
+ }
+ case MachineOperand::MO_ExternalSymbol: {
+ std::string Name(MAI->getGlobalPrefix());
+ Name += MO.getSymbolName();
+ O << Name;
+
+ if (TM.getRelocationModel() == Reloc::PIC_)
+ O << "@PLT";
+
+ return;
+ }
+ default:
+ assert(0 && "Not implemented yet!");
+ }
+}
+
+
+void SystemZAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
+ raw_ostream &O, const char *Modifier) {
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register: {
+ assert (TargetRegisterInfo::isPhysicalRegister(MO.getReg()) &&
+ "Virtual registers should be already mapped!");
+ unsigned Reg = MO.getReg();
+ if (Modifier && strncmp(Modifier, "subreg", 6) == 0) {
+ if (strncmp(Modifier + 7, "even", 4) == 0)
+ Reg = TM.getRegisterInfo()->getSubReg(Reg, SystemZ::subreg_32bit);
+ else if (strncmp(Modifier + 7, "odd", 3) == 0)
+ Reg = TM.getRegisterInfo()->getSubReg(Reg, SystemZ::subreg_odd32);
+ else
+ assert(0 && "Invalid subreg modifier");
+ }
+
+ O << '%' << getRegisterName(Reg);
+ return;
+ }
+ case MachineOperand::MO_Immediate:
+ O << MO.getImm();
+ return;
+ case MachineOperand::MO_MachineBasicBlock:
+ O << *MO.getMBB()->getSymbol();
+ return;
+ case MachineOperand::MO_JumpTableIndex:
+ O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() << '_'
+ << MO.getIndex();
+
+ return;
+ case MachineOperand::MO_ConstantPoolIndex:
+ O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << '_'
+ << MO.getIndex();
+
+ printOffset(MO.getOffset(), O);
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ O << *Mang->getSymbol(MO.getGlobal());
+ break;
+ case MachineOperand::MO_ExternalSymbol: {
+ O << *GetExternalSymbolSymbol(MO.getSymbolName());
+ break;
+ }
+ default:
+ assert(0 && "Not implemented yet!");
+ }
+
+ switch (MO.getTargetFlags()) {
+ default: assert(0 && "Unknown target flag on GV operand");
+ case SystemZII::MO_NO_FLAG:
+ break;
+ case SystemZII::MO_GOTENT: O << "@GOTENT"; break;
+ case SystemZII::MO_PLT: O << "@PLT"; break;
+ }
+
+ printOffset(MO.getOffset(), O);
+}
+
+void SystemZAsmPrinter::printRIAddrOperand(const MachineInstr *MI, int OpNum,
+ raw_ostream &O,
+ const char *Modifier) {
+ const MachineOperand &Base = MI->getOperand(OpNum);
+
+ // Print displacement operand.
+ printOperand(MI, OpNum+1, O);
+
+ // Print base operand (if any)
+ if (Base.getReg()) {
+ O << '(';
+ printOperand(MI, OpNum, O);
+ O << ')';
+ }
+}
+
+void SystemZAsmPrinter::printRRIAddrOperand(const MachineInstr *MI, int OpNum,
+ raw_ostream &O,
+ const char *Modifier) {
+ const MachineOperand &Base = MI->getOperand(OpNum);
+ const MachineOperand &Index = MI->getOperand(OpNum+2);
+
+ // Print displacement operand.
+ printOperand(MI, OpNum+1, O);
+
+ // Print base operand (if any)
+ if (Base.getReg()) {
+ O << '(';
+ printOperand(MI, OpNum, O);
+ if (Index.getReg()) {
+ O << ',';
+ printOperand(MI, OpNum+2, O);
+ }
+ O << ')';
+ } else
+ assert(!Index.getReg() && "Should allocate base register first!");
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeSystemZAsmPrinter() {
+ RegisterAsmPrinter<SystemZAsmPrinter> X(TheSystemZTarget);
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.td b/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.td
new file mode 100644
index 0000000..c799a9e
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.td
@@ -0,0 +1,46 @@
+//=- SystemZCallingConv.td - Calling Conventions for SystemZ -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This describes the calling conventions for SystemZ architecture.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// SystemZ Return Value Calling Convention
+//===----------------------------------------------------------------------===//
+def RetCC_SystemZ : CallingConv<[
+ // Promote i8/i16/i32 arguments to i64.
+ CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
+
+ // i64 is returned in register R2
+ CCIfType<[i64], CCAssignToReg<[R2D, R3D, R4D, R5D]>>,
+
+ // f32 / f64 are returned in F0
+ CCIfType<[f32], CCAssignToReg<[F0S, F2S, F4S, F6S]>>,
+ CCIfType<[f64], CCAssignToReg<[F0L, F2L, F4L, F6L]>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// SystemZ Argument Calling Conventions
+//===----------------------------------------------------------------------===//
+def CC_SystemZ : CallingConv<[
+ // Promote i8/i16/i32 arguments to i64.
+ CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
+
+ // The first 5 integer arguments of non-varargs functions are passed in
+ // integer registers.
+ CCIfType<[i64], CCAssignToReg<[R2D, R3D, R4D, R5D, R6D]>>,
+
+ // The first 4 floating point arguments of non-varargs functions are passed
+ // in FP registers.
+ CCIfType<[f32], CCAssignToReg<[F0S, F2S, F4S, F6S]>>,
+ CCIfType<[f64], CCAssignToReg<[F0L, F2L, F4L, F6L]>>,
+
+ // Integer values get stored in stack slots that are 8 bytes in
+ // size and 8-byte aligned.
+ CCIfType<[i64, f32, f64], CCAssignToStack<8, 8>>
+]>;
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
new file mode 100644
index 0000000..2ad84a2
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -0,0 +1,386 @@
+//=====- SystemZFrameLowering.cpp - SystemZ Frame Information ------*- C++ -*-====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the SystemZ implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZFrameLowering.h"
+#include "SystemZInstrBuilder.h"
+#include "SystemZInstrInfo.h"
+#include "SystemZMachineFunctionInfo.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+SystemZFrameLowering::SystemZFrameLowering(const SystemZSubtarget &sti)
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 8, -160), STI(sti) {
+ // Fill the spill offsets map
+ static const unsigned SpillOffsTab[][2] = {
+ { SystemZ::R2D, 0x10 },
+ { SystemZ::R3D, 0x18 },
+ { SystemZ::R4D, 0x20 },
+ { SystemZ::R5D, 0x28 },
+ { SystemZ::R6D, 0x30 },
+ { SystemZ::R7D, 0x38 },
+ { SystemZ::R8D, 0x40 },
+ { SystemZ::R9D, 0x48 },
+ { SystemZ::R10D, 0x50 },
+ { SystemZ::R11D, 0x58 },
+ { SystemZ::R12D, 0x60 },
+ { SystemZ::R13D, 0x68 },
+ { SystemZ::R14D, 0x70 },
+ { SystemZ::R15D, 0x78 }
+ };
+
+ RegSpillOffsets.grow(SystemZ::NUM_TARGET_REGS);
+
+ for (unsigned i = 0, e = array_lengthof(SpillOffsTab); i != e; ++i)
+ RegSpillOffsets[SpillOffsTab[i][0]] = SpillOffsTab[i][1];
+}
+
+/// needsFP - Return true if the specified function should have a dedicated
+/// frame pointer register. This is true if the function has variable sized
+/// allocas or if frame pointer elimination is disabled.
+bool SystemZFrameLowering::hasFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects();
+}
+
+/// emitSPUpdate - Emit a series of instructions to increment / decrement the
+/// stack pointer by a constant value.
+static
+void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
+ int64_t NumBytes, const TargetInstrInfo &TII) {
+ unsigned Opc; uint64_t Chunk;
+ bool isSub = NumBytes < 0;
+ uint64_t Offset = isSub ? -NumBytes : NumBytes;
+
+ if (Offset >= (1LL << 15) - 1) {
+ Opc = SystemZ::ADD64ri32;
+ Chunk = (1LL << 31) - 1;
+ } else {
+ Opc = SystemZ::ADD64ri16;
+ Chunk = (1LL << 15) - 1;
+ }
+
+ DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+ while (Offset) {
+ uint64_t ThisVal = (Offset > Chunk) ? Chunk : Offset;
+ MachineInstr *MI =
+ BuildMI(MBB, MBBI, DL, TII.get(Opc), SystemZ::R15D)
+ .addReg(SystemZ::R15D).addImm(isSub ? -ThisVal : ThisVal);
+ // The PSW implicit def is dead.
+ MI->getOperand(3).setIsDead();
+ Offset -= ThisVal;
+ }
+}
+
+void SystemZFrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const SystemZInstrInfo &TII =
+ *static_cast<const SystemZInstrInfo*>(MF.getTarget().getInstrInfo());
+ SystemZMachineFunctionInfo *SystemZMFI =
+ MF.getInfo<SystemZMachineFunctionInfo>();
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+ // Get the number of bytes to allocate from the FrameInfo.
+ // Note that area for callee-saved stuff is already allocated, thus we need to
+ // 'undo' the stack movement.
+ uint64_t StackSize = MFI->getStackSize();
+ StackSize -= SystemZMFI->getCalleeSavedFrameSize();
+
+ uint64_t NumBytes = StackSize - getOffsetOfLocalArea();
+
+ // Skip the callee-saved push instructions.
+ while (MBBI != MBB.end() &&
+ (MBBI->getOpcode() == SystemZ::MOV64mr ||
+ MBBI->getOpcode() == SystemZ::MOV64mrm))
+ ++MBBI;
+
+ if (MBBI != MBB.end())
+ DL = MBBI->getDebugLoc();
+
+ // adjust stack pointer: R15 -= numbytes
+ if (StackSize || MFI->hasCalls()) {
+ assert(MF.getRegInfo().isPhysRegUsed(SystemZ::R15D) &&
+ "Invalid stack frame calculation!");
+ emitSPUpdate(MBB, MBBI, -(int64_t)NumBytes, TII);
+ }
+
+ if (hasFP(MF)) {
+ // Update R11 with the new base value...
+ BuildMI(MBB, MBBI, DL, TII.get(SystemZ::MOV64rr), SystemZ::R11D)
+ .addReg(SystemZ::R15D);
+
+ // Mark the FramePtr as live-in in every block except the entry.
+ for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
+ I != E; ++I)
+ I->addLiveIn(SystemZ::R11D);
+
+ }
+}
+
+void SystemZFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ const SystemZInstrInfo &TII =
+ *static_cast<const SystemZInstrInfo*>(MF.getTarget().getInstrInfo());
+ SystemZMachineFunctionInfo *SystemZMFI =
+ MF.getInfo<SystemZMachineFunctionInfo>();
+ unsigned RetOpcode = MBBI->getOpcode();
+
+ switch (RetOpcode) {
+ case SystemZ::RET: break; // These are ok
+ default:
+ assert(0 && "Can only insert epilog into returning blocks");
+ }
+
+ // Get the number of bytes to allocate from the FrameInfo
+ // Note that area for callee-saved stuff is already allocated, thus we need to
+ // 'undo' the stack movement.
+ uint64_t StackSize =
+ MFI->getStackSize() - SystemZMFI->getCalleeSavedFrameSize();
+ uint64_t NumBytes = StackSize - getOffsetOfLocalArea();
+
+ // Skip the final terminator instruction.
+ while (MBBI != MBB.begin()) {
+ MachineBasicBlock::iterator PI = prior(MBBI);
+ --MBBI;
+ if (!PI->getDesc().isTerminator())
+ break;
+ }
+
+ // During callee-saved restores emission stack frame was not yet finialized
+ // (and thus - the stack size was unknown). Tune the offset having full stack
+ // size in hands.
+ if (StackSize || MFI->hasCalls()) {
+ assert((MBBI->getOpcode() == SystemZ::MOV64rmm ||
+ MBBI->getOpcode() == SystemZ::MOV64rm) &&
+ "Expected to see callee-save register restore code");
+ assert(MF.getRegInfo().isPhysRegUsed(SystemZ::R15D) &&
+ "Invalid stack frame calculation!");
+
+ unsigned i = 0;
+ MachineInstr &MI = *MBBI;
+ while (!MI.getOperand(i).isImm()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Unexpected restore code!");
+ }
+
+ uint64_t Offset = NumBytes + MI.getOperand(i).getImm();
+ // If Offset does not fit into 20-bit signed displacement field we need to
+ // emit some additional code...
+ if (Offset > 524287) {
+ // Fold the displacement into load instruction as much as possible.
+ NumBytes = Offset - 524287;
+ Offset = 524287;
+ emitSPUpdate(MBB, MBBI, NumBytes, TII);
+ }
+
+ MI.getOperand(i).ChangeToImmediate(Offset);
+ }
+}
+
+int SystemZFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
+ int FI) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const SystemZMachineFunctionInfo *SystemZMFI =
+ MF.getInfo<SystemZMachineFunctionInfo>();
+ int Offset = MFI->getObjectOffset(FI) + MFI->getOffsetAdjustment();
+ uint64_t StackSize = MFI->getStackSize();
+
+ // Fixed objects are really located in the "previous" frame.
+ if (FI < 0)
+ StackSize -= SystemZMFI->getCalleeSavedFrameSize();
+
+ Offset += StackSize - getOffsetOfLocalArea();
+
+ // Skip the register save area if we generated the stack frame.
+ if (StackSize || MFI->hasCalls())
+ Offset -= getOffsetOfLocalArea();
+
+ return Offset;
+}
+
+bool
+SystemZFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return false;
+
+ DebugLoc DL;
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ SystemZMachineFunctionInfo *MFI = MF.getInfo<SystemZMachineFunctionInfo>();
+ unsigned CalleeFrameSize = 0;
+
+ // Scan the callee-saved and find the bounds of register spill area.
+ unsigned LowReg = 0, HighReg = 0, StartOffset = -1U, EndOffset = 0;
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ if (!SystemZ::FP64RegClass.contains(Reg)) {
+ unsigned Offset = RegSpillOffsets[Reg];
+ CalleeFrameSize += 8;
+ if (StartOffset > Offset) {
+ LowReg = Reg; StartOffset = Offset;
+ }
+ if (EndOffset < Offset) {
+ HighReg = Reg; EndOffset = RegSpillOffsets[Reg];
+ }
+ }
+ }
+
+ // Save information for epilogue inserter.
+ MFI->setCalleeSavedFrameSize(CalleeFrameSize);
+ MFI->setLowReg(LowReg); MFI->setHighReg(HighReg);
+
+ // Save GPRs
+ if (StartOffset) {
+ // Build a store instruction. Use STORE MULTIPLE instruction if there are many
+ // registers to store, otherwise - just STORE.
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MI, DL, TII.get((LowReg == HighReg ?
+ SystemZ::MOV64mr : SystemZ::MOV64mrm)));
+
+ // Add store operands.
+ MIB.addReg(SystemZ::R15D).addImm(StartOffset);
+ if (LowReg == HighReg)
+ MIB.addReg(0);
+ MIB.addReg(LowReg, RegState::Kill);
+ if (LowReg != HighReg)
+ MIB.addReg(HighReg, RegState::Kill);
+
+ // Do a second scan adding regs as being killed by instruction
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ // Add the callee-saved register as live-in. It's killed at the spill.
+ MBB.addLiveIn(Reg);
+ if (Reg != LowReg && Reg != HighReg)
+ MIB.addReg(Reg, RegState::ImplicitKill);
+ }
+ }
+
+ // Save FPRs
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ if (SystemZ::FP64RegClass.contains(Reg)) {
+ MBB.addLiveIn(Reg);
+ TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i].getFrameIdx(),
+ &SystemZ::FP64RegClass, TRI);
+ }
+ }
+
+ return true;
+}
+
+bool
+SystemZFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return false;
+
+ DebugLoc DL;
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ SystemZMachineFunctionInfo *MFI = MF.getInfo<SystemZMachineFunctionInfo>();
+
+ // Restore FP registers
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ if (SystemZ::FP64RegClass.contains(Reg))
+ TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(),
+ &SystemZ::FP64RegClass, TRI);
+ }
+
+ // Restore GP registers
+ unsigned LowReg = MFI->getLowReg(), HighReg = MFI->getHighReg();
+ unsigned StartOffset = RegSpillOffsets[LowReg];
+
+ if (StartOffset) {
+ // Build a load instruction. Use LOAD MULTIPLE instruction if there are many
+ // registers to load, otherwise - just LOAD.
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MI, DL, TII.get((LowReg == HighReg ?
+ SystemZ::MOV64rm : SystemZ::MOV64rmm)));
+ // Add store operands.
+ MIB.addReg(LowReg, RegState::Define);
+ if (LowReg != HighReg)
+ MIB.addReg(HighReg, RegState::Define);
+
+ MIB.addReg(hasFP(MF) ? SystemZ::R11D : SystemZ::R15D);
+ MIB.addImm(StartOffset);
+ if (LowReg == HighReg)
+ MIB.addReg(0);
+
+ // Do a second scan adding regs as being defined by instruction
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ if (Reg != LowReg && Reg != HighReg)
+ MIB.addReg(Reg, RegState::ImplicitDefine);
+ }
+ }
+
+ return true;
+}
+
+void
+SystemZFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const {
+ // Determine whether R15/R14 will ever be clobbered inside the function. And
+ // if yes - mark it as 'callee' saved.
+ MachineFrameInfo *FFI = MF.getFrameInfo();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ // Check whether high FPRs are ever used, if yes - we need to save R15 as
+ // well.
+ static const unsigned HighFPRs[] = {
+ SystemZ::F8L, SystemZ::F9L, SystemZ::F10L, SystemZ::F11L,
+ SystemZ::F12L, SystemZ::F13L, SystemZ::F14L, SystemZ::F15L,
+ SystemZ::F8S, SystemZ::F9S, SystemZ::F10S, SystemZ::F11S,
+ SystemZ::F12S, SystemZ::F13S, SystemZ::F14S, SystemZ::F15S,
+ };
+
+ bool HighFPRsUsed = false;
+ for (unsigned i = 0, e = array_lengthof(HighFPRs); i != e; ++i)
+ HighFPRsUsed |= MRI.isPhysRegUsed(HighFPRs[i]);
+
+ if (FFI->hasCalls())
+ /* FIXME: function is varargs */
+ /* FIXME: function grabs RA */
+ /* FIXME: function calls eh_return */
+ MRI.setPhysRegUsed(SystemZ::R14D);
+
+ if (HighFPRsUsed ||
+ FFI->hasCalls() ||
+ FFI->getObjectIndexEnd() != 0 || // Contains automatic variables
+ FFI->hasVarSizedObjects() // Function calls dynamic alloca's
+ /* FIXME: function is varargs */)
+ MRI.setPhysRegUsed(SystemZ::R15D);
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.h b/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
new file mode 100644
index 0000000..1284b68
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
@@ -0,0 +1,57 @@
+//=- SystemZFrameLowering.h - Define frame lowering for z/System -*- C++ -*--=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SYSTEMZ_FRAMEINFO_H
+#define SYSTEMZ_FRAMEINFO_H
+
+#include "SystemZ.h"
+#include "SystemZSubtarget.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/ADT/IndexedMap.h"
+
+namespace llvm {
+ class SystemZSubtarget;
+
+class SystemZFrameLowering : public TargetFrameLowering {
+ IndexedMap<unsigned> RegSpillOffsets;
+protected:
+ const SystemZSubtarget &STI;
+
+public:
+ explicit SystemZFrameLowering(const SystemZSubtarget &sti);
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+ bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+
+ void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const;
+
+ bool hasReservedCallFrame(const MachineFunction &MF) const { return true; }
+ bool hasFP(const MachineFunction &MF) const;
+ int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
new file mode 100644
index 0000000..2186ff1
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -0,0 +1,779 @@
+//==-- SystemZISelDAGToDAG.cpp - A dag to dag inst selector for SystemZ ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the SystemZ target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZ.h"
+#include "SystemZTargetMachine.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+ /// SystemZRRIAddressMode - This corresponds to rriaddr, but uses SDValue's
+ /// instead of register numbers for the leaves of the matched tree.
+ struct SystemZRRIAddressMode {
+ enum {
+ RegBase,
+ FrameIndexBase
+ } BaseType;
+
+ struct { // This is really a union, discriminated by BaseType!
+ SDValue Reg;
+ int FrameIndex;
+ } Base;
+
+ SDValue IndexReg;
+ int64_t Disp;
+ bool isRI;
+
+ SystemZRRIAddressMode(bool RI = false)
+ : BaseType(RegBase), IndexReg(), Disp(0), isRI(RI) {
+ }
+
+ void dump() {
+ errs() << "SystemZRRIAddressMode " << this << '\n';
+ if (BaseType == RegBase) {
+ errs() << "Base.Reg ";
+ if (Base.Reg.getNode() != 0)
+ Base.Reg.getNode()->dump();
+ else
+ errs() << "nul";
+ errs() << '\n';
+ } else {
+ errs() << " Base.FrameIndex " << Base.FrameIndex << '\n';
+ }
+ if (!isRI) {
+ errs() << "IndexReg ";
+ if (IndexReg.getNode() != 0) IndexReg.getNode()->dump();
+ else errs() << "nul";
+ }
+ errs() << " Disp " << Disp << '\n';
+ }
+ };
+}
+
+/// SystemZDAGToDAGISel - SystemZ specific code to select SystemZ machine
+/// instructions for SelectionDAG operations.
+///
+namespace {
+ class SystemZDAGToDAGISel : public SelectionDAGISel {
+ const SystemZTargetLowering &Lowering;
+ const SystemZSubtarget &Subtarget;
+
+ void getAddressOperandsRI(const SystemZRRIAddressMode &AM,
+ SDValue &Base, SDValue &Disp);
+ void getAddressOperands(const SystemZRRIAddressMode &AM,
+ SDValue &Base, SDValue &Disp,
+ SDValue &Index);
+
+ public:
+ SystemZDAGToDAGISel(SystemZTargetMachine &TM, CodeGenOpt::Level OptLevel)
+ : SelectionDAGISel(TM, OptLevel),
+ Lowering(*TM.getTargetLowering()),
+ Subtarget(*TM.getSubtargetImpl()) { }
+
+ virtual const char *getPassName() const {
+ return "SystemZ DAG->DAG Pattern Instruction Selection";
+ }
+
+ /// getI8Imm - Return a target constant with the specified value, of type
+ /// i8.
+ inline SDValue getI8Imm(uint64_t Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i8);
+ }
+
+ /// getI16Imm - Return a target constant with the specified value, of type
+ /// i16.
+ inline SDValue getI16Imm(uint64_t Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i16);
+ }
+
+ /// getI32Imm - Return a target constant with the specified value, of type
+ /// i32.
+ inline SDValue getI32Imm(uint64_t Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i32);
+ }
+
+ // Include the pieces autogenerated from the target description.
+ #include "SystemZGenDAGISel.inc"
+
+ private:
+ bool SelectAddrRI12Only(SDValue& Addr,
+ SDValue &Base, SDValue &Disp);
+ bool SelectAddrRI12(SDValue& Addr,
+ SDValue &Base, SDValue &Disp,
+ bool is12BitOnly = false);
+ bool SelectAddrRI(SDValue& Addr, SDValue &Base, SDValue &Disp);
+ bool SelectAddrRRI12(SDValue Addr,
+ SDValue &Base, SDValue &Disp, SDValue &Index);
+ bool SelectAddrRRI20(SDValue Addr,
+ SDValue &Base, SDValue &Disp, SDValue &Index);
+ bool SelectLAAddr(SDValue Addr,
+ SDValue &Base, SDValue &Disp, SDValue &Index);
+
+ SDNode *Select(SDNode *Node);
+
+ bool TryFoldLoad(SDNode *P, SDValue N,
+ SDValue &Base, SDValue &Disp, SDValue &Index);
+
+ bool MatchAddress(SDValue N, SystemZRRIAddressMode &AM,
+ bool is12Bit, unsigned Depth = 0);
+ bool MatchAddressBase(SDValue N, SystemZRRIAddressMode &AM);
+ };
+} // end anonymous namespace
+
+/// createSystemZISelDag - This pass converts a legalized DAG into a
+/// SystemZ-specific DAG, ready for instruction scheduling.
+///
+FunctionPass *llvm::createSystemZISelDag(SystemZTargetMachine &TM,
+ CodeGenOpt::Level OptLevel) {
+ return new SystemZDAGToDAGISel(TM, OptLevel);
+}
+
+/// isImmSExt20 - This method tests to see if the node is either a 32-bit
+/// or 64-bit immediate, and if the value can be accurately represented as a
+/// sign extension from a 20-bit value. If so, this returns true and the
+/// immediate.
+static bool isImmSExt20(int64_t Val, int64_t &Imm) {
+ if (Val >= -524288 && Val <= 524287) {
+ Imm = Val;
+ return true;
+ }
+ return false;
+}
+
+/// isImmZExt12 - This method tests to see if the node is either a 32-bit
+/// or 64-bit immediate, and if the value can be accurately represented as a
+/// zero extension from a 12-bit value. If so, this returns true and the
+/// immediate.
+static bool isImmZExt12(int64_t Val, int64_t &Imm) {
+ if (Val >= 0 && Val <= 0xFFF) {
+ Imm = Val;
+ return true;
+ }
+ return false;
+}
+
+/// MatchAddress - Add the specified node to the specified addressing mode,
+/// returning true if it cannot be done. This just pattern matches for the
+/// addressing mode.
+bool SystemZDAGToDAGISel::MatchAddress(SDValue N, SystemZRRIAddressMode &AM,
+ bool is12Bit, unsigned Depth) {
+ DebugLoc dl = N.getDebugLoc();
+ DEBUG(errs() << "MatchAddress: "; AM.dump());
+ // Limit recursion.
+ if (Depth > 5)
+ return MatchAddressBase(N, AM);
+
+ // FIXME: We can perform better here. If we have something like
+ // (shift (add A, imm), N), we can try to reassociate stuff and fold shift of
+ // imm into addressing mode.
+ switch (N.getOpcode()) {
+ default: break;
+ case ISD::Constant: {
+ int64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
+ int64_t Imm = 0;
+ bool Match = (is12Bit ?
+ isImmZExt12(AM.Disp + Val, Imm) :
+ isImmSExt20(AM.Disp + Val, Imm));
+ if (Match) {
+ AM.Disp = Imm;
+ return false;
+ }
+ break;
+ }
+
+ case ISD::FrameIndex:
+ if (AM.BaseType == SystemZRRIAddressMode::RegBase &&
+ AM.Base.Reg.getNode() == 0) {
+ AM.BaseType = SystemZRRIAddressMode::FrameIndexBase;
+ AM.Base.FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
+ return false;
+ }
+ break;
+
+ case ISD::SUB: {
+ // Given A-B, if A can be completely folded into the address and
+ // the index field with the index field unused, use -B as the index.
+ // This is a win if a has multiple parts that can be folded into
+ // the address. Also, this saves a mov if the base register has
+ // other uses, since it avoids a two-address sub instruction, however
+ // it costs an additional mov if the index register has other uses.
+
+ // Test if the LHS of the sub can be folded.
+ SystemZRRIAddressMode Backup = AM;
+ if (MatchAddress(N.getNode()->getOperand(0), AM, is12Bit, Depth+1)) {
+ AM = Backup;
+ break;
+ }
+ // Test if the index field is free for use.
+ if (AM.IndexReg.getNode() || AM.isRI) {
+ AM = Backup;
+ break;
+ }
+
+ // If the base is a register with multiple uses, this transformation may
+ // save a mov. Otherwise it's probably better not to do it.
+ if (AM.BaseType == SystemZRRIAddressMode::RegBase &&
+ (!AM.Base.Reg.getNode() || AM.Base.Reg.getNode()->hasOneUse())) {
+ AM = Backup;
+ break;
+ }
+
+ // Ok, the transformation is legal and appears profitable. Go for it.
+ SDValue RHS = N.getNode()->getOperand(1);
+ SDValue Zero = CurDAG->getConstant(0, N.getValueType());
+ SDValue Neg = CurDAG->getNode(ISD::SUB, dl, N.getValueType(), Zero, RHS);
+ AM.IndexReg = Neg;
+
+ // Insert the new nodes into the topological ordering.
+ if (Zero.getNode()->getNodeId() == -1 ||
+ Zero.getNode()->getNodeId() > N.getNode()->getNodeId()) {
+ CurDAG->RepositionNode(N.getNode(), Zero.getNode());
+ Zero.getNode()->setNodeId(N.getNode()->getNodeId());
+ }
+ if (Neg.getNode()->getNodeId() == -1 ||
+ Neg.getNode()->getNodeId() > N.getNode()->getNodeId()) {
+ CurDAG->RepositionNode(N.getNode(), Neg.getNode());
+ Neg.getNode()->setNodeId(N.getNode()->getNodeId());
+ }
+ return false;
+ }
+
+ case ISD::ADD: {
+ SystemZRRIAddressMode Backup = AM;
+ if (!MatchAddress(N.getNode()->getOperand(0), AM, is12Bit, Depth+1) &&
+ !MatchAddress(N.getNode()->getOperand(1), AM, is12Bit, Depth+1))
+ return false;
+ AM = Backup;
+ if (!MatchAddress(N.getNode()->getOperand(1), AM, is12Bit, Depth+1) &&
+ !MatchAddress(N.getNode()->getOperand(0), AM, is12Bit, Depth+1))
+ return false;
+ AM = Backup;
+
+ // If we couldn't fold both operands into the address at the same time,
+ // see if we can just put each operand into a register and fold at least
+ // the add.
+ if (!AM.isRI &&
+ AM.BaseType == SystemZRRIAddressMode::RegBase &&
+ !AM.Base.Reg.getNode() && !AM.IndexReg.getNode()) {
+ AM.Base.Reg = N.getNode()->getOperand(0);
+ AM.IndexReg = N.getNode()->getOperand(1);
+ return false;
+ }
+ break;
+ }
+
+ case ISD::OR:
+ // Handle "X | C" as "X + C" iff X is known to have C bits clear.
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ SystemZRRIAddressMode Backup = AM;
+ int64_t Offset = CN->getSExtValue();
+ int64_t Imm = 0;
+ bool MatchOffset = (is12Bit ?
+ isImmZExt12(AM.Disp + Offset, Imm) :
+ isImmSExt20(AM.Disp + Offset, Imm));
+ // The resultant disp must fit in 12 or 20-bits.
+ if (MatchOffset &&
+ // LHS should be an addr mode.
+ !MatchAddress(N.getOperand(0), AM, is12Bit, Depth+1) &&
+ // Check to see if the LHS & C is zero.
+ CurDAG->MaskedValueIsZero(N.getOperand(0), CN->getAPIntValue())) {
+ AM.Disp = Imm;
+ return false;
+ }
+ AM = Backup;
+ }
+ break;
+ }
+
+ return MatchAddressBase(N, AM);
+}
+
+/// MatchAddressBase - Helper for MatchAddress. Add the specified node to the
+/// specified addressing mode without any further recursion.
+bool SystemZDAGToDAGISel::MatchAddressBase(SDValue N,
+ SystemZRRIAddressMode &AM) {
+ // Is the base register already occupied?
+ if (AM.BaseType != SystemZRRIAddressMode::RegBase || AM.Base.Reg.getNode()) {
+ // If so, check to see if the index register is set.
+ if (AM.IndexReg.getNode() == 0 && !AM.isRI) {
+ AM.IndexReg = N;
+ return false;
+ }
+
+ // Otherwise, we cannot select it.
+ return true;
+ }
+
+ // Default, generate it as a register.
+ AM.BaseType = SystemZRRIAddressMode::RegBase;
+ AM.Base.Reg = N;
+ return false;
+}
+
+void SystemZDAGToDAGISel::getAddressOperandsRI(const SystemZRRIAddressMode &AM,
+ SDValue &Base, SDValue &Disp) {
+ if (AM.BaseType == SystemZRRIAddressMode::RegBase)
+ Base = AM.Base.Reg;
+ else
+ Base = CurDAG->getTargetFrameIndex(AM.Base.FrameIndex, TLI.getPointerTy());
+ Disp = CurDAG->getTargetConstant(AM.Disp, MVT::i64);
+}
+
+void SystemZDAGToDAGISel::getAddressOperands(const SystemZRRIAddressMode &AM,
+ SDValue &Base, SDValue &Disp,
+ SDValue &Index) {
+ getAddressOperandsRI(AM, Base, Disp);
+ Index = AM.IndexReg;
+}
+
+/// Returns true if the address can be represented by a base register plus
+/// an unsigned 12-bit displacement [r+imm].
+bool SystemZDAGToDAGISel::SelectAddrRI12Only(SDValue &Addr,
+ SDValue &Base, SDValue &Disp) {
+ return SelectAddrRI12(Addr, Base, Disp, /*is12BitOnly*/true);
+}
+
+bool SystemZDAGToDAGISel::SelectAddrRI12(SDValue &Addr,
+ SDValue &Base, SDValue &Disp,
+ bool is12BitOnly) {
+ SystemZRRIAddressMode AM20(/*isRI*/true), AM12(/*isRI*/true);
+ bool Done = false;
+
+ if (!Addr.hasOneUse()) {
+ unsigned Opcode = Addr.getOpcode();
+ if (Opcode != ISD::Constant && Opcode != ISD::FrameIndex) {
+ // If we are able to fold N into addressing mode, then we'll allow it even
+ // if N has multiple uses. In general, addressing computation is used as
+ // addresses by all of its uses. But watch out for CopyToReg uses, that
+ // means the address computation is liveout. It will be computed by a LA
+ // so we want to avoid computing the address twice.
+ for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
+ UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
+ if (UI->getOpcode() == ISD::CopyToReg) {
+ MatchAddressBase(Addr, AM12);
+ Done = true;
+ break;
+ }
+ }
+ }
+ }
+ if (!Done && MatchAddress(Addr, AM12, /* is12Bit */ true))
+ return false;
+
+ // Check, whether we can match stuff using 20-bit displacements
+ if (!Done && !is12BitOnly &&
+ !MatchAddress(Addr, AM20, /* is12Bit */ false))
+ if (AM12.Disp == 0 && AM20.Disp != 0)
+ return false;
+
+ DEBUG(errs() << "MatchAddress (final): "; AM12.dump());
+
+ EVT VT = Addr.getValueType();
+ if (AM12.BaseType == SystemZRRIAddressMode::RegBase) {
+ if (!AM12.Base.Reg.getNode())
+ AM12.Base.Reg = CurDAG->getRegister(0, VT);
+ }
+
+ assert(AM12.IndexReg.getNode() == 0 && "Invalid reg-imm address mode!");
+
+ getAddressOperandsRI(AM12, Base, Disp);
+
+ return true;
+}
+
+/// Returns true if the address can be represented by a base register plus
+/// a signed 20-bit displacement [r+imm].
+bool SystemZDAGToDAGISel::SelectAddrRI(SDValue& Addr,
+ SDValue &Base, SDValue &Disp) {
+ SystemZRRIAddressMode AM(/*isRI*/true);
+ bool Done = false;
+
+ if (!Addr.hasOneUse()) {
+ unsigned Opcode = Addr.getOpcode();
+ if (Opcode != ISD::Constant && Opcode != ISD::FrameIndex) {
+ // If we are able to fold N into addressing mode, then we'll allow it even
+ // if N has multiple uses. In general, addressing computation is used as
+ // addresses by all of its uses. But watch out for CopyToReg uses, that
+ // means the address computation is liveout. It will be computed by a LA
+ // so we want to avoid computing the address twice.
+ for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
+ UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
+ if (UI->getOpcode() == ISD::CopyToReg) {
+ MatchAddressBase(Addr, AM);
+ Done = true;
+ break;
+ }
+ }
+ }
+ }
+ if (!Done && MatchAddress(Addr, AM, /* is12Bit */ false))
+ return false;
+
+ DEBUG(errs() << "MatchAddress (final): "; AM.dump());
+
+ EVT VT = Addr.getValueType();
+ if (AM.BaseType == SystemZRRIAddressMode::RegBase) {
+ if (!AM.Base.Reg.getNode())
+ AM.Base.Reg = CurDAG->getRegister(0, VT);
+ }
+
+ assert(AM.IndexReg.getNode() == 0 && "Invalid reg-imm address mode!");
+
+ getAddressOperandsRI(AM, Base, Disp);
+
+ return true;
+}
+
+/// Returns true if the address can be represented by a base register plus
+/// index register plus an unsigned 12-bit displacement [base + idx + imm].
+bool SystemZDAGToDAGISel::SelectAddrRRI12(SDValue Addr,
+ SDValue &Base, SDValue &Disp, SDValue &Index) {
+ SystemZRRIAddressMode AM20, AM12;
+ bool Done = false;
+
+ if (!Addr.hasOneUse()) {
+ unsigned Opcode = Addr.getOpcode();
+ if (Opcode != ISD::Constant && Opcode != ISD::FrameIndex) {
+ // If we are able to fold N into addressing mode, then we'll allow it even
+ // if N has multiple uses. In general, addressing computation is used as
+ // addresses by all of its uses. But watch out for CopyToReg uses, that
+ // means the address computation is liveout. It will be computed by a LA
+ // so we want to avoid computing the address twice.
+ for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
+ UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
+ if (UI->getOpcode() == ISD::CopyToReg) {
+ MatchAddressBase(Addr, AM12);
+ Done = true;
+ break;
+ }
+ }
+ }
+ }
+ if (!Done && MatchAddress(Addr, AM12, /* is12Bit */ true))
+ return false;
+
+ // Check, whether we can match stuff using 20-bit displacements
+ if (!Done && !MatchAddress(Addr, AM20, /* is12Bit */ false))
+ if (AM12.Disp == 0 && AM20.Disp != 0)
+ return false;
+
+ DEBUG(errs() << "MatchAddress (final): "; AM12.dump());
+
+ EVT VT = Addr.getValueType();
+ if (AM12.BaseType == SystemZRRIAddressMode::RegBase) {
+ if (!AM12.Base.Reg.getNode())
+ AM12.Base.Reg = CurDAG->getRegister(0, VT);
+ }
+
+ if (!AM12.IndexReg.getNode())
+ AM12.IndexReg = CurDAG->getRegister(0, VT);
+
+ getAddressOperands(AM12, Base, Disp, Index);
+
+ return true;
+}
+
+/// Returns true if the address can be represented by a base register plus
+/// index register plus a signed 20-bit displacement [base + idx + imm].
+bool SystemZDAGToDAGISel::SelectAddrRRI20(SDValue Addr,
+ SDValue &Base, SDValue &Disp, SDValue &Index) {
+ SystemZRRIAddressMode AM;
+ bool Done = false;
+
+ if (!Addr.hasOneUse()) {
+ unsigned Opcode = Addr.getOpcode();
+ if (Opcode != ISD::Constant && Opcode != ISD::FrameIndex) {
+ // If we are able to fold N into addressing mode, then we'll allow it even
+ // if N has multiple uses. In general, addressing computation is used as
+ // addresses by all of its uses. But watch out for CopyToReg uses, that
+ // means the address computation is liveout. It will be computed by a LA
+ // so we want to avoid computing the address twice.
+ for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
+ UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
+ if (UI->getOpcode() == ISD::CopyToReg) {
+ MatchAddressBase(Addr, AM);
+ Done = true;
+ break;
+ }
+ }
+ }
+ }
+ if (!Done && MatchAddress(Addr, AM, /* is12Bit */ false))
+ return false;
+
+ DEBUG(errs() << "MatchAddress (final): "; AM.dump());
+
+ EVT VT = Addr.getValueType();
+ if (AM.BaseType == SystemZRRIAddressMode::RegBase) {
+ if (!AM.Base.Reg.getNode())
+ AM.Base.Reg = CurDAG->getRegister(0, VT);
+ }
+
+ if (!AM.IndexReg.getNode())
+ AM.IndexReg = CurDAG->getRegister(0, VT);
+
+ getAddressOperands(AM, Base, Disp, Index);
+
+ return true;
+}
+
+/// SelectLAAddr - it calls SelectAddr and determines if the maximal addressing
+/// mode it matches can be cost effectively emitted as an LA/LAY instruction.
+bool SystemZDAGToDAGISel::SelectLAAddr(SDValue Addr,
+ SDValue &Base, SDValue &Disp, SDValue &Index) {
+ SystemZRRIAddressMode AM;
+
+ if (MatchAddress(Addr, AM, false))
+ return false;
+
+ EVT VT = Addr.getValueType();
+ unsigned Complexity = 0;
+ if (AM.BaseType == SystemZRRIAddressMode::RegBase)
+ if (AM.Base.Reg.getNode())
+ Complexity = 1;
+ else
+ AM.Base.Reg = CurDAG->getRegister(0, VT);
+ else if (AM.BaseType == SystemZRRIAddressMode::FrameIndexBase)
+ Complexity = 4;
+
+ if (AM.IndexReg.getNode())
+ Complexity += 1;
+ else
+ AM.IndexReg = CurDAG->getRegister(0, VT);
+
+ if (AM.Disp && (AM.Base.Reg.getNode() || AM.IndexReg.getNode()))
+ Complexity += 1;
+
+ if (Complexity > 2) {
+ getAddressOperands(AM, Base, Disp, Index);
+ return true;
+ }
+
+ return false;
+}
+
+bool SystemZDAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N,
+ SDValue &Base, SDValue &Disp, SDValue &Index) {
+ if (ISD::isNON_EXTLoad(N.getNode()) &&
+ IsLegalToFold(N, P, P, OptLevel))
+ return SelectAddrRRI20(N.getOperand(1), Base, Disp, Index);
+ return false;
+}
+
+SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) {
+ EVT NVT = Node->getValueType(0);
+ DebugLoc dl = Node->getDebugLoc();
+ unsigned Opcode = Node->getOpcode();
+
+ // Dump information about the Node being selected
+ DEBUG(errs() << "Selecting: "; Node->dump(CurDAG); errs() << "\n");
+
+ // If we have a custom node, we already have selected!
+ if (Node->isMachineOpcode()) {
+ DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
+ return NULL; // Already selected.
+ }
+
+ switch (Opcode) {
+ default: break;
+ case ISD::SDIVREM: {
+ unsigned Opc, MOpc;
+ SDValue N0 = Node->getOperand(0);
+ SDValue N1 = Node->getOperand(1);
+
+ EVT ResVT;
+ bool is32Bit = false;
+ switch (NVT.getSimpleVT().SimpleTy) {
+ default: assert(0 && "Unsupported VT!");
+ case MVT::i32:
+ Opc = SystemZ::SDIVREM32r; MOpc = SystemZ::SDIVREM32m;
+ ResVT = MVT::v2i64;
+ is32Bit = true;
+ break;
+ case MVT::i64:
+ Opc = SystemZ::SDIVREM64r; MOpc = SystemZ::SDIVREM64m;
+ ResVT = MVT::v2i64;
+ break;
+ }
+
+ SDValue Tmp0, Tmp1, Tmp2;
+ bool foldedLoad = TryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2);
+
+ // Prepare the dividend
+ SDNode *Dividend;
+ if (is32Bit)
+ Dividend = CurDAG->getMachineNode(SystemZ::MOVSX64rr32, dl, MVT::i64, N0);
+ else
+ Dividend = N0.getNode();
+
+ // Insert prepared dividend into suitable 'subreg'
+ SDNode *Tmp = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
+ dl, ResVT);
+ Dividend =
+ CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, dl, ResVT,
+ SDValue(Tmp, 0), SDValue(Dividend, 0),
+ CurDAG->getTargetConstant(SystemZ::subreg_odd, MVT::i32));
+
+ SDNode *Result;
+ SDValue DivVal = SDValue(Dividend, 0);
+ if (foldedLoad) {
+ SDValue Ops[] = { DivVal, Tmp0, Tmp1, Tmp2, N1.getOperand(0) };
+ Result = CurDAG->getMachineNode(MOpc, dl, ResVT, MVT::Other,
+ Ops, array_lengthof(Ops));
+ // Update the chain.
+ ReplaceUses(N1.getValue(1), SDValue(Result, 1));
+ } else {
+ Result = CurDAG->getMachineNode(Opc, dl, ResVT, SDValue(Dividend, 0), N1);
+ }
+
+ // Copy the division (odd subreg) result, if it is needed.
+ if (!SDValue(Node, 0).use_empty()) {
+ unsigned SubRegIdx = (is32Bit ?
+ SystemZ::subreg_odd32 : SystemZ::subreg_odd);
+ SDNode *Div = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
+ dl, NVT,
+ SDValue(Result, 0),
+ CurDAG->getTargetConstant(SubRegIdx,
+ MVT::i32));
+
+ ReplaceUses(SDValue(Node, 0), SDValue(Div, 0));
+ DEBUG(errs() << "=> "; Result->dump(CurDAG); errs() << "\n");
+ }
+
+ // Copy the remainder (even subreg) result, if it is needed.
+ if (!SDValue(Node, 1).use_empty()) {
+ unsigned SubRegIdx = (is32Bit ?
+ SystemZ::subreg_32bit : SystemZ::subreg_even);
+ SDNode *Rem = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
+ dl, NVT,
+ SDValue(Result, 0),
+ CurDAG->getTargetConstant(SubRegIdx,
+ MVT::i32));
+
+ ReplaceUses(SDValue(Node, 1), SDValue(Rem, 0));
+ DEBUG(errs() << "=> "; Result->dump(CurDAG); errs() << "\n");
+ }
+
+ return NULL;
+ }
+ case ISD::UDIVREM: {
+ unsigned Opc, MOpc, ClrOpc;
+ SDValue N0 = Node->getOperand(0);
+ SDValue N1 = Node->getOperand(1);
+ EVT ResVT;
+
+ bool is32Bit = false;
+ switch (NVT.getSimpleVT().SimpleTy) {
+ default: assert(0 && "Unsupported VT!");
+ case MVT::i32:
+ Opc = SystemZ::UDIVREM32r; MOpc = SystemZ::UDIVREM32m;
+ ClrOpc = SystemZ::MOV64Pr0_even;
+ ResVT = MVT::v2i32;
+ is32Bit = true;
+ break;
+ case MVT::i64:
+ Opc = SystemZ::UDIVREM64r; MOpc = SystemZ::UDIVREM64m;
+ ClrOpc = SystemZ::MOV128r0_even;
+ ResVT = MVT::v2i64;
+ break;
+ }
+
+ SDValue Tmp0, Tmp1, Tmp2;
+ bool foldedLoad = TryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2);
+
+ // Prepare the dividend
+ SDNode *Dividend = N0.getNode();
+
+ // Insert prepared dividend into suitable 'subreg'
+ SDNode *Tmp = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
+ dl, ResVT);
+ {
+ unsigned SubRegIdx = (is32Bit ?
+ SystemZ::subreg_odd32 : SystemZ::subreg_odd);
+ Dividend =
+ CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, dl, ResVT,
+ SDValue(Tmp, 0), SDValue(Dividend, 0),
+ CurDAG->getTargetConstant(SubRegIdx, MVT::i32));
+ }
+
+ // Zero out even subreg
+ Dividend = CurDAG->getMachineNode(ClrOpc, dl, ResVT, SDValue(Dividend, 0));
+
+ SDValue DivVal = SDValue(Dividend, 0);
+ SDNode *Result;
+ if (foldedLoad) {
+ SDValue Ops[] = { DivVal, Tmp0, Tmp1, Tmp2, N1.getOperand(0) };
+ Result = CurDAG->getMachineNode(MOpc, dl, ResVT, MVT::Other,
+ Ops, array_lengthof(Ops));
+ // Update the chain.
+ ReplaceUses(N1.getValue(1), SDValue(Result, 1));
+ } else {
+ Result = CurDAG->getMachineNode(Opc, dl, ResVT, DivVal, N1);
+ }
+
+ // Copy the division (odd subreg) result, if it is needed.
+ if (!SDValue(Node, 0).use_empty()) {
+ unsigned SubRegIdx = (is32Bit ?
+ SystemZ::subreg_odd32 : SystemZ::subreg_odd);
+ SDNode *Div = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
+ dl, NVT,
+ SDValue(Result, 0),
+ CurDAG->getTargetConstant(SubRegIdx,
+ MVT::i32));
+ ReplaceUses(SDValue(Node, 0), SDValue(Div, 0));
+ DEBUG(errs() << "=> "; Result->dump(CurDAG); errs() << "\n");
+ }
+
+ // Copy the remainder (even subreg) result, if it is needed.
+ if (!SDValue(Node, 1).use_empty()) {
+ unsigned SubRegIdx = (is32Bit ?
+ SystemZ::subreg_32bit : SystemZ::subreg_even);
+ SDNode *Rem = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
+ dl, NVT,
+ SDValue(Result, 0),
+ CurDAG->getTargetConstant(SubRegIdx,
+ MVT::i32));
+ ReplaceUses(SDValue(Node, 1), SDValue(Rem, 0));
+ DEBUG(errs() << "=> "; Result->dump(CurDAG); errs() << "\n");
+ }
+
+ return NULL;
+ }
+ }
+
+ // Select the default instruction
+ SDNode *ResNode = SelectCode(Node);
+
+ DEBUG(errs() << "=> ";
+ if (ResNode == NULL || ResNode == Node)
+ Node->dump(CurDAG);
+ else
+ ResNode->dump(CurDAG);
+ errs() << "\n";
+ );
+ return ResNode;
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
new file mode 100644
index 0000000..48ca99f
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -0,0 +1,868 @@
+//===-- SystemZISelLowering.cpp - SystemZ DAG Lowering Implementation -----==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SystemZTargetLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "systemz-lower"
+
+#include "SystemZISelLowering.h"
+#include "SystemZ.h"
+#include "SystemZTargetMachine.h"
+#include "SystemZSubtarget.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/CallingConv.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/GlobalAlias.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/VectorExtras.h"
+using namespace llvm;
+
+SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm) :
+ TargetLowering(tm, new TargetLoweringObjectFileELF()),
+ Subtarget(*tm.getSubtargetImpl()), TM(tm) {
+
+ RegInfo = TM.getRegisterInfo();
+
+ // Set up the register classes.
+ addRegisterClass(MVT::i32, SystemZ::GR32RegisterClass);
+ addRegisterClass(MVT::i64, SystemZ::GR64RegisterClass);
+ addRegisterClass(MVT::v2i32,SystemZ::GR64PRegisterClass);
+ addRegisterClass(MVT::v2i64,SystemZ::GR128RegisterClass);
+
+ if (!UseSoftFloat) {
+ addRegisterClass(MVT::f32, SystemZ::FP32RegisterClass);
+ addRegisterClass(MVT::f64, SystemZ::FP64RegisterClass);
+ }
+
+ // Compute derived properties from the register classes
+ computeRegisterProperties();
+
+ // Provide all sorts of operation actions
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
+
+ setLoadExtAction(ISD::SEXTLOAD, MVT::f32, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::f32, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+
+ setLoadExtAction(ISD::SEXTLOAD, MVT::f64, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::f64, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);
+
+ setStackPointerRegisterToSaveRestore(SystemZ::R15D);
+
+ // TODO: It may be better to default to latency-oriented scheduling, however
+ // LLVM's current latency-oriented scheduler can't handle physreg definitions
+ // such as SystemZ has with PSW, so set this to the register-pressure
+ // scheduler, because it can.
+ setSchedulingPreference(Sched::RegPressure);
+
+ setBooleanContents(ZeroOrOneBooleanContent);
+ setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
+
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::BRCOND, MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i32, Custom);
+ setOperationAction(ISD::BR_CC, MVT::i64, Custom);
+ setOperationAction(ISD::BR_CC, MVT::f32, Custom);
+ setOperationAction(ISD::BR_CC, MVT::f64, Custom);
+ setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
+ setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
+ setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
+ setOperationAction(ISD::JumpTable, MVT::i64, Custom);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
+
+ setOperationAction(ISD::SDIV, MVT::i32, Expand);
+ setOperationAction(ISD::UDIV, MVT::i32, Expand);
+ setOperationAction(ISD::SDIV, MVT::i64, Expand);
+ setOperationAction(ISD::UDIV, MVT::i64, Expand);
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+ setOperationAction(ISD::SREM, MVT::i64, Expand);
+ setOperationAction(ISD::UREM, MVT::i64, Expand);
+
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+
+ setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+ setOperationAction(ISD::CTPOP, MVT::i64, Expand);
+ setOperationAction(ISD::CTTZ, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ, MVT::i64, Expand);
+ setOperationAction(ISD::CTLZ, MVT::i32, Promote);
+ setOperationAction(ISD::CTLZ, MVT::i64, Legal);
+
+ // FIXME: Can we lower these 2 efficiently?
+ setOperationAction(ISD::SETCC, MVT::i32, Expand);
+ setOperationAction(ISD::SETCC, MVT::i64, Expand);
+ setOperationAction(ISD::SETCC, MVT::f32, Expand);
+ setOperationAction(ISD::SETCC, MVT::f64, Expand);
+ setOperationAction(ISD::SELECT, MVT::i32, Expand);
+ setOperationAction(ISD::SELECT, MVT::i64, Expand);
+ setOperationAction(ISD::SELECT, MVT::f32, Expand);
+ setOperationAction(ISD::SELECT, MVT::f64, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
+
+ setOperationAction(ISD::MULHS, MVT::i64, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
+
+ // FIXME: Can we support these natively?
+ setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
+ setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand);
+ setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand);
+ setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand);
+
+ // Lower some FP stuff
+ setOperationAction(ISD::FSIN, MVT::f32, Expand);
+ setOperationAction(ISD::FSIN, MVT::f64, Expand);
+ setOperationAction(ISD::FCOS, MVT::f32, Expand);
+ setOperationAction(ISD::FCOS, MVT::f64, Expand);
+ setOperationAction(ISD::FREM, MVT::f32, Expand);
+ setOperationAction(ISD::FREM, MVT::f64, Expand);
+ setOperationAction(ISD::FMA, MVT::f32, Expand);
+ setOperationAction(ISD::FMA, MVT::f64, Expand);
+
+ // We have only 64-bit bitconverts
+ setOperationAction(ISD::BITCAST, MVT::f32, Expand);
+ setOperationAction(ISD::BITCAST, MVT::i32, Expand);
+
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
+
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
+ setMinFunctionAlignment(1);
+}
+
+SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
+ SelectionDAG &DAG) const {
+ switch (Op.getOpcode()) {
+ case ISD::BR_CC: return LowerBR_CC(Op, DAG);
+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
+ case ISD::JumpTable: return LowerJumpTable(Op, DAG);
+ case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
+ default:
+ llvm_unreachable("Should not custom lower this!");
+ return SDValue();
+ }
+}
+
+bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
+ if (UseSoftFloat || (VT != MVT::f32 && VT != MVT::f64))
+ return false;
+
+ // +0.0 lzer
+ // +0.0f lzdr
+ // -0.0 lzer + lner
+ // -0.0f lzdr + lndr
+ return Imm.isZero() || Imm.isNegZero();
+}
+
+//===----------------------------------------------------------------------===//
+// SystemZ Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+/// getConstraintType - Given a constraint letter, return the type of
+/// constraint it is for this target.
+TargetLowering::ConstraintType
+SystemZTargetLowering::getConstraintType(const std::string &Constraint) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ case 'r':
+ return C_RegisterClass;
+ default:
+ break;
+ }
+ }
+ return TargetLowering::getConstraintType(Constraint);
+}
+
+std::pair<unsigned, const TargetRegisterClass*>
+SystemZTargetLowering::
+getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const {
+ if (Constraint.size() == 1) {
+ // GCC Constraint Letters
+ switch (Constraint[0]) {
+ default: break;
+ case 'r': // GENERAL_REGS
+ if (VT == MVT::i32)
+ return std::make_pair(0U, SystemZ::GR32RegisterClass);
+ else if (VT == MVT::i128)
+ return std::make_pair(0U, SystemZ::GR128RegisterClass);
+
+ return std::make_pair(0U, SystemZ::GR64RegisterClass);
+ }
+ }
+
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
+
+//===----------------------------------------------------------------------===//
+// Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+#include "SystemZGenCallingConv.inc"
+
+SDValue
+SystemZTargetLowering::LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ DebugLoc dl,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals)
+ const {
+
+ switch (CallConv) {
+ default:
+ llvm_unreachable("Unsupported calling convention");
+ case CallingConv::C:
+ case CallingConv::Fast:
+ return LowerCCCArguments(Chain, CallConv, isVarArg, Ins, dl, DAG, InVals);
+ }
+}
+
+SDValue
+SystemZTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ // SystemZ target does not yet support tail call optimization.
+ isTailCall = false;
+
+ switch (CallConv) {
+ default:
+ llvm_unreachable("Unsupported calling convention");
+ case CallingConv::Fast:
+ case CallingConv::C:
+ return LowerCCCCallTo(Chain, Callee, CallConv, isVarArg, isTailCall,
+ Outs, OutVals, Ins, dl, DAG, InVals);
+ }
+}
+
+/// LowerCCCArguments - transform physical registers into virtual registers and
+/// generate load operations for arguments places on the stack.
+// FIXME: struct return stuff
+// FIXME: varargs
+SDValue
+SystemZTargetLowering::LowerCCCArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ DebugLoc dl,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals)
+ const {
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+
+ // Assign locations to all of the incoming arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+ CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ);
+
+ if (isVarArg)
+ report_fatal_error("Varargs not supported yet");
+
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ SDValue ArgValue;
+ CCValAssign &VA = ArgLocs[i];
+ EVT LocVT = VA.getLocVT();
+ if (VA.isRegLoc()) {
+ // Arguments passed in registers
+ TargetRegisterClass *RC;
+ switch (LocVT.getSimpleVT().SimpleTy) {
+ default:
+#ifndef NDEBUG
+ errs() << "LowerFormalArguments Unhandled argument type: "
+ << LocVT.getSimpleVT().SimpleTy
+ << "\n";
+#endif
+ llvm_unreachable(0);
+ case MVT::i64:
+ RC = SystemZ::GR64RegisterClass;
+ break;
+ case MVT::f32:
+ RC = SystemZ::FP32RegisterClass;
+ break;
+ case MVT::f64:
+ RC = SystemZ::FP64RegisterClass;
+ break;
+ }
+
+ unsigned VReg = RegInfo.createVirtualRegister(RC);
+ RegInfo.addLiveIn(VA.getLocReg(), VReg);
+ ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
+ } else {
+ // Sanity check
+ assert(VA.isMemLoc());
+
+ // Create the nodes corresponding to a load from this parameter slot.
+ // Create the frame index object for this incoming parameter...
+ int FI = MFI->CreateFixedObject(LocVT.getSizeInBits()/8,
+ VA.getLocMemOffset(), true);
+
+ // Create the SelectionDAG nodes corresponding to a load
+ // from this parameter
+ SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+ ArgValue = DAG.getLoad(LocVT, dl, Chain, FIN,
+ MachinePointerInfo::getFixedStack(FI),
+ false, false, 0);
+ }
+
+ // If this is an 8/16/32-bit value, it is really passed promoted to 64
+ // bits. Insert an assert[sz]ext to capture this, then truncate to the
+ // right size.
+ if (VA.getLocInfo() == CCValAssign::SExt)
+ ArgValue = DAG.getNode(ISD::AssertSext, dl, LocVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+ else if (VA.getLocInfo() == CCValAssign::ZExt)
+ ArgValue = DAG.getNode(ISD::AssertZext, dl, LocVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+
+ if (VA.getLocInfo() != CCValAssign::Full)
+ ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
+
+ InVals.push_back(ArgValue);
+ }
+
+ return Chain;
+}
+
+/// LowerCCCCallTo - functions arguments are copied from virtual regs to
+/// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted.
+/// TODO: sret.
+SDValue
+SystemZTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg>
+ &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ const TargetFrameLowering *TFI = TM.getFrameLowering();
+
+ // Offset to first argument stack slot.
+ const unsigned FirstArgOffset = 160;
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+
+ CCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);
+
+ // Get a count of how many bytes are to be pushed on the stack.
+ unsigned NumBytes = CCInfo.getNextStackOffset();
+
+ Chain = DAG.getCALLSEQ_START(Chain ,DAG.getConstant(NumBytes,
+ getPointerTy(), true));
+
+ SmallVector<std::pair<unsigned, SDValue>, 4> RegsToPass;
+ SmallVector<SDValue, 12> MemOpChains;
+ SDValue StackPtr;
+
+ // Walk the register/memloc assignments, inserting copies/loads.
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+
+ SDValue Arg = OutVals[i];
+
+ // Promote the value if needed.
+ switch (VA.getLocInfo()) {
+ default: assert(0 && "Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::AExt:
+ Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ }
+
+ // Arguments that can be passed on register must be kept at RegsToPass
+ // vector
+ if (VA.isRegLoc()) {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ } else {
+ assert(VA.isMemLoc());
+
+ if (StackPtr.getNode() == 0)
+ StackPtr =
+ DAG.getCopyFromReg(Chain, dl,
+ (TFI->hasFP(MF) ?
+ SystemZ::R11D : SystemZ::R15D),
+ getPointerTy());
+
+ unsigned Offset = FirstArgOffset + VA.getLocMemOffset();
+ SDValue PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(),
+ StackPtr,
+ DAG.getIntPtrConstant(Offset));
+
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(),
+ false, false, 0));
+ }
+ }
+
+ // Transform all store nodes into one single node because all store nodes are
+ // independent of each other.
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+ // Build a sequence of copy-to-reg nodes chained together with token chain and
+ // flag operands which copy the outgoing args into registers. The InFlag in
+ // necessary since all emitted instructions must be stuck together.
+ SDValue InFlag;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // If the callee is a GlobalAddress node (quite common, every direct call is)
+ // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
+ // Likewise ExternalSymbol -> TargetExternalSymbol.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy());
+ else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee))
+ Callee = DAG.getTargetExternalSymbol(E->getSymbol(), getPointerTy());
+
+ // Returns a chain & a flag for retval copy to use.
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ // Add argument registers to the end of the list so that they are
+ // known live into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+ if (InFlag.getNode())
+ Ops.push_back(InFlag);
+
+ Chain = DAG.getNode(SystemZISD::CALL, dl, NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ // Create the CALLSEQ_END node.
+ Chain = DAG.getCALLSEQ_END(Chain,
+ DAG.getConstant(NumBytes, getPointerTy(), true),
+ DAG.getConstant(0, getPointerTy(), true),
+ InFlag);
+ InFlag = Chain.getValue(1);
+
+ // Handle result values, copying them out of physregs into vregs that we
+ // return.
+ return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl,
+ DAG, InVals);
+}
+
+/// LowerCallResult - Lower the result values of a call into the
+/// appropriate copies out of appropriate physical registers.
+///
+SDValue
+SystemZTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+
+ CCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ);
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+
+ Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
+ VA.getLocVT(), InFlag).getValue(1);
+ SDValue RetValue = Chain.getValue(0);
+ InFlag = Chain.getValue(2);
+
+ // If this is an 8/16/32-bit value, it is really passed promoted to 64
+ // bits. Insert an assert[sz]ext to capture this, then truncate to the
+ // right size.
+ if (VA.getLocInfo() == CCValAssign::SExt)
+ RetValue = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), RetValue,
+ DAG.getValueType(VA.getValVT()));
+ else if (VA.getLocInfo() == CCValAssign::ZExt)
+ RetValue = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), RetValue,
+ DAG.getValueType(VA.getValVT()));
+
+ if (VA.getLocInfo() != CCValAssign::Full)
+ RetValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), RetValue);
+
+ InVals.push_back(RetValue);
+ }
+
+ return Chain;
+}
+
+
+SDValue
+SystemZTargetLowering::LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const {
+
+ // CCValAssign - represent the assignment of the return value to a location
+ SmallVector<CCValAssign, 16> RVLocs;
+
+ // CCState - Info about the registers and stack slot.
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+
+ // Analize return values.
+ CCInfo.AnalyzeReturn(Outs, RetCC_SystemZ);
+
+ // If this is the first return lowered for this function, add the regs to the
+ // liveout set for the function.
+ if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
+ for (unsigned i = 0; i != RVLocs.size(); ++i)
+ if (RVLocs[i].isRegLoc())
+ DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
+ }
+
+ SDValue Flag;
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+ SDValue ResValue = OutVals[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+
+ // If this is an 8/16/32-bit value, it is really should be passed promoted
+ // to 64 bits.
+ if (VA.getLocInfo() == CCValAssign::SExt)
+ ResValue = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ResValue);
+ else if (VA.getLocInfo() == CCValAssign::ZExt)
+ ResValue = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ResValue);
+ else if (VA.getLocInfo() == CCValAssign::AExt)
+ ResValue = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ResValue);
+
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), ResValue, Flag);
+
+ // Guarantee that all emitted copies are stuck together,
+ // avoiding something bad.
+ Flag = Chain.getValue(1);
+ }
+
+ if (Flag.getNode())
+ return DAG.getNode(SystemZISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
+
+ // Return Void
+ return DAG.getNode(SystemZISD::RET_FLAG, dl, MVT::Other, Chain);
+}
+
+SDValue SystemZTargetLowering::EmitCmp(SDValue LHS, SDValue RHS,
+ ISD::CondCode CC, SDValue &SystemZCC,
+ SelectionDAG &DAG) const {
+ // FIXME: Emit a test if RHS is zero
+
+ bool isUnsigned = false;
+ SystemZCC::CondCodes TCC;
+ switch (CC) {
+ default:
+ llvm_unreachable("Invalid integer condition!");
+ case ISD::SETEQ:
+ case ISD::SETOEQ:
+ TCC = SystemZCC::E;
+ break;
+ case ISD::SETUEQ:
+ TCC = SystemZCC::NLH;
+ break;
+ case ISD::SETNE:
+ case ISD::SETONE:
+ TCC = SystemZCC::NE;
+ break;
+ case ISD::SETUNE:
+ TCC = SystemZCC::LH;
+ break;
+ case ISD::SETO:
+ TCC = SystemZCC::O;
+ break;
+ case ISD::SETUO:
+ TCC = SystemZCC::NO;
+ break;
+ case ISD::SETULE:
+ if (LHS.getValueType().isFloatingPoint()) {
+ TCC = SystemZCC::NH;
+ break;
+ }
+ isUnsigned = true; // FALLTHROUGH
+ case ISD::SETLE:
+ case ISD::SETOLE:
+ TCC = SystemZCC::LE;
+ break;
+ case ISD::SETUGE:
+ if (LHS.getValueType().isFloatingPoint()) {
+ TCC = SystemZCC::NL;
+ break;
+ }
+ isUnsigned = true; // FALLTHROUGH
+ case ISD::SETGE:
+ case ISD::SETOGE:
+ TCC = SystemZCC::HE;
+ break;
+ case ISD::SETUGT:
+ if (LHS.getValueType().isFloatingPoint()) {
+ TCC = SystemZCC::NLE;
+ break;
+ }
+ isUnsigned = true; // FALLTHROUGH
+ case ISD::SETGT:
+ case ISD::SETOGT:
+ TCC = SystemZCC::H;
+ break;
+ case ISD::SETULT:
+ if (LHS.getValueType().isFloatingPoint()) {
+ TCC = SystemZCC::NHE;
+ break;
+ }
+ isUnsigned = true; // FALLTHROUGH
+ case ISD::SETLT:
+ case ISD::SETOLT:
+ TCC = SystemZCC::L;
+ break;
+ }
+
+ SystemZCC = DAG.getConstant(TCC, MVT::i32);
+
+ DebugLoc dl = LHS.getDebugLoc();
+ return DAG.getNode((isUnsigned ? SystemZISD::UCMP : SystemZISD::CMP),
+ dl, MVT::i64, LHS, RHS);
+}
+
+
+SDValue SystemZTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Chain = Op.getOperand(0);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
+ SDValue LHS = Op.getOperand(2);
+ SDValue RHS = Op.getOperand(3);
+ SDValue Dest = Op.getOperand(4);
+ DebugLoc dl = Op.getDebugLoc();
+
+ SDValue SystemZCC;
+ SDValue Flag = EmitCmp(LHS, RHS, CC, SystemZCC, DAG);
+ return DAG.getNode(SystemZISD::BRCOND, dl, Op.getValueType(),
+ Chain, Dest, SystemZCC, Flag);
+}
+
+SDValue SystemZTargetLowering::LowerSELECT_CC(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ SDValue TrueV = Op.getOperand(2);
+ SDValue FalseV = Op.getOperand(3);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+ DebugLoc dl = Op.getDebugLoc();
+
+ SDValue SystemZCC;
+ SDValue Flag = EmitCmp(LHS, RHS, CC, SystemZCC, DAG);
+
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
+ SmallVector<SDValue, 4> Ops;
+ Ops.push_back(TrueV);
+ Ops.push_back(FalseV);
+ Ops.push_back(SystemZCC);
+ Ops.push_back(Flag);
+
+ return DAG.getNode(SystemZISD::SELECT, dl, VTs, &Ops[0], Ops.size());
+}
+
+SDValue SystemZTargetLowering::LowerGlobalAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset();
+
+ bool IsPic = getTargetMachine().getRelocationModel() == Reloc::PIC_;
+ bool ExtraLoadRequired =
+ Subtarget.GVRequiresExtraLoad(GV, getTargetMachine(), false);
+
+ SDValue Result;
+ if (!IsPic && !ExtraLoadRequired) {
+ Result = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), Offset);
+ Offset = 0;
+ } else {
+ unsigned char OpFlags = 0;
+ if (ExtraLoadRequired)
+ OpFlags = SystemZII::MO_GOTENT;
+
+ Result = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags);
+ }
+
+ Result = DAG.getNode(SystemZISD::PCRelativeWrapper, dl,
+ getPointerTy(), Result);
+
+ if (ExtraLoadRequired)
+ Result = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Result,
+ MachinePointerInfo::getGOT(), false, false, 0);
+
+ // If there was a non-zero offset that we didn't fold, create an explicit
+ // addition for it.
+ if (Offset != 0)
+ Result = DAG.getNode(ISD::ADD, dl, getPointerTy(), Result,
+ DAG.getConstant(Offset, getPointerTy()));
+
+ return Result;
+}
+
+// FIXME: PIC here
+SDValue SystemZTargetLowering::LowerJumpTable(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+ SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy());
+
+ return DAG.getNode(SystemZISD::PCRelativeWrapper, dl, getPointerTy(), Result);
+}
+
+
+// FIXME: PIC here
+// FIXME: This is just dirty hack. We need to lower cpool properly
+SDValue SystemZTargetLowering::LowerConstantPool(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
+
+ SDValue Result = DAG.getTargetConstantPool(CP->getConstVal(), getPointerTy(),
+ CP->getAlignment(),
+ CP->getOffset());
+
+ return DAG.getNode(SystemZISD::PCRelativeWrapper, dl, getPointerTy(), Result);
+}
+
+const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ case SystemZISD::RET_FLAG: return "SystemZISD::RET_FLAG";
+ case SystemZISD::CALL: return "SystemZISD::CALL";
+ case SystemZISD::BRCOND: return "SystemZISD::BRCOND";
+ case SystemZISD::CMP: return "SystemZISD::CMP";
+ case SystemZISD::UCMP: return "SystemZISD::UCMP";
+ case SystemZISD::SELECT: return "SystemZISD::SELECT";
+ case SystemZISD::PCRelativeWrapper: return "SystemZISD::PCRelativeWrapper";
+ default: return NULL;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Other Lowering Code
+//===----------------------------------------------------------------------===//
+
+MachineBasicBlock*
+SystemZTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ const SystemZInstrInfo &TII = *TM.getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+ assert((MI->getOpcode() == SystemZ::Select32 ||
+ MI->getOpcode() == SystemZ::SelectF32 ||
+ MI->getOpcode() == SystemZ::Select64 ||
+ MI->getOpcode() == SystemZ::SelectF64) &&
+ "Unexpected instr type to insert");
+
+ // To "insert" a SELECT instruction, we actually have to insert the diamond
+ // control-flow pattern. The incoming instruction knows the destination vreg
+ // to set, the condition code register to branch on, the true/false values to
+ // select between, and a branch opcode to use.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator I = BB;
+ ++I;
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // cmpTY ccX, r1, r2
+ // jCC copy1MBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *copy1MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ SystemZCC::CondCodes CC = (SystemZCC::CondCodes)MI->getOperand(3).getImm();
+ F->insert(I, copy0MBB);
+ F->insert(I, copy1MBB);
+ // Update machine-CFG edges by transferring all successors of the current
+ // block to the new block which will contain the Phi node for the select.
+ copy1MBB->splice(copy1MBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ copy1MBB->transferSuccessorsAndUpdatePHIs(BB);
+ // Next, add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(copy1MBB);
+
+ BuildMI(BB, dl, TII.getBrCond(CC)).addMBB(copy1MBB);
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to copy1MBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(copy1MBB);
+
+ // copy1MBB:
+ // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
+ // ...
+ BB = copy1MBB;
+ BuildMI(*BB, BB->begin(), dl, TII.get(SystemZ::PHI),
+ MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB)
+ .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h
new file mode 100644
index 0000000..bab3dc2
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -0,0 +1,145 @@
+//==-- SystemZISelLowering.h - SystemZ DAG Lowering Interface ----*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that SystemZ uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_SystemZ_ISELLOWERING_H
+#define LLVM_TARGET_SystemZ_ISELLOWERING_H
+
+#include "SystemZ.h"
+#include "SystemZRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
+
+namespace llvm {
+ namespace SystemZISD {
+ enum {
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+ /// Return with a flag operand. Operand 0 is the chain operand.
+ RET_FLAG,
+
+ /// CALL - These operations represent an abstract call
+ /// instruction, which includes a bunch of information.
+ CALL,
+
+ /// PCRelativeWrapper - PC relative address
+ PCRelativeWrapper,
+
+ /// CMP, UCMP - Compare instruction
+ CMP,
+ UCMP,
+
+ /// BRCOND - Conditional branch. Operand 0 is chain operand, operand 1 is
+ /// the block to branch if condition is true, operand 2 is condition code
+ /// and operand 3 is the flag operand produced by a CMP instruction.
+ BRCOND,
+
+ /// SELECT - Operands 0 and 1 are selection variables, operand 2 is
+ /// condition code and operand 3 is the flag operand.
+ SELECT
+ };
+ }
+
+ class SystemZSubtarget;
+ class SystemZTargetMachine;
+
+ class SystemZTargetLowering : public TargetLowering {
+ public:
+ explicit SystemZTargetLowering(SystemZTargetMachine &TM);
+
+ virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i64; }
+
+ /// LowerOperation - Provide custom lowering hooks for some operations.
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+
+ /// getTargetNodeName - This method returns the name of a target specific
+ /// DAG node.
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const;
+ TargetLowering::ConstraintType
+ getConstraintType(const std::string &Constraint) const;
+
+ SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue EmitCmp(SDValue LHS, SDValue RHS,
+ ISD::CondCode CC, SDValue &SystemZCC,
+ SelectionDAG &DAG) const;
+
+
+ MachineBasicBlock* EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+
+ /// isFPImmLegal - Returns true if the target can instruction select the
+ /// specified FP immediate natively. If false, the legalizer will
+ /// materialize the FP immediate as a load from a constant pool.
+ virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
+
+ private:
+ SDValue LowerCCCCallTo(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ SDValue LowerCCCArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+ virtual SDValue
+ LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg, bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const;
+
+ const SystemZSubtarget &Subtarget;
+ const SystemZTargetMachine &TM;
+ const SystemZRegisterInfo *RegInfo;
+ };
+} // namespace llvm
+
+#endif // LLVM_TARGET_SystemZ_ISELLOWERING_H
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h b/contrib/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h
new file mode 100644
index 0000000..ab45ec5
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h
@@ -0,0 +1,128 @@
+//===- SystemZInstrBuilder.h - Functions to aid building insts -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file exposes functions that may be used with BuildMI from the
+// MachineInstrBuilder.h file to handle SystemZ'isms in a clean way.
+//
+// The BuildMem function may be used with the BuildMI function to add entire
+// memory references in a single, typed, function call.
+//
+// For reference, the order of operands for memory references is:
+// (Operand), Base, Displacement, Index.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SYSTEMZINSTRBUILDER_H
+#define SYSTEMZINSTRBUILDER_H
+
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+
+namespace llvm {
+
+/// SystemZAddressMode - This struct holds a generalized full x86 address mode.
+/// The base register can be a frame index, which will eventually be replaced
+/// with R15 or R11 and Disp being offsetted accordingly.
+struct SystemZAddressMode {
+ enum {
+ RegBase,
+ FrameIndexBase
+ } BaseType;
+
+ union {
+ unsigned Reg;
+ int FrameIndex;
+ } Base;
+
+ unsigned IndexReg;
+ int32_t Disp;
+ const GlobalValue *GV;
+
+ SystemZAddressMode() : BaseType(RegBase), IndexReg(0), Disp(0) {
+ Base.Reg = 0;
+ }
+};
+
+/// addDirectMem - This function is used to add a direct memory reference to the
+/// current instruction -- that is, a dereference of an address in a register,
+/// with no index or displacement.
+///
+static inline const MachineInstrBuilder &
+addDirectMem(const MachineInstrBuilder &MIB, unsigned Reg) {
+ // Because memory references are always represented with 3
+ // values, this adds: Reg, [0, NoReg] to the instruction.
+ return MIB.addReg(Reg).addImm(0).addReg(0);
+}
+
+static inline const MachineInstrBuilder &
+addOffset(const MachineInstrBuilder &MIB, int Offset) {
+ return MIB.addImm(Offset).addReg(0);
+}
+
+/// addRegOffset - This function is used to add a memory reference of the form
+/// [Reg + Offset], i.e., one with no or index, but with a
+/// displacement. An example is: 10(%r15).
+///
+static inline const MachineInstrBuilder &
+addRegOffset(const MachineInstrBuilder &MIB,
+ unsigned Reg, bool isKill, int Offset) {
+ return addOffset(MIB.addReg(Reg, getKillRegState(isKill)), Offset);
+}
+
+/// addRegReg - This function is used to add a memory reference of the form:
+/// [Reg + Reg].
+static inline const MachineInstrBuilder &
+addRegReg(const MachineInstrBuilder &MIB,
+ unsigned Reg1, bool isKill1, unsigned Reg2, bool isKill2) {
+ return MIB.addReg(Reg1, getKillRegState(isKill1)).addImm(0)
+ .addReg(Reg2, getKillRegState(isKill2));
+}
+
+static inline const MachineInstrBuilder &
+addFullAddress(const MachineInstrBuilder &MIB, const SystemZAddressMode &AM) {
+ if (AM.BaseType == SystemZAddressMode::RegBase)
+ MIB.addReg(AM.Base.Reg);
+ else if (AM.BaseType == SystemZAddressMode::FrameIndexBase)
+ MIB.addFrameIndex(AM.Base.FrameIndex);
+ else
+ assert(0);
+
+ return MIB.addImm(AM.Disp).addReg(AM.IndexReg);
+}
+
+/// addFrameReference - This function is used to add a reference to the base of
+/// an abstract object on the stack frame of the current function. This
+/// reference has base register as the FrameIndex offset until it is resolved.
+/// This allows a constant offset to be specified as well...
+///
+static inline const MachineInstrBuilder &
+addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0) {
+ MachineInstr *MI = MIB;
+ MachineFunction &MF = *MI->getParent()->getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ const MCInstrDesc &MCID = MI->getDesc();
+ unsigned Flags = 0;
+ if (MCID.mayLoad())
+ Flags |= MachineMemOperand::MOLoad;
+ if (MCID.mayStore())
+ Flags |= MachineMemOperand::MOStore;
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo(
+ PseudoSourceValue::getFixedStack(FI), Offset),
+ Flags, MFI.getObjectSize(FI),
+ MFI.getObjectAlignment(FI));
+ return addOffset(MIB.addFrameIndex(FI), Offset)
+ .addMemOperand(MMO);
+}
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td
new file mode 100644
index 0000000..a658280
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td
@@ -0,0 +1,340 @@
+//===- SystemZInstrFP.td - SystemZ FP Instruction defs --------*- tblgen-*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the SystemZ (binary) floating point instructions in
+// TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+// FIXME: multiclassify!
+
+//===----------------------------------------------------------------------===//
+// FP Pattern fragments
+
+def fpimm0 : PatLeaf<(fpimm), [{
+ return N->isExactlyValue(+0.0);
+}]>;
+
+def fpimmneg0 : PatLeaf<(fpimm), [{
+ return N->isExactlyValue(-0.0);
+}]>;
+
+let Uses = [PSW], usesCustomInserter = 1 in {
+ def SelectF32 : Pseudo<(outs FP32:$dst), (ins FP32:$src1, FP32:$src2, i8imm:$cc),
+ "# SelectF32 PSEUDO",
+ [(set FP32:$dst,
+ (SystemZselect FP32:$src1, FP32:$src2, imm:$cc, PSW))]>;
+ def SelectF64 : Pseudo<(outs FP64:$dst), (ins FP64:$src1, FP64:$src2, i8imm:$cc),
+ "# SelectF64 PSEUDO",
+ [(set FP64:$dst,
+ (SystemZselect FP64:$src1, FP64:$src2, imm:$cc, PSW))]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Move Instructions
+
+// Floating point constant loads.
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+def LD_Fp032 : Pseudo<(outs FP32:$dst), (ins),
+ "lzer\t{$dst}",
+ [(set FP32:$dst, fpimm0)]>;
+def LD_Fp064 : Pseudo<(outs FP64:$dst), (ins),
+ "lzdr\t{$dst}",
+ [(set FP64:$dst, fpimm0)]>;
+}
+
+let neverHasSideEffects = 1 in {
+def FMOV32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src),
+ "ler\t{$dst, $src}",
+ []>;
+def FMOV64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src),
+ "ldr\t{$dst, $src}",
+ []>;
+}
+
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
+def FMOV32rm : Pseudo<(outs FP32:$dst), (ins rriaddr12:$src),
+ "le\t{$dst, $src}",
+ [(set FP32:$dst, (load rriaddr12:$src))]>;
+def FMOV32rmy : Pseudo<(outs FP32:$dst), (ins rriaddr:$src),
+ "ley\t{$dst, $src}",
+ [(set FP32:$dst, (load rriaddr:$src))]>;
+def FMOV64rm : Pseudo<(outs FP64:$dst), (ins rriaddr12:$src),
+ "ld\t{$dst, $src}",
+ [(set FP64:$dst, (load rriaddr12:$src))]>;
+def FMOV64rmy : Pseudo<(outs FP64:$dst), (ins rriaddr:$src),
+ "ldy\t{$dst, $src}",
+ [(set FP64:$dst, (load rriaddr:$src))]>;
+}
+
+def FMOV32mr : Pseudo<(outs), (ins rriaddr12:$dst, FP32:$src),
+ "ste\t{$src, $dst}",
+ [(store FP32:$src, rriaddr12:$dst)]>;
+def FMOV32mry : Pseudo<(outs), (ins rriaddr:$dst, FP32:$src),
+ "stey\t{$src, $dst}",
+ [(store FP32:$src, rriaddr:$dst)]>;
+def FMOV64mr : Pseudo<(outs), (ins rriaddr12:$dst, FP64:$src),
+ "std\t{$src, $dst}",
+ [(store FP64:$src, rriaddr12:$dst)]>;
+def FMOV64mry : Pseudo<(outs), (ins rriaddr:$dst, FP64:$src),
+ "stdy\t{$src, $dst}",
+ [(store FP64:$src, rriaddr:$dst)]>;
+
+def FCOPYSIGN32 : Pseudo<(outs FP32:$dst), (ins FP32:$src1, FP32:$src2),
+ "cpsdr\t{$dst, $src2, $src1}",
+ [(set FP32:$dst, (fcopysign FP32:$src1, FP32:$src2))]>;
+def FCOPYSIGN64 : Pseudo<(outs FP64:$dst), (ins FP64:$src1, FP64:$src2),
+ "cpsdr\t{$dst, $src2, $src1}",
+ [(set FP64:$dst, (fcopysign FP64:$src1, FP64:$src2))]>;
+
+//===----------------------------------------------------------------------===//
+// Arithmetic Instructions
+
+
+let Defs = [PSW] in {
+def FNEG32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src),
+ "lcebr\t{$dst, $src}",
+ [(set FP32:$dst, (fneg FP32:$src)),
+ (implicit PSW)]>;
+def FNEG64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src),
+ "lcdbr\t{$dst, $src}",
+ [(set FP64:$dst, (fneg FP64:$src)),
+ (implicit PSW)]>;
+
+def FABS32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src),
+ "lpebr\t{$dst, $src}",
+ [(set FP32:$dst, (fabs FP32:$src)),
+ (implicit PSW)]>;
+def FABS64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src),
+ "lpdbr\t{$dst, $src}",
+ [(set FP64:$dst, (fabs FP64:$src)),
+ (implicit PSW)]>;
+
+def FNABS32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src),
+ "lnebr\t{$dst, $src}",
+ [(set FP32:$dst, (fneg(fabs FP32:$src))),
+ (implicit PSW)]>;
+def FNABS64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src),
+ "lndbr\t{$dst, $src}",
+ [(set FP64:$dst, (fneg(fabs FP64:$src))),
+ (implicit PSW)]>;
+}
+
+let Constraints = "$src1 = $dst" in {
+let Defs = [PSW] in {
+let isCommutable = 1 in { // X = ADD Y, Z == X = ADD Z, Y
+def FADD32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src1, FP32:$src2),
+ "aebr\t{$dst, $src2}",
+ [(set FP32:$dst, (fadd FP32:$src1, FP32:$src2)),
+ (implicit PSW)]>;
+def FADD64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src1, FP64:$src2),
+ "adbr\t{$dst, $src2}",
+ [(set FP64:$dst, (fadd FP64:$src1, FP64:$src2)),
+ (implicit PSW)]>;
+}
+
+def FADD32rm : Pseudo<(outs FP32:$dst), (ins FP32:$src1, rriaddr12:$src2),
+ "aeb\t{$dst, $src2}",
+ [(set FP32:$dst, (fadd FP32:$src1, (load rriaddr12:$src2))),
+ (implicit PSW)]>;
+def FADD64rm : Pseudo<(outs FP64:$dst), (ins FP64:$src1, rriaddr12:$src2),
+ "adb\t{$dst, $src2}",
+ [(set FP64:$dst, (fadd FP64:$src1, (load rriaddr12:$src2))),
+ (implicit PSW)]>;
+
+def FSUB32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src1, FP32:$src2),
+ "sebr\t{$dst, $src2}",
+ [(set FP32:$dst, (fsub FP32:$src1, FP32:$src2)),
+ (implicit PSW)]>;
+def FSUB64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src1, FP64:$src2),
+ "sdbr\t{$dst, $src2}",
+ [(set FP64:$dst, (fsub FP64:$src1, FP64:$src2)),
+ (implicit PSW)]>;
+
+def FSUB32rm : Pseudo<(outs FP32:$dst), (ins FP32:$src1, rriaddr12:$src2),
+ "seb\t{$dst, $src2}",
+ [(set FP32:$dst, (fsub FP32:$src1, (load rriaddr12:$src2))),
+ (implicit PSW)]>;
+def FSUB64rm : Pseudo<(outs FP64:$dst), (ins FP64:$src1, rriaddr12:$src2),
+ "sdb\t{$dst, $src2}",
+ [(set FP64:$dst, (fsub FP64:$src1, (load rriaddr12:$src2))),
+ (implicit PSW)]>;
+} // Defs = [PSW]
+
+let isCommutable = 1 in { // X = MUL Y, Z == X = MUL Z, Y
+def FMUL32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src1, FP32:$src2),
+ "meebr\t{$dst, $src2}",
+ [(set FP32:$dst, (fmul FP32:$src1, FP32:$src2))]>;
+def FMUL64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src1, FP64:$src2),
+ "mdbr\t{$dst, $src2}",
+ [(set FP64:$dst, (fmul FP64:$src1, FP64:$src2))]>;
+}
+
+def FMUL32rm : Pseudo<(outs FP32:$dst), (ins FP32:$src1, rriaddr12:$src2),
+ "meeb\t{$dst, $src2}",
+ [(set FP32:$dst, (fmul FP32:$src1, (load rriaddr12:$src2)))]>;
+def FMUL64rm : Pseudo<(outs FP64:$dst), (ins FP64:$src1, rriaddr12:$src2),
+ "mdb\t{$dst, $src2}",
+ [(set FP64:$dst, (fmul FP64:$src1, (load rriaddr12:$src2)))]>;
+
+def FMADD32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src1, FP32:$src2, FP32:$src3),
+ "maebr\t{$dst, $src3, $src2}",
+ [(set FP32:$dst, (fadd (fmul FP32:$src2, FP32:$src3),
+ FP32:$src1))]>;
+def FMADD32rm : Pseudo<(outs FP32:$dst), (ins FP32:$src1, rriaddr12:$src2, FP32:$src3),
+ "maeb\t{$dst, $src3, $src2}",
+ [(set FP32:$dst, (fadd (fmul (load rriaddr12:$src2),
+ FP32:$src3),
+ FP32:$src1))]>;
+
+def FMADD64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src1, FP64:$src2, FP64:$src3),
+ "madbr\t{$dst, $src3, $src2}",
+ [(set FP64:$dst, (fadd (fmul FP64:$src2, FP64:$src3),
+ FP64:$src1))]>;
+def FMADD64rm : Pseudo<(outs FP64:$dst), (ins FP64:$src1, rriaddr12:$src2, FP64:$src3),
+ "madb\t{$dst, $src3, $src2}",
+ [(set FP64:$dst, (fadd (fmul (load rriaddr12:$src2),
+ FP64:$src3),
+ FP64:$src1))]>;
+
+def FMSUB32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src1, FP32:$src2, FP32:$src3),
+ "msebr\t{$dst, $src3, $src2}",
+ [(set FP32:$dst, (fsub (fmul FP32:$src2, FP32:$src3),
+ FP32:$src1))]>;
+def FMSUB32rm : Pseudo<(outs FP32:$dst), (ins FP32:$src1, rriaddr12:$src2, FP32:$src3),
+ "mseb\t{$dst, $src3, $src2}",
+ [(set FP32:$dst, (fsub (fmul (load rriaddr12:$src2),
+ FP32:$src3),
+ FP32:$src1))]>;
+
+def FMSUB64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src1, FP64:$src2, FP64:$src3),
+ "msdbr\t{$dst, $src3, $src2}",
+ [(set FP64:$dst, (fsub (fmul FP64:$src2, FP64:$src3),
+ FP64:$src1))]>;
+def FMSUB64rm : Pseudo<(outs FP64:$dst), (ins FP64:$src1, rriaddr12:$src2, FP64:$src3),
+ "msdb\t{$dst, $src3, $src2}",
+ [(set FP64:$dst, (fsub (fmul (load rriaddr12:$src2),
+ FP64:$src3),
+ FP64:$src1))]>;
+
+def FDIV32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src1, FP32:$src2),
+ "debr\t{$dst, $src2}",
+ [(set FP32:$dst, (fdiv FP32:$src1, FP32:$src2))]>;
+def FDIV64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src1, FP64:$src2),
+ "ddbr\t{$dst, $src2}",
+ [(set FP64:$dst, (fdiv FP64:$src1, FP64:$src2))]>;
+
+def FDIV32rm : Pseudo<(outs FP32:$dst), (ins FP32:$src1, rriaddr12:$src2),
+ "deb\t{$dst, $src2}",
+ [(set FP32:$dst, (fdiv FP32:$src1, (load rriaddr12:$src2)))]>;
+def FDIV64rm : Pseudo<(outs FP64:$dst), (ins FP64:$src1, rriaddr12:$src2),
+ "ddb\t{$dst, $src2}",
+ [(set FP64:$dst, (fdiv FP64:$src1, (load rriaddr12:$src2)))]>;
+
+} // Constraints = "$src1 = $dst"
+
+def FSQRT32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src),
+ "sqebr\t{$dst, $src}",
+ [(set FP32:$dst, (fsqrt FP32:$src))]>;
+def FSQRT64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src),
+ "sqdbr\t{$dst, $src}",
+ [(set FP64:$dst, (fsqrt FP64:$src))]>;
+
+def FSQRT32rm : Pseudo<(outs FP32:$dst), (ins rriaddr12:$src),
+ "sqeb\t{$dst, $src}",
+ [(set FP32:$dst, (fsqrt (load rriaddr12:$src)))]>;
+def FSQRT64rm : Pseudo<(outs FP64:$dst), (ins rriaddr12:$src),
+ "sqdb\t{$dst, $src}",
+ [(set FP64:$dst, (fsqrt (load rriaddr12:$src)))]>;
+
+def FROUND64r32 : Pseudo<(outs FP32:$dst), (ins FP64:$src),
+ "ledbr\t{$dst, $src}",
+ [(set FP32:$dst, (fround FP64:$src))]>;
+
+def FEXT32r64 : Pseudo<(outs FP64:$dst), (ins FP32:$src),
+ "ldebr\t{$dst, $src}",
+ [(set FP64:$dst, (fextend FP32:$src))]>;
+def FEXT32m64 : Pseudo<(outs FP64:$dst), (ins rriaddr12:$src),
+ "ldeb\t{$dst, $src}",
+ [(set FP64:$dst, (fextend (load rriaddr12:$src)))]>;
+
+let Defs = [PSW] in {
+def FCONVFP32 : Pseudo<(outs FP32:$dst), (ins GR32:$src),
+ "cefbr\t{$dst, $src}",
+ [(set FP32:$dst, (sint_to_fp GR32:$src)),
+ (implicit PSW)]>;
+def FCONVFP32r64: Pseudo<(outs FP32:$dst), (ins GR64:$src),
+ "cegbr\t{$dst, $src}",
+ [(set FP32:$dst, (sint_to_fp GR64:$src)),
+ (implicit PSW)]>;
+
+def FCONVFP64r32: Pseudo<(outs FP64:$dst), (ins GR32:$src),
+ "cdfbr\t{$dst, $src}",
+ [(set FP64:$dst, (sint_to_fp GR32:$src)),
+ (implicit PSW)]>;
+def FCONVFP64 : Pseudo<(outs FP64:$dst), (ins GR64:$src),
+ "cdgbr\t{$dst, $src}",
+ [(set FP64:$dst, (sint_to_fp GR64:$src)),
+ (implicit PSW)]>;
+
+def FCONVGR32 : Pseudo<(outs GR32:$dst), (ins FP32:$src),
+ "cfebr\t{$dst, 5, $src}",
+ [(set GR32:$dst, (fp_to_sint FP32:$src)),
+ (implicit PSW)]>;
+def FCONVGR32r64: Pseudo<(outs GR32:$dst), (ins FP64:$src),
+ "cfdbr\t{$dst, 5, $src}",
+ [(set GR32:$dst, (fp_to_sint FP64:$src)),
+ (implicit PSW)]>;
+
+def FCONVGR64r32: Pseudo<(outs GR64:$dst), (ins FP32:$src),
+ "cgebr\t{$dst, 5, $src}",
+ [(set GR64:$dst, (fp_to_sint FP32:$src)),
+ (implicit PSW)]>;
+def FCONVGR64 : Pseudo<(outs GR64:$dst), (ins FP64:$src),
+ "cgdbr\t{$dst, 5, $src}",
+ [(set GR64:$dst, (fp_to_sint FP64:$src)),
+ (implicit PSW)]>;
+} // Defs = [PSW]
+
+def FBCONVG64 : Pseudo<(outs GR64:$dst), (ins FP64:$src),
+ "lgdr\t{$dst, $src}",
+ [(set GR64:$dst, (bitconvert FP64:$src))]>;
+def FBCONVF64 : Pseudo<(outs FP64:$dst), (ins GR64:$src),
+ "ldgr\t{$dst, $src}",
+ [(set FP64:$dst, (bitconvert GR64:$src))]>;
+
+//===----------------------------------------------------------------------===//
+// Test instructions (like AND but do not produce any result)
+
+// Integer comparisons
+let Defs = [PSW] in {
+def FCMP32rr : Pseudo<(outs), (ins FP32:$src1, FP32:$src2),
+ "cebr\t$src1, $src2",
+ [(set PSW, (SystemZcmp FP32:$src1, FP32:$src2))]>;
+def FCMP64rr : Pseudo<(outs), (ins FP64:$src1, FP64:$src2),
+ "cdbr\t$src1, $src2",
+ [(set PSW, (SystemZcmp FP64:$src1, FP64:$src2))]>;
+
+def FCMP32rm : Pseudo<(outs), (ins FP32:$src1, rriaddr12:$src2),
+ "ceb\t$src1, $src2",
+ [(set PSW, (SystemZcmp FP32:$src1,
+ (load rriaddr12:$src2)))]>;
+def FCMP64rm : Pseudo<(outs), (ins FP64:$src1, rriaddr12:$src2),
+ "cdb\t$src1, $src2",
+ [(set PSW, (SystemZcmp FP64:$src1,
+ (load rriaddr12:$src2)))]>;
+} // Defs = [PSW]
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//===----------------------------------------------------------------------===//
+
+// Floating point constant -0.0
+def : Pat<(f32 fpimmneg0), (FNEG32rr (LD_Fp032))>;
+def : Pat<(f64 fpimmneg0), (FNEG64rr (LD_Fp064))>;
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
new file mode 100644
index 0000000..b4a8993
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
@@ -0,0 +1,133 @@
+//===- SystemZInstrFormats.td - SystemZ Instruction Formats ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// Format specifies the encoding used by the instruction. This is part of the
+// ad-hoc solution used to emit machine instruction encodings by our machine
+// code emitter.
+class Format<bits<5> val> {
+ bits<5> Value = val;
+}
+
+def Pseudo : Format<0>;
+def EForm : Format<1>;
+def IForm : Format<2>;
+def RIForm : Format<3>;
+def RIEForm : Format<4>;
+def RILForm : Format<5>;
+def RISForm : Format<6>;
+def RRForm : Format<7>;
+def RREForm : Format<8>;
+def RRFForm : Format<9>;
+def RRRForm : Format<10>;
+def RRSForm : Format<11>;
+def RSForm : Format<12>;
+def RSIForm : Format<13>;
+def RSILForm : Format<14>;
+def RSYForm : Format<15>;
+def RXForm : Format<16>;
+def RXEForm : Format<17>;
+def RXFForm : Format<18>;
+def RXYForm : Format<19>;
+def SForm : Format<20>;
+def SIForm : Format<21>;
+def SILForm : Format<22>;
+def SIYForm : Format<23>;
+def SSForm : Format<24>;
+def SSEForm : Format<25>;
+def SSFForm : Format<26>;
+
+class InstSystemZ<bits<16> op, Format f, dag outs, dag ins> : Instruction {
+ let Namespace = "SystemZ";
+
+ bits<16> Opcode = op;
+
+ Format Form = f;
+ bits<5> FormBits = Form.Value;
+
+ dag OutOperandList = outs;
+ dag InOperandList = ins;
+}
+
+class I8<bits<8> op, Format f, dag outs, dag ins, string asmstr,
+ list<dag> pattern>
+ : InstSystemZ<0, f, outs, ins> {
+ let Opcode{0-7} = op;
+ let Opcode{8-15} = 0;
+
+ let Pattern = pattern;
+ let AsmString = asmstr;
+}
+
+class I12<bits<12> op, Format f, dag outs, dag ins, string asmstr,
+ list<dag> pattern>
+ : InstSystemZ<0, f, outs, ins> {
+ let Opcode{0-11} = op;
+ let Opcode{12-15} = 0;
+
+ let Pattern = pattern;
+ let AsmString = asmstr;
+}
+
+class I16<bits<16> op, Format f, dag outs, dag ins, string asmstr,
+ list<dag> pattern>
+ : InstSystemZ<op, f, outs, ins> {
+ let Pattern = pattern;
+ let AsmString = asmstr;
+}
+
+class RRI<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : I8<op, RRForm, outs, ins, asmstr, pattern>;
+
+class RII<bits<12> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : I12<op, RIForm, outs, ins, asmstr, pattern>;
+
+class RILI<bits<12> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : I12<op, RILForm, outs, ins, asmstr, pattern>;
+
+class RREI<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : I16<op, RREForm, outs, ins, asmstr, pattern>;
+
+class RXI<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : I8<op, RXForm, outs, ins, asmstr, pattern> {
+ let AddedComplexity = 1;
+}
+
+class RXYI<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : I16<op, RXYForm, outs, ins, asmstr, pattern>;
+
+class RSI<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : I8<op, RSForm, outs, ins, asmstr, pattern> {
+ let AddedComplexity = 1;
+}
+
+class RSYI<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : I16<op, RSYForm, outs, ins, asmstr, pattern>;
+
+class SII<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : I8<op, SIForm, outs, ins, asmstr, pattern> {
+ let AddedComplexity = 1;
+}
+
+class SIYI<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : I16<op, SIYForm, outs, ins, asmstr, pattern>;
+
+class SILI<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : I16<op, SILForm, outs, ins, asmstr, pattern>;
+
+
+//===----------------------------------------------------------------------===//
+// Pseudo instructions
+//===----------------------------------------------------------------------===//
+
+class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<0, Pseudo, outs, ins> {
+
+ let Pattern = pattern;
+ let AsmString = asmstr;
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
new file mode 100644
index 0000000..5f3dd80
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -0,0 +1,439 @@
+//===- SystemZInstrInfo.cpp - SystemZ Instruction Information --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the SystemZ implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZ.h"
+#include "SystemZInstrBuilder.h"
+#include "SystemZInstrInfo.h"
+#include "SystemZMachineFunctionInfo.h"
+#include "SystemZTargetMachine.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#define GET_INSTRINFO_CTOR
+#include "SystemZGenInstrInfo.inc"
+
+using namespace llvm;
+
+SystemZInstrInfo::SystemZInstrInfo(SystemZTargetMachine &tm)
+ : SystemZGenInstrInfo(SystemZ::ADJCALLSTACKUP, SystemZ::ADJCALLSTACKDOWN),
+ RI(tm, *this), TM(tm) {
+}
+
+/// isGVStub - Return true if the GV requires an extra load to get the
+/// real address.
+static inline bool isGVStub(GlobalValue *GV, SystemZTargetMachine &TM) {
+ return TM.getSubtarget<SystemZSubtarget>().GVRequiresExtraLoad(GV, TM, false);
+}
+
+void SystemZInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL;
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ unsigned Opc = 0;
+ if (RC == &SystemZ::GR32RegClass ||
+ RC == &SystemZ::ADDR32RegClass)
+ Opc = SystemZ::MOV32mr;
+ else if (RC == &SystemZ::GR64RegClass ||
+ RC == &SystemZ::ADDR64RegClass) {
+ Opc = SystemZ::MOV64mr;
+ } else if (RC == &SystemZ::FP32RegClass) {
+ Opc = SystemZ::FMOV32mr;
+ } else if (RC == &SystemZ::FP64RegClass) {
+ Opc = SystemZ::FMOV64mr;
+ } else if (RC == &SystemZ::GR64PRegClass) {
+ Opc = SystemZ::MOV64Pmr;
+ } else if (RC == &SystemZ::GR128RegClass) {
+ Opc = SystemZ::MOV128mr;
+ } else
+ llvm_unreachable("Unsupported regclass to store");
+
+ addFrameReference(BuildMI(MBB, MI, DL, get(Opc)), FrameIdx)
+ .addReg(SrcReg, getKillRegState(isKill));
+}
+
+void SystemZInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const{
+ DebugLoc DL;
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ unsigned Opc = 0;
+ if (RC == &SystemZ::GR32RegClass ||
+ RC == &SystemZ::ADDR32RegClass)
+ Opc = SystemZ::MOV32rm;
+ else if (RC == &SystemZ::GR64RegClass ||
+ RC == &SystemZ::ADDR64RegClass) {
+ Opc = SystemZ::MOV64rm;
+ } else if (RC == &SystemZ::FP32RegClass) {
+ Opc = SystemZ::FMOV32rm;
+ } else if (RC == &SystemZ::FP64RegClass) {
+ Opc = SystemZ::FMOV64rm;
+ } else if (RC == &SystemZ::GR64PRegClass) {
+ Opc = SystemZ::MOV64Prm;
+ } else if (RC == &SystemZ::GR128RegClass) {
+ Opc = SystemZ::MOV128rm;
+ } else
+ llvm_unreachable("Unsupported regclass to load");
+
+ addFrameReference(BuildMI(MBB, MI, DL, get(Opc), DestReg), FrameIdx);
+}
+
+void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ unsigned Opc;
+ if (SystemZ::GR64RegClass.contains(DestReg, SrcReg))
+ Opc = SystemZ::MOV64rr;
+ else if (SystemZ::GR32RegClass.contains(DestReg, SrcReg))
+ Opc = SystemZ::MOV32rr;
+ else if (SystemZ::GR64PRegClass.contains(DestReg, SrcReg))
+ Opc = SystemZ::MOV64rrP;
+ else if (SystemZ::GR128RegClass.contains(DestReg, SrcReg))
+ Opc = SystemZ::MOV128rr;
+ else if (SystemZ::FP32RegClass.contains(DestReg, SrcReg))
+ Opc = SystemZ::FMOV32rr;
+ else if (SystemZ::FP64RegClass.contains(DestReg, SrcReg))
+ Opc = SystemZ::FMOV64rr;
+ else
+ llvm_unreachable("Impossible reg-to-reg copy");
+
+ BuildMI(MBB, I, DL, get(Opc), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+}
+
+unsigned SystemZInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case SystemZ::MOV32rm:
+ case SystemZ::MOV32rmy:
+ case SystemZ::MOV64rm:
+ case SystemZ::MOVSX32rm8:
+ case SystemZ::MOVSX32rm16y:
+ case SystemZ::MOVSX64rm8:
+ case SystemZ::MOVSX64rm16:
+ case SystemZ::MOVSX64rm32:
+ case SystemZ::MOVZX32rm8:
+ case SystemZ::MOVZX32rm16:
+ case SystemZ::MOVZX64rm8:
+ case SystemZ::MOVZX64rm16:
+ case SystemZ::MOVZX64rm32:
+ case SystemZ::FMOV32rm:
+ case SystemZ::FMOV32rmy:
+ case SystemZ::FMOV64rm:
+ case SystemZ::FMOV64rmy:
+ case SystemZ::MOV64Prm:
+ case SystemZ::MOV64Prmy:
+ case SystemZ::MOV128rm:
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(2).isImm() && MI->getOperand(3).isReg() &&
+ MI->getOperand(2).getImm() == 0 && MI->getOperand(3).getReg() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+ return 0;
+}
+
+unsigned SystemZInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case SystemZ::MOV32mr:
+ case SystemZ::MOV32mry:
+ case SystemZ::MOV64mr:
+ case SystemZ::MOV32m8r:
+ case SystemZ::MOV32m8ry:
+ case SystemZ::MOV32m16r:
+ case SystemZ::MOV32m16ry:
+ case SystemZ::MOV64m8r:
+ case SystemZ::MOV64m8ry:
+ case SystemZ::MOV64m16r:
+ case SystemZ::MOV64m16ry:
+ case SystemZ::MOV64m32r:
+ case SystemZ::MOV64m32ry:
+ case SystemZ::FMOV32mr:
+ case SystemZ::FMOV32mry:
+ case SystemZ::FMOV64mr:
+ case SystemZ::FMOV64mry:
+ case SystemZ::MOV64Pmr:
+ case SystemZ::MOV64Pmry:
+ case SystemZ::MOV128mr:
+ if (MI->getOperand(0).isFI() &&
+ MI->getOperand(1).isImm() && MI->getOperand(2).isReg() &&
+ MI->getOperand(1).getImm() == 0 && MI->getOperand(2).getReg() == 0) {
+ FrameIndex = MI->getOperand(0).getIndex();
+ return MI->getOperand(3).getReg();
+ }
+ break;
+ }
+ return 0;
+}
+
+bool SystemZInstrInfo::
+ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
+ assert(Cond.size() == 1 && "Invalid Xbranch condition!");
+
+ SystemZCC::CondCodes CC = static_cast<SystemZCC::CondCodes>(Cond[0].getImm());
+ Cond[0].setImm(getOppositeCondition(CC));
+ return false;
+}
+
+bool SystemZInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (!MCID.isTerminator()) return false;
+
+ // Conditional branch is a special case.
+ if (MCID.isBranch() && !MCID.isBarrier())
+ return true;
+ if (!MCID.isPredicable())
+ return true;
+ return !isPredicated(MI);
+}
+
+bool SystemZInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ // Start from the bottom of the block and work up, examining the
+ // terminator instructions.
+ MachineBasicBlock::iterator I = MBB.end();
+ while (I != MBB.begin()) {
+ --I;
+ if (I->isDebugValue())
+ continue;
+ // Working from the bottom, when we see a non-terminator
+ // instruction, we're done.
+ if (!isUnpredicatedTerminator(I))
+ break;
+
+ // A terminator that isn't a branch can't easily be handled
+ // by this analysis.
+ if (!I->getDesc().isBranch())
+ return true;
+
+ // Handle unconditional branches.
+ if (I->getOpcode() == SystemZ::JMP) {
+ if (!AllowModify) {
+ TBB = I->getOperand(0).getMBB();
+ continue;
+ }
+
+ // If the block has any instructions after a JMP, delete them.
+ while (llvm::next(I) != MBB.end())
+ llvm::next(I)->eraseFromParent();
+ Cond.clear();
+ FBB = 0;
+
+ // Delete the JMP if it's equivalent to a fall-through.
+ if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
+ TBB = 0;
+ I->eraseFromParent();
+ I = MBB.end();
+ continue;
+ }
+
+ // TBB is used to indicate the unconditinal destination.
+ TBB = I->getOperand(0).getMBB();
+ continue;
+ }
+
+ // Handle conditional branches.
+ SystemZCC::CondCodes BranchCode = getCondFromBranchOpc(I->getOpcode());
+ if (BranchCode == SystemZCC::INVALID)
+ return true; // Can't handle indirect branch.
+
+ // Working from the bottom, handle the first conditional branch.
+ if (Cond.empty()) {
+ FBB = TBB;
+ TBB = I->getOperand(0).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(BranchCode));
+ continue;
+ }
+
+ // Handle subsequent conditional branches. Only handle the case where all
+ // conditional branches branch to the same destination.
+ assert(Cond.size() == 1);
+ assert(TBB);
+
+ // Only handle the case where all conditional branches branch to
+ // the same destination.
+ if (TBB != I->getOperand(0).getMBB())
+ return true;
+
+ SystemZCC::CondCodes OldBranchCode = (SystemZCC::CondCodes)Cond[0].getImm();
+ // If the conditions are the same, we can leave them alone.
+ if (OldBranchCode == BranchCode)
+ continue;
+
+ return true;
+ }
+
+ return false;
+}
+
+unsigned SystemZInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator I = MBB.end();
+ unsigned Count = 0;
+
+ while (I != MBB.begin()) {
+ --I;
+ if (I->isDebugValue())
+ continue;
+ if (I->getOpcode() != SystemZ::JMP &&
+ getCondFromBranchOpc(I->getOpcode()) == SystemZCC::INVALID)
+ break;
+ // Remove the branch.
+ I->eraseFromParent();
+ I = MBB.end();
+ ++Count;
+ }
+
+ return Count;
+}
+
+unsigned
+SystemZInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
+ // Shouldn't be a fall through.
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert((Cond.size() == 1 || Cond.size() == 0) &&
+ "SystemZ branch conditions have one component!");
+
+ if (Cond.empty()) {
+ // Unconditional branch?
+ assert(!FBB && "Unconditional branch with multiple successors!");
+ BuildMI(&MBB, DL, get(SystemZ::JMP)).addMBB(TBB);
+ return 1;
+ }
+
+ // Conditional branch.
+ unsigned Count = 0;
+ SystemZCC::CondCodes CC = (SystemZCC::CondCodes)Cond[0].getImm();
+ BuildMI(&MBB, DL, getBrCond(CC)).addMBB(TBB);
+ ++Count;
+
+ if (FBB) {
+ // Two-way Conditional branch. Insert the second branch.
+ BuildMI(&MBB, DL, get(SystemZ::JMP)).addMBB(FBB);
+ ++Count;
+ }
+ return Count;
+}
+
+const MCInstrDesc&
+SystemZInstrInfo::getBrCond(SystemZCC::CondCodes CC) const {
+ switch (CC) {
+ default:
+ llvm_unreachable("Unknown condition code!");
+ case SystemZCC::O: return get(SystemZ::JO);
+ case SystemZCC::H: return get(SystemZ::JH);
+ case SystemZCC::NLE: return get(SystemZ::JNLE);
+ case SystemZCC::L: return get(SystemZ::JL);
+ case SystemZCC::NHE: return get(SystemZ::JNHE);
+ case SystemZCC::LH: return get(SystemZ::JLH);
+ case SystemZCC::NE: return get(SystemZ::JNE);
+ case SystemZCC::E: return get(SystemZ::JE);
+ case SystemZCC::NLH: return get(SystemZ::JNLH);
+ case SystemZCC::HE: return get(SystemZ::JHE);
+ case SystemZCC::NL: return get(SystemZ::JNL);
+ case SystemZCC::LE: return get(SystemZ::JLE);
+ case SystemZCC::NH: return get(SystemZ::JNH);
+ case SystemZCC::NO: return get(SystemZ::JNO);
+ }
+}
+
+SystemZCC::CondCodes
+SystemZInstrInfo::getCondFromBranchOpc(unsigned Opc) const {
+ switch (Opc) {
+ default: return SystemZCC::INVALID;
+ case SystemZ::JO: return SystemZCC::O;
+ case SystemZ::JH: return SystemZCC::H;
+ case SystemZ::JNLE: return SystemZCC::NLE;
+ case SystemZ::JL: return SystemZCC::L;
+ case SystemZ::JNHE: return SystemZCC::NHE;
+ case SystemZ::JLH: return SystemZCC::LH;
+ case SystemZ::JNE: return SystemZCC::NE;
+ case SystemZ::JE: return SystemZCC::E;
+ case SystemZ::JNLH: return SystemZCC::NLH;
+ case SystemZ::JHE: return SystemZCC::HE;
+ case SystemZ::JNL: return SystemZCC::NL;
+ case SystemZ::JLE: return SystemZCC::LE;
+ case SystemZ::JNH: return SystemZCC::NH;
+ case SystemZ::JNO: return SystemZCC::NO;
+ }
+}
+
+SystemZCC::CondCodes
+SystemZInstrInfo::getOppositeCondition(SystemZCC::CondCodes CC) const {
+ switch (CC) {
+ default:
+ llvm_unreachable("Invalid condition!");
+ case SystemZCC::O: return SystemZCC::NO;
+ case SystemZCC::H: return SystemZCC::NH;
+ case SystemZCC::NLE: return SystemZCC::LE;
+ case SystemZCC::L: return SystemZCC::NL;
+ case SystemZCC::NHE: return SystemZCC::HE;
+ case SystemZCC::LH: return SystemZCC::NLH;
+ case SystemZCC::NE: return SystemZCC::E;
+ case SystemZCC::E: return SystemZCC::NE;
+ case SystemZCC::NLH: return SystemZCC::LH;
+ case SystemZCC::HE: return SystemZCC::NHE;
+ case SystemZCC::NL: return SystemZCC::L;
+ case SystemZCC::LE: return SystemZCC::NLE;
+ case SystemZCC::NH: return SystemZCC::H;
+ case SystemZCC::NO: return SystemZCC::O;
+ }
+}
+
+const MCInstrDesc&
+SystemZInstrInfo::getLongDispOpc(unsigned Opc) const {
+ switch (Opc) {
+ default:
+ llvm_unreachable("Don't have long disp version of this instruction");
+ case SystemZ::MOV32mr: return get(SystemZ::MOV32mry);
+ case SystemZ::MOV32rm: return get(SystemZ::MOV32rmy);
+ case SystemZ::MOVSX32rm16: return get(SystemZ::MOVSX32rm16y);
+ case SystemZ::MOV32m8r: return get(SystemZ::MOV32m8ry);
+ case SystemZ::MOV32m16r: return get(SystemZ::MOV32m16ry);
+ case SystemZ::MOV64m8r: return get(SystemZ::MOV64m8ry);
+ case SystemZ::MOV64m16r: return get(SystemZ::MOV64m16ry);
+ case SystemZ::MOV64m32r: return get(SystemZ::MOV64m32ry);
+ case SystemZ::MOV8mi: return get(SystemZ::MOV8miy);
+ case SystemZ::MUL32rm: return get(SystemZ::MUL32rmy);
+ case SystemZ::CMP32rm: return get(SystemZ::CMP32rmy);
+ case SystemZ::UCMP32rm: return get(SystemZ::UCMP32rmy);
+ case SystemZ::FMOV32mr: return get(SystemZ::FMOV32mry);
+ case SystemZ::FMOV64mr: return get(SystemZ::FMOV64mry);
+ case SystemZ::FMOV32rm: return get(SystemZ::FMOV32rmy);
+ case SystemZ::FMOV64rm: return get(SystemZ::FMOV64rmy);
+ case SystemZ::MOV64Pmr: return get(SystemZ::MOV64Pmry);
+ case SystemZ::MOV64Prm: return get(SystemZ::MOV64Prmy);
+ }
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.h b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
new file mode 100644
index 0000000..6a31e94
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -0,0 +1,113 @@
+//===- SystemZInstrInfo.h - SystemZ Instruction Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the SystemZ implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_SYSTEMZINSTRINFO_H
+#define LLVM_TARGET_SYSTEMZINSTRINFO_H
+
+#include "SystemZ.h"
+#include "SystemZRegisterInfo.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+#define GET_INSTRINFO_HEADER
+#include "SystemZGenInstrInfo.inc"
+
+namespace llvm {
+
+class SystemZTargetMachine;
+
+/// SystemZII - This namespace holds all of the target specific flags that
+/// instruction info tracks.
+///
+namespace SystemZII {
+ enum {
+ //===------------------------------------------------------------------===//
+ // SystemZ Specific MachineOperand flags.
+
+ MO_NO_FLAG = 0,
+
+ /// MO_GOTENT - On a symbol operand this indicates that the immediate is
+ /// the offset to the location of the symbol name from the base of the GOT.
+ ///
+ /// SYMBOL_LABEL @GOTENT
+ MO_GOTENT = 1,
+
+ /// MO_PLT - On a symbol operand this indicates that the immediate is
+ /// offset to the PLT entry of symbol name from the current code location.
+ ///
+ /// SYMBOL_LABEL @PLT
+ MO_PLT = 2
+ };
+}
+
+class SystemZInstrInfo : public SystemZGenInstrInfo {
+ const SystemZRegisterInfo RI;
+ SystemZTargetMachine &TM;
+public:
+ explicit SystemZInstrInfo(SystemZTargetMachine &TM);
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ virtual const SystemZRegisterInfo &getRegisterInfo() const { return RI; }
+
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+
+ unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
+ unsigned isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const;
+
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill,
+ int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+ virtual bool isUnpredicatedTerminator(const MachineInstr *MI) const;
+ virtual bool AnalyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const;
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
+ virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+
+ SystemZCC::CondCodes getOppositeCondition(SystemZCC::CondCodes CC) const;
+ SystemZCC::CondCodes getCondFromBranchOpc(unsigned Opc) const;
+ const MCInstrDesc& getBrCond(SystemZCC::CondCodes CC) const;
+ const MCInstrDesc& getLongDispOpc(unsigned Opc) const;
+
+ const MCInstrDesc& getMemoryInstr(unsigned Opc, int64_t Offset = 0) const {
+ if (Offset < 0 || Offset >= 4096)
+ return getLongDispOpc(Opc);
+ else
+ return get(Opc);
+ }
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
new file mode 100644
index 0000000..580d65b
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -0,0 +1,1147 @@
+//===- SystemZInstrInfo.td - SystemZ Instruction defs ---------*- tblgen-*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the SystemZ instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// SystemZ Instruction Predicate Definitions.
+def IsZ10 : Predicate<"Subtarget.isZ10()">;
+
+include "SystemZInstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// Type Constraints.
+//===----------------------------------------------------------------------===//
+class SDTCisI8<int OpNum> : SDTCisVT<OpNum, i8>;
+class SDTCisI16<int OpNum> : SDTCisVT<OpNum, i16>;
+class SDTCisI32<int OpNum> : SDTCisVT<OpNum, i32>;
+class SDTCisI64<int OpNum> : SDTCisVT<OpNum, i64>;
+
+//===----------------------------------------------------------------------===//
+// Type Profiles.
+//===----------------------------------------------------------------------===//
+def SDT_SystemZCall : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>;
+def SDT_SystemZCallSeqStart : SDCallSeqStart<[SDTCisI64<0>]>;
+def SDT_SystemZCallSeqEnd : SDCallSeqEnd<[SDTCisI64<0>, SDTCisI64<1>]>;
+def SDT_CmpTest : SDTypeProfile<1, 2, [SDTCisI64<0>,
+ SDTCisSameAs<1, 2>]>;
+def SDT_BrCond : SDTypeProfile<0, 3,
+ [SDTCisVT<0, OtherVT>,
+ SDTCisI8<1>, SDTCisVT<2, i64>]>;
+def SDT_SelectCC : SDTypeProfile<1, 4,
+ [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>,
+ SDTCisI8<3>, SDTCisVT<4, i64>]>;
+def SDT_Address : SDTypeProfile<1, 1,
+ [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
+
+//===----------------------------------------------------------------------===//
+// SystemZ Specific Node Definitions.
+//===----------------------------------------------------------------------===//
+def SystemZretflag : SDNode<"SystemZISD::RET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue]>;
+def SystemZcall : SDNode<"SystemZISD::CALL", SDT_SystemZCall,
+ [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue, SDNPVariadic]>;
+def SystemZcallseq_start :
+ SDNode<"ISD::CALLSEQ_START", SDT_SystemZCallSeqStart,
+ [SDNPHasChain, SDNPOutGlue]>;
+def SystemZcallseq_end :
+ SDNode<"ISD::CALLSEQ_END", SDT_SystemZCallSeqEnd,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+def SystemZcmp : SDNode<"SystemZISD::CMP", SDT_CmpTest>;
+def SystemZucmp : SDNode<"SystemZISD::UCMP", SDT_CmpTest>;
+def SystemZbrcond : SDNode<"SystemZISD::BRCOND", SDT_BrCond,
+ [SDNPHasChain]>;
+def SystemZselect : SDNode<"SystemZISD::SELECT", SDT_SelectCC>;
+def SystemZpcrelwrapper : SDNode<"SystemZISD::PCRelativeWrapper", SDT_Address, []>;
+
+
+include "SystemZOperands.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction list..
+
+def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i64imm:$amt),
+ "#ADJCALLSTACKDOWN",
+ [(SystemZcallseq_start timm:$amt)]>;
+def ADJCALLSTACKUP : Pseudo<(outs), (ins i64imm:$amt1, i64imm:$amt2),
+ "#ADJCALLSTACKUP",
+ [(SystemZcallseq_end timm:$amt1, timm:$amt2)]>;
+
+let Uses = [PSW], usesCustomInserter = 1 in {
+ def Select32 : Pseudo<(outs GR32:$dst), (ins GR32:$src1, GR32:$src2, i8imm:$cc),
+ "# Select32 PSEUDO",
+ [(set GR32:$dst,
+ (SystemZselect GR32:$src1, GR32:$src2, imm:$cc, PSW))]>;
+ def Select64 : Pseudo<(outs GR64:$dst), (ins GR64:$src1, GR64:$src2, i8imm:$cc),
+ "# Select64 PSEUDO",
+ [(set GR64:$dst,
+ (SystemZselect GR64:$src1, GR64:$src2, imm:$cc, PSW))]>;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Control Flow Instructions...
+//
+
+// FIXME: Provide proper encoding!
+let isReturn = 1, isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in {
+ def RET : Pseudo<(outs), (ins), "br\t%r14", [(SystemZretflag)]>;
+}
+
+let isBranch = 1, isTerminator = 1 in {
+ let isBarrier = 1 in {
+ def JMP : Pseudo<(outs), (ins brtarget:$dst), "j\t{$dst}", [(br bb:$dst)]>;
+
+ let isIndirectBranch = 1 in
+ def JMPr : Pseudo<(outs), (ins GR64:$dst), "br\t{$dst}", [(brind GR64:$dst)]>;
+ }
+
+ let Uses = [PSW] in {
+ def JO : Pseudo<(outs), (ins brtarget:$dst),
+ "jo\t$dst",
+ [(SystemZbrcond bb:$dst, SYSTEMZ_COND_O, PSW)]>;
+ def JH : Pseudo<(outs), (ins brtarget:$dst),
+ "jh\t$dst",
+ [(SystemZbrcond bb:$dst, SYSTEMZ_COND_H, PSW)]>;
+ def JNLE: Pseudo<(outs), (ins brtarget:$dst),
+ "jnle\t$dst",
+ [(SystemZbrcond bb:$dst, SYSTEMZ_COND_NLE, PSW)]>;
+ def JL : Pseudo<(outs), (ins brtarget:$dst),
+ "jl\t$dst",
+ [(SystemZbrcond bb:$dst, SYSTEMZ_COND_L, PSW)]>;
+ def JNHE: Pseudo<(outs), (ins brtarget:$dst),
+ "jnhe\t$dst",
+ [(SystemZbrcond bb:$dst, SYSTEMZ_COND_NHE, PSW)]>;
+ def JLH : Pseudo<(outs), (ins brtarget:$dst),
+ "jlh\t$dst",
+ [(SystemZbrcond bb:$dst, SYSTEMZ_COND_LH, PSW)]>;
+ def JNE : Pseudo<(outs), (ins brtarget:$dst),
+ "jne\t$dst",
+ [(SystemZbrcond bb:$dst, SYSTEMZ_COND_NE, PSW)]>;
+ def JE : Pseudo<(outs), (ins brtarget:$dst),
+ "je\t$dst",
+ [(SystemZbrcond bb:$dst, SYSTEMZ_COND_E, PSW)]>;
+ def JNLH: Pseudo<(outs), (ins brtarget:$dst),
+ "jnlh\t$dst",
+ [(SystemZbrcond bb:$dst, SYSTEMZ_COND_NLH, PSW)]>;
+ def JHE : Pseudo<(outs), (ins brtarget:$dst),
+ "jhe\t$dst",
+ [(SystemZbrcond bb:$dst, SYSTEMZ_COND_HE, PSW)]>;
+ def JNL : Pseudo<(outs), (ins brtarget:$dst),
+ "jnl\t$dst",
+ [(SystemZbrcond bb:$dst, SYSTEMZ_COND_NL, PSW)]>;
+ def JLE : Pseudo<(outs), (ins brtarget:$dst),
+ "jle\t$dst",
+ [(SystemZbrcond bb:$dst, SYSTEMZ_COND_LE, PSW)]>;
+ def JNH : Pseudo<(outs), (ins brtarget:$dst),
+ "jnh\t$dst",
+ [(SystemZbrcond bb:$dst, SYSTEMZ_COND_NH, PSW)]>;
+ def JNO : Pseudo<(outs), (ins brtarget:$dst),
+ "jno\t$dst",
+ [(SystemZbrcond bb:$dst, SYSTEMZ_COND_NO, PSW)]>;
+ } // Uses = [PSW]
+} // isBranch = 1
+
+//===----------------------------------------------------------------------===//
+// Call Instructions...
+//
+
+let isCall = 1 in
+ // All calls clobber the non-callee saved registers. Uses for argument
+ // registers are added manually.
+ let Defs = [R0D, R1D, R2D, R3D, R4D, R5D, R14D,
+ F0L, F1L, F2L, F3L, F4L, F5L, F6L, F7L] in {
+ def CALLi : Pseudo<(outs), (ins imm_pcrel:$dst, variable_ops),
+ "brasl\t%r14, $dst", [(SystemZcall imm:$dst)]>;
+ def CALLr : Pseudo<(outs), (ins ADDR64:$dst, variable_ops),
+ "basr\t%r14, $dst", [(SystemZcall ADDR64:$dst)]>;
+ }
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous Instructions.
+//
+
+let isReMaterializable = 1 in
+// FIXME: Provide imm12 variant
+// FIXME: Address should be halfword aligned...
+def LA64r : RXI<0x47,
+ (outs GR64:$dst), (ins laaddr:$src),
+ "lay\t{$dst, $src}",
+ [(set GR64:$dst, laaddr:$src)]>;
+def LA64rm : RXYI<0x71E3,
+ (outs GR64:$dst), (ins i64imm:$src),
+ "larl\t{$dst, $src}",
+ [(set GR64:$dst,
+ (SystemZpcrelwrapper tglobaladdr:$src))]>;
+
+let neverHasSideEffects = 1 in
+def NOP : Pseudo<(outs), (ins), "# no-op", []>;
+
+//===----------------------------------------------------------------------===//
+// Move Instructions
+
+let neverHasSideEffects = 1 in {
+def MOV32rr : RRI<0x18,
+ (outs GR32:$dst), (ins GR32:$src),
+ "lr\t{$dst, $src}",
+ []>;
+def MOV64rr : RREI<0xB904,
+ (outs GR64:$dst), (ins GR64:$src),
+ "lgr\t{$dst, $src}",
+ []>;
+def MOV128rr : Pseudo<(outs GR128:$dst), (ins GR128:$src),
+ "# MOV128 PSEUDO!\n"
+ "\tlgr\t${dst:subreg_odd}, ${src:subreg_odd}\n"
+ "\tlgr\t${dst:subreg_even}, ${src:subreg_even}",
+ []>;
+def MOV64rrP : Pseudo<(outs GR64P:$dst), (ins GR64P:$src),
+ "# MOV64P PSEUDO!\n"
+ "\tlr\t${dst:subreg_odd}, ${src:subreg_odd}\n"
+ "\tlr\t${dst:subreg_even}, ${src:subreg_even}",
+ []>;
+}
+
+def MOVSX64rr32 : RREI<0xB914,
+ (outs GR64:$dst), (ins GR32:$src),
+ "lgfr\t{$dst, $src}",
+ [(set GR64:$dst, (sext GR32:$src))]>;
+def MOVZX64rr32 : RREI<0xB916,
+ (outs GR64:$dst), (ins GR32:$src),
+ "llgfr\t{$dst, $src}",
+ [(set GR64:$dst, (zext GR32:$src))]>;
+
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+def MOV32ri16 : RII<0x8A7,
+ (outs GR32:$dst), (ins s16imm:$src),
+ "lhi\t{$dst, $src}",
+ [(set GR32:$dst, immSExt16:$src)]>;
+def MOV64ri16 : RII<0x9A7,
+ (outs GR64:$dst), (ins s16imm64:$src),
+ "lghi\t{$dst, $src}",
+ [(set GR64:$dst, immSExt16:$src)]>;
+
+def MOV64rill16 : RII<0xFA5,
+ (outs GR64:$dst), (ins u16imm:$src),
+ "llill\t{$dst, $src}",
+ [(set GR64:$dst, i64ll16:$src)]>;
+def MOV64rilh16 : RII<0xEA5,
+ (outs GR64:$dst), (ins u16imm:$src),
+ "llilh\t{$dst, $src}",
+ [(set GR64:$dst, i64lh16:$src)]>;
+def MOV64rihl16 : RII<0xDA5,
+ (outs GR64:$dst), (ins u16imm:$src),
+ "llihl\t{$dst, $src}",
+ [(set GR64:$dst, i64hl16:$src)]>;
+def MOV64rihh16 : RII<0xCA5,
+ (outs GR64:$dst), (ins u16imm:$src),
+ "llihh\t{$dst, $src}",
+ [(set GR64:$dst, i64hh16:$src)]>;
+
+def MOV64ri32 : RILI<0x1C0,
+ (outs GR64:$dst), (ins s32imm64:$src),
+ "lgfi\t{$dst, $src}",
+ [(set GR64:$dst, immSExt32:$src)]>;
+def MOV64rilo32 : RILI<0xFC0,
+ (outs GR64:$dst), (ins u32imm:$src),
+ "llilf\t{$dst, $src}",
+ [(set GR64:$dst, i64lo32:$src)]>;
+def MOV64rihi32 : RILI<0xEC0, (outs GR64:$dst), (ins u32imm:$src),
+ "llihf\t{$dst, $src}",
+ [(set GR64:$dst, i64hi32:$src)]>;
+}
+
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
+def MOV32rm : RXI<0x58,
+ (outs GR32:$dst), (ins rriaddr12:$src),
+ "l\t{$dst, $src}",
+ [(set GR32:$dst, (load rriaddr12:$src))]>;
+def MOV32rmy : RXYI<0x58E3,
+ (outs GR32:$dst), (ins rriaddr:$src),
+ "ly\t{$dst, $src}",
+ [(set GR32:$dst, (load rriaddr:$src))]>;
+def MOV64rm : RXYI<0x04E3,
+ (outs GR64:$dst), (ins rriaddr:$src),
+ "lg\t{$dst, $src}",
+ [(set GR64:$dst, (load rriaddr:$src))]>;
+def MOV64Prm : Pseudo<(outs GR64P:$dst), (ins rriaddr12:$src),
+ "# MOV64P PSEUDO!\n"
+ "\tl\t${dst:subreg_odd}, $src\n"
+ "\tl\t${dst:subreg_even}, 4+$src",
+ [(set GR64P:$dst, (load rriaddr12:$src))]>;
+def MOV64Prmy : Pseudo<(outs GR64P:$dst), (ins rriaddr:$src),
+ "# MOV64P PSEUDO!\n"
+ "\tly\t${dst:subreg_odd}, $src\n"
+ "\tly\t${dst:subreg_even}, 4+$src",
+ [(set GR64P:$dst, (load rriaddr:$src))]>;
+def MOV128rm : Pseudo<(outs GR128:$dst), (ins rriaddr:$src),
+ "# MOV128 PSEUDO!\n"
+ "\tlg\t${dst:subreg_odd}, $src\n"
+ "\tlg\t${dst:subreg_even}, 8+$src",
+ [(set GR128:$dst, (load rriaddr:$src))]>;
+}
+
+def MOV32mr : RXI<0x50,
+ (outs), (ins rriaddr12:$dst, GR32:$src),
+ "st\t{$src, $dst}",
+ [(store GR32:$src, rriaddr12:$dst)]>;
+def MOV32mry : RXYI<0x50E3,
+ (outs), (ins rriaddr:$dst, GR32:$src),
+ "sty\t{$src, $dst}",
+ [(store GR32:$src, rriaddr:$dst)]>;
+def MOV64mr : RXYI<0x24E3,
+ (outs), (ins rriaddr:$dst, GR64:$src),
+ "stg\t{$src, $dst}",
+ [(store GR64:$src, rriaddr:$dst)]>;
+def MOV64Pmr : Pseudo<(outs), (ins rriaddr12:$dst, GR64P:$src),
+ "# MOV64P PSEUDO!\n"
+ "\tst\t${src:subreg_odd}, $dst\n"
+ "\tst\t${src:subreg_even}, 4+$dst",
+ [(store GR64P:$src, rriaddr12:$dst)]>;
+def MOV64Pmry : Pseudo<(outs), (ins rriaddr:$dst, GR64P:$src),
+ "# MOV64P PSEUDO!\n"
+ "\tsty\t${src:subreg_odd}, $dst\n"
+ "\tsty\t${src:subreg_even}, 4+$dst",
+ [(store GR64P:$src, rriaddr:$dst)]>;
+def MOV128mr : Pseudo<(outs), (ins rriaddr:$dst, GR128:$src),
+ "# MOV128 PSEUDO!\n"
+ "\tstg\t${src:subreg_odd}, $dst\n"
+ "\tstg\t${src:subreg_even}, 8+$dst",
+ [(store GR128:$src, rriaddr:$dst)]>;
+
+def MOV8mi : SII<0x92,
+ (outs), (ins riaddr12:$dst, i32i8imm:$src),
+ "mvi\t{$dst, $src}",
+ [(truncstorei8 (i32 i32immSExt8:$src), riaddr12:$dst)]>;
+def MOV8miy : SIYI<0x52EB,
+ (outs), (ins riaddr:$dst, i32i8imm:$src),
+ "mviy\t{$dst, $src}",
+ [(truncstorei8 (i32 i32immSExt8:$src), riaddr:$dst)]>;
+
+let AddedComplexity = 2 in {
+def MOV16mi : SILI<0xE544,
+ (outs), (ins riaddr12:$dst, s16imm:$src),
+ "mvhhi\t{$dst, $src}",
+ [(truncstorei16 (i32 i32immSExt16:$src), riaddr12:$dst)]>,
+ Requires<[IsZ10]>;
+def MOV32mi16 : SILI<0xE54C,
+ (outs), (ins riaddr12:$dst, s32imm:$src),
+ "mvhi\t{$dst, $src}",
+ [(store (i32 immSExt16:$src), riaddr12:$dst)]>,
+ Requires<[IsZ10]>;
+def MOV64mi16 : SILI<0xE548,
+ (outs), (ins riaddr12:$dst, s32imm64:$src),
+ "mvghi\t{$dst, $src}",
+ [(store (i64 immSExt16:$src), riaddr12:$dst)]>,
+ Requires<[IsZ10]>;
+}
+
+// sexts
+def MOVSX32rr8 : RREI<0xB926,
+ (outs GR32:$dst), (ins GR32:$src),
+ "lbr\t{$dst, $src}",
+ [(set GR32:$dst, (sext_inreg GR32:$src, i8))]>;
+def MOVSX64rr8 : RREI<0xB906,
+ (outs GR64:$dst), (ins GR64:$src),
+ "lgbr\t{$dst, $src}",
+ [(set GR64:$dst, (sext_inreg GR64:$src, i8))]>;
+def MOVSX32rr16 : RREI<0xB927,
+ (outs GR32:$dst), (ins GR32:$src),
+ "lhr\t{$dst, $src}",
+ [(set GR32:$dst, (sext_inreg GR32:$src, i16))]>;
+def MOVSX64rr16 : RREI<0xB907,
+ (outs GR64:$dst), (ins GR64:$src),
+ "lghr\t{$dst, $src}",
+ [(set GR64:$dst, (sext_inreg GR64:$src, i16))]>;
+
+// extloads
+def MOVSX32rm8 : RXYI<0x76E3,
+ (outs GR32:$dst), (ins rriaddr:$src),
+ "lb\t{$dst, $src}",
+ [(set GR32:$dst, (sextloadi32i8 rriaddr:$src))]>;
+def MOVSX32rm16 : RXI<0x48,
+ (outs GR32:$dst), (ins rriaddr12:$src),
+ "lh\t{$dst, $src}",
+ [(set GR32:$dst, (sextloadi32i16 rriaddr12:$src))]>;
+def MOVSX32rm16y : RXYI<0x78E3,
+ (outs GR32:$dst), (ins rriaddr:$src),
+ "lhy\t{$dst, $src}",
+ [(set GR32:$dst, (sextloadi32i16 rriaddr:$src))]>;
+def MOVSX64rm8 : RXYI<0x77E3,
+ (outs GR64:$dst), (ins rriaddr:$src),
+ "lgb\t{$dst, $src}",
+ [(set GR64:$dst, (sextloadi64i8 rriaddr:$src))]>;
+def MOVSX64rm16 : RXYI<0x15E3,
+ (outs GR64:$dst), (ins rriaddr:$src),
+ "lgh\t{$dst, $src}",
+ [(set GR64:$dst, (sextloadi64i16 rriaddr:$src))]>;
+def MOVSX64rm32 : RXYI<0x14E3,
+ (outs GR64:$dst), (ins rriaddr:$src),
+ "lgf\t{$dst, $src}",
+ [(set GR64:$dst, (sextloadi64i32 rriaddr:$src))]>;
+
+def MOVZX32rm8 : RXYI<0x94E3,
+ (outs GR32:$dst), (ins rriaddr:$src),
+ "llc\t{$dst, $src}",
+ [(set GR32:$dst, (zextloadi32i8 rriaddr:$src))]>;
+def MOVZX32rm16 : RXYI<0x95E3,
+ (outs GR32:$dst), (ins rriaddr:$src),
+ "llh\t{$dst, $src}",
+ [(set GR32:$dst, (zextloadi32i16 rriaddr:$src))]>;
+def MOVZX64rm8 : RXYI<0x90E3,
+ (outs GR64:$dst), (ins rriaddr:$src),
+ "llgc\t{$dst, $src}",
+ [(set GR64:$dst, (zextloadi64i8 rriaddr:$src))]>;
+def MOVZX64rm16 : RXYI<0x91E3,
+ (outs GR64:$dst), (ins rriaddr:$src),
+ "llgh\t{$dst, $src}",
+ [(set GR64:$dst, (zextloadi64i16 rriaddr:$src))]>;
+def MOVZX64rm32 : RXYI<0x16E3,
+ (outs GR64:$dst), (ins rriaddr:$src),
+ "llgf\t{$dst, $src}",
+ [(set GR64:$dst, (zextloadi64i32 rriaddr:$src))]>;
+
+// truncstores
+def MOV32m8r : RXI<0x42,
+ (outs), (ins rriaddr12:$dst, GR32:$src),
+ "stc\t{$src, $dst}",
+ [(truncstorei8 GR32:$src, rriaddr12:$dst)]>;
+
+def MOV32m8ry : RXYI<0x72E3,
+ (outs), (ins rriaddr:$dst, GR32:$src),
+ "stcy\t{$src, $dst}",
+ [(truncstorei8 GR32:$src, rriaddr:$dst)]>;
+
+def MOV32m16r : RXI<0x40,
+ (outs), (ins rriaddr12:$dst, GR32:$src),
+ "sth\t{$src, $dst}",
+ [(truncstorei16 GR32:$src, rriaddr12:$dst)]>;
+
+def MOV32m16ry : RXYI<0x70E3,
+ (outs), (ins rriaddr:$dst, GR32:$src),
+ "sthy\t{$src, $dst}",
+ [(truncstorei16 GR32:$src, rriaddr:$dst)]>;
+
+def MOV64m8r : RXI<0x42,
+ (outs), (ins rriaddr12:$dst, GR64:$src),
+ "stc\t{$src, $dst}",
+ [(truncstorei8 GR64:$src, rriaddr12:$dst)]>;
+
+def MOV64m8ry : RXYI<0x72E3,
+ (outs), (ins rriaddr:$dst, GR64:$src),
+ "stcy\t{$src, $dst}",
+ [(truncstorei8 GR64:$src, rriaddr:$dst)]>;
+
+def MOV64m16r : RXI<0x40,
+ (outs), (ins rriaddr12:$dst, GR64:$src),
+ "sth\t{$src, $dst}",
+ [(truncstorei16 GR64:$src, rriaddr12:$dst)]>;
+
+def MOV64m16ry : RXYI<0x70E3,
+ (outs), (ins rriaddr:$dst, GR64:$src),
+ "sthy\t{$src, $dst}",
+ [(truncstorei16 GR64:$src, rriaddr:$dst)]>;
+
+def MOV64m32r : RXI<0x50,
+ (outs), (ins rriaddr12:$dst, GR64:$src),
+ "st\t{$src, $dst}",
+ [(truncstorei32 GR64:$src, rriaddr12:$dst)]>;
+
+def MOV64m32ry : RXYI<0x50E3,
+ (outs), (ins rriaddr:$dst, GR64:$src),
+ "sty\t{$src, $dst}",
+ [(truncstorei32 GR64:$src, rriaddr:$dst)]>;
+
+// multiple regs moves
+// FIXME: should we use multiple arg nodes?
+def MOV32mrm : RSYI<0x90EB,
+ (outs), (ins riaddr:$dst, GR32:$from, GR32:$to),
+ "stmy\t{$from, $to, $dst}",
+ []>;
+def MOV64mrm : RSYI<0x24EB,
+ (outs), (ins riaddr:$dst, GR64:$from, GR64:$to),
+ "stmg\t{$from, $to, $dst}",
+ []>;
+def MOV32rmm : RSYI<0x90EB,
+ (outs GR32:$from, GR32:$to), (ins riaddr:$dst),
+ "lmy\t{$from, $to, $dst}",
+ []>;
+def MOV64rmm : RSYI<0x04EB,
+ (outs GR64:$from, GR64:$to), (ins riaddr:$dst),
+ "lmg\t{$from, $to, $dst}",
+ []>;
+
+let isReMaterializable = 1, neverHasSideEffects = 1, isAsCheapAsAMove = 1,
+ Constraints = "$src = $dst" in {
+def MOV64Pr0_even : Pseudo<(outs GR64P:$dst), (ins GR64P:$src),
+ "lhi\t${dst:subreg_even}, 0",
+ []>;
+def MOV128r0_even : Pseudo<(outs GR128:$dst), (ins GR128:$src),
+ "lghi\t${dst:subreg_even}, 0",
+ []>;
+}
+
+// Byte swaps
+def BSWAP32rr : RREI<0xB91F,
+ (outs GR32:$dst), (ins GR32:$src),
+ "lrvr\t{$dst, $src}",
+ [(set GR32:$dst, (bswap GR32:$src))]>;
+def BSWAP64rr : RREI<0xB90F,
+ (outs GR64:$dst), (ins GR64:$src),
+ "lrvgr\t{$dst, $src}",
+ [(set GR64:$dst, (bswap GR64:$src))]>;
+
+// FIXME: this is invalid pattern for big-endian
+//def BSWAP16rm : RXYI<0x1FE3, (outs GR32:$dst), (ins rriaddr:$src),
+// "lrvh\t{$dst, $src}",
+// [(set GR32:$dst, (bswap (extloadi32i16 rriaddr:$src)))]>;
+def BSWAP32rm : RXYI<0x1EE3, (outs GR32:$dst), (ins rriaddr:$src),
+ "lrv\t{$dst, $src}",
+ [(set GR32:$dst, (bswap (load rriaddr:$src)))]>;
+def BSWAP64rm : RXYI<0x0FE3, (outs GR64:$dst), (ins rriaddr:$src),
+ "lrvg\t{$dst, $src}",
+ [(set GR64:$dst, (bswap (load rriaddr:$src)))]>;
+
+//def BSWAP16mr : RXYI<0xE33F, (outs), (ins rriaddr:$dst, GR32:$src),
+// "strvh\t{$src, $dst}",
+// [(truncstorei16 (bswap GR32:$src), rriaddr:$dst)]>;
+def BSWAP32mr : RXYI<0xE33E, (outs), (ins rriaddr:$dst, GR32:$src),
+ "strv\t{$src, $dst}",
+ [(store (bswap GR32:$src), rriaddr:$dst)]>;
+def BSWAP64mr : RXYI<0xE32F, (outs), (ins rriaddr:$dst, GR64:$src),
+ "strvg\t{$src, $dst}",
+ [(store (bswap GR64:$src), rriaddr:$dst)]>;
+
+//===----------------------------------------------------------------------===//
+// Arithmetic Instructions
+
+let Defs = [PSW] in {
+def NEG32rr : RRI<0x13,
+ (outs GR32:$dst), (ins GR32:$src),
+ "lcr\t{$dst, $src}",
+ [(set GR32:$dst, (ineg GR32:$src)),
+ (implicit PSW)]>;
+def NEG64rr : RREI<0xB903, (outs GR64:$dst), (ins GR64:$src),
+ "lcgr\t{$dst, $src}",
+ [(set GR64:$dst, (ineg GR64:$src)),
+ (implicit PSW)]>;
+def NEG64rr32 : RREI<0xB913, (outs GR64:$dst), (ins GR32:$src),
+ "lcgfr\t{$dst, $src}",
+ [(set GR64:$dst, (ineg (sext GR32:$src))),
+ (implicit PSW)]>;
+}
+
+let Constraints = "$src1 = $dst" in {
+
+let Defs = [PSW] in {
+
+let isCommutable = 1 in { // X = ADD Y, Z == X = ADD Z, Y
+def ADD32rr : RRI<0x1A, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "ar\t{$dst, $src2}",
+ [(set GR32:$dst, (add GR32:$src1, GR32:$src2)),
+ (implicit PSW)]>;
+def ADD64rr : RREI<0xB908, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "agr\t{$dst, $src2}",
+ [(set GR64:$dst, (add GR64:$src1, GR64:$src2)),
+ (implicit PSW)]>;
+}
+
+def ADD32rm : RXI<0x5A, (outs GR32:$dst), (ins GR32:$src1, rriaddr12:$src2),
+ "a\t{$dst, $src2}",
+ [(set GR32:$dst, (add GR32:$src1, (load rriaddr12:$src2))),
+ (implicit PSW)]>;
+def ADD32rmy : RXYI<0xE35A, (outs GR32:$dst), (ins GR32:$src1, rriaddr:$src2),
+ "ay\t{$dst, $src2}",
+ [(set GR32:$dst, (add GR32:$src1, (load rriaddr:$src2))),
+ (implicit PSW)]>;
+def ADD64rm : RXYI<0xE308, (outs GR64:$dst), (ins GR64:$src1, rriaddr:$src2),
+ "ag\t{$dst, $src2}",
+ [(set GR64:$dst, (add GR64:$src1, (load rriaddr:$src2))),
+ (implicit PSW)]>;
+
+
+def ADD32ri16 : RII<0xA7A,
+ (outs GR32:$dst), (ins GR32:$src1, s16imm:$src2),
+ "ahi\t{$dst, $src2}",
+ [(set GR32:$dst, (add GR32:$src1, immSExt16:$src2)),
+ (implicit PSW)]>;
+def ADD32ri : RILI<0xC29,
+ (outs GR32:$dst), (ins GR32:$src1, s32imm:$src2),
+ "afi\t{$dst, $src2}",
+ [(set GR32:$dst, (add GR32:$src1, imm:$src2)),
+ (implicit PSW)]>;
+def ADD64ri16 : RILI<0xA7B,
+ (outs GR64:$dst), (ins GR64:$src1, s16imm64:$src2),
+ "aghi\t{$dst, $src2}",
+ [(set GR64:$dst, (add GR64:$src1, immSExt16:$src2)),
+ (implicit PSW)]>;
+def ADD64ri32 : RILI<0xC28,
+ (outs GR64:$dst), (ins GR64:$src1, s32imm64:$src2),
+ "agfi\t{$dst, $src2}",
+ [(set GR64:$dst, (add GR64:$src1, immSExt32:$src2)),
+ (implicit PSW)]>;
+
+let isCommutable = 1 in { // X = ADC Y, Z == X = ADC Z, Y
+def ADC32rr : RRI<0x1E, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "alr\t{$dst, $src2}",
+ [(set GR32:$dst, (addc GR32:$src1, GR32:$src2))]>;
+def ADC64rr : RREI<0xB90A, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "algr\t{$dst, $src2}",
+ [(set GR64:$dst, (addc GR64:$src1, GR64:$src2))]>;
+}
+
+def ADC32ri : RILI<0xC2B,
+ (outs GR32:$dst), (ins GR32:$src1, s32imm:$src2),
+ "alfi\t{$dst, $src2}",
+ [(set GR32:$dst, (addc GR32:$src1, imm:$src2))]>;
+def ADC64ri32 : RILI<0xC2A,
+ (outs GR64:$dst), (ins GR64:$src1, s32imm64:$src2),
+ "algfi\t{$dst, $src2}",
+ [(set GR64:$dst, (addc GR64:$src1, immSExt32:$src2))]>;
+
+let Uses = [PSW] in {
+def ADDE32rr : RREI<0xB998, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "alcr\t{$dst, $src2}",
+ [(set GR32:$dst, (adde GR32:$src1, GR32:$src2)),
+ (implicit PSW)]>;
+def ADDE64rr : RREI<0xB988, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "alcgr\t{$dst, $src2}",
+ [(set GR64:$dst, (adde GR64:$src1, GR64:$src2)),
+ (implicit PSW)]>;
+}
+
+let isCommutable = 1 in { // X = AND Y, Z == X = AND Z, Y
+def AND32rr : RRI<0x14,
+ (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "nr\t{$dst, $src2}",
+ [(set GR32:$dst, (and GR32:$src1, GR32:$src2))]>;
+def AND64rr : RREI<0xB980,
+ (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "ngr\t{$dst, $src2}",
+ [(set GR64:$dst, (and GR64:$src1, GR64:$src2))]>;
+}
+
+def AND32rm : RXI<0x54, (outs GR32:$dst), (ins GR32:$src1, rriaddr12:$src2),
+ "n\t{$dst, $src2}",
+ [(set GR32:$dst, (and GR32:$src1, (load rriaddr12:$src2))),
+ (implicit PSW)]>;
+def AND32rmy : RXYI<0xE354, (outs GR32:$dst), (ins GR32:$src1, rriaddr:$src2),
+ "ny\t{$dst, $src2}",
+ [(set GR32:$dst, (and GR32:$src1, (load rriaddr:$src2))),
+ (implicit PSW)]>;
+def AND64rm : RXYI<0xE360, (outs GR64:$dst), (ins GR64:$src1, rriaddr:$src2),
+ "ng\t{$dst, $src2}",
+ [(set GR64:$dst, (and GR64:$src1, (load rriaddr:$src2))),
+ (implicit PSW)]>;
+
+def AND32rill16 : RII<0xA57,
+ (outs GR32:$dst), (ins GR32:$src1, u16imm:$src2),
+ "nill\t{$dst, $src2}",
+ [(set GR32:$dst, (and GR32:$src1, i32ll16c:$src2))]>;
+def AND64rill16 : RII<0xA57,
+ (outs GR64:$dst), (ins GR64:$src1, u16imm:$src2),
+ "nill\t{$dst, $src2}",
+ [(set GR64:$dst, (and GR64:$src1, i64ll16c:$src2))]>;
+
+def AND32rilh16 : RII<0xA56,
+ (outs GR32:$dst), (ins GR32:$src1, u16imm:$src2),
+ "nilh\t{$dst, $src2}",
+ [(set GR32:$dst, (and GR32:$src1, i32lh16c:$src2))]>;
+def AND64rilh16 : RII<0xA56,
+ (outs GR64:$dst), (ins GR64:$src1, u16imm:$src2),
+ "nilh\t{$dst, $src2}",
+ [(set GR64:$dst, (and GR64:$src1, i64lh16c:$src2))]>;
+
+def AND64rihl16 : RII<0xA55,
+ (outs GR64:$dst), (ins GR64:$src1, u16imm:$src2),
+ "nihl\t{$dst, $src2}",
+ [(set GR64:$dst, (and GR64:$src1, i64hl16c:$src2))]>;
+def AND64rihh16 : RII<0xA54,
+ (outs GR64:$dst), (ins GR64:$src1, u16imm:$src2),
+ "nihh\t{$dst, $src2}",
+ [(set GR64:$dst, (and GR64:$src1, i64hh16c:$src2))]>;
+
+def AND32ri : RILI<0xC0B,
+ (outs GR32:$dst), (ins GR32:$src1, u32imm:$src2),
+ "nilf\t{$dst, $src2}",
+ [(set GR32:$dst, (and GR32:$src1, imm:$src2))]>;
+def AND64rilo32 : RILI<0xC0B,
+ (outs GR64:$dst), (ins GR64:$src1, u32imm:$src2),
+ "nilf\t{$dst, $src2}",
+ [(set GR64:$dst, (and GR64:$src1, i64lo32c:$src2))]>;
+def AND64rihi32 : RILI<0xC0A,
+ (outs GR64:$dst), (ins GR64:$src1, u32imm:$src2),
+ "nihf\t{$dst, $src2}",
+ [(set GR64:$dst, (and GR64:$src1, i64hi32c:$src2))]>;
+
+let isCommutable = 1 in { // X = OR Y, Z == X = OR Z, Y
+def OR32rr : RRI<0x16,
+ (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "or\t{$dst, $src2}",
+ [(set GR32:$dst, (or GR32:$src1, GR32:$src2))]>;
+def OR64rr : RREI<0xB981,
+ (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "ogr\t{$dst, $src2}",
+ [(set GR64:$dst, (or GR64:$src1, GR64:$src2))]>;
+}
+
+def OR32rm : RXI<0x56, (outs GR32:$dst), (ins GR32:$src1, rriaddr12:$src2),
+ "o\t{$dst, $src2}",
+ [(set GR32:$dst, (or GR32:$src1, (load rriaddr12:$src2))),
+ (implicit PSW)]>;
+def OR32rmy : RXYI<0xE356, (outs GR32:$dst), (ins GR32:$src1, rriaddr:$src2),
+ "oy\t{$dst, $src2}",
+ [(set GR32:$dst, (or GR32:$src1, (load rriaddr:$src2))),
+ (implicit PSW)]>;
+def OR64rm : RXYI<0xE381, (outs GR64:$dst), (ins GR64:$src1, rriaddr:$src2),
+ "og\t{$dst, $src2}",
+ [(set GR64:$dst, (or GR64:$src1, (load rriaddr:$src2))),
+ (implicit PSW)]>;
+
+ // FIXME: Provide proper encoding!
+def OR32ri16 : RII<0xA5B,
+ (outs GR32:$dst), (ins GR32:$src1, u32imm:$src2),
+ "oill\t{$dst, $src2}",
+ [(set GR32:$dst, (or GR32:$src1, i32ll16:$src2))]>;
+def OR32ri16h : RII<0xA5A,
+ (outs GR32:$dst), (ins GR32:$src1, u32imm:$src2),
+ "oilh\t{$dst, $src2}",
+ [(set GR32:$dst, (or GR32:$src1, i32lh16:$src2))]>;
+def OR32ri : RILI<0xC0D,
+ (outs GR32:$dst), (ins GR32:$src1, u32imm:$src2),
+ "oilf\t{$dst, $src2}",
+ [(set GR32:$dst, (or GR32:$src1, imm:$src2))]>;
+
+def OR64rill16 : RII<0xA5B,
+ (outs GR64:$dst), (ins GR64:$src1, u16imm:$src2),
+ "oill\t{$dst, $src2}",
+ [(set GR64:$dst, (or GR64:$src1, i64ll16:$src2))]>;
+def OR64rilh16 : RII<0xA5A,
+ (outs GR64:$dst), (ins GR64:$src1, u16imm:$src2),
+ "oilh\t{$dst, $src2}",
+ [(set GR64:$dst, (or GR64:$src1, i64lh16:$src2))]>;
+def OR64rihl16 : RII<0xA59,
+ (outs GR64:$dst), (ins GR64:$src1, u16imm:$src2),
+ "oihl\t{$dst, $src2}",
+ [(set GR64:$dst, (or GR64:$src1, i64hl16:$src2))]>;
+def OR64rihh16 : RII<0xA58,
+ (outs GR64:$dst), (ins GR64:$src1, u16imm:$src2),
+ "oihh\t{$dst, $src2}",
+ [(set GR64:$dst, (or GR64:$src1, i64hh16:$src2))]>;
+
+def OR64rilo32 : RILI<0xC0D,
+ (outs GR64:$dst), (ins GR64:$src1, u32imm:$src2),
+ "oilf\t{$dst, $src2}",
+ [(set GR64:$dst, (or GR64:$src1, i64lo32:$src2))]>;
+def OR64rihi32 : RILI<0xC0C,
+ (outs GR64:$dst), (ins GR64:$src1, u32imm:$src2),
+ "oihf\t{$dst, $src2}",
+ [(set GR64:$dst, (or GR64:$src1, i64hi32:$src2))]>;
+
+def SUB32rr : RRI<0x1B,
+ (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "sr\t{$dst, $src2}",
+ [(set GR32:$dst, (sub GR32:$src1, GR32:$src2))]>;
+def SUB64rr : RREI<0xB909,
+ (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "sgr\t{$dst, $src2}",
+ [(set GR64:$dst, (sub GR64:$src1, GR64:$src2))]>;
+
+def SUB32rm : RXI<0x5B, (outs GR32:$dst), (ins GR32:$src1, rriaddr12:$src2),
+ "s\t{$dst, $src2}",
+ [(set GR32:$dst, (sub GR32:$src1, (load rriaddr12:$src2))),
+ (implicit PSW)]>;
+def SUB32rmy : RXYI<0xE35B, (outs GR32:$dst), (ins GR32:$src1, rriaddr:$src2),
+ "sy\t{$dst, $src2}",
+ [(set GR32:$dst, (sub GR32:$src1, (load rriaddr:$src2))),
+ (implicit PSW)]>;
+def SUB64rm : RXYI<0xE309, (outs GR64:$dst), (ins GR64:$src1, rriaddr:$src2),
+ "sg\t{$dst, $src2}",
+ [(set GR64:$dst, (sub GR64:$src1, (load rriaddr:$src2))),
+ (implicit PSW)]>;
+
+def SBC32rr : RRI<0x1F,
+ (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "slr\t{$dst, $src2}",
+ [(set GR32:$dst, (subc GR32:$src1, GR32:$src2))]>;
+def SBC64rr : RREI<0xB90B,
+ (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "slgr\t{$dst, $src2}",
+ [(set GR64:$dst, (subc GR64:$src1, GR64:$src2))]>;
+
+def SBC32ri : RILI<0xC25,
+ (outs GR32:$dst), (ins GR32:$src1, s32imm:$src2),
+ "sllfi\t{$dst, $src2}",
+ [(set GR32:$dst, (subc GR32:$src1, imm:$src2))]>;
+def SBC64ri32 : RILI<0xC24,
+ (outs GR64:$dst), (ins GR64:$src1, s32imm64:$src2),
+ "slgfi\t{$dst, $src2}",
+ [(set GR64:$dst, (subc GR64:$src1, immSExt32:$src2))]>;
+
+let Uses = [PSW] in {
+def SUBE32rr : RREI<0xB999, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "slbr\t{$dst, $src2}",
+ [(set GR32:$dst, (sube GR32:$src1, GR32:$src2)),
+ (implicit PSW)]>;
+def SUBE64rr : RREI<0xB989, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "slbgr\t{$dst, $src2}",
+ [(set GR64:$dst, (sube GR64:$src1, GR64:$src2)),
+ (implicit PSW)]>;
+}
+
+let isCommutable = 1 in { // X = XOR Y, Z == X = XOR Z, Y
+def XOR32rr : RRI<0x17,
+ (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "xr\t{$dst, $src2}",
+ [(set GR32:$dst, (xor GR32:$src1, GR32:$src2))]>;
+def XOR64rr : RREI<0xB982,
+ (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "xgr\t{$dst, $src2}",
+ [(set GR64:$dst, (xor GR64:$src1, GR64:$src2))]>;
+}
+
+def XOR32rm : RXI<0x57,(outs GR32:$dst), (ins GR32:$src1, rriaddr12:$src2),
+ "x\t{$dst, $src2}",
+ [(set GR32:$dst, (xor GR32:$src1, (load rriaddr12:$src2))),
+ (implicit PSW)]>;
+def XOR32rmy : RXYI<0xE357, (outs GR32:$dst), (ins GR32:$src1, rriaddr:$src2),
+ "xy\t{$dst, $src2}",
+ [(set GR32:$dst, (xor GR32:$src1, (load rriaddr:$src2))),
+ (implicit PSW)]>;
+def XOR64rm : RXYI<0xE382, (outs GR64:$dst), (ins GR64:$src1, rriaddr:$src2),
+ "xg\t{$dst, $src2}",
+ [(set GR64:$dst, (xor GR64:$src1, (load rriaddr:$src2))),
+ (implicit PSW)]>;
+
+def XOR32ri : RILI<0xC07,
+ (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
+ "xilf\t{$dst, $src2}",
+ [(set GR32:$dst, (xor GR32:$src1, imm:$src2))]>;
+
+} // Defs = [PSW]
+
+let isCommutable = 1 in { // X = MUL Y, Z == X = MUL Z, Y
+def MUL32rr : RREI<0xB252,
+ (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "msr\t{$dst, $src2}",
+ [(set GR32:$dst, (mul GR32:$src1, GR32:$src2))]>;
+def MUL64rr : RREI<0xB90C,
+ (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "msgr\t{$dst, $src2}",
+ [(set GR64:$dst, (mul GR64:$src1, GR64:$src2))]>;
+}
+
+def MUL64rrP : RRI<0x1C,
+ (outs GR64P:$dst), (ins GR64P:$src1, GR32:$src2),
+ "mr\t{$dst, $src2}",
+ []>;
+def UMUL64rrP : RREI<0xB996,
+ (outs GR64P:$dst), (ins GR64P:$src1, GR32:$src2),
+ "mlr\t{$dst, $src2}",
+ []>;
+def UMUL128rrP : RREI<0xB986,
+ (outs GR128:$dst), (ins GR128:$src1, GR64:$src2),
+ "mlgr\t{$dst, $src2}",
+ []>;
+
+def MUL32ri16 : RII<0xA7C,
+ (outs GR32:$dst), (ins GR32:$src1, s16imm:$src2),
+ "mhi\t{$dst, $src2}",
+ [(set GR32:$dst, (mul GR32:$src1, i32immSExt16:$src2))]>;
+def MUL64ri16 : RII<0xA7D,
+ (outs GR64:$dst), (ins GR64:$src1, s16imm64:$src2),
+ "mghi\t{$dst, $src2}",
+ [(set GR64:$dst, (mul GR64:$src1, immSExt16:$src2))]>;
+
+let AddedComplexity = 2 in {
+def MUL32ri : RILI<0xC21,
+ (outs GR32:$dst), (ins GR32:$src1, s32imm:$src2),
+ "msfi\t{$dst, $src2}",
+ [(set GR32:$dst, (mul GR32:$src1, imm:$src2))]>,
+ Requires<[IsZ10]>;
+def MUL64ri32 : RILI<0xC20,
+ (outs GR64:$dst), (ins GR64:$src1, s32imm64:$src2),
+ "msgfi\t{$dst, $src2}",
+ [(set GR64:$dst, (mul GR64:$src1, i64immSExt32:$src2))]>,
+ Requires<[IsZ10]>;
+}
+
+def MUL32rm : RXI<0x71,
+ (outs GR32:$dst), (ins GR32:$src1, rriaddr12:$src2),
+ "ms\t{$dst, $src2}",
+ [(set GR32:$dst, (mul GR32:$src1, (load rriaddr12:$src2)))]>;
+def MUL32rmy : RXYI<0xE351,
+ (outs GR32:$dst), (ins GR32:$src1, rriaddr:$src2),
+ "msy\t{$dst, $src2}",
+ [(set GR32:$dst, (mul GR32:$src1, (load rriaddr:$src2)))]>;
+def MUL64rm : RXYI<0xE30C,
+ (outs GR64:$dst), (ins GR64:$src1, rriaddr:$src2),
+ "msg\t{$dst, $src2}",
+ [(set GR64:$dst, (mul GR64:$src1, (load rriaddr:$src2)))]>;
+
+def MULSX64rr32 : RREI<0xB91C,
+ (outs GR64:$dst), (ins GR64:$src1, GR32:$src2),
+ "msgfr\t{$dst, $src2}",
+ [(set GR64:$dst, (mul GR64:$src1, (sext GR32:$src2)))]>;
+
+def SDIVREM32r : RREI<0xB91D,
+ (outs GR128:$dst), (ins GR128:$src1, GR32:$src2),
+ "dsgfr\t{$dst, $src2}",
+ []>;
+def SDIVREM64r : RREI<0xB90D,
+ (outs GR128:$dst), (ins GR128:$src1, GR64:$src2),
+ "dsgr\t{$dst, $src2}",
+ []>;
+
+def UDIVREM32r : RREI<0xB997,
+ (outs GR64P:$dst), (ins GR64P:$src1, GR32:$src2),
+ "dlr\t{$dst, $src2}",
+ []>;
+def UDIVREM64r : RREI<0xB987,
+ (outs GR128:$dst), (ins GR128:$src1, GR64:$src2),
+ "dlgr\t{$dst, $src2}",
+ []>;
+let mayLoad = 1 in {
+def SDIVREM32m : RXYI<0xE31D,
+ (outs GR128:$dst), (ins GR128:$src1, rriaddr:$src2),
+ "dsgf\t{$dst, $src2}",
+ []>;
+def SDIVREM64m : RXYI<0xE30D,
+ (outs GR128:$dst), (ins GR128:$src1, rriaddr:$src2),
+ "dsg\t{$dst, $src2}",
+ []>;
+
+def UDIVREM32m : RXYI<0xE397, (outs GR64P:$dst), (ins GR64P:$src1, rriaddr:$src2),
+ "dl\t{$dst, $src2}",
+ []>;
+def UDIVREM64m : RXYI<0xE387, (outs GR128:$dst), (ins GR128:$src1, rriaddr:$src2),
+ "dlg\t{$dst, $src2}",
+ []>;
+} // mayLoad
+} // Constraints = "$src1 = $dst"
+
+//===----------------------------------------------------------------------===//
+// Shifts
+
+let Constraints = "$src = $dst" in
+def SRL32rri : RSI<0x88,
+ (outs GR32:$dst), (ins GR32:$src, riaddr32:$amt),
+ "srl\t{$src, $amt}",
+ [(set GR32:$dst, (srl GR32:$src, riaddr32:$amt))]>;
+def SRL64rri : RSYI<0xEB0C,
+ (outs GR64:$dst), (ins GR64:$src, riaddr:$amt),
+ "srlg\t{$dst, $src, $amt}",
+ [(set GR64:$dst, (srl GR64:$src, riaddr:$amt))]>;
+
+let Constraints = "$src = $dst" in
+def SHL32rri : RSI<0x89,
+ (outs GR32:$dst), (ins GR32:$src, riaddr32:$amt),
+ "sll\t{$src, $amt}",
+ [(set GR32:$dst, (shl GR32:$src, riaddr32:$amt))]>;
+def SHL64rri : RSYI<0xEB0D,
+ (outs GR64:$dst), (ins GR64:$src, riaddr:$amt),
+ "sllg\t{$dst, $src, $amt}",
+ [(set GR64:$dst, (shl GR64:$src, riaddr:$amt))]>;
+
+let Defs = [PSW] in {
+let Constraints = "$src = $dst" in
+def SRA32rri : RSI<0x8A,
+ (outs GR32:$dst), (ins GR32:$src, riaddr32:$amt),
+ "sra\t{$src, $amt}",
+ [(set GR32:$dst, (sra GR32:$src, riaddr32:$amt)),
+ (implicit PSW)]>;
+
+def SRA64rri : RSYI<0xEB0A,
+ (outs GR64:$dst), (ins GR64:$src, riaddr:$amt),
+ "srag\t{$dst, $src, $amt}",
+ [(set GR64:$dst, (sra GR64:$src, riaddr:$amt)),
+ (implicit PSW)]>;
+} // Defs = [PSW]
+
+def ROTL32rri : RSYI<0xEB1D,
+ (outs GR32:$dst), (ins GR32:$src, riaddr32:$amt),
+ "rll\t{$dst, $src, $amt}",
+ [(set GR32:$dst, (rotl GR32:$src, riaddr32:$amt))]>;
+def ROTL64rri : RSYI<0xEB1C,
+ (outs GR64:$dst), (ins GR64:$src, riaddr:$amt),
+ "rllg\t{$dst, $src, $amt}",
+ [(set GR64:$dst, (rotl GR64:$src, riaddr:$amt))]>;
+
+//===----------------------------------------------------------------------===//
+// Test instructions (like AND but do not produce any result)
+
+// Integer comparisons
+let Defs = [PSW] in {
+def CMP32rr : RRI<0x19,
+ (outs), (ins GR32:$src1, GR32:$src2),
+ "cr\t$src1, $src2",
+ [(set PSW, (SystemZcmp GR32:$src1, GR32:$src2))]>;
+def CMP64rr : RREI<0xB920,
+ (outs), (ins GR64:$src1, GR64:$src2),
+ "cgr\t$src1, $src2",
+ [(set PSW, (SystemZcmp GR64:$src1, GR64:$src2))]>;
+
+def CMP32ri : RILI<0xC2D,
+ (outs), (ins GR32:$src1, s32imm:$src2),
+ "cfi\t$src1, $src2",
+ [(set PSW, (SystemZcmp GR32:$src1, imm:$src2))]>;
+def CMP64ri32 : RILI<0xC2C,
+ (outs), (ins GR64:$src1, s32imm64:$src2),
+ "cgfi\t$src1, $src2",
+ [(set PSW, (SystemZcmp GR64:$src1, i64immSExt32:$src2))]>;
+
+def CMP32rm : RXI<0x59,
+ (outs), (ins GR32:$src1, rriaddr12:$src2),
+ "c\t$src1, $src2",
+ [(set PSW, (SystemZcmp GR32:$src1, (load rriaddr12:$src2)))]>;
+def CMP32rmy : RXYI<0xE359,
+ (outs), (ins GR32:$src1, rriaddr:$src2),
+ "cy\t$src1, $src2",
+ [(set PSW, (SystemZcmp GR32:$src1, (load rriaddr:$src2)))]>;
+def CMP64rm : RXYI<0xE320,
+ (outs), (ins GR64:$src1, rriaddr:$src2),
+ "cg\t$src1, $src2",
+ [(set PSW, (SystemZcmp GR64:$src1, (load rriaddr:$src2)))]>;
+
+def UCMP32rr : RRI<0x15,
+ (outs), (ins GR32:$src1, GR32:$src2),
+ "clr\t$src1, $src2",
+ [(set PSW, (SystemZucmp GR32:$src1, GR32:$src2))]>;
+def UCMP64rr : RREI<0xB921,
+ (outs), (ins GR64:$src1, GR64:$src2),
+ "clgr\t$src1, $src2",
+ [(set PSW, (SystemZucmp GR64:$src1, GR64:$src2))]>;
+
+def UCMP32ri : RILI<0xC2F,
+ (outs), (ins GR32:$src1, i32imm:$src2),
+ "clfi\t$src1, $src2",
+ [(set PSW, (SystemZucmp GR32:$src1, imm:$src2))]>;
+def UCMP64ri32 : RILI<0xC2E,
+ (outs), (ins GR64:$src1, i64i32imm:$src2),
+ "clgfi\t$src1, $src2",
+ [(set PSW,(SystemZucmp GR64:$src1, i64immZExt32:$src2))]>;
+
+def UCMP32rm : RXI<0x55,
+ (outs), (ins GR32:$src1, rriaddr12:$src2),
+ "cl\t$src1, $src2",
+ [(set PSW, (SystemZucmp GR32:$src1,
+ (load rriaddr12:$src2)))]>;
+def UCMP32rmy : RXYI<0xE355,
+ (outs), (ins GR32:$src1, rriaddr:$src2),
+ "cly\t$src1, $src2",
+ [(set PSW, (SystemZucmp GR32:$src1,
+ (load rriaddr:$src2)))]>;
+def UCMP64rm : RXYI<0xE351,
+ (outs), (ins GR64:$src1, rriaddr:$src2),
+ "clg\t$src1, $src2",
+ [(set PSW, (SystemZucmp GR64:$src1,
+ (load rriaddr:$src2)))]>;
+
+def CMPSX64rr32 : RREI<0xB930,
+ (outs), (ins GR64:$src1, GR32:$src2),
+ "cgfr\t$src1, $src2",
+ [(set PSW, (SystemZucmp GR64:$src1,
+ (sext GR32:$src2)))]>;
+def UCMPZX64rr32 : RREI<0xB931,
+ (outs), (ins GR64:$src1, GR32:$src2),
+ "clgfr\t$src1, $src2",
+ [(set PSW, (SystemZucmp GR64:$src1,
+ (zext GR32:$src2)))]>;
+
+def CMPSX64rm32 : RXYI<0xE330,
+ (outs), (ins GR64:$src1, rriaddr:$src2),
+ "cgf\t$src1, $src2",
+ [(set PSW, (SystemZucmp GR64:$src1,
+ (sextloadi64i32 rriaddr:$src2)))]>;
+def UCMPZX64rm32 : RXYI<0xE331,
+ (outs), (ins GR64:$src1, rriaddr:$src2),
+ "clgf\t$src1, $src2",
+ [(set PSW, (SystemZucmp GR64:$src1,
+ (zextloadi64i32 rriaddr:$src2)))]>;
+
+// FIXME: Add other crazy ucmp forms
+
+} // Defs = [PSW]
+
+//===----------------------------------------------------------------------===//
+// Other crazy stuff
+let Defs = [PSW] in {
+def FLOGR64 : RREI<0xB983,
+ (outs GR128:$dst), (ins GR64:$src),
+ "flogr\t{$dst, $src}",
+ []>;
+} // Defs = [PSW]
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns.
+//===----------------------------------------------------------------------===//
+
+// ConstPools, JumpTables
+def : Pat<(SystemZpcrelwrapper tjumptable:$src), (LA64rm tjumptable:$src)>;
+def : Pat<(SystemZpcrelwrapper tconstpool:$src), (LA64rm tconstpool:$src)>;
+
+// anyext
+def : Pat<(i64 (anyext GR32:$src)),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_32bit)>;
+
+// calls
+def : Pat<(SystemZcall (i64 tglobaladdr:$dst)), (CALLi tglobaladdr:$dst)>;
+def : Pat<(SystemZcall (i64 texternalsym:$dst)), (CALLi texternalsym:$dst)>;
+
+//===----------------------------------------------------------------------===//
+// Peepholes.
+//===----------------------------------------------------------------------===//
+
+// FIXME: use add/sub tricks with 32678/-32768
+
+// Arbitrary immediate support.
+def : Pat<(i32 imm:$src),
+ (EXTRACT_SUBREG (MOV64ri32 (GetI64FromI32 (i32 imm:$src))),
+ subreg_32bit)>;
+
+// Implement in terms of LLIHF/OILF.
+def : Pat<(i64 imm:$imm),
+ (OR64rilo32 (MOV64rihi32 (HI32 imm:$imm)), (LO32 imm:$imm))>;
+
+// trunc patterns
+def : Pat<(i32 (trunc GR64:$src)),
+ (EXTRACT_SUBREG GR64:$src, subreg_32bit)>;
+
+// sext_inreg patterns
+def : Pat<(sext_inreg GR64:$src, i32),
+ (MOVSX64rr32 (EXTRACT_SUBREG GR64:$src, subreg_32bit))>;
+
+// extload patterns
+def : Pat<(extloadi32i8 rriaddr:$src), (MOVZX32rm8 rriaddr:$src)>;
+def : Pat<(extloadi32i16 rriaddr:$src), (MOVZX32rm16 rriaddr:$src)>;
+def : Pat<(extloadi64i8 rriaddr:$src), (MOVZX64rm8 rriaddr:$src)>;
+def : Pat<(extloadi64i16 rriaddr:$src), (MOVZX64rm16 rriaddr:$src)>;
+def : Pat<(extloadi64i32 rriaddr:$src), (MOVZX64rm32 rriaddr:$src)>;
+
+// muls
+def : Pat<(mulhs GR32:$src1, GR32:$src2),
+ (EXTRACT_SUBREG (MUL64rrP (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
+ GR32:$src1, subreg_odd32),
+ GR32:$src2),
+ subreg_32bit)>;
+
+def : Pat<(mulhu GR32:$src1, GR32:$src2),
+ (EXTRACT_SUBREG (UMUL64rrP (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
+ GR32:$src1, subreg_odd32),
+ GR32:$src2),
+ subreg_32bit)>;
+def : Pat<(mulhu GR64:$src1, GR64:$src2),
+ (EXTRACT_SUBREG (UMUL128rrP (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
+ GR64:$src1, subreg_odd),
+ GR64:$src2),
+ subreg_even)>;
+
+def : Pat<(ctlz GR64:$src),
+ (EXTRACT_SUBREG (FLOGR64 GR64:$src), subreg_even)>;
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h b/contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
new file mode 100644
index 0000000..fd6e330
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
@@ -0,0 +1,51 @@
+//==- SystemZMachineFuctionInfo.h - SystemZ machine function info -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares SystemZ-specific per-machine-function information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SYSTEMZMACHINEFUNCTIONINFO_H
+#define SYSTEMZMACHINEFUNCTIONINFO_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+/// SystemZMachineFunctionInfo - This class is derived from MachineFunction and
+/// contains private SystemZ target-specific information for each MachineFunction.
+class SystemZMachineFunctionInfo : public MachineFunctionInfo {
+ /// CalleeSavedFrameSize - Size of the callee-saved register portion of the
+ /// stack frame in bytes.
+ unsigned CalleeSavedFrameSize;
+
+ /// LowReg - Low register of range of callee-saved registers to store.
+ unsigned LowReg;
+
+ /// HighReg - High register of range of callee-saved registers to store.
+ unsigned HighReg;
+public:
+ SystemZMachineFunctionInfo() : CalleeSavedFrameSize(0) {}
+
+ explicit SystemZMachineFunctionInfo(MachineFunction &MF)
+ : CalleeSavedFrameSize(0) {}
+
+ unsigned getCalleeSavedFrameSize() const { return CalleeSavedFrameSize; }
+ void setCalleeSavedFrameSize(unsigned bytes) { CalleeSavedFrameSize = bytes; }
+
+ unsigned getLowReg() const { return LowReg; }
+ void setLowReg(unsigned Reg) { LowReg = Reg; }
+
+ unsigned getHighReg() const { return HighReg; }
+ void setHighReg(unsigned Reg) { HighReg = Reg; }
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZOperands.td b/contrib/llvm/lib/Target/SystemZ/SystemZOperands.td
new file mode 100644
index 0000000..8b835cc
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZOperands.td
@@ -0,0 +1,325 @@
+//=====- SystemZOperands.td - SystemZ Operands defs ---------*- tblgen-*-=====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the various SystemZ instruction operands.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instruction Pattern Stuff.
+//===----------------------------------------------------------------------===//
+
+// SystemZ specific condition code. These correspond to CondCode in
+// SystemZ.h. They must be kept in synch.
+def SYSTEMZ_COND_O : PatLeaf<(i8 0)>;
+def SYSTEMZ_COND_H : PatLeaf<(i8 1)>;
+def SYSTEMZ_COND_NLE : PatLeaf<(i8 2)>;
+def SYSTEMZ_COND_L : PatLeaf<(i8 3)>;
+def SYSTEMZ_COND_NHE : PatLeaf<(i8 4)>;
+def SYSTEMZ_COND_LH : PatLeaf<(i8 5)>;
+def SYSTEMZ_COND_NE : PatLeaf<(i8 6)>;
+def SYSTEMZ_COND_E : PatLeaf<(i8 7)>;
+def SYSTEMZ_COND_NLH : PatLeaf<(i8 8)>;
+def SYSTEMZ_COND_HE : PatLeaf<(i8 9)>;
+def SYSTEMZ_COND_NL : PatLeaf<(i8 10)>;
+def SYSTEMZ_COND_LE : PatLeaf<(i8 11)>;
+def SYSTEMZ_COND_NH : PatLeaf<(i8 12)>;
+def SYSTEMZ_COND_NO : PatLeaf<(i8 13)>;
+
+def LO8 : SDNodeXForm<imm, [{
+ // Transformation function: return low 8 bits.
+ return getI8Imm(N->getZExtValue() & 0x00000000000000FFULL);
+}]>;
+
+def LL16 : SDNodeXForm<imm, [{
+ // Transformation function: return low 16 bits.
+ return getI16Imm(N->getZExtValue() & 0x000000000000FFFFULL);
+}]>;
+
+def LH16 : SDNodeXForm<imm, [{
+ // Transformation function: return bits 16-31.
+ return getI16Imm((N->getZExtValue() & 0x00000000FFFF0000ULL) >> 16);
+}]>;
+
+def HL16 : SDNodeXForm<imm, [{
+ // Transformation function: return bits 32-47.
+ return getI16Imm((N->getZExtValue() & 0x0000FFFF00000000ULL) >> 32);
+}]>;
+
+def HH16 : SDNodeXForm<imm, [{
+ // Transformation function: return bits 48-63.
+ return getI16Imm((N->getZExtValue() & 0xFFFF000000000000ULL) >> 48);
+}]>;
+
+def LO32 : SDNodeXForm<imm, [{
+ // Transformation function: return low 32 bits.
+ return getI32Imm(N->getZExtValue() & 0x00000000FFFFFFFFULL);
+}]>;
+
+def HI32 : SDNodeXForm<imm, [{
+ // Transformation function: return bits 32-63.
+ return getI32Imm(N->getZExtValue() >> 32);
+}]>;
+
+def GetI64FromI32 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getSExtValue(), MVT::i64);
+}]>;
+
+def i32ll16 : PatLeaf<(i32 imm), [{
+ // i32ll16 predicate - true if the 32-bit immediate has only rightmost 16
+ // bits set.
+ return ((N->getZExtValue() & 0x000000000000FFFFULL) == N->getZExtValue());
+}], LL16>;
+
+def i32lh16 : PatLeaf<(i32 imm), [{
+ // i32lh16 predicate - true if the 32-bit immediate has only bits 16-31 set.
+ return ((N->getZExtValue() & 0x00000000FFFF0000ULL) == N->getZExtValue());
+}], LH16>;
+
+def i32ll16c : PatLeaf<(i32 imm), [{
+ // i32ll16c predicate - true if the 32-bit immediate has all bits 16-31 set.
+ return ((N->getZExtValue() | 0x00000000FFFF0000ULL) == N->getZExtValue());
+}], LL16>;
+
+def i32lh16c : PatLeaf<(i32 imm), [{
+ // i32lh16c predicate - true if the 32-bit immediate has all rightmost 16
+ // bits set.
+ return ((N->getZExtValue() | 0x000000000000FFFFULL) == N->getZExtValue());
+}], LH16>;
+
+def i64ll16 : PatLeaf<(i64 imm), [{
+ // i64ll16 predicate - true if the 64-bit immediate has only rightmost 16
+ // bits set.
+ return ((N->getZExtValue() & 0x000000000000FFFFULL) == N->getZExtValue());
+}], LL16>;
+
+def i64lh16 : PatLeaf<(i64 imm), [{
+ // i64lh16 predicate - true if the 64-bit immediate has only bits 16-31 set.
+ return ((N->getZExtValue() & 0x00000000FFFF0000ULL) == N->getZExtValue());
+}], LH16>;
+
+def i64hl16 : PatLeaf<(i64 imm), [{
+ // i64hl16 predicate - true if the 64-bit immediate has only bits 32-47 set.
+ return ((N->getZExtValue() & 0x0000FFFF00000000ULL) == N->getZExtValue());
+}], HL16>;
+
+def i64hh16 : PatLeaf<(i64 imm), [{
+ // i64hh16 predicate - true if the 64-bit immediate has only bits 48-63 set.
+ return ((N->getZExtValue() & 0xFFFF000000000000ULL) == N->getZExtValue());
+}], HH16>;
+
+def i64ll16c : PatLeaf<(i64 imm), [{
+ // i64ll16c predicate - true if the 64-bit immediate has only rightmost 16
+ // bits set.
+ return ((N->getZExtValue() | 0xFFFFFFFFFFFF0000ULL) == N->getZExtValue());
+}], LL16>;
+
+def i64lh16c : PatLeaf<(i64 imm), [{
+ // i64lh16c predicate - true if the 64-bit immediate has only bits 16-31 set.
+ return ((N->getZExtValue() | 0xFFFFFFFF0000FFFFULL) == N->getZExtValue());
+}], LH16>;
+
+def i64hl16c : PatLeaf<(i64 imm), [{
+ // i64hl16c predicate - true if the 64-bit immediate has only bits 32-47 set.
+ return ((N->getZExtValue() | 0xFFFF0000FFFFFFFFULL) == N->getZExtValue());
+}], HL16>;
+
+def i64hh16c : PatLeaf<(i64 imm), [{
+ // i64hh16c predicate - true if the 64-bit immediate has only bits 48-63 set.
+ return ((N->getZExtValue() | 0x0000FFFFFFFFFFFFULL) == N->getZExtValue());
+}], HH16>;
+
+def immSExt16 : PatLeaf<(imm), [{
+ // immSExt16 predicate - true if the immediate fits in a 16-bit sign extended
+ // field.
+ if (N->getValueType(0) == MVT::i64) {
+ uint64_t val = N->getZExtValue();
+ return ((int64_t)val == (int16_t)val);
+ } else if (N->getValueType(0) == MVT::i32) {
+ uint32_t val = N->getZExtValue();
+ return ((int32_t)val == (int16_t)val);
+ }
+
+ return false;
+}], LL16>;
+
+def immSExt32 : PatLeaf<(i64 imm), [{
+ // immSExt32 predicate - true if the immediate fits in a 32-bit sign extended
+ // field.
+ uint64_t val = N->getZExtValue();
+ return ((int64_t)val == (int32_t)val);
+}], LO32>;
+
+def i64lo32 : PatLeaf<(i64 imm), [{
+ // i64lo32 predicate - true if the 64-bit immediate has only rightmost 32
+ // bits set.
+ return ((N->getZExtValue() & 0x00000000FFFFFFFFULL) == N->getZExtValue());
+}], LO32>;
+
+def i64hi32 : PatLeaf<(i64 imm), [{
+ // i64hi32 predicate - true if the 64-bit immediate has only bits 32-63 set.
+ return ((N->getZExtValue() & 0xFFFFFFFF00000000ULL) == N->getZExtValue());
+}], HI32>;
+
+def i64lo32c : PatLeaf<(i64 imm), [{
+ // i64lo32 predicate - true if the 64-bit immediate has only rightmost 32
+ // bits set.
+ return ((N->getZExtValue() | 0xFFFFFFFF00000000ULL) == N->getZExtValue());
+}], LO32>;
+
+def i64hi32c : PatLeaf<(i64 imm), [{
+ // i64hi32 predicate - true if the 64-bit immediate has only bits 32-63 set.
+ return ((N->getZExtValue() | 0x00000000FFFFFFFFULL) == N->getZExtValue());
+}], HI32>;
+
+def i32immSExt8 : PatLeaf<(i32 imm), [{
+ // i32immSExt8 predicate - True if the 32-bit immediate fits in a 8-bit
+ // sign extended field.
+ return (int32_t)N->getZExtValue() == (int8_t)N->getZExtValue();
+}], LO8>;
+
+def i32immSExt16 : PatLeaf<(i32 imm), [{
+ // i32immSExt16 predicate - True if the 32-bit immediate fits in a 16-bit
+ // sign extended field.
+ return (int32_t)N->getZExtValue() == (int16_t)N->getZExtValue();
+}], LL16>;
+
+def i64immSExt32 : PatLeaf<(i64 imm), [{
+ // i64immSExt32 predicate - True if the 64-bit immediate fits in a 32-bit
+ // sign extended field.
+ return (int64_t)N->getZExtValue() == (int32_t)N->getZExtValue();
+}], LO32>;
+
+def i64immZExt32 : PatLeaf<(i64 imm), [{
+ // i64immZExt32 predicate - True if the 64-bit immediate fits in a 32-bit
+ // zero extended field.
+ return (uint64_t)N->getZExtValue() == (uint32_t)N->getZExtValue();
+}], LO32>;
+
+// extloads
+def extloadi32i8 : PatFrag<(ops node:$ptr), (i32 (extloadi8 node:$ptr))>;
+def extloadi32i16 : PatFrag<(ops node:$ptr), (i32 (extloadi16 node:$ptr))>;
+def extloadi64i8 : PatFrag<(ops node:$ptr), (i64 (extloadi8 node:$ptr))>;
+def extloadi64i16 : PatFrag<(ops node:$ptr), (i64 (extloadi16 node:$ptr))>;
+def extloadi64i32 : PatFrag<(ops node:$ptr), (i64 (extloadi32 node:$ptr))>;
+
+def sextloadi32i8 : PatFrag<(ops node:$ptr), (i32 (sextloadi8 node:$ptr))>;
+def sextloadi32i16 : PatFrag<(ops node:$ptr), (i32 (sextloadi16 node:$ptr))>;
+def sextloadi64i8 : PatFrag<(ops node:$ptr), (i64 (sextloadi8 node:$ptr))>;
+def sextloadi64i16 : PatFrag<(ops node:$ptr), (i64 (sextloadi16 node:$ptr))>;
+def sextloadi64i32 : PatFrag<(ops node:$ptr), (i64 (sextloadi32 node:$ptr))>;
+
+def zextloadi32i8 : PatFrag<(ops node:$ptr), (i32 (zextloadi8 node:$ptr))>;
+def zextloadi32i16 : PatFrag<(ops node:$ptr), (i32 (zextloadi16 node:$ptr))>;
+def zextloadi64i8 : PatFrag<(ops node:$ptr), (i64 (zextloadi8 node:$ptr))>;
+def zextloadi64i16 : PatFrag<(ops node:$ptr), (i64 (zextloadi16 node:$ptr))>;
+def zextloadi64i32 : PatFrag<(ops node:$ptr), (i64 (zextloadi32 node:$ptr))>;
+
+// A couple of more descriptive operand definitions.
+// 32-bits but only 8 bits are significant.
+def i32i8imm : Operand<i32>;
+// 32-bits but only 16 bits are significant.
+def i32i16imm : Operand<i32>;
+// 64-bits but only 32 bits are significant.
+def i64i32imm : Operand<i64>;
+// Branch targets have OtherVT type.
+def brtarget : Operand<OtherVT>;
+
+// Unsigned i12
+def u12imm : Operand<i32> {
+ let PrintMethod = "printU12ImmOperand";
+}
+def u12imm64 : Operand<i64> {
+ let PrintMethod = "printU12ImmOperand";
+}
+
+// Signed i16
+def s16imm : Operand<i32> {
+ let PrintMethod = "printS16ImmOperand";
+}
+def s16imm64 : Operand<i64> {
+ let PrintMethod = "printS16ImmOperand";
+}
+// Unsigned i16
+def u16imm : Operand<i32> {
+ let PrintMethod = "printU16ImmOperand";
+}
+def u16imm64 : Operand<i64> {
+ let PrintMethod = "printU16ImmOperand";
+}
+
+// Signed i20
+def s20imm : Operand<i32> {
+ let PrintMethod = "printS20ImmOperand";
+}
+def s20imm64 : Operand<i64> {
+ let PrintMethod = "printS20ImmOperand";
+}
+// Signed i32
+def s32imm : Operand<i32> {
+ let PrintMethod = "printS32ImmOperand";
+}
+def s32imm64 : Operand<i64> {
+ let PrintMethod = "printS32ImmOperand";
+}
+// Unsigned i32
+def u32imm : Operand<i32> {
+ let PrintMethod = "printU32ImmOperand";
+}
+def u32imm64 : Operand<i64> {
+ let PrintMethod = "printU32ImmOperand";
+}
+
+def imm_pcrel : Operand<i64> {
+ let PrintMethod = "printPCRelImmOperand";
+}
+
+//===----------------------------------------------------------------------===//
+// SystemZ Operand Definitions.
+//===----------------------------------------------------------------------===//
+
+// Address operands
+
+// riaddr := reg + imm
+def riaddr32 : Operand<i64>,
+ ComplexPattern<i64, 2, "SelectAddrRI12Only", []> {
+ let PrintMethod = "printRIAddrOperand";
+ let MIOperandInfo = (ops ADDR64:$base, u12imm:$disp);
+}
+
+def riaddr12 : Operand<i64>,
+ ComplexPattern<i64, 2, "SelectAddrRI12", []> {
+ let PrintMethod = "printRIAddrOperand";
+ let MIOperandInfo = (ops ADDR64:$base, u12imm64:$disp);
+}
+
+def riaddr : Operand<i64>,
+ ComplexPattern<i64, 2, "SelectAddrRI", []> {
+ let PrintMethod = "printRIAddrOperand";
+ let MIOperandInfo = (ops ADDR64:$base, s20imm64:$disp);
+}
+
+//===----------------------------------------------------------------------===//
+
+// rriaddr := reg + reg + imm
+def rriaddr12 : Operand<i64>,
+ ComplexPattern<i64, 3, "SelectAddrRRI12", [], []> {
+ let PrintMethod = "printRRIAddrOperand";
+ let MIOperandInfo = (ops ADDR64:$base, u12imm64:$disp, ADDR64:$index);
+}
+def rriaddr : Operand<i64>,
+ ComplexPattern<i64, 3, "SelectAddrRRI20", [], []> {
+ let PrintMethod = "printRRIAddrOperand";
+ let MIOperandInfo = (ops ADDR64:$base, s20imm64:$disp, ADDR64:$index);
+}
+def laaddr : Operand<i64>,
+ ComplexPattern<i64, 3, "SelectLAAddr", [add, sub, or, frameindex], []> {
+ let PrintMethod = "printRRIAddrOperand";
+ let MIOperandInfo = (ops ADDR64:$base, s20imm64:$disp, ADDR64:$index);
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
new file mode 100644
index 0000000..b1050d4
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
@@ -0,0 +1,143 @@
+//===- SystemZRegisterInfo.cpp - SystemZ Register Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the SystemZ implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZ.h"
+#include "SystemZInstrInfo.h"
+#include "SystemZMachineFunctionInfo.h"
+#include "SystemZRegisterInfo.h"
+#include "SystemZSubtarget.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/BitVector.h"
+
+#define GET_REGINFO_TARGET_DESC
+#include "SystemZGenRegisterInfo.inc"
+
+using namespace llvm;
+
+SystemZRegisterInfo::SystemZRegisterInfo(SystemZTargetMachine &tm,
+ const SystemZInstrInfo &tii)
+ : SystemZGenRegisterInfo(0), TM(tm), TII(tii) {
+}
+
+const unsigned*
+SystemZRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+ static const unsigned CalleeSavedRegs[] = {
+ SystemZ::R6D, SystemZ::R7D, SystemZ::R8D, SystemZ::R9D,
+ SystemZ::R10D, SystemZ::R11D, SystemZ::R12D, SystemZ::R13D,
+ SystemZ::R14D, SystemZ::R15D,
+ SystemZ::F8L, SystemZ::F9L, SystemZ::F10L, SystemZ::F11L,
+ SystemZ::F12L, SystemZ::F13L, SystemZ::F14L, SystemZ::F15L,
+ 0
+ };
+
+ return CalleeSavedRegs;
+}
+
+BitVector SystemZRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ if (TFI->hasFP(MF)) {
+ // R11D is the frame pointer. Reserve all aliases.
+ Reserved.set(SystemZ::R11D);
+ Reserved.set(SystemZ::R11W);
+ Reserved.set(SystemZ::R10P);
+ Reserved.set(SystemZ::R10Q);
+ }
+
+ Reserved.set(SystemZ::R14D);
+ Reserved.set(SystemZ::R15D);
+ Reserved.set(SystemZ::R14W);
+ Reserved.set(SystemZ::R15W);
+ Reserved.set(SystemZ::R14P);
+ Reserved.set(SystemZ::R14Q);
+ return Reserved;
+}
+
+const TargetRegisterClass*
+SystemZRegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
+ const TargetRegisterClass *B,
+ unsigned Idx) const {
+ switch(Idx) {
+ // Exact sub-classes don't exist for the other sub-register indexes.
+ default: return 0;
+ case SystemZ::subreg_32bit:
+ if (B == SystemZ::ADDR32RegisterClass)
+ return A->getSize() == 8 ? SystemZ::ADDR64RegisterClass : 0;
+ return A;
+ }
+}
+
+void SystemZRegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ MBB.erase(I);
+}
+
+void
+SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS) const {
+ assert(SPAdj == 0 && "Unxpected");
+
+ unsigned i = 0;
+ MachineInstr &MI = *II;
+ MachineFunction &MF = *MI.getParent()->getParent();
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+
+ int FrameIndex = MI.getOperand(i).getIndex();
+
+ unsigned BasePtr = (TFI->hasFP(MF) ? SystemZ::R11D : SystemZ::R15D);
+
+ // This must be part of a rri or ri operand memory reference. Replace the
+ // FrameIndex with base register with BasePtr. Add an offset to the
+ // displacement field.
+ MI.getOperand(i).ChangeToRegister(BasePtr, false);
+
+ // Offset is a either 12-bit unsigned or 20-bit signed integer.
+ // FIXME: handle "too long" displacements.
+ int Offset =
+ TFI->getFrameIndexOffset(MF, FrameIndex) + MI.getOperand(i+1).getImm();
+
+ // Check whether displacement is too long to fit into 12 bit zext field.
+ MI.setDesc(TII.getMemoryInstr(MI.getOpcode(), Offset));
+
+ MI.getOperand(i+1).ChangeToImmediate(Offset);
+}
+
+unsigned
+SystemZRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+ assert(0 && "What is the frame register");
+ return 0;
+}
+
+unsigned SystemZRegisterInfo::getEHExceptionRegister() const {
+ assert(0 && "What is the exception register");
+ return 0;
+}
+
+unsigned SystemZRegisterInfo::getEHHandlerRegister() const {
+ assert(0 && "What is the exception handler register");
+ return 0;
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h
new file mode 100644
index 0000000..03935b2
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h
@@ -0,0 +1,60 @@
+//===-- SystemZRegisterInfo.h - SystemZ Register Information ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the SystemZ implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SystemZREGISTERINFO_H
+#define SystemZREGISTERINFO_H
+
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#define GET_REGINFO_HEADER
+#include "SystemZGenRegisterInfo.inc"
+
+namespace llvm {
+
+class SystemZSubtarget;
+class SystemZInstrInfo;
+class Type;
+
+struct SystemZRegisterInfo : public SystemZGenRegisterInfo {
+ SystemZTargetMachine &TM;
+ const SystemZInstrInfo &TII;
+
+ SystemZRegisterInfo(SystemZTargetMachine &tm, const SystemZInstrInfo &tii);
+
+ /// Code Generation virtual methods...
+ const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ const TargetRegisterClass*
+ getMatchingSuperRegClass(const TargetRegisterClass *A,
+ const TargetRegisterClass *B, unsigned Idx) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
+
+ // Debug information queries.
+ unsigned getFrameRegister(const MachineFunction &MF) const;
+
+ // Exception handling queries.
+ unsigned getEHExceptionRegister() const;
+ unsigned getEHHandlerRegister() const;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td
new file mode 100644
index 0000000..a24cbcf
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td
@@ -0,0 +1,205 @@
+//===- SystemZRegisterInfo.td - The PowerPC Register File ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+class SystemZReg<string n> : Register<n> {
+ let Namespace = "SystemZ";
+}
+
+class SystemZRegWithSubregs<string n, list<Register> subregs>
+ : RegisterWithSubRegs<n, subregs> {
+ let Namespace = "SystemZ";
+}
+
+// We identify all our registers with a 4-bit ID, for consistency's sake.
+
+// GPR32 - Lower 32 bits of one of the 16 64-bit general-purpose registers
+class GPR32<bits<4> num, string n> : SystemZReg<n> {
+ field bits<4> Num = num;
+}
+
+// GPR64 - One of the 16 64-bit general-purpose registers
+class GPR64<bits<4> num, string n, list<Register> subregs,
+ list<Register> aliases = []>
+ : SystemZRegWithSubregs<n, subregs> {
+ field bits<4> Num = num;
+ let Aliases = aliases;
+}
+
+// GPR128 - 8 even-odd register pairs
+class GPR128<bits<4> num, string n, list<Register> subregs,
+ list<Register> aliases = []>
+ : SystemZRegWithSubregs<n, subregs> {
+ field bits<4> Num = num;
+ let Aliases = aliases;
+}
+
+// FPRS - Lower 32 bits of one of the 16 64-bit floating-point registers
+class FPRS<bits<4> num, string n> : SystemZReg<n> {
+ field bits<4> Num = num;
+}
+
+// FPRL - One of the 16 64-bit floating-point registers
+class FPRL<bits<4> num, string n, list<Register> subregs>
+ : SystemZRegWithSubregs<n, subregs> {
+ field bits<4> Num = num;
+}
+
+let Namespace = "SystemZ" in {
+def subreg_32bit : SubRegIndex;
+def subreg_odd32 : SubRegIndex;
+def subreg_even : SubRegIndex;
+def subreg_odd : SubRegIndex;
+}
+
+// General-purpose registers
+def R0W : GPR32< 0, "r0">;
+def R1W : GPR32< 1, "r1">;
+def R2W : GPR32< 2, "r2">;
+def R3W : GPR32< 3, "r3">;
+def R4W : GPR32< 4, "r4">;
+def R5W : GPR32< 5, "r5">;
+def R6W : GPR32< 6, "r6">;
+def R7W : GPR32< 7, "r7">;
+def R8W : GPR32< 8, "r8">;
+def R9W : GPR32< 9, "r9">;
+def R10W : GPR32<10, "r10">;
+def R11W : GPR32<11, "r11">;
+def R12W : GPR32<12, "r12">;
+def R13W : GPR32<13, "r13">;
+def R14W : GPR32<14, "r14">;
+def R15W : GPR32<15, "r15">;
+
+let SubRegIndices = [subreg_32bit] in {
+def R0D : GPR64< 0, "r0", [R0W]>, DwarfRegNum<[0]>;
+def R1D : GPR64< 1, "r1", [R1W]>, DwarfRegNum<[1]>;
+def R2D : GPR64< 2, "r2", [R2W]>, DwarfRegNum<[2]>;
+def R3D : GPR64< 3, "r3", [R3W]>, DwarfRegNum<[3]>;
+def R4D : GPR64< 4, "r4", [R4W]>, DwarfRegNum<[4]>;
+def R5D : GPR64< 5, "r5", [R5W]>, DwarfRegNum<[5]>;
+def R6D : GPR64< 6, "r6", [R6W]>, DwarfRegNum<[6]>;
+def R7D : GPR64< 7, "r7", [R7W]>, DwarfRegNum<[7]>;
+def R8D : GPR64< 8, "r8", [R8W]>, DwarfRegNum<[8]>;
+def R9D : GPR64< 9, "r9", [R9W]>, DwarfRegNum<[9]>;
+def R10D : GPR64<10, "r10", [R10W]>, DwarfRegNum<[10]>;
+def R11D : GPR64<11, "r11", [R11W]>, DwarfRegNum<[11]>;
+def R12D : GPR64<12, "r12", [R12W]>, DwarfRegNum<[12]>;
+def R13D : GPR64<13, "r13", [R13W]>, DwarfRegNum<[13]>;
+def R14D : GPR64<14, "r14", [R14W]>, DwarfRegNum<[14]>;
+def R15D : GPR64<15, "r15", [R15W]>, DwarfRegNum<[15]>;
+}
+
+// Register pairs
+let SubRegIndices = [subreg_32bit, subreg_odd32] in {
+def R0P : GPR64< 0, "r0", [R0W, R1W], [R0D, R1D]>;
+def R2P : GPR64< 2, "r2", [R2W, R3W], [R2D, R3D]>;
+def R4P : GPR64< 4, "r4", [R4W, R5W], [R4D, R5D]>;
+def R6P : GPR64< 6, "r6", [R6W, R7W], [R6D, R7D]>;
+def R8P : GPR64< 8, "r8", [R8W, R9W], [R8D, R9D]>;
+def R10P : GPR64<10, "r10", [R10W, R11W], [R10D, R11D]>;
+def R12P : GPR64<12, "r12", [R12W, R13W], [R12D, R13D]>;
+def R14P : GPR64<14, "r14", [R14W, R15W], [R14D, R15D]>;
+}
+
+let SubRegIndices = [subreg_even, subreg_odd],
+ CompositeIndices = [(subreg_odd32 subreg_odd, subreg_32bit)] in {
+def R0Q : GPR128< 0, "r0", [R0D, R1D], [R0P]>;
+def R2Q : GPR128< 2, "r2", [R2D, R3D], [R2P]>;
+def R4Q : GPR128< 4, "r4", [R4D, R5D], [R4P]>;
+def R6Q : GPR128< 6, "r6", [R6D, R7D], [R6P]>;
+def R8Q : GPR128< 8, "r8", [R8D, R9D], [R8P]>;
+def R10Q : GPR128<10, "r10", [R10D, R11D], [R10P]>;
+def R12Q : GPR128<12, "r12", [R12D, R13D], [R12P]>;
+def R14Q : GPR128<14, "r14", [R14D, R15D], [R14P]>;
+}
+
+// Floating-point registers
+def F0S : FPRS< 0, "f0">, DwarfRegNum<[16]>;
+def F1S : FPRS< 1, "f1">, DwarfRegNum<[17]>;
+def F2S : FPRS< 2, "f2">, DwarfRegNum<[18]>;
+def F3S : FPRS< 3, "f3">, DwarfRegNum<[19]>;
+def F4S : FPRS< 4, "f4">, DwarfRegNum<[20]>;
+def F5S : FPRS< 5, "f5">, DwarfRegNum<[21]>;
+def F6S : FPRS< 6, "f6">, DwarfRegNum<[22]>;
+def F7S : FPRS< 7, "f7">, DwarfRegNum<[23]>;
+def F8S : FPRS< 8, "f8">, DwarfRegNum<[24]>;
+def F9S : FPRS< 9, "f9">, DwarfRegNum<[25]>;
+def F10S : FPRS<10, "f10">, DwarfRegNum<[26]>;
+def F11S : FPRS<11, "f11">, DwarfRegNum<[27]>;
+def F12S : FPRS<12, "f12">, DwarfRegNum<[28]>;
+def F13S : FPRS<13, "f13">, DwarfRegNum<[29]>;
+def F14S : FPRS<14, "f14">, DwarfRegNum<[30]>;
+def F15S : FPRS<15, "f15">, DwarfRegNum<[31]>;
+
+let SubRegIndices = [subreg_32bit] in {
+def F0L : FPRL< 0, "f0", [F0S]>;
+def F1L : FPRL< 1, "f1", [F1S]>;
+def F2L : FPRL< 2, "f2", [F2S]>;
+def F3L : FPRL< 3, "f3", [F3S]>;
+def F4L : FPRL< 4, "f4", [F4S]>;
+def F5L : FPRL< 5, "f5", [F5S]>;
+def F6L : FPRL< 6, "f6", [F6S]>;
+def F7L : FPRL< 7, "f7", [F7S]>;
+def F8L : FPRL< 8, "f8", [F8S]>;
+def F9L : FPRL< 9, "f9", [F9S]>;
+def F10L : FPRL<10, "f10", [F10S]>;
+def F11L : FPRL<11, "f11", [F11S]>;
+def F12L : FPRL<12, "f12", [F12S]>;
+def F13L : FPRL<13, "f13", [F13S]>;
+def F14L : FPRL<14, "f14", [F14S]>;
+def F15L : FPRL<15, "f15", [F15S]>;
+}
+
+// Status register
+def PSW : SystemZReg<"psw">;
+
+/// Register classes.
+/// Allocate the callee-saved R6-R12 backwards. That way they can be saved
+/// together with R14 and R15 in one prolog instruction.
+def GR32 : RegisterClass<"SystemZ", [i32], 32, (add (sequence "R%uW", 0, 5),
+ (sequence "R%uW", 15, 6))>;
+
+/// Registers used to generate address. Everything except R0.
+def ADDR32 : RegisterClass<"SystemZ", [i32], 32, (sub GR32, R0W)>;
+
+def GR64 : RegisterClass<"SystemZ", [i64], 64, (add (sequence "R%uD", 0, 5),
+ (sequence "R%uD", 15, 6))> {
+ let SubRegClasses = [(GR32 subreg_32bit)];
+}
+
+def ADDR64 : RegisterClass<"SystemZ", [i64], 64, (sub GR64, R0D)> {
+ let SubRegClasses = [(ADDR32 subreg_32bit)];
+}
+
+// Even-odd register pairs
+def GR64P : RegisterClass<"SystemZ", [v2i32], 64, (add R0P, R2P, R4P,
+ R12P, R10P, R8P, R6P,
+ R14P)> {
+ let SubRegClasses = [(GR32 subreg_32bit, subreg_odd32)];
+}
+
+def GR128 : RegisterClass<"SystemZ", [v2i64], 128, (add R0Q, R2Q, R4Q,
+ R12Q, R10Q, R8Q, R6Q,
+ R14Q)> {
+ let SubRegClasses = [(GR32 subreg_32bit, subreg_odd32),
+ (GR64 subreg_even, subreg_odd)];
+}
+
+def FP32 : RegisterClass<"SystemZ", [f32], 32, (sequence "F%uS", 0, 15)>;
+
+def FP64 : RegisterClass<"SystemZ", [f64], 64, (sequence "F%uL", 0, 15)> {
+ let SubRegClasses = [(FP32 subreg_32bit)];
+}
+
+// Status flags registers.
+def CCR : RegisterClass<"SystemZ", [i64], 64, (add PSW)> {
+ let CopyCost = -1; // Don't allow copying of status registers.
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
new file mode 100644
index 0000000..3eabcd2
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
@@ -0,0 +1,23 @@
+//===-- SystemZSelectionDAGInfo.cpp - SystemZ SelectionDAG Info -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SystemZSelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "systemz-selectiondag-info"
+#include "SystemZTargetMachine.h"
+using namespace llvm;
+
+SystemZSelectionDAGInfo::SystemZSelectionDAGInfo(const SystemZTargetMachine &TM)
+ : TargetSelectionDAGInfo(TM) {
+}
+
+SystemZSelectionDAGInfo::~SystemZSelectionDAGInfo() {
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h b/contrib/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
new file mode 100644
index 0000000..1450401
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
@@ -0,0 +1,31 @@
+//===-- SystemZSelectionDAGInfo.h - SystemZ SelectionDAG Info ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the SystemZ subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SYSTEMZSELECTIONDAGINFO_H
+#define SYSTEMZSELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class SystemZTargetMachine;
+
+class SystemZSelectionDAGInfo : public TargetSelectionDAGInfo {
+public:
+ explicit SystemZSelectionDAGInfo(const SystemZTargetMachine &TM);
+ ~SystemZSelectionDAGInfo();
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp
new file mode 100644
index 0000000..0845510
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp
@@ -0,0 +1,54 @@
+//===- SystemZSubtarget.cpp - SystemZ Subtarget Information -------*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SystemZ specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZSubtarget.h"
+#include "SystemZ.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "SystemZGenSubtargetInfo.inc"
+
+using namespace llvm;
+
+SystemZSubtarget::SystemZSubtarget(const std::string &TT,
+ const std::string &CPU,
+ const std::string &FS):
+ SystemZGenSubtargetInfo(TT, CPU, FS), HasZ10Insts(false) {
+ std::string CPUName = CPU;
+ if (CPUName.empty())
+ CPUName = "z9";
+
+ // Parse features string.
+ ParseSubtargetFeatures(CPUName, FS);
+}
+
+/// True if accessing the GV requires an extra load.
+bool SystemZSubtarget::GVRequiresExtraLoad(const GlobalValue* GV,
+ const TargetMachine& TM,
+ bool isDirectCall) const {
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ // Extra load is needed for all externally visible.
+ if (isDirectCall)
+ return false;
+
+ if (GV->hasLocalLinkage() || GV->hasHiddenVisibility())
+ return false;
+
+ return true;
+ }
+
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h
new file mode 100644
index 0000000..55cfd80
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h
@@ -0,0 +1,48 @@
+//==-- SystemZSubtarget.h - Define Subtarget for the SystemZ ---*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the SystemZ specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_SystemZ_SUBTARGET_H
+#define LLVM_TARGET_SystemZ_SUBTARGET_H
+
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <string>
+
+#define GET_SUBTARGETINFO_HEADER
+#include "SystemZGenSubtargetInfo.inc"
+
+namespace llvm {
+class GlobalValue;
+class StringRef;
+class TargetMachine;
+
+class SystemZSubtarget : public SystemZGenSubtargetInfo {
+ bool HasZ10Insts;
+public:
+ /// This constructor initializes the data members to match that
+ /// of the specified triple.
+ ///
+ SystemZSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS);
+
+ /// ParseSubtargetFeatures - Parses features string setting specified
+ /// subtarget options. Definition of function is auto generated by tblgen.
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+
+ bool isZ10() const { return HasZ10Insts; }
+
+ bool GVRequiresExtraLoad(const GlobalValue* GV, const TargetMachine& TM,
+ bool isDirectCall) const;
+};
+} // End llvm namespace
+
+#endif // LLVM_TARGET_SystemZ_SUBTARGET_H
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
new file mode 100644
index 0000000..e390f06
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -0,0 +1,40 @@
+//===-- SystemZTargetMachine.cpp - Define TargetMachine for SystemZ -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZTargetMachine.h"
+#include "SystemZ.h"
+#include "llvm/PassManager.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+extern "C" void LLVMInitializeSystemZTarget() {
+ // Register the target.
+ RegisterTargetMachine<SystemZTargetMachine> X(TheSystemZTarget);
+}
+
+/// SystemZTargetMachine ctor - Create an ILP64 architecture model
+///
+SystemZTargetMachine::SystemZTargetMachine(const Target &T,
+ StringRef TT, StringRef CPU,
+ StringRef FS, Reloc::Model RM,
+ CodeModel::Model CM)
+ : LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
+ Subtarget(TT, CPU, FS),
+ DataLayout("E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32"
+ "-f64:64:64-f128:128:128-a0:16:16-n32:64"),
+ InstrInfo(*this), TLInfo(*this), TSInfo(*this),
+ FrameLowering(Subtarget) {
+}
+
+bool SystemZTargetMachine::addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ // Install an instruction selector.
+ PM.add(createSystemZISelDag(*this, OptLevel));
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h
new file mode 100644
index 0000000..43dce4b
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h
@@ -0,0 +1,68 @@
+//==- SystemZTargetMachine.h - Define TargetMachine for SystemZ ---*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the SystemZ specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef LLVM_TARGET_SYSTEMZ_TARGETMACHINE_H
+#define LLVM_TARGET_SYSTEMZ_TARGETMACHINE_H
+
+#include "SystemZInstrInfo.h"
+#include "SystemZISelLowering.h"
+#include "SystemZFrameLowering.h"
+#include "SystemZSelectionDAGInfo.h"
+#include "SystemZRegisterInfo.h"
+#include "SystemZSubtarget.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+/// SystemZTargetMachine
+///
+class SystemZTargetMachine : public LLVMTargetMachine {
+ SystemZSubtarget Subtarget;
+ const TargetData DataLayout; // Calculates type size & alignment
+ SystemZInstrInfo InstrInfo;
+ SystemZTargetLowering TLInfo;
+ SystemZSelectionDAGInfo TSInfo;
+ SystemZFrameLowering FrameLowering;
+public:
+ SystemZTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM);
+
+ virtual const TargetFrameLowering *getFrameLowering() const {
+ return &FrameLowering;
+ }
+ virtual const SystemZInstrInfo *getInstrInfo() const { return &InstrInfo; }
+ virtual const TargetData *getTargetData() const { return &DataLayout;}
+ virtual const SystemZSubtarget *getSubtargetImpl() const { return &Subtarget; }
+
+ virtual const SystemZRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+
+ virtual const SystemZTargetLowering *getTargetLowering() const {
+ return &TLInfo;
+ }
+
+ virtual const SystemZSelectionDAGInfo* getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+
+ virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+}; // SystemZTargetMachine.
+
+} // end namespace llvm
+
+#endif // LLVM_TARGET_SystemZ_TARGETMACHINE_H
diff --git a/contrib/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp b/contrib/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp
new file mode 100644
index 0000000..da99282
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp
@@ -0,0 +1,19 @@
+//===-- SystemZTargetInfo.cpp - SystemZ Target Implementation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZ.h"
+#include "llvm/Module.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::TheSystemZTarget;
+
+extern "C" void LLVMInitializeSystemZTargetInfo() {
+ RegisterTarget<Triple::systemz> X(TheSystemZTarget, "systemz", "SystemZ");
+}
diff --git a/contrib/llvm/lib/Target/Target.cpp b/contrib/llvm/lib/Target/Target.cpp
new file mode 100644
index 0000000..a2b83bc
--- /dev/null
+++ b/contrib/llvm/lib/Target/Target.cpp
@@ -0,0 +1,108 @@
+//===-- Target.cpp --------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the common infrastructure (including C bindings) for
+// libLLVMTarget.a, which implements target information.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm-c/Target.h"
+#include "llvm-c/Initialization.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/PassManager.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/LLVMContext.h"
+#include <cstring>
+
+using namespace llvm;
+
+void llvm::initializeTarget(PassRegistry &Registry) {
+ initializeTargetDataPass(Registry);
+ initializeTargetLibraryInfoPass(Registry);
+}
+
+void LLVMInitializeTarget(LLVMPassRegistryRef R) {
+ initializeTarget(*unwrap(R));
+}
+
+LLVMTargetDataRef LLVMCreateTargetData(const char *StringRep) {
+ return wrap(new TargetData(StringRep));
+}
+
+void LLVMAddTargetData(LLVMTargetDataRef TD, LLVMPassManagerRef PM) {
+ unwrap(PM)->add(new TargetData(*unwrap(TD)));
+}
+
+void LLVMAddTargetLibraryInfo(LLVMTargetLibraryInfoRef TLI,
+ LLVMPassManagerRef PM) {
+ unwrap(PM)->add(new TargetLibraryInfo(*unwrap(TLI)));
+}
+
+char *LLVMCopyStringRepOfTargetData(LLVMTargetDataRef TD) {
+ std::string StringRep = unwrap(TD)->getStringRepresentation();
+ return strdup(StringRep.c_str());
+}
+
+LLVMByteOrdering LLVMByteOrder(LLVMTargetDataRef TD) {
+ return unwrap(TD)->isLittleEndian() ? LLVMLittleEndian : LLVMBigEndian;
+}
+
+unsigned LLVMPointerSize(LLVMTargetDataRef TD) {
+ return unwrap(TD)->getPointerSize();
+}
+
+LLVMTypeRef LLVMIntPtrType(LLVMTargetDataRef TD) {
+ return wrap(unwrap(TD)->getIntPtrType(getGlobalContext()));
+}
+
+unsigned long long LLVMSizeOfTypeInBits(LLVMTargetDataRef TD, LLVMTypeRef Ty) {
+ return unwrap(TD)->getTypeSizeInBits(unwrap(Ty));
+}
+
+unsigned long long LLVMStoreSizeOfType(LLVMTargetDataRef TD, LLVMTypeRef Ty) {
+ return unwrap(TD)->getTypeStoreSize(unwrap(Ty));
+}
+
+unsigned long long LLVMABISizeOfType(LLVMTargetDataRef TD, LLVMTypeRef Ty) {
+ return unwrap(TD)->getTypeAllocSize(unwrap(Ty));
+}
+
+unsigned LLVMABIAlignmentOfType(LLVMTargetDataRef TD, LLVMTypeRef Ty) {
+ return unwrap(TD)->getABITypeAlignment(unwrap(Ty));
+}
+
+unsigned LLVMCallFrameAlignmentOfType(LLVMTargetDataRef TD, LLVMTypeRef Ty) {
+ return unwrap(TD)->getCallFrameTypeAlignment(unwrap(Ty));
+}
+
+unsigned LLVMPreferredAlignmentOfType(LLVMTargetDataRef TD, LLVMTypeRef Ty) {
+ return unwrap(TD)->getPrefTypeAlignment(unwrap(Ty));
+}
+
+unsigned LLVMPreferredAlignmentOfGlobal(LLVMTargetDataRef TD,
+ LLVMValueRef GlobalVar) {
+ return unwrap(TD)->getPreferredAlignment(unwrap<GlobalVariable>(GlobalVar));
+}
+
+unsigned LLVMElementAtOffset(LLVMTargetDataRef TD, LLVMTypeRef StructTy,
+ unsigned long long Offset) {
+ StructType *STy = unwrap<StructType>(StructTy);
+ return unwrap(TD)->getStructLayout(STy)->getElementContainingOffset(Offset);
+}
+
+unsigned long long LLVMOffsetOfElement(LLVMTargetDataRef TD, LLVMTypeRef StructTy,
+ unsigned Element) {
+ StructType *STy = unwrap<StructType>(StructTy);
+ return unwrap(TD)->getStructLayout(STy)->getElementOffset(Element);
+}
+
+void LLVMDisposeTargetData(LLVMTargetDataRef TD) {
+ delete unwrap(TD);
+}
diff --git a/contrib/llvm/lib/Target/TargetData.cpp b/contrib/llvm/lib/Target/TargetData.cpp
new file mode 100644
index 0000000..bd6a6b6
--- /dev/null
+++ b/contrib/llvm/lib/Target/TargetData.cpp
@@ -0,0 +1,598 @@
+//===-- TargetData.cpp - Data size & alignment routines --------------------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines target properties related to datatype size/offset/alignment
+// information.
+//
+// This structure should be created once, filled in if the defaults are not
+// correct and then passed around by const&. None of the members functions
+// require modification to the object.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetData.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/ADT/DenseMap.h"
+#include <algorithm>
+#include <cstdlib>
+using namespace llvm;
+
+// Handle the Pass registration stuff necessary to use TargetData's.
+
+// Register the default SparcV9 implementation...
+INITIALIZE_PASS(TargetData, "targetdata", "Target Data Layout", false, true)
+char TargetData::ID = 0;
+
+//===----------------------------------------------------------------------===//
+// Support for StructLayout
+//===----------------------------------------------------------------------===//
+
+StructLayout::StructLayout(StructType *ST, const TargetData &TD) {
+ assert(!ST->isOpaque() && "Cannot get layout of opaque structs");
+ StructAlignment = 0;
+ StructSize = 0;
+ NumElements = ST->getNumElements();
+
+ // Loop over each of the elements, placing them in memory.
+ for (unsigned i = 0, e = NumElements; i != e; ++i) {
+ Type *Ty = ST->getElementType(i);
+ unsigned TyAlign = ST->isPacked() ? 1 : TD.getABITypeAlignment(Ty);
+
+ // Add padding if necessary to align the data element properly.
+ if ((StructSize & (TyAlign-1)) != 0)
+ StructSize = TargetData::RoundUpAlignment(StructSize, TyAlign);
+
+ // Keep track of maximum alignment constraint.
+ StructAlignment = std::max(TyAlign, StructAlignment);
+
+ MemberOffsets[i] = StructSize;
+ StructSize += TD.getTypeAllocSize(Ty); // Consume space for this data item
+ }
+
+ // Empty structures have alignment of 1 byte.
+ if (StructAlignment == 0) StructAlignment = 1;
+
+ // Add padding to the end of the struct so that it could be put in an array
+ // and all array elements would be aligned correctly.
+ if ((StructSize & (StructAlignment-1)) != 0)
+ StructSize = TargetData::RoundUpAlignment(StructSize, StructAlignment);
+}
+
+
+/// getElementContainingOffset - Given a valid offset into the structure,
+/// return the structure index that contains it.
+unsigned StructLayout::getElementContainingOffset(uint64_t Offset) const {
+ const uint64_t *SI =
+ std::upper_bound(&MemberOffsets[0], &MemberOffsets[NumElements], Offset);
+ assert(SI != &MemberOffsets[0] && "Offset not in structure type!");
+ --SI;
+ assert(*SI <= Offset && "upper_bound didn't work");
+ assert((SI == &MemberOffsets[0] || *(SI-1) <= Offset) &&
+ (SI+1 == &MemberOffsets[NumElements] || *(SI+1) > Offset) &&
+ "Upper bound didn't work!");
+
+ // Multiple fields can have the same offset if any of them are zero sized.
+ // For example, in { i32, [0 x i32], i32 }, searching for offset 4 will stop
+ // at the i32 element, because it is the last element at that offset. This is
+ // the right one to return, because anything after it will have a higher
+ // offset, implying that this element is non-empty.
+ return SI-&MemberOffsets[0];
+}
+
+//===----------------------------------------------------------------------===//
+// TargetAlignElem, TargetAlign support
+//===----------------------------------------------------------------------===//
+
+TargetAlignElem
+TargetAlignElem::get(AlignTypeEnum align_type, unsigned abi_align,
+ unsigned pref_align, uint32_t bit_width) {
+ assert(abi_align <= pref_align && "Preferred alignment worse than ABI!");
+ TargetAlignElem retval;
+ retval.AlignType = align_type;
+ retval.ABIAlign = abi_align;
+ retval.PrefAlign = pref_align;
+ retval.TypeBitWidth = bit_width;
+ return retval;
+}
+
+bool
+TargetAlignElem::operator==(const TargetAlignElem &rhs) const {
+ return (AlignType == rhs.AlignType
+ && ABIAlign == rhs.ABIAlign
+ && PrefAlign == rhs.PrefAlign
+ && TypeBitWidth == rhs.TypeBitWidth);
+}
+
+const TargetAlignElem TargetData::InvalidAlignmentElem =
+ TargetAlignElem::get((AlignTypeEnum) -1, 0, 0, 0);
+
+//===----------------------------------------------------------------------===//
+// TargetData Class Implementation
+//===----------------------------------------------------------------------===//
+
+/// getInt - Get an integer ignoring errors.
+static unsigned getInt(StringRef R) {
+ unsigned Result = 0;
+ R.getAsInteger(10, Result);
+ return Result;
+}
+
+void TargetData::init(StringRef Desc) {
+ initializeTargetDataPass(*PassRegistry::getPassRegistry());
+
+ LayoutMap = 0;
+ LittleEndian = false;
+ PointerMemSize = 8;
+ PointerABIAlign = 8;
+ PointerPrefAlign = PointerABIAlign;
+ StackNaturalAlign = 0;
+
+ // Default alignments
+ setAlignment(INTEGER_ALIGN, 1, 1, 1); // i1
+ setAlignment(INTEGER_ALIGN, 1, 1, 8); // i8
+ setAlignment(INTEGER_ALIGN, 2, 2, 16); // i16
+ setAlignment(INTEGER_ALIGN, 4, 4, 32); // i32
+ setAlignment(INTEGER_ALIGN, 4, 8, 64); // i64
+ setAlignment(FLOAT_ALIGN, 4, 4, 32); // float
+ setAlignment(FLOAT_ALIGN, 8, 8, 64); // double
+ setAlignment(VECTOR_ALIGN, 8, 8, 64); // v2i32, v1i64, ...
+ setAlignment(VECTOR_ALIGN, 16, 16, 128); // v16i8, v8i16, v4i32, ...
+ setAlignment(AGGREGATE_ALIGN, 0, 8, 0); // struct
+
+ while (!Desc.empty()) {
+ std::pair<StringRef, StringRef> Split = Desc.split('-');
+ StringRef Token = Split.first;
+ Desc = Split.second;
+
+ if (Token.empty())
+ continue;
+
+ Split = Token.split(':');
+ StringRef Specifier = Split.first;
+ Token = Split.second;
+
+ assert(!Specifier.empty() && "Can't be empty here");
+
+ switch (Specifier[0]) {
+ case 'E':
+ LittleEndian = false;
+ break;
+ case 'e':
+ LittleEndian = true;
+ break;
+ case 'p':
+ Split = Token.split(':');
+ PointerMemSize = getInt(Split.first) / 8;
+ Split = Split.second.split(':');
+ PointerABIAlign = getInt(Split.first) / 8;
+ Split = Split.second.split(':');
+ PointerPrefAlign = getInt(Split.first) / 8;
+ if (PointerPrefAlign == 0)
+ PointerPrefAlign = PointerABIAlign;
+ break;
+ case 'i':
+ case 'v':
+ case 'f':
+ case 'a':
+ case 's': {
+ AlignTypeEnum AlignType;
+ switch (Specifier[0]) {
+ default:
+ case 'i': AlignType = INTEGER_ALIGN; break;
+ case 'v': AlignType = VECTOR_ALIGN; break;
+ case 'f': AlignType = FLOAT_ALIGN; break;
+ case 'a': AlignType = AGGREGATE_ALIGN; break;
+ case 's': AlignType = STACK_ALIGN; break;
+ }
+ unsigned Size = getInt(Specifier.substr(1));
+ Split = Token.split(':');
+ unsigned ABIAlign = getInt(Split.first) / 8;
+
+ Split = Split.second.split(':');
+ unsigned PrefAlign = getInt(Split.first) / 8;
+ if (PrefAlign == 0)
+ PrefAlign = ABIAlign;
+ setAlignment(AlignType, ABIAlign, PrefAlign, Size);
+ break;
+ }
+ case 'n': // Native integer types.
+ Specifier = Specifier.substr(1);
+ do {
+ if (unsigned Width = getInt(Specifier))
+ LegalIntWidths.push_back(Width);
+ Split = Token.split(':');
+ Specifier = Split.first;
+ Token = Split.second;
+ } while (!Specifier.empty() || !Token.empty());
+ break;
+ case 'S': // Stack natural alignment.
+ StackNaturalAlign = getInt(Specifier.substr(1));
+ StackNaturalAlign /= 8;
+ // FIXME: Should we really be truncating these alingments and
+ // sizes silently?
+ break;
+ default:
+ break;
+ }
+ }
+}
+
+/// Default ctor.
+///
+/// @note This has to exist, because this is a pass, but it should never be
+/// used.
+TargetData::TargetData() : ImmutablePass(ID) {
+ report_fatal_error("Bad TargetData ctor used. "
+ "Tool did not specify a TargetData to use?");
+}
+
+TargetData::TargetData(const Module *M)
+ : ImmutablePass(ID) {
+ init(M->getDataLayout());
+}
+
+void
+TargetData::setAlignment(AlignTypeEnum align_type, unsigned abi_align,
+ unsigned pref_align, uint32_t bit_width) {
+ assert(abi_align <= pref_align && "Preferred alignment worse than ABI!");
+ for (unsigned i = 0, e = Alignments.size(); i != e; ++i) {
+ if (Alignments[i].AlignType == align_type &&
+ Alignments[i].TypeBitWidth == bit_width) {
+ // Update the abi, preferred alignments.
+ Alignments[i].ABIAlign = abi_align;
+ Alignments[i].PrefAlign = pref_align;
+ return;
+ }
+ }
+
+ Alignments.push_back(TargetAlignElem::get(align_type, abi_align,
+ pref_align, bit_width));
+}
+
+/// getAlignmentInfo - Return the alignment (either ABI if ABIInfo = true or
+/// preferred if ABIInfo = false) the target wants for the specified datatype.
+unsigned TargetData::getAlignmentInfo(AlignTypeEnum AlignType,
+ uint32_t BitWidth, bool ABIInfo,
+ Type *Ty) const {
+ // Check to see if we have an exact match and remember the best match we see.
+ int BestMatchIdx = -1;
+ int LargestInt = -1;
+ for (unsigned i = 0, e = Alignments.size(); i != e; ++i) {
+ if (Alignments[i].AlignType == AlignType &&
+ Alignments[i].TypeBitWidth == BitWidth)
+ return ABIInfo ? Alignments[i].ABIAlign : Alignments[i].PrefAlign;
+
+ // The best match so far depends on what we're looking for.
+ if (AlignType == INTEGER_ALIGN &&
+ Alignments[i].AlignType == INTEGER_ALIGN) {
+ // The "best match" for integers is the smallest size that is larger than
+ // the BitWidth requested.
+ if (Alignments[i].TypeBitWidth > BitWidth && (BestMatchIdx == -1 ||
+ Alignments[i].TypeBitWidth < Alignments[BestMatchIdx].TypeBitWidth))
+ BestMatchIdx = i;
+ // However, if there isn't one that's larger, then we must use the
+ // largest one we have (see below)
+ if (LargestInt == -1 ||
+ Alignments[i].TypeBitWidth > Alignments[LargestInt].TypeBitWidth)
+ LargestInt = i;
+ }
+ }
+
+ // Okay, we didn't find an exact solution. Fall back here depending on what
+ // is being looked for.
+ if (BestMatchIdx == -1) {
+ // If we didn't find an integer alignment, fall back on most conservative.
+ if (AlignType == INTEGER_ALIGN) {
+ BestMatchIdx = LargestInt;
+ } else {
+ assert(AlignType == VECTOR_ALIGN && "Unknown alignment type!");
+
+ // By default, use natural alignment for vector types. This is consistent
+ // with what clang and llvm-gcc do.
+ unsigned Align = getTypeAllocSize(cast<VectorType>(Ty)->getElementType());
+ Align *= cast<VectorType>(Ty)->getNumElements();
+ // If the alignment is not a power of 2, round up to the next power of 2.
+ // This happens for non-power-of-2 length vectors.
+ if (Align & (Align-1))
+ Align = llvm::NextPowerOf2(Align);
+ return Align;
+ }
+ }
+
+ // Since we got a "best match" index, just return it.
+ return ABIInfo ? Alignments[BestMatchIdx].ABIAlign
+ : Alignments[BestMatchIdx].PrefAlign;
+}
+
+namespace {
+
+class StructLayoutMap {
+ typedef DenseMap<StructType*, StructLayout*> LayoutInfoTy;
+ LayoutInfoTy LayoutInfo;
+
+public:
+ virtual ~StructLayoutMap() {
+ // Remove any layouts.
+ for (LayoutInfoTy::iterator I = LayoutInfo.begin(), E = LayoutInfo.end();
+ I != E; ++I) {
+ StructLayout *Value = I->second;
+ Value->~StructLayout();
+ free(Value);
+ }
+ }
+
+ StructLayout *&operator[](StructType *STy) {
+ return LayoutInfo[STy];
+ }
+
+ // for debugging...
+ virtual void dump() const {}
+};
+
+} // end anonymous namespace
+
+TargetData::~TargetData() {
+ delete static_cast<StructLayoutMap*>(LayoutMap);
+}
+
+const StructLayout *TargetData::getStructLayout(StructType *Ty) const {
+ if (!LayoutMap)
+ LayoutMap = new StructLayoutMap();
+
+ StructLayoutMap *STM = static_cast<StructLayoutMap*>(LayoutMap);
+ StructLayout *&SL = (*STM)[Ty];
+ if (SL) return SL;
+
+ // Otherwise, create the struct layout. Because it is variable length, we
+ // malloc it, then use placement new.
+ int NumElts = Ty->getNumElements();
+ StructLayout *L =
+ (StructLayout *)malloc(sizeof(StructLayout)+(NumElts-1) * sizeof(uint64_t));
+
+ // Set SL before calling StructLayout's ctor. The ctor could cause other
+ // entries to be added to TheMap, invalidating our reference.
+ SL = L;
+
+ new (L) StructLayout(Ty, *this);
+
+ return L;
+}
+
+std::string TargetData::getStringRepresentation() const {
+ std::string Result;
+ raw_string_ostream OS(Result);
+
+ OS << (LittleEndian ? "e" : "E")
+ << "-p:" << PointerMemSize*8 << ':' << PointerABIAlign*8
+ << ':' << PointerPrefAlign*8
+ << "-S" << StackNaturalAlign*8;
+
+ for (unsigned i = 0, e = Alignments.size(); i != e; ++i) {
+ const TargetAlignElem &AI = Alignments[i];
+ OS << '-' << (char)AI.AlignType << AI.TypeBitWidth << ':'
+ << AI.ABIAlign*8 << ':' << AI.PrefAlign*8;
+ }
+
+ if (!LegalIntWidths.empty()) {
+ OS << "-n" << (unsigned)LegalIntWidths[0];
+
+ for (unsigned i = 1, e = LegalIntWidths.size(); i != e; ++i)
+ OS << ':' << (unsigned)LegalIntWidths[i];
+ }
+ return OS.str();
+}
+
+
+uint64_t TargetData::getTypeSizeInBits(Type *Ty) const {
+ assert(Ty->isSized() && "Cannot getTypeInfo() on a type that is unsized!");
+ switch (Ty->getTypeID()) {
+ case Type::LabelTyID:
+ case Type::PointerTyID:
+ return getPointerSizeInBits();
+ case Type::ArrayTyID: {
+ ArrayType *ATy = cast<ArrayType>(Ty);
+ return getTypeAllocSizeInBits(ATy->getElementType())*ATy->getNumElements();
+ }
+ case Type::StructTyID:
+ // Get the layout annotation... which is lazily created on demand.
+ return getStructLayout(cast<StructType>(Ty))->getSizeInBits();
+ case Type::IntegerTyID:
+ return cast<IntegerType>(Ty)->getBitWidth();
+ case Type::VoidTyID:
+ return 8;
+ case Type::FloatTyID:
+ return 32;
+ case Type::DoubleTyID:
+ case Type::X86_MMXTyID:
+ return 64;
+ case Type::PPC_FP128TyID:
+ case Type::FP128TyID:
+ return 128;
+ // In memory objects this is always aligned to a higher boundary, but
+ // only 80 bits contain information.
+ case Type::X86_FP80TyID:
+ return 80;
+ case Type::VectorTyID:
+ return cast<VectorType>(Ty)->getBitWidth();
+ default:
+ llvm_unreachable("TargetData::getTypeSizeInBits(): Unsupported type");
+ break;
+ }
+ return 0;
+}
+
+/*!
+ \param abi_or_pref Flag that determines which alignment is returned. true
+ returns the ABI alignment, false returns the preferred alignment.
+ \param Ty The underlying type for which alignment is determined.
+
+ Get the ABI (\a abi_or_pref == true) or preferred alignment (\a abi_or_pref
+ == false) for the requested type \a Ty.
+ */
+unsigned TargetData::getAlignment(Type *Ty, bool abi_or_pref) const {
+ int AlignType = -1;
+
+ assert(Ty->isSized() && "Cannot getTypeInfo() on a type that is unsized!");
+ switch (Ty->getTypeID()) {
+ // Early escape for the non-numeric types.
+ case Type::LabelTyID:
+ case Type::PointerTyID:
+ return (abi_or_pref
+ ? getPointerABIAlignment()
+ : getPointerPrefAlignment());
+ case Type::ArrayTyID:
+ return getAlignment(cast<ArrayType>(Ty)->getElementType(), abi_or_pref);
+
+ case Type::StructTyID: {
+ // Packed structure types always have an ABI alignment of one.
+ if (cast<StructType>(Ty)->isPacked() && abi_or_pref)
+ return 1;
+
+ // Get the layout annotation... which is lazily created on demand.
+ const StructLayout *Layout = getStructLayout(cast<StructType>(Ty));
+ unsigned Align = getAlignmentInfo(AGGREGATE_ALIGN, 0, abi_or_pref, Ty);
+ return std::max(Align, Layout->getAlignment());
+ }
+ case Type::IntegerTyID:
+ case Type::VoidTyID:
+ AlignType = INTEGER_ALIGN;
+ break;
+ case Type::FloatTyID:
+ case Type::DoubleTyID:
+ // PPC_FP128TyID and FP128TyID have different data contents, but the
+ // same size and alignment, so they look the same here.
+ case Type::PPC_FP128TyID:
+ case Type::FP128TyID:
+ case Type::X86_FP80TyID:
+ AlignType = FLOAT_ALIGN;
+ break;
+ case Type::X86_MMXTyID:
+ case Type::VectorTyID:
+ AlignType = VECTOR_ALIGN;
+ break;
+ default:
+ llvm_unreachable("Bad type for getAlignment!!!");
+ break;
+ }
+
+ return getAlignmentInfo((AlignTypeEnum)AlignType, getTypeSizeInBits(Ty),
+ abi_or_pref, Ty);
+}
+
+unsigned TargetData::getABITypeAlignment(Type *Ty) const {
+ return getAlignment(Ty, true);
+}
+
+/// getABIIntegerTypeAlignment - Return the minimum ABI-required alignment for
+/// an integer type of the specified bitwidth.
+unsigned TargetData::getABIIntegerTypeAlignment(unsigned BitWidth) const {
+ return getAlignmentInfo(INTEGER_ALIGN, BitWidth, true, 0);
+}
+
+
+unsigned TargetData::getCallFrameTypeAlignment(Type *Ty) const {
+ for (unsigned i = 0, e = Alignments.size(); i != e; ++i)
+ if (Alignments[i].AlignType == STACK_ALIGN)
+ return Alignments[i].ABIAlign;
+
+ return getABITypeAlignment(Ty);
+}
+
+unsigned TargetData::getPrefTypeAlignment(Type *Ty) const {
+ return getAlignment(Ty, false);
+}
+
+unsigned TargetData::getPreferredTypeAlignmentShift(Type *Ty) const {
+ unsigned Align = getPrefTypeAlignment(Ty);
+ assert(!(Align & (Align-1)) && "Alignment is not a power of two!");
+ return Log2_32(Align);
+}
+
+/// getIntPtrType - Return an unsigned integer type that is the same size or
+/// greater to the host pointer size.
+IntegerType *TargetData::getIntPtrType(LLVMContext &C) const {
+ return IntegerType::get(C, getPointerSizeInBits());
+}
+
+
+uint64_t TargetData::getIndexedOffset(Type *ptrTy,
+ ArrayRef<Value *> Indices) const {
+ Type *Ty = ptrTy;
+ assert(Ty->isPointerTy() && "Illegal argument for getIndexedOffset()");
+ uint64_t Result = 0;
+
+ generic_gep_type_iterator<Value* const*>
+ TI = gep_type_begin(ptrTy, Indices);
+ for (unsigned CurIDX = 0, EndIDX = Indices.size(); CurIDX != EndIDX;
+ ++CurIDX, ++TI) {
+ if (StructType *STy = dyn_cast<StructType>(*TI)) {
+ assert(Indices[CurIDX]->getType() ==
+ Type::getInt32Ty(ptrTy->getContext()) &&
+ "Illegal struct idx");
+ unsigned FieldNo = cast<ConstantInt>(Indices[CurIDX])->getZExtValue();
+
+ // Get structure layout information...
+ const StructLayout *Layout = getStructLayout(STy);
+
+ // Add in the offset, as calculated by the structure layout info...
+ Result += Layout->getElementOffset(FieldNo);
+
+ // Update Ty to refer to current element
+ Ty = STy->getElementType(FieldNo);
+ } else {
+ // Update Ty to refer to current element
+ Ty = cast<SequentialType>(Ty)->getElementType();
+
+ // Get the array index and the size of each array element.
+ if (int64_t arrayIdx = cast<ConstantInt>(Indices[CurIDX])->getSExtValue())
+ Result += (uint64_t)arrayIdx * getTypeAllocSize(Ty);
+ }
+ }
+
+ return Result;
+}
+
+/// getPreferredAlignment - Return the preferred alignment of the specified
+/// global. This includes an explicitly requested alignment (if the global
+/// has one).
+unsigned TargetData::getPreferredAlignment(const GlobalVariable *GV) const {
+ Type *ElemType = GV->getType()->getElementType();
+ unsigned Alignment = getPrefTypeAlignment(ElemType);
+ unsigned GVAlignment = GV->getAlignment();
+ if (GVAlignment >= Alignment) {
+ Alignment = GVAlignment;
+ } else if (GVAlignment != 0) {
+ Alignment = std::max(GVAlignment, getABITypeAlignment(ElemType));
+ }
+
+ if (GV->hasInitializer() && GVAlignment == 0) {
+ if (Alignment < 16) {
+ // If the global is not external, see if it is large. If so, give it a
+ // larger alignment.
+ if (getTypeSizeInBits(ElemType) > 128)
+ Alignment = 16; // 16-byte alignment.
+ }
+ }
+ return Alignment;
+}
+
+/// getPreferredAlignmentLog - Return the preferred alignment of the
+/// specified global, returned in log form. This includes an explicitly
+/// requested alignment (if the global has one).
+unsigned TargetData::getPreferredAlignmentLog(const GlobalVariable *GV) const {
+ return Log2_32(getPreferredAlignment(GV));
+}
diff --git a/contrib/llvm/lib/Target/TargetELFWriterInfo.cpp b/contrib/llvm/lib/Target/TargetELFWriterInfo.cpp
new file mode 100644
index 0000000..a661ee9
--- /dev/null
+++ b/contrib/llvm/lib/Target/TargetELFWriterInfo.cpp
@@ -0,0 +1,25 @@
+//===-- lib/Target/TargetELFWriterInfo.cpp - ELF Writer Info --0-*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TargetELFWriterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Function.h"
+#include "llvm/Target/TargetELFWriterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+TargetELFWriterInfo::TargetELFWriterInfo(bool is64Bit_, bool isLittleEndian_) :
+ is64Bit(is64Bit_), isLittleEndian(isLittleEndian_) {
+}
+
+TargetELFWriterInfo::~TargetELFWriterInfo() {}
+
diff --git a/contrib/llvm/lib/Target/TargetFrameLowering.cpp b/contrib/llvm/lib/Target/TargetFrameLowering.cpp
new file mode 100644
index 0000000..122f869
--- /dev/null
+++ b/contrib/llvm/lib/Target/TargetFrameLowering.cpp
@@ -0,0 +1,45 @@
+//===----- TargetFrameLowering.cpp - Implement target frame interface ------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements the layout of a stack frame on the target machine.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#include <cstdlib>
+using namespace llvm;
+
+TargetFrameLowering::~TargetFrameLowering() {
+}
+
+/// getFrameIndexOffset - Returns the displacement from the frame register to
+/// the stack frame of the specified index. This is the default implementation
+/// which is overridden for some targets.
+int TargetFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
+ int FI) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ return MFI->getObjectOffset(FI) + MFI->getStackSize() -
+ getOffsetOfLocalArea() + MFI->getOffsetAdjustment();
+}
+
+int TargetFrameLowering::getFrameIndexReference(const MachineFunction &MF,
+ int FI, unsigned &FrameReg) const {
+ const TargetRegisterInfo *RI = MF.getTarget().getRegisterInfo();
+
+ // By default, assume all frame indices are referenced via whatever
+ // getFrameRegister() says. The target can override this if it's doing
+ // something different.
+ FrameReg = RI->getFrameRegister(MF);
+ return getFrameIndexOffset(MF, FI);
+}
diff --git a/contrib/llvm/lib/Target/TargetInstrInfo.cpp b/contrib/llvm/lib/Target/TargetInstrInfo.cpp
new file mode 100644
index 0000000..d52ecb3
--- /dev/null
+++ b/contrib/llvm/lib/Target/TargetInstrInfo.cpp
@@ -0,0 +1,174 @@
+//===-- TargetInstrInfo.cpp - Target Instruction Information --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cctype>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// TargetInstrInfo
+//===----------------------------------------------------------------------===//
+
+TargetInstrInfo::~TargetInstrInfo() {
+}
+
+const TargetRegisterClass*
+TargetInstrInfo::getRegClass(const MCInstrDesc &MCID, unsigned OpNum,
+ const TargetRegisterInfo *TRI) const {
+ if (OpNum >= MCID.getNumOperands())
+ return 0;
+
+ short RegClass = MCID.OpInfo[OpNum].RegClass;
+ if (MCID.OpInfo[OpNum].isLookupPtrRegClass())
+ return TRI->getPointerRegClass(RegClass);
+
+ // Instructions like INSERT_SUBREG do not have fixed register classes.
+ if (RegClass < 0)
+ return 0;
+
+ // Otherwise just look it up normally.
+ return TRI->getRegClass(RegClass);
+}
+
+unsigned
+TargetInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
+ const MachineInstr *MI) const {
+ if (!ItinData || ItinData->isEmpty())
+ return 1;
+
+ unsigned Class = MI->getDesc().getSchedClass();
+ unsigned UOps = ItinData->Itineraries[Class].NumMicroOps;
+ if (UOps)
+ return UOps;
+
+ // The # of u-ops is dynamically determined. The specific target should
+ // override this function to return the right number.
+ return 1;
+}
+
+int
+TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI, unsigned UseIdx) const {
+ if (!ItinData || ItinData->isEmpty())
+ return -1;
+
+ unsigned DefClass = DefMI->getDesc().getSchedClass();
+ unsigned UseClass = UseMI->getDesc().getSchedClass();
+ return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
+}
+
+int
+TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
+ SDNode *DefNode, unsigned DefIdx,
+ SDNode *UseNode, unsigned UseIdx) const {
+ if (!ItinData || ItinData->isEmpty())
+ return -1;
+
+ if (!DefNode->isMachineOpcode())
+ return -1;
+
+ unsigned DefClass = get(DefNode->getMachineOpcode()).getSchedClass();
+ if (!UseNode->isMachineOpcode())
+ return ItinData->getOperandCycle(DefClass, DefIdx);
+ unsigned UseClass = get(UseNode->getMachineOpcode()).getSchedClass();
+ return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
+}
+
+int TargetInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *MI,
+ unsigned *PredCost) const {
+ if (!ItinData || ItinData->isEmpty())
+ return 1;
+
+ return ItinData->getStageLatency(MI->getDesc().getSchedClass());
+}
+
+int TargetInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+ SDNode *N) const {
+ if (!ItinData || ItinData->isEmpty())
+ return 1;
+
+ if (!N->isMachineOpcode())
+ return 1;
+
+ return ItinData->getStageLatency(get(N->getMachineOpcode()).getSchedClass());
+}
+
+bool TargetInstrInfo::hasLowDefLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI,
+ unsigned DefIdx) const {
+ if (!ItinData || ItinData->isEmpty())
+ return false;
+
+ unsigned DefClass = DefMI->getDesc().getSchedClass();
+ int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
+ return (DefCycle != -1 && DefCycle <= 1);
+}
+
+/// insertNoop - Insert a noop into the instruction stream at the specified
+/// point.
+void TargetInstrInfo::insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const {
+ llvm_unreachable("Target didn't implement insertNoop!");
+}
+
+
+bool TargetInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (!MCID.isTerminator()) return false;
+
+ // Conditional branch is a special case.
+ if (MCID.isBranch() && !MCID.isBarrier())
+ return true;
+ if (!MCID.isPredicable())
+ return true;
+ return !isPredicated(MI);
+}
+
+
+/// Measure the specified inline asm to determine an approximation of its
+/// length.
+/// Comments (which run till the next SeparatorString or newline) do not
+/// count as an instruction.
+/// Any other non-whitespace text is considered an instruction, with
+/// multiple instructions separated by SeparatorString or newlines.
+/// Variable-length instructions are not handled here; this function
+/// may be overloaded in the target code to do that.
+unsigned TargetInstrInfo::getInlineAsmLength(const char *Str,
+ const MCAsmInfo &MAI) const {
+
+
+ // Count the number of instructions in the asm.
+ bool atInsnStart = true;
+ unsigned Length = 0;
+ for (; *Str; ++Str) {
+ if (*Str == '\n' || strncmp(Str, MAI.getSeparatorString(),
+ strlen(MAI.getSeparatorString())) == 0)
+ atInsnStart = true;
+ if (atInsnStart && !std::isspace(*Str)) {
+ Length += MAI.getMaxInstLength();
+ atInsnStart = false;
+ }
+ if (atInsnStart && strncmp(Str, MAI.getCommentString(),
+ strlen(MAI.getCommentString())) == 0)
+ atInsnStart = false;
+ }
+
+ return Length;
+}
diff --git a/contrib/llvm/lib/Target/TargetIntrinsicInfo.cpp b/contrib/llvm/lib/Target/TargetIntrinsicInfo.cpp
new file mode 100644
index 0000000..e049a1d
--- /dev/null
+++ b/contrib/llvm/lib/Target/TargetIntrinsicInfo.cpp
@@ -0,0 +1,30 @@
+//===-- TargetIntrinsicInfo.cpp - Target Instruction Information ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TargetIntrinsicInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetIntrinsicInfo.h"
+#include "llvm/Function.h"
+#include "llvm/ADT/StringMap.h"
+using namespace llvm;
+
+TargetIntrinsicInfo::TargetIntrinsicInfo() {
+}
+
+TargetIntrinsicInfo::~TargetIntrinsicInfo() {
+}
+
+unsigned TargetIntrinsicInfo::getIntrinsicID(Function *F) const {
+ const ValueName *ValName = F->getValueName();
+ if (!ValName)
+ return 0;
+ return lookupName(ValName->getKeyData(), ValName->getKeyLength());
+}
diff --git a/contrib/llvm/lib/Target/TargetLibraryInfo.cpp b/contrib/llvm/lib/Target/TargetLibraryInfo.cpp
new file mode 100644
index 0000000..709dfd2
--- /dev/null
+++ b/contrib/llvm/lib/Target/TargetLibraryInfo.cpp
@@ -0,0 +1,74 @@
+//===-- TargetLibraryInfo.cpp - Runtime library information ----------------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TargetLibraryInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/ADT/Triple.h"
+using namespace llvm;
+
+// Register the default implementation.
+INITIALIZE_PASS(TargetLibraryInfo, "targetlibinfo",
+ "Target Library Information", false, true)
+char TargetLibraryInfo::ID = 0;
+
+/// initialize - Initialize the set of available library functions based on the
+/// specified target triple. This should be carefully written so that a missing
+/// target triple gets a sane set of defaults.
+static void initialize(TargetLibraryInfo &TLI, const Triple &T) {
+ initializeTargetLibraryInfoPass(*PassRegistry::getPassRegistry());
+
+
+ // memset_pattern16 is only available on iOS 3.0 and Mac OS/X 10.5 and later.
+ if (T.isMacOSX()) {
+ if (T.isMacOSXVersionLT(10, 5))
+ TLI.setUnavailable(LibFunc::memset_pattern16);
+ } else if (T.getOS() == Triple::IOS) {
+ if (T.isOSVersionLT(3, 0))
+ TLI.setUnavailable(LibFunc::memset_pattern16);
+ } else {
+ TLI.setUnavailable(LibFunc::memset_pattern16);
+ }
+
+ // iprintf and friends are only available on XCore and TCE.
+ if (T.getArch() != Triple::xcore && T.getArch() != Triple::tce) {
+ TLI.setUnavailable(LibFunc::iprintf);
+ TLI.setUnavailable(LibFunc::siprintf);
+ TLI.setUnavailable(LibFunc::fiprintf);
+ }
+}
+
+
+TargetLibraryInfo::TargetLibraryInfo() : ImmutablePass(ID) {
+ // Default to everything being available.
+ memset(AvailableArray, -1, sizeof(AvailableArray));
+
+ initialize(*this, Triple());
+}
+
+TargetLibraryInfo::TargetLibraryInfo(const Triple &T) : ImmutablePass(ID) {
+ // Default to everything being available.
+ memset(AvailableArray, -1, sizeof(AvailableArray));
+
+ initialize(*this, T);
+}
+
+TargetLibraryInfo::TargetLibraryInfo(const TargetLibraryInfo &TLI)
+ : ImmutablePass(ID) {
+ memcpy(AvailableArray, TLI.AvailableArray, sizeof(AvailableArray));
+}
+
+
+/// disableAllFunctions - This disables all builtins, which is used for options
+/// like -fno-builtin.
+void TargetLibraryInfo::disableAllFunctions() {
+ memset(AvailableArray, 0, sizeof(AvailableArray));
+}
diff --git a/contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp b/contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp
new file mode 100644
index 0000000..56b7b69
--- /dev/null
+++ b/contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp
@@ -0,0 +1,325 @@
+//===-- llvm/Target/TargetLoweringObjectFile.cpp - Object File Info -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements classes used to handle lowerings specific to common
+// object file formats.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SmallString.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Generic Code
+//===----------------------------------------------------------------------===//
+
+/// Initialize - this method must be called before any actual lowering is
+/// done. This specifies the current context for codegen, and gives the
+/// lowering implementations a chance to set up their default sections.
+void TargetLoweringObjectFile::Initialize(MCContext &ctx,
+ const TargetMachine &TM) {
+ Ctx = &ctx;
+ InitMCObjectFileInfo(TM.getTargetTriple(),
+ TM.getRelocationModel(), TM.getCodeModel(), *Ctx);
+}
+
+TargetLoweringObjectFile::~TargetLoweringObjectFile() {
+}
+
+static bool isSuitableForBSS(const GlobalVariable *GV) {
+ const Constant *C = GV->getInitializer();
+
+ // Must have zero initializer.
+ if (!C->isNullValue())
+ return false;
+
+ // Leave constant zeros in readonly constant sections, so they can be shared.
+ if (GV->isConstant())
+ return false;
+
+ // If the global has an explicit section specified, don't put it in BSS.
+ if (!GV->getSection().empty())
+ return false;
+
+ // If -nozero-initialized-in-bss is specified, don't ever use BSS.
+ if (NoZerosInBSS)
+ return false;
+
+ // Otherwise, put it in BSS!
+ return true;
+}
+
+/// IsNullTerminatedString - Return true if the specified constant (which is
+/// known to have a type that is an array of 1/2/4 byte elements) ends with a
+/// nul value and contains no other nuls in it.
+static bool IsNullTerminatedString(const Constant *C) {
+ ArrayType *ATy = cast<ArrayType>(C->getType());
+
+ // First check: is we have constant array of i8 terminated with zero
+ if (const ConstantArray *CVA = dyn_cast<ConstantArray>(C)) {
+ if (ATy->getNumElements() == 0) return false;
+
+ ConstantInt *Null =
+ dyn_cast<ConstantInt>(CVA->getOperand(ATy->getNumElements()-1));
+ if (Null == 0 || !Null->isZero())
+ return false; // Not null terminated.
+
+ // Verify that the null doesn't occur anywhere else in the string.
+ for (unsigned i = 0, e = ATy->getNumElements()-1; i != e; ++i)
+ // Reject constantexpr elements etc.
+ if (!isa<ConstantInt>(CVA->getOperand(i)) ||
+ CVA->getOperand(i) == Null)
+ return false;
+ return true;
+ }
+
+ // Another possibility: [1 x i8] zeroinitializer
+ if (isa<ConstantAggregateZero>(C))
+ return ATy->getNumElements() == 1;
+
+ return false;
+}
+
+MCSymbol *TargetLoweringObjectFile::
+getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI) const {
+ return Mang->getSymbol(GV);
+}
+
+void TargetLoweringObjectFile::emitPersonalityValue(MCStreamer &Streamer,
+ const TargetMachine &TM,
+ const MCSymbol *Sym) const {
+}
+
+
+/// getKindForGlobal - This is a top-level target-independent classifier for
+/// a global variable. Given an global variable and information from TM, it
+/// classifies the global in a variety of ways that make various target
+/// implementations simpler. The target implementation is free to ignore this
+/// extra info of course.
+SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalValue *GV,
+ const TargetMachine &TM){
+ assert(!GV->isDeclaration() && !GV->hasAvailableExternallyLinkage() &&
+ "Can only be used for global definitions");
+
+ Reloc::Model ReloModel = TM.getRelocationModel();
+
+ // Early exit - functions should be always in text sections.
+ const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
+ if (GVar == 0)
+ return SectionKind::getText();
+
+ // Handle thread-local data first.
+ if (GVar->isThreadLocal()) {
+ if (isSuitableForBSS(GVar))
+ return SectionKind::getThreadBSS();
+ return SectionKind::getThreadData();
+ }
+
+ // Variables with common linkage always get classified as common.
+ if (GVar->hasCommonLinkage())
+ return SectionKind::getCommon();
+
+ // Variable can be easily put to BSS section.
+ if (isSuitableForBSS(GVar)) {
+ if (GVar->hasLocalLinkage())
+ return SectionKind::getBSSLocal();
+ else if (GVar->hasExternalLinkage())
+ return SectionKind::getBSSExtern();
+ return SectionKind::getBSS();
+ }
+
+ const Constant *C = GVar->getInitializer();
+
+ // If the global is marked constant, we can put it into a mergable section,
+ // a mergable string section, or general .data if it contains relocations.
+ if (GVar->isConstant()) {
+ // If the initializer for the global contains something that requires a
+ // relocation, then we may have to drop this into a wriable data section
+ // even though it is marked const.
+ switch (C->getRelocationInfo()) {
+ default: assert(0 && "unknown relocation info kind");
+ case Constant::NoRelocation:
+ // If the global is required to have a unique address, it can't be put
+ // into a mergable section: just drop it into the general read-only
+ // section instead.
+ if (!GVar->hasUnnamedAddr())
+ return SectionKind::getReadOnly();
+
+ // If initializer is a null-terminated string, put it in a "cstring"
+ // section of the right width.
+ if (ArrayType *ATy = dyn_cast<ArrayType>(C->getType())) {
+ if (IntegerType *ITy =
+ dyn_cast<IntegerType>(ATy->getElementType())) {
+ if ((ITy->getBitWidth() == 8 || ITy->getBitWidth() == 16 ||
+ ITy->getBitWidth() == 32) &&
+ IsNullTerminatedString(C)) {
+ if (ITy->getBitWidth() == 8)
+ return SectionKind::getMergeable1ByteCString();
+ if (ITy->getBitWidth() == 16)
+ return SectionKind::getMergeable2ByteCString();
+
+ assert(ITy->getBitWidth() == 32 && "Unknown width");
+ return SectionKind::getMergeable4ByteCString();
+ }
+ }
+ }
+
+ // Otherwise, just drop it into a mergable constant section. If we have
+ // a section for this size, use it, otherwise use the arbitrary sized
+ // mergable section.
+ switch (TM.getTargetData()->getTypeAllocSize(C->getType())) {
+ case 4: return SectionKind::getMergeableConst4();
+ case 8: return SectionKind::getMergeableConst8();
+ case 16: return SectionKind::getMergeableConst16();
+ default: return SectionKind::getMergeableConst();
+ }
+
+ case Constant::LocalRelocation:
+ // In static relocation model, the linker will resolve all addresses, so
+ // the relocation entries will actually be constants by the time the app
+ // starts up. However, we can't put this into a mergable section, because
+ // the linker doesn't take relocations into consideration when it tries to
+ // merge entries in the section.
+ if (ReloModel == Reloc::Static)
+ return SectionKind::getReadOnly();
+
+ // Otherwise, the dynamic linker needs to fix it up, put it in the
+ // writable data.rel.local section.
+ return SectionKind::getReadOnlyWithRelLocal();
+
+ case Constant::GlobalRelocations:
+ // In static relocation model, the linker will resolve all addresses, so
+ // the relocation entries will actually be constants by the time the app
+ // starts up. However, we can't put this into a mergable section, because
+ // the linker doesn't take relocations into consideration when it tries to
+ // merge entries in the section.
+ if (ReloModel == Reloc::Static)
+ return SectionKind::getReadOnly();
+
+ // Otherwise, the dynamic linker needs to fix it up, put it in the
+ // writable data.rel section.
+ return SectionKind::getReadOnlyWithRel();
+ }
+ }
+
+ // Okay, this isn't a constant. If the initializer for the global is going
+ // to require a runtime relocation by the dynamic linker, put it into a more
+ // specific section to improve startup time of the app. This coalesces these
+ // globals together onto fewer pages, improving the locality of the dynamic
+ // linker.
+ if (ReloModel == Reloc::Static)
+ return SectionKind::getDataNoRel();
+
+ switch (C->getRelocationInfo()) {
+ default: assert(0 && "unknown relocation info kind");
+ case Constant::NoRelocation:
+ return SectionKind::getDataNoRel();
+ case Constant::LocalRelocation:
+ return SectionKind::getDataRelLocal();
+ case Constant::GlobalRelocations:
+ return SectionKind::getDataRel();
+ }
+}
+
+/// SectionForGlobal - This method computes the appropriate section to emit
+/// the specified global variable or function definition. This should not
+/// be passed external (or available externally) globals.
+const MCSection *TargetLoweringObjectFile::
+SectionForGlobal(const GlobalValue *GV, SectionKind Kind, Mangler *Mang,
+ const TargetMachine &TM) const {
+ // Select section name.
+ if (GV->hasSection())
+ return getExplicitSectionGlobal(GV, Kind, Mang, TM);
+
+
+ // Use default section depending on the 'type' of global
+ return SelectSectionForGlobal(GV, Kind, Mang, TM);
+}
+
+
+// Lame default implementation. Calculate the section name for global.
+const MCSection *
+TargetLoweringObjectFile::SelectSectionForGlobal(const GlobalValue *GV,
+ SectionKind Kind,
+ Mangler *Mang,
+ const TargetMachine &TM) const{
+ assert(!Kind.isThreadLocal() && "Doesn't support TLS");
+
+ if (Kind.isText())
+ return getTextSection();
+
+ if (Kind.isBSS() && BSSSection != 0)
+ return BSSSection;
+
+ if (Kind.isReadOnly() && ReadOnlySection != 0)
+ return ReadOnlySection;
+
+ return getDataSection();
+}
+
+/// getSectionForConstant - Given a mergable constant with the
+/// specified size and relocation information, return a section that it
+/// should be placed in.
+const MCSection *
+TargetLoweringObjectFile::getSectionForConstant(SectionKind Kind) const {
+ if (Kind.isReadOnly() && ReadOnlySection != 0)
+ return ReadOnlySection;
+
+ return DataSection;
+}
+
+/// getExprForDwarfGlobalReference - Return an MCExpr to use for a
+/// reference to the specified global variable from exception
+/// handling information.
+const MCExpr *TargetLoweringObjectFile::
+getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI, unsigned Encoding,
+ MCStreamer &Streamer) const {
+ const MCSymbol *Sym = Mang->getSymbol(GV);
+ return getExprForDwarfReference(Sym, Encoding, Streamer);
+}
+
+const MCExpr *TargetLoweringObjectFile::
+getExprForDwarfReference(const MCSymbol *Sym, unsigned Encoding,
+ MCStreamer &Streamer) const {
+ const MCExpr *Res = MCSymbolRefExpr::Create(Sym, getContext());
+
+ switch (Encoding & 0x70) {
+ default:
+ report_fatal_error("We do not support this DWARF encoding yet!");
+ case dwarf::DW_EH_PE_absptr:
+ // Do nothing special
+ return Res;
+ case dwarf::DW_EH_PE_pcrel: {
+ // Emit a label to the streamer for the current position. This gives us
+ // .-foo addressing.
+ MCSymbol *PCSym = getContext().CreateTempSymbol();
+ Streamer.EmitLabel(PCSym);
+ const MCExpr *PC = MCSymbolRefExpr::Create(PCSym, getContext());
+ return MCBinaryExpr::CreateSub(Res, PC, getContext());
+ }
+ }
+}
diff --git a/contrib/llvm/lib/Target/TargetMachine.cpp b/contrib/llvm/lib/Target/TargetMachine.cpp
new file mode 100644
index 0000000..fe8a7ce
--- /dev/null
+++ b/contrib/llvm/lib/Target/TargetMachine.cpp
@@ -0,0 +1,284 @@
+//===-- TargetMachine.cpp - General Target Information ---------------------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the general parts of a Target machine.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+//---------------------------------------------------------------------------
+// Command-line options that tend to be useful on more than one back-end.
+//
+
+namespace llvm {
+ bool LessPreciseFPMADOption;
+ bool PrintMachineCode;
+ bool NoFramePointerElim;
+ bool NoFramePointerElimNonLeaf;
+ bool NoExcessFPPrecision;
+ bool UnsafeFPMath;
+ bool NoInfsFPMath;
+ bool NoNaNsFPMath;
+ bool HonorSignDependentRoundingFPMathOption;
+ bool UseSoftFloat;
+ FloatABI::ABIType FloatABIType;
+ bool NoImplicitFloat;
+ bool NoZerosInBSS;
+ bool JITExceptionHandling;
+ bool JITEmitDebugInfo;
+ bool JITEmitDebugInfoToDisk;
+ bool GuaranteedTailCallOpt;
+ unsigned StackAlignmentOverride;
+ bool RealignStack;
+ bool DisableJumpTables;
+ bool StrongPHIElim;
+ bool HasDivModLibcall;
+ bool AsmVerbosityDefault(false);
+ bool EnableSegmentedStacks;
+}
+
+static cl::opt<bool, true>
+PrintCode("print-machineinstrs",
+ cl::desc("Print generated machine code"),
+ cl::location(PrintMachineCode), cl::init(false));
+static cl::opt<bool, true>
+DisableFPElim("disable-fp-elim",
+ cl::desc("Disable frame pointer elimination optimization"),
+ cl::location(NoFramePointerElim),
+ cl::init(false));
+static cl::opt<bool, true>
+DisableFPElimNonLeaf("disable-non-leaf-fp-elim",
+ cl::desc("Disable frame pointer elimination optimization for non-leaf funcs"),
+ cl::location(NoFramePointerElimNonLeaf),
+ cl::init(false));
+static cl::opt<bool, true>
+DisableExcessPrecision("disable-excess-fp-precision",
+ cl::desc("Disable optimizations that may increase FP precision"),
+ cl::location(NoExcessFPPrecision),
+ cl::init(false));
+static cl::opt<bool, true>
+EnableFPMAD("enable-fp-mad",
+ cl::desc("Enable less precise MAD instructions to be generated"),
+ cl::location(LessPreciseFPMADOption),
+ cl::init(false));
+static cl::opt<bool, true>
+EnableUnsafeFPMath("enable-unsafe-fp-math",
+ cl::desc("Enable optimizations that may decrease FP precision"),
+ cl::location(UnsafeFPMath),
+ cl::init(false));
+static cl::opt<bool, true>
+EnableNoInfsFPMath("enable-no-infs-fp-math",
+ cl::desc("Enable FP math optimizations that assume no +-Infs"),
+ cl::location(NoInfsFPMath),
+ cl::init(false));
+static cl::opt<bool, true>
+EnableNoNaNsFPMath("enable-no-nans-fp-math",
+ cl::desc("Enable FP math optimizations that assume no NaNs"),
+ cl::location(NoNaNsFPMath),
+ cl::init(false));
+static cl::opt<bool, true>
+EnableHonorSignDependentRoundingFPMath("enable-sign-dependent-rounding-fp-math",
+ cl::Hidden,
+ cl::desc("Force codegen to assume rounding mode can change dynamically"),
+ cl::location(HonorSignDependentRoundingFPMathOption),
+ cl::init(false));
+static cl::opt<bool, true>
+GenerateSoftFloatCalls("soft-float",
+ cl::desc("Generate software floating point library calls"),
+ cl::location(UseSoftFloat),
+ cl::init(false));
+static cl::opt<llvm::FloatABI::ABIType, true>
+FloatABIForCalls("float-abi",
+ cl::desc("Choose float ABI type"),
+ cl::location(FloatABIType),
+ cl::init(FloatABI::Default),
+ cl::values(
+ clEnumValN(FloatABI::Default, "default",
+ "Target default float ABI type"),
+ clEnumValN(FloatABI::Soft, "soft",
+ "Soft float ABI (implied by -soft-float)"),
+ clEnumValN(FloatABI::Hard, "hard",
+ "Hard float ABI (uses FP registers)"),
+ clEnumValEnd));
+static cl::opt<bool, true>
+DontPlaceZerosInBSS("nozero-initialized-in-bss",
+ cl::desc("Don't place zero-initialized symbols into bss section"),
+ cl::location(NoZerosInBSS),
+ cl::init(false));
+static cl::opt<bool, true>
+EnableJITExceptionHandling("jit-enable-eh",
+ cl::desc("Emit exception handling information"),
+ cl::location(JITExceptionHandling),
+ cl::init(false));
+// In debug builds, make this default to true.
+#ifdef NDEBUG
+#define EMIT_DEBUG false
+#else
+#define EMIT_DEBUG true
+#endif
+static cl::opt<bool, true>
+EmitJitDebugInfo("jit-emit-debug",
+ cl::desc("Emit debug information to debugger"),
+ cl::location(JITEmitDebugInfo),
+ cl::init(EMIT_DEBUG));
+#undef EMIT_DEBUG
+static cl::opt<bool, true>
+EmitJitDebugInfoToDisk("jit-emit-debug-to-disk",
+ cl::Hidden,
+ cl::desc("Emit debug info objfiles to disk"),
+ cl::location(JITEmitDebugInfoToDisk),
+ cl::init(false));
+
+static cl::opt<bool, true>
+EnableGuaranteedTailCallOpt("tailcallopt",
+ cl::desc("Turn fastcc calls into tail calls by (potentially) changing ABI."),
+ cl::location(GuaranteedTailCallOpt),
+ cl::init(false));
+static cl::opt<unsigned, true>
+OverrideStackAlignment("stack-alignment",
+ cl::desc("Override default stack alignment"),
+ cl::location(StackAlignmentOverride),
+ cl::init(0));
+static cl::opt<bool, true>
+EnableRealignStack("realign-stack",
+ cl::desc("Realign stack if needed"),
+ cl::location(RealignStack),
+ cl::init(true));
+static cl::opt<bool, true>
+DisableSwitchTables(cl::Hidden, "disable-jump-tables",
+ cl::desc("Do not generate jump tables."),
+ cl::location(DisableJumpTables),
+ cl::init(false));
+static cl::opt<bool, true>
+EnableStrongPHIElim(cl::Hidden, "strong-phi-elim",
+ cl::desc("Use strong PHI elimination."),
+ cl::location(StrongPHIElim),
+ cl::init(false));
+static cl::opt<std::string>
+TrapFuncName("trap-func", cl::Hidden,
+ cl::desc("Emit a call to trap function rather than a trap instruction"),
+ cl::init(""));
+static cl::opt<bool>
+DataSections("fdata-sections",
+ cl::desc("Emit data into separate sections"),
+ cl::init(false));
+static cl::opt<bool>
+FunctionSections("ffunction-sections",
+ cl::desc("Emit functions into separate sections"),
+ cl::init(false));
+static cl::opt<bool, true>
+SegmentedStacks("segmented-stacks",
+ cl::desc("Use segmented stacks if possible."),
+ cl::location(EnableSegmentedStacks),
+ cl::init(false));
+
+//---------------------------------------------------------------------------
+// TargetMachine Class
+//
+
+TargetMachine::TargetMachine(const Target &T,
+ StringRef TT, StringRef CPU, StringRef FS)
+ : TheTarget(T), TargetTriple(TT), TargetCPU(CPU), TargetFS(FS),
+ CodeGenInfo(0), AsmInfo(0),
+ MCRelaxAll(false),
+ MCNoExecStack(false),
+ MCSaveTempLabels(false),
+ MCUseLoc(true),
+ MCUseCFI(true) {
+ // Typically it will be subtargets that will adjust FloatABIType from Default
+ // to Soft or Hard.
+ if (UseSoftFloat)
+ FloatABIType = FloatABI::Soft;
+}
+
+TargetMachine::~TargetMachine() {
+ delete CodeGenInfo;
+ delete AsmInfo;
+}
+
+/// getRelocationModel - Returns the code generation relocation model. The
+/// choices are static, PIC, and dynamic-no-pic, and target default.
+Reloc::Model TargetMachine::getRelocationModel() const {
+ if (!CodeGenInfo)
+ return Reloc::Default;
+ return CodeGenInfo->getRelocationModel();
+}
+
+/// getCodeModel - Returns the code model. The choices are small, kernel,
+/// medium, large, and target default.
+CodeModel::Model TargetMachine::getCodeModel() const {
+ if (!CodeGenInfo)
+ return CodeModel::Default;
+ return CodeGenInfo->getCodeModel();
+}
+
+bool TargetMachine::getAsmVerbosityDefault() {
+ return AsmVerbosityDefault;
+}
+
+void TargetMachine::setAsmVerbosityDefault(bool V) {
+ AsmVerbosityDefault = V;
+}
+
+bool TargetMachine::getFunctionSections() {
+ return FunctionSections;
+}
+
+bool TargetMachine::getDataSections() {
+ return DataSections;
+}
+
+void TargetMachine::setFunctionSections(bool V) {
+ FunctionSections = V;
+}
+
+void TargetMachine::setDataSections(bool V) {
+ DataSections = V;
+}
+
+namespace llvm {
+ /// DisableFramePointerElim - This returns true if frame pointer elimination
+ /// optimization should be disabled for the given machine function.
+ bool DisableFramePointerElim(const MachineFunction &MF) {
+ // Check to see if we should eliminate non-leaf frame pointers and then
+ // check to see if we should eliminate all frame pointers.
+ if (NoFramePointerElimNonLeaf && !NoFramePointerElim) {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ return MFI->hasCalls();
+ }
+
+ return NoFramePointerElim;
+ }
+
+ /// LessPreciseFPMAD - This flag return true when -enable-fp-mad option
+ /// is specified on the command line. When this flag is off(default), the
+ /// code generator is not allowed to generate mad (multiply add) if the
+ /// result is "less precise" than doing those operations individually.
+ bool LessPreciseFPMAD() { return UnsafeFPMath || LessPreciseFPMADOption; }
+
+ /// HonorSignDependentRoundingFPMath - Return true if the codegen must assume
+ /// that the rounding mode of the FPU can change from its default.
+ bool HonorSignDependentRoundingFPMath() {
+ return !UnsafeFPMath && HonorSignDependentRoundingFPMathOption;
+ }
+
+ /// getTrapFunctionName - If this returns a non-empty string, this means isel
+ /// should lower Intrinsic::trap to a call to the specified function name
+ /// instead of an ISD::TRAP node.
+ StringRef getTrapFunctionName() {
+ return TrapFuncName;
+ }
+}
diff --git a/contrib/llvm/lib/Target/TargetRegisterInfo.cpp b/contrib/llvm/lib/Target/TargetRegisterInfo.cpp
new file mode 100644
index 0000000..67239b8
--- /dev/null
+++ b/contrib/llvm/lib/Target/TargetRegisterInfo.cpp
@@ -0,0 +1,122 @@
+//===- TargetRegisterInfo.cpp - Target Register Information Implementation ===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TargetRegisterInfo interface.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterInfoDesc *ID,
+ regclass_iterator RCB, regclass_iterator RCE,
+ const char *const *subregindexnames)
+ : InfoDesc(ID), SubRegIndexNames(subregindexnames),
+ RegClassBegin(RCB), RegClassEnd(RCE) {
+}
+
+TargetRegisterInfo::~TargetRegisterInfo() {}
+
+void PrintReg::print(raw_ostream &OS) const {
+ if (!Reg)
+ OS << "%noreg";
+ else if (TargetRegisterInfo::isStackSlot(Reg))
+ OS << "SS#" << TargetRegisterInfo::stackSlot2Index(Reg);
+ else if (TargetRegisterInfo::isVirtualRegister(Reg))
+ OS << "%vreg" << TargetRegisterInfo::virtReg2Index(Reg);
+ else if (TRI && Reg < TRI->getNumRegs())
+ OS << '%' << TRI->getName(Reg);
+ else
+ OS << "%physreg" << Reg;
+ if (SubIdx) {
+ if (TRI)
+ OS << ':' << TRI->getSubRegIndexName(SubIdx);
+ else
+ OS << ":sub(" << SubIdx << ')';
+ }
+}
+
+/// getMinimalPhysRegClass - Returns the Register Class of a physical
+/// register of the given type, picking the most sub register class of
+/// the right type that contains this physreg.
+const TargetRegisterClass *
+TargetRegisterInfo::getMinimalPhysRegClass(unsigned reg, EVT VT) const {
+ assert(isPhysicalRegister(reg) && "reg must be a physical register");
+
+ // Pick the most sub register class of the right type that contains
+ // this physreg.
+ const TargetRegisterClass* BestRC = 0;
+ for (regclass_iterator I = regclass_begin(), E = regclass_end(); I != E; ++I){
+ const TargetRegisterClass* RC = *I;
+ if ((VT == MVT::Other || RC->hasType(VT)) && RC->contains(reg) &&
+ (!BestRC || BestRC->hasSubClass(RC)))
+ BestRC = RC;
+ }
+
+ assert(BestRC && "Couldn't find the register class");
+ return BestRC;
+}
+
+/// getAllocatableSetForRC - Toggle the bits that represent allocatable
+/// registers for the specific register class.
+static void getAllocatableSetForRC(const MachineFunction &MF,
+ const TargetRegisterClass *RC, BitVector &R){
+ ArrayRef<unsigned> Order = RC->getRawAllocationOrder(MF);
+ for (unsigned i = 0; i != Order.size(); ++i)
+ R.set(Order[i]);
+}
+
+BitVector TargetRegisterInfo::getAllocatableSet(const MachineFunction &MF,
+ const TargetRegisterClass *RC) const {
+ BitVector Allocatable(getNumRegs());
+ if (RC) {
+ getAllocatableSetForRC(MF, RC, Allocatable);
+ } else {
+ for (TargetRegisterInfo::regclass_iterator I = regclass_begin(),
+ E = regclass_end(); I != E; ++I)
+ if ((*I)->isAllocatable())
+ getAllocatableSetForRC(MF, *I, Allocatable);
+ }
+
+ // Mask out the reserved registers
+ BitVector Reserved = getReservedRegs(MF);
+ Allocatable &= Reserved.flip();
+
+ return Allocatable;
+}
+
+const TargetRegisterClass *
+TargetRegisterInfo::getCommonSubClass(const TargetRegisterClass *A,
+ const TargetRegisterClass *B) const {
+ // First take care of the trivial cases.
+ if (A == B)
+ return A;
+ if (!A || !B)
+ return 0;
+
+ // Register classes are ordered topologically, so the largest common
+ // sub-class it the common sub-class with the smallest ID.
+ const unsigned *SubA = A->getSubClassMask();
+ const unsigned *SubB = B->getSubClassMask();
+
+ // We could start the search from max(A.ID, B.ID), but we are only going to
+ // execute 2-3 iterations anyway.
+ for (unsigned Base = 0, BaseE = getNumRegClasses(); Base < BaseE; Base += 32)
+ if (unsigned Common = *SubA++ & *SubB++)
+ return getRegClass(Base + CountTrailingZeros_32(Common));
+
+ // No common sub-class exists.
+ return NULL;
+}
diff --git a/contrib/llvm/lib/Target/TargetSubtargetInfo.cpp b/contrib/llvm/lib/Target/TargetSubtargetInfo.cpp
new file mode 100644
index 0000000..59ffdea
--- /dev/null
+++ b/contrib/llvm/lib/Target/TargetSubtargetInfo.cpp
@@ -0,0 +1,33 @@
+//===-- TargetSubtargetInfo.cpp - General Target Information ---------------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the general parts of a Subtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/ADT/SmallVector.h"
+using namespace llvm;
+
+//---------------------------------------------------------------------------
+// TargetSubtargetInfo Class
+//
+TargetSubtargetInfo::TargetSubtargetInfo() {}
+
+TargetSubtargetInfo::~TargetSubtargetInfo() {}
+
+bool TargetSubtargetInfo::enablePostRAScheduler(
+ CodeGenOpt::Level OptLevel,
+ AntiDepBreakMode& Mode,
+ RegClassVector& CriticalPathRCs) const {
+ Mode = ANTIDEP_NONE;
+ CriticalPathRCs.clear();
+ return false;
+}
+
diff --git a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmLexer.cpp b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmLexer.cpp
new file mode 100644
index 0000000..1eaccff
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmLexer.cpp
@@ -0,0 +1,165 @@
+//===-- X86AsmLexer.cpp - Tokenize X86 assembly to AsmTokens --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/X86BaseInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/MC/MCTargetAsmLexer.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+
+using namespace llvm;
+
+namespace {
+
+class X86AsmLexer : public MCTargetAsmLexer {
+ const MCAsmInfo &AsmInfo;
+
+ bool tentativeIsValid;
+ AsmToken tentativeToken;
+
+ const AsmToken &lexTentative() {
+ tentativeToken = getLexer()->Lex();
+ tentativeIsValid = true;
+ return tentativeToken;
+ }
+
+ const AsmToken &lexDefinite() {
+ if (tentativeIsValid) {
+ tentativeIsValid = false;
+ return tentativeToken;
+ }
+ return getLexer()->Lex();
+ }
+
+ AsmToken LexTokenATT();
+ AsmToken LexTokenIntel();
+protected:
+ AsmToken LexToken() {
+ if (!Lexer) {
+ SetError(SMLoc(), "No MCAsmLexer installed");
+ return AsmToken(AsmToken::Error, "", 0);
+ }
+
+ switch (AsmInfo.getAssemblerDialect()) {
+ default:
+ SetError(SMLoc(), "Unhandled dialect");
+ return AsmToken(AsmToken::Error, "", 0);
+ case 0:
+ return LexTokenATT();
+ case 1:
+ return LexTokenIntel();
+ }
+ }
+public:
+ X86AsmLexer(const Target &T, const MCRegisterInfo &MRI, const MCAsmInfo &MAI)
+ : MCTargetAsmLexer(T), AsmInfo(MAI), tentativeIsValid(false) {
+ }
+};
+
+} // end anonymous namespace
+
+#define GET_REGISTER_MATCHER
+#include "X86GenAsmMatcher.inc"
+
+AsmToken X86AsmLexer::LexTokenATT() {
+ AsmToken lexedToken = lexDefinite();
+
+ switch (lexedToken.getKind()) {
+ default:
+ return lexedToken;
+ case AsmToken::Error:
+ SetError(Lexer->getErrLoc(), Lexer->getErr());
+ return lexedToken;
+
+ case AsmToken::Percent: {
+ const AsmToken &nextToken = lexTentative();
+ if (nextToken.getKind() != AsmToken::Identifier)
+ return lexedToken;
+
+
+ if (unsigned regID = MatchRegisterName(nextToken.getString())) {
+ lexDefinite();
+
+ // FIXME: This is completely wrong when there is a space or other
+ // punctuation between the % and the register name.
+ StringRef regStr(lexedToken.getString().data(),
+ lexedToken.getString().size() +
+ nextToken.getString().size());
+
+ return AsmToken(AsmToken::Register, regStr,
+ static_cast<int64_t>(regID));
+ }
+
+ // Match register name failed. If this is "db[0-7]", match it as an alias
+ // for dr[0-7].
+ if (nextToken.getString().size() == 3 &&
+ nextToken.getString().startswith("db")) {
+ int RegNo = -1;
+ switch (nextToken.getString()[2]) {
+ case '0': RegNo = X86::DR0; break;
+ case '1': RegNo = X86::DR1; break;
+ case '2': RegNo = X86::DR2; break;
+ case '3': RegNo = X86::DR3; break;
+ case '4': RegNo = X86::DR4; break;
+ case '5': RegNo = X86::DR5; break;
+ case '6': RegNo = X86::DR6; break;
+ case '7': RegNo = X86::DR7; break;
+ }
+
+ if (RegNo != -1) {
+ lexDefinite();
+
+ // FIXME: This is completely wrong when there is a space or other
+ // punctuation between the % and the register name.
+ StringRef regStr(lexedToken.getString().data(),
+ lexedToken.getString().size() +
+ nextToken.getString().size());
+ return AsmToken(AsmToken::Register, regStr,
+ static_cast<int64_t>(RegNo));
+ }
+ }
+
+
+ return lexedToken;
+ }
+ }
+}
+
+AsmToken X86AsmLexer::LexTokenIntel() {
+ const AsmToken &lexedToken = lexDefinite();
+
+ switch(lexedToken.getKind()) {
+ default:
+ return lexedToken;
+ case AsmToken::Error:
+ SetError(Lexer->getErrLoc(), Lexer->getErr());
+ return lexedToken;
+ case AsmToken::Identifier: {
+ std::string upperCase = lexedToken.getString().str();
+ std::string lowerCase = LowercaseString(upperCase);
+ StringRef lowerRef(lowerCase);
+
+ unsigned regID = MatchRegisterName(lowerRef);
+
+ if (regID)
+ return AsmToken(AsmToken::Register,
+ lexedToken.getString(),
+ static_cast<int64_t>(regID));
+ return lexedToken;
+ }
+ }
+}
+
+extern "C" void LLVMInitializeX86AsmLexer() {
+ RegisterMCAsmLexer<X86AsmLexer> X(TheX86_32Target);
+ RegisterMCAsmLexer<X86AsmLexer> Y(TheX86_64Target);
+}
diff --git a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
new file mode 100644
index 0000000..cb4f15f
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -0,0 +1,1189 @@
+//===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/X86BaseInfo.h"
+#include "llvm/MC/MCTargetAsmParser.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+struct X86Operand;
+
+class X86ATTAsmParser : public MCTargetAsmParser {
+ MCSubtargetInfo &STI;
+ MCAsmParser &Parser;
+
+private:
+ MCAsmParser &getParser() const { return Parser; }
+
+ MCAsmLexer &getLexer() const { return Parser.getLexer(); }
+
+ bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
+
+ X86Operand *ParseOperand();
+ X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
+
+ bool ParseDirectiveWord(unsigned Size, SMLoc L);
+ bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
+
+ bool MatchAndEmitInstruction(SMLoc IDLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ MCStreamer &Out);
+
+ /// isSrcOp - Returns true if operand is either (%rsi) or %ds:%(rsi)
+ /// in 64bit mode or (%edi) or %es:(%edi) in 32bit mode.
+ bool isSrcOp(X86Operand &Op);
+
+ /// isDstOp - Returns true if operand is either %es:(%rdi) in 64bit mode
+ /// or %es:(%edi) in 32bit mode.
+ bool isDstOp(X86Operand &Op);
+
+ bool is64BitMode() const {
+ // FIXME: Can tablegen auto-generate this?
+ return (STI.getFeatureBits() & X86::Mode64Bit) != 0;
+ }
+ void SwitchMode() {
+ unsigned FB = ComputeAvailableFeatures(STI.ToggleFeature(X86::Mode64Bit));
+ setAvailableFeatures(FB);
+ }
+
+ /// @name Auto-generated Matcher Functions
+ /// {
+
+#define GET_ASSEMBLER_HEADER
+#include "X86GenAsmMatcher.inc"
+
+ /// }
+
+public:
+ X86ATTAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser)
+ : MCTargetAsmParser(), STI(sti), Parser(parser) {
+
+ // Initialize the set of available features.
+ setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
+ }
+ virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
+
+ virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ virtual bool ParseDirective(AsmToken DirectiveID);
+};
+} // end anonymous namespace
+
+/// @name Auto-generated Match Functions
+/// {
+
+static unsigned MatchRegisterName(StringRef Name);
+
+/// }
+
+namespace {
+
+/// X86Operand - Instances of this class represent a parsed X86 machine
+/// instruction.
+struct X86Operand : public MCParsedAsmOperand {
+ enum KindTy {
+ Token,
+ Register,
+ Immediate,
+ Memory
+ } Kind;
+
+ SMLoc StartLoc, EndLoc;
+
+ union {
+ struct {
+ const char *Data;
+ unsigned Length;
+ } Tok;
+
+ struct {
+ unsigned RegNo;
+ } Reg;
+
+ struct {
+ const MCExpr *Val;
+ } Imm;
+
+ struct {
+ unsigned SegReg;
+ const MCExpr *Disp;
+ unsigned BaseReg;
+ unsigned IndexReg;
+ unsigned Scale;
+ } Mem;
+ };
+
+ X86Operand(KindTy K, SMLoc Start, SMLoc End)
+ : Kind(K), StartLoc(Start), EndLoc(End) {}
+
+ /// getStartLoc - Get the location of the first token of this operand.
+ SMLoc getStartLoc() const { return StartLoc; }
+ /// getEndLoc - Get the location of the last token of this operand.
+ SMLoc getEndLoc() const { return EndLoc; }
+
+ virtual void print(raw_ostream &OS) const {}
+
+ StringRef getToken() const {
+ assert(Kind == Token && "Invalid access!");
+ return StringRef(Tok.Data, Tok.Length);
+ }
+ void setTokenValue(StringRef Value) {
+ assert(Kind == Token && "Invalid access!");
+ Tok.Data = Value.data();
+ Tok.Length = Value.size();
+ }
+
+ unsigned getReg() const {
+ assert(Kind == Register && "Invalid access!");
+ return Reg.RegNo;
+ }
+
+ const MCExpr *getImm() const {
+ assert(Kind == Immediate && "Invalid access!");
+ return Imm.Val;
+ }
+
+ const MCExpr *getMemDisp() const {
+ assert(Kind == Memory && "Invalid access!");
+ return Mem.Disp;
+ }
+ unsigned getMemSegReg() const {
+ assert(Kind == Memory && "Invalid access!");
+ return Mem.SegReg;
+ }
+ unsigned getMemBaseReg() const {
+ assert(Kind == Memory && "Invalid access!");
+ return Mem.BaseReg;
+ }
+ unsigned getMemIndexReg() const {
+ assert(Kind == Memory && "Invalid access!");
+ return Mem.IndexReg;
+ }
+ unsigned getMemScale() const {
+ assert(Kind == Memory && "Invalid access!");
+ return Mem.Scale;
+ }
+
+ bool isToken() const {return Kind == Token; }
+
+ bool isImm() const { return Kind == Immediate; }
+
+ bool isImmSExti16i8() const {
+ if (!isImm())
+ return false;
+
+ // If this isn't a constant expr, just assume it fits and let relaxation
+ // handle it.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE)
+ return true;
+
+ // Otherwise, check the value is in a range that makes sense for this
+ // extension.
+ uint64_t Value = CE->getValue();
+ return (( Value <= 0x000000000000007FULL)||
+ (0x000000000000FF80ULL <= Value && Value <= 0x000000000000FFFFULL)||
+ (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
+ }
+ bool isImmSExti32i8() const {
+ if (!isImm())
+ return false;
+
+ // If this isn't a constant expr, just assume it fits and let relaxation
+ // handle it.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE)
+ return true;
+
+ // Otherwise, check the value is in a range that makes sense for this
+ // extension.
+ uint64_t Value = CE->getValue();
+ return (( Value <= 0x000000000000007FULL)||
+ (0x00000000FFFFFF80ULL <= Value && Value <= 0x00000000FFFFFFFFULL)||
+ (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
+ }
+ bool isImmZExtu32u8() const {
+ if (!isImm())
+ return false;
+
+ // If this isn't a constant expr, just assume it fits and let relaxation
+ // handle it.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE)
+ return true;
+
+ // Otherwise, check the value is in a range that makes sense for this
+ // extension.
+ uint64_t Value = CE->getValue();
+ return (Value <= 0x00000000000000FFULL);
+ }
+ bool isImmSExti64i8() const {
+ if (!isImm())
+ return false;
+
+ // If this isn't a constant expr, just assume it fits and let relaxation
+ // handle it.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE)
+ return true;
+
+ // Otherwise, check the value is in a range that makes sense for this
+ // extension.
+ uint64_t Value = CE->getValue();
+ return (( Value <= 0x000000000000007FULL)||
+ (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
+ }
+ bool isImmSExti64i32() const {
+ if (!isImm())
+ return false;
+
+ // If this isn't a constant expr, just assume it fits and let relaxation
+ // handle it.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE)
+ return true;
+
+ // Otherwise, check the value is in a range that makes sense for this
+ // extension.
+ uint64_t Value = CE->getValue();
+ return (( Value <= 0x000000007FFFFFFFULL)||
+ (0xFFFFFFFF80000000ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
+ }
+
+ bool isMem() const { return Kind == Memory; }
+
+ bool isAbsMem() const {
+ return Kind == Memory && !getMemSegReg() && !getMemBaseReg() &&
+ !getMemIndexReg() && getMemScale() == 1;
+ }
+
+ bool isReg() const { return Kind == Register; }
+
+ void addExpr(MCInst &Inst, const MCExpr *Expr) const {
+ // Add as immediates when possible.
+ if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
+ else
+ Inst.addOperand(MCOperand::CreateExpr(Expr));
+ }
+
+ void addRegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(getReg()));
+ }
+
+ void addImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ addExpr(Inst, getImm());
+ }
+
+ void addMemOperands(MCInst &Inst, unsigned N) const {
+ assert((N == 5) && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(getMemBaseReg()));
+ Inst.addOperand(MCOperand::CreateImm(getMemScale()));
+ Inst.addOperand(MCOperand::CreateReg(getMemIndexReg()));
+ addExpr(Inst, getMemDisp());
+ Inst.addOperand(MCOperand::CreateReg(getMemSegReg()));
+ }
+
+ void addAbsMemOperands(MCInst &Inst, unsigned N) const {
+ assert((N == 1) && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateExpr(getMemDisp()));
+ }
+
+ static X86Operand *CreateToken(StringRef Str, SMLoc Loc) {
+ X86Operand *Res = new X86Operand(Token, Loc, Loc);
+ Res->Tok.Data = Str.data();
+ Res->Tok.Length = Str.size();
+ return Res;
+ }
+
+ static X86Operand *CreateReg(unsigned RegNo, SMLoc StartLoc, SMLoc EndLoc) {
+ X86Operand *Res = new X86Operand(Register, StartLoc, EndLoc);
+ Res->Reg.RegNo = RegNo;
+ return Res;
+ }
+
+ static X86Operand *CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc){
+ X86Operand *Res = new X86Operand(Immediate, StartLoc, EndLoc);
+ Res->Imm.Val = Val;
+ return Res;
+ }
+
+ /// Create an absolute memory operand.
+ static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc,
+ SMLoc EndLoc) {
+ X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
+ Res->Mem.SegReg = 0;
+ Res->Mem.Disp = Disp;
+ Res->Mem.BaseReg = 0;
+ Res->Mem.IndexReg = 0;
+ Res->Mem.Scale = 1;
+ return Res;
+ }
+
+ /// Create a generalized memory operand.
+ static X86Operand *CreateMem(unsigned SegReg, const MCExpr *Disp,
+ unsigned BaseReg, unsigned IndexReg,
+ unsigned Scale, SMLoc StartLoc, SMLoc EndLoc) {
+ // We should never just have a displacement, that should be parsed as an
+ // absolute memory operand.
+ assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!");
+
+ // The scale should always be one of {1,2,4,8}.
+ assert(((Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8)) &&
+ "Invalid scale!");
+ X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
+ Res->Mem.SegReg = SegReg;
+ Res->Mem.Disp = Disp;
+ Res->Mem.BaseReg = BaseReg;
+ Res->Mem.IndexReg = IndexReg;
+ Res->Mem.Scale = Scale;
+ return Res;
+ }
+};
+
+} // end anonymous namespace.
+
+bool X86ATTAsmParser::isSrcOp(X86Operand &Op) {
+ unsigned basereg = is64BitMode() ? X86::RSI : X86::ESI;
+
+ return (Op.isMem() &&
+ (Op.Mem.SegReg == 0 || Op.Mem.SegReg == X86::DS) &&
+ isa<MCConstantExpr>(Op.Mem.Disp) &&
+ cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
+ Op.Mem.BaseReg == basereg && Op.Mem.IndexReg == 0);
+}
+
+bool X86ATTAsmParser::isDstOp(X86Operand &Op) {
+ unsigned basereg = is64BitMode() ? X86::RDI : X86::EDI;
+
+ return Op.isMem() && Op.Mem.SegReg == X86::ES &&
+ isa<MCConstantExpr>(Op.Mem.Disp) &&
+ cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
+ Op.Mem.BaseReg == basereg && Op.Mem.IndexReg == 0;
+}
+
+bool X86ATTAsmParser::ParseRegister(unsigned &RegNo,
+ SMLoc &StartLoc, SMLoc &EndLoc) {
+ RegNo = 0;
+ const AsmToken &TokPercent = Parser.getTok();
+ assert(TokPercent.is(AsmToken::Percent) && "Invalid token kind!");
+ StartLoc = TokPercent.getLoc();
+ Parser.Lex(); // Eat percent token.
+
+ const AsmToken &Tok = Parser.getTok();
+ if (Tok.isNot(AsmToken::Identifier))
+ return Error(Tok.getLoc(), "invalid register name");
+
+ RegNo = MatchRegisterName(Tok.getString());
+
+ // If the match failed, try the register name as lowercase.
+ if (RegNo == 0)
+ RegNo = MatchRegisterName(LowercaseString(Tok.getString()));
+
+ if (!is64BitMode()) {
+ // FIXME: This should be done using Requires<In32BitMode> and
+ // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
+ // checked.
+ // FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a
+ // REX prefix.
+ if (RegNo == X86::RIZ ||
+ X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
+ X86II::isX86_64NonExtLowByteReg(RegNo) ||
+ X86II::isX86_64ExtendedReg(RegNo))
+ return Error(Tok.getLoc(), "register %"
+ + Tok.getString() + " is only available in 64-bit mode");
+ }
+
+ // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
+ if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) {
+ RegNo = X86::ST0;
+ EndLoc = Tok.getLoc();
+ Parser.Lex(); // Eat 'st'
+
+ // Check to see if we have '(4)' after %st.
+ if (getLexer().isNot(AsmToken::LParen))
+ return false;
+ // Lex the paren.
+ getParser().Lex();
+
+ const AsmToken &IntTok = Parser.getTok();
+ if (IntTok.isNot(AsmToken::Integer))
+ return Error(IntTok.getLoc(), "expected stack index");
+ switch (IntTok.getIntVal()) {
+ case 0: RegNo = X86::ST0; break;
+ case 1: RegNo = X86::ST1; break;
+ case 2: RegNo = X86::ST2; break;
+ case 3: RegNo = X86::ST3; break;
+ case 4: RegNo = X86::ST4; break;
+ case 5: RegNo = X86::ST5; break;
+ case 6: RegNo = X86::ST6; break;
+ case 7: RegNo = X86::ST7; break;
+ default: return Error(IntTok.getLoc(), "invalid stack index");
+ }
+
+ if (getParser().Lex().isNot(AsmToken::RParen))
+ return Error(Parser.getTok().getLoc(), "expected ')'");
+
+ EndLoc = Tok.getLoc();
+ Parser.Lex(); // Eat ')'
+ return false;
+ }
+
+ // If this is "db[0-7]", match it as an alias
+ // for dr[0-7].
+ if (RegNo == 0 && Tok.getString().size() == 3 &&
+ Tok.getString().startswith("db")) {
+ switch (Tok.getString()[2]) {
+ case '0': RegNo = X86::DR0; break;
+ case '1': RegNo = X86::DR1; break;
+ case '2': RegNo = X86::DR2; break;
+ case '3': RegNo = X86::DR3; break;
+ case '4': RegNo = X86::DR4; break;
+ case '5': RegNo = X86::DR5; break;
+ case '6': RegNo = X86::DR6; break;
+ case '7': RegNo = X86::DR7; break;
+ }
+
+ if (RegNo != 0) {
+ EndLoc = Tok.getLoc();
+ Parser.Lex(); // Eat it.
+ return false;
+ }
+ }
+
+ if (RegNo == 0)
+ return Error(Tok.getLoc(), "invalid register name");
+
+ EndLoc = Tok.getLoc();
+ Parser.Lex(); // Eat identifier token.
+ return false;
+}
+
+X86Operand *X86ATTAsmParser::ParseOperand() {
+ switch (getLexer().getKind()) {
+ default:
+ // Parse a memory operand with no segment register.
+ return ParseMemOperand(0, Parser.getTok().getLoc());
+ case AsmToken::Percent: {
+ // Read the register.
+ unsigned RegNo;
+ SMLoc Start, End;
+ if (ParseRegister(RegNo, Start, End)) return 0;
+ if (RegNo == X86::EIZ || RegNo == X86::RIZ) {
+ Error(Start, "%eiz and %riz can only be used as index registers");
+ return 0;
+ }
+
+ // If this is a segment register followed by a ':', then this is the start
+ // of a memory reference, otherwise this is a normal register reference.
+ if (getLexer().isNot(AsmToken::Colon))
+ return X86Operand::CreateReg(RegNo, Start, End);
+
+
+ getParser().Lex(); // Eat the colon.
+ return ParseMemOperand(RegNo, Start);
+ }
+ case AsmToken::Dollar: {
+ // $42 -> immediate.
+ SMLoc Start = Parser.getTok().getLoc(), End;
+ Parser.Lex();
+ const MCExpr *Val;
+ if (getParser().ParseExpression(Val, End))
+ return 0;
+ return X86Operand::CreateImm(Val, Start, End);
+ }
+ }
+}
+
+/// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix
+/// has already been parsed if present.
+X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
+
+ // We have to disambiguate a parenthesized expression "(4+5)" from the start
+ // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
+ // only way to do this without lookahead is to eat the '(' and see what is
+ // after it.
+ const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
+ if (getLexer().isNot(AsmToken::LParen)) {
+ SMLoc ExprEnd;
+ if (getParser().ParseExpression(Disp, ExprEnd)) return 0;
+
+ // After parsing the base expression we could either have a parenthesized
+ // memory address or not. If not, return now. If so, eat the (.
+ if (getLexer().isNot(AsmToken::LParen)) {
+ // Unless we have a segment register, treat this as an immediate.
+ if (SegReg == 0)
+ return X86Operand::CreateMem(Disp, MemStart, ExprEnd);
+ return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
+ }
+
+ // Eat the '('.
+ Parser.Lex();
+ } else {
+ // Okay, we have a '('. We don't know if this is an expression or not, but
+ // so we have to eat the ( to see beyond it.
+ SMLoc LParenLoc = Parser.getTok().getLoc();
+ Parser.Lex(); // Eat the '('.
+
+ if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
+ // Nothing to do here, fall into the code below with the '(' part of the
+ // memory operand consumed.
+ } else {
+ SMLoc ExprEnd;
+
+ // It must be an parenthesized expression, parse it now.
+ if (getParser().ParseParenExpression(Disp, ExprEnd))
+ return 0;
+
+ // After parsing the base expression we could either have a parenthesized
+ // memory address or not. If not, return now. If so, eat the (.
+ if (getLexer().isNot(AsmToken::LParen)) {
+ // Unless we have a segment register, treat this as an immediate.
+ if (SegReg == 0)
+ return X86Operand::CreateMem(Disp, LParenLoc, ExprEnd);
+ return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
+ }
+
+ // Eat the '('.
+ Parser.Lex();
+ }
+ }
+
+ // If we reached here, then we just ate the ( of the memory operand. Process
+ // the rest of the memory operand.
+ unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
+
+ if (getLexer().is(AsmToken::Percent)) {
+ SMLoc L;
+ if (ParseRegister(BaseReg, L, L)) return 0;
+ if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) {
+ Error(L, "eiz and riz can only be used as index registers");
+ return 0;
+ }
+ }
+
+ if (getLexer().is(AsmToken::Comma)) {
+ Parser.Lex(); // Eat the comma.
+
+ // Following the comma we should have either an index register, or a scale
+ // value. We don't support the later form, but we want to parse it
+ // correctly.
+ //
+ // Not that even though it would be completely consistent to support syntax
+ // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
+ if (getLexer().is(AsmToken::Percent)) {
+ SMLoc L;
+ if (ParseRegister(IndexReg, L, L)) return 0;
+
+ if (getLexer().isNot(AsmToken::RParen)) {
+ // Parse the scale amount:
+ // ::= ',' [scale-expression]
+ if (getLexer().isNot(AsmToken::Comma)) {
+ Error(Parser.getTok().getLoc(),
+ "expected comma in scale expression");
+ return 0;
+ }
+ Parser.Lex(); // Eat the comma.
+
+ if (getLexer().isNot(AsmToken::RParen)) {
+ SMLoc Loc = Parser.getTok().getLoc();
+
+ int64_t ScaleVal;
+ if (getParser().ParseAbsoluteExpression(ScaleVal))
+ return 0;
+
+ // Validate the scale amount.
+ if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){
+ Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
+ return 0;
+ }
+ Scale = (unsigned)ScaleVal;
+ }
+ }
+ } else if (getLexer().isNot(AsmToken::RParen)) {
+ // A scale amount without an index is ignored.
+ // index.
+ SMLoc Loc = Parser.getTok().getLoc();
+
+ int64_t Value;
+ if (getParser().ParseAbsoluteExpression(Value))
+ return 0;
+
+ if (Value != 1)
+ Warning(Loc, "scale factor without index register is ignored");
+ Scale = 1;
+ }
+ }
+
+ // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
+ if (getLexer().isNot(AsmToken::RParen)) {
+ Error(Parser.getTok().getLoc(), "unexpected token in memory operand");
+ return 0;
+ }
+ SMLoc MemEnd = Parser.getTok().getLoc();
+ Parser.Lex(); // Eat the ')'.
+
+ return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale,
+ MemStart, MemEnd);
+}
+
+bool X86ATTAsmParser::
+ParseInstruction(StringRef Name, SMLoc NameLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ StringRef PatchedName = Name;
+
+ // FIXME: Hack to recognize setneb as setne.
+ if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
+ PatchedName != "setb" && PatchedName != "setnb")
+ PatchedName = PatchedName.substr(0, Name.size()-1);
+
+ // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
+ const MCExpr *ExtraImmOp = 0;
+ if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
+ (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
+ PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
+ bool IsVCMP = PatchedName.startswith("vcmp");
+ unsigned SSECCIdx = IsVCMP ? 4 : 3;
+ unsigned SSEComparisonCode = StringSwitch<unsigned>(
+ PatchedName.slice(SSECCIdx, PatchedName.size() - 2))
+ .Case("eq", 0)
+ .Case("lt", 1)
+ .Case("le", 2)
+ .Case("unord", 3)
+ .Case("neq", 4)
+ .Case("nlt", 5)
+ .Case("nle", 6)
+ .Case("ord", 7)
+ .Case("eq_uq", 8)
+ .Case("nge", 9)
+ .Case("ngt", 0x0A)
+ .Case("false", 0x0B)
+ .Case("neq_oq", 0x0C)
+ .Case("ge", 0x0D)
+ .Case("gt", 0x0E)
+ .Case("true", 0x0F)
+ .Case("eq_os", 0x10)
+ .Case("lt_oq", 0x11)
+ .Case("le_oq", 0x12)
+ .Case("unord_s", 0x13)
+ .Case("neq_us", 0x14)
+ .Case("nlt_uq", 0x15)
+ .Case("nle_uq", 0x16)
+ .Case("ord_s", 0x17)
+ .Case("eq_us", 0x18)
+ .Case("nge_uq", 0x19)
+ .Case("ngt_uq", 0x1A)
+ .Case("false_os", 0x1B)
+ .Case("neq_os", 0x1C)
+ .Case("ge_oq", 0x1D)
+ .Case("gt_oq", 0x1E)
+ .Case("true_us", 0x1F)
+ .Default(~0U);
+ if (SSEComparisonCode != ~0U) {
+ ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode,
+ getParser().getContext());
+ if (PatchedName.endswith("ss")) {
+ PatchedName = IsVCMP ? "vcmpss" : "cmpss";
+ } else if (PatchedName.endswith("sd")) {
+ PatchedName = IsVCMP ? "vcmpsd" : "cmpsd";
+ } else if (PatchedName.endswith("ps")) {
+ PatchedName = IsVCMP ? "vcmpps" : "cmpps";
+ } else {
+ assert(PatchedName.endswith("pd") && "Unexpected mnemonic!");
+ PatchedName = IsVCMP ? "vcmppd" : "cmppd";
+ }
+ }
+ }
+
+ Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
+
+ if (ExtraImmOp)
+ Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
+
+
+ // Determine whether this is an instruction prefix.
+ bool isPrefix =
+ Name == "lock" || Name == "rep" ||
+ Name == "repe" || Name == "repz" ||
+ Name == "repne" || Name == "repnz" ||
+ Name == "rex64" || Name == "data16";
+
+
+ // This does the actual operand parsing. Don't parse any more if we have a
+ // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
+ // just want to parse the "lock" as the first instruction and the "incl" as
+ // the next one.
+ if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) {
+
+ // Parse '*' modifier.
+ if (getLexer().is(AsmToken::Star)) {
+ SMLoc Loc = Parser.getTok().getLoc();
+ Operands.push_back(X86Operand::CreateToken("*", Loc));
+ Parser.Lex(); // Eat the star.
+ }
+
+ // Read the first operand.
+ if (X86Operand *Op = ParseOperand())
+ Operands.push_back(Op);
+ else {
+ Parser.EatToEndOfStatement();
+ return true;
+ }
+
+ while (getLexer().is(AsmToken::Comma)) {
+ Parser.Lex(); // Eat the comma.
+
+ // Parse and remember the operand.
+ if (X86Operand *Op = ParseOperand())
+ Operands.push_back(Op);
+ else {
+ Parser.EatToEndOfStatement();
+ return true;
+ }
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ SMLoc Loc = getLexer().getLoc();
+ Parser.EatToEndOfStatement();
+ return Error(Loc, "unexpected token in argument list");
+ }
+ }
+
+ if (getLexer().is(AsmToken::EndOfStatement))
+ Parser.Lex(); // Consume the EndOfStatement
+ else if (isPrefix && getLexer().is(AsmToken::Slash))
+ Parser.Lex(); // Consume the prefix separator Slash
+
+ // This is a terrible hack to handle "out[bwl]? %al, (%dx)" ->
+ // "outb %al, %dx". Out doesn't take a memory form, but this is a widely
+ // documented form in various unofficial manuals, so a lot of code uses it.
+ if ((Name == "outb" || Name == "outw" || Name == "outl" || Name == "out") &&
+ Operands.size() == 3) {
+ X86Operand &Op = *(X86Operand*)Operands.back();
+ if (Op.isMem() && Op.Mem.SegReg == 0 &&
+ isa<MCConstantExpr>(Op.Mem.Disp) &&
+ cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
+ Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
+ SMLoc Loc = Op.getEndLoc();
+ Operands.back() = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
+ delete &Op;
+ }
+ }
+ // Same hack for "in[bwl]? (%dx), %al" -> "inb %dx, %al".
+ if ((Name == "inb" || Name == "inw" || Name == "inl" || Name == "in") &&
+ Operands.size() == 3) {
+ X86Operand &Op = *(X86Operand*)Operands.begin()[1];
+ if (Op.isMem() && Op.Mem.SegReg == 0 &&
+ isa<MCConstantExpr>(Op.Mem.Disp) &&
+ cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
+ Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
+ SMLoc Loc = Op.getEndLoc();
+ Operands.begin()[1] = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
+ delete &Op;
+ }
+ }
+ // Transform "ins[bwl] %dx, %es:(%edi)" into "ins[bwl]"
+ if (Name.startswith("ins") && Operands.size() == 3 &&
+ (Name == "insb" || Name == "insw" || Name == "insl")) {
+ X86Operand &Op = *(X86Operand*)Operands.begin()[1];
+ X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
+ if (Op.isReg() && Op.getReg() == X86::DX && isDstOp(Op2)) {
+ Operands.pop_back();
+ Operands.pop_back();
+ delete &Op;
+ delete &Op2;
+ }
+ }
+
+ // Transform "outs[bwl] %ds:(%esi), %dx" into "out[bwl]"
+ if (Name.startswith("outs") && Operands.size() == 3 &&
+ (Name == "outsb" || Name == "outsw" || Name == "outsl")) {
+ X86Operand &Op = *(X86Operand*)Operands.begin()[1];
+ X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
+ if (isSrcOp(Op) && Op2.isReg() && Op2.getReg() == X86::DX) {
+ Operands.pop_back();
+ Operands.pop_back();
+ delete &Op;
+ delete &Op2;
+ }
+ }
+
+ // Transform "movs[bwl] %ds:(%esi), %es:(%edi)" into "movs[bwl]"
+ if (Name.startswith("movs") && Operands.size() == 3 &&
+ (Name == "movsb" || Name == "movsw" || Name == "movsl" ||
+ (is64BitMode() && Name == "movsq"))) {
+ X86Operand &Op = *(X86Operand*)Operands.begin()[1];
+ X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
+ if (isSrcOp(Op) && isDstOp(Op2)) {
+ Operands.pop_back();
+ Operands.pop_back();
+ delete &Op;
+ delete &Op2;
+ }
+ }
+ // Transform "lods[bwl] %ds:(%esi),{%al,%ax,%eax,%rax}" into "lods[bwl]"
+ if (Name.startswith("lods") && Operands.size() == 3 &&
+ (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
+ Name == "lodsl" || (is64BitMode() && Name == "lodsq"))) {
+ X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
+ X86Operand *Op2 = static_cast<X86Operand*>(Operands[2]);
+ if (isSrcOp(*Op1) && Op2->isReg()) {
+ const char *ins;
+ unsigned reg = Op2->getReg();
+ bool isLods = Name == "lods";
+ if (reg == X86::AL && (isLods || Name == "lodsb"))
+ ins = "lodsb";
+ else if (reg == X86::AX && (isLods || Name == "lodsw"))
+ ins = "lodsw";
+ else if (reg == X86::EAX && (isLods || Name == "lodsl"))
+ ins = "lodsl";
+ else if (reg == X86::RAX && (isLods || Name == "lodsq"))
+ ins = "lodsq";
+ else
+ ins = NULL;
+ if (ins != NULL) {
+ Operands.pop_back();
+ Operands.pop_back();
+ delete Op1;
+ delete Op2;
+ if (Name != ins)
+ static_cast<X86Operand*>(Operands[0])->setTokenValue(ins);
+ }
+ }
+ }
+ // Transform "stos[bwl] {%al,%ax,%eax,%rax},%es:(%edi)" into "stos[bwl]"
+ if (Name.startswith("stos") && Operands.size() == 3 &&
+ (Name == "stos" || Name == "stosb" || Name == "stosw" ||
+ Name == "stosl" || (is64BitMode() && Name == "stosq"))) {
+ X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
+ X86Operand *Op2 = static_cast<X86Operand*>(Operands[2]);
+ if (isDstOp(*Op2) && Op1->isReg()) {
+ const char *ins;
+ unsigned reg = Op1->getReg();
+ bool isStos = Name == "stos";
+ if (reg == X86::AL && (isStos || Name == "stosb"))
+ ins = "stosb";
+ else if (reg == X86::AX && (isStos || Name == "stosw"))
+ ins = "stosw";
+ else if (reg == X86::EAX && (isStos || Name == "stosl"))
+ ins = "stosl";
+ else if (reg == X86::RAX && (isStos || Name == "stosq"))
+ ins = "stosq";
+ else
+ ins = NULL;
+ if (ins != NULL) {
+ Operands.pop_back();
+ Operands.pop_back();
+ delete Op1;
+ delete Op2;
+ if (Name != ins)
+ static_cast<X86Operand*>(Operands[0])->setTokenValue(ins);
+ }
+ }
+ }
+
+ // FIXME: Hack to handle recognize s{hr,ar,hl} $1, <op>. Canonicalize to
+ // "shift <op>".
+ if ((Name.startswith("shr") || Name.startswith("sar") ||
+ Name.startswith("shl") || Name.startswith("sal") ||
+ Name.startswith("rcl") || Name.startswith("rcr") ||
+ Name.startswith("rol") || Name.startswith("ror")) &&
+ Operands.size() == 3) {
+ X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
+ if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
+ cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) {
+ delete Operands[1];
+ Operands.erase(Operands.begin() + 1);
+ }
+ }
+
+ // Transforms "int $3" into "int3" as a size optimization. We can't write an
+ // instalias with an immediate operand yet.
+ if (Name == "int" && Operands.size() == 2) {
+ X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
+ if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
+ cast<MCConstantExpr>(Op1->getImm())->getValue() == 3) {
+ delete Operands[1];
+ Operands.erase(Operands.begin() + 1);
+ static_cast<X86Operand*>(Operands[0])->setTokenValue("int3");
+ }
+ }
+
+ return false;
+}
+
+bool X86ATTAsmParser::
+MatchAndEmitInstruction(SMLoc IDLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ MCStreamer &Out) {
+ assert(!Operands.empty() && "Unexpect empty operand list!");
+ X86Operand *Op = static_cast<X86Operand*>(Operands[0]);
+ assert(Op->isToken() && "Leading operand should always be a mnemonic!");
+
+ // First, handle aliases that expand to multiple instructions.
+ // FIXME: This should be replaced with a real .td file alias mechanism.
+ // Also, MatchInstructionImpl should do actually *do* the EmitInstruction
+ // call.
+ if (Op->getToken() == "fstsw" || Op->getToken() == "fstcw" ||
+ Op->getToken() == "fstsww" || Op->getToken() == "fstcww" ||
+ Op->getToken() == "finit" || Op->getToken() == "fsave" ||
+ Op->getToken() == "fstenv" || Op->getToken() == "fclex") {
+ MCInst Inst;
+ Inst.setOpcode(X86::WAIT);
+ Out.EmitInstruction(Inst);
+
+ const char *Repl =
+ StringSwitch<const char*>(Op->getToken())
+ .Case("finit", "fninit")
+ .Case("fsave", "fnsave")
+ .Case("fstcw", "fnstcw")
+ .Case("fstcww", "fnstcw")
+ .Case("fstenv", "fnstenv")
+ .Case("fstsw", "fnstsw")
+ .Case("fstsww", "fnstsw")
+ .Case("fclex", "fnclex")
+ .Default(0);
+ assert(Repl && "Unknown wait-prefixed instruction");
+ delete Operands[0];
+ Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
+ }
+
+ bool WasOriginallyInvalidOperand = false;
+ unsigned OrigErrorInfo;
+ MCInst Inst;
+
+ // First, try a direct match.
+ switch (MatchInstructionImpl(Operands, Inst, OrigErrorInfo)) {
+ default: break;
+ case Match_Success:
+ Out.EmitInstruction(Inst);
+ return false;
+ case Match_MissingFeature:
+ Error(IDLoc, "instruction requires a CPU feature not currently enabled");
+ return true;
+ case Match_ConversionFail:
+ return Error(IDLoc, "unable to convert operands to instruction");
+ case Match_InvalidOperand:
+ WasOriginallyInvalidOperand = true;
+ break;
+ case Match_MnemonicFail:
+ break;
+ }
+
+ // FIXME: Ideally, we would only attempt suffix matches for things which are
+ // valid prefixes, and we could just infer the right unambiguous
+ // type. However, that requires substantially more matcher support than the
+ // following hack.
+
+ // Change the operand to point to a temporary token.
+ StringRef Base = Op->getToken();
+ SmallString<16> Tmp;
+ Tmp += Base;
+ Tmp += ' ';
+ Op->setTokenValue(Tmp.str());
+
+ // If this instruction starts with an 'f', then it is a floating point stack
+ // instruction. These come in up to three forms for 32-bit, 64-bit, and
+ // 80-bit floating point, which use the suffixes s,l,t respectively.
+ //
+ // Otherwise, we assume that this may be an integer instruction, which comes
+ // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
+ const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
+
+ // Check for the various suffix matches.
+ Tmp[Base.size()] = Suffixes[0];
+ unsigned ErrorInfoIgnore;
+ unsigned Match1, Match2, Match3, Match4;
+
+ Match1 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore);
+ Tmp[Base.size()] = Suffixes[1];
+ Match2 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore);
+ Tmp[Base.size()] = Suffixes[2];
+ Match3 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore);
+ Tmp[Base.size()] = Suffixes[3];
+ Match4 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore);
+
+ // Restore the old token.
+ Op->setTokenValue(Base);
+
+ // If exactly one matched, then we treat that as a successful match (and the
+ // instruction will already have been filled in correctly, since the failing
+ // matches won't have modified it).
+ unsigned NumSuccessfulMatches =
+ (Match1 == Match_Success) + (Match2 == Match_Success) +
+ (Match3 == Match_Success) + (Match4 == Match_Success);
+ if (NumSuccessfulMatches == 1) {
+ Out.EmitInstruction(Inst);
+ return false;
+ }
+
+ // Otherwise, the match failed, try to produce a decent error message.
+
+ // If we had multiple suffix matches, then identify this as an ambiguous
+ // match.
+ if (NumSuccessfulMatches > 1) {
+ char MatchChars[4];
+ unsigned NumMatches = 0;
+ if (Match1 == Match_Success) MatchChars[NumMatches++] = Suffixes[0];
+ if (Match2 == Match_Success) MatchChars[NumMatches++] = Suffixes[1];
+ if (Match3 == Match_Success) MatchChars[NumMatches++] = Suffixes[2];
+ if (Match4 == Match_Success) MatchChars[NumMatches++] = Suffixes[3];
+
+ SmallString<126> Msg;
+ raw_svector_ostream OS(Msg);
+ OS << "ambiguous instructions require an explicit suffix (could be ";
+ for (unsigned i = 0; i != NumMatches; ++i) {
+ if (i != 0)
+ OS << ", ";
+ if (i + 1 == NumMatches)
+ OS << "or ";
+ OS << "'" << Base << MatchChars[i] << "'";
+ }
+ OS << ")";
+ Error(IDLoc, OS.str());
+ return true;
+ }
+
+ // Okay, we know that none of the variants matched successfully.
+
+ // If all of the instructions reported an invalid mnemonic, then the original
+ // mnemonic was invalid.
+ if ((Match1 == Match_MnemonicFail) && (Match2 == Match_MnemonicFail) &&
+ (Match3 == Match_MnemonicFail) && (Match4 == Match_MnemonicFail)) {
+ if (!WasOriginallyInvalidOperand) {
+ Error(IDLoc, "invalid instruction mnemonic '" + Base + "'");
+ return true;
+ }
+
+ // Recover location info for the operand if we know which was the problem.
+ SMLoc ErrorLoc = IDLoc;
+ if (OrigErrorInfo != ~0U) {
+ if (OrigErrorInfo >= Operands.size())
+ return Error(IDLoc, "too few operands for instruction");
+
+ ErrorLoc = ((X86Operand*)Operands[OrigErrorInfo])->getStartLoc();
+ if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
+ }
+
+ return Error(ErrorLoc, "invalid operand for instruction");
+ }
+
+ // If one instruction matched with a missing feature, report this as a
+ // missing feature.
+ if ((Match1 == Match_MissingFeature) + (Match2 == Match_MissingFeature) +
+ (Match3 == Match_MissingFeature) + (Match4 == Match_MissingFeature) == 1){
+ Error(IDLoc, "instruction requires a CPU feature not currently enabled");
+ return true;
+ }
+
+ // If one instruction matched with an invalid operand, report this as an
+ // operand failure.
+ if ((Match1 == Match_InvalidOperand) + (Match2 == Match_InvalidOperand) +
+ (Match3 == Match_InvalidOperand) + (Match4 == Match_InvalidOperand) == 1){
+ Error(IDLoc, "invalid operand for instruction");
+ return true;
+ }
+
+ // If all of these were an outright failure, report it in a useless way.
+ // FIXME: We should give nicer diagnostics about the exact failure.
+ Error(IDLoc, "unknown use of instruction mnemonic without a size suffix");
+ return true;
+}
+
+
+bool X86ATTAsmParser::ParseDirective(AsmToken DirectiveID) {
+ StringRef IDVal = DirectiveID.getIdentifier();
+ if (IDVal == ".word")
+ return ParseDirectiveWord(2, DirectiveID.getLoc());
+ else if (IDVal.startswith(".code"))
+ return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
+ return true;
+}
+
+/// ParseDirectiveWord
+/// ::= .word [ expression (, expression)* ]
+bool X86ATTAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ for (;;) {
+ const MCExpr *Value;
+ if (getParser().ParseExpression(Value))
+ return true;
+
+ getParser().getStreamer().EmitValue(Value, Size, 0 /*addrspace*/);
+
+ if (getLexer().is(AsmToken::EndOfStatement))
+ break;
+
+ // FIXME: Improve diagnostic.
+ if (getLexer().isNot(AsmToken::Comma))
+ return Error(L, "unexpected token in directive");
+ Parser.Lex();
+ }
+ }
+
+ Parser.Lex();
+ return false;
+}
+
+/// ParseDirectiveCode
+/// ::= .code32 | .code64
+bool X86ATTAsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
+ if (IDVal == ".code32") {
+ Parser.Lex();
+ if (is64BitMode()) {
+ SwitchMode();
+ getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
+ }
+ } else if (IDVal == ".code64") {
+ Parser.Lex();
+ if (!is64BitMode()) {
+ SwitchMode();
+ getParser().getStreamer().EmitAssemblerFlag(MCAF_Code64);
+ }
+ } else {
+ return Error(L, "unexpected directive " + IDVal);
+ }
+
+ return false;
+}
+
+
+extern "C" void LLVMInitializeX86AsmLexer();
+
+// Force static initialization.
+extern "C" void LLVMInitializeX86AsmParser() {
+ RegisterMCAsmParser<X86ATTAsmParser> X(TheX86_32Target);
+ RegisterMCAsmParser<X86ATTAsmParser> Y(TheX86_64Target);
+ LLVMInitializeX86AsmLexer();
+}
+
+#define GET_REGISTER_MATCHER
+#define GET_MATCHER_IMPLEMENTATION
+#include "X86GenAsmMatcher.inc"
diff --git a/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp b/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
new file mode 100644
index 0000000..3aacb20
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -0,0 +1,607 @@
+//===- X86Disassembler.cpp - Disassembler for x86 and x86_64 ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is part of the X86 Disassembler.
+// It contains code to translate the data produced by the decoder into
+// MCInsts.
+// Documentation for the disassembler can be found in X86Disassembler.h.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86Disassembler.h"
+#include "X86DisassemblerDecoder.h"
+
+#include "llvm/MC/EDInstInfo.h"
+#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MemoryObject.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define GET_REGINFO_ENUM
+#include "X86GenRegisterInfo.inc"
+#define GET_INSTRINFO_ENUM
+#include "X86GenInstrInfo.inc"
+#include "X86GenEDInfo.inc"
+
+using namespace llvm;
+using namespace llvm::X86Disassembler;
+
+void x86DisassemblerDebug(const char *file,
+ unsigned line,
+ const char *s) {
+ dbgs() << file << ":" << line << ": " << s;
+}
+
+#define debug(s) DEBUG(x86DisassemblerDebug(__FILE__, __LINE__, s));
+
+namespace llvm {
+
+// Fill-ins to make the compiler happy. These constants are never actually
+// assigned; they are just filler to make an automatically-generated switch
+// statement work.
+namespace X86 {
+ enum {
+ BX_SI = 500,
+ BX_DI = 501,
+ BP_SI = 502,
+ BP_DI = 503,
+ sib = 504,
+ sib64 = 505
+ };
+}
+
+extern Target TheX86_32Target, TheX86_64Target;
+
+}
+
+static bool translateInstruction(MCInst &target,
+ InternalInstruction &source);
+
+X86GenericDisassembler::X86GenericDisassembler(const MCSubtargetInfo &STI, DisassemblerMode mode) :
+ MCDisassembler(STI),
+ fMode(mode) {
+}
+
+X86GenericDisassembler::~X86GenericDisassembler() {
+}
+
+EDInstInfo *X86GenericDisassembler::getEDInfo() const {
+ return instInfoX86;
+}
+
+/// regionReader - a callback function that wraps the readByte method from
+/// MemoryObject.
+///
+/// @param arg - The generic callback parameter. In this case, this should
+/// be a pointer to a MemoryObject.
+/// @param byte - A pointer to the byte to be read.
+/// @param address - The address to be read.
+static int regionReader(void* arg, uint8_t* byte, uint64_t address) {
+ MemoryObject* region = static_cast<MemoryObject*>(arg);
+ return region->readByte(address, byte);
+}
+
+/// logger - a callback function that wraps the operator<< method from
+/// raw_ostream.
+///
+/// @param arg - The generic callback parameter. This should be a pointe
+/// to a raw_ostream.
+/// @param log - A string to be logged. logger() adds a newline.
+static void logger(void* arg, const char* log) {
+ if (!arg)
+ return;
+
+ raw_ostream &vStream = *(static_cast<raw_ostream*>(arg));
+ vStream << log << "\n";
+}
+
+//
+// Public interface for the disassembler
+//
+
+MCDisassembler::DecodeStatus
+X86GenericDisassembler::getInstruction(MCInst &instr,
+ uint64_t &size,
+ const MemoryObject &region,
+ uint64_t address,
+ raw_ostream &vStream,
+ raw_ostream &cStream) const {
+ InternalInstruction internalInstr;
+
+ dlog_t loggerFn = logger;
+ if (&vStream == &nulls())
+ loggerFn = 0; // Disable logging completely if it's going to nulls().
+
+ int ret = decodeInstruction(&internalInstr,
+ regionReader,
+ (void*)&region,
+ loggerFn,
+ (void*)&vStream,
+ address,
+ fMode);
+
+ if (ret) {
+ size = internalInstr.readerCursor - address;
+ return Fail;
+ }
+ else {
+ size = internalInstr.length;
+ return (!translateInstruction(instr, internalInstr)) ? Success : Fail;
+ }
+}
+
+//
+// Private code that translates from struct InternalInstructions to MCInsts.
+//
+
+/// translateRegister - Translates an internal register to the appropriate LLVM
+/// register, and appends it as an operand to an MCInst.
+///
+/// @param mcInst - The MCInst to append to.
+/// @param reg - The Reg to append.
+static void translateRegister(MCInst &mcInst, Reg reg) {
+#define ENTRY(x) X86::x,
+ uint8_t llvmRegnums[] = {
+ ALL_REGS
+ 0
+ };
+#undef ENTRY
+
+ uint8_t llvmRegnum = llvmRegnums[reg];
+ mcInst.addOperand(MCOperand::CreateReg(llvmRegnum));
+}
+
+/// translateImmediate - Appends an immediate operand to an MCInst.
+///
+/// @param mcInst - The MCInst to append to.
+/// @param immediate - The immediate value to append.
+/// @param operand - The operand, as stored in the descriptor table.
+/// @param insn - The internal instruction.
+static void translateImmediate(MCInst &mcInst, uint64_t immediate,
+ const OperandSpecifier &operand,
+ InternalInstruction &insn) {
+ // Sign-extend the immediate if necessary.
+
+ OperandType type = operand.type;
+
+ if (type == TYPE_RELv) {
+ switch (insn.displacementSize) {
+ default:
+ break;
+ case 1:
+ type = TYPE_MOFFS8;
+ break;
+ case 2:
+ type = TYPE_MOFFS16;
+ break;
+ case 4:
+ type = TYPE_MOFFS32;
+ break;
+ case 8:
+ type = TYPE_MOFFS64;
+ break;
+ }
+ }
+ // By default sign-extend all X86 immediates based on their encoding.
+ else if (type == TYPE_IMM8 || type == TYPE_IMM16 || type == TYPE_IMM32 ||
+ type == TYPE_IMM64) {
+ uint32_t Opcode = mcInst.getOpcode();
+ switch (operand.encoding) {
+ default:
+ break;
+ case ENCODING_IB:
+ // Special case those X86 instructions that use the imm8 as a set of
+ // bits, bit count, etc. and are not sign-extend.
+ if (Opcode != X86::BLENDPSrri && Opcode != X86::BLENDPDrri &&
+ Opcode != X86::PBLENDWrri && Opcode != X86::MPSADBWrri &&
+ Opcode != X86::DPPSrri && Opcode != X86::DPPDrri &&
+ Opcode != X86::INSERTPSrr && Opcode != X86::VBLENDPSYrri &&
+ Opcode != X86::VBLENDPSYrmi && Opcode != X86::VBLENDPDYrri &&
+ Opcode != X86::VBLENDPDYrmi && Opcode != X86::VPBLENDWrri &&
+ Opcode != X86::VMPSADBWrri && Opcode != X86::VDPPSYrri &&
+ Opcode != X86::VDPPSYrmi && Opcode != X86::VDPPDrri &&
+ Opcode != X86::VINSERTPSrr)
+ type = TYPE_MOFFS8;
+ break;
+ case ENCODING_IW:
+ type = TYPE_MOFFS16;
+ break;
+ case ENCODING_ID:
+ type = TYPE_MOFFS32;
+ break;
+ case ENCODING_IO:
+ type = TYPE_MOFFS64;
+ break;
+ }
+ }
+
+ switch (type) {
+ case TYPE_XMM128:
+ mcInst.addOperand(MCOperand::CreateReg(X86::XMM0 + (immediate >> 4)));
+ return;
+ case TYPE_XMM256:
+ mcInst.addOperand(MCOperand::CreateReg(X86::YMM0 + (immediate >> 4)));
+ return;
+ case TYPE_MOFFS8:
+ case TYPE_REL8:
+ if(immediate & 0x80)
+ immediate |= ~(0xffull);
+ break;
+ case TYPE_MOFFS16:
+ if(immediate & 0x8000)
+ immediate |= ~(0xffffull);
+ break;
+ case TYPE_MOFFS32:
+ case TYPE_REL32:
+ case TYPE_REL64:
+ if(immediate & 0x80000000)
+ immediate |= ~(0xffffffffull);
+ break;
+ case TYPE_MOFFS64:
+ default:
+ // operand is 64 bits wide. Do nothing.
+ break;
+ }
+
+ mcInst.addOperand(MCOperand::CreateImm(immediate));
+}
+
+/// translateRMRegister - Translates a register stored in the R/M field of the
+/// ModR/M byte to its LLVM equivalent and appends it to an MCInst.
+/// @param mcInst - The MCInst to append to.
+/// @param insn - The internal instruction to extract the R/M field
+/// from.
+/// @return - 0 on success; -1 otherwise
+static bool translateRMRegister(MCInst &mcInst,
+ InternalInstruction &insn) {
+ if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) {
+ debug("A R/M register operand may not have a SIB byte");
+ return true;
+ }
+
+ switch (insn.eaBase) {
+ default:
+ debug("Unexpected EA base register");
+ return true;
+ case EA_BASE_NONE:
+ debug("EA_BASE_NONE for ModR/M base");
+ return true;
+#define ENTRY(x) case EA_BASE_##x:
+ ALL_EA_BASES
+#undef ENTRY
+ debug("A R/M register operand may not have a base; "
+ "the operand must be a register.");
+ return true;
+#define ENTRY(x) \
+ case EA_REG_##x: \
+ mcInst.addOperand(MCOperand::CreateReg(X86::x)); break;
+ ALL_REGS
+#undef ENTRY
+ }
+
+ return false;
+}
+
+/// translateRMMemory - Translates a memory operand stored in the Mod and R/M
+/// fields of an internal instruction (and possibly its SIB byte) to a memory
+/// operand in LLVM's format, and appends it to an MCInst.
+///
+/// @param mcInst - The MCInst to append to.
+/// @param insn - The instruction to extract Mod, R/M, and SIB fields
+/// from.
+/// @return - 0 on success; nonzero otherwise
+static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn) {
+ // Addresses in an MCInst are represented as five operands:
+ // 1. basereg (register) The R/M base, or (if there is a SIB) the
+ // SIB base
+ // 2. scaleamount (immediate) 1, or (if there is a SIB) the specified
+ // scale amount
+ // 3. indexreg (register) x86_registerNONE, or (if there is a SIB)
+ // the index (which is multiplied by the
+ // scale amount)
+ // 4. displacement (immediate) 0, or the displacement if there is one
+ // 5. segmentreg (register) x86_registerNONE for now, but could be set
+ // if we have segment overrides
+
+ MCOperand baseReg;
+ MCOperand scaleAmount;
+ MCOperand indexReg;
+ MCOperand displacement;
+ MCOperand segmentReg;
+
+ if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) {
+ if (insn.sibBase != SIB_BASE_NONE) {
+ switch (insn.sibBase) {
+ default:
+ debug("Unexpected sibBase");
+ return true;
+#define ENTRY(x) \
+ case SIB_BASE_##x: \
+ baseReg = MCOperand::CreateReg(X86::x); break;
+ ALL_SIB_BASES
+#undef ENTRY
+ }
+ } else {
+ baseReg = MCOperand::CreateReg(0);
+ }
+
+ if (insn.sibIndex != SIB_INDEX_NONE) {
+ switch (insn.sibIndex) {
+ default:
+ debug("Unexpected sibIndex");
+ return true;
+#define ENTRY(x) \
+ case SIB_INDEX_##x: \
+ indexReg = MCOperand::CreateReg(X86::x); break;
+ EA_BASES_32BIT
+ EA_BASES_64BIT
+#undef ENTRY
+ }
+ } else {
+ indexReg = MCOperand::CreateReg(0);
+ }
+
+ scaleAmount = MCOperand::CreateImm(insn.sibScale);
+ } else {
+ switch (insn.eaBase) {
+ case EA_BASE_NONE:
+ if (insn.eaDisplacement == EA_DISP_NONE) {
+ debug("EA_BASE_NONE and EA_DISP_NONE for ModR/M base");
+ return true;
+ }
+ if (insn.mode == MODE_64BIT)
+ baseReg = MCOperand::CreateReg(X86::RIP); // Section 2.2.1.6
+ else
+ baseReg = MCOperand::CreateReg(0);
+
+ indexReg = MCOperand::CreateReg(0);
+ break;
+ case EA_BASE_BX_SI:
+ baseReg = MCOperand::CreateReg(X86::BX);
+ indexReg = MCOperand::CreateReg(X86::SI);
+ break;
+ case EA_BASE_BX_DI:
+ baseReg = MCOperand::CreateReg(X86::BX);
+ indexReg = MCOperand::CreateReg(X86::DI);
+ break;
+ case EA_BASE_BP_SI:
+ baseReg = MCOperand::CreateReg(X86::BP);
+ indexReg = MCOperand::CreateReg(X86::SI);
+ break;
+ case EA_BASE_BP_DI:
+ baseReg = MCOperand::CreateReg(X86::BP);
+ indexReg = MCOperand::CreateReg(X86::DI);
+ break;
+ default:
+ indexReg = MCOperand::CreateReg(0);
+ switch (insn.eaBase) {
+ default:
+ debug("Unexpected eaBase");
+ return true;
+ // Here, we will use the fill-ins defined above. However,
+ // BX_SI, BX_DI, BP_SI, and BP_DI are all handled above and
+ // sib and sib64 were handled in the top-level if, so they're only
+ // placeholders to keep the compiler happy.
+#define ENTRY(x) \
+ case EA_BASE_##x: \
+ baseReg = MCOperand::CreateReg(X86::x); break;
+ ALL_EA_BASES
+#undef ENTRY
+#define ENTRY(x) case EA_REG_##x:
+ ALL_REGS
+#undef ENTRY
+ debug("A R/M memory operand may not be a register; "
+ "the base field must be a base.");
+ return true;
+ }
+ }
+
+ scaleAmount = MCOperand::CreateImm(1);
+ }
+
+ displacement = MCOperand::CreateImm(insn.displacement);
+
+ static const uint8_t segmentRegnums[SEG_OVERRIDE_max] = {
+ 0, // SEG_OVERRIDE_NONE
+ X86::CS,
+ X86::SS,
+ X86::DS,
+ X86::ES,
+ X86::FS,
+ X86::GS
+ };
+
+ segmentReg = MCOperand::CreateReg(segmentRegnums[insn.segmentOverride]);
+
+ mcInst.addOperand(baseReg);
+ mcInst.addOperand(scaleAmount);
+ mcInst.addOperand(indexReg);
+ mcInst.addOperand(displacement);
+ mcInst.addOperand(segmentReg);
+ return false;
+}
+
+/// translateRM - Translates an operand stored in the R/M (and possibly SIB)
+/// byte of an instruction to LLVM form, and appends it to an MCInst.
+///
+/// @param mcInst - The MCInst to append to.
+/// @param operand - The operand, as stored in the descriptor table.
+/// @param insn - The instruction to extract Mod, R/M, and SIB fields
+/// from.
+/// @return - 0 on success; nonzero otherwise
+static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand,
+ InternalInstruction &insn) {
+ switch (operand.type) {
+ default:
+ debug("Unexpected type for a R/M operand");
+ return true;
+ case TYPE_R8:
+ case TYPE_R16:
+ case TYPE_R32:
+ case TYPE_R64:
+ case TYPE_Rv:
+ case TYPE_MM:
+ case TYPE_MM32:
+ case TYPE_MM64:
+ case TYPE_XMM:
+ case TYPE_XMM32:
+ case TYPE_XMM64:
+ case TYPE_XMM128:
+ case TYPE_XMM256:
+ case TYPE_DEBUGREG:
+ case TYPE_CONTROLREG:
+ return translateRMRegister(mcInst, insn);
+ case TYPE_M:
+ case TYPE_M8:
+ case TYPE_M16:
+ case TYPE_M32:
+ case TYPE_M64:
+ case TYPE_M128:
+ case TYPE_M256:
+ case TYPE_M512:
+ case TYPE_Mv:
+ case TYPE_M32FP:
+ case TYPE_M64FP:
+ case TYPE_M80FP:
+ case TYPE_M16INT:
+ case TYPE_M32INT:
+ case TYPE_M64INT:
+ case TYPE_M1616:
+ case TYPE_M1632:
+ case TYPE_M1664:
+ case TYPE_LEA:
+ return translateRMMemory(mcInst, insn);
+ }
+}
+
+/// translateFPRegister - Translates a stack position on the FPU stack to its
+/// LLVM form, and appends it to an MCInst.
+///
+/// @param mcInst - The MCInst to append to.
+/// @param stackPos - The stack position to translate.
+/// @return - 0 on success; nonzero otherwise.
+static bool translateFPRegister(MCInst &mcInst,
+ uint8_t stackPos) {
+ if (stackPos >= 8) {
+ debug("Invalid FP stack position");
+ return true;
+ }
+
+ mcInst.addOperand(MCOperand::CreateReg(X86::ST0 + stackPos));
+
+ return false;
+}
+
+/// translateOperand - Translates an operand stored in an internal instruction
+/// to LLVM's format and appends it to an MCInst.
+///
+/// @param mcInst - The MCInst to append to.
+/// @param operand - The operand, as stored in the descriptor table.
+/// @param insn - The internal instruction.
+/// @return - false on success; true otherwise.
+static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand,
+ InternalInstruction &insn) {
+ switch (operand.encoding) {
+ default:
+ debug("Unhandled operand encoding during translation");
+ return true;
+ case ENCODING_REG:
+ translateRegister(mcInst, insn.reg);
+ return false;
+ case ENCODING_RM:
+ return translateRM(mcInst, operand, insn);
+ case ENCODING_CB:
+ case ENCODING_CW:
+ case ENCODING_CD:
+ case ENCODING_CP:
+ case ENCODING_CO:
+ case ENCODING_CT:
+ debug("Translation of code offsets isn't supported.");
+ return true;
+ case ENCODING_IB:
+ case ENCODING_IW:
+ case ENCODING_ID:
+ case ENCODING_IO:
+ case ENCODING_Iv:
+ case ENCODING_Ia:
+ translateImmediate(mcInst,
+ insn.immediates[insn.numImmediatesTranslated++],
+ operand,
+ insn);
+ return false;
+ case ENCODING_RB:
+ case ENCODING_RW:
+ case ENCODING_RD:
+ case ENCODING_RO:
+ translateRegister(mcInst, insn.opcodeRegister);
+ return false;
+ case ENCODING_I:
+ return translateFPRegister(mcInst, insn.opcodeModifier);
+ case ENCODING_Rv:
+ translateRegister(mcInst, insn.opcodeRegister);
+ return false;
+ case ENCODING_VVVV:
+ translateRegister(mcInst, insn.vvvv);
+ return false;
+ case ENCODING_DUP:
+ return translateOperand(mcInst,
+ insn.spec->operands[operand.type - TYPE_DUP0],
+ insn);
+ }
+}
+
+/// translateInstruction - Translates an internal instruction and all its
+/// operands to an MCInst.
+///
+/// @param mcInst - The MCInst to populate with the instruction's data.
+/// @param insn - The internal instruction.
+/// @return - false on success; true otherwise.
+static bool translateInstruction(MCInst &mcInst,
+ InternalInstruction &insn) {
+ if (!insn.spec) {
+ debug("Instruction has no specification");
+ return true;
+ }
+
+ mcInst.setOpcode(insn.instructionID);
+
+ int index;
+
+ insn.numImmediatesTranslated = 0;
+
+ for (index = 0; index < X86_MAX_OPERANDS; ++index) {
+ if (insn.spec->operands[index].encoding != ENCODING_NONE) {
+ if (translateOperand(mcInst, insn.spec->operands[index], insn)) {
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+static MCDisassembler *createX86_32Disassembler(const Target &T, const MCSubtargetInfo &STI) {
+ return new X86Disassembler::X86_32Disassembler(STI);
+}
+
+static MCDisassembler *createX86_64Disassembler(const Target &T, const MCSubtargetInfo &STI) {
+ return new X86Disassembler::X86_64Disassembler(STI);
+}
+
+extern "C" void LLVMInitializeX86Disassembler() {
+ // Register the disassembler.
+ TargetRegistry::RegisterMCDisassembler(TheX86_32Target,
+ createX86_32Disassembler);
+ TargetRegistry::RegisterMCDisassembler(TheX86_64Target,
+ createX86_64Disassembler);
+}
diff --git a/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.h b/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.h
new file mode 100644
index 0000000..6ac9a0f
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.h
@@ -0,0 +1,157 @@
+//===- X86Disassembler.h - Disassembler for x86 and x86_64 ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The X86 disassembler is a table-driven disassembler for the 16-, 32-, and
+// 64-bit X86 instruction sets. The main decode sequence for an assembly
+// instruction in this disassembler is:
+//
+// 1. Read the prefix bytes and determine the attributes of the instruction.
+// These attributes, recorded in enum attributeBits
+// (X86DisassemblerDecoderCommon.h), form a bitmask. The table CONTEXTS_SYM
+// provides a mapping from bitmasks to contexts, which are represented by
+// enum InstructionContext (ibid.).
+//
+// 2. Read the opcode, and determine what kind of opcode it is. The
+// disassembler distinguishes four kinds of opcodes, which are enumerated in
+// OpcodeType (X86DisassemblerDecoderCommon.h): one-byte (0xnn), two-byte
+// (0x0f 0xnn), three-byte-38 (0x0f 0x38 0xnn), or three-byte-3a
+// (0x0f 0x3a 0xnn). Mandatory prefixes are treated as part of the context.
+//
+// 3. Depending on the opcode type, look in one of four ClassDecision structures
+// (X86DisassemblerDecoderCommon.h). Use the opcode class to determine which
+// OpcodeDecision (ibid.) to look the opcode in. Look up the opcode, to get
+// a ModRMDecision (ibid.).
+//
+// 4. Some instructions, such as escape opcodes or extended opcodes, or even
+// instructions that have ModRM*Reg / ModRM*Mem forms in LLVM, need the
+// ModR/M byte to complete decode. The ModRMDecision's type is an entry from
+// ModRMDecisionType (X86DisassemblerDecoderCommon.h) that indicates if the
+// ModR/M byte is required and how to interpret it.
+//
+// 5. After resolving the ModRMDecision, the disassembler has a unique ID
+// of type InstrUID (X86DisassemblerDecoderCommon.h). Looking this ID up in
+// INSTRUCTIONS_SYM yields the name of the instruction and the encodings and
+// meanings of its operands.
+//
+// 6. For each operand, its encoding is an entry from OperandEncoding
+// (X86DisassemblerDecoderCommon.h) and its type is an entry from
+// OperandType (ibid.). The encoding indicates how to read it from the
+// instruction; the type indicates how to interpret the value once it has
+// been read. For example, a register operand could be stored in the R/M
+// field of the ModR/M byte, the REG field of the ModR/M byte, or added to
+// the main opcode. This is orthogonal from its meaning (an GPR or an XMM
+// register, for instance). Given this information, the operands can be
+// extracted and interpreted.
+//
+// 7. As the last step, the disassembler translates the instruction information
+// and operands into a format understandable by the client - in this case, an
+// MCInst for use by the MC infrastructure.
+//
+// The disassembler is broken broadly into two parts: the table emitter that
+// emits the instruction decode tables discussed above during compilation, and
+// the disassembler itself. The table emitter is documented in more detail in
+// utils/TableGen/X86DisassemblerEmitter.h.
+//
+// X86Disassembler.h contains the public interface for the disassembler,
+// adhering to the MCDisassembler interface.
+// X86Disassembler.cpp contains the code responsible for step 7, and for
+// invoking the decoder to execute steps 1-6.
+// X86DisassemblerDecoderCommon.h contains the definitions needed by both the
+// table emitter and the disassembler.
+// X86DisassemblerDecoder.h contains the public interface of the decoder,
+// factored out into C for possible use by other projects.
+// X86DisassemblerDecoder.c contains the source code of the decoder, which is
+// responsible for steps 1-6.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86DISASSEMBLER_H
+#define X86DISASSEMBLER_H
+
+#define INSTRUCTION_SPECIFIER_FIELDS \
+ const char* name;
+
+#define INSTRUCTION_IDS \
+ const InstrUID *instructionIDs;
+
+#include "X86DisassemblerDecoderCommon.h"
+
+#undef INSTRUCTION_SPECIFIER_FIELDS
+#undef INSTRUCTION_IDS
+
+#include "llvm/MC/MCDisassembler.h"
+
+struct InternalInstruction;
+
+namespace llvm {
+
+class MCInst;
+class MCSubtargetInfo;
+class MemoryObject;
+class raw_ostream;
+
+struct EDInstInfo;
+
+namespace X86Disassembler {
+
+/// X86GenericDisassembler - Generic disassembler for all X86 platforms.
+/// All each platform class should have to do is subclass the constructor, and
+/// provide a different disassemblerMode value.
+class X86GenericDisassembler : public MCDisassembler {
+protected:
+ /// Constructor - Initializes the disassembler.
+ ///
+ /// @param mode - The X86 architecture mode to decode for.
+ X86GenericDisassembler(const MCSubtargetInfo &STI, DisassemblerMode mode);
+public:
+ ~X86GenericDisassembler();
+
+ /// getInstruction - See MCDisassembler.
+ DecodeStatus getInstruction(MCInst &instr,
+ uint64_t &size,
+ const MemoryObject &region,
+ uint64_t address,
+ raw_ostream &vStream,
+ raw_ostream &cStream) const;
+
+ /// getEDInfo - See MCDisassembler.
+ EDInstInfo *getEDInfo() const;
+private:
+ DisassemblerMode fMode;
+};
+
+/// X86_16Disassembler - 16-bit X86 disassembler.
+class X86_16Disassembler : public X86GenericDisassembler {
+public:
+ X86_16Disassembler(const MCSubtargetInfo &STI) :
+ X86GenericDisassembler(STI, MODE_16BIT) {
+ }
+};
+
+/// X86_16Disassembler - 32-bit X86 disassembler.
+class X86_32Disassembler : public X86GenericDisassembler {
+public:
+ X86_32Disassembler(const MCSubtargetInfo &STI) :
+ X86GenericDisassembler(STI, MODE_32BIT) {
+ }
+};
+
+/// X86_16Disassembler - 64-bit X86 disassembler.
+class X86_64Disassembler : public X86GenericDisassembler {
+public:
+ X86_64Disassembler(const MCSubtargetInfo &STI) :
+ X86GenericDisassembler(STI, MODE_64BIT) {
+ }
+};
+
+} // namespace X86Disassembler
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
new file mode 100644
index 0000000..f9b0fe5
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
@@ -0,0 +1,1625 @@
+/*===- X86DisassemblerDecoder.c - Disassembler decoder -------------*- C -*-==*
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===*
+ *
+ * This file is part of the X86 Disassembler.
+ * It contains the implementation of the instruction decoder.
+ * Documentation for the disassembler can be found in X86Disassembler.h.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include <stdarg.h> /* for va_*() */
+#include <stdio.h> /* for vsnprintf() */
+#include <stdlib.h> /* for exit() */
+#include <string.h> /* for memset() */
+
+#include "X86DisassemblerDecoder.h"
+
+#include "X86GenDisassemblerTables.inc"
+
+#define TRUE 1
+#define FALSE 0
+
+typedef int8_t bool;
+
+#ifndef NDEBUG
+#define debug(s) do { x86DisassemblerDebug(__FILE__, __LINE__, s); } while (0)
+#else
+#define debug(s) do { } while (0)
+#endif
+
+
+/*
+ * contextForAttrs - Client for the instruction context table. Takes a set of
+ * attributes and returns the appropriate decode context.
+ *
+ * @param attrMask - Attributes, from the enumeration attributeBits.
+ * @return - The InstructionContext to use when looking up an
+ * an instruction with these attributes.
+ */
+static InstructionContext contextForAttrs(uint8_t attrMask) {
+ return CONTEXTS_SYM[attrMask];
+}
+
+/*
+ * modRMRequired - Reads the appropriate instruction table to determine whether
+ * the ModR/M byte is required to decode a particular instruction.
+ *
+ * @param type - The opcode type (i.e., how many bytes it has).
+ * @param insnContext - The context for the instruction, as returned by
+ * contextForAttrs.
+ * @param opcode - The last byte of the instruction's opcode, not counting
+ * ModR/M extensions and escapes.
+ * @return - TRUE if the ModR/M byte is required, FALSE otherwise.
+ */
+static int modRMRequired(OpcodeType type,
+ InstructionContext insnContext,
+ uint8_t opcode) {
+ const struct ContextDecision* decision = 0;
+
+ switch (type) {
+ case ONEBYTE:
+ decision = &ONEBYTE_SYM;
+ break;
+ case TWOBYTE:
+ decision = &TWOBYTE_SYM;
+ break;
+ case THREEBYTE_38:
+ decision = &THREEBYTE38_SYM;
+ break;
+ case THREEBYTE_3A:
+ decision = &THREEBYTE3A_SYM;
+ break;
+ case THREEBYTE_A6:
+ decision = &THREEBYTEA6_SYM;
+ break;
+ case THREEBYTE_A7:
+ decision = &THREEBYTEA7_SYM;
+ break;
+ }
+
+ return decision->opcodeDecisions[insnContext].modRMDecisions[opcode].
+ modrm_type != MODRM_ONEENTRY;
+
+ return 0;
+}
+
+/*
+ * decode - Reads the appropriate instruction table to obtain the unique ID of
+ * an instruction.
+ *
+ * @param type - See modRMRequired().
+ * @param insnContext - See modRMRequired().
+ * @param opcode - See modRMRequired().
+ * @param modRM - The ModR/M byte if required, or any value if not.
+ * @return - The UID of the instruction, or 0 on failure.
+ */
+static InstrUID decode(OpcodeType type,
+ InstructionContext insnContext,
+ uint8_t opcode,
+ uint8_t modRM) {
+ const struct ModRMDecision* dec;
+
+ switch (type) {
+ default:
+ debug("Unknown opcode type");
+ return 0;
+ case ONEBYTE:
+ dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
+ break;
+ case TWOBYTE:
+ dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
+ break;
+ case THREEBYTE_38:
+ dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
+ break;
+ case THREEBYTE_3A:
+ dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
+ break;
+ case THREEBYTE_A6:
+ dec = &THREEBYTEA6_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
+ break;
+ case THREEBYTE_A7:
+ dec = &THREEBYTEA7_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
+ break;
+ }
+
+ switch (dec->modrm_type) {
+ default:
+ debug("Corrupt table! Unknown modrm_type");
+ return 0;
+ case MODRM_ONEENTRY:
+ return dec->instructionIDs[0];
+ case MODRM_SPLITRM:
+ if (modFromModRM(modRM) == 0x3)
+ return dec->instructionIDs[1];
+ else
+ return dec->instructionIDs[0];
+ case MODRM_FULL:
+ return dec->instructionIDs[modRM];
+ }
+}
+
+/*
+ * specifierForUID - Given a UID, returns the name and operand specification for
+ * that instruction.
+ *
+ * @param uid - The unique ID for the instruction. This should be returned by
+ * decode(); specifierForUID will not check bounds.
+ * @return - A pointer to the specification for that instruction.
+ */
+static const struct InstructionSpecifier *specifierForUID(InstrUID uid) {
+ return &INSTRUCTIONS_SYM[uid];
+}
+
+/*
+ * consumeByte - Uses the reader function provided by the user to consume one
+ * byte from the instruction's memory and advance the cursor.
+ *
+ * @param insn - The instruction with the reader function to use. The cursor
+ * for this instruction is advanced.
+ * @param byte - A pointer to a pre-allocated memory buffer to be populated
+ * with the data read.
+ * @return - 0 if the read was successful; nonzero otherwise.
+ */
+static int consumeByte(struct InternalInstruction* insn, uint8_t* byte) {
+ int ret = insn->reader(insn->readerArg, byte, insn->readerCursor);
+
+ if (!ret)
+ ++(insn->readerCursor);
+
+ return ret;
+}
+
+/*
+ * lookAtByte - Like consumeByte, but does not advance the cursor.
+ *
+ * @param insn - See consumeByte().
+ * @param byte - See consumeByte().
+ * @return - See consumeByte().
+ */
+static int lookAtByte(struct InternalInstruction* insn, uint8_t* byte) {
+ return insn->reader(insn->readerArg, byte, insn->readerCursor);
+}
+
+static void unconsumeByte(struct InternalInstruction* insn) {
+ insn->readerCursor--;
+}
+
+#define CONSUME_FUNC(name, type) \
+ static int name(struct InternalInstruction* insn, type* ptr) { \
+ type combined = 0; \
+ unsigned offset; \
+ for (offset = 0; offset < sizeof(type); ++offset) { \
+ uint8_t byte; \
+ int ret = insn->reader(insn->readerArg, \
+ &byte, \
+ insn->readerCursor + offset); \
+ if (ret) \
+ return ret; \
+ combined = combined | ((type)byte << ((type)offset * 8)); \
+ } \
+ *ptr = combined; \
+ insn->readerCursor += sizeof(type); \
+ return 0; \
+ }
+
+/*
+ * consume* - Use the reader function provided by the user to consume data
+ * values of various sizes from the instruction's memory and advance the
+ * cursor appropriately. These readers perform endian conversion.
+ *
+ * @param insn - See consumeByte().
+ * @param ptr - A pointer to a pre-allocated memory of appropriate size to
+ * be populated with the data read.
+ * @return - See consumeByte().
+ */
+CONSUME_FUNC(consumeInt8, int8_t)
+CONSUME_FUNC(consumeInt16, int16_t)
+CONSUME_FUNC(consumeInt32, int32_t)
+CONSUME_FUNC(consumeUInt16, uint16_t)
+CONSUME_FUNC(consumeUInt32, uint32_t)
+CONSUME_FUNC(consumeUInt64, uint64_t)
+
+/*
+ * dbgprintf - Uses the logging function provided by the user to log a single
+ * message, typically without a carriage-return.
+ *
+ * @param insn - The instruction containing the logging function.
+ * @param format - See printf().
+ * @param ... - See printf().
+ */
+static void dbgprintf(struct InternalInstruction* insn,
+ const char* format,
+ ...) {
+ char buffer[256];
+ va_list ap;
+
+ if (!insn->dlog)
+ return;
+
+ va_start(ap, format);
+ (void)vsnprintf(buffer, sizeof(buffer), format, ap);
+ va_end(ap);
+
+ insn->dlog(insn->dlogArg, buffer);
+
+ return;
+}
+
+/*
+ * setPrefixPresent - Marks that a particular prefix is present at a particular
+ * location.
+ *
+ * @param insn - The instruction to be marked as having the prefix.
+ * @param prefix - The prefix that is present.
+ * @param location - The location where the prefix is located (in the address
+ * space of the instruction's reader).
+ */
+static void setPrefixPresent(struct InternalInstruction* insn,
+ uint8_t prefix,
+ uint64_t location)
+{
+ insn->prefixPresent[prefix] = 1;
+ insn->prefixLocations[prefix] = location;
+}
+
+/*
+ * isPrefixAtLocation - Queries an instruction to determine whether a prefix is
+ * present at a given location.
+ *
+ * @param insn - The instruction to be queried.
+ * @param prefix - The prefix.
+ * @param location - The location to query.
+ * @return - Whether the prefix is at that location.
+ */
+static BOOL isPrefixAtLocation(struct InternalInstruction* insn,
+ uint8_t prefix,
+ uint64_t location)
+{
+ if (insn->prefixPresent[prefix] == 1 &&
+ insn->prefixLocations[prefix] == location)
+ return TRUE;
+ else
+ return FALSE;
+}
+
+/*
+ * readPrefixes - Consumes all of an instruction's prefix bytes, and marks the
+ * instruction as having them. Also sets the instruction's default operand,
+ * address, and other relevant data sizes to report operands correctly.
+ *
+ * @param insn - The instruction whose prefixes are to be read.
+ * @return - 0 if the instruction could be read until the end of the prefix
+ * bytes, and no prefixes conflicted; nonzero otherwise.
+ */
+static int readPrefixes(struct InternalInstruction* insn) {
+ BOOL isPrefix = TRUE;
+ BOOL prefixGroups[4] = { FALSE };
+ uint64_t prefixLocation;
+ uint8_t byte = 0;
+
+ BOOL hasAdSize = FALSE;
+ BOOL hasOpSize = FALSE;
+
+ dbgprintf(insn, "readPrefixes()");
+
+ while (isPrefix) {
+ prefixLocation = insn->readerCursor;
+
+ if (consumeByte(insn, &byte))
+ return -1;
+
+ switch (byte) {
+ case 0xf0: /* LOCK */
+ case 0xf2: /* REPNE/REPNZ */
+ case 0xf3: /* REP or REPE/REPZ */
+ if (prefixGroups[0])
+ dbgprintf(insn, "Redundant Group 1 prefix");
+ prefixGroups[0] = TRUE;
+ setPrefixPresent(insn, byte, prefixLocation);
+ break;
+ case 0x2e: /* CS segment override -OR- Branch not taken */
+ case 0x36: /* SS segment override -OR- Branch taken */
+ case 0x3e: /* DS segment override */
+ case 0x26: /* ES segment override */
+ case 0x64: /* FS segment override */
+ case 0x65: /* GS segment override */
+ switch (byte) {
+ case 0x2e:
+ insn->segmentOverride = SEG_OVERRIDE_CS;
+ break;
+ case 0x36:
+ insn->segmentOverride = SEG_OVERRIDE_SS;
+ break;
+ case 0x3e:
+ insn->segmentOverride = SEG_OVERRIDE_DS;
+ break;
+ case 0x26:
+ insn->segmentOverride = SEG_OVERRIDE_ES;
+ break;
+ case 0x64:
+ insn->segmentOverride = SEG_OVERRIDE_FS;
+ break;
+ case 0x65:
+ insn->segmentOverride = SEG_OVERRIDE_GS;
+ break;
+ default:
+ debug("Unhandled override");
+ return -1;
+ }
+ if (prefixGroups[1])
+ dbgprintf(insn, "Redundant Group 2 prefix");
+ prefixGroups[1] = TRUE;
+ setPrefixPresent(insn, byte, prefixLocation);
+ break;
+ case 0x66: /* Operand-size override */
+ if (prefixGroups[2])
+ dbgprintf(insn, "Redundant Group 3 prefix");
+ prefixGroups[2] = TRUE;
+ hasOpSize = TRUE;
+ setPrefixPresent(insn, byte, prefixLocation);
+ break;
+ case 0x67: /* Address-size override */
+ if (prefixGroups[3])
+ dbgprintf(insn, "Redundant Group 4 prefix");
+ prefixGroups[3] = TRUE;
+ hasAdSize = TRUE;
+ setPrefixPresent(insn, byte, prefixLocation);
+ break;
+ default: /* Not a prefix byte */
+ isPrefix = FALSE;
+ break;
+ }
+
+ if (isPrefix)
+ dbgprintf(insn, "Found prefix 0x%hhx", byte);
+ }
+
+ insn->vexSize = 0;
+
+ if (byte == 0xc4) {
+ uint8_t byte1;
+
+ if (lookAtByte(insn, &byte1)) {
+ dbgprintf(insn, "Couldn't read second byte of VEX");
+ return -1;
+ }
+
+ if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) {
+ insn->vexSize = 3;
+ insn->necessaryPrefixLocation = insn->readerCursor - 1;
+ }
+ else {
+ unconsumeByte(insn);
+ insn->necessaryPrefixLocation = insn->readerCursor - 1;
+ }
+
+ if (insn->vexSize == 3) {
+ insn->vexPrefix[0] = byte;
+ consumeByte(insn, &insn->vexPrefix[1]);
+ consumeByte(insn, &insn->vexPrefix[2]);
+
+ /* We simulate the REX prefix for simplicity's sake */
+
+ if (insn->mode == MODE_64BIT) {
+ insn->rexPrefix = 0x40
+ | (wFromVEX3of3(insn->vexPrefix[2]) << 3)
+ | (rFromVEX2of3(insn->vexPrefix[1]) << 2)
+ | (xFromVEX2of3(insn->vexPrefix[1]) << 1)
+ | (bFromVEX2of3(insn->vexPrefix[1]) << 0);
+ }
+
+ switch (ppFromVEX3of3(insn->vexPrefix[2]))
+ {
+ default:
+ break;
+ case VEX_PREFIX_66:
+ hasOpSize = TRUE;
+ break;
+ }
+
+ dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1], insn->vexPrefix[2]);
+ }
+ }
+ else if (byte == 0xc5) {
+ uint8_t byte1;
+
+ if (lookAtByte(insn, &byte1)) {
+ dbgprintf(insn, "Couldn't read second byte of VEX");
+ return -1;
+ }
+
+ if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) {
+ insn->vexSize = 2;
+ }
+ else {
+ unconsumeByte(insn);
+ }
+
+ if (insn->vexSize == 2) {
+ insn->vexPrefix[0] = byte;
+ consumeByte(insn, &insn->vexPrefix[1]);
+
+ if (insn->mode == MODE_64BIT) {
+ insn->rexPrefix = 0x40
+ | (rFromVEX2of2(insn->vexPrefix[1]) << 2);
+ }
+
+ switch (ppFromVEX2of2(insn->vexPrefix[1]))
+ {
+ default:
+ break;
+ case VEX_PREFIX_66:
+ hasOpSize = TRUE;
+ break;
+ }
+
+ dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1]);
+ }
+ }
+ else {
+ if (insn->mode == MODE_64BIT) {
+ if ((byte & 0xf0) == 0x40) {
+ uint8_t opcodeByte;
+
+ if (lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) {
+ dbgprintf(insn, "Redundant REX prefix");
+ return -1;
+ }
+
+ insn->rexPrefix = byte;
+ insn->necessaryPrefixLocation = insn->readerCursor - 2;
+
+ dbgprintf(insn, "Found REX prefix 0x%hhx", byte);
+ } else {
+ unconsumeByte(insn);
+ insn->necessaryPrefixLocation = insn->readerCursor - 1;
+ }
+ } else {
+ unconsumeByte(insn);
+ insn->necessaryPrefixLocation = insn->readerCursor - 1;
+ }
+ }
+
+ if (insn->mode == MODE_16BIT) {
+ insn->registerSize = (hasOpSize ? 4 : 2);
+ insn->addressSize = (hasAdSize ? 4 : 2);
+ insn->displacementSize = (hasAdSize ? 4 : 2);
+ insn->immediateSize = (hasOpSize ? 4 : 2);
+ } else if (insn->mode == MODE_32BIT) {
+ insn->registerSize = (hasOpSize ? 2 : 4);
+ insn->addressSize = (hasAdSize ? 2 : 4);
+ insn->displacementSize = (hasAdSize ? 2 : 4);
+ insn->immediateSize = (hasOpSize ? 2 : 4);
+ } else if (insn->mode == MODE_64BIT) {
+ if (insn->rexPrefix && wFromREX(insn->rexPrefix)) {
+ insn->registerSize = 8;
+ insn->addressSize = (hasAdSize ? 4 : 8);
+ insn->displacementSize = 4;
+ insn->immediateSize = 4;
+ } else if (insn->rexPrefix) {
+ insn->registerSize = (hasOpSize ? 2 : 4);
+ insn->addressSize = (hasAdSize ? 4 : 8);
+ insn->displacementSize = (hasOpSize ? 2 : 4);
+ insn->immediateSize = (hasOpSize ? 2 : 4);
+ } else {
+ insn->registerSize = (hasOpSize ? 2 : 4);
+ insn->addressSize = (hasAdSize ? 4 : 8);
+ insn->displacementSize = (hasOpSize ? 2 : 4);
+ insn->immediateSize = (hasOpSize ? 2 : 4);
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * readOpcode - Reads the opcode (excepting the ModR/M byte in the case of
+ * extended or escape opcodes).
+ *
+ * @param insn - The instruction whose opcode is to be read.
+ * @return - 0 if the opcode could be read successfully; nonzero otherwise.
+ */
+static int readOpcode(struct InternalInstruction* insn) {
+ /* Determine the length of the primary opcode */
+
+ uint8_t current;
+
+ dbgprintf(insn, "readOpcode()");
+
+ insn->opcodeType = ONEBYTE;
+
+ if (insn->vexSize == 3)
+ {
+ switch (mmmmmFromVEX2of3(insn->vexPrefix[1]))
+ {
+ default:
+ dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)", mmmmmFromVEX2of3(insn->vexPrefix[1]));
+ return -1;
+ case 0:
+ break;
+ case VEX_LOB_0F:
+ insn->twoByteEscape = 0x0f;
+ insn->opcodeType = TWOBYTE;
+ return consumeByte(insn, &insn->opcode);
+ case VEX_LOB_0F38:
+ insn->twoByteEscape = 0x0f;
+ insn->threeByteEscape = 0x38;
+ insn->opcodeType = THREEBYTE_38;
+ return consumeByte(insn, &insn->opcode);
+ case VEX_LOB_0F3A:
+ insn->twoByteEscape = 0x0f;
+ insn->threeByteEscape = 0x3a;
+ insn->opcodeType = THREEBYTE_3A;
+ return consumeByte(insn, &insn->opcode);
+ }
+ }
+ else if (insn->vexSize == 2)
+ {
+ insn->twoByteEscape = 0x0f;
+ insn->opcodeType = TWOBYTE;
+ return consumeByte(insn, &insn->opcode);
+ }
+
+ if (consumeByte(insn, &current))
+ return -1;
+
+ if (current == 0x0f) {
+ dbgprintf(insn, "Found a two-byte escape prefix (0x%hhx)", current);
+
+ insn->twoByteEscape = current;
+
+ if (consumeByte(insn, &current))
+ return -1;
+
+ if (current == 0x38) {
+ dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
+
+ insn->threeByteEscape = current;
+
+ if (consumeByte(insn, &current))
+ return -1;
+
+ insn->opcodeType = THREEBYTE_38;
+ } else if (current == 0x3a) {
+ dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
+
+ insn->threeByteEscape = current;
+
+ if (consumeByte(insn, &current))
+ return -1;
+
+ insn->opcodeType = THREEBYTE_3A;
+ } else if (current == 0xa6) {
+ dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
+
+ insn->threeByteEscape = current;
+
+ if (consumeByte(insn, &current))
+ return -1;
+
+ insn->opcodeType = THREEBYTE_A6;
+ } else if (current == 0xa7) {
+ dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
+
+ insn->threeByteEscape = current;
+
+ if (consumeByte(insn, &current))
+ return -1;
+
+ insn->opcodeType = THREEBYTE_A7;
+ } else {
+ dbgprintf(insn, "Didn't find a three-byte escape prefix");
+
+ insn->opcodeType = TWOBYTE;
+ }
+ }
+
+ /*
+ * At this point we have consumed the full opcode.
+ * Anything we consume from here on must be unconsumed.
+ */
+
+ insn->opcode = current;
+
+ return 0;
+}
+
+static int readModRM(struct InternalInstruction* insn);
+
+/*
+ * getIDWithAttrMask - Determines the ID of an instruction, consuming
+ * the ModR/M byte as appropriate for extended and escape opcodes,
+ * and using a supplied attribute mask.
+ *
+ * @param instructionID - A pointer whose target is filled in with the ID of the
+ * instruction.
+ * @param insn - The instruction whose ID is to be determined.
+ * @param attrMask - The attribute mask to search.
+ * @return - 0 if the ModR/M could be read when needed or was not
+ * needed; nonzero otherwise.
+ */
+static int getIDWithAttrMask(uint16_t* instructionID,
+ struct InternalInstruction* insn,
+ uint8_t attrMask) {
+ BOOL hasModRMExtension;
+
+ uint8_t instructionClass;
+
+ instructionClass = contextForAttrs(attrMask);
+
+ hasModRMExtension = modRMRequired(insn->opcodeType,
+ instructionClass,
+ insn->opcode);
+
+ if (hasModRMExtension) {
+ if (readModRM(insn))
+ return -1;
+
+ *instructionID = decode(insn->opcodeType,
+ instructionClass,
+ insn->opcode,
+ insn->modRM);
+ } else {
+ *instructionID = decode(insn->opcodeType,
+ instructionClass,
+ insn->opcode,
+ 0);
+ }
+
+ return 0;
+}
+
+/*
+ * is16BitEquivalent - Determines whether two instruction names refer to
+ * equivalent instructions but one is 16-bit whereas the other is not.
+ *
+ * @param orig - The instruction that is not 16-bit
+ * @param equiv - The instruction that is 16-bit
+ */
+static BOOL is16BitEquvalent(const char* orig, const char* equiv) {
+ off_t i;
+
+ for (i = 0;; i++) {
+ if (orig[i] == '\0' && equiv[i] == '\0')
+ return TRUE;
+ if (orig[i] == '\0' || equiv[i] == '\0')
+ return FALSE;
+ if (orig[i] != equiv[i]) {
+ if ((orig[i] == 'Q' || orig[i] == 'L') && equiv[i] == 'W')
+ continue;
+ if ((orig[i] == '6' || orig[i] == '3') && equiv[i] == '1')
+ continue;
+ if ((orig[i] == '4' || orig[i] == '2') && equiv[i] == '6')
+ continue;
+ return FALSE;
+ }
+ }
+}
+
+/*
+ * getID - Determines the ID of an instruction, consuming the ModR/M byte as
+ * appropriate for extended and escape opcodes. Determines the attributes and
+ * context for the instruction before doing so.
+ *
+ * @param insn - The instruction whose ID is to be determined.
+ * @return - 0 if the ModR/M could be read when needed or was not needed;
+ * nonzero otherwise.
+ */
+static int getID(struct InternalInstruction* insn) {
+ uint8_t attrMask;
+ uint16_t instructionID;
+
+ dbgprintf(insn, "getID()");
+
+ attrMask = ATTR_NONE;
+
+ if (insn->mode == MODE_64BIT)
+ attrMask |= ATTR_64BIT;
+
+ if (insn->vexSize) {
+ attrMask |= ATTR_VEX;
+
+ if (insn->vexSize == 3) {
+ switch (ppFromVEX3of3(insn->vexPrefix[2])) {
+ case VEX_PREFIX_66:
+ attrMask |= ATTR_OPSIZE;
+ break;
+ case VEX_PREFIX_F3:
+ attrMask |= ATTR_XS;
+ break;
+ case VEX_PREFIX_F2:
+ attrMask |= ATTR_XD;
+ break;
+ }
+
+ if (lFromVEX3of3(insn->vexPrefix[2]))
+ attrMask |= ATTR_VEXL;
+ }
+ else if (insn->vexSize == 2) {
+ switch (ppFromVEX2of2(insn->vexPrefix[1])) {
+ case VEX_PREFIX_66:
+ attrMask |= ATTR_OPSIZE;
+ break;
+ case VEX_PREFIX_F3:
+ attrMask |= ATTR_XS;
+ break;
+ case VEX_PREFIX_F2:
+ attrMask |= ATTR_XD;
+ break;
+ }
+
+ if (lFromVEX2of2(insn->vexPrefix[1]))
+ attrMask |= ATTR_VEXL;
+ }
+ else {
+ return -1;
+ }
+ }
+ else {
+ if (isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation))
+ attrMask |= ATTR_OPSIZE;
+ else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation))
+ attrMask |= ATTR_XS;
+ else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation))
+ attrMask |= ATTR_XD;
+ }
+
+ if (insn->rexPrefix & 0x08)
+ attrMask |= ATTR_REXW;
+
+ if (getIDWithAttrMask(&instructionID, insn, attrMask))
+ return -1;
+
+ /* The following clauses compensate for limitations of the tables. */
+
+ if ((attrMask & ATTR_VEXL) && (attrMask & ATTR_REXW)) {
+ /*
+ * Some VEX instructions ignore the L-bit, but use the W-bit. Normally L-bit
+ * has precedence since there are no L-bit with W-bit entries in the tables.
+ * So if the L-bit isn't significant we should use the W-bit instead.
+ */
+
+ const struct InstructionSpecifier *spec;
+ uint16_t instructionIDWithWBit;
+ const struct InstructionSpecifier *specWithWBit;
+
+ spec = specifierForUID(instructionID);
+
+ if (getIDWithAttrMask(&instructionIDWithWBit,
+ insn,
+ (attrMask & (~ATTR_VEXL)) | ATTR_REXW)) {
+ insn->instructionID = instructionID;
+ insn->spec = spec;
+ return 0;
+ }
+
+ specWithWBit = specifierForUID(instructionIDWithWBit);
+
+ if (instructionID != instructionIDWithWBit) {
+ insn->instructionID = instructionIDWithWBit;
+ insn->spec = specWithWBit;
+ } else {
+ insn->instructionID = instructionID;
+ insn->spec = spec;
+ }
+ return 0;
+ }
+
+ if (insn->prefixPresent[0x66] && !(attrMask & ATTR_OPSIZE)) {
+ /*
+ * The instruction tables make no distinction between instructions that
+ * allow OpSize anywhere (i.e., 16-bit operations) and that need it in a
+ * particular spot (i.e., many MMX operations). In general we're
+ * conservative, but in the specific case where OpSize is present but not
+ * in the right place we check if there's a 16-bit operation.
+ */
+
+ const struct InstructionSpecifier *spec;
+ uint16_t instructionIDWithOpsize;
+ const struct InstructionSpecifier *specWithOpsize;
+
+ spec = specifierForUID(instructionID);
+
+ if (getIDWithAttrMask(&instructionIDWithOpsize,
+ insn,
+ attrMask | ATTR_OPSIZE)) {
+ /*
+ * ModRM required with OpSize but not present; give up and return version
+ * without OpSize set
+ */
+
+ insn->instructionID = instructionID;
+ insn->spec = spec;
+ return 0;
+ }
+
+ specWithOpsize = specifierForUID(instructionIDWithOpsize);
+
+ if (is16BitEquvalent(spec->name, specWithOpsize->name)) {
+ insn->instructionID = instructionIDWithOpsize;
+ insn->spec = specWithOpsize;
+ } else {
+ insn->instructionID = instructionID;
+ insn->spec = spec;
+ }
+ return 0;
+ }
+
+ if (insn->opcodeType == ONEBYTE && insn->opcode == 0x90 &&
+ insn->rexPrefix & 0x01) {
+ /*
+ * NOOP shouldn't decode as NOOP if REX.b is set. Instead
+ * it should decode as XCHG %r8, %eax.
+ */
+
+ const struct InstructionSpecifier *spec;
+ uint16_t instructionIDWithNewOpcode;
+ const struct InstructionSpecifier *specWithNewOpcode;
+
+ spec = specifierForUID(instructionID);
+
+ /* Borrow opcode from one of the other XCHGar opcodes */
+ insn->opcode = 0x91;
+
+ if (getIDWithAttrMask(&instructionIDWithNewOpcode,
+ insn,
+ attrMask)) {
+ insn->opcode = 0x90;
+
+ insn->instructionID = instructionID;
+ insn->spec = spec;
+ return 0;
+ }
+
+ specWithNewOpcode = specifierForUID(instructionIDWithNewOpcode);
+
+ /* Change back */
+ insn->opcode = 0x90;
+
+ insn->instructionID = instructionIDWithNewOpcode;
+ insn->spec = specWithNewOpcode;
+
+ return 0;
+ }
+
+ insn->instructionID = instructionID;
+ insn->spec = specifierForUID(insn->instructionID);
+
+ return 0;
+}
+
+/*
+ * readSIB - Consumes the SIB byte to determine addressing information for an
+ * instruction.
+ *
+ * @param insn - The instruction whose SIB byte is to be read.
+ * @return - 0 if the SIB byte was successfully read; nonzero otherwise.
+ */
+static int readSIB(struct InternalInstruction* insn) {
+ SIBIndex sibIndexBase = 0;
+ SIBBase sibBaseBase = 0;
+ uint8_t index, base;
+
+ dbgprintf(insn, "readSIB()");
+
+ if (insn->consumedSIB)
+ return 0;
+
+ insn->consumedSIB = TRUE;
+
+ switch (insn->addressSize) {
+ case 2:
+ dbgprintf(insn, "SIB-based addressing doesn't work in 16-bit mode");
+ return -1;
+ break;
+ case 4:
+ sibIndexBase = SIB_INDEX_EAX;
+ sibBaseBase = SIB_BASE_EAX;
+ break;
+ case 8:
+ sibIndexBase = SIB_INDEX_RAX;
+ sibBaseBase = SIB_BASE_RAX;
+ break;
+ }
+
+ if (consumeByte(insn, &insn->sib))
+ return -1;
+
+ index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3);
+
+ switch (index) {
+ case 0x4:
+ insn->sibIndex = SIB_INDEX_NONE;
+ break;
+ default:
+ insn->sibIndex = (SIBIndex)(sibIndexBase + index);
+ if (insn->sibIndex == SIB_INDEX_sib ||
+ insn->sibIndex == SIB_INDEX_sib64)
+ insn->sibIndex = SIB_INDEX_NONE;
+ break;
+ }
+
+ switch (scaleFromSIB(insn->sib)) {
+ case 0:
+ insn->sibScale = 1;
+ break;
+ case 1:
+ insn->sibScale = 2;
+ break;
+ case 2:
+ insn->sibScale = 4;
+ break;
+ case 3:
+ insn->sibScale = 8;
+ break;
+ }
+
+ base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3);
+
+ switch (base) {
+ case 0x5:
+ switch (modFromModRM(insn->modRM)) {
+ case 0x0:
+ insn->eaDisplacement = EA_DISP_32;
+ insn->sibBase = SIB_BASE_NONE;
+ break;
+ case 0x1:
+ insn->eaDisplacement = EA_DISP_8;
+ insn->sibBase = (insn->addressSize == 4 ?
+ SIB_BASE_EBP : SIB_BASE_RBP);
+ break;
+ case 0x2:
+ insn->eaDisplacement = EA_DISP_32;
+ insn->sibBase = (insn->addressSize == 4 ?
+ SIB_BASE_EBP : SIB_BASE_RBP);
+ break;
+ case 0x3:
+ debug("Cannot have Mod = 0b11 and a SIB byte");
+ return -1;
+ }
+ break;
+ default:
+ insn->sibBase = (SIBBase)(sibBaseBase + base);
+ break;
+ }
+
+ return 0;
+}
+
+/*
+ * readDisplacement - Consumes the displacement of an instruction.
+ *
+ * @param insn - The instruction whose displacement is to be read.
+ * @return - 0 if the displacement byte was successfully read; nonzero
+ * otherwise.
+ */
+static int readDisplacement(struct InternalInstruction* insn) {
+ int8_t d8;
+ int16_t d16;
+ int32_t d32;
+
+ dbgprintf(insn, "readDisplacement()");
+
+ if (insn->consumedDisplacement)
+ return 0;
+
+ insn->consumedDisplacement = TRUE;
+
+ switch (insn->eaDisplacement) {
+ case EA_DISP_NONE:
+ insn->consumedDisplacement = FALSE;
+ break;
+ case EA_DISP_8:
+ if (consumeInt8(insn, &d8))
+ return -1;
+ insn->displacement = d8;
+ break;
+ case EA_DISP_16:
+ if (consumeInt16(insn, &d16))
+ return -1;
+ insn->displacement = d16;
+ break;
+ case EA_DISP_32:
+ if (consumeInt32(insn, &d32))
+ return -1;
+ insn->displacement = d32;
+ break;
+ }
+
+ insn->consumedDisplacement = TRUE;
+ return 0;
+}
+
+/*
+ * readModRM - Consumes all addressing information (ModR/M byte, SIB byte, and
+ * displacement) for an instruction and interprets it.
+ *
+ * @param insn - The instruction whose addressing information is to be read.
+ * @return - 0 if the information was successfully read; nonzero otherwise.
+ */
+static int readModRM(struct InternalInstruction* insn) {
+ uint8_t mod, rm, reg;
+
+ dbgprintf(insn, "readModRM()");
+
+ if (insn->consumedModRM)
+ return 0;
+
+ if (consumeByte(insn, &insn->modRM))
+ return -1;
+ insn->consumedModRM = TRUE;
+
+ mod = modFromModRM(insn->modRM);
+ rm = rmFromModRM(insn->modRM);
+ reg = regFromModRM(insn->modRM);
+
+ /*
+ * This goes by insn->registerSize to pick the correct register, which messes
+ * up if we're using (say) XMM or 8-bit register operands. That gets fixed in
+ * fixupReg().
+ */
+ switch (insn->registerSize) {
+ case 2:
+ insn->regBase = MODRM_REG_AX;
+ insn->eaRegBase = EA_REG_AX;
+ break;
+ case 4:
+ insn->regBase = MODRM_REG_EAX;
+ insn->eaRegBase = EA_REG_EAX;
+ break;
+ case 8:
+ insn->regBase = MODRM_REG_RAX;
+ insn->eaRegBase = EA_REG_RAX;
+ break;
+ }
+
+ reg |= rFromREX(insn->rexPrefix) << 3;
+ rm |= bFromREX(insn->rexPrefix) << 3;
+
+ insn->reg = (Reg)(insn->regBase + reg);
+
+ switch (insn->addressSize) {
+ case 2:
+ insn->eaBaseBase = EA_BASE_BX_SI;
+
+ switch (mod) {
+ case 0x0:
+ if (rm == 0x6) {
+ insn->eaBase = EA_BASE_NONE;
+ insn->eaDisplacement = EA_DISP_16;
+ if (readDisplacement(insn))
+ return -1;
+ } else {
+ insn->eaBase = (EABase)(insn->eaBaseBase + rm);
+ insn->eaDisplacement = EA_DISP_NONE;
+ }
+ break;
+ case 0x1:
+ insn->eaBase = (EABase)(insn->eaBaseBase + rm);
+ insn->eaDisplacement = EA_DISP_8;
+ if (readDisplacement(insn))
+ return -1;
+ break;
+ case 0x2:
+ insn->eaBase = (EABase)(insn->eaBaseBase + rm);
+ insn->eaDisplacement = EA_DISP_16;
+ if (readDisplacement(insn))
+ return -1;
+ break;
+ case 0x3:
+ insn->eaBase = (EABase)(insn->eaRegBase + rm);
+ if (readDisplacement(insn))
+ return -1;
+ break;
+ }
+ break;
+ case 4:
+ case 8:
+ insn->eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX);
+
+ switch (mod) {
+ case 0x0:
+ insn->eaDisplacement = EA_DISP_NONE; /* readSIB may override this */
+ switch (rm) {
+ case 0x4:
+ case 0xc: /* in case REXW.b is set */
+ insn->eaBase = (insn->addressSize == 4 ?
+ EA_BASE_sib : EA_BASE_sib64);
+ readSIB(insn);
+ if (readDisplacement(insn))
+ return -1;
+ break;
+ case 0x5:
+ insn->eaBase = EA_BASE_NONE;
+ insn->eaDisplacement = EA_DISP_32;
+ if (readDisplacement(insn))
+ return -1;
+ break;
+ default:
+ insn->eaBase = (EABase)(insn->eaBaseBase + rm);
+ break;
+ }
+ break;
+ case 0x1:
+ case 0x2:
+ insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32);
+ switch (rm) {
+ case 0x4:
+ case 0xc: /* in case REXW.b is set */
+ insn->eaBase = EA_BASE_sib;
+ readSIB(insn);
+ if (readDisplacement(insn))
+ return -1;
+ break;
+ default:
+ insn->eaBase = (EABase)(insn->eaBaseBase + rm);
+ if (readDisplacement(insn))
+ return -1;
+ break;
+ }
+ break;
+ case 0x3:
+ insn->eaDisplacement = EA_DISP_NONE;
+ insn->eaBase = (EABase)(insn->eaRegBase + rm);
+ break;
+ }
+ break;
+ } /* switch (insn->addressSize) */
+
+ return 0;
+}
+
+#define GENERIC_FIXUP_FUNC(name, base, prefix) \
+ static uint8_t name(struct InternalInstruction *insn, \
+ OperandType type, \
+ uint8_t index, \
+ uint8_t *valid) { \
+ *valid = 1; \
+ switch (type) { \
+ default: \
+ debug("Unhandled register type"); \
+ *valid = 0; \
+ return 0; \
+ case TYPE_Rv: \
+ return base + index; \
+ case TYPE_R8: \
+ if (insn->rexPrefix && \
+ index >= 4 && index <= 7) { \
+ return prefix##_SPL + (index - 4); \
+ } else { \
+ return prefix##_AL + index; \
+ } \
+ case TYPE_R16: \
+ return prefix##_AX + index; \
+ case TYPE_R32: \
+ return prefix##_EAX + index; \
+ case TYPE_R64: \
+ return prefix##_RAX + index; \
+ case TYPE_XMM256: \
+ return prefix##_YMM0 + index; \
+ case TYPE_XMM128: \
+ case TYPE_XMM64: \
+ case TYPE_XMM32: \
+ case TYPE_XMM: \
+ return prefix##_XMM0 + index; \
+ case TYPE_MM64: \
+ case TYPE_MM32: \
+ case TYPE_MM: \
+ if (index > 7) \
+ *valid = 0; \
+ return prefix##_MM0 + index; \
+ case TYPE_SEGMENTREG: \
+ if (index > 5) \
+ *valid = 0; \
+ return prefix##_ES + index; \
+ case TYPE_DEBUGREG: \
+ if (index > 7) \
+ *valid = 0; \
+ return prefix##_DR0 + index; \
+ case TYPE_CONTROLREG: \
+ if (index > 8) \
+ *valid = 0; \
+ return prefix##_CR0 + index; \
+ } \
+ }
+
+/*
+ * fixup*Value - Consults an operand type to determine the meaning of the
+ * reg or R/M field. If the operand is an XMM operand, for example, an
+ * operand would be XMM0 instead of AX, which readModRM() would otherwise
+ * misinterpret it as.
+ *
+ * @param insn - The instruction containing the operand.
+ * @param type - The operand type.
+ * @param index - The existing value of the field as reported by readModRM().
+ * @param valid - The address of a uint8_t. The target is set to 1 if the
+ * field is valid for the register class; 0 if not.
+ * @return - The proper value.
+ */
+GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, MODRM_REG)
+GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG)
+
+/*
+ * fixupReg - Consults an operand specifier to determine which of the
+ * fixup*Value functions to use in correcting readModRM()'ss interpretation.
+ *
+ * @param insn - See fixup*Value().
+ * @param op - The operand specifier.
+ * @return - 0 if fixup was successful; -1 if the register returned was
+ * invalid for its class.
+ */
+static int fixupReg(struct InternalInstruction *insn,
+ const struct OperandSpecifier *op) {
+ uint8_t valid;
+
+ dbgprintf(insn, "fixupReg()");
+
+ switch ((OperandEncoding)op->encoding) {
+ default:
+ debug("Expected a REG or R/M encoding in fixupReg");
+ return -1;
+ case ENCODING_VVVV:
+ insn->vvvv = (Reg)fixupRegValue(insn,
+ (OperandType)op->type,
+ insn->vvvv,
+ &valid);
+ if (!valid)
+ return -1;
+ break;
+ case ENCODING_REG:
+ insn->reg = (Reg)fixupRegValue(insn,
+ (OperandType)op->type,
+ insn->reg - insn->regBase,
+ &valid);
+ if (!valid)
+ return -1;
+ break;
+ case ENCODING_RM:
+ if (insn->eaBase >= insn->eaRegBase) {
+ insn->eaBase = (EABase)fixupRMValue(insn,
+ (OperandType)op->type,
+ insn->eaBase - insn->eaRegBase,
+ &valid);
+ if (!valid)
+ return -1;
+ }
+ break;
+ }
+
+ return 0;
+}
+
+/*
+ * readOpcodeModifier - Reads an operand from the opcode field of an
+ * instruction. Handles AddRegFrm instructions.
+ *
+ * @param insn - The instruction whose opcode field is to be read.
+ * @param inModRM - Indicates that the opcode field is to be read from the
+ * ModR/M extension; useful for escape opcodes
+ * @return - 0 on success; nonzero otherwise.
+ */
+static int readOpcodeModifier(struct InternalInstruction* insn) {
+ dbgprintf(insn, "readOpcodeModifier()");
+
+ if (insn->consumedOpcodeModifier)
+ return 0;
+
+ insn->consumedOpcodeModifier = TRUE;
+
+ switch (insn->spec->modifierType) {
+ default:
+ debug("Unknown modifier type.");
+ return -1;
+ case MODIFIER_NONE:
+ debug("No modifier but an operand expects one.");
+ return -1;
+ case MODIFIER_OPCODE:
+ insn->opcodeModifier = insn->opcode - insn->spec->modifierBase;
+ return 0;
+ case MODIFIER_MODRM:
+ insn->opcodeModifier = insn->modRM - insn->spec->modifierBase;
+ return 0;
+ }
+}
+
+/*
+ * readOpcodeRegister - Reads an operand from the opcode field of an
+ * instruction and interprets it appropriately given the operand width.
+ * Handles AddRegFrm instructions.
+ *
+ * @param insn - See readOpcodeModifier().
+ * @param size - The width (in bytes) of the register being specified.
+ * 1 means AL and friends, 2 means AX, 4 means EAX, and 8 means
+ * RAX.
+ * @return - 0 on success; nonzero otherwise.
+ */
+static int readOpcodeRegister(struct InternalInstruction* insn, uint8_t size) {
+ dbgprintf(insn, "readOpcodeRegister()");
+
+ if (readOpcodeModifier(insn))
+ return -1;
+
+ if (size == 0)
+ size = insn->registerSize;
+
+ switch (size) {
+ case 1:
+ insn->opcodeRegister = (Reg)(MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3)
+ | insn->opcodeModifier));
+ if (insn->rexPrefix &&
+ insn->opcodeRegister >= MODRM_REG_AL + 0x4 &&
+ insn->opcodeRegister < MODRM_REG_AL + 0x8) {
+ insn->opcodeRegister = (Reg)(MODRM_REG_SPL
+ + (insn->opcodeRegister - MODRM_REG_AL - 4));
+ }
+
+ break;
+ case 2:
+ insn->opcodeRegister = (Reg)(MODRM_REG_AX
+ + ((bFromREX(insn->rexPrefix) << 3)
+ | insn->opcodeModifier));
+ break;
+ case 4:
+ insn->opcodeRegister = (Reg)(MODRM_REG_EAX
+ + ((bFromREX(insn->rexPrefix) << 3)
+ | insn->opcodeModifier));
+ break;
+ case 8:
+ insn->opcodeRegister = (Reg)(MODRM_REG_RAX
+ + ((bFromREX(insn->rexPrefix) << 3)
+ | insn->opcodeModifier));
+ break;
+ }
+
+ return 0;
+}
+
+/*
+ * readImmediate - Consumes an immediate operand from an instruction, given the
+ * desired operand size.
+ *
+ * @param insn - The instruction whose operand is to be read.
+ * @param size - The width (in bytes) of the operand.
+ * @return - 0 if the immediate was successfully consumed; nonzero
+ * otherwise.
+ */
+static int readImmediate(struct InternalInstruction* insn, uint8_t size) {
+ uint8_t imm8;
+ uint16_t imm16;
+ uint32_t imm32;
+ uint64_t imm64;
+
+ dbgprintf(insn, "readImmediate()");
+
+ if (insn->numImmediatesConsumed == 2) {
+ debug("Already consumed two immediates");
+ return -1;
+ }
+
+ if (size == 0)
+ size = insn->immediateSize;
+ else
+ insn->immediateSize = size;
+
+ switch (size) {
+ case 1:
+ if (consumeByte(insn, &imm8))
+ return -1;
+ insn->immediates[insn->numImmediatesConsumed] = imm8;
+ break;
+ case 2:
+ if (consumeUInt16(insn, &imm16))
+ return -1;
+ insn->immediates[insn->numImmediatesConsumed] = imm16;
+ break;
+ case 4:
+ if (consumeUInt32(insn, &imm32))
+ return -1;
+ insn->immediates[insn->numImmediatesConsumed] = imm32;
+ break;
+ case 8:
+ if (consumeUInt64(insn, &imm64))
+ return -1;
+ insn->immediates[insn->numImmediatesConsumed] = imm64;
+ break;
+ }
+
+ insn->numImmediatesConsumed++;
+
+ return 0;
+}
+
+/*
+ * readVVVV - Consumes vvvv from an instruction if it has a VEX prefix.
+ *
+ * @param insn - The instruction whose operand is to be read.
+ * @return - 0 if the vvvv was successfully consumed; nonzero
+ * otherwise.
+ */
+static int readVVVV(struct InternalInstruction* insn) {
+ dbgprintf(insn, "readVVVV()");
+
+ if (insn->vexSize == 3)
+ insn->vvvv = vvvvFromVEX3of3(insn->vexPrefix[2]);
+ else if (insn->vexSize == 2)
+ insn->vvvv = vvvvFromVEX2of2(insn->vexPrefix[1]);
+ else
+ return -1;
+
+ if (insn->mode != MODE_64BIT)
+ insn->vvvv &= 0x7;
+
+ return 0;
+}
+
+/*
+ * readOperands - Consults the specifier for an instruction and consumes all
+ * operands for that instruction, interpreting them as it goes.
+ *
+ * @param insn - The instruction whose operands are to be read and interpreted.
+ * @return - 0 if all operands could be read; nonzero otherwise.
+ */
+static int readOperands(struct InternalInstruction* insn) {
+ int index;
+ int hasVVVV, needVVVV;
+
+ dbgprintf(insn, "readOperands()");
+
+ /* If non-zero vvvv specified, need to make sure one of the operands
+ uses it. */
+ hasVVVV = !readVVVV(insn);
+ needVVVV = hasVVVV && (insn->vvvv != 0);
+
+ for (index = 0; index < X86_MAX_OPERANDS; ++index) {
+ switch (insn->spec->operands[index].encoding) {
+ case ENCODING_NONE:
+ break;
+ case ENCODING_REG:
+ case ENCODING_RM:
+ if (readModRM(insn))
+ return -1;
+ if (fixupReg(insn, &insn->spec->operands[index]))
+ return -1;
+ break;
+ case ENCODING_CB:
+ case ENCODING_CW:
+ case ENCODING_CD:
+ case ENCODING_CP:
+ case ENCODING_CO:
+ case ENCODING_CT:
+ dbgprintf(insn, "We currently don't hande code-offset encodings");
+ return -1;
+ case ENCODING_IB:
+ if (readImmediate(insn, 1))
+ return -1;
+ if (insn->spec->operands[index].type == TYPE_IMM3 &&
+ insn->immediates[insn->numImmediatesConsumed - 1] > 7)
+ return -1;
+ break;
+ case ENCODING_IW:
+ if (readImmediate(insn, 2))
+ return -1;
+ break;
+ case ENCODING_ID:
+ if (readImmediate(insn, 4))
+ return -1;
+ break;
+ case ENCODING_IO:
+ if (readImmediate(insn, 8))
+ return -1;
+ break;
+ case ENCODING_Iv:
+ if (readImmediate(insn, insn->immediateSize))
+ return -1;
+ break;
+ case ENCODING_Ia:
+ if (readImmediate(insn, insn->addressSize))
+ return -1;
+ break;
+ case ENCODING_RB:
+ if (readOpcodeRegister(insn, 1))
+ return -1;
+ break;
+ case ENCODING_RW:
+ if (readOpcodeRegister(insn, 2))
+ return -1;
+ break;
+ case ENCODING_RD:
+ if (readOpcodeRegister(insn, 4))
+ return -1;
+ break;
+ case ENCODING_RO:
+ if (readOpcodeRegister(insn, 8))
+ return -1;
+ break;
+ case ENCODING_Rv:
+ if (readOpcodeRegister(insn, 0))
+ return -1;
+ break;
+ case ENCODING_I:
+ if (readOpcodeModifier(insn))
+ return -1;
+ break;
+ case ENCODING_VVVV:
+ needVVVV = 0; /* Mark that we have found a VVVV operand. */
+ if (!hasVVVV)
+ return -1;
+ if (fixupReg(insn, &insn->spec->operands[index]))
+ return -1;
+ break;
+ case ENCODING_DUP:
+ break;
+ default:
+ dbgprintf(insn, "Encountered an operand with an unknown encoding.");
+ return -1;
+ }
+ }
+
+ /* If we didn't find ENCODING_VVVV operand, but non-zero vvvv present, fail */
+ if (needVVVV) return -1;
+
+ return 0;
+}
+
+/*
+ * decodeInstruction - Reads and interprets a full instruction provided by the
+ * user.
+ *
+ * @param insn - A pointer to the instruction to be populated. Must be
+ * pre-allocated.
+ * @param reader - The function to be used to read the instruction's bytes.
+ * @param readerArg - A generic argument to be passed to the reader to store
+ * any internal state.
+ * @param logger - If non-NULL, the function to be used to write log messages
+ * and warnings.
+ * @param loggerArg - A generic argument to be passed to the logger to store
+ * any internal state.
+ * @param startLoc - The address (in the reader's address space) of the first
+ * byte in the instruction.
+ * @param mode - The mode (real mode, IA-32e, or IA-32e in 64-bit mode) to
+ * decode the instruction in.
+ * @return - 0 if the instruction's memory could be read; nonzero if
+ * not.
+ */
+int decodeInstruction(struct InternalInstruction* insn,
+ byteReader_t reader,
+ void* readerArg,
+ dlog_t logger,
+ void* loggerArg,
+ uint64_t startLoc,
+ DisassemblerMode mode) {
+ memset(insn, 0, sizeof(struct InternalInstruction));
+
+ insn->reader = reader;
+ insn->readerArg = readerArg;
+ insn->dlog = logger;
+ insn->dlogArg = loggerArg;
+ insn->startLocation = startLoc;
+ insn->readerCursor = startLoc;
+ insn->mode = mode;
+ insn->numImmediatesConsumed = 0;
+
+ if (readPrefixes(insn) ||
+ readOpcode(insn) ||
+ getID(insn) ||
+ insn->instructionID == 0 ||
+ readOperands(insn))
+ return -1;
+
+ insn->length = insn->readerCursor - insn->startLocation;
+
+ dbgprintf(insn, "Read from 0x%llx to 0x%llx: length %zu",
+ startLoc, insn->readerCursor, insn->length);
+
+ if (insn->length > 15)
+ dbgprintf(insn, "Instruction exceeds 15-byte limit");
+
+ return 0;
+}
diff --git a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
new file mode 100644
index 0000000..a9c90f8
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
@@ -0,0 +1,575 @@
+/*===- X86DisassemblerDecoderInternal.h - Disassembler decoder -----*- C -*-==*
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===*
+ *
+ * This file is part of the X86 Disassembler.
+ * It contains the public interface of the instruction decoder.
+ * Documentation for the disassembler can be found in X86Disassembler.h.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#ifndef X86DISASSEMBLERDECODER_H
+#define X86DISASSEMBLERDECODER_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define INSTRUCTION_SPECIFIER_FIELDS \
+ const char* name;
+
+#define INSTRUCTION_IDS \
+ const InstrUID *instructionIDs;
+
+#include "X86DisassemblerDecoderCommon.h"
+
+#undef INSTRUCTION_SPECIFIER_FIELDS
+#undef INSTRUCTION_IDS
+
+/*
+ * Accessor functions for various fields of an Intel instruction
+ */
+#define modFromModRM(modRM) (((modRM) & 0xc0) >> 6)
+#define regFromModRM(modRM) (((modRM) & 0x38) >> 3)
+#define rmFromModRM(modRM) ((modRM) & 0x7)
+#define scaleFromSIB(sib) (((sib) & 0xc0) >> 6)
+#define indexFromSIB(sib) (((sib) & 0x38) >> 3)
+#define baseFromSIB(sib) ((sib) & 0x7)
+#define wFromREX(rex) (((rex) & 0x8) >> 3)
+#define rFromREX(rex) (((rex) & 0x4) >> 2)
+#define xFromREX(rex) (((rex) & 0x2) >> 1)
+#define bFromREX(rex) ((rex) & 0x1)
+
+#define rFromVEX2of3(vex) (((~(vex)) & 0x80) >> 7)
+#define xFromVEX2of3(vex) (((~(vex)) & 0x40) >> 6)
+#define bFromVEX2of3(vex) (((~(vex)) & 0x20) >> 5)
+#define mmmmmFromVEX2of3(vex) ((vex) & 0x1f)
+#define wFromVEX3of3(vex) (((vex) & 0x80) >> 7)
+#define vvvvFromVEX3of3(vex) (((~(vex)) & 0x78) >> 3)
+#define lFromVEX3of3(vex) (((vex) & 0x4) >> 2)
+#define ppFromVEX3of3(vex) ((vex) & 0x3)
+
+#define rFromVEX2of2(vex) (((~(vex)) & 0x80) >> 7)
+#define vvvvFromVEX2of2(vex) (((~(vex)) & 0x78) >> 3)
+#define lFromVEX2of2(vex) (((vex) & 0x4) >> 2)
+#define ppFromVEX2of2(vex) ((vex) & 0x3)
+
+/*
+ * These enums represent Intel registers for use by the decoder.
+ */
+
+#define REGS_8BIT \
+ ENTRY(AL) \
+ ENTRY(CL) \
+ ENTRY(DL) \
+ ENTRY(BL) \
+ ENTRY(AH) \
+ ENTRY(CH) \
+ ENTRY(DH) \
+ ENTRY(BH) \
+ ENTRY(R8B) \
+ ENTRY(R9B) \
+ ENTRY(R10B) \
+ ENTRY(R11B) \
+ ENTRY(R12B) \
+ ENTRY(R13B) \
+ ENTRY(R14B) \
+ ENTRY(R15B) \
+ ENTRY(SPL) \
+ ENTRY(BPL) \
+ ENTRY(SIL) \
+ ENTRY(DIL)
+
+#define EA_BASES_16BIT \
+ ENTRY(BX_SI) \
+ ENTRY(BX_DI) \
+ ENTRY(BP_SI) \
+ ENTRY(BP_DI) \
+ ENTRY(SI) \
+ ENTRY(DI) \
+ ENTRY(BP) \
+ ENTRY(BX) \
+ ENTRY(R8W) \
+ ENTRY(R9W) \
+ ENTRY(R10W) \
+ ENTRY(R11W) \
+ ENTRY(R12W) \
+ ENTRY(R13W) \
+ ENTRY(R14W) \
+ ENTRY(R15W)
+
+#define REGS_16BIT \
+ ENTRY(AX) \
+ ENTRY(CX) \
+ ENTRY(DX) \
+ ENTRY(BX) \
+ ENTRY(SP) \
+ ENTRY(BP) \
+ ENTRY(SI) \
+ ENTRY(DI) \
+ ENTRY(R8W) \
+ ENTRY(R9W) \
+ ENTRY(R10W) \
+ ENTRY(R11W) \
+ ENTRY(R12W) \
+ ENTRY(R13W) \
+ ENTRY(R14W) \
+ ENTRY(R15W)
+
+#define EA_BASES_32BIT \
+ ENTRY(EAX) \
+ ENTRY(ECX) \
+ ENTRY(EDX) \
+ ENTRY(EBX) \
+ ENTRY(sib) \
+ ENTRY(EBP) \
+ ENTRY(ESI) \
+ ENTRY(EDI) \
+ ENTRY(R8D) \
+ ENTRY(R9D) \
+ ENTRY(R10D) \
+ ENTRY(R11D) \
+ ENTRY(R12D) \
+ ENTRY(R13D) \
+ ENTRY(R14D) \
+ ENTRY(R15D)
+
+#define REGS_32BIT \
+ ENTRY(EAX) \
+ ENTRY(ECX) \
+ ENTRY(EDX) \
+ ENTRY(EBX) \
+ ENTRY(ESP) \
+ ENTRY(EBP) \
+ ENTRY(ESI) \
+ ENTRY(EDI) \
+ ENTRY(R8D) \
+ ENTRY(R9D) \
+ ENTRY(R10D) \
+ ENTRY(R11D) \
+ ENTRY(R12D) \
+ ENTRY(R13D) \
+ ENTRY(R14D) \
+ ENTRY(R15D)
+
+#define EA_BASES_64BIT \
+ ENTRY(RAX) \
+ ENTRY(RCX) \
+ ENTRY(RDX) \
+ ENTRY(RBX) \
+ ENTRY(sib64) \
+ ENTRY(RBP) \
+ ENTRY(RSI) \
+ ENTRY(RDI) \
+ ENTRY(R8) \
+ ENTRY(R9) \
+ ENTRY(R10) \
+ ENTRY(R11) \
+ ENTRY(R12) \
+ ENTRY(R13) \
+ ENTRY(R14) \
+ ENTRY(R15)
+
+#define REGS_64BIT \
+ ENTRY(RAX) \
+ ENTRY(RCX) \
+ ENTRY(RDX) \
+ ENTRY(RBX) \
+ ENTRY(RSP) \
+ ENTRY(RBP) \
+ ENTRY(RSI) \
+ ENTRY(RDI) \
+ ENTRY(R8) \
+ ENTRY(R9) \
+ ENTRY(R10) \
+ ENTRY(R11) \
+ ENTRY(R12) \
+ ENTRY(R13) \
+ ENTRY(R14) \
+ ENTRY(R15)
+
+#define REGS_MMX \
+ ENTRY(MM0) \
+ ENTRY(MM1) \
+ ENTRY(MM2) \
+ ENTRY(MM3) \
+ ENTRY(MM4) \
+ ENTRY(MM5) \
+ ENTRY(MM6) \
+ ENTRY(MM7)
+
+#define REGS_XMM \
+ ENTRY(XMM0) \
+ ENTRY(XMM1) \
+ ENTRY(XMM2) \
+ ENTRY(XMM3) \
+ ENTRY(XMM4) \
+ ENTRY(XMM5) \
+ ENTRY(XMM6) \
+ ENTRY(XMM7) \
+ ENTRY(XMM8) \
+ ENTRY(XMM9) \
+ ENTRY(XMM10) \
+ ENTRY(XMM11) \
+ ENTRY(XMM12) \
+ ENTRY(XMM13) \
+ ENTRY(XMM14) \
+ ENTRY(XMM15)
+
+#define REGS_YMM \
+ ENTRY(YMM0) \
+ ENTRY(YMM1) \
+ ENTRY(YMM2) \
+ ENTRY(YMM3) \
+ ENTRY(YMM4) \
+ ENTRY(YMM5) \
+ ENTRY(YMM6) \
+ ENTRY(YMM7) \
+ ENTRY(YMM8) \
+ ENTRY(YMM9) \
+ ENTRY(YMM10) \
+ ENTRY(YMM11) \
+ ENTRY(YMM12) \
+ ENTRY(YMM13) \
+ ENTRY(YMM14) \
+ ENTRY(YMM15)
+
+#define REGS_SEGMENT \
+ ENTRY(ES) \
+ ENTRY(CS) \
+ ENTRY(SS) \
+ ENTRY(DS) \
+ ENTRY(FS) \
+ ENTRY(GS)
+
+#define REGS_DEBUG \
+ ENTRY(DR0) \
+ ENTRY(DR1) \
+ ENTRY(DR2) \
+ ENTRY(DR3) \
+ ENTRY(DR4) \
+ ENTRY(DR5) \
+ ENTRY(DR6) \
+ ENTRY(DR7)
+
+#define REGS_CONTROL \
+ ENTRY(CR0) \
+ ENTRY(CR1) \
+ ENTRY(CR2) \
+ ENTRY(CR3) \
+ ENTRY(CR4) \
+ ENTRY(CR5) \
+ ENTRY(CR6) \
+ ENTRY(CR7) \
+ ENTRY(CR8)
+
+#define ALL_EA_BASES \
+ EA_BASES_16BIT \
+ EA_BASES_32BIT \
+ EA_BASES_64BIT
+
+#define ALL_SIB_BASES \
+ REGS_32BIT \
+ REGS_64BIT
+
+#define ALL_REGS \
+ REGS_8BIT \
+ REGS_16BIT \
+ REGS_32BIT \
+ REGS_64BIT \
+ REGS_MMX \
+ REGS_XMM \
+ REGS_YMM \
+ REGS_SEGMENT \
+ REGS_DEBUG \
+ REGS_CONTROL \
+ ENTRY(RIP)
+
+/*
+ * EABase - All possible values of the base field for effective-address
+ * computations, a.k.a. the Mod and R/M fields of the ModR/M byte. We
+ * distinguish between bases (EA_BASE_*) and registers that just happen to be
+ * referred to when Mod == 0b11 (EA_REG_*).
+ */
+typedef enum {
+ EA_BASE_NONE,
+#define ENTRY(x) EA_BASE_##x,
+ ALL_EA_BASES
+#undef ENTRY
+#define ENTRY(x) EA_REG_##x,
+ ALL_REGS
+#undef ENTRY
+ EA_max
+} EABase;
+
+/*
+ * SIBIndex - All possible values of the SIB index field.
+ * Borrows entries from ALL_EA_BASES with the special case that
+ * sib is synonymous with NONE.
+ */
+typedef enum {
+ SIB_INDEX_NONE,
+#define ENTRY(x) SIB_INDEX_##x,
+ ALL_EA_BASES
+#undef ENTRY
+ SIB_INDEX_max
+} SIBIndex;
+
+/*
+ * SIBBase - All possible values of the SIB base field.
+ */
+typedef enum {
+ SIB_BASE_NONE,
+#define ENTRY(x) SIB_BASE_##x,
+ ALL_SIB_BASES
+#undef ENTRY
+ SIB_BASE_max
+} SIBBase;
+
+/*
+ * EADisplacement - Possible displacement types for effective-address
+ * computations.
+ */
+typedef enum {
+ EA_DISP_NONE,
+ EA_DISP_8,
+ EA_DISP_16,
+ EA_DISP_32
+} EADisplacement;
+
+/*
+ * Reg - All possible values of the reg field in the ModR/M byte.
+ */
+typedef enum {
+#define ENTRY(x) MODRM_REG_##x,
+ ALL_REGS
+#undef ENTRY
+ MODRM_REG_max
+} Reg;
+
+/*
+ * SegmentOverride - All possible segment overrides.
+ */
+typedef enum {
+ SEG_OVERRIDE_NONE,
+ SEG_OVERRIDE_CS,
+ SEG_OVERRIDE_SS,
+ SEG_OVERRIDE_DS,
+ SEG_OVERRIDE_ES,
+ SEG_OVERRIDE_FS,
+ SEG_OVERRIDE_GS,
+ SEG_OVERRIDE_max
+} SegmentOverride;
+
+/*
+ * VEXLeadingOpcodeByte - Possible values for the VEX.m-mmmm field
+ */
+
+typedef enum {
+ VEX_LOB_0F = 0x1,
+ VEX_LOB_0F38 = 0x2,
+ VEX_LOB_0F3A = 0x3
+} VEXLeadingOpcodeByte;
+
+/*
+ * VEXPrefixCode - Possible values for the VEX.pp field
+ */
+
+typedef enum {
+ VEX_PREFIX_NONE = 0x0,
+ VEX_PREFIX_66 = 0x1,
+ VEX_PREFIX_F3 = 0x2,
+ VEX_PREFIX_F2 = 0x3
+} VEXPrefixCode;
+
+typedef uint8_t BOOL;
+
+/*
+ * byteReader_t - Type for the byte reader that the consumer must provide to
+ * the decoder. Reads a single byte from the instruction's address space.
+ * @param arg - A baton that the consumer can associate with any internal
+ * state that it needs.
+ * @param byte - A pointer to a single byte in memory that should be set to
+ * contain the value at address.
+ * @param address - The address in the instruction's address space that should
+ * be read from.
+ * @return - -1 if the byte cannot be read for any reason; 0 otherwise.
+ */
+typedef int (*byteReader_t)(void* arg, uint8_t* byte, uint64_t address);
+
+/*
+ * dlog_t - Type for the logging function that the consumer can provide to
+ * get debugging output from the decoder.
+ * @param arg - A baton that the consumer can associate with any internal
+ * state that it needs.
+ * @param log - A string that contains the message. Will be reused after
+ * the logger returns.
+ */
+typedef void (*dlog_t)(void* arg, const char *log);
+
+/*
+ * The x86 internal instruction, which is produced by the decoder.
+ */
+struct InternalInstruction {
+ /* Reader interface (C) */
+ byteReader_t reader;
+ /* Opaque value passed to the reader */
+ void* readerArg;
+ /* The address of the next byte to read via the reader */
+ uint64_t readerCursor;
+
+ /* Logger interface (C) */
+ dlog_t dlog;
+ /* Opaque value passed to the logger */
+ void* dlogArg;
+
+ /* General instruction information */
+
+ /* The mode to disassemble for (64-bit, protected, real) */
+ DisassemblerMode mode;
+ /* The start of the instruction, usable with the reader */
+ uint64_t startLocation;
+ /* The length of the instruction, in bytes */
+ size_t length;
+
+ /* Prefix state */
+
+ /* 1 if the prefix byte corresponding to the entry is present; 0 if not */
+ uint8_t prefixPresent[0x100];
+ /* contains the location (for use with the reader) of the prefix byte */
+ uint64_t prefixLocations[0x100];
+ /* The value of the VEX prefix, if present */
+ uint8_t vexPrefix[3];
+ /* The length of the VEX prefix (0 if not present) */
+ uint8_t vexSize;
+ /* The value of the REX prefix, if present */
+ uint8_t rexPrefix;
+ /* The location where a mandatory prefix would have to be (i.e., right before
+ the opcode, or right before the REX prefix if one is present) */
+ uint64_t necessaryPrefixLocation;
+ /* The segment override type */
+ SegmentOverride segmentOverride;
+
+ /* Sizes of various critical pieces of data, in bytes */
+ uint8_t registerSize;
+ uint8_t addressSize;
+ uint8_t displacementSize;
+ uint8_t immediateSize;
+
+ /* opcode state */
+
+ /* The value of the two-byte escape prefix (usually 0x0f) */
+ uint8_t twoByteEscape;
+ /* The value of the three-byte escape prefix (usually 0x38 or 0x3a) */
+ uint8_t threeByteEscape;
+ /* The last byte of the opcode, not counting any ModR/M extension */
+ uint8_t opcode;
+ /* The ModR/M byte of the instruction, if it is an opcode extension */
+ uint8_t modRMExtension;
+
+ /* decode state */
+
+ /* The type of opcode, used for indexing into the array of decode tables */
+ OpcodeType opcodeType;
+ /* The instruction ID, extracted from the decode table */
+ uint16_t instructionID;
+ /* The specifier for the instruction, from the instruction info table */
+ const struct InstructionSpecifier *spec;
+
+ /* state for additional bytes, consumed during operand decode. Pattern:
+ consumed___ indicates that the byte was already consumed and does not
+ need to be consumed again */
+
+ /* The VEX.vvvv field, which contains a third register operand for some AVX
+ instructions */
+ Reg vvvv;
+
+ /* The ModR/M byte, which contains most register operands and some portion of
+ all memory operands */
+ BOOL consumedModRM;
+ uint8_t modRM;
+
+ /* The SIB byte, used for more complex 32- or 64-bit memory operands */
+ BOOL consumedSIB;
+ uint8_t sib;
+
+ /* The displacement, used for memory operands */
+ BOOL consumedDisplacement;
+ int32_t displacement;
+
+ /* Immediates. There can be two in some cases */
+ uint8_t numImmediatesConsumed;
+ uint8_t numImmediatesTranslated;
+ uint64_t immediates[2];
+
+ /* A register or immediate operand encoded into the opcode */
+ BOOL consumedOpcodeModifier;
+ uint8_t opcodeModifier;
+ Reg opcodeRegister;
+
+ /* Portions of the ModR/M byte */
+
+ /* These fields determine the allowable values for the ModR/M fields, which
+ depend on operand and address widths */
+ EABase eaBaseBase;
+ EABase eaRegBase;
+ Reg regBase;
+
+ /* The Mod and R/M fields can encode a base for an effective address, or a
+ register. These are separated into two fields here */
+ EABase eaBase;
+ EADisplacement eaDisplacement;
+ /* The reg field always encodes a register */
+ Reg reg;
+
+ /* SIB state */
+ SIBIndex sibIndex;
+ uint8_t sibScale;
+ SIBBase sibBase;
+};
+
+/* decodeInstruction - Decode one instruction and store the decoding results in
+ * a buffer provided by the consumer.
+ * @param insn - The buffer to store the instruction in. Allocated by the
+ * consumer.
+ * @param reader - The byteReader_t for the bytes to be read.
+ * @param readerArg - An argument to pass to the reader for storing context
+ * specific to the consumer. May be NULL.
+ * @param logger - The dlog_t to be used in printing status messages from the
+ * disassembler. May be NULL.
+ * @param loggerArg - An argument to pass to the logger for storing context
+ * specific to the logger. May be NULL.
+ * @param startLoc - The address (in the reader's address space) of the first
+ * byte in the instruction.
+ * @param mode - The mode (16-bit, 32-bit, 64-bit) to decode in.
+ * @return - Nonzero if there was an error during decode, 0 otherwise.
+ */
+int decodeInstruction(struct InternalInstruction* insn,
+ byteReader_t reader,
+ void* readerArg,
+ dlog_t logger,
+ void* loggerArg,
+ uint64_t startLoc,
+ DisassemblerMode mode);
+
+/* x86DisassemblerDebug - C-accessible function for printing a message to
+ * debugs()
+ * @param file - The name of the file printing the debug message.
+ * @param line - The line number that printed the debug message.
+ * @param s - The message to print.
+ */
+
+void x86DisassemblerDebug(const char *file,
+ unsigned line,
+ const char *s);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
new file mode 100644
index 0000000..8b79335
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
@@ -0,0 +1,385 @@
+/*===- X86DisassemblerDecoderCommon.h - Disassembler decoder -------*- C -*-==*
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===*
+ *
+ * This file is part of the X86 Disassembler.
+ * It contains common definitions used by both the disassembler and the table
+ * generator.
+ * Documentation for the disassembler can be found in X86Disassembler.h.
+ *
+ *===----------------------------------------------------------------------===*/
+
+/*
+ * This header file provides those definitions that need to be shared between
+ * the decoder and the table generator in a C-friendly manner.
+ */
+
+#ifndef X86DISASSEMBLERDECODERCOMMON_H
+#define X86DISASSEMBLERDECODERCOMMON_H
+
+#include "llvm/Support/DataTypes.h"
+
+#define INSTRUCTIONS_SYM x86DisassemblerInstrSpecifiers
+#define CONTEXTS_SYM x86DisassemblerContexts
+#define ONEBYTE_SYM x86DisassemblerOneByteOpcodes
+#define TWOBYTE_SYM x86DisassemblerTwoByteOpcodes
+#define THREEBYTE38_SYM x86DisassemblerThreeByte38Opcodes
+#define THREEBYTE3A_SYM x86DisassemblerThreeByte3AOpcodes
+#define THREEBYTEA6_SYM x86DisassemblerThreeByteA6Opcodes
+#define THREEBYTEA7_SYM x86DisassemblerThreeByteA7Opcodes
+
+#define INSTRUCTIONS_STR "x86DisassemblerInstrSpecifiers"
+#define CONTEXTS_STR "x86DisassemblerContexts"
+#define ONEBYTE_STR "x86DisassemblerOneByteOpcodes"
+#define TWOBYTE_STR "x86DisassemblerTwoByteOpcodes"
+#define THREEBYTE38_STR "x86DisassemblerThreeByte38Opcodes"
+#define THREEBYTE3A_STR "x86DisassemblerThreeByte3AOpcodes"
+#define THREEBYTEA6_STR "x86DisassemblerThreeByteA6Opcodes"
+#define THREEBYTEA7_STR "x86DisassemblerThreeByteA7Opcodes"
+
+/*
+ * Attributes of an instruction that must be known before the opcode can be
+ * processed correctly. Most of these indicate the presence of particular
+ * prefixes, but ATTR_64BIT is simply an attribute of the decoding context.
+ */
+#define ATTRIBUTE_BITS \
+ ENUM_ENTRY(ATTR_NONE, 0x00) \
+ ENUM_ENTRY(ATTR_64BIT, 0x01) \
+ ENUM_ENTRY(ATTR_XS, 0x02) \
+ ENUM_ENTRY(ATTR_XD, 0x04) \
+ ENUM_ENTRY(ATTR_REXW, 0x08) \
+ ENUM_ENTRY(ATTR_OPSIZE, 0x10) \
+ ENUM_ENTRY(ATTR_VEX, 0x20) \
+ ENUM_ENTRY(ATTR_VEXL, 0x40)
+
+#define ENUM_ENTRY(n, v) n = v,
+enum attributeBits {
+ ATTRIBUTE_BITS
+ ATTR_max
+};
+#undef ENUM_ENTRY
+
+/*
+ * Combinations of the above attributes that are relevant to instruction
+ * decode. Although other combinations are possible, they can be reduced to
+ * these without affecting the ultimately decoded instruction.
+ */
+
+/* Class name Rank Rationale for rank assignment */
+#define INSTRUCTION_CONTEXTS \
+ ENUM_ENTRY(IC, 0, "says nothing about the instruction") \
+ ENUM_ENTRY(IC_64BIT, 1, "says the instruction applies in " \
+ "64-bit mode but no more") \
+ ENUM_ENTRY(IC_OPSIZE, 3, "requires an OPSIZE prefix, so " \
+ "operands change width") \
+ ENUM_ENTRY(IC_XD, 2, "may say something about the opcode " \
+ "but not the operands") \
+ ENUM_ENTRY(IC_XS, 2, "may say something about the opcode " \
+ "but not the operands") \
+ ENUM_ENTRY(IC_XD_OPSIZE, 3, "requires an OPSIZE prefix, so " \
+ "operands change width") \
+ ENUM_ENTRY(IC_XS_OPSIZE, 3, "requires an OPSIZE prefix, so " \
+ "operands change width") \
+ ENUM_ENTRY(IC_64BIT_REXW, 4, "requires a REX.W prefix, so operands "\
+ "change width; overrides IC_OPSIZE") \
+ ENUM_ENTRY(IC_64BIT_OPSIZE, 3, "Just as meaningful as IC_OPSIZE") \
+ ENUM_ENTRY(IC_64BIT_XD, 5, "XD instructions are SSE; REX.W is " \
+ "secondary") \
+ ENUM_ENTRY(IC_64BIT_XS, 5, "Just as meaningful as IC_64BIT_XD") \
+ ENUM_ENTRY(IC_64BIT_XD_OPSIZE, 3, "Just as meaningful as IC_XD_OPSIZE") \
+ ENUM_ENTRY(IC_64BIT_XS_OPSIZE, 3, "Just as meaningful as IC_XS_OPSIZE") \
+ ENUM_ENTRY(IC_64BIT_REXW_XS, 6, "OPSIZE could mean a different " \
+ "opcode") \
+ ENUM_ENTRY(IC_64BIT_REXW_XD, 6, "Just as meaningful as " \
+ "IC_64BIT_REXW_XS") \
+ ENUM_ENTRY(IC_64BIT_REXW_OPSIZE, 7, "The Dynamic Duo! Prefer over all " \
+ "else because this changes most " \
+ "operands' meaning") \
+ ENUM_ENTRY(IC_VEX, 1, "requires a VEX prefix") \
+ ENUM_ENTRY(IC_VEX_XS, 2, "requires VEX and the XS prefix") \
+ ENUM_ENTRY(IC_VEX_XD, 2, "requires VEX and the XD prefix") \
+ ENUM_ENTRY(IC_VEX_OPSIZE, 2, "requires VEX and the OpSize prefix") \
+ ENUM_ENTRY(IC_VEX_W, 3, "requires VEX and the W prefix") \
+ ENUM_ENTRY(IC_VEX_W_XS, 4, "requires VEX, W, and XS prefix") \
+ ENUM_ENTRY(IC_VEX_W_XD, 4, "requires VEX, W, and XD prefix") \
+ ENUM_ENTRY(IC_VEX_W_OPSIZE, 4, "requires VEX, W, and OpSize") \
+ ENUM_ENTRY(IC_VEX_L, 3, "requires VEX and the L prefix") \
+ ENUM_ENTRY(IC_VEX_L_XS, 4, "requires VEX and the L and XS prefix")\
+ ENUM_ENTRY(IC_VEX_L_XD, 4, "requires VEX and the L and XD prefix")\
+ ENUM_ENTRY(IC_VEX_L_OPSIZE, 4, "requires VEX, L, and OpSize")
+
+
+#define ENUM_ENTRY(n, r, d) n,
+typedef enum {
+ INSTRUCTION_CONTEXTS
+ IC_max
+} InstructionContext;
+#undef ENUM_ENTRY
+
+/*
+ * Opcode types, which determine which decode table to use, both in the Intel
+ * manual and also for the decoder.
+ */
+typedef enum {
+ ONEBYTE = 0,
+ TWOBYTE = 1,
+ THREEBYTE_38 = 2,
+ THREEBYTE_3A = 3,
+ THREEBYTE_A6 = 4,
+ THREEBYTE_A7 = 5
+} OpcodeType;
+
+/*
+ * The following structs are used for the hierarchical decode table. After
+ * determining the instruction's class (i.e., which IC_* constant applies to
+ * it), the decoder reads the opcode. Some instructions require specific
+ * values of the ModR/M byte, so the ModR/M byte indexes into the final table.
+ *
+ * If a ModR/M byte is not required, "required" is left unset, and the values
+ * for each instructionID are identical.
+ */
+
+typedef uint16_t InstrUID;
+
+/*
+ * ModRMDecisionType - describes the type of ModR/M decision, allowing the
+ * consumer to determine the number of entries in it.
+ *
+ * MODRM_ONEENTRY - No matter what the value of the ModR/M byte is, the decoded
+ * instruction is the same.
+ * MODRM_SPLITRM - If the ModR/M byte is between 0x00 and 0xbf, the opcode
+ * corresponds to one instruction; otherwise, it corresponds to
+ * a different instruction.
+ * MODRM_FULL - Potentially, each value of the ModR/M byte could correspond
+ * to a different instruction.
+ */
+
+#define MODRMTYPES \
+ ENUM_ENTRY(MODRM_ONEENTRY) \
+ ENUM_ENTRY(MODRM_SPLITRM) \
+ ENUM_ENTRY(MODRM_FULL)
+
+#define ENUM_ENTRY(n) n,
+typedef enum {
+ MODRMTYPES
+ MODRM_max
+} ModRMDecisionType;
+#undef ENUM_ENTRY
+
+/*
+ * ModRMDecision - Specifies whether a ModR/M byte is needed and (if so) which
+ * instruction each possible value of the ModR/M byte corresponds to. Once
+ * this information is known, we have narrowed down to a single instruction.
+ */
+struct ModRMDecision {
+ uint8_t modrm_type;
+
+ /* The macro below must be defined wherever this file is included. */
+ INSTRUCTION_IDS
+};
+
+/*
+ * OpcodeDecision - Specifies which set of ModR/M->instruction tables to look at
+ * given a particular opcode.
+ */
+struct OpcodeDecision {
+ struct ModRMDecision modRMDecisions[256];
+};
+
+/*
+ * ContextDecision - Specifies which opcode->instruction tables to look at given
+ * a particular context (set of attributes). Since there are many possible
+ * contexts, the decoder first uses CONTEXTS_SYM to determine which context
+ * applies given a specific set of attributes. Hence there are only IC_max
+ * entries in this table, rather than 2^(ATTR_max).
+ */
+struct ContextDecision {
+ struct OpcodeDecision opcodeDecisions[IC_max];
+};
+
+/*
+ * Physical encodings of instruction operands.
+ */
+
+#define ENCODINGS \
+ ENUM_ENTRY(ENCODING_NONE, "") \
+ ENUM_ENTRY(ENCODING_REG, "Register operand in ModR/M byte.") \
+ ENUM_ENTRY(ENCODING_RM, "R/M operand in ModR/M byte.") \
+ ENUM_ENTRY(ENCODING_VVVV, "Register operand in VEX.vvvv byte.") \
+ ENUM_ENTRY(ENCODING_CB, "1-byte code offset (possible new CS value)") \
+ ENUM_ENTRY(ENCODING_CW, "2-byte") \
+ ENUM_ENTRY(ENCODING_CD, "4-byte") \
+ ENUM_ENTRY(ENCODING_CP, "6-byte") \
+ ENUM_ENTRY(ENCODING_CO, "8-byte") \
+ ENUM_ENTRY(ENCODING_CT, "10-byte") \
+ ENUM_ENTRY(ENCODING_IB, "1-byte immediate") \
+ ENUM_ENTRY(ENCODING_IW, "2-byte") \
+ ENUM_ENTRY(ENCODING_ID, "4-byte") \
+ ENUM_ENTRY(ENCODING_IO, "8-byte") \
+ ENUM_ENTRY(ENCODING_RB, "(AL..DIL, R8L..R15L) Register code added to " \
+ "the opcode byte") \
+ ENUM_ENTRY(ENCODING_RW, "(AX..DI, R8W..R15W)") \
+ ENUM_ENTRY(ENCODING_RD, "(EAX..EDI, R8D..R15D)") \
+ ENUM_ENTRY(ENCODING_RO, "(RAX..RDI, R8..R15)") \
+ ENUM_ENTRY(ENCODING_I, "Position on floating-point stack added to the " \
+ "opcode byte") \
+ \
+ ENUM_ENTRY(ENCODING_Iv, "Immediate of operand size") \
+ ENUM_ENTRY(ENCODING_Ia, "Immediate of address size") \
+ ENUM_ENTRY(ENCODING_Rv, "Register code of operand size added to the " \
+ "opcode byte") \
+ ENUM_ENTRY(ENCODING_DUP, "Duplicate of another operand; ID is encoded " \
+ "in type")
+
+#define ENUM_ENTRY(n, d) n,
+ typedef enum {
+ ENCODINGS
+ ENCODING_max
+ } OperandEncoding;
+#undef ENUM_ENTRY
+
+/*
+ * Semantic interpretations of instruction operands.
+ */
+
+#define TYPES \
+ ENUM_ENTRY(TYPE_NONE, "") \
+ ENUM_ENTRY(TYPE_REL8, "1-byte immediate address") \
+ ENUM_ENTRY(TYPE_REL16, "2-byte") \
+ ENUM_ENTRY(TYPE_REL32, "4-byte") \
+ ENUM_ENTRY(TYPE_REL64, "8-byte") \
+ ENUM_ENTRY(TYPE_PTR1616, "2+2-byte segment+offset address") \
+ ENUM_ENTRY(TYPE_PTR1632, "2+4-byte") \
+ ENUM_ENTRY(TYPE_PTR1664, "2+8-byte") \
+ ENUM_ENTRY(TYPE_R8, "1-byte register operand") \
+ ENUM_ENTRY(TYPE_R16, "2-byte") \
+ ENUM_ENTRY(TYPE_R32, "4-byte") \
+ ENUM_ENTRY(TYPE_R64, "8-byte") \
+ ENUM_ENTRY(TYPE_IMM8, "1-byte immediate operand") \
+ ENUM_ENTRY(TYPE_IMM16, "2-byte") \
+ ENUM_ENTRY(TYPE_IMM32, "4-byte") \
+ ENUM_ENTRY(TYPE_IMM64, "8-byte") \
+ ENUM_ENTRY(TYPE_IMM3, "1-byte immediate operand between 0 and 7") \
+ ENUM_ENTRY(TYPE_RM8, "1-byte register or memory operand") \
+ ENUM_ENTRY(TYPE_RM16, "2-byte") \
+ ENUM_ENTRY(TYPE_RM32, "4-byte") \
+ ENUM_ENTRY(TYPE_RM64, "8-byte") \
+ ENUM_ENTRY(TYPE_M, "Memory operand") \
+ ENUM_ENTRY(TYPE_M8, "1-byte") \
+ ENUM_ENTRY(TYPE_M16, "2-byte") \
+ ENUM_ENTRY(TYPE_M32, "4-byte") \
+ ENUM_ENTRY(TYPE_M64, "8-byte") \
+ ENUM_ENTRY(TYPE_LEA, "Effective address") \
+ ENUM_ENTRY(TYPE_M128, "16-byte (SSE/SSE2)") \
+ ENUM_ENTRY(TYPE_M256, "256-byte (AVX)") \
+ ENUM_ENTRY(TYPE_M1616, "2+2-byte segment+offset address") \
+ ENUM_ENTRY(TYPE_M1632, "2+4-byte") \
+ ENUM_ENTRY(TYPE_M1664, "2+8-byte") \
+ ENUM_ENTRY(TYPE_M16_32, "2+4-byte two-part memory operand (LIDT, LGDT)") \
+ ENUM_ENTRY(TYPE_M16_16, "2+2-byte (BOUND)") \
+ ENUM_ENTRY(TYPE_M32_32, "4+4-byte (BOUND)") \
+ ENUM_ENTRY(TYPE_M16_64, "2+8-byte (LIDT, LGDT)") \
+ ENUM_ENTRY(TYPE_MOFFS8, "1-byte memory offset (relative to segment " \
+ "base)") \
+ ENUM_ENTRY(TYPE_MOFFS16, "2-byte") \
+ ENUM_ENTRY(TYPE_MOFFS32, "4-byte") \
+ ENUM_ENTRY(TYPE_MOFFS64, "8-byte") \
+ ENUM_ENTRY(TYPE_SREG, "Byte with single bit set: 0 = ES, 1 = CS, " \
+ "2 = SS, 3 = DS, 4 = FS, 5 = GS") \
+ ENUM_ENTRY(TYPE_M32FP, "32-bit IEE754 memory floating-point operand") \
+ ENUM_ENTRY(TYPE_M64FP, "64-bit") \
+ ENUM_ENTRY(TYPE_M80FP, "80-bit extended") \
+ ENUM_ENTRY(TYPE_M16INT, "2-byte memory integer operand for use in " \
+ "floating-point instructions") \
+ ENUM_ENTRY(TYPE_M32INT, "4-byte") \
+ ENUM_ENTRY(TYPE_M64INT, "8-byte") \
+ ENUM_ENTRY(TYPE_ST, "Position on the floating-point stack") \
+ ENUM_ENTRY(TYPE_MM, "MMX register operand") \
+ ENUM_ENTRY(TYPE_MM32, "4-byte MMX register or memory operand") \
+ ENUM_ENTRY(TYPE_MM64, "8-byte") \
+ ENUM_ENTRY(TYPE_XMM, "XMM register operand") \
+ ENUM_ENTRY(TYPE_XMM32, "4-byte XMM register or memory operand") \
+ ENUM_ENTRY(TYPE_XMM64, "8-byte") \
+ ENUM_ENTRY(TYPE_XMM128, "16-byte") \
+ ENUM_ENTRY(TYPE_XMM256, "32-byte") \
+ ENUM_ENTRY(TYPE_XMM0, "Implicit use of XMM0") \
+ ENUM_ENTRY(TYPE_SEGMENTREG, "Segment register operand") \
+ ENUM_ENTRY(TYPE_DEBUGREG, "Debug register operand") \
+ ENUM_ENTRY(TYPE_CONTROLREG, "Control register operand") \
+ \
+ ENUM_ENTRY(TYPE_Mv, "Memory operand of operand size") \
+ ENUM_ENTRY(TYPE_Rv, "Register operand of operand size") \
+ ENUM_ENTRY(TYPE_IMMv, "Immediate operand of operand size") \
+ ENUM_ENTRY(TYPE_RELv, "Immediate address of operand size") \
+ ENUM_ENTRY(TYPE_DUP0, "Duplicate of operand 0") \
+ ENUM_ENTRY(TYPE_DUP1, "operand 1") \
+ ENUM_ENTRY(TYPE_DUP2, "operand 2") \
+ ENUM_ENTRY(TYPE_DUP3, "operand 3") \
+ ENUM_ENTRY(TYPE_DUP4, "operand 4") \
+ ENUM_ENTRY(TYPE_M512, "512-bit FPU/MMX/XMM/MXCSR state")
+
+#define ENUM_ENTRY(n, d) n,
+typedef enum {
+ TYPES
+ TYPE_max
+} OperandType;
+#undef ENUM_ENTRY
+
+/*
+ * OperandSpecifier - The specification for how to extract and interpret one
+ * operand.
+ */
+struct OperandSpecifier {
+ OperandEncoding encoding;
+ OperandType type;
+};
+
+/*
+ * Indicates where the opcode modifier (if any) is to be found. Extended
+ * opcodes with AddRegFrm have the opcode modifier in the ModR/M byte.
+ */
+
+#define MODIFIER_TYPES \
+ ENUM_ENTRY(MODIFIER_NONE) \
+ ENUM_ENTRY(MODIFIER_OPCODE) \
+ ENUM_ENTRY(MODIFIER_MODRM)
+
+#define ENUM_ENTRY(n) n,
+typedef enum {
+ MODIFIER_TYPES
+ MODIFIER_max
+} ModifierType;
+#undef ENUM_ENTRY
+
+#define X86_MAX_OPERANDS 5
+
+/*
+ * The specification for how to extract and interpret a full instruction and
+ * its operands.
+ */
+struct InstructionSpecifier {
+ ModifierType modifierType;
+ uint8_t modifierBase;
+ struct OperandSpecifier operands[X86_MAX_OPERANDS];
+
+ /* The macro below must be defined wherever this file is included. */
+ INSTRUCTION_SPECIFIER_FIELDS
+};
+
+/*
+ * Decoding mode for the Intel disassembler. 16-bit, 32-bit, and 64-bit mode
+ * are supported, and represent real mode, IA-32e, and IA-32e in 64-bit mode,
+ * respectively.
+ */
+typedef enum {
+ MODE_16BIT,
+ MODE_32BIT,
+ MODE_64BIT
+} DisassemblerMode;
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
new file mode 100644
index 0000000..029d491
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
@@ -0,0 +1,144 @@
+//===-- X86ATTInstPrinter.cpp - AT&T assembly instruction printing --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file includes code for rendering MCInst instances as AT&T-style
+// assembly.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "X86ATTInstPrinter.h"
+#include "X86InstComments.h"
+#include "MCTargetDesc/X86MCTargetDesc.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/FormattedStream.h"
+#include <map>
+using namespace llvm;
+
+// Include the auto-generated portion of the assembly writer.
+#define GET_INSTRUCTION_NAME
+#define PRINT_ALIAS_INSTR
+#include "X86GenAsmWriter.inc"
+
+X86ATTInstPrinter::X86ATTInstPrinter(const MCAsmInfo &MAI)
+ : MCInstPrinter(MAI) {
+}
+
+void X86ATTInstPrinter::printRegName(raw_ostream &OS,
+ unsigned RegNo) const {
+ OS << '%' << getRegisterName(RegNo);
+}
+
+void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
+ StringRef Annot) {
+ // Try to print any aliases first.
+ if (!printAliasInstr(MI, OS))
+ printInstruction(MI, OS);
+
+ // If verbose assembly is enabled, we can print some informative comments.
+ if (CommentStream) {
+ printAnnotation(OS, Annot);
+ EmitAnyX86InstComments(MI, *CommentStream, getRegisterName);
+ }
+}
+
+StringRef X86ATTInstPrinter::getOpcodeName(unsigned Opcode) const {
+ return getInstructionName(Opcode);
+}
+
+void X86ATTInstPrinter::printSSECC(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ switch (MI->getOperand(Op).getImm()) {
+ default: assert(0 && "Invalid ssecc argument!");
+ case 0: O << "eq"; break;
+ case 1: O << "lt"; break;
+ case 2: O << "le"; break;
+ case 3: O << "unord"; break;
+ case 4: O << "neq"; break;
+ case 5: O << "nlt"; break;
+ case 6: O << "nle"; break;
+ case 7: O << "ord"; break;
+ }
+}
+
+/// print_pcrel_imm - This is used to print an immediate value that ends up
+/// being encoded as a pc-relative value (e.g. for jumps and calls). These
+/// print slightly differently than normal immediates. For example, a $ is not
+/// emitted.
+void X86ATTInstPrinter::print_pcrel_imm(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isImm())
+ // Print this as a signed 32-bit value.
+ O << (int)Op.getImm();
+ else {
+ assert(Op.isExpr() && "unknown pcrel immediate operand");
+ O << *Op.getExpr();
+ }
+}
+
+void X86ATTInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isReg()) {
+ O << '%' << getRegisterName(Op.getReg());
+ } else if (Op.isImm()) {
+ // Print X86 immediates as signed values.
+ O << '$' << (int64_t)Op.getImm();
+
+ if (CommentStream && (Op.getImm() > 255 || Op.getImm() < -256))
+ *CommentStream << format("imm = 0x%llX\n", (long long)Op.getImm());
+
+ } else {
+ assert(Op.isExpr() && "unknown operand kind in printOperand");
+ O << '$' << *Op.getExpr();
+ }
+}
+
+void X86ATTInstPrinter::printMemReference(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ const MCOperand &BaseReg = MI->getOperand(Op);
+ const MCOperand &IndexReg = MI->getOperand(Op+2);
+ const MCOperand &DispSpec = MI->getOperand(Op+3);
+ const MCOperand &SegReg = MI->getOperand(Op+4);
+
+ // If this has a segment register, print it.
+ if (SegReg.getReg()) {
+ printOperand(MI, Op+4, O);
+ O << ':';
+ }
+
+ if (DispSpec.isImm()) {
+ int64_t DispVal = DispSpec.getImm();
+ if (DispVal || (!IndexReg.getReg() && !BaseReg.getReg()))
+ O << DispVal;
+ } else {
+ assert(DispSpec.isExpr() && "non-immediate displacement for LEA?");
+ O << *DispSpec.getExpr();
+ }
+
+ if (IndexReg.getReg() || BaseReg.getReg()) {
+ O << '(';
+ if (BaseReg.getReg())
+ printOperand(MI, Op, O);
+
+ if (IndexReg.getReg()) {
+ O << ',';
+ printOperand(MI, Op+2, O);
+ unsigned ScaleVal = MI->getOperand(Op+1).getImm();
+ if (ScaleVal != 1)
+ O << ',' << ScaleVal;
+ }
+ O << ')';
+ }
+}
diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h b/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
new file mode 100644
index 0000000..0293869
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
@@ -0,0 +1,86 @@
+//===-- X86ATTInstPrinter.h - Convert X86 MCInst to assembly syntax -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an X86 MCInst to AT&T style .s file syntax.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86_ATT_INST_PRINTER_H
+#define X86_ATT_INST_PRINTER_H
+
+#include "llvm/MC/MCInstPrinter.h"
+
+namespace llvm {
+
+class MCOperand;
+
+class X86ATTInstPrinter : public MCInstPrinter {
+public:
+ X86ATTInstPrinter(const MCAsmInfo &MAI);
+
+ virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
+ virtual void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot);
+ virtual StringRef getOpcodeName(unsigned Opcode) const;
+
+ // Autogenerated by tblgen, returns true if we successfully printed an
+ // alias.
+ bool printAliasInstr(const MCInst *MI, raw_ostream &OS);
+
+ // Autogenerated by tblgen.
+ void printInstruction(const MCInst *MI, raw_ostream &OS);
+ static const char *getRegisterName(unsigned RegNo);
+ static const char *getInstructionName(unsigned Opcode);
+
+ void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &OS);
+ void printMemReference(const MCInst *MI, unsigned Op, raw_ostream &OS);
+ void printSSECC(const MCInst *MI, unsigned Op, raw_ostream &OS);
+ void print_pcrel_imm(const MCInst *MI, unsigned OpNo, raw_ostream &OS);
+
+ void printopaquemem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ printMemReference(MI, OpNo, O);
+ }
+
+ void printi8mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ printMemReference(MI, OpNo, O);
+ }
+ void printi16mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ printMemReference(MI, OpNo, O);
+ }
+ void printi32mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ printMemReference(MI, OpNo, O);
+ }
+ void printi64mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ printMemReference(MI, OpNo, O);
+ }
+ void printi128mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ printMemReference(MI, OpNo, O);
+ }
+ void printi256mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ printMemReference(MI, OpNo, O);
+ }
+ void printf32mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ printMemReference(MI, OpNo, O);
+ }
+ void printf64mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ printMemReference(MI, OpNo, O);
+ }
+ void printf80mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ printMemReference(MI, OpNo, O);
+ }
+ void printf128mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ printMemReference(MI, OpNo, O);
+ }
+ void printf256mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ printMemReference(MI, OpNo, O);
+ }
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp b/contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp
new file mode 100644
index 0000000..8d85b95
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp
@@ -0,0 +1,287 @@
+//===-- X86InstComments.cpp - Generate verbose-asm comments for instrs ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This defines functionality used to emit comments about X86 instructions to
+// an output stream for -fverbose-asm.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86InstComments.h"
+#include "MCTargetDesc/X86MCTargetDesc.h"
+#include "Utils/X86ShuffleDecode.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Top Level Entrypoint
+//===----------------------------------------------------------------------===//
+
+/// EmitAnyX86InstComments - This function decodes x86 instructions and prints
+/// newline terminated strings to the specified string if desired. This
+/// information is shown in disassembly dumps when verbose assembly is enabled.
+void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
+ const char *(*getRegName)(unsigned)) {
+ // If this is a shuffle operation, the switch should fill in this state.
+ SmallVector<unsigned, 8> ShuffleMask;
+ const char *DestName = 0, *Src1Name = 0, *Src2Name = 0;
+
+ switch (MI->getOpcode()) {
+ case X86::INSERTPSrr:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ DecodeINSERTPSMask(MI->getOperand(3).getImm(), ShuffleMask);
+ break;
+
+ case X86::MOVLHPSrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ DecodeMOVLHPSMask(2, ShuffleMask);
+ break;
+
+ case X86::MOVHLPSrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ DecodeMOVHLPSMask(2, ShuffleMask);
+ break;
+
+ case X86::PSHUFDri:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::PSHUFDmi:
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodePSHUFMask(4, MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ break;
+
+ case X86::PSHUFHWri:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::PSHUFHWmi:
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodePSHUFHWMask(MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ break;
+ case X86::PSHUFLWri:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::PSHUFLWmi:
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodePSHUFLWMask(MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ break;
+
+ case X86::PUNPCKHBWrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::PUNPCKHBWrm:
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ DecodePUNPCKHMask(16, ShuffleMask);
+ break;
+ case X86::PUNPCKHWDrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::PUNPCKHWDrm:
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ DecodePUNPCKHMask(8, ShuffleMask);
+ break;
+ case X86::PUNPCKHDQrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::PUNPCKHDQrm:
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ DecodePUNPCKHMask(4, ShuffleMask);
+ break;
+ case X86::PUNPCKHQDQrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::PUNPCKHQDQrm:
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ DecodePUNPCKHMask(2, ShuffleMask);
+ break;
+
+ case X86::PUNPCKLBWrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::PUNPCKLBWrm:
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ DecodePUNPCKLBWMask(16, ShuffleMask);
+ break;
+ case X86::PUNPCKLWDrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::PUNPCKLWDrm:
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ DecodePUNPCKLWDMask(8, ShuffleMask);
+ break;
+ case X86::PUNPCKLDQrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::PUNPCKLDQrm:
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ DecodePUNPCKLDQMask(4, ShuffleMask);
+ break;
+ case X86::PUNPCKLQDQrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::PUNPCKLQDQrm:
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ DecodePUNPCKLQDQMask(2, ShuffleMask);
+ break;
+
+ case X86::SHUFPDrri:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::SHUFPDrmi:
+ DecodeSHUFPSMask(2, MI->getOperand(3).getImm(), ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ break;
+
+ case X86::SHUFPSrri:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::SHUFPSrmi:
+ DecodeSHUFPSMask(4, MI->getOperand(3).getImm(), ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ break;
+
+ case X86::UNPCKLPDrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::UNPCKLPDrm:
+ DecodeUNPCKLPDMask(2, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::VUNPCKLPDrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VUNPCKLPDrm:
+ DecodeUNPCKLPDMask(2, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ break;
+ case X86::VUNPCKLPDYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VUNPCKLPDYrm:
+ DecodeUNPCKLPDMask(4, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ break;
+ case X86::UNPCKLPSrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::UNPCKLPSrm:
+ DecodeUNPCKLPSMask(4, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::VUNPCKLPSrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VUNPCKLPSrm:
+ DecodeUNPCKLPSMask(4, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ break;
+ case X86::VUNPCKLPSYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VUNPCKLPSYrm:
+ DecodeUNPCKLPSMask(8, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ break;
+ case X86::UNPCKHPDrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::UNPCKHPDrm:
+ DecodeUNPCKHPMask(2, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::UNPCKHPSrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::UNPCKHPSrm:
+ DecodeUNPCKHPMask(4, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::VPERMILPSri:
+ DecodeVPERMILPSMask(4, MI->getOperand(2).getImm(),
+ ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::VPERMILPSYri:
+ DecodeVPERMILPSMask(8, MI->getOperand(2).getImm(),
+ ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::VPERMILPDri:
+ DecodeVPERMILPDMask(2, MI->getOperand(2).getImm(),
+ ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::VPERMILPDYri:
+ DecodeVPERMILPDMask(4, MI->getOperand(2).getImm(),
+ ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::VPERM2F128rr:
+ DecodeVPERM2F128Mask(MI->getOperand(3).getImm(), ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ break;
+ }
+
+
+ // If this was a shuffle operation, print the shuffle mask.
+ if (!ShuffleMask.empty()) {
+ if (DestName == 0) DestName = Src1Name;
+ OS << (DestName ? DestName : "mem") << " = ";
+
+ // If the two sources are the same, canonicalize the input elements to be
+ // from the first src so that we get larger element spans.
+ if (Src1Name == Src2Name) {
+ for (unsigned i = 0, e = ShuffleMask.size(); i != e; ++i) {
+ if ((int)ShuffleMask[i] >= 0 && // Not sentinel.
+ ShuffleMask[i] >= e) // From second mask.
+ ShuffleMask[i] -= e;
+ }
+ }
+
+ // The shuffle mask specifies which elements of the src1/src2 fill in the
+ // destination, with a few sentinel values. Loop through and print them
+ // out.
+ for (unsigned i = 0, e = ShuffleMask.size(); i != e; ++i) {
+ if (i != 0)
+ OS << ',';
+ if (ShuffleMask[i] == SM_SentinelZero) {
+ OS << "zero";
+ continue;
+ }
+
+ // Otherwise, it must come from src1 or src2. Print the span of elements
+ // that comes from this src.
+ bool isSrc1 = ShuffleMask[i] < ShuffleMask.size();
+ const char *SrcName = isSrc1 ? Src1Name : Src2Name;
+ OS << (SrcName ? SrcName : "mem") << '[';
+ bool IsFirst = true;
+ while (i != e &&
+ (int)ShuffleMask[i] >= 0 &&
+ (ShuffleMask[i] < ShuffleMask.size()) == isSrc1) {
+ if (!IsFirst)
+ OS << ',';
+ else
+ IsFirst = false;
+ OS << ShuffleMask[i] % ShuffleMask.size();
+ ++i;
+ }
+ OS << ']';
+ --i; // For loop increments element #.
+ }
+ //MI->print(OS, 0);
+ OS << "\n";
+ }
+
+}
diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.h b/contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.h
new file mode 100644
index 0000000..6b86db4
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.h
@@ -0,0 +1,25 @@
+//===-- X86InstComments.h - Generate verbose-asm comments for instrs ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This defines functionality used to emit comments about X86 instructions to
+// an output stream for -fverbose-asm.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86_INST_COMMENTS_H
+#define X86_INST_COMMENTS_H
+
+namespace llvm {
+ class MCInst;
+ class raw_ostream;
+ void EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
+ const char *(*getRegName)(unsigned));
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
new file mode 100644
index 0000000..f9ab5ae
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
@@ -0,0 +1,146 @@
+//===-- X86IntelInstPrinter.cpp - AT&T assembly instruction printing ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file includes code for rendering MCInst instances as AT&T-style
+// assembly.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "X86IntelInstPrinter.h"
+#include "X86InstComments.h"
+#include "MCTargetDesc/X86MCTargetDesc.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+#include <cctype>
+using namespace llvm;
+
+// Include the auto-generated portion of the assembly writer.
+#define GET_INSTRUCTION_NAME
+#include "X86GenAsmWriter1.inc"
+
+void X86IntelInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
+ OS << getRegisterName(RegNo);
+}
+
+void X86IntelInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
+ StringRef Annot) {
+ printInstruction(MI, OS);
+
+ // If verbose assembly is enabled, we can print some informative comments.
+ if (CommentStream) {
+ printAnnotation(OS, Annot);
+ EmitAnyX86InstComments(MI, *CommentStream, getRegisterName);
+ }
+}
+StringRef X86IntelInstPrinter::getOpcodeName(unsigned Opcode) const {
+ return getInstructionName(Opcode);
+}
+
+void X86IntelInstPrinter::printSSECC(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ switch (MI->getOperand(Op).getImm()) {
+ default: assert(0 && "Invalid ssecc argument!");
+ case 0: O << "eq"; break;
+ case 1: O << "lt"; break;
+ case 2: O << "le"; break;
+ case 3: O << "unord"; break;
+ case 4: O << "neq"; break;
+ case 5: O << "nlt"; break;
+ case 6: O << "nle"; break;
+ case 7: O << "ord"; break;
+ }
+}
+
+/// print_pcrel_imm - This is used to print an immediate value that ends up
+/// being encoded as a pc-relative value.
+void X86IntelInstPrinter::print_pcrel_imm(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isImm())
+ O << Op.getImm();
+ else {
+ assert(Op.isExpr() && "unknown pcrel immediate operand");
+ O << *Op.getExpr();
+ }
+}
+
+static void PrintRegName(raw_ostream &O, StringRef RegName) {
+ for (unsigned i = 0, e = RegName.size(); i != e; ++i)
+ O << (char)toupper(RegName[i]);
+}
+
+void X86IntelInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isReg()) {
+ PrintRegName(O, getRegisterName(Op.getReg()));
+ } else if (Op.isImm()) {
+ O << Op.getImm();
+ } else {
+ assert(Op.isExpr() && "unknown operand kind in printOperand");
+ O << *Op.getExpr();
+ }
+}
+
+void X86IntelInstPrinter::printMemReference(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ const MCOperand &BaseReg = MI->getOperand(Op);
+ unsigned ScaleVal = MI->getOperand(Op+1).getImm();
+ const MCOperand &IndexReg = MI->getOperand(Op+2);
+ const MCOperand &DispSpec = MI->getOperand(Op+3);
+ const MCOperand &SegReg = MI->getOperand(Op+4);
+
+ // If this has a segment register, print it.
+ if (SegReg.getReg()) {
+ printOperand(MI, Op+4, O);
+ O << ':';
+ }
+
+ O << '[';
+
+ bool NeedPlus = false;
+ if (BaseReg.getReg()) {
+ printOperand(MI, Op, O);
+ NeedPlus = true;
+ }
+
+ if (IndexReg.getReg()) {
+ if (NeedPlus) O << " + ";
+ if (ScaleVal != 1)
+ O << ScaleVal << '*';
+ printOperand(MI, Op+2, O);
+ NeedPlus = true;
+ }
+
+
+ if (!DispSpec.isImm()) {
+ if (NeedPlus) O << " + ";
+ assert(DispSpec.isExpr() && "non-immediate displacement for LEA?");
+ O << *DispSpec.getExpr();
+ } else {
+ int64_t DispVal = DispSpec.getImm();
+ if (DispVal || (!IndexReg.getReg() && !BaseReg.getReg())) {
+ if (NeedPlus) {
+ if (DispVal > 0)
+ O << " + ";
+ else {
+ O << " - ";
+ DispVal = -DispVal;
+ }
+ }
+ O << DispVal;
+ }
+ }
+
+ O << ']';
+}
diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h b/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
new file mode 100644
index 0000000..6d5ec62
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
@@ -0,0 +1,96 @@
+//===-- X86IntelInstPrinter.h - Convert X86 MCInst to assembly syntax -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an X86 MCInst to intel style .s file syntax.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86_INTEL_INST_PRINTER_H
+#define X86_INTEL_INST_PRINTER_H
+
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+
+class MCOperand;
+
+class X86IntelInstPrinter : public MCInstPrinter {
+public:
+ X86IntelInstPrinter(const MCAsmInfo &MAI)
+ : MCInstPrinter(MAI) {}
+
+ virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
+ virtual void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot);
+ virtual StringRef getOpcodeName(unsigned Opcode) const;
+
+ // Autogenerated by tblgen.
+ void printInstruction(const MCInst *MI, raw_ostream &O);
+ static const char *getRegisterName(unsigned RegNo);
+ static const char *getInstructionName(unsigned Opcode);
+
+ void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printMemReference(const MCInst *MI, unsigned Op, raw_ostream &O);
+ void printSSECC(const MCInst *MI, unsigned Op, raw_ostream &O);
+ void print_pcrel_imm(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+
+ void printopaquemem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ O << "OPAQUE PTR ";
+ printMemReference(MI, OpNo, O);
+ }
+
+ void printi8mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ O << "BYTE PTR ";
+ printMemReference(MI, OpNo, O);
+ }
+ void printi16mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ O << "WORD PTR ";
+ printMemReference(MI, OpNo, O);
+ }
+ void printi32mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ O << "DWORD PTR ";
+ printMemReference(MI, OpNo, O);
+ }
+ void printi64mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ O << "QWORD PTR ";
+ printMemReference(MI, OpNo, O);
+ }
+ void printi128mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ O << "XMMWORD PTR ";
+ printMemReference(MI, OpNo, O);
+ }
+ void printi256mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ O << "YMMWORD PTR ";
+ printMemReference(MI, OpNo, O);
+ }
+ void printf32mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ O << "DWORD PTR ";
+ printMemReference(MI, OpNo, O);
+ }
+ void printf64mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ O << "QWORD PTR ";
+ printMemReference(MI, OpNo, O);
+ }
+ void printf80mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ O << "XWORD PTR ";
+ printMemReference(MI, OpNo, O);
+ }
+ void printf128mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ O << "XMMWORD PTR ";
+ printMemReference(MI, OpNo, O);
+ }
+ void printf256mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ O << "YMMWORD PTR ";
+ printMemReference(MI, OpNo, O);
+ }
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
new file mode 100644
index 0000000..69ad7d7
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -0,0 +1,458 @@
+//===-- X86AsmBackend.cpp - X86 Assembler Backend -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCAsmBackend.h"
+#include "MCTargetDesc/X86BaseInfo.h"
+#include "MCTargetDesc/X86FixupKinds.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCMachObjectWriter.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSectionCOFF.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/Object/MachOFormat.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+// Option to allow disabling arithmetic relaxation to workaround PR9807, which
+// is useful when running bitwise comparison experiments on Darwin. We should be
+// able to remove this once PR9807 is resolved.
+static cl::opt<bool>
+MCDisableArithRelaxation("mc-x86-disable-arith-relaxation",
+ cl::desc("Disable relaxation of arithmetic instruction for X86"));
+
+static unsigned getFixupKindLog2Size(unsigned Kind) {
+ switch (Kind) {
+ default: assert(0 && "invalid fixup kind!");
+ case FK_PCRel_1:
+ case FK_Data_1: return 0;
+ case FK_PCRel_2:
+ case FK_Data_2: return 1;
+ case FK_PCRel_4:
+ case X86::reloc_riprel_4byte:
+ case X86::reloc_riprel_4byte_movq_load:
+ case X86::reloc_signed_4byte:
+ case X86::reloc_global_offset_table:
+ case FK_Data_4: return 2;
+ case FK_PCRel_8:
+ case FK_Data_8: return 3;
+ }
+}
+
+namespace {
+
+class X86ELFObjectWriter : public MCELFObjectTargetWriter {
+public:
+ X86ELFObjectWriter(bool is64Bit, Triple::OSType OSType, uint16_t EMachine,
+ bool HasRelocationAddend)
+ : MCELFObjectTargetWriter(is64Bit, OSType, EMachine, HasRelocationAddend) {}
+};
+
+class X86AsmBackend : public MCAsmBackend {
+public:
+ X86AsmBackend(const Target &T)
+ : MCAsmBackend() {}
+
+ unsigned getNumFixupKinds() const {
+ return X86::NumTargetFixupKinds;
+ }
+
+ const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const {
+ const static MCFixupKindInfo Infos[X86::NumTargetFixupKinds] = {
+ { "reloc_riprel_4byte", 0, 4 * 8, MCFixupKindInfo::FKF_IsPCRel },
+ { "reloc_riprel_4byte_movq_load", 0, 4 * 8, MCFixupKindInfo::FKF_IsPCRel},
+ { "reloc_signed_4byte", 0, 4 * 8, 0},
+ { "reloc_global_offset_table", 0, 4 * 8, 0}
+ };
+
+ if (Kind < FirstTargetFixupKind)
+ return MCAsmBackend::getFixupKindInfo(Kind);
+
+ assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
+ "Invalid kind!");
+ return Infos[Kind - FirstTargetFixupKind];
+ }
+
+ void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ uint64_t Value) const {
+ unsigned Size = 1 << getFixupKindLog2Size(Fixup.getKind());
+
+ assert(Fixup.getOffset() + Size <= DataSize &&
+ "Invalid fixup offset!");
+
+ // Check that uppper bits are either all zeros or all ones.
+ // Specifically ignore overflow/underflow as long as the leakage is
+ // limited to the lower bits. This is to remain compatible with
+ // other assemblers.
+ assert(isIntN(Size * 8 + 1, Value) &&
+ "Value does not fit in the Fixup field");
+
+ for (unsigned i = 0; i != Size; ++i)
+ Data[Fixup.getOffset() + i] = uint8_t(Value >> (i * 8));
+ }
+
+ bool MayNeedRelaxation(const MCInst &Inst) const;
+
+ void RelaxInstruction(const MCInst &Inst, MCInst &Res) const;
+
+ bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const;
+};
+} // end anonymous namespace
+
+static unsigned getRelaxedOpcodeBranch(unsigned Op) {
+ switch (Op) {
+ default:
+ return Op;
+
+ case X86::JAE_1: return X86::JAE_4;
+ case X86::JA_1: return X86::JA_4;
+ case X86::JBE_1: return X86::JBE_4;
+ case X86::JB_1: return X86::JB_4;
+ case X86::JE_1: return X86::JE_4;
+ case X86::JGE_1: return X86::JGE_4;
+ case X86::JG_1: return X86::JG_4;
+ case X86::JLE_1: return X86::JLE_4;
+ case X86::JL_1: return X86::JL_4;
+ case X86::JMP_1: return X86::JMP_4;
+ case X86::JNE_1: return X86::JNE_4;
+ case X86::JNO_1: return X86::JNO_4;
+ case X86::JNP_1: return X86::JNP_4;
+ case X86::JNS_1: return X86::JNS_4;
+ case X86::JO_1: return X86::JO_4;
+ case X86::JP_1: return X86::JP_4;
+ case X86::JS_1: return X86::JS_4;
+ }
+}
+
+static unsigned getRelaxedOpcodeArith(unsigned Op) {
+ switch (Op) {
+ default:
+ return Op;
+
+ // IMUL
+ case X86::IMUL16rri8: return X86::IMUL16rri;
+ case X86::IMUL16rmi8: return X86::IMUL16rmi;
+ case X86::IMUL32rri8: return X86::IMUL32rri;
+ case X86::IMUL32rmi8: return X86::IMUL32rmi;
+ case X86::IMUL64rri8: return X86::IMUL64rri32;
+ case X86::IMUL64rmi8: return X86::IMUL64rmi32;
+
+ // AND
+ case X86::AND16ri8: return X86::AND16ri;
+ case X86::AND16mi8: return X86::AND16mi;
+ case X86::AND32ri8: return X86::AND32ri;
+ case X86::AND32mi8: return X86::AND32mi;
+ case X86::AND64ri8: return X86::AND64ri32;
+ case X86::AND64mi8: return X86::AND64mi32;
+
+ // OR
+ case X86::OR16ri8: return X86::OR16ri;
+ case X86::OR16mi8: return X86::OR16mi;
+ case X86::OR32ri8: return X86::OR32ri;
+ case X86::OR32mi8: return X86::OR32mi;
+ case X86::OR64ri8: return X86::OR64ri32;
+ case X86::OR64mi8: return X86::OR64mi32;
+
+ // XOR
+ case X86::XOR16ri8: return X86::XOR16ri;
+ case X86::XOR16mi8: return X86::XOR16mi;
+ case X86::XOR32ri8: return X86::XOR32ri;
+ case X86::XOR32mi8: return X86::XOR32mi;
+ case X86::XOR64ri8: return X86::XOR64ri32;
+ case X86::XOR64mi8: return X86::XOR64mi32;
+
+ // ADD
+ case X86::ADD16ri8: return X86::ADD16ri;
+ case X86::ADD16mi8: return X86::ADD16mi;
+ case X86::ADD32ri8: return X86::ADD32ri;
+ case X86::ADD32mi8: return X86::ADD32mi;
+ case X86::ADD64ri8: return X86::ADD64ri32;
+ case X86::ADD64mi8: return X86::ADD64mi32;
+
+ // SUB
+ case X86::SUB16ri8: return X86::SUB16ri;
+ case X86::SUB16mi8: return X86::SUB16mi;
+ case X86::SUB32ri8: return X86::SUB32ri;
+ case X86::SUB32mi8: return X86::SUB32mi;
+ case X86::SUB64ri8: return X86::SUB64ri32;
+ case X86::SUB64mi8: return X86::SUB64mi32;
+
+ // CMP
+ case X86::CMP16ri8: return X86::CMP16ri;
+ case X86::CMP16mi8: return X86::CMP16mi;
+ case X86::CMP32ri8: return X86::CMP32ri;
+ case X86::CMP32mi8: return X86::CMP32mi;
+ case X86::CMP64ri8: return X86::CMP64ri32;
+ case X86::CMP64mi8: return X86::CMP64mi32;
+
+ // PUSH
+ case X86::PUSHi8: return X86::PUSHi32;
+ case X86::PUSHi16: return X86::PUSHi32;
+ case X86::PUSH64i8: return X86::PUSH64i32;
+ case X86::PUSH64i16: return X86::PUSH64i32;
+ }
+}
+
+static unsigned getRelaxedOpcode(unsigned Op) {
+ unsigned R = getRelaxedOpcodeArith(Op);
+ if (R != Op)
+ return R;
+ return getRelaxedOpcodeBranch(Op);
+}
+
+bool X86AsmBackend::MayNeedRelaxation(const MCInst &Inst) const {
+ // Branches can always be relaxed.
+ if (getRelaxedOpcodeBranch(Inst.getOpcode()) != Inst.getOpcode())
+ return true;
+
+ if (MCDisableArithRelaxation)
+ return false;
+
+ // Check if this instruction is ever relaxable.
+ if (getRelaxedOpcodeArith(Inst.getOpcode()) == Inst.getOpcode())
+ return false;
+
+
+ // Check if it has an expression and is not RIP relative.
+ bool hasExp = false;
+ bool hasRIP = false;
+ for (unsigned i = 0; i < Inst.getNumOperands(); ++i) {
+ const MCOperand &Op = Inst.getOperand(i);
+ if (Op.isExpr())
+ hasExp = true;
+
+ if (Op.isReg() && Op.getReg() == X86::RIP)
+ hasRIP = true;
+ }
+
+ // FIXME: Why exactly do we need the !hasRIP? Is it just a limitation on
+ // how we do relaxations?
+ return hasExp && !hasRIP;
+}
+
+// FIXME: Can tblgen help at all here to verify there aren't other instructions
+// we can relax?
+void X86AsmBackend::RelaxInstruction(const MCInst &Inst, MCInst &Res) const {
+ // The only relaxations X86 does is from a 1byte pcrel to a 4byte pcrel.
+ unsigned RelaxedOp = getRelaxedOpcode(Inst.getOpcode());
+
+ if (RelaxedOp == Inst.getOpcode()) {
+ SmallString<256> Tmp;
+ raw_svector_ostream OS(Tmp);
+ Inst.dump_pretty(OS);
+ OS << "\n";
+ report_fatal_error("unexpected instruction to relax: " + OS.str());
+ }
+
+ Res = Inst;
+ Res.setOpcode(RelaxedOp);
+}
+
+/// WriteNopData - Write optimal nops to the output file for the \arg Count
+/// bytes. This returns the number of bytes written. It may return 0 if
+/// the \arg Count is more than the maximum optimal nops.
+bool X86AsmBackend::WriteNopData(uint64_t Count, MCObjectWriter *OW) const {
+ static const uint8_t Nops[10][10] = {
+ // nop
+ {0x90},
+ // xchg %ax,%ax
+ {0x66, 0x90},
+ // nopl (%[re]ax)
+ {0x0f, 0x1f, 0x00},
+ // nopl 0(%[re]ax)
+ {0x0f, 0x1f, 0x40, 0x00},
+ // nopl 0(%[re]ax,%[re]ax,1)
+ {0x0f, 0x1f, 0x44, 0x00, 0x00},
+ // nopw 0(%[re]ax,%[re]ax,1)
+ {0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00},
+ // nopl 0L(%[re]ax)
+ {0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00},
+ // nopl 0L(%[re]ax,%[re]ax,1)
+ {0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
+ // nopw 0L(%[re]ax,%[re]ax,1)
+ {0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
+ // nopw %cs:0L(%[re]ax,%[re]ax,1)
+ {0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
+ };
+
+ // Write an optimal sequence for the first 15 bytes.
+ const uint64_t OptimalCount = (Count < 16) ? Count : 15;
+ const uint64_t Prefixes = OptimalCount <= 10 ? 0 : OptimalCount - 10;
+ for (uint64_t i = 0, e = Prefixes; i != e; i++)
+ OW->Write8(0x66);
+ const uint64_t Rest = OptimalCount - Prefixes;
+ for (uint64_t i = 0, e = Rest; i != e; i++)
+ OW->Write8(Nops[Rest - 1][i]);
+
+ // Finish with single byte nops.
+ for (uint64_t i = OptimalCount, e = Count; i != e; ++i)
+ OW->Write8(0x90);
+
+ return true;
+}
+
+/* *** */
+
+namespace {
+class ELFX86AsmBackend : public X86AsmBackend {
+public:
+ Triple::OSType OSType;
+ ELFX86AsmBackend(const Target &T, Triple::OSType _OSType)
+ : X86AsmBackend(T), OSType(_OSType) {
+ HasReliableSymbolDifference = true;
+ }
+
+ virtual bool doesSectionRequireSymbols(const MCSection &Section) const {
+ const MCSectionELF &ES = static_cast<const MCSectionELF&>(Section);
+ return ES.getFlags() & ELF::SHF_MERGE;
+ }
+};
+
+class ELFX86_32AsmBackend : public ELFX86AsmBackend {
+public:
+ ELFX86_32AsmBackend(const Target &T, Triple::OSType OSType)
+ : ELFX86AsmBackend(T, OSType) {}
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ return createELFObjectWriter(createELFObjectTargetWriter(),
+ OS, /*IsLittleEndian*/ true);
+ }
+
+ MCELFObjectTargetWriter *createELFObjectTargetWriter() const {
+ return new X86ELFObjectWriter(false, OSType, ELF::EM_386, false);
+ }
+};
+
+class ELFX86_64AsmBackend : public ELFX86AsmBackend {
+public:
+ ELFX86_64AsmBackend(const Target &T, Triple::OSType OSType)
+ : ELFX86AsmBackend(T, OSType) {}
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ return createELFObjectWriter(createELFObjectTargetWriter(),
+ OS, /*IsLittleEndian*/ true);
+ }
+
+ MCELFObjectTargetWriter *createELFObjectTargetWriter() const {
+ return new X86ELFObjectWriter(true, OSType, ELF::EM_X86_64, true);
+ }
+};
+
+class WindowsX86AsmBackend : public X86AsmBackend {
+ bool Is64Bit;
+
+public:
+ WindowsX86AsmBackend(const Target &T, bool is64Bit)
+ : X86AsmBackend(T)
+ , Is64Bit(is64Bit) {
+ }
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ return createWinCOFFObjectWriter(OS, Is64Bit);
+ }
+};
+
+class DarwinX86AsmBackend : public X86AsmBackend {
+public:
+ DarwinX86AsmBackend(const Target &T)
+ : X86AsmBackend(T) { }
+};
+
+class DarwinX86_32AsmBackend : public DarwinX86AsmBackend {
+public:
+ DarwinX86_32AsmBackend(const Target &T)
+ : DarwinX86AsmBackend(T) {}
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ return createX86MachObjectWriter(OS, /*Is64Bit=*/false,
+ object::mach::CTM_i386,
+ object::mach::CSX86_ALL);
+ }
+};
+
+class DarwinX86_64AsmBackend : public DarwinX86AsmBackend {
+public:
+ DarwinX86_64AsmBackend(const Target &T)
+ : DarwinX86AsmBackend(T) {
+ HasReliableSymbolDifference = true;
+ }
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ return createX86MachObjectWriter(OS, /*Is64Bit=*/true,
+ object::mach::CTM_x86_64,
+ object::mach::CSX86_ALL);
+ }
+
+ virtual bool doesSectionRequireSymbols(const MCSection &Section) const {
+ // Temporary labels in the string literals sections require symbols. The
+ // issue is that the x86_64 relocation format does not allow symbol +
+ // offset, and so the linker does not have enough information to resolve the
+ // access to the appropriate atom unless an external relocation is used. For
+ // non-cstring sections, we expect the compiler to use a non-temporary label
+ // for anything that could have an addend pointing outside the symbol.
+ //
+ // See <rdar://problem/4765733>.
+ const MCSectionMachO &SMO = static_cast<const MCSectionMachO&>(Section);
+ return SMO.getType() == MCSectionMachO::S_CSTRING_LITERALS;
+ }
+
+ virtual bool isSectionAtomizable(const MCSection &Section) const {
+ const MCSectionMachO &SMO = static_cast<const MCSectionMachO&>(Section);
+ // Fixed sized data sections are uniqued, they cannot be diced into atoms.
+ switch (SMO.getType()) {
+ default:
+ return true;
+
+ case MCSectionMachO::S_4BYTE_LITERALS:
+ case MCSectionMachO::S_8BYTE_LITERALS:
+ case MCSectionMachO::S_16BYTE_LITERALS:
+ case MCSectionMachO::S_LITERAL_POINTERS:
+ case MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS:
+ case MCSectionMachO::S_LAZY_SYMBOL_POINTERS:
+ case MCSectionMachO::S_MOD_INIT_FUNC_POINTERS:
+ case MCSectionMachO::S_MOD_TERM_FUNC_POINTERS:
+ case MCSectionMachO::S_INTERPOSING:
+ return false;
+ }
+ }
+};
+
+} // end anonymous namespace
+
+MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T, StringRef TT) {
+ Triple TheTriple(TT);
+
+ if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO)
+ return new DarwinX86_32AsmBackend(T);
+
+ if (TheTriple.isOSWindows())
+ return new WindowsX86AsmBackend(T, false);
+
+ return new ELFX86_32AsmBackend(T, TheTriple.getOS());
+}
+
+MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T, StringRef TT) {
+ Triple TheTriple(TT);
+
+ if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO)
+ return new DarwinX86_64AsmBackend(T);
+
+ if (TheTriple.isOSWindows())
+ return new WindowsX86AsmBackend(T, true);
+
+ return new ELFX86_64AsmBackend(T, TheTriple.getOS());
+}
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
new file mode 100644
index 0000000..e6ba705
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
@@ -0,0 +1,548 @@
+//===-- X86BaseInfo.h - Top level definitions for X86 -------- --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains small standalone helper functions and enum definitions for
+// the X86 target useful for the compiler back-end and the MC libraries.
+// As such, it deliberately does not include references to LLVM core
+// code gen types, passes, etc..
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86BASEINFO_H
+#define X86BASEINFO_H
+
+#include "X86MCTargetDesc.h"
+#include "llvm/Support/DataTypes.h"
+#include <cassert>
+
+namespace llvm {
+
+namespace X86 {
+ // Enums for memory operand decoding. Each memory operand is represented with
+ // a 5 operand sequence in the form:
+ // [BaseReg, ScaleAmt, IndexReg, Disp, Segment]
+ // These enums help decode this.
+ enum {
+ AddrBaseReg = 0,
+ AddrScaleAmt = 1,
+ AddrIndexReg = 2,
+ AddrDisp = 3,
+
+ /// AddrSegmentReg - The operand # of the segment in the memory operand.
+ AddrSegmentReg = 4,
+
+ /// AddrNumOperands - Total number of operands in a memory reference.
+ AddrNumOperands = 5
+ };
+} // end namespace X86;
+
+
+/// X86II - This namespace holds all of the target specific flags that
+/// instruction info tracks.
+///
+namespace X86II {
+ /// Target Operand Flag enum.
+ enum TOF {
+ //===------------------------------------------------------------------===//
+ // X86 Specific MachineOperand flags.
+
+ MO_NO_FLAG,
+
+ /// MO_GOT_ABSOLUTE_ADDRESS - On a symbol operand, this represents a
+ /// relocation of:
+ /// SYMBOL_LABEL + [. - PICBASELABEL]
+ MO_GOT_ABSOLUTE_ADDRESS,
+
+ /// MO_PIC_BASE_OFFSET - On a symbol operand this indicates that the
+ /// immediate should get the value of the symbol minus the PIC base label:
+ /// SYMBOL_LABEL - PICBASELABEL
+ MO_PIC_BASE_OFFSET,
+
+ /// MO_GOT - On a symbol operand this indicates that the immediate is the
+ /// offset to the GOT entry for the symbol name from the base of the GOT.
+ ///
+ /// See the X86-64 ELF ABI supplement for more details.
+ /// SYMBOL_LABEL @GOT
+ MO_GOT,
+
+ /// MO_GOTOFF - On a symbol operand this indicates that the immediate is
+ /// the offset to the location of the symbol name from the base of the GOT.
+ ///
+ /// See the X86-64 ELF ABI supplement for more details.
+ /// SYMBOL_LABEL @GOTOFF
+ MO_GOTOFF,
+
+ /// MO_GOTPCREL - On a symbol operand this indicates that the immediate is
+ /// offset to the GOT entry for the symbol name from the current code
+ /// location.
+ ///
+ /// See the X86-64 ELF ABI supplement for more details.
+ /// SYMBOL_LABEL @GOTPCREL
+ MO_GOTPCREL,
+
+ /// MO_PLT - On a symbol operand this indicates that the immediate is
+ /// offset to the PLT entry of symbol name from the current code location.
+ ///
+ /// See the X86-64 ELF ABI supplement for more details.
+ /// SYMBOL_LABEL @PLT
+ MO_PLT,
+
+ /// MO_TLSGD - On a symbol operand this indicates that the immediate is
+ /// some TLS offset.
+ ///
+ /// See 'ELF Handling for Thread-Local Storage' for more details.
+ /// SYMBOL_LABEL @TLSGD
+ MO_TLSGD,
+
+ /// MO_GOTTPOFF - On a symbol operand this indicates that the immediate is
+ /// some TLS offset.
+ ///
+ /// See 'ELF Handling for Thread-Local Storage' for more details.
+ /// SYMBOL_LABEL @GOTTPOFF
+ MO_GOTTPOFF,
+
+ /// MO_INDNTPOFF - On a symbol operand this indicates that the immediate is
+ /// some TLS offset.
+ ///
+ /// See 'ELF Handling for Thread-Local Storage' for more details.
+ /// SYMBOL_LABEL @INDNTPOFF
+ MO_INDNTPOFF,
+
+ /// MO_TPOFF - On a symbol operand this indicates that the immediate is
+ /// some TLS offset.
+ ///
+ /// See 'ELF Handling for Thread-Local Storage' for more details.
+ /// SYMBOL_LABEL @TPOFF
+ MO_TPOFF,
+
+ /// MO_NTPOFF - On a symbol operand this indicates that the immediate is
+ /// some TLS offset.
+ ///
+ /// See 'ELF Handling for Thread-Local Storage' for more details.
+ /// SYMBOL_LABEL @NTPOFF
+ MO_NTPOFF,
+
+ /// MO_DLLIMPORT - On a symbol operand "FOO", this indicates that the
+ /// reference is actually to the "__imp_FOO" symbol. This is used for
+ /// dllimport linkage on windows.
+ MO_DLLIMPORT,
+
+ /// MO_DARWIN_STUB - On a symbol operand "FOO", this indicates that the
+ /// reference is actually to the "FOO$stub" symbol. This is used for calls
+ /// and jumps to external functions on Tiger and earlier.
+ MO_DARWIN_STUB,
+
+ /// MO_DARWIN_NONLAZY - On a symbol operand "FOO", this indicates that the
+ /// reference is actually to the "FOO$non_lazy_ptr" symbol, which is a
+ /// non-PIC-base-relative reference to a non-hidden dyld lazy pointer stub.
+ MO_DARWIN_NONLAZY,
+
+ /// MO_DARWIN_NONLAZY_PIC_BASE - On a symbol operand "FOO", this indicates
+ /// that the reference is actually to "FOO$non_lazy_ptr - PICBASE", which is
+ /// a PIC-base-relative reference to a non-hidden dyld lazy pointer stub.
+ MO_DARWIN_NONLAZY_PIC_BASE,
+
+ /// MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE - On a symbol operand "FOO", this
+ /// indicates that the reference is actually to "FOO$non_lazy_ptr -PICBASE",
+ /// which is a PIC-base-relative reference to a hidden dyld lazy pointer
+ /// stub.
+ MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE,
+
+ /// MO_TLVP - On a symbol operand this indicates that the immediate is
+ /// some TLS offset.
+ ///
+ /// This is the TLS offset for the Darwin TLS mechanism.
+ MO_TLVP,
+
+ /// MO_TLVP_PIC_BASE - On a symbol operand this indicates that the immediate
+ /// is some TLS offset from the picbase.
+ ///
+ /// This is the 32-bit TLS offset for Darwin TLS in PIC mode.
+ MO_TLVP_PIC_BASE
+ };
+
+ enum {
+ //===------------------------------------------------------------------===//
+ // Instruction encodings. These are the standard/most common forms for X86
+ // instructions.
+ //
+
+ // PseudoFrm - This represents an instruction that is a pseudo instruction
+ // or one that has not been implemented yet. It is illegal to code generate
+ // it, but tolerated for intermediate implementation stages.
+ Pseudo = 0,
+
+ /// Raw - This form is for instructions that don't have any operands, so
+ /// they are just a fixed opcode value, like 'leave'.
+ RawFrm = 1,
+
+ /// AddRegFrm - This form is used for instructions like 'push r32' that have
+ /// their one register operand added to their opcode.
+ AddRegFrm = 2,
+
+ /// MRMDestReg - This form is used for instructions that use the Mod/RM byte
+ /// to specify a destination, which in this case is a register.
+ ///
+ MRMDestReg = 3,
+
+ /// MRMDestMem - This form is used for instructions that use the Mod/RM byte
+ /// to specify a destination, which in this case is memory.
+ ///
+ MRMDestMem = 4,
+
+ /// MRMSrcReg - This form is used for instructions that use the Mod/RM byte
+ /// to specify a source, which in this case is a register.
+ ///
+ MRMSrcReg = 5,
+
+ /// MRMSrcMem - This form is used for instructions that use the Mod/RM byte
+ /// to specify a source, which in this case is memory.
+ ///
+ MRMSrcMem = 6,
+
+ /// MRM[0-7][rm] - These forms are used to represent instructions that use
+ /// a Mod/RM byte, and use the middle field to hold extended opcode
+ /// information. In the intel manual these are represented as /0, /1, ...
+ ///
+
+ // First, instructions that operate on a register r/m operand...
+ MRM0r = 16, MRM1r = 17, MRM2r = 18, MRM3r = 19, // Format /0 /1 /2 /3
+ MRM4r = 20, MRM5r = 21, MRM6r = 22, MRM7r = 23, // Format /4 /5 /6 /7
+
+ // Next, instructions that operate on a memory r/m operand...
+ MRM0m = 24, MRM1m = 25, MRM2m = 26, MRM3m = 27, // Format /0 /1 /2 /3
+ MRM4m = 28, MRM5m = 29, MRM6m = 30, MRM7m = 31, // Format /4 /5 /6 /7
+
+ // MRMInitReg - This form is used for instructions whose source and
+ // destinations are the same register.
+ MRMInitReg = 32,
+
+ //// MRM_C1 - A mod/rm byte of exactly 0xC1.
+ MRM_C1 = 33,
+ MRM_C2 = 34,
+ MRM_C3 = 35,
+ MRM_C4 = 36,
+ MRM_C8 = 37,
+ MRM_C9 = 38,
+ MRM_E8 = 39,
+ MRM_F0 = 40,
+ MRM_F8 = 41,
+ MRM_F9 = 42,
+ MRM_D0 = 45,
+ MRM_D1 = 46,
+
+ /// RawFrmImm8 - This is used for the ENTER instruction, which has two
+ /// immediates, the first of which is a 16-bit immediate (specified by
+ /// the imm encoding) and the second is a 8-bit fixed value.
+ RawFrmImm8 = 43,
+
+ /// RawFrmImm16 - This is used for CALL FAR instructions, which have two
+ /// immediates, the first of which is a 16 or 32-bit immediate (specified by
+ /// the imm encoding) and the second is a 16-bit fixed value. In the AMD
+ /// manual, this operand is described as pntr16:32 and pntr16:16
+ RawFrmImm16 = 44,
+
+ FormMask = 63,
+
+ //===------------------------------------------------------------------===//
+ // Actual flags...
+
+ // OpSize - Set if this instruction requires an operand size prefix (0x66),
+ // which most often indicates that the instruction operates on 16 bit data
+ // instead of 32 bit data.
+ OpSize = 1 << 6,
+
+ // AsSize - Set if this instruction requires an operand size prefix (0x67),
+ // which most often indicates that the instruction address 16 bit address
+ // instead of 32 bit address (or 32 bit address in 64 bit mode).
+ AdSize = 1 << 7,
+
+ //===------------------------------------------------------------------===//
+ // Op0Mask - There are several prefix bytes that are used to form two byte
+ // opcodes. These are currently 0x0F, 0xF3, and 0xD8-0xDF. This mask is
+ // used to obtain the setting of this field. If no bits in this field is
+ // set, there is no prefix byte for obtaining a multibyte opcode.
+ //
+ Op0Shift = 8,
+ Op0Mask = 0x1F << Op0Shift,
+
+ // TB - TwoByte - Set if this instruction has a two byte opcode, which
+ // starts with a 0x0F byte before the real opcode.
+ TB = 1 << Op0Shift,
+
+ // REP - The 0xF3 prefix byte indicating repetition of the following
+ // instruction.
+ REP = 2 << Op0Shift,
+
+ // D8-DF - These escape opcodes are used by the floating point unit. These
+ // values must remain sequential.
+ D8 = 3 << Op0Shift, D9 = 4 << Op0Shift,
+ DA = 5 << Op0Shift, DB = 6 << Op0Shift,
+ DC = 7 << Op0Shift, DD = 8 << Op0Shift,
+ DE = 9 << Op0Shift, DF = 10 << Op0Shift,
+
+ // XS, XD - These prefix codes are for single and double precision scalar
+ // floating point operations performed in the SSE registers.
+ XD = 11 << Op0Shift, XS = 12 << Op0Shift,
+
+ // T8, TA, A6, A7 - Prefix after the 0x0F prefix.
+ T8 = 13 << Op0Shift, TA = 14 << Op0Shift,
+ A6 = 15 << Op0Shift, A7 = 16 << Op0Shift,
+
+ // TF - Prefix before and after 0x0F
+ TF = 17 << Op0Shift,
+
+ //===------------------------------------------------------------------===//
+ // REX_W - REX prefixes are instruction prefixes used in 64-bit mode.
+ // They are used to specify GPRs and SSE registers, 64-bit operand size,
+ // etc. We only cares about REX.W and REX.R bits and only the former is
+ // statically determined.
+ //
+ REXShift = Op0Shift + 5,
+ REX_W = 1 << REXShift,
+
+ //===------------------------------------------------------------------===//
+ // This three-bit field describes the size of an immediate operand. Zero is
+ // unused so that we can tell if we forgot to set a value.
+ ImmShift = REXShift + 1,
+ ImmMask = 7 << ImmShift,
+ Imm8 = 1 << ImmShift,
+ Imm8PCRel = 2 << ImmShift,
+ Imm16 = 3 << ImmShift,
+ Imm16PCRel = 4 << ImmShift,
+ Imm32 = 5 << ImmShift,
+ Imm32PCRel = 6 << ImmShift,
+ Imm64 = 7 << ImmShift,
+
+ //===------------------------------------------------------------------===//
+ // FP Instruction Classification... Zero is non-fp instruction.
+
+ // FPTypeMask - Mask for all of the FP types...
+ FPTypeShift = ImmShift + 3,
+ FPTypeMask = 7 << FPTypeShift,
+
+ // NotFP - The default, set for instructions that do not use FP registers.
+ NotFP = 0 << FPTypeShift,
+
+ // ZeroArgFP - 0 arg FP instruction which implicitly pushes ST(0), f.e. fld0
+ ZeroArgFP = 1 << FPTypeShift,
+
+ // OneArgFP - 1 arg FP instructions which implicitly read ST(0), such as fst
+ OneArgFP = 2 << FPTypeShift,
+
+ // OneArgFPRW - 1 arg FP instruction which implicitly read ST(0) and write a
+ // result back to ST(0). For example, fcos, fsqrt, etc.
+ //
+ OneArgFPRW = 3 << FPTypeShift,
+
+ // TwoArgFP - 2 arg FP instructions which implicitly read ST(0), and an
+ // explicit argument, storing the result to either ST(0) or the implicit
+ // argument. For example: fadd, fsub, fmul, etc...
+ TwoArgFP = 4 << FPTypeShift,
+
+ // CompareFP - 2 arg FP instructions which implicitly read ST(0) and an
+ // explicit argument, but have no destination. Example: fucom, fucomi, ...
+ CompareFP = 5 << FPTypeShift,
+
+ // CondMovFP - "2 operand" floating point conditional move instructions.
+ CondMovFP = 6 << FPTypeShift,
+
+ // SpecialFP - Special instruction forms. Dispatch by opcode explicitly.
+ SpecialFP = 7 << FPTypeShift,
+
+ // Lock prefix
+ LOCKShift = FPTypeShift + 3,
+ LOCK = 1 << LOCKShift,
+
+ // Segment override prefixes. Currently we just need ability to address
+ // stuff in gs and fs segments.
+ SegOvrShift = LOCKShift + 1,
+ SegOvrMask = 3 << SegOvrShift,
+ FS = 1 << SegOvrShift,
+ GS = 2 << SegOvrShift,
+
+ // Execution domain for SSE instructions in bits 23, 24.
+ // 0 in bits 23-24 means normal, non-SSE instruction.
+ SSEDomainShift = SegOvrShift + 2,
+
+ OpcodeShift = SSEDomainShift + 2,
+
+ //===------------------------------------------------------------------===//
+ /// VEX - The opcode prefix used by AVX instructions
+ VEXShift = OpcodeShift + 8,
+ VEX = 1U << 0,
+
+ /// VEX_W - Has a opcode specific functionality, but is used in the same
+ /// way as REX_W is for regular SSE instructions.
+ VEX_W = 1U << 1,
+
+ /// VEX_4V - Used to specify an additional AVX/SSE register. Several 2
+ /// address instructions in SSE are represented as 3 address ones in AVX
+ /// and the additional register is encoded in VEX_VVVV prefix.
+ VEX_4V = 1U << 2,
+
+ /// VEX_I8IMM - Specifies that the last register used in a AVX instruction,
+ /// must be encoded in the i8 immediate field. This usually happens in
+ /// instructions with 4 operands.
+ VEX_I8IMM = 1U << 3,
+
+ /// VEX_L - Stands for a bit in the VEX opcode prefix meaning the current
+ /// instruction uses 256-bit wide registers. This is usually auto detected
+ /// if a VR256 register is used, but some AVX instructions also have this
+ /// field marked when using a f256 memory references.
+ VEX_L = 1U << 4,
+
+ // VEX_LIG - Specifies that this instruction ignores the L-bit in the VEX
+ // prefix. Usually used for scalar instructions. Needed by disassembler.
+ VEX_LIG = 1U << 5,
+
+ /// Has3DNow0F0FOpcode - This flag indicates that the instruction uses the
+ /// wacky 0x0F 0x0F prefix for 3DNow! instructions. The manual documents
+ /// this as having a 0x0F prefix with a 0x0F opcode, and each instruction
+ /// storing a classifier in the imm8 field. To simplify our implementation,
+ /// we handle this by storeing the classifier in the opcode field and using
+ /// this flag to indicate that the encoder should do the wacky 3DNow! thing.
+ Has3DNow0F0FOpcode = 1U << 6
+ };
+
+ // getBaseOpcodeFor - This function returns the "base" X86 opcode for the
+ // specified machine instruction.
+ //
+ static inline unsigned char getBaseOpcodeFor(uint64_t TSFlags) {
+ return TSFlags >> X86II::OpcodeShift;
+ }
+
+ static inline bool hasImm(uint64_t TSFlags) {
+ return (TSFlags & X86II::ImmMask) != 0;
+ }
+
+ /// getSizeOfImm - Decode the "size of immediate" field from the TSFlags field
+ /// of the specified instruction.
+ static inline unsigned getSizeOfImm(uint64_t TSFlags) {
+ switch (TSFlags & X86II::ImmMask) {
+ default: assert(0 && "Unknown immediate size");
+ case X86II::Imm8:
+ case X86II::Imm8PCRel: return 1;
+ case X86II::Imm16:
+ case X86II::Imm16PCRel: return 2;
+ case X86II::Imm32:
+ case X86II::Imm32PCRel: return 4;
+ case X86II::Imm64: return 8;
+ }
+ }
+
+ /// isImmPCRel - Return true if the immediate of the specified instruction's
+ /// TSFlags indicates that it is pc relative.
+ static inline unsigned isImmPCRel(uint64_t TSFlags) {
+ switch (TSFlags & X86II::ImmMask) {
+ default: assert(0 && "Unknown immediate size");
+ case X86II::Imm8PCRel:
+ case X86II::Imm16PCRel:
+ case X86II::Imm32PCRel:
+ return true;
+ case X86II::Imm8:
+ case X86II::Imm16:
+ case X86II::Imm32:
+ case X86II::Imm64:
+ return false;
+ }
+ }
+
+ /// getMemoryOperandNo - The function returns the MCInst operand # for the
+ /// first field of the memory operand. If the instruction doesn't have a
+ /// memory operand, this returns -1.
+ ///
+ /// Note that this ignores tied operands. If there is a tied register which
+ /// is duplicated in the MCInst (e.g. "EAX = addl EAX, [mem]") it is only
+ /// counted as one operand.
+ ///
+ static inline int getMemoryOperandNo(uint64_t TSFlags) {
+ switch (TSFlags & X86II::FormMask) {
+ case X86II::MRMInitReg: assert(0 && "FIXME: Remove this form");
+ default: assert(0 && "Unknown FormMask value in getMemoryOperandNo!");
+ case X86II::Pseudo:
+ case X86II::RawFrm:
+ case X86II::AddRegFrm:
+ case X86II::MRMDestReg:
+ case X86II::MRMSrcReg:
+ case X86II::RawFrmImm8:
+ case X86II::RawFrmImm16:
+ return -1;
+ case X86II::MRMDestMem:
+ return 0;
+ case X86II::MRMSrcMem: {
+ bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
+ unsigned FirstMemOp = 1;
+ if (HasVEX_4V)
+ ++FirstMemOp;// Skip the register source (which is encoded in VEX_VVVV).
+
+ // FIXME: Maybe lea should have its own form? This is a horrible hack.
+ //if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r ||
+ // Opcode == X86::LEA16r || Opcode == X86::LEA32r)
+ return FirstMemOp;
+ }
+ case X86II::MRM0r: case X86II::MRM1r:
+ case X86II::MRM2r: case X86II::MRM3r:
+ case X86II::MRM4r: case X86II::MRM5r:
+ case X86II::MRM6r: case X86II::MRM7r:
+ return -1;
+ case X86II::MRM0m: case X86II::MRM1m:
+ case X86II::MRM2m: case X86II::MRM3m:
+ case X86II::MRM4m: case X86II::MRM5m:
+ case X86II::MRM6m: case X86II::MRM7m:
+ return 0;
+ case X86II::MRM_C1:
+ case X86II::MRM_C2:
+ case X86II::MRM_C3:
+ case X86II::MRM_C4:
+ case X86II::MRM_C8:
+ case X86II::MRM_C9:
+ case X86II::MRM_E8:
+ case X86II::MRM_F0:
+ case X86II::MRM_F8:
+ case X86II::MRM_F9:
+ case X86II::MRM_D0:
+ case X86II::MRM_D1:
+ return -1;
+ }
+ }
+
+ /// isX86_64ExtendedReg - Is the MachineOperand a x86-64 extended (r8 or
+ /// higher) register? e.g. r8, xmm8, xmm13, etc.
+ static inline bool isX86_64ExtendedReg(unsigned RegNo) {
+ switch (RegNo) {
+ default: break;
+ case X86::R8: case X86::R9: case X86::R10: case X86::R11:
+ case X86::R12: case X86::R13: case X86::R14: case X86::R15:
+ case X86::R8D: case X86::R9D: case X86::R10D: case X86::R11D:
+ case X86::R12D: case X86::R13D: case X86::R14D: case X86::R15D:
+ case X86::R8W: case X86::R9W: case X86::R10W: case X86::R11W:
+ case X86::R12W: case X86::R13W: case X86::R14W: case X86::R15W:
+ case X86::R8B: case X86::R9B: case X86::R10B: case X86::R11B:
+ case X86::R12B: case X86::R13B: case X86::R14B: case X86::R15B:
+ case X86::XMM8: case X86::XMM9: case X86::XMM10: case X86::XMM11:
+ case X86::XMM12: case X86::XMM13: case X86::XMM14: case X86::XMM15:
+ case X86::YMM8: case X86::YMM9: case X86::YMM10: case X86::YMM11:
+ case X86::YMM12: case X86::YMM13: case X86::YMM14: case X86::YMM15:
+ case X86::CR8: case X86::CR9: case X86::CR10: case X86::CR11:
+ case X86::CR12: case X86::CR13: case X86::CR14: case X86::CR15:
+ return true;
+ }
+ return false;
+ }
+
+ static inline bool isX86_64NonExtLowByteReg(unsigned reg) {
+ return (reg == X86::SPL || reg == X86::BPL ||
+ reg == X86::SIL || reg == X86::DIL);
+ }
+}
+
+} // end namespace llvm;
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86FixupKinds.h b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86FixupKinds.h
new file mode 100644
index 0000000..17d242a
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86FixupKinds.h
@@ -0,0 +1,33 @@
+//===-- X86/X86FixupKinds.h - X86 Specific Fixup Entries --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_X86_X86FIXUPKINDS_H
+#define LLVM_X86_X86FIXUPKINDS_H
+
+#include "llvm/MC/MCFixup.h"
+
+namespace llvm {
+namespace X86 {
+enum Fixups {
+ reloc_riprel_4byte = FirstTargetFixupKind, // 32-bit rip-relative
+ reloc_riprel_4byte_movq_load, // 32-bit rip-relative in movq
+ reloc_signed_4byte, // 32-bit signed. Unlike FK_Data_4
+ // this will be sign extended at
+ // runtime.
+ reloc_global_offset_table, // 32-bit, relative to the start
+ // of the instruction. Used only
+ // for _GLOBAL_OFFSET_TABLE_.
+ // Marker
+ LastTargetFixupKind,
+ NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
+};
+}
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
new file mode 100644
index 0000000..2703100
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
@@ -0,0 +1,138 @@
+//===-- X86MCAsmInfo.cpp - X86 asm properties -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the X86MCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86MCAsmInfo.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ELF.h"
+using namespace llvm;
+
+enum AsmWriterFlavorTy {
+ // Note: This numbering has to match the GCC assembler dialects for inline
+ // asm alternatives to work right.
+ ATT = 0, Intel = 1
+};
+
+static cl::opt<AsmWriterFlavorTy>
+AsmWriterFlavor("x86-asm-syntax", cl::init(ATT),
+ cl::desc("Choose style of code to emit from X86 backend:"),
+ cl::values(clEnumValN(ATT, "att", "Emit AT&T-style assembly"),
+ clEnumValN(Intel, "intel", "Emit Intel-style assembly"),
+ clEnumValEnd));
+
+
+static const char *const x86_asm_table[] = {
+ "{si}", "S",
+ "{di}", "D",
+ "{ax}", "a",
+ "{cx}", "c",
+ "{memory}", "memory",
+ "{flags}", "",
+ "{dirflag}", "",
+ "{fpsr}", "",
+ "{fpcr}", "",
+ "{cc}", "cc",
+ 0,0};
+
+X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &T) {
+ bool is64Bit = T.getArch() == Triple::x86_64;
+ if (is64Bit)
+ PointerSize = 8;
+
+ AsmTransCBE = x86_asm_table;
+ AssemblerDialect = AsmWriterFlavor;
+
+ TextAlignFillValue = 0x90;
+
+ if (!is64Bit)
+ Data64bitsDirective = 0; // we can't emit a 64-bit unit
+
+ // Use ## as a comment string so that .s files generated by llvm can go
+ // through the GCC preprocessor without causing an error. This is needed
+ // because "clang foo.s" runs the C preprocessor, which is usually reserved
+ // for .S files on other systems. Perhaps this is because the file system
+ // wasn't always case preserving or something.
+ CommentString = "##";
+ PCSymbol = ".";
+
+ SupportsDebugInformation = true;
+ DwarfUsesInlineInfoSection = true;
+
+ // Exceptions handling
+ ExceptionsType = ExceptionHandling::DwarfCFI;
+}
+
+X86_64MCAsmInfoDarwin::X86_64MCAsmInfoDarwin(const Triple &Triple)
+ : X86MCAsmInfoDarwin(Triple) {
+}
+
+X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) {
+ if (T.getArch() == Triple::x86_64)
+ PointerSize = 8;
+
+ AsmTransCBE = x86_asm_table;
+ AssemblerDialect = AsmWriterFlavor;
+
+ TextAlignFillValue = 0x90;
+
+ PrivateGlobalPrefix = ".L";
+ WeakRefDirective = "\t.weak\t";
+ PCSymbol = ".";
+
+ // Set up DWARF directives
+ HasLEB128 = true; // Target asm supports leb128 directives (little-endian)
+
+ // Debug Information
+ SupportsDebugInformation = true;
+
+ // Exceptions handling
+ ExceptionsType = ExceptionHandling::DwarfCFI;
+
+ // OpenBSD has buggy support for .quad in 32-bit mode, just split into two
+ // .words.
+ if (T.getOS() == Triple::OpenBSD && T.getArch() == Triple::x86)
+ Data64bitsDirective = 0;
+}
+
+const MCExpr *
+X86_64MCAsmInfoDarwin::getExprForPersonalitySymbol(const MCSymbol *Sym,
+ unsigned Encoding,
+ MCStreamer &Streamer) const {
+ MCContext &Context = Streamer.getContext();
+ const MCExpr *Res =
+ MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOTPCREL, Context);
+ const MCExpr *Four = MCConstantExpr::Create(4, Context);
+ return MCBinaryExpr::CreateAdd(Res, Four, Context);
+}
+
+const MCSection *X86ELFMCAsmInfo::
+getNonexecutableStackSection(MCContext &Ctx) const {
+ return Ctx.getELFSection(".note.GNU-stack", ELF::SHT_PROGBITS,
+ 0, SectionKind::getMetadata());
+}
+
+X86MCAsmInfoCOFF::X86MCAsmInfoCOFF(const Triple &Triple) {
+ if (Triple.getArch() == Triple::x86_64) {
+ GlobalPrefix = "";
+ PrivateGlobalPrefix = ".L";
+ }
+
+ AsmTransCBE = x86_asm_table;
+ AssemblerDialect = AsmWriterFlavor;
+
+ TextAlignFillValue = 0x90;
+}
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h
new file mode 100644
index 0000000..2cd4c8e
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h
@@ -0,0 +1,46 @@
+//=====-- X86MCAsmInfo.h - X86 asm properties -----------------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the X86MCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86TARGETASMINFO_H
+#define X86TARGETASMINFO_H
+
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCAsmInfoCOFF.h"
+#include "llvm/MC/MCAsmInfoDarwin.h"
+
+namespace llvm {
+ class Triple;
+
+ struct X86MCAsmInfoDarwin : public MCAsmInfoDarwin {
+ explicit X86MCAsmInfoDarwin(const Triple &Triple);
+ };
+
+ struct X86_64MCAsmInfoDarwin : public X86MCAsmInfoDarwin {
+ explicit X86_64MCAsmInfoDarwin(const Triple &Triple);
+ virtual const MCExpr *
+ getExprForPersonalitySymbol(const MCSymbol *Sym,
+ unsigned Encoding,
+ MCStreamer &Streamer) const;
+ };
+
+ struct X86ELFMCAsmInfo : public MCAsmInfo {
+ explicit X86ELFMCAsmInfo(const Triple &Triple);
+ virtual const MCSection *getNonexecutableStackSection(MCContext &Ctx) const;
+ };
+
+ struct X86MCAsmInfoCOFF : public MCAsmInfoCOFF {
+ explicit X86MCAsmInfoCOFF(const Triple &Triple);
+ };
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
new file mode 100644
index 0000000..2eee112
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -0,0 +1,1074 @@
+//===-- X86/X86MCCodeEmitter.cpp - Convert X86 code to machine code -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the X86MCCodeEmitter class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mccodeemitter"
+#include "MCTargetDesc/X86MCTargetDesc.h"
+#include "MCTargetDesc/X86BaseInfo.h"
+#include "MCTargetDesc/X86FixupKinds.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+class X86MCCodeEmitter : public MCCodeEmitter {
+ X86MCCodeEmitter(const X86MCCodeEmitter &); // DO NOT IMPLEMENT
+ void operator=(const X86MCCodeEmitter &); // DO NOT IMPLEMENT
+ const MCInstrInfo &MCII;
+ const MCSubtargetInfo &STI;
+ MCContext &Ctx;
+public:
+ X86MCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti,
+ MCContext &ctx)
+ : MCII(mcii), STI(sti), Ctx(ctx) {
+ }
+
+ ~X86MCCodeEmitter() {}
+
+ bool is64BitMode() const {
+ // FIXME: Can tablegen auto-generate this?
+ return (STI.getFeatureBits() & X86::Mode64Bit) != 0;
+ }
+
+ static unsigned GetX86RegNum(const MCOperand &MO) {
+ return X86_MC::getX86RegNum(MO.getReg());
+ }
+
+ // On regular x86, both XMM0-XMM7 and XMM8-XMM15 are encoded in the range
+ // 0-7 and the difference between the 2 groups is given by the REX prefix.
+ // In the VEX prefix, registers are seen sequencially from 0-15 and encoded
+ // in 1's complement form, example:
+ //
+ // ModRM field => XMM9 => 1
+ // VEX.VVVV => XMM9 => ~9
+ //
+ // See table 4-35 of Intel AVX Programming Reference for details.
+ static unsigned char getVEXRegisterEncoding(const MCInst &MI,
+ unsigned OpNum) {
+ unsigned SrcReg = MI.getOperand(OpNum).getReg();
+ unsigned SrcRegNum = GetX86RegNum(MI.getOperand(OpNum));
+ if ((SrcReg >= X86::XMM8 && SrcReg <= X86::XMM15) ||
+ (SrcReg >= X86::YMM8 && SrcReg <= X86::YMM15))
+ SrcRegNum += 8;
+
+ // The registers represented through VEX_VVVV should
+ // be encoded in 1's complement form.
+ return (~SrcRegNum) & 0xf;
+ }
+
+ void EmitByte(unsigned char C, unsigned &CurByte, raw_ostream &OS) const {
+ OS << (char)C;
+ ++CurByte;
+ }
+
+ void EmitConstant(uint64_t Val, unsigned Size, unsigned &CurByte,
+ raw_ostream &OS) const {
+ // Output the constant in little endian byte order.
+ for (unsigned i = 0; i != Size; ++i) {
+ EmitByte(Val & 255, CurByte, OS);
+ Val >>= 8;
+ }
+ }
+
+ void EmitImmediate(const MCOperand &Disp,
+ unsigned ImmSize, MCFixupKind FixupKind,
+ unsigned &CurByte, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups,
+ int ImmOffset = 0) const;
+
+ inline static unsigned char ModRMByte(unsigned Mod, unsigned RegOpcode,
+ unsigned RM) {
+ assert(Mod < 4 && RegOpcode < 8 && RM < 8 && "ModRM Fields out of range!");
+ return RM | (RegOpcode << 3) | (Mod << 6);
+ }
+
+ void EmitRegModRMByte(const MCOperand &ModRMReg, unsigned RegOpcodeFld,
+ unsigned &CurByte, raw_ostream &OS) const {
+ EmitByte(ModRMByte(3, RegOpcodeFld, GetX86RegNum(ModRMReg)), CurByte, OS);
+ }
+
+ void EmitSIBByte(unsigned SS, unsigned Index, unsigned Base,
+ unsigned &CurByte, raw_ostream &OS) const {
+ // SIB byte is in the same format as the ModRMByte.
+ EmitByte(ModRMByte(SS, Index, Base), CurByte, OS);
+ }
+
+
+ void EmitMemModRMByte(const MCInst &MI, unsigned Op,
+ unsigned RegOpcodeField,
+ uint64_t TSFlags, unsigned &CurByte, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ void EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, int MemOperand,
+ const MCInst &MI, const MCInstrDesc &Desc,
+ raw_ostream &OS) const;
+
+ void EmitSegmentOverridePrefix(uint64_t TSFlags, unsigned &CurByte,
+ int MemOperand, const MCInst &MI,
+ raw_ostream &OS) const;
+
+ void EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, int MemOperand,
+ const MCInst &MI, const MCInstrDesc &Desc,
+ raw_ostream &OS) const;
+};
+
+} // end anonymous namespace
+
+
+MCCodeEmitter *llvm::createX86MCCodeEmitter(const MCInstrInfo &MCII,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
+ return new X86MCCodeEmitter(MCII, STI, Ctx);
+}
+
+/// isDisp8 - Return true if this signed displacement fits in a 8-bit
+/// sign-extended field.
+static bool isDisp8(int Value) {
+ return Value == (signed char)Value;
+}
+
+/// getImmFixupKind - Return the appropriate fixup kind to use for an immediate
+/// in an instruction with the specified TSFlags.
+static MCFixupKind getImmFixupKind(uint64_t TSFlags) {
+ unsigned Size = X86II::getSizeOfImm(TSFlags);
+ bool isPCRel = X86II::isImmPCRel(TSFlags);
+
+ return MCFixup::getKindForSize(Size, isPCRel);
+}
+
+/// Is32BitMemOperand - Return true if the specified instruction with a memory
+/// operand should emit the 0x67 prefix byte in 64-bit mode due to a 32-bit
+/// memory operand. Op specifies the operand # of the memoperand.
+static bool Is32BitMemOperand(const MCInst &MI, unsigned Op) {
+ const MCOperand &BaseReg = MI.getOperand(Op+X86::AddrBaseReg);
+ const MCOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg);
+
+ if ((BaseReg.getReg() != 0 &&
+ X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg.getReg())) ||
+ (IndexReg.getReg() != 0 &&
+ X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg.getReg())))
+ return true;
+ return false;
+}
+
+/// StartsWithGlobalOffsetTable - Return true for the simple cases where this
+/// expression starts with _GLOBAL_OFFSET_TABLE_. This is a needed to support
+/// PIC on ELF i386 as that symbol is magic. We check only simple case that
+/// are know to be used: _GLOBAL_OFFSET_TABLE_ by itself or at the start
+/// of a binary expression.
+static bool StartsWithGlobalOffsetTable(const MCExpr *Expr) {
+ if (Expr->getKind() == MCExpr::Binary) {
+ const MCBinaryExpr *BE = static_cast<const MCBinaryExpr *>(Expr);
+ Expr = BE->getLHS();
+ }
+
+ if (Expr->getKind() != MCExpr::SymbolRef)
+ return false;
+
+ const MCSymbolRefExpr *Ref = static_cast<const MCSymbolRefExpr*>(Expr);
+ const MCSymbol &S = Ref->getSymbol();
+ return S.getName() == "_GLOBAL_OFFSET_TABLE_";
+}
+
+void X86MCCodeEmitter::
+EmitImmediate(const MCOperand &DispOp, unsigned Size, MCFixupKind FixupKind,
+ unsigned &CurByte, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups, int ImmOffset) const {
+ const MCExpr *Expr = NULL;
+ if (DispOp.isImm()) {
+ // If this is a simple integer displacement that doesn't require a
+ // relocation, emit it now.
+ if (FixupKind != FK_PCRel_1 &&
+ FixupKind != FK_PCRel_2 &&
+ FixupKind != FK_PCRel_4) {
+ EmitConstant(DispOp.getImm()+ImmOffset, Size, CurByte, OS);
+ return;
+ }
+ Expr = MCConstantExpr::Create(DispOp.getImm(), Ctx);
+ } else {
+ Expr = DispOp.getExpr();
+ }
+
+ // If we have an immoffset, add it to the expression.
+ if ((FixupKind == FK_Data_4 ||
+ FixupKind == MCFixupKind(X86::reloc_signed_4byte)) &&
+ StartsWithGlobalOffsetTable(Expr)) {
+ assert(ImmOffset == 0);
+
+ FixupKind = MCFixupKind(X86::reloc_global_offset_table);
+ ImmOffset = CurByte;
+ }
+
+ // If the fixup is pc-relative, we need to bias the value to be relative to
+ // the start of the field, not the end of the field.
+ if (FixupKind == FK_PCRel_4 ||
+ FixupKind == MCFixupKind(X86::reloc_riprel_4byte) ||
+ FixupKind == MCFixupKind(X86::reloc_riprel_4byte_movq_load))
+ ImmOffset -= 4;
+ if (FixupKind == FK_PCRel_2)
+ ImmOffset -= 2;
+ if (FixupKind == FK_PCRel_1)
+ ImmOffset -= 1;
+
+ if (ImmOffset)
+ Expr = MCBinaryExpr::CreateAdd(Expr, MCConstantExpr::Create(ImmOffset, Ctx),
+ Ctx);
+
+ // Emit a symbolic constant as a fixup and 4 zeros.
+ Fixups.push_back(MCFixup::Create(CurByte, Expr, FixupKind));
+ EmitConstant(0, Size, CurByte, OS);
+}
+
+void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op,
+ unsigned RegOpcodeField,
+ uint64_t TSFlags, unsigned &CurByte,
+ raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const{
+ const MCOperand &Disp = MI.getOperand(Op+X86::AddrDisp);
+ const MCOperand &Base = MI.getOperand(Op+X86::AddrBaseReg);
+ const MCOperand &Scale = MI.getOperand(Op+X86::AddrScaleAmt);
+ const MCOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg);
+ unsigned BaseReg = Base.getReg();
+
+ // Handle %rip relative addressing.
+ if (BaseReg == X86::RIP) { // [disp32+RIP] in X86-64 mode
+ assert(is64BitMode() && "Rip-relative addressing requires 64-bit mode");
+ assert(IndexReg.getReg() == 0 && "Invalid rip-relative address");
+ EmitByte(ModRMByte(0, RegOpcodeField, 5), CurByte, OS);
+
+ unsigned FixupKind = X86::reloc_riprel_4byte;
+
+ // movq loads are handled with a special relocation form which allows the
+ // linker to eliminate some loads for GOT references which end up in the
+ // same linkage unit.
+ if (MI.getOpcode() == X86::MOV64rm)
+ FixupKind = X86::reloc_riprel_4byte_movq_load;
+
+ // rip-relative addressing is actually relative to the *next* instruction.
+ // Since an immediate can follow the mod/rm byte for an instruction, this
+ // means that we need to bias the immediate field of the instruction with
+ // the size of the immediate field. If we have this case, add it into the
+ // expression to emit.
+ int ImmSize = X86II::hasImm(TSFlags) ? X86II::getSizeOfImm(TSFlags) : 0;
+
+ EmitImmediate(Disp, 4, MCFixupKind(FixupKind),
+ CurByte, OS, Fixups, -ImmSize);
+ return;
+ }
+
+ unsigned BaseRegNo = BaseReg ? GetX86RegNum(Base) : -1U;
+
+ // Determine whether a SIB byte is needed.
+ // If no BaseReg, issue a RIP relative instruction only if the MCE can
+ // resolve addresses on-the-fly, otherwise use SIB (Intel Manual 2A, table
+ // 2-7) and absolute references.
+
+ if (// The SIB byte must be used if there is an index register.
+ IndexReg.getReg() == 0 &&
+ // The SIB byte must be used if the base is ESP/RSP/R12, all of which
+ // encode to an R/M value of 4, which indicates that a SIB byte is
+ // present.
+ BaseRegNo != N86::ESP &&
+ // If there is no base register and we're in 64-bit mode, we need a SIB
+ // byte to emit an addr that is just 'disp32' (the non-RIP relative form).
+ (!is64BitMode() || BaseReg != 0)) {
+
+ if (BaseReg == 0) { // [disp32] in X86-32 mode
+ EmitByte(ModRMByte(0, RegOpcodeField, 5), CurByte, OS);
+ EmitImmediate(Disp, 4, FK_Data_4, CurByte, OS, Fixups);
+ return;
+ }
+
+ // If the base is not EBP/ESP and there is no displacement, use simple
+ // indirect register encoding, this handles addresses like [EAX]. The
+ // encoding for [EBP] with no displacement means [disp32] so we handle it
+ // by emitting a displacement of 0 below.
+ if (Disp.isImm() && Disp.getImm() == 0 && BaseRegNo != N86::EBP) {
+ EmitByte(ModRMByte(0, RegOpcodeField, BaseRegNo), CurByte, OS);
+ return;
+ }
+
+ // Otherwise, if the displacement fits in a byte, encode as [REG+disp8].
+ if (Disp.isImm() && isDisp8(Disp.getImm())) {
+ EmitByte(ModRMByte(1, RegOpcodeField, BaseRegNo), CurByte, OS);
+ EmitImmediate(Disp, 1, FK_Data_1, CurByte, OS, Fixups);
+ return;
+ }
+
+ // Otherwise, emit the most general non-SIB encoding: [REG+disp32]
+ EmitByte(ModRMByte(2, RegOpcodeField, BaseRegNo), CurByte, OS);
+ EmitImmediate(Disp, 4, MCFixupKind(X86::reloc_signed_4byte), CurByte, OS,
+ Fixups);
+ return;
+ }
+
+ // We need a SIB byte, so start by outputting the ModR/M byte first
+ assert(IndexReg.getReg() != X86::ESP &&
+ IndexReg.getReg() != X86::RSP && "Cannot use ESP as index reg!");
+
+ bool ForceDisp32 = false;
+ bool ForceDisp8 = false;
+ if (BaseReg == 0) {
+ // If there is no base register, we emit the special case SIB byte with
+ // MOD=0, BASE=5, to JUST get the index, scale, and displacement.
+ EmitByte(ModRMByte(0, RegOpcodeField, 4), CurByte, OS);
+ ForceDisp32 = true;
+ } else if (!Disp.isImm()) {
+ // Emit the normal disp32 encoding.
+ EmitByte(ModRMByte(2, RegOpcodeField, 4), CurByte, OS);
+ ForceDisp32 = true;
+ } else if (Disp.getImm() == 0 &&
+ // Base reg can't be anything that ends up with '5' as the base
+ // reg, it is the magic [*] nomenclature that indicates no base.
+ BaseRegNo != N86::EBP) {
+ // Emit no displacement ModR/M byte
+ EmitByte(ModRMByte(0, RegOpcodeField, 4), CurByte, OS);
+ } else if (isDisp8(Disp.getImm())) {
+ // Emit the disp8 encoding.
+ EmitByte(ModRMByte(1, RegOpcodeField, 4), CurByte, OS);
+ ForceDisp8 = true; // Make sure to force 8 bit disp if Base=EBP
+ } else {
+ // Emit the normal disp32 encoding.
+ EmitByte(ModRMByte(2, RegOpcodeField, 4), CurByte, OS);
+ }
+
+ // Calculate what the SS field value should be...
+ static const unsigned SSTable[] = { ~0U, 0, 1, ~0U, 2, ~0U, ~0U, ~0U, 3 };
+ unsigned SS = SSTable[Scale.getImm()];
+
+ if (BaseReg == 0) {
+ // Handle the SIB byte for the case where there is no base, see Intel
+ // Manual 2A, table 2-7. The displacement has already been output.
+ unsigned IndexRegNo;
+ if (IndexReg.getReg())
+ IndexRegNo = GetX86RegNum(IndexReg);
+ else // Examples: [ESP+1*<noreg>+4] or [scaled idx]+disp32 (MOD=0,BASE=5)
+ IndexRegNo = 4;
+ EmitSIBByte(SS, IndexRegNo, 5, CurByte, OS);
+ } else {
+ unsigned IndexRegNo;
+ if (IndexReg.getReg())
+ IndexRegNo = GetX86RegNum(IndexReg);
+ else
+ IndexRegNo = 4; // For example [ESP+1*<noreg>+4]
+ EmitSIBByte(SS, IndexRegNo, GetX86RegNum(Base), CurByte, OS);
+ }
+
+ // Do we need to output a displacement?
+ if (ForceDisp8)
+ EmitImmediate(Disp, 1, FK_Data_1, CurByte, OS, Fixups);
+ else if (ForceDisp32 || Disp.getImm() != 0)
+ EmitImmediate(Disp, 4, MCFixupKind(X86::reloc_signed_4byte), CurByte, OS,
+ Fixups);
+}
+
+/// EmitVEXOpcodePrefix - AVX instructions are encoded using a opcode prefix
+/// called VEX.
+void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
+ int MemOperand, const MCInst &MI,
+ const MCInstrDesc &Desc,
+ raw_ostream &OS) const {
+ bool HasVEX_4V = false;
+ if ((TSFlags >> X86II::VEXShift) & X86II::VEX_4V)
+ HasVEX_4V = true;
+
+ // VEX_R: opcode externsion equivalent to REX.R in
+ // 1's complement (inverted) form
+ //
+ // 1: Same as REX_R=0 (must be 1 in 32-bit mode)
+ // 0: Same as REX_R=1 (64 bit mode only)
+ //
+ unsigned char VEX_R = 0x1;
+
+ // VEX_X: equivalent to REX.X, only used when a
+ // register is used for index in SIB Byte.
+ //
+ // 1: Same as REX.X=0 (must be 1 in 32-bit mode)
+ // 0: Same as REX.X=1 (64-bit mode only)
+ unsigned char VEX_X = 0x1;
+
+ // VEX_B:
+ //
+ // 1: Same as REX_B=0 (ignored in 32-bit mode)
+ // 0: Same as REX_B=1 (64 bit mode only)
+ //
+ unsigned char VEX_B = 0x1;
+
+ // VEX_W: opcode specific (use like REX.W, or used for
+ // opcode extension, or ignored, depending on the opcode byte)
+ unsigned char VEX_W = 0;
+
+ // VEX_5M (VEX m-mmmmm field):
+ //
+ // 0b00000: Reserved for future use
+ // 0b00001: implied 0F leading opcode
+ // 0b00010: implied 0F 38 leading opcode bytes
+ // 0b00011: implied 0F 3A leading opcode bytes
+ // 0b00100-0b11111: Reserved for future use
+ //
+ unsigned char VEX_5M = 0x1;
+
+ // VEX_4V (VEX vvvv field): a register specifier
+ // (in 1's complement form) or 1111 if unused.
+ unsigned char VEX_4V = 0xf;
+
+ // VEX_L (Vector Length):
+ //
+ // 0: scalar or 128-bit vector
+ // 1: 256-bit vector
+ //
+ unsigned char VEX_L = 0;
+
+ // VEX_PP: opcode extension providing equivalent
+ // functionality of a SIMD prefix
+ //
+ // 0b00: None
+ // 0b01: 66
+ // 0b10: F3
+ // 0b11: F2
+ //
+ unsigned char VEX_PP = 0;
+
+ // Encode the operand size opcode prefix as needed.
+ if (TSFlags & X86II::OpSize)
+ VEX_PP = 0x01;
+
+ if ((TSFlags >> X86II::VEXShift) & X86II::VEX_W)
+ VEX_W = 1;
+
+ if ((TSFlags >> X86II::VEXShift) & X86II::VEX_L)
+ VEX_L = 1;
+
+ switch (TSFlags & X86II::Op0Mask) {
+ default: assert(0 && "Invalid prefix!");
+ case X86II::T8: // 0F 38
+ VEX_5M = 0x2;
+ break;
+ case X86II::TA: // 0F 3A
+ VEX_5M = 0x3;
+ break;
+ case X86II::TF: // F2 0F 38
+ VEX_PP = 0x3;
+ VEX_5M = 0x2;
+ break;
+ case X86II::XS: // F3 0F
+ VEX_PP = 0x2;
+ break;
+ case X86II::XD: // F2 0F
+ VEX_PP = 0x3;
+ break;
+ case X86II::A6: // Bypass: Not used by VEX
+ case X86II::A7: // Bypass: Not used by VEX
+ case X86II::TB: // Bypass: Not used by VEX
+ case 0:
+ break; // No prefix!
+ }
+
+ // Set the vector length to 256-bit if YMM0-YMM15 is used
+ for (unsigned i = 0; i != MI.getNumOperands(); ++i) {
+ if (!MI.getOperand(i).isReg())
+ continue;
+ unsigned SrcReg = MI.getOperand(i).getReg();
+ if (SrcReg >= X86::YMM0 && SrcReg <= X86::YMM15)
+ VEX_L = 1;
+ }
+
+ // Classify VEX_B, VEX_4V, VEX_R, VEX_X
+ unsigned CurOp = 0;
+ switch (TSFlags & X86II::FormMask) {
+ case X86II::MRMInitReg: assert(0 && "FIXME: Remove this!");
+ case X86II::MRMDestMem: {
+ // MRMDestMem instructions forms:
+ // MemAddr, src1(ModR/M)
+ // MemAddr, src1(VEX_4V), src2(ModR/M)
+ // MemAddr, src1(ModR/M), imm8
+ //
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(X86::AddrBaseReg).getReg()))
+ VEX_B = 0x0;
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(X86::AddrIndexReg).getReg()))
+ VEX_X = 0x0;
+
+ CurOp = X86::AddrNumOperands;
+ if (HasVEX_4V)
+ VEX_4V = getVEXRegisterEncoding(MI, CurOp++);
+
+ const MCOperand &MO = MI.getOperand(CurOp);
+ if (MO.isReg() && X86II::isX86_64ExtendedReg(MO.getReg()))
+ VEX_R = 0x0;
+ break;
+ }
+ case X86II::MRMSrcMem: {
+ // MRMSrcMem instructions forms:
+ // src1(ModR/M), MemAddr
+ // src1(ModR/M), src2(VEX_4V), MemAddr
+ // src1(ModR/M), MemAddr, imm8
+ // src1(ModR/M), MemAddr, src2(VEX_I8IMM)
+ //
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
+ VEX_R = 0x0;
+
+ unsigned MemAddrOffset = 1;
+ if (HasVEX_4V) {
+ VEX_4V = getVEXRegisterEncoding(MI, 1);
+ MemAddrOffset++;
+ }
+
+ if (X86II::isX86_64ExtendedReg(
+ MI.getOperand(MemAddrOffset+X86::AddrBaseReg).getReg()))
+ VEX_B = 0x0;
+ if (X86II::isX86_64ExtendedReg(
+ MI.getOperand(MemAddrOffset+X86::AddrIndexReg).getReg()))
+ VEX_X = 0x0;
+ break;
+ }
+ case X86II::MRM0m: case X86II::MRM1m:
+ case X86II::MRM2m: case X86II::MRM3m:
+ case X86II::MRM4m: case X86II::MRM5m:
+ case X86II::MRM6m: case X86II::MRM7m:
+ // MRM[0-9]m instructions forms:
+ // MemAddr
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(X86::AddrBaseReg).getReg()))
+ VEX_B = 0x0;
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(X86::AddrIndexReg).getReg()))
+ VEX_X = 0x0;
+ break;
+ case X86II::MRMSrcReg:
+ // MRMSrcReg instructions forms:
+ // dst(ModR/M), src1(VEX_4V), src2(ModR/M), src3(VEX_I8IMM)
+ // dst(ModR/M), src1(ModR/M)
+ // dst(ModR/M), src1(ModR/M), imm8
+ //
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
+ VEX_R = 0x0;
+ CurOp++;
+
+ if (HasVEX_4V)
+ VEX_4V = getVEXRegisterEncoding(MI, CurOp++);
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
+ VEX_B = 0x0;
+ break;
+ case X86II::MRMDestReg:
+ // MRMDestReg instructions forms:
+ // dst(ModR/M), src(ModR/M)
+ // dst(ModR/M), src(ModR/M), imm8
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
+ VEX_B = 0x0;
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(1).getReg()))
+ VEX_R = 0x0;
+ break;
+ case X86II::MRM0r: case X86II::MRM1r:
+ case X86II::MRM2r: case X86II::MRM3r:
+ case X86II::MRM4r: case X86II::MRM5r:
+ case X86II::MRM6r: case X86II::MRM7r:
+ // MRM0r-MRM7r instructions forms:
+ // dst(VEX_4V), src(ModR/M), imm8
+ VEX_4V = getVEXRegisterEncoding(MI, 0);
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(1).getReg()))
+ VEX_B = 0x0;
+ break;
+ default: // RawFrm
+ break;
+ }
+
+ // Emit segment override opcode prefix as needed.
+ EmitSegmentOverridePrefix(TSFlags, CurByte, MemOperand, MI, OS);
+
+ // VEX opcode prefix can have 2 or 3 bytes
+ //
+ // 3 bytes:
+ // +-----+ +--------------+ +-------------------+
+ // | C4h | | RXB | m-mmmm | | W | vvvv | L | pp |
+ // +-----+ +--------------+ +-------------------+
+ // 2 bytes:
+ // +-----+ +-------------------+
+ // | C5h | | R | vvvv | L | pp |
+ // +-----+ +-------------------+
+ //
+ unsigned char LastByte = VEX_PP | (VEX_L << 2) | (VEX_4V << 3);
+
+ if (VEX_B && VEX_X && !VEX_W && (VEX_5M == 1)) { // 2 byte VEX prefix
+ EmitByte(0xC5, CurByte, OS);
+ EmitByte(LastByte | (VEX_R << 7), CurByte, OS);
+ return;
+ }
+
+ // 3 byte VEX prefix
+ EmitByte(0xC4, CurByte, OS);
+ EmitByte(VEX_R << 7 | VEX_X << 6 | VEX_B << 5 | VEX_5M, CurByte, OS);
+ EmitByte(LastByte | (VEX_W << 7), CurByte, OS);
+}
+
+/// DetermineREXPrefix - Determine if the MCInst has to be encoded with a X86-64
+/// REX prefix which specifies 1) 64-bit instructions, 2) non-default operand
+/// size, and 3) use of X86-64 extended registers.
+static unsigned DetermineREXPrefix(const MCInst &MI, uint64_t TSFlags,
+ const MCInstrDesc &Desc) {
+ unsigned REX = 0;
+ if (TSFlags & X86II::REX_W)
+ REX |= 1 << 3; // set REX.W
+
+ if (MI.getNumOperands() == 0) return REX;
+
+ unsigned NumOps = MI.getNumOperands();
+ // FIXME: MCInst should explicitize the two-addrness.
+ bool isTwoAddr = NumOps > 1 &&
+ Desc.getOperandConstraint(1, MCOI::TIED_TO) != -1;
+
+ // If it accesses SPL, BPL, SIL, or DIL, then it requires a 0x40 REX prefix.
+ unsigned i = isTwoAddr ? 1 : 0;
+ for (; i != NumOps; ++i) {
+ const MCOperand &MO = MI.getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (!X86II::isX86_64NonExtLowByteReg(Reg)) continue;
+ // FIXME: The caller of DetermineREXPrefix slaps this prefix onto anything
+ // that returns non-zero.
+ REX |= 0x40; // REX fixed encoding prefix
+ break;
+ }
+
+ switch (TSFlags & X86II::FormMask) {
+ case X86II::MRMInitReg: assert(0 && "FIXME: Remove this!");
+ case X86II::MRMSrcReg:
+ if (MI.getOperand(0).isReg() &&
+ X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
+ REX |= 1 << 2; // set REX.R
+ i = isTwoAddr ? 2 : 1;
+ for (; i != NumOps; ++i) {
+ const MCOperand &MO = MI.getOperand(i);
+ if (MO.isReg() && X86II::isX86_64ExtendedReg(MO.getReg()))
+ REX |= 1 << 0; // set REX.B
+ }
+ break;
+ case X86II::MRMSrcMem: {
+ if (MI.getOperand(0).isReg() &&
+ X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
+ REX |= 1 << 2; // set REX.R
+ unsigned Bit = 0;
+ i = isTwoAddr ? 2 : 1;
+ for (; i != NumOps; ++i) {
+ const MCOperand &MO = MI.getOperand(i);
+ if (MO.isReg()) {
+ if (X86II::isX86_64ExtendedReg(MO.getReg()))
+ REX |= 1 << Bit; // set REX.B (Bit=0) and REX.X (Bit=1)
+ Bit++;
+ }
+ }
+ break;
+ }
+ case X86II::MRM0m: case X86II::MRM1m:
+ case X86II::MRM2m: case X86II::MRM3m:
+ case X86II::MRM4m: case X86II::MRM5m:
+ case X86II::MRM6m: case X86II::MRM7m:
+ case X86II::MRMDestMem: {
+ unsigned e = (isTwoAddr ? X86::AddrNumOperands+1 : X86::AddrNumOperands);
+ i = isTwoAddr ? 1 : 0;
+ if (NumOps > e && MI.getOperand(e).isReg() &&
+ X86II::isX86_64ExtendedReg(MI.getOperand(e).getReg()))
+ REX |= 1 << 2; // set REX.R
+ unsigned Bit = 0;
+ for (; i != e; ++i) {
+ const MCOperand &MO = MI.getOperand(i);
+ if (MO.isReg()) {
+ if (X86II::isX86_64ExtendedReg(MO.getReg()))
+ REX |= 1 << Bit; // REX.B (Bit=0) and REX.X (Bit=1)
+ Bit++;
+ }
+ }
+ break;
+ }
+ default:
+ if (MI.getOperand(0).isReg() &&
+ X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
+ REX |= 1 << 0; // set REX.B
+ i = isTwoAddr ? 2 : 1;
+ for (unsigned e = NumOps; i != e; ++i) {
+ const MCOperand &MO = MI.getOperand(i);
+ if (MO.isReg() && X86II::isX86_64ExtendedReg(MO.getReg()))
+ REX |= 1 << 2; // set REX.R
+ }
+ break;
+ }
+ return REX;
+}
+
+/// EmitSegmentOverridePrefix - Emit segment override opcode prefix as needed
+void X86MCCodeEmitter::EmitSegmentOverridePrefix(uint64_t TSFlags,
+ unsigned &CurByte, int MemOperand,
+ const MCInst &MI,
+ raw_ostream &OS) const {
+ switch (TSFlags & X86II::SegOvrMask) {
+ default: assert(0 && "Invalid segment!");
+ case 0:
+ // No segment override, check for explicit one on memory operand.
+ if (MemOperand != -1) { // If the instruction has a memory operand.
+ switch (MI.getOperand(MemOperand+X86::AddrSegmentReg).getReg()) {
+ default: assert(0 && "Unknown segment register!");
+ case 0: break;
+ case X86::CS: EmitByte(0x2E, CurByte, OS); break;
+ case X86::SS: EmitByte(0x36, CurByte, OS); break;
+ case X86::DS: EmitByte(0x3E, CurByte, OS); break;
+ case X86::ES: EmitByte(0x26, CurByte, OS); break;
+ case X86::FS: EmitByte(0x64, CurByte, OS); break;
+ case X86::GS: EmitByte(0x65, CurByte, OS); break;
+ }
+ }
+ break;
+ case X86II::FS:
+ EmitByte(0x64, CurByte, OS);
+ break;
+ case X86II::GS:
+ EmitByte(0x65, CurByte, OS);
+ break;
+ }
+}
+
+/// EmitOpcodePrefix - Emit all instruction prefixes prior to the opcode.
+///
+/// MemOperand is the operand # of the start of a memory operand if present. If
+/// Not present, it is -1.
+void X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
+ int MemOperand, const MCInst &MI,
+ const MCInstrDesc &Desc,
+ raw_ostream &OS) const {
+
+ // Emit the lock opcode prefix as needed.
+ if (TSFlags & X86II::LOCK)
+ EmitByte(0xF0, CurByte, OS);
+
+ // Emit segment override opcode prefix as needed.
+ EmitSegmentOverridePrefix(TSFlags, CurByte, MemOperand, MI, OS);
+
+ // Emit the repeat opcode prefix as needed.
+ if ((TSFlags & X86II::Op0Mask) == X86II::REP)
+ EmitByte(0xF3, CurByte, OS);
+
+ // Emit the address size opcode prefix as needed.
+ if ((TSFlags & X86II::AdSize) ||
+ (MemOperand != -1 && is64BitMode() && Is32BitMemOperand(MI, MemOperand)))
+ EmitByte(0x67, CurByte, OS);
+
+ // Emit the operand size opcode prefix as needed.
+ if (TSFlags & X86II::OpSize)
+ EmitByte(0x66, CurByte, OS);
+
+ bool Need0FPrefix = false;
+ switch (TSFlags & X86II::Op0Mask) {
+ default: assert(0 && "Invalid prefix!");
+ case 0: break; // No prefix!
+ case X86II::REP: break; // already handled.
+ case X86II::TB: // Two-byte opcode prefix
+ case X86II::T8: // 0F 38
+ case X86II::TA: // 0F 3A
+ case X86II::A6: // 0F A6
+ case X86II::A7: // 0F A7
+ Need0FPrefix = true;
+ break;
+ case X86II::TF: // F2 0F 38
+ EmitByte(0xF2, CurByte, OS);
+ Need0FPrefix = true;
+ break;
+ case X86II::XS: // F3 0F
+ EmitByte(0xF3, CurByte, OS);
+ Need0FPrefix = true;
+ break;
+ case X86II::XD: // F2 0F
+ EmitByte(0xF2, CurByte, OS);
+ Need0FPrefix = true;
+ break;
+ case X86II::D8: EmitByte(0xD8, CurByte, OS); break;
+ case X86II::D9: EmitByte(0xD9, CurByte, OS); break;
+ case X86II::DA: EmitByte(0xDA, CurByte, OS); break;
+ case X86II::DB: EmitByte(0xDB, CurByte, OS); break;
+ case X86II::DC: EmitByte(0xDC, CurByte, OS); break;
+ case X86II::DD: EmitByte(0xDD, CurByte, OS); break;
+ case X86II::DE: EmitByte(0xDE, CurByte, OS); break;
+ case X86II::DF: EmitByte(0xDF, CurByte, OS); break;
+ }
+
+ // Handle REX prefix.
+ // FIXME: Can this come before F2 etc to simplify emission?
+ if (is64BitMode()) {
+ if (unsigned REX = DetermineREXPrefix(MI, TSFlags, Desc))
+ EmitByte(0x40 | REX, CurByte, OS);
+ }
+
+ // 0x0F escape code must be emitted just before the opcode.
+ if (Need0FPrefix)
+ EmitByte(0x0F, CurByte, OS);
+
+ // FIXME: Pull this up into previous switch if REX can be moved earlier.
+ switch (TSFlags & X86II::Op0Mask) {
+ case X86II::TF: // F2 0F 38
+ case X86II::T8: // 0F 38
+ EmitByte(0x38, CurByte, OS);
+ break;
+ case X86II::TA: // 0F 3A
+ EmitByte(0x3A, CurByte, OS);
+ break;
+ case X86II::A6: // 0F A6
+ EmitByte(0xA6, CurByte, OS);
+ break;
+ case X86II::A7: // 0F A7
+ EmitByte(0xA7, CurByte, OS);
+ break;
+ }
+}
+
+void X86MCCodeEmitter::
+EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ unsigned Opcode = MI.getOpcode();
+ const MCInstrDesc &Desc = MCII.get(Opcode);
+ uint64_t TSFlags = Desc.TSFlags;
+
+ // Pseudo instructions don't get encoded.
+ if ((TSFlags & X86II::FormMask) == X86II::Pseudo)
+ return;
+
+ // If this is a two-address instruction, skip one of the register operands.
+ // FIXME: This should be handled during MCInst lowering.
+ unsigned NumOps = Desc.getNumOperands();
+ unsigned CurOp = 0;
+ if (NumOps > 1 && Desc.getOperandConstraint(1, MCOI::TIED_TO) != -1)
+ ++CurOp;
+ else if (NumOps > 2 && Desc.getOperandConstraint(NumOps-1, MCOI::TIED_TO)== 0)
+ // Skip the last source operand that is tied_to the dest reg. e.g. LXADD32
+ --NumOps;
+
+ // Keep track of the current byte being emitted.
+ unsigned CurByte = 0;
+
+ // Is this instruction encoded using the AVX VEX prefix?
+ bool HasVEXPrefix = false;
+
+ // It uses the VEX.VVVV field?
+ bool HasVEX_4V = false;
+
+ if ((TSFlags >> X86II::VEXShift) & X86II::VEX)
+ HasVEXPrefix = true;
+ if ((TSFlags >> X86II::VEXShift) & X86II::VEX_4V)
+ HasVEX_4V = true;
+
+ // Determine where the memory operand starts, if present.
+ int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
+ if (MemoryOperand != -1) MemoryOperand += CurOp;
+
+ if (!HasVEXPrefix)
+ EmitOpcodePrefix(TSFlags, CurByte, MemoryOperand, MI, Desc, OS);
+ else
+ EmitVEXOpcodePrefix(TSFlags, CurByte, MemoryOperand, MI, Desc, OS);
+
+ unsigned char BaseOpcode = X86II::getBaseOpcodeFor(TSFlags);
+
+ if ((TSFlags >> X86II::VEXShift) & X86II::Has3DNow0F0FOpcode)
+ BaseOpcode = 0x0F; // Weird 3DNow! encoding.
+
+ unsigned SrcRegNum = 0;
+ switch (TSFlags & X86II::FormMask) {
+ case X86II::MRMInitReg:
+ assert(0 && "FIXME: Remove this form when the JIT moves to MCCodeEmitter!");
+ default: errs() << "FORM: " << (TSFlags & X86II::FormMask) << "\n";
+ assert(0 && "Unknown FormMask value in X86MCCodeEmitter!");
+ case X86II::Pseudo:
+ assert(0 && "Pseudo instruction shouldn't be emitted");
+ case X86II::RawFrm:
+ EmitByte(BaseOpcode, CurByte, OS);
+ break;
+ case X86II::RawFrmImm8:
+ EmitByte(BaseOpcode, CurByte, OS);
+ EmitImmediate(MI.getOperand(CurOp++),
+ X86II::getSizeOfImm(TSFlags), getImmFixupKind(TSFlags),
+ CurByte, OS, Fixups);
+ EmitImmediate(MI.getOperand(CurOp++), 1, FK_Data_1, CurByte, OS, Fixups);
+ break;
+ case X86II::RawFrmImm16:
+ EmitByte(BaseOpcode, CurByte, OS);
+ EmitImmediate(MI.getOperand(CurOp++),
+ X86II::getSizeOfImm(TSFlags), getImmFixupKind(TSFlags),
+ CurByte, OS, Fixups);
+ EmitImmediate(MI.getOperand(CurOp++), 2, FK_Data_2, CurByte, OS, Fixups);
+ break;
+
+ case X86II::AddRegFrm:
+ EmitByte(BaseOpcode + GetX86RegNum(MI.getOperand(CurOp++)), CurByte, OS);
+ break;
+
+ case X86II::MRMDestReg:
+ EmitByte(BaseOpcode, CurByte, OS);
+ EmitRegModRMByte(MI.getOperand(CurOp),
+ GetX86RegNum(MI.getOperand(CurOp+1)), CurByte, OS);
+ CurOp += 2;
+ break;
+
+ case X86II::MRMDestMem:
+ EmitByte(BaseOpcode, CurByte, OS);
+ SrcRegNum = CurOp + X86::AddrNumOperands;
+
+ if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV)
+ SrcRegNum++;
+
+ EmitMemModRMByte(MI, CurOp,
+ GetX86RegNum(MI.getOperand(SrcRegNum)),
+ TSFlags, CurByte, OS, Fixups);
+ CurOp = SrcRegNum + 1;
+ break;
+
+ case X86II::MRMSrcReg:
+ EmitByte(BaseOpcode, CurByte, OS);
+ SrcRegNum = CurOp + 1;
+
+ if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV)
+ SrcRegNum++;
+
+ EmitRegModRMByte(MI.getOperand(SrcRegNum),
+ GetX86RegNum(MI.getOperand(CurOp)), CurByte, OS);
+ CurOp = SrcRegNum + 1;
+ break;
+
+ case X86II::MRMSrcMem: {
+ int AddrOperands = X86::AddrNumOperands;
+ unsigned FirstMemOp = CurOp+1;
+ if (HasVEX_4V) {
+ ++AddrOperands;
+ ++FirstMemOp; // Skip the register source (which is encoded in VEX_VVVV).
+ }
+
+ EmitByte(BaseOpcode, CurByte, OS);
+
+ EmitMemModRMByte(MI, FirstMemOp, GetX86RegNum(MI.getOperand(CurOp)),
+ TSFlags, CurByte, OS, Fixups);
+ CurOp += AddrOperands + 1;
+ break;
+ }
+
+ case X86II::MRM0r: case X86II::MRM1r:
+ case X86II::MRM2r: case X86II::MRM3r:
+ case X86II::MRM4r: case X86II::MRM5r:
+ case X86II::MRM6r: case X86II::MRM7r:
+ if (HasVEX_4V) // Skip the register dst (which is encoded in VEX_VVVV).
+ CurOp++;
+ EmitByte(BaseOpcode, CurByte, OS);
+ EmitRegModRMByte(MI.getOperand(CurOp++),
+ (TSFlags & X86II::FormMask)-X86II::MRM0r,
+ CurByte, OS);
+ break;
+ case X86II::MRM0m: case X86II::MRM1m:
+ case X86II::MRM2m: case X86II::MRM3m:
+ case X86II::MRM4m: case X86II::MRM5m:
+ case X86II::MRM6m: case X86II::MRM7m:
+ EmitByte(BaseOpcode, CurByte, OS);
+ EmitMemModRMByte(MI, CurOp, (TSFlags & X86II::FormMask)-X86II::MRM0m,
+ TSFlags, CurByte, OS, Fixups);
+ CurOp += X86::AddrNumOperands;
+ break;
+ case X86II::MRM_C1:
+ EmitByte(BaseOpcode, CurByte, OS);
+ EmitByte(0xC1, CurByte, OS);
+ break;
+ case X86II::MRM_C2:
+ EmitByte(BaseOpcode, CurByte, OS);
+ EmitByte(0xC2, CurByte, OS);
+ break;
+ case X86II::MRM_C3:
+ EmitByte(BaseOpcode, CurByte, OS);
+ EmitByte(0xC3, CurByte, OS);
+ break;
+ case X86II::MRM_C4:
+ EmitByte(BaseOpcode, CurByte, OS);
+ EmitByte(0xC4, CurByte, OS);
+ break;
+ case X86II::MRM_C8:
+ EmitByte(BaseOpcode, CurByte, OS);
+ EmitByte(0xC8, CurByte, OS);
+ break;
+ case X86II::MRM_C9:
+ EmitByte(BaseOpcode, CurByte, OS);
+ EmitByte(0xC9, CurByte, OS);
+ break;
+ case X86II::MRM_E8:
+ EmitByte(BaseOpcode, CurByte, OS);
+ EmitByte(0xE8, CurByte, OS);
+ break;
+ case X86II::MRM_F0:
+ EmitByte(BaseOpcode, CurByte, OS);
+ EmitByte(0xF0, CurByte, OS);
+ break;
+ case X86II::MRM_F8:
+ EmitByte(BaseOpcode, CurByte, OS);
+ EmitByte(0xF8, CurByte, OS);
+ break;
+ case X86II::MRM_F9:
+ EmitByte(BaseOpcode, CurByte, OS);
+ EmitByte(0xF9, CurByte, OS);
+ break;
+ case X86II::MRM_D0:
+ EmitByte(BaseOpcode, CurByte, OS);
+ EmitByte(0xD0, CurByte, OS);
+ break;
+ case X86II::MRM_D1:
+ EmitByte(BaseOpcode, CurByte, OS);
+ EmitByte(0xD1, CurByte, OS);
+ break;
+ }
+
+ // If there is a remaining operand, it must be a trailing immediate. Emit it
+ // according to the right size for the instruction.
+ if (CurOp != NumOps) {
+ // The last source register of a 4 operand instruction in AVX is encoded
+ // in bits[7:4] of a immediate byte, and bits[3:0] are ignored.
+ if ((TSFlags >> X86II::VEXShift) & X86II::VEX_I8IMM) {
+ const MCOperand &MO = MI.getOperand(CurOp++);
+ bool IsExtReg = X86II::isX86_64ExtendedReg(MO.getReg());
+ unsigned RegNum = (IsExtReg ? (1 << 7) : 0);
+ RegNum |= GetX86RegNum(MO) << 4;
+ EmitImmediate(MCOperand::CreateImm(RegNum), 1, FK_Data_1, CurByte, OS,
+ Fixups);
+ } else {
+ unsigned FixupKind;
+ // FIXME: Is there a better way to know that we need a signed relocation?
+ if (MI.getOpcode() == X86::ADD64ri32 ||
+ MI.getOpcode() == X86::MOV64ri32 ||
+ MI.getOpcode() == X86::MOV64mi32 ||
+ MI.getOpcode() == X86::PUSH64i32)
+ FixupKind = X86::reloc_signed_4byte;
+ else
+ FixupKind = getImmFixupKind(TSFlags);
+ EmitImmediate(MI.getOperand(CurOp++),
+ X86II::getSizeOfImm(TSFlags), MCFixupKind(FixupKind),
+ CurByte, OS, Fixups);
+ }
+ }
+
+ if ((TSFlags >> X86II::VEXShift) & X86II::Has3DNow0F0FOpcode)
+ EmitByte(X86II::getBaseOpcodeFor(TSFlags), CurByte, OS);
+
+#ifndef NDEBUG
+ // FIXME: Verify.
+ if (/*!Desc.isVariadic() &&*/ CurOp != NumOps) {
+ errs() << "Cannot encode all operands of: ";
+ MI.dump();
+ errs() << '\n';
+ abort();
+ }
+#endif
+}
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
new file mode 100644
index 0000000..f98d5e3
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
@@ -0,0 +1,463 @@
+//===-- X86MCTargetDesc.cpp - X86 Target Descriptions -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides X86 specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86MCTargetDesc.h"
+#include "X86MCAsmInfo.h"
+#include "InstPrinter/X86ATTInstPrinter.h"
+#include "InstPrinter/X86IntelInstPrinter.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/MC/MCCodeGenInfo.h"
+#include "llvm/MC/MCInstrAnalysis.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Support/Host.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#define GET_REGINFO_MC_DESC
+#include "X86GenRegisterInfo.inc"
+
+#define GET_INSTRINFO_MC_DESC
+#include "X86GenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "X86GenSubtargetInfo.inc"
+
+using namespace llvm;
+
+
+std::string X86_MC::ParseX86Triple(StringRef TT) {
+ Triple TheTriple(TT);
+ std::string FS;
+ if (TheTriple.getArch() == Triple::x86_64)
+ FS = "+64bit-mode";
+ else
+ FS = "-64bit-mode";
+ if (TheTriple.getOS() == Triple::NativeClient)
+ FS += ",+nacl-mode";
+ else
+ FS += ",-nacl-mode";
+ return FS;
+}
+
+/// GetCpuIDAndInfo - Execute the specified cpuid and return the 4 values in the
+/// specified arguments. If we can't run cpuid on the host, return true.
+bool X86_MC::GetCpuIDAndInfo(unsigned value, unsigned *rEAX,
+ unsigned *rEBX, unsigned *rECX, unsigned *rEDX) {
+#if defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64)
+ #if defined(__GNUC__)
+ // gcc doesn't know cpuid would clobber ebx/rbx. Preseve it manually.
+ asm ("movq\t%%rbx, %%rsi\n\t"
+ "cpuid\n\t"
+ "xchgq\t%%rbx, %%rsi\n\t"
+ : "=a" (*rEAX),
+ "=S" (*rEBX),
+ "=c" (*rECX),
+ "=d" (*rEDX)
+ : "a" (value));
+ return false;
+ #elif defined(_MSC_VER)
+ int registers[4];
+ __cpuid(registers, value);
+ *rEAX = registers[0];
+ *rEBX = registers[1];
+ *rECX = registers[2];
+ *rEDX = registers[3];
+ return false;
+ #endif
+#elif defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)
+ #if defined(__GNUC__)
+ asm ("movl\t%%ebx, %%esi\n\t"
+ "cpuid\n\t"
+ "xchgl\t%%ebx, %%esi\n\t"
+ : "=a" (*rEAX),
+ "=S" (*rEBX),
+ "=c" (*rECX),
+ "=d" (*rEDX)
+ : "a" (value));
+ return false;
+ #elif defined(_MSC_VER)
+ __asm {
+ mov eax,value
+ cpuid
+ mov esi,rEAX
+ mov dword ptr [esi],eax
+ mov esi,rEBX
+ mov dword ptr [esi],ebx
+ mov esi,rECX
+ mov dword ptr [esi],ecx
+ mov esi,rEDX
+ mov dword ptr [esi],edx
+ }
+ return false;
+ #endif
+#endif
+ return true;
+}
+
+void X86_MC::DetectFamilyModel(unsigned EAX, unsigned &Family,
+ unsigned &Model) {
+ Family = (EAX >> 8) & 0xf; // Bits 8 - 11
+ Model = (EAX >> 4) & 0xf; // Bits 4 - 7
+ if (Family == 6 || Family == 0xf) {
+ if (Family == 0xf)
+ // Examine extended family ID if family ID is F.
+ Family += (EAX >> 20) & 0xff; // Bits 20 - 27
+ // Examine extended model ID if family ID is 6 or F.
+ Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19
+ }
+}
+
+unsigned X86_MC::getDwarfRegFlavour(StringRef TT, bool isEH) {
+ Triple TheTriple(TT);
+ if (TheTriple.getArch() == Triple::x86_64)
+ return DWARFFlavour::X86_64;
+
+ if (TheTriple.isOSDarwin())
+ return isEH ? DWARFFlavour::X86_32_DarwinEH : DWARFFlavour::X86_32_Generic;
+ if (TheTriple.getOS() == Triple::MinGW32 ||
+ TheTriple.getOS() == Triple::Cygwin)
+ // Unsupported by now, just quick fallback
+ return DWARFFlavour::X86_32_Generic;
+ return DWARFFlavour::X86_32_Generic;
+}
+
+/// getX86RegNum - This function maps LLVM register identifiers to their X86
+/// specific numbering, which is used in various places encoding instructions.
+unsigned X86_MC::getX86RegNum(unsigned RegNo) {
+ switch(RegNo) {
+ case X86::RAX: case X86::EAX: case X86::AX: case X86::AL: return N86::EAX;
+ case X86::RCX: case X86::ECX: case X86::CX: case X86::CL: return N86::ECX;
+ case X86::RDX: case X86::EDX: case X86::DX: case X86::DL: return N86::EDX;
+ case X86::RBX: case X86::EBX: case X86::BX: case X86::BL: return N86::EBX;
+ case X86::RSP: case X86::ESP: case X86::SP: case X86::SPL: case X86::AH:
+ return N86::ESP;
+ case X86::RBP: case X86::EBP: case X86::BP: case X86::BPL: case X86::CH:
+ return N86::EBP;
+ case X86::RSI: case X86::ESI: case X86::SI: case X86::SIL: case X86::DH:
+ return N86::ESI;
+ case X86::RDI: case X86::EDI: case X86::DI: case X86::DIL: case X86::BH:
+ return N86::EDI;
+
+ case X86::R8: case X86::R8D: case X86::R8W: case X86::R8B:
+ return N86::EAX;
+ case X86::R9: case X86::R9D: case X86::R9W: case X86::R9B:
+ return N86::ECX;
+ case X86::R10: case X86::R10D: case X86::R10W: case X86::R10B:
+ return N86::EDX;
+ case X86::R11: case X86::R11D: case X86::R11W: case X86::R11B:
+ return N86::EBX;
+ case X86::R12: case X86::R12D: case X86::R12W: case X86::R12B:
+ return N86::ESP;
+ case X86::R13: case X86::R13D: case X86::R13W: case X86::R13B:
+ return N86::EBP;
+ case X86::R14: case X86::R14D: case X86::R14W: case X86::R14B:
+ return N86::ESI;
+ case X86::R15: case X86::R15D: case X86::R15W: case X86::R15B:
+ return N86::EDI;
+
+ case X86::ST0: case X86::ST1: case X86::ST2: case X86::ST3:
+ case X86::ST4: case X86::ST5: case X86::ST6: case X86::ST7:
+ return RegNo-X86::ST0;
+
+ case X86::XMM0: case X86::XMM8:
+ case X86::YMM0: case X86::YMM8: case X86::MM0:
+ return 0;
+ case X86::XMM1: case X86::XMM9:
+ case X86::YMM1: case X86::YMM9: case X86::MM1:
+ return 1;
+ case X86::XMM2: case X86::XMM10:
+ case X86::YMM2: case X86::YMM10: case X86::MM2:
+ return 2;
+ case X86::XMM3: case X86::XMM11:
+ case X86::YMM3: case X86::YMM11: case X86::MM3:
+ return 3;
+ case X86::XMM4: case X86::XMM12:
+ case X86::YMM4: case X86::YMM12: case X86::MM4:
+ return 4;
+ case X86::XMM5: case X86::XMM13:
+ case X86::YMM5: case X86::YMM13: case X86::MM5:
+ return 5;
+ case X86::XMM6: case X86::XMM14:
+ case X86::YMM6: case X86::YMM14: case X86::MM6:
+ return 6;
+ case X86::XMM7: case X86::XMM15:
+ case X86::YMM7: case X86::YMM15: case X86::MM7:
+ return 7;
+
+ case X86::ES: return 0;
+ case X86::CS: return 1;
+ case X86::SS: return 2;
+ case X86::DS: return 3;
+ case X86::FS: return 4;
+ case X86::GS: return 5;
+
+ case X86::CR0: case X86::CR8 : case X86::DR0: return 0;
+ case X86::CR1: case X86::CR9 : case X86::DR1: return 1;
+ case X86::CR2: case X86::CR10: case X86::DR2: return 2;
+ case X86::CR3: case X86::CR11: case X86::DR3: return 3;
+ case X86::CR4: case X86::CR12: case X86::DR4: return 4;
+ case X86::CR5: case X86::CR13: case X86::DR5: return 5;
+ case X86::CR6: case X86::CR14: case X86::DR6: return 6;
+ case X86::CR7: case X86::CR15: case X86::DR7: return 7;
+
+ // Pseudo index registers are equivalent to a "none"
+ // scaled index (See Intel Manual 2A, table 2-3)
+ case X86::EIZ:
+ case X86::RIZ:
+ return 4;
+
+ default:
+ assert((int(RegNo) > 0) && "Unknown physical register!");
+ return 0;
+ }
+}
+
+void X86_MC::InitLLVM2SEHRegisterMapping(MCRegisterInfo *MRI) {
+ // FIXME: TableGen these.
+ for (unsigned Reg = X86::NoRegister+1; Reg < X86::NUM_TARGET_REGS; ++Reg) {
+ int SEH = X86_MC::getX86RegNum(Reg);
+ switch (Reg) {
+ case X86::R8: case X86::R8D: case X86::R8W: case X86::R8B:
+ case X86::R9: case X86::R9D: case X86::R9W: case X86::R9B:
+ case X86::R10: case X86::R10D: case X86::R10W: case X86::R10B:
+ case X86::R11: case X86::R11D: case X86::R11W: case X86::R11B:
+ case X86::R12: case X86::R12D: case X86::R12W: case X86::R12B:
+ case X86::R13: case X86::R13D: case X86::R13W: case X86::R13B:
+ case X86::R14: case X86::R14D: case X86::R14W: case X86::R14B:
+ case X86::R15: case X86::R15D: case X86::R15W: case X86::R15B:
+ case X86::XMM8: case X86::XMM9: case X86::XMM10: case X86::XMM11:
+ case X86::XMM12: case X86::XMM13: case X86::XMM14: case X86::XMM15:
+ case X86::YMM8: case X86::YMM9: case X86::YMM10: case X86::YMM11:
+ case X86::YMM12: case X86::YMM13: case X86::YMM14: case X86::YMM15:
+ SEH += 8;
+ break;
+ }
+ MRI->mapLLVMRegToSEHReg(Reg, SEH);
+ }
+}
+
+MCSubtargetInfo *X86_MC::createX86MCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS) {
+ std::string ArchFS = X86_MC::ParseX86Triple(TT);
+ if (!FS.empty()) {
+ if (!ArchFS.empty())
+ ArchFS = ArchFS + "," + FS.str();
+ else
+ ArchFS = FS;
+ }
+
+ std::string CPUName = CPU;
+ if (CPUName.empty()) {
+#if defined (__x86_64__) || defined(__i386__)
+ CPUName = sys::getHostCPUName();
+#else
+ CPUName = "generic";
+#endif
+ }
+
+ MCSubtargetInfo *X = new MCSubtargetInfo();
+ InitX86MCSubtargetInfo(X, TT, CPUName, ArchFS);
+ return X;
+}
+
+static MCInstrInfo *createX86MCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitX86MCInstrInfo(X);
+ return X;
+}
+
+static MCRegisterInfo *createX86MCRegisterInfo(StringRef TT) {
+ Triple TheTriple(TT);
+ unsigned RA = (TheTriple.getArch() == Triple::x86_64)
+ ? X86::RIP // Should have dwarf #16.
+ : X86::EIP; // Should have dwarf #8.
+
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitX86MCRegisterInfo(X, RA,
+ X86_MC::getDwarfRegFlavour(TT, false),
+ X86_MC::getDwarfRegFlavour(TT, true));
+ X86_MC::InitLLVM2SEHRegisterMapping(X);
+ return X;
+}
+
+static MCAsmInfo *createX86MCAsmInfo(const Target &T, StringRef TT) {
+ Triple TheTriple(TT);
+ bool is64Bit = TheTriple.getArch() == Triple::x86_64;
+
+ MCAsmInfo *MAI;
+ if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO) {
+ if (is64Bit)
+ MAI = new X86_64MCAsmInfoDarwin(TheTriple);
+ else
+ MAI = new X86MCAsmInfoDarwin(TheTriple);
+ } else if (TheTriple.isOSWindows()) {
+ MAI = new X86MCAsmInfoCOFF(TheTriple);
+ } else {
+ MAI = new X86ELFMCAsmInfo(TheTriple);
+ }
+
+ // Initialize initial frame state.
+ // Calculate amount of bytes used for return address storing
+ int stackGrowth = is64Bit ? -8 : -4;
+
+ // Initial state of the frame pointer is esp+stackGrowth.
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(is64Bit ? X86::RSP : X86::ESP, stackGrowth);
+ MAI->addInitialFrameState(0, Dst, Src);
+
+ // Add return address to move list
+ MachineLocation CSDst(is64Bit ? X86::RSP : X86::ESP, stackGrowth);
+ MachineLocation CSSrc(is64Bit ? X86::RIP : X86::EIP);
+ MAI->addInitialFrameState(0, CSDst, CSSrc);
+
+ return MAI;
+}
+
+static MCCodeGenInfo *createX86MCCodeGenInfo(StringRef TT, Reloc::Model RM,
+ CodeModel::Model CM) {
+ MCCodeGenInfo *X = new MCCodeGenInfo();
+
+ Triple T(TT);
+ bool is64Bit = T.getArch() == Triple::x86_64;
+
+ if (RM == Reloc::Default) {
+ // Darwin defaults to PIC in 64 bit mode and dynamic-no-pic in 32 bit mode.
+ // Win64 requires rip-rel addressing, thus we force it to PIC. Otherwise we
+ // use static relocation model by default.
+ if (T.isOSDarwin()) {
+ if (is64Bit)
+ RM = Reloc::PIC_;
+ else
+ RM = Reloc::DynamicNoPIC;
+ } else if (T.isOSWindows() && is64Bit)
+ RM = Reloc::PIC_;
+ else
+ RM = Reloc::Static;
+ }
+
+ // ELF and X86-64 don't have a distinct DynamicNoPIC model. DynamicNoPIC
+ // is defined as a model for code which may be used in static or dynamic
+ // executables but not necessarily a shared library. On X86-32 we just
+ // compile in -static mode, in x86-64 we use PIC.
+ if (RM == Reloc::DynamicNoPIC) {
+ if (is64Bit)
+ RM = Reloc::PIC_;
+ else if (!T.isOSDarwin())
+ RM = Reloc::Static;
+ }
+
+ // If we are on Darwin, disallow static relocation model in X86-64 mode, since
+ // the Mach-O file format doesn't support it.
+ if (RM == Reloc::Static && T.isOSDarwin() && is64Bit)
+ RM = Reloc::PIC_;
+
+ // For static codegen, if we're not already set, use Small codegen.
+ if (CM == CodeModel::Default)
+ CM = CodeModel::Small;
+ else if (CM == CodeModel::JITDefault)
+ // 64-bit JIT places everything in the same buffer except external funcs.
+ CM = is64Bit ? CodeModel::Large : CodeModel::Small;
+
+ X->InitMCCodeGenInfo(RM, CM);
+ return X;
+}
+
+static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
+ MCContext &Ctx, MCAsmBackend &MAB,
+ raw_ostream &_OS,
+ MCCodeEmitter *_Emitter,
+ bool RelaxAll,
+ bool NoExecStack) {
+ Triple TheTriple(TT);
+
+ if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO)
+ return createMachOStreamer(Ctx, MAB, _OS, _Emitter, RelaxAll);
+
+ if (TheTriple.isOSWindows())
+ return createWinCOFFStreamer(Ctx, MAB, *_Emitter, _OS, RelaxAll);
+
+ return createELFStreamer(Ctx, MAB, _OS, _Emitter, RelaxAll, NoExecStack);
+}
+
+static MCInstPrinter *createX86MCInstPrinter(const Target &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI,
+ const MCSubtargetInfo &STI) {
+ if (SyntaxVariant == 0)
+ return new X86ATTInstPrinter(MAI);
+ if (SyntaxVariant == 1)
+ return new X86IntelInstPrinter(MAI);
+ return 0;
+}
+
+static MCInstrAnalysis *createX86MCInstrAnalysis(const MCInstrInfo *Info) {
+ return new MCInstrAnalysis(Info);
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeX86TargetMC() {
+ // Register the MC asm info.
+ RegisterMCAsmInfoFn A(TheX86_32Target, createX86MCAsmInfo);
+ RegisterMCAsmInfoFn B(TheX86_64Target, createX86MCAsmInfo);
+
+ // Register the MC codegen info.
+ RegisterMCCodeGenInfoFn C(TheX86_32Target, createX86MCCodeGenInfo);
+ RegisterMCCodeGenInfoFn D(TheX86_64Target, createX86MCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(TheX86_32Target, createX86MCInstrInfo);
+ TargetRegistry::RegisterMCInstrInfo(TheX86_64Target, createX86MCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(TheX86_32Target, createX86MCRegisterInfo);
+ TargetRegistry::RegisterMCRegInfo(TheX86_64Target, createX86MCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(TheX86_32Target,
+ X86_MC::createX86MCSubtargetInfo);
+ TargetRegistry::RegisterMCSubtargetInfo(TheX86_64Target,
+ X86_MC::createX86MCSubtargetInfo);
+
+ // Register the MC instruction analyzer.
+ TargetRegistry::RegisterMCInstrAnalysis(TheX86_32Target,
+ createX86MCInstrAnalysis);
+ TargetRegistry::RegisterMCInstrAnalysis(TheX86_64Target,
+ createX86MCInstrAnalysis);
+
+ // Register the code emitter.
+ TargetRegistry::RegisterMCCodeEmitter(TheX86_32Target,
+ createX86MCCodeEmitter);
+ TargetRegistry::RegisterMCCodeEmitter(TheX86_64Target,
+ createX86MCCodeEmitter);
+
+ // Register the asm backend.
+ TargetRegistry::RegisterMCAsmBackend(TheX86_32Target,
+ createX86_32AsmBackend);
+ TargetRegistry::RegisterMCAsmBackend(TheX86_64Target,
+ createX86_64AsmBackend);
+
+ // Register the object streamer.
+ TargetRegistry::RegisterMCObjectStreamer(TheX86_32Target,
+ createMCStreamer);
+ TargetRegistry::RegisterMCObjectStreamer(TheX86_64Target,
+ createMCStreamer);
+
+ // Register the MCInstPrinter.
+ TargetRegistry::RegisterMCInstPrinter(TheX86_32Target,
+ createX86MCInstPrinter);
+ TargetRegistry::RegisterMCInstPrinter(TheX86_64Target,
+ createX86MCInstPrinter);
+}
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
new file mode 100644
index 0000000..c144c51
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
@@ -0,0 +1,103 @@
+//===-- X86MCTargetDesc.h - X86 Target Descriptions -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides X86 specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86MCTARGETDESC_H
+#define X86MCTARGETDESC_H
+
+#include "llvm/Support/DataTypes.h"
+#include <string>
+
+namespace llvm {
+class MCAsmBackend;
+class MCCodeEmitter;
+class MCContext;
+class MCInstrInfo;
+class MCObjectWriter;
+class MCRegisterInfo;
+class MCSubtargetInfo;
+class Target;
+class StringRef;
+class raw_ostream;
+
+extern Target TheX86_32Target, TheX86_64Target;
+
+/// DWARFFlavour - Flavour of dwarf regnumbers
+///
+namespace DWARFFlavour {
+ enum {
+ X86_64 = 0, X86_32_DarwinEH = 1, X86_32_Generic = 2
+ };
+}
+
+/// N86 namespace - Native X86 register numbers
+///
+namespace N86 {
+ enum {
+ EAX = 0, ECX = 1, EDX = 2, EBX = 3, ESP = 4, EBP = 5, ESI = 6, EDI = 7
+ };
+}
+
+namespace X86_MC {
+ std::string ParseX86Triple(StringRef TT);
+
+ /// GetCpuIDAndInfo - Execute the specified cpuid and return the 4 values in
+ /// the specified arguments. If we can't run cpuid on the host, return true.
+ bool GetCpuIDAndInfo(unsigned value, unsigned *rEAX,
+ unsigned *rEBX, unsigned *rECX, unsigned *rEDX);
+
+ void DetectFamilyModel(unsigned EAX, unsigned &Family, unsigned &Model);
+
+ unsigned getDwarfRegFlavour(StringRef TT, bool isEH);
+
+ unsigned getX86RegNum(unsigned RegNo);
+
+ void InitLLVM2SEHRegisterMapping(MCRegisterInfo *MRI);
+
+ /// createX86MCSubtargetInfo - Create a X86 MCSubtargetInfo instance.
+ /// This is exposed so Asm parser, etc. do not need to go through
+ /// TargetRegistry.
+ MCSubtargetInfo *createX86MCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS);
+}
+
+MCCodeEmitter *createX86MCCodeEmitter(const MCInstrInfo &MCII,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx);
+
+MCAsmBackend *createX86_32AsmBackend(const Target &T, StringRef TT);
+MCAsmBackend *createX86_64AsmBackend(const Target &T, StringRef TT);
+
+/// createX86MachObjectWriter - Construct an X86 Mach-O object writer.
+MCObjectWriter *createX86MachObjectWriter(raw_ostream &OS,
+ bool Is64Bit,
+ uint32_t CPUType,
+ uint32_t CPUSubtype);
+
+} // End llvm namespace
+
+
+// Defines symbolic names for X86 registers. This defines a mapping from
+// register name to register number.
+//
+#define GET_REGINFO_ENUM
+#include "X86GenRegisterInfo.inc"
+
+// Defines symbolic names for the X86 instructions.
+//
+#define GET_INSTRINFO_ENUM
+#include "X86GenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "X86GenSubtargetInfo.inc"
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
new file mode 100644
index 0000000..f0f1982
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
@@ -0,0 +1,554 @@
+//===-- X86MachObjectWriter.cpp - X86 Mach-O Writer -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/X86FixupKinds.h"
+#include "MCTargetDesc/X86MCTargetDesc.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCMachObjectWriter.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Object/MachOFormat.h"
+
+using namespace llvm;
+using namespace llvm::object;
+
+namespace {
+class X86MachObjectWriter : public MCMachObjectTargetWriter {
+ void RecordScatteredRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ unsigned Log2Size,
+ uint64_t &FixedValue);
+ void RecordTLVPRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ uint64_t &FixedValue);
+
+ void RecordX86Relocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ uint64_t &FixedValue);
+ void RecordX86_64Relocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ uint64_t &FixedValue);
+public:
+ X86MachObjectWriter(bool Is64Bit, uint32_t CPUType,
+ uint32_t CPUSubtype)
+ : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype,
+ /*UseAggressiveSymbolFolding=*/Is64Bit) {}
+
+ void RecordRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm, const MCAsmLayout &Layout,
+ const MCFragment *Fragment, const MCFixup &Fixup,
+ MCValue Target, uint64_t &FixedValue) {
+ if (Writer->is64Bit())
+ RecordX86_64Relocation(Writer, Asm, Layout, Fragment, Fixup, Target,
+ FixedValue);
+ else
+ RecordX86Relocation(Writer, Asm, Layout, Fragment, Fixup, Target,
+ FixedValue);
+ }
+};
+}
+
+static bool isFixupKindRIPRel(unsigned Kind) {
+ return Kind == X86::reloc_riprel_4byte ||
+ Kind == X86::reloc_riprel_4byte_movq_load;
+}
+
+static unsigned getFixupKindLog2Size(unsigned Kind) {
+ switch (Kind) {
+ default:
+ llvm_unreachable("invalid fixup kind!");
+ case FK_PCRel_1:
+ case FK_Data_1: return 0;
+ case FK_PCRel_2:
+ case FK_Data_2: return 1;
+ case FK_PCRel_4:
+ // FIXME: Remove these!!!
+ case X86::reloc_riprel_4byte:
+ case X86::reloc_riprel_4byte_movq_load:
+ case X86::reloc_signed_4byte:
+ case FK_Data_4: return 2;
+ case FK_Data_8: return 3;
+ }
+}
+
+void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ uint64_t &FixedValue) {
+ unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
+ unsigned IsRIPRel = isFixupKindRIPRel(Fixup.getKind());
+ unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind());
+
+ // See <reloc.h>.
+ uint32_t FixupOffset =
+ Layout.getFragmentOffset(Fragment) + Fixup.getOffset();
+ uint32_t FixupAddress =
+ Writer->getFragmentAddress(Fragment, Layout) + Fixup.getOffset();
+ int64_t Value = 0;
+ unsigned Index = 0;
+ unsigned IsExtern = 0;
+ unsigned Type = 0;
+
+ Value = Target.getConstant();
+
+ if (IsPCRel) {
+ // Compensate for the relocation offset, Darwin x86_64 relocations only have
+ // the addend and appear to have attempted to define it to be the actual
+ // expression addend without the PCrel bias. However, instructions with data
+ // following the relocation are not accommodated for (see comment below
+ // regarding SIGNED{1,2,4}), so it isn't exactly that either.
+ Value += 1LL << Log2Size;
+ }
+
+ if (Target.isAbsolute()) { // constant
+ // SymbolNum of 0 indicates the absolute section.
+ Type = macho::RIT_X86_64_Unsigned;
+ Index = 0;
+
+ // FIXME: I believe this is broken, I don't think the linker can understand
+ // it. I think it would require a local relocation, but I'm not sure if that
+ // would work either. The official way to get an absolute PCrel relocation
+ // is to use an absolute symbol (which we don't support yet).
+ if (IsPCRel) {
+ IsExtern = 1;
+ Type = macho::RIT_X86_64_Branch;
+ }
+ } else if (Target.getSymB()) { // A - B + constant
+ const MCSymbol *A = &Target.getSymA()->getSymbol();
+ MCSymbolData &A_SD = Asm.getSymbolData(*A);
+ const MCSymbolData *A_Base = Asm.getAtom(&A_SD);
+
+ const MCSymbol *B = &Target.getSymB()->getSymbol();
+ MCSymbolData &B_SD = Asm.getSymbolData(*B);
+ const MCSymbolData *B_Base = Asm.getAtom(&B_SD);
+
+ // Neither symbol can be modified.
+ if (Target.getSymA()->getKind() != MCSymbolRefExpr::VK_None ||
+ Target.getSymB()->getKind() != MCSymbolRefExpr::VK_None)
+ report_fatal_error("unsupported relocation of modified symbol");
+
+ // We don't support PCrel relocations of differences. Darwin 'as' doesn't
+ // implement most of these correctly.
+ if (IsPCRel)
+ report_fatal_error("unsupported pc-relative relocation of difference");
+
+ // The support for the situation where one or both of the symbols would
+ // require a local relocation is handled just like if the symbols were
+ // external. This is certainly used in the case of debug sections where the
+ // section has only temporary symbols and thus the symbols don't have base
+ // symbols. This is encoded using the section ordinal and non-extern
+ // relocation entries.
+
+ // Darwin 'as' doesn't emit correct relocations for this (it ends up with a
+ // single SIGNED relocation); reject it for now. Except the case where both
+ // symbols don't have a base, equal but both NULL.
+ if (A_Base == B_Base && A_Base)
+ report_fatal_error("unsupported relocation with identical base");
+
+ Value += Writer->getSymbolAddress(&A_SD, Layout) -
+ (A_Base == NULL ? 0 : Writer->getSymbolAddress(A_Base, Layout));
+ Value -= Writer->getSymbolAddress(&B_SD, Layout) -
+ (B_Base == NULL ? 0 : Writer->getSymbolAddress(B_Base, Layout));
+
+ if (A_Base) {
+ Index = A_Base->getIndex();
+ IsExtern = 1;
+ }
+ else {
+ Index = A_SD.getFragment()->getParent()->getOrdinal() + 1;
+ IsExtern = 0;
+ }
+ Type = macho::RIT_X86_64_Unsigned;
+
+ macho::RelocationEntry MRE;
+ MRE.Word0 = FixupOffset;
+ MRE.Word1 = ((Index << 0) |
+ (IsPCRel << 24) |
+ (Log2Size << 25) |
+ (IsExtern << 27) |
+ (Type << 28));
+ Writer->addRelocation(Fragment->getParent(), MRE);
+
+ if (B_Base) {
+ Index = B_Base->getIndex();
+ IsExtern = 1;
+ }
+ else {
+ Index = B_SD.getFragment()->getParent()->getOrdinal() + 1;
+ IsExtern = 0;
+ }
+ Type = macho::RIT_X86_64_Subtractor;
+ } else {
+ const MCSymbol *Symbol = &Target.getSymA()->getSymbol();
+ MCSymbolData &SD = Asm.getSymbolData(*Symbol);
+ const MCSymbolData *Base = Asm.getAtom(&SD);
+
+ // Relocations inside debug sections always use local relocations when
+ // possible. This seems to be done because the debugger doesn't fully
+ // understand x86_64 relocation entries, and expects to find values that
+ // have already been fixed up.
+ if (Symbol->isInSection()) {
+ const MCSectionMachO &Section = static_cast<const MCSectionMachO&>(
+ Fragment->getParent()->getSection());
+ if (Section.hasAttribute(MCSectionMachO::S_ATTR_DEBUG))
+ Base = 0;
+ }
+
+ // x86_64 almost always uses external relocations, except when there is no
+ // symbol to use as a base address (a local symbol with no preceding
+ // non-local symbol).
+ if (Base) {
+ Index = Base->getIndex();
+ IsExtern = 1;
+
+ // Add the local offset, if needed.
+ if (Base != &SD)
+ Value += Layout.getSymbolOffset(&SD) - Layout.getSymbolOffset(Base);
+ } else if (Symbol->isInSection() && !Symbol->isVariable()) {
+ // The index is the section ordinal (1-based).
+ Index = SD.getFragment()->getParent()->getOrdinal() + 1;
+ IsExtern = 0;
+ Value += Writer->getSymbolAddress(&SD, Layout);
+
+ if (IsPCRel)
+ Value -= FixupAddress + (1 << Log2Size);
+ } else if (Symbol->isVariable()) {
+ const MCExpr *Value = Symbol->getVariableValue();
+ int64_t Res;
+ bool isAbs = Value->EvaluateAsAbsolute(Res, Layout,
+ Writer->getSectionAddressMap());
+ if (isAbs) {
+ FixedValue = Res;
+ return;
+ } else {
+ report_fatal_error("unsupported relocation of variable '" +
+ Symbol->getName() + "'");
+ }
+ } else {
+ report_fatal_error("unsupported relocation of undefined symbol '" +
+ Symbol->getName() + "'");
+ }
+
+ MCSymbolRefExpr::VariantKind Modifier = Target.getSymA()->getKind();
+ if (IsPCRel) {
+ if (IsRIPRel) {
+ if (Modifier == MCSymbolRefExpr::VK_GOTPCREL) {
+ // x86_64 distinguishes movq foo@GOTPCREL so that the linker can
+ // rewrite the movq to an leaq at link time if the symbol ends up in
+ // the same linkage unit.
+ if (unsigned(Fixup.getKind()) == X86::reloc_riprel_4byte_movq_load)
+ Type = macho::RIT_X86_64_GOTLoad;
+ else
+ Type = macho::RIT_X86_64_GOT;
+ } else if (Modifier == MCSymbolRefExpr::VK_TLVP) {
+ Type = macho::RIT_X86_64_TLV;
+ } else if (Modifier != MCSymbolRefExpr::VK_None) {
+ report_fatal_error("unsupported symbol modifier in relocation");
+ } else {
+ Type = macho::RIT_X86_64_Signed;
+
+ // The Darwin x86_64 relocation format has a problem where it cannot
+ // encode an address (L<foo> + <constant>) which is outside the atom
+ // containing L<foo>. Generally, this shouldn't occur but it does
+ // happen when we have a RIPrel instruction with data following the
+ // relocation entry (e.g., movb $012, L0(%rip)). Even with the PCrel
+ // adjustment Darwin x86_64 uses, the offset is still negative and the
+ // linker has no way to recognize this.
+ //
+ // To work around this, Darwin uses several special relocation types
+ // to indicate the offsets. However, the specification or
+ // implementation of these seems to also be incomplete; they should
+ // adjust the addend as well based on the actual encoded instruction
+ // (the additional bias), but instead appear to just look at the final
+ // offset.
+ switch (-(Target.getConstant() + (1LL << Log2Size))) {
+ case 1: Type = macho::RIT_X86_64_Signed1; break;
+ case 2: Type = macho::RIT_X86_64_Signed2; break;
+ case 4: Type = macho::RIT_X86_64_Signed4; break;
+ }
+ }
+ } else {
+ if (Modifier != MCSymbolRefExpr::VK_None)
+ report_fatal_error("unsupported symbol modifier in branch "
+ "relocation");
+
+ Type = macho::RIT_X86_64_Branch;
+ }
+ } else {
+ if (Modifier == MCSymbolRefExpr::VK_GOT) {
+ Type = macho::RIT_X86_64_GOT;
+ } else if (Modifier == MCSymbolRefExpr::VK_GOTPCREL) {
+ // GOTPCREL is allowed as a modifier on non-PCrel instructions, in which
+ // case all we do is set the PCrel bit in the relocation entry; this is
+ // used with exception handling, for example. The source is required to
+ // include any necessary offset directly.
+ Type = macho::RIT_X86_64_GOT;
+ IsPCRel = 1;
+ } else if (Modifier == MCSymbolRefExpr::VK_TLVP) {
+ report_fatal_error("TLVP symbol modifier should have been rip-rel");
+ } else if (Modifier != MCSymbolRefExpr::VK_None)
+ report_fatal_error("unsupported symbol modifier in relocation");
+ else
+ Type = macho::RIT_X86_64_Unsigned;
+ }
+ }
+
+ // x86_64 always writes custom values into the fixups.
+ FixedValue = Value;
+
+ // struct relocation_info (8 bytes)
+ macho::RelocationEntry MRE;
+ MRE.Word0 = FixupOffset;
+ MRE.Word1 = ((Index << 0) |
+ (IsPCRel << 24) |
+ (Log2Size << 25) |
+ (IsExtern << 27) |
+ (Type << 28));
+ Writer->addRelocation(Fragment->getParent(), MRE);
+}
+
+void X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ unsigned Log2Size,
+ uint64_t &FixedValue) {
+ uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
+ unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
+ unsigned Type = macho::RIT_Vanilla;
+
+ // See <reloc.h>.
+ const MCSymbol *A = &Target.getSymA()->getSymbol();
+ MCSymbolData *A_SD = &Asm.getSymbolData(*A);
+
+ if (!A_SD->getFragment())
+ report_fatal_error("symbol '" + A->getName() +
+ "' can not be undefined in a subtraction expression");
+
+ uint32_t Value = Writer->getSymbolAddress(A_SD, Layout);
+ uint64_t SecAddr = Writer->getSectionAddress(A_SD->getFragment()->getParent());
+ FixedValue += SecAddr;
+ uint32_t Value2 = 0;
+
+ if (const MCSymbolRefExpr *B = Target.getSymB()) {
+ MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
+
+ if (!B_SD->getFragment())
+ report_fatal_error("symbol '" + B->getSymbol().getName() +
+ "' can not be undefined in a subtraction expression");
+
+ // Select the appropriate difference relocation type.
+ //
+ // Note that there is no longer any semantic difference between these two
+ // relocation types from the linkers point of view, this is done solely for
+ // pedantic compatibility with 'as'.
+ Type = A_SD->isExternal() ? (unsigned)macho::RIT_Difference :
+ (unsigned)macho::RIT_Generic_LocalDifference;
+ Value2 = Writer->getSymbolAddress(B_SD, Layout);
+ FixedValue -= Writer->getSectionAddress(B_SD->getFragment()->getParent());
+ }
+
+ // Relocations are written out in reverse order, so the PAIR comes first.
+ if (Type == macho::RIT_Difference ||
+ Type == macho::RIT_Generic_LocalDifference) {
+ macho::RelocationEntry MRE;
+ MRE.Word0 = ((0 << 0) |
+ (macho::RIT_Pair << 24) |
+ (Log2Size << 28) |
+ (IsPCRel << 30) |
+ macho::RF_Scattered);
+ MRE.Word1 = Value2;
+ Writer->addRelocation(Fragment->getParent(), MRE);
+ }
+
+ macho::RelocationEntry MRE;
+ MRE.Word0 = ((FixupOffset << 0) |
+ (Type << 24) |
+ (Log2Size << 28) |
+ (IsPCRel << 30) |
+ macho::RF_Scattered);
+ MRE.Word1 = Value;
+ Writer->addRelocation(Fragment->getParent(), MRE);
+}
+
+void X86MachObjectWriter::RecordTLVPRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ uint64_t &FixedValue) {
+ assert(Target.getSymA()->getKind() == MCSymbolRefExpr::VK_TLVP &&
+ !is64Bit() &&
+ "Should only be called with a 32-bit TLVP relocation!");
+
+ unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind());
+ uint32_t Value = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
+ unsigned IsPCRel = 0;
+
+ // Get the symbol data.
+ MCSymbolData *SD_A = &Asm.getSymbolData(Target.getSymA()->getSymbol());
+ unsigned Index = SD_A->getIndex();
+
+ // We're only going to have a second symbol in pic mode and it'll be a
+ // subtraction from the picbase. For 32-bit pic the addend is the difference
+ // between the picbase and the next address. For 32-bit static the addend is
+ // zero.
+ if (Target.getSymB()) {
+ // If this is a subtraction then we're pcrel.
+ uint32_t FixupAddress =
+ Writer->getFragmentAddress(Fragment, Layout) + Fixup.getOffset();
+ MCSymbolData *SD_B = &Asm.getSymbolData(Target.getSymB()->getSymbol());
+ IsPCRel = 1;
+ FixedValue = (FixupAddress - Writer->getSymbolAddress(SD_B, Layout) +
+ Target.getConstant());
+ FixedValue += 1ULL << Log2Size;
+ } else {
+ FixedValue = 0;
+ }
+
+ // struct relocation_info (8 bytes)
+ macho::RelocationEntry MRE;
+ MRE.Word0 = Value;
+ MRE.Word1 = ((Index << 0) |
+ (IsPCRel << 24) |
+ (Log2Size << 25) |
+ (1 << 27) | // Extern
+ (macho::RIT_Generic_TLV << 28)); // Type
+ Writer->addRelocation(Fragment->getParent(), MRE);
+}
+
+void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ uint64_t &FixedValue) {
+ unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
+ unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind());
+
+ // If this is a 32-bit TLVP reloc it's handled a bit differently.
+ if (Target.getSymA() &&
+ Target.getSymA()->getKind() == MCSymbolRefExpr::VK_TLVP) {
+ RecordTLVPRelocation(Writer, Asm, Layout, Fragment, Fixup, Target,
+ FixedValue);
+ return;
+ }
+
+ // If this is a difference or a defined symbol plus an offset, then we need a
+ // scattered relocation entry. Differences always require scattered
+ // relocations.
+ if (Target.getSymB())
+ return RecordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup,
+ Target, Log2Size, FixedValue);
+
+ // Get the symbol data, if any.
+ MCSymbolData *SD = 0;
+ if (Target.getSymA())
+ SD = &Asm.getSymbolData(Target.getSymA()->getSymbol());
+
+ // If this is an internal relocation with an offset, it also needs a scattered
+ // relocation entry.
+ uint32_t Offset = Target.getConstant();
+ if (IsPCRel)
+ Offset += 1 << Log2Size;
+ if (Offset && SD && !Writer->doesSymbolRequireExternRelocation(SD))
+ return RecordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup,
+ Target, Log2Size, FixedValue);
+
+ // See <reloc.h>.
+ uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
+ unsigned Index = 0;
+ unsigned IsExtern = 0;
+ unsigned Type = 0;
+
+ if (Target.isAbsolute()) { // constant
+ // SymbolNum of 0 indicates the absolute section.
+ //
+ // FIXME: Currently, these are never generated (see code below). I cannot
+ // find a case where they are actually emitted.
+ Type = macho::RIT_Vanilla;
+ } else {
+ // Resolve constant variables.
+ if (SD->getSymbol().isVariable()) {
+ int64_t Res;
+ if (SD->getSymbol().getVariableValue()->EvaluateAsAbsolute(
+ Res, Layout, Writer->getSectionAddressMap())) {
+ FixedValue = Res;
+ return;
+ }
+ }
+
+ // Check whether we need an external or internal relocation.
+ if (Writer->doesSymbolRequireExternRelocation(SD)) {
+ IsExtern = 1;
+ Index = SD->getIndex();
+ // For external relocations, make sure to offset the fixup value to
+ // compensate for the addend of the symbol address, if it was
+ // undefined. This occurs with weak definitions, for example.
+ if (!SD->Symbol->isUndefined())
+ FixedValue -= Layout.getSymbolOffset(SD);
+ } else {
+ // The index is the section ordinal (1-based).
+ const MCSectionData &SymSD = Asm.getSectionData(
+ SD->getSymbol().getSection());
+ Index = SymSD.getOrdinal() + 1;
+ FixedValue += Writer->getSectionAddress(&SymSD);
+ }
+ if (IsPCRel)
+ FixedValue -= Writer->getSectionAddress(Fragment->getParent());
+
+ Type = macho::RIT_Vanilla;
+ }
+
+ // struct relocation_info (8 bytes)
+ macho::RelocationEntry MRE;
+ MRE.Word0 = FixupOffset;
+ MRE.Word1 = ((Index << 0) |
+ (IsPCRel << 24) |
+ (Log2Size << 25) |
+ (IsExtern << 27) |
+ (Type << 28));
+ Writer->addRelocation(Fragment->getParent(), MRE);
+}
+
+MCObjectWriter *llvm::createX86MachObjectWriter(raw_ostream &OS,
+ bool Is64Bit,
+ uint32_t CPUType,
+ uint32_t CPUSubtype) {
+ return createMachObjectWriter(new X86MachObjectWriter(Is64Bit,
+ CPUType,
+ CPUSubtype),
+ OS, /*IsLittleEndian=*/true);
+}
diff --git a/contrib/llvm/lib/Target/X86/TargetInfo/X86TargetInfo.cpp b/contrib/llvm/lib/Target/X86/TargetInfo/X86TargetInfo.cpp
new file mode 100644
index 0000000..52a67f7
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/TargetInfo/X86TargetInfo.cpp
@@ -0,0 +1,23 @@
+//===-- X86TargetInfo.cpp - X86 Target Implementation ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "llvm/Module.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::TheX86_32Target, llvm::TheX86_64Target;
+
+extern "C" void LLVMInitializeX86TargetInfo() {
+ RegisterTarget<Triple::x86, /*HasJIT=*/true>
+ X(TheX86_32Target, "x86", "32-bit X86: Pentium-Pro and above");
+
+ RegisterTarget<Triple::x86_64, /*HasJIT=*/true>
+ Y(TheX86_64Target, "x86-64", "64-bit X86: EM64T and AMD64");
+}
diff --git a/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
new file mode 100644
index 0000000..aeb3309
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
@@ -0,0 +1,243 @@
+//===-- X86ShuffleDecode.cpp - X86 shuffle decode logic -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Define several functions to decode x86 specific shuffle semantics into a
+// generic vector mask.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86ShuffleDecode.h"
+
+//===----------------------------------------------------------------------===//
+// Vector Mask Decoding
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+
+void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<unsigned> &ShuffleMask) {
+ // Defaults the copying the dest value.
+ ShuffleMask.push_back(0);
+ ShuffleMask.push_back(1);
+ ShuffleMask.push_back(2);
+ ShuffleMask.push_back(3);
+
+ // Decode the immediate.
+ unsigned ZMask = Imm & 15;
+ unsigned CountD = (Imm >> 4) & 3;
+ unsigned CountS = (Imm >> 6) & 3;
+
+ // CountS selects which input element to use.
+ unsigned InVal = 4+CountS;
+ // CountD specifies which element of destination to update.
+ ShuffleMask[CountD] = InVal;
+ // ZMask zaps values, potentially overriding the CountD elt.
+ if (ZMask & 1) ShuffleMask[0] = SM_SentinelZero;
+ if (ZMask & 2) ShuffleMask[1] = SM_SentinelZero;
+ if (ZMask & 4) ShuffleMask[2] = SM_SentinelZero;
+ if (ZMask & 8) ShuffleMask[3] = SM_SentinelZero;
+}
+
+// <3,1> or <6,7,2,3>
+void DecodeMOVHLPSMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ for (unsigned i = NElts/2; i != NElts; ++i)
+ ShuffleMask.push_back(NElts+i);
+
+ for (unsigned i = NElts/2; i != NElts; ++i)
+ ShuffleMask.push_back(i);
+}
+
+// <0,2> or <0,1,4,5>
+void DecodeMOVLHPSMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ for (unsigned i = 0; i != NElts/2; ++i)
+ ShuffleMask.push_back(i);
+
+ for (unsigned i = 0; i != NElts/2; ++i)
+ ShuffleMask.push_back(NElts+i);
+}
+
+void DecodePSHUFMask(unsigned NElts, unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ for (unsigned i = 0; i != NElts; ++i) {
+ ShuffleMask.push_back(Imm % NElts);
+ Imm /= NElts;
+ }
+}
+
+void DecodePSHUFHWMask(unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ ShuffleMask.push_back(0);
+ ShuffleMask.push_back(1);
+ ShuffleMask.push_back(2);
+ ShuffleMask.push_back(3);
+ for (unsigned i = 0; i != 4; ++i) {
+ ShuffleMask.push_back(4+(Imm & 3));
+ Imm >>= 2;
+ }
+}
+
+void DecodePSHUFLWMask(unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ for (unsigned i = 0; i != 4; ++i) {
+ ShuffleMask.push_back((Imm & 3));
+ Imm >>= 2;
+ }
+ ShuffleMask.push_back(4);
+ ShuffleMask.push_back(5);
+ ShuffleMask.push_back(6);
+ ShuffleMask.push_back(7);
+}
+
+void DecodePUNPCKLBWMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i8, NElts), ShuffleMask);
+}
+
+void DecodePUNPCKLWDMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i16, NElts), ShuffleMask);
+}
+
+void DecodePUNPCKLDQMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i32, NElts), ShuffleMask);
+}
+
+void DecodePUNPCKLQDQMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i64, NElts), ShuffleMask);
+}
+
+void DecodePUNPCKLMask(EVT VT,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ DecodeUNPCKLPMask(VT, ShuffleMask);
+}
+
+void DecodePUNPCKHMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ for (unsigned i = 0; i != NElts/2; ++i) {
+ ShuffleMask.push_back(i+NElts/2);
+ ShuffleMask.push_back(i+NElts+NElts/2);
+ }
+}
+
+void DecodeSHUFPSMask(unsigned NElts, unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ // Part that reads from dest.
+ for (unsigned i = 0; i != NElts/2; ++i) {
+ ShuffleMask.push_back(Imm % NElts);
+ Imm /= NElts;
+ }
+ // Part that reads from src.
+ for (unsigned i = 0; i != NElts/2; ++i) {
+ ShuffleMask.push_back(Imm % NElts + NElts);
+ Imm /= NElts;
+ }
+}
+
+void DecodeUNPCKHPMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ for (unsigned i = 0; i != NElts/2; ++i) {
+ ShuffleMask.push_back(i+NElts/2); // Reads from dest
+ ShuffleMask.push_back(i+NElts+NElts/2); // Reads from src
+ }
+}
+
+void DecodeUNPCKLPSMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i32, NElts), ShuffleMask);
+}
+
+void DecodeUNPCKLPDMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i64, NElts), ShuffleMask);
+}
+
+/// DecodeUNPCKLPMask - This decodes the shuffle masks for unpcklps/unpcklpd
+/// etc. VT indicates the type of the vector allowing it to handle different
+/// datatypes and vector widths.
+void DecodeUNPCKLPMask(EVT VT,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ unsigned NumElts = VT.getVectorNumElements();
+
+ // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
+ // independently on 128-bit lanes.
+ unsigned NumLanes = VT.getSizeInBits() / 128;
+ if (NumLanes == 0 ) NumLanes = 1; // Handle MMX
+ unsigned NumLaneElts = NumElts / NumLanes;
+
+ unsigned Start = 0;
+ unsigned End = NumLaneElts / 2;
+ for (unsigned s = 0; s < NumLanes; ++s) {
+ for (unsigned i = Start; i != End; ++i) {
+ ShuffleMask.push_back(i); // Reads from dest/src1
+ ShuffleMask.push_back(i+NumLaneElts); // Reads from src/src2
+ }
+ // Process the next 128 bits.
+ Start += NumLaneElts;
+ End += NumLaneElts;
+ }
+}
+
+// DecodeVPERMILPSMask - Decodes VPERMILPS permutes for any 128-bit 32-bit
+// elements. For 256-bit vectors, it's considered as two 128 lanes, the
+// referenced elements can't cross lanes and the mask of the first lane must
+// be the same of the second.
+void DecodeVPERMILPSMask(unsigned NumElts, unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ unsigned NumLanes = (NumElts*32)/128;
+ unsigned LaneSize = NumElts/NumLanes;
+
+ for (unsigned l = 0; l != NumLanes; ++l) {
+ for (unsigned i = 0; i != LaneSize; ++i) {
+ unsigned Idx = (Imm >> (i*2)) & 0x3 ;
+ ShuffleMask.push_back(Idx+(l*LaneSize));
+ }
+ }
+}
+
+// DecodeVPERMILPDMask - Decodes VPERMILPD permutes for any 128-bit 64-bit
+// elements. For 256-bit vectors, it's considered as two 128 lanes, the
+// referenced elements can't cross lanes but the mask of the first lane can
+// be the different of the second (not like VPERMILPS).
+void DecodeVPERMILPDMask(unsigned NumElts, unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ unsigned NumLanes = (NumElts*64)/128;
+ unsigned LaneSize = NumElts/NumLanes;
+
+ for (unsigned l = 0; l < NumLanes; ++l) {
+ for (unsigned i = l*LaneSize; i < LaneSize*(l+1); ++i) {
+ unsigned Idx = (Imm >> i) & 0x1;
+ ShuffleMask.push_back(Idx+(l*LaneSize));
+ }
+ }
+}
+
+void DecodeVPERM2F128Mask(EVT VT, unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ unsigned HalfSize = VT.getVectorNumElements()/2;
+ unsigned FstHalfBegin = (Imm & 0x3) * HalfSize;
+ unsigned SndHalfBegin = ((Imm >> 4) & 0x3) * HalfSize;
+
+ for (int i = FstHalfBegin, e = FstHalfBegin+HalfSize; i != e; ++i)
+ ShuffleMask.push_back(i);
+ for (int i = SndHalfBegin, e = SndHalfBegin+HalfSize; i != e; ++i)
+ ShuffleMask.push_back(i);
+}
+
+void DecodeVPERM2F128Mask(unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ // VPERM2F128 is used by any 256-bit EVT, but X86InstComments only
+ // has information about the instruction and not the types. So for
+ // instruction comments purpose, assume the 256-bit vector is v4i64.
+ return DecodeVPERM2F128Mask(MVT::v4i64, Imm, ShuffleMask);
+}
+
+} // llvm namespace
diff --git a/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h
new file mode 100644
index 0000000..58193e6
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h
@@ -0,0 +1,107 @@
+//===-- X86ShuffleDecode.h - X86 shuffle decode logic -----------*-C++-*---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Define several functions to decode x86 specific shuffle semantics into a
+// generic vector mask.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86_SHUFFLE_DECODE_H
+#define X86_SHUFFLE_DECODE_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/ValueTypes.h"
+
+//===----------------------------------------------------------------------===//
+// Vector Mask Decoding
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+enum {
+ SM_SentinelZero = ~0U
+};
+
+void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<unsigned> &ShuffleMask);
+
+// <3,1> or <6,7,2,3>
+void DecodeMOVHLPSMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+// <0,2> or <0,1,4,5>
+void DecodeMOVLHPSMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodePSHUFMask(unsigned NElts, unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodePSHUFHWMask(unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodePSHUFLWMask(unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodePUNPCKLBWMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodePUNPCKLWDMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodePUNPCKLDQMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodePUNPCKLQDQMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodePUNPCKLMask(EVT VT,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodePUNPCKHMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodeSHUFPSMask(unsigned NElts, unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodeUNPCKHPMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodeUNPCKLPSMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodeUNPCKLPDMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+/// DecodeUNPCKLPMask - This decodes the shuffle masks for unpcklps/unpcklpd
+/// etc. VT indicates the type of the vector allowing it to handle different
+/// datatypes and vector widths.
+void DecodeUNPCKLPMask(EVT VT,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+
+// DecodeVPERMILPSMask - Decodes VPERMILPS permutes for any 128-bit 32-bit
+// elements. For 256-bit vectors, it's considered as two 128 lanes, the
+// referenced elements can't cross lanes and the mask of the first lane must
+// be the same of the second.
+void DecodeVPERMILPSMask(unsigned NElts, unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+// DecodeVPERMILPDMask - Decodes VPERMILPD permutes for any 128-bit 64-bit
+// elements. For 256-bit vectors, it's considered as two 128 lanes, the
+// referenced elements can't cross lanes but the mask of the first lane can
+// be the different of the second (not like VPERMILPS).
+void DecodeVPERMILPDMask(unsigned NElts, unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodeVPERM2F128Mask(unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+void DecodeVPERM2F128Mask(EVT VT, unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+} // llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/X86.h b/contrib/llvm/lib/Target/X86/X86.h
new file mode 100644
index 0000000..81e9422
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86.h
@@ -0,0 +1,71 @@
+//===-- X86.h - Top-level interface for X86 representation ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the x86
+// target library, as used by the LLVM JIT.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TARGET_X86_H
+#define TARGET_X86_H
+
+#include "MCTargetDesc/X86BaseInfo.h"
+#include "MCTargetDesc/X86MCTargetDesc.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+class FunctionPass;
+class JITCodeEmitter;
+class MachineCodeEmitter;
+class Target;
+class X86TargetMachine;
+
+/// createX86ISelDag - This pass converts a legalized DAG into a
+/// X86-specific DAG, ready for instruction scheduling.
+///
+FunctionPass *createX86ISelDag(X86TargetMachine &TM,
+ CodeGenOpt::Level OptLevel);
+
+/// createGlobalBaseRegPass - This pass initializes a global base
+/// register for PIC on x86-32.
+FunctionPass* createGlobalBaseRegPass();
+
+/// createX86FloatingPointStackifierPass - This function returns a pass which
+/// converts floating point register references and pseudo instructions into
+/// floating point stack references and physical instructions.
+///
+FunctionPass *createX86FloatingPointStackifierPass();
+
+/// createX86IssueVZeroUpperPass - This pass inserts AVX vzeroupper instructions
+/// before each call to avoid transition penalty between functions encoded with
+/// AVX and SSE.
+FunctionPass *createX86IssueVZeroUpperPass();
+
+/// createX86CodeEmitterPass - Return a pass that emits the collected X86 code
+/// to the specified MCE object.
+FunctionPass *createX86JITCodeEmitterPass(X86TargetMachine &TM,
+ JITCodeEmitter &JCE);
+
+/// createX86EmitCodeToMemory - Returns a pass that converts a register
+/// allocated function into raw machine code in a dynamically
+/// allocated chunk of memory.
+///
+FunctionPass *createEmitX86CodeToMemory();
+
+/// createX86MaxStackAlignmentHeuristicPass - This function returns a pass
+/// which determines whether the frame pointer register should be
+/// reserved in case dynamic stack alignment is later required.
+///
+FunctionPass *createX86MaxStackAlignmentHeuristicPass();
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/X86.td b/contrib/llvm/lib/Target/X86/X86.td
new file mode 100644
index 0000000..104b91f
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86.td
@@ -0,0 +1,268 @@
+//===- X86.td - Target definition file for the Intel X86 ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a target description file for the Intel i386 architecture, referred
+// to here as the "X86" architecture.
+//
+//===----------------------------------------------------------------------===//
+
+// Get the target-independent interfaces which we are implementing...
+//
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// X86 Subtarget state.
+//
+
+def Mode64Bit : SubtargetFeature<"64bit-mode", "In64BitMode", "true",
+ "64-bit mode (x86_64)">;
+
+def ModeNaCl : SubtargetFeature<"nacl-mode", "InNaClMode", "true",
+ "Native Client mode">;
+
+//===----------------------------------------------------------------------===//
+// X86 Subtarget features.
+//===----------------------------------------------------------------------===//
+
+def FeatureCMOV : SubtargetFeature<"cmov","HasCMov", "true",
+ "Enable conditional move instructions">;
+
+def FeaturePOPCNT : SubtargetFeature<"popcnt", "HasPOPCNT", "true",
+ "Support POPCNT instruction">;
+
+
+def FeatureMMX : SubtargetFeature<"mmx","X86SSELevel", "MMX",
+ "Enable MMX instructions">;
+def FeatureSSE1 : SubtargetFeature<"sse", "X86SSELevel", "SSE1",
+ "Enable SSE instructions",
+ // SSE codegen depends on cmovs, and all
+ // SSE1+ processors support them.
+ [FeatureMMX, FeatureCMOV]>;
+def FeatureSSE2 : SubtargetFeature<"sse2", "X86SSELevel", "SSE2",
+ "Enable SSE2 instructions",
+ [FeatureSSE1]>;
+def FeatureSSE3 : SubtargetFeature<"sse3", "X86SSELevel", "SSE3",
+ "Enable SSE3 instructions",
+ [FeatureSSE2]>;
+def FeatureSSSE3 : SubtargetFeature<"ssse3", "X86SSELevel", "SSSE3",
+ "Enable SSSE3 instructions",
+ [FeatureSSE3]>;
+def FeatureSSE41 : SubtargetFeature<"sse41", "X86SSELevel", "SSE41",
+ "Enable SSE 4.1 instructions",
+ [FeatureSSSE3]>;
+def FeatureSSE42 : SubtargetFeature<"sse42", "X86SSELevel", "SSE42",
+ "Enable SSE 4.2 instructions",
+ [FeatureSSE41, FeaturePOPCNT]>;
+def Feature3DNow : SubtargetFeature<"3dnow", "X863DNowLevel", "ThreeDNow",
+ "Enable 3DNow! instructions",
+ [FeatureMMX]>;
+def Feature3DNowA : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA",
+ "Enable 3DNow! Athlon instructions",
+ [Feature3DNow]>;
+// All x86-64 hardware has SSE2, but we don't mark SSE2 as an implied
+// feature, because SSE2 can be disabled (e.g. for compiling OS kernels)
+// without disabling 64-bit mode.
+def Feature64Bit : SubtargetFeature<"64bit", "HasX86_64", "true",
+ "Support 64-bit instructions",
+ [FeatureCMOV]>;
+def FeatureCMPXCHG16B : SubtargetFeature<"cmpxchg16b", "HasCmpxchg16b", "true",
+ "64-bit with cmpxchg16b",
+ [Feature64Bit]>;
+def FeatureSlowBTMem : SubtargetFeature<"slow-bt-mem", "IsBTMemSlow", "true",
+ "Bit testing of memory is slow">;
+def FeatureFastUAMem : SubtargetFeature<"fast-unaligned-mem",
+ "IsUAMemFast", "true",
+ "Fast unaligned memory access">;
+def FeatureSSE4A : SubtargetFeature<"sse4a", "HasSSE4A", "true",
+ "Support SSE 4a instructions",
+ [FeaturePOPCNT]>;
+
+def FeatureAVX : SubtargetFeature<"avx", "HasAVX", "true",
+ "Enable AVX instructions">;
+def FeatureCLMUL : SubtargetFeature<"clmul", "HasCLMUL", "true",
+ "Enable carry-less multiplication instructions">;
+def FeatureFMA3 : SubtargetFeature<"fma3", "HasFMA3", "true",
+ "Enable three-operand fused multiple-add">;
+def FeatureFMA4 : SubtargetFeature<"fma4", "HasFMA4", "true",
+ "Enable four-operand fused multiple-add">;
+def FeatureVectorUAMem : SubtargetFeature<"vector-unaligned-mem",
+ "HasVectorUAMem", "true",
+ "Allow unaligned memory operands on vector/SIMD instructions">;
+def FeatureAES : SubtargetFeature<"aes", "HasAES", "true",
+ "Enable AES instructions">;
+def FeatureMOVBE : SubtargetFeature<"movbe", "HasMOVBE", "true",
+ "Support MOVBE instruction">;
+def FeatureRDRAND : SubtargetFeature<"rdrand", "HasRDRAND", "true",
+ "Support RDRAND instruction">;
+def FeatureF16C : SubtargetFeature<"f16c", "HasF16C", "true",
+ "Support 16-bit floating point conversion instructions">;
+def FeatureLZCNT : SubtargetFeature<"lzcnt", "HasLZCNT", "true",
+ "Support LZCNT instruction">;
+def FeatureBMI : SubtargetFeature<"bmi", "HasBMI", "true",
+ "Support BMI instructions">;
+
+//===----------------------------------------------------------------------===//
+// X86 processors supported.
+//===----------------------------------------------------------------------===//
+
+class Proc<string Name, list<SubtargetFeature> Features>
+ : Processor<Name, NoItineraries, Features>;
+
+def : Proc<"generic", []>;
+def : Proc<"i386", []>;
+def : Proc<"i486", []>;
+def : Proc<"i586", []>;
+def : Proc<"pentium", []>;
+def : Proc<"pentium-mmx", [FeatureMMX]>;
+def : Proc<"i686", []>;
+def : Proc<"pentiumpro", [FeatureCMOV]>;
+def : Proc<"pentium2", [FeatureMMX, FeatureCMOV]>;
+def : Proc<"pentium3", [FeatureSSE1]>;
+def : Proc<"pentium3m", [FeatureSSE1, FeatureSlowBTMem]>;
+def : Proc<"pentium-m", [FeatureSSE2, FeatureSlowBTMem]>;
+def : Proc<"pentium4", [FeatureSSE2]>;
+def : Proc<"pentium4m", [FeatureSSE2, FeatureSlowBTMem]>;
+def : Proc<"x86-64", [FeatureSSE2, Feature64Bit, FeatureSlowBTMem]>;
+def : Proc<"yonah", [FeatureSSE3, FeatureSlowBTMem]>;
+def : Proc<"prescott", [FeatureSSE3, FeatureSlowBTMem]>;
+def : Proc<"nocona", [FeatureSSE3, FeatureCMPXCHG16B,
+ FeatureSlowBTMem]>;
+def : Proc<"core2", [FeatureSSSE3, FeatureCMPXCHG16B,
+ FeatureSlowBTMem]>;
+def : Proc<"penryn", [FeatureSSE41, FeatureCMPXCHG16B,
+ FeatureSlowBTMem]>;
+def : Proc<"atom", [FeatureSSE3, FeatureCMPXCHG16B, FeatureMOVBE,
+ FeatureSlowBTMem]>;
+// "Arrandale" along with corei3 and corei5
+def : Proc<"corei7", [FeatureSSE42, FeatureCMPXCHG16B,
+ FeatureSlowBTMem, FeatureFastUAMem, FeatureAES]>;
+def : Proc<"nehalem", [FeatureSSE42, FeatureCMPXCHG16B,
+ FeatureSlowBTMem, FeatureFastUAMem]>;
+// Westmere is a similar machine to nehalem with some additional features.
+// Westmere is the corei3/i5/i7 path from nehalem to sandybridge
+def : Proc<"westmere", [FeatureSSE42, FeatureCMPXCHG16B,
+ FeatureSlowBTMem, FeatureFastUAMem, FeatureAES,
+ FeatureCLMUL]>;
+// Sandy Bridge
+// SSE is not listed here since llvm treats AVX as a reimplementation of SSE,
+// rather than a superset.
+// FIXME: Disabling AVX for now since it's not ready.
+def : Proc<"corei7-avx", [FeatureSSE42, FeatureCMPXCHG16B,
+ FeatureAES, FeatureCLMUL]>;
+// Ivy Bridge
+def : Proc<"core-avx-i", [FeatureSSE42, FeatureCMPXCHG16B,
+ FeatureAES, FeatureCLMUL,
+ FeatureRDRAND, FeatureF16C]>;
+
+// Haswell
+def : Proc<"core-avx2", [FeatureSSE42, FeatureCMPXCHG16B, FeatureAES,
+ FeatureCLMUL, FeatureRDRAND, FeatureF16C,
+ FeatureFMA3, FeatureMOVBE, FeatureLZCNT,
+ FeatureBMI]>;
+
+def : Proc<"k6", [FeatureMMX]>;
+def : Proc<"k6-2", [Feature3DNow]>;
+def : Proc<"k6-3", [Feature3DNow]>;
+def : Proc<"athlon", [Feature3DNowA, FeatureSlowBTMem]>;
+def : Proc<"athlon-tbird", [Feature3DNowA, FeatureSlowBTMem]>;
+def : Proc<"athlon-4", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem]>;
+def : Proc<"athlon-xp", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem]>;
+def : Proc<"athlon-mp", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem]>;
+def : Proc<"k8", [FeatureSSE2, Feature3DNowA, Feature64Bit,
+ FeatureSlowBTMem]>;
+def : Proc<"opteron", [FeatureSSE2, Feature3DNowA, Feature64Bit,
+ FeatureSlowBTMem]>;
+def : Proc<"athlon64", [FeatureSSE2, Feature3DNowA, Feature64Bit,
+ FeatureSlowBTMem]>;
+def : Proc<"athlon-fx", [FeatureSSE2, Feature3DNowA, Feature64Bit,
+ FeatureSlowBTMem]>;
+def : Proc<"k8-sse3", [FeatureSSE3, Feature3DNowA, FeatureCMPXCHG16B,
+ FeatureSlowBTMem]>;
+def : Proc<"opteron-sse3", [FeatureSSE3, Feature3DNowA, FeatureCMPXCHG16B,
+ FeatureSlowBTMem]>;
+def : Proc<"athlon64-sse3", [FeatureSSE3, Feature3DNowA, FeatureCMPXCHG16B,
+ FeatureSlowBTMem]>;
+def : Proc<"amdfam10", [FeatureSSE3, FeatureSSE4A,
+ Feature3DNowA, FeatureCMPXCHG16B,
+ FeatureSlowBTMem]>;
+def : Proc<"barcelona", [FeatureSSE3, FeatureSSE4A,
+ Feature3DNowA, FeatureCMPXCHG16B,
+ FeatureSlowBTMem]>;
+def : Proc<"istanbul", [Feature3DNowA, FeatureCMPXCHG16B,
+ FeatureSSE4A, Feature3DNowA]>;
+def : Proc<"shanghai", [Feature3DNowA, FeatureCMPXCHG16B, FeatureSSE4A,
+ Feature3DNowA]>;
+
+def : Proc<"winchip-c6", [FeatureMMX]>;
+def : Proc<"winchip2", [Feature3DNow]>;
+def : Proc<"c3", [Feature3DNow]>;
+def : Proc<"c3-2", [FeatureSSE1]>;
+
+//===----------------------------------------------------------------------===//
+// Register File Description
+//===----------------------------------------------------------------------===//
+
+include "X86RegisterInfo.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "X86InstrInfo.td"
+
+def X86InstrInfo : InstrInfo;
+
+//===----------------------------------------------------------------------===//
+// Calling Conventions
+//===----------------------------------------------------------------------===//
+
+include "X86CallingConv.td"
+
+
+//===----------------------------------------------------------------------===//
+// Assembly Parser
+//===----------------------------------------------------------------------===//
+
+// Currently the X86 assembly parser only supports ATT syntax.
+def ATTAsmParser : AsmParser {
+ string AsmParserClassName = "ATTAsmParser";
+ int Variant = 0;
+
+ // Discard comments in assembly strings.
+ string CommentDelimiter = "#";
+
+ // Recognize hard coded registers.
+ string RegisterPrefix = "%";
+}
+
+//===----------------------------------------------------------------------===//
+// Assembly Printers
+//===----------------------------------------------------------------------===//
+
+// The X86 target supports two different syntaxes for emitting machine code.
+// This is controlled by the -x86-asm-syntax={att|intel}
+def ATTAsmWriter : AsmWriter {
+ string AsmWriterClassName = "ATTInstPrinter";
+ int Variant = 0;
+ bit isMCAsmWriter = 1;
+}
+def IntelAsmWriter : AsmWriter {
+ string AsmWriterClassName = "IntelInstPrinter";
+ int Variant = 1;
+ bit isMCAsmWriter = 1;
+}
+
+def X86 : Target {
+ // Information about the instructions...
+ let InstructionSet = X86InstrInfo;
+
+ let AssemblyParsers = [ATTAsmParser];
+
+ let AssemblyWriters = [ATTAsmWriter, IntelAsmWriter];
+}
diff --git a/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp b/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp
new file mode 100644
index 0000000..4c3ff02
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp
@@ -0,0 +1,715 @@
+//===-- X86AsmPrinter.cpp - Convert X86 LLVM code to AT&T assembly --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to X86 machine code.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86AsmPrinter.h"
+#include "InstPrinter/X86ATTInstPrinter.h"
+#include "InstPrinter/X86IntelInstPrinter.h"
+#include "X86MCInstLower.h"
+#include "X86.h"
+#include "X86COFFMachineModuleInfo.h"
+#include "X86MachineFunctionInfo.h"
+#include "X86TargetMachine.h"
+#include "llvm/CallingConv.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Type.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/COFF.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/ADT/SmallString.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Primitive Helper Functions.
+//===----------------------------------------------------------------------===//
+
+/// runOnMachineFunction - Emit the function body.
+///
+bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+ SetupMachineFunction(MF);
+
+ if (Subtarget->isTargetCOFF() && !Subtarget->isTargetEnvMacho()) {
+ bool Intrn = MF.getFunction()->hasInternalLinkage();
+ OutStreamer.BeginCOFFSymbolDef(CurrentFnSym);
+ OutStreamer.EmitCOFFSymbolStorageClass(Intrn ? COFF::IMAGE_SYM_CLASS_STATIC
+ : COFF::IMAGE_SYM_CLASS_EXTERNAL);
+ OutStreamer.EmitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION
+ << COFF::SCT_COMPLEX_TYPE_SHIFT);
+ OutStreamer.EndCOFFSymbolDef();
+ }
+
+ // Have common code print out the function header with linkage info etc.
+ EmitFunctionHeader();
+
+ // Emit the rest of the function body.
+ EmitFunctionBody();
+
+ // We didn't modify anything.
+ return false;
+}
+
+/// printSymbolOperand - Print a raw symbol reference operand. This handles
+/// jump tables, constant pools, global address and external symbols, all of
+/// which print to a label with various suffixes for relocation types etc.
+void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO,
+ raw_ostream &O) {
+ switch (MO.getType()) {
+ default: llvm_unreachable("unknown symbol type!");
+ case MachineOperand::MO_JumpTableIndex:
+ O << *GetJTISymbol(MO.getIndex());
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ O << *GetCPISymbol(MO.getIndex());
+ printOffset(MO.getOffset(), O);
+ break;
+ case MachineOperand::MO_GlobalAddress: {
+ const GlobalValue *GV = MO.getGlobal();
+
+ MCSymbol *GVSym;
+ if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB)
+ GVSym = GetSymbolWithGlobalValueBase(GV, "$stub");
+ else if (MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY ||
+ MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY_PIC_BASE ||
+ MO.getTargetFlags() == X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE)
+ GVSym = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
+ else
+ GVSym = Mang->getSymbol(GV);
+
+ // Handle dllimport linkage.
+ if (MO.getTargetFlags() == X86II::MO_DLLIMPORT)
+ GVSym = OutContext.GetOrCreateSymbol(Twine("__imp_") + GVSym->getName());
+
+ if (MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY ||
+ MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY_PIC_BASE) {
+ MCSymbol *Sym = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>().getGVStubEntry(Sym);
+ if (StubSym.getPointer() == 0)
+ StubSym = MachineModuleInfoImpl::
+ StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage());
+ } else if (MO.getTargetFlags() == X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE){
+ MCSymbol *Sym = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>().getHiddenGVStubEntry(Sym);
+ if (StubSym.getPointer() == 0)
+ StubSym = MachineModuleInfoImpl::
+ StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage());
+ } else if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB) {
+ MCSymbol *Sym = GetSymbolWithGlobalValueBase(GV, "$stub");
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>().getFnStubEntry(Sym);
+ if (StubSym.getPointer() == 0)
+ StubSym = MachineModuleInfoImpl::
+ StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage());
+ }
+
+ // If the name begins with a dollar-sign, enclose it in parens. We do this
+ // to avoid having it look like an integer immediate to the assembler.
+ if (GVSym->getName()[0] != '$')
+ O << *GVSym;
+ else
+ O << '(' << *GVSym << ')';
+ printOffset(MO.getOffset(), O);
+ break;
+ }
+ case MachineOperand::MO_ExternalSymbol: {
+ const MCSymbol *SymToPrint;
+ if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB) {
+ SmallString<128> TempNameStr;
+ TempNameStr += StringRef(MO.getSymbolName());
+ TempNameStr += StringRef("$stub");
+
+ MCSymbol *Sym = GetExternalSymbolSymbol(TempNameStr.str());
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>().getFnStubEntry(Sym);
+ if (StubSym.getPointer() == 0) {
+ TempNameStr.erase(TempNameStr.end()-5, TempNameStr.end());
+ StubSym = MachineModuleInfoImpl::
+ StubValueTy(OutContext.GetOrCreateSymbol(TempNameStr.str()),
+ true);
+ }
+ SymToPrint = StubSym.getPointer();
+ } else {
+ SymToPrint = GetExternalSymbolSymbol(MO.getSymbolName());
+ }
+
+ // If the name begins with a dollar-sign, enclose it in parens. We do this
+ // to avoid having it look like an integer immediate to the assembler.
+ if (SymToPrint->getName()[0] != '$')
+ O << *SymToPrint;
+ else
+ O << '(' << *SymToPrint << '(';
+ break;
+ }
+ }
+
+ switch (MO.getTargetFlags()) {
+ default:
+ llvm_unreachable("Unknown target flag on GV operand");
+ case X86II::MO_NO_FLAG: // No flag.
+ break;
+ case X86II::MO_DARWIN_NONLAZY:
+ case X86II::MO_DLLIMPORT:
+ case X86II::MO_DARWIN_STUB:
+ // These affect the name of the symbol, not any suffix.
+ break;
+ case X86II::MO_GOT_ABSOLUTE_ADDRESS:
+ O << " + [.-" << *MF->getPICBaseSymbol() << ']';
+ break;
+ case X86II::MO_PIC_BASE_OFFSET:
+ case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
+ case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE:
+ O << '-' << *MF->getPICBaseSymbol();
+ break;
+ case X86II::MO_TLSGD: O << "@TLSGD"; break;
+ case X86II::MO_GOTTPOFF: O << "@GOTTPOFF"; break;
+ case X86II::MO_INDNTPOFF: O << "@INDNTPOFF"; break;
+ case X86II::MO_TPOFF: O << "@TPOFF"; break;
+ case X86II::MO_NTPOFF: O << "@NTPOFF"; break;
+ case X86II::MO_GOTPCREL: O << "@GOTPCREL"; break;
+ case X86II::MO_GOT: O << "@GOT"; break;
+ case X86II::MO_GOTOFF: O << "@GOTOFF"; break;
+ case X86II::MO_PLT: O << "@PLT"; break;
+ case X86II::MO_TLVP: O << "@TLVP"; break;
+ case X86II::MO_TLVP_PIC_BASE:
+ O << "@TLVP" << '-' << *MF->getPICBaseSymbol();
+ break;
+ }
+}
+
+/// print_pcrel_imm - This is used to print an immediate value that ends up
+/// being encoded as a pc-relative value. These print slightly differently, for
+/// example, a $ is not emitted.
+void X86AsmPrinter::print_pcrel_imm(const MachineInstr *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ switch (MO.getType()) {
+ default: llvm_unreachable("Unknown pcrel immediate operand");
+ case MachineOperand::MO_Register:
+ // pc-relativeness was handled when computing the value in the reg.
+ printOperand(MI, OpNo, O);
+ return;
+ case MachineOperand::MO_Immediate:
+ O << MO.getImm();
+ return;
+ case MachineOperand::MO_MachineBasicBlock:
+ O << *MO.getMBB()->getSymbol();
+ return;
+ case MachineOperand::MO_GlobalAddress:
+ case MachineOperand::MO_ExternalSymbol:
+ printSymbolOperand(MO, O);
+ return;
+ }
+}
+
+
+void X86AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
+ raw_ostream &O, const char *Modifier) {
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ switch (MO.getType()) {
+ default: llvm_unreachable("unknown operand type!");
+ case MachineOperand::MO_Register: {
+ O << '%';
+ unsigned Reg = MO.getReg();
+ if (Modifier && strncmp(Modifier, "subreg", strlen("subreg")) == 0) {
+ EVT VT = (strcmp(Modifier+6,"64") == 0) ?
+ MVT::i64 : ((strcmp(Modifier+6, "32") == 0) ? MVT::i32 :
+ ((strcmp(Modifier+6,"16") == 0) ? MVT::i16 : MVT::i8));
+ Reg = getX86SubSuperRegister(Reg, VT);
+ }
+ O << X86ATTInstPrinter::getRegisterName(Reg);
+ return;
+ }
+
+ case MachineOperand::MO_Immediate:
+ O << '$' << MO.getImm();
+ return;
+
+ case MachineOperand::MO_JumpTableIndex:
+ case MachineOperand::MO_ConstantPoolIndex:
+ case MachineOperand::MO_GlobalAddress:
+ case MachineOperand::MO_ExternalSymbol: {
+ O << '$';
+ printSymbolOperand(MO, O);
+ break;
+ }
+ }
+}
+
+void X86AsmPrinter::printSSECC(const MachineInstr *MI, unsigned Op,
+ raw_ostream &O) {
+ unsigned char value = MI->getOperand(Op).getImm();
+ assert(value <= 7 && "Invalid ssecc argument!");
+ switch (value) {
+ case 0: O << "eq"; break;
+ case 1: O << "lt"; break;
+ case 2: O << "le"; break;
+ case 3: O << "unord"; break;
+ case 4: O << "neq"; break;
+ case 5: O << "nlt"; break;
+ case 6: O << "nle"; break;
+ case 7: O << "ord"; break;
+ }
+}
+
+void X86AsmPrinter::printLeaMemReference(const MachineInstr *MI, unsigned Op,
+ raw_ostream &O, const char *Modifier) {
+ const MachineOperand &BaseReg = MI->getOperand(Op);
+ const MachineOperand &IndexReg = MI->getOperand(Op+2);
+ const MachineOperand &DispSpec = MI->getOperand(Op+3);
+
+ // If we really don't want to print out (rip), don't.
+ bool HasBaseReg = BaseReg.getReg() != 0;
+ if (HasBaseReg && Modifier && !strcmp(Modifier, "no-rip") &&
+ BaseReg.getReg() == X86::RIP)
+ HasBaseReg = false;
+
+ // HasParenPart - True if we will print out the () part of the mem ref.
+ bool HasParenPart = IndexReg.getReg() || HasBaseReg;
+
+ if (DispSpec.isImm()) {
+ int DispVal = DispSpec.getImm();
+ if (DispVal || !HasParenPart)
+ O << DispVal;
+ } else {
+ assert(DispSpec.isGlobal() || DispSpec.isCPI() ||
+ DispSpec.isJTI() || DispSpec.isSymbol());
+ printSymbolOperand(MI->getOperand(Op+3), O);
+ }
+
+ if (Modifier && strcmp(Modifier, "H") == 0)
+ O << "+8";
+
+ if (HasParenPart) {
+ assert(IndexReg.getReg() != X86::ESP &&
+ "X86 doesn't allow scaling by ESP");
+
+ O << '(';
+ if (HasBaseReg)
+ printOperand(MI, Op, O, Modifier);
+
+ if (IndexReg.getReg()) {
+ O << ',';
+ printOperand(MI, Op+2, O, Modifier);
+ unsigned ScaleVal = MI->getOperand(Op+1).getImm();
+ if (ScaleVal != 1)
+ O << ',' << ScaleVal;
+ }
+ O << ')';
+ }
+}
+
+void X86AsmPrinter::printMemReference(const MachineInstr *MI, unsigned Op,
+ raw_ostream &O, const char *Modifier) {
+ assert(isMem(MI, Op) && "Invalid memory reference!");
+ const MachineOperand &Segment = MI->getOperand(Op+4);
+ if (Segment.getReg()) {
+ printOperand(MI, Op+4, O, Modifier);
+ O << ':';
+ }
+ printLeaMemReference(MI, Op, O, Modifier);
+}
+
+void X86AsmPrinter::printPICLabel(const MachineInstr *MI, unsigned Op,
+ raw_ostream &O) {
+ O << *MF->getPICBaseSymbol() << '\n';
+ O << *MF->getPICBaseSymbol() << ':';
+}
+
+bool X86AsmPrinter::printAsmMRegister(const MachineOperand &MO, char Mode,
+ raw_ostream &O) {
+ unsigned Reg = MO.getReg();
+ switch (Mode) {
+ default: return true; // Unknown mode.
+ case 'b': // Print QImode register
+ Reg = getX86SubSuperRegister(Reg, MVT::i8);
+ break;
+ case 'h': // Print QImode high register
+ Reg = getX86SubSuperRegister(Reg, MVT::i8, true);
+ break;
+ case 'w': // Print HImode register
+ Reg = getX86SubSuperRegister(Reg, MVT::i16);
+ break;
+ case 'k': // Print SImode register
+ Reg = getX86SubSuperRegister(Reg, MVT::i32);
+ break;
+ case 'q': // Print DImode register
+ Reg = getX86SubSuperRegister(Reg, MVT::i64);
+ break;
+ }
+
+ O << '%' << X86ATTInstPrinter::getRegisterName(Reg);
+ return false;
+}
+
+/// PrintAsmOperand - Print out an operand for an inline asm expression.
+///
+bool X86AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode, raw_ostream &O) {
+ // Does this asm operand have a single letter operand modifier?
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ const MachineOperand &MO = MI->getOperand(OpNo);
+
+ switch (ExtraCode[0]) {
+ default: return true; // Unknown modifier.
+ case 'a': // This is an address. Currently only 'i' and 'r' are expected.
+ if (MO.isImm()) {
+ O << MO.getImm();
+ return false;
+ }
+ if (MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isSymbol()) {
+ printSymbolOperand(MO, O);
+ if (Subtarget->isPICStyleRIPRel())
+ O << "(%rip)";
+ return false;
+ }
+ if (MO.isReg()) {
+ O << '(';
+ printOperand(MI, OpNo, O);
+ O << ')';
+ return false;
+ }
+ return true;
+
+ case 'c': // Don't print "$" before a global var name or constant.
+ if (MO.isImm())
+ O << MO.getImm();
+ else if (MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isSymbol())
+ printSymbolOperand(MO, O);
+ else
+ printOperand(MI, OpNo, O);
+ return false;
+
+ case 'A': // Print '*' before a register (it must be a register)
+ if (MO.isReg()) {
+ O << '*';
+ printOperand(MI, OpNo, O);
+ return false;
+ }
+ return true;
+
+ case 'b': // Print QImode register
+ case 'h': // Print QImode high register
+ case 'w': // Print HImode register
+ case 'k': // Print SImode register
+ case 'q': // Print DImode register
+ if (MO.isReg())
+ return printAsmMRegister(MO, ExtraCode[0], O);
+ printOperand(MI, OpNo, O);
+ return false;
+
+ case 'P': // This is the operand of a call, treat specially.
+ print_pcrel_imm(MI, OpNo, O);
+ return false;
+
+ case 'n': // Negate the immediate or print a '-' before the operand.
+ // Note: this is a temporary solution. It should be handled target
+ // independently as part of the 'MC' work.
+ if (MO.isImm()) {
+ O << -MO.getImm();
+ return false;
+ }
+ O << '-';
+ }
+ }
+
+ printOperand(MI, OpNo, O);
+ return false;
+}
+
+bool X86AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNo, unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &O) {
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ default: return true; // Unknown modifier.
+ case 'b': // Print QImode register
+ case 'h': // Print QImode high register
+ case 'w': // Print HImode register
+ case 'k': // Print SImode register
+ case 'q': // Print SImode register
+ // These only apply to registers, ignore on mem.
+ break;
+ case 'H':
+ printMemReference(MI, OpNo, O, "H");
+ return false;
+ case 'P': // Don't print @PLT, but do print as memory.
+ printMemReference(MI, OpNo, O, "no-rip");
+ return false;
+ }
+ }
+ printMemReference(MI, OpNo, O);
+ return false;
+}
+
+void X86AsmPrinter::EmitStartOfAsmFile(Module &M) {
+ if (Subtarget->isTargetEnvMacho())
+ OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
+}
+
+
+void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
+ if (Subtarget->isTargetEnvMacho()) {
+ // All darwin targets use mach-o.
+ MachineModuleInfoMachO &MMIMacho =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>();
+
+ // Output stubs for dynamically-linked functions.
+ MachineModuleInfoMachO::SymbolListTy Stubs;
+
+ Stubs = MMIMacho.GetFnStubList();
+ if (!Stubs.empty()) {
+ const MCSection *TheSection =
+ OutContext.getMachOSection("__IMPORT", "__jump_table",
+ MCSectionMachO::S_SYMBOL_STUBS |
+ MCSectionMachO::S_ATTR_SELF_MODIFYING_CODE |
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ 5, SectionKind::getMetadata());
+ OutStreamer.SwitchSection(TheSection);
+
+ for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
+ // L_foo$stub:
+ OutStreamer.EmitLabel(Stubs[i].first);
+ // .indirect_symbol _foo
+ OutStreamer.EmitSymbolAttribute(Stubs[i].second.getPointer(),
+ MCSA_IndirectSymbol);
+ // hlt; hlt; hlt; hlt; hlt hlt = 0xf4.
+ const char HltInsts[] = "\xf4\xf4\xf4\xf4\xf4";
+ OutStreamer.EmitBytes(StringRef(HltInsts, 5), 0/*addrspace*/);
+ }
+
+ Stubs.clear();
+ OutStreamer.AddBlankLine();
+ }
+
+ // Output stubs for external and common global variables.
+ Stubs = MMIMacho.GetGVStubList();
+ if (!Stubs.empty()) {
+ const MCSection *TheSection =
+ OutContext.getMachOSection("__IMPORT", "__pointers",
+ MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS,
+ SectionKind::getMetadata());
+ OutStreamer.SwitchSection(TheSection);
+
+ for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
+ // L_foo$non_lazy_ptr:
+ OutStreamer.EmitLabel(Stubs[i].first);
+ // .indirect_symbol _foo
+ MachineModuleInfoImpl::StubValueTy &MCSym = Stubs[i].second;
+ OutStreamer.EmitSymbolAttribute(MCSym.getPointer(),
+ MCSA_IndirectSymbol);
+ // .long 0
+ if (MCSym.getInt())
+ // External to current translation unit.
+ OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/);
+ else
+ // Internal to current translation unit.
+ //
+ // When we place the LSDA into the TEXT section, the type info
+ // pointers need to be indirect and pc-rel. We accomplish this by
+ // using NLPs. However, sometimes the types are local to the file. So
+ // we need to fill in the value for the NLP in those cases.
+ OutStreamer.EmitValue(MCSymbolRefExpr::Create(MCSym.getPointer(),
+ OutContext),
+ 4/*size*/, 0/*addrspace*/);
+ }
+ Stubs.clear();
+ OutStreamer.AddBlankLine();
+ }
+
+ Stubs = MMIMacho.GetHiddenGVStubList();
+ if (!Stubs.empty()) {
+ OutStreamer.SwitchSection(getObjFileLowering().getDataSection());
+ EmitAlignment(2);
+
+ for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
+ // L_foo$non_lazy_ptr:
+ OutStreamer.EmitLabel(Stubs[i].first);
+ // .long _foo
+ OutStreamer.EmitValue(MCSymbolRefExpr::
+ Create(Stubs[i].second.getPointer(),
+ OutContext),
+ 4/*size*/, 0/*addrspace*/);
+ }
+ Stubs.clear();
+ OutStreamer.AddBlankLine();
+ }
+
+ // Funny Darwin hack: This flag tells the linker that no global symbols
+ // contain code that falls through to other global symbols (e.g. the obvious
+ // implementation of multiple entry points). If this doesn't occur, the
+ // linker can safely perform dead code stripping. Since LLVM never
+ // generates code that does this, it is always safe to set.
+ OutStreamer.EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
+ }
+
+ if (Subtarget->isTargetWindows() && !Subtarget->isTargetCygMing() &&
+ MMI->callsExternalVAFunctionWithFloatingPointArguments()) {
+ StringRef SymbolName = Subtarget->is64Bit() ? "_fltused" : "__fltused";
+ MCSymbol *S = MMI->getContext().GetOrCreateSymbol(SymbolName);
+ OutStreamer.EmitSymbolAttribute(S, MCSA_Global);
+ }
+
+ if (Subtarget->isTargetCOFF() && !Subtarget->isTargetEnvMacho()) {
+ X86COFFMachineModuleInfo &COFFMMI =
+ MMI->getObjFileInfo<X86COFFMachineModuleInfo>();
+
+ // Emit type information for external functions
+ typedef X86COFFMachineModuleInfo::externals_iterator externals_iterator;
+ for (externals_iterator I = COFFMMI.externals_begin(),
+ E = COFFMMI.externals_end();
+ I != E; ++I) {
+ OutStreamer.BeginCOFFSymbolDef(CurrentFnSym);
+ OutStreamer.EmitCOFFSymbolStorageClass(COFF::IMAGE_SYM_CLASS_EXTERNAL);
+ OutStreamer.EmitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION
+ << COFF::SCT_COMPLEX_TYPE_SHIFT);
+ OutStreamer.EndCOFFSymbolDef();
+ }
+
+ // Necessary for dllexport support
+ std::vector<const MCSymbol*> DLLExportedFns, DLLExportedGlobals;
+
+ const TargetLoweringObjectFileCOFF &TLOFCOFF =
+ static_cast<const TargetLoweringObjectFileCOFF&>(getObjFileLowering());
+
+ for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I)
+ if (I->hasDLLExportLinkage())
+ DLLExportedFns.push_back(Mang->getSymbol(I));
+
+ for (Module::const_global_iterator I = M.global_begin(),
+ E = M.global_end(); I != E; ++I)
+ if (I->hasDLLExportLinkage())
+ DLLExportedGlobals.push_back(Mang->getSymbol(I));
+
+ // Output linker support code for dllexported globals on windows.
+ if (!DLLExportedGlobals.empty() || !DLLExportedFns.empty()) {
+ OutStreamer.SwitchSection(TLOFCOFF.getDrectveSection());
+ SmallString<128> name;
+ for (unsigned i = 0, e = DLLExportedGlobals.size(); i != e; ++i) {
+ if (Subtarget->isTargetWindows())
+ name = " /EXPORT:";
+ else
+ name = " -export:";
+ name += DLLExportedGlobals[i]->getName();
+ if (Subtarget->isTargetWindows())
+ name += ",DATA";
+ else
+ name += ",data";
+ OutStreamer.EmitBytes(name, 0);
+ }
+
+ for (unsigned i = 0, e = DLLExportedFns.size(); i != e; ++i) {
+ if (Subtarget->isTargetWindows())
+ name = " /EXPORT:";
+ else
+ name = " -export:";
+ name += DLLExportedFns[i]->getName();
+ OutStreamer.EmitBytes(name, 0);
+ }
+ }
+ }
+
+ if (Subtarget->isTargetELF()) {
+ const TargetLoweringObjectFileELF &TLOFELF =
+ static_cast<const TargetLoweringObjectFileELF &>(getObjFileLowering());
+
+ MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>();
+
+ // Output stubs for external and common global variables.
+ MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList();
+ if (!Stubs.empty()) {
+ OutStreamer.SwitchSection(TLOFELF.getDataRelSection());
+ const TargetData *TD = TM.getTargetData();
+
+ for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
+ OutStreamer.EmitLabel(Stubs[i].first);
+ OutStreamer.EmitSymbolValue(Stubs[i].second.getPointer(),
+ TD->getPointerSize(), 0);
+ }
+ Stubs.clear();
+ }
+ }
+}
+
+MachineLocation
+X86AsmPrinter::getDebugValueLocation(const MachineInstr *MI) const {
+ MachineLocation Location;
+ assert (MI->getNumOperands() == 7 && "Invalid no. of machine operands!");
+ // Frame address. Currently handles register +- offset only.
+
+ if (MI->getOperand(0).isReg() && MI->getOperand(3).isImm())
+ Location.set(MI->getOperand(0).getReg(), MI->getOperand(3).getImm());
+ else {
+ DEBUG(dbgs() << "DBG_VALUE instruction ignored! " << *MI << "\n");
+ }
+ return Location;
+}
+
+void X86AsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
+ raw_ostream &O) {
+ // Only the target-dependent form of DBG_VALUE should get here.
+ // Referencing the offset and metadata as NOps-2 and NOps-1 is
+ // probably portable to other targets; frame pointer location is not.
+ unsigned NOps = MI->getNumOperands();
+ assert(NOps==7);
+ O << '\t' << MAI->getCommentString() << "DEBUG_VALUE: ";
+ // cast away const; DIetc do not take const operands for some reason.
+ DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps-1).getMetadata()));
+ if (V.getContext().isSubprogram())
+ O << DISubprogram(V.getContext()).getDisplayName() << ":";
+ O << V.getName();
+ O << " <- ";
+ // Frame address. Currently handles register +- offset only.
+ O << '[';
+ if (MI->getOperand(0).isReg() && MI->getOperand(0).getReg())
+ printOperand(MI, 0, O);
+ else
+ O << "undef";
+ O << '+'; printOperand(MI, 3, O);
+ O << ']';
+ O << "+";
+ printOperand(MI, NOps-2, O);
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// Target Registry Stuff
+//===----------------------------------------------------------------------===//
+
+// Force static initialization.
+extern "C" void LLVMInitializeX86AsmPrinter() {
+ RegisterAsmPrinter<X86AsmPrinter> X(TheX86_32Target);
+ RegisterAsmPrinter<X86AsmPrinter> Y(TheX86_64Target);
+}
diff --git a/contrib/llvm/lib/Target/X86/X86AsmPrinter.h b/contrib/llvm/lib/Target/X86/X86AsmPrinter.h
new file mode 100644
index 0000000..3a50435
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86AsmPrinter.h
@@ -0,0 +1,87 @@
+//===-- X86AsmPrinter.h - Convert X86 LLVM code to assembly -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// AT&T assembly code printer class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86ASMPRINTER_H
+#define X86ASMPRINTER_H
+
+#include "X86.h"
+#include "X86MachineFunctionInfo.h"
+#include "X86TargetMachine.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+
+class MachineJumpTableInfo;
+class MCContext;
+class MCInst;
+class MCStreamer;
+class MCSymbol;
+
+class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
+ const X86Subtarget *Subtarget;
+ public:
+ explicit X86AsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : AsmPrinter(TM, Streamer) {
+ Subtarget = &TM.getSubtarget<X86Subtarget>();
+ }
+
+ virtual const char *getPassName() const {
+ return "X86 AT&T-Style Assembly Printer";
+ }
+
+ const X86Subtarget &getSubtarget() const { return *Subtarget; }
+
+ virtual void EmitStartOfAsmFile(Module &M);
+
+ virtual void EmitEndOfAsmFile(Module &M);
+
+ virtual void EmitInstruction(const MachineInstr *MI);
+
+ void printSymbolOperand(const MachineOperand &MO, raw_ostream &O);
+
+ // These methods are used by the tablegen'erated instruction printer.
+ void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O,
+ const char *Modifier = 0);
+ void print_pcrel_imm(const MachineInstr *MI, unsigned OpNo, raw_ostream &O);
+
+ bool printAsmMRegister(const MachineOperand &MO, char Mode, raw_ostream &O);
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &OS);
+ bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &OS);
+
+ void printMachineInstruction(const MachineInstr *MI);
+ void printSSECC(const MachineInstr *MI, unsigned Op, raw_ostream &O);
+ void printMemReference(const MachineInstr *MI, unsigned Op, raw_ostream &O,
+ const char *Modifier=NULL);
+ void printLeaMemReference(const MachineInstr *MI, unsigned Op, raw_ostream &O,
+ const char *Modifier=NULL);
+
+ void printPICLabel(const MachineInstr *MI, unsigned Op, raw_ostream &O);
+
+ bool runOnMachineFunction(MachineFunction &F);
+
+ void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS);
+
+ MachineLocation getDebugValueLocation(const MachineInstr *MI) const;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/X86COFFMachineModuleInfo.cpp b/contrib/llvm/lib/Target/X86/X86COFFMachineModuleInfo.cpp
new file mode 100644
index 0000000..4326814
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86COFFMachineModuleInfo.cpp
@@ -0,0 +1,20 @@
+//===-- llvm/CodeGen/X86COFFMachineModuleInfo.cpp -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is an MMI implementation for X86 COFF (windows) targets.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86COFFMachineModuleInfo.h"
+using namespace llvm;
+
+
+X86COFFMachineModuleInfo::~X86COFFMachineModuleInfo() {
+}
+
diff --git a/contrib/llvm/lib/Target/X86/X86COFFMachineModuleInfo.h b/contrib/llvm/lib/Target/X86/X86COFFMachineModuleInfo.h
new file mode 100644
index 0000000..98ab2a6
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86COFFMachineModuleInfo.h
@@ -0,0 +1,46 @@
+//===-- llvm/CodeGen/X86COFFMachineModuleInfo.h -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is an MMI implementation for X86 COFF (windows) targets.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86COFF_MACHINEMODULEINFO_H
+#define X86COFF_MACHINEMODULEINFO_H
+
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/ADT/DenseSet.h"
+#include "X86MachineFunctionInfo.h"
+
+namespace llvm {
+ class X86MachineFunctionInfo;
+ class TargetData;
+
+/// X86COFFMachineModuleInfo - This is a MachineModuleInfoImpl implementation
+/// for X86 COFF targets.
+class X86COFFMachineModuleInfo : public MachineModuleInfoImpl {
+ DenseSet<MCSymbol const *> Externals;
+public:
+ X86COFFMachineModuleInfo(const MachineModuleInfo &) {}
+ virtual ~X86COFFMachineModuleInfo();
+
+ void addExternalFunction(MCSymbol* Symbol) {
+ Externals.insert(Symbol);
+ }
+
+ typedef DenseSet<MCSymbol const *>::const_iterator externals_iterator;
+ externals_iterator externals_begin() const { return Externals.begin(); }
+ externals_iterator externals_end() const { return Externals.end(); }
+};
+
+
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/X86CallingConv.td b/contrib/llvm/lib/Target/X86/X86CallingConv.td
new file mode 100644
index 0000000..77b9905
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86CallingConv.td
@@ -0,0 +1,401 @@
+//===- X86CallingConv.td - Calling Conventions X86 32/64 ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This describes the calling conventions for the X86-32 and X86-64
+// architectures.
+//
+//===----------------------------------------------------------------------===//
+
+/// CCIfSubtarget - Match if the current subtarget has a feature F.
+class CCIfSubtarget<string F, CCAction A>
+ : CCIf<!strconcat("State.getTarget().getSubtarget<X86Subtarget>().", F), A>;
+
+//===----------------------------------------------------------------------===//
+// Return Value Calling Conventions
+//===----------------------------------------------------------------------===//
+
+// Return-value conventions common to all X86 CC's.
+def RetCC_X86Common : CallingConv<[
+ // Scalar values are returned in AX first, then DX. For i8, the ABI
+ // requires the values to be in AL and AH, however this code uses AL and DL
+ // instead. This is because using AH for the second register conflicts with
+ // the way LLVM does multiple return values -- a return of {i16,i8} would end
+ // up in AX and AH, which overlap. Front-ends wishing to conform to the ABI
+ // for functions that return two i8 values are currently expected to pack the
+ // values into an i16 (which uses AX, and thus AL:AH).
+ CCIfType<[i8] , CCAssignToReg<[AL, DL]>>,
+ CCIfType<[i16], CCAssignToReg<[AX, DX]>>,
+ CCIfType<[i32], CCAssignToReg<[EAX, EDX]>>,
+ CCIfType<[i64], CCAssignToReg<[RAX, RDX]>>,
+
+ // Vector types are returned in XMM0 and XMM1, when they fit. XMM2 and XMM3
+ // can only be used by ABI non-compliant code. If the target doesn't have XMM
+ // registers, it won't have vector types.
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCAssignToReg<[XMM0,XMM1,XMM2,XMM3]>>,
+
+ // 256-bit vectors are returned in YMM0 and XMM1, when they fit. YMM2 and YMM3
+ // can only be used by ABI non-compliant code. This vector type is only
+ // supported while using the AVX target feature.
+ CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ CCAssignToReg<[YMM0,YMM1,YMM2,YMM3]>>,
+
+ // MMX vector types are always returned in MM0. If the target doesn't have
+ // MM0, it doesn't support these vector types.
+ CCIfType<[x86mmx], CCAssignToReg<[MM0]>>,
+
+ // Long double types are always returned in ST0 (even with SSE).
+ CCIfType<[f80], CCAssignToReg<[ST0, ST1]>>
+]>;
+
+// X86-32 C return-value convention.
+def RetCC_X86_32_C : CallingConv<[
+ // The X86-32 calling convention returns FP values in ST0, unless marked
+ // with "inreg" (used here to distinguish one kind of reg from another,
+ // weirdly; this is really the sse-regparm calling convention) in which
+ // case they use XMM0, otherwise it is the same as the common X86 calling
+ // conv.
+ CCIfInReg<CCIfSubtarget<"hasXMMInt()",
+ CCIfType<[f32, f64], CCAssignToReg<[XMM0,XMM1,XMM2]>>>>,
+ CCIfType<[f32,f64], CCAssignToReg<[ST0, ST1]>>,
+ CCDelegateTo<RetCC_X86Common>
+]>;
+
+// X86-32 FastCC return-value convention.
+def RetCC_X86_32_Fast : CallingConv<[
+ // The X86-32 fastcc returns 1, 2, or 3 FP values in XMM0-2 if the target has
+ // SSE2.
+ // This can happen when a float, 2 x float, or 3 x float vector is split by
+ // target lowering, and is returned in 1-3 sse regs.
+ CCIfType<[f32], CCIfSubtarget<"hasXMMInt()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>,
+ CCIfType<[f64], CCIfSubtarget<"hasXMMInt()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>,
+
+ // For integers, ECX can be used as an extra return register
+ CCIfType<[i8], CCAssignToReg<[AL, DL, CL]>>,
+ CCIfType<[i16], CCAssignToReg<[AX, DX, CX]>>,
+ CCIfType<[i32], CCAssignToReg<[EAX, EDX, ECX]>>,
+
+ // Otherwise, it is the same as the common X86 calling convention.
+ CCDelegateTo<RetCC_X86Common>
+]>;
+
+// X86-64 C return-value convention.
+def RetCC_X86_64_C : CallingConv<[
+ // The X86-64 calling convention always returns FP values in XMM0.
+ CCIfType<[f32], CCAssignToReg<[XMM0, XMM1]>>,
+ CCIfType<[f64], CCAssignToReg<[XMM0, XMM1]>>,
+
+ // MMX vector types are always returned in XMM0.
+ CCIfType<[x86mmx], CCAssignToReg<[XMM0, XMM1]>>,
+ CCDelegateTo<RetCC_X86Common>
+]>;
+
+// X86-Win64 C return-value convention.
+def RetCC_X86_Win64_C : CallingConv<[
+ // The X86-Win64 calling convention always returns __m64 values in RAX.
+ CCIfType<[x86mmx], CCBitConvertToType<i64>>,
+
+ // Otherwise, everything is the same as 'normal' X86-64 C CC.
+ CCDelegateTo<RetCC_X86_64_C>
+]>;
+
+
+// This is the root return-value convention for the X86-32 backend.
+def RetCC_X86_32 : CallingConv<[
+ // If FastCC, use RetCC_X86_32_Fast.
+ CCIfCC<"CallingConv::Fast", CCDelegateTo<RetCC_X86_32_Fast>>,
+ // Otherwise, use RetCC_X86_32_C.
+ CCDelegateTo<RetCC_X86_32_C>
+]>;
+
+// This is the root return-value convention for the X86-64 backend.
+def RetCC_X86_64 : CallingConv<[
+ // Mingw64 and native Win64 use Win64 CC
+ CCIfSubtarget<"isTargetWin64()", CCDelegateTo<RetCC_X86_Win64_C>>,
+
+ // Otherwise, drop to normal X86-64 CC
+ CCDelegateTo<RetCC_X86_64_C>
+]>;
+
+// This is the return-value convention used for the entire X86 backend.
+def RetCC_X86 : CallingConv<[
+ CCIfSubtarget<"is64Bit()", CCDelegateTo<RetCC_X86_64>>,
+ CCDelegateTo<RetCC_X86_32>
+]>;
+
+//===----------------------------------------------------------------------===//
+// X86-64 Argument Calling Conventions
+//===----------------------------------------------------------------------===//
+
+def CC_X86_64_C : CallingConv<[
+ // Handles byval parameters.
+ CCIfByVal<CCPassByVal<8, 8>>,
+
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // The 'nest' parameter, if any, is passed in R10.
+ CCIfNest<CCAssignToReg<[R10]>>,
+
+ // The first 6 integer arguments are passed in integer registers.
+ CCIfType<[i32], CCAssignToReg<[EDI, ESI, EDX, ECX, R8D, R9D]>>,
+ CCIfType<[i64], CCAssignToReg<[RDI, RSI, RDX, RCX, R8 , R9 ]>>,
+
+ // The first 8 MMX vector arguments are passed in XMM registers on Darwin.
+ CCIfType<[x86mmx],
+ CCIfSubtarget<"isTargetDarwin()",
+ CCIfSubtarget<"hasXMMInt()",
+ CCPromoteToType<v2i64>>>>,
+
+ // The first 8 FP/Vector arguments are passed in XMM registers.
+ CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCIfSubtarget<"hasXMM()",
+ CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>,
+
+ // The first 8 256-bit vector arguments are passed in YMM registers.
+ CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ CCIfSubtarget<"hasAVX()",
+ CCAssignToReg<[YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7]>>>,
+
+ // Integer/FP values get stored in stack slots that are 8 bytes in size and
+ // 8-byte aligned if there are no more registers to hold them.
+ CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>,
+
+ // Long doubles get stack slots whose size and alignment depends on the
+ // subtarget.
+ CCIfType<[f80], CCAssignToStack<0, 0>>,
+
+ // Vectors get 16-byte stack slots that are 16-byte aligned.
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>,
+
+ // 256-bit vectors get 32-byte stack slots that are 32-byte aligned.
+ CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ CCAssignToStack<32, 32>>
+]>;
+
+// Calling convention used on Win64
+def CC_X86_Win64_C : CallingConv<[
+ // FIXME: Handle byval stuff.
+ // FIXME: Handle varargs.
+
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // The 'nest' parameter, if any, is passed in R10.
+ CCIfNest<CCAssignToReg<[R10]>>,
+
+ // 128 bit vectors are passed by pointer
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCPassIndirect<i64>>,
+
+ // The first 4 MMX vector arguments are passed in GPRs.
+ CCIfType<[x86mmx], CCBitConvertToType<i64>>,
+
+ // The first 4 integer arguments are passed in integer registers.
+ CCIfType<[i32], CCAssignToRegWithShadow<[ECX , EDX , R8D , R9D ],
+ [XMM0, XMM1, XMM2, XMM3]>>,
+
+ // Do not pass the sret argument in RCX, the Win64 thiscall calling
+ // convention requires "this" to be passed in RCX.
+ CCIfCC<"CallingConv::X86_ThisCall",
+ CCIfSRet<CCIfType<[i64], CCAssignToRegWithShadow<[RDX , R8 , R9 ],
+ [XMM1, XMM2, XMM3]>>>>,
+
+ CCIfType<[i64], CCAssignToRegWithShadow<[RCX , RDX , R8 , R9 ],
+ [XMM0, XMM1, XMM2, XMM3]>>,
+
+ // The first 4 FP/Vector arguments are passed in XMM registers.
+ CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCAssignToRegWithShadow<[XMM0, XMM1, XMM2, XMM3],
+ [RCX , RDX , R8 , R9 ]>>,
+
+ // Integer/FP values get stored in stack slots that are 8 bytes in size and
+ // 8-byte aligned if there are no more registers to hold them.
+ CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>,
+
+ // Long doubles get stack slots whose size and alignment depends on the
+ // subtarget.
+ CCIfType<[f80], CCAssignToStack<0, 0>>
+]>;
+
+def CC_X86_64_GHC : CallingConv<[
+ // Promote i8/i16/i32 arguments to i64.
+ CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
+
+ // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, SpLim
+ CCIfType<[i64],
+ CCAssignToReg<[R13, RBP, R12, RBX, R14, RSI, RDI, R8, R9, R15]>>,
+
+ // Pass in STG registers: F1, F2, F3, F4, D1, D2
+ CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCIfSubtarget<"hasXMM()",
+ CCAssignToReg<[XMM1, XMM2, XMM3, XMM4, XMM5, XMM6]>>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// X86 C Calling Convention
+//===----------------------------------------------------------------------===//
+
+/// CC_X86_32_Common - In all X86-32 calling conventions, extra integers and FP
+/// values are spilled on the stack, and the first 4 vector values go in XMM
+/// regs.
+def CC_X86_32_Common : CallingConv<[
+ // Handles byval parameters.
+ CCIfByVal<CCPassByVal<4, 4>>,
+
+ // The first 3 float or double arguments, if marked 'inreg' and if the call
+ // is not a vararg call and if SSE2 is available, are passed in SSE registers.
+ CCIfNotVarArg<CCIfInReg<CCIfType<[f32,f64],
+ CCIfSubtarget<"hasXMMInt()",
+ CCAssignToReg<[XMM0,XMM1,XMM2]>>>>>,
+
+ // The first 3 __m64 vector arguments are passed in mmx registers if the
+ // call is not a vararg call.
+ CCIfNotVarArg<CCIfType<[x86mmx],
+ CCAssignToReg<[MM0, MM1, MM2]>>>,
+
+ // Integer/Float values get stored in stack slots that are 4 bytes in
+ // size and 4-byte aligned.
+ CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
+
+ // Doubles get 8-byte slots that are 4-byte aligned.
+ CCIfType<[f64], CCAssignToStack<8, 4>>,
+
+ // Long doubles get slots whose size depends on the subtarget.
+ CCIfType<[f80], CCAssignToStack<0, 4>>,
+
+ // The first 4 SSE vector arguments are passed in XMM registers.
+ CCIfNotVarArg<CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCAssignToReg<[XMM0, XMM1, XMM2, XMM3]>>>,
+
+ // The first 4 AVX 256-bit vector arguments are passed in YMM registers.
+ CCIfNotVarArg<CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ CCIfSubtarget<"hasAVX()",
+ CCAssignToReg<[YMM0, YMM1, YMM2, YMM3]>>>>,
+
+ // Other SSE vectors get 16-byte stack slots that are 16-byte aligned.
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>,
+
+ // 256-bit AVX vectors get 32-byte stack slots that are 32-byte aligned.
+ CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ CCAssignToStack<32, 32>>,
+
+ // __m64 vectors get 8-byte stack slots that are 4-byte aligned. They are
+ // passed in the parameter area.
+ CCIfType<[x86mmx], CCAssignToStack<8, 4>>]>;
+
+def CC_X86_32_C : CallingConv<[
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // The 'nest' parameter, if any, is passed in ECX.
+ CCIfNest<CCAssignToReg<[ECX]>>,
+
+ // The first 3 integer arguments, if marked 'inreg' and if the call is not
+ // a vararg call, are passed in integer registers.
+ CCIfNotVarArg<CCIfInReg<CCIfType<[i32], CCAssignToReg<[EAX, EDX, ECX]>>>>,
+
+ // Otherwise, same as everything else.
+ CCDelegateTo<CC_X86_32_Common>
+]>;
+
+def CC_X86_32_FastCall : CallingConv<[
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // The 'nest' parameter, if any, is passed in EAX.
+ CCIfNest<CCAssignToReg<[EAX]>>,
+
+ // The first 2 integer arguments are passed in ECX/EDX
+ CCIfType<[i32], CCAssignToReg<[ECX, EDX]>>,
+
+ // Otherwise, same as everything else.
+ CCDelegateTo<CC_X86_32_Common>
+]>;
+
+def CC_X86_32_ThisCall : CallingConv<[
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // The 'nest' parameter, if any, is passed in EAX.
+ CCIfNest<CCAssignToReg<[EAX]>>,
+
+ // The first integer argument is passed in ECX
+ CCIfType<[i32], CCAssignToReg<[ECX]>>,
+
+ // Otherwise, same as everything else.
+ CCDelegateTo<CC_X86_32_Common>
+]>;
+
+def CC_X86_32_FastCC : CallingConv<[
+ // Handles byval parameters. Note that we can't rely on the delegation
+ // to CC_X86_32_Common for this because that happens after code that
+ // puts arguments in registers.
+ CCIfByVal<CCPassByVal<4, 4>>,
+
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // The 'nest' parameter, if any, is passed in EAX.
+ CCIfNest<CCAssignToReg<[EAX]>>,
+
+ // The first 2 integer arguments are passed in ECX/EDX
+ CCIfType<[i32], CCAssignToReg<[ECX, EDX]>>,
+
+ // The first 3 float or double arguments, if the call is not a vararg
+ // call and if SSE2 is available, are passed in SSE registers.
+ CCIfNotVarArg<CCIfType<[f32,f64],
+ CCIfSubtarget<"hasXMMInt()",
+ CCAssignToReg<[XMM0,XMM1,XMM2]>>>>,
+
+ // Doubles get 8-byte slots that are 8-byte aligned.
+ CCIfType<[f64], CCAssignToStack<8, 8>>,
+
+ // Otherwise, same as everything else.
+ CCDelegateTo<CC_X86_32_Common>
+]>;
+
+def CC_X86_32_GHC : CallingConv<[
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // Pass in STG registers: Base, Sp, Hp, R1
+ CCIfType<[i32], CCAssignToReg<[EBX, EBP, EDI, ESI]>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// X86 Root Argument Calling Conventions
+//===----------------------------------------------------------------------===//
+
+// This is the root argument convention for the X86-32 backend.
+def CC_X86_32 : CallingConv<[
+ CCIfCC<"CallingConv::X86_FastCall", CCDelegateTo<CC_X86_32_FastCall>>,
+ CCIfCC<"CallingConv::X86_ThisCall", CCDelegateTo<CC_X86_32_ThisCall>>,
+ CCIfCC<"CallingConv::Fast", CCDelegateTo<CC_X86_32_FastCC>>,
+ CCIfCC<"CallingConv::GHC", CCDelegateTo<CC_X86_32_GHC>>,
+
+ // Otherwise, drop to normal X86-32 CC
+ CCDelegateTo<CC_X86_32_C>
+]>;
+
+// This is the root argument convention for the X86-64 backend.
+def CC_X86_64 : CallingConv<[
+ CCIfCC<"CallingConv::GHC", CCDelegateTo<CC_X86_64_GHC>>,
+
+ // Mingw64 and native Win64 use Win64 CC
+ CCIfSubtarget<"isTargetWin64()", CCDelegateTo<CC_X86_Win64_C>>,
+
+ // Otherwise, drop to normal X86-64 CC
+ CCDelegateTo<CC_X86_64_C>
+]>;
+
+// This is the argument convention used for the entire X86 backend.
+def CC_X86 : CallingConv<[
+ CCIfSubtarget<"is64Bit()", CCDelegateTo<CC_X86_64>>,
+ CCDelegateTo<CC_X86_32>
+]>;
diff --git a/contrib/llvm/lib/Target/X86/X86CodeEmitter.cpp b/contrib/llvm/lib/Target/X86/X86CodeEmitter.cpp
new file mode 100644
index 0000000..aeff03a
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86CodeEmitter.cpp
@@ -0,0 +1,999 @@
+//===-- X86/X86CodeEmitter.cpp - Convert X86 code to machine code ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the pass that transforms the X86 machine instructions into
+// relocatable machine code.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "x86-emitter"
+#include "X86InstrInfo.h"
+#include "X86JITInfo.h"
+#include "X86Subtarget.h"
+#include "X86TargetMachine.h"
+#include "X86Relocations.h"
+#include "X86.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/PassManager.h"
+#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Function.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+STATISTIC(NumEmitted, "Number of machine instructions emitted");
+
+namespace {
+ template<class CodeEmitter>
+ class Emitter : public MachineFunctionPass {
+ const X86InstrInfo *II;
+ const TargetData *TD;
+ X86TargetMachine &TM;
+ CodeEmitter &MCE;
+ MachineModuleInfo *MMI;
+ intptr_t PICBaseOffset;
+ bool Is64BitMode;
+ bool IsPIC;
+ public:
+ static char ID;
+ explicit Emitter(X86TargetMachine &tm, CodeEmitter &mce)
+ : MachineFunctionPass(ID), II(0), TD(0), TM(tm),
+ MCE(mce), PICBaseOffset(0), Is64BitMode(false),
+ IsPIC(TM.getRelocationModel() == Reloc::PIC_) {}
+ Emitter(X86TargetMachine &tm, CodeEmitter &mce,
+ const X86InstrInfo &ii, const TargetData &td, bool is64)
+ : MachineFunctionPass(ID), II(&ii), TD(&td), TM(tm),
+ MCE(mce), PICBaseOffset(0), Is64BitMode(is64),
+ IsPIC(TM.getRelocationModel() == Reloc::PIC_) {}
+
+ bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const {
+ return "X86 Machine Code Emitter";
+ }
+
+ void emitInstruction(MachineInstr &MI, const MCInstrDesc *Desc);
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<MachineModuleInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ private:
+ void emitPCRelativeBlockAddress(MachineBasicBlock *MBB);
+ void emitGlobalAddress(const GlobalValue *GV, unsigned Reloc,
+ intptr_t Disp = 0, intptr_t PCAdj = 0,
+ bool Indirect = false);
+ void emitExternalSymbolAddress(const char *ES, unsigned Reloc);
+ void emitConstPoolAddress(unsigned CPI, unsigned Reloc, intptr_t Disp = 0,
+ intptr_t PCAdj = 0);
+ void emitJumpTableAddress(unsigned JTI, unsigned Reloc,
+ intptr_t PCAdj = 0);
+
+ void emitDisplacementField(const MachineOperand *RelocOp, int DispVal,
+ intptr_t Adj = 0, bool IsPCRel = true);
+
+ void emitRegModRMByte(unsigned ModRMReg, unsigned RegOpcodeField);
+ void emitRegModRMByte(unsigned RegOpcodeField);
+ void emitSIBByte(unsigned SS, unsigned Index, unsigned Base);
+ void emitConstant(uint64_t Val, unsigned Size);
+
+ void emitMemModRMByte(const MachineInstr &MI,
+ unsigned Op, unsigned RegOpcodeField,
+ intptr_t PCAdj = 0);
+ };
+
+template<class CodeEmitter>
+ char Emitter<CodeEmitter>::ID = 0;
+} // end anonymous namespace.
+
+/// createX86CodeEmitterPass - Return a pass that emits the collected X86 code
+/// to the specified templated MachineCodeEmitter object.
+FunctionPass *llvm::createX86JITCodeEmitterPass(X86TargetMachine &TM,
+ JITCodeEmitter &JCE) {
+ return new Emitter<JITCodeEmitter>(TM, JCE);
+}
+
+template<class CodeEmitter>
+bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) {
+ MMI = &getAnalysis<MachineModuleInfo>();
+ MCE.setModuleInfo(MMI);
+
+ II = TM.getInstrInfo();
+ TD = TM.getTargetData();
+ Is64BitMode = TM.getSubtarget<X86Subtarget>().is64Bit();
+ IsPIC = TM.getRelocationModel() == Reloc::PIC_;
+
+ do {
+ DEBUG(dbgs() << "JITTing function '"
+ << MF.getFunction()->getName() << "'\n");
+ MCE.startFunction(MF);
+ for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
+ MBB != E; ++MBB) {
+ MCE.StartMachineBasicBlock(MBB);
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I) {
+ const MCInstrDesc &Desc = I->getDesc();
+ emitInstruction(*I, &Desc);
+ // MOVPC32r is basically a call plus a pop instruction.
+ if (Desc.getOpcode() == X86::MOVPC32r)
+ emitInstruction(*I, &II->get(X86::POP32r));
+ ++NumEmitted; // Keep track of the # of mi's emitted
+ }
+ }
+ } while (MCE.finishFunction(MF));
+
+ return false;
+}
+
+/// determineREX - Determine if the MachineInstr has to be encoded with a X86-64
+/// REX prefix which specifies 1) 64-bit instructions, 2) non-default operand
+/// size, and 3) use of X86-64 extended registers.
+static unsigned determineREX(const MachineInstr &MI) {
+ unsigned REX = 0;
+ const MCInstrDesc &Desc = MI.getDesc();
+
+ // Pseudo instructions do not need REX prefix byte.
+ if ((Desc.TSFlags & X86II::FormMask) == X86II::Pseudo)
+ return 0;
+ if (Desc.TSFlags & X86II::REX_W)
+ REX |= 1 << 3;
+
+ unsigned NumOps = Desc.getNumOperands();
+ if (NumOps) {
+ bool isTwoAddr = NumOps > 1 &&
+ Desc.getOperandConstraint(1, MCOI::TIED_TO) != -1;
+
+ // If it accesses SPL, BPL, SIL, or DIL, then it requires a 0x40 REX prefix.
+ unsigned i = isTwoAddr ? 1 : 0;
+ for (unsigned e = NumOps; i != e; ++i) {
+ const MachineOperand& MO = MI.getOperand(i);
+ if (MO.isReg()) {
+ unsigned Reg = MO.getReg();
+ if (X86II::isX86_64NonExtLowByteReg(Reg))
+ REX |= 0x40;
+ }
+ }
+
+ switch (Desc.TSFlags & X86II::FormMask) {
+ case X86II::MRMInitReg:
+ if (X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0)))
+ REX |= (1 << 0) | (1 << 2);
+ break;
+ case X86II::MRMSrcReg: {
+ if (X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0)))
+ REX |= 1 << 2;
+ i = isTwoAddr ? 2 : 1;
+ for (unsigned e = NumOps; i != e; ++i) {
+ const MachineOperand& MO = MI.getOperand(i);
+ if (X86InstrInfo::isX86_64ExtendedReg(MO))
+ REX |= 1 << 0;
+ }
+ break;
+ }
+ case X86II::MRMSrcMem: {
+ if (X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0)))
+ REX |= 1 << 2;
+ unsigned Bit = 0;
+ i = isTwoAddr ? 2 : 1;
+ for (; i != NumOps; ++i) {
+ const MachineOperand& MO = MI.getOperand(i);
+ if (MO.isReg()) {
+ if (X86InstrInfo::isX86_64ExtendedReg(MO))
+ REX |= 1 << Bit;
+ Bit++;
+ }
+ }
+ break;
+ }
+ case X86II::MRM0m: case X86II::MRM1m:
+ case X86II::MRM2m: case X86II::MRM3m:
+ case X86II::MRM4m: case X86II::MRM5m:
+ case X86II::MRM6m: case X86II::MRM7m:
+ case X86II::MRMDestMem: {
+ unsigned e = (isTwoAddr ? X86::AddrNumOperands+1 : X86::AddrNumOperands);
+ i = isTwoAddr ? 1 : 0;
+ if (NumOps > e && X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(e)))
+ REX |= 1 << 2;
+ unsigned Bit = 0;
+ for (; i != e; ++i) {
+ const MachineOperand& MO = MI.getOperand(i);
+ if (MO.isReg()) {
+ if (X86InstrInfo::isX86_64ExtendedReg(MO))
+ REX |= 1 << Bit;
+ Bit++;
+ }
+ }
+ break;
+ }
+ default: {
+ if (X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0)))
+ REX |= 1 << 0;
+ i = isTwoAddr ? 2 : 1;
+ for (unsigned e = NumOps; i != e; ++i) {
+ const MachineOperand& MO = MI.getOperand(i);
+ if (X86InstrInfo::isX86_64ExtendedReg(MO))
+ REX |= 1 << 2;
+ }
+ break;
+ }
+ }
+ }
+ return REX;
+}
+
+
+/// emitPCRelativeBlockAddress - This method keeps track of the information
+/// necessary to resolve the address of this block later and emits a dummy
+/// value.
+///
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitPCRelativeBlockAddress(MachineBasicBlock *MBB) {
+ // Remember where this reference was and where it is to so we can
+ // deal with it later.
+ MCE.addRelocation(MachineRelocation::getBB(MCE.getCurrentPCOffset(),
+ X86::reloc_pcrel_word, MBB));
+ MCE.emitWordLE(0);
+}
+
+/// emitGlobalAddress - Emit the specified address to the code stream assuming
+/// this is part of a "take the address of a global" instruction.
+///
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitGlobalAddress(const GlobalValue *GV,
+ unsigned Reloc,
+ intptr_t Disp /* = 0 */,
+ intptr_t PCAdj /* = 0 */,
+ bool Indirect /* = false */) {
+ intptr_t RelocCST = Disp;
+ if (Reloc == X86::reloc_picrel_word)
+ RelocCST = PICBaseOffset;
+ else if (Reloc == X86::reloc_pcrel_word)
+ RelocCST = PCAdj;
+ MachineRelocation MR = Indirect
+ ? MachineRelocation::getIndirectSymbol(MCE.getCurrentPCOffset(), Reloc,
+ const_cast<GlobalValue *>(GV),
+ RelocCST, false)
+ : MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc,
+ const_cast<GlobalValue *>(GV), RelocCST, false);
+ MCE.addRelocation(MR);
+ // The relocated value will be added to the displacement
+ if (Reloc == X86::reloc_absolute_dword)
+ MCE.emitDWordLE(Disp);
+ else
+ MCE.emitWordLE((int32_t)Disp);
+}
+
+/// emitExternalSymbolAddress - Arrange for the address of an external symbol to
+/// be emitted to the current location in the function, and allow it to be PC
+/// relative.
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitExternalSymbolAddress(const char *ES,
+ unsigned Reloc) {
+ intptr_t RelocCST = (Reloc == X86::reloc_picrel_word) ? PICBaseOffset : 0;
+
+ // X86 never needs stubs because instruction selection will always pick
+ // an instruction sequence that is large enough to hold any address
+ // to a symbol.
+ // (see X86ISelLowering.cpp, near 2039: X86TargetLowering::LowerCall)
+ bool NeedStub = false;
+ MCE.addRelocation(MachineRelocation::getExtSym(MCE.getCurrentPCOffset(),
+ Reloc, ES, RelocCST,
+ 0, NeedStub));
+ if (Reloc == X86::reloc_absolute_dword)
+ MCE.emitDWordLE(0);
+ else
+ MCE.emitWordLE(0);
+}
+
+/// emitConstPoolAddress - Arrange for the address of an constant pool
+/// to be emitted to the current location in the function, and allow it to be PC
+/// relative.
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitConstPoolAddress(unsigned CPI, unsigned Reloc,
+ intptr_t Disp /* = 0 */,
+ intptr_t PCAdj /* = 0 */) {
+ intptr_t RelocCST = 0;
+ if (Reloc == X86::reloc_picrel_word)
+ RelocCST = PICBaseOffset;
+ else if (Reloc == X86::reloc_pcrel_word)
+ RelocCST = PCAdj;
+ MCE.addRelocation(MachineRelocation::getConstPool(MCE.getCurrentPCOffset(),
+ Reloc, CPI, RelocCST));
+ // The relocated value will be added to the displacement
+ if (Reloc == X86::reloc_absolute_dword)
+ MCE.emitDWordLE(Disp);
+ else
+ MCE.emitWordLE((int32_t)Disp);
+}
+
+/// emitJumpTableAddress - Arrange for the address of a jump table to
+/// be emitted to the current location in the function, and allow it to be PC
+/// relative.
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitJumpTableAddress(unsigned JTI, unsigned Reloc,
+ intptr_t PCAdj /* = 0 */) {
+ intptr_t RelocCST = 0;
+ if (Reloc == X86::reloc_picrel_word)
+ RelocCST = PICBaseOffset;
+ else if (Reloc == X86::reloc_pcrel_word)
+ RelocCST = PCAdj;
+ MCE.addRelocation(MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(),
+ Reloc, JTI, RelocCST));
+ // The relocated value will be added to the displacement
+ if (Reloc == X86::reloc_absolute_dword)
+ MCE.emitDWordLE(0);
+ else
+ MCE.emitWordLE(0);
+}
+
+inline static unsigned char ModRMByte(unsigned Mod, unsigned RegOpcode,
+ unsigned RM) {
+ assert(Mod < 4 && RegOpcode < 8 && RM < 8 && "ModRM Fields out of range!");
+ return RM | (RegOpcode << 3) | (Mod << 6);
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitRegModRMByte(unsigned ModRMReg,
+ unsigned RegOpcodeFld){
+ MCE.emitByte(ModRMByte(3, RegOpcodeFld, X86_MC::getX86RegNum(ModRMReg)));
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitRegModRMByte(unsigned RegOpcodeFld) {
+ MCE.emitByte(ModRMByte(3, RegOpcodeFld, 0));
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitSIBByte(unsigned SS,
+ unsigned Index,
+ unsigned Base) {
+ // SIB byte is in the same format as the ModRMByte...
+ MCE.emitByte(ModRMByte(SS, Index, Base));
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitConstant(uint64_t Val, unsigned Size) {
+ // Output the constant in little endian byte order...
+ for (unsigned i = 0; i != Size; ++i) {
+ MCE.emitByte(Val & 255);
+ Val >>= 8;
+ }
+}
+
+/// isDisp8 - Return true if this signed displacement fits in a 8-bit
+/// sign-extended field.
+static bool isDisp8(int Value) {
+ return Value == (signed char)Value;
+}
+
+static bool gvNeedsNonLazyPtr(const MachineOperand &GVOp,
+ const TargetMachine &TM) {
+ // For Darwin-64, simulate the linktime GOT by using the same non-lazy-pointer
+ // mechanism as 32-bit mode.
+ if (TM.getSubtarget<X86Subtarget>().is64Bit() &&
+ !TM.getSubtarget<X86Subtarget>().isTargetDarwin())
+ return false;
+
+ // Return true if this is a reference to a stub containing the address of the
+ // global, not the global itself.
+ return isGlobalStubReference(GVOp.getTargetFlags());
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitDisplacementField(const MachineOperand *RelocOp,
+ int DispVal,
+ intptr_t Adj /* = 0 */,
+ bool IsPCRel /* = true */) {
+ // If this is a simple integer displacement that doesn't require a relocation,
+ // emit it now.
+ if (!RelocOp) {
+ emitConstant(DispVal, 4);
+ return;
+ }
+
+ // Otherwise, this is something that requires a relocation. Emit it as such
+ // now.
+ unsigned RelocType = Is64BitMode ?
+ (IsPCRel ? X86::reloc_pcrel_word : X86::reloc_absolute_word_sext)
+ : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word);
+ if (RelocOp->isGlobal()) {
+ // In 64-bit static small code model, we could potentially emit absolute.
+ // But it's probably not beneficial. If the MCE supports using RIP directly
+ // do it, otherwise fallback to absolute (this is determined by IsPCRel).
+ // 89 05 00 00 00 00 mov %eax,0(%rip) # PC-relative
+ // 89 04 25 00 00 00 00 mov %eax,0x0 # Absolute
+ bool Indirect = gvNeedsNonLazyPtr(*RelocOp, TM);
+ emitGlobalAddress(RelocOp->getGlobal(), RelocType, RelocOp->getOffset(),
+ Adj, Indirect);
+ } else if (RelocOp->isSymbol()) {
+ emitExternalSymbolAddress(RelocOp->getSymbolName(), RelocType);
+ } else if (RelocOp->isCPI()) {
+ emitConstPoolAddress(RelocOp->getIndex(), RelocType,
+ RelocOp->getOffset(), Adj);
+ } else {
+ assert(RelocOp->isJTI() && "Unexpected machine operand!");
+ emitJumpTableAddress(RelocOp->getIndex(), RelocType, Adj);
+ }
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI,
+ unsigned Op,unsigned RegOpcodeField,
+ intptr_t PCAdj) {
+ const MachineOperand &Op3 = MI.getOperand(Op+3);
+ int DispVal = 0;
+ const MachineOperand *DispForReloc = 0;
+
+ // Figure out what sort of displacement we have to handle here.
+ if (Op3.isGlobal()) {
+ DispForReloc = &Op3;
+ } else if (Op3.isSymbol()) {
+ DispForReloc = &Op3;
+ } else if (Op3.isCPI()) {
+ if (!MCE.earlyResolveAddresses() || Is64BitMode || IsPIC) {
+ DispForReloc = &Op3;
+ } else {
+ DispVal += MCE.getConstantPoolEntryAddress(Op3.getIndex());
+ DispVal += Op3.getOffset();
+ }
+ } else if (Op3.isJTI()) {
+ if (!MCE.earlyResolveAddresses() || Is64BitMode || IsPIC) {
+ DispForReloc = &Op3;
+ } else {
+ DispVal += MCE.getJumpTableEntryAddress(Op3.getIndex());
+ }
+ } else {
+ DispVal = Op3.getImm();
+ }
+
+ const MachineOperand &Base = MI.getOperand(Op);
+ const MachineOperand &Scale = MI.getOperand(Op+1);
+ const MachineOperand &IndexReg = MI.getOperand(Op+2);
+
+ unsigned BaseReg = Base.getReg();
+
+ // Handle %rip relative addressing.
+ if (BaseReg == X86::RIP ||
+ (Is64BitMode && DispForReloc)) { // [disp32+RIP] in X86-64 mode
+ assert(IndexReg.getReg() == 0 && Is64BitMode &&
+ "Invalid rip-relative address");
+ MCE.emitByte(ModRMByte(0, RegOpcodeField, 5));
+ emitDisplacementField(DispForReloc, DispVal, PCAdj, true);
+ return;
+ }
+
+ // Indicate that the displacement will use an pcrel or absolute reference
+ // by default. MCEs able to resolve addresses on-the-fly use pcrel by default
+ // while others, unless explicit asked to use RIP, use absolute references.
+ bool IsPCRel = MCE.earlyResolveAddresses() ? true : false;
+
+ // Is a SIB byte needed?
+ // If no BaseReg, issue a RIP relative instruction only if the MCE can
+ // resolve addresses on-the-fly, otherwise use SIB (Intel Manual 2A, table
+ // 2-7) and absolute references.
+ unsigned BaseRegNo = -1U;
+ if (BaseReg != 0 && BaseReg != X86::RIP)
+ BaseRegNo = X86_MC::getX86RegNum(BaseReg);
+
+ if (// The SIB byte must be used if there is an index register.
+ IndexReg.getReg() == 0 &&
+ // The SIB byte must be used if the base is ESP/RSP/R12, all of which
+ // encode to an R/M value of 4, which indicates that a SIB byte is
+ // present.
+ BaseRegNo != N86::ESP &&
+ // If there is no base register and we're in 64-bit mode, we need a SIB
+ // byte to emit an addr that is just 'disp32' (the non-RIP relative form).
+ (!Is64BitMode || BaseReg != 0)) {
+ if (BaseReg == 0 || // [disp32] in X86-32 mode
+ BaseReg == X86::RIP) { // [disp32+RIP] in X86-64 mode
+ MCE.emitByte(ModRMByte(0, RegOpcodeField, 5));
+ emitDisplacementField(DispForReloc, DispVal, PCAdj, true);
+ return;
+ }
+
+ // If the base is not EBP/ESP and there is no displacement, use simple
+ // indirect register encoding, this handles addresses like [EAX]. The
+ // encoding for [EBP] with no displacement means [disp32] so we handle it
+ // by emitting a displacement of 0 below.
+ if (!DispForReloc && DispVal == 0 && BaseRegNo != N86::EBP) {
+ MCE.emitByte(ModRMByte(0, RegOpcodeField, BaseRegNo));
+ return;
+ }
+
+ // Otherwise, if the displacement fits in a byte, encode as [REG+disp8].
+ if (!DispForReloc && isDisp8(DispVal)) {
+ MCE.emitByte(ModRMByte(1, RegOpcodeField, BaseRegNo));
+ emitConstant(DispVal, 1);
+ return;
+ }
+
+ // Otherwise, emit the most general non-SIB encoding: [REG+disp32]
+ MCE.emitByte(ModRMByte(2, RegOpcodeField, BaseRegNo));
+ emitDisplacementField(DispForReloc, DispVal, PCAdj, IsPCRel);
+ return;
+ }
+
+ // Otherwise we need a SIB byte, so start by outputting the ModR/M byte first.
+ assert(IndexReg.getReg() != X86::ESP &&
+ IndexReg.getReg() != X86::RSP && "Cannot use ESP as index reg!");
+
+ bool ForceDisp32 = false;
+ bool ForceDisp8 = false;
+ if (BaseReg == 0) {
+ // If there is no base register, we emit the special case SIB byte with
+ // MOD=0, BASE=4, to JUST get the index, scale, and displacement.
+ MCE.emitByte(ModRMByte(0, RegOpcodeField, 4));
+ ForceDisp32 = true;
+ } else if (DispForReloc) {
+ // Emit the normal disp32 encoding.
+ MCE.emitByte(ModRMByte(2, RegOpcodeField, 4));
+ ForceDisp32 = true;
+ } else if (DispVal == 0 && BaseRegNo != N86::EBP) {
+ // Emit no displacement ModR/M byte
+ MCE.emitByte(ModRMByte(0, RegOpcodeField, 4));
+ } else if (isDisp8(DispVal)) {
+ // Emit the disp8 encoding...
+ MCE.emitByte(ModRMByte(1, RegOpcodeField, 4));
+ ForceDisp8 = true; // Make sure to force 8 bit disp if Base=EBP
+ } else {
+ // Emit the normal disp32 encoding...
+ MCE.emitByte(ModRMByte(2, RegOpcodeField, 4));
+ }
+
+ // Calculate what the SS field value should be...
+ static const unsigned SSTable[] = { ~0U, 0, 1, ~0U, 2, ~0U, ~0U, ~0U, 3 };
+ unsigned SS = SSTable[Scale.getImm()];
+
+ if (BaseReg == 0) {
+ // Handle the SIB byte for the case where there is no base, see Intel
+ // Manual 2A, table 2-7. The displacement has already been output.
+ unsigned IndexRegNo;
+ if (IndexReg.getReg())
+ IndexRegNo = X86_MC::getX86RegNum(IndexReg.getReg());
+ else // Examples: [ESP+1*<noreg>+4] or [scaled idx]+disp32 (MOD=0,BASE=5)
+ IndexRegNo = 4;
+ emitSIBByte(SS, IndexRegNo, 5);
+ } else {
+ unsigned BaseRegNo = X86_MC::getX86RegNum(BaseReg);
+ unsigned IndexRegNo;
+ if (IndexReg.getReg())
+ IndexRegNo = X86_MC::getX86RegNum(IndexReg.getReg());
+ else
+ IndexRegNo = 4; // For example [ESP+1*<noreg>+4]
+ emitSIBByte(SS, IndexRegNo, BaseRegNo);
+ }
+
+ // Do we need to output a displacement?
+ if (ForceDisp8) {
+ emitConstant(DispVal, 1);
+ } else if (DispVal != 0 || ForceDisp32) {
+ emitDisplacementField(DispForReloc, DispVal, PCAdj, IsPCRel);
+ }
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI,
+ const MCInstrDesc *Desc) {
+ DEBUG(dbgs() << MI);
+
+ // If this is a pseudo instruction, lower it.
+ switch (Desc->getOpcode()) {
+ case X86::ADD16rr_DB: Desc = &II->get(X86::OR16rr); MI.setDesc(*Desc);break;
+ case X86::ADD32rr_DB: Desc = &II->get(X86::OR32rr); MI.setDesc(*Desc);break;
+ case X86::ADD64rr_DB: Desc = &II->get(X86::OR64rr); MI.setDesc(*Desc);break;
+ case X86::ADD16ri_DB: Desc = &II->get(X86::OR16ri); MI.setDesc(*Desc);break;
+ case X86::ADD32ri_DB: Desc = &II->get(X86::OR32ri); MI.setDesc(*Desc);break;
+ case X86::ADD64ri32_DB:Desc = &II->get(X86::OR64ri32);MI.setDesc(*Desc);break;
+ case X86::ADD16ri8_DB: Desc = &II->get(X86::OR16ri8);MI.setDesc(*Desc);break;
+ case X86::ADD32ri8_DB: Desc = &II->get(X86::OR32ri8);MI.setDesc(*Desc);break;
+ case X86::ADD64ri8_DB: Desc = &II->get(X86::OR64ri8);MI.setDesc(*Desc);break;
+ }
+
+
+ MCE.processDebugLoc(MI.getDebugLoc(), true);
+
+ unsigned Opcode = Desc->Opcode;
+
+ // Emit the lock opcode prefix as needed.
+ if (Desc->TSFlags & X86II::LOCK)
+ MCE.emitByte(0xF0);
+
+ // Emit segment override opcode prefix as needed.
+ switch (Desc->TSFlags & X86II::SegOvrMask) {
+ case X86II::FS:
+ MCE.emitByte(0x64);
+ break;
+ case X86II::GS:
+ MCE.emitByte(0x65);
+ break;
+ default: llvm_unreachable("Invalid segment!");
+ case 0: break; // No segment override!
+ }
+
+ // Emit the repeat opcode prefix as needed.
+ if ((Desc->TSFlags & X86II::Op0Mask) == X86II::REP)
+ MCE.emitByte(0xF3);
+
+ // Emit the operand size opcode prefix as needed.
+ if (Desc->TSFlags & X86II::OpSize)
+ MCE.emitByte(0x66);
+
+ // Emit the address size opcode prefix as needed.
+ if (Desc->TSFlags & X86II::AdSize)
+ MCE.emitByte(0x67);
+
+ bool Need0FPrefix = false;
+ switch (Desc->TSFlags & X86II::Op0Mask) {
+ case X86II::TB: // Two-byte opcode prefix
+ case X86II::T8: // 0F 38
+ case X86II::TA: // 0F 3A
+ case X86II::A6: // 0F A6
+ case X86II::A7: // 0F A7
+ Need0FPrefix = true;
+ break;
+ case X86II::TF: // F2 0F 38
+ MCE.emitByte(0xF2);
+ Need0FPrefix = true;
+ break;
+ case X86II::REP: break; // already handled.
+ case X86II::XS: // F3 0F
+ MCE.emitByte(0xF3);
+ Need0FPrefix = true;
+ break;
+ case X86II::XD: // F2 0F
+ MCE.emitByte(0xF2);
+ Need0FPrefix = true;
+ break;
+ case X86II::D8: case X86II::D9: case X86II::DA: case X86II::DB:
+ case X86II::DC: case X86II::DD: case X86II::DE: case X86II::DF:
+ MCE.emitByte(0xD8+
+ (((Desc->TSFlags & X86II::Op0Mask)-X86II::D8)
+ >> X86II::Op0Shift));
+ break; // Two-byte opcode prefix
+ default: llvm_unreachable("Invalid prefix!");
+ case 0: break; // No prefix!
+ }
+
+ // Handle REX prefix.
+ if (Is64BitMode) {
+ if (unsigned REX = determineREX(MI))
+ MCE.emitByte(0x40 | REX);
+ }
+
+ // 0x0F escape code must be emitted just before the opcode.
+ if (Need0FPrefix)
+ MCE.emitByte(0x0F);
+
+ switch (Desc->TSFlags & X86II::Op0Mask) {
+ case X86II::TF: // F2 0F 38
+ case X86II::T8: // 0F 38
+ MCE.emitByte(0x38);
+ break;
+ case X86II::TA: // 0F 3A
+ MCE.emitByte(0x3A);
+ break;
+ case X86II::A6: // 0F A6
+ MCE.emitByte(0xA6);
+ break;
+ case X86II::A7: // 0F A7
+ MCE.emitByte(0xA7);
+ break;
+ }
+
+ // If this is a two-address instruction, skip one of the register operands.
+ unsigned NumOps = Desc->getNumOperands();
+ unsigned CurOp = 0;
+ if (NumOps > 1 && Desc->getOperandConstraint(1, MCOI::TIED_TO) != -1)
+ ++CurOp;
+ else if (NumOps > 2 && Desc->getOperandConstraint(NumOps-1,MCOI::TIED_TO)== 0)
+ // Skip the last source operand that is tied_to the dest reg. e.g. LXADD32
+ --NumOps;
+
+ unsigned char BaseOpcode = X86II::getBaseOpcodeFor(Desc->TSFlags);
+ switch (Desc->TSFlags & X86II::FormMask) {
+ default:
+ llvm_unreachable("Unknown FormMask value in X86 MachineCodeEmitter!");
+ case X86II::Pseudo:
+ // Remember the current PC offset, this is the PIC relocation
+ // base address.
+ switch (Opcode) {
+ default:
+ llvm_unreachable("pseudo instructions should be removed before code"
+ " emission");
+ break;
+ // Do nothing for Int_MemBarrier - it's just a comment. Add a debug
+ // to make it slightly easier to see.
+ case X86::Int_MemBarrier:
+ DEBUG(dbgs() << "#MEMBARRIER\n");
+ break;
+
+ case TargetOpcode::INLINEASM:
+ // We allow inline assembler nodes with empty bodies - they can
+ // implicitly define registers, which is ok for JIT.
+ if (MI.getOperand(0).getSymbolName()[0])
+ report_fatal_error("JIT does not support inline asm!");
+ break;
+ case TargetOpcode::PROLOG_LABEL:
+ case TargetOpcode::GC_LABEL:
+ case TargetOpcode::EH_LABEL:
+ MCE.emitLabel(MI.getOperand(0).getMCSymbol());
+ break;
+
+ case TargetOpcode::IMPLICIT_DEF:
+ case TargetOpcode::KILL:
+ break;
+ case X86::MOVPC32r: {
+ // This emits the "call" portion of this pseudo instruction.
+ MCE.emitByte(BaseOpcode);
+ emitConstant(0, X86II::getSizeOfImm(Desc->TSFlags));
+ // Remember PIC base.
+ PICBaseOffset = (intptr_t) MCE.getCurrentPCOffset();
+ X86JITInfo *JTI = TM.getJITInfo();
+ JTI->setPICBase(MCE.getCurrentPCValue());
+ break;
+ }
+ }
+ CurOp = NumOps;
+ break;
+ case X86II::RawFrm: {
+ MCE.emitByte(BaseOpcode);
+
+ if (CurOp == NumOps)
+ break;
+
+ const MachineOperand &MO = MI.getOperand(CurOp++);
+
+ DEBUG(dbgs() << "RawFrm CurOp " << CurOp << "\n");
+ DEBUG(dbgs() << "isMBB " << MO.isMBB() << "\n");
+ DEBUG(dbgs() << "isGlobal " << MO.isGlobal() << "\n");
+ DEBUG(dbgs() << "isSymbol " << MO.isSymbol() << "\n");
+ DEBUG(dbgs() << "isImm " << MO.isImm() << "\n");
+
+ if (MO.isMBB()) {
+ emitPCRelativeBlockAddress(MO.getMBB());
+ break;
+ }
+
+ if (MO.isGlobal()) {
+ emitGlobalAddress(MO.getGlobal(), X86::reloc_pcrel_word,
+ MO.getOffset(), 0);
+ break;
+ }
+
+ if (MO.isSymbol()) {
+ emitExternalSymbolAddress(MO.getSymbolName(), X86::reloc_pcrel_word);
+ break;
+ }
+
+ // FIXME: Only used by hackish MCCodeEmitter, remove when dead.
+ if (MO.isJTI()) {
+ emitJumpTableAddress(MO.getIndex(), X86::reloc_pcrel_word);
+ break;
+ }
+
+ assert(MO.isImm() && "Unknown RawFrm operand!");
+ if (Opcode == X86::CALLpcrel32 || Opcode == X86::CALL64pcrel32 ||
+ Opcode == X86::WINCALL64pcrel32) {
+ // Fix up immediate operand for pc relative calls.
+ intptr_t Imm = (intptr_t)MO.getImm();
+ Imm = Imm - MCE.getCurrentPCValue() - 4;
+ emitConstant(Imm, X86II::getSizeOfImm(Desc->TSFlags));
+ } else
+ emitConstant(MO.getImm(), X86II::getSizeOfImm(Desc->TSFlags));
+ break;
+ }
+
+ case X86II::AddRegFrm: {
+ MCE.emitByte(BaseOpcode +
+ X86_MC::getX86RegNum(MI.getOperand(CurOp++).getReg()));
+
+ if (CurOp == NumOps)
+ break;
+
+ const MachineOperand &MO1 = MI.getOperand(CurOp++);
+ unsigned Size = X86II::getSizeOfImm(Desc->TSFlags);
+ if (MO1.isImm()) {
+ emitConstant(MO1.getImm(), Size);
+ break;
+ }
+
+ unsigned rt = Is64BitMode ? X86::reloc_pcrel_word
+ : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word);
+ if (Opcode == X86::MOV64ri64i32)
+ rt = X86::reloc_absolute_word; // FIXME: add X86II flag?
+ // This should not occur on Darwin for relocatable objects.
+ if (Opcode == X86::MOV64ri)
+ rt = X86::reloc_absolute_dword; // FIXME: add X86II flag?
+ if (MO1.isGlobal()) {
+ bool Indirect = gvNeedsNonLazyPtr(MO1, TM);
+ emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0,
+ Indirect);
+ } else if (MO1.isSymbol())
+ emitExternalSymbolAddress(MO1.getSymbolName(), rt);
+ else if (MO1.isCPI())
+ emitConstPoolAddress(MO1.getIndex(), rt);
+ else if (MO1.isJTI())
+ emitJumpTableAddress(MO1.getIndex(), rt);
+ break;
+ }
+
+ case X86II::MRMDestReg: {
+ MCE.emitByte(BaseOpcode);
+ emitRegModRMByte(MI.getOperand(CurOp).getReg(),
+ X86_MC::getX86RegNum(MI.getOperand(CurOp+1).getReg()));
+ CurOp += 2;
+ if (CurOp != NumOps)
+ emitConstant(MI.getOperand(CurOp++).getImm(),
+ X86II::getSizeOfImm(Desc->TSFlags));
+ break;
+ }
+ case X86II::MRMDestMem: {
+ MCE.emitByte(BaseOpcode);
+ emitMemModRMByte(MI, CurOp,
+ X86_MC::getX86RegNum(MI.getOperand(CurOp + X86::AddrNumOperands)
+ .getReg()));
+ CurOp += X86::AddrNumOperands + 1;
+ if (CurOp != NumOps)
+ emitConstant(MI.getOperand(CurOp++).getImm(),
+ X86II::getSizeOfImm(Desc->TSFlags));
+ break;
+ }
+
+ case X86II::MRMSrcReg:
+ MCE.emitByte(BaseOpcode);
+ emitRegModRMByte(MI.getOperand(CurOp+1).getReg(),
+ X86_MC::getX86RegNum(MI.getOperand(CurOp).getReg()));
+ CurOp += 2;
+ if (CurOp != NumOps)
+ emitConstant(MI.getOperand(CurOp++).getImm(),
+ X86II::getSizeOfImm(Desc->TSFlags));
+ break;
+
+ case X86II::MRMSrcMem: {
+ int AddrOperands = X86::AddrNumOperands;
+
+ intptr_t PCAdj = (CurOp + AddrOperands + 1 != NumOps) ?
+ X86II::getSizeOfImm(Desc->TSFlags) : 0;
+
+ MCE.emitByte(BaseOpcode);
+ emitMemModRMByte(MI, CurOp+1,
+ X86_MC::getX86RegNum(MI.getOperand(CurOp).getReg()),PCAdj);
+ CurOp += AddrOperands + 1;
+ if (CurOp != NumOps)
+ emitConstant(MI.getOperand(CurOp++).getImm(),
+ X86II::getSizeOfImm(Desc->TSFlags));
+ break;
+ }
+
+ case X86II::MRM0r: case X86II::MRM1r:
+ case X86II::MRM2r: case X86II::MRM3r:
+ case X86II::MRM4r: case X86II::MRM5r:
+ case X86II::MRM6r: case X86II::MRM7r: {
+ MCE.emitByte(BaseOpcode);
+ emitRegModRMByte(MI.getOperand(CurOp++).getReg(),
+ (Desc->TSFlags & X86II::FormMask)-X86II::MRM0r);
+
+ if (CurOp == NumOps)
+ break;
+
+ const MachineOperand &MO1 = MI.getOperand(CurOp++);
+ unsigned Size = X86II::getSizeOfImm(Desc->TSFlags);
+ if (MO1.isImm()) {
+ emitConstant(MO1.getImm(), Size);
+ break;
+ }
+
+ unsigned rt = Is64BitMode ? X86::reloc_pcrel_word
+ : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word);
+ if (Opcode == X86::MOV64ri32)
+ rt = X86::reloc_absolute_word_sext; // FIXME: add X86II flag?
+ if (MO1.isGlobal()) {
+ bool Indirect = gvNeedsNonLazyPtr(MO1, TM);
+ emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0,
+ Indirect);
+ } else if (MO1.isSymbol())
+ emitExternalSymbolAddress(MO1.getSymbolName(), rt);
+ else if (MO1.isCPI())
+ emitConstPoolAddress(MO1.getIndex(), rt);
+ else if (MO1.isJTI())
+ emitJumpTableAddress(MO1.getIndex(), rt);
+ break;
+ }
+
+ case X86II::MRM0m: case X86II::MRM1m:
+ case X86II::MRM2m: case X86II::MRM3m:
+ case X86II::MRM4m: case X86II::MRM5m:
+ case X86II::MRM6m: case X86II::MRM7m: {
+ intptr_t PCAdj = (CurOp + X86::AddrNumOperands != NumOps) ?
+ (MI.getOperand(CurOp+X86::AddrNumOperands).isImm() ?
+ X86II::getSizeOfImm(Desc->TSFlags) : 4) : 0;
+
+ MCE.emitByte(BaseOpcode);
+ emitMemModRMByte(MI, CurOp, (Desc->TSFlags & X86II::FormMask)-X86II::MRM0m,
+ PCAdj);
+ CurOp += X86::AddrNumOperands;
+
+ if (CurOp == NumOps)
+ break;
+
+ const MachineOperand &MO = MI.getOperand(CurOp++);
+ unsigned Size = X86II::getSizeOfImm(Desc->TSFlags);
+ if (MO.isImm()) {
+ emitConstant(MO.getImm(), Size);
+ break;
+ }
+
+ unsigned rt = Is64BitMode ? X86::reloc_pcrel_word
+ : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word);
+ if (Opcode == X86::MOV64mi32)
+ rt = X86::reloc_absolute_word_sext; // FIXME: add X86II flag?
+ if (MO.isGlobal()) {
+ bool Indirect = gvNeedsNonLazyPtr(MO, TM);
+ emitGlobalAddress(MO.getGlobal(), rt, MO.getOffset(), 0,
+ Indirect);
+ } else if (MO.isSymbol())
+ emitExternalSymbolAddress(MO.getSymbolName(), rt);
+ else if (MO.isCPI())
+ emitConstPoolAddress(MO.getIndex(), rt);
+ else if (MO.isJTI())
+ emitJumpTableAddress(MO.getIndex(), rt);
+ break;
+ }
+
+ case X86II::MRMInitReg:
+ MCE.emitByte(BaseOpcode);
+ // Duplicate register, used by things like MOV8r0 (aka xor reg,reg).
+ emitRegModRMByte(MI.getOperand(CurOp).getReg(),
+ X86_MC::getX86RegNum(MI.getOperand(CurOp).getReg()));
+ ++CurOp;
+ break;
+
+ case X86II::MRM_C1:
+ MCE.emitByte(BaseOpcode);
+ MCE.emitByte(0xC1);
+ break;
+ case X86II::MRM_C8:
+ MCE.emitByte(BaseOpcode);
+ MCE.emitByte(0xC8);
+ break;
+ case X86II::MRM_C9:
+ MCE.emitByte(BaseOpcode);
+ MCE.emitByte(0xC9);
+ break;
+ case X86II::MRM_E8:
+ MCE.emitByte(BaseOpcode);
+ MCE.emitByte(0xE8);
+ break;
+ case X86II::MRM_F0:
+ MCE.emitByte(BaseOpcode);
+ MCE.emitByte(0xF0);
+ break;
+ }
+
+ if (!Desc->isVariadic() && CurOp != NumOps) {
+#ifndef NDEBUG
+ dbgs() << "Cannot encode all operands of: " << MI << "\n";
+#endif
+ llvm_unreachable(0);
+ }
+
+ MCE.processDebugLoc(MI.getDebugLoc(), false);
+}
diff --git a/contrib/llvm/lib/Target/X86/X86ELFWriterInfo.cpp b/contrib/llvm/lib/Target/X86/X86ELFWriterInfo.cpp
new file mode 100644
index 0000000..4a72d15
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86ELFWriterInfo.cpp
@@ -0,0 +1,153 @@
+//===-- X86ELFWriterInfo.cpp - ELF Writer Info for the X86 backend --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements ELF writer information for the X86 backend.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86ELFWriterInfo.h"
+#include "X86Relocations.h"
+#include "llvm/Function.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Implementation of the X86ELFWriterInfo class
+//===----------------------------------------------------------------------===//
+
+X86ELFWriterInfo::X86ELFWriterInfo(bool is64Bit_, bool isLittleEndian_)
+ : TargetELFWriterInfo(is64Bit_, isLittleEndian_) {
+ EMachine = is64Bit ? EM_X86_64 : EM_386;
+ }
+
+X86ELFWriterInfo::~X86ELFWriterInfo() {}
+
+unsigned X86ELFWriterInfo::getRelocationType(unsigned MachineRelTy) const {
+ if (is64Bit) {
+ switch(MachineRelTy) {
+ case X86::reloc_pcrel_word:
+ return ELF::R_X86_64_PC32;
+ case X86::reloc_absolute_word:
+ return ELF::R_X86_64_32;
+ case X86::reloc_absolute_word_sext:
+ return ELF::R_X86_64_32S;
+ case X86::reloc_absolute_dword:
+ return ELF::R_X86_64_64;
+ case X86::reloc_picrel_word:
+ default:
+ llvm_unreachable("unknown x86_64 machine relocation type");
+ }
+ } else {
+ switch(MachineRelTy) {
+ case X86::reloc_pcrel_word:
+ return ELF::R_386_PC32;
+ case X86::reloc_absolute_word:
+ return ELF::R_386_32;
+ case X86::reloc_absolute_word_sext:
+ case X86::reloc_absolute_dword:
+ case X86::reloc_picrel_word:
+ default:
+ llvm_unreachable("unknown x86 machine relocation type");
+ }
+ }
+ return 0;
+}
+
+long int X86ELFWriterInfo::getDefaultAddendForRelTy(unsigned RelTy,
+ long int Modifier) const {
+ if (is64Bit) {
+ switch(RelTy) {
+ case ELF::R_X86_64_PC32: return Modifier - 4;
+ case ELF::R_X86_64_32:
+ case ELF::R_X86_64_32S:
+ case ELF::R_X86_64_64:
+ return Modifier;
+ default:
+ llvm_unreachable("unknown x86_64 relocation type");
+ }
+ } else {
+ switch(RelTy) {
+ case ELF::R_386_PC32: return Modifier - 4;
+ case ELF::R_386_32: return Modifier;
+ default:
+ llvm_unreachable("unknown x86 relocation type");
+ }
+ }
+ return 0;
+}
+
+unsigned X86ELFWriterInfo::getRelocationTySize(unsigned RelTy) const {
+ if (is64Bit) {
+ switch(RelTy) {
+ case ELF::R_X86_64_PC32:
+ case ELF::R_X86_64_32:
+ case ELF::R_X86_64_32S:
+ return 32;
+ case ELF::R_X86_64_64:
+ return 64;
+ default:
+ llvm_unreachable("unknown x86_64 relocation type");
+ }
+ } else {
+ switch(RelTy) {
+ case ELF::R_386_PC32:
+ case ELF::R_386_32:
+ return 32;
+ default:
+ llvm_unreachable("unknown x86 relocation type");
+ }
+ }
+ return 0;
+}
+
+bool X86ELFWriterInfo::isPCRelativeRel(unsigned RelTy) const {
+ if (is64Bit) {
+ switch(RelTy) {
+ case ELF::R_X86_64_PC32:
+ return true;
+ case ELF::R_X86_64_32:
+ case ELF::R_X86_64_32S:
+ case ELF::R_X86_64_64:
+ return false;
+ default:
+ llvm_unreachable("unknown x86_64 relocation type");
+ }
+ } else {
+ switch(RelTy) {
+ case ELF::R_386_PC32:
+ return true;
+ case ELF::R_386_32:
+ return false;
+ default:
+ llvm_unreachable("unknown x86 relocation type");
+ }
+ }
+ return 0;
+}
+
+unsigned X86ELFWriterInfo::getAbsoluteLabelMachineRelTy() const {
+ return is64Bit ?
+ X86::reloc_absolute_dword : X86::reloc_absolute_word;
+}
+
+long int X86ELFWriterInfo::computeRelocation(unsigned SymOffset,
+ unsigned RelOffset,
+ unsigned RelTy) const {
+
+ if (RelTy == ELF::R_X86_64_PC32 || RelTy == ELF::R_386_PC32)
+ return SymOffset - (RelOffset + 4);
+ else
+ assert(0 && "computeRelocation unknown for this relocation type");
+
+ return 0;
+}
diff --git a/contrib/llvm/lib/Target/X86/X86ELFWriterInfo.h b/contrib/llvm/lib/Target/X86/X86ELFWriterInfo.h
new file mode 100644
index 0000000..a45b5bb
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86ELFWriterInfo.h
@@ -0,0 +1,59 @@
+//===-- X86ELFWriterInfo.h - ELF Writer Info for X86 ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements ELF writer information for the X86 backend.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86_ELF_WRITER_INFO_H
+#define X86_ELF_WRITER_INFO_H
+
+#include "llvm/Target/TargetELFWriterInfo.h"
+
+namespace llvm {
+
+ class X86ELFWriterInfo : public TargetELFWriterInfo {
+
+ public:
+ X86ELFWriterInfo(bool is64Bit_, bool isLittleEndian_);
+ virtual ~X86ELFWriterInfo();
+
+ /// getRelocationType - Returns the target specific ELF Relocation type.
+ /// 'MachineRelTy' contains the object code independent relocation type
+ virtual unsigned getRelocationType(unsigned MachineRelTy) const;
+
+ /// hasRelocationAddend - True if the target uses an addend in the
+ /// ELF relocation entry.
+ virtual bool hasRelocationAddend() const { return is64Bit ? true : false; }
+
+ /// getDefaultAddendForRelTy - Gets the default addend value for a
+ /// relocation entry based on the target ELF relocation type.
+ virtual long int getDefaultAddendForRelTy(unsigned RelTy,
+ long int Modifier = 0) const;
+
+ /// getRelTySize - Returns the size of relocatable field in bits
+ virtual unsigned getRelocationTySize(unsigned RelTy) const;
+
+ /// isPCRelativeRel - True if the relocation type is pc relative
+ virtual bool isPCRelativeRel(unsigned RelTy) const;
+
+ /// getJumpTableRelocationTy - Returns the machine relocation type used
+ /// to reference a jumptable.
+ virtual unsigned getAbsoluteLabelMachineRelTy() const;
+
+ /// computeRelocation - Some relocatable fields could be relocated
+ /// directly, avoiding the relocation symbol emission, compute the
+ /// final relocation value for this symbol.
+ virtual long int computeRelocation(unsigned SymOffset, unsigned RelOffset,
+ unsigned RelTy) const;
+ };
+
+} // end llvm namespace
+
+#endif // X86_ELF_WRITER_INFO_H
diff --git a/contrib/llvm/lib/Target/X86/X86FastISel.cpp b/contrib/llvm/lib/Target/X86/X86FastISel.cpp
new file mode 100644
index 0000000..f912b28
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86FastISel.cpp
@@ -0,0 +1,2162 @@
+//===-- X86FastISel.cpp - X86 FastISel implementation ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the X86-specific support for the FastISel class. Much
+// of the target-specific code is generated by tablegen in the file
+// X86GenFastISel.inc, which is #included here.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "X86InstrBuilder.h"
+#include "X86ISelLowering.h"
+#include "X86RegisterInfo.h"
+#include "X86Subtarget.h"
+#include "X86TargetMachine.h"
+#include "llvm/CallingConv.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/GlobalAlias.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Operator.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+namespace {
+
+class X86FastISel : public FastISel {
+ /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const X86Subtarget *Subtarget;
+
+ /// StackPtr - Register used as the stack pointer.
+ ///
+ unsigned StackPtr;
+
+ /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87
+ /// floating point ops.
+ /// When SSE is available, use it for f32 operations.
+ /// When SSE2 is available, use it for f64 operations.
+ bool X86ScalarSSEf64;
+ bool X86ScalarSSEf32;
+
+public:
+ explicit X86FastISel(FunctionLoweringInfo &funcInfo) : FastISel(funcInfo) {
+ Subtarget = &TM.getSubtarget<X86Subtarget>();
+ StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;
+ X86ScalarSSEf64 = Subtarget->hasSSE2() || Subtarget->hasAVX();
+ X86ScalarSSEf32 = Subtarget->hasSSE1() || Subtarget->hasAVX();
+ }
+
+ virtual bool TargetSelectInstruction(const Instruction *I);
+
+ /// TryToFoldLoad - The specified machine instr operand is a vreg, and that
+ /// vreg is being provided by the specified load instruction. If possible,
+ /// try to fold the load as an operand to the instruction, returning true if
+ /// possible.
+ virtual bool TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
+ const LoadInst *LI);
+
+#include "X86GenFastISel.inc"
+
+private:
+ bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT);
+
+ bool X86FastEmitLoad(EVT VT, const X86AddressMode &AM, unsigned &RR);
+
+ bool X86FastEmitStore(EVT VT, const Value *Val, const X86AddressMode &AM);
+ bool X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM);
+
+ bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
+ unsigned &ResultReg);
+
+ bool X86SelectAddress(const Value *V, X86AddressMode &AM);
+ bool X86SelectCallAddress(const Value *V, X86AddressMode &AM);
+
+ bool X86SelectLoad(const Instruction *I);
+
+ bool X86SelectStore(const Instruction *I);
+
+ bool X86SelectRet(const Instruction *I);
+
+ bool X86SelectCmp(const Instruction *I);
+
+ bool X86SelectZExt(const Instruction *I);
+
+ bool X86SelectBranch(const Instruction *I);
+
+ bool X86SelectShift(const Instruction *I);
+
+ bool X86SelectSelect(const Instruction *I);
+
+ bool X86SelectTrunc(const Instruction *I);
+
+ bool X86SelectFPExt(const Instruction *I);
+ bool X86SelectFPTrunc(const Instruction *I);
+
+ bool X86VisitIntrinsicCall(const IntrinsicInst &I);
+ bool X86SelectCall(const Instruction *I);
+
+ bool DoSelectCall(const Instruction *I, const char *MemIntName);
+
+ const X86InstrInfo *getInstrInfo() const {
+ return getTargetMachine()->getInstrInfo();
+ }
+ const X86TargetMachine *getTargetMachine() const {
+ return static_cast<const X86TargetMachine *>(&TM);
+ }
+
+ unsigned TargetMaterializeConstant(const Constant *C);
+
+ unsigned TargetMaterializeAlloca(const AllocaInst *C);
+
+ unsigned TargetMaterializeFloatZero(const ConstantFP *CF);
+
+ /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is
+ /// computed in an SSE register, not on the X87 floating point stack.
+ bool isScalarFPTypeInSSEReg(EVT VT) const {
+ return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2
+ (VT == MVT::f32 && X86ScalarSSEf32); // f32 is when SSE1
+ }
+
+ bool isTypeLegal(Type *Ty, MVT &VT, bool AllowI1 = false);
+
+ bool IsMemcpySmall(uint64_t Len);
+
+ bool TryEmitSmallMemcpy(X86AddressMode DestAM,
+ X86AddressMode SrcAM, uint64_t Len);
+};
+
+} // end anonymous namespace.
+
+bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) {
+ EVT evt = TLI.getValueType(Ty, /*HandleUnknown=*/true);
+ if (evt == MVT::Other || !evt.isSimple())
+ // Unhandled type. Halt "fast" selection and bail.
+ return false;
+
+ VT = evt.getSimpleVT();
+ // For now, require SSE/SSE2 for performing floating-point operations,
+ // since x87 requires additional work.
+ if (VT == MVT::f64 && !X86ScalarSSEf64)
+ return false;
+ if (VT == MVT::f32 && !X86ScalarSSEf32)
+ return false;
+ // Similarly, no f80 support yet.
+ if (VT == MVT::f80)
+ return false;
+ // We only handle legal types. For example, on x86-32 the instruction
+ // selector contains all of the 64-bit instructions from x86-64,
+ // under the assumption that i64 won't be used if the target doesn't
+ // support it.
+ return (AllowI1 && VT == MVT::i1) || TLI.isTypeLegal(VT);
+}
+
+#include "X86GenCallingConv.inc"
+
+/// X86FastEmitLoad - Emit a machine instruction to load a value of type VT.
+/// The address is either pre-computed, i.e. Ptr, or a GlobalAddress, i.e. GV.
+/// Return true and the result register by reference if it is possible.
+bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM,
+ unsigned &ResultReg) {
+ // Get opcode and regclass of the output for the given load instruction.
+ unsigned Opc = 0;
+ const TargetRegisterClass *RC = NULL;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return false;
+ case MVT::i1:
+ case MVT::i8:
+ Opc = X86::MOV8rm;
+ RC = X86::GR8RegisterClass;
+ break;
+ case MVT::i16:
+ Opc = X86::MOV16rm;
+ RC = X86::GR16RegisterClass;
+ break;
+ case MVT::i32:
+ Opc = X86::MOV32rm;
+ RC = X86::GR32RegisterClass;
+ break;
+ case MVT::i64:
+ // Must be in x86-64 mode.
+ Opc = X86::MOV64rm;
+ RC = X86::GR64RegisterClass;
+ break;
+ case MVT::f32:
+ if (X86ScalarSSEf32) {
+ Opc = Subtarget->hasAVX() ? X86::VMOVSSrm : X86::MOVSSrm;
+ RC = X86::FR32RegisterClass;
+ } else {
+ Opc = X86::LD_Fp32m;
+ RC = X86::RFP32RegisterClass;
+ }
+ break;
+ case MVT::f64:
+ if (X86ScalarSSEf64) {
+ Opc = Subtarget->hasAVX() ? X86::VMOVSDrm : X86::MOVSDrm;
+ RC = X86::FR64RegisterClass;
+ } else {
+ Opc = X86::LD_Fp64m;
+ RC = X86::RFP64RegisterClass;
+ }
+ break;
+ case MVT::f80:
+ // No f80 support yet.
+ return false;
+ }
+
+ ResultReg = createResultReg(RC);
+ addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
+ DL, TII.get(Opc), ResultReg), AM);
+ return true;
+}
+
+/// X86FastEmitStore - Emit a machine instruction to store a value Val of
+/// type VT. The address is either pre-computed, consisted of a base ptr, Ptr
+/// and a displacement offset, or a GlobalAddress,
+/// i.e. V. Return true if it is possible.
+bool
+X86FastISel::X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM) {
+ // Get opcode and regclass of the output for the given store instruction.
+ unsigned Opc = 0;
+ switch (VT.getSimpleVT().SimpleTy) {
+ case MVT::f80: // No f80 support yet.
+ default: return false;
+ case MVT::i1: {
+ // Mask out all but lowest bit.
+ unsigned AndResult = createResultReg(X86::GR8RegisterClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(X86::AND8ri), AndResult).addReg(Val).addImm(1);
+ Val = AndResult;
+ }
+ // FALLTHROUGH, handling i1 as i8.
+ case MVT::i8: Opc = X86::MOV8mr; break;
+ case MVT::i16: Opc = X86::MOV16mr; break;
+ case MVT::i32: Opc = X86::MOV32mr; break;
+ case MVT::i64: Opc = X86::MOV64mr; break; // Must be in x86-64 mode.
+ case MVT::f32:
+ Opc = X86ScalarSSEf32 ?
+ (Subtarget->hasAVX() ? X86::VMOVSSmr : X86::MOVSSmr) : X86::ST_Fp32m;
+ break;
+ case MVT::f64:
+ Opc = X86ScalarSSEf64 ?
+ (Subtarget->hasAVX() ? X86::VMOVSDmr : X86::MOVSDmr) : X86::ST_Fp64m;
+ break;
+ }
+
+ addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
+ DL, TII.get(Opc)), AM).addReg(Val);
+ return true;
+}
+
+bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
+ const X86AddressMode &AM) {
+ // Handle 'null' like i32/i64 0.
+ if (isa<ConstantPointerNull>(Val))
+ Val = Constant::getNullValue(TD.getIntPtrType(Val->getContext()));
+
+ // If this is a store of a simple constant, fold the constant into the store.
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
+ unsigned Opc = 0;
+ bool Signed = true;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: break;
+ case MVT::i1: Signed = false; // FALLTHROUGH to handle as i8.
+ case MVT::i8: Opc = X86::MOV8mi; break;
+ case MVT::i16: Opc = X86::MOV16mi; break;
+ case MVT::i32: Opc = X86::MOV32mi; break;
+ case MVT::i64:
+ // Must be a 32-bit sign extended value.
+ if ((int)CI->getSExtValue() == CI->getSExtValue())
+ Opc = X86::MOV64mi32;
+ break;
+ }
+
+ if (Opc) {
+ addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
+ DL, TII.get(Opc)), AM)
+ .addImm(Signed ? (uint64_t) CI->getSExtValue() :
+ CI->getZExtValue());
+ return true;
+ }
+ }
+
+ unsigned ValReg = getRegForValue(Val);
+ if (ValReg == 0)
+ return false;
+
+ return X86FastEmitStore(VT, ValReg, AM);
+}
+
+/// X86FastEmitExtend - Emit a machine instruction to extend a value Src of
+/// type SrcVT to type DstVT using the specified extension opcode Opc (e.g.
+/// ISD::SIGN_EXTEND).
+bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT,
+ unsigned Src, EVT SrcVT,
+ unsigned &ResultReg) {
+ unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc,
+ Src, /*TODO: Kill=*/false);
+
+ if (RR != 0) {
+ ResultReg = RR;
+ return true;
+ } else
+ return false;
+}
+
+/// X86SelectAddress - Attempt to fill in an address from the given value.
+///
+bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
+ const User *U = NULL;
+ unsigned Opcode = Instruction::UserOp1;
+ if (const Instruction *I = dyn_cast<Instruction>(V)) {
+ // Don't walk into other basic blocks; it's possible we haven't
+ // visited them yet, so the instructions may not yet be assigned
+ // virtual registers.
+ if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(V)) ||
+ FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
+ Opcode = I->getOpcode();
+ U = I;
+ }
+ } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
+ Opcode = C->getOpcode();
+ U = C;
+ }
+
+ if (PointerType *Ty = dyn_cast<PointerType>(V->getType()))
+ if (Ty->getAddressSpace() > 255)
+ // Fast instruction selection doesn't support the special
+ // address spaces.
+ return false;
+
+ switch (Opcode) {
+ default: break;
+ case Instruction::BitCast:
+ // Look past bitcasts.
+ return X86SelectAddress(U->getOperand(0), AM);
+
+ case Instruction::IntToPtr:
+ // Look past no-op inttoptrs.
+ if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
+ return X86SelectAddress(U->getOperand(0), AM);
+ break;
+
+ case Instruction::PtrToInt:
+ // Look past no-op ptrtoints.
+ if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
+ return X86SelectAddress(U->getOperand(0), AM);
+ break;
+
+ case Instruction::Alloca: {
+ // Do static allocas.
+ const AllocaInst *A = cast<AllocaInst>(V);
+ DenseMap<const AllocaInst*, int>::iterator SI =
+ FuncInfo.StaticAllocaMap.find(A);
+ if (SI != FuncInfo.StaticAllocaMap.end()) {
+ AM.BaseType = X86AddressMode::FrameIndexBase;
+ AM.Base.FrameIndex = SI->second;
+ return true;
+ }
+ break;
+ }
+
+ case Instruction::Add: {
+ // Adds of constants are common and easy enough.
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
+ uint64_t Disp = (int32_t)AM.Disp + (uint64_t)CI->getSExtValue();
+ // They have to fit in the 32-bit signed displacement field though.
+ if (isInt<32>(Disp)) {
+ AM.Disp = (uint32_t)Disp;
+ return X86SelectAddress(U->getOperand(0), AM);
+ }
+ }
+ break;
+ }
+
+ case Instruction::GetElementPtr: {
+ X86AddressMode SavedAM = AM;
+
+ // Pattern-match simple GEPs.
+ uint64_t Disp = (int32_t)AM.Disp;
+ unsigned IndexReg = AM.IndexReg;
+ unsigned Scale = AM.Scale;
+ gep_type_iterator GTI = gep_type_begin(U);
+ // Iterate through the indices, folding what we can. Constants can be
+ // folded, and one dynamic index can be handled, if the scale is supported.
+ for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
+ i != e; ++i, ++GTI) {
+ const Value *Op = *i;
+ if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+ const StructLayout *SL = TD.getStructLayout(STy);
+ Disp += SL->getElementOffset(cast<ConstantInt>(Op)->getZExtValue());
+ continue;
+ }
+
+ // A array/variable index is always of the form i*S where S is the
+ // constant scale size. See if we can push the scale into immediates.
+ uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType());
+ for (;;) {
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
+ // Constant-offset addressing.
+ Disp += CI->getSExtValue() * S;
+ break;
+ }
+ if (isa<AddOperator>(Op) &&
+ (!isa<Instruction>(Op) ||
+ FuncInfo.MBBMap[cast<Instruction>(Op)->getParent()]
+ == FuncInfo.MBB) &&
+ isa<ConstantInt>(cast<AddOperator>(Op)->getOperand(1))) {
+ // An add (in the same block) with a constant operand. Fold the
+ // constant.
+ ConstantInt *CI =
+ cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
+ Disp += CI->getSExtValue() * S;
+ // Iterate on the other operand.
+ Op = cast<AddOperator>(Op)->getOperand(0);
+ continue;
+ }
+ if (IndexReg == 0 &&
+ (!AM.GV || !Subtarget->isPICStyleRIPRel()) &&
+ (S == 1 || S == 2 || S == 4 || S == 8)) {
+ // Scaled-index addressing.
+ Scale = S;
+ IndexReg = getRegForGEPIndex(Op).first;
+ if (IndexReg == 0)
+ return false;
+ break;
+ }
+ // Unsupported.
+ goto unsupported_gep;
+ }
+ }
+ // Check for displacement overflow.
+ if (!isInt<32>(Disp))
+ break;
+ // Ok, the GEP indices were covered by constant-offset and scaled-index
+ // addressing. Update the address state and move on to examining the base.
+ AM.IndexReg = IndexReg;
+ AM.Scale = Scale;
+ AM.Disp = (uint32_t)Disp;
+ if (X86SelectAddress(U->getOperand(0), AM))
+ return true;
+
+ // If we couldn't merge the gep value into this addr mode, revert back to
+ // our address and just match the value instead of completely failing.
+ AM = SavedAM;
+ break;
+ unsupported_gep:
+ // Ok, the GEP indices weren't all covered.
+ break;
+ }
+ }
+
+ // Handle constant address.
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
+ // Can't handle alternate code models yet.
+ if (TM.getCodeModel() != CodeModel::Small)
+ return false;
+
+ // Can't handle TLS yet.
+ if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
+ if (GVar->isThreadLocal())
+ return false;
+
+ // Can't handle TLS yet, part 2 (this is slightly crazy, but this is how
+ // it works...).
+ if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
+ if (const GlobalVariable *GVar =
+ dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal(false)))
+ if (GVar->isThreadLocal())
+ return false;
+
+ // RIP-relative addresses can't have additional register operands, so if
+ // we've already folded stuff into the addressing mode, just force the
+ // global value into its own register, which we can use as the basereg.
+ if (!Subtarget->isPICStyleRIPRel() ||
+ (AM.Base.Reg == 0 && AM.IndexReg == 0)) {
+ // Okay, we've committed to selecting this global. Set up the address.
+ AM.GV = GV;
+
+ // Allow the subtarget to classify the global.
+ unsigned char GVFlags = Subtarget->ClassifyGlobalReference(GV, TM);
+
+ // If this reference is relative to the pic base, set it now.
+ if (isGlobalRelativeToPICBase(GVFlags)) {
+ // FIXME: How do we know Base.Reg is free??
+ AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
+ }
+
+ // Unless the ABI requires an extra load, return a direct reference to
+ // the global.
+ if (!isGlobalStubReference(GVFlags)) {
+ if (Subtarget->isPICStyleRIPRel()) {
+ // Use rip-relative addressing if we can. Above we verified that the
+ // base and index registers are unused.
+ assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
+ AM.Base.Reg = X86::RIP;
+ }
+ AM.GVOpFlags = GVFlags;
+ return true;
+ }
+
+ // Ok, we need to do a load from a stub. If we've already loaded from
+ // this stub, reuse the loaded pointer, otherwise emit the load now.
+ DenseMap<const Value*, unsigned>::iterator I = LocalValueMap.find(V);
+ unsigned LoadReg;
+ if (I != LocalValueMap.end() && I->second != 0) {
+ LoadReg = I->second;
+ } else {
+ // Issue load from stub.
+ unsigned Opc = 0;
+ const TargetRegisterClass *RC = NULL;
+ X86AddressMode StubAM;
+ StubAM.Base.Reg = AM.Base.Reg;
+ StubAM.GV = GV;
+ StubAM.GVOpFlags = GVFlags;
+
+ // Prepare for inserting code in the local-value area.
+ SavePoint SaveInsertPt = enterLocalValueArea();
+
+ if (TLI.getPointerTy() == MVT::i64) {
+ Opc = X86::MOV64rm;
+ RC = X86::GR64RegisterClass;
+
+ if (Subtarget->isPICStyleRIPRel())
+ StubAM.Base.Reg = X86::RIP;
+ } else {
+ Opc = X86::MOV32rm;
+ RC = X86::GR32RegisterClass;
+ }
+
+ LoadReg = createResultReg(RC);
+ MachineInstrBuilder LoadMI =
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), LoadReg);
+ addFullAddress(LoadMI, StubAM);
+
+ // Ok, back to normal mode.
+ leaveLocalValueArea(SaveInsertPt);
+
+ // Prevent loading GV stub multiple times in same MBB.
+ LocalValueMap[V] = LoadReg;
+ }
+
+ // Now construct the final address. Note that the Disp, Scale,
+ // and Index values may already be set here.
+ AM.Base.Reg = LoadReg;
+ AM.GV = 0;
+ return true;
+ }
+ }
+
+ // If all else fails, try to materialize the value in a register.
+ if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
+ if (AM.Base.Reg == 0) {
+ AM.Base.Reg = getRegForValue(V);
+ return AM.Base.Reg != 0;
+ }
+ if (AM.IndexReg == 0) {
+ assert(AM.Scale == 1 && "Scale with no index!");
+ AM.IndexReg = getRegForValue(V);
+ return AM.IndexReg != 0;
+ }
+ }
+
+ return false;
+}
+
+/// X86SelectCallAddress - Attempt to fill in an address from the given value.
+///
+bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) {
+ const User *U = NULL;
+ unsigned Opcode = Instruction::UserOp1;
+ if (const Instruction *I = dyn_cast<Instruction>(V)) {
+ Opcode = I->getOpcode();
+ U = I;
+ } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
+ Opcode = C->getOpcode();
+ U = C;
+ }
+
+ switch (Opcode) {
+ default: break;
+ case Instruction::BitCast:
+ // Look past bitcasts.
+ return X86SelectCallAddress(U->getOperand(0), AM);
+
+ case Instruction::IntToPtr:
+ // Look past no-op inttoptrs.
+ if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
+ return X86SelectCallAddress(U->getOperand(0), AM);
+ break;
+
+ case Instruction::PtrToInt:
+ // Look past no-op ptrtoints.
+ if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
+ return X86SelectCallAddress(U->getOperand(0), AM);
+ break;
+ }
+
+ // Handle constant address.
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
+ // Can't handle alternate code models yet.
+ if (TM.getCodeModel() != CodeModel::Small)
+ return false;
+
+ // RIP-relative addresses can't have additional register operands.
+ if (Subtarget->isPICStyleRIPRel() &&
+ (AM.Base.Reg != 0 || AM.IndexReg != 0))
+ return false;
+
+ // Can't handle DLLImport.
+ if (GV->hasDLLImportLinkage())
+ return false;
+
+ // Can't handle TLS.
+ if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
+ if (GVar->isThreadLocal())
+ return false;
+
+ // Okay, we've committed to selecting this global. Set up the basic address.
+ AM.GV = GV;
+
+ // No ABI requires an extra load for anything other than DLLImport, which
+ // we rejected above. Return a direct reference to the global.
+ if (Subtarget->isPICStyleRIPRel()) {
+ // Use rip-relative addressing if we can. Above we verified that the
+ // base and index registers are unused.
+ assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
+ AM.Base.Reg = X86::RIP;
+ } else if (Subtarget->isPICStyleStubPIC()) {
+ AM.GVOpFlags = X86II::MO_PIC_BASE_OFFSET;
+ } else if (Subtarget->isPICStyleGOT()) {
+ AM.GVOpFlags = X86II::MO_GOTOFF;
+ }
+
+ return true;
+ }
+
+ // If all else fails, try to materialize the value in a register.
+ if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
+ if (AM.Base.Reg == 0) {
+ AM.Base.Reg = getRegForValue(V);
+ return AM.Base.Reg != 0;
+ }
+ if (AM.IndexReg == 0) {
+ assert(AM.Scale == 1 && "Scale with no index!");
+ AM.IndexReg = getRegForValue(V);
+ return AM.IndexReg != 0;
+ }
+ }
+
+ return false;
+}
+
+
+/// X86SelectStore - Select and emit code to implement store instructions.
+bool X86FastISel::X86SelectStore(const Instruction *I) {
+ // Atomic stores need special handling.
+ if (cast<StoreInst>(I)->isAtomic())
+ return false;
+
+ MVT VT;
+ if (!isTypeLegal(I->getOperand(0)->getType(), VT, /*AllowI1=*/true))
+ return false;
+
+ X86AddressMode AM;
+ if (!X86SelectAddress(I->getOperand(1), AM))
+ return false;
+
+ return X86FastEmitStore(VT, I->getOperand(0), AM);
+}
+
+/// X86SelectRet - Select and emit code to implement ret instructions.
+bool X86FastISel::X86SelectRet(const Instruction *I) {
+ const ReturnInst *Ret = cast<ReturnInst>(I);
+ const Function &F = *I->getParent()->getParent();
+
+ if (!FuncInfo.CanLowerReturn)
+ return false;
+
+ CallingConv::ID CC = F.getCallingConv();
+ if (CC != CallingConv::C &&
+ CC != CallingConv::Fast &&
+ CC != CallingConv::X86_FastCall)
+ return false;
+
+ if (Subtarget->isTargetWin64())
+ return false;
+
+ // Don't handle popping bytes on return for now.
+ if (FuncInfo.MF->getInfo<X86MachineFunctionInfo>()
+ ->getBytesToPopOnReturn() != 0)
+ return 0;
+
+ // fastcc with -tailcallopt is intended to provide a guaranteed
+ // tail call optimization. Fastisel doesn't know how to do that.
+ if (CC == CallingConv::Fast && GuaranteedTailCallOpt)
+ return false;
+
+ // Let SDISel handle vararg functions.
+ if (F.isVarArg())
+ return false;
+
+ if (Ret->getNumOperands() > 0) {
+ SmallVector<ISD::OutputArg, 4> Outs;
+ GetReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(),
+ Outs, TLI);
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ValLocs;
+ CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs,
+ I->getContext());
+ CCInfo.AnalyzeReturn(Outs, RetCC_X86);
+
+ const Value *RV = Ret->getOperand(0);
+ unsigned Reg = getRegForValue(RV);
+ if (Reg == 0)
+ return false;
+
+ // Only handle a single return value for now.
+ if (ValLocs.size() != 1)
+ return false;
+
+ CCValAssign &VA = ValLocs[0];
+
+ // Don't bother handling odd stuff for now.
+ if (VA.getLocInfo() != CCValAssign::Full)
+ return false;
+ // Only handle register returns for now.
+ if (!VA.isRegLoc())
+ return false;
+
+ // The calling-convention tables for x87 returns don't tell
+ // the whole story.
+ if (VA.getLocReg() == X86::ST0 || VA.getLocReg() == X86::ST1)
+ return false;
+
+ unsigned SrcReg = Reg + VA.getValNo();
+ EVT SrcVT = TLI.getValueType(RV->getType());
+ EVT DstVT = VA.getValVT();
+ // Special handling for extended integers.
+ if (SrcVT != DstVT) {
+ if (SrcVT != MVT::i1 && SrcVT != MVT::i8 && SrcVT != MVT::i16)
+ return false;
+
+ if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
+ return false;
+
+ assert(DstVT == MVT::i32 && "X86 should always ext to i32");
+
+ if (SrcVT == MVT::i1) {
+ if (Outs[0].Flags.isSExt())
+ return false;
+ SrcReg = FastEmitZExtFromI1(MVT::i8, SrcReg, /*TODO: Kill=*/false);
+ SrcVT = MVT::i8;
+ }
+ unsigned Op = Outs[0].Flags.isZExt() ? ISD::ZERO_EXTEND :
+ ISD::SIGN_EXTEND;
+ SrcReg = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Op,
+ SrcReg, /*TODO: Kill=*/false);
+ }
+
+ // Make the copy.
+ unsigned DstReg = VA.getLocReg();
+ const TargetRegisterClass* SrcRC = MRI.getRegClass(SrcReg);
+ // Avoid a cross-class copy. This is very unlikely.
+ if (!SrcRC->contains(DstReg))
+ return false;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ DstReg).addReg(SrcReg);
+
+ // Mark the register as live out of the function.
+ MRI.addLiveOut(VA.getLocReg());
+ }
+
+ // Now emit the RET.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::RET));
+ return true;
+}
+
+/// X86SelectLoad - Select and emit code to implement load instructions.
+///
+bool X86FastISel::X86SelectLoad(const Instruction *I) {
+ // Atomic loads need special handling.
+ if (cast<LoadInst>(I)->isAtomic())
+ return false;
+
+ MVT VT;
+ if (!isTypeLegal(I->getType(), VT, /*AllowI1=*/true))
+ return false;
+
+ X86AddressMode AM;
+ if (!X86SelectAddress(I->getOperand(0), AM))
+ return false;
+
+ unsigned ResultReg = 0;
+ if (X86FastEmitLoad(VT, AM, ResultReg)) {
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+ return false;
+}
+
+static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget) {
+ bool HasAVX = Subtarget->hasAVX();
+ bool X86ScalarSSEf32 = HasAVX || Subtarget->hasSSE1();
+ bool X86ScalarSSEf64 = HasAVX || Subtarget->hasSSE2();
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return 0;
+ case MVT::i8: return X86::CMP8rr;
+ case MVT::i16: return X86::CMP16rr;
+ case MVT::i32: return X86::CMP32rr;
+ case MVT::i64: return X86::CMP64rr;
+ case MVT::f32:
+ return X86ScalarSSEf32 ? (HasAVX ? X86::VUCOMISSrr : X86::UCOMISSrr) : 0;
+ case MVT::f64:
+ return X86ScalarSSEf64 ? (HasAVX ? X86::VUCOMISDrr : X86::UCOMISDrr) : 0;
+ }
+}
+
+/// X86ChooseCmpImmediateOpcode - If we have a comparison with RHS as the RHS
+/// of the comparison, return an opcode that works for the compare (e.g.
+/// CMP32ri) otherwise return 0.
+static unsigned X86ChooseCmpImmediateOpcode(EVT VT, const ConstantInt *RHSC) {
+ switch (VT.getSimpleVT().SimpleTy) {
+ // Otherwise, we can't fold the immediate into this comparison.
+ default: return 0;
+ case MVT::i8: return X86::CMP8ri;
+ case MVT::i16: return X86::CMP16ri;
+ case MVT::i32: return X86::CMP32ri;
+ case MVT::i64:
+ // 64-bit comparisons are only valid if the immediate fits in a 32-bit sext
+ // field.
+ if ((int)RHSC->getSExtValue() == RHSC->getSExtValue())
+ return X86::CMP64ri32;
+ return 0;
+ }
+}
+
+bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1,
+ EVT VT) {
+ unsigned Op0Reg = getRegForValue(Op0);
+ if (Op0Reg == 0) return false;
+
+ // Handle 'null' like i32/i64 0.
+ if (isa<ConstantPointerNull>(Op1))
+ Op1 = Constant::getNullValue(TD.getIntPtrType(Op0->getContext()));
+
+ // We have two options: compare with register or immediate. If the RHS of
+ // the compare is an immediate that we can fold into this compare, use
+ // CMPri, otherwise use CMPrr.
+ if (const ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
+ if (unsigned CompareImmOpc = X86ChooseCmpImmediateOpcode(VT, Op1C)) {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CompareImmOpc))
+ .addReg(Op0Reg)
+ .addImm(Op1C->getSExtValue());
+ return true;
+ }
+ }
+
+ unsigned CompareOpc = X86ChooseCmpOpcode(VT, Subtarget);
+ if (CompareOpc == 0) return false;
+
+ unsigned Op1Reg = getRegForValue(Op1);
+ if (Op1Reg == 0) return false;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CompareOpc))
+ .addReg(Op0Reg)
+ .addReg(Op1Reg);
+
+ return true;
+}
+
+bool X86FastISel::X86SelectCmp(const Instruction *I) {
+ const CmpInst *CI = cast<CmpInst>(I);
+
+ MVT VT;
+ if (!isTypeLegal(I->getOperand(0)->getType(), VT))
+ return false;
+
+ unsigned ResultReg = createResultReg(&X86::GR8RegClass);
+ unsigned SetCCOpc;
+ bool SwapArgs; // false -> compare Op0, Op1. true -> compare Op1, Op0.
+ switch (CI->getPredicate()) {
+ case CmpInst::FCMP_OEQ: {
+ if (!X86FastEmitCompare(CI->getOperand(0), CI->getOperand(1), VT))
+ return false;
+
+ unsigned EReg = createResultReg(&X86::GR8RegClass);
+ unsigned NPReg = createResultReg(&X86::GR8RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::SETEr), EReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(X86::SETNPr), NPReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(X86::AND8rr), ResultReg).addReg(NPReg).addReg(EReg);
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+ case CmpInst::FCMP_UNE: {
+ if (!X86FastEmitCompare(CI->getOperand(0), CI->getOperand(1), VT))
+ return false;
+
+ unsigned NEReg = createResultReg(&X86::GR8RegClass);
+ unsigned PReg = createResultReg(&X86::GR8RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::SETNEr), NEReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::SETPr), PReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::OR8rr),ResultReg)
+ .addReg(PReg).addReg(NEReg);
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+ case CmpInst::FCMP_OGT: SwapArgs = false; SetCCOpc = X86::SETAr; break;
+ case CmpInst::FCMP_OGE: SwapArgs = false; SetCCOpc = X86::SETAEr; break;
+ case CmpInst::FCMP_OLT: SwapArgs = true; SetCCOpc = X86::SETAr; break;
+ case CmpInst::FCMP_OLE: SwapArgs = true; SetCCOpc = X86::SETAEr; break;
+ case CmpInst::FCMP_ONE: SwapArgs = false; SetCCOpc = X86::SETNEr; break;
+ case CmpInst::FCMP_ORD: SwapArgs = false; SetCCOpc = X86::SETNPr; break;
+ case CmpInst::FCMP_UNO: SwapArgs = false; SetCCOpc = X86::SETPr; break;
+ case CmpInst::FCMP_UEQ: SwapArgs = false; SetCCOpc = X86::SETEr; break;
+ case CmpInst::FCMP_UGT: SwapArgs = true; SetCCOpc = X86::SETBr; break;
+ case CmpInst::FCMP_UGE: SwapArgs = true; SetCCOpc = X86::SETBEr; break;
+ case CmpInst::FCMP_ULT: SwapArgs = false; SetCCOpc = X86::SETBr; break;
+ case CmpInst::FCMP_ULE: SwapArgs = false; SetCCOpc = X86::SETBEr; break;
+
+ case CmpInst::ICMP_EQ: SwapArgs = false; SetCCOpc = X86::SETEr; break;
+ case CmpInst::ICMP_NE: SwapArgs = false; SetCCOpc = X86::SETNEr; break;
+ case CmpInst::ICMP_UGT: SwapArgs = false; SetCCOpc = X86::SETAr; break;
+ case CmpInst::ICMP_UGE: SwapArgs = false; SetCCOpc = X86::SETAEr; break;
+ case CmpInst::ICMP_ULT: SwapArgs = false; SetCCOpc = X86::SETBr; break;
+ case CmpInst::ICMP_ULE: SwapArgs = false; SetCCOpc = X86::SETBEr; break;
+ case CmpInst::ICMP_SGT: SwapArgs = false; SetCCOpc = X86::SETGr; break;
+ case CmpInst::ICMP_SGE: SwapArgs = false; SetCCOpc = X86::SETGEr; break;
+ case CmpInst::ICMP_SLT: SwapArgs = false; SetCCOpc = X86::SETLr; break;
+ case CmpInst::ICMP_SLE: SwapArgs = false; SetCCOpc = X86::SETLEr; break;
+ default:
+ return false;
+ }
+
+ const Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
+ if (SwapArgs)
+ std::swap(Op0, Op1);
+
+ // Emit a compare of Op0/Op1.
+ if (!X86FastEmitCompare(Op0, Op1, VT))
+ return false;
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(SetCCOpc), ResultReg);
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool X86FastISel::X86SelectZExt(const Instruction *I) {
+ // Handle zero-extension from i1 to i8, which is common.
+ if (!I->getOperand(0)->getType()->isIntegerTy(1))
+ return false;
+
+ EVT DstVT = TLI.getValueType(I->getType());
+ if (!TLI.isTypeLegal(DstVT))
+ return false;
+
+ unsigned ResultReg = getRegForValue(I->getOperand(0));
+ if (ResultReg == 0)
+ return false;
+
+ // Set the high bits to zero.
+ ResultReg = FastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false);
+ if (ResultReg == 0)
+ return false;
+
+ if (DstVT != MVT::i8) {
+ ResultReg = FastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::ZERO_EXTEND,
+ ResultReg, /*Kill=*/true);
+ if (ResultReg == 0)
+ return false;
+ }
+
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+
+bool X86FastISel::X86SelectBranch(const Instruction *I) {
+ // Unconditional branches are selected by tablegen-generated code.
+ // Handle a conditional branch.
+ const BranchInst *BI = cast<BranchInst>(I);
+ MachineBasicBlock *TrueMBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
+ MachineBasicBlock *FalseMBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
+
+ // Fold the common case of a conditional branch with a comparison
+ // in the same block (values defined on other blocks may not have
+ // initialized registers).
+ if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
+ if (CI->hasOneUse() && CI->getParent() == I->getParent()) {
+ EVT VT = TLI.getValueType(CI->getOperand(0)->getType());
+
+ // Try to take advantage of fallthrough opportunities.
+ CmpInst::Predicate Predicate = CI->getPredicate();
+ if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
+ std::swap(TrueMBB, FalseMBB);
+ Predicate = CmpInst::getInversePredicate(Predicate);
+ }
+
+ bool SwapArgs; // false -> compare Op0, Op1. true -> compare Op1, Op0.
+ unsigned BranchOpc; // Opcode to jump on, e.g. "X86::JA"
+
+ switch (Predicate) {
+ case CmpInst::FCMP_OEQ:
+ std::swap(TrueMBB, FalseMBB);
+ Predicate = CmpInst::FCMP_UNE;
+ // FALL THROUGH
+ case CmpInst::FCMP_UNE: SwapArgs = false; BranchOpc = X86::JNE_4; break;
+ case CmpInst::FCMP_OGT: SwapArgs = false; BranchOpc = X86::JA_4; break;
+ case CmpInst::FCMP_OGE: SwapArgs = false; BranchOpc = X86::JAE_4; break;
+ case CmpInst::FCMP_OLT: SwapArgs = true; BranchOpc = X86::JA_4; break;
+ case CmpInst::FCMP_OLE: SwapArgs = true; BranchOpc = X86::JAE_4; break;
+ case CmpInst::FCMP_ONE: SwapArgs = false; BranchOpc = X86::JNE_4; break;
+ case CmpInst::FCMP_ORD: SwapArgs = false; BranchOpc = X86::JNP_4; break;
+ case CmpInst::FCMP_UNO: SwapArgs = false; BranchOpc = X86::JP_4; break;
+ case CmpInst::FCMP_UEQ: SwapArgs = false; BranchOpc = X86::JE_4; break;
+ case CmpInst::FCMP_UGT: SwapArgs = true; BranchOpc = X86::JB_4; break;
+ case CmpInst::FCMP_UGE: SwapArgs = true; BranchOpc = X86::JBE_4; break;
+ case CmpInst::FCMP_ULT: SwapArgs = false; BranchOpc = X86::JB_4; break;
+ case CmpInst::FCMP_ULE: SwapArgs = false; BranchOpc = X86::JBE_4; break;
+
+ case CmpInst::ICMP_EQ: SwapArgs = false; BranchOpc = X86::JE_4; break;
+ case CmpInst::ICMP_NE: SwapArgs = false; BranchOpc = X86::JNE_4; break;
+ case CmpInst::ICMP_UGT: SwapArgs = false; BranchOpc = X86::JA_4; break;
+ case CmpInst::ICMP_UGE: SwapArgs = false; BranchOpc = X86::JAE_4; break;
+ case CmpInst::ICMP_ULT: SwapArgs = false; BranchOpc = X86::JB_4; break;
+ case CmpInst::ICMP_ULE: SwapArgs = false; BranchOpc = X86::JBE_4; break;
+ case CmpInst::ICMP_SGT: SwapArgs = false; BranchOpc = X86::JG_4; break;
+ case CmpInst::ICMP_SGE: SwapArgs = false; BranchOpc = X86::JGE_4; break;
+ case CmpInst::ICMP_SLT: SwapArgs = false; BranchOpc = X86::JL_4; break;
+ case CmpInst::ICMP_SLE: SwapArgs = false; BranchOpc = X86::JLE_4; break;
+ default:
+ return false;
+ }
+
+ const Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
+ if (SwapArgs)
+ std::swap(Op0, Op1);
+
+ // Emit a compare of the LHS and RHS, setting the flags.
+ if (!X86FastEmitCompare(Op0, Op1, VT))
+ return false;
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BranchOpc))
+ .addMBB(TrueMBB);
+
+ if (Predicate == CmpInst::FCMP_UNE) {
+ // X86 requires a second branch to handle UNE (and OEQ,
+ // which is mapped to UNE above).
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::JP_4))
+ .addMBB(TrueMBB);
+ }
+
+ FastEmitBranch(FalseMBB, DL);
+ FuncInfo.MBB->addSuccessor(TrueMBB);
+ return true;
+ }
+ } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
+ // Handle things like "%cond = trunc i32 %X to i1 / br i1 %cond", which
+ // typically happen for _Bool and C++ bools.
+ MVT SourceVT;
+ if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
+ isTypeLegal(TI->getOperand(0)->getType(), SourceVT)) {
+ unsigned TestOpc = 0;
+ switch (SourceVT.SimpleTy) {
+ default: break;
+ case MVT::i8: TestOpc = X86::TEST8ri; break;
+ case MVT::i16: TestOpc = X86::TEST16ri; break;
+ case MVT::i32: TestOpc = X86::TEST32ri; break;
+ case MVT::i64: TestOpc = X86::TEST64ri32; break;
+ }
+ if (TestOpc) {
+ unsigned OpReg = getRegForValue(TI->getOperand(0));
+ if (OpReg == 0) return false;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TestOpc))
+ .addReg(OpReg).addImm(1);
+
+ unsigned JmpOpc = X86::JNE_4;
+ if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
+ std::swap(TrueMBB, FalseMBB);
+ JmpOpc = X86::JE_4;
+ }
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(JmpOpc))
+ .addMBB(TrueMBB);
+ FastEmitBranch(FalseMBB, DL);
+ FuncInfo.MBB->addSuccessor(TrueMBB);
+ return true;
+ }
+ }
+ }
+
+ // Otherwise do a clumsy setcc and re-test it.
+ // Note that i1 essentially gets ANY_EXTEND'ed to i8 where it isn't used
+ // in an explicit cast, so make sure to handle that correctly.
+ unsigned OpReg = getRegForValue(BI->getCondition());
+ if (OpReg == 0) return false;
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::TEST8ri))
+ .addReg(OpReg).addImm(1);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::JNE_4))
+ .addMBB(TrueMBB);
+ FastEmitBranch(FalseMBB, DL);
+ FuncInfo.MBB->addSuccessor(TrueMBB);
+ return true;
+}
+
+bool X86FastISel::X86SelectShift(const Instruction *I) {
+ unsigned CReg = 0, OpReg = 0;
+ const TargetRegisterClass *RC = NULL;
+ if (I->getType()->isIntegerTy(8)) {
+ CReg = X86::CL;
+ RC = &X86::GR8RegClass;
+ switch (I->getOpcode()) {
+ case Instruction::LShr: OpReg = X86::SHR8rCL; break;
+ case Instruction::AShr: OpReg = X86::SAR8rCL; break;
+ case Instruction::Shl: OpReg = X86::SHL8rCL; break;
+ default: return false;
+ }
+ } else if (I->getType()->isIntegerTy(16)) {
+ CReg = X86::CX;
+ RC = &X86::GR16RegClass;
+ switch (I->getOpcode()) {
+ case Instruction::LShr: OpReg = X86::SHR16rCL; break;
+ case Instruction::AShr: OpReg = X86::SAR16rCL; break;
+ case Instruction::Shl: OpReg = X86::SHL16rCL; break;
+ default: return false;
+ }
+ } else if (I->getType()->isIntegerTy(32)) {
+ CReg = X86::ECX;
+ RC = &X86::GR32RegClass;
+ switch (I->getOpcode()) {
+ case Instruction::LShr: OpReg = X86::SHR32rCL; break;
+ case Instruction::AShr: OpReg = X86::SAR32rCL; break;
+ case Instruction::Shl: OpReg = X86::SHL32rCL; break;
+ default: return false;
+ }
+ } else if (I->getType()->isIntegerTy(64)) {
+ CReg = X86::RCX;
+ RC = &X86::GR64RegClass;
+ switch (I->getOpcode()) {
+ case Instruction::LShr: OpReg = X86::SHR64rCL; break;
+ case Instruction::AShr: OpReg = X86::SAR64rCL; break;
+ case Instruction::Shl: OpReg = X86::SHL64rCL; break;
+ default: return false;
+ }
+ } else {
+ return false;
+ }
+
+ MVT VT;
+ if (!isTypeLegal(I->getType(), VT))
+ return false;
+
+ unsigned Op0Reg = getRegForValue(I->getOperand(0));
+ if (Op0Reg == 0) return false;
+
+ unsigned Op1Reg = getRegForValue(I->getOperand(1));
+ if (Op1Reg == 0) return false;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ CReg).addReg(Op1Reg);
+
+ // The shift instruction uses X86::CL. If we defined a super-register
+ // of X86::CL, emit a subreg KILL to precisely describe what we're doing here.
+ if (CReg != X86::CL)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::KILL), X86::CL)
+ .addReg(CReg, RegState::Kill);
+
+ unsigned ResultReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpReg), ResultReg)
+ .addReg(Op0Reg);
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool X86FastISel::X86SelectSelect(const Instruction *I) {
+ MVT VT;
+ if (!isTypeLegal(I->getType(), VT))
+ return false;
+
+ // We only use cmov here, if we don't have a cmov instruction bail.
+ if (!Subtarget->hasCMov()) return false;
+
+ unsigned Opc = 0;
+ const TargetRegisterClass *RC = NULL;
+ if (VT == MVT::i16) {
+ Opc = X86::CMOVE16rr;
+ RC = &X86::GR16RegClass;
+ } else if (VT == MVT::i32) {
+ Opc = X86::CMOVE32rr;
+ RC = &X86::GR32RegClass;
+ } else if (VT == MVT::i64) {
+ Opc = X86::CMOVE64rr;
+ RC = &X86::GR64RegClass;
+ } else {
+ return false;
+ }
+
+ unsigned Op0Reg = getRegForValue(I->getOperand(0));
+ if (Op0Reg == 0) return false;
+ unsigned Op1Reg = getRegForValue(I->getOperand(1));
+ if (Op1Reg == 0) return false;
+ unsigned Op2Reg = getRegForValue(I->getOperand(2));
+ if (Op2Reg == 0) return false;
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::TEST8rr))
+ .addReg(Op0Reg).addReg(Op0Reg);
+ unsigned ResultReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg)
+ .addReg(Op1Reg).addReg(Op2Reg);
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool X86FastISel::X86SelectFPExt(const Instruction *I) {
+ // fpext from float to double.
+ if (X86ScalarSSEf64 &&
+ I->getType()->isDoubleTy()) {
+ const Value *V = I->getOperand(0);
+ if (V->getType()->isFloatTy()) {
+ unsigned OpReg = getRegForValue(V);
+ if (OpReg == 0) return false;
+ unsigned ResultReg = createResultReg(X86::FR64RegisterClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(X86::CVTSS2SDrr), ResultReg)
+ .addReg(OpReg);
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool X86FastISel::X86SelectFPTrunc(const Instruction *I) {
+ if (X86ScalarSSEf64) {
+ if (I->getType()->isFloatTy()) {
+ const Value *V = I->getOperand(0);
+ if (V->getType()->isDoubleTy()) {
+ unsigned OpReg = getRegForValue(V);
+ if (OpReg == 0) return false;
+ unsigned ResultReg = createResultReg(X86::FR32RegisterClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(X86::CVTSD2SSrr), ResultReg)
+ .addReg(OpReg);
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+bool X86FastISel::X86SelectTrunc(const Instruction *I) {
+ EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
+ EVT DstVT = TLI.getValueType(I->getType());
+
+ // This code only handles truncation to byte.
+ if (DstVT != MVT::i8 && DstVT != MVT::i1)
+ return false;
+ if (!TLI.isTypeLegal(SrcVT))
+ return false;
+
+ unsigned InputReg = getRegForValue(I->getOperand(0));
+ if (!InputReg)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+
+ if (SrcVT == MVT::i8) {
+ // Truncate from i8 to i1; no code needed.
+ UpdateValueMap(I, InputReg);
+ return true;
+ }
+
+ if (!Subtarget->is64Bit()) {
+ // If we're on x86-32; we can't extract an i8 from a general register.
+ // First issue a copy to GR16_ABCD or GR32_ABCD.
+ const TargetRegisterClass *CopyRC = (SrcVT == MVT::i16)
+ ? X86::GR16_ABCDRegisterClass : X86::GR32_ABCDRegisterClass;
+ unsigned CopyReg = createResultReg(CopyRC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ CopyReg).addReg(InputReg);
+ InputReg = CopyReg;
+ }
+
+ // Issue an extract_subreg.
+ unsigned ResultReg = FastEmitInst_extractsubreg(MVT::i8,
+ InputReg, /*Kill=*/true,
+ X86::sub_8bit);
+ if (!ResultReg)
+ return false;
+
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool X86FastISel::IsMemcpySmall(uint64_t Len) {
+ return Len <= (Subtarget->is64Bit() ? 32 : 16);
+}
+
+bool X86FastISel::TryEmitSmallMemcpy(X86AddressMode DestAM,
+ X86AddressMode SrcAM, uint64_t Len) {
+
+ // Make sure we don't bloat code by inlining very large memcpy's.
+ if (!IsMemcpySmall(Len))
+ return false;
+
+ bool i64Legal = Subtarget->is64Bit();
+
+ // We don't care about alignment here since we just emit integer accesses.
+ while (Len) {
+ MVT VT;
+ if (Len >= 8 && i64Legal)
+ VT = MVT::i64;
+ else if (Len >= 4)
+ VT = MVT::i32;
+ else if (Len >= 2)
+ VT = MVT::i16;
+ else {
+ assert(Len == 1);
+ VT = MVT::i8;
+ }
+
+ unsigned Reg;
+ bool RV = X86FastEmitLoad(VT, SrcAM, Reg);
+ RV &= X86FastEmitStore(VT, Reg, DestAM);
+ assert(RV && "Failed to emit load or store??");
+
+ unsigned Size = VT.getSizeInBits()/8;
+ Len -= Size;
+ DestAM.Disp += Size;
+ SrcAM.Disp += Size;
+ }
+
+ return true;
+}
+
+bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
+ // FIXME: Handle more intrinsics.
+ switch (I.getIntrinsicID()) {
+ default: return false;
+ case Intrinsic::memcpy: {
+ const MemCpyInst &MCI = cast<MemCpyInst>(I);
+ // Don't handle volatile or variable length memcpys.
+ if (MCI.isVolatile())
+ return false;
+
+ if (isa<ConstantInt>(MCI.getLength())) {
+ // Small memcpy's are common enough that we want to do them
+ // without a call if possible.
+ uint64_t Len = cast<ConstantInt>(MCI.getLength())->getZExtValue();
+ if (IsMemcpySmall(Len)) {
+ X86AddressMode DestAM, SrcAM;
+ if (!X86SelectAddress(MCI.getRawDest(), DestAM) ||
+ !X86SelectAddress(MCI.getRawSource(), SrcAM))
+ return false;
+ TryEmitSmallMemcpy(DestAM, SrcAM, Len);
+ return true;
+ }
+ }
+
+ unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
+ if (!MCI.getLength()->getType()->isIntegerTy(SizeWidth))
+ return false;
+
+ if (MCI.getSourceAddressSpace() > 255 || MCI.getDestAddressSpace() > 255)
+ return false;
+
+ return DoSelectCall(&I, "memcpy");
+ }
+ case Intrinsic::memset: {
+ const MemSetInst &MSI = cast<MemSetInst>(I);
+
+ if (MSI.isVolatile())
+ return false;
+
+ unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
+ if (!MSI.getLength()->getType()->isIntegerTy(SizeWidth))
+ return false;
+
+ if (MSI.getDestAddressSpace() > 255)
+ return false;
+
+ return DoSelectCall(&I, "memset");
+ }
+ case Intrinsic::stackprotector: {
+ // Emit code inline code to store the stack guard onto the stack.
+ EVT PtrTy = TLI.getPointerTy();
+
+ const Value *Op1 = I.getArgOperand(0); // The guard's value.
+ const AllocaInst *Slot = cast<AllocaInst>(I.getArgOperand(1));
+
+ // Grab the frame index.
+ X86AddressMode AM;
+ if (!X86SelectAddress(Slot, AM)) return false;
+ if (!X86FastEmitStore(PtrTy, Op1, AM)) return false;
+ return true;
+ }
+ case Intrinsic::dbg_declare: {
+ const DbgDeclareInst *DI = cast<DbgDeclareInst>(&I);
+ X86AddressMode AM;
+ assert(DI->getAddress() && "Null address should be checked earlier!");
+ if (!X86SelectAddress(DI->getAddress(), AM))
+ return false;
+ const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
+ // FIXME may need to add RegState::Debug to any registers produced,
+ // although ESP/EBP should be the only ones at the moment.
+ addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II), AM).
+ addImm(0).addMetadata(DI->getVariable());
+ return true;
+ }
+ case Intrinsic::trap: {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::TRAP));
+ return true;
+ }
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::uadd_with_overflow: {
+ // FIXME: Should fold immediates.
+
+ // Replace "add with overflow" intrinsics with an "add" instruction followed
+ // by a seto/setc instruction.
+ const Function *Callee = I.getCalledFunction();
+ Type *RetTy =
+ cast<StructType>(Callee->getReturnType())->getTypeAtIndex(unsigned(0));
+
+ MVT VT;
+ if (!isTypeLegal(RetTy, VT))
+ return false;
+
+ const Value *Op1 = I.getArgOperand(0);
+ const Value *Op2 = I.getArgOperand(1);
+ unsigned Reg1 = getRegForValue(Op1);
+ unsigned Reg2 = getRegForValue(Op2);
+
+ if (Reg1 == 0 || Reg2 == 0)
+ // FIXME: Handle values *not* in registers.
+ return false;
+
+ unsigned OpC = 0;
+ if (VT == MVT::i32)
+ OpC = X86::ADD32rr;
+ else if (VT == MVT::i64)
+ OpC = X86::ADD64rr;
+ else
+ return false;
+
+ // The call to CreateRegs builds two sequential registers, to store the
+ // both the the returned values.
+ unsigned ResultReg = FuncInfo.CreateRegs(I.getType());
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpC), ResultReg)
+ .addReg(Reg1).addReg(Reg2);
+
+ unsigned Opc = X86::SETBr;
+ if (I.getIntrinsicID() == Intrinsic::sadd_with_overflow)
+ Opc = X86::SETOr;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg+1);
+
+ UpdateValueMap(&I, ResultReg, 2);
+ return true;
+ }
+ }
+}
+
+bool X86FastISel::X86SelectCall(const Instruction *I) {
+ const CallInst *CI = cast<CallInst>(I);
+ const Value *Callee = CI->getCalledValue();
+
+ // Can't handle inline asm yet.
+ if (isa<InlineAsm>(Callee))
+ return false;
+
+ // Handle intrinsic calls.
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI))
+ return X86VisitIntrinsicCall(*II);
+
+ return DoSelectCall(I, 0);
+}
+
+// Select either a call, or an llvm.memcpy/memmove/memset intrinsic
+bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
+ const CallInst *CI = cast<CallInst>(I);
+ const Value *Callee = CI->getCalledValue();
+
+ // Handle only C and fastcc calling conventions for now.
+ ImmutableCallSite CS(CI);
+ CallingConv::ID CC = CS.getCallingConv();
+ if (CC != CallingConv::C && CC != CallingConv::Fast &&
+ CC != CallingConv::X86_FastCall)
+ return false;
+
+ // fastcc with -tailcallopt is intended to provide a guaranteed
+ // tail call optimization. Fastisel doesn't know how to do that.
+ if (CC == CallingConv::Fast && GuaranteedTailCallOpt)
+ return false;
+
+ PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
+ FunctionType *FTy = cast<FunctionType>(PT->getElementType());
+ bool isVarArg = FTy->isVarArg();
+
+ // Don't know how to handle Win64 varargs yet. Nothing special needed for
+ // x86-32. Special handling for x86-64 is implemented.
+ if (isVarArg && Subtarget->isTargetWin64())
+ return false;
+
+ // Fast-isel doesn't know about callee-pop yet.
+ if (X86::isCalleePop(CC, Subtarget->is64Bit(), isVarArg,
+ GuaranteedTailCallOpt))
+ return false;
+
+ // Check whether the function can return without sret-demotion.
+ SmallVector<ISD::OutputArg, 4> Outs;
+ SmallVector<uint64_t, 4> Offsets;
+ GetReturnInfo(I->getType(), CS.getAttributes().getRetAttributes(),
+ Outs, TLI, &Offsets);
+ bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(),
+ *FuncInfo.MF, FTy->isVarArg(),
+ Outs, FTy->getContext());
+ if (!CanLowerReturn)
+ return false;
+
+ // Materialize callee address in a register. FIXME: GV address can be
+ // handled with a CALLpcrel32 instead.
+ X86AddressMode CalleeAM;
+ if (!X86SelectCallAddress(Callee, CalleeAM))
+ return false;
+ unsigned CalleeOp = 0;
+ const GlobalValue *GV = 0;
+ if (CalleeAM.GV != 0) {
+ GV = CalleeAM.GV;
+ } else if (CalleeAM.Base.Reg != 0) {
+ CalleeOp = CalleeAM.Base.Reg;
+ } else
+ return false;
+
+ // Deal with call operands first.
+ SmallVector<const Value *, 8> ArgVals;
+ SmallVector<unsigned, 8> Args;
+ SmallVector<MVT, 8> ArgVTs;
+ SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
+ Args.reserve(CS.arg_size());
+ ArgVals.reserve(CS.arg_size());
+ ArgVTs.reserve(CS.arg_size());
+ ArgFlags.reserve(CS.arg_size());
+ for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
+ i != e; ++i) {
+ // If we're lowering a mem intrinsic instead of a regular call, skip the
+ // last two arguments, which should not passed to the underlying functions.
+ if (MemIntName && e-i <= 2)
+ break;
+ Value *ArgVal = *i;
+ ISD::ArgFlagsTy Flags;
+ unsigned AttrInd = i - CS.arg_begin() + 1;
+ if (CS.paramHasAttr(AttrInd, Attribute::SExt))
+ Flags.setSExt();
+ if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
+ Flags.setZExt();
+
+ if (CS.paramHasAttr(AttrInd, Attribute::ByVal)) {
+ PointerType *Ty = cast<PointerType>(ArgVal->getType());
+ Type *ElementTy = Ty->getElementType();
+ unsigned FrameSize = TD.getTypeAllocSize(ElementTy);
+ unsigned FrameAlign = CS.getParamAlignment(AttrInd);
+ if (!FrameAlign)
+ FrameAlign = TLI.getByValTypeAlignment(ElementTy);
+ Flags.setByVal();
+ Flags.setByValSize(FrameSize);
+ Flags.setByValAlign(FrameAlign);
+ if (!IsMemcpySmall(FrameSize))
+ return false;
+ }
+
+ if (CS.paramHasAttr(AttrInd, Attribute::InReg))
+ Flags.setInReg();
+ if (CS.paramHasAttr(AttrInd, Attribute::Nest))
+ Flags.setNest();
+
+ // If this is an i1/i8/i16 argument, promote to i32 to avoid an extra
+ // instruction. This is safe because it is common to all fastisel supported
+ // calling conventions on x86.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(ArgVal)) {
+ if (CI->getBitWidth() == 1 || CI->getBitWidth() == 8 ||
+ CI->getBitWidth() == 16) {
+ if (Flags.isSExt())
+ ArgVal = ConstantExpr::getSExt(CI,Type::getInt32Ty(CI->getContext()));
+ else
+ ArgVal = ConstantExpr::getZExt(CI,Type::getInt32Ty(CI->getContext()));
+ }
+ }
+
+ unsigned ArgReg;
+
+ // Passing bools around ends up doing a trunc to i1 and passing it.
+ // Codegen this as an argument + "and 1".
+ if (ArgVal->getType()->isIntegerTy(1) && isa<TruncInst>(ArgVal) &&
+ cast<TruncInst>(ArgVal)->getParent() == I->getParent() &&
+ ArgVal->hasOneUse()) {
+ ArgVal = cast<TruncInst>(ArgVal)->getOperand(0);
+ ArgReg = getRegForValue(ArgVal);
+ if (ArgReg == 0) return false;
+
+ MVT ArgVT;
+ if (!isTypeLegal(ArgVal->getType(), ArgVT)) return false;
+
+ ArgReg = FastEmit_ri(ArgVT, ArgVT, ISD::AND, ArgReg,
+ ArgVal->hasOneUse(), 1);
+ } else {
+ ArgReg = getRegForValue(ArgVal);
+ }
+
+ if (ArgReg == 0) return false;
+
+ Type *ArgTy = ArgVal->getType();
+ MVT ArgVT;
+ if (!isTypeLegal(ArgTy, ArgVT))
+ return false;
+ if (ArgVT == MVT::x86mmx)
+ return false;
+ unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
+ Flags.setOrigAlign(OriginalAlignment);
+
+ Args.push_back(ArgReg);
+ ArgVals.push_back(ArgVal);
+ ArgVTs.push_back(ArgVT);
+ ArgFlags.push_back(Flags);
+ }
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CC, isVarArg, *FuncInfo.MF, TM, ArgLocs,
+ I->getParent()->getContext());
+
+ // Allocate shadow area for Win64
+ if (Subtarget->isTargetWin64())
+ CCInfo.AllocateStack(32, 8);
+
+ CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CC_X86);
+
+ // Get a count of how many bytes are to be pushed on the stack.
+ unsigned NumBytes = CCInfo.getNextStackOffset();
+
+ // Issue CALLSEQ_START
+ unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(AdjStackDown))
+ .addImm(NumBytes);
+
+ // Process argument: walk the register/memloc assignments, inserting
+ // copies / loads.
+ SmallVector<unsigned, 4> RegArgs;
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ unsigned Arg = Args[VA.getValNo()];
+ EVT ArgVT = ArgVTs[VA.getValNo()];
+
+ // Promote the value if needed.
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt: {
+ assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
+ "Unexpected extend");
+ bool Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
+ Arg, ArgVT, Arg);
+ assert(Emitted && "Failed to emit a sext!"); (void)Emitted;
+ ArgVT = VA.getLocVT();
+ break;
+ }
+ case CCValAssign::ZExt: {
+ assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
+ "Unexpected extend");
+ bool Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
+ Arg, ArgVT, Arg);
+ assert(Emitted && "Failed to emit a zext!"); (void)Emitted;
+ ArgVT = VA.getLocVT();
+ break;
+ }
+ case CCValAssign::AExt: {
+ assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
+ "Unexpected extend");
+ bool Emitted = X86FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(),
+ Arg, ArgVT, Arg);
+ if (!Emitted)
+ Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
+ Arg, ArgVT, Arg);
+ if (!Emitted)
+ Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
+ Arg, ArgVT, Arg);
+
+ assert(Emitted && "Failed to emit a aext!"); (void)Emitted;
+ ArgVT = VA.getLocVT();
+ break;
+ }
+ case CCValAssign::BCvt: {
+ unsigned BC = FastEmit_r(ArgVT.getSimpleVT(), VA.getLocVT(),
+ ISD::BITCAST, Arg, /*TODO: Kill=*/false);
+ assert(BC != 0 && "Failed to emit a bitcast!");
+ Arg = BC;
+ ArgVT = VA.getLocVT();
+ break;
+ }
+ }
+
+ if (VA.isRegLoc()) {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ VA.getLocReg()).addReg(Arg);
+ RegArgs.push_back(VA.getLocReg());
+ } else {
+ unsigned LocMemOffset = VA.getLocMemOffset();
+ X86AddressMode AM;
+ AM.Base.Reg = StackPtr;
+ AM.Disp = LocMemOffset;
+ const Value *ArgVal = ArgVals[VA.getValNo()];
+ ISD::ArgFlagsTy Flags = ArgFlags[VA.getValNo()];
+
+ if (Flags.isByVal()) {
+ X86AddressMode SrcAM;
+ SrcAM.Base.Reg = Arg;
+ bool Res = TryEmitSmallMemcpy(AM, SrcAM, Flags.getByValSize());
+ assert(Res && "memcpy length already checked!"); (void)Res;
+ } else if (isa<ConstantInt>(ArgVal) || isa<ConstantPointerNull>(ArgVal)) {
+ // If this is a really simple value, emit this with the Value* version
+ // of X86FastEmitStore. If it isn't simple, we don't want to do this,
+ // as it can cause us to reevaluate the argument.
+ X86FastEmitStore(ArgVT, ArgVal, AM);
+ } else {
+ X86FastEmitStore(ArgVT, Arg, AM);
+ }
+ }
+ }
+
+ // ELF / PIC requires GOT in the EBX register before function calls via PLT
+ // GOT pointer.
+ if (Subtarget->isPICStyleGOT()) {
+ unsigned Base = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ X86::EBX).addReg(Base);
+ }
+
+ if (Subtarget->is64Bit() && isVarArg && !Subtarget->isTargetWin64()) {
+ // Count the number of XMM registers allocated.
+ static const unsigned XMMArgRegs[] = {
+ X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
+ X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
+ };
+ unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::MOV8ri),
+ X86::AL).addImm(NumXMMRegs);
+ }
+
+ // Issue the call.
+ MachineInstrBuilder MIB;
+ if (CalleeOp) {
+ // Register-indirect call.
+ unsigned CallOpc;
+ if (Subtarget->isTargetWin64())
+ CallOpc = X86::WINCALL64r;
+ else if (Subtarget->is64Bit())
+ CallOpc = X86::CALL64r;
+ else
+ CallOpc = X86::CALL32r;
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc))
+ .addReg(CalleeOp);
+
+ } else {
+ // Direct call.
+ assert(GV && "Not a direct call");
+ unsigned CallOpc;
+ if (Subtarget->isTargetWin64())
+ CallOpc = X86::WINCALL64pcrel32;
+ else if (Subtarget->is64Bit())
+ CallOpc = X86::CALL64pcrel32;
+ else
+ CallOpc = X86::CALLpcrel32;
+
+ // See if we need any target-specific flags on the GV operand.
+ unsigned char OpFlags = 0;
+
+ // On ELF targets, in both X86-64 and X86-32 mode, direct calls to
+ // external symbols most go through the PLT in PIC mode. If the symbol
+ // has hidden or protected visibility, or if it is static or local, then
+ // we don't need to use the PLT - we can directly call it.
+ if (Subtarget->isTargetELF() &&
+ TM.getRelocationModel() == Reloc::PIC_ &&
+ GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) {
+ OpFlags = X86II::MO_PLT;
+ } else if (Subtarget->isPICStyleStubAny() &&
+ (GV->isDeclaration() || GV->isWeakForLinker()) &&
+ (!Subtarget->getTargetTriple().isMacOSX() ||
+ Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) {
+ // PC-relative references to external symbols should go through $stub,
+ // unless we're building with the leopard linker or later, which
+ // automatically synthesizes these stubs.
+ OpFlags = X86II::MO_DARWIN_STUB;
+ }
+
+
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc));
+ if (MemIntName)
+ MIB.addExternalSymbol(MemIntName, OpFlags);
+ else
+ MIB.addGlobalAddress(GV, 0, OpFlags);
+ }
+
+ // Add an implicit use GOT pointer in EBX.
+ if (Subtarget->isPICStyleGOT())
+ MIB.addReg(X86::EBX);
+
+ if (Subtarget->is64Bit() && isVarArg && !Subtarget->isTargetWin64())
+ MIB.addReg(X86::AL);
+
+ // Add implicit physical register uses to the call.
+ for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
+ MIB.addReg(RegArgs[i]);
+
+ // Issue CALLSEQ_END
+ unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
+ unsigned NumBytesCallee = 0;
+ if (!Subtarget->is64Bit() && CS.paramHasAttr(1, Attribute::StructRet))
+ NumBytesCallee = 4;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(AdjStackUp))
+ .addImm(NumBytes).addImm(NumBytesCallee);
+
+ // Build info for return calling conv lowering code.
+ // FIXME: This is practically a copy-paste from TargetLowering::LowerCallTo.
+ SmallVector<ISD::InputArg, 32> Ins;
+ SmallVector<EVT, 4> RetTys;
+ ComputeValueVTs(TLI, I->getType(), RetTys);
+ for (unsigned i = 0, e = RetTys.size(); i != e; ++i) {
+ EVT VT = RetTys[i];
+ EVT RegisterVT = TLI.getRegisterType(I->getParent()->getContext(), VT);
+ unsigned NumRegs = TLI.getNumRegisters(I->getParent()->getContext(), VT);
+ for (unsigned j = 0; j != NumRegs; ++j) {
+ ISD::InputArg MyFlags;
+ MyFlags.VT = RegisterVT.getSimpleVT();
+ MyFlags.Used = !CS.getInstruction()->use_empty();
+ if (CS.paramHasAttr(0, Attribute::SExt))
+ MyFlags.Flags.setSExt();
+ if (CS.paramHasAttr(0, Attribute::ZExt))
+ MyFlags.Flags.setZExt();
+ if (CS.paramHasAttr(0, Attribute::InReg))
+ MyFlags.Flags.setInReg();
+ Ins.push_back(MyFlags);
+ }
+ }
+
+ // Now handle call return values.
+ SmallVector<unsigned, 4> UsedRegs;
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCRetInfo(CC, false, *FuncInfo.MF, TM, RVLocs,
+ I->getParent()->getContext());
+ unsigned ResultReg = FuncInfo.CreateRegs(I->getType());
+ CCRetInfo.AnalyzeCallResult(Ins, RetCC_X86);
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ EVT CopyVT = RVLocs[i].getValVT();
+ unsigned CopyReg = ResultReg + i;
+
+ // If this is a call to a function that returns an fp value on the x87 fp
+ // stack, but where we prefer to use the value in xmm registers, copy it
+ // out as F80 and use a truncate to move it from fp stack reg to xmm reg.
+ if ((RVLocs[i].getLocReg() == X86::ST0 ||
+ RVLocs[i].getLocReg() == X86::ST1)) {
+ if (isScalarFPTypeInSSEReg(RVLocs[i].getValVT())) {
+ CopyVT = MVT::f80;
+ CopyReg = createResultReg(X86::RFP80RegisterClass);
+ }
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::FpPOP_RETVAL),
+ CopyReg);
+ } else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ CopyReg).addReg(RVLocs[i].getLocReg());
+ UsedRegs.push_back(RVLocs[i].getLocReg());
+ }
+
+ if (CopyVT != RVLocs[i].getValVT()) {
+ // Round the F80 the right size, which also moves to the appropriate xmm
+ // register. This is accomplished by storing the F80 value in memory and
+ // then loading it back. Ewww...
+ EVT ResVT = RVLocs[i].getValVT();
+ unsigned Opc = ResVT == MVT::f32 ? X86::ST_Fp80m32 : X86::ST_Fp80m64;
+ unsigned MemSize = ResVT.getSizeInBits()/8;
+ int FI = MFI.CreateStackObject(MemSize, MemSize, false);
+ addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc)), FI)
+ .addReg(CopyReg);
+ Opc = ResVT == MVT::f32 ? X86::MOVSSrm : X86::MOVSDrm;
+ addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc), ResultReg + i), FI);
+ }
+ }
+
+ if (RVLocs.size())
+ UpdateValueMap(I, ResultReg, RVLocs.size());
+
+ // Set all unused physreg defs as dead.
+ static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
+
+ return true;
+}
+
+
+bool
+X86FastISel::TargetSelectInstruction(const Instruction *I) {
+ switch (I->getOpcode()) {
+ default: break;
+ case Instruction::Load:
+ return X86SelectLoad(I);
+ case Instruction::Store:
+ return X86SelectStore(I);
+ case Instruction::Ret:
+ return X86SelectRet(I);
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ return X86SelectCmp(I);
+ case Instruction::ZExt:
+ return X86SelectZExt(I);
+ case Instruction::Br:
+ return X86SelectBranch(I);
+ case Instruction::Call:
+ return X86SelectCall(I);
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::Shl:
+ return X86SelectShift(I);
+ case Instruction::Select:
+ return X86SelectSelect(I);
+ case Instruction::Trunc:
+ return X86SelectTrunc(I);
+ case Instruction::FPExt:
+ return X86SelectFPExt(I);
+ case Instruction::FPTrunc:
+ return X86SelectFPTrunc(I);
+ case Instruction::IntToPtr: // Deliberate fall-through.
+ case Instruction::PtrToInt: {
+ EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
+ EVT DstVT = TLI.getValueType(I->getType());
+ if (DstVT.bitsGT(SrcVT))
+ return X86SelectZExt(I);
+ if (DstVT.bitsLT(SrcVT))
+ return X86SelectTrunc(I);
+ unsigned Reg = getRegForValue(I->getOperand(0));
+ if (Reg == 0) return false;
+ UpdateValueMap(I, Reg);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) {
+ MVT VT;
+ if (!isTypeLegal(C->getType(), VT))
+ return false;
+
+ // Get opcode and regclass of the output for the given load instruction.
+ unsigned Opc = 0;
+ const TargetRegisterClass *RC = NULL;
+ switch (VT.SimpleTy) {
+ default: return false;
+ case MVT::i8:
+ Opc = X86::MOV8rm;
+ RC = X86::GR8RegisterClass;
+ break;
+ case MVT::i16:
+ Opc = X86::MOV16rm;
+ RC = X86::GR16RegisterClass;
+ break;
+ case MVT::i32:
+ Opc = X86::MOV32rm;
+ RC = X86::GR32RegisterClass;
+ break;
+ case MVT::i64:
+ // Must be in x86-64 mode.
+ Opc = X86::MOV64rm;
+ RC = X86::GR64RegisterClass;
+ break;
+ case MVT::f32:
+ if (X86ScalarSSEf32) {
+ Opc = Subtarget->hasAVX() ? X86::VMOVSSrm : X86::MOVSSrm;
+ RC = X86::FR32RegisterClass;
+ } else {
+ Opc = X86::LD_Fp32m;
+ RC = X86::RFP32RegisterClass;
+ }
+ break;
+ case MVT::f64:
+ if (X86ScalarSSEf64) {
+ Opc = Subtarget->hasAVX() ? X86::VMOVSDrm : X86::MOVSDrm;
+ RC = X86::FR64RegisterClass;
+ } else {
+ Opc = X86::LD_Fp64m;
+ RC = X86::RFP64RegisterClass;
+ }
+ break;
+ case MVT::f80:
+ // No f80 support yet.
+ return false;
+ }
+
+ // Materialize addresses with LEA instructions.
+ if (isa<GlobalValue>(C)) {
+ X86AddressMode AM;
+ if (X86SelectAddress(C, AM)) {
+ // If the expression is just a basereg, then we're done, otherwise we need
+ // to emit an LEA.
+ if (AM.BaseType == X86AddressMode::RegBase &&
+ AM.IndexReg == 0 && AM.Disp == 0 && AM.GV == 0)
+ return AM.Base.Reg;
+
+ Opc = TLI.getPointerTy() == MVT::i32 ? X86::LEA32r : X86::LEA64r;
+ unsigned ResultReg = createResultReg(RC);
+ addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc), ResultReg), AM);
+ return ResultReg;
+ }
+ return 0;
+ }
+
+ // MachineConstantPool wants an explicit alignment.
+ unsigned Align = TD.getPrefTypeAlignment(C->getType());
+ if (Align == 0) {
+ // Alignment of vector types. FIXME!
+ Align = TD.getTypeAllocSize(C->getType());
+ }
+
+ // x86-32 PIC requires a PIC base register for constant pools.
+ unsigned PICBase = 0;
+ unsigned char OpFlag = 0;
+ if (Subtarget->isPICStyleStubPIC()) { // Not dynamic-no-pic
+ OpFlag = X86II::MO_PIC_BASE_OFFSET;
+ PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
+ } else if (Subtarget->isPICStyleGOT()) {
+ OpFlag = X86II::MO_GOTOFF;
+ PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
+ } else if (Subtarget->isPICStyleRIPRel() &&
+ TM.getCodeModel() == CodeModel::Small) {
+ PICBase = X86::RIP;
+ }
+
+ // Create the load from the constant pool.
+ unsigned MCPOffset = MCP.getConstantPoolIndex(C, Align);
+ unsigned ResultReg = createResultReg(RC);
+ addConstantPoolReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc), ResultReg),
+ MCPOffset, PICBase, OpFlag);
+
+ return ResultReg;
+}
+
+unsigned X86FastISel::TargetMaterializeAlloca(const AllocaInst *C) {
+ // Fail on dynamic allocas. At this point, getRegForValue has already
+ // checked its CSE maps, so if we're here trying to handle a dynamic
+ // alloca, we're not going to succeed. X86SelectAddress has a
+ // check for dynamic allocas, because it's called directly from
+ // various places, but TargetMaterializeAlloca also needs a check
+ // in order to avoid recursion between getRegForValue,
+ // X86SelectAddrss, and TargetMaterializeAlloca.
+ if (!FuncInfo.StaticAllocaMap.count(C))
+ return 0;
+
+ X86AddressMode AM;
+ if (!X86SelectAddress(C, AM))
+ return 0;
+ unsigned Opc = Subtarget->is64Bit() ? X86::LEA64r : X86::LEA32r;
+ TargetRegisterClass* RC = TLI.getRegClassFor(TLI.getPointerTy());
+ unsigned ResultReg = createResultReg(RC);
+ addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc), ResultReg), AM);
+ return ResultReg;
+}
+
+unsigned X86FastISel::TargetMaterializeFloatZero(const ConstantFP *CF) {
+ MVT VT;
+ if (!isTypeLegal(CF->getType(), VT))
+ return false;
+
+ // Get opcode and regclass for the given zero.
+ unsigned Opc = 0;
+ const TargetRegisterClass *RC = NULL;
+ switch (VT.SimpleTy) {
+ default: return false;
+ case MVT::f32:
+ if (X86ScalarSSEf32) {
+ Opc = Subtarget->hasAVX() ? X86::VFsFLD0SS : X86::FsFLD0SS;
+ RC = X86::FR32RegisterClass;
+ } else {
+ Opc = X86::LD_Fp032;
+ RC = X86::RFP32RegisterClass;
+ }
+ break;
+ case MVT::f64:
+ if (X86ScalarSSEf64) {
+ Opc = Subtarget->hasAVX() ? X86::VFsFLD0SD : X86::FsFLD0SD;
+ RC = X86::FR64RegisterClass;
+ } else {
+ Opc = X86::LD_Fp064;
+ RC = X86::RFP64RegisterClass;
+ }
+ break;
+ case MVT::f80:
+ // No f80 support yet.
+ return false;
+ }
+
+ unsigned ResultReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg);
+ return ResultReg;
+}
+
+
+/// TryToFoldLoad - The specified machine instr operand is a vreg, and that
+/// vreg is being provided by the specified load instruction. If possible,
+/// try to fold the load as an operand to the instruction, returning true if
+/// possible.
+bool X86FastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
+ const LoadInst *LI) {
+ X86AddressMode AM;
+ if (!X86SelectAddress(LI->getOperand(0), AM))
+ return false;
+
+ X86InstrInfo &XII = (X86InstrInfo&)TII;
+
+ unsigned Size = TD.getTypeAllocSize(LI->getType());
+ unsigned Alignment = LI->getAlignment();
+
+ SmallVector<MachineOperand, 8> AddrOps;
+ AM.getFullAddress(AddrOps);
+
+ MachineInstr *Result =
+ XII.foldMemoryOperandImpl(*FuncInfo.MF, MI, OpNo, AddrOps, Size, Alignment);
+ if (Result == 0) return false;
+
+ FuncInfo.MBB->insert(FuncInfo.InsertPt, Result);
+ MI->eraseFromParent();
+ return true;
+}
+
+
+namespace llvm {
+ llvm::FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo) {
+ return new X86FastISel(funcInfo);
+ }
+}
diff --git a/contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp b/contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp
new file mode 100644
index 0000000..e3461c8
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp
@@ -0,0 +1,1743 @@
+//===-- X86FloatingPoint.cpp - Floating point Reg -> Stack converter ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the pass which converts floating point instructions from
+// pseudo registers into register stack instructions. This pass uses live
+// variable information to indicate where the FPn registers are used and their
+// lifetimes.
+//
+// The x87 hardware tracks liveness of the stack registers, so it is necessary
+// to implement exact liveness tracking between basic blocks. The CFG edges are
+// partitioned into bundles where the same FP registers must be live in
+// identical stack positions. Instructions are inserted at the end of each basic
+// block to rearrange the live registers to match the outgoing bundle.
+//
+// This approach avoids splitting critical edges at the potential cost of more
+// live register shuffling instructions when critical edges are present.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "x86-codegen"
+#include "X86.h"
+#include "X86InstrInfo.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/EdgeBundles.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumFXCH, "Number of fxch instructions inserted");
+STATISTIC(NumFP , "Number of floating point instructions");
+
+namespace {
+ struct FPS : public MachineFunctionPass {
+ static char ID;
+ FPS() : MachineFunctionPass(ID) {
+ initializeEdgeBundlesPass(*PassRegistry::getPassRegistry());
+ // This is really only to keep valgrind quiet.
+ // The logic in isLive() is too much for it.
+ memset(Stack, 0, sizeof(Stack));
+ memset(RegMap, 0, sizeof(RegMap));
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<EdgeBundles>();
+ AU.addPreservedID(MachineLoopInfoID);
+ AU.addPreservedID(MachineDominatorsID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const { return "X86 FP Stackifier"; }
+
+ private:
+ const TargetInstrInfo *TII; // Machine instruction info.
+
+ // Two CFG edges are related if they leave the same block, or enter the same
+ // block. The transitive closure of an edge under this relation is a
+ // LiveBundle. It represents a set of CFG edges where the live FP stack
+ // registers must be allocated identically in the x87 stack.
+ //
+ // A LiveBundle is usually all the edges leaving a block, or all the edges
+ // entering a block, but it can contain more edges if critical edges are
+ // present.
+ //
+ // The set of live FP registers in a LiveBundle is calculated by bundleCFG,
+ // but the exact mapping of FP registers to stack slots is fixed later.
+ struct LiveBundle {
+ // Bit mask of live FP registers. Bit 0 = FP0, bit 1 = FP1, &c.
+ unsigned Mask;
+
+ // Number of pre-assigned live registers in FixStack. This is 0 when the
+ // stack order has not yet been fixed.
+ unsigned FixCount;
+
+ // Assigned stack order for live-in registers.
+ // FixStack[i] == getStackEntry(i) for all i < FixCount.
+ unsigned char FixStack[8];
+
+ LiveBundle() : Mask(0), FixCount(0) {}
+
+ // Have the live registers been assigned a stack order yet?
+ bool isFixed() const { return !Mask || FixCount; }
+ };
+
+ // Numbered LiveBundle structs. LiveBundles[0] is used for all CFG edges
+ // with no live FP registers.
+ SmallVector<LiveBundle, 8> LiveBundles;
+
+ // The edge bundle analysis provides indices into the LiveBundles vector.
+ EdgeBundles *Bundles;
+
+ // Return a bitmask of FP registers in block's live-in list.
+ unsigned calcLiveInMask(MachineBasicBlock *MBB) {
+ unsigned Mask = 0;
+ for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(),
+ E = MBB->livein_end(); I != E; ++I) {
+ unsigned Reg = *I - X86::FP0;
+ if (Reg < 8)
+ Mask |= 1 << Reg;
+ }
+ return Mask;
+ }
+
+ // Partition all the CFG edges into LiveBundles.
+ void bundleCFG(MachineFunction &MF);
+
+ MachineBasicBlock *MBB; // Current basic block
+
+ // The hardware keeps track of how many FP registers are live, so we have
+ // to model that exactly. Usually, each live register corresponds to an
+ // FP<n> register, but when dealing with calls, returns, and inline
+ // assembly, it is sometimes neccesary to have live scratch registers.
+ unsigned Stack[8]; // FP<n> Registers in each stack slot...
+ unsigned StackTop; // The current top of the FP stack.
+
+ enum {
+ NumFPRegs = 16 // Including scratch pseudo-registers.
+ };
+
+ // For each live FP<n> register, point to its Stack[] entry.
+ // The first entries correspond to FP0-FP6, the rest are scratch registers
+ // used when we need slightly different live registers than what the
+ // register allocator thinks.
+ unsigned RegMap[NumFPRegs];
+
+ // Pending fixed registers - Inline assembly needs FP registers to appear
+ // in fixed stack slot positions. This is handled by copying FP registers
+ // to ST registers before the instruction, and copying back after the
+ // instruction.
+ //
+ // This is modeled with pending ST registers. NumPendingSTs is the number
+ // of ST registers (ST0-STn) we are tracking. PendingST[n] points to an FP
+ // register that holds the ST value. The ST registers are not moved into
+ // place until immediately before the instruction that needs them.
+ //
+ // It can happen that we need an ST register to be live when no FP register
+ // holds the value:
+ //
+ // %ST0 = COPY %FP4<kill>
+ //
+ // When that happens, we allocate a scratch FP register to hold the ST
+ // value. That means every register in PendingST must be live.
+
+ unsigned NumPendingSTs;
+ unsigned char PendingST[8];
+
+ // Set up our stack model to match the incoming registers to MBB.
+ void setupBlockStack();
+
+ // Shuffle live registers to match the expectations of successor blocks.
+ void finishBlockStack();
+
+ void dumpStack() const {
+ dbgs() << "Stack contents:";
+ for (unsigned i = 0; i != StackTop; ++i) {
+ dbgs() << " FP" << Stack[i];
+ assert(RegMap[Stack[i]] == i && "Stack[] doesn't match RegMap[]!");
+ }
+ for (unsigned i = 0; i != NumPendingSTs; ++i)
+ dbgs() << ", ST" << i << " in FP" << unsigned(PendingST[i]);
+ dbgs() << "\n";
+ }
+
+ /// getSlot - Return the stack slot number a particular register number is
+ /// in.
+ unsigned getSlot(unsigned RegNo) const {
+ assert(RegNo < NumFPRegs && "Regno out of range!");
+ return RegMap[RegNo];
+ }
+
+ /// isLive - Is RegNo currently live in the stack?
+ bool isLive(unsigned RegNo) const {
+ unsigned Slot = getSlot(RegNo);
+ return Slot < StackTop && Stack[Slot] == RegNo;
+ }
+
+ /// getScratchReg - Return an FP register that is not currently in use.
+ unsigned getScratchReg() {
+ for (int i = NumFPRegs - 1; i >= 8; --i)
+ if (!isLive(i))
+ return i;
+ llvm_unreachable("Ran out of scratch FP registers");
+ }
+
+ /// isScratchReg - Returns trus if RegNo is a scratch FP register.
+ bool isScratchReg(unsigned RegNo) {
+ return RegNo > 8 && RegNo < NumFPRegs;
+ }
+
+ /// getStackEntry - Return the X86::FP<n> register in register ST(i).
+ unsigned getStackEntry(unsigned STi) const {
+ if (STi >= StackTop)
+ report_fatal_error("Access past stack top!");
+ return Stack[StackTop-1-STi];
+ }
+
+ /// getSTReg - Return the X86::ST(i) register which contains the specified
+ /// FP<RegNo> register.
+ unsigned getSTReg(unsigned RegNo) const {
+ return StackTop - 1 - getSlot(RegNo) + llvm::X86::ST0;
+ }
+
+ // pushReg - Push the specified FP<n> register onto the stack.
+ void pushReg(unsigned Reg) {
+ assert(Reg < NumFPRegs && "Register number out of range!");
+ if (StackTop >= 8)
+ report_fatal_error("Stack overflow!");
+ Stack[StackTop] = Reg;
+ RegMap[Reg] = StackTop++;
+ }
+
+ bool isAtTop(unsigned RegNo) const { return getSlot(RegNo) == StackTop-1; }
+ void moveToTop(unsigned RegNo, MachineBasicBlock::iterator I) {
+ DebugLoc dl = I == MBB->end() ? DebugLoc() : I->getDebugLoc();
+ if (isAtTop(RegNo)) return;
+
+ unsigned STReg = getSTReg(RegNo);
+ unsigned RegOnTop = getStackEntry(0);
+
+ // Swap the slots the regs are in.
+ std::swap(RegMap[RegNo], RegMap[RegOnTop]);
+
+ // Swap stack slot contents.
+ if (RegMap[RegOnTop] >= StackTop)
+ report_fatal_error("Access past stack top!");
+ std::swap(Stack[RegMap[RegOnTop]], Stack[StackTop-1]);
+
+ // Emit an fxch to update the runtime processors version of the state.
+ BuildMI(*MBB, I, dl, TII->get(X86::XCH_F)).addReg(STReg);
+ ++NumFXCH;
+ }
+
+ void duplicateToTop(unsigned RegNo, unsigned AsReg, MachineInstr *I) {
+ DebugLoc dl = I == MBB->end() ? DebugLoc() : I->getDebugLoc();
+ unsigned STReg = getSTReg(RegNo);
+ pushReg(AsReg); // New register on top of stack
+
+ BuildMI(*MBB, I, dl, TII->get(X86::LD_Frr)).addReg(STReg);
+ }
+
+ /// duplicatePendingSTBeforeKill - The instruction at I is about to kill
+ /// RegNo. If any PendingST registers still need the RegNo value, duplicate
+ /// them to new scratch registers.
+ void duplicatePendingSTBeforeKill(unsigned RegNo, MachineInstr *I) {
+ for (unsigned i = 0; i != NumPendingSTs; ++i) {
+ if (PendingST[i] != RegNo)
+ continue;
+ unsigned SR = getScratchReg();
+ DEBUG(dbgs() << "Duplicating pending ST" << i
+ << " in FP" << RegNo << " to FP" << SR << '\n');
+ duplicateToTop(RegNo, SR, I);
+ PendingST[i] = SR;
+ }
+ }
+
+ /// popStackAfter - Pop the current value off of the top of the FP stack
+ /// after the specified instruction.
+ void popStackAfter(MachineBasicBlock::iterator &I);
+
+ /// freeStackSlotAfter - Free the specified register from the register
+ /// stack, so that it is no longer in a register. If the register is
+ /// currently at the top of the stack, we just pop the current instruction,
+ /// otherwise we store the current top-of-stack into the specified slot,
+ /// then pop the top of stack.
+ void freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned Reg);
+
+ /// freeStackSlotBefore - Just the pop, no folding. Return the inserted
+ /// instruction.
+ MachineBasicBlock::iterator
+ freeStackSlotBefore(MachineBasicBlock::iterator I, unsigned FPRegNo);
+
+ /// Adjust the live registers to be the set in Mask.
+ void adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I);
+
+ /// Shuffle the top FixCount stack entries such that FP reg FixStack[0] is
+ /// st(0), FP reg FixStack[1] is st(1) etc.
+ void shuffleStackTop(const unsigned char *FixStack, unsigned FixCount,
+ MachineBasicBlock::iterator I);
+
+ bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB);
+
+ void handleZeroArgFP(MachineBasicBlock::iterator &I);
+ void handleOneArgFP(MachineBasicBlock::iterator &I);
+ void handleOneArgFPRW(MachineBasicBlock::iterator &I);
+ void handleTwoArgFP(MachineBasicBlock::iterator &I);
+ void handleCompareFP(MachineBasicBlock::iterator &I);
+ void handleCondMovFP(MachineBasicBlock::iterator &I);
+ void handleSpecialFP(MachineBasicBlock::iterator &I);
+
+ // Check if a COPY instruction is using FP registers.
+ bool isFPCopy(MachineInstr *MI) {
+ unsigned DstReg = MI->getOperand(0).getReg();
+ unsigned SrcReg = MI->getOperand(1).getReg();
+
+ return X86::RFP80RegClass.contains(DstReg) ||
+ X86::RFP80RegClass.contains(SrcReg);
+ }
+ };
+ char FPS::ID = 0;
+}
+
+FunctionPass *llvm::createX86FloatingPointStackifierPass() { return new FPS(); }
+
+/// getFPReg - Return the X86::FPx register number for the specified operand.
+/// For example, this returns 3 for X86::FP3.
+static unsigned getFPReg(const MachineOperand &MO) {
+ assert(MO.isReg() && "Expected an FP register!");
+ unsigned Reg = MO.getReg();
+ assert(Reg >= X86::FP0 && Reg <= X86::FP6 && "Expected FP register!");
+ return Reg - X86::FP0;
+}
+
+/// runOnMachineFunction - Loop over all of the basic blocks, transforming FP
+/// register references into FP stack references.
+///
+bool FPS::runOnMachineFunction(MachineFunction &MF) {
+ // We only need to run this pass if there are any FP registers used in this
+ // function. If it is all integer, there is nothing for us to do!
+ bool FPIsUsed = false;
+
+ assert(X86::FP6 == X86::FP0+6 && "Register enums aren't sorted right!");
+ for (unsigned i = 0; i <= 6; ++i)
+ if (MF.getRegInfo().isPhysRegUsed(X86::FP0+i)) {
+ FPIsUsed = true;
+ break;
+ }
+
+ // Early exit.
+ if (!FPIsUsed) return false;
+
+ Bundles = &getAnalysis<EdgeBundles>();
+ TII = MF.getTarget().getInstrInfo();
+
+ // Prepare cross-MBB liveness.
+ bundleCFG(MF);
+
+ StackTop = 0;
+
+ // Process the function in depth first order so that we process at least one
+ // of the predecessors for every reachable block in the function.
+ SmallPtrSet<MachineBasicBlock*, 8> Processed;
+ MachineBasicBlock *Entry = MF.begin();
+
+ bool Changed = false;
+ for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*, 8> >
+ I = df_ext_begin(Entry, Processed), E = df_ext_end(Entry, Processed);
+ I != E; ++I)
+ Changed |= processBasicBlock(MF, **I);
+
+ // Process any unreachable blocks in arbitrary order now.
+ if (MF.size() != Processed.size())
+ for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB)
+ if (Processed.insert(BB))
+ Changed |= processBasicBlock(MF, *BB);
+
+ LiveBundles.clear();
+
+ return Changed;
+}
+
+/// bundleCFG - Scan all the basic blocks to determine consistent live-in and
+/// live-out sets for the FP registers. Consistent means that the set of
+/// registers live-out from a block is identical to the live-in set of all
+/// successors. This is not enforced by the normal live-in lists since
+/// registers may be implicitly defined, or not used by all successors.
+void FPS::bundleCFG(MachineFunction &MF) {
+ assert(LiveBundles.empty() && "Stale data in LiveBundles");
+ LiveBundles.resize(Bundles->getNumBundles());
+
+ // Gather the actual live-in masks for all MBBs.
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+ MachineBasicBlock *MBB = I;
+ const unsigned Mask = calcLiveInMask(MBB);
+ if (!Mask)
+ continue;
+ // Update MBB ingoing bundle mask.
+ LiveBundles[Bundles->getBundle(MBB->getNumber(), false)].Mask |= Mask;
+ }
+}
+
+/// processBasicBlock - Loop over all of the instructions in the basic block,
+/// transforming FP instructions into their stack form.
+///
+bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
+ bool Changed = false;
+ MBB = &BB;
+ NumPendingSTs = 0;
+
+ setupBlockStack();
+
+ for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) {
+ MachineInstr *MI = I;
+ uint64_t Flags = MI->getDesc().TSFlags;
+
+ unsigned FPInstClass = Flags & X86II::FPTypeMask;
+ if (MI->isInlineAsm())
+ FPInstClass = X86II::SpecialFP;
+
+ if (MI->isCopy() && isFPCopy(MI))
+ FPInstClass = X86II::SpecialFP;
+
+ if (MI->isImplicitDef() &&
+ X86::RFP80RegClass.contains(MI->getOperand(0).getReg()))
+ FPInstClass = X86II::SpecialFP;
+
+ if (FPInstClass == X86II::NotFP)
+ continue; // Efficiently ignore non-fp insts!
+
+ MachineInstr *PrevMI = 0;
+ if (I != BB.begin())
+ PrevMI = prior(I);
+
+ ++NumFP; // Keep track of # of pseudo instrs
+ DEBUG(dbgs() << "\nFPInst:\t" << *MI);
+
+ // Get dead variables list now because the MI pointer may be deleted as part
+ // of processing!
+ SmallVector<unsigned, 8> DeadRegs;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDead())
+ DeadRegs.push_back(MO.getReg());
+ }
+
+ switch (FPInstClass) {
+ case X86II::ZeroArgFP: handleZeroArgFP(I); break;
+ case X86II::OneArgFP: handleOneArgFP(I); break; // fstp ST(0)
+ case X86II::OneArgFPRW: handleOneArgFPRW(I); break; // ST(0) = fsqrt(ST(0))
+ case X86II::TwoArgFP: handleTwoArgFP(I); break;
+ case X86II::CompareFP: handleCompareFP(I); break;
+ case X86II::CondMovFP: handleCondMovFP(I); break;
+ case X86II::SpecialFP: handleSpecialFP(I); break;
+ default: llvm_unreachable("Unknown FP Type!");
+ }
+
+ // Check to see if any of the values defined by this instruction are dead
+ // after definition. If so, pop them.
+ for (unsigned i = 0, e = DeadRegs.size(); i != e; ++i) {
+ unsigned Reg = DeadRegs[i];
+ if (Reg >= X86::FP0 && Reg <= X86::FP6) {
+ DEBUG(dbgs() << "Register FP#" << Reg-X86::FP0 << " is dead!\n");
+ freeStackSlotAfter(I, Reg-X86::FP0);
+ }
+ }
+
+ // Print out all of the instructions expanded to if -debug
+ DEBUG(
+ MachineBasicBlock::iterator PrevI(PrevMI);
+ if (I == PrevI) {
+ dbgs() << "Just deleted pseudo instruction\n";
+ } else {
+ MachineBasicBlock::iterator Start = I;
+ // Rewind to first instruction newly inserted.
+ while (Start != BB.begin() && prior(Start) != PrevI) --Start;
+ dbgs() << "Inserted instructions:\n\t";
+ Start->print(dbgs(), &MF.getTarget());
+ while (++Start != llvm::next(I)) {}
+ }
+ dumpStack();
+ );
+ (void)PrevMI;
+
+ Changed = true;
+ }
+
+ finishBlockStack();
+
+ return Changed;
+}
+
+/// setupBlockStack - Use the live bundles to set up our model of the stack
+/// to match predecessors' live out stack.
+void FPS::setupBlockStack() {
+ DEBUG(dbgs() << "\nSetting up live-ins for BB#" << MBB->getNumber()
+ << " derived from " << MBB->getName() << ".\n");
+ StackTop = 0;
+ // Get the live-in bundle for MBB.
+ const LiveBundle &Bundle =
+ LiveBundles[Bundles->getBundle(MBB->getNumber(), false)];
+
+ if (!Bundle.Mask) {
+ DEBUG(dbgs() << "Block has no FP live-ins.\n");
+ return;
+ }
+
+ // Depth-first iteration should ensure that we always have an assigned stack.
+ assert(Bundle.isFixed() && "Reached block before any predecessors");
+
+ // Push the fixed live-in registers.
+ for (unsigned i = Bundle.FixCount; i > 0; --i) {
+ MBB->addLiveIn(X86::ST0+i-1);
+ DEBUG(dbgs() << "Live-in st(" << (i-1) << "): %FP"
+ << unsigned(Bundle.FixStack[i-1]) << '\n');
+ pushReg(Bundle.FixStack[i-1]);
+ }
+
+ // Kill off unwanted live-ins. This can happen with a critical edge.
+ // FIXME: We could keep these live registers around as zombies. They may need
+ // to be revived at the end of a short block. It might save a few instrs.
+ adjustLiveRegs(calcLiveInMask(MBB), MBB->begin());
+ DEBUG(MBB->dump());
+}
+
+/// finishBlockStack - Revive live-outs that are implicitly defined out of
+/// MBB. Shuffle live registers to match the expected fixed stack of any
+/// predecessors, and ensure that all predecessors are expecting the same
+/// stack.
+void FPS::finishBlockStack() {
+ // The RET handling below takes care of return blocks for us.
+ if (MBB->succ_empty())
+ return;
+
+ DEBUG(dbgs() << "Setting up live-outs for BB#" << MBB->getNumber()
+ << " derived from " << MBB->getName() << ".\n");
+
+ // Get MBB's live-out bundle.
+ unsigned BundleIdx = Bundles->getBundle(MBB->getNumber(), true);
+ LiveBundle &Bundle = LiveBundles[BundleIdx];
+
+ // We may need to kill and define some registers to match successors.
+ // FIXME: This can probably be combined with the shuffle below.
+ MachineBasicBlock::iterator Term = MBB->getFirstTerminator();
+ adjustLiveRegs(Bundle.Mask, Term);
+
+ if (!Bundle.Mask) {
+ DEBUG(dbgs() << "No live-outs.\n");
+ return;
+ }
+
+ // Has the stack order been fixed yet?
+ DEBUG(dbgs() << "LB#" << BundleIdx << ": ");
+ if (Bundle.isFixed()) {
+ DEBUG(dbgs() << "Shuffling stack to match.\n");
+ shuffleStackTop(Bundle.FixStack, Bundle.FixCount, Term);
+ } else {
+ // Not fixed yet, we get to choose.
+ DEBUG(dbgs() << "Fixing stack order now.\n");
+ Bundle.FixCount = StackTop;
+ for (unsigned i = 0; i < StackTop; ++i)
+ Bundle.FixStack[i] = getStackEntry(i);
+ }
+}
+
+
+//===----------------------------------------------------------------------===//
+// Efficient Lookup Table Support
+//===----------------------------------------------------------------------===//
+
+namespace {
+ struct TableEntry {
+ unsigned from;
+ unsigned to;
+ bool operator<(const TableEntry &TE) const { return from < TE.from; }
+ friend bool operator<(const TableEntry &TE, unsigned V) {
+ return TE.from < V;
+ }
+ friend bool LLVM_ATTRIBUTE_USED operator<(unsigned V,
+ const TableEntry &TE) {
+ return V < TE.from;
+ }
+ };
+}
+
+#ifndef NDEBUG
+static bool TableIsSorted(const TableEntry *Table, unsigned NumEntries) {
+ for (unsigned i = 0; i != NumEntries-1; ++i)
+ if (!(Table[i] < Table[i+1])) return false;
+ return true;
+}
+#endif
+
+static int Lookup(const TableEntry *Table, unsigned N, unsigned Opcode) {
+ const TableEntry *I = std::lower_bound(Table, Table+N, Opcode);
+ if (I != Table+N && I->from == Opcode)
+ return I->to;
+ return -1;
+}
+
+#ifdef NDEBUG
+#define ASSERT_SORTED(TABLE)
+#else
+#define ASSERT_SORTED(TABLE) \
+ { static bool TABLE##Checked = false; \
+ if (!TABLE##Checked) { \
+ assert(TableIsSorted(TABLE, array_lengthof(TABLE)) && \
+ "All lookup tables must be sorted for efficient access!"); \
+ TABLE##Checked = true; \
+ } \
+ }
+#endif
+
+//===----------------------------------------------------------------------===//
+// Register File -> Register Stack Mapping Methods
+//===----------------------------------------------------------------------===//
+
+// OpcodeTable - Sorted map of register instructions to their stack version.
+// The first element is an register file pseudo instruction, the second is the
+// concrete X86 instruction which uses the register stack.
+//
+static const TableEntry OpcodeTable[] = {
+ { X86::ABS_Fp32 , X86::ABS_F },
+ { X86::ABS_Fp64 , X86::ABS_F },
+ { X86::ABS_Fp80 , X86::ABS_F },
+ { X86::ADD_Fp32m , X86::ADD_F32m },
+ { X86::ADD_Fp64m , X86::ADD_F64m },
+ { X86::ADD_Fp64m32 , X86::ADD_F32m },
+ { X86::ADD_Fp80m32 , X86::ADD_F32m },
+ { X86::ADD_Fp80m64 , X86::ADD_F64m },
+ { X86::ADD_FpI16m32 , X86::ADD_FI16m },
+ { X86::ADD_FpI16m64 , X86::ADD_FI16m },
+ { X86::ADD_FpI16m80 , X86::ADD_FI16m },
+ { X86::ADD_FpI32m32 , X86::ADD_FI32m },
+ { X86::ADD_FpI32m64 , X86::ADD_FI32m },
+ { X86::ADD_FpI32m80 , X86::ADD_FI32m },
+ { X86::CHS_Fp32 , X86::CHS_F },
+ { X86::CHS_Fp64 , X86::CHS_F },
+ { X86::CHS_Fp80 , X86::CHS_F },
+ { X86::CMOVBE_Fp32 , X86::CMOVBE_F },
+ { X86::CMOVBE_Fp64 , X86::CMOVBE_F },
+ { X86::CMOVBE_Fp80 , X86::CMOVBE_F },
+ { X86::CMOVB_Fp32 , X86::CMOVB_F },
+ { X86::CMOVB_Fp64 , X86::CMOVB_F },
+ { X86::CMOVB_Fp80 , X86::CMOVB_F },
+ { X86::CMOVE_Fp32 , X86::CMOVE_F },
+ { X86::CMOVE_Fp64 , X86::CMOVE_F },
+ { X86::CMOVE_Fp80 , X86::CMOVE_F },
+ { X86::CMOVNBE_Fp32 , X86::CMOVNBE_F },
+ { X86::CMOVNBE_Fp64 , X86::CMOVNBE_F },
+ { X86::CMOVNBE_Fp80 , X86::CMOVNBE_F },
+ { X86::CMOVNB_Fp32 , X86::CMOVNB_F },
+ { X86::CMOVNB_Fp64 , X86::CMOVNB_F },
+ { X86::CMOVNB_Fp80 , X86::CMOVNB_F },
+ { X86::CMOVNE_Fp32 , X86::CMOVNE_F },
+ { X86::CMOVNE_Fp64 , X86::CMOVNE_F },
+ { X86::CMOVNE_Fp80 , X86::CMOVNE_F },
+ { X86::CMOVNP_Fp32 , X86::CMOVNP_F },
+ { X86::CMOVNP_Fp64 , X86::CMOVNP_F },
+ { X86::CMOVNP_Fp80 , X86::CMOVNP_F },
+ { X86::CMOVP_Fp32 , X86::CMOVP_F },
+ { X86::CMOVP_Fp64 , X86::CMOVP_F },
+ { X86::CMOVP_Fp80 , X86::CMOVP_F },
+ { X86::COS_Fp32 , X86::COS_F },
+ { X86::COS_Fp64 , X86::COS_F },
+ { X86::COS_Fp80 , X86::COS_F },
+ { X86::DIVR_Fp32m , X86::DIVR_F32m },
+ { X86::DIVR_Fp64m , X86::DIVR_F64m },
+ { X86::DIVR_Fp64m32 , X86::DIVR_F32m },
+ { X86::DIVR_Fp80m32 , X86::DIVR_F32m },
+ { X86::DIVR_Fp80m64 , X86::DIVR_F64m },
+ { X86::DIVR_FpI16m32, X86::DIVR_FI16m},
+ { X86::DIVR_FpI16m64, X86::DIVR_FI16m},
+ { X86::DIVR_FpI16m80, X86::DIVR_FI16m},
+ { X86::DIVR_FpI32m32, X86::DIVR_FI32m},
+ { X86::DIVR_FpI32m64, X86::DIVR_FI32m},
+ { X86::DIVR_FpI32m80, X86::DIVR_FI32m},
+ { X86::DIV_Fp32m , X86::DIV_F32m },
+ { X86::DIV_Fp64m , X86::DIV_F64m },
+ { X86::DIV_Fp64m32 , X86::DIV_F32m },
+ { X86::DIV_Fp80m32 , X86::DIV_F32m },
+ { X86::DIV_Fp80m64 , X86::DIV_F64m },
+ { X86::DIV_FpI16m32 , X86::DIV_FI16m },
+ { X86::DIV_FpI16m64 , X86::DIV_FI16m },
+ { X86::DIV_FpI16m80 , X86::DIV_FI16m },
+ { X86::DIV_FpI32m32 , X86::DIV_FI32m },
+ { X86::DIV_FpI32m64 , X86::DIV_FI32m },
+ { X86::DIV_FpI32m80 , X86::DIV_FI32m },
+ { X86::ILD_Fp16m32 , X86::ILD_F16m },
+ { X86::ILD_Fp16m64 , X86::ILD_F16m },
+ { X86::ILD_Fp16m80 , X86::ILD_F16m },
+ { X86::ILD_Fp32m32 , X86::ILD_F32m },
+ { X86::ILD_Fp32m64 , X86::ILD_F32m },
+ { X86::ILD_Fp32m80 , X86::ILD_F32m },
+ { X86::ILD_Fp64m32 , X86::ILD_F64m },
+ { X86::ILD_Fp64m64 , X86::ILD_F64m },
+ { X86::ILD_Fp64m80 , X86::ILD_F64m },
+ { X86::ISTT_Fp16m32 , X86::ISTT_FP16m},
+ { X86::ISTT_Fp16m64 , X86::ISTT_FP16m},
+ { X86::ISTT_Fp16m80 , X86::ISTT_FP16m},
+ { X86::ISTT_Fp32m32 , X86::ISTT_FP32m},
+ { X86::ISTT_Fp32m64 , X86::ISTT_FP32m},
+ { X86::ISTT_Fp32m80 , X86::ISTT_FP32m},
+ { X86::ISTT_Fp64m32 , X86::ISTT_FP64m},
+ { X86::ISTT_Fp64m64 , X86::ISTT_FP64m},
+ { X86::ISTT_Fp64m80 , X86::ISTT_FP64m},
+ { X86::IST_Fp16m32 , X86::IST_F16m },
+ { X86::IST_Fp16m64 , X86::IST_F16m },
+ { X86::IST_Fp16m80 , X86::IST_F16m },
+ { X86::IST_Fp32m32 , X86::IST_F32m },
+ { X86::IST_Fp32m64 , X86::IST_F32m },
+ { X86::IST_Fp32m80 , X86::IST_F32m },
+ { X86::IST_Fp64m32 , X86::IST_FP64m },
+ { X86::IST_Fp64m64 , X86::IST_FP64m },
+ { X86::IST_Fp64m80 , X86::IST_FP64m },
+ { X86::LD_Fp032 , X86::LD_F0 },
+ { X86::LD_Fp064 , X86::LD_F0 },
+ { X86::LD_Fp080 , X86::LD_F0 },
+ { X86::LD_Fp132 , X86::LD_F1 },
+ { X86::LD_Fp164 , X86::LD_F1 },
+ { X86::LD_Fp180 , X86::LD_F1 },
+ { X86::LD_Fp32m , X86::LD_F32m },
+ { X86::LD_Fp32m64 , X86::LD_F32m },
+ { X86::LD_Fp32m80 , X86::LD_F32m },
+ { X86::LD_Fp64m , X86::LD_F64m },
+ { X86::LD_Fp64m80 , X86::LD_F64m },
+ { X86::LD_Fp80m , X86::LD_F80m },
+ { X86::MUL_Fp32m , X86::MUL_F32m },
+ { X86::MUL_Fp64m , X86::MUL_F64m },
+ { X86::MUL_Fp64m32 , X86::MUL_F32m },
+ { X86::MUL_Fp80m32 , X86::MUL_F32m },
+ { X86::MUL_Fp80m64 , X86::MUL_F64m },
+ { X86::MUL_FpI16m32 , X86::MUL_FI16m },
+ { X86::MUL_FpI16m64 , X86::MUL_FI16m },
+ { X86::MUL_FpI16m80 , X86::MUL_FI16m },
+ { X86::MUL_FpI32m32 , X86::MUL_FI32m },
+ { X86::MUL_FpI32m64 , X86::MUL_FI32m },
+ { X86::MUL_FpI32m80 , X86::MUL_FI32m },
+ { X86::SIN_Fp32 , X86::SIN_F },
+ { X86::SIN_Fp64 , X86::SIN_F },
+ { X86::SIN_Fp80 , X86::SIN_F },
+ { X86::SQRT_Fp32 , X86::SQRT_F },
+ { X86::SQRT_Fp64 , X86::SQRT_F },
+ { X86::SQRT_Fp80 , X86::SQRT_F },
+ { X86::ST_Fp32m , X86::ST_F32m },
+ { X86::ST_Fp64m , X86::ST_F64m },
+ { X86::ST_Fp64m32 , X86::ST_F32m },
+ { X86::ST_Fp80m32 , X86::ST_F32m },
+ { X86::ST_Fp80m64 , X86::ST_F64m },
+ { X86::ST_FpP80m , X86::ST_FP80m },
+ { X86::SUBR_Fp32m , X86::SUBR_F32m },
+ { X86::SUBR_Fp64m , X86::SUBR_F64m },
+ { X86::SUBR_Fp64m32 , X86::SUBR_F32m },
+ { X86::SUBR_Fp80m32 , X86::SUBR_F32m },
+ { X86::SUBR_Fp80m64 , X86::SUBR_F64m },
+ { X86::SUBR_FpI16m32, X86::SUBR_FI16m},
+ { X86::SUBR_FpI16m64, X86::SUBR_FI16m},
+ { X86::SUBR_FpI16m80, X86::SUBR_FI16m},
+ { X86::SUBR_FpI32m32, X86::SUBR_FI32m},
+ { X86::SUBR_FpI32m64, X86::SUBR_FI32m},
+ { X86::SUBR_FpI32m80, X86::SUBR_FI32m},
+ { X86::SUB_Fp32m , X86::SUB_F32m },
+ { X86::SUB_Fp64m , X86::SUB_F64m },
+ { X86::SUB_Fp64m32 , X86::SUB_F32m },
+ { X86::SUB_Fp80m32 , X86::SUB_F32m },
+ { X86::SUB_Fp80m64 , X86::SUB_F64m },
+ { X86::SUB_FpI16m32 , X86::SUB_FI16m },
+ { X86::SUB_FpI16m64 , X86::SUB_FI16m },
+ { X86::SUB_FpI16m80 , X86::SUB_FI16m },
+ { X86::SUB_FpI32m32 , X86::SUB_FI32m },
+ { X86::SUB_FpI32m64 , X86::SUB_FI32m },
+ { X86::SUB_FpI32m80 , X86::SUB_FI32m },
+ { X86::TST_Fp32 , X86::TST_F },
+ { X86::TST_Fp64 , X86::TST_F },
+ { X86::TST_Fp80 , X86::TST_F },
+ { X86::UCOM_FpIr32 , X86::UCOM_FIr },
+ { X86::UCOM_FpIr64 , X86::UCOM_FIr },
+ { X86::UCOM_FpIr80 , X86::UCOM_FIr },
+ { X86::UCOM_Fpr32 , X86::UCOM_Fr },
+ { X86::UCOM_Fpr64 , X86::UCOM_Fr },
+ { X86::UCOM_Fpr80 , X86::UCOM_Fr },
+};
+
+static unsigned getConcreteOpcode(unsigned Opcode) {
+ ASSERT_SORTED(OpcodeTable);
+ int Opc = Lookup(OpcodeTable, array_lengthof(OpcodeTable), Opcode);
+ assert(Opc != -1 && "FP Stack instruction not in OpcodeTable!");
+ return Opc;
+}
+
+//===----------------------------------------------------------------------===//
+// Helper Methods
+//===----------------------------------------------------------------------===//
+
+// PopTable - Sorted map of instructions to their popping version. The first
+// element is an instruction, the second is the version which pops.
+//
+static const TableEntry PopTable[] = {
+ { X86::ADD_FrST0 , X86::ADD_FPrST0 },
+
+ { X86::DIVR_FrST0, X86::DIVR_FPrST0 },
+ { X86::DIV_FrST0 , X86::DIV_FPrST0 },
+
+ { X86::IST_F16m , X86::IST_FP16m },
+ { X86::IST_F32m , X86::IST_FP32m },
+
+ { X86::MUL_FrST0 , X86::MUL_FPrST0 },
+
+ { X86::ST_F32m , X86::ST_FP32m },
+ { X86::ST_F64m , X86::ST_FP64m },
+ { X86::ST_Frr , X86::ST_FPrr },
+
+ { X86::SUBR_FrST0, X86::SUBR_FPrST0 },
+ { X86::SUB_FrST0 , X86::SUB_FPrST0 },
+
+ { X86::UCOM_FIr , X86::UCOM_FIPr },
+
+ { X86::UCOM_FPr , X86::UCOM_FPPr },
+ { X86::UCOM_Fr , X86::UCOM_FPr },
+};
+
+/// popStackAfter - Pop the current value off of the top of the FP stack after
+/// the specified instruction. This attempts to be sneaky and combine the pop
+/// into the instruction itself if possible. The iterator is left pointing to
+/// the last instruction, be it a new pop instruction inserted, or the old
+/// instruction if it was modified in place.
+///
+void FPS::popStackAfter(MachineBasicBlock::iterator &I) {
+ MachineInstr* MI = I;
+ DebugLoc dl = MI->getDebugLoc();
+ ASSERT_SORTED(PopTable);
+ if (StackTop == 0)
+ report_fatal_error("Cannot pop empty stack!");
+ RegMap[Stack[--StackTop]] = ~0; // Update state
+
+ // Check to see if there is a popping version of this instruction...
+ int Opcode = Lookup(PopTable, array_lengthof(PopTable), I->getOpcode());
+ if (Opcode != -1) {
+ I->setDesc(TII->get(Opcode));
+ if (Opcode == X86::UCOM_FPPr)
+ I->RemoveOperand(0);
+ } else { // Insert an explicit pop
+ I = BuildMI(*MBB, ++I, dl, TII->get(X86::ST_FPrr)).addReg(X86::ST0);
+ }
+}
+
+/// freeStackSlotAfter - Free the specified register from the register stack, so
+/// that it is no longer in a register. If the register is currently at the top
+/// of the stack, we just pop the current instruction, otherwise we store the
+/// current top-of-stack into the specified slot, then pop the top of stack.
+void FPS::freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned FPRegNo) {
+ if (getStackEntry(0) == FPRegNo) { // already at the top of stack? easy.
+ popStackAfter(I);
+ return;
+ }
+
+ // Otherwise, store the top of stack into the dead slot, killing the operand
+ // without having to add in an explicit xchg then pop.
+ //
+ I = freeStackSlotBefore(++I, FPRegNo);
+}
+
+/// freeStackSlotBefore - Free the specified register without trying any
+/// folding.
+MachineBasicBlock::iterator
+FPS::freeStackSlotBefore(MachineBasicBlock::iterator I, unsigned FPRegNo) {
+ unsigned STReg = getSTReg(FPRegNo);
+ unsigned OldSlot = getSlot(FPRegNo);
+ unsigned TopReg = Stack[StackTop-1];
+ Stack[OldSlot] = TopReg;
+ RegMap[TopReg] = OldSlot;
+ RegMap[FPRegNo] = ~0;
+ Stack[--StackTop] = ~0;
+ return BuildMI(*MBB, I, DebugLoc(), TII->get(X86::ST_FPrr)).addReg(STReg);
+}
+
+/// adjustLiveRegs - Kill and revive registers such that exactly the FP
+/// registers with a bit in Mask are live.
+void FPS::adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I) {
+ unsigned Defs = Mask;
+ unsigned Kills = 0;
+ for (unsigned i = 0; i < StackTop; ++i) {
+ unsigned RegNo = Stack[i];
+ if (!(Defs & (1 << RegNo)))
+ // This register is live, but we don't want it.
+ Kills |= (1 << RegNo);
+ else
+ // We don't need to imp-def this live register.
+ Defs &= ~(1 << RegNo);
+ }
+ assert((Kills & Defs) == 0 && "Register needs killing and def'ing?");
+
+ // Produce implicit-defs for free by using killed registers.
+ while (Kills && Defs) {
+ unsigned KReg = CountTrailingZeros_32(Kills);
+ unsigned DReg = CountTrailingZeros_32(Defs);
+ DEBUG(dbgs() << "Renaming %FP" << KReg << " as imp %FP" << DReg << "\n");
+ std::swap(Stack[getSlot(KReg)], Stack[getSlot(DReg)]);
+ std::swap(RegMap[KReg], RegMap[DReg]);
+ Kills &= ~(1 << KReg);
+ Defs &= ~(1 << DReg);
+ }
+
+ // Kill registers by popping.
+ if (Kills && I != MBB->begin()) {
+ MachineBasicBlock::iterator I2 = llvm::prior(I);
+ while (StackTop) {
+ unsigned KReg = getStackEntry(0);
+ if (!(Kills & (1 << KReg)))
+ break;
+ DEBUG(dbgs() << "Popping %FP" << KReg << "\n");
+ popStackAfter(I2);
+ Kills &= ~(1 << KReg);
+ }
+ }
+
+ // Manually kill the rest.
+ while (Kills) {
+ unsigned KReg = CountTrailingZeros_32(Kills);
+ DEBUG(dbgs() << "Killing %FP" << KReg << "\n");
+ freeStackSlotBefore(I, KReg);
+ Kills &= ~(1 << KReg);
+ }
+
+ // Load zeros for all the imp-defs.
+ while(Defs) {
+ unsigned DReg = CountTrailingZeros_32(Defs);
+ DEBUG(dbgs() << "Defining %FP" << DReg << " as 0\n");
+ BuildMI(*MBB, I, DebugLoc(), TII->get(X86::LD_F0));
+ pushReg(DReg);
+ Defs &= ~(1 << DReg);
+ }
+
+ // Now we should have the correct registers live.
+ DEBUG(dumpStack());
+ assert(StackTop == CountPopulation_32(Mask) && "Live count mismatch");
+}
+
+/// shuffleStackTop - emit fxch instructions before I to shuffle the top
+/// FixCount entries into the order given by FixStack.
+/// FIXME: Is there a better algorithm than insertion sort?
+void FPS::shuffleStackTop(const unsigned char *FixStack,
+ unsigned FixCount,
+ MachineBasicBlock::iterator I) {
+ // Move items into place, starting from the desired stack bottom.
+ while (FixCount--) {
+ // Old register at position FixCount.
+ unsigned OldReg = getStackEntry(FixCount);
+ // Desired register at position FixCount.
+ unsigned Reg = FixStack[FixCount];
+ if (Reg == OldReg)
+ continue;
+ // (Reg st0) (OldReg st0) = (Reg OldReg st0)
+ moveToTop(Reg, I);
+ if (FixCount > 0)
+ moveToTop(OldReg, I);
+ }
+ DEBUG(dumpStack());
+}
+
+
+//===----------------------------------------------------------------------===//
+// Instruction transformation implementation
+//===----------------------------------------------------------------------===//
+
+/// handleZeroArgFP - ST(0) = fld0 ST(0) = flds <mem>
+///
+void FPS::handleZeroArgFP(MachineBasicBlock::iterator &I) {
+ MachineInstr *MI = I;
+ unsigned DestReg = getFPReg(MI->getOperand(0));
+
+ // Change from the pseudo instruction to the concrete instruction.
+ MI->RemoveOperand(0); // Remove the explicit ST(0) operand
+ MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
+
+ // Result gets pushed on the stack.
+ pushReg(DestReg);
+}
+
+/// handleOneArgFP - fst <mem>, ST(0)
+///
+void FPS::handleOneArgFP(MachineBasicBlock::iterator &I) {
+ MachineInstr *MI = I;
+ unsigned NumOps = MI->getDesc().getNumOperands();
+ assert((NumOps == X86::AddrNumOperands + 1 || NumOps == 1) &&
+ "Can only handle fst* & ftst instructions!");
+
+ // Is this the last use of the source register?
+ unsigned Reg = getFPReg(MI->getOperand(NumOps-1));
+ bool KillsSrc = MI->killsRegister(X86::FP0+Reg);
+
+ if (KillsSrc)
+ duplicatePendingSTBeforeKill(Reg, I);
+
+ // FISTP64m is strange because there isn't a non-popping versions.
+ // If we have one _and_ we don't want to pop the operand, duplicate the value
+ // on the stack instead of moving it. This ensure that popping the value is
+ // always ok.
+ // Ditto FISTTP16m, FISTTP32m, FISTTP64m, ST_FpP80m.
+ //
+ if (!KillsSrc &&
+ (MI->getOpcode() == X86::IST_Fp64m32 ||
+ MI->getOpcode() == X86::ISTT_Fp16m32 ||
+ MI->getOpcode() == X86::ISTT_Fp32m32 ||
+ MI->getOpcode() == X86::ISTT_Fp64m32 ||
+ MI->getOpcode() == X86::IST_Fp64m64 ||
+ MI->getOpcode() == X86::ISTT_Fp16m64 ||
+ MI->getOpcode() == X86::ISTT_Fp32m64 ||
+ MI->getOpcode() == X86::ISTT_Fp64m64 ||
+ MI->getOpcode() == X86::IST_Fp64m80 ||
+ MI->getOpcode() == X86::ISTT_Fp16m80 ||
+ MI->getOpcode() == X86::ISTT_Fp32m80 ||
+ MI->getOpcode() == X86::ISTT_Fp64m80 ||
+ MI->getOpcode() == X86::ST_FpP80m)) {
+ duplicateToTop(Reg, getScratchReg(), I);
+ } else {
+ moveToTop(Reg, I); // Move to the top of the stack...
+ }
+
+ // Convert from the pseudo instruction to the concrete instruction.
+ MI->RemoveOperand(NumOps-1); // Remove explicit ST(0) operand
+ MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
+
+ if (MI->getOpcode() == X86::IST_FP64m ||
+ MI->getOpcode() == X86::ISTT_FP16m ||
+ MI->getOpcode() == X86::ISTT_FP32m ||
+ MI->getOpcode() == X86::ISTT_FP64m ||
+ MI->getOpcode() == X86::ST_FP80m) {
+ if (StackTop == 0)
+ report_fatal_error("Stack empty??");
+ --StackTop;
+ } else if (KillsSrc) { // Last use of operand?
+ popStackAfter(I);
+ }
+}
+
+
+/// handleOneArgFPRW: Handle instructions that read from the top of stack and
+/// replace the value with a newly computed value. These instructions may have
+/// non-fp operands after their FP operands.
+///
+/// Examples:
+/// R1 = fchs R2
+/// R1 = fadd R2, [mem]
+///
+void FPS::handleOneArgFPRW(MachineBasicBlock::iterator &I) {
+ MachineInstr *MI = I;
+#ifndef NDEBUG
+ unsigned NumOps = MI->getDesc().getNumOperands();
+ assert(NumOps >= 2 && "FPRW instructions must have 2 ops!!");
+#endif
+
+ // Is this the last use of the source register?
+ unsigned Reg = getFPReg(MI->getOperand(1));
+ bool KillsSrc = MI->killsRegister(X86::FP0+Reg);
+
+ if (KillsSrc) {
+ duplicatePendingSTBeforeKill(Reg, I);
+ // If this is the last use of the source register, just make sure it's on
+ // the top of the stack.
+ moveToTop(Reg, I);
+ if (StackTop == 0)
+ report_fatal_error("Stack cannot be empty!");
+ --StackTop;
+ pushReg(getFPReg(MI->getOperand(0)));
+ } else {
+ // If this is not the last use of the source register, _copy_ it to the top
+ // of the stack.
+ duplicateToTop(Reg, getFPReg(MI->getOperand(0)), I);
+ }
+
+ // Change from the pseudo instruction to the concrete instruction.
+ MI->RemoveOperand(1); // Drop the source operand.
+ MI->RemoveOperand(0); // Drop the destination operand.
+ MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
+}
+
+
+//===----------------------------------------------------------------------===//
+// Define tables of various ways to map pseudo instructions
+//
+
+// ForwardST0Table - Map: A = B op C into: ST(0) = ST(0) op ST(i)
+static const TableEntry ForwardST0Table[] = {
+ { X86::ADD_Fp32 , X86::ADD_FST0r },
+ { X86::ADD_Fp64 , X86::ADD_FST0r },
+ { X86::ADD_Fp80 , X86::ADD_FST0r },
+ { X86::DIV_Fp32 , X86::DIV_FST0r },
+ { X86::DIV_Fp64 , X86::DIV_FST0r },
+ { X86::DIV_Fp80 , X86::DIV_FST0r },
+ { X86::MUL_Fp32 , X86::MUL_FST0r },
+ { X86::MUL_Fp64 , X86::MUL_FST0r },
+ { X86::MUL_Fp80 , X86::MUL_FST0r },
+ { X86::SUB_Fp32 , X86::SUB_FST0r },
+ { X86::SUB_Fp64 , X86::SUB_FST0r },
+ { X86::SUB_Fp80 , X86::SUB_FST0r },
+};
+
+// ReverseST0Table - Map: A = B op C into: ST(0) = ST(i) op ST(0)
+static const TableEntry ReverseST0Table[] = {
+ { X86::ADD_Fp32 , X86::ADD_FST0r }, // commutative
+ { X86::ADD_Fp64 , X86::ADD_FST0r }, // commutative
+ { X86::ADD_Fp80 , X86::ADD_FST0r }, // commutative
+ { X86::DIV_Fp32 , X86::DIVR_FST0r },
+ { X86::DIV_Fp64 , X86::DIVR_FST0r },
+ { X86::DIV_Fp80 , X86::DIVR_FST0r },
+ { X86::MUL_Fp32 , X86::MUL_FST0r }, // commutative
+ { X86::MUL_Fp64 , X86::MUL_FST0r }, // commutative
+ { X86::MUL_Fp80 , X86::MUL_FST0r }, // commutative
+ { X86::SUB_Fp32 , X86::SUBR_FST0r },
+ { X86::SUB_Fp64 , X86::SUBR_FST0r },
+ { X86::SUB_Fp80 , X86::SUBR_FST0r },
+};
+
+// ForwardSTiTable - Map: A = B op C into: ST(i) = ST(0) op ST(i)
+static const TableEntry ForwardSTiTable[] = {
+ { X86::ADD_Fp32 , X86::ADD_FrST0 }, // commutative
+ { X86::ADD_Fp64 , X86::ADD_FrST0 }, // commutative
+ { X86::ADD_Fp80 , X86::ADD_FrST0 }, // commutative
+ { X86::DIV_Fp32 , X86::DIVR_FrST0 },
+ { X86::DIV_Fp64 , X86::DIVR_FrST0 },
+ { X86::DIV_Fp80 , X86::DIVR_FrST0 },
+ { X86::MUL_Fp32 , X86::MUL_FrST0 }, // commutative
+ { X86::MUL_Fp64 , X86::MUL_FrST0 }, // commutative
+ { X86::MUL_Fp80 , X86::MUL_FrST0 }, // commutative
+ { X86::SUB_Fp32 , X86::SUBR_FrST0 },
+ { X86::SUB_Fp64 , X86::SUBR_FrST0 },
+ { X86::SUB_Fp80 , X86::SUBR_FrST0 },
+};
+
+// ReverseSTiTable - Map: A = B op C into: ST(i) = ST(i) op ST(0)
+static const TableEntry ReverseSTiTable[] = {
+ { X86::ADD_Fp32 , X86::ADD_FrST0 },
+ { X86::ADD_Fp64 , X86::ADD_FrST0 },
+ { X86::ADD_Fp80 , X86::ADD_FrST0 },
+ { X86::DIV_Fp32 , X86::DIV_FrST0 },
+ { X86::DIV_Fp64 , X86::DIV_FrST0 },
+ { X86::DIV_Fp80 , X86::DIV_FrST0 },
+ { X86::MUL_Fp32 , X86::MUL_FrST0 },
+ { X86::MUL_Fp64 , X86::MUL_FrST0 },
+ { X86::MUL_Fp80 , X86::MUL_FrST0 },
+ { X86::SUB_Fp32 , X86::SUB_FrST0 },
+ { X86::SUB_Fp64 , X86::SUB_FrST0 },
+ { X86::SUB_Fp80 , X86::SUB_FrST0 },
+};
+
+
+/// handleTwoArgFP - Handle instructions like FADD and friends which are virtual
+/// instructions which need to be simplified and possibly transformed.
+///
+/// Result: ST(0) = fsub ST(0), ST(i)
+/// ST(i) = fsub ST(0), ST(i)
+/// ST(0) = fsubr ST(0), ST(i)
+/// ST(i) = fsubr ST(0), ST(i)
+///
+void FPS::handleTwoArgFP(MachineBasicBlock::iterator &I) {
+ ASSERT_SORTED(ForwardST0Table); ASSERT_SORTED(ReverseST0Table);
+ ASSERT_SORTED(ForwardSTiTable); ASSERT_SORTED(ReverseSTiTable);
+ MachineInstr *MI = I;
+
+ unsigned NumOperands = MI->getDesc().getNumOperands();
+ assert(NumOperands == 3 && "Illegal TwoArgFP instruction!");
+ unsigned Dest = getFPReg(MI->getOperand(0));
+ unsigned Op0 = getFPReg(MI->getOperand(NumOperands-2));
+ unsigned Op1 = getFPReg(MI->getOperand(NumOperands-1));
+ bool KillsOp0 = MI->killsRegister(X86::FP0+Op0);
+ bool KillsOp1 = MI->killsRegister(X86::FP0+Op1);
+ DebugLoc dl = MI->getDebugLoc();
+
+ unsigned TOS = getStackEntry(0);
+
+ // One of our operands must be on the top of the stack. If neither is yet, we
+ // need to move one.
+ if (Op0 != TOS && Op1 != TOS) { // No operand at TOS?
+ // We can choose to move either operand to the top of the stack. If one of
+ // the operands is killed by this instruction, we want that one so that we
+ // can update right on top of the old version.
+ if (KillsOp0) {
+ moveToTop(Op0, I); // Move dead operand to TOS.
+ TOS = Op0;
+ } else if (KillsOp1) {
+ moveToTop(Op1, I);
+ TOS = Op1;
+ } else {
+ // All of the operands are live after this instruction executes, so we
+ // cannot update on top of any operand. Because of this, we must
+ // duplicate one of the stack elements to the top. It doesn't matter
+ // which one we pick.
+ //
+ duplicateToTop(Op0, Dest, I);
+ Op0 = TOS = Dest;
+ KillsOp0 = true;
+ }
+ } else if (!KillsOp0 && !KillsOp1) {
+ // If we DO have one of our operands at the top of the stack, but we don't
+ // have a dead operand, we must duplicate one of the operands to a new slot
+ // on the stack.
+ duplicateToTop(Op0, Dest, I);
+ Op0 = TOS = Dest;
+ KillsOp0 = true;
+ }
+
+ // Now we know that one of our operands is on the top of the stack, and at
+ // least one of our operands is killed by this instruction.
+ assert((TOS == Op0 || TOS == Op1) && (KillsOp0 || KillsOp1) &&
+ "Stack conditions not set up right!");
+
+ // We decide which form to use based on what is on the top of the stack, and
+ // which operand is killed by this instruction.
+ const TableEntry *InstTable;
+ bool isForward = TOS == Op0;
+ bool updateST0 = (TOS == Op0 && !KillsOp1) || (TOS == Op1 && !KillsOp0);
+ if (updateST0) {
+ if (isForward)
+ InstTable = ForwardST0Table;
+ else
+ InstTable = ReverseST0Table;
+ } else {
+ if (isForward)
+ InstTable = ForwardSTiTable;
+ else
+ InstTable = ReverseSTiTable;
+ }
+
+ int Opcode = Lookup(InstTable, array_lengthof(ForwardST0Table),
+ MI->getOpcode());
+ assert(Opcode != -1 && "Unknown TwoArgFP pseudo instruction!");
+
+ // NotTOS - The register which is not on the top of stack...
+ unsigned NotTOS = (TOS == Op0) ? Op1 : Op0;
+
+ // Replace the old instruction with a new instruction
+ MBB->remove(I++);
+ I = BuildMI(*MBB, I, dl, TII->get(Opcode)).addReg(getSTReg(NotTOS));
+
+ // If both operands are killed, pop one off of the stack in addition to
+ // overwriting the other one.
+ if (KillsOp0 && KillsOp1 && Op0 != Op1) {
+ assert(!updateST0 && "Should have updated other operand!");
+ popStackAfter(I); // Pop the top of stack
+ }
+
+ // Update stack information so that we know the destination register is now on
+ // the stack.
+ unsigned UpdatedSlot = getSlot(updateST0 ? TOS : NotTOS);
+ assert(UpdatedSlot < StackTop && Dest < 7);
+ Stack[UpdatedSlot] = Dest;
+ RegMap[Dest] = UpdatedSlot;
+ MBB->getParent()->DeleteMachineInstr(MI); // Remove the old instruction
+}
+
+/// handleCompareFP - Handle FUCOM and FUCOMI instructions, which have two FP
+/// register arguments and no explicit destinations.
+///
+void FPS::handleCompareFP(MachineBasicBlock::iterator &I) {
+ ASSERT_SORTED(ForwardST0Table); ASSERT_SORTED(ReverseST0Table);
+ ASSERT_SORTED(ForwardSTiTable); ASSERT_SORTED(ReverseSTiTable);
+ MachineInstr *MI = I;
+
+ unsigned NumOperands = MI->getDesc().getNumOperands();
+ assert(NumOperands == 2 && "Illegal FUCOM* instruction!");
+ unsigned Op0 = getFPReg(MI->getOperand(NumOperands-2));
+ unsigned Op1 = getFPReg(MI->getOperand(NumOperands-1));
+ bool KillsOp0 = MI->killsRegister(X86::FP0+Op0);
+ bool KillsOp1 = MI->killsRegister(X86::FP0+Op1);
+
+ // Make sure the first operand is on the top of stack, the other one can be
+ // anywhere.
+ moveToTop(Op0, I);
+
+ // Change from the pseudo instruction to the concrete instruction.
+ MI->getOperand(0).setReg(getSTReg(Op1));
+ MI->RemoveOperand(1);
+ MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
+
+ // If any of the operands are killed by this instruction, free them.
+ if (KillsOp0) freeStackSlotAfter(I, Op0);
+ if (KillsOp1 && Op0 != Op1) freeStackSlotAfter(I, Op1);
+}
+
+/// handleCondMovFP - Handle two address conditional move instructions. These
+/// instructions move a st(i) register to st(0) iff a condition is true. These
+/// instructions require that the first operand is at the top of the stack, but
+/// otherwise don't modify the stack at all.
+void FPS::handleCondMovFP(MachineBasicBlock::iterator &I) {
+ MachineInstr *MI = I;
+
+ unsigned Op0 = getFPReg(MI->getOperand(0));
+ unsigned Op1 = getFPReg(MI->getOperand(2));
+ bool KillsOp1 = MI->killsRegister(X86::FP0+Op1);
+
+ // The first operand *must* be on the top of the stack.
+ moveToTop(Op0, I);
+
+ // Change the second operand to the stack register that the operand is in.
+ // Change from the pseudo instruction to the concrete instruction.
+ MI->RemoveOperand(0);
+ MI->RemoveOperand(1);
+ MI->getOperand(0).setReg(getSTReg(Op1));
+ MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
+
+ // If we kill the second operand, make sure to pop it from the stack.
+ if (Op0 != Op1 && KillsOp1) {
+ // Get this value off of the register stack.
+ freeStackSlotAfter(I, Op1);
+ }
+}
+
+
+/// handleSpecialFP - Handle special instructions which behave unlike other
+/// floating point instructions. This is primarily intended for use by pseudo
+/// instructions.
+///
+void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
+ MachineInstr *MI = I;
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("Unknown SpecialFP instruction!");
+ case TargetOpcode::COPY: {
+ // We handle three kinds of copies: FP <- FP, FP <- ST, and ST <- FP.
+ const MachineOperand &MO1 = MI->getOperand(1);
+ const MachineOperand &MO0 = MI->getOperand(0);
+ unsigned DstST = MO0.getReg() - X86::ST0;
+ unsigned SrcST = MO1.getReg() - X86::ST0;
+ bool KillsSrc = MI->killsRegister(MO1.getReg());
+
+ // ST = COPY FP. Set up a pending ST register.
+ if (DstST < 8) {
+ unsigned SrcFP = getFPReg(MO1);
+ assert(isLive(SrcFP) && "Cannot copy dead register");
+ assert(!MO0.isDead() && "Cannot copy to dead ST register");
+
+ // Unallocated STs are marked as the nonexistent FP255.
+ while (NumPendingSTs <= DstST)
+ PendingST[NumPendingSTs++] = NumFPRegs;
+
+ // STi could still be live from a previous inline asm.
+ if (isScratchReg(PendingST[DstST])) {
+ DEBUG(dbgs() << "Clobbering old ST in FP" << unsigned(PendingST[DstST])
+ << '\n');
+ freeStackSlotBefore(MI, PendingST[DstST]);
+ }
+
+ // When the source is killed, allocate a scratch FP register.
+ if (KillsSrc) {
+ duplicatePendingSTBeforeKill(SrcFP, I);
+ unsigned Slot = getSlot(SrcFP);
+ unsigned SR = getScratchReg();
+ PendingST[DstST] = SR;
+ Stack[Slot] = SR;
+ RegMap[SR] = Slot;
+ } else
+ PendingST[DstST] = SrcFP;
+ break;
+ }
+
+ // FP = COPY ST. Extract fixed stack value.
+ // Any instruction defining ST registers must have assigned them to a
+ // scratch register.
+ if (SrcST < 8) {
+ unsigned DstFP = getFPReg(MO0);
+ assert(!isLive(DstFP) && "Cannot copy ST to live FP register");
+ assert(NumPendingSTs > SrcST && "Cannot copy from dead ST register");
+ unsigned SrcFP = PendingST[SrcST];
+ assert(isScratchReg(SrcFP) && "Expected ST in a scratch register");
+ assert(isLive(SrcFP) && "Scratch holding ST is dead");
+
+ // DstFP steals the stack slot from SrcFP.
+ unsigned Slot = getSlot(SrcFP);
+ Stack[Slot] = DstFP;
+ RegMap[DstFP] = Slot;
+
+ // Always treat the ST as killed.
+ PendingST[SrcST] = NumFPRegs;
+ while (NumPendingSTs && PendingST[NumPendingSTs - 1] == NumFPRegs)
+ --NumPendingSTs;
+ break;
+ }
+
+ // FP <- FP copy.
+ unsigned DstFP = getFPReg(MO0);
+ unsigned SrcFP = getFPReg(MO1);
+ assert(isLive(SrcFP) && "Cannot copy dead register");
+ if (KillsSrc) {
+ // If the input operand is killed, we can just change the owner of the
+ // incoming stack slot into the result.
+ unsigned Slot = getSlot(SrcFP);
+ Stack[Slot] = DstFP;
+ RegMap[DstFP] = Slot;
+ } else {
+ // For COPY we just duplicate the specified value to a new stack slot.
+ // This could be made better, but would require substantial changes.
+ duplicateToTop(SrcFP, DstFP, I);
+ }
+ break;
+ }
+
+ case TargetOpcode::IMPLICIT_DEF: {
+ // All FP registers must be explicitly defined, so load a 0 instead.
+ unsigned Reg = MI->getOperand(0).getReg() - X86::FP0;
+ DEBUG(dbgs() << "Emitting LD_F0 for implicit FP" << Reg << '\n');
+ BuildMI(*MBB, I, MI->getDebugLoc(), TII->get(X86::LD_F0));
+ pushReg(Reg);
+ break;
+ }
+
+ case X86::FpPOP_RETVAL: {
+ // The FpPOP_RETVAL instruction is used after calls that return a value on
+ // the floating point stack. We cannot model this with ST defs since CALL
+ // instructions have fixed clobber lists. This instruction is interpreted
+ // to mean that there is one more live register on the stack than we
+ // thought.
+ //
+ // This means that StackTop does not match the hardware stack between a
+ // call and the FpPOP_RETVAL instructions. We do tolerate FP instructions
+ // between CALL and FpPOP_RETVAL as long as they don't overflow the
+ // hardware stack.
+ unsigned DstFP = getFPReg(MI->getOperand(0));
+
+ // Move existing stack elements up to reflect reality.
+ assert(StackTop < 8 && "Stack overflowed before FpPOP_RETVAL");
+ if (StackTop) {
+ std::copy_backward(Stack, Stack + StackTop, Stack + StackTop + 1);
+ for (unsigned i = 0; i != NumFPRegs; ++i)
+ ++RegMap[i];
+ }
+ ++StackTop;
+
+ // DstFP is the new bottom of the stack.
+ Stack[0] = DstFP;
+ RegMap[DstFP] = 0;
+
+ // DstFP will be killed by processBasicBlock if this was a dead def.
+ break;
+ }
+
+ case TargetOpcode::INLINEASM: {
+ // The inline asm MachineInstr currently only *uses* FP registers for the
+ // 'f' constraint. These should be turned into the current ST(x) register
+ // in the machine instr.
+ //
+ // There are special rules for x87 inline assembly. The compiler must know
+ // exactly how many registers are popped and pushed implicitly by the asm.
+ // Otherwise it is not possible to restore the stack state after the inline
+ // asm.
+ //
+ // There are 3 kinds of input operands:
+ //
+ // 1. Popped inputs. These must appear at the stack top in ST0-STn. A
+ // popped input operand must be in a fixed stack slot, and it is either
+ // tied to an output operand, or in the clobber list. The MI has ST use
+ // and def operands for these inputs.
+ //
+ // 2. Fixed inputs. These inputs appear in fixed stack slots, but are
+ // preserved by the inline asm. The fixed stack slots must be STn-STm
+ // following the popped inputs. A fixed input operand cannot be tied to
+ // an output or appear in the clobber list. The MI has ST use operands
+ // and no defs for these inputs.
+ //
+ // 3. Preserved inputs. These inputs use the "f" constraint which is
+ // represented as an FP register. The inline asm won't change these
+ // stack slots.
+ //
+ // Outputs must be in ST registers, FP outputs are not allowed. Clobbered
+ // registers do not count as output operands. The inline asm changes the
+ // stack as if it popped all the popped inputs and then pushed all the
+ // output operands.
+
+ // Scan the assembly for ST registers used, defined and clobbered. We can
+ // only tell clobbers from defs by looking at the asm descriptor.
+ unsigned STUses = 0, STDefs = 0, STClobbers = 0, STDeadDefs = 0;
+ unsigned NumOps = 0;
+ for (unsigned i = InlineAsm::MIOp_FirstOperand, e = MI->getNumOperands();
+ i != e && MI->getOperand(i).isImm(); i += 1 + NumOps) {
+ unsigned Flags = MI->getOperand(i).getImm();
+ NumOps = InlineAsm::getNumOperandRegisters(Flags);
+ if (NumOps != 1)
+ continue;
+ const MachineOperand &MO = MI->getOperand(i + 1);
+ if (!MO.isReg())
+ continue;
+ unsigned STReg = MO.getReg() - X86::ST0;
+ if (STReg >= 8)
+ continue;
+
+ switch (InlineAsm::getKind(Flags)) {
+ case InlineAsm::Kind_RegUse:
+ STUses |= (1u << STReg);
+ break;
+ case InlineAsm::Kind_RegDef:
+ case InlineAsm::Kind_RegDefEarlyClobber:
+ STDefs |= (1u << STReg);
+ if (MO.isDead())
+ STDeadDefs |= (1u << STReg);
+ break;
+ case InlineAsm::Kind_Clobber:
+ STClobbers |= (1u << STReg);
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (STUses && !isMask_32(STUses))
+ MI->emitError("fixed input regs must be last on the x87 stack");
+ unsigned NumSTUses = CountTrailingOnes_32(STUses);
+
+ // Defs must be contiguous from the stack top. ST0-STn.
+ if (STDefs && !isMask_32(STDefs)) {
+ MI->emitError("output regs must be last on the x87 stack");
+ STDefs = NextPowerOf2(STDefs) - 1;
+ }
+ unsigned NumSTDefs = CountTrailingOnes_32(STDefs);
+
+ // So must the clobbered stack slots. ST0-STm, m >= n.
+ if (STClobbers && !isMask_32(STDefs | STClobbers))
+ MI->emitError("clobbers must be last on the x87 stack");
+
+ // Popped inputs are the ones that are also clobbered or defined.
+ unsigned STPopped = STUses & (STDefs | STClobbers);
+ if (STPopped && !isMask_32(STPopped))
+ MI->emitError("implicitly popped regs must be last on the x87 stack");
+ unsigned NumSTPopped = CountTrailingOnes_32(STPopped);
+
+ DEBUG(dbgs() << "Asm uses " << NumSTUses << " fixed regs, pops "
+ << NumSTPopped << ", and defines " << NumSTDefs << " regs.\n");
+
+ // Scan the instruction for FP uses corresponding to "f" constraints.
+ // Collect FP registers to kill afer the instruction.
+ // Always kill all the scratch regs.
+ unsigned FPKills = ((1u << NumFPRegs) - 1) & ~0xff;
+ unsigned FPUsed = 0;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &Op = MI->getOperand(i);
+ if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
+ continue;
+ if (!Op.isUse())
+ MI->emitError("illegal \"f\" output constraint");
+ unsigned FPReg = getFPReg(Op);
+ FPUsed |= 1U << FPReg;
+
+ // If we kill this operand, make sure to pop it from the stack after the
+ // asm. We just remember it for now, and pop them all off at the end in
+ // a batch.
+ if (Op.isKill())
+ FPKills |= 1U << FPReg;
+ }
+
+ // The popped inputs will be killed by the instruction, so duplicate them
+ // if the FP register needs to be live after the instruction, or if it is
+ // used in the instruction itself. We effectively treat the popped inputs
+ // as early clobbers.
+ for (unsigned i = 0; i < NumSTPopped; ++i) {
+ if ((FPKills & ~FPUsed) & (1u << PendingST[i]))
+ continue;
+ unsigned SR = getScratchReg();
+ duplicateToTop(PendingST[i], SR, I);
+ DEBUG(dbgs() << "Duplicating ST" << i << " in FP"
+ << unsigned(PendingST[i]) << " to avoid clobbering it.\n");
+ PendingST[i] = SR;
+ }
+
+ // Make sure we have a unique live register for every fixed use. Some of
+ // them could be undef uses, and we need to emit LD_F0 instructions.
+ for (unsigned i = 0; i < NumSTUses; ++i) {
+ if (i < NumPendingSTs && PendingST[i] < NumFPRegs) {
+ // Check for shared assignments.
+ for (unsigned j = 0; j < i; ++j) {
+ if (PendingST[j] != PendingST[i])
+ continue;
+ // STi and STj are inn the same register, create a copy.
+ unsigned SR = getScratchReg();
+ duplicateToTop(PendingST[i], SR, I);
+ DEBUG(dbgs() << "Duplicating ST" << i << " in FP"
+ << unsigned(PendingST[i])
+ << " to avoid collision with ST" << j << '\n');
+ PendingST[i] = SR;
+ }
+ continue;
+ }
+ unsigned SR = getScratchReg();
+ DEBUG(dbgs() << "Emitting LD_F0 for ST" << i << " in FP" << SR << '\n');
+ BuildMI(*MBB, I, MI->getDebugLoc(), TII->get(X86::LD_F0));
+ pushReg(SR);
+ PendingST[i] = SR;
+ if (NumPendingSTs == i)
+ ++NumPendingSTs;
+ }
+ assert(NumPendingSTs >= NumSTUses && "Fixed registers should be assigned");
+
+ // Now we can rearrange the live registers to match what was requested.
+ shuffleStackTop(PendingST, NumPendingSTs, I);
+ DEBUG({dbgs() << "Before asm: "; dumpStack();});
+
+ // With the stack layout fixed, rewrite the FP registers.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &Op = MI->getOperand(i);
+ if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
+ continue;
+ unsigned FPReg = getFPReg(Op);
+ Op.setReg(getSTReg(FPReg));
+ }
+
+ // Simulate the inline asm popping its inputs and pushing its outputs.
+ StackTop -= NumSTPopped;
+
+ // Hold the fixed output registers in scratch FP registers. They will be
+ // transferred to real FP registers by copies.
+ NumPendingSTs = 0;
+ for (unsigned i = 0; i < NumSTDefs; ++i) {
+ unsigned SR = getScratchReg();
+ pushReg(SR);
+ FPKills &= ~(1u << SR);
+ }
+ for (unsigned i = 0; i < NumSTDefs; ++i)
+ PendingST[NumPendingSTs++] = getStackEntry(i);
+ DEBUG({dbgs() << "After asm: "; dumpStack();});
+
+ // If any of the ST defs were dead, pop them immediately. Our caller only
+ // handles dead FP defs.
+ MachineBasicBlock::iterator InsertPt = MI;
+ for (unsigned i = 0; STDefs & (1u << i); ++i) {
+ if (!(STDeadDefs & (1u << i)))
+ continue;
+ freeStackSlotAfter(InsertPt, PendingST[i]);
+ PendingST[i] = NumFPRegs;
+ }
+ while (NumPendingSTs && PendingST[NumPendingSTs - 1] == NumFPRegs)
+ --NumPendingSTs;
+
+ // If this asm kills any FP registers (is the last use of them) we must
+ // explicitly emit pop instructions for them. Do this now after the asm has
+ // executed so that the ST(x) numbers are not off (which would happen if we
+ // did this inline with operand rewriting).
+ //
+ // Note: this might be a non-optimal pop sequence. We might be able to do
+ // better by trying to pop in stack order or something.
+ while (FPKills) {
+ unsigned FPReg = CountTrailingZeros_32(FPKills);
+ if (isLive(FPReg))
+ freeStackSlotAfter(InsertPt, FPReg);
+ FPKills &= ~(1U << FPReg);
+ }
+ // Don't delete the inline asm!
+ return;
+ }
+
+ case X86::RET:
+ case X86::RETI:
+ // If RET has an FP register use operand, pass the first one in ST(0) and
+ // the second one in ST(1).
+
+ // Find the register operands.
+ unsigned FirstFPRegOp = ~0U, SecondFPRegOp = ~0U;
+ unsigned LiveMask = 0;
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &Op = MI->getOperand(i);
+ if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
+ continue;
+ // FP Register uses must be kills unless there are two uses of the same
+ // register, in which case only one will be a kill.
+ assert(Op.isUse() &&
+ (Op.isKill() || // Marked kill.
+ getFPReg(Op) == FirstFPRegOp || // Second instance.
+ MI->killsRegister(Op.getReg())) && // Later use is marked kill.
+ "Ret only defs operands, and values aren't live beyond it");
+
+ if (FirstFPRegOp == ~0U)
+ FirstFPRegOp = getFPReg(Op);
+ else {
+ assert(SecondFPRegOp == ~0U && "More than two fp operands!");
+ SecondFPRegOp = getFPReg(Op);
+ }
+ LiveMask |= (1 << getFPReg(Op));
+
+ // Remove the operand so that later passes don't see it.
+ MI->RemoveOperand(i);
+ --i, --e;
+ }
+
+ // We may have been carrying spurious live-ins, so make sure only the returned
+ // registers are left live.
+ adjustLiveRegs(LiveMask, MI);
+ if (!LiveMask) return; // Quick check to see if any are possible.
+
+ // There are only four possibilities here:
+ // 1) we are returning a single FP value. In this case, it has to be in
+ // ST(0) already, so just declare success by removing the value from the
+ // FP Stack.
+ if (SecondFPRegOp == ~0U) {
+ // Assert that the top of stack contains the right FP register.
+ assert(StackTop == 1 && FirstFPRegOp == getStackEntry(0) &&
+ "Top of stack not the right register for RET!");
+
+ // Ok, everything is good, mark the value as not being on the stack
+ // anymore so that our assertion about the stack being empty at end of
+ // block doesn't fire.
+ StackTop = 0;
+ return;
+ }
+
+ // Otherwise, we are returning two values:
+ // 2) If returning the same value for both, we only have one thing in the FP
+ // stack. Consider: RET FP1, FP1
+ if (StackTop == 1) {
+ assert(FirstFPRegOp == SecondFPRegOp && FirstFPRegOp == getStackEntry(0)&&
+ "Stack misconfiguration for RET!");
+
+ // Duplicate the TOS so that we return it twice. Just pick some other FPx
+ // register to hold it.
+ unsigned NewReg = getScratchReg();
+ duplicateToTop(FirstFPRegOp, NewReg, MI);
+ FirstFPRegOp = NewReg;
+ }
+
+ /// Okay we know we have two different FPx operands now:
+ assert(StackTop == 2 && "Must have two values live!");
+
+ /// 3) If SecondFPRegOp is currently in ST(0) and FirstFPRegOp is currently
+ /// in ST(1). In this case, emit an fxch.
+ if (getStackEntry(0) == SecondFPRegOp) {
+ assert(getStackEntry(1) == FirstFPRegOp && "Unknown regs live");
+ moveToTop(FirstFPRegOp, MI);
+ }
+
+ /// 4) Finally, FirstFPRegOp must be in ST(0) and SecondFPRegOp must be in
+ /// ST(1). Just remove both from our understanding of the stack and return.
+ assert(getStackEntry(0) == FirstFPRegOp && "Unknown regs live");
+ assert(getStackEntry(1) == SecondFPRegOp && "Unknown regs live");
+ StackTop = 0;
+ return;
+ }
+
+ I = MBB->erase(I); // Remove the pseudo instruction
+
+ // We want to leave I pointing to the previous instruction, but what if we
+ // just erased the first instruction?
+ if (I == MBB->begin()) {
+ DEBUG(dbgs() << "Inserting dummy KILL\n");
+ I = BuildMI(*MBB, I, DebugLoc(), TII->get(TargetOpcode::KILL));
+ } else
+ --I;
+}
diff --git a/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp b/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp
new file mode 100644
index 0000000..d54f4ae
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -0,0 +1,1447 @@
+//=======- X86FrameLowering.cpp - X86 Frame Information --------*- C++ -*-====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the X86 implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86FrameLowering.h"
+#include "X86InstrBuilder.h"
+#include "X86InstrInfo.h"
+#include "X86MachineFunctionInfo.h"
+#include "X86Subtarget.h"
+#include "X86TargetMachine.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/ADT/SmallSet.h"
+
+using namespace llvm;
+
+// FIXME: completely move here.
+extern cl::opt<bool> ForceStackAlign;
+
+bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
+ return !MF.getFrameInfo()->hasVarSizedObjects();
+}
+
+/// hasFP - Return true if the specified function should have a dedicated frame
+/// pointer register. This is true if the function has variable sized allocas
+/// or if frame pointer elimination is disabled.
+bool X86FrameLowering::hasFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MachineModuleInfo &MMI = MF.getMMI();
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+
+ return (DisableFramePointerElim(MF) ||
+ RI->needsStackRealignment(MF) ||
+ MFI->hasVarSizedObjects() ||
+ MFI->isFrameAddressTaken() ||
+ MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
+ MMI.callsUnwindInit());
+}
+
+static unsigned getSUBriOpcode(unsigned is64Bit, int64_t Imm) {
+ if (is64Bit) {
+ if (isInt<8>(Imm))
+ return X86::SUB64ri8;
+ return X86::SUB64ri32;
+ } else {
+ if (isInt<8>(Imm))
+ return X86::SUB32ri8;
+ return X86::SUB32ri;
+ }
+}
+
+static unsigned getADDriOpcode(unsigned is64Bit, int64_t Imm) {
+ if (is64Bit) {
+ if (isInt<8>(Imm))
+ return X86::ADD64ri8;
+ return X86::ADD64ri32;
+ } else {
+ if (isInt<8>(Imm))
+ return X86::ADD32ri8;
+ return X86::ADD32ri;
+ }
+}
+
+/// findDeadCallerSavedReg - Return a caller-saved register that isn't live
+/// when it reaches the "return" instruction. We can then pop a stack object
+/// to this register without worry about clobbering it.
+static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ const TargetRegisterInfo &TRI,
+ bool Is64Bit) {
+ const MachineFunction *MF = MBB.getParent();
+ const Function *F = MF->getFunction();
+ if (!F || MF->getMMI().callsEHReturn())
+ return 0;
+
+ static const unsigned CallerSavedRegs32Bit[] = {
+ X86::EAX, X86::EDX, X86::ECX, 0
+ };
+
+ static const unsigned CallerSavedRegs64Bit[] = {
+ X86::RAX, X86::RDX, X86::RCX, X86::RSI, X86::RDI,
+ X86::R8, X86::R9, X86::R10, X86::R11, 0
+ };
+
+ unsigned Opc = MBBI->getOpcode();
+ switch (Opc) {
+ default: return 0;
+ case X86::RET:
+ case X86::RETI:
+ case X86::TCRETURNdi:
+ case X86::TCRETURNri:
+ case X86::TCRETURNmi:
+ case X86::TCRETURNdi64:
+ case X86::TCRETURNri64:
+ case X86::TCRETURNmi64:
+ case X86::EH_RETURN:
+ case X86::EH_RETURN64: {
+ SmallSet<unsigned, 8> Uses;
+ for (unsigned i = 0, e = MBBI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MBBI->getOperand(i);
+ if (!MO.isReg() || MO.isDef())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ for (const unsigned *AsI = TRI.getOverlaps(Reg); *AsI; ++AsI)
+ Uses.insert(*AsI);
+ }
+
+ const unsigned *CS = Is64Bit ? CallerSavedRegs64Bit : CallerSavedRegs32Bit;
+ for (; *CS; ++CS)
+ if (!Uses.count(*CS))
+ return *CS;
+ }
+ }
+
+ return 0;
+}
+
+
+/// emitSPUpdate - Emit a series of instructions to increment / decrement the
+/// stack pointer by a constant value.
+static
+void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
+ unsigned StackPtr, int64_t NumBytes,
+ bool Is64Bit, const TargetInstrInfo &TII,
+ const TargetRegisterInfo &TRI) {
+ bool isSub = NumBytes < 0;
+ uint64_t Offset = isSub ? -NumBytes : NumBytes;
+ unsigned Opc = isSub ?
+ getSUBriOpcode(Is64Bit, Offset) :
+ getADDriOpcode(Is64Bit, Offset);
+ uint64_t Chunk = (1LL << 31) - 1;
+ DebugLoc DL = MBB.findDebugLoc(MBBI);
+
+ while (Offset) {
+ uint64_t ThisVal = (Offset > Chunk) ? Chunk : Offset;
+ if (ThisVal == (Is64Bit ? 8 : 4)) {
+ // Use push / pop instead.
+ unsigned Reg = isSub
+ ? (unsigned)(Is64Bit ? X86::RAX : X86::EAX)
+ : findDeadCallerSavedReg(MBB, MBBI, TRI, Is64Bit);
+ if (Reg) {
+ Opc = isSub
+ ? (Is64Bit ? X86::PUSH64r : X86::PUSH32r)
+ : (Is64Bit ? X86::POP64r : X86::POP32r);
+ MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc))
+ .addReg(Reg, getDefRegState(!isSub) | getUndefRegState(isSub));
+ if (isSub)
+ MI->setFlag(MachineInstr::FrameSetup);
+ Offset -= ThisVal;
+ continue;
+ }
+ }
+
+ MachineInstr *MI =
+ BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
+ .addReg(StackPtr)
+ .addImm(ThisVal);
+ if (isSub)
+ MI->setFlag(MachineInstr::FrameSetup);
+ MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
+ Offset -= ThisVal;
+ }
+}
+
+/// mergeSPUpdatesUp - Merge two stack-manipulating instructions upper iterator.
+static
+void mergeSPUpdatesUp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
+ unsigned StackPtr, uint64_t *NumBytes = NULL) {
+ if (MBBI == MBB.begin()) return;
+
+ MachineBasicBlock::iterator PI = prior(MBBI);
+ unsigned Opc = PI->getOpcode();
+ if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
+ Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
+ PI->getOperand(0).getReg() == StackPtr) {
+ if (NumBytes)
+ *NumBytes += PI->getOperand(2).getImm();
+ MBB.erase(PI);
+ } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
+ Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
+ PI->getOperand(0).getReg() == StackPtr) {
+ if (NumBytes)
+ *NumBytes -= PI->getOperand(2).getImm();
+ MBB.erase(PI);
+ }
+}
+
+/// mergeSPUpdatesDown - Merge two stack-manipulating instructions lower iterator.
+static
+void mergeSPUpdatesDown(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ unsigned StackPtr, uint64_t *NumBytes = NULL) {
+ // FIXME: THIS ISN'T RUN!!!
+ return;
+
+ if (MBBI == MBB.end()) return;
+
+ MachineBasicBlock::iterator NI = llvm::next(MBBI);
+ if (NI == MBB.end()) return;
+
+ unsigned Opc = NI->getOpcode();
+ if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
+ Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
+ NI->getOperand(0).getReg() == StackPtr) {
+ if (NumBytes)
+ *NumBytes -= NI->getOperand(2).getImm();
+ MBB.erase(NI);
+ MBBI = NI;
+ } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
+ Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
+ NI->getOperand(0).getReg() == StackPtr) {
+ if (NumBytes)
+ *NumBytes += NI->getOperand(2).getImm();
+ MBB.erase(NI);
+ MBBI = NI;
+ }
+}
+
+/// mergeSPUpdates - Checks the instruction before/after the passed
+/// instruction. If it is an ADD/SUB instruction it is deleted argument and the
+/// stack adjustment is returned as a positive value for ADD and a negative for
+/// SUB.
+static int mergeSPUpdates(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ unsigned StackPtr,
+ bool doMergeWithPrevious) {
+ if ((doMergeWithPrevious && MBBI == MBB.begin()) ||
+ (!doMergeWithPrevious && MBBI == MBB.end()))
+ return 0;
+
+ MachineBasicBlock::iterator PI = doMergeWithPrevious ? prior(MBBI) : MBBI;
+ MachineBasicBlock::iterator NI = doMergeWithPrevious ? 0 : llvm::next(MBBI);
+ unsigned Opc = PI->getOpcode();
+ int Offset = 0;
+
+ if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
+ Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
+ PI->getOperand(0).getReg() == StackPtr){
+ Offset += PI->getOperand(2).getImm();
+ MBB.erase(PI);
+ if (!doMergeWithPrevious) MBBI = NI;
+ } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
+ Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
+ PI->getOperand(0).getReg() == StackPtr) {
+ Offset -= PI->getOperand(2).getImm();
+ MBB.erase(PI);
+ if (!doMergeWithPrevious) MBBI = NI;
+ }
+
+ return Offset;
+}
+
+static bool isEAXLiveIn(MachineFunction &MF) {
+ for (MachineRegisterInfo::livein_iterator II = MF.getRegInfo().livein_begin(),
+ EE = MF.getRegInfo().livein_end(); II != EE; ++II) {
+ unsigned Reg = II->first;
+
+ if (Reg == X86::EAX || Reg == X86::AX ||
+ Reg == X86::AH || Reg == X86::AL)
+ return true;
+ }
+
+ return false;
+}
+
+void X86FrameLowering::emitCalleeSavedFrameMoves(MachineFunction &MF,
+ MCSymbol *Label,
+ unsigned FramePtr) const {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineModuleInfo &MMI = MF.getMMI();
+
+ // Add callee saved registers to move list.
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ if (CSI.empty()) return;
+
+ std::vector<MachineMove> &Moves = MMI.getFrameMoves();
+ const TargetData *TD = TM.getTargetData();
+ bool HasFP = hasFP(MF);
+
+ // Calculate amount of bytes used for return address storing.
+ int stackGrowth = -TD->getPointerSize();
+
+ // FIXME: This is dirty hack. The code itself is pretty mess right now.
+ // It should be rewritten from scratch and generalized sometimes.
+
+ // Determine maximum offset (minimum due to stack growth).
+ int64_t MaxOffset = 0;
+ for (std::vector<CalleeSavedInfo>::const_iterator
+ I = CSI.begin(), E = CSI.end(); I != E; ++I)
+ MaxOffset = std::min(MaxOffset,
+ MFI->getObjectOffset(I->getFrameIdx()));
+
+ // Calculate offsets.
+ int64_t saveAreaOffset = (HasFP ? 3 : 2) * stackGrowth;
+ for (std::vector<CalleeSavedInfo>::const_iterator
+ I = CSI.begin(), E = CSI.end(); I != E; ++I) {
+ int64_t Offset = MFI->getObjectOffset(I->getFrameIdx());
+ unsigned Reg = I->getReg();
+ Offset = MaxOffset - Offset + saveAreaOffset;
+
+ // Don't output a new machine move if we're re-saving the frame
+ // pointer. This happens when the PrologEpilogInserter has inserted an extra
+ // "PUSH" of the frame pointer -- the "emitPrologue" method automatically
+ // generates one when frame pointers are used. If we generate a "machine
+ // move" for this extra "PUSH", the linker will lose track of the fact that
+ // the frame pointer should have the value of the first "PUSH" when it's
+ // trying to unwind.
+ //
+ // FIXME: This looks inelegant. It's possibly correct, but it's covering up
+ // another bug. I.e., one where we generate a prolog like this:
+ //
+ // pushl %ebp
+ // movl %esp, %ebp
+ // pushl %ebp
+ // pushl %esi
+ // ...
+ //
+ // The immediate re-push of EBP is unnecessary. At the least, it's an
+ // optimization bug. EBP can be used as a scratch register in certain
+ // cases, but probably not when we have a frame pointer.
+ if (HasFP && FramePtr == Reg)
+ continue;
+
+ MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
+ MachineLocation CSSrc(Reg);
+ Moves.push_back(MachineMove(Label, CSDst, CSSrc));
+ }
+}
+
+/// getCompactUnwindRegNum - Get the compact unwind number for a given
+/// register. The number corresponds to the enum lists in
+/// compact_unwind_encoding.h.
+static int getCompactUnwindRegNum(const unsigned *CURegs, unsigned Reg) {
+ int Idx = 1;
+ for (; *CURegs; ++CURegs, ++Idx)
+ if (*CURegs == Reg)
+ return Idx;
+
+ return -1;
+}
+
+/// encodeCompactUnwindRegistersWithoutFrame - Create the permutation encoding
+/// used with frameless stacks. It is passed the number of registers to be saved
+/// and an array of the registers saved.
+static uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned SavedRegs[6],
+ unsigned RegCount,
+ bool Is64Bit) {
+ // The saved registers are numbered from 1 to 6. In order to encode the order
+ // in which they were saved, we re-number them according to their place in the
+ // register order. The re-numbering is relative to the last re-numbered
+ // register. E.g., if we have registers {6, 2, 4, 5} saved in that order:
+ //
+ // Orig Re-Num
+ // ---- ------
+ // 6 6
+ // 2 2
+ // 4 3
+ // 5 3
+ //
+ static const unsigned CU32BitRegs[] = {
+ X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0
+ };
+ static const unsigned CU64BitRegs[] = {
+ X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
+ };
+ const unsigned *CURegs = (Is64Bit ? CU64BitRegs : CU32BitRegs);
+
+ uint32_t RenumRegs[6];
+ for (unsigned i = 6 - RegCount; i < 6; ++i) {
+ int CUReg = getCompactUnwindRegNum(CURegs, SavedRegs[i]);
+ if (CUReg == -1) return ~0U;
+ SavedRegs[i] = CUReg;
+
+ unsigned Countless = 0;
+ for (unsigned j = 6 - RegCount; j < i; ++j)
+ if (SavedRegs[j] < SavedRegs[i])
+ ++Countless;
+
+ RenumRegs[i] = SavedRegs[i] - Countless - 1;
+ }
+
+ // Take the renumbered values and encode them into a 10-bit number.
+ uint32_t permutationEncoding = 0;
+ switch (RegCount) {
+ case 6:
+ permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1]
+ + 6 * RenumRegs[2] + 2 * RenumRegs[3]
+ + RenumRegs[4];
+ break;
+ case 5:
+ permutationEncoding |= 120 * RenumRegs[1] + 24 * RenumRegs[2]
+ + 6 * RenumRegs[3] + 2 * RenumRegs[4]
+ + RenumRegs[5];
+ break;
+ case 4:
+ permutationEncoding |= 60 * RenumRegs[2] + 12 * RenumRegs[3]
+ + 3 * RenumRegs[4] + RenumRegs[5];
+ break;
+ case 3:
+ permutationEncoding |= 20 * RenumRegs[3] + 4 * RenumRegs[4]
+ + RenumRegs[5];
+ break;
+ case 2:
+ permutationEncoding |= 5 * RenumRegs[4] + RenumRegs[5];
+ break;
+ case 1:
+ permutationEncoding |= RenumRegs[5];
+ break;
+ }
+
+ assert((permutationEncoding & 0x3FF) == permutationEncoding &&
+ "Invalid compact register encoding!");
+ return permutationEncoding;
+}
+
+/// encodeCompactUnwindRegistersWithFrame - Return the registers encoded for a
+/// compact encoding with a frame pointer.
+static uint32_t encodeCompactUnwindRegistersWithFrame(unsigned SavedRegs[6],
+ bool Is64Bit) {
+ static const unsigned CU32BitRegs[] = {
+ X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0
+ };
+ static const unsigned CU64BitRegs[] = {
+ X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
+ };
+ const unsigned *CURegs = (Is64Bit ? CU64BitRegs : CU32BitRegs);
+
+ // Encode the registers in the order they were saved, 3-bits per register. The
+ // registers are numbered from 1 to 6.
+ uint32_t RegEnc = 0;
+ for (int I = 5; I >= 0; --I) {
+ unsigned Reg = SavedRegs[I];
+ if (Reg == 0) break;
+ int CURegNum = getCompactUnwindRegNum(CURegs, Reg);
+ if (CURegNum == -1)
+ return ~0U;
+ RegEnc |= (CURegNum & 0x7) << (5 - I);
+ }
+
+ assert((RegEnc & 0x7FFF) == RegEnc && "Invalid compact register encoding!");
+ return RegEnc;
+}
+
+uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const {
+ const X86RegisterInfo *RegInfo = TM.getRegisterInfo();
+ unsigned FramePtr = RegInfo->getFrameRegister(MF);
+ unsigned StackPtr = RegInfo->getStackRegister();
+
+ X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+ int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
+
+ bool Is64Bit = STI.is64Bit();
+ bool HasFP = hasFP(MF);
+
+ unsigned SavedRegs[6] = { 0, 0, 0, 0, 0, 0 };
+ int SavedRegIdx = 6;
+
+ unsigned OffsetSize = (Is64Bit ? 8 : 4);
+
+ unsigned PushInstr = (Is64Bit ? X86::PUSH64r : X86::PUSH32r);
+ unsigned PushInstrSize = 1;
+ unsigned MoveInstr = (Is64Bit ? X86::MOV64rr : X86::MOV32rr);
+ unsigned MoveInstrSize = (Is64Bit ? 3 : 2);
+ unsigned SubtractInstr = getSUBriOpcode(Is64Bit, -TailCallReturnAddrDelta);
+ unsigned SubtractInstrIdx = (Is64Bit ? 3 : 2);
+
+ unsigned StackDivide = (Is64Bit ? 8 : 4);
+
+ unsigned InstrOffset = 0;
+ unsigned CFAOffset = 0;
+ unsigned StackAdjust = 0;
+
+ MachineBasicBlock &MBB = MF.front(); // Prologue is in entry BB.
+ bool ExpectEnd = false;
+ for (MachineBasicBlock::iterator
+ MBBI = MBB.begin(), MBBE = MBB.end(); MBBI != MBBE; ++MBBI) {
+ MachineInstr &MI = *MBBI;
+ unsigned Opc = MI.getOpcode();
+ if (Opc == X86::PROLOG_LABEL) continue;
+ if (!MI.getFlag(MachineInstr::FrameSetup)) break;
+
+ // We don't exect any more prolog instructions.
+ if (ExpectEnd) return 0;
+
+ if (Opc == PushInstr) {
+ // If there are too many saved registers, we cannot use compact encoding.
+ if (--SavedRegIdx < 0) return 0;
+
+ SavedRegs[SavedRegIdx] = MI.getOperand(0).getReg();
+ CFAOffset += OffsetSize;
+ InstrOffset += PushInstrSize;
+ } else if (Opc == MoveInstr) {
+ unsigned SrcReg = MI.getOperand(1).getReg();
+ unsigned DstReg = MI.getOperand(0).getReg();
+
+ if (DstReg != FramePtr || SrcReg != StackPtr)
+ return 0;
+
+ CFAOffset = 0;
+ memset(SavedRegs, 0, sizeof(SavedRegs));
+ InstrOffset += MoveInstrSize;
+ } else if (Opc == SubtractInstr) {
+ if (StackAdjust)
+ // We all ready have a stack pointer adjustment.
+ return 0;
+
+ if (!MI.getOperand(0).isReg() ||
+ MI.getOperand(0).getReg() != MI.getOperand(1).getReg() ||
+ MI.getOperand(0).getReg() != StackPtr || !MI.getOperand(2).isImm())
+ // We need this to be a stack adjustment pointer. Something like:
+ //
+ // %RSP<def> = SUB64ri8 %RSP, 48
+ return 0;
+
+ StackAdjust = MI.getOperand(2).getImm() / StackDivide;
+ SubtractInstrIdx += InstrOffset;
+ ExpectEnd = true;
+ }
+ }
+
+ // Encode that we are using EBP/RBP as the frame pointer.
+ uint32_t CompactUnwindEncoding = 0;
+ CFAOffset /= StackDivide;
+ if (HasFP) {
+ if ((CFAOffset & 0xFF) != CFAOffset)
+ // Offset was too big for compact encoding.
+ return 0;
+
+ // Get the encoding of the saved registers when we have a frame pointer.
+ uint32_t RegEnc = encodeCompactUnwindRegistersWithFrame(SavedRegs, Is64Bit);
+ if (RegEnc == ~0U)
+ return 0;
+
+ CompactUnwindEncoding |= 0x01000000;
+ CompactUnwindEncoding |= (CFAOffset & 0xFF) << 16;
+ CompactUnwindEncoding |= RegEnc & 0x7FFF;
+ } else {
+ unsigned FullOffset = CFAOffset + StackAdjust;
+ if ((FullOffset & 0xFF) == FullOffset) {
+ // Frameless stack.
+ CompactUnwindEncoding |= 0x02000000;
+ CompactUnwindEncoding |= (FullOffset & 0xFF) << 16;
+ } else {
+ if ((CFAOffset & 0x7) != CFAOffset)
+ // The extra stack adjustments are too big for us to handle.
+ return 0;
+
+ // Frameless stack with an offset too large for us to encode compactly.
+ CompactUnwindEncoding |= 0x03000000;
+
+ // Encode the offset to the nnnnnn value in the 'subl $nnnnnn, ESP'
+ // instruction.
+ CompactUnwindEncoding |= (SubtractInstrIdx & 0xFF) << 16;
+
+ // Encode any extra stack stack changes (done via push instructions).
+ CompactUnwindEncoding |= (CFAOffset & 0x7) << 13;
+ }
+
+ // Get the encoding of the saved registers when we don't have a frame
+ // pointer.
+ uint32_t RegEnc = encodeCompactUnwindRegistersWithoutFrame(SavedRegs,
+ 6 - SavedRegIdx,
+ Is64Bit);
+ if (RegEnc == ~0U) return 0;
+ CompactUnwindEncoding |= RegEnc & 0x3FF;
+ }
+
+ return CompactUnwindEncoding;
+}
+
+/// emitPrologue - Push callee-saved registers onto the stack, which
+/// automatically adjust the stack pointer. Adjust the stack pointer to allocate
+/// space for local variables. Also emit labels used by the exception handler to
+/// generate the exception handling frames.
+void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB.
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const Function *Fn = MF.getFunction();
+ const X86RegisterInfo *RegInfo = TM.getRegisterInfo();
+ const X86InstrInfo &TII = *TM.getInstrInfo();
+ MachineModuleInfo &MMI = MF.getMMI();
+ X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+ bool needsFrameMoves = MMI.hasDebugInfo() ||
+ Fn->needsUnwindTableEntry();
+ uint64_t MaxAlign = MFI->getMaxAlignment(); // Desired stack alignment.
+ uint64_t StackSize = MFI->getStackSize(); // Number of bytes to allocate.
+ bool HasFP = hasFP(MF);
+ bool Is64Bit = STI.is64Bit();
+ bool IsWin64 = STI.isTargetWin64();
+ unsigned StackAlign = getStackAlignment();
+ unsigned SlotSize = RegInfo->getSlotSize();
+ unsigned FramePtr = RegInfo->getFrameRegister(MF);
+ unsigned StackPtr = RegInfo->getStackRegister();
+ DebugLoc DL;
+
+ // If we're forcing a stack realignment we can't rely on just the frame
+ // info, we need to know the ABI stack alignment as well in case we
+ // have a call out. Otherwise just make sure we have some alignment - we'll
+ // go with the minimum SlotSize.
+ if (ForceStackAlign) {
+ if (MFI->hasCalls())
+ MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
+ else if (MaxAlign < SlotSize)
+ MaxAlign = SlotSize;
+ }
+
+ // Add RETADDR move area to callee saved frame size.
+ int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
+ if (TailCallReturnAddrDelta < 0)
+ X86FI->setCalleeSavedFrameSize(
+ X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta);
+
+ // If this is x86-64 and the Red Zone is not disabled, if we are a leaf
+ // function, and use up to 128 bytes of stack space, don't have a frame
+ // pointer, calls, or dynamic alloca then we do not need to adjust the
+ // stack pointer (we fit in the Red Zone).
+ if (Is64Bit && !Fn->hasFnAttr(Attribute::NoRedZone) &&
+ !RegInfo->needsStackRealignment(MF) &&
+ !MFI->hasVarSizedObjects() && // No dynamic alloca.
+ !MFI->adjustsStack() && // No calls.
+ !IsWin64 && // Win64 has no Red Zone
+ !EnableSegmentedStacks) { // Regular stack
+ uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
+ if (HasFP) MinSize += SlotSize;
+ StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
+ MFI->setStackSize(StackSize);
+ }
+
+ // Insert stack pointer adjustment for later moving of return addr. Only
+ // applies to tail call optimized functions where the callee argument stack
+ // size is bigger than the callers.
+ if (TailCallReturnAddrDelta < 0) {
+ MachineInstr *MI =
+ BuildMI(MBB, MBBI, DL,
+ TII.get(getSUBriOpcode(Is64Bit, -TailCallReturnAddrDelta)),
+ StackPtr)
+ .addReg(StackPtr)
+ .addImm(-TailCallReturnAddrDelta)
+ .setMIFlag(MachineInstr::FrameSetup);
+ MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
+ }
+
+ // Mapping for machine moves:
+ //
+ // DST: VirtualFP AND
+ // SRC: VirtualFP => DW_CFA_def_cfa_offset
+ // ELSE => DW_CFA_def_cfa
+ //
+ // SRC: VirtualFP AND
+ // DST: Register => DW_CFA_def_cfa_register
+ //
+ // ELSE
+ // OFFSET < 0 => DW_CFA_offset_extended_sf
+ // REG < 64 => DW_CFA_offset + Reg
+ // ELSE => DW_CFA_offset_extended
+
+ std::vector<MachineMove> &Moves = MMI.getFrameMoves();
+ const TargetData *TD = MF.getTarget().getTargetData();
+ uint64_t NumBytes = 0;
+ int stackGrowth = -TD->getPointerSize();
+
+ if (HasFP) {
+ // Calculate required stack adjustment.
+ uint64_t FrameSize = StackSize - SlotSize;
+ if (RegInfo->needsStackRealignment(MF))
+ FrameSize = (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign;
+
+ NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize();
+
+ // Get the offset of the stack slot for the EBP register, which is
+ // guaranteed to be the last slot by processFunctionBeforeFrameFinalized.
+ // Update the frame offset adjustment.
+ MFI->setOffsetAdjustment(-NumBytes);
+
+ // Save EBP/RBP into the appropriate stack slot.
+ BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r))
+ .addReg(FramePtr, RegState::Kill)
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ if (needsFrameMoves) {
+ // Mark the place where EBP/RBP was saved.
+ MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL))
+ .addSym(FrameLabel);
+
+ // Define the current CFA rule to use the provided offset.
+ if (StackSize) {
+ MachineLocation SPDst(MachineLocation::VirtualFP);
+ MachineLocation SPSrc(MachineLocation::VirtualFP, 2 * stackGrowth);
+ Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc));
+ } else {
+ MachineLocation SPDst(StackPtr);
+ MachineLocation SPSrc(StackPtr, stackGrowth);
+ Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc));
+ }
+
+ // Change the rule for the FramePtr to be an "offset" rule.
+ MachineLocation FPDst(MachineLocation::VirtualFP, 2 * stackGrowth);
+ MachineLocation FPSrc(FramePtr);
+ Moves.push_back(MachineMove(FrameLabel, FPDst, FPSrc));
+ }
+
+ // Update EBP with the new base value.
+ BuildMI(MBB, MBBI, DL,
+ TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), FramePtr)
+ .addReg(StackPtr)
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ if (needsFrameMoves) {
+ // Mark effective beginning of when frame pointer becomes valid.
+ MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL))
+ .addSym(FrameLabel);
+
+ // Define the current CFA to use the EBP/RBP register.
+ MachineLocation FPDst(FramePtr);
+ MachineLocation FPSrc(MachineLocation::VirtualFP);
+ Moves.push_back(MachineMove(FrameLabel, FPDst, FPSrc));
+ }
+
+ // Mark the FramePtr as live-in in every block except the entry.
+ for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
+ I != E; ++I)
+ I->addLiveIn(FramePtr);
+
+ // Realign stack
+ if (RegInfo->needsStackRealignment(MF)) {
+ MachineInstr *MI =
+ BuildMI(MBB, MBBI, DL,
+ TII.get(Is64Bit ? X86::AND64ri32 : X86::AND32ri), StackPtr)
+ .addReg(StackPtr)
+ .addImm(-MaxAlign)
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ // The EFLAGS implicit def is dead.
+ MI->getOperand(3).setIsDead();
+ }
+ } else {
+ NumBytes = StackSize - X86FI->getCalleeSavedFrameSize();
+ }
+
+ // Skip the callee-saved push instructions.
+ bool PushedRegs = false;
+ int StackOffset = 2 * stackGrowth;
+
+ while (MBBI != MBB.end() &&
+ (MBBI->getOpcode() == X86::PUSH32r ||
+ MBBI->getOpcode() == X86::PUSH64r)) {
+ PushedRegs = true;
+ MBBI->setFlag(MachineInstr::FrameSetup);
+ ++MBBI;
+
+ if (!HasFP && needsFrameMoves) {
+ // Mark callee-saved push instruction.
+ MCSymbol *Label = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(Label);
+
+ // Define the current CFA rule to use the provided offset.
+ unsigned Ptr = StackSize ? MachineLocation::VirtualFP : StackPtr;
+ MachineLocation SPDst(Ptr);
+ MachineLocation SPSrc(Ptr, StackOffset);
+ Moves.push_back(MachineMove(Label, SPDst, SPSrc));
+ StackOffset += stackGrowth;
+ }
+ }
+
+ DL = MBB.findDebugLoc(MBBI);
+
+ // If there is an SUB32ri of ESP immediately before this instruction, merge
+ // the two. This can be the case when tail call elimination is enabled and
+ // the callee has more arguments then the caller.
+ NumBytes -= mergeSPUpdates(MBB, MBBI, StackPtr, true);
+
+ // If there is an ADD32ri or SUB32ri of ESP immediately after this
+ // instruction, merge the two instructions.
+ mergeSPUpdatesDown(MBB, MBBI, StackPtr, &NumBytes);
+
+ // Adjust stack pointer: ESP -= numbytes.
+
+ // Windows and cygwin/mingw require a prologue helper routine when allocating
+ // more than 4K bytes on the stack. Windows uses __chkstk and cygwin/mingw
+ // uses __alloca. __alloca and the 32-bit version of __chkstk will probe the
+ // stack and adjust the stack pointer in one go. The 64-bit version of
+ // __chkstk is only responsible for probing the stack. The 64-bit prologue is
+ // responsible for adjusting the stack pointer. Touching the stack at 4K
+ // increments is necessary to ensure that the guard pages used by the OS
+ // virtual memory manager are allocated in correct sequence.
+ if (NumBytes >= 4096 && STI.isTargetCOFF() && !STI.isTargetEnvMacho()) {
+ const char *StackProbeSymbol;
+ bool isSPUpdateNeeded = false;
+
+ if (Is64Bit) {
+ if (STI.isTargetCygMing())
+ StackProbeSymbol = "___chkstk";
+ else {
+ StackProbeSymbol = "__chkstk";
+ isSPUpdateNeeded = true;
+ }
+ } else if (STI.isTargetCygMing())
+ StackProbeSymbol = "_alloca";
+ else
+ StackProbeSymbol = "_chkstk";
+
+ // Check whether EAX is livein for this function.
+ bool isEAXAlive = isEAXLiveIn(MF);
+
+ if (isEAXAlive) {
+ // Sanity check that EAX is not livein for this function.
+ // It should not be, so throw an assert.
+ assert(!Is64Bit && "EAX is livein in x64 case!");
+
+ // Save EAX
+ BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
+ .addReg(X86::EAX, RegState::Kill)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+
+ if (Is64Bit) {
+ // Handle the 64-bit Windows ABI case where we need to call __chkstk.
+ // Function prologue is responsible for adjusting the stack pointer.
+ BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::RAX)
+ .addImm(NumBytes)
+ .setMIFlag(MachineInstr::FrameSetup);
+ } else {
+ // Allocate NumBytes-4 bytes on stack in case of isEAXAlive.
+ // We'll also use 4 already allocated bytes for EAX.
+ BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
+ .addImm(isEAXAlive ? NumBytes - 4 : NumBytes)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+
+ BuildMI(MBB, MBBI, DL,
+ TII.get(Is64Bit ? X86::W64ALLOCA : X86::CALLpcrel32))
+ .addExternalSymbol(StackProbeSymbol)
+ .addReg(StackPtr, RegState::Define | RegState::Implicit)
+ .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit)
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ // MSVC x64's __chkstk needs to adjust %rsp.
+ // FIXME: %rax preserves the offset and should be available.
+ if (isSPUpdateNeeded)
+ emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit,
+ TII, *RegInfo);
+
+ if (isEAXAlive) {
+ // Restore EAX
+ MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm),
+ X86::EAX),
+ StackPtr, false, NumBytes - 4);
+ MI->setFlag(MachineInstr::FrameSetup);
+ MBB.insert(MBBI, MI);
+ }
+ } else if (NumBytes)
+ emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit,
+ TII, *RegInfo);
+
+ if (( (!HasFP && NumBytes) || PushedRegs) && needsFrameMoves) {
+ // Mark end of stack pointer adjustment.
+ MCSymbol *Label = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL))
+ .addSym(Label);
+
+ if (!HasFP && NumBytes) {
+ // Define the current CFA rule to use the provided offset.
+ if (StackSize) {
+ MachineLocation SPDst(MachineLocation::VirtualFP);
+ MachineLocation SPSrc(MachineLocation::VirtualFP,
+ -StackSize + stackGrowth);
+ Moves.push_back(MachineMove(Label, SPDst, SPSrc));
+ } else {
+ MachineLocation SPDst(StackPtr);
+ MachineLocation SPSrc(StackPtr, stackGrowth);
+ Moves.push_back(MachineMove(Label, SPDst, SPSrc));
+ }
+ }
+
+ // Emit DWARF info specifying the offsets of the callee-saved registers.
+ if (PushedRegs)
+ emitCalleeSavedFrameMoves(MF, Label, HasFP ? FramePtr : StackPtr);
+ }
+
+ // Darwin 10.7 and greater has support for compact unwind encoding.
+ if (STI.getTargetTriple().isMacOSX() &&
+ !STI.getTargetTriple().isMacOSXVersionLT(10, 7))
+ MMI.setCompactUnwindEncoding(getCompactUnwindEncoding(MF));
+}
+
+void X86FrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+ const X86RegisterInfo *RegInfo = TM.getRegisterInfo();
+ const X86InstrInfo &TII = *TM.getInstrInfo();
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ assert(MBBI != MBB.end() && "Returning block has no instructions");
+ unsigned RetOpcode = MBBI->getOpcode();
+ DebugLoc DL = MBBI->getDebugLoc();
+ bool Is64Bit = STI.is64Bit();
+ unsigned StackAlign = getStackAlignment();
+ unsigned SlotSize = RegInfo->getSlotSize();
+ unsigned FramePtr = RegInfo->getFrameRegister(MF);
+ unsigned StackPtr = RegInfo->getStackRegister();
+
+ switch (RetOpcode) {
+ default:
+ llvm_unreachable("Can only insert epilog into returning blocks");
+ case X86::RET:
+ case X86::RETI:
+ case X86::TCRETURNdi:
+ case X86::TCRETURNri:
+ case X86::TCRETURNmi:
+ case X86::TCRETURNdi64:
+ case X86::TCRETURNri64:
+ case X86::TCRETURNmi64:
+ case X86::EH_RETURN:
+ case X86::EH_RETURN64:
+ break; // These are ok
+ }
+
+ // Get the number of bytes to allocate from the FrameInfo.
+ uint64_t StackSize = MFI->getStackSize();
+ uint64_t MaxAlign = MFI->getMaxAlignment();
+ unsigned CSSize = X86FI->getCalleeSavedFrameSize();
+ uint64_t NumBytes = 0;
+
+ // If we're forcing a stack realignment we can't rely on just the frame
+ // info, we need to know the ABI stack alignment as well in case we
+ // have a call out. Otherwise just make sure we have some alignment - we'll
+ // go with the minimum.
+ if (ForceStackAlign) {
+ if (MFI->hasCalls())
+ MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
+ else
+ MaxAlign = MaxAlign ? MaxAlign : 4;
+ }
+
+ if (hasFP(MF)) {
+ // Calculate required stack adjustment.
+ uint64_t FrameSize = StackSize - SlotSize;
+ if (RegInfo->needsStackRealignment(MF))
+ FrameSize = (FrameSize + MaxAlign - 1)/MaxAlign*MaxAlign;
+
+ NumBytes = FrameSize - CSSize;
+
+ // Pop EBP.
+ BuildMI(MBB, MBBI, DL,
+ TII.get(Is64Bit ? X86::POP64r : X86::POP32r), FramePtr);
+ } else {
+ NumBytes = StackSize - CSSize;
+ }
+
+ // Skip the callee-saved pop instructions.
+ MachineBasicBlock::iterator LastCSPop = MBBI;
+ while (MBBI != MBB.begin()) {
+ MachineBasicBlock::iterator PI = prior(MBBI);
+ unsigned Opc = PI->getOpcode();
+
+ if (Opc != X86::POP32r && Opc != X86::POP64r && Opc != X86::DBG_VALUE &&
+ !PI->getDesc().isTerminator())
+ break;
+
+ --MBBI;
+ }
+
+ DL = MBBI->getDebugLoc();
+
+ // If there is an ADD32ri or SUB32ri of ESP immediately before this
+ // instruction, merge the two instructions.
+ if (NumBytes || MFI->hasVarSizedObjects())
+ mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes);
+
+ // If dynamic alloca is used, then reset esp to point to the last callee-saved
+ // slot before popping them off! Same applies for the case, when stack was
+ // realigned.
+ if (RegInfo->needsStackRealignment(MF)) {
+ // We cannot use LEA here, because stack pointer was realigned. We need to
+ // deallocate local frame back.
+ if (CSSize) {
+ emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII, *RegInfo);
+ MBBI = prior(LastCSPop);
+ }
+
+ BuildMI(MBB, MBBI, DL,
+ TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),
+ StackPtr).addReg(FramePtr);
+ } else if (MFI->hasVarSizedObjects()) {
+ if (CSSize) {
+ unsigned Opc = Is64Bit ? X86::LEA64r : X86::LEA32r;
+ MachineInstr *MI =
+ addRegOffset(BuildMI(MF, DL, TII.get(Opc), StackPtr),
+ FramePtr, false, -CSSize);
+ MBB.insert(MBBI, MI);
+ } else {
+ BuildMI(MBB, MBBI, DL,
+ TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), StackPtr)
+ .addReg(FramePtr);
+ }
+ } else if (NumBytes) {
+ // Adjust stack pointer back: ESP += numbytes.
+ emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII, *RegInfo);
+ }
+
+ // We're returning from function via eh_return.
+ if (RetOpcode == X86::EH_RETURN || RetOpcode == X86::EH_RETURN64) {
+ MBBI = MBB.getLastNonDebugInstr();
+ MachineOperand &DestAddr = MBBI->getOperand(0);
+ assert(DestAddr.isReg() && "Offset should be in register!");
+ BuildMI(MBB, MBBI, DL,
+ TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),
+ StackPtr).addReg(DestAddr.getReg());
+ } else if (RetOpcode == X86::TCRETURNri || RetOpcode == X86::TCRETURNdi ||
+ RetOpcode == X86::TCRETURNmi ||
+ RetOpcode == X86::TCRETURNri64 || RetOpcode == X86::TCRETURNdi64 ||
+ RetOpcode == X86::TCRETURNmi64) {
+ bool isMem = RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64;
+ // Tail call return: adjust the stack pointer and jump to callee.
+ MBBI = MBB.getLastNonDebugInstr();
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+ MachineOperand &StackAdjust = MBBI->getOperand(isMem ? 5 : 1);
+ assert(StackAdjust.isImm() && "Expecting immediate value.");
+
+ // Adjust stack pointer.
+ int StackAdj = StackAdjust.getImm();
+ int MaxTCDelta = X86FI->getTCReturnAddrDelta();
+ int Offset = 0;
+ assert(MaxTCDelta <= 0 && "MaxTCDelta should never be positive");
+
+ // Incoporate the retaddr area.
+ Offset = StackAdj-MaxTCDelta;
+ assert(Offset >= 0 && "Offset should never be negative");
+
+ if (Offset) {
+ // Check for possible merge with preceding ADD instruction.
+ Offset += mergeSPUpdates(MBB, MBBI, StackPtr, true);
+ emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, TII, *RegInfo);
+ }
+
+ // Jump to label or value in register.
+ if (RetOpcode == X86::TCRETURNdi || RetOpcode == X86::TCRETURNdi64) {
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNdi)
+ ? X86::TAILJMPd : X86::TAILJMPd64));
+ if (JumpTarget.isGlobal())
+ MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
+ JumpTarget.getTargetFlags());
+ else {
+ assert(JumpTarget.isSymbol());
+ MIB.addExternalSymbol(JumpTarget.getSymbolName(),
+ JumpTarget.getTargetFlags());
+ }
+ } else if (RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64) {
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNmi)
+ ? X86::TAILJMPm : X86::TAILJMPm64));
+ for (unsigned i = 0; i != 5; ++i)
+ MIB.addOperand(MBBI->getOperand(i));
+ } else if (RetOpcode == X86::TCRETURNri64) {
+ BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr64)).
+ addReg(JumpTarget.getReg(), RegState::Kill);
+ } else {
+ BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr)).
+ addReg(JumpTarget.getReg(), RegState::Kill);
+ }
+
+ MachineInstr *NewMI = prior(MBBI);
+ for (unsigned i = 2, e = MBBI->getNumOperands(); i != e; ++i)
+ NewMI->addOperand(MBBI->getOperand(i));
+
+ // Delete the pseudo instruction TCRETURN.
+ MBB.erase(MBBI);
+ } else if ((RetOpcode == X86::RET || RetOpcode == X86::RETI) &&
+ (X86FI->getTCReturnAddrDelta() < 0)) {
+ // Add the return addr area delta back since we are not tail calling.
+ int delta = -1*X86FI->getTCReturnAddrDelta();
+ MBBI = MBB.getLastNonDebugInstr();
+
+ // Check for possible merge with preceding ADD instruction.
+ delta += mergeSPUpdates(MBB, MBBI, StackPtr, true);
+ emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, TII, *RegInfo);
+ }
+}
+
+int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF, int FI) const {
+ const X86RegisterInfo *RI =
+ static_cast<const X86RegisterInfo*>(MF.getTarget().getRegisterInfo());
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ int Offset = MFI->getObjectOffset(FI) - getOffsetOfLocalArea();
+ uint64_t StackSize = MFI->getStackSize();
+
+ if (RI->needsStackRealignment(MF)) {
+ if (FI < 0) {
+ // Skip the saved EBP.
+ Offset += RI->getSlotSize();
+ } else {
+ assert((-(Offset + StackSize)) % MFI->getObjectAlignment(FI) == 0);
+ return Offset + StackSize;
+ }
+ // FIXME: Support tail calls
+ } else {
+ if (!hasFP(MF))
+ return Offset + StackSize;
+
+ // Skip the saved EBP.
+ Offset += RI->getSlotSize();
+
+ // Skip the RETADDR move area
+ const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+ int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
+ if (TailCallReturnAddrDelta < 0)
+ Offset -= TailCallReturnAddrDelta;
+ }
+
+ return Offset;
+}
+
+bool X86FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return false;
+
+ DebugLoc DL = MBB.findDebugLoc(MI);
+
+ MachineFunction &MF = *MBB.getParent();
+
+ unsigned SlotSize = STI.is64Bit() ? 8 : 4;
+ unsigned FPReg = TRI->getFrameRegister(MF);
+ unsigned CalleeFrameSize = 0;
+
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+
+ // Push GPRs. It increases frame size.
+ unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r;
+ for (unsigned i = CSI.size(); i != 0; --i) {
+ unsigned Reg = CSI[i-1].getReg();
+ if (!X86::GR64RegClass.contains(Reg) &&
+ !X86::GR32RegClass.contains(Reg))
+ continue;
+ // Add the callee-saved register as live-in. It's killed at the spill.
+ MBB.addLiveIn(Reg);
+ if (Reg == FPReg)
+ // X86RegisterInfo::emitPrologue will handle spilling of frame register.
+ continue;
+ CalleeFrameSize += SlotSize;
+ BuildMI(MBB, MI, DL, TII.get(Opc)).addReg(Reg, RegState::Kill)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+
+ X86FI->setCalleeSavedFrameSize(CalleeFrameSize);
+
+ // Make XMM regs spilled. X86 does not have ability of push/pop XMM.
+ // It can be done by spilling XMMs to stack frame.
+ // Note that only Win64 ABI might spill XMMs.
+ for (unsigned i = CSI.size(); i != 0; --i) {
+ unsigned Reg = CSI[i-1].getReg();
+ if (X86::GR64RegClass.contains(Reg) ||
+ X86::GR32RegClass.contains(Reg))
+ continue;
+ // Add the callee-saved register as live-in. It's killed at the spill.
+ MBB.addLiveIn(Reg);
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(),
+ RC, TRI);
+ }
+
+ return true;
+}
+
+bool X86FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return false;
+
+ DebugLoc DL = MBB.findDebugLoc(MI);
+
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+ // Reload XMMs from stack frame.
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ if (X86::GR64RegClass.contains(Reg) ||
+ X86::GR32RegClass.contains(Reg))
+ continue;
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(),
+ RC, TRI);
+ }
+
+ // POP GPRs.
+ unsigned FPReg = TRI->getFrameRegister(MF);
+ unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r;
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ if (!X86::GR64RegClass.contains(Reg) &&
+ !X86::GR32RegClass.contains(Reg))
+ continue;
+ if (Reg == FPReg)
+ // X86RegisterInfo::emitEpilogue will handle restoring of frame register.
+ continue;
+ BuildMI(MBB, MI, DL, TII.get(Opc), Reg);
+ }
+ return true;
+}
+
+void
+X86FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const X86RegisterInfo *RegInfo = TM.getRegisterInfo();
+ unsigned SlotSize = RegInfo->getSlotSize();
+
+ X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+ int32_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
+
+ if (TailCallReturnAddrDelta < 0) {
+ // create RETURNADDR area
+ // arg
+ // arg
+ // RETADDR
+ // { ...
+ // RETADDR area
+ // ...
+ // }
+ // [EBP]
+ MFI->CreateFixedObject(-TailCallReturnAddrDelta,
+ (-1U*SlotSize)+TailCallReturnAddrDelta, true);
+ }
+
+ if (hasFP(MF)) {
+ assert((TailCallReturnAddrDelta <= 0) &&
+ "The Delta should always be zero or negative");
+ const TargetFrameLowering &TFI = *MF.getTarget().getFrameLowering();
+
+ // Create a frame entry for the EBP register that must be saved.
+ int FrameIdx = MFI->CreateFixedObject(SlotSize,
+ -(int)SlotSize +
+ TFI.getOffsetOfLocalArea() +
+ TailCallReturnAddrDelta,
+ true);
+ assert(FrameIdx == MFI->getObjectIndexBegin() &&
+ "Slot for EBP register must be last in order to be found!");
+ (void)FrameIdx;
+ }
+}
+
+static bool
+HasNestArgument(const MachineFunction *MF) {
+ const Function *F = MF->getFunction();
+ for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
+ I != E; I++) {
+ if (I->hasNestAttr())
+ return true;
+ }
+ return false;
+}
+
+static unsigned
+GetScratchRegister(bool Is64Bit, const MachineFunction &MF) {
+ if (Is64Bit) {
+ return X86::R11;
+ } else {
+ CallingConv::ID CallingConvention = MF.getFunction()->getCallingConv();
+ bool IsNested = HasNestArgument(&MF);
+
+ if (CallingConvention == CallingConv::X86_FastCall) {
+ if (IsNested) {
+ report_fatal_error("Segmented stacks does not support fastcall with "
+ "nested function.");
+ return -1;
+ } else {
+ return X86::EAX;
+ }
+ } else {
+ if (IsNested)
+ return X86::EDX;
+ else
+ return X86::ECX;
+ }
+ }
+}
+
+void
+X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
+ MachineBasicBlock &prologueMBB = MF.front();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const X86InstrInfo &TII = *TM.getInstrInfo();
+ uint64_t StackSize;
+ bool Is64Bit = STI.is64Bit();
+ unsigned TlsReg, TlsOffset;
+ DebugLoc DL;
+ const X86Subtarget *ST = &MF.getTarget().getSubtarget<X86Subtarget>();
+
+ unsigned ScratchReg = GetScratchRegister(Is64Bit, MF);
+ assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
+ "Scratch register is live-in");
+
+ if (MF.getFunction()->isVarArg())
+ report_fatal_error("Segmented stacks do not support vararg functions.");
+ if (!ST->isTargetLinux())
+ report_fatal_error("Segmented stacks supported only on linux.");
+
+ MachineBasicBlock *allocMBB = MF.CreateMachineBasicBlock();
+ MachineBasicBlock *checkMBB = MF.CreateMachineBasicBlock();
+ X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+ bool IsNested = false;
+
+ // We need to know if the function has a nest argument only in 64 bit mode.
+ if (Is64Bit)
+ IsNested = HasNestArgument(&MF);
+
+ // The MOV R10, RAX needs to be in a different block, since the RET we emit in
+ // allocMBB needs to be last (terminating) instruction.
+ MachineBasicBlock *restoreR10MBB = NULL;
+ if (IsNested)
+ restoreR10MBB = MF.CreateMachineBasicBlock();
+
+ for (MachineBasicBlock::livein_iterator i = prologueMBB.livein_begin(),
+ e = prologueMBB.livein_end(); i != e; i++) {
+ allocMBB->addLiveIn(*i);
+ checkMBB->addLiveIn(*i);
+
+ if (IsNested)
+ restoreR10MBB->addLiveIn(*i);
+ }
+
+ if (IsNested) {
+ allocMBB->addLiveIn(X86::R10);
+ restoreR10MBB->addLiveIn(X86::RAX);
+ }
+
+ if (IsNested)
+ MF.push_front(restoreR10MBB);
+ MF.push_front(allocMBB);
+ MF.push_front(checkMBB);
+
+ // Eventually StackSize will be calculated by a link-time pass; which will
+ // also decide whether checking code needs to be injected into this particular
+ // prologue.
+ StackSize = MFI->getStackSize();
+
+ // Read the limit off the current stacklet off the stack_guard location.
+ if (Is64Bit) {
+ TlsReg = X86::FS;
+ TlsOffset = 0x70;
+
+ BuildMI(checkMBB, DL, TII.get(X86::LEA64r), ScratchReg).addReg(X86::RSP)
+ .addImm(0).addReg(0).addImm(-StackSize).addReg(0);
+ BuildMI(checkMBB, DL, TII.get(X86::CMP64rm)).addReg(ScratchReg)
+ .addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg);
+ } else {
+ TlsReg = X86::GS;
+ TlsOffset = 0x30;
+
+ BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg).addReg(X86::ESP)
+ .addImm(0).addReg(0).addImm(-StackSize).addReg(0);
+ BuildMI(checkMBB, DL, TII.get(X86::CMP32rm)).addReg(ScratchReg)
+ .addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg);
+ }
+
+ // This jump is taken if SP >= (Stacklet Limit + Stack Space required).
+ // It jumps to normal execution of the function body.
+ BuildMI(checkMBB, DL, TII.get(X86::JG_4)).addMBB(&prologueMBB);
+
+ // On 32 bit we first push the arguments size and then the frame size. On 64
+ // bit, we pass the stack frame size in r10 and the argument size in r11.
+ if (Is64Bit) {
+ // Functions with nested arguments use R10, so it needs to be saved across
+ // the call to _morestack
+
+ if (IsNested)
+ BuildMI(allocMBB, DL, TII.get(X86::MOV64rr), X86::RAX).addReg(X86::R10);
+
+ BuildMI(allocMBB, DL, TII.get(X86::MOV64ri), X86::R10)
+ .addImm(StackSize);
+ BuildMI(allocMBB, DL, TII.get(X86::MOV64ri), X86::R11)
+ .addImm(X86FI->getArgumentStackSize());
+ MF.getRegInfo().setPhysRegUsed(X86::R10);
+ MF.getRegInfo().setPhysRegUsed(X86::R11);
+ } else {
+ // Since we'll call __morestack, stack alignment needs to be preserved.
+ BuildMI(allocMBB, DL, TII.get(X86::SUB32ri), X86::ESP).addReg(X86::ESP)
+ .addImm(8);
+ BuildMI(allocMBB, DL, TII.get(X86::PUSHi32))
+ .addImm(X86FI->getArgumentStackSize());
+ BuildMI(allocMBB, DL, TII.get(X86::PUSHi32))
+ .addImm(StackSize);
+ }
+
+ // __morestack is in libgcc
+ if (Is64Bit)
+ BuildMI(allocMBB, DL, TII.get(X86::CALL64pcrel32))
+ .addExternalSymbol("__morestack");
+ else
+ BuildMI(allocMBB, DL, TII.get(X86::CALLpcrel32))
+ .addExternalSymbol("__morestack");
+
+ // __morestack only seems to remove 8 bytes off the stack. Add back the
+ // additional 8 bytes we added before pushing the arguments.
+ if (!Is64Bit)
+ BuildMI(allocMBB, DL, TII.get(X86::ADD32ri), X86::ESP).addReg(X86::ESP)
+ .addImm(8);
+ BuildMI(allocMBB, DL, TII.get(X86::RET));
+
+ if (IsNested)
+ BuildMI(restoreR10MBB, DL, TII.get(X86::MOV64rr), X86::R10)
+ .addReg(X86::RAX);
+
+ if (IsNested) {
+ allocMBB->addSuccessor(restoreR10MBB);
+ restoreR10MBB->addSuccessor(&prologueMBB);
+ } else {
+ allocMBB->addSuccessor(&prologueMBB);
+ }
+
+ checkMBB->addSuccessor(allocMBB);
+ checkMBB->addSuccessor(&prologueMBB);
+
+#ifdef XDEBUG
+ MF.verify();
+#endif
+}
diff --git a/contrib/llvm/lib/Target/X86/X86FrameLowering.h b/contrib/llvm/lib/Target/X86/X86FrameLowering.h
new file mode 100644
index 0000000..6f49064
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86FrameLowering.h
@@ -0,0 +1,68 @@
+//=-- X86TargetFrameLowering.h - Define frame lowering for X86 ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class implements X86-specific bits of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86_FRAMELOWERING_H
+#define X86_FRAMELOWERING_H
+
+#include "X86Subtarget.h"
+#include "llvm/MC/MCDwarf.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+ class MCSymbol;
+ class X86TargetMachine;
+
+class X86FrameLowering : public TargetFrameLowering {
+ const X86TargetMachine &TM;
+ const X86Subtarget &STI;
+public:
+ explicit X86FrameLowering(const X86TargetMachine &tm, const X86Subtarget &sti)
+ : TargetFrameLowering(StackGrowsDown,
+ sti.getStackAlignment(),
+ (sti.is64Bit() ? -8 : -4)),
+ TM(tm), STI(sti) {
+ }
+
+ void emitCalleeSavedFrameMoves(MachineFunction &MF, MCSymbol *Label,
+ unsigned FramePtr) const;
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ void adjustForSegmentedStacks(MachineFunction &MF) const;
+
+ void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS = NULL) const;
+
+ bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+
+ bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+
+ bool hasFP(const MachineFunction &MF) const;
+ bool hasReservedCallFrame(const MachineFunction &MF) const;
+
+ int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
+ uint32_t getCompactUnwindEncoding(MachineFunction &MF) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
new file mode 100644
index 0000000..02b0ff2
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -0,0 +1,2259 @@
+//===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a DAG pattern matching instruction selector for X86,
+// converting from a legalized dag to a X86 dag.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "x86-isel"
+#include "X86.h"
+#include "X86InstrBuilder.h"
+#include "X86MachineFunctionInfo.h"
+#include "X86RegisterInfo.h"
+#include "X86Subtarget.h"
+#include "X86TargetMachine.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor");
+
+//===----------------------------------------------------------------------===//
+// Pattern Matcher Implementation
+//===----------------------------------------------------------------------===//
+
+namespace {
+ /// X86ISelAddressMode - This corresponds to X86AddressMode, but uses
+ /// SDValue's instead of register numbers for the leaves of the matched
+ /// tree.
+ struct X86ISelAddressMode {
+ enum {
+ RegBase,
+ FrameIndexBase
+ } BaseType;
+
+ // This is really a union, discriminated by BaseType!
+ SDValue Base_Reg;
+ int Base_FrameIndex;
+
+ unsigned Scale;
+ SDValue IndexReg;
+ int32_t Disp;
+ SDValue Segment;
+ const GlobalValue *GV;
+ const Constant *CP;
+ const BlockAddress *BlockAddr;
+ const char *ES;
+ int JT;
+ unsigned Align; // CP alignment.
+ unsigned char SymbolFlags; // X86II::MO_*
+
+ X86ISelAddressMode()
+ : BaseType(RegBase), Base_FrameIndex(0), Scale(1), IndexReg(), Disp(0),
+ Segment(), GV(0), CP(0), BlockAddr(0), ES(0), JT(-1), Align(0),
+ SymbolFlags(X86II::MO_NO_FLAG) {
+ }
+
+ bool hasSymbolicDisplacement() const {
+ return GV != 0 || CP != 0 || ES != 0 || JT != -1 || BlockAddr != 0;
+ }
+
+ bool hasBaseOrIndexReg() const {
+ return IndexReg.getNode() != 0 || Base_Reg.getNode() != 0;
+ }
+
+ /// isRIPRelative - Return true if this addressing mode is already RIP
+ /// relative.
+ bool isRIPRelative() const {
+ if (BaseType != RegBase) return false;
+ if (RegisterSDNode *RegNode =
+ dyn_cast_or_null<RegisterSDNode>(Base_Reg.getNode()))
+ return RegNode->getReg() == X86::RIP;
+ return false;
+ }
+
+ void setBaseReg(SDValue Reg) {
+ BaseType = RegBase;
+ Base_Reg = Reg;
+ }
+
+ void dump() {
+ dbgs() << "X86ISelAddressMode " << this << '\n';
+ dbgs() << "Base_Reg ";
+ if (Base_Reg.getNode() != 0)
+ Base_Reg.getNode()->dump();
+ else
+ dbgs() << "nul";
+ dbgs() << " Base.FrameIndex " << Base_FrameIndex << '\n'
+ << " Scale" << Scale << '\n'
+ << "IndexReg ";
+ if (IndexReg.getNode() != 0)
+ IndexReg.getNode()->dump();
+ else
+ dbgs() << "nul";
+ dbgs() << " Disp " << Disp << '\n'
+ << "GV ";
+ if (GV)
+ GV->dump();
+ else
+ dbgs() << "nul";
+ dbgs() << " CP ";
+ if (CP)
+ CP->dump();
+ else
+ dbgs() << "nul";
+ dbgs() << '\n'
+ << "ES ";
+ if (ES)
+ dbgs() << ES;
+ else
+ dbgs() << "nul";
+ dbgs() << " JT" << JT << " Align" << Align << '\n';
+ }
+ };
+}
+
+namespace {
+ //===--------------------------------------------------------------------===//
+ /// ISel - X86 specific code to select X86 machine instructions for
+ /// SelectionDAG operations.
+ ///
+ class X86DAGToDAGISel : public SelectionDAGISel {
+ /// X86Lowering - This object fully describes how to lower LLVM code to an
+ /// X86-specific SelectionDAG.
+ const X86TargetLowering &X86Lowering;
+
+ /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const X86Subtarget *Subtarget;
+
+ /// OptForSize - If true, selector should try to optimize for code size
+ /// instead of performance.
+ bool OptForSize;
+
+ public:
+ explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel)
+ : SelectionDAGISel(tm, OptLevel),
+ X86Lowering(*tm.getTargetLowering()),
+ Subtarget(&tm.getSubtarget<X86Subtarget>()),
+ OptForSize(false) {}
+
+ virtual const char *getPassName() const {
+ return "X86 DAG->DAG Instruction Selection";
+ }
+
+ virtual void EmitFunctionEntryCode();
+
+ virtual bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const;
+
+ virtual void PreprocessISelDAG();
+
+ inline bool immSext8(SDNode *N) const {
+ return isInt<8>(cast<ConstantSDNode>(N)->getSExtValue());
+ }
+
+ // i64immSExt32 predicate - True if the 64-bit immediate fits in a 32-bit
+ // sign extended field.
+ inline bool i64immSExt32(SDNode *N) const {
+ uint64_t v = cast<ConstantSDNode>(N)->getZExtValue();
+ return (int64_t)v == (int32_t)v;
+ }
+
+// Include the pieces autogenerated from the target description.
+#include "X86GenDAGISel.inc"
+
+ private:
+ SDNode *Select(SDNode *N);
+ SDNode *SelectAtomic64(SDNode *Node, unsigned Opc);
+ SDNode *SelectAtomicLoadAdd(SDNode *Node, EVT NVT);
+ SDNode *SelectAtomicLoadArith(SDNode *Node, EVT NVT);
+
+ bool FoldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM);
+ bool MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM);
+ bool MatchWrapper(SDValue N, X86ISelAddressMode &AM);
+ bool MatchAddress(SDValue N, X86ISelAddressMode &AM);
+ bool MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
+ unsigned Depth);
+ bool MatchAddressBase(SDValue N, X86ISelAddressMode &AM);
+ bool SelectAddr(SDNode *Parent, SDValue N, SDValue &Base,
+ SDValue &Scale, SDValue &Index, SDValue &Disp,
+ SDValue &Segment);
+ bool SelectLEAAddr(SDValue N, SDValue &Base,
+ SDValue &Scale, SDValue &Index, SDValue &Disp,
+ SDValue &Segment);
+ bool SelectTLSADDRAddr(SDValue N, SDValue &Base,
+ SDValue &Scale, SDValue &Index, SDValue &Disp,
+ SDValue &Segment);
+ bool SelectScalarSSELoad(SDNode *Root, SDValue N,
+ SDValue &Base, SDValue &Scale,
+ SDValue &Index, SDValue &Disp,
+ SDValue &Segment,
+ SDValue &NodeWithChain);
+
+ bool TryFoldLoad(SDNode *P, SDValue N,
+ SDValue &Base, SDValue &Scale,
+ SDValue &Index, SDValue &Disp,
+ SDValue &Segment);
+
+ /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
+ /// inline asm expressions.
+ virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps);
+
+ void EmitSpecialCodeForMain(MachineBasicBlock *BB, MachineFrameInfo *MFI);
+
+ inline void getAddressOperands(X86ISelAddressMode &AM, SDValue &Base,
+ SDValue &Scale, SDValue &Index,
+ SDValue &Disp, SDValue &Segment) {
+ Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) ?
+ CurDAG->getTargetFrameIndex(AM.Base_FrameIndex, TLI.getPointerTy()) :
+ AM.Base_Reg;
+ Scale = getI8Imm(AM.Scale);
+ Index = AM.IndexReg;
+ // These are 32-bit even in 64-bit mode since RIP relative offset
+ // is 32-bit.
+ if (AM.GV)
+ Disp = CurDAG->getTargetGlobalAddress(AM.GV, DebugLoc(),
+ MVT::i32, AM.Disp,
+ AM.SymbolFlags);
+ else if (AM.CP)
+ Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32,
+ AM.Align, AM.Disp, AM.SymbolFlags);
+ else if (AM.ES)
+ Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32, AM.SymbolFlags);
+ else if (AM.JT != -1)
+ Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32, AM.SymbolFlags);
+ else if (AM.BlockAddr)
+ Disp = CurDAG->getBlockAddress(AM.BlockAddr, MVT::i32,
+ true, AM.SymbolFlags);
+ else
+ Disp = CurDAG->getTargetConstant(AM.Disp, MVT::i32);
+
+ if (AM.Segment.getNode())
+ Segment = AM.Segment;
+ else
+ Segment = CurDAG->getRegister(0, MVT::i32);
+ }
+
+ /// getI8Imm - Return a target constant with the specified value, of type
+ /// i8.
+ inline SDValue getI8Imm(unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i8);
+ }
+
+ /// getI32Imm - Return a target constant with the specified value, of type
+ /// i32.
+ inline SDValue getI32Imm(unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i32);
+ }
+
+ /// getGlobalBaseReg - Return an SDNode that returns the value of
+ /// the global base register. Output instructions required to
+ /// initialize the global base register, if necessary.
+ ///
+ SDNode *getGlobalBaseReg();
+
+ /// getTargetMachine - Return a reference to the TargetMachine, casted
+ /// to the target-specific type.
+ const X86TargetMachine &getTargetMachine() {
+ return static_cast<const X86TargetMachine &>(TM);
+ }
+
+ /// getInstrInfo - Return a reference to the TargetInstrInfo, casted
+ /// to the target-specific type.
+ const X86InstrInfo *getInstrInfo() {
+ return getTargetMachine().getInstrInfo();
+ }
+ };
+}
+
+
+bool
+X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const {
+ if (OptLevel == CodeGenOpt::None) return false;
+
+ if (!N.hasOneUse())
+ return false;
+
+ if (N.getOpcode() != ISD::LOAD)
+ return true;
+
+ // If N is a load, do additional profitability checks.
+ if (U == Root) {
+ switch (U->getOpcode()) {
+ default: break;
+ case X86ISD::ADD:
+ case X86ISD::SUB:
+ case X86ISD::AND:
+ case X86ISD::XOR:
+ case X86ISD::OR:
+ case ISD::ADD:
+ case ISD::ADDC:
+ case ISD::ADDE:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR: {
+ SDValue Op1 = U->getOperand(1);
+
+ // If the other operand is a 8-bit immediate we should fold the immediate
+ // instead. This reduces code size.
+ // e.g.
+ // movl 4(%esp), %eax
+ // addl $4, %eax
+ // vs.
+ // movl $4, %eax
+ // addl 4(%esp), %eax
+ // The former is 2 bytes shorter. In case where the increment is 1, then
+ // the saving can be 4 bytes (by using incl %eax).
+ if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Op1))
+ if (Imm->getAPIntValue().isSignedIntN(8))
+ return false;
+
+ // If the other operand is a TLS address, we should fold it instead.
+ // This produces
+ // movl %gs:0, %eax
+ // leal i@NTPOFF(%eax), %eax
+ // instead of
+ // movl $i@NTPOFF, %eax
+ // addl %gs:0, %eax
+ // if the block also has an access to a second TLS address this will save
+ // a load.
+ // FIXME: This is probably also true for non TLS addresses.
+ if (Op1.getOpcode() == X86ISD::Wrapper) {
+ SDValue Val = Op1.getOperand(0);
+ if (Val.getOpcode() == ISD::TargetGlobalTLSAddress)
+ return false;
+ }
+ }
+ }
+ }
+
+ return true;
+}
+
+/// MoveBelowCallOrigChain - Replace the original chain operand of the call with
+/// load's chain operand and move load below the call's chain operand.
+static void MoveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load,
+ SDValue Call, SDValue OrigChain) {
+ SmallVector<SDValue, 8> Ops;
+ SDValue Chain = OrigChain.getOperand(0);
+ if (Chain.getNode() == Load.getNode())
+ Ops.push_back(Load.getOperand(0));
+ else {
+ assert(Chain.getOpcode() == ISD::TokenFactor &&
+ "Unexpected chain operand");
+ for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i)
+ if (Chain.getOperand(i).getNode() == Load.getNode())
+ Ops.push_back(Load.getOperand(0));
+ else
+ Ops.push_back(Chain.getOperand(i));
+ SDValue NewChain =
+ CurDAG->getNode(ISD::TokenFactor, Load.getDebugLoc(),
+ MVT::Other, &Ops[0], Ops.size());
+ Ops.clear();
+ Ops.push_back(NewChain);
+ }
+ for (unsigned i = 1, e = OrigChain.getNumOperands(); i != e; ++i)
+ Ops.push_back(OrigChain.getOperand(i));
+ CurDAG->UpdateNodeOperands(OrigChain.getNode(), &Ops[0], Ops.size());
+ CurDAG->UpdateNodeOperands(Load.getNode(), Call.getOperand(0),
+ Load.getOperand(1), Load.getOperand(2));
+ Ops.clear();
+ Ops.push_back(SDValue(Load.getNode(), 1));
+ for (unsigned i = 1, e = Call.getNode()->getNumOperands(); i != e; ++i)
+ Ops.push_back(Call.getOperand(i));
+ CurDAG->UpdateNodeOperands(Call.getNode(), &Ops[0], Ops.size());
+}
+
+/// isCalleeLoad - Return true if call address is a load and it can be
+/// moved below CALLSEQ_START and the chains leading up to the call.
+/// Return the CALLSEQ_START by reference as a second output.
+/// In the case of a tail call, there isn't a callseq node between the call
+/// chain and the load.
+static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
+ if (Callee.getNode() == Chain.getNode() || !Callee.hasOneUse())
+ return false;
+ LoadSDNode *LD = dyn_cast<LoadSDNode>(Callee.getNode());
+ if (!LD ||
+ LD->isVolatile() ||
+ LD->getAddressingMode() != ISD::UNINDEXED ||
+ LD->getExtensionType() != ISD::NON_EXTLOAD)
+ return false;
+
+ // Now let's find the callseq_start.
+ while (HasCallSeq && Chain.getOpcode() != ISD::CALLSEQ_START) {
+ if (!Chain.hasOneUse())
+ return false;
+ Chain = Chain.getOperand(0);
+ }
+
+ if (!Chain.getNumOperands())
+ return false;
+ if (Chain.getOperand(0).getNode() == Callee.getNode())
+ return true;
+ if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor &&
+ Callee.getValue(1).isOperandOf(Chain.getOperand(0).getNode()) &&
+ Callee.getValue(1).hasOneUse())
+ return true;
+ return false;
+}
+
+void X86DAGToDAGISel::PreprocessISelDAG() {
+ // OptForSize is used in pattern predicates that isel is matching.
+ OptForSize = MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize);
+
+ for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
+ E = CurDAG->allnodes_end(); I != E; ) {
+ SDNode *N = I++; // Preincrement iterator to avoid invalidation issues.
+
+ if (OptLevel != CodeGenOpt::None &&
+ (N->getOpcode() == X86ISD::CALL ||
+ N->getOpcode() == X86ISD::TC_RETURN)) {
+ /// Also try moving call address load from outside callseq_start to just
+ /// before the call to allow it to be folded.
+ ///
+ /// [Load chain]
+ /// ^
+ /// |
+ /// [Load]
+ /// ^ ^
+ /// | |
+ /// / \--
+ /// / |
+ ///[CALLSEQ_START] |
+ /// ^ |
+ /// | |
+ /// [LOAD/C2Reg] |
+ /// | |
+ /// \ /
+ /// \ /
+ /// [CALL]
+ bool HasCallSeq = N->getOpcode() == X86ISD::CALL;
+ SDValue Chain = N->getOperand(0);
+ SDValue Load = N->getOperand(1);
+ if (!isCalleeLoad(Load, Chain, HasCallSeq))
+ continue;
+ MoveBelowOrigChain(CurDAG, Load, SDValue(N, 0), Chain);
+ ++NumLoadMoved;
+ continue;
+ }
+
+ // Lower fpround and fpextend nodes that target the FP stack to be store and
+ // load to the stack. This is a gross hack. We would like to simply mark
+ // these as being illegal, but when we do that, legalize produces these when
+ // it expands calls, then expands these in the same legalize pass. We would
+ // like dag combine to be able to hack on these between the call expansion
+ // and the node legalization. As such this pass basically does "really
+ // late" legalization of these inline with the X86 isel pass.
+ // FIXME: This should only happen when not compiled with -O0.
+ if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND)
+ continue;
+
+ EVT SrcVT = N->getOperand(0).getValueType();
+ EVT DstVT = N->getValueType(0);
+
+ // If any of the sources are vectors, no fp stack involved.
+ if (SrcVT.isVector() || DstVT.isVector())
+ continue;
+
+ // If the source and destination are SSE registers, then this is a legal
+ // conversion that should not be lowered.
+ bool SrcIsSSE = X86Lowering.isScalarFPTypeInSSEReg(SrcVT);
+ bool DstIsSSE = X86Lowering.isScalarFPTypeInSSEReg(DstVT);
+ if (SrcIsSSE && DstIsSSE)
+ continue;
+
+ if (!SrcIsSSE && !DstIsSSE) {
+ // If this is an FPStack extension, it is a noop.
+ if (N->getOpcode() == ISD::FP_EXTEND)
+ continue;
+ // If this is a value-preserving FPStack truncation, it is a noop.
+ if (N->getConstantOperandVal(1))
+ continue;
+ }
+
+ // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
+ // FPStack has extload and truncstore. SSE can fold direct loads into other
+ // operations. Based on this, decide what we want to do.
+ EVT MemVT;
+ if (N->getOpcode() == ISD::FP_ROUND)
+ MemVT = DstVT; // FP_ROUND must use DstVT, we can't do a 'trunc load'.
+ else
+ MemVT = SrcIsSSE ? SrcVT : DstVT;
+
+ SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
+ DebugLoc dl = N->getDebugLoc();
+
+ // FIXME: optimize the case where the src/dest is a load or store?
+ SDValue Store = CurDAG->getTruncStore(CurDAG->getEntryNode(), dl,
+ N->getOperand(0),
+ MemTmp, MachinePointerInfo(), MemVT,
+ false, false, 0);
+ SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp,
+ MachinePointerInfo(),
+ MemVT, false, false, 0);
+
+ // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
+ // extload we created. This will cause general havok on the dag because
+ // anything below the conversion could be folded into other existing nodes.
+ // To avoid invalidating 'I', back it up to the convert node.
+ --I;
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
+
+ // Now that we did that, the node is dead. Increment the iterator to the
+ // next node to process, then delete N.
+ ++I;
+ CurDAG->DeleteNode(N);
+ }
+}
+
+
+/// EmitSpecialCodeForMain - Emit any code that needs to be executed only in
+/// the main function.
+void X86DAGToDAGISel::EmitSpecialCodeForMain(MachineBasicBlock *BB,
+ MachineFrameInfo *MFI) {
+ const TargetInstrInfo *TII = TM.getInstrInfo();
+ if (Subtarget->isTargetCygMing()) {
+ unsigned CallOp =
+ Subtarget->is64Bit() ? X86::WINCALL64pcrel32 : X86::CALLpcrel32;
+ BuildMI(BB, DebugLoc(),
+ TII->get(CallOp)).addExternalSymbol("__main");
+ }
+}
+
+void X86DAGToDAGISel::EmitFunctionEntryCode() {
+ // If this is main, emit special code for main.
+ if (const Function *Fn = MF->getFunction())
+ if (Fn->hasExternalLinkage() && Fn->getName() == "main")
+ EmitSpecialCodeForMain(MF->begin(), MF->getFrameInfo());
+}
+
+static bool isDispSafeForFrameIndex(int64_t Val) {
+ // On 64-bit platforms, we can run into an issue where a frame index
+ // includes a displacement that, when added to the explicit displacement,
+ // will overflow the displacement field. Assuming that the frame index
+ // displacement fits into a 31-bit integer (which is only slightly more
+ // aggressive than the current fundamental assumption that it fits into
+ // a 32-bit integer), a 31-bit disp should always be safe.
+ return isInt<31>(Val);
+}
+
+bool X86DAGToDAGISel::FoldOffsetIntoAddress(uint64_t Offset,
+ X86ISelAddressMode &AM) {
+ int64_t Val = AM.Disp + Offset;
+ CodeModel::Model M = TM.getCodeModel();
+ if (Subtarget->is64Bit()) {
+ if (!X86::isOffsetSuitableForCodeModel(Val, M,
+ AM.hasSymbolicDisplacement()))
+ return true;
+ // In addition to the checks required for a register base, check that
+ // we do not try to use an unsafe Disp with a frame index.
+ if (AM.BaseType == X86ISelAddressMode::FrameIndexBase &&
+ !isDispSafeForFrameIndex(Val))
+ return true;
+ }
+ AM.Disp = Val;
+ return false;
+
+}
+
+bool X86DAGToDAGISel::MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){
+ SDValue Address = N->getOperand(1);
+
+ // load gs:0 -> GS segment register.
+ // load fs:0 -> FS segment register.
+ //
+ // This optimization is valid because the GNU TLS model defines that
+ // gs:0 (or fs:0 on X86-64) contains its own address.
+ // For more information see http://people.redhat.com/drepper/tls.pdf
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Address))
+ if (C->getSExtValue() == 0 && AM.Segment.getNode() == 0 &&
+ Subtarget->isTargetELF())
+ switch (N->getPointerInfo().getAddrSpace()) {
+ case 256:
+ AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
+ return false;
+ case 257:
+ AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
+ return false;
+ }
+
+ return true;
+}
+
+/// MatchWrapper - Try to match X86ISD::Wrapper and X86ISD::WrapperRIP nodes
+/// into an addressing mode. These wrap things that will resolve down into a
+/// symbol reference. If no match is possible, this returns true, otherwise it
+/// returns false.
+bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) {
+ // If the addressing mode already has a symbol as the displacement, we can
+ // never match another symbol.
+ if (AM.hasSymbolicDisplacement())
+ return true;
+
+ SDValue N0 = N.getOperand(0);
+ CodeModel::Model M = TM.getCodeModel();
+
+ // Handle X86-64 rip-relative addresses. We check this before checking direct
+ // folding because RIP is preferable to non-RIP accesses.
+ if (Subtarget->is64Bit() &&
+ // Under X86-64 non-small code model, GV (and friends) are 64-bits, so
+ // they cannot be folded into immediate fields.
+ // FIXME: This can be improved for kernel and other models?
+ (M == CodeModel::Small || M == CodeModel::Kernel) &&
+ // Base and index reg must be 0 in order to use %rip as base and lowering
+ // must allow RIP.
+ !AM.hasBaseOrIndexReg() && N.getOpcode() == X86ISD::WrapperRIP) {
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
+ X86ISelAddressMode Backup = AM;
+ AM.GV = G->getGlobal();
+ AM.SymbolFlags = G->getTargetFlags();
+ if (FoldOffsetIntoAddress(G->getOffset(), AM)) {
+ AM = Backup;
+ return true;
+ }
+ } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
+ X86ISelAddressMode Backup = AM;
+ AM.CP = CP->getConstVal();
+ AM.Align = CP->getAlignment();
+ AM.SymbolFlags = CP->getTargetFlags();
+ if (FoldOffsetIntoAddress(CP->getOffset(), AM)) {
+ AM = Backup;
+ return true;
+ }
+ } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
+ AM.ES = S->getSymbol();
+ AM.SymbolFlags = S->getTargetFlags();
+ } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
+ AM.JT = J->getIndex();
+ AM.SymbolFlags = J->getTargetFlags();
+ } else {
+ AM.BlockAddr = cast<BlockAddressSDNode>(N0)->getBlockAddress();
+ AM.SymbolFlags = cast<BlockAddressSDNode>(N0)->getTargetFlags();
+ }
+
+ if (N.getOpcode() == X86ISD::WrapperRIP)
+ AM.setBaseReg(CurDAG->getRegister(X86::RIP, MVT::i64));
+ return false;
+ }
+
+ // Handle the case when globals fit in our immediate field: This is true for
+ // X86-32 always and X86-64 when in -static -mcmodel=small mode. In 64-bit
+ // mode, this results in a non-RIP-relative computation.
+ if (!Subtarget->is64Bit() ||
+ ((M == CodeModel::Small || M == CodeModel::Kernel) &&
+ TM.getRelocationModel() == Reloc::Static)) {
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
+ AM.GV = G->getGlobal();
+ AM.Disp += G->getOffset();
+ AM.SymbolFlags = G->getTargetFlags();
+ } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
+ AM.CP = CP->getConstVal();
+ AM.Align = CP->getAlignment();
+ AM.Disp += CP->getOffset();
+ AM.SymbolFlags = CP->getTargetFlags();
+ } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
+ AM.ES = S->getSymbol();
+ AM.SymbolFlags = S->getTargetFlags();
+ } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
+ AM.JT = J->getIndex();
+ AM.SymbolFlags = J->getTargetFlags();
+ } else {
+ AM.BlockAddr = cast<BlockAddressSDNode>(N0)->getBlockAddress();
+ AM.SymbolFlags = cast<BlockAddressSDNode>(N0)->getTargetFlags();
+ }
+ return false;
+ }
+
+ return true;
+}
+
+/// MatchAddress - Add the specified node to the specified addressing mode,
+/// returning true if it cannot be done. This just pattern matches for the
+/// addressing mode.
+bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) {
+ if (MatchAddressRecursively(N, AM, 0))
+ return true;
+
+ // Post-processing: Convert lea(,%reg,2) to lea(%reg,%reg), which has
+ // a smaller encoding and avoids a scaled-index.
+ if (AM.Scale == 2 &&
+ AM.BaseType == X86ISelAddressMode::RegBase &&
+ AM.Base_Reg.getNode() == 0) {
+ AM.Base_Reg = AM.IndexReg;
+ AM.Scale = 1;
+ }
+
+ // Post-processing: Convert foo to foo(%rip), even in non-PIC mode,
+ // because it has a smaller encoding.
+ // TODO: Which other code models can use this?
+ if (TM.getCodeModel() == CodeModel::Small &&
+ Subtarget->is64Bit() &&
+ AM.Scale == 1 &&
+ AM.BaseType == X86ISelAddressMode::RegBase &&
+ AM.Base_Reg.getNode() == 0 &&
+ AM.IndexReg.getNode() == 0 &&
+ AM.SymbolFlags == X86II::MO_NO_FLAG &&
+ AM.hasSymbolicDisplacement())
+ AM.Base_Reg = CurDAG->getRegister(X86::RIP, MVT::i64);
+
+ return false;
+}
+
+bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
+ unsigned Depth) {
+ DebugLoc dl = N.getDebugLoc();
+ DEBUG({
+ dbgs() << "MatchAddress: ";
+ AM.dump();
+ });
+ // Limit recursion.
+ if (Depth > 5)
+ return MatchAddressBase(N, AM);
+
+ // If this is already a %rip relative address, we can only merge immediates
+ // into it. Instead of handling this in every case, we handle it here.
+ // RIP relative addressing: %rip + 32-bit displacement!
+ if (AM.isRIPRelative()) {
+ // FIXME: JumpTable and ExternalSymbol address currently don't like
+ // displacements. It isn't very important, but this should be fixed for
+ // consistency.
+ if (!AM.ES && AM.JT != -1) return true;
+
+ if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N))
+ if (!FoldOffsetIntoAddress(Cst->getSExtValue(), AM))
+ return false;
+ return true;
+ }
+
+ switch (N.getOpcode()) {
+ default: break;
+ case ISD::Constant: {
+ uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
+ if (!FoldOffsetIntoAddress(Val, AM))
+ return false;
+ break;
+ }
+
+ case X86ISD::Wrapper:
+ case X86ISD::WrapperRIP:
+ if (!MatchWrapper(N, AM))
+ return false;
+ break;
+
+ case ISD::LOAD:
+ if (!MatchLoadInAddress(cast<LoadSDNode>(N), AM))
+ return false;
+ break;
+
+ case ISD::FrameIndex:
+ if (AM.BaseType == X86ISelAddressMode::RegBase &&
+ AM.Base_Reg.getNode() == 0 &&
+ (!Subtarget->is64Bit() || isDispSafeForFrameIndex(AM.Disp))) {
+ AM.BaseType = X86ISelAddressMode::FrameIndexBase;
+ AM.Base_FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
+ return false;
+ }
+ break;
+
+ case ISD::SHL:
+ if (AM.IndexReg.getNode() != 0 || AM.Scale != 1)
+ break;
+
+ if (ConstantSDNode
+ *CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1))) {
+ unsigned Val = CN->getZExtValue();
+ // Note that we handle x<<1 as (,x,2) rather than (x,x) here so
+ // that the base operand remains free for further matching. If
+ // the base doesn't end up getting used, a post-processing step
+ // in MatchAddress turns (,x,2) into (x,x), which is cheaper.
+ if (Val == 1 || Val == 2 || Val == 3) {
+ AM.Scale = 1 << Val;
+ SDValue ShVal = N.getNode()->getOperand(0);
+
+ // Okay, we know that we have a scale by now. However, if the scaled
+ // value is an add of something and a constant, we can fold the
+ // constant into the disp field here.
+ if (CurDAG->isBaseWithConstantOffset(ShVal)) {
+ AM.IndexReg = ShVal.getNode()->getOperand(0);
+ ConstantSDNode *AddVal =
+ cast<ConstantSDNode>(ShVal.getNode()->getOperand(1));
+ uint64_t Disp = AddVal->getSExtValue() << Val;
+ if (!FoldOffsetIntoAddress(Disp, AM))
+ return false;
+ }
+
+ AM.IndexReg = ShVal;
+ return false;
+ }
+ break;
+ }
+
+ case ISD::SMUL_LOHI:
+ case ISD::UMUL_LOHI:
+ // A mul_lohi where we need the low part can be folded as a plain multiply.
+ if (N.getResNo() != 0) break;
+ // FALL THROUGH
+ case ISD::MUL:
+ case X86ISD::MUL_IMM:
+ // X*[3,5,9] -> X+X*[2,4,8]
+ if (AM.BaseType == X86ISelAddressMode::RegBase &&
+ AM.Base_Reg.getNode() == 0 &&
+ AM.IndexReg.getNode() == 0) {
+ if (ConstantSDNode
+ *CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1)))
+ if (CN->getZExtValue() == 3 || CN->getZExtValue() == 5 ||
+ CN->getZExtValue() == 9) {
+ AM.Scale = unsigned(CN->getZExtValue())-1;
+
+ SDValue MulVal = N.getNode()->getOperand(0);
+ SDValue Reg;
+
+ // Okay, we know that we have a scale by now. However, if the scaled
+ // value is an add of something and a constant, we can fold the
+ // constant into the disp field here.
+ if (MulVal.getNode()->getOpcode() == ISD::ADD && MulVal.hasOneUse() &&
+ isa<ConstantSDNode>(MulVal.getNode()->getOperand(1))) {
+ Reg = MulVal.getNode()->getOperand(0);
+ ConstantSDNode *AddVal =
+ cast<ConstantSDNode>(MulVal.getNode()->getOperand(1));
+ uint64_t Disp = AddVal->getSExtValue() * CN->getZExtValue();
+ if (FoldOffsetIntoAddress(Disp, AM))
+ Reg = N.getNode()->getOperand(0);
+ } else {
+ Reg = N.getNode()->getOperand(0);
+ }
+
+ AM.IndexReg = AM.Base_Reg = Reg;
+ return false;
+ }
+ }
+ break;
+
+ case ISD::SUB: {
+ // Given A-B, if A can be completely folded into the address and
+ // the index field with the index field unused, use -B as the index.
+ // This is a win if a has multiple parts that can be folded into
+ // the address. Also, this saves a mov if the base register has
+ // other uses, since it avoids a two-address sub instruction, however
+ // it costs an additional mov if the index register has other uses.
+
+ // Add an artificial use to this node so that we can keep track of
+ // it if it gets CSE'd with a different node.
+ HandleSDNode Handle(N);
+
+ // Test if the LHS of the sub can be folded.
+ X86ISelAddressMode Backup = AM;
+ if (MatchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1)) {
+ AM = Backup;
+ break;
+ }
+ // Test if the index field is free for use.
+ if (AM.IndexReg.getNode() || AM.isRIPRelative()) {
+ AM = Backup;
+ break;
+ }
+
+ int Cost = 0;
+ SDValue RHS = Handle.getValue().getNode()->getOperand(1);
+ // If the RHS involves a register with multiple uses, this
+ // transformation incurs an extra mov, due to the neg instruction
+ // clobbering its operand.
+ if (!RHS.getNode()->hasOneUse() ||
+ RHS.getNode()->getOpcode() == ISD::CopyFromReg ||
+ RHS.getNode()->getOpcode() == ISD::TRUNCATE ||
+ RHS.getNode()->getOpcode() == ISD::ANY_EXTEND ||
+ (RHS.getNode()->getOpcode() == ISD::ZERO_EXTEND &&
+ RHS.getNode()->getOperand(0).getValueType() == MVT::i32))
+ ++Cost;
+ // If the base is a register with multiple uses, this
+ // transformation may save a mov.
+ if ((AM.BaseType == X86ISelAddressMode::RegBase &&
+ AM.Base_Reg.getNode() &&
+ !AM.Base_Reg.getNode()->hasOneUse()) ||
+ AM.BaseType == X86ISelAddressMode::FrameIndexBase)
+ --Cost;
+ // If the folded LHS was interesting, this transformation saves
+ // address arithmetic.
+ if ((AM.hasSymbolicDisplacement() && !Backup.hasSymbolicDisplacement()) +
+ ((AM.Disp != 0) && (Backup.Disp == 0)) +
+ (AM.Segment.getNode() && !Backup.Segment.getNode()) >= 2)
+ --Cost;
+ // If it doesn't look like it may be an overall win, don't do it.
+ if (Cost >= 0) {
+ AM = Backup;
+ break;
+ }
+
+ // Ok, the transformation is legal and appears profitable. Go for it.
+ SDValue Zero = CurDAG->getConstant(0, N.getValueType());
+ SDValue Neg = CurDAG->getNode(ISD::SUB, dl, N.getValueType(), Zero, RHS);
+ AM.IndexReg = Neg;
+ AM.Scale = 1;
+
+ // Insert the new nodes into the topological ordering.
+ if (Zero.getNode()->getNodeId() == -1 ||
+ Zero.getNode()->getNodeId() > N.getNode()->getNodeId()) {
+ CurDAG->RepositionNode(N.getNode(), Zero.getNode());
+ Zero.getNode()->setNodeId(N.getNode()->getNodeId());
+ }
+ if (Neg.getNode()->getNodeId() == -1 ||
+ Neg.getNode()->getNodeId() > N.getNode()->getNodeId()) {
+ CurDAG->RepositionNode(N.getNode(), Neg.getNode());
+ Neg.getNode()->setNodeId(N.getNode()->getNodeId());
+ }
+ return false;
+ }
+
+ case ISD::ADD: {
+ // Add an artificial use to this node so that we can keep track of
+ // it if it gets CSE'd with a different node.
+ HandleSDNode Handle(N);
+
+ X86ISelAddressMode Backup = AM;
+ if (!MatchAddressRecursively(N.getOperand(0), AM, Depth+1) &&
+ !MatchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1))
+ return false;
+ AM = Backup;
+
+ // Try again after commuting the operands.
+ if (!MatchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1)&&
+ !MatchAddressRecursively(Handle.getValue().getOperand(0), AM, Depth+1))
+ return false;
+ AM = Backup;
+
+ // If we couldn't fold both operands into the address at the same time,
+ // see if we can just put each operand into a register and fold at least
+ // the add.
+ if (AM.BaseType == X86ISelAddressMode::RegBase &&
+ !AM.Base_Reg.getNode() &&
+ !AM.IndexReg.getNode()) {
+ N = Handle.getValue();
+ AM.Base_Reg = N.getOperand(0);
+ AM.IndexReg = N.getOperand(1);
+ AM.Scale = 1;
+ return false;
+ }
+ N = Handle.getValue();
+ break;
+ }
+
+ case ISD::OR:
+ // Handle "X | C" as "X + C" iff X is known to have C bits clear.
+ if (CurDAG->isBaseWithConstantOffset(N)) {
+ X86ISelAddressMode Backup = AM;
+ ConstantSDNode *CN = cast<ConstantSDNode>(N.getOperand(1));
+
+ // Start with the LHS as an addr mode.
+ if (!MatchAddressRecursively(N.getOperand(0), AM, Depth+1) &&
+ !FoldOffsetIntoAddress(CN->getSExtValue(), AM))
+ return false;
+ AM = Backup;
+ }
+ break;
+
+ case ISD::AND: {
+ // Perform some heroic transforms on an and of a constant-count shift
+ // with a constant to enable use of the scaled offset field.
+
+ SDValue Shift = N.getOperand(0);
+ if (Shift.getNumOperands() != 2) break;
+
+ // Scale must not be used already.
+ if (AM.IndexReg.getNode() != 0 || AM.Scale != 1) break;
+
+ SDValue X = Shift.getOperand(0);
+ ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N.getOperand(1));
+ ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
+ if (!C1 || !C2) break;
+
+ // Handle "(X >> (8-C1)) & C2" as "(X >> 8) & 0xff)" if safe. This
+ // allows us to convert the shift and and into an h-register extract and
+ // a scaled index.
+ if (Shift.getOpcode() == ISD::SRL && Shift.hasOneUse()) {
+ unsigned ScaleLog = 8 - C1->getZExtValue();
+ if (ScaleLog > 0 && ScaleLog < 4 &&
+ C2->getZExtValue() == (UINT64_C(0xff) << ScaleLog)) {
+ SDValue Eight = CurDAG->getConstant(8, MVT::i8);
+ SDValue Mask = CurDAG->getConstant(0xff, N.getValueType());
+ SDValue Srl = CurDAG->getNode(ISD::SRL, dl, N.getValueType(),
+ X, Eight);
+ SDValue And = CurDAG->getNode(ISD::AND, dl, N.getValueType(),
+ Srl, Mask);
+ SDValue ShlCount = CurDAG->getConstant(ScaleLog, MVT::i8);
+ SDValue Shl = CurDAG->getNode(ISD::SHL, dl, N.getValueType(),
+ And, ShlCount);
+
+ // Insert the new nodes into the topological ordering.
+ if (Eight.getNode()->getNodeId() == -1 ||
+ Eight.getNode()->getNodeId() > X.getNode()->getNodeId()) {
+ CurDAG->RepositionNode(X.getNode(), Eight.getNode());
+ Eight.getNode()->setNodeId(X.getNode()->getNodeId());
+ }
+ if (Mask.getNode()->getNodeId() == -1 ||
+ Mask.getNode()->getNodeId() > X.getNode()->getNodeId()) {
+ CurDAG->RepositionNode(X.getNode(), Mask.getNode());
+ Mask.getNode()->setNodeId(X.getNode()->getNodeId());
+ }
+ if (Srl.getNode()->getNodeId() == -1 ||
+ Srl.getNode()->getNodeId() > Shift.getNode()->getNodeId()) {
+ CurDAG->RepositionNode(Shift.getNode(), Srl.getNode());
+ Srl.getNode()->setNodeId(Shift.getNode()->getNodeId());
+ }
+ if (And.getNode()->getNodeId() == -1 ||
+ And.getNode()->getNodeId() > N.getNode()->getNodeId()) {
+ CurDAG->RepositionNode(N.getNode(), And.getNode());
+ And.getNode()->setNodeId(N.getNode()->getNodeId());
+ }
+ if (ShlCount.getNode()->getNodeId() == -1 ||
+ ShlCount.getNode()->getNodeId() > X.getNode()->getNodeId()) {
+ CurDAG->RepositionNode(X.getNode(), ShlCount.getNode());
+ ShlCount.getNode()->setNodeId(N.getNode()->getNodeId());
+ }
+ if (Shl.getNode()->getNodeId() == -1 ||
+ Shl.getNode()->getNodeId() > N.getNode()->getNodeId()) {
+ CurDAG->RepositionNode(N.getNode(), Shl.getNode());
+ Shl.getNode()->setNodeId(N.getNode()->getNodeId());
+ }
+ CurDAG->ReplaceAllUsesWith(N, Shl);
+ AM.IndexReg = And;
+ AM.Scale = (1 << ScaleLog);
+ return false;
+ }
+ }
+
+ // Handle "(X << C1) & C2" as "(X & (C2>>C1)) << C1" if safe and if this
+ // allows us to fold the shift into this addressing mode.
+ if (Shift.getOpcode() != ISD::SHL) break;
+
+ // Not likely to be profitable if either the AND or SHIFT node has more
+ // than one use (unless all uses are for address computation). Besides,
+ // isel mechanism requires their node ids to be reused.
+ if (!N.hasOneUse() || !Shift.hasOneUse())
+ break;
+
+ // Verify that the shift amount is something we can fold.
+ unsigned ShiftCst = C1->getZExtValue();
+ if (ShiftCst != 1 && ShiftCst != 2 && ShiftCst != 3)
+ break;
+
+ // Get the new AND mask, this folds to a constant.
+ SDValue NewANDMask = CurDAG->getNode(ISD::SRL, dl, N.getValueType(),
+ SDValue(C2, 0), SDValue(C1, 0));
+ SDValue NewAND = CurDAG->getNode(ISD::AND, dl, N.getValueType(), X,
+ NewANDMask);
+ SDValue NewSHIFT = CurDAG->getNode(ISD::SHL, dl, N.getValueType(),
+ NewAND, SDValue(C1, 0));
+
+ // Insert the new nodes into the topological ordering.
+ if (C1->getNodeId() > X.getNode()->getNodeId()) {
+ CurDAG->RepositionNode(X.getNode(), C1);
+ C1->setNodeId(X.getNode()->getNodeId());
+ }
+ if (NewANDMask.getNode()->getNodeId() == -1 ||
+ NewANDMask.getNode()->getNodeId() > X.getNode()->getNodeId()) {
+ CurDAG->RepositionNode(X.getNode(), NewANDMask.getNode());
+ NewANDMask.getNode()->setNodeId(X.getNode()->getNodeId());
+ }
+ if (NewAND.getNode()->getNodeId() == -1 ||
+ NewAND.getNode()->getNodeId() > Shift.getNode()->getNodeId()) {
+ CurDAG->RepositionNode(Shift.getNode(), NewAND.getNode());
+ NewAND.getNode()->setNodeId(Shift.getNode()->getNodeId());
+ }
+ if (NewSHIFT.getNode()->getNodeId() == -1 ||
+ NewSHIFT.getNode()->getNodeId() > N.getNode()->getNodeId()) {
+ CurDAG->RepositionNode(N.getNode(), NewSHIFT.getNode());
+ NewSHIFT.getNode()->setNodeId(N.getNode()->getNodeId());
+ }
+
+ CurDAG->ReplaceAllUsesWith(N, NewSHIFT);
+
+ AM.Scale = 1 << ShiftCst;
+ AM.IndexReg = NewAND;
+ return false;
+ }
+ }
+
+ return MatchAddressBase(N, AM);
+}
+
+/// MatchAddressBase - Helper for MatchAddress. Add the specified node to the
+/// specified addressing mode without any further recursion.
+bool X86DAGToDAGISel::MatchAddressBase(SDValue N, X86ISelAddressMode &AM) {
+ // Is the base register already occupied?
+ if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base_Reg.getNode()) {
+ // If so, check to see if the scale index register is set.
+ if (AM.IndexReg.getNode() == 0) {
+ AM.IndexReg = N;
+ AM.Scale = 1;
+ return false;
+ }
+
+ // Otherwise, we cannot select it.
+ return true;
+ }
+
+ // Default, generate it as a register.
+ AM.BaseType = X86ISelAddressMode::RegBase;
+ AM.Base_Reg = N;
+ return false;
+}
+
+/// SelectAddr - returns true if it is able pattern match an addressing mode.
+/// It returns the operands which make up the maximal addressing mode it can
+/// match by reference.
+///
+/// Parent is the parent node of the addr operand that is being matched. It
+/// is always a load, store, atomic node, or null. It is only null when
+/// checking memory operands for inline asm nodes.
+bool X86DAGToDAGISel::SelectAddr(SDNode *Parent, SDValue N, SDValue &Base,
+ SDValue &Scale, SDValue &Index,
+ SDValue &Disp, SDValue &Segment) {
+ X86ISelAddressMode AM;
+
+ if (Parent &&
+ // This list of opcodes are all the nodes that have an "addr:$ptr" operand
+ // that are not a MemSDNode, and thus don't have proper addrspace info.
+ Parent->getOpcode() != ISD::INTRINSIC_W_CHAIN && // unaligned loads, fixme
+ Parent->getOpcode() != ISD::INTRINSIC_VOID && // nontemporal stores
+ Parent->getOpcode() != X86ISD::TLSCALL) { // Fixme
+ unsigned AddrSpace =
+ cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace();
+ // AddrSpace 256 -> GS, 257 -> FS.
+ if (AddrSpace == 256)
+ AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
+ if (AddrSpace == 257)
+ AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
+ }
+
+ if (MatchAddress(N, AM))
+ return false;
+
+ EVT VT = N.getValueType();
+ if (AM.BaseType == X86ISelAddressMode::RegBase) {
+ if (!AM.Base_Reg.getNode())
+ AM.Base_Reg = CurDAG->getRegister(0, VT);
+ }
+
+ if (!AM.IndexReg.getNode())
+ AM.IndexReg = CurDAG->getRegister(0, VT);
+
+ getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
+ return true;
+}
+
+/// SelectScalarSSELoad - Match a scalar SSE load. In particular, we want to
+/// match a load whose top elements are either undef or zeros. The load flavor
+/// is derived from the type of N, which is either v4f32 or v2f64.
+///
+/// We also return:
+/// PatternChainNode: this is the matched node that has a chain input and
+/// output.
+bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Root,
+ SDValue N, SDValue &Base,
+ SDValue &Scale, SDValue &Index,
+ SDValue &Disp, SDValue &Segment,
+ SDValue &PatternNodeWithChain) {
+ if (N.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+ PatternNodeWithChain = N.getOperand(0);
+ if (ISD::isNON_EXTLoad(PatternNodeWithChain.getNode()) &&
+ PatternNodeWithChain.hasOneUse() &&
+ IsProfitableToFold(N.getOperand(0), N.getNode(), Root) &&
+ IsLegalToFold(N.getOperand(0), N.getNode(), Root, OptLevel)) {
+ LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
+ if (!SelectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
+ return false;
+ return true;
+ }
+ }
+
+ // Also handle the case where we explicitly require zeros in the top
+ // elements. This is a vector shuffle from the zero vector.
+ if (N.getOpcode() == X86ISD::VZEXT_MOVL && N.getNode()->hasOneUse() &&
+ // Check to see if the top elements are all zeros (or bitcast of zeros).
+ N.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
+ N.getOperand(0).getNode()->hasOneUse() &&
+ ISD::isNON_EXTLoad(N.getOperand(0).getOperand(0).getNode()) &&
+ N.getOperand(0).getOperand(0).hasOneUse() &&
+ IsProfitableToFold(N.getOperand(0), N.getNode(), Root) &&
+ IsLegalToFold(N.getOperand(0), N.getNode(), Root, OptLevel)) {
+ // Okay, this is a zero extending load. Fold it.
+ LoadSDNode *LD = cast<LoadSDNode>(N.getOperand(0).getOperand(0));
+ if (!SelectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
+ return false;
+ PatternNodeWithChain = SDValue(LD, 0);
+ return true;
+ }
+ return false;
+}
+
+
+/// SelectLEAAddr - it calls SelectAddr and determines if the maximal addressing
+/// mode it matches can be cost effectively emitted as an LEA instruction.
+bool X86DAGToDAGISel::SelectLEAAddr(SDValue N,
+ SDValue &Base, SDValue &Scale,
+ SDValue &Index, SDValue &Disp,
+ SDValue &Segment) {
+ X86ISelAddressMode AM;
+
+ // Set AM.Segment to prevent MatchAddress from using one. LEA doesn't support
+ // segments.
+ SDValue Copy = AM.Segment;
+ SDValue T = CurDAG->getRegister(0, MVT::i32);
+ AM.Segment = T;
+ if (MatchAddress(N, AM))
+ return false;
+ assert (T == AM.Segment);
+ AM.Segment = Copy;
+
+ EVT VT = N.getValueType();
+ unsigned Complexity = 0;
+ if (AM.BaseType == X86ISelAddressMode::RegBase)
+ if (AM.Base_Reg.getNode())
+ Complexity = 1;
+ else
+ AM.Base_Reg = CurDAG->getRegister(0, VT);
+ else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
+ Complexity = 4;
+
+ if (AM.IndexReg.getNode())
+ Complexity++;
+ else
+ AM.IndexReg = CurDAG->getRegister(0, VT);
+
+ // Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg, or with
+ // a simple shift.
+ if (AM.Scale > 1)
+ Complexity++;
+
+ // FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA
+ // to a LEA. This is determined with some expermentation but is by no means
+ // optimal (especially for code size consideration). LEA is nice because of
+ // its three-address nature. Tweak the cost function again when we can run
+ // convertToThreeAddress() at register allocation time.
+ if (AM.hasSymbolicDisplacement()) {
+ // For X86-64, we should always use lea to materialize RIP relative
+ // addresses.
+ if (Subtarget->is64Bit())
+ Complexity = 4;
+ else
+ Complexity += 2;
+ }
+
+ if (AM.Disp && (AM.Base_Reg.getNode() || AM.IndexReg.getNode()))
+ Complexity++;
+
+ // If it isn't worth using an LEA, reject it.
+ if (Complexity <= 2)
+ return false;
+
+ getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
+ return true;
+}
+
+/// SelectTLSADDRAddr - This is only run on TargetGlobalTLSAddress nodes.
+bool X86DAGToDAGISel::SelectTLSADDRAddr(SDValue N, SDValue &Base,
+ SDValue &Scale, SDValue &Index,
+ SDValue &Disp, SDValue &Segment) {
+ assert(N.getOpcode() == ISD::TargetGlobalTLSAddress);
+ const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);
+
+ X86ISelAddressMode AM;
+ AM.GV = GA->getGlobal();
+ AM.Disp += GA->getOffset();
+ AM.Base_Reg = CurDAG->getRegister(0, N.getValueType());
+ AM.SymbolFlags = GA->getTargetFlags();
+
+ if (N.getValueType() == MVT::i32) {
+ AM.Scale = 1;
+ AM.IndexReg = CurDAG->getRegister(X86::EBX, MVT::i32);
+ } else {
+ AM.IndexReg = CurDAG->getRegister(0, MVT::i64);
+ }
+
+ getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
+ return true;
+}
+
+
+bool X86DAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N,
+ SDValue &Base, SDValue &Scale,
+ SDValue &Index, SDValue &Disp,
+ SDValue &Segment) {
+ if (!ISD::isNON_EXTLoad(N.getNode()) ||
+ !IsProfitableToFold(N, P, P) ||
+ !IsLegalToFold(N, P, P, OptLevel))
+ return false;
+
+ return SelectAddr(N.getNode(),
+ N.getOperand(1), Base, Scale, Index, Disp, Segment);
+}
+
+/// getGlobalBaseReg - Return an SDNode that returns the value of
+/// the global base register. Output instructions required to
+/// initialize the global base register, if necessary.
+///
+SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
+ unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF);
+ return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode();
+}
+
+SDNode *X86DAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) {
+ SDValue Chain = Node->getOperand(0);
+ SDValue In1 = Node->getOperand(1);
+ SDValue In2L = Node->getOperand(2);
+ SDValue In2H = Node->getOperand(3);
+ SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
+ if (!SelectAddr(Node, In1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
+ return NULL;
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
+ const SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, In2L, In2H, Chain};
+ SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(),
+ MVT::i32, MVT::i32, MVT::Other, Ops,
+ array_lengthof(Ops));
+ cast<MachineSDNode>(ResNode)->setMemRefs(MemOp, MemOp + 1);
+ return ResNode;
+}
+
+// FIXME: Figure out some way to unify this with the 'or' and other code
+// below.
+SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) {
+ if (Node->hasAnyUseOfValue(0))
+ return 0;
+
+ // Optimize common patterns for __sync_add_and_fetch and
+ // __sync_sub_and_fetch where the result is not used. This allows us
+ // to use "lock" version of add, sub, inc, dec instructions.
+ // FIXME: Do not use special instructions but instead add the "lock"
+ // prefix to the target node somehow. The extra information will then be
+ // transferred to machine instruction and it denotes the prefix.
+ SDValue Chain = Node->getOperand(0);
+ SDValue Ptr = Node->getOperand(1);
+ SDValue Val = Node->getOperand(2);
+ SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
+ if (!SelectAddr(Node, Ptr, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
+ return 0;
+
+ bool isInc = false, isDec = false, isSub = false, isCN = false;
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val);
+ if (CN && CN->getSExtValue() == (int32_t)CN->getSExtValue()) {
+ isCN = true;
+ int64_t CNVal = CN->getSExtValue();
+ if (CNVal == 1)
+ isInc = true;
+ else if (CNVal == -1)
+ isDec = true;
+ else if (CNVal >= 0)
+ Val = CurDAG->getTargetConstant(CNVal, NVT);
+ else {
+ isSub = true;
+ Val = CurDAG->getTargetConstant(-CNVal, NVT);
+ }
+ } else if (Val.hasOneUse() &&
+ Val.getOpcode() == ISD::SUB &&
+ X86::isZeroNode(Val.getOperand(0))) {
+ isSub = true;
+ Val = Val.getOperand(1);
+ }
+
+ DebugLoc dl = Node->getDebugLoc();
+ unsigned Opc = 0;
+ switch (NVT.getSimpleVT().SimpleTy) {
+ default: return 0;
+ case MVT::i8:
+ if (isInc)
+ Opc = X86::LOCK_INC8m;
+ else if (isDec)
+ Opc = X86::LOCK_DEC8m;
+ else if (isSub) {
+ if (isCN)
+ Opc = X86::LOCK_SUB8mi;
+ else
+ Opc = X86::LOCK_SUB8mr;
+ } else {
+ if (isCN)
+ Opc = X86::LOCK_ADD8mi;
+ else
+ Opc = X86::LOCK_ADD8mr;
+ }
+ break;
+ case MVT::i16:
+ if (isInc)
+ Opc = X86::LOCK_INC16m;
+ else if (isDec)
+ Opc = X86::LOCK_DEC16m;
+ else if (isSub) {
+ if (isCN) {
+ if (immSext8(Val.getNode()))
+ Opc = X86::LOCK_SUB16mi8;
+ else
+ Opc = X86::LOCK_SUB16mi;
+ } else
+ Opc = X86::LOCK_SUB16mr;
+ } else {
+ if (isCN) {
+ if (immSext8(Val.getNode()))
+ Opc = X86::LOCK_ADD16mi8;
+ else
+ Opc = X86::LOCK_ADD16mi;
+ } else
+ Opc = X86::LOCK_ADD16mr;
+ }
+ break;
+ case MVT::i32:
+ if (isInc)
+ Opc = X86::LOCK_INC32m;
+ else if (isDec)
+ Opc = X86::LOCK_DEC32m;
+ else if (isSub) {
+ if (isCN) {
+ if (immSext8(Val.getNode()))
+ Opc = X86::LOCK_SUB32mi8;
+ else
+ Opc = X86::LOCK_SUB32mi;
+ } else
+ Opc = X86::LOCK_SUB32mr;
+ } else {
+ if (isCN) {
+ if (immSext8(Val.getNode()))
+ Opc = X86::LOCK_ADD32mi8;
+ else
+ Opc = X86::LOCK_ADD32mi;
+ } else
+ Opc = X86::LOCK_ADD32mr;
+ }
+ break;
+ case MVT::i64:
+ if (isInc)
+ Opc = X86::LOCK_INC64m;
+ else if (isDec)
+ Opc = X86::LOCK_DEC64m;
+ else if (isSub) {
+ Opc = X86::LOCK_SUB64mr;
+ if (isCN) {
+ if (immSext8(Val.getNode()))
+ Opc = X86::LOCK_SUB64mi8;
+ else if (i64immSExt32(Val.getNode()))
+ Opc = X86::LOCK_SUB64mi32;
+ }
+ } else {
+ Opc = X86::LOCK_ADD64mr;
+ if (isCN) {
+ if (immSext8(Val.getNode()))
+ Opc = X86::LOCK_ADD64mi8;
+ else if (i64immSExt32(Val.getNode()))
+ Opc = X86::LOCK_ADD64mi32;
+ }
+ }
+ break;
+ }
+
+ SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
+ dl, NVT), 0);
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
+ if (isInc || isDec) {
+ SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Chain };
+ SDValue Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 6), 0);
+ cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1);
+ SDValue RetVals[] = { Undef, Ret };
+ return CurDAG->getMergeValues(RetVals, 2, dl).getNode();
+ } else {
+ SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Val, Chain };
+ SDValue Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 7), 0);
+ cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1);
+ SDValue RetVals[] = { Undef, Ret };
+ return CurDAG->getMergeValues(RetVals, 2, dl).getNode();
+ }
+}
+
+enum AtomicOpc {
+ OR,
+ AND,
+ XOR,
+ AtomicOpcEnd
+};
+
+enum AtomicSz {
+ ConstantI8,
+ I8,
+ SextConstantI16,
+ ConstantI16,
+ I16,
+ SextConstantI32,
+ ConstantI32,
+ I32,
+ SextConstantI64,
+ ConstantI64,
+ I64,
+ AtomicSzEnd
+};
+
+static const unsigned int AtomicOpcTbl[AtomicOpcEnd][AtomicSzEnd] = {
+ {
+ X86::LOCK_OR8mi,
+ X86::LOCK_OR8mr,
+ X86::LOCK_OR16mi8,
+ X86::LOCK_OR16mi,
+ X86::LOCK_OR16mr,
+ X86::LOCK_OR32mi8,
+ X86::LOCK_OR32mi,
+ X86::LOCK_OR32mr,
+ X86::LOCK_OR64mi8,
+ X86::LOCK_OR64mi32,
+ X86::LOCK_OR64mr
+ },
+ {
+ X86::LOCK_AND8mi,
+ X86::LOCK_AND8mr,
+ X86::LOCK_AND16mi8,
+ X86::LOCK_AND16mi,
+ X86::LOCK_AND16mr,
+ X86::LOCK_AND32mi8,
+ X86::LOCK_AND32mi,
+ X86::LOCK_AND32mr,
+ X86::LOCK_AND64mi8,
+ X86::LOCK_AND64mi32,
+ X86::LOCK_AND64mr
+ },
+ {
+ X86::LOCK_XOR8mi,
+ X86::LOCK_XOR8mr,
+ X86::LOCK_XOR16mi8,
+ X86::LOCK_XOR16mi,
+ X86::LOCK_XOR16mr,
+ X86::LOCK_XOR32mi8,
+ X86::LOCK_XOR32mi,
+ X86::LOCK_XOR32mr,
+ X86::LOCK_XOR64mi8,
+ X86::LOCK_XOR64mi32,
+ X86::LOCK_XOR64mr
+ }
+};
+
+SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, EVT NVT) {
+ if (Node->hasAnyUseOfValue(0))
+ return 0;
+
+ // Optimize common patterns for __sync_or_and_fetch and similar arith
+ // operations where the result is not used. This allows us to use the "lock"
+ // version of the arithmetic instruction.
+ // FIXME: Same as for 'add' and 'sub', try to merge those down here.
+ SDValue Chain = Node->getOperand(0);
+ SDValue Ptr = Node->getOperand(1);
+ SDValue Val = Node->getOperand(2);
+ SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
+ if (!SelectAddr(Node, Ptr, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
+ return 0;
+
+ // Which index into the table.
+ enum AtomicOpc Op;
+ switch (Node->getOpcode()) {
+ case ISD::ATOMIC_LOAD_OR:
+ Op = OR;
+ break;
+ case ISD::ATOMIC_LOAD_AND:
+ Op = AND;
+ break;
+ case ISD::ATOMIC_LOAD_XOR:
+ Op = XOR;
+ break;
+ default:
+ return 0;
+ }
+
+ bool isCN = false;
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val);
+ if (CN && (int32_t)CN->getSExtValue() == CN->getSExtValue()) {
+ isCN = true;
+ Val = CurDAG->getTargetConstant(CN->getSExtValue(), NVT);
+ }
+
+ unsigned Opc = 0;
+ switch (NVT.getSimpleVT().SimpleTy) {
+ default: return 0;
+ case MVT::i8:
+ if (isCN)
+ Opc = AtomicOpcTbl[Op][ConstantI8];
+ else
+ Opc = AtomicOpcTbl[Op][I8];
+ break;
+ case MVT::i16:
+ if (isCN) {
+ if (immSext8(Val.getNode()))
+ Opc = AtomicOpcTbl[Op][SextConstantI16];
+ else
+ Opc = AtomicOpcTbl[Op][ConstantI16];
+ } else
+ Opc = AtomicOpcTbl[Op][I16];
+ break;
+ case MVT::i32:
+ if (isCN) {
+ if (immSext8(Val.getNode()))
+ Opc = AtomicOpcTbl[Op][SextConstantI32];
+ else
+ Opc = AtomicOpcTbl[Op][ConstantI32];
+ } else
+ Opc = AtomicOpcTbl[Op][I32];
+ break;
+ case MVT::i64:
+ Opc = AtomicOpcTbl[Op][I64];
+ if (isCN) {
+ if (immSext8(Val.getNode()))
+ Opc = AtomicOpcTbl[Op][SextConstantI64];
+ else if (i64immSExt32(Val.getNode()))
+ Opc = AtomicOpcTbl[Op][ConstantI64];
+ }
+ break;
+ }
+
+ assert(Opc != 0 && "Invalid arith lock transform!");
+
+ DebugLoc dl = Node->getDebugLoc();
+ SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
+ dl, NVT), 0);
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
+ SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Val, Chain };
+ SDValue Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 7), 0);
+ cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1);
+ SDValue RetVals[] = { Undef, Ret };
+ return CurDAG->getMergeValues(RetVals, 2, dl).getNode();
+}
+
+/// HasNoSignedComparisonUses - Test whether the given X86ISD::CMP node has
+/// any uses which require the SF or OF bits to be accurate.
+static bool HasNoSignedComparisonUses(SDNode *N) {
+ // Examine each user of the node.
+ for (SDNode::use_iterator UI = N->use_begin(),
+ UE = N->use_end(); UI != UE; ++UI) {
+ // Only examine CopyToReg uses.
+ if (UI->getOpcode() != ISD::CopyToReg)
+ return false;
+ // Only examine CopyToReg uses that copy to EFLAGS.
+ if (cast<RegisterSDNode>(UI->getOperand(1))->getReg() !=
+ X86::EFLAGS)
+ return false;
+ // Examine each user of the CopyToReg use.
+ for (SDNode::use_iterator FlagUI = UI->use_begin(),
+ FlagUE = UI->use_end(); FlagUI != FlagUE; ++FlagUI) {
+ // Only examine the Flag result.
+ if (FlagUI.getUse().getResNo() != 1) continue;
+ // Anything unusual: assume conservatively.
+ if (!FlagUI->isMachineOpcode()) return false;
+ // Examine the opcode of the user.
+ switch (FlagUI->getMachineOpcode()) {
+ // These comparisons don't treat the most significant bit specially.
+ case X86::SETAr: case X86::SETAEr: case X86::SETBr: case X86::SETBEr:
+ case X86::SETEr: case X86::SETNEr: case X86::SETPr: case X86::SETNPr:
+ case X86::SETAm: case X86::SETAEm: case X86::SETBm: case X86::SETBEm:
+ case X86::SETEm: case X86::SETNEm: case X86::SETPm: case X86::SETNPm:
+ case X86::JA_4: case X86::JAE_4: case X86::JB_4: case X86::JBE_4:
+ case X86::JE_4: case X86::JNE_4: case X86::JP_4: case X86::JNP_4:
+ case X86::CMOVA16rr: case X86::CMOVA16rm:
+ case X86::CMOVA32rr: case X86::CMOVA32rm:
+ case X86::CMOVA64rr: case X86::CMOVA64rm:
+ case X86::CMOVAE16rr: case X86::CMOVAE16rm:
+ case X86::CMOVAE32rr: case X86::CMOVAE32rm:
+ case X86::CMOVAE64rr: case X86::CMOVAE64rm:
+ case X86::CMOVB16rr: case X86::CMOVB16rm:
+ case X86::CMOVB32rr: case X86::CMOVB32rm:
+ case X86::CMOVB64rr: case X86::CMOVB64rm:
+ case X86::CMOVBE16rr: case X86::CMOVBE16rm:
+ case X86::CMOVBE32rr: case X86::CMOVBE32rm:
+ case X86::CMOVBE64rr: case X86::CMOVBE64rm:
+ case X86::CMOVE16rr: case X86::CMOVE16rm:
+ case X86::CMOVE32rr: case X86::CMOVE32rm:
+ case X86::CMOVE64rr: case X86::CMOVE64rm:
+ case X86::CMOVNE16rr: case X86::CMOVNE16rm:
+ case X86::CMOVNE32rr: case X86::CMOVNE32rm:
+ case X86::CMOVNE64rr: case X86::CMOVNE64rm:
+ case X86::CMOVNP16rr: case X86::CMOVNP16rm:
+ case X86::CMOVNP32rr: case X86::CMOVNP32rm:
+ case X86::CMOVNP64rr: case X86::CMOVNP64rm:
+ case X86::CMOVP16rr: case X86::CMOVP16rm:
+ case X86::CMOVP32rr: case X86::CMOVP32rm:
+ case X86::CMOVP64rr: case X86::CMOVP64rm:
+ continue;
+ // Anything else: assume conservatively.
+ default: return false;
+ }
+ }
+ }
+ return true;
+}
+
+SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
+ EVT NVT = Node->getValueType(0);
+ unsigned Opc, MOpc;
+ unsigned Opcode = Node->getOpcode();
+ DebugLoc dl = Node->getDebugLoc();
+
+ DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << '\n');
+
+ if (Node->isMachineOpcode()) {
+ DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << '\n');
+ return NULL; // Already selected.
+ }
+
+ switch (Opcode) {
+ default: break;
+ case X86ISD::GlobalBaseReg:
+ return getGlobalBaseReg();
+
+ case X86ISD::ATOMOR64_DAG:
+ return SelectAtomic64(Node, X86::ATOMOR6432);
+ case X86ISD::ATOMXOR64_DAG:
+ return SelectAtomic64(Node, X86::ATOMXOR6432);
+ case X86ISD::ATOMADD64_DAG:
+ return SelectAtomic64(Node, X86::ATOMADD6432);
+ case X86ISD::ATOMSUB64_DAG:
+ return SelectAtomic64(Node, X86::ATOMSUB6432);
+ case X86ISD::ATOMNAND64_DAG:
+ return SelectAtomic64(Node, X86::ATOMNAND6432);
+ case X86ISD::ATOMAND64_DAG:
+ return SelectAtomic64(Node, X86::ATOMAND6432);
+ case X86ISD::ATOMSWAP64_DAG:
+ return SelectAtomic64(Node, X86::ATOMSWAP6432);
+
+ case ISD::ATOMIC_LOAD_ADD: {
+ SDNode *RetVal = SelectAtomicLoadAdd(Node, NVT);
+ if (RetVal)
+ return RetVal;
+ break;
+ }
+ case ISD::ATOMIC_LOAD_XOR:
+ case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_OR: {
+ SDNode *RetVal = SelectAtomicLoadArith(Node, NVT);
+ if (RetVal)
+ return RetVal;
+ break;
+ }
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR: {
+ // For operations of the form (x << C1) op C2, check if we can use a smaller
+ // encoding for C2 by transforming it into (x op (C2>>C1)) << C1.
+ SDValue N0 = Node->getOperand(0);
+ SDValue N1 = Node->getOperand(1);
+
+ if (N0->getOpcode() != ISD::SHL || !N0->hasOneUse())
+ break;
+
+ // i8 is unshrinkable, i16 should be promoted to i32.
+ if (NVT != MVT::i32 && NVT != MVT::i64)
+ break;
+
+ ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
+ ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(N0->getOperand(1));
+ if (!Cst || !ShlCst)
+ break;
+
+ int64_t Val = Cst->getSExtValue();
+ uint64_t ShlVal = ShlCst->getZExtValue();
+
+ // Make sure that we don't change the operation by removing bits.
+ // This only matters for OR and XOR, AND is unaffected.
+ if (Opcode != ISD::AND && ((Val >> ShlVal) << ShlVal) != Val)
+ break;
+
+ unsigned ShlOp, Op = 0;
+ EVT CstVT = NVT;
+
+ // Check the minimum bitwidth for the new constant.
+ // TODO: AND32ri is the same as AND64ri32 with zext imm.
+ // TODO: MOV32ri+OR64r is cheaper than MOV64ri64+OR64rr
+ // TODO: Using 16 and 8 bit operations is also possible for or32 & xor32.
+ if (!isInt<8>(Val) && isInt<8>(Val >> ShlVal))
+ CstVT = MVT::i8;
+ else if (!isInt<32>(Val) && isInt<32>(Val >> ShlVal))
+ CstVT = MVT::i32;
+
+ // Bail if there is no smaller encoding.
+ if (NVT == CstVT)
+ break;
+
+ switch (NVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unsupported VT!");
+ case MVT::i32:
+ assert(CstVT == MVT::i8);
+ ShlOp = X86::SHL32ri;
+
+ switch (Opcode) {
+ case ISD::AND: Op = X86::AND32ri8; break;
+ case ISD::OR: Op = X86::OR32ri8; break;
+ case ISD::XOR: Op = X86::XOR32ri8; break;
+ }
+ break;
+ case MVT::i64:
+ assert(CstVT == MVT::i8 || CstVT == MVT::i32);
+ ShlOp = X86::SHL64ri;
+
+ switch (Opcode) {
+ case ISD::AND: Op = CstVT==MVT::i8? X86::AND64ri8 : X86::AND64ri32; break;
+ case ISD::OR: Op = CstVT==MVT::i8? X86::OR64ri8 : X86::OR64ri32; break;
+ case ISD::XOR: Op = CstVT==MVT::i8? X86::XOR64ri8 : X86::XOR64ri32; break;
+ }
+ break;
+ }
+
+ // Emit the smaller op and the shift.
+ SDValue NewCst = CurDAG->getTargetConstant(Val >> ShlVal, CstVT);
+ SDNode *New = CurDAG->getMachineNode(Op, dl, NVT, N0->getOperand(0),NewCst);
+ return CurDAG->SelectNodeTo(Node, ShlOp, NVT, SDValue(New, 0),
+ getI8Imm(ShlVal));
+ break;
+ }
+ case X86ISD::UMUL: {
+ SDValue N0 = Node->getOperand(0);
+ SDValue N1 = Node->getOperand(1);
+
+ unsigned LoReg;
+ switch (NVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unsupported VT!");
+ case MVT::i8: LoReg = X86::AL; Opc = X86::MUL8r; break;
+ case MVT::i16: LoReg = X86::AX; Opc = X86::MUL16r; break;
+ case MVT::i32: LoReg = X86::EAX; Opc = X86::MUL32r; break;
+ case MVT::i64: LoReg = X86::RAX; Opc = X86::MUL64r; break;
+ }
+
+ SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
+ N0, SDValue()).getValue(1);
+
+ SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::i32);
+ SDValue Ops[] = {N1, InFlag};
+ SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops, 2);
+
+ ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
+ ReplaceUses(SDValue(Node, 1), SDValue(CNode, 1));
+ ReplaceUses(SDValue(Node, 2), SDValue(CNode, 2));
+ return NULL;
+ }
+
+ case ISD::SMUL_LOHI:
+ case ISD::UMUL_LOHI: {
+ SDValue N0 = Node->getOperand(0);
+ SDValue N1 = Node->getOperand(1);
+
+ bool isSigned = Opcode == ISD::SMUL_LOHI;
+ if (!isSigned) {
+ switch (NVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unsupported VT!");
+ case MVT::i8: Opc = X86::MUL8r; MOpc = X86::MUL8m; break;
+ case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break;
+ case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break;
+ case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break;
+ }
+ } else {
+ switch (NVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unsupported VT!");
+ case MVT::i8: Opc = X86::IMUL8r; MOpc = X86::IMUL8m; break;
+ case MVT::i16: Opc = X86::IMUL16r; MOpc = X86::IMUL16m; break;
+ case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break;
+ case MVT::i64: Opc = X86::IMUL64r; MOpc = X86::IMUL64m; break;
+ }
+ }
+
+ unsigned LoReg, HiReg;
+ switch (NVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unsupported VT!");
+ case MVT::i8: LoReg = X86::AL; HiReg = X86::AH; break;
+ case MVT::i16: LoReg = X86::AX; HiReg = X86::DX; break;
+ case MVT::i32: LoReg = X86::EAX; HiReg = X86::EDX; break;
+ case MVT::i64: LoReg = X86::RAX; HiReg = X86::RDX; break;
+ }
+
+ SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
+ bool foldedLoad = TryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
+ // Multiply is commmutative.
+ if (!foldedLoad) {
+ foldedLoad = TryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
+ if (foldedLoad)
+ std::swap(N0, N1);
+ }
+
+ SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
+ N0, SDValue()).getValue(1);
+
+ if (foldedLoad) {
+ SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
+ InFlag };
+ SDNode *CNode =
+ CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops,
+ array_lengthof(Ops));
+ InFlag = SDValue(CNode, 1);
+
+ // Update the chain.
+ ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
+ } else {
+ SDNode *CNode = CurDAG->getMachineNode(Opc, dl, MVT::Glue, N1, InFlag);
+ InFlag = SDValue(CNode, 0);
+ }
+
+ // Prevent use of AH in a REX instruction by referencing AX instead.
+ if (HiReg == X86::AH && Subtarget->is64Bit() &&
+ !SDValue(Node, 1).use_empty()) {
+ SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ X86::AX, MVT::i16, InFlag);
+ InFlag = Result.getValue(2);
+ // Get the low part if needed. Don't use getCopyFromReg for aliasing
+ // registers.
+ if (!SDValue(Node, 0).use_empty())
+ ReplaceUses(SDValue(Node, 1),
+ CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
+
+ // Shift AX down 8 bits.
+ Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16,
+ Result,
+ CurDAG->getTargetConstant(8, MVT::i8)), 0);
+ // Then truncate it down to i8.
+ ReplaceUses(SDValue(Node, 1),
+ CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
+ }
+ // Copy the low half of the result, if it is needed.
+ if (!SDValue(Node, 0).use_empty()) {
+ SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ LoReg, NVT, InFlag);
+ InFlag = Result.getValue(2);
+ ReplaceUses(SDValue(Node, 0), Result);
+ DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
+ }
+ // Copy the high half of the result, if it is needed.
+ if (!SDValue(Node, 1).use_empty()) {
+ SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ HiReg, NVT, InFlag);
+ InFlag = Result.getValue(2);
+ ReplaceUses(SDValue(Node, 1), Result);
+ DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
+ }
+
+ return NULL;
+ }
+
+ case ISD::SDIVREM:
+ case ISD::UDIVREM: {
+ SDValue N0 = Node->getOperand(0);
+ SDValue N1 = Node->getOperand(1);
+
+ bool isSigned = Opcode == ISD::SDIVREM;
+ if (!isSigned) {
+ switch (NVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unsupported VT!");
+ case MVT::i8: Opc = X86::DIV8r; MOpc = X86::DIV8m; break;
+ case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break;
+ case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break;
+ case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break;
+ }
+ } else {
+ switch (NVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unsupported VT!");
+ case MVT::i8: Opc = X86::IDIV8r; MOpc = X86::IDIV8m; break;
+ case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break;
+ case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break;
+ case MVT::i64: Opc = X86::IDIV64r; MOpc = X86::IDIV64m; break;
+ }
+ }
+
+ unsigned LoReg, HiReg, ClrReg;
+ unsigned ClrOpcode, SExtOpcode;
+ switch (NVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unsupported VT!");
+ case MVT::i8:
+ LoReg = X86::AL; ClrReg = HiReg = X86::AH;
+ ClrOpcode = 0;
+ SExtOpcode = X86::CBW;
+ break;
+ case MVT::i16:
+ LoReg = X86::AX; HiReg = X86::DX;
+ ClrOpcode = X86::MOV16r0; ClrReg = X86::DX;
+ SExtOpcode = X86::CWD;
+ break;
+ case MVT::i32:
+ LoReg = X86::EAX; ClrReg = HiReg = X86::EDX;
+ ClrOpcode = X86::MOV32r0;
+ SExtOpcode = X86::CDQ;
+ break;
+ case MVT::i64:
+ LoReg = X86::RAX; ClrReg = HiReg = X86::RDX;
+ ClrOpcode = X86::MOV64r0;
+ SExtOpcode = X86::CQO;
+ break;
+ }
+
+ SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
+ bool foldedLoad = TryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
+ bool signBitIsZero = CurDAG->SignBitIsZero(N0);
+
+ SDValue InFlag;
+ if (NVT == MVT::i8 && (!isSigned || signBitIsZero)) {
+ // Special case for div8, just use a move with zero extension to AX to
+ // clear the upper 8 bits (AH).
+ SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Move, Chain;
+ if (TryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
+ SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
+ Move =
+ SDValue(CurDAG->getMachineNode(X86::MOVZX32rm8, dl, MVT::i32,
+ MVT::Other, Ops,
+ array_lengthof(Ops)), 0);
+ Chain = Move.getValue(1);
+ ReplaceUses(N0.getValue(1), Chain);
+ } else {
+ Move =
+ SDValue(CurDAG->getMachineNode(X86::MOVZX32rr8, dl, MVT::i32, N0),0);
+ Chain = CurDAG->getEntryNode();
+ }
+ Chain = CurDAG->getCopyToReg(Chain, dl, X86::EAX, Move, SDValue());
+ InFlag = Chain.getValue(1);
+ } else {
+ InFlag =
+ CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl,
+ LoReg, N0, SDValue()).getValue(1);
+ if (isSigned && !signBitIsZero) {
+ // Sign extend the low part into the high part.
+ InFlag =
+ SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Glue, InFlag),0);
+ } else {
+ // Zero out the high part, effectively zero extending the input.
+ SDValue ClrNode =
+ SDValue(CurDAG->getMachineNode(ClrOpcode, dl, NVT), 0);
+ InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ClrReg,
+ ClrNode, InFlag).getValue(1);
+ }
+ }
+
+ if (foldedLoad) {
+ SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
+ InFlag };
+ SDNode *CNode =
+ CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops,
+ array_lengthof(Ops));
+ InFlag = SDValue(CNode, 1);
+ // Update the chain.
+ ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
+ } else {
+ InFlag =
+ SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, N1, InFlag), 0);
+ }
+
+ // Prevent use of AH in a REX instruction by referencing AX instead.
+ // Shift it down 8 bits.
+ if (HiReg == X86::AH && Subtarget->is64Bit() &&
+ !SDValue(Node, 1).use_empty()) {
+ SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ X86::AX, MVT::i16, InFlag);
+ InFlag = Result.getValue(2);
+
+ // If we also need AL (the quotient), get it by extracting a subreg from
+ // Result. The fast register allocator does not like multiple CopyFromReg
+ // nodes using aliasing registers.
+ if (!SDValue(Node, 0).use_empty())
+ ReplaceUses(SDValue(Node, 0),
+ CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
+
+ // Shift AX right by 8 bits instead of using AH.
+ Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16,
+ Result,
+ CurDAG->getTargetConstant(8, MVT::i8)),
+ 0);
+ ReplaceUses(SDValue(Node, 1),
+ CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
+ }
+ // Copy the division (low) result, if it is needed.
+ if (!SDValue(Node, 0).use_empty()) {
+ SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ LoReg, NVT, InFlag);
+ InFlag = Result.getValue(2);
+ ReplaceUses(SDValue(Node, 0), Result);
+ DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
+ }
+ // Copy the remainder (high) result, if it is needed.
+ if (!SDValue(Node, 1).use_empty()) {
+ SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ HiReg, NVT, InFlag);
+ InFlag = Result.getValue(2);
+ ReplaceUses(SDValue(Node, 1), Result);
+ DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
+ }
+ return NULL;
+ }
+
+ case X86ISD::CMP: {
+ SDValue N0 = Node->getOperand(0);
+ SDValue N1 = Node->getOperand(1);
+
+ // Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to
+ // use a smaller encoding.
+ if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
+ HasNoSignedComparisonUses(Node))
+ // Look past the truncate if CMP is the only use of it.
+ N0 = N0.getOperand(0);
+ if (N0.getNode()->getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
+ N0.getValueType() != MVT::i8 &&
+ X86::isZeroNode(N1)) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getNode()->getOperand(1));
+ if (!C) break;
+
+ // For example, convert "testl %eax, $8" to "testb %al, $8"
+ if ((C->getZExtValue() & ~UINT64_C(0xff)) == 0 &&
+ (!(C->getZExtValue() & 0x80) ||
+ HasNoSignedComparisonUses(Node))) {
+ SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i8);
+ SDValue Reg = N0.getNode()->getOperand(0);
+
+ // On x86-32, only the ABCD registers have 8-bit subregisters.
+ if (!Subtarget->is64Bit()) {
+ TargetRegisterClass *TRC = 0;
+ switch (N0.getValueType().getSimpleVT().SimpleTy) {
+ case MVT::i32: TRC = &X86::GR32_ABCDRegClass; break;
+ case MVT::i16: TRC = &X86::GR16_ABCDRegClass; break;
+ default: llvm_unreachable("Unsupported TEST operand type!");
+ }
+ SDValue RC = CurDAG->getTargetConstant(TRC->getID(), MVT::i32);
+ Reg = SDValue(CurDAG->getMachineNode(X86::COPY_TO_REGCLASS, dl,
+ Reg.getValueType(), Reg, RC), 0);
+ }
+
+ // Extract the l-register.
+ SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl,
+ MVT::i8, Reg);
+
+ // Emit a testb.
+ return CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32, Subreg, Imm);
+ }
+
+ // For example, "testl %eax, $2048" to "testb %ah, $8".
+ if ((C->getZExtValue() & ~UINT64_C(0xff00)) == 0 &&
+ (!(C->getZExtValue() & 0x8000) ||
+ HasNoSignedComparisonUses(Node))) {
+ // Shift the immediate right by 8 bits.
+ SDValue ShiftedImm = CurDAG->getTargetConstant(C->getZExtValue() >> 8,
+ MVT::i8);
+ SDValue Reg = N0.getNode()->getOperand(0);
+
+ // Put the value in an ABCD register.
+ TargetRegisterClass *TRC = 0;
+ switch (N0.getValueType().getSimpleVT().SimpleTy) {
+ case MVT::i64: TRC = &X86::GR64_ABCDRegClass; break;
+ case MVT::i32: TRC = &X86::GR32_ABCDRegClass; break;
+ case MVT::i16: TRC = &X86::GR16_ABCDRegClass; break;
+ default: llvm_unreachable("Unsupported TEST operand type!");
+ }
+ SDValue RC = CurDAG->getTargetConstant(TRC->getID(), MVT::i32);
+ Reg = SDValue(CurDAG->getMachineNode(X86::COPY_TO_REGCLASS, dl,
+ Reg.getValueType(), Reg, RC), 0);
+
+ // Extract the h-register.
+ SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_8bit_hi, dl,
+ MVT::i8, Reg);
+
+ // Emit a testb. The EXTRACT_SUBREG becomes a COPY that can only
+ // target GR8_NOREX registers, so make sure the register class is
+ // forced.
+ return CurDAG->getMachineNode(X86::TEST8ri_NOREX, dl, MVT::i32,
+ Subreg, ShiftedImm);
+ }
+
+ // For example, "testl %eax, $32776" to "testw %ax, $32776".
+ if ((C->getZExtValue() & ~UINT64_C(0xffff)) == 0 &&
+ N0.getValueType() != MVT::i16 &&
+ (!(C->getZExtValue() & 0x8000) ||
+ HasNoSignedComparisonUses(Node))) {
+ SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i16);
+ SDValue Reg = N0.getNode()->getOperand(0);
+
+ // Extract the 16-bit subregister.
+ SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_16bit, dl,
+ MVT::i16, Reg);
+
+ // Emit a testw.
+ return CurDAG->getMachineNode(X86::TEST16ri, dl, MVT::i32, Subreg, Imm);
+ }
+
+ // For example, "testq %rax, $268468232" to "testl %eax, $268468232".
+ if ((C->getZExtValue() & ~UINT64_C(0xffffffff)) == 0 &&
+ N0.getValueType() == MVT::i64 &&
+ (!(C->getZExtValue() & 0x80000000) ||
+ HasNoSignedComparisonUses(Node))) {
+ SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32);
+ SDValue Reg = N0.getNode()->getOperand(0);
+
+ // Extract the 32-bit subregister.
+ SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_32bit, dl,
+ MVT::i32, Reg);
+
+ // Emit a testl.
+ return CurDAG->getMachineNode(X86::TEST32ri, dl, MVT::i32, Subreg, Imm);
+ }
+ }
+ break;
+ }
+ }
+
+ SDNode *ResNode = SelectCode(Node);
+
+ DEBUG(dbgs() << "=> ";
+ if (ResNode == NULL || ResNode == Node)
+ Node->dump(CurDAG);
+ else
+ ResNode->dump(CurDAG);
+ dbgs() << '\n');
+
+ return ResNode;
+}
+
+bool X86DAGToDAGISel::
+SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
+ std::vector<SDValue> &OutOps) {
+ SDValue Op0, Op1, Op2, Op3, Op4;
+ switch (ConstraintCode) {
+ case 'o': // offsetable ??
+ case 'v': // not offsetable ??
+ default: return true;
+ case 'm': // memory
+ if (!SelectAddr(0, Op, Op0, Op1, Op2, Op3, Op4))
+ return true;
+ break;
+ }
+
+ OutOps.push_back(Op0);
+ OutOps.push_back(Op1);
+ OutOps.push_back(Op2);
+ OutOps.push_back(Op3);
+ OutOps.push_back(Op4);
+ return false;
+}
+
+/// createX86ISelDag - This pass converts a legalized DAG into a
+/// X86-specific DAG, ready for instruction scheduling.
+///
+FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM,
+ llvm::CodeGenOpt::Level OptLevel) {
+ return new X86DAGToDAGISel(TM, OptLevel);
+}
diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
new file mode 100644
index 0000000..7c8ce17
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -0,0 +1,14965 @@
+//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that X86 uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "x86-isel"
+#include "X86.h"
+#include "X86InstrBuilder.h"
+#include "X86ISelLowering.h"
+#include "X86TargetMachine.h"
+#include "X86TargetObjectFile.h"
+#include "Utils/X86ShuffleDecode.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalAlias.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/CodeGen/IntrinsicLowering.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/VectorExtras.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+using namespace dwarf;
+
+STATISTIC(NumTailCalls, "Number of tail calls");
+
+// Forward declarations.
+static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1,
+ SDValue V2);
+
+static SDValue Insert128BitVector(SDValue Result,
+ SDValue Vec,
+ SDValue Idx,
+ SelectionDAG &DAG,
+ DebugLoc dl);
+
+static SDValue Extract128BitVector(SDValue Vec,
+ SDValue Idx,
+ SelectionDAG &DAG,
+ DebugLoc dl);
+
+/// Generate a DAG to grab 128-bits from a vector > 128 bits. This
+/// sets things up to match to an AVX VEXTRACTF128 instruction or a
+/// simple subregister reference. Idx is an index in the 128 bits we
+/// want. It need not be aligned to a 128-bit bounday. That makes
+/// lowering EXTRACT_VECTOR_ELT operations easier.
+static SDValue Extract128BitVector(SDValue Vec,
+ SDValue Idx,
+ SelectionDAG &DAG,
+ DebugLoc dl) {
+ EVT VT = Vec.getValueType();
+ assert(VT.getSizeInBits() == 256 && "Unexpected vector size!");
+ EVT ElVT = VT.getVectorElementType();
+ int Factor = VT.getSizeInBits()/128;
+ EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
+ VT.getVectorNumElements()/Factor);
+
+ // Extract from UNDEF is UNDEF.
+ if (Vec.getOpcode() == ISD::UNDEF)
+ return DAG.getNode(ISD::UNDEF, dl, ResultVT);
+
+ if (isa<ConstantSDNode>(Idx)) {
+ unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+
+ // Extract the relevant 128 bits. Generate an EXTRACT_SUBVECTOR
+ // we can match to VEXTRACTF128.
+ unsigned ElemsPerChunk = 128 / ElVT.getSizeInBits();
+
+ // This is the index of the first element of the 128-bit chunk
+ // we want.
+ unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits()) / 128)
+ * ElemsPerChunk);
+
+ SDValue VecIdx = DAG.getConstant(NormalizedIdxVal, MVT::i32);
+ SDValue Result = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec,
+ VecIdx);
+
+ return Result;
+ }
+
+ return SDValue();
+}
+
+/// Generate a DAG to put 128-bits into a vector > 128 bits. This
+/// sets things up to match to an AVX VINSERTF128 instruction or a
+/// simple superregister reference. Idx is an index in the 128 bits
+/// we want. It need not be aligned to a 128-bit bounday. That makes
+/// lowering INSERT_VECTOR_ELT operations easier.
+static SDValue Insert128BitVector(SDValue Result,
+ SDValue Vec,
+ SDValue Idx,
+ SelectionDAG &DAG,
+ DebugLoc dl) {
+ if (isa<ConstantSDNode>(Idx)) {
+ EVT VT = Vec.getValueType();
+ assert(VT.getSizeInBits() == 128 && "Unexpected vector size!");
+
+ EVT ElVT = VT.getVectorElementType();
+ unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+ EVT ResultVT = Result.getValueType();
+
+ // Insert the relevant 128 bits.
+ unsigned ElemsPerChunk = 128/ElVT.getSizeInBits();
+
+ // This is the index of the first element of the 128-bit chunk
+ // we want.
+ unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits())/128)
+ * ElemsPerChunk);
+
+ SDValue VecIdx = DAG.getConstant(NormalizedIdxVal, MVT::i32);
+ Result = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec,
+ VecIdx);
+ return Result;
+ }
+
+ return SDValue();
+}
+
+static TargetLoweringObjectFile *createTLOF(X86TargetMachine &TM) {
+ const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
+ bool is64Bit = Subtarget->is64Bit();
+
+ if (Subtarget->isTargetEnvMacho()) {
+ if (is64Bit)
+ return new X8664_MachoTargetObjectFile();
+ return new TargetLoweringObjectFileMachO();
+ }
+
+ if (Subtarget->isTargetELF())
+ return new TargetLoweringObjectFileELF();
+ if (Subtarget->isTargetCOFF() && !Subtarget->isTargetEnvMacho())
+ return new TargetLoweringObjectFileCOFF();
+ llvm_unreachable("unknown subtarget type");
+}
+
+X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
+ : TargetLowering(TM, createTLOF(TM)) {
+ Subtarget = &TM.getSubtarget<X86Subtarget>();
+ X86ScalarSSEf64 = Subtarget->hasXMMInt();
+ X86ScalarSSEf32 = Subtarget->hasXMM();
+ X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;
+
+ RegInfo = TM.getRegisterInfo();
+ TD = getTargetData();
+
+ // Set up the TargetLowering object.
+ static MVT IntVTs[] = { MVT::i8, MVT::i16, MVT::i32, MVT::i64 };
+
+ // X86 is weird, it always uses i8 for shift amounts and setcc results.
+ setBooleanContents(ZeroOrOneBooleanContent);
+ // X86-SSE is even stranger. It uses -1 or 0 for vector masks.
+ setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
+
+ // For 64-bit since we have so many registers use the ILP scheduler, for
+ // 32-bit code use the register pressure specific scheduling.
+ if (Subtarget->is64Bit())
+ setSchedulingPreference(Sched::ILP);
+ else
+ setSchedulingPreference(Sched::RegPressure);
+ setStackPointerRegisterToSaveRestore(X86StackPtr);
+
+ if (Subtarget->isTargetWindows() && !Subtarget->isTargetCygMing()) {
+ // Setup Windows compiler runtime calls.
+ setLibcallName(RTLIB::SDIV_I64, "_alldiv");
+ setLibcallName(RTLIB::UDIV_I64, "_aulldiv");
+ setLibcallName(RTLIB::SREM_I64, "_allrem");
+ setLibcallName(RTLIB::UREM_I64, "_aullrem");
+ setLibcallName(RTLIB::MUL_I64, "_allmul");
+ setLibcallName(RTLIB::FPTOUINT_F64_I64, "_ftol2");
+ setLibcallName(RTLIB::FPTOUINT_F32_I64, "_ftol2");
+ setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::X86_StdCall);
+ setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::X86_StdCall);
+ setLibcallCallingConv(RTLIB::SREM_I64, CallingConv::X86_StdCall);
+ setLibcallCallingConv(RTLIB::UREM_I64, CallingConv::X86_StdCall);
+ setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_StdCall);
+ setLibcallCallingConv(RTLIB::FPTOUINT_F64_I64, CallingConv::C);
+ setLibcallCallingConv(RTLIB::FPTOUINT_F32_I64, CallingConv::C);
+ }
+
+ if (Subtarget->isTargetDarwin()) {
+ // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
+ setUseUnderscoreSetJmp(false);
+ setUseUnderscoreLongJmp(false);
+ } else if (Subtarget->isTargetMingw()) {
+ // MS runtime is weird: it exports _setjmp, but longjmp!
+ setUseUnderscoreSetJmp(true);
+ setUseUnderscoreLongJmp(false);
+ } else {
+ setUseUnderscoreSetJmp(true);
+ setUseUnderscoreLongJmp(true);
+ }
+
+ // Set up the register classes.
+ addRegisterClass(MVT::i8, X86::GR8RegisterClass);
+ addRegisterClass(MVT::i16, X86::GR16RegisterClass);
+ addRegisterClass(MVT::i32, X86::GR32RegisterClass);
+ if (Subtarget->is64Bit())
+ addRegisterClass(MVT::i64, X86::GR64RegisterClass);
+
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+
+ // We don't accept any truncstore of integer registers.
+ setTruncStoreAction(MVT::i64, MVT::i32, Expand);
+ setTruncStoreAction(MVT::i64, MVT::i16, Expand);
+ setTruncStoreAction(MVT::i64, MVT::i8 , Expand);
+ setTruncStoreAction(MVT::i32, MVT::i16, Expand);
+ setTruncStoreAction(MVT::i32, MVT::i8 , Expand);
+ setTruncStoreAction(MVT::i16, MVT::i8, Expand);
+
+ // SETOEQ and SETUNE require checking two conditions.
+ setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETOEQ, MVT::f64, Expand);
+ setCondCodeAction(ISD::SETOEQ, MVT::f80, Expand);
+ setCondCodeAction(ISD::SETUNE, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETUNE, MVT::f64, Expand);
+ setCondCodeAction(ISD::SETUNE, MVT::f80, Expand);
+
+ // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
+ // operation.
+ setOperationAction(ISD::UINT_TO_FP , MVT::i1 , Promote);
+ setOperationAction(ISD::UINT_TO_FP , MVT::i8 , Promote);
+ setOperationAction(ISD::UINT_TO_FP , MVT::i16 , Promote);
+
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
+ setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Expand);
+ } else if (!UseSoftFloat) {
+ // We have an algorithm for SSE2->double, and we turn this into a
+ // 64-bit FILD followed by conditional FADD for other targets.
+ setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
+ // We have an algorithm for SSE2, and we turn this into a 64-bit
+ // FILD for other targets.
+ setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom);
+ }
+
+ // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
+ // this operation.
+ setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote);
+ setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote);
+
+ if (!UseSoftFloat) {
+ // SSE has no i16 to fp conversion, only i32
+ if (X86ScalarSSEf32) {
+ setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
+ // f32 and f64 cases are Legal, f80 case is not
+ setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
+ } else {
+ setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Custom);
+ setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
+ }
+ } else {
+ setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
+ setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Promote);
+ }
+
+ // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
+ // are Legal, f80 is custom lowered.
+ setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom);
+ setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom);
+
+ // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTS's, as X86 doesn't have
+ // this operation.
+ setOperationAction(ISD::FP_TO_SINT , MVT::i1 , Promote);
+ setOperationAction(ISD::FP_TO_SINT , MVT::i8 , Promote);
+
+ if (X86ScalarSSEf32) {
+ setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote);
+ // f32 and f64 cases are Legal, f80 case is not
+ setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
+ } else {
+ setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Custom);
+ setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
+ }
+
+ // Handle FP_TO_UINT by promoting the destination to a larger signed
+ // conversion.
+ setOperationAction(ISD::FP_TO_UINT , MVT::i1 , Promote);
+ setOperationAction(ISD::FP_TO_UINT , MVT::i8 , Promote);
+ setOperationAction(ISD::FP_TO_UINT , MVT::i16 , Promote);
+
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand);
+ setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
+ } else if (!UseSoftFloat) {
+ // Since AVX is a superset of SSE3, only check for SSE here.
+ if (Subtarget->hasSSE1() && !Subtarget->hasSSE3())
+ // Expand FP_TO_UINT into a select.
+ // FIXME: We would like to use a Custom expander here eventually to do
+ // the optimal thing for SSE vs. the default expansion in the legalizer.
+ setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand);
+ else
+ // With SSE3 we can use fisttpll to convert to a signed i64; without
+ // SSE, we're stuck with a fistpll.
+ setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom);
+ }
+
+ // TODO: when we have SSE, these could be more efficient, by using movd/movq.
+ if (!X86ScalarSSEf64) {
+ setOperationAction(ISD::BITCAST , MVT::f32 , Expand);
+ setOperationAction(ISD::BITCAST , MVT::i32 , Expand);
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::BITCAST , MVT::f64 , Expand);
+ // Without SSE, i64->f64 goes through memory.
+ setOperationAction(ISD::BITCAST , MVT::i64 , Expand);
+ }
+ }
+
+ // Scalar integer divide and remainder are lowered to use operations that
+ // produce two results, to match the available instructions. This exposes
+ // the two-result form to trivial CSE, which is able to combine x/y and x%y
+ // into a single instruction.
+ //
+ // Scalar integer multiply-high is also lowered to use two-result
+ // operations, to match the available instructions. However, plain multiply
+ // (low) operations are left as Legal, as there are single-result
+ // instructions for this in x86. Using the two-result multiply instructions
+ // when both high and low results are needed must be arranged by dagcombine.
+ for (unsigned i = 0, e = 4; i != e; ++i) {
+ MVT VT = IntVTs[i];
+ setOperationAction(ISD::MULHS, VT, Expand);
+ setOperationAction(ISD::MULHU, VT, Expand);
+ setOperationAction(ISD::SDIV, VT, Expand);
+ setOperationAction(ISD::UDIV, VT, Expand);
+ setOperationAction(ISD::SREM, VT, Expand);
+ setOperationAction(ISD::UREM, VT, Expand);
+
+ // Add/Sub overflow ops with MVT::Glues are lowered to EFLAGS dependences.
+ setOperationAction(ISD::ADDC, VT, Custom);
+ setOperationAction(ISD::ADDE, VT, Custom);
+ setOperationAction(ISD::SUBC, VT, Custom);
+ setOperationAction(ISD::SUBE, VT, Custom);
+ }
+
+ setOperationAction(ISD::BR_JT , MVT::Other, Expand);
+ setOperationAction(ISD::BRCOND , MVT::Other, Custom);
+ setOperationAction(ISD::BR_CC , MVT::Other, Expand);
+ setOperationAction(ISD::SELECT_CC , MVT::Other, Expand);
+ if (Subtarget->is64Bit())
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
+ setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand);
+ setOperationAction(ISD::FREM , MVT::f32 , Expand);
+ setOperationAction(ISD::FREM , MVT::f64 , Expand);
+ setOperationAction(ISD::FREM , MVT::f80 , Expand);
+ setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom);
+
+ if (Subtarget->hasBMI()) {
+ setOperationAction(ISD::CTTZ , MVT::i8 , Promote);
+ } else {
+ setOperationAction(ISD::CTTZ , MVT::i8 , Custom);
+ setOperationAction(ISD::CTTZ , MVT::i16 , Custom);
+ setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
+ if (Subtarget->is64Bit())
+ setOperationAction(ISD::CTTZ , MVT::i64 , Custom);
+ }
+
+ if (Subtarget->hasLZCNT()) {
+ setOperationAction(ISD::CTLZ , MVT::i8 , Promote);
+ } else {
+ setOperationAction(ISD::CTLZ , MVT::i8 , Custom);
+ setOperationAction(ISD::CTLZ , MVT::i16 , Custom);
+ setOperationAction(ISD::CTLZ , MVT::i32 , Custom);
+ if (Subtarget->is64Bit())
+ setOperationAction(ISD::CTLZ , MVT::i64 , Custom);
+ }
+
+ if (Subtarget->hasPOPCNT()) {
+ setOperationAction(ISD::CTPOP , MVT::i8 , Promote);
+ } else {
+ setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
+ setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
+ setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
+ if (Subtarget->is64Bit())
+ setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
+ }
+
+ setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
+ setOperationAction(ISD::BSWAP , MVT::i16 , Expand);
+
+ // These should be promoted to a larger select which is supported.
+ setOperationAction(ISD::SELECT , MVT::i1 , Promote);
+ // X86 wants to expand cmov itself.
+ setOperationAction(ISD::SELECT , MVT::i8 , Custom);
+ setOperationAction(ISD::SELECT , MVT::i16 , Custom);
+ setOperationAction(ISD::SELECT , MVT::i32 , Custom);
+ setOperationAction(ISD::SELECT , MVT::f32 , Custom);
+ setOperationAction(ISD::SELECT , MVT::f64 , Custom);
+ setOperationAction(ISD::SELECT , MVT::f80 , Custom);
+ setOperationAction(ISD::SETCC , MVT::i8 , Custom);
+ setOperationAction(ISD::SETCC , MVT::i16 , Custom);
+ setOperationAction(ISD::SETCC , MVT::i32 , Custom);
+ setOperationAction(ISD::SETCC , MVT::f32 , Custom);
+ setOperationAction(ISD::SETCC , MVT::f64 , Custom);
+ setOperationAction(ISD::SETCC , MVT::f80 , Custom);
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::SELECT , MVT::i64 , Custom);
+ setOperationAction(ISD::SETCC , MVT::i64 , Custom);
+ }
+ setOperationAction(ISD::EH_RETURN , MVT::Other, Custom);
+
+ // Darwin ABI issue.
+ setOperationAction(ISD::ConstantPool , MVT::i32 , Custom);
+ setOperationAction(ISD::JumpTable , MVT::i32 , Custom);
+ setOperationAction(ISD::GlobalAddress , MVT::i32 , Custom);
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i32 , Custom);
+ if (Subtarget->is64Bit())
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
+ setOperationAction(ISD::ExternalSymbol , MVT::i32 , Custom);
+ setOperationAction(ISD::BlockAddress , MVT::i32 , Custom);
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::ConstantPool , MVT::i64 , Custom);
+ setOperationAction(ISD::JumpTable , MVT::i64 , Custom);
+ setOperationAction(ISD::GlobalAddress , MVT::i64 , Custom);
+ setOperationAction(ISD::ExternalSymbol, MVT::i64 , Custom);
+ setOperationAction(ISD::BlockAddress , MVT::i64 , Custom);
+ }
+ // 64-bit addm sub, shl, sra, srl (iff 32-bit x86)
+ setOperationAction(ISD::SHL_PARTS , MVT::i32 , Custom);
+ setOperationAction(ISD::SRA_PARTS , MVT::i32 , Custom);
+ setOperationAction(ISD::SRL_PARTS , MVT::i32 , Custom);
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::SHL_PARTS , MVT::i64 , Custom);
+ setOperationAction(ISD::SRA_PARTS , MVT::i64 , Custom);
+ setOperationAction(ISD::SRL_PARTS , MVT::i64 , Custom);
+ }
+
+ if (Subtarget->hasXMM())
+ setOperationAction(ISD::PREFETCH , MVT::Other, Legal);
+
+ setOperationAction(ISD::MEMBARRIER , MVT::Other, Custom);
+ setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom);
+
+ // On X86 and X86-64, atomic operations are lowered to locked instructions.
+ // Locked instructions, in turn, have implicit fence semantics (all memory
+ // operations are flushed before issuing the locked instruction, and they
+ // are not buffered), so we can fold away the common pattern of
+ // fence-atomic-fence.
+ setShouldFoldAtomicFences(true);
+
+ // Expand certain atomics
+ for (unsigned i = 0, e = 4; i != e; ++i) {
+ MVT VT = IntVTs[i];
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, VT, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
+ setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
+ }
+
+ if (!Subtarget->is64Bit()) {
+ setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Custom);
+ }
+
+ if (Subtarget->hasCmpxchg16b()) {
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
+ }
+
+ // FIXME - use subtarget debug flags
+ if (!Subtarget->isTargetDarwin() &&
+ !Subtarget->isTargetELF() &&
+ !Subtarget->isTargetCygMing()) {
+ setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
+ }
+
+ setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
+ setOperationAction(ISD::EHSELECTION, MVT::i64, Expand);
+ setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
+ setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
+ if (Subtarget->is64Bit()) {
+ setExceptionPointerRegister(X86::RAX);
+ setExceptionSelectorRegister(X86::RDX);
+ } else {
+ setExceptionPointerRegister(X86::EAX);
+ setExceptionSelectorRegister(X86::EDX);
+ }
+ setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);
+ setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom);
+
+ setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
+ setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
+
+ setOperationAction(ISD::TRAP, MVT::Other, Legal);
+
+ // VASTART needs to be custom lowered to use the VarArgsFrameIndex
+ setOperationAction(ISD::VASTART , MVT::Other, Custom);
+ setOperationAction(ISD::VAEND , MVT::Other, Expand);
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::VAARG , MVT::Other, Custom);
+ setOperationAction(ISD::VACOPY , MVT::Other, Custom);
+ } else {
+ setOperationAction(ISD::VAARG , MVT::Other, Expand);
+ setOperationAction(ISD::VACOPY , MVT::Other, Expand);
+ }
+
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+
+ if (Subtarget->isTargetCOFF() && !Subtarget->isTargetEnvMacho())
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ?
+ MVT::i64 : MVT::i32, Custom);
+ else if (EnableSegmentedStacks)
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ?
+ MVT::i64 : MVT::i32, Custom);
+ else
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ?
+ MVT::i64 : MVT::i32, Expand);
+
+ if (!UseSoftFloat && X86ScalarSSEf64) {
+ // f32 and f64 use SSE.
+ // Set up the FP register classes.
+ addRegisterClass(MVT::f32, X86::FR32RegisterClass);
+ addRegisterClass(MVT::f64, X86::FR64RegisterClass);
+
+ // Use ANDPD to simulate FABS.
+ setOperationAction(ISD::FABS , MVT::f64, Custom);
+ setOperationAction(ISD::FABS , MVT::f32, Custom);
+
+ // Use XORP to simulate FNEG.
+ setOperationAction(ISD::FNEG , MVT::f64, Custom);
+ setOperationAction(ISD::FNEG , MVT::f32, Custom);
+
+ // Use ANDPD and ORPD to simulate FCOPYSIGN.
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
+
+ // Lower this to FGETSIGNx86 plus an AND.
+ setOperationAction(ISD::FGETSIGN, MVT::i64, Custom);
+ setOperationAction(ISD::FGETSIGN, MVT::i32, Custom);
+
+ // We don't support sin/cos/fmod
+ setOperationAction(ISD::FSIN , MVT::f64, Expand);
+ setOperationAction(ISD::FCOS , MVT::f64, Expand);
+ setOperationAction(ISD::FSIN , MVT::f32, Expand);
+ setOperationAction(ISD::FCOS , MVT::f32, Expand);
+
+ // Expand FP immediates into loads from the stack, except for the special
+ // cases we handle.
+ addLegalFPImmediate(APFloat(+0.0)); // xorpd
+ addLegalFPImmediate(APFloat(+0.0f)); // xorps
+ } else if (!UseSoftFloat && X86ScalarSSEf32) {
+ // Use SSE for f32, x87 for f64.
+ // Set up the FP register classes.
+ addRegisterClass(MVT::f32, X86::FR32RegisterClass);
+ addRegisterClass(MVT::f64, X86::RFP64RegisterClass);
+
+ // Use ANDPS to simulate FABS.
+ setOperationAction(ISD::FABS , MVT::f32, Custom);
+
+ // Use XORP to simulate FNEG.
+ setOperationAction(ISD::FNEG , MVT::f32, Custom);
+
+ setOperationAction(ISD::UNDEF, MVT::f64, Expand);
+
+ // Use ANDPS and ORPS to simulate FCOPYSIGN.
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
+
+ // We don't support sin/cos/fmod
+ setOperationAction(ISD::FSIN , MVT::f32, Expand);
+ setOperationAction(ISD::FCOS , MVT::f32, Expand);
+
+ // Special cases we handle for FP constants.
+ addLegalFPImmediate(APFloat(+0.0f)); // xorps
+ addLegalFPImmediate(APFloat(+0.0)); // FLD0
+ addLegalFPImmediate(APFloat(+1.0)); // FLD1
+ addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
+ addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
+
+ if (!UnsafeFPMath) {
+ setOperationAction(ISD::FSIN , MVT::f64 , Expand);
+ setOperationAction(ISD::FCOS , MVT::f64 , Expand);
+ }
+ } else if (!UseSoftFloat) {
+ // f32 and f64 in x87.
+ // Set up the FP register classes.
+ addRegisterClass(MVT::f64, X86::RFP64RegisterClass);
+ addRegisterClass(MVT::f32, X86::RFP32RegisterClass);
+
+ setOperationAction(ISD::UNDEF, MVT::f64, Expand);
+ setOperationAction(ISD::UNDEF, MVT::f32, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+
+ if (!UnsafeFPMath) {
+ setOperationAction(ISD::FSIN , MVT::f64 , Expand);
+ setOperationAction(ISD::FCOS , MVT::f64 , Expand);
+ }
+ addLegalFPImmediate(APFloat(+0.0)); // FLD0
+ addLegalFPImmediate(APFloat(+1.0)); // FLD1
+ addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
+ addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
+ addLegalFPImmediate(APFloat(+0.0f)); // FLD0
+ addLegalFPImmediate(APFloat(+1.0f)); // FLD1
+ addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
+ addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
+ }
+
+ // We don't support FMA.
+ setOperationAction(ISD::FMA, MVT::f64, Expand);
+ setOperationAction(ISD::FMA, MVT::f32, Expand);
+
+ // Long double always uses X87.
+ if (!UseSoftFloat) {
+ addRegisterClass(MVT::f80, X86::RFP80RegisterClass);
+ setOperationAction(ISD::UNDEF, MVT::f80, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
+ {
+ APFloat TmpFlt = APFloat::getZero(APFloat::x87DoubleExtended);
+ addLegalFPImmediate(TmpFlt); // FLD0
+ TmpFlt.changeSign();
+ addLegalFPImmediate(TmpFlt); // FLD0/FCHS
+
+ bool ignored;
+ APFloat TmpFlt2(+1.0);
+ TmpFlt2.convert(APFloat::x87DoubleExtended, APFloat::rmNearestTiesToEven,
+ &ignored);
+ addLegalFPImmediate(TmpFlt2); // FLD1
+ TmpFlt2.changeSign();
+ addLegalFPImmediate(TmpFlt2); // FLD1/FCHS
+ }
+
+ if (!UnsafeFPMath) {
+ setOperationAction(ISD::FSIN , MVT::f80 , Expand);
+ setOperationAction(ISD::FCOS , MVT::f80 , Expand);
+ }
+
+ setOperationAction(ISD::FMA, MVT::f80, Expand);
+ }
+
+ // Always use a library call for pow.
+ setOperationAction(ISD::FPOW , MVT::f32 , Expand);
+ setOperationAction(ISD::FPOW , MVT::f64 , Expand);
+ setOperationAction(ISD::FPOW , MVT::f80 , Expand);
+
+ setOperationAction(ISD::FLOG, MVT::f80, Expand);
+ setOperationAction(ISD::FLOG2, MVT::f80, Expand);
+ setOperationAction(ISD::FLOG10, MVT::f80, Expand);
+ setOperationAction(ISD::FEXP, MVT::f80, Expand);
+ setOperationAction(ISD::FEXP2, MVT::f80, Expand);
+
+ // First set operation action for all vector types to either promote
+ // (for widening) or expand (for scalarization). Then we will selectively
+ // turn on ones that can be effectively codegen'd.
+ for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+ VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
+ setOperationAction(ISD::ADD , (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::SUB , (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FADD, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FNEG, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FSUB, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::MUL , (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FMUL, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::SDIV, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::UDIV, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FDIV, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::SREM, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::UREM, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::LOAD, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT,(MVT::SimpleValueType)VT,Expand);
+ setOperationAction(ISD::INSERT_VECTOR_ELT,(MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR,(MVT::SimpleValueType)VT,Expand);
+ setOperationAction(ISD::INSERT_SUBVECTOR,(MVT::SimpleValueType)VT,Expand);
+ setOperationAction(ISD::FABS, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FSIN, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FCOS, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FREM, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FPOWI, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FSQRT, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FCOPYSIGN, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::SMUL_LOHI, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::UMUL_LOHI, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::SDIVREM, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::UDIVREM, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FPOW, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::CTPOP, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::CTTZ, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::CTLZ, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::SHL, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::SRA, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::SRL, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::ROTL, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::ROTR, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::BSWAP, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::SETCC, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FLOG, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FLOG2, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FLOG10, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FEXP, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FEXP2, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FP_TO_UINT, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FP_TO_SINT, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::UINT_TO_FP, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::SINT_TO_FP, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, (MVT::SimpleValueType)VT,Expand);
+ setOperationAction(ISD::TRUNCATE, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::SIGN_EXTEND, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::ZERO_EXTEND, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::ANY_EXTEND, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::VSELECT, (MVT::SimpleValueType)VT, Expand);
+ for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+ InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
+ setTruncStoreAction((MVT::SimpleValueType)VT,
+ (MVT::SimpleValueType)InnerVT, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT, Expand);
+ setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand);
+ }
+
+ // FIXME: In order to prevent SSE instructions being expanded to MMX ones
+ // with -msoft-float, disable use of MMX as well.
+ if (!UseSoftFloat && Subtarget->hasMMX()) {
+ addRegisterClass(MVT::x86mmx, X86::VR64RegisterClass);
+ // No operations on x86mmx supported, everything uses intrinsics.
+ }
+
+ // MMX-sized vectors (other than x86mmx) are expected to be expanded
+ // into smaller operations.
+ setOperationAction(ISD::MULHS, MVT::v8i8, Expand);
+ setOperationAction(ISD::MULHS, MVT::v4i16, Expand);
+ setOperationAction(ISD::MULHS, MVT::v2i32, Expand);
+ setOperationAction(ISD::MULHS, MVT::v1i64, Expand);
+ setOperationAction(ISD::AND, MVT::v8i8, Expand);
+ setOperationAction(ISD::AND, MVT::v4i16, Expand);
+ setOperationAction(ISD::AND, MVT::v2i32, Expand);
+ setOperationAction(ISD::AND, MVT::v1i64, Expand);
+ setOperationAction(ISD::OR, MVT::v8i8, Expand);
+ setOperationAction(ISD::OR, MVT::v4i16, Expand);
+ setOperationAction(ISD::OR, MVT::v2i32, Expand);
+ setOperationAction(ISD::OR, MVT::v1i64, Expand);
+ setOperationAction(ISD::XOR, MVT::v8i8, Expand);
+ setOperationAction(ISD::XOR, MVT::v4i16, Expand);
+ setOperationAction(ISD::XOR, MVT::v2i32, Expand);
+ setOperationAction(ISD::XOR, MVT::v1i64, Expand);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i8, Expand);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i16, Expand);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i32, Expand);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v1i64, Expand);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v1i64, Expand);
+ setOperationAction(ISD::SELECT, MVT::v8i8, Expand);
+ setOperationAction(ISD::SELECT, MVT::v4i16, Expand);
+ setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
+ setOperationAction(ISD::SELECT, MVT::v1i64, Expand);
+ setOperationAction(ISD::BITCAST, MVT::v8i8, Expand);
+ setOperationAction(ISD::BITCAST, MVT::v4i16, Expand);
+ setOperationAction(ISD::BITCAST, MVT::v2i32, Expand);
+ setOperationAction(ISD::BITCAST, MVT::v1i64, Expand);
+
+ if (!UseSoftFloat && Subtarget->hasXMM()) {
+ addRegisterClass(MVT::v4f32, X86::VR128RegisterClass);
+
+ setOperationAction(ISD::FADD, MVT::v4f32, Legal);
+ setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
+ setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
+ setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
+ setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
+ setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
+ setOperationAction(ISD::LOAD, MVT::v4f32, Legal);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
+ setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
+ setOperationAction(ISD::SETCC, MVT::v4f32, Custom);
+ }
+
+ if (!UseSoftFloat && Subtarget->hasXMMInt()) {
+ addRegisterClass(MVT::v2f64, X86::VR128RegisterClass);
+
+ // FIXME: Unfortunately -soft-float and -no-implicit-float means XMM
+ // registers cannot be used even for integer operations.
+ addRegisterClass(MVT::v16i8, X86::VR128RegisterClass);
+ addRegisterClass(MVT::v8i16, X86::VR128RegisterClass);
+ addRegisterClass(MVT::v4i32, X86::VR128RegisterClass);
+ addRegisterClass(MVT::v2i64, X86::VR128RegisterClass);
+
+ setOperationAction(ISD::ADD, MVT::v16i8, Legal);
+ setOperationAction(ISD::ADD, MVT::v8i16, Legal);
+ setOperationAction(ISD::ADD, MVT::v4i32, Legal);
+ setOperationAction(ISD::ADD, MVT::v2i64, Legal);
+ setOperationAction(ISD::MUL, MVT::v2i64, Custom);
+ setOperationAction(ISD::SUB, MVT::v16i8, Legal);
+ setOperationAction(ISD::SUB, MVT::v8i16, Legal);
+ setOperationAction(ISD::SUB, MVT::v4i32, Legal);
+ setOperationAction(ISD::SUB, MVT::v2i64, Legal);
+ setOperationAction(ISD::MUL, MVT::v8i16, Legal);
+ setOperationAction(ISD::FADD, MVT::v2f64, Legal);
+ setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
+ setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
+ setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
+ setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
+ setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
+
+ setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
+ setOperationAction(ISD::SETCC, MVT::v16i8, Custom);
+ setOperationAction(ISD::SETCC, MVT::v8i16, Custom);
+ setOperationAction(ISD::SETCC, MVT::v4i32, Custom);
+
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
+
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v2f64, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i64, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i8, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i16, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Custom);
+
+ // Custom lower build_vector, vector_shuffle, and extract_vector_elt.
+ for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v2i64; ++i) {
+ EVT VT = (MVT::SimpleValueType)i;
+ // Do not attempt to custom lower non-power-of-2 vectors
+ if (!isPowerOf2_32(VT.getVectorNumElements()))
+ continue;
+ // Do not attempt to custom lower non-128-bit vectors
+ if (!VT.is128BitVector())
+ continue;
+ setOperationAction(ISD::BUILD_VECTOR,
+ VT.getSimpleVT().SimpleTy, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE,
+ VT.getSimpleVT().SimpleTy, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT,
+ VT.getSimpleVT().SimpleTy, Custom);
+ }
+
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
+
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);
+ }
+
+ // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
+ for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v2i64; i++) {
+ MVT::SimpleValueType SVT = (MVT::SimpleValueType)i;
+ EVT VT = SVT;
+
+ // Do not attempt to promote non-128-bit vectors
+ if (!VT.is128BitVector())
+ continue;
+
+ setOperationAction(ISD::AND, SVT, Promote);
+ AddPromotedToType (ISD::AND, SVT, MVT::v2i64);
+ setOperationAction(ISD::OR, SVT, Promote);
+ AddPromotedToType (ISD::OR, SVT, MVT::v2i64);
+ setOperationAction(ISD::XOR, SVT, Promote);
+ AddPromotedToType (ISD::XOR, SVT, MVT::v2i64);
+ setOperationAction(ISD::LOAD, SVT, Promote);
+ AddPromotedToType (ISD::LOAD, SVT, MVT::v2i64);
+ setOperationAction(ISD::SELECT, SVT, Promote);
+ AddPromotedToType (ISD::SELECT, SVT, MVT::v2i64);
+ }
+
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
+ // Custom lower v2i64 and v2f64 selects.
+ setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
+ setOperationAction(ISD::LOAD, MVT::v2i64, Legal);
+ setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
+ setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
+
+ setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
+ }
+
+ if (Subtarget->hasSSE41() || Subtarget->hasAVX()) {
+ setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
+ setOperationAction(ISD::FCEIL, MVT::f32, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
+ setOperationAction(ISD::FRINT, MVT::f32, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
+ setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
+ setOperationAction(ISD::FCEIL, MVT::f64, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
+ setOperationAction(ISD::FRINT, MVT::f64, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
+
+ // FIXME: Do we need to handle scalar-to-vector here?
+ setOperationAction(ISD::MUL, MVT::v4i32, Legal);
+
+ // Can turn SHL into an integer multiply.
+ setOperationAction(ISD::SHL, MVT::v4i32, Custom);
+ setOperationAction(ISD::SHL, MVT::v16i8, Custom);
+
+ setOperationAction(ISD::VSELECT, MVT::v2f64, Legal);
+ setOperationAction(ISD::VSELECT, MVT::v2i64, Legal);
+ setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
+ setOperationAction(ISD::VSELECT, MVT::v4i32, Legal);
+ setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);
+
+ // i8 and i16 vectors are custom , because the source register and source
+ // source memory operand types are not the same width. f32 vectors are
+ // custom since the immediate controlling the insert encodes additional
+ // information.
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
+
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
+
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Legal);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal);
+ }
+ }
+
+ if (Subtarget->hasXMMInt()) {
+ setOperationAction(ISD::SRL, MVT::v2i64, Custom);
+ setOperationAction(ISD::SRL, MVT::v4i32, Custom);
+ setOperationAction(ISD::SRL, MVT::v16i8, Custom);
+ setOperationAction(ISD::SRL, MVT::v8i16, Custom);
+
+ setOperationAction(ISD::SHL, MVT::v2i64, Custom);
+ setOperationAction(ISD::SHL, MVT::v4i32, Custom);
+ setOperationAction(ISD::SHL, MVT::v8i16, Custom);
+
+ setOperationAction(ISD::SRA, MVT::v4i32, Custom);
+ setOperationAction(ISD::SRA, MVT::v8i16, Custom);
+ }
+
+ if (Subtarget->hasSSE42() || Subtarget->hasAVX())
+ setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
+
+ if (!UseSoftFloat && Subtarget->hasAVX()) {
+ addRegisterClass(MVT::v32i8, X86::VR256RegisterClass);
+ addRegisterClass(MVT::v16i16, X86::VR256RegisterClass);
+ addRegisterClass(MVT::v8i32, X86::VR256RegisterClass);
+ addRegisterClass(MVT::v8f32, X86::VR256RegisterClass);
+ addRegisterClass(MVT::v4i64, X86::VR256RegisterClass);
+ addRegisterClass(MVT::v4f64, X86::VR256RegisterClass);
+
+ setOperationAction(ISD::LOAD, MVT::v8f32, Legal);
+ setOperationAction(ISD::LOAD, MVT::v4f64, Legal);
+ setOperationAction(ISD::LOAD, MVT::v4i64, Legal);
+
+ setOperationAction(ISD::FADD, MVT::v8f32, Legal);
+ setOperationAction(ISD::FSUB, MVT::v8f32, Legal);
+ setOperationAction(ISD::FMUL, MVT::v8f32, Legal);
+ setOperationAction(ISD::FDIV, MVT::v8f32, Legal);
+ setOperationAction(ISD::FSQRT, MVT::v8f32, Legal);
+ setOperationAction(ISD::FNEG, MVT::v8f32, Custom);
+
+ setOperationAction(ISD::FADD, MVT::v4f64, Legal);
+ setOperationAction(ISD::FSUB, MVT::v4f64, Legal);
+ setOperationAction(ISD::FMUL, MVT::v4f64, Legal);
+ setOperationAction(ISD::FDIV, MVT::v4f64, Legal);
+ setOperationAction(ISD::FSQRT, MVT::v4f64, Legal);
+ setOperationAction(ISD::FNEG, MVT::v4f64, Custom);
+
+ setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
+ setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal);
+
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f64, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i64, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v8f32, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i32, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i8, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i16, Custom);
+
+ setOperationAction(ISD::SRL, MVT::v4i64, Custom);
+ setOperationAction(ISD::SRL, MVT::v8i32, Custom);
+ setOperationAction(ISD::SRL, MVT::v16i16, Custom);
+ setOperationAction(ISD::SRL, MVT::v32i8, Custom);
+
+ setOperationAction(ISD::SHL, MVT::v4i64, Custom);
+ setOperationAction(ISD::SHL, MVT::v8i32, Custom);
+ setOperationAction(ISD::SHL, MVT::v16i16, Custom);
+ setOperationAction(ISD::SHL, MVT::v32i8, Custom);
+
+ setOperationAction(ISD::SRA, MVT::v8i32, Custom);
+ setOperationAction(ISD::SRA, MVT::v16i16, Custom);
+
+ setOperationAction(ISD::SETCC, MVT::v32i8, Custom);
+ setOperationAction(ISD::SETCC, MVT::v16i16, Custom);
+ setOperationAction(ISD::SETCC, MVT::v8i32, Custom);
+ setOperationAction(ISD::SETCC, MVT::v4i64, Custom);
+
+ setOperationAction(ISD::SELECT, MVT::v4f64, Custom);
+ setOperationAction(ISD::SELECT, MVT::v4i64, Custom);
+ setOperationAction(ISD::SELECT, MVT::v8f32, Custom);
+
+ setOperationAction(ISD::VSELECT, MVT::v4f64, Legal);
+ setOperationAction(ISD::VSELECT, MVT::v4i64, Legal);
+ setOperationAction(ISD::VSELECT, MVT::v8i32, Legal);
+ setOperationAction(ISD::VSELECT, MVT::v8f32, Legal);
+
+ setOperationAction(ISD::ADD, MVT::v4i64, Custom);
+ setOperationAction(ISD::ADD, MVT::v8i32, Custom);
+ setOperationAction(ISD::ADD, MVT::v16i16, Custom);
+ setOperationAction(ISD::ADD, MVT::v32i8, Custom);
+
+ setOperationAction(ISD::SUB, MVT::v4i64, Custom);
+ setOperationAction(ISD::SUB, MVT::v8i32, Custom);
+ setOperationAction(ISD::SUB, MVT::v16i16, Custom);
+ setOperationAction(ISD::SUB, MVT::v32i8, Custom);
+
+ setOperationAction(ISD::MUL, MVT::v4i64, Custom);
+ setOperationAction(ISD::MUL, MVT::v8i32, Custom);
+ setOperationAction(ISD::MUL, MVT::v16i16, Custom);
+ // Don't lower v32i8 because there is no 128-bit byte mul
+
+ // Custom lower several nodes for 256-bit types.
+ for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+ i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
+ MVT::SimpleValueType SVT = (MVT::SimpleValueType)i;
+ EVT VT = SVT;
+
+ // Extract subvector is special because the value type
+ // (result) is 128-bit but the source is 256-bit wide.
+ if (VT.is128BitVector())
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, SVT, Custom);
+
+ // Do not attempt to custom lower other non-256-bit vectors
+ if (!VT.is256BitVector())
+ continue;
+
+ setOperationAction(ISD::BUILD_VECTOR, SVT, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, SVT, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, SVT, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, SVT, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, SVT, Custom);
+ setOperationAction(ISD::INSERT_SUBVECTOR, SVT, Custom);
+ }
+
+ // Promote v32i8, v16i16, v8i32 select, and, or, xor to v4i64.
+ for (unsigned i = (unsigned)MVT::v32i8; i != (unsigned)MVT::v4i64; ++i) {
+ MVT::SimpleValueType SVT = (MVT::SimpleValueType)i;
+ EVT VT = SVT;
+
+ // Do not attempt to promote non-256-bit vectors
+ if (!VT.is256BitVector())
+ continue;
+
+ setOperationAction(ISD::AND, SVT, Promote);
+ AddPromotedToType (ISD::AND, SVT, MVT::v4i64);
+ setOperationAction(ISD::OR, SVT, Promote);
+ AddPromotedToType (ISD::OR, SVT, MVT::v4i64);
+ setOperationAction(ISD::XOR, SVT, Promote);
+ AddPromotedToType (ISD::XOR, SVT, MVT::v4i64);
+ setOperationAction(ISD::LOAD, SVT, Promote);
+ AddPromotedToType (ISD::LOAD, SVT, MVT::v4i64);
+ setOperationAction(ISD::SELECT, SVT, Promote);
+ AddPromotedToType (ISD::SELECT, SVT, MVT::v4i64);
+ }
+ }
+
+ // SIGN_EXTEND_INREGs are evaluated by the extend type. Handle the expansion
+ // of this type with custom code.
+ for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+ VT != (unsigned)MVT::LAST_VECTOR_VALUETYPE; VT++) {
+ setOperationAction(ISD::SIGN_EXTEND_INREG, (MVT::SimpleValueType)VT, Custom);
+ }
+
+ // We want to custom lower some of our intrinsics.
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+
+
+ // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
+ // handle type legalization for these operations here.
+ //
+ // FIXME: We really should do custom legalization for addition and
+ // subtraction on x86-32 once PR3203 is fixed. We really can't do much better
+ // than generic legalization for 64-bit multiplication-with-overflow, though.
+ for (unsigned i = 0, e = 3+Subtarget->is64Bit(); i != e; ++i) {
+ // Add/Sub/Mul with overflow operations are custom lowered.
+ MVT VT = IntVTs[i];
+ setOperationAction(ISD::SADDO, VT, Custom);
+ setOperationAction(ISD::UADDO, VT, Custom);
+ setOperationAction(ISD::SSUBO, VT, Custom);
+ setOperationAction(ISD::USUBO, VT, Custom);
+ setOperationAction(ISD::SMULO, VT, Custom);
+ setOperationAction(ISD::UMULO, VT, Custom);
+ }
+
+ // There are no 8-bit 3-address imul/mul instructions
+ setOperationAction(ISD::SMULO, MVT::i8, Expand);
+ setOperationAction(ISD::UMULO, MVT::i8, Expand);
+
+ if (!Subtarget->is64Bit()) {
+ // These libcalls are not available in 32-bit.
+ setLibcallName(RTLIB::SHL_I128, 0);
+ setLibcallName(RTLIB::SRL_I128, 0);
+ setLibcallName(RTLIB::SRA_I128, 0);
+ }
+
+ // We have target-specific dag combine patterns for the following nodes:
+ setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
+ setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
+ setTargetDAGCombine(ISD::BUILD_VECTOR);
+ setTargetDAGCombine(ISD::VSELECT);
+ setTargetDAGCombine(ISD::SELECT);
+ setTargetDAGCombine(ISD::SHL);
+ setTargetDAGCombine(ISD::SRA);
+ setTargetDAGCombine(ISD::SRL);
+ setTargetDAGCombine(ISD::OR);
+ setTargetDAGCombine(ISD::AND);
+ setTargetDAGCombine(ISD::ADD);
+ setTargetDAGCombine(ISD::FADD);
+ setTargetDAGCombine(ISD::FSUB);
+ setTargetDAGCombine(ISD::SUB);
+ setTargetDAGCombine(ISD::LOAD);
+ setTargetDAGCombine(ISD::STORE);
+ setTargetDAGCombine(ISD::ZERO_EXTEND);
+ setTargetDAGCombine(ISD::SINT_TO_FP);
+ if (Subtarget->is64Bit())
+ setTargetDAGCombine(ISD::MUL);
+
+ computeRegisterProperties();
+
+ // On Darwin, -Os means optimize for size without hurting performance,
+ // do not reduce the limit.
+ maxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
+ maxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 16 : 8;
+ maxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
+ maxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
+ maxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
+ maxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
+ setPrefLoopAlignment(16);
+ benefitFromCodePlacementOpt = true;
+
+ setPrefFunctionAlignment(4);
+}
+
+
+EVT X86TargetLowering::getSetCCResultType(EVT VT) const {
+ if (!VT.isVector()) return MVT::i8;
+ return VT.changeVectorElementTypeToInteger();
+}
+
+
+/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
+/// the desired ByVal argument alignment.
+static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign) {
+ if (MaxAlign == 16)
+ return;
+ if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
+ if (VTy->getBitWidth() == 128)
+ MaxAlign = 16;
+ } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+ unsigned EltAlign = 0;
+ getMaxByValAlign(ATy->getElementType(), EltAlign);
+ if (EltAlign > MaxAlign)
+ MaxAlign = EltAlign;
+ } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ unsigned EltAlign = 0;
+ getMaxByValAlign(STy->getElementType(i), EltAlign);
+ if (EltAlign > MaxAlign)
+ MaxAlign = EltAlign;
+ if (MaxAlign == 16)
+ break;
+ }
+ }
+ return;
+}
+
+/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
+/// function arguments in the caller parameter area. For X86, aggregates
+/// that contain SSE vectors are placed at 16-byte boundaries while the rest
+/// are at 4-byte boundaries.
+unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty) const {
+ if (Subtarget->is64Bit()) {
+ // Max of 8 and alignment of type.
+ unsigned TyAlign = TD->getABITypeAlignment(Ty);
+ if (TyAlign > 8)
+ return TyAlign;
+ return 8;
+ }
+
+ unsigned Align = 4;
+ if (Subtarget->hasXMM())
+ getMaxByValAlign(Ty, Align);
+ return Align;
+}
+
+/// getOptimalMemOpType - Returns the target specific optimal type for load
+/// and store operations as a result of memset, memcpy, and memmove
+/// lowering. If DstAlign is zero that means it's safe to destination
+/// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
+/// means there isn't a need to check it against alignment requirement,
+/// probably because the source does not need to be loaded. If
+/// 'NonScalarIntSafe' is true, that means it's safe to return a
+/// non-scalar-integer type, e.g. empty string source, constant, or loaded
+/// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is
+/// constant so it does not need to be loaded.
+/// It returns EVT::Other if the type should be determined using generic
+/// target-independent logic.
+EVT
+X86TargetLowering::getOptimalMemOpType(uint64_t Size,
+ unsigned DstAlign, unsigned SrcAlign,
+ bool NonScalarIntSafe,
+ bool MemcpyStrSrc,
+ MachineFunction &MF) const {
+ // FIXME: This turns off use of xmm stores for memset/memcpy on targets like
+ // linux. This is because the stack realignment code can't handle certain
+ // cases like PR2962. This should be removed when PR2962 is fixed.
+ const Function *F = MF.getFunction();
+ if (NonScalarIntSafe &&
+ !F->hasFnAttr(Attribute::NoImplicitFloat)) {
+ if (Size >= 16 &&
+ (Subtarget->isUnalignedMemAccessFast() ||
+ ((DstAlign == 0 || DstAlign >= 16) &&
+ (SrcAlign == 0 || SrcAlign >= 16))) &&
+ Subtarget->getStackAlignment() >= 16) {
+ if (Subtarget->hasAVX() &&
+ Subtarget->getStackAlignment() >= 32)
+ return MVT::v8f32;
+ if (Subtarget->hasXMMInt())
+ return MVT::v4i32;
+ if (Subtarget->hasXMM())
+ return MVT::v4f32;
+ } else if (!MemcpyStrSrc && Size >= 8 &&
+ !Subtarget->is64Bit() &&
+ Subtarget->getStackAlignment() >= 8 &&
+ Subtarget->hasXMMInt()) {
+ // Do not use f64 to lower memcpy if source is string constant. It's
+ // better to use i32 to avoid the loads.
+ return MVT::f64;
+ }
+ }
+ if (Subtarget->is64Bit() && Size >= 8)
+ return MVT::i64;
+ return MVT::i32;
+}
+
+/// getJumpTableEncoding - Return the entry encoding for a jump table in the
+/// current function. The returned value is a member of the
+/// MachineJumpTableInfo::JTEntryKind enum.
+unsigned X86TargetLowering::getJumpTableEncoding() const {
+ // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
+ // symbol.
+ if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
+ Subtarget->isPICStyleGOT())
+ return MachineJumpTableInfo::EK_Custom32;
+
+ // Otherwise, use the normal jump table encoding heuristics.
+ return TargetLowering::getJumpTableEncoding();
+}
+
+const MCExpr *
+X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
+ const MachineBasicBlock *MBB,
+ unsigned uid,MCContext &Ctx) const{
+ assert(getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
+ Subtarget->isPICStyleGOT());
+ // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
+ // entries.
+ return MCSymbolRefExpr::Create(MBB->getSymbol(),
+ MCSymbolRefExpr::VK_GOTOFF, Ctx);
+}
+
+/// getPICJumpTableRelocaBase - Returns relocation base for the given PIC
+/// jumptable.
+SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
+ SelectionDAG &DAG) const {
+ if (!Subtarget->is64Bit())
+ // This doesn't have DebugLoc associated with it, but is not really the
+ // same as a Register.
+ return DAG.getNode(X86ISD::GlobalBaseReg, DebugLoc(), getPointerTy());
+ return Table;
+}
+
+/// getPICJumpTableRelocBaseExpr - This returns the relocation base for the
+/// given PIC jumptable, the same as getPICJumpTableRelocBase, but as an
+/// MCExpr.
+const MCExpr *X86TargetLowering::
+getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
+ MCContext &Ctx) const {
+ // X86-64 uses RIP relative addressing based on the jump table label.
+ if (Subtarget->isPICStyleRIPRel())
+ return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
+
+ // Otherwise, the reference is relative to the PIC base.
+ return MCSymbolRefExpr::Create(MF->getPICBaseSymbol(), Ctx);
+}
+
+// FIXME: Why this routine is here? Move to RegInfo!
+std::pair<const TargetRegisterClass*, uint8_t>
+X86TargetLowering::findRepresentativeClass(EVT VT) const{
+ const TargetRegisterClass *RRC = 0;
+ uint8_t Cost = 1;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default:
+ return TargetLowering::findRepresentativeClass(VT);
+ case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
+ RRC = (Subtarget->is64Bit()
+ ? X86::GR64RegisterClass : X86::GR32RegisterClass);
+ break;
+ case MVT::x86mmx:
+ RRC = X86::VR64RegisterClass;
+ break;
+ case MVT::f32: case MVT::f64:
+ case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
+ case MVT::v4f32: case MVT::v2f64:
+ case MVT::v32i8: case MVT::v8i32: case MVT::v4i64: case MVT::v8f32:
+ case MVT::v4f64:
+ RRC = X86::VR128RegisterClass;
+ break;
+ }
+ return std::make_pair(RRC, Cost);
+}
+
+bool X86TargetLowering::getStackCookieLocation(unsigned &AddressSpace,
+ unsigned &Offset) const {
+ if (!Subtarget->isTargetLinux())
+ return false;
+
+ if (Subtarget->is64Bit()) {
+ // %fs:0x28, unless we're using a Kernel code model, in which case it's %gs:
+ Offset = 0x28;
+ if (getTargetMachine().getCodeModel() == CodeModel::Kernel)
+ AddressSpace = 256;
+ else
+ AddressSpace = 257;
+ } else {
+ // %gs:0x14 on i386
+ Offset = 0x14;
+ AddressSpace = 256;
+ }
+ return true;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Return Value Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+#include "X86GenCallingConv.inc"
+
+bool
+X86TargetLowering::CanLowerReturn(CallingConv::ID CallConv,
+ MachineFunction &MF, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext &Context) const {
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(),
+ RVLocs, Context);
+ return CCInfo.CheckReturn(Outs, RetCC_X86);
+}
+
+SDValue
+X86TargetLowering::LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
+
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(),
+ RVLocs, *DAG.getContext());
+ CCInfo.AnalyzeReturn(Outs, RetCC_X86);
+
+ // Add the regs to the liveout set for the function.
+ MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
+ for (unsigned i = 0; i != RVLocs.size(); ++i)
+ if (RVLocs[i].isRegLoc() && !MRI.isLiveOut(RVLocs[i].getLocReg()))
+ MRI.addLiveOut(RVLocs[i].getLocReg());
+
+ SDValue Flag;
+
+ SmallVector<SDValue, 6> RetOps;
+ RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
+ // Operand #1 = Bytes To Pop
+ RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(),
+ MVT::i16));
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+ SDValue ValToCopy = OutVals[i];
+ EVT ValVT = ValToCopy.getValueType();
+
+ // If this is x86-64, and we disabled SSE, we can't return FP values,
+ // or SSE or MMX vectors.
+ if ((ValVT == MVT::f32 || ValVT == MVT::f64 ||
+ VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) &&
+ (Subtarget->is64Bit() && !Subtarget->hasXMM())) {
+ report_fatal_error("SSE register return with SSE disabled");
+ }
+ // Likewise we can't return F64 values with SSE1 only. gcc does so, but
+ // llvm-gcc has never done it right and no one has noticed, so this
+ // should be OK for now.
+ if (ValVT == MVT::f64 &&
+ (Subtarget->is64Bit() && !Subtarget->hasXMMInt()))
+ report_fatal_error("SSE2 register return with SSE2 disabled");
+
+ // Returns in ST0/ST1 are handled specially: these are pushed as operands to
+ // the RET instruction and handled by the FP Stackifier.
+ if (VA.getLocReg() == X86::ST0 ||
+ VA.getLocReg() == X86::ST1) {
+ // If this is a copy from an xmm register to ST(0), use an FPExtend to
+ // change the value to the FP stack register class.
+ if (isScalarFPTypeInSSEReg(VA.getValVT()))
+ ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
+ RetOps.push_back(ValToCopy);
+ // Don't emit a copytoreg.
+ continue;
+ }
+
+ // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
+ // which is returned in RAX / RDX.
+ if (Subtarget->is64Bit()) {
+ if (ValVT == MVT::x86mmx) {
+ if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
+ ValToCopy = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ValToCopy);
+ ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
+ ValToCopy);
+ // If we don't have SSE2 available, convert to v4f32 so the generated
+ // register is legal.
+ if (!Subtarget->hasXMMInt())
+ ValToCopy = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32,ValToCopy);
+ }
+ }
+ }
+
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), ValToCopy, Flag);
+ Flag = Chain.getValue(1);
+ }
+
+ // The x86-64 ABI for returning structs by value requires that we copy
+ // the sret argument into %rax for the return. We saved the argument into
+ // a virtual register in the entry block, so now we copy the value out
+ // and into %rax.
+ if (Subtarget->is64Bit() &&
+ DAG.getMachineFunction().getFunction()->hasStructRetAttr()) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
+ unsigned Reg = FuncInfo->getSRetReturnReg();
+ assert(Reg &&
+ "SRetReturnReg should have been set in LowerFormalArguments().");
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy());
+
+ Chain = DAG.getCopyToReg(Chain, dl, X86::RAX, Val, Flag);
+ Flag = Chain.getValue(1);
+
+ // RAX now acts like a return value.
+ MRI.addLiveOut(X86::RAX);
+ }
+
+ RetOps[0] = Chain; // Update chain.
+
+ // Add the flag if we have it.
+ if (Flag.getNode())
+ RetOps.push_back(Flag);
+
+ return DAG.getNode(X86ISD::RET_FLAG, dl,
+ MVT::Other, &RetOps[0], RetOps.size());
+}
+
+bool X86TargetLowering::isUsedByReturnOnly(SDNode *N) const {
+ if (N->getNumValues() != 1)
+ return false;
+ if (!N->hasNUsesOfValue(1, 0))
+ return false;
+
+ SDNode *Copy = *N->use_begin();
+ if (Copy->getOpcode() != ISD::CopyToReg &&
+ Copy->getOpcode() != ISD::FP_EXTEND)
+ return false;
+
+ bool HasRet = false;
+ for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
+ UI != UE; ++UI) {
+ if (UI->getOpcode() != X86ISD::RET_FLAG)
+ return false;
+ HasRet = true;
+ }
+
+ return HasRet;
+}
+
+EVT
+X86TargetLowering::getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT,
+ ISD::NodeType ExtendKind) const {
+ MVT ReturnMVT;
+ // TODO: Is this also valid on 32-bit?
+ if (Subtarget->is64Bit() && VT == MVT::i1 && ExtendKind == ISD::ZERO_EXTEND)
+ ReturnMVT = MVT::i8;
+ else
+ ReturnMVT = MVT::i32;
+
+ EVT MinVT = getRegisterType(Context, ReturnMVT);
+ return VT.bitsLT(MinVT) ? MinVT : VT;
+}
+
+/// LowerCallResult - Lower the result values of a call into the
+/// appropriate copies out of appropriate physical registers.
+///
+SDValue
+X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RVLocs;
+ bool Is64Bit = Subtarget->is64Bit();
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+ CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+ EVT CopyVT = VA.getValVT();
+
+ // If this is x86-64, and we disabled SSE, we can't return FP values
+ if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
+ ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasXMM())) {
+ report_fatal_error("SSE register return with SSE disabled");
+ }
+
+ SDValue Val;
+
+ // If this is a call to a function that returns an fp value on the floating
+ // point stack, we must guarantee the the value is popped from the stack, so
+ // a CopyFromReg is not good enough - the copy instruction may be eliminated
+ // if the return value is not used. We use the FpPOP_RETVAL instruction
+ // instead.
+ if (VA.getLocReg() == X86::ST0 || VA.getLocReg() == X86::ST1) {
+ // If we prefer to use the value in xmm registers, copy it out as f80 and
+ // use a truncate to move it from fp stack reg to xmm reg.
+ if (isScalarFPTypeInSSEReg(VA.getValVT())) CopyVT = MVT::f80;
+ SDValue Ops[] = { Chain, InFlag };
+ Chain = SDValue(DAG.getMachineNode(X86::FpPOP_RETVAL, dl, CopyVT,
+ MVT::Other, MVT::Glue, Ops, 2), 1);
+ Val = Chain.getValue(0);
+
+ // Round the f80 to the right size, which also moves it to the appropriate
+ // xmm register.
+ if (CopyVT != VA.getValVT())
+ Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
+ // This truncation won't change the value.
+ DAG.getIntPtrConstant(1));
+ } else {
+ Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
+ CopyVT, InFlag).getValue(1);
+ Val = Chain.getValue(0);
+ }
+ InFlag = Chain.getValue(2);
+ InVals.push_back(Val);
+ }
+
+ return Chain;
+}
+
+
+//===----------------------------------------------------------------------===//
+// C & StdCall & Fast Calling Convention implementation
+//===----------------------------------------------------------------------===//
+// StdCall calling convention seems to be standard for many Windows' API
+// routines and around. It differs from C calling convention just a little:
+// callee should clean up the stack, not caller. Symbols should be also
+// decorated in some fancy way :) It doesn't support any vector arguments.
+// For info on fast calling convention see Fast Calling Convention (tail call)
+// implementation LowerX86_32FastCCCallTo.
+
+/// CallIsStructReturn - Determines whether a call uses struct return
+/// semantics.
+static bool CallIsStructReturn(const SmallVectorImpl<ISD::OutputArg> &Outs) {
+ if (Outs.empty())
+ return false;
+
+ return Outs[0].Flags.isSRet();
+}
+
+/// ArgsAreStructReturn - Determines whether a function uses struct
+/// return semantics.
+static bool
+ArgsAreStructReturn(const SmallVectorImpl<ISD::InputArg> &Ins) {
+ if (Ins.empty())
+ return false;
+
+ return Ins[0].Flags.isSRet();
+}
+
+/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
+/// by "Src" to address "Dst" with size and alignment information specified by
+/// the specific parameter attribute. The copy will be passed as a byval
+/// function parameter.
+static SDValue
+CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
+ ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
+ DebugLoc dl) {
+ SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
+
+ return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
+ /*isVolatile*/false, /*AlwaysInline=*/true,
+ MachinePointerInfo(), MachinePointerInfo());
+}
+
+/// IsTailCallConvention - Return true if the calling convention is one that
+/// supports tail call optimization.
+static bool IsTailCallConvention(CallingConv::ID CC) {
+ return (CC == CallingConv::Fast || CC == CallingConv::GHC);
+}
+
+bool X86TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
+ if (!CI->isTailCall())
+ return false;
+
+ CallSite CS(CI);
+ CallingConv::ID CalleeCC = CS.getCallingConv();
+ if (!IsTailCallConvention(CalleeCC) && CalleeCC != CallingConv::C)
+ return false;
+
+ return true;
+}
+
+/// FuncIsMadeTailCallSafe - Return true if the function is being made into
+/// a tailcall target by changing its ABI.
+static bool FuncIsMadeTailCallSafe(CallingConv::ID CC) {
+ return GuaranteedTailCallOpt && IsTailCallConvention(CC);
+}
+
+SDValue
+X86TargetLowering::LowerMemArgument(SDValue Chain,
+ CallingConv::ID CallConv,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ const CCValAssign &VA,
+ MachineFrameInfo *MFI,
+ unsigned i) const {
+ // Create the nodes corresponding to a load from this parameter slot.
+ ISD::ArgFlagsTy Flags = Ins[i].Flags;
+ bool AlwaysUseMutable = FuncIsMadeTailCallSafe(CallConv);
+ bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
+ EVT ValVT;
+
+ // If value is passed by pointer we have address passed instead of the value
+ // itself.
+ if (VA.getLocInfo() == CCValAssign::Indirect)
+ ValVT = VA.getLocVT();
+ else
+ ValVT = VA.getValVT();
+
+ // FIXME: For now, all byval parameter objects are marked mutable. This can be
+ // changed with more analysis.
+ // In case of tail call optimization mark all arguments mutable. Since they
+ // could be overwritten by lowering of arguments in case of a tail call.
+ if (Flags.isByVal()) {
+ unsigned Bytes = Flags.getByValSize();
+ if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
+ int FI = MFI->CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable);
+ return DAG.getFrameIndex(FI, getPointerTy());
+ } else {
+ int FI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8,
+ VA.getLocMemOffset(), isImmutable);
+ SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+ return DAG.getLoad(ValVT, dl, Chain, FIN,
+ MachinePointerInfo::getFixedStack(FI),
+ false, false, 0);
+ }
+}
+
+SDValue
+X86TargetLowering::LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals)
+ const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
+
+ const Function* Fn = MF.getFunction();
+ if (Fn->hasExternalLinkage() &&
+ Subtarget->isTargetCygMing() &&
+ Fn->getName() == "main")
+ FuncInfo->setForceFramePointer(true);
+
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ bool Is64Bit = Subtarget->is64Bit();
+ bool IsWin64 = Subtarget->isTargetWin64();
+
+ assert(!(isVarArg && IsTailCallConvention(CallConv)) &&
+ "Var args not supported with calling convention fastcc or ghc");
+
+ // Assign locations to all of the incoming arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(),
+ ArgLocs, *DAG.getContext());
+
+ // Allocate shadow area for Win64
+ if (IsWin64) {
+ CCInfo.AllocateStack(32, 8);
+ }
+
+ CCInfo.AnalyzeFormalArguments(Ins, CC_X86);
+
+ unsigned LastVal = ~0U;
+ SDValue ArgValue;
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ // TODO: If an arg is passed in two places (e.g. reg and stack), skip later
+ // places.
+ assert(VA.getValNo() != LastVal &&
+ "Don't support value assigned to multiple locs yet");
+ (void)LastVal;
+ LastVal = VA.getValNo();
+
+ if (VA.isRegLoc()) {
+ EVT RegVT = VA.getLocVT();
+ TargetRegisterClass *RC = NULL;
+ if (RegVT == MVT::i32)
+ RC = X86::GR32RegisterClass;
+ else if (Is64Bit && RegVT == MVT::i64)
+ RC = X86::GR64RegisterClass;
+ else if (RegVT == MVT::f32)
+ RC = X86::FR32RegisterClass;
+ else if (RegVT == MVT::f64)
+ RC = X86::FR64RegisterClass;
+ else if (RegVT.isVector() && RegVT.getSizeInBits() == 256)
+ RC = X86::VR256RegisterClass;
+ else if (RegVT.isVector() && RegVT.getSizeInBits() == 128)
+ RC = X86::VR128RegisterClass;
+ else if (RegVT == MVT::x86mmx)
+ RC = X86::VR64RegisterClass;
+ else
+ llvm_unreachable("Unknown argument type!");
+
+ unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
+ ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
+
+ // If this is an 8 or 16-bit value, it is really passed promoted to 32
+ // bits. Insert an assert[sz]ext to capture this, then truncate to the
+ // right size.
+ if (VA.getLocInfo() == CCValAssign::SExt)
+ ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+ else if (VA.getLocInfo() == CCValAssign::ZExt)
+ ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+ else if (VA.getLocInfo() == CCValAssign::BCvt)
+ ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
+
+ if (VA.isExtInLoc()) {
+ // Handle MMX values passed in XMM regs.
+ if (RegVT.isVector()) {
+ ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(),
+ ArgValue);
+ } else
+ ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
+ }
+ } else {
+ assert(VA.isMemLoc());
+ ArgValue = LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, i);
+ }
+
+ // If value is passed via pointer - do a load.
+ if (VA.getLocInfo() == CCValAssign::Indirect)
+ ArgValue = DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue,
+ MachinePointerInfo(), false, false, 0);
+
+ InVals.push_back(ArgValue);
+ }
+
+ // The x86-64 ABI for returning structs by value requires that we copy
+ // the sret argument into %rax for the return. Save the argument into
+ // a virtual register so that we can access it from the return points.
+ if (Is64Bit && MF.getFunction()->hasStructRetAttr()) {
+ X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
+ unsigned Reg = FuncInfo->getSRetReturnReg();
+ if (!Reg) {
+ Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64));
+ FuncInfo->setSRetReturnReg(Reg);
+ }
+ SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[0]);
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
+ }
+
+ unsigned StackSize = CCInfo.getNextStackOffset();
+ // Align stack specially for tail calls.
+ if (FuncIsMadeTailCallSafe(CallConv))
+ StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
+
+ // If the function takes variable number of arguments, make a frame index for
+ // the start of the first vararg value... for expansion of llvm.va_start.
+ if (isVarArg) {
+ if (Is64Bit || (CallConv != CallingConv::X86_FastCall &&
+ CallConv != CallingConv::X86_ThisCall)) {
+ FuncInfo->setVarArgsFrameIndex(MFI->CreateFixedObject(1, StackSize,true));
+ }
+ if (Is64Bit) {
+ unsigned TotalNumIntRegs = 0, TotalNumXMMRegs = 0;
+
+ // FIXME: We should really autogenerate these arrays
+ static const unsigned GPR64ArgRegsWin64[] = {
+ X86::RCX, X86::RDX, X86::R8, X86::R9
+ };
+ static const unsigned GPR64ArgRegs64Bit[] = {
+ X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
+ };
+ static const unsigned XMMArgRegs64Bit[] = {
+ X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
+ X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
+ };
+ const unsigned *GPR64ArgRegs;
+ unsigned NumXMMRegs = 0;
+
+ if (IsWin64) {
+ // The XMM registers which might contain var arg parameters are shadowed
+ // in their paired GPR. So we only need to save the GPR to their home
+ // slots.
+ TotalNumIntRegs = 4;
+ GPR64ArgRegs = GPR64ArgRegsWin64;
+ } else {
+ TotalNumIntRegs = 6; TotalNumXMMRegs = 8;
+ GPR64ArgRegs = GPR64ArgRegs64Bit;
+
+ NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs64Bit, TotalNumXMMRegs);
+ }
+ unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs,
+ TotalNumIntRegs);
+
+ bool NoImplicitFloatOps = Fn->hasFnAttr(Attribute::NoImplicitFloat);
+ assert(!(NumXMMRegs && !Subtarget->hasXMM()) &&
+ "SSE register cannot be used when SSE is disabled!");
+ assert(!(NumXMMRegs && UseSoftFloat && NoImplicitFloatOps) &&
+ "SSE register cannot be used when SSE is disabled!");
+ if (UseSoftFloat || NoImplicitFloatOps || !Subtarget->hasXMM())
+ // Kernel mode asks for SSE to be disabled, so don't push them
+ // on the stack.
+ TotalNumXMMRegs = 0;
+
+ if (IsWin64) {
+ const TargetFrameLowering &TFI = *getTargetMachine().getFrameLowering();
+ // Get to the caller-allocated home save location. Add 8 to account
+ // for the return address.
+ int HomeOffset = TFI.getOffsetOfLocalArea() + 8;
+ FuncInfo->setRegSaveFrameIndex(
+ MFI->CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
+ // Fixup to set vararg frame on shadow area (4 x i64).
+ if (NumIntRegs < 4)
+ FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
+ } else {
+ // For X86-64, if there are vararg parameters that are passed via
+ // registers, then we must store them to their spots on the stack so they
+ // may be loaded by deferencing the result of va_next.
+ FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
+ FuncInfo->setVarArgsFPOffset(TotalNumIntRegs * 8 + NumXMMRegs * 16);
+ FuncInfo->setRegSaveFrameIndex(
+ MFI->CreateStackObject(TotalNumIntRegs * 8 + TotalNumXMMRegs * 16, 16,
+ false));
+ }
+
+ // Store the integer parameter registers.
+ SmallVector<SDValue, 8> MemOps;
+ SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
+ getPointerTy());
+ unsigned Offset = FuncInfo->getVarArgsGPOffset();
+ for (; NumIntRegs != TotalNumIntRegs; ++NumIntRegs) {
+ SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN,
+ DAG.getIntPtrConstant(Offset));
+ unsigned VReg = MF.addLiveIn(GPR64ArgRegs[NumIntRegs],
+ X86::GR64RegisterClass);
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
+ SDValue Store =
+ DAG.getStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo::getFixedStack(
+ FuncInfo->getRegSaveFrameIndex(), Offset),
+ false, false, 0);
+ MemOps.push_back(Store);
+ Offset += 8;
+ }
+
+ if (TotalNumXMMRegs != 0 && NumXMMRegs != TotalNumXMMRegs) {
+ // Now store the XMM (fp + vector) parameter registers.
+ SmallVector<SDValue, 11> SaveXMMOps;
+ SaveXMMOps.push_back(Chain);
+
+ unsigned AL = MF.addLiveIn(X86::AL, X86::GR8RegisterClass);
+ SDValue ALVal = DAG.getCopyFromReg(DAG.getEntryNode(), dl, AL, MVT::i8);
+ SaveXMMOps.push_back(ALVal);
+
+ SaveXMMOps.push_back(DAG.getIntPtrConstant(
+ FuncInfo->getRegSaveFrameIndex()));
+ SaveXMMOps.push_back(DAG.getIntPtrConstant(
+ FuncInfo->getVarArgsFPOffset()));
+
+ for (; NumXMMRegs != TotalNumXMMRegs; ++NumXMMRegs) {
+ unsigned VReg = MF.addLiveIn(XMMArgRegs64Bit[NumXMMRegs],
+ X86::VR128RegisterClass);
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::v4f32);
+ SaveXMMOps.push_back(Val);
+ }
+ MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, dl,
+ MVT::Other,
+ &SaveXMMOps[0], SaveXMMOps.size()));
+ }
+
+ if (!MemOps.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOps[0], MemOps.size());
+ }
+ }
+
+ // Some CCs need callee pop.
+ if (X86::isCalleePop(CallConv, Is64Bit, isVarArg, GuaranteedTailCallOpt)) {
+ FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
+ } else {
+ FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
+ // If this is an sret function, the return should pop the hidden pointer.
+ if (!Is64Bit && !IsTailCallConvention(CallConv) && ArgsAreStructReturn(Ins))
+ FuncInfo->setBytesToPopOnReturn(4);
+ }
+
+ if (!Is64Bit) {
+ // RegSaveFrameIndex is X86-64 only.
+ FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
+ if (CallConv == CallingConv::X86_FastCall ||
+ CallConv == CallingConv::X86_ThisCall)
+ // fastcc functions can't have varargs.
+ FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
+ }
+
+ FuncInfo->setArgumentStackSize(StackSize);
+
+ return Chain;
+}
+
+SDValue
+X86TargetLowering::LowerMemOpCallTo(SDValue Chain,
+ SDValue StackPtr, SDValue Arg,
+ DebugLoc dl, SelectionDAG &DAG,
+ const CCValAssign &VA,
+ ISD::ArgFlagsTy Flags) const {
+ unsigned LocMemOffset = VA.getLocMemOffset();
+ SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
+ PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
+ if (Flags.isByVal())
+ return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
+
+ return DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo::getStack(LocMemOffset),
+ false, false, 0);
+}
+
+/// EmitTailCallLoadRetAddr - Emit a load of return address if tail call
+/// optimization is performed and it is required.
+SDValue
+X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG,
+ SDValue &OutRetAddr, SDValue Chain,
+ bool IsTailCall, bool Is64Bit,
+ int FPDiff, DebugLoc dl) const {
+ // Adjust the Return address stack slot.
+ EVT VT = getPointerTy();
+ OutRetAddr = getReturnAddressFrameIndex(DAG);
+
+ // Load the "old" Return address.
+ OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo(),
+ false, false, 0);
+ return SDValue(OutRetAddr.getNode(), 1);
+}
+
+/// EmitTailCallStoreRetAddr - Emit a store of the return address if tail call
+/// optimization is performed and it is required (FPDiff!=0).
+static SDValue
+EmitTailCallStoreRetAddr(SelectionDAG & DAG, MachineFunction &MF,
+ SDValue Chain, SDValue RetAddrFrIdx,
+ bool Is64Bit, int FPDiff, DebugLoc dl) {
+ // Store the return address to the appropriate stack slot.
+ if (!FPDiff) return Chain;
+ // Calculate the new stack slot for the return address.
+ int SlotSize = Is64Bit ? 8 : 4;
+ int NewReturnAddrFI =
+ MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize, false);
+ EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
+ SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT);
+ Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
+ MachinePointerInfo::getFixedStack(NewReturnAddrFI),
+ false, false, 0);
+ return Chain;
+}
+
+SDValue
+X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ bool Is64Bit = Subtarget->is64Bit();
+ bool IsWin64 = Subtarget->isTargetWin64();
+ bool IsStructRet = CallIsStructReturn(Outs);
+ bool IsSibcall = false;
+
+ if (isTailCall) {
+ // Check if it's really possible to do a tail call.
+ isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
+ isVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(),
+ Outs, OutVals, Ins, DAG);
+
+ // Sibcalls are automatically detected tailcalls which do not require
+ // ABI changes.
+ if (!GuaranteedTailCallOpt && isTailCall)
+ IsSibcall = true;
+
+ if (isTailCall)
+ ++NumTailCalls;
+ }
+
+ assert(!(isVarArg && IsTailCallConvention(CallConv)) &&
+ "Var args not supported with calling convention fastcc or ghc");
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(),
+ ArgLocs, *DAG.getContext());
+
+ // Allocate shadow area for Win64
+ if (IsWin64) {
+ CCInfo.AllocateStack(32, 8);
+ }
+
+ CCInfo.AnalyzeCallOperands(Outs, CC_X86);
+
+ // Get a count of how many bytes are to be pushed on the stack.
+ unsigned NumBytes = CCInfo.getNextStackOffset();
+ if (IsSibcall)
+ // This is a sibcall. The memory operands are available in caller's
+ // own caller's stack.
+ NumBytes = 0;
+ else if (GuaranteedTailCallOpt && IsTailCallConvention(CallConv))
+ NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
+
+ int FPDiff = 0;
+ if (isTailCall && !IsSibcall) {
+ // Lower arguments at fp - stackoffset + fpdiff.
+ unsigned NumBytesCallerPushed =
+ MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn();
+ FPDiff = NumBytesCallerPushed - NumBytes;
+
+ // Set the delta of movement of the returnaddr stackslot.
+ // But only set if delta is greater than previous delta.
+ if (FPDiff < (MF.getInfo<X86MachineFunctionInfo>()->getTCReturnAddrDelta()))
+ MF.getInfo<X86MachineFunctionInfo>()->setTCReturnAddrDelta(FPDiff);
+ }
+
+ if (!IsSibcall)
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+
+ SDValue RetAddrFrIdx;
+ // Load return address for tail calls.
+ if (isTailCall && FPDiff)
+ Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
+ Is64Bit, FPDiff, dl);
+
+ SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+ SmallVector<SDValue, 8> MemOpChains;
+ SDValue StackPtr;
+
+ // Walk the register/memloc assignments, inserting copies/loads. In the case
+ // of tail call optimization arguments are handle later.
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ EVT RegVT = VA.getLocVT();
+ SDValue Arg = OutVals[i];
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+ bool isByVal = Flags.isByVal();
+
+ // Promote the value if needed.
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
+ break;
+ case CCValAssign::AExt:
+ if (RegVT.isVector() && RegVT.getSizeInBits() == 128) {
+ // Special case: passing MMX values in XMM registers.
+ Arg = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
+ Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
+ Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
+ } else
+ Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
+ break;
+ case CCValAssign::BCvt:
+ Arg = DAG.getNode(ISD::BITCAST, dl, RegVT, Arg);
+ break;
+ case CCValAssign::Indirect: {
+ // Store the argument.
+ SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
+ int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
+ Chain = DAG.getStore(Chain, dl, Arg, SpillSlot,
+ MachinePointerInfo::getFixedStack(FI),
+ false, false, 0);
+ Arg = SpillSlot;
+ break;
+ }
+ }
+
+ if (VA.isRegLoc()) {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ if (isVarArg && IsWin64) {
+ // Win64 ABI requires argument XMM reg to be copied to the corresponding
+ // shadow reg if callee is a varargs function.
+ unsigned ShadowReg = 0;
+ switch (VA.getLocReg()) {
+ case X86::XMM0: ShadowReg = X86::RCX; break;
+ case X86::XMM1: ShadowReg = X86::RDX; break;
+ case X86::XMM2: ShadowReg = X86::R8; break;
+ case X86::XMM3: ShadowReg = X86::R9; break;
+ }
+ if (ShadowReg)
+ RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
+ }
+ } else if (!IsSibcall && (!isTailCall || isByVal)) {
+ assert(VA.isMemLoc());
+ if (StackPtr.getNode() == 0)
+ StackPtr = DAG.getCopyFromReg(Chain, dl, X86StackPtr, getPointerTy());
+ MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
+ dl, DAG, VA, Flags));
+ }
+ }
+
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+ // Build a sequence of copy-to-reg nodes chained together with token chain
+ // and flag operands which copy the outgoing args into registers.
+ SDValue InFlag;
+ // Tail call byval lowering might overwrite argument registers so in case of
+ // tail call optimization the copies to registers are lowered later.
+ if (!isTailCall)
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ if (Subtarget->isPICStyleGOT()) {
+ // ELF / PIC requires GOT in the EBX register before function calls via PLT
+ // GOT pointer.
+ if (!isTailCall) {
+ Chain = DAG.getCopyToReg(Chain, dl, X86::EBX,
+ DAG.getNode(X86ISD::GlobalBaseReg,
+ DebugLoc(), getPointerTy()),
+ InFlag);
+ InFlag = Chain.getValue(1);
+ } else {
+ // If we are tail calling and generating PIC/GOT style code load the
+ // address of the callee into ECX. The value in ecx is used as target of
+ // the tail jump. This is done to circumvent the ebx/callee-saved problem
+ // for tail calls on PIC/GOT architectures. Normally we would just put the
+ // address of GOT into ebx and then call target@PLT. But for tail calls
+ // ebx would be restored (since ebx is callee saved) before jumping to the
+ // target@PLT.
+
+ // Note: The actual moving to ECX is done further down.
+ GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
+ if (G && !G->getGlobal()->hasHiddenVisibility() &&
+ !G->getGlobal()->hasProtectedVisibility())
+ Callee = LowerGlobalAddress(Callee, DAG);
+ else if (isa<ExternalSymbolSDNode>(Callee))
+ Callee = LowerExternalSymbol(Callee, DAG);
+ }
+ }
+
+ if (Is64Bit && isVarArg && !IsWin64) {
+ // From AMD64 ABI document:
+ // For calls that may call functions that use varargs or stdargs
+ // (prototype-less calls or calls to functions containing ellipsis (...) in
+ // the declaration) %al is used as hidden argument to specify the number
+ // of SSE registers used. The contents of %al do not need to match exactly
+ // the number of registers, but must be an ubound on the number of SSE
+ // registers used and is in the range 0 - 8 inclusive.
+
+ // Count the number of XMM registers allocated.
+ static const unsigned XMMArgRegs[] = {
+ X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
+ X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
+ };
+ unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
+ assert((Subtarget->hasXMM() || !NumXMMRegs)
+ && "SSE registers cannot be used when SSE is disabled");
+
+ Chain = DAG.getCopyToReg(Chain, dl, X86::AL,
+ DAG.getConstant(NumXMMRegs, MVT::i8), InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+
+ // For tail calls lower the arguments to the 'real' stack slot.
+ if (isTailCall) {
+ // Force all the incoming stack arguments to be loaded from the stack
+ // before any new outgoing arguments are stored to the stack, because the
+ // outgoing stack slots may alias the incoming argument stack slots, and
+ // the alias isn't otherwise explicit. This is slightly more conservative
+ // than necessary, because it means that each store effectively depends
+ // on every argument instead of just those arguments it would clobber.
+ SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain);
+
+ SmallVector<SDValue, 8> MemOpChains2;
+ SDValue FIN;
+ int FI = 0;
+ // Do not flag preceding copytoreg stuff together with the following stuff.
+ InFlag = SDValue();
+ if (GuaranteedTailCallOpt) {
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ if (VA.isRegLoc())
+ continue;
+ assert(VA.isMemLoc());
+ SDValue Arg = OutVals[i];
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+ // Create frame index.
+ int32_t Offset = VA.getLocMemOffset()+FPDiff;
+ uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
+ FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
+ FIN = DAG.getFrameIndex(FI, getPointerTy());
+
+ if (Flags.isByVal()) {
+ // Copy relative to framepointer.
+ SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset());
+ if (StackPtr.getNode() == 0)
+ StackPtr = DAG.getCopyFromReg(Chain, dl, X86StackPtr,
+ getPointerTy());
+ Source = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, Source);
+
+ MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN,
+ ArgChain,
+ Flags, DAG, dl));
+ } else {
+ // Store relative to framepointer.
+ MemOpChains2.push_back(
+ DAG.getStore(ArgChain, dl, Arg, FIN,
+ MachinePointerInfo::getFixedStack(FI),
+ false, false, 0));
+ }
+ }
+ }
+
+ if (!MemOpChains2.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains2[0], MemOpChains2.size());
+
+ // Copy arguments to their registers.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+ InFlag =SDValue();
+
+ // Store the return address to the appropriate stack slot.
+ Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx, Is64Bit,
+ FPDiff, dl);
+ }
+
+ if (getTargetMachine().getCodeModel() == CodeModel::Large) {
+ assert(Is64Bit && "Large code model is only legal in 64-bit mode.");
+ // In the 64-bit large code model, we have to make all calls
+ // through a register, since the call instruction's 32-bit
+ // pc-relative offset may not be large enough to hold the whole
+ // address.
+ } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ // If the callee is a GlobalAddress node (quite common, every direct call
+ // is) turn it into a TargetGlobalAddress node so that legalize doesn't hack
+ // it.
+
+ // We should use extra load for direct calls to dllimported functions in
+ // non-JIT mode.
+ const GlobalValue *GV = G->getGlobal();
+ if (!GV->hasDLLImportLinkage()) {
+ unsigned char OpFlags = 0;
+ bool ExtraLoad = false;
+ unsigned WrapperKind = ISD::DELETED_NODE;
+
+ // On ELF targets, in both X86-64 and X86-32 mode, direct calls to
+ // external symbols most go through the PLT in PIC mode. If the symbol
+ // has hidden or protected visibility, or if it is static or local, then
+ // we don't need to use the PLT - we can directly call it.
+ if (Subtarget->isTargetELF() &&
+ getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
+ GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) {
+ OpFlags = X86II::MO_PLT;
+ } else if (Subtarget->isPICStyleStubAny() &&
+ (GV->isDeclaration() || GV->isWeakForLinker()) &&
+ (!Subtarget->getTargetTriple().isMacOSX() ||
+ Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) {
+ // PC-relative references to external symbols should go through $stub,
+ // unless we're building with the leopard linker or later, which
+ // automatically synthesizes these stubs.
+ OpFlags = X86II::MO_DARWIN_STUB;
+ } else if (Subtarget->isPICStyleRIPRel() &&
+ isa<Function>(GV) &&
+ cast<Function>(GV)->hasFnAttr(Attribute::NonLazyBind)) {
+ // If the function is marked as non-lazy, generate an indirect call
+ // which loads from the GOT directly. This avoids runtime overhead
+ // at the cost of eager binding (and one extra byte of encoding).
+ OpFlags = X86II::MO_GOTPCREL;
+ WrapperKind = X86ISD::WrapperRIP;
+ ExtraLoad = true;
+ }
+
+ Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(),
+ G->getOffset(), OpFlags);
+
+ // Add a wrapper if needed.
+ if (WrapperKind != ISD::DELETED_NODE)
+ Callee = DAG.getNode(X86ISD::WrapperRIP, dl, getPointerTy(), Callee);
+ // Add extra indirection if needed.
+ if (ExtraLoad)
+ Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Callee,
+ MachinePointerInfo::getGOT(),
+ false, false, 0);
+ }
+ } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ unsigned char OpFlags = 0;
+
+ // On ELF targets, in either X86-64 or X86-32 mode, direct calls to
+ // external symbols should go through the PLT.
+ if (Subtarget->isTargetELF() &&
+ getTargetMachine().getRelocationModel() == Reloc::PIC_) {
+ OpFlags = X86II::MO_PLT;
+ } else if (Subtarget->isPICStyleStubAny() &&
+ (!Subtarget->getTargetTriple().isMacOSX() ||
+ Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) {
+ // PC-relative references to external symbols should go through $stub,
+ // unless we're building with the leopard linker or later, which
+ // automatically synthesizes these stubs.
+ OpFlags = X86II::MO_DARWIN_STUB;
+ }
+
+ Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy(),
+ OpFlags);
+ }
+
+ // Returns a chain & a flag for retval copy to use.
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SmallVector<SDValue, 8> Ops;
+
+ if (!IsSibcall && isTailCall) {
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
+ DAG.getIntPtrConstant(0, true), InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ if (isTailCall)
+ Ops.push_back(DAG.getConstant(FPDiff, MVT::i32));
+
+ // Add argument registers to the end of the list so that they are known live
+ // into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+ // Add an implicit use GOT pointer in EBX.
+ if (!isTailCall && Subtarget->isPICStyleGOT())
+ Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));
+
+ // Add an implicit use of AL for non-Windows x86 64-bit vararg functions.
+ if (Is64Bit && isVarArg && !IsWin64)
+ Ops.push_back(DAG.getRegister(X86::AL, MVT::i8));
+
+ if (InFlag.getNode())
+ Ops.push_back(InFlag);
+
+ if (isTailCall) {
+ // We used to do:
+ //// If this is the first return lowered for this function, add the regs
+ //// to the liveout set for the function.
+ // This isn't right, although it's probably harmless on x86; liveouts
+ // should be computed from returns not tail calls. Consider a void
+ // function making a tail call to a function returning int.
+ return DAG.getNode(X86ISD::TC_RETURN, dl,
+ NodeTys, &Ops[0], Ops.size());
+ }
+
+ Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ // Create the CALLSEQ_END node.
+ unsigned NumBytesForCalleeToPush;
+ if (X86::isCalleePop(CallConv, Is64Bit, isVarArg, GuaranteedTailCallOpt))
+ NumBytesForCalleeToPush = NumBytes; // Callee pops everything
+ else if (!Is64Bit && !IsTailCallConvention(CallConv) && IsStructRet)
+ // If this is a call to a struct-return function, the callee
+ // pops the hidden struct pointer, so we have to push it back.
+ // This is common for Darwin/X86, Linux & Mingw32 targets.
+ NumBytesForCalleeToPush = 4;
+ else
+ NumBytesForCalleeToPush = 0; // Callee pops nothing.
+
+ // Returns a flag for retval copy to use.
+ if (!IsSibcall) {
+ Chain = DAG.getCALLSEQ_END(Chain,
+ DAG.getIntPtrConstant(NumBytes, true),
+ DAG.getIntPtrConstant(NumBytesForCalleeToPush,
+ true),
+ InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // Handle result values, copying them out of physregs into vregs that we
+ // return.
+ return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
+ Ins, dl, DAG, InVals);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Fast Calling Convention (tail call) implementation
+//===----------------------------------------------------------------------===//
+
+// Like std call, callee cleans arguments, convention except that ECX is
+// reserved for storing the tail called function address. Only 2 registers are
+// free for argument passing (inreg). Tail call optimization is performed
+// provided:
+// * tailcallopt is enabled
+// * caller/callee are fastcc
+// On X86_64 architecture with GOT-style position independent code only local
+// (within module) calls are supported at the moment.
+// To keep the stack aligned according to platform abi the function
+// GetAlignedArgumentStackSize ensures that argument delta is always multiples
+// of stack alignment. (Dynamic linkers need this - darwin's dyld for example)
+// If a tail called function callee has more arguments than the caller the
+// caller needs to make sure that there is room to move the RETADDR to. This is
+// achieved by reserving an area the size of the argument delta right after the
+// original REtADDR, but before the saved framepointer or the spilled registers
+// e.g. caller(arg1, arg2) calls callee(arg1, arg2,arg3,arg4)
+// stack layout:
+// arg1
+// arg2
+// RETADDR
+// [ new RETADDR
+// move area ]
+// (possible EBP)
+// ESI
+// EDI
+// local1 ..
+
+/// GetAlignedArgumentStackSize - Make the stack size align e.g 16n + 12 aligned
+/// for a 16 byte align requirement.
+unsigned
+X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
+ SelectionDAG& DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ const TargetMachine &TM = MF.getTarget();
+ const TargetFrameLowering &TFI = *TM.getFrameLowering();
+ unsigned StackAlignment = TFI.getStackAlignment();
+ uint64_t AlignMask = StackAlignment - 1;
+ int64_t Offset = StackSize;
+ uint64_t SlotSize = TD->getPointerSize();
+ if ( (Offset & AlignMask) <= (StackAlignment - SlotSize) ) {
+ // Number smaller than 12 so just add the difference.
+ Offset += ((StackAlignment - SlotSize) - (Offset & AlignMask));
+ } else {
+ // Mask out lower bits, add stackalignment once plus the 12 bytes.
+ Offset = ((~AlignMask) & Offset) + StackAlignment +
+ (StackAlignment-SlotSize);
+ }
+ return Offset;
+}
+
+/// MatchingStackOffset - Return true if the given stack call argument is
+/// already available in the same position (relatively) of the caller's
+/// incoming argument stack.
+static
+bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
+ MachineFrameInfo *MFI, const MachineRegisterInfo *MRI,
+ const X86InstrInfo *TII) {
+ unsigned Bytes = Arg.getValueType().getSizeInBits() / 8;
+ int FI = INT_MAX;
+ if (Arg.getOpcode() == ISD::CopyFromReg) {
+ unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(VR))
+ return false;
+ MachineInstr *Def = MRI->getVRegDef(VR);
+ if (!Def)
+ return false;
+ if (!Flags.isByVal()) {
+ if (!TII->isLoadFromStackSlot(Def, FI))
+ return false;
+ } else {
+ unsigned Opcode = Def->getOpcode();
+ if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r) &&
+ Def->getOperand(1).isFI()) {
+ FI = Def->getOperand(1).getIndex();
+ Bytes = Flags.getByValSize();
+ } else
+ return false;
+ }
+ } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
+ if (Flags.isByVal())
+ // ByVal argument is passed in as a pointer but it's now being
+ // dereferenced. e.g.
+ // define @foo(%struct.X* %A) {
+ // tail call @bar(%struct.X* byval %A)
+ // }
+ return false;
+ SDValue Ptr = Ld->getBasePtr();
+ FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
+ if (!FINode)
+ return false;
+ FI = FINode->getIndex();
+ } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) {
+ FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg);
+ FI = FINode->getIndex();
+ Bytes = Flags.getByValSize();
+ } else
+ return false;
+
+ assert(FI != INT_MAX);
+ if (!MFI->isFixedObjectIndex(FI))
+ return false;
+ return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI);
+}
+
+/// IsEligibleForTailCallOptimization - Check whether the call is eligible
+/// for tail call optimization. Targets which want to do tail call
+/// optimization should implement this function.
+bool
+X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
+ CallingConv::ID CalleeCC,
+ bool isVarArg,
+ bool isCalleeStructRet,
+ bool isCallerStructRet,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SelectionDAG& DAG) const {
+ if (!IsTailCallConvention(CalleeCC) &&
+ CalleeCC != CallingConv::C)
+ return false;
+
+ // If -tailcallopt is specified, make fastcc functions tail-callable.
+ const MachineFunction &MF = DAG.getMachineFunction();
+ const Function *CallerF = DAG.getMachineFunction().getFunction();
+ CallingConv::ID CallerCC = CallerF->getCallingConv();
+ bool CCMatch = CallerCC == CalleeCC;
+
+ if (GuaranteedTailCallOpt) {
+ if (IsTailCallConvention(CalleeCC) && CCMatch)
+ return true;
+ return false;
+ }
+
+ // Look for obvious safe cases to perform tail call optimization that do not
+ // require ABI changes. This is what gcc calls sibcall.
+
+ // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
+ // emit a special epilogue.
+ if (RegInfo->needsStackRealignment(MF))
+ return false;
+
+ // Also avoid sibcall optimization if either caller or callee uses struct
+ // return semantics.
+ if (isCalleeStructRet || isCallerStructRet)
+ return false;
+
+ // An stdcall caller is expected to clean up its arguments; the callee
+ // isn't going to do that.
+ if (!CCMatch && CallerCC==CallingConv::X86_StdCall)
+ return false;
+
+ // Do not sibcall optimize vararg calls unless all arguments are passed via
+ // registers.
+ if (isVarArg && !Outs.empty()) {
+
+ // Optimizing for varargs on Win64 is unlikely to be safe without
+ // additional testing.
+ if (Subtarget->isTargetWin64())
+ return false;
+
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+
+ CCInfo.AnalyzeCallOperands(Outs, CC_X86);
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i)
+ if (!ArgLocs[i].isRegLoc())
+ return false;
+ }
+
+ // If the call result is in ST0 / ST1, it needs to be popped off the x87 stack.
+ // Therefore if it's not used by the call it is not safe to optimize this into
+ // a sibcall.
+ bool Unused = false;
+ for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
+ if (!Ins[i].Used) {
+ Unused = true;
+ break;
+ }
+ }
+ if (Unused) {
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CalleeCC, false, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+ CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
+ for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
+ CCValAssign &VA = RVLocs[i];
+ if (VA.getLocReg() == X86::ST0 || VA.getLocReg() == X86::ST1)
+ return false;
+ }
+ }
+
+ // If the calling conventions do not match, then we'd better make sure the
+ // results are returned in the same way as what the caller expects.
+ if (!CCMatch) {
+ SmallVector<CCValAssign, 16> RVLocs1;
+ CCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs1, *DAG.getContext());
+ CCInfo1.AnalyzeCallResult(Ins, RetCC_X86);
+
+ SmallVector<CCValAssign, 16> RVLocs2;
+ CCState CCInfo2(CallerCC, false, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs2, *DAG.getContext());
+ CCInfo2.AnalyzeCallResult(Ins, RetCC_X86);
+
+ if (RVLocs1.size() != RVLocs2.size())
+ return false;
+ for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) {
+ if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc())
+ return false;
+ if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo())
+ return false;
+ if (RVLocs1[i].isRegLoc()) {
+ if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg())
+ return false;
+ } else {
+ if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset())
+ return false;
+ }
+ }
+ }
+
+ // If the callee takes no arguments then go on to check the results of the
+ // call.
+ if (!Outs.empty()) {
+ // Check if stack adjustment is needed. For now, do not do this if any
+ // argument is passed on the stack.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+
+ // Allocate shadow area for Win64
+ if (Subtarget->isTargetWin64()) {
+ CCInfo.AllocateStack(32, 8);
+ }
+
+ CCInfo.AnalyzeCallOperands(Outs, CC_X86);
+ if (CCInfo.getNextStackOffset()) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ if (MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn())
+ return false;
+
+ // Check if the arguments are already laid out in the right way as
+ // the caller's fixed stack objects.
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MachineRegisterInfo *MRI = &MF.getRegInfo();
+ const X86InstrInfo *TII =
+ ((X86TargetMachine&)getTargetMachine()).getInstrInfo();
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ SDValue Arg = OutVals[i];
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+ if (VA.getLocInfo() == CCValAssign::Indirect)
+ return false;
+ if (!VA.isRegLoc()) {
+ if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
+ MFI, MRI, TII))
+ return false;
+ }
+ }
+ }
+
+ // If the tailcall address may be in a register, then make sure it's
+ // possible to register allocate for it. In 32-bit, the call address can
+ // only target EAX, EDX, or ECX since the tail call must be scheduled after
+ // callee-saved registers are restored. These happen to be the same
+ // registers used to pass 'inreg' arguments so watch out for those.
+ if (!Subtarget->is64Bit() &&
+ !isa<GlobalAddressSDNode>(Callee) &&
+ !isa<ExternalSymbolSDNode>(Callee)) {
+ unsigned NumInRegs = 0;
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ if (!VA.isRegLoc())
+ continue;
+ unsigned Reg = VA.getLocReg();
+ switch (Reg) {
+ default: break;
+ case X86::EAX: case X86::EDX: case X86::ECX:
+ if (++NumInRegs == 3)
+ return false;
+ break;
+ }
+ }
+ }
+ }
+
+ return true;
+}
+
+FastISel *
+X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo) const {
+ return X86::createFastISel(funcInfo);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Other Lowering Hooks
+//===----------------------------------------------------------------------===//
+
+static bool MayFoldLoad(SDValue Op) {
+ return Op.hasOneUse() && ISD::isNormalLoad(Op.getNode());
+}
+
+static bool MayFoldIntoStore(SDValue Op) {
+ return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin());
+}
+
+static bool isTargetShuffle(unsigned Opcode) {
+ switch(Opcode) {
+ default: return false;
+ case X86ISD::PSHUFD:
+ case X86ISD::PSHUFHW:
+ case X86ISD::PSHUFLW:
+ case X86ISD::SHUFPD:
+ case X86ISD::PALIGN:
+ case X86ISD::SHUFPS:
+ case X86ISD::MOVLHPS:
+ case X86ISD::MOVLHPD:
+ case X86ISD::MOVHLPS:
+ case X86ISD::MOVLPS:
+ case X86ISD::MOVLPD:
+ case X86ISD::MOVSHDUP:
+ case X86ISD::MOVSLDUP:
+ case X86ISD::MOVDDUP:
+ case X86ISD::MOVSS:
+ case X86ISD::MOVSD:
+ case X86ISD::UNPCKLPS:
+ case X86ISD::UNPCKLPD:
+ case X86ISD::VUNPCKLPSY:
+ case X86ISD::VUNPCKLPDY:
+ case X86ISD::PUNPCKLWD:
+ case X86ISD::PUNPCKLBW:
+ case X86ISD::PUNPCKLDQ:
+ case X86ISD::PUNPCKLQDQ:
+ case X86ISD::UNPCKHPS:
+ case X86ISD::UNPCKHPD:
+ case X86ISD::VUNPCKHPSY:
+ case X86ISD::VUNPCKHPDY:
+ case X86ISD::PUNPCKHWD:
+ case X86ISD::PUNPCKHBW:
+ case X86ISD::PUNPCKHDQ:
+ case X86ISD::PUNPCKHQDQ:
+ case X86ISD::VPERMILPS:
+ case X86ISD::VPERMILPSY:
+ case X86ISD::VPERMILPD:
+ case X86ISD::VPERMILPDY:
+ case X86ISD::VPERM2F128:
+ return true;
+ }
+ return false;
+}
+
+static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
+ SDValue V1, SelectionDAG &DAG) {
+ switch(Opc) {
+ default: llvm_unreachable("Unknown x86 shuffle node");
+ case X86ISD::MOVSHDUP:
+ case X86ISD::MOVSLDUP:
+ case X86ISD::MOVDDUP:
+ return DAG.getNode(Opc, dl, VT, V1);
+ }
+
+ return SDValue();
+}
+
+static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
+ SDValue V1, unsigned TargetMask, SelectionDAG &DAG) {
+ switch(Opc) {
+ default: llvm_unreachable("Unknown x86 shuffle node");
+ case X86ISD::PSHUFD:
+ case X86ISD::PSHUFHW:
+ case X86ISD::PSHUFLW:
+ case X86ISD::VPERMILPS:
+ case X86ISD::VPERMILPSY:
+ case X86ISD::VPERMILPD:
+ case X86ISD::VPERMILPDY:
+ return DAG.getNode(Opc, dl, VT, V1, DAG.getConstant(TargetMask, MVT::i8));
+ }
+
+ return SDValue();
+}
+
+static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
+ SDValue V1, SDValue V2, unsigned TargetMask, SelectionDAG &DAG) {
+ switch(Opc) {
+ default: llvm_unreachable("Unknown x86 shuffle node");
+ case X86ISD::PALIGN:
+ case X86ISD::SHUFPD:
+ case X86ISD::SHUFPS:
+ case X86ISD::VPERM2F128:
+ return DAG.getNode(Opc, dl, VT, V1, V2,
+ DAG.getConstant(TargetMask, MVT::i8));
+ }
+ return SDValue();
+}
+
+static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
+ SDValue V1, SDValue V2, SelectionDAG &DAG) {
+ switch(Opc) {
+ default: llvm_unreachable("Unknown x86 shuffle node");
+ case X86ISD::MOVLHPS:
+ case X86ISD::MOVLHPD:
+ case X86ISD::MOVHLPS:
+ case X86ISD::MOVLPS:
+ case X86ISD::MOVLPD:
+ case X86ISD::MOVSS:
+ case X86ISD::MOVSD:
+ case X86ISD::UNPCKLPS:
+ case X86ISD::UNPCKLPD:
+ case X86ISD::VUNPCKLPSY:
+ case X86ISD::VUNPCKLPDY:
+ case X86ISD::PUNPCKLWD:
+ case X86ISD::PUNPCKLBW:
+ case X86ISD::PUNPCKLDQ:
+ case X86ISD::PUNPCKLQDQ:
+ case X86ISD::UNPCKHPS:
+ case X86ISD::UNPCKHPD:
+ case X86ISD::VUNPCKHPSY:
+ case X86ISD::VUNPCKHPDY:
+ case X86ISD::PUNPCKHWD:
+ case X86ISD::PUNPCKHBW:
+ case X86ISD::PUNPCKHDQ:
+ case X86ISD::PUNPCKHQDQ:
+ return DAG.getNode(Opc, dl, VT, V1, V2);
+ }
+ return SDValue();
+}
+
+SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
+ int ReturnAddrIndex = FuncInfo->getRAIndex();
+
+ if (ReturnAddrIndex == 0) {
+ // Set up a frame object for the return address.
+ uint64_t SlotSize = TD->getPointerSize();
+ ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize, -SlotSize,
+ false);
+ FuncInfo->setRAIndex(ReturnAddrIndex);
+ }
+
+ return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy());
+}
+
+
+bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
+ bool hasSymbolicDisplacement) {
+ // Offset should fit into 32 bit immediate field.
+ if (!isInt<32>(Offset))
+ return false;
+
+ // If we don't have a symbolic displacement - we don't have any extra
+ // restrictions.
+ if (!hasSymbolicDisplacement)
+ return true;
+
+ // FIXME: Some tweaks might be needed for medium code model.
+ if (M != CodeModel::Small && M != CodeModel::Kernel)
+ return false;
+
+ // For small code model we assume that latest object is 16MB before end of 31
+ // bits boundary. We may also accept pretty large negative constants knowing
+ // that all objects are in the positive half of address space.
+ if (M == CodeModel::Small && Offset < 16*1024*1024)
+ return true;
+
+ // For kernel code model we know that all object resist in the negative half
+ // of 32bits address space. We may not accept negative offsets, since they may
+ // be just off and we may accept pretty large positive ones.
+ if (M == CodeModel::Kernel && Offset > 0)
+ return true;
+
+ return false;
+}
+
+/// isCalleePop - Determines whether the callee is required to pop its
+/// own arguments. Callee pop is necessary to support tail calls.
+bool X86::isCalleePop(CallingConv::ID CallingConv,
+ bool is64Bit, bool IsVarArg, bool TailCallOpt) {
+ if (IsVarArg)
+ return false;
+
+ switch (CallingConv) {
+ default:
+ return false;
+ case CallingConv::X86_StdCall:
+ return !is64Bit;
+ case CallingConv::X86_FastCall:
+ return !is64Bit;
+ case CallingConv::X86_ThisCall:
+ return !is64Bit;
+ case CallingConv::Fast:
+ return TailCallOpt;
+ case CallingConv::GHC:
+ return TailCallOpt;
+ }
+}
+
+/// TranslateX86CC - do a one to one translation of a ISD::CondCode to the X86
+/// specific condition code, returning the condition code and the LHS/RHS of the
+/// comparison to make.
+static unsigned TranslateX86CC(ISD::CondCode SetCCOpcode, bool isFP,
+ SDValue &LHS, SDValue &RHS, SelectionDAG &DAG) {
+ if (!isFP) {
+ if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
+ if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) {
+ // X > -1 -> X == 0, jump !sign.
+ RHS = DAG.getConstant(0, RHS.getValueType());
+ return X86::COND_NS;
+ } else if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
+ // X < 0 -> X == 0, jump on sign.
+ return X86::COND_S;
+ } else if (SetCCOpcode == ISD::SETLT && RHSC->getZExtValue() == 1) {
+ // X < 1 -> X <= 0
+ RHS = DAG.getConstant(0, RHS.getValueType());
+ return X86::COND_LE;
+ }
+ }
+
+ switch (SetCCOpcode) {
+ default: llvm_unreachable("Invalid integer condition!");
+ case ISD::SETEQ: return X86::COND_E;
+ case ISD::SETGT: return X86::COND_G;
+ case ISD::SETGE: return X86::COND_GE;
+ case ISD::SETLT: return X86::COND_L;
+ case ISD::SETLE: return X86::COND_LE;
+ case ISD::SETNE: return X86::COND_NE;
+ case ISD::SETULT: return X86::COND_B;
+ case ISD::SETUGT: return X86::COND_A;
+ case ISD::SETULE: return X86::COND_BE;
+ case ISD::SETUGE: return X86::COND_AE;
+ }
+ }
+
+ // First determine if it is required or is profitable to flip the operands.
+
+ // If LHS is a foldable load, but RHS is not, flip the condition.
+ if (ISD::isNON_EXTLoad(LHS.getNode()) &&
+ !ISD::isNON_EXTLoad(RHS.getNode())) {
+ SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode);
+ std::swap(LHS, RHS);
+ }
+
+ switch (SetCCOpcode) {
+ default: break;
+ case ISD::SETOLT:
+ case ISD::SETOLE:
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ std::swap(LHS, RHS);
+ break;
+ }
+
+ // On a floating point condition, the flags are set as follows:
+ // ZF PF CF op
+ // 0 | 0 | 0 | X > Y
+ // 0 | 0 | 1 | X < Y
+ // 1 | 0 | 0 | X == Y
+ // 1 | 1 | 1 | unordered
+ switch (SetCCOpcode) {
+ default: llvm_unreachable("Condcode should be pre-legalized away");
+ case ISD::SETUEQ:
+ case ISD::SETEQ: return X86::COND_E;
+ case ISD::SETOLT: // flipped
+ case ISD::SETOGT:
+ case ISD::SETGT: return X86::COND_A;
+ case ISD::SETOLE: // flipped
+ case ISD::SETOGE:
+ case ISD::SETGE: return X86::COND_AE;
+ case ISD::SETUGT: // flipped
+ case ISD::SETULT:
+ case ISD::SETLT: return X86::COND_B;
+ case ISD::SETUGE: // flipped
+ case ISD::SETULE:
+ case ISD::SETLE: return X86::COND_BE;
+ case ISD::SETONE:
+ case ISD::SETNE: return X86::COND_NE;
+ case ISD::SETUO: return X86::COND_P;
+ case ISD::SETO: return X86::COND_NP;
+ case ISD::SETOEQ:
+ case ISD::SETUNE: return X86::COND_INVALID;
+ }
+}
+
+/// hasFPCMov - is there a floating point cmov for the specific X86 condition
+/// code. Current x86 isa includes the following FP cmov instructions:
+/// fcmovb, fcomvbe, fcomve, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
+static bool hasFPCMov(unsigned X86CC) {
+ switch (X86CC) {
+ default:
+ return false;
+ case X86::COND_B:
+ case X86::COND_BE:
+ case X86::COND_E:
+ case X86::COND_P:
+ case X86::COND_A:
+ case X86::COND_AE:
+ case X86::COND_NE:
+ case X86::COND_NP:
+ return true;
+ }
+}
+
+/// isFPImmLegal - Returns true if the target can instruction select the
+/// specified FP immediate natively. If false, the legalizer will
+/// materialize the FP immediate as a load from a constant pool.
+bool X86TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
+ for (unsigned i = 0, e = LegalFPImmediates.size(); i != e; ++i) {
+ if (Imm.bitwiseIsEqual(LegalFPImmediates[i]))
+ return true;
+ }
+ return false;
+}
+
+/// isUndefOrInRange - Return true if Val is undef or if its value falls within
+/// the specified range (L, H].
+static bool isUndefOrInRange(int Val, int Low, int Hi) {
+ return (Val < 0) || (Val >= Low && Val < Hi);
+}
+
+/// isUndefOrInRange - Return true if every element in Mask, begining
+/// from position Pos and ending in Pos+Size, falls within the specified
+/// range (L, L+Pos]. or is undef.
+static bool isUndefOrInRange(const SmallVectorImpl<int> &Mask,
+ int Pos, int Size, int Low, int Hi) {
+ for (int i = Pos, e = Pos+Size; i != e; ++i)
+ if (!isUndefOrInRange(Mask[i], Low, Hi))
+ return false;
+ return true;
+}
+
+/// isUndefOrEqual - Val is either less than zero (undef) or equal to the
+/// specified value.
+static bool isUndefOrEqual(int Val, int CmpVal) {
+ if (Val < 0 || Val == CmpVal)
+ return true;
+ return false;
+}
+
+/// isSequentialOrUndefInRange - Return true if every element in Mask, begining
+/// from position Pos and ending in Pos+Size, falls within the specified
+/// sequential range (L, L+Pos]. or is undef.
+static bool isSequentialOrUndefInRange(const SmallVectorImpl<int> &Mask,
+ int Pos, int Size, int Low) {
+ for (int i = Pos, e = Pos+Size; i != e; ++i, ++Low)
+ if (!isUndefOrEqual(Mask[i], Low))
+ return false;
+ return true;
+}
+
+/// isPSHUFDMask - Return true if the node specifies a shuffle of elements that
+/// is suitable for input to PSHUFD or PSHUFW. That is, it doesn't reference
+/// the second operand.
+static bool isPSHUFDMask(const SmallVectorImpl<int> &Mask, EVT VT) {
+ if (VT == MVT::v4f32 || VT == MVT::v4i32 )
+ return (Mask[0] < 4 && Mask[1] < 4 && Mask[2] < 4 && Mask[3] < 4);
+ if (VT == MVT::v2f64 || VT == MVT::v2i64)
+ return (Mask[0] < 2 && Mask[1] < 2);
+ return false;
+}
+
+bool X86::isPSHUFDMask(ShuffleVectorSDNode *N) {
+ SmallVector<int, 8> M;
+ N->getMask(M);
+ return ::isPSHUFDMask(M, N->getValueType(0));
+}
+
+/// isPSHUFHWMask - Return true if the node specifies a shuffle of elements that
+/// is suitable for input to PSHUFHW.
+static bool isPSHUFHWMask(const SmallVectorImpl<int> &Mask, EVT VT) {
+ if (VT != MVT::v8i16)
+ return false;
+
+ // Lower quadword copied in order or undef.
+ for (int i = 0; i != 4; ++i)
+ if (Mask[i] >= 0 && Mask[i] != i)
+ return false;
+
+ // Upper quadword shuffled.
+ for (int i = 4; i != 8; ++i)
+ if (Mask[i] >= 0 && (Mask[i] < 4 || Mask[i] > 7))
+ return false;
+
+ return true;
+}
+
+bool X86::isPSHUFHWMask(ShuffleVectorSDNode *N) {
+ SmallVector<int, 8> M;
+ N->getMask(M);
+ return ::isPSHUFHWMask(M, N->getValueType(0));
+}
+
+/// isPSHUFLWMask - Return true if the node specifies a shuffle of elements that
+/// is suitable for input to PSHUFLW.
+static bool isPSHUFLWMask(const SmallVectorImpl<int> &Mask, EVT VT) {
+ if (VT != MVT::v8i16)
+ return false;
+
+ // Upper quadword copied in order.
+ for (int i = 4; i != 8; ++i)
+ if (Mask[i] >= 0 && Mask[i] != i)
+ return false;
+
+ // Lower quadword shuffled.
+ for (int i = 0; i != 4; ++i)
+ if (Mask[i] >= 4)
+ return false;
+
+ return true;
+}
+
+bool X86::isPSHUFLWMask(ShuffleVectorSDNode *N) {
+ SmallVector<int, 8> M;
+ N->getMask(M);
+ return ::isPSHUFLWMask(M, N->getValueType(0));
+}
+
+/// isPALIGNRMask - Return true if the node specifies a shuffle of elements that
+/// is suitable for input to PALIGNR.
+static bool isPALIGNRMask(const SmallVectorImpl<int> &Mask, EVT VT,
+ bool hasSSSE3OrAVX) {
+ int i, e = VT.getVectorNumElements();
+ if (VT.getSizeInBits() != 128 && VT.getSizeInBits() != 64)
+ return false;
+
+ // Do not handle v2i64 / v2f64 shuffles with palignr.
+ if (e < 4 || !hasSSSE3OrAVX)
+ return false;
+
+ for (i = 0; i != e; ++i)
+ if (Mask[i] >= 0)
+ break;
+
+ // All undef, not a palignr.
+ if (i == e)
+ return false;
+
+ // Make sure we're shifting in the right direction.
+ if (Mask[i] <= i)
+ return false;
+
+ int s = Mask[i] - i;
+
+ // Check the rest of the elements to see if they are consecutive.
+ for (++i; i != e; ++i) {
+ int m = Mask[i];
+ if (m >= 0 && m != s+i)
+ return false;
+ }
+ return true;
+}
+
+/// isVSHUFPSYMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to 256-bit
+/// VSHUFPSY.
+static bool isVSHUFPSYMask(const SmallVectorImpl<int> &Mask, EVT VT,
+ const X86Subtarget *Subtarget) {
+ int NumElems = VT.getVectorNumElements();
+
+ if (!Subtarget->hasAVX() || VT.getSizeInBits() != 256)
+ return false;
+
+ if (NumElems != 8)
+ return false;
+
+ // VSHUFPSY divides the resulting vector into 4 chunks.
+ // The sources are also splitted into 4 chunks, and each destination
+ // chunk must come from a different source chunk.
+ //
+ // SRC1 => X7 X6 X5 X4 X3 X2 X1 X0
+ // SRC2 => Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y9
+ //
+ // DST => Y7..Y4, Y7..Y4, X7..X4, X7..X4,
+ // Y3..Y0, Y3..Y0, X3..X0, X3..X0
+ //
+ int QuarterSize = NumElems/4;
+ int HalfSize = QuarterSize*2;
+ for (int i = 0; i < QuarterSize; ++i)
+ if (!isUndefOrInRange(Mask[i], 0, HalfSize))
+ return false;
+ for (int i = QuarterSize; i < QuarterSize*2; ++i)
+ if (!isUndefOrInRange(Mask[i], NumElems, NumElems+HalfSize))
+ return false;
+
+ // The mask of the second half must be the same as the first but with
+ // the appropriate offsets. This works in the same way as VPERMILPS
+ // works with masks.
+ for (int i = QuarterSize*2; i < QuarterSize*3; ++i) {
+ if (!isUndefOrInRange(Mask[i], HalfSize, NumElems))
+ return false;
+ int FstHalfIdx = i-HalfSize;
+ if (Mask[FstHalfIdx] < 0)
+ continue;
+ if (!isUndefOrEqual(Mask[i], Mask[FstHalfIdx]+HalfSize))
+ return false;
+ }
+ for (int i = QuarterSize*3; i < NumElems; ++i) {
+ if (!isUndefOrInRange(Mask[i], NumElems+HalfSize, NumElems*2))
+ return false;
+ int FstHalfIdx = i-HalfSize;
+ if (Mask[FstHalfIdx] < 0)
+ continue;
+ if (!isUndefOrEqual(Mask[i], Mask[FstHalfIdx]+HalfSize))
+ return false;
+
+ }
+
+ return true;
+}
+
+/// getShuffleVSHUFPSYImmediate - Return the appropriate immediate to shuffle
+/// the specified VECTOR_MASK mask with VSHUFPSY instruction.
+static unsigned getShuffleVSHUFPSYImmediate(SDNode *N) {
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+ EVT VT = SVOp->getValueType(0);
+ int NumElems = VT.getVectorNumElements();
+
+ assert(NumElems == 8 && VT.getSizeInBits() == 256 &&
+ "Only supports v8i32 and v8f32 types");
+
+ int HalfSize = NumElems/2;
+ unsigned Mask = 0;
+ for (int i = 0; i != NumElems ; ++i) {
+ if (SVOp->getMaskElt(i) < 0)
+ continue;
+ // The mask of the first half must be equal to the second one.
+ unsigned Shamt = (i%HalfSize)*2;
+ unsigned Elt = SVOp->getMaskElt(i) % HalfSize;
+ Mask |= Elt << Shamt;
+ }
+
+ return Mask;
+}
+
+/// isVSHUFPDYMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to 256-bit
+/// VSHUFPDY. This shuffle doesn't have the same restriction as the PS
+/// version and the mask of the second half isn't binded with the first
+/// one.
+static bool isVSHUFPDYMask(const SmallVectorImpl<int> &Mask, EVT VT,
+ const X86Subtarget *Subtarget) {
+ int NumElems = VT.getVectorNumElements();
+
+ if (!Subtarget->hasAVX() || VT.getSizeInBits() != 256)
+ return false;
+
+ if (NumElems != 4)
+ return false;
+
+ // VSHUFPSY divides the resulting vector into 4 chunks.
+ // The sources are also splitted into 4 chunks, and each destination
+ // chunk must come from a different source chunk.
+ //
+ // SRC1 => X3 X2 X1 X0
+ // SRC2 => Y3 Y2 Y1 Y0
+ //
+ // DST => Y2..Y3, X2..X3, Y1..Y0, X1..X0
+ //
+ int QuarterSize = NumElems/4;
+ int HalfSize = QuarterSize*2;
+ for (int i = 0; i < QuarterSize; ++i)
+ if (!isUndefOrInRange(Mask[i], 0, HalfSize))
+ return false;
+ for (int i = QuarterSize; i < QuarterSize*2; ++i)
+ if (!isUndefOrInRange(Mask[i], NumElems, NumElems+HalfSize))
+ return false;
+ for (int i = QuarterSize*2; i < QuarterSize*3; ++i)
+ if (!isUndefOrInRange(Mask[i], HalfSize, NumElems))
+ return false;
+ for (int i = QuarterSize*3; i < NumElems; ++i)
+ if (!isUndefOrInRange(Mask[i], NumElems+HalfSize, NumElems*2))
+ return false;
+
+ return true;
+}
+
+/// getShuffleVSHUFPDYImmediate - Return the appropriate immediate to shuffle
+/// the specified VECTOR_MASK mask with VSHUFPDY instruction.
+static unsigned getShuffleVSHUFPDYImmediate(SDNode *N) {
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+ EVT VT = SVOp->getValueType(0);
+ int NumElems = VT.getVectorNumElements();
+
+ assert(NumElems == 4 && VT.getSizeInBits() == 256 &&
+ "Only supports v4i64 and v4f64 types");
+
+ int HalfSize = NumElems/2;
+ unsigned Mask = 0;
+ for (int i = 0; i != NumElems ; ++i) {
+ if (SVOp->getMaskElt(i) < 0)
+ continue;
+ int Elt = SVOp->getMaskElt(i) % HalfSize;
+ Mask |= Elt << i;
+ }
+
+ return Mask;
+}
+
+/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to 128-bit
+/// SHUFPS and SHUFPD.
+static bool isSHUFPMask(const SmallVectorImpl<int> &Mask, EVT VT) {
+ int NumElems = VT.getVectorNumElements();
+
+ if (VT.getSizeInBits() != 128)
+ return false;
+
+ if (NumElems != 2 && NumElems != 4)
+ return false;
+
+ int Half = NumElems / 2;
+ for (int i = 0; i < Half; ++i)
+ if (!isUndefOrInRange(Mask[i], 0, NumElems))
+ return false;
+ for (int i = Half; i < NumElems; ++i)
+ if (!isUndefOrInRange(Mask[i], NumElems, NumElems*2))
+ return false;
+
+ return true;
+}
+
+bool X86::isSHUFPMask(ShuffleVectorSDNode *N) {
+ SmallVector<int, 8> M;
+ N->getMask(M);
+ return ::isSHUFPMask(M, N->getValueType(0));
+}
+
+/// isCommutedSHUFP - Returns true if the shuffle mask is exactly
+/// the reverse of what x86 shuffles want. x86 shuffles requires the lower
+/// half elements to come from vector 1 (which would equal the dest.) and
+/// the upper half to come from vector 2.
+static bool isCommutedSHUFPMask(const SmallVectorImpl<int> &Mask, EVT VT) {
+ int NumElems = VT.getVectorNumElements();
+
+ if (NumElems != 2 && NumElems != 4)
+ return false;
+
+ int Half = NumElems / 2;
+ for (int i = 0; i < Half; ++i)
+ if (!isUndefOrInRange(Mask[i], NumElems, NumElems*2))
+ return false;
+ for (int i = Half; i < NumElems; ++i)
+ if (!isUndefOrInRange(Mask[i], 0, NumElems))
+ return false;
+ return true;
+}
+
+static bool isCommutedSHUFP(ShuffleVectorSDNode *N) {
+ SmallVector<int, 8> M;
+ N->getMask(M);
+ return isCommutedSHUFPMask(M, N->getValueType(0));
+}
+
+/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to MOVHLPS.
+bool X86::isMOVHLPSMask(ShuffleVectorSDNode *N) {
+ EVT VT = N->getValueType(0);
+ unsigned NumElems = VT.getVectorNumElements();
+
+ if (VT.getSizeInBits() != 128)
+ return false;
+
+ if (NumElems != 4)
+ return false;
+
+ // Expect bit0 == 6, bit1 == 7, bit2 == 2, bit3 == 3
+ return isUndefOrEqual(N->getMaskElt(0), 6) &&
+ isUndefOrEqual(N->getMaskElt(1), 7) &&
+ isUndefOrEqual(N->getMaskElt(2), 2) &&
+ isUndefOrEqual(N->getMaskElt(3), 3);
+}
+
+/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form
+/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef,
+/// <2, 3, 2, 3>
+bool X86::isMOVHLPS_v_undef_Mask(ShuffleVectorSDNode *N) {
+ EVT VT = N->getValueType(0);
+ unsigned NumElems = VT.getVectorNumElements();
+
+ if (VT.getSizeInBits() != 128)
+ return false;
+
+ if (NumElems != 4)
+ return false;
+
+ return isUndefOrEqual(N->getMaskElt(0), 2) &&
+ isUndefOrEqual(N->getMaskElt(1), 3) &&
+ isUndefOrEqual(N->getMaskElt(2), 2) &&
+ isUndefOrEqual(N->getMaskElt(3), 3);
+}
+
+/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}.
+bool X86::isMOVLPMask(ShuffleVectorSDNode *N) {
+ unsigned NumElems = N->getValueType(0).getVectorNumElements();
+
+ if (NumElems != 2 && NumElems != 4)
+ return false;
+
+ for (unsigned i = 0; i < NumElems/2; ++i)
+ if (!isUndefOrEqual(N->getMaskElt(i), i + NumElems))
+ return false;
+
+ for (unsigned i = NumElems/2; i < NumElems; ++i)
+ if (!isUndefOrEqual(N->getMaskElt(i), i))
+ return false;
+
+ return true;
+}
+
+/// isMOVLHPSMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to MOVLHPS.
+bool X86::isMOVLHPSMask(ShuffleVectorSDNode *N) {
+ unsigned NumElems = N->getValueType(0).getVectorNumElements();
+
+ if ((NumElems != 2 && NumElems != 4)
+ || N->getValueType(0).getSizeInBits() > 128)
+ return false;
+
+ for (unsigned i = 0; i < NumElems/2; ++i)
+ if (!isUndefOrEqual(N->getMaskElt(i), i))
+ return false;
+
+ for (unsigned i = 0; i < NumElems/2; ++i)
+ if (!isUndefOrEqual(N->getMaskElt(i + NumElems/2), i + NumElems))
+ return false;
+
+ return true;
+}
+
+/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to UNPCKL.
+static bool isUNPCKLMask(const SmallVectorImpl<int> &Mask, EVT VT,
+ bool V2IsSplat = false) {
+ int NumElts = VT.getVectorNumElements();
+
+ assert((VT.is128BitVector() || VT.is256BitVector()) &&
+ "Unsupported vector type for unpckh");
+
+ if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8)
+ return false;
+
+ // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
+ // independently on 128-bit lanes.
+ unsigned NumLanes = VT.getSizeInBits()/128;
+ unsigned NumLaneElts = NumElts/NumLanes;
+
+ unsigned Start = 0;
+ unsigned End = NumLaneElts;
+ for (unsigned s = 0; s < NumLanes; ++s) {
+ for (unsigned i = Start, j = s * NumLaneElts;
+ i != End;
+ i += 2, ++j) {
+ int BitI = Mask[i];
+ int BitI1 = Mask[i+1];
+ if (!isUndefOrEqual(BitI, j))
+ return false;
+ if (V2IsSplat) {
+ if (!isUndefOrEqual(BitI1, NumElts))
+ return false;
+ } else {
+ if (!isUndefOrEqual(BitI1, j + NumElts))
+ return false;
+ }
+ }
+ // Process the next 128 bits.
+ Start += NumLaneElts;
+ End += NumLaneElts;
+ }
+
+ return true;
+}
+
+bool X86::isUNPCKLMask(ShuffleVectorSDNode *N, bool V2IsSplat) {
+ SmallVector<int, 8> M;
+ N->getMask(M);
+ return ::isUNPCKLMask(M, N->getValueType(0), V2IsSplat);
+}
+
+/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to UNPCKH.
+static bool isUNPCKHMask(const SmallVectorImpl<int> &Mask, EVT VT,
+ bool V2IsSplat = false) {
+ int NumElts = VT.getVectorNumElements();
+
+ assert((VT.is128BitVector() || VT.is256BitVector()) &&
+ "Unsupported vector type for unpckh");
+
+ if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8)
+ return false;
+
+ // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
+ // independently on 128-bit lanes.
+ unsigned NumLanes = VT.getSizeInBits()/128;
+ unsigned NumLaneElts = NumElts/NumLanes;
+
+ unsigned Start = 0;
+ unsigned End = NumLaneElts;
+ for (unsigned l = 0; l != NumLanes; ++l) {
+ for (unsigned i = Start, j = (l*NumLaneElts)+NumLaneElts/2;
+ i != End; i += 2, ++j) {
+ int BitI = Mask[i];
+ int BitI1 = Mask[i+1];
+ if (!isUndefOrEqual(BitI, j))
+ return false;
+ if (V2IsSplat) {
+ if (isUndefOrEqual(BitI1, NumElts))
+ return false;
+ } else {
+ if (!isUndefOrEqual(BitI1, j+NumElts))
+ return false;
+ }
+ }
+ // Process the next 128 bits.
+ Start += NumLaneElts;
+ End += NumLaneElts;
+ }
+ return true;
+}
+
+bool X86::isUNPCKHMask(ShuffleVectorSDNode *N, bool V2IsSplat) {
+ SmallVector<int, 8> M;
+ N->getMask(M);
+ return ::isUNPCKHMask(M, N->getValueType(0), V2IsSplat);
+}
+
+/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form
+/// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef,
+/// <0, 0, 1, 1>
+static bool isUNPCKL_v_undef_Mask(const SmallVectorImpl<int> &Mask, EVT VT) {
+ int NumElems = VT.getVectorNumElements();
+ if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
+ return false;
+
+ // For 256-bit i64/f64, use MOVDDUPY instead, so reject the matching pattern
+ // FIXME: Need a better way to get rid of this, there's no latency difference
+ // between UNPCKLPD and MOVDDUP, the later should always be checked first and
+ // the former later. We should also remove the "_undef" special mask.
+ if (NumElems == 4 && VT.getSizeInBits() == 256)
+ return false;
+
+ // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
+ // independently on 128-bit lanes.
+ unsigned NumLanes = VT.getSizeInBits() / 128;
+ unsigned NumLaneElts = NumElems / NumLanes;
+
+ for (unsigned s = 0; s < NumLanes; ++s) {
+ for (unsigned i = s * NumLaneElts, j = s * NumLaneElts;
+ i != NumLaneElts * (s + 1);
+ i += 2, ++j) {
+ int BitI = Mask[i];
+ int BitI1 = Mask[i+1];
+
+ if (!isUndefOrEqual(BitI, j))
+ return false;
+ if (!isUndefOrEqual(BitI1, j))
+ return false;
+ }
+ }
+
+ return true;
+}
+
+bool X86::isUNPCKL_v_undef_Mask(ShuffleVectorSDNode *N) {
+ SmallVector<int, 8> M;
+ N->getMask(M);
+ return ::isUNPCKL_v_undef_Mask(M, N->getValueType(0));
+}
+
+/// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form
+/// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef,
+/// <2, 2, 3, 3>
+static bool isUNPCKH_v_undef_Mask(const SmallVectorImpl<int> &Mask, EVT VT) {
+ int NumElems = VT.getVectorNumElements();
+ if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
+ return false;
+
+ for (int i = 0, j = NumElems / 2; i != NumElems; i += 2, ++j) {
+ int BitI = Mask[i];
+ int BitI1 = Mask[i+1];
+ if (!isUndefOrEqual(BitI, j))
+ return false;
+ if (!isUndefOrEqual(BitI1, j))
+ return false;
+ }
+ return true;
+}
+
+bool X86::isUNPCKH_v_undef_Mask(ShuffleVectorSDNode *N) {
+ SmallVector<int, 8> M;
+ N->getMask(M);
+ return ::isUNPCKH_v_undef_Mask(M, N->getValueType(0));
+}
+
+/// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to MOVSS,
+/// MOVSD, and MOVD, i.e. setting the lowest element.
+static bool isMOVLMask(const SmallVectorImpl<int> &Mask, EVT VT) {
+ if (VT.getVectorElementType().getSizeInBits() < 32)
+ return false;
+
+ int NumElts = VT.getVectorNumElements();
+
+ if (!isUndefOrEqual(Mask[0], NumElts))
+ return false;
+
+ for (int i = 1; i < NumElts; ++i)
+ if (!isUndefOrEqual(Mask[i], i))
+ return false;
+
+ return true;
+}
+
+bool X86::isMOVLMask(ShuffleVectorSDNode *N) {
+ SmallVector<int, 8> M;
+ N->getMask(M);
+ return ::isMOVLMask(M, N->getValueType(0));
+}
+
+/// isVPERM2F128Mask - Match 256-bit shuffles where the elements are considered
+/// as permutations between 128-bit chunks or halves. As an example: this
+/// shuffle bellow:
+/// vector_shuffle <4, 5, 6, 7, 12, 13, 14, 15>
+/// The first half comes from the second half of V1 and the second half from the
+/// the second half of V2.
+static bool isVPERM2F128Mask(const SmallVectorImpl<int> &Mask, EVT VT,
+ const X86Subtarget *Subtarget) {
+ if (!Subtarget->hasAVX() || VT.getSizeInBits() != 256)
+ return false;
+
+ // The shuffle result is divided into half A and half B. In total the two
+ // sources have 4 halves, namely: C, D, E, F. The final values of A and
+ // B must come from C, D, E or F.
+ int HalfSize = VT.getVectorNumElements()/2;
+ bool MatchA = false, MatchB = false;
+
+ // Check if A comes from one of C, D, E, F.
+ for (int Half = 0; Half < 4; ++Half) {
+ if (isSequentialOrUndefInRange(Mask, 0, HalfSize, Half*HalfSize)) {
+ MatchA = true;
+ break;
+ }
+ }
+
+ // Check if B comes from one of C, D, E, F.
+ for (int Half = 0; Half < 4; ++Half) {
+ if (isSequentialOrUndefInRange(Mask, HalfSize, HalfSize, Half*HalfSize)) {
+ MatchB = true;
+ break;
+ }
+ }
+
+ return MatchA && MatchB;
+}
+
+/// getShuffleVPERM2F128Immediate - Return the appropriate immediate to shuffle
+/// the specified VECTOR_MASK mask with VPERM2F128 instructions.
+static unsigned getShuffleVPERM2F128Immediate(SDNode *N) {
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+ EVT VT = SVOp->getValueType(0);
+
+ int HalfSize = VT.getVectorNumElements()/2;
+
+ int FstHalf = 0, SndHalf = 0;
+ for (int i = 0; i < HalfSize; ++i) {
+ if (SVOp->getMaskElt(i) > 0) {
+ FstHalf = SVOp->getMaskElt(i)/HalfSize;
+ break;
+ }
+ }
+ for (int i = HalfSize; i < HalfSize*2; ++i) {
+ if (SVOp->getMaskElt(i) > 0) {
+ SndHalf = SVOp->getMaskElt(i)/HalfSize;
+ break;
+ }
+ }
+
+ return (FstHalf | (SndHalf << 4));
+}
+
+/// isVPERMILPDMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to VPERMILPD*.
+/// Note that VPERMIL mask matching is different depending whether theunderlying
+/// type is 32 or 64. In the VPERMILPS the high half of the mask should point
+/// to the same elements of the low, but to the higher half of the source.
+/// In VPERMILPD the two lanes could be shuffled independently of each other
+/// with the same restriction that lanes can't be crossed.
+static bool isVPERMILPDMask(const SmallVectorImpl<int> &Mask, EVT VT,
+ const X86Subtarget *Subtarget) {
+ int NumElts = VT.getVectorNumElements();
+ int NumLanes = VT.getSizeInBits()/128;
+
+ if (!Subtarget->hasAVX())
+ return false;
+
+ // Only match 256-bit with 64-bit types
+ if (VT.getSizeInBits() != 256 || NumElts != 4)
+ return false;
+
+ // The mask on the high lane is independent of the low. Both can match
+ // any element in inside its own lane, but can't cross.
+ int LaneSize = NumElts/NumLanes;
+ for (int l = 0; l < NumLanes; ++l)
+ for (int i = l*LaneSize; i < LaneSize*(l+1); ++i) {
+ int LaneStart = l*LaneSize;
+ if (!isUndefOrInRange(Mask[i], LaneStart, LaneStart+LaneSize))
+ return false;
+ }
+
+ return true;
+}
+
+/// isVPERMILPSMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to VPERMILPS*.
+/// Note that VPERMIL mask matching is different depending whether theunderlying
+/// type is 32 or 64. In the VPERMILPS the high half of the mask should point
+/// to the same elements of the low, but to the higher half of the source.
+/// In VPERMILPD the two lanes could be shuffled independently of each other
+/// with the same restriction that lanes can't be crossed.
+static bool isVPERMILPSMask(const SmallVectorImpl<int> &Mask, EVT VT,
+ const X86Subtarget *Subtarget) {
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned NumLanes = VT.getSizeInBits()/128;
+
+ if (!Subtarget->hasAVX())
+ return false;
+
+ // Only match 256-bit with 32-bit types
+ if (VT.getSizeInBits() != 256 || NumElts != 8)
+ return false;
+
+ // The mask on the high lane should be the same as the low. Actually,
+ // they can differ if any of the corresponding index in a lane is undef
+ // and the other stays in range.
+ int LaneSize = NumElts/NumLanes;
+ for (int i = 0; i < LaneSize; ++i) {
+ int HighElt = i+LaneSize;
+ bool HighValid = isUndefOrInRange(Mask[HighElt], LaneSize, NumElts);
+ bool LowValid = isUndefOrInRange(Mask[i], 0, LaneSize);
+
+ if (!HighValid || !LowValid)
+ return false;
+ if (Mask[i] < 0 || Mask[HighElt] < 0)
+ continue;
+ if (Mask[HighElt]-Mask[i] != LaneSize)
+ return false;
+ }
+
+ return true;
+}
+
+/// getShuffleVPERMILPSImmediate - Return the appropriate immediate to shuffle
+/// the specified VECTOR_MASK mask with VPERMILPS* instructions.
+static unsigned getShuffleVPERMILPSImmediate(SDNode *N) {
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+ EVT VT = SVOp->getValueType(0);
+
+ int NumElts = VT.getVectorNumElements();
+ int NumLanes = VT.getSizeInBits()/128;
+ int LaneSize = NumElts/NumLanes;
+
+ // Although the mask is equal for both lanes do it twice to get the cases
+ // where a mask will match because the same mask element is undef on the
+ // first half but valid on the second. This would get pathological cases
+ // such as: shuffle <u, 0, 1, 2, 4, 4, 5, 6>, which is completely valid.
+ unsigned Mask = 0;
+ for (int l = 0; l < NumLanes; ++l) {
+ for (int i = 0; i < LaneSize; ++i) {
+ int MaskElt = SVOp->getMaskElt(i+(l*LaneSize));
+ if (MaskElt < 0)
+ continue;
+ if (MaskElt >= LaneSize)
+ MaskElt -= LaneSize;
+ Mask |= MaskElt << (i*2);
+ }
+ }
+
+ return Mask;
+}
+
+/// getShuffleVPERMILPDImmediate - Return the appropriate immediate to shuffle
+/// the specified VECTOR_MASK mask with VPERMILPD* instructions.
+static unsigned getShuffleVPERMILPDImmediate(SDNode *N) {
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+ EVT VT = SVOp->getValueType(0);
+
+ int NumElts = VT.getVectorNumElements();
+ int NumLanes = VT.getSizeInBits()/128;
+
+ unsigned Mask = 0;
+ int LaneSize = NumElts/NumLanes;
+ for (int l = 0; l < NumLanes; ++l)
+ for (int i = l*LaneSize; i < LaneSize*(l+1); ++i) {
+ int MaskElt = SVOp->getMaskElt(i);
+ if (MaskElt < 0)
+ continue;
+ Mask |= (MaskElt-l*LaneSize) << i;
+ }
+
+ return Mask;
+}
+
+/// isCommutedMOVL - Returns true if the shuffle mask is except the reverse
+/// of what x86 movss want. X86 movs requires the lowest element to be lowest
+/// element of vector 2 and the other elements to come from vector 1 in order.
+static bool isCommutedMOVLMask(const SmallVectorImpl<int> &Mask, EVT VT,
+ bool V2IsSplat = false, bool V2IsUndef = false) {
+ int NumOps = VT.getVectorNumElements();
+ if (NumOps != 2 && NumOps != 4 && NumOps != 8 && NumOps != 16)
+ return false;
+
+ if (!isUndefOrEqual(Mask[0], 0))
+ return false;
+
+ for (int i = 1; i < NumOps; ++i)
+ if (!(isUndefOrEqual(Mask[i], i+NumOps) ||
+ (V2IsUndef && isUndefOrInRange(Mask[i], NumOps, NumOps*2)) ||
+ (V2IsSplat && isUndefOrEqual(Mask[i], NumOps))))
+ return false;
+
+ return true;
+}
+
+static bool isCommutedMOVL(ShuffleVectorSDNode *N, bool V2IsSplat = false,
+ bool V2IsUndef = false) {
+ SmallVector<int, 8> M;
+ N->getMask(M);
+ return isCommutedMOVLMask(M, N->getValueType(0), V2IsSplat, V2IsUndef);
+}
+
+/// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to MOVSHDUP.
+/// Masks to match: <1, 1, 3, 3> or <1, 1, 3, 3, 5, 5, 7, 7>
+bool X86::isMOVSHDUPMask(ShuffleVectorSDNode *N,
+ const X86Subtarget *Subtarget) {
+ if (!Subtarget->hasSSE3() && !Subtarget->hasAVX())
+ return false;
+
+ // The second vector must be undef
+ if (N->getOperand(1).getOpcode() != ISD::UNDEF)
+ return false;
+
+ EVT VT = N->getValueType(0);
+ unsigned NumElems = VT.getVectorNumElements();
+
+ if ((VT.getSizeInBits() == 128 && NumElems != 4) ||
+ (VT.getSizeInBits() == 256 && NumElems != 8))
+ return false;
+
+ // "i+1" is the value the indexed mask element must have
+ for (unsigned i = 0; i < NumElems; i += 2)
+ if (!isUndefOrEqual(N->getMaskElt(i), i+1) ||
+ !isUndefOrEqual(N->getMaskElt(i+1), i+1))
+ return false;
+
+ return true;
+}
+
+/// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to MOVSLDUP.
+/// Masks to match: <0, 0, 2, 2> or <0, 0, 2, 2, 4, 4, 6, 6>
+bool X86::isMOVSLDUPMask(ShuffleVectorSDNode *N,
+ const X86Subtarget *Subtarget) {
+ if (!Subtarget->hasSSE3() && !Subtarget->hasAVX())
+ return false;
+
+ // The second vector must be undef
+ if (N->getOperand(1).getOpcode() != ISD::UNDEF)
+ return false;
+
+ EVT VT = N->getValueType(0);
+ unsigned NumElems = VT.getVectorNumElements();
+
+ if ((VT.getSizeInBits() == 128 && NumElems != 4) ||
+ (VT.getSizeInBits() == 256 && NumElems != 8))
+ return false;
+
+ // "i" is the value the indexed mask element must have
+ for (unsigned i = 0; i < NumElems; i += 2)
+ if (!isUndefOrEqual(N->getMaskElt(i), i) ||
+ !isUndefOrEqual(N->getMaskElt(i+1), i))
+ return false;
+
+ return true;
+}
+
+/// isMOVDDUPYMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to 256-bit
+/// version of MOVDDUP.
+static bool isMOVDDUPYMask(ShuffleVectorSDNode *N,
+ const X86Subtarget *Subtarget) {
+ EVT VT = N->getValueType(0);
+ int NumElts = VT.getVectorNumElements();
+ bool V2IsUndef = N->getOperand(1).getOpcode() == ISD::UNDEF;
+
+ if (!Subtarget->hasAVX() || VT.getSizeInBits() != 256 ||
+ !V2IsUndef || NumElts != 4)
+ return false;
+
+ for (int i = 0; i != NumElts/2; ++i)
+ if (!isUndefOrEqual(N->getMaskElt(i), 0))
+ return false;
+ for (int i = NumElts/2; i != NumElts; ++i)
+ if (!isUndefOrEqual(N->getMaskElt(i), NumElts/2))
+ return false;
+ return true;
+}
+
+/// isMOVDDUPMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to 128-bit
+/// version of MOVDDUP.
+bool X86::isMOVDDUPMask(ShuffleVectorSDNode *N) {
+ EVT VT = N->getValueType(0);
+
+ if (VT.getSizeInBits() != 128)
+ return false;
+
+ int e = VT.getVectorNumElements() / 2;
+ for (int i = 0; i < e; ++i)
+ if (!isUndefOrEqual(N->getMaskElt(i), i))
+ return false;
+ for (int i = 0; i < e; ++i)
+ if (!isUndefOrEqual(N->getMaskElt(e+i), i))
+ return false;
+ return true;
+}
+
+/// isVEXTRACTF128Index - Return true if the specified
+/// EXTRACT_SUBVECTOR operand specifies a vector extract that is
+/// suitable for input to VEXTRACTF128.
+bool X86::isVEXTRACTF128Index(SDNode *N) {
+ if (!isa<ConstantSDNode>(N->getOperand(1).getNode()))
+ return false;
+
+ // The index should be aligned on a 128-bit boundary.
+ uint64_t Index =
+ cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
+
+ unsigned VL = N->getValueType(0).getVectorNumElements();
+ unsigned VBits = N->getValueType(0).getSizeInBits();
+ unsigned ElSize = VBits / VL;
+ bool Result = (Index * ElSize) % 128 == 0;
+
+ return Result;
+}
+
+/// isVINSERTF128Index - Return true if the specified INSERT_SUBVECTOR
+/// operand specifies a subvector insert that is suitable for input to
+/// VINSERTF128.
+bool X86::isVINSERTF128Index(SDNode *N) {
+ if (!isa<ConstantSDNode>(N->getOperand(2).getNode()))
+ return false;
+
+ // The index should be aligned on a 128-bit boundary.
+ uint64_t Index =
+ cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
+
+ unsigned VL = N->getValueType(0).getVectorNumElements();
+ unsigned VBits = N->getValueType(0).getSizeInBits();
+ unsigned ElSize = VBits / VL;
+ bool Result = (Index * ElSize) % 128 == 0;
+
+ return Result;
+}
+
+/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
+/// the specified VECTOR_SHUFFLE mask with PSHUF* and SHUFP* instructions.
+unsigned X86::getShuffleSHUFImmediate(SDNode *N) {
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+ int NumOperands = SVOp->getValueType(0).getVectorNumElements();
+
+ unsigned Shift = (NumOperands == 4) ? 2 : 1;
+ unsigned Mask = 0;
+ for (int i = 0; i < NumOperands; ++i) {
+ int Val = SVOp->getMaskElt(NumOperands-i-1);
+ if (Val < 0) Val = 0;
+ if (Val >= NumOperands) Val -= NumOperands;
+ Mask |= Val;
+ if (i != NumOperands - 1)
+ Mask <<= Shift;
+ }
+ return Mask;
+}
+
+/// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle
+/// the specified VECTOR_SHUFFLE mask with the PSHUFHW instruction.
+unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) {
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+ unsigned Mask = 0;
+ // 8 nodes, but we only care about the last 4.
+ for (unsigned i = 7; i >= 4; --i) {
+ int Val = SVOp->getMaskElt(i);
+ if (Val >= 0)
+ Mask |= (Val - 4);
+ if (i != 4)
+ Mask <<= 2;
+ }
+ return Mask;
+}
+
+/// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle
+/// the specified VECTOR_SHUFFLE mask with the PSHUFLW instruction.
+unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) {
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+ unsigned Mask = 0;
+ // 8 nodes, but we only care about the first 4.
+ for (int i = 3; i >= 0; --i) {
+ int Val = SVOp->getMaskElt(i);
+ if (Val >= 0)
+ Mask |= Val;
+ if (i != 0)
+ Mask <<= 2;
+ }
+ return Mask;
+}
+
+/// getShufflePALIGNRImmediate - Return the appropriate immediate to shuffle
+/// the specified VECTOR_SHUFFLE mask with the PALIGNR instruction.
+unsigned X86::getShufflePALIGNRImmediate(SDNode *N) {
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+ EVT VVT = N->getValueType(0);
+ unsigned EltSize = VVT.getVectorElementType().getSizeInBits() >> 3;
+ int Val = 0;
+
+ unsigned i, e;
+ for (i = 0, e = VVT.getVectorNumElements(); i != e; ++i) {
+ Val = SVOp->getMaskElt(i);
+ if (Val >= 0)
+ break;
+ }
+ assert(Val - i > 0 && "PALIGNR imm should be positive");
+ return (Val - i) * EltSize;
+}
+
+/// getExtractVEXTRACTF128Immediate - Return the appropriate immediate
+/// to extract the specified EXTRACT_SUBVECTOR index with VEXTRACTF128
+/// instructions.
+unsigned X86::getExtractVEXTRACTF128Immediate(SDNode *N) {
+ if (!isa<ConstantSDNode>(N->getOperand(1).getNode()))
+ llvm_unreachable("Illegal extract subvector for VEXTRACTF128");
+
+ uint64_t Index =
+ cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
+
+ EVT VecVT = N->getOperand(0).getValueType();
+ EVT ElVT = VecVT.getVectorElementType();
+
+ unsigned NumElemsPerChunk = 128 / ElVT.getSizeInBits();
+ return Index / NumElemsPerChunk;
+}
+
+/// getInsertVINSERTF128Immediate - Return the appropriate immediate
+/// to insert at the specified INSERT_SUBVECTOR index with VINSERTF128
+/// instructions.
+unsigned X86::getInsertVINSERTF128Immediate(SDNode *N) {
+ if (!isa<ConstantSDNode>(N->getOperand(2).getNode()))
+ llvm_unreachable("Illegal insert subvector for VINSERTF128");
+
+ uint64_t Index =
+ cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
+
+ EVT VecVT = N->getValueType(0);
+ EVT ElVT = VecVT.getVectorElementType();
+
+ unsigned NumElemsPerChunk = 128 / ElVT.getSizeInBits();
+ return Index / NumElemsPerChunk;
+}
+
+/// isZeroNode - Returns true if Elt is a constant zero or a floating point
+/// constant +0.0.
+bool X86::isZeroNode(SDValue Elt) {
+ return ((isa<ConstantSDNode>(Elt) &&
+ cast<ConstantSDNode>(Elt)->isNullValue()) ||
+ (isa<ConstantFPSDNode>(Elt) &&
+ cast<ConstantFPSDNode>(Elt)->getValueAPF().isPosZero()));
+}
+
+/// CommuteVectorShuffle - Swap vector_shuffle operands as well as values in
+/// their permute mask.
+static SDValue CommuteVectorShuffle(ShuffleVectorSDNode *SVOp,
+ SelectionDAG &DAG) {
+ EVT VT = SVOp->getValueType(0);
+ unsigned NumElems = VT.getVectorNumElements();
+ SmallVector<int, 8> MaskVec;
+
+ for (unsigned i = 0; i != NumElems; ++i) {
+ int idx = SVOp->getMaskElt(i);
+ if (idx < 0)
+ MaskVec.push_back(idx);
+ else if (idx < (int)NumElems)
+ MaskVec.push_back(idx + NumElems);
+ else
+ MaskVec.push_back(idx - NumElems);
+ }
+ return DAG.getVectorShuffle(VT, SVOp->getDebugLoc(), SVOp->getOperand(1),
+ SVOp->getOperand(0), &MaskVec[0]);
+}
+
+/// CommuteVectorShuffleMask - Change values in a shuffle permute mask assuming
+/// the two vector operands have swapped position.
+static void CommuteVectorShuffleMask(SmallVectorImpl<int> &Mask, EVT VT) {
+ unsigned NumElems = VT.getVectorNumElements();
+ for (unsigned i = 0; i != NumElems; ++i) {
+ int idx = Mask[i];
+ if (idx < 0)
+ continue;
+ else if (idx < (int)NumElems)
+ Mask[i] = idx + NumElems;
+ else
+ Mask[i] = idx - NumElems;
+ }
+}
+
+/// ShouldXformToMOVHLPS - Return true if the node should be transformed to
+/// match movhlps. The lower half elements should come from upper half of
+/// V1 (and in order), and the upper half elements should come from the upper
+/// half of V2 (and in order).
+static bool ShouldXformToMOVHLPS(ShuffleVectorSDNode *Op) {
+ EVT VT = Op->getValueType(0);
+ if (VT.getSizeInBits() != 128)
+ return false;
+ if (VT.getVectorNumElements() != 4)
+ return false;
+ for (unsigned i = 0, e = 2; i != e; ++i)
+ if (!isUndefOrEqual(Op->getMaskElt(i), i+2))
+ return false;
+ for (unsigned i = 2; i != 4; ++i)
+ if (!isUndefOrEqual(Op->getMaskElt(i), i+4))
+ return false;
+ return true;
+}
+
+/// isScalarLoadToVector - Returns true if the node is a scalar load that
+/// is promoted to a vector. It also returns the LoadSDNode by reference if
+/// required.
+static bool isScalarLoadToVector(SDNode *N, LoadSDNode **LD = NULL) {
+ if (N->getOpcode() != ISD::SCALAR_TO_VECTOR)
+ return false;
+ N = N->getOperand(0).getNode();
+ if (!ISD::isNON_EXTLoad(N))
+ return false;
+ if (LD)
+ *LD = cast<LoadSDNode>(N);
+ return true;
+}
+
+/// ShouldXformToMOVLP{S|D} - Return true if the node should be transformed to
+/// match movlp{s|d}. The lower half elements should come from lower half of
+/// V1 (and in order), and the upper half elements should come from the upper
+/// half of V2 (and in order). And since V1 will become the source of the
+/// MOVLP, it must be either a vector load or a scalar load to vector.
+static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2,
+ ShuffleVectorSDNode *Op) {
+ EVT VT = Op->getValueType(0);
+ if (VT.getSizeInBits() != 128)
+ return false;
+
+ if (!ISD::isNON_EXTLoad(V1) && !isScalarLoadToVector(V1))
+ return false;
+ // Is V2 is a vector load, don't do this transformation. We will try to use
+ // load folding shufps op.
+ if (ISD::isNON_EXTLoad(V2))
+ return false;
+
+ unsigned NumElems = VT.getVectorNumElements();
+
+ if (NumElems != 2 && NumElems != 4)
+ return false;
+ for (unsigned i = 0, e = NumElems/2; i != e; ++i)
+ if (!isUndefOrEqual(Op->getMaskElt(i), i))
+ return false;
+ for (unsigned i = NumElems/2; i != NumElems; ++i)
+ if (!isUndefOrEqual(Op->getMaskElt(i), i+NumElems))
+ return false;
+ return true;
+}
+
+/// isSplatVector - Returns true if N is a BUILD_VECTOR node whose elements are
+/// all the same.
+static bool isSplatVector(SDNode *N) {
+ if (N->getOpcode() != ISD::BUILD_VECTOR)
+ return false;
+
+ SDValue SplatValue = N->getOperand(0);
+ for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
+ if (N->getOperand(i) != SplatValue)
+ return false;
+ return true;
+}
+
+/// isZeroShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved
+/// to an zero vector.
+/// FIXME: move to dag combiner / method on ShuffleVectorSDNode
+static bool isZeroShuffle(ShuffleVectorSDNode *N) {
+ SDValue V1 = N->getOperand(0);
+ SDValue V2 = N->getOperand(1);
+ unsigned NumElems = N->getValueType(0).getVectorNumElements();
+ for (unsigned i = 0; i != NumElems; ++i) {
+ int Idx = N->getMaskElt(i);
+ if (Idx >= (int)NumElems) {
+ unsigned Opc = V2.getOpcode();
+ if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V2.getNode()))
+ continue;
+ if (Opc != ISD::BUILD_VECTOR ||
+ !X86::isZeroNode(V2.getOperand(Idx-NumElems)))
+ return false;
+ } else if (Idx >= 0) {
+ unsigned Opc = V1.getOpcode();
+ if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V1.getNode()))
+ continue;
+ if (Opc != ISD::BUILD_VECTOR ||
+ !X86::isZeroNode(V1.getOperand(Idx)))
+ return false;
+ }
+ }
+ return true;
+}
+
+/// getZeroVector - Returns a vector of specified type with all zero elements.
+///
+static SDValue getZeroVector(EVT VT, bool HasXMMInt, SelectionDAG &DAG,
+ DebugLoc dl) {
+ assert(VT.isVector() && "Expected a vector type");
+
+ // Always build SSE zero vectors as <4 x i32> bitcasted
+ // to their dest type. This ensures they get CSE'd.
+ SDValue Vec;
+ if (VT.getSizeInBits() == 128) { // SSE
+ if (HasXMMInt) { // SSE2
+ SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
+ } else { // SSE1
+ SDValue Cst = DAG.getTargetConstantFP(+0.0, MVT::f32);
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f32, Cst, Cst, Cst, Cst);
+ }
+ } else if (VT.getSizeInBits() == 256) { // AVX
+ // 256-bit logic and arithmetic instructions in AVX are
+ // all floating-point, no support for integer ops. Default
+ // to emitting fp zeroed vectors then.
+ SDValue Cst = DAG.getTargetConstantFP(+0.0, MVT::f32);
+ SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8f32, Ops, 8);
+ }
+ return DAG.getNode(ISD::BITCAST, dl, VT, Vec);
+}
+
+/// getOnesVector - Returns a vector of specified type with all bits set.
+/// Always build ones vectors as <4 x i32>. For 256-bit types, use two
+/// <4 x i32> inserted in a <8 x i32> appropriately. Then bitcast to their
+/// original type, ensuring they get CSE'd.
+static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
+ assert(VT.isVector() && "Expected a vector type");
+ assert((VT.is128BitVector() || VT.is256BitVector())
+ && "Expected a 128-bit or 256-bit vector type");
+
+ SDValue Cst = DAG.getTargetConstant(~0U, MVT::i32);
+ SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ Cst, Cst, Cst, Cst);
+
+ if (VT.is256BitVector()) {
+ SDValue InsV = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, MVT::v8i32),
+ Vec, DAG.getConstant(0, MVT::i32), DAG, dl);
+ Vec = Insert128BitVector(InsV, Vec,
+ DAG.getConstant(4 /* NumElems/2 */, MVT::i32), DAG, dl);
+ }
+
+ return DAG.getNode(ISD::BITCAST, dl, VT, Vec);
+}
+
+/// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements
+/// that point to V2 points to its first element.
+static SDValue NormalizeMask(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
+ EVT VT = SVOp->getValueType(0);
+ unsigned NumElems = VT.getVectorNumElements();
+
+ bool Changed = false;
+ SmallVector<int, 8> MaskVec;
+ SVOp->getMask(MaskVec);
+
+ for (unsigned i = 0; i != NumElems; ++i) {
+ if (MaskVec[i] > (int)NumElems) {
+ MaskVec[i] = NumElems;
+ Changed = true;
+ }
+ }
+ if (Changed)
+ return DAG.getVectorShuffle(VT, SVOp->getDebugLoc(), SVOp->getOperand(0),
+ SVOp->getOperand(1), &MaskVec[0]);
+ return SDValue(SVOp, 0);
+}
+
+/// getMOVLMask - Returns a vector_shuffle mask for an movs{s|d}, movd
+/// operation of specified width.
+static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1,
+ SDValue V2) {
+ unsigned NumElems = VT.getVectorNumElements();
+ SmallVector<int, 8> Mask;
+ Mask.push_back(NumElems);
+ for (unsigned i = 1; i != NumElems; ++i)
+ Mask.push_back(i);
+ return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
+}
+
+/// getUnpackl - Returns a vector_shuffle node for an unpackl operation.
+static SDValue getUnpackl(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1,
+ SDValue V2) {
+ unsigned NumElems = VT.getVectorNumElements();
+ SmallVector<int, 8> Mask;
+ for (unsigned i = 0, e = NumElems/2; i != e; ++i) {
+ Mask.push_back(i);
+ Mask.push_back(i + NumElems);
+ }
+ return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
+}
+
+/// getUnpackh - Returns a vector_shuffle node for an unpackh operation.
+static SDValue getUnpackh(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1,
+ SDValue V2) {
+ unsigned NumElems = VT.getVectorNumElements();
+ unsigned Half = NumElems/2;
+ SmallVector<int, 8> Mask;
+ for (unsigned i = 0; i != Half; ++i) {
+ Mask.push_back(i + Half);
+ Mask.push_back(i + NumElems + Half);
+ }
+ return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
+}
+
+// PromoteSplati8i16 - All i16 and i8 vector types can't be used directly by
+// a generic shuffle instruction because the target has no such instructions.
+// Generate shuffles which repeat i16 and i8 several times until they can be
+// represented by v4f32 and then be manipulated by target suported shuffles.
+static SDValue PromoteSplati8i16(SDValue V, SelectionDAG &DAG, int &EltNo) {
+ EVT VT = V.getValueType();
+ int NumElems = VT.getVectorNumElements();
+ DebugLoc dl = V.getDebugLoc();
+
+ while (NumElems > 4) {
+ if (EltNo < NumElems/2) {
+ V = getUnpackl(DAG, dl, VT, V, V);
+ } else {
+ V = getUnpackh(DAG, dl, VT, V, V);
+ EltNo -= NumElems/2;
+ }
+ NumElems >>= 1;
+ }
+ return V;
+}
+
+/// getLegalSplat - Generate a legal splat with supported x86 shuffles
+static SDValue getLegalSplat(SelectionDAG &DAG, SDValue V, int EltNo) {
+ EVT VT = V.getValueType();
+ DebugLoc dl = V.getDebugLoc();
+ assert((VT.getSizeInBits() == 128 || VT.getSizeInBits() == 256)
+ && "Vector size not supported");
+
+ if (VT.getSizeInBits() == 128) {
+ V = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, V);
+ int SplatMask[4] = { EltNo, EltNo, EltNo, EltNo };
+ V = DAG.getVectorShuffle(MVT::v4f32, dl, V, DAG.getUNDEF(MVT::v4f32),
+ &SplatMask[0]);
+ } else {
+ // To use VPERMILPS to splat scalars, the second half of indicies must
+ // refer to the higher part, which is a duplication of the lower one,
+ // because VPERMILPS can only handle in-lane permutations.
+ int SplatMask[8] = { EltNo, EltNo, EltNo, EltNo,
+ EltNo+4, EltNo+4, EltNo+4, EltNo+4 };
+
+ V = DAG.getNode(ISD::BITCAST, dl, MVT::v8f32, V);
+ V = DAG.getVectorShuffle(MVT::v8f32, dl, V, DAG.getUNDEF(MVT::v8f32),
+ &SplatMask[0]);
+ }
+
+ return DAG.getNode(ISD::BITCAST, dl, VT, V);
+}
+
+/// PromoteSplat - Splat is promoted to target supported vector shuffles.
+static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG) {
+ EVT SrcVT = SV->getValueType(0);
+ SDValue V1 = SV->getOperand(0);
+ DebugLoc dl = SV->getDebugLoc();
+
+ int EltNo = SV->getSplatIndex();
+ int NumElems = SrcVT.getVectorNumElements();
+ unsigned Size = SrcVT.getSizeInBits();
+
+ assert(((Size == 128 && NumElems > 4) || Size == 256) &&
+ "Unknown how to promote splat for type");
+
+ // Extract the 128-bit part containing the splat element and update
+ // the splat element index when it refers to the higher register.
+ if (Size == 256) {
+ unsigned Idx = (EltNo > NumElems/2) ? NumElems/2 : 0;
+ V1 = Extract128BitVector(V1, DAG.getConstant(Idx, MVT::i32), DAG, dl);
+ if (Idx > 0)
+ EltNo -= NumElems/2;
+ }
+
+ // All i16 and i8 vector types can't be used directly by a generic shuffle
+ // instruction because the target has no such instruction. Generate shuffles
+ // which repeat i16 and i8 several times until they fit in i32, and then can
+ // be manipulated by target suported shuffles.
+ EVT EltVT = SrcVT.getVectorElementType();
+ if (EltVT == MVT::i8 || EltVT == MVT::i16)
+ V1 = PromoteSplati8i16(V1, DAG, EltNo);
+
+ // Recreate the 256-bit vector and place the same 128-bit vector
+ // into the low and high part. This is necessary because we want
+ // to use VPERM* to shuffle the vectors
+ if (Size == 256) {
+ SDValue InsV = Insert128BitVector(DAG.getUNDEF(SrcVT), V1,
+ DAG.getConstant(0, MVT::i32), DAG, dl);
+ V1 = Insert128BitVector(InsV, V1,
+ DAG.getConstant(NumElems/2, MVT::i32), DAG, dl);
+ }
+
+ return getLegalSplat(DAG, V1, EltNo);
+}
+
+/// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified
+/// vector of zero or undef vector. This produces a shuffle where the low
+/// element of V2 is swizzled into the zero/undef vector, landing at element
+/// Idx. This produces a shuffle mask like 4,1,2,3 (idx=0) or 0,1,2,4 (idx=3).
+static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx,
+ bool isZero, bool HasXMMInt,
+ SelectionDAG &DAG) {
+ EVT VT = V2.getValueType();
+ SDValue V1 = isZero
+ ? getZeroVector(VT, HasXMMInt, DAG, V2.getDebugLoc()) : DAG.getUNDEF(VT);
+ unsigned NumElems = VT.getVectorNumElements();
+ SmallVector<int, 16> MaskVec;
+ for (unsigned i = 0; i != NumElems; ++i)
+ // If this is the insertion idx, put the low elt of V2 here.
+ MaskVec.push_back(i == Idx ? NumElems : i);
+ return DAG.getVectorShuffle(VT, V2.getDebugLoc(), V1, V2, &MaskVec[0]);
+}
+
+/// getShuffleScalarElt - Returns the scalar element that will make up the ith
+/// element of the result of the vector shuffle.
+static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
+ unsigned Depth) {
+ if (Depth == 6)
+ return SDValue(); // Limit search depth.
+
+ SDValue V = SDValue(N, 0);
+ EVT VT = V.getValueType();
+ unsigned Opcode = V.getOpcode();
+
+ // Recurse into ISD::VECTOR_SHUFFLE node to find scalars.
+ if (const ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(N)) {
+ Index = SV->getMaskElt(Index);
+
+ if (Index < 0)
+ return DAG.getUNDEF(VT.getVectorElementType());
+
+ int NumElems = VT.getVectorNumElements();
+ SDValue NewV = (Index < NumElems) ? SV->getOperand(0) : SV->getOperand(1);
+ return getShuffleScalarElt(NewV.getNode(), Index % NumElems, DAG, Depth+1);
+ }
+
+ // Recurse into target specific vector shuffles to find scalars.
+ if (isTargetShuffle(Opcode)) {
+ int NumElems = VT.getVectorNumElements();
+ SmallVector<unsigned, 16> ShuffleMask;
+ SDValue ImmN;
+
+ switch(Opcode) {
+ case X86ISD::SHUFPS:
+ case X86ISD::SHUFPD:
+ ImmN = N->getOperand(N->getNumOperands()-1);
+ DecodeSHUFPSMask(NumElems,
+ cast<ConstantSDNode>(ImmN)->getZExtValue(),
+ ShuffleMask);
+ break;
+ case X86ISD::PUNPCKHBW:
+ case X86ISD::PUNPCKHWD:
+ case X86ISD::PUNPCKHDQ:
+ case X86ISD::PUNPCKHQDQ:
+ DecodePUNPCKHMask(NumElems, ShuffleMask);
+ break;
+ case X86ISD::UNPCKHPS:
+ case X86ISD::UNPCKHPD:
+ case X86ISD::VUNPCKHPSY:
+ case X86ISD::VUNPCKHPDY:
+ DecodeUNPCKHPMask(NumElems, ShuffleMask);
+ break;
+ case X86ISD::PUNPCKLBW:
+ case X86ISD::PUNPCKLWD:
+ case X86ISD::PUNPCKLDQ:
+ case X86ISD::PUNPCKLQDQ:
+ DecodePUNPCKLMask(VT, ShuffleMask);
+ break;
+ case X86ISD::UNPCKLPS:
+ case X86ISD::UNPCKLPD:
+ case X86ISD::VUNPCKLPSY:
+ case X86ISD::VUNPCKLPDY:
+ DecodeUNPCKLPMask(VT, ShuffleMask);
+ break;
+ case X86ISD::MOVHLPS:
+ DecodeMOVHLPSMask(NumElems, ShuffleMask);
+ break;
+ case X86ISD::MOVLHPS:
+ DecodeMOVLHPSMask(NumElems, ShuffleMask);
+ break;
+ case X86ISD::PSHUFD:
+ ImmN = N->getOperand(N->getNumOperands()-1);
+ DecodePSHUFMask(NumElems,
+ cast<ConstantSDNode>(ImmN)->getZExtValue(),
+ ShuffleMask);
+ break;
+ case X86ISD::PSHUFHW:
+ ImmN = N->getOperand(N->getNumOperands()-1);
+ DecodePSHUFHWMask(cast<ConstantSDNode>(ImmN)->getZExtValue(),
+ ShuffleMask);
+ break;
+ case X86ISD::PSHUFLW:
+ ImmN = N->getOperand(N->getNumOperands()-1);
+ DecodePSHUFLWMask(cast<ConstantSDNode>(ImmN)->getZExtValue(),
+ ShuffleMask);
+ break;
+ case X86ISD::MOVSS:
+ case X86ISD::MOVSD: {
+ // The index 0 always comes from the first element of the second source,
+ // this is why MOVSS and MOVSD are used in the first place. The other
+ // elements come from the other positions of the first source vector.
+ unsigned OpNum = (Index == 0) ? 1 : 0;
+ return getShuffleScalarElt(V.getOperand(OpNum).getNode(), Index, DAG,
+ Depth+1);
+ }
+ case X86ISD::VPERMILPS:
+ ImmN = N->getOperand(N->getNumOperands()-1);
+ DecodeVPERMILPSMask(4, cast<ConstantSDNode>(ImmN)->getZExtValue(),
+ ShuffleMask);
+ break;
+ case X86ISD::VPERMILPSY:
+ ImmN = N->getOperand(N->getNumOperands()-1);
+ DecodeVPERMILPSMask(8, cast<ConstantSDNode>(ImmN)->getZExtValue(),
+ ShuffleMask);
+ break;
+ case X86ISD::VPERMILPD:
+ ImmN = N->getOperand(N->getNumOperands()-1);
+ DecodeVPERMILPDMask(2, cast<ConstantSDNode>(ImmN)->getZExtValue(),
+ ShuffleMask);
+ break;
+ case X86ISD::VPERMILPDY:
+ ImmN = N->getOperand(N->getNumOperands()-1);
+ DecodeVPERMILPDMask(4, cast<ConstantSDNode>(ImmN)->getZExtValue(),
+ ShuffleMask);
+ break;
+ case X86ISD::VPERM2F128:
+ ImmN = N->getOperand(N->getNumOperands()-1);
+ DecodeVPERM2F128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(),
+ ShuffleMask);
+ break;
+ case X86ISD::MOVDDUP:
+ case X86ISD::MOVLHPD:
+ case X86ISD::MOVLPD:
+ case X86ISD::MOVLPS:
+ case X86ISD::MOVSHDUP:
+ case X86ISD::MOVSLDUP:
+ case X86ISD::PALIGN:
+ return SDValue(); // Not yet implemented.
+ default:
+ assert(0 && "unknown target shuffle node");
+ return SDValue();
+ }
+
+ Index = ShuffleMask[Index];
+ if (Index < 0)
+ return DAG.getUNDEF(VT.getVectorElementType());
+
+ SDValue NewV = (Index < NumElems) ? N->getOperand(0) : N->getOperand(1);
+ return getShuffleScalarElt(NewV.getNode(), Index % NumElems, DAG,
+ Depth+1);
+ }
+
+ // Actual nodes that may contain scalar elements
+ if (Opcode == ISD::BITCAST) {
+ V = V.getOperand(0);
+ EVT SrcVT = V.getValueType();
+ unsigned NumElems = VT.getVectorNumElements();
+
+ if (!SrcVT.isVector() || SrcVT.getVectorNumElements() != NumElems)
+ return SDValue();
+ }
+
+ if (V.getOpcode() == ISD::SCALAR_TO_VECTOR)
+ return (Index == 0) ? V.getOperand(0)
+ : DAG.getUNDEF(VT.getVectorElementType());
+
+ if (V.getOpcode() == ISD::BUILD_VECTOR)
+ return V.getOperand(Index);
+
+ return SDValue();
+}
+
+/// getNumOfConsecutiveZeros - Return the number of elements of a vector
+/// shuffle operation which come from a consecutively from a zero. The
+/// search can start in two different directions, from left or right.
+static
+unsigned getNumOfConsecutiveZeros(SDNode *N, int NumElems,
+ bool ZerosFromLeft, SelectionDAG &DAG) {
+ int i = 0;
+
+ while (i < NumElems) {
+ unsigned Index = ZerosFromLeft ? i : NumElems-i-1;
+ SDValue Elt = getShuffleScalarElt(N, Index, DAG, 0);
+ if (!(Elt.getNode() &&
+ (Elt.getOpcode() == ISD::UNDEF || X86::isZeroNode(Elt))))
+ break;
+ ++i;
+ }
+
+ return i;
+}
+
+/// isShuffleMaskConsecutive - Check if the shuffle mask indicies from MaskI to
+/// MaskE correspond consecutively to elements from one of the vector operands,
+/// starting from its index OpIdx. Also tell OpNum which source vector operand.
+static
+bool isShuffleMaskConsecutive(ShuffleVectorSDNode *SVOp, int MaskI, int MaskE,
+ int OpIdx, int NumElems, unsigned &OpNum) {
+ bool SeenV1 = false;
+ bool SeenV2 = false;
+
+ for (int i = MaskI; i <= MaskE; ++i, ++OpIdx) {
+ int Idx = SVOp->getMaskElt(i);
+ // Ignore undef indicies
+ if (Idx < 0)
+ continue;
+
+ if (Idx < NumElems)
+ SeenV1 = true;
+ else
+ SeenV2 = true;
+
+ // Only accept consecutive elements from the same vector
+ if ((Idx % NumElems != OpIdx) || (SeenV1 && SeenV2))
+ return false;
+ }
+
+ OpNum = SeenV1 ? 0 : 1;
+ return true;
+}
+
+/// isVectorShiftRight - Returns true if the shuffle can be implemented as a
+/// logical left shift of a vector.
+static bool isVectorShiftRight(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
+ bool &isLeft, SDValue &ShVal, unsigned &ShAmt) {
+ unsigned NumElems = SVOp->getValueType(0).getVectorNumElements();
+ unsigned NumZeros = getNumOfConsecutiveZeros(SVOp, NumElems,
+ false /* check zeros from right */, DAG);
+ unsigned OpSrc;
+
+ if (!NumZeros)
+ return false;
+
+ // Considering the elements in the mask that are not consecutive zeros,
+ // check if they consecutively come from only one of the source vectors.
+ //
+ // V1 = {X, A, B, C} 0
+ // \ \ \ /
+ // vector_shuffle V1, V2 <1, 2, 3, X>
+ //
+ if (!isShuffleMaskConsecutive(SVOp,
+ 0, // Mask Start Index
+ NumElems-NumZeros-1, // Mask End Index
+ NumZeros, // Where to start looking in the src vector
+ NumElems, // Number of elements in vector
+ OpSrc)) // Which source operand ?
+ return false;
+
+ isLeft = false;
+ ShAmt = NumZeros;
+ ShVal = SVOp->getOperand(OpSrc);
+ return true;
+}
+
+/// isVectorShiftLeft - Returns true if the shuffle can be implemented as a
+/// logical left shift of a vector.
+static bool isVectorShiftLeft(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
+ bool &isLeft, SDValue &ShVal, unsigned &ShAmt) {
+ unsigned NumElems = SVOp->getValueType(0).getVectorNumElements();
+ unsigned NumZeros = getNumOfConsecutiveZeros(SVOp, NumElems,
+ true /* check zeros from left */, DAG);
+ unsigned OpSrc;
+
+ if (!NumZeros)
+ return false;
+
+ // Considering the elements in the mask that are not consecutive zeros,
+ // check if they consecutively come from only one of the source vectors.
+ //
+ // 0 { A, B, X, X } = V2
+ // / \ / /
+ // vector_shuffle V1, V2 <X, X, 4, 5>
+ //
+ if (!isShuffleMaskConsecutive(SVOp,
+ NumZeros, // Mask Start Index
+ NumElems-1, // Mask End Index
+ 0, // Where to start looking in the src vector
+ NumElems, // Number of elements in vector
+ OpSrc)) // Which source operand ?
+ return false;
+
+ isLeft = true;
+ ShAmt = NumZeros;
+ ShVal = SVOp->getOperand(OpSrc);
+ return true;
+}
+
+/// isVectorShift - Returns true if the shuffle can be implemented as a
+/// logical left or right shift of a vector.
+static bool isVectorShift(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
+ bool &isLeft, SDValue &ShVal, unsigned &ShAmt) {
+ // Although the logic below support any bitwidth size, there are no
+ // shift instructions which handle more than 128-bit vectors.
+ if (SVOp->getValueType(0).getSizeInBits() > 128)
+ return false;
+
+ if (isVectorShiftLeft(SVOp, DAG, isLeft, ShVal, ShAmt) ||
+ isVectorShiftRight(SVOp, DAG, isLeft, ShVal, ShAmt))
+ return true;
+
+ return false;
+}
+
+/// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8.
+///
+static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
+ unsigned NumNonZero, unsigned NumZero,
+ SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ if (NumNonZero > 8)
+ return SDValue();
+
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue V(0, 0);
+ bool First = true;
+ for (unsigned i = 0; i < 16; ++i) {
+ bool ThisIsNonZero = (NonZeros & (1 << i)) != 0;
+ if (ThisIsNonZero && First) {
+ if (NumZero)
+ V = getZeroVector(MVT::v8i16, true, DAG, dl);
+ else
+ V = DAG.getUNDEF(MVT::v8i16);
+ First = false;
+ }
+
+ if ((i & 1) != 0) {
+ SDValue ThisElt(0, 0), LastElt(0, 0);
+ bool LastIsNonZero = (NonZeros & (1 << (i-1))) != 0;
+ if (LastIsNonZero) {
+ LastElt = DAG.getNode(ISD::ZERO_EXTEND, dl,
+ MVT::i16, Op.getOperand(i-1));
+ }
+ if (ThisIsNonZero) {
+ ThisElt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Op.getOperand(i));
+ ThisElt = DAG.getNode(ISD::SHL, dl, MVT::i16,
+ ThisElt, DAG.getConstant(8, MVT::i8));
+ if (LastIsNonZero)
+ ThisElt = DAG.getNode(ISD::OR, dl, MVT::i16, ThisElt, LastElt);
+ } else
+ ThisElt = LastElt;
+
+ if (ThisElt.getNode())
+ V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, V, ThisElt,
+ DAG.getIntPtrConstant(i/2));
+ }
+ }
+
+ return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, V);
+}
+
+/// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16.
+///
+static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros,
+ unsigned NumNonZero, unsigned NumZero,
+ SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ if (NumNonZero > 4)
+ return SDValue();
+
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue V(0, 0);
+ bool First = true;
+ for (unsigned i = 0; i < 8; ++i) {
+ bool isNonZero = (NonZeros & (1 << i)) != 0;
+ if (isNonZero) {
+ if (First) {
+ if (NumZero)
+ V = getZeroVector(MVT::v8i16, true, DAG, dl);
+ else
+ V = DAG.getUNDEF(MVT::v8i16);
+ First = false;
+ }
+ V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl,
+ MVT::v8i16, V, Op.getOperand(i),
+ DAG.getIntPtrConstant(i));
+ }
+ }
+
+ return V;
+}
+
+/// getVShift - Return a vector logical shift node.
+///
+static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp,
+ unsigned NumBits, SelectionDAG &DAG,
+ const TargetLowering &TLI, DebugLoc dl) {
+ assert(VT.getSizeInBits() == 128 && "Unknown type for VShift");
+ EVT ShVT = MVT::v2i64;
+ unsigned Opc = isLeft ? X86ISD::VSHL : X86ISD::VSRL;
+ SrcOp = DAG.getNode(ISD::BITCAST, dl, ShVT, SrcOp);
+ return DAG.getNode(ISD::BITCAST, dl, VT,
+ DAG.getNode(Opc, dl, ShVT, SrcOp,
+ DAG.getConstant(NumBits,
+ TLI.getShiftAmountTy(SrcOp.getValueType()))));
+}
+
+SDValue
+X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl,
+ SelectionDAG &DAG) const {
+
+ // Check if the scalar load can be widened into a vector load. And if
+ // the address is "base + cst" see if the cst can be "absorbed" into
+ // the shuffle mask.
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(SrcOp)) {
+ SDValue Ptr = LD->getBasePtr();
+ if (!ISD::isNormalLoad(LD) || LD->isVolatile())
+ return SDValue();
+ EVT PVT = LD->getValueType(0);
+ if (PVT != MVT::i32 && PVT != MVT::f32)
+ return SDValue();
+
+ int FI = -1;
+ int64_t Offset = 0;
+ if (FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr)) {
+ FI = FINode->getIndex();
+ Offset = 0;
+ } else if (DAG.isBaseWithConstantOffset(Ptr) &&
+ isa<FrameIndexSDNode>(Ptr.getOperand(0))) {
+ FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
+ Offset = Ptr.getConstantOperandVal(1);
+ Ptr = Ptr.getOperand(0);
+ } else {
+ return SDValue();
+ }
+
+ // FIXME: 256-bit vector instructions don't require a strict alignment,
+ // improve this code to support it better.
+ unsigned RequiredAlign = VT.getSizeInBits()/8;
+ SDValue Chain = LD->getChain();
+ // Make sure the stack object alignment is at least 16 or 32.
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ if (DAG.InferPtrAlignment(Ptr) < RequiredAlign) {
+ if (MFI->isFixedObjectIndex(FI)) {
+ // Can't change the alignment. FIXME: It's possible to compute
+ // the exact stack offset and reference FI + adjust offset instead.
+ // If someone *really* cares about this. That's the way to implement it.
+ return SDValue();
+ } else {
+ MFI->setObjectAlignment(FI, RequiredAlign);
+ }
+ }
+
+ // (Offset % 16 or 32) must be multiple of 4. Then address is then
+ // Ptr + (Offset & ~15).
+ if (Offset < 0)
+ return SDValue();
+ if ((Offset % RequiredAlign) & 3)
+ return SDValue();
+ int64_t StartOffset = Offset & ~(RequiredAlign-1);
+ if (StartOffset)
+ Ptr = DAG.getNode(ISD::ADD, Ptr.getDebugLoc(), Ptr.getValueType(),
+ Ptr,DAG.getConstant(StartOffset, Ptr.getValueType()));
+
+ int EltNo = (Offset - StartOffset) >> 2;
+ int NumElems = VT.getVectorNumElements();
+
+ EVT CanonVT = VT.getSizeInBits() == 128 ? MVT::v4i32 : MVT::v8i32;
+ EVT NVT = EVT::getVectorVT(*DAG.getContext(), PVT, NumElems);
+ SDValue V1 = DAG.getLoad(NVT, dl, Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(StartOffset),
+ false, false, 0);
+
+ // Canonicalize it to a v4i32 or v8i32 shuffle.
+ SmallVector<int, 8> Mask;
+ for (int i = 0; i < NumElems; ++i)
+ Mask.push_back(EltNo);
+
+ V1 = DAG.getNode(ISD::BITCAST, dl, CanonVT, V1);
+ return DAG.getNode(ISD::BITCAST, dl, NVT,
+ DAG.getVectorShuffle(CanonVT, dl, V1,
+ DAG.getUNDEF(CanonVT),&Mask[0]));
+ }
+
+ return SDValue();
+}
+
+/// EltsFromConsecutiveLoads - Given the initializing elements 'Elts' of a
+/// vector of type 'VT', see if the elements can be replaced by a single large
+/// load which has the same value as a build_vector whose operands are 'elts'.
+///
+/// Example: <load i32 *a, load i32 *a+4, undef, undef> -> zextload a
+///
+/// FIXME: we'd also like to handle the case where the last elements are zero
+/// rather than undef via VZEXT_LOAD, but we do not detect that case today.
+/// There's even a handy isZeroNode for that purpose.
+static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
+ DebugLoc &DL, SelectionDAG &DAG) {
+ EVT EltVT = VT.getVectorElementType();
+ unsigned NumElems = Elts.size();
+
+ LoadSDNode *LDBase = NULL;
+ unsigned LastLoadedElt = -1U;
+
+ // For each element in the initializer, see if we've found a load or an undef.
+ // If we don't find an initial load element, or later load elements are
+ // non-consecutive, bail out.
+ for (unsigned i = 0; i < NumElems; ++i) {
+ SDValue Elt = Elts[i];
+
+ if (!Elt.getNode() ||
+ (Elt.getOpcode() != ISD::UNDEF && !ISD::isNON_EXTLoad(Elt.getNode())))
+ return SDValue();
+ if (!LDBase) {
+ if (Elt.getNode()->getOpcode() == ISD::UNDEF)
+ return SDValue();
+ LDBase = cast<LoadSDNode>(Elt.getNode());
+ LastLoadedElt = i;
+ continue;
+ }
+ if (Elt.getOpcode() == ISD::UNDEF)
+ continue;
+
+ LoadSDNode *LD = cast<LoadSDNode>(Elt);
+ if (!DAG.isConsecutiveLoad(LD, LDBase, EltVT.getSizeInBits()/8, i))
+ return SDValue();
+ LastLoadedElt = i;
+ }
+
+ // If we have found an entire vector of loads and undefs, then return a large
+ // load of the entire vector width starting at the base pointer. If we found
+ // consecutive loads for the low half, generate a vzext_load node.
+ if (LastLoadedElt == NumElems - 1) {
+ if (DAG.InferPtrAlignment(LDBase->getBasePtr()) >= 16)
+ return DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(),
+ LDBase->getPointerInfo(),
+ LDBase->isVolatile(), LDBase->isNonTemporal(), 0);
+ return DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(),
+ LDBase->getPointerInfo(),
+ LDBase->isVolatile(), LDBase->isNonTemporal(),
+ LDBase->getAlignment());
+ } else if (NumElems == 4 && LastLoadedElt == 1 &&
+ DAG.getTargetLoweringInfo().isTypeLegal(MVT::v2i64)) {
+ SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other);
+ SDValue Ops[] = { LDBase->getChain(), LDBase->getBasePtr() };
+ SDValue ResNode =
+ DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, DL, Tys, Ops, 2, MVT::i64,
+ LDBase->getPointerInfo(),
+ LDBase->getAlignment(),
+ false/*isVolatile*/, true/*ReadMem*/,
+ false/*WriteMem*/);
+ return DAG.getNode(ISD::BITCAST, DL, VT, ResNode);
+ }
+ return SDValue();
+}
+
+SDValue
+X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+
+ EVT VT = Op.getValueType();
+ EVT ExtVT = VT.getVectorElementType();
+ unsigned NumElems = Op.getNumOperands();
+
+ // Vectors containing all zeros can be matched by pxor and xorps later
+ if (ISD::isBuildVectorAllZeros(Op.getNode())) {
+ // Canonicalize this to <4 x i32> to 1) ensure the zero vectors are CSE'd
+ // and 2) ensure that i64 scalars are eliminated on x86-32 hosts.
+ if (Op.getValueType() == MVT::v4i32 ||
+ Op.getValueType() == MVT::v8i32)
+ return Op;
+
+ return getZeroVector(Op.getValueType(), Subtarget->hasXMMInt(), DAG, dl);
+ }
+
+ // Vectors containing all ones can be matched by pcmpeqd on 128-bit width
+ // vectors or broken into v4i32 operations on 256-bit vectors.
+ if (ISD::isBuildVectorAllOnes(Op.getNode())) {
+ if (Op.getValueType() == MVT::v4i32)
+ return Op;
+
+ return getOnesVector(Op.getValueType(), DAG, dl);
+ }
+
+ unsigned EVTBits = ExtVT.getSizeInBits();
+
+ unsigned NumZero = 0;
+ unsigned NumNonZero = 0;
+ unsigned NonZeros = 0;
+ bool IsAllConstants = true;
+ SmallSet<SDValue, 8> Values;
+ for (unsigned i = 0; i < NumElems; ++i) {
+ SDValue Elt = Op.getOperand(i);
+ if (Elt.getOpcode() == ISD::UNDEF)
+ continue;
+ Values.insert(Elt);
+ if (Elt.getOpcode() != ISD::Constant &&
+ Elt.getOpcode() != ISD::ConstantFP)
+ IsAllConstants = false;
+ if (X86::isZeroNode(Elt))
+ NumZero++;
+ else {
+ NonZeros |= (1 << i);
+ NumNonZero++;
+ }
+ }
+
+ // All undef vector. Return an UNDEF. All zero vectors were handled above.
+ if (NumNonZero == 0)
+ return DAG.getUNDEF(VT);
+
+ // Special case for single non-zero, non-undef, element.
+ if (NumNonZero == 1) {
+ unsigned Idx = CountTrailingZeros_32(NonZeros);
+ SDValue Item = Op.getOperand(Idx);
+
+ // If this is an insertion of an i64 value on x86-32, and if the top bits of
+ // the value are obviously zero, truncate the value to i32 and do the
+ // insertion that way. Only do this if the value is non-constant or if the
+ // value is a constant being inserted into element 0. It is cheaper to do
+ // a constant pool load than it is to do a movd + shuffle.
+ if (ExtVT == MVT::i64 && !Subtarget->is64Bit() &&
+ (!IsAllConstants || Idx == 0)) {
+ if (DAG.MaskedValueIsZero(Item, APInt::getBitsSet(64, 32, 64))) {
+ // Handle SSE only.
+ assert(VT == MVT::v2i64 && "Expected an SSE value type!");
+ EVT VecVT = MVT::v4i32;
+ unsigned VecElts = 4;
+
+ // Truncate the value (which may itself be a constant) to i32, and
+ // convert it to a vector with movd (S2V+shuffle to zero extend).
+ Item = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Item);
+ Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, Item);
+ Item = getShuffleVectorZeroOrUndef(Item, 0, true,
+ Subtarget->hasXMMInt(), DAG);
+
+ // Now we have our 32-bit value zero extended in the low element of
+ // a vector. If Idx != 0, swizzle it into place.
+ if (Idx != 0) {
+ SmallVector<int, 4> Mask;
+ Mask.push_back(Idx);
+ for (unsigned i = 1; i != VecElts; ++i)
+ Mask.push_back(i);
+ Item = DAG.getVectorShuffle(VecVT, dl, Item,
+ DAG.getUNDEF(Item.getValueType()),
+ &Mask[0]);
+ }
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Item);
+ }
+ }
+
+ // If we have a constant or non-constant insertion into the low element of
+ // a vector, we can do this with SCALAR_TO_VECTOR + shuffle of zero into
+ // the rest of the elements. This will be matched as movd/movq/movss/movsd
+ // depending on what the source datatype is.
+ if (Idx == 0) {
+ if (NumZero == 0) {
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
+ } else if (ExtVT == MVT::i32 || ExtVT == MVT::f32 || ExtVT == MVT::f64 ||
+ (ExtVT == MVT::i64 && Subtarget->is64Bit())) {
+ Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
+ // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
+ return getShuffleVectorZeroOrUndef(Item, 0, true,Subtarget->hasXMMInt(),
+ DAG);
+ } else if (ExtVT == MVT::i16 || ExtVT == MVT::i8) {
+ Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item);
+ assert(VT.getSizeInBits() == 128 && "Expected an SSE value type!");
+ EVT MiddleVT = MVT::v4i32;
+ Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MiddleVT, Item);
+ Item = getShuffleVectorZeroOrUndef(Item, 0, true,
+ Subtarget->hasXMMInt(), DAG);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Item);
+ }
+ }
+
+ // Is it a vector logical left shift?
+ if (NumElems == 2 && Idx == 1 &&
+ X86::isZeroNode(Op.getOperand(0)) &&
+ !X86::isZeroNode(Op.getOperand(1))) {
+ unsigned NumBits = VT.getSizeInBits();
+ return getVShift(true, VT,
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
+ VT, Op.getOperand(1)),
+ NumBits/2, DAG, *this, dl);
+ }
+
+ if (IsAllConstants) // Otherwise, it's better to do a constpool load.
+ return SDValue();
+
+ // Otherwise, if this is a vector with i32 or f32 elements, and the element
+ // is a non-constant being inserted into an element other than the low one,
+ // we can't use a constant pool load. Instead, use SCALAR_TO_VECTOR (aka
+ // movd/movss) to move this into the low element, then shuffle it into
+ // place.
+ if (EVTBits == 32) {
+ Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
+
+ // Turn it into a shuffle of zero and zero-extended scalar to vector.
+ Item = getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0,
+ Subtarget->hasXMMInt(), DAG);
+ SmallVector<int, 8> MaskVec;
+ for (unsigned i = 0; i < NumElems; i++)
+ MaskVec.push_back(i == Idx ? 0 : 1);
+ return DAG.getVectorShuffle(VT, dl, Item, DAG.getUNDEF(VT), &MaskVec[0]);
+ }
+ }
+
+ // Splat is obviously ok. Let legalizer expand it to a shuffle.
+ if (Values.size() == 1) {
+ if (EVTBits == 32) {
+ // Instead of a shuffle like this:
+ // shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0>
+ // Check if it's possible to issue this instead.
+ // shuffle (vload ptr)), undef, <1, 1, 1, 1>
+ unsigned Idx = CountTrailingZeros_32(NonZeros);
+ SDValue Item = Op.getOperand(Idx);
+ if (Op.getNode()->isOnlyUserOf(Item.getNode()))
+ return LowerAsSplatVectorLoad(Item, VT, dl, DAG);
+ }
+ return SDValue();
+ }
+
+ // A vector full of immediates; various special cases are already
+ // handled, so this is best done with a single constant-pool load.
+ if (IsAllConstants)
+ return SDValue();
+
+ // For AVX-length vectors, build the individual 128-bit pieces and use
+ // shuffles to put them in place.
+ if (VT.getSizeInBits() == 256 && !ISD::isBuildVectorAllZeros(Op.getNode())) {
+ SmallVector<SDValue, 32> V;
+ for (unsigned i = 0; i < NumElems; ++i)
+ V.push_back(Op.getOperand(i));
+
+ EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElems/2);
+
+ // Build both the lower and upper subvector.
+ SDValue Lower = DAG.getNode(ISD::BUILD_VECTOR, dl, HVT, &V[0], NumElems/2);
+ SDValue Upper = DAG.getNode(ISD::BUILD_VECTOR, dl, HVT, &V[NumElems / 2],
+ NumElems/2);
+
+ // Recreate the wider vector with the lower and upper part.
+ SDValue Vec = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, VT), Lower,
+ DAG.getConstant(0, MVT::i32), DAG, dl);
+ return Insert128BitVector(Vec, Upper, DAG.getConstant(NumElems/2, MVT::i32),
+ DAG, dl);
+ }
+
+ // Let legalizer expand 2-wide build_vectors.
+ if (EVTBits == 64) {
+ if (NumNonZero == 1) {
+ // One half is zero or undef.
+ unsigned Idx = CountTrailingZeros_32(NonZeros);
+ SDValue V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT,
+ Op.getOperand(Idx));
+ return getShuffleVectorZeroOrUndef(V2, Idx, true,
+ Subtarget->hasXMMInt(), DAG);
+ }
+ return SDValue();
+ }
+
+ // If element VT is < 32 bits, convert it to inserts into a zero vector.
+ if (EVTBits == 8 && NumElems == 16) {
+ SDValue V = LowerBuildVectorv16i8(Op, NonZeros,NumNonZero,NumZero, DAG,
+ *this);
+ if (V.getNode()) return V;
+ }
+
+ if (EVTBits == 16 && NumElems == 8) {
+ SDValue V = LowerBuildVectorv8i16(Op, NonZeros,NumNonZero,NumZero, DAG,
+ *this);
+ if (V.getNode()) return V;
+ }
+
+ // If element VT is == 32 bits, turn it into a number of shuffles.
+ SmallVector<SDValue, 8> V;
+ V.resize(NumElems);
+ if (NumElems == 4 && NumZero > 0) {
+ for (unsigned i = 0; i < 4; ++i) {
+ bool isZero = !(NonZeros & (1 << i));
+ if (isZero)
+ V[i] = getZeroVector(VT, Subtarget->hasXMMInt(), DAG, dl);
+ else
+ V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i));
+ }
+
+ for (unsigned i = 0; i < 2; ++i) {
+ switch ((NonZeros & (0x3 << i*2)) >> (i*2)) {
+ default: break;
+ case 0:
+ V[i] = V[i*2]; // Must be a zero vector.
+ break;
+ case 1:
+ V[i] = getMOVL(DAG, dl, VT, V[i*2+1], V[i*2]);
+ break;
+ case 2:
+ V[i] = getMOVL(DAG, dl, VT, V[i*2], V[i*2+1]);
+ break;
+ case 3:
+ V[i] = getUnpackl(DAG, dl, VT, V[i*2], V[i*2+1]);
+ break;
+ }
+ }
+
+ SmallVector<int, 8> MaskVec;
+ bool Reverse = (NonZeros & 0x3) == 2;
+ for (unsigned i = 0; i < 2; ++i)
+ MaskVec.push_back(Reverse ? 1-i : i);
+ Reverse = ((NonZeros & (0x3 << 2)) >> 2) == 2;
+ for (unsigned i = 0; i < 2; ++i)
+ MaskVec.push_back(Reverse ? 1-i+NumElems : i+NumElems);
+ return DAG.getVectorShuffle(VT, dl, V[0], V[1], &MaskVec[0]);
+ }
+
+ if (Values.size() > 1 && VT.getSizeInBits() == 128) {
+ // Check for a build vector of consecutive loads.
+ for (unsigned i = 0; i < NumElems; ++i)
+ V[i] = Op.getOperand(i);
+
+ // Check for elements which are consecutive loads.
+ SDValue LD = EltsFromConsecutiveLoads(VT, V, dl, DAG);
+ if (LD.getNode())
+ return LD;
+
+ // For SSE 4.1, use insertps to put the high elements into the low element.
+ if (getSubtarget()->hasSSE41() || getSubtarget()->hasAVX()) {
+ SDValue Result;
+ if (Op.getOperand(0).getOpcode() != ISD::UNDEF)
+ Result = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(0));
+ else
+ Result = DAG.getUNDEF(VT);
+
+ for (unsigned i = 1; i < NumElems; ++i) {
+ if (Op.getOperand(i).getOpcode() == ISD::UNDEF) continue;
+ Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Result,
+ Op.getOperand(i), DAG.getIntPtrConstant(i));
+ }
+ return Result;
+ }
+
+ // Otherwise, expand into a number of unpckl*, start by extending each of
+ // our (non-undef) elements to the full vector width with the element in the
+ // bottom slot of the vector (which generates no code for SSE).
+ for (unsigned i = 0; i < NumElems; ++i) {
+ if (Op.getOperand(i).getOpcode() != ISD::UNDEF)
+ V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i));
+ else
+ V[i] = DAG.getUNDEF(VT);
+ }
+
+ // Next, we iteratively mix elements, e.g. for v4f32:
+ // Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0>
+ // : unpcklps 1, 3 ==> Y: <?, ?, 3, 1>
+ // Step 2: unpcklps X, Y ==> <3, 2, 1, 0>
+ unsigned EltStride = NumElems >> 1;
+ while (EltStride != 0) {
+ for (unsigned i = 0; i < EltStride; ++i) {
+ // If V[i+EltStride] is undef and this is the first round of mixing,
+ // then it is safe to just drop this shuffle: V[i] is already in the
+ // right place, the one element (since it's the first round) being
+ // inserted as undef can be dropped. This isn't safe for successive
+ // rounds because they will permute elements within both vectors.
+ if (V[i+EltStride].getOpcode() == ISD::UNDEF &&
+ EltStride == NumElems/2)
+ continue;
+
+ V[i] = getUnpackl(DAG, dl, VT, V[i], V[i + EltStride]);
+ }
+ EltStride >>= 1;
+ }
+ return V[0];
+ }
+ return SDValue();
+}
+
+// LowerMMXCONCAT_VECTORS - We support concatenate two MMX registers and place
+// them in a MMX register. This is better than doing a stack convert.
+static SDValue LowerMMXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+ EVT ResVT = Op.getValueType();
+
+ assert(ResVT == MVT::v2i64 || ResVT == MVT::v4i32 ||
+ ResVT == MVT::v8i16 || ResVT == MVT::v16i8);
+ int Mask[2];
+ SDValue InVec = DAG.getNode(ISD::BITCAST,dl, MVT::v1i64, Op.getOperand(0));
+ SDValue VecOp = DAG.getNode(X86ISD::MOVQ2DQ, dl, MVT::v2i64, InVec);
+ InVec = Op.getOperand(1);
+ if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+ unsigned NumElts = ResVT.getVectorNumElements();
+ VecOp = DAG.getNode(ISD::BITCAST, dl, ResVT, VecOp);
+ VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ResVT, VecOp,
+ InVec.getOperand(0), DAG.getIntPtrConstant(NumElts/2+1));
+ } else {
+ InVec = DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, InVec);
+ SDValue VecOp2 = DAG.getNode(X86ISD::MOVQ2DQ, dl, MVT::v2i64, InVec);
+ Mask[0] = 0; Mask[1] = 2;
+ VecOp = DAG.getVectorShuffle(MVT::v2i64, dl, VecOp, VecOp2, Mask);
+ }
+ return DAG.getNode(ISD::BITCAST, dl, ResVT, VecOp);
+}
+
+// LowerAVXCONCAT_VECTORS - 256-bit AVX can use the vinsertf128 instruction
+// to create 256-bit vectors from two other 128-bit ones.
+static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+ EVT ResVT = Op.getValueType();
+
+ assert(ResVT.getSizeInBits() == 256 && "Value type must be 256-bit wide");
+
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ unsigned NumElems = ResVT.getVectorNumElements();
+
+ SDValue V = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, ResVT), V1,
+ DAG.getConstant(0, MVT::i32), DAG, dl);
+ return Insert128BitVector(V, V2, DAG.getConstant(NumElems/2, MVT::i32),
+ DAG, dl);
+}
+
+SDValue
+X86TargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
+ EVT ResVT = Op.getValueType();
+
+ assert(Op.getNumOperands() == 2);
+ assert((ResVT.getSizeInBits() == 128 || ResVT.getSizeInBits() == 256) &&
+ "Unsupported CONCAT_VECTORS for value type");
+
+ // We support concatenate two MMX registers and place them in a MMX register.
+ // This is better than doing a stack convert.
+ if (ResVT.is128BitVector())
+ return LowerMMXCONCAT_VECTORS(Op, DAG);
+
+ // 256-bit AVX can use the vinsertf128 instruction to create 256-bit vectors
+ // from two other 128-bit ones.
+ return LowerAVXCONCAT_VECTORS(Op, DAG);
+}
+
+// v8i16 shuffles - Prefer shuffles in the following order:
+// 1. [all] pshuflw, pshufhw, optional move
+// 2. [ssse3] 1 x pshufb
+// 3. [ssse3] 2 x pshufb + 1 x por
+// 4. [all] mov + pshuflw + pshufhw + N x (pextrw + pinsrw)
+SDValue
+X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op,
+ SelectionDAG &DAG) const {
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ SDValue V1 = SVOp->getOperand(0);
+ SDValue V2 = SVOp->getOperand(1);
+ DebugLoc dl = SVOp->getDebugLoc();
+ SmallVector<int, 8> MaskVals;
+
+ // Determine if more than 1 of the words in each of the low and high quadwords
+ // of the result come from the same quadword of one of the two inputs. Undef
+ // mask values count as coming from any quadword, for better codegen.
+ SmallVector<unsigned, 4> LoQuad(4);
+ SmallVector<unsigned, 4> HiQuad(4);
+ BitVector InputQuads(4);
+ for (unsigned i = 0; i < 8; ++i) {
+ SmallVectorImpl<unsigned> &Quad = i < 4 ? LoQuad : HiQuad;
+ int EltIdx = SVOp->getMaskElt(i);
+ MaskVals.push_back(EltIdx);
+ if (EltIdx < 0) {
+ ++Quad[0];
+ ++Quad[1];
+ ++Quad[2];
+ ++Quad[3];
+ continue;
+ }
+ ++Quad[EltIdx / 4];
+ InputQuads.set(EltIdx / 4);
+ }
+
+ int BestLoQuad = -1;
+ unsigned MaxQuad = 1;
+ for (unsigned i = 0; i < 4; ++i) {
+ if (LoQuad[i] > MaxQuad) {
+ BestLoQuad = i;
+ MaxQuad = LoQuad[i];
+ }
+ }
+
+ int BestHiQuad = -1;
+ MaxQuad = 1;
+ for (unsigned i = 0; i < 4; ++i) {
+ if (HiQuad[i] > MaxQuad) {
+ BestHiQuad = i;
+ MaxQuad = HiQuad[i];
+ }
+ }
+
+ // For SSSE3, If all 8 words of the result come from only 1 quadword of each
+ // of the two input vectors, shuffle them into one input vector so only a
+ // single pshufb instruction is necessary. If There are more than 2 input
+ // quads, disable the next transformation since it does not help SSSE3.
+ bool V1Used = InputQuads[0] || InputQuads[1];
+ bool V2Used = InputQuads[2] || InputQuads[3];
+ if (Subtarget->hasSSSE3() || Subtarget->hasAVX()) {
+ if (InputQuads.count() == 2 && V1Used && V2Used) {
+ BestLoQuad = InputQuads.find_first();
+ BestHiQuad = InputQuads.find_next(BestLoQuad);
+ }
+ if (InputQuads.count() > 2) {
+ BestLoQuad = -1;
+ BestHiQuad = -1;
+ }
+ }
+
+ // If BestLoQuad or BestHiQuad are set, shuffle the quads together and update
+ // the shuffle mask. If a quad is scored as -1, that means that it contains
+ // words from all 4 input quadwords.
+ SDValue NewV;
+ if (BestLoQuad >= 0 || BestHiQuad >= 0) {
+ SmallVector<int, 8> MaskV;
+ MaskV.push_back(BestLoQuad < 0 ? 0 : BestLoQuad);
+ MaskV.push_back(BestHiQuad < 0 ? 1 : BestHiQuad);
+ NewV = DAG.getVectorShuffle(MVT::v2i64, dl,
+ DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1),
+ DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2), &MaskV[0]);
+ NewV = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, NewV);
+
+ // Rewrite the MaskVals and assign NewV to V1 if NewV now contains all the
+ // source words for the shuffle, to aid later transformations.
+ bool AllWordsInNewV = true;
+ bool InOrder[2] = { true, true };
+ for (unsigned i = 0; i != 8; ++i) {
+ int idx = MaskVals[i];
+ if (idx != (int)i)
+ InOrder[i/4] = false;
+ if (idx < 0 || (idx/4) == BestLoQuad || (idx/4) == BestHiQuad)
+ continue;
+ AllWordsInNewV = false;
+ break;
+ }
+
+ bool pshuflw = AllWordsInNewV, pshufhw = AllWordsInNewV;
+ if (AllWordsInNewV) {
+ for (int i = 0; i != 8; ++i) {
+ int idx = MaskVals[i];
+ if (idx < 0)
+ continue;
+ idx = MaskVals[i] = (idx / 4) == BestLoQuad ? (idx & 3) : (idx & 3) + 4;
+ if ((idx != i) && idx < 4)
+ pshufhw = false;
+ if ((idx != i) && idx > 3)
+ pshuflw = false;
+ }
+ V1 = NewV;
+ V2Used = false;
+ BestLoQuad = 0;
+ BestHiQuad = 1;
+ }
+
+ // If we've eliminated the use of V2, and the new mask is a pshuflw or
+ // pshufhw, that's as cheap as it gets. Return the new shuffle.
+ if ((pshufhw && InOrder[0]) || (pshuflw && InOrder[1])) {
+ unsigned Opc = pshufhw ? X86ISD::PSHUFHW : X86ISD::PSHUFLW;
+ unsigned TargetMask = 0;
+ NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV,
+ DAG.getUNDEF(MVT::v8i16), &MaskVals[0]);
+ TargetMask = pshufhw ? X86::getShufflePSHUFHWImmediate(NewV.getNode()):
+ X86::getShufflePSHUFLWImmediate(NewV.getNode());
+ V1 = NewV.getOperand(0);
+ return getTargetShuffleNode(Opc, dl, MVT::v8i16, V1, TargetMask, DAG);
+ }
+ }
+
+ // If we have SSSE3, and all words of the result are from 1 input vector,
+ // case 2 is generated, otherwise case 3 is generated. If no SSSE3
+ // is present, fall back to case 4.
+ if (Subtarget->hasSSSE3() || Subtarget->hasAVX()) {
+ SmallVector<SDValue,16> pshufbMask;
+
+ // If we have elements from both input vectors, set the high bit of the
+ // shuffle mask element to zero out elements that come from V2 in the V1
+ // mask, and elements that come from V1 in the V2 mask, so that the two
+ // results can be OR'd together.
+ bool TwoInputs = V1Used && V2Used;
+ for (unsigned i = 0; i != 8; ++i) {
+ int EltIdx = MaskVals[i] * 2;
+ if (TwoInputs && (EltIdx >= 16)) {
+ pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8));
+ pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8));
+ continue;
+ }
+ pshufbMask.push_back(DAG.getConstant(EltIdx, MVT::i8));
+ pshufbMask.push_back(DAG.getConstant(EltIdx+1, MVT::i8));
+ }
+ V1 = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, V1);
+ V1 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V1,
+ DAG.getNode(ISD::BUILD_VECTOR, dl,
+ MVT::v16i8, &pshufbMask[0], 16));
+ if (!TwoInputs)
+ return DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
+
+ // Calculate the shuffle mask for the second input, shuffle it, and
+ // OR it with the first shuffled input.
+ pshufbMask.clear();
+ for (unsigned i = 0; i != 8; ++i) {
+ int EltIdx = MaskVals[i] * 2;
+ if (EltIdx < 16) {
+ pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8));
+ pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8));
+ continue;
+ }
+ pshufbMask.push_back(DAG.getConstant(EltIdx - 16, MVT::i8));
+ pshufbMask.push_back(DAG.getConstant(EltIdx - 15, MVT::i8));
+ }
+ V2 = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, V2);
+ V2 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V2,
+ DAG.getNode(ISD::BUILD_VECTOR, dl,
+ MVT::v16i8, &pshufbMask[0], 16));
+ V1 = DAG.getNode(ISD::OR, dl, MVT::v16i8, V1, V2);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
+ }
+
+ // If BestLoQuad >= 0, generate a pshuflw to put the low elements in order,
+ // and update MaskVals with new element order.
+ BitVector InOrder(8);
+ if (BestLoQuad >= 0) {
+ SmallVector<int, 8> MaskV;
+ for (int i = 0; i != 4; ++i) {
+ int idx = MaskVals[i];
+ if (idx < 0) {
+ MaskV.push_back(-1);
+ InOrder.set(i);
+ } else if ((idx / 4) == BestLoQuad) {
+ MaskV.push_back(idx & 3);
+ InOrder.set(i);
+ } else {
+ MaskV.push_back(-1);
+ }
+ }
+ for (unsigned i = 4; i != 8; ++i)
+ MaskV.push_back(i);
+ NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16),
+ &MaskV[0]);
+
+ if (NewV.getOpcode() == ISD::VECTOR_SHUFFLE &&
+ (Subtarget->hasSSSE3() || Subtarget->hasAVX()))
+ NewV = getTargetShuffleNode(X86ISD::PSHUFLW, dl, MVT::v8i16,
+ NewV.getOperand(0),
+ X86::getShufflePSHUFLWImmediate(NewV.getNode()),
+ DAG);
+ }
+
+ // If BestHi >= 0, generate a pshufhw to put the high elements in order,
+ // and update MaskVals with the new element order.
+ if (BestHiQuad >= 0) {
+ SmallVector<int, 8> MaskV;
+ for (unsigned i = 0; i != 4; ++i)
+ MaskV.push_back(i);
+ for (unsigned i = 4; i != 8; ++i) {
+ int idx = MaskVals[i];
+ if (idx < 0) {
+ MaskV.push_back(-1);
+ InOrder.set(i);
+ } else if ((idx / 4) == BestHiQuad) {
+ MaskV.push_back((idx & 3) + 4);
+ InOrder.set(i);
+ } else {
+ MaskV.push_back(-1);
+ }
+ }
+ NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16),
+ &MaskV[0]);
+
+ if (NewV.getOpcode() == ISD::VECTOR_SHUFFLE &&
+ (Subtarget->hasSSSE3() || Subtarget->hasAVX()))
+ NewV = getTargetShuffleNode(X86ISD::PSHUFHW, dl, MVT::v8i16,
+ NewV.getOperand(0),
+ X86::getShufflePSHUFHWImmediate(NewV.getNode()),
+ DAG);
+ }
+
+ // In case BestHi & BestLo were both -1, which means each quadword has a word
+ // from each of the four input quadwords, calculate the InOrder bitvector now
+ // before falling through to the insert/extract cleanup.
+ if (BestLoQuad == -1 && BestHiQuad == -1) {
+ NewV = V1;
+ for (int i = 0; i != 8; ++i)
+ if (MaskVals[i] < 0 || MaskVals[i] == i)
+ InOrder.set(i);
+ }
+
+ // The other elements are put in the right place using pextrw and pinsrw.
+ for (unsigned i = 0; i != 8; ++i) {
+ if (InOrder[i])
+ continue;
+ int EltIdx = MaskVals[i];
+ if (EltIdx < 0)
+ continue;
+ SDValue ExtOp = (EltIdx < 8)
+ ? DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, V1,
+ DAG.getIntPtrConstant(EltIdx))
+ : DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, V2,
+ DAG.getIntPtrConstant(EltIdx - 8));
+ NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, NewV, ExtOp,
+ DAG.getIntPtrConstant(i));
+ }
+ return NewV;
+}
+
+// v16i8 shuffles - Prefer shuffles in the following order:
+// 1. [ssse3] 1 x pshufb
+// 2. [ssse3] 2 x pshufb + 1 x por
+// 3. [all] v8i16 shuffle + N x pextrw + rotate + pinsrw
+static
+SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
+ SelectionDAG &DAG,
+ const X86TargetLowering &TLI) {
+ SDValue V1 = SVOp->getOperand(0);
+ SDValue V2 = SVOp->getOperand(1);
+ DebugLoc dl = SVOp->getDebugLoc();
+ SmallVector<int, 16> MaskVals;
+ SVOp->getMask(MaskVals);
+
+ // If we have SSSE3, case 1 is generated when all result bytes come from
+ // one of the inputs. Otherwise, case 2 is generated. If no SSSE3 is
+ // present, fall back to case 3.
+ // FIXME: kill V2Only once shuffles are canonizalized by getNode.
+ bool V1Only = true;
+ bool V2Only = true;
+ for (unsigned i = 0; i < 16; ++i) {
+ int EltIdx = MaskVals[i];
+ if (EltIdx < 0)
+ continue;
+ if (EltIdx < 16)
+ V2Only = false;
+ else
+ V1Only = false;
+ }
+
+ // If SSSE3, use 1 pshufb instruction per vector with elements in the result.
+ if (TLI.getSubtarget()->hasSSSE3() || TLI.getSubtarget()->hasAVX()) {
+ SmallVector<SDValue,16> pshufbMask;
+
+ // If all result elements are from one input vector, then only translate
+ // undef mask values to 0x80 (zero out result) in the pshufb mask.
+ //
+ // Otherwise, we have elements from both input vectors, and must zero out
+ // elements that come from V2 in the first mask, and V1 in the second mask
+ // so that we can OR them together.
+ bool TwoInputs = !(V1Only || V2Only);
+ for (unsigned i = 0; i != 16; ++i) {
+ int EltIdx = MaskVals[i];
+ if (EltIdx < 0 || (TwoInputs && EltIdx >= 16)) {
+ pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8));
+ continue;
+ }
+ pshufbMask.push_back(DAG.getConstant(EltIdx, MVT::i8));
+ }
+ // If all the elements are from V2, assign it to V1 and return after
+ // building the first pshufb.
+ if (V2Only)
+ V1 = V2;
+ V1 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V1,
+ DAG.getNode(ISD::BUILD_VECTOR, dl,
+ MVT::v16i8, &pshufbMask[0], 16));
+ if (!TwoInputs)
+ return V1;
+
+ // Calculate the shuffle mask for the second input, shuffle it, and
+ // OR it with the first shuffled input.
+ pshufbMask.clear();
+ for (unsigned i = 0; i != 16; ++i) {
+ int EltIdx = MaskVals[i];
+ if (EltIdx < 16) {
+ pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8));
+ continue;
+ }
+ pshufbMask.push_back(DAG.getConstant(EltIdx - 16, MVT::i8));
+ }
+ V2 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V2,
+ DAG.getNode(ISD::BUILD_VECTOR, dl,
+ MVT::v16i8, &pshufbMask[0], 16));
+ return DAG.getNode(ISD::OR, dl, MVT::v16i8, V1, V2);
+ }
+
+ // No SSSE3 - Calculate in place words and then fix all out of place words
+ // With 0-16 extracts & inserts. Worst case is 16 bytes out of order from
+ // the 16 different words that comprise the two doublequadword input vectors.
+ V1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
+ V2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
+ SDValue NewV = V2Only ? V2 : V1;
+ for (int i = 0; i != 8; ++i) {
+ int Elt0 = MaskVals[i*2];
+ int Elt1 = MaskVals[i*2+1];
+
+ // This word of the result is all undef, skip it.
+ if (Elt0 < 0 && Elt1 < 0)
+ continue;
+
+ // This word of the result is already in the correct place, skip it.
+ if (V1Only && (Elt0 == i*2) && (Elt1 == i*2+1))
+ continue;
+ if (V2Only && (Elt0 == i*2+16) && (Elt1 == i*2+17))
+ continue;
+
+ SDValue Elt0Src = Elt0 < 16 ? V1 : V2;
+ SDValue Elt1Src = Elt1 < 16 ? V1 : V2;
+ SDValue InsElt;
+
+ // If Elt0 and Elt1 are defined, are consecutive, and can be load
+ // using a single extract together, load it and store it.
+ if ((Elt0 >= 0) && ((Elt0 + 1) == Elt1) && ((Elt0 & 1) == 0)) {
+ InsElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, Elt1Src,
+ DAG.getIntPtrConstant(Elt1 / 2));
+ NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, NewV, InsElt,
+ DAG.getIntPtrConstant(i));
+ continue;
+ }
+
+ // If Elt1 is defined, extract it from the appropriate source. If the
+ // source byte is not also odd, shift the extracted word left 8 bits
+ // otherwise clear the bottom 8 bits if we need to do an or.
+ if (Elt1 >= 0) {
+ InsElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, Elt1Src,
+ DAG.getIntPtrConstant(Elt1 / 2));
+ if ((Elt1 & 1) == 0)
+ InsElt = DAG.getNode(ISD::SHL, dl, MVT::i16, InsElt,
+ DAG.getConstant(8,
+ TLI.getShiftAmountTy(InsElt.getValueType())));
+ else if (Elt0 >= 0)
+ InsElt = DAG.getNode(ISD::AND, dl, MVT::i16, InsElt,
+ DAG.getConstant(0xFF00, MVT::i16));
+ }
+ // If Elt0 is defined, extract it from the appropriate source. If the
+ // source byte is not also even, shift the extracted word right 8 bits. If
+ // Elt1 was also defined, OR the extracted values together before
+ // inserting them in the result.
+ if (Elt0 >= 0) {
+ SDValue InsElt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16,
+ Elt0Src, DAG.getIntPtrConstant(Elt0 / 2));
+ if ((Elt0 & 1) != 0)
+ InsElt0 = DAG.getNode(ISD::SRL, dl, MVT::i16, InsElt0,
+ DAG.getConstant(8,
+ TLI.getShiftAmountTy(InsElt0.getValueType())));
+ else if (Elt1 >= 0)
+ InsElt0 = DAG.getNode(ISD::AND, dl, MVT::i16, InsElt0,
+ DAG.getConstant(0x00FF, MVT::i16));
+ InsElt = Elt1 >= 0 ? DAG.getNode(ISD::OR, dl, MVT::i16, InsElt, InsElt0)
+ : InsElt0;
+ }
+ NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, NewV, InsElt,
+ DAG.getIntPtrConstant(i));
+ }
+ return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, NewV);
+}
+
+/// RewriteAsNarrowerShuffle - Try rewriting v8i16 and v16i8 shuffles as 4 wide
+/// ones, or rewriting v4i32 / v4f32 as 2 wide ones if possible. This can be
+/// done when every pair / quad of shuffle mask elements point to elements in
+/// the right sequence. e.g.
+/// vector_shuffle X, Y, <2, 3, | 10, 11, | 0, 1, | 14, 15>
+static
+SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp,
+ SelectionDAG &DAG, DebugLoc dl) {
+ EVT VT = SVOp->getValueType(0);
+ SDValue V1 = SVOp->getOperand(0);
+ SDValue V2 = SVOp->getOperand(1);
+ unsigned NumElems = VT.getVectorNumElements();
+ unsigned NewWidth = (NumElems == 4) ? 2 : 4;
+ EVT NewVT;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: assert(false && "Unexpected!");
+ case MVT::v4f32: NewVT = MVT::v2f64; break;
+ case MVT::v4i32: NewVT = MVT::v2i64; break;
+ case MVT::v8i16: NewVT = MVT::v4i32; break;
+ case MVT::v16i8: NewVT = MVT::v4i32; break;
+ }
+
+ int Scale = NumElems / NewWidth;
+ SmallVector<int, 8> MaskVec;
+ for (unsigned i = 0; i < NumElems; i += Scale) {
+ int StartIdx = -1;
+ for (int j = 0; j < Scale; ++j) {
+ int EltIdx = SVOp->getMaskElt(i+j);
+ if (EltIdx < 0)
+ continue;
+ if (StartIdx == -1)
+ StartIdx = EltIdx - (EltIdx % Scale);
+ if (EltIdx != StartIdx + j)
+ return SDValue();
+ }
+ if (StartIdx == -1)
+ MaskVec.push_back(-1);
+ else
+ MaskVec.push_back(StartIdx / Scale);
+ }
+
+ V1 = DAG.getNode(ISD::BITCAST, dl, NewVT, V1);
+ V2 = DAG.getNode(ISD::BITCAST, dl, NewVT, V2);
+ return DAG.getVectorShuffle(NewVT, dl, V1, V2, &MaskVec[0]);
+}
+
+/// getVZextMovL - Return a zero-extending vector move low node.
+///
+static SDValue getVZextMovL(EVT VT, EVT OpVT,
+ SDValue SrcOp, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget, DebugLoc dl) {
+ if (VT == MVT::v2f64 || VT == MVT::v4f32) {
+ LoadSDNode *LD = NULL;
+ if (!isScalarLoadToVector(SrcOp.getNode(), &LD))
+ LD = dyn_cast<LoadSDNode>(SrcOp);
+ if (!LD) {
+ // movssrr and movsdrr do not clear top bits. Try to use movd, movq
+ // instead.
+ MVT ExtVT = (OpVT == MVT::v2f64) ? MVT::i64 : MVT::i32;
+ if ((ExtVT != MVT::i64 || Subtarget->is64Bit()) &&
+ SrcOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
+ SrcOp.getOperand(0).getOpcode() == ISD::BITCAST &&
+ SrcOp.getOperand(0).getOperand(0).getValueType() == ExtVT) {
+ // PR2108
+ OpVT = (OpVT == MVT::v2f64) ? MVT::v2i64 : MVT::v4i32;
+ return DAG.getNode(ISD::BITCAST, dl, VT,
+ DAG.getNode(X86ISD::VZEXT_MOVL, dl, OpVT,
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
+ OpVT,
+ SrcOp.getOperand(0)
+ .getOperand(0))));
+ }
+ }
+ }
+
+ return DAG.getNode(ISD::BITCAST, dl, VT,
+ DAG.getNode(X86ISD::VZEXT_MOVL, dl, OpVT,
+ DAG.getNode(ISD::BITCAST, dl,
+ OpVT, SrcOp)));
+}
+
+/// areShuffleHalvesWithinDisjointLanes - Check whether each half of a vector
+/// shuffle node referes to only one lane in the sources.
+static bool areShuffleHalvesWithinDisjointLanes(ShuffleVectorSDNode *SVOp) {
+ EVT VT = SVOp->getValueType(0);
+ int NumElems = VT.getVectorNumElements();
+ int HalfSize = NumElems/2;
+ SmallVector<int, 16> M;
+ SVOp->getMask(M);
+ bool MatchA = false, MatchB = false;
+
+ for (int l = 0; l < NumElems*2; l += HalfSize) {
+ if (isUndefOrInRange(M, 0, HalfSize, l, l+HalfSize)) {
+ MatchA = true;
+ break;
+ }
+ }
+
+ for (int l = 0; l < NumElems*2; l += HalfSize) {
+ if (isUndefOrInRange(M, HalfSize, HalfSize, l, l+HalfSize)) {
+ MatchB = true;
+ break;
+ }
+ }
+
+ return MatchA && MatchB;
+}
+
+/// LowerVECTOR_SHUFFLE_256 - Handle all 256-bit wide vectors shuffles
+/// which could not be matched by any known target speficic shuffle
+static SDValue
+LowerVECTOR_SHUFFLE_256(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
+ if (areShuffleHalvesWithinDisjointLanes(SVOp)) {
+ // If each half of a vector shuffle node referes to only one lane in the
+ // source vectors, extract each used 128-bit lane and shuffle them using
+ // 128-bit shuffles. Then, concatenate the results. Otherwise leave
+ // the work to the legalizer.
+ DebugLoc dl = SVOp->getDebugLoc();
+ EVT VT = SVOp->getValueType(0);
+ int NumElems = VT.getVectorNumElements();
+ int HalfSize = NumElems/2;
+
+ // Extract the reference for each half
+ int FstVecExtractIdx = 0, SndVecExtractIdx = 0;
+ int FstVecOpNum = 0, SndVecOpNum = 0;
+ for (int i = 0; i < HalfSize; ++i) {
+ int Elt = SVOp->getMaskElt(i);
+ if (SVOp->getMaskElt(i) < 0)
+ continue;
+ FstVecOpNum = Elt/NumElems;
+ FstVecExtractIdx = Elt % NumElems < HalfSize ? 0 : HalfSize;
+ break;
+ }
+ for (int i = HalfSize; i < NumElems; ++i) {
+ int Elt = SVOp->getMaskElt(i);
+ if (SVOp->getMaskElt(i) < 0)
+ continue;
+ SndVecOpNum = Elt/NumElems;
+ SndVecExtractIdx = Elt % NumElems < HalfSize ? 0 : HalfSize;
+ break;
+ }
+
+ // Extract the subvectors
+ SDValue V1 = Extract128BitVector(SVOp->getOperand(FstVecOpNum),
+ DAG.getConstant(FstVecExtractIdx, MVT::i32), DAG, dl);
+ SDValue V2 = Extract128BitVector(SVOp->getOperand(SndVecOpNum),
+ DAG.getConstant(SndVecExtractIdx, MVT::i32), DAG, dl);
+
+ // Generate 128-bit shuffles
+ SmallVector<int, 16> MaskV1, MaskV2;
+ for (int i = 0; i < HalfSize; ++i) {
+ int Elt = SVOp->getMaskElt(i);
+ MaskV1.push_back(Elt < 0 ? Elt : Elt % HalfSize);
+ }
+ for (int i = HalfSize; i < NumElems; ++i) {
+ int Elt = SVOp->getMaskElt(i);
+ MaskV2.push_back(Elt < 0 ? Elt : Elt % HalfSize);
+ }
+
+ EVT NVT = V1.getValueType();
+ V1 = DAG.getVectorShuffle(NVT, dl, V1, DAG.getUNDEF(NVT), &MaskV1[0]);
+ V2 = DAG.getVectorShuffle(NVT, dl, V2, DAG.getUNDEF(NVT), &MaskV2[0]);
+
+ // Concatenate the result back
+ SDValue V = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, VT), V1,
+ DAG.getConstant(0, MVT::i32), DAG, dl);
+ return Insert128BitVector(V, V2, DAG.getConstant(NumElems/2, MVT::i32),
+ DAG, dl);
+ }
+
+ return SDValue();
+}
+
+/// LowerVECTOR_SHUFFLE_128v4 - Handle all 128-bit wide vectors with
+/// 4 elements, and match them with several different shuffle types.
+static SDValue
+LowerVECTOR_SHUFFLE_128v4(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
+ SDValue V1 = SVOp->getOperand(0);
+ SDValue V2 = SVOp->getOperand(1);
+ DebugLoc dl = SVOp->getDebugLoc();
+ EVT VT = SVOp->getValueType(0);
+
+ assert(VT.getSizeInBits() == 128 && "Unsupported vector size");
+
+ SmallVector<std::pair<int, int>, 8> Locs;
+ Locs.resize(4);
+ SmallVector<int, 8> Mask1(4U, -1);
+ SmallVector<int, 8> PermMask;
+ SVOp->getMask(PermMask);
+
+ unsigned NumHi = 0;
+ unsigned NumLo = 0;
+ for (unsigned i = 0; i != 4; ++i) {
+ int Idx = PermMask[i];
+ if (Idx < 0) {
+ Locs[i] = std::make_pair(-1, -1);
+ } else {
+ assert(Idx < 8 && "Invalid VECTOR_SHUFFLE index!");
+ if (Idx < 4) {
+ Locs[i] = std::make_pair(0, NumLo);
+ Mask1[NumLo] = Idx;
+ NumLo++;
+ } else {
+ Locs[i] = std::make_pair(1, NumHi);
+ if (2+NumHi < 4)
+ Mask1[2+NumHi] = Idx;
+ NumHi++;
+ }
+ }
+ }
+
+ if (NumLo <= 2 && NumHi <= 2) {
+ // If no more than two elements come from either vector. This can be
+ // implemented with two shuffles. First shuffle gather the elements.
+ // The second shuffle, which takes the first shuffle as both of its
+ // vector operands, put the elements into the right order.
+ V1 = DAG.getVectorShuffle(VT, dl, V1, V2, &Mask1[0]);
+
+ SmallVector<int, 8> Mask2(4U, -1);
+
+ for (unsigned i = 0; i != 4; ++i) {
+ if (Locs[i].first == -1)
+ continue;
+ else {
+ unsigned Idx = (i < 2) ? 0 : 4;
+ Idx += Locs[i].first * 2 + Locs[i].second;
+ Mask2[i] = Idx;
+ }
+ }
+
+ return DAG.getVectorShuffle(VT, dl, V1, V1, &Mask2[0]);
+ } else if (NumLo == 3 || NumHi == 3) {
+ // Otherwise, we must have three elements from one vector, call it X, and
+ // one element from the other, call it Y. First, use a shufps to build an
+ // intermediate vector with the one element from Y and the element from X
+ // that will be in the same half in the final destination (the indexes don't
+ // matter). Then, use a shufps to build the final vector, taking the half
+ // containing the element from Y from the intermediate, and the other half
+ // from X.
+ if (NumHi == 3) {
+ // Normalize it so the 3 elements come from V1.
+ CommuteVectorShuffleMask(PermMask, VT);
+ std::swap(V1, V2);
+ }
+
+ // Find the element from V2.
+ unsigned HiIndex;
+ for (HiIndex = 0; HiIndex < 3; ++HiIndex) {
+ int Val = PermMask[HiIndex];
+ if (Val < 0)
+ continue;
+ if (Val >= 4)
+ break;
+ }
+
+ Mask1[0] = PermMask[HiIndex];
+ Mask1[1] = -1;
+ Mask1[2] = PermMask[HiIndex^1];
+ Mask1[3] = -1;
+ V2 = DAG.getVectorShuffle(VT, dl, V1, V2, &Mask1[0]);
+
+ if (HiIndex >= 2) {
+ Mask1[0] = PermMask[0];
+ Mask1[1] = PermMask[1];
+ Mask1[2] = HiIndex & 1 ? 6 : 4;
+ Mask1[3] = HiIndex & 1 ? 4 : 6;
+ return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask1[0]);
+ } else {
+ Mask1[0] = HiIndex & 1 ? 2 : 0;
+ Mask1[1] = HiIndex & 1 ? 0 : 2;
+ Mask1[2] = PermMask[2];
+ Mask1[3] = PermMask[3];
+ if (Mask1[2] >= 0)
+ Mask1[2] += 4;
+ if (Mask1[3] >= 0)
+ Mask1[3] += 4;
+ return DAG.getVectorShuffle(VT, dl, V2, V1, &Mask1[0]);
+ }
+ }
+
+ // Break it into (shuffle shuffle_hi, shuffle_lo).
+ Locs.clear();
+ Locs.resize(4);
+ SmallVector<int,8> LoMask(4U, -1);
+ SmallVector<int,8> HiMask(4U, -1);
+
+ SmallVector<int,8> *MaskPtr = &LoMask;
+ unsigned MaskIdx = 0;
+ unsigned LoIdx = 0;
+ unsigned HiIdx = 2;
+ for (unsigned i = 0; i != 4; ++i) {
+ if (i == 2) {
+ MaskPtr = &HiMask;
+ MaskIdx = 1;
+ LoIdx = 0;
+ HiIdx = 2;
+ }
+ int Idx = PermMask[i];
+ if (Idx < 0) {
+ Locs[i] = std::make_pair(-1, -1);
+ } else if (Idx < 4) {
+ Locs[i] = std::make_pair(MaskIdx, LoIdx);
+ (*MaskPtr)[LoIdx] = Idx;
+ LoIdx++;
+ } else {
+ Locs[i] = std::make_pair(MaskIdx, HiIdx);
+ (*MaskPtr)[HiIdx] = Idx;
+ HiIdx++;
+ }
+ }
+
+ SDValue LoShuffle = DAG.getVectorShuffle(VT, dl, V1, V2, &LoMask[0]);
+ SDValue HiShuffle = DAG.getVectorShuffle(VT, dl, V1, V2, &HiMask[0]);
+ SmallVector<int, 8> MaskOps;
+ for (unsigned i = 0; i != 4; ++i) {
+ if (Locs[i].first == -1) {
+ MaskOps.push_back(-1);
+ } else {
+ unsigned Idx = Locs[i].first * 4 + Locs[i].second;
+ MaskOps.push_back(Idx);
+ }
+ }
+ return DAG.getVectorShuffle(VT, dl, LoShuffle, HiShuffle, &MaskOps[0]);
+}
+
+static bool MayFoldVectorLoad(SDValue V) {
+ if (V.hasOneUse() && V.getOpcode() == ISD::BITCAST)
+ V = V.getOperand(0);
+ if (V.hasOneUse() && V.getOpcode() == ISD::SCALAR_TO_VECTOR)
+ V = V.getOperand(0);
+ if (MayFoldLoad(V))
+ return true;
+ return false;
+}
+
+// FIXME: the version above should always be used. Since there's
+// a bug where several vector shuffles can't be folded because the
+// DAG is not updated during lowering and a node claims to have two
+// uses while it only has one, use this version, and let isel match
+// another instruction if the load really happens to have more than
+// one use. Remove this version after this bug get fixed.
+// rdar://8434668, PR8156
+static bool RelaxedMayFoldVectorLoad(SDValue V) {
+ if (V.hasOneUse() && V.getOpcode() == ISD::BITCAST)
+ V = V.getOperand(0);
+ if (V.hasOneUse() && V.getOpcode() == ISD::SCALAR_TO_VECTOR)
+ V = V.getOperand(0);
+ if (ISD::isNormalLoad(V.getNode()))
+ return true;
+ return false;
+}
+
+/// CanFoldShuffleIntoVExtract - Check if the current shuffle is used by
+/// a vector extract, and if both can be later optimized into a single load.
+/// This is done in visitEXTRACT_VECTOR_ELT and the conditions are checked
+/// here because otherwise a target specific shuffle node is going to be
+/// emitted for this shuffle, and the optimization not done.
+/// FIXME: This is probably not the best approach, but fix the problem
+/// until the right path is decided.
+static
+bool CanXFormVExtractWithShuffleIntoLoad(SDValue V, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ EVT VT = V.getValueType();
+ ShuffleVectorSDNode *SVOp = dyn_cast<ShuffleVectorSDNode>(V);
+
+ // Be sure that the vector shuffle is present in a pattern like this:
+ // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), c) -> (f32 load $addr)
+ if (!V.hasOneUse())
+ return false;
+
+ SDNode *N = *V.getNode()->use_begin();
+ if (N->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+ return false;
+
+ SDValue EltNo = N->getOperand(1);
+ if (!isa<ConstantSDNode>(EltNo))
+ return false;
+
+ // If the bit convert changed the number of elements, it is unsafe
+ // to examine the mask.
+ bool HasShuffleIntoBitcast = false;
+ if (V.getOpcode() == ISD::BITCAST) {
+ EVT SrcVT = V.getOperand(0).getValueType();
+ if (SrcVT.getVectorNumElements() != VT.getVectorNumElements())
+ return false;
+ V = V.getOperand(0);
+ HasShuffleIntoBitcast = true;
+ }
+
+ // Select the input vector, guarding against out of range extract vector.
+ unsigned NumElems = VT.getVectorNumElements();
+ unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+ int Idx = (Elt > NumElems) ? -1 : SVOp->getMaskElt(Elt);
+ V = (Idx < (int)NumElems) ? V.getOperand(0) : V.getOperand(1);
+
+ // Skip one more bit_convert if necessary
+ if (V.getOpcode() == ISD::BITCAST)
+ V = V.getOperand(0);
+
+ if (ISD::isNormalLoad(V.getNode())) {
+ // Is the original load suitable?
+ LoadSDNode *LN0 = cast<LoadSDNode>(V);
+
+ // FIXME: avoid the multi-use bug that is preventing lots of
+ // of foldings to be detected, this is still wrong of course, but
+ // give the temporary desired behavior, and if it happens that
+ // the load has real more uses, during isel it will not fold, and
+ // will generate poor code.
+ if (!LN0 || LN0->isVolatile()) // || !LN0->hasOneUse()
+ return false;
+
+ if (!HasShuffleIntoBitcast)
+ return true;
+
+ // If there's a bitcast before the shuffle, check if the load type and
+ // alignment is valid.
+ unsigned Align = LN0->getAlignment();
+ unsigned NewAlign =
+ TLI.getTargetData()->getABITypeAlignment(
+ VT.getTypeForEVT(*DAG.getContext()));
+
+ if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VT))
+ return false;
+ }
+
+ return true;
+}
+
+static
+SDValue getMOVDDup(SDValue &Op, DebugLoc &dl, SDValue V1, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
+
+ // Canonizalize to v2f64.
+ V1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
+ return DAG.getNode(ISD::BITCAST, dl, VT,
+ getTargetShuffleNode(X86ISD::MOVDDUP, dl, MVT::v2f64,
+ V1, DAG));
+}
+
+static
+SDValue getMOVLowToHigh(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG,
+ bool HasXMMInt) {
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ EVT VT = Op.getValueType();
+
+ assert(VT != MVT::v2i64 && "unsupported shuffle type");
+
+ if (HasXMMInt && VT == MVT::v2f64)
+ return getTargetShuffleNode(X86ISD::MOVLHPD, dl, VT, V1, V2, DAG);
+
+ // v4f32 or v4i32: canonizalized to v4f32 (which is legal for SSE1)
+ return DAG.getNode(ISD::BITCAST, dl, VT,
+ getTargetShuffleNode(X86ISD::MOVLHPS, dl, MVT::v4f32,
+ DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, V1),
+ DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, V2), DAG));
+}
+
+static
+SDValue getMOVHighToLow(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG) {
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ EVT VT = Op.getValueType();
+
+ assert((VT == MVT::v4i32 || VT == MVT::v4f32) &&
+ "unsupported shuffle type");
+
+ if (V2.getOpcode() == ISD::UNDEF)
+ V2 = V1;
+
+ // v4i32 or v4f32
+ return getTargetShuffleNode(X86ISD::MOVHLPS, dl, VT, V1, V2, DAG);
+}
+
+static inline unsigned getSHUFPOpcode(EVT VT) {
+ switch(VT.getSimpleVT().SimpleTy) {
+ case MVT::v8i32: // Use fp unit for int unpack.
+ case MVT::v8f32:
+ case MVT::v4i32: // Use fp unit for int unpack.
+ case MVT::v4f32: return X86ISD::SHUFPS;
+ case MVT::v4i64: // Use fp unit for int unpack.
+ case MVT::v4f64:
+ case MVT::v2i64: // Use fp unit for int unpack.
+ case MVT::v2f64: return X86ISD::SHUFPD;
+ default:
+ llvm_unreachable("Unknown type for shufp*");
+ }
+ return 0;
+}
+
+static
+SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasXMMInt) {
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ EVT VT = Op.getValueType();
+ unsigned NumElems = VT.getVectorNumElements();
+
+ // Use MOVLPS and MOVLPD in case V1 or V2 are loads. During isel, the second
+ // operand of these instructions is only memory, so check if there's a
+ // potencial load folding here, otherwise use SHUFPS or MOVSD to match the
+ // same masks.
+ bool CanFoldLoad = false;
+
+ // Trivial case, when V2 comes from a load.
+ if (MayFoldVectorLoad(V2))
+ CanFoldLoad = true;
+
+ // When V1 is a load, it can be folded later into a store in isel, example:
+ // (store (v4f32 (X86Movlps (load addr:$src1), VR128:$src2)), addr:$src1)
+ // turns into:
+ // (MOVLPSmr addr:$src1, VR128:$src2)
+ // So, recognize this potential and also use MOVLPS or MOVLPD
+ if (MayFoldVectorLoad(V1) && MayFoldIntoStore(Op))
+ CanFoldLoad = true;
+
+ // Both of them can't be memory operations though.
+ if (MayFoldVectorLoad(V1) && MayFoldVectorLoad(V2))
+ CanFoldLoad = false;
+
+ if (CanFoldLoad) {
+ if (HasXMMInt && NumElems == 2)
+ return getTargetShuffleNode(X86ISD::MOVLPD, dl, VT, V1, V2, DAG);
+
+ if (NumElems == 4)
+ return getTargetShuffleNode(X86ISD::MOVLPS, dl, VT, V1, V2, DAG);
+ }
+
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ // movl and movlp will both match v2i64, but v2i64 is never matched by
+ // movl earlier because we make it strict to avoid messing with the movlp load
+ // folding logic (see the code above getMOVLP call). Match it here then,
+ // this is horrible, but will stay like this until we move all shuffle
+ // matching to x86 specific nodes. Note that for the 1st condition all
+ // types are matched with movsd.
+ if (HasXMMInt) {
+ // FIXME: isMOVLMask should be checked and matched before getMOVLP,
+ // as to remove this logic from here, as much as possible
+ if (NumElems == 2 || !X86::isMOVLMask(SVOp))
+ return getTargetShuffleNode(X86ISD::MOVSD, dl, VT, V1, V2, DAG);
+ return getTargetShuffleNode(X86ISD::MOVSS, dl, VT, V1, V2, DAG);
+ }
+
+ assert(VT != MVT::v4i32 && "unsupported shuffle type");
+
+ // Invert the operand order and use SHUFPS to match it.
+ return getTargetShuffleNode(getSHUFPOpcode(VT), dl, VT, V2, V1,
+ X86::getShuffleSHUFImmediate(SVOp), DAG);
+}
+
+static inline unsigned getUNPCKLOpcode(EVT VT) {
+ switch(VT.getSimpleVT().SimpleTy) {
+ case MVT::v4i32: return X86ISD::PUNPCKLDQ;
+ case MVT::v2i64: return X86ISD::PUNPCKLQDQ;
+ case MVT::v4f32: return X86ISD::UNPCKLPS;
+ case MVT::v2f64: return X86ISD::UNPCKLPD;
+ case MVT::v8i32: // Use fp unit for int unpack.
+ case MVT::v8f32: return X86ISD::VUNPCKLPSY;
+ case MVT::v4i64: // Use fp unit for int unpack.
+ case MVT::v4f64: return X86ISD::VUNPCKLPDY;
+ case MVT::v16i8: return X86ISD::PUNPCKLBW;
+ case MVT::v8i16: return X86ISD::PUNPCKLWD;
+ default:
+ llvm_unreachable("Unknown type for unpckl");
+ }
+ return 0;
+}
+
+static inline unsigned getUNPCKHOpcode(EVT VT) {
+ switch(VT.getSimpleVT().SimpleTy) {
+ case MVT::v4i32: return X86ISD::PUNPCKHDQ;
+ case MVT::v2i64: return X86ISD::PUNPCKHQDQ;
+ case MVT::v4f32: return X86ISD::UNPCKHPS;
+ case MVT::v2f64: return X86ISD::UNPCKHPD;
+ case MVT::v8i32: // Use fp unit for int unpack.
+ case MVT::v8f32: return X86ISD::VUNPCKHPSY;
+ case MVT::v4i64: // Use fp unit for int unpack.
+ case MVT::v4f64: return X86ISD::VUNPCKHPDY;
+ case MVT::v16i8: return X86ISD::PUNPCKHBW;
+ case MVT::v8i16: return X86ISD::PUNPCKHWD;
+ default:
+ llvm_unreachable("Unknown type for unpckh");
+ }
+ return 0;
+}
+
+static inline unsigned getVPERMILOpcode(EVT VT) {
+ switch(VT.getSimpleVT().SimpleTy) {
+ case MVT::v4i32:
+ case MVT::v4f32: return X86ISD::VPERMILPS;
+ case MVT::v2i64:
+ case MVT::v2f64: return X86ISD::VPERMILPD;
+ case MVT::v8i32:
+ case MVT::v8f32: return X86ISD::VPERMILPSY;
+ case MVT::v4i64:
+ case MVT::v4f64: return X86ISD::VPERMILPDY;
+ default:
+ llvm_unreachable("Unknown type for vpermil");
+ }
+ return 0;
+}
+
+/// isVectorBroadcast - Check if the node chain is suitable to be xformed to
+/// a vbroadcast node. The nodes are suitable whenever we can fold a load coming
+/// from a 32 or 64 bit scalar. Update Op to the desired load to be folded.
+static bool isVectorBroadcast(SDValue &Op) {
+ EVT VT = Op.getValueType();
+ bool Is256 = VT.getSizeInBits() == 256;
+
+ assert((VT.getSizeInBits() == 128 || Is256) &&
+ "Unsupported type for vbroadcast node");
+
+ SDValue V = Op;
+ if (V.hasOneUse() && V.getOpcode() == ISD::BITCAST)
+ V = V.getOperand(0);
+
+ if (Is256 && !(V.hasOneUse() &&
+ V.getOpcode() == ISD::INSERT_SUBVECTOR &&
+ V.getOperand(0).getOpcode() == ISD::UNDEF))
+ return false;
+
+ if (Is256)
+ V = V.getOperand(1);
+
+ if (!V.hasOneUse())
+ return false;
+
+ // Check the source scalar_to_vector type. 256-bit broadcasts are
+ // supported for 32/64-bit sizes, while 128-bit ones are only supported
+ // for 32-bit scalars.
+ if (V.getOpcode() != ISD::SCALAR_TO_VECTOR)
+ return false;
+
+ unsigned ScalarSize = V.getOperand(0).getValueType().getSizeInBits();
+ if (ScalarSize != 32 && ScalarSize != 64)
+ return false;
+ if (!Is256 && ScalarSize == 64)
+ return false;
+
+ V = V.getOperand(0);
+ if (!MayFoldLoad(V))
+ return false;
+
+ // Return the load node
+ Op = V;
+ return true;
+}
+
+static
+SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG,
+ const TargetLowering &TLI,
+ const X86Subtarget *Subtarget) {
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+
+ if (isZeroShuffle(SVOp))
+ return getZeroVector(VT, Subtarget->hasXMMInt(), DAG, dl);
+
+ // Handle splat operations
+ if (SVOp->isSplat()) {
+ unsigned NumElem = VT.getVectorNumElements();
+ int Size = VT.getSizeInBits();
+ // Special case, this is the only place now where it's allowed to return
+ // a vector_shuffle operation without using a target specific node, because
+ // *hopefully* it will be optimized away by the dag combiner. FIXME: should
+ // this be moved to DAGCombine instead?
+ if (NumElem <= 4 && CanXFormVExtractWithShuffleIntoLoad(Op, DAG, TLI))
+ return Op;
+
+ // Use vbroadcast whenever the splat comes from a foldable load
+ if (Subtarget->hasAVX() && isVectorBroadcast(V1))
+ return DAG.getNode(X86ISD::VBROADCAST, dl, VT, V1);
+
+ // Handle splats by matching through known shuffle masks
+ if ((Size == 128 && NumElem <= 4) ||
+ (Size == 256 && NumElem < 8))
+ return SDValue();
+
+ // All remaning splats are promoted to target supported vector shuffles.
+ return PromoteSplat(SVOp, DAG);
+ }
+
+ // If the shuffle can be profitably rewritten as a narrower shuffle, then
+ // do it!
+ if (VT == MVT::v8i16 || VT == MVT::v16i8) {
+ SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, dl);
+ if (NewOp.getNode())
+ return DAG.getNode(ISD::BITCAST, dl, VT, NewOp);
+ } else if ((VT == MVT::v4i32 ||
+ (VT == MVT::v4f32 && Subtarget->hasXMMInt()))) {
+ // FIXME: Figure out a cleaner way to do this.
+ // Try to make use of movq to zero out the top part.
+ if (ISD::isBuildVectorAllZeros(V2.getNode())) {
+ SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, dl);
+ if (NewOp.getNode()) {
+ if (isCommutedMOVL(cast<ShuffleVectorSDNode>(NewOp), true, false))
+ return getVZextMovL(VT, NewOp.getValueType(), NewOp.getOperand(0),
+ DAG, Subtarget, dl);
+ }
+ } else if (ISD::isBuildVectorAllZeros(V1.getNode())) {
+ SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, dl);
+ if (NewOp.getNode() && X86::isMOVLMask(cast<ShuffleVectorSDNode>(NewOp)))
+ return getVZextMovL(VT, NewOp.getValueType(), NewOp.getOperand(1),
+ DAG, Subtarget, dl);
+ }
+ }
+ return SDValue();
+}
+
+SDValue
+X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned NumElems = VT.getVectorNumElements();
+ bool isMMX = VT.getSizeInBits() == 64;
+ bool V1IsUndef = V1.getOpcode() == ISD::UNDEF;
+ bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
+ bool V1IsSplat = false;
+ bool V2IsSplat = false;
+ bool HasXMMInt = Subtarget->hasXMMInt();
+ MachineFunction &MF = DAG.getMachineFunction();
+ bool OptForSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);
+
+ // Shuffle operations on MMX not supported.
+ if (isMMX)
+ return Op;
+
+ // Vector shuffle lowering takes 3 steps:
+ //
+ // 1) Normalize the input vectors. Here splats, zeroed vectors, profitable
+ // narrowing and commutation of operands should be handled.
+ // 2) Matching of shuffles with known shuffle masks to x86 target specific
+ // shuffle nodes.
+ // 3) Rewriting of unmatched masks into new generic shuffle operations,
+ // so the shuffle can be broken into other shuffles and the legalizer can
+ // try the lowering again.
+ //
+ // The general ideia is that no vector_shuffle operation should be left to
+ // be matched during isel, all of them must be converted to a target specific
+ // node here.
+
+ // Normalize the input vectors. Here splats, zeroed vectors, profitable
+ // narrowing and commutation of operands should be handled. The actual code
+ // doesn't include all of those, work in progress...
+ SDValue NewOp = NormalizeVectorShuffle(Op, DAG, *this, Subtarget);
+ if (NewOp.getNode())
+ return NewOp;
+
+ // NOTE: isPSHUFDMask can also match both masks below (unpckl_undef and
+ // unpckh_undef). Only use pshufd if speed is more important than size.
+ if (OptForSize && X86::isUNPCKL_v_undef_Mask(SVOp))
+ return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V1, DAG);
+ if (OptForSize && X86::isUNPCKH_v_undef_Mask(SVOp))
+ return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG);
+
+ if (X86::isMOVDDUPMask(SVOp) &&
+ (Subtarget->hasSSE3() || Subtarget->hasAVX()) &&
+ V2IsUndef && RelaxedMayFoldVectorLoad(V1))
+ return getMOVDDup(Op, dl, V1, DAG);
+
+ if (X86::isMOVHLPS_v_undef_Mask(SVOp))
+ return getMOVHighToLow(Op, dl, DAG);
+
+ // Use to match splats
+ if (HasXMMInt && X86::isUNPCKHMask(SVOp) && V2IsUndef &&
+ (VT == MVT::v2f64 || VT == MVT::v2i64))
+ return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG);
+
+ if (X86::isPSHUFDMask(SVOp)) {
+ // The actual implementation will match the mask in the if above and then
+ // during isel it can match several different instructions, not only pshufd
+ // as its name says, sad but true, emulate the behavior for now...
+ if (X86::isMOVDDUPMask(SVOp) && ((VT == MVT::v4f32 || VT == MVT::v2i64)))
+ return getTargetShuffleNode(X86ISD::MOVLHPS, dl, VT, V1, V1, DAG);
+
+ unsigned TargetMask = X86::getShuffleSHUFImmediate(SVOp);
+
+ if (HasXMMInt && (VT == MVT::v4f32 || VT == MVT::v4i32))
+ return getTargetShuffleNode(X86ISD::PSHUFD, dl, VT, V1, TargetMask, DAG);
+
+ return getTargetShuffleNode(getSHUFPOpcode(VT), dl, VT, V1, V1,
+ TargetMask, DAG);
+ }
+
+ // Check if this can be converted into a logical shift.
+ bool isLeft = false;
+ unsigned ShAmt = 0;
+ SDValue ShVal;
+ bool isShift = getSubtarget()->hasXMMInt() &&
+ isVectorShift(SVOp, DAG, isLeft, ShVal, ShAmt);
+ if (isShift && ShVal.hasOneUse()) {
+ // If the shifted value has multiple uses, it may be cheaper to use
+ // v_set0 + movlhps or movhlps, etc.
+ EVT EltVT = VT.getVectorElementType();
+ ShAmt *= EltVT.getSizeInBits();
+ return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this, dl);
+ }
+
+ if (X86::isMOVLMask(SVOp)) {
+ if (V1IsUndef)
+ return V2;
+ if (ISD::isBuildVectorAllZeros(V1.getNode()))
+ return getVZextMovL(VT, VT, V2, DAG, Subtarget, dl);
+ if (!X86::isMOVLPMask(SVOp)) {
+ if (HasXMMInt && (VT == MVT::v2i64 || VT == MVT::v2f64))
+ return getTargetShuffleNode(X86ISD::MOVSD, dl, VT, V1, V2, DAG);
+
+ if (VT == MVT::v4i32 || VT == MVT::v4f32)
+ return getTargetShuffleNode(X86ISD::MOVSS, dl, VT, V1, V2, DAG);
+ }
+ }
+
+ // FIXME: fold these into legal mask.
+ if (X86::isMOVLHPSMask(SVOp) && !X86::isUNPCKLMask(SVOp))
+ return getMOVLowToHigh(Op, dl, DAG, HasXMMInt);
+
+ if (X86::isMOVHLPSMask(SVOp))
+ return getMOVHighToLow(Op, dl, DAG);
+
+ if (X86::isMOVSHDUPMask(SVOp, Subtarget))
+ return getTargetShuffleNode(X86ISD::MOVSHDUP, dl, VT, V1, DAG);
+
+ if (X86::isMOVSLDUPMask(SVOp, Subtarget))
+ return getTargetShuffleNode(X86ISD::MOVSLDUP, dl, VT, V1, DAG);
+
+ if (X86::isMOVLPMask(SVOp))
+ return getMOVLP(Op, dl, DAG, HasXMMInt);
+
+ if (ShouldXformToMOVHLPS(SVOp) ||
+ ShouldXformToMOVLP(V1.getNode(), V2.getNode(), SVOp))
+ return CommuteVectorShuffle(SVOp, DAG);
+
+ if (isShift) {
+ // No better options. Use a vshl / vsrl.
+ EVT EltVT = VT.getVectorElementType();
+ ShAmt *= EltVT.getSizeInBits();
+ return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this, dl);
+ }
+
+ bool Commuted = false;
+ // FIXME: This should also accept a bitcast of a splat? Be careful, not
+ // 1,1,1,1 -> v8i16 though.
+ V1IsSplat = isSplatVector(V1.getNode());
+ V2IsSplat = isSplatVector(V2.getNode());
+
+ // Canonicalize the splat or undef, if present, to be on the RHS.
+ if ((V1IsSplat || V1IsUndef) && !(V2IsSplat || V2IsUndef)) {
+ Op = CommuteVectorShuffle(SVOp, DAG);
+ SVOp = cast<ShuffleVectorSDNode>(Op);
+ V1 = SVOp->getOperand(0);
+ V2 = SVOp->getOperand(1);
+ std::swap(V1IsSplat, V2IsSplat);
+ std::swap(V1IsUndef, V2IsUndef);
+ Commuted = true;
+ }
+
+ if (isCommutedMOVL(SVOp, V2IsSplat, V2IsUndef)) {
+ // Shuffling low element of v1 into undef, just return v1.
+ if (V2IsUndef)
+ return V1;
+ // If V2 is a splat, the mask may be malformed such as <4,3,3,3>, which
+ // the instruction selector will not match, so get a canonical MOVL with
+ // swapped operands to undo the commute.
+ return getMOVL(DAG, dl, VT, V2, V1);
+ }
+
+ if (X86::isUNPCKLMask(SVOp))
+ return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V2, DAG);
+
+ if (X86::isUNPCKHMask(SVOp))
+ return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V2, DAG);
+
+ if (V2IsSplat) {
+ // Normalize mask so all entries that point to V2 points to its first
+ // element then try to match unpck{h|l} again. If match, return a
+ // new vector_shuffle with the corrected mask.
+ SDValue NewMask = NormalizeMask(SVOp, DAG);
+ ShuffleVectorSDNode *NSVOp = cast<ShuffleVectorSDNode>(NewMask);
+ if (NSVOp != SVOp) {
+ if (X86::isUNPCKLMask(NSVOp, true)) {
+ return NewMask;
+ } else if (X86::isUNPCKHMask(NSVOp, true)) {
+ return NewMask;
+ }
+ }
+ }
+
+ if (Commuted) {
+ // Commute is back and try unpck* again.
+ // FIXME: this seems wrong.
+ SDValue NewOp = CommuteVectorShuffle(SVOp, DAG);
+ ShuffleVectorSDNode *NewSVOp = cast<ShuffleVectorSDNode>(NewOp);
+
+ if (X86::isUNPCKLMask(NewSVOp))
+ return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V2, V1, DAG);
+
+ if (X86::isUNPCKHMask(NewSVOp))
+ return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V2, V1, DAG);
+ }
+
+ // Normalize the node to match x86 shuffle ops if needed
+ if (V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(SVOp))
+ return CommuteVectorShuffle(SVOp, DAG);
+
+ // The checks below are all present in isShuffleMaskLegal, but they are
+ // inlined here right now to enable us to directly emit target specific
+ // nodes, and remove one by one until they don't return Op anymore.
+ SmallVector<int, 16> M;
+ SVOp->getMask(M);
+
+ if (isPALIGNRMask(M, VT, Subtarget->hasSSSE3() || Subtarget->hasAVX()))
+ return getTargetShuffleNode(X86ISD::PALIGN, dl, VT, V1, V2,
+ X86::getShufflePALIGNRImmediate(SVOp),
+ DAG);
+
+ if (ShuffleVectorSDNode::isSplatMask(&M[0], VT) &&
+ SVOp->getSplatIndex() == 0 && V2IsUndef) {
+ if (VT == MVT::v2f64)
+ return getTargetShuffleNode(X86ISD::UNPCKLPD, dl, VT, V1, V1, DAG);
+ if (VT == MVT::v2i64)
+ return getTargetShuffleNode(X86ISD::PUNPCKLQDQ, dl, VT, V1, V1, DAG);
+ }
+
+ if (isPSHUFHWMask(M, VT))
+ return getTargetShuffleNode(X86ISD::PSHUFHW, dl, VT, V1,
+ X86::getShufflePSHUFHWImmediate(SVOp),
+ DAG);
+
+ if (isPSHUFLWMask(M, VT))
+ return getTargetShuffleNode(X86ISD::PSHUFLW, dl, VT, V1,
+ X86::getShufflePSHUFLWImmediate(SVOp),
+ DAG);
+
+ if (isSHUFPMask(M, VT))
+ return getTargetShuffleNode(getSHUFPOpcode(VT), dl, VT, V1, V2,
+ X86::getShuffleSHUFImmediate(SVOp), DAG);
+
+ if (X86::isUNPCKL_v_undef_Mask(SVOp))
+ return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V1, DAG);
+ if (X86::isUNPCKH_v_undef_Mask(SVOp))
+ return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG);
+
+ //===--------------------------------------------------------------------===//
+ // Generate target specific nodes for 128 or 256-bit shuffles only
+ // supported in the AVX instruction set.
+ //
+
+ // Handle VMOVDDUPY permutations
+ if (isMOVDDUPYMask(SVOp, Subtarget))
+ return getTargetShuffleNode(X86ISD::MOVDDUP, dl, VT, V1, DAG);
+
+ // Handle VPERMILPS* permutations
+ if (isVPERMILPSMask(M, VT, Subtarget))
+ return getTargetShuffleNode(getVPERMILOpcode(VT), dl, VT, V1,
+ getShuffleVPERMILPSImmediate(SVOp), DAG);
+
+ // Handle VPERMILPD* permutations
+ if (isVPERMILPDMask(M, VT, Subtarget))
+ return getTargetShuffleNode(getVPERMILOpcode(VT), dl, VT, V1,
+ getShuffleVPERMILPDImmediate(SVOp), DAG);
+
+ // Handle VPERM2F128 permutations
+ if (isVPERM2F128Mask(M, VT, Subtarget))
+ return getTargetShuffleNode(X86ISD::VPERM2F128, dl, VT, V1, V2,
+ getShuffleVPERM2F128Immediate(SVOp), DAG);
+
+ // Handle VSHUFPSY permutations
+ if (isVSHUFPSYMask(M, VT, Subtarget))
+ return getTargetShuffleNode(getSHUFPOpcode(VT), dl, VT, V1, V2,
+ getShuffleVSHUFPSYImmediate(SVOp), DAG);
+
+ // Handle VSHUFPDY permutations
+ if (isVSHUFPDYMask(M, VT, Subtarget))
+ return getTargetShuffleNode(getSHUFPOpcode(VT), dl, VT, V1, V2,
+ getShuffleVSHUFPDYImmediate(SVOp), DAG);
+
+ //===--------------------------------------------------------------------===//
+ // Since no target specific shuffle was selected for this generic one,
+ // lower it into other known shuffles. FIXME: this isn't true yet, but
+ // this is the plan.
+ //
+
+ // Handle v8i16 specifically since SSE can do byte extraction and insertion.
+ if (VT == MVT::v8i16) {
+ SDValue NewOp = LowerVECTOR_SHUFFLEv8i16(Op, DAG);
+ if (NewOp.getNode())
+ return NewOp;
+ }
+
+ if (VT == MVT::v16i8) {
+ SDValue NewOp = LowerVECTOR_SHUFFLEv16i8(SVOp, DAG, *this);
+ if (NewOp.getNode())
+ return NewOp;
+ }
+
+ // Handle all 128-bit wide vectors with 4 elements, and match them with
+ // several different shuffle types.
+ if (NumElems == 4 && VT.getSizeInBits() == 128)
+ return LowerVECTOR_SHUFFLE_128v4(SVOp, DAG);
+
+ // Handle general 256-bit shuffles
+ if (VT.is256BitVector())
+ return LowerVECTOR_SHUFFLE_256(SVOp, DAG);
+
+ return SDValue();
+}
+
+SDValue
+X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (Op.getOperand(0).getValueType().getSizeInBits() != 128)
+ return SDValue();
+
+ if (VT.getSizeInBits() == 8) {
+ SDValue Extract = DAG.getNode(X86ISD::PEXTRB, dl, MVT::i32,
+ Op.getOperand(0), Op.getOperand(1));
+ SDValue Assert = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Extract,
+ DAG.getValueType(VT));
+ return DAG.getNode(ISD::TRUNCATE, dl, VT, Assert);
+ } else if (VT.getSizeInBits() == 16) {
+ unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ // If Idx is 0, it's cheaper to do a move instead of a pextrw.
+ if (Idx == 0)
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i16,
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
+ DAG.getNode(ISD::BITCAST, dl,
+ MVT::v4i32,
+ Op.getOperand(0)),
+ Op.getOperand(1)));
+ SDValue Extract = DAG.getNode(X86ISD::PEXTRW, dl, MVT::i32,
+ Op.getOperand(0), Op.getOperand(1));
+ SDValue Assert = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Extract,
+ DAG.getValueType(VT));
+ return DAG.getNode(ISD::TRUNCATE, dl, VT, Assert);
+ } else if (VT == MVT::f32) {
+ // EXTRACTPS outputs to a GPR32 register which will require a movd to copy
+ // the result back to FR32 register. It's only worth matching if the
+ // result has a single use which is a store or a bitcast to i32. And in
+ // the case of a store, it's not worth it if the index is a constant 0,
+ // because a MOVSSmr can be used instead, which is smaller and faster.
+ if (!Op.hasOneUse())
+ return SDValue();
+ SDNode *User = *Op.getNode()->use_begin();
+ if ((User->getOpcode() != ISD::STORE ||
+ (isa<ConstantSDNode>(Op.getOperand(1)) &&
+ cast<ConstantSDNode>(Op.getOperand(1))->isNullValue())) &&
+ (User->getOpcode() != ISD::BITCAST ||
+ User->getValueType(0) != MVT::i32))
+ return SDValue();
+ SDValue Extract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
+ DAG.getNode(ISD::BITCAST, dl, MVT::v4i32,
+ Op.getOperand(0)),
+ Op.getOperand(1));
+ return DAG.getNode(ISD::BITCAST, dl, MVT::f32, Extract);
+ } else if (VT == MVT::i32) {
+ // ExtractPS works with constant index.
+ if (isa<ConstantSDNode>(Op.getOperand(1)))
+ return Op;
+ }
+ return SDValue();
+}
+
+
+SDValue
+X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
+ SelectionDAG &DAG) const {
+ if (!isa<ConstantSDNode>(Op.getOperand(1)))
+ return SDValue();
+
+ SDValue Vec = Op.getOperand(0);
+ EVT VecVT = Vec.getValueType();
+
+ // If this is a 256-bit vector result, first extract the 128-bit vector and
+ // then extract the element from the 128-bit vector.
+ if (VecVT.getSizeInBits() == 256) {
+ DebugLoc dl = Op.getNode()->getDebugLoc();
+ unsigned NumElems = VecVT.getVectorNumElements();
+ SDValue Idx = Op.getOperand(1);
+ unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+
+ // Get the 128-bit vector.
+ bool Upper = IdxVal >= NumElems/2;
+ Vec = Extract128BitVector(Vec,
+ DAG.getConstant(Upper ? NumElems/2 : 0, MVT::i32), DAG, dl);
+
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(), Vec,
+ Upper ? DAG.getConstant(IdxVal-NumElems/2, MVT::i32) : Idx);
+ }
+
+ assert(Vec.getValueSizeInBits() <= 128 && "Unexpected vector length");
+
+ if (Subtarget->hasSSE41() || Subtarget->hasAVX()) {
+ SDValue Res = LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG);
+ if (Res.getNode())
+ return Res;
+ }
+
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ // TODO: handle v16i8.
+ if (VT.getSizeInBits() == 16) {
+ SDValue Vec = Op.getOperand(0);
+ unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ if (Idx == 0)
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i16,
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
+ DAG.getNode(ISD::BITCAST, dl,
+ MVT::v4i32, Vec),
+ Op.getOperand(1)));
+ // Transform it so it match pextrw which produces a 32-bit result.
+ EVT EltVT = MVT::i32;
+ SDValue Extract = DAG.getNode(X86ISD::PEXTRW, dl, EltVT,
+ Op.getOperand(0), Op.getOperand(1));
+ SDValue Assert = DAG.getNode(ISD::AssertZext, dl, EltVT, Extract,
+ DAG.getValueType(VT));
+ return DAG.getNode(ISD::TRUNCATE, dl, VT, Assert);
+ } else if (VT.getSizeInBits() == 32) {
+ unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ if (Idx == 0)
+ return Op;
+
+ // SHUFPS the element to the lowest double word, then movss.
+ int Mask[4] = { static_cast<int>(Idx), -1, -1, -1 };
+ EVT VVT = Op.getOperand(0).getValueType();
+ SDValue Vec = DAG.getVectorShuffle(VVT, dl, Op.getOperand(0),
+ DAG.getUNDEF(VVT), Mask);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec,
+ DAG.getIntPtrConstant(0));
+ } else if (VT.getSizeInBits() == 64) {
+ // FIXME: .td only matches this for <2 x f64>, not <2 x i64> on 32b
+ // FIXME: seems like this should be unnecessary if mov{h,l}pd were taught
+ // to match extract_elt for f64.
+ unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ if (Idx == 0)
+ return Op;
+
+ // UNPCKHPD the element to the lowest double word, then movsd.
+ // Note if the lower 64 bits of the result of the UNPCKHPD is then stored
+ // to a f64mem, the whole operation is folded into a single MOVHPDmr.
+ int Mask[2] = { 1, -1 };
+ EVT VVT = Op.getOperand(0).getValueType();
+ SDValue Vec = DAG.getVectorShuffle(VVT, dl, Op.getOperand(0),
+ DAG.getUNDEF(VVT), Mask);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec,
+ DAG.getIntPtrConstant(0));
+ }
+
+ return SDValue();
+}
+
+SDValue
+X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ EVT EltVT = VT.getVectorElementType();
+ DebugLoc dl = Op.getDebugLoc();
+
+ SDValue N0 = Op.getOperand(0);
+ SDValue N1 = Op.getOperand(1);
+ SDValue N2 = Op.getOperand(2);
+
+ if (VT.getSizeInBits() == 256)
+ return SDValue();
+
+ if ((EltVT.getSizeInBits() == 8 || EltVT.getSizeInBits() == 16) &&
+ isa<ConstantSDNode>(N2)) {
+ unsigned Opc;
+ if (VT == MVT::v8i16)
+ Opc = X86ISD::PINSRW;
+ else if (VT == MVT::v16i8)
+ Opc = X86ISD::PINSRB;
+ else
+ Opc = X86ISD::PINSRB;
+
+ // Transform it so it match pinsr{b,w} which expects a GR32 as its second
+ // argument.
+ if (N1.getValueType() != MVT::i32)
+ N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, N1);
+ if (N2.getValueType() != MVT::i32)
+ N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue());
+ return DAG.getNode(Opc, dl, VT, N0, N1, N2);
+ } else if (EltVT == MVT::f32 && isa<ConstantSDNode>(N2)) {
+ // Bits [7:6] of the constant are the source select. This will always be
+ // zero here. The DAG Combiner may combine an extract_elt index into these
+ // bits. For example (insert (extract, 3), 2) could be matched by putting
+ // the '3' into bits [7:6] of X86ISD::INSERTPS.
+ // Bits [5:4] of the constant are the destination select. This is the
+ // value of the incoming immediate.
+ // Bits [3:0] of the constant are the zero mask. The DAG Combiner may
+ // combine either bitwise AND or insert of float 0.0 to set these bits.
+ N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue() << 4);
+ // Create this as a scalar to vector..
+ N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32, N1);
+ return DAG.getNode(X86ISD::INSERTPS, dl, VT, N0, N1, N2);
+ } else if (EltVT == MVT::i32 && isa<ConstantSDNode>(N2)) {
+ // PINSR* works with constant index.
+ return Op;
+ }
+ return SDValue();
+}
+
+SDValue
+X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ EVT EltVT = VT.getVectorElementType();
+
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue N0 = Op.getOperand(0);
+ SDValue N1 = Op.getOperand(1);
+ SDValue N2 = Op.getOperand(2);
+
+ // If this is a 256-bit vector result, first extract the 128-bit vector,
+ // insert the element into the extracted half and then place it back.
+ if (VT.getSizeInBits() == 256) {
+ if (!isa<ConstantSDNode>(N2))
+ return SDValue();
+
+ // Get the desired 128-bit vector half.
+ unsigned NumElems = VT.getVectorNumElements();
+ unsigned IdxVal = cast<ConstantSDNode>(N2)->getZExtValue();
+ bool Upper = IdxVal >= NumElems/2;
+ SDValue Ins128Idx = DAG.getConstant(Upper ? NumElems/2 : 0, MVT::i32);
+ SDValue V = Extract128BitVector(N0, Ins128Idx, DAG, dl);
+
+ // Insert the element into the desired half.
+ V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, V.getValueType(), V,
+ N1, Upper ? DAG.getConstant(IdxVal-NumElems/2, MVT::i32) : N2);
+
+ // Insert the changed part back to the 256-bit vector
+ return Insert128BitVector(N0, V, Ins128Idx, DAG, dl);
+ }
+
+ if (Subtarget->hasSSE41() || Subtarget->hasAVX())
+ return LowerINSERT_VECTOR_ELT_SSE4(Op, DAG);
+
+ if (EltVT == MVT::i8)
+ return SDValue();
+
+ if (EltVT.getSizeInBits() == 16 && isa<ConstantSDNode>(N2)) {
+ // Transform it so it match pinsrw which expects a 16-bit value in a GR32
+ // as its second argument.
+ if (N1.getValueType() != MVT::i32)
+ N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, N1);
+ if (N2.getValueType() != MVT::i32)
+ N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue());
+ return DAG.getNode(X86ISD::PINSRW, dl, VT, N0, N1, N2);
+ }
+ return SDValue();
+}
+
+SDValue
+X86TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const {
+ LLVMContext *Context = DAG.getContext();
+ DebugLoc dl = Op.getDebugLoc();
+ EVT OpVT = Op.getValueType();
+
+ // If this is a 256-bit vector result, first insert into a 128-bit
+ // vector and then insert into the 256-bit vector.
+ if (OpVT.getSizeInBits() > 128) {
+ // Insert into a 128-bit vector.
+ EVT VT128 = EVT::getVectorVT(*Context,
+ OpVT.getVectorElementType(),
+ OpVT.getVectorNumElements() / 2);
+
+ Op = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT128, Op.getOperand(0));
+
+ // Insert the 128-bit vector.
+ return Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, OpVT), Op,
+ DAG.getConstant(0, MVT::i32),
+ DAG, dl);
+ }
+
+ if (Op.getValueType() == MVT::v1i64 &&
+ Op.getOperand(0).getValueType() == MVT::i64)
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v1i64, Op.getOperand(0));
+
+ SDValue AnyExt = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Op.getOperand(0));
+ assert(Op.getValueType().getSimpleVT().getSizeInBits() == 128 &&
+ "Expected an SSE type!");
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(),
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32,AnyExt));
+}
+
+// Lower a node with an EXTRACT_SUBVECTOR opcode. This may result in
+// a simple subregister reference or explicit instructions to grab
+// upper bits of a vector.
+SDValue
+X86TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const {
+ if (Subtarget->hasAVX()) {
+ DebugLoc dl = Op.getNode()->getDebugLoc();
+ SDValue Vec = Op.getNode()->getOperand(0);
+ SDValue Idx = Op.getNode()->getOperand(1);
+
+ if (Op.getNode()->getValueType(0).getSizeInBits() == 128
+ && Vec.getNode()->getValueType(0).getSizeInBits() == 256) {
+ return Extract128BitVector(Vec, Idx, DAG, dl);
+ }
+ }
+ return SDValue();
+}
+
+// Lower a node with an INSERT_SUBVECTOR opcode. This may result in a
+// simple superregister reference or explicit instructions to insert
+// the upper bits of a vector.
+SDValue
+X86TargetLowering::LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const {
+ if (Subtarget->hasAVX()) {
+ DebugLoc dl = Op.getNode()->getDebugLoc();
+ SDValue Vec = Op.getNode()->getOperand(0);
+ SDValue SubVec = Op.getNode()->getOperand(1);
+ SDValue Idx = Op.getNode()->getOperand(2);
+
+ if (Op.getNode()->getValueType(0).getSizeInBits() == 256
+ && SubVec.getNode()->getValueType(0).getSizeInBits() == 128) {
+ return Insert128BitVector(Vec, SubVec, Idx, DAG, dl);
+ }
+ }
+ return SDValue();
+}
+
+// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
+// their target countpart wrapped in the X86ISD::Wrapper node. Suppose N is
+// one of the above mentioned nodes. It has to be wrapped because otherwise
+// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
+// be used to form addressing mode. These wrapped nodes will be selected
+// into MOV32ri.
+SDValue
+X86TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const {
+ ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
+
+ // In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the
+ // global base reg.
+ unsigned char OpFlag = 0;
+ unsigned WrapperKind = X86ISD::Wrapper;
+ CodeModel::Model M = getTargetMachine().getCodeModel();
+
+ if (Subtarget->isPICStyleRIPRel() &&
+ (M == CodeModel::Small || M == CodeModel::Kernel))
+ WrapperKind = X86ISD::WrapperRIP;
+ else if (Subtarget->isPICStyleGOT())
+ OpFlag = X86II::MO_GOTOFF;
+ else if (Subtarget->isPICStyleStubPIC())
+ OpFlag = X86II::MO_PIC_BASE_OFFSET;
+
+ SDValue Result = DAG.getTargetConstantPool(CP->getConstVal(), getPointerTy(),
+ CP->getAlignment(),
+ CP->getOffset(), OpFlag);
+ DebugLoc DL = CP->getDebugLoc();
+ Result = DAG.getNode(WrapperKind, DL, getPointerTy(), Result);
+ // With PIC, the address is actually $g + Offset.
+ if (OpFlag) {
+ Result = DAG.getNode(ISD::ADD, DL, getPointerTy(),
+ DAG.getNode(X86ISD::GlobalBaseReg,
+ DebugLoc(), getPointerTy()),
+ Result);
+ }
+
+ return Result;
+}
+
+SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+
+ // In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the
+ // global base reg.
+ unsigned char OpFlag = 0;
+ unsigned WrapperKind = X86ISD::Wrapper;
+ CodeModel::Model M = getTargetMachine().getCodeModel();
+
+ if (Subtarget->isPICStyleRIPRel() &&
+ (M == CodeModel::Small || M == CodeModel::Kernel))
+ WrapperKind = X86ISD::WrapperRIP;
+ else if (Subtarget->isPICStyleGOT())
+ OpFlag = X86II::MO_GOTOFF;
+ else if (Subtarget->isPICStyleStubPIC())
+ OpFlag = X86II::MO_PIC_BASE_OFFSET;
+
+ SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy(),
+ OpFlag);
+ DebugLoc DL = JT->getDebugLoc();
+ Result = DAG.getNode(WrapperKind, DL, getPointerTy(), Result);
+
+ // With PIC, the address is actually $g + Offset.
+ if (OpFlag)
+ Result = DAG.getNode(ISD::ADD, DL, getPointerTy(),
+ DAG.getNode(X86ISD::GlobalBaseReg,
+ DebugLoc(), getPointerTy()),
+ Result);
+
+ return Result;
+}
+
+SDValue
+X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const {
+ const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
+
+ // In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the
+ // global base reg.
+ unsigned char OpFlag = 0;
+ unsigned WrapperKind = X86ISD::Wrapper;
+ CodeModel::Model M = getTargetMachine().getCodeModel();
+
+ if (Subtarget->isPICStyleRIPRel() &&
+ (M == CodeModel::Small || M == CodeModel::Kernel)) {
+ if (Subtarget->isTargetDarwin() || Subtarget->isTargetELF())
+ OpFlag = X86II::MO_GOTPCREL;
+ WrapperKind = X86ISD::WrapperRIP;
+ } else if (Subtarget->isPICStyleGOT()) {
+ OpFlag = X86II::MO_GOT;
+ } else if (Subtarget->isPICStyleStubPIC()) {
+ OpFlag = X86II::MO_DARWIN_NONLAZY_PIC_BASE;
+ } else if (Subtarget->isPICStyleStubNoDynamic()) {
+ OpFlag = X86II::MO_DARWIN_NONLAZY;
+ }
+
+ SDValue Result = DAG.getTargetExternalSymbol(Sym, getPointerTy(), OpFlag);
+
+ DebugLoc DL = Op.getDebugLoc();
+ Result = DAG.getNode(WrapperKind, DL, getPointerTy(), Result);
+
+
+ // With PIC, the address is actually $g + Offset.
+ if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
+ !Subtarget->is64Bit()) {
+ Result = DAG.getNode(ISD::ADD, DL, getPointerTy(),
+ DAG.getNode(X86ISD::GlobalBaseReg,
+ DebugLoc(), getPointerTy()),
+ Result);
+ }
+
+ // For symbols that require a load from a stub to get the address, emit the
+ // load.
+ if (isGlobalStubReference(OpFlag))
+ Result = DAG.getLoad(getPointerTy(), DL, DAG.getEntryNode(), Result,
+ MachinePointerInfo::getGOT(), false, false, 0);
+
+ return Result;
+}
+
+SDValue
+X86TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
+ // Create the TargetBlockAddressAddress node.
+ unsigned char OpFlags =
+ Subtarget->ClassifyBlockAddressReference();
+ CodeModel::Model M = getTargetMachine().getCodeModel();
+ const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue Result = DAG.getBlockAddress(BA, getPointerTy(),
+ /*isTarget=*/true, OpFlags);
+
+ if (Subtarget->isPICStyleRIPRel() &&
+ (M == CodeModel::Small || M == CodeModel::Kernel))
+ Result = DAG.getNode(X86ISD::WrapperRIP, dl, getPointerTy(), Result);
+ else
+ Result = DAG.getNode(X86ISD::Wrapper, dl, getPointerTy(), Result);
+
+ // With PIC, the address is actually $g + Offset.
+ if (isGlobalRelativeToPICBase(OpFlags)) {
+ Result = DAG.getNode(ISD::ADD, dl, getPointerTy(),
+ DAG.getNode(X86ISD::GlobalBaseReg, dl, getPointerTy()),
+ Result);
+ }
+
+ return Result;
+}
+
+SDValue
+X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl,
+ int64_t Offset,
+ SelectionDAG &DAG) const {
+ // Create the TargetGlobalAddress node, folding in the constant
+ // offset if it is legal.
+ unsigned char OpFlags =
+ Subtarget->ClassifyGlobalReference(GV, getTargetMachine());
+ CodeModel::Model M = getTargetMachine().getCodeModel();
+ SDValue Result;
+ if (OpFlags == X86II::MO_NO_FLAG &&
+ X86::isOffsetSuitableForCodeModel(Offset, M)) {
+ // A direct static reference to a global.
+ Result = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), Offset);
+ Offset = 0;
+ } else {
+ Result = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags);
+ }
+
+ if (Subtarget->isPICStyleRIPRel() &&
+ (M == CodeModel::Small || M == CodeModel::Kernel))
+ Result = DAG.getNode(X86ISD::WrapperRIP, dl, getPointerTy(), Result);
+ else
+ Result = DAG.getNode(X86ISD::Wrapper, dl, getPointerTy(), Result);
+
+ // With PIC, the address is actually $g + Offset.
+ if (isGlobalRelativeToPICBase(OpFlags)) {
+ Result = DAG.getNode(ISD::ADD, dl, getPointerTy(),
+ DAG.getNode(X86ISD::GlobalBaseReg, dl, getPointerTy()),
+ Result);
+ }
+
+ // For globals that require a load from a stub to get the address, emit the
+ // load.
+ if (isGlobalStubReference(OpFlags))
+ Result = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Result,
+ MachinePointerInfo::getGOT(), false, false, 0);
+
+ // If there was a non-zero offset that we didn't fold, create an explicit
+ // addition for it.
+ if (Offset != 0)
+ Result = DAG.getNode(ISD::ADD, dl, getPointerTy(), Result,
+ DAG.getConstant(Offset, getPointerTy()));
+
+ return Result;
+}
+
+SDValue
+X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset();
+ return LowerGlobalAddress(GV, Op.getDebugLoc(), Offset, DAG);
+}
+
+static SDValue
+GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
+ SDValue *InFlag, const EVT PtrVT, unsigned ReturnReg,
+ unsigned char OperandFlags) {
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ DebugLoc dl = GA->getDebugLoc();
+ SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
+ GA->getValueType(0),
+ GA->getOffset(),
+ OperandFlags);
+ if (InFlag) {
+ SDValue Ops[] = { Chain, TGA, *InFlag };
+ Chain = DAG.getNode(X86ISD::TLSADDR, dl, NodeTys, Ops, 3);
+ } else {
+ SDValue Ops[] = { Chain, TGA };
+ Chain = DAG.getNode(X86ISD::TLSADDR, dl, NodeTys, Ops, 2);
+ }
+
+ // TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
+ MFI->setAdjustsStack(true);
+
+ SDValue Flag = Chain.getValue(1);
+ return DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Flag);
+}
+
+// Lower ISD::GlobalTLSAddress using the "general dynamic" model, 32 bit
+static SDValue
+LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA, SelectionDAG &DAG,
+ const EVT PtrVT) {
+ SDValue InFlag;
+ DebugLoc dl = GA->getDebugLoc(); // ? function entry point might be better
+ SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX,
+ DAG.getNode(X86ISD::GlobalBaseReg,
+ DebugLoc(), PtrVT), InFlag);
+ InFlag = Chain.getValue(1);
+
+ return GetTLSADDR(DAG, Chain, GA, &InFlag, PtrVT, X86::EAX, X86II::MO_TLSGD);
+}
+
+// Lower ISD::GlobalTLSAddress using the "general dynamic" model, 64 bit
+static SDValue
+LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA, SelectionDAG &DAG,
+ const EVT PtrVT) {
+ return GetTLSADDR(DAG, DAG.getEntryNode(), GA, NULL, PtrVT,
+ X86::RAX, X86II::MO_TLSGD);
+}
+
+// Lower ISD::GlobalTLSAddress using the "initial exec" (for no-pic) or
+// "local exec" model.
+static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
+ const EVT PtrVT, TLSModel::Model model,
+ bool is64Bit) {
+ DebugLoc dl = GA->getDebugLoc();
+
+ // Get the Thread Pointer, which is %gs:0 (32-bit) or %fs:0 (64-bit).
+ Value *Ptr = Constant::getNullValue(Type::getInt8PtrTy(*DAG.getContext(),
+ is64Bit ? 257 : 256));
+
+ SDValue ThreadPointer = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
+ DAG.getIntPtrConstant(0),
+ MachinePointerInfo(Ptr), false, false, 0);
+
+ unsigned char OperandFlags = 0;
+ // Most TLS accesses are not RIP relative, even on x86-64. One exception is
+ // initialexec.
+ unsigned WrapperKind = X86ISD::Wrapper;
+ if (model == TLSModel::LocalExec) {
+ OperandFlags = is64Bit ? X86II::MO_TPOFF : X86II::MO_NTPOFF;
+ } else if (is64Bit) {
+ assert(model == TLSModel::InitialExec);
+ OperandFlags = X86II::MO_GOTTPOFF;
+ WrapperKind = X86ISD::WrapperRIP;
+ } else {
+ assert(model == TLSModel::InitialExec);
+ OperandFlags = X86II::MO_INDNTPOFF;
+ }
+
+ // emit "addl x@ntpoff,%eax" (local exec) or "addl x@indntpoff,%eax" (initial
+ // exec)
+ SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
+ GA->getValueType(0),
+ GA->getOffset(), OperandFlags);
+ SDValue Offset = DAG.getNode(WrapperKind, dl, PtrVT, TGA);
+
+ if (model == TLSModel::InitialExec)
+ Offset = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Offset,
+ MachinePointerInfo::getGOT(), false, false, 0);
+
+ // The address of the thread local variable is the add of the thread
+ // pointer with the offset of the variable.
+ return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
+}
+
+SDValue
+X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
+
+ GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
+ const GlobalValue *GV = GA->getGlobal();
+
+ if (Subtarget->isTargetELF()) {
+ // TODO: implement the "local dynamic" model
+ // TODO: implement the "initial exec"model for pic executables
+
+ // If GV is an alias then use the aliasee for determining
+ // thread-localness.
+ if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
+ GV = GA->resolveAliasedGlobal(false);
+
+ TLSModel::Model model
+ = getTLSModel(GV, getTargetMachine().getRelocationModel());
+
+ switch (model) {
+ case TLSModel::GeneralDynamic:
+ case TLSModel::LocalDynamic: // not implemented
+ if (Subtarget->is64Bit())
+ return LowerToTLSGeneralDynamicModel64(GA, DAG, getPointerTy());
+ return LowerToTLSGeneralDynamicModel32(GA, DAG, getPointerTy());
+
+ case TLSModel::InitialExec:
+ case TLSModel::LocalExec:
+ return LowerToTLSExecModel(GA, DAG, getPointerTy(), model,
+ Subtarget->is64Bit());
+ }
+ } else if (Subtarget->isTargetDarwin()) {
+ // Darwin only has one model of TLS. Lower to that.
+ unsigned char OpFlag = 0;
+ unsigned WrapperKind = Subtarget->isPICStyleRIPRel() ?
+ X86ISD::WrapperRIP : X86ISD::Wrapper;
+
+ // In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the
+ // global base reg.
+ bool PIC32 = (getTargetMachine().getRelocationModel() == Reloc::PIC_) &&
+ !Subtarget->is64Bit();
+ if (PIC32)
+ OpFlag = X86II::MO_TLVP_PIC_BASE;
+ else
+ OpFlag = X86II::MO_TLVP;
+ DebugLoc DL = Op.getDebugLoc();
+ SDValue Result = DAG.getTargetGlobalAddress(GA->getGlobal(), DL,
+ GA->getValueType(0),
+ GA->getOffset(), OpFlag);
+ SDValue Offset = DAG.getNode(WrapperKind, DL, getPointerTy(), Result);
+
+ // With PIC32, the address is actually $g + Offset.
+ if (PIC32)
+ Offset = DAG.getNode(ISD::ADD, DL, getPointerTy(),
+ DAG.getNode(X86ISD::GlobalBaseReg,
+ DebugLoc(), getPointerTy()),
+ Offset);
+
+ // Lowering the machine isd will make sure everything is in the right
+ // location.
+ SDValue Chain = DAG.getEntryNode();
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue Args[] = { Chain, Offset };
+ Chain = DAG.getNode(X86ISD::TLSCALL, DL, NodeTys, Args, 2);
+
+ // TLSCALL will be codegen'ed as call. Inform MFI that function has calls.
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI->setAdjustsStack(true);
+
+ // And our return value (tls address) is in the standard call return value
+ // location.
+ unsigned Reg = Subtarget->is64Bit() ? X86::RAX : X86::EAX;
+ return DAG.getCopyFromReg(Chain, DL, Reg, getPointerTy(),
+ Chain.getValue(1));
+ }
+
+ assert(false &&
+ "TLS not implemented for this target.");
+
+ llvm_unreachable("Unreachable");
+ return SDValue();
+}
+
+
+/// LowerShiftParts - Lower SRA_PARTS and friends, which return two i32 values and
+/// take a 2 x i32 value to shift plus a shift amount.
+SDValue X86TargetLowering::LowerShiftParts(SDValue Op, SelectionDAG &DAG) const {
+ assert(Op.getNumOperands() == 3 && "Not a double-shift!");
+ EVT VT = Op.getValueType();
+ unsigned VTBits = VT.getSizeInBits();
+ DebugLoc dl = Op.getDebugLoc();
+ bool isSRA = Op.getOpcode() == ISD::SRA_PARTS;
+ SDValue ShOpLo = Op.getOperand(0);
+ SDValue ShOpHi = Op.getOperand(1);
+ SDValue ShAmt = Op.getOperand(2);
+ SDValue Tmp1 = isSRA ? DAG.getNode(ISD::SRA, dl, VT, ShOpHi,
+ DAG.getConstant(VTBits - 1, MVT::i8))
+ : DAG.getConstant(0, VT);
+
+ SDValue Tmp2, Tmp3;
+ if (Op.getOpcode() == ISD::SHL_PARTS) {
+ Tmp2 = DAG.getNode(X86ISD::SHLD, dl, VT, ShOpHi, ShOpLo, ShAmt);
+ Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
+ } else {
+ Tmp2 = DAG.getNode(X86ISD::SHRD, dl, VT, ShOpLo, ShOpHi, ShAmt);
+ Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, dl, VT, ShOpHi, ShAmt);
+ }
+
+ SDValue AndNode = DAG.getNode(ISD::AND, dl, MVT::i8, ShAmt,
+ DAG.getConstant(VTBits, MVT::i8));
+ SDValue Cond = DAG.getNode(X86ISD::CMP, dl, MVT::i32,
+ AndNode, DAG.getConstant(0, MVT::i8));
+
+ SDValue Hi, Lo;
+ SDValue CC = DAG.getConstant(X86::COND_NE, MVT::i8);
+ SDValue Ops0[4] = { Tmp2, Tmp3, CC, Cond };
+ SDValue Ops1[4] = { Tmp3, Tmp1, CC, Cond };
+
+ if (Op.getOpcode() == ISD::SHL_PARTS) {
+ Hi = DAG.getNode(X86ISD::CMOV, dl, VT, Ops0, 4);
+ Lo = DAG.getNode(X86ISD::CMOV, dl, VT, Ops1, 4);
+ } else {
+ Lo = DAG.getNode(X86ISD::CMOV, dl, VT, Ops0, 4);
+ Hi = DAG.getNode(X86ISD::CMOV, dl, VT, Ops1, 4);
+ }
+
+ SDValue Ops[2] = { Lo, Hi };
+ return DAG.getMergeValues(Ops, 2, dl);
+}
+
+SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT SrcVT = Op.getOperand(0).getValueType();
+
+ if (SrcVT.isVector())
+ return SDValue();
+
+ assert(SrcVT.getSimpleVT() <= MVT::i64 && SrcVT.getSimpleVT() >= MVT::i16 &&
+ "Unknown SINT_TO_FP to lower!");
+
+ // These are really Legal; return the operand so the caller accepts it as
+ // Legal.
+ if (SrcVT == MVT::i32 && isScalarFPTypeInSSEReg(Op.getValueType()))
+ return Op;
+ if (SrcVT == MVT::i64 && isScalarFPTypeInSSEReg(Op.getValueType()) &&
+ Subtarget->is64Bit()) {
+ return Op;
+ }
+
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned Size = SrcVT.getSizeInBits()/8;
+ MachineFunction &MF = DAG.getMachineFunction();
+ int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size, false);
+ SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
+ SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
+ StackSlot,
+ MachinePointerInfo::getFixedStack(SSFI),
+ false, false, 0);
+ return BuildFILD(Op, SrcVT, Chain, StackSlot, DAG);
+}
+
+SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain,
+ SDValue StackSlot,
+ SelectionDAG &DAG) const {
+ // Build the FILD
+ DebugLoc DL = Op.getDebugLoc();
+ SDVTList Tys;
+ bool useSSE = isScalarFPTypeInSSEReg(Op.getValueType());
+ if (useSSE)
+ Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Glue);
+ else
+ Tys = DAG.getVTList(Op.getValueType(), MVT::Other);
+
+ unsigned ByteSize = SrcVT.getSizeInBits()/8;
+
+ FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(StackSlot);
+ MachineMemOperand *MMO;
+ if (FI) {
+ int SSFI = FI->getIndex();
+ MMO =
+ DAG.getMachineFunction()
+ .getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
+ MachineMemOperand::MOLoad, ByteSize, ByteSize);
+ } else {
+ MMO = cast<LoadSDNode>(StackSlot)->getMemOperand();
+ StackSlot = StackSlot.getOperand(1);
+ }
+ SDValue Ops[] = { Chain, StackSlot, DAG.getValueType(SrcVT) };
+ SDValue Result = DAG.getMemIntrinsicNode(useSSE ? X86ISD::FILD_FLAG :
+ X86ISD::FILD, DL,
+ Tys, Ops, array_lengthof(Ops),
+ SrcVT, MMO);
+
+ if (useSSE) {
+ Chain = Result.getValue(1);
+ SDValue InFlag = Result.getValue(2);
+
+ // FIXME: Currently the FST is flagged to the FILD_FLAG. This
+ // shouldn't be necessary except that RFP cannot be live across
+ // multiple blocks. When stackifier is fixed, they can be uncoupled.
+ MachineFunction &MF = DAG.getMachineFunction();
+ unsigned SSFISize = Op.getValueType().getSizeInBits()/8;
+ int SSFI = MF.getFrameInfo()->CreateStackObject(SSFISize, SSFISize, false);
+ SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
+ Tys = DAG.getVTList(MVT::Other);
+ SDValue Ops[] = {
+ Chain, Result, StackSlot, DAG.getValueType(Op.getValueType()), InFlag
+ };
+ MachineMemOperand *MMO =
+ DAG.getMachineFunction()
+ .getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
+ MachineMemOperand::MOStore, SSFISize, SSFISize);
+
+ Chain = DAG.getMemIntrinsicNode(X86ISD::FST, DL, Tys,
+ Ops, array_lengthof(Ops),
+ Op.getValueType(), MMO);
+ Result = DAG.getLoad(Op.getValueType(), DL, Chain, StackSlot,
+ MachinePointerInfo::getFixedStack(SSFI),
+ false, false, 0);
+ }
+
+ return Result;
+}
+
+// LowerUINT_TO_FP_i64 - 64-bit unsigned integer to double expansion.
+SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op,
+ SelectionDAG &DAG) const {
+ // This algorithm is not obvious. Here it is in C code, more or less:
+ /*
+ double uint64_to_double( uint32_t hi, uint32_t lo ) {
+ static const __m128i exp = { 0x4330000045300000ULL, 0 };
+ static const __m128d bias = { 0x1.0p84, 0x1.0p52 };
+
+ // Copy ints to xmm registers.
+ __m128i xh = _mm_cvtsi32_si128( hi );
+ __m128i xl = _mm_cvtsi32_si128( lo );
+
+ // Combine into low half of a single xmm register.
+ __m128i x = _mm_unpacklo_epi32( xh, xl );
+ __m128d d;
+ double sd;
+
+ // Merge in appropriate exponents to give the integer bits the right
+ // magnitude.
+ x = _mm_unpacklo_epi32( x, exp );
+
+ // Subtract away the biases to deal with the IEEE-754 double precision
+ // implicit 1.
+ d = _mm_sub_pd( (__m128d) x, bias );
+
+ // All conversions up to here are exact. The correctly rounded result is
+ // calculated using the current rounding mode using the following
+ // horizontal add.
+ d = _mm_add_sd( d, _mm_unpackhi_pd( d, d ) );
+ _mm_store_sd( &sd, d ); // Because we are returning doubles in XMM, this
+ // store doesn't really need to be here (except
+ // maybe to zero the other double)
+ return sd;
+ }
+ */
+
+ DebugLoc dl = Op.getDebugLoc();
+ LLVMContext *Context = DAG.getContext();
+
+ // Build some magic constants.
+ std::vector<Constant*> CV0;
+ CV0.push_back(ConstantInt::get(*Context, APInt(32, 0x45300000)));
+ CV0.push_back(ConstantInt::get(*Context, APInt(32, 0x43300000)));
+ CV0.push_back(ConstantInt::get(*Context, APInt(32, 0)));
+ CV0.push_back(ConstantInt::get(*Context, APInt(32, 0)));
+ Constant *C0 = ConstantVector::get(CV0);
+ SDValue CPIdx0 = DAG.getConstantPool(C0, getPointerTy(), 16);
+
+ std::vector<Constant*> CV1;
+ CV1.push_back(
+ ConstantFP::get(*Context, APFloat(APInt(64, 0x4530000000000000ULL))));
+ CV1.push_back(
+ ConstantFP::get(*Context, APFloat(APInt(64, 0x4330000000000000ULL))));
+ Constant *C1 = ConstantVector::get(CV1);
+ SDValue CPIdx1 = DAG.getConstantPool(C1, getPointerTy(), 16);
+
+ SDValue XR1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32,
+ DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Op.getOperand(0),
+ DAG.getIntPtrConstant(1)));
+ SDValue XR2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32,
+ DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Op.getOperand(0),
+ DAG.getIntPtrConstant(0)));
+ SDValue Unpck1 = getUnpackl(DAG, dl, MVT::v4i32, XR1, XR2);
+ SDValue CLod0 = DAG.getLoad(MVT::v4i32, dl, DAG.getEntryNode(), CPIdx0,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 16);
+ SDValue Unpck2 = getUnpackl(DAG, dl, MVT::v4i32, Unpck1, CLod0);
+ SDValue XR2F = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Unpck2);
+ SDValue CLod1 = DAG.getLoad(MVT::v2f64, dl, CLod0.getValue(1), CPIdx1,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 16);
+ SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1);
+
+ // Add the halves; easiest way is to swap them into another reg first.
+ int ShufMask[2] = { 1, -1 };
+ SDValue Shuf = DAG.getVectorShuffle(MVT::v2f64, dl, Sub,
+ DAG.getUNDEF(MVT::v2f64), ShufMask);
+ SDValue Add = DAG.getNode(ISD::FADD, dl, MVT::v2f64, Shuf, Sub);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Add,
+ DAG.getIntPtrConstant(0));
+}
+
+// LowerUINT_TO_FP_i32 - 32-bit unsigned integer to float expansion.
+SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ // FP constant to bias correct the final result.
+ SDValue Bias = DAG.getConstantFP(BitsToDouble(0x4330000000000000ULL),
+ MVT::f64);
+
+ // Load the 32-bit value into an XMM register.
+ SDValue Load = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32,
+ Op.getOperand(0));
+
+ // Zero out the upper parts of the register.
+ Load = getShuffleVectorZeroOrUndef(Load, 0, true, Subtarget->hasXMMInt(),
+ DAG);
+
+ Load = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
+ DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Load),
+ DAG.getIntPtrConstant(0));
+
+ // Or the load with the bias.
+ SDValue Or = DAG.getNode(ISD::OR, dl, MVT::v2i64,
+ DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
+ MVT::v2f64, Load)),
+ DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
+ MVT::v2f64, Bias)));
+ Or = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
+ DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Or),
+ DAG.getIntPtrConstant(0));
+
+ // Subtract the bias.
+ SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Or, Bias);
+
+ // Handle final rounding.
+ EVT DestVT = Op.getValueType();
+
+ if (DestVT.bitsLT(MVT::f64)) {
+ return DAG.getNode(ISD::FP_ROUND, dl, DestVT, Sub,
+ DAG.getIntPtrConstant(0));
+ } else if (DestVT.bitsGT(MVT::f64)) {
+ return DAG.getNode(ISD::FP_EXTEND, dl, DestVT, Sub);
+ }
+
+ // Handle final rounding.
+ return Sub;
+}
+
+SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue N0 = Op.getOperand(0);
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Since UINT_TO_FP is legal (it's marked custom), dag combiner won't
+ // optimize it to a SINT_TO_FP when the sign bit is known zero. Perform
+ // the optimization here.
+ if (DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(), N0);
+
+ EVT SrcVT = N0.getValueType();
+ EVT DstVT = Op.getValueType();
+ if (SrcVT == MVT::i64 && DstVT == MVT::f64 && X86ScalarSSEf64)
+ return LowerUINT_TO_FP_i64(Op, DAG);
+ else if (SrcVT == MVT::i32 && X86ScalarSSEf64)
+ return LowerUINT_TO_FP_i32(Op, DAG);
+
+ // Make a 64-bit buffer, and use it to build an FILD.
+ SDValue StackSlot = DAG.CreateStackTemporary(MVT::i64);
+ if (SrcVT == MVT::i32) {
+ SDValue WordOff = DAG.getConstant(4, getPointerTy());
+ SDValue OffsetSlot = DAG.getNode(ISD::ADD, dl,
+ getPointerTy(), StackSlot, WordOff);
+ SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
+ StackSlot, MachinePointerInfo(),
+ false, false, 0);
+ SDValue Store2 = DAG.getStore(Store1, dl, DAG.getConstant(0, MVT::i32),
+ OffsetSlot, MachinePointerInfo(),
+ false, false, 0);
+ SDValue Fild = BuildFILD(Op, MVT::i64, Store2, StackSlot, DAG);
+ return Fild;
+ }
+
+ assert(SrcVT == MVT::i64 && "Unexpected type in UINT_TO_FP");
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
+ StackSlot, MachinePointerInfo(),
+ false, false, 0);
+ // For i64 source, we need to add the appropriate power of 2 if the input
+ // was negative. This is the same as the optimization in
+ // DAGTypeLegalizer::ExpandIntOp_UNIT_TO_FP, and for it to be safe here,
+ // we must be careful to do the computation in x87 extended precision, not
+ // in SSE. (The generic code can't know it's OK to do this, or how to.)
+ int SSFI = cast<FrameIndexSDNode>(StackSlot)->getIndex();
+ MachineMemOperand *MMO =
+ DAG.getMachineFunction()
+ .getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
+ MachineMemOperand::MOLoad, 8, 8);
+
+ SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other);
+ SDValue Ops[] = { Store, StackSlot, DAG.getValueType(MVT::i64) };
+ SDValue Fild = DAG.getMemIntrinsicNode(X86ISD::FILD, dl, Tys, Ops, 3,
+ MVT::i64, MMO);
+
+ APInt FF(32, 0x5F800000ULL);
+
+ // Check whether the sign bit is set.
+ SDValue SignSet = DAG.getSetCC(dl, getSetCCResultType(MVT::i64),
+ Op.getOperand(0), DAG.getConstant(0, MVT::i64),
+ ISD::SETLT);
+
+ // Build a 64 bit pair (0, FF) in the constant pool, with FF in the lo bits.
+ SDValue FudgePtr = DAG.getConstantPool(
+ ConstantInt::get(*DAG.getContext(), FF.zext(64)),
+ getPointerTy());
+
+ // Get a pointer to FF if the sign bit was set, or to 0 otherwise.
+ SDValue Zero = DAG.getIntPtrConstant(0);
+ SDValue Four = DAG.getIntPtrConstant(4);
+ SDValue Offset = DAG.getNode(ISD::SELECT, dl, Zero.getValueType(), SignSet,
+ Zero, Four);
+ FudgePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(), FudgePtr, Offset);
+
+ // Load the value out, extending it from f32 to f80.
+ // FIXME: Avoid the extend by constructing the right constant pool?
+ SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, dl, MVT::f80, DAG.getEntryNode(),
+ FudgePtr, MachinePointerInfo::getConstantPool(),
+ MVT::f32, false, false, 4);
+ // Extend everything to 80 bits to force it to be done on x87.
+ SDValue Add = DAG.getNode(ISD::FADD, dl, MVT::f80, Fild, Fudge);
+ return DAG.getNode(ISD::FP_ROUND, dl, DstVT, Add, DAG.getIntPtrConstant(0));
+}
+
+std::pair<SDValue,SDValue> X86TargetLowering::
+FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) const {
+ DebugLoc DL = Op.getDebugLoc();
+
+ EVT DstTy = Op.getValueType();
+
+ if (!IsSigned) {
+ assert(DstTy == MVT::i32 && "Unexpected FP_TO_UINT");
+ DstTy = MVT::i64;
+ }
+
+ assert(DstTy.getSimpleVT() <= MVT::i64 &&
+ DstTy.getSimpleVT() >= MVT::i16 &&
+ "Unknown FP_TO_SINT to lower!");
+
+ // These are really Legal.
+ if (DstTy == MVT::i32 &&
+ isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType()))
+ return std::make_pair(SDValue(), SDValue());
+ if (Subtarget->is64Bit() &&
+ DstTy == MVT::i64 &&
+ isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType()))
+ return std::make_pair(SDValue(), SDValue());
+
+ // We lower FP->sint64 into FISTP64, followed by a load, all to a temporary
+ // stack slot.
+ MachineFunction &MF = DAG.getMachineFunction();
+ unsigned MemSize = DstTy.getSizeInBits()/8;
+ int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false);
+ SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
+
+
+
+ unsigned Opc;
+ switch (DstTy.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Invalid FP_TO_SINT to lower!");
+ case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break;
+ case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break;
+ case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break;
+ }
+
+ SDValue Chain = DAG.getEntryNode();
+ SDValue Value = Op.getOperand(0);
+ EVT TheVT = Op.getOperand(0).getValueType();
+ if (isScalarFPTypeInSSEReg(TheVT)) {
+ assert(DstTy == MVT::i64 && "Invalid FP_TO_SINT to lower!");
+ Chain = DAG.getStore(Chain, DL, Value, StackSlot,
+ MachinePointerInfo::getFixedStack(SSFI),
+ false, false, 0);
+ SDVTList Tys = DAG.getVTList(Op.getOperand(0).getValueType(), MVT::Other);
+ SDValue Ops[] = {
+ Chain, StackSlot, DAG.getValueType(TheVT)
+ };
+
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
+ MachineMemOperand::MOLoad, MemSize, MemSize);
+ Value = DAG.getMemIntrinsicNode(X86ISD::FLD, DL, Tys, Ops, 3,
+ DstTy, MMO);
+ Chain = Value.getValue(1);
+ SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false);
+ StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
+ }
+
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
+ MachineMemOperand::MOStore, MemSize, MemSize);
+
+ // Build the FP_TO_INT*_IN_MEM
+ SDValue Ops[] = { Chain, Value, StackSlot };
+ SDValue FIST = DAG.getMemIntrinsicNode(Opc, DL, DAG.getVTList(MVT::Other),
+ Ops, 3, DstTy, MMO);
+
+ return std::make_pair(FIST, StackSlot);
+}
+
+SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op,
+ SelectionDAG &DAG) const {
+ if (Op.getValueType().isVector())
+ return SDValue();
+
+ std::pair<SDValue,SDValue> Vals = FP_TO_INTHelper(Op, DAG, true);
+ SDValue FIST = Vals.first, StackSlot = Vals.second;
+ // If FP_TO_INTHelper failed, the node is actually supposed to be Legal.
+ if (FIST.getNode() == 0) return Op;
+
+ // Load the result.
+ return DAG.getLoad(Op.getValueType(), Op.getDebugLoc(),
+ FIST, StackSlot, MachinePointerInfo(), false, false, 0);
+}
+
+SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op,
+ SelectionDAG &DAG) const {
+ std::pair<SDValue,SDValue> Vals = FP_TO_INTHelper(Op, DAG, false);
+ SDValue FIST = Vals.first, StackSlot = Vals.second;
+ assert(FIST.getNode() && "Unexpected failure");
+
+ // Load the result.
+ return DAG.getLoad(Op.getValueType(), Op.getDebugLoc(),
+ FIST, StackSlot, MachinePointerInfo(), false, false, 0);
+}
+
+SDValue X86TargetLowering::LowerFABS(SDValue Op,
+ SelectionDAG &DAG) const {
+ LLVMContext *Context = DAG.getContext();
+ DebugLoc dl = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+ EVT EltVT = VT;
+ if (VT.isVector())
+ EltVT = VT.getVectorElementType();
+ std::vector<Constant*> CV;
+ if (EltVT == MVT::f64) {
+ Constant *C = ConstantFP::get(*Context, APFloat(APInt(64, ~(1ULL << 63))));
+ CV.push_back(C);
+ CV.push_back(C);
+ } else {
+ Constant *C = ConstantFP::get(*Context, APFloat(APInt(32, ~(1U << 31))));
+ CV.push_back(C);
+ CV.push_back(C);
+ CV.push_back(C);
+ CV.push_back(C);
+ }
+ Constant *C = ConstantVector::get(CV);
+ SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
+ SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 16);
+ return DAG.getNode(X86ISD::FAND, dl, VT, Op.getOperand(0), Mask);
+}
+
+SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) const {
+ LLVMContext *Context = DAG.getContext();
+ DebugLoc dl = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+ EVT EltVT = VT;
+ if (VT.isVector())
+ EltVT = VT.getVectorElementType();
+ std::vector<Constant*> CV;
+ if (EltVT == MVT::f64) {
+ Constant *C = ConstantFP::get(*Context, APFloat(APInt(64, 1ULL << 63)));
+ CV.push_back(C);
+ CV.push_back(C);
+ } else {
+ Constant *C = ConstantFP::get(*Context, APFloat(APInt(32, 1U << 31)));
+ CV.push_back(C);
+ CV.push_back(C);
+ CV.push_back(C);
+ CV.push_back(C);
+ }
+ Constant *C = ConstantVector::get(CV);
+ SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
+ SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 16);
+ if (VT.isVector()) {
+ return DAG.getNode(ISD::BITCAST, dl, VT,
+ DAG.getNode(ISD::XOR, dl, MVT::v2i64,
+ DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
+ Op.getOperand(0)),
+ DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Mask)));
+ } else {
+ return DAG.getNode(X86ISD::FXOR, dl, VT, Op.getOperand(0), Mask);
+ }
+}
+
+SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
+ LLVMContext *Context = DAG.getContext();
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ DebugLoc dl = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+ EVT SrcVT = Op1.getValueType();
+
+ // If second operand is smaller, extend it first.
+ if (SrcVT.bitsLT(VT)) {
+ Op1 = DAG.getNode(ISD::FP_EXTEND, dl, VT, Op1);
+ SrcVT = VT;
+ }
+ // And if it is bigger, shrink it first.
+ if (SrcVT.bitsGT(VT)) {
+ Op1 = DAG.getNode(ISD::FP_ROUND, dl, VT, Op1, DAG.getIntPtrConstant(1));
+ SrcVT = VT;
+ }
+
+ // At this point the operands and the result should have the same
+ // type, and that won't be f80 since that is not custom lowered.
+
+ // First get the sign bit of second operand.
+ std::vector<Constant*> CV;
+ if (SrcVT == MVT::f64) {
+ CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, 1ULL << 63))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, 0))));
+ } else {
+ CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 1U << 31))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0))));
+ }
+ Constant *C = ConstantVector::get(CV);
+ SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
+ SDValue Mask1 = DAG.getLoad(SrcVT, dl, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 16);
+ SDValue SignBit = DAG.getNode(X86ISD::FAND, dl, SrcVT, Op1, Mask1);
+
+ // Shift sign bit right or left if the two operands have different types.
+ if (SrcVT.bitsGT(VT)) {
+ // Op0 is MVT::f32, Op1 is MVT::f64.
+ SignBit = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f64, SignBit);
+ SignBit = DAG.getNode(X86ISD::FSRL, dl, MVT::v2f64, SignBit,
+ DAG.getConstant(32, MVT::i32));
+ SignBit = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, SignBit);
+ SignBit = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, SignBit,
+ DAG.getIntPtrConstant(0));
+ }
+
+ // Clear first operand sign bit.
+ CV.clear();
+ if (VT == MVT::f64) {
+ CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, ~(1ULL << 63)))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, 0))));
+ } else {
+ CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, ~(1U << 31)))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0))));
+ }
+ C = ConstantVector::get(CV);
+ CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
+ SDValue Mask2 = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 16);
+ SDValue Val = DAG.getNode(X86ISD::FAND, dl, VT, Op0, Mask2);
+
+ // Or the value with the sign bit.
+ return DAG.getNode(X86ISD::FOR, dl, VT, Val, SignBit);
+}
+
+SDValue X86TargetLowering::LowerFGETSIGN(SDValue Op, SelectionDAG &DAG) const {
+ SDValue N0 = Op.getOperand(0);
+ DebugLoc dl = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+
+ // Lower ISD::FGETSIGN to (AND (X86ISD::FGETSIGNx86 ...) 1).
+ SDValue xFGETSIGN = DAG.getNode(X86ISD::FGETSIGNx86, dl, VT, N0,
+ DAG.getConstant(1, VT));
+ return DAG.getNode(ISD::AND, dl, VT, xFGETSIGN, DAG.getConstant(1, VT));
+}
+
+/// Emit nodes that will be selected as "test Op0,Op0", or something
+/// equivalent.
+SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+
+ // CF and OF aren't always set the way we want. Determine which
+ // of these we need.
+ bool NeedCF = false;
+ bool NeedOF = false;
+ switch (X86CC) {
+ default: break;
+ case X86::COND_A: case X86::COND_AE:
+ case X86::COND_B: case X86::COND_BE:
+ NeedCF = true;
+ break;
+ case X86::COND_G: case X86::COND_GE:
+ case X86::COND_L: case X86::COND_LE:
+ case X86::COND_O: case X86::COND_NO:
+ NeedOF = true;
+ break;
+ }
+
+ // See if we can use the EFLAGS value from the operand instead of
+ // doing a separate TEST. TEST always sets OF and CF to 0, so unless
+ // we prove that the arithmetic won't overflow, we can't use OF or CF.
+ if (Op.getResNo() != 0 || NeedOF || NeedCF)
+ // Emit a CMP with 0, which is the TEST pattern.
+ return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
+ DAG.getConstant(0, Op.getValueType()));
+
+ unsigned Opcode = 0;
+ unsigned NumOperands = 0;
+ switch (Op.getNode()->getOpcode()) {
+ case ISD::ADD:
+ // Due to an isel shortcoming, be conservative if this add is likely to be
+ // selected as part of a load-modify-store instruction. When the root node
+ // in a match is a store, isel doesn't know how to remap non-chain non-flag
+ // uses of other nodes in the match, such as the ADD in this case. This
+ // leads to the ADD being left around and reselected, with the result being
+ // two adds in the output. Alas, even if none our users are stores, that
+ // doesn't prove we're O.K. Ergo, if we have any parents that aren't
+ // CopyToReg or SETCC, eschew INC/DEC. A better fix seems to require
+ // climbing the DAG back to the root, and it doesn't seem to be worth the
+ // effort.
+ for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
+ UE = Op.getNode()->use_end(); UI != UE; ++UI)
+ if (UI->getOpcode() != ISD::CopyToReg && UI->getOpcode() != ISD::SETCC)
+ goto default_case;
+
+ if (ConstantSDNode *C =
+ dyn_cast<ConstantSDNode>(Op.getNode()->getOperand(1))) {
+ // An add of one will be selected as an INC.
+ if (C->getAPIntValue() == 1) {
+ Opcode = X86ISD::INC;
+ NumOperands = 1;
+ break;
+ }
+
+ // An add of negative one (subtract of one) will be selected as a DEC.
+ if (C->getAPIntValue().isAllOnesValue()) {
+ Opcode = X86ISD::DEC;
+ NumOperands = 1;
+ break;
+ }
+ }
+
+ // Otherwise use a regular EFLAGS-setting add.
+ Opcode = X86ISD::ADD;
+ NumOperands = 2;
+ break;
+ case ISD::AND: {
+ // If the primary and result isn't used, don't bother using X86ISD::AND,
+ // because a TEST instruction will be better.
+ bool NonFlagUse = false;
+ for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
+ UE = Op.getNode()->use_end(); UI != UE; ++UI) {
+ SDNode *User = *UI;
+ unsigned UOpNo = UI.getOperandNo();
+ if (User->getOpcode() == ISD::TRUNCATE && User->hasOneUse()) {
+ // Look pass truncate.
+ UOpNo = User->use_begin().getOperandNo();
+ User = *User->use_begin();
+ }
+
+ if (User->getOpcode() != ISD::BRCOND &&
+ User->getOpcode() != ISD::SETCC &&
+ (User->getOpcode() != ISD::SELECT || UOpNo != 0)) {
+ NonFlagUse = true;
+ break;
+ }
+ }
+
+ if (!NonFlagUse)
+ break;
+ }
+ // FALL THROUGH
+ case ISD::SUB:
+ case ISD::OR:
+ case ISD::XOR:
+ // Due to the ISEL shortcoming noted above, be conservative if this op is
+ // likely to be selected as part of a load-modify-store instruction.
+ for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
+ UE = Op.getNode()->use_end(); UI != UE; ++UI)
+ if (UI->getOpcode() == ISD::STORE)
+ goto default_case;
+
+ // Otherwise use a regular EFLAGS-setting instruction.
+ switch (Op.getNode()->getOpcode()) {
+ default: llvm_unreachable("unexpected operator!");
+ case ISD::SUB: Opcode = X86ISD::SUB; break;
+ case ISD::OR: Opcode = X86ISD::OR; break;
+ case ISD::XOR: Opcode = X86ISD::XOR; break;
+ case ISD::AND: Opcode = X86ISD::AND; break;
+ }
+
+ NumOperands = 2;
+ break;
+ case X86ISD::ADD:
+ case X86ISD::SUB:
+ case X86ISD::INC:
+ case X86ISD::DEC:
+ case X86ISD::OR:
+ case X86ISD::XOR:
+ case X86ISD::AND:
+ return SDValue(Op.getNode(), 1);
+ default:
+ default_case:
+ break;
+ }
+
+ if (Opcode == 0)
+ // Emit a CMP with 0, which is the TEST pattern.
+ return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
+ DAG.getConstant(0, Op.getValueType()));
+
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
+ SmallVector<SDValue, 4> Ops;
+ for (unsigned i = 0; i != NumOperands; ++i)
+ Ops.push_back(Op.getOperand(i));
+
+ SDValue New = DAG.getNode(Opcode, dl, VTs, &Ops[0], NumOperands);
+ DAG.ReplaceAllUsesWith(Op, New);
+ return SDValue(New.getNode(), 1);
+}
+
+/// Emit nodes that will be selected as "cmp Op0,Op1", or something
+/// equivalent.
+SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
+ SelectionDAG &DAG) const {
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op1))
+ if (C->getAPIntValue() == 0)
+ return EmitTest(Op0, X86CC, DAG);
+
+ DebugLoc dl = Op0.getDebugLoc();
+ return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op0, Op1);
+}
+
+/// LowerToBT - Result of 'and' is compared against zero. Turn it into a BT node
+/// if it's possible.
+SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC,
+ DebugLoc dl, SelectionDAG &DAG) const {
+ SDValue Op0 = And.getOperand(0);
+ SDValue Op1 = And.getOperand(1);
+ if (Op0.getOpcode() == ISD::TRUNCATE)
+ Op0 = Op0.getOperand(0);
+ if (Op1.getOpcode() == ISD::TRUNCATE)
+ Op1 = Op1.getOperand(0);
+
+ SDValue LHS, RHS;
+ if (Op1.getOpcode() == ISD::SHL)
+ std::swap(Op0, Op1);
+ if (Op0.getOpcode() == ISD::SHL) {
+ if (ConstantSDNode *And00C = dyn_cast<ConstantSDNode>(Op0.getOperand(0)))
+ if (And00C->getZExtValue() == 1) {
+ // If we looked past a truncate, check that it's only truncating away
+ // known zeros.
+ unsigned BitWidth = Op0.getValueSizeInBits();
+ unsigned AndBitWidth = And.getValueSizeInBits();
+ if (BitWidth > AndBitWidth) {
+ APInt Mask = APInt::getAllOnesValue(BitWidth), Zeros, Ones;
+ DAG.ComputeMaskedBits(Op0, Mask, Zeros, Ones);
+ if (Zeros.countLeadingOnes() < BitWidth - AndBitWidth)
+ return SDValue();
+ }
+ LHS = Op1;
+ RHS = Op0.getOperand(1);
+ }
+ } else if (Op1.getOpcode() == ISD::Constant) {
+ ConstantSDNode *AndRHS = cast<ConstantSDNode>(Op1);
+ SDValue AndLHS = Op0;
+ if (AndRHS->getZExtValue() == 1 && AndLHS.getOpcode() == ISD::SRL) {
+ LHS = AndLHS.getOperand(0);
+ RHS = AndLHS.getOperand(1);
+ }
+ }
+
+ if (LHS.getNode()) {
+ // If LHS is i8, promote it to i32 with any_extend. There is no i8 BT
+ // instruction. Since the shift amount is in-range-or-undefined, we know
+ // that doing a bittest on the i32 value is ok. We extend to i32 because
+ // the encoding for the i16 version is larger than the i32 version.
+ // Also promote i16 to i32 for performance / code size reason.
+ if (LHS.getValueType() == MVT::i8 ||
+ LHS.getValueType() == MVT::i16)
+ LHS = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, LHS);
+
+ // If the operand types disagree, extend the shift amount to match. Since
+ // BT ignores high bits (like shifts) we can use anyextend.
+ if (LHS.getValueType() != RHS.getValueType())
+ RHS = DAG.getNode(ISD::ANY_EXTEND, dl, LHS.getValueType(), RHS);
+
+ SDValue BT = DAG.getNode(X86ISD::BT, dl, MVT::i32, LHS, RHS);
+ unsigned Cond = CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B;
+ return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+ DAG.getConstant(Cond, MVT::i8), BT);
+ }
+
+ return SDValue();
+}
+
+SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
+
+ if (Op.getValueType().isVector()) return LowerVSETCC(Op, DAG);
+
+ assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer");
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ DebugLoc dl = Op.getDebugLoc();
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+
+ // Optimize to BT if possible.
+ // Lower (X & (1 << N)) == 0 to BT(X, N).
+ // Lower ((X >>u N) & 1) != 0 to BT(X, N).
+ // Lower ((X >>s N) & 1) != 0 to BT(X, N).
+ if (Op0.getOpcode() == ISD::AND && Op0.hasOneUse() &&
+ Op1.getOpcode() == ISD::Constant &&
+ cast<ConstantSDNode>(Op1)->isNullValue() &&
+ (CC == ISD::SETEQ || CC == ISD::SETNE)) {
+ SDValue NewSetCC = LowerToBT(Op0, CC, dl, DAG);
+ if (NewSetCC.getNode())
+ return NewSetCC;
+ }
+
+ // Look for X == 0, X == 1, X != 0, or X != 1. We can simplify some forms of
+ // these.
+ if (Op1.getOpcode() == ISD::Constant &&
+ (cast<ConstantSDNode>(Op1)->getZExtValue() == 1 ||
+ cast<ConstantSDNode>(Op1)->isNullValue()) &&
+ (CC == ISD::SETEQ || CC == ISD::SETNE)) {
+
+ // If the input is a setcc, then reuse the input setcc or use a new one with
+ // the inverted condition.
+ if (Op0.getOpcode() == X86ISD::SETCC) {
+ X86::CondCode CCode = (X86::CondCode)Op0.getConstantOperandVal(0);
+ bool Invert = (CC == ISD::SETNE) ^
+ cast<ConstantSDNode>(Op1)->isNullValue();
+ if (!Invert) return Op0;
+
+ CCode = X86::GetOppositeBranchCondition(CCode);
+ return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+ DAG.getConstant(CCode, MVT::i8), Op0.getOperand(1));
+ }
+ }
+
+ bool isFP = Op1.getValueType().isFloatingPoint();
+ unsigned X86CC = TranslateX86CC(CC, isFP, Op0, Op1, DAG);
+ if (X86CC == X86::COND_INVALID)
+ return SDValue();
+
+ SDValue EFLAGS = EmitCmp(Op0, Op1, X86CC, DAG);
+ return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+ DAG.getConstant(X86CC, MVT::i8), EFLAGS);
+}
+
+// Lower256IntVSETCC - Break a VSETCC 256-bit integer VSETCC into two new 128
+// ones, and then concatenate the result back.
+static SDValue Lower256IntVSETCC(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
+
+ assert(VT.getSizeInBits() == 256 && Op.getOpcode() == ISD::SETCC &&
+ "Unsupported value type for operation");
+
+ int NumElems = VT.getVectorNumElements();
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue CC = Op.getOperand(2);
+ SDValue Idx0 = DAG.getConstant(0, MVT::i32);
+ SDValue Idx1 = DAG.getConstant(NumElems/2, MVT::i32);
+
+ // Extract the LHS vectors
+ SDValue LHS = Op.getOperand(0);
+ SDValue LHS1 = Extract128BitVector(LHS, Idx0, DAG, dl);
+ SDValue LHS2 = Extract128BitVector(LHS, Idx1, DAG, dl);
+
+ // Extract the RHS vectors
+ SDValue RHS = Op.getOperand(1);
+ SDValue RHS1 = Extract128BitVector(RHS, Idx0, DAG, dl);
+ SDValue RHS2 = Extract128BitVector(RHS, Idx1, DAG, dl);
+
+ // Issue the operation on the smaller types and concatenate the result back
+ MVT EltVT = VT.getVectorElementType().getSimpleVT();
+ EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
+ DAG.getNode(Op.getOpcode(), dl, NewVT, LHS1, RHS1, CC),
+ DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, RHS2, CC));
+}
+
+
+SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Cond;
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ SDValue CC = Op.getOperand(2);
+ EVT VT = Op.getValueType();
+ ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
+ bool isFP = Op.getOperand(1).getValueType().isFloatingPoint();
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (isFP) {
+ unsigned SSECC = 8;
+ EVT EltVT = Op0.getValueType().getVectorElementType();
+ assert(EltVT == MVT::f32 || EltVT == MVT::f64);
+
+ unsigned Opc = EltVT == MVT::f32 ? X86ISD::CMPPS : X86ISD::CMPPD;
+ bool Swap = false;
+
+ // SSE Condition code mapping:
+ // 0 - EQ
+ // 1 - LT
+ // 2 - LE
+ // 3 - UNORD
+ // 4 - NEQ
+ // 5 - NLT
+ // 6 - NLE
+ // 7 - ORD
+ switch (SetCCOpcode) {
+ default: break;
+ case ISD::SETOEQ:
+ case ISD::SETEQ: SSECC = 0; break;
+ case ISD::SETOGT:
+ case ISD::SETGT: Swap = true; // Fallthrough
+ case ISD::SETLT:
+ case ISD::SETOLT: SSECC = 1; break;
+ case ISD::SETOGE:
+ case ISD::SETGE: Swap = true; // Fallthrough
+ case ISD::SETLE:
+ case ISD::SETOLE: SSECC = 2; break;
+ case ISD::SETUO: SSECC = 3; break;
+ case ISD::SETUNE:
+ case ISD::SETNE: SSECC = 4; break;
+ case ISD::SETULE: Swap = true;
+ case ISD::SETUGE: SSECC = 5; break;
+ case ISD::SETULT: Swap = true;
+ case ISD::SETUGT: SSECC = 6; break;
+ case ISD::SETO: SSECC = 7; break;
+ }
+ if (Swap)
+ std::swap(Op0, Op1);
+
+ // In the two special cases we can't handle, emit two comparisons.
+ if (SSECC == 8) {
+ if (SetCCOpcode == ISD::SETUEQ) {
+ SDValue UNORD, EQ;
+ UNORD = DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(3, MVT::i8));
+ EQ = DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(0, MVT::i8));
+ return DAG.getNode(ISD::OR, dl, VT, UNORD, EQ);
+ }
+ else if (SetCCOpcode == ISD::SETONE) {
+ SDValue ORD, NEQ;
+ ORD = DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(7, MVT::i8));
+ NEQ = DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(4, MVT::i8));
+ return DAG.getNode(ISD::AND, dl, VT, ORD, NEQ);
+ }
+ llvm_unreachable("Illegal FP comparison");
+ }
+ // Handle all other FP comparisons here.
+ return DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(SSECC, MVT::i8));
+ }
+
+ // Break 256-bit integer vector compare into smaller ones.
+ if (!isFP && VT.getSizeInBits() == 256)
+ return Lower256IntVSETCC(Op, DAG);
+
+ // We are handling one of the integer comparisons here. Since SSE only has
+ // GT and EQ comparisons for integer, swapping operands and multiple
+ // operations may be required for some comparisons.
+ unsigned Opc = 0, EQOpc = 0, GTOpc = 0;
+ bool Swap = false, Invert = false, FlipSigns = false;
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: break;
+ case MVT::v16i8: EQOpc = X86ISD::PCMPEQB; GTOpc = X86ISD::PCMPGTB; break;
+ case MVT::v8i16: EQOpc = X86ISD::PCMPEQW; GTOpc = X86ISD::PCMPGTW; break;
+ case MVT::v4i32: EQOpc = X86ISD::PCMPEQD; GTOpc = X86ISD::PCMPGTD; break;
+ case MVT::v2i64: EQOpc = X86ISD::PCMPEQQ; GTOpc = X86ISD::PCMPGTQ; break;
+ }
+
+ switch (SetCCOpcode) {
+ default: break;
+ case ISD::SETNE: Invert = true;
+ case ISD::SETEQ: Opc = EQOpc; break;
+ case ISD::SETLT: Swap = true;
+ case ISD::SETGT: Opc = GTOpc; break;
+ case ISD::SETGE: Swap = true;
+ case ISD::SETLE: Opc = GTOpc; Invert = true; break;
+ case ISD::SETULT: Swap = true;
+ case ISD::SETUGT: Opc = GTOpc; FlipSigns = true; break;
+ case ISD::SETUGE: Swap = true;
+ case ISD::SETULE: Opc = GTOpc; FlipSigns = true; Invert = true; break;
+ }
+ if (Swap)
+ std::swap(Op0, Op1);
+
+ // Check that the operation in question is available (most are plain SSE2,
+ // but PCMPGTQ and PCMPEQQ have different requirements).
+ if (Opc == X86ISD::PCMPGTQ && !Subtarget->hasSSE42() && !Subtarget->hasAVX())
+ return SDValue();
+ if (Opc == X86ISD::PCMPEQQ && !Subtarget->hasSSE41() && !Subtarget->hasAVX())
+ return SDValue();
+
+ // Since SSE has no unsigned integer comparisons, we need to flip the sign
+ // bits of the inputs before performing those operations.
+ if (FlipSigns) {
+ EVT EltVT = VT.getVectorElementType();
+ SDValue SignBit = DAG.getConstant(APInt::getSignBit(EltVT.getSizeInBits()),
+ EltVT);
+ std::vector<SDValue> SignBits(VT.getVectorNumElements(), SignBit);
+ SDValue SignVec = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &SignBits[0],
+ SignBits.size());
+ Op0 = DAG.getNode(ISD::XOR, dl, VT, Op0, SignVec);
+ Op1 = DAG.getNode(ISD::XOR, dl, VT, Op1, SignVec);
+ }
+
+ SDValue Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
+
+ // If the logical-not of the result is required, perform that now.
+ if (Invert)
+ Result = DAG.getNOT(dl, Result, VT);
+
+ return Result;
+}
+
+// isX86LogicalCmp - Return true if opcode is a X86 logical comparison.
+static bool isX86LogicalCmp(SDValue Op) {
+ unsigned Opc = Op.getNode()->getOpcode();
+ if (Opc == X86ISD::CMP || Opc == X86ISD::COMI || Opc == X86ISD::UCOMI)
+ return true;
+ if (Op.getResNo() == 1 &&
+ (Opc == X86ISD::ADD ||
+ Opc == X86ISD::SUB ||
+ Opc == X86ISD::ADC ||
+ Opc == X86ISD::SBB ||
+ Opc == X86ISD::SMUL ||
+ Opc == X86ISD::UMUL ||
+ Opc == X86ISD::INC ||
+ Opc == X86ISD::DEC ||
+ Opc == X86ISD::OR ||
+ Opc == X86ISD::XOR ||
+ Opc == X86ISD::AND))
+ return true;
+
+ if (Op.getResNo() == 2 && Opc == X86ISD::UMUL)
+ return true;
+
+ return false;
+}
+
+static bool isZero(SDValue V) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(V);
+ return C && C->isNullValue();
+}
+
+static bool isAllOnes(SDValue V) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(V);
+ return C && C->isAllOnesValue();
+}
+
+SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
+ bool addTest = true;
+ SDValue Cond = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ SDValue Op2 = Op.getOperand(2);
+ DebugLoc DL = Op.getDebugLoc();
+ SDValue CC;
+
+ if (Cond.getOpcode() == ISD::SETCC) {
+ SDValue NewCond = LowerSETCC(Cond, DAG);
+ if (NewCond.getNode())
+ Cond = NewCond;
+ }
+
+ // (select (x == 0), -1, y) -> (sign_bit (x - 1)) | y
+ // (select (x == 0), y, -1) -> ~(sign_bit (x - 1)) | y
+ // (select (x != 0), y, -1) -> (sign_bit (x - 1)) | y
+ // (select (x != 0), -1, y) -> ~(sign_bit (x - 1)) | y
+ if (Cond.getOpcode() == X86ISD::SETCC &&
+ Cond.getOperand(1).getOpcode() == X86ISD::CMP &&
+ isZero(Cond.getOperand(1).getOperand(1))) {
+ SDValue Cmp = Cond.getOperand(1);
+
+ unsigned CondCode =cast<ConstantSDNode>(Cond.getOperand(0))->getZExtValue();
+
+ if ((isAllOnes(Op1) || isAllOnes(Op2)) &&
+ (CondCode == X86::COND_E || CondCode == X86::COND_NE)) {
+ SDValue Y = isAllOnes(Op2) ? Op1 : Op2;
+
+ SDValue CmpOp0 = Cmp.getOperand(0);
+ Cmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32,
+ CmpOp0, DAG.getConstant(1, CmpOp0.getValueType()));
+
+ SDValue Res = // Res = 0 or -1.
+ DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(),
+ DAG.getConstant(X86::COND_B, MVT::i8), Cmp);
+
+ if (isAllOnes(Op1) != (CondCode == X86::COND_E))
+ Res = DAG.getNOT(DL, Res, Res.getValueType());
+
+ ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(Op2);
+ if (N2C == 0 || !N2C->isNullValue())
+ Res = DAG.getNode(ISD::OR, DL, Res.getValueType(), Res, Y);
+ return Res;
+ }
+ }
+
+ // Look past (and (setcc_carry (cmp ...)), 1).
+ if (Cond.getOpcode() == ISD::AND &&
+ Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Cond.getOperand(1));
+ if (C && C->getAPIntValue() == 1)
+ Cond = Cond.getOperand(0);
+ }
+
+ // If condition flag is set by a X86ISD::CMP, then use it as the condition
+ // setting operand in place of the X86ISD::SETCC.
+ if (Cond.getOpcode() == X86ISD::SETCC ||
+ Cond.getOpcode() == X86ISD::SETCC_CARRY) {
+ CC = Cond.getOperand(0);
+
+ SDValue Cmp = Cond.getOperand(1);
+ unsigned Opc = Cmp.getOpcode();
+ EVT VT = Op.getValueType();
+
+ bool IllegalFPCMov = false;
+ if (VT.isFloatingPoint() && !VT.isVector() &&
+ !isScalarFPTypeInSSEReg(VT)) // FPStack?
+ IllegalFPCMov = !hasFPCMov(cast<ConstantSDNode>(CC)->getSExtValue());
+
+ if ((isX86LogicalCmp(Cmp) && !IllegalFPCMov) ||
+ Opc == X86ISD::BT) { // FIXME
+ Cond = Cmp;
+ addTest = false;
+ }
+ }
+
+ if (addTest) {
+ // Look pass the truncate.
+ if (Cond.getOpcode() == ISD::TRUNCATE)
+ Cond = Cond.getOperand(0);
+
+ // We know the result of AND is compared against zero. Try to match
+ // it to BT.
+ if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) {
+ SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, DL, DAG);
+ if (NewSetCC.getNode()) {
+ CC = NewSetCC.getOperand(0);
+ Cond = NewSetCC.getOperand(1);
+ addTest = false;
+ }
+ }
+ }
+
+ if (addTest) {
+ CC = DAG.getConstant(X86::COND_NE, MVT::i8);
+ Cond = EmitTest(Cond, X86::COND_NE, DAG);
+ }
+
+ // a < b ? -1 : 0 -> RES = ~setcc_carry
+ // a < b ? 0 : -1 -> RES = setcc_carry
+ // a >= b ? -1 : 0 -> RES = setcc_carry
+ // a >= b ? 0 : -1 -> RES = ~setcc_carry
+ if (Cond.getOpcode() == X86ISD::CMP) {
+ unsigned CondCode = cast<ConstantSDNode>(CC)->getZExtValue();
+
+ if ((CondCode == X86::COND_AE || CondCode == X86::COND_B) &&
+ (isAllOnes(Op1) || isAllOnes(Op2)) && (isZero(Op1) || isZero(Op2))) {
+ SDValue Res = DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(),
+ DAG.getConstant(X86::COND_B, MVT::i8), Cond);
+ if (isAllOnes(Op1) != (CondCode == X86::COND_B))
+ return DAG.getNOT(DL, Res, Res.getValueType());
+ return Res;
+ }
+ }
+
+ // X86ISD::CMOV means set the result (which is operand 1) to the RHS if
+ // condition is true.
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
+ SDValue Ops[] = { Op2, Op1, CC, Cond };
+ return DAG.getNode(X86ISD::CMOV, DL, VTs, Ops, array_lengthof(Ops));
+}
+
+// isAndOrOfSingleUseSetCCs - Return true if node is an ISD::AND or
+// ISD::OR of two X86ISD::SETCC nodes each of which has no other use apart
+// from the AND / OR.
+static bool isAndOrOfSetCCs(SDValue Op, unsigned &Opc) {
+ Opc = Op.getOpcode();
+ if (Opc != ISD::OR && Opc != ISD::AND)
+ return false;
+ return (Op.getOperand(0).getOpcode() == X86ISD::SETCC &&
+ Op.getOperand(0).hasOneUse() &&
+ Op.getOperand(1).getOpcode() == X86ISD::SETCC &&
+ Op.getOperand(1).hasOneUse());
+}
+
+// isXor1OfSetCC - Return true if node is an ISD::XOR of a X86ISD::SETCC and
+// 1 and that the SETCC node has a single use.
+static bool isXor1OfSetCC(SDValue Op) {
+ if (Op.getOpcode() != ISD::XOR)
+ return false;
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ if (N1C && N1C->getAPIntValue() == 1) {
+ return Op.getOperand(0).getOpcode() == X86ISD::SETCC &&
+ Op.getOperand(0).hasOneUse();
+ }
+ return false;
+}
+
+SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
+ bool addTest = true;
+ SDValue Chain = Op.getOperand(0);
+ SDValue Cond = Op.getOperand(1);
+ SDValue Dest = Op.getOperand(2);
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue CC;
+
+ if (Cond.getOpcode() == ISD::SETCC) {
+ SDValue NewCond = LowerSETCC(Cond, DAG);
+ if (NewCond.getNode())
+ Cond = NewCond;
+ }
+#if 0
+ // FIXME: LowerXALUO doesn't handle these!!
+ else if (Cond.getOpcode() == X86ISD::ADD ||
+ Cond.getOpcode() == X86ISD::SUB ||
+ Cond.getOpcode() == X86ISD::SMUL ||
+ Cond.getOpcode() == X86ISD::UMUL)
+ Cond = LowerXALUO(Cond, DAG);
+#endif
+
+ // Look pass (and (setcc_carry (cmp ...)), 1).
+ if (Cond.getOpcode() == ISD::AND &&
+ Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Cond.getOperand(1));
+ if (C && C->getAPIntValue() == 1)
+ Cond = Cond.getOperand(0);
+ }
+
+ // If condition flag is set by a X86ISD::CMP, then use it as the condition
+ // setting operand in place of the X86ISD::SETCC.
+ if (Cond.getOpcode() == X86ISD::SETCC ||
+ Cond.getOpcode() == X86ISD::SETCC_CARRY) {
+ CC = Cond.getOperand(0);
+
+ SDValue Cmp = Cond.getOperand(1);
+ unsigned Opc = Cmp.getOpcode();
+ // FIXME: WHY THE SPECIAL CASING OF LogicalCmp??
+ if (isX86LogicalCmp(Cmp) || Opc == X86ISD::BT) {
+ Cond = Cmp;
+ addTest = false;
+ } else {
+ switch (cast<ConstantSDNode>(CC)->getZExtValue()) {
+ default: break;
+ case X86::COND_O:
+ case X86::COND_B:
+ // These can only come from an arithmetic instruction with overflow,
+ // e.g. SADDO, UADDO.
+ Cond = Cond.getNode()->getOperand(1);
+ addTest = false;
+ break;
+ }
+ }
+ } else {
+ unsigned CondOpc;
+ if (Cond.hasOneUse() && isAndOrOfSetCCs(Cond, CondOpc)) {
+ SDValue Cmp = Cond.getOperand(0).getOperand(1);
+ if (CondOpc == ISD::OR) {
+ // Also, recognize the pattern generated by an FCMP_UNE. We can emit
+ // two branches instead of an explicit OR instruction with a
+ // separate test.
+ if (Cmp == Cond.getOperand(1).getOperand(1) &&
+ isX86LogicalCmp(Cmp)) {
+ CC = Cond.getOperand(0).getOperand(0);
+ Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
+ Chain, Dest, CC, Cmp);
+ CC = Cond.getOperand(1).getOperand(0);
+ Cond = Cmp;
+ addTest = false;
+ }
+ } else { // ISD::AND
+ // Also, recognize the pattern generated by an FCMP_OEQ. We can emit
+ // two branches instead of an explicit AND instruction with a
+ // separate test. However, we only do this if this block doesn't
+ // have a fall-through edge, because this requires an explicit
+ // jmp when the condition is false.
+ if (Cmp == Cond.getOperand(1).getOperand(1) &&
+ isX86LogicalCmp(Cmp) &&
+ Op.getNode()->hasOneUse()) {
+ X86::CondCode CCode =
+ (X86::CondCode)Cond.getOperand(0).getConstantOperandVal(0);
+ CCode = X86::GetOppositeBranchCondition(CCode);
+ CC = DAG.getConstant(CCode, MVT::i8);
+ SDNode *User = *Op.getNode()->use_begin();
+ // Look for an unconditional branch following this conditional branch.
+ // We need this because we need to reverse the successors in order
+ // to implement FCMP_OEQ.
+ if (User->getOpcode() == ISD::BR) {
+ SDValue FalseBB = User->getOperand(1);
+ SDNode *NewBR =
+ DAG.UpdateNodeOperands(User, User->getOperand(0), Dest);
+ assert(NewBR == User);
+ (void)NewBR;
+ Dest = FalseBB;
+
+ Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
+ Chain, Dest, CC, Cmp);
+ X86::CondCode CCode =
+ (X86::CondCode)Cond.getOperand(1).getConstantOperandVal(0);
+ CCode = X86::GetOppositeBranchCondition(CCode);
+ CC = DAG.getConstant(CCode, MVT::i8);
+ Cond = Cmp;
+ addTest = false;
+ }
+ }
+ }
+ } else if (Cond.hasOneUse() && isXor1OfSetCC(Cond)) {
+ // Recognize for xorb (setcc), 1 patterns. The xor inverts the condition.
+ // It should be transformed during dag combiner except when the condition
+ // is set by a arithmetics with overflow node.
+ X86::CondCode CCode =
+ (X86::CondCode)Cond.getOperand(0).getConstantOperandVal(0);
+ CCode = X86::GetOppositeBranchCondition(CCode);
+ CC = DAG.getConstant(CCode, MVT::i8);
+ Cond = Cond.getOperand(0).getOperand(1);
+ addTest = false;
+ }
+ }
+
+ if (addTest) {
+ // Look pass the truncate.
+ if (Cond.getOpcode() == ISD::TRUNCATE)
+ Cond = Cond.getOperand(0);
+
+ // We know the result of AND is compared against zero. Try to match
+ // it to BT.
+ if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) {
+ SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, dl, DAG);
+ if (NewSetCC.getNode()) {
+ CC = NewSetCC.getOperand(0);
+ Cond = NewSetCC.getOperand(1);
+ addTest = false;
+ }
+ }
+ }
+
+ if (addTest) {
+ CC = DAG.getConstant(X86::COND_NE, MVT::i8);
+ Cond = EmitTest(Cond, X86::COND_NE, DAG);
+ }
+ return DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
+ Chain, Dest, CC, Cond);
+}
+
+
+// Lower dynamic stack allocation to _alloca call for Cygwin/Mingw targets.
+// Calls to _alloca is needed to probe the stack when allocating more than 4k
+// bytes in one go. Touching the stack at 4K increments is necessary to ensure
+// that the guard pages used by the OS virtual memory manager are allocated in
+// correct sequence.
+SDValue
+X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
+ SelectionDAG &DAG) const {
+ assert((Subtarget->isTargetCygMing() || Subtarget->isTargetWindows() ||
+ EnableSegmentedStacks) &&
+ "This should be used only on Windows targets or when segmented stacks "
+ "are being used");
+ assert(!Subtarget->isTargetEnvMacho() && "Not implemented");
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Get the inputs.
+ SDValue Chain = Op.getOperand(0);
+ SDValue Size = Op.getOperand(1);
+ // FIXME: Ensure alignment here
+
+ bool Is64Bit = Subtarget->is64Bit();
+ EVT SPTy = Is64Bit ? MVT::i64 : MVT::i32;
+
+ if (EnableSegmentedStacks) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ if (Is64Bit) {
+ // The 64 bit implementation of segmented stacks needs to clobber both r10
+ // r11. This makes it impossible to use it along with nested parameters.
+ const Function *F = MF.getFunction();
+
+ for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
+ I != E; I++)
+ if (I->hasNestAttr())
+ report_fatal_error("Cannot use segmented stacks with functions that "
+ "have nested arguments.");
+ }
+
+ const TargetRegisterClass *AddrRegClass =
+ getRegClassFor(Subtarget->is64Bit() ? MVT::i64:MVT::i32);
+ unsigned Vreg = MRI.createVirtualRegister(AddrRegClass);
+ Chain = DAG.getCopyToReg(Chain, dl, Vreg, Size);
+ SDValue Value = DAG.getNode(X86ISD::SEG_ALLOCA, dl, SPTy, Chain,
+ DAG.getRegister(Vreg, SPTy));
+ SDValue Ops1[2] = { Value, Chain };
+ return DAG.getMergeValues(Ops1, 2, dl);
+ } else {
+ SDValue Flag;
+ unsigned Reg = (Subtarget->is64Bit() ? X86::RAX : X86::EAX);
+
+ Chain = DAG.getCopyToReg(Chain, dl, Reg, Size, Flag);
+ Flag = Chain.getValue(1);
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+
+ Chain = DAG.getNode(X86ISD::WIN_ALLOCA, dl, NodeTys, Chain, Flag);
+ Flag = Chain.getValue(1);
+
+ Chain = DAG.getCopyFromReg(Chain, dl, X86StackPtr, SPTy).getValue(1);
+
+ SDValue Ops1[2] = { Chain.getValue(0), Chain };
+ return DAG.getMergeValues(Ops1, 2, dl);
+ }
+}
+
+SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
+
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+ DebugLoc DL = Op.getDebugLoc();
+
+ if (!Subtarget->is64Bit() || Subtarget->isTargetWin64()) {
+ // vastart just stores the address of the VarArgsFrameIndex slot into the
+ // memory location argument.
+ SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
+ getPointerTy());
+ return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
+ MachinePointerInfo(SV), false, false, 0);
+ }
+
+ // __va_list_tag:
+ // gp_offset (0 - 6 * 8)
+ // fp_offset (48 - 48 + 8 * 16)
+ // overflow_arg_area (point to parameters coming in memory).
+ // reg_save_area
+ SmallVector<SDValue, 8> MemOps;
+ SDValue FIN = Op.getOperand(1);
+ // Store gp_offset
+ SDValue Store = DAG.getStore(Op.getOperand(0), DL,
+ DAG.getConstant(FuncInfo->getVarArgsGPOffset(),
+ MVT::i32),
+ FIN, MachinePointerInfo(SV), false, false, 0);
+ MemOps.push_back(Store);
+
+ // Store fp_offset
+ FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(),
+ FIN, DAG.getIntPtrConstant(4));
+ Store = DAG.getStore(Op.getOperand(0), DL,
+ DAG.getConstant(FuncInfo->getVarArgsFPOffset(),
+ MVT::i32),
+ FIN, MachinePointerInfo(SV, 4), false, false, 0);
+ MemOps.push_back(Store);
+
+ // Store ptr to overflow_arg_area
+ FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(),
+ FIN, DAG.getIntPtrConstant(4));
+ SDValue OVFIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
+ getPointerTy());
+ Store = DAG.getStore(Op.getOperand(0), DL, OVFIN, FIN,
+ MachinePointerInfo(SV, 8),
+ false, false, 0);
+ MemOps.push_back(Store);
+
+ // Store ptr to reg_save_area.
+ FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(),
+ FIN, DAG.getIntPtrConstant(8));
+ SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
+ getPointerTy());
+ Store = DAG.getStore(Op.getOperand(0), DL, RSFIN, FIN,
+ MachinePointerInfo(SV, 16), false, false, 0);
+ MemOps.push_back(Store);
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
+ &MemOps[0], MemOps.size());
+}
+
+SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
+ assert(Subtarget->is64Bit() &&
+ "LowerVAARG only handles 64-bit va_arg!");
+ assert((Subtarget->isTargetLinux() ||
+ Subtarget->isTargetDarwin()) &&
+ "Unhandled target in LowerVAARG");
+ assert(Op.getNode()->getNumOperands() == 4);
+ SDValue Chain = Op.getOperand(0);
+ SDValue SrcPtr = Op.getOperand(1);
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+ unsigned Align = Op.getConstantOperandVal(3);
+ DebugLoc dl = Op.getDebugLoc();
+
+ EVT ArgVT = Op.getNode()->getValueType(0);
+ Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ uint32_t ArgSize = getTargetData()->getTypeAllocSize(ArgTy);
+ uint8_t ArgMode;
+
+ // Decide which area this value should be read from.
+ // TODO: Implement the AMD64 ABI in its entirety. This simple
+ // selection mechanism works only for the basic types.
+ if (ArgVT == MVT::f80) {
+ llvm_unreachable("va_arg for f80 not yet implemented");
+ } else if (ArgVT.isFloatingPoint() && ArgSize <= 16 /*bytes*/) {
+ ArgMode = 2; // Argument passed in XMM register. Use fp_offset.
+ } else if (ArgVT.isInteger() && ArgSize <= 32 /*bytes*/) {
+ ArgMode = 1; // Argument passed in GPR64 register(s). Use gp_offset.
+ } else {
+ llvm_unreachable("Unhandled argument type in LowerVAARG");
+ }
+
+ if (ArgMode == 2) {
+ // Sanity Check: Make sure using fp_offset makes sense.
+ assert(!UseSoftFloat &&
+ !(DAG.getMachineFunction()
+ .getFunction()->hasFnAttr(Attribute::NoImplicitFloat)) &&
+ Subtarget->hasXMM());
+ }
+
+ // Insert VAARG_64 node into the DAG
+ // VAARG_64 returns two values: Variable Argument Address, Chain
+ SmallVector<SDValue, 11> InstOps;
+ InstOps.push_back(Chain);
+ InstOps.push_back(SrcPtr);
+ InstOps.push_back(DAG.getConstant(ArgSize, MVT::i32));
+ InstOps.push_back(DAG.getConstant(ArgMode, MVT::i8));
+ InstOps.push_back(DAG.getConstant(Align, MVT::i32));
+ SDVTList VTs = DAG.getVTList(getPointerTy(), MVT::Other);
+ SDValue VAARG = DAG.getMemIntrinsicNode(X86ISD::VAARG_64, dl,
+ VTs, &InstOps[0], InstOps.size(),
+ MVT::i64,
+ MachinePointerInfo(SV),
+ /*Align=*/0,
+ /*Volatile=*/false,
+ /*ReadMem=*/true,
+ /*WriteMem=*/true);
+ Chain = VAARG.getValue(1);
+
+ // Load the next argument and return it
+ return DAG.getLoad(ArgVT, dl,
+ Chain,
+ VAARG,
+ MachinePointerInfo(),
+ false, false, 0);
+}
+
+SDValue X86TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
+ // X86-64 va_list is a struct { i32, i32, i8*, i8* }.
+ assert(Subtarget->is64Bit() && "This code only handles 64-bit va_copy!");
+ SDValue Chain = Op.getOperand(0);
+ SDValue DstPtr = Op.getOperand(1);
+ SDValue SrcPtr = Op.getOperand(2);
+ const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
+ const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
+ DebugLoc DL = Op.getDebugLoc();
+
+ return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr,
+ DAG.getIntPtrConstant(24), 8, /*isVolatile*/false,
+ false,
+ MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV));
+}
+
+SDValue
+X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ switch (IntNo) {
+ default: return SDValue(); // Don't custom lower most intrinsics.
+ // Comparison intrinsics.
+ case Intrinsic::x86_sse_comieq_ss:
+ case Intrinsic::x86_sse_comilt_ss:
+ case Intrinsic::x86_sse_comile_ss:
+ case Intrinsic::x86_sse_comigt_ss:
+ case Intrinsic::x86_sse_comige_ss:
+ case Intrinsic::x86_sse_comineq_ss:
+ case Intrinsic::x86_sse_ucomieq_ss:
+ case Intrinsic::x86_sse_ucomilt_ss:
+ case Intrinsic::x86_sse_ucomile_ss:
+ case Intrinsic::x86_sse_ucomigt_ss:
+ case Intrinsic::x86_sse_ucomige_ss:
+ case Intrinsic::x86_sse_ucomineq_ss:
+ case Intrinsic::x86_sse2_comieq_sd:
+ case Intrinsic::x86_sse2_comilt_sd:
+ case Intrinsic::x86_sse2_comile_sd:
+ case Intrinsic::x86_sse2_comigt_sd:
+ case Intrinsic::x86_sse2_comige_sd:
+ case Intrinsic::x86_sse2_comineq_sd:
+ case Intrinsic::x86_sse2_ucomieq_sd:
+ case Intrinsic::x86_sse2_ucomilt_sd:
+ case Intrinsic::x86_sse2_ucomile_sd:
+ case Intrinsic::x86_sse2_ucomigt_sd:
+ case Intrinsic::x86_sse2_ucomige_sd:
+ case Intrinsic::x86_sse2_ucomineq_sd: {
+ unsigned Opc = 0;
+ ISD::CondCode CC = ISD::SETCC_INVALID;
+ switch (IntNo) {
+ default: break;
+ case Intrinsic::x86_sse_comieq_ss:
+ case Intrinsic::x86_sse2_comieq_sd:
+ Opc = X86ISD::COMI;
+ CC = ISD::SETEQ;
+ break;
+ case Intrinsic::x86_sse_comilt_ss:
+ case Intrinsic::x86_sse2_comilt_sd:
+ Opc = X86ISD::COMI;
+ CC = ISD::SETLT;
+ break;
+ case Intrinsic::x86_sse_comile_ss:
+ case Intrinsic::x86_sse2_comile_sd:
+ Opc = X86ISD::COMI;
+ CC = ISD::SETLE;
+ break;
+ case Intrinsic::x86_sse_comigt_ss:
+ case Intrinsic::x86_sse2_comigt_sd:
+ Opc = X86ISD::COMI;
+ CC = ISD::SETGT;
+ break;
+ case Intrinsic::x86_sse_comige_ss:
+ case Intrinsic::x86_sse2_comige_sd:
+ Opc = X86ISD::COMI;
+ CC = ISD::SETGE;
+ break;
+ case Intrinsic::x86_sse_comineq_ss:
+ case Intrinsic::x86_sse2_comineq_sd:
+ Opc = X86ISD::COMI;
+ CC = ISD::SETNE;
+ break;
+ case Intrinsic::x86_sse_ucomieq_ss:
+ case Intrinsic::x86_sse2_ucomieq_sd:
+ Opc = X86ISD::UCOMI;
+ CC = ISD::SETEQ;
+ break;
+ case Intrinsic::x86_sse_ucomilt_ss:
+ case Intrinsic::x86_sse2_ucomilt_sd:
+ Opc = X86ISD::UCOMI;
+ CC = ISD::SETLT;
+ break;
+ case Intrinsic::x86_sse_ucomile_ss:
+ case Intrinsic::x86_sse2_ucomile_sd:
+ Opc = X86ISD::UCOMI;
+ CC = ISD::SETLE;
+ break;
+ case Intrinsic::x86_sse_ucomigt_ss:
+ case Intrinsic::x86_sse2_ucomigt_sd:
+ Opc = X86ISD::UCOMI;
+ CC = ISD::SETGT;
+ break;
+ case Intrinsic::x86_sse_ucomige_ss:
+ case Intrinsic::x86_sse2_ucomige_sd:
+ Opc = X86ISD::UCOMI;
+ CC = ISD::SETGE;
+ break;
+ case Intrinsic::x86_sse_ucomineq_ss:
+ case Intrinsic::x86_sse2_ucomineq_sd:
+ Opc = X86ISD::UCOMI;
+ CC = ISD::SETNE;
+ break;
+ }
+
+ SDValue LHS = Op.getOperand(1);
+ SDValue RHS = Op.getOperand(2);
+ unsigned X86CC = TranslateX86CC(CC, true, LHS, RHS, DAG);
+ assert(X86CC != X86::COND_INVALID && "Unexpected illegal condition!");
+ SDValue Cond = DAG.getNode(Opc, dl, MVT::i32, LHS, RHS);
+ SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+ DAG.getConstant(X86CC, MVT::i8), Cond);
+ return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
+ }
+ // Arithmetic intrinsics.
+ case Intrinsic::x86_sse3_hadd_ps:
+ case Intrinsic::x86_sse3_hadd_pd:
+ case Intrinsic::x86_avx_hadd_ps_256:
+ case Intrinsic::x86_avx_hadd_pd_256:
+ return DAG.getNode(X86ISD::FHADD, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ case Intrinsic::x86_sse3_hsub_ps:
+ case Intrinsic::x86_sse3_hsub_pd:
+ case Intrinsic::x86_avx_hsub_ps_256:
+ case Intrinsic::x86_avx_hsub_pd_256:
+ return DAG.getNode(X86ISD::FHSUB, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ // ptest and testp intrinsics. The intrinsic these come from are designed to
+ // return an integer value, not just an instruction so lower it to the ptest
+ // or testp pattern and a setcc for the result.
+ case Intrinsic::x86_sse41_ptestz:
+ case Intrinsic::x86_sse41_ptestc:
+ case Intrinsic::x86_sse41_ptestnzc:
+ case Intrinsic::x86_avx_ptestz_256:
+ case Intrinsic::x86_avx_ptestc_256:
+ case Intrinsic::x86_avx_ptestnzc_256:
+ case Intrinsic::x86_avx_vtestz_ps:
+ case Intrinsic::x86_avx_vtestc_ps:
+ case Intrinsic::x86_avx_vtestnzc_ps:
+ case Intrinsic::x86_avx_vtestz_pd:
+ case Intrinsic::x86_avx_vtestc_pd:
+ case Intrinsic::x86_avx_vtestnzc_pd:
+ case Intrinsic::x86_avx_vtestz_ps_256:
+ case Intrinsic::x86_avx_vtestc_ps_256:
+ case Intrinsic::x86_avx_vtestnzc_ps_256:
+ case Intrinsic::x86_avx_vtestz_pd_256:
+ case Intrinsic::x86_avx_vtestc_pd_256:
+ case Intrinsic::x86_avx_vtestnzc_pd_256: {
+ bool IsTestPacked = false;
+ unsigned X86CC = 0;
+ switch (IntNo) {
+ default: llvm_unreachable("Bad fallthrough in Intrinsic lowering.");
+ case Intrinsic::x86_avx_vtestz_ps:
+ case Intrinsic::x86_avx_vtestz_pd:
+ case Intrinsic::x86_avx_vtestz_ps_256:
+ case Intrinsic::x86_avx_vtestz_pd_256:
+ IsTestPacked = true; // Fallthrough
+ case Intrinsic::x86_sse41_ptestz:
+ case Intrinsic::x86_avx_ptestz_256:
+ // ZF = 1
+ X86CC = X86::COND_E;
+ break;
+ case Intrinsic::x86_avx_vtestc_ps:
+ case Intrinsic::x86_avx_vtestc_pd:
+ case Intrinsic::x86_avx_vtestc_ps_256:
+ case Intrinsic::x86_avx_vtestc_pd_256:
+ IsTestPacked = true; // Fallthrough
+ case Intrinsic::x86_sse41_ptestc:
+ case Intrinsic::x86_avx_ptestc_256:
+ // CF = 1
+ X86CC = X86::COND_B;
+ break;
+ case Intrinsic::x86_avx_vtestnzc_ps:
+ case Intrinsic::x86_avx_vtestnzc_pd:
+ case Intrinsic::x86_avx_vtestnzc_ps_256:
+ case Intrinsic::x86_avx_vtestnzc_pd_256:
+ IsTestPacked = true; // Fallthrough
+ case Intrinsic::x86_sse41_ptestnzc:
+ case Intrinsic::x86_avx_ptestnzc_256:
+ // ZF and CF = 0
+ X86CC = X86::COND_A;
+ break;
+ }
+
+ SDValue LHS = Op.getOperand(1);
+ SDValue RHS = Op.getOperand(2);
+ unsigned TestOpc = IsTestPacked ? X86ISD::TESTP : X86ISD::PTEST;
+ SDValue Test = DAG.getNode(TestOpc, dl, MVT::i32, LHS, RHS);
+ SDValue CC = DAG.getConstant(X86CC, MVT::i8);
+ SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, CC, Test);
+ return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
+ }
+
+ // Fix vector shift instructions where the last operand is a non-immediate
+ // i32 value.
+ case Intrinsic::x86_sse2_pslli_w:
+ case Intrinsic::x86_sse2_pslli_d:
+ case Intrinsic::x86_sse2_pslli_q:
+ case Intrinsic::x86_sse2_psrli_w:
+ case Intrinsic::x86_sse2_psrli_d:
+ case Intrinsic::x86_sse2_psrli_q:
+ case Intrinsic::x86_sse2_psrai_w:
+ case Intrinsic::x86_sse2_psrai_d:
+ case Intrinsic::x86_mmx_pslli_w:
+ case Intrinsic::x86_mmx_pslli_d:
+ case Intrinsic::x86_mmx_pslli_q:
+ case Intrinsic::x86_mmx_psrli_w:
+ case Intrinsic::x86_mmx_psrli_d:
+ case Intrinsic::x86_mmx_psrli_q:
+ case Intrinsic::x86_mmx_psrai_w:
+ case Intrinsic::x86_mmx_psrai_d: {
+ SDValue ShAmt = Op.getOperand(2);
+ if (isa<ConstantSDNode>(ShAmt))
+ return SDValue();
+
+ unsigned NewIntNo = 0;
+ EVT ShAmtVT = MVT::v4i32;
+ switch (IntNo) {
+ case Intrinsic::x86_sse2_pslli_w:
+ NewIntNo = Intrinsic::x86_sse2_psll_w;
+ break;
+ case Intrinsic::x86_sse2_pslli_d:
+ NewIntNo = Intrinsic::x86_sse2_psll_d;
+ break;
+ case Intrinsic::x86_sse2_pslli_q:
+ NewIntNo = Intrinsic::x86_sse2_psll_q;
+ break;
+ case Intrinsic::x86_sse2_psrli_w:
+ NewIntNo = Intrinsic::x86_sse2_psrl_w;
+ break;
+ case Intrinsic::x86_sse2_psrli_d:
+ NewIntNo = Intrinsic::x86_sse2_psrl_d;
+ break;
+ case Intrinsic::x86_sse2_psrli_q:
+ NewIntNo = Intrinsic::x86_sse2_psrl_q;
+ break;
+ case Intrinsic::x86_sse2_psrai_w:
+ NewIntNo = Intrinsic::x86_sse2_psra_w;
+ break;
+ case Intrinsic::x86_sse2_psrai_d:
+ NewIntNo = Intrinsic::x86_sse2_psra_d;
+ break;
+ default: {
+ ShAmtVT = MVT::v2i32;
+ switch (IntNo) {
+ case Intrinsic::x86_mmx_pslli_w:
+ NewIntNo = Intrinsic::x86_mmx_psll_w;
+ break;
+ case Intrinsic::x86_mmx_pslli_d:
+ NewIntNo = Intrinsic::x86_mmx_psll_d;
+ break;
+ case Intrinsic::x86_mmx_pslli_q:
+ NewIntNo = Intrinsic::x86_mmx_psll_q;
+ break;
+ case Intrinsic::x86_mmx_psrli_w:
+ NewIntNo = Intrinsic::x86_mmx_psrl_w;
+ break;
+ case Intrinsic::x86_mmx_psrli_d:
+ NewIntNo = Intrinsic::x86_mmx_psrl_d;
+ break;
+ case Intrinsic::x86_mmx_psrli_q:
+ NewIntNo = Intrinsic::x86_mmx_psrl_q;
+ break;
+ case Intrinsic::x86_mmx_psrai_w:
+ NewIntNo = Intrinsic::x86_mmx_psra_w;
+ break;
+ case Intrinsic::x86_mmx_psrai_d:
+ NewIntNo = Intrinsic::x86_mmx_psra_d;
+ break;
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ }
+ break;
+ }
+ }
+
+ // The vector shift intrinsics with scalars uses 32b shift amounts but
+ // the sse2/mmx shift instructions reads 64 bits. Set the upper 32 bits
+ // to be zero.
+ SDValue ShOps[4];
+ ShOps[0] = ShAmt;
+ ShOps[1] = DAG.getConstant(0, MVT::i32);
+ if (ShAmtVT == MVT::v4i32) {
+ ShOps[2] = DAG.getUNDEF(MVT::i32);
+ ShOps[3] = DAG.getUNDEF(MVT::i32);
+ ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, ShAmtVT, &ShOps[0], 4);
+ } else {
+ ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, ShAmtVT, &ShOps[0], 2);
+// FIXME this must be lowered to get rid of the invalid type.
+ }
+
+ EVT VT = Op.getValueType();
+ ShAmt = DAG.getNode(ISD::BITCAST, dl, VT, ShAmt);
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(NewIntNo, MVT::i32),
+ Op.getOperand(1), ShAmt);
+ }
+ }
+}
+
+SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op,
+ SelectionDAG &DAG) const {
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI->setReturnAddressIsTaken(true);
+
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (Depth > 0) {
+ SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
+ SDValue Offset =
+ DAG.getConstant(TD->getPointerSize(),
+ Subtarget->is64Bit() ? MVT::i64 : MVT::i32);
+ return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
+ DAG.getNode(ISD::ADD, dl, getPointerTy(),
+ FrameAddr, Offset),
+ MachinePointerInfo(), false, false, 0);
+ }
+
+ // Just load the return address.
+ SDValue RetAddrFI = getReturnAddressFrameIndex(DAG);
+ return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
+ RetAddrFI, MachinePointerInfo(), false, false, 0);
+}
+
+SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI->setFrameAddressIsTaken(true);
+
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned FrameReg = Subtarget->is64Bit() ? X86::RBP : X86::EBP;
+ SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
+ while (Depth--)
+ FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
+ MachinePointerInfo(),
+ false, false, 0);
+ return FrameAddr;
+}
+
+SDValue X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDValue Op,
+ SelectionDAG &DAG) const {
+ return DAG.getIntPtrConstant(2*TD->getPointerSize());
+}
+
+SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ SDValue Chain = Op.getOperand(0);
+ SDValue Offset = Op.getOperand(1);
+ SDValue Handler = Op.getOperand(2);
+ DebugLoc dl = Op.getDebugLoc();
+
+ SDValue Frame = DAG.getCopyFromReg(DAG.getEntryNode(), dl,
+ Subtarget->is64Bit() ? X86::RBP : X86::EBP,
+ getPointerTy());
+ unsigned StoreAddrReg = (Subtarget->is64Bit() ? X86::RCX : X86::ECX);
+
+ SDValue StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), Frame,
+ DAG.getIntPtrConstant(TD->getPointerSize()));
+ StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), StoreAddr, Offset);
+ Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, MachinePointerInfo(),
+ false, false, 0);
+ Chain = DAG.getCopyToReg(Chain, dl, StoreAddrReg, StoreAddr);
+ MF.getRegInfo().addLiveOut(StoreAddrReg);
+
+ return DAG.getNode(X86ISD::EH_RETURN, dl,
+ MVT::Other,
+ Chain, DAG.getRegister(StoreAddrReg, getPointerTy()));
+}
+
+SDValue X86TargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
+ SelectionDAG &DAG) const {
+ return Op.getOperand(0);
+}
+
+SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue Root = Op.getOperand(0);
+ SDValue Trmp = Op.getOperand(1); // trampoline
+ SDValue FPtr = Op.getOperand(2); // nested function
+ SDValue Nest = Op.getOperand(3); // 'nest' parameter value
+ DebugLoc dl = Op.getDebugLoc();
+
+ const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
+
+ if (Subtarget->is64Bit()) {
+ SDValue OutChains[6];
+
+ // Large code-model.
+ const unsigned char JMP64r = 0xFF; // 64-bit jmp through register opcode.
+ const unsigned char MOV64ri = 0xB8; // X86::MOV64ri opcode.
+
+ const unsigned char N86R10 = X86_MC::getX86RegNum(X86::R10);
+ const unsigned char N86R11 = X86_MC::getX86RegNum(X86::R11);
+
+ const unsigned char REX_WB = 0x40 | 0x08 | 0x01; // REX prefix
+
+ // Load the pointer to the nested function into R11.
+ unsigned OpCode = ((MOV64ri | N86R11) << 8) | REX_WB; // movabsq r11
+ SDValue Addr = Trmp;
+ OutChains[0] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16),
+ Addr, MachinePointerInfo(TrmpAddr),
+ false, false, 0);
+
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
+ DAG.getConstant(2, MVT::i64));
+ OutChains[1] = DAG.getStore(Root, dl, FPtr, Addr,
+ MachinePointerInfo(TrmpAddr, 2),
+ false, false, 2);
+
+ // Load the 'nest' parameter value into R10.
+ // R10 is specified in X86CallingConv.td
+ OpCode = ((MOV64ri | N86R10) << 8) | REX_WB; // movabsq r10
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
+ DAG.getConstant(10, MVT::i64));
+ OutChains[2] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16),
+ Addr, MachinePointerInfo(TrmpAddr, 10),
+ false, false, 0);
+
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
+ DAG.getConstant(12, MVT::i64));
+ OutChains[3] = DAG.getStore(Root, dl, Nest, Addr,
+ MachinePointerInfo(TrmpAddr, 12),
+ false, false, 2);
+
+ // Jump to the nested function.
+ OpCode = (JMP64r << 8) | REX_WB; // jmpq *...
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
+ DAG.getConstant(20, MVT::i64));
+ OutChains[4] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16),
+ Addr, MachinePointerInfo(TrmpAddr, 20),
+ false, false, 0);
+
+ unsigned char ModRM = N86R11 | (4 << 3) | (3 << 6); // ...r11
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
+ DAG.getConstant(22, MVT::i64));
+ OutChains[5] = DAG.getStore(Root, dl, DAG.getConstant(ModRM, MVT::i8), Addr,
+ MachinePointerInfo(TrmpAddr, 22),
+ false, false, 0);
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 6);
+ } else {
+ const Function *Func =
+ cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue());
+ CallingConv::ID CC = Func->getCallingConv();
+ unsigned NestReg;
+
+ switch (CC) {
+ default:
+ llvm_unreachable("Unsupported calling convention");
+ case CallingConv::C:
+ case CallingConv::X86_StdCall: {
+ // Pass 'nest' parameter in ECX.
+ // Must be kept in sync with X86CallingConv.td
+ NestReg = X86::ECX;
+
+ // Check that ECX wasn't needed by an 'inreg' parameter.
+ FunctionType *FTy = Func->getFunctionType();
+ const AttrListPtr &Attrs = Func->getAttributes();
+
+ if (!Attrs.isEmpty() && !Func->isVarArg()) {
+ unsigned InRegCount = 0;
+ unsigned Idx = 1;
+
+ for (FunctionType::param_iterator I = FTy->param_begin(),
+ E = FTy->param_end(); I != E; ++I, ++Idx)
+ if (Attrs.paramHasAttr(Idx, Attribute::InReg))
+ // FIXME: should only count parameters that are lowered to integers.
+ InRegCount += (TD->getTypeSizeInBits(*I) + 31) / 32;
+
+ if (InRegCount > 2) {
+ report_fatal_error("Nest register in use - reduce number of inreg"
+ " parameters!");
+ }
+ }
+ break;
+ }
+ case CallingConv::X86_FastCall:
+ case CallingConv::X86_ThisCall:
+ case CallingConv::Fast:
+ // Pass 'nest' parameter in EAX.
+ // Must be kept in sync with X86CallingConv.td
+ NestReg = X86::EAX;
+ break;
+ }
+
+ SDValue OutChains[4];
+ SDValue Addr, Disp;
+
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
+ DAG.getConstant(10, MVT::i32));
+ Disp = DAG.getNode(ISD::SUB, dl, MVT::i32, FPtr, Addr);
+
+ // This is storing the opcode for MOV32ri.
+ const unsigned char MOV32ri = 0xB8; // X86::MOV32ri's opcode byte.
+ const unsigned char N86Reg = X86_MC::getX86RegNum(NestReg);
+ OutChains[0] = DAG.getStore(Root, dl,
+ DAG.getConstant(MOV32ri|N86Reg, MVT::i8),
+ Trmp, MachinePointerInfo(TrmpAddr),
+ false, false, 0);
+
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
+ DAG.getConstant(1, MVT::i32));
+ OutChains[1] = DAG.getStore(Root, dl, Nest, Addr,
+ MachinePointerInfo(TrmpAddr, 1),
+ false, false, 1);
+
+ const unsigned char JMP = 0xE9; // jmp <32bit dst> opcode.
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
+ DAG.getConstant(5, MVT::i32));
+ OutChains[2] = DAG.getStore(Root, dl, DAG.getConstant(JMP, MVT::i8), Addr,
+ MachinePointerInfo(TrmpAddr, 5),
+ false, false, 1);
+
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
+ DAG.getConstant(6, MVT::i32));
+ OutChains[3] = DAG.getStore(Root, dl, Disp, Addr,
+ MachinePointerInfo(TrmpAddr, 6),
+ false, false, 1);
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 4);
+ }
+}
+
+SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
+ SelectionDAG &DAG) const {
+ /*
+ The rounding mode is in bits 11:10 of FPSR, and has the following
+ settings:
+ 00 Round to nearest
+ 01 Round to -inf
+ 10 Round to +inf
+ 11 Round to 0
+
+ FLT_ROUNDS, on the other hand, expects the following:
+ -1 Undefined
+ 0 Round to 0
+ 1 Round to nearest
+ 2 Round to +inf
+ 3 Round to -inf
+
+ To perform the conversion, we do:
+ (((((FPSR & 0x800) >> 11) | ((FPSR & 0x400) >> 9)) + 1) & 3)
+ */
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ const TargetMachine &TM = MF.getTarget();
+ const TargetFrameLowering &TFI = *TM.getFrameLowering();
+ unsigned StackAlignment = TFI.getStackAlignment();
+ EVT VT = Op.getValueType();
+ DebugLoc DL = Op.getDebugLoc();
+
+ // Save FP Control Word to stack slot
+ int SSFI = MF.getFrameInfo()->CreateStackObject(2, StackAlignment, false);
+ SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
+
+
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
+ MachineMemOperand::MOStore, 2, 2);
+
+ SDValue Ops[] = { DAG.getEntryNode(), StackSlot };
+ SDValue Chain = DAG.getMemIntrinsicNode(X86ISD::FNSTCW16m, DL,
+ DAG.getVTList(MVT::Other),
+ Ops, 2, MVT::i16, MMO);
+
+ // Load FP Control Word from stack slot
+ SDValue CWD = DAG.getLoad(MVT::i16, DL, Chain, StackSlot,
+ MachinePointerInfo(), false, false, 0);
+
+ // Transform as necessary
+ SDValue CWD1 =
+ DAG.getNode(ISD::SRL, DL, MVT::i16,
+ DAG.getNode(ISD::AND, DL, MVT::i16,
+ CWD, DAG.getConstant(0x800, MVT::i16)),
+ DAG.getConstant(11, MVT::i8));
+ SDValue CWD2 =
+ DAG.getNode(ISD::SRL, DL, MVT::i16,
+ DAG.getNode(ISD::AND, DL, MVT::i16,
+ CWD, DAG.getConstant(0x400, MVT::i16)),
+ DAG.getConstant(9, MVT::i8));
+
+ SDValue RetVal =
+ DAG.getNode(ISD::AND, DL, MVT::i16,
+ DAG.getNode(ISD::ADD, DL, MVT::i16,
+ DAG.getNode(ISD::OR, DL, MVT::i16, CWD1, CWD2),
+ DAG.getConstant(1, MVT::i16)),
+ DAG.getConstant(3, MVT::i16));
+
+
+ return DAG.getNode((VT.getSizeInBits() < 16 ?
+ ISD::TRUNCATE : ISD::ZERO_EXTEND), DL, VT, RetVal);
+}
+
+SDValue X86TargetLowering::LowerCTLZ(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ EVT OpVT = VT;
+ unsigned NumBits = VT.getSizeInBits();
+ DebugLoc dl = Op.getDebugLoc();
+
+ Op = Op.getOperand(0);
+ if (VT == MVT::i8) {
+ // Zero extend to i32 since there is not an i8 bsr.
+ OpVT = MVT::i32;
+ Op = DAG.getNode(ISD::ZERO_EXTEND, dl, OpVT, Op);
+ }
+
+ // Issue a bsr (scan bits in reverse) which also sets EFLAGS.
+ SDVTList VTs = DAG.getVTList(OpVT, MVT::i32);
+ Op = DAG.getNode(X86ISD::BSR, dl, VTs, Op);
+
+ // If src is zero (i.e. bsr sets ZF), returns NumBits.
+ SDValue Ops[] = {
+ Op,
+ DAG.getConstant(NumBits+NumBits-1, OpVT),
+ DAG.getConstant(X86::COND_E, MVT::i8),
+ Op.getValue(1)
+ };
+ Op = DAG.getNode(X86ISD::CMOV, dl, OpVT, Ops, array_lengthof(Ops));
+
+ // Finally xor with NumBits-1.
+ Op = DAG.getNode(ISD::XOR, dl, OpVT, Op, DAG.getConstant(NumBits-1, OpVT));
+
+ if (VT == MVT::i8)
+ Op = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Op);
+ return Op;
+}
+
+SDValue X86TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ EVT OpVT = VT;
+ unsigned NumBits = VT.getSizeInBits();
+ DebugLoc dl = Op.getDebugLoc();
+
+ Op = Op.getOperand(0);
+ if (VT == MVT::i8) {
+ OpVT = MVT::i32;
+ Op = DAG.getNode(ISD::ZERO_EXTEND, dl, OpVT, Op);
+ }
+
+ // Issue a bsf (scan bits forward) which also sets EFLAGS.
+ SDVTList VTs = DAG.getVTList(OpVT, MVT::i32);
+ Op = DAG.getNode(X86ISD::BSF, dl, VTs, Op);
+
+ // If src is zero (i.e. bsf sets ZF), returns NumBits.
+ SDValue Ops[] = {
+ Op,
+ DAG.getConstant(NumBits, OpVT),
+ DAG.getConstant(X86::COND_E, MVT::i8),
+ Op.getValue(1)
+ };
+ Op = DAG.getNode(X86ISD::CMOV, dl, OpVT, Ops, array_lengthof(Ops));
+
+ if (VT == MVT::i8)
+ Op = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Op);
+ return Op;
+}
+
+// Lower256IntArith - Break a 256-bit integer operation into two new 128-bit
+// ones, and then concatenate the result back.
+static SDValue Lower256IntArith(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
+
+ assert(VT.getSizeInBits() == 256 && VT.isInteger() &&
+ "Unsupported value type for operation");
+
+ int NumElems = VT.getVectorNumElements();
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue Idx0 = DAG.getConstant(0, MVT::i32);
+ SDValue Idx1 = DAG.getConstant(NumElems/2, MVT::i32);
+
+ // Extract the LHS vectors
+ SDValue LHS = Op.getOperand(0);
+ SDValue LHS1 = Extract128BitVector(LHS, Idx0, DAG, dl);
+ SDValue LHS2 = Extract128BitVector(LHS, Idx1, DAG, dl);
+
+ // Extract the RHS vectors
+ SDValue RHS = Op.getOperand(1);
+ SDValue RHS1 = Extract128BitVector(RHS, Idx0, DAG, dl);
+ SDValue RHS2 = Extract128BitVector(RHS, Idx1, DAG, dl);
+
+ MVT EltVT = VT.getVectorElementType().getSimpleVT();
+ EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
+ DAG.getNode(Op.getOpcode(), dl, NewVT, LHS1, RHS1),
+ DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, RHS2));
+}
+
+SDValue X86TargetLowering::LowerADD(SDValue Op, SelectionDAG &DAG) const {
+ assert(Op.getValueType().getSizeInBits() == 256 &&
+ Op.getValueType().isInteger() &&
+ "Only handle AVX 256-bit vector integer operation");
+ return Lower256IntArith(Op, DAG);
+}
+
+SDValue X86TargetLowering::LowerSUB(SDValue Op, SelectionDAG &DAG) const {
+ assert(Op.getValueType().getSizeInBits() == 256 &&
+ Op.getValueType().isInteger() &&
+ "Only handle AVX 256-bit vector integer operation");
+ return Lower256IntArith(Op, DAG);
+}
+
+SDValue X86TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+
+ // Decompose 256-bit ops into smaller 128-bit ops.
+ if (VT.getSizeInBits() == 256)
+ return Lower256IntArith(Op, DAG);
+
+ assert(VT == MVT::v2i64 && "Only know how to lower V2I64 multiply");
+ DebugLoc dl = Op.getDebugLoc();
+
+ // ulong2 Ahi = __builtin_ia32_psrlqi128( a, 32);
+ // ulong2 Bhi = __builtin_ia32_psrlqi128( b, 32);
+ // ulong2 AloBlo = __builtin_ia32_pmuludq128( a, b );
+ // ulong2 AloBhi = __builtin_ia32_pmuludq128( a, Bhi );
+ // ulong2 AhiBlo = __builtin_ia32_pmuludq128( Ahi, b );
+ //
+ // AloBhi = __builtin_ia32_psllqi128( AloBhi, 32 );
+ // AhiBlo = __builtin_ia32_psllqi128( AhiBlo, 32 );
+ // return AloBlo + AloBhi + AhiBlo;
+
+ SDValue A = Op.getOperand(0);
+ SDValue B = Op.getOperand(1);
+
+ SDValue Ahi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_psrli_q, MVT::i32),
+ A, DAG.getConstant(32, MVT::i32));
+ SDValue Bhi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_psrli_q, MVT::i32),
+ B, DAG.getConstant(32, MVT::i32));
+ SDValue AloBlo = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pmulu_dq, MVT::i32),
+ A, B);
+ SDValue AloBhi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pmulu_dq, MVT::i32),
+ A, Bhi);
+ SDValue AhiBlo = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pmulu_dq, MVT::i32),
+ Ahi, B);
+ AloBhi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pslli_q, MVT::i32),
+ AloBhi, DAG.getConstant(32, MVT::i32));
+ AhiBlo = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pslli_q, MVT::i32),
+ AhiBlo, DAG.getConstant(32, MVT::i32));
+ SDValue Res = DAG.getNode(ISD::ADD, dl, VT, AloBlo, AloBhi);
+ Res = DAG.getNode(ISD::ADD, dl, VT, Res, AhiBlo);
+ return Res;
+}
+
+SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
+
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue R = Op.getOperand(0);
+ SDValue Amt = Op.getOperand(1);
+ LLVMContext *Context = DAG.getContext();
+
+ if (!Subtarget->hasXMMInt())
+ return SDValue();
+
+ // Decompose 256-bit shifts into smaller 128-bit shifts.
+ if (VT.getSizeInBits() == 256) {
+ int NumElems = VT.getVectorNumElements();
+ MVT EltVT = VT.getVectorElementType().getSimpleVT();
+ EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);
+
+ // Extract the two vectors
+ SDValue V1 = Extract128BitVector(R, DAG.getConstant(0, MVT::i32), DAG, dl);
+ SDValue V2 = Extract128BitVector(R, DAG.getConstant(NumElems/2, MVT::i32),
+ DAG, dl);
+
+ // Recreate the shift amount vectors
+ SDValue Amt1, Amt2;
+ if (Amt.getOpcode() == ISD::BUILD_VECTOR) {
+ // Constant shift amount
+ SmallVector<SDValue, 4> Amt1Csts;
+ SmallVector<SDValue, 4> Amt2Csts;
+ for (int i = 0; i < NumElems/2; ++i)
+ Amt1Csts.push_back(Amt->getOperand(i));
+ for (int i = NumElems/2; i < NumElems; ++i)
+ Amt2Csts.push_back(Amt->getOperand(i));
+
+ Amt1 = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT,
+ &Amt1Csts[0], NumElems/2);
+ Amt2 = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT,
+ &Amt2Csts[0], NumElems/2);
+ } else {
+ // Variable shift amount
+ Amt1 = Extract128BitVector(Amt, DAG.getConstant(0, MVT::i32), DAG, dl);
+ Amt2 = Extract128BitVector(Amt, DAG.getConstant(NumElems/2, MVT::i32),
+ DAG, dl);
+ }
+
+ // Issue new vector shifts for the smaller types
+ V1 = DAG.getNode(Op.getOpcode(), dl, NewVT, V1, Amt1);
+ V2 = DAG.getNode(Op.getOpcode(), dl, NewVT, V2, Amt2);
+
+ // Concatenate the result back
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, V1, V2);
+ }
+
+ // Optimize shl/srl/sra with constant shift amount.
+ if (isSplatVector(Amt.getNode())) {
+ SDValue SclrAmt = Amt->getOperand(0);
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(SclrAmt)) {
+ uint64_t ShiftAmt = C->getZExtValue();
+
+ if (VT == MVT::v2i64 && Op.getOpcode() == ISD::SHL)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pslli_q, MVT::i32),
+ R, DAG.getConstant(ShiftAmt, MVT::i32));
+
+ if (VT == MVT::v4i32 && Op.getOpcode() == ISD::SHL)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pslli_d, MVT::i32),
+ R, DAG.getConstant(ShiftAmt, MVT::i32));
+
+ if (VT == MVT::v8i16 && Op.getOpcode() == ISD::SHL)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32),
+ R, DAG.getConstant(ShiftAmt, MVT::i32));
+
+ if (VT == MVT::v2i64 && Op.getOpcode() == ISD::SRL)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_psrli_q, MVT::i32),
+ R, DAG.getConstant(ShiftAmt, MVT::i32));
+
+ if (VT == MVT::v4i32 && Op.getOpcode() == ISD::SRL)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_psrli_d, MVT::i32),
+ R, DAG.getConstant(ShiftAmt, MVT::i32));
+
+ if (VT == MVT::v8i16 && Op.getOpcode() == ISD::SRL)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_psrli_w, MVT::i32),
+ R, DAG.getConstant(ShiftAmt, MVT::i32));
+
+ if (VT == MVT::v4i32 && Op.getOpcode() == ISD::SRA)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_psrai_d, MVT::i32),
+ R, DAG.getConstant(ShiftAmt, MVT::i32));
+
+ if (VT == MVT::v8i16 && Op.getOpcode() == ISD::SRA)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_psrai_w, MVT::i32),
+ R, DAG.getConstant(ShiftAmt, MVT::i32));
+ }
+ }
+
+ // Lower SHL with variable shift amount.
+ if (VT == MVT::v4i32 && Op->getOpcode() == ISD::SHL) {
+ Op = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pslli_d, MVT::i32),
+ Op.getOperand(1), DAG.getConstant(23, MVT::i32));
+
+ ConstantInt *CI = ConstantInt::get(*Context, APInt(32, 0x3f800000U));
+
+ std::vector<Constant*> CV(4, CI);
+ Constant *C = ConstantVector::get(CV);
+ SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
+ SDValue Addend = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 16);
+
+ Op = DAG.getNode(ISD::ADD, dl, VT, Op, Addend);
+ Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, Op);
+ Op = DAG.getNode(ISD::FP_TO_SINT, dl, VT, Op);
+ return DAG.getNode(ISD::MUL, dl, VT, Op, R);
+ }
+ if (VT == MVT::v16i8 && Op->getOpcode() == ISD::SHL) {
+ // a = a << 5;
+ Op = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32),
+ Op.getOperand(1), DAG.getConstant(5, MVT::i32));
+
+ ConstantInt *CM1 = ConstantInt::get(*Context, APInt(8, 15));
+ ConstantInt *CM2 = ConstantInt::get(*Context, APInt(8, 63));
+
+ std::vector<Constant*> CVM1(16, CM1);
+ std::vector<Constant*> CVM2(16, CM2);
+ Constant *C = ConstantVector::get(CVM1);
+ SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
+ SDValue M = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 16);
+
+ // r = pblendv(r, psllw(r & (char16)15, 4), a);
+ M = DAG.getNode(ISD::AND, dl, VT, R, M);
+ M = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), M,
+ DAG.getConstant(4, MVT::i32));
+ R = DAG.getNode(ISD::VSELECT, dl, VT, Op, R, M);
+ // a += a
+ Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op);
+
+ C = ConstantVector::get(CVM2);
+ CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
+ M = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 16);
+
+ // r = pblendv(r, psllw(r & (char16)63, 2), a);
+ M = DAG.getNode(ISD::AND, dl, VT, R, M);
+ M = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), M,
+ DAG.getConstant(2, MVT::i32));
+ R = DAG.getNode(ISD::VSELECT, dl, VT, Op, R, M);
+ // a += a
+ Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op);
+
+ // return pblendv(r, r+r, a);
+ R = DAG.getNode(ISD::VSELECT, dl, VT, Op,
+ R, DAG.getNode(ISD::ADD, dl, VT, R, R));
+ return R;
+ }
+ return SDValue();
+}
+
+SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
+ // Lower the "add/sub/mul with overflow" instruction into a regular ins plus
+ // a "setcc" instruction that checks the overflow flag. The "brcond" lowering
+ // looks for this combo and may remove the "setcc" instruction if the "setcc"
+ // has only one use.
+ SDNode *N = Op.getNode();
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ unsigned BaseOp = 0;
+ unsigned Cond = 0;
+ DebugLoc DL = Op.getDebugLoc();
+ switch (Op.getOpcode()) {
+ default: llvm_unreachable("Unknown ovf instruction!");
+ case ISD::SADDO:
+ // A subtract of one will be selected as a INC. Note that INC doesn't
+ // set CF, so we can't do this for UADDO.
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS))
+ if (C->isOne()) {
+ BaseOp = X86ISD::INC;
+ Cond = X86::COND_O;
+ break;
+ }
+ BaseOp = X86ISD::ADD;
+ Cond = X86::COND_O;
+ break;
+ case ISD::UADDO:
+ BaseOp = X86ISD::ADD;
+ Cond = X86::COND_B;
+ break;
+ case ISD::SSUBO:
+ // A subtract of one will be selected as a DEC. Note that DEC doesn't
+ // set CF, so we can't do this for USUBO.
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS))
+ if (C->isOne()) {
+ BaseOp = X86ISD::DEC;
+ Cond = X86::COND_O;
+ break;
+ }
+ BaseOp = X86ISD::SUB;
+ Cond = X86::COND_O;
+ break;
+ case ISD::USUBO:
+ BaseOp = X86ISD::SUB;
+ Cond = X86::COND_B;
+ break;
+ case ISD::SMULO:
+ BaseOp = X86ISD::SMUL;
+ Cond = X86::COND_O;
+ break;
+ case ISD::UMULO: { // i64, i8 = umulo lhs, rhs --> i64, i64, i32 umul lhs,rhs
+ SDVTList VTs = DAG.getVTList(N->getValueType(0), N->getValueType(0),
+ MVT::i32);
+ SDValue Sum = DAG.getNode(X86ISD::UMUL, DL, VTs, LHS, RHS);
+
+ SDValue SetCC =
+ DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
+ DAG.getConstant(X86::COND_O, MVT::i32),
+ SDValue(Sum.getNode(), 2));
+
+ return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, SetCC);
+ }
+ }
+
+ // Also sets EFLAGS.
+ SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
+ SDValue Sum = DAG.getNode(BaseOp, DL, VTs, LHS, RHS);
+
+ SDValue SetCC =
+ DAG.getNode(X86ISD::SETCC, DL, N->getValueType(1),
+ DAG.getConstant(Cond, MVT::i32),
+ SDValue(Sum.getNode(), 1));
+
+ return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, SetCC);
+}
+
+SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const{
+ DebugLoc dl = Op.getDebugLoc();
+ SDNode* Node = Op.getNode();
+ EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
+ EVT VT = Node->getValueType(0);
+ if (Subtarget->hasXMMInt() && VT.isVector()) {
+ unsigned BitsDiff = VT.getScalarType().getSizeInBits() -
+ ExtraVT.getScalarType().getSizeInBits();
+ SDValue ShAmt = DAG.getConstant(BitsDiff, MVT::i32);
+
+ unsigned SHLIntrinsicsID = 0;
+ unsigned SRAIntrinsicsID = 0;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default:
+ return SDValue();
+ case MVT::v4i32: {
+ SHLIntrinsicsID = Intrinsic::x86_sse2_pslli_d;
+ SRAIntrinsicsID = Intrinsic::x86_sse2_psrai_d;
+ break;
+ }
+ case MVT::v8i16: {
+ SHLIntrinsicsID = Intrinsic::x86_sse2_pslli_w;
+ SRAIntrinsicsID = Intrinsic::x86_sse2_psrai_w;
+ break;
+ }
+ }
+
+ SDValue Tmp1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(SHLIntrinsicsID, MVT::i32),
+ Node->getOperand(0), ShAmt);
+
+ // In case of 1 bit sext, no need to shr
+ if (ExtraVT.getScalarType().getSizeInBits() == 1) return Tmp1;
+
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(SRAIntrinsicsID, MVT::i32),
+ Tmp1, ShAmt);
+ }
+
+ return SDValue();
+}
+
+
+SDValue X86TargetLowering::LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const{
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Go ahead and emit the fence on x86-64 even if we asked for no-sse2.
+ // There isn't any reason to disable it if the target processor supports it.
+ if (!Subtarget->hasXMMInt() && !Subtarget->is64Bit()) {
+ SDValue Chain = Op.getOperand(0);
+ SDValue Zero = DAG.getConstant(0, MVT::i32);
+ SDValue Ops[] = {
+ DAG.getRegister(X86::ESP, MVT::i32), // Base
+ DAG.getTargetConstant(1, MVT::i8), // Scale
+ DAG.getRegister(0, MVT::i32), // Index
+ DAG.getTargetConstant(0, MVT::i32), // Disp
+ DAG.getRegister(0, MVT::i32), // Segment.
+ Zero,
+ Chain
+ };
+ SDNode *Res =
+ DAG.getMachineNode(X86::OR32mrLocked, dl, MVT::Other, Ops,
+ array_lengthof(Ops));
+ return SDValue(Res, 0);
+ }
+
+ unsigned isDev = cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue();
+ if (!isDev)
+ return DAG.getNode(X86ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
+
+ unsigned Op1 = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ unsigned Op2 = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
+ unsigned Op3 = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
+ unsigned Op4 = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
+
+ // def : Pat<(membarrier (i8 0), (i8 0), (i8 0), (i8 1), (i8 1)), (SFENCE)>;
+ if (!Op1 && !Op2 && !Op3 && Op4)
+ return DAG.getNode(X86ISD::SFENCE, dl, MVT::Other, Op.getOperand(0));
+
+ // def : Pat<(membarrier (i8 1), (i8 0), (i8 0), (i8 0), (i8 1)), (LFENCE)>;
+ if (Op1 && !Op2 && !Op3 && !Op4)
+ return DAG.getNode(X86ISD::LFENCE, dl, MVT::Other, Op.getOperand(0));
+
+ // def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm), (i8 1)),
+ // (MFENCE)>;
+ return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0));
+}
+
+SDValue X86TargetLowering::LowerATOMIC_FENCE(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ AtomicOrdering FenceOrdering = static_cast<AtomicOrdering>(
+ cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue());
+ SynchronizationScope FenceScope = static_cast<SynchronizationScope>(
+ cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());
+
+ // The only fence that needs an instruction is a sequentially-consistent
+ // cross-thread fence.
+ if (FenceOrdering == SequentiallyConsistent && FenceScope == CrossThread) {
+ // Use mfence if we have SSE2 or we're on x86-64 (even if we asked for
+ // no-sse2). There isn't any reason to disable it if the target processor
+ // supports it.
+ if (Subtarget->hasXMMInt() || Subtarget->is64Bit())
+ return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0));
+
+ SDValue Chain = Op.getOperand(0);
+ SDValue Zero = DAG.getConstant(0, MVT::i32);
+ SDValue Ops[] = {
+ DAG.getRegister(X86::ESP, MVT::i32), // Base
+ DAG.getTargetConstant(1, MVT::i8), // Scale
+ DAG.getRegister(0, MVT::i32), // Index
+ DAG.getTargetConstant(0, MVT::i32), // Disp
+ DAG.getRegister(0, MVT::i32), // Segment.
+ Zero,
+ Chain
+ };
+ SDNode *Res =
+ DAG.getMachineNode(X86::OR32mrLocked, dl, MVT::Other, Ops,
+ array_lengthof(Ops));
+ return SDValue(Res, 0);
+ }
+
+ // MEMBARRIER is a compiler barrier; it codegens to a no-op.
+ return DAG.getNode(X86ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
+}
+
+
+SDValue X86TargetLowering::LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) const {
+ EVT T = Op.getValueType();
+ DebugLoc DL = Op.getDebugLoc();
+ unsigned Reg = 0;
+ unsigned size = 0;
+ switch(T.getSimpleVT().SimpleTy) {
+ default:
+ assert(false && "Invalid value type!");
+ case MVT::i8: Reg = X86::AL; size = 1; break;
+ case MVT::i16: Reg = X86::AX; size = 2; break;
+ case MVT::i32: Reg = X86::EAX; size = 4; break;
+ case MVT::i64:
+ assert(Subtarget->is64Bit() && "Node not type legal!");
+ Reg = X86::RAX; size = 8;
+ break;
+ }
+ SDValue cpIn = DAG.getCopyToReg(Op.getOperand(0), DL, Reg,
+ Op.getOperand(2), SDValue());
+ SDValue Ops[] = { cpIn.getValue(0),
+ Op.getOperand(1),
+ Op.getOperand(3),
+ DAG.getTargetConstant(size, MVT::i8),
+ cpIn.getValue(1) };
+ SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
+ MachineMemOperand *MMO = cast<AtomicSDNode>(Op)->getMemOperand();
+ SDValue Result = DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG_DAG, DL, Tys,
+ Ops, 5, T, MMO);
+ SDValue cpOut =
+ DAG.getCopyFromReg(Result.getValue(0), DL, Reg, T, Result.getValue(1));
+ return cpOut;
+}
+
+SDValue X86TargetLowering::LowerREADCYCLECOUNTER(SDValue Op,
+ SelectionDAG &DAG) const {
+ assert(Subtarget->is64Bit() && "Result not type legalized?");
+ SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue TheChain = Op.getOperand(0);
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue rd = DAG.getNode(X86ISD::RDTSC_DAG, dl, Tys, &TheChain, 1);
+ SDValue rax = DAG.getCopyFromReg(rd, dl, X86::RAX, MVT::i64, rd.getValue(1));
+ SDValue rdx = DAG.getCopyFromReg(rax.getValue(1), dl, X86::RDX, MVT::i64,
+ rax.getValue(2));
+ SDValue Tmp = DAG.getNode(ISD::SHL, dl, MVT::i64, rdx,
+ DAG.getConstant(32, MVT::i8));
+ SDValue Ops[] = {
+ DAG.getNode(ISD::OR, dl, MVT::i64, rax, Tmp),
+ rdx.getValue(1)
+ };
+ return DAG.getMergeValues(Ops, 2, dl);
+}
+
+SDValue X86TargetLowering::LowerBITCAST(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT SrcVT = Op.getOperand(0).getValueType();
+ EVT DstVT = Op.getValueType();
+ assert(Subtarget->is64Bit() && !Subtarget->hasXMMInt() &&
+ Subtarget->hasMMX() && "Unexpected custom BITCAST");
+ assert((DstVT == MVT::i64 ||
+ (DstVT.isVector() && DstVT.getSizeInBits()==64)) &&
+ "Unexpected custom BITCAST");
+ // i64 <=> MMX conversions are Legal.
+ if (SrcVT==MVT::i64 && DstVT.isVector())
+ return Op;
+ if (DstVT==MVT::i64 && SrcVT.isVector())
+ return Op;
+ // MMX <=> MMX conversions are Legal.
+ if (SrcVT.isVector() && DstVT.isVector())
+ return Op;
+ // All other conversions need to be expanded.
+ return SDValue();
+}
+
+SDValue X86TargetLowering::LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) const {
+ SDNode *Node = Op.getNode();
+ DebugLoc dl = Node->getDebugLoc();
+ EVT T = Node->getValueType(0);
+ SDValue negOp = DAG.getNode(ISD::SUB, dl, T,
+ DAG.getConstant(0, T), Node->getOperand(2));
+ return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, dl,
+ cast<AtomicSDNode>(Node)->getMemoryVT(),
+ Node->getOperand(0),
+ Node->getOperand(1), negOp,
+ cast<AtomicSDNode>(Node)->getSrcValue(),
+ cast<AtomicSDNode>(Node)->getAlignment(),
+ cast<AtomicSDNode>(Node)->getOrdering(),
+ cast<AtomicSDNode>(Node)->getSynchScope());
+}
+
+static SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) {
+ SDNode *Node = Op.getNode();
+ DebugLoc dl = Node->getDebugLoc();
+ EVT VT = cast<AtomicSDNode>(Node)->getMemoryVT();
+
+ // Convert seq_cst store -> xchg
+ // Convert wide store -> swap (-> cmpxchg8b/cmpxchg16b)
+ // FIXME: On 32-bit, store -> fist or movq would be more efficient
+ // (The only way to get a 16-byte store is cmpxchg16b)
+ // FIXME: 16-byte ATOMIC_SWAP isn't actually hooked up at the moment.
+ if (cast<AtomicSDNode>(Node)->getOrdering() == SequentiallyConsistent ||
+ !DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
+ SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl,
+ cast<AtomicSDNode>(Node)->getMemoryVT(),
+ Node->getOperand(0),
+ Node->getOperand(1), Node->getOperand(2),
+ cast<AtomicSDNode>(Node)->getMemOperand(),
+ cast<AtomicSDNode>(Node)->getOrdering(),
+ cast<AtomicSDNode>(Node)->getSynchScope());
+ return Swap.getValue(1);
+ }
+ // Other atomic stores have a simple pattern.
+ return Op;
+}
+
+static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getNode()->getValueType(0);
+
+ // Let legalize expand this if it isn't a legal type yet.
+ if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
+ return SDValue();
+
+ SDVTList VTs = DAG.getVTList(VT, MVT::i32);
+
+ unsigned Opc;
+ bool ExtraOp = false;
+ switch (Op.getOpcode()) {
+ default: assert(0 && "Invalid code");
+ case ISD::ADDC: Opc = X86ISD::ADD; break;
+ case ISD::ADDE: Opc = X86ISD::ADC; ExtraOp = true; break;
+ case ISD::SUBC: Opc = X86ISD::SUB; break;
+ case ISD::SUBE: Opc = X86ISD::SBB; ExtraOp = true; break;
+ }
+
+ if (!ExtraOp)
+ return DAG.getNode(Opc, Op->getDebugLoc(), VTs, Op.getOperand(0),
+ Op.getOperand(1));
+ return DAG.getNode(Opc, Op->getDebugLoc(), VTs, Op.getOperand(0),
+ Op.getOperand(1), Op.getOperand(2));
+}
+
+/// LowerOperation - Provide custom lowering hooks for some operations.
+///
+SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+ switch (Op.getOpcode()) {
+ default: llvm_unreachable("Should not custom lower this!");
+ case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op,DAG);
+ case ISD::MEMBARRIER: return LowerMEMBARRIER(Op,DAG);
+ case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op,DAG);
+ case ISD::ATOMIC_CMP_SWAP: return LowerCMP_SWAP(Op,DAG);
+ case ISD::ATOMIC_LOAD_SUB: return LowerLOAD_SUB(Op,DAG);
+ case ISD::ATOMIC_STORE: return LowerATOMIC_STORE(Op,DAG);
+ case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
+ case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
+ case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
+ case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
+ case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
+ case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG);
+ case ISD::INSERT_SUBVECTOR: return LowerINSERT_SUBVECTOR(Op, DAG);
+ case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
+ case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
+ case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
+ case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
+ case ISD::ExternalSymbol: return LowerExternalSymbol(Op, DAG);
+ case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
+ case ISD::SHL_PARTS:
+ case ISD::SRA_PARTS:
+ case ISD::SRL_PARTS: return LowerShiftParts(Op, DAG);
+ case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
+ case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
+ case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
+ case ISD::FP_TO_UINT: return LowerFP_TO_UINT(Op, DAG);
+ case ISD::FABS: return LowerFABS(Op, DAG);
+ case ISD::FNEG: return LowerFNEG(Op, DAG);
+ case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
+ case ISD::FGETSIGN: return LowerFGETSIGN(Op, DAG);
+ case ISD::SETCC: return LowerSETCC(Op, DAG);
+ case ISD::SELECT: return LowerSELECT(Op, DAG);
+ case ISD::BRCOND: return LowerBRCOND(Op, DAG);
+ case ISD::JumpTable: return LowerJumpTable(Op, DAG);
+ case ISD::VASTART: return LowerVASTART(Op, DAG);
+ case ISD::VAARG: return LowerVAARG(Op, DAG);
+ case ISD::VACOPY: return LowerVACOPY(Op, DAG);
+ case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
+ case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
+ case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
+ case ISD::FRAME_TO_ARGS_OFFSET:
+ return LowerFRAME_TO_ARGS_OFFSET(Op, DAG);
+ case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
+ case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG);
+ case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
+ case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
+ case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
+ case ISD::CTLZ: return LowerCTLZ(Op, DAG);
+ case ISD::CTTZ: return LowerCTTZ(Op, DAG);
+ case ISD::MUL: return LowerMUL(Op, DAG);
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::SHL: return LowerShift(Op, DAG);
+ case ISD::SADDO:
+ case ISD::UADDO:
+ case ISD::SSUBO:
+ case ISD::USUBO:
+ case ISD::SMULO:
+ case ISD::UMULO: return LowerXALUO(Op, DAG);
+ case ISD::READCYCLECOUNTER: return LowerREADCYCLECOUNTER(Op, DAG);
+ case ISD::BITCAST: return LowerBITCAST(Op, DAG);
+ case ISD::ADDC:
+ case ISD::ADDE:
+ case ISD::SUBC:
+ case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
+ case ISD::ADD: return LowerADD(Op, DAG);
+ case ISD::SUB: return LowerSUB(Op, DAG);
+ }
+}
+
+static void ReplaceATOMIC_LOAD(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) {
+ DebugLoc dl = Node->getDebugLoc();
+ EVT VT = cast<AtomicSDNode>(Node)->getMemoryVT();
+
+ // Convert wide load -> cmpxchg8b/cmpxchg16b
+ // FIXME: On 32-bit, load -> fild or movq would be more efficient
+ // (The only way to get a 16-byte load is cmpxchg16b)
+ // FIXME: 16-byte ATOMIC_CMP_SWAP isn't actually hooked up at the moment.
+ SDValue Zero = DAG.getConstant(0, VT);
+ SDValue Swap = DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl, VT,
+ Node->getOperand(0),
+ Node->getOperand(1), Zero, Zero,
+ cast<AtomicSDNode>(Node)->getMemOperand(),
+ cast<AtomicSDNode>(Node)->getOrdering(),
+ cast<AtomicSDNode>(Node)->getSynchScope());
+ Results.push_back(Swap.getValue(0));
+ Results.push_back(Swap.getValue(1));
+}
+
+void X86TargetLowering::
+ReplaceATOMIC_BINARY_64(SDNode *Node, SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG, unsigned NewOp) const {
+ DebugLoc dl = Node->getDebugLoc();
+ assert (Node->getValueType(0) == MVT::i64 &&
+ "Only know how to expand i64 atomics");
+
+ SDValue Chain = Node->getOperand(0);
+ SDValue In1 = Node->getOperand(1);
+ SDValue In2L = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Node->getOperand(2), DAG.getIntPtrConstant(0));
+ SDValue In2H = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Node->getOperand(2), DAG.getIntPtrConstant(1));
+ SDValue Ops[] = { Chain, In1, In2L, In2H };
+ SDVTList Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
+ SDValue Result =
+ DAG.getMemIntrinsicNode(NewOp, dl, Tys, Ops, 4, MVT::i64,
+ cast<MemSDNode>(Node)->getMemOperand());
+ SDValue OpsF[] = { Result.getValue(0), Result.getValue(1)};
+ Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF, 2));
+ Results.push_back(Result.getValue(2));
+}
+
+/// ReplaceNodeResults - Replace a node with an illegal result type
+/// with a new node built out of custom code.
+void X86TargetLowering::ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = N->getDebugLoc();
+ switch (N->getOpcode()) {
+ default:
+ assert(false && "Do not know how to custom type legalize this operation!");
+ return;
+ case ISD::SIGN_EXTEND_INREG:
+ case ISD::ADDC:
+ case ISD::ADDE:
+ case ISD::SUBC:
+ case ISD::SUBE:
+ // We don't want to expand or promote these.
+ return;
+ case ISD::FP_TO_SINT: {
+ std::pair<SDValue,SDValue> Vals =
+ FP_TO_INTHelper(SDValue(N, 0), DAG, true);
+ SDValue FIST = Vals.first, StackSlot = Vals.second;
+ if (FIST.getNode() != 0) {
+ EVT VT = N->getValueType(0);
+ // Return a load from the stack slot.
+ Results.push_back(DAG.getLoad(VT, dl, FIST, StackSlot,
+ MachinePointerInfo(), false, false, 0));
+ }
+ return;
+ }
+ case ISD::READCYCLECOUNTER: {
+ SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue TheChain = N->getOperand(0);
+ SDValue rd = DAG.getNode(X86ISD::RDTSC_DAG, dl, Tys, &TheChain, 1);
+ SDValue eax = DAG.getCopyFromReg(rd, dl, X86::EAX, MVT::i32,
+ rd.getValue(1));
+ SDValue edx = DAG.getCopyFromReg(eax.getValue(1), dl, X86::EDX, MVT::i32,
+ eax.getValue(2));
+ // Use a buildpair to merge the two 32-bit values into a 64-bit one.
+ SDValue Ops[] = { eax, edx };
+ Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Ops, 2));
+ Results.push_back(edx.getValue(1));
+ return;
+ }
+ case ISD::ATOMIC_CMP_SWAP: {
+ EVT T = N->getValueType(0);
+ assert((T == MVT::i64 || T == MVT::i128) && "can only expand cmpxchg pair");
+ bool Regs64bit = T == MVT::i128;
+ EVT HalfT = Regs64bit ? MVT::i64 : MVT::i32;
+ SDValue cpInL, cpInH;
+ cpInL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(2),
+ DAG.getConstant(0, HalfT));
+ cpInH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(2),
+ DAG.getConstant(1, HalfT));
+ cpInL = DAG.getCopyToReg(N->getOperand(0), dl,
+ Regs64bit ? X86::RAX : X86::EAX,
+ cpInL, SDValue());
+ cpInH = DAG.getCopyToReg(cpInL.getValue(0), dl,
+ Regs64bit ? X86::RDX : X86::EDX,
+ cpInH, cpInL.getValue(1));
+ SDValue swapInL, swapInH;
+ swapInL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(3),
+ DAG.getConstant(0, HalfT));
+ swapInH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(3),
+ DAG.getConstant(1, HalfT));
+ swapInL = DAG.getCopyToReg(cpInH.getValue(0), dl,
+ Regs64bit ? X86::RBX : X86::EBX,
+ swapInL, cpInH.getValue(1));
+ swapInH = DAG.getCopyToReg(swapInL.getValue(0), dl,
+ Regs64bit ? X86::RCX : X86::ECX,
+ swapInH, swapInL.getValue(1));
+ SDValue Ops[] = { swapInH.getValue(0),
+ N->getOperand(1),
+ swapInH.getValue(1) };
+ SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
+ MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
+ unsigned Opcode = Regs64bit ? X86ISD::LCMPXCHG16_DAG :
+ X86ISD::LCMPXCHG8_DAG;
+ SDValue Result = DAG.getMemIntrinsicNode(Opcode, dl, Tys,
+ Ops, 3, T, MMO);
+ SDValue cpOutL = DAG.getCopyFromReg(Result.getValue(0), dl,
+ Regs64bit ? X86::RAX : X86::EAX,
+ HalfT, Result.getValue(1));
+ SDValue cpOutH = DAG.getCopyFromReg(cpOutL.getValue(1), dl,
+ Regs64bit ? X86::RDX : X86::EDX,
+ HalfT, cpOutL.getValue(2));
+ SDValue OpsF[] = { cpOutL.getValue(0), cpOutH.getValue(0)};
+ Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, T, OpsF, 2));
+ Results.push_back(cpOutH.getValue(1));
+ return;
+ }
+ case ISD::ATOMIC_LOAD_ADD:
+ ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMADD64_DAG);
+ return;
+ case ISD::ATOMIC_LOAD_AND:
+ ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMAND64_DAG);
+ return;
+ case ISD::ATOMIC_LOAD_NAND:
+ ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMNAND64_DAG);
+ return;
+ case ISD::ATOMIC_LOAD_OR:
+ ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMOR64_DAG);
+ return;
+ case ISD::ATOMIC_LOAD_SUB:
+ ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMSUB64_DAG);
+ return;
+ case ISD::ATOMIC_LOAD_XOR:
+ ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMXOR64_DAG);
+ return;
+ case ISD::ATOMIC_SWAP:
+ ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMSWAP64_DAG);
+ return;
+ case ISD::ATOMIC_LOAD:
+ ReplaceATOMIC_LOAD(N, Results, DAG);
+ }
+}
+
+const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ default: return NULL;
+ case X86ISD::BSF: return "X86ISD::BSF";
+ case X86ISD::BSR: return "X86ISD::BSR";
+ case X86ISD::SHLD: return "X86ISD::SHLD";
+ case X86ISD::SHRD: return "X86ISD::SHRD";
+ case X86ISD::FAND: return "X86ISD::FAND";
+ case X86ISD::FOR: return "X86ISD::FOR";
+ case X86ISD::FXOR: return "X86ISD::FXOR";
+ case X86ISD::FSRL: return "X86ISD::FSRL";
+ case X86ISD::FILD: return "X86ISD::FILD";
+ case X86ISD::FILD_FLAG: return "X86ISD::FILD_FLAG";
+ case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM";
+ case X86ISD::FP_TO_INT32_IN_MEM: return "X86ISD::FP_TO_INT32_IN_MEM";
+ case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM";
+ case X86ISD::FLD: return "X86ISD::FLD";
+ case X86ISD::FST: return "X86ISD::FST";
+ case X86ISD::CALL: return "X86ISD::CALL";
+ case X86ISD::RDTSC_DAG: return "X86ISD::RDTSC_DAG";
+ case X86ISD::BT: return "X86ISD::BT";
+ case X86ISD::CMP: return "X86ISD::CMP";
+ case X86ISD::COMI: return "X86ISD::COMI";
+ case X86ISD::UCOMI: return "X86ISD::UCOMI";
+ case X86ISD::SETCC: return "X86ISD::SETCC";
+ case X86ISD::SETCC_CARRY: return "X86ISD::SETCC_CARRY";
+ case X86ISD::FSETCCsd: return "X86ISD::FSETCCsd";
+ case X86ISD::FSETCCss: return "X86ISD::FSETCCss";
+ case X86ISD::CMOV: return "X86ISD::CMOV";
+ case X86ISD::BRCOND: return "X86ISD::BRCOND";
+ case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG";
+ case X86ISD::REP_STOS: return "X86ISD::REP_STOS";
+ case X86ISD::REP_MOVS: return "X86ISD::REP_MOVS";
+ case X86ISD::GlobalBaseReg: return "X86ISD::GlobalBaseReg";
+ case X86ISD::Wrapper: return "X86ISD::Wrapper";
+ case X86ISD::WrapperRIP: return "X86ISD::WrapperRIP";
+ case X86ISD::PEXTRB: return "X86ISD::PEXTRB";
+ case X86ISD::PEXTRW: return "X86ISD::PEXTRW";
+ case X86ISD::INSERTPS: return "X86ISD::INSERTPS";
+ case X86ISD::PINSRB: return "X86ISD::PINSRB";
+ case X86ISD::PINSRW: return "X86ISD::PINSRW";
+ case X86ISD::PSHUFB: return "X86ISD::PSHUFB";
+ case X86ISD::ANDNP: return "X86ISD::ANDNP";
+ case X86ISD::PSIGNB: return "X86ISD::PSIGNB";
+ case X86ISD::PSIGNW: return "X86ISD::PSIGNW";
+ case X86ISD::PSIGND: return "X86ISD::PSIGND";
+ case X86ISD::FMAX: return "X86ISD::FMAX";
+ case X86ISD::FMIN: return "X86ISD::FMIN";
+ case X86ISD::FRSQRT: return "X86ISD::FRSQRT";
+ case X86ISD::FRCP: return "X86ISD::FRCP";
+ case X86ISD::FHADD: return "X86ISD::FHADD";
+ case X86ISD::FHSUB: return "X86ISD::FHSUB";
+ case X86ISD::TLSADDR: return "X86ISD::TLSADDR";
+ case X86ISD::TLSCALL: return "X86ISD::TLSCALL";
+ case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN";
+ case X86ISD::TC_RETURN: return "X86ISD::TC_RETURN";
+ case X86ISD::FNSTCW16m: return "X86ISD::FNSTCW16m";
+ case X86ISD::LCMPXCHG_DAG: return "X86ISD::LCMPXCHG_DAG";
+ case X86ISD::LCMPXCHG8_DAG: return "X86ISD::LCMPXCHG8_DAG";
+ case X86ISD::ATOMADD64_DAG: return "X86ISD::ATOMADD64_DAG";
+ case X86ISD::ATOMSUB64_DAG: return "X86ISD::ATOMSUB64_DAG";
+ case X86ISD::ATOMOR64_DAG: return "X86ISD::ATOMOR64_DAG";
+ case X86ISD::ATOMXOR64_DAG: return "X86ISD::ATOMXOR64_DAG";
+ case X86ISD::ATOMAND64_DAG: return "X86ISD::ATOMAND64_DAG";
+ case X86ISD::ATOMNAND64_DAG: return "X86ISD::ATOMNAND64_DAG";
+ case X86ISD::VZEXT_MOVL: return "X86ISD::VZEXT_MOVL";
+ case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD";
+ case X86ISD::VSHL: return "X86ISD::VSHL";
+ case X86ISD::VSRL: return "X86ISD::VSRL";
+ case X86ISD::CMPPD: return "X86ISD::CMPPD";
+ case X86ISD::CMPPS: return "X86ISD::CMPPS";
+ case X86ISD::PCMPEQB: return "X86ISD::PCMPEQB";
+ case X86ISD::PCMPEQW: return "X86ISD::PCMPEQW";
+ case X86ISD::PCMPEQD: return "X86ISD::PCMPEQD";
+ case X86ISD::PCMPEQQ: return "X86ISD::PCMPEQQ";
+ case X86ISD::PCMPGTB: return "X86ISD::PCMPGTB";
+ case X86ISD::PCMPGTW: return "X86ISD::PCMPGTW";
+ case X86ISD::PCMPGTD: return "X86ISD::PCMPGTD";
+ case X86ISD::PCMPGTQ: return "X86ISD::PCMPGTQ";
+ case X86ISD::ADD: return "X86ISD::ADD";
+ case X86ISD::SUB: return "X86ISD::SUB";
+ case X86ISD::ADC: return "X86ISD::ADC";
+ case X86ISD::SBB: return "X86ISD::SBB";
+ case X86ISD::SMUL: return "X86ISD::SMUL";
+ case X86ISD::UMUL: return "X86ISD::UMUL";
+ case X86ISD::INC: return "X86ISD::INC";
+ case X86ISD::DEC: return "X86ISD::DEC";
+ case X86ISD::OR: return "X86ISD::OR";
+ case X86ISD::XOR: return "X86ISD::XOR";
+ case X86ISD::AND: return "X86ISD::AND";
+ case X86ISD::ANDN: return "X86ISD::ANDN";
+ case X86ISD::MUL_IMM: return "X86ISD::MUL_IMM";
+ case X86ISD::PTEST: return "X86ISD::PTEST";
+ case X86ISD::TESTP: return "X86ISD::TESTP";
+ case X86ISD::PALIGN: return "X86ISD::PALIGN";
+ case X86ISD::PSHUFD: return "X86ISD::PSHUFD";
+ case X86ISD::PSHUFHW: return "X86ISD::PSHUFHW";
+ case X86ISD::PSHUFHW_LD: return "X86ISD::PSHUFHW_LD";
+ case X86ISD::PSHUFLW: return "X86ISD::PSHUFLW";
+ case X86ISD::PSHUFLW_LD: return "X86ISD::PSHUFLW_LD";
+ case X86ISD::SHUFPS: return "X86ISD::SHUFPS";
+ case X86ISD::SHUFPD: return "X86ISD::SHUFPD";
+ case X86ISD::MOVLHPS: return "X86ISD::MOVLHPS";
+ case X86ISD::MOVLHPD: return "X86ISD::MOVLHPD";
+ case X86ISD::MOVHLPS: return "X86ISD::MOVHLPS";
+ case X86ISD::MOVHLPD: return "X86ISD::MOVHLPD";
+ case X86ISD::MOVLPS: return "X86ISD::MOVLPS";
+ case X86ISD::MOVLPD: return "X86ISD::MOVLPD";
+ case X86ISD::MOVDDUP: return "X86ISD::MOVDDUP";
+ case X86ISD::MOVSHDUP: return "X86ISD::MOVSHDUP";
+ case X86ISD::MOVSLDUP: return "X86ISD::MOVSLDUP";
+ case X86ISD::MOVSHDUP_LD: return "X86ISD::MOVSHDUP_LD";
+ case X86ISD::MOVSLDUP_LD: return "X86ISD::MOVSLDUP_LD";
+ case X86ISD::MOVSD: return "X86ISD::MOVSD";
+ case X86ISD::MOVSS: return "X86ISD::MOVSS";
+ case X86ISD::UNPCKLPS: return "X86ISD::UNPCKLPS";
+ case X86ISD::UNPCKLPD: return "X86ISD::UNPCKLPD";
+ case X86ISD::VUNPCKLPDY: return "X86ISD::VUNPCKLPDY";
+ case X86ISD::UNPCKHPS: return "X86ISD::UNPCKHPS";
+ case X86ISD::UNPCKHPD: return "X86ISD::UNPCKHPD";
+ case X86ISD::PUNPCKLBW: return "X86ISD::PUNPCKLBW";
+ case X86ISD::PUNPCKLWD: return "X86ISD::PUNPCKLWD";
+ case X86ISD::PUNPCKLDQ: return "X86ISD::PUNPCKLDQ";
+ case X86ISD::PUNPCKLQDQ: return "X86ISD::PUNPCKLQDQ";
+ case X86ISD::PUNPCKHBW: return "X86ISD::PUNPCKHBW";
+ case X86ISD::PUNPCKHWD: return "X86ISD::PUNPCKHWD";
+ case X86ISD::PUNPCKHDQ: return "X86ISD::PUNPCKHDQ";
+ case X86ISD::PUNPCKHQDQ: return "X86ISD::PUNPCKHQDQ";
+ case X86ISD::VBROADCAST: return "X86ISD::VBROADCAST";
+ case X86ISD::VPERMILPS: return "X86ISD::VPERMILPS";
+ case X86ISD::VPERMILPSY: return "X86ISD::VPERMILPSY";
+ case X86ISD::VPERMILPD: return "X86ISD::VPERMILPD";
+ case X86ISD::VPERMILPDY: return "X86ISD::VPERMILPDY";
+ case X86ISD::VPERM2F128: return "X86ISD::VPERM2F128";
+ case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS";
+ case X86ISD::VAARG_64: return "X86ISD::VAARG_64";
+ case X86ISD::WIN_ALLOCA: return "X86ISD::WIN_ALLOCA";
+ case X86ISD::MEMBARRIER: return "X86ISD::MEMBARRIER";
+ case X86ISD::SEG_ALLOCA: return "X86ISD::SEG_ALLOCA";
+ }
+}
+
+// isLegalAddressingMode - Return true if the addressing mode represented
+// by AM is legal for this target, for a load/store of the specified type.
+bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM,
+ Type *Ty) const {
+ // X86 supports extremely general addressing modes.
+ CodeModel::Model M = getTargetMachine().getCodeModel();
+ Reloc::Model R = getTargetMachine().getRelocationModel();
+
+ // X86 allows a sign-extended 32-bit immediate field as a displacement.
+ if (!X86::isOffsetSuitableForCodeModel(AM.BaseOffs, M, AM.BaseGV != NULL))
+ return false;
+
+ if (AM.BaseGV) {
+ unsigned GVFlags =
+ Subtarget->ClassifyGlobalReference(AM.BaseGV, getTargetMachine());
+
+ // If a reference to this global requires an extra load, we can't fold it.
+ if (isGlobalStubReference(GVFlags))
+ return false;
+
+ // If BaseGV requires a register for the PIC base, we cannot also have a
+ // BaseReg specified.
+ if (AM.HasBaseReg && isGlobalRelativeToPICBase(GVFlags))
+ return false;
+
+ // If lower 4G is not available, then we must use rip-relative addressing.
+ if ((M != CodeModel::Small || R != Reloc::Static) &&
+ Subtarget->is64Bit() && (AM.BaseOffs || AM.Scale > 1))
+ return false;
+ }
+
+ switch (AM.Scale) {
+ case 0:
+ case 1:
+ case 2:
+ case 4:
+ case 8:
+ // These scales always work.
+ break;
+ case 3:
+ case 5:
+ case 9:
+ // These scales are formed with basereg+scalereg. Only accept if there is
+ // no basereg yet.
+ if (AM.HasBaseReg)
+ return false;
+ break;
+ default: // Other stuff never works.
+ return false;
+ }
+
+ return true;
+}
+
+
+bool X86TargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
+ if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
+ return false;
+ unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
+ unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
+ if (NumBits1 <= NumBits2)
+ return false;
+ return true;
+}
+
+bool X86TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
+ if (!VT1.isInteger() || !VT2.isInteger())
+ return false;
+ unsigned NumBits1 = VT1.getSizeInBits();
+ unsigned NumBits2 = VT2.getSizeInBits();
+ if (NumBits1 <= NumBits2)
+ return false;
+ return true;
+}
+
+bool X86TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const {
+ // x86-64 implicitly zero-extends 32-bit results in 64-bit registers.
+ return Ty1->isIntegerTy(32) && Ty2->isIntegerTy(64) && Subtarget->is64Bit();
+}
+
+bool X86TargetLowering::isZExtFree(EVT VT1, EVT VT2) const {
+ // x86-64 implicitly zero-extends 32-bit results in 64-bit registers.
+ return VT1 == MVT::i32 && VT2 == MVT::i64 && Subtarget->is64Bit();
+}
+
+bool X86TargetLowering::isNarrowingProfitable(EVT VT1, EVT VT2) const {
+ // i16 instructions are longer (0x66 prefix) and potentially slower.
+ return !(VT1 == MVT::i32 && VT2 == MVT::i16);
+}
+
+/// isShuffleMaskLegal - Targets can use this to indicate that they only
+/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
+/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
+/// are assumed to be legal.
+bool
+X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
+ EVT VT) const {
+ // Very little shuffling can be done for 64-bit vectors right now.
+ if (VT.getSizeInBits() == 64)
+ return isPALIGNRMask(M, VT, Subtarget->hasSSSE3() || Subtarget->hasAVX());
+
+ // FIXME: pshufb, blends, shifts.
+ return (VT.getVectorNumElements() == 2 ||
+ ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
+ isMOVLMask(M, VT) ||
+ isSHUFPMask(M, VT) ||
+ isPSHUFDMask(M, VT) ||
+ isPSHUFHWMask(M, VT) ||
+ isPSHUFLWMask(M, VT) ||
+ isPALIGNRMask(M, VT, Subtarget->hasSSSE3() || Subtarget->hasAVX()) ||
+ isUNPCKLMask(M, VT) ||
+ isUNPCKHMask(M, VT) ||
+ isUNPCKL_v_undef_Mask(M, VT) ||
+ isUNPCKH_v_undef_Mask(M, VT));
+}
+
+bool
+X86TargetLowering::isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask,
+ EVT VT) const {
+ unsigned NumElts = VT.getVectorNumElements();
+ // FIXME: This collection of masks seems suspect.
+ if (NumElts == 2)
+ return true;
+ if (NumElts == 4 && VT.getSizeInBits() == 128) {
+ return (isMOVLMask(Mask, VT) ||
+ isCommutedMOVLMask(Mask, VT, true) ||
+ isSHUFPMask(Mask, VT) ||
+ isCommutedSHUFPMask(Mask, VT));
+ }
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// X86 Scheduler Hooks
+//===----------------------------------------------------------------------===//
+
+// private utility function
+MachineBasicBlock *
+X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr,
+ MachineBasicBlock *MBB,
+ unsigned regOpc,
+ unsigned immOpc,
+ unsigned LoadOpc,
+ unsigned CXchgOpc,
+ unsigned notOpc,
+ unsigned EAXreg,
+ TargetRegisterClass *RC,
+ bool invSrc) const {
+ // For the atomic bitwise operator, we generate
+ // thisMBB:
+ // newMBB:
+ // ld t1 = [bitinstr.addr]
+ // op t2 = t1, [bitinstr.val]
+ // mov EAX = t1
+ // lcs dest = [bitinstr.addr], t2 [EAX is implicit]
+ // bz newMBB
+ // fallthrough -->nextMBB
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+ MachineFunction::iterator MBBIter = MBB;
+ ++MBBIter;
+
+ /// First build the CFG
+ MachineFunction *F = MBB->getParent();
+ MachineBasicBlock *thisMBB = MBB;
+ MachineBasicBlock *newMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *nextMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(MBBIter, newMBB);
+ F->insert(MBBIter, nextMBB);
+
+ // Transfer the remainder of thisMBB and its successor edges to nextMBB.
+ nextMBB->splice(nextMBB->begin(), thisMBB,
+ llvm::next(MachineBasicBlock::iterator(bInstr)),
+ thisMBB->end());
+ nextMBB->transferSuccessorsAndUpdatePHIs(thisMBB);
+
+ // Update thisMBB to fall through to newMBB
+ thisMBB->addSuccessor(newMBB);
+
+ // newMBB jumps to itself and fall through to nextMBB
+ newMBB->addSuccessor(nextMBB);
+ newMBB->addSuccessor(newMBB);
+
+ // Insert instructions into newMBB based on incoming instruction
+ assert(bInstr->getNumOperands() < X86::AddrNumOperands + 4 &&
+ "unexpected number of operands");
+ DebugLoc dl = bInstr->getDebugLoc();
+ MachineOperand& destOper = bInstr->getOperand(0);
+ MachineOperand* argOpers[2 + X86::AddrNumOperands];
+ int numArgs = bInstr->getNumOperands() - 1;
+ for (int i=0; i < numArgs; ++i)
+ argOpers[i] = &bInstr->getOperand(i+1);
+
+ // x86 address has 4 operands: base, index, scale, and displacement
+ int lastAddrIndx = X86::AddrNumOperands - 1; // [0,3]
+ int valArgIndx = lastAddrIndx + 1;
+
+ unsigned t1 = F->getRegInfo().createVirtualRegister(RC);
+ MachineInstrBuilder MIB = BuildMI(newMBB, dl, TII->get(LoadOpc), t1);
+ for (int i=0; i <= lastAddrIndx; ++i)
+ (*MIB).addOperand(*argOpers[i]);
+
+ unsigned tt = F->getRegInfo().createVirtualRegister(RC);
+ if (invSrc) {
+ MIB = BuildMI(newMBB, dl, TII->get(notOpc), tt).addReg(t1);
+ }
+ else
+ tt = t1;
+
+ unsigned t2 = F->getRegInfo().createVirtualRegister(RC);
+ assert((argOpers[valArgIndx]->isReg() ||
+ argOpers[valArgIndx]->isImm()) &&
+ "invalid operand");
+ if (argOpers[valArgIndx]->isReg())
+ MIB = BuildMI(newMBB, dl, TII->get(regOpc), t2);
+ else
+ MIB = BuildMI(newMBB, dl, TII->get(immOpc), t2);
+ MIB.addReg(tt);
+ (*MIB).addOperand(*argOpers[valArgIndx]);
+
+ MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), EAXreg);
+ MIB.addReg(t1);
+
+ MIB = BuildMI(newMBB, dl, TII->get(CXchgOpc));
+ for (int i=0; i <= lastAddrIndx; ++i)
+ (*MIB).addOperand(*argOpers[i]);
+ MIB.addReg(t2);
+ assert(bInstr->hasOneMemOperand() && "Unexpected number of memoperand");
+ (*MIB).setMemRefs(bInstr->memoperands_begin(),
+ bInstr->memoperands_end());
+
+ MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), destOper.getReg());
+ MIB.addReg(EAXreg);
+
+ // insert branch
+ BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB);
+
+ bInstr->eraseFromParent(); // The pseudo instruction is gone now.
+ return nextMBB;
+}
+
+// private utility function: 64 bit atomics on 32 bit host.
+MachineBasicBlock *
+X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr,
+ MachineBasicBlock *MBB,
+ unsigned regOpcL,
+ unsigned regOpcH,
+ unsigned immOpcL,
+ unsigned immOpcH,
+ bool invSrc) const {
+ // For the atomic bitwise operator, we generate
+ // thisMBB (instructions are in pairs, except cmpxchg8b)
+ // ld t1,t2 = [bitinstr.addr]
+ // newMBB:
+ // out1, out2 = phi (thisMBB, t1/t2) (newMBB, t3/t4)
+ // op t5, t6 <- out1, out2, [bitinstr.val]
+ // (for SWAP, substitute: mov t5, t6 <- [bitinstr.val])
+ // mov ECX, EBX <- t5, t6
+ // mov EAX, EDX <- t1, t2
+ // cmpxchg8b [bitinstr.addr] [EAX, EDX, EBX, ECX implicit]
+ // mov t3, t4 <- EAX, EDX
+ // bz newMBB
+ // result in out1, out2
+ // fallthrough -->nextMBB
+
+ const TargetRegisterClass *RC = X86::GR32RegisterClass;
+ const unsigned LoadOpc = X86::MOV32rm;
+ const unsigned NotOpc = X86::NOT32r;
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+ MachineFunction::iterator MBBIter = MBB;
+ ++MBBIter;
+
+ /// First build the CFG
+ MachineFunction *F = MBB->getParent();
+ MachineBasicBlock *thisMBB = MBB;
+ MachineBasicBlock *newMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *nextMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(MBBIter, newMBB);
+ F->insert(MBBIter, nextMBB);
+
+ // Transfer the remainder of thisMBB and its successor edges to nextMBB.
+ nextMBB->splice(nextMBB->begin(), thisMBB,
+ llvm::next(MachineBasicBlock::iterator(bInstr)),
+ thisMBB->end());
+ nextMBB->transferSuccessorsAndUpdatePHIs(thisMBB);
+
+ // Update thisMBB to fall through to newMBB
+ thisMBB->addSuccessor(newMBB);
+
+ // newMBB jumps to itself and fall through to nextMBB
+ newMBB->addSuccessor(nextMBB);
+ newMBB->addSuccessor(newMBB);
+
+ DebugLoc dl = bInstr->getDebugLoc();
+ // Insert instructions into newMBB based on incoming instruction
+ // There are 8 "real" operands plus 9 implicit def/uses, ignored here.
+ assert(bInstr->getNumOperands() < X86::AddrNumOperands + 14 &&
+ "unexpected number of operands");
+ MachineOperand& dest1Oper = bInstr->getOperand(0);
+ MachineOperand& dest2Oper = bInstr->getOperand(1);
+ MachineOperand* argOpers[2 + X86::AddrNumOperands];
+ for (int i=0; i < 2 + X86::AddrNumOperands; ++i) {
+ argOpers[i] = &bInstr->getOperand(i+2);
+
+ // We use some of the operands multiple times, so conservatively just
+ // clear any kill flags that might be present.
+ if (argOpers[i]->isReg() && argOpers[i]->isUse())
+ argOpers[i]->setIsKill(false);
+ }
+
+ // x86 address has 5 operands: base, index, scale, displacement, and segment.
+ int lastAddrIndx = X86::AddrNumOperands - 1; // [0,3]
+
+ unsigned t1 = F->getRegInfo().createVirtualRegister(RC);
+ MachineInstrBuilder MIB = BuildMI(thisMBB, dl, TII->get(LoadOpc), t1);
+ for (int i=0; i <= lastAddrIndx; ++i)
+ (*MIB).addOperand(*argOpers[i]);
+ unsigned t2 = F->getRegInfo().createVirtualRegister(RC);
+ MIB = BuildMI(thisMBB, dl, TII->get(LoadOpc), t2);
+ // add 4 to displacement.
+ for (int i=0; i <= lastAddrIndx-2; ++i)
+ (*MIB).addOperand(*argOpers[i]);
+ MachineOperand newOp3 = *(argOpers[3]);
+ if (newOp3.isImm())
+ newOp3.setImm(newOp3.getImm()+4);
+ else
+ newOp3.setOffset(newOp3.getOffset()+4);
+ (*MIB).addOperand(newOp3);
+ (*MIB).addOperand(*argOpers[lastAddrIndx]);
+
+ // t3/4 are defined later, at the bottom of the loop
+ unsigned t3 = F->getRegInfo().createVirtualRegister(RC);
+ unsigned t4 = F->getRegInfo().createVirtualRegister(RC);
+ BuildMI(newMBB, dl, TII->get(X86::PHI), dest1Oper.getReg())
+ .addReg(t1).addMBB(thisMBB).addReg(t3).addMBB(newMBB);
+ BuildMI(newMBB, dl, TII->get(X86::PHI), dest2Oper.getReg())
+ .addReg(t2).addMBB(thisMBB).addReg(t4).addMBB(newMBB);
+
+ // The subsequent operations should be using the destination registers of
+ //the PHI instructions.
+ if (invSrc) {
+ t1 = F->getRegInfo().createVirtualRegister(RC);
+ t2 = F->getRegInfo().createVirtualRegister(RC);
+ MIB = BuildMI(newMBB, dl, TII->get(NotOpc), t1).addReg(dest1Oper.getReg());
+ MIB = BuildMI(newMBB, dl, TII->get(NotOpc), t2).addReg(dest2Oper.getReg());
+ } else {
+ t1 = dest1Oper.getReg();
+ t2 = dest2Oper.getReg();
+ }
+
+ int valArgIndx = lastAddrIndx + 1;
+ assert((argOpers[valArgIndx]->isReg() ||
+ argOpers[valArgIndx]->isImm()) &&
+ "invalid operand");
+ unsigned t5 = F->getRegInfo().createVirtualRegister(RC);
+ unsigned t6 = F->getRegInfo().createVirtualRegister(RC);
+ if (argOpers[valArgIndx]->isReg())
+ MIB = BuildMI(newMBB, dl, TII->get(regOpcL), t5);
+ else
+ MIB = BuildMI(newMBB, dl, TII->get(immOpcL), t5);
+ if (regOpcL != X86::MOV32rr)
+ MIB.addReg(t1);
+ (*MIB).addOperand(*argOpers[valArgIndx]);
+ assert(argOpers[valArgIndx + 1]->isReg() ==
+ argOpers[valArgIndx]->isReg());
+ assert(argOpers[valArgIndx + 1]->isImm() ==
+ argOpers[valArgIndx]->isImm());
+ if (argOpers[valArgIndx + 1]->isReg())
+ MIB = BuildMI(newMBB, dl, TII->get(regOpcH), t6);
+ else
+ MIB = BuildMI(newMBB, dl, TII->get(immOpcH), t6);
+ if (regOpcH != X86::MOV32rr)
+ MIB.addReg(t2);
+ (*MIB).addOperand(*argOpers[valArgIndx + 1]);
+
+ MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EAX);
+ MIB.addReg(t1);
+ MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EDX);
+ MIB.addReg(t2);
+
+ MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EBX);
+ MIB.addReg(t5);
+ MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::ECX);
+ MIB.addReg(t6);
+
+ MIB = BuildMI(newMBB, dl, TII->get(X86::LCMPXCHG8B));
+ for (int i=0; i <= lastAddrIndx; ++i)
+ (*MIB).addOperand(*argOpers[i]);
+
+ assert(bInstr->hasOneMemOperand() && "Unexpected number of memoperand");
+ (*MIB).setMemRefs(bInstr->memoperands_begin(),
+ bInstr->memoperands_end());
+
+ MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), t3);
+ MIB.addReg(X86::EAX);
+ MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), t4);
+ MIB.addReg(X86::EDX);
+
+ // insert branch
+ BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB);
+
+ bInstr->eraseFromParent(); // The pseudo instruction is gone now.
+ return nextMBB;
+}
+
+// private utility function
+MachineBasicBlock *
+X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr,
+ MachineBasicBlock *MBB,
+ unsigned cmovOpc) const {
+ // For the atomic min/max operator, we generate
+ // thisMBB:
+ // newMBB:
+ // ld t1 = [min/max.addr]
+ // mov t2 = [min/max.val]
+ // cmp t1, t2
+ // cmov[cond] t2 = t1
+ // mov EAX = t1
+ // lcs dest = [bitinstr.addr], t2 [EAX is implicit]
+ // bz newMBB
+ // fallthrough -->nextMBB
+ //
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+ MachineFunction::iterator MBBIter = MBB;
+ ++MBBIter;
+
+ /// First build the CFG
+ MachineFunction *F = MBB->getParent();
+ MachineBasicBlock *thisMBB = MBB;
+ MachineBasicBlock *newMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *nextMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(MBBIter, newMBB);
+ F->insert(MBBIter, nextMBB);
+
+ // Transfer the remainder of thisMBB and its successor edges to nextMBB.
+ nextMBB->splice(nextMBB->begin(), thisMBB,
+ llvm::next(MachineBasicBlock::iterator(mInstr)),
+ thisMBB->end());
+ nextMBB->transferSuccessorsAndUpdatePHIs(thisMBB);
+
+ // Update thisMBB to fall through to newMBB
+ thisMBB->addSuccessor(newMBB);
+
+ // newMBB jumps to newMBB and fall through to nextMBB
+ newMBB->addSuccessor(nextMBB);
+ newMBB->addSuccessor(newMBB);
+
+ DebugLoc dl = mInstr->getDebugLoc();
+ // Insert instructions into newMBB based on incoming instruction
+ assert(mInstr->getNumOperands() < X86::AddrNumOperands + 4 &&
+ "unexpected number of operands");
+ MachineOperand& destOper = mInstr->getOperand(0);
+ MachineOperand* argOpers[2 + X86::AddrNumOperands];
+ int numArgs = mInstr->getNumOperands() - 1;
+ for (int i=0; i < numArgs; ++i)
+ argOpers[i] = &mInstr->getOperand(i+1);
+
+ // x86 address has 4 operands: base, index, scale, and displacement
+ int lastAddrIndx = X86::AddrNumOperands - 1; // [0,3]
+ int valArgIndx = lastAddrIndx + 1;
+
+ unsigned t1 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass);
+ MachineInstrBuilder MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rm), t1);
+ for (int i=0; i <= lastAddrIndx; ++i)
+ (*MIB).addOperand(*argOpers[i]);
+
+ // We only support register and immediate values
+ assert((argOpers[valArgIndx]->isReg() ||
+ argOpers[valArgIndx]->isImm()) &&
+ "invalid operand");
+
+ unsigned t2 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass);
+ if (argOpers[valArgIndx]->isReg())
+ MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), t2);
+ else
+ MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rr), t2);
+ (*MIB).addOperand(*argOpers[valArgIndx]);
+
+ MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EAX);
+ MIB.addReg(t1);
+
+ MIB = BuildMI(newMBB, dl, TII->get(X86::CMP32rr));
+ MIB.addReg(t1);
+ MIB.addReg(t2);
+
+ // Generate movc
+ unsigned t3 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass);
+ MIB = BuildMI(newMBB, dl, TII->get(cmovOpc),t3);
+ MIB.addReg(t2);
+ MIB.addReg(t1);
+
+ // Cmp and exchange if none has modified the memory location
+ MIB = BuildMI(newMBB, dl, TII->get(X86::LCMPXCHG32));
+ for (int i=0; i <= lastAddrIndx; ++i)
+ (*MIB).addOperand(*argOpers[i]);
+ MIB.addReg(t3);
+ assert(mInstr->hasOneMemOperand() && "Unexpected number of memoperand");
+ (*MIB).setMemRefs(mInstr->memoperands_begin(),
+ mInstr->memoperands_end());
+
+ MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), destOper.getReg());
+ MIB.addReg(X86::EAX);
+
+ // insert branch
+ BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB);
+
+ mInstr->eraseFromParent(); // The pseudo instruction is gone now.
+ return nextMBB;
+}
+
+// FIXME: When we get size specific XMM0 registers, i.e. XMM0_V16I8
+// or XMM0_V32I8 in AVX all of this code can be replaced with that
+// in the .td file.
+MachineBasicBlock *
+X86TargetLowering::EmitPCMP(MachineInstr *MI, MachineBasicBlock *BB,
+ unsigned numArgs, bool memArg) const {
+ assert((Subtarget->hasSSE42() || Subtarget->hasAVX()) &&
+ "Target must have SSE4.2 or AVX features enabled");
+
+ DebugLoc dl = MI->getDebugLoc();
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ unsigned Opc;
+ if (!Subtarget->hasAVX()) {
+ if (memArg)
+ Opc = numArgs == 3 ? X86::PCMPISTRM128rm : X86::PCMPESTRM128rm;
+ else
+ Opc = numArgs == 3 ? X86::PCMPISTRM128rr : X86::PCMPESTRM128rr;
+ } else {
+ if (memArg)
+ Opc = numArgs == 3 ? X86::VPCMPISTRM128rm : X86::VPCMPESTRM128rm;
+ else
+ Opc = numArgs == 3 ? X86::VPCMPISTRM128rr : X86::VPCMPESTRM128rr;
+ }
+
+ MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(Opc));
+ for (unsigned i = 0; i < numArgs; ++i) {
+ MachineOperand &Op = MI->getOperand(i+1);
+ if (!(Op.isReg() && Op.isImplicit()))
+ MIB.addOperand(Op);
+ }
+ BuildMI(*BB, MI, dl,
+ TII->get(Subtarget->hasAVX() ? X86::VMOVAPSrr : X86::MOVAPSrr),
+ MI->getOperand(0).getReg())
+ .addReg(X86::XMM0);
+
+ MI->eraseFromParent();
+ return BB;
+}
+
+MachineBasicBlock *
+X86TargetLowering::EmitMonitor(MachineInstr *MI, MachineBasicBlock *BB) const {
+ DebugLoc dl = MI->getDebugLoc();
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ // Address into RAX/EAX, other two args into ECX, EDX.
+ unsigned MemOpc = Subtarget->is64Bit() ? X86::LEA64r : X86::LEA32r;
+ unsigned MemReg = Subtarget->is64Bit() ? X86::RAX : X86::EAX;
+ MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(MemOpc), MemReg);
+ for (int i = 0; i < X86::AddrNumOperands; ++i)
+ MIB.addOperand(MI->getOperand(i));
+
+ unsigned ValOps = X86::AddrNumOperands;
+ BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::ECX)
+ .addReg(MI->getOperand(ValOps).getReg());
+ BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::EDX)
+ .addReg(MI->getOperand(ValOps+1).getReg());
+
+ // The instruction doesn't actually take any operands though.
+ BuildMI(*BB, MI, dl, TII->get(X86::MONITORrrr));
+
+ MI->eraseFromParent(); // The pseudo is gone now.
+ return BB;
+}
+
+MachineBasicBlock *
+X86TargetLowering::EmitMwait(MachineInstr *MI, MachineBasicBlock *BB) const {
+ DebugLoc dl = MI->getDebugLoc();
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ // First arg in ECX, the second in EAX.
+ BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::ECX)
+ .addReg(MI->getOperand(0).getReg());
+ BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::EAX)
+ .addReg(MI->getOperand(1).getReg());
+
+ // The instruction doesn't actually take any operands though.
+ BuildMI(*BB, MI, dl, TII->get(X86::MWAITrr));
+
+ MI->eraseFromParent(); // The pseudo is gone now.
+ return BB;
+}
+
+MachineBasicBlock *
+X86TargetLowering::EmitVAARG64WithCustomInserter(
+ MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ // Emit va_arg instruction on X86-64.
+
+ // Operands to this pseudo-instruction:
+ // 0 ) Output : destination address (reg)
+ // 1-5) Input : va_list address (addr, i64mem)
+ // 6 ) ArgSize : Size (in bytes) of vararg type
+ // 7 ) ArgMode : 0=overflow only, 1=use gp_offset, 2=use fp_offset
+ // 8 ) Align : Alignment of type
+ // 9 ) EFLAGS (implicit-def)
+
+ assert(MI->getNumOperands() == 10 && "VAARG_64 should have 10 operands!");
+ assert(X86::AddrNumOperands == 5 && "VAARG_64 assumes 5 address operands");
+
+ unsigned DestReg = MI->getOperand(0).getReg();
+ MachineOperand &Base = MI->getOperand(1);
+ MachineOperand &Scale = MI->getOperand(2);
+ MachineOperand &Index = MI->getOperand(3);
+ MachineOperand &Disp = MI->getOperand(4);
+ MachineOperand &Segment = MI->getOperand(5);
+ unsigned ArgSize = MI->getOperand(6).getImm();
+ unsigned ArgMode = MI->getOperand(7).getImm();
+ unsigned Align = MI->getOperand(8).getImm();
+
+ // Memory Reference
+ assert(MI->hasOneMemOperand() && "Expected VAARG_64 to have one memoperand");
+ MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
+ MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
+
+ // Machine Information
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+ const TargetRegisterClass *AddrRegClass = getRegClassFor(MVT::i64);
+ const TargetRegisterClass *OffsetRegClass = getRegClassFor(MVT::i32);
+ DebugLoc DL = MI->getDebugLoc();
+
+ // struct va_list {
+ // i32 gp_offset
+ // i32 fp_offset
+ // i64 overflow_area (address)
+ // i64 reg_save_area (address)
+ // }
+ // sizeof(va_list) = 24
+ // alignment(va_list) = 8
+
+ unsigned TotalNumIntRegs = 6;
+ unsigned TotalNumXMMRegs = 8;
+ bool UseGPOffset = (ArgMode == 1);
+ bool UseFPOffset = (ArgMode == 2);
+ unsigned MaxOffset = TotalNumIntRegs * 8 +
+ (UseFPOffset ? TotalNumXMMRegs * 16 : 0);
+
+ /* Align ArgSize to a multiple of 8 */
+ unsigned ArgSizeA8 = (ArgSize + 7) & ~7;
+ bool NeedsAlign = (Align > 8);
+
+ MachineBasicBlock *thisMBB = MBB;
+ MachineBasicBlock *overflowMBB;
+ MachineBasicBlock *offsetMBB;
+ MachineBasicBlock *endMBB;
+
+ unsigned OffsetDestReg = 0; // Argument address computed by offsetMBB
+ unsigned OverflowDestReg = 0; // Argument address computed by overflowMBB
+ unsigned OffsetReg = 0;
+
+ if (!UseGPOffset && !UseFPOffset) {
+ // If we only pull from the overflow region, we don't create a branch.
+ // We don't need to alter control flow.
+ OffsetDestReg = 0; // unused
+ OverflowDestReg = DestReg;
+
+ offsetMBB = NULL;
+ overflowMBB = thisMBB;
+ endMBB = thisMBB;
+ } else {
+ // First emit code to check if gp_offset (or fp_offset) is below the bound.
+ // If so, pull the argument from reg_save_area. (branch to offsetMBB)
+ // If not, pull from overflow_area. (branch to overflowMBB)
+ //
+ // thisMBB
+ // | .
+ // | .
+ // offsetMBB overflowMBB
+ // | .
+ // | .
+ // endMBB
+
+ // Registers for the PHI in endMBB
+ OffsetDestReg = MRI.createVirtualRegister(AddrRegClass);
+ OverflowDestReg = MRI.createVirtualRegister(AddrRegClass);
+
+ const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+ MachineFunction *MF = MBB->getParent();
+ overflowMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ offsetMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ endMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+
+ MachineFunction::iterator MBBIter = MBB;
+ ++MBBIter;
+
+ // Insert the new basic blocks
+ MF->insert(MBBIter, offsetMBB);
+ MF->insert(MBBIter, overflowMBB);
+ MF->insert(MBBIter, endMBB);
+
+ // Transfer the remainder of MBB and its successor edges to endMBB.
+ endMBB->splice(endMBB->begin(), thisMBB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ thisMBB->end());
+ endMBB->transferSuccessorsAndUpdatePHIs(thisMBB);
+
+ // Make offsetMBB and overflowMBB successors of thisMBB
+ thisMBB->addSuccessor(offsetMBB);
+ thisMBB->addSuccessor(overflowMBB);
+
+ // endMBB is a successor of both offsetMBB and overflowMBB
+ offsetMBB->addSuccessor(endMBB);
+ overflowMBB->addSuccessor(endMBB);
+
+ // Load the offset value into a register
+ OffsetReg = MRI.createVirtualRegister(OffsetRegClass);
+ BuildMI(thisMBB, DL, TII->get(X86::MOV32rm), OffsetReg)
+ .addOperand(Base)
+ .addOperand(Scale)
+ .addOperand(Index)
+ .addDisp(Disp, UseFPOffset ? 4 : 0)
+ .addOperand(Segment)
+ .setMemRefs(MMOBegin, MMOEnd);
+
+ // Check if there is enough room left to pull this argument.
+ BuildMI(thisMBB, DL, TII->get(X86::CMP32ri))
+ .addReg(OffsetReg)
+ .addImm(MaxOffset + 8 - ArgSizeA8);
+
+ // Branch to "overflowMBB" if offset >= max
+ // Fall through to "offsetMBB" otherwise
+ BuildMI(thisMBB, DL, TII->get(X86::GetCondBranchFromCond(X86::COND_AE)))
+ .addMBB(overflowMBB);
+ }
+
+ // In offsetMBB, emit code to use the reg_save_area.
+ if (offsetMBB) {
+ assert(OffsetReg != 0);
+
+ // Read the reg_save_area address.
+ unsigned RegSaveReg = MRI.createVirtualRegister(AddrRegClass);
+ BuildMI(offsetMBB, DL, TII->get(X86::MOV64rm), RegSaveReg)
+ .addOperand(Base)
+ .addOperand(Scale)
+ .addOperand(Index)
+ .addDisp(Disp, 16)
+ .addOperand(Segment)
+ .setMemRefs(MMOBegin, MMOEnd);
+
+ // Zero-extend the offset
+ unsigned OffsetReg64 = MRI.createVirtualRegister(AddrRegClass);
+ BuildMI(offsetMBB, DL, TII->get(X86::SUBREG_TO_REG), OffsetReg64)
+ .addImm(0)
+ .addReg(OffsetReg)
+ .addImm(X86::sub_32bit);
+
+ // Add the offset to the reg_save_area to get the final address.
+ BuildMI(offsetMBB, DL, TII->get(X86::ADD64rr), OffsetDestReg)
+ .addReg(OffsetReg64)
+ .addReg(RegSaveReg);
+
+ // Compute the offset for the next argument
+ unsigned NextOffsetReg = MRI.createVirtualRegister(OffsetRegClass);
+ BuildMI(offsetMBB, DL, TII->get(X86::ADD32ri), NextOffsetReg)
+ .addReg(OffsetReg)
+ .addImm(UseFPOffset ? 16 : 8);
+
+ // Store it back into the va_list.
+ BuildMI(offsetMBB, DL, TII->get(X86::MOV32mr))
+ .addOperand(Base)
+ .addOperand(Scale)
+ .addOperand(Index)
+ .addDisp(Disp, UseFPOffset ? 4 : 0)
+ .addOperand(Segment)
+ .addReg(NextOffsetReg)
+ .setMemRefs(MMOBegin, MMOEnd);
+
+ // Jump to endMBB
+ BuildMI(offsetMBB, DL, TII->get(X86::JMP_4))
+ .addMBB(endMBB);
+ }
+
+ //
+ // Emit code to use overflow area
+ //
+
+ // Load the overflow_area address into a register.
+ unsigned OverflowAddrReg = MRI.createVirtualRegister(AddrRegClass);
+ BuildMI(overflowMBB, DL, TII->get(X86::MOV64rm), OverflowAddrReg)
+ .addOperand(Base)
+ .addOperand(Scale)
+ .addOperand(Index)
+ .addDisp(Disp, 8)
+ .addOperand(Segment)
+ .setMemRefs(MMOBegin, MMOEnd);
+
+ // If we need to align it, do so. Otherwise, just copy the address
+ // to OverflowDestReg.
+ if (NeedsAlign) {
+ // Align the overflow address
+ assert((Align & (Align-1)) == 0 && "Alignment must be a power of 2");
+ unsigned TmpReg = MRI.createVirtualRegister(AddrRegClass);
+
+ // aligned_addr = (addr + (align-1)) & ~(align-1)
+ BuildMI(overflowMBB, DL, TII->get(X86::ADD64ri32), TmpReg)
+ .addReg(OverflowAddrReg)
+ .addImm(Align-1);
+
+ BuildMI(overflowMBB, DL, TII->get(X86::AND64ri32), OverflowDestReg)
+ .addReg(TmpReg)
+ .addImm(~(uint64_t)(Align-1));
+ } else {
+ BuildMI(overflowMBB, DL, TII->get(TargetOpcode::COPY), OverflowDestReg)
+ .addReg(OverflowAddrReg);
+ }
+
+ // Compute the next overflow address after this argument.
+ // (the overflow address should be kept 8-byte aligned)
+ unsigned NextAddrReg = MRI.createVirtualRegister(AddrRegClass);
+ BuildMI(overflowMBB, DL, TII->get(X86::ADD64ri32), NextAddrReg)
+ .addReg(OverflowDestReg)
+ .addImm(ArgSizeA8);
+
+ // Store the new overflow address.
+ BuildMI(overflowMBB, DL, TII->get(X86::MOV64mr))
+ .addOperand(Base)
+ .addOperand(Scale)
+ .addOperand(Index)
+ .addDisp(Disp, 8)
+ .addOperand(Segment)
+ .addReg(NextAddrReg)
+ .setMemRefs(MMOBegin, MMOEnd);
+
+ // If we branched, emit the PHI to the front of endMBB.
+ if (offsetMBB) {
+ BuildMI(*endMBB, endMBB->begin(), DL,
+ TII->get(X86::PHI), DestReg)
+ .addReg(OffsetDestReg).addMBB(offsetMBB)
+ .addReg(OverflowDestReg).addMBB(overflowMBB);
+ }
+
+ // Erase the pseudo instruction
+ MI->eraseFromParent();
+
+ return endMBB;
+}
+
+MachineBasicBlock *
+X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter(
+ MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ // Emit code to save XMM registers to the stack. The ABI says that the
+ // number of registers to save is given in %al, so it's theoretically
+ // possible to do an indirect jump trick to avoid saving all of them,
+ // however this code takes a simpler approach and just executes all
+ // of the stores if %al is non-zero. It's less code, and it's probably
+ // easier on the hardware branch predictor, and stores aren't all that
+ // expensive anyway.
+
+ // Create the new basic blocks. One block contains all the XMM stores,
+ // and one block is the final destination regardless of whether any
+ // stores were performed.
+ const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+ MachineFunction *F = MBB->getParent();
+ MachineFunction::iterator MBBIter = MBB;
+ ++MBBIter;
+ MachineBasicBlock *XMMSaveMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *EndMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(MBBIter, XMMSaveMBB);
+ F->insert(MBBIter, EndMBB);
+
+ // Transfer the remainder of MBB and its successor edges to EndMBB.
+ EndMBB->splice(EndMBB->begin(), MBB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ MBB->end());
+ EndMBB->transferSuccessorsAndUpdatePHIs(MBB);
+
+ // The original block will now fall through to the XMM save block.
+ MBB->addSuccessor(XMMSaveMBB);
+ // The XMMSaveMBB will fall through to the end block.
+ XMMSaveMBB->addSuccessor(EndMBB);
+
+ // Now add the instructions.
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc DL = MI->getDebugLoc();
+
+ unsigned CountReg = MI->getOperand(0).getReg();
+ int64_t RegSaveFrameIndex = MI->getOperand(1).getImm();
+ int64_t VarArgsFPOffset = MI->getOperand(2).getImm();
+
+ if (!Subtarget->isTargetWin64()) {
+ // If %al is 0, branch around the XMM save block.
+ BuildMI(MBB, DL, TII->get(X86::TEST8rr)).addReg(CountReg).addReg(CountReg);
+ BuildMI(MBB, DL, TII->get(X86::JE_4)).addMBB(EndMBB);
+ MBB->addSuccessor(EndMBB);
+ }
+
+ unsigned MOVOpc = Subtarget->hasAVX() ? X86::VMOVAPSmr : X86::MOVAPSmr;
+ // In the XMM save block, save all the XMM argument registers.
+ for (int i = 3, e = MI->getNumOperands(); i != e; ++i) {
+ int64_t Offset = (i - 3) * 16 + VarArgsFPOffset;
+ MachineMemOperand *MMO =
+ F->getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(RegSaveFrameIndex, Offset),
+ MachineMemOperand::MOStore,
+ /*Size=*/16, /*Align=*/16);
+ BuildMI(XMMSaveMBB, DL, TII->get(MOVOpc))
+ .addFrameIndex(RegSaveFrameIndex)
+ .addImm(/*Scale=*/1)
+ .addReg(/*IndexReg=*/0)
+ .addImm(/*Disp=*/Offset)
+ .addReg(/*Segment=*/0)
+ .addReg(MI->getOperand(i).getReg())
+ .addMemOperand(MMO);
+ }
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+
+ return EndMBB;
+}
+
+MachineBasicBlock *
+X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc DL = MI->getDebugLoc();
+
+ // To "insert" a SELECT_CC instruction, we actually have to insert the
+ // diamond control-flow pattern. The incoming instruction knows the
+ // destination vreg to set, the condition code register to branch on, the
+ // true/false values to select between, and a branch opcode to use.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // cmpTY ccX, r1, r2
+ // bCC copy1MBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, copy0MBB);
+ F->insert(It, sinkMBB);
+
+ // If the EFLAGS register isn't dead in the terminator, then claim that it's
+ // live into the sink and copy blocks.
+ if (!MI->killsRegister(X86::EFLAGS)) {
+ copy0MBB->addLiveIn(X86::EFLAGS);
+ sinkMBB->addLiveIn(X86::EFLAGS);
+ }
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
+ // Create the conditional branch instruction.
+ unsigned Opc =
+ X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm());
+ BuildMI(BB, DL, TII->get(Opc)).addMBB(sinkMBB);
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to sinkMBB
+ copy0MBB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
+ // ...
+ BuildMI(*sinkMBB, sinkMBB->begin(), DL,
+ TII->get(X86::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
+ .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return sinkMBB;
+}
+
+MachineBasicBlock *
+X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI, MachineBasicBlock *BB,
+ bool Is64Bit) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc DL = MI->getDebugLoc();
+ MachineFunction *MF = BB->getParent();
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+
+ assert(EnableSegmentedStacks);
+
+ unsigned TlsReg = Is64Bit ? X86::FS : X86::GS;
+ unsigned TlsOffset = Is64Bit ? 0x70 : 0x30;
+
+ // BB:
+ // ... [Till the alloca]
+ // If stacklet is not large enough, jump to mallocMBB
+ //
+ // bumpMBB:
+ // Allocate by subtracting from RSP
+ // Jump to continueMBB
+ //
+ // mallocMBB:
+ // Allocate by call to runtime
+ //
+ // continueMBB:
+ // ...
+ // [rest of original BB]
+ //
+
+ MachineBasicBlock *mallocMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *bumpMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *continueMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ const TargetRegisterClass *AddrRegClass =
+ getRegClassFor(Is64Bit ? MVT::i64:MVT::i32);
+
+ unsigned mallocPtrVReg = MRI.createVirtualRegister(AddrRegClass),
+ bumpSPPtrVReg = MRI.createVirtualRegister(AddrRegClass),
+ tmpSPVReg = MRI.createVirtualRegister(AddrRegClass),
+ sizeVReg = MI->getOperand(1).getReg(),
+ physSPReg = Is64Bit ? X86::RSP : X86::ESP;
+
+ MachineFunction::iterator MBBIter = BB;
+ ++MBBIter;
+
+ MF->insert(MBBIter, bumpMBB);
+ MF->insert(MBBIter, mallocMBB);
+ MF->insert(MBBIter, continueMBB);
+
+ continueMBB->splice(continueMBB->begin(), BB, llvm::next
+ (MachineBasicBlock::iterator(MI)), BB->end());
+ continueMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Add code to the main basic block to check if the stack limit has been hit,
+ // and if so, jump to mallocMBB otherwise to bumpMBB.
+ BuildMI(BB, DL, TII->get(TargetOpcode::COPY), tmpSPVReg).addReg(physSPReg);
+ BuildMI(BB, DL, TII->get(Is64Bit ? X86::SUB64rr:X86::SUB32rr), tmpSPVReg)
+ .addReg(tmpSPVReg).addReg(sizeVReg);
+ BuildMI(BB, DL, TII->get(Is64Bit ? X86::CMP64mr:X86::CMP32mr))
+ .addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg)
+ .addReg(tmpSPVReg);
+ BuildMI(BB, DL, TII->get(X86::JG_4)).addMBB(mallocMBB);
+
+ // bumpMBB simply decreases the stack pointer, since we know the current
+ // stacklet has enough space.
+ BuildMI(bumpMBB, DL, TII->get(TargetOpcode::COPY), physSPReg)
+ .addReg(tmpSPVReg);
+ BuildMI(bumpMBB, DL, TII->get(TargetOpcode::COPY), bumpSPPtrVReg)
+ .addReg(tmpSPVReg);
+ BuildMI(bumpMBB, DL, TII->get(X86::JMP_4)).addMBB(continueMBB);
+
+ // Calls into a routine in libgcc to allocate more space from the heap.
+ if (Is64Bit) {
+ BuildMI(mallocMBB, DL, TII->get(X86::MOV64rr), X86::RDI)
+ .addReg(sizeVReg);
+ BuildMI(mallocMBB, DL, TII->get(X86::CALL64pcrel32))
+ .addExternalSymbol("__morestack_allocate_stack_space").addReg(X86::RDI);
+ } else {
+ BuildMI(mallocMBB, DL, TII->get(X86::SUB32ri), physSPReg).addReg(physSPReg)
+ .addImm(12);
+ BuildMI(mallocMBB, DL, TII->get(X86::PUSH32r)).addReg(sizeVReg);
+ BuildMI(mallocMBB, DL, TII->get(X86::CALLpcrel32))
+ .addExternalSymbol("__morestack_allocate_stack_space");
+ }
+
+ if (!Is64Bit)
+ BuildMI(mallocMBB, DL, TII->get(X86::ADD32ri), physSPReg).addReg(physSPReg)
+ .addImm(16);
+
+ BuildMI(mallocMBB, DL, TII->get(TargetOpcode::COPY), mallocPtrVReg)
+ .addReg(Is64Bit ? X86::RAX : X86::EAX);
+ BuildMI(mallocMBB, DL, TII->get(X86::JMP_4)).addMBB(continueMBB);
+
+ // Set up the CFG correctly.
+ BB->addSuccessor(bumpMBB);
+ BB->addSuccessor(mallocMBB);
+ mallocMBB->addSuccessor(continueMBB);
+ bumpMBB->addSuccessor(continueMBB);
+
+ // Take care of the PHI nodes.
+ BuildMI(*continueMBB, continueMBB->begin(), DL, TII->get(X86::PHI),
+ MI->getOperand(0).getReg())
+ .addReg(mallocPtrVReg).addMBB(mallocMBB)
+ .addReg(bumpSPPtrVReg).addMBB(bumpMBB);
+
+ // Delete the original pseudo instruction.
+ MI->eraseFromParent();
+
+ // And we're done.
+ return continueMBB;
+}
+
+MachineBasicBlock *
+X86TargetLowering::EmitLoweredWinAlloca(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc DL = MI->getDebugLoc();
+
+ assert(!Subtarget->isTargetEnvMacho());
+
+ // The lowering is pretty easy: we're just emitting the call to _alloca. The
+ // non-trivial part is impdef of ESP.
+
+ if (Subtarget->isTargetWin64()) {
+ if (Subtarget->isTargetCygMing()) {
+ // ___chkstk(Mingw64):
+ // Clobbers R10, R11, RAX and EFLAGS.
+ // Updates RSP.
+ BuildMI(*BB, MI, DL, TII->get(X86::W64ALLOCA))
+ .addExternalSymbol("___chkstk")
+ .addReg(X86::RAX, RegState::Implicit)
+ .addReg(X86::RSP, RegState::Implicit)
+ .addReg(X86::RAX, RegState::Define | RegState::Implicit)
+ .addReg(X86::RSP, RegState::Define | RegState::Implicit)
+ .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
+ } else {
+ // __chkstk(MSVCRT): does not update stack pointer.
+ // Clobbers R10, R11 and EFLAGS.
+ // FIXME: RAX(allocated size) might be reused and not killed.
+ BuildMI(*BB, MI, DL, TII->get(X86::W64ALLOCA))
+ .addExternalSymbol("__chkstk")
+ .addReg(X86::RAX, RegState::Implicit)
+ .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
+ // RAX has the offset to subtracted from RSP.
+ BuildMI(*BB, MI, DL, TII->get(X86::SUB64rr), X86::RSP)
+ .addReg(X86::RSP)
+ .addReg(X86::RAX);
+ }
+ } else {
+ const char *StackProbeSymbol =
+ Subtarget->isTargetWindows() ? "_chkstk" : "_alloca";
+
+ BuildMI(*BB, MI, DL, TII->get(X86::CALLpcrel32))
+ .addExternalSymbol(StackProbeSymbol)
+ .addReg(X86::EAX, RegState::Implicit)
+ .addReg(X86::ESP, RegState::Implicit)
+ .addReg(X86::EAX, RegState::Define | RegState::Implicit)
+ .addReg(X86::ESP, RegState::Define | RegState::Implicit)
+ .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
+ }
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+MachineBasicBlock *
+X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ // This is pretty easy. We're taking the value that we received from
+ // our load from the relocation, sticking it in either RDI (x86-64)
+ // or EAX and doing an indirect call. The return value will then
+ // be in the normal return register.
+ const X86InstrInfo *TII
+ = static_cast<const X86InstrInfo*>(getTargetMachine().getInstrInfo());
+ DebugLoc DL = MI->getDebugLoc();
+ MachineFunction *F = BB->getParent();
+
+ assert(Subtarget->isTargetDarwin() && "Darwin only instr emitted?");
+ assert(MI->getOperand(3).isGlobal() && "This should be a global");
+
+ if (Subtarget->is64Bit()) {
+ MachineInstrBuilder MIB = BuildMI(*BB, MI, DL,
+ TII->get(X86::MOV64rm), X86::RDI)
+ .addReg(X86::RIP)
+ .addImm(0).addReg(0)
+ .addGlobalAddress(MI->getOperand(3).getGlobal(), 0,
+ MI->getOperand(3).getTargetFlags())
+ .addReg(0);
+ MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL64m));
+ addDirectMem(MIB, X86::RDI);
+ } else if (getTargetMachine().getRelocationModel() != Reloc::PIC_) {
+ MachineInstrBuilder MIB = BuildMI(*BB, MI, DL,
+ TII->get(X86::MOV32rm), X86::EAX)
+ .addReg(0)
+ .addImm(0).addReg(0)
+ .addGlobalAddress(MI->getOperand(3).getGlobal(), 0,
+ MI->getOperand(3).getTargetFlags())
+ .addReg(0);
+ MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL32m));
+ addDirectMem(MIB, X86::EAX);
+ } else {
+ MachineInstrBuilder MIB = BuildMI(*BB, MI, DL,
+ TII->get(X86::MOV32rm), X86::EAX)
+ .addReg(TII->getGlobalBaseReg(F))
+ .addImm(0).addReg(0)
+ .addGlobalAddress(MI->getOperand(3).getGlobal(), 0,
+ MI->getOperand(3).getTargetFlags())
+ .addReg(0);
+ MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL32m));
+ addDirectMem(MIB, X86::EAX);
+ }
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+MachineBasicBlock *
+X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ switch (MI->getOpcode()) {
+ default: assert(0 && "Unexpected instr type to insert");
+ case X86::TAILJMPd64:
+ case X86::TAILJMPr64:
+ case X86::TAILJMPm64:
+ assert(0 && "TAILJMP64 would not be touched here.");
+ case X86::TCRETURNdi64:
+ case X86::TCRETURNri64:
+ case X86::TCRETURNmi64:
+ // Defs of TCRETURNxx64 has Win64's callee-saved registers, as subset.
+ // On AMD64, additional defs should be added before register allocation.
+ if (!Subtarget->isTargetWin64()) {
+ MI->addRegisterDefined(X86::RSI);
+ MI->addRegisterDefined(X86::RDI);
+ MI->addRegisterDefined(X86::XMM6);
+ MI->addRegisterDefined(X86::XMM7);
+ MI->addRegisterDefined(X86::XMM8);
+ MI->addRegisterDefined(X86::XMM9);
+ MI->addRegisterDefined(X86::XMM10);
+ MI->addRegisterDefined(X86::XMM11);
+ MI->addRegisterDefined(X86::XMM12);
+ MI->addRegisterDefined(X86::XMM13);
+ MI->addRegisterDefined(X86::XMM14);
+ MI->addRegisterDefined(X86::XMM15);
+ }
+ return BB;
+ case X86::WIN_ALLOCA:
+ return EmitLoweredWinAlloca(MI, BB);
+ case X86::SEG_ALLOCA_32:
+ return EmitLoweredSegAlloca(MI, BB, false);
+ case X86::SEG_ALLOCA_64:
+ return EmitLoweredSegAlloca(MI, BB, true);
+ case X86::TLSCall_32:
+ case X86::TLSCall_64:
+ return EmitLoweredTLSCall(MI, BB);
+ case X86::CMOV_GR8:
+ case X86::CMOV_FR32:
+ case X86::CMOV_FR64:
+ case X86::CMOV_V4F32:
+ case X86::CMOV_V2F64:
+ case X86::CMOV_V2I64:
+ case X86::CMOV_V8F32:
+ case X86::CMOV_V4F64:
+ case X86::CMOV_V4I64:
+ case X86::CMOV_GR16:
+ case X86::CMOV_GR32:
+ case X86::CMOV_RFP32:
+ case X86::CMOV_RFP64:
+ case X86::CMOV_RFP80:
+ return EmitLoweredSelect(MI, BB);
+
+ case X86::FP32_TO_INT16_IN_MEM:
+ case X86::FP32_TO_INT32_IN_MEM:
+ case X86::FP32_TO_INT64_IN_MEM:
+ case X86::FP64_TO_INT16_IN_MEM:
+ case X86::FP64_TO_INT32_IN_MEM:
+ case X86::FP64_TO_INT64_IN_MEM:
+ case X86::FP80_TO_INT16_IN_MEM:
+ case X86::FP80_TO_INT32_IN_MEM:
+ case X86::FP80_TO_INT64_IN_MEM: {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc DL = MI->getDebugLoc();
+
+ // Change the floating point control register to use "round towards zero"
+ // mode when truncating to an integer value.
+ MachineFunction *F = BB->getParent();
+ int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2, false);
+ addFrameReference(BuildMI(*BB, MI, DL,
+ TII->get(X86::FNSTCW16m)), CWFrameIdx);
+
+ // Load the old value of the high byte of the control word...
+ unsigned OldCW =
+ F->getRegInfo().createVirtualRegister(X86::GR16RegisterClass);
+ addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOV16rm), OldCW),
+ CWFrameIdx);
+
+ // Set the high part to be round to zero...
+ addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOV16mi)), CWFrameIdx)
+ .addImm(0xC7F);
+
+ // Reload the modified control word now...
+ addFrameReference(BuildMI(*BB, MI, DL,
+ TII->get(X86::FLDCW16m)), CWFrameIdx);
+
+ // Restore the memory image of control word to original value
+ addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOV16mr)), CWFrameIdx)
+ .addReg(OldCW);
+
+ // Get the X86 opcode to use.
+ unsigned Opc;
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("illegal opcode!");
+ case X86::FP32_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m32; break;
+ case X86::FP32_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m32; break;
+ case X86::FP32_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m32; break;
+ case X86::FP64_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m64; break;
+ case X86::FP64_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m64; break;
+ case X86::FP64_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m64; break;
+ case X86::FP80_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m80; break;
+ case X86::FP80_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m80; break;
+ case X86::FP80_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m80; break;
+ }
+
+ X86AddressMode AM;
+ MachineOperand &Op = MI->getOperand(0);
+ if (Op.isReg()) {
+ AM.BaseType = X86AddressMode::RegBase;
+ AM.Base.Reg = Op.getReg();
+ } else {
+ AM.BaseType = X86AddressMode::FrameIndexBase;
+ AM.Base.FrameIndex = Op.getIndex();
+ }
+ Op = MI->getOperand(1);
+ if (Op.isImm())
+ AM.Scale = Op.getImm();
+ Op = MI->getOperand(2);
+ if (Op.isImm())
+ AM.IndexReg = Op.getImm();
+ Op = MI->getOperand(3);
+ if (Op.isGlobal()) {
+ AM.GV = Op.getGlobal();
+ } else {
+ AM.Disp = Op.getImm();
+ }
+ addFullAddress(BuildMI(*BB, MI, DL, TII->get(Opc)), AM)
+ .addReg(MI->getOperand(X86::AddrNumOperands).getReg());
+
+ // Reload the original control word now.
+ addFrameReference(BuildMI(*BB, MI, DL,
+ TII->get(X86::FLDCW16m)), CWFrameIdx);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+ }
+ // String/text processing lowering.
+ case X86::PCMPISTRM128REG:
+ case X86::VPCMPISTRM128REG:
+ return EmitPCMP(MI, BB, 3, false /* in-mem */);
+ case X86::PCMPISTRM128MEM:
+ case X86::VPCMPISTRM128MEM:
+ return EmitPCMP(MI, BB, 3, true /* in-mem */);
+ case X86::PCMPESTRM128REG:
+ case X86::VPCMPESTRM128REG:
+ return EmitPCMP(MI, BB, 5, false /* in mem */);
+ case X86::PCMPESTRM128MEM:
+ case X86::VPCMPESTRM128MEM:
+ return EmitPCMP(MI, BB, 5, true /* in mem */);
+
+ // Thread synchronization.
+ case X86::MONITOR:
+ return EmitMonitor(MI, BB);
+ case X86::MWAIT:
+ return EmitMwait(MI, BB);
+
+ // Atomic Lowering.
+ case X86::ATOMAND32:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr,
+ X86::AND32ri, X86::MOV32rm,
+ X86::LCMPXCHG32,
+ X86::NOT32r, X86::EAX,
+ X86::GR32RegisterClass);
+ case X86::ATOMOR32:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR32rr,
+ X86::OR32ri, X86::MOV32rm,
+ X86::LCMPXCHG32,
+ X86::NOT32r, X86::EAX,
+ X86::GR32RegisterClass);
+ case X86::ATOMXOR32:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR32rr,
+ X86::XOR32ri, X86::MOV32rm,
+ X86::LCMPXCHG32,
+ X86::NOT32r, X86::EAX,
+ X86::GR32RegisterClass);
+ case X86::ATOMNAND32:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr,
+ X86::AND32ri, X86::MOV32rm,
+ X86::LCMPXCHG32,
+ X86::NOT32r, X86::EAX,
+ X86::GR32RegisterClass, true);
+ case X86::ATOMMIN32:
+ return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL32rr);
+ case X86::ATOMMAX32:
+ return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVG32rr);
+ case X86::ATOMUMIN32:
+ return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVB32rr);
+ case X86::ATOMUMAX32:
+ return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVA32rr);
+
+ case X86::ATOMAND16:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND16rr,
+ X86::AND16ri, X86::MOV16rm,
+ X86::LCMPXCHG16,
+ X86::NOT16r, X86::AX,
+ X86::GR16RegisterClass);
+ case X86::ATOMOR16:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR16rr,
+ X86::OR16ri, X86::MOV16rm,
+ X86::LCMPXCHG16,
+ X86::NOT16r, X86::AX,
+ X86::GR16RegisterClass);
+ case X86::ATOMXOR16:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR16rr,
+ X86::XOR16ri, X86::MOV16rm,
+ X86::LCMPXCHG16,
+ X86::NOT16r, X86::AX,
+ X86::GR16RegisterClass);
+ case X86::ATOMNAND16:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND16rr,
+ X86::AND16ri, X86::MOV16rm,
+ X86::LCMPXCHG16,
+ X86::NOT16r, X86::AX,
+ X86::GR16RegisterClass, true);
+ case X86::ATOMMIN16:
+ return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL16rr);
+ case X86::ATOMMAX16:
+ return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVG16rr);
+ case X86::ATOMUMIN16:
+ return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVB16rr);
+ case X86::ATOMUMAX16:
+ return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVA16rr);
+
+ case X86::ATOMAND8:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND8rr,
+ X86::AND8ri, X86::MOV8rm,
+ X86::LCMPXCHG8,
+ X86::NOT8r, X86::AL,
+ X86::GR8RegisterClass);
+ case X86::ATOMOR8:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR8rr,
+ X86::OR8ri, X86::MOV8rm,
+ X86::LCMPXCHG8,
+ X86::NOT8r, X86::AL,
+ X86::GR8RegisterClass);
+ case X86::ATOMXOR8:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR8rr,
+ X86::XOR8ri, X86::MOV8rm,
+ X86::LCMPXCHG8,
+ X86::NOT8r, X86::AL,
+ X86::GR8RegisterClass);
+ case X86::ATOMNAND8:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND8rr,
+ X86::AND8ri, X86::MOV8rm,
+ X86::LCMPXCHG8,
+ X86::NOT8r, X86::AL,
+ X86::GR8RegisterClass, true);
+ // FIXME: There are no CMOV8 instructions; MIN/MAX need some other way.
+ // This group is for 64-bit host.
+ case X86::ATOMAND64:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND64rr,
+ X86::AND64ri32, X86::MOV64rm,
+ X86::LCMPXCHG64,
+ X86::NOT64r, X86::RAX,
+ X86::GR64RegisterClass);
+ case X86::ATOMOR64:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR64rr,
+ X86::OR64ri32, X86::MOV64rm,
+ X86::LCMPXCHG64,
+ X86::NOT64r, X86::RAX,
+ X86::GR64RegisterClass);
+ case X86::ATOMXOR64:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR64rr,
+ X86::XOR64ri32, X86::MOV64rm,
+ X86::LCMPXCHG64,
+ X86::NOT64r, X86::RAX,
+ X86::GR64RegisterClass);
+ case X86::ATOMNAND64:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND64rr,
+ X86::AND64ri32, X86::MOV64rm,
+ X86::LCMPXCHG64,
+ X86::NOT64r, X86::RAX,
+ X86::GR64RegisterClass, true);
+ case X86::ATOMMIN64:
+ return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL64rr);
+ case X86::ATOMMAX64:
+ return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVG64rr);
+ case X86::ATOMUMIN64:
+ return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVB64rr);
+ case X86::ATOMUMAX64:
+ return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVA64rr);
+
+ // This group does 64-bit operations on a 32-bit host.
+ case X86::ATOMAND6432:
+ return EmitAtomicBit6432WithCustomInserter(MI, BB,
+ X86::AND32rr, X86::AND32rr,
+ X86::AND32ri, X86::AND32ri,
+ false);
+ case X86::ATOMOR6432:
+ return EmitAtomicBit6432WithCustomInserter(MI, BB,
+ X86::OR32rr, X86::OR32rr,
+ X86::OR32ri, X86::OR32ri,
+ false);
+ case X86::ATOMXOR6432:
+ return EmitAtomicBit6432WithCustomInserter(MI, BB,
+ X86::XOR32rr, X86::XOR32rr,
+ X86::XOR32ri, X86::XOR32ri,
+ false);
+ case X86::ATOMNAND6432:
+ return EmitAtomicBit6432WithCustomInserter(MI, BB,
+ X86::AND32rr, X86::AND32rr,
+ X86::AND32ri, X86::AND32ri,
+ true);
+ case X86::ATOMADD6432:
+ return EmitAtomicBit6432WithCustomInserter(MI, BB,
+ X86::ADD32rr, X86::ADC32rr,
+ X86::ADD32ri, X86::ADC32ri,
+ false);
+ case X86::ATOMSUB6432:
+ return EmitAtomicBit6432WithCustomInserter(MI, BB,
+ X86::SUB32rr, X86::SBB32rr,
+ X86::SUB32ri, X86::SBB32ri,
+ false);
+ case X86::ATOMSWAP6432:
+ return EmitAtomicBit6432WithCustomInserter(MI, BB,
+ X86::MOV32rr, X86::MOV32rr,
+ X86::MOV32ri, X86::MOV32ri,
+ false);
+ case X86::VASTART_SAVE_XMM_REGS:
+ return EmitVAStartSaveXMMRegsWithCustomInserter(MI, BB);
+
+ case X86::VAARG_64:
+ return EmitVAARG64WithCustomInserter(MI, BB);
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// X86 Optimization Hooks
+//===----------------------------------------------------------------------===//
+
+void X86TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
+ const APInt &Mask,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth) const {
+ unsigned Opc = Op.getOpcode();
+ assert((Opc >= ISD::BUILTIN_OP_END ||
+ Opc == ISD::INTRINSIC_WO_CHAIN ||
+ Opc == ISD::INTRINSIC_W_CHAIN ||
+ Opc == ISD::INTRINSIC_VOID) &&
+ "Should use MaskedValueIsZero if you don't know whether Op"
+ " is a target node!");
+
+ KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0); // Don't know anything.
+ switch (Opc) {
+ default: break;
+ case X86ISD::ADD:
+ case X86ISD::SUB:
+ case X86ISD::ADC:
+ case X86ISD::SBB:
+ case X86ISD::SMUL:
+ case X86ISD::UMUL:
+ case X86ISD::INC:
+ case X86ISD::DEC:
+ case X86ISD::OR:
+ case X86ISD::XOR:
+ case X86ISD::AND:
+ // These nodes' second result is a boolean.
+ if (Op.getResNo() == 0)
+ break;
+ // Fallthrough
+ case X86ISD::SETCC:
+ KnownZero |= APInt::getHighBitsSet(Mask.getBitWidth(),
+ Mask.getBitWidth() - 1);
+ break;
+ case ISD::INTRINSIC_WO_CHAIN: {
+ unsigned IntId = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned NumLoBits = 0;
+ switch (IntId) {
+ default: break;
+ case Intrinsic::x86_sse_movmsk_ps:
+ case Intrinsic::x86_avx_movmsk_ps_256:
+ case Intrinsic::x86_sse2_movmsk_pd:
+ case Intrinsic::x86_avx_movmsk_pd_256:
+ case Intrinsic::x86_mmx_pmovmskb:
+ case Intrinsic::x86_sse2_pmovmskb_128: {
+ // High bits of movmskp{s|d}, pmovmskb are known zero.
+ switch (IntId) {
+ case Intrinsic::x86_sse_movmsk_ps: NumLoBits = 4; break;
+ case Intrinsic::x86_avx_movmsk_ps_256: NumLoBits = 8; break;
+ case Intrinsic::x86_sse2_movmsk_pd: NumLoBits = 2; break;
+ case Intrinsic::x86_avx_movmsk_pd_256: NumLoBits = 4; break;
+ case Intrinsic::x86_mmx_pmovmskb: NumLoBits = 8; break;
+ case Intrinsic::x86_sse2_pmovmskb_128: NumLoBits = 16; break;
+ }
+ KnownZero = APInt::getHighBitsSet(Mask.getBitWidth(),
+ Mask.getBitWidth() - NumLoBits);
+ break;
+ }
+ }
+ break;
+ }
+ }
+}
+
+unsigned X86TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
+ unsigned Depth) const {
+ // SETCC_CARRY sets the dest to ~0 for true or 0 for false.
+ if (Op.getOpcode() == X86ISD::SETCC_CARRY)
+ return Op.getValueType().getScalarType().getSizeInBits();
+
+ // Fallback case.
+ return 1;
+}
+
+/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the
+/// node is a GlobalAddress + offset.
+bool X86TargetLowering::isGAPlusOffset(SDNode *N,
+ const GlobalValue* &GA,
+ int64_t &Offset) const {
+ if (N->getOpcode() == X86ISD::Wrapper) {
+ if (isa<GlobalAddressSDNode>(N->getOperand(0))) {
+ GA = cast<GlobalAddressSDNode>(N->getOperand(0))->getGlobal();
+ Offset = cast<GlobalAddressSDNode>(N->getOperand(0))->getOffset();
+ return true;
+ }
+ }
+ return TargetLowering::isGAPlusOffset(N, GA, Offset);
+}
+
+/// isShuffleHigh128VectorInsertLow - Checks whether the shuffle node is the
+/// same as extracting the high 128-bit part of 256-bit vector and then
+/// inserting the result into the low part of a new 256-bit vector
+static bool isShuffleHigh128VectorInsertLow(ShuffleVectorSDNode *SVOp) {
+ EVT VT = SVOp->getValueType(0);
+ int NumElems = VT.getVectorNumElements();
+
+ // vector_shuffle <4, 5, 6, 7, u, u, u, u> or <2, 3, u, u>
+ for (int i = 0, j = NumElems/2; i < NumElems/2; ++i, ++j)
+ if (!isUndefOrEqual(SVOp->getMaskElt(i), j) ||
+ SVOp->getMaskElt(j) >= 0)
+ return false;
+
+ return true;
+}
+
+/// isShuffleLow128VectorInsertHigh - Checks whether the shuffle node is the
+/// same as extracting the low 128-bit part of 256-bit vector and then
+/// inserting the result into the high part of a new 256-bit vector
+static bool isShuffleLow128VectorInsertHigh(ShuffleVectorSDNode *SVOp) {
+ EVT VT = SVOp->getValueType(0);
+ int NumElems = VT.getVectorNumElements();
+
+ // vector_shuffle <u, u, u, u, 0, 1, 2, 3> or <u, u, 0, 1>
+ for (int i = NumElems/2, j = 0; i < NumElems; ++i, ++j)
+ if (!isUndefOrEqual(SVOp->getMaskElt(i), j) ||
+ SVOp->getMaskElt(j) >= 0)
+ return false;
+
+ return true;
+}
+
+/// PerformShuffleCombine256 - Performs shuffle combines for 256-bit vectors.
+static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ DebugLoc dl = N->getDebugLoc();
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+ SDValue V1 = SVOp->getOperand(0);
+ SDValue V2 = SVOp->getOperand(1);
+ EVT VT = SVOp->getValueType(0);
+ int NumElems = VT.getVectorNumElements();
+
+ if (V1.getOpcode() == ISD::CONCAT_VECTORS &&
+ V2.getOpcode() == ISD::CONCAT_VECTORS) {
+ //
+ // 0,0,0,...
+ // |
+ // V UNDEF BUILD_VECTOR UNDEF
+ // \ / \ /
+ // CONCAT_VECTOR CONCAT_VECTOR
+ // \ /
+ // \ /
+ // RESULT: V + zero extended
+ //
+ if (V2.getOperand(0).getOpcode() != ISD::BUILD_VECTOR ||
+ V2.getOperand(1).getOpcode() != ISD::UNDEF ||
+ V1.getOperand(1).getOpcode() != ISD::UNDEF)
+ return SDValue();
+
+ if (!ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()))
+ return SDValue();
+
+ // To match the shuffle mask, the first half of the mask should
+ // be exactly the first vector, and all the rest a splat with the
+ // first element of the second one.
+ for (int i = 0; i < NumElems/2; ++i)
+ if (!isUndefOrEqual(SVOp->getMaskElt(i), i) ||
+ !isUndefOrEqual(SVOp->getMaskElt(i+NumElems/2), NumElems))
+ return SDValue();
+
+ // Emit a zeroed vector and insert the desired subvector on its
+ // first half.
+ SDValue Zeros = getZeroVector(VT, true /* HasXMMInt */, DAG, dl);
+ SDValue InsV = Insert128BitVector(Zeros, V1.getOperand(0),
+ DAG.getConstant(0, MVT::i32), DAG, dl);
+ return DCI.CombineTo(N, InsV);
+ }
+
+ //===--------------------------------------------------------------------===//
+ // Combine some shuffles into subvector extracts and inserts:
+ //
+
+ // vector_shuffle <4, 5, 6, 7, u, u, u, u> or <2, 3, u, u>
+ if (isShuffleHigh128VectorInsertLow(SVOp)) {
+ SDValue V = Extract128BitVector(V1, DAG.getConstant(NumElems/2, MVT::i32),
+ DAG, dl);
+ SDValue InsV = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, VT),
+ V, DAG.getConstant(0, MVT::i32), DAG, dl);
+ return DCI.CombineTo(N, InsV);
+ }
+
+ // vector_shuffle <u, u, u, u, 0, 1, 2, 3> or <u, u, 0, 1>
+ if (isShuffleLow128VectorInsertHigh(SVOp)) {
+ SDValue V = Extract128BitVector(V1, DAG.getConstant(0, MVT::i32), DAG, dl);
+ SDValue InsV = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, VT),
+ V, DAG.getConstant(NumElems/2, MVT::i32), DAG, dl);
+ return DCI.CombineTo(N, InsV);
+ }
+
+ return SDValue();
+}
+
+/// PerformShuffleCombine - Performs several different shuffle combines.
+static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget *Subtarget) {
+ DebugLoc dl = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+
+ // Don't create instructions with illegal types after legalize types has run.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (!DCI.isBeforeLegalize() && !TLI.isTypeLegal(VT.getVectorElementType()))
+ return SDValue();
+
+ // Combine 256-bit vector shuffles. This is only profitable when in AVX mode
+ if (Subtarget->hasAVX() && VT.getSizeInBits() == 256 &&
+ N->getOpcode() == ISD::VECTOR_SHUFFLE)
+ return PerformShuffleCombine256(N, DAG, DCI);
+
+ // Only handle 128 wide vector from here on.
+ if (VT.getSizeInBits() != 128)
+ return SDValue();
+
+ // Combine a vector_shuffle that is equal to build_vector load1, load2, load3,
+ // load4, <0, 1, 2, 3> into a 128-bit load if the load addresses are
+ // consecutive, non-overlapping, and in the right order.
+ SmallVector<SDValue, 16> Elts;
+ for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i)
+ Elts.push_back(getShuffleScalarElt(N, i, DAG, 0));
+
+ return EltsFromConsecutiveLoads(VT, Elts, dl, DAG);
+}
+
+/// PerformEXTRACT_VECTOR_ELTCombine - Detect vector gather/scatter index
+/// generation and convert it from being a bunch of shuffles and extracts
+/// to a simple store and scalar loads to extract the elements.
+static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ SDValue InputVector = N->getOperand(0);
+
+ // Only operate on vectors of 4 elements, where the alternative shuffling
+ // gets to be more expensive.
+ if (InputVector.getValueType() != MVT::v4i32)
+ return SDValue();
+
+ // Check whether every use of InputVector is an EXTRACT_VECTOR_ELT with a
+ // single use which is a sign-extend or zero-extend, and all elements are
+ // used.
+ SmallVector<SDNode *, 4> Uses;
+ unsigned ExtractedElements = 0;
+ for (SDNode::use_iterator UI = InputVector.getNode()->use_begin(),
+ UE = InputVector.getNode()->use_end(); UI != UE; ++UI) {
+ if (UI.getUse().getResNo() != InputVector.getResNo())
+ return SDValue();
+
+ SDNode *Extract = *UI;
+ if (Extract->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+ return SDValue();
+
+ if (Extract->getValueType(0) != MVT::i32)
+ return SDValue();
+ if (!Extract->hasOneUse())
+ return SDValue();
+ if (Extract->use_begin()->getOpcode() != ISD::SIGN_EXTEND &&
+ Extract->use_begin()->getOpcode() != ISD::ZERO_EXTEND)
+ return SDValue();
+ if (!isa<ConstantSDNode>(Extract->getOperand(1)))
+ return SDValue();
+
+ // Record which element was extracted.
+ ExtractedElements |=
+ 1 << cast<ConstantSDNode>(Extract->getOperand(1))->getZExtValue();
+
+ Uses.push_back(Extract);
+ }
+
+ // If not all the elements were used, this may not be worthwhile.
+ if (ExtractedElements != 15)
+ return SDValue();
+
+ // Ok, we've now decided to do the transformation.
+ DebugLoc dl = InputVector.getDebugLoc();
+
+ // Store the value to a temporary stack slot.
+ SDValue StackPtr = DAG.CreateStackTemporary(InputVector.getValueType());
+ SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, InputVector, StackPtr,
+ MachinePointerInfo(), false, false, 0);
+
+ // Replace each use (extract) with a load of the appropriate element.
+ for (SmallVectorImpl<SDNode *>::iterator UI = Uses.begin(),
+ UE = Uses.end(); UI != UE; ++UI) {
+ SDNode *Extract = *UI;
+
+ // cOMpute the element's address.
+ SDValue Idx = Extract->getOperand(1);
+ unsigned EltSize =
+ InputVector.getValueType().getVectorElementType().getSizeInBits()/8;
+ uint64_t Offset = EltSize * cast<ConstantSDNode>(Idx)->getZExtValue();
+ SDValue OffsetVal = DAG.getConstant(Offset, TLI.getPointerTy());
+
+ SDValue ScalarAddr = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(),
+ StackPtr, OffsetVal);
+
+ // Load the scalar.
+ SDValue LoadScalar = DAG.getLoad(Extract->getValueType(0), dl, Ch,
+ ScalarAddr, MachinePointerInfo(),
+ false, false, 0);
+
+ // Replace the exact with the load.
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Extract, 0), LoadScalar);
+ }
+
+ // The replacement was made in place; don't return anything.
+ return SDValue();
+}
+
+/// PerformSELECTCombine - Do target-specific dag combines on SELECT and VSELECT
+/// nodes.
+static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ DebugLoc DL = N->getDebugLoc();
+ SDValue Cond = N->getOperand(0);
+ // Get the LHS/RHS of the select.
+ SDValue LHS = N->getOperand(1);
+ SDValue RHS = N->getOperand(2);
+ EVT VT = LHS.getValueType();
+
+ // If we have SSE[12] support, try to form min/max nodes. SSE min/max
+ // instructions match the semantics of the common C idiom x<y?x:y but not
+ // x<=y?x:y, because of how they handle negative zero (which can be
+ // ignored in unsafe-math mode).
+ if (Cond.getOpcode() == ISD::SETCC && VT.isFloatingPoint() &&
+ VT != MVT::f80 && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
+ (Subtarget->hasXMMInt() ||
+ (Subtarget->hasSSE1() && VT.getScalarType() == MVT::f32))) {
+ ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
+
+ unsigned Opcode = 0;
+ // Check for x CC y ? x : y.
+ if (DAG.isEqualTo(LHS, Cond.getOperand(0)) &&
+ DAG.isEqualTo(RHS, Cond.getOperand(1))) {
+ switch (CC) {
+ default: break;
+ case ISD::SETULT:
+ // Converting this to a min would handle NaNs incorrectly, and swapping
+ // the operands would cause it to handle comparisons between positive
+ // and negative zero incorrectly.
+ if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) {
+ if (!UnsafeFPMath &&
+ !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
+ break;
+ std::swap(LHS, RHS);
+ }
+ Opcode = X86ISD::FMIN;
+ break;
+ case ISD::SETOLE:
+ // Converting this to a min would handle comparisons between positive
+ // and negative zero incorrectly.
+ if (!UnsafeFPMath &&
+ !DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS))
+ break;
+ Opcode = X86ISD::FMIN;
+ break;
+ case ISD::SETULE:
+ // Converting this to a min would handle both negative zeros and NaNs
+ // incorrectly, but we can swap the operands to fix both.
+ std::swap(LHS, RHS);
+ case ISD::SETOLT:
+ case ISD::SETLT:
+ case ISD::SETLE:
+ Opcode = X86ISD::FMIN;
+ break;
+
+ case ISD::SETOGE:
+ // Converting this to a max would handle comparisons between positive
+ // and negative zero incorrectly.
+ if (!UnsafeFPMath &&
+ !DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS))
+ break;
+ Opcode = X86ISD::FMAX;
+ break;
+ case ISD::SETUGT:
+ // Converting this to a max would handle NaNs incorrectly, and swapping
+ // the operands would cause it to handle comparisons between positive
+ // and negative zero incorrectly.
+ if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) {
+ if (!UnsafeFPMath &&
+ !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
+ break;
+ std::swap(LHS, RHS);
+ }
+ Opcode = X86ISD::FMAX;
+ break;
+ case ISD::SETUGE:
+ // Converting this to a max would handle both negative zeros and NaNs
+ // incorrectly, but we can swap the operands to fix both.
+ std::swap(LHS, RHS);
+ case ISD::SETOGT:
+ case ISD::SETGT:
+ case ISD::SETGE:
+ Opcode = X86ISD::FMAX;
+ break;
+ }
+ // Check for x CC y ? y : x -- a min/max with reversed arms.
+ } else if (DAG.isEqualTo(LHS, Cond.getOperand(1)) &&
+ DAG.isEqualTo(RHS, Cond.getOperand(0))) {
+ switch (CC) {
+ default: break;
+ case ISD::SETOGE:
+ // Converting this to a min would handle comparisons between positive
+ // and negative zero incorrectly, and swapping the operands would
+ // cause it to handle NaNs incorrectly.
+ if (!UnsafeFPMath &&
+ !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) {
+ if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))
+ break;
+ std::swap(LHS, RHS);
+ }
+ Opcode = X86ISD::FMIN;
+ break;
+ case ISD::SETUGT:
+ // Converting this to a min would handle NaNs incorrectly.
+ if (!UnsafeFPMath &&
+ (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)))
+ break;
+ Opcode = X86ISD::FMIN;
+ break;
+ case ISD::SETUGE:
+ // Converting this to a min would handle both negative zeros and NaNs
+ // incorrectly, but we can swap the operands to fix both.
+ std::swap(LHS, RHS);
+ case ISD::SETOGT:
+ case ISD::SETGT:
+ case ISD::SETGE:
+ Opcode = X86ISD::FMIN;
+ break;
+
+ case ISD::SETULT:
+ // Converting this to a max would handle NaNs incorrectly.
+ if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))
+ break;
+ Opcode = X86ISD::FMAX;
+ break;
+ case ISD::SETOLE:
+ // Converting this to a max would handle comparisons between positive
+ // and negative zero incorrectly, and swapping the operands would
+ // cause it to handle NaNs incorrectly.
+ if (!UnsafeFPMath &&
+ !DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS)) {
+ if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))
+ break;
+ std::swap(LHS, RHS);
+ }
+ Opcode = X86ISD::FMAX;
+ break;
+ case ISD::SETULE:
+ // Converting this to a max would handle both negative zeros and NaNs
+ // incorrectly, but we can swap the operands to fix both.
+ std::swap(LHS, RHS);
+ case ISD::SETOLT:
+ case ISD::SETLT:
+ case ISD::SETLE:
+ Opcode = X86ISD::FMAX;
+ break;
+ }
+ }
+
+ if (Opcode)
+ return DAG.getNode(Opcode, DL, N->getValueType(0), LHS, RHS);
+ }
+
+ // If this is a select between two integer constants, try to do some
+ // optimizations.
+ if (ConstantSDNode *TrueC = dyn_cast<ConstantSDNode>(LHS)) {
+ if (ConstantSDNode *FalseC = dyn_cast<ConstantSDNode>(RHS))
+ // Don't do this for crazy integer types.
+ if (DAG.getTargetLoweringInfo().isTypeLegal(LHS.getValueType())) {
+ // If this is efficiently invertible, canonicalize the LHSC/RHSC values
+ // so that TrueC (the true value) is larger than FalseC.
+ bool NeedsCondInvert = false;
+
+ if (TrueC->getAPIntValue().ult(FalseC->getAPIntValue()) &&
+ // Efficiently invertible.
+ (Cond.getOpcode() == ISD::SETCC || // setcc -> invertible.
+ (Cond.getOpcode() == ISD::XOR && // xor(X, C) -> invertible.
+ isa<ConstantSDNode>(Cond.getOperand(1))))) {
+ NeedsCondInvert = true;
+ std::swap(TrueC, FalseC);
+ }
+
+ // Optimize C ? 8 : 0 -> zext(C) << 3. Likewise for any pow2/0.
+ if (FalseC->getAPIntValue() == 0 &&
+ TrueC->getAPIntValue().isPowerOf2()) {
+ if (NeedsCondInvert) // Invert the condition if needed.
+ Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond,
+ DAG.getConstant(1, Cond.getValueType()));
+
+ // Zero extend the condition if needed.
+ Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, LHS.getValueType(), Cond);
+
+ unsigned ShAmt = TrueC->getAPIntValue().logBase2();
+ return DAG.getNode(ISD::SHL, DL, LHS.getValueType(), Cond,
+ DAG.getConstant(ShAmt, MVT::i8));
+ }
+
+ // Optimize Cond ? cst+1 : cst -> zext(setcc(C)+cst.
+ if (FalseC->getAPIntValue()+1 == TrueC->getAPIntValue()) {
+ if (NeedsCondInvert) // Invert the condition if needed.
+ Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond,
+ DAG.getConstant(1, Cond.getValueType()));
+
+ // Zero extend the condition if needed.
+ Cond = DAG.getNode(ISD::ZERO_EXTEND, DL,
+ FalseC->getValueType(0), Cond);
+ return DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond,
+ SDValue(FalseC, 0));
+ }
+
+ // Optimize cases that will turn into an LEA instruction. This requires
+ // an i32 or i64 and an efficient multiplier (1, 2, 3, 4, 5, 8, 9).
+ if (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i64) {
+ uint64_t Diff = TrueC->getZExtValue()-FalseC->getZExtValue();
+ if (N->getValueType(0) == MVT::i32) Diff = (unsigned)Diff;
+
+ bool isFastMultiplier = false;
+ if (Diff < 10) {
+ switch ((unsigned char)Diff) {
+ default: break;
+ case 1: // result = add base, cond
+ case 2: // result = lea base( , cond*2)
+ case 3: // result = lea base(cond, cond*2)
+ case 4: // result = lea base( , cond*4)
+ case 5: // result = lea base(cond, cond*4)
+ case 8: // result = lea base( , cond*8)
+ case 9: // result = lea base(cond, cond*8)
+ isFastMultiplier = true;
+ break;
+ }
+ }
+
+ if (isFastMultiplier) {
+ APInt Diff = TrueC->getAPIntValue()-FalseC->getAPIntValue();
+ if (NeedsCondInvert) // Invert the condition if needed.
+ Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond,
+ DAG.getConstant(1, Cond.getValueType()));
+
+ // Zero extend the condition if needed.
+ Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0),
+ Cond);
+ // Scale the condition by the difference.
+ if (Diff != 1)
+ Cond = DAG.getNode(ISD::MUL, DL, Cond.getValueType(), Cond,
+ DAG.getConstant(Diff, Cond.getValueType()));
+
+ // Add the base if non-zero.
+ if (FalseC->getAPIntValue() != 0)
+ Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond,
+ SDValue(FalseC, 0));
+ return Cond;
+ }
+ }
+ }
+ }
+
+ return SDValue();
+}
+
+/// Optimize X86ISD::CMOV [LHS, RHS, CONDCODE (e.g. X86::COND_NE), CONDVAL]
+static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ DebugLoc DL = N->getDebugLoc();
+
+ // If the flag operand isn't dead, don't touch this CMOV.
+ if (N->getNumValues() == 2 && !SDValue(N, 1).use_empty())
+ return SDValue();
+
+ SDValue FalseOp = N->getOperand(0);
+ SDValue TrueOp = N->getOperand(1);
+ X86::CondCode CC = (X86::CondCode)N->getConstantOperandVal(2);
+ SDValue Cond = N->getOperand(3);
+ if (CC == X86::COND_E || CC == X86::COND_NE) {
+ switch (Cond.getOpcode()) {
+ default: break;
+ case X86ISD::BSR:
+ case X86ISD::BSF:
+ // If operand of BSR / BSF are proven never zero, then ZF cannot be set.
+ if (DAG.isKnownNeverZero(Cond.getOperand(0)))
+ return (CC == X86::COND_E) ? FalseOp : TrueOp;
+ }
+ }
+
+ // If this is a select between two integer constants, try to do some
+ // optimizations. Note that the operands are ordered the opposite of SELECT
+ // operands.
+ if (ConstantSDNode *TrueC = dyn_cast<ConstantSDNode>(TrueOp)) {
+ if (ConstantSDNode *FalseC = dyn_cast<ConstantSDNode>(FalseOp)) {
+ // Canonicalize the TrueC/FalseC values so that TrueC (the true value) is
+ // larger than FalseC (the false value).
+ if (TrueC->getAPIntValue().ult(FalseC->getAPIntValue())) {
+ CC = X86::GetOppositeBranchCondition(CC);
+ std::swap(TrueC, FalseC);
+ }
+
+ // Optimize C ? 8 : 0 -> zext(setcc(C)) << 3. Likewise for any pow2/0.
+ // This is efficient for any integer data type (including i8/i16) and
+ // shift amount.
+ if (FalseC->getAPIntValue() == 0 && TrueC->getAPIntValue().isPowerOf2()) {
+ Cond = DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
+ DAG.getConstant(CC, MVT::i8), Cond);
+
+ // Zero extend the condition if needed.
+ Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, TrueC->getValueType(0), Cond);
+
+ unsigned ShAmt = TrueC->getAPIntValue().logBase2();
+ Cond = DAG.getNode(ISD::SHL, DL, Cond.getValueType(), Cond,
+ DAG.getConstant(ShAmt, MVT::i8));
+ if (N->getNumValues() == 2) // Dead flag value?
+ return DCI.CombineTo(N, Cond, SDValue());
+ return Cond;
+ }
+
+ // Optimize Cond ? cst+1 : cst -> zext(setcc(C)+cst. This is efficient
+ // for any integer data type, including i8/i16.
+ if (FalseC->getAPIntValue()+1 == TrueC->getAPIntValue()) {
+ Cond = DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
+ DAG.getConstant(CC, MVT::i8), Cond);
+
+ // Zero extend the condition if needed.
+ Cond = DAG.getNode(ISD::ZERO_EXTEND, DL,
+ FalseC->getValueType(0), Cond);
+ Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond,
+ SDValue(FalseC, 0));
+
+ if (N->getNumValues() == 2) // Dead flag value?
+ return DCI.CombineTo(N, Cond, SDValue());
+ return Cond;
+ }
+
+ // Optimize cases that will turn into an LEA instruction. This requires
+ // an i32 or i64 and an efficient multiplier (1, 2, 3, 4, 5, 8, 9).
+ if (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i64) {
+ uint64_t Diff = TrueC->getZExtValue()-FalseC->getZExtValue();
+ if (N->getValueType(0) == MVT::i32) Diff = (unsigned)Diff;
+
+ bool isFastMultiplier = false;
+ if (Diff < 10) {
+ switch ((unsigned char)Diff) {
+ default: break;
+ case 1: // result = add base, cond
+ case 2: // result = lea base( , cond*2)
+ case 3: // result = lea base(cond, cond*2)
+ case 4: // result = lea base( , cond*4)
+ case 5: // result = lea base(cond, cond*4)
+ case 8: // result = lea base( , cond*8)
+ case 9: // result = lea base(cond, cond*8)
+ isFastMultiplier = true;
+ break;
+ }
+ }
+
+ if (isFastMultiplier) {
+ APInt Diff = TrueC->getAPIntValue()-FalseC->getAPIntValue();
+ Cond = DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
+ DAG.getConstant(CC, MVT::i8), Cond);
+ // Zero extend the condition if needed.
+ Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0),
+ Cond);
+ // Scale the condition by the difference.
+ if (Diff != 1)
+ Cond = DAG.getNode(ISD::MUL, DL, Cond.getValueType(), Cond,
+ DAG.getConstant(Diff, Cond.getValueType()));
+
+ // Add the base if non-zero.
+ if (FalseC->getAPIntValue() != 0)
+ Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond,
+ SDValue(FalseC, 0));
+ if (N->getNumValues() == 2) // Dead flag value?
+ return DCI.CombineTo(N, Cond, SDValue());
+ return Cond;
+ }
+ }
+ }
+ }
+ return SDValue();
+}
+
+
+/// PerformMulCombine - Optimize a single multiply with constant into two
+/// in order to implement it with two cheaper instructions, e.g.
+/// LEA + SHL, LEA + LEA.
+static SDValue PerformMulCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::i64)
+ return SDValue();
+
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (!C)
+ return SDValue();
+ uint64_t MulAmt = C->getZExtValue();
+ if (isPowerOf2_64(MulAmt) || MulAmt == 3 || MulAmt == 5 || MulAmt == 9)
+ return SDValue();
+
+ uint64_t MulAmt1 = 0;
+ uint64_t MulAmt2 = 0;
+ if ((MulAmt % 9) == 0) {
+ MulAmt1 = 9;
+ MulAmt2 = MulAmt / 9;
+ } else if ((MulAmt % 5) == 0) {
+ MulAmt1 = 5;
+ MulAmt2 = MulAmt / 5;
+ } else if ((MulAmt % 3) == 0) {
+ MulAmt1 = 3;
+ MulAmt2 = MulAmt / 3;
+ }
+ if (MulAmt2 &&
+ (isPowerOf2_64(MulAmt2) || MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9)){
+ DebugLoc DL = N->getDebugLoc();
+
+ if (isPowerOf2_64(MulAmt2) &&
+ !(N->hasOneUse() && N->use_begin()->getOpcode() == ISD::ADD))
+ // If second multiplifer is pow2, issue it first. We want the multiply by
+ // 3, 5, or 9 to be folded into the addressing mode unless the lone use
+ // is an add.
+ std::swap(MulAmt1, MulAmt2);
+
+ SDValue NewMul;
+ if (isPowerOf2_64(MulAmt1))
+ NewMul = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
+ DAG.getConstant(Log2_64(MulAmt1), MVT::i8));
+ else
+ NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0),
+ DAG.getConstant(MulAmt1, VT));
+
+ if (isPowerOf2_64(MulAmt2))
+ NewMul = DAG.getNode(ISD::SHL, DL, VT, NewMul,
+ DAG.getConstant(Log2_64(MulAmt2), MVT::i8));
+ else
+ NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, NewMul,
+ DAG.getConstant(MulAmt2, VT));
+
+ // Do not add new nodes to DAG combiner worklist.
+ DCI.CombineTo(N, NewMul, false);
+ }
+ return SDValue();
+}
+
+static SDValue PerformSHLCombine(SDNode *N, SelectionDAG &DAG) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N0.getValueType();
+
+ // fold (shl (and (setcc_c), c1), c2) -> (and setcc_c, (c1 << c2))
+ // since the result of setcc_c is all zero's or all ones.
+ if (N1C && N0.getOpcode() == ISD::AND &&
+ N0.getOperand(1).getOpcode() == ISD::Constant) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == X86ISD::SETCC_CARRY ||
+ ((N00.getOpcode() == ISD::ANY_EXTEND ||
+ N00.getOpcode() == ISD::ZERO_EXTEND) &&
+ N00.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY)) {
+ APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ APInt ShAmt = N1C->getAPIntValue();
+ Mask = Mask.shl(ShAmt);
+ if (Mask != 0)
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+ N00, DAG.getConstant(Mask, VT));
+ }
+ }
+
+ return SDValue();
+}
+
+/// PerformShiftCombine - Transforms vector shift nodes to use vector shifts
+/// when possible.
+static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ EVT VT = N->getValueType(0);
+ if (!VT.isVector() && VT.isInteger() &&
+ N->getOpcode() == ISD::SHL)
+ return PerformSHLCombine(N, DAG);
+
+ // On X86 with SSE2 support, we can transform this to a vector shift if
+ // all elements are shifted by the same amount. We can't do this in legalize
+ // because the a constant vector is typically transformed to a constant pool
+ // so we have no knowledge of the shift amount.
+ if (!Subtarget->hasXMMInt())
+ return SDValue();
+
+ if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16)
+ return SDValue();
+
+ SDValue ShAmtOp = N->getOperand(1);
+ EVT EltVT = VT.getVectorElementType();
+ DebugLoc DL = N->getDebugLoc();
+ SDValue BaseShAmt = SDValue();
+ if (ShAmtOp.getOpcode() == ISD::BUILD_VECTOR) {
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned i = 0;
+ for (; i != NumElts; ++i) {
+ SDValue Arg = ShAmtOp.getOperand(i);
+ if (Arg.getOpcode() == ISD::UNDEF) continue;
+ BaseShAmt = Arg;
+ break;
+ }
+ for (; i != NumElts; ++i) {
+ SDValue Arg = ShAmtOp.getOperand(i);
+ if (Arg.getOpcode() == ISD::UNDEF) continue;
+ if (Arg != BaseShAmt) {
+ return SDValue();
+ }
+ }
+ } else if (ShAmtOp.getOpcode() == ISD::VECTOR_SHUFFLE &&
+ cast<ShuffleVectorSDNode>(ShAmtOp)->isSplat()) {
+ SDValue InVec = ShAmtOp.getOperand(0);
+ if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
+ unsigned NumElts = InVec.getValueType().getVectorNumElements();
+ unsigned i = 0;
+ for (; i != NumElts; ++i) {
+ SDValue Arg = InVec.getOperand(i);
+ if (Arg.getOpcode() == ISD::UNDEF) continue;
+ BaseShAmt = Arg;
+ break;
+ }
+ } else if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT) {
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(InVec.getOperand(2))) {
+ unsigned SplatIdx= cast<ShuffleVectorSDNode>(ShAmtOp)->getSplatIndex();
+ if (C->getZExtValue() == SplatIdx)
+ BaseShAmt = InVec.getOperand(1);
+ }
+ }
+ if (BaseShAmt.getNode() == 0)
+ BaseShAmt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ShAmtOp,
+ DAG.getIntPtrConstant(0));
+ } else
+ return SDValue();
+
+ // The shift amount is an i32.
+ if (EltVT.bitsGT(MVT::i32))
+ BaseShAmt = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, BaseShAmt);
+ else if (EltVT.bitsLT(MVT::i32))
+ BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, BaseShAmt);
+
+ // The shift amount is identical so we can do a vector shift.
+ SDValue ValOp = N->getOperand(0);
+ switch (N->getOpcode()) {
+ default:
+ llvm_unreachable("Unknown shift opcode!");
+ break;
+ case ISD::SHL:
+ if (VT == MVT::v2i64)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pslli_q, MVT::i32),
+ ValOp, BaseShAmt);
+ if (VT == MVT::v4i32)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pslli_d, MVT::i32),
+ ValOp, BaseShAmt);
+ if (VT == MVT::v8i16)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32),
+ ValOp, BaseShAmt);
+ break;
+ case ISD::SRA:
+ if (VT == MVT::v4i32)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_psrai_d, MVT::i32),
+ ValOp, BaseShAmt);
+ if (VT == MVT::v8i16)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_psrai_w, MVT::i32),
+ ValOp, BaseShAmt);
+ break;
+ case ISD::SRL:
+ if (VT == MVT::v2i64)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_psrli_q, MVT::i32),
+ ValOp, BaseShAmt);
+ if (VT == MVT::v4i32)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_psrli_d, MVT::i32),
+ ValOp, BaseShAmt);
+ if (VT == MVT::v8i16)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_psrli_w, MVT::i32),
+ ValOp, BaseShAmt);
+ break;
+ }
+ return SDValue();
+}
+
+
+// CMPEQCombine - Recognize the distinctive (AND (setcc ...) (setcc ..))
+// where both setccs reference the same FP CMP, and rewrite for CMPEQSS
+// and friends. Likewise for OR -> CMPNEQSS.
+static SDValue CMPEQCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget *Subtarget) {
+ unsigned opcode;
+
+ // SSE1 supports CMP{eq|ne}SS, and SSE2 added CMP{eq|ne}SD, but
+ // we're requiring SSE2 for both.
+ if (Subtarget->hasXMMInt() && isAndOrOfSetCCs(SDValue(N, 0U), opcode)) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue CMP0 = N0->getOperand(1);
+ SDValue CMP1 = N1->getOperand(1);
+ DebugLoc DL = N->getDebugLoc();
+
+ // The SETCCs should both refer to the same CMP.
+ if (CMP0.getOpcode() != X86ISD::CMP || CMP0 != CMP1)
+ return SDValue();
+
+ SDValue CMP00 = CMP0->getOperand(0);
+ SDValue CMP01 = CMP0->getOperand(1);
+ EVT VT = CMP00.getValueType();
+
+ if (VT == MVT::f32 || VT == MVT::f64) {
+ bool ExpectingFlags = false;
+ // Check for any users that want flags:
+ for (SDNode::use_iterator UI = N->use_begin(),
+ UE = N->use_end();
+ !ExpectingFlags && UI != UE; ++UI)
+ switch (UI->getOpcode()) {
+ default:
+ case ISD::BR_CC:
+ case ISD::BRCOND:
+ case ISD::SELECT:
+ ExpectingFlags = true;
+ break;
+ case ISD::CopyToReg:
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND:
+ break;
+ }
+
+ if (!ExpectingFlags) {
+ enum X86::CondCode cc0 = (enum X86::CondCode)N0.getConstantOperandVal(0);
+ enum X86::CondCode cc1 = (enum X86::CondCode)N1.getConstantOperandVal(0);
+
+ if (cc1 == X86::COND_E || cc1 == X86::COND_NE) {
+ X86::CondCode tmp = cc0;
+ cc0 = cc1;
+ cc1 = tmp;
+ }
+
+ if ((cc0 == X86::COND_E && cc1 == X86::COND_NP) ||
+ (cc0 == X86::COND_NE && cc1 == X86::COND_P)) {
+ bool is64BitFP = (CMP00.getValueType() == MVT::f64);
+ X86ISD::NodeType NTOperator = is64BitFP ?
+ X86ISD::FSETCCsd : X86ISD::FSETCCss;
+ // FIXME: need symbolic constants for these magic numbers.
+ // See X86ATTInstPrinter.cpp:printSSECC().
+ unsigned x86cc = (cc0 == X86::COND_E) ? 0 : 4;
+ SDValue OnesOrZeroesF = DAG.getNode(NTOperator, DL, MVT::f32, CMP00, CMP01,
+ DAG.getConstant(x86cc, MVT::i8));
+ SDValue OnesOrZeroesI = DAG.getNode(ISD::BITCAST, DL, MVT::i32,
+ OnesOrZeroesF);
+ SDValue ANDed = DAG.getNode(ISD::AND, DL, MVT::i32, OnesOrZeroesI,
+ DAG.getConstant(1, MVT::i32));
+ SDValue OneBitOfTruth = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, ANDed);
+ return OneBitOfTruth;
+ }
+ }
+ }
+ }
+ return SDValue();
+}
+
+/// CanFoldXORWithAllOnes - Test whether the XOR operand is a AllOnes vector
+/// so it can be folded inside ANDNP.
+static bool CanFoldXORWithAllOnes(const SDNode *N) {
+ EVT VT = N->getValueType(0);
+
+ // Match direct AllOnes for 128 and 256-bit vectors
+ if (ISD::isBuildVectorAllOnes(N))
+ return true;
+
+ // Look through a bit convert.
+ if (N->getOpcode() == ISD::BITCAST)
+ N = N->getOperand(0).getNode();
+
+ // Sometimes the operand may come from a insert_subvector building a 256-bit
+ // allones vector
+ if (VT.getSizeInBits() == 256 &&
+ N->getOpcode() == ISD::INSERT_SUBVECTOR) {
+ SDValue V1 = N->getOperand(0);
+ SDValue V2 = N->getOperand(1);
+
+ if (V1.getOpcode() == ISD::INSERT_SUBVECTOR &&
+ V1.getOperand(0).getOpcode() == ISD::UNDEF &&
+ ISD::isBuildVectorAllOnes(V1.getOperand(1).getNode()) &&
+ ISD::isBuildVectorAllOnes(V2.getNode()))
+ return true;
+ }
+
+ return false;
+}
+
+static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget *Subtarget) {
+ if (DCI.isBeforeLegalizeOps())
+ return SDValue();
+
+ SDValue R = CMPEQCombine(N, DAG, DCI, Subtarget);
+ if (R.getNode())
+ return R;
+
+ EVT VT = N->getValueType(0);
+
+ // Create ANDN instructions
+ if (Subtarget->hasBMI() && (VT == MVT::i32 || VT == MVT::i64)) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ DebugLoc DL = N->getDebugLoc();
+
+ // Check LHS for not
+ if (N0.getOpcode() == ISD::XOR && isAllOnes(N0.getOperand(1)))
+ return DAG.getNode(X86ISD::ANDN, DL, VT, N0.getOperand(0), N1);
+ // Check RHS for not
+ if (N1.getOpcode() == ISD::XOR && isAllOnes(N1.getOperand(1)))
+ return DAG.getNode(X86ISD::ANDN, DL, VT, N1.getOperand(0), N0);
+
+ return SDValue();
+ }
+
+ // Want to form ANDNP nodes:
+ // 1) In the hopes of then easily combining them with OR and AND nodes
+ // to form PBLEND/PSIGN.
+ // 2) To match ANDN packed intrinsics
+ if (VT != MVT::v2i64 && VT != MVT::v4i64)
+ return SDValue();
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ DebugLoc DL = N->getDebugLoc();
+
+ // Check LHS for vnot
+ if (N0.getOpcode() == ISD::XOR &&
+ //ISD::isBuildVectorAllOnes(N0.getOperand(1).getNode()))
+ CanFoldXORWithAllOnes(N0.getOperand(1).getNode()))
+ return DAG.getNode(X86ISD::ANDNP, DL, VT, N0.getOperand(0), N1);
+
+ // Check RHS for vnot
+ if (N1.getOpcode() == ISD::XOR &&
+ //ISD::isBuildVectorAllOnes(N1.getOperand(1).getNode()))
+ CanFoldXORWithAllOnes(N1.getOperand(1).getNode()))
+ return DAG.getNode(X86ISD::ANDNP, DL, VT, N1.getOperand(0), N0);
+
+ return SDValue();
+}
+
+static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget *Subtarget) {
+ if (DCI.isBeforeLegalizeOps())
+ return SDValue();
+
+ SDValue R = CMPEQCombine(N, DAG, DCI, Subtarget);
+ if (R.getNode())
+ return R;
+
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64 && VT != MVT::v2i64)
+ return SDValue();
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ // look for psign/blend
+ if (Subtarget->hasSSSE3() || Subtarget->hasAVX()) {
+ if (VT == MVT::v2i64) {
+ // Canonicalize pandn to RHS
+ if (N0.getOpcode() == X86ISD::ANDNP)
+ std::swap(N0, N1);
+ // or (and (m, x), (pandn m, y))
+ if (N0.getOpcode() == ISD::AND && N1.getOpcode() == X86ISD::ANDNP) {
+ SDValue Mask = N1.getOperand(0);
+ SDValue X = N1.getOperand(1);
+ SDValue Y;
+ if (N0.getOperand(0) == Mask)
+ Y = N0.getOperand(1);
+ if (N0.getOperand(1) == Mask)
+ Y = N0.getOperand(0);
+
+ // Check to see if the mask appeared in both the AND and ANDNP and
+ if (!Y.getNode())
+ return SDValue();
+
+ // Validate that X, Y, and Mask are BIT_CONVERTS, and see through them.
+ if (Mask.getOpcode() != ISD::BITCAST ||
+ X.getOpcode() != ISD::BITCAST ||
+ Y.getOpcode() != ISD::BITCAST)
+ return SDValue();
+
+ // Look through mask bitcast.
+ Mask = Mask.getOperand(0);
+ EVT MaskVT = Mask.getValueType();
+
+ // Validate that the Mask operand is a vector sra node. The sra node
+ // will be an intrinsic.
+ if (Mask.getOpcode() != ISD::INTRINSIC_WO_CHAIN)
+ return SDValue();
+
+ // FIXME: what to do for bytes, since there is a psignb/pblendvb, but
+ // there is no psrai.b
+ switch (cast<ConstantSDNode>(Mask.getOperand(0))->getZExtValue()) {
+ case Intrinsic::x86_sse2_psrai_w:
+ case Intrinsic::x86_sse2_psrai_d:
+ break;
+ default: return SDValue();
+ }
+
+ // Check that the SRA is all signbits.
+ SDValue SraC = Mask.getOperand(2);
+ unsigned SraAmt = cast<ConstantSDNode>(SraC)->getZExtValue();
+ unsigned EltBits = MaskVT.getVectorElementType().getSizeInBits();
+ if ((SraAmt + 1) != EltBits)
+ return SDValue();
+
+ DebugLoc DL = N->getDebugLoc();
+
+ // Now we know we at least have a plendvb with the mask val. See if
+ // we can form a psignb/w/d.
+ // psign = x.type == y.type == mask.type && y = sub(0, x);
+ X = X.getOperand(0);
+ Y = Y.getOperand(0);
+ if (Y.getOpcode() == ISD::SUB && Y.getOperand(1) == X &&
+ ISD::isBuildVectorAllZeros(Y.getOperand(0).getNode()) &&
+ X.getValueType() == MaskVT && X.getValueType() == Y.getValueType()){
+ unsigned Opc = 0;
+ switch (EltBits) {
+ case 8: Opc = X86ISD::PSIGNB; break;
+ case 16: Opc = X86ISD::PSIGNW; break;
+ case 32: Opc = X86ISD::PSIGND; break;
+ default: break;
+ }
+ if (Opc) {
+ SDValue Sign = DAG.getNode(Opc, DL, MaskVT, X, Mask.getOperand(1));
+ return DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Sign);
+ }
+ }
+ // PBLENDVB only available on SSE 4.1
+ if (!(Subtarget->hasSSE41() || Subtarget->hasAVX()))
+ return SDValue();
+
+ X = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, X);
+ Y = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Y);
+ Mask = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Mask);
+ Mask = DAG.getNode(ISD::VSELECT, DL, MVT::v16i8, Mask, X, Y);
+ return DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Mask);
+ }
+ }
+ }
+
+ // fold (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c)
+ if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
+ std::swap(N0, N1);
+ if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
+ return SDValue();
+ if (!N0.hasOneUse() || !N1.hasOneUse())
+ return SDValue();
+
+ SDValue ShAmt0 = N0.getOperand(1);
+ if (ShAmt0.getValueType() != MVT::i8)
+ return SDValue();
+ SDValue ShAmt1 = N1.getOperand(1);
+ if (ShAmt1.getValueType() != MVT::i8)
+ return SDValue();
+ if (ShAmt0.getOpcode() == ISD::TRUNCATE)
+ ShAmt0 = ShAmt0.getOperand(0);
+ if (ShAmt1.getOpcode() == ISD::TRUNCATE)
+ ShAmt1 = ShAmt1.getOperand(0);
+
+ DebugLoc DL = N->getDebugLoc();
+ unsigned Opc = X86ISD::SHLD;
+ SDValue Op0 = N0.getOperand(0);
+ SDValue Op1 = N1.getOperand(0);
+ if (ShAmt0.getOpcode() == ISD::SUB) {
+ Opc = X86ISD::SHRD;
+ std::swap(Op0, Op1);
+ std::swap(ShAmt0, ShAmt1);
+ }
+
+ unsigned Bits = VT.getSizeInBits();
+ if (ShAmt1.getOpcode() == ISD::SUB) {
+ SDValue Sum = ShAmt1.getOperand(0);
+ if (ConstantSDNode *SumC = dyn_cast<ConstantSDNode>(Sum)) {
+ SDValue ShAmt1Op1 = ShAmt1.getOperand(1);
+ if (ShAmt1Op1.getNode()->getOpcode() == ISD::TRUNCATE)
+ ShAmt1Op1 = ShAmt1Op1.getOperand(0);
+ if (SumC->getSExtValue() == Bits && ShAmt1Op1 == ShAmt0)
+ return DAG.getNode(Opc, DL, VT,
+ Op0, Op1,
+ DAG.getNode(ISD::TRUNCATE, DL,
+ MVT::i8, ShAmt0));
+ }
+ } else if (ConstantSDNode *ShAmt1C = dyn_cast<ConstantSDNode>(ShAmt1)) {
+ ConstantSDNode *ShAmt0C = dyn_cast<ConstantSDNode>(ShAmt0);
+ if (ShAmt0C &&
+ ShAmt0C->getSExtValue() + ShAmt1C->getSExtValue() == Bits)
+ return DAG.getNode(Opc, DL, VT,
+ N0.getOperand(0), N1.getOperand(0),
+ DAG.getNode(ISD::TRUNCATE, DL,
+ MVT::i8, ShAmt0));
+ }
+
+ return SDValue();
+}
+
+/// PerformLOADCombine - Do target-specific dag combines on LOAD nodes.
+static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ LoadSDNode *Ld = cast<LoadSDNode>(N);
+ EVT RegVT = Ld->getValueType(0);
+ EVT MemVT = Ld->getMemoryVT();
+ DebugLoc dl = Ld->getDebugLoc();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ ISD::LoadExtType Ext = Ld->getExtensionType();
+
+ // If this is a vector EXT Load then attempt to optimize it using a
+ // shuffle. We need SSE4 for the shuffles.
+ // TODO: It is possible to support ZExt by zeroing the undef values
+ // during the shuffle phase or after the shuffle.
+ if (RegVT.isVector() && Ext == ISD::EXTLOAD && Subtarget->hasSSE41()) {
+ assert(MemVT != RegVT && "Cannot extend to the same type");
+ assert(MemVT.isVector() && "Must load a vector from memory");
+
+ unsigned NumElems = RegVT.getVectorNumElements();
+ unsigned RegSz = RegVT.getSizeInBits();
+ unsigned MemSz = MemVT.getSizeInBits();
+ assert(RegSz > MemSz && "Register size must be greater than the mem size");
+ // All sizes must be a power of two
+ if (!isPowerOf2_32(RegSz * MemSz * NumElems)) return SDValue();
+
+ // Attempt to load the original value using a single load op.
+ // Find a scalar type which is equal to the loaded word size.
+ MVT SclrLoadTy = MVT::i8;
+ for (unsigned tp = MVT::FIRST_INTEGER_VALUETYPE;
+ tp < MVT::LAST_INTEGER_VALUETYPE; ++tp) {
+ MVT Tp = (MVT::SimpleValueType)tp;
+ if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() == MemSz) {
+ SclrLoadTy = Tp;
+ break;
+ }
+ }
+
+ // Proceed if a load word is found.
+ if (SclrLoadTy.getSizeInBits() != MemSz) return SDValue();
+
+ EVT LoadUnitVecVT = EVT::getVectorVT(*DAG.getContext(), SclrLoadTy,
+ RegSz/SclrLoadTy.getSizeInBits());
+
+ EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(),
+ RegSz/MemVT.getScalarType().getSizeInBits());
+ // Can't shuffle using an illegal type.
+ if (!TLI.isTypeLegal(WideVecVT)) return SDValue();
+
+ // Perform a single load.
+ SDValue ScalarLoad = DAG.getLoad(SclrLoadTy, dl, Ld->getChain(),
+ Ld->getBasePtr(),
+ Ld->getPointerInfo(), Ld->isVolatile(),
+ Ld->isNonTemporal(), Ld->getAlignment());
+
+ // Insert the word loaded into a vector.
+ SDValue ScalarInVector = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
+ LoadUnitVecVT, ScalarLoad);
+
+ // Bitcast the loaded value to a vector of the original element type, in
+ // the size of the target vector type.
+ SDValue SlicedVec = DAG.getNode(ISD::BITCAST, dl, WideVecVT, ScalarInVector);
+ unsigned SizeRatio = RegSz/MemSz;
+
+ // Redistribute the loaded elements into the different locations.
+ SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
+ for (unsigned i = 0; i < NumElems; i++) ShuffleVec[i*SizeRatio] = i;
+
+ SDValue Shuff = DAG.getVectorShuffle(WideVecVT, dl, SlicedVec,
+ DAG.getUNDEF(SlicedVec.getValueType()),
+ ShuffleVec.data());
+
+ // Bitcast to the requested type.
+ Shuff = DAG.getNode(ISD::BITCAST, dl, RegVT, Shuff);
+ // Replace the original load with the new sequence
+ // and return the new chain.
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Shuff);
+ return SDValue(ScalarLoad.getNode(), 1);
+ }
+
+ return SDValue();
+}
+
+/// PerformSTORECombine - Do target-specific dag combines on STORE nodes.
+static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ StoreSDNode *St = cast<StoreSDNode>(N);
+ EVT VT = St->getValue().getValueType();
+ EVT StVT = St->getMemoryVT();
+ DebugLoc dl = St->getDebugLoc();
+ SDValue StoredVal = St->getOperand(1);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ // If we are saving a concatination of two XMM registers, perform two stores.
+ // This is better in Sandy Bridge cause one 256-bit mem op is done via two
+ // 128-bit ones. If in the future the cost becomes only one memory access the
+ // first version would be better.
+ if (VT.getSizeInBits() == 256 &&
+ StoredVal.getNode()->getOpcode() == ISD::CONCAT_VECTORS &&
+ StoredVal.getNumOperands() == 2) {
+
+ SDValue Value0 = StoredVal.getOperand(0);
+ SDValue Value1 = StoredVal.getOperand(1);
+
+ SDValue Stride = DAG.getConstant(16, TLI.getPointerTy());
+ SDValue Ptr0 = St->getBasePtr();
+ SDValue Ptr1 = DAG.getNode(ISD::ADD, dl, Ptr0.getValueType(), Ptr0, Stride);
+
+ SDValue Ch0 = DAG.getStore(St->getChain(), dl, Value0, Ptr0,
+ St->getPointerInfo(), St->isVolatile(),
+ St->isNonTemporal(), St->getAlignment());
+ SDValue Ch1 = DAG.getStore(St->getChain(), dl, Value1, Ptr1,
+ St->getPointerInfo(), St->isVolatile(),
+ St->isNonTemporal(), St->getAlignment());
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ch0, Ch1);
+ }
+
+ // Optimize trunc store (of multiple scalars) to shuffle and store.
+ // First, pack all of the elements in one place. Next, store to memory
+ // in fewer chunks.
+ if (St->isTruncatingStore() && VT.isVector()) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ unsigned NumElems = VT.getVectorNumElements();
+ assert(StVT != VT && "Cannot truncate to the same type");
+ unsigned FromSz = VT.getVectorElementType().getSizeInBits();
+ unsigned ToSz = StVT.getVectorElementType().getSizeInBits();
+
+ // From, To sizes and ElemCount must be pow of two
+ if (!isPowerOf2_32(NumElems * FromSz * ToSz)) return SDValue();
+ // We are going to use the original vector elt for storing.
+ // Accumulated smaller vector elements must be a multiple of the store size.
+ if (0 != (NumElems * FromSz) % ToSz) return SDValue();
+
+ unsigned SizeRatio = FromSz / ToSz;
+
+ assert(SizeRatio * NumElems * ToSz == VT.getSizeInBits());
+
+ // Create a type on which we perform the shuffle
+ EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(),
+ StVT.getScalarType(), NumElems*SizeRatio);
+
+ assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
+
+ SDValue WideVec = DAG.getNode(ISD::BITCAST, dl, WideVecVT, St->getValue());
+ SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
+ for (unsigned i = 0; i < NumElems; i++ ) ShuffleVec[i] = i * SizeRatio;
+
+ // Can't shuffle using an illegal type
+ if (!TLI.isTypeLegal(WideVecVT)) return SDValue();
+
+ SDValue Shuff = DAG.getVectorShuffle(WideVecVT, dl, WideVec,
+ DAG.getUNDEF(WideVec.getValueType()),
+ ShuffleVec.data());
+ // At this point all of the data is stored at the bottom of the
+ // register. We now need to save it to mem.
+
+ // Find the largest store unit
+ MVT StoreType = MVT::i8;
+ for (unsigned tp = MVT::FIRST_INTEGER_VALUETYPE;
+ tp < MVT::LAST_INTEGER_VALUETYPE; ++tp) {
+ MVT Tp = (MVT::SimpleValueType)tp;
+ if (TLI.isTypeLegal(Tp) && StoreType.getSizeInBits() < NumElems * ToSz)
+ StoreType = Tp;
+ }
+
+ // Bitcast the original vector into a vector of store-size units
+ EVT StoreVecVT = EVT::getVectorVT(*DAG.getContext(),
+ StoreType, VT.getSizeInBits()/EVT(StoreType).getSizeInBits());
+ assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits());
+ SDValue ShuffWide = DAG.getNode(ISD::BITCAST, dl, StoreVecVT, Shuff);
+ SmallVector<SDValue, 8> Chains;
+ SDValue Increment = DAG.getConstant(StoreType.getSizeInBits()/8,
+ TLI.getPointerTy());
+ SDValue Ptr = St->getBasePtr();
+
+ // Perform one or more big stores into memory.
+ for (unsigned i = 0; i < (ToSz*NumElems)/StoreType.getSizeInBits() ; i++) {
+ SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ StoreType, ShuffWide,
+ DAG.getIntPtrConstant(i));
+ SDValue Ch = DAG.getStore(St->getChain(), dl, SubVec, Ptr,
+ St->getPointerInfo(), St->isVolatile(),
+ St->isNonTemporal(), St->getAlignment());
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
+ Chains.push_back(Ch);
+ }
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Chains[0],
+ Chains.size());
+ }
+
+
+ // Turn load->store of MMX types into GPR load/stores. This avoids clobbering
+ // the FP state in cases where an emms may be missing.
+ // A preferable solution to the general problem is to figure out the right
+ // places to insert EMMS. This qualifies as a quick hack.
+
+ // Similarly, turn load->store of i64 into double load/stores in 32-bit mode.
+ if (VT.getSizeInBits() != 64)
+ return SDValue();
+
+ const Function *F = DAG.getMachineFunction().getFunction();
+ bool NoImplicitFloatOps = F->hasFnAttr(Attribute::NoImplicitFloat);
+ bool F64IsLegal = !UseSoftFloat && !NoImplicitFloatOps
+ && Subtarget->hasXMMInt();
+ if ((VT.isVector() ||
+ (VT == MVT::i64 && F64IsLegal && !Subtarget->is64Bit())) &&
+ isa<LoadSDNode>(St->getValue()) &&
+ !cast<LoadSDNode>(St->getValue())->isVolatile() &&
+ St->getChain().hasOneUse() && !St->isVolatile()) {
+ SDNode* LdVal = St->getValue().getNode();
+ LoadSDNode *Ld = 0;
+ int TokenFactorIndex = -1;
+ SmallVector<SDValue, 8> Ops;
+ SDNode* ChainVal = St->getChain().getNode();
+ // Must be a store of a load. We currently handle two cases: the load
+ // is a direct child, and it's under an intervening TokenFactor. It is
+ // possible to dig deeper under nested TokenFactors.
+ if (ChainVal == LdVal)
+ Ld = cast<LoadSDNode>(St->getChain());
+ else if (St->getValue().hasOneUse() &&
+ ChainVal->getOpcode() == ISD::TokenFactor) {
+ for (unsigned i=0, e = ChainVal->getNumOperands(); i != e; ++i) {
+ if (ChainVal->getOperand(i).getNode() == LdVal) {
+ TokenFactorIndex = i;
+ Ld = cast<LoadSDNode>(St->getValue());
+ } else
+ Ops.push_back(ChainVal->getOperand(i));
+ }
+ }
+
+ if (!Ld || !ISD::isNormalLoad(Ld))
+ return SDValue();
+
+ // If this is not the MMX case, i.e. we are just turning i64 load/store
+ // into f64 load/store, avoid the transformation if there are multiple
+ // uses of the loaded value.
+ if (!VT.isVector() && !Ld->hasNUsesOfValue(1, 0))
+ return SDValue();
+
+ DebugLoc LdDL = Ld->getDebugLoc();
+ DebugLoc StDL = N->getDebugLoc();
+ // If we are a 64-bit capable x86, lower to a single movq load/store pair.
+ // Otherwise, if it's legal to use f64 SSE instructions, use f64 load/store
+ // pair instead.
+ if (Subtarget->is64Bit() || F64IsLegal) {
+ EVT LdVT = Subtarget->is64Bit() ? MVT::i64 : MVT::f64;
+ SDValue NewLd = DAG.getLoad(LdVT, LdDL, Ld->getChain(), Ld->getBasePtr(),
+ Ld->getPointerInfo(), Ld->isVolatile(),
+ Ld->isNonTemporal(), Ld->getAlignment());
+ SDValue NewChain = NewLd.getValue(1);
+ if (TokenFactorIndex != -1) {
+ Ops.push_back(NewChain);
+ NewChain = DAG.getNode(ISD::TokenFactor, LdDL, MVT::Other, &Ops[0],
+ Ops.size());
+ }
+ return DAG.getStore(NewChain, StDL, NewLd, St->getBasePtr(),
+ St->getPointerInfo(),
+ St->isVolatile(), St->isNonTemporal(),
+ St->getAlignment());
+ }
+
+ // Otherwise, lower to two pairs of 32-bit loads / stores.
+ SDValue LoAddr = Ld->getBasePtr();
+ SDValue HiAddr = DAG.getNode(ISD::ADD, LdDL, MVT::i32, LoAddr,
+ DAG.getConstant(4, MVT::i32));
+
+ SDValue LoLd = DAG.getLoad(MVT::i32, LdDL, Ld->getChain(), LoAddr,
+ Ld->getPointerInfo(),
+ Ld->isVolatile(), Ld->isNonTemporal(),
+ Ld->getAlignment());
+ SDValue HiLd = DAG.getLoad(MVT::i32, LdDL, Ld->getChain(), HiAddr,
+ Ld->getPointerInfo().getWithOffset(4),
+ Ld->isVolatile(), Ld->isNonTemporal(),
+ MinAlign(Ld->getAlignment(), 4));
+
+ SDValue NewChain = LoLd.getValue(1);
+ if (TokenFactorIndex != -1) {
+ Ops.push_back(LoLd);
+ Ops.push_back(HiLd);
+ NewChain = DAG.getNode(ISD::TokenFactor, LdDL, MVT::Other, &Ops[0],
+ Ops.size());
+ }
+
+ LoAddr = St->getBasePtr();
+ HiAddr = DAG.getNode(ISD::ADD, StDL, MVT::i32, LoAddr,
+ DAG.getConstant(4, MVT::i32));
+
+ SDValue LoSt = DAG.getStore(NewChain, StDL, LoLd, LoAddr,
+ St->getPointerInfo(),
+ St->isVolatile(), St->isNonTemporal(),
+ St->getAlignment());
+ SDValue HiSt = DAG.getStore(NewChain, StDL, HiLd, HiAddr,
+ St->getPointerInfo().getWithOffset(4),
+ St->isVolatile(),
+ St->isNonTemporal(),
+ MinAlign(St->getAlignment(), 4));
+ return DAG.getNode(ISD::TokenFactor, StDL, MVT::Other, LoSt, HiSt);
+ }
+ return SDValue();
+}
+
+/// isHorizontalBinOp - Return 'true' if this vector operation is "horizontal"
+/// and return the operands for the horizontal operation in LHS and RHS. A
+/// horizontal operation performs the binary operation on successive elements
+/// of its first operand, then on successive elements of its second operand,
+/// returning the resulting values in a vector. For example, if
+/// A = < float a0, float a1, float a2, float a3 >
+/// and
+/// B = < float b0, float b1, float b2, float b3 >
+/// then the result of doing a horizontal operation on A and B is
+/// A horizontal-op B = < a0 op a1, a2 op a3, b0 op b1, b2 op b3 >.
+/// In short, LHS and RHS are inspected to see if LHS op RHS is of the form
+/// A horizontal-op B, for some already available A and B, and if so then LHS is
+/// set to A, RHS to B, and the routine returns 'true'.
+/// Note that the binary operation should have the property that if one of the
+/// operands is UNDEF then the result is UNDEF.
+static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool isCommutative) {
+ // Look for the following pattern: if
+ // A = < float a0, float a1, float a2, float a3 >
+ // B = < float b0, float b1, float b2, float b3 >
+ // and
+ // LHS = VECTOR_SHUFFLE A, B, <0, 2, 4, 6>
+ // RHS = VECTOR_SHUFFLE A, B, <1, 3, 5, 7>
+ // then LHS op RHS = < a0 op a1, a2 op a3, b0 op b1, b2 op b3 >
+ // which is A horizontal-op B.
+
+ // At least one of the operands should be a vector shuffle.
+ if (LHS.getOpcode() != ISD::VECTOR_SHUFFLE &&
+ RHS.getOpcode() != ISD::VECTOR_SHUFFLE)
+ return false;
+
+ EVT VT = LHS.getValueType();
+ unsigned N = VT.getVectorNumElements();
+
+ // View LHS in the form
+ // LHS = VECTOR_SHUFFLE A, B, LMask
+ // If LHS is not a shuffle then pretend it is the shuffle
+ // LHS = VECTOR_SHUFFLE LHS, undef, <0, 1, ..., N-1>
+ // NOTE: in what follows a default initialized SDValue represents an UNDEF of
+ // type VT.
+ SDValue A, B;
+ SmallVector<int, 8> LMask(N);
+ if (LHS.getOpcode() == ISD::VECTOR_SHUFFLE) {
+ if (LHS.getOperand(0).getOpcode() != ISD::UNDEF)
+ A = LHS.getOperand(0);
+ if (LHS.getOperand(1).getOpcode() != ISD::UNDEF)
+ B = LHS.getOperand(1);
+ cast<ShuffleVectorSDNode>(LHS.getNode())->getMask(LMask);
+ } else {
+ if (LHS.getOpcode() != ISD::UNDEF)
+ A = LHS;
+ for (unsigned i = 0; i != N; ++i)
+ LMask[i] = i;
+ }
+
+ // Likewise, view RHS in the form
+ // RHS = VECTOR_SHUFFLE C, D, RMask
+ SDValue C, D;
+ SmallVector<int, 8> RMask(N);
+ if (RHS.getOpcode() == ISD::VECTOR_SHUFFLE) {
+ if (RHS.getOperand(0).getOpcode() != ISD::UNDEF)
+ C = RHS.getOperand(0);
+ if (RHS.getOperand(1).getOpcode() != ISD::UNDEF)
+ D = RHS.getOperand(1);
+ cast<ShuffleVectorSDNode>(RHS.getNode())->getMask(RMask);
+ } else {
+ if (RHS.getOpcode() != ISD::UNDEF)
+ C = RHS;
+ for (unsigned i = 0; i != N; ++i)
+ RMask[i] = i;
+ }
+
+ // Check that the shuffles are both shuffling the same vectors.
+ if (!(A == C && B == D) && !(A == D && B == C))
+ return false;
+
+ // If everything is UNDEF then bail out: it would be better to fold to UNDEF.
+ if (!A.getNode() && !B.getNode())
+ return false;
+
+ // If A and B occur in reverse order in RHS, then "swap" them (which means
+ // rewriting the mask).
+ if (A != C)
+ for (unsigned i = 0; i != N; ++i) {
+ unsigned Idx = RMask[i];
+ if (Idx < N)
+ RMask[i] += N;
+ else if (Idx < 2*N)
+ RMask[i] -= N;
+ }
+
+ // At this point LHS and RHS are equivalent to
+ // LHS = VECTOR_SHUFFLE A, B, LMask
+ // RHS = VECTOR_SHUFFLE A, B, RMask
+ // Check that the masks correspond to performing a horizontal operation.
+ for (unsigned i = 0; i != N; ++i) {
+ unsigned LIdx = LMask[i], RIdx = RMask[i];
+
+ // Ignore any UNDEF components.
+ if (LIdx >= 2*N || RIdx >= 2*N || (!A.getNode() && (LIdx < N || RIdx < N))
+ || (!B.getNode() && (LIdx >= N || RIdx >= N)))
+ continue;
+
+ // Check that successive elements are being operated on. If not, this is
+ // not a horizontal operation.
+ if (!(LIdx == 2*i && RIdx == 2*i + 1) &&
+ !(isCommutative && LIdx == 2*i + 1 && RIdx == 2*i))
+ return false;
+ }
+
+ LHS = A.getNode() ? A : B; // If A is 'UNDEF', use B for it.
+ RHS = B.getNode() ? B : A; // If B is 'UNDEF', use A for it.
+ return true;
+}
+
+/// PerformFADDCombine - Do target-specific dag combines on floating point adds.
+static SDValue PerformFADDCombine(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ EVT VT = N->getValueType(0);
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+
+ // Try to synthesize horizontal adds from adds of shuffles.
+ if ((Subtarget->hasSSE3() || Subtarget->hasAVX()) &&
+ (VT == MVT::v4f32 || VT == MVT::v2f64) &&
+ isHorizontalBinOp(LHS, RHS, true))
+ return DAG.getNode(X86ISD::FHADD, N->getDebugLoc(), VT, LHS, RHS);
+ return SDValue();
+}
+
+/// PerformFSUBCombine - Do target-specific dag combines on floating point subs.
+static SDValue PerformFSUBCombine(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ EVT VT = N->getValueType(0);
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+
+ // Try to synthesize horizontal subs from subs of shuffles.
+ if ((Subtarget->hasSSE3() || Subtarget->hasAVX()) &&
+ (VT == MVT::v4f32 || VT == MVT::v2f64) &&
+ isHorizontalBinOp(LHS, RHS, false))
+ return DAG.getNode(X86ISD::FHSUB, N->getDebugLoc(), VT, LHS, RHS);
+ return SDValue();
+}
+
+/// PerformFORCombine - Do target-specific dag combines on X86ISD::FOR and
+/// X86ISD::FXOR nodes.
+static SDValue PerformFORCombine(SDNode *N, SelectionDAG &DAG) {
+ assert(N->getOpcode() == X86ISD::FOR || N->getOpcode() == X86ISD::FXOR);
+ // F[X]OR(0.0, x) -> x
+ // F[X]OR(x, 0.0) -> x
+ if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
+ if (C->getValueAPF().isPosZero())
+ return N->getOperand(1);
+ if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(1)))
+ if (C->getValueAPF().isPosZero())
+ return N->getOperand(0);
+ return SDValue();
+}
+
+/// PerformFANDCombine - Do target-specific dag combines on X86ISD::FAND nodes.
+static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG) {
+ // FAND(0.0, x) -> 0.0
+ // FAND(x, 0.0) -> 0.0
+ if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
+ if (C->getValueAPF().isPosZero())
+ return N->getOperand(0);
+ if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(1)))
+ if (C->getValueAPF().isPosZero())
+ return N->getOperand(1);
+ return SDValue();
+}
+
+static SDValue PerformBTCombine(SDNode *N,
+ SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ // BT ignores high bits in the bit index operand.
+ SDValue Op1 = N->getOperand(1);
+ if (Op1.hasOneUse()) {
+ unsigned BitWidth = Op1.getValueSizeInBits();
+ APInt DemandedMask = APInt::getLowBitsSet(BitWidth, Log2_32(BitWidth));
+ APInt KnownZero, KnownOne;
+ TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
+ !DCI.isBeforeLegalizeOps());
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (TLO.ShrinkDemandedConstant(Op1, DemandedMask) ||
+ TLI.SimplifyDemandedBits(Op1, DemandedMask, KnownZero, KnownOne, TLO))
+ DCI.CommitTargetLoweringOpt(TLO);
+ }
+ return SDValue();
+}
+
+static SDValue PerformVZEXT_MOVLCombine(SDNode *N, SelectionDAG &DAG) {
+ SDValue Op = N->getOperand(0);
+ if (Op.getOpcode() == ISD::BITCAST)
+ Op = Op.getOperand(0);
+ EVT VT = N->getValueType(0), OpVT = Op.getValueType();
+ if (Op.getOpcode() == X86ISD::VZEXT_LOAD &&
+ VT.getVectorElementType().getSizeInBits() ==
+ OpVT.getVectorElementType().getSizeInBits()) {
+ return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, Op);
+ }
+ return SDValue();
+}
+
+static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG) {
+ // (i32 zext (and (i8 x86isd::setcc_carry), 1)) ->
+ // (and (i32 x86isd::setcc_carry), 1)
+ // This eliminates the zext. This transformation is necessary because
+ // ISD::SETCC is always legalized to i8.
+ DebugLoc dl = N->getDebugLoc();
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ if (N0.getOpcode() == ISD::AND &&
+ N0.hasOneUse() &&
+ N0.getOperand(0).hasOneUse()) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() != X86ISD::SETCC_CARRY)
+ return SDValue();
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (!C || C->getZExtValue() != 1)
+ return SDValue();
+ return DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNode(X86ISD::SETCC_CARRY, dl, VT,
+ N00.getOperand(0), N00.getOperand(1)),
+ DAG.getConstant(1, VT));
+ }
+
+ return SDValue();
+}
+
+// Optimize RES = X86ISD::SETCC CONDCODE, EFLAG_INPUT
+static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG) {
+ unsigned X86CC = N->getConstantOperandVal(0);
+ SDValue EFLAG = N->getOperand(1);
+ DebugLoc DL = N->getDebugLoc();
+
+ // Materialize "setb reg" as "sbb reg,reg", since it can be extended without
+ // a zext and produces an all-ones bit which is more useful than 0/1 in some
+ // cases.
+ if (X86CC == X86::COND_B)
+ return DAG.getNode(ISD::AND, DL, MVT::i8,
+ DAG.getNode(X86ISD::SETCC_CARRY, DL, MVT::i8,
+ DAG.getConstant(X86CC, MVT::i8), EFLAG),
+ DAG.getConstant(1, MVT::i8));
+
+ return SDValue();
+}
+
+static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG,
+ const X86TargetLowering *XTLI) {
+ SDValue Op0 = N->getOperand(0);
+ // Transform (SINT_TO_FP (i64 ...)) into an x87 operation if we have
+ // a 32-bit target where SSE doesn't support i64->FP operations.
+ if (Op0.getOpcode() == ISD::LOAD) {
+ LoadSDNode *Ld = cast<LoadSDNode>(Op0.getNode());
+ EVT VT = Ld->getValueType(0);
+ if (!Ld->isVolatile() && !N->getValueType(0).isVector() &&
+ ISD::isNON_EXTLoad(Op0.getNode()) && Op0.hasOneUse() &&
+ !XTLI->getSubtarget()->is64Bit() &&
+ !DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
+ SDValue FILDChain = XTLI->BuildFILD(SDValue(N, 0), Ld->getValueType(0),
+ Ld->getChain(), Op0, DAG);
+ DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), FILDChain.getValue(1));
+ return FILDChain;
+ }
+ }
+ return SDValue();
+}
+
+// Optimize RES, EFLAGS = X86ISD::ADC LHS, RHS, EFLAGS
+static SDValue PerformADCCombine(SDNode *N, SelectionDAG &DAG,
+ X86TargetLowering::DAGCombinerInfo &DCI) {
+ // If the LHS and RHS of the ADC node are zero, then it can't overflow and
+ // the result is either zero or one (depending on the input carry bit).
+ // Strength reduce this down to a "set on carry" aka SETCC_CARRY&1.
+ if (X86::isZeroNode(N->getOperand(0)) &&
+ X86::isZeroNode(N->getOperand(1)) &&
+ // We don't have a good way to replace an EFLAGS use, so only do this when
+ // dead right now.
+ SDValue(N, 1).use_empty()) {
+ DebugLoc DL = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+ SDValue CarryOut = DAG.getConstant(0, N->getValueType(1));
+ SDValue Res1 = DAG.getNode(ISD::AND, DL, VT,
+ DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,
+ DAG.getConstant(X86::COND_B,MVT::i8),
+ N->getOperand(2)),
+ DAG.getConstant(1, VT));
+ return DCI.CombineTo(N, Res1, CarryOut);
+ }
+
+ return SDValue();
+}
+
+// fold (add Y, (sete X, 0)) -> adc 0, Y
+// (add Y, (setne X, 0)) -> sbb -1, Y
+// (sub (sete X, 0), Y) -> sbb 0, Y
+// (sub (setne X, 0), Y) -> adc -1, Y
+static SDValue OptimizeConditionalInDecrement(SDNode *N, SelectionDAG &DAG) {
+ DebugLoc DL = N->getDebugLoc();
+
+ // Look through ZExts.
+ SDValue Ext = N->getOperand(N->getOpcode() == ISD::SUB ? 1 : 0);
+ if (Ext.getOpcode() != ISD::ZERO_EXTEND || !Ext.hasOneUse())
+ return SDValue();
+
+ SDValue SetCC = Ext.getOperand(0);
+ if (SetCC.getOpcode() != X86ISD::SETCC || !SetCC.hasOneUse())
+ return SDValue();
+
+ X86::CondCode CC = (X86::CondCode)SetCC.getConstantOperandVal(0);
+ if (CC != X86::COND_E && CC != X86::COND_NE)
+ return SDValue();
+
+ SDValue Cmp = SetCC.getOperand(1);
+ if (Cmp.getOpcode() != X86ISD::CMP || !Cmp.hasOneUse() ||
+ !X86::isZeroNode(Cmp.getOperand(1)) ||
+ !Cmp.getOperand(0).getValueType().isInteger())
+ return SDValue();
+
+ SDValue CmpOp0 = Cmp.getOperand(0);
+ SDValue NewCmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32, CmpOp0,
+ DAG.getConstant(1, CmpOp0.getValueType()));
+
+ SDValue OtherVal = N->getOperand(N->getOpcode() == ISD::SUB ? 0 : 1);
+ if (CC == X86::COND_NE)
+ return DAG.getNode(N->getOpcode() == ISD::SUB ? X86ISD::ADC : X86ISD::SBB,
+ DL, OtherVal.getValueType(), OtherVal,
+ DAG.getConstant(-1ULL, OtherVal.getValueType()), NewCmp);
+ return DAG.getNode(N->getOpcode() == ISD::SUB ? X86ISD::SBB : X86ISD::ADC,
+ DL, OtherVal.getValueType(), OtherVal,
+ DAG.getConstant(0, OtherVal.getValueType()), NewCmp);
+}
+
+static SDValue PerformSubCombine(SDNode *N, SelectionDAG &DAG) {
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+
+ // X86 can't encode an immediate LHS of a sub. See if we can push the
+ // negation into a preceding instruction.
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op0)) {
+ // If the RHS of the sub is a XOR with one use and a constant, invert the
+ // immediate. Then add one to the LHS of the sub so we can turn
+ // X-Y -> X+~Y+1, saving one register.
+ if (Op1->hasOneUse() && Op1.getOpcode() == ISD::XOR &&
+ isa<ConstantSDNode>(Op1.getOperand(1))) {
+ APInt XorC = cast<ConstantSDNode>(Op1.getOperand(1))->getAPIntValue();
+ EVT VT = Op0.getValueType();
+ SDValue NewXor = DAG.getNode(ISD::XOR, Op1.getDebugLoc(), VT,
+ Op1.getOperand(0),
+ DAG.getConstant(~XorC, VT));
+ return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, NewXor,
+ DAG.getConstant(C->getAPIntValue()+1, VT));
+ }
+ }
+
+ return OptimizeConditionalInDecrement(N, DAG);
+}
+
+SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::EXTRACT_VECTOR_ELT:
+ return PerformEXTRACT_VECTOR_ELTCombine(N, DAG, *this);
+ case ISD::VSELECT:
+ case ISD::SELECT: return PerformSELECTCombine(N, DAG, Subtarget);
+ case X86ISD::CMOV: return PerformCMOVCombine(N, DAG, DCI);
+ case ISD::ADD: return OptimizeConditionalInDecrement(N, DAG);
+ case ISD::SUB: return PerformSubCombine(N, DAG);
+ case X86ISD::ADC: return PerformADCCombine(N, DAG, DCI);
+ case ISD::MUL: return PerformMulCombine(N, DAG, DCI);
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL: return PerformShiftCombine(N, DAG, Subtarget);
+ case ISD::AND: return PerformAndCombine(N, DAG, DCI, Subtarget);
+ case ISD::OR: return PerformOrCombine(N, DAG, DCI, Subtarget);
+ case ISD::LOAD: return PerformLOADCombine(N, DAG, Subtarget);
+ case ISD::STORE: return PerformSTORECombine(N, DAG, Subtarget);
+ case ISD::SINT_TO_FP: return PerformSINT_TO_FPCombine(N, DAG, this);
+ case ISD::FADD: return PerformFADDCombine(N, DAG, Subtarget);
+ case ISD::FSUB: return PerformFSUBCombine(N, DAG, Subtarget);
+ case X86ISD::FXOR:
+ case X86ISD::FOR: return PerformFORCombine(N, DAG);
+ case X86ISD::FAND: return PerformFANDCombine(N, DAG);
+ case X86ISD::BT: return PerformBTCombine(N, DAG, DCI);
+ case X86ISD::VZEXT_MOVL: return PerformVZEXT_MOVLCombine(N, DAG);
+ case ISD::ZERO_EXTEND: return PerformZExtCombine(N, DAG);
+ case X86ISD::SETCC: return PerformSETCCCombine(N, DAG);
+ case X86ISD::SHUFPS: // Handle all target specific shuffles
+ case X86ISD::SHUFPD:
+ case X86ISD::PALIGN:
+ case X86ISD::PUNPCKHBW:
+ case X86ISD::PUNPCKHWD:
+ case X86ISD::PUNPCKHDQ:
+ case X86ISD::PUNPCKHQDQ:
+ case X86ISD::UNPCKHPS:
+ case X86ISD::UNPCKHPD:
+ case X86ISD::VUNPCKHPSY:
+ case X86ISD::VUNPCKHPDY:
+ case X86ISD::PUNPCKLBW:
+ case X86ISD::PUNPCKLWD:
+ case X86ISD::PUNPCKLDQ:
+ case X86ISD::PUNPCKLQDQ:
+ case X86ISD::UNPCKLPS:
+ case X86ISD::UNPCKLPD:
+ case X86ISD::VUNPCKLPSY:
+ case X86ISD::VUNPCKLPDY:
+ case X86ISD::MOVHLPS:
+ case X86ISD::MOVLHPS:
+ case X86ISD::PSHUFD:
+ case X86ISD::PSHUFHW:
+ case X86ISD::PSHUFLW:
+ case X86ISD::MOVSS:
+ case X86ISD::MOVSD:
+ case X86ISD::VPERMILPS:
+ case X86ISD::VPERMILPSY:
+ case X86ISD::VPERMILPD:
+ case X86ISD::VPERMILPDY:
+ case X86ISD::VPERM2F128:
+ case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, DCI,Subtarget);
+ }
+
+ return SDValue();
+}
+
+/// isTypeDesirableForOp - Return true if the target has native support for
+/// the specified value type and it is 'desirable' to use the type for the
+/// given node type. e.g. On x86 i16 is legal, but undesirable since i16
+/// instruction encodings are longer and some i16 instructions are slow.
+bool X86TargetLowering::isTypeDesirableForOp(unsigned Opc, EVT VT) const {
+ if (!isTypeLegal(VT))
+ return false;
+ if (VT != MVT::i16)
+ return true;
+
+ switch (Opc) {
+ default:
+ return true;
+ case ISD::LOAD:
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND:
+ case ISD::SHL:
+ case ISD::SRL:
+ case ISD::SUB:
+ case ISD::ADD:
+ case ISD::MUL:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ return false;
+ }
+}
+
+/// IsDesirableToPromoteOp - This method query the target whether it is
+/// beneficial for dag combiner to promote the specified node. If true, it
+/// should return the desired promotion type by reference.
+bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const {
+ EVT VT = Op.getValueType();
+ if (VT != MVT::i16)
+ return false;
+
+ bool Promote = false;
+ bool Commute = false;
+ switch (Op.getOpcode()) {
+ default: break;
+ case ISD::LOAD: {
+ LoadSDNode *LD = cast<LoadSDNode>(Op);
+ // If the non-extending load has a single use and it's not live out, then it
+ // might be folded.
+ if (LD->getExtensionType() == ISD::NON_EXTLOAD /*&&
+ Op.hasOneUse()*/) {
+ for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
+ UE = Op.getNode()->use_end(); UI != UE; ++UI) {
+ // The only case where we'd want to promote LOAD (rather then it being
+ // promoted as an operand is when it's only use is liveout.
+ if (UI->getOpcode() != ISD::CopyToReg)
+ return false;
+ }
+ }
+ Promote = true;
+ break;
+ }
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND:
+ Promote = true;
+ break;
+ case ISD::SHL:
+ case ISD::SRL: {
+ SDValue N0 = Op.getOperand(0);
+ // Look out for (store (shl (load), x)).
+ if (MayFoldLoad(N0) && MayFoldIntoStore(Op))
+ return false;
+ Promote = true;
+ break;
+ }
+ case ISD::ADD:
+ case ISD::MUL:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ Commute = true;
+ // fallthrough
+ case ISD::SUB: {
+ SDValue N0 = Op.getOperand(0);
+ SDValue N1 = Op.getOperand(1);
+ if (!Commute && MayFoldLoad(N1))
+ return false;
+ // Avoid disabling potential load folding opportunities.
+ if (MayFoldLoad(N0) && (!isa<ConstantSDNode>(N1) || MayFoldIntoStore(Op)))
+ return false;
+ if (MayFoldLoad(N1) && (!isa<ConstantSDNode>(N0) || MayFoldIntoStore(Op)))
+ return false;
+ Promote = true;
+ }
+ }
+
+ PVT = MVT::i32;
+ return Promote;
+}
+
+//===----------------------------------------------------------------------===//
+// X86 Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
+ InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue());
+
+ std::string AsmStr = IA->getAsmString();
+
+ // TODO: should remove alternatives from the asmstring: "foo {a|b}" -> "foo a"
+ SmallVector<StringRef, 4> AsmPieces;
+ SplitString(AsmStr, AsmPieces, ";\n");
+
+ switch (AsmPieces.size()) {
+ default: return false;
+ case 1:
+ AsmStr = AsmPieces[0];
+ AsmPieces.clear();
+ SplitString(AsmStr, AsmPieces, " \t"); // Split with whitespace.
+
+ // FIXME: this should verify that we are targeting a 486 or better. If not,
+ // we will turn this bswap into something that will be lowered to logical ops
+ // instead of emitting the bswap asm. For now, we don't support 486 or lower
+ // so don't worry about this.
+ // bswap $0
+ if (AsmPieces.size() == 2 &&
+ (AsmPieces[0] == "bswap" ||
+ AsmPieces[0] == "bswapq" ||
+ AsmPieces[0] == "bswapl") &&
+ (AsmPieces[1] == "$0" ||
+ AsmPieces[1] == "${0:q}")) {
+ // No need to check constraints, nothing other than the equivalent of
+ // "=r,0" would be valid here.
+ IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
+ if (!Ty || Ty->getBitWidth() % 16 != 0)
+ return false;
+ return IntrinsicLowering::LowerToByteSwap(CI);
+ }
+ // rorw $$8, ${0:w} --> llvm.bswap.i16
+ if (CI->getType()->isIntegerTy(16) &&
+ AsmPieces.size() == 3 &&
+ (AsmPieces[0] == "rorw" || AsmPieces[0] == "rolw") &&
+ AsmPieces[1] == "$$8," &&
+ AsmPieces[2] == "${0:w}" &&
+ IA->getConstraintString().compare(0, 5, "=r,0,") == 0) {
+ AsmPieces.clear();
+ const std::string &ConstraintsStr = IA->getConstraintString();
+ SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ",");
+ std::sort(AsmPieces.begin(), AsmPieces.end());
+ if (AsmPieces.size() == 4 &&
+ AsmPieces[0] == "~{cc}" &&
+ AsmPieces[1] == "~{dirflag}" &&
+ AsmPieces[2] == "~{flags}" &&
+ AsmPieces[3] == "~{fpsr}") {
+ IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
+ if (!Ty || Ty->getBitWidth() % 16 != 0)
+ return false;
+ return IntrinsicLowering::LowerToByteSwap(CI);
+ }
+ }
+ break;
+ case 3:
+ if (CI->getType()->isIntegerTy(32) &&
+ IA->getConstraintString().compare(0, 5, "=r,0,") == 0) {
+ SmallVector<StringRef, 4> Words;
+ SplitString(AsmPieces[0], Words, " \t,");
+ if (Words.size() == 3 && Words[0] == "rorw" && Words[1] == "$$8" &&
+ Words[2] == "${0:w}") {
+ Words.clear();
+ SplitString(AsmPieces[1], Words, " \t,");
+ if (Words.size() == 3 && Words[0] == "rorl" && Words[1] == "$$16" &&
+ Words[2] == "$0") {
+ Words.clear();
+ SplitString(AsmPieces[2], Words, " \t,");
+ if (Words.size() == 3 && Words[0] == "rorw" && Words[1] == "$$8" &&
+ Words[2] == "${0:w}") {
+ AsmPieces.clear();
+ const std::string &ConstraintsStr = IA->getConstraintString();
+ SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ",");
+ std::sort(AsmPieces.begin(), AsmPieces.end());
+ if (AsmPieces.size() == 4 &&
+ AsmPieces[0] == "~{cc}" &&
+ AsmPieces[1] == "~{dirflag}" &&
+ AsmPieces[2] == "~{flags}" &&
+ AsmPieces[3] == "~{fpsr}") {
+ IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
+ if (!Ty || Ty->getBitWidth() % 16 != 0)
+ return false;
+ return IntrinsicLowering::LowerToByteSwap(CI);
+ }
+ }
+ }
+ }
+ }
+
+ if (CI->getType()->isIntegerTy(64)) {
+ InlineAsm::ConstraintInfoVector Constraints = IA->ParseConstraints();
+ if (Constraints.size() >= 2 &&
+ Constraints[0].Codes.size() == 1 && Constraints[0].Codes[0] == "A" &&
+ Constraints[1].Codes.size() == 1 && Constraints[1].Codes[0] == "0") {
+ // bswap %eax / bswap %edx / xchgl %eax, %edx -> llvm.bswap.i64
+ SmallVector<StringRef, 4> Words;
+ SplitString(AsmPieces[0], Words, " \t");
+ if (Words.size() == 2 && Words[0] == "bswap" && Words[1] == "%eax") {
+ Words.clear();
+ SplitString(AsmPieces[1], Words, " \t");
+ if (Words.size() == 2 && Words[0] == "bswap" && Words[1] == "%edx") {
+ Words.clear();
+ SplitString(AsmPieces[2], Words, " \t,");
+ if (Words.size() == 3 && Words[0] == "xchgl" && Words[1] == "%eax" &&
+ Words[2] == "%edx") {
+ IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
+ if (!Ty || Ty->getBitWidth() % 16 != 0)
+ return false;
+ return IntrinsicLowering::LowerToByteSwap(CI);
+ }
+ }
+ }
+ }
+ }
+ break;
+ }
+ return false;
+}
+
+
+
+/// getConstraintType - Given a constraint letter, return the type of
+/// constraint it is for this target.
+X86TargetLowering::ConstraintType
+X86TargetLowering::getConstraintType(const std::string &Constraint) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ case 'R':
+ case 'q':
+ case 'Q':
+ case 'f':
+ case 't':
+ case 'u':
+ case 'y':
+ case 'x':
+ case 'Y':
+ case 'l':
+ return C_RegisterClass;
+ case 'a':
+ case 'b':
+ case 'c':
+ case 'd':
+ case 'S':
+ case 'D':
+ case 'A':
+ return C_Register;
+ case 'I':
+ case 'J':
+ case 'K':
+ case 'L':
+ case 'M':
+ case 'N':
+ case 'G':
+ case 'C':
+ case 'e':
+ case 'Z':
+ return C_Other;
+ default:
+ break;
+ }
+ }
+ return TargetLowering::getConstraintType(Constraint);
+}
+
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+ X86TargetLowering::getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const {
+ ConstraintWeight weight = CW_Invalid;
+ Value *CallOperandVal = info.CallOperandVal;
+ // If we don't have a value, we can't do a match,
+ // but allow it at the lowest weight.
+ if (CallOperandVal == NULL)
+ return CW_Default;
+ Type *type = CallOperandVal->getType();
+ // Look at the constraint type.
+ switch (*constraint) {
+ default:
+ weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
+ case 'R':
+ case 'q':
+ case 'Q':
+ case 'a':
+ case 'b':
+ case 'c':
+ case 'd':
+ case 'S':
+ case 'D':
+ case 'A':
+ if (CallOperandVal->getType()->isIntegerTy())
+ weight = CW_SpecificReg;
+ break;
+ case 'f':
+ case 't':
+ case 'u':
+ if (type->isFloatingPointTy())
+ weight = CW_SpecificReg;
+ break;
+ case 'y':
+ if (type->isX86_MMXTy() && Subtarget->hasMMX())
+ weight = CW_SpecificReg;
+ break;
+ case 'x':
+ case 'Y':
+ if ((type->getPrimitiveSizeInBits() == 128) && Subtarget->hasXMM())
+ weight = CW_Register;
+ break;
+ case 'I':
+ if (ConstantInt *C = dyn_cast<ConstantInt>(info.CallOperandVal)) {
+ if (C->getZExtValue() <= 31)
+ weight = CW_Constant;
+ }
+ break;
+ case 'J':
+ if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+ if (C->getZExtValue() <= 63)
+ weight = CW_Constant;
+ }
+ break;
+ case 'K':
+ if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+ if ((C->getSExtValue() >= -0x80) && (C->getSExtValue() <= 0x7f))
+ weight = CW_Constant;
+ }
+ break;
+ case 'L':
+ if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+ if ((C->getZExtValue() == 0xff) || (C->getZExtValue() == 0xffff))
+ weight = CW_Constant;
+ }
+ break;
+ case 'M':
+ if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+ if (C->getZExtValue() <= 3)
+ weight = CW_Constant;
+ }
+ break;
+ case 'N':
+ if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+ if (C->getZExtValue() <= 0xff)
+ weight = CW_Constant;
+ }
+ break;
+ case 'G':
+ case 'C':
+ if (dyn_cast<ConstantFP>(CallOperandVal)) {
+ weight = CW_Constant;
+ }
+ break;
+ case 'e':
+ if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+ if ((C->getSExtValue() >= -0x80000000LL) &&
+ (C->getSExtValue() <= 0x7fffffffLL))
+ weight = CW_Constant;
+ }
+ break;
+ case 'Z':
+ if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+ if (C->getZExtValue() <= 0xffffffff)
+ weight = CW_Constant;
+ }
+ break;
+ }
+ return weight;
+}
+
+/// LowerXConstraint - try to replace an X constraint, which matches anything,
+/// with another that has more specific requirements based on the type of the
+/// corresponding operand.
+const char *X86TargetLowering::
+LowerXConstraint(EVT ConstraintVT) const {
+ // FP X constraints get lowered to SSE1/2 registers if available, otherwise
+ // 'f' like normal targets.
+ if (ConstraintVT.isFloatingPoint()) {
+ if (Subtarget->hasXMMInt())
+ return "Y";
+ if (Subtarget->hasXMM())
+ return "x";
+ }
+
+ return TargetLowering::LowerXConstraint(ConstraintVT);
+}
+
+/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
+/// vector. If it is invalid, don't add anything to Ops.
+void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
+ std::string &Constraint,
+ std::vector<SDValue>&Ops,
+ SelectionDAG &DAG) const {
+ SDValue Result(0, 0);
+
+ // Only support length 1 constraints for now.
+ if (Constraint.length() > 1) return;
+
+ char ConstraintLetter = Constraint[0];
+ switch (ConstraintLetter) {
+ default: break;
+ case 'I':
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ if (C->getZExtValue() <= 31) {
+ Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType());
+ break;
+ }
+ }
+ return;
+ case 'J':
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ if (C->getZExtValue() <= 63) {
+ Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType());
+ break;
+ }
+ }
+ return;
+ case 'K':
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ if ((int8_t)C->getSExtValue() == C->getSExtValue()) {
+ Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType());
+ break;
+ }
+ }
+ return;
+ case 'N':
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ if (C->getZExtValue() <= 255) {
+ Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType());
+ break;
+ }
+ }
+ return;
+ case 'e': {
+ // 32-bit signed value
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ if (ConstantInt::isValueValidForType(Type::getInt32Ty(*DAG.getContext()),
+ C->getSExtValue())) {
+ // Widen to 64 bits here to get it sign extended.
+ Result = DAG.getTargetConstant(C->getSExtValue(), MVT::i64);
+ break;
+ }
+ // FIXME gcc accepts some relocatable values here too, but only in certain
+ // memory models; it's complicated.
+ }
+ return;
+ }
+ case 'Z': {
+ // 32-bit unsigned value
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ if (ConstantInt::isValueValidForType(Type::getInt32Ty(*DAG.getContext()),
+ C->getZExtValue())) {
+ Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType());
+ break;
+ }
+ }
+ // FIXME gcc accepts some relocatable values here too, but only in certain
+ // memory models; it's complicated.
+ return;
+ }
+ case 'i': {
+ // Literal immediates are always ok.
+ if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op)) {
+ // Widen to 64 bits here to get it sign extended.
+ Result = DAG.getTargetConstant(CST->getSExtValue(), MVT::i64);
+ break;
+ }
+
+ // In any sort of PIC mode addresses need to be computed at runtime by
+ // adding in a register or some sort of table lookup. These can't
+ // be used as immediates.
+ if (Subtarget->isPICStyleGOT() || Subtarget->isPICStyleStubPIC())
+ return;
+
+ // If we are in non-pic codegen mode, we allow the address of a global (with
+ // an optional displacement) to be used with 'i'.
+ GlobalAddressSDNode *GA = 0;
+ int64_t Offset = 0;
+
+ // Match either (GA), (GA+C), (GA+C1+C2), etc.
+ while (1) {
+ if ((GA = dyn_cast<GlobalAddressSDNode>(Op))) {
+ Offset += GA->getOffset();
+ break;
+ } else if (Op.getOpcode() == ISD::ADD) {
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ Offset += C->getZExtValue();
+ Op = Op.getOperand(0);
+ continue;
+ }
+ } else if (Op.getOpcode() == ISD::SUB) {
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ Offset += -C->getZExtValue();
+ Op = Op.getOperand(0);
+ continue;
+ }
+ }
+
+ // Otherwise, this isn't something we can handle, reject it.
+ return;
+ }
+
+ const GlobalValue *GV = GA->getGlobal();
+ // If we require an extra load to get this address, as in PIC mode, we
+ // can't accept it.
+ if (isGlobalStubReference(Subtarget->ClassifyGlobalReference(GV,
+ getTargetMachine())))
+ return;
+
+ Result = DAG.getTargetGlobalAddress(GV, Op.getDebugLoc(),
+ GA->getValueType(0), Offset);
+ break;
+ }
+ }
+
+ if (Result.getNode()) {
+ Ops.push_back(Result);
+ return;
+ }
+ return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
+}
+
+std::pair<unsigned, const TargetRegisterClass*>
+X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const {
+ // First, see if this is a constraint that directly corresponds to an LLVM
+ // register class.
+ if (Constraint.size() == 1) {
+ // GCC Constraint Letters
+ switch (Constraint[0]) {
+ default: break;
+ // TODO: Slight differences here in allocation order and leaving
+ // RIP in the class. Do they matter any more here than they do
+ // in the normal allocation?
+ case 'q': // GENERAL_REGS in 64-bit mode, Q_REGS in 32-bit mode.
+ if (Subtarget->is64Bit()) {
+ if (VT == MVT::i32 || VT == MVT::f32)
+ return std::make_pair(0U, X86::GR32RegisterClass);
+ else if (VT == MVT::i16)
+ return std::make_pair(0U, X86::GR16RegisterClass);
+ else if (VT == MVT::i8 || VT == MVT::i1)
+ return std::make_pair(0U, X86::GR8RegisterClass);
+ else if (VT == MVT::i64 || VT == MVT::f64)
+ return std::make_pair(0U, X86::GR64RegisterClass);
+ break;
+ }
+ // 32-bit fallthrough
+ case 'Q': // Q_REGS
+ if (VT == MVT::i32 || VT == MVT::f32)
+ return std::make_pair(0U, X86::GR32_ABCDRegisterClass);
+ else if (VT == MVT::i16)
+ return std::make_pair(0U, X86::GR16_ABCDRegisterClass);
+ else if (VT == MVT::i8 || VT == MVT::i1)
+ return std::make_pair(0U, X86::GR8_ABCD_LRegisterClass);
+ else if (VT == MVT::i64)
+ return std::make_pair(0U, X86::GR64_ABCDRegisterClass);
+ break;
+ case 'r': // GENERAL_REGS
+ case 'l': // INDEX_REGS
+ if (VT == MVT::i8 || VT == MVT::i1)
+ return std::make_pair(0U, X86::GR8RegisterClass);
+ if (VT == MVT::i16)
+ return std::make_pair(0U, X86::GR16RegisterClass);
+ if (VT == MVT::i32 || VT == MVT::f32 || !Subtarget->is64Bit())
+ return std::make_pair(0U, X86::GR32RegisterClass);
+ return std::make_pair(0U, X86::GR64RegisterClass);
+ case 'R': // LEGACY_REGS
+ if (VT == MVT::i8 || VT == MVT::i1)
+ return std::make_pair(0U, X86::GR8_NOREXRegisterClass);
+ if (VT == MVT::i16)
+ return std::make_pair(0U, X86::GR16_NOREXRegisterClass);
+ if (VT == MVT::i32 || !Subtarget->is64Bit())
+ return std::make_pair(0U, X86::GR32_NOREXRegisterClass);
+ return std::make_pair(0U, X86::GR64_NOREXRegisterClass);
+ case 'f': // FP Stack registers.
+ // If SSE is enabled for this VT, use f80 to ensure the isel moves the
+ // value to the correct fpstack register class.
+ if (VT == MVT::f32 && !isScalarFPTypeInSSEReg(VT))
+ return std::make_pair(0U, X86::RFP32RegisterClass);
+ if (VT == MVT::f64 && !isScalarFPTypeInSSEReg(VT))
+ return std::make_pair(0U, X86::RFP64RegisterClass);
+ return std::make_pair(0U, X86::RFP80RegisterClass);
+ case 'y': // MMX_REGS if MMX allowed.
+ if (!Subtarget->hasMMX()) break;
+ return std::make_pair(0U, X86::VR64RegisterClass);
+ case 'Y': // SSE_REGS if SSE2 allowed
+ if (!Subtarget->hasXMMInt()) break;
+ // FALL THROUGH.
+ case 'x': // SSE_REGS if SSE1 allowed
+ if (!Subtarget->hasXMM()) break;
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: break;
+ // Scalar SSE types.
+ case MVT::f32:
+ case MVT::i32:
+ return std::make_pair(0U, X86::FR32RegisterClass);
+ case MVT::f64:
+ case MVT::i64:
+ return std::make_pair(0U, X86::FR64RegisterClass);
+ // Vector types.
+ case MVT::v16i8:
+ case MVT::v8i16:
+ case MVT::v4i32:
+ case MVT::v2i64:
+ case MVT::v4f32:
+ case MVT::v2f64:
+ return std::make_pair(0U, X86::VR128RegisterClass);
+ }
+ break;
+ }
+ }
+
+ // Use the default implementation in TargetLowering to convert the register
+ // constraint into a member of a register class.
+ std::pair<unsigned, const TargetRegisterClass*> Res;
+ Res = TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+
+ // Not found as a standard register?
+ if (Res.second == 0) {
+ // Map st(0) -> st(7) -> ST0
+ if (Constraint.size() == 7 && Constraint[0] == '{' &&
+ tolower(Constraint[1]) == 's' &&
+ tolower(Constraint[2]) == 't' &&
+ Constraint[3] == '(' &&
+ (Constraint[4] >= '0' && Constraint[4] <= '7') &&
+ Constraint[5] == ')' &&
+ Constraint[6] == '}') {
+
+ Res.first = X86::ST0+Constraint[4]-'0';
+ Res.second = X86::RFP80RegisterClass;
+ return Res;
+ }
+
+ // GCC allows "st(0)" to be called just plain "st".
+ if (StringRef("{st}").equals_lower(Constraint)) {
+ Res.first = X86::ST0;
+ Res.second = X86::RFP80RegisterClass;
+ return Res;
+ }
+
+ // flags -> EFLAGS
+ if (StringRef("{flags}").equals_lower(Constraint)) {
+ Res.first = X86::EFLAGS;
+ Res.second = X86::CCRRegisterClass;
+ return Res;
+ }
+
+ // 'A' means EAX + EDX.
+ if (Constraint == "A") {
+ Res.first = X86::EAX;
+ Res.second = X86::GR32_ADRegisterClass;
+ return Res;
+ }
+ return Res;
+ }
+
+ // Otherwise, check to see if this is a register class of the wrong value
+ // type. For example, we want to map "{ax},i32" -> {eax}, we don't want it to
+ // turn into {ax},{dx}.
+ if (Res.second->hasType(VT))
+ return Res; // Correct type already, nothing to do.
+
+ // All of the single-register GCC register classes map their values onto
+ // 16-bit register pieces "ax","dx","cx","bx","si","di","bp","sp". If we
+ // really want an 8-bit or 32-bit register, map to the appropriate register
+ // class and return the appropriate register.
+ if (Res.second == X86::GR16RegisterClass) {
+ if (VT == MVT::i8) {
+ unsigned DestReg = 0;
+ switch (Res.first) {
+ default: break;
+ case X86::AX: DestReg = X86::AL; break;
+ case X86::DX: DestReg = X86::DL; break;
+ case X86::CX: DestReg = X86::CL; break;
+ case X86::BX: DestReg = X86::BL; break;
+ }
+ if (DestReg) {
+ Res.first = DestReg;
+ Res.second = X86::GR8RegisterClass;
+ }
+ } else if (VT == MVT::i32) {
+ unsigned DestReg = 0;
+ switch (Res.first) {
+ default: break;
+ case X86::AX: DestReg = X86::EAX; break;
+ case X86::DX: DestReg = X86::EDX; break;
+ case X86::CX: DestReg = X86::ECX; break;
+ case X86::BX: DestReg = X86::EBX; break;
+ case X86::SI: DestReg = X86::ESI; break;
+ case X86::DI: DestReg = X86::EDI; break;
+ case X86::BP: DestReg = X86::EBP; break;
+ case X86::SP: DestReg = X86::ESP; break;
+ }
+ if (DestReg) {
+ Res.first = DestReg;
+ Res.second = X86::GR32RegisterClass;
+ }
+ } else if (VT == MVT::i64) {
+ unsigned DestReg = 0;
+ switch (Res.first) {
+ default: break;
+ case X86::AX: DestReg = X86::RAX; break;
+ case X86::DX: DestReg = X86::RDX; break;
+ case X86::CX: DestReg = X86::RCX; break;
+ case X86::BX: DestReg = X86::RBX; break;
+ case X86::SI: DestReg = X86::RSI; break;
+ case X86::DI: DestReg = X86::RDI; break;
+ case X86::BP: DestReg = X86::RBP; break;
+ case X86::SP: DestReg = X86::RSP; break;
+ }
+ if (DestReg) {
+ Res.first = DestReg;
+ Res.second = X86::GR64RegisterClass;
+ }
+ }
+ } else if (Res.second == X86::FR32RegisterClass ||
+ Res.second == X86::FR64RegisterClass ||
+ Res.second == X86::VR128RegisterClass) {
+ // Handle references to XMM physical registers that got mapped into the
+ // wrong class. This can happen with constraints like {xmm0} where the
+ // target independent register mapper will just pick the first match it can
+ // find, ignoring the required type.
+ if (VT == MVT::f32)
+ Res.second = X86::FR32RegisterClass;
+ else if (VT == MVT::f64)
+ Res.second = X86::FR64RegisterClass;
+ else if (X86::VR128RegisterClass->hasType(VT))
+ Res.second = X86::VR128RegisterClass;
+ }
+
+ return Res;
+}
diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.h b/contrib/llvm/lib/Target/X86/X86ISelLowering.h
new file mode 100644
index 0000000..342a5e6
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.h
@@ -0,0 +1,979 @@
+//===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that X86 uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86ISELLOWERING_H
+#define X86ISELLOWERING_H
+
+#include "X86Subtarget.h"
+#include "X86RegisterInfo.h"
+#include "X86MachineFunctionInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+
+namespace llvm {
+ namespace X86ISD {
+ // X86 Specific DAG Nodes
+ enum NodeType {
+ // Start the numbering where the builtin ops leave off.
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+ /// BSF - Bit scan forward.
+ /// BSR - Bit scan reverse.
+ BSF,
+ BSR,
+
+ /// SHLD, SHRD - Double shift instructions. These correspond to
+ /// X86::SHLDxx and X86::SHRDxx instructions.
+ SHLD,
+ SHRD,
+
+ /// FAND - Bitwise logical AND of floating point values. This corresponds
+ /// to X86::ANDPS or X86::ANDPD.
+ FAND,
+
+ /// FOR - Bitwise logical OR of floating point values. This corresponds
+ /// to X86::ORPS or X86::ORPD.
+ FOR,
+
+ /// FXOR - Bitwise logical XOR of floating point values. This corresponds
+ /// to X86::XORPS or X86::XORPD.
+ FXOR,
+
+ /// FSRL - Bitwise logical right shift of floating point values. These
+ /// corresponds to X86::PSRLDQ.
+ FSRL,
+
+ /// CALL - These operations represent an abstract X86 call
+ /// instruction, which includes a bunch of information. In particular the
+ /// operands of these node are:
+ ///
+ /// #0 - The incoming token chain
+ /// #1 - The callee
+ /// #2 - The number of arg bytes the caller pushes on the stack.
+ /// #3 - The number of arg bytes the callee pops off the stack.
+ /// #4 - The value to pass in AL/AX/EAX (optional)
+ /// #5 - The value to pass in DL/DX/EDX (optional)
+ ///
+ /// The result values of these nodes are:
+ ///
+ /// #0 - The outgoing token chain
+ /// #1 - The first register result value (optional)
+ /// #2 - The second register result value (optional)
+ ///
+ CALL,
+
+ /// RDTSC_DAG - This operation implements the lowering for
+ /// readcyclecounter
+ RDTSC_DAG,
+
+ /// X86 compare and logical compare instructions.
+ CMP, COMI, UCOMI,
+
+ /// X86 bit-test instructions.
+ BT,
+
+ /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS
+ /// operand, usually produced by a CMP instruction.
+ SETCC,
+
+ // Same as SETCC except it's materialized with a sbb and the value is all
+ // one's or all zero's.
+ SETCC_CARRY, // R = carry_bit ? ~0 : 0
+
+ /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
+ /// Operands are two FP values to compare; result is a mask of
+ /// 0s or 1s. Generally DTRT for C/C++ with NaNs.
+ FSETCCss, FSETCCsd,
+
+ /// X86 MOVMSK{pd|ps}, extracts sign bits of two or four FP values,
+ /// result in an integer GPR. Needs masking for scalar result.
+ FGETSIGNx86,
+
+ /// X86 conditional moves. Operand 0 and operand 1 are the two values
+ /// to select from. Operand 2 is the condition code, and operand 3 is the
+ /// flag operand produced by a CMP or TEST instruction. It also writes a
+ /// flag result.
+ CMOV,
+
+ /// X86 conditional branches. Operand 0 is the chain operand, operand 1
+ /// is the block to branch if condition is true, operand 2 is the
+ /// condition code, and operand 3 is the flag operand produced by a CMP
+ /// or TEST instruction.
+ BRCOND,
+
+ /// Return with a flag operand. Operand 0 is the chain operand, operand
+ /// 1 is the number of bytes of stack to pop.
+ RET_FLAG,
+
+ /// REP_STOS - Repeat fill, corresponds to X86::REP_STOSx.
+ REP_STOS,
+
+ /// REP_MOVS - Repeat move, corresponds to X86::REP_MOVSx.
+ REP_MOVS,
+
+ /// GlobalBaseReg - On Darwin, this node represents the result of the popl
+ /// at function entry, used for PIC code.
+ GlobalBaseReg,
+
+ /// Wrapper - A wrapper node for TargetConstantPool,
+ /// TargetExternalSymbol, and TargetGlobalAddress.
+ Wrapper,
+
+ /// WrapperRIP - Special wrapper used under X86-64 PIC mode for RIP
+ /// relative displacements.
+ WrapperRIP,
+
+ /// MOVQ2DQ - Copies a 64-bit value from an MMX vector to the low word
+ /// of an XMM vector, with the high word zero filled.
+ MOVQ2DQ,
+
+ /// MOVDQ2Q - Copies a 64-bit value from the low word of an XMM vector
+ /// to an MMX vector. If you think this is too close to the previous
+ /// mnemonic, so do I; blame Intel.
+ MOVDQ2Q,
+
+ /// PEXTRB - Extract an 8-bit value from a vector and zero extend it to
+ /// i32, corresponds to X86::PEXTRB.
+ PEXTRB,
+
+ /// PEXTRW - Extract a 16-bit value from a vector and zero extend it to
+ /// i32, corresponds to X86::PEXTRW.
+ PEXTRW,
+
+ /// INSERTPS - Insert any element of a 4 x float vector into any element
+ /// of a destination 4 x floatvector.
+ INSERTPS,
+
+ /// PINSRB - Insert the lower 8-bits of a 32-bit value to a vector,
+ /// corresponds to X86::PINSRB.
+ PINSRB,
+
+ /// PINSRW - Insert the lower 16-bits of a 32-bit value to a vector,
+ /// corresponds to X86::PINSRW.
+ PINSRW, MMX_PINSRW,
+
+ /// PSHUFB - Shuffle 16 8-bit values within a vector.
+ PSHUFB,
+
+ /// ANDNP - Bitwise Logical AND NOT of Packed FP values.
+ ANDNP,
+
+ /// PSIGNB/W/D - Copy integer sign.
+ PSIGNB, PSIGNW, PSIGND,
+
+ /// BLEND family of opcodes
+ BLENDV,
+
+ /// FHADD - Floating point horizontal add.
+ FHADD,
+
+ /// FHSUB - Floating point horizontal sub.
+ FHSUB,
+
+ /// FMAX, FMIN - Floating point max and min.
+ ///
+ FMAX, FMIN,
+
+ /// FRSQRT, FRCP - Floating point reciprocal-sqrt and reciprocal
+ /// approximation. Note that these typically require refinement
+ /// in order to obtain suitable precision.
+ FRSQRT, FRCP,
+
+ // TLSADDR - Thread Local Storage.
+ TLSADDR,
+
+ // TLSCALL - Thread Local Storage. When calling to an OS provided
+ // thunk at the address from an earlier relocation.
+ TLSCALL,
+
+ // EH_RETURN - Exception Handling helpers.
+ EH_RETURN,
+
+ /// TC_RETURN - Tail call return.
+ /// operand #0 chain
+ /// operand #1 callee (register or absolute)
+ /// operand #2 stack adjustment
+ /// operand #3 optional in flag
+ TC_RETURN,
+
+ // VZEXT_MOVL - Vector move low and zero extend.
+ VZEXT_MOVL,
+
+ // VSHL, VSRL - Vector logical left / right shift.
+ VSHL, VSRL,
+
+ // CMPPD, CMPPS - Vector double/float comparison.
+ // CMPPD, CMPPS - Vector double/float comparison.
+ CMPPD, CMPPS,
+
+ // PCMP* - Vector integer comparisons.
+ PCMPEQB, PCMPEQW, PCMPEQD, PCMPEQQ,
+ PCMPGTB, PCMPGTW, PCMPGTD, PCMPGTQ,
+
+ // ADD, SUB, SMUL, etc. - Arithmetic operations with FLAGS results.
+ ADD, SUB, ADC, SBB, SMUL,
+ INC, DEC, OR, XOR, AND,
+
+ ANDN, // ANDN - Bitwise AND NOT with FLAGS results.
+
+ UMUL, // LOW, HI, FLAGS = umul LHS, RHS
+
+ // MUL_IMM - X86 specific multiply by immediate.
+ MUL_IMM,
+
+ // PTEST - Vector bitwise comparisons
+ PTEST,
+
+ // TESTP - Vector packed fp sign bitwise comparisons
+ TESTP,
+
+ // Several flavors of instructions with vector shuffle behaviors.
+ PALIGN,
+ PSHUFD,
+ PSHUFHW,
+ PSHUFLW,
+ PSHUFHW_LD,
+ PSHUFLW_LD,
+ SHUFPD,
+ SHUFPS,
+ MOVDDUP,
+ MOVSHDUP,
+ MOVSLDUP,
+ MOVSHDUP_LD,
+ MOVSLDUP_LD,
+ MOVLHPS,
+ MOVLHPD,
+ MOVHLPS,
+ MOVHLPD,
+ MOVLPS,
+ MOVLPD,
+ MOVSD,
+ MOVSS,
+ UNPCKLPS,
+ UNPCKLPD,
+ VUNPCKLPSY,
+ VUNPCKLPDY,
+ UNPCKHPS,
+ UNPCKHPD,
+ VUNPCKHPSY,
+ VUNPCKHPDY,
+ PUNPCKLBW,
+ PUNPCKLWD,
+ PUNPCKLDQ,
+ PUNPCKLQDQ,
+ PUNPCKHBW,
+ PUNPCKHWD,
+ PUNPCKHDQ,
+ PUNPCKHQDQ,
+ VPERMILPS,
+ VPERMILPSY,
+ VPERMILPD,
+ VPERMILPDY,
+ VPERM2F128,
+ VBROADCAST,
+
+ // VASTART_SAVE_XMM_REGS - Save xmm argument registers to the stack,
+ // according to %al. An operator is needed so that this can be expanded
+ // with control flow.
+ VASTART_SAVE_XMM_REGS,
+
+ // WIN_ALLOCA - Windows's _chkstk call to do stack probing.
+ WIN_ALLOCA,
+
+ // SEG_ALLOCA - For allocating variable amounts of stack space when using
+ // segmented stacks. Check if the current stacklet has enough space, and
+ // falls back to heap allocation if not.
+ SEG_ALLOCA,
+
+ // Memory barrier
+ MEMBARRIER,
+ MFENCE,
+ SFENCE,
+ LFENCE,
+
+ // ATOMADD64_DAG, ATOMSUB64_DAG, ATOMOR64_DAG, ATOMAND64_DAG,
+ // ATOMXOR64_DAG, ATOMNAND64_DAG, ATOMSWAP64_DAG -
+ // Atomic 64-bit binary operations.
+ ATOMADD64_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
+ ATOMSUB64_DAG,
+ ATOMOR64_DAG,
+ ATOMXOR64_DAG,
+ ATOMAND64_DAG,
+ ATOMNAND64_DAG,
+ ATOMSWAP64_DAG,
+
+ // LCMPXCHG_DAG, LCMPXCHG8_DAG, LCMPXCHG16_DAG - Compare and swap.
+ LCMPXCHG_DAG,
+ LCMPXCHG8_DAG,
+ LCMPXCHG16_DAG,
+
+ // VZEXT_LOAD - Load, scalar_to_vector, and zero extend.
+ VZEXT_LOAD,
+
+ // FNSTCW16m - Store FP control world into i16 memory.
+ FNSTCW16m,
+
+ /// FP_TO_INT*_IN_MEM - This instruction implements FP_TO_SINT with the
+ /// integer destination in memory and a FP reg source. This corresponds
+ /// to the X86::FIST*m instructions and the rounding mode change stuff. It
+ /// has two inputs (token chain and address) and two outputs (int value
+ /// and token chain).
+ FP_TO_INT16_IN_MEM,
+ FP_TO_INT32_IN_MEM,
+ FP_TO_INT64_IN_MEM,
+
+ /// FILD, FILD_FLAG - This instruction implements SINT_TO_FP with the
+ /// integer source in memory and FP reg result. This corresponds to the
+ /// X86::FILD*m instructions. It has three inputs (token chain, address,
+ /// and source type) and two outputs (FP value and token chain). FILD_FLAG
+ /// also produces a flag).
+ FILD,
+ FILD_FLAG,
+
+ /// FLD - This instruction implements an extending load to FP stack slots.
+ /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
+ /// operand, ptr to load from, and a ValueType node indicating the type
+ /// to load to.
+ FLD,
+
+ /// FST - This instruction implements a truncating store to FP stack
+ /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
+ /// chain operand, value to store, address, and a ValueType to store it
+ /// as.
+ FST,
+
+ /// VAARG_64 - This instruction grabs the address of the next argument
+ /// from a va_list. (reads and modifies the va_list in memory)
+ VAARG_64
+
+ // WARNING: Do not add anything in the end unless you want the node to
+ // have memop! In fact, starting from ATOMADD64_DAG all opcodes will be
+ // thought as target memory ops!
+ };
+ }
+
+ /// Define some predicates that are used for node matching.
+ namespace X86 {
+ /// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a shuffle of elements that is suitable for input to PSHUFD.
+ bool isPSHUFDMask(ShuffleVectorSDNode *N);
+
+ /// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a shuffle of elements that is suitable for input to PSHUFD.
+ bool isPSHUFHWMask(ShuffleVectorSDNode *N);
+
+ /// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a shuffle of elements that is suitable for input to PSHUFD.
+ bool isPSHUFLWMask(ShuffleVectorSDNode *N);
+
+ /// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a shuffle of elements that is suitable for input to SHUFP*.
+ bool isSHUFPMask(ShuffleVectorSDNode *N);
+
+ /// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a shuffle of elements that is suitable for input to MOVHLPS.
+ bool isMOVHLPSMask(ShuffleVectorSDNode *N);
+
+ /// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form
+ /// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef,
+ /// <2, 3, 2, 3>
+ bool isMOVHLPS_v_undef_Mask(ShuffleVectorSDNode *N);
+
+ /// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a shuffle of elements that is suitable for MOVLP{S|D}.
+ bool isMOVLPMask(ShuffleVectorSDNode *N);
+
+ /// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a shuffle of elements that is suitable for MOVHP{S|D}.
+ /// as well as MOVLHPS.
+ bool isMOVLHPSMask(ShuffleVectorSDNode *N);
+
+ /// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a shuffle of elements that is suitable for input to UNPCKL.
+ bool isUNPCKLMask(ShuffleVectorSDNode *N, bool V2IsSplat = false);
+
+ /// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a shuffle of elements that is suitable for input to UNPCKH.
+ bool isUNPCKHMask(ShuffleVectorSDNode *N, bool V2IsSplat = false);
+
+ /// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form
+ /// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef,
+ /// <0, 0, 1, 1>
+ bool isUNPCKL_v_undef_Mask(ShuffleVectorSDNode *N);
+
+ /// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form
+ /// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef,
+ /// <2, 2, 3, 3>
+ bool isUNPCKH_v_undef_Mask(ShuffleVectorSDNode *N);
+
+ /// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a shuffle of elements that is suitable for input to MOVSS,
+ /// MOVSD, and MOVD, i.e. setting the lowest element.
+ bool isMOVLMask(ShuffleVectorSDNode *N);
+
+ /// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a shuffle of elements that is suitable for input to MOVSHDUP.
+ bool isMOVSHDUPMask(ShuffleVectorSDNode *N, const X86Subtarget *Subtarget);
+
+ /// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a shuffle of elements that is suitable for input to MOVSLDUP.
+ bool isMOVSLDUPMask(ShuffleVectorSDNode *N, const X86Subtarget *Subtarget);
+
+ /// isMOVDDUPMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a shuffle of elements that is suitable for input to MOVDDUP.
+ bool isMOVDDUPMask(ShuffleVectorSDNode *N);
+
+ /// isVEXTRACTF128Index - Return true if the specified
+ /// EXTRACT_SUBVECTOR operand specifies a vector extract that is
+ /// suitable for input to VEXTRACTF128.
+ bool isVEXTRACTF128Index(SDNode *N);
+
+ /// isVINSERTF128Index - Return true if the specified
+ /// INSERT_SUBVECTOR operand specifies a subvector insert that is
+ /// suitable for input to VINSERTF128.
+ bool isVINSERTF128Index(SDNode *N);
+
+ /// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
+ /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP*
+ /// instructions.
+ unsigned getShuffleSHUFImmediate(SDNode *N);
+
+ /// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle
+ /// the specified VECTOR_SHUFFLE mask with PSHUFHW instruction.
+ unsigned getShufflePSHUFHWImmediate(SDNode *N);
+
+ /// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle
+ /// the specified VECTOR_SHUFFLE mask with PSHUFLW instruction.
+ unsigned getShufflePSHUFLWImmediate(SDNode *N);
+
+ /// getShufflePALIGNRImmediate - Return the appropriate immediate to shuffle
+ /// the specified VECTOR_SHUFFLE mask with the PALIGNR instruction.
+ unsigned getShufflePALIGNRImmediate(SDNode *N);
+
+ /// getExtractVEXTRACTF128Immediate - Return the appropriate
+ /// immediate to extract the specified EXTRACT_SUBVECTOR index
+ /// with VEXTRACTF128 instructions.
+ unsigned getExtractVEXTRACTF128Immediate(SDNode *N);
+
+ /// getInsertVINSERTF128Immediate - Return the appropriate
+ /// immediate to insert at the specified INSERT_SUBVECTOR index
+ /// with VINSERTF128 instructions.
+ unsigned getInsertVINSERTF128Immediate(SDNode *N);
+
+ /// isZeroNode - Returns true if Elt is a constant zero or a floating point
+ /// constant +0.0.
+ bool isZeroNode(SDValue Elt);
+
+ /// isOffsetSuitableForCodeModel - Returns true of the given offset can be
+ /// fit into displacement field of the instruction.
+ bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
+ bool hasSymbolicDisplacement = true);
+
+
+ /// isCalleePop - Determines whether the callee is required to pop its
+ /// own arguments. Callee pop is necessary to support tail calls.
+ bool isCalleePop(CallingConv::ID CallingConv,
+ bool is64Bit, bool IsVarArg, bool TailCallOpt);
+ }
+
+ //===--------------------------------------------------------------------===//
+ // X86TargetLowering - X86 Implementation of the TargetLowering interface
+ class X86TargetLowering : public TargetLowering {
+ public:
+ explicit X86TargetLowering(X86TargetMachine &TM);
+
+ virtual unsigned getJumpTableEncoding() const;
+
+ virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i8; }
+
+ virtual const MCExpr *
+ LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
+ const MachineBasicBlock *MBB, unsigned uid,
+ MCContext &Ctx) const;
+
+ /// getPICJumpTableRelocaBase - Returns relocation base for the given PIC
+ /// jumptable.
+ virtual SDValue getPICJumpTableRelocBase(SDValue Table,
+ SelectionDAG &DAG) const;
+ virtual const MCExpr *
+ getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
+ unsigned JTI, MCContext &Ctx) const;
+
+ /// getStackPtrReg - Return the stack pointer register we are using: either
+ /// ESP or RSP.
+ unsigned getStackPtrReg() const { return X86StackPtr; }
+
+ /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
+ /// function arguments in the caller parameter area. For X86, aggregates
+ /// that contains are placed at 16-byte boundaries while the rest are at
+ /// 4-byte boundaries.
+ virtual unsigned getByValTypeAlignment(Type *Ty) const;
+
+ /// getOptimalMemOpType - Returns the target specific optimal type for load
+ /// and store operations as a result of memset, memcpy, and memmove
+ /// lowering. If DstAlign is zero that means it's safe to destination
+ /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
+ /// means there isn't a need to check it against alignment requirement,
+ /// probably because the source does not need to be loaded. If
+ /// 'NonScalarIntSafe' is true, that means it's safe to return a
+ /// non-scalar-integer type, e.g. empty string source, constant, or loaded
+ /// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is
+ /// constant so it does not need to be loaded.
+ /// It returns EVT::Other if the type should be determined using generic
+ /// target-independent logic.
+ virtual EVT
+ getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
+ bool NonScalarIntSafe, bool MemcpyStrSrc,
+ MachineFunction &MF) const;
+
+ /// allowsUnalignedMemoryAccesses - Returns true if the target allows
+ /// unaligned memory accesses. of the specified type.
+ virtual bool allowsUnalignedMemoryAccesses(EVT VT) const {
+ return true;
+ }
+
+ /// LowerOperation - Provide custom lowering hooks for some operations.
+ ///
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+
+ /// ReplaceNodeResults - Replace the results of node with an illegal result
+ /// type with new values built out of custom code.
+ ///
+ virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG) const;
+
+
+ virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+
+ /// isTypeDesirableForOp - Return true if the target has native support for
+ /// the specified value type and it is 'desirable' to use the type for the
+ /// given node type. e.g. On x86 i16 is legal, but undesirable since i16
+ /// instruction encodings are longer and some i16 instructions are slow.
+ virtual bool isTypeDesirableForOp(unsigned Opc, EVT VT) const;
+
+ /// isTypeDesirable - Return true if the target has native support for the
+ /// specified value type and it is 'desirable' to use the type. e.g. On x86
+ /// i16 is legal, but undesirable since i16 instruction encodings are longer
+ /// and some i16 instructions are slow.
+ virtual bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const;
+
+ virtual MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+
+
+ /// getTargetNodeName - This method returns the name of a target specific
+ /// DAG node.
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+ /// getSetCCResultType - Return the value type to use for ISD::SETCC.
+ virtual EVT getSetCCResultType(EVT VT) const;
+
+ /// computeMaskedBitsForTargetNode - Determine which of the bits specified
+ /// in Mask are known to be either zero or one and return them in the
+ /// KnownZero/KnownOne bitsets.
+ virtual void computeMaskedBitsForTargetNode(const SDValue Op,
+ const APInt &Mask,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth = 0) const;
+
+ // ComputeNumSignBitsForTargetNode - Determine the number of bits in the
+ // operation that are sign bits.
+ virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
+ unsigned Depth) const;
+
+ virtual bool
+ isGAPlusOffset(SDNode *N, const GlobalValue* &GA, int64_t &Offset) const;
+
+ SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;
+
+ virtual bool ExpandInlineAsm(CallInst *CI) const;
+
+ ConstraintType getConstraintType(const std::string &Constraint) const;
+
+ /// Examine constraint string and operand type and determine a weight value.
+ /// The operand object must already have been set up with the operand type.
+ virtual ConstraintWeight getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const;
+
+ virtual const char *LowerXConstraint(EVT ConstraintVT) const;
+
+ /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
+ /// vector. If it is invalid, don't add anything to Ops. If hasMemory is
+ /// true it means one of the asm constraint of the inline asm instruction
+ /// being processed is 'm'.
+ virtual void LowerAsmOperandForConstraint(SDValue Op,
+ std::string &Constraint,
+ std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const;
+
+ /// getRegForInlineAsmConstraint - Given a physical register constraint
+ /// (e.g. {edx}), return the register number and the register class for the
+ /// register. This should only be used for C_Register constraints. On
+ /// error, this returns a register number of 0.
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const;
+
+ /// isLegalAddressingMode - Return true if the addressing mode represented
+ /// by AM is legal for this target, for a load/store of the specified type.
+ virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty)const;
+
+ /// isTruncateFree - Return true if it's free to truncate a value of
+ /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in
+ /// register EAX to i16 by referencing its sub-register AX.
+ virtual bool isTruncateFree(Type *Ty1, Type *Ty2) const;
+ virtual bool isTruncateFree(EVT VT1, EVT VT2) const;
+
+ /// isZExtFree - Return true if any actual instruction that defines a
+ /// value of type Ty1 implicit zero-extends the value to Ty2 in the result
+ /// register. This does not necessarily include registers defined in
+ /// unknown ways, such as incoming arguments, or copies from unknown
+ /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this
+ /// does not necessarily apply to truncate instructions. e.g. on x86-64,
+ /// all instructions that define 32-bit values implicit zero-extend the
+ /// result out to 64 bits.
+ virtual bool isZExtFree(Type *Ty1, Type *Ty2) const;
+ virtual bool isZExtFree(EVT VT1, EVT VT2) const;
+
+ /// isNarrowingProfitable - Return true if it's profitable to narrow
+ /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow
+ /// from i32 to i8 but not from i32 to i16.
+ virtual bool isNarrowingProfitable(EVT VT1, EVT VT2) const;
+
+ /// isFPImmLegal - Returns true if the target can instruction select the
+ /// specified FP immediate natively. If false, the legalizer will
+ /// materialize the FP immediate as a load from a constant pool.
+ virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
+
+ /// isShuffleMaskLegal - Targets can use this to indicate that they only
+ /// support *some* VECTOR_SHUFFLE operations, those with specific masks.
+ /// By default, if a target supports the VECTOR_SHUFFLE node, all mask
+ /// values are assumed to be legal.
+ virtual bool isShuffleMaskLegal(const SmallVectorImpl<int> &Mask,
+ EVT VT) const;
+
+ /// isVectorClearMaskLegal - Similar to isShuffleMaskLegal. This is
+ /// used by Targets can use this to indicate if there is a suitable
+ /// VECTOR_SHUFFLE that can be used to replace a VAND with a constant
+ /// pool entry.
+ virtual bool isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask,
+ EVT VT) const;
+
+ /// ShouldShrinkFPConstant - If true, then instruction selection should
+ /// seek to shrink the FP constant of the specified type to a smaller type
+ /// in order to save space and / or reduce runtime.
+ virtual bool ShouldShrinkFPConstant(EVT VT) const {
+ // Don't shrink FP constpool if SSE2 is available since cvtss2sd is more
+ // expensive than a straight movsd. On the other hand, it's important to
+ // shrink long double fp constant since fldt is very slow.
+ return !X86ScalarSSEf64 || VT == MVT::f80;
+ }
+
+ const X86Subtarget* getSubtarget() const {
+ return Subtarget;
+ }
+
+ /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is
+ /// computed in an SSE register, not on the X87 floating point stack.
+ bool isScalarFPTypeInSSEReg(EVT VT) const {
+ return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2
+ (VT == MVT::f32 && X86ScalarSSEf32); // f32 is when SSE1
+ }
+
+ /// createFastISel - This method returns a target specific FastISel object,
+ /// or null if the target does not support "fast" ISel.
+ virtual FastISel *createFastISel(FunctionLoweringInfo &funcInfo) const;
+
+ /// getStackCookieLocation - Return true if the target stores stack
+ /// protector cookies at a fixed offset in some non-standard address
+ /// space, and populates the address space and offset as
+ /// appropriate.
+ virtual bool getStackCookieLocation(unsigned &AddressSpace, unsigned &Offset) const;
+
+ SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot,
+ SelectionDAG &DAG) const;
+
+ protected:
+ std::pair<const TargetRegisterClass*, uint8_t>
+ findRepresentativeClass(EVT VT) const;
+
+ private:
+ /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const X86Subtarget *Subtarget;
+ const X86RegisterInfo *RegInfo;
+ const TargetData *TD;
+
+ /// X86StackPtr - X86 physical register used as stack ptr.
+ unsigned X86StackPtr;
+
+ /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87
+ /// floating point ops.
+ /// When SSE is available, use it for f32 operations.
+ /// When SSE2 is available, use it for f64 operations.
+ bool X86ScalarSSEf32;
+ bool X86ScalarSSEf64;
+
+ /// LegalFPImmediates - A list of legal fp immediates.
+ std::vector<APFloat> LegalFPImmediates;
+
+ /// addLegalFPImmediate - Indicate that this x86 target can instruction
+ /// select the specified FP immediate natively.
+ void addLegalFPImmediate(const APFloat& Imm) {
+ LegalFPImmediates.push_back(Imm);
+ }
+
+ SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+ SDValue LowerMemArgument(SDValue Chain,
+ CallingConv::ID CallConv,
+ const SmallVectorImpl<ISD::InputArg> &ArgInfo,
+ DebugLoc dl, SelectionDAG &DAG,
+ const CCValAssign &VA, MachineFrameInfo *MFI,
+ unsigned i) const;
+ SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
+ DebugLoc dl, SelectionDAG &DAG,
+ const CCValAssign &VA,
+ ISD::ArgFlagsTy Flags) const;
+
+ // Call lowering helpers.
+
+ /// IsEligibleForTailCallOptimization - Check whether the call is eligible
+ /// for tail call optimization. Targets which want to do tail call
+ /// optimization should implement this function.
+ bool IsEligibleForTailCallOptimization(SDValue Callee,
+ CallingConv::ID CalleeCC,
+ bool isVarArg,
+ bool isCalleeStructRet,
+ bool isCallerStructRet,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SelectionDAG& DAG) const;
+ bool IsCalleePop(bool isVarArg, CallingConv::ID CallConv) const;
+ SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,
+ SDValue Chain, bool IsTailCall, bool Is64Bit,
+ int FPDiff, DebugLoc dl) const;
+
+ unsigned GetAlignedArgumentStackSize(unsigned StackSize,
+ SelectionDAG &DAG) const;
+
+ std::pair<SDValue,SDValue> FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
+ bool isSigned) const;
+
+ SDValue LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl,
+ SelectionDAG &DAG) const;
+ SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl,
+ int64_t Offset, SelectionDAG &DAG) const;
+ SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBITCAST(SDValue op, SelectionDAG &DAG) const;
+ SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFABS(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFNEG(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFGETSIGN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerToBT(SDValue And, ISD::CondCode CC,
+ DebugLoc dl, SelectionDAG &DAG) const;
+ SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerMEMSET(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerADD(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSUB(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerShift(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerREADCYCLECOUNTER(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
+
+ // Utility functions to help LowerVECTOR_SHUFFLE
+ SDValue LowerVECTOR_SHUFFLEv8i16(SDValue Op, SelectionDAG &DAG) const;
+
+ virtual SDValue
+ LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+ virtual SDValue
+ LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg, bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const;
+
+ virtual bool isUsedByReturnOnly(SDNode *N) const;
+
+ virtual bool mayBeEmittedAsTailCall(CallInst *CI) const;
+
+ virtual EVT
+ getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT,
+ ISD::NodeType ExtendKind) const;
+
+ virtual bool
+ CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext &Context) const;
+
+ void ReplaceATOMIC_BINARY_64(SDNode *N, SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG, unsigned NewOp) const;
+
+ /// Utility function to emit string processing sse4.2 instructions
+ /// that return in xmm0.
+ /// This takes the instruction to expand, the associated machine basic
+ /// block, the number of args, and whether or not the second arg is
+ /// in memory or not.
+ MachineBasicBlock *EmitPCMP(MachineInstr *BInstr, MachineBasicBlock *BB,
+ unsigned argNum, bool inMem) const;
+
+ /// Utility functions to emit monitor and mwait instructions. These
+ /// need to make sure that the arguments to the intrinsic are in the
+ /// correct registers.
+ MachineBasicBlock *EmitMonitor(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+ MachineBasicBlock *EmitMwait(MachineInstr *MI, MachineBasicBlock *BB) const;
+
+ /// Utility function to emit atomic bitwise operations (and, or, xor).
+ /// It takes the bitwise instruction to expand, the associated machine basic
+ /// block, and the associated X86 opcodes for reg/reg and reg/imm.
+ MachineBasicBlock *EmitAtomicBitwiseWithCustomInserter(
+ MachineInstr *BInstr,
+ MachineBasicBlock *BB,
+ unsigned regOpc,
+ unsigned immOpc,
+ unsigned loadOpc,
+ unsigned cxchgOpc,
+ unsigned notOpc,
+ unsigned EAXreg,
+ TargetRegisterClass *RC,
+ bool invSrc = false) const;
+
+ MachineBasicBlock *EmitAtomicBit6432WithCustomInserter(
+ MachineInstr *BInstr,
+ MachineBasicBlock *BB,
+ unsigned regOpcL,
+ unsigned regOpcH,
+ unsigned immOpcL,
+ unsigned immOpcH,
+ bool invSrc = false) const;
+
+ /// Utility function to emit atomic min and max. It takes the min/max
+ /// instruction to expand, the associated basic block, and the associated
+ /// cmov opcode for moving the min or max value.
+ MachineBasicBlock *EmitAtomicMinMaxWithCustomInserter(MachineInstr *BInstr,
+ MachineBasicBlock *BB,
+ unsigned cmovOpc) const;
+
+ // Utility function to emit the low-level va_arg code for X86-64.
+ MachineBasicBlock *EmitVAARG64WithCustomInserter(
+ MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+
+ /// Utility function to emit the xmm reg save portion of va_start.
+ MachineBasicBlock *EmitVAStartSaveXMMRegsWithCustomInserter(
+ MachineInstr *BInstr,
+ MachineBasicBlock *BB) const;
+
+ MachineBasicBlock *EmitLoweredSelect(MachineInstr *I,
+ MachineBasicBlock *BB) const;
+
+ MachineBasicBlock *EmitLoweredWinAlloca(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+
+ MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ bool Is64Bit) const;
+
+ MachineBasicBlock *EmitLoweredTLSCall(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+
+ MachineBasicBlock *emitLoweredTLSAddr(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+
+ /// Emit nodes that will be selected as "test Op0,Op0", or something
+ /// equivalent, for use with the given x86 condition code.
+ SDValue EmitTest(SDValue Op0, unsigned X86CC, SelectionDAG &DAG) const;
+
+ /// Emit nodes that will be selected as "cmp Op0,Op1", or something
+ /// equivalent, for use with the given x86 condition code.
+ SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
+ SelectionDAG &DAG) const;
+ };
+
+ namespace X86 {
+ FastISel *createFastISel(FunctionLoweringInfo &funcInfo);
+ }
+}
+
+#endif // X86ISELLOWERING_H
diff --git a/contrib/llvm/lib/Target/X86/X86Instr3DNow.td b/contrib/llvm/lib/Target/X86/X86Instr3DNow.td
new file mode 100644
index 0000000..dd4f6a5
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86Instr3DNow.td
@@ -0,0 +1,102 @@
+//====- X86Instr3DNow.td - The 3DNow! Instruction Set ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the 3DNow! instruction set, which extends MMX to support
+// floating point and also adds a few more random instructions for good measure.
+//
+//===----------------------------------------------------------------------===//
+
+class I3DNow<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pat>
+ : I<o, F, outs, ins, asm, pat>, TB, Requires<[Has3DNow]> {
+}
+
+class I3DNow_binop<bits<8> o, Format F, dag ins, string Mnemonic, list<dag> pat>
+ : I3DNow<o, F, (outs VR64:$dst), ins,
+ !strconcat(Mnemonic, "\t{$src2, $dst|$dst, $src2}"), pat>,
+ Has3DNow0F0FOpcode {
+ // FIXME: The disassembler doesn't support Has3DNow0F0FOpcode yet.
+ let isAsmParserOnly = 1;
+ let Constraints = "$src1 = $dst";
+}
+
+class I3DNow_conv<bits<8> o, Format F, dag ins, string Mnemonic, list<dag> pat>
+ : I3DNow<o, F, (outs VR64:$dst), ins,
+ !strconcat(Mnemonic, "\t{$src, $dst|$dst, $src}"), pat>,
+ Has3DNow0F0FOpcode {
+ // FIXME: The disassembler doesn't support Has3DNow0F0FOpcode yet.
+ let isAsmParserOnly = 1;
+}
+
+multiclass I3DNow_binop_rm<bits<8> opc, string Mn> {
+ def rr : I3DNow_binop<opc, MRMSrcReg, (ins VR64:$src1, VR64:$src2), Mn, []>;
+ def rm : I3DNow_binop<opc, MRMSrcMem, (ins VR64:$src1, i64mem:$src2), Mn, []>;
+}
+
+multiclass I3DNow_binop_rm_int<bits<8> opc, string Mn, string Ver = ""> {
+ def rr : I3DNow_binop<opc, MRMSrcReg, (ins VR64:$src1, VR64:$src2), Mn,
+ [(set VR64:$dst, (!cast<Intrinsic>(
+ !strconcat("int_x86_3dnow", Ver, "_", Mn)) VR64:$src1, VR64:$src2))]>;
+ def rm : I3DNow_binop<opc, MRMSrcMem, (ins VR64:$src1, i64mem:$src2), Mn,
+ [(set VR64:$dst, (!cast<Intrinsic>(
+ !strconcat("int_x86_3dnow", Ver, "_", Mn)) VR64:$src1,
+ (bitconvert (load_mmx addr:$src2))))]>;
+}
+
+multiclass I3DNow_conv_rm<bits<8> opc, string Mn> {
+ def rr : I3DNow_conv<opc, MRMSrcReg, (ins VR64:$src1), Mn, []>;
+ def rm : I3DNow_conv<opc, MRMSrcMem, (ins i64mem:$src1), Mn, []>;
+}
+
+multiclass I3DNow_conv_rm_int<bits<8> opc, string Mn, string Ver = ""> {
+ def rr : I3DNow_conv<opc, MRMSrcReg, (ins VR64:$src), Mn,
+ [(set VR64:$dst, (!cast<Intrinsic>(
+ !strconcat("int_x86_3dnow", Ver, "_", Mn)) VR64:$src))]>;
+ def rm : I3DNow_conv<opc, MRMSrcMem, (ins i64mem:$src), Mn,
+ [(set VR64:$dst, (!cast<Intrinsic>(
+ !strconcat("int_x86_3dnow", Ver, "_", Mn))
+ (bitconvert (load_mmx addr:$src))))]>;
+}
+
+defm PAVGUSB : I3DNow_binop_rm_int<0xBF, "pavgusb">;
+defm PF2ID : I3DNow_conv_rm_int<0x1D, "pf2id">;
+defm PFACC : I3DNow_binop_rm_int<0xAE, "pfacc">;
+defm PFADD : I3DNow_binop_rm_int<0x9E, "pfadd">;
+defm PFCMPEQ : I3DNow_binop_rm_int<0xB0, "pfcmpeq">;
+defm PFCMPGE : I3DNow_binop_rm_int<0x90, "pfcmpge">;
+defm PFCMPGT : I3DNow_binop_rm_int<0xA0, "pfcmpgt">;
+defm PFMAX : I3DNow_binop_rm_int<0xA4, "pfmax">;
+defm PFMIN : I3DNow_binop_rm_int<0x94, "pfmin">;
+defm PFMUL : I3DNow_binop_rm_int<0xB4, "pfmul">;
+defm PFRCP : I3DNow_conv_rm_int<0x96, "pfrcp">;
+defm PFRCPIT1 : I3DNow_binop_rm_int<0xA6, "pfrcpit1">;
+defm PFRCPIT2 : I3DNow_binop_rm_int<0xB6, "pfrcpit2">;
+defm PFRSQIT1 : I3DNow_binop_rm_int<0xA7, "pfrsqit1">;
+defm PFRSQRT : I3DNow_conv_rm_int<0x97, "pfrsqrt">;
+defm PFSUB : I3DNow_binop_rm_int<0x9A, "pfsub">;
+defm PFSUBR : I3DNow_binop_rm_int<0xAA, "pfsubr">;
+defm PI2FD : I3DNow_conv_rm_int<0x0D, "pi2fd">;
+defm PMULHRW : I3DNow_binop_rm_int<0xB7, "pmulhrw">;
+
+
+def FEMMS : I3DNow<0x0E, RawFrm, (outs), (ins), "femms", [(int_x86_mmx_femms)]>;
+
+def PREFETCH : I3DNow<0x0D, MRM0m, (outs), (ins i32mem:$addr),
+ "prefetch $addr", []>;
+
+// FIXME: Diassembler gets a bogus decode conflict.
+let isAsmParserOnly = 1 in
+def PREFETCHW : I3DNow<0x0D, MRM1m, (outs), (ins i16mem:$addr),
+ "prefetchw $addr", []>;
+
+// "3DNowA" instructions
+defm PF2IW : I3DNow_conv_rm_int<0x1C, "pf2iw", "a">;
+defm PI2FW : I3DNow_conv_rm_int<0x0C, "pi2fw", "a">;
+defm PFNACC : I3DNow_binop_rm_int<0x8A, "pfnacc", "a">;
+defm PFPNACC : I3DNow_binop_rm_int<0x8E, "pfpnacc", "a">;
+defm PSWAPD : I3DNow_conv_rm_int<0xBB, "pswapd", "a">;
diff --git a/contrib/llvm/lib/Target/X86/X86InstrArithmetic.td b/contrib/llvm/lib/Target/X86/X86InstrArithmetic.td
new file mode 100644
index 0000000..74b647a
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrArithmetic.td
@@ -0,0 +1,1173 @@
+//===- X86InstrArithmetic.td - Integer Arithmetic Instrs ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the integer arithmetic instructions in the X86
+// architecture.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// LEA - Load Effective Address
+
+let neverHasSideEffects = 1 in
+def LEA16r : I<0x8D, MRMSrcMem,
+ (outs GR16:$dst), (ins i32mem:$src),
+ "lea{w}\t{$src|$dst}, {$dst|$src}", []>, OpSize;
+let isReMaterializable = 1 in
+def LEA32r : I<0x8D, MRMSrcMem,
+ (outs GR32:$dst), (ins i32mem:$src),
+ "lea{l}\t{$src|$dst}, {$dst|$src}",
+ [(set GR32:$dst, lea32addr:$src)]>, Requires<[In32BitMode]>;
+
+def LEA64_32r : I<0x8D, MRMSrcMem,
+ (outs GR32:$dst), (ins lea64_32mem:$src),
+ "lea{l}\t{$src|$dst}, {$dst|$src}",
+ [(set GR32:$dst, lea32addr:$src)]>, Requires<[In64BitMode]>;
+
+let isReMaterializable = 1 in
+def LEA64r : RI<0x8D, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "lea{q}\t{$src|$dst}, {$dst|$src}",
+ [(set GR64:$dst, lea64addr:$src)]>;
+
+
+
+//===----------------------------------------------------------------------===//
+// Fixed-Register Multiplication and Division Instructions.
+//
+
+// Extra precision multiplication
+
+// AL is really implied by AX, but the registers in Defs must match the
+// SDNode results (i8, i32).
+let Defs = [AL,EFLAGS,AX], Uses = [AL] in
+def MUL8r : I<0xF6, MRM4r, (outs), (ins GR8:$src), "mul{b}\t$src",
+ // FIXME: Used for 8-bit mul, ignore result upper 8 bits.
+ // This probably ought to be moved to a def : Pat<> if the
+ // syntax can be accepted.
+ [(set AL, (mul AL, GR8:$src)),
+ (implicit EFLAGS)]>; // AL,AH = AL*GR8
+
+let Defs = [AX,DX,EFLAGS], Uses = [AX], neverHasSideEffects = 1 in
+def MUL16r : I<0xF7, MRM4r, (outs), (ins GR16:$src),
+ "mul{w}\t$src",
+ []>, OpSize; // AX,DX = AX*GR16
+
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX], neverHasSideEffects = 1 in
+def MUL32r : I<0xF7, MRM4r, (outs), (ins GR32:$src),
+ "mul{l}\t$src", // EAX,EDX = EAX*GR32
+ [/*(set EAX, EDX, EFLAGS, (X86umul_flag EAX, GR32:$src))*/]>;
+let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], neverHasSideEffects = 1 in
+def MUL64r : RI<0xF7, MRM4r, (outs), (ins GR64:$src),
+ "mul{q}\t$src", // RAX,RDX = RAX*GR64
+ [/*(set RAX, RDX, EFLAGS, (X86umul_flag RAX, GR64:$src))*/]>;
+
+let Defs = [AL,EFLAGS,AX], Uses = [AL] in
+def MUL8m : I<0xF6, MRM4m, (outs), (ins i8mem :$src),
+ "mul{b}\t$src",
+ // FIXME: Used for 8-bit mul, ignore result upper 8 bits.
+ // This probably ought to be moved to a def : Pat<> if the
+ // syntax can be accepted.
+ [(set AL, (mul AL, (loadi8 addr:$src))),
+ (implicit EFLAGS)]>; // AL,AH = AL*[mem8]
+
+let mayLoad = 1, neverHasSideEffects = 1 in {
+let Defs = [AX,DX,EFLAGS], Uses = [AX] in
+def MUL16m : I<0xF7, MRM4m, (outs), (ins i16mem:$src),
+ "mul{w}\t$src",
+ []>, OpSize; // AX,DX = AX*[mem16]
+
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
+def MUL32m : I<0xF7, MRM4m, (outs), (ins i32mem:$src),
+ "mul{l}\t$src",
+ []>; // EAX,EDX = EAX*[mem32]
+let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], neverHasSideEffects = 1 in
+def MUL64m : RI<0xF7, MRM4m, (outs), (ins i64mem:$src),
+ "mul{q}\t$src", []>; // RAX,RDX = RAX*[mem64]
+}
+
+let neverHasSideEffects = 1 in {
+let Defs = [AL,EFLAGS,AX], Uses = [AL] in
+def IMUL8r : I<0xF6, MRM5r, (outs), (ins GR8:$src), "imul{b}\t$src", []>;
+ // AL,AH = AL*GR8
+let Defs = [AX,DX,EFLAGS], Uses = [AX] in
+def IMUL16r : I<0xF7, MRM5r, (outs), (ins GR16:$src), "imul{w}\t$src", []>,
+ OpSize; // AX,DX = AX*GR16
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
+def IMUL32r : I<0xF7, MRM5r, (outs), (ins GR32:$src), "imul{l}\t$src", []>;
+ // EAX,EDX = EAX*GR32
+let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], neverHasSideEffects = 1 in
+def IMUL64r : RI<0xF7, MRM5r, (outs), (ins GR64:$src), "imul{q}\t$src", []>;
+ // RAX,RDX = RAX*GR64
+
+let mayLoad = 1 in {
+let Defs = [AL,EFLAGS,AX], Uses = [AL] in
+def IMUL8m : I<0xF6, MRM5m, (outs), (ins i8mem :$src),
+ "imul{b}\t$src", []>; // AL,AH = AL*[mem8]
+let Defs = [AX,DX,EFLAGS], Uses = [AX] in
+def IMUL16m : I<0xF7, MRM5m, (outs), (ins i16mem:$src),
+ "imul{w}\t$src", []>, OpSize; // AX,DX = AX*[mem16]
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
+def IMUL32m : I<0xF7, MRM5m, (outs), (ins i32mem:$src),
+ "imul{l}\t$src", []>; // EAX,EDX = EAX*[mem32]
+let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], neverHasSideEffects = 1 in
+def IMUL64m : RI<0xF7, MRM5m, (outs), (ins i64mem:$src),
+ "imul{q}\t$src", []>; // RAX,RDX = RAX*[mem64]
+}
+} // neverHasSideEffects
+
+
+let Defs = [EFLAGS] in {
+let Constraints = "$src1 = $dst" in {
+
+let isCommutable = 1 in { // X = IMUL Y, Z --> X = IMUL Z, Y
+// Register-Register Signed Integer Multiply
+def IMUL16rr : I<0xAF, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1,GR16:$src2),
+ "imul{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, EFLAGS,
+ (X86smul_flag GR16:$src1, GR16:$src2))]>, TB, OpSize;
+def IMUL32rr : I<0xAF, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1,GR32:$src2),
+ "imul{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, EFLAGS,
+ (X86smul_flag GR32:$src1, GR32:$src2))]>, TB;
+def IMUL64rr : RI<0xAF, MRMSrcReg, (outs GR64:$dst),
+ (ins GR64:$src1, GR64:$src2),
+ "imul{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, EFLAGS,
+ (X86smul_flag GR64:$src1, GR64:$src2))]>, TB;
+}
+
+// Register-Memory Signed Integer Multiply
+def IMUL16rm : I<0xAF, MRMSrcMem, (outs GR16:$dst),
+ (ins GR16:$src1, i16mem:$src2),
+ "imul{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, EFLAGS,
+ (X86smul_flag GR16:$src1, (load addr:$src2)))]>,
+ TB, OpSize;
+def IMUL32rm : I<0xAF, MRMSrcMem, (outs GR32:$dst),
+ (ins GR32:$src1, i32mem:$src2),
+ "imul{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, EFLAGS,
+ (X86smul_flag GR32:$src1, (load addr:$src2)))]>, TB;
+def IMUL64rm : RI<0xAF, MRMSrcMem, (outs GR64:$dst),
+ (ins GR64:$src1, i64mem:$src2),
+ "imul{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, EFLAGS,
+ (X86smul_flag GR64:$src1, (load addr:$src2)))]>, TB;
+} // Constraints = "$src1 = $dst"
+
+} // Defs = [EFLAGS]
+
+// Surprisingly enough, these are not two address instructions!
+let Defs = [EFLAGS] in {
+// Register-Integer Signed Integer Multiply
+def IMUL16rri : Ii16<0x69, MRMSrcReg, // GR16 = GR16*I16
+ (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
+ "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR16:$dst, EFLAGS,
+ (X86smul_flag GR16:$src1, imm:$src2))]>, OpSize;
+def IMUL16rri8 : Ii8<0x6B, MRMSrcReg, // GR16 = GR16*I8
+ (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
+ "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR16:$dst, EFLAGS,
+ (X86smul_flag GR16:$src1, i16immSExt8:$src2))]>,
+ OpSize;
+def IMUL32rri : Ii32<0x69, MRMSrcReg, // GR32 = GR32*I32
+ (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
+ "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR32:$dst, EFLAGS,
+ (X86smul_flag GR32:$src1, imm:$src2))]>;
+def IMUL32rri8 : Ii8<0x6B, MRMSrcReg, // GR32 = GR32*I8
+ (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
+ "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR32:$dst, EFLAGS,
+ (X86smul_flag GR32:$src1, i32immSExt8:$src2))]>;
+def IMUL64rri32 : RIi32<0x69, MRMSrcReg, // GR64 = GR64*I32
+ (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),
+ "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR64:$dst, EFLAGS,
+ (X86smul_flag GR64:$src1, i64immSExt32:$src2))]>;
+def IMUL64rri8 : RIi8<0x6B, MRMSrcReg, // GR64 = GR64*I8
+ (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2),
+ "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR64:$dst, EFLAGS,
+ (X86smul_flag GR64:$src1, i64immSExt8:$src2))]>;
+
+
+// Memory-Integer Signed Integer Multiply
+def IMUL16rmi : Ii16<0x69, MRMSrcMem, // GR16 = [mem16]*I16
+ (outs GR16:$dst), (ins i16mem:$src1, i16imm:$src2),
+ "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR16:$dst, EFLAGS,
+ (X86smul_flag (load addr:$src1), imm:$src2))]>,
+ OpSize;
+def IMUL16rmi8 : Ii8<0x6B, MRMSrcMem, // GR16 = [mem16]*I8
+ (outs GR16:$dst), (ins i16mem:$src1, i16i8imm :$src2),
+ "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR16:$dst, EFLAGS,
+ (X86smul_flag (load addr:$src1),
+ i16immSExt8:$src2))]>, OpSize;
+def IMUL32rmi : Ii32<0x69, MRMSrcMem, // GR32 = [mem32]*I32
+ (outs GR32:$dst), (ins i32mem:$src1, i32imm:$src2),
+ "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR32:$dst, EFLAGS,
+ (X86smul_flag (load addr:$src1), imm:$src2))]>;
+def IMUL32rmi8 : Ii8<0x6B, MRMSrcMem, // GR32 = [mem32]*I8
+ (outs GR32:$dst), (ins i32mem:$src1, i32i8imm: $src2),
+ "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR32:$dst, EFLAGS,
+ (X86smul_flag (load addr:$src1),
+ i32immSExt8:$src2))]>;
+def IMUL64rmi32 : RIi32<0x69, MRMSrcMem, // GR64 = [mem64]*I32
+ (outs GR64:$dst), (ins i64mem:$src1, i64i32imm:$src2),
+ "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR64:$dst, EFLAGS,
+ (X86smul_flag (load addr:$src1),
+ i64immSExt32:$src2))]>;
+def IMUL64rmi8 : RIi8<0x6B, MRMSrcMem, // GR64 = [mem64]*I8
+ (outs GR64:$dst), (ins i64mem:$src1, i64i8imm: $src2),
+ "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR64:$dst, EFLAGS,
+ (X86smul_flag (load addr:$src1),
+ i64immSExt8:$src2))]>;
+} // Defs = [EFLAGS]
+
+
+
+
+// unsigned division/remainder
+let Defs = [AL,EFLAGS,AX], Uses = [AX] in
+def DIV8r : I<0xF6, MRM6r, (outs), (ins GR8:$src), // AX/r8 = AL,AH
+ "div{b}\t$src", []>;
+let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
+def DIV16r : I<0xF7, MRM6r, (outs), (ins GR16:$src), // DX:AX/r16 = AX,DX
+ "div{w}\t$src", []>, OpSize;
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in
+def DIV32r : I<0xF7, MRM6r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX
+ "div{l}\t$src", []>;
+// RDX:RAX/r64 = RAX,RDX
+let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in
+def DIV64r : RI<0xF7, MRM6r, (outs), (ins GR64:$src),
+ "div{q}\t$src", []>;
+
+let mayLoad = 1 in {
+let Defs = [AL,EFLAGS,AX], Uses = [AX] in
+def DIV8m : I<0xF6, MRM6m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH
+ "div{b}\t$src", []>;
+let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
+def DIV16m : I<0xF7, MRM6m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX
+ "div{w}\t$src", []>, OpSize;
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in // EDX:EAX/[mem32] = EAX,EDX
+def DIV32m : I<0xF7, MRM6m, (outs), (ins i32mem:$src),
+ "div{l}\t$src", []>;
+// RDX:RAX/[mem64] = RAX,RDX
+let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in
+def DIV64m : RI<0xF7, MRM6m, (outs), (ins i64mem:$src),
+ "div{q}\t$src", []>;
+}
+
+// Signed division/remainder.
+let Defs = [AL,EFLAGS,AX], Uses = [AX] in
+def IDIV8r : I<0xF6, MRM7r, (outs), (ins GR8:$src), // AX/r8 = AL,AH
+ "idiv{b}\t$src", []>;
+let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
+def IDIV16r: I<0xF7, MRM7r, (outs), (ins GR16:$src), // DX:AX/r16 = AX,DX
+ "idiv{w}\t$src", []>, OpSize;
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in
+def IDIV32r: I<0xF7, MRM7r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX
+ "idiv{l}\t$src", []>;
+// RDX:RAX/r64 = RAX,RDX
+let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in
+def IDIV64r: RI<0xF7, MRM7r, (outs), (ins GR64:$src),
+ "idiv{q}\t$src", []>;
+
+let mayLoad = 1, mayLoad = 1 in {
+let Defs = [AL,EFLAGS,AX], Uses = [AX] in
+def IDIV8m : I<0xF6, MRM7m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH
+ "idiv{b}\t$src", []>;
+let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
+def IDIV16m: I<0xF7, MRM7m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX
+ "idiv{w}\t$src", []>, OpSize;
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in // EDX:EAX/[mem32] = EAX,EDX
+def IDIV32m: I<0xF7, MRM7m, (outs), (ins i32mem:$src),
+ "idiv{l}\t$src", []>;
+let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in // RDX:RAX/[mem64] = RAX,RDX
+def IDIV64m: RI<0xF7, MRM7m, (outs), (ins i64mem:$src),
+ "idiv{q}\t$src", []>;
+}
+
+//===----------------------------------------------------------------------===//
+// Two address Instructions.
+//
+
+// unary instructions
+let CodeSize = 2 in {
+let Defs = [EFLAGS] in {
+let Constraints = "$src1 = $dst" in {
+def NEG8r : I<0xF6, MRM3r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "neg{b}\t$dst",
+ [(set GR8:$dst, (ineg GR8:$src1)),
+ (implicit EFLAGS)]>;
+def NEG16r : I<0xF7, MRM3r, (outs GR16:$dst), (ins GR16:$src1),
+ "neg{w}\t$dst",
+ [(set GR16:$dst, (ineg GR16:$src1)),
+ (implicit EFLAGS)]>, OpSize;
+def NEG32r : I<0xF7, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
+ "neg{l}\t$dst",
+ [(set GR32:$dst, (ineg GR32:$src1)),
+ (implicit EFLAGS)]>;
+def NEG64r : RI<0xF7, MRM3r, (outs GR64:$dst), (ins GR64:$src1), "neg{q}\t$dst",
+ [(set GR64:$dst, (ineg GR64:$src1)),
+ (implicit EFLAGS)]>;
+} // Constraints = "$src1 = $dst"
+
+def NEG8m : I<0xF6, MRM3m, (outs), (ins i8mem :$dst),
+ "neg{b}\t$dst",
+ [(store (ineg (loadi8 addr:$dst)), addr:$dst),
+ (implicit EFLAGS)]>;
+def NEG16m : I<0xF7, MRM3m, (outs), (ins i16mem:$dst),
+ "neg{w}\t$dst",
+ [(store (ineg (loadi16 addr:$dst)), addr:$dst),
+ (implicit EFLAGS)]>, OpSize;
+def NEG32m : I<0xF7, MRM3m, (outs), (ins i32mem:$dst),
+ "neg{l}\t$dst",
+ [(store (ineg (loadi32 addr:$dst)), addr:$dst),
+ (implicit EFLAGS)]>;
+def NEG64m : RI<0xF7, MRM3m, (outs), (ins i64mem:$dst), "neg{q}\t$dst",
+ [(store (ineg (loadi64 addr:$dst)), addr:$dst),
+ (implicit EFLAGS)]>;
+} // Defs = [EFLAGS]
+
+
+// Note: NOT does not set EFLAGS!
+
+let Constraints = "$src1 = $dst" in {
+// Match xor -1 to not. Favors these over a move imm + xor to save code size.
+let AddedComplexity = 15 in {
+def NOT8r : I<0xF6, MRM2r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "not{b}\t$dst",
+ [(set GR8:$dst, (not GR8:$src1))]>;
+def NOT16r : I<0xF7, MRM2r, (outs GR16:$dst), (ins GR16:$src1),
+ "not{w}\t$dst",
+ [(set GR16:$dst, (not GR16:$src1))]>, OpSize;
+def NOT32r : I<0xF7, MRM2r, (outs GR32:$dst), (ins GR32:$src1),
+ "not{l}\t$dst",
+ [(set GR32:$dst, (not GR32:$src1))]>;
+def NOT64r : RI<0xF7, MRM2r, (outs GR64:$dst), (ins GR64:$src1), "not{q}\t$dst",
+ [(set GR64:$dst, (not GR64:$src1))]>;
+}
+} // Constraints = "$src1 = $dst"
+
+def NOT8m : I<0xF6, MRM2m, (outs), (ins i8mem :$dst),
+ "not{b}\t$dst",
+ [(store (not (loadi8 addr:$dst)), addr:$dst)]>;
+def NOT16m : I<0xF7, MRM2m, (outs), (ins i16mem:$dst),
+ "not{w}\t$dst",
+ [(store (not (loadi16 addr:$dst)), addr:$dst)]>, OpSize;
+def NOT32m : I<0xF7, MRM2m, (outs), (ins i32mem:$dst),
+ "not{l}\t$dst",
+ [(store (not (loadi32 addr:$dst)), addr:$dst)]>;
+def NOT64m : RI<0xF7, MRM2m, (outs), (ins i64mem:$dst), "not{q}\t$dst",
+ [(store (not (loadi64 addr:$dst)), addr:$dst)]>;
+} // CodeSize
+
+// TODO: inc/dec is slow for P4, but fast for Pentium-M.
+let Defs = [EFLAGS] in {
+let Constraints = "$src1 = $dst" in {
+let CodeSize = 2 in
+def INC8r : I<0xFE, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "inc{b}\t$dst",
+ [(set GR8:$dst, EFLAGS, (X86inc_flag GR8:$src1))]>;
+
+let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA.
+def INC16r : I<0x40, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1),
+ "inc{w}\t$dst",
+ [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src1))]>,
+ OpSize, Requires<[In32BitMode]>;
+def INC32r : I<0x40, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1),
+ "inc{l}\t$dst",
+ [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src1))]>,
+ Requires<[In32BitMode]>;
+def INC64r : RI<0xFF, MRM0r, (outs GR64:$dst), (ins GR64:$src1), "inc{q}\t$dst",
+ [(set GR64:$dst, EFLAGS, (X86inc_flag GR64:$src1))]>;
+} // isConvertibleToThreeAddress = 1, CodeSize = 1
+
+
+// In 64-bit mode, single byte INC and DEC cannot be encoded.
+let isConvertibleToThreeAddress = 1, CodeSize = 2 in {
+// Can transform into LEA.
+def INC64_16r : I<0xFF, MRM0r, (outs GR16:$dst), (ins GR16:$src1),
+ "inc{w}\t$dst",
+ [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src1))]>,
+ OpSize, Requires<[In64BitMode]>;
+def INC64_32r : I<0xFF, MRM0r, (outs GR32:$dst), (ins GR32:$src1),
+ "inc{l}\t$dst",
+ [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src1))]>,
+ Requires<[In64BitMode]>;
+def DEC64_16r : I<0xFF, MRM1r, (outs GR16:$dst), (ins GR16:$src1),
+ "dec{w}\t$dst",
+ [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src1))]>,
+ OpSize, Requires<[In64BitMode]>;
+def DEC64_32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
+ "dec{l}\t$dst",
+ [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src1))]>,
+ Requires<[In64BitMode]>;
+} // isConvertibleToThreeAddress = 1, CodeSize = 2
+
+} // Constraints = "$src1 = $dst"
+
+let CodeSize = 2 in {
+ def INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst), "inc{b}\t$dst",
+ [(store (add (loadi8 addr:$dst), 1), addr:$dst),
+ (implicit EFLAGS)]>;
+ def INC16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), "inc{w}\t$dst",
+ [(store (add (loadi16 addr:$dst), 1), addr:$dst),
+ (implicit EFLAGS)]>,
+ OpSize, Requires<[In32BitMode]>;
+ def INC32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), "inc{l}\t$dst",
+ [(store (add (loadi32 addr:$dst), 1), addr:$dst),
+ (implicit EFLAGS)]>,
+ Requires<[In32BitMode]>;
+ def INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst), "inc{q}\t$dst",
+ [(store (add (loadi64 addr:$dst), 1), addr:$dst),
+ (implicit EFLAGS)]>;
+
+// These are duplicates of their 32-bit counterparts. Only needed so X86 knows
+// how to unfold them.
+// FIXME: What is this for??
+def INC64_16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), "inc{w}\t$dst",
+ [(store (add (loadi16 addr:$dst), 1), addr:$dst),
+ (implicit EFLAGS)]>,
+ OpSize, Requires<[In64BitMode]>;
+def INC64_32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), "inc{l}\t$dst",
+ [(store (add (loadi32 addr:$dst), 1), addr:$dst),
+ (implicit EFLAGS)]>,
+ Requires<[In64BitMode]>;
+def DEC64_16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), "dec{w}\t$dst",
+ [(store (add (loadi16 addr:$dst), -1), addr:$dst),
+ (implicit EFLAGS)]>,
+ OpSize, Requires<[In64BitMode]>;
+def DEC64_32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst",
+ [(store (add (loadi32 addr:$dst), -1), addr:$dst),
+ (implicit EFLAGS)]>,
+ Requires<[In64BitMode]>;
+} // CodeSize = 2
+
+let Constraints = "$src1 = $dst" in {
+let CodeSize = 2 in
+def DEC8r : I<0xFE, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "dec{b}\t$dst",
+ [(set GR8:$dst, EFLAGS, (X86dec_flag GR8:$src1))]>;
+let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA.
+def DEC16r : I<0x48, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1),
+ "dec{w}\t$dst",
+ [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src1))]>,
+ OpSize, Requires<[In32BitMode]>;
+def DEC32r : I<0x48, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1),
+ "dec{l}\t$dst",
+ [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src1))]>,
+ Requires<[In32BitMode]>;
+def DEC64r : RI<0xFF, MRM1r, (outs GR64:$dst), (ins GR64:$src1), "dec{q}\t$dst",
+ [(set GR64:$dst, EFLAGS, (X86dec_flag GR64:$src1))]>;
+} // CodeSize = 2
+} // Constraints = "$src1 = $dst"
+
+
+let CodeSize = 2 in {
+ def DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst), "dec{b}\t$dst",
+ [(store (add (loadi8 addr:$dst), -1), addr:$dst),
+ (implicit EFLAGS)]>;
+ def DEC16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), "dec{w}\t$dst",
+ [(store (add (loadi16 addr:$dst), -1), addr:$dst),
+ (implicit EFLAGS)]>,
+ OpSize, Requires<[In32BitMode]>;
+ def DEC32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst",
+ [(store (add (loadi32 addr:$dst), -1), addr:$dst),
+ (implicit EFLAGS)]>,
+ Requires<[In32BitMode]>;
+ def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
+ [(store (add (loadi64 addr:$dst), -1), addr:$dst),
+ (implicit EFLAGS)]>;
+} // CodeSize = 2
+} // Defs = [EFLAGS]
+
+
+/// X86TypeInfo - This is a bunch of information that describes relevant X86
+/// information about value types. For example, it can tell you what the
+/// register class and preferred load to use.
+class X86TypeInfo<ValueType vt, string instrsuffix, RegisterClass regclass,
+ PatFrag loadnode, X86MemOperand memoperand, ImmType immkind,
+ Operand immoperand, SDPatternOperator immoperator,
+ Operand imm8operand, SDPatternOperator imm8operator,
+ bit hasOddOpcode, bit hasOpSizePrefix, bit hasREX_WPrefix> {
+ /// VT - This is the value type itself.
+ ValueType VT = vt;
+
+ /// InstrSuffix - This is the suffix used on instructions with this type. For
+ /// example, i8 -> "b", i16 -> "w", i32 -> "l", i64 -> "q".
+ string InstrSuffix = instrsuffix;
+
+ /// RegClass - This is the register class associated with this type. For
+ /// example, i8 -> GR8, i16 -> GR16, i32 -> GR32, i64 -> GR64.
+ RegisterClass RegClass = regclass;
+
+ /// LoadNode - This is the load node associated with this type. For
+ /// example, i8 -> loadi8, i16 -> loadi16, i32 -> loadi32, i64 -> loadi64.
+ PatFrag LoadNode = loadnode;
+
+ /// MemOperand - This is the memory operand associated with this type. For
+ /// example, i8 -> i8mem, i16 -> i16mem, i32 -> i32mem, i64 -> i64mem.
+ X86MemOperand MemOperand = memoperand;
+
+ /// ImmEncoding - This is the encoding of an immediate of this type. For
+ /// example, i8 -> Imm8, i16 -> Imm16, i32 -> Imm32. Note that i64 -> Imm32
+ /// since the immediate fields of i64 instructions is a 32-bit sign extended
+ /// value.
+ ImmType ImmEncoding = immkind;
+
+ /// ImmOperand - This is the operand kind of an immediate of this type. For
+ /// example, i8 -> i8imm, i16 -> i16imm, i32 -> i32imm. Note that i64 ->
+ /// i64i32imm since the immediate fields of i64 instructions is a 32-bit sign
+ /// extended value.
+ Operand ImmOperand = immoperand;
+
+ /// ImmOperator - This is the operator that should be used to match an
+ /// immediate of this kind in a pattern (e.g. imm, or i64immSExt32).
+ SDPatternOperator ImmOperator = immoperator;
+
+ /// Imm8Operand - This is the operand kind to use for an imm8 of this type.
+ /// For example, i8 -> <invalid>, i16 -> i16i8imm, i32 -> i32i8imm. This is
+ /// only used for instructions that have a sign-extended imm8 field form.
+ Operand Imm8Operand = imm8operand;
+
+ /// Imm8Operator - This is the operator that should be used to match an 8-bit
+ /// sign extended immediate of this kind in a pattern (e.g. imm16immSExt8).
+ SDPatternOperator Imm8Operator = imm8operator;
+
+ /// HasOddOpcode - This bit is true if the instruction should have an odd (as
+ /// opposed to even) opcode. Operations on i8 are usually even, operations on
+ /// other datatypes are odd.
+ bit HasOddOpcode = hasOddOpcode;
+
+ /// HasOpSizePrefix - This bit is set to true if the instruction should have
+ /// the 0x66 operand size prefix. This is set for i16 types.
+ bit HasOpSizePrefix = hasOpSizePrefix;
+
+ /// HasREX_WPrefix - This bit is set to true if the instruction should have
+ /// the 0x40 REX prefix. This is set for i64 types.
+ bit HasREX_WPrefix = hasREX_WPrefix;
+}
+
+def invalid_node : SDNode<"<<invalid_node>>", SDTIntLeaf,[],"<<invalid_node>>">;
+
+
+def Xi8 : X86TypeInfo<i8 , "b", GR8 , loadi8 , i8mem ,
+ Imm8 , i8imm , imm, i8imm , invalid_node,
+ 0, 0, 0>;
+def Xi16 : X86TypeInfo<i16, "w", GR16, loadi16, i16mem,
+ Imm16, i16imm, imm, i16i8imm, i16immSExt8,
+ 1, 1, 0>;
+def Xi32 : X86TypeInfo<i32, "l", GR32, loadi32, i32mem,
+ Imm32, i32imm, imm, i32i8imm, i32immSExt8,
+ 1, 0, 0>;
+def Xi64 : X86TypeInfo<i64, "q", GR64, loadi64, i64mem,
+ Imm32, i64i32imm, i64immSExt32, i64i8imm, i64immSExt8,
+ 1, 0, 1>;
+
+/// ITy - This instruction base class takes the type info for the instruction.
+/// Using this, it:
+/// 1. Concatenates together the instruction mnemonic with the appropriate
+/// suffix letter, a tab, and the arguments.
+/// 2. Infers whether the instruction should have a 0x66 prefix byte.
+/// 3. Infers whether the instruction should have a 0x40 REX_W prefix.
+/// 4. Infers whether the low bit of the opcode should be 0 (for i8 operations)
+/// or 1 (for i16,i32,i64 operations).
+class ITy<bits<8> opcode, Format f, X86TypeInfo typeinfo, dag outs, dag ins,
+ string mnemonic, string args, list<dag> pattern>
+ : I<{opcode{7}, opcode{6}, opcode{5}, opcode{4},
+ opcode{3}, opcode{2}, opcode{1}, typeinfo.HasOddOpcode },
+ f, outs, ins,
+ !strconcat(mnemonic, "{", typeinfo.InstrSuffix, "}\t", args), pattern> {
+
+ // Infer instruction prefixes from type info.
+ let hasOpSizePrefix = typeinfo.HasOpSizePrefix;
+ let hasREX_WPrefix = typeinfo.HasREX_WPrefix;
+}
+
+// BinOpRR - Instructions like "add reg, reg, reg".
+class BinOpRR<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ dag outlist, list<dag> pattern, Format f = MRMDestReg>
+ : ITy<opcode, f, typeinfo, outlist,
+ (ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2),
+ mnemonic, "{$src2, $src1|$src1, $src2}", pattern>;
+
+// BinOpRR_R - Instructions like "add reg, reg, reg", where the pattern has
+// just a regclass (no eflags) as a result.
+class BinOpRR_R<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode>
+ : BinOpRR<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst),
+ [(set typeinfo.RegClass:$dst,
+ (opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2))]>;
+
+// BinOpRR_F - Instructions like "cmp reg, Reg", where the pattern has
+// just a EFLAGS as a result.
+class BinOpRR_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDPatternOperator opnode, Format f = MRMDestReg>
+ : BinOpRR<opcode, mnemonic, typeinfo, (outs),
+ [(set EFLAGS,
+ (opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2))],
+ f>;
+
+// BinOpRR_RF - Instructions like "add reg, reg, reg", where the pattern has
+// both a regclass and EFLAGS as a result.
+class BinOpRR_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode>
+ : BinOpRR<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst),
+ [(set typeinfo.RegClass:$dst, EFLAGS,
+ (opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2))]>;
+
+// BinOpRR_RFF - Instructions like "adc reg, reg, reg", where the pattern has
+// both a regclass and EFLAGS as a result, and has EFLAGS as input.
+class BinOpRR_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode>
+ : BinOpRR<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst),
+ [(set typeinfo.RegClass:$dst, EFLAGS,
+ (opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2,
+ EFLAGS))]>;
+
+// BinOpRR_Rev - Instructions like "add reg, reg, reg" (reversed encoding).
+class BinOpRR_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo>
+ : ITy<opcode, MRMSrcReg, typeinfo,
+ (outs typeinfo.RegClass:$dst),
+ (ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2),
+ mnemonic, "{$src2, $dst|$dst, $src2}", []> {
+ // The disassembler should know about this, but not the asmparser.
+ let isCodeGenOnly = 1;
+}
+
+// BinOpRR_F_Rev - Instructions like "cmp reg, reg" (reversed encoding).
+class BinOpRR_F_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo>
+ : ITy<opcode, MRMSrcReg, typeinfo, (outs),
+ (ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2),
+ mnemonic, "{$src2, $src1|$src1, $src2}", []> {
+ // The disassembler should know about this, but not the asmparser.
+ let isCodeGenOnly = 1;
+}
+
+// BinOpRM - Instructions like "add reg, reg, [mem]".
+class BinOpRM<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ dag outlist, list<dag> pattern>
+ : ITy<opcode, MRMSrcMem, typeinfo, outlist,
+ (ins typeinfo.RegClass:$src1, typeinfo.MemOperand:$src2),
+ mnemonic, "{$src2, $src1|$src1, $src2}", pattern>;
+
+// BinOpRM_R - Instructions like "add reg, reg, [mem]".
+class BinOpRM_R<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode>
+ : BinOpRM<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst),
+ [(set typeinfo.RegClass:$dst,
+ (opnode typeinfo.RegClass:$src1, (typeinfo.LoadNode addr:$src2)))]>;
+
+// BinOpRM_F - Instructions like "cmp reg, [mem]".
+class BinOpRM_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDPatternOperator opnode>
+ : BinOpRM<opcode, mnemonic, typeinfo, (outs),
+ [(set EFLAGS,
+ (opnode typeinfo.RegClass:$src1, (typeinfo.LoadNode addr:$src2)))]>;
+
+// BinOpRM_RF - Instructions like "add reg, reg, [mem]".
+class BinOpRM_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode>
+ : BinOpRM<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst),
+ [(set typeinfo.RegClass:$dst, EFLAGS,
+ (opnode typeinfo.RegClass:$src1, (typeinfo.LoadNode addr:$src2)))]>;
+
+// BinOpRM_RFF - Instructions like "adc reg, reg, [mem]".
+class BinOpRM_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode>
+ : BinOpRM<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst),
+ [(set typeinfo.RegClass:$dst, EFLAGS,
+ (opnode typeinfo.RegClass:$src1, (typeinfo.LoadNode addr:$src2),
+ EFLAGS))]>;
+
+// BinOpRI - Instructions like "add reg, reg, imm".
+class BinOpRI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ Format f, dag outlist, list<dag> pattern>
+ : ITy<opcode, f, typeinfo, outlist,
+ (ins typeinfo.RegClass:$src1, typeinfo.ImmOperand:$src2),
+ mnemonic, "{$src2, $src1|$src1, $src2}", pattern> {
+ let ImmT = typeinfo.ImmEncoding;
+}
+
+// BinOpRI_R - Instructions like "add reg, reg, imm".
+class BinOpRI_R<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode, Format f>
+ : BinOpRI<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst),
+ [(set typeinfo.RegClass:$dst,
+ (opnode typeinfo.RegClass:$src1, typeinfo.ImmOperator:$src2))]>;
+
+// BinOpRI_F - Instructions like "cmp reg, imm".
+class BinOpRI_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDPatternOperator opnode, Format f>
+ : BinOpRI<opcode, mnemonic, typeinfo, f, (outs),
+ [(set EFLAGS,
+ (opnode typeinfo.RegClass:$src1, typeinfo.ImmOperator:$src2))]>;
+
+// BinOpRI_RF - Instructions like "add reg, reg, imm".
+class BinOpRI_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode, Format f>
+ : BinOpRI<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst),
+ [(set typeinfo.RegClass:$dst, EFLAGS,
+ (opnode typeinfo.RegClass:$src1, typeinfo.ImmOperator:$src2))]>;
+
+// BinOpRI_RFF - Instructions like "adc reg, reg, imm".
+class BinOpRI_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode, Format f>
+ : BinOpRI<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst),
+ [(set typeinfo.RegClass:$dst, EFLAGS,
+ (opnode typeinfo.RegClass:$src1, typeinfo.ImmOperator:$src2,
+ EFLAGS))]>;
+
+// BinOpRI8 - Instructions like "add reg, reg, imm8".
+class BinOpRI8<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ Format f, dag outlist, list<dag> pattern>
+ : ITy<opcode, f, typeinfo, outlist,
+ (ins typeinfo.RegClass:$src1, typeinfo.Imm8Operand:$src2),
+ mnemonic, "{$src2, $src1|$src1, $src2}", pattern> {
+ let ImmT = Imm8; // Always 8-bit immediate.
+}
+
+// BinOpRI8_R - Instructions like "add reg, reg, imm8".
+class BinOpRI8_R<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode, Format f>
+ : BinOpRI8<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst),
+ [(set typeinfo.RegClass:$dst,
+ (opnode typeinfo.RegClass:$src1, typeinfo.Imm8Operator:$src2))]>;
+
+// BinOpRI8_F - Instructions like "cmp reg, imm8".
+class BinOpRI8_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode, Format f>
+ : BinOpRI8<opcode, mnemonic, typeinfo, f, (outs),
+ [(set EFLAGS,
+ (opnode typeinfo.RegClass:$src1, typeinfo.Imm8Operator:$src2))]>;
+
+// BinOpRI8_RF - Instructions like "add reg, reg, imm8".
+class BinOpRI8_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode, Format f>
+ : BinOpRI8<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst),
+ [(set typeinfo.RegClass:$dst, EFLAGS,
+ (opnode typeinfo.RegClass:$src1, typeinfo.Imm8Operator:$src2))]>;
+
+// BinOpRI8_RFF - Instructions like "adc reg, reg, imm8".
+class BinOpRI8_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode, Format f>
+ : BinOpRI8<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst),
+ [(set typeinfo.RegClass:$dst, EFLAGS,
+ (opnode typeinfo.RegClass:$src1, typeinfo.Imm8Operator:$src2,
+ EFLAGS))]>;
+
+// BinOpMR - Instructions like "add [mem], reg".
+class BinOpMR<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ list<dag> pattern>
+ : ITy<opcode, MRMDestMem, typeinfo,
+ (outs), (ins typeinfo.MemOperand:$dst, typeinfo.RegClass:$src),
+ mnemonic, "{$src, $dst|$dst, $src}", pattern>;
+
+// BinOpMR_RMW - Instructions like "add [mem], reg".
+class BinOpMR_RMW<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode>
+ : BinOpMR<opcode, mnemonic, typeinfo,
+ [(store (opnode (load addr:$dst), typeinfo.RegClass:$src), addr:$dst),
+ (implicit EFLAGS)]>;
+
+// BinOpMR_RMW_FF - Instructions like "adc [mem], reg".
+class BinOpMR_RMW_FF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode>
+ : BinOpMR<opcode, mnemonic, typeinfo,
+ [(store (opnode (load addr:$dst), typeinfo.RegClass:$src, EFLAGS),
+ addr:$dst),
+ (implicit EFLAGS)]>;
+
+// BinOpMR_F - Instructions like "cmp [mem], reg".
+class BinOpMR_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode>
+ : BinOpMR<opcode, mnemonic, typeinfo,
+ [(set EFLAGS, (opnode (load addr:$dst), typeinfo.RegClass:$src))]>;
+
+// BinOpMI - Instructions like "add [mem], imm".
+class BinOpMI<string mnemonic, X86TypeInfo typeinfo,
+ Format f, list<dag> pattern, bits<8> opcode = 0x80>
+ : ITy<opcode, f, typeinfo,
+ (outs), (ins typeinfo.MemOperand:$dst, typeinfo.ImmOperand:$src),
+ mnemonic, "{$src, $dst|$dst, $src}", pattern> {
+ let ImmT = typeinfo.ImmEncoding;
+}
+
+// BinOpMI_RMW - Instructions like "add [mem], imm".
+class BinOpMI_RMW<string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode, Format f>
+ : BinOpMI<mnemonic, typeinfo, f,
+ [(store (opnode (typeinfo.VT (load addr:$dst)),
+ typeinfo.ImmOperator:$src), addr:$dst),
+ (implicit EFLAGS)]>;
+
+// BinOpMI_RMW_FF - Instructions like "adc [mem], imm".
+class BinOpMI_RMW_FF<string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode, Format f>
+ : BinOpMI<mnemonic, typeinfo, f,
+ [(store (opnode (typeinfo.VT (load addr:$dst)),
+ typeinfo.ImmOperator:$src, EFLAGS), addr:$dst),
+ (implicit EFLAGS)]>;
+
+// BinOpMI_F - Instructions like "cmp [mem], imm".
+class BinOpMI_F<string mnemonic, X86TypeInfo typeinfo,
+ SDPatternOperator opnode, Format f, bits<8> opcode = 0x80>
+ : BinOpMI<mnemonic, typeinfo, f,
+ [(set EFLAGS, (opnode (typeinfo.VT (load addr:$dst)),
+ typeinfo.ImmOperator:$src))],
+ opcode>;
+
+// BinOpMI8 - Instructions like "add [mem], imm8".
+class BinOpMI8<string mnemonic, X86TypeInfo typeinfo,
+ Format f, list<dag> pattern>
+ : ITy<0x82, f, typeinfo,
+ (outs), (ins typeinfo.MemOperand:$dst, typeinfo.Imm8Operand:$src),
+ mnemonic, "{$src, $dst|$dst, $src}", pattern> {
+ let ImmT = Imm8; // Always 8-bit immediate.
+}
+
+// BinOpMI8_RMW - Instructions like "add [mem], imm8".
+class BinOpMI8_RMW<string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode, Format f>
+ : BinOpMI8<mnemonic, typeinfo, f,
+ [(store (opnode (load addr:$dst),
+ typeinfo.Imm8Operator:$src), addr:$dst),
+ (implicit EFLAGS)]>;
+
+// BinOpMI8_RMW_FF - Instructions like "adc [mem], imm8".
+class BinOpMI8_RMW_FF<string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode, Format f>
+ : BinOpMI8<mnemonic, typeinfo, f,
+ [(store (opnode (load addr:$dst),
+ typeinfo.Imm8Operator:$src, EFLAGS), addr:$dst),
+ (implicit EFLAGS)]>;
+
+// BinOpMI8_F - Instructions like "cmp [mem], imm8".
+class BinOpMI8_F<string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode, Format f>
+ : BinOpMI8<mnemonic, typeinfo, f,
+ [(set EFLAGS, (opnode (load addr:$dst),
+ typeinfo.Imm8Operator:$src))]>;
+
+// BinOpAI - Instructions like "add %eax, %eax, imm".
+class BinOpAI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ Register areg, string operands>
+ : ITy<opcode, RawFrm, typeinfo,
+ (outs), (ins typeinfo.ImmOperand:$src),
+ mnemonic, operands, []> {
+ let ImmT = typeinfo.ImmEncoding;
+ let Uses = [areg];
+ let Defs = [areg];
+}
+
+/// ArithBinOp_RF - This is an arithmetic binary operator where the pattern is
+/// defined with "(set GPR:$dst, EFLAGS, (...".
+///
+/// It would be nice to get rid of the second and third argument here, but
+/// tblgen can't handle dependent type references aggressively enough: PR8330
+multiclass ArithBinOp_RF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
+ string mnemonic, Format RegMRM, Format MemMRM,
+ SDNode opnodeflag, SDNode opnode,
+ bit CommutableRR, bit ConvertibleToThreeAddress> {
+ let Defs = [EFLAGS] in {
+ let Constraints = "$src1 = $dst" in {
+ let isCommutable = CommutableRR,
+ isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
+ def #NAME#8rr : BinOpRR_RF<BaseOpc, mnemonic, Xi8 , opnodeflag>;
+ def #NAME#16rr : BinOpRR_RF<BaseOpc, mnemonic, Xi16, opnodeflag>;
+ def #NAME#32rr : BinOpRR_RF<BaseOpc, mnemonic, Xi32, opnodeflag>;
+ def #NAME#64rr : BinOpRR_RF<BaseOpc, mnemonic, Xi64, opnodeflag>;
+ } // isCommutable
+
+ def #NAME#8rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi8>;
+ def #NAME#16rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi16>;
+ def #NAME#32rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi32>;
+ def #NAME#64rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi64>;
+
+ def #NAME#8rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi8 , opnodeflag>;
+ def #NAME#16rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi16, opnodeflag>;
+ def #NAME#32rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi32, opnodeflag>;
+ def #NAME#64rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi64, opnodeflag>;
+
+ let isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
+ // NOTE: These are order specific, we want the ri8 forms to be listed
+ // first so that they are slightly preferred to the ri forms.
+ def #NAME#16ri8 : BinOpRI8_RF<0x82, mnemonic, Xi16, opnodeflag, RegMRM>;
+ def #NAME#32ri8 : BinOpRI8_RF<0x82, mnemonic, Xi32, opnodeflag, RegMRM>;
+ def #NAME#64ri8 : BinOpRI8_RF<0x82, mnemonic, Xi64, opnodeflag, RegMRM>;
+
+ def #NAME#8ri : BinOpRI_RF<0x80, mnemonic, Xi8 , opnodeflag, RegMRM>;
+ def #NAME#16ri : BinOpRI_RF<0x80, mnemonic, Xi16, opnodeflag, RegMRM>;
+ def #NAME#32ri : BinOpRI_RF<0x80, mnemonic, Xi32, opnodeflag, RegMRM>;
+ def #NAME#64ri32: BinOpRI_RF<0x80, mnemonic, Xi64, opnodeflag, RegMRM>;
+ }
+ } // Constraints = "$src1 = $dst"
+
+ def #NAME#8mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi8 , opnode>;
+ def #NAME#16mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi16, opnode>;
+ def #NAME#32mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi32, opnode>;
+ def #NAME#64mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi64, opnode>;
+
+ // NOTE: These are order specific, we want the mi8 forms to be listed
+ // first so that they are slightly preferred to the mi forms.
+ def #NAME#16mi8 : BinOpMI8_RMW<mnemonic, Xi16, opnode, MemMRM>;
+ def #NAME#32mi8 : BinOpMI8_RMW<mnemonic, Xi32, opnode, MemMRM>;
+ def #NAME#64mi8 : BinOpMI8_RMW<mnemonic, Xi64, opnode, MemMRM>;
+
+ def #NAME#8mi : BinOpMI_RMW<mnemonic, Xi8 , opnode, MemMRM>;
+ def #NAME#16mi : BinOpMI_RMW<mnemonic, Xi16, opnode, MemMRM>;
+ def #NAME#32mi : BinOpMI_RMW<mnemonic, Xi32, opnode, MemMRM>;
+ def #NAME#64mi32 : BinOpMI_RMW<mnemonic, Xi64, opnode, MemMRM>;
+
+ def #NAME#8i8 : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL,
+ "{$src, %al|AL, $src}">;
+ def #NAME#16i16 : BinOpAI<BaseOpc4, mnemonic, Xi16, AX,
+ "{$src, %ax|AX, $src}">;
+ def #NAME#32i32 : BinOpAI<BaseOpc4, mnemonic, Xi32, EAX,
+ "{$src, %eax|EAX, $src}">;
+ def #NAME#64i32 : BinOpAI<BaseOpc4, mnemonic, Xi64, RAX,
+ "{$src, %rax|RAX, $src}">;
+ }
+}
+
+/// ArithBinOp_RFF - This is an arithmetic binary operator where the pattern is
+/// defined with "(set GPR:$dst, EFLAGS, (node LHS, RHS, EFLAGS))" like ADC and
+/// SBB.
+///
+/// It would be nice to get rid of the second and third argument here, but
+/// tblgen can't handle dependent type references aggressively enough: PR8330
+multiclass ArithBinOp_RFF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
+ string mnemonic, Format RegMRM, Format MemMRM,
+ SDNode opnode, bit CommutableRR,
+ bit ConvertibleToThreeAddress> {
+ let Defs = [EFLAGS] in {
+ let Constraints = "$src1 = $dst" in {
+ let isCommutable = CommutableRR,
+ isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
+ def #NAME#8rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi8 , opnode>;
+ def #NAME#16rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi16, opnode>;
+ def #NAME#32rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi32, opnode>;
+ def #NAME#64rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi64, opnode>;
+ } // isCommutable
+
+ def #NAME#8rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi8>;
+ def #NAME#16rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi16>;
+ def #NAME#32rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi32>;
+ def #NAME#64rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi64>;
+
+ def #NAME#8rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi8 , opnode>;
+ def #NAME#16rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi16, opnode>;
+ def #NAME#32rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi32, opnode>;
+ def #NAME#64rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi64, opnode>;
+
+ let isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
+ // NOTE: These are order specific, we want the ri8 forms to be listed
+ // first so that they are slightly preferred to the ri forms.
+ def #NAME#16ri8 : BinOpRI8_RFF<0x82, mnemonic, Xi16, opnode, RegMRM>;
+ def #NAME#32ri8 : BinOpRI8_RFF<0x82, mnemonic, Xi32, opnode, RegMRM>;
+ def #NAME#64ri8 : BinOpRI8_RFF<0x82, mnemonic, Xi64, opnode, RegMRM>;
+
+ def #NAME#8ri : BinOpRI_RFF<0x80, mnemonic, Xi8 , opnode, RegMRM>;
+ def #NAME#16ri : BinOpRI_RFF<0x80, mnemonic, Xi16, opnode, RegMRM>;
+ def #NAME#32ri : BinOpRI_RFF<0x80, mnemonic, Xi32, opnode, RegMRM>;
+ def #NAME#64ri32: BinOpRI_RFF<0x80, mnemonic, Xi64, opnode, RegMRM>;
+ }
+ } // Constraints = "$src1 = $dst"
+
+ def #NAME#8mr : BinOpMR_RMW_FF<BaseOpc, mnemonic, Xi8 , opnode>;
+ def #NAME#16mr : BinOpMR_RMW_FF<BaseOpc, mnemonic, Xi16, opnode>;
+ def #NAME#32mr : BinOpMR_RMW_FF<BaseOpc, mnemonic, Xi32, opnode>;
+ def #NAME#64mr : BinOpMR_RMW_FF<BaseOpc, mnemonic, Xi64, opnode>;
+
+ // NOTE: These are order specific, we want the mi8 forms to be listed
+ // first so that they are slightly preferred to the mi forms.
+ def #NAME#16mi8 : BinOpMI8_RMW_FF<mnemonic, Xi16, opnode, MemMRM>;
+ def #NAME#32mi8 : BinOpMI8_RMW_FF<mnemonic, Xi32, opnode, MemMRM>;
+ def #NAME#64mi8 : BinOpMI8_RMW_FF<mnemonic, Xi64, opnode, MemMRM>;
+
+ def #NAME#8mi : BinOpMI_RMW_FF<mnemonic, Xi8 , opnode, MemMRM>;
+ def #NAME#16mi : BinOpMI_RMW_FF<mnemonic, Xi16, opnode, MemMRM>;
+ def #NAME#32mi : BinOpMI_RMW_FF<mnemonic, Xi32, opnode, MemMRM>;
+ def #NAME#64mi32 : BinOpMI_RMW_FF<mnemonic, Xi64, opnode, MemMRM>;
+
+ def #NAME#8i8 : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL,
+ "{$src, %al|AL, $src}">;
+ def #NAME#16i16 : BinOpAI<BaseOpc4, mnemonic, Xi16, AX,
+ "{$src, %ax|AX, $src}">;
+ def #NAME#32i32 : BinOpAI<BaseOpc4, mnemonic, Xi32, EAX,
+ "{$src, %eax|EAX, $src}">;
+ def #NAME#64i32 : BinOpAI<BaseOpc4, mnemonic, Xi64, RAX,
+ "{$src, %rax|RAX, $src}">;
+ }
+}
+
+/// ArithBinOp_F - This is an arithmetic binary operator where the pattern is
+/// defined with "(set EFLAGS, (...". It would be really nice to find a way
+/// to factor this with the other ArithBinOp_*.
+///
+multiclass ArithBinOp_F<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
+ string mnemonic, Format RegMRM, Format MemMRM,
+ SDNode opnode,
+ bit CommutableRR, bit ConvertibleToThreeAddress> {
+ let Defs = [EFLAGS] in {
+ let isCommutable = CommutableRR,
+ isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
+ def #NAME#8rr : BinOpRR_F<BaseOpc, mnemonic, Xi8 , opnode>;
+ def #NAME#16rr : BinOpRR_F<BaseOpc, mnemonic, Xi16, opnode>;
+ def #NAME#32rr : BinOpRR_F<BaseOpc, mnemonic, Xi32, opnode>;
+ def #NAME#64rr : BinOpRR_F<BaseOpc, mnemonic, Xi64, opnode>;
+ } // isCommutable
+
+ def #NAME#8rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi8>;
+ def #NAME#16rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi16>;
+ def #NAME#32rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi32>;
+ def #NAME#64rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi64>;
+
+ def #NAME#8rm : BinOpRM_F<BaseOpc2, mnemonic, Xi8 , opnode>;
+ def #NAME#16rm : BinOpRM_F<BaseOpc2, mnemonic, Xi16, opnode>;
+ def #NAME#32rm : BinOpRM_F<BaseOpc2, mnemonic, Xi32, opnode>;
+ def #NAME#64rm : BinOpRM_F<BaseOpc2, mnemonic, Xi64, opnode>;
+
+ let isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
+ // NOTE: These are order specific, we want the ri8 forms to be listed
+ // first so that they are slightly preferred to the ri forms.
+ def #NAME#16ri8 : BinOpRI8_F<0x82, mnemonic, Xi16, opnode, RegMRM>;
+ def #NAME#32ri8 : BinOpRI8_F<0x82, mnemonic, Xi32, opnode, RegMRM>;
+ def #NAME#64ri8 : BinOpRI8_F<0x82, mnemonic, Xi64, opnode, RegMRM>;
+
+ def #NAME#8ri : BinOpRI_F<0x80, mnemonic, Xi8 , opnode, RegMRM>;
+ def #NAME#16ri : BinOpRI_F<0x80, mnemonic, Xi16, opnode, RegMRM>;
+ def #NAME#32ri : BinOpRI_F<0x80, mnemonic, Xi32, opnode, RegMRM>;
+ def #NAME#64ri32: BinOpRI_F<0x80, mnemonic, Xi64, opnode, RegMRM>;
+ }
+
+ def #NAME#8mr : BinOpMR_F<BaseOpc, mnemonic, Xi8 , opnode>;
+ def #NAME#16mr : BinOpMR_F<BaseOpc, mnemonic, Xi16, opnode>;
+ def #NAME#32mr : BinOpMR_F<BaseOpc, mnemonic, Xi32, opnode>;
+ def #NAME#64mr : BinOpMR_F<BaseOpc, mnemonic, Xi64, opnode>;
+
+ // NOTE: These are order specific, we want the mi8 forms to be listed
+ // first so that they are slightly preferred to the mi forms.
+ def #NAME#16mi8 : BinOpMI8_F<mnemonic, Xi16, opnode, MemMRM>;
+ def #NAME#32mi8 : BinOpMI8_F<mnemonic, Xi32, opnode, MemMRM>;
+ def #NAME#64mi8 : BinOpMI8_F<mnemonic, Xi64, opnode, MemMRM>;
+
+ def #NAME#8mi : BinOpMI_F<mnemonic, Xi8 , opnode, MemMRM>;
+ def #NAME#16mi : BinOpMI_F<mnemonic, Xi16, opnode, MemMRM>;
+ def #NAME#32mi : BinOpMI_F<mnemonic, Xi32, opnode, MemMRM>;
+ def #NAME#64mi32 : BinOpMI_F<mnemonic, Xi64, opnode, MemMRM>;
+
+ def #NAME#8i8 : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL,
+ "{$src, %al|AL, $src}">;
+ def #NAME#16i16 : BinOpAI<BaseOpc4, mnemonic, Xi16, AX,
+ "{$src, %ax|AX, $src}">;
+ def #NAME#32i32 : BinOpAI<BaseOpc4, mnemonic, Xi32, EAX,
+ "{$src, %eax|EAX, $src}">;
+ def #NAME#64i32 : BinOpAI<BaseOpc4, mnemonic, Xi64, RAX,
+ "{$src, %rax|RAX, $src}">;
+ }
+}
+
+
+defm AND : ArithBinOp_RF<0x20, 0x22, 0x24, "and", MRM4r, MRM4m,
+ X86and_flag, and, 1, 0>;
+defm OR : ArithBinOp_RF<0x08, 0x0A, 0x0C, "or", MRM1r, MRM1m,
+ X86or_flag, or, 1, 0>;
+defm XOR : ArithBinOp_RF<0x30, 0x32, 0x34, "xor", MRM6r, MRM6m,
+ X86xor_flag, xor, 1, 0>;
+defm ADD : ArithBinOp_RF<0x00, 0x02, 0x04, "add", MRM0r, MRM0m,
+ X86add_flag, add, 1, 1>;
+defm SUB : ArithBinOp_RF<0x28, 0x2A, 0x2C, "sub", MRM5r, MRM5m,
+ X86sub_flag, sub, 0, 0>;
+
+// Arithmetic.
+let Uses = [EFLAGS] in {
+ defm ADC : ArithBinOp_RFF<0x10, 0x12, 0x14, "adc", MRM2r, MRM2m, X86adc_flag,
+ 1, 0>;
+ defm SBB : ArithBinOp_RFF<0x18, 0x1A, 0x1C, "sbb", MRM3r, MRM3m, X86sbb_flag,
+ 0, 0>;
+}
+
+defm CMP : ArithBinOp_F<0x38, 0x3A, 0x3C, "cmp", MRM7r, MRM7m, X86cmp, 0, 0>;
+
+
+//===----------------------------------------------------------------------===//
+// Semantically, test instructions are similar like AND, except they don't
+// generate a result. From an encoding perspective, they are very different:
+// they don't have all the usual imm8 and REV forms, and are encoded into a
+// different space.
+def X86testpat : PatFrag<(ops node:$lhs, node:$rhs),
+ (X86cmp (and_su node:$lhs, node:$rhs), 0)>;
+
+let Defs = [EFLAGS] in {
+ let isCommutable = 1 in {
+ def TEST8rr : BinOpRR_F<0x84, "test", Xi8 , X86testpat, MRMSrcReg>;
+ def TEST16rr : BinOpRR_F<0x84, "test", Xi16, X86testpat, MRMSrcReg>;
+ def TEST32rr : BinOpRR_F<0x84, "test", Xi32, X86testpat, MRMSrcReg>;
+ def TEST64rr : BinOpRR_F<0x84, "test", Xi64, X86testpat, MRMSrcReg>;
+ } // isCommutable
+
+ def TEST8rm : BinOpRM_F<0x84, "test", Xi8 , X86testpat>;
+ def TEST16rm : BinOpRM_F<0x84, "test", Xi16, X86testpat>;
+ def TEST32rm : BinOpRM_F<0x84, "test", Xi32, X86testpat>;
+ def TEST64rm : BinOpRM_F<0x84, "test", Xi64, X86testpat>;
+
+ def TEST8ri : BinOpRI_F<0xF6, "test", Xi8 , X86testpat, MRM0r>;
+ def TEST16ri : BinOpRI_F<0xF6, "test", Xi16, X86testpat, MRM0r>;
+ def TEST32ri : BinOpRI_F<0xF6, "test", Xi32, X86testpat, MRM0r>;
+ def TEST64ri32 : BinOpRI_F<0xF6, "test", Xi64, X86testpat, MRM0r>;
+
+ def TEST8mi : BinOpMI_F<"test", Xi8 , X86testpat, MRM0m, 0xF6>;
+ def TEST16mi : BinOpMI_F<"test", Xi16, X86testpat, MRM0m, 0xF6>;
+ def TEST32mi : BinOpMI_F<"test", Xi32, X86testpat, MRM0m, 0xF6>;
+ def TEST64mi32 : BinOpMI_F<"test", Xi64, X86testpat, MRM0m, 0xF6>;
+
+ def TEST8i8 : BinOpAI<0xA8, "test", Xi8 , AL,
+ "{$src, %al|AL, $src}">;
+ def TEST16i16 : BinOpAI<0xA8, "test", Xi16, AX,
+ "{$src, %ax|AX, $src}">;
+ def TEST32i32 : BinOpAI<0xA8, "test", Xi32, EAX,
+ "{$src, %eax|EAX, $src}">;
+ def TEST64i32 : BinOpAI<0xA8, "test", Xi64, RAX,
+ "{$src, %rax|RAX, $src}">;
+
+ // When testing the result of EXTRACT_SUBREG sub_8bit_hi, make sure the
+ // register class is constrained to GR8_NOREX.
+ let isPseudo = 1 in
+ def TEST8ri_NOREX : I<0, Pseudo, (outs), (ins GR8_NOREX:$src, i8imm:$mask),
+ "", []>;
+}
+
+//===----------------------------------------------------------------------===//
+// ANDN Instruction
+//
+multiclass bmi_andn<string mnemonic, RegisterClass RC, X86MemOperand x86memop,
+ PatFrag ld_frag> {
+ def rr : I<0xF2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, EFLAGS, (X86andn_flag RC:$src1, RC:$src2))]>;
+ def rm : I<0xF2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
+ !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, EFLAGS,
+ (X86andn_flag RC:$src1, (ld_frag addr:$src2)))]>;
+}
+
+let Predicates = [HasBMI], Defs = [EFLAGS] in {
+ defm ANDN32 : bmi_andn<"andn{l}", GR32, i32mem, loadi32>, T8, VEX_4V;
+ defm ANDN64 : bmi_andn<"andn{q}", GR64, i64mem, loadi64>, T8, VEX_4V, VEX_W;
+}
diff --git a/contrib/llvm/lib/Target/X86/X86InstrBuilder.h b/contrib/llvm/lib/Target/X86/X86InstrBuilder.h
new file mode 100644
index 0000000..0245e5c
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrBuilder.h
@@ -0,0 +1,184 @@
+//===-- X86InstrBuilder.h - Functions to aid building x86 insts -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file exposes functions that may be used with BuildMI from the
+// MachineInstrBuilder.h file to handle X86'isms in a clean way.
+//
+// The BuildMem function may be used with the BuildMI function to add entire
+// memory references in a single, typed, function call. X86 memory references
+// can be very complex expressions (described in the README), so wrapping them
+// up behind an easier to use interface makes sense. Descriptions of the
+// functions are included below.
+//
+// For reference, the order of operands for memory references is:
+// (Operand), Base, Scale, Index, Displacement.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86INSTRBUILDER_H
+#define X86INSTRBUILDER_H
+
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+
+namespace llvm {
+
+/// X86AddressMode - This struct holds a generalized full x86 address mode.
+/// The base register can be a frame index, which will eventually be replaced
+/// with BP or SP and Disp being offsetted accordingly. The displacement may
+/// also include the offset of a global value.
+struct X86AddressMode {
+ enum {
+ RegBase,
+ FrameIndexBase
+ } BaseType;
+
+ union {
+ unsigned Reg;
+ int FrameIndex;
+ } Base;
+
+ unsigned Scale;
+ unsigned IndexReg;
+ int Disp;
+ const GlobalValue *GV;
+ unsigned GVOpFlags;
+
+ X86AddressMode()
+ : BaseType(RegBase), Scale(1), IndexReg(0), Disp(0), GV(0), GVOpFlags(0) {
+ Base.Reg = 0;
+ }
+
+
+ void getFullAddress(SmallVectorImpl<MachineOperand> &MO) {
+ assert(Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8);
+
+ if (BaseType == X86AddressMode::RegBase)
+ MO.push_back(MachineOperand::CreateReg(Base.Reg, false, false,
+ false, false, false, 0, false));
+ else {
+ assert(BaseType == X86AddressMode::FrameIndexBase);
+ MO.push_back(MachineOperand::CreateFI(Base.FrameIndex));
+ }
+
+ MO.push_back(MachineOperand::CreateImm(Scale));
+ MO.push_back(MachineOperand::CreateReg(IndexReg, false, false,
+ false, false, false, 0, false));
+
+ if (GV)
+ MO.push_back(MachineOperand::CreateGA(GV, Disp, GVOpFlags));
+ else
+ MO.push_back(MachineOperand::CreateImm(Disp));
+
+ MO.push_back(MachineOperand::CreateReg(0, false, false,
+ false, false, false, 0, false));
+ }
+};
+
+/// addDirectMem - This function is used to add a direct memory reference to the
+/// current instruction -- that is, a dereference of an address in a register,
+/// with no scale, index or displacement. An example is: DWORD PTR [EAX].
+///
+static inline const MachineInstrBuilder &
+addDirectMem(const MachineInstrBuilder &MIB, unsigned Reg) {
+ // Because memory references are always represented with five
+ // values, this adds: Reg, 1, NoReg, 0, NoReg to the instruction.
+ return MIB.addReg(Reg).addImm(1).addReg(0).addImm(0).addReg(0);
+}
+
+
+static inline const MachineInstrBuilder &
+addOffset(const MachineInstrBuilder &MIB, int Offset) {
+ return MIB.addImm(1).addReg(0).addImm(Offset).addReg(0);
+}
+
+/// addRegOffset - This function is used to add a memory reference of the form
+/// [Reg + Offset], i.e., one with no scale or index, but with a
+/// displacement. An example is: DWORD PTR [EAX + 4].
+///
+static inline const MachineInstrBuilder &
+addRegOffset(const MachineInstrBuilder &MIB,
+ unsigned Reg, bool isKill, int Offset) {
+ return addOffset(MIB.addReg(Reg, getKillRegState(isKill)), Offset);
+}
+
+/// addRegReg - This function is used to add a memory reference of the form:
+/// [Reg + Reg].
+static inline const MachineInstrBuilder &addRegReg(const MachineInstrBuilder &MIB,
+ unsigned Reg1, bool isKill1,
+ unsigned Reg2, bool isKill2) {
+ return MIB.addReg(Reg1, getKillRegState(isKill1)).addImm(1)
+ .addReg(Reg2, getKillRegState(isKill2)).addImm(0).addReg(0);
+}
+
+static inline const MachineInstrBuilder &
+addFullAddress(const MachineInstrBuilder &MIB,
+ const X86AddressMode &AM) {
+ assert(AM.Scale == 1 || AM.Scale == 2 || AM.Scale == 4 || AM.Scale == 8);
+
+ if (AM.BaseType == X86AddressMode::RegBase)
+ MIB.addReg(AM.Base.Reg);
+ else {
+ assert(AM.BaseType == X86AddressMode::FrameIndexBase);
+ MIB.addFrameIndex(AM.Base.FrameIndex);
+ }
+
+ MIB.addImm(AM.Scale).addReg(AM.IndexReg);
+ if (AM.GV)
+ MIB.addGlobalAddress(AM.GV, AM.Disp, AM.GVOpFlags);
+ else
+ MIB.addImm(AM.Disp);
+
+ return MIB.addReg(0);
+}
+
+/// addFrameReference - This function is used to add a reference to the base of
+/// an abstract object on the stack frame of the current function. This
+/// reference has base register as the FrameIndex offset until it is resolved.
+/// This allows a constant offset to be specified as well...
+///
+static inline const MachineInstrBuilder &
+addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0) {
+ MachineInstr *MI = MIB;
+ MachineFunction &MF = *MI->getParent()->getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ const MCInstrDesc &MCID = MI->getDesc();
+ unsigned Flags = 0;
+ if (MCID.mayLoad())
+ Flags |= MachineMemOperand::MOLoad;
+ if (MCID.mayStore())
+ Flags |= MachineMemOperand::MOStore;
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI, Offset),
+ Flags, MFI.getObjectSize(FI),
+ MFI.getObjectAlignment(FI));
+ return addOffset(MIB.addFrameIndex(FI), Offset)
+ .addMemOperand(MMO);
+}
+
+/// addConstantPoolReference - This function is used to add a reference to the
+/// base of a constant value spilled to the per-function constant pool. The
+/// reference uses the abstract ConstantPoolIndex which is retained until
+/// either machine code emission or assembly output. In PIC mode on x86-32,
+/// the GlobalBaseReg parameter can be used to make this a
+/// GlobalBaseReg-relative reference.
+///
+static inline const MachineInstrBuilder &
+addConstantPoolReference(const MachineInstrBuilder &MIB, unsigned CPI,
+ unsigned GlobalBaseReg, unsigned char OpFlags) {
+ //FIXME: factor this
+ return MIB.addReg(GlobalBaseReg).addImm(1).addReg(0)
+ .addConstantPoolIndex(CPI, 0, OpFlags).addReg(0);
+}
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/X86InstrCMovSetCC.td b/contrib/llvm/lib/Target/X86/X86InstrCMovSetCC.td
new file mode 100644
index 0000000..3a43b22
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrCMovSetCC.td
@@ -0,0 +1,104 @@
+//===- X86InstrCMovSetCC.td - Conditional Move and SetCC ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the X86 conditional move and set on condition
+// instructions.
+//
+//===----------------------------------------------------------------------===//
+
+
+// SetCC instructions.
+multiclass CMOV<bits<8> opc, string Mnemonic, PatLeaf CondNode> {
+ let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst",
+ isCommutable = 1 in {
+ def #NAME#16rr
+ : I<opc, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ !strconcat(Mnemonic, "{w}\t{$src2, $dst|$dst, $src2}"),
+ [(set GR16:$dst,
+ (X86cmov GR16:$src1, GR16:$src2, CondNode, EFLAGS))]>,TB,OpSize;
+ def #NAME#32rr
+ : I<opc, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ !strconcat(Mnemonic, "{l}\t{$src2, $dst|$dst, $src2}"),
+ [(set GR32:$dst,
+ (X86cmov GR32:$src1, GR32:$src2, CondNode, EFLAGS))]>, TB;
+ def #NAME#64rr
+ :RI<opc, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ !strconcat(Mnemonic, "{q}\t{$src2, $dst|$dst, $src2}"),
+ [(set GR64:$dst,
+ (X86cmov GR64:$src1, GR64:$src2, CondNode, EFLAGS))]>, TB;
+ }
+
+ let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst" in {
+ def #NAME#16rm
+ : I<opc, MRMSrcMem, (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
+ !strconcat(Mnemonic, "{w}\t{$src2, $dst|$dst, $src2}"),
+ [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
+ CondNode, EFLAGS))]>, TB, OpSize;
+ def #NAME#32rm
+ : I<opc, MRMSrcMem, (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
+ !strconcat(Mnemonic, "{l}\t{$src2, $dst|$dst, $src2}"),
+ [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
+ CondNode, EFLAGS))]>, TB;
+ def #NAME#64rm
+ :RI<opc, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
+ !strconcat(Mnemonic, "{q}\t{$src2, $dst|$dst, $src2}"),
+ [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+ CondNode, EFLAGS))]>, TB;
+ } // Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst"
+} // end multiclass
+
+
+// Conditional Moves.
+defm CMOVO : CMOV<0x40, "cmovo" , X86_COND_O>;
+defm CMOVNO : CMOV<0x41, "cmovno", X86_COND_NO>;
+defm CMOVB : CMOV<0x42, "cmovb" , X86_COND_B>;
+defm CMOVAE : CMOV<0x43, "cmovae", X86_COND_AE>;
+defm CMOVE : CMOV<0x44, "cmove" , X86_COND_E>;
+defm CMOVNE : CMOV<0x45, "cmovne", X86_COND_NE>;
+defm CMOVBE : CMOV<0x46, "cmovbe", X86_COND_BE>;
+defm CMOVA : CMOV<0x47, "cmova" , X86_COND_A>;
+defm CMOVS : CMOV<0x48, "cmovs" , X86_COND_S>;
+defm CMOVNS : CMOV<0x49, "cmovns", X86_COND_NS>;
+defm CMOVP : CMOV<0x4A, "cmovp" , X86_COND_P>;
+defm CMOVNP : CMOV<0x4B, "cmovnp", X86_COND_NP>;
+defm CMOVL : CMOV<0x4C, "cmovl" , X86_COND_L>;
+defm CMOVGE : CMOV<0x4D, "cmovge", X86_COND_GE>;
+defm CMOVLE : CMOV<0x4E, "cmovle", X86_COND_LE>;
+defm CMOVG : CMOV<0x4F, "cmovg" , X86_COND_G>;
+
+
+// SetCC instructions.
+multiclass SETCC<bits<8> opc, string Mnemonic, PatLeaf OpNode> {
+ let Uses = [EFLAGS] in {
+ def r : I<opc, MRM0r, (outs GR8:$dst), (ins),
+ !strconcat(Mnemonic, "\t$dst"),
+ [(set GR8:$dst, (X86setcc OpNode, EFLAGS))]>, TB;
+ def m : I<opc, MRM0m, (outs), (ins i8mem:$dst),
+ !strconcat(Mnemonic, "\t$dst"),
+ [(store (X86setcc OpNode, EFLAGS), addr:$dst)]>, TB;
+ } // Uses = [EFLAGS]
+}
+
+defm SETO : SETCC<0x90, "seto", X86_COND_O>; // is overflow bit set
+defm SETNO : SETCC<0x91, "setno", X86_COND_NO>; // is overflow bit not set
+defm SETB : SETCC<0x92, "setb", X86_COND_B>; // unsigned less than
+defm SETAE : SETCC<0x93, "setae", X86_COND_AE>; // unsigned greater or equal
+defm SETE : SETCC<0x94, "sete", X86_COND_E>; // equal to
+defm SETNE : SETCC<0x95, "setne", X86_COND_NE>; // not equal to
+defm SETBE : SETCC<0x96, "setbe", X86_COND_BE>; // unsigned less than or equal
+defm SETA : SETCC<0x97, "seta", X86_COND_A>; // unsigned greater than
+defm SETS : SETCC<0x98, "sets", X86_COND_S>; // is signed bit set
+defm SETNS : SETCC<0x99, "setns", X86_COND_NS>; // is not signed
+defm SETP : SETCC<0x9A, "setp", X86_COND_P>; // is parity bit set
+defm SETNP : SETCC<0x9B, "setnp", X86_COND_NP>; // is parity bit not set
+defm SETL : SETCC<0x9C, "setl", X86_COND_L>; // signed less than
+defm SETGE : SETCC<0x9D, "setge", X86_COND_GE>; // signed greater or equal
+defm SETLE : SETCC<0x9E, "setle", X86_COND_LE>; // signed less than or equal
+defm SETG : SETCC<0x9F, "setg", X86_COND_G>; // signed greater than
+
diff --git a/contrib/llvm/lib/Target/X86/X86InstrCompiler.td b/contrib/llvm/lib/Target/X86/X86InstrCompiler.td
new file mode 100644
index 0000000..da28690
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -0,0 +1,1737 @@
+//===- X86InstrCompiler.td - Compiler Pseudos and Patterns -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the various pseudo instructions used by the compiler,
+// as well as Pat patterns used during instruction selection.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Pattern Matching Support
+
+def GetLo32XForm : SDNodeXForm<imm, [{
+ // Transformation function: get the low 32 bits.
+ return getI32Imm((unsigned)N->getZExtValue());
+}]>;
+
+def GetLo8XForm : SDNodeXForm<imm, [{
+ // Transformation function: get the low 8 bits.
+ return getI8Imm((uint8_t)N->getZExtValue());
+}]>;
+
+
+//===----------------------------------------------------------------------===//
+// Random Pseudo Instructions.
+
+// PIC base construction. This expands to code that looks like this:
+// call $next_inst
+// popl %destreg"
+let neverHasSideEffects = 1, isNotDuplicable = 1, Uses = [ESP] in
+ def MOVPC32r : Ii32<0xE8, Pseudo, (outs GR32:$reg), (ins i32imm:$label),
+ "", []>;
+
+
+// ADJCALLSTACKDOWN/UP implicitly use/def ESP because they may be expanded into
+// a stack adjustment and the codegen must know that they may modify the stack
+// pointer before prolog-epilog rewriting occurs.
+// Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become
+// sub / add which can clobber EFLAGS.
+let Defs = [ESP, EFLAGS], Uses = [ESP] in {
+def ADJCALLSTACKDOWN32 : I<0, Pseudo, (outs), (ins i32imm:$amt),
+ "#ADJCALLSTACKDOWN",
+ [(X86callseq_start timm:$amt)]>,
+ Requires<[In32BitMode]>;
+def ADJCALLSTACKUP32 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
+ "#ADJCALLSTACKUP",
+ [(X86callseq_end timm:$amt1, timm:$amt2)]>,
+ Requires<[In32BitMode]>;
+}
+
+// ADJCALLSTACKDOWN/UP implicitly use/def RSP because they may be expanded into
+// a stack adjustment and the codegen must know that they may modify the stack
+// pointer before prolog-epilog rewriting occurs.
+// Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become
+// sub / add which can clobber EFLAGS.
+let Defs = [RSP, EFLAGS], Uses = [RSP] in {
+def ADJCALLSTACKDOWN64 : I<0, Pseudo, (outs), (ins i32imm:$amt),
+ "#ADJCALLSTACKDOWN",
+ [(X86callseq_start timm:$amt)]>,
+ Requires<[In64BitMode]>;
+def ADJCALLSTACKUP64 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
+ "#ADJCALLSTACKUP",
+ [(X86callseq_end timm:$amt1, timm:$amt2)]>,
+ Requires<[In64BitMode]>;
+}
+
+
+
+// x86-64 va_start lowering magic.
+let usesCustomInserter = 1 in {
+def VASTART_SAVE_XMM_REGS : I<0, Pseudo,
+ (outs),
+ (ins GR8:$al,
+ i64imm:$regsavefi, i64imm:$offset,
+ variable_ops),
+ "#VASTART_SAVE_XMM_REGS $al, $regsavefi, $offset",
+ [(X86vastart_save_xmm_regs GR8:$al,
+ imm:$regsavefi,
+ imm:$offset)]>;
+
+// The VAARG_64 pseudo-instruction takes the address of the va_list,
+// and places the address of the next argument into a register.
+let Defs = [EFLAGS] in
+def VAARG_64 : I<0, Pseudo,
+ (outs GR64:$dst),
+ (ins i8mem:$ap, i32imm:$size, i8imm:$mode, i32imm:$align),
+ "#VAARG_64 $dst, $ap, $size, $mode, $align",
+ [(set GR64:$dst,
+ (X86vaarg64 addr:$ap, imm:$size, imm:$mode, imm:$align)),
+ (implicit EFLAGS)]>;
+
+// Dynamic stack allocation yields a _chkstk or _alloca call for all Windows
+// targets. These calls are needed to probe the stack when allocating more than
+// 4k bytes in one go. Touching the stack at 4K increments is necessary to
+// ensure that the guard pages used by the OS virtual memory manager are
+// allocated in correct sequence.
+// The main point of having separate instruction are extra unmodelled effects
+// (compared to ordinary calls) like stack pointer change.
+
+let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in
+ def WIN_ALLOCA : I<0, Pseudo, (outs), (ins),
+ "# dynamic stack allocation",
+ [(X86WinAlloca)]>;
+
+// When using segmented stacks these are lowered into instructions which first
+// check if the current stacklet has enough free memory. If it does, memory is
+// allocated by bumping the stack pointer. Otherwise memory is allocated from
+// the heap.
+
+let Defs = [EAX, ESP, EFLAGS], Uses = [ESP, EAX] in
+def SEG_ALLOCA_32 : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$size),
+ "# variable sized alloca for segmented stacks",
+ [(set GR32:$dst,
+ (X86SegAlloca GR32:$size))]>,
+ Requires<[In32BitMode]>;
+
+let Defs = [RAX, RSP, EFLAGS], Uses = [RSP, RAX] in
+def SEG_ALLOCA_64 : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$size),
+ "# variable sized alloca for segmented stacks",
+ [(set GR64:$dst,
+ (X86SegAlloca GR64:$size))]>,
+ Requires<[In64BitMode]>;
+
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// EH Pseudo Instructions
+//
+let isTerminator = 1, isReturn = 1, isBarrier = 1,
+ hasCtrlDep = 1, isCodeGenOnly = 1 in {
+def EH_RETURN : I<0xC3, RawFrm, (outs), (ins GR32:$addr),
+ "ret\t#eh_return, addr: $addr",
+ [(X86ehret GR32:$addr)]>;
+
+}
+
+let isTerminator = 1, isReturn = 1, isBarrier = 1,
+ hasCtrlDep = 1, isCodeGenOnly = 1 in {
+def EH_RETURN64 : I<0xC3, RawFrm, (outs), (ins GR64:$addr),
+ "ret\t#eh_return, addr: $addr",
+ [(X86ehret GR64:$addr)]>;
+
+}
+
+//===----------------------------------------------------------------------===//
+// Alias Instructions
+//===----------------------------------------------------------------------===//
+
+// Alias instructions that map movr0 to xor.
+// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
+// FIXME: Set encoding to pseudo.
+let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1,
+ isCodeGenOnly = 1 in {
+def MOV8r0 : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins), "",
+ [(set GR8:$dst, 0)]>;
+
+// We want to rewrite MOV16r0 in terms of MOV32r0, because it's a smaller
+// encoding and avoids a partial-register update sometimes, but doing so
+// at isel time interferes with rematerialization in the current register
+// allocator. For now, this is rewritten when the instruction is lowered
+// to an MCInst.
+def MOV16r0 : I<0x31, MRMInitReg, (outs GR16:$dst), (ins),
+ "",
+ [(set GR16:$dst, 0)]>, OpSize;
+
+// FIXME: Set encoding to pseudo.
+def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), "",
+ [(set GR32:$dst, 0)]>;
+}
+
+// We want to rewrite MOV64r0 in terms of MOV32r0, because it's sometimes a
+// smaller encoding, but doing so at isel time interferes with rematerialization
+// in the current register allocator. For now, this is rewritten when the
+// instruction is lowered to an MCInst.
+// FIXME: AddedComplexity gives this a higher priority than MOV64ri32. Remove
+// when we have a better way to specify isel priority.
+let Defs = [EFLAGS], isCodeGenOnly=1,
+ AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in
+def MOV64r0 : I<0x31, MRMInitReg, (outs GR64:$dst), (ins), "",
+ [(set GR64:$dst, 0)]>;
+
+// Materialize i64 constant where top 32-bits are zero. This could theoretically
+// use MOV32ri with a SUBREG_TO_REG to represent the zero-extension, however
+// that would make it more difficult to rematerialize.
+let AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1,
+ isCodeGenOnly = 1 in
+def MOV64ri64i32 : Ii32<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64i32imm:$src),
+ "", [(set GR64:$dst, i64immZExt32:$src)]>;
+
+// Use sbb to materialize carry bit.
+let Uses = [EFLAGS], Defs = [EFLAGS], isCodeGenOnly = 1 in {
+// FIXME: These are pseudo ops that should be replaced with Pat<> patterns.
+// However, Pat<> can't replicate the destination reg into the inputs of the
+// result.
+// FIXME: Change these to have encoding Pseudo when X86MCCodeEmitter replaces
+// X86CodeEmitter.
+def SETB_C8r : I<0x18, MRMInitReg, (outs GR8:$dst), (ins), "",
+ [(set GR8:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
+def SETB_C16r : I<0x19, MRMInitReg, (outs GR16:$dst), (ins), "",
+ [(set GR16:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>,
+ OpSize;
+def SETB_C32r : I<0x19, MRMInitReg, (outs GR32:$dst), (ins), "",
+ [(set GR32:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
+def SETB_C64r : RI<0x19, MRMInitReg, (outs GR64:$dst), (ins), "",
+ [(set GR64:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
+} // isCodeGenOnly
+
+
+def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
+ (SETB_C16r)>;
+def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
+ (SETB_C32r)>;
+def : Pat<(i64 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
+ (SETB_C64r)>;
+
+def : Pat<(i16 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
+ (SETB_C16r)>;
+def : Pat<(i32 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
+ (SETB_C32r)>;
+def : Pat<(i64 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
+ (SETB_C64r)>;
+
+// We canonicalize 'setb' to "(and (sbb reg,reg), 1)" on the hope that the and
+// will be eliminated and that the sbb can be extended up to a wider type. When
+// this happens, it is great. However, if we are left with an 8-bit sbb and an
+// and, we might as well just match it as a setb.
+def : Pat<(and (i8 (X86setcc_c X86_COND_B, EFLAGS)), 1),
+ (SETBr)>;
+
+// (add OP, SETB) -> (adc OP, 0)
+def : Pat<(add (and (i8 (X86setcc_c X86_COND_B, EFLAGS)), 1), GR8:$op),
+ (ADC8ri GR8:$op, 0)>;
+def : Pat<(add (and (i32 (X86setcc_c X86_COND_B, EFLAGS)), 1), GR32:$op),
+ (ADC32ri8 GR32:$op, 0)>;
+def : Pat<(add (and (i64 (X86setcc_c X86_COND_B, EFLAGS)), 1), GR64:$op),
+ (ADC64ri8 GR64:$op, 0)>;
+
+// (sub OP, SETB) -> (sbb OP, 0)
+def : Pat<(sub GR8:$op, (and (i8 (X86setcc_c X86_COND_B, EFLAGS)), 1)),
+ (SBB8ri GR8:$op, 0)>;
+def : Pat<(sub GR32:$op, (and (i32 (X86setcc_c X86_COND_B, EFLAGS)), 1)),
+ (SBB32ri8 GR32:$op, 0)>;
+def : Pat<(sub GR64:$op, (and (i64 (X86setcc_c X86_COND_B, EFLAGS)), 1)),
+ (SBB64ri8 GR64:$op, 0)>;
+
+// (sub OP, SETCC_CARRY) -> (adc OP, 0)
+def : Pat<(sub GR8:$op, (i8 (X86setcc_c X86_COND_B, EFLAGS))),
+ (ADC8ri GR8:$op, 0)>;
+def : Pat<(sub GR32:$op, (i32 (X86setcc_c X86_COND_B, EFLAGS))),
+ (ADC32ri8 GR32:$op, 0)>;
+def : Pat<(sub GR64:$op, (i64 (X86setcc_c X86_COND_B, EFLAGS))),
+ (ADC64ri8 GR64:$op, 0)>;
+
+//===----------------------------------------------------------------------===//
+// String Pseudo Instructions
+//
+let Defs = [ECX,EDI,ESI], Uses = [ECX,EDI,ESI], isCodeGenOnly = 1 in {
+def REP_MOVSB : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}",
+ [(X86rep_movs i8)]>, REP;
+def REP_MOVSW : I<0xA5, RawFrm, (outs), (ins), "{rep;movsw|rep movsw}",
+ [(X86rep_movs i16)]>, REP, OpSize;
+def REP_MOVSD : I<0xA5, RawFrm, (outs), (ins), "{rep;movsl|rep movsd}",
+ [(X86rep_movs i32)]>, REP;
+}
+
+let Defs = [RCX,RDI,RSI], Uses = [RCX,RDI,RSI], isCodeGenOnly = 1 in
+def REP_MOVSQ : RI<0xA5, RawFrm, (outs), (ins), "{rep;movsq|rep movsq}",
+ [(X86rep_movs i64)]>, REP;
+
+
+// FIXME: Should use "(X86rep_stos AL)" as the pattern.
+let Defs = [ECX,EDI], Uses = [AL,ECX,EDI], isCodeGenOnly = 1 in
+def REP_STOSB : I<0xAA, RawFrm, (outs), (ins), "{rep;stosb|rep stosb}",
+ [(X86rep_stos i8)]>, REP;
+let Defs = [ECX,EDI], Uses = [AX,ECX,EDI], isCodeGenOnly = 1 in
+def REP_STOSW : I<0xAB, RawFrm, (outs), (ins), "{rep;stosw|rep stosw}",
+ [(X86rep_stos i16)]>, REP, OpSize;
+let Defs = [ECX,EDI], Uses = [EAX,ECX,EDI], isCodeGenOnly = 1 in
+def REP_STOSD : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}",
+ [(X86rep_stos i32)]>, REP;
+
+let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI], isCodeGenOnly = 1 in
+def REP_STOSQ : RI<0xAB, RawFrm, (outs), (ins), "{rep;stosq|rep stosq}",
+ [(X86rep_stos i64)]>, REP;
+
+
+//===----------------------------------------------------------------------===//
+// Thread Local Storage Instructions
+//
+
+// ELF TLS Support
+// All calls clobber the non-callee saved registers. ESP is marked as
+// a use to prevent stack-pointer assignments that appear immediately
+// before calls from potentially appearing dead.
+let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
+ MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+ XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+ XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
+ Uses = [ESP] in
+def TLS_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
+ "# TLS_addr32",
+ [(X86tlsaddr tls32addr:$sym)]>,
+ Requires<[In32BitMode]>;
+
+// All calls clobber the non-callee saved registers. RSP is marked as
+// a use to prevent stack-pointer assignments that appear immediately
+// before calls from potentially appearing dead.
+let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
+ FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
+ MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+ XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+ XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
+ Uses = [RSP] in
+def TLS_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
+ "# TLS_addr64",
+ [(X86tlsaddr tls64addr:$sym)]>,
+ Requires<[In64BitMode]>;
+
+// Darwin TLS Support
+// For i386, the address of the thunk is passed on the stack, on return the
+// address of the variable is in %eax. %ecx is trashed during the function
+// call. All other registers are preserved.
+let Defs = [EAX, ECX, EFLAGS],
+ Uses = [ESP],
+ usesCustomInserter = 1 in
+def TLSCall_32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
+ "# TLSCall_32",
+ [(X86TLSCall addr:$sym)]>,
+ Requires<[In32BitMode]>;
+
+// For x86_64, the address of the thunk is passed in %rdi, on return
+// the address of the variable is in %rax. All other registers are preserved.
+let Defs = [RAX, EFLAGS],
+ Uses = [RSP, RDI],
+ usesCustomInserter = 1 in
+def TLSCall_64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
+ "# TLSCall_64",
+ [(X86TLSCall addr:$sym)]>,
+ Requires<[In64BitMode]>;
+
+
+//===----------------------------------------------------------------------===//
+// Conditional Move Pseudo Instructions
+
+// X86 doesn't have 8-bit conditional moves. Use a customInserter to
+// emit control flow. An alternative to this is to mark i8 SELECT as Promote,
+// however that requires promoting the operands, and can induce additional
+// i8 register pressure.
+let usesCustomInserter = 1, Uses = [EFLAGS] in {
+def CMOV_GR8 : I<0, Pseudo,
+ (outs GR8:$dst), (ins GR8:$src1, GR8:$src2, i8imm:$cond),
+ "#CMOV_GR8 PSEUDO!",
+ [(set GR8:$dst, (X86cmov GR8:$src1, GR8:$src2,
+ imm:$cond, EFLAGS))]>;
+
+let Predicates = [NoCMov] in {
+def CMOV_GR32 : I<0, Pseudo,
+ (outs GR32:$dst), (ins GR32:$src1, GR32:$src2, i8imm:$cond),
+ "#CMOV_GR32* PSEUDO!",
+ [(set GR32:$dst,
+ (X86cmov GR32:$src1, GR32:$src2, imm:$cond, EFLAGS))]>;
+def CMOV_GR16 : I<0, Pseudo,
+ (outs GR16:$dst), (ins GR16:$src1, GR16:$src2, i8imm:$cond),
+ "#CMOV_GR16* PSEUDO!",
+ [(set GR16:$dst,
+ (X86cmov GR16:$src1, GR16:$src2, imm:$cond, EFLAGS))]>;
+def CMOV_RFP32 : I<0, Pseudo,
+ (outs RFP32:$dst),
+ (ins RFP32:$src1, RFP32:$src2, i8imm:$cond),
+ "#CMOV_RFP32 PSEUDO!",
+ [(set RFP32:$dst,
+ (X86cmov RFP32:$src1, RFP32:$src2, imm:$cond,
+ EFLAGS))]>;
+def CMOV_RFP64 : I<0, Pseudo,
+ (outs RFP64:$dst),
+ (ins RFP64:$src1, RFP64:$src2, i8imm:$cond),
+ "#CMOV_RFP64 PSEUDO!",
+ [(set RFP64:$dst,
+ (X86cmov RFP64:$src1, RFP64:$src2, imm:$cond,
+ EFLAGS))]>;
+def CMOV_RFP80 : I<0, Pseudo,
+ (outs RFP80:$dst),
+ (ins RFP80:$src1, RFP80:$src2, i8imm:$cond),
+ "#CMOV_RFP80 PSEUDO!",
+ [(set RFP80:$dst,
+ (X86cmov RFP80:$src1, RFP80:$src2, imm:$cond,
+ EFLAGS))]>;
+} // Predicates = [NoCMov]
+} // UsesCustomInserter = 1, Uses = [EFLAGS]
+
+
+//===----------------------------------------------------------------------===//
+// Atomic Instruction Pseudo Instructions
+//===----------------------------------------------------------------------===//
+
+// Atomic exchange, and, or, xor
+let Constraints = "$val = $dst", Defs = [EFLAGS],
+ usesCustomInserter = 1 in {
+
+def ATOMAND8 : I<0, Pseudo, (outs GR8:$dst),(ins i8mem:$ptr, GR8:$val),
+ "#ATOMAND8 PSEUDO!",
+ [(set GR8:$dst, (atomic_load_and_8 addr:$ptr, GR8:$val))]>;
+def ATOMOR8 : I<0, Pseudo, (outs GR8:$dst),(ins i8mem:$ptr, GR8:$val),
+ "#ATOMOR8 PSEUDO!",
+ [(set GR8:$dst, (atomic_load_or_8 addr:$ptr, GR8:$val))]>;
+def ATOMXOR8 : I<0, Pseudo,(outs GR8:$dst),(ins i8mem:$ptr, GR8:$val),
+ "#ATOMXOR8 PSEUDO!",
+ [(set GR8:$dst, (atomic_load_xor_8 addr:$ptr, GR8:$val))]>;
+def ATOMNAND8 : I<0, Pseudo,(outs GR8:$dst),(ins i8mem:$ptr, GR8:$val),
+ "#ATOMNAND8 PSEUDO!",
+ [(set GR8:$dst, (atomic_load_nand_8 addr:$ptr, GR8:$val))]>;
+
+def ATOMAND16 : I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
+ "#ATOMAND16 PSEUDO!",
+ [(set GR16:$dst, (atomic_load_and_16 addr:$ptr, GR16:$val))]>;
+def ATOMOR16 : I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
+ "#ATOMOR16 PSEUDO!",
+ [(set GR16:$dst, (atomic_load_or_16 addr:$ptr, GR16:$val))]>;
+def ATOMXOR16 : I<0, Pseudo,(outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
+ "#ATOMXOR16 PSEUDO!",
+ [(set GR16:$dst, (atomic_load_xor_16 addr:$ptr, GR16:$val))]>;
+def ATOMNAND16 : I<0, Pseudo,(outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
+ "#ATOMNAND16 PSEUDO!",
+ [(set GR16:$dst, (atomic_load_nand_16 addr:$ptr, GR16:$val))]>;
+def ATOMMIN16: I<0, Pseudo, (outs GR16:$dst), (ins i16mem:$ptr, GR16:$val),
+ "#ATOMMIN16 PSEUDO!",
+ [(set GR16:$dst, (atomic_load_min_16 addr:$ptr, GR16:$val))]>;
+def ATOMMAX16: I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
+ "#ATOMMAX16 PSEUDO!",
+ [(set GR16:$dst, (atomic_load_max_16 addr:$ptr, GR16:$val))]>;
+def ATOMUMIN16: I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
+ "#ATOMUMIN16 PSEUDO!",
+ [(set GR16:$dst, (atomic_load_umin_16 addr:$ptr, GR16:$val))]>;
+def ATOMUMAX16: I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
+ "#ATOMUMAX16 PSEUDO!",
+ [(set GR16:$dst, (atomic_load_umax_16 addr:$ptr, GR16:$val))]>;
+
+
+def ATOMAND32 : I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
+ "#ATOMAND32 PSEUDO!",
+ [(set GR32:$dst, (atomic_load_and_32 addr:$ptr, GR32:$val))]>;
+def ATOMOR32 : I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
+ "#ATOMOR32 PSEUDO!",
+ [(set GR32:$dst, (atomic_load_or_32 addr:$ptr, GR32:$val))]>;
+def ATOMXOR32 : I<0, Pseudo,(outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
+ "#ATOMXOR32 PSEUDO!",
+ [(set GR32:$dst, (atomic_load_xor_32 addr:$ptr, GR32:$val))]>;
+def ATOMNAND32 : I<0, Pseudo,(outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
+ "#ATOMNAND32 PSEUDO!",
+ [(set GR32:$dst, (atomic_load_nand_32 addr:$ptr, GR32:$val))]>;
+def ATOMMIN32: I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$ptr, GR32:$val),
+ "#ATOMMIN32 PSEUDO!",
+ [(set GR32:$dst, (atomic_load_min_32 addr:$ptr, GR32:$val))]>;
+def ATOMMAX32: I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
+ "#ATOMMAX32 PSEUDO!",
+ [(set GR32:$dst, (atomic_load_max_32 addr:$ptr, GR32:$val))]>;
+def ATOMUMIN32: I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
+ "#ATOMUMIN32 PSEUDO!",
+ [(set GR32:$dst, (atomic_load_umin_32 addr:$ptr, GR32:$val))]>;
+def ATOMUMAX32: I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
+ "#ATOMUMAX32 PSEUDO!",
+ [(set GR32:$dst, (atomic_load_umax_32 addr:$ptr, GR32:$val))]>;
+
+
+
+def ATOMAND64 : I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
+ "#ATOMAND64 PSEUDO!",
+ [(set GR64:$dst, (atomic_load_and_64 addr:$ptr, GR64:$val))]>;
+def ATOMOR64 : I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
+ "#ATOMOR64 PSEUDO!",
+ [(set GR64:$dst, (atomic_load_or_64 addr:$ptr, GR64:$val))]>;
+def ATOMXOR64 : I<0, Pseudo,(outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
+ "#ATOMXOR64 PSEUDO!",
+ [(set GR64:$dst, (atomic_load_xor_64 addr:$ptr, GR64:$val))]>;
+def ATOMNAND64 : I<0, Pseudo,(outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
+ "#ATOMNAND64 PSEUDO!",
+ [(set GR64:$dst, (atomic_load_nand_64 addr:$ptr, GR64:$val))]>;
+def ATOMMIN64: I<0, Pseudo, (outs GR64:$dst), (ins i64mem:$ptr, GR64:$val),
+ "#ATOMMIN64 PSEUDO!",
+ [(set GR64:$dst, (atomic_load_min_64 addr:$ptr, GR64:$val))]>;
+def ATOMMAX64: I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
+ "#ATOMMAX64 PSEUDO!",
+ [(set GR64:$dst, (atomic_load_max_64 addr:$ptr, GR64:$val))]>;
+def ATOMUMIN64: I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
+ "#ATOMUMIN64 PSEUDO!",
+ [(set GR64:$dst, (atomic_load_umin_64 addr:$ptr, GR64:$val))]>;
+def ATOMUMAX64: I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
+ "#ATOMUMAX64 PSEUDO!",
+ [(set GR64:$dst, (atomic_load_umax_64 addr:$ptr, GR64:$val))]>;
+}
+
+let Constraints = "$val1 = $dst1, $val2 = $dst2",
+ Defs = [EFLAGS, EAX, EBX, ECX, EDX],
+ Uses = [EAX, EBX, ECX, EDX],
+ mayLoad = 1, mayStore = 1,
+ usesCustomInserter = 1 in {
+def ATOMAND6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
+ (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
+ "#ATOMAND6432 PSEUDO!", []>;
+def ATOMOR6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
+ (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
+ "#ATOMOR6432 PSEUDO!", []>;
+def ATOMXOR6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
+ (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
+ "#ATOMXOR6432 PSEUDO!", []>;
+def ATOMNAND6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
+ (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
+ "#ATOMNAND6432 PSEUDO!", []>;
+def ATOMADD6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
+ (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
+ "#ATOMADD6432 PSEUDO!", []>;
+def ATOMSUB6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
+ (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
+ "#ATOMSUB6432 PSEUDO!", []>;
+def ATOMSWAP6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
+ (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
+ "#ATOMSWAP6432 PSEUDO!", []>;
+}
+
+//===----------------------------------------------------------------------===//
+// Normal-Instructions-With-Lock-Prefix Pseudo Instructions
+//===----------------------------------------------------------------------===//
+
+// FIXME: Use normal instructions and add lock prefix dynamically.
+
+// Memory barriers
+
+// TODO: Get this to fold the constant into the instruction.
+let isCodeGenOnly = 1 in
+def OR32mrLocked : I<0x09, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$zero),
+ "lock\n\t"
+ "or{l}\t{$zero, $dst|$dst, $zero}",
+ []>, Requires<[In32BitMode]>, LOCK;
+
+let hasSideEffects = 1 in
+def Int_MemBarrier : I<0, Pseudo, (outs), (ins),
+ "#MEMBARRIER",
+ [(X86MemBarrier)]>;
+
+// TODO: Get this to fold the constant into the instruction.
+let hasSideEffects = 1, Defs = [ESP], isCodeGenOnly = 1 in
+def Int_MemBarrierNoSSE64 : RI<0x09, MRM1r, (outs), (ins GR64:$zero),
+ "lock\n\t"
+ "or{q}\t{$zero, (%rsp)|(%rsp), $zero}",
+ [(X86MemBarrierNoSSE GR64:$zero)]>,
+ Requires<[In64BitMode]>, LOCK;
+
+
+// RegOpc corresponds to the mr version of the instruction
+// ImmOpc corresponds to the mi version of the instruction
+// ImmOpc8 corresponds to the mi8 version of the instruction
+// ImmMod corresponds to the instruction format of the mi and mi8 versions
+multiclass LOCK_ArithBinOp<bits<8> RegOpc, bits<8> ImmOpc, bits<8> ImmOpc8,
+ Format ImmMod, string mnemonic> {
+let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1 in {
+
+def #NAME#8mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
+ RegOpc{3}, RegOpc{2}, RegOpc{1}, 0 },
+ MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
+ !strconcat("lock\n\t", mnemonic, "{b}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ []>, LOCK;
+def #NAME#16mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
+ RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
+ MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
+ !strconcat("lock\n\t", mnemonic, "{w}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ []>, OpSize, LOCK;
+def #NAME#32mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
+ RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
+ MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
+ !strconcat("lock\n\t", mnemonic, "{l}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ []>, LOCK;
+def #NAME#64mr : RI<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
+ RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
+ MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
+ !strconcat("lock\n\t", mnemonic, "{q}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ []>, LOCK;
+
+def #NAME#8mi : Ii8<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
+ ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 0 },
+ ImmMod, (outs), (ins i8mem :$dst, i8imm :$src2),
+ !strconcat("lock\n\t", mnemonic, "{b}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ []>, LOCK;
+
+def #NAME#16mi : Ii16<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
+ ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
+ ImmMod, (outs), (ins i16mem :$dst, i16imm :$src2),
+ !strconcat("lock\n\t", mnemonic, "{w}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ []>, LOCK;
+
+def #NAME#32mi : Ii32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
+ ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
+ ImmMod, (outs), (ins i32mem :$dst, i32imm :$src2),
+ !strconcat("lock\n\t", mnemonic, "{l}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ []>, LOCK;
+
+def #NAME#64mi32 : RIi32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
+ ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
+ ImmMod, (outs), (ins i64mem :$dst, i64i32imm :$src2),
+ !strconcat("lock\n\t", mnemonic, "{q}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ []>, LOCK;
+
+def #NAME#16mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
+ ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
+ ImmMod, (outs), (ins i16mem :$dst, i16i8imm :$src2),
+ !strconcat("lock\n\t", mnemonic, "{w}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ []>, LOCK;
+def #NAME#32mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
+ ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
+ ImmMod, (outs), (ins i32mem :$dst, i32i8imm :$src2),
+ !strconcat("lock\n\t", mnemonic, "{l}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ []>, LOCK;
+def #NAME#64mi8 : RIi8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
+ ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
+ ImmMod, (outs), (ins i64mem :$dst, i64i8imm :$src2),
+ !strconcat("lock\n\t", mnemonic, "{q}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ []>, LOCK;
+
+}
+
+}
+
+defm LOCK_ADD : LOCK_ArithBinOp<0x00, 0x80, 0x83, MRM0m, "add">;
+defm LOCK_SUB : LOCK_ArithBinOp<0x28, 0x80, 0x83, MRM5m, "sub">;
+defm LOCK_OR : LOCK_ArithBinOp<0x08, 0x80, 0x83, MRM1m, "or">;
+defm LOCK_AND : LOCK_ArithBinOp<0x20, 0x80, 0x83, MRM4m, "and">;
+defm LOCK_XOR : LOCK_ArithBinOp<0x30, 0x80, 0x83, MRM6m, "xor">;
+
+// Optimized codegen when the non-memory output is not used.
+let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1 in {
+
+def LOCK_INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst),
+ "lock\n\t"
+ "inc{b}\t$dst", []>, LOCK;
+def LOCK_INC16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst),
+ "lock\n\t"
+ "inc{w}\t$dst", []>, OpSize, LOCK;
+def LOCK_INC32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst),
+ "lock\n\t"
+ "inc{l}\t$dst", []>, LOCK;
+def LOCK_INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst),
+ "lock\n\t"
+ "inc{q}\t$dst", []>, LOCK;
+
+def LOCK_DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst),
+ "lock\n\t"
+ "dec{b}\t$dst", []>, LOCK;
+def LOCK_DEC16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst),
+ "lock\n\t"
+ "dec{w}\t$dst", []>, OpSize, LOCK;
+def LOCK_DEC32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst),
+ "lock\n\t"
+ "dec{l}\t$dst", []>, LOCK;
+def LOCK_DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst),
+ "lock\n\t"
+ "dec{q}\t$dst", []>, LOCK;
+}
+
+// Atomic compare and swap.
+let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX],
+ isCodeGenOnly = 1 in
+def LCMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$ptr),
+ "lock\n\t"
+ "cmpxchg8b\t$ptr",
+ [(X86cas8 addr:$ptr)]>, TB, LOCK;
+
+let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX],
+ isCodeGenOnly = 1 in
+def LCMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$ptr),
+ "lock\n\t"
+ "cmpxchg16b\t$ptr",
+ [(X86cas16 addr:$ptr)]>, TB, LOCK,
+ Requires<[HasCmpxchg16b]>;
+
+let Defs = [AL, EFLAGS], Uses = [AL], isCodeGenOnly = 1 in {
+def LCMPXCHG8 : I<0xB0, MRMDestMem, (outs), (ins i8mem:$ptr, GR8:$swap),
+ "lock\n\t"
+ "cmpxchg{b}\t{$swap, $ptr|$ptr, $swap}",
+ [(X86cas addr:$ptr, GR8:$swap, 1)]>, TB, LOCK;
+}
+
+let Defs = [AX, EFLAGS], Uses = [AX], isCodeGenOnly = 1 in {
+def LCMPXCHG16 : I<0xB1, MRMDestMem, (outs), (ins i16mem:$ptr, GR16:$swap),
+ "lock\n\t"
+ "cmpxchg{w}\t{$swap, $ptr|$ptr, $swap}",
+ [(X86cas addr:$ptr, GR16:$swap, 2)]>, TB, OpSize, LOCK;
+}
+
+let Defs = [EAX, EFLAGS], Uses = [EAX], isCodeGenOnly = 1 in {
+def LCMPXCHG32 : I<0xB1, MRMDestMem, (outs), (ins i32mem:$ptr, GR32:$swap),
+ "lock\n\t"
+ "cmpxchg{l}\t{$swap, $ptr|$ptr, $swap}",
+ [(X86cas addr:$ptr, GR32:$swap, 4)]>, TB, LOCK;
+}
+
+let Defs = [RAX, EFLAGS], Uses = [RAX], isCodeGenOnly = 1 in {
+def LCMPXCHG64 : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$ptr, GR64:$swap),
+ "lock\n\t"
+ "cmpxchg{q}\t{$swap, $ptr|$ptr, $swap}",
+ [(X86cas addr:$ptr, GR64:$swap, 8)]>, TB, LOCK;
+}
+
+// Atomic exchange and add
+let Constraints = "$val = $dst", Defs = [EFLAGS], isCodeGenOnly = 1 in {
+def LXADD8 : I<0xC0, MRMSrcMem, (outs GR8:$dst), (ins GR8:$val, i8mem:$ptr),
+ "lock\n\t"
+ "xadd{b}\t{$val, $ptr|$ptr, $val}",
+ [(set GR8:$dst, (atomic_load_add_8 addr:$ptr, GR8:$val))]>,
+ TB, LOCK;
+def LXADD16 : I<0xC1, MRMSrcMem, (outs GR16:$dst), (ins GR16:$val, i16mem:$ptr),
+ "lock\n\t"
+ "xadd{w}\t{$val, $ptr|$ptr, $val}",
+ [(set GR16:$dst, (atomic_load_add_16 addr:$ptr, GR16:$val))]>,
+ TB, OpSize, LOCK;
+def LXADD32 : I<0xC1, MRMSrcMem, (outs GR32:$dst), (ins GR32:$val, i32mem:$ptr),
+ "lock\n\t"
+ "xadd{l}\t{$val, $ptr|$ptr, $val}",
+ [(set GR32:$dst, (atomic_load_add_32 addr:$ptr, GR32:$val))]>,
+ TB, LOCK;
+def LXADD64 : RI<0xC1, MRMSrcMem, (outs GR64:$dst), (ins GR64:$val,i64mem:$ptr),
+ "lock\n\t"
+ "xadd{q}\t{$val, $ptr|$ptr, $val}",
+ [(set GR64:$dst, (atomic_load_add_64 addr:$ptr, GR64:$val))]>,
+ TB, LOCK;
+}
+
+def ACQUIRE_MOV8rm : I<0, Pseudo, (outs GR8 :$dst), (ins i8mem :$src),
+ "#ACQUIRE_MOV PSEUDO!",
+ [(set GR8:$dst, (atomic_load_8 addr:$src))]>;
+def ACQUIRE_MOV16rm : I<0, Pseudo, (outs GR16:$dst), (ins i16mem:$src),
+ "#ACQUIRE_MOV PSEUDO!",
+ [(set GR16:$dst, (atomic_load_16 addr:$src))]>;
+def ACQUIRE_MOV32rm : I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$src),
+ "#ACQUIRE_MOV PSEUDO!",
+ [(set GR32:$dst, (atomic_load_32 addr:$src))]>;
+def ACQUIRE_MOV64rm : I<0, Pseudo, (outs GR64:$dst), (ins i64mem:$src),
+ "#ACQUIRE_MOV PSEUDO!",
+ [(set GR64:$dst, (atomic_load_64 addr:$src))]>;
+
+def RELEASE_MOV8mr : I<0, Pseudo, (outs), (ins i8mem :$dst, GR8 :$src),
+ "#RELEASE_MOV PSEUDO!",
+ [(atomic_store_8 addr:$dst, GR8 :$src)]>;
+def RELEASE_MOV16mr : I<0, Pseudo, (outs), (ins i16mem:$dst, GR16:$src),
+ "#RELEASE_MOV PSEUDO!",
+ [(atomic_store_16 addr:$dst, GR16:$src)]>;
+def RELEASE_MOV32mr : I<0, Pseudo, (outs), (ins i32mem:$dst, GR32:$src),
+ "#RELEASE_MOV PSEUDO!",
+ [(atomic_store_32 addr:$dst, GR32:$src)]>;
+def RELEASE_MOV64mr : I<0, Pseudo, (outs), (ins i64mem:$dst, GR64:$src),
+ "#RELEASE_MOV PSEUDO!",
+ [(atomic_store_64 addr:$dst, GR64:$src)]>;
+
+//===----------------------------------------------------------------------===//
+// Conditional Move Pseudo Instructions.
+//===----------------------------------------------------------------------===//
+
+
+// CMOV* - Used to implement the SSE SELECT DAG operation. Expanded after
+// instruction selection into a branch sequence.
+let Uses = [EFLAGS], usesCustomInserter = 1 in {
+ def CMOV_FR32 : I<0, Pseudo,
+ (outs FR32:$dst), (ins FR32:$t, FR32:$f, i8imm:$cond),
+ "#CMOV_FR32 PSEUDO!",
+ [(set FR32:$dst, (X86cmov FR32:$t, FR32:$f, imm:$cond,
+ EFLAGS))]>;
+ def CMOV_FR64 : I<0, Pseudo,
+ (outs FR64:$dst), (ins FR64:$t, FR64:$f, i8imm:$cond),
+ "#CMOV_FR64 PSEUDO!",
+ [(set FR64:$dst, (X86cmov FR64:$t, FR64:$f, imm:$cond,
+ EFLAGS))]>;
+ def CMOV_V4F32 : I<0, Pseudo,
+ (outs VR128:$dst), (ins VR128:$t, VR128:$f, i8imm:$cond),
+ "#CMOV_V4F32 PSEUDO!",
+ [(set VR128:$dst,
+ (v4f32 (X86cmov VR128:$t, VR128:$f, imm:$cond,
+ EFLAGS)))]>;
+ def CMOV_V2F64 : I<0, Pseudo,
+ (outs VR128:$dst), (ins VR128:$t, VR128:$f, i8imm:$cond),
+ "#CMOV_V2F64 PSEUDO!",
+ [(set VR128:$dst,
+ (v2f64 (X86cmov VR128:$t, VR128:$f, imm:$cond,
+ EFLAGS)))]>;
+ def CMOV_V2I64 : I<0, Pseudo,
+ (outs VR128:$dst), (ins VR128:$t, VR128:$f, i8imm:$cond),
+ "#CMOV_V2I64 PSEUDO!",
+ [(set VR128:$dst,
+ (v2i64 (X86cmov VR128:$t, VR128:$f, imm:$cond,
+ EFLAGS)))]>;
+ def CMOV_V8F32 : I<0, Pseudo,
+ (outs VR256:$dst), (ins VR256:$t, VR256:$f, i8imm:$cond),
+ "#CMOV_V8F32 PSEUDO!",
+ [(set VR256:$dst,
+ (v8f32 (X86cmov VR256:$t, VR256:$f, imm:$cond,
+ EFLAGS)))]>;
+ def CMOV_V4F64 : I<0, Pseudo,
+ (outs VR256:$dst), (ins VR256:$t, VR256:$f, i8imm:$cond),
+ "#CMOV_V4F64 PSEUDO!",
+ [(set VR256:$dst,
+ (v4f64 (X86cmov VR256:$t, VR256:$f, imm:$cond,
+ EFLAGS)))]>;
+ def CMOV_V4I64 : I<0, Pseudo,
+ (outs VR256:$dst), (ins VR256:$t, VR256:$f, i8imm:$cond),
+ "#CMOV_V4I64 PSEUDO!",
+ [(set VR256:$dst,
+ (v4i64 (X86cmov VR256:$t, VR256:$f, imm:$cond,
+ EFLAGS)))]>;
+}
+
+
+//===----------------------------------------------------------------------===//
+// DAG Pattern Matching Rules
+//===----------------------------------------------------------------------===//
+
+// ConstantPool GlobalAddress, ExternalSymbol, and JumpTable
+def : Pat<(i32 (X86Wrapper tconstpool :$dst)), (MOV32ri tconstpool :$dst)>;
+def : Pat<(i32 (X86Wrapper tjumptable :$dst)), (MOV32ri tjumptable :$dst)>;
+def : Pat<(i32 (X86Wrapper tglobaltlsaddr:$dst)),(MOV32ri tglobaltlsaddr:$dst)>;
+def : Pat<(i32 (X86Wrapper tglobaladdr :$dst)), (MOV32ri tglobaladdr :$dst)>;
+def : Pat<(i32 (X86Wrapper texternalsym:$dst)), (MOV32ri texternalsym:$dst)>;
+def : Pat<(i32 (X86Wrapper tblockaddress:$dst)), (MOV32ri tblockaddress:$dst)>;
+
+def : Pat<(add GR32:$src1, (X86Wrapper tconstpool:$src2)),
+ (ADD32ri GR32:$src1, tconstpool:$src2)>;
+def : Pat<(add GR32:$src1, (X86Wrapper tjumptable:$src2)),
+ (ADD32ri GR32:$src1, tjumptable:$src2)>;
+def : Pat<(add GR32:$src1, (X86Wrapper tglobaladdr :$src2)),
+ (ADD32ri GR32:$src1, tglobaladdr:$src2)>;
+def : Pat<(add GR32:$src1, (X86Wrapper texternalsym:$src2)),
+ (ADD32ri GR32:$src1, texternalsym:$src2)>;
+def : Pat<(add GR32:$src1, (X86Wrapper tblockaddress:$src2)),
+ (ADD32ri GR32:$src1, tblockaddress:$src2)>;
+
+def : Pat<(store (i32 (X86Wrapper tglobaladdr:$src)), addr:$dst),
+ (MOV32mi addr:$dst, tglobaladdr:$src)>;
+def : Pat<(store (i32 (X86Wrapper texternalsym:$src)), addr:$dst),
+ (MOV32mi addr:$dst, texternalsym:$src)>;
+def : Pat<(store (i32 (X86Wrapper tblockaddress:$src)), addr:$dst),
+ (MOV32mi addr:$dst, tblockaddress:$src)>;
+
+
+
+// ConstantPool GlobalAddress, ExternalSymbol, and JumpTable when not in small
+// code model mode, should use 'movabs'. FIXME: This is really a hack, the
+// 'movabs' predicate should handle this sort of thing.
+def : Pat<(i64 (X86Wrapper tconstpool :$dst)),
+ (MOV64ri tconstpool :$dst)>, Requires<[FarData]>;
+def : Pat<(i64 (X86Wrapper tjumptable :$dst)),
+ (MOV64ri tjumptable :$dst)>, Requires<[FarData]>;
+def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
+ (MOV64ri tglobaladdr :$dst)>, Requires<[FarData]>;
+def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
+ (MOV64ri texternalsym:$dst)>, Requires<[FarData]>;
+def : Pat<(i64 (X86Wrapper tblockaddress:$dst)),
+ (MOV64ri tblockaddress:$dst)>, Requires<[FarData]>;
+
+// In static codegen with small code model, we can get the address of a label
+// into a register with 'movl'. FIXME: This is a hack, the 'imm' predicate of
+// the MOV64ri64i32 should accept these.
+def : Pat<(i64 (X86Wrapper tconstpool :$dst)),
+ (MOV64ri64i32 tconstpool :$dst)>, Requires<[SmallCode]>;
+def : Pat<(i64 (X86Wrapper tjumptable :$dst)),
+ (MOV64ri64i32 tjumptable :$dst)>, Requires<[SmallCode]>;
+def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
+ (MOV64ri64i32 tglobaladdr :$dst)>, Requires<[SmallCode]>;
+def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
+ (MOV64ri64i32 texternalsym:$dst)>, Requires<[SmallCode]>;
+def : Pat<(i64 (X86Wrapper tblockaddress:$dst)),
+ (MOV64ri64i32 tblockaddress:$dst)>, Requires<[SmallCode]>;
+
+// In kernel code model, we can get the address of a label
+// into a register with 'movq'. FIXME: This is a hack, the 'imm' predicate of
+// the MOV64ri32 should accept these.
+def : Pat<(i64 (X86Wrapper tconstpool :$dst)),
+ (MOV64ri32 tconstpool :$dst)>, Requires<[KernelCode]>;
+def : Pat<(i64 (X86Wrapper tjumptable :$dst)),
+ (MOV64ri32 tjumptable :$dst)>, Requires<[KernelCode]>;
+def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
+ (MOV64ri32 tglobaladdr :$dst)>, Requires<[KernelCode]>;
+def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
+ (MOV64ri32 texternalsym:$dst)>, Requires<[KernelCode]>;
+def : Pat<(i64 (X86Wrapper tblockaddress:$dst)),
+ (MOV64ri32 tblockaddress:$dst)>, Requires<[KernelCode]>;
+
+// If we have small model and -static mode, it is safe to store global addresses
+// directly as immediates. FIXME: This is really a hack, the 'imm' predicate
+// for MOV64mi32 should handle this sort of thing.
+def : Pat<(store (i64 (X86Wrapper tconstpool:$src)), addr:$dst),
+ (MOV64mi32 addr:$dst, tconstpool:$src)>,
+ Requires<[NearData, IsStatic]>;
+def : Pat<(store (i64 (X86Wrapper tjumptable:$src)), addr:$dst),
+ (MOV64mi32 addr:$dst, tjumptable:$src)>,
+ Requires<[NearData, IsStatic]>;
+def : Pat<(store (i64 (X86Wrapper tglobaladdr:$src)), addr:$dst),
+ (MOV64mi32 addr:$dst, tglobaladdr:$src)>,
+ Requires<[NearData, IsStatic]>;
+def : Pat<(store (i64 (X86Wrapper texternalsym:$src)), addr:$dst),
+ (MOV64mi32 addr:$dst, texternalsym:$src)>,
+ Requires<[NearData, IsStatic]>;
+def : Pat<(store (i64 (X86Wrapper tblockaddress:$src)), addr:$dst),
+ (MOV64mi32 addr:$dst, tblockaddress:$src)>,
+ Requires<[NearData, IsStatic]>;
+
+
+
+// Calls
+
+// tls has some funny stuff here...
+// This corresponds to movabs $foo@tpoff, %rax
+def : Pat<(i64 (X86Wrapper tglobaltlsaddr :$dst)),
+ (MOV64ri tglobaltlsaddr :$dst)>;
+// This corresponds to add $foo@tpoff, %rax
+def : Pat<(add GR64:$src1, (X86Wrapper tglobaltlsaddr :$dst)),
+ (ADD64ri32 GR64:$src1, tglobaltlsaddr :$dst)>;
+// This corresponds to mov foo@tpoff(%rbx), %eax
+def : Pat<(load (i64 (X86Wrapper tglobaltlsaddr :$dst))),
+ (MOV64rm tglobaltlsaddr :$dst)>;
+
+
+// Direct PC relative function call for small code model. 32-bit displacement
+// sign extended to 64-bit.
+def : Pat<(X86call (i64 tglobaladdr:$dst)),
+ (CALL64pcrel32 tglobaladdr:$dst)>, Requires<[NotWin64]>;
+def : Pat<(X86call (i64 texternalsym:$dst)),
+ (CALL64pcrel32 texternalsym:$dst)>, Requires<[NotWin64]>;
+
+def : Pat<(X86call (i64 tglobaladdr:$dst)),
+ (WINCALL64pcrel32 tglobaladdr:$dst)>, Requires<[IsWin64]>;
+def : Pat<(X86call (i64 texternalsym:$dst)),
+ (WINCALL64pcrel32 texternalsym:$dst)>, Requires<[IsWin64]>;
+
+// tailcall stuff
+def : Pat<(X86tcret GR32_TC:$dst, imm:$off),
+ (TCRETURNri GR32_TC:$dst, imm:$off)>,
+ Requires<[In32BitMode]>;
+
+// FIXME: This is disabled for 32-bit PIC mode because the global base
+// register which is part of the address mode may be assigned a
+// callee-saved register.
+def : Pat<(X86tcret (load addr:$dst), imm:$off),
+ (TCRETURNmi addr:$dst, imm:$off)>,
+ Requires<[In32BitMode, IsNotPIC]>;
+
+def : Pat<(X86tcret (i32 tglobaladdr:$dst), imm:$off),
+ (TCRETURNdi texternalsym:$dst, imm:$off)>,
+ Requires<[In32BitMode]>;
+
+def : Pat<(X86tcret (i32 texternalsym:$dst), imm:$off),
+ (TCRETURNdi texternalsym:$dst, imm:$off)>,
+ Requires<[In32BitMode]>;
+
+def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
+ (TCRETURNri64 ptr_rc_tailcall:$dst, imm:$off)>,
+ Requires<[In64BitMode]>;
+
+def : Pat<(X86tcret (load addr:$dst), imm:$off),
+ (TCRETURNmi64 addr:$dst, imm:$off)>,
+ Requires<[In64BitMode]>;
+
+def : Pat<(X86tcret (i64 tglobaladdr:$dst), imm:$off),
+ (TCRETURNdi64 tglobaladdr:$dst, imm:$off)>,
+ Requires<[In64BitMode]>;
+
+def : Pat<(X86tcret (i64 texternalsym:$dst), imm:$off),
+ (TCRETURNdi64 texternalsym:$dst, imm:$off)>,
+ Requires<[In64BitMode]>;
+
+// Normal calls, with various flavors of addresses.
+def : Pat<(X86call (i32 tglobaladdr:$dst)),
+ (CALLpcrel32 tglobaladdr:$dst)>;
+def : Pat<(X86call (i32 texternalsym:$dst)),
+ (CALLpcrel32 texternalsym:$dst)>;
+def : Pat<(X86call (i32 imm:$dst)),
+ (CALLpcrel32 imm:$dst)>, Requires<[CallImmAddr]>;
+
+// Comparisons.
+
+// TEST R,R is smaller than CMP R,0
+def : Pat<(X86cmp GR8:$src1, 0),
+ (TEST8rr GR8:$src1, GR8:$src1)>;
+def : Pat<(X86cmp GR16:$src1, 0),
+ (TEST16rr GR16:$src1, GR16:$src1)>;
+def : Pat<(X86cmp GR32:$src1, 0),
+ (TEST32rr GR32:$src1, GR32:$src1)>;
+def : Pat<(X86cmp GR64:$src1, 0),
+ (TEST64rr GR64:$src1, GR64:$src1)>;
+
+// Conditional moves with folded loads with operands swapped and conditions
+// inverted.
+multiclass CMOVmr<PatLeaf InvertedCond, Instruction Inst16, Instruction Inst32,
+ Instruction Inst64> {
+ def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, InvertedCond, EFLAGS),
+ (Inst16 GR16:$src2, addr:$src1)>;
+ def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, InvertedCond, EFLAGS),
+ (Inst32 GR32:$src2, addr:$src1)>;
+ def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, InvertedCond, EFLAGS),
+ (Inst64 GR64:$src2, addr:$src1)>;
+}
+
+defm : CMOVmr<X86_COND_B , CMOVAE16rm, CMOVAE32rm, CMOVAE64rm>;
+defm : CMOVmr<X86_COND_AE, CMOVB16rm , CMOVB32rm , CMOVB64rm>;
+defm : CMOVmr<X86_COND_E , CMOVNE16rm, CMOVNE32rm, CMOVNE64rm>;
+defm : CMOVmr<X86_COND_NE, CMOVE16rm , CMOVE32rm , CMOVE64rm>;
+defm : CMOVmr<X86_COND_BE, CMOVA16rm , CMOVA32rm , CMOVA64rm>;
+defm : CMOVmr<X86_COND_A , CMOVBE16rm, CMOVBE32rm, CMOVBE64rm>;
+defm : CMOVmr<X86_COND_L , CMOVGE16rm, CMOVGE32rm, CMOVGE64rm>;
+defm : CMOVmr<X86_COND_GE, CMOVL16rm , CMOVL32rm , CMOVL64rm>;
+defm : CMOVmr<X86_COND_LE, CMOVG16rm , CMOVG32rm , CMOVG64rm>;
+defm : CMOVmr<X86_COND_G , CMOVLE16rm, CMOVLE32rm, CMOVLE64rm>;
+defm : CMOVmr<X86_COND_P , CMOVNP16rm, CMOVNP32rm, CMOVNP64rm>;
+defm : CMOVmr<X86_COND_NP, CMOVP16rm , CMOVP32rm , CMOVP64rm>;
+defm : CMOVmr<X86_COND_S , CMOVNS16rm, CMOVNS32rm, CMOVNS64rm>;
+defm : CMOVmr<X86_COND_NS, CMOVS16rm , CMOVS32rm , CMOVS64rm>;
+defm : CMOVmr<X86_COND_O , CMOVNO16rm, CMOVNO32rm, CMOVNO64rm>;
+defm : CMOVmr<X86_COND_NO, CMOVO16rm , CMOVO32rm , CMOVO64rm>;
+
+// zextload bool -> zextload byte
+def : Pat<(zextloadi8i1 addr:$src), (MOV8rm addr:$src)>;
+def : Pat<(zextloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>;
+def : Pat<(zextloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>;
+def : Pat<(zextloadi64i1 addr:$src), (MOVZX64rm8 addr:$src)>;
+
+// extload bool -> extload byte
+// When extloading from 16-bit and smaller memory locations into 64-bit
+// registers, use zero-extending loads so that the entire 64-bit register is
+// defined, avoiding partial-register updates.
+
+def : Pat<(extloadi8i1 addr:$src), (MOV8rm addr:$src)>;
+def : Pat<(extloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>;
+def : Pat<(extloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>;
+def : Pat<(extloadi16i8 addr:$src), (MOVZX16rm8 addr:$src)>;
+def : Pat<(extloadi32i8 addr:$src), (MOVZX32rm8 addr:$src)>;
+def : Pat<(extloadi32i16 addr:$src), (MOVZX32rm16 addr:$src)>;
+
+def : Pat<(extloadi64i1 addr:$src), (MOVZX64rm8 addr:$src)>;
+def : Pat<(extloadi64i8 addr:$src), (MOVZX64rm8 addr:$src)>;
+def : Pat<(extloadi64i16 addr:$src), (MOVZX64rm16 addr:$src)>;
+// For other extloads, use subregs, since the high contents of the register are
+// defined after an extload.
+def : Pat<(extloadi64i32 addr:$src),
+ (SUBREG_TO_REG (i64 0), (MOV32rm addr:$src),
+ sub_32bit)>;
+
+// anyext. Define these to do an explicit zero-extend to
+// avoid partial-register updates.
+def : Pat<(i16 (anyext GR8 :$src)), (EXTRACT_SUBREG
+ (MOVZX32rr8 GR8 :$src), sub_16bit)>;
+def : Pat<(i32 (anyext GR8 :$src)), (MOVZX32rr8 GR8 :$src)>;
+
+// Except for i16 -> i32 since isel expect i16 ops to be promoted to i32.
+def : Pat<(i32 (anyext GR16:$src)),
+ (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR16:$src, sub_16bit)>;
+
+def : Pat<(i64 (anyext GR8 :$src)), (MOVZX64rr8 GR8 :$src)>;
+def : Pat<(i64 (anyext GR16:$src)), (MOVZX64rr16 GR16 :$src)>;
+def : Pat<(i64 (anyext GR32:$src)),
+ (SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>;
+
+
+// Any instruction that defines a 32-bit result leaves the high half of the
+// register. Truncate can be lowered to EXTRACT_SUBREG. CopyFromReg may
+// be copying from a truncate. And x86's cmov doesn't do anything if the
+// condition is false. But any other 32-bit operation will zero-extend
+// up to 64 bits.
+def def32 : PatLeaf<(i32 GR32:$src), [{
+ return N->getOpcode() != ISD::TRUNCATE &&
+ N->getOpcode() != TargetOpcode::EXTRACT_SUBREG &&
+ N->getOpcode() != ISD::CopyFromReg &&
+ N->getOpcode() != X86ISD::CMOV;
+}]>;
+
+// In the case of a 32-bit def that is known to implicitly zero-extend,
+// we can use a SUBREG_TO_REG.
+def : Pat<(i64 (zext def32:$src)),
+ (SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>;
+
+//===----------------------------------------------------------------------===//
+// Pattern match OR as ADD
+//===----------------------------------------------------------------------===//
+
+// If safe, we prefer to pattern match OR as ADD at isel time. ADD can be
+// 3-addressified into an LEA instruction to avoid copies. However, we also
+// want to finally emit these instructions as an or at the end of the code
+// generator to make the generated code easier to read. To do this, we select
+// into "disjoint bits" pseudo ops.
+
+// Treat an 'or' node is as an 'add' if the or'ed bits are known to be zero.
+def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))
+ return CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue());
+
+ unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits();
+ APInt Mask = APInt::getAllOnesValue(BitWidth);
+ APInt KnownZero0, KnownOne0;
+ CurDAG->ComputeMaskedBits(N->getOperand(0), Mask, KnownZero0, KnownOne0, 0);
+ APInt KnownZero1, KnownOne1;
+ CurDAG->ComputeMaskedBits(N->getOperand(1), Mask, KnownZero1, KnownOne1, 0);
+ return (~KnownZero0 & ~KnownZero1) == 0;
+}]>;
+
+
+// (or x1, x2) -> (add x1, x2) if two operands are known not to share bits.
+let AddedComplexity = 5 in { // Try this before the selecting to OR
+
+let isConvertibleToThreeAddress = 1,
+ Constraints = "$src1 = $dst", Defs = [EFLAGS] in {
+let isCommutable = 1 in {
+def ADD16rr_DB : I<0, Pseudo, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ "", // orw/addw REG, REG
+ [(set GR16:$dst, (or_is_add GR16:$src1, GR16:$src2))]>;
+def ADD32rr_DB : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "", // orl/addl REG, REG
+ [(set GR32:$dst, (or_is_add GR32:$src1, GR32:$src2))]>;
+def ADD64rr_DB : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "", // orq/addq REG, REG
+ [(set GR64:$dst, (or_is_add GR64:$src1, GR64:$src2))]>;
+} // isCommutable
+
+// NOTE: These are order specific, we want the ri8 forms to be listed
+// first so that they are slightly preferred to the ri forms.
+
+def ADD16ri8_DB : I<0, Pseudo,
+ (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
+ "", // orw/addw REG, imm8
+ [(set GR16:$dst,(or_is_add GR16:$src1,i16immSExt8:$src2))]>;
+def ADD16ri_DB : I<0, Pseudo, (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
+ "", // orw/addw REG, imm
+ [(set GR16:$dst, (or_is_add GR16:$src1, imm:$src2))]>;
+
+def ADD32ri8_DB : I<0, Pseudo,
+ (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
+ "", // orl/addl REG, imm8
+ [(set GR32:$dst,(or_is_add GR32:$src1,i32immSExt8:$src2))]>;
+def ADD32ri_DB : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
+ "", // orl/addl REG, imm
+ [(set GR32:$dst, (or_is_add GR32:$src1, imm:$src2))]>;
+
+
+def ADD64ri8_DB : I<0, Pseudo,
+ (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2),
+ "", // orq/addq REG, imm8
+ [(set GR64:$dst, (or_is_add GR64:$src1,
+ i64immSExt8:$src2))]>;
+def ADD64ri32_DB : I<0, Pseudo,
+ (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),
+ "", // orq/addq REG, imm
+ [(set GR64:$dst, (or_is_add GR64:$src1,
+ i64immSExt32:$src2))]>;
+}
+} // AddedComplexity
+
+
+//===----------------------------------------------------------------------===//
+// Some peepholes
+//===----------------------------------------------------------------------===//
+
+// Odd encoding trick: -128 fits into an 8-bit immediate field while
+// +128 doesn't, so in this special case use a sub instead of an add.
+def : Pat<(add GR16:$src1, 128),
+ (SUB16ri8 GR16:$src1, -128)>;
+def : Pat<(store (add (loadi16 addr:$dst), 128), addr:$dst),
+ (SUB16mi8 addr:$dst, -128)>;
+
+def : Pat<(add GR32:$src1, 128),
+ (SUB32ri8 GR32:$src1, -128)>;
+def : Pat<(store (add (loadi32 addr:$dst), 128), addr:$dst),
+ (SUB32mi8 addr:$dst, -128)>;
+
+def : Pat<(add GR64:$src1, 128),
+ (SUB64ri8 GR64:$src1, -128)>;
+def : Pat<(store (add (loadi64 addr:$dst), 128), addr:$dst),
+ (SUB64mi8 addr:$dst, -128)>;
+
+// The same trick applies for 32-bit immediate fields in 64-bit
+// instructions.
+def : Pat<(add GR64:$src1, 0x0000000080000000),
+ (SUB64ri32 GR64:$src1, 0xffffffff80000000)>;
+def : Pat<(store (add (loadi64 addr:$dst), 0x00000000800000000), addr:$dst),
+ (SUB64mi32 addr:$dst, 0xffffffff80000000)>;
+
+// To avoid needing to materialize an immediate in a register, use a 32-bit and
+// with implicit zero-extension instead of a 64-bit and if the immediate has at
+// least 32 bits of leading zeros. If in addition the last 32 bits can be
+// represented with a sign extension of a 8 bit constant, use that.
+
+def : Pat<(and GR64:$src, i64immZExt32SExt8:$imm),
+ (SUBREG_TO_REG
+ (i64 0),
+ (AND32ri8
+ (EXTRACT_SUBREG GR64:$src, sub_32bit),
+ (i32 (GetLo8XForm imm:$imm))),
+ sub_32bit)>;
+
+def : Pat<(and GR64:$src, i64immZExt32:$imm),
+ (SUBREG_TO_REG
+ (i64 0),
+ (AND32ri
+ (EXTRACT_SUBREG GR64:$src, sub_32bit),
+ (i32 (GetLo32XForm imm:$imm))),
+ sub_32bit)>;
+
+
+// r & (2^16-1) ==> movz
+def : Pat<(and GR32:$src1, 0xffff),
+ (MOVZX32rr16 (EXTRACT_SUBREG GR32:$src1, sub_16bit))>;
+// r & (2^8-1) ==> movz
+def : Pat<(and GR32:$src1, 0xff),
+ (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src1,
+ GR32_ABCD)),
+ sub_8bit))>,
+ Requires<[In32BitMode]>;
+// r & (2^8-1) ==> movz
+def : Pat<(and GR16:$src1, 0xff),
+ (EXTRACT_SUBREG (MOVZX32rr8 (EXTRACT_SUBREG
+ (i16 (COPY_TO_REGCLASS GR16:$src1, GR16_ABCD)), sub_8bit)),
+ sub_16bit)>,
+ Requires<[In32BitMode]>;
+
+// r & (2^32-1) ==> movz
+def : Pat<(and GR64:$src, 0x00000000FFFFFFFF),
+ (MOVZX64rr32 (EXTRACT_SUBREG GR64:$src, sub_32bit))>;
+// r & (2^16-1) ==> movz
+def : Pat<(and GR64:$src, 0xffff),
+ (MOVZX64rr16 (i16 (EXTRACT_SUBREG GR64:$src, sub_16bit)))>;
+// r & (2^8-1) ==> movz
+def : Pat<(and GR64:$src, 0xff),
+ (MOVZX64rr8 (i8 (EXTRACT_SUBREG GR64:$src, sub_8bit)))>;
+// r & (2^8-1) ==> movz
+def : Pat<(and GR32:$src1, 0xff),
+ (MOVZX32rr8 (EXTRACT_SUBREG GR32:$src1, sub_8bit))>,
+ Requires<[In64BitMode]>;
+// r & (2^8-1) ==> movz
+def : Pat<(and GR16:$src1, 0xff),
+ (EXTRACT_SUBREG (MOVZX32rr8 (i8
+ (EXTRACT_SUBREG GR16:$src1, sub_8bit))), sub_16bit)>,
+ Requires<[In64BitMode]>;
+
+
+// sext_inreg patterns
+def : Pat<(sext_inreg GR32:$src, i16),
+ (MOVSX32rr16 (EXTRACT_SUBREG GR32:$src, sub_16bit))>;
+def : Pat<(sext_inreg GR32:$src, i8),
+ (MOVSX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
+ GR32_ABCD)),
+ sub_8bit))>,
+ Requires<[In32BitMode]>;
+
+def : Pat<(sext_inreg GR16:$src, i8),
+ (EXTRACT_SUBREG (i32 (MOVSX32rr8 (EXTRACT_SUBREG
+ (i32 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), sub_8bit))),
+ sub_16bit)>,
+ Requires<[In32BitMode]>;
+
+def : Pat<(sext_inreg GR64:$src, i32),
+ (MOVSX64rr32 (EXTRACT_SUBREG GR64:$src, sub_32bit))>;
+def : Pat<(sext_inreg GR64:$src, i16),
+ (MOVSX64rr16 (EXTRACT_SUBREG GR64:$src, sub_16bit))>;
+def : Pat<(sext_inreg GR64:$src, i8),
+ (MOVSX64rr8 (EXTRACT_SUBREG GR64:$src, sub_8bit))>;
+def : Pat<(sext_inreg GR32:$src, i8),
+ (MOVSX32rr8 (EXTRACT_SUBREG GR32:$src, sub_8bit))>,
+ Requires<[In64BitMode]>;
+def : Pat<(sext_inreg GR16:$src, i8),
+ (EXTRACT_SUBREG (MOVSX32rr8
+ (EXTRACT_SUBREG GR16:$src, sub_8bit)), sub_16bit)>,
+ Requires<[In64BitMode]>;
+
+// sext, sext_load, zext, zext_load
+def: Pat<(i16 (sext GR8:$src)),
+ (EXTRACT_SUBREG (MOVSX32rr8 GR8:$src), sub_16bit)>;
+def: Pat<(sextloadi16i8 addr:$src),
+ (EXTRACT_SUBREG (MOVSX32rm8 addr:$src), sub_16bit)>;
+def: Pat<(i16 (zext GR8:$src)),
+ (EXTRACT_SUBREG (MOVZX32rr8 GR8:$src), sub_16bit)>;
+def: Pat<(zextloadi16i8 addr:$src),
+ (EXTRACT_SUBREG (MOVZX32rm8 addr:$src), sub_16bit)>;
+
+// trunc patterns
+def : Pat<(i16 (trunc GR32:$src)),
+ (EXTRACT_SUBREG GR32:$src, sub_16bit)>;
+def : Pat<(i8 (trunc GR32:$src)),
+ (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
+ sub_8bit)>,
+ Requires<[In32BitMode]>;
+def : Pat<(i8 (trunc GR16:$src)),
+ (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
+ sub_8bit)>,
+ Requires<[In32BitMode]>;
+def : Pat<(i32 (trunc GR64:$src)),
+ (EXTRACT_SUBREG GR64:$src, sub_32bit)>;
+def : Pat<(i16 (trunc GR64:$src)),
+ (EXTRACT_SUBREG GR64:$src, sub_16bit)>;
+def : Pat<(i8 (trunc GR64:$src)),
+ (EXTRACT_SUBREG GR64:$src, sub_8bit)>;
+def : Pat<(i8 (trunc GR32:$src)),
+ (EXTRACT_SUBREG GR32:$src, sub_8bit)>,
+ Requires<[In64BitMode]>;
+def : Pat<(i8 (trunc GR16:$src)),
+ (EXTRACT_SUBREG GR16:$src, sub_8bit)>,
+ Requires<[In64BitMode]>;
+
+// h-register tricks
+def : Pat<(i8 (trunc (srl_su GR16:$src, (i8 8)))),
+ (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
+ sub_8bit_hi)>,
+ Requires<[In32BitMode]>;
+def : Pat<(i8 (trunc (srl_su GR32:$src, (i8 8)))),
+ (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
+ sub_8bit_hi)>,
+ Requires<[In32BitMode]>;
+def : Pat<(srl GR16:$src, (i8 8)),
+ (EXTRACT_SUBREG
+ (MOVZX32rr8
+ (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
+ sub_8bit_hi)),
+ sub_16bit)>,
+ Requires<[In32BitMode]>;
+def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))),
+ (MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src,
+ GR16_ABCD)),
+ sub_8bit_hi))>,
+ Requires<[In32BitMode]>;
+def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))),
+ (MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src,
+ GR16_ABCD)),
+ sub_8bit_hi))>,
+ Requires<[In32BitMode]>;
+def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),
+ (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
+ GR32_ABCD)),
+ sub_8bit_hi))>,
+ Requires<[In32BitMode]>;
+def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)),
+ (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
+ GR32_ABCD)),
+ sub_8bit_hi))>,
+ Requires<[In32BitMode]>;
+
+// h-register tricks.
+// For now, be conservative on x86-64 and use an h-register extract only if the
+// value is immediately zero-extended or stored, which are somewhat common
+// cases. This uses a bunch of code to prevent a register requiring a REX prefix
+// from being allocated in the same instruction as the h register, as there's
+// currently no way to describe this requirement to the register allocator.
+
+// h-register extract and zero-extend.
+def : Pat<(and (srl_su GR64:$src, (i8 8)), (i64 255)),
+ (SUBREG_TO_REG
+ (i64 0),
+ (MOVZX32_NOREXrr8
+ (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS GR64:$src, GR64_ABCD)),
+ sub_8bit_hi)),
+ sub_32bit)>;
+def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),
+ (MOVZX32_NOREXrr8
+ (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
+ sub_8bit_hi))>,
+ Requires<[In64BitMode]>;
+def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)),
+ (MOVZX32_NOREXrr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
+ GR32_ABCD)),
+ sub_8bit_hi))>,
+ Requires<[In64BitMode]>;
+def : Pat<(srl GR16:$src, (i8 8)),
+ (EXTRACT_SUBREG
+ (MOVZX32_NOREXrr8
+ (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
+ sub_8bit_hi)),
+ sub_16bit)>,
+ Requires<[In64BitMode]>;
+def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))),
+ (MOVZX32_NOREXrr8
+ (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
+ sub_8bit_hi))>,
+ Requires<[In64BitMode]>;
+def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))),
+ (MOVZX32_NOREXrr8
+ (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
+ sub_8bit_hi))>,
+ Requires<[In64BitMode]>;
+def : Pat<(i64 (zext (srl_su GR16:$src, (i8 8)))),
+ (SUBREG_TO_REG
+ (i64 0),
+ (MOVZX32_NOREXrr8
+ (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
+ sub_8bit_hi)),
+ sub_32bit)>;
+def : Pat<(i64 (anyext (srl_su GR16:$src, (i8 8)))),
+ (SUBREG_TO_REG
+ (i64 0),
+ (MOVZX32_NOREXrr8
+ (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
+ sub_8bit_hi)),
+ sub_32bit)>;
+
+// h-register extract and store.
+def : Pat<(store (i8 (trunc_su (srl_su GR64:$src, (i8 8)))), addr:$dst),
+ (MOV8mr_NOREX
+ addr:$dst,
+ (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS GR64:$src, GR64_ABCD)),
+ sub_8bit_hi))>;
+def : Pat<(store (i8 (trunc_su (srl_su GR32:$src, (i8 8)))), addr:$dst),
+ (MOV8mr_NOREX
+ addr:$dst,
+ (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
+ sub_8bit_hi))>,
+ Requires<[In64BitMode]>;
+def : Pat<(store (i8 (trunc_su (srl_su GR16:$src, (i8 8)))), addr:$dst),
+ (MOV8mr_NOREX
+ addr:$dst,
+ (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
+ sub_8bit_hi))>,
+ Requires<[In64BitMode]>;
+
+
+// (shl x, 1) ==> (add x, x)
+// Note that if x is undef (immediate or otherwise), we could theoretically
+// end up with the two uses of x getting different values, producing a result
+// where the least significant bit is not 0. However, the probability of this
+// happening is considered low enough that this is officially not a
+// "real problem".
+def : Pat<(shl GR8 :$src1, (i8 1)), (ADD8rr GR8 :$src1, GR8 :$src1)>;
+def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr GR16:$src1, GR16:$src1)>;
+def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr GR32:$src1, GR32:$src1)>;
+def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr GR64:$src1, GR64:$src1)>;
+
+// (shl x (and y, 31)) ==> (shl x, y)
+def : Pat<(shl GR8:$src1, (and CL, 31)),
+ (SHL8rCL GR8:$src1)>;
+def : Pat<(shl GR16:$src1, (and CL, 31)),
+ (SHL16rCL GR16:$src1)>;
+def : Pat<(shl GR32:$src1, (and CL, 31)),
+ (SHL32rCL GR32:$src1)>;
+def : Pat<(store (shl (loadi8 addr:$dst), (and CL, 31)), addr:$dst),
+ (SHL8mCL addr:$dst)>;
+def : Pat<(store (shl (loadi16 addr:$dst), (and CL, 31)), addr:$dst),
+ (SHL16mCL addr:$dst)>;
+def : Pat<(store (shl (loadi32 addr:$dst), (and CL, 31)), addr:$dst),
+ (SHL32mCL addr:$dst)>;
+
+def : Pat<(srl GR8:$src1, (and CL, 31)),
+ (SHR8rCL GR8:$src1)>;
+def : Pat<(srl GR16:$src1, (and CL, 31)),
+ (SHR16rCL GR16:$src1)>;
+def : Pat<(srl GR32:$src1, (and CL, 31)),
+ (SHR32rCL GR32:$src1)>;
+def : Pat<(store (srl (loadi8 addr:$dst), (and CL, 31)), addr:$dst),
+ (SHR8mCL addr:$dst)>;
+def : Pat<(store (srl (loadi16 addr:$dst), (and CL, 31)), addr:$dst),
+ (SHR16mCL addr:$dst)>;
+def : Pat<(store (srl (loadi32 addr:$dst), (and CL, 31)), addr:$dst),
+ (SHR32mCL addr:$dst)>;
+
+def : Pat<(sra GR8:$src1, (and CL, 31)),
+ (SAR8rCL GR8:$src1)>;
+def : Pat<(sra GR16:$src1, (and CL, 31)),
+ (SAR16rCL GR16:$src1)>;
+def : Pat<(sra GR32:$src1, (and CL, 31)),
+ (SAR32rCL GR32:$src1)>;
+def : Pat<(store (sra (loadi8 addr:$dst), (and CL, 31)), addr:$dst),
+ (SAR8mCL addr:$dst)>;
+def : Pat<(store (sra (loadi16 addr:$dst), (and CL, 31)), addr:$dst),
+ (SAR16mCL addr:$dst)>;
+def : Pat<(store (sra (loadi32 addr:$dst), (and CL, 31)), addr:$dst),
+ (SAR32mCL addr:$dst)>;
+
+// (shl x (and y, 63)) ==> (shl x, y)
+def : Pat<(shl GR64:$src1, (and CL, 63)),
+ (SHL64rCL GR64:$src1)>;
+def : Pat<(store (shl (loadi64 addr:$dst), (and CL, 63)), addr:$dst),
+ (SHL64mCL addr:$dst)>;
+
+def : Pat<(srl GR64:$src1, (and CL, 63)),
+ (SHR64rCL GR64:$src1)>;
+def : Pat<(store (srl (loadi64 addr:$dst), (and CL, 63)), addr:$dst),
+ (SHR64mCL addr:$dst)>;
+
+def : Pat<(sra GR64:$src1, (and CL, 63)),
+ (SAR64rCL GR64:$src1)>;
+def : Pat<(store (sra (loadi64 addr:$dst), (and CL, 63)), addr:$dst),
+ (SAR64mCL addr:$dst)>;
+
+
+// (anyext (setcc_carry)) -> (setcc_carry)
+def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
+ (SETB_C16r)>;
+def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
+ (SETB_C32r)>;
+def : Pat<(i32 (anyext (i16 (X86setcc_c X86_COND_B, EFLAGS)))),
+ (SETB_C32r)>;
+
+
+
+
+//===----------------------------------------------------------------------===//
+// EFLAGS-defining Patterns
+//===----------------------------------------------------------------------===//
+
+// add reg, reg
+def : Pat<(add GR8 :$src1, GR8 :$src2), (ADD8rr GR8 :$src1, GR8 :$src2)>;
+def : Pat<(add GR16:$src1, GR16:$src2), (ADD16rr GR16:$src1, GR16:$src2)>;
+def : Pat<(add GR32:$src1, GR32:$src2), (ADD32rr GR32:$src1, GR32:$src2)>;
+
+// add reg, mem
+def : Pat<(add GR8:$src1, (loadi8 addr:$src2)),
+ (ADD8rm GR8:$src1, addr:$src2)>;
+def : Pat<(add GR16:$src1, (loadi16 addr:$src2)),
+ (ADD16rm GR16:$src1, addr:$src2)>;
+def : Pat<(add GR32:$src1, (loadi32 addr:$src2)),
+ (ADD32rm GR32:$src1, addr:$src2)>;
+
+// add reg, imm
+def : Pat<(add GR8 :$src1, imm:$src2), (ADD8ri GR8:$src1 , imm:$src2)>;
+def : Pat<(add GR16:$src1, imm:$src2), (ADD16ri GR16:$src1, imm:$src2)>;
+def : Pat<(add GR32:$src1, imm:$src2), (ADD32ri GR32:$src1, imm:$src2)>;
+def : Pat<(add GR16:$src1, i16immSExt8:$src2),
+ (ADD16ri8 GR16:$src1, i16immSExt8:$src2)>;
+def : Pat<(add GR32:$src1, i32immSExt8:$src2),
+ (ADD32ri8 GR32:$src1, i32immSExt8:$src2)>;
+
+// sub reg, reg
+def : Pat<(sub GR8 :$src1, GR8 :$src2), (SUB8rr GR8 :$src1, GR8 :$src2)>;
+def : Pat<(sub GR16:$src1, GR16:$src2), (SUB16rr GR16:$src1, GR16:$src2)>;
+def : Pat<(sub GR32:$src1, GR32:$src2), (SUB32rr GR32:$src1, GR32:$src2)>;
+
+// sub reg, mem
+def : Pat<(sub GR8:$src1, (loadi8 addr:$src2)),
+ (SUB8rm GR8:$src1, addr:$src2)>;
+def : Pat<(sub GR16:$src1, (loadi16 addr:$src2)),
+ (SUB16rm GR16:$src1, addr:$src2)>;
+def : Pat<(sub GR32:$src1, (loadi32 addr:$src2)),
+ (SUB32rm GR32:$src1, addr:$src2)>;
+
+// sub reg, imm
+def : Pat<(sub GR8:$src1, imm:$src2),
+ (SUB8ri GR8:$src1, imm:$src2)>;
+def : Pat<(sub GR16:$src1, imm:$src2),
+ (SUB16ri GR16:$src1, imm:$src2)>;
+def : Pat<(sub GR32:$src1, imm:$src2),
+ (SUB32ri GR32:$src1, imm:$src2)>;
+def : Pat<(sub GR16:$src1, i16immSExt8:$src2),
+ (SUB16ri8 GR16:$src1, i16immSExt8:$src2)>;
+def : Pat<(sub GR32:$src1, i32immSExt8:$src2),
+ (SUB32ri8 GR32:$src1, i32immSExt8:$src2)>;
+
+// mul reg, reg
+def : Pat<(mul GR16:$src1, GR16:$src2),
+ (IMUL16rr GR16:$src1, GR16:$src2)>;
+def : Pat<(mul GR32:$src1, GR32:$src2),
+ (IMUL32rr GR32:$src1, GR32:$src2)>;
+
+// mul reg, mem
+def : Pat<(mul GR16:$src1, (loadi16 addr:$src2)),
+ (IMUL16rm GR16:$src1, addr:$src2)>;
+def : Pat<(mul GR32:$src1, (loadi32 addr:$src2)),
+ (IMUL32rm GR32:$src1, addr:$src2)>;
+
+// mul reg, imm
+def : Pat<(mul GR16:$src1, imm:$src2),
+ (IMUL16rri GR16:$src1, imm:$src2)>;
+def : Pat<(mul GR32:$src1, imm:$src2),
+ (IMUL32rri GR32:$src1, imm:$src2)>;
+def : Pat<(mul GR16:$src1, i16immSExt8:$src2),
+ (IMUL16rri8 GR16:$src1, i16immSExt8:$src2)>;
+def : Pat<(mul GR32:$src1, i32immSExt8:$src2),
+ (IMUL32rri8 GR32:$src1, i32immSExt8:$src2)>;
+
+// reg = mul mem, imm
+def : Pat<(mul (loadi16 addr:$src1), imm:$src2),
+ (IMUL16rmi addr:$src1, imm:$src2)>;
+def : Pat<(mul (loadi32 addr:$src1), imm:$src2),
+ (IMUL32rmi addr:$src1, imm:$src2)>;
+def : Pat<(mul (loadi16 addr:$src1), i16immSExt8:$src2),
+ (IMUL16rmi8 addr:$src1, i16immSExt8:$src2)>;
+def : Pat<(mul (loadi32 addr:$src1), i32immSExt8:$src2),
+ (IMUL32rmi8 addr:$src1, i32immSExt8:$src2)>;
+
+// Patterns for nodes that do not produce flags, for instructions that do.
+
+// addition
+def : Pat<(add GR64:$src1, GR64:$src2),
+ (ADD64rr GR64:$src1, GR64:$src2)>;
+def : Pat<(add GR64:$src1, i64immSExt8:$src2),
+ (ADD64ri8 GR64:$src1, i64immSExt8:$src2)>;
+def : Pat<(add GR64:$src1, i64immSExt32:$src2),
+ (ADD64ri32 GR64:$src1, i64immSExt32:$src2)>;
+def : Pat<(add GR64:$src1, (loadi64 addr:$src2)),
+ (ADD64rm GR64:$src1, addr:$src2)>;
+
+// subtraction
+def : Pat<(sub GR64:$src1, GR64:$src2),
+ (SUB64rr GR64:$src1, GR64:$src2)>;
+def : Pat<(sub GR64:$src1, (loadi64 addr:$src2)),
+ (SUB64rm GR64:$src1, addr:$src2)>;
+def : Pat<(sub GR64:$src1, i64immSExt8:$src2),
+ (SUB64ri8 GR64:$src1, i64immSExt8:$src2)>;
+def : Pat<(sub GR64:$src1, i64immSExt32:$src2),
+ (SUB64ri32 GR64:$src1, i64immSExt32:$src2)>;
+
+// Multiply
+def : Pat<(mul GR64:$src1, GR64:$src2),
+ (IMUL64rr GR64:$src1, GR64:$src2)>;
+def : Pat<(mul GR64:$src1, (loadi64 addr:$src2)),
+ (IMUL64rm GR64:$src1, addr:$src2)>;
+def : Pat<(mul GR64:$src1, i64immSExt8:$src2),
+ (IMUL64rri8 GR64:$src1, i64immSExt8:$src2)>;
+def : Pat<(mul GR64:$src1, i64immSExt32:$src2),
+ (IMUL64rri32 GR64:$src1, i64immSExt32:$src2)>;
+def : Pat<(mul (loadi64 addr:$src1), i64immSExt8:$src2),
+ (IMUL64rmi8 addr:$src1, i64immSExt8:$src2)>;
+def : Pat<(mul (loadi64 addr:$src1), i64immSExt32:$src2),
+ (IMUL64rmi32 addr:$src1, i64immSExt32:$src2)>;
+
+// Increment reg.
+def : Pat<(add GR8 :$src, 1), (INC8r GR8 :$src)>;
+def : Pat<(add GR16:$src, 1), (INC16r GR16:$src)>, Requires<[In32BitMode]>;
+def : Pat<(add GR16:$src, 1), (INC64_16r GR16:$src)>, Requires<[In64BitMode]>;
+def : Pat<(add GR32:$src, 1), (INC32r GR32:$src)>, Requires<[In32BitMode]>;
+def : Pat<(add GR32:$src, 1), (INC64_32r GR32:$src)>, Requires<[In64BitMode]>;
+def : Pat<(add GR64:$src, 1), (INC64r GR64:$src)>;
+
+// Decrement reg.
+def : Pat<(add GR8 :$src, -1), (DEC8r GR8 :$src)>;
+def : Pat<(add GR16:$src, -1), (DEC16r GR16:$src)>, Requires<[In32BitMode]>;
+def : Pat<(add GR16:$src, -1), (DEC64_16r GR16:$src)>, Requires<[In64BitMode]>;
+def : Pat<(add GR32:$src, -1), (DEC32r GR32:$src)>, Requires<[In32BitMode]>;
+def : Pat<(add GR32:$src, -1), (DEC64_32r GR32:$src)>, Requires<[In64BitMode]>;
+def : Pat<(add GR64:$src, -1), (DEC64r GR64:$src)>;
+
+// or reg/reg.
+def : Pat<(or GR8 :$src1, GR8 :$src2), (OR8rr GR8 :$src1, GR8 :$src2)>;
+def : Pat<(or GR16:$src1, GR16:$src2), (OR16rr GR16:$src1, GR16:$src2)>;
+def : Pat<(or GR32:$src1, GR32:$src2), (OR32rr GR32:$src1, GR32:$src2)>;
+def : Pat<(or GR64:$src1, GR64:$src2), (OR64rr GR64:$src1, GR64:$src2)>;
+
+// or reg/mem
+def : Pat<(or GR8:$src1, (loadi8 addr:$src2)),
+ (OR8rm GR8:$src1, addr:$src2)>;
+def : Pat<(or GR16:$src1, (loadi16 addr:$src2)),
+ (OR16rm GR16:$src1, addr:$src2)>;
+def : Pat<(or GR32:$src1, (loadi32 addr:$src2)),
+ (OR32rm GR32:$src1, addr:$src2)>;
+def : Pat<(or GR64:$src1, (loadi64 addr:$src2)),
+ (OR64rm GR64:$src1, addr:$src2)>;
+
+// or reg/imm
+def : Pat<(or GR8:$src1 , imm:$src2), (OR8ri GR8 :$src1, imm:$src2)>;
+def : Pat<(or GR16:$src1, imm:$src2), (OR16ri GR16:$src1, imm:$src2)>;
+def : Pat<(or GR32:$src1, imm:$src2), (OR32ri GR32:$src1, imm:$src2)>;
+def : Pat<(or GR16:$src1, i16immSExt8:$src2),
+ (OR16ri8 GR16:$src1, i16immSExt8:$src2)>;
+def : Pat<(or GR32:$src1, i32immSExt8:$src2),
+ (OR32ri8 GR32:$src1, i32immSExt8:$src2)>;
+def : Pat<(or GR64:$src1, i64immSExt8:$src2),
+ (OR64ri8 GR64:$src1, i64immSExt8:$src2)>;
+def : Pat<(or GR64:$src1, i64immSExt32:$src2),
+ (OR64ri32 GR64:$src1, i64immSExt32:$src2)>;
+
+// xor reg/reg
+def : Pat<(xor GR8 :$src1, GR8 :$src2), (XOR8rr GR8 :$src1, GR8 :$src2)>;
+def : Pat<(xor GR16:$src1, GR16:$src2), (XOR16rr GR16:$src1, GR16:$src2)>;
+def : Pat<(xor GR32:$src1, GR32:$src2), (XOR32rr GR32:$src1, GR32:$src2)>;
+def : Pat<(xor GR64:$src1, GR64:$src2), (XOR64rr GR64:$src1, GR64:$src2)>;
+
+// xor reg/mem
+def : Pat<(xor GR8:$src1, (loadi8 addr:$src2)),
+ (XOR8rm GR8:$src1, addr:$src2)>;
+def : Pat<(xor GR16:$src1, (loadi16 addr:$src2)),
+ (XOR16rm GR16:$src1, addr:$src2)>;
+def : Pat<(xor GR32:$src1, (loadi32 addr:$src2)),
+ (XOR32rm GR32:$src1, addr:$src2)>;
+def : Pat<(xor GR64:$src1, (loadi64 addr:$src2)),
+ (XOR64rm GR64:$src1, addr:$src2)>;
+
+// xor reg/imm
+def : Pat<(xor GR8:$src1, imm:$src2),
+ (XOR8ri GR8:$src1, imm:$src2)>;
+def : Pat<(xor GR16:$src1, imm:$src2),
+ (XOR16ri GR16:$src1, imm:$src2)>;
+def : Pat<(xor GR32:$src1, imm:$src2),
+ (XOR32ri GR32:$src1, imm:$src2)>;
+def : Pat<(xor GR16:$src1, i16immSExt8:$src2),
+ (XOR16ri8 GR16:$src1, i16immSExt8:$src2)>;
+def : Pat<(xor GR32:$src1, i32immSExt8:$src2),
+ (XOR32ri8 GR32:$src1, i32immSExt8:$src2)>;
+def : Pat<(xor GR64:$src1, i64immSExt8:$src2),
+ (XOR64ri8 GR64:$src1, i64immSExt8:$src2)>;
+def : Pat<(xor GR64:$src1, i64immSExt32:$src2),
+ (XOR64ri32 GR64:$src1, i64immSExt32:$src2)>;
+
+// and reg/reg
+def : Pat<(and GR8 :$src1, GR8 :$src2), (AND8rr GR8 :$src1, GR8 :$src2)>;
+def : Pat<(and GR16:$src1, GR16:$src2), (AND16rr GR16:$src1, GR16:$src2)>;
+def : Pat<(and GR32:$src1, GR32:$src2), (AND32rr GR32:$src1, GR32:$src2)>;
+def : Pat<(and GR64:$src1, GR64:$src2), (AND64rr GR64:$src1, GR64:$src2)>;
+
+// and reg/mem
+def : Pat<(and GR8:$src1, (loadi8 addr:$src2)),
+ (AND8rm GR8:$src1, addr:$src2)>;
+def : Pat<(and GR16:$src1, (loadi16 addr:$src2)),
+ (AND16rm GR16:$src1, addr:$src2)>;
+def : Pat<(and GR32:$src1, (loadi32 addr:$src2)),
+ (AND32rm GR32:$src1, addr:$src2)>;
+def : Pat<(and GR64:$src1, (loadi64 addr:$src2)),
+ (AND64rm GR64:$src1, addr:$src2)>;
+
+// and reg/imm
+def : Pat<(and GR8:$src1, imm:$src2),
+ (AND8ri GR8:$src1, imm:$src2)>;
+def : Pat<(and GR16:$src1, imm:$src2),
+ (AND16ri GR16:$src1, imm:$src2)>;
+def : Pat<(and GR32:$src1, imm:$src2),
+ (AND32ri GR32:$src1, imm:$src2)>;
+def : Pat<(and GR16:$src1, i16immSExt8:$src2),
+ (AND16ri8 GR16:$src1, i16immSExt8:$src2)>;
+def : Pat<(and GR32:$src1, i32immSExt8:$src2),
+ (AND32ri8 GR32:$src1, i32immSExt8:$src2)>;
+def : Pat<(and GR64:$src1, i64immSExt8:$src2),
+ (AND64ri8 GR64:$src1, i64immSExt8:$src2)>;
+def : Pat<(and GR64:$src1, i64immSExt32:$src2),
+ (AND64ri32 GR64:$src1, i64immSExt32:$src2)>;
diff --git a/contrib/llvm/lib/Target/X86/X86InstrControl.td b/contrib/llvm/lib/Target/X86/X86InstrControl.td
new file mode 100644
index 0000000..c228a0ae
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrControl.td
@@ -0,0 +1,304 @@
+//===- X86InstrControl.td - Control Flow Instructions ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the X86 jump, return, call, and related instructions.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Control Flow Instructions.
+//
+
+// Return instructions.
+let isTerminator = 1, isReturn = 1, isBarrier = 1,
+ hasCtrlDep = 1, FPForm = SpecialFP in {
+ def RET : I <0xC3, RawFrm, (outs), (ins variable_ops),
+ "ret",
+ [(X86retflag 0)]>;
+ def RETI : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops),
+ "ret\t$amt",
+ [(X86retflag timm:$amt)]>;
+ def RETIW : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops),
+ "retw\t$amt",
+ []>, OpSize;
+ def LRETL : I <0xCB, RawFrm, (outs), (ins),
+ "lretl", []>;
+ def LRETQ : RI <0xCB, RawFrm, (outs), (ins),
+ "lretq", []>;
+ def LRETI : Ii16<0xCA, RawFrm, (outs), (ins i16imm:$amt),
+ "lret\t$amt", []>;
+ def LRETIW : Ii16<0xCA, RawFrm, (outs), (ins i16imm:$amt),
+ "lretw\t$amt", []>, OpSize;
+}
+
+// Unconditional branches.
+let isBarrier = 1, isBranch = 1, isTerminator = 1 in {
+ def JMP_4 : Ii32PCRel<0xE9, RawFrm, (outs), (ins brtarget:$dst),
+ "jmp\t$dst", [(br bb:$dst)]>;
+ def JMP_1 : Ii8PCRel<0xEB, RawFrm, (outs), (ins brtarget8:$dst),
+ "jmp\t$dst", []>;
+ def JMP64pcrel32 : I<0xE9, RawFrm, (outs), (ins brtarget:$dst),
+ "jmp{q}\t$dst", []>;
+}
+
+// Conditional Branches.
+let isBranch = 1, isTerminator = 1, Uses = [EFLAGS] in {
+ multiclass ICBr<bits<8> opc1, bits<8> opc4, string asm, PatFrag Cond> {
+ def _1 : Ii8PCRel <opc1, RawFrm, (outs), (ins brtarget8:$dst), asm, []>;
+ def _4 : Ii32PCRel<opc4, RawFrm, (outs), (ins brtarget:$dst), asm,
+ [(X86brcond bb:$dst, Cond, EFLAGS)]>, TB;
+ }
+}
+
+defm JO : ICBr<0x70, 0x80, "jo\t$dst" , X86_COND_O>;
+defm JNO : ICBr<0x71, 0x81, "jno\t$dst" , X86_COND_NO>;
+defm JB : ICBr<0x72, 0x82, "jb\t$dst" , X86_COND_B>;
+defm JAE : ICBr<0x73, 0x83, "jae\t$dst", X86_COND_AE>;
+defm JE : ICBr<0x74, 0x84, "je\t$dst" , X86_COND_E>;
+defm JNE : ICBr<0x75, 0x85, "jne\t$dst", X86_COND_NE>;
+defm JBE : ICBr<0x76, 0x86, "jbe\t$dst", X86_COND_BE>;
+defm JA : ICBr<0x77, 0x87, "ja\t$dst" , X86_COND_A>;
+defm JS : ICBr<0x78, 0x88, "js\t$dst" , X86_COND_S>;
+defm JNS : ICBr<0x79, 0x89, "jns\t$dst", X86_COND_NS>;
+defm JP : ICBr<0x7A, 0x8A, "jp\t$dst" , X86_COND_P>;
+defm JNP : ICBr<0x7B, 0x8B, "jnp\t$dst", X86_COND_NP>;
+defm JL : ICBr<0x7C, 0x8C, "jl\t$dst" , X86_COND_L>;
+defm JGE : ICBr<0x7D, 0x8D, "jge\t$dst", X86_COND_GE>;
+defm JLE : ICBr<0x7E, 0x8E, "jle\t$dst", X86_COND_LE>;
+defm JG : ICBr<0x7F, 0x8F, "jg\t$dst" , X86_COND_G>;
+
+// jcx/jecx/jrcx instructions.
+let isAsmParserOnly = 1, isBranch = 1, isTerminator = 1 in {
+ // These are the 32-bit versions of this instruction for the asmparser. In
+ // 32-bit mode, the address size prefix is jcxz and the unprefixed version is
+ // jecxz.
+ let Uses = [CX] in
+ def JCXZ : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst),
+ "jcxz\t$dst", []>, AdSize, Requires<[In32BitMode]>;
+ let Uses = [ECX] in
+ def JECXZ_32 : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst),
+ "jecxz\t$dst", []>, Requires<[In32BitMode]>;
+
+ // J*CXZ instruction: 64-bit versions of this instruction for the asmparser.
+ // In 64-bit mode, the address size prefix is jecxz and the unprefixed version
+ // is jrcxz.
+ let Uses = [ECX] in
+ def JECXZ_64 : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst),
+ "jecxz\t$dst", []>, AdSize, Requires<[In64BitMode]>;
+ let Uses = [RCX] in
+ def JRCXZ : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst),
+ "jrcxz\t$dst", []>, Requires<[In64BitMode]>;
+}
+
+// Indirect branches
+let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
+ def JMP32r : I<0xFF, MRM4r, (outs), (ins GR32:$dst), "jmp{l}\t{*}$dst",
+ [(brind GR32:$dst)]>, Requires<[In32BitMode]>;
+ def JMP32m : I<0xFF, MRM4m, (outs), (ins i32mem:$dst), "jmp{l}\t{*}$dst",
+ [(brind (loadi32 addr:$dst))]>, Requires<[In32BitMode]>;
+
+ def JMP64r : I<0xFF, MRM4r, (outs), (ins GR64:$dst), "jmp{q}\t{*}$dst",
+ [(brind GR64:$dst)]>, Requires<[In64BitMode]>;
+ def JMP64m : I<0xFF, MRM4m, (outs), (ins i64mem:$dst), "jmp{q}\t{*}$dst",
+ [(brind (loadi64 addr:$dst))]>, Requires<[In64BitMode]>;
+
+ def FARJMP16i : Iseg16<0xEA, RawFrmImm16, (outs),
+ (ins i16imm:$off, i16imm:$seg),
+ "ljmp{w}\t{$seg, $off|$off, $seg}", []>, OpSize;
+ def FARJMP32i : Iseg32<0xEA, RawFrmImm16, (outs),
+ (ins i32imm:$off, i16imm:$seg),
+ "ljmp{l}\t{$seg, $off|$off, $seg}", []>;
+ def FARJMP64 : RI<0xFF, MRM5m, (outs), (ins opaque80mem:$dst),
+ "ljmp{q}\t{*}$dst", []>;
+
+ def FARJMP16m : I<0xFF, MRM5m, (outs), (ins opaque32mem:$dst),
+ "ljmp{w}\t{*}$dst", []>, OpSize;
+ def FARJMP32m : I<0xFF, MRM5m, (outs), (ins opaque48mem:$dst),
+ "ljmp{l}\t{*}$dst", []>;
+}
+
+
+// Loop instructions
+
+def LOOP : Ii8PCRel<0xE2, RawFrm, (outs), (ins brtarget8:$dst), "loop\t$dst", []>;
+def LOOPE : Ii8PCRel<0xE1, RawFrm, (outs), (ins brtarget8:$dst), "loope\t$dst", []>;
+def LOOPNE : Ii8PCRel<0xE0, RawFrm, (outs), (ins brtarget8:$dst), "loopne\t$dst", []>;
+
+//===----------------------------------------------------------------------===//
+// Call Instructions...
+//
+let isCall = 1 in
+ // All calls clobber the non-callee saved registers. ESP is marked as
+ // a use to prevent stack-pointer assignments that appear immediately
+ // before calls from potentially appearing dead. Uses for argument
+ // registers are added manually.
+ let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
+ MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+ XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+ XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
+ Uses = [ESP] in {
+ def CALLpcrel32 : Ii32PCRel<0xE8, RawFrm,
+ (outs), (ins i32imm_pcrel:$dst,variable_ops),
+ "call{l}\t$dst", []>, Requires<[In32BitMode]>;
+ def CALL32r : I<0xFF, MRM2r, (outs), (ins GR32:$dst, variable_ops),
+ "call{l}\t{*}$dst", [(X86call GR32:$dst)]>,
+ Requires<[In32BitMode]>;
+ def CALL32m : I<0xFF, MRM2m, (outs), (ins i32mem:$dst, variable_ops),
+ "call{l}\t{*}$dst", [(X86call (loadi32 addr:$dst))]>,
+ Requires<[In32BitMode]>;
+
+ def FARCALL16i : Iseg16<0x9A, RawFrmImm16, (outs),
+ (ins i16imm:$off, i16imm:$seg),
+ "lcall{w}\t{$seg, $off|$off, $seg}", []>, OpSize;
+ def FARCALL32i : Iseg32<0x9A, RawFrmImm16, (outs),
+ (ins i32imm:$off, i16imm:$seg),
+ "lcall{l}\t{$seg, $off|$off, $seg}", []>;
+
+ def FARCALL16m : I<0xFF, MRM3m, (outs), (ins opaque32mem:$dst),
+ "lcall{w}\t{*}$dst", []>, OpSize;
+ def FARCALL32m : I<0xFF, MRM3m, (outs), (ins opaque48mem:$dst),
+ "lcall{l}\t{*}$dst", []>;
+
+ // callw for 16 bit code for the assembler.
+ let isAsmParserOnly = 1 in
+ def CALLpcrel16 : Ii16PCRel<0xE8, RawFrm,
+ (outs), (ins i16imm_pcrel:$dst, variable_ops),
+ "callw\t$dst", []>, OpSize;
+ }
+
+
+// Tail call stuff.
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
+ isCodeGenOnly = 1 in
+ let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
+ MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+ XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+ XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
+ Uses = [ESP] in {
+ def TCRETURNdi : PseudoI<(outs),
+ (ins i32imm_pcrel:$dst, i32imm:$offset, variable_ops), []>;
+ def TCRETURNri : PseudoI<(outs),
+ (ins GR32_TC:$dst, i32imm:$offset, variable_ops), []>;
+ let mayLoad = 1 in
+ def TCRETURNmi : PseudoI<(outs),
+ (ins i32mem_TC:$dst, i32imm:$offset, variable_ops), []>;
+
+ // FIXME: The should be pseudo instructions that are lowered when going to
+ // mcinst.
+ def TAILJMPd : Ii32PCRel<0xE9, RawFrm, (outs),
+ (ins i32imm_pcrel:$dst, variable_ops),
+ "jmp\t$dst # TAILCALL",
+ []>;
+ def TAILJMPr : I<0xFF, MRM4r, (outs), (ins GR32_TC:$dst, variable_ops),
+ "", []>; // FIXME: Remove encoding when JIT is dead.
+ let mayLoad = 1 in
+ def TAILJMPm : I<0xFF, MRM4m, (outs), (ins i32mem_TC:$dst, variable_ops),
+ "jmp{l}\t{*}$dst # TAILCALL", []>;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Call Instructions...
+//
+let isCall = 1 in
+ // All calls clobber the non-callee saved registers. RSP is marked as
+ // a use to prevent stack-pointer assignments that appear immediately
+ // before calls from potentially appearing dead. Uses for argument
+ // registers are added manually.
+ let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
+ FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
+ MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+ XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+ XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
+ Uses = [RSP] in {
+
+ // NOTE: this pattern doesn't match "X86call imm", because we do not know
+ // that the offset between an arbitrary immediate and the call will fit in
+ // the 32-bit pcrel field that we have.
+ def CALL64pcrel32 : Ii32PCRel<0xE8, RawFrm,
+ (outs), (ins i64i32imm_pcrel:$dst, variable_ops),
+ "call{q}\t$dst", []>,
+ Requires<[In64BitMode, NotWin64]>;
+ def CALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst, variable_ops),
+ "call{q}\t{*}$dst", [(X86call GR64:$dst)]>,
+ Requires<[In64BitMode, NotWin64]>;
+ def CALL64m : I<0xFF, MRM2m, (outs), (ins i64mem:$dst, variable_ops),
+ "call{q}\t{*}$dst", [(X86call (loadi64 addr:$dst))]>,
+ Requires<[In64BitMode, NotWin64]>;
+
+ def FARCALL64 : RI<0xFF, MRM3m, (outs), (ins opaque80mem:$dst),
+ "lcall{q}\t{*}$dst", []>;
+ }
+
+ // FIXME: We need to teach codegen about single list of call-clobbered
+ // registers.
+let isCall = 1, isCodeGenOnly = 1 in
+ // All calls clobber the non-callee saved registers. RSP is marked as
+ // a use to prevent stack-pointer assignments that appear immediately
+ // before calls from potentially appearing dead. Uses for argument
+ // registers are added manually.
+ let Defs = [RAX, RCX, RDX, R8, R9, R10, R11,
+ FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
+ MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+ XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, EFLAGS],
+ Uses = [RSP] in {
+ def WINCALL64pcrel32 : Ii32PCRel<0xE8, RawFrm,
+ (outs), (ins i64i32imm_pcrel:$dst, variable_ops),
+ "call{q}\t$dst", []>,
+ Requires<[IsWin64]>;
+ def WINCALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst, variable_ops),
+ "call{q}\t{*}$dst",
+ [(X86call GR64:$dst)]>, Requires<[IsWin64]>;
+ def WINCALL64m : I<0xFF, MRM2m, (outs),
+ (ins i64mem:$dst,variable_ops),
+ "call{q}\t{*}$dst",
+ [(X86call (loadi64 addr:$dst))]>,
+ Requires<[IsWin64]>;
+ }
+
+let isCall = 1, isCodeGenOnly = 1 in
+ // __chkstk(MSVC): clobber R10, R11 and EFLAGS.
+ // ___chkstk(Mingw64): clobber R10, R11, RAX and EFLAGS, and update RSP.
+ let Defs = [RAX, R10, R11, RSP, EFLAGS],
+ Uses = [RSP] in {
+ def W64ALLOCA : Ii32PCRel<0xE8, RawFrm,
+ (outs), (ins i64i32imm_pcrel:$dst, variable_ops),
+ "call{q}\t$dst", []>,
+ Requires<[IsWin64]>;
+ }
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
+ isCodeGenOnly = 1 in
+ // AMD64 cc clobbers RSI, RDI, XMM6-XMM15.
+ let Defs = [RAX, RCX, RDX, R8, R9, R10, R11,
+ FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
+ MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+ XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, EFLAGS],
+ Uses = [RSP],
+ usesCustomInserter = 1 in {
+ def TCRETURNdi64 : PseudoI<(outs),
+ (ins i64i32imm_pcrel:$dst, i32imm:$offset, variable_ops),
+ []>;
+ def TCRETURNri64 : PseudoI<(outs),
+ (ins ptr_rc_tailcall:$dst, i32imm:$offset, variable_ops), []>;
+ let mayLoad = 1 in
+ def TCRETURNmi64 : PseudoI<(outs),
+ (ins i64mem_TC:$dst, i32imm:$offset, variable_ops), []>;
+
+ def TAILJMPd64 : Ii32PCRel<0xE9, RawFrm, (outs),
+ (ins i64i32imm_pcrel:$dst, variable_ops),
+ "jmp\t$dst # TAILCALL", []>;
+ def TAILJMPr64 : I<0xFF, MRM4r, (outs), (ins ptr_rc_tailcall:$dst, variable_ops),
+ "jmp{q}\t{*}$dst # TAILCALL", []>;
+
+ let mayLoad = 1 in
+ def TAILJMPm64 : I<0xFF, MRM4m, (outs), (ins i64mem_TC:$dst, variable_ops),
+ "jmp{q}\t{*}$dst # TAILCALL", []>;
+}
diff --git a/contrib/llvm/lib/Target/X86/X86InstrExtension.td b/contrib/llvm/lib/Target/X86/X86InstrExtension.td
new file mode 100644
index 0000000..e62e6b7
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrExtension.td
@@ -0,0 +1,151 @@
+//===- X86InstrExtension.td - Sign and Zero Extensions -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the sign and zero extension operations.
+//
+//===----------------------------------------------------------------------===//
+
+let neverHasSideEffects = 1 in {
+ let Defs = [AX], Uses = [AL] in
+ def CBW : I<0x98, RawFrm, (outs), (ins),
+ "{cbtw|cbw}", []>, OpSize; // AX = signext(AL)
+ let Defs = [EAX], Uses = [AX] in
+ def CWDE : I<0x98, RawFrm, (outs), (ins),
+ "{cwtl|cwde}", []>; // EAX = signext(AX)
+
+ let Defs = [AX,DX], Uses = [AX] in
+ def CWD : I<0x99, RawFrm, (outs), (ins),
+ "{cwtd|cwd}", []>, OpSize; // DX:AX = signext(AX)
+ let Defs = [EAX,EDX], Uses = [EAX] in
+ def CDQ : I<0x99, RawFrm, (outs), (ins),
+ "{cltd|cdq}", []>; // EDX:EAX = signext(EAX)
+
+
+ let Defs = [RAX], Uses = [EAX] in
+ def CDQE : RI<0x98, RawFrm, (outs), (ins),
+ "{cltq|cdqe}", []>; // RAX = signext(EAX)
+
+ let Defs = [RAX,RDX], Uses = [RAX] in
+ def CQO : RI<0x99, RawFrm, (outs), (ins),
+ "{cqto|cqo}", []>; // RDX:RAX = signext(RAX)
+}
+
+
+// Sign/Zero extenders
+def MOVSX16rr8 : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src),
+ "movs{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+def MOVSX16rm8 : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src),
+ "movs{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+def MOVSX32rr8 : I<0xBE, MRMSrcReg, (outs GR32:$dst), (ins GR8:$src),
+ "movs{bl|x}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (sext GR8:$src))]>, TB;
+def MOVSX32rm8 : I<0xBE, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src),
+ "movs{bl|x}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (sextloadi32i8 addr:$src))]>, TB;
+def MOVSX32rr16: I<0xBF, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src),
+ "movs{wl|x}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (sext GR16:$src))]>, TB;
+def MOVSX32rm16: I<0xBF, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
+ "movs{wl|x}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (sextloadi32i16 addr:$src))]>, TB;
+
+def MOVZX16rr8 : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src),
+ "movz{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+def MOVZX16rm8 : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src),
+ "movz{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+def MOVZX32rr8 : I<0xB6, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src),
+ "movz{bl|x}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (zext GR8:$src))]>, TB;
+def MOVZX32rm8 : I<0xB6, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src),
+ "movz{bl|x}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (zextloadi32i8 addr:$src))]>, TB;
+def MOVZX32rr16: I<0xB7, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src),
+ "movz{wl|x}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (zext GR16:$src))]>, TB;
+def MOVZX32rm16: I<0xB7, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
+ "movz{wl|x}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (zextloadi32i16 addr:$src))]>, TB;
+
+// These are the same as the regular MOVZX32rr8 and MOVZX32rm8
+// except that they use GR32_NOREX for the output operand register class
+// instead of GR32. This allows them to operate on h registers on x86-64.
+def MOVZX32_NOREXrr8 : I<0xB6, MRMSrcReg,
+ (outs GR32_NOREX:$dst), (ins GR8_NOREX:$src),
+ "movz{bl|x}\t{$src, $dst|$dst, $src}",
+ []>, TB;
+let mayLoad = 1 in
+def MOVZX32_NOREXrm8 : I<0xB6, MRMSrcMem,
+ (outs GR32_NOREX:$dst), (ins i8mem_NOREX:$src),
+ "movz{bl|x}\t{$src, $dst|$dst, $src}",
+ []>, TB;
+
+// MOVSX64rr8 always has a REX prefix and it has an 8-bit register
+// operand, which makes it a rare instruction with an 8-bit register
+// operand that can never access an h register. If support for h registers
+// were generalized, this would require a special register class.
+def MOVSX64rr8 : RI<0xBE, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src),
+ "movs{bq|x}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (sext GR8:$src))]>, TB;
+def MOVSX64rm8 : RI<0xBE, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src),
+ "movs{bq|x}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (sextloadi64i8 addr:$src))]>, TB;
+def MOVSX64rr16: RI<0xBF, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
+ "movs{wq|x}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (sext GR16:$src))]>, TB;
+def MOVSX64rm16: RI<0xBF, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
+ "movs{wq|x}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (sextloadi64i16 addr:$src))]>, TB;
+def MOVSX64rr32: RI<0x63, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src),
+ "movs{lq|xd}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (sext GR32:$src))]>;
+def MOVSX64rm32: RI<0x63, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src),
+ "movs{lq|xd}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (sextloadi64i32 addr:$src))]>;
+
+// movzbq and movzwq encodings for the disassembler
+def MOVZX64rr8_Q : RI<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8:$src),
+ "movz{bq|x}\t{$src, $dst|$dst, $src}", []>, TB;
+def MOVZX64rm8_Q : RI<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem:$src),
+ "movz{bq|x}\t{$src, $dst|$dst, $src}", []>, TB;
+def MOVZX64rr16_Q : RI<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
+ "movz{wq|x}\t{$src, $dst|$dst, $src}", []>, TB;
+def MOVZX64rm16_Q : RI<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
+ "movz{wq|x}\t{$src, $dst|$dst, $src}", []>, TB;
+
+// FIXME: These should be Pat patterns.
+let isCodeGenOnly = 1 in {
+
+// Use movzbl instead of movzbq when the destination is a register; it's
+// equivalent due to implicit zero-extending, and it has a smaller encoding.
+def MOVZX64rr8 : I<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src),
+ "", [(set GR64:$dst, (zext GR8:$src))]>, TB;
+def MOVZX64rm8 : I<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src),
+ "", [(set GR64:$dst, (zextloadi64i8 addr:$src))]>, TB;
+// Use movzwl instead of movzwq when the destination is a register; it's
+// equivalent due to implicit zero-extending, and it has a smaller encoding.
+def MOVZX64rr16: I<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
+ "", [(set GR64:$dst, (zext GR16:$src))]>, TB;
+def MOVZX64rm16: I<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
+ "", [(set GR64:$dst, (zextloadi64i16 addr:$src))]>, TB;
+
+// There's no movzlq instruction, but movl can be used for this purpose, using
+// implicit zero-extension. The preferred way to do 32-bit-to-64-bit zero
+// extension on x86-64 is to use a SUBREG_TO_REG to utilize implicit
+// zero-extension, however this isn't possible when the 32-bit value is
+// defined by a truncate or is copied from something where the high bits aren't
+// necessarily all zero. In such cases, we fall back to these explicit zext
+// instructions.
+def MOVZX64rr32 : I<0x89, MRMDestReg, (outs GR64:$dst), (ins GR32:$src),
+ "", [(set GR64:$dst, (zext GR32:$src))]>;
+def MOVZX64rm32 : I<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src),
+ "", [(set GR64:$dst, (zextloadi64i32 addr:$src))]>;
+
+
+}
+
diff --git a/contrib/llvm/lib/Target/X86/X86InstrFMA.td b/contrib/llvm/lib/Target/X86/X86InstrFMA.td
new file mode 100644
index 0000000..d868773
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrFMA.td
@@ -0,0 +1,60 @@
+//====- X86InstrFMA.td - Describe the X86 Instruction Set --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes FMA (Fused Multiply-Add) instructions.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// FMA3 - Intel 3 operand Fused Multiply-Add instructions
+//===----------------------------------------------------------------------===//
+
+multiclass fma_rm<bits<8> opc, string OpcodeStr> {
+ def r : FMA3<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>;
+ def m : FMA3<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, f128mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>;
+ def rY : FMA3<opc, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>;
+ def mY : FMA3<opc, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, f256mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>;
+}
+
+multiclass fma_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
+ string OpcodeStr, string PackTy> {
+ defm r132 : fma_rm<opc132, !strconcat(OpcodeStr, !strconcat("132", PackTy))>;
+ defm r213 : fma_rm<opc213, !strconcat(OpcodeStr, !strconcat("213", PackTy))>;
+ defm r231 : fma_rm<opc231, !strconcat(OpcodeStr, !strconcat("231", PackTy))>;
+}
+
+let isAsmParserOnly = 1 in {
+ // Fused Multiply-Add
+ defm VFMADDPS : fma_forms<0x98, 0xA8, 0xB8, "vfmadd", "ps">;
+ defm VFMADDPD : fma_forms<0x98, 0xA8, 0xB8, "vfmadd", "pd">, VEX_W;
+ defm VFMADDSUBPS : fma_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "ps">;
+ defm VFMADDSUBPD : fma_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "pd">, VEX_W;
+ defm VFMSUBADDPS : fma_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "ps">;
+ defm VFMSUBADDPD : fma_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "pd">, VEX_W;
+ defm VFMSUBPS : fma_forms<0x9A, 0xAA, 0xBA, "vfmsub", "ps">;
+ defm VFMSUBPD : fma_forms<0x9A, 0xAA, 0xBA, "vfmsub", "pd">, VEX_W;
+
+ // Fused Negative Multiply-Add
+ defm VFNMADDPS : fma_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "ps">;
+ defm VFNMADDPD : fma_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "pd">, VEX_W;
+ defm VFNMSUBPS : fma_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "ps">;
+ defm VFNMSUBPD : fma_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "pd">, VEX_W;
+}
diff --git a/contrib/llvm/lib/Target/X86/X86InstrFPStack.td b/contrib/llvm/lib/Target/X86/X86InstrFPStack.td
new file mode 100644
index 0000000..7cb870f
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrFPStack.td
@@ -0,0 +1,648 @@
+//==- X86InstrFPStack.td - Describe the X86 Instruction Set --*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the X86 x87 FPU instruction set, defining the
+// instructions, and properties of the instructions which are needed for code
+// generation, machine code emission, and analysis.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// FPStack specific DAG Nodes.
+//===----------------------------------------------------------------------===//
+
+def SDTX86FpGet2 : SDTypeProfile<2, 0, [SDTCisVT<0, f80>,
+ SDTCisVT<1, f80>]>;
+def SDTX86Fld : SDTypeProfile<1, 2, [SDTCisFP<0>,
+ SDTCisPtrTy<1>,
+ SDTCisVT<2, OtherVT>]>;
+def SDTX86Fst : SDTypeProfile<0, 3, [SDTCisFP<0>,
+ SDTCisPtrTy<1>,
+ SDTCisVT<2, OtherVT>]>;
+def SDTX86Fild : SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisPtrTy<1>,
+ SDTCisVT<2, OtherVT>]>;
+def SDTX86FpToIMem : SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisPtrTy<1>]>;
+
+def SDTX86CwdStore : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
+
+def X86fld : SDNode<"X86ISD::FLD", SDTX86Fld,
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def X86fst : SDNode<"X86ISD::FST", SDTX86Fst,
+ [SDNPHasChain, SDNPInGlue, SDNPMayStore,
+ SDNPMemOperand]>;
+def X86fild : SDNode<"X86ISD::FILD", SDTX86Fild,
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def X86fildflag : SDNode<"X86ISD::FILD_FLAG", SDTX86Fild,
+ [SDNPHasChain, SDNPOutGlue, SDNPMayLoad,
+ SDNPMemOperand]>;
+def X86fp_to_i16mem : SDNode<"X86ISD::FP_TO_INT16_IN_MEM", SDTX86FpToIMem,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def X86fp_to_i32mem : SDNode<"X86ISD::FP_TO_INT32_IN_MEM", SDTX86FpToIMem,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def X86fp_to_i64mem : SDNode<"X86ISD::FP_TO_INT64_IN_MEM", SDTX86FpToIMem,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def X86fp_cwd_get16 : SDNode<"X86ISD::FNSTCW16m", SDTX86CwdStore,
+ [SDNPHasChain, SDNPMayStore, SDNPSideEffect,
+ SDNPMemOperand]>;
+
+//===----------------------------------------------------------------------===//
+// FPStack pattern fragments
+//===----------------------------------------------------------------------===//
+
+def fpimm0 : PatLeaf<(fpimm), [{
+ return N->isExactlyValue(+0.0);
+}]>;
+
+def fpimmneg0 : PatLeaf<(fpimm), [{
+ return N->isExactlyValue(-0.0);
+}]>;
+
+def fpimm1 : PatLeaf<(fpimm), [{
+ return N->isExactlyValue(+1.0);
+}]>;
+
+def fpimmneg1 : PatLeaf<(fpimm), [{
+ return N->isExactlyValue(-1.0);
+}]>;
+
+// Some 'special' instructions
+let usesCustomInserter = 1 in { // Expanded after instruction selection.
+ def FP32_TO_INT16_IN_MEM : PseudoI<(outs), (ins i16mem:$dst, RFP32:$src),
+ [(X86fp_to_i16mem RFP32:$src, addr:$dst)]>;
+ def FP32_TO_INT32_IN_MEM : PseudoI<(outs), (ins i32mem:$dst, RFP32:$src),
+ [(X86fp_to_i32mem RFP32:$src, addr:$dst)]>;
+ def FP32_TO_INT64_IN_MEM : PseudoI<(outs), (ins i64mem:$dst, RFP32:$src),
+ [(X86fp_to_i64mem RFP32:$src, addr:$dst)]>;
+ def FP64_TO_INT16_IN_MEM : PseudoI<(outs), (ins i16mem:$dst, RFP64:$src),
+ [(X86fp_to_i16mem RFP64:$src, addr:$dst)]>;
+ def FP64_TO_INT32_IN_MEM : PseudoI<(outs), (ins i32mem:$dst, RFP64:$src),
+ [(X86fp_to_i32mem RFP64:$src, addr:$dst)]>;
+ def FP64_TO_INT64_IN_MEM : PseudoI<(outs), (ins i64mem:$dst, RFP64:$src),
+ [(X86fp_to_i64mem RFP64:$src, addr:$dst)]>;
+ def FP80_TO_INT16_IN_MEM : PseudoI<(outs), (ins i16mem:$dst, RFP80:$src),
+ [(X86fp_to_i16mem RFP80:$src, addr:$dst)]>;
+ def FP80_TO_INT32_IN_MEM : PseudoI<(outs), (ins i32mem:$dst, RFP80:$src),
+ [(X86fp_to_i32mem RFP80:$src, addr:$dst)]>;
+ def FP80_TO_INT64_IN_MEM : PseudoI<(outs), (ins i64mem:$dst, RFP80:$src),
+ [(X86fp_to_i64mem RFP80:$src, addr:$dst)]>;
+}
+
+// All FP Stack operations are represented with four instructions here. The
+// first three instructions, generated by the instruction selector, use "RFP32"
+// "RFP64" or "RFP80" registers: traditional register files to reference 32-bit,
+// 64-bit or 80-bit floating point values. These sizes apply to the values,
+// not the registers, which are always 80 bits; RFP32, RFP64 and RFP80 can be
+// copied to each other without losing information. These instructions are all
+// pseudo instructions and use the "_Fp" suffix.
+// In some cases there are additional variants with a mixture of different
+// register sizes.
+// The second instruction is defined with FPI, which is the actual instruction
+// emitted by the assembler. These use "RST" registers, although frequently
+// the actual register(s) used are implicit. These are always 80 bits.
+// The FP stackifier pass converts one to the other after register allocation
+// occurs.
+//
+// Note that the FpI instruction should have instruction selection info (e.g.
+// a pattern) and the FPI instruction should have emission info (e.g. opcode
+// encoding and asm printing info).
+
+// Pseudo Instruction for FP stack return values.
+def FpPOP_RETVAL : FpI_<(outs RFP80:$dst), (ins), SpecialFP, []>;
+
+// FpIf32, FpIf64 - Floating Point Pseudo Instruction template.
+// f32 instructions can use SSE1 and are predicated on FPStackf32 == !SSE1.
+// f64 instructions can use SSE2 and are predicated on FPStackf64 == !SSE2.
+// f80 instructions cannot use SSE and use neither of these.
+class FpIf32<dag outs, dag ins, FPFormat fp, list<dag> pattern> :
+ FpI_<outs, ins, fp, pattern>, Requires<[FPStackf32]>;
+class FpIf64<dag outs, dag ins, FPFormat fp, list<dag> pattern> :
+ FpI_<outs, ins, fp, pattern>, Requires<[FPStackf64]>;
+
+// Factoring for arithmetic.
+multiclass FPBinary_rr<SDNode OpNode> {
+// Register op register -> register
+// These are separated out because they have no reversed form.
+def _Fp32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, RFP32:$src2), TwoArgFP,
+ [(set RFP32:$dst, (OpNode RFP32:$src1, RFP32:$src2))]>;
+def _Fp64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, RFP64:$src2), TwoArgFP,
+ [(set RFP64:$dst, (OpNode RFP64:$src1, RFP64:$src2))]>;
+def _Fp80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, RFP80:$src2), TwoArgFP,
+ [(set RFP80:$dst, (OpNode RFP80:$src1, RFP80:$src2))]>;
+}
+// The FopST0 series are not included here because of the irregularities
+// in where the 'r' goes in assembly output.
+// These instructions cannot address 80-bit memory.
+multiclass FPBinary<SDNode OpNode, Format fp, string asmstring> {
+// ST(0) = ST(0) + [mem]
+def _Fp32m : FpIf32<(outs RFP32:$dst),
+ (ins RFP32:$src1, f32mem:$src2), OneArgFPRW,
+ [(set RFP32:$dst,
+ (OpNode RFP32:$src1, (loadf32 addr:$src2)))]>;
+def _Fp64m : FpIf64<(outs RFP64:$dst),
+ (ins RFP64:$src1, f64mem:$src2), OneArgFPRW,
+ [(set RFP64:$dst,
+ (OpNode RFP64:$src1, (loadf64 addr:$src2)))]>;
+def _Fp64m32: FpIf64<(outs RFP64:$dst),
+ (ins RFP64:$src1, f32mem:$src2), OneArgFPRW,
+ [(set RFP64:$dst,
+ (OpNode RFP64:$src1, (f64 (extloadf32 addr:$src2))))]>;
+def _Fp80m32: FpI_<(outs RFP80:$dst),
+ (ins RFP80:$src1, f32mem:$src2), OneArgFPRW,
+ [(set RFP80:$dst,
+ (OpNode RFP80:$src1, (f80 (extloadf32 addr:$src2))))]>;
+def _Fp80m64: FpI_<(outs RFP80:$dst),
+ (ins RFP80:$src1, f64mem:$src2), OneArgFPRW,
+ [(set RFP80:$dst,
+ (OpNode RFP80:$src1, (f80 (extloadf64 addr:$src2))))]>;
+def _F32m : FPI<0xD8, fp, (outs), (ins f32mem:$src),
+ !strconcat("f", asmstring, "{s}\t$src")> {
+ let mayLoad = 1;
+}
+def _F64m : FPI<0xDC, fp, (outs), (ins f64mem:$src),
+ !strconcat("f", asmstring, "{l}\t$src")> {
+ let mayLoad = 1;
+}
+// ST(0) = ST(0) + [memint]
+def _FpI16m32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, i16mem:$src2),
+ OneArgFPRW,
+ [(set RFP32:$dst, (OpNode RFP32:$src1,
+ (X86fild addr:$src2, i16)))]>;
+def _FpI32m32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, i32mem:$src2),
+ OneArgFPRW,
+ [(set RFP32:$dst, (OpNode RFP32:$src1,
+ (X86fild addr:$src2, i32)))]>;
+def _FpI16m64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, i16mem:$src2),
+ OneArgFPRW,
+ [(set RFP64:$dst, (OpNode RFP64:$src1,
+ (X86fild addr:$src2, i16)))]>;
+def _FpI32m64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, i32mem:$src2),
+ OneArgFPRW,
+ [(set RFP64:$dst, (OpNode RFP64:$src1,
+ (X86fild addr:$src2, i32)))]>;
+def _FpI16m80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, i16mem:$src2),
+ OneArgFPRW,
+ [(set RFP80:$dst, (OpNode RFP80:$src1,
+ (X86fild addr:$src2, i16)))]>;
+def _FpI32m80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, i32mem:$src2),
+ OneArgFPRW,
+ [(set RFP80:$dst, (OpNode RFP80:$src1,
+ (X86fild addr:$src2, i32)))]>;
+def _FI16m : FPI<0xDE, fp, (outs), (ins i16mem:$src),
+ !strconcat("fi", asmstring, "{s}\t$src")> {
+ let mayLoad = 1;
+}
+def _FI32m : FPI<0xDA, fp, (outs), (ins i32mem:$src),
+ !strconcat("fi", asmstring, "{l}\t$src")> {
+ let mayLoad = 1;
+}
+}
+
+defm ADD : FPBinary_rr<fadd>;
+defm SUB : FPBinary_rr<fsub>;
+defm MUL : FPBinary_rr<fmul>;
+defm DIV : FPBinary_rr<fdiv>;
+defm ADD : FPBinary<fadd, MRM0m, "add">;
+defm SUB : FPBinary<fsub, MRM4m, "sub">;
+defm SUBR: FPBinary<fsub ,MRM5m, "subr">;
+defm MUL : FPBinary<fmul, MRM1m, "mul">;
+defm DIV : FPBinary<fdiv, MRM6m, "div">;
+defm DIVR: FPBinary<fdiv, MRM7m, "divr">;
+
+class FPST0rInst<bits<8> o, string asm>
+ : FPI<o, AddRegFrm, (outs), (ins RST:$op), asm>, D8;
+class FPrST0Inst<bits<8> o, string asm>
+ : FPI<o, AddRegFrm, (outs), (ins RST:$op), asm>, DC;
+class FPrST0PInst<bits<8> o, string asm>
+ : FPI<o, AddRegFrm, (outs), (ins RST:$op), asm>, DE;
+
+// NOTE: GAS and apparently all other AT&T style assemblers have a broken notion
+// of some of the 'reverse' forms of the fsub and fdiv instructions. As such,
+// we have to put some 'r's in and take them out of weird places.
+def ADD_FST0r : FPST0rInst <0xC0, "fadd\t$op">;
+def ADD_FrST0 : FPrST0Inst <0xC0, "fadd\t{%st(0), $op|$op, %ST(0)}">;
+def ADD_FPrST0 : FPrST0PInst<0xC0, "faddp\t$op">;
+def SUBR_FST0r : FPST0rInst <0xE8, "fsubr\t$op">;
+def SUB_FrST0 : FPrST0Inst <0xE8, "fsub{r}\t{%st(0), $op|$op, %ST(0)}">;
+def SUB_FPrST0 : FPrST0PInst<0xE8, "fsub{r}p\t$op">;
+def SUB_FST0r : FPST0rInst <0xE0, "fsub\t$op">;
+def SUBR_FrST0 : FPrST0Inst <0xE0, "fsub{|r}\t{%st(0), $op|$op, %ST(0)}">;
+def SUBR_FPrST0 : FPrST0PInst<0xE0, "fsub{|r}p\t$op">;
+def MUL_FST0r : FPST0rInst <0xC8, "fmul\t$op">;
+def MUL_FrST0 : FPrST0Inst <0xC8, "fmul\t{%st(0), $op|$op, %ST(0)}">;
+def MUL_FPrST0 : FPrST0PInst<0xC8, "fmulp\t$op">;
+def DIVR_FST0r : FPST0rInst <0xF8, "fdivr\t$op">;
+def DIV_FrST0 : FPrST0Inst <0xF8, "fdiv{r}\t{%st(0), $op|$op, %ST(0)}">;
+def DIV_FPrST0 : FPrST0PInst<0xF8, "fdiv{r}p\t$op">;
+def DIV_FST0r : FPST0rInst <0xF0, "fdiv\t$op">;
+def DIVR_FrST0 : FPrST0Inst <0xF0, "fdiv{|r}\t{%st(0), $op|$op, %ST(0)}">;
+def DIVR_FPrST0 : FPrST0PInst<0xF0, "fdiv{|r}p\t$op">;
+
+def COM_FST0r : FPST0rInst <0xD0, "fcom\t$op">;
+def COMP_FST0r : FPST0rInst <0xD8, "fcomp\t$op">;
+
+// Unary operations.
+multiclass FPUnary<SDNode OpNode, bits<8> opcode, string asmstring> {
+def _Fp32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src), OneArgFPRW,
+ [(set RFP32:$dst, (OpNode RFP32:$src))]>;
+def _Fp64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src), OneArgFPRW,
+ [(set RFP64:$dst, (OpNode RFP64:$src))]>;
+def _Fp80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src), OneArgFPRW,
+ [(set RFP80:$dst, (OpNode RFP80:$src))]>;
+def _F : FPI<opcode, RawFrm, (outs), (ins), asmstring>, D9;
+}
+
+defm CHS : FPUnary<fneg, 0xE0, "fchs">;
+defm ABS : FPUnary<fabs, 0xE1, "fabs">;
+defm SQRT: FPUnary<fsqrt,0xFA, "fsqrt">;
+defm SIN : FPUnary<fsin, 0xFE, "fsin">;
+defm COS : FPUnary<fcos, 0xFF, "fcos">;
+
+let neverHasSideEffects = 1 in {
+def TST_Fp32 : FpIf32<(outs), (ins RFP32:$src), OneArgFP, []>;
+def TST_Fp64 : FpIf64<(outs), (ins RFP64:$src), OneArgFP, []>;
+def TST_Fp80 : FpI_<(outs), (ins RFP80:$src), OneArgFP, []>;
+}
+def TST_F : FPI<0xE4, RawFrm, (outs), (ins), "ftst">, D9;
+
+// Versions of FP instructions that take a single memory operand. Added for the
+// disassembler; remove as they are included with patterns elsewhere.
+def FCOM32m : FPI<0xD8, MRM2m, (outs), (ins f32mem:$src), "fcom{s}\t$src">;
+def FCOMP32m : FPI<0xD8, MRM3m, (outs), (ins f32mem:$src), "fcomp{s}\t$src">;
+
+def FLDENVm : FPI<0xD9, MRM4m, (outs), (ins f32mem:$src), "fldenv\t$src">;
+def FSTENVm : FPI<0xD9, MRM6m, (outs f32mem:$dst), (ins), "fnstenv\t$dst">;
+
+def FICOM32m : FPI<0xDA, MRM2m, (outs), (ins i32mem:$src), "ficom{l}\t$src">;
+def FICOMP32m: FPI<0xDA, MRM3m, (outs), (ins i32mem:$src), "ficomp{l}\t$src">;
+
+def FCOM64m : FPI<0xDC, MRM2m, (outs), (ins f64mem:$src), "fcom{l}\t$src">;
+def FCOMP64m : FPI<0xDC, MRM3m, (outs), (ins f64mem:$src), "fcomp{l}\t$src">;
+
+def FRSTORm : FPI<0xDD, MRM4m, (outs f32mem:$dst), (ins), "frstor\t$dst">;
+def FSAVEm : FPI<0xDD, MRM6m, (outs f32mem:$dst), (ins), "fnsave\t$dst">;
+def FNSTSWm : FPI<0xDD, MRM7m, (outs f32mem:$dst), (ins), "fnstsw\t$dst">;
+
+def FICOM16m : FPI<0xDE, MRM2m, (outs), (ins i16mem:$src), "ficom{s}\t$src">;
+def FICOMP16m: FPI<0xDE, MRM3m, (outs), (ins i16mem:$src), "ficomp{s}\t$src">;
+
+def FBLDm : FPI<0xDF, MRM4m, (outs), (ins f32mem:$src), "fbld\t$src">;
+def FBSTPm : FPI<0xDF, MRM6m, (outs f32mem:$dst), (ins), "fbstp\t$dst">;
+
+// Floating point cmovs.
+class FpIf32CMov<dag outs, dag ins, FPFormat fp, list<dag> pattern> :
+ FpI_<outs, ins, fp, pattern>, Requires<[FPStackf32, HasCMov]>;
+class FpIf64CMov<dag outs, dag ins, FPFormat fp, list<dag> pattern> :
+ FpI_<outs, ins, fp, pattern>, Requires<[FPStackf64, HasCMov]>;
+
+multiclass FPCMov<PatLeaf cc> {
+ def _Fp32 : FpIf32CMov<(outs RFP32:$dst), (ins RFP32:$src1, RFP32:$src2),
+ CondMovFP,
+ [(set RFP32:$dst, (X86cmov RFP32:$src1, RFP32:$src2,
+ cc, EFLAGS))]>;
+ def _Fp64 : FpIf64CMov<(outs RFP64:$dst), (ins RFP64:$src1, RFP64:$src2),
+ CondMovFP,
+ [(set RFP64:$dst, (X86cmov RFP64:$src1, RFP64:$src2,
+ cc, EFLAGS))]>;
+ def _Fp80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, RFP80:$src2),
+ CondMovFP,
+ [(set RFP80:$dst, (X86cmov RFP80:$src1, RFP80:$src2,
+ cc, EFLAGS))]>,
+ Requires<[HasCMov]>;
+}
+
+let Uses = [EFLAGS], Constraints = "$src1 = $dst" in {
+defm CMOVB : FPCMov<X86_COND_B>;
+defm CMOVBE : FPCMov<X86_COND_BE>;
+defm CMOVE : FPCMov<X86_COND_E>;
+defm CMOVP : FPCMov<X86_COND_P>;
+defm CMOVNB : FPCMov<X86_COND_AE>;
+defm CMOVNBE: FPCMov<X86_COND_A>;
+defm CMOVNE : FPCMov<X86_COND_NE>;
+defm CMOVNP : FPCMov<X86_COND_NP>;
+} // Uses = [EFLAGS], Constraints = "$src1 = $dst"
+
+let Predicates = [HasCMov] in {
+// These are not factored because there's no clean way to pass DA/DB.
+def CMOVB_F : FPI<0xC0, AddRegFrm, (outs RST:$op), (ins),
+ "fcmovb\t{$op, %st(0)|%ST(0), $op}">, DA;
+def CMOVBE_F : FPI<0xD0, AddRegFrm, (outs RST:$op), (ins),
+ "fcmovbe\t{$op, %st(0)|%ST(0), $op}">, DA;
+def CMOVE_F : FPI<0xC8, AddRegFrm, (outs RST:$op), (ins),
+ "fcmove\t{$op, %st(0)|%ST(0), $op}">, DA;
+def CMOVP_F : FPI<0xD8, AddRegFrm, (outs RST:$op), (ins),
+ "fcmovu\t {$op, %st(0)|%ST(0), $op}">, DA;
+def CMOVNB_F : FPI<0xC0, AddRegFrm, (outs RST:$op), (ins),
+ "fcmovnb\t{$op, %st(0)|%ST(0), $op}">, DB;
+def CMOVNBE_F: FPI<0xD0, AddRegFrm, (outs RST:$op), (ins),
+ "fcmovnbe\t{$op, %st(0)|%ST(0), $op}">, DB;
+def CMOVNE_F : FPI<0xC8, AddRegFrm, (outs RST:$op), (ins),
+ "fcmovne\t{$op, %st(0)|%ST(0), $op}">, DB;
+def CMOVNP_F : FPI<0xD8, AddRegFrm, (outs RST:$op), (ins),
+ "fcmovnu\t{$op, %st(0)|%ST(0), $op}">, DB;
+} // Predicates = [HasCMov]
+
+// Floating point loads & stores.
+let canFoldAsLoad = 1 in {
+def LD_Fp32m : FpIf32<(outs RFP32:$dst), (ins f32mem:$src), ZeroArgFP,
+ [(set RFP32:$dst, (loadf32 addr:$src))]>;
+let isReMaterializable = 1 in
+ def LD_Fp64m : FpIf64<(outs RFP64:$dst), (ins f64mem:$src), ZeroArgFP,
+ [(set RFP64:$dst, (loadf64 addr:$src))]>;
+def LD_Fp80m : FpI_<(outs RFP80:$dst), (ins f80mem:$src), ZeroArgFP,
+ [(set RFP80:$dst, (loadf80 addr:$src))]>;
+}
+def LD_Fp32m64 : FpIf64<(outs RFP64:$dst), (ins f32mem:$src), ZeroArgFP,
+ [(set RFP64:$dst, (f64 (extloadf32 addr:$src)))]>;
+def LD_Fp64m80 : FpI_<(outs RFP80:$dst), (ins f64mem:$src), ZeroArgFP,
+ [(set RFP80:$dst, (f80 (extloadf64 addr:$src)))]>;
+def LD_Fp32m80 : FpI_<(outs RFP80:$dst), (ins f32mem:$src), ZeroArgFP,
+ [(set RFP80:$dst, (f80 (extloadf32 addr:$src)))]>;
+def ILD_Fp16m32: FpIf32<(outs RFP32:$dst), (ins i16mem:$src), ZeroArgFP,
+ [(set RFP32:$dst, (X86fild addr:$src, i16))]>;
+def ILD_Fp32m32: FpIf32<(outs RFP32:$dst), (ins i32mem:$src), ZeroArgFP,
+ [(set RFP32:$dst, (X86fild addr:$src, i32))]>;
+def ILD_Fp64m32: FpIf32<(outs RFP32:$dst), (ins i64mem:$src), ZeroArgFP,
+ [(set RFP32:$dst, (X86fild addr:$src, i64))]>;
+def ILD_Fp16m64: FpIf64<(outs RFP64:$dst), (ins i16mem:$src), ZeroArgFP,
+ [(set RFP64:$dst, (X86fild addr:$src, i16))]>;
+def ILD_Fp32m64: FpIf64<(outs RFP64:$dst), (ins i32mem:$src), ZeroArgFP,
+ [(set RFP64:$dst, (X86fild addr:$src, i32))]>;
+def ILD_Fp64m64: FpIf64<(outs RFP64:$dst), (ins i64mem:$src), ZeroArgFP,
+ [(set RFP64:$dst, (X86fild addr:$src, i64))]>;
+def ILD_Fp16m80: FpI_<(outs RFP80:$dst), (ins i16mem:$src), ZeroArgFP,
+ [(set RFP80:$dst, (X86fild addr:$src, i16))]>;
+def ILD_Fp32m80: FpI_<(outs RFP80:$dst), (ins i32mem:$src), ZeroArgFP,
+ [(set RFP80:$dst, (X86fild addr:$src, i32))]>;
+def ILD_Fp64m80: FpI_<(outs RFP80:$dst), (ins i64mem:$src), ZeroArgFP,
+ [(set RFP80:$dst, (X86fild addr:$src, i64))]>;
+
+def ST_Fp32m : FpIf32<(outs), (ins f32mem:$op, RFP32:$src), OneArgFP,
+ [(store RFP32:$src, addr:$op)]>;
+def ST_Fp64m32 : FpIf64<(outs), (ins f32mem:$op, RFP64:$src), OneArgFP,
+ [(truncstoref32 RFP64:$src, addr:$op)]>;
+def ST_Fp64m : FpIf64<(outs), (ins f64mem:$op, RFP64:$src), OneArgFP,
+ [(store RFP64:$src, addr:$op)]>;
+def ST_Fp80m32 : FpI_<(outs), (ins f32mem:$op, RFP80:$src), OneArgFP,
+ [(truncstoref32 RFP80:$src, addr:$op)]>;
+def ST_Fp80m64 : FpI_<(outs), (ins f64mem:$op, RFP80:$src), OneArgFP,
+ [(truncstoref64 RFP80:$src, addr:$op)]>;
+// FST does not support 80-bit memory target; FSTP must be used.
+
+let mayStore = 1, neverHasSideEffects = 1 in {
+def ST_FpP32m : FpIf32<(outs), (ins f32mem:$op, RFP32:$src), OneArgFP, []>;
+def ST_FpP64m32 : FpIf64<(outs), (ins f32mem:$op, RFP64:$src), OneArgFP, []>;
+def ST_FpP64m : FpIf64<(outs), (ins f64mem:$op, RFP64:$src), OneArgFP, []>;
+def ST_FpP80m32 : FpI_<(outs), (ins f32mem:$op, RFP80:$src), OneArgFP, []>;
+def ST_FpP80m64 : FpI_<(outs), (ins f64mem:$op, RFP80:$src), OneArgFP, []>;
+}
+def ST_FpP80m : FpI_<(outs), (ins f80mem:$op, RFP80:$src), OneArgFP,
+ [(store RFP80:$src, addr:$op)]>;
+let mayStore = 1, neverHasSideEffects = 1 in {
+def IST_Fp16m32 : FpIf32<(outs), (ins i16mem:$op, RFP32:$src), OneArgFP, []>;
+def IST_Fp32m32 : FpIf32<(outs), (ins i32mem:$op, RFP32:$src), OneArgFP, []>;
+def IST_Fp64m32 : FpIf32<(outs), (ins i64mem:$op, RFP32:$src), OneArgFP, []>;
+def IST_Fp16m64 : FpIf64<(outs), (ins i16mem:$op, RFP64:$src), OneArgFP, []>;
+def IST_Fp32m64 : FpIf64<(outs), (ins i32mem:$op, RFP64:$src), OneArgFP, []>;
+def IST_Fp64m64 : FpIf64<(outs), (ins i64mem:$op, RFP64:$src), OneArgFP, []>;
+def IST_Fp16m80 : FpI_<(outs), (ins i16mem:$op, RFP80:$src), OneArgFP, []>;
+def IST_Fp32m80 : FpI_<(outs), (ins i32mem:$op, RFP80:$src), OneArgFP, []>;
+def IST_Fp64m80 : FpI_<(outs), (ins i64mem:$op, RFP80:$src), OneArgFP, []>;
+}
+
+let mayLoad = 1 in {
+def LD_F32m : FPI<0xD9, MRM0m, (outs), (ins f32mem:$src), "fld{s}\t$src">;
+def LD_F64m : FPI<0xDD, MRM0m, (outs), (ins f64mem:$src), "fld{l}\t$src">;
+def LD_F80m : FPI<0xDB, MRM5m, (outs), (ins f80mem:$src), "fld{t}\t$src">;
+def ILD_F16m : FPI<0xDF, MRM0m, (outs), (ins i16mem:$src), "fild{s}\t$src">;
+def ILD_F32m : FPI<0xDB, MRM0m, (outs), (ins i32mem:$src), "fild{l}\t$src">;
+def ILD_F64m : FPI<0xDF, MRM5m, (outs), (ins i64mem:$src), "fild{ll}\t$src">;
+}
+let mayStore = 1 in {
+def ST_F32m : FPI<0xD9, MRM2m, (outs), (ins f32mem:$dst), "fst{s}\t$dst">;
+def ST_F64m : FPI<0xDD, MRM2m, (outs), (ins f64mem:$dst), "fst{l}\t$dst">;
+def ST_FP32m : FPI<0xD9, MRM3m, (outs), (ins f32mem:$dst), "fstp{s}\t$dst">;
+def ST_FP64m : FPI<0xDD, MRM3m, (outs), (ins f64mem:$dst), "fstp{l}\t$dst">;
+def ST_FP80m : FPI<0xDB, MRM7m, (outs), (ins f80mem:$dst), "fstp{t}\t$dst">;
+def IST_F16m : FPI<0xDF, MRM2m, (outs), (ins i16mem:$dst), "fist{s}\t$dst">;
+def IST_F32m : FPI<0xDB, MRM2m, (outs), (ins i32mem:$dst), "fist{l}\t$dst">;
+def IST_FP16m : FPI<0xDF, MRM3m, (outs), (ins i16mem:$dst), "fistp{s}\t$dst">;
+def IST_FP32m : FPI<0xDB, MRM3m, (outs), (ins i32mem:$dst), "fistp{l}\t$dst">;
+def IST_FP64m : FPI<0xDF, MRM7m, (outs), (ins i64mem:$dst), "fistp{ll}\t$dst">;
+}
+
+// FISTTP requires SSE3 even though it's a FPStack op.
+def ISTT_Fp16m32 : FpI_<(outs), (ins i16mem:$op, RFP32:$src), OneArgFP,
+ [(X86fp_to_i16mem RFP32:$src, addr:$op)]>,
+ Requires<[HasSSE3]>;
+def ISTT_Fp32m32 : FpI_<(outs), (ins i32mem:$op, RFP32:$src), OneArgFP,
+ [(X86fp_to_i32mem RFP32:$src, addr:$op)]>,
+ Requires<[HasSSE3]>;
+def ISTT_Fp64m32 : FpI_<(outs), (ins i64mem:$op, RFP32:$src), OneArgFP,
+ [(X86fp_to_i64mem RFP32:$src, addr:$op)]>,
+ Requires<[HasSSE3]>;
+def ISTT_Fp16m64 : FpI_<(outs), (ins i16mem:$op, RFP64:$src), OneArgFP,
+ [(X86fp_to_i16mem RFP64:$src, addr:$op)]>,
+ Requires<[HasSSE3]>;
+def ISTT_Fp32m64 : FpI_<(outs), (ins i32mem:$op, RFP64:$src), OneArgFP,
+ [(X86fp_to_i32mem RFP64:$src, addr:$op)]>,
+ Requires<[HasSSE3]>;
+def ISTT_Fp64m64 : FpI_<(outs), (ins i64mem:$op, RFP64:$src), OneArgFP,
+ [(X86fp_to_i64mem RFP64:$src, addr:$op)]>,
+ Requires<[HasSSE3]>;
+def ISTT_Fp16m80 : FpI_<(outs), (ins i16mem:$op, RFP80:$src), OneArgFP,
+ [(X86fp_to_i16mem RFP80:$src, addr:$op)]>,
+ Requires<[HasSSE3]>;
+def ISTT_Fp32m80 : FpI_<(outs), (ins i32mem:$op, RFP80:$src), OneArgFP,
+ [(X86fp_to_i32mem RFP80:$src, addr:$op)]>,
+ Requires<[HasSSE3]>;
+def ISTT_Fp64m80 : FpI_<(outs), (ins i64mem:$op, RFP80:$src), OneArgFP,
+ [(X86fp_to_i64mem RFP80:$src, addr:$op)]>,
+ Requires<[HasSSE3]>;
+
+let mayStore = 1 in {
+def ISTT_FP16m : FPI<0xDF, MRM1m, (outs), (ins i16mem:$dst), "fisttp{s}\t$dst">;
+def ISTT_FP32m : FPI<0xDB, MRM1m, (outs), (ins i32mem:$dst), "fisttp{l}\t$dst">;
+def ISTT_FP64m : FPI<0xDD, MRM1m, (outs), (ins i64mem:$dst),
+ "fisttp{ll}\t$dst">;
+}
+
+// FP Stack manipulation instructions.
+def LD_Frr : FPI<0xC0, AddRegFrm, (outs), (ins RST:$op), "fld\t$op">, D9;
+def ST_Frr : FPI<0xD0, AddRegFrm, (outs), (ins RST:$op), "fst\t$op">, DD;
+def ST_FPrr : FPI<0xD8, AddRegFrm, (outs), (ins RST:$op), "fstp\t$op">, DD;
+def XCH_F : FPI<0xC8, AddRegFrm, (outs), (ins RST:$op), "fxch\t$op">, D9;
+
+// Floating point constant loads.
+let isReMaterializable = 1 in {
+def LD_Fp032 : FpIf32<(outs RFP32:$dst), (ins), ZeroArgFP,
+ [(set RFP32:$dst, fpimm0)]>;
+def LD_Fp132 : FpIf32<(outs RFP32:$dst), (ins), ZeroArgFP,
+ [(set RFP32:$dst, fpimm1)]>;
+def LD_Fp064 : FpIf64<(outs RFP64:$dst), (ins), ZeroArgFP,
+ [(set RFP64:$dst, fpimm0)]>;
+def LD_Fp164 : FpIf64<(outs RFP64:$dst), (ins), ZeroArgFP,
+ [(set RFP64:$dst, fpimm1)]>;
+def LD_Fp080 : FpI_<(outs RFP80:$dst), (ins), ZeroArgFP,
+ [(set RFP80:$dst, fpimm0)]>;
+def LD_Fp180 : FpI_<(outs RFP80:$dst), (ins), ZeroArgFP,
+ [(set RFP80:$dst, fpimm1)]>;
+}
+
+def LD_F0 : FPI<0xEE, RawFrm, (outs), (ins), "fldz">, D9;
+def LD_F1 : FPI<0xE8, RawFrm, (outs), (ins), "fld1">, D9;
+
+
+// Floating point compares.
+let Defs = [EFLAGS] in {
+def UCOM_Fpr32 : FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
+ []>; // FPSW = cmp ST(0) with ST(i)
+def UCOM_Fpr64 : FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP,
+ []>; // FPSW = cmp ST(0) with ST(i)
+def UCOM_Fpr80 : FpI_ <(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP,
+ []>; // FPSW = cmp ST(0) with ST(i)
+
+// CC = ST(0) cmp ST(i)
+def UCOM_FpIr32: FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
+ [(set EFLAGS, (X86cmp RFP32:$lhs, RFP32:$rhs))]>;
+def UCOM_FpIr64: FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP,
+ [(set EFLAGS, (X86cmp RFP64:$lhs, RFP64:$rhs))]>;
+def UCOM_FpIr80: FpI_<(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP,
+ [(set EFLAGS, (X86cmp RFP80:$lhs, RFP80:$rhs))]>;
+}
+
+let Defs = [EFLAGS], Uses = [ST0] in {
+def UCOM_Fr : FPI<0xE0, AddRegFrm, // FPSW = cmp ST(0) with ST(i)
+ (outs), (ins RST:$reg),
+ "fucom\t$reg">, DD;
+def UCOM_FPr : FPI<0xE8, AddRegFrm, // FPSW = cmp ST(0) with ST(i), pop
+ (outs), (ins RST:$reg),
+ "fucomp\t$reg">, DD;
+def UCOM_FPPr : FPI<0xE9, RawFrm, // cmp ST(0) with ST(1), pop, pop
+ (outs), (ins),
+ "fucompp">, DA;
+
+def UCOM_FIr : FPI<0xE8, AddRegFrm, // CC = cmp ST(0) with ST(i)
+ (outs), (ins RST:$reg),
+ "fucomi\t$reg">, DB;
+def UCOM_FIPr : FPI<0xE8, AddRegFrm, // CC = cmp ST(0) with ST(i), pop
+ (outs), (ins RST:$reg),
+ "fucompi\t$reg">, DF;
+}
+
+def COM_FIr : FPI<0xF0, AddRegFrm, (outs), (ins RST:$reg),
+ "fcomi\t$reg">, DB;
+def COM_FIPr : FPI<0xF0, AddRegFrm, (outs), (ins RST:$reg),
+ "fcompi\t$reg">, DF;
+
+// Floating point flag ops.
+let Defs = [AX] in
+def FNSTSW8r : I<0xE0, RawFrm, // AX = fp flags
+ (outs), (ins), "fnstsw %ax", []>, DF;
+
+def FNSTCW16m : I<0xD9, MRM7m, // [mem16] = X87 control world
+ (outs), (ins i16mem:$dst), "fnstcw\t$dst",
+ [(X86fp_cwd_get16 addr:$dst)]>;
+
+let mayLoad = 1 in
+def FLDCW16m : I<0xD9, MRM5m, // X87 control world = [mem16]
+ (outs), (ins i16mem:$dst), "fldcw\t$dst", []>;
+
+// FPU control instructions
+def FNINIT : I<0xE3, RawFrm, (outs), (ins), "fninit", []>, DB;
+def FFREE : FPI<0xC0, AddRegFrm, (outs), (ins RST:$reg),
+ "ffree\t$reg">, DD;
+
+// Clear exceptions
+
+def FNCLEX : I<0xE2, RawFrm, (outs), (ins), "fnclex", []>, DB;
+
+// Operandless floating-point instructions for the disassembler.
+def WAIT : I<0x9B, RawFrm, (outs), (ins), "wait", []>;
+
+def FNOP : I<0xD0, RawFrm, (outs), (ins), "fnop", []>, D9;
+def FXAM : I<0xE5, RawFrm, (outs), (ins), "fxam", []>, D9;
+def FLDL2T : I<0xE9, RawFrm, (outs), (ins), "fldl2t", []>, D9;
+def FLDL2E : I<0xEA, RawFrm, (outs), (ins), "fldl2e", []>, D9;
+def FLDPI : I<0xEB, RawFrm, (outs), (ins), "fldpi", []>, D9;
+def FLDLG2 : I<0xEC, RawFrm, (outs), (ins), "fldlg2", []>, D9;
+def FLDLN2 : I<0xED, RawFrm, (outs), (ins), "fldln2", []>, D9;
+def F2XM1 : I<0xF0, RawFrm, (outs), (ins), "f2xm1", []>, D9;
+def FYL2X : I<0xF1, RawFrm, (outs), (ins), "fyl2x", []>, D9;
+def FPTAN : I<0xF2, RawFrm, (outs), (ins), "fptan", []>, D9;
+def FPATAN : I<0xF3, RawFrm, (outs), (ins), "fpatan", []>, D9;
+def FXTRACT : I<0xF4, RawFrm, (outs), (ins), "fxtract", []>, D9;
+def FPREM1 : I<0xF5, RawFrm, (outs), (ins), "fprem1", []>, D9;
+def FDECSTP : I<0xF6, RawFrm, (outs), (ins), "fdecstp", []>, D9;
+def FINCSTP : I<0xF7, RawFrm, (outs), (ins), "fincstp", []>, D9;
+def FPREM : I<0xF8, RawFrm, (outs), (ins), "fprem", []>, D9;
+def FYL2XP1 : I<0xF9, RawFrm, (outs), (ins), "fyl2xp1", []>, D9;
+def FSINCOS : I<0xFB, RawFrm, (outs), (ins), "fsincos", []>, D9;
+def FRNDINT : I<0xFC, RawFrm, (outs), (ins), "frndint", []>, D9;
+def FSCALE : I<0xFD, RawFrm, (outs), (ins), "fscale", []>, D9;
+def FCOMPP : I<0xD9, RawFrm, (outs), (ins), "fcompp", []>, DE;
+
+def FXSAVE : I<0xAE, MRM0m, (outs opaque512mem:$dst), (ins),
+ "fxsave\t$dst", []>, TB;
+def FXSAVE64 : I<0xAE, MRM0m, (outs opaque512mem:$dst), (ins),
+ "fxsaveq\t$dst", []>, TB, REX_W, Requires<[In64BitMode]>;
+def FXRSTOR : I<0xAE, MRM1m, (outs), (ins opaque512mem:$src),
+ "fxrstor\t$src", []>, TB;
+def FXRSTOR64 : I<0xAE, MRM1m, (outs), (ins opaque512mem:$src),
+ "fxrstorq\t$src", []>, TB, REX_W, Requires<[In64BitMode]>;
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//===----------------------------------------------------------------------===//
+
+// Required for RET of f32 / f64 / f80 values.
+def : Pat<(X86fld addr:$src, f32), (LD_Fp32m addr:$src)>;
+def : Pat<(X86fld addr:$src, f64), (LD_Fp64m addr:$src)>;
+def : Pat<(X86fld addr:$src, f80), (LD_Fp80m addr:$src)>;
+
+// Required for CALL which return f32 / f64 / f80 values.
+def : Pat<(X86fst RFP32:$src, addr:$op, f32), (ST_Fp32m addr:$op, RFP32:$src)>;
+def : Pat<(X86fst RFP64:$src, addr:$op, f32), (ST_Fp64m32 addr:$op,
+ RFP64:$src)>;
+def : Pat<(X86fst RFP64:$src, addr:$op, f64), (ST_Fp64m addr:$op, RFP64:$src)>;
+def : Pat<(X86fst RFP80:$src, addr:$op, f32), (ST_Fp80m32 addr:$op,
+ RFP80:$src)>;
+def : Pat<(X86fst RFP80:$src, addr:$op, f64), (ST_Fp80m64 addr:$op,
+ RFP80:$src)>;
+def : Pat<(X86fst RFP80:$src, addr:$op, f80), (ST_FpP80m addr:$op,
+ RFP80:$src)>;
+
+// Floating point constant -0.0 and -1.0
+def : Pat<(f32 fpimmneg0), (CHS_Fp32 (LD_Fp032))>, Requires<[FPStackf32]>;
+def : Pat<(f32 fpimmneg1), (CHS_Fp32 (LD_Fp132))>, Requires<[FPStackf32]>;
+def : Pat<(f64 fpimmneg0), (CHS_Fp64 (LD_Fp064))>, Requires<[FPStackf64]>;
+def : Pat<(f64 fpimmneg1), (CHS_Fp64 (LD_Fp164))>, Requires<[FPStackf64]>;
+def : Pat<(f80 fpimmneg0), (CHS_Fp80 (LD_Fp080))>;
+def : Pat<(f80 fpimmneg1), (CHS_Fp80 (LD_Fp180))>;
+
+// Used to conv. i64 to f64 since there isn't a SSE version.
+def : Pat<(X86fildflag addr:$src, i64), (ILD_Fp64m64 addr:$src)>;
+
+// FP extensions map onto simple pseudo-value conversions if they are to/from
+// the FP stack.
+def : Pat<(f64 (fextend RFP32:$src)), (COPY_TO_REGCLASS RFP32:$src, RFP64)>,
+ Requires<[FPStackf32]>;
+def : Pat<(f80 (fextend RFP32:$src)), (COPY_TO_REGCLASS RFP32:$src, RFP80)>,
+ Requires<[FPStackf32]>;
+def : Pat<(f80 (fextend RFP64:$src)), (COPY_TO_REGCLASS RFP64:$src, RFP80)>,
+ Requires<[FPStackf64]>;
+
+// FP truncations map onto simple pseudo-value conversions if they are to/from
+// the FP stack. We have validated that only value-preserving truncations make
+// it through isel.
+def : Pat<(f32 (fround RFP64:$src)), (COPY_TO_REGCLASS RFP64:$src, RFP32)>,
+ Requires<[FPStackf32]>;
+def : Pat<(f32 (fround RFP80:$src)), (COPY_TO_REGCLASS RFP80:$src, RFP32)>,
+ Requires<[FPStackf32]>;
+def : Pat<(f64 (fround RFP80:$src)), (COPY_TO_REGCLASS RFP80:$src, RFP64)>,
+ Requires<[FPStackf64]>;
diff --git a/contrib/llvm/lib/Target/X86/X86InstrFormats.td b/contrib/llvm/lib/Target/X86/X86InstrFormats.td
new file mode 100644
index 0000000..0a1590b
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrFormats.td
@@ -0,0 +1,541 @@
+//===- X86InstrFormats.td - X86 Instruction Formats --------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// X86 Instruction Format Definitions.
+//
+
+// Format specifies the encoding used by the instruction. This is part of the
+// ad-hoc solution used to emit machine instruction encodings by our machine
+// code emitter.
+class Format<bits<6> val> {
+ bits<6> Value = val;
+}
+
+def Pseudo : Format<0>; def RawFrm : Format<1>;
+def AddRegFrm : Format<2>; def MRMDestReg : Format<3>;
+def MRMDestMem : Format<4>; def MRMSrcReg : Format<5>;
+def MRMSrcMem : Format<6>;
+def MRM0r : Format<16>; def MRM1r : Format<17>; def MRM2r : Format<18>;
+def MRM3r : Format<19>; def MRM4r : Format<20>; def MRM5r : Format<21>;
+def MRM6r : Format<22>; def MRM7r : Format<23>;
+def MRM0m : Format<24>; def MRM1m : Format<25>; def MRM2m : Format<26>;
+def MRM3m : Format<27>; def MRM4m : Format<28>; def MRM5m : Format<29>;
+def MRM6m : Format<30>; def MRM7m : Format<31>;
+def MRMInitReg : Format<32>;
+def MRM_C1 : Format<33>;
+def MRM_C2 : Format<34>;
+def MRM_C3 : Format<35>;
+def MRM_C4 : Format<36>;
+def MRM_C8 : Format<37>;
+def MRM_C9 : Format<38>;
+def MRM_E8 : Format<39>;
+def MRM_F0 : Format<40>;
+def MRM_F8 : Format<41>;
+def MRM_F9 : Format<42>;
+def RawFrmImm8 : Format<43>;
+def RawFrmImm16 : Format<44>;
+def MRM_D0 : Format<45>;
+def MRM_D1 : Format<46>;
+
+// ImmType - This specifies the immediate type used by an instruction. This is
+// part of the ad-hoc solution used to emit machine instruction encodings by our
+// machine code emitter.
+class ImmType<bits<3> val> {
+ bits<3> Value = val;
+}
+def NoImm : ImmType<0>;
+def Imm8 : ImmType<1>;
+def Imm8PCRel : ImmType<2>;
+def Imm16 : ImmType<3>;
+def Imm16PCRel : ImmType<4>;
+def Imm32 : ImmType<5>;
+def Imm32PCRel : ImmType<6>;
+def Imm64 : ImmType<7>;
+
+// FPFormat - This specifies what form this FP instruction has. This is used by
+// the Floating-Point stackifier pass.
+class FPFormat<bits<3> val> {
+ bits<3> Value = val;
+}
+def NotFP : FPFormat<0>;
+def ZeroArgFP : FPFormat<1>;
+def OneArgFP : FPFormat<2>;
+def OneArgFPRW : FPFormat<3>;
+def TwoArgFP : FPFormat<4>;
+def CompareFP : FPFormat<5>;
+def CondMovFP : FPFormat<6>;
+def SpecialFP : FPFormat<7>;
+
+// Class specifying the SSE execution domain, used by the SSEDomainFix pass.
+// Keep in sync with tables in X86InstrInfo.cpp.
+class Domain<bits<2> val> {
+ bits<2> Value = val;
+}
+def GenericDomain : Domain<0>;
+def SSEPackedSingle : Domain<1>;
+def SSEPackedDouble : Domain<2>;
+def SSEPackedInt : Domain<3>;
+
+// Prefix byte classes which are used to indicate to the ad-hoc machine code
+// emitter that various prefix bytes are required.
+class OpSize { bit hasOpSizePrefix = 1; }
+class AdSize { bit hasAdSizePrefix = 1; }
+class REX_W { bit hasREX_WPrefix = 1; }
+class LOCK { bit hasLockPrefix = 1; }
+class SegFS { bits<2> SegOvrBits = 1; }
+class SegGS { bits<2> SegOvrBits = 2; }
+class TB { bits<5> Prefix = 1; }
+class REP { bits<5> Prefix = 2; }
+class D8 { bits<5> Prefix = 3; }
+class D9 { bits<5> Prefix = 4; }
+class DA { bits<5> Prefix = 5; }
+class DB { bits<5> Prefix = 6; }
+class DC { bits<5> Prefix = 7; }
+class DD { bits<5> Prefix = 8; }
+class DE { bits<5> Prefix = 9; }
+class DF { bits<5> Prefix = 10; }
+class XD { bits<5> Prefix = 11; }
+class XS { bits<5> Prefix = 12; }
+class T8 { bits<5> Prefix = 13; }
+class TA { bits<5> Prefix = 14; }
+class A6 { bits<5> Prefix = 15; }
+class A7 { bits<5> Prefix = 16; }
+class TF { bits<5> Prefix = 17; }
+class VEX { bit hasVEXPrefix = 1; }
+class VEX_W { bit hasVEX_WPrefix = 1; }
+class VEX_4V : VEX { bit hasVEX_4VPrefix = 1; }
+class VEX_I8IMM { bit hasVEX_i8ImmReg = 1; }
+class VEX_L { bit hasVEX_L = 1; }
+class VEX_LIG { bit ignoresVEX_L = 1; }
+class Has3DNow0F0FOpcode { bit has3DNow0F0FOpcode = 1; }
+
+class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
+ string AsmStr, Domain d = GenericDomain>
+ : Instruction {
+ let Namespace = "X86";
+
+ bits<8> Opcode = opcod;
+ Format Form = f;
+ bits<6> FormBits = Form.Value;
+ ImmType ImmT = i;
+
+ dag OutOperandList = outs;
+ dag InOperandList = ins;
+ string AsmString = AsmStr;
+
+ // If this is a pseudo instruction, mark it isCodeGenOnly.
+ let isCodeGenOnly = !eq(!cast<string>(f), "Pseudo");
+
+ //
+ // Attributes specific to X86 instructions...
+ //
+ bit hasOpSizePrefix = 0; // Does this inst have a 0x66 prefix?
+ bit hasAdSizePrefix = 0; // Does this inst have a 0x67 prefix?
+
+ bits<5> Prefix = 0; // Which prefix byte does this inst have?
+ bit hasREX_WPrefix = 0; // Does this inst require the REX.W prefix?
+ FPFormat FPForm = NotFP; // What flavor of FP instruction is this?
+ bit hasLockPrefix = 0; // Does this inst have a 0xF0 prefix?
+ bits<2> SegOvrBits = 0; // Segment override prefix.
+ Domain ExeDomain = d;
+ bit hasVEXPrefix = 0; // Does this inst require a VEX prefix?
+ bit hasVEX_WPrefix = 0; // Does this inst set the VEX_W field?
+ bit hasVEX_4VPrefix = 0; // Does this inst require the VEX.VVVV field?
+ bit hasVEX_i8ImmReg = 0; // Does this inst require the last source register
+ // to be encoded in a immediate field?
+ bit hasVEX_L = 0; // Does this inst use large (256-bit) registers?
+ bit ignoresVEX_L = 0; // Does this instruction ignore the L-bit
+ bit has3DNow0F0FOpcode =0;// Wacky 3dNow! encoding?
+
+ // TSFlags layout should be kept in sync with X86InstrInfo.h.
+ let TSFlags{5-0} = FormBits;
+ let TSFlags{6} = hasOpSizePrefix;
+ let TSFlags{7} = hasAdSizePrefix;
+ let TSFlags{12-8} = Prefix;
+ let TSFlags{13} = hasREX_WPrefix;
+ let TSFlags{16-14} = ImmT.Value;
+ let TSFlags{19-17} = FPForm.Value;
+ let TSFlags{20} = hasLockPrefix;
+ let TSFlags{22-21} = SegOvrBits;
+ let TSFlags{24-23} = ExeDomain.Value;
+ let TSFlags{32-25} = Opcode;
+ let TSFlags{33} = hasVEXPrefix;
+ let TSFlags{34} = hasVEX_WPrefix;
+ let TSFlags{35} = hasVEX_4VPrefix;
+ let TSFlags{36} = hasVEX_i8ImmReg;
+ let TSFlags{37} = hasVEX_L;
+ let TSFlags{38} = ignoresVEX_L;
+ let TSFlags{39} = has3DNow0F0FOpcode;
+}
+
+class PseudoI<dag oops, dag iops, list<dag> pattern>
+ : X86Inst<0, Pseudo, NoImm, oops, iops, ""> {
+ let Pattern = pattern;
+}
+
+class I<bits<8> o, Format f, dag outs, dag ins, string asm,
+ list<dag> pattern, Domain d = GenericDomain>
+ : X86Inst<o, f, NoImm, outs, ins, asm, d> {
+ let Pattern = pattern;
+ let CodeSize = 3;
+}
+class Ii8 <bits<8> o, Format f, dag outs, dag ins, string asm,
+ list<dag> pattern, Domain d = GenericDomain>
+ : X86Inst<o, f, Imm8, outs, ins, asm, d> {
+ let Pattern = pattern;
+ let CodeSize = 3;
+}
+class Ii8PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : X86Inst<o, f, Imm8PCRel, outs, ins, asm> {
+ let Pattern = pattern;
+ let CodeSize = 3;
+}
+class Ii16<bits<8> o, Format f, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : X86Inst<o, f, Imm16, outs, ins, asm> {
+ let Pattern = pattern;
+ let CodeSize = 3;
+}
+class Ii32<bits<8> o, Format f, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : X86Inst<o, f, Imm32, outs, ins, asm> {
+ let Pattern = pattern;
+ let CodeSize = 3;
+}
+
+class Ii16PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : X86Inst<o, f, Imm16PCRel, outs, ins, asm> {
+ let Pattern = pattern;
+ let CodeSize = 3;
+}
+
+class Ii32PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : X86Inst<o, f, Imm32PCRel, outs, ins, asm> {
+ let Pattern = pattern;
+ let CodeSize = 3;
+}
+
+// FPStack Instruction Templates:
+// FPI - Floating Point Instruction template.
+class FPI<bits<8> o, Format F, dag outs, dag ins, string asm>
+ : I<o, F, outs, ins, asm, []> {}
+
+// FpI_ - Floating Point Pseudo Instruction template. Not Predicated.
+class FpI_<dag outs, dag ins, FPFormat fp, list<dag> pattern>
+ : X86Inst<0, Pseudo, NoImm, outs, ins, ""> {
+ let FPForm = fp;
+ let Pattern = pattern;
+}
+
+// Templates for instructions that use a 16- or 32-bit segmented address as
+// their only operand: lcall (FAR CALL) and ljmp (FAR JMP)
+//
+// Iseg16 - 16-bit segment selector, 16-bit offset
+// Iseg32 - 16-bit segment selector, 32-bit offset
+
+class Iseg16 <bits<8> o, Format f, dag outs, dag ins, string asm,
+ list<dag> pattern> : X86Inst<o, f, Imm16, outs, ins, asm> {
+ let Pattern = pattern;
+ let CodeSize = 3;
+}
+
+class Iseg32 <bits<8> o, Format f, dag outs, dag ins, string asm,
+ list<dag> pattern> : X86Inst<o, f, Imm32, outs, ins, asm> {
+ let Pattern = pattern;
+ let CodeSize = 3;
+}
+
+// SI - SSE 1 & 2 scalar instructions
+class SI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern> {
+ let Predicates = !if(hasVEXPrefix /* VEX */, [HasAVX],
+ !if(!eq(Prefix, 12 /* XS */), [HasSSE1], [HasSSE2]));
+
+ // AVX instructions have a 'v' prefix in the mnemonic
+ let AsmString = !if(hasVEXPrefix, !strconcat("v", asm), asm);
+}
+
+// SIi8 - SSE 1 & 2 scalar instructions
+class SIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern> {
+ let Predicates = !if(hasVEXPrefix /* VEX */, [HasAVX],
+ !if(!eq(Prefix, 12 /* XS */), [HasSSE1], [HasSSE2]));
+
+ // AVX instructions have a 'v' prefix in the mnemonic
+ let AsmString = !if(hasVEXPrefix, !strconcat("v", asm), asm);
+}
+
+// PI - SSE 1 & 2 packed instructions
+class PI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern,
+ Domain d>
+ : I<o, F, outs, ins, asm, pattern, d> {
+ let Predicates = !if(hasVEXPrefix /* VEX */, [HasAVX],
+ !if(hasOpSizePrefix /* OpSize */, [HasSSE2], [HasSSE1]));
+
+ // AVX instructions have a 'v' prefix in the mnemonic
+ let AsmString = !if(hasVEXPrefix, !strconcat("v", asm), asm);
+}
+
+// PIi8 - SSE 1 & 2 packed instructions with immediate
+class PIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, Domain d>
+ : Ii8<o, F, outs, ins, asm, pattern, d> {
+ let Predicates = !if(hasVEX_4VPrefix /* VEX */, [HasAVX],
+ !if(hasOpSizePrefix /* OpSize */, [HasSSE2], [HasSSE1]));
+
+ // AVX instructions have a 'v' prefix in the mnemonic
+ let AsmString = !if(hasVEX_4VPrefix, !strconcat("v", asm), asm);
+}
+
+// SSE1 Instruction Templates:
+//
+// SSI - SSE1 instructions with XS prefix.
+// PSI - SSE1 instructions with TB prefix.
+// PSIi8 - SSE1 instructions with ImmT == Imm8 and TB prefix.
+// VSSI - SSE1 instructions with XS prefix in AVX form.
+// VPSI - SSE1 instructions with TB prefix in AVX form.
+
+class SSI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE1]>;
+class SSIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE1]>;
+class PSI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern, SSEPackedSingle>, TB,
+ Requires<[HasSSE1]>;
+class PSIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedSingle>, TB,
+ Requires<[HasSSE1]>;
+class VSSI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, !strconcat("v", asm), pattern>, XS,
+ Requires<[HasAVX]>;
+class VPSI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, !strconcat("v", asm), pattern, SSEPackedSingle>, TB,
+ Requires<[HasAVX]>;
+
+// SSE2 Instruction Templates:
+//
+// SDI - SSE2 instructions with XD prefix.
+// SDIi8 - SSE2 instructions with ImmT == Imm8 and XD prefix.
+// SSDIi8 - SSE2 instructions with ImmT == Imm8 and XS prefix.
+// PDI - SSE2 instructions with TB and OpSize prefixes.
+// PDIi8 - SSE2 instructions with ImmT == Imm8 and TB and OpSize prefixes.
+// VSDI - SSE2 instructions with XD prefix in AVX form.
+// VPDI - SSE2 instructions with TB and OpSize prefixes in AVX form.
+
+class SDI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern>, XD, Requires<[HasSSE2]>;
+class SDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern>, XD, Requires<[HasSSE2]>;
+class SSDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE2]>;
+class PDI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, TB, OpSize,
+ Requires<[HasSSE2]>;
+class PDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedDouble>, TB, OpSize,
+ Requires<[HasSSE2]>;
+class VSDI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, !strconcat("v", asm), pattern>, XD,
+ Requires<[HasAVX]>;
+class VPDI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, !strconcat("v", asm), pattern, SSEPackedDouble>, TB,
+ OpSize, Requires<[HasAVX]>;
+
+// SSE3 Instruction Templates:
+//
+// S3I - SSE3 instructions with TB and OpSize prefixes.
+// S3SI - SSE3 instructions with XS prefix.
+// S3DI - SSE3 instructions with XD prefix.
+
+class S3SI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern, SSEPackedSingle>, XS,
+ Requires<[HasSSE3]>;
+class S3DI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, XD,
+ Requires<[HasSSE3]>;
+class S3I<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, TB, OpSize,
+ Requires<[HasSSE3]>;
+
+
+// SSSE3 Instruction Templates:
+//
+// SS38I - SSSE3 instructions with T8 prefix.
+// SS3AI - SSSE3 instructions with TA prefix.
+//
+// Note: SSSE3 instructions have 64-bit and 128-bit versions. The 64-bit version
+// uses the MMX registers. We put those instructions here because they better
+// fit into the SSSE3 instruction category rather than the MMX category.
+
+class SS38I<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8,
+ Requires<[HasSSSE3]>;
+class SS3AI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
+ Requires<[HasSSSE3]>;
+
+// SSE4.1 Instruction Templates:
+//
+// SS48I - SSE 4.1 instructions with T8 prefix.
+// SS41AIi8 - SSE 4.1 instructions with TA prefix and ImmT == Imm8.
+//
+class SS48I<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8,
+ Requires<[HasSSE41]>;
+class SS4AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
+ Requires<[HasSSE41]>;
+
+// SSE4.2 Instruction Templates:
+//
+// SS428I - SSE 4.2 instructions with T8 prefix.
+class SS428I<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8,
+ Requires<[HasSSE42]>;
+
+// SS42FI - SSE 4.2 instructions with TF prefix.
+class SS42FI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern>, TF, Requires<[HasSSE42]>;
+
+// SS42AI = SSE 4.2 instructions with TA prefix
+class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
+ Requires<[HasSSE42]>;
+
+// AVX Instruction Templates:
+// Instructions introduced in AVX (no SSE equivalent forms)
+//
+// AVX8I - AVX instructions with T8 and OpSize prefix.
+// AVXAIi8 - AVX instructions with TA, OpSize prefix and ImmT = Imm8.
+class AVX8I<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8, OpSize,
+ Requires<[HasAVX]>;
+class AVXAIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, OpSize,
+ Requires<[HasAVX]>;
+
+// AES Instruction Templates:
+//
+// AES8I
+// These use the same encoding as the SSE4.2 T8 and TA encodings.
+class AES8I<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag>pattern>
+ : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8,
+ Requires<[HasAES]>;
+
+class AESAI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
+ Requires<[HasAES]>;
+
+// CLMUL Instruction Templates
+class CLMULIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag>pattern>
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
+ OpSize, Requires<[HasCLMUL]>;
+
+class AVXCLMULIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag>pattern>
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
+ OpSize, VEX_4V, Requires<[HasAVX, HasCLMUL]>;
+
+// FMA3 Instruction Templates
+class FMA3<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag>pattern>
+ : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8,
+ OpSize, VEX_4V, Requires<[HasFMA3]>;
+
+// X86-64 Instruction templates...
+//
+
+class RI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern>, REX_W;
+class RIi8 <bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern>, REX_W;
+class RIi32 <bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii32<o, F, outs, ins, asm, pattern>, REX_W;
+
+class RIi64<bits<8> o, Format f, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : X86Inst<o, f, Imm64, outs, ins, asm>, REX_W {
+ let Pattern = pattern;
+ let CodeSize = 3;
+}
+
+class RSSI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : SSI<o, F, outs, ins, asm, pattern>, REX_W;
+class RSDI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : SDI<o, F, outs, ins, asm, pattern>, REX_W;
+class RPDI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : PDI<o, F, outs, ins, asm, pattern>, REX_W;
+class VRPDI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : VPDI<o, F, outs, ins, asm, pattern>, VEX_W;
+
+// MMX Instruction templates
+//
+
+// MMXI - MMX instructions with TB prefix.
+// MMXI64 - MMX instructions with TB prefix valid only in 64 bit mode.
+// MMX2I - MMX / SSE2 instructions with TB and OpSize prefixes.
+// MMXIi8 - MMX instructions with ImmT == Imm8 and TB prefix.
+// MMXIi8 - MMX instructions with ImmT == Imm8 and TB prefix.
+// MMXID - MMX instructions with XD prefix.
+// MMXIS - MMX instructions with XS prefix.
+class MMXI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern>, TB, Requires<[HasMMX]>;
+class MMXI64<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern>, TB, Requires<[HasMMX,In64BitMode]>;
+class MMXRI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern>, TB, REX_W, Requires<[HasMMX]>;
+class MMX2I<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern>, TB, OpSize, Requires<[HasMMX]>;
+class MMXIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern>, TB, Requires<[HasMMX]>;
+class MMXID<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern>, XD, Requires<[HasMMX]>;
+class MMXIS<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasMMX]>;
diff --git a/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
new file mode 100644
index 0000000..af919fb
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -0,0 +1,467 @@
+//======- X86InstrFragmentsSIMD.td - x86 ISA -------------*- tablegen -*-=====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides pattern fragments useful for SIMD instructions.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MMX Pattern Fragments
+//===----------------------------------------------------------------------===//
+
+def load_mmx : PatFrag<(ops node:$ptr), (x86mmx (load node:$ptr))>;
+def bc_mmx : PatFrag<(ops node:$in), (x86mmx (bitconvert node:$in))>;
+
+//===----------------------------------------------------------------------===//
+// SSE specific DAG Nodes.
+//===----------------------------------------------------------------------===//
+
+def SDTX86FPShiftOp : SDTypeProfile<1, 2, [ SDTCisSameAs<0, 1>,
+ SDTCisFP<0>, SDTCisInt<2> ]>;
+def SDTX86VFCMP : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>,
+ SDTCisFP<1>, SDTCisVT<3, i8>]>;
+
+def X86fmin : SDNode<"X86ISD::FMIN", SDTFPBinOp>;
+def X86fmax : SDNode<"X86ISD::FMAX", SDTFPBinOp>;
+def X86fand : SDNode<"X86ISD::FAND", SDTFPBinOp,
+ [SDNPCommutative, SDNPAssociative]>;
+def X86for : SDNode<"X86ISD::FOR", SDTFPBinOp,
+ [SDNPCommutative, SDNPAssociative]>;
+def X86fxor : SDNode<"X86ISD::FXOR", SDTFPBinOp,
+ [SDNPCommutative, SDNPAssociative]>;
+def X86frsqrt : SDNode<"X86ISD::FRSQRT", SDTFPUnaryOp>;
+def X86frcp : SDNode<"X86ISD::FRCP", SDTFPUnaryOp>;
+def X86fsrl : SDNode<"X86ISD::FSRL", SDTX86FPShiftOp>;
+def X86fgetsign: SDNode<"X86ISD::FGETSIGNx86",SDTFPToIntOp>;
+def X86fhadd : SDNode<"X86ISD::FHADD", SDTFPBinOp>;
+def X86fhsub : SDNode<"X86ISD::FHSUB", SDTFPBinOp>;
+def X86comi : SDNode<"X86ISD::COMI", SDTX86CmpTest>;
+def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86CmpTest>;
+def X86cmpss : SDNode<"X86ISD::FSETCCss", SDTX86Cmpss>;
+def X86cmpsd : SDNode<"X86ISD::FSETCCsd", SDTX86Cmpsd>;
+def X86pshufb : SDNode<"X86ISD::PSHUFB",
+ SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>]>>;
+def X86andnp : SDNode<"X86ISD::ANDNP",
+ SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>]>>;
+def X86psignb : SDNode<"X86ISD::PSIGNB",
+ SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>]>>;
+def X86psignw : SDNode<"X86ISD::PSIGNW",
+ SDTypeProfile<1, 2, [SDTCisVT<0, v8i16>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>]>>;
+def X86psignd : SDNode<"X86ISD::PSIGND",
+ SDTypeProfile<1, 2, [SDTCisVT<0, v4i32>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>]>>;
+def X86pextrb : SDNode<"X86ISD::PEXTRB",
+ SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>;
+def X86pextrw : SDNode<"X86ISD::PEXTRW",
+ SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>;
+def X86pinsrb : SDNode<"X86ISD::PINSRB",
+ SDTypeProfile<1, 3, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
+ SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>;
+def X86pinsrw : SDNode<"X86ISD::PINSRW",
+ SDTypeProfile<1, 3, [SDTCisVT<0, v8i16>, SDTCisSameAs<0,1>,
+ SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>;
+def X86insrtps : SDNode<"X86ISD::INSERTPS",
+ SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>, SDTCisSameAs<0,1>,
+ SDTCisVT<2, v4f32>, SDTCisPtrTy<3>]>>;
+def X86vzmovl : SDNode<"X86ISD::VZEXT_MOVL",
+ SDTypeProfile<1, 1, [SDTCisSameAs<0,1>]>>;
+def X86vzload : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad,
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def X86vshl : SDNode<"X86ISD::VSHL", SDTIntShiftOp>;
+def X86vshr : SDNode<"X86ISD::VSRL", SDTIntShiftOp>;
+def X86cmpps : SDNode<"X86ISD::CMPPS", SDTX86VFCMP>;
+def X86cmppd : SDNode<"X86ISD::CMPPD", SDTX86VFCMP>;
+def X86pcmpeqb : SDNode<"X86ISD::PCMPEQB", SDTIntBinOp, [SDNPCommutative]>;
+def X86pcmpeqw : SDNode<"X86ISD::PCMPEQW", SDTIntBinOp, [SDNPCommutative]>;
+def X86pcmpeqd : SDNode<"X86ISD::PCMPEQD", SDTIntBinOp, [SDNPCommutative]>;
+def X86pcmpeqq : SDNode<"X86ISD::PCMPEQQ", SDTIntBinOp, [SDNPCommutative]>;
+def X86pcmpgtb : SDNode<"X86ISD::PCMPGTB", SDTIntBinOp>;
+def X86pcmpgtw : SDNode<"X86ISD::PCMPGTW", SDTIntBinOp>;
+def X86pcmpgtd : SDNode<"X86ISD::PCMPGTD", SDTIntBinOp>;
+def X86pcmpgtq : SDNode<"X86ISD::PCMPGTQ", SDTIntBinOp>;
+
+def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
+ SDTCisVec<1>,
+ SDTCisSameAs<2, 1>]>;
+def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>;
+def X86testp : SDNode<"X86ISD::TESTP", SDTX86CmpPTest>;
+
+// Specific shuffle nodes - At some point ISD::VECTOR_SHUFFLE will always get
+// translated into one of the target nodes below during lowering.
+// Note: this is a work in progress...
+def SDTShuff1Op : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
+def SDTShuff2Op : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>]>;
+
+def SDTShuff2OpI : SDTypeProfile<1, 2, [SDTCisVec<0>,
+ SDTCisSameAs<0,1>, SDTCisInt<2>]>;
+def SDTShuff3OpI : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>, SDTCisInt<3>]>;
+
+def SDTVBroadcast : SDTypeProfile<1, 1, [SDTCisVec<0>]>;
+
+def X86PAlign : SDNode<"X86ISD::PALIGN", SDTShuff3OpI>;
+
+def X86PShufd : SDNode<"X86ISD::PSHUFD", SDTShuff2OpI>;
+def X86PShufhw : SDNode<"X86ISD::PSHUFHW", SDTShuff2OpI>;
+def X86PShuflw : SDNode<"X86ISD::PSHUFLW", SDTShuff2OpI>;
+
+def X86Shufpd : SDNode<"X86ISD::SHUFPD", SDTShuff3OpI>;
+def X86Shufps : SDNode<"X86ISD::SHUFPS", SDTShuff3OpI>;
+
+def X86Movddup : SDNode<"X86ISD::MOVDDUP", SDTShuff1Op>;
+def X86Movshdup : SDNode<"X86ISD::MOVSHDUP", SDTShuff1Op>;
+def X86Movsldup : SDNode<"X86ISD::MOVSLDUP", SDTShuff1Op>;
+
+def X86Movsd : SDNode<"X86ISD::MOVSD", SDTShuff2Op>;
+def X86Movss : SDNode<"X86ISD::MOVSS", SDTShuff2Op>;
+
+def X86Movlhps : SDNode<"X86ISD::MOVLHPS", SDTShuff2Op>;
+def X86Movlhpd : SDNode<"X86ISD::MOVLHPD", SDTShuff2Op>;
+def X86Movhlps : SDNode<"X86ISD::MOVHLPS", SDTShuff2Op>;
+def X86Movhlpd : SDNode<"X86ISD::MOVHLPD", SDTShuff2Op>;
+
+def X86Movlps : SDNode<"X86ISD::MOVLPS", SDTShuff2Op>;
+def X86Movlpd : SDNode<"X86ISD::MOVLPD", SDTShuff2Op>;
+
+def X86Unpcklps : SDNode<"X86ISD::UNPCKLPS", SDTShuff2Op>;
+def X86Unpcklpd : SDNode<"X86ISD::UNPCKLPD", SDTShuff2Op>;
+def X86Unpcklpsy : SDNode<"X86ISD::VUNPCKLPSY", SDTShuff2Op>;
+def X86Unpcklpdy : SDNode<"X86ISD::VUNPCKLPDY", SDTShuff2Op>;
+
+def X86Unpckhps : SDNode<"X86ISD::UNPCKHPS", SDTShuff2Op>;
+def X86Unpckhpd : SDNode<"X86ISD::UNPCKHPD", SDTShuff2Op>;
+def X86Unpckhpsy : SDNode<"X86ISD::VUNPCKHPSY", SDTShuff2Op>;
+def X86Unpckhpdy : SDNode<"X86ISD::VUNPCKHPDY", SDTShuff2Op>;
+
+def X86Punpcklbw : SDNode<"X86ISD::PUNPCKLBW", SDTShuff2Op>;
+def X86Punpcklwd : SDNode<"X86ISD::PUNPCKLWD", SDTShuff2Op>;
+def X86Punpckldq : SDNode<"X86ISD::PUNPCKLDQ", SDTShuff2Op>;
+def X86Punpcklqdq : SDNode<"X86ISD::PUNPCKLQDQ", SDTShuff2Op>;
+
+def X86Punpckhbw : SDNode<"X86ISD::PUNPCKHBW", SDTShuff2Op>;
+def X86Punpckhwd : SDNode<"X86ISD::PUNPCKHWD", SDTShuff2Op>;
+def X86Punpckhdq : SDNode<"X86ISD::PUNPCKHDQ", SDTShuff2Op>;
+def X86Punpckhqdq : SDNode<"X86ISD::PUNPCKHQDQ", SDTShuff2Op>;
+
+def X86VPermilps : SDNode<"X86ISD::VPERMILPS", SDTShuff2OpI>;
+def X86VPermilpsy : SDNode<"X86ISD::VPERMILPSY", SDTShuff2OpI>;
+def X86VPermilpd : SDNode<"X86ISD::VPERMILPD", SDTShuff2OpI>;
+def X86VPermilpdy : SDNode<"X86ISD::VPERMILPDY", SDTShuff2OpI>;
+
+def X86VPerm2f128 : SDNode<"X86ISD::VPERM2F128", SDTShuff3OpI>;
+
+def X86VBroadcast : SDNode<"X86ISD::VBROADCAST", SDTVBroadcast>;
+
+//===----------------------------------------------------------------------===//
+// SSE Complex Patterns
+//===----------------------------------------------------------------------===//
+
+// These are 'extloads' from a scalar to the low element of a vector, zeroing
+// the top elements. These are used for the SSE 'ss' and 'sd' instruction
+// forms.
+def sse_load_f32 : ComplexPattern<v4f32, 5, "SelectScalarSSELoad", [],
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand,
+ SDNPWantRoot]>;
+def sse_load_f64 : ComplexPattern<v2f64, 5, "SelectScalarSSELoad", [],
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand,
+ SDNPWantRoot]>;
+
+def ssmem : Operand<v4f32> {
+ let PrintMethod = "printf32mem";
+ let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, i8imm);
+ let ParserMatchClass = X86MemAsmOperand;
+ let OperandType = "OPERAND_MEMORY";
+}
+def sdmem : Operand<v2f64> {
+ let PrintMethod = "printf64mem";
+ let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, i8imm);
+ let ParserMatchClass = X86MemAsmOperand;
+ let OperandType = "OPERAND_MEMORY";
+}
+
+//===----------------------------------------------------------------------===//
+// SSE pattern fragments
+//===----------------------------------------------------------------------===//
+
+// 128-bit load pattern fragments
+def loadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (load node:$ptr))>;
+def loadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (load node:$ptr))>;
+def loadv4i32 : PatFrag<(ops node:$ptr), (v4i32 (load node:$ptr))>;
+def loadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (load node:$ptr))>;
+
+// 256-bit load pattern fragments
+def loadv8f32 : PatFrag<(ops node:$ptr), (v8f32 (load node:$ptr))>;
+def loadv4f64 : PatFrag<(ops node:$ptr), (v4f64 (load node:$ptr))>;
+def loadv8i32 : PatFrag<(ops node:$ptr), (v8i32 (load node:$ptr))>;
+def loadv4i64 : PatFrag<(ops node:$ptr), (v4i64 (load node:$ptr))>;
+
+// Like 'store', but always requires 128-bit vector alignment.
+def alignedstore : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getAlignment() >= 16;
+}]>;
+
+// Like 'store', but always requires 256-bit vector alignment.
+def alignedstore256 : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getAlignment() >= 32;
+}]>;
+
+// Like 'load', but always requires 128-bit vector alignment.
+def alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() >= 16;
+}]>;
+
+// Like 'load', but always requires 256-bit vector alignment.
+def alignedload256 : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() >= 32;
+}]>;
+
+def alignedloadfsf32 : PatFrag<(ops node:$ptr),
+ (f32 (alignedload node:$ptr))>;
+def alignedloadfsf64 : PatFrag<(ops node:$ptr),
+ (f64 (alignedload node:$ptr))>;
+
+// 128-bit aligned load pattern fragments
+def alignedloadv4f32 : PatFrag<(ops node:$ptr),
+ (v4f32 (alignedload node:$ptr))>;
+def alignedloadv2f64 : PatFrag<(ops node:$ptr),
+ (v2f64 (alignedload node:$ptr))>;
+def alignedloadv4i32 : PatFrag<(ops node:$ptr),
+ (v4i32 (alignedload node:$ptr))>;
+def alignedloadv2i64 : PatFrag<(ops node:$ptr),
+ (v2i64 (alignedload node:$ptr))>;
+
+// 256-bit aligned load pattern fragments
+def alignedloadv8f32 : PatFrag<(ops node:$ptr),
+ (v8f32 (alignedload256 node:$ptr))>;
+def alignedloadv4f64 : PatFrag<(ops node:$ptr),
+ (v4f64 (alignedload256 node:$ptr))>;
+def alignedloadv8i32 : PatFrag<(ops node:$ptr),
+ (v8i32 (alignedload256 node:$ptr))>;
+def alignedloadv4i64 : PatFrag<(ops node:$ptr),
+ (v4i64 (alignedload256 node:$ptr))>;
+
+// Like 'load', but uses special alignment checks suitable for use in
+// memory operands in most SSE instructions, which are required to
+// be naturally aligned on some targets but not on others. If the subtarget
+// allows unaligned accesses, match any load, though this may require
+// setting a feature bit in the processor (on startup, for example).
+// Opteron 10h and later implement such a feature.
+def memop : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return Subtarget->hasVectorUAMem()
+ || cast<LoadSDNode>(N)->getAlignment() >= 16;
+}]>;
+
+def memopfsf32 : PatFrag<(ops node:$ptr), (f32 (memop node:$ptr))>;
+def memopfsf64 : PatFrag<(ops node:$ptr), (f64 (memop node:$ptr))>;
+
+// 128-bit memop pattern fragments
+def memopv4f32 : PatFrag<(ops node:$ptr), (v4f32 (memop node:$ptr))>;
+def memopv2f64 : PatFrag<(ops node:$ptr), (v2f64 (memop node:$ptr))>;
+def memopv4i32 : PatFrag<(ops node:$ptr), (v4i32 (memop node:$ptr))>;
+def memopv2i64 : PatFrag<(ops node:$ptr), (v2i64 (memop node:$ptr))>;
+def memopv8i16 : PatFrag<(ops node:$ptr), (v8i16 (memop node:$ptr))>;
+def memopv16i8 : PatFrag<(ops node:$ptr), (v16i8 (memop node:$ptr))>;
+
+// 256-bit memop pattern fragments
+def memopv32i8 : PatFrag<(ops node:$ptr), (v32i8 (memop node:$ptr))>;
+def memopv8f32 : PatFrag<(ops node:$ptr), (v8f32 (memop node:$ptr))>;
+def memopv4f64 : PatFrag<(ops node:$ptr), (v4f64 (memop node:$ptr))>;
+def memopv4i64 : PatFrag<(ops node:$ptr), (v4i64 (memop node:$ptr))>;
+def memopv8i32 : PatFrag<(ops node:$ptr), (v8i32 (memop node:$ptr))>;
+
+// SSSE3 uses MMX registers for some instructions. They aren't aligned on a
+// 16-byte boundary.
+// FIXME: 8 byte alignment for mmx reads is not required
+def memop64 : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() >= 8;
+}]>;
+
+def memopmmx : PatFrag<(ops node:$ptr), (x86mmx (memop64 node:$ptr))>;
+
+// MOVNT Support
+// Like 'store', but requires the non-temporal bit to be set
+def nontemporalstore : PatFrag<(ops node:$val, node:$ptr),
+ (st node:$val, node:$ptr), [{
+ if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N))
+ return ST->isNonTemporal();
+ return false;
+}]>;
+
+def alignednontemporalstore : PatFrag<(ops node:$val, node:$ptr),
+ (st node:$val, node:$ptr), [{
+ if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N))
+ return ST->isNonTemporal() && !ST->isTruncatingStore() &&
+ ST->getAddressingMode() == ISD::UNINDEXED &&
+ ST->getAlignment() >= 16;
+ return false;
+}]>;
+
+def unalignednontemporalstore : PatFrag<(ops node:$val, node:$ptr),
+ (st node:$val, node:$ptr), [{
+ if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N))
+ return ST->isNonTemporal() &&
+ ST->getAlignment() < 16;
+ return false;
+}]>;
+
+// 128-bit bitconvert pattern fragments
+def bc_v4f32 : PatFrag<(ops node:$in), (v4f32 (bitconvert node:$in))>;
+def bc_v2f64 : PatFrag<(ops node:$in), (v2f64 (bitconvert node:$in))>;
+def bc_v16i8 : PatFrag<(ops node:$in), (v16i8 (bitconvert node:$in))>;
+def bc_v8i16 : PatFrag<(ops node:$in), (v8i16 (bitconvert node:$in))>;
+def bc_v4i32 : PatFrag<(ops node:$in), (v4i32 (bitconvert node:$in))>;
+def bc_v2i64 : PatFrag<(ops node:$in), (v2i64 (bitconvert node:$in))>;
+
+// 256-bit bitconvert pattern fragments
+def bc_v8i32 : PatFrag<(ops node:$in), (v8i32 (bitconvert node:$in))>;
+def bc_v4i64 : PatFrag<(ops node:$in), (v4i64 (bitconvert node:$in))>;
+
+def vzmovl_v2i64 : PatFrag<(ops node:$src),
+ (bitconvert (v2i64 (X86vzmovl
+ (v2i64 (scalar_to_vector (loadi64 node:$src))))))>;
+def vzmovl_v4i32 : PatFrag<(ops node:$src),
+ (bitconvert (v4i32 (X86vzmovl
+ (v4i32 (scalar_to_vector (loadi32 node:$src))))))>;
+
+def vzload_v2i64 : PatFrag<(ops node:$src),
+ (bitconvert (v2i64 (X86vzload node:$src)))>;
+
+
+def fp32imm0 : PatLeaf<(f32 fpimm), [{
+ return N->isExactlyValue(+0.0);
+}]>;
+
+// BYTE_imm - Transform bit immediates into byte immediates.
+def BYTE_imm : SDNodeXForm<imm, [{
+ // Transformation function: imm >> 3
+ return getI32Imm(N->getZExtValue() >> 3);
+}]>;
+
+// SHUFFLE_get_shuf_imm xform function: convert vector_shuffle mask to PSHUF*,
+// SHUFP* etc. imm.
+def SHUFFLE_get_shuf_imm : SDNodeXForm<vector_shuffle, [{
+ return getI8Imm(X86::getShuffleSHUFImmediate(N));
+}]>;
+
+// SHUFFLE_get_pshufhw_imm xform function: convert vector_shuffle mask to
+// PSHUFHW imm.
+def SHUFFLE_get_pshufhw_imm : SDNodeXForm<vector_shuffle, [{
+ return getI8Imm(X86::getShufflePSHUFHWImmediate(N));
+}]>;
+
+// SHUFFLE_get_pshuflw_imm xform function: convert vector_shuffle mask to
+// PSHUFLW imm.
+def SHUFFLE_get_pshuflw_imm : SDNodeXForm<vector_shuffle, [{
+ return getI8Imm(X86::getShufflePSHUFLWImmediate(N));
+}]>;
+
+// SHUFFLE_get_palign_imm xform function: convert vector_shuffle mask to
+// a PALIGNR imm.
+def SHUFFLE_get_palign_imm : SDNodeXForm<vector_shuffle, [{
+ return getI8Imm(X86::getShufflePALIGNRImmediate(N));
+}]>;
+
+// EXTRACT_get_vextractf128_imm xform function: convert extract_subvector index
+// to VEXTRACTF128 imm.
+def EXTRACT_get_vextractf128_imm : SDNodeXForm<extract_subvector, [{
+ return getI8Imm(X86::getExtractVEXTRACTF128Immediate(N));
+}]>;
+
+// INSERT_get_vinsertf128_imm xform function: convert insert_subvector index to
+// VINSERTF128 imm.
+def INSERT_get_vinsertf128_imm : SDNodeXForm<insert_subvector, [{
+ return getI8Imm(X86::getInsertVINSERTF128Immediate(N));
+}]>;
+
+def splat_lo : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+ return SVOp->isSplat() && SVOp->getSplatIndex() == 0;
+}]>;
+
+def movddup : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isMOVDDUPMask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def movhlps : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isMOVHLPSMask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def movhlps_undef : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isMOVHLPS_v_undef_Mask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def movlhps : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isMOVLHPSMask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def movlp : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isMOVLPMask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def movl : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isMOVLMask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def unpckl : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isUNPCKLMask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def unpckh : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isUNPCKHMask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def pshufd : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isPSHUFDMask(cast<ShuffleVectorSDNode>(N));
+}], SHUFFLE_get_shuf_imm>;
+
+def shufp : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isSHUFPMask(cast<ShuffleVectorSDNode>(N));
+}], SHUFFLE_get_shuf_imm>;
+
+def pshufhw : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isPSHUFHWMask(cast<ShuffleVectorSDNode>(N));
+}], SHUFFLE_get_pshufhw_imm>;
+
+def pshuflw : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isPSHUFLWMask(cast<ShuffleVectorSDNode>(N));
+}], SHUFFLE_get_pshuflw_imm>;
+
+def vextractf128_extract : PatFrag<(ops node:$bigvec, node:$index),
+ (extract_subvector node:$bigvec,
+ node:$index), [{
+ return X86::isVEXTRACTF128Index(N);
+}], EXTRACT_get_vextractf128_imm>;
+
+def vinsertf128_insert : PatFrag<(ops node:$bigvec, node:$smallvec,
+ node:$index),
+ (insert_subvector node:$bigvec, node:$smallvec,
+ node:$index), [{
+ return X86::isVINSERTF128Index(N);
+}], INSERT_get_vinsertf128_imm>;
+
diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp b/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp
new file mode 100644
index 0000000..3a02de0
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -0,0 +1,3532 @@
+//===- X86InstrInfo.cpp - X86 Instruction Information -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the X86 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86InstrInfo.h"
+#include "X86.h"
+#include "X86InstrBuilder.h"
+#include "X86MachineFunctionInfo.h"
+#include "X86Subtarget.h"
+#include "X86TargetMachine.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include <limits>
+
+#define GET_INSTRINFO_CTOR
+#include "X86GenInstrInfo.inc"
+
+using namespace llvm;
+
+static cl::opt<bool>
+NoFusing("disable-spill-fusing",
+ cl::desc("Disable fusing of spill code into instructions"));
+static cl::opt<bool>
+PrintFailedFusing("print-failed-fuse-candidates",
+ cl::desc("Print instructions that the allocator wants to"
+ " fuse, but the X86 backend currently can't"),
+ cl::Hidden);
+static cl::opt<bool>
+ReMatPICStubLoad("remat-pic-stub-load",
+ cl::desc("Re-materialize load from stub in PIC mode"),
+ cl::init(false), cl::Hidden);
+
+enum {
+ // Select which memory operand is being unfolded.
+ // (stored in bits 0 - 7)
+ TB_INDEX_0 = 0,
+ TB_INDEX_1 = 1,
+ TB_INDEX_2 = 2,
+ TB_INDEX_MASK = 0xff,
+
+ // Minimum alignment required for load/store.
+ // Used for RegOp->MemOp conversion.
+ // (stored in bits 8 - 15)
+ TB_ALIGN_SHIFT = 8,
+ TB_ALIGN_NONE = 0 << TB_ALIGN_SHIFT,
+ TB_ALIGN_16 = 16 << TB_ALIGN_SHIFT,
+ TB_ALIGN_32 = 32 << TB_ALIGN_SHIFT,
+ TB_ALIGN_MASK = 0xff << TB_ALIGN_SHIFT,
+
+ // Do not insert the reverse map (MemOp -> RegOp) into the table.
+ // This may be needed because there is a many -> one mapping.
+ TB_NO_REVERSE = 1 << 16,
+
+ // Do not insert the forward map (RegOp -> MemOp) into the table.
+ // This is needed for Native Client, which prohibits branch
+ // instructions from using a memory operand.
+ TB_NO_FORWARD = 1 << 17,
+
+ TB_FOLDED_LOAD = 1 << 18,
+ TB_FOLDED_STORE = 1 << 19
+};
+
+X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
+ : X86GenInstrInfo((tm.getSubtarget<X86Subtarget>().is64Bit()
+ ? X86::ADJCALLSTACKDOWN64
+ : X86::ADJCALLSTACKDOWN32),
+ (tm.getSubtarget<X86Subtarget>().is64Bit()
+ ? X86::ADJCALLSTACKUP64
+ : X86::ADJCALLSTACKUP32)),
+ TM(tm), RI(tm, *this) {
+
+ static const unsigned OpTbl2Addr[][3] = {
+ { X86::ADC32ri, X86::ADC32mi, 0 },
+ { X86::ADC32ri8, X86::ADC32mi8, 0 },
+ { X86::ADC32rr, X86::ADC32mr, 0 },
+ { X86::ADC64ri32, X86::ADC64mi32, 0 },
+ { X86::ADC64ri8, X86::ADC64mi8, 0 },
+ { X86::ADC64rr, X86::ADC64mr, 0 },
+ { X86::ADD16ri, X86::ADD16mi, 0 },
+ { X86::ADD16ri8, X86::ADD16mi8, 0 },
+ { X86::ADD16ri_DB, X86::ADD16mi, TB_NO_REVERSE },
+ { X86::ADD16ri8_DB, X86::ADD16mi8, TB_NO_REVERSE },
+ { X86::ADD16rr, X86::ADD16mr, 0 },
+ { X86::ADD16rr_DB, X86::ADD16mr, TB_NO_REVERSE },
+ { X86::ADD32ri, X86::ADD32mi, 0 },
+ { X86::ADD32ri8, X86::ADD32mi8, 0 },
+ { X86::ADD32ri_DB, X86::ADD32mi, TB_NO_REVERSE },
+ { X86::ADD32ri8_DB, X86::ADD32mi8, TB_NO_REVERSE },
+ { X86::ADD32rr, X86::ADD32mr, 0 },
+ { X86::ADD32rr_DB, X86::ADD32mr, TB_NO_REVERSE },
+ { X86::ADD64ri32, X86::ADD64mi32, 0 },
+ { X86::ADD64ri8, X86::ADD64mi8, 0 },
+ { X86::ADD64ri32_DB,X86::ADD64mi32, TB_NO_REVERSE },
+ { X86::ADD64ri8_DB, X86::ADD64mi8, TB_NO_REVERSE },
+ { X86::ADD64rr, X86::ADD64mr, 0 },
+ { X86::ADD64rr_DB, X86::ADD64mr, TB_NO_REVERSE },
+ { X86::ADD8ri, X86::ADD8mi, 0 },
+ { X86::ADD8rr, X86::ADD8mr, 0 },
+ { X86::AND16ri, X86::AND16mi, 0 },
+ { X86::AND16ri8, X86::AND16mi8, 0 },
+ { X86::AND16rr, X86::AND16mr, 0 },
+ { X86::AND32ri, X86::AND32mi, 0 },
+ { X86::AND32ri8, X86::AND32mi8, 0 },
+ { X86::AND32rr, X86::AND32mr, 0 },
+ { X86::AND64ri32, X86::AND64mi32, 0 },
+ { X86::AND64ri8, X86::AND64mi8, 0 },
+ { X86::AND64rr, X86::AND64mr, 0 },
+ { X86::AND8ri, X86::AND8mi, 0 },
+ { X86::AND8rr, X86::AND8mr, 0 },
+ { X86::DEC16r, X86::DEC16m, 0 },
+ { X86::DEC32r, X86::DEC32m, 0 },
+ { X86::DEC64_16r, X86::DEC64_16m, 0 },
+ { X86::DEC64_32r, X86::DEC64_32m, 0 },
+ { X86::DEC64r, X86::DEC64m, 0 },
+ { X86::DEC8r, X86::DEC8m, 0 },
+ { X86::INC16r, X86::INC16m, 0 },
+ { X86::INC32r, X86::INC32m, 0 },
+ { X86::INC64_16r, X86::INC64_16m, 0 },
+ { X86::INC64_32r, X86::INC64_32m, 0 },
+ { X86::INC64r, X86::INC64m, 0 },
+ { X86::INC8r, X86::INC8m, 0 },
+ { X86::NEG16r, X86::NEG16m, 0 },
+ { X86::NEG32r, X86::NEG32m, 0 },
+ { X86::NEG64r, X86::NEG64m, 0 },
+ { X86::NEG8r, X86::NEG8m, 0 },
+ { X86::NOT16r, X86::NOT16m, 0 },
+ { X86::NOT32r, X86::NOT32m, 0 },
+ { X86::NOT64r, X86::NOT64m, 0 },
+ { X86::NOT8r, X86::NOT8m, 0 },
+ { X86::OR16ri, X86::OR16mi, 0 },
+ { X86::OR16ri8, X86::OR16mi8, 0 },
+ { X86::OR16rr, X86::OR16mr, 0 },
+ { X86::OR32ri, X86::OR32mi, 0 },
+ { X86::OR32ri8, X86::OR32mi8, 0 },
+ { X86::OR32rr, X86::OR32mr, 0 },
+ { X86::OR64ri32, X86::OR64mi32, 0 },
+ { X86::OR64ri8, X86::OR64mi8, 0 },
+ { X86::OR64rr, X86::OR64mr, 0 },
+ { X86::OR8ri, X86::OR8mi, 0 },
+ { X86::OR8rr, X86::OR8mr, 0 },
+ { X86::ROL16r1, X86::ROL16m1, 0 },
+ { X86::ROL16rCL, X86::ROL16mCL, 0 },
+ { X86::ROL16ri, X86::ROL16mi, 0 },
+ { X86::ROL32r1, X86::ROL32m1, 0 },
+ { X86::ROL32rCL, X86::ROL32mCL, 0 },
+ { X86::ROL32ri, X86::ROL32mi, 0 },
+ { X86::ROL64r1, X86::ROL64m1, 0 },
+ { X86::ROL64rCL, X86::ROL64mCL, 0 },
+ { X86::ROL64ri, X86::ROL64mi, 0 },
+ { X86::ROL8r1, X86::ROL8m1, 0 },
+ { X86::ROL8rCL, X86::ROL8mCL, 0 },
+ { X86::ROL8ri, X86::ROL8mi, 0 },
+ { X86::ROR16r1, X86::ROR16m1, 0 },
+ { X86::ROR16rCL, X86::ROR16mCL, 0 },
+ { X86::ROR16ri, X86::ROR16mi, 0 },
+ { X86::ROR32r1, X86::ROR32m1, 0 },
+ { X86::ROR32rCL, X86::ROR32mCL, 0 },
+ { X86::ROR32ri, X86::ROR32mi, 0 },
+ { X86::ROR64r1, X86::ROR64m1, 0 },
+ { X86::ROR64rCL, X86::ROR64mCL, 0 },
+ { X86::ROR64ri, X86::ROR64mi, 0 },
+ { X86::ROR8r1, X86::ROR8m1, 0 },
+ { X86::ROR8rCL, X86::ROR8mCL, 0 },
+ { X86::ROR8ri, X86::ROR8mi, 0 },
+ { X86::SAR16r1, X86::SAR16m1, 0 },
+ { X86::SAR16rCL, X86::SAR16mCL, 0 },
+ { X86::SAR16ri, X86::SAR16mi, 0 },
+ { X86::SAR32r1, X86::SAR32m1, 0 },
+ { X86::SAR32rCL, X86::SAR32mCL, 0 },
+ { X86::SAR32ri, X86::SAR32mi, 0 },
+ { X86::SAR64r1, X86::SAR64m1, 0 },
+ { X86::SAR64rCL, X86::SAR64mCL, 0 },
+ { X86::SAR64ri, X86::SAR64mi, 0 },
+ { X86::SAR8r1, X86::SAR8m1, 0 },
+ { X86::SAR8rCL, X86::SAR8mCL, 0 },
+ { X86::SAR8ri, X86::SAR8mi, 0 },
+ { X86::SBB32ri, X86::SBB32mi, 0 },
+ { X86::SBB32ri8, X86::SBB32mi8, 0 },
+ { X86::SBB32rr, X86::SBB32mr, 0 },
+ { X86::SBB64ri32, X86::SBB64mi32, 0 },
+ { X86::SBB64ri8, X86::SBB64mi8, 0 },
+ { X86::SBB64rr, X86::SBB64mr, 0 },
+ { X86::SHL16rCL, X86::SHL16mCL, 0 },
+ { X86::SHL16ri, X86::SHL16mi, 0 },
+ { X86::SHL32rCL, X86::SHL32mCL, 0 },
+ { X86::SHL32ri, X86::SHL32mi, 0 },
+ { X86::SHL64rCL, X86::SHL64mCL, 0 },
+ { X86::SHL64ri, X86::SHL64mi, 0 },
+ { X86::SHL8rCL, X86::SHL8mCL, 0 },
+ { X86::SHL8ri, X86::SHL8mi, 0 },
+ { X86::SHLD16rrCL, X86::SHLD16mrCL, 0 },
+ { X86::SHLD16rri8, X86::SHLD16mri8, 0 },
+ { X86::SHLD32rrCL, X86::SHLD32mrCL, 0 },
+ { X86::SHLD32rri8, X86::SHLD32mri8, 0 },
+ { X86::SHLD64rrCL, X86::SHLD64mrCL, 0 },
+ { X86::SHLD64rri8, X86::SHLD64mri8, 0 },
+ { X86::SHR16r1, X86::SHR16m1, 0 },
+ { X86::SHR16rCL, X86::SHR16mCL, 0 },
+ { X86::SHR16ri, X86::SHR16mi, 0 },
+ { X86::SHR32r1, X86::SHR32m1, 0 },
+ { X86::SHR32rCL, X86::SHR32mCL, 0 },
+ { X86::SHR32ri, X86::SHR32mi, 0 },
+ { X86::SHR64r1, X86::SHR64m1, 0 },
+ { X86::SHR64rCL, X86::SHR64mCL, 0 },
+ { X86::SHR64ri, X86::SHR64mi, 0 },
+ { X86::SHR8r1, X86::SHR8m1, 0 },
+ { X86::SHR8rCL, X86::SHR8mCL, 0 },
+ { X86::SHR8ri, X86::SHR8mi, 0 },
+ { X86::SHRD16rrCL, X86::SHRD16mrCL, 0 },
+ { X86::SHRD16rri8, X86::SHRD16mri8, 0 },
+ { X86::SHRD32rrCL, X86::SHRD32mrCL, 0 },
+ { X86::SHRD32rri8, X86::SHRD32mri8, 0 },
+ { X86::SHRD64rrCL, X86::SHRD64mrCL, 0 },
+ { X86::SHRD64rri8, X86::SHRD64mri8, 0 },
+ { X86::SUB16ri, X86::SUB16mi, 0 },
+ { X86::SUB16ri8, X86::SUB16mi8, 0 },
+ { X86::SUB16rr, X86::SUB16mr, 0 },
+ { X86::SUB32ri, X86::SUB32mi, 0 },
+ { X86::SUB32ri8, X86::SUB32mi8, 0 },
+ { X86::SUB32rr, X86::SUB32mr, 0 },
+ { X86::SUB64ri32, X86::SUB64mi32, 0 },
+ { X86::SUB64ri8, X86::SUB64mi8, 0 },
+ { X86::SUB64rr, X86::SUB64mr, 0 },
+ { X86::SUB8ri, X86::SUB8mi, 0 },
+ { X86::SUB8rr, X86::SUB8mr, 0 },
+ { X86::XOR16ri, X86::XOR16mi, 0 },
+ { X86::XOR16ri8, X86::XOR16mi8, 0 },
+ { X86::XOR16rr, X86::XOR16mr, 0 },
+ { X86::XOR32ri, X86::XOR32mi, 0 },
+ { X86::XOR32ri8, X86::XOR32mi8, 0 },
+ { X86::XOR32rr, X86::XOR32mr, 0 },
+ { X86::XOR64ri32, X86::XOR64mi32, 0 },
+ { X86::XOR64ri8, X86::XOR64mi8, 0 },
+ { X86::XOR64rr, X86::XOR64mr, 0 },
+ { X86::XOR8ri, X86::XOR8mi, 0 },
+ { X86::XOR8rr, X86::XOR8mr, 0 }
+ };
+
+ for (unsigned i = 0, e = array_lengthof(OpTbl2Addr); i != e; ++i) {
+ unsigned RegOp = OpTbl2Addr[i][0];
+ unsigned MemOp = OpTbl2Addr[i][1];
+ unsigned Flags = OpTbl2Addr[i][2];
+ AddTableEntry(RegOp2MemOpTable2Addr, MemOp2RegOpTable,
+ RegOp, MemOp,
+ // Index 0, folded load and store, no alignment requirement.
+ Flags | TB_INDEX_0 | TB_FOLDED_LOAD | TB_FOLDED_STORE);
+ }
+
+ static const unsigned OpTbl0[][3] = {
+ { X86::BT16ri8, X86::BT16mi8, TB_FOLDED_LOAD },
+ { X86::BT32ri8, X86::BT32mi8, TB_FOLDED_LOAD },
+ { X86::BT64ri8, X86::BT64mi8, TB_FOLDED_LOAD },
+ { X86::CALL32r, X86::CALL32m, TB_FOLDED_LOAD },
+ { X86::CALL64r, X86::CALL64m, TB_FOLDED_LOAD },
+ { X86::WINCALL64r, X86::WINCALL64m, TB_FOLDED_LOAD },
+ { X86::CMP16ri, X86::CMP16mi, TB_FOLDED_LOAD },
+ { X86::CMP16ri8, X86::CMP16mi8, TB_FOLDED_LOAD },
+ { X86::CMP16rr, X86::CMP16mr, TB_FOLDED_LOAD },
+ { X86::CMP32ri, X86::CMP32mi, TB_FOLDED_LOAD },
+ { X86::CMP32ri8, X86::CMP32mi8, TB_FOLDED_LOAD },
+ { X86::CMP32rr, X86::CMP32mr, TB_FOLDED_LOAD },
+ { X86::CMP64ri32, X86::CMP64mi32, TB_FOLDED_LOAD },
+ { X86::CMP64ri8, X86::CMP64mi8, TB_FOLDED_LOAD },
+ { X86::CMP64rr, X86::CMP64mr, TB_FOLDED_LOAD },
+ { X86::CMP8ri, X86::CMP8mi, TB_FOLDED_LOAD },
+ { X86::CMP8rr, X86::CMP8mr, TB_FOLDED_LOAD },
+ { X86::DIV16r, X86::DIV16m, TB_FOLDED_LOAD },
+ { X86::DIV32r, X86::DIV32m, TB_FOLDED_LOAD },
+ { X86::DIV64r, X86::DIV64m, TB_FOLDED_LOAD },
+ { X86::DIV8r, X86::DIV8m, TB_FOLDED_LOAD },
+ { X86::EXTRACTPSrr, X86::EXTRACTPSmr, TB_FOLDED_STORE | TB_ALIGN_16 },
+ { X86::FsMOVAPDrr, X86::MOVSDmr, TB_FOLDED_STORE | TB_NO_REVERSE },
+ { X86::FsMOVAPSrr, X86::MOVSSmr, TB_FOLDED_STORE | TB_NO_REVERSE },
+ { X86::IDIV16r, X86::IDIV16m, TB_FOLDED_LOAD },
+ { X86::IDIV32r, X86::IDIV32m, TB_FOLDED_LOAD },
+ { X86::IDIV64r, X86::IDIV64m, TB_FOLDED_LOAD },
+ { X86::IDIV8r, X86::IDIV8m, TB_FOLDED_LOAD },
+ { X86::IMUL16r, X86::IMUL16m, TB_FOLDED_LOAD },
+ { X86::IMUL32r, X86::IMUL32m, TB_FOLDED_LOAD },
+ { X86::IMUL64r, X86::IMUL64m, TB_FOLDED_LOAD },
+ { X86::IMUL8r, X86::IMUL8m, TB_FOLDED_LOAD },
+ { X86::JMP32r, X86::JMP32m, TB_FOLDED_LOAD },
+ { X86::JMP64r, X86::JMP64m, TB_FOLDED_LOAD },
+ { X86::MOV16ri, X86::MOV16mi, TB_FOLDED_STORE },
+ { X86::MOV16rr, X86::MOV16mr, TB_FOLDED_STORE },
+ { X86::MOV32ri, X86::MOV32mi, TB_FOLDED_STORE },
+ { X86::MOV32rr, X86::MOV32mr, TB_FOLDED_STORE },
+ { X86::MOV64ri32, X86::MOV64mi32, TB_FOLDED_STORE },
+ { X86::MOV64rr, X86::MOV64mr, TB_FOLDED_STORE },
+ { X86::MOV8ri, X86::MOV8mi, TB_FOLDED_STORE },
+ { X86::MOV8rr, X86::MOV8mr, TB_FOLDED_STORE },
+ { X86::MOV8rr_NOREX, X86::MOV8mr_NOREX, TB_FOLDED_STORE },
+ { X86::MOVAPDrr, X86::MOVAPDmr, TB_FOLDED_STORE | TB_ALIGN_16 },
+ { X86::MOVAPSrr, X86::MOVAPSmr, TB_FOLDED_STORE | TB_ALIGN_16 },
+ { X86::MOVDQArr, X86::MOVDQAmr, TB_FOLDED_STORE | TB_ALIGN_16 },
+ { X86::MOVPDI2DIrr, X86::MOVPDI2DImr, TB_FOLDED_STORE },
+ { X86::MOVPQIto64rr,X86::MOVPQI2QImr, TB_FOLDED_STORE },
+ { X86::MOVSDto64rr, X86::MOVSDto64mr, TB_FOLDED_STORE },
+ { X86::MOVSS2DIrr, X86::MOVSS2DImr, TB_FOLDED_STORE },
+ { X86::MOVUPDrr, X86::MOVUPDmr, TB_FOLDED_STORE },
+ { X86::MOVUPSrr, X86::MOVUPSmr, TB_FOLDED_STORE },
+ { X86::MUL16r, X86::MUL16m, TB_FOLDED_LOAD },
+ { X86::MUL32r, X86::MUL32m, TB_FOLDED_LOAD },
+ { X86::MUL64r, X86::MUL64m, TB_FOLDED_LOAD },
+ { X86::MUL8r, X86::MUL8m, TB_FOLDED_LOAD },
+ { X86::SETAEr, X86::SETAEm, TB_FOLDED_STORE },
+ { X86::SETAr, X86::SETAm, TB_FOLDED_STORE },
+ { X86::SETBEr, X86::SETBEm, TB_FOLDED_STORE },
+ { X86::SETBr, X86::SETBm, TB_FOLDED_STORE },
+ { X86::SETEr, X86::SETEm, TB_FOLDED_STORE },
+ { X86::SETGEr, X86::SETGEm, TB_FOLDED_STORE },
+ { X86::SETGr, X86::SETGm, TB_FOLDED_STORE },
+ { X86::SETLEr, X86::SETLEm, TB_FOLDED_STORE },
+ { X86::SETLr, X86::SETLm, TB_FOLDED_STORE },
+ { X86::SETNEr, X86::SETNEm, TB_FOLDED_STORE },
+ { X86::SETNOr, X86::SETNOm, TB_FOLDED_STORE },
+ { X86::SETNPr, X86::SETNPm, TB_FOLDED_STORE },
+ { X86::SETNSr, X86::SETNSm, TB_FOLDED_STORE },
+ { X86::SETOr, X86::SETOm, TB_FOLDED_STORE },
+ { X86::SETPr, X86::SETPm, TB_FOLDED_STORE },
+ { X86::SETSr, X86::SETSm, TB_FOLDED_STORE },
+ { X86::TAILJMPr, X86::TAILJMPm, TB_FOLDED_LOAD },
+ { X86::TAILJMPr64, X86::TAILJMPm64, TB_FOLDED_LOAD },
+ { X86::TEST16ri, X86::TEST16mi, TB_FOLDED_LOAD },
+ { X86::TEST32ri, X86::TEST32mi, TB_FOLDED_LOAD },
+ { X86::TEST64ri32, X86::TEST64mi32, TB_FOLDED_LOAD },
+ { X86::TEST8ri, X86::TEST8mi, TB_FOLDED_LOAD },
+ // AVX 128-bit versions of foldable instructions
+ { X86::VEXTRACTPSrr,X86::VEXTRACTPSmr, TB_FOLDED_STORE | TB_ALIGN_16 },
+ { X86::FsVMOVAPDrr, X86::VMOVSDmr, TB_FOLDED_STORE | TB_NO_REVERSE },
+ { X86::FsVMOVAPSrr, X86::VMOVSSmr, TB_FOLDED_STORE | TB_NO_REVERSE },
+ { X86::VMOVAPDrr, X86::VMOVAPDmr, TB_FOLDED_STORE | TB_ALIGN_16 },
+ { X86::VMOVAPSrr, X86::VMOVAPSmr, TB_FOLDED_STORE | TB_ALIGN_16 },
+ { X86::VMOVDQArr, X86::VMOVDQAmr, TB_FOLDED_STORE | TB_ALIGN_16 },
+ { X86::VMOVPDI2DIrr,X86::VMOVPDI2DImr, TB_FOLDED_STORE },
+ { X86::VMOVPQIto64rr, X86::VMOVPQI2QImr,TB_FOLDED_STORE },
+ { X86::VMOVSDto64rr,X86::VMOVSDto64mr, TB_FOLDED_STORE },
+ { X86::VMOVSS2DIrr, X86::VMOVSS2DImr, TB_FOLDED_STORE },
+ { X86::VMOVUPDrr, X86::VMOVUPDmr, TB_FOLDED_STORE },
+ { X86::VMOVUPSrr, X86::VMOVUPSmr, TB_FOLDED_STORE },
+ // AVX 256-bit foldable instructions
+ { X86::VMOVAPDYrr, X86::VMOVAPDYmr, TB_FOLDED_STORE | TB_ALIGN_32 },
+ { X86::VMOVAPSYrr, X86::VMOVAPSYmr, TB_FOLDED_STORE | TB_ALIGN_32 },
+ { X86::VMOVDQAYrr, X86::VMOVDQAYmr, TB_FOLDED_STORE | TB_ALIGN_32 },
+ { X86::VMOVUPDYrr, X86::VMOVUPDYmr, TB_FOLDED_STORE },
+ { X86::VMOVUPSYrr, X86::VMOVUPSYmr, TB_FOLDED_STORE }
+ };
+
+ for (unsigned i = 0, e = array_lengthof(OpTbl0); i != e; ++i) {
+ unsigned RegOp = OpTbl0[i][0];
+ unsigned MemOp = OpTbl0[i][1];
+ unsigned Flags = OpTbl0[i][2];
+ AddTableEntry(RegOp2MemOpTable0, MemOp2RegOpTable,
+ RegOp, MemOp, TB_INDEX_0 | Flags);
+ }
+
+ static const unsigned OpTbl1[][3] = {
+ { X86::CMP16rr, X86::CMP16rm, 0 },
+ { X86::CMP32rr, X86::CMP32rm, 0 },
+ { X86::CMP64rr, X86::CMP64rm, 0 },
+ { X86::CMP8rr, X86::CMP8rm, 0 },
+ { X86::CVTSD2SSrr, X86::CVTSD2SSrm, 0 },
+ { X86::CVTSI2SD64rr, X86::CVTSI2SD64rm, 0 },
+ { X86::CVTSI2SDrr, X86::CVTSI2SDrm, 0 },
+ { X86::CVTSI2SS64rr, X86::CVTSI2SS64rm, 0 },
+ { X86::CVTSI2SSrr, X86::CVTSI2SSrm, 0 },
+ { X86::CVTSS2SDrr, X86::CVTSS2SDrm, 0 },
+ { X86::CVTTSD2SI64rr, X86::CVTTSD2SI64rm, 0 },
+ { X86::CVTTSD2SIrr, X86::CVTTSD2SIrm, 0 },
+ { X86::CVTTSS2SI64rr, X86::CVTTSS2SI64rm, 0 },
+ { X86::CVTTSS2SIrr, X86::CVTTSS2SIrm, 0 },
+ { X86::FsMOVAPDrr, X86::MOVSDrm, TB_NO_REVERSE },
+ { X86::FsMOVAPSrr, X86::MOVSSrm, TB_NO_REVERSE },
+ { X86::IMUL16rri, X86::IMUL16rmi, 0 },
+ { X86::IMUL16rri8, X86::IMUL16rmi8, 0 },
+ { X86::IMUL32rri, X86::IMUL32rmi, 0 },
+ { X86::IMUL32rri8, X86::IMUL32rmi8, 0 },
+ { X86::IMUL64rri32, X86::IMUL64rmi32, 0 },
+ { X86::IMUL64rri8, X86::IMUL64rmi8, 0 },
+ { X86::Int_COMISDrr, X86::Int_COMISDrm, 0 },
+ { X86::Int_COMISSrr, X86::Int_COMISSrm, 0 },
+ { X86::Int_CVTDQ2PDrr, X86::Int_CVTDQ2PDrm, TB_ALIGN_16 },
+ { X86::Int_CVTDQ2PSrr, X86::Int_CVTDQ2PSrm, TB_ALIGN_16 },
+ { X86::Int_CVTPD2DQrr, X86::Int_CVTPD2DQrm, TB_ALIGN_16 },
+ { X86::Int_CVTPD2PSrr, X86::Int_CVTPD2PSrm, TB_ALIGN_16 },
+ { X86::Int_CVTPS2DQrr, X86::Int_CVTPS2DQrm, TB_ALIGN_16 },
+ { X86::Int_CVTPS2PDrr, X86::Int_CVTPS2PDrm, 0 },
+ { X86::CVTSD2SI64rr, X86::CVTSD2SI64rm, 0 },
+ { X86::CVTSD2SIrr, X86::CVTSD2SIrm, 0 },
+ { X86::Int_CVTSD2SSrr, X86::Int_CVTSD2SSrm, 0 },
+ { X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm, 0 },
+ { X86::Int_CVTSI2SDrr, X86::Int_CVTSI2SDrm, 0 },
+ { X86::Int_CVTSI2SS64rr,X86::Int_CVTSI2SS64rm, 0 },
+ { X86::Int_CVTSI2SSrr, X86::Int_CVTSI2SSrm, 0 },
+ { X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm, 0 },
+ { X86::CVTTPD2DQrr, X86::CVTTPD2DQrm, TB_ALIGN_16 },
+ { X86::CVTTPS2DQrr, X86::CVTTPS2DQrm, TB_ALIGN_16 },
+ { X86::Int_CVTTSD2SI64rr,X86::Int_CVTTSD2SI64rm, 0 },
+ { X86::Int_CVTTSD2SIrr, X86::Int_CVTTSD2SIrm, 0 },
+ { X86::Int_CVTTSS2SI64rr,X86::Int_CVTTSS2SI64rm, 0 },
+ { X86::Int_CVTTSS2SIrr, X86::Int_CVTTSS2SIrm, 0 },
+ { X86::Int_UCOMISDrr, X86::Int_UCOMISDrm, 0 },
+ { X86::Int_UCOMISSrr, X86::Int_UCOMISSrm, 0 },
+ { X86::MOV16rr, X86::MOV16rm, 0 },
+ { X86::MOV32rr, X86::MOV32rm, 0 },
+ { X86::MOV64rr, X86::MOV64rm, 0 },
+ { X86::MOV64toPQIrr, X86::MOVQI2PQIrm, 0 },
+ { X86::MOV64toSDrr, X86::MOV64toSDrm, 0 },
+ { X86::MOV8rr, X86::MOV8rm, 0 },
+ { X86::MOVAPDrr, X86::MOVAPDrm, TB_ALIGN_16 },
+ { X86::MOVAPSrr, X86::MOVAPSrm, TB_ALIGN_16 },
+ { X86::MOVDDUPrr, X86::MOVDDUPrm, 0 },
+ { X86::MOVDI2PDIrr, X86::MOVDI2PDIrm, 0 },
+ { X86::MOVDI2SSrr, X86::MOVDI2SSrm, 0 },
+ { X86::MOVDQArr, X86::MOVDQArm, TB_ALIGN_16 },
+ { X86::MOVSHDUPrr, X86::MOVSHDUPrm, TB_ALIGN_16 },
+ { X86::MOVSLDUPrr, X86::MOVSLDUPrm, TB_ALIGN_16 },
+ { X86::MOVSX16rr8, X86::MOVSX16rm8, 0 },
+ { X86::MOVSX32rr16, X86::MOVSX32rm16, 0 },
+ { X86::MOVSX32rr8, X86::MOVSX32rm8, 0 },
+ { X86::MOVSX64rr16, X86::MOVSX64rm16, 0 },
+ { X86::MOVSX64rr32, X86::MOVSX64rm32, 0 },
+ { X86::MOVSX64rr8, X86::MOVSX64rm8, 0 },
+ { X86::MOVUPDrr, X86::MOVUPDrm, TB_ALIGN_16 },
+ { X86::MOVUPSrr, X86::MOVUPSrm, 0 },
+ { X86::MOVZDI2PDIrr, X86::MOVZDI2PDIrm, 0 },
+ { X86::MOVZQI2PQIrr, X86::MOVZQI2PQIrm, 0 },
+ { X86::MOVZPQILo2PQIrr, X86::MOVZPQILo2PQIrm, TB_ALIGN_16 },
+ { X86::MOVZX16rr8, X86::MOVZX16rm8, 0 },
+ { X86::MOVZX32rr16, X86::MOVZX32rm16, 0 },
+ { X86::MOVZX32_NOREXrr8, X86::MOVZX32_NOREXrm8, 0 },
+ { X86::MOVZX32rr8, X86::MOVZX32rm8, 0 },
+ { X86::MOVZX64rr16, X86::MOVZX64rm16, 0 },
+ { X86::MOVZX64rr32, X86::MOVZX64rm32, 0 },
+ { X86::MOVZX64rr8, X86::MOVZX64rm8, 0 },
+ { X86::PSHUFDri, X86::PSHUFDmi, TB_ALIGN_16 },
+ { X86::PSHUFHWri, X86::PSHUFHWmi, TB_ALIGN_16 },
+ { X86::PSHUFLWri, X86::PSHUFLWmi, TB_ALIGN_16 },
+ { X86::RCPPSr, X86::RCPPSm, TB_ALIGN_16 },
+ { X86::RCPPSr_Int, X86::RCPPSm_Int, TB_ALIGN_16 },
+ { X86::RSQRTPSr, X86::RSQRTPSm, TB_ALIGN_16 },
+ { X86::RSQRTPSr_Int, X86::RSQRTPSm_Int, TB_ALIGN_16 },
+ { X86::RSQRTSSr, X86::RSQRTSSm, 0 },
+ { X86::RSQRTSSr_Int, X86::RSQRTSSm_Int, 0 },
+ { X86::SQRTPDr, X86::SQRTPDm, TB_ALIGN_16 },
+ { X86::SQRTPDr_Int, X86::SQRTPDm_Int, TB_ALIGN_16 },
+ { X86::SQRTPSr, X86::SQRTPSm, TB_ALIGN_16 },
+ { X86::SQRTPSr_Int, X86::SQRTPSm_Int, TB_ALIGN_16 },
+ { X86::SQRTSDr, X86::SQRTSDm, 0 },
+ { X86::SQRTSDr_Int, X86::SQRTSDm_Int, 0 },
+ { X86::SQRTSSr, X86::SQRTSSm, 0 },
+ { X86::SQRTSSr_Int, X86::SQRTSSm_Int, 0 },
+ { X86::TEST16rr, X86::TEST16rm, 0 },
+ { X86::TEST32rr, X86::TEST32rm, 0 },
+ { X86::TEST64rr, X86::TEST64rm, 0 },
+ { X86::TEST8rr, X86::TEST8rm, 0 },
+ // FIXME: TEST*rr EAX,EAX ---> CMP [mem], 0
+ { X86::UCOMISDrr, X86::UCOMISDrm, 0 },
+ { X86::UCOMISSrr, X86::UCOMISSrm, 0 },
+ // AVX 128-bit versions of foldable instructions
+ { X86::Int_VCOMISDrr, X86::Int_VCOMISDrm, 0 },
+ { X86::Int_VCOMISSrr, X86::Int_VCOMISSrm, 0 },
+ { X86::Int_VCVTDQ2PDrr, X86::Int_VCVTDQ2PDrm, TB_ALIGN_16 },
+ { X86::Int_VCVTDQ2PSrr, X86::Int_VCVTDQ2PSrm, TB_ALIGN_16 },
+ { X86::Int_VCVTPD2DQrr, X86::Int_VCVTPD2DQrm, TB_ALIGN_16 },
+ { X86::Int_VCVTPD2PSrr, X86::Int_VCVTPD2PSrm, TB_ALIGN_16 },
+ { X86::Int_VCVTPS2DQrr, X86::Int_VCVTPS2DQrm, TB_ALIGN_16 },
+ { X86::Int_VCVTPS2PDrr, X86::Int_VCVTPS2PDrm, 0 },
+ { X86::Int_VUCOMISDrr, X86::Int_VUCOMISDrm, 0 },
+ { X86::Int_VUCOMISSrr, X86::Int_VUCOMISSrm, 0 },
+ { X86::FsVMOVAPDrr, X86::VMOVSDrm, TB_NO_REVERSE },
+ { X86::FsVMOVAPSrr, X86::VMOVSSrm, TB_NO_REVERSE },
+ { X86::VMOV64toPQIrr, X86::VMOVQI2PQIrm, 0 },
+ { X86::VMOV64toSDrr, X86::VMOV64toSDrm, 0 },
+ { X86::VMOVAPDrr, X86::VMOVAPDrm, TB_ALIGN_16 },
+ { X86::VMOVAPSrr, X86::VMOVAPSrm, TB_ALIGN_16 },
+ { X86::VMOVDDUPrr, X86::VMOVDDUPrm, 0 },
+ { X86::VMOVDI2PDIrr, X86::VMOVDI2PDIrm, 0 },
+ { X86::VMOVDI2SSrr, X86::VMOVDI2SSrm, 0 },
+ { X86::VMOVDQArr, X86::VMOVDQArm, TB_ALIGN_16 },
+ { X86::VMOVSLDUPrr, X86::VMOVSLDUPrm, TB_ALIGN_16 },
+ { X86::VMOVSHDUPrr, X86::VMOVSHDUPrm, TB_ALIGN_16 },
+ { X86::VMOVUPDrr, X86::VMOVUPDrm, TB_ALIGN_16 },
+ { X86::VMOVUPSrr, X86::VMOVUPSrm, 0 },
+ { X86::VMOVZDI2PDIrr, X86::VMOVZDI2PDIrm, 0 },
+ { X86::VMOVZQI2PQIrr, X86::VMOVZQI2PQIrm, 0 },
+ { X86::VMOVZPQILo2PQIrr,X86::VMOVZPQILo2PQIrm, TB_ALIGN_16 },
+ { X86::VPSHUFDri, X86::VPSHUFDmi, TB_ALIGN_16 },
+ { X86::VPSHUFHWri, X86::VPSHUFHWmi, TB_ALIGN_16 },
+ { X86::VPSHUFLWri, X86::VPSHUFLWmi, TB_ALIGN_16 },
+ { X86::VRCPPSr, X86::VRCPPSm, TB_ALIGN_16 },
+ { X86::VRCPPSr_Int, X86::VRCPPSm_Int, TB_ALIGN_16 },
+ { X86::VRSQRTPSr, X86::VRSQRTPSm, TB_ALIGN_16 },
+ { X86::VRSQRTPSr_Int, X86::VRSQRTPSm_Int, TB_ALIGN_16 },
+ { X86::VSQRTPDr, X86::VSQRTPDm, TB_ALIGN_16 },
+ { X86::VSQRTPDr_Int, X86::VSQRTPDm_Int, TB_ALIGN_16 },
+ { X86::VSQRTPSr, X86::VSQRTPSm, TB_ALIGN_16 },
+ { X86::VSQRTPSr_Int, X86::VSQRTPSm_Int, TB_ALIGN_16 },
+ { X86::VUCOMISDrr, X86::VUCOMISDrm, 0 },
+ { X86::VUCOMISSrr, X86::VUCOMISSrm, 0 },
+ // AVX 256-bit foldable instructions
+ { X86::VMOVAPDYrr, X86::VMOVAPDYrm, TB_ALIGN_32 },
+ { X86::VMOVAPSYrr, X86::VMOVAPSYrm, TB_ALIGN_32 },
+ { X86::VMOVDQAYrr, X86::VMOVDQAYrm, TB_ALIGN_16 },
+ { X86::VMOVUPDYrr, X86::VMOVUPDYrm, 0 },
+ { X86::VMOVUPSYrr, X86::VMOVUPSYrm, 0 }
+ };
+
+ for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) {
+ unsigned RegOp = OpTbl1[i][0];
+ unsigned MemOp = OpTbl1[i][1];
+ unsigned Flags = OpTbl1[i][2];
+ AddTableEntry(RegOp2MemOpTable1, MemOp2RegOpTable,
+ RegOp, MemOp,
+ // Index 1, folded load
+ Flags | TB_INDEX_1 | TB_FOLDED_LOAD);
+ }
+
+ static const unsigned OpTbl2[][3] = {
+ { X86::ADC32rr, X86::ADC32rm, 0 },
+ { X86::ADC64rr, X86::ADC64rm, 0 },
+ { X86::ADD16rr, X86::ADD16rm, 0 },
+ { X86::ADD16rr_DB, X86::ADD16rm, TB_NO_REVERSE },
+ { X86::ADD32rr, X86::ADD32rm, 0 },
+ { X86::ADD32rr_DB, X86::ADD32rm, TB_NO_REVERSE },
+ { X86::ADD64rr, X86::ADD64rm, 0 },
+ { X86::ADD64rr_DB, X86::ADD64rm, TB_NO_REVERSE },
+ { X86::ADD8rr, X86::ADD8rm, 0 },
+ { X86::ADDPDrr, X86::ADDPDrm, TB_ALIGN_16 },
+ { X86::ADDPSrr, X86::ADDPSrm, TB_ALIGN_16 },
+ { X86::ADDSDrr, X86::ADDSDrm, 0 },
+ { X86::ADDSSrr, X86::ADDSSrm, 0 },
+ { X86::ADDSUBPDrr, X86::ADDSUBPDrm, TB_ALIGN_16 },
+ { X86::ADDSUBPSrr, X86::ADDSUBPSrm, TB_ALIGN_16 },
+ { X86::AND16rr, X86::AND16rm, 0 },
+ { X86::AND32rr, X86::AND32rm, 0 },
+ { X86::AND64rr, X86::AND64rm, 0 },
+ { X86::AND8rr, X86::AND8rm, 0 },
+ { X86::ANDNPDrr, X86::ANDNPDrm, TB_ALIGN_16 },
+ { X86::ANDNPSrr, X86::ANDNPSrm, TB_ALIGN_16 },
+ { X86::ANDPDrr, X86::ANDPDrm, TB_ALIGN_16 },
+ { X86::ANDPSrr, X86::ANDPSrm, TB_ALIGN_16 },
+ { X86::CMOVA16rr, X86::CMOVA16rm, 0 },
+ { X86::CMOVA32rr, X86::CMOVA32rm, 0 },
+ { X86::CMOVA64rr, X86::CMOVA64rm, 0 },
+ { X86::CMOVAE16rr, X86::CMOVAE16rm, 0 },
+ { X86::CMOVAE32rr, X86::CMOVAE32rm, 0 },
+ { X86::CMOVAE64rr, X86::CMOVAE64rm, 0 },
+ { X86::CMOVB16rr, X86::CMOVB16rm, 0 },
+ { X86::CMOVB32rr, X86::CMOVB32rm, 0 },
+ { X86::CMOVB64rr, X86::CMOVB64rm, 0 },
+ { X86::CMOVBE16rr, X86::CMOVBE16rm, 0 },
+ { X86::CMOVBE32rr, X86::CMOVBE32rm, 0 },
+ { X86::CMOVBE64rr, X86::CMOVBE64rm, 0 },
+ { X86::CMOVE16rr, X86::CMOVE16rm, 0 },
+ { X86::CMOVE32rr, X86::CMOVE32rm, 0 },
+ { X86::CMOVE64rr, X86::CMOVE64rm, 0 },
+ { X86::CMOVG16rr, X86::CMOVG16rm, 0 },
+ { X86::CMOVG32rr, X86::CMOVG32rm, 0 },
+ { X86::CMOVG64rr, X86::CMOVG64rm, 0 },
+ { X86::CMOVGE16rr, X86::CMOVGE16rm, 0 },
+ { X86::CMOVGE32rr, X86::CMOVGE32rm, 0 },
+ { X86::CMOVGE64rr, X86::CMOVGE64rm, 0 },
+ { X86::CMOVL16rr, X86::CMOVL16rm, 0 },
+ { X86::CMOVL32rr, X86::CMOVL32rm, 0 },
+ { X86::CMOVL64rr, X86::CMOVL64rm, 0 },
+ { X86::CMOVLE16rr, X86::CMOVLE16rm, 0 },
+ { X86::CMOVLE32rr, X86::CMOVLE32rm, 0 },
+ { X86::CMOVLE64rr, X86::CMOVLE64rm, 0 },
+ { X86::CMOVNE16rr, X86::CMOVNE16rm, 0 },
+ { X86::CMOVNE32rr, X86::CMOVNE32rm, 0 },
+ { X86::CMOVNE64rr, X86::CMOVNE64rm, 0 },
+ { X86::CMOVNO16rr, X86::CMOVNO16rm, 0 },
+ { X86::CMOVNO32rr, X86::CMOVNO32rm, 0 },
+ { X86::CMOVNO64rr, X86::CMOVNO64rm, 0 },
+ { X86::CMOVNP16rr, X86::CMOVNP16rm, 0 },
+ { X86::CMOVNP32rr, X86::CMOVNP32rm, 0 },
+ { X86::CMOVNP64rr, X86::CMOVNP64rm, 0 },
+ { X86::CMOVNS16rr, X86::CMOVNS16rm, 0 },
+ { X86::CMOVNS32rr, X86::CMOVNS32rm, 0 },
+ { X86::CMOVNS64rr, X86::CMOVNS64rm, 0 },
+ { X86::CMOVO16rr, X86::CMOVO16rm, 0 },
+ { X86::CMOVO32rr, X86::CMOVO32rm, 0 },
+ { X86::CMOVO64rr, X86::CMOVO64rm, 0 },
+ { X86::CMOVP16rr, X86::CMOVP16rm, 0 },
+ { X86::CMOVP32rr, X86::CMOVP32rm, 0 },
+ { X86::CMOVP64rr, X86::CMOVP64rm, 0 },
+ { X86::CMOVS16rr, X86::CMOVS16rm, 0 },
+ { X86::CMOVS32rr, X86::CMOVS32rm, 0 },
+ { X86::CMOVS64rr, X86::CMOVS64rm, 0 },
+ { X86::CMPPDrri, X86::CMPPDrmi, TB_ALIGN_16 },
+ { X86::CMPPSrri, X86::CMPPSrmi, TB_ALIGN_16 },
+ { X86::CMPSDrr, X86::CMPSDrm, 0 },
+ { X86::CMPSSrr, X86::CMPSSrm, 0 },
+ { X86::DIVPDrr, X86::DIVPDrm, TB_ALIGN_16 },
+ { X86::DIVPSrr, X86::DIVPSrm, TB_ALIGN_16 },
+ { X86::DIVSDrr, X86::DIVSDrm, 0 },
+ { X86::DIVSSrr, X86::DIVSSrm, 0 },
+ { X86::FsANDNPDrr, X86::FsANDNPDrm, TB_ALIGN_16 },
+ { X86::FsANDNPSrr, X86::FsANDNPSrm, TB_ALIGN_16 },
+ { X86::FsANDPDrr, X86::FsANDPDrm, TB_ALIGN_16 },
+ { X86::FsANDPSrr, X86::FsANDPSrm, TB_ALIGN_16 },
+ { X86::FsORPDrr, X86::FsORPDrm, TB_ALIGN_16 },
+ { X86::FsORPSrr, X86::FsORPSrm, TB_ALIGN_16 },
+ { X86::FsXORPDrr, X86::FsXORPDrm, TB_ALIGN_16 },
+ { X86::FsXORPSrr, X86::FsXORPSrm, TB_ALIGN_16 },
+ { X86::HADDPDrr, X86::HADDPDrm, TB_ALIGN_16 },
+ { X86::HADDPSrr, X86::HADDPSrm, TB_ALIGN_16 },
+ { X86::HSUBPDrr, X86::HSUBPDrm, TB_ALIGN_16 },
+ { X86::HSUBPSrr, X86::HSUBPSrm, TB_ALIGN_16 },
+ { X86::IMUL16rr, X86::IMUL16rm, 0 },
+ { X86::IMUL32rr, X86::IMUL32rm, 0 },
+ { X86::IMUL64rr, X86::IMUL64rm, 0 },
+ { X86::Int_CMPSDrr, X86::Int_CMPSDrm, 0 },
+ { X86::Int_CMPSSrr, X86::Int_CMPSSrm, 0 },
+ { X86::MAXPDrr, X86::MAXPDrm, TB_ALIGN_16 },
+ { X86::MAXPDrr_Int, X86::MAXPDrm_Int, TB_ALIGN_16 },
+ { X86::MAXPSrr, X86::MAXPSrm, TB_ALIGN_16 },
+ { X86::MAXPSrr_Int, X86::MAXPSrm_Int, TB_ALIGN_16 },
+ { X86::MAXSDrr, X86::MAXSDrm, 0 },
+ { X86::MAXSDrr_Int, X86::MAXSDrm_Int, 0 },
+ { X86::MAXSSrr, X86::MAXSSrm, 0 },
+ { X86::MAXSSrr_Int, X86::MAXSSrm_Int, 0 },
+ { X86::MINPDrr, X86::MINPDrm, TB_ALIGN_16 },
+ { X86::MINPDrr_Int, X86::MINPDrm_Int, TB_ALIGN_16 },
+ { X86::MINPSrr, X86::MINPSrm, TB_ALIGN_16 },
+ { X86::MINPSrr_Int, X86::MINPSrm_Int, TB_ALIGN_16 },
+ { X86::MINSDrr, X86::MINSDrm, 0 },
+ { X86::MINSDrr_Int, X86::MINSDrm_Int, 0 },
+ { X86::MINSSrr, X86::MINSSrm, 0 },
+ { X86::MINSSrr_Int, X86::MINSSrm_Int, 0 },
+ { X86::MULPDrr, X86::MULPDrm, TB_ALIGN_16 },
+ { X86::MULPSrr, X86::MULPSrm, TB_ALIGN_16 },
+ { X86::MULSDrr, X86::MULSDrm, 0 },
+ { X86::MULSSrr, X86::MULSSrm, 0 },
+ { X86::OR16rr, X86::OR16rm, 0 },
+ { X86::OR32rr, X86::OR32rm, 0 },
+ { X86::OR64rr, X86::OR64rm, 0 },
+ { X86::OR8rr, X86::OR8rm, 0 },
+ { X86::ORPDrr, X86::ORPDrm, TB_ALIGN_16 },
+ { X86::ORPSrr, X86::ORPSrm, TB_ALIGN_16 },
+ { X86::PACKSSDWrr, X86::PACKSSDWrm, TB_ALIGN_16 },
+ { X86::PACKSSWBrr, X86::PACKSSWBrm, TB_ALIGN_16 },
+ { X86::PACKUSWBrr, X86::PACKUSWBrm, TB_ALIGN_16 },
+ { X86::PADDBrr, X86::PADDBrm, TB_ALIGN_16 },
+ { X86::PADDDrr, X86::PADDDrm, TB_ALIGN_16 },
+ { X86::PADDQrr, X86::PADDQrm, TB_ALIGN_16 },
+ { X86::PADDSBrr, X86::PADDSBrm, TB_ALIGN_16 },
+ { X86::PADDSWrr, X86::PADDSWrm, TB_ALIGN_16 },
+ { X86::PADDWrr, X86::PADDWrm, TB_ALIGN_16 },
+ { X86::PANDNrr, X86::PANDNrm, TB_ALIGN_16 },
+ { X86::PANDrr, X86::PANDrm, TB_ALIGN_16 },
+ { X86::PAVGBrr, X86::PAVGBrm, TB_ALIGN_16 },
+ { X86::PAVGWrr, X86::PAVGWrm, TB_ALIGN_16 },
+ { X86::PCMPEQBrr, X86::PCMPEQBrm, TB_ALIGN_16 },
+ { X86::PCMPEQDrr, X86::PCMPEQDrm, TB_ALIGN_16 },
+ { X86::PCMPEQWrr, X86::PCMPEQWrm, TB_ALIGN_16 },
+ { X86::PCMPGTBrr, X86::PCMPGTBrm, TB_ALIGN_16 },
+ { X86::PCMPGTDrr, X86::PCMPGTDrm, TB_ALIGN_16 },
+ { X86::PCMPGTWrr, X86::PCMPGTWrm, TB_ALIGN_16 },
+ { X86::PINSRWrri, X86::PINSRWrmi, TB_ALIGN_16 },
+ { X86::PMADDWDrr, X86::PMADDWDrm, TB_ALIGN_16 },
+ { X86::PMAXSWrr, X86::PMAXSWrm, TB_ALIGN_16 },
+ { X86::PMAXUBrr, X86::PMAXUBrm, TB_ALIGN_16 },
+ { X86::PMINSWrr, X86::PMINSWrm, TB_ALIGN_16 },
+ { X86::PMINUBrr, X86::PMINUBrm, TB_ALIGN_16 },
+ { X86::PMULDQrr, X86::PMULDQrm, TB_ALIGN_16 },
+ { X86::PMULHUWrr, X86::PMULHUWrm, TB_ALIGN_16 },
+ { X86::PMULHWrr, X86::PMULHWrm, TB_ALIGN_16 },
+ { X86::PMULLDrr, X86::PMULLDrm, TB_ALIGN_16 },
+ { X86::PMULLWrr, X86::PMULLWrm, TB_ALIGN_16 },
+ { X86::PMULUDQrr, X86::PMULUDQrm, TB_ALIGN_16 },
+ { X86::PORrr, X86::PORrm, TB_ALIGN_16 },
+ { X86::PSADBWrr, X86::PSADBWrm, TB_ALIGN_16 },
+ { X86::PSLLDrr, X86::PSLLDrm, TB_ALIGN_16 },
+ { X86::PSLLQrr, X86::PSLLQrm, TB_ALIGN_16 },
+ { X86::PSLLWrr, X86::PSLLWrm, TB_ALIGN_16 },
+ { X86::PSRADrr, X86::PSRADrm, TB_ALIGN_16 },
+ { X86::PSRAWrr, X86::PSRAWrm, TB_ALIGN_16 },
+ { X86::PSRLDrr, X86::PSRLDrm, TB_ALIGN_16 },
+ { X86::PSRLQrr, X86::PSRLQrm, TB_ALIGN_16 },
+ { X86::PSRLWrr, X86::PSRLWrm, TB_ALIGN_16 },
+ { X86::PSUBBrr, X86::PSUBBrm, TB_ALIGN_16 },
+ { X86::PSUBDrr, X86::PSUBDrm, TB_ALIGN_16 },
+ { X86::PSUBSBrr, X86::PSUBSBrm, TB_ALIGN_16 },
+ { X86::PSUBSWrr, X86::PSUBSWrm, TB_ALIGN_16 },
+ { X86::PSUBWrr, X86::PSUBWrm, TB_ALIGN_16 },
+ { X86::PUNPCKHBWrr, X86::PUNPCKHBWrm, TB_ALIGN_16 },
+ { X86::PUNPCKHDQrr, X86::PUNPCKHDQrm, TB_ALIGN_16 },
+ { X86::PUNPCKHQDQrr, X86::PUNPCKHQDQrm, TB_ALIGN_16 },
+ { X86::PUNPCKHWDrr, X86::PUNPCKHWDrm, TB_ALIGN_16 },
+ { X86::PUNPCKLBWrr, X86::PUNPCKLBWrm, TB_ALIGN_16 },
+ { X86::PUNPCKLDQrr, X86::PUNPCKLDQrm, TB_ALIGN_16 },
+ { X86::PUNPCKLQDQrr, X86::PUNPCKLQDQrm, TB_ALIGN_16 },
+ { X86::PUNPCKLWDrr, X86::PUNPCKLWDrm, TB_ALIGN_16 },
+ { X86::PXORrr, X86::PXORrm, TB_ALIGN_16 },
+ { X86::SBB32rr, X86::SBB32rm, 0 },
+ { X86::SBB64rr, X86::SBB64rm, 0 },
+ { X86::SHUFPDrri, X86::SHUFPDrmi, TB_ALIGN_16 },
+ { X86::SHUFPSrri, X86::SHUFPSrmi, TB_ALIGN_16 },
+ { X86::SUB16rr, X86::SUB16rm, 0 },
+ { X86::SUB32rr, X86::SUB32rm, 0 },
+ { X86::SUB64rr, X86::SUB64rm, 0 },
+ { X86::SUB8rr, X86::SUB8rm, 0 },
+ { X86::SUBPDrr, X86::SUBPDrm, TB_ALIGN_16 },
+ { X86::SUBPSrr, X86::SUBPSrm, TB_ALIGN_16 },
+ { X86::SUBSDrr, X86::SUBSDrm, 0 },
+ { X86::SUBSSrr, X86::SUBSSrm, 0 },
+ // FIXME: TEST*rr -> swapped operand of TEST*mr.
+ { X86::UNPCKHPDrr, X86::UNPCKHPDrm, TB_ALIGN_16 },
+ { X86::UNPCKHPSrr, X86::UNPCKHPSrm, TB_ALIGN_16 },
+ { X86::UNPCKLPDrr, X86::UNPCKLPDrm, TB_ALIGN_16 },
+ { X86::UNPCKLPSrr, X86::UNPCKLPSrm, TB_ALIGN_16 },
+ { X86::XOR16rr, X86::XOR16rm, 0 },
+ { X86::XOR32rr, X86::XOR32rm, 0 },
+ { X86::XOR64rr, X86::XOR64rm, 0 },
+ { X86::XOR8rr, X86::XOR8rm, 0 },
+ { X86::XORPDrr, X86::XORPDrm, TB_ALIGN_16 },
+ { X86::XORPSrr, X86::XORPSrm, TB_ALIGN_16 },
+ // AVX 128-bit versions of foldable instructions
+ { X86::VCVTSD2SSrr, X86::VCVTSD2SSrm, 0 },
+ { X86::Int_VCVTSD2SSrr, X86::Int_VCVTSD2SSrm, 0 },
+ { X86::VCVTSI2SD64rr, X86::VCVTSI2SD64rm, 0 },
+ { X86::Int_VCVTSI2SD64rr, X86::Int_VCVTSI2SD64rm, 0 },
+ { X86::VCVTSI2SDrr, X86::VCVTSI2SDrm, 0 },
+ { X86::Int_VCVTSI2SDrr, X86::Int_VCVTSI2SDrm, 0 },
+ { X86::VCVTSI2SS64rr, X86::VCVTSI2SS64rm, 0 },
+ { X86::Int_VCVTSI2SS64rr, X86::Int_VCVTSI2SS64rm, 0 },
+ { X86::VCVTSI2SSrr, X86::VCVTSI2SSrm, 0 },
+ { X86::Int_VCVTSI2SSrr, X86::Int_VCVTSI2SSrm, 0 },
+ { X86::VCVTSS2SDrr, X86::VCVTSS2SDrm, 0 },
+ { X86::Int_VCVTSS2SDrr, X86::Int_VCVTSS2SDrm, 0 },
+ { X86::VCVTTSD2SI64rr, X86::VCVTTSD2SI64rm, 0 },
+ { X86::Int_VCVTTSD2SI64rr,X86::Int_VCVTTSD2SI64rm, 0 },
+ { X86::VCVTTSD2SIrr, X86::VCVTTSD2SIrm, 0 },
+ { X86::Int_VCVTTSD2SIrr, X86::Int_VCVTTSD2SIrm, 0 },
+ { X86::VCVTTSS2SI64rr, X86::VCVTTSS2SI64rm, 0 },
+ { X86::Int_VCVTTSS2SI64rr,X86::Int_VCVTTSS2SI64rm, 0 },
+ { X86::VCVTTSS2SIrr, X86::VCVTTSS2SIrm, 0 },
+ { X86::Int_VCVTTSS2SIrr, X86::Int_VCVTTSS2SIrm, 0 },
+ { X86::VCVTSD2SI64rr, X86::VCVTSD2SI64rm, 0 },
+ { X86::VCVTSD2SIrr, X86::VCVTSD2SIrm, 0 },
+ { X86::VCVTTPD2DQrr, X86::VCVTTPD2DQrm, TB_ALIGN_16 },
+ { X86::VCVTTPS2DQrr, X86::VCVTTPS2DQrm, TB_ALIGN_16 },
+ { X86::VRSQRTSSr, X86::VRSQRTSSm, 0 },
+ { X86::VSQRTSDr, X86::VSQRTSDm, 0 },
+ { X86::VSQRTSSr, X86::VSQRTSSm, 0 },
+ { X86::VADDPDrr, X86::VADDPDrm, TB_ALIGN_16 },
+ { X86::VADDPSrr, X86::VADDPSrm, TB_ALIGN_16 },
+ { X86::VADDSDrr, X86::VADDSDrm, 0 },
+ { X86::VADDSSrr, X86::VADDSSrm, 0 },
+ { X86::VADDSUBPDrr, X86::VADDSUBPDrm, TB_ALIGN_16 },
+ { X86::VADDSUBPSrr, X86::VADDSUBPSrm, TB_ALIGN_16 },
+ { X86::VANDNPDrr, X86::VANDNPDrm, TB_ALIGN_16 },
+ { X86::VANDNPSrr, X86::VANDNPSrm, TB_ALIGN_16 },
+ { X86::VANDPDrr, X86::VANDPDrm, TB_ALIGN_16 },
+ { X86::VANDPSrr, X86::VANDPSrm, TB_ALIGN_16 },
+ { X86::VCMPPDrri, X86::VCMPPDrmi, TB_ALIGN_16 },
+ { X86::VCMPPSrri, X86::VCMPPSrmi, TB_ALIGN_16 },
+ { X86::VCMPSDrr, X86::VCMPSDrm, 0 },
+ { X86::VCMPSSrr, X86::VCMPSSrm, 0 },
+ { X86::VDIVPDrr, X86::VDIVPDrm, TB_ALIGN_16 },
+ { X86::VDIVPSrr, X86::VDIVPSrm, TB_ALIGN_16 },
+ { X86::VDIVSDrr, X86::VDIVSDrm, 0 },
+ { X86::VDIVSSrr, X86::VDIVSSrm, 0 },
+ { X86::VFsANDNPDrr, X86::VFsANDNPDrm, TB_ALIGN_16 },
+ { X86::VFsANDNPSrr, X86::VFsANDNPSrm, TB_ALIGN_16 },
+ { X86::VFsANDPDrr, X86::VFsANDPDrm, TB_ALIGN_16 },
+ { X86::VFsANDPSrr, X86::VFsANDPSrm, TB_ALIGN_16 },
+ { X86::VFsORPDrr, X86::VFsORPDrm, TB_ALIGN_16 },
+ { X86::VFsORPSrr, X86::VFsORPSrm, TB_ALIGN_16 },
+ { X86::VFsXORPDrr, X86::VFsXORPDrm, TB_ALIGN_16 },
+ { X86::VFsXORPSrr, X86::VFsXORPSrm, TB_ALIGN_16 },
+ { X86::VHADDPDrr, X86::VHADDPDrm, TB_ALIGN_16 },
+ { X86::VHADDPSrr, X86::VHADDPSrm, TB_ALIGN_16 },
+ { X86::VHSUBPDrr, X86::VHSUBPDrm, TB_ALIGN_16 },
+ { X86::VHSUBPSrr, X86::VHSUBPSrm, TB_ALIGN_16 },
+ { X86::Int_VCMPSDrr, X86::Int_VCMPSDrm, 0 },
+ { X86::Int_VCMPSSrr, X86::Int_VCMPSSrm, 0 },
+ { X86::VMAXPDrr, X86::VMAXPDrm, TB_ALIGN_16 },
+ { X86::VMAXPDrr_Int, X86::VMAXPDrm_Int, TB_ALIGN_16 },
+ { X86::VMAXPSrr, X86::VMAXPSrm, TB_ALIGN_16 },
+ { X86::VMAXPSrr_Int, X86::VMAXPSrm_Int, TB_ALIGN_16 },
+ { X86::VMAXSDrr, X86::VMAXSDrm, 0 },
+ { X86::VMAXSDrr_Int, X86::VMAXSDrm_Int, 0 },
+ { X86::VMAXSSrr, X86::VMAXSSrm, 0 },
+ { X86::VMAXSSrr_Int, X86::VMAXSSrm_Int, 0 },
+ { X86::VMINPDrr, X86::VMINPDrm, TB_ALIGN_16 },
+ { X86::VMINPDrr_Int, X86::VMINPDrm_Int, TB_ALIGN_16 },
+ { X86::VMINPSrr, X86::VMINPSrm, TB_ALIGN_16 },
+ { X86::VMINPSrr_Int, X86::VMINPSrm_Int, TB_ALIGN_16 },
+ { X86::VMINSDrr, X86::VMINSDrm, 0 },
+ { X86::VMINSDrr_Int, X86::VMINSDrm_Int, 0 },
+ { X86::VMINSSrr, X86::VMINSSrm, 0 },
+ { X86::VMINSSrr_Int, X86::VMINSSrm_Int, 0 },
+ { X86::VMULPDrr, X86::VMULPDrm, TB_ALIGN_16 },
+ { X86::VMULPSrr, X86::VMULPSrm, TB_ALIGN_16 },
+ { X86::VMULSDrr, X86::VMULSDrm, 0 },
+ { X86::VMULSSrr, X86::VMULSSrm, 0 },
+ { X86::VORPDrr, X86::VORPDrm, TB_ALIGN_16 },
+ { X86::VORPSrr, X86::VORPSrm, TB_ALIGN_16 },
+ { X86::VPACKSSDWrr, X86::VPACKSSDWrm, TB_ALIGN_16 },
+ { X86::VPACKSSWBrr, X86::VPACKSSWBrm, TB_ALIGN_16 },
+ { X86::VPACKUSWBrr, X86::VPACKUSWBrm, TB_ALIGN_16 },
+ { X86::VPADDBrr, X86::VPADDBrm, TB_ALIGN_16 },
+ { X86::VPADDDrr, X86::VPADDDrm, TB_ALIGN_16 },
+ { X86::VPADDQrr, X86::VPADDQrm, TB_ALIGN_16 },
+ { X86::VPADDSBrr, X86::VPADDSBrm, TB_ALIGN_16 },
+ { X86::VPADDSWrr, X86::VPADDSWrm, TB_ALIGN_16 },
+ { X86::VPADDWrr, X86::VPADDWrm, TB_ALIGN_16 },
+ { X86::VPANDNrr, X86::VPANDNrm, TB_ALIGN_16 },
+ { X86::VPANDrr, X86::VPANDrm, TB_ALIGN_16 },
+ { X86::VPCMPEQBrr, X86::VPCMPEQBrm, TB_ALIGN_16 },
+ { X86::VPCMPEQDrr, X86::VPCMPEQDrm, TB_ALIGN_16 },
+ { X86::VPCMPEQWrr, X86::VPCMPEQWrm, TB_ALIGN_16 },
+ { X86::VPCMPGTBrr, X86::VPCMPGTBrm, TB_ALIGN_16 },
+ { X86::VPCMPGTDrr, X86::VPCMPGTDrm, TB_ALIGN_16 },
+ { X86::VPCMPGTWrr, X86::VPCMPGTWrm, TB_ALIGN_16 },
+ { X86::VPINSRWrri, X86::VPINSRWrmi, TB_ALIGN_16 },
+ { X86::VPMADDWDrr, X86::VPMADDWDrm, TB_ALIGN_16 },
+ { X86::VPMAXSWrr, X86::VPMAXSWrm, TB_ALIGN_16 },
+ { X86::VPMAXUBrr, X86::VPMAXUBrm, TB_ALIGN_16 },
+ { X86::VPMINSWrr, X86::VPMINSWrm, TB_ALIGN_16 },
+ { X86::VPMINUBrr, X86::VPMINUBrm, TB_ALIGN_16 },
+ { X86::VPMULDQrr, X86::VPMULDQrm, TB_ALIGN_16 },
+ { X86::VPMULHUWrr, X86::VPMULHUWrm, TB_ALIGN_16 },
+ { X86::VPMULHWrr, X86::VPMULHWrm, TB_ALIGN_16 },
+ { X86::VPMULLDrr, X86::VPMULLDrm, TB_ALIGN_16 },
+ { X86::VPMULLWrr, X86::VPMULLWrm, TB_ALIGN_16 },
+ { X86::VPMULUDQrr, X86::VPMULUDQrm, TB_ALIGN_16 },
+ { X86::VPORrr, X86::VPORrm, TB_ALIGN_16 },
+ { X86::VPSADBWrr, X86::VPSADBWrm, TB_ALIGN_16 },
+ { X86::VPSLLDrr, X86::VPSLLDrm, TB_ALIGN_16 },
+ { X86::VPSLLQrr, X86::VPSLLQrm, TB_ALIGN_16 },
+ { X86::VPSLLWrr, X86::VPSLLWrm, TB_ALIGN_16 },
+ { X86::VPSRADrr, X86::VPSRADrm, TB_ALIGN_16 },
+ { X86::VPSRAWrr, X86::VPSRAWrm, TB_ALIGN_16 },
+ { X86::VPSRLDrr, X86::VPSRLDrm, TB_ALIGN_16 },
+ { X86::VPSRLQrr, X86::VPSRLQrm, TB_ALIGN_16 },
+ { X86::VPSRLWrr, X86::VPSRLWrm, TB_ALIGN_16 },
+ { X86::VPSUBBrr, X86::VPSUBBrm, TB_ALIGN_16 },
+ { X86::VPSUBDrr, X86::VPSUBDrm, TB_ALIGN_16 },
+ { X86::VPSUBSBrr, X86::VPSUBSBrm, TB_ALIGN_16 },
+ { X86::VPSUBSWrr, X86::VPSUBSWrm, TB_ALIGN_16 },
+ { X86::VPSUBWrr, X86::VPSUBWrm, TB_ALIGN_16 },
+ { X86::VPUNPCKHBWrr, X86::VPUNPCKHBWrm, TB_ALIGN_16 },
+ { X86::VPUNPCKHDQrr, X86::VPUNPCKHDQrm, TB_ALIGN_16 },
+ { X86::VPUNPCKHQDQrr, X86::VPUNPCKHQDQrm, TB_ALIGN_16 },
+ { X86::VPUNPCKHWDrr, X86::VPUNPCKHWDrm, TB_ALIGN_16 },
+ { X86::VPUNPCKLBWrr, X86::VPUNPCKLBWrm, TB_ALIGN_16 },
+ { X86::VPUNPCKLDQrr, X86::VPUNPCKLDQrm, TB_ALIGN_16 },
+ { X86::VPUNPCKLQDQrr, X86::VPUNPCKLQDQrm, TB_ALIGN_16 },
+ { X86::VPUNPCKLWDrr, X86::VPUNPCKLWDrm, TB_ALIGN_16 },
+ { X86::VPXORrr, X86::VPXORrm, TB_ALIGN_16 },
+ { X86::VSHUFPDrri, X86::VSHUFPDrmi, TB_ALIGN_16 },
+ { X86::VSHUFPSrri, X86::VSHUFPSrmi, TB_ALIGN_16 },
+ { X86::VSUBPDrr, X86::VSUBPDrm, TB_ALIGN_16 },
+ { X86::VSUBPSrr, X86::VSUBPSrm, TB_ALIGN_16 },
+ { X86::VSUBSDrr, X86::VSUBSDrm, 0 },
+ { X86::VSUBSSrr, X86::VSUBSSrm, 0 },
+ { X86::VUNPCKHPDrr, X86::VUNPCKHPDrm, TB_ALIGN_16 },
+ { X86::VUNPCKHPSrr, X86::VUNPCKHPSrm, TB_ALIGN_16 },
+ { X86::VUNPCKLPDrr, X86::VUNPCKLPDrm, TB_ALIGN_16 },
+ { X86::VUNPCKLPSrr, X86::VUNPCKLPSrm, TB_ALIGN_16 },
+ { X86::VXORPDrr, X86::VXORPDrm, TB_ALIGN_16 },
+ { X86::VXORPSrr, X86::VXORPSrm, TB_ALIGN_16 }
+ // FIXME: add AVX 256-bit foldable instructions
+ };
+
+ for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) {
+ unsigned RegOp = OpTbl2[i][0];
+ unsigned MemOp = OpTbl2[i][1];
+ unsigned Flags = OpTbl2[i][2];
+ AddTableEntry(RegOp2MemOpTable2, MemOp2RegOpTable,
+ RegOp, MemOp,
+ // Index 2, folded load
+ Flags | TB_INDEX_2 | TB_FOLDED_LOAD);
+ }
+}
+
+void
+X86InstrInfo::AddTableEntry(RegOp2MemOpTableType &R2MTable,
+ MemOp2RegOpTableType &M2RTable,
+ unsigned RegOp, unsigned MemOp, unsigned Flags) {
+ if ((Flags & TB_NO_FORWARD) == 0) {
+ assert(!R2MTable.count(RegOp) && "Duplicate entry!");
+ R2MTable[RegOp] = std::make_pair(MemOp, Flags);
+ }
+ if ((Flags & TB_NO_REVERSE) == 0) {
+ assert(!M2RTable.count(MemOp) &&
+ "Duplicated entries in unfolding maps?");
+ M2RTable[MemOp] = std::make_pair(RegOp, Flags);
+ }
+}
+
+bool
+X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SubIdx) const {
+ switch (MI.getOpcode()) {
+ default: break;
+ case X86::MOVSX16rr8:
+ case X86::MOVZX16rr8:
+ case X86::MOVSX32rr8:
+ case X86::MOVZX32rr8:
+ case X86::MOVSX64rr8:
+ case X86::MOVZX64rr8:
+ if (!TM.getSubtarget<X86Subtarget>().is64Bit())
+ // It's not always legal to reference the low 8-bit of the larger
+ // register in 32-bit mode.
+ return false;
+ case X86::MOVSX32rr16:
+ case X86::MOVZX32rr16:
+ case X86::MOVSX64rr16:
+ case X86::MOVZX64rr16:
+ case X86::MOVSX64rr32:
+ case X86::MOVZX64rr32: {
+ if (MI.getOperand(0).getSubReg() || MI.getOperand(1).getSubReg())
+ // Be conservative.
+ return false;
+ SrcReg = MI.getOperand(1).getReg();
+ DstReg = MI.getOperand(0).getReg();
+ switch (MI.getOpcode()) {
+ default:
+ llvm_unreachable(0);
+ break;
+ case X86::MOVSX16rr8:
+ case X86::MOVZX16rr8:
+ case X86::MOVSX32rr8:
+ case X86::MOVZX32rr8:
+ case X86::MOVSX64rr8:
+ case X86::MOVZX64rr8:
+ SubIdx = X86::sub_8bit;
+ break;
+ case X86::MOVSX32rr16:
+ case X86::MOVZX32rr16:
+ case X86::MOVSX64rr16:
+ case X86::MOVZX64rr16:
+ SubIdx = X86::sub_16bit;
+ break;
+ case X86::MOVSX64rr32:
+ case X86::MOVZX64rr32:
+ SubIdx = X86::sub_32bit;
+ break;
+ }
+ return true;
+ }
+ }
+ return false;
+}
+
+/// isFrameOperand - Return true and the FrameIndex if the specified
+/// operand and follow operands form a reference to the stack frame.
+bool X86InstrInfo::isFrameOperand(const MachineInstr *MI, unsigned int Op,
+ int &FrameIndex) const {
+ if (MI->getOperand(Op).isFI() && MI->getOperand(Op+1).isImm() &&
+ MI->getOperand(Op+2).isReg() && MI->getOperand(Op+3).isImm() &&
+ MI->getOperand(Op+1).getImm() == 1 &&
+ MI->getOperand(Op+2).getReg() == 0 &&
+ MI->getOperand(Op+3).getImm() == 0) {
+ FrameIndex = MI->getOperand(Op).getIndex();
+ return true;
+ }
+ return false;
+}
+
+static bool isFrameLoadOpcode(int Opcode) {
+ switch (Opcode) {
+ default: break;
+ case X86::MOV8rm:
+ case X86::MOV16rm:
+ case X86::MOV32rm:
+ case X86::MOV64rm:
+ case X86::LD_Fp64m:
+ case X86::MOVSSrm:
+ case X86::MOVSDrm:
+ case X86::MOVAPSrm:
+ case X86::MOVAPDrm:
+ case X86::MOVDQArm:
+ case X86::VMOVSSrm:
+ case X86::VMOVSDrm:
+ case X86::VMOVAPSrm:
+ case X86::VMOVAPDrm:
+ case X86::VMOVDQArm:
+ case X86::VMOVAPSYrm:
+ case X86::VMOVAPDYrm:
+ case X86::VMOVDQAYrm:
+ case X86::MMX_MOVD64rm:
+ case X86::MMX_MOVQ64rm:
+ return true;
+ break;
+ }
+ return false;
+}
+
+static bool isFrameStoreOpcode(int Opcode) {
+ switch (Opcode) {
+ default: break;
+ case X86::MOV8mr:
+ case X86::MOV16mr:
+ case X86::MOV32mr:
+ case X86::MOV64mr:
+ case X86::ST_FpP64m:
+ case X86::MOVSSmr:
+ case X86::MOVSDmr:
+ case X86::MOVAPSmr:
+ case X86::MOVAPDmr:
+ case X86::MOVDQAmr:
+ case X86::VMOVSSmr:
+ case X86::VMOVSDmr:
+ case X86::VMOVAPSmr:
+ case X86::VMOVAPDmr:
+ case X86::VMOVDQAmr:
+ case X86::VMOVAPSYmr:
+ case X86::VMOVAPDYmr:
+ case X86::VMOVDQAYmr:
+ case X86::MMX_MOVD64mr:
+ case X86::MMX_MOVQ64mr:
+ case X86::MMX_MOVNTQmr:
+ return true;
+ }
+ return false;
+}
+
+unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ if (isFrameLoadOpcode(MI->getOpcode()))
+ if (MI->getOperand(0).getSubReg() == 0 && isFrameOperand(MI, 1, FrameIndex))
+ return MI->getOperand(0).getReg();
+ return 0;
+}
+
+unsigned X86InstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const {
+ if (isFrameLoadOpcode(MI->getOpcode())) {
+ unsigned Reg;
+ if ((Reg = isLoadFromStackSlot(MI, FrameIndex)))
+ return Reg;
+ // Check for post-frame index elimination operations
+ const MachineMemOperand *Dummy;
+ return hasLoadFromStackSlot(MI, Dummy, FrameIndex);
+ }
+ return 0;
+}
+
+unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ if (isFrameStoreOpcode(MI->getOpcode()))
+ if (MI->getOperand(X86::AddrNumOperands).getSubReg() == 0 &&
+ isFrameOperand(MI, 0, FrameIndex))
+ return MI->getOperand(X86::AddrNumOperands).getReg();
+ return 0;
+}
+
+unsigned X86InstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const {
+ if (isFrameStoreOpcode(MI->getOpcode())) {
+ unsigned Reg;
+ if ((Reg = isStoreToStackSlot(MI, FrameIndex)))
+ return Reg;
+ // Check for post-frame index elimination operations
+ const MachineMemOperand *Dummy;
+ return hasStoreToStackSlot(MI, Dummy, FrameIndex);
+ }
+ return 0;
+}
+
+/// regIsPICBase - Return true if register is PIC base (i.e.g defined by
+/// X86::MOVPC32r.
+static bool regIsPICBase(unsigned BaseReg, const MachineRegisterInfo &MRI) {
+ bool isPICBase = false;
+ for (MachineRegisterInfo::def_iterator I = MRI.def_begin(BaseReg),
+ E = MRI.def_end(); I != E; ++I) {
+ MachineInstr *DefMI = I.getOperand().getParent();
+ if (DefMI->getOpcode() != X86::MOVPC32r)
+ return false;
+ assert(!isPICBase && "More than one PIC base?");
+ isPICBase = true;
+ }
+ return isPICBase;
+}
+
+bool
+X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI,
+ AliasAnalysis *AA) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case X86::MOV8rm:
+ case X86::MOV16rm:
+ case X86::MOV32rm:
+ case X86::MOV64rm:
+ case X86::LD_Fp64m:
+ case X86::MOVSSrm:
+ case X86::MOVSDrm:
+ case X86::MOVAPSrm:
+ case X86::MOVUPSrm:
+ case X86::MOVAPDrm:
+ case X86::MOVDQArm:
+ case X86::VMOVSSrm:
+ case X86::VMOVSDrm:
+ case X86::VMOVAPSrm:
+ case X86::VMOVUPSrm:
+ case X86::VMOVAPDrm:
+ case X86::VMOVDQArm:
+ case X86::VMOVAPSYrm:
+ case X86::VMOVUPSYrm:
+ case X86::VMOVAPDYrm:
+ case X86::VMOVDQAYrm:
+ case X86::MMX_MOVD64rm:
+ case X86::MMX_MOVQ64rm:
+ case X86::FsVMOVAPSrm:
+ case X86::FsVMOVAPDrm:
+ case X86::FsMOVAPSrm:
+ case X86::FsMOVAPDrm: {
+ // Loads from constant pools are trivially rematerializable.
+ if (MI->getOperand(1).isReg() &&
+ MI->getOperand(2).isImm() &&
+ MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 &&
+ MI->isInvariantLoad(AA)) {
+ unsigned BaseReg = MI->getOperand(1).getReg();
+ if (BaseReg == 0 || BaseReg == X86::RIP)
+ return true;
+ // Allow re-materialization of PIC load.
+ if (!ReMatPICStubLoad && MI->getOperand(4).isGlobal())
+ return false;
+ const MachineFunction &MF = *MI->getParent()->getParent();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ bool isPICBase = false;
+ for (MachineRegisterInfo::def_iterator I = MRI.def_begin(BaseReg),
+ E = MRI.def_end(); I != E; ++I) {
+ MachineInstr *DefMI = I.getOperand().getParent();
+ if (DefMI->getOpcode() != X86::MOVPC32r)
+ return false;
+ assert(!isPICBase && "More than one PIC base?");
+ isPICBase = true;
+ }
+ return isPICBase;
+ }
+ return false;
+ }
+
+ case X86::LEA32r:
+ case X86::LEA64r: {
+ if (MI->getOperand(2).isImm() &&
+ MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 &&
+ !MI->getOperand(4).isReg()) {
+ // lea fi#, lea GV, etc. are all rematerializable.
+ if (!MI->getOperand(1).isReg())
+ return true;
+ unsigned BaseReg = MI->getOperand(1).getReg();
+ if (BaseReg == 0)
+ return true;
+ // Allow re-materialization of lea PICBase + x.
+ const MachineFunction &MF = *MI->getParent()->getParent();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ return regIsPICBase(BaseReg, MRI);
+ }
+ return false;
+ }
+ }
+
+ // All other instructions marked M_REMATERIALIZABLE are always trivially
+ // rematerializable.
+ return true;
+}
+
+/// isSafeToClobberEFLAGS - Return true if it's safe insert an instruction that
+/// would clobber the EFLAGS condition register. Note the result may be
+/// conservative. If it cannot definitely determine the safety after visiting
+/// a few instructions in each direction it assumes it's not safe.
+static bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) {
+ MachineBasicBlock::iterator E = MBB.end();
+
+ // For compile time consideration, if we are not able to determine the
+ // safety after visiting 4 instructions in each direction, we will assume
+ // it's not safe.
+ MachineBasicBlock::iterator Iter = I;
+ for (unsigned i = 0; Iter != E && i < 4; ++i) {
+ bool SeenDef = false;
+ for (unsigned j = 0, e = Iter->getNumOperands(); j != e; ++j) {
+ MachineOperand &MO = Iter->getOperand(j);
+ if (!MO.isReg())
+ continue;
+ if (MO.getReg() == X86::EFLAGS) {
+ if (MO.isUse())
+ return false;
+ SeenDef = true;
+ }
+ }
+
+ if (SeenDef)
+ // This instruction defines EFLAGS, no need to look any further.
+ return true;
+ ++Iter;
+ // Skip over DBG_VALUE.
+ while (Iter != E && Iter->isDebugValue())
+ ++Iter;
+ }
+
+ // It is safe to clobber EFLAGS at the end of a block of no successor has it
+ // live in.
+ if (Iter == E) {
+ for (MachineBasicBlock::succ_iterator SI = MBB.succ_begin(),
+ SE = MBB.succ_end(); SI != SE; ++SI)
+ if ((*SI)->isLiveIn(X86::EFLAGS))
+ return false;
+ return true;
+ }
+
+ MachineBasicBlock::iterator B = MBB.begin();
+ Iter = I;
+ for (unsigned i = 0; i < 4; ++i) {
+ // If we make it to the beginning of the block, it's safe to clobber
+ // EFLAGS iff EFLAGS is not live-in.
+ if (Iter == B)
+ return !MBB.isLiveIn(X86::EFLAGS);
+
+ --Iter;
+ // Skip over DBG_VALUE.
+ while (Iter != B && Iter->isDebugValue())
+ --Iter;
+
+ bool SawKill = false;
+ for (unsigned j = 0, e = Iter->getNumOperands(); j != e; ++j) {
+ MachineOperand &MO = Iter->getOperand(j);
+ if (MO.isReg() && MO.getReg() == X86::EFLAGS) {
+ if (MO.isDef()) return MO.isDead();
+ if (MO.isKill()) SawKill = true;
+ }
+ }
+
+ if (SawKill)
+ // This instruction kills EFLAGS and doesn't redefine it, so
+ // there's no need to look further.
+ return true;
+ }
+
+ // Conservative answer.
+ return false;
+}
+
+void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg, unsigned SubIdx,
+ const MachineInstr *Orig,
+ const TargetRegisterInfo &TRI) const {
+ DebugLoc DL = Orig->getDebugLoc();
+
+ // MOV32r0 etc. are implemented with xor which clobbers condition code.
+ // Re-materialize them as movri instructions to avoid side effects.
+ bool Clone = true;
+ unsigned Opc = Orig->getOpcode();
+ switch (Opc) {
+ default: break;
+ case X86::MOV8r0:
+ case X86::MOV16r0:
+ case X86::MOV32r0:
+ case X86::MOV64r0: {
+ if (!isSafeToClobberEFLAGS(MBB, I)) {
+ switch (Opc) {
+ default: break;
+ case X86::MOV8r0: Opc = X86::MOV8ri; break;
+ case X86::MOV16r0: Opc = X86::MOV16ri; break;
+ case X86::MOV32r0: Opc = X86::MOV32ri; break;
+ case X86::MOV64r0: Opc = X86::MOV64ri64i32; break;
+ }
+ Clone = false;
+ }
+ break;
+ }
+ }
+
+ if (Clone) {
+ MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
+ MBB.insert(I, MI);
+ } else {
+ BuildMI(MBB, I, DL, get(Opc)).addOperand(Orig->getOperand(0)).addImm(0);
+ }
+
+ MachineInstr *NewMI = prior(I);
+ NewMI->substituteRegister(Orig->getOperand(0).getReg(), DestReg, SubIdx, TRI);
+}
+
+/// hasLiveCondCodeDef - True if MI has a condition code def, e.g. EFLAGS, that
+/// is not marked dead.
+static bool hasLiveCondCodeDef(MachineInstr *MI) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDef() &&
+ MO.getReg() == X86::EFLAGS && !MO.isDead()) {
+ return true;
+ }
+ }
+ return false;
+}
+
+/// convertToThreeAddressWithLEA - Helper for convertToThreeAddress when
+/// 16-bit LEA is disabled, use 32-bit LEA to form 3-address code by promoting
+/// to a 32-bit superregister and then truncating back down to a 16-bit
+/// subregister.
+MachineInstr *
+X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
+ MachineFunction::iterator &MFI,
+ MachineBasicBlock::iterator &MBBI,
+ LiveVariables *LV) const {
+ MachineInstr *MI = MBBI;
+ unsigned Dest = MI->getOperand(0).getReg();
+ unsigned Src = MI->getOperand(1).getReg();
+ bool isDead = MI->getOperand(0).isDead();
+ bool isKill = MI->getOperand(1).isKill();
+
+ unsigned Opc = TM.getSubtarget<X86Subtarget>().is64Bit()
+ ? X86::LEA64_32r : X86::LEA32r;
+ MachineRegisterInfo &RegInfo = MFI->getParent()->getRegInfo();
+ unsigned leaInReg = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
+ unsigned leaOutReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);
+
+ // Build and insert into an implicit UNDEF value. This is OK because
+ // well be shifting and then extracting the lower 16-bits.
+ // This has the potential to cause partial register stall. e.g.
+ // movw (%rbp,%rcx,2), %dx
+ // leal -65(%rdx), %esi
+ // But testing has shown this *does* help performance in 64-bit mode (at
+ // least on modern x86 machines).
+ BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg);
+ MachineInstr *InsMI =
+ BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(TargetOpcode::COPY))
+ .addReg(leaInReg, RegState::Define, X86::sub_16bit)
+ .addReg(Src, getKillRegState(isKill));
+
+ MachineInstrBuilder MIB = BuildMI(*MFI, MBBI, MI->getDebugLoc(),
+ get(Opc), leaOutReg);
+ switch (MIOpc) {
+ default:
+ llvm_unreachable(0);
+ break;
+ case X86::SHL16ri: {
+ unsigned ShAmt = MI->getOperand(2).getImm();
+ MIB.addReg(0).addImm(1 << ShAmt)
+ .addReg(leaInReg, RegState::Kill).addImm(0).addReg(0);
+ break;
+ }
+ case X86::INC16r:
+ case X86::INC64_16r:
+ addRegOffset(MIB, leaInReg, true, 1);
+ break;
+ case X86::DEC16r:
+ case X86::DEC64_16r:
+ addRegOffset(MIB, leaInReg, true, -1);
+ break;
+ case X86::ADD16ri:
+ case X86::ADD16ri8:
+ case X86::ADD16ri_DB:
+ case X86::ADD16ri8_DB:
+ addRegOffset(MIB, leaInReg, true, MI->getOperand(2).getImm());
+ break;
+ case X86::ADD16rr:
+ case X86::ADD16rr_DB: {
+ unsigned Src2 = MI->getOperand(2).getReg();
+ bool isKill2 = MI->getOperand(2).isKill();
+ unsigned leaInReg2 = 0;
+ MachineInstr *InsMI2 = 0;
+ if (Src == Src2) {
+ // ADD16rr %reg1028<kill>, %reg1028
+ // just a single insert_subreg.
+ addRegReg(MIB, leaInReg, true, leaInReg, false);
+ } else {
+ leaInReg2 = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
+ // Build and insert into an implicit UNDEF value. This is OK because
+ // well be shifting and then extracting the lower 16-bits.
+ BuildMI(*MFI, MIB, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg2);
+ InsMI2 =
+ BuildMI(*MFI, MIB, MI->getDebugLoc(), get(TargetOpcode::COPY))
+ .addReg(leaInReg2, RegState::Define, X86::sub_16bit)
+ .addReg(Src2, getKillRegState(isKill2));
+ addRegReg(MIB, leaInReg, true, leaInReg2, true);
+ }
+ if (LV && isKill2 && InsMI2)
+ LV->replaceKillInstruction(Src2, MI, InsMI2);
+ break;
+ }
+ }
+
+ MachineInstr *NewMI = MIB;
+ MachineInstr *ExtMI =
+ BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(TargetOpcode::COPY))
+ .addReg(Dest, RegState::Define | getDeadRegState(isDead))
+ .addReg(leaOutReg, RegState::Kill, X86::sub_16bit);
+
+ if (LV) {
+ // Update live variables
+ LV->getVarInfo(leaInReg).Kills.push_back(NewMI);
+ LV->getVarInfo(leaOutReg).Kills.push_back(ExtMI);
+ if (isKill)
+ LV->replaceKillInstruction(Src, MI, InsMI);
+ if (isDead)
+ LV->replaceKillInstruction(Dest, MI, ExtMI);
+ }
+
+ return ExtMI;
+}
+
+/// convertToThreeAddress - This method must be implemented by targets that
+/// set the M_CONVERTIBLE_TO_3_ADDR flag. When this flag is set, the target
+/// may be able to convert a two-address instruction into a true
+/// three-address instruction on demand. This allows the X86 target (for
+/// example) to convert ADD and SHL instructions into LEA instructions if they
+/// would require register copies due to two-addressness.
+///
+/// This method returns a null pointer if the transformation cannot be
+/// performed, otherwise it returns the new instruction.
+///
+MachineInstr *
+X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
+ MachineBasicBlock::iterator &MBBI,
+ LiveVariables *LV) const {
+ MachineInstr *MI = MBBI;
+ MachineFunction &MF = *MI->getParent()->getParent();
+ // All instructions input are two-addr instructions. Get the known operands.
+ unsigned Dest = MI->getOperand(0).getReg();
+ unsigned Src = MI->getOperand(1).getReg();
+ bool isDead = MI->getOperand(0).isDead();
+ bool isKill = MI->getOperand(1).isKill();
+
+ MachineInstr *NewMI = NULL;
+ // FIXME: 16-bit LEA's are really slow on Athlons, but not bad on P4's. When
+ // we have better subtarget support, enable the 16-bit LEA generation here.
+ // 16-bit LEA is also slow on Core2.
+ bool DisableLEA16 = true;
+ bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit();
+
+ unsigned MIOpc = MI->getOpcode();
+ switch (MIOpc) {
+ case X86::SHUFPSrri: {
+ assert(MI->getNumOperands() == 4 && "Unknown shufps instruction!");
+ if (!TM.getSubtarget<X86Subtarget>().hasSSE2()) return 0;
+
+ unsigned B = MI->getOperand(1).getReg();
+ unsigned C = MI->getOperand(2).getReg();
+ if (B != C) return 0;
+ unsigned A = MI->getOperand(0).getReg();
+ unsigned M = MI->getOperand(3).getImm();
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::PSHUFDri))
+ .addReg(A, RegState::Define | getDeadRegState(isDead))
+ .addReg(B, getKillRegState(isKill)).addImm(M);
+ break;
+ }
+ case X86::SHL64ri: {
+ assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!");
+ // NOTE: LEA doesn't produce flags like shift does, but LLVM never uses
+ // the flags produced by a shift yet, so this is safe.
+ unsigned ShAmt = MI->getOperand(2).getImm();
+ if (ShAmt == 0 || ShAmt >= 4) return 0;
+
+ // LEA can't handle RSP.
+ if (TargetRegisterInfo::isVirtualRegister(Src) &&
+ !MF.getRegInfo().constrainRegClass(Src, &X86::GR64_NOSPRegClass))
+ return 0;
+
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r))
+ .addReg(Dest, RegState::Define | getDeadRegState(isDead))
+ .addReg(0).addImm(1 << ShAmt)
+ .addReg(Src, getKillRegState(isKill))
+ .addImm(0).addReg(0);
+ break;
+ }
+ case X86::SHL32ri: {
+ assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!");
+ // NOTE: LEA doesn't produce flags like shift does, but LLVM never uses
+ // the flags produced by a shift yet, so this is safe.
+ unsigned ShAmt = MI->getOperand(2).getImm();
+ if (ShAmt == 0 || ShAmt >= 4) return 0;
+
+ // LEA can't handle ESP.
+ if (TargetRegisterInfo::isVirtualRegister(Src) &&
+ !MF.getRegInfo().constrainRegClass(Src, &X86::GR32_NOSPRegClass))
+ return 0;
+
+ unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(Opc))
+ .addReg(Dest, RegState::Define | getDeadRegState(isDead))
+ .addReg(0).addImm(1 << ShAmt)
+ .addReg(Src, getKillRegState(isKill)).addImm(0).addReg(0);
+ break;
+ }
+ case X86::SHL16ri: {
+ assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!");
+ // NOTE: LEA doesn't produce flags like shift does, but LLVM never uses
+ // the flags produced by a shift yet, so this is safe.
+ unsigned ShAmt = MI->getOperand(2).getImm();
+ if (ShAmt == 0 || ShAmt >= 4) return 0;
+
+ if (DisableLEA16)
+ return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
+ .addReg(Dest, RegState::Define | getDeadRegState(isDead))
+ .addReg(0).addImm(1 << ShAmt)
+ .addReg(Src, getKillRegState(isKill))
+ .addImm(0).addReg(0);
+ break;
+ }
+ default: {
+ // The following opcodes also sets the condition code register(s). Only
+ // convert them to equivalent lea if the condition code register def's
+ // are dead!
+ if (hasLiveCondCodeDef(MI))
+ return 0;
+
+ switch (MIOpc) {
+ default: return 0;
+ case X86::INC64r:
+ case X86::INC32r:
+ case X86::INC64_32r: {
+ assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!");
+ unsigned Opc = MIOpc == X86::INC64r ? X86::LEA64r
+ : (is64Bit ? X86::LEA64_32r : X86::LEA32r);
+
+ // LEA can't handle RSP.
+ if (TargetRegisterInfo::isVirtualRegister(Src) &&
+ !MF.getRegInfo().constrainRegClass(Src,
+ MIOpc == X86::INC64r ? X86::GR64_NOSPRegisterClass :
+ X86::GR32_NOSPRegisterClass))
+ return 0;
+
+ NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
+ .addReg(Dest, RegState::Define |
+ getDeadRegState(isDead)),
+ Src, isKill, 1);
+ break;
+ }
+ case X86::INC16r:
+ case X86::INC64_16r:
+ if (DisableLEA16)
+ return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
+ assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!");
+ NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
+ .addReg(Dest, RegState::Define |
+ getDeadRegState(isDead)),
+ Src, isKill, 1);
+ break;
+ case X86::DEC64r:
+ case X86::DEC32r:
+ case X86::DEC64_32r: {
+ assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!");
+ unsigned Opc = MIOpc == X86::DEC64r ? X86::LEA64r
+ : (is64Bit ? X86::LEA64_32r : X86::LEA32r);
+ // LEA can't handle RSP.
+ if (TargetRegisterInfo::isVirtualRegister(Src) &&
+ !MF.getRegInfo().constrainRegClass(Src,
+ MIOpc == X86::DEC64r ? X86::GR64_NOSPRegisterClass :
+ X86::GR32_NOSPRegisterClass))
+ return 0;
+
+ NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
+ .addReg(Dest, RegState::Define |
+ getDeadRegState(isDead)),
+ Src, isKill, -1);
+ break;
+ }
+ case X86::DEC16r:
+ case X86::DEC64_16r:
+ if (DisableLEA16)
+ return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
+ assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!");
+ NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
+ .addReg(Dest, RegState::Define |
+ getDeadRegState(isDead)),
+ Src, isKill, -1);
+ break;
+ case X86::ADD64rr:
+ case X86::ADD64rr_DB:
+ case X86::ADD32rr:
+ case X86::ADD32rr_DB: {
+ assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
+ unsigned Opc;
+ TargetRegisterClass *RC;
+ if (MIOpc == X86::ADD64rr || MIOpc == X86::ADD64rr_DB) {
+ Opc = X86::LEA64r;
+ RC = X86::GR64_NOSPRegisterClass;
+ } else {
+ Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
+ RC = X86::GR32_NOSPRegisterClass;
+ }
+
+
+ unsigned Src2 = MI->getOperand(2).getReg();
+ bool isKill2 = MI->getOperand(2).isKill();
+
+ // LEA can't handle RSP.
+ if (TargetRegisterInfo::isVirtualRegister(Src2) &&
+ !MF.getRegInfo().constrainRegClass(Src2, RC))
+ return 0;
+
+ NewMI = addRegReg(BuildMI(MF, MI->getDebugLoc(), get(Opc))
+ .addReg(Dest, RegState::Define |
+ getDeadRegState(isDead)),
+ Src, isKill, Src2, isKill2);
+ if (LV && isKill2)
+ LV->replaceKillInstruction(Src2, MI, NewMI);
+ break;
+ }
+ case X86::ADD16rr:
+ case X86::ADD16rr_DB: {
+ if (DisableLEA16)
+ return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
+ assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
+ unsigned Src2 = MI->getOperand(2).getReg();
+ bool isKill2 = MI->getOperand(2).isKill();
+ NewMI = addRegReg(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
+ .addReg(Dest, RegState::Define |
+ getDeadRegState(isDead)),
+ Src, isKill, Src2, isKill2);
+ if (LV && isKill2)
+ LV->replaceKillInstruction(Src2, MI, NewMI);
+ break;
+ }
+ case X86::ADD64ri32:
+ case X86::ADD64ri8:
+ case X86::ADD64ri32_DB:
+ case X86::ADD64ri8_DB:
+ assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
+ NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r))
+ .addReg(Dest, RegState::Define |
+ getDeadRegState(isDead)),
+ Src, isKill, MI->getOperand(2).getImm());
+ break;
+ case X86::ADD32ri:
+ case X86::ADD32ri8:
+ case X86::ADD32ri_DB:
+ case X86::ADD32ri8_DB: {
+ assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
+ unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
+ NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
+ .addReg(Dest, RegState::Define |
+ getDeadRegState(isDead)),
+ Src, isKill, MI->getOperand(2).getImm());
+ break;
+ }
+ case X86::ADD16ri:
+ case X86::ADD16ri8:
+ case X86::ADD16ri_DB:
+ case X86::ADD16ri8_DB:
+ if (DisableLEA16)
+ return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
+ assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
+ NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
+ .addReg(Dest, RegState::Define |
+ getDeadRegState(isDead)),
+ Src, isKill, MI->getOperand(2).getImm());
+ break;
+ }
+ }
+ }
+
+ if (!NewMI) return 0;
+
+ if (LV) { // Update live variables
+ if (isKill)
+ LV->replaceKillInstruction(Src, MI, NewMI);
+ if (isDead)
+ LV->replaceKillInstruction(Dest, MI, NewMI);
+ }
+
+ MFI->insert(MBBI, NewMI); // Insert the new inst
+ return NewMI;
+}
+
+/// commuteInstruction - We have a few instructions that must be hacked on to
+/// commute them.
+///
+MachineInstr *
+X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
+ switch (MI->getOpcode()) {
+ case X86::SHRD16rri8: // A = SHRD16rri8 B, C, I -> A = SHLD16rri8 C, B, (16-I)
+ case X86::SHLD16rri8: // A = SHLD16rri8 B, C, I -> A = SHRD16rri8 C, B, (16-I)
+ case X86::SHRD32rri8: // A = SHRD32rri8 B, C, I -> A = SHLD32rri8 C, B, (32-I)
+ case X86::SHLD32rri8: // A = SHLD32rri8 B, C, I -> A = SHRD32rri8 C, B, (32-I)
+ case X86::SHRD64rri8: // A = SHRD64rri8 B, C, I -> A = SHLD64rri8 C, B, (64-I)
+ case X86::SHLD64rri8:{// A = SHLD64rri8 B, C, I -> A = SHRD64rri8 C, B, (64-I)
+ unsigned Opc;
+ unsigned Size;
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("Unreachable!");
+ case X86::SHRD16rri8: Size = 16; Opc = X86::SHLD16rri8; break;
+ case X86::SHLD16rri8: Size = 16; Opc = X86::SHRD16rri8; break;
+ case X86::SHRD32rri8: Size = 32; Opc = X86::SHLD32rri8; break;
+ case X86::SHLD32rri8: Size = 32; Opc = X86::SHRD32rri8; break;
+ case X86::SHRD64rri8: Size = 64; Opc = X86::SHLD64rri8; break;
+ case X86::SHLD64rri8: Size = 64; Opc = X86::SHRD64rri8; break;
+ }
+ unsigned Amt = MI->getOperand(3).getImm();
+ if (NewMI) {
+ MachineFunction &MF = *MI->getParent()->getParent();
+ MI = MF.CloneMachineInstr(MI);
+ NewMI = false;
+ }
+ MI->setDesc(get(Opc));
+ MI->getOperand(3).setImm(Size-Amt);
+ return TargetInstrInfoImpl::commuteInstruction(MI, NewMI);
+ }
+ case X86::CMOVB16rr:
+ case X86::CMOVB32rr:
+ case X86::CMOVB64rr:
+ case X86::CMOVAE16rr:
+ case X86::CMOVAE32rr:
+ case X86::CMOVAE64rr:
+ case X86::CMOVE16rr:
+ case X86::CMOVE32rr:
+ case X86::CMOVE64rr:
+ case X86::CMOVNE16rr:
+ case X86::CMOVNE32rr:
+ case X86::CMOVNE64rr:
+ case X86::CMOVBE16rr:
+ case X86::CMOVBE32rr:
+ case X86::CMOVBE64rr:
+ case X86::CMOVA16rr:
+ case X86::CMOVA32rr:
+ case X86::CMOVA64rr:
+ case X86::CMOVL16rr:
+ case X86::CMOVL32rr:
+ case X86::CMOVL64rr:
+ case X86::CMOVGE16rr:
+ case X86::CMOVGE32rr:
+ case X86::CMOVGE64rr:
+ case X86::CMOVLE16rr:
+ case X86::CMOVLE32rr:
+ case X86::CMOVLE64rr:
+ case X86::CMOVG16rr:
+ case X86::CMOVG32rr:
+ case X86::CMOVG64rr:
+ case X86::CMOVS16rr:
+ case X86::CMOVS32rr:
+ case X86::CMOVS64rr:
+ case X86::CMOVNS16rr:
+ case X86::CMOVNS32rr:
+ case X86::CMOVNS64rr:
+ case X86::CMOVP16rr:
+ case X86::CMOVP32rr:
+ case X86::CMOVP64rr:
+ case X86::CMOVNP16rr:
+ case X86::CMOVNP32rr:
+ case X86::CMOVNP64rr:
+ case X86::CMOVO16rr:
+ case X86::CMOVO32rr:
+ case X86::CMOVO64rr:
+ case X86::CMOVNO16rr:
+ case X86::CMOVNO32rr:
+ case X86::CMOVNO64rr: {
+ unsigned Opc = 0;
+ switch (MI->getOpcode()) {
+ default: break;
+ case X86::CMOVB16rr: Opc = X86::CMOVAE16rr; break;
+ case X86::CMOVB32rr: Opc = X86::CMOVAE32rr; break;
+ case X86::CMOVB64rr: Opc = X86::CMOVAE64rr; break;
+ case X86::CMOVAE16rr: Opc = X86::CMOVB16rr; break;
+ case X86::CMOVAE32rr: Opc = X86::CMOVB32rr; break;
+ case X86::CMOVAE64rr: Opc = X86::CMOVB64rr; break;
+ case X86::CMOVE16rr: Opc = X86::CMOVNE16rr; break;
+ case X86::CMOVE32rr: Opc = X86::CMOVNE32rr; break;
+ case X86::CMOVE64rr: Opc = X86::CMOVNE64rr; break;
+ case X86::CMOVNE16rr: Opc = X86::CMOVE16rr; break;
+ case X86::CMOVNE32rr: Opc = X86::CMOVE32rr; break;
+ case X86::CMOVNE64rr: Opc = X86::CMOVE64rr; break;
+ case X86::CMOVBE16rr: Opc = X86::CMOVA16rr; break;
+ case X86::CMOVBE32rr: Opc = X86::CMOVA32rr; break;
+ case X86::CMOVBE64rr: Opc = X86::CMOVA64rr; break;
+ case X86::CMOVA16rr: Opc = X86::CMOVBE16rr; break;
+ case X86::CMOVA32rr: Opc = X86::CMOVBE32rr; break;
+ case X86::CMOVA64rr: Opc = X86::CMOVBE64rr; break;
+ case X86::CMOVL16rr: Opc = X86::CMOVGE16rr; break;
+ case X86::CMOVL32rr: Opc = X86::CMOVGE32rr; break;
+ case X86::CMOVL64rr: Opc = X86::CMOVGE64rr; break;
+ case X86::CMOVGE16rr: Opc = X86::CMOVL16rr; break;
+ case X86::CMOVGE32rr: Opc = X86::CMOVL32rr; break;
+ case X86::CMOVGE64rr: Opc = X86::CMOVL64rr; break;
+ case X86::CMOVLE16rr: Opc = X86::CMOVG16rr; break;
+ case X86::CMOVLE32rr: Opc = X86::CMOVG32rr; break;
+ case X86::CMOVLE64rr: Opc = X86::CMOVG64rr; break;
+ case X86::CMOVG16rr: Opc = X86::CMOVLE16rr; break;
+ case X86::CMOVG32rr: Opc = X86::CMOVLE32rr; break;
+ case X86::CMOVG64rr: Opc = X86::CMOVLE64rr; break;
+ case X86::CMOVS16rr: Opc = X86::CMOVNS16rr; break;
+ case X86::CMOVS32rr: Opc = X86::CMOVNS32rr; break;
+ case X86::CMOVS64rr: Opc = X86::CMOVNS64rr; break;
+ case X86::CMOVNS16rr: Opc = X86::CMOVS16rr; break;
+ case X86::CMOVNS32rr: Opc = X86::CMOVS32rr; break;
+ case X86::CMOVNS64rr: Opc = X86::CMOVS64rr; break;
+ case X86::CMOVP16rr: Opc = X86::CMOVNP16rr; break;
+ case X86::CMOVP32rr: Opc = X86::CMOVNP32rr; break;
+ case X86::CMOVP64rr: Opc = X86::CMOVNP64rr; break;
+ case X86::CMOVNP16rr: Opc = X86::CMOVP16rr; break;
+ case X86::CMOVNP32rr: Opc = X86::CMOVP32rr; break;
+ case X86::CMOVNP64rr: Opc = X86::CMOVP64rr; break;
+ case X86::CMOVO16rr: Opc = X86::CMOVNO16rr; break;
+ case X86::CMOVO32rr: Opc = X86::CMOVNO32rr; break;
+ case X86::CMOVO64rr: Opc = X86::CMOVNO64rr; break;
+ case X86::CMOVNO16rr: Opc = X86::CMOVO16rr; break;
+ case X86::CMOVNO32rr: Opc = X86::CMOVO32rr; break;
+ case X86::CMOVNO64rr: Opc = X86::CMOVO64rr; break;
+ }
+ if (NewMI) {
+ MachineFunction &MF = *MI->getParent()->getParent();
+ MI = MF.CloneMachineInstr(MI);
+ NewMI = false;
+ }
+ MI->setDesc(get(Opc));
+ // Fallthrough intended.
+ }
+ default:
+ return TargetInstrInfoImpl::commuteInstruction(MI, NewMI);
+ }
+}
+
+static X86::CondCode GetCondFromBranchOpc(unsigned BrOpc) {
+ switch (BrOpc) {
+ default: return X86::COND_INVALID;
+ case X86::JE_4: return X86::COND_E;
+ case X86::JNE_4: return X86::COND_NE;
+ case X86::JL_4: return X86::COND_L;
+ case X86::JLE_4: return X86::COND_LE;
+ case X86::JG_4: return X86::COND_G;
+ case X86::JGE_4: return X86::COND_GE;
+ case X86::JB_4: return X86::COND_B;
+ case X86::JBE_4: return X86::COND_BE;
+ case X86::JA_4: return X86::COND_A;
+ case X86::JAE_4: return X86::COND_AE;
+ case X86::JS_4: return X86::COND_S;
+ case X86::JNS_4: return X86::COND_NS;
+ case X86::JP_4: return X86::COND_P;
+ case X86::JNP_4: return X86::COND_NP;
+ case X86::JO_4: return X86::COND_O;
+ case X86::JNO_4: return X86::COND_NO;
+ }
+}
+
+unsigned X86::GetCondBranchFromCond(X86::CondCode CC) {
+ switch (CC) {
+ default: llvm_unreachable("Illegal condition code!");
+ case X86::COND_E: return X86::JE_4;
+ case X86::COND_NE: return X86::JNE_4;
+ case X86::COND_L: return X86::JL_4;
+ case X86::COND_LE: return X86::JLE_4;
+ case X86::COND_G: return X86::JG_4;
+ case X86::COND_GE: return X86::JGE_4;
+ case X86::COND_B: return X86::JB_4;
+ case X86::COND_BE: return X86::JBE_4;
+ case X86::COND_A: return X86::JA_4;
+ case X86::COND_AE: return X86::JAE_4;
+ case X86::COND_S: return X86::JS_4;
+ case X86::COND_NS: return X86::JNS_4;
+ case X86::COND_P: return X86::JP_4;
+ case X86::COND_NP: return X86::JNP_4;
+ case X86::COND_O: return X86::JO_4;
+ case X86::COND_NO: return X86::JNO_4;
+ }
+}
+
+/// GetOppositeBranchCondition - Return the inverse of the specified condition,
+/// e.g. turning COND_E to COND_NE.
+X86::CondCode X86::GetOppositeBranchCondition(X86::CondCode CC) {
+ switch (CC) {
+ default: llvm_unreachable("Illegal condition code!");
+ case X86::COND_E: return X86::COND_NE;
+ case X86::COND_NE: return X86::COND_E;
+ case X86::COND_L: return X86::COND_GE;
+ case X86::COND_LE: return X86::COND_G;
+ case X86::COND_G: return X86::COND_LE;
+ case X86::COND_GE: return X86::COND_L;
+ case X86::COND_B: return X86::COND_AE;
+ case X86::COND_BE: return X86::COND_A;
+ case X86::COND_A: return X86::COND_BE;
+ case X86::COND_AE: return X86::COND_B;
+ case X86::COND_S: return X86::COND_NS;
+ case X86::COND_NS: return X86::COND_S;
+ case X86::COND_P: return X86::COND_NP;
+ case X86::COND_NP: return X86::COND_P;
+ case X86::COND_O: return X86::COND_NO;
+ case X86::COND_NO: return X86::COND_O;
+ }
+}
+
+bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (!MCID.isTerminator()) return false;
+
+ // Conditional branch is a special case.
+ if (MCID.isBranch() && !MCID.isBarrier())
+ return true;
+ if (!MCID.isPredicable())
+ return true;
+ return !isPredicated(MI);
+}
+
+bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ // Start from the bottom of the block and work up, examining the
+ // terminator instructions.
+ MachineBasicBlock::iterator I = MBB.end();
+ MachineBasicBlock::iterator UnCondBrIter = MBB.end();
+ while (I != MBB.begin()) {
+ --I;
+ if (I->isDebugValue())
+ continue;
+
+ // Working from the bottom, when we see a non-terminator instruction, we're
+ // done.
+ if (!isUnpredicatedTerminator(I))
+ break;
+
+ // A terminator that isn't a branch can't easily be handled by this
+ // analysis.
+ if (!I->getDesc().isBranch())
+ return true;
+
+ // Handle unconditional branches.
+ if (I->getOpcode() == X86::JMP_4) {
+ UnCondBrIter = I;
+
+ if (!AllowModify) {
+ TBB = I->getOperand(0).getMBB();
+ continue;
+ }
+
+ // If the block has any instructions after a JMP, delete them.
+ while (llvm::next(I) != MBB.end())
+ llvm::next(I)->eraseFromParent();
+
+ Cond.clear();
+ FBB = 0;
+
+ // Delete the JMP if it's equivalent to a fall-through.
+ if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
+ TBB = 0;
+ I->eraseFromParent();
+ I = MBB.end();
+ UnCondBrIter = MBB.end();
+ continue;
+ }
+
+ // TBB is used to indicate the unconditional destination.
+ TBB = I->getOperand(0).getMBB();
+ continue;
+ }
+
+ // Handle conditional branches.
+ X86::CondCode BranchCode = GetCondFromBranchOpc(I->getOpcode());
+ if (BranchCode == X86::COND_INVALID)
+ return true; // Can't handle indirect branch.
+
+ // Working from the bottom, handle the first conditional branch.
+ if (Cond.empty()) {
+ MachineBasicBlock *TargetBB = I->getOperand(0).getMBB();
+ if (AllowModify && UnCondBrIter != MBB.end() &&
+ MBB.isLayoutSuccessor(TargetBB)) {
+ // If we can modify the code and it ends in something like:
+ //
+ // jCC L1
+ // jmp L2
+ // L1:
+ // ...
+ // L2:
+ //
+ // Then we can change this to:
+ //
+ // jnCC L2
+ // L1:
+ // ...
+ // L2:
+ //
+ // Which is a bit more efficient.
+ // We conditionally jump to the fall-through block.
+ BranchCode = GetOppositeBranchCondition(BranchCode);
+ unsigned JNCC = GetCondBranchFromCond(BranchCode);
+ MachineBasicBlock::iterator OldInst = I;
+
+ BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(JNCC))
+ .addMBB(UnCondBrIter->getOperand(0).getMBB());
+ BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(X86::JMP_4))
+ .addMBB(TargetBB);
+
+ OldInst->eraseFromParent();
+ UnCondBrIter->eraseFromParent();
+
+ // Restart the analysis.
+ UnCondBrIter = MBB.end();
+ I = MBB.end();
+ continue;
+ }
+
+ FBB = TBB;
+ TBB = I->getOperand(0).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(BranchCode));
+ continue;
+ }
+
+ // Handle subsequent conditional branches. Only handle the case where all
+ // conditional branches branch to the same destination and their condition
+ // opcodes fit one of the special multi-branch idioms.
+ assert(Cond.size() == 1);
+ assert(TBB);
+
+ // Only handle the case where all conditional branches branch to the same
+ // destination.
+ if (TBB != I->getOperand(0).getMBB())
+ return true;
+
+ // If the conditions are the same, we can leave them alone.
+ X86::CondCode OldBranchCode = (X86::CondCode)Cond[0].getImm();
+ if (OldBranchCode == BranchCode)
+ continue;
+
+ // If they differ, see if they fit one of the known patterns. Theoretically,
+ // we could handle more patterns here, but we shouldn't expect to see them
+ // if instruction selection has done a reasonable job.
+ if ((OldBranchCode == X86::COND_NP &&
+ BranchCode == X86::COND_E) ||
+ (OldBranchCode == X86::COND_E &&
+ BranchCode == X86::COND_NP))
+ BranchCode = X86::COND_NP_OR_E;
+ else if ((OldBranchCode == X86::COND_P &&
+ BranchCode == X86::COND_NE) ||
+ (OldBranchCode == X86::COND_NE &&
+ BranchCode == X86::COND_P))
+ BranchCode = X86::COND_NE_OR_P;
+ else
+ return true;
+
+ // Update the MachineOperand.
+ Cond[0].setImm(BranchCode);
+ }
+
+ return false;
+}
+
+unsigned X86InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator I = MBB.end();
+ unsigned Count = 0;
+
+ while (I != MBB.begin()) {
+ --I;
+ if (I->isDebugValue())
+ continue;
+ if (I->getOpcode() != X86::JMP_4 &&
+ GetCondFromBranchOpc(I->getOpcode()) == X86::COND_INVALID)
+ break;
+ // Remove the branch.
+ I->eraseFromParent();
+ I = MBB.end();
+ ++Count;
+ }
+
+ return Count;
+}
+
+unsigned
+X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
+ // Shouldn't be a fall through.
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert((Cond.size() == 1 || Cond.size() == 0) &&
+ "X86 branch conditions have one component!");
+
+ if (Cond.empty()) {
+ // Unconditional branch?
+ assert(!FBB && "Unconditional branch with multiple successors!");
+ BuildMI(&MBB, DL, get(X86::JMP_4)).addMBB(TBB);
+ return 1;
+ }
+
+ // Conditional branch.
+ unsigned Count = 0;
+ X86::CondCode CC = (X86::CondCode)Cond[0].getImm();
+ switch (CC) {
+ case X86::COND_NP_OR_E:
+ // Synthesize NP_OR_E with two branches.
+ BuildMI(&MBB, DL, get(X86::JNP_4)).addMBB(TBB);
+ ++Count;
+ BuildMI(&MBB, DL, get(X86::JE_4)).addMBB(TBB);
+ ++Count;
+ break;
+ case X86::COND_NE_OR_P:
+ // Synthesize NE_OR_P with two branches.
+ BuildMI(&MBB, DL, get(X86::JNE_4)).addMBB(TBB);
+ ++Count;
+ BuildMI(&MBB, DL, get(X86::JP_4)).addMBB(TBB);
+ ++Count;
+ break;
+ default: {
+ unsigned Opc = GetCondBranchFromCond(CC);
+ BuildMI(&MBB, DL, get(Opc)).addMBB(TBB);
+ ++Count;
+ }
+ }
+ if (FBB) {
+ // Two-way Conditional branch. Insert the second branch.
+ BuildMI(&MBB, DL, get(X86::JMP_4)).addMBB(FBB);
+ ++Count;
+ }
+ return Count;
+}
+
+/// isHReg - Test if the given register is a physical h register.
+static bool isHReg(unsigned Reg) {
+ return X86::GR8_ABCD_HRegClass.contains(Reg);
+}
+
+// Try and copy between VR128/VR64 and GR64 registers.
+static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg,
+ bool HasAVX) {
+ // SrcReg(VR128) -> DestReg(GR64)
+ // SrcReg(VR64) -> DestReg(GR64)
+ // SrcReg(GR64) -> DestReg(VR128)
+ // SrcReg(GR64) -> DestReg(VR64)
+
+ if (X86::GR64RegClass.contains(DestReg)) {
+ if (X86::VR128RegClass.contains(SrcReg)) {
+ // Copy from a VR128 register to a GR64 register.
+ return HasAVX ? X86::VMOVPQIto64rr : X86::MOVPQIto64rr;
+ } else if (X86::VR64RegClass.contains(SrcReg)) {
+ // Copy from a VR64 register to a GR64 register.
+ return X86::MOVSDto64rr;
+ }
+ } else if (X86::GR64RegClass.contains(SrcReg)) {
+ // Copy from a GR64 register to a VR128 register.
+ if (X86::VR128RegClass.contains(DestReg))
+ return HasAVX ? X86::VMOV64toPQIrr : X86::MOV64toPQIrr;
+ // Copy from a GR64 register to a VR64 register.
+ else if (X86::VR64RegClass.contains(DestReg))
+ return X86::MOV64toSDrr;
+ }
+
+ // SrcReg(FR32) -> DestReg(GR32)
+ // SrcReg(GR32) -> DestReg(FR32)
+
+ if (X86::GR32RegClass.contains(DestReg) && X86::FR32RegClass.contains(SrcReg))
+ // Copy from a FR32 register to a GR32 register.
+ return HasAVX ? X86::VMOVSS2DIrr : X86::MOVSS2DIrr;
+
+ if (X86::FR32RegClass.contains(DestReg) && X86::GR32RegClass.contains(SrcReg))
+ // Copy from a GR32 register to a FR32 register.
+ return HasAVX ? X86::VMOVDI2SSrr : X86::MOVDI2SSrr;
+
+ return 0;
+}
+
+void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ // First deal with the normal symmetric copies.
+ bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
+ unsigned Opc = 0;
+ if (X86::GR64RegClass.contains(DestReg, SrcReg))
+ Opc = X86::MOV64rr;
+ else if (X86::GR32RegClass.contains(DestReg, SrcReg))
+ Opc = X86::MOV32rr;
+ else if (X86::GR16RegClass.contains(DestReg, SrcReg))
+ Opc = X86::MOV16rr;
+ else if (X86::GR8RegClass.contains(DestReg, SrcReg)) {
+ // Copying to or from a physical H register on x86-64 requires a NOREX
+ // move. Otherwise use a normal move.
+ if ((isHReg(DestReg) || isHReg(SrcReg)) &&
+ TM.getSubtarget<X86Subtarget>().is64Bit()) {
+ Opc = X86::MOV8rr_NOREX;
+ // Both operands must be encodable without an REX prefix.
+ assert(X86::GR8_NOREXRegClass.contains(SrcReg, DestReg) &&
+ "8-bit H register can not be copied outside GR8_NOREX");
+ } else
+ Opc = X86::MOV8rr;
+ } else if (X86::VR128RegClass.contains(DestReg, SrcReg))
+ Opc = HasAVX ? X86::VMOVAPSrr : X86::MOVAPSrr;
+ else if (X86::VR256RegClass.contains(DestReg, SrcReg))
+ Opc = X86::VMOVAPSYrr;
+ else if (X86::VR64RegClass.contains(DestReg, SrcReg))
+ Opc = X86::MMX_MOVQ64rr;
+ else
+ Opc = CopyToFromAsymmetricReg(DestReg, SrcReg, HasAVX);
+
+ if (Opc) {
+ BuildMI(MBB, MI, DL, get(Opc), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
+
+ // Moving EFLAGS to / from another register requires a push and a pop.
+ if (SrcReg == X86::EFLAGS) {
+ if (X86::GR64RegClass.contains(DestReg)) {
+ BuildMI(MBB, MI, DL, get(X86::PUSHF64));
+ BuildMI(MBB, MI, DL, get(X86::POP64r), DestReg);
+ return;
+ } else if (X86::GR32RegClass.contains(DestReg)) {
+ BuildMI(MBB, MI, DL, get(X86::PUSHF32));
+ BuildMI(MBB, MI, DL, get(X86::POP32r), DestReg);
+ return;
+ }
+ }
+ if (DestReg == X86::EFLAGS) {
+ if (X86::GR64RegClass.contains(SrcReg)) {
+ BuildMI(MBB, MI, DL, get(X86::PUSH64r))
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ BuildMI(MBB, MI, DL, get(X86::POPF64));
+ return;
+ } else if (X86::GR32RegClass.contains(SrcReg)) {
+ BuildMI(MBB, MI, DL, get(X86::PUSH32r))
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ BuildMI(MBB, MI, DL, get(X86::POPF32));
+ return;
+ }
+ }
+
+ DEBUG(dbgs() << "Cannot copy " << RI.getName(SrcReg)
+ << " to " << RI.getName(DestReg) << '\n');
+ llvm_unreachable("Cannot emit physreg copy instruction");
+}
+
+static unsigned getLoadStoreRegOpcode(unsigned Reg,
+ const TargetRegisterClass *RC,
+ bool isStackAligned,
+ const TargetMachine &TM,
+ bool load) {
+ bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
+ switch (RC->getSize()) {
+ default:
+ llvm_unreachable("Unknown spill size");
+ case 1:
+ assert(X86::GR8RegClass.hasSubClassEq(RC) && "Unknown 1-byte regclass");
+ if (TM.getSubtarget<X86Subtarget>().is64Bit())
+ // Copying to or from a physical H register on x86-64 requires a NOREX
+ // move. Otherwise use a normal move.
+ if (isHReg(Reg) || X86::GR8_ABCD_HRegClass.hasSubClassEq(RC))
+ return load ? X86::MOV8rm_NOREX : X86::MOV8mr_NOREX;
+ return load ? X86::MOV8rm : X86::MOV8mr;
+ case 2:
+ assert(X86::GR16RegClass.hasSubClassEq(RC) && "Unknown 2-byte regclass");
+ return load ? X86::MOV16rm : X86::MOV16mr;
+ case 4:
+ if (X86::GR32RegClass.hasSubClassEq(RC))
+ return load ? X86::MOV32rm : X86::MOV32mr;
+ if (X86::FR32RegClass.hasSubClassEq(RC))
+ return load ?
+ (HasAVX ? X86::VMOVSSrm : X86::MOVSSrm) :
+ (HasAVX ? X86::VMOVSSmr : X86::MOVSSmr);
+ if (X86::RFP32RegClass.hasSubClassEq(RC))
+ return load ? X86::LD_Fp32m : X86::ST_Fp32m;
+ llvm_unreachable("Unknown 4-byte regclass");
+ case 8:
+ if (X86::GR64RegClass.hasSubClassEq(RC))
+ return load ? X86::MOV64rm : X86::MOV64mr;
+ if (X86::FR64RegClass.hasSubClassEq(RC))
+ return load ?
+ (HasAVX ? X86::VMOVSDrm : X86::MOVSDrm) :
+ (HasAVX ? X86::VMOVSDmr : X86::MOVSDmr);
+ if (X86::VR64RegClass.hasSubClassEq(RC))
+ return load ? X86::MMX_MOVQ64rm : X86::MMX_MOVQ64mr;
+ if (X86::RFP64RegClass.hasSubClassEq(RC))
+ return load ? X86::LD_Fp64m : X86::ST_Fp64m;
+ llvm_unreachable("Unknown 8-byte regclass");
+ case 10:
+ assert(X86::RFP80RegClass.hasSubClassEq(RC) && "Unknown 10-byte regclass");
+ return load ? X86::LD_Fp80m : X86::ST_FpP80m;
+ case 16: {
+ assert(X86::VR128RegClass.hasSubClassEq(RC) && "Unknown 16-byte regclass");
+ // If stack is realigned we can use aligned stores.
+ if (isStackAligned)
+ return load ?
+ (HasAVX ? X86::VMOVAPSrm : X86::MOVAPSrm) :
+ (HasAVX ? X86::VMOVAPSmr : X86::MOVAPSmr);
+ else
+ return load ?
+ (HasAVX ? X86::VMOVUPSrm : X86::MOVUPSrm) :
+ (HasAVX ? X86::VMOVUPSmr : X86::MOVUPSmr);
+ }
+ case 32:
+ assert(X86::VR256RegClass.hasSubClassEq(RC) && "Unknown 32-byte regclass");
+ // If stack is realigned we can use aligned stores.
+ if (isStackAligned)
+ return load ? X86::VMOVAPSYrm : X86::VMOVAPSYmr;
+ else
+ return load ? X86::VMOVUPSYrm : X86::VMOVUPSYmr;
+ }
+}
+
+static unsigned getStoreRegOpcode(unsigned SrcReg,
+ const TargetRegisterClass *RC,
+ bool isStackAligned,
+ TargetMachine &TM) {
+ return getLoadStoreRegOpcode(SrcReg, RC, isStackAligned, TM, false);
+}
+
+
+static unsigned getLoadRegOpcode(unsigned DestReg,
+ const TargetRegisterClass *RC,
+ bool isStackAligned,
+ const TargetMachine &TM) {
+ return getLoadStoreRegOpcode(DestReg, RC, isStackAligned, TM, true);
+}
+
+void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ const MachineFunction &MF = *MBB.getParent();
+ assert(MF.getFrameInfo()->getObjectSize(FrameIdx) >= RC->getSize() &&
+ "Stack slot too small for store");
+ unsigned Alignment = RC->getSize() == 32 ? 32 : 16;
+ bool isAligned = (TM.getFrameLowering()->getStackAlignment() >= Alignment) ||
+ RI.canRealignStack(MF);
+ unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM);
+ DebugLoc DL = MBB.findDebugLoc(MI);
+ addFrameReference(BuildMI(MBB, MI, DL, get(Opc)), FrameIdx)
+ .addReg(SrcReg, getKillRegState(isKill));
+}
+
+void X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
+ bool isKill,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ MachineInstr::mmo_iterator MMOBegin,
+ MachineInstr::mmo_iterator MMOEnd,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const {
+ unsigned Alignment = RC->getSize() == 32 ? 32 : 16;
+ bool isAligned = MMOBegin != MMOEnd &&
+ (*MMOBegin)->getAlignment() >= Alignment;
+ unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM);
+ DebugLoc DL;
+ MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc));
+ for (unsigned i = 0, e = Addr.size(); i != e; ++i)
+ MIB.addOperand(Addr[i]);
+ MIB.addReg(SrcReg, getKillRegState(isKill));
+ (*MIB).setMemRefs(MMOBegin, MMOEnd);
+ NewMIs.push_back(MIB);
+}
+
+
+void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ const MachineFunction &MF = *MBB.getParent();
+ unsigned Alignment = RC->getSize() == 32 ? 32 : 16;
+ bool isAligned = (TM.getFrameLowering()->getStackAlignment() >= Alignment) ||
+ RI.canRealignStack(MF);
+ unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM);
+ DebugLoc DL = MBB.findDebugLoc(MI);
+ addFrameReference(BuildMI(MBB, MI, DL, get(Opc), DestReg), FrameIdx);
+}
+
+void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ MachineInstr::mmo_iterator MMOBegin,
+ MachineInstr::mmo_iterator MMOEnd,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const {
+ unsigned Alignment = RC->getSize() == 32 ? 32 : 16;
+ bool isAligned = MMOBegin != MMOEnd &&
+ (*MMOBegin)->getAlignment() >= Alignment;
+ unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM);
+ DebugLoc DL;
+ MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg);
+ for (unsigned i = 0, e = Addr.size(); i != e; ++i)
+ MIB.addOperand(Addr[i]);
+ (*MIB).setMemRefs(MMOBegin, MMOEnd);
+ NewMIs.push_back(MIB);
+}
+
+/// Expand2AddrUndef - Expand a single-def pseudo instruction to a two-addr
+/// instruction with two undef reads of the register being defined. This is
+/// used for mapping:
+/// %xmm4 = V_SET0
+/// to:
+/// %xmm4 = PXORrr %xmm4<undef>, %xmm4<undef>
+///
+static bool Expand2AddrUndef(MachineInstr *MI, const MCInstrDesc &Desc) {
+ assert(Desc.getNumOperands() == 3 && "Expected two-addr instruction.");
+ unsigned Reg = MI->getOperand(0).getReg();
+ MI->setDesc(Desc);
+
+ // MachineInstr::addOperand() will insert explicit operands before any
+ // implicit operands.
+ MachineInstrBuilder(MI).addReg(Reg, RegState::Undef)
+ .addReg(Reg, RegState::Undef);
+ // But we don't trust that.
+ assert(MI->getOperand(1).getReg() == Reg &&
+ MI->getOperand(2).getReg() == Reg && "Misplaced operand");
+ return true;
+}
+
+bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
+ bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
+ switch (MI->getOpcode()) {
+ case X86::V_SET0:
+ return Expand2AddrUndef(MI, get(HasAVX ? X86::VPXORrr : X86::PXORrr));
+ case X86::TEST8ri_NOREX:
+ MI->setDesc(get(X86::TEST8ri));
+ return true;
+ }
+ return false;
+}
+
+MachineInstr*
+X86InstrInfo::emitFrameIndexDebugValue(MachineFunction &MF,
+ int FrameIx, uint64_t Offset,
+ const MDNode *MDPtr,
+ DebugLoc DL) const {
+ X86AddressMode AM;
+ AM.BaseType = X86AddressMode::FrameIndexBase;
+ AM.Base.FrameIndex = FrameIx;
+ MachineInstrBuilder MIB = BuildMI(MF, DL, get(X86::DBG_VALUE));
+ addFullAddress(MIB, AM).addImm(Offset).addMetadata(MDPtr);
+ return &*MIB;
+}
+
+static MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode,
+ const SmallVectorImpl<MachineOperand> &MOs,
+ MachineInstr *MI,
+ const TargetInstrInfo &TII) {
+ // Create the base instruction with the memory operand as the first part.
+ MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(Opcode),
+ MI->getDebugLoc(), true);
+ MachineInstrBuilder MIB(NewMI);
+ unsigned NumAddrOps = MOs.size();
+ for (unsigned i = 0; i != NumAddrOps; ++i)
+ MIB.addOperand(MOs[i]);
+ if (NumAddrOps < 4) // FrameIndex only
+ addOffset(MIB, 0);
+
+ // Loop over the rest of the ri operands, converting them over.
+ unsigned NumOps = MI->getDesc().getNumOperands()-2;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ MachineOperand &MO = MI->getOperand(i+2);
+ MIB.addOperand(MO);
+ }
+ for (unsigned i = NumOps+2, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ MIB.addOperand(MO);
+ }
+ return MIB;
+}
+
+static MachineInstr *FuseInst(MachineFunction &MF,
+ unsigned Opcode, unsigned OpNo,
+ const SmallVectorImpl<MachineOperand> &MOs,
+ MachineInstr *MI, const TargetInstrInfo &TII) {
+ MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(Opcode),
+ MI->getDebugLoc(), true);
+ MachineInstrBuilder MIB(NewMI);
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (i == OpNo) {
+ assert(MO.isReg() && "Expected to fold into reg operand!");
+ unsigned NumAddrOps = MOs.size();
+ for (unsigned i = 0; i != NumAddrOps; ++i)
+ MIB.addOperand(MOs[i]);
+ if (NumAddrOps < 4) // FrameIndex only
+ addOffset(MIB, 0);
+ } else {
+ MIB.addOperand(MO);
+ }
+ }
+ return MIB;
+}
+
+static MachineInstr *MakeM0Inst(const TargetInstrInfo &TII, unsigned Opcode,
+ const SmallVectorImpl<MachineOperand> &MOs,
+ MachineInstr *MI) {
+ MachineFunction &MF = *MI->getParent()->getParent();
+ MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), TII.get(Opcode));
+
+ unsigned NumAddrOps = MOs.size();
+ for (unsigned i = 0; i != NumAddrOps; ++i)
+ MIB.addOperand(MOs[i]);
+ if (NumAddrOps < 4) // FrameIndex only
+ addOffset(MIB, 0);
+ return MIB.addImm(0);
+}
+
+MachineInstr*
+X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr *MI, unsigned i,
+ const SmallVectorImpl<MachineOperand> &MOs,
+ unsigned Size, unsigned Align) const {
+ const DenseMap<unsigned, std::pair<unsigned,unsigned> > *OpcodeTablePtr = 0;
+ bool isTwoAddrFold = false;
+ unsigned NumOps = MI->getDesc().getNumOperands();
+ bool isTwoAddr = NumOps > 1 &&
+ MI->getDesc().getOperandConstraint(1, MCOI::TIED_TO) != -1;
+
+ // FIXME: AsmPrinter doesn't know how to handle
+ // X86II::MO_GOT_ABSOLUTE_ADDRESS after folding.
+ if (MI->getOpcode() == X86::ADD32ri &&
+ MI->getOperand(2).getTargetFlags() == X86II::MO_GOT_ABSOLUTE_ADDRESS)
+ return NULL;
+
+ MachineInstr *NewMI = NULL;
+ // Folding a memory location into the two-address part of a two-address
+ // instruction is different than folding it other places. It requires
+ // replacing the *two* registers with the memory location.
+ if (isTwoAddr && NumOps >= 2 && i < 2 &&
+ MI->getOperand(0).isReg() &&
+ MI->getOperand(1).isReg() &&
+ MI->getOperand(0).getReg() == MI->getOperand(1).getReg()) {
+ OpcodeTablePtr = &RegOp2MemOpTable2Addr;
+ isTwoAddrFold = true;
+ } else if (i == 0) { // If operand 0
+ if (MI->getOpcode() == X86::MOV64r0)
+ NewMI = MakeM0Inst(*this, X86::MOV64mi32, MOs, MI);
+ else if (MI->getOpcode() == X86::MOV32r0)
+ NewMI = MakeM0Inst(*this, X86::MOV32mi, MOs, MI);
+ else if (MI->getOpcode() == X86::MOV16r0)
+ NewMI = MakeM0Inst(*this, X86::MOV16mi, MOs, MI);
+ else if (MI->getOpcode() == X86::MOV8r0)
+ NewMI = MakeM0Inst(*this, X86::MOV8mi, MOs, MI);
+ if (NewMI)
+ return NewMI;
+
+ OpcodeTablePtr = &RegOp2MemOpTable0;
+ } else if (i == 1) {
+ OpcodeTablePtr = &RegOp2MemOpTable1;
+ } else if (i == 2) {
+ OpcodeTablePtr = &RegOp2MemOpTable2;
+ }
+
+ // If table selected...
+ if (OpcodeTablePtr) {
+ // Find the Opcode to fuse
+ DenseMap<unsigned, std::pair<unsigned,unsigned> >::const_iterator I =
+ OpcodeTablePtr->find(MI->getOpcode());
+ if (I != OpcodeTablePtr->end()) {
+ unsigned Opcode = I->second.first;
+ unsigned MinAlign = (I->second.second & TB_ALIGN_MASK) >> TB_ALIGN_SHIFT;
+ if (Align < MinAlign)
+ return NULL;
+ bool NarrowToMOV32rm = false;
+ if (Size) {
+ unsigned RCSize = getRegClass(MI->getDesc(), i, &RI)->getSize();
+ if (Size < RCSize) {
+ // Check if it's safe to fold the load. If the size of the object is
+ // narrower than the load width, then it's not.
+ if (Opcode != X86::MOV64rm || RCSize != 8 || Size != 4)
+ return NULL;
+ // If this is a 64-bit load, but the spill slot is 32, then we can do
+ // a 32-bit load which is implicitly zero-extended. This likely is due
+ // to liveintervalanalysis remat'ing a load from stack slot.
+ if (MI->getOperand(0).getSubReg() || MI->getOperand(1).getSubReg())
+ return NULL;
+ Opcode = X86::MOV32rm;
+ NarrowToMOV32rm = true;
+ }
+ }
+
+ if (isTwoAddrFold)
+ NewMI = FuseTwoAddrInst(MF, Opcode, MOs, MI, *this);
+ else
+ NewMI = FuseInst(MF, Opcode, i, MOs, MI, *this);
+
+ if (NarrowToMOV32rm) {
+ // If this is the special case where we use a MOV32rm to load a 32-bit
+ // value and zero-extend the top bits. Change the destination register
+ // to a 32-bit one.
+ unsigned DstReg = NewMI->getOperand(0).getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(DstReg))
+ NewMI->getOperand(0).setReg(RI.getSubReg(DstReg,
+ X86::sub_32bit));
+ else
+ NewMI->getOperand(0).setSubReg(X86::sub_32bit);
+ }
+ return NewMI;
+ }
+ }
+
+ // No fusion
+ if (PrintFailedFusing && !MI->isCopy())
+ dbgs() << "We failed to fuse operand " << i << " in " << *MI;
+ return NULL;
+}
+
+/// hasPartialRegUpdate - Return true for all instructions that only update
+/// the first 32 or 64-bits of the destination register and leave the rest
+/// unmodified. This can be used to avoid folding loads if the instructions
+/// only update part of the destination register, and the non-updated part is
+/// not needed. e.g. cvtss2sd, sqrtss. Unfolding the load from these
+/// instructions breaks the partial register dependency and it can improve
+/// performance. e.g.:
+///
+/// movss (%rdi), %xmm0
+/// cvtss2sd %xmm0, %xmm0
+///
+/// Instead of
+/// cvtss2sd (%rdi), %xmm0
+///
+/// FIXME: This should be turned into a TSFlags.
+///
+static bool hasPartialRegUpdate(unsigned Opcode) {
+ switch (Opcode) {
+ case X86::CVTSD2SSrr:
+ case X86::Int_CVTSD2SSrr:
+ case X86::CVTSS2SDrr:
+ case X86::Int_CVTSS2SDrr:
+ case X86::RCPSSr:
+ case X86::RCPSSr_Int:
+ case X86::ROUNDSDr:
+ case X86::ROUNDSSr:
+ case X86::RSQRTSSr:
+ case X86::RSQRTSSr_Int:
+ case X86::SQRTSSr:
+ case X86::SQRTSSr_Int:
+ // AVX encoded versions
+ case X86::VCVTSD2SSrr:
+ case X86::Int_VCVTSD2SSrr:
+ case X86::VCVTSS2SDrr:
+ case X86::Int_VCVTSS2SDrr:
+ case X86::VRCPSSr:
+ case X86::VROUNDSDr:
+ case X86::VROUNDSSr:
+ case X86::VRSQRTSSr:
+ case X86::VSQRTSSr:
+ return true;
+ }
+
+ return false;
+}
+
+MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ int FrameIndex) const {
+ // Check switch flag
+ if (NoFusing) return NULL;
+
+ // Unless optimizing for size, don't fold to avoid partial
+ // register update stalls
+ if (!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize) &&
+ hasPartialRegUpdate(MI->getOpcode()))
+ return 0;
+
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ unsigned Size = MFI->getObjectSize(FrameIndex);
+ unsigned Alignment = MFI->getObjectAlignment(FrameIndex);
+ if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
+ unsigned NewOpc = 0;
+ unsigned RCSize = 0;
+ switch (MI->getOpcode()) {
+ default: return NULL;
+ case X86::TEST8rr: NewOpc = X86::CMP8ri; RCSize = 1; break;
+ case X86::TEST16rr: NewOpc = X86::CMP16ri8; RCSize = 2; break;
+ case X86::TEST32rr: NewOpc = X86::CMP32ri8; RCSize = 4; break;
+ case X86::TEST64rr: NewOpc = X86::CMP64ri8; RCSize = 8; break;
+ }
+ // Check if it's safe to fold the load. If the size of the object is
+ // narrower than the load width, then it's not.
+ if (Size < RCSize)
+ return NULL;
+ // Change to CMPXXri r, 0 first.
+ MI->setDesc(get(NewOpc));
+ MI->getOperand(1).ChangeToImmediate(0);
+ } else if (Ops.size() != 1)
+ return NULL;
+
+ SmallVector<MachineOperand,4> MOs;
+ MOs.push_back(MachineOperand::CreateFI(FrameIndex));
+ return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, Size, Alignment);
+}
+
+MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr *LoadMI) const {
+ // Check switch flag
+ if (NoFusing) return NULL;
+
+ // Unless optimizing for size, don't fold to avoid partial
+ // register update stalls
+ if (!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize) &&
+ hasPartialRegUpdate(MI->getOpcode()))
+ return 0;
+
+ // Determine the alignment of the load.
+ unsigned Alignment = 0;
+ if (LoadMI->hasOneMemOperand())
+ Alignment = (*LoadMI->memoperands_begin())->getAlignment();
+ else
+ switch (LoadMI->getOpcode()) {
+ case X86::AVX_SET0PSY:
+ case X86::AVX_SET0PDY:
+ Alignment = 32;
+ break;
+ case X86::V_SET0:
+ case X86::V_SETALLONES:
+ case X86::AVX_SETALLONES:
+ Alignment = 16;
+ break;
+ case X86::FsFLD0SD:
+ case X86::VFsFLD0SD:
+ Alignment = 8;
+ break;
+ case X86::FsFLD0SS:
+ case X86::VFsFLD0SS:
+ Alignment = 4;
+ break;
+ default:
+ return 0;
+ }
+ if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
+ unsigned NewOpc = 0;
+ switch (MI->getOpcode()) {
+ default: return NULL;
+ case X86::TEST8rr: NewOpc = X86::CMP8ri; break;
+ case X86::TEST16rr: NewOpc = X86::CMP16ri8; break;
+ case X86::TEST32rr: NewOpc = X86::CMP32ri8; break;
+ case X86::TEST64rr: NewOpc = X86::CMP64ri8; break;
+ }
+ // Change to CMPXXri r, 0 first.
+ MI->setDesc(get(NewOpc));
+ MI->getOperand(1).ChangeToImmediate(0);
+ } else if (Ops.size() != 1)
+ return NULL;
+
+ // Make sure the subregisters match.
+ // Otherwise we risk changing the size of the load.
+ if (LoadMI->getOperand(0).getSubReg() != MI->getOperand(Ops[0]).getSubReg())
+ return NULL;
+
+ SmallVector<MachineOperand,X86::AddrNumOperands> MOs;
+ switch (LoadMI->getOpcode()) {
+ case X86::V_SET0:
+ case X86::V_SETALLONES:
+ case X86::AVX_SET0PSY:
+ case X86::AVX_SET0PDY:
+ case X86::AVX_SETALLONES:
+ case X86::FsFLD0SD:
+ case X86::FsFLD0SS:
+ case X86::VFsFLD0SD:
+ case X86::VFsFLD0SS: {
+ // Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure.
+ // Create a constant-pool entry and operands to load from it.
+
+ // Medium and large mode can't fold loads this way.
+ if (TM.getCodeModel() != CodeModel::Small &&
+ TM.getCodeModel() != CodeModel::Kernel)
+ return NULL;
+
+ // x86-32 PIC requires a PIC base register for constant pools.
+ unsigned PICBase = 0;
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ if (TM.getSubtarget<X86Subtarget>().is64Bit())
+ PICBase = X86::RIP;
+ else
+ // FIXME: PICBase = getGlobalBaseReg(&MF);
+ // This doesn't work for several reasons.
+ // 1. GlobalBaseReg may have been spilled.
+ // 2. It may not be live at MI.
+ return NULL;
+ }
+
+ // Create a constant-pool entry.
+ MachineConstantPool &MCP = *MF.getConstantPool();
+ Type *Ty;
+ unsigned Opc = LoadMI->getOpcode();
+ if (Opc == X86::FsFLD0SS || Opc == X86::VFsFLD0SS)
+ Ty = Type::getFloatTy(MF.getFunction()->getContext());
+ else if (Opc == X86::FsFLD0SD || Opc == X86::VFsFLD0SD)
+ Ty = Type::getDoubleTy(MF.getFunction()->getContext());
+ else if (Opc == X86::AVX_SET0PSY || Opc == X86::AVX_SET0PDY)
+ Ty = VectorType::get(Type::getFloatTy(MF.getFunction()->getContext()), 8);
+ else
+ Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 4);
+
+ bool IsAllOnes = (Opc == X86::V_SETALLONES || Opc == X86::AVX_SETALLONES);
+ const Constant *C = IsAllOnes ? Constant::getAllOnesValue(Ty) :
+ Constant::getNullValue(Ty);
+ unsigned CPI = MCP.getConstantPoolIndex(C, Alignment);
+
+ // Create operands to load from the constant pool entry.
+ MOs.push_back(MachineOperand::CreateReg(PICBase, false));
+ MOs.push_back(MachineOperand::CreateImm(1));
+ MOs.push_back(MachineOperand::CreateReg(0, false));
+ MOs.push_back(MachineOperand::CreateCPI(CPI, 0));
+ MOs.push_back(MachineOperand::CreateReg(0, false));
+ break;
+ }
+ default: {
+ // Folding a normal load. Just copy the load's address operands.
+ unsigned NumOps = LoadMI->getDesc().getNumOperands();
+ for (unsigned i = NumOps - X86::AddrNumOperands; i != NumOps; ++i)
+ MOs.push_back(LoadMI->getOperand(i));
+ break;
+ }
+ }
+ return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, 0, Alignment);
+}
+
+
+bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops) const {
+ // Check switch flag
+ if (NoFusing) return 0;
+
+ if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
+ switch (MI->getOpcode()) {
+ default: return false;
+ case X86::TEST8rr:
+ case X86::TEST16rr:
+ case X86::TEST32rr:
+ case X86::TEST64rr:
+ return true;
+ case X86::ADD32ri:
+ // FIXME: AsmPrinter doesn't know how to handle
+ // X86II::MO_GOT_ABSOLUTE_ADDRESS after folding.
+ if (MI->getOperand(2).getTargetFlags() == X86II::MO_GOT_ABSOLUTE_ADDRESS)
+ return false;
+ break;
+ }
+ }
+
+ if (Ops.size() != 1)
+ return false;
+
+ unsigned OpNum = Ops[0];
+ unsigned Opc = MI->getOpcode();
+ unsigned NumOps = MI->getDesc().getNumOperands();
+ bool isTwoAddr = NumOps > 1 &&
+ MI->getDesc().getOperandConstraint(1, MCOI::TIED_TO) != -1;
+
+ // Folding a memory location into the two-address part of a two-address
+ // instruction is different than folding it other places. It requires
+ // replacing the *two* registers with the memory location.
+ const DenseMap<unsigned, std::pair<unsigned,unsigned> > *OpcodeTablePtr = 0;
+ if (isTwoAddr && NumOps >= 2 && OpNum < 2) {
+ OpcodeTablePtr = &RegOp2MemOpTable2Addr;
+ } else if (OpNum == 0) { // If operand 0
+ switch (Opc) {
+ case X86::MOV8r0:
+ case X86::MOV16r0:
+ case X86::MOV32r0:
+ case X86::MOV64r0: return true;
+ default: break;
+ }
+ OpcodeTablePtr = &RegOp2MemOpTable0;
+ } else if (OpNum == 1) {
+ OpcodeTablePtr = &RegOp2MemOpTable1;
+ } else if (OpNum == 2) {
+ OpcodeTablePtr = &RegOp2MemOpTable2;
+ }
+
+ if (OpcodeTablePtr && OpcodeTablePtr->count(Opc))
+ return true;
+ return TargetInstrInfoImpl::canFoldMemoryOperand(MI, Ops);
+}
+
+bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
+ unsigned Reg, bool UnfoldLoad, bool UnfoldStore,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const {
+ DenseMap<unsigned, std::pair<unsigned,unsigned> >::const_iterator I =
+ MemOp2RegOpTable.find(MI->getOpcode());
+ if (I == MemOp2RegOpTable.end())
+ return false;
+ unsigned Opc = I->second.first;
+ unsigned Index = I->second.second & TB_INDEX_MASK;
+ bool FoldedLoad = I->second.second & TB_FOLDED_LOAD;
+ bool FoldedStore = I->second.second & TB_FOLDED_STORE;
+ if (UnfoldLoad && !FoldedLoad)
+ return false;
+ UnfoldLoad &= FoldedLoad;
+ if (UnfoldStore && !FoldedStore)
+ return false;
+ UnfoldStore &= FoldedStore;
+
+ const MCInstrDesc &MCID = get(Opc);
+ const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI);
+ if (!MI->hasOneMemOperand() &&
+ RC == &X86::VR128RegClass &&
+ !TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast())
+ // Without memoperands, loadRegFromAddr and storeRegToStackSlot will
+ // conservatively assume the address is unaligned. That's bad for
+ // performance.
+ return false;
+ SmallVector<MachineOperand, X86::AddrNumOperands> AddrOps;
+ SmallVector<MachineOperand,2> BeforeOps;
+ SmallVector<MachineOperand,2> AfterOps;
+ SmallVector<MachineOperand,4> ImpOps;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &Op = MI->getOperand(i);
+ if (i >= Index && i < Index + X86::AddrNumOperands)
+ AddrOps.push_back(Op);
+ else if (Op.isReg() && Op.isImplicit())
+ ImpOps.push_back(Op);
+ else if (i < Index)
+ BeforeOps.push_back(Op);
+ else if (i > Index)
+ AfterOps.push_back(Op);
+ }
+
+ // Emit the load instruction.
+ if (UnfoldLoad) {
+ std::pair<MachineInstr::mmo_iterator,
+ MachineInstr::mmo_iterator> MMOs =
+ MF.extractLoadMemRefs(MI->memoperands_begin(),
+ MI->memoperands_end());
+ loadRegFromAddr(MF, Reg, AddrOps, RC, MMOs.first, MMOs.second, NewMIs);
+ if (UnfoldStore) {
+ // Address operands cannot be marked isKill.
+ for (unsigned i = 1; i != 1 + X86::AddrNumOperands; ++i) {
+ MachineOperand &MO = NewMIs[0]->getOperand(i);
+ if (MO.isReg())
+ MO.setIsKill(false);
+ }
+ }
+ }
+
+ // Emit the data processing instruction.
+ MachineInstr *DataMI = MF.CreateMachineInstr(MCID, MI->getDebugLoc(), true);
+ MachineInstrBuilder MIB(DataMI);
+
+ if (FoldedStore)
+ MIB.addReg(Reg, RegState::Define);
+ for (unsigned i = 0, e = BeforeOps.size(); i != e; ++i)
+ MIB.addOperand(BeforeOps[i]);
+ if (FoldedLoad)
+ MIB.addReg(Reg);
+ for (unsigned i = 0, e = AfterOps.size(); i != e; ++i)
+ MIB.addOperand(AfterOps[i]);
+ for (unsigned i = 0, e = ImpOps.size(); i != e; ++i) {
+ MachineOperand &MO = ImpOps[i];
+ MIB.addReg(MO.getReg(),
+ getDefRegState(MO.isDef()) |
+ RegState::Implicit |
+ getKillRegState(MO.isKill()) |
+ getDeadRegState(MO.isDead()) |
+ getUndefRegState(MO.isUndef()));
+ }
+ // Change CMP32ri r, 0 back to TEST32rr r, r, etc.
+ unsigned NewOpc = 0;
+ switch (DataMI->getOpcode()) {
+ default: break;
+ case X86::CMP64ri32:
+ case X86::CMP64ri8:
+ case X86::CMP32ri:
+ case X86::CMP32ri8:
+ case X86::CMP16ri:
+ case X86::CMP16ri8:
+ case X86::CMP8ri: {
+ MachineOperand &MO0 = DataMI->getOperand(0);
+ MachineOperand &MO1 = DataMI->getOperand(1);
+ if (MO1.getImm() == 0) {
+ switch (DataMI->getOpcode()) {
+ default: break;
+ case X86::CMP64ri8:
+ case X86::CMP64ri32: NewOpc = X86::TEST64rr; break;
+ case X86::CMP32ri8:
+ case X86::CMP32ri: NewOpc = X86::TEST32rr; break;
+ case X86::CMP16ri8:
+ case X86::CMP16ri: NewOpc = X86::TEST16rr; break;
+ case X86::CMP8ri: NewOpc = X86::TEST8rr; break;
+ }
+ DataMI->setDesc(get(NewOpc));
+ MO1.ChangeToRegister(MO0.getReg(), false);
+ }
+ }
+ }
+ NewMIs.push_back(DataMI);
+
+ // Emit the store instruction.
+ if (UnfoldStore) {
+ const TargetRegisterClass *DstRC = getRegClass(MCID, 0, &RI);
+ std::pair<MachineInstr::mmo_iterator,
+ MachineInstr::mmo_iterator> MMOs =
+ MF.extractStoreMemRefs(MI->memoperands_begin(),
+ MI->memoperands_end());
+ storeRegToAddr(MF, Reg, true, AddrOps, DstRC, MMOs.first, MMOs.second, NewMIs);
+ }
+
+ return true;
+}
+
+bool
+X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
+ SmallVectorImpl<SDNode*> &NewNodes) const {
+ if (!N->isMachineOpcode())
+ return false;
+
+ DenseMap<unsigned, std::pair<unsigned,unsigned> >::const_iterator I =
+ MemOp2RegOpTable.find(N->getMachineOpcode());
+ if (I == MemOp2RegOpTable.end())
+ return false;
+ unsigned Opc = I->second.first;
+ unsigned Index = I->second.second & TB_INDEX_MASK;
+ bool FoldedLoad = I->second.second & TB_FOLDED_LOAD;
+ bool FoldedStore = I->second.second & TB_FOLDED_STORE;
+ const MCInstrDesc &MCID = get(Opc);
+ const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI);
+ unsigned NumDefs = MCID.NumDefs;
+ std::vector<SDValue> AddrOps;
+ std::vector<SDValue> BeforeOps;
+ std::vector<SDValue> AfterOps;
+ DebugLoc dl = N->getDebugLoc();
+ unsigned NumOps = N->getNumOperands();
+ for (unsigned i = 0; i != NumOps-1; ++i) {
+ SDValue Op = N->getOperand(i);
+ if (i >= Index-NumDefs && i < Index-NumDefs + X86::AddrNumOperands)
+ AddrOps.push_back(Op);
+ else if (i < Index-NumDefs)
+ BeforeOps.push_back(Op);
+ else if (i > Index-NumDefs)
+ AfterOps.push_back(Op);
+ }
+ SDValue Chain = N->getOperand(NumOps-1);
+ AddrOps.push_back(Chain);
+
+ // Emit the load instruction.
+ SDNode *Load = 0;
+ MachineFunction &MF = DAG.getMachineFunction();
+ if (FoldedLoad) {
+ EVT VT = *RC->vt_begin();
+ std::pair<MachineInstr::mmo_iterator,
+ MachineInstr::mmo_iterator> MMOs =
+ MF.extractLoadMemRefs(cast<MachineSDNode>(N)->memoperands_begin(),
+ cast<MachineSDNode>(N)->memoperands_end());
+ if (!(*MMOs.first) &&
+ RC == &X86::VR128RegClass &&
+ !TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast())
+ // Do not introduce a slow unaligned load.
+ return false;
+ unsigned Alignment = RC->getSize() == 32 ? 32 : 16;
+ bool isAligned = (*MMOs.first) &&
+ (*MMOs.first)->getAlignment() >= Alignment;
+ Load = DAG.getMachineNode(getLoadRegOpcode(0, RC, isAligned, TM), dl,
+ VT, MVT::Other, &AddrOps[0], AddrOps.size());
+ NewNodes.push_back(Load);
+
+ // Preserve memory reference information.
+ cast<MachineSDNode>(Load)->setMemRefs(MMOs.first, MMOs.second);
+ }
+
+ // Emit the data processing instruction.
+ std::vector<EVT> VTs;
+ const TargetRegisterClass *DstRC = 0;
+ if (MCID.getNumDefs() > 0) {
+ DstRC = getRegClass(MCID, 0, &RI);
+ VTs.push_back(*DstRC->vt_begin());
+ }
+ for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
+ EVT VT = N->getValueType(i);
+ if (VT != MVT::Other && i >= (unsigned)MCID.getNumDefs())
+ VTs.push_back(VT);
+ }
+ if (Load)
+ BeforeOps.push_back(SDValue(Load, 0));
+ std::copy(AfterOps.begin(), AfterOps.end(), std::back_inserter(BeforeOps));
+ SDNode *NewNode= DAG.getMachineNode(Opc, dl, VTs, &BeforeOps[0],
+ BeforeOps.size());
+ NewNodes.push_back(NewNode);
+
+ // Emit the store instruction.
+ if (FoldedStore) {
+ AddrOps.pop_back();
+ AddrOps.push_back(SDValue(NewNode, 0));
+ AddrOps.push_back(Chain);
+ std::pair<MachineInstr::mmo_iterator,
+ MachineInstr::mmo_iterator> MMOs =
+ MF.extractStoreMemRefs(cast<MachineSDNode>(N)->memoperands_begin(),
+ cast<MachineSDNode>(N)->memoperands_end());
+ if (!(*MMOs.first) &&
+ RC == &X86::VR128RegClass &&
+ !TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast())
+ // Do not introduce a slow unaligned store.
+ return false;
+ unsigned Alignment = RC->getSize() == 32 ? 32 : 16;
+ bool isAligned = (*MMOs.first) &&
+ (*MMOs.first)->getAlignment() >= Alignment;
+ SDNode *Store = DAG.getMachineNode(getStoreRegOpcode(0, DstRC,
+ isAligned, TM),
+ dl, MVT::Other,
+ &AddrOps[0], AddrOps.size());
+ NewNodes.push_back(Store);
+
+ // Preserve memory reference information.
+ cast<MachineSDNode>(Load)->setMemRefs(MMOs.first, MMOs.second);
+ }
+
+ return true;
+}
+
+unsigned X86InstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc,
+ bool UnfoldLoad, bool UnfoldStore,
+ unsigned *LoadRegIndex) const {
+ DenseMap<unsigned, std::pair<unsigned,unsigned> >::const_iterator I =
+ MemOp2RegOpTable.find(Opc);
+ if (I == MemOp2RegOpTable.end())
+ return 0;
+ bool FoldedLoad = I->second.second & TB_FOLDED_LOAD;
+ bool FoldedStore = I->second.second & TB_FOLDED_STORE;
+ if (UnfoldLoad && !FoldedLoad)
+ return 0;
+ if (UnfoldStore && !FoldedStore)
+ return 0;
+ if (LoadRegIndex)
+ *LoadRegIndex = I->second.second & TB_INDEX_MASK;
+ return I->second.first;
+}
+
+bool
+X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
+ int64_t &Offset1, int64_t &Offset2) const {
+ if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
+ return false;
+ unsigned Opc1 = Load1->getMachineOpcode();
+ unsigned Opc2 = Load2->getMachineOpcode();
+ switch (Opc1) {
+ default: return false;
+ case X86::MOV8rm:
+ case X86::MOV16rm:
+ case X86::MOV32rm:
+ case X86::MOV64rm:
+ case X86::LD_Fp32m:
+ case X86::LD_Fp64m:
+ case X86::LD_Fp80m:
+ case X86::MOVSSrm:
+ case X86::MOVSDrm:
+ case X86::MMX_MOVD64rm:
+ case X86::MMX_MOVQ64rm:
+ case X86::FsMOVAPSrm:
+ case X86::FsMOVAPDrm:
+ case X86::MOVAPSrm:
+ case X86::MOVUPSrm:
+ case X86::MOVAPDrm:
+ case X86::MOVDQArm:
+ case X86::MOVDQUrm:
+ // AVX load instructions
+ case X86::VMOVSSrm:
+ case X86::VMOVSDrm:
+ case X86::FsVMOVAPSrm:
+ case X86::FsVMOVAPDrm:
+ case X86::VMOVAPSrm:
+ case X86::VMOVUPSrm:
+ case X86::VMOVAPDrm:
+ case X86::VMOVDQArm:
+ case X86::VMOVDQUrm:
+ case X86::VMOVAPSYrm:
+ case X86::VMOVUPSYrm:
+ case X86::VMOVAPDYrm:
+ case X86::VMOVDQAYrm:
+ case X86::VMOVDQUYrm:
+ break;
+ }
+ switch (Opc2) {
+ default: return false;
+ case X86::MOV8rm:
+ case X86::MOV16rm:
+ case X86::MOV32rm:
+ case X86::MOV64rm:
+ case X86::LD_Fp32m:
+ case X86::LD_Fp64m:
+ case X86::LD_Fp80m:
+ case X86::MOVSSrm:
+ case X86::MOVSDrm:
+ case X86::MMX_MOVD64rm:
+ case X86::MMX_MOVQ64rm:
+ case X86::FsMOVAPSrm:
+ case X86::FsMOVAPDrm:
+ case X86::MOVAPSrm:
+ case X86::MOVUPSrm:
+ case X86::MOVAPDrm:
+ case X86::MOVDQArm:
+ case X86::MOVDQUrm:
+ // AVX load instructions
+ case X86::VMOVSSrm:
+ case X86::VMOVSDrm:
+ case X86::FsVMOVAPSrm:
+ case X86::FsVMOVAPDrm:
+ case X86::VMOVAPSrm:
+ case X86::VMOVUPSrm:
+ case X86::VMOVAPDrm:
+ case X86::VMOVDQArm:
+ case X86::VMOVDQUrm:
+ case X86::VMOVAPSYrm:
+ case X86::VMOVUPSYrm:
+ case X86::VMOVAPDYrm:
+ case X86::VMOVDQAYrm:
+ case X86::VMOVDQUYrm:
+ break;
+ }
+
+ // Check if chain operands and base addresses match.
+ if (Load1->getOperand(0) != Load2->getOperand(0) ||
+ Load1->getOperand(5) != Load2->getOperand(5))
+ return false;
+ // Segment operands should match as well.
+ if (Load1->getOperand(4) != Load2->getOperand(4))
+ return false;
+ // Scale should be 1, Index should be Reg0.
+ if (Load1->getOperand(1) == Load2->getOperand(1) &&
+ Load1->getOperand(2) == Load2->getOperand(2)) {
+ if (cast<ConstantSDNode>(Load1->getOperand(1))->getZExtValue() != 1)
+ return false;
+
+ // Now let's examine the displacements.
+ if (isa<ConstantSDNode>(Load1->getOperand(3)) &&
+ isa<ConstantSDNode>(Load2->getOperand(3))) {
+ Offset1 = cast<ConstantSDNode>(Load1->getOperand(3))->getSExtValue();
+ Offset2 = cast<ConstantSDNode>(Load2->getOperand(3))->getSExtValue();
+ return true;
+ }
+ }
+ return false;
+}
+
+bool X86InstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
+ int64_t Offset1, int64_t Offset2,
+ unsigned NumLoads) const {
+ assert(Offset2 > Offset1);
+ if ((Offset2 - Offset1) / 8 > 64)
+ return false;
+
+ unsigned Opc1 = Load1->getMachineOpcode();
+ unsigned Opc2 = Load2->getMachineOpcode();
+ if (Opc1 != Opc2)
+ return false; // FIXME: overly conservative?
+
+ switch (Opc1) {
+ default: break;
+ case X86::LD_Fp32m:
+ case X86::LD_Fp64m:
+ case X86::LD_Fp80m:
+ case X86::MMX_MOVD64rm:
+ case X86::MMX_MOVQ64rm:
+ return false;
+ }
+
+ EVT VT = Load1->getValueType(0);
+ switch (VT.getSimpleVT().SimpleTy) {
+ default:
+ // XMM registers. In 64-bit mode we can be a bit more aggressive since we
+ // have 16 of them to play with.
+ if (TM.getSubtargetImpl()->is64Bit()) {
+ if (NumLoads >= 3)
+ return false;
+ } else if (NumLoads) {
+ return false;
+ }
+ break;
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ case MVT::i64:
+ case MVT::f32:
+ case MVT::f64:
+ if (NumLoads)
+ return false;
+ break;
+ }
+
+ return true;
+}
+
+
+bool X86InstrInfo::
+ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
+ assert(Cond.size() == 1 && "Invalid X86 branch condition!");
+ X86::CondCode CC = static_cast<X86::CondCode>(Cond[0].getImm());
+ if (CC == X86::COND_NE_OR_P || CC == X86::COND_NP_OR_E)
+ return true;
+ Cond[0].setImm(GetOppositeBranchCondition(CC));
+ return false;
+}
+
+bool X86InstrInfo::
+isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
+ // FIXME: Return false for x87 stack register classes for now. We can't
+ // allow any loads of these registers before FpGet_ST0_80.
+ return !(RC == &X86::CCRRegClass || RC == &X86::RFP32RegClass ||
+ RC == &X86::RFP64RegClass || RC == &X86::RFP80RegClass);
+}
+
+/// getGlobalBaseReg - Return a virtual register initialized with the
+/// the global base register value. Output instructions required to
+/// initialize the register in the function entry block, if necessary.
+///
+/// TODO: Eliminate this and move the code to X86MachineFunctionInfo.
+///
+unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
+ assert(!TM.getSubtarget<X86Subtarget>().is64Bit() &&
+ "X86-64 PIC uses RIP relative addressing");
+
+ X86MachineFunctionInfo *X86FI = MF->getInfo<X86MachineFunctionInfo>();
+ unsigned GlobalBaseReg = X86FI->getGlobalBaseReg();
+ if (GlobalBaseReg != 0)
+ return GlobalBaseReg;
+
+ // Create the register. The code to initialize it is inserted
+ // later, by the CGBR pass (below).
+ MachineRegisterInfo &RegInfo = MF->getRegInfo();
+ GlobalBaseReg = RegInfo.createVirtualRegister(X86::GR32RegisterClass);
+ X86FI->setGlobalBaseReg(GlobalBaseReg);
+ return GlobalBaseReg;
+}
+
+// These are the replaceable SSE instructions. Some of these have Int variants
+// that we don't include here. We don't want to replace instructions selected
+// by intrinsics.
+static const unsigned ReplaceableInstrs[][3] = {
+ //PackedSingle PackedDouble PackedInt
+ { X86::MOVAPSmr, X86::MOVAPDmr, X86::MOVDQAmr },
+ { X86::MOVAPSrm, X86::MOVAPDrm, X86::MOVDQArm },
+ { X86::MOVAPSrr, X86::MOVAPDrr, X86::MOVDQArr },
+ { X86::MOVUPSmr, X86::MOVUPDmr, X86::MOVDQUmr },
+ { X86::MOVUPSrm, X86::MOVUPDrm, X86::MOVDQUrm },
+ { X86::MOVNTPSmr, X86::MOVNTPDmr, X86::MOVNTDQmr },
+ { X86::ANDNPSrm, X86::ANDNPDrm, X86::PANDNrm },
+ { X86::ANDNPSrr, X86::ANDNPDrr, X86::PANDNrr },
+ { X86::ANDPSrm, X86::ANDPDrm, X86::PANDrm },
+ { X86::ANDPSrr, X86::ANDPDrr, X86::PANDrr },
+ { X86::ORPSrm, X86::ORPDrm, X86::PORrm },
+ { X86::ORPSrr, X86::ORPDrr, X86::PORrr },
+ { X86::XORPSrm, X86::XORPDrm, X86::PXORrm },
+ { X86::XORPSrr, X86::XORPDrr, X86::PXORrr },
+ // AVX 128-bit support
+ { X86::VMOVAPSmr, X86::VMOVAPDmr, X86::VMOVDQAmr },
+ { X86::VMOVAPSrm, X86::VMOVAPDrm, X86::VMOVDQArm },
+ { X86::VMOVAPSrr, X86::VMOVAPDrr, X86::VMOVDQArr },
+ { X86::VMOVUPSmr, X86::VMOVUPDmr, X86::VMOVDQUmr },
+ { X86::VMOVUPSrm, X86::VMOVUPDrm, X86::VMOVDQUrm },
+ { X86::VMOVNTPSmr, X86::VMOVNTPDmr, X86::VMOVNTDQmr },
+ { X86::VANDNPSrm, X86::VANDNPDrm, X86::VPANDNrm },
+ { X86::VANDNPSrr, X86::VANDNPDrr, X86::VPANDNrr },
+ { X86::VANDPSrm, X86::VANDPDrm, X86::VPANDrm },
+ { X86::VANDPSrr, X86::VANDPDrr, X86::VPANDrr },
+ { X86::VORPSrm, X86::VORPDrm, X86::VPORrm },
+ { X86::VORPSrr, X86::VORPDrr, X86::VPORrr },
+ { X86::VXORPSrm, X86::VXORPDrm, X86::VPXORrm },
+ { X86::VXORPSrr, X86::VXORPDrr, X86::VPXORrr },
+ // AVX 256-bit support
+ { X86::VMOVAPSYmr, X86::VMOVAPDYmr, X86::VMOVDQAYmr },
+ { X86::VMOVAPSYrm, X86::VMOVAPDYrm, X86::VMOVDQAYrm },
+ { X86::VMOVAPSYrr, X86::VMOVAPDYrr, X86::VMOVDQAYrr },
+ { X86::VMOVUPSYmr, X86::VMOVUPDYmr, X86::VMOVDQUYmr },
+ { X86::VMOVUPSYrm, X86::VMOVUPDYrm, X86::VMOVDQUYrm },
+ { X86::VMOVNTPSYmr, X86::VMOVNTPDYmr, X86::VMOVNTDQYmr },
+};
+
+// FIXME: Some shuffle and unpack instructions have equivalents in different
+// domains, but they require a bit more work than just switching opcodes.
+
+static const unsigned *lookup(unsigned opcode, unsigned domain) {
+ for (unsigned i = 0, e = array_lengthof(ReplaceableInstrs); i != e; ++i)
+ if (ReplaceableInstrs[i][domain-1] == opcode)
+ return ReplaceableInstrs[i];
+ return 0;
+}
+
+std::pair<uint16_t, uint16_t>
+X86InstrInfo::getExecutionDomain(const MachineInstr *MI) const {
+ uint16_t domain = (MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
+ return std::make_pair(domain,
+ domain && lookup(MI->getOpcode(), domain) ? 0xe : 0);
+}
+
+void X86InstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
+ assert(Domain>0 && Domain<4 && "Invalid execution domain");
+ uint16_t dom = (MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
+ assert(dom && "Not an SSE instruction");
+ const unsigned *table = lookup(MI->getOpcode(), dom);
+ assert(table && "Cannot change domain");
+ MI->setDesc(get(table[Domain-1]));
+}
+
+/// getNoopForMachoTarget - Return the noop instruction to use for a noop.
+void X86InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
+ NopInst.setOpcode(X86::NOOP);
+}
+
+bool X86InstrInfo::isHighLatencyDef(int opc) const {
+ switch (opc) {
+ default: return false;
+ case X86::DIVSDrm:
+ case X86::DIVSDrm_Int:
+ case X86::DIVSDrr:
+ case X86::DIVSDrr_Int:
+ case X86::DIVSSrm:
+ case X86::DIVSSrm_Int:
+ case X86::DIVSSrr:
+ case X86::DIVSSrr_Int:
+ case X86::SQRTPDm:
+ case X86::SQRTPDm_Int:
+ case X86::SQRTPDr:
+ case X86::SQRTPDr_Int:
+ case X86::SQRTPSm:
+ case X86::SQRTPSm_Int:
+ case X86::SQRTPSr:
+ case X86::SQRTPSr_Int:
+ case X86::SQRTSDm:
+ case X86::SQRTSDm_Int:
+ case X86::SQRTSDr:
+ case X86::SQRTSDr_Int:
+ case X86::SQRTSSm:
+ case X86::SQRTSSm_Int:
+ case X86::SQRTSSr:
+ case X86::SQRTSSr_Int:
+ // AVX instructions with high latency
+ case X86::VDIVSDrm:
+ case X86::VDIVSDrm_Int:
+ case X86::VDIVSDrr:
+ case X86::VDIVSDrr_Int:
+ case X86::VDIVSSrm:
+ case X86::VDIVSSrm_Int:
+ case X86::VDIVSSrr:
+ case X86::VDIVSSrr_Int:
+ case X86::VSQRTPDm:
+ case X86::VSQRTPDm_Int:
+ case X86::VSQRTPDr:
+ case X86::VSQRTPDr_Int:
+ case X86::VSQRTPSm:
+ case X86::VSQRTPSm_Int:
+ case X86::VSQRTPSr:
+ case X86::VSQRTPSr_Int:
+ case X86::VSQRTSDm:
+ case X86::VSQRTSDm_Int:
+ case X86::VSQRTSDr:
+ case X86::VSQRTSSm:
+ case X86::VSQRTSSm_Int:
+ case X86::VSQRTSSr:
+ return true;
+ }
+}
+
+bool X86InstrInfo::
+hasHighOperandLatency(const InstrItineraryData *ItinData,
+ const MachineRegisterInfo *MRI,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI, unsigned UseIdx) const {
+ return isHighLatencyDef(DefMI->getOpcode());
+}
+
+namespace {
+ /// CGBR - Create Global Base Reg pass. This initializes the PIC
+ /// global base register for x86-32.
+ struct CGBR : public MachineFunctionPass {
+ static char ID;
+ CGBR() : MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF) {
+ const X86TargetMachine *TM =
+ static_cast<const X86TargetMachine *>(&MF.getTarget());
+
+ assert(!TM->getSubtarget<X86Subtarget>().is64Bit() &&
+ "X86-64 PIC uses RIP relative addressing");
+
+ // Only emit a global base reg in PIC mode.
+ if (TM->getRelocationModel() != Reloc::PIC_)
+ return false;
+
+ X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+ unsigned GlobalBaseReg = X86FI->getGlobalBaseReg();
+
+ // If we didn't need a GlobalBaseReg, don't insert code.
+ if (GlobalBaseReg == 0)
+ return false;
+
+ // Insert the set of GlobalBaseReg into the first MBB of the function
+ MachineBasicBlock &FirstMBB = MF.front();
+ MachineBasicBlock::iterator MBBI = FirstMBB.begin();
+ DebugLoc DL = FirstMBB.findDebugLoc(MBBI);
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ const X86InstrInfo *TII = TM->getInstrInfo();
+
+ unsigned PC;
+ if (TM->getSubtarget<X86Subtarget>().isPICStyleGOT())
+ PC = RegInfo.createVirtualRegister(X86::GR32RegisterClass);
+ else
+ PC = GlobalBaseReg;
+
+ // Operand of MovePCtoStack is completely ignored by asm printer. It's
+ // only used in JIT code emission as displacement to pc.
+ BuildMI(FirstMBB, MBBI, DL, TII->get(X86::MOVPC32r), PC).addImm(0);
+
+ // If we're using vanilla 'GOT' PIC style, we should use relative addressing
+ // not to pc, but to _GLOBAL_OFFSET_TABLE_ external.
+ if (TM->getSubtarget<X86Subtarget>().isPICStyleGOT()) {
+ // Generate addl $__GLOBAL_OFFSET_TABLE_ + [.-piclabel], %some_register
+ BuildMI(FirstMBB, MBBI, DL, TII->get(X86::ADD32ri), GlobalBaseReg)
+ .addReg(PC).addExternalSymbol("_GLOBAL_OFFSET_TABLE_",
+ X86II::MO_GOT_ABSOLUTE_ADDRESS);
+ }
+
+ return true;
+ }
+
+ virtual const char *getPassName() const {
+ return "X86 PIC Global Base Reg Initialization";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ };
+}
+
+char CGBR::ID = 0;
+FunctionPass*
+llvm::createGlobalBaseRegPass() { return new CGBR(); }
diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.h b/contrib/llvm/lib/Target/X86/X86InstrInfo.h
new file mode 100644
index 0000000..97009db
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.h
@@ -0,0 +1,375 @@
+//===- X86InstrInfo.h - X86 Instruction Information ------------*- C++ -*- ===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the X86 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86INSTRUCTIONINFO_H
+#define X86INSTRUCTIONINFO_H
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "X86.h"
+#include "X86RegisterInfo.h"
+#include "llvm/ADT/DenseMap.h"
+
+#define GET_INSTRINFO_HEADER
+#include "X86GenInstrInfo.inc"
+
+namespace llvm {
+ class X86RegisterInfo;
+ class X86TargetMachine;
+
+namespace X86 {
+ // X86 specific condition code. These correspond to X86_*_COND in
+ // X86InstrInfo.td. They must be kept in synch.
+ enum CondCode {
+ COND_A = 0,
+ COND_AE = 1,
+ COND_B = 2,
+ COND_BE = 3,
+ COND_E = 4,
+ COND_G = 5,
+ COND_GE = 6,
+ COND_L = 7,
+ COND_LE = 8,
+ COND_NE = 9,
+ COND_NO = 10,
+ COND_NP = 11,
+ COND_NS = 12,
+ COND_O = 13,
+ COND_P = 14,
+ COND_S = 15,
+
+ // Artificial condition codes. These are used by AnalyzeBranch
+ // to indicate a block terminated with two conditional branches to
+ // the same location. This occurs in code using FCMP_OEQ or FCMP_UNE,
+ // which can't be represented on x86 with a single condition. These
+ // are never used in MachineInstrs.
+ COND_NE_OR_P,
+ COND_NP_OR_E,
+
+ COND_INVALID
+ };
+
+ // Turn condition code into conditional branch opcode.
+ unsigned GetCondBranchFromCond(CondCode CC);
+
+ /// GetOppositeBranchCondition - Return the inverse of the specified cond,
+ /// e.g. turning COND_E to COND_NE.
+ CondCode GetOppositeBranchCondition(X86::CondCode CC);
+} // end namespace X86;
+
+
+/// isGlobalStubReference - Return true if the specified TargetFlag operand is
+/// a reference to a stub for a global, not the global itself.
+inline static bool isGlobalStubReference(unsigned char TargetFlag) {
+ switch (TargetFlag) {
+ case X86II::MO_DLLIMPORT: // dllimport stub.
+ case X86II::MO_GOTPCREL: // rip-relative GOT reference.
+ case X86II::MO_GOT: // normal GOT reference.
+ case X86II::MO_DARWIN_NONLAZY_PIC_BASE: // Normal $non_lazy_ptr ref.
+ case X86II::MO_DARWIN_NONLAZY: // Normal $non_lazy_ptr ref.
+ case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE: // Hidden $non_lazy_ptr ref.
+ return true;
+ default:
+ return false;
+ }
+}
+
+/// isGlobalRelativeToPICBase - Return true if the specified global value
+/// reference is relative to a 32-bit PIC base (X86ISD::GlobalBaseReg). If this
+/// is true, the addressing mode has the PIC base register added in (e.g. EBX).
+inline static bool isGlobalRelativeToPICBase(unsigned char TargetFlag) {
+ switch (TargetFlag) {
+ case X86II::MO_GOTOFF: // isPICStyleGOT: local global.
+ case X86II::MO_GOT: // isPICStyleGOT: other global.
+ case X86II::MO_PIC_BASE_OFFSET: // Darwin local global.
+ case X86II::MO_DARWIN_NONLAZY_PIC_BASE: // Darwin/32 external global.
+ case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE: // Darwin/32 hidden global.
+ case X86II::MO_TLVP: // ??? Pretty sure..
+ return true;
+ default:
+ return false;
+ }
+}
+
+inline static bool isScale(const MachineOperand &MO) {
+ return MO.isImm() &&
+ (MO.getImm() == 1 || MO.getImm() == 2 ||
+ MO.getImm() == 4 || MO.getImm() == 8);
+}
+
+inline static bool isLeaMem(const MachineInstr *MI, unsigned Op) {
+ if (MI->getOperand(Op).isFI()) return true;
+ return Op+4 <= MI->getNumOperands() &&
+ MI->getOperand(Op ).isReg() && isScale(MI->getOperand(Op+1)) &&
+ MI->getOperand(Op+2).isReg() &&
+ (MI->getOperand(Op+3).isImm() ||
+ MI->getOperand(Op+3).isGlobal() ||
+ MI->getOperand(Op+3).isCPI() ||
+ MI->getOperand(Op+3).isJTI());
+}
+
+inline static bool isMem(const MachineInstr *MI, unsigned Op) {
+ if (MI->getOperand(Op).isFI()) return true;
+ return Op+5 <= MI->getNumOperands() &&
+ MI->getOperand(Op+4).isReg() &&
+ isLeaMem(MI, Op);
+}
+
+class X86InstrInfo : public X86GenInstrInfo {
+ X86TargetMachine &TM;
+ const X86RegisterInfo RI;
+
+ /// RegOp2MemOpTable2Addr, RegOp2MemOpTable0, RegOp2MemOpTable1,
+ /// RegOp2MemOpTable2 - Load / store folding opcode maps.
+ ///
+ typedef DenseMap<unsigned,
+ std::pair<unsigned, unsigned> > RegOp2MemOpTableType;
+ RegOp2MemOpTableType RegOp2MemOpTable2Addr;
+ RegOp2MemOpTableType RegOp2MemOpTable0;
+ RegOp2MemOpTableType RegOp2MemOpTable1;
+ RegOp2MemOpTableType RegOp2MemOpTable2;
+
+ /// MemOp2RegOpTable - Load / store unfolding opcode map.
+ ///
+ typedef DenseMap<unsigned,
+ std::pair<unsigned, unsigned> > MemOp2RegOpTableType;
+ MemOp2RegOpTableType MemOp2RegOpTable;
+
+ void AddTableEntry(RegOp2MemOpTableType &R2MTable,
+ MemOp2RegOpTableType &M2RTable,
+ unsigned RegOp, unsigned MemOp, unsigned Flags);
+
+public:
+ explicit X86InstrInfo(X86TargetMachine &tm);
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ virtual const X86RegisterInfo &getRegisterInfo() const { return RI; }
+
+ /// isCoalescableExtInstr - Return true if the instruction is a "coalescable"
+ /// extension instruction. That is, it's like a copy where it's legal for the
+ /// source to overlap the destination. e.g. X86::MOVSX64rr32. If this returns
+ /// true, then it's expected the pre-extension value is available as a subreg
+ /// of the result register. This also returns the sub-register index in
+ /// SubIdx.
+ virtual bool isCoalescableExtInstr(const MachineInstr &MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SubIdx) const;
+
+ unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
+ /// isLoadFromStackSlotPostFE - Check for post-frame ptr elimination
+ /// stack locations as well. This uses a heuristic so it isn't
+ /// reliable for correctness.
+ unsigned isLoadFromStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ unsigned isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const;
+ /// isStoreToStackSlotPostFE - Check for post-frame ptr elimination
+ /// stack locations as well. This uses a heuristic so it isn't
+ /// reliable for correctness.
+ unsigned isStoreToStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ bool isReallyTriviallyReMaterializable(const MachineInstr *MI,
+ AliasAnalysis *AA) const;
+ void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ unsigned DestReg, unsigned SubIdx,
+ const MachineInstr *Orig,
+ const TargetRegisterInfo &TRI) const;
+
+ /// convertToThreeAddress - This method must be implemented by targets that
+ /// set the M_CONVERTIBLE_TO_3_ADDR flag. When this flag is set, the target
+ /// may be able to convert a two-address instruction into a true
+ /// three-address instruction on demand. This allows the X86 target (for
+ /// example) to convert ADD and SHL instructions into LEA instructions if they
+ /// would require register copies due to two-addressness.
+ ///
+ /// This method returns a null pointer if the transformation cannot be
+ /// performed, otherwise it returns the new instruction.
+ ///
+ virtual MachineInstr *convertToThreeAddress(MachineFunction::iterator &MFI,
+ MachineBasicBlock::iterator &MBBI,
+ LiveVariables *LV) const;
+
+ /// commuteInstruction - We have a few instructions that must be hacked on to
+ /// commute them.
+ ///
+ virtual MachineInstr *commuteInstruction(MachineInstr *MI, bool NewMI) const;
+
+ // Branch analysis.
+ virtual bool isUnpredicatedTerminator(const MachineInstr* MI) const;
+ virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const;
+ virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ MachineInstr::mmo_iterator MMOBegin,
+ MachineInstr::mmo_iterator MMOEnd,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ MachineInstr::mmo_iterator MMOBegin,
+ MachineInstr::mmo_iterator MMOEnd,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+ virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const;
+
+ virtual
+ MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF,
+ int FrameIx, uint64_t Offset,
+ const MDNode *MDPtr,
+ DebugLoc DL) const;
+
+ /// foldMemoryOperand - If this target supports it, fold a load or store of
+ /// the specified stack slot into the specified machine instruction for the
+ /// specified operand(s). If this is possible, the target should perform the
+ /// folding and return true, otherwise it should return false. If it folds
+ /// the instruction, it is likely that the MachineInstruction the iterator
+ /// references has been changed.
+ virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr* MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ int FrameIndex) const;
+
+ /// foldMemoryOperand - Same as the previous version except it allows folding
+ /// of any load and store from / to any address, not just from a specific
+ /// stack slot.
+ virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr* MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr* LoadMI) const;
+
+ /// canFoldMemoryOperand - Returns true if the specified load / store is
+ /// folding is possible.
+ virtual bool canFoldMemoryOperand(const MachineInstr*,
+ const SmallVectorImpl<unsigned> &) const;
+
+ /// unfoldMemoryOperand - Separate a single instruction which folded a load or
+ /// a store or a load and a store into two or more instruction. If this is
+ /// possible, returns true as well as the new instructions by reference.
+ virtual bool unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
+ unsigned Reg, bool UnfoldLoad, bool UnfoldStore,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+ virtual bool unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
+ SmallVectorImpl<SDNode*> &NewNodes) const;
+
+ /// getOpcodeAfterMemoryUnfold - Returns the opcode of the would be new
+ /// instruction after load / store are unfolded from an instruction of the
+ /// specified opcode. It returns zero if the specified unfolding is not
+ /// possible. If LoadRegIndex is non-null, it is filled in with the operand
+ /// index of the operand which will hold the register holding the loaded
+ /// value.
+ virtual unsigned getOpcodeAfterMemoryUnfold(unsigned Opc,
+ bool UnfoldLoad, bool UnfoldStore,
+ unsigned *LoadRegIndex = 0) const;
+
+ /// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler
+ /// to determine if two loads are loading from the same base address. It
+ /// should only return true if the base pointers are the same and the
+ /// only differences between the two addresses are the offset. It also returns
+ /// the offsets by reference.
+ virtual bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
+ int64_t &Offset1, int64_t &Offset2) const;
+
+ /// shouldScheduleLoadsNear - This is a used by the pre-regalloc scheduler to
+ /// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should
+ /// be scheduled togther. On some targets if two loads are loading from
+ /// addresses in the same cache line, it's better if they are scheduled
+ /// together. This function takes two integers that represent the load offsets
+ /// from the common base address. It returns true if it decides it's desirable
+ /// to schedule the two loads together. "NumLoads" is the number of loads that
+ /// have already been scheduled after Load1.
+ virtual bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
+ int64_t Offset1, int64_t Offset2,
+ unsigned NumLoads) const;
+
+ virtual void getNoopForMachoTarget(MCInst &NopInst) const;
+
+ virtual
+ bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+
+ /// isSafeToMoveRegClassDefs - Return true if it's safe to move a machine
+ /// instruction that defines the specified register class.
+ bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const;
+
+ static bool isX86_64ExtendedReg(const MachineOperand &MO) {
+ if (!MO.isReg()) return false;
+ return X86II::isX86_64ExtendedReg(MO.getReg());
+ }
+
+ /// getGlobalBaseReg - Return a virtual register initialized with the
+ /// the global base register value. Output instructions required to
+ /// initialize the register in the function entry block, if necessary.
+ ///
+ unsigned getGlobalBaseReg(MachineFunction *MF) const;
+
+ std::pair<uint16_t, uint16_t>
+ getExecutionDomain(const MachineInstr *MI) const;
+
+ void setExecutionDomain(MachineInstr *MI, unsigned Domain) const;
+
+ MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr* MI,
+ unsigned OpNum,
+ const SmallVectorImpl<MachineOperand> &MOs,
+ unsigned Size, unsigned Alignment) const;
+
+ bool isHighLatencyDef(int opc) const;
+
+ bool hasHighOperandLatency(const InstrItineraryData *ItinData,
+ const MachineRegisterInfo *MRI,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI, unsigned UseIdx) const;
+
+private:
+ MachineInstr * convertToThreeAddressWithLEA(unsigned MIOpc,
+ MachineFunction::iterator &MFI,
+ MachineBasicBlock::iterator &MBBI,
+ LiveVariables *LV) const;
+
+ /// isFrameOperand - Return true and the FrameIndex if the specified
+ /// operand and follow operands form a reference to the stack frame.
+ bool isFrameOperand(const MachineInstr *MI, unsigned int Op,
+ int &FrameIndex) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.td b/contrib/llvm/lib/Target/X86/X86InstrInfo.td
new file mode 100644
index 0000000..d54bf27
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.td
@@ -0,0 +1,1799 @@
+//===- X86InstrInfo.td - Main X86 Instruction Definition ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the X86 instruction set, defining the instructions, and
+// properties of the instructions which are needed for code generation, machine
+// code emission, and analysis.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// X86 specific DAG Nodes.
+//
+
+def SDTIntShiftDOp: SDTypeProfile<1, 3,
+ [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
+ SDTCisInt<0>, SDTCisInt<3>]>;
+
+def SDTX86CmpTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisSameAs<1, 2>]>;
+
+def SDTX86Cmpsd : SDTypeProfile<1, 3, [SDTCisVT<0, f64>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>;
+def SDTX86Cmpss : SDTypeProfile<1, 3, [SDTCisVT<0, f32>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>;
+
+def SDTX86Cmov : SDTypeProfile<1, 4,
+ [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>,
+ SDTCisVT<3, i8>, SDTCisVT<4, i32>]>;
+
+// Unary and binary operator instructions that set EFLAGS as a side-effect.
+def SDTUnaryArithWithFlags : SDTypeProfile<2, 1,
+ [SDTCisInt<0>, SDTCisVT<1, i32>]>;
+
+def SDTBinaryArithWithFlags : SDTypeProfile<2, 2,
+ [SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>,
+ SDTCisInt<0>, SDTCisVT<1, i32>]>;
+
+// SDTBinaryArithWithFlagsInOut - RES1, EFLAGS = op LHS, RHS, EFLAGS
+def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3,
+ [SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>,
+ SDTCisInt<0>,
+ SDTCisVT<1, i32>,
+ SDTCisVT<4, i32>]>;
+// RES1, RES2, FLAGS = op LHS, RHS
+def SDT2ResultBinaryArithWithFlags : SDTypeProfile<3, 2,
+ [SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>,
+ SDTCisInt<0>, SDTCisVT<1, i32>]>;
+def SDTX86BrCond : SDTypeProfile<0, 3,
+ [SDTCisVT<0, OtherVT>,
+ SDTCisVT<1, i8>, SDTCisVT<2, i32>]>;
+
+def SDTX86SetCC : SDTypeProfile<1, 2,
+ [SDTCisVT<0, i8>,
+ SDTCisVT<1, i8>, SDTCisVT<2, i32>]>;
+def SDTX86SetCC_C : SDTypeProfile<1, 2,
+ [SDTCisInt<0>,
+ SDTCisVT<1, i8>, SDTCisVT<2, i32>]>;
+
+def SDTX86cas : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisInt<1>,
+ SDTCisVT<2, i8>]>;
+def SDTX86caspair : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
+
+def SDTX86atomicBinary : SDTypeProfile<2, 3, [SDTCisInt<0>, SDTCisInt<1>,
+ SDTCisPtrTy<2>, SDTCisInt<3>,SDTCisInt<4>]>;
+def SDTX86Ret : SDTypeProfile<0, -1, [SDTCisVT<0, i16>]>;
+
+def SDT_X86CallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>]>;
+def SDT_X86CallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>,
+ SDTCisVT<1, i32>]>;
+
+def SDT_X86Call : SDTypeProfile<0, -1, [SDTCisVT<0, iPTR>]>;
+
+def SDT_X86VASTART_SAVE_XMM_REGS : SDTypeProfile<0, -1, [SDTCisVT<0, i8>,
+ SDTCisVT<1, iPTR>,
+ SDTCisVT<2, iPTR>]>;
+
+def SDT_X86VAARG_64 : SDTypeProfile<1, -1, [SDTCisPtrTy<0>,
+ SDTCisPtrTy<1>,
+ SDTCisVT<2, i32>,
+ SDTCisVT<3, i8>,
+ SDTCisVT<4, i32>]>;
+
+def SDTX86RepStr : SDTypeProfile<0, 1, [SDTCisVT<0, OtherVT>]>;
+
+def SDTX86Void : SDTypeProfile<0, 0, []>;
+
+def SDTX86Wrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
+
+def SDT_X86TLSADDR : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+
+def SDT_X86TLSCALL : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+
+def SDT_X86SEG_ALLOCA : SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisVT<1, iPTR>]>;
+
+def SDT_X86EHRET : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+
+def SDT_X86TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisVT<1, i32>]>;
+
+def SDT_X86MEMBARRIER : SDTypeProfile<0, 0, []>;
+def SDT_X86MEMBARRIERNoSSE : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+
+def X86MemBarrier : SDNode<"X86ISD::MEMBARRIER", SDT_X86MEMBARRIER,
+ [SDNPHasChain]>;
+def X86MemBarrierNoSSE : SDNode<"X86ISD::MEMBARRIER", SDT_X86MEMBARRIERNoSSE,
+ [SDNPHasChain]>;
+def X86MFence : SDNode<"X86ISD::MFENCE", SDT_X86MEMBARRIER,
+ [SDNPHasChain]>;
+def X86SFence : SDNode<"X86ISD::SFENCE", SDT_X86MEMBARRIER,
+ [SDNPHasChain]>;
+def X86LFence : SDNode<"X86ISD::LFENCE", SDT_X86MEMBARRIER,
+ [SDNPHasChain]>;
+
+
+def X86bsf : SDNode<"X86ISD::BSF", SDTUnaryArithWithFlags>;
+def X86bsr : SDNode<"X86ISD::BSR", SDTUnaryArithWithFlags>;
+def X86shld : SDNode<"X86ISD::SHLD", SDTIntShiftDOp>;
+def X86shrd : SDNode<"X86ISD::SHRD", SDTIntShiftDOp>;
+
+def X86cmp : SDNode<"X86ISD::CMP" , SDTX86CmpTest>;
+def X86bt : SDNode<"X86ISD::BT", SDTX86CmpTest>;
+
+def X86cmov : SDNode<"X86ISD::CMOV", SDTX86Cmov>;
+def X86brcond : SDNode<"X86ISD::BRCOND", SDTX86BrCond,
+ [SDNPHasChain]>;
+def X86setcc : SDNode<"X86ISD::SETCC", SDTX86SetCC>;
+def X86setcc_c : SDNode<"X86ISD::SETCC_CARRY", SDTX86SetCC_C>;
+
+def X86cas : SDNode<"X86ISD::LCMPXCHG_DAG", SDTX86cas,
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore,
+ SDNPMayLoad, SDNPMemOperand]>;
+def X86cas8 : SDNode<"X86ISD::LCMPXCHG8_DAG", SDTX86caspair,
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore,
+ SDNPMayLoad, SDNPMemOperand]>;
+def X86cas16 : SDNode<"X86ISD::LCMPXCHG16_DAG", SDTX86caspair,
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore,
+ SDNPMayLoad, SDNPMemOperand]>;
+
+def X86AtomAdd64 : SDNode<"X86ISD::ATOMADD64_DAG", SDTX86atomicBinary,
+ [SDNPHasChain, SDNPMayStore,
+ SDNPMayLoad, SDNPMemOperand]>;
+def X86AtomSub64 : SDNode<"X86ISD::ATOMSUB64_DAG", SDTX86atomicBinary,
+ [SDNPHasChain, SDNPMayStore,
+ SDNPMayLoad, SDNPMemOperand]>;
+def X86AtomOr64 : SDNode<"X86ISD::ATOMOR64_DAG", SDTX86atomicBinary,
+ [SDNPHasChain, SDNPMayStore,
+ SDNPMayLoad, SDNPMemOperand]>;
+def X86AtomXor64 : SDNode<"X86ISD::ATOMXOR64_DAG", SDTX86atomicBinary,
+ [SDNPHasChain, SDNPMayStore,
+ SDNPMayLoad, SDNPMemOperand]>;
+def X86AtomAnd64 : SDNode<"X86ISD::ATOMAND64_DAG", SDTX86atomicBinary,
+ [SDNPHasChain, SDNPMayStore,
+ SDNPMayLoad, SDNPMemOperand]>;
+def X86AtomNand64 : SDNode<"X86ISD::ATOMNAND64_DAG", SDTX86atomicBinary,
+ [SDNPHasChain, SDNPMayStore,
+ SDNPMayLoad, SDNPMemOperand]>;
+def X86AtomSwap64 : SDNode<"X86ISD::ATOMSWAP64_DAG", SDTX86atomicBinary,
+ [SDNPHasChain, SDNPMayStore,
+ SDNPMayLoad, SDNPMemOperand]>;
+def X86retflag : SDNode<"X86ISD::RET_FLAG", SDTX86Ret,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+
+def X86vastart_save_xmm_regs :
+ SDNode<"X86ISD::VASTART_SAVE_XMM_REGS",
+ SDT_X86VASTART_SAVE_XMM_REGS,
+ [SDNPHasChain, SDNPVariadic]>;
+def X86vaarg64 :
+ SDNode<"X86ISD::VAARG_64", SDT_X86VAARG_64,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore,
+ SDNPMemOperand]>;
+def X86callseq_start :
+ SDNode<"ISD::CALLSEQ_START", SDT_X86CallSeqStart,
+ [SDNPHasChain, SDNPOutGlue]>;
+def X86callseq_end :
+ SDNode<"ISD::CALLSEQ_END", SDT_X86CallSeqEnd,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+def X86call : SDNode<"X86ISD::CALL", SDT_X86Call,
+ [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue,
+ SDNPVariadic]>;
+
+def X86rep_stos: SDNode<"X86ISD::REP_STOS", SDTX86RepStr,
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore]>;
+def X86rep_movs: SDNode<"X86ISD::REP_MOVS", SDTX86RepStr,
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore,
+ SDNPMayLoad]>;
+
+def X86rdtsc : SDNode<"X86ISD::RDTSC_DAG", SDTX86Void,
+ [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>;
+
+def X86Wrapper : SDNode<"X86ISD::Wrapper", SDTX86Wrapper>;
+def X86WrapperRIP : SDNode<"X86ISD::WrapperRIP", SDTX86Wrapper>;
+
+def X86tlsaddr : SDNode<"X86ISD::TLSADDR", SDT_X86TLSADDR,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+def X86ehret : SDNode<"X86ISD::EH_RETURN", SDT_X86EHRET,
+ [SDNPHasChain]>;
+
+def X86tcret : SDNode<"X86ISD::TC_RETURN", SDT_X86TCRET,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+
+def X86add_flag : SDNode<"X86ISD::ADD", SDTBinaryArithWithFlags,
+ [SDNPCommutative]>;
+def X86sub_flag : SDNode<"X86ISD::SUB", SDTBinaryArithWithFlags>;
+def X86smul_flag : SDNode<"X86ISD::SMUL", SDTBinaryArithWithFlags,
+ [SDNPCommutative]>;
+def X86umul_flag : SDNode<"X86ISD::UMUL", SDT2ResultBinaryArithWithFlags,
+ [SDNPCommutative]>;
+def X86adc_flag : SDNode<"X86ISD::ADC", SDTBinaryArithWithFlagsInOut>;
+def X86sbb_flag : SDNode<"X86ISD::SBB", SDTBinaryArithWithFlagsInOut>;
+
+def X86inc_flag : SDNode<"X86ISD::INC", SDTUnaryArithWithFlags>;
+def X86dec_flag : SDNode<"X86ISD::DEC", SDTUnaryArithWithFlags>;
+def X86or_flag : SDNode<"X86ISD::OR", SDTBinaryArithWithFlags,
+ [SDNPCommutative]>;
+def X86xor_flag : SDNode<"X86ISD::XOR", SDTBinaryArithWithFlags,
+ [SDNPCommutative]>;
+def X86and_flag : SDNode<"X86ISD::AND", SDTBinaryArithWithFlags,
+ [SDNPCommutative]>;
+def X86andn_flag : SDNode<"X86ISD::ANDN", SDTBinaryArithWithFlags>;
+
+def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>;
+
+def X86WinAlloca : SDNode<"X86ISD::WIN_ALLOCA", SDTX86Void,
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>;
+
+def X86SegAlloca : SDNode<"X86ISD::SEG_ALLOCA", SDT_X86SEG_ALLOCA,
+ [SDNPHasChain]>;
+
+def X86TLSCall : SDNode<"X86ISD::TLSCALL", SDT_X86TLSCALL,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+//===----------------------------------------------------------------------===//
+// X86 Operand Definitions.
+//
+
+// A version of ptr_rc which excludes SP, ESP, and RSP. This is used for
+// the index operand of an address, to conform to x86 encoding restrictions.
+def ptr_rc_nosp : PointerLikeRegClass<1>;
+
+// *mem - Operand definitions for the funky X86 addressing mode operands.
+//
+def X86MemAsmOperand : AsmOperandClass {
+ let Name = "Mem";
+ let SuperClasses = [];
+}
+def X86AbsMemAsmOperand : AsmOperandClass {
+ let Name = "AbsMem";
+ let SuperClasses = [X86MemAsmOperand];
+}
+class X86MemOperand<string printMethod> : Operand<iPTR> {
+ let PrintMethod = printMethod;
+ let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, i8imm);
+ let ParserMatchClass = X86MemAsmOperand;
+}
+
+let OperandType = "OPERAND_MEMORY" in {
+def opaque32mem : X86MemOperand<"printopaquemem">;
+def opaque48mem : X86MemOperand<"printopaquemem">;
+def opaque80mem : X86MemOperand<"printopaquemem">;
+def opaque512mem : X86MemOperand<"printopaquemem">;
+
+def i8mem : X86MemOperand<"printi8mem">;
+def i16mem : X86MemOperand<"printi16mem">;
+def i32mem : X86MemOperand<"printi32mem">;
+def i64mem : X86MemOperand<"printi64mem">;
+def i128mem : X86MemOperand<"printi128mem">;
+def i256mem : X86MemOperand<"printi256mem">;
+def f32mem : X86MemOperand<"printf32mem">;
+def f64mem : X86MemOperand<"printf64mem">;
+def f80mem : X86MemOperand<"printf80mem">;
+def f128mem : X86MemOperand<"printf128mem">;
+def f256mem : X86MemOperand<"printf256mem">;
+}
+
+// A version of i8mem for use on x86-64 that uses GR64_NOREX instead of
+// plain GR64, so that it doesn't potentially require a REX prefix.
+def i8mem_NOREX : Operand<i64> {
+ let PrintMethod = "printi8mem";
+ let MIOperandInfo = (ops GR64_NOREX, i8imm, GR64_NOREX_NOSP, i32imm, i8imm);
+ let ParserMatchClass = X86MemAsmOperand;
+ let OperandType = "OPERAND_MEMORY";
+}
+
+// GPRs available for tailcall.
+// It represents GR64_TC or GR64_TCW64.
+def ptr_rc_tailcall : PointerLikeRegClass<2>;
+
+// Special i32mem for addresses of load folding tail calls. These are not
+// allowed to use callee-saved registers since they must be scheduled
+// after callee-saved register are popped.
+def i32mem_TC : Operand<i32> {
+ let PrintMethod = "printi32mem";
+ let MIOperandInfo = (ops GR32_TC, i8imm, GR32_TC, i32imm, i8imm);
+ let ParserMatchClass = X86MemAsmOperand;
+ let OperandType = "OPERAND_MEMORY";
+}
+
+// Special i64mem for addresses of load folding tail calls. These are not
+// allowed to use callee-saved registers since they must be scheduled
+// after callee-saved register are popped.
+def i64mem_TC : Operand<i64> {
+ let PrintMethod = "printi64mem";
+ let MIOperandInfo = (ops ptr_rc_tailcall, i8imm,
+ ptr_rc_tailcall, i32imm, i8imm);
+ let ParserMatchClass = X86MemAsmOperand;
+ let OperandType = "OPERAND_MEMORY";
+}
+
+let OperandType = "OPERAND_PCREL",
+ ParserMatchClass = X86AbsMemAsmOperand,
+ PrintMethod = "print_pcrel_imm" in {
+def i32imm_pcrel : Operand<i32>;
+def i16imm_pcrel : Operand<i16>;
+
+def offset8 : Operand<i64>;
+def offset16 : Operand<i64>;
+def offset32 : Operand<i64>;
+def offset64 : Operand<i64>;
+
+// Branch targets have OtherVT type and print as pc-relative values.
+def brtarget : Operand<OtherVT>;
+def brtarget8 : Operand<OtherVT>;
+
+}
+
+def SSECC : Operand<i8> {
+ let PrintMethod = "printSSECC";
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+
+class ImmSExtAsmOperandClass : AsmOperandClass {
+ let SuperClasses = [ImmAsmOperand];
+ let RenderMethod = "addImmOperands";
+}
+
+class ImmZExtAsmOperandClass : AsmOperandClass {
+ let SuperClasses = [ImmAsmOperand];
+ let RenderMethod = "addImmOperands";
+}
+
+// Sign-extended immediate classes. We don't need to define the full lattice
+// here because there is no instruction with an ambiguity between ImmSExti64i32
+// and ImmSExti32i8.
+//
+// The strange ranges come from the fact that the assembler always works with
+// 64-bit immediates, but for a 16-bit target value we want to accept both "-1"
+// (which will be a -1ULL), and "0xFF" (-1 in 16-bits).
+
+// [0, 0x7FFFFFFF] |
+// [0xFFFFFFFF80000000, 0xFFFFFFFFFFFFFFFF]
+def ImmSExti64i32AsmOperand : ImmSExtAsmOperandClass {
+ let Name = "ImmSExti64i32";
+}
+
+// [0, 0x0000007F] | [0x000000000000FF80, 0x000000000000FFFF] |
+// [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF]
+def ImmSExti16i8AsmOperand : ImmSExtAsmOperandClass {
+ let Name = "ImmSExti16i8";
+ let SuperClasses = [ImmSExti64i32AsmOperand];
+}
+
+// [0, 0x0000007F] | [0x00000000FFFFFF80, 0x00000000FFFFFFFF] |
+// [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF]
+def ImmSExti32i8AsmOperand : ImmSExtAsmOperandClass {
+ let Name = "ImmSExti32i8";
+}
+
+// [0, 0x000000FF]
+def ImmZExtu32u8AsmOperand : ImmZExtAsmOperandClass {
+ let Name = "ImmZExtu32u8";
+}
+
+
+// [0, 0x0000007F] |
+// [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF]
+def ImmSExti64i8AsmOperand : ImmSExtAsmOperandClass {
+ let Name = "ImmSExti64i8";
+ let SuperClasses = [ImmSExti16i8AsmOperand, ImmSExti32i8AsmOperand,
+ ImmSExti64i32AsmOperand];
+}
+
+// A couple of more descriptive operand definitions.
+// 16-bits but only 8 bits are significant.
+def i16i8imm : Operand<i16> {
+ let ParserMatchClass = ImmSExti16i8AsmOperand;
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+// 32-bits but only 8 bits are significant.
+def i32i8imm : Operand<i32> {
+ let ParserMatchClass = ImmSExti32i8AsmOperand;
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+// 32-bits but only 8 bits are significant, and those 8 bits are unsigned.
+def u32u8imm : Operand<i32> {
+ let ParserMatchClass = ImmZExtu32u8AsmOperand;
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+
+// 64-bits but only 32 bits are significant.
+def i64i32imm : Operand<i64> {
+ let ParserMatchClass = ImmSExti64i32AsmOperand;
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+
+// 64-bits but only 32 bits are significant, and those bits are treated as being
+// pc relative.
+def i64i32imm_pcrel : Operand<i64> {
+ let PrintMethod = "print_pcrel_imm";
+ let ParserMatchClass = X86AbsMemAsmOperand;
+ let OperandType = "OPERAND_PCREL";
+}
+
+// 64-bits but only 8 bits are significant.
+def i64i8imm : Operand<i64> {
+ let ParserMatchClass = ImmSExti64i8AsmOperand;
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+
+def lea64_32mem : Operand<i32> {
+ let PrintMethod = "printi32mem";
+ let AsmOperandLowerMethod = "lower_lea64_32mem";
+ let MIOperandInfo = (ops GR32, i8imm, GR32_NOSP, i32imm, i8imm);
+ let ParserMatchClass = X86MemAsmOperand;
+}
+
+
+//===----------------------------------------------------------------------===//
+// X86 Complex Pattern Definitions.
+//
+
+// Define X86 specific addressing mode.
+def addr : ComplexPattern<iPTR, 5, "SelectAddr", [], [SDNPWantParent]>;
+def lea32addr : ComplexPattern<i32, 5, "SelectLEAAddr",
+ [add, sub, mul, X86mul_imm, shl, or, frameindex],
+ []>;
+def tls32addr : ComplexPattern<i32, 5, "SelectTLSADDRAddr",
+ [tglobaltlsaddr], []>;
+
+def lea64addr : ComplexPattern<i64, 5, "SelectLEAAddr",
+ [add, sub, mul, X86mul_imm, shl, or, frameindex,
+ X86WrapperRIP], []>;
+
+def tls64addr : ComplexPattern<i64, 5, "SelectTLSADDRAddr",
+ [tglobaltlsaddr], []>;
+
+//===----------------------------------------------------------------------===//
+// X86 Instruction Predicate Definitions.
+def HasCMov : Predicate<"Subtarget->hasCMov()">;
+def NoCMov : Predicate<"!Subtarget->hasCMov()">;
+
+def HasMMX : Predicate<"Subtarget->hasMMX()">;
+def Has3DNow : Predicate<"Subtarget->has3DNow()">;
+def Has3DNowA : Predicate<"Subtarget->has3DNowA()">;
+def HasSSE1 : Predicate<"Subtarget->hasSSE1()">;
+def HasSSE2 : Predicate<"Subtarget->hasSSE2()">;
+def HasSSE3 : Predicate<"Subtarget->hasSSE3()">;
+def HasSSSE3 : Predicate<"Subtarget->hasSSSE3()">;
+def HasSSE41 : Predicate<"Subtarget->hasSSE41()">;
+def HasSSE42 : Predicate<"Subtarget->hasSSE42()">;
+def HasSSE4A : Predicate<"Subtarget->hasSSE4A()">;
+
+def HasAVX : Predicate<"Subtarget->hasAVX()">;
+def HasXMMInt : Predicate<"Subtarget->hasXMMInt()">;
+
+def HasPOPCNT : Predicate<"Subtarget->hasPOPCNT()">;
+def HasAES : Predicate<"Subtarget->hasAES()">;
+def HasCLMUL : Predicate<"Subtarget->hasCLMUL()">;
+def HasFMA3 : Predicate<"Subtarget->hasFMA3()">;
+def HasFMA4 : Predicate<"Subtarget->hasFMA4()">;
+def HasMOVBE : Predicate<"Subtarget->hasMOVBE()">;
+def HasRDRAND : Predicate<"Subtarget->hasRDRAND()">;
+def HasF16C : Predicate<"Subtarget->hasF16C()">;
+def HasLZCNT : Predicate<"Subtarget->hasLZCNT()">;
+def HasBMI : Predicate<"Subtarget->hasBMI()">;
+def FPStackf32 : Predicate<"!Subtarget->hasXMM()">;
+def FPStackf64 : Predicate<"!Subtarget->hasXMMInt()">;
+def HasCmpxchg16b: Predicate<"Subtarget->hasCmpxchg16b()">;
+def In32BitMode : Predicate<"!Subtarget->is64Bit()">,
+ AssemblerPredicate<"!Mode64Bit">;
+def In64BitMode : Predicate<"Subtarget->is64Bit()">,
+ AssemblerPredicate<"Mode64Bit">;
+def IsWin64 : Predicate<"Subtarget->isTargetWin64()">;
+def NotWin64 : Predicate<"!Subtarget->isTargetWin64()">;
+def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">,
+ AssemblerPredicate<"ModeNaCl">;
+def IsNaCl32 : Predicate<"Subtarget->isTargetNaCl32()">,
+ AssemblerPredicate<"ModeNaCl,!Mode64Bit">;
+def IsNaCl64 : Predicate<"Subtarget->isTargetNaCl64()">,
+ AssemblerPredicate<"ModeNaCl,Mode64Bit">;
+def NotNaCl : Predicate<"!Subtarget->isTargetNaCl()">,
+ AssemblerPredicate<"!ModeNaCl">;
+def SmallCode : Predicate<"TM.getCodeModel() == CodeModel::Small">;
+def KernelCode : Predicate<"TM.getCodeModel() == CodeModel::Kernel">;
+def FarData : Predicate<"TM.getCodeModel() != CodeModel::Small &&"
+ "TM.getCodeModel() != CodeModel::Kernel">;
+def NearData : Predicate<"TM.getCodeModel() == CodeModel::Small ||"
+ "TM.getCodeModel() == CodeModel::Kernel">;
+def IsStatic : Predicate<"TM.getRelocationModel() == Reloc::Static">;
+def IsNotPIC : Predicate<"TM.getRelocationModel() != Reloc::PIC_">;
+def OptForSize : Predicate<"OptForSize">;
+def OptForSpeed : Predicate<"!OptForSize">;
+def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">;
+def CallImmAddr : Predicate<"Subtarget->IsLegalToCallImmediateAddr(TM)">;
+
+//===----------------------------------------------------------------------===//
+// X86 Instruction Format Definitions.
+//
+
+include "X86InstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// Pattern fragments.
+//
+
+// X86 specific condition code. These correspond to CondCode in
+// X86InstrInfo.h. They must be kept in synch.
+def X86_COND_A : PatLeaf<(i8 0)>; // alt. COND_NBE
+def X86_COND_AE : PatLeaf<(i8 1)>; // alt. COND_NC
+def X86_COND_B : PatLeaf<(i8 2)>; // alt. COND_C
+def X86_COND_BE : PatLeaf<(i8 3)>; // alt. COND_NA
+def X86_COND_E : PatLeaf<(i8 4)>; // alt. COND_Z
+def X86_COND_G : PatLeaf<(i8 5)>; // alt. COND_NLE
+def X86_COND_GE : PatLeaf<(i8 6)>; // alt. COND_NL
+def X86_COND_L : PatLeaf<(i8 7)>; // alt. COND_NGE
+def X86_COND_LE : PatLeaf<(i8 8)>; // alt. COND_NG
+def X86_COND_NE : PatLeaf<(i8 9)>; // alt. COND_NZ
+def X86_COND_NO : PatLeaf<(i8 10)>;
+def X86_COND_NP : PatLeaf<(i8 11)>; // alt. COND_PO
+def X86_COND_NS : PatLeaf<(i8 12)>;
+def X86_COND_O : PatLeaf<(i8 13)>;
+def X86_COND_P : PatLeaf<(i8 14)>; // alt. COND_PE
+def X86_COND_S : PatLeaf<(i8 15)>;
+
+let FastIselShouldIgnore = 1 in { // FastIsel should ignore all simm8 instrs.
+ def i16immSExt8 : ImmLeaf<i16, [{ return Imm == (int8_t)Imm; }]>;
+ def i32immSExt8 : ImmLeaf<i32, [{ return Imm == (int8_t)Imm; }]>;
+ def i64immSExt8 : ImmLeaf<i64, [{ return Imm == (int8_t)Imm; }]>;
+}
+
+def i64immSExt32 : ImmLeaf<i64, [{ return Imm == (int32_t)Imm; }]>;
+
+
+// i64immZExt32 predicate - True if the 64-bit immediate fits in a 32-bit
+// unsigned field.
+def i64immZExt32 : ImmLeaf<i64, [{ return (uint64_t)Imm == (uint32_t)Imm; }]>;
+
+def i64immZExt32SExt8 : ImmLeaf<i64, [{
+ return (uint64_t)Imm == (uint32_t)Imm && (int32_t)Imm == (int8_t)Imm;
+}]>;
+
+// Helper fragments for loads.
+// It's always safe to treat a anyext i16 load as a i32 load if the i16 is
+// known to be 32-bit aligned or better. Ditto for i8 to i16.
+def loadi16 : PatFrag<(ops node:$ptr), (i16 (unindexedload node:$ptr)), [{
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ if (ExtType == ISD::NON_EXTLOAD)
+ return true;
+ if (ExtType == ISD::EXTLOAD)
+ return LD->getAlignment() >= 2 && !LD->isVolatile();
+ return false;
+}]>;
+
+def loadi16_anyext : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)),[{
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ if (ExtType == ISD::EXTLOAD)
+ return LD->getAlignment() >= 2 && !LD->isVolatile();
+ return false;
+}]>;
+
+def loadi32 : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)), [{
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ if (ExtType == ISD::NON_EXTLOAD)
+ return true;
+ if (ExtType == ISD::EXTLOAD)
+ return LD->getAlignment() >= 4 && !LD->isVolatile();
+ return false;
+}]>;
+
+def loadi8 : PatFrag<(ops node:$ptr), (i8 (load node:$ptr))>;
+def loadi64 : PatFrag<(ops node:$ptr), (i64 (load node:$ptr))>;
+def loadf32 : PatFrag<(ops node:$ptr), (f32 (load node:$ptr))>;
+def loadf64 : PatFrag<(ops node:$ptr), (f64 (load node:$ptr))>;
+def loadf80 : PatFrag<(ops node:$ptr), (f80 (load node:$ptr))>;
+
+def sextloadi16i8 : PatFrag<(ops node:$ptr), (i16 (sextloadi8 node:$ptr))>;
+def sextloadi32i8 : PatFrag<(ops node:$ptr), (i32 (sextloadi8 node:$ptr))>;
+def sextloadi32i16 : PatFrag<(ops node:$ptr), (i32 (sextloadi16 node:$ptr))>;
+def sextloadi64i8 : PatFrag<(ops node:$ptr), (i64 (sextloadi8 node:$ptr))>;
+def sextloadi64i16 : PatFrag<(ops node:$ptr), (i64 (sextloadi16 node:$ptr))>;
+def sextloadi64i32 : PatFrag<(ops node:$ptr), (i64 (sextloadi32 node:$ptr))>;
+
+def zextloadi8i1 : PatFrag<(ops node:$ptr), (i8 (zextloadi1 node:$ptr))>;
+def zextloadi16i1 : PatFrag<(ops node:$ptr), (i16 (zextloadi1 node:$ptr))>;
+def zextloadi32i1 : PatFrag<(ops node:$ptr), (i32 (zextloadi1 node:$ptr))>;
+def zextloadi16i8 : PatFrag<(ops node:$ptr), (i16 (zextloadi8 node:$ptr))>;
+def zextloadi32i8 : PatFrag<(ops node:$ptr), (i32 (zextloadi8 node:$ptr))>;
+def zextloadi32i16 : PatFrag<(ops node:$ptr), (i32 (zextloadi16 node:$ptr))>;
+def zextloadi64i1 : PatFrag<(ops node:$ptr), (i64 (zextloadi1 node:$ptr))>;
+def zextloadi64i8 : PatFrag<(ops node:$ptr), (i64 (zextloadi8 node:$ptr))>;
+def zextloadi64i16 : PatFrag<(ops node:$ptr), (i64 (zextloadi16 node:$ptr))>;
+def zextloadi64i32 : PatFrag<(ops node:$ptr), (i64 (zextloadi32 node:$ptr))>;
+
+def extloadi8i1 : PatFrag<(ops node:$ptr), (i8 (extloadi1 node:$ptr))>;
+def extloadi16i1 : PatFrag<(ops node:$ptr), (i16 (extloadi1 node:$ptr))>;
+def extloadi32i1 : PatFrag<(ops node:$ptr), (i32 (extloadi1 node:$ptr))>;
+def extloadi16i8 : PatFrag<(ops node:$ptr), (i16 (extloadi8 node:$ptr))>;
+def extloadi32i8 : PatFrag<(ops node:$ptr), (i32 (extloadi8 node:$ptr))>;
+def extloadi32i16 : PatFrag<(ops node:$ptr), (i32 (extloadi16 node:$ptr))>;
+def extloadi64i1 : PatFrag<(ops node:$ptr), (i64 (extloadi1 node:$ptr))>;
+def extloadi64i8 : PatFrag<(ops node:$ptr), (i64 (extloadi8 node:$ptr))>;
+def extloadi64i16 : PatFrag<(ops node:$ptr), (i64 (extloadi16 node:$ptr))>;
+def extloadi64i32 : PatFrag<(ops node:$ptr), (i64 (extloadi32 node:$ptr))>;
+
+
+// An 'and' node with a single use.
+def and_su : PatFrag<(ops node:$lhs, node:$rhs), (and node:$lhs, node:$rhs), [{
+ return N->hasOneUse();
+}]>;
+// An 'srl' node with a single use.
+def srl_su : PatFrag<(ops node:$lhs, node:$rhs), (srl node:$lhs, node:$rhs), [{
+ return N->hasOneUse();
+}]>;
+// An 'trunc' node with a single use.
+def trunc_su : PatFrag<(ops node:$src), (trunc node:$src), [{
+ return N->hasOneUse();
+}]>;
+
+//===----------------------------------------------------------------------===//
+// Instruction list.
+//
+
+// Nop
+let neverHasSideEffects = 1 in {
+ def NOOP : I<0x90, RawFrm, (outs), (ins), "nop", []>;
+ def NOOPW : I<0x1f, MRM0m, (outs), (ins i16mem:$zero),
+ "nop{w}\t$zero", []>, TB, OpSize;
+ def NOOPL : I<0x1f, MRM0m, (outs), (ins i32mem:$zero),
+ "nop{l}\t$zero", []>, TB;
+}
+
+
+// Constructing a stack frame.
+def ENTER : Ii16<0xC8, RawFrmImm8, (outs), (ins i16imm:$len, i8imm:$lvl),
+ "enter\t$len, $lvl", []>;
+
+let Defs = [EBP, ESP], Uses = [EBP, ESP], mayLoad = 1, neverHasSideEffects=1 in
+def LEAVE : I<0xC9, RawFrm,
+ (outs), (ins), "leave", []>, Requires<[In32BitMode]>;
+
+let Defs = [RBP,RSP], Uses = [RBP,RSP], mayLoad = 1, neverHasSideEffects = 1 in
+def LEAVE64 : I<0xC9, RawFrm,
+ (outs), (ins), "leave", []>, Requires<[In64BitMode]>;
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous Instructions.
+//
+
+let Defs = [ESP], Uses = [ESP], neverHasSideEffects=1 in {
+let mayLoad = 1 in {
+def POP16r : I<0x58, AddRegFrm, (outs GR16:$reg), (ins), "pop{w}\t$reg", []>,
+ OpSize;
+def POP32r : I<0x58, AddRegFrm, (outs GR32:$reg), (ins), "pop{l}\t$reg", []>;
+def POP16rmr: I<0x8F, MRM0r, (outs GR16:$reg), (ins), "pop{w}\t$reg", []>,
+ OpSize;
+def POP16rmm: I<0x8F, MRM0m, (outs i16mem:$dst), (ins), "pop{w}\t$dst", []>,
+ OpSize;
+def POP32rmr: I<0x8F, MRM0r, (outs GR32:$reg), (ins), "pop{l}\t$reg", []>;
+def POP32rmm: I<0x8F, MRM0m, (outs i32mem:$dst), (ins), "pop{l}\t$dst", []>;
+
+def POPF16 : I<0x9D, RawFrm, (outs), (ins), "popf{w}", []>, OpSize;
+def POPF32 : I<0x9D, RawFrm, (outs), (ins), "popf{l|d}", []>,
+ Requires<[In32BitMode]>;
+}
+
+let mayStore = 1 in {
+def PUSH16r : I<0x50, AddRegFrm, (outs), (ins GR16:$reg), "push{w}\t$reg",[]>,
+ OpSize;
+def PUSH32r : I<0x50, AddRegFrm, (outs), (ins GR32:$reg), "push{l}\t$reg",[]>;
+def PUSH16rmr: I<0xFF, MRM6r, (outs), (ins GR16:$reg), "push{w}\t$reg",[]>,
+ OpSize;
+def PUSH16rmm: I<0xFF, MRM6m, (outs), (ins i16mem:$src), "push{w}\t$src",[]>,
+ OpSize;
+def PUSH32rmr: I<0xFF, MRM6r, (outs), (ins GR32:$reg), "push{l}\t$reg",[]>;
+def PUSH32rmm: I<0xFF, MRM6m, (outs), (ins i32mem:$src), "push{l}\t$src",[]>;
+
+def PUSHi8 : Ii8<0x6a, RawFrm, (outs), (ins i32i8imm:$imm),
+ "push{l}\t$imm", []>;
+def PUSHi16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm),
+ "push{w}\t$imm", []>, OpSize;
+def PUSHi32 : Ii32<0x68, RawFrm, (outs), (ins i32imm:$imm),
+ "push{l}\t$imm", []>;
+
+def PUSHF16 : I<0x9C, RawFrm, (outs), (ins), "pushf{w}", []>, OpSize;
+def PUSHF32 : I<0x9C, RawFrm, (outs), (ins), "pushf{l|d}", []>,
+ Requires<[In32BitMode]>;
+
+}
+}
+
+let Defs = [RSP], Uses = [RSP], neverHasSideEffects=1 in {
+let mayLoad = 1 in {
+def POP64r : I<0x58, AddRegFrm,
+ (outs GR64:$reg), (ins), "pop{q}\t$reg", []>;
+def POP64rmr: I<0x8F, MRM0r, (outs GR64:$reg), (ins), "pop{q}\t$reg", []>;
+def POP64rmm: I<0x8F, MRM0m, (outs i64mem:$dst), (ins), "pop{q}\t$dst", []>;
+}
+let mayStore = 1 in {
+def PUSH64r : I<0x50, AddRegFrm,
+ (outs), (ins GR64:$reg), "push{q}\t$reg", []>;
+def PUSH64rmr: I<0xFF, MRM6r, (outs), (ins GR64:$reg), "push{q}\t$reg", []>;
+def PUSH64rmm: I<0xFF, MRM6m, (outs), (ins i64mem:$src), "push{q}\t$src", []>;
+}
+}
+
+let Defs = [RSP], Uses = [RSP], neverHasSideEffects = 1, mayStore = 1 in {
+def PUSH64i8 : Ii8<0x6a, RawFrm, (outs), (ins i64i8imm:$imm),
+ "push{q}\t$imm", []>;
+def PUSH64i16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm),
+ "push{q}\t$imm", []>;
+def PUSH64i32 : Ii32<0x68, RawFrm, (outs), (ins i64i32imm:$imm),
+ "push{q}\t$imm", []>;
+}
+
+let Defs = [RSP, EFLAGS], Uses = [RSP], mayLoad = 1, neverHasSideEffects=1 in
+def POPF64 : I<0x9D, RawFrm, (outs), (ins), "popfq", []>,
+ Requires<[In64BitMode]>;
+let Defs = [RSP], Uses = [RSP, EFLAGS], mayStore = 1, neverHasSideEffects=1 in
+def PUSHF64 : I<0x9C, RawFrm, (outs), (ins), "pushfq", []>,
+ Requires<[In64BitMode]>;
+
+
+
+let Defs = [EDI, ESI, EBP, EBX, EDX, ECX, EAX, ESP], Uses = [ESP],
+ mayLoad=1, neverHasSideEffects=1 in {
+def POPA32 : I<0x61, RawFrm, (outs), (ins), "popa{l}", []>,
+ Requires<[In32BitMode]>;
+}
+let Defs = [ESP], Uses = [EDI, ESI, EBP, EBX, EDX, ECX, EAX, ESP],
+ mayStore=1, neverHasSideEffects=1 in {
+def PUSHA32 : I<0x60, RawFrm, (outs), (ins), "pusha{l}", []>,
+ Requires<[In32BitMode]>;
+}
+
+let Constraints = "$src = $dst" in { // GR32 = bswap GR32
+def BSWAP32r : I<0xC8, AddRegFrm,
+ (outs GR32:$dst), (ins GR32:$src),
+ "bswap{l}\t$dst",
+ [(set GR32:$dst, (bswap GR32:$src))]>, TB;
+
+def BSWAP64r : RI<0xC8, AddRegFrm, (outs GR64:$dst), (ins GR64:$src),
+ "bswap{q}\t$dst",
+ [(set GR64:$dst, (bswap GR64:$src))]>, TB;
+} // Constraints = "$src = $dst"
+
+// Bit scan instructions.
+let Defs = [EFLAGS] in {
+def BSF16rr : I<0xBC, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
+ "bsf{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, EFLAGS, (X86bsf GR16:$src))]>, TB, OpSize;
+def BSF16rm : I<0xBC, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
+ "bsf{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, EFLAGS, (X86bsf (loadi16 addr:$src)))]>, TB,
+ OpSize;
+def BSF32rr : I<0xBC, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+ "bsf{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, EFLAGS, (X86bsf GR32:$src))]>, TB;
+def BSF32rm : I<0xBC, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "bsf{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, EFLAGS, (X86bsf (loadi32 addr:$src)))]>, TB;
+def BSF64rr : RI<0xBC, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+ "bsf{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, EFLAGS, (X86bsf GR64:$src))]>, TB;
+def BSF64rm : RI<0xBC, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "bsf{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, EFLAGS, (X86bsf (loadi64 addr:$src)))]>, TB;
+
+def BSR16rr : I<0xBD, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
+ "bsr{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, EFLAGS, (X86bsr GR16:$src))]>, TB, OpSize;
+def BSR16rm : I<0xBD, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
+ "bsr{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, EFLAGS, (X86bsr (loadi16 addr:$src)))]>, TB,
+ OpSize;
+def BSR32rr : I<0xBD, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+ "bsr{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, EFLAGS, (X86bsr GR32:$src))]>, TB;
+def BSR32rm : I<0xBD, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "bsr{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, EFLAGS, (X86bsr (loadi32 addr:$src)))]>, TB;
+def BSR64rr : RI<0xBD, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+ "bsr{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, EFLAGS, (X86bsr GR64:$src))]>, TB;
+def BSR64rm : RI<0xBD, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "bsr{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, EFLAGS, (X86bsr (loadi64 addr:$src)))]>, TB;
+} // Defs = [EFLAGS]
+
+
+// These uses the DF flag in the EFLAGS register to inc or dec EDI and ESI
+let Defs = [EDI,ESI], Uses = [EDI,ESI,EFLAGS] in {
+def MOVSB : I<0xA4, RawFrm, (outs), (ins), "movsb", []>;
+def MOVSW : I<0xA5, RawFrm, (outs), (ins), "movsw", []>, OpSize;
+def MOVSD : I<0xA5, RawFrm, (outs), (ins), "movs{l|d}", []>;
+def MOVSQ : RI<0xA5, RawFrm, (outs), (ins), "movsq", []>;
+}
+
+// These uses the DF flag in the EFLAGS register to inc or dec EDI and ESI
+let Defs = [EDI], Uses = [AL,EDI,EFLAGS] in
+def STOSB : I<0xAA, RawFrm, (outs), (ins), "stosb", []>;
+let Defs = [EDI], Uses = [AX,EDI,EFLAGS] in
+def STOSW : I<0xAB, RawFrm, (outs), (ins), "stosw", []>, OpSize;
+let Defs = [EDI], Uses = [EAX,EDI,EFLAGS] in
+def STOSD : I<0xAB, RawFrm, (outs), (ins), "stos{l|d}", []>;
+let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI,EFLAGS] in
+def STOSQ : RI<0xAB, RawFrm, (outs), (ins), "stosq", []>;
+
+def SCAS8 : I<0xAE, RawFrm, (outs), (ins), "scasb", []>;
+def SCAS16 : I<0xAF, RawFrm, (outs), (ins), "scasw", []>, OpSize;
+def SCAS32 : I<0xAF, RawFrm, (outs), (ins), "scas{l|d}", []>;
+def SCAS64 : RI<0xAF, RawFrm, (outs), (ins), "scasq", []>;
+
+def CMPS8 : I<0xA6, RawFrm, (outs), (ins), "cmpsb", []>;
+def CMPS16 : I<0xA7, RawFrm, (outs), (ins), "cmpsw", []>, OpSize;
+def CMPS32 : I<0xA7, RawFrm, (outs), (ins), "cmps{l|d}", []>;
+def CMPS64 : RI<0xA7, RawFrm, (outs), (ins), "cmpsq", []>;
+
+
+//===----------------------------------------------------------------------===//
+// Move Instructions.
+//
+
+let neverHasSideEffects = 1 in {
+def MOV8rr : I<0x88, MRMDestReg, (outs GR8 :$dst), (ins GR8 :$src),
+ "mov{b}\t{$src, $dst|$dst, $src}", []>;
+def MOV16rr : I<0x89, MRMDestReg, (outs GR16:$dst), (ins GR16:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
+def MOV32rr : I<0x89, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}", []>;
+def MOV64rr : RI<0x89, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}", []>;
+}
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+def MOV8ri : Ii8 <0xB0, AddRegFrm, (outs GR8 :$dst), (ins i8imm :$src),
+ "mov{b}\t{$src, $dst|$dst, $src}",
+ [(set GR8:$dst, imm:$src)]>;
+def MOV16ri : Ii16<0xB8, AddRegFrm, (outs GR16:$dst), (ins i16imm:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, imm:$src)]>, OpSize;
+def MOV32ri : Ii32<0xB8, AddRegFrm, (outs GR32:$dst), (ins i32imm:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, imm:$src)]>;
+def MOV64ri : RIi64<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64imm:$src),
+ "movabs{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, imm:$src)]>;
+def MOV64ri32 : RIi32<0xC7, MRM0r, (outs GR64:$dst), (ins i64i32imm:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, i64immSExt32:$src)]>;
+}
+
+def MOV8mi : Ii8 <0xC6, MRM0m, (outs), (ins i8mem :$dst, i8imm :$src),
+ "mov{b}\t{$src, $dst|$dst, $src}",
+ [(store (i8 imm:$src), addr:$dst)]>;
+def MOV16mi : Ii16<0xC7, MRM0m, (outs), (ins i16mem:$dst, i16imm:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}",
+ [(store (i16 imm:$src), addr:$dst)]>, OpSize;
+def MOV32mi : Ii32<0xC7, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}",
+ [(store (i32 imm:$src), addr:$dst)]>;
+def MOV64mi32 : RIi32<0xC7, MRM0m, (outs), (ins i64mem:$dst, i64i32imm:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}",
+ [(store i64immSExt32:$src, addr:$dst)]>;
+
+/// moffs8, moffs16 and moffs32 versions of moves. The immediate is a
+/// 32-bit offset from the PC. These are only valid in x86-32 mode.
+def MOV8o8a : Ii32 <0xA0, RawFrm, (outs), (ins offset8:$src),
+ "mov{b}\t{$src, %al|AL, $src}", []>,
+ Requires<[In32BitMode]>;
+def MOV16o16a : Ii32 <0xA1, RawFrm, (outs), (ins offset16:$src),
+ "mov{w}\t{$src, %ax|AL, $src}", []>, OpSize,
+ Requires<[In32BitMode]>;
+def MOV32o32a : Ii32 <0xA1, RawFrm, (outs), (ins offset32:$src),
+ "mov{l}\t{$src, %eax|EAX, $src}", []>,
+ Requires<[In32BitMode]>;
+def MOV8ao8 : Ii32 <0xA2, RawFrm, (outs offset8:$dst), (ins),
+ "mov{b}\t{%al, $dst|$dst, AL}", []>,
+ Requires<[In32BitMode]>;
+def MOV16ao16 : Ii32 <0xA3, RawFrm, (outs offset16:$dst), (ins),
+ "mov{w}\t{%ax, $dst|$dst, AL}", []>, OpSize,
+ Requires<[In32BitMode]>;
+def MOV32ao32 : Ii32 <0xA3, RawFrm, (outs offset32:$dst), (ins),
+ "mov{l}\t{%eax, $dst|$dst, EAX}", []>,
+ Requires<[In32BitMode]>;
+
+// FIXME: These definitions are utterly broken
+// Just leave them commented out for now because they're useless outside
+// of the large code model, and most compilers won't generate the instructions
+// in question.
+/*
+def MOV64o8a : RIi8<0xA0, RawFrm, (outs), (ins offset8:$src),
+ "mov{q}\t{$src, %rax|RAX, $src}", []>;
+def MOV64o64a : RIi32<0xA1, RawFrm, (outs), (ins offset64:$src),
+ "mov{q}\t{$src, %rax|RAX, $src}", []>;
+def MOV64ao8 : RIi8<0xA2, RawFrm, (outs offset8:$dst), (ins),
+ "mov{q}\t{%rax, $dst|$dst, RAX}", []>;
+def MOV64ao64 : RIi32<0xA3, RawFrm, (outs offset64:$dst), (ins),
+ "mov{q}\t{%rax, $dst|$dst, RAX}", []>;
+*/
+
+
+let isCodeGenOnly = 1 in {
+def MOV8rr_REV : I<0x8A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src),
+ "mov{b}\t{$src, $dst|$dst, $src}", []>;
+def MOV16rr_REV : I<0x8B, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
+def MOV32rr_REV : I<0x8B, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}", []>;
+def MOV64rr_REV : RI<0x8B, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}", []>;
+}
+
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
+def MOV8rm : I<0x8A, MRMSrcMem, (outs GR8 :$dst), (ins i8mem :$src),
+ "mov{b}\t{$src, $dst|$dst, $src}",
+ [(set GR8:$dst, (loadi8 addr:$src))]>;
+def MOV16rm : I<0x8B, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, (loadi16 addr:$src))]>, OpSize;
+def MOV32rm : I<0x8B, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (loadi32 addr:$src))]>;
+def MOV64rm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (load addr:$src))]>;
+}
+
+def MOV8mr : I<0x88, MRMDestMem, (outs), (ins i8mem :$dst, GR8 :$src),
+ "mov{b}\t{$src, $dst|$dst, $src}",
+ [(store GR8:$src, addr:$dst)]>;
+def MOV16mr : I<0x89, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}",
+ [(store GR16:$src, addr:$dst)]>, OpSize;
+def MOV32mr : I<0x89, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}",
+ [(store GR32:$src, addr:$dst)]>;
+def MOV64mr : RI<0x89, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}",
+ [(store GR64:$src, addr:$dst)]>;
+
+// Versions of MOV8rr, MOV8mr, and MOV8rm that use i8mem_NOREX and GR8_NOREX so
+// that they can be used for copying and storing h registers, which can't be
+// encoded when a REX prefix is present.
+let isCodeGenOnly = 1 in {
+let neverHasSideEffects = 1 in
+def MOV8rr_NOREX : I<0x88, MRMDestReg,
+ (outs GR8_NOREX:$dst), (ins GR8_NOREX:$src),
+ "mov{b}\t{$src, $dst|$dst, $src} # NOREX", []>;
+let mayStore = 1 in
+def MOV8mr_NOREX : I<0x88, MRMDestMem,
+ (outs), (ins i8mem_NOREX:$dst, GR8_NOREX:$src),
+ "mov{b}\t{$src, $dst|$dst, $src} # NOREX", []>;
+let mayLoad = 1, neverHasSideEffects = 1,
+ canFoldAsLoad = 1, isReMaterializable = 1 in
+def MOV8rm_NOREX : I<0x8A, MRMSrcMem,
+ (outs GR8_NOREX:$dst), (ins i8mem_NOREX:$src),
+ "mov{b}\t{$src, $dst|$dst, $src} # NOREX", []>;
+}
+
+
+// Condition code ops, incl. set if equal/not equal/...
+let Defs = [EFLAGS], Uses = [AH], neverHasSideEffects = 1 in
+def SAHF : I<0x9E, RawFrm, (outs), (ins), "sahf", []>; // flags = AH
+let Defs = [AH], Uses = [EFLAGS], neverHasSideEffects = 1 in
+def LAHF : I<0x9F, RawFrm, (outs), (ins), "lahf", []>; // AH = flags
+
+
+//===----------------------------------------------------------------------===//
+// Bit tests instructions: BT, BTS, BTR, BTC.
+
+let Defs = [EFLAGS] in {
+def BT16rr : I<0xA3, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
+ "bt{w}\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86bt GR16:$src1, GR16:$src2))]>, OpSize, TB;
+def BT32rr : I<0xA3, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
+ "bt{l}\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86bt GR32:$src1, GR32:$src2))]>, TB;
+def BT64rr : RI<0xA3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
+ "bt{q}\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86bt GR64:$src1, GR64:$src2))]>, TB;
+
+// Unlike with the register+register form, the memory+register form of the
+// bt instruction does not ignore the high bits of the index. From ISel's
+// perspective, this is pretty bizarre. Make these instructions disassembly
+// only for now.
+
+def BT16mr : I<0xA3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
+ "bt{w}\t{$src2, $src1|$src1, $src2}",
+// [(X86bt (loadi16 addr:$src1), GR16:$src2),
+// (implicit EFLAGS)]
+ []
+ >, OpSize, TB, Requires<[FastBTMem]>;
+def BT32mr : I<0xA3, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
+ "bt{l}\t{$src2, $src1|$src1, $src2}",
+// [(X86bt (loadi32 addr:$src1), GR32:$src2),
+// (implicit EFLAGS)]
+ []
+ >, TB, Requires<[FastBTMem]>;
+def BT64mr : RI<0xA3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
+ "bt{q}\t{$src2, $src1|$src1, $src2}",
+// [(X86bt (loadi64 addr:$src1), GR64:$src2),
+// (implicit EFLAGS)]
+ []
+ >, TB;
+
+def BT16ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR16:$src1, i16i8imm:$src2),
+ "bt{w}\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86bt GR16:$src1, i16immSExt8:$src2))]>,
+ OpSize, TB;
+def BT32ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR32:$src1, i32i8imm:$src2),
+ "bt{l}\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86bt GR32:$src1, i32immSExt8:$src2))]>, TB;
+def BT64ri8 : RIi8<0xBA, MRM4r, (outs), (ins GR64:$src1, i64i8imm:$src2),
+ "bt{q}\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86bt GR64:$src1, i64immSExt8:$src2))]>, TB;
+
+// Note that these instructions don't need FastBTMem because that
+// only applies when the other operand is in a register. When it's
+// an immediate, bt is still fast.
+def BT16mi8 : Ii8<0xBA, MRM4m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
+ "bt{w}\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86bt (loadi16 addr:$src1), i16immSExt8:$src2))
+ ]>, OpSize, TB;
+def BT32mi8 : Ii8<0xBA, MRM4m, (outs), (ins i32mem:$src1, i32i8imm:$src2),
+ "bt{l}\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86bt (loadi32 addr:$src1), i32immSExt8:$src2))
+ ]>, TB;
+def BT64mi8 : RIi8<0xBA, MRM4m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
+ "bt{q}\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86bt (loadi64 addr:$src1),
+ i64immSExt8:$src2))]>, TB;
+
+
+def BTC16rr : I<0xBB, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
+ "btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
+def BTC32rr : I<0xBB, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
+ "btc{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTC64rr : RI<0xBB, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
+ "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTC16mr : I<0xBB, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
+ "btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
+def BTC32mr : I<0xBB, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
+ "btc{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTC64mr : RI<0xBB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
+ "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTC16ri8 : Ii8<0xBA, MRM7r, (outs), (ins GR16:$src1, i16i8imm:$src2),
+ "btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
+def BTC32ri8 : Ii8<0xBA, MRM7r, (outs), (ins GR32:$src1, i32i8imm:$src2),
+ "btc{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTC64ri8 : RIi8<0xBA, MRM7r, (outs), (ins GR64:$src1, i64i8imm:$src2),
+ "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTC16mi8 : Ii8<0xBA, MRM7m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
+ "btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
+def BTC32mi8 : Ii8<0xBA, MRM7m, (outs), (ins i32mem:$src1, i32i8imm:$src2),
+ "btc{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTC64mi8 : RIi8<0xBA, MRM7m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
+ "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
+
+def BTR16rr : I<0xB3, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
+ "btr{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
+def BTR32rr : I<0xB3, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
+ "btr{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTR64rr : RI<0xB3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
+ "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTR16mr : I<0xB3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
+ "btr{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
+def BTR32mr : I<0xB3, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
+ "btr{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTR64mr : RI<0xB3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
+ "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTR16ri8 : Ii8<0xBA, MRM6r, (outs), (ins GR16:$src1, i16i8imm:$src2),
+ "btr{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
+def BTR32ri8 : Ii8<0xBA, MRM6r, (outs), (ins GR32:$src1, i32i8imm:$src2),
+ "btr{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTR64ri8 : RIi8<0xBA, MRM6r, (outs), (ins GR64:$src1, i64i8imm:$src2),
+ "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTR16mi8 : Ii8<0xBA, MRM6m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
+ "btr{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
+def BTR32mi8 : Ii8<0xBA, MRM6m, (outs), (ins i32mem:$src1, i32i8imm:$src2),
+ "btr{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTR64mi8 : RIi8<0xBA, MRM6m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
+ "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
+
+def BTS16rr : I<0xAB, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
+ "bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
+def BTS32rr : I<0xAB, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
+ "bts{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTS64rr : RI<0xAB, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
+ "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTS16mr : I<0xAB, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
+ "bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
+def BTS32mr : I<0xAB, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
+ "bts{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTS64mr : RI<0xAB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
+ "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTS16ri8 : Ii8<0xBA, MRM5r, (outs), (ins GR16:$src1, i16i8imm:$src2),
+ "bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
+def BTS32ri8 : Ii8<0xBA, MRM5r, (outs), (ins GR32:$src1, i32i8imm:$src2),
+ "bts{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTS64ri8 : RIi8<0xBA, MRM5r, (outs), (ins GR64:$src1, i64i8imm:$src2),
+ "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTS16mi8 : Ii8<0xBA, MRM5m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
+ "bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
+def BTS32mi8 : Ii8<0xBA, MRM5m, (outs), (ins i32mem:$src1, i32i8imm:$src2),
+ "bts{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTS64mi8 : RIi8<0xBA, MRM5m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
+ "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
+} // Defs = [EFLAGS]
+
+
+//===----------------------------------------------------------------------===//
+// Atomic support
+//
+
+
+// Atomic swap. These are just normal xchg instructions. But since a memory
+// operand is referenced, the atomicity is ensured.
+let Constraints = "$val = $dst" in {
+def XCHG8rm : I<0x86, MRMSrcMem, (outs GR8:$dst), (ins GR8:$val, i8mem:$ptr),
+ "xchg{b}\t{$val, $ptr|$ptr, $val}",
+ [(set GR8:$dst, (atomic_swap_8 addr:$ptr, GR8:$val))]>;
+def XCHG16rm : I<0x87, MRMSrcMem, (outs GR16:$dst),(ins GR16:$val, i16mem:$ptr),
+ "xchg{w}\t{$val, $ptr|$ptr, $val}",
+ [(set GR16:$dst, (atomic_swap_16 addr:$ptr, GR16:$val))]>,
+ OpSize;
+def XCHG32rm : I<0x87, MRMSrcMem, (outs GR32:$dst),(ins GR32:$val, i32mem:$ptr),
+ "xchg{l}\t{$val, $ptr|$ptr, $val}",
+ [(set GR32:$dst, (atomic_swap_32 addr:$ptr, GR32:$val))]>;
+def XCHG64rm : RI<0x87, MRMSrcMem, (outs GR64:$dst),(ins GR64:$val,i64mem:$ptr),
+ "xchg{q}\t{$val, $ptr|$ptr, $val}",
+ [(set GR64:$dst, (atomic_swap_64 addr:$ptr, GR64:$val))]>;
+
+def XCHG8rr : I<0x86, MRMSrcReg, (outs GR8:$dst), (ins GR8:$val, GR8:$src),
+ "xchg{b}\t{$val, $src|$src, $val}", []>;
+def XCHG16rr : I<0x87, MRMSrcReg, (outs GR16:$dst), (ins GR16:$val, GR16:$src),
+ "xchg{w}\t{$val, $src|$src, $val}", []>, OpSize;
+def XCHG32rr : I<0x87, MRMSrcReg, (outs GR32:$dst), (ins GR32:$val, GR32:$src),
+ "xchg{l}\t{$val, $src|$src, $val}", []>;
+def XCHG64rr : RI<0x87, MRMSrcReg, (outs GR64:$dst), (ins GR64:$val,GR64:$src),
+ "xchg{q}\t{$val, $src|$src, $val}", []>;
+}
+
+def XCHG16ar : I<0x90, AddRegFrm, (outs), (ins GR16:$src),
+ "xchg{w}\t{$src, %ax|AX, $src}", []>, OpSize;
+def XCHG32ar : I<0x90, AddRegFrm, (outs), (ins GR32:$src),
+ "xchg{l}\t{$src, %eax|EAX, $src}", []>, Requires<[In32BitMode]>;
+// Uses GR32_NOAX in 64-bit mode to prevent encoding using the 0x90 NOP encoding.
+// xchg %eax, %eax needs to clear upper 32-bits of RAX so is not a NOP.
+def XCHG32ar64 : I<0x90, AddRegFrm, (outs), (ins GR32_NOAX:$src),
+ "xchg{l}\t{$src, %eax|EAX, $src}", []>, Requires<[In64BitMode]>;
+def XCHG64ar : RI<0x90, AddRegFrm, (outs), (ins GR64:$src),
+ "xchg{q}\t{$src, %rax|RAX, $src}", []>;
+
+
+
+def XADD8rr : I<0xC0, MRMDestReg, (outs GR8:$dst), (ins GR8:$src),
+ "xadd{b}\t{$src, $dst|$dst, $src}", []>, TB;
+def XADD16rr : I<0xC1, MRMDestReg, (outs GR16:$dst), (ins GR16:$src),
+ "xadd{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+def XADD32rr : I<0xC1, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
+ "xadd{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def XADD64rr : RI<0xC1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
+ "xadd{q}\t{$src, $dst|$dst, $src}", []>, TB;
+
+let mayLoad = 1, mayStore = 1 in {
+def XADD8rm : I<0xC0, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src),
+ "xadd{b}\t{$src, $dst|$dst, $src}", []>, TB;
+def XADD16rm : I<0xC1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
+ "xadd{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+def XADD32rm : I<0xC1, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
+ "xadd{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def XADD64rm : RI<0xC1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
+ "xadd{q}\t{$src, $dst|$dst, $src}", []>, TB;
+
+}
+
+def CMPXCHG8rr : I<0xB0, MRMDestReg, (outs GR8:$dst), (ins GR8:$src),
+ "cmpxchg{b}\t{$src, $dst|$dst, $src}", []>, TB;
+def CMPXCHG16rr : I<0xB1, MRMDestReg, (outs GR16:$dst), (ins GR16:$src),
+ "cmpxchg{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+def CMPXCHG32rr : I<0xB1, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
+ "cmpxchg{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def CMPXCHG64rr : RI<0xB1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
+ "cmpxchg{q}\t{$src, $dst|$dst, $src}", []>, TB;
+
+let mayLoad = 1, mayStore = 1 in {
+def CMPXCHG8rm : I<0xB0, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src),
+ "cmpxchg{b}\t{$src, $dst|$dst, $src}", []>, TB;
+def CMPXCHG16rm : I<0xB1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
+ "cmpxchg{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+def CMPXCHG32rm : I<0xB1, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
+ "cmpxchg{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def CMPXCHG64rm : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
+ "cmpxchg{q}\t{$src, $dst|$dst, $src}", []>, TB;
+}
+
+let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX] in
+def CMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$dst),
+ "cmpxchg8b\t$dst", []>, TB;
+
+let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX] in
+def CMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$dst),
+ "cmpxchg16b\t$dst", []>, TB, Requires<[HasCmpxchg16b]>;
+
+
+
+// Lock instruction prefix
+def LOCK_PREFIX : I<0xF0, RawFrm, (outs), (ins), "lock", []>;
+
+// Rex64 instruction prefix
+def REX64_PREFIX : I<0x48, RawFrm, (outs), (ins), "rex64", []>;
+
+// Data16 instruction prefix
+def DATA16_PREFIX : I<0x66, RawFrm, (outs), (ins), "data16", []>;
+
+// Repeat string operation instruction prefixes
+// These uses the DF flag in the EFLAGS register to inc or dec ECX
+let Defs = [ECX], Uses = [ECX,EFLAGS] in {
+// Repeat (used with INS, OUTS, MOVS, LODS and STOS)
+def REP_PREFIX : I<0xF3, RawFrm, (outs), (ins), "rep", []>;
+// Repeat while not equal (used with CMPS and SCAS)
+def REPNE_PREFIX : I<0xF2, RawFrm, (outs), (ins), "repne", []>;
+}
+
+
+// String manipulation instructions
+def LODSB : I<0xAC, RawFrm, (outs), (ins), "lodsb", []>;
+def LODSW : I<0xAD, RawFrm, (outs), (ins), "lodsw", []>, OpSize;
+def LODSD : I<0xAD, RawFrm, (outs), (ins), "lods{l|d}", []>;
+def LODSQ : RI<0xAD, RawFrm, (outs), (ins), "lodsq", []>;
+
+def OUTSB : I<0x6E, RawFrm, (outs), (ins), "outsb", []>;
+def OUTSW : I<0x6F, RawFrm, (outs), (ins), "outsw", []>, OpSize;
+def OUTSD : I<0x6F, RawFrm, (outs), (ins), "outs{l|d}", []>;
+
+
+// Flag instructions
+def CLC : I<0xF8, RawFrm, (outs), (ins), "clc", []>;
+def STC : I<0xF9, RawFrm, (outs), (ins), "stc", []>;
+def CLI : I<0xFA, RawFrm, (outs), (ins), "cli", []>;
+def STI : I<0xFB, RawFrm, (outs), (ins), "sti", []>;
+def CLD : I<0xFC, RawFrm, (outs), (ins), "cld", []>;
+def STD : I<0xFD, RawFrm, (outs), (ins), "std", []>;
+def CMC : I<0xF5, RawFrm, (outs), (ins), "cmc", []>;
+
+def CLTS : I<0x06, RawFrm, (outs), (ins), "clts", []>, TB;
+
+// Table lookup instructions
+def XLAT : I<0xD7, RawFrm, (outs), (ins), "xlatb", []>;
+
+// ASCII Adjust After Addition
+// sets AL, AH and CF and AF of EFLAGS and uses AL and AF of EFLAGS
+def AAA : I<0x37, RawFrm, (outs), (ins), "aaa", []>, Requires<[In32BitMode]>;
+
+// ASCII Adjust AX Before Division
+// sets AL, AH and EFLAGS and uses AL and AH
+def AAD8i8 : Ii8<0xD5, RawFrm, (outs), (ins i8imm:$src),
+ "aad\t$src", []>, Requires<[In32BitMode]>;
+
+// ASCII Adjust AX After Multiply
+// sets AL, AH and EFLAGS and uses AL
+def AAM8i8 : Ii8<0xD4, RawFrm, (outs), (ins i8imm:$src),
+ "aam\t$src", []>, Requires<[In32BitMode]>;
+
+// ASCII Adjust AL After Subtraction - sets
+// sets AL, AH and CF and AF of EFLAGS and uses AL and AF of EFLAGS
+def AAS : I<0x3F, RawFrm, (outs), (ins), "aas", []>, Requires<[In32BitMode]>;
+
+// Decimal Adjust AL after Addition
+// sets AL, CF and AF of EFLAGS and uses AL, CF and AF of EFLAGS
+def DAA : I<0x27, RawFrm, (outs), (ins), "daa", []>, Requires<[In32BitMode]>;
+
+// Decimal Adjust AL after Subtraction
+// sets AL, CF and AF of EFLAGS and uses AL, CF and AF of EFLAGS
+def DAS : I<0x2F, RawFrm, (outs), (ins), "das", []>, Requires<[In32BitMode]>;
+
+// Check Array Index Against Bounds
+def BOUNDS16rm : I<0x62, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
+ "bound\t{$src, $dst|$dst, $src}", []>, OpSize,
+ Requires<[In32BitMode]>;
+def BOUNDS32rm : I<0x62, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "bound\t{$src, $dst|$dst, $src}", []>,
+ Requires<[In32BitMode]>;
+
+// Adjust RPL Field of Segment Selector
+def ARPL16rr : I<0x63, MRMDestReg, (outs GR16:$src), (ins GR16:$dst),
+ "arpl\t{$src, $dst|$dst, $src}", []>, Requires<[In32BitMode]>;
+def ARPL16mr : I<0x63, MRMSrcMem, (outs GR16:$src), (ins i16mem:$dst),
+ "arpl\t{$src, $dst|$dst, $src}", []>, Requires<[In32BitMode]>;
+
+//===----------------------------------------------------------------------===//
+// MOVBE Instructions
+//
+let Predicates = [HasMOVBE] in {
+ def MOVBE16rm : I<0xF0, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
+ "movbe{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, (bswap (loadi16 addr:$src)))]>, OpSize, T8;
+ def MOVBE32rm : I<0xF0, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "movbe{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (bswap (loadi32 addr:$src)))]>, T8;
+ def MOVBE64rm : RI<0xF0, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "movbe{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (bswap (loadi64 addr:$src)))]>, T8;
+ def MOVBE16mr : I<0xF1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
+ "movbe{w}\t{$src, $dst|$dst, $src}",
+ [(store (bswap GR16:$src), addr:$dst)]>, OpSize, T8;
+ def MOVBE32mr : I<0xF1, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
+ "movbe{l}\t{$src, $dst|$dst, $src}",
+ [(store (bswap GR32:$src), addr:$dst)]>, T8;
+ def MOVBE64mr : RI<0xF1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
+ "movbe{q}\t{$src, $dst|$dst, $src}",
+ [(store (bswap GR64:$src), addr:$dst)]>, T8;
+}
+
+//===----------------------------------------------------------------------===//
+// RDRAND Instruction
+//
+let Predicates = [HasRDRAND], Defs = [EFLAGS] in {
+ def RDRAND16r : I<0xC7, MRM6r, (outs GR16:$dst), (ins),
+ "rdrand{w}\t$dst", []>, OpSize, TB;
+ def RDRAND32r : I<0xC7, MRM6r, (outs GR32:$dst), (ins),
+ "rdrand{l}\t$dst", []>, TB;
+ def RDRAND64r : RI<0xC7, MRM6r, (outs GR64:$dst), (ins),
+ "rdrand{q}\t$dst", []>, TB;
+}
+
+//===----------------------------------------------------------------------===//
+// LZCNT Instruction
+//
+let Predicates = [HasLZCNT], Defs = [EFLAGS] in {
+ def LZCNT16rr : I<0xBD, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
+ "lzcnt{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, (ctlz GR16:$src)), (implicit EFLAGS)]>, XS,
+ OpSize;
+ def LZCNT16rm : I<0xBD, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
+ "lzcnt{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, (ctlz (loadi16 addr:$src))),
+ (implicit EFLAGS)]>, XS, OpSize;
+
+ def LZCNT32rr : I<0xBD, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+ "lzcnt{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (ctlz GR32:$src)), (implicit EFLAGS)]>, XS;
+ def LZCNT32rm : I<0xBD, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "lzcnt{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (ctlz (loadi32 addr:$src))),
+ (implicit EFLAGS)]>, XS;
+
+ def LZCNT64rr : RI<0xBD, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+ "lzcnt{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (ctlz GR64:$src)), (implicit EFLAGS)]>,
+ XS;
+ def LZCNT64rm : RI<0xBD, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "lzcnt{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (ctlz (loadi64 addr:$src))),
+ (implicit EFLAGS)]>, XS;
+}
+
+//===----------------------------------------------------------------------===//
+// TZCNT Instruction
+//
+let Predicates = [HasBMI], Defs = [EFLAGS] in {
+ def TZCNT16rr : I<0xBC, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
+ "tzcnt{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, (cttz GR16:$src)), (implicit EFLAGS)]>, XS,
+ OpSize;
+ def TZCNT16rm : I<0xBC, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
+ "tzcnt{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, (cttz (loadi16 addr:$src))),
+ (implicit EFLAGS)]>, XS, OpSize;
+
+ def TZCNT32rr : I<0xBC, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+ "tzcnt{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (cttz GR32:$src)), (implicit EFLAGS)]>, XS;
+ def TZCNT32rm : I<0xBC, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "tzcnt{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (cttz (loadi32 addr:$src))),
+ (implicit EFLAGS)]>, XS;
+
+ def TZCNT64rr : RI<0xBC, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+ "tzcnt{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (cttz GR64:$src)), (implicit EFLAGS)]>,
+ XS;
+ def TZCNT64rm : RI<0xBC, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "tzcnt{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (cttz (loadi64 addr:$src))),
+ (implicit EFLAGS)]>, XS;
+}
+
+//===----------------------------------------------------------------------===//
+// Subsystems.
+//===----------------------------------------------------------------------===//
+
+include "X86InstrArithmetic.td"
+include "X86InstrCMovSetCC.td"
+include "X86InstrExtension.td"
+include "X86InstrControl.td"
+include "X86InstrShiftRotate.td"
+
+// X87 Floating Point Stack.
+include "X86InstrFPStack.td"
+
+// SIMD support (SSE, MMX and AVX)
+include "X86InstrFragmentsSIMD.td"
+
+// FMA - Fused Multiply-Add support (requires FMA)
+include "X86InstrFMA.td"
+
+// SSE, MMX and 3DNow! vector support.
+include "X86InstrSSE.td"
+include "X86InstrMMX.td"
+include "X86Instr3DNow.td"
+
+include "X86InstrVMX.td"
+
+// System instructions.
+include "X86InstrSystem.td"
+
+// Compiler Pseudo Instructions and Pat Patterns
+include "X86InstrCompiler.td"
+
+//===----------------------------------------------------------------------===//
+// Assembler Mnemonic Aliases
+//===----------------------------------------------------------------------===//
+
+def : MnemonicAlias<"call", "calll">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"call", "callq">, Requires<[In64BitMode]>;
+
+def : MnemonicAlias<"cbw", "cbtw">;
+def : MnemonicAlias<"cwd", "cwtd">;
+def : MnemonicAlias<"cdq", "cltd">;
+def : MnemonicAlias<"cwde", "cwtl">;
+def : MnemonicAlias<"cdqe", "cltq">;
+
+// lret maps to lretl, it is not ambiguous with lretq.
+def : MnemonicAlias<"lret", "lretl">;
+
+def : MnemonicAlias<"leavel", "leave">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"leaveq", "leave">, Requires<[In64BitMode]>;
+
+def : MnemonicAlias<"loopz", "loope">;
+def : MnemonicAlias<"loopnz", "loopne">;
+
+def : MnemonicAlias<"pop", "popl">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"pop", "popq">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"popf", "popfl">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"popf", "popfq">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"popfd", "popfl">;
+
+// FIXME: This is wrong for "push reg". "push %bx" should turn into pushw in
+// all modes. However: "push (addr)" and "push $42" should default to
+// pushl/pushq depending on the current mode. Similar for "pop %bx"
+def : MnemonicAlias<"push", "pushl">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"push", "pushq">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"pushf", "pushfl">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"pushf", "pushfq">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"pushfd", "pushfl">;
+
+def : MnemonicAlias<"repe", "rep">;
+def : MnemonicAlias<"repz", "rep">;
+def : MnemonicAlias<"repnz", "repne">;
+
+def : MnemonicAlias<"retl", "ret">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"retq", "ret">, Requires<[In64BitMode]>;
+
+def : MnemonicAlias<"salb", "shlb">;
+def : MnemonicAlias<"salw", "shlw">;
+def : MnemonicAlias<"sall", "shll">;
+def : MnemonicAlias<"salq", "shlq">;
+
+def : MnemonicAlias<"smovb", "movsb">;
+def : MnemonicAlias<"smovw", "movsw">;
+def : MnemonicAlias<"smovl", "movsl">;
+def : MnemonicAlias<"smovq", "movsq">;
+
+def : MnemonicAlias<"ud2a", "ud2">;
+def : MnemonicAlias<"verrw", "verr">;
+
+// System instruction aliases.
+def : MnemonicAlias<"iret", "iretl">;
+def : MnemonicAlias<"sysret", "sysretl">;
+
+def : MnemonicAlias<"lgdtl", "lgdt">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"lgdtq", "lgdt">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"lidtl", "lidt">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"lidtq", "lidt">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"sgdtl", "sgdt">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"sgdtq", "sgdt">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"sidtl", "sidt">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"sidtq", "sidt">, Requires<[In64BitMode]>;
+
+
+// Floating point stack aliases.
+def : MnemonicAlias<"fcmovz", "fcmove">;
+def : MnemonicAlias<"fcmova", "fcmovnbe">;
+def : MnemonicAlias<"fcmovnae", "fcmovb">;
+def : MnemonicAlias<"fcmovna", "fcmovbe">;
+def : MnemonicAlias<"fcmovae", "fcmovnb">;
+def : MnemonicAlias<"fcomip", "fcompi">;
+def : MnemonicAlias<"fildq", "fildll">;
+def : MnemonicAlias<"fldcww", "fldcw">;
+def : MnemonicAlias<"fnstcww", "fnstcw">;
+def : MnemonicAlias<"fnstsww", "fnstsw">;
+def : MnemonicAlias<"fucomip", "fucompi">;
+def : MnemonicAlias<"fwait", "wait">;
+
+
+class CondCodeAlias<string Prefix,string Suffix, string OldCond, string NewCond>
+ : MnemonicAlias<!strconcat(Prefix, OldCond, Suffix),
+ !strconcat(Prefix, NewCond, Suffix)>;
+
+/// IntegerCondCodeMnemonicAlias - This multiclass defines a bunch of
+/// MnemonicAlias's that canonicalize the condition code in a mnemonic, for
+/// example "setz" -> "sete".
+multiclass IntegerCondCodeMnemonicAlias<string Prefix, string Suffix> {
+ def C : CondCodeAlias<Prefix, Suffix, "c", "b">; // setc -> setb
+ def Z : CondCodeAlias<Prefix, Suffix, "z" , "e">; // setz -> sete
+ def NA : CondCodeAlias<Prefix, Suffix, "na", "be">; // setna -> setbe
+ def NB : CondCodeAlias<Prefix, Suffix, "nb", "ae">; // setnb -> setae
+ def NC : CondCodeAlias<Prefix, Suffix, "nc", "ae">; // setnc -> setae
+ def NG : CondCodeAlias<Prefix, Suffix, "ng", "le">; // setng -> setle
+ def NL : CondCodeAlias<Prefix, Suffix, "nl", "ge">; // setnl -> setge
+ def NZ : CondCodeAlias<Prefix, Suffix, "nz", "ne">; // setnz -> setne
+ def PE : CondCodeAlias<Prefix, Suffix, "pe", "p">; // setpe -> setp
+ def PO : CondCodeAlias<Prefix, Suffix, "po", "np">; // setpo -> setnp
+
+ def NAE : CondCodeAlias<Prefix, Suffix, "nae", "b">; // setnae -> setb
+ def NBE : CondCodeAlias<Prefix, Suffix, "nbe", "a">; // setnbe -> seta
+ def NGE : CondCodeAlias<Prefix, Suffix, "nge", "l">; // setnge -> setl
+ def NLE : CondCodeAlias<Prefix, Suffix, "nle", "g">; // setnle -> setg
+}
+
+// Aliases for set<CC>
+defm : IntegerCondCodeMnemonicAlias<"set", "">;
+// Aliases for j<CC>
+defm : IntegerCondCodeMnemonicAlias<"j", "">;
+// Aliases for cmov<CC>{w,l,q}
+defm : IntegerCondCodeMnemonicAlias<"cmov", "w">;
+defm : IntegerCondCodeMnemonicAlias<"cmov", "l">;
+defm : IntegerCondCodeMnemonicAlias<"cmov", "q">;
+
+
+//===----------------------------------------------------------------------===//
+// Assembler Instruction Aliases
+//===----------------------------------------------------------------------===//
+
+// aad/aam default to base 10 if no operand is specified.
+def : InstAlias<"aad", (AAD8i8 10)>;
+def : InstAlias<"aam", (AAM8i8 10)>;
+
+// Disambiguate the mem/imm form of bt-without-a-suffix as btl.
+def : InstAlias<"bt $imm, $mem", (BT32mi8 i32mem:$mem, i32i8imm:$imm)>;
+
+// clr aliases.
+def : InstAlias<"clrb $reg", (XOR8rr GR8 :$reg, GR8 :$reg)>;
+def : InstAlias<"clrw $reg", (XOR16rr GR16:$reg, GR16:$reg)>;
+def : InstAlias<"clrl $reg", (XOR32rr GR32:$reg, GR32:$reg)>;
+def : InstAlias<"clrq $reg", (XOR64rr GR64:$reg, GR64:$reg)>;
+
+// div and idiv aliases for explicit A register.
+def : InstAlias<"divb $src, %al", (DIV8r GR8 :$src)>;
+def : InstAlias<"divw $src, %ax", (DIV16r GR16:$src)>;
+def : InstAlias<"divl $src, %eax", (DIV32r GR32:$src)>;
+def : InstAlias<"divq $src, %rax", (DIV64r GR64:$src)>;
+def : InstAlias<"divb $src, %al", (DIV8m i8mem :$src)>;
+def : InstAlias<"divw $src, %ax", (DIV16m i16mem:$src)>;
+def : InstAlias<"divl $src, %eax", (DIV32m i32mem:$src)>;
+def : InstAlias<"divq $src, %rax", (DIV64m i64mem:$src)>;
+def : InstAlias<"idivb $src, %al", (IDIV8r GR8 :$src)>;
+def : InstAlias<"idivw $src, %ax", (IDIV16r GR16:$src)>;
+def : InstAlias<"idivl $src, %eax", (IDIV32r GR32:$src)>;
+def : InstAlias<"idivq $src, %rax", (IDIV64r GR64:$src)>;
+def : InstAlias<"idivb $src, %al", (IDIV8m i8mem :$src)>;
+def : InstAlias<"idivw $src, %ax", (IDIV16m i16mem:$src)>;
+def : InstAlias<"idivl $src, %eax", (IDIV32m i32mem:$src)>;
+def : InstAlias<"idivq $src, %rax", (IDIV64m i64mem:$src)>;
+
+
+
+// Various unary fpstack operations default to operating on on ST1.
+// For example, "fxch" -> "fxch %st(1)"
+def : InstAlias<"faddp", (ADD_FPrST0 ST1), 0>;
+def : InstAlias<"fsubp", (SUBR_FPrST0 ST1)>;
+def : InstAlias<"fsubrp", (SUB_FPrST0 ST1)>;
+def : InstAlias<"fmulp", (MUL_FPrST0 ST1)>;
+def : InstAlias<"fdivp", (DIVR_FPrST0 ST1)>;
+def : InstAlias<"fdivrp", (DIV_FPrST0 ST1)>;
+def : InstAlias<"fxch", (XCH_F ST1)>;
+def : InstAlias<"fcomi", (COM_FIr ST1)>;
+def : InstAlias<"fcompi", (COM_FIPr ST1)>;
+def : InstAlias<"fucom", (UCOM_Fr ST1)>;
+def : InstAlias<"fucomp", (UCOM_FPr ST1)>;
+def : InstAlias<"fucomi", (UCOM_FIr ST1)>;
+def : InstAlias<"fucompi", (UCOM_FIPr ST1)>;
+
+// Handle fmul/fadd/fsub/fdiv instructions with explicitly written st(0) op.
+// For example, "fadd %st(4), %st(0)" -> "fadd %st(4)". We also disambiguate
+// instructions like "fadd %st(0), %st(0)" as "fadd %st(0)" for consistency with
+// gas.
+multiclass FpUnaryAlias<string Mnemonic, Instruction Inst, bit EmitAlias = 1> {
+ def : InstAlias<!strconcat(Mnemonic, " $op, %st(0)"),
+ (Inst RST:$op), EmitAlias>;
+ def : InstAlias<!strconcat(Mnemonic, " %st(0), %st(0)"),
+ (Inst ST0), EmitAlias>;
+}
+
+defm : FpUnaryAlias<"fadd", ADD_FST0r>;
+defm : FpUnaryAlias<"faddp", ADD_FPrST0, 0>;
+defm : FpUnaryAlias<"fsub", SUB_FST0r>;
+defm : FpUnaryAlias<"fsubp", SUBR_FPrST0>;
+defm : FpUnaryAlias<"fsubr", SUBR_FST0r>;
+defm : FpUnaryAlias<"fsubrp", SUB_FPrST0>;
+defm : FpUnaryAlias<"fmul", MUL_FST0r>;
+defm : FpUnaryAlias<"fmulp", MUL_FPrST0>;
+defm : FpUnaryAlias<"fdiv", DIV_FST0r>;
+defm : FpUnaryAlias<"fdivp", DIVR_FPrST0>;
+defm : FpUnaryAlias<"fdivr", DIVR_FST0r>;
+defm : FpUnaryAlias<"fdivrp", DIV_FPrST0>;
+defm : FpUnaryAlias<"fcomi", COM_FIr, 0>;
+defm : FpUnaryAlias<"fucomi", UCOM_FIr, 0>;
+defm : FpUnaryAlias<"fcompi", COM_FIPr>;
+defm : FpUnaryAlias<"fucompi", UCOM_FIPr>;
+
+
+// Handle "f{mulp,addp} st(0), $op" the same as "f{mulp,addp} $op", since they
+// commute. We also allow fdiv[r]p/fsubrp even though they don't commute,
+// solely because gas supports it.
+def : InstAlias<"faddp %st(0), $op", (ADD_FPrST0 RST:$op), 0>;
+def : InstAlias<"fmulp %st(0), $op", (MUL_FPrST0 RST:$op)>;
+def : InstAlias<"fsubp %st(0), $op", (SUBR_FPrST0 RST:$op)>;
+def : InstAlias<"fsubrp %st(0), $op", (SUB_FPrST0 RST:$op)>;
+def : InstAlias<"fdivp %st(0), $op", (DIVR_FPrST0 RST:$op)>;
+def : InstAlias<"fdivrp %st(0), $op", (DIV_FPrST0 RST:$op)>;
+
+// We accept "fnstsw %eax" even though it only writes %ax.
+def : InstAlias<"fnstsw %eax", (FNSTSW8r)>;
+def : InstAlias<"fnstsw %al" , (FNSTSW8r)>;
+def : InstAlias<"fnstsw" , (FNSTSW8r)>;
+
+// lcall and ljmp aliases. This seems to be an odd mapping in 64-bit mode, but
+// this is compatible with what GAS does.
+def : InstAlias<"lcall $seg, $off", (FARCALL32i i32imm:$off, i16imm:$seg)>;
+def : InstAlias<"ljmp $seg, $off", (FARJMP32i i32imm:$off, i16imm:$seg)>;
+def : InstAlias<"lcall *$dst", (FARCALL32m opaque48mem:$dst)>;
+def : InstAlias<"ljmp *$dst", (FARJMP32m opaque48mem:$dst)>;
+
+// "imul <imm>, B" is an alias for "imul <imm>, B, B".
+def : InstAlias<"imulw $imm, $r", (IMUL16rri GR16:$r, GR16:$r, i16imm:$imm)>;
+def : InstAlias<"imulw $imm, $r", (IMUL16rri8 GR16:$r, GR16:$r, i16i8imm:$imm)>;
+def : InstAlias<"imull $imm, $r", (IMUL32rri GR32:$r, GR32:$r, i32imm:$imm)>;
+def : InstAlias<"imull $imm, $r", (IMUL32rri8 GR32:$r, GR32:$r, i32i8imm:$imm)>;
+def : InstAlias<"imulq $imm, $r",(IMUL64rri32 GR64:$r, GR64:$r,i64i32imm:$imm)>;
+def : InstAlias<"imulq $imm, $r", (IMUL64rri8 GR64:$r, GR64:$r, i64i8imm:$imm)>;
+
+// inb %dx -> inb %al, %dx
+def : InstAlias<"inb %dx", (IN8rr)>;
+def : InstAlias<"inw %dx", (IN16rr)>;
+def : InstAlias<"inl %dx", (IN32rr)>;
+def : InstAlias<"inb $port", (IN8ri i8imm:$port)>;
+def : InstAlias<"inw $port", (IN16ri i8imm:$port)>;
+def : InstAlias<"inl $port", (IN32ri i8imm:$port)>;
+
+
+// jmp and call aliases for lcall and ljmp. jmp $42,$5 -> ljmp
+def : InstAlias<"call $seg, $off", (FARCALL32i i32imm:$off, i16imm:$seg)>;
+def : InstAlias<"jmp $seg, $off", (FARJMP32i i32imm:$off, i16imm:$seg)>;
+def : InstAlias<"callw $seg, $off", (FARCALL16i i16imm:$off, i16imm:$seg)>;
+def : InstAlias<"jmpw $seg, $off", (FARJMP16i i16imm:$off, i16imm:$seg)>;
+def : InstAlias<"calll $seg, $off", (FARCALL32i i32imm:$off, i16imm:$seg)>;
+def : InstAlias<"jmpl $seg, $off", (FARJMP32i i32imm:$off, i16imm:$seg)>;
+
+// Force mov without a suffix with a segment and mem to prefer the 'l' form of
+// the move. All segment/mem forms are equivalent, this has the shortest
+// encoding.
+def : InstAlias<"mov $mem, $seg", (MOV32sm SEGMENT_REG:$seg, i32mem:$mem)>;
+def : InstAlias<"mov $seg, $mem", (MOV32ms i32mem:$mem, SEGMENT_REG:$seg)>;
+
+// Match 'movq <largeimm>, <reg>' as an alias for movabsq.
+def : InstAlias<"movq $imm, $reg", (MOV64ri GR64:$reg, i64imm:$imm)>;
+
+// Match 'movq GR64, MMX' as an alias for movd.
+def : InstAlias<"movq $src, $dst",
+ (MMX_MOVD64to64rr VR64:$dst, GR64:$src), 0>;
+def : InstAlias<"movq $src, $dst",
+ (MMX_MOVD64from64rr GR64:$dst, VR64:$src), 0>;
+
+// movsd with no operands (as opposed to the SSE scalar move of a double) is an
+// alias for movsl. (as in rep; movsd)
+def : InstAlias<"movsd", (MOVSD)>;
+
+// movsx aliases
+def : InstAlias<"movsx $src, $dst", (MOVSX16rr8 GR16:$dst, GR8:$src), 0>;
+def : InstAlias<"movsx $src, $dst", (MOVSX16rm8 GR16:$dst, i8mem:$src), 0>;
+def : InstAlias<"movsx $src, $dst", (MOVSX32rr8 GR32:$dst, GR8:$src), 0>;
+def : InstAlias<"movsx $src, $dst", (MOVSX32rr16 GR32:$dst, GR16:$src), 0>;
+def : InstAlias<"movsx $src, $dst", (MOVSX64rr8 GR64:$dst, GR8:$src), 0>;
+def : InstAlias<"movsx $src, $dst", (MOVSX64rr16 GR64:$dst, GR16:$src), 0>;
+def : InstAlias<"movsx $src, $dst", (MOVSX64rr32 GR64:$dst, GR32:$src), 0>;
+
+// movzx aliases
+def : InstAlias<"movzx $src, $dst", (MOVZX16rr8 GR16:$dst, GR8:$src), 0>;
+def : InstAlias<"movzx $src, $dst", (MOVZX16rm8 GR16:$dst, i8mem:$src), 0>;
+def : InstAlias<"movzx $src, $dst", (MOVZX32rr8 GR32:$dst, GR8:$src), 0>;
+def : InstAlias<"movzx $src, $dst", (MOVZX32rr16 GR32:$dst, GR16:$src), 0>;
+def : InstAlias<"movzx $src, $dst", (MOVZX64rr8_Q GR64:$dst, GR8:$src), 0>;
+def : InstAlias<"movzx $src, $dst", (MOVZX64rr16_Q GR64:$dst, GR16:$src), 0>;
+// Note: No GR32->GR64 movzx form.
+
+// outb %dx -> outb %al, %dx
+def : InstAlias<"outb %dx", (OUT8rr)>;
+def : InstAlias<"outw %dx", (OUT16rr)>;
+def : InstAlias<"outl %dx", (OUT32rr)>;
+def : InstAlias<"outb $port", (OUT8ir i8imm:$port)>;
+def : InstAlias<"outw $port", (OUT16ir i8imm:$port)>;
+def : InstAlias<"outl $port", (OUT32ir i8imm:$port)>;
+
+// 'sldt <mem>' can be encoded with either sldtw or sldtq with the same
+// effect (both store to a 16-bit mem). Force to sldtw to avoid ambiguity
+// errors, since its encoding is the most compact.
+def : InstAlias<"sldt $mem", (SLDT16m i16mem:$mem)>;
+
+// shld/shrd op,op -> shld op, op, 1
+def : InstAlias<"shldw $r1, $r2", (SHLD16rri8 GR16:$r1, GR16:$r2, 1)>;
+def : InstAlias<"shldl $r1, $r2", (SHLD32rri8 GR32:$r1, GR32:$r2, 1)>;
+def : InstAlias<"shldq $r1, $r2", (SHLD64rri8 GR64:$r1, GR64:$r2, 1)>;
+def : InstAlias<"shrdw $r1, $r2", (SHRD16rri8 GR16:$r1, GR16:$r2, 1)>;
+def : InstAlias<"shrdl $r1, $r2", (SHRD32rri8 GR32:$r1, GR32:$r2, 1)>;
+def : InstAlias<"shrdq $r1, $r2", (SHRD64rri8 GR64:$r1, GR64:$r2, 1)>;
+
+def : InstAlias<"shldw $mem, $reg", (SHLD16mri8 i16mem:$mem, GR16:$reg, 1)>;
+def : InstAlias<"shldl $mem, $reg", (SHLD32mri8 i32mem:$mem, GR32:$reg, 1)>;
+def : InstAlias<"shldq $mem, $reg", (SHLD64mri8 i64mem:$mem, GR64:$reg, 1)>;
+def : InstAlias<"shrdw $mem, $reg", (SHRD16mri8 i16mem:$mem, GR16:$reg, 1)>;
+def : InstAlias<"shrdl $mem, $reg", (SHRD32mri8 i32mem:$mem, GR32:$reg, 1)>;
+def : InstAlias<"shrdq $mem, $reg", (SHRD64mri8 i64mem:$mem, GR64:$reg, 1)>;
+
+/* FIXME: This is disabled because the asm matcher is currently incapable of
+ * matching a fixed immediate like $1.
+// "shl X, $1" is an alias for "shl X".
+multiclass ShiftRotateByOneAlias<string Mnemonic, string Opc> {
+ def : InstAlias<!strconcat(Mnemonic, "b $op, $$1"),
+ (!cast<Instruction>(!strconcat(Opc, "8r1")) GR8:$op)>;
+ def : InstAlias<!strconcat(Mnemonic, "w $op, $$1"),
+ (!cast<Instruction>(!strconcat(Opc, "16r1")) GR16:$op)>;
+ def : InstAlias<!strconcat(Mnemonic, "l $op, $$1"),
+ (!cast<Instruction>(!strconcat(Opc, "32r1")) GR32:$op)>;
+ def : InstAlias<!strconcat(Mnemonic, "q $op, $$1"),
+ (!cast<Instruction>(!strconcat(Opc, "64r1")) GR64:$op)>;
+ def : InstAlias<!strconcat(Mnemonic, "b $op, $$1"),
+ (!cast<Instruction>(!strconcat(Opc, "8m1")) i8mem:$op)>;
+ def : InstAlias<!strconcat(Mnemonic, "w $op, $$1"),
+ (!cast<Instruction>(!strconcat(Opc, "16m1")) i16mem:$op)>;
+ def : InstAlias<!strconcat(Mnemonic, "l $op, $$1"),
+ (!cast<Instruction>(!strconcat(Opc, "32m1")) i32mem:$op)>;
+ def : InstAlias<!strconcat(Mnemonic, "q $op, $$1"),
+ (!cast<Instruction>(!strconcat(Opc, "64m1")) i64mem:$op)>;
+}
+
+defm : ShiftRotateByOneAlias<"rcl", "RCL">;
+defm : ShiftRotateByOneAlias<"rcr", "RCR">;
+defm : ShiftRotateByOneAlias<"rol", "ROL">;
+defm : ShiftRotateByOneAlias<"ror", "ROR">;
+FIXME */
+
+// test: We accept "testX <reg>, <mem>" and "testX <mem>, <reg>" as synonyms.
+def : InstAlias<"testb $val, $mem", (TEST8rm GR8 :$val, i8mem :$mem)>;
+def : InstAlias<"testw $val, $mem", (TEST16rm GR16:$val, i16mem:$mem)>;
+def : InstAlias<"testl $val, $mem", (TEST32rm GR32:$val, i32mem:$mem)>;
+def : InstAlias<"testq $val, $mem", (TEST64rm GR64:$val, i64mem:$mem)>;
+
+// xchg: We accept "xchgX <reg>, <mem>" and "xchgX <mem>, <reg>" as synonyms.
+def : InstAlias<"xchgb $mem, $val", (XCHG8rm GR8 :$val, i8mem :$mem)>;
+def : InstAlias<"xchgw $mem, $val", (XCHG16rm GR16:$val, i16mem:$mem)>;
+def : InstAlias<"xchgl $mem, $val", (XCHG32rm GR32:$val, i32mem:$mem)>;
+def : InstAlias<"xchgq $mem, $val", (XCHG64rm GR64:$val, i64mem:$mem)>;
+
+// xchg: We accept "xchgX <reg>, %eax" and "xchgX %eax, <reg>" as synonyms.
+def : InstAlias<"xchgw %ax, $src", (XCHG16ar GR16:$src)>;
+def : InstAlias<"xchgl %eax, $src", (XCHG32ar GR32:$src)>, Requires<[In32BitMode]>;
+def : InstAlias<"xchgl %eax, $src", (XCHG32ar64 GR32_NOAX:$src)>, Requires<[In64BitMode]>;
+def : InstAlias<"xchgq %rax, $src", (XCHG64ar GR64:$src)>;
diff --git a/contrib/llvm/lib/Target/X86/X86InstrMMX.td b/contrib/llvm/lib/Target/X86/X86InstrMMX.td
new file mode 100644
index 0000000..b2d9fca
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrMMX.td
@@ -0,0 +1,454 @@
+//====- X86InstrMMX.td - Describe the MMX Instruction Set --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the X86 MMX instruction set, defining the instructions,
+// and properties of the instructions which are needed for code generation,
+// machine code emission, and analysis.
+//
+// All instructions that use MMX should be in this file, even if they also use
+// SSE.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MMX Multiclasses
+//===----------------------------------------------------------------------===//
+
+let Constraints = "$src1 = $dst" in {
+ // MMXI_binop_rm_int - Simple MMX binary operator based on intrinsic.
+ // When this is cleaned up, remove the FIXME from X86RecognizableInstr.cpp.
+ multiclass MMXI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
+ bit Commutable = 0> {
+ def irr : MMXI<opc, MRMSrcReg, (outs VR64:$dst),
+ (ins VR64:$src1, VR64:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2))]> {
+ let isCommutable = Commutable;
+ }
+ def irm : MMXI<opc, MRMSrcMem, (outs VR64:$dst),
+ (ins VR64:$src1, i64mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst, (IntId VR64:$src1,
+ (bitconvert (load_mmx addr:$src2))))]>;
+ }
+
+ multiclass MMXI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
+ string OpcodeStr, Intrinsic IntId,
+ Intrinsic IntId2> {
+ def rr : MMXI<opc, MRMSrcReg, (outs VR64:$dst),
+ (ins VR64:$src1, VR64:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2))]>;
+ def rm : MMXI<opc, MRMSrcMem, (outs VR64:$dst),
+ (ins VR64:$src1, i64mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst, (IntId VR64:$src1,
+ (bitconvert (load_mmx addr:$src2))))]>;
+ def ri : MMXIi8<opc2, ImmForm, (outs VR64:$dst),
+ (ins VR64:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst, (IntId2 VR64:$src1, (i32 imm:$src2)))]>;
+ }
+}
+
+/// Unary MMX instructions requiring SSSE3.
+multiclass SS3I_unop_rm_int_mm<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId64> {
+ def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR64:$dst, (IntId64 VR64:$src))]>;
+
+ def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR64:$dst,
+ (IntId64 (bitconvert (memopmmx addr:$src))))]>;
+}
+
+/// Binary MMX instructions requiring SSSE3.
+let ImmT = NoImm, Constraints = "$src1 = $dst" in {
+multiclass SS3I_binop_rm_int_mm<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId64> {
+ let isCommutable = 0 in
+ def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst),
+ (ins VR64:$src1, VR64:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))]>;
+ def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst),
+ (ins VR64:$src1, i64mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst,
+ (IntId64 VR64:$src1,
+ (bitconvert (memopmmx addr:$src2))))]>;
+}
+}
+
+/// PALIGN MMX instructions (require SSSE3).
+multiclass ssse3_palign_mm<string asm, Intrinsic IntId> {
+ def R64irr : SS3AI<0x0F, MRMSrcReg, (outs VR64:$dst),
+ (ins VR64:$src1, VR64:$src2, i8imm:$src3),
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2, (i8 imm:$src3)))]>;
+ def R64irm : SS3AI<0x0F, MRMSrcMem, (outs VR64:$dst),
+ (ins VR64:$src1, i64mem:$src2, i8imm:$src3),
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR64:$dst, (IntId VR64:$src1,
+ (bitconvert (load_mmx addr:$src2)), (i8 imm:$src3)))]>;
+}
+
+multiclass sse12_cvt_pint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
+ Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag,
+ string asm, Domain d> {
+ def irr : PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
+ [(set DstRC:$dst, (Int SrcRC:$src))], d>;
+ def irm : PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
+ [(set DstRC:$dst, (Int (ld_frag addr:$src)))], d>;
+}
+
+multiclass sse12_cvt_pint_3addr<bits<8> opc, RegisterClass SrcRC,
+ RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop,
+ PatFrag ld_frag, string asm, Domain d> {
+ def irr : PI<opc, MRMSrcReg, (outs DstRC:$dst),(ins DstRC:$src1, SrcRC:$src2),
+ asm, [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))], d>;
+ def irm : PI<opc, MRMSrcMem, (outs DstRC:$dst),
+ (ins DstRC:$src1, x86memop:$src2), asm,
+ [(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))], d>;
+}
+
+//===----------------------------------------------------------------------===//
+// MMX EMMS Instruction
+//===----------------------------------------------------------------------===//
+
+def MMX_EMMS : MMXI<0x77, RawFrm, (outs), (ins), "emms",
+ [(int_x86_mmx_emms)]>;
+
+//===----------------------------------------------------------------------===//
+// MMX Scalar Instructions
+//===----------------------------------------------------------------------===//
+
+// Data Transfer Instructions
+def MMX_MOVD64rr : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR64:$dst,
+ (x86mmx (scalar_to_vector GR32:$src)))]>;
+let canFoldAsLoad = 1 in
+def MMX_MOVD64rm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst), (ins i32mem:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR64:$dst,
+ (x86mmx (scalar_to_vector (loadi32 addr:$src))))]>;
+let mayStore = 1 in
+def MMX_MOVD64mr : MMXI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR64:$src),
+ "movd\t{$src, $dst|$dst, $src}", []>;
+def MMX_MOVD64grr : MMXI<0x7E, MRMDestReg, (outs), (ins GR32:$dst, VR64:$src),
+ "movd\t{$src, $dst|$dst, $src}", []>;
+
+let neverHasSideEffects = 1 in
+def MMX_MOVD64to64rr : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ []>;
+
+// These are 64 bit moves, but since the OS X assembler doesn't
+// recognize a register-register movq, we write them as
+// movd.
+def MMX_MOVD64from64rr : MMXRI<0x7E, MRMDestReg,
+ (outs GR64:$dst), (ins VR64:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst,
+ (bitconvert VR64:$src))]>;
+def MMX_MOVD64rrv164 : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR64:$dst,
+ (bitconvert GR64:$src))]>;
+let neverHasSideEffects = 1 in
+def MMX_MOVQ64rr : MMXI<0x6F, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
+ "movq\t{$src, $dst|$dst, $src}", []>;
+let canFoldAsLoad = 1 in
+def MMX_MOVQ64rm : MMXI<0x6F, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(set VR64:$dst, (load_mmx addr:$src))]>;
+def MMX_MOVQ64mr : MMXI<0x7F, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(store (x86mmx VR64:$src), addr:$dst)]>;
+
+def MMX_MOVDQ2Qrr : SDIi8<0xD6, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
+ "movdq2q\t{$src, $dst|$dst, $src}",
+ [(set VR64:$dst,
+ (x86mmx (bitconvert
+ (i64 (vector_extract (v2i64 VR128:$src),
+ (iPTR 0))))))]>;
+
+def MMX_MOVQ2DQrr : SSDIi8<0xD6, MRMSrcReg, (outs VR128:$dst), (ins VR64:$src),
+ "movq2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v2i64 (scalar_to_vector
+ (i64 (bitconvert (x86mmx VR64:$src))))))]>;
+
+let neverHasSideEffects = 1 in
+def MMX_MOVQ2FR64rr: SSDIi8<0xD6, MRMSrcReg, (outs FR64:$dst), (ins VR64:$src),
+ "movq2dq\t{$src, $dst|$dst, $src}", []>;
+
+def MMX_MOVFR642Qrr: SDIi8<0xD6, MRMSrcReg, (outs VR64:$dst), (ins FR64:$src),
+ "movdq2q\t{$src, $dst|$dst, $src}", []>;
+
+def MMX_MOVNTQmr : MMXI<0xE7, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
+ "movntq\t{$src, $dst|$dst, $src}",
+ [(int_x86_mmx_movnt_dq addr:$dst, VR64:$src)]>;
+
+let AddedComplexity = 15 in
+// movd to MMX register zero-extends
+def MMX_MOVZDI2PDIrr : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR64:$dst,
+ (x86mmx (X86vzmovl (x86mmx (scalar_to_vector GR32:$src)))))]>;
+let AddedComplexity = 20 in
+def MMX_MOVZDI2PDIrm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst),
+ (ins i32mem:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR64:$dst,
+ (x86mmx (X86vzmovl (x86mmx
+ (scalar_to_vector (loadi32 addr:$src))))))]>;
+
+// Arithmetic Instructions
+defm MMX_PABSB : SS3I_unop_rm_int_mm<0x1C, "pabsb", int_x86_ssse3_pabs_b>;
+defm MMX_PABSW : SS3I_unop_rm_int_mm<0x1D, "pabsw", int_x86_ssse3_pabs_w>;
+defm MMX_PABSD : SS3I_unop_rm_int_mm<0x1E, "pabsd", int_x86_ssse3_pabs_d>;
+// -- Addition
+defm MMX_PADDB : MMXI_binop_rm_int<0xFC, "paddb", int_x86_mmx_padd_b, 1>;
+defm MMX_PADDW : MMXI_binop_rm_int<0xFD, "paddw", int_x86_mmx_padd_w, 1>;
+defm MMX_PADDD : MMXI_binop_rm_int<0xFE, "paddd", int_x86_mmx_padd_d, 1>;
+defm MMX_PADDQ : MMXI_binop_rm_int<0xD4, "paddq", int_x86_mmx_padd_q, 1>;
+defm MMX_PADDSB : MMXI_binop_rm_int<0xEC, "paddsb" , int_x86_mmx_padds_b, 1>;
+defm MMX_PADDSW : MMXI_binop_rm_int<0xED, "paddsw" , int_x86_mmx_padds_w, 1>;
+
+defm MMX_PADDUSB : MMXI_binop_rm_int<0xDC, "paddusb", int_x86_mmx_paddus_b, 1>;
+defm MMX_PADDUSW : MMXI_binop_rm_int<0xDD, "paddusw", int_x86_mmx_paddus_w, 1>;
+
+defm MMX_PHADDW : SS3I_binop_rm_int_mm<0x01, "phaddw", int_x86_ssse3_phadd_w>;
+defm MMX_PHADD : SS3I_binop_rm_int_mm<0x02, "phaddd", int_x86_ssse3_phadd_d>;
+defm MMX_PHADDSW : SS3I_binop_rm_int_mm<0x03, "phaddsw",int_x86_ssse3_phadd_sw>;
+
+
+// -- Subtraction
+defm MMX_PSUBB : MMXI_binop_rm_int<0xF8, "psubb", int_x86_mmx_psub_b>;
+defm MMX_PSUBW : MMXI_binop_rm_int<0xF9, "psubw", int_x86_mmx_psub_w>;
+defm MMX_PSUBD : MMXI_binop_rm_int<0xFA, "psubd", int_x86_mmx_psub_d>;
+defm MMX_PSUBQ : MMXI_binop_rm_int<0xFB, "psubq", int_x86_mmx_psub_q>;
+
+defm MMX_PSUBSB : MMXI_binop_rm_int<0xE8, "psubsb" , int_x86_mmx_psubs_b>;
+defm MMX_PSUBSW : MMXI_binop_rm_int<0xE9, "psubsw" , int_x86_mmx_psubs_w>;
+
+defm MMX_PSUBUSB : MMXI_binop_rm_int<0xD8, "psubusb", int_x86_mmx_psubus_b>;
+defm MMX_PSUBUSW : MMXI_binop_rm_int<0xD9, "psubusw", int_x86_mmx_psubus_w>;
+
+defm MMX_PHSUBW : SS3I_binop_rm_int_mm<0x05, "phsubw", int_x86_ssse3_phsub_w>;
+defm MMX_PHSUBD : SS3I_binop_rm_int_mm<0x06, "phsubd", int_x86_ssse3_phsub_d>;
+defm MMX_PHSUBSW : SS3I_binop_rm_int_mm<0x07, "phsubsw",int_x86_ssse3_phsub_sw>;
+
+// -- Multiplication
+defm MMX_PMULLW : MMXI_binop_rm_int<0xD5, "pmullw", int_x86_mmx_pmull_w, 1>;
+
+defm MMX_PMULHW : MMXI_binop_rm_int<0xE5, "pmulhw", int_x86_mmx_pmulh_w, 1>;
+defm MMX_PMULHUW : MMXI_binop_rm_int<0xE4, "pmulhuw", int_x86_mmx_pmulhu_w, 1>;
+defm MMX_PMULUDQ : MMXI_binop_rm_int<0xF4, "pmuludq", int_x86_mmx_pmulu_dq, 1>;
+let isCommutable = 1 in
+defm MMX_PMULHRSW : SS3I_binop_rm_int_mm<0x0B, "pmulhrsw",
+ int_x86_ssse3_pmul_hr_sw>;
+
+// -- Miscellanea
+defm MMX_PMADDWD : MMXI_binop_rm_int<0xF5, "pmaddwd", int_x86_mmx_pmadd_wd, 1>;
+
+defm MMX_PMADDUBSW : SS3I_binop_rm_int_mm<0x04, "pmaddubsw",
+ int_x86_ssse3_pmadd_ub_sw>;
+defm MMX_PAVGB : MMXI_binop_rm_int<0xE0, "pavgb", int_x86_mmx_pavg_b, 1>;
+defm MMX_PAVGW : MMXI_binop_rm_int<0xE3, "pavgw", int_x86_mmx_pavg_w, 1>;
+
+defm MMX_PMINUB : MMXI_binop_rm_int<0xDA, "pminub", int_x86_mmx_pminu_b, 1>;
+defm MMX_PMINSW : MMXI_binop_rm_int<0xEA, "pminsw", int_x86_mmx_pmins_w, 1>;
+
+defm MMX_PMAXUB : MMXI_binop_rm_int<0xDE, "pmaxub", int_x86_mmx_pmaxu_b, 1>;
+defm MMX_PMAXSW : MMXI_binop_rm_int<0xEE, "pmaxsw", int_x86_mmx_pmaxs_w, 1>;
+
+defm MMX_PSADBW : MMXI_binop_rm_int<0xF6, "psadbw", int_x86_mmx_psad_bw, 1>;
+
+defm MMX_PSIGNB : SS3I_binop_rm_int_mm<0x08, "psignb", int_x86_ssse3_psign_b>;
+defm MMX_PSIGNW : SS3I_binop_rm_int_mm<0x09, "psignw", int_x86_ssse3_psign_w>;
+defm MMX_PSIGND : SS3I_binop_rm_int_mm<0x0A, "psignd", int_x86_ssse3_psign_d>;
+let Constraints = "$src1 = $dst" in
+ defm MMX_PALIGN : ssse3_palign_mm<"palignr", int_x86_mmx_palignr_b>;
+
+// Logical Instructions
+defm MMX_PAND : MMXI_binop_rm_int<0xDB, "pand", int_x86_mmx_pand, 1>;
+defm MMX_POR : MMXI_binop_rm_int<0xEB, "por" , int_x86_mmx_por, 1>;
+defm MMX_PXOR : MMXI_binop_rm_int<0xEF, "pxor", int_x86_mmx_pxor, 1>;
+defm MMX_PANDN : MMXI_binop_rm_int<0xDF, "pandn", int_x86_mmx_pandn>;
+
+// Shift Instructions
+defm MMX_PSRLW : MMXI_binop_rmi_int<0xD1, 0x71, MRM2r, "psrlw",
+ int_x86_mmx_psrl_w, int_x86_mmx_psrli_w>;
+defm MMX_PSRLD : MMXI_binop_rmi_int<0xD2, 0x72, MRM2r, "psrld",
+ int_x86_mmx_psrl_d, int_x86_mmx_psrli_d>;
+defm MMX_PSRLQ : MMXI_binop_rmi_int<0xD3, 0x73, MRM2r, "psrlq",
+ int_x86_mmx_psrl_q, int_x86_mmx_psrli_q>;
+
+defm MMX_PSLLW : MMXI_binop_rmi_int<0xF1, 0x71, MRM6r, "psllw",
+ int_x86_mmx_psll_w, int_x86_mmx_pslli_w>;
+defm MMX_PSLLD : MMXI_binop_rmi_int<0xF2, 0x72, MRM6r, "pslld",
+ int_x86_mmx_psll_d, int_x86_mmx_pslli_d>;
+defm MMX_PSLLQ : MMXI_binop_rmi_int<0xF3, 0x73, MRM6r, "psllq",
+ int_x86_mmx_psll_q, int_x86_mmx_pslli_q>;
+
+defm MMX_PSRAW : MMXI_binop_rmi_int<0xE1, 0x71, MRM4r, "psraw",
+ int_x86_mmx_psra_w, int_x86_mmx_psrai_w>;
+defm MMX_PSRAD : MMXI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad",
+ int_x86_mmx_psra_d, int_x86_mmx_psrai_d>;
+
+// Comparison Instructions
+defm MMX_PCMPEQB : MMXI_binop_rm_int<0x74, "pcmpeqb", int_x86_mmx_pcmpeq_b>;
+defm MMX_PCMPEQW : MMXI_binop_rm_int<0x75, "pcmpeqw", int_x86_mmx_pcmpeq_w>;
+defm MMX_PCMPEQD : MMXI_binop_rm_int<0x76, "pcmpeqd", int_x86_mmx_pcmpeq_d>;
+
+defm MMX_PCMPGTB : MMXI_binop_rm_int<0x64, "pcmpgtb", int_x86_mmx_pcmpgt_b>;
+defm MMX_PCMPGTW : MMXI_binop_rm_int<0x65, "pcmpgtw", int_x86_mmx_pcmpgt_w>;
+defm MMX_PCMPGTD : MMXI_binop_rm_int<0x66, "pcmpgtd", int_x86_mmx_pcmpgt_d>;
+
+// -- Unpack Instructions
+defm MMX_PUNPCKHBW : MMXI_binop_rm_int<0x68, "punpckhbw",
+ int_x86_mmx_punpckhbw>;
+defm MMX_PUNPCKHWD : MMXI_binop_rm_int<0x69, "punpckhwd",
+ int_x86_mmx_punpckhwd>;
+defm MMX_PUNPCKHDQ : MMXI_binop_rm_int<0x6A, "punpckhdq",
+ int_x86_mmx_punpckhdq>;
+defm MMX_PUNPCKLBW : MMXI_binop_rm_int<0x60, "punpcklbw",
+ int_x86_mmx_punpcklbw>;
+defm MMX_PUNPCKLWD : MMXI_binop_rm_int<0x61, "punpcklwd",
+ int_x86_mmx_punpcklwd>;
+defm MMX_PUNPCKLDQ : MMXI_binop_rm_int<0x62, "punpckldq",
+ int_x86_mmx_punpckldq>;
+
+// -- Pack Instructions
+defm MMX_PACKSSWB : MMXI_binop_rm_int<0x63, "packsswb", int_x86_mmx_packsswb>;
+defm MMX_PACKSSDW : MMXI_binop_rm_int<0x6B, "packssdw", int_x86_mmx_packssdw>;
+defm MMX_PACKUSWB : MMXI_binop_rm_int<0x67, "packuswb", int_x86_mmx_packuswb>;
+
+// -- Shuffle Instructions
+defm MMX_PSHUFB : SS3I_binop_rm_int_mm<0x00, "pshufb", int_x86_ssse3_pshuf_b>;
+
+def MMX_PSHUFWri : MMXIi8<0x70, MRMSrcReg,
+ (outs VR64:$dst), (ins VR64:$src1, i8imm:$src2),
+ "pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR64:$dst,
+ (int_x86_sse_pshuf_w VR64:$src1, imm:$src2))]>;
+def MMX_PSHUFWmi : MMXIi8<0x70, MRMSrcMem,
+ (outs VR64:$dst), (ins i64mem:$src1, i8imm:$src2),
+ "pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR64:$dst,
+ (int_x86_sse_pshuf_w (load_mmx addr:$src1),
+ imm:$src2))]>;
+
+
+
+
+
+// -- Conversion Instructions
+defm MMX_CVTPS2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtps2pi,
+ f64mem, load, "cvtps2pi\t{$src, $dst|$dst, $src}",
+ SSEPackedSingle>, TB;
+defm MMX_CVTPD2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtpd2pi,
+ f128mem, memop, "cvtpd2pi\t{$src, $dst|$dst, $src}",
+ SSEPackedDouble>, TB, OpSize;
+defm MMX_CVTTPS2PI : sse12_cvt_pint<0x2C, VR128, VR64, int_x86_sse_cvttps2pi,
+ f64mem, load, "cvttps2pi\t{$src, $dst|$dst, $src}",
+ SSEPackedSingle>, TB;
+defm MMX_CVTTPD2PI : sse12_cvt_pint<0x2C, VR128, VR64, int_x86_sse_cvttpd2pi,
+ f128mem, memop, "cvttpd2pi\t{$src, $dst|$dst, $src}",
+ SSEPackedDouble>, TB, OpSize;
+defm MMX_CVTPI2PD : sse12_cvt_pint<0x2A, VR64, VR128, int_x86_sse_cvtpi2pd,
+ i64mem, load, "cvtpi2pd\t{$src, $dst|$dst, $src}",
+ SSEPackedDouble>, TB, OpSize;
+let Constraints = "$src1 = $dst" in {
+ defm MMX_CVTPI2PS : sse12_cvt_pint_3addr<0x2A, VR64, VR128,
+ int_x86_sse_cvtpi2ps,
+ i64mem, load, "cvtpi2ps\t{$src2, $dst|$dst, $src2}",
+ SSEPackedSingle>, TB;
+}
+
+// Extract / Insert
+def MMX_PEXTRWirri: MMXIi8<0xC5, MRMSrcReg,
+ (outs GR32:$dst), (ins VR64:$src1, i32i8imm:$src2),
+ "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR32:$dst, (int_x86_mmx_pextr_w VR64:$src1,
+ (iPTR imm:$src2)))]>;
+let Constraints = "$src1 = $dst" in {
+ def MMX_PINSRWirri : MMXIi8<0xC4, MRMSrcReg,
+ (outs VR64:$dst),
+ (ins VR64:$src1, GR32:$src2, i32i8imm:$src3),
+ "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set VR64:$dst, (int_x86_mmx_pinsr_w VR64:$src1,
+ GR32:$src2, (iPTR imm:$src3)))]>;
+
+ def MMX_PINSRWirmi : MMXIi8<0xC4, MRMSrcMem,
+ (outs VR64:$dst),
+ (ins VR64:$src1, i16mem:$src2, i32i8imm:$src3),
+ "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set VR64:$dst, (int_x86_mmx_pinsr_w VR64:$src1,
+ (i32 (anyext (loadi16 addr:$src2))),
+ (iPTR imm:$src3)))]>;
+}
+
+// Mask creation
+def MMX_PMOVMSKBrr : MMXI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR64:$src),
+ "pmovmskb\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst,
+ (int_x86_mmx_pmovmskb VR64:$src))]>;
+
+
+// MMX to XMM for vector types
+def MMX_X86movq2dq : SDNode<"X86ISD::MOVQ2DQ", SDTypeProfile<1, 1,
+ [SDTCisVT<0, v2i64>, SDTCisVT<1, x86mmx>]>>;
+
+def : Pat<(v2i64 (MMX_X86movq2dq VR64:$src)),
+ (v2i64 (MMX_MOVQ2DQrr VR64:$src))>;
+
+def : Pat<(v2i64 (MMX_X86movq2dq (load_mmx addr:$src))),
+ (v2i64 (MOVQI2PQIrm addr:$src))>;
+
+def : Pat<(v2i64 (MMX_X86movq2dq
+ (x86mmx (scalar_to_vector (loadi32 addr:$src))))),
+ (v2i64 (MOVDI2PDIrm addr:$src))>;
+
+// Low word of XMM to MMX.
+def MMX_X86movdq2q : SDNode<"X86ISD::MOVDQ2Q", SDTypeProfile<1, 1,
+ [SDTCisVT<0, x86mmx>, SDTCisVT<1, v2i64>]>>;
+
+def : Pat<(x86mmx (MMX_X86movdq2q VR128:$src)),
+ (x86mmx (MMX_MOVDQ2Qrr VR128:$src))>;
+
+def : Pat<(x86mmx (MMX_X86movdq2q (loadv2i64 addr:$src))),
+ (x86mmx (MMX_MOVQ64rm addr:$src))>;
+
+// Misc.
+let Uses = [EDI] in
+def MMX_MASKMOVQ : MMXI<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask),
+ "maskmovq\t{$mask, $src|$src, $mask}",
+ [(int_x86_mmx_maskmovq VR64:$src, VR64:$mask, EDI)]>;
+let Uses = [RDI] in
+def MMX_MASKMOVQ64: MMXI64<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask),
+ "maskmovq\t{$mask, $src|$src, $mask}",
+ [(int_x86_mmx_maskmovq VR64:$src, VR64:$mask, RDI)]>;
+
+// 64-bit bit convert.
+def : Pat<(x86mmx (bitconvert (i64 GR64:$src))),
+ (MMX_MOVD64to64rr GR64:$src)>;
+def : Pat<(i64 (bitconvert (x86mmx VR64:$src))),
+ (MMX_MOVD64from64rr VR64:$src)>;
+def : Pat<(f64 (bitconvert (x86mmx VR64:$src))),
+ (MMX_MOVQ2FR64rr VR64:$src)>;
+def : Pat<(x86mmx (bitconvert (f64 FR64:$src))),
+ (MMX_MOVFR642Qrr FR64:$src)>;
+
+
diff --git a/contrib/llvm/lib/Target/X86/X86InstrSSE.td b/contrib/llvm/lib/Target/X86/X86InstrSSE.td
new file mode 100644
index 0000000..d3ced23
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrSSE.td
@@ -0,0 +1,6799 @@
+//====- X86InstrSSE.td - Describe the X86 Instruction Set --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the X86 SSE instruction set, defining the instructions,
+// and properties of the instructions which are needed for code generation,
+// machine code emission, and analysis.
+//
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 Instructions Classes
+//===----------------------------------------------------------------------===//
+
+/// sse12_fp_scalar - SSE 1 & 2 scalar instructions class
+multiclass sse12_fp_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ RegisterClass RC, X86MemOperand x86memop,
+ bit Is2Addr = 1> {
+ let isCommutable = 1 in {
+ def rr : SI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (OpNode RC:$src1, RC:$src2))]>;
+ }
+ def rm : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))]>;
+}
+
+/// sse12_fp_scalar_int - SSE 1 & 2 scalar instructions intrinsics class
+multiclass sse12_fp_scalar_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
+ string asm, string SSEVer, string FPSizeStr,
+ Operand memopr, ComplexPattern mem_cpat,
+ bit Is2Addr = 1> {
+ def rr_Int : SI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (!cast<Intrinsic>(
+ !strconcat("int_x86_sse", SSEVer, "_", OpcodeStr, FPSizeStr))
+ RC:$src1, RC:$src2))]>;
+ def rm_Int : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, memopr:$src2),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (!cast<Intrinsic>(!strconcat("int_x86_sse",
+ SSEVer, "_", OpcodeStr, FPSizeStr))
+ RC:$src1, mem_cpat:$src2))]>;
+}
+
+/// sse12_fp_packed - SSE 1 & 2 packed instructions class
+multiclass sse12_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ RegisterClass RC, ValueType vt,
+ X86MemOperand x86memop, PatFrag mem_frag,
+ Domain d, bit Is2Addr = 1> {
+ let isCommutable = 1 in
+ def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], d>;
+ let mayLoad = 1 in
+ def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (OpNode RC:$src1, (mem_frag addr:$src2)))], d>;
+}
+
+/// sse12_fp_packed_logical_rm - SSE 1 & 2 packed instructions class
+multiclass sse12_fp_packed_logical_rm<bits<8> opc, RegisterClass RC, Domain d,
+ string OpcodeStr, X86MemOperand x86memop,
+ list<dag> pat_rr, list<dag> pat_rm,
+ bit Is2Addr = 1> {
+ let isCommutable = 1 in
+ def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ pat_rr, d>;
+ def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ pat_rm, d>;
+}
+
+/// sse12_fp_packed_int - SSE 1 & 2 packed instructions intrinsics class
+multiclass sse12_fp_packed_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
+ string asm, string SSEVer, string FPSizeStr,
+ X86MemOperand x86memop, PatFrag mem_frag,
+ Domain d, bit Is2Addr = 1> {
+ def rr_Int : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (!cast<Intrinsic>(
+ !strconcat("int_x86_", SSEVer, "_", OpcodeStr, FPSizeStr))
+ RC:$src1, RC:$src2))], d>;
+ def rm_Int : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1,x86memop:$src2),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (!cast<Intrinsic>(
+ !strconcat("int_x86_", SSEVer, "_", OpcodeStr, FPSizeStr))
+ RC:$src1, (mem_frag addr:$src2)))], d>;
+}
+
+//===----------------------------------------------------------------------===//
+// Non-instruction patterns
+//===----------------------------------------------------------------------===//
+
+// A vector extract of the first f32/f64 position is a subregister copy
+def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
+ (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
+def : Pat<(f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
+ (f64 (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>;
+
+// A 128-bit subvector extract from the first 256-bit vector position
+// is a subregister copy that needs no instruction.
+def : Pat<(v4i32 (extract_subvector (v8i32 VR256:$src), (i32 0))),
+ (v4i32 (EXTRACT_SUBREG (v8i32 VR256:$src), sub_xmm))>;
+def : Pat<(v4f32 (extract_subvector (v8f32 VR256:$src), (i32 0))),
+ (v4f32 (EXTRACT_SUBREG (v8f32 VR256:$src), sub_xmm))>;
+
+def : Pat<(v2i64 (extract_subvector (v4i64 VR256:$src), (i32 0))),
+ (v2i64 (EXTRACT_SUBREG (v4i64 VR256:$src), sub_xmm))>;
+def : Pat<(v2f64 (extract_subvector (v4f64 VR256:$src), (i32 0))),
+ (v2f64 (EXTRACT_SUBREG (v4f64 VR256:$src), sub_xmm))>;
+
+def : Pat<(v8i16 (extract_subvector (v16i16 VR256:$src), (i32 0))),
+ (v8i16 (EXTRACT_SUBREG (v16i16 VR256:$src), sub_xmm))>;
+def : Pat<(v16i8 (extract_subvector (v32i8 VR256:$src), (i32 0))),
+ (v16i8 (EXTRACT_SUBREG (v32i8 VR256:$src), sub_xmm))>;
+
+// A 128-bit subvector insert to the first 256-bit vector position
+// is a subregister copy that needs no instruction.
+def : Pat<(insert_subvector undef, (v2i64 VR128:$src), (i32 0)),
+ (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
+def : Pat<(insert_subvector undef, (v2f64 VR128:$src), (i32 0)),
+ (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
+def : Pat<(insert_subvector undef, (v4i32 VR128:$src), (i32 0)),
+ (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
+def : Pat<(insert_subvector undef, (v4f32 VR128:$src), (i32 0)),
+ (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
+def : Pat<(insert_subvector undef, (v8i16 VR128:$src), (i32 0)),
+ (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
+def : Pat<(insert_subvector undef, (v16i8 VR128:$src), (i32 0)),
+ (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
+
+// Implicitly promote a 32-bit scalar to a vector.
+def : Pat<(v4f32 (scalar_to_vector FR32:$src)),
+ (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss)>;
+def : Pat<(v8f32 (scalar_to_vector FR32:$src)),
+ (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), FR32:$src, sub_ss)>;
+// Implicitly promote a 64-bit scalar to a vector.
+def : Pat<(v2f64 (scalar_to_vector FR64:$src)),
+ (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd)>;
+def : Pat<(v4f64 (scalar_to_vector FR64:$src)),
+ (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), FR64:$src, sub_sd)>;
+
+// Bitcasts between 128-bit vector types. Return the original type since
+// no instruction is needed for the conversion
+let Predicates = [HasXMMInt] in {
+ def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>;
+ def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>;
+ def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>;
+ def : Pat<(v2i64 (bitconvert (v2f64 VR128:$src))), (v2i64 VR128:$src)>;
+ def : Pat<(v2i64 (bitconvert (v4f32 VR128:$src))), (v2i64 VR128:$src)>;
+ def : Pat<(v4i32 (bitconvert (v2i64 VR128:$src))), (v4i32 VR128:$src)>;
+ def : Pat<(v4i32 (bitconvert (v8i16 VR128:$src))), (v4i32 VR128:$src)>;
+ def : Pat<(v4i32 (bitconvert (v16i8 VR128:$src))), (v4i32 VR128:$src)>;
+ def : Pat<(v4i32 (bitconvert (v2f64 VR128:$src))), (v4i32 VR128:$src)>;
+ def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>;
+ def : Pat<(v8i16 (bitconvert (v2i64 VR128:$src))), (v8i16 VR128:$src)>;
+ def : Pat<(v8i16 (bitconvert (v4i32 VR128:$src))), (v8i16 VR128:$src)>;
+ def : Pat<(v8i16 (bitconvert (v16i8 VR128:$src))), (v8i16 VR128:$src)>;
+ def : Pat<(v8i16 (bitconvert (v2f64 VR128:$src))), (v8i16 VR128:$src)>;
+ def : Pat<(v8i16 (bitconvert (v4f32 VR128:$src))), (v8i16 VR128:$src)>;
+ def : Pat<(v16i8 (bitconvert (v2i64 VR128:$src))), (v16i8 VR128:$src)>;
+ def : Pat<(v16i8 (bitconvert (v4i32 VR128:$src))), (v16i8 VR128:$src)>;
+ def : Pat<(v16i8 (bitconvert (v8i16 VR128:$src))), (v16i8 VR128:$src)>;
+ def : Pat<(v16i8 (bitconvert (v2f64 VR128:$src))), (v16i8 VR128:$src)>;
+ def : Pat<(v16i8 (bitconvert (v4f32 VR128:$src))), (v16i8 VR128:$src)>;
+ def : Pat<(v4f32 (bitconvert (v2i64 VR128:$src))), (v4f32 VR128:$src)>;
+ def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>;
+ def : Pat<(v4f32 (bitconvert (v8i16 VR128:$src))), (v4f32 VR128:$src)>;
+ def : Pat<(v4f32 (bitconvert (v16i8 VR128:$src))), (v4f32 VR128:$src)>;
+ def : Pat<(v4f32 (bitconvert (v2f64 VR128:$src))), (v4f32 VR128:$src)>;
+ def : Pat<(v2f64 (bitconvert (v2i64 VR128:$src))), (v2f64 VR128:$src)>;
+ def : Pat<(v2f64 (bitconvert (v4i32 VR128:$src))), (v2f64 VR128:$src)>;
+ def : Pat<(v2f64 (bitconvert (v8i16 VR128:$src))), (v2f64 VR128:$src)>;
+ def : Pat<(v2f64 (bitconvert (v16i8 VR128:$src))), (v2f64 VR128:$src)>;
+ def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))), (v2f64 VR128:$src)>;
+}
+
+// Bitcasts between 256-bit vector types. Return the original type since
+// no instruction is needed for the conversion
+let Predicates = [HasAVX] in {
+ def : Pat<(v4f64 (bitconvert (v8f32 VR256:$src))), (v4f64 VR256:$src)>;
+ def : Pat<(v4f64 (bitconvert (v8i32 VR256:$src))), (v4f64 VR256:$src)>;
+ def : Pat<(v4f64 (bitconvert (v4i64 VR256:$src))), (v4f64 VR256:$src)>;
+ def : Pat<(v4f64 (bitconvert (v16i16 VR256:$src))), (v4f64 VR256:$src)>;
+ def : Pat<(v4f64 (bitconvert (v32i8 VR256:$src))), (v4f64 VR256:$src)>;
+ def : Pat<(v8f32 (bitconvert (v8i32 VR256:$src))), (v8f32 VR256:$src)>;
+ def : Pat<(v8f32 (bitconvert (v4i64 VR256:$src))), (v8f32 VR256:$src)>;
+ def : Pat<(v8f32 (bitconvert (v4f64 VR256:$src))), (v8f32 VR256:$src)>;
+ def : Pat<(v8f32 (bitconvert (v32i8 VR256:$src))), (v8f32 VR256:$src)>;
+ def : Pat<(v8f32 (bitconvert (v16i16 VR256:$src))), (v8f32 VR256:$src)>;
+ def : Pat<(v4i64 (bitconvert (v8f32 VR256:$src))), (v4i64 VR256:$src)>;
+ def : Pat<(v4i64 (bitconvert (v8i32 VR256:$src))), (v4i64 VR256:$src)>;
+ def : Pat<(v4i64 (bitconvert (v4f64 VR256:$src))), (v4i64 VR256:$src)>;
+ def : Pat<(v4i64 (bitconvert (v32i8 VR256:$src))), (v4i64 VR256:$src)>;
+ def : Pat<(v4i64 (bitconvert (v16i16 VR256:$src))), (v4i64 VR256:$src)>;
+ def : Pat<(v32i8 (bitconvert (v4f64 VR256:$src))), (v32i8 VR256:$src)>;
+ def : Pat<(v32i8 (bitconvert (v4i64 VR256:$src))), (v32i8 VR256:$src)>;
+ def : Pat<(v32i8 (bitconvert (v8f32 VR256:$src))), (v32i8 VR256:$src)>;
+ def : Pat<(v32i8 (bitconvert (v8i32 VR256:$src))), (v32i8 VR256:$src)>;
+ def : Pat<(v32i8 (bitconvert (v16i16 VR256:$src))), (v32i8 VR256:$src)>;
+ def : Pat<(v8i32 (bitconvert (v32i8 VR256:$src))), (v8i32 VR256:$src)>;
+ def : Pat<(v8i32 (bitconvert (v16i16 VR256:$src))), (v8i32 VR256:$src)>;
+ def : Pat<(v8i32 (bitconvert (v8f32 VR256:$src))), (v8i32 VR256:$src)>;
+ def : Pat<(v8i32 (bitconvert (v4i64 VR256:$src))), (v8i32 VR256:$src)>;
+ def : Pat<(v8i32 (bitconvert (v4f64 VR256:$src))), (v8i32 VR256:$src)>;
+ def : Pat<(v16i16 (bitconvert (v8f32 VR256:$src))), (v16i16 VR256:$src)>;
+ def : Pat<(v16i16 (bitconvert (v8i32 VR256:$src))), (v16i16 VR256:$src)>;
+ def : Pat<(v16i16 (bitconvert (v4i64 VR256:$src))), (v16i16 VR256:$src)>;
+ def : Pat<(v16i16 (bitconvert (v4f64 VR256:$src))), (v16i16 VR256:$src)>;
+ def : Pat<(v16i16 (bitconvert (v32i8 VR256:$src))), (v16i16 VR256:$src)>;
+}
+
+// Alias instructions that map fld0 to pxor for sse.
+// FIXME: Set encoding to pseudo!
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isCodeGenOnly = 1,
+ canFoldAsLoad = 1 in {
+ def FsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins), "",
+ [(set FR32:$dst, fp32imm0)]>,
+ Requires<[HasSSE1]>, TB, OpSize;
+ def FsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins), "",
+ [(set FR64:$dst, fpimm0)]>,
+ Requires<[HasSSE2]>, TB, OpSize;
+ def VFsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins), "",
+ [(set FR32:$dst, fp32imm0)]>,
+ Requires<[HasAVX]>, TB, OpSize, VEX_4V;
+ def VFsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins), "",
+ [(set FR64:$dst, fpimm0)]>,
+ Requires<[HasAVX]>, TB, OpSize, VEX_4V;
+}
+
+//===----------------------------------------------------------------------===//
+// AVX & SSE - Zero/One Vectors
+//===----------------------------------------------------------------------===//
+
+// Alias instruction that maps zero vector to pxor / xorp* for sse.
+// This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
+// swizzled by ExecutionDepsFix to pxor.
+// We set canFoldAsLoad because this can be converted to a constant-pool
+// load of an all-zeros value if folding it would be beneficial.
+let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
+ isPseudo = 1, neverHasSideEffects = 1 in {
+def V_SET0 : I<0, Pseudo, (outs VR128:$dst), (ins), "", []>;
+}
+
+def : Pat<(v4f32 immAllZerosV), (V_SET0)>;
+def : Pat<(v2f64 immAllZerosV), (V_SET0)>;
+def : Pat<(v4i32 immAllZerosV), (V_SET0)>;
+def : Pat<(v2i64 immAllZerosV), (V_SET0)>;
+def : Pat<(v8i16 immAllZerosV), (V_SET0)>;
+def : Pat<(v16i8 immAllZerosV), (V_SET0)>;
+
+
+// The same as done above but for AVX. The 256-bit ISA does not support PI,
+// and doesn't need it because on sandy bridge the register is set to zero
+// at the rename stage without using any execution unit, so SET0PSY
+// and SET0PDY can be used for vector int instructions without penalty
+// FIXME: Change encoding to pseudo! This is blocked right now by the x86
+// JIT implementatioan, it does not expand the instructions below like
+// X86MCInstLower does.
+let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
+ isCodeGenOnly = 1, Predicates = [HasAVX] in {
+def AVX_SET0PSY : PSI<0x57, MRMInitReg, (outs VR256:$dst), (ins), "",
+ [(set VR256:$dst, (v8f32 immAllZerosV))]>, VEX_4V;
+def AVX_SET0PDY : PDI<0x57, MRMInitReg, (outs VR256:$dst), (ins), "",
+ [(set VR256:$dst, (v4f64 immAllZerosV))]>, VEX_4V;
+}
+
+
+// AVX has no support for 256-bit integer instructions, but since the 128-bit
+// VPXOR instruction writes zero to its upper part, it's safe build zeros.
+def : Pat<(v8i32 immAllZerosV), (SUBREG_TO_REG (i32 0), (V_SET0), sub_xmm)>;
+def : Pat<(bc_v8i32 (v8f32 immAllZerosV)),
+ (SUBREG_TO_REG (i32 0), (V_SET0), sub_xmm)>;
+
+def : Pat<(v4i64 immAllZerosV), (SUBREG_TO_REG (i64 0), (V_SET0), sub_xmm)>;
+def : Pat<(bc_v4i64 (v8f32 immAllZerosV)),
+ (SUBREG_TO_REG (i64 0), (V_SET0), sub_xmm)>;
+
+// We set canFoldAsLoad because this can be converted to a constant-pool
+// load of an all-ones value if folding it would be beneficial.
+// FIXME: Change encoding to pseudo! This is blocked right now by the x86
+// JIT implementation, it does not expand the instructions below like
+// X86MCInstLower does.
+let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
+ isCodeGenOnly = 1, ExeDomain = SSEPackedInt in
+ def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "",
+ [(set VR128:$dst, (v4i32 immAllOnesV))]>;
+let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
+ isCodeGenOnly = 1, ExeDomain = SSEPackedInt, Predicates = [HasAVX] in
+ def AVX_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "",
+ [(set VR128:$dst, (v4i32 immAllOnesV))]>, VEX_4V;
+
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Move FP Scalar Instructions
+//
+// Move Instructions. Register-to-register movss/movsd is not used for FR32/64
+// register copies because it's a partial register update; FsMOVAPSrr/FsMOVAPDrr
+// is used instead. Register-to-register movss/movsd is not modeled as an
+// INSERT_SUBREG because INSERT_SUBREG requires that the insert be implementable
+// in terms of a copy, and just mentioned, we don't use movss/movsd for copies.
+//===----------------------------------------------------------------------===//
+
+class sse12_move_rr<RegisterClass RC, ValueType vt, string asm> :
+ SI<0x10, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, RC:$src2), asm,
+ [(set (vt VR128:$dst), (movl VR128:$src1, (scalar_to_vector RC:$src2)))]>;
+
+// Loading from memory automatically zeroing upper bits.
+class sse12_move_rm<RegisterClass RC, X86MemOperand x86memop,
+ PatFrag mem_pat, string OpcodeStr> :
+ SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set RC:$dst, (mem_pat addr:$src))]>;
+
+// AVX
+def VMOVSSrr : sse12_move_rr<FR32, v4f32,
+ "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XS, VEX_4V,
+ VEX_LIG;
+def VMOVSDrr : sse12_move_rr<FR64, v2f64,
+ "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XD, VEX_4V,
+ VEX_LIG;
+
+// For the disassembler
+let isCodeGenOnly = 1 in {
+ def VMOVSSrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
+ (ins VR128:$src1, FR32:$src2),
+ "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
+ XS, VEX_4V, VEX_LIG;
+ def VMOVSDrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
+ (ins VR128:$src1, FR64:$src2),
+ "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
+ XD, VEX_4V, VEX_LIG;
+}
+
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
+ def VMOVSSrm : sse12_move_rm<FR32, f32mem, loadf32, "movss">, XS, VEX,
+ VEX_LIG;
+ let AddedComplexity = 20 in
+ def VMOVSDrm : sse12_move_rm<FR64, f64mem, loadf64, "movsd">, XD, VEX,
+ VEX_LIG;
+}
+
+def VMOVSSmr : SI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src),
+ "movss\t{$src, $dst|$dst, $src}",
+ [(store FR32:$src, addr:$dst)]>, XS, VEX, VEX_LIG;
+def VMOVSDmr : SI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src),
+ "movsd\t{$src, $dst|$dst, $src}",
+ [(store FR64:$src, addr:$dst)]>, XD, VEX, VEX_LIG;
+
+// SSE1 & 2
+let Constraints = "$src1 = $dst" in {
+ def MOVSSrr : sse12_move_rr<FR32, v4f32,
+ "movss\t{$src2, $dst|$dst, $src2}">, XS;
+ def MOVSDrr : sse12_move_rr<FR64, v2f64,
+ "movsd\t{$src2, $dst|$dst, $src2}">, XD;
+
+ // For the disassembler
+ let isCodeGenOnly = 1 in {
+ def MOVSSrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
+ (ins VR128:$src1, FR32:$src2),
+ "movss\t{$src2, $dst|$dst, $src2}", []>, XS;
+ def MOVSDrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
+ (ins VR128:$src1, FR64:$src2),
+ "movsd\t{$src2, $dst|$dst, $src2}", []>, XD;
+ }
+}
+
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
+ def MOVSSrm : sse12_move_rm<FR32, f32mem, loadf32, "movss">, XS;
+
+ let AddedComplexity = 20 in
+ def MOVSDrm : sse12_move_rm<FR64, f64mem, loadf64, "movsd">, XD;
+}
+
+def MOVSSmr : SSI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src),
+ "movss\t{$src, $dst|$dst, $src}",
+ [(store FR32:$src, addr:$dst)]>;
+def MOVSDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src),
+ "movsd\t{$src, $dst|$dst, $src}",
+ [(store FR64:$src, addr:$dst)]>;
+
+// Patterns
+let Predicates = [HasSSE1] in {
+ let AddedComplexity = 15 in {
+ // Extract the low 32-bit value from one vector and insert it into another.
+ def : Pat<(v4f32 (movl VR128:$src1, VR128:$src2)),
+ (MOVSSrr (v4f32 VR128:$src1),
+ (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>;
+ def : Pat<(v4i32 (movl VR128:$src1, VR128:$src2)),
+ (MOVSSrr (v4i32 VR128:$src1),
+ (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>;
+
+ // Move scalar to XMM zero-extended, zeroing a VR128 then do a
+ // MOVSS to the lower bits.
+ def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))),
+ (MOVSSrr (v4f32 (V_SET0)), FR32:$src)>;
+ def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
+ (MOVSSrr (v4f32 (V_SET0)),
+ (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)))>;
+ def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
+ (MOVSSrr (v4i32 (V_SET0)),
+ (EXTRACT_SUBREG (v4i32 VR128:$src), sub_ss))>;
+ }
+
+ let AddedComplexity = 20 in {
+ // MOVSSrm zeros the high parts of the register; represent this
+ // with SUBREG_TO_REG.
+ def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
+ (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>;
+ def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
+ (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>;
+ def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
+ (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>;
+ }
+
+ // Extract and store.
+ def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
+ addr:$dst),
+ (MOVSSmr addr:$dst,
+ (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
+
+ // Shuffle with MOVSS
+ def : Pat<(v4f32 (X86Movss VR128:$src1, (scalar_to_vector FR32:$src2))),
+ (MOVSSrr VR128:$src1, FR32:$src2)>;
+ def : Pat<(v4i32 (X86Movss VR128:$src1, VR128:$src2)),
+ (MOVSSrr (v4i32 VR128:$src1),
+ (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>;
+ def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)),
+ (MOVSSrr (v4f32 VR128:$src1),
+ (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>;
+}
+
+let Predicates = [HasSSE2] in {
+ let AddedComplexity = 15 in {
+ // Extract the low 64-bit value from one vector and insert it into another.
+ def : Pat<(v2f64 (movl VR128:$src1, VR128:$src2)),
+ (MOVSDrr (v2f64 VR128:$src1),
+ (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>;
+ def : Pat<(v2i64 (movl VR128:$src1, VR128:$src2)),
+ (MOVSDrr (v2i64 VR128:$src1),
+ (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>;
+
+ // vector_shuffle v1, v2 <4, 5, 2, 3> using movsd
+ def : Pat<(v4f32 (movlp VR128:$src1, VR128:$src2)),
+ (MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, sub_sd))>;
+ def : Pat<(v4i32 (movlp VR128:$src1, VR128:$src2)),
+ (MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, sub_sd))>;
+
+ // Move scalar to XMM zero-extended, zeroing a VR128 then do a
+ // MOVSD to the lower bits.
+ def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))),
+ (MOVSDrr (v2f64 (V_SET0)), FR64:$src)>;
+ }
+
+ let AddedComplexity = 20 in {
+ // MOVSDrm zeros the high parts of the register; represent this
+ // with SUBREG_TO_REG.
+ def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
+ (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
+ def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
+ (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
+ def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
+ (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
+ def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
+ (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
+ def : Pat<(v2f64 (X86vzload addr:$src)),
+ (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
+ }
+
+ // Extract and store.
+ def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
+ addr:$dst),
+ (MOVSDmr addr:$dst,
+ (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>;
+
+ // Shuffle with MOVSD
+ def : Pat<(v2f64 (X86Movsd VR128:$src1, (scalar_to_vector FR64:$src2))),
+ (MOVSDrr VR128:$src1, FR64:$src2)>;
+ def : Pat<(v2i64 (X86Movsd VR128:$src1, VR128:$src2)),
+ (MOVSDrr (v2i64 VR128:$src1),
+ (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>;
+ def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)),
+ (MOVSDrr (v2f64 VR128:$src1),
+ (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>;
+ def : Pat<(v4f32 (X86Movsd VR128:$src1, VR128:$src2)),
+ (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2),sub_sd))>;
+ def : Pat<(v4i32 (X86Movsd VR128:$src1, VR128:$src2)),
+ (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2),sub_sd))>;
+
+ // FIXME: Instead of a X86Movlps there should be a X86Movsd here, the problem
+ // is during lowering, where it's not possible to recognize the fold cause
+ // it has two uses through a bitcast. One use disappears at isel time and the
+ // fold opportunity reappears.
+ def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)),
+ (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2),sub_sd))>;
+ def : Pat<(v4i32 (X86Movlps VR128:$src1, VR128:$src2)),
+ (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2),sub_sd))>;
+}
+
+let Predicates = [HasAVX] in {
+ let AddedComplexity = 15 in {
+ // Extract the low 32-bit value from one vector and insert it into another.
+ def : Pat<(v4f32 (movl VR128:$src1, VR128:$src2)),
+ (VMOVSSrr (v4f32 VR128:$src1),
+ (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>;
+ def : Pat<(v4i32 (movl VR128:$src1, VR128:$src2)),
+ (VMOVSSrr (v4i32 VR128:$src1),
+ (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>;
+
+ // Extract the low 64-bit value from one vector and insert it into another.
+ def : Pat<(v2f64 (movl VR128:$src1, VR128:$src2)),
+ (VMOVSDrr (v2f64 VR128:$src1),
+ (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>;
+ def : Pat<(v2i64 (movl VR128:$src1, VR128:$src2)),
+ (VMOVSDrr (v2i64 VR128:$src1),
+ (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>;
+
+ // vector_shuffle v1, v2 <4, 5, 2, 3> using movsd
+ def : Pat<(v4f32 (movlp VR128:$src1, VR128:$src2)),
+ (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, sub_sd))>;
+ def : Pat<(v4i32 (movlp VR128:$src1, VR128:$src2)),
+ (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, sub_sd))>;
+
+ // Move scalar to XMM zero-extended, zeroing a VR128 then do a
+ // MOVS{S,D} to the lower bits.
+ def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))),
+ (VMOVSSrr (v4f32 (V_SET0)), FR32:$src)>;
+ def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
+ (VMOVSSrr (v4f32 (V_SET0)),
+ (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)))>;
+ def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
+ (VMOVSSrr (v4i32 (V_SET0)),
+ (EXTRACT_SUBREG (v4i32 VR128:$src), sub_ss))>;
+ def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))),
+ (VMOVSDrr (v2f64 (V_SET0)), FR64:$src)>;
+ }
+
+ let AddedComplexity = 20 in {
+ // MOVSSrm zeros the high parts of the register; represent this
+ // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
+ def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
+ (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>;
+ def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
+ (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>;
+ def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
+ (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>;
+
+ // MOVSDrm zeros the high parts of the register; represent this
+ // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
+ def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
+ (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>;
+ def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
+ (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>;
+ def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
+ (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>;
+ def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
+ (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>;
+ def : Pat<(v2f64 (X86vzload addr:$src)),
+ (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>;
+
+ // Represent the same patterns above but in the form they appear for
+ // 256-bit types
+ def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
+ (v4f32 (scalar_to_vector (loadf32 addr:$src))), (i32 0)))),
+ (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>;
+ def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
+ (v2f64 (scalar_to_vector (loadf64 addr:$src))), (i32 0)))),
+ (SUBREG_TO_REG (i32 0), (VMOVSDrm addr:$src), sub_sd)>;
+ }
+ def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
+ (v4f32 (scalar_to_vector FR32:$src)), (i32 0)))),
+ (SUBREG_TO_REG (i32 0),
+ (v4f32 (VMOVSSrr (v4f32 (V_SET0)), FR32:$src)),
+ sub_xmm)>;
+ def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
+ (v2f64 (scalar_to_vector FR64:$src)), (i32 0)))),
+ (SUBREG_TO_REG (i64 0),
+ (v2f64 (VMOVSDrr (v2f64 (V_SET0)), FR64:$src)),
+ sub_xmm)>;
+
+ // Extract and store.
+ def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
+ addr:$dst),
+ (VMOVSSmr addr:$dst,
+ (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
+ def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
+ addr:$dst),
+ (VMOVSDmr addr:$dst,
+ (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>;
+
+ // Shuffle with VMOVSS
+ def : Pat<(v4f32 (X86Movss VR128:$src1, (scalar_to_vector FR32:$src2))),
+ (VMOVSSrr VR128:$src1, FR32:$src2)>;
+ def : Pat<(v4i32 (X86Movss VR128:$src1, VR128:$src2)),
+ (VMOVSSrr (v4i32 VR128:$src1),
+ (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>;
+ def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)),
+ (VMOVSSrr (v4f32 VR128:$src1),
+ (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>;
+
+ // Shuffle with VMOVSD
+ def : Pat<(v2f64 (X86Movsd VR128:$src1, (scalar_to_vector FR64:$src2))),
+ (VMOVSDrr VR128:$src1, FR64:$src2)>;
+ def : Pat<(v2i64 (X86Movsd VR128:$src1, VR128:$src2)),
+ (VMOVSDrr (v2i64 VR128:$src1),
+ (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>;
+ def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)),
+ (VMOVSDrr (v2f64 VR128:$src1),
+ (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>;
+ def : Pat<(v4f32 (X86Movsd VR128:$src1, VR128:$src2)),
+ (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2),
+ sub_sd))>;
+ def : Pat<(v4i32 (X86Movsd VR128:$src1, VR128:$src2)),
+ (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2),
+ sub_sd))>;
+
+ // FIXME: Instead of a X86Movlps there should be a X86Movsd here, the problem
+ // is during lowering, where it's not possible to recognize the fold cause
+ // it has two uses through a bitcast. One use disappears at isel time and the
+ // fold opportunity reappears.
+ def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)),
+ (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2),
+ sub_sd))>;
+ def : Pat<(v4i32 (X86Movlps VR128:$src1, VR128:$src2)),
+ (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2),
+ sub_sd))>;
+}
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Move Aligned/Unaligned FP Instructions
+//===----------------------------------------------------------------------===//
+
+multiclass sse12_mov_packed<bits<8> opc, RegisterClass RC,
+ X86MemOperand x86memop, PatFrag ld_frag,
+ string asm, Domain d,
+ bit IsReMaterializable = 1> {
+let neverHasSideEffects = 1 in
+ def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], d>;
+let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable in
+ def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
+ [(set RC:$dst, (ld_frag addr:$src))], d>;
+}
+
+defm VMOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32,
+ "movaps", SSEPackedSingle>, TB, VEX;
+defm VMOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64,
+ "movapd", SSEPackedDouble>, TB, OpSize, VEX;
+defm VMOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32,
+ "movups", SSEPackedSingle>, TB, VEX;
+defm VMOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64,
+ "movupd", SSEPackedDouble, 0>, TB, OpSize, VEX;
+
+defm VMOVAPSY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv8f32,
+ "movaps", SSEPackedSingle>, TB, VEX;
+defm VMOVAPDY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv4f64,
+ "movapd", SSEPackedDouble>, TB, OpSize, VEX;
+defm VMOVUPSY : sse12_mov_packed<0x10, VR256, f256mem, loadv8f32,
+ "movups", SSEPackedSingle>, TB, VEX;
+defm VMOVUPDY : sse12_mov_packed<0x10, VR256, f256mem, loadv4f64,
+ "movupd", SSEPackedDouble, 0>, TB, OpSize, VEX;
+defm MOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32,
+ "movaps", SSEPackedSingle>, TB;
+defm MOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64,
+ "movapd", SSEPackedDouble>, TB, OpSize;
+defm MOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32,
+ "movups", SSEPackedSingle>, TB;
+defm MOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64,
+ "movupd", SSEPackedDouble, 0>, TB, OpSize;
+
+def VMOVAPSmr : VPSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movaps\t{$src, $dst|$dst, $src}",
+ [(alignedstore (v4f32 VR128:$src), addr:$dst)]>, VEX;
+def VMOVAPDmr : VPDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movapd\t{$src, $dst|$dst, $src}",
+ [(alignedstore (v2f64 VR128:$src), addr:$dst)]>, VEX;
+def VMOVUPSmr : VPSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movups\t{$src, $dst|$dst, $src}",
+ [(store (v4f32 VR128:$src), addr:$dst)]>, VEX;
+def VMOVUPDmr : VPDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movupd\t{$src, $dst|$dst, $src}",
+ [(store (v2f64 VR128:$src), addr:$dst)]>, VEX;
+def VMOVAPSYmr : VPSI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
+ "movaps\t{$src, $dst|$dst, $src}",
+ [(alignedstore256 (v8f32 VR256:$src), addr:$dst)]>, VEX;
+def VMOVAPDYmr : VPDI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
+ "movapd\t{$src, $dst|$dst, $src}",
+ [(alignedstore256 (v4f64 VR256:$src), addr:$dst)]>, VEX;
+def VMOVUPSYmr : VPSI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
+ "movups\t{$src, $dst|$dst, $src}",
+ [(store (v8f32 VR256:$src), addr:$dst)]>, VEX;
+def VMOVUPDYmr : VPDI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
+ "movupd\t{$src, $dst|$dst, $src}",
+ [(store (v4f64 VR256:$src), addr:$dst)]>, VEX;
+
+// For disassembler
+let isCodeGenOnly = 1 in {
+ def VMOVAPSrr_REV : VPSI<0x29, MRMDestReg, (outs VR128:$dst),
+ (ins VR128:$src),
+ "movaps\t{$src, $dst|$dst, $src}", []>, VEX;
+ def VMOVAPDrr_REV : VPDI<0x29, MRMDestReg, (outs VR128:$dst),
+ (ins VR128:$src),
+ "movapd\t{$src, $dst|$dst, $src}", []>, VEX;
+ def VMOVUPSrr_REV : VPSI<0x11, MRMDestReg, (outs VR128:$dst),
+ (ins VR128:$src),
+ "movups\t{$src, $dst|$dst, $src}", []>, VEX;
+ def VMOVUPDrr_REV : VPDI<0x11, MRMDestReg, (outs VR128:$dst),
+ (ins VR128:$src),
+ "movupd\t{$src, $dst|$dst, $src}", []>, VEX;
+ def VMOVAPSYrr_REV : VPSI<0x29, MRMDestReg, (outs VR256:$dst),
+ (ins VR256:$src),
+ "movaps\t{$src, $dst|$dst, $src}", []>, VEX;
+ def VMOVAPDYrr_REV : VPDI<0x29, MRMDestReg, (outs VR256:$dst),
+ (ins VR256:$src),
+ "movapd\t{$src, $dst|$dst, $src}", []>, VEX;
+ def VMOVUPSYrr_REV : VPSI<0x11, MRMDestReg, (outs VR256:$dst),
+ (ins VR256:$src),
+ "movups\t{$src, $dst|$dst, $src}", []>, VEX;
+ def VMOVUPDYrr_REV : VPDI<0x11, MRMDestReg, (outs VR256:$dst),
+ (ins VR256:$src),
+ "movupd\t{$src, $dst|$dst, $src}", []>, VEX;
+}
+
+def : Pat<(int_x86_avx_loadu_ps_256 addr:$src), (VMOVUPSYrm addr:$src)>;
+def : Pat<(int_x86_avx_storeu_ps_256 addr:$dst, VR256:$src),
+ (VMOVUPSYmr addr:$dst, VR256:$src)>;
+
+def : Pat<(int_x86_avx_loadu_pd_256 addr:$src), (VMOVUPDYrm addr:$src)>;
+def : Pat<(int_x86_avx_storeu_pd_256 addr:$dst, VR256:$src),
+ (VMOVUPDYmr addr:$dst, VR256:$src)>;
+
+def MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movaps\t{$src, $dst|$dst, $src}",
+ [(alignedstore (v4f32 VR128:$src), addr:$dst)]>;
+def MOVAPDmr : PDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movapd\t{$src, $dst|$dst, $src}",
+ [(alignedstore (v2f64 VR128:$src), addr:$dst)]>;
+def MOVUPSmr : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movups\t{$src, $dst|$dst, $src}",
+ [(store (v4f32 VR128:$src), addr:$dst)]>;
+def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movupd\t{$src, $dst|$dst, $src}",
+ [(store (v2f64 VR128:$src), addr:$dst)]>;
+
+// For disassembler
+let isCodeGenOnly = 1 in {
+ def MOVAPSrr_REV : PSI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
+ "movaps\t{$src, $dst|$dst, $src}", []>;
+ def MOVAPDrr_REV : PDI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
+ "movapd\t{$src, $dst|$dst, $src}", []>;
+ def MOVUPSrr_REV : PSI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
+ "movups\t{$src, $dst|$dst, $src}", []>;
+ def MOVUPDrr_REV : PDI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
+ "movupd\t{$src, $dst|$dst, $src}", []>;
+}
+
+let Predicates = [HasAVX] in {
+ def : Pat<(int_x86_sse_storeu_ps addr:$dst, VR128:$src),
+ (VMOVUPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(int_x86_sse2_storeu_pd addr:$dst, VR128:$src),
+ (VMOVUPDmr addr:$dst, VR128:$src)>;
+}
+
+let Predicates = [HasSSE1] in
+ def : Pat<(int_x86_sse_storeu_ps addr:$dst, VR128:$src),
+ (MOVUPSmr addr:$dst, VR128:$src)>;
+let Predicates = [HasSSE2] in
+ def : Pat<(int_x86_sse2_storeu_pd addr:$dst, VR128:$src),
+ (MOVUPDmr addr:$dst, VR128:$src)>;
+
+// Use movaps / movups for SSE integer load / store (one byte shorter).
+// The instructions selected below are then converted to MOVDQA/MOVDQU
+// during the SSE domain pass.
+let Predicates = [HasSSE1] in {
+ def : Pat<(alignedloadv4i32 addr:$src),
+ (MOVAPSrm addr:$src)>;
+ def : Pat<(loadv4i32 addr:$src),
+ (MOVUPSrm addr:$src)>;
+ def : Pat<(alignedloadv2i64 addr:$src),
+ (MOVAPSrm addr:$src)>;
+ def : Pat<(loadv2i64 addr:$src),
+ (MOVUPSrm addr:$src)>;
+
+ def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
+ (MOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
+ (MOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
+ (MOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
+ (MOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v2i64 VR128:$src), addr:$dst),
+ (MOVUPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v4i32 VR128:$src), addr:$dst),
+ (MOVUPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v8i16 VR128:$src), addr:$dst),
+ (MOVUPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v16i8 VR128:$src), addr:$dst),
+ (MOVUPSmr addr:$dst, VR128:$src)>;
+}
+
+// Use vmovaps/vmovups for AVX integer load/store.
+let Predicates = [HasAVX] in {
+ // 128-bit load/store
+ def : Pat<(alignedloadv4i32 addr:$src),
+ (VMOVAPSrm addr:$src)>;
+ def : Pat<(loadv4i32 addr:$src),
+ (VMOVUPSrm addr:$src)>;
+ def : Pat<(alignedloadv2i64 addr:$src),
+ (VMOVAPSrm addr:$src)>;
+ def : Pat<(loadv2i64 addr:$src),
+ (VMOVUPSrm addr:$src)>;
+
+ def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
+ (VMOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
+ (VMOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
+ (VMOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
+ (VMOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v2i64 VR128:$src), addr:$dst),
+ (VMOVUPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v4i32 VR128:$src), addr:$dst),
+ (VMOVUPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v8i16 VR128:$src), addr:$dst),
+ (VMOVUPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v16i8 VR128:$src), addr:$dst),
+ (VMOVUPSmr addr:$dst, VR128:$src)>;
+
+ // 256-bit load/store
+ def : Pat<(alignedloadv4i64 addr:$src),
+ (VMOVAPSYrm addr:$src)>;
+ def : Pat<(loadv4i64 addr:$src),
+ (VMOVUPSYrm addr:$src)>;
+ def : Pat<(alignedloadv8i32 addr:$src),
+ (VMOVAPSYrm addr:$src)>;
+ def : Pat<(loadv8i32 addr:$src),
+ (VMOVUPSYrm addr:$src)>;
+ def : Pat<(alignedstore256 (v4i64 VR256:$src), addr:$dst),
+ (VMOVAPSYmr addr:$dst, VR256:$src)>;
+ def : Pat<(alignedstore256 (v8i32 VR256:$src), addr:$dst),
+ (VMOVAPSYmr addr:$dst, VR256:$src)>;
+ def : Pat<(alignedstore256 (v16i16 VR256:$src), addr:$dst),
+ (VMOVAPSYmr addr:$dst, VR256:$src)>;
+ def : Pat<(alignedstore256 (v32i8 VR256:$src), addr:$dst),
+ (VMOVAPSYmr addr:$dst, VR256:$src)>;
+ def : Pat<(store (v4i64 VR256:$src), addr:$dst),
+ (VMOVUPSYmr addr:$dst, VR256:$src)>;
+ def : Pat<(store (v8i32 VR256:$src), addr:$dst),
+ (VMOVUPSYmr addr:$dst, VR256:$src)>;
+ def : Pat<(store (v16i16 VR256:$src), addr:$dst),
+ (VMOVUPSYmr addr:$dst, VR256:$src)>;
+ def : Pat<(store (v32i8 VR256:$src), addr:$dst),
+ (VMOVUPSYmr addr:$dst, VR256:$src)>;
+}
+
+// Alias instruction to do FR32 or FR64 reg-to-reg copy using movaps. Upper
+// bits are disregarded. FIXME: Set encoding to pseudo!
+let neverHasSideEffects = 1 in {
+def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
+ "movaps\t{$src, $dst|$dst, $src}", []>;
+def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
+ "movapd\t{$src, $dst|$dst, $src}", []>;
+def FsVMOVAPSrr : VPSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
+ "movaps\t{$src, $dst|$dst, $src}", []>, VEX;
+def FsVMOVAPDrr : VPDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
+ "movapd\t{$src, $dst|$dst, $src}", []>, VEX;
+}
+
+// Alias instruction to load FR32 or FR64 from f128mem using movaps. Upper
+// bits are disregarded. FIXME: Set encoding to pseudo!
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
+def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src),
+ "movaps\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (alignedloadfsf32 addr:$src))]>;
+def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src),
+ "movapd\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (alignedloadfsf64 addr:$src))]>;
+let isCodeGenOnly = 1 in {
+ def FsVMOVAPSrm : VPSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src),
+ "movaps\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (alignedloadfsf32 addr:$src))]>, VEX;
+ def FsVMOVAPDrm : VPDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src),
+ "movapd\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (alignedloadfsf64 addr:$src))]>, VEX;
+}
+}
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Move Low packed FP Instructions
+//===----------------------------------------------------------------------===//
+
+multiclass sse12_mov_hilo_packed<bits<8>opc, RegisterClass RC,
+ PatFrag mov_frag, string base_opc,
+ string asm_opr> {
+ def PSrm : PI<opc, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
+ !strconcat(base_opc, "s", asm_opr),
+ [(set RC:$dst,
+ (mov_frag RC:$src1,
+ (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))],
+ SSEPackedSingle>, TB;
+
+ def PDrm : PI<opc, MRMSrcMem,
+ (outs RC:$dst), (ins RC:$src1, f64mem:$src2),
+ !strconcat(base_opc, "d", asm_opr),
+ [(set RC:$dst, (v2f64 (mov_frag RC:$src1,
+ (scalar_to_vector (loadf64 addr:$src2)))))],
+ SSEPackedDouble>, TB, OpSize;
+}
+
+let AddedComplexity = 20 in {
+ defm VMOVL : sse12_mov_hilo_packed<0x12, VR128, movlp, "movlp",
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}">, VEX_4V;
+}
+let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
+ defm MOVL : sse12_mov_hilo_packed<0x12, VR128, movlp, "movlp",
+ "\t{$src2, $dst|$dst, $src2}">;
+}
+
+def VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+ "movlps\t{$src, $dst|$dst, $src}",
+ [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)),
+ (iPTR 0))), addr:$dst)]>, VEX;
+def VMOVLPDmr : VPDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+ "movlpd\t{$src, $dst|$dst, $src}",
+ [(store (f64 (vector_extract (v2f64 VR128:$src),
+ (iPTR 0))), addr:$dst)]>, VEX;
+def MOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+ "movlps\t{$src, $dst|$dst, $src}",
+ [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)),
+ (iPTR 0))), addr:$dst)]>;
+def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+ "movlpd\t{$src, $dst|$dst, $src}",
+ [(store (f64 (vector_extract (v2f64 VR128:$src),
+ (iPTR 0))), addr:$dst)]>;
+
+let Predicates = [HasAVX] in {
+ let AddedComplexity = 20 in {
+ // vector_shuffle v1, (load v2) <4, 5, 2, 3> using MOVLPS
+ def : Pat<(v4f32 (movlp VR128:$src1, (load addr:$src2))),
+ (VMOVLPSrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v4i32 (movlp VR128:$src1, (load addr:$src2))),
+ (VMOVLPSrm VR128:$src1, addr:$src2)>;
+ // vector_shuffle v1, (load v2) <2, 1> using MOVLPS
+ def : Pat<(v2f64 (movlp VR128:$src1, (load addr:$src2))),
+ (VMOVLPDrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v2i64 (movlp VR128:$src1, (load addr:$src2))),
+ (VMOVLPDrm VR128:$src1, addr:$src2)>;
+ }
+
+ // (store (vector_shuffle (load addr), v2, <4, 5, 2, 3>), addr) using MOVLPS
+ def : Pat<(store (v4f32 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
+ (VMOVLPSmr addr:$src1, VR128:$src2)>;
+ def : Pat<(store (v4i32 (movlp (bc_v4i32 (loadv2i64 addr:$src1)),
+ VR128:$src2)), addr:$src1),
+ (VMOVLPSmr addr:$src1, VR128:$src2)>;
+
+ // (store (vector_shuffle (load addr), v2, <2, 1>), addr) using MOVLPS
+ def : Pat<(store (v2f64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
+ (VMOVLPDmr addr:$src1, VR128:$src2)>;
+ def : Pat<(store (v2i64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
+ (VMOVLPDmr addr:$src1, VR128:$src2)>;
+
+ // Shuffle with VMOVLPS
+ def : Pat<(v4f32 (X86Movlps VR128:$src1, (load addr:$src2))),
+ (VMOVLPSrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v4i32 (X86Movlps VR128:$src1, (load addr:$src2))),
+ (VMOVLPSrm VR128:$src1, addr:$src2)>;
+ def : Pat<(X86Movlps VR128:$src1,
+ (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
+ (VMOVLPSrm VR128:$src1, addr:$src2)>;
+
+ // Shuffle with VMOVLPD
+ def : Pat<(v2f64 (X86Movlpd VR128:$src1, (load addr:$src2))),
+ (VMOVLPDrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v2i64 (X86Movlpd VR128:$src1, (load addr:$src2))),
+ (VMOVLPDrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v2f64 (X86Movlpd VR128:$src1,
+ (scalar_to_vector (loadf64 addr:$src2)))),
+ (VMOVLPDrm VR128:$src1, addr:$src2)>;
+
+ // Store patterns
+ def : Pat<(store (v4f32 (X86Movlps (load addr:$src1), VR128:$src2)),
+ addr:$src1),
+ (VMOVLPSmr addr:$src1, VR128:$src2)>;
+ def : Pat<(store (v4i32 (X86Movlps
+ (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)), addr:$src1),
+ (VMOVLPSmr addr:$src1, VR128:$src2)>;
+ def : Pat<(store (v2f64 (X86Movlpd (load addr:$src1), VR128:$src2)),
+ addr:$src1),
+ (VMOVLPDmr addr:$src1, VR128:$src2)>;
+ def : Pat<(store (v2i64 (X86Movlpd (load addr:$src1), VR128:$src2)),
+ addr:$src1),
+ (VMOVLPDmr addr:$src1, VR128:$src2)>;
+}
+
+let Predicates = [HasSSE1] in {
+ let AddedComplexity = 20 in {
+ // vector_shuffle v1, (load v2) <4, 5, 2, 3> using MOVLPS
+ def : Pat<(v4f32 (movlp VR128:$src1, (load addr:$src2))),
+ (MOVLPSrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v4i32 (movlp VR128:$src1, (load addr:$src2))),
+ (MOVLPSrm VR128:$src1, addr:$src2)>;
+ }
+
+ // (store (vector_shuffle (load addr), v2, <4, 5, 2, 3>), addr) using MOVLPS
+ def : Pat<(store (v4f32 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
+ (MOVLPSmr addr:$src1, VR128:$src2)>;
+ def : Pat<(store (v4i32 (movlp (bc_v4i32 (loadv2i64 addr:$src1)),
+ VR128:$src2)), addr:$src1),
+ (MOVLPSmr addr:$src1, VR128:$src2)>;
+
+ // Shuffle with MOVLPS
+ def : Pat<(v4f32 (X86Movlps VR128:$src1, (load addr:$src2))),
+ (MOVLPSrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v4i32 (X86Movlps VR128:$src1, (load addr:$src2))),
+ (MOVLPSrm VR128:$src1, addr:$src2)>;
+ def : Pat<(X86Movlps VR128:$src1,
+ (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
+ (MOVLPSrm VR128:$src1, addr:$src2)>;
+
+ // Store patterns
+ def : Pat<(store (v4f32 (X86Movlps (load addr:$src1), VR128:$src2)),
+ addr:$src1),
+ (MOVLPSmr addr:$src1, VR128:$src2)>;
+ def : Pat<(store (v4i32 (X86Movlps
+ (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)),
+ addr:$src1),
+ (MOVLPSmr addr:$src1, VR128:$src2)>;
+}
+
+let Predicates = [HasSSE2] in {
+ let AddedComplexity = 20 in {
+ // vector_shuffle v1, (load v2) <2, 1> using MOVLPS
+ def : Pat<(v2f64 (movlp VR128:$src1, (load addr:$src2))),
+ (MOVLPDrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v2i64 (movlp VR128:$src1, (load addr:$src2))),
+ (MOVLPDrm VR128:$src1, addr:$src2)>;
+ }
+
+ // (store (vector_shuffle (load addr), v2, <2, 1>), addr) using MOVLPS
+ def : Pat<(store (v2f64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
+ (MOVLPDmr addr:$src1, VR128:$src2)>;
+ def : Pat<(store (v2i64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
+ (MOVLPDmr addr:$src1, VR128:$src2)>;
+
+ // Shuffle with MOVLPD
+ def : Pat<(v2f64 (X86Movlpd VR128:$src1, (load addr:$src2))),
+ (MOVLPDrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v2i64 (X86Movlpd VR128:$src1, (load addr:$src2))),
+ (MOVLPDrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v2f64 (X86Movlpd VR128:$src1,
+ (scalar_to_vector (loadf64 addr:$src2)))),
+ (MOVLPDrm VR128:$src1, addr:$src2)>;
+
+ // Store patterns
+ def : Pat<(store (v2f64 (X86Movlpd (load addr:$src1), VR128:$src2)),
+ addr:$src1),
+ (MOVLPDmr addr:$src1, VR128:$src2)>;
+ def : Pat<(store (v2i64 (X86Movlpd (load addr:$src1), VR128:$src2)),
+ addr:$src1),
+ (MOVLPDmr addr:$src1, VR128:$src2)>;
+}
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Move Hi packed FP Instructions
+//===----------------------------------------------------------------------===//
+
+let AddedComplexity = 20 in {
+ defm VMOVH : sse12_mov_hilo_packed<0x16, VR128, movlhps, "movhp",
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}">, VEX_4V;
+}
+let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
+ defm MOVH : sse12_mov_hilo_packed<0x16, VR128, movlhps, "movhp",
+ "\t{$src2, $dst|$dst, $src2}">;
+}
+
+// v2f64 extract element 1 is always custom lowered to unpack high to low
+// and extract element 0 so the non-store version isn't too horrible.
+def VMOVHPSmr : VPSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+ "movhps\t{$src, $dst|$dst, $src}",
+ [(store (f64 (vector_extract
+ (unpckh (bc_v2f64 (v4f32 VR128:$src)),
+ (undef)), (iPTR 0))), addr:$dst)]>,
+ VEX;
+def VMOVHPDmr : VPDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+ "movhpd\t{$src, $dst|$dst, $src}",
+ [(store (f64 (vector_extract
+ (v2f64 (unpckh VR128:$src, (undef))),
+ (iPTR 0))), addr:$dst)]>,
+ VEX;
+def MOVHPSmr : PSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+ "movhps\t{$src, $dst|$dst, $src}",
+ [(store (f64 (vector_extract
+ (unpckh (bc_v2f64 (v4f32 VR128:$src)),
+ (undef)), (iPTR 0))), addr:$dst)]>;
+def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+ "movhpd\t{$src, $dst|$dst, $src}",
+ [(store (f64 (vector_extract
+ (v2f64 (unpckh VR128:$src, (undef))),
+ (iPTR 0))), addr:$dst)]>;
+
+let Predicates = [HasAVX] in {
+ // VMOVHPS patterns
+ def : Pat<(movlhps VR128:$src1, (bc_v4i32 (v2i64 (X86vzload addr:$src2)))),
+ (VMOVHPSrm (v4i32 VR128:$src1), addr:$src2)>;
+ def : Pat<(X86Movlhps VR128:$src1,
+ (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
+ (VMOVHPSrm VR128:$src1, addr:$src2)>;
+ def : Pat<(X86Movlhps VR128:$src1,
+ (bc_v4i32 (v2i64 (X86vzload addr:$src2)))),
+ (VMOVHPSrm VR128:$src1, addr:$src2)>;
+
+ // FIXME: Instead of X86Unpcklpd, there should be a X86Movlhpd here, the problem
+ // is during lowering, where it's not possible to recognize the load fold cause
+ // it has two uses through a bitcast. One use disappears at isel time and the
+ // fold opportunity reappears.
+ def : Pat<(v2f64 (X86Unpcklpd VR128:$src1,
+ (scalar_to_vector (loadf64 addr:$src2)))),
+ (VMOVHPDrm VR128:$src1, addr:$src2)>;
+
+ // FIXME: This should be matched by a X86Movhpd instead. Same as above
+ def : Pat<(v2f64 (X86Movlhpd VR128:$src1,
+ (scalar_to_vector (loadf64 addr:$src2)))),
+ (VMOVHPDrm VR128:$src1, addr:$src2)>;
+
+ // Store patterns
+ def : Pat<(store (f64 (vector_extract
+ (v2f64 (X86Unpckhps VR128:$src, (undef))), (iPTR 0))), addr:$dst),
+ (VMOVHPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (f64 (vector_extract
+ (v2f64 (X86Unpckhpd VR128:$src, (undef))), (iPTR 0))), addr:$dst),
+ (VMOVHPDmr addr:$dst, VR128:$src)>;
+}
+
+let Predicates = [HasSSE1] in {
+ // MOVHPS patterns
+ def : Pat<(movlhps VR128:$src1, (bc_v4i32 (v2i64 (X86vzload addr:$src2)))),
+ (MOVHPSrm (v4i32 VR128:$src1), addr:$src2)>;
+ def : Pat<(X86Movlhps VR128:$src1,
+ (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
+ (MOVHPSrm VR128:$src1, addr:$src2)>;
+ def : Pat<(X86Movlhps VR128:$src1,
+ (bc_v4f32 (v2i64 (X86vzload addr:$src2)))),
+ (MOVHPSrm VR128:$src1, addr:$src2)>;
+
+ // Store patterns
+ def : Pat<(store (f64 (vector_extract
+ (v2f64 (X86Unpckhps VR128:$src, (undef))), (iPTR 0))), addr:$dst),
+ (MOVHPSmr addr:$dst, VR128:$src)>;
+}
+
+let Predicates = [HasSSE2] in {
+ // FIXME: Instead of X86Unpcklpd, there should be a X86Movlhpd here, the problem
+ // is during lowering, where it's not possible to recognize the load fold cause
+ // it has two uses through a bitcast. One use disappears at isel time and the
+ // fold opportunity reappears.
+ def : Pat<(v2f64 (X86Unpcklpd VR128:$src1,
+ (scalar_to_vector (loadf64 addr:$src2)))),
+ (MOVHPDrm VR128:$src1, addr:$src2)>;
+
+ // FIXME: This should be matched by a X86Movhpd instead. Same as above
+ def : Pat<(v2f64 (X86Movlhpd VR128:$src1,
+ (scalar_to_vector (loadf64 addr:$src2)))),
+ (MOVHPDrm VR128:$src1, addr:$src2)>;
+
+ // Store patterns
+ def : Pat<(store (f64 (vector_extract
+ (v2f64 (X86Unpckhpd VR128:$src, (undef))), (iPTR 0))),addr:$dst),
+ (MOVHPDmr addr:$dst, VR128:$src)>;
+}
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Move Low to High and High to Low packed FP Instructions
+//===----------------------------------------------------------------------===//
+
+let AddedComplexity = 20 in {
+ def VMOVLHPSrr : VPSI<0x16, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ "movlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst,
+ (v4f32 (movlhps VR128:$src1, VR128:$src2)))]>,
+ VEX_4V;
+ def VMOVHLPSrr : VPSI<0x12, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ "movhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst,
+ (v4f32 (movhlps VR128:$src1, VR128:$src2)))]>,
+ VEX_4V;
+}
+let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
+ def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ "movlhps\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v4f32 (movlhps VR128:$src1, VR128:$src2)))]>;
+ def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ "movhlps\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v4f32 (movhlps VR128:$src1, VR128:$src2)))]>;
+}
+
+let Predicates = [HasAVX] in {
+ // MOVLHPS patterns
+ let AddedComplexity = 20 in {
+ def : Pat<(v4f32 (movddup VR128:$src, (undef))),
+ (VMOVLHPSrr (v4f32 VR128:$src), (v4f32 VR128:$src))>;
+ def : Pat<(v2i64 (movddup VR128:$src, (undef))),
+ (VMOVLHPSrr (v2i64 VR128:$src), (v2i64 VR128:$src))>;
+
+ // vector_shuffle v1, v2 <0, 1, 4, 5> using MOVLHPS
+ def : Pat<(v4i32 (movlhps VR128:$src1, VR128:$src2)),
+ (VMOVLHPSrr VR128:$src1, VR128:$src2)>;
+ }
+ def : Pat<(v4f32 (X86Movlhps VR128:$src1, VR128:$src2)),
+ (VMOVLHPSrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v4i32 (X86Movlhps VR128:$src1, VR128:$src2)),
+ (VMOVLHPSrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v2i64 (X86Movlhps VR128:$src1, VR128:$src2)),
+ (VMOVLHPSrr (v2i64 VR128:$src1), VR128:$src2)>;
+
+ // MOVHLPS patterns
+ let AddedComplexity = 20 in {
+ // vector_shuffle v1, v2 <6, 7, 2, 3> using MOVHLPS
+ def : Pat<(v4i32 (movhlps VR128:$src1, VR128:$src2)),
+ (VMOVHLPSrr VR128:$src1, VR128:$src2)>;
+
+ // vector_shuffle v1, undef <2, ?, ?, ?> using MOVHLPS
+ def : Pat<(v4f32 (movhlps_undef VR128:$src1, (undef))),
+ (VMOVHLPSrr VR128:$src1, VR128:$src1)>;
+ def : Pat<(v4i32 (movhlps_undef VR128:$src1, (undef))),
+ (VMOVHLPSrr VR128:$src1, VR128:$src1)>;
+ }
+
+ def : Pat<(v4f32 (X86Movhlps VR128:$src1, VR128:$src2)),
+ (VMOVHLPSrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v4i32 (X86Movhlps VR128:$src1, VR128:$src2)),
+ (VMOVHLPSrr VR128:$src1, VR128:$src2)>;
+}
+
+let Predicates = [HasSSE1] in {
+ // MOVLHPS patterns
+ let AddedComplexity = 20 in {
+ def : Pat<(v4f32 (movddup VR128:$src, (undef))),
+ (MOVLHPSrr (v4f32 VR128:$src), (v4f32 VR128:$src))>;
+ def : Pat<(v2i64 (movddup VR128:$src, (undef))),
+ (MOVLHPSrr (v2i64 VR128:$src), (v2i64 VR128:$src))>;
+
+ // vector_shuffle v1, v2 <0, 1, 4, 5> using MOVLHPS
+ def : Pat<(v4i32 (movlhps VR128:$src1, VR128:$src2)),
+ (MOVLHPSrr VR128:$src1, VR128:$src2)>;
+ }
+ def : Pat<(v4f32 (X86Movlhps VR128:$src1, VR128:$src2)),
+ (MOVLHPSrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v4i32 (X86Movlhps VR128:$src1, VR128:$src2)),
+ (MOVLHPSrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v2i64 (X86Movlhps VR128:$src1, VR128:$src2)),
+ (MOVLHPSrr (v2i64 VR128:$src1), VR128:$src2)>;
+
+ // MOVHLPS patterns
+ let AddedComplexity = 20 in {
+ // vector_shuffle v1, v2 <6, 7, 2, 3> using MOVHLPS
+ def : Pat<(v4i32 (movhlps VR128:$src1, VR128:$src2)),
+ (MOVHLPSrr VR128:$src1, VR128:$src2)>;
+
+ // vector_shuffle v1, undef <2, ?, ?, ?> using MOVHLPS
+ def : Pat<(v4f32 (movhlps_undef VR128:$src1, (undef))),
+ (MOVHLPSrr VR128:$src1, VR128:$src1)>;
+ def : Pat<(v4i32 (movhlps_undef VR128:$src1, (undef))),
+ (MOVHLPSrr VR128:$src1, VR128:$src1)>;
+ }
+
+ def : Pat<(v4f32 (X86Movhlps VR128:$src1, VR128:$src2)),
+ (MOVHLPSrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v4i32 (X86Movhlps VR128:$src1, VR128:$src2)),
+ (MOVHLPSrr VR128:$src1, VR128:$src2)>;
+}
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Conversion Instructions
+//===----------------------------------------------------------------------===//
+
+multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
+ SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
+ string asm> {
+ def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
+ [(set DstRC:$dst, (OpNode SrcRC:$src))]>;
+ def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
+ [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))]>;
+}
+
+multiclass sse12_cvt_s_np<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
+ X86MemOperand x86memop, string asm> {
+ def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm, []>;
+ let mayLoad = 1 in
+ def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm, []>;
+}
+
+multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
+ SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
+ string asm, Domain d> {
+ def rr : PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
+ [(set DstRC:$dst, (OpNode SrcRC:$src))], d>;
+ def rm : PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
+ [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))], d>;
+}
+
+multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
+ X86MemOperand x86memop, string asm> {
+ def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src),
+ !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>;
+ let mayLoad = 1 in
+ def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
+ (ins DstRC:$src1, x86memop:$src),
+ !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>;
+}
+
+defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
+ "cvttss2si\t{$src, $dst|$dst, $src}">, XS, VEX,
+ VEX_LIG;
+defm VCVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
+ "cvttss2si\t{$src, $dst|$dst, $src}">, XS, VEX,
+ VEX_W, VEX_LIG;
+defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
+ "cvttsd2si\t{$src, $dst|$dst, $src}">, XD, VEX,
+ VEX_LIG;
+defm VCVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64,
+ "cvttsd2si\t{$src, $dst|$dst, $src}">, XD,
+ VEX, VEX_W, VEX_LIG;
+
+// The assembler can recognize rr 64-bit instructions by seeing a rxx
+// register, but the same isn't true when only using memory operands,
+// provide other assembly "l" and "q" forms to address this explicitly
+// where appropriate to do so.
+defm VCVTSI2SS : sse12_vcvt_avx<0x2A, GR32, FR32, i32mem, "cvtsi2ss">, XS,
+ VEX_4V, VEX_LIG;
+defm VCVTSI2SS64 : sse12_vcvt_avx<0x2A, GR64, FR32, i64mem, "cvtsi2ss{q}">, XS,
+ VEX_4V, VEX_W, VEX_LIG;
+defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd">, XD,
+ VEX_4V, VEX_LIG;
+defm VCVTSI2SDL : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd{l}">, XD,
+ VEX_4V, VEX_LIG;
+defm VCVTSI2SD64 : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd{q}">, XD,
+ VEX_4V, VEX_W, VEX_LIG;
+
+let Predicates = [HasAVX] in {
+ def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
+ (VCVTSI2SSrm (f32 (IMPLICIT_DEF)), addr:$src)>;
+ def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
+ (VCVTSI2SS64rm (f32 (IMPLICIT_DEF)), addr:$src)>;
+ def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))),
+ (VCVTSI2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>;
+ def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))),
+ (VCVTSI2SD64rm (f64 (IMPLICIT_DEF)), addr:$src)>;
+
+ def : Pat<(f32 (sint_to_fp GR32:$src)),
+ (VCVTSI2SSrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
+ def : Pat<(f32 (sint_to_fp GR64:$src)),
+ (VCVTSI2SS64rr (f32 (IMPLICIT_DEF)), GR64:$src)>;
+ def : Pat<(f64 (sint_to_fp GR32:$src)),
+ (VCVTSI2SDrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
+ def : Pat<(f64 (sint_to_fp GR64:$src)),
+ (VCVTSI2SD64rr (f64 (IMPLICIT_DEF)), GR64:$src)>;
+}
+
+defm CVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
+ "cvttss2si\t{$src, $dst|$dst, $src}">, XS;
+defm CVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
+ "cvttss2si{q}\t{$src, $dst|$dst, $src}">, XS, REX_W;
+defm CVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
+ "cvttsd2si\t{$src, $dst|$dst, $src}">, XD;
+defm CVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64,
+ "cvttsd2si{q}\t{$src, $dst|$dst, $src}">, XD, REX_W;
+defm CVTSI2SS : sse12_cvt_s<0x2A, GR32, FR32, sint_to_fp, i32mem, loadi32,
+ "cvtsi2ss\t{$src, $dst|$dst, $src}">, XS;
+defm CVTSI2SS64 : sse12_cvt_s<0x2A, GR64, FR32, sint_to_fp, i64mem, loadi64,
+ "cvtsi2ss{q}\t{$src, $dst|$dst, $src}">, XS, REX_W;
+defm CVTSI2SD : sse12_cvt_s<0x2A, GR32, FR64, sint_to_fp, i32mem, loadi32,
+ "cvtsi2sd\t{$src, $dst|$dst, $src}">, XD;
+defm CVTSI2SD64 : sse12_cvt_s<0x2A, GR64, FR64, sint_to_fp, i64mem, loadi64,
+ "cvtsi2sd{q}\t{$src, $dst|$dst, $src}">, XD, REX_W;
+
+// Conversion Instructions Intrinsics - Match intrinsics which expect MM
+// and/or XMM operand(s).
+
+multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
+ Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag,
+ string asm> {
+ def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
+ [(set DstRC:$dst, (Int SrcRC:$src))]>;
+ def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
+ [(set DstRC:$dst, (Int (ld_frag addr:$src)))]>;
+}
+
+multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC,
+ RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop,
+ PatFrag ld_frag, string asm, bit Is2Addr = 1> {
+ def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src2),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))]>;
+ def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
+ (ins DstRC:$src1, x86memop:$src2),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))]>;
+}
+
+defm Int_VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
+ f128mem, load, "cvtsd2si">, XD, VEX;
+defm Int_VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64,
+ int_x86_sse2_cvtsd2si64, f128mem, load, "cvtsd2si">,
+ XD, VEX, VEX_W;
+
+// FIXME: The asm matcher has a hack to ignore instructions with _Int and Int_
+// Get rid of this hack or rename the intrinsics, there are several
+// intructions that only match with the intrinsic form, why create duplicates
+// to let them be recognized by the assembler?
+defm VCVTSD2SI : sse12_cvt_s_np<0x2D, FR64, GR32, f64mem,
+ "cvtsd2si\t{$src, $dst|$dst, $src}">, XD, VEX, VEX_LIG;
+defm VCVTSD2SI64 : sse12_cvt_s_np<0x2D, FR64, GR64, f64mem,
+ "cvtsd2si\t{$src, $dst|$dst, $src}">, XD, VEX, VEX_W,
+ VEX_LIG;
+
+defm CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
+ f128mem, load, "cvtsd2si{l}">, XD;
+defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse2_cvtsd2si64,
+ f128mem, load, "cvtsd2si{q}">, XD, REX_W;
+
+
+defm Int_VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
+ int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss", 0>, XS, VEX_4V;
+defm Int_VCVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
+ int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss", 0>, XS, VEX_4V,
+ VEX_W;
+defm Int_VCVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
+ int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd", 0>, XD, VEX_4V;
+defm Int_VCVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
+ int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd", 0>, XD,
+ VEX_4V, VEX_W;
+
+let Constraints = "$src1 = $dst" in {
+ defm Int_CVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
+ int_x86_sse_cvtsi2ss, i32mem, loadi32,
+ "cvtsi2ss">, XS;
+ defm Int_CVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
+ int_x86_sse_cvtsi642ss, i64mem, loadi64,
+ "cvtsi2ss{q}">, XS, REX_W;
+ defm Int_CVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
+ int_x86_sse2_cvtsi2sd, i32mem, loadi32,
+ "cvtsi2sd">, XD;
+ defm Int_CVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
+ int_x86_sse2_cvtsi642sd, i64mem, loadi64,
+ "cvtsi2sd">, XD, REX_W;
+}
+
+/// SSE 1 Only
+
+// Aliases for intrinsics
+defm Int_VCVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
+ f32mem, load, "cvttss2si">, XS, VEX;
+defm Int_VCVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
+ int_x86_sse_cvttss2si64, f32mem, load,
+ "cvttss2si">, XS, VEX, VEX_W;
+defm Int_VCVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si,
+ f128mem, load, "cvttsd2si">, XD, VEX;
+defm Int_VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
+ int_x86_sse2_cvttsd2si64, f128mem, load,
+ "cvttsd2si">, XD, VEX, VEX_W;
+defm Int_CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
+ f32mem, load, "cvttss2si">, XS;
+defm Int_CVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
+ int_x86_sse_cvttss2si64, f32mem, load,
+ "cvttss2si{q}">, XS, REX_W;
+defm Int_CVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si,
+ f128mem, load, "cvttsd2si">, XD;
+defm Int_CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
+ int_x86_sse2_cvttsd2si64, f128mem, load,
+ "cvttsd2si{q}">, XD, REX_W;
+
+let Pattern = []<dag> in {
+defm VCVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load,
+ "cvtss2si{l}\t{$src, $dst|$dst, $src}">, XS,
+ VEX, VEX_LIG;
+defm VCVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, undef, f32mem, load,
+ "cvtss2si\t{$src, $dst|$dst, $src}">, XS, VEX,
+ VEX_W, VEX_LIG;
+defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, i128mem, load,
+ "cvtdq2ps\t{$src, $dst|$dst, $src}",
+ SSEPackedSingle>, TB, VEX;
+defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, VR256, undef, i256mem, load,
+ "cvtdq2ps\t{$src, $dst|$dst, $src}",
+ SSEPackedSingle>, TB, VEX;
+}
+
+let Pattern = []<dag> in {
+defm CVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load /*dummy*/,
+ "cvtss2si{l}\t{$src, $dst|$dst, $src}">, XS;
+defm CVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, undef, f32mem, load /*dummy*/,
+ "cvtss2si{q}\t{$src, $dst|$dst, $src}">, XS, REX_W;
+defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, i128mem, load /*dummy*/,
+ "cvtdq2ps\t{$src, $dst|$dst, $src}",
+ SSEPackedSingle>, TB; /* PD SSE3 form is avaiable */
+}
+
+let Predicates = [HasSSE1] in {
+ def : Pat<(int_x86_sse_cvtss2si VR128:$src),
+ (CVTSS2SIrr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
+ def : Pat<(int_x86_sse_cvtss2si (load addr:$src)),
+ (CVTSS2SIrm addr:$src)>;
+ def : Pat<(int_x86_sse_cvtss2si64 VR128:$src),
+ (CVTSS2SI64rr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
+ def : Pat<(int_x86_sse_cvtss2si64 (load addr:$src)),
+ (CVTSS2SI64rm addr:$src)>;
+}
+
+let Predicates = [HasAVX] in {
+ def : Pat<(int_x86_sse_cvtss2si VR128:$src),
+ (VCVTSS2SIrr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
+ def : Pat<(int_x86_sse_cvtss2si (load addr:$src)),
+ (VCVTSS2SIrm addr:$src)>;
+ def : Pat<(int_x86_sse_cvtss2si64 VR128:$src),
+ (VCVTSS2SI64rr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
+ def : Pat<(int_x86_sse_cvtss2si64 (load addr:$src)),
+ (VCVTSS2SI64rm addr:$src)>;
+}
+
+/// SSE 2 Only
+
+// Convert scalar double to scalar single
+def VCVTSD2SSrr : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst),
+ (ins FR64:$src1, FR64:$src2),
+ "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
+ VEX_4V, VEX_LIG;
+let mayLoad = 1 in
+def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst),
+ (ins FR64:$src1, f64mem:$src2),
+ "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ []>, XD, Requires<[HasAVX, OptForSize]>, VEX_4V, VEX_LIG;
+
+def : Pat<(f32 (fround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>,
+ Requires<[HasAVX]>;
+
+def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
+ "cvtsd2ss\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (fround FR64:$src))]>;
+def CVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src),
+ "cvtsd2ss\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (fround (loadf64 addr:$src)))]>, XD,
+ Requires<[HasSSE2, OptForSize]>;
+
+defm Int_VCVTSD2SS: sse12_cvt_sint_3addr<0x5A, VR128, VR128,
+ int_x86_sse2_cvtsd2ss, f64mem, load, "cvtsd2ss", 0>,
+ XS, VEX_4V;
+let Constraints = "$src1 = $dst" in
+defm Int_CVTSD2SS: sse12_cvt_sint_3addr<0x5A, VR128, VR128,
+ int_x86_sse2_cvtsd2ss, f64mem, load, "cvtsd2ss">, XS;
+
+// Convert scalar single to scalar double
+// SSE2 instructions with XS prefix
+def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst),
+ (ins FR32:$src1, FR32:$src2),
+ "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ []>, XS, Requires<[HasAVX]>, VEX_4V, VEX_LIG;
+let mayLoad = 1 in
+def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
+ (ins FR32:$src1, f32mem:$src2),
+ "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ []>, XS, VEX_4V, VEX_LIG, Requires<[HasAVX, OptForSize]>;
+
+let Predicates = [HasAVX] in {
+ def : Pat<(f64 (fextend FR32:$src)),
+ (VCVTSS2SDrr FR32:$src, FR32:$src)>;
+ def : Pat<(fextend (loadf32 addr:$src)),
+ (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>;
+ def : Pat<(extloadf32 addr:$src),
+ (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>;
+}
+
+def : Pat<(extloadf32 addr:$src),
+ (VCVTSS2SDrr (f32 (IMPLICIT_DEF)), (MOVSSrm addr:$src))>,
+ Requires<[HasAVX, OptForSpeed]>;
+
+def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
+ "cvtss2sd\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (fextend FR32:$src))]>, XS,
+ Requires<[HasSSE2]>;
+def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src),
+ "cvtss2sd\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (extloadf32 addr:$src))]>, XS,
+ Requires<[HasSSE2, OptForSize]>;
+
+// extload f32 -> f64. This matches load+fextend because we have a hack in
+// the isel (PreprocessForFPConvert) that can introduce loads after dag
+// combine.
+// Since these loads aren't folded into the fextend, we have to match it
+// explicitly here.
+def : Pat<(fextend (loadf32 addr:$src)),
+ (CVTSS2SDrm addr:$src)>, Requires<[HasSSE2]>;
+def : Pat<(extloadf32 addr:$src),
+ (CVTSS2SDrr (MOVSSrm addr:$src))>, Requires<[HasSSE2, OptForSpeed]>;
+
+def Int_VCVTSS2SDrr: I<0x5A, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
+ VR128:$src2))]>, XS, VEX_4V,
+ Requires<[HasAVX]>;
+def Int_VCVTSS2SDrm: I<0x5A, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, f32mem:$src2),
+ "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
+ (load addr:$src2)))]>, XS, VEX_4V,
+ Requires<[HasAVX]>;
+let Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix
+def Int_CVTSS2SDrr: I<0x5A, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "cvtss2sd\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
+ VR128:$src2))]>, XS,
+ Requires<[HasSSE2]>;
+def Int_CVTSS2SDrm: I<0x5A, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, f32mem:$src2),
+ "cvtss2sd\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
+ (load addr:$src2)))]>, XS,
+ Requires<[HasSSE2]>;
+}
+
+// Convert doubleword to packed single/double fp
+// SSE2 instructions without OpSize prefix
+def Int_VCVTDQ2PSrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "vcvtdq2ps\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtdq2ps VR128:$src))]>,
+ TB, VEX, Requires<[HasAVX]>;
+def Int_VCVTDQ2PSrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "vcvtdq2ps\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtdq2ps
+ (bitconvert (memopv2i64 addr:$src))))]>,
+ TB, VEX, Requires<[HasAVX]>;
+def Int_CVTDQ2PSrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtdq2ps\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtdq2ps VR128:$src))]>,
+ TB, Requires<[HasSSE2]>;
+def Int_CVTDQ2PSrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "cvtdq2ps\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtdq2ps
+ (bitconvert (memopv2i64 addr:$src))))]>,
+ TB, Requires<[HasSSE2]>;
+
+// FIXME: why the non-intrinsic version is described as SSE3?
+// SSE2 instructions with XS prefix
+def Int_VCVTDQ2PDrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))]>,
+ XS, VEX, Requires<[HasAVX]>;
+def Int_VCVTDQ2PDrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtdq2pd
+ (bitconvert (memopv2i64 addr:$src))))]>,
+ XS, VEX, Requires<[HasAVX]>;
+def Int_CVTDQ2PDrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtdq2pd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))]>,
+ XS, Requires<[HasSSE2]>;
+def Int_CVTDQ2PDrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+ "cvtdq2pd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtdq2pd
+ (bitconvert (memopv2i64 addr:$src))))]>,
+ XS, Requires<[HasSSE2]>;
+
+
+// Convert packed single/double fp to doubleword
+def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPS2DQYrr : VPDI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPS2DQYrm : VPDI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+ "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtps2dq\t{$src, $dst|$dst, $src}", []>;
+def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvtps2dq\t{$src, $dst|$dst, $src}", []>;
+
+def Int_VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))]>,
+ VEX;
+def Int_VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst),
+ (ins f128mem:$src),
+ "cvtps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtps2dq
+ (memop addr:$src)))]>, VEX;
+def Int_CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))]>;
+def Int_CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvtps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtps2dq
+ (memop addr:$src)))]>;
+
+// SSE2 packed instructions with XD prefix
+def Int_VCVTPD2DQrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "vcvtpd2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>,
+ XD, VEX, Requires<[HasAVX]>;
+def Int_VCVTPD2DQrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "vcvtpd2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtpd2dq
+ (memop addr:$src)))]>,
+ XD, VEX, Requires<[HasAVX]>;
+def Int_CVTPD2DQrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtpd2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>,
+ XD, Requires<[HasSSE2]>;
+def Int_CVTPD2DQrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvtpd2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtpd2dq
+ (memop addr:$src)))]>,
+ XD, Requires<[HasSSE2]>;
+
+
+// Convert with truncation packed single/double fp to doubleword
+// SSE2 packed instructions with XS prefix
+def VCVTTPS2DQrr : VSSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+let mayLoad = 1 in
+def VCVTTPS2DQrm : VSSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTTPS2DQYrr : VSSI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+let mayLoad = 1 in
+def VCVTTPS2DQYrm : VSSI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+ "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+def CVTTPS2DQrr : SSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvttps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_sse2_cvttps2dq VR128:$src))]>;
+def CVTTPS2DQrm : SSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvttps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_sse2_cvttps2dq (memop addr:$src)))]>;
+
+def Int_VCVTTPS2DQrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "vcvttps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_sse2_cvttps2dq VR128:$src))]>,
+ XS, VEX, Requires<[HasAVX]>;
+def Int_VCVTTPS2DQrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "vcvttps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvttps2dq
+ (memop addr:$src)))]>,
+ XS, VEX, Requires<[HasAVX]>;
+
+let Predicates = [HasSSE2] in {
+ def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
+ (Int_CVTDQ2PSrr VR128:$src)>;
+ def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
+ (CVTTPS2DQrr VR128:$src)>;
+}
+
+let Predicates = [HasAVX] in {
+ def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
+ (Int_VCVTDQ2PSrr VR128:$src)>;
+ def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
+ (VCVTTPS2DQrr VR128:$src)>;
+ def : Pat<(v8f32 (sint_to_fp (v8i32 VR256:$src))),
+ (VCVTDQ2PSYrr VR256:$src)>;
+ def : Pat<(v8i32 (fp_to_sint (v8f32 VR256:$src))),
+ (VCVTTPS2DQYrr VR256:$src)>;
+}
+
+def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvttpd2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_sse2_cvttpd2dq VR128:$src))]>, VEX;
+let isCodeGenOnly = 1 in
+def VCVTTPD2DQrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvttpd2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvttpd2dq
+ (memop addr:$src)))]>, VEX;
+def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvttpd2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))]>;
+def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src),
+ "cvttpd2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvttpd2dq
+ (memop addr:$src)))]>;
+
+// The assembler can recognize rr 256-bit instructions by seeing a ymm
+// register, but the same isn't true when using memory operands instead.
+// Provide other assembly rr and rm forms to address this explicitly.
+def VCVTTPD2DQXrYr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
+ "cvttpd2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+
+// XMM only
+def VCVTTPD2DQXrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvttpd2dqx\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTTPD2DQXrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvttpd2dqx\t{$src, $dst|$dst, $src}", []>, VEX;
+
+// YMM only
+def VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
+ "cvttpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
+ "cvttpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L;
+
+// Convert packed single to packed double
+let Predicates = [HasAVX] in {
+ // SSE2 instructions without OpSize prefix
+def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, TB, VEX;
+def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
+ "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, TB, VEX;
+def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
+ "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, TB, VEX;
+def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
+ "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, TB, VEX;
+}
+def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtps2pd\t{$src, $dst|$dst, $src}", []>, TB;
+def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
+ "cvtps2pd\t{$src, $dst|$dst, $src}", []>, TB;
+
+def Int_VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "vcvtps2pd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))]>,
+ TB, VEX, Requires<[HasAVX]>;
+def Int_VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
+ "vcvtps2pd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtps2pd
+ (load addr:$src)))]>,
+ TB, VEX, Requires<[HasAVX]>;
+def Int_CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtps2pd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))]>,
+ TB, Requires<[HasSSE2]>;
+def Int_CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
+ "cvtps2pd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtps2pd
+ (load addr:$src)))]>,
+ TB, Requires<[HasSSE2]>;
+
+// Convert packed double to packed single
+// The assembler can recognize rr 256-bit instructions by seeing a ymm
+// register, but the same isn't true when using memory operands instead.
+// Provide other assembly rr and rm forms to address this explicitly.
+def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtpd2ps\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPD2PSXrYr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
+ "cvtpd2ps\t{$src, $dst|$dst, $src}", []>, VEX;
+
+// XMM only
+def VCVTPD2PSXrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtpd2psx\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPD2PSXrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvtpd2psx\t{$src, $dst|$dst, $src}", []>, VEX;
+
+// YMM only
+def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
+ "cvtpd2psy\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
+ "cvtpd2psy\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L;
+def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtpd2ps\t{$src, $dst|$dst, $src}", []>;
+def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvtpd2ps\t{$src, $dst|$dst, $src}", []>;
+
+
+def Int_VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtpd2ps\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))]>;
+def Int_VCVTPD2PSrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst),
+ (ins f128mem:$src),
+ "cvtpd2ps\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtpd2ps
+ (memop addr:$src)))]>;
+def Int_CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtpd2ps\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))]>;
+def Int_CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvtpd2ps\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtpd2ps
+ (memop addr:$src)))]>;
+
+// AVX 256-bit register conversion intrinsics
+// FIXME: Migrate SSE conversion intrinsics matching to use patterns as below
+// whenever possible to avoid declaring two versions of each one.
+def : Pat<(int_x86_avx_cvtdq2_ps_256 VR256:$src),
+ (VCVTDQ2PSYrr VR256:$src)>;
+def : Pat<(int_x86_avx_cvtdq2_ps_256 (memopv8i32 addr:$src)),
+ (VCVTDQ2PSYrm addr:$src)>;
+
+def : Pat<(int_x86_avx_cvt_pd2_ps_256 VR256:$src),
+ (VCVTPD2PSYrr VR256:$src)>;
+def : Pat<(int_x86_avx_cvt_pd2_ps_256 (memopv4f64 addr:$src)),
+ (VCVTPD2PSYrm addr:$src)>;
+
+def : Pat<(int_x86_avx_cvt_ps2dq_256 VR256:$src),
+ (VCVTPS2DQYrr VR256:$src)>;
+def : Pat<(int_x86_avx_cvt_ps2dq_256 (memopv8f32 addr:$src)),
+ (VCVTPS2DQYrm addr:$src)>;
+
+def : Pat<(int_x86_avx_cvt_ps2_pd_256 VR128:$src),
+ (VCVTPS2PDYrr VR128:$src)>;
+def : Pat<(int_x86_avx_cvt_ps2_pd_256 (memopv4f32 addr:$src)),
+ (VCVTPS2PDYrm addr:$src)>;
+
+def : Pat<(int_x86_avx_cvtt_pd2dq_256 VR256:$src),
+ (VCVTTPD2DQYrr VR256:$src)>;
+def : Pat<(int_x86_avx_cvtt_pd2dq_256 (memopv4f64 addr:$src)),
+ (VCVTTPD2DQYrm addr:$src)>;
+
+def : Pat<(int_x86_avx_cvtt_ps2dq_256 VR256:$src),
+ (VCVTTPS2DQYrr VR256:$src)>;
+def : Pat<(int_x86_avx_cvtt_ps2dq_256 (memopv8f32 addr:$src)),
+ (VCVTTPS2DQYrm addr:$src)>;
+
+// Match fround and fextend for 128/256-bit conversions
+def : Pat<(v4f32 (fround (v4f64 VR256:$src))),
+ (VCVTPD2PSYrr VR256:$src)>;
+def : Pat<(v4f32 (fround (loadv4f64 addr:$src))),
+ (VCVTPD2PSYrm addr:$src)>;
+
+def : Pat<(v4f64 (fextend (v4f32 VR128:$src))),
+ (VCVTPS2PDYrr VR128:$src)>;
+def : Pat<(v4f64 (fextend (loadv4f32 addr:$src))),
+ (VCVTPS2PDYrm addr:$src)>;
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Compare Instructions
+//===----------------------------------------------------------------------===//
+
+// sse12_cmp_scalar - sse 1 & 2 compare scalar instructions
+multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
+ SDNode OpNode, ValueType VT, PatFrag ld_frag,
+ string asm, string asm_alt> {
+ def rr : SIi8<0xC2, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, RC:$src2, SSECC:$cc), asm,
+ [(set RC:$dst, (OpNode (VT RC:$src1), RC:$src2, imm:$cc))]>;
+ def rm : SIi8<0xC2, MRMSrcMem,
+ (outs RC:$dst), (ins RC:$src1, x86memop:$src2, SSECC:$cc), asm,
+ [(set RC:$dst, (OpNode (VT RC:$src1),
+ (ld_frag addr:$src2), imm:$cc))]>;
+
+ // Accept explicit immediate argument form instead of comparison code.
+ let neverHasSideEffects = 1 in {
+ def rr_alt : SIi8<0xC2, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, i8imm:$cc), asm_alt, []>;
+ let mayLoad = 1 in
+ def rm_alt : SIi8<0xC2, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86memop:$src2, i8imm:$cc), asm_alt, []>;
+ }
+}
+
+defm VCMPSS : sse12_cmp_scalar<FR32, f32mem, X86cmpss, f32, loadf32,
+ "cmp${cc}ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ "cmpss\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
+ XS, VEX_4V, VEX_LIG;
+defm VCMPSD : sse12_cmp_scalar<FR64, f64mem, X86cmpsd, f64, loadf64,
+ "cmp${cc}sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ "cmpsd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
+ XD, VEX_4V, VEX_LIG;
+
+let Constraints = "$src1 = $dst" in {
+ defm CMPSS : sse12_cmp_scalar<FR32, f32mem, X86cmpss, f32, loadf32,
+ "cmp${cc}ss\t{$src2, $dst|$dst, $src2}",
+ "cmpss\t{$cc, $src2, $dst|$dst, $src2, $cc}">,
+ XS;
+ defm CMPSD : sse12_cmp_scalar<FR64, f64mem, X86cmpsd, f64, loadf64,
+ "cmp${cc}sd\t{$src2, $dst|$dst, $src2}",
+ "cmpsd\t{$cc, $src2, $dst|$dst, $src2, $cc}">,
+ XD;
+}
+
+multiclass sse12_cmp_scalar_int<RegisterClass RC, X86MemOperand x86memop,
+ Intrinsic Int, string asm> {
+ def rr : SIi8<0xC2, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src, SSECC:$cc), asm,
+ [(set VR128:$dst, (Int VR128:$src1,
+ VR128:$src, imm:$cc))]>;
+ def rm : SIi8<0xC2, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, f32mem:$src, SSECC:$cc), asm,
+ [(set VR128:$dst, (Int VR128:$src1,
+ (load addr:$src), imm:$cc))]>;
+}
+
+// Aliases to match intrinsics which expect XMM operand(s).
+defm Int_VCMPSS : sse12_cmp_scalar_int<VR128, f32mem, int_x86_sse_cmp_ss,
+ "cmp${cc}ss\t{$src, $src1, $dst|$dst, $src1, $src}">,
+ XS, VEX_4V;
+defm Int_VCMPSD : sse12_cmp_scalar_int<VR128, f64mem, int_x86_sse2_cmp_sd,
+ "cmp${cc}sd\t{$src, $src1, $dst|$dst, $src1, $src}">,
+ XD, VEX_4V;
+let Constraints = "$src1 = $dst" in {
+ defm Int_CMPSS : sse12_cmp_scalar_int<VR128, f32mem, int_x86_sse_cmp_ss,
+ "cmp${cc}ss\t{$src, $dst|$dst, $src}">, XS;
+ defm Int_CMPSD : sse12_cmp_scalar_int<VR128, f64mem, int_x86_sse2_cmp_sd,
+ "cmp${cc}sd\t{$src, $dst|$dst, $src}">, XD;
+}
+
+
+// sse12_ord_cmp - Unordered/Ordered scalar fp compare and set EFLAGS
+multiclass sse12_ord_cmp<bits<8> opc, RegisterClass RC, SDNode OpNode,
+ ValueType vt, X86MemOperand x86memop,
+ PatFrag ld_frag, string OpcodeStr, Domain d> {
+ def rr: PI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
+ [(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))], d>;
+ def rm: PI<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
+ [(set EFLAGS, (OpNode (vt RC:$src1),
+ (ld_frag addr:$src2)))], d>;
+}
+
+let Defs = [EFLAGS] in {
+ defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
+ "ucomiss", SSEPackedSingle>, TB, VEX, VEX_LIG;
+ defm VUCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64,
+ "ucomisd", SSEPackedDouble>, TB, OpSize, VEX,
+ VEX_LIG;
+ let Pattern = []<dag> in {
+ defm VCOMISS : sse12_ord_cmp<0x2F, VR128, undef, v4f32, f128mem, load,
+ "comiss", SSEPackedSingle>, TB, VEX,
+ VEX_LIG;
+ defm VCOMISD : sse12_ord_cmp<0x2F, VR128, undef, v2f64, f128mem, load,
+ "comisd", SSEPackedDouble>, TB, OpSize, VEX,
+ VEX_LIG;
+ }
+
+ defm Int_VUCOMISS : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem,
+ load, "ucomiss", SSEPackedSingle>, TB, VEX;
+ defm Int_VUCOMISD : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem,
+ load, "ucomisd", SSEPackedDouble>, TB, OpSize, VEX;
+
+ defm Int_VCOMISS : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem,
+ load, "comiss", SSEPackedSingle>, TB, VEX;
+ defm Int_VCOMISD : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem,
+ load, "comisd", SSEPackedDouble>, TB, OpSize, VEX;
+ defm UCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
+ "ucomiss", SSEPackedSingle>, TB;
+ defm UCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64,
+ "ucomisd", SSEPackedDouble>, TB, OpSize;
+
+ let Pattern = []<dag> in {
+ defm COMISS : sse12_ord_cmp<0x2F, VR128, undef, v4f32, f128mem, load,
+ "comiss", SSEPackedSingle>, TB;
+ defm COMISD : sse12_ord_cmp<0x2F, VR128, undef, v2f64, f128mem, load,
+ "comisd", SSEPackedDouble>, TB, OpSize;
+ }
+
+ defm Int_UCOMISS : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem,
+ load, "ucomiss", SSEPackedSingle>, TB;
+ defm Int_UCOMISD : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem,
+ load, "ucomisd", SSEPackedDouble>, TB, OpSize;
+
+ defm Int_COMISS : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem, load,
+ "comiss", SSEPackedSingle>, TB;
+ defm Int_COMISD : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem, load,
+ "comisd", SSEPackedDouble>, TB, OpSize;
+} // Defs = [EFLAGS]
+
+// sse12_cmp_packed - sse 1 & 2 compared packed instructions
+multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop,
+ Intrinsic Int, string asm, string asm_alt,
+ Domain d> {
+ let isAsmParserOnly = 1 in {
+ def rri : PIi8<0xC2, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, RC:$src2, SSECC:$cc), asm,
+ [(set RC:$dst, (Int RC:$src1, RC:$src2, imm:$cc))], d>;
+ def rmi : PIi8<0xC2, MRMSrcMem,
+ (outs RC:$dst), (ins RC:$src1, f128mem:$src2, SSECC:$cc), asm,
+ [(set RC:$dst, (Int RC:$src1, (memop addr:$src2), imm:$cc))], d>;
+ }
+
+ // Accept explicit immediate argument form instead of comparison code.
+ def rri_alt : PIi8<0xC2, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
+ asm_alt, [], d>;
+ def rmi_alt : PIi8<0xC2, MRMSrcMem,
+ (outs RC:$dst), (ins RC:$src1, f128mem:$src2, i8imm:$cc),
+ asm_alt, [], d>;
+}
+
+defm VCMPPS : sse12_cmp_packed<VR128, f128mem, int_x86_sse_cmp_ps,
+ "cmp${cc}ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ "cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
+ SSEPackedSingle>, TB, VEX_4V;
+defm VCMPPD : sse12_cmp_packed<VR128, f128mem, int_x86_sse2_cmp_pd,
+ "cmp${cc}pd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ "cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
+ SSEPackedDouble>, TB, OpSize, VEX_4V;
+defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, int_x86_avx_cmp_ps_256,
+ "cmp${cc}ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ "cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
+ SSEPackedSingle>, TB, VEX_4V;
+defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, int_x86_avx_cmp_pd_256,
+ "cmp${cc}pd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ "cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
+ SSEPackedDouble>, TB, OpSize, VEX_4V;
+let Constraints = "$src1 = $dst" in {
+ defm CMPPS : sse12_cmp_packed<VR128, f128mem, int_x86_sse_cmp_ps,
+ "cmp${cc}ps\t{$src2, $dst|$dst, $src2}",
+ "cmpps\t{$cc, $src2, $dst|$dst, $src2, $cc}",
+ SSEPackedSingle>, TB;
+ defm CMPPD : sse12_cmp_packed<VR128, f128mem, int_x86_sse2_cmp_pd,
+ "cmp${cc}pd\t{$src2, $dst|$dst, $src2}",
+ "cmppd\t{$cc, $src2, $dst|$dst, $src2, $cc}",
+ SSEPackedDouble>, TB, OpSize;
+}
+
+let Predicates = [HasSSE1] in {
+def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), VR128:$src2, imm:$cc)),
+ (CMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>;
+def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)),
+ (CMPPSrmi (v4f32 VR128:$src1), addr:$src2, imm:$cc)>;
+}
+
+let Predicates = [HasSSE2] in {
+def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), VR128:$src2, imm:$cc)),
+ (CMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>;
+def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)),
+ (CMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>;
+}
+
+let Predicates = [HasAVX] in {
+def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), VR128:$src2, imm:$cc)),
+ (VCMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>;
+def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)),
+ (VCMPPSrmi (v4f32 VR128:$src1), addr:$src2, imm:$cc)>;
+def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), VR128:$src2, imm:$cc)),
+ (VCMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>;
+def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)),
+ (VCMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>;
+
+def : Pat<(v8i32 (X86cmpps (v8f32 VR256:$src1), VR256:$src2, imm:$cc)),
+ (VCMPPSYrri (v8f32 VR256:$src1), (v8f32 VR256:$src2), imm:$cc)>;
+def : Pat<(v8i32 (X86cmpps (v8f32 VR256:$src1), (memop addr:$src2), imm:$cc)),
+ (VCMPPSYrmi (v8f32 VR256:$src1), addr:$src2, imm:$cc)>;
+def : Pat<(v4i64 (X86cmppd (v4f64 VR256:$src1), VR256:$src2, imm:$cc)),
+ (VCMPPDYrri VR256:$src1, VR256:$src2, imm:$cc)>;
+def : Pat<(v4i64 (X86cmppd (v4f64 VR256:$src1), (memop addr:$src2), imm:$cc)),
+ (VCMPPDYrmi VR256:$src1, addr:$src2, imm:$cc)>;
+}
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Shuffle Instructions
+//===----------------------------------------------------------------------===//
+
+/// sse12_shuffle - sse 1 & 2 shuffle instructions
+multiclass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop,
+ ValueType vt, string asm, PatFrag mem_frag,
+ Domain d, bit IsConvertibleToThreeAddress = 0> {
+ def rmi : PIi8<0xC6, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, f128mem:$src2, i8imm:$src3), asm,
+ [(set RC:$dst, (vt (shufp:$src3
+ RC:$src1, (mem_frag addr:$src2))))], d>;
+ let isConvertibleToThreeAddress = IsConvertibleToThreeAddress in
+ def rri : PIi8<0xC6, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, i8imm:$src3), asm,
+ [(set RC:$dst,
+ (vt (shufp:$src3 RC:$src1, RC:$src2)))], d>;
+}
+
+defm VSHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
+ "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ memopv4f32, SSEPackedSingle>, TB, VEX_4V;
+defm VSHUFPSY : sse12_shuffle<VR256, f256mem, v8f32,
+ "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ memopv8f32, SSEPackedSingle>, TB, VEX_4V;
+defm VSHUFPD : sse12_shuffle<VR128, f128mem, v2f64,
+ "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src2, $src2, $src3}",
+ memopv2f64, SSEPackedDouble>, TB, OpSize, VEX_4V;
+defm VSHUFPDY : sse12_shuffle<VR256, f256mem, v4f64,
+ "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src2, $src2, $src3}",
+ memopv4f64, SSEPackedDouble>, TB, OpSize, VEX_4V;
+
+let Constraints = "$src1 = $dst" in {
+ defm SHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
+ "shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ memopv4f32, SSEPackedSingle, 1 /* cvt to pshufd */>,
+ TB;
+ defm SHUFPD : sse12_shuffle<VR128, f128mem, v2f64,
+ "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ memopv2f64, SSEPackedDouble>, TB, OpSize;
+}
+
+let Predicates = [HasSSE1] in {
+ def : Pat<(v4f32 (X86Shufps VR128:$src1,
+ (memopv4f32 addr:$src2), (i8 imm:$imm))),
+ (SHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>;
+ def : Pat<(v4f32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (SHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>;
+ def : Pat<(v4i32 (X86Shufps VR128:$src1,
+ (bc_v4i32 (memopv2i64 addr:$src2)), (i8 imm:$imm))),
+ (SHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>;
+ def : Pat<(v4i32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (SHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>;
+ // vector_shuffle v1, v2 <4, 5, 2, 3> using SHUFPSrri (we prefer movsd, but
+ // fall back to this for SSE1)
+ def : Pat<(v4f32 (movlp:$src3 VR128:$src1, (v4f32 VR128:$src2))),
+ (SHUFPSrri VR128:$src2, VR128:$src1,
+ (SHUFFLE_get_shuf_imm VR128:$src3))>;
+ // Special unary SHUFPSrri case.
+ def : Pat<(v4f32 (pshufd:$src3 VR128:$src1, (undef))),
+ (SHUFPSrri VR128:$src1, VR128:$src1,
+ (SHUFFLE_get_shuf_imm VR128:$src3))>;
+}
+
+let Predicates = [HasSSE2] in {
+ // Special binary v4i32 shuffle cases with SHUFPS.
+ def : Pat<(v4i32 (shufp:$src3 VR128:$src1, (v4i32 VR128:$src2))),
+ (SHUFPSrri VR128:$src1, VR128:$src2,
+ (SHUFFLE_get_shuf_imm VR128:$src3))>;
+ def : Pat<(v4i32 (shufp:$src3 VR128:$src1,
+ (bc_v4i32 (memopv2i64 addr:$src2)))),
+ (SHUFPSrmi VR128:$src1, addr:$src2,
+ (SHUFFLE_get_shuf_imm VR128:$src3))>;
+ // Special unary SHUFPDrri cases.
+ def : Pat<(v2i64 (pshufd:$src3 VR128:$src1, (undef))),
+ (SHUFPDrri VR128:$src1, VR128:$src1,
+ (SHUFFLE_get_shuf_imm VR128:$src3))>;
+ def : Pat<(v2f64 (pshufd:$src3 VR128:$src1, (undef))),
+ (SHUFPDrri VR128:$src1, VR128:$src1,
+ (SHUFFLE_get_shuf_imm VR128:$src3))>;
+ // Special binary v2i64 shuffle cases using SHUFPDrri.
+ def : Pat<(v2i64 (shufp:$src3 VR128:$src1, VR128:$src2)),
+ (SHUFPDrri VR128:$src1, VR128:$src2,
+ (SHUFFLE_get_shuf_imm VR128:$src3))>;
+ // Generic SHUFPD patterns
+ def : Pat<(v2f64 (X86Shufps VR128:$src1,
+ (memopv2f64 addr:$src2), (i8 imm:$imm))),
+ (SHUFPDrmi VR128:$src1, addr:$src2, imm:$imm)>;
+ def : Pat<(v2i64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (SHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>;
+ def : Pat<(v2f64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (SHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>;
+}
+
+let Predicates = [HasAVX] in {
+ def : Pat<(v4f32 (X86Shufps VR128:$src1,
+ (memopv4f32 addr:$src2), (i8 imm:$imm))),
+ (VSHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>;
+ def : Pat<(v4f32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (VSHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>;
+ def : Pat<(v4i32 (X86Shufps VR128:$src1,
+ (bc_v4i32 (memopv2i64 addr:$src2)), (i8 imm:$imm))),
+ (VSHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>;
+ def : Pat<(v4i32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (VSHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>;
+ // vector_shuffle v1, v2 <4, 5, 2, 3> using SHUFPSrri (we prefer movsd, but
+ // fall back to this for SSE1)
+ def : Pat<(v4f32 (movlp:$src3 VR128:$src1, (v4f32 VR128:$src2))),
+ (VSHUFPSrri VR128:$src2, VR128:$src1,
+ (SHUFFLE_get_shuf_imm VR128:$src3))>;
+ // Special unary SHUFPSrri case.
+ def : Pat<(v4f32 (pshufd:$src3 VR128:$src1, (undef))),
+ (VSHUFPSrri VR128:$src1, VR128:$src1,
+ (SHUFFLE_get_shuf_imm VR128:$src3))>;
+ // Special binary v4i32 shuffle cases with SHUFPS.
+ def : Pat<(v4i32 (shufp:$src3 VR128:$src1, (v4i32 VR128:$src2))),
+ (VSHUFPSrri VR128:$src1, VR128:$src2,
+ (SHUFFLE_get_shuf_imm VR128:$src3))>;
+ def : Pat<(v4i32 (shufp:$src3 VR128:$src1,
+ (bc_v4i32 (memopv2i64 addr:$src2)))),
+ (VSHUFPSrmi VR128:$src1, addr:$src2,
+ (SHUFFLE_get_shuf_imm VR128:$src3))>;
+ // Special unary SHUFPDrri cases.
+ def : Pat<(v2i64 (pshufd:$src3 VR128:$src1, (undef))),
+ (VSHUFPDrri VR128:$src1, VR128:$src1,
+ (SHUFFLE_get_shuf_imm VR128:$src3))>;
+ def : Pat<(v2f64 (pshufd:$src3 VR128:$src1, (undef))),
+ (VSHUFPDrri VR128:$src1, VR128:$src1,
+ (SHUFFLE_get_shuf_imm VR128:$src3))>;
+ // Special binary v2i64 shuffle cases using SHUFPDrri.
+ def : Pat<(v2i64 (shufp:$src3 VR128:$src1, VR128:$src2)),
+ (VSHUFPDrri VR128:$src1, VR128:$src2,
+ (SHUFFLE_get_shuf_imm VR128:$src3))>;
+
+ def : Pat<(v2f64 (X86Shufps VR128:$src1,
+ (memopv2f64 addr:$src2), (i8 imm:$imm))),
+ (VSHUFPDrmi VR128:$src1, addr:$src2, imm:$imm)>;
+ def : Pat<(v2i64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (VSHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>;
+ def : Pat<(v2f64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (VSHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>;
+
+ // 256-bit patterns
+ def : Pat<(v8i32 (X86Shufps VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VSHUFPSYrri VR256:$src1, VR256:$src2, imm:$imm)>;
+ def : Pat<(v8i32 (X86Shufps VR256:$src1,
+ (bc_v8i32 (memopv4i64 addr:$src2)), (i8 imm:$imm))),
+ (VSHUFPSYrmi VR256:$src1, addr:$src2, imm:$imm)>;
+
+ def : Pat<(v8f32 (X86Shufps VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VSHUFPSYrri VR256:$src1, VR256:$src2, imm:$imm)>;
+ def : Pat<(v8f32 (X86Shufps VR256:$src1,
+ (memopv8f32 addr:$src2), (i8 imm:$imm))),
+ (VSHUFPSYrmi VR256:$src1, addr:$src2, imm:$imm)>;
+
+ def : Pat<(v4i64 (X86Shufpd VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VSHUFPDYrri VR256:$src1, VR256:$src2, imm:$imm)>;
+ def : Pat<(v4i64 (X86Shufpd VR256:$src1,
+ (memopv4i64 addr:$src2), (i8 imm:$imm))),
+ (VSHUFPDYrmi VR256:$src1, addr:$src2, imm:$imm)>;
+
+ def : Pat<(v4f64 (X86Shufpd VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VSHUFPDYrri VR256:$src1, VR256:$src2, imm:$imm)>;
+ def : Pat<(v4f64 (X86Shufpd VR256:$src1,
+ (memopv4f64 addr:$src2), (i8 imm:$imm))),
+ (VSHUFPDYrmi VR256:$src1, addr:$src2, imm:$imm)>;
+}
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Unpack Instructions
+//===----------------------------------------------------------------------===//
+
+/// sse12_unpack_interleave - sse 1 & 2 unpack and interleave
+multiclass sse12_unpack_interleave<bits<8> opc, PatFrag OpNode, ValueType vt,
+ PatFrag mem_frag, RegisterClass RC,
+ X86MemOperand x86memop, string asm,
+ Domain d> {
+ def rr : PI<opc, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ asm, [(set RC:$dst,
+ (vt (OpNode RC:$src1, RC:$src2)))], d>;
+ def rm : PI<opc, MRMSrcMem,
+ (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
+ asm, [(set RC:$dst,
+ (vt (OpNode RC:$src1,
+ (mem_frag addr:$src2))))], d>;
+}
+
+let AddedComplexity = 10 in {
+ defm VUNPCKHPS: sse12_unpack_interleave<0x15, unpckh, v4f32, memopv4f32,
+ VR128, f128mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedSingle>, TB, VEX_4V;
+ defm VUNPCKHPD: sse12_unpack_interleave<0x15, unpckh, v2f64, memopv2f64,
+ VR128, f128mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedDouble>, TB, OpSize, VEX_4V;
+ defm VUNPCKLPS: sse12_unpack_interleave<0x14, unpckl, v4f32, memopv4f32,
+ VR128, f128mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedSingle>, TB, VEX_4V;
+ defm VUNPCKLPD: sse12_unpack_interleave<0x14, unpckl, v2f64, memopv2f64,
+ VR128, f128mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedDouble>, TB, OpSize, VEX_4V;
+
+ defm VUNPCKHPSY: sse12_unpack_interleave<0x15, unpckh, v8f32, memopv8f32,
+ VR256, f256mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedSingle>, TB, VEX_4V;
+ defm VUNPCKHPDY: sse12_unpack_interleave<0x15, unpckh, v4f64, memopv4f64,
+ VR256, f256mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedDouble>, TB, OpSize, VEX_4V;
+ defm VUNPCKLPSY: sse12_unpack_interleave<0x14, unpckl, v8f32, memopv8f32,
+ VR256, f256mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedSingle>, TB, VEX_4V;
+ defm VUNPCKLPDY: sse12_unpack_interleave<0x14, unpckl, v4f64, memopv4f64,
+ VR256, f256mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedDouble>, TB, OpSize, VEX_4V;
+
+ let Constraints = "$src1 = $dst" in {
+ defm UNPCKHPS: sse12_unpack_interleave<0x15, unpckh, v4f32, memopv4f32,
+ VR128, f128mem, "unpckhps\t{$src2, $dst|$dst, $src2}",
+ SSEPackedSingle>, TB;
+ defm UNPCKHPD: sse12_unpack_interleave<0x15, unpckh, v2f64, memopv2f64,
+ VR128, f128mem, "unpckhpd\t{$src2, $dst|$dst, $src2}",
+ SSEPackedDouble>, TB, OpSize;
+ defm UNPCKLPS: sse12_unpack_interleave<0x14, unpckl, v4f32, memopv4f32,
+ VR128, f128mem, "unpcklps\t{$src2, $dst|$dst, $src2}",
+ SSEPackedSingle>, TB;
+ defm UNPCKLPD: sse12_unpack_interleave<0x14, unpckl, v2f64, memopv2f64,
+ VR128, f128mem, "unpcklpd\t{$src2, $dst|$dst, $src2}",
+ SSEPackedDouble>, TB, OpSize;
+ } // Constraints = "$src1 = $dst"
+} // AddedComplexity
+
+let Predicates = [HasSSE1] in {
+ def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))),
+ (UNPCKLPSrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)),
+ (UNPCKLPSrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v4f32 (X86Unpckhps VR128:$src1, (memopv4f32 addr:$src2))),
+ (UNPCKHPSrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v4f32 (X86Unpckhps VR128:$src1, VR128:$src2)),
+ (UNPCKHPSrr VR128:$src1, VR128:$src2)>;
+}
+
+let Predicates = [HasSSE2] in {
+ def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))),
+ (UNPCKLPDrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)),
+ (UNPCKLPDrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, (memopv2f64 addr:$src2))),
+ (UNPCKHPDrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, VR128:$src2)),
+ (UNPCKHPDrr VR128:$src1, VR128:$src2)>;
+
+ // FIXME: Instead of X86Movddup, there should be a X86Unpcklpd here, the
+ // problem is during lowering, where it's not possible to recognize the load
+ // fold cause it has two uses through a bitcast. One use disappears at isel
+ // time and the fold opportunity reappears.
+ def : Pat<(v2f64 (X86Movddup VR128:$src)),
+ (UNPCKLPDrr VR128:$src, VR128:$src)>;
+
+ let AddedComplexity = 10 in
+ def : Pat<(splat_lo (v2f64 VR128:$src), (undef)),
+ (UNPCKLPDrr VR128:$src, VR128:$src)>;
+}
+
+let Predicates = [HasAVX] in {
+ def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))),
+ (VUNPCKLPSrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)),
+ (VUNPCKLPSrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v4f32 (X86Unpckhps VR128:$src1, (memopv4f32 addr:$src2))),
+ (VUNPCKHPSrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v4f32 (X86Unpckhps VR128:$src1, VR128:$src2)),
+ (VUNPCKHPSrr VR128:$src1, VR128:$src2)>;
+
+ def : Pat<(v8f32 (X86Unpcklpsy VR256:$src1, (memopv8f32 addr:$src2))),
+ (VUNPCKLPSYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(v8f32 (X86Unpcklpsy VR256:$src1, VR256:$src2)),
+ (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v8i32 (X86Unpcklpsy VR256:$src1, VR256:$src2)),
+ (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v8i32 (X86Unpcklpsy VR256:$src1, (memopv8i32 addr:$src2))),
+ (VUNPCKLPSYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(v8f32 (X86Unpckhpsy VR256:$src1, (memopv8f32 addr:$src2))),
+ (VUNPCKHPSYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(v8f32 (X86Unpckhpsy VR256:$src1, VR256:$src2)),
+ (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v8i32 (X86Unpckhpsy VR256:$src1, (memopv8i32 addr:$src2))),
+ (VUNPCKHPSYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(v8i32 (X86Unpckhpsy VR256:$src1, VR256:$src2)),
+ (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>;
+
+ def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))),
+ (VUNPCKLPDrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)),
+ (VUNPCKLPDrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, (memopv2f64 addr:$src2))),
+ (VUNPCKHPDrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, VR128:$src2)),
+ (VUNPCKHPDrr VR128:$src1, VR128:$src2)>;
+
+ def : Pat<(v4f64 (X86Unpcklpdy VR256:$src1, (memopv4f64 addr:$src2))),
+ (VUNPCKLPDYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(v4f64 (X86Unpcklpdy VR256:$src1, VR256:$src2)),
+ (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v4i64 (X86Unpcklpdy VR256:$src1, (memopv4i64 addr:$src2))),
+ (VUNPCKLPDYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(v4i64 (X86Unpcklpdy VR256:$src1, VR256:$src2)),
+ (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v4f64 (X86Unpckhpdy VR256:$src1, (memopv4f64 addr:$src2))),
+ (VUNPCKHPDYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(v4f64 (X86Unpckhpdy VR256:$src1, VR256:$src2)),
+ (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v4i64 (X86Unpckhpdy VR256:$src1, (memopv4i64 addr:$src2))),
+ (VUNPCKHPDYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(v4i64 (X86Unpckhpdy VR256:$src1, VR256:$src2)),
+ (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>;
+
+ // FIXME: Instead of X86Movddup, there should be a X86Unpcklpd here, the
+ // problem is during lowering, where it's not possible to recognize the load
+ // fold cause it has two uses through a bitcast. One use disappears at isel
+ // time and the fold opportunity reappears.
+ def : Pat<(v2f64 (X86Movddup VR128:$src)),
+ (VUNPCKLPDrr VR128:$src, VR128:$src)>;
+ let AddedComplexity = 10 in
+ def : Pat<(splat_lo (v2f64 VR128:$src), (undef)),
+ (VUNPCKLPDrr VR128:$src, VR128:$src)>;
+}
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Extract Floating-Point Sign mask
+//===----------------------------------------------------------------------===//
+
+/// sse12_extr_sign_mask - sse 1 & 2 unpack and interleave
+multiclass sse12_extr_sign_mask<RegisterClass RC, Intrinsic Int, string asm,
+ Domain d> {
+ def rr32 : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins RC:$src),
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
+ [(set GR32:$dst, (Int RC:$src))], d>;
+ def rr64 : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins RC:$src),
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], d>, REX_W;
+}
+
+defm MOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps, "movmskps",
+ SSEPackedSingle>, TB;
+defm MOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd, "movmskpd",
+ SSEPackedDouble>, TB, OpSize;
+
+def : Pat<(i32 (X86fgetsign FR32:$src)),
+ (MOVMSKPSrr32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src,
+ sub_ss))>, Requires<[HasSSE1]>;
+def : Pat<(i64 (X86fgetsign FR32:$src)),
+ (MOVMSKPSrr64 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src,
+ sub_ss))>, Requires<[HasSSE1]>;
+def : Pat<(i32 (X86fgetsign FR64:$src)),
+ (MOVMSKPDrr32 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src,
+ sub_sd))>, Requires<[HasSSE2]>;
+def : Pat<(i64 (X86fgetsign FR64:$src)),
+ (MOVMSKPDrr64 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src,
+ sub_sd))>, Requires<[HasSSE2]>;
+
+let Predicates = [HasAVX] in {
+ defm VMOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps,
+ "movmskps", SSEPackedSingle>, TB, VEX;
+ defm VMOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd,
+ "movmskpd", SSEPackedDouble>, TB,
+ OpSize, VEX;
+ defm VMOVMSKPSY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_ps_256,
+ "movmskps", SSEPackedSingle>, TB, VEX;
+ defm VMOVMSKPDY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_pd_256,
+ "movmskpd", SSEPackedDouble>, TB,
+ OpSize, VEX;
+
+ def : Pat<(i32 (X86fgetsign FR32:$src)),
+ (VMOVMSKPSrr32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src,
+ sub_ss))>;
+ def : Pat<(i64 (X86fgetsign FR32:$src)),
+ (VMOVMSKPSrr64 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src,
+ sub_ss))>;
+ def : Pat<(i32 (X86fgetsign FR64:$src)),
+ (VMOVMSKPDrr32 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src,
+ sub_sd))>;
+ def : Pat<(i64 (X86fgetsign FR64:$src)),
+ (VMOVMSKPDrr64 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src,
+ sub_sd))>;
+
+ // Assembler Only
+ def VMOVMSKPSr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
+ "movmskps\t{$src, $dst|$dst, $src}", [], SSEPackedSingle>, TB, VEX;
+ def VMOVMSKPDr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
+ "movmskpd\t{$src, $dst|$dst, $src}", [], SSEPackedDouble>, TB,
+ OpSize, VEX;
+ def VMOVMSKPSYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
+ "movmskps\t{$src, $dst|$dst, $src}", [], SSEPackedSingle>, TB, VEX;
+ def VMOVMSKPDYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
+ "movmskpd\t{$src, $dst|$dst, $src}", [], SSEPackedDouble>, TB,
+ OpSize, VEX;
+}
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Logical Instructions
+//===----------------------------------------------------------------------===//
+
+/// sse12_fp_alias_pack_logical - SSE 1 & 2 aliased packed FP logical ops
+///
+multiclass sse12_fp_alias_pack_logical<bits<8> opc, string OpcodeStr,
+ SDNode OpNode> {
+ defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
+ FR32, f32, f128mem, memopfsf32, SSEPackedSingle, 0>, TB, VEX_4V;
+
+ defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode,
+ FR64, f64, f128mem, memopfsf64, SSEPackedDouble, 0>, TB, OpSize, VEX_4V;
+
+ let Constraints = "$src1 = $dst" in {
+ defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, FR32,
+ f32, f128mem, memopfsf32, SSEPackedSingle>, TB;
+
+ defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, FR64,
+ f64, f128mem, memopfsf64, SSEPackedDouble>, TB, OpSize;
+ }
+}
+
+// Alias bitwise logical operations using SSE logical ops on packed FP values.
+let mayLoad = 0 in {
+ defm FsAND : sse12_fp_alias_pack_logical<0x54, "and", X86fand>;
+ defm FsOR : sse12_fp_alias_pack_logical<0x56, "or", X86for>;
+ defm FsXOR : sse12_fp_alias_pack_logical<0x57, "xor", X86fxor>;
+}
+
+let neverHasSideEffects = 1, Pattern = []<dag>, isCommutable = 0 in
+ defm FsANDN : sse12_fp_alias_pack_logical<0x55, "andn", undef>;
+
+/// sse12_fp_packed_logical - SSE 1 & 2 packed FP logical ops
+///
+multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
+ SDNode OpNode> {
+ // In AVX no need to add a pattern for 128-bit logical rr ps, because they
+ // are all promoted to v2i64, and the patterns are covered by the int
+ // version. This is needed in SSE only, because v2i64 isn't supported on
+ // SSE1, but only on SSE2.
+ defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
+ !strconcat(OpcodeStr, "ps"), f128mem, [],
+ [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
+ (memopv2i64 addr:$src2)))], 0>, TB, VEX_4V;
+
+ defm V#NAME#PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
+ !strconcat(OpcodeStr, "pd"), f128mem,
+ [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
+ (bc_v2i64 (v2f64 VR128:$src2))))],
+ [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
+ (memopv2i64 addr:$src2)))], 0>,
+ TB, OpSize, VEX_4V;
+ let Constraints = "$src1 = $dst" in {
+ defm PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
+ !strconcat(OpcodeStr, "ps"), f128mem,
+ [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))],
+ [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
+ (memopv2i64 addr:$src2)))]>, TB;
+
+ defm PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
+ !strconcat(OpcodeStr, "pd"), f128mem,
+ [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
+ (bc_v2i64 (v2f64 VR128:$src2))))],
+ [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
+ (memopv2i64 addr:$src2)))]>, TB, OpSize;
+ }
+}
+
+/// sse12_fp_packed_logical_y - AVX 256-bit SSE 1 & 2 logical ops forms
+///
+multiclass sse12_fp_packed_logical_y<bits<8> opc, string OpcodeStr,
+ SDNode OpNode> {
+ defm PSY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedSingle,
+ !strconcat(OpcodeStr, "ps"), f256mem,
+ [(set VR256:$dst, (v4i64 (OpNode VR256:$src1, VR256:$src2)))],
+ [(set VR256:$dst, (OpNode (bc_v4i64 (v8f32 VR256:$src1)),
+ (memopv4i64 addr:$src2)))], 0>, TB, VEX_4V;
+
+ defm PDY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedDouble,
+ !strconcat(OpcodeStr, "pd"), f256mem,
+ [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)),
+ (bc_v4i64 (v4f64 VR256:$src2))))],
+ [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)),
+ (memopv4i64 addr:$src2)))], 0>,
+ TB, OpSize, VEX_4V;
+}
+
+// AVX 256-bit packed logical ops forms
+defm VAND : sse12_fp_packed_logical_y<0x54, "and", and>;
+defm VOR : sse12_fp_packed_logical_y<0x56, "or", or>;
+defm VXOR : sse12_fp_packed_logical_y<0x57, "xor", xor>;
+defm VANDN : sse12_fp_packed_logical_y<0x55, "andn", X86andnp>;
+
+defm AND : sse12_fp_packed_logical<0x54, "and", and>;
+defm OR : sse12_fp_packed_logical<0x56, "or", or>;
+defm XOR : sse12_fp_packed_logical<0x57, "xor", xor>;
+let isCommutable = 0 in
+ defm ANDN : sse12_fp_packed_logical<0x55, "andn", X86andnp>;
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Arithmetic Instructions
+//===----------------------------------------------------------------------===//
+
+/// basic_sse12_fp_binop_xxx - SSE 1 & 2 binops come in both scalar and
+/// vector forms.
+///
+/// In addition, we also have a special variant of the scalar form here to
+/// represent the associated intrinsic operation. This form is unlike the
+/// plain scalar form, in that it takes an entire vector (instead of a scalar)
+/// and leaves the top elements unmodified (therefore these cannot be commuted).
+///
+/// These three forms can each be reg+reg or reg+mem.
+///
+
+/// FIXME: once all 256-bit intrinsics are matched, cleanup and refactor those
+/// classes below
+multiclass basic_sse12_fp_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ bit Is2Addr = 1> {
+ defm SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"),
+ OpNode, FR32, f32mem, Is2Addr>, XS;
+ defm SD : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"),
+ OpNode, FR64, f64mem, Is2Addr>, XD;
+}
+
+multiclass basic_sse12_fp_binop_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ bit Is2Addr = 1> {
+ let mayLoad = 0 in {
+ defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR128,
+ v4f32, f128mem, memopv4f32, SSEPackedSingle, Is2Addr>, TB;
+ defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR128,
+ v2f64, f128mem, memopv2f64, SSEPackedDouble, Is2Addr>, TB, OpSize;
+ }
+}
+
+multiclass basic_sse12_fp_binop_p_y<bits<8> opc, string OpcodeStr,
+ SDNode OpNode> {
+ let mayLoad = 0 in {
+ defm PSY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR256,
+ v8f32, f256mem, memopv8f32, SSEPackedSingle, 0>, TB;
+ defm PDY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR256,
+ v4f64, f256mem, memopv4f64, SSEPackedDouble, 0>, TB, OpSize;
+ }
+}
+
+multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr,
+ bit Is2Addr = 1> {
+ defm SS : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
+ !strconcat(OpcodeStr, "ss"), "", "_ss", ssmem, sse_load_f32, Is2Addr>, XS;
+ defm SD : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
+ !strconcat(OpcodeStr, "sd"), "2", "_sd", sdmem, sse_load_f64, Is2Addr>, XD;
+}
+
+multiclass basic_sse12_fp_binop_p_int<bits<8> opc, string OpcodeStr,
+ bit Is2Addr = 1> {
+ defm PS : sse12_fp_packed_int<opc, OpcodeStr, VR128,
+ !strconcat(OpcodeStr, "ps"), "sse", "_ps", f128mem, memopv4f32,
+ SSEPackedSingle, Is2Addr>, TB;
+
+ defm PD : sse12_fp_packed_int<opc, OpcodeStr, VR128,
+ !strconcat(OpcodeStr, "pd"), "sse2", "_pd", f128mem, memopv2f64,
+ SSEPackedDouble, Is2Addr>, TB, OpSize;
+}
+
+multiclass basic_sse12_fp_binop_p_y_int<bits<8> opc, string OpcodeStr> {
+ defm PSY : sse12_fp_packed_int<opc, OpcodeStr, VR256,
+ !strconcat(OpcodeStr, "ps"), "avx", "_ps_256", f256mem, memopv8f32,
+ SSEPackedSingle, 0>, TB;
+
+ defm PDY : sse12_fp_packed_int<opc, OpcodeStr, VR256,
+ !strconcat(OpcodeStr, "pd"), "avx", "_pd_256", f256mem, memopv4f64,
+ SSEPackedDouble, 0>, TB, OpSize;
+}
+
+// Binary Arithmetic instructions
+defm VADD : basic_sse12_fp_binop_s<0x58, "add", fadd, 0>,
+ basic_sse12_fp_binop_s_int<0x58, "add", 0>, VEX_4V, VEX_LIG;
+defm VADD : basic_sse12_fp_binop_p<0x58, "add", fadd, 0>,
+ basic_sse12_fp_binop_p_y<0x58, "add", fadd>, VEX_4V;
+defm VMUL : basic_sse12_fp_binop_s<0x59, "mul", fmul, 0>,
+ basic_sse12_fp_binop_s_int<0x59, "mul", 0>, VEX_4V, VEX_LIG;
+defm VMUL : basic_sse12_fp_binop_p<0x59, "mul", fmul, 0>,
+ basic_sse12_fp_binop_p_y<0x59, "mul", fmul>, VEX_4V;
+
+let isCommutable = 0 in {
+ defm VSUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub, 0>,
+ basic_sse12_fp_binop_s_int<0x5C, "sub", 0>, VEX_4V, VEX_LIG;
+ defm VSUB : basic_sse12_fp_binop_p<0x5C, "sub", fsub, 0>,
+ basic_sse12_fp_binop_p_y<0x5C, "sub", fsub>, VEX_4V;
+ defm VDIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv, 0>,
+ basic_sse12_fp_binop_s_int<0x5E, "div", 0>, VEX_4V, VEX_LIG;
+ defm VDIV : basic_sse12_fp_binop_p<0x5E, "div", fdiv, 0>,
+ basic_sse12_fp_binop_p_y<0x5E, "div", fdiv>, VEX_4V;
+ defm VMAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax, 0>,
+ basic_sse12_fp_binop_s_int<0x5F, "max", 0>, VEX_4V, VEX_LIG;
+ defm VMAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, 0>,
+ basic_sse12_fp_binop_p_int<0x5F, "max", 0>,
+ basic_sse12_fp_binop_p_y<0x5F, "max", X86fmax>,
+ basic_sse12_fp_binop_p_y_int<0x5F, "max">, VEX_4V;
+ defm VMIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin, 0>,
+ basic_sse12_fp_binop_s_int<0x5D, "min", 0>, VEX_4V, VEX_LIG;
+ defm VMIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, 0>,
+ basic_sse12_fp_binop_p_int<0x5D, "min", 0>,
+ basic_sse12_fp_binop_p_y_int<0x5D, "min">,
+ basic_sse12_fp_binop_p_y<0x5D, "min", X86fmin>, VEX_4V;
+}
+
+let Constraints = "$src1 = $dst" in {
+ defm ADD : basic_sse12_fp_binop_s<0x58, "add", fadd>,
+ basic_sse12_fp_binop_p<0x58, "add", fadd>,
+ basic_sse12_fp_binop_s_int<0x58, "add">;
+ defm MUL : basic_sse12_fp_binop_s<0x59, "mul", fmul>,
+ basic_sse12_fp_binop_p<0x59, "mul", fmul>,
+ basic_sse12_fp_binop_s_int<0x59, "mul">;
+
+ let isCommutable = 0 in {
+ defm SUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub>,
+ basic_sse12_fp_binop_p<0x5C, "sub", fsub>,
+ basic_sse12_fp_binop_s_int<0x5C, "sub">;
+ defm DIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv>,
+ basic_sse12_fp_binop_p<0x5E, "div", fdiv>,
+ basic_sse12_fp_binop_s_int<0x5E, "div">;
+ defm MAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax>,
+ basic_sse12_fp_binop_p<0x5F, "max", X86fmax>,
+ basic_sse12_fp_binop_s_int<0x5F, "max">,
+ basic_sse12_fp_binop_p_int<0x5F, "max">;
+ defm MIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin>,
+ basic_sse12_fp_binop_p<0x5D, "min", X86fmin>,
+ basic_sse12_fp_binop_s_int<0x5D, "min">,
+ basic_sse12_fp_binop_p_int<0x5D, "min">;
+ }
+}
+
+/// Unop Arithmetic
+/// In addition, we also have a special variant of the scalar form here to
+/// represent the associated intrinsic operation. This form is unlike the
+/// plain scalar form, in that it takes an entire vector (instead of a
+/// scalar) and leaves the top elements undefined.
+///
+/// And, we have a special variant form for a full-vector intrinsic form.
+
+/// sse1_fp_unop_s - SSE1 unops in scalar form.
+multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, Intrinsic F32Int> {
+ def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
+ !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
+ [(set FR32:$dst, (OpNode FR32:$src))]>;
+ // For scalar unary operations, fold a load into the operation
+ // only in OptForSize mode. It eliminates an instruction, but it also
+ // eliminates a whole-register clobber (the load), so it introduces a
+ // partial register update condition.
+ def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
+ !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
+ [(set FR32:$dst, (OpNode (load addr:$src)))]>, XS,
+ Requires<[HasSSE1, OptForSize]>;
+ def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (F32Int VR128:$src))]>;
+ def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins ssmem:$src),
+ !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (F32Int sse_load_f32:$src))]>;
+}
+
+/// sse1_fp_unop_s_avx - AVX SSE1 unops in scalar form.
+multiclass sse1_fp_unop_s_avx<bits<8> opc, string OpcodeStr> {
+ def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src1, FR32:$src2),
+ !strconcat(OpcodeStr,
+ "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
+ let mayLoad = 1 in
+ def SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1,f32mem:$src2),
+ !strconcat(OpcodeStr,
+ "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
+ def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins ssmem:$src1, VR128:$src2),
+ !strconcat(OpcodeStr,
+ "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
+}
+
+/// sse1_fp_unop_p - SSE1 unops in packed form.
+multiclass sse1_fp_unop_p<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+ def PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))]>;
+ def PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))]>;
+}
+
+/// sse1_fp_unop_p_y - AVX 256-bit SSE1 unops in packed form.
+multiclass sse1_fp_unop_p_y<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+ def PSYr : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (v8f32 (OpNode VR256:$src)))]>;
+ def PSYm : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+ !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (OpNode (memopv8f32 addr:$src)))]>;
+}
+
+/// sse1_fp_unop_p_int - SSE1 intrinsics unops in packed forms.
+multiclass sse1_fp_unop_p_int<bits<8> opc, string OpcodeStr,
+ Intrinsic V4F32Int> {
+ def PSr_Int : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (V4F32Int VR128:$src))]>;
+ def PSm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))]>;
+}
+
+/// sse1_fp_unop_p_y_int - AVX 256-bit intrinsics unops in packed forms.
+multiclass sse1_fp_unop_p_y_int<bits<8> opc, string OpcodeStr,
+ Intrinsic V4F32Int> {
+ def PSYr_Int : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (V4F32Int VR256:$src))]>;
+ def PSYm_Int : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+ !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (V4F32Int (memopv8f32 addr:$src)))]>;
+}
+
+/// sse2_fp_unop_s - SSE2 unops in scalar form.
+multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, Intrinsic F64Int> {
+ def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
+ !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
+ [(set FR64:$dst, (OpNode FR64:$src))]>;
+ // See the comments in sse1_fp_unop_s for why this is OptForSize.
+ def SDm : I<opc, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src),
+ !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
+ [(set FR64:$dst, (OpNode (load addr:$src)))]>, XD,
+ Requires<[HasSSE2, OptForSize]>;
+ def SDr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (F64Int VR128:$src))]>;
+ def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins sdmem:$src),
+ !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (F64Int sse_load_f64:$src))]>;
+}
+
+/// sse2_fp_unop_s_avx - AVX SSE2 unops in scalar form.
+multiclass sse2_fp_unop_s_avx<bits<8> opc, string OpcodeStr> {
+ def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
+ !strconcat(OpcodeStr,
+ "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
+ def SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst), (ins FR64:$src1,f64mem:$src2),
+ !strconcat(OpcodeStr,
+ "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
+ def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, sdmem:$src2),
+ !strconcat(OpcodeStr,
+ "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
+}
+
+/// sse2_fp_unop_p - SSE2 unops in vector forms.
+multiclass sse2_fp_unop_p<bits<8> opc, string OpcodeStr,
+ SDNode OpNode> {
+ def PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))]>;
+ def PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))]>;
+}
+
+/// sse2_fp_unop_p_y - AVX SSE2 256-bit unops in vector forms.
+multiclass sse2_fp_unop_p_y<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+ def PDYr : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (v4f64 (OpNode VR256:$src)))]>;
+ def PDYm : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+ !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (OpNode (memopv4f64 addr:$src)))]>;
+}
+
+/// sse2_fp_unop_p_int - SSE2 intrinsic unops in vector forms.
+multiclass sse2_fp_unop_p_int<bits<8> opc, string OpcodeStr,
+ Intrinsic V2F64Int> {
+ def PDr_Int : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (V2F64Int VR128:$src))]>;
+ def PDm_Int : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (V2F64Int (memopv2f64 addr:$src)))]>;
+}
+
+/// sse2_fp_unop_p_y_int - AVX 256-bit intrinsic unops in vector forms.
+multiclass sse2_fp_unop_p_y_int<bits<8> opc, string OpcodeStr,
+ Intrinsic V2F64Int> {
+ def PDYr_Int : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (V2F64Int VR256:$src))]>;
+ def PDYm_Int : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+ !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (V2F64Int (memopv4f64 addr:$src)))]>;
+}
+
+let Predicates = [HasAVX] in {
+ // Square root.
+ defm VSQRT : sse1_fp_unop_s_avx<0x51, "vsqrt">,
+ sse2_fp_unop_s_avx<0x51, "vsqrt">, VEX_4V, VEX_LIG;
+
+ defm VSQRT : sse1_fp_unop_p<0x51, "vsqrt", fsqrt>,
+ sse2_fp_unop_p<0x51, "vsqrt", fsqrt>,
+ sse1_fp_unop_p_y<0x51, "vsqrt", fsqrt>,
+ sse2_fp_unop_p_y<0x51, "vsqrt", fsqrt>,
+ sse1_fp_unop_p_int<0x51, "vsqrt", int_x86_sse_sqrt_ps>,
+ sse2_fp_unop_p_int<0x51, "vsqrt", int_x86_sse2_sqrt_pd>,
+ sse1_fp_unop_p_y_int<0x51, "vsqrt", int_x86_avx_sqrt_ps_256>,
+ sse2_fp_unop_p_y_int<0x51, "vsqrt", int_x86_avx_sqrt_pd_256>,
+ VEX;
+
+ // Reciprocal approximations. Note that these typically require refinement
+ // in order to obtain suitable precision.
+ defm VRSQRT : sse1_fp_unop_s_avx<0x52, "vrsqrt">, VEX_4V, VEX_LIG;
+ defm VRSQRT : sse1_fp_unop_p<0x52, "vrsqrt", X86frsqrt>,
+ sse1_fp_unop_p_y<0x52, "vrsqrt", X86frsqrt>,
+ sse1_fp_unop_p_y_int<0x52, "vrsqrt", int_x86_avx_rsqrt_ps_256>,
+ sse1_fp_unop_p_int<0x52, "vrsqrt", int_x86_sse_rsqrt_ps>, VEX;
+
+ defm VRCP : sse1_fp_unop_s_avx<0x53, "vrcp">, VEX_4V, VEX_LIG;
+ defm VRCP : sse1_fp_unop_p<0x53, "vrcp", X86frcp>,
+ sse1_fp_unop_p_y<0x53, "vrcp", X86frcp>,
+ sse1_fp_unop_p_y_int<0x53, "vrcp", int_x86_avx_rcp_ps_256>,
+ sse1_fp_unop_p_int<0x53, "vrcp", int_x86_sse_rcp_ps>, VEX;
+}
+
+def : Pat<(f32 (fsqrt FR32:$src)),
+ (VSQRTSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>;
+def : Pat<(f32 (fsqrt (load addr:$src))),
+ (VSQRTSSm (f32 (IMPLICIT_DEF)), addr:$src)>,
+ Requires<[HasAVX, OptForSize]>;
+def : Pat<(f64 (fsqrt FR64:$src)),
+ (VSQRTSDr (f64 (IMPLICIT_DEF)), FR64:$src)>, Requires<[HasAVX]>;
+def : Pat<(f64 (fsqrt (load addr:$src))),
+ (VSQRTSDm (f64 (IMPLICIT_DEF)), addr:$src)>,
+ Requires<[HasAVX, OptForSize]>;
+
+def : Pat<(f32 (X86frsqrt FR32:$src)),
+ (VRSQRTSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>;
+def : Pat<(f32 (X86frsqrt (load addr:$src))),
+ (VRSQRTSSm (f32 (IMPLICIT_DEF)), addr:$src)>,
+ Requires<[HasAVX, OptForSize]>;
+
+def : Pat<(f32 (X86frcp FR32:$src)),
+ (VRCPSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>;
+def : Pat<(f32 (X86frcp (load addr:$src))),
+ (VRCPSSm (f32 (IMPLICIT_DEF)), addr:$src)>,
+ Requires<[HasAVX, OptForSize]>;
+
+let Predicates = [HasAVX] in {
+ def : Pat<(int_x86_sse_sqrt_ss VR128:$src),
+ (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)),
+ (VSQRTSSr (f32 (IMPLICIT_DEF)),
+ (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)),
+ sub_ss)>;
+ def : Pat<(int_x86_sse_sqrt_ss sse_load_f32:$src),
+ (VSQRTSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
+
+ def : Pat<(int_x86_sse2_sqrt_sd VR128:$src),
+ (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)),
+ (VSQRTSDr (f64 (IMPLICIT_DEF)),
+ (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd)),
+ sub_sd)>;
+ def : Pat<(int_x86_sse2_sqrt_sd sse_load_f64:$src),
+ (VSQRTSDm_Int (v2f64 (IMPLICIT_DEF)), sse_load_f64:$src)>;
+
+ def : Pat<(int_x86_sse_rsqrt_ss VR128:$src),
+ (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)),
+ (VRSQRTSSr (f32 (IMPLICIT_DEF)),
+ (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)),
+ sub_ss)>;
+ def : Pat<(int_x86_sse_rsqrt_ss sse_load_f32:$src),
+ (VRSQRTSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
+
+ def : Pat<(int_x86_sse_rcp_ss VR128:$src),
+ (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)),
+ (VRCPSSr (f32 (IMPLICIT_DEF)),
+ (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)),
+ sub_ss)>;
+ def : Pat<(int_x86_sse_rcp_ss sse_load_f32:$src),
+ (VRCPSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
+}
+
+// Square root.
+defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ss>,
+ sse1_fp_unop_p<0x51, "sqrt", fsqrt>,
+ sse1_fp_unop_p_int<0x51, "sqrt", int_x86_sse_sqrt_ps>,
+ sse2_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse2_sqrt_sd>,
+ sse2_fp_unop_p<0x51, "sqrt", fsqrt>,
+ sse2_fp_unop_p_int<0x51, "sqrt", int_x86_sse2_sqrt_pd>;
+
+// Reciprocal approximations. Note that these typically require refinement
+// in order to obtain suitable precision.
+defm RSQRT : sse1_fp_unop_s<0x52, "rsqrt", X86frsqrt, int_x86_sse_rsqrt_ss>,
+ sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt>,
+ sse1_fp_unop_p_int<0x52, "rsqrt", int_x86_sse_rsqrt_ps>;
+defm RCP : sse1_fp_unop_s<0x53, "rcp", X86frcp, int_x86_sse_rcp_ss>,
+ sse1_fp_unop_p<0x53, "rcp", X86frcp>,
+ sse1_fp_unop_p_int<0x53, "rcp", int_x86_sse_rcp_ps>;
+
+// There is no f64 version of the reciprocal approximation instructions.
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Non-temporal stores
+//===----------------------------------------------------------------------===//
+
+let AddedComplexity = 400 in { // Prefer non-temporal versions
+ def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src),
+ "movntps\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v4f32 VR128:$src),
+ addr:$dst)]>, VEX;
+ def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src),
+ "movntpd\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v2f64 VR128:$src),
+ addr:$dst)]>, VEX;
+ def VMOVNTDQ_64mr : VPDI<0xE7, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src),
+ "movntdq\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v2f64 VR128:$src),
+ addr:$dst)]>, VEX;
+
+ let ExeDomain = SSEPackedInt in
+ def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src),
+ "movntdq\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v4f32 VR128:$src),
+ addr:$dst)]>, VEX;
+
+ def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst),
+ (VMOVNTDQmr addr:$dst, VR128:$src)>, Requires<[HasAVX]>;
+
+ def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs),
+ (ins f256mem:$dst, VR256:$src),
+ "movntps\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v8f32 VR256:$src),
+ addr:$dst)]>, VEX;
+ def VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs),
+ (ins f256mem:$dst, VR256:$src),
+ "movntpd\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v4f64 VR256:$src),
+ addr:$dst)]>, VEX;
+ def VMOVNTDQY_64mr : VPDI<0xE7, MRMDestMem, (outs),
+ (ins f256mem:$dst, VR256:$src),
+ "movntdq\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v4f64 VR256:$src),
+ addr:$dst)]>, VEX;
+ let ExeDomain = SSEPackedInt in
+ def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs),
+ (ins f256mem:$dst, VR256:$src),
+ "movntdq\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v8f32 VR256:$src),
+ addr:$dst)]>, VEX;
+}
+
+def : Pat<(int_x86_avx_movnt_dq_256 addr:$dst, VR256:$src),
+ (VMOVNTDQYmr addr:$dst, VR256:$src)>;
+def : Pat<(int_x86_avx_movnt_pd_256 addr:$dst, VR256:$src),
+ (VMOVNTPDYmr addr:$dst, VR256:$src)>;
+def : Pat<(int_x86_avx_movnt_ps_256 addr:$dst, VR256:$src),
+ (VMOVNTPSYmr addr:$dst, VR256:$src)>;
+
+let AddedComplexity = 400 in { // Prefer non-temporal versions
+def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movntps\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>;
+def MOVNTPDmr : PDI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movntpd\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore(v2f64 VR128:$src), addr:$dst)]>;
+
+def MOVNTDQ_64mr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movntdq\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v2f64 VR128:$src), addr:$dst)]>;
+
+let ExeDomain = SSEPackedInt in
+def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movntdq\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>;
+
+def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst),
+ (MOVNTDQmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
+
+// There is no AVX form for instructions below this point
+def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
+ "movnti{l}\t{$src, $dst|$dst, $src}",
+ [(nontemporalstore (i32 GR32:$src), addr:$dst)]>,
+ TB, Requires<[HasSSE2]>;
+def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
+ "movnti{q}\t{$src, $dst|$dst, $src}",
+ [(nontemporalstore (i64 GR64:$src), addr:$dst)]>,
+ TB, Requires<[HasSSE2]>;
+}
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Prefetch and memory fence
+//===----------------------------------------------------------------------===//
+
+// Prefetch intrinsic.
+def PREFETCHT0 : PSI<0x18, MRM1m, (outs), (ins i8mem:$src),
+ "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3), (i32 1))]>;
+def PREFETCHT1 : PSI<0x18, MRM2m, (outs), (ins i8mem:$src),
+ "prefetcht1\t$src", [(prefetch addr:$src, imm, (i32 2), (i32 1))]>;
+def PREFETCHT2 : PSI<0x18, MRM3m, (outs), (ins i8mem:$src),
+ "prefetcht2\t$src", [(prefetch addr:$src, imm, (i32 1), (i32 1))]>;
+def PREFETCHNTA : PSI<0x18, MRM0m, (outs), (ins i8mem:$src),
+ "prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0), (i32 1))]>;
+
+// Flush cache
+def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src),
+ "clflush\t$src", [(int_x86_sse2_clflush addr:$src)]>,
+ TB, Requires<[HasSSE2]>;
+
+// Pause. This "instruction" is encoded as "rep; nop", so even though it
+// was introduced with SSE2, it's backward compatible.
+def PAUSE : I<0x90, RawFrm, (outs), (ins), "pause", []>, REP;
+
+// Load, store, and memory fence
+def SFENCE : I<0xAE, MRM_F8, (outs), (ins),
+ "sfence", [(int_x86_sse_sfence)]>, TB, Requires<[HasSSE1]>;
+def LFENCE : I<0xAE, MRM_E8, (outs), (ins),
+ "lfence", [(int_x86_sse2_lfence)]>, TB, Requires<[HasSSE2]>;
+def MFENCE : I<0xAE, MRM_F0, (outs), (ins),
+ "mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasSSE2]>;
+
+def : Pat<(X86SFence), (SFENCE)>;
+def : Pat<(X86LFence), (LFENCE)>;
+def : Pat<(X86MFence), (MFENCE)>;
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Load/Store XCSR register
+//===----------------------------------------------------------------------===//
+
+def VLDMXCSR : VPSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
+ "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>, VEX;
+def VSTMXCSR : VPSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
+ "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>, VEX;
+
+def LDMXCSR : PSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
+ "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>;
+def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
+ "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>;
+
+//===---------------------------------------------------------------------===//
+// SSE2 - Move Aligned/Unaligned Packed Integer Instructions
+//===---------------------------------------------------------------------===//
+
+let ExeDomain = SSEPackedInt in { // SSE integer instructions
+
+let neverHasSideEffects = 1 in {
+def VMOVDQArr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+def VMOVDQAYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+}
+def VMOVDQUrr : VSSI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "movdqu\t{$src, $dst|$dst, $src}", []>, VEX;
+def VMOVDQUYrr : VSSI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ "movdqu\t{$src, $dst|$dst, $src}", []>, VEX;
+
+// For Disassembler
+let isCodeGenOnly = 1 in {
+def VMOVDQArr_REV : VPDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+def VMOVDQAYrr_REV : VPDI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+def VMOVDQUrr_REV : VSSI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
+ "movdqu\t{$src, $dst|$dst, $src}", []>, VEX;
+def VMOVDQUYrr_REV : VSSI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src),
+ "movdqu\t{$src, $dst|$dst, $src}", []>, VEX;
+}
+
+let canFoldAsLoad = 1, mayLoad = 1 in {
+def VMOVDQArm : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+def VMOVDQAYrm : VPDI<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+let Predicates = [HasAVX] in {
+ def VMOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX;
+ def VMOVDQUYrm : I<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
+ "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX;
+}
+}
+
+let mayStore = 1 in {
+def VMOVDQAmr : VPDI<0x7F, MRMDestMem, (outs),
+ (ins i128mem:$dst, VR128:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+def VMOVDQAYmr : VPDI<0x7F, MRMDestMem, (outs),
+ (ins i256mem:$dst, VR256:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+let Predicates = [HasAVX] in {
+def VMOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
+ "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX;
+def VMOVDQUYmr : I<0x7F, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src),
+ "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX;
+}
+}
+
+let neverHasSideEffects = 1 in
+def MOVDQArr : PDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", []>;
+
+def MOVDQUrr : I<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "movdqu\t{$src, $dst|$dst, $src}",
+ []>, XS, Requires<[HasSSE2]>;
+
+// For Disassembler
+let isCodeGenOnly = 1 in {
+def MOVDQArr_REV : PDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", []>;
+
+def MOVDQUrr_REV : I<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
+ "movdqu\t{$src, $dst|$dst, $src}",
+ []>, XS, Requires<[HasSSE2]>;
+}
+
+let canFoldAsLoad = 1, mayLoad = 1 in {
+def MOVDQArm : PDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "movdqa\t{$src, $dst|$dst, $src}",
+ [/*(set VR128:$dst, (alignedloadv2i64 addr:$src))*/]>;
+def MOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "movdqu\t{$src, $dst|$dst, $src}",
+ [/*(set VR128:$dst, (loadv2i64 addr:$src))*/]>,
+ XS, Requires<[HasSSE2]>;
+}
+
+let mayStore = 1 in {
+def MOVDQAmr : PDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
+ "movdqa\t{$src, $dst|$dst, $src}",
+ [/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/]>;
+def MOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
+ "movdqu\t{$src, $dst|$dst, $src}",
+ [/*(store (v2i64 VR128:$src), addr:$dst)*/]>,
+ XS, Requires<[HasSSE2]>;
+}
+
+// Intrinsic forms of MOVDQU load and store
+def VMOVDQUmr_Int : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
+ "vmovdqu\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse2_storeu_dq addr:$dst, VR128:$src)]>,
+ XS, VEX, Requires<[HasAVX]>;
+
+def MOVDQUmr_Int : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
+ "movdqu\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse2_storeu_dq addr:$dst, VR128:$src)]>,
+ XS, Requires<[HasSSE2]>;
+
+} // ExeDomain = SSEPackedInt
+
+let Predicates = [HasAVX] in {
+ def : Pat<(int_x86_avx_loadu_dq_256 addr:$src), (VMOVDQUYrm addr:$src)>;
+ def : Pat<(int_x86_avx_storeu_dq_256 addr:$dst, VR256:$src),
+ (VMOVDQUYmr addr:$dst, VR256:$src)>;
+}
+
+//===---------------------------------------------------------------------===//
+// SSE2 - Packed Integer Arithmetic Instructions
+//===---------------------------------------------------------------------===//
+
+let ExeDomain = SSEPackedInt in { // SSE integer instructions
+
+multiclass PDI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
+ bit IsCommutable = 0, bit Is2Addr = 1> {
+ let isCommutable = IsCommutable in
+ def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]>;
+ def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (IntId VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2))))]>;
+}
+
+multiclass PDI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
+ string OpcodeStr, Intrinsic IntId,
+ Intrinsic IntId2, bit Is2Addr = 1> {
+ def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]>;
+ def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (IntId VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2))))]>;
+ def ri : PDIi8<opc2, ImmForm, (outs VR128:$dst),
+ (ins VR128:$src1, i32i8imm:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (IntId2 VR128:$src1, (i32 imm:$src2)))]>;
+}
+
+/// PDI_binop_rm - Simple SSE2 binary operator.
+multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ ValueType OpVT, bit IsCommutable = 0, bit Is2Addr = 1> {
+ let isCommutable = IsCommutable in
+ def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]>;
+ def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (OpVT (OpNode VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2)))))]>;
+}
+
+/// PDI_binop_rm_v2i64 - Simple SSE2 binary operator whose type is v2i64.
+///
+/// FIXME: we could eliminate this and use PDI_binop_rm instead if tblgen knew
+/// to collapse (bitconvert VT to VT) into its operand.
+///
+multiclass PDI_binop_rm_v2i64<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ bit IsCommutable = 0, bit Is2Addr = 1> {
+ let isCommutable = IsCommutable in
+ def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))]>;
+ def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (OpNode VR128:$src1, (memopv2i64 addr:$src2)))]>;
+}
+
+} // ExeDomain = SSEPackedInt
+
+// 128-bit Integer Arithmetic
+
+let Predicates = [HasAVX] in {
+defm VPADDB : PDI_binop_rm<0xFC, "vpaddb", add, v16i8, 1, 0 /*3addr*/>, VEX_4V;
+defm VPADDW : PDI_binop_rm<0xFD, "vpaddw", add, v8i16, 1, 0>, VEX_4V;
+defm VPADDD : PDI_binop_rm<0xFE, "vpaddd", add, v4i32, 1, 0>, VEX_4V;
+defm VPADDQ : PDI_binop_rm_v2i64<0xD4, "vpaddq", add, 1, 0>, VEX_4V;
+defm VPMULLW : PDI_binop_rm<0xD5, "vpmullw", mul, v8i16, 1, 0>, VEX_4V;
+defm VPSUBB : PDI_binop_rm<0xF8, "vpsubb", sub, v16i8, 0, 0>, VEX_4V;
+defm VPSUBW : PDI_binop_rm<0xF9, "vpsubw", sub, v8i16, 0, 0>, VEX_4V;
+defm VPSUBD : PDI_binop_rm<0xFA, "vpsubd", sub, v4i32, 0, 0>, VEX_4V;
+defm VPSUBQ : PDI_binop_rm_v2i64<0xFB, "vpsubq", sub, 0, 0>, VEX_4V;
+
+// Intrinsic forms
+defm VPSUBSB : PDI_binop_rm_int<0xE8, "vpsubsb" , int_x86_sse2_psubs_b, 0, 0>,
+ VEX_4V;
+defm VPSUBSW : PDI_binop_rm_int<0xE9, "vpsubsw" , int_x86_sse2_psubs_w, 0, 0>,
+ VEX_4V;
+defm VPSUBUSB : PDI_binop_rm_int<0xD8, "vpsubusb", int_x86_sse2_psubus_b, 0, 0>,
+ VEX_4V;
+defm VPSUBUSW : PDI_binop_rm_int<0xD9, "vpsubusw", int_x86_sse2_psubus_w, 0, 0>,
+ VEX_4V;
+defm VPADDSB : PDI_binop_rm_int<0xEC, "vpaddsb" , int_x86_sse2_padds_b, 1, 0>,
+ VEX_4V;
+defm VPADDSW : PDI_binop_rm_int<0xED, "vpaddsw" , int_x86_sse2_padds_w, 1, 0>,
+ VEX_4V;
+defm VPADDUSB : PDI_binop_rm_int<0xDC, "vpaddusb", int_x86_sse2_paddus_b, 1, 0>,
+ VEX_4V;
+defm VPADDUSW : PDI_binop_rm_int<0xDD, "vpaddusw", int_x86_sse2_paddus_w, 1, 0>,
+ VEX_4V;
+defm VPMULHUW : PDI_binop_rm_int<0xE4, "vpmulhuw", int_x86_sse2_pmulhu_w, 1, 0>,
+ VEX_4V;
+defm VPMULHW : PDI_binop_rm_int<0xE5, "vpmulhw" , int_x86_sse2_pmulh_w, 1, 0>,
+ VEX_4V;
+defm VPMULUDQ : PDI_binop_rm_int<0xF4, "vpmuludq", int_x86_sse2_pmulu_dq, 1, 0>,
+ VEX_4V;
+defm VPMADDWD : PDI_binop_rm_int<0xF5, "vpmaddwd", int_x86_sse2_pmadd_wd, 1, 0>,
+ VEX_4V;
+defm VPAVGB : PDI_binop_rm_int<0xE0, "vpavgb", int_x86_sse2_pavg_b, 1, 0>,
+ VEX_4V;
+defm VPAVGW : PDI_binop_rm_int<0xE3, "vpavgw", int_x86_sse2_pavg_w, 1, 0>,
+ VEX_4V;
+defm VPMINUB : PDI_binop_rm_int<0xDA, "vpminub", int_x86_sse2_pminu_b, 1, 0>,
+ VEX_4V;
+defm VPMINSW : PDI_binop_rm_int<0xEA, "vpminsw", int_x86_sse2_pmins_w, 1, 0>,
+ VEX_4V;
+defm VPMAXUB : PDI_binop_rm_int<0xDE, "vpmaxub", int_x86_sse2_pmaxu_b, 1, 0>,
+ VEX_4V;
+defm VPMAXSW : PDI_binop_rm_int<0xEE, "vpmaxsw", int_x86_sse2_pmaxs_w, 1, 0>,
+ VEX_4V;
+defm VPSADBW : PDI_binop_rm_int<0xF6, "vpsadbw", int_x86_sse2_psad_bw, 1, 0>,
+ VEX_4V;
+}
+
+let Constraints = "$src1 = $dst" in {
+defm PADDB : PDI_binop_rm<0xFC, "paddb", add, v16i8, 1>;
+defm PADDW : PDI_binop_rm<0xFD, "paddw", add, v8i16, 1>;
+defm PADDD : PDI_binop_rm<0xFE, "paddd", add, v4i32, 1>;
+defm PADDQ : PDI_binop_rm_v2i64<0xD4, "paddq", add, 1>;
+defm PMULLW : PDI_binop_rm<0xD5, "pmullw", mul, v8i16, 1>;
+defm PSUBB : PDI_binop_rm<0xF8, "psubb", sub, v16i8>;
+defm PSUBW : PDI_binop_rm<0xF9, "psubw", sub, v8i16>;
+defm PSUBD : PDI_binop_rm<0xFA, "psubd", sub, v4i32>;
+defm PSUBQ : PDI_binop_rm_v2i64<0xFB, "psubq", sub>;
+
+// Intrinsic forms
+defm PSUBSB : PDI_binop_rm_int<0xE8, "psubsb" , int_x86_sse2_psubs_b>;
+defm PSUBSW : PDI_binop_rm_int<0xE9, "psubsw" , int_x86_sse2_psubs_w>;
+defm PSUBUSB : PDI_binop_rm_int<0xD8, "psubusb", int_x86_sse2_psubus_b>;
+defm PSUBUSW : PDI_binop_rm_int<0xD9, "psubusw", int_x86_sse2_psubus_w>;
+defm PADDSB : PDI_binop_rm_int<0xEC, "paddsb" , int_x86_sse2_padds_b, 1>;
+defm PADDSW : PDI_binop_rm_int<0xED, "paddsw" , int_x86_sse2_padds_w, 1>;
+defm PADDUSB : PDI_binop_rm_int<0xDC, "paddusb", int_x86_sse2_paddus_b, 1>;
+defm PADDUSW : PDI_binop_rm_int<0xDD, "paddusw", int_x86_sse2_paddus_w, 1>;
+defm PMULHUW : PDI_binop_rm_int<0xE4, "pmulhuw", int_x86_sse2_pmulhu_w, 1>;
+defm PMULHW : PDI_binop_rm_int<0xE5, "pmulhw" , int_x86_sse2_pmulh_w, 1>;
+defm PMULUDQ : PDI_binop_rm_int<0xF4, "pmuludq", int_x86_sse2_pmulu_dq, 1>;
+defm PMADDWD : PDI_binop_rm_int<0xF5, "pmaddwd", int_x86_sse2_pmadd_wd, 1>;
+defm PAVGB : PDI_binop_rm_int<0xE0, "pavgb", int_x86_sse2_pavg_b, 1>;
+defm PAVGW : PDI_binop_rm_int<0xE3, "pavgw", int_x86_sse2_pavg_w, 1>;
+defm PMINUB : PDI_binop_rm_int<0xDA, "pminub", int_x86_sse2_pminu_b, 1>;
+defm PMINSW : PDI_binop_rm_int<0xEA, "pminsw", int_x86_sse2_pmins_w, 1>;
+defm PMAXUB : PDI_binop_rm_int<0xDE, "pmaxub", int_x86_sse2_pmaxu_b, 1>;
+defm PMAXSW : PDI_binop_rm_int<0xEE, "pmaxsw", int_x86_sse2_pmaxs_w, 1>;
+defm PSADBW : PDI_binop_rm_int<0xF6, "psadbw", int_x86_sse2_psad_bw, 1>;
+
+} // Constraints = "$src1 = $dst"
+
+//===---------------------------------------------------------------------===//
+// SSE2 - Packed Integer Logical Instructions
+//===---------------------------------------------------------------------===//
+
+let Predicates = [HasAVX] in {
+defm VPSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "vpsllw",
+ int_x86_sse2_psll_w, int_x86_sse2_pslli_w, 0>,
+ VEX_4V;
+defm VPSLLD : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "vpslld",
+ int_x86_sse2_psll_d, int_x86_sse2_pslli_d, 0>,
+ VEX_4V;
+defm VPSLLQ : PDI_binop_rmi_int<0xF3, 0x73, MRM6r, "vpsllq",
+ int_x86_sse2_psll_q, int_x86_sse2_pslli_q, 0>,
+ VEX_4V;
+
+defm VPSRLW : PDI_binop_rmi_int<0xD1, 0x71, MRM2r, "vpsrlw",
+ int_x86_sse2_psrl_w, int_x86_sse2_psrli_w, 0>,
+ VEX_4V;
+defm VPSRLD : PDI_binop_rmi_int<0xD2, 0x72, MRM2r, "vpsrld",
+ int_x86_sse2_psrl_d, int_x86_sse2_psrli_d, 0>,
+ VEX_4V;
+defm VPSRLQ : PDI_binop_rmi_int<0xD3, 0x73, MRM2r, "vpsrlq",
+ int_x86_sse2_psrl_q, int_x86_sse2_psrli_q, 0>,
+ VEX_4V;
+
+defm VPSRAW : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "vpsraw",
+ int_x86_sse2_psra_w, int_x86_sse2_psrai_w, 0>,
+ VEX_4V;
+defm VPSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "vpsrad",
+ int_x86_sse2_psra_d, int_x86_sse2_psrai_d, 0>,
+ VEX_4V;
+
+defm VPAND : PDI_binop_rm_v2i64<0xDB, "vpand", and, 1, 0>, VEX_4V;
+defm VPOR : PDI_binop_rm_v2i64<0xEB, "vpor" , or, 1, 0>, VEX_4V;
+defm VPXOR : PDI_binop_rm_v2i64<0xEF, "vpxor", xor, 1, 0>, VEX_4V;
+
+let ExeDomain = SSEPackedInt in {
+ let neverHasSideEffects = 1 in {
+ // 128-bit logical shifts.
+ def VPSLLDQri : PDIi8<0x73, MRM7r,
+ (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ "vpslldq\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
+ VEX_4V;
+ def VPSRLDQri : PDIi8<0x73, MRM3r,
+ (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ "vpsrldq\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
+ VEX_4V;
+ // PSRADQri doesn't exist in SSE[1-3].
+ }
+ def VPANDNrr : PDI<0xDF, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "vpandn\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst,
+ (v2i64 (X86andnp VR128:$src1, VR128:$src2)))]>,VEX_4V;
+
+ def VPANDNrm : PDI<0xDF, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ "vpandn\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst, (X86andnp VR128:$src1,
+ (memopv2i64 addr:$src2)))]>, VEX_4V;
+}
+}
+
+let Constraints = "$src1 = $dst" in {
+defm PSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "psllw",
+ int_x86_sse2_psll_w, int_x86_sse2_pslli_w>;
+defm PSLLD : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "pslld",
+ int_x86_sse2_psll_d, int_x86_sse2_pslli_d>;
+defm PSLLQ : PDI_binop_rmi_int<0xF3, 0x73, MRM6r, "psllq",
+ int_x86_sse2_psll_q, int_x86_sse2_pslli_q>;
+
+defm PSRLW : PDI_binop_rmi_int<0xD1, 0x71, MRM2r, "psrlw",
+ int_x86_sse2_psrl_w, int_x86_sse2_psrli_w>;
+defm PSRLD : PDI_binop_rmi_int<0xD2, 0x72, MRM2r, "psrld",
+ int_x86_sse2_psrl_d, int_x86_sse2_psrli_d>;
+defm PSRLQ : PDI_binop_rmi_int<0xD3, 0x73, MRM2r, "psrlq",
+ int_x86_sse2_psrl_q, int_x86_sse2_psrli_q>;
+
+defm PSRAW : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "psraw",
+ int_x86_sse2_psra_w, int_x86_sse2_psrai_w>;
+defm PSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad",
+ int_x86_sse2_psra_d, int_x86_sse2_psrai_d>;
+
+defm PAND : PDI_binop_rm_v2i64<0xDB, "pand", and, 1>;
+defm POR : PDI_binop_rm_v2i64<0xEB, "por" , or, 1>;
+defm PXOR : PDI_binop_rm_v2i64<0xEF, "pxor", xor, 1>;
+
+let ExeDomain = SSEPackedInt in {
+ let neverHasSideEffects = 1 in {
+ // 128-bit logical shifts.
+ def PSLLDQri : PDIi8<0x73, MRM7r,
+ (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ "pslldq\t{$src2, $dst|$dst, $src2}", []>;
+ def PSRLDQri : PDIi8<0x73, MRM3r,
+ (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ "psrldq\t{$src2, $dst|$dst, $src2}", []>;
+ // PSRADQri doesn't exist in SSE[1-3].
+ }
+ def PANDNrr : PDI<0xDF, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "pandn\t{$src2, $dst|$dst, $src2}", []>;
+
+ def PANDNrm : PDI<0xDF, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ "pandn\t{$src2, $dst|$dst, $src2}", []>;
+}
+} // Constraints = "$src1 = $dst"
+
+let Predicates = [HasAVX] in {
+ def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2),
+ (v2i64 (VPSLLDQri VR128:$src1, (BYTE_imm imm:$src2)))>;
+ def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2),
+ (v2i64 (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2)))>;
+ def : Pat<(int_x86_sse2_psll_dq_bs VR128:$src1, imm:$src2),
+ (v2i64 (VPSLLDQri VR128:$src1, imm:$src2))>;
+ def : Pat<(int_x86_sse2_psrl_dq_bs VR128:$src1, imm:$src2),
+ (v2i64 (VPSRLDQri VR128:$src1, imm:$src2))>;
+ def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)),
+ (v2f64 (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2)))>;
+
+ // Shift up / down and insert zero's.
+ def : Pat<(v2i64 (X86vshl VR128:$src, (i8 imm:$amt))),
+ (v2i64 (VPSLLDQri VR128:$src, (BYTE_imm imm:$amt)))>;
+ def : Pat<(v2i64 (X86vshr VR128:$src, (i8 imm:$amt))),
+ (v2i64 (VPSRLDQri VR128:$src, (BYTE_imm imm:$amt)))>;
+}
+
+let Predicates = [HasSSE2] in {
+ def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2),
+ (v2i64 (PSLLDQri VR128:$src1, (BYTE_imm imm:$src2)))>;
+ def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2),
+ (v2i64 (PSRLDQri VR128:$src1, (BYTE_imm imm:$src2)))>;
+ def : Pat<(int_x86_sse2_psll_dq_bs VR128:$src1, imm:$src2),
+ (v2i64 (PSLLDQri VR128:$src1, imm:$src2))>;
+ def : Pat<(int_x86_sse2_psrl_dq_bs VR128:$src1, imm:$src2),
+ (v2i64 (PSRLDQri VR128:$src1, imm:$src2))>;
+ def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)),
+ (v2f64 (PSRLDQri VR128:$src1, (BYTE_imm imm:$src2)))>;
+
+ // Shift up / down and insert zero's.
+ def : Pat<(v2i64 (X86vshl VR128:$src, (i8 imm:$amt))),
+ (v2i64 (PSLLDQri VR128:$src, (BYTE_imm imm:$amt)))>;
+ def : Pat<(v2i64 (X86vshr VR128:$src, (i8 imm:$amt))),
+ (v2i64 (PSRLDQri VR128:$src, (BYTE_imm imm:$amt)))>;
+}
+
+//===---------------------------------------------------------------------===//
+// SSE2 - Packed Integer Comparison Instructions
+//===---------------------------------------------------------------------===//
+
+let Predicates = [HasAVX] in {
+ defm VPCMPEQB : PDI_binop_rm_int<0x74, "vpcmpeqb", int_x86_sse2_pcmpeq_b, 1,
+ 0>, VEX_4V;
+ defm VPCMPEQW : PDI_binop_rm_int<0x75, "vpcmpeqw", int_x86_sse2_pcmpeq_w, 1,
+ 0>, VEX_4V;
+ defm VPCMPEQD : PDI_binop_rm_int<0x76, "vpcmpeqd", int_x86_sse2_pcmpeq_d, 1,
+ 0>, VEX_4V;
+ defm VPCMPGTB : PDI_binop_rm_int<0x64, "vpcmpgtb", int_x86_sse2_pcmpgt_b, 0,
+ 0>, VEX_4V;
+ defm VPCMPGTW : PDI_binop_rm_int<0x65, "vpcmpgtw", int_x86_sse2_pcmpgt_w, 0,
+ 0>, VEX_4V;
+ defm VPCMPGTD : PDI_binop_rm_int<0x66, "vpcmpgtd", int_x86_sse2_pcmpgt_d, 0,
+ 0>, VEX_4V;
+
+ def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, VR128:$src2)),
+ (VPCMPEQBrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, (memop addr:$src2))),
+ (VPCMPEQBrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v8i16 (X86pcmpeqw VR128:$src1, VR128:$src2)),
+ (VPCMPEQWrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v8i16 (X86pcmpeqw VR128:$src1, (memop addr:$src2))),
+ (VPCMPEQWrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v4i32 (X86pcmpeqd VR128:$src1, VR128:$src2)),
+ (VPCMPEQDrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v4i32 (X86pcmpeqd VR128:$src1, (memop addr:$src2))),
+ (VPCMPEQDrm VR128:$src1, addr:$src2)>;
+
+ def : Pat<(v16i8 (X86pcmpgtb VR128:$src1, VR128:$src2)),
+ (VPCMPGTBrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v16i8 (X86pcmpgtb VR128:$src1, (memop addr:$src2))),
+ (VPCMPGTBrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v8i16 (X86pcmpgtw VR128:$src1, VR128:$src2)),
+ (VPCMPGTWrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v8i16 (X86pcmpgtw VR128:$src1, (memop addr:$src2))),
+ (VPCMPGTWrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, VR128:$src2)),
+ (VPCMPGTDrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, (memop addr:$src2))),
+ (VPCMPGTDrm VR128:$src1, addr:$src2)>;
+}
+
+let Constraints = "$src1 = $dst" in {
+ defm PCMPEQB : PDI_binop_rm_int<0x74, "pcmpeqb", int_x86_sse2_pcmpeq_b, 1>;
+ defm PCMPEQW : PDI_binop_rm_int<0x75, "pcmpeqw", int_x86_sse2_pcmpeq_w, 1>;
+ defm PCMPEQD : PDI_binop_rm_int<0x76, "pcmpeqd", int_x86_sse2_pcmpeq_d, 1>;
+ defm PCMPGTB : PDI_binop_rm_int<0x64, "pcmpgtb", int_x86_sse2_pcmpgt_b>;
+ defm PCMPGTW : PDI_binop_rm_int<0x65, "pcmpgtw", int_x86_sse2_pcmpgt_w>;
+ defm PCMPGTD : PDI_binop_rm_int<0x66, "pcmpgtd", int_x86_sse2_pcmpgt_d>;
+} // Constraints = "$src1 = $dst"
+
+let Predicates = [HasSSE2] in {
+ def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, VR128:$src2)),
+ (PCMPEQBrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, (memop addr:$src2))),
+ (PCMPEQBrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v8i16 (X86pcmpeqw VR128:$src1, VR128:$src2)),
+ (PCMPEQWrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v8i16 (X86pcmpeqw VR128:$src1, (memop addr:$src2))),
+ (PCMPEQWrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v4i32 (X86pcmpeqd VR128:$src1, VR128:$src2)),
+ (PCMPEQDrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v4i32 (X86pcmpeqd VR128:$src1, (memop addr:$src2))),
+ (PCMPEQDrm VR128:$src1, addr:$src2)>;
+
+ def : Pat<(v16i8 (X86pcmpgtb VR128:$src1, VR128:$src2)),
+ (PCMPGTBrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v16i8 (X86pcmpgtb VR128:$src1, (memop addr:$src2))),
+ (PCMPGTBrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v8i16 (X86pcmpgtw VR128:$src1, VR128:$src2)),
+ (PCMPGTWrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v8i16 (X86pcmpgtw VR128:$src1, (memop addr:$src2))),
+ (PCMPGTWrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, VR128:$src2)),
+ (PCMPGTDrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, (memop addr:$src2))),
+ (PCMPGTDrm VR128:$src1, addr:$src2)>;
+}
+
+//===---------------------------------------------------------------------===//
+// SSE2 - Packed Integer Pack Instructions
+//===---------------------------------------------------------------------===//
+
+let Predicates = [HasAVX] in {
+defm VPACKSSWB : PDI_binop_rm_int<0x63, "vpacksswb", int_x86_sse2_packsswb_128,
+ 0, 0>, VEX_4V;
+defm VPACKSSDW : PDI_binop_rm_int<0x6B, "vpackssdw", int_x86_sse2_packssdw_128,
+ 0, 0>, VEX_4V;
+defm VPACKUSWB : PDI_binop_rm_int<0x67, "vpackuswb", int_x86_sse2_packuswb_128,
+ 0, 0>, VEX_4V;
+}
+
+let Constraints = "$src1 = $dst" in {
+defm PACKSSWB : PDI_binop_rm_int<0x63, "packsswb", int_x86_sse2_packsswb_128>;
+defm PACKSSDW : PDI_binop_rm_int<0x6B, "packssdw", int_x86_sse2_packssdw_128>;
+defm PACKUSWB : PDI_binop_rm_int<0x67, "packuswb", int_x86_sse2_packuswb_128>;
+} // Constraints = "$src1 = $dst"
+
+//===---------------------------------------------------------------------===//
+// SSE2 - Packed Integer Shuffle Instructions
+//===---------------------------------------------------------------------===//
+
+let ExeDomain = SSEPackedInt in {
+multiclass sse2_pshuffle<string OpcodeStr, ValueType vt, PatFrag pshuf_frag,
+ PatFrag bc_frag> {
+def ri : Ii8<0x70, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst, (vt (pshuf_frag:$src2 VR128:$src1,
+ (undef))))]>;
+def mi : Ii8<0x70, MRMSrcMem,
+ (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst, (vt (pshuf_frag:$src2
+ (bc_frag (memopv2i64 addr:$src1)),
+ (undef))))]>;
+}
+} // ExeDomain = SSEPackedInt
+
+let Predicates = [HasAVX] in {
+ let AddedComplexity = 5 in
+ defm VPSHUFD : sse2_pshuffle<"vpshufd", v4i32, pshufd, bc_v4i32>, TB, OpSize,
+ VEX;
+
+ // SSE2 with ImmT == Imm8 and XS prefix.
+ defm VPSHUFHW : sse2_pshuffle<"vpshufhw", v8i16, pshufhw, bc_v8i16>, XS,
+ VEX;
+
+ // SSE2 with ImmT == Imm8 and XD prefix.
+ defm VPSHUFLW : sse2_pshuffle<"vpshuflw", v8i16, pshuflw, bc_v8i16>, XD,
+ VEX;
+
+ let AddedComplexity = 5 in
+ def : Pat<(v4f32 (pshufd:$src2 VR128:$src1, (undef))),
+ (VPSHUFDri VR128:$src1, (SHUFFLE_get_shuf_imm VR128:$src2))>;
+ // Unary v4f32 shuffle with VPSHUF* in order to fold a load.
+ def : Pat<(pshufd:$src2 (bc_v4i32 (memopv4f32 addr:$src1)), (undef)),
+ (VPSHUFDmi addr:$src1, (SHUFFLE_get_shuf_imm VR128:$src2))>;
+
+ def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv2i64 addr:$src1)),
+ (i8 imm:$imm))),
+ (VPSHUFDmi addr:$src1, imm:$imm)>;
+ def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv4f32 addr:$src1)),
+ (i8 imm:$imm))),
+ (VPSHUFDmi addr:$src1, imm:$imm)>;
+ def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
+ (VPSHUFDri VR128:$src1, imm:$imm)>;
+ def : Pat<(v4i32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
+ (VPSHUFDri VR128:$src1, imm:$imm)>;
+ def : Pat<(v8i16 (X86PShufhw VR128:$src, (i8 imm:$imm))),
+ (VPSHUFHWri VR128:$src, imm:$imm)>;
+ def : Pat<(v8i16 (X86PShufhw (bc_v8i16 (memopv2i64 addr:$src)),
+ (i8 imm:$imm))),
+ (VPSHUFHWmi addr:$src, imm:$imm)>;
+ def : Pat<(v8i16 (X86PShuflw VR128:$src, (i8 imm:$imm))),
+ (VPSHUFLWri VR128:$src, imm:$imm)>;
+ def : Pat<(v8i16 (X86PShuflw (bc_v8i16 (memopv2i64 addr:$src)),
+ (i8 imm:$imm))),
+ (VPSHUFLWmi addr:$src, imm:$imm)>;
+}
+
+let Predicates = [HasSSE2] in {
+ let AddedComplexity = 5 in
+ defm PSHUFD : sse2_pshuffle<"pshufd", v4i32, pshufd, bc_v4i32>, TB, OpSize;
+
+ // SSE2 with ImmT == Imm8 and XS prefix.
+ defm PSHUFHW : sse2_pshuffle<"pshufhw", v8i16, pshufhw, bc_v8i16>, XS;
+
+ // SSE2 with ImmT == Imm8 and XD prefix.
+ defm PSHUFLW : sse2_pshuffle<"pshuflw", v8i16, pshuflw, bc_v8i16>, XD;
+
+ let AddedComplexity = 5 in
+ def : Pat<(v4f32 (pshufd:$src2 VR128:$src1, (undef))),
+ (PSHUFDri VR128:$src1, (SHUFFLE_get_shuf_imm VR128:$src2))>;
+ // Unary v4f32 shuffle with PSHUF* in order to fold a load.
+ def : Pat<(pshufd:$src2 (bc_v4i32 (memopv4f32 addr:$src1)), (undef)),
+ (PSHUFDmi addr:$src1, (SHUFFLE_get_shuf_imm VR128:$src2))>;
+
+ def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv2i64 addr:$src1)),
+ (i8 imm:$imm))),
+ (PSHUFDmi addr:$src1, imm:$imm)>;
+ def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv4f32 addr:$src1)),
+ (i8 imm:$imm))),
+ (PSHUFDmi addr:$src1, imm:$imm)>;
+ def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
+ (PSHUFDri VR128:$src1, imm:$imm)>;
+ def : Pat<(v4i32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
+ (PSHUFDri VR128:$src1, imm:$imm)>;
+ def : Pat<(v8i16 (X86PShufhw VR128:$src, (i8 imm:$imm))),
+ (PSHUFHWri VR128:$src, imm:$imm)>;
+ def : Pat<(v8i16 (X86PShufhw (bc_v8i16 (memopv2i64 addr:$src)),
+ (i8 imm:$imm))),
+ (PSHUFHWmi addr:$src, imm:$imm)>;
+ def : Pat<(v8i16 (X86PShuflw VR128:$src, (i8 imm:$imm))),
+ (PSHUFLWri VR128:$src, imm:$imm)>;
+ def : Pat<(v8i16 (X86PShuflw (bc_v8i16 (memopv2i64 addr:$src)),
+ (i8 imm:$imm))),
+ (PSHUFLWmi addr:$src, imm:$imm)>;
+}
+
+//===---------------------------------------------------------------------===//
+// SSE2 - Packed Integer Unpack Instructions
+//===---------------------------------------------------------------------===//
+
+let ExeDomain = SSEPackedInt in {
+multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt,
+ SDNode OpNode, PatFrag bc_frag, bit Is2Addr = 1> {
+ def rr : PDI<opc, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (vt (OpNode VR128:$src1, VR128:$src2)))]>;
+ def rm : PDI<opc, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (OpNode VR128:$src1,
+ (bc_frag (memopv2i64
+ addr:$src2))))]>;
+}
+
+let Predicates = [HasAVX] in {
+ defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, X86Punpcklbw,
+ bc_v16i8, 0>, VEX_4V;
+ defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, X86Punpcklwd,
+ bc_v8i16, 0>, VEX_4V;
+ defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Punpckldq,
+ bc_v4i32, 0>, VEX_4V;
+
+ /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen
+ /// knew to collapse (bitconvert VT to VT) into its operand.
+ def VPUNPCKLQDQrr : PDI<0x6C, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst, (v2i64 (X86Punpcklqdq VR128:$src1,
+ VR128:$src2)))]>, VEX_4V;
+ def VPUNPCKLQDQrm : PDI<0x6C, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst, (v2i64 (X86Punpcklqdq VR128:$src1,
+ (memopv2i64 addr:$src2))))]>, VEX_4V;
+
+ defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Punpckhbw,
+ bc_v16i8, 0>, VEX_4V;
+ defm VPUNPCKHWD : sse2_unpack<0x69, "vpunpckhwd", v8i16, X86Punpckhwd,
+ bc_v8i16, 0>, VEX_4V;
+ defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Punpckhdq,
+ bc_v4i32, 0>, VEX_4V;
+
+ /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen
+ /// knew to collapse (bitconvert VT to VT) into its operand.
+ def VPUNPCKHQDQrr : PDI<0x6D, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst, (v2i64 (X86Punpckhqdq VR128:$src1,
+ VR128:$src2)))]>, VEX_4V;
+ def VPUNPCKHQDQrm : PDI<0x6D, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst, (v2i64 (X86Punpckhqdq VR128:$src1,
+ (memopv2i64 addr:$src2))))]>, VEX_4V;
+}
+
+let Constraints = "$src1 = $dst" in {
+ defm PUNPCKLBW : sse2_unpack<0x60, "punpcklbw", v16i8, X86Punpcklbw, bc_v16i8>;
+ defm PUNPCKLWD : sse2_unpack<0x61, "punpcklwd", v8i16, X86Punpcklwd, bc_v8i16>;
+ defm PUNPCKLDQ : sse2_unpack<0x62, "punpckldq", v4i32, X86Punpckldq, bc_v4i32>;
+
+ /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen
+ /// knew to collapse (bitconvert VT to VT) into its operand.
+ def PUNPCKLQDQrr : PDI<0x6C, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "punpcklqdq\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v2i64 (X86Punpcklqdq VR128:$src1, VR128:$src2)))]>;
+ def PUNPCKLQDQrm : PDI<0x6C, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ "punpcklqdq\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v2i64 (X86Punpcklqdq VR128:$src1,
+ (memopv2i64 addr:$src2))))]>;
+
+ defm PUNPCKHBW : sse2_unpack<0x68, "punpckhbw", v16i8, X86Punpckhbw, bc_v16i8>;
+ defm PUNPCKHWD : sse2_unpack<0x69, "punpckhwd", v8i16, X86Punpckhwd, bc_v8i16>;
+ defm PUNPCKHDQ : sse2_unpack<0x6A, "punpckhdq", v4i32, X86Punpckhdq, bc_v4i32>;
+
+ /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen
+ /// knew to collapse (bitconvert VT to VT) into its operand.
+ def PUNPCKHQDQrr : PDI<0x6D, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "punpckhqdq\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v2i64 (X86Punpckhqdq VR128:$src1, VR128:$src2)))]>;
+ def PUNPCKHQDQrm : PDI<0x6D, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ "punpckhqdq\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v2i64 (X86Punpckhqdq VR128:$src1,
+ (memopv2i64 addr:$src2))))]>;
+}
+} // ExeDomain = SSEPackedInt
+
+// Splat v2f64 / v2i64
+let AddedComplexity = 10 in {
+ def : Pat<(splat_lo (v2i64 VR128:$src), (undef)),
+ (PUNPCKLQDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
+ def : Pat<(splat_lo (v2i64 VR128:$src), (undef)),
+ (VPUNPCKLQDQrr VR128:$src, VR128:$src)>, Requires<[HasAVX]>;
+}
+
+//===---------------------------------------------------------------------===//
+// SSE2 - Packed Integer Extract and Insert
+//===---------------------------------------------------------------------===//
+
+let ExeDomain = SSEPackedInt in {
+multiclass sse2_pinsrw<bit Is2Addr = 1> {
+ def rri : Ii8<0xC4, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1,
+ GR32:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR128:$dst,
+ (X86pinsrw VR128:$src1, GR32:$src2, imm:$src3))]>;
+ def rmi : Ii8<0xC4, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1,
+ i16mem:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR128:$dst,
+ (X86pinsrw VR128:$src1, (extloadi16 addr:$src2),
+ imm:$src3))]>;
+}
+
+// Extract
+let Predicates = [HasAVX] in
+def VPEXTRWri : Ii8<0xC5, MRMSrcReg,
+ (outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ "vpextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1),
+ imm:$src2))]>, TB, OpSize, VEX;
+def PEXTRWri : PDIi8<0xC5, MRMSrcReg,
+ (outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1),
+ imm:$src2))]>;
+
+// Insert
+let Predicates = [HasAVX] in {
+ defm VPINSRW : sse2_pinsrw<0>, TB, OpSize, VEX_4V;
+ def VPINSRWrr64i : Ii8<0xC4, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, GR64:$src2, i32i8imm:$src3),
+ "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ []>, TB, OpSize, VEX_4V;
+}
+
+let Constraints = "$src1 = $dst" in
+ defm PINSRW : sse2_pinsrw, TB, OpSize, Requires<[HasSSE2]>;
+
+} // ExeDomain = SSEPackedInt
+
+//===---------------------------------------------------------------------===//
+// SSE2 - Packed Mask Creation
+//===---------------------------------------------------------------------===//
+
+let ExeDomain = SSEPackedInt in {
+
+def VPMOVMSKBrr : VPDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
+ "pmovmskb\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))]>, VEX;
+def VPMOVMSKBr64r : VPDI<0xD7, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
+ "pmovmskb\t{$src, $dst|$dst, $src}", []>, VEX;
+def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
+ "pmovmskb\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))]>;
+
+} // ExeDomain = SSEPackedInt
+
+//===---------------------------------------------------------------------===//
+// SSE2 - Conditional Store
+//===---------------------------------------------------------------------===//
+
+let ExeDomain = SSEPackedInt in {
+
+let Uses = [EDI] in
+def VMASKMOVDQU : VPDI<0xF7, MRMSrcReg, (outs),
+ (ins VR128:$src, VR128:$mask),
+ "maskmovdqu\t{$mask, $src|$src, $mask}",
+ [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>, VEX;
+let Uses = [RDI] in
+def VMASKMOVDQU64 : VPDI<0xF7, MRMSrcReg, (outs),
+ (ins VR128:$src, VR128:$mask),
+ "maskmovdqu\t{$mask, $src|$src, $mask}",
+ [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>, VEX;
+
+let Uses = [EDI] in
+def MASKMOVDQU : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
+ "maskmovdqu\t{$mask, $src|$src, $mask}",
+ [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>;
+let Uses = [RDI] in
+def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
+ "maskmovdqu\t{$mask, $src|$src, $mask}",
+ [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>;
+
+} // ExeDomain = SSEPackedInt
+
+//===---------------------------------------------------------------------===//
+// SSE2 - Move Doubleword
+//===---------------------------------------------------------------------===//
+
+//===---------------------------------------------------------------------===//
+// Move Int Doubleword to Packed Double Int
+//
+def VMOVDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v4i32 (scalar_to_vector GR32:$src)))]>, VEX;
+def VMOVDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
+ VEX;
+def VMOV64toPQIrr : VRPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
+ "mov{d|q}\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v2i64 (scalar_to_vector GR64:$src)))]>, VEX;
+def VMOV64toSDrr : VRPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
+ "mov{d|q}\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (bitconvert GR64:$src))]>, VEX;
+
+def MOVDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v4i32 (scalar_to_vector GR32:$src)))]>;
+def MOVDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>;
+def MOV64toPQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
+ "mov{d|q}\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v2i64 (scalar_to_vector GR64:$src)))]>;
+def MOV64toSDrr : RPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
+ "mov{d|q}\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (bitconvert GR64:$src))]>;
+
+//===---------------------------------------------------------------------===//
+// Move Int Doubleword to Single Scalar
+//
+def VMOVDI2SSrr : VPDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (bitconvert GR32:$src))]>, VEX;
+
+def VMOVDI2SSrm : VPDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))]>,
+ VEX;
+def MOVDI2SSrr : PDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (bitconvert GR32:$src))]>;
+
+def MOVDI2SSrm : PDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))]>;
+
+//===---------------------------------------------------------------------===//
+// Move Packed Doubleword Int to Packed Double Int
+//
+def VMOVPDI2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (vector_extract (v4i32 VR128:$src),
+ (iPTR 0)))]>, VEX;
+def VMOVPDI2DImr : VPDI<0x7E, MRMDestMem, (outs),
+ (ins i32mem:$dst, VR128:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(store (i32 (vector_extract (v4i32 VR128:$src),
+ (iPTR 0))), addr:$dst)]>, VEX;
+def MOVPDI2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (vector_extract (v4i32 VR128:$src),
+ (iPTR 0)))]>;
+def MOVPDI2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(store (i32 (vector_extract (v4i32 VR128:$src),
+ (iPTR 0))), addr:$dst)]>;
+
+//===---------------------------------------------------------------------===//
+// Move Packed Doubleword Int first element to Doubleword Int
+//
+def VMOVPQIto64rr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
+ "mov{d|q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (vector_extract (v2i64 VR128:$src),
+ (iPTR 0)))]>,
+ TB, OpSize, VEX, VEX_W, Requires<[HasAVX, In64BitMode]>;
+
+def MOVPQIto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
+ "mov{d|q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (vector_extract (v2i64 VR128:$src),
+ (iPTR 0)))]>;
+
+//===---------------------------------------------------------------------===//
+// Bitcast FR64 <-> GR64
+//
+let Predicates = [HasAVX] in
+def VMOV64toSDrm : S3SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
+ "vmovq\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>,
+ VEX;
+def VMOVSDto64rr : VRPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
+ "mov{d|q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (bitconvert FR64:$src))]>;
+def VMOVSDto64mr : VRPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(store (i64 (bitconvert FR64:$src)), addr:$dst)]>;
+
+def MOV64toSDrm : S3SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>;
+def MOVSDto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
+ "mov{d|q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (bitconvert FR64:$src))]>;
+def MOVSDto64mr : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(store (i64 (bitconvert FR64:$src)), addr:$dst)]>;
+
+//===---------------------------------------------------------------------===//
+// Move Scalar Single to Double Int
+//
+def VMOVSS2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (bitconvert FR32:$src))]>, VEX;
+def VMOVSS2DImr : VPDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(store (i32 (bitconvert FR32:$src)), addr:$dst)]>, VEX;
+def MOVSS2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (bitconvert FR32:$src))]>;
+def MOVSS2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(store (i32 (bitconvert FR32:$src)), addr:$dst)]>;
+
+//===---------------------------------------------------------------------===//
+// Patterns and instructions to describe movd/movq to XMM register zero-extends
+//
+let AddedComplexity = 15 in {
+def VMOVZDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (v4i32 (X86vzmovl
+ (v4i32 (scalar_to_vector GR32:$src)))))]>,
+ VEX;
+def VMOVZQI2PQIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
+ "mov{d|q}\t{$src, $dst|$dst, $src}", // X86-64 only
+ [(set VR128:$dst, (v2i64 (X86vzmovl
+ (v2i64 (scalar_to_vector GR64:$src)))))]>,
+ VEX, VEX_W;
+}
+let AddedComplexity = 15 in {
+def MOVZDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (v4i32 (X86vzmovl
+ (v4i32 (scalar_to_vector GR32:$src)))))]>;
+def MOVZQI2PQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
+ "mov{d|q}\t{$src, $dst|$dst, $src}", // X86-64 only
+ [(set VR128:$dst, (v2i64 (X86vzmovl
+ (v2i64 (scalar_to_vector GR64:$src)))))]>;
+}
+
+let AddedComplexity = 20 in {
+def VMOVZDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v4i32 (X86vzmovl (v4i32 (scalar_to_vector
+ (loadi32 addr:$src))))))]>,
+ VEX;
+def MOVZDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v4i32 (X86vzmovl (v4i32 (scalar_to_vector
+ (loadi32 addr:$src))))))]>;
+}
+
+let Predicates = [HasSSE2], AddedComplexity = 20 in {
+ def : Pat<(v4i32 (X86vzmovl (loadv4i32 addr:$src))),
+ (MOVZDI2PDIrm addr:$src)>;
+ def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
+ (MOVZDI2PDIrm addr:$src)>;
+ def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
+ (MOVZDI2PDIrm addr:$src)>;
+}
+
+let Predicates = [HasAVX] in {
+ // AVX 128-bit movd/movq instruction write zeros in the high 128-bit part.
+ let AddedComplexity = 20 in {
+ def : Pat<(v4i32 (X86vzmovl (loadv4i32 addr:$src))),
+ (VMOVZDI2PDIrm addr:$src)>;
+ def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
+ (VMOVZDI2PDIrm addr:$src)>;
+ def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
+ (VMOVZDI2PDIrm addr:$src)>;
+ }
+ // Use regular 128-bit instructions to match 256-bit scalar_to_vec+zext.
+ def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
+ (v4i32 (scalar_to_vector GR32:$src)),(i32 0)))),
+ (SUBREG_TO_REG (i32 0), (VMOVZDI2PDIrr GR32:$src), sub_xmm)>;
+ def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
+ (v2i64 (scalar_to_vector GR64:$src)),(i32 0)))),
+ (SUBREG_TO_REG (i64 0), (VMOVZQI2PQIrr GR64:$src), sub_xmm)>;
+}
+
+// These are the correct encodings of the instructions so that we know how to
+// read correct assembly, even though we continue to emit the wrong ones for
+// compatibility with Darwin's buggy assembler.
+def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
+ (MOV64toPQIrr VR128:$dst, GR64:$src), 0>;
+def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
+ (MOV64toSDrr FR64:$dst, GR64:$src), 0>;
+def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
+ (MOVPQIto64rr GR64:$dst, VR128:$src), 0>;
+def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
+ (MOVSDto64rr GR64:$dst, FR64:$src), 0>;
+def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
+ (VMOVZQI2PQIrr VR128:$dst, GR64:$src), 0>;
+def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
+ (MOVZQI2PQIrr VR128:$dst, GR64:$src), 0>;
+
+//===---------------------------------------------------------------------===//
+// SSE2 - Move Quadword
+//===---------------------------------------------------------------------===//
+
+//===---------------------------------------------------------------------===//
+// Move Quadword Int to Packed Quadword Int
+//
+def VMOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+ "vmovq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, XS,
+ VEX, Requires<[HasAVX]>;
+def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, XS,
+ Requires<[HasSSE2]>; // SSE2 instruction with XS Prefix
+
+//===---------------------------------------------------------------------===//
+// Move Packed Quadword Int to Quadword Int
+//
+def VMOVPQI2QImr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(store (i64 (vector_extract (v2i64 VR128:$src),
+ (iPTR 0))), addr:$dst)]>, VEX;
+def MOVPQI2QImr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(store (i64 (vector_extract (v2i64 VR128:$src),
+ (iPTR 0))), addr:$dst)]>;
+
+//===---------------------------------------------------------------------===//
+// Store / copy lower 64-bits of a XMM register.
+//
+def VMOVLQ128mr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>, VEX;
+def MOVLQ128mr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>;
+
+let AddedComplexity = 20 in
+def VMOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+ "vmovq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v2i64 (X86vzmovl (v2i64 (scalar_to_vector
+ (loadi64 addr:$src))))))]>,
+ XS, VEX, Requires<[HasAVX]>;
+
+let AddedComplexity = 20 in
+def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v2i64 (X86vzmovl (v2i64 (scalar_to_vector
+ (loadi64 addr:$src))))))]>,
+ XS, Requires<[HasSSE2]>;
+
+let Predicates = [HasSSE2], AddedComplexity = 20 in {
+ def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
+ (MOVZQI2PQIrm addr:$src)>;
+ def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4f32 addr:$src)))),
+ (MOVZQI2PQIrm addr:$src)>;
+ def : Pat<(v2i64 (X86vzload addr:$src)), (MOVZQI2PQIrm addr:$src)>;
+}
+
+let Predicates = [HasAVX], AddedComplexity = 20 in {
+ def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
+ (VMOVZQI2PQIrm addr:$src)>;
+ def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4f32 addr:$src)))),
+ (VMOVZQI2PQIrm addr:$src)>;
+ def : Pat<(v2i64 (X86vzload addr:$src)),
+ (VMOVZQI2PQIrm addr:$src)>;
+}
+
+//===---------------------------------------------------------------------===//
+// Moving from XMM to XMM and clear upper 64 bits. Note, there is a bug in
+// IA32 document. movq xmm1, xmm2 does clear the high bits.
+//
+let AddedComplexity = 15 in
+def VMOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "vmovq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>,
+ XS, VEX, Requires<[HasAVX]>;
+let AddedComplexity = 15 in
+def MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>,
+ XS, Requires<[HasSSE2]>;
+
+let AddedComplexity = 20 in
+def VMOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "vmovq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (v2i64 (X86vzmovl
+ (loadv2i64 addr:$src))))]>,
+ XS, VEX, Requires<[HasAVX]>;
+let AddedComplexity = 20 in {
+def MOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (v2i64 (X86vzmovl
+ (loadv2i64 addr:$src))))]>,
+ XS, Requires<[HasSSE2]>;
+}
+
+let AddedComplexity = 20 in {
+ let Predicates = [HasSSE2] in {
+ def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4i32 addr:$src)))),
+ (MOVZPQILo2PQIrm addr:$src)>;
+ def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
+ (MOVZPQILo2PQIrr VR128:$src)>;
+ }
+ let Predicates = [HasAVX] in {
+ def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4i32 addr:$src)))),
+ (VMOVZPQILo2PQIrm addr:$src)>;
+ def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
+ (VMOVZPQILo2PQIrr VR128:$src)>;
+ }
+}
+
+// Instructions to match in the assembler
+def VMOVQs64rr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
+ "movq\t{$src, $dst|$dst, $src}", []>, VEX, VEX_W;
+def VMOVQd64rr : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
+ "movq\t{$src, $dst|$dst, $src}", []>, VEX, VEX_W;
+// Recognize "movd" with GR64 destination, but encode as a "movq"
+def VMOVQd64rr_alt : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
+ "movd\t{$src, $dst|$dst, $src}", []>, VEX, VEX_W;
+
+// Instructions for the disassembler
+// xr = XMM register
+// xm = mem64
+
+let Predicates = [HasAVX] in
+def VMOVQxrxr: I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "vmovq\t{$src, $dst|$dst, $src}", []>, VEX, XS;
+def MOVQxrxr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "movq\t{$src, $dst|$dst, $src}", []>, XS;
+
+//===---------------------------------------------------------------------===//
+// SSE3 - Conversion Instructions
+//===---------------------------------------------------------------------===//
+
+// Convert Packed Double FP to Packed DW Integers
+let Predicates = [HasAVX] in {
+// The assembler can recognize rr 256-bit instructions by seeing a ymm
+// register, but the same isn't true when using memory operands instead.
+// Provide other assembly rr and rm forms to address this explicitly.
+def VCVTPD2DQrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "vcvtpd2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPD2DQXrYr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
+ "vcvtpd2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+
+// XMM only
+def VCVTPD2DQXrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "vcvtpd2dqx\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPD2DQXrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "vcvtpd2dqx\t{$src, $dst|$dst, $src}", []>, VEX;
+
+// YMM only
+def VCVTPD2DQYrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
+ "vcvtpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPD2DQYrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
+ "vcvtpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L;
+}
+
+def CVTPD2DQrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvtpd2dq\t{$src, $dst|$dst, $src}", []>;
+def CVTPD2DQrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtpd2dq\t{$src, $dst|$dst, $src}", []>;
+
+def : Pat<(v4i32 (fp_to_sint (v4f64 VR256:$src))),
+ (VCVTPD2DQYrr VR256:$src)>;
+def : Pat<(v4i32 (fp_to_sint (memopv4f64 addr:$src))),
+ (VCVTPD2DQYrm addr:$src)>;
+
+// Convert Packed DW Integers to Packed Double FP
+let Predicates = [HasAVX] in {
+def VCVTDQ2PDrm : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTDQ2PDrr : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTDQ2PDYrm : S3SI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTDQ2PDYrr : S3SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+}
+
+def CVTDQ2PDrm : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvtdq2pd\t{$src, $dst|$dst, $src}", []>;
+def CVTDQ2PDrr : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtdq2pd\t{$src, $dst|$dst, $src}", []>;
+
+// AVX 256-bit register conversion intrinsics
+def : Pat<(int_x86_avx_cvtdq2_pd_256 VR128:$src),
+ (VCVTDQ2PDYrr VR128:$src)>;
+def : Pat<(int_x86_avx_cvtdq2_pd_256 (memopv4i32 addr:$src)),
+ (VCVTDQ2PDYrm addr:$src)>;
+
+def : Pat<(int_x86_avx_cvt_pd2dq_256 VR256:$src),
+ (VCVTPD2DQYrr VR256:$src)>;
+def : Pat<(int_x86_avx_cvt_pd2dq_256 (memopv4f64 addr:$src)),
+ (VCVTPD2DQYrm addr:$src)>;
+
+def : Pat<(v4f64 (sint_to_fp (v4i32 VR128:$src))),
+ (VCVTDQ2PDYrr VR128:$src)>;
+def : Pat<(v4f64 (sint_to_fp (memopv4i32 addr:$src))),
+ (VCVTDQ2PDYrm addr:$src)>;
+
+//===---------------------------------------------------------------------===//
+// SSE3 - Replicate Single FP - MOVSHDUP and MOVSLDUP
+//===---------------------------------------------------------------------===//
+multiclass sse3_replicate_sfp<bits<8> op, SDNode OpNode, string OpcodeStr,
+ ValueType vt, RegisterClass RC, PatFrag mem_frag,
+ X86MemOperand x86memop> {
+def rr : S3SI<op, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set RC:$dst, (vt (OpNode RC:$src)))]>;
+def rm : S3SI<op, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set RC:$dst, (OpNode (mem_frag addr:$src)))]>;
+}
+
+let Predicates = [HasAVX] in {
+ defm VMOVSHDUP : sse3_replicate_sfp<0x16, X86Movshdup, "vmovshdup",
+ v4f32, VR128, memopv4f32, f128mem>, VEX;
+ defm VMOVSLDUP : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup",
+ v4f32, VR128, memopv4f32, f128mem>, VEX;
+ defm VMOVSHDUPY : sse3_replicate_sfp<0x16, X86Movshdup, "vmovshdup",
+ v8f32, VR256, memopv8f32, f256mem>, VEX;
+ defm VMOVSLDUPY : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup",
+ v8f32, VR256, memopv8f32, f256mem>, VEX;
+}
+defm MOVSHDUP : sse3_replicate_sfp<0x16, X86Movshdup, "movshdup", v4f32, VR128,
+ memopv4f32, f128mem>;
+defm MOVSLDUP : sse3_replicate_sfp<0x12, X86Movsldup, "movsldup", v4f32, VR128,
+ memopv4f32, f128mem>;
+
+let Predicates = [HasSSE3] in {
+ def : Pat<(v4i32 (X86Movshdup VR128:$src)),
+ (MOVSHDUPrr VR128:$src)>;
+ def : Pat<(v4i32 (X86Movshdup (bc_v4i32 (memopv2i64 addr:$src)))),
+ (MOVSHDUPrm addr:$src)>;
+ def : Pat<(v4i32 (X86Movsldup VR128:$src)),
+ (MOVSLDUPrr VR128:$src)>;
+ def : Pat<(v4i32 (X86Movsldup (bc_v4i32 (memopv2i64 addr:$src)))),
+ (MOVSLDUPrm addr:$src)>;
+}
+
+let Predicates = [HasAVX] in {
+ def : Pat<(v4i32 (X86Movshdup VR128:$src)),
+ (VMOVSHDUPrr VR128:$src)>;
+ def : Pat<(v4i32 (X86Movshdup (bc_v4i32 (memopv2i64 addr:$src)))),
+ (VMOVSHDUPrm addr:$src)>;
+ def : Pat<(v4i32 (X86Movsldup VR128:$src)),
+ (VMOVSLDUPrr VR128:$src)>;
+ def : Pat<(v4i32 (X86Movsldup (bc_v4i32 (memopv2i64 addr:$src)))),
+ (VMOVSLDUPrm addr:$src)>;
+ def : Pat<(v8i32 (X86Movshdup VR256:$src)),
+ (VMOVSHDUPYrr VR256:$src)>;
+ def : Pat<(v8i32 (X86Movshdup (bc_v8i32 (memopv4i64 addr:$src)))),
+ (VMOVSHDUPYrm addr:$src)>;
+ def : Pat<(v8i32 (X86Movsldup VR256:$src)),
+ (VMOVSLDUPYrr VR256:$src)>;
+ def : Pat<(v8i32 (X86Movsldup (bc_v8i32 (memopv4i64 addr:$src)))),
+ (VMOVSLDUPYrm addr:$src)>;
+}
+
+//===---------------------------------------------------------------------===//
+// SSE3 - Replicate Double FP - MOVDDUP
+//===---------------------------------------------------------------------===//
+
+multiclass sse3_replicate_dfp<string OpcodeStr> {
+def rr : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst,(v2f64 (movddup VR128:$src, (undef))))]>;
+def rm : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst,
+ (v2f64 (movddup (scalar_to_vector (loadf64 addr:$src)),
+ (undef))))]>;
+}
+
+// FIXME: Merge with above classe when there're patterns for the ymm version
+multiclass sse3_replicate_dfp_y<string OpcodeStr> {
+let Predicates = [HasAVX] in {
+ def rr : S3DI<0x12, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ []>;
+ def rm : S3DI<0x12, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ []>;
+ }
+}
+
+defm MOVDDUP : sse3_replicate_dfp<"movddup">;
+defm VMOVDDUP : sse3_replicate_dfp<"vmovddup">, VEX;
+defm VMOVDDUPY : sse3_replicate_dfp_y<"vmovddup">, VEX;
+
+let Predicates = [HasSSE3] in {
+ def : Pat<(movddup (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src)))),
+ (undef)),
+ (MOVDDUPrm addr:$src)>;
+ let AddedComplexity = 5 in {
+ def : Pat<(movddup (memopv2f64 addr:$src), (undef)), (MOVDDUPrm addr:$src)>;
+ def : Pat<(movddup (bc_v4f32 (memopv2f64 addr:$src)), (undef)),
+ (MOVDDUPrm addr:$src)>;
+ def : Pat<(movddup (memopv2i64 addr:$src), (undef)), (MOVDDUPrm addr:$src)>;
+ def : Pat<(movddup (bc_v4i32 (memopv2i64 addr:$src)), (undef)),
+ (MOVDDUPrm addr:$src)>;
+ }
+ def : Pat<(X86Movddup (memopv2f64 addr:$src)),
+ (MOVDDUPrm addr:$src)>;
+ def : Pat<(X86Movddup (bc_v2f64 (memopv4f32 addr:$src))),
+ (MOVDDUPrm addr:$src)>;
+ def : Pat<(X86Movddup (bc_v2f64 (memopv2i64 addr:$src))),
+ (MOVDDUPrm addr:$src)>;
+ def : Pat<(X86Movddup (v2f64 (scalar_to_vector (loadf64 addr:$src)))),
+ (MOVDDUPrm addr:$src)>;
+ def : Pat<(X86Movddup (bc_v2f64
+ (v2i64 (scalar_to_vector (loadi64 addr:$src))))),
+ (MOVDDUPrm addr:$src)>;
+}
+
+let Predicates = [HasAVX] in {
+ def : Pat<(movddup (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src)))),
+ (undef)),
+ (VMOVDDUPrm addr:$src)>;
+ let AddedComplexity = 5 in {
+ def : Pat<(movddup (memopv2f64 addr:$src), (undef)), (VMOVDDUPrm addr:$src)>;
+ def : Pat<(movddup (bc_v4f32 (memopv2f64 addr:$src)), (undef)),
+ (VMOVDDUPrm addr:$src)>;
+ def : Pat<(movddup (memopv2i64 addr:$src), (undef)), (VMOVDDUPrm addr:$src)>;
+ def : Pat<(movddup (bc_v4i32 (memopv2i64 addr:$src)), (undef)),
+ (VMOVDDUPrm addr:$src)>;
+ }
+ def : Pat<(X86Movddup (memopv2f64 addr:$src)),
+ (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
+ def : Pat<(X86Movddup (bc_v2f64 (memopv4f32 addr:$src))),
+ (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
+ def : Pat<(X86Movddup (bc_v2f64 (memopv2i64 addr:$src))),
+ (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
+ def : Pat<(X86Movddup (v2f64 (scalar_to_vector (loadf64 addr:$src)))),
+ (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
+ def : Pat<(X86Movddup (bc_v2f64
+ (v2i64 (scalar_to_vector (loadi64 addr:$src))))),
+ (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
+
+ // 256-bit version
+ def : Pat<(X86Movddup (memopv4f64 addr:$src)),
+ (VMOVDDUPYrm addr:$src)>;
+ def : Pat<(X86Movddup (memopv4i64 addr:$src)),
+ (VMOVDDUPYrm addr:$src)>;
+ def : Pat<(X86Movddup (v4f64 (scalar_to_vector (loadf64 addr:$src)))),
+ (VMOVDDUPYrm addr:$src)>;
+ def : Pat<(X86Movddup (v4i64 (scalar_to_vector (loadi64 addr:$src)))),
+ (VMOVDDUPYrm addr:$src)>;
+ def : Pat<(X86Movddup (v4f64 VR256:$src)),
+ (VMOVDDUPYrr VR256:$src)>;
+ def : Pat<(X86Movddup (v4i64 VR256:$src)),
+ (VMOVDDUPYrr VR256:$src)>;
+}
+
+//===---------------------------------------------------------------------===//
+// SSE3 - Move Unaligned Integer
+//===---------------------------------------------------------------------===//
+
+let Predicates = [HasAVX] in {
+ def VLDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "vlddqu\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>, VEX;
+ def VLDDQUYrm : S3DI<0xF0, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
+ "vlddqu\t{$src, $dst|$dst, $src}",
+ [(set VR256:$dst, (int_x86_avx_ldu_dq_256 addr:$src))]>, VEX;
+}
+def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "lddqu\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>;
+
+//===---------------------------------------------------------------------===//
+// SSE3 - Arithmetic
+//===---------------------------------------------------------------------===//
+
+multiclass sse3_addsub<Intrinsic Int, string OpcodeStr, RegisterClass RC,
+ X86MemOperand x86memop, bit Is2Addr = 1> {
+ def rr : I<0xD0, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (Int RC:$src1, RC:$src2))]>;
+ def rm : I<0xD0, MRMSrcMem,
+ (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (Int RC:$src1, (memop addr:$src2)))]>;
+}
+
+let Predicates = [HasAVX],
+ ExeDomain = SSEPackedDouble in {
+ defm VADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "vaddsubps", VR128,
+ f128mem, 0>, TB, XD, VEX_4V;
+ defm VADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "vaddsubpd", VR128,
+ f128mem, 0>, TB, OpSize, VEX_4V;
+ defm VADDSUBPSY : sse3_addsub<int_x86_avx_addsub_ps_256, "vaddsubps", VR256,
+ f256mem, 0>, TB, XD, VEX_4V;
+ defm VADDSUBPDY : sse3_addsub<int_x86_avx_addsub_pd_256, "vaddsubpd", VR256,
+ f256mem, 0>, TB, OpSize, VEX_4V;
+}
+let Constraints = "$src1 = $dst", Predicates = [HasSSE3],
+ ExeDomain = SSEPackedDouble in {
+ defm ADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "addsubps", VR128,
+ f128mem>, TB, XD;
+ defm ADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "addsubpd", VR128,
+ f128mem>, TB, OpSize;
+}
+
+//===---------------------------------------------------------------------===//
+// SSE3 Instructions
+//===---------------------------------------------------------------------===//
+
+// Horizontal ops
+multiclass S3D_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
+ X86MemOperand x86memop, SDNode OpNode, bit Is2Addr = 1> {
+ def rr : S3DI<o, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))]>;
+
+ def rm : S3DI<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (vt (OpNode RC:$src1, (memop addr:$src2))))]>;
+}
+multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
+ X86MemOperand x86memop, SDNode OpNode, bit Is2Addr = 1> {
+ def rr : S3I<o, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))]>;
+
+ def rm : S3I<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (vt (OpNode RC:$src1, (memop addr:$src2))))]>;
+}
+
+let Predicates = [HasAVX] in {
+ defm VHADDPS : S3D_Int<0x7C, "vhaddps", v4f32, VR128, f128mem,
+ X86fhadd, 0>, VEX_4V;
+ defm VHADDPD : S3_Int <0x7C, "vhaddpd", v2f64, VR128, f128mem,
+ X86fhadd, 0>, VEX_4V;
+ defm VHSUBPS : S3D_Int<0x7D, "vhsubps", v4f32, VR128, f128mem,
+ X86fhsub, 0>, VEX_4V;
+ defm VHSUBPD : S3_Int <0x7D, "vhsubpd", v2f64, VR128, f128mem,
+ X86fhsub, 0>, VEX_4V;
+ defm VHADDPSY : S3D_Int<0x7C, "vhaddps", v8f32, VR256, f256mem,
+ X86fhadd, 0>, VEX_4V;
+ defm VHADDPDY : S3_Int <0x7C, "vhaddpd", v4f64, VR256, f256mem,
+ X86fhadd, 0>, VEX_4V;
+ defm VHSUBPSY : S3D_Int<0x7D, "vhsubps", v8f32, VR256, f256mem,
+ X86fhsub, 0>, VEX_4V;
+ defm VHSUBPDY : S3_Int <0x7D, "vhsubpd", v4f64, VR256, f256mem,
+ X86fhsub, 0>, VEX_4V;
+}
+
+let Constraints = "$src1 = $dst" in {
+ defm HADDPS : S3D_Int<0x7C, "haddps", v4f32, VR128, f128mem, X86fhadd>;
+ defm HADDPD : S3_Int<0x7C, "haddpd", v2f64, VR128, f128mem, X86fhadd>;
+ defm HSUBPS : S3D_Int<0x7D, "hsubps", v4f32, VR128, f128mem, X86fhsub>;
+ defm HSUBPD : S3_Int<0x7D, "hsubpd", v2f64, VR128, f128mem, X86fhsub>;
+}
+
+//===---------------------------------------------------------------------===//
+// SSSE3 - Packed Absolute Instructions
+//===---------------------------------------------------------------------===//
+
+
+/// SS3I_unop_rm_int - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}.
+multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr,
+ PatFrag mem_frag128, Intrinsic IntId128> {
+ def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (IntId128 VR128:$src))]>,
+ OpSize;
+
+ def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins i128mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst,
+ (IntId128
+ (bitconvert (mem_frag128 addr:$src))))]>, OpSize;
+}
+
+let Predicates = [HasAVX] in {
+ defm VPABSB : SS3I_unop_rm_int<0x1C, "vpabsb", memopv16i8,
+ int_x86_ssse3_pabs_b_128>, VEX;
+ defm VPABSW : SS3I_unop_rm_int<0x1D, "vpabsw", memopv8i16,
+ int_x86_ssse3_pabs_w_128>, VEX;
+ defm VPABSD : SS3I_unop_rm_int<0x1E, "vpabsd", memopv4i32,
+ int_x86_ssse3_pabs_d_128>, VEX;
+}
+
+defm PABSB : SS3I_unop_rm_int<0x1C, "pabsb", memopv16i8,
+ int_x86_ssse3_pabs_b_128>;
+defm PABSW : SS3I_unop_rm_int<0x1D, "pabsw", memopv8i16,
+ int_x86_ssse3_pabs_w_128>;
+defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd", memopv4i32,
+ int_x86_ssse3_pabs_d_128>;
+
+//===---------------------------------------------------------------------===//
+// SSSE3 - Packed Binary Operator Instructions
+//===---------------------------------------------------------------------===//
+
+/// SS3I_binop_rm_int - Simple SSSE3 bin op whose type can be v*{i8,i16,i32}.
+multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr,
+ PatFrag mem_frag128, Intrinsic IntId128,
+ bit Is2Addr = 1> {
+ let isCommutable = 1 in
+ def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
+ OpSize;
+ def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst,
+ (IntId128 VR128:$src1,
+ (bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
+}
+
+let ImmT = NoImm, Predicates = [HasAVX] in {
+let isCommutable = 0 in {
+ defm VPHADDW : SS3I_binop_rm_int<0x01, "vphaddw", memopv8i16,
+ int_x86_ssse3_phadd_w_128, 0>, VEX_4V;
+ defm VPHADDD : SS3I_binop_rm_int<0x02, "vphaddd", memopv4i32,
+ int_x86_ssse3_phadd_d_128, 0>, VEX_4V;
+ defm VPHADDSW : SS3I_binop_rm_int<0x03, "vphaddsw", memopv8i16,
+ int_x86_ssse3_phadd_sw_128, 0>, VEX_4V;
+ defm VPHSUBW : SS3I_binop_rm_int<0x05, "vphsubw", memopv8i16,
+ int_x86_ssse3_phsub_w_128, 0>, VEX_4V;
+ defm VPHSUBD : SS3I_binop_rm_int<0x06, "vphsubd", memopv4i32,
+ int_x86_ssse3_phsub_d_128, 0>, VEX_4V;
+ defm VPHSUBSW : SS3I_binop_rm_int<0x07, "vphsubsw", memopv8i16,
+ int_x86_ssse3_phsub_sw_128, 0>, VEX_4V;
+ defm VPMADDUBSW : SS3I_binop_rm_int<0x04, "vpmaddubsw", memopv16i8,
+ int_x86_ssse3_pmadd_ub_sw_128, 0>, VEX_4V;
+ defm VPSHUFB : SS3I_binop_rm_int<0x00, "vpshufb", memopv16i8,
+ int_x86_ssse3_pshuf_b_128, 0>, VEX_4V;
+ defm VPSIGNB : SS3I_binop_rm_int<0x08, "vpsignb", memopv16i8,
+ int_x86_ssse3_psign_b_128, 0>, VEX_4V;
+ defm VPSIGNW : SS3I_binop_rm_int<0x09, "vpsignw", memopv8i16,
+ int_x86_ssse3_psign_w_128, 0>, VEX_4V;
+ defm VPSIGND : SS3I_binop_rm_int<0x0A, "vpsignd", memopv4i32,
+ int_x86_ssse3_psign_d_128, 0>, VEX_4V;
+}
+defm VPMULHRSW : SS3I_binop_rm_int<0x0B, "vpmulhrsw", memopv8i16,
+ int_x86_ssse3_pmul_hr_sw_128, 0>, VEX_4V;
+}
+
+// None of these have i8 immediate fields.
+let ImmT = NoImm, Constraints = "$src1 = $dst" in {
+let isCommutable = 0 in {
+ defm PHADDW : SS3I_binop_rm_int<0x01, "phaddw", memopv8i16,
+ int_x86_ssse3_phadd_w_128>;
+ defm PHADDD : SS3I_binop_rm_int<0x02, "phaddd", memopv4i32,
+ int_x86_ssse3_phadd_d_128>;
+ defm PHADDSW : SS3I_binop_rm_int<0x03, "phaddsw", memopv8i16,
+ int_x86_ssse3_phadd_sw_128>;
+ defm PHSUBW : SS3I_binop_rm_int<0x05, "phsubw", memopv8i16,
+ int_x86_ssse3_phsub_w_128>;
+ defm PHSUBD : SS3I_binop_rm_int<0x06, "phsubd", memopv4i32,
+ int_x86_ssse3_phsub_d_128>;
+ defm PHSUBSW : SS3I_binop_rm_int<0x07, "phsubsw", memopv8i16,
+ int_x86_ssse3_phsub_sw_128>;
+ defm PMADDUBSW : SS3I_binop_rm_int<0x04, "pmaddubsw", memopv16i8,
+ int_x86_ssse3_pmadd_ub_sw_128>;
+ defm PSHUFB : SS3I_binop_rm_int<0x00, "pshufb", memopv16i8,
+ int_x86_ssse3_pshuf_b_128>;
+ defm PSIGNB : SS3I_binop_rm_int<0x08, "psignb", memopv16i8,
+ int_x86_ssse3_psign_b_128>;
+ defm PSIGNW : SS3I_binop_rm_int<0x09, "psignw", memopv8i16,
+ int_x86_ssse3_psign_w_128>;
+ defm PSIGND : SS3I_binop_rm_int<0x0A, "psignd", memopv4i32,
+ int_x86_ssse3_psign_d_128>;
+}
+defm PMULHRSW : SS3I_binop_rm_int<0x0B, "pmulhrsw", memopv8i16,
+ int_x86_ssse3_pmul_hr_sw_128>;
+}
+
+let Predicates = [HasSSSE3] in {
+ def : Pat<(X86pshufb VR128:$src, VR128:$mask),
+ (PSHUFBrr128 VR128:$src, VR128:$mask)>;
+ def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))),
+ (PSHUFBrm128 VR128:$src, addr:$mask)>;
+
+ def : Pat<(X86psignb VR128:$src1, VR128:$src2),
+ (PSIGNBrr128 VR128:$src1, VR128:$src2)>;
+ def : Pat<(X86psignw VR128:$src1, VR128:$src2),
+ (PSIGNWrr128 VR128:$src1, VR128:$src2)>;
+ def : Pat<(X86psignd VR128:$src1, VR128:$src2),
+ (PSIGNDrr128 VR128:$src1, VR128:$src2)>;
+}
+
+let Predicates = [HasAVX] in {
+ def : Pat<(X86pshufb VR128:$src, VR128:$mask),
+ (VPSHUFBrr128 VR128:$src, VR128:$mask)>;
+ def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))),
+ (VPSHUFBrm128 VR128:$src, addr:$mask)>;
+
+ def : Pat<(X86psignb VR128:$src1, VR128:$src2),
+ (VPSIGNBrr128 VR128:$src1, VR128:$src2)>;
+ def : Pat<(X86psignw VR128:$src1, VR128:$src2),
+ (VPSIGNWrr128 VR128:$src1, VR128:$src2)>;
+ def : Pat<(X86psignd VR128:$src1, VR128:$src2),
+ (VPSIGNDrr128 VR128:$src1, VR128:$src2)>;
+}
+
+//===---------------------------------------------------------------------===//
+// SSSE3 - Packed Align Instruction Patterns
+//===---------------------------------------------------------------------===//
+
+multiclass ssse3_palign<string asm, bit Is2Addr = 1> {
+ def R128rr : SS3AI<0x0F, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(asm,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ []>, OpSize;
+ def R128rm : SS3AI<0x0F, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(asm,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ []>, OpSize;
+}
+
+let Predicates = [HasAVX] in
+ defm VPALIGN : ssse3_palign<"vpalignr", 0>, VEX_4V;
+let Constraints = "$src1 = $dst", Predicates = [HasSSSE3] in
+ defm PALIGN : ssse3_palign<"palignr">;
+
+let Predicates = [HasSSSE3] in {
+def : Pat<(v4i32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+def : Pat<(v4f32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+def : Pat<(v8i16 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+def : Pat<(v16i8 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+}
+
+let Predicates = [HasAVX] in {
+def : Pat<(v4i32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+def : Pat<(v4f32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+def : Pat<(v8i16 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+def : Pat<(v16i8 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+}
+
+//===---------------------------------------------------------------------===//
+// SSSE3 - Thread synchronization
+//===---------------------------------------------------------------------===//
+
+let usesCustomInserter = 1 in {
+def MONITOR : PseudoI<(outs), (ins i32mem:$src1, GR32:$src2, GR32:$src3),
+ [(int_x86_sse3_monitor addr:$src1, GR32:$src2, GR32:$src3)]>;
+def MWAIT : PseudoI<(outs), (ins GR32:$src1, GR32:$src2),
+ [(int_x86_sse3_mwait GR32:$src1, GR32:$src2)]>;
+}
+
+let Uses = [EAX, ECX, EDX] in
+def MONITORrrr : I<0x01, MRM_C8, (outs), (ins), "monitor", []>, TB,
+ Requires<[HasSSE3]>;
+let Uses = [ECX, EAX] in
+def MWAITrr : I<0x01, MRM_C9, (outs), (ins), "mwait", []>, TB,
+ Requires<[HasSSE3]>;
+
+def : InstAlias<"mwait %eax, %ecx", (MWAITrr)>, Requires<[In32BitMode]>;
+def : InstAlias<"mwait %rax, %rcx", (MWAITrr)>, Requires<[In64BitMode]>;
+
+def : InstAlias<"monitor %eax, %ecx, %edx", (MONITORrrr)>,
+ Requires<[In32BitMode]>;
+def : InstAlias<"monitor %rax, %rcx, %rdx", (MONITORrrr)>,
+ Requires<[In64BitMode]>;
+
+//===----------------------------------------------------------------------===//
+// SSE4.1 - Packed Move with Sign/Zero Extend
+//===----------------------------------------------------------------------===//
+
+multiclass SS41I_binop_rm_int8<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
+ def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (IntId VR128:$src))]>, OpSize;
+
+ def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst,
+ (IntId (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))]>,
+ OpSize;
+}
+
+let Predicates = [HasAVX] in {
+defm VPMOVSXBW : SS41I_binop_rm_int8<0x20, "vpmovsxbw", int_x86_sse41_pmovsxbw>,
+ VEX;
+defm VPMOVSXWD : SS41I_binop_rm_int8<0x23, "vpmovsxwd", int_x86_sse41_pmovsxwd>,
+ VEX;
+defm VPMOVSXDQ : SS41I_binop_rm_int8<0x25, "vpmovsxdq", int_x86_sse41_pmovsxdq>,
+ VEX;
+defm VPMOVZXBW : SS41I_binop_rm_int8<0x30, "vpmovzxbw", int_x86_sse41_pmovzxbw>,
+ VEX;
+defm VPMOVZXWD : SS41I_binop_rm_int8<0x33, "vpmovzxwd", int_x86_sse41_pmovzxwd>,
+ VEX;
+defm VPMOVZXDQ : SS41I_binop_rm_int8<0x35, "vpmovzxdq", int_x86_sse41_pmovzxdq>,
+ VEX;
+}
+
+defm PMOVSXBW : SS41I_binop_rm_int8<0x20, "pmovsxbw", int_x86_sse41_pmovsxbw>;
+defm PMOVSXWD : SS41I_binop_rm_int8<0x23, "pmovsxwd", int_x86_sse41_pmovsxwd>;
+defm PMOVSXDQ : SS41I_binop_rm_int8<0x25, "pmovsxdq", int_x86_sse41_pmovsxdq>;
+defm PMOVZXBW : SS41I_binop_rm_int8<0x30, "pmovzxbw", int_x86_sse41_pmovzxbw>;
+defm PMOVZXWD : SS41I_binop_rm_int8<0x33, "pmovzxwd", int_x86_sse41_pmovzxwd>;
+defm PMOVZXDQ : SS41I_binop_rm_int8<0x35, "pmovzxdq", int_x86_sse41_pmovzxdq>;
+
+let Predicates = [HasSSE41] in {
+ // Common patterns involving scalar load.
+ def : Pat<(int_x86_sse41_pmovsxbw (vzmovl_v2i64 addr:$src)),
+ (PMOVSXBWrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovsxbw (vzload_v2i64 addr:$src)),
+ (PMOVSXBWrm addr:$src)>;
+
+ def : Pat<(int_x86_sse41_pmovsxwd (vzmovl_v2i64 addr:$src)),
+ (PMOVSXWDrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovsxwd (vzload_v2i64 addr:$src)),
+ (PMOVSXWDrm addr:$src)>;
+
+ def : Pat<(int_x86_sse41_pmovsxdq (vzmovl_v2i64 addr:$src)),
+ (PMOVSXDQrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovsxdq (vzload_v2i64 addr:$src)),
+ (PMOVSXDQrm addr:$src)>;
+
+ def : Pat<(int_x86_sse41_pmovzxbw (vzmovl_v2i64 addr:$src)),
+ (PMOVZXBWrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovzxbw (vzload_v2i64 addr:$src)),
+ (PMOVZXBWrm addr:$src)>;
+
+ def : Pat<(int_x86_sse41_pmovzxwd (vzmovl_v2i64 addr:$src)),
+ (PMOVZXWDrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovzxwd (vzload_v2i64 addr:$src)),
+ (PMOVZXWDrm addr:$src)>;
+
+ def : Pat<(int_x86_sse41_pmovzxdq (vzmovl_v2i64 addr:$src)),
+ (PMOVZXDQrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovzxdq (vzload_v2i64 addr:$src)),
+ (PMOVZXDQrm addr:$src)>;
+}
+
+let Predicates = [HasAVX] in {
+ // Common patterns involving scalar load.
+ def : Pat<(int_x86_sse41_pmovsxbw (vzmovl_v2i64 addr:$src)),
+ (VPMOVSXBWrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovsxbw (vzload_v2i64 addr:$src)),
+ (VPMOVSXBWrm addr:$src)>;
+
+ def : Pat<(int_x86_sse41_pmovsxwd (vzmovl_v2i64 addr:$src)),
+ (VPMOVSXWDrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovsxwd (vzload_v2i64 addr:$src)),
+ (VPMOVSXWDrm addr:$src)>;
+
+ def : Pat<(int_x86_sse41_pmovsxdq (vzmovl_v2i64 addr:$src)),
+ (VPMOVSXDQrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovsxdq (vzload_v2i64 addr:$src)),
+ (VPMOVSXDQrm addr:$src)>;
+
+ def : Pat<(int_x86_sse41_pmovzxbw (vzmovl_v2i64 addr:$src)),
+ (VPMOVZXBWrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovzxbw (vzload_v2i64 addr:$src)),
+ (VPMOVZXBWrm addr:$src)>;
+
+ def : Pat<(int_x86_sse41_pmovzxwd (vzmovl_v2i64 addr:$src)),
+ (VPMOVZXWDrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovzxwd (vzload_v2i64 addr:$src)),
+ (VPMOVZXWDrm addr:$src)>;
+
+ def : Pat<(int_x86_sse41_pmovzxdq (vzmovl_v2i64 addr:$src)),
+ (VPMOVZXDQrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovzxdq (vzload_v2i64 addr:$src)),
+ (VPMOVZXDQrm addr:$src)>;
+}
+
+
+multiclass SS41I_binop_rm_int4<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
+ def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (IntId VR128:$src))]>, OpSize;
+
+ def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst,
+ (IntId (bitconvert (v4i32 (scalar_to_vector (loadi32 addr:$src))))))]>,
+ OpSize;
+}
+
+let Predicates = [HasAVX] in {
+defm VPMOVSXBD : SS41I_binop_rm_int4<0x21, "vpmovsxbd", int_x86_sse41_pmovsxbd>,
+ VEX;
+defm VPMOVSXWQ : SS41I_binop_rm_int4<0x24, "vpmovsxwq", int_x86_sse41_pmovsxwq>,
+ VEX;
+defm VPMOVZXBD : SS41I_binop_rm_int4<0x31, "vpmovzxbd", int_x86_sse41_pmovzxbd>,
+ VEX;
+defm VPMOVZXWQ : SS41I_binop_rm_int4<0x34, "vpmovzxwq", int_x86_sse41_pmovzxwq>,
+ VEX;
+}
+
+defm PMOVSXBD : SS41I_binop_rm_int4<0x21, "pmovsxbd", int_x86_sse41_pmovsxbd>;
+defm PMOVSXWQ : SS41I_binop_rm_int4<0x24, "pmovsxwq", int_x86_sse41_pmovsxwq>;
+defm PMOVZXBD : SS41I_binop_rm_int4<0x31, "pmovzxbd", int_x86_sse41_pmovzxbd>;
+defm PMOVZXWQ : SS41I_binop_rm_int4<0x34, "pmovzxwq", int_x86_sse41_pmovzxwq>;
+
+let Predicates = [HasSSE41] in {
+ // Common patterns involving scalar load
+ def : Pat<(int_x86_sse41_pmovsxbd (vzmovl_v4i32 addr:$src)),
+ (PMOVSXBDrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovsxwq (vzmovl_v4i32 addr:$src)),
+ (PMOVSXWQrm addr:$src)>;
+
+ def : Pat<(int_x86_sse41_pmovzxbd (vzmovl_v4i32 addr:$src)),
+ (PMOVZXBDrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovzxwq (vzmovl_v4i32 addr:$src)),
+ (PMOVZXWQrm addr:$src)>;
+}
+
+let Predicates = [HasAVX] in {
+ // Common patterns involving scalar load
+ def : Pat<(int_x86_sse41_pmovsxbd (vzmovl_v4i32 addr:$src)),
+ (VPMOVSXBDrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovsxwq (vzmovl_v4i32 addr:$src)),
+ (VPMOVSXWQrm addr:$src)>;
+
+ def : Pat<(int_x86_sse41_pmovzxbd (vzmovl_v4i32 addr:$src)),
+ (VPMOVZXBDrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovzxwq (vzmovl_v4i32 addr:$src)),
+ (VPMOVZXWQrm addr:$src)>;
+}
+
+multiclass SS41I_binop_rm_int2<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
+ def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (IntId VR128:$src))]>, OpSize;
+
+ // Expecting a i16 load any extended to i32 value.
+ def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), (ins i16mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (IntId (bitconvert
+ (v4i32 (scalar_to_vector (loadi16_anyext addr:$src))))))]>,
+ OpSize;
+}
+
+let Predicates = [HasAVX] in {
+defm VPMOVSXBQ : SS41I_binop_rm_int2<0x22, "vpmovsxbq", int_x86_sse41_pmovsxbq>,
+ VEX;
+defm VPMOVZXBQ : SS41I_binop_rm_int2<0x32, "vpmovzxbq", int_x86_sse41_pmovzxbq>,
+ VEX;
+}
+defm PMOVSXBQ : SS41I_binop_rm_int2<0x22, "pmovsxbq", int_x86_sse41_pmovsxbq>;
+defm PMOVZXBQ : SS41I_binop_rm_int2<0x32, "pmovzxbq", int_x86_sse41_pmovzxbq>;
+
+let Predicates = [HasSSE41] in {
+ // Common patterns involving scalar load
+ def : Pat<(int_x86_sse41_pmovsxbq
+ (bitconvert (v4i32 (X86vzmovl
+ (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
+ (PMOVSXBQrm addr:$src)>;
+
+ def : Pat<(int_x86_sse41_pmovzxbq
+ (bitconvert (v4i32 (X86vzmovl
+ (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
+ (PMOVZXBQrm addr:$src)>;
+}
+
+let Predicates = [HasAVX] in {
+ // Common patterns involving scalar load
+ def : Pat<(int_x86_sse41_pmovsxbq
+ (bitconvert (v4i32 (X86vzmovl
+ (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
+ (VPMOVSXBQrm addr:$src)>;
+
+ def : Pat<(int_x86_sse41_pmovzxbq
+ (bitconvert (v4i32 (X86vzmovl
+ (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
+ (VPMOVZXBQrm addr:$src)>;
+}
+
+//===----------------------------------------------------------------------===//
+// SSE4.1 - Extract Instructions
+//===----------------------------------------------------------------------===//
+
+/// SS41I_binop_ext8 - SSE 4.1 extract 8 bits to 32 bit reg or 8 bit mem
+multiclass SS41I_extract8<bits<8> opc, string OpcodeStr> {
+ def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
+ (ins VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set GR32:$dst, (X86pextrb (v16i8 VR128:$src1), imm:$src2))]>,
+ OpSize;
+ def mr : SS4AIi8<opc, MRMDestMem, (outs),
+ (ins i8mem:$dst, VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, OpSize;
+// FIXME:
+// There's an AssertZext in the way of writing the store pattern
+// (store (i8 (trunc (X86pextrb (v16i8 VR128:$src1), imm:$src2))), addr:$dst)
+}
+
+let Predicates = [HasAVX] in {
+ defm VPEXTRB : SS41I_extract8<0x14, "vpextrb">, VEX;
+ def VPEXTRBrr64 : SS4AIi8<0x14, MRMDestReg, (outs GR64:$dst),
+ (ins VR128:$src1, i32i8imm:$src2),
+ "vpextrb\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, OpSize, VEX;
+}
+
+defm PEXTRB : SS41I_extract8<0x14, "pextrb">;
+
+
+/// SS41I_extract16 - SSE 4.1 extract 16 bits to memory destination
+multiclass SS41I_extract16<bits<8> opc, string OpcodeStr> {
+ def mr : SS4AIi8<opc, MRMDestMem, (outs),
+ (ins i16mem:$dst, VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, OpSize;
+// FIXME:
+// There's an AssertZext in the way of writing the store pattern
+// (store (i16 (trunc (X86pextrw (v16i8 VR128:$src1), imm:$src2))), addr:$dst)
+}
+
+let Predicates = [HasAVX] in
+ defm VPEXTRW : SS41I_extract16<0x15, "vpextrw">, VEX;
+
+defm PEXTRW : SS41I_extract16<0x15, "pextrw">;
+
+
+/// SS41I_extract32 - SSE 4.1 extract 32 bits to int reg or memory destination
+multiclass SS41I_extract32<bits<8> opc, string OpcodeStr> {
+ def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
+ (ins VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set GR32:$dst,
+ (extractelt (v4i32 VR128:$src1), imm:$src2))]>, OpSize;
+ def mr : SS4AIi8<opc, MRMDestMem, (outs),
+ (ins i32mem:$dst, VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(store (extractelt (v4i32 VR128:$src1), imm:$src2),
+ addr:$dst)]>, OpSize;
+}
+
+let Predicates = [HasAVX] in
+ defm VPEXTRD : SS41I_extract32<0x16, "vpextrd">, VEX;
+
+defm PEXTRD : SS41I_extract32<0x16, "pextrd">;
+
+/// SS41I_extract32 - SSE 4.1 extract 32 bits to int reg or memory destination
+multiclass SS41I_extract64<bits<8> opc, string OpcodeStr> {
+ def rr : SS4AIi8<opc, MRMDestReg, (outs GR64:$dst),
+ (ins VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set GR64:$dst,
+ (extractelt (v2i64 VR128:$src1), imm:$src2))]>, OpSize, REX_W;
+ def mr : SS4AIi8<opc, MRMDestMem, (outs),
+ (ins i64mem:$dst, VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(store (extractelt (v2i64 VR128:$src1), imm:$src2),
+ addr:$dst)]>, OpSize, REX_W;
+}
+
+let Predicates = [HasAVX] in
+ defm VPEXTRQ : SS41I_extract64<0x16, "vpextrq">, VEX, VEX_W;
+
+defm PEXTRQ : SS41I_extract64<0x16, "pextrq">;
+
+/// SS41I_extractf32 - SSE 4.1 extract 32 bits fp value to int reg or memory
+/// destination
+multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr> {
+ def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
+ (ins VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set GR32:$dst,
+ (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2))]>,
+ OpSize;
+ def mr : SS4AIi8<opc, MRMDestMem, (outs),
+ (ins f32mem:$dst, VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(store (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2),
+ addr:$dst)]>, OpSize;
+}
+
+let Predicates = [HasAVX] in {
+ defm VEXTRACTPS : SS41I_extractf32<0x17, "vextractps">, VEX;
+ def VEXTRACTPSrr64 : SS4AIi8<0x17, MRMDestReg, (outs GR64:$dst),
+ (ins VR128:$src1, i32i8imm:$src2),
+ "vextractps \t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ []>, OpSize, VEX;
+}
+defm EXTRACTPS : SS41I_extractf32<0x17, "extractps">;
+
+// Also match an EXTRACTPS store when the store is done as f32 instead of i32.
+def : Pat<(store (f32 (bitconvert (extractelt (bc_v4i32 (v4f32 VR128:$src1)),
+ imm:$src2))),
+ addr:$dst),
+ (EXTRACTPSmr addr:$dst, VR128:$src1, imm:$src2)>,
+ Requires<[HasSSE41]>;
+def : Pat<(store (f32 (bitconvert (extractelt (bc_v4i32 (v4f32 VR128:$src1)),
+ imm:$src2))),
+ addr:$dst),
+ (VEXTRACTPSmr addr:$dst, VR128:$src1, imm:$src2)>,
+ Requires<[HasAVX]>;
+
+//===----------------------------------------------------------------------===//
+// SSE4.1 - Insert Instructions
+//===----------------------------------------------------------------------===//
+
+multiclass SS41I_insert8<bits<8> opc, string asm, bit Is2Addr = 1> {
+ def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, GR32:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(asm,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst,
+ (X86pinsrb VR128:$src1, GR32:$src2, imm:$src3))]>, OpSize;
+ def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i8mem:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(asm,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst,
+ (X86pinsrb VR128:$src1, (extloadi8 addr:$src2),
+ imm:$src3))]>, OpSize;
+}
+
+let Predicates = [HasAVX] in
+ defm VPINSRB : SS41I_insert8<0x20, "vpinsrb", 0>, VEX_4V;
+let Constraints = "$src1 = $dst" in
+ defm PINSRB : SS41I_insert8<0x20, "pinsrb">;
+
+multiclass SS41I_insert32<bits<8> opc, string asm, bit Is2Addr = 1> {
+ def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, GR32:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(asm,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst,
+ (v4i32 (insertelt VR128:$src1, GR32:$src2, imm:$src3)))]>,
+ OpSize;
+ def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i32mem:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(asm,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst,
+ (v4i32 (insertelt VR128:$src1, (loadi32 addr:$src2),
+ imm:$src3)))]>, OpSize;
+}
+
+let Predicates = [HasAVX] in
+ defm VPINSRD : SS41I_insert32<0x22, "vpinsrd", 0>, VEX_4V;
+let Constraints = "$src1 = $dst" in
+ defm PINSRD : SS41I_insert32<0x22, "pinsrd">;
+
+multiclass SS41I_insert64<bits<8> opc, string asm, bit Is2Addr = 1> {
+ def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, GR64:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(asm,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst,
+ (v2i64 (insertelt VR128:$src1, GR64:$src2, imm:$src3)))]>,
+ OpSize;
+ def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i64mem:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(asm,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst,
+ (v2i64 (insertelt VR128:$src1, (loadi64 addr:$src2),
+ imm:$src3)))]>, OpSize;
+}
+
+let Predicates = [HasAVX] in
+ defm VPINSRQ : SS41I_insert64<0x22, "vpinsrq", 0>, VEX_4V, VEX_W;
+let Constraints = "$src1 = $dst" in
+ defm PINSRQ : SS41I_insert64<0x22, "pinsrq">, REX_W;
+
+// insertps has a few different modes, there's the first two here below which
+// are optimized inserts that won't zero arbitrary elements in the destination
+// vector. The next one matches the intrinsic and could zero arbitrary elements
+// in the target vector.
+multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1> {
+ def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, u32u8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(asm,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst,
+ (X86insrtps VR128:$src1, VR128:$src2, imm:$src3))]>,
+ OpSize;
+ def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, f32mem:$src2, u32u8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(asm,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst,
+ (X86insrtps VR128:$src1,
+ (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
+ imm:$src3))]>, OpSize;
+}
+
+let Constraints = "$src1 = $dst" in
+ defm INSERTPS : SS41I_insertf32<0x21, "insertps">;
+let Predicates = [HasAVX] in
+ defm VINSERTPS : SS41I_insertf32<0x21, "vinsertps", 0>, VEX_4V;
+
+def : Pat<(int_x86_sse41_insertps VR128:$src1, VR128:$src2, imm:$src3),
+ (VINSERTPSrr VR128:$src1, VR128:$src2, imm:$src3)>,
+ Requires<[HasAVX]>;
+def : Pat<(int_x86_sse41_insertps VR128:$src1, VR128:$src2, imm:$src3),
+ (INSERTPSrr VR128:$src1, VR128:$src2, imm:$src3)>,
+ Requires<[HasSSE41]>;
+
+//===----------------------------------------------------------------------===//
+// SSE4.1 - Round Instructions
+//===----------------------------------------------------------------------===//
+
+multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr,
+ X86MemOperand x86memop, RegisterClass RC,
+ PatFrag mem_frag32, PatFrag mem_frag64,
+ Intrinsic V4F32Int, Intrinsic V2F64Int> {
+ // Intrinsic operation, reg.
+ // Vector intrinsic operation, reg
+ def PSr : SS4AIi8<opcps, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (V4F32Int RC:$src1, imm:$src2))]>,
+ OpSize;
+
+ // Vector intrinsic operation, mem
+ def PSm : Ii8<opcps, MRMSrcMem,
+ (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst,
+ (V4F32Int (mem_frag32 addr:$src1),imm:$src2))]>,
+ TA, OpSize,
+ Requires<[HasSSE41]>;
+
+ // Vector intrinsic operation, reg
+ def PDr : SS4AIi8<opcpd, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (V2F64Int RC:$src1, imm:$src2))]>,
+ OpSize;
+
+ // Vector intrinsic operation, mem
+ def PDm : SS4AIi8<opcpd, MRMSrcMem,
+ (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst,
+ (V2F64Int (mem_frag64 addr:$src1),imm:$src2))]>,
+ OpSize;
+}
+
+multiclass sse41_fp_unop_rm_avx_p<bits<8> opcps, bits<8> opcpd,
+ RegisterClass RC, X86MemOperand x86memop, string OpcodeStr> {
+ // Intrinsic operation, reg.
+ // Vector intrinsic operation, reg
+ def PSr_AVX : SS4AIi8<opcps, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, OpSize;
+
+ // Vector intrinsic operation, mem
+ def PSm_AVX : Ii8<opcps, MRMSrcMem,
+ (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, TA, OpSize, Requires<[HasSSE41]>;
+
+ // Vector intrinsic operation, reg
+ def PDr_AVX : SS4AIi8<opcpd, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, OpSize;
+
+ // Vector intrinsic operation, mem
+ def PDm_AVX : SS4AIi8<opcpd, MRMSrcMem,
+ (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, OpSize;
+}
+
+multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
+ string OpcodeStr,
+ Intrinsic F32Int,
+ Intrinsic F64Int, bit Is2Addr = 1> {
+ // Intrinsic operation, reg.
+ def SSr : SS4AIi8<opcss, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr,
+ "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(OpcodeStr,
+ "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst, (F32Int VR128:$src1, VR128:$src2, imm:$src3))]>,
+ OpSize;
+
+ // Intrinsic operation, mem.
+ def SSm : SS4AIi8<opcss, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr,
+ "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(OpcodeStr,
+ "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst,
+ (F32Int VR128:$src1, sse_load_f32:$src2, imm:$src3))]>,
+ OpSize;
+
+ // Intrinsic operation, reg.
+ def SDr : SS4AIi8<opcsd, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr,
+ "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(OpcodeStr,
+ "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst, (F64Int VR128:$src1, VR128:$src2, imm:$src3))]>,
+ OpSize;
+
+ // Intrinsic operation, mem.
+ def SDm : SS4AIi8<opcsd, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr,
+ "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(OpcodeStr,
+ "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst,
+ (F64Int VR128:$src1, sse_load_f64:$src2, imm:$src3))]>,
+ OpSize;
+}
+
+multiclass sse41_fp_binop_rm_avx_s<bits<8> opcss, bits<8> opcsd,
+ string OpcodeStr> {
+ // Intrinsic operation, reg.
+ def SSr_AVX : SS4AIi8<opcss, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, OpSize;
+
+ // Intrinsic operation, mem.
+ def SSm_AVX : SS4AIi8<opcss, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, OpSize;
+
+ // Intrinsic operation, reg.
+ def SDr_AVX : SS4AIi8<opcsd, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, OpSize;
+
+ // Intrinsic operation, mem.
+ def SDm_AVX : SS4AIi8<opcsd, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, OpSize;
+}
+
+// FP round - roundss, roundps, roundsd, roundpd
+let Predicates = [HasAVX] in {
+ // Intrinsic form
+ defm VROUND : sse41_fp_unop_rm<0x08, 0x09, "vround", f128mem, VR128,
+ memopv4f32, memopv2f64,
+ int_x86_sse41_round_ps,
+ int_x86_sse41_round_pd>, VEX;
+ defm VROUNDY : sse41_fp_unop_rm<0x08, 0x09, "vround", f256mem, VR256,
+ memopv8f32, memopv4f64,
+ int_x86_avx_round_ps_256,
+ int_x86_avx_round_pd_256>, VEX;
+ defm VROUND : sse41_fp_binop_rm<0x0A, 0x0B, "vround",
+ int_x86_sse41_round_ss,
+ int_x86_sse41_round_sd, 0>, VEX_4V, VEX_LIG;
+
+ // Instructions for the assembler
+ defm VROUND : sse41_fp_unop_rm_avx_p<0x08, 0x09, VR128, f128mem, "vround">,
+ VEX;
+ defm VROUNDY : sse41_fp_unop_rm_avx_p<0x08, 0x09, VR256, f256mem, "vround">,
+ VEX;
+ defm VROUND : sse41_fp_binop_rm_avx_s<0x0A, 0x0B, "vround">, VEX_4V, VEX_LIG;
+}
+
+defm ROUND : sse41_fp_unop_rm<0x08, 0x09, "round", f128mem, VR128,
+ memopv4f32, memopv2f64,
+ int_x86_sse41_round_ps, int_x86_sse41_round_pd>;
+let Constraints = "$src1 = $dst" in
+defm ROUND : sse41_fp_binop_rm<0x0A, 0x0B, "round",
+ int_x86_sse41_round_ss, int_x86_sse41_round_sd>;
+
+//===----------------------------------------------------------------------===//
+// SSE4.1 - Packed Bit Test
+//===----------------------------------------------------------------------===//
+
+// ptest instruction we'll lower to this in X86ISelLowering primarily from
+// the intel intrinsic that corresponds to this.
+let Defs = [EFLAGS], Predicates = [HasAVX] in {
+def VPTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
+ "vptest\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86ptest VR128:$src1, (v4f32 VR128:$src2)))]>,
+ OpSize, VEX;
+def VPTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
+ "vptest\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS,(X86ptest VR128:$src1, (memopv4f32 addr:$src2)))]>,
+ OpSize, VEX;
+
+def VPTESTYrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR256:$src1, VR256:$src2),
+ "vptest\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86ptest VR256:$src1, (v4i64 VR256:$src2)))]>,
+ OpSize, VEX;
+def VPTESTYrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR256:$src1, i256mem:$src2),
+ "vptest\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS,(X86ptest VR256:$src1, (memopv4i64 addr:$src2)))]>,
+ OpSize, VEX;
+}
+
+let Defs = [EFLAGS] in {
+def PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
+ "ptest \t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86ptest VR128:$src1, (v4f32 VR128:$src2)))]>,
+ OpSize;
+def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
+ "ptest \t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86ptest VR128:$src1, (memopv4f32 addr:$src2)))]>,
+ OpSize;
+}
+
+// The bit test instructions below are AVX only
+multiclass avx_bittest<bits<8> opc, string OpcodeStr, RegisterClass RC,
+ X86MemOperand x86memop, PatFrag mem_frag, ValueType vt> {
+ def rr : SS48I<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
+ [(set EFLAGS, (X86testp RC:$src1, (vt RC:$src2)))]>, OpSize, VEX;
+ def rm : SS48I<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
+ [(set EFLAGS, (X86testp RC:$src1, (mem_frag addr:$src2)))]>,
+ OpSize, VEX;
+}
+
+let Defs = [EFLAGS], Predicates = [HasAVX] in {
+defm VTESTPS : avx_bittest<0x0E, "vtestps", VR128, f128mem, memopv4f32, v4f32>;
+defm VTESTPSY : avx_bittest<0x0E, "vtestps", VR256, f256mem, memopv8f32, v8f32>;
+defm VTESTPD : avx_bittest<0x0F, "vtestpd", VR128, f128mem, memopv2f64, v2f64>;
+defm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem, memopv4f64, v4f64>;
+}
+
+//===----------------------------------------------------------------------===//
+// SSE4.1 - Misc Instructions
+//===----------------------------------------------------------------------===//
+
+let Defs = [EFLAGS], Predicates = [HasPOPCNT] in {
+ def POPCNT16rr : I<0xB8, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
+ "popcnt{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, (ctpop GR16:$src)), (implicit EFLAGS)]>,
+ OpSize, XS;
+ def POPCNT16rm : I<0xB8, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
+ "popcnt{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, (ctpop (loadi16 addr:$src))),
+ (implicit EFLAGS)]>, OpSize, XS;
+
+ def POPCNT32rr : I<0xB8, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+ "popcnt{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (ctpop GR32:$src)), (implicit EFLAGS)]>,
+ XS;
+ def POPCNT32rm : I<0xB8, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "popcnt{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (ctpop (loadi32 addr:$src))),
+ (implicit EFLAGS)]>, XS;
+
+ def POPCNT64rr : RI<0xB8, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+ "popcnt{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (ctpop GR64:$src)), (implicit EFLAGS)]>,
+ XS;
+ def POPCNT64rm : RI<0xB8, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "popcnt{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (ctpop (loadi64 addr:$src))),
+ (implicit EFLAGS)]>, XS;
+}
+
+
+
+// SS41I_unop_rm_int_v16 - SSE 4.1 unary operator whose type is v8i16.
+multiclass SS41I_unop_rm_int_v16<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId128> {
+ def rr128 : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (IntId128 VR128:$src))]>, OpSize;
+ def rm128 : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins i128mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst,
+ (IntId128
+ (bitconvert (memopv8i16 addr:$src))))]>, OpSize;
+}
+
+let Predicates = [HasAVX] in
+defm VPHMINPOSUW : SS41I_unop_rm_int_v16 <0x41, "vphminposuw",
+ int_x86_sse41_phminposuw>, VEX;
+defm PHMINPOSUW : SS41I_unop_rm_int_v16 <0x41, "phminposuw",
+ int_x86_sse41_phminposuw>;
+
+/// SS41I_binop_rm_int - Simple SSE 4.1 binary operator
+multiclass SS41I_binop_rm_int<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId128, bit Is2Addr = 1> {
+ let isCommutable = 1 in
+ def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>, OpSize;
+ def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst,
+ (IntId128 VR128:$src1,
+ (bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
+}
+
+let Predicates = [HasAVX] in {
+ let isCommutable = 0 in
+ defm VPACKUSDW : SS41I_binop_rm_int<0x2B, "vpackusdw", int_x86_sse41_packusdw,
+ 0>, VEX_4V;
+ defm VPCMPEQQ : SS41I_binop_rm_int<0x29, "vpcmpeqq", int_x86_sse41_pcmpeqq,
+ 0>, VEX_4V;
+ defm VPMINSB : SS41I_binop_rm_int<0x38, "vpminsb", int_x86_sse41_pminsb,
+ 0>, VEX_4V;
+ defm VPMINSD : SS41I_binop_rm_int<0x39, "vpminsd", int_x86_sse41_pminsd,
+ 0>, VEX_4V;
+ defm VPMINUD : SS41I_binop_rm_int<0x3B, "vpminud", int_x86_sse41_pminud,
+ 0>, VEX_4V;
+ defm VPMINUW : SS41I_binop_rm_int<0x3A, "vpminuw", int_x86_sse41_pminuw,
+ 0>, VEX_4V;
+ defm VPMAXSB : SS41I_binop_rm_int<0x3C, "vpmaxsb", int_x86_sse41_pmaxsb,
+ 0>, VEX_4V;
+ defm VPMAXSD : SS41I_binop_rm_int<0x3D, "vpmaxsd", int_x86_sse41_pmaxsd,
+ 0>, VEX_4V;
+ defm VPMAXUD : SS41I_binop_rm_int<0x3F, "vpmaxud", int_x86_sse41_pmaxud,
+ 0>, VEX_4V;
+ defm VPMAXUW : SS41I_binop_rm_int<0x3E, "vpmaxuw", int_x86_sse41_pmaxuw,
+ 0>, VEX_4V;
+ defm VPMULDQ : SS41I_binop_rm_int<0x28, "vpmuldq", int_x86_sse41_pmuldq,
+ 0>, VEX_4V;
+
+ def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, VR128:$src2)),
+ (VPCMPEQQrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, (memop addr:$src2))),
+ (VPCMPEQQrm VR128:$src1, addr:$src2)>;
+}
+
+let Constraints = "$src1 = $dst" in {
+ let isCommutable = 0 in
+ defm PACKUSDW : SS41I_binop_rm_int<0x2B, "packusdw", int_x86_sse41_packusdw>;
+ defm PCMPEQQ : SS41I_binop_rm_int<0x29, "pcmpeqq", int_x86_sse41_pcmpeqq>;
+ defm PMINSB : SS41I_binop_rm_int<0x38, "pminsb", int_x86_sse41_pminsb>;
+ defm PMINSD : SS41I_binop_rm_int<0x39, "pminsd", int_x86_sse41_pminsd>;
+ defm PMINUD : SS41I_binop_rm_int<0x3B, "pminud", int_x86_sse41_pminud>;
+ defm PMINUW : SS41I_binop_rm_int<0x3A, "pminuw", int_x86_sse41_pminuw>;
+ defm PMAXSB : SS41I_binop_rm_int<0x3C, "pmaxsb", int_x86_sse41_pmaxsb>;
+ defm PMAXSD : SS41I_binop_rm_int<0x3D, "pmaxsd", int_x86_sse41_pmaxsd>;
+ defm PMAXUD : SS41I_binop_rm_int<0x3F, "pmaxud", int_x86_sse41_pmaxud>;
+ defm PMAXUW : SS41I_binop_rm_int<0x3E, "pmaxuw", int_x86_sse41_pmaxuw>;
+ defm PMULDQ : SS41I_binop_rm_int<0x28, "pmuldq", int_x86_sse41_pmuldq>;
+}
+
+def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, VR128:$src2)),
+ (PCMPEQQrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, (memop addr:$src2))),
+ (PCMPEQQrm VR128:$src1, addr:$src2)>;
+
+/// SS48I_binop_rm - Simple SSE41 binary operator.
+multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ ValueType OpVT, bit Is2Addr = 1> {
+ let isCommutable = 1 in
+ def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]>,
+ OpSize;
+ def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (OpNode VR128:$src1,
+ (bc_v4i32 (memopv2i64 addr:$src2))))]>,
+ OpSize;
+}
+
+let Predicates = [HasAVX] in
+ defm VPMULLD : SS48I_binop_rm<0x40, "vpmulld", mul, v4i32, 0>, VEX_4V;
+let Constraints = "$src1 = $dst" in
+ defm PMULLD : SS48I_binop_rm<0x40, "pmulld", mul, v4i32>;
+
+/// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate
+multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId, RegisterClass RC, PatFrag memop_frag,
+ X86MemOperand x86memop, bit Is2Addr = 1> {
+ let isCommutable = 1 in
+ def rri : SS4AIi8<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, u32u8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))]>,
+ OpSize;
+ def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86memop:$src2, u32u8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set RC:$dst,
+ (IntId RC:$src1,
+ (bitconvert (memop_frag addr:$src2)), imm:$src3))]>,
+ OpSize;
+}
+
+let Predicates = [HasAVX] in {
+ let isCommutable = 0 in {
+ defm VBLENDPS : SS41I_binop_rmi_int<0x0C, "vblendps", int_x86_sse41_blendps,
+ VR128, memopv16i8, i128mem, 0>, VEX_4V;
+ defm VBLENDPD : SS41I_binop_rmi_int<0x0D, "vblendpd", int_x86_sse41_blendpd,
+ VR128, memopv16i8, i128mem, 0>, VEX_4V;
+ defm VBLENDPSY : SS41I_binop_rmi_int<0x0C, "vblendps",
+ int_x86_avx_blend_ps_256, VR256, memopv32i8, i256mem, 0>, VEX_4V;
+ defm VBLENDPDY : SS41I_binop_rmi_int<0x0D, "vblendpd",
+ int_x86_avx_blend_pd_256, VR256, memopv32i8, i256mem, 0>, VEX_4V;
+ defm VPBLENDW : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_sse41_pblendw,
+ VR128, memopv16i8, i128mem, 0>, VEX_4V;
+ defm VMPSADBW : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_sse41_mpsadbw,
+ VR128, memopv16i8, i128mem, 0>, VEX_4V;
+ }
+ defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps,
+ VR128, memopv16i8, i128mem, 0>, VEX_4V;
+ defm VDPPD : SS41I_binop_rmi_int<0x41, "vdppd", int_x86_sse41_dppd,
+ VR128, memopv16i8, i128mem, 0>, VEX_4V;
+ defm VDPPSY : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_avx_dp_ps_256,
+ VR256, memopv32i8, i256mem, 0>, VEX_4V;
+}
+
+let Constraints = "$src1 = $dst" in {
+ let isCommutable = 0 in {
+ defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps", int_x86_sse41_blendps,
+ VR128, memopv16i8, i128mem>;
+ defm BLENDPD : SS41I_binop_rmi_int<0x0D, "blendpd", int_x86_sse41_blendpd,
+ VR128, memopv16i8, i128mem>;
+ defm PBLENDW : SS41I_binop_rmi_int<0x0E, "pblendw", int_x86_sse41_pblendw,
+ VR128, memopv16i8, i128mem>;
+ defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw,
+ VR128, memopv16i8, i128mem>;
+ }
+ defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps,
+ VR128, memopv16i8, i128mem>;
+ defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd,
+ VR128, memopv16i8, i128mem>;
+}
+
+/// SS41I_quaternary_int_avx - AVX SSE 4.1 with 4 operators
+let Predicates = [HasAVX] in {
+multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr,
+ RegisterClass RC, X86MemOperand x86memop,
+ PatFrag mem_frag, Intrinsic IntId> {
+ def rr : I<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, RC:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set RC:$dst, (IntId RC:$src1, RC:$src2, RC:$src3))],
+ SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
+
+ def rm : I<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86memop:$src2, RC:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set RC:$dst,
+ (IntId RC:$src1, (bitconvert (mem_frag addr:$src2)),
+ RC:$src3))],
+ SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
+}
+}
+
+defm VBLENDVPD : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR128, i128mem,
+ memopv16i8, int_x86_sse41_blendvpd>;
+defm VBLENDVPS : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR128, i128mem,
+ memopv16i8, int_x86_sse41_blendvps>;
+defm VPBLENDVB : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR128, i128mem,
+ memopv16i8, int_x86_sse41_pblendvb>;
+defm VBLENDVPDY : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR256, i256mem,
+ memopv32i8, int_x86_avx_blendv_pd_256>;
+defm VBLENDVPSY : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR256, i256mem,
+ memopv32i8, int_x86_avx_blendv_ps_256>;
+
+let Predicates = [HasAVX] in {
+ def : Pat<(v16i8 (vselect (v16i8 VR128:$mask), (v16i8 VR128:$src1),
+ (v16i8 VR128:$src2))),
+ (VPBLENDVBrr VR128:$src2, VR128:$src1, VR128:$mask)>;
+ def : Pat<(v4i32 (vselect (v4i32 VR128:$mask), (v4i32 VR128:$src1),
+ (v4i32 VR128:$src2))),
+ (VBLENDVPSrr VR128:$src2, VR128:$src1, VR128:$mask)>;
+ def : Pat<(v4f32 (vselect (v4i32 VR128:$mask), (v4f32 VR128:$src1),
+ (v4f32 VR128:$src2))),
+ (VBLENDVPSrr VR128:$src2, VR128:$src1, VR128:$mask)>;
+ def : Pat<(v2i64 (vselect (v2i64 VR128:$mask), (v2i64 VR128:$src1),
+ (v2i64 VR128:$src2))),
+ (VBLENDVPDrr VR128:$src2, VR128:$src1, VR128:$mask)>;
+ def : Pat<(v2f64 (vselect (v2i64 VR128:$mask), (v2f64 VR128:$src1),
+ (v2f64 VR128:$src2))),
+ (VBLENDVPDrr VR128:$src2, VR128:$src1, VR128:$mask)>;
+ def : Pat<(v8i32 (vselect (v8i32 VR256:$mask), (v8i32 VR256:$src1),
+ (v8i32 VR256:$src2))),
+ (VBLENDVPSYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
+ def : Pat<(v8f32 (vselect (v8i32 VR256:$mask), (v8f32 VR256:$src1),
+ (v8f32 VR256:$src2))),
+ (VBLENDVPSYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
+ def : Pat<(v4i64 (vselect (v4i64 VR256:$mask), (v4i64 VR256:$src1),
+ (v4i64 VR256:$src2))),
+ (VBLENDVPDYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
+ def : Pat<(v4f64 (vselect (v4i64 VR256:$mask), (v4f64 VR256:$src1),
+ (v4f64 VR256:$src2))),
+ (VBLENDVPDYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
+}
+
+/// SS41I_ternary_int - SSE 4.1 ternary operator
+let Uses = [XMM0], Constraints = "$src1 = $dst" in {
+ multiclass SS41I_ternary_int<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
+ def rr0 : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0))]>,
+ OpSize;
+
+ def rm0 : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst,
+ (IntId VR128:$src1,
+ (bitconvert (memopv16i8 addr:$src2)), XMM0))]>, OpSize;
+ }
+}
+
+defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", int_x86_sse41_blendvpd>;
+defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", int_x86_sse41_blendvps>;
+defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", int_x86_sse41_pblendvb>;
+
+let Predicates = [HasSSE41] in {
+ def : Pat<(v16i8 (vselect (v16i8 XMM0), (v16i8 VR128:$src1),
+ (v16i8 VR128:$src2))),
+ (PBLENDVBrr0 VR128:$src2, VR128:$src1)>;
+ def : Pat<(v4i32 (vselect (v4i32 XMM0), (v4i32 VR128:$src1),
+ (v4i32 VR128:$src2))),
+ (BLENDVPSrr0 VR128:$src2, VR128:$src1)>;
+ def : Pat<(v4f32 (vselect (v4i32 XMM0), (v4f32 VR128:$src1),
+ (v4f32 VR128:$src2))),
+ (BLENDVPSrr0 VR128:$src2, VR128:$src1)>;
+ def : Pat<(v2i64 (vselect (v2i64 XMM0), (v2i64 VR128:$src1),
+ (v2i64 VR128:$src2))),
+ (BLENDVPDrr0 VR128:$src2, VR128:$src1)>;
+ def : Pat<(v2f64 (vselect (v2i64 XMM0), (v2f64 VR128:$src1),
+ (v2f64 VR128:$src2))),
+ (BLENDVPDrr0 VR128:$src2, VR128:$src1)>;
+}
+
+let Predicates = [HasAVX] in
+def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "vmovntdqa\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>,
+ OpSize, VEX;
+def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "movntdqa\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>,
+ OpSize;
+
+//===----------------------------------------------------------------------===//
+// SSE4.2 - Compare Instructions
+//===----------------------------------------------------------------------===//
+
+/// SS42I_binop_rm_int - Simple SSE 4.2 binary operator
+multiclass SS42I_binop_rm_int<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId128, bit Is2Addr = 1> {
+ def rr : SS428I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
+ OpSize;
+ def rm : SS428I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst,
+ (IntId128 VR128:$src1,
+ (bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
+}
+
+let Predicates = [HasAVX] in {
+ defm VPCMPGTQ : SS42I_binop_rm_int<0x37, "vpcmpgtq", int_x86_sse42_pcmpgtq,
+ 0>, VEX_4V;
+
+ def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, VR128:$src2)),
+ (VPCMPGTQrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, (memop addr:$src2))),
+ (VPCMPGTQrm VR128:$src1, addr:$src2)>;
+}
+
+let Constraints = "$src1 = $dst" in
+ defm PCMPGTQ : SS42I_binop_rm_int<0x37, "pcmpgtq", int_x86_sse42_pcmpgtq>;
+
+def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, VR128:$src2)),
+ (PCMPGTQrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, (memop addr:$src2))),
+ (PCMPGTQrm VR128:$src1, addr:$src2)>;
+
+//===----------------------------------------------------------------------===//
+// SSE4.2 - String/text Processing Instructions
+//===----------------------------------------------------------------------===//
+
+// Packed Compare Implicit Length Strings, Return Mask
+multiclass pseudo_pcmpistrm<string asm> {
+ def REG : PseudoI<(outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, i8imm:$src3),
+ [(set VR128:$dst, (int_x86_sse42_pcmpistrm128 VR128:$src1, VR128:$src2,
+ imm:$src3))]>;
+ def MEM : PseudoI<(outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
+ [(set VR128:$dst, (int_x86_sse42_pcmpistrm128
+ VR128:$src1, (load addr:$src2), imm:$src3))]>;
+}
+
+let Defs = [EFLAGS], usesCustomInserter = 1 in {
+ defm PCMPISTRM128 : pseudo_pcmpistrm<"#PCMPISTRM128">, Requires<[HasSSE42]>;
+ defm VPCMPISTRM128 : pseudo_pcmpistrm<"#VPCMPISTRM128">, Requires<[HasAVX]>;
+}
+
+let Defs = [XMM0, EFLAGS], Predicates = [HasAVX] in {
+ def VPCMPISTRM128rr : SS42AI<0x62, MRMSrcReg, (outs),
+ (ins VR128:$src1, VR128:$src2, i8imm:$src3),
+ "vpcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize, VEX;
+ def VPCMPISTRM128rm : SS42AI<0x62, MRMSrcMem, (outs),
+ (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
+ "vpcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize, VEX;
+}
+
+let Defs = [XMM0, EFLAGS] in {
+ def PCMPISTRM128rr : SS42AI<0x62, MRMSrcReg, (outs),
+ (ins VR128:$src1, VR128:$src2, i8imm:$src3),
+ "pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize;
+ def PCMPISTRM128rm : SS42AI<0x62, MRMSrcMem, (outs),
+ (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
+ "pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize;
+}
+
+// Packed Compare Explicit Length Strings, Return Mask
+multiclass pseudo_pcmpestrm<string asm> {
+ def REG : PseudoI<(outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src3, i8imm:$src5),
+ [(set VR128:$dst, (int_x86_sse42_pcmpestrm128
+ VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5))]>;
+ def MEM : PseudoI<(outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
+ [(set VR128:$dst, (int_x86_sse42_pcmpestrm128
+ VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5))]>;
+}
+
+let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in {
+ defm PCMPESTRM128 : pseudo_pcmpestrm<"#PCMPESTRM128">, Requires<[HasSSE42]>;
+ defm VPCMPESTRM128 : pseudo_pcmpestrm<"#VPCMPESTRM128">, Requires<[HasAVX]>;
+}
+
+let Predicates = [HasAVX],
+ Defs = [XMM0, EFLAGS], Uses = [EAX, EDX] in {
+ def VPCMPESTRM128rr : SS42AI<0x60, MRMSrcReg, (outs),
+ (ins VR128:$src1, VR128:$src3, i8imm:$src5),
+ "vpcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize, VEX;
+ def VPCMPESTRM128rm : SS42AI<0x60, MRMSrcMem, (outs),
+ (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
+ "vpcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize, VEX;
+}
+
+let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX] in {
+ def PCMPESTRM128rr : SS42AI<0x60, MRMSrcReg, (outs),
+ (ins VR128:$src1, VR128:$src3, i8imm:$src5),
+ "pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize;
+ def PCMPESTRM128rm : SS42AI<0x60, MRMSrcMem, (outs),
+ (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
+ "pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize;
+}
+
+// Packed Compare Implicit Length Strings, Return Index
+let Defs = [ECX, EFLAGS] in {
+ multiclass SS42AI_pcmpistri<Intrinsic IntId128, string asm = "pcmpistri"> {
+ def rr : SS42AI<0x63, MRMSrcReg, (outs),
+ (ins VR128:$src1, VR128:$src2, i8imm:$src3),
+ !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
+ [(set ECX, (IntId128 VR128:$src1, VR128:$src2, imm:$src3)),
+ (implicit EFLAGS)]>, OpSize;
+ def rm : SS42AI<0x63, MRMSrcMem, (outs),
+ (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
+ !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
+ [(set ECX, (IntId128 VR128:$src1, (load addr:$src2), imm:$src3)),
+ (implicit EFLAGS)]>, OpSize;
+ }
+}
+
+let Predicates = [HasAVX] in {
+defm VPCMPISTRI : SS42AI_pcmpistri<int_x86_sse42_pcmpistri128, "vpcmpistri">,
+ VEX;
+defm VPCMPISTRIA : SS42AI_pcmpistri<int_x86_sse42_pcmpistria128, "vpcmpistri">,
+ VEX;
+defm VPCMPISTRIC : SS42AI_pcmpistri<int_x86_sse42_pcmpistric128, "vpcmpistri">,
+ VEX;
+defm VPCMPISTRIO : SS42AI_pcmpistri<int_x86_sse42_pcmpistrio128, "vpcmpistri">,
+ VEX;
+defm VPCMPISTRIS : SS42AI_pcmpistri<int_x86_sse42_pcmpistris128, "vpcmpistri">,
+ VEX;
+defm VPCMPISTRIZ : SS42AI_pcmpistri<int_x86_sse42_pcmpistriz128, "vpcmpistri">,
+ VEX;
+}
+
+defm PCMPISTRI : SS42AI_pcmpistri<int_x86_sse42_pcmpistri128>;
+defm PCMPISTRIA : SS42AI_pcmpistri<int_x86_sse42_pcmpistria128>;
+defm PCMPISTRIC : SS42AI_pcmpistri<int_x86_sse42_pcmpistric128>;
+defm PCMPISTRIO : SS42AI_pcmpistri<int_x86_sse42_pcmpistrio128>;
+defm PCMPISTRIS : SS42AI_pcmpistri<int_x86_sse42_pcmpistris128>;
+defm PCMPISTRIZ : SS42AI_pcmpistri<int_x86_sse42_pcmpistriz128>;
+
+// Packed Compare Explicit Length Strings, Return Index
+let Defs = [ECX, EFLAGS], Uses = [EAX, EDX] in {
+ multiclass SS42AI_pcmpestri<Intrinsic IntId128, string asm = "pcmpestri"> {
+ def rr : SS42AI<0x61, MRMSrcReg, (outs),
+ (ins VR128:$src1, VR128:$src3, i8imm:$src5),
+ !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
+ [(set ECX, (IntId128 VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5)),
+ (implicit EFLAGS)]>, OpSize;
+ def rm : SS42AI<0x61, MRMSrcMem, (outs),
+ (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
+ !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
+ [(set ECX,
+ (IntId128 VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5)),
+ (implicit EFLAGS)]>, OpSize;
+ }
+}
+
+let Predicates = [HasAVX] in {
+defm VPCMPESTRI : SS42AI_pcmpestri<int_x86_sse42_pcmpestri128, "vpcmpestri">,
+ VEX;
+defm VPCMPESTRIA : SS42AI_pcmpestri<int_x86_sse42_pcmpestria128, "vpcmpestri">,
+ VEX;
+defm VPCMPESTRIC : SS42AI_pcmpestri<int_x86_sse42_pcmpestric128, "vpcmpestri">,
+ VEX;
+defm VPCMPESTRIO : SS42AI_pcmpestri<int_x86_sse42_pcmpestrio128, "vpcmpestri">,
+ VEX;
+defm VPCMPESTRIS : SS42AI_pcmpestri<int_x86_sse42_pcmpestris128, "vpcmpestri">,
+ VEX;
+defm VPCMPESTRIZ : SS42AI_pcmpestri<int_x86_sse42_pcmpestriz128, "vpcmpestri">,
+ VEX;
+}
+
+defm PCMPESTRI : SS42AI_pcmpestri<int_x86_sse42_pcmpestri128>;
+defm PCMPESTRIA : SS42AI_pcmpestri<int_x86_sse42_pcmpestria128>;
+defm PCMPESTRIC : SS42AI_pcmpestri<int_x86_sse42_pcmpestric128>;
+defm PCMPESTRIO : SS42AI_pcmpestri<int_x86_sse42_pcmpestrio128>;
+defm PCMPESTRIS : SS42AI_pcmpestri<int_x86_sse42_pcmpestris128>;
+defm PCMPESTRIZ : SS42AI_pcmpestri<int_x86_sse42_pcmpestriz128>;
+
+//===----------------------------------------------------------------------===//
+// SSE4.2 - CRC Instructions
+//===----------------------------------------------------------------------===//
+
+// No CRC instructions have AVX equivalents
+
+// crc intrinsic instruction
+// This set of instructions are only rm, the only difference is the size
+// of r and m.
+let Constraints = "$src1 = $dst" in {
+ def CRC32r32m8 : SS42FI<0xF0, MRMSrcMem, (outs GR32:$dst),
+ (ins GR32:$src1, i8mem:$src2),
+ "crc32{b} \t{$src2, $src1|$src1, $src2}",
+ [(set GR32:$dst,
+ (int_x86_sse42_crc32_32_8 GR32:$src1,
+ (load addr:$src2)))]>;
+ def CRC32r32r8 : SS42FI<0xF0, MRMSrcReg, (outs GR32:$dst),
+ (ins GR32:$src1, GR8:$src2),
+ "crc32{b} \t{$src2, $src1|$src1, $src2}",
+ [(set GR32:$dst,
+ (int_x86_sse42_crc32_32_8 GR32:$src1, GR8:$src2))]>;
+ def CRC32r32m16 : SS42FI<0xF1, MRMSrcMem, (outs GR32:$dst),
+ (ins GR32:$src1, i16mem:$src2),
+ "crc32{w} \t{$src2, $src1|$src1, $src2}",
+ [(set GR32:$dst,
+ (int_x86_sse42_crc32_32_16 GR32:$src1,
+ (load addr:$src2)))]>,
+ OpSize;
+ def CRC32r32r16 : SS42FI<0xF1, MRMSrcReg, (outs GR32:$dst),
+ (ins GR32:$src1, GR16:$src2),
+ "crc32{w} \t{$src2, $src1|$src1, $src2}",
+ [(set GR32:$dst,
+ (int_x86_sse42_crc32_32_16 GR32:$src1, GR16:$src2))]>,
+ OpSize;
+ def CRC32r32m32 : SS42FI<0xF1, MRMSrcMem, (outs GR32:$dst),
+ (ins GR32:$src1, i32mem:$src2),
+ "crc32{l} \t{$src2, $src1|$src1, $src2}",
+ [(set GR32:$dst,
+ (int_x86_sse42_crc32_32_32 GR32:$src1,
+ (load addr:$src2)))]>;
+ def CRC32r32r32 : SS42FI<0xF1, MRMSrcReg, (outs GR32:$dst),
+ (ins GR32:$src1, GR32:$src2),
+ "crc32{l} \t{$src2, $src1|$src1, $src2}",
+ [(set GR32:$dst,
+ (int_x86_sse42_crc32_32_32 GR32:$src1, GR32:$src2))]>;
+ def CRC32r64m8 : SS42FI<0xF0, MRMSrcMem, (outs GR64:$dst),
+ (ins GR64:$src1, i8mem:$src2),
+ "crc32{b} \t{$src2, $src1|$src1, $src2}",
+ [(set GR64:$dst,
+ (int_x86_sse42_crc32_64_8 GR64:$src1,
+ (load addr:$src2)))]>,
+ REX_W;
+ def CRC32r64r8 : SS42FI<0xF0, MRMSrcReg, (outs GR64:$dst),
+ (ins GR64:$src1, GR8:$src2),
+ "crc32{b} \t{$src2, $src1|$src1, $src2}",
+ [(set GR64:$dst,
+ (int_x86_sse42_crc32_64_8 GR64:$src1, GR8:$src2))]>,
+ REX_W;
+ def CRC32r64m64 : SS42FI<0xF1, MRMSrcMem, (outs GR64:$dst),
+ (ins GR64:$src1, i64mem:$src2),
+ "crc32{q} \t{$src2, $src1|$src1, $src2}",
+ [(set GR64:$dst,
+ (int_x86_sse42_crc32_64_64 GR64:$src1,
+ (load addr:$src2)))]>,
+ REX_W;
+ def CRC32r64r64 : SS42FI<0xF1, MRMSrcReg, (outs GR64:$dst),
+ (ins GR64:$src1, GR64:$src2),
+ "crc32{q} \t{$src2, $src1|$src1, $src2}",
+ [(set GR64:$dst,
+ (int_x86_sse42_crc32_64_64 GR64:$src1, GR64:$src2))]>,
+ REX_W;
+}
+
+//===----------------------------------------------------------------------===//
+// AES-NI Instructions
+//===----------------------------------------------------------------------===//
+
+multiclass AESI_binop_rm_int<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId128, bit Is2Addr = 1> {
+ def rr : AES8I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
+ OpSize;
+ def rm : AES8I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst,
+ (IntId128 VR128:$src1,
+ (bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
+}
+
+// Perform One Round of an AES Encryption/Decryption Flow
+let Predicates = [HasAVX, HasAES] in {
+ defm VAESENC : AESI_binop_rm_int<0xDC, "vaesenc",
+ int_x86_aesni_aesenc, 0>, VEX_4V;
+ defm VAESENCLAST : AESI_binop_rm_int<0xDD, "vaesenclast",
+ int_x86_aesni_aesenclast, 0>, VEX_4V;
+ defm VAESDEC : AESI_binop_rm_int<0xDE, "vaesdec",
+ int_x86_aesni_aesdec, 0>, VEX_4V;
+ defm VAESDECLAST : AESI_binop_rm_int<0xDF, "vaesdeclast",
+ int_x86_aesni_aesdeclast, 0>, VEX_4V;
+}
+
+let Constraints = "$src1 = $dst" in {
+ defm AESENC : AESI_binop_rm_int<0xDC, "aesenc",
+ int_x86_aesni_aesenc>;
+ defm AESENCLAST : AESI_binop_rm_int<0xDD, "aesenclast",
+ int_x86_aesni_aesenclast>;
+ defm AESDEC : AESI_binop_rm_int<0xDE, "aesdec",
+ int_x86_aesni_aesdec>;
+ defm AESDECLAST : AESI_binop_rm_int<0xDF, "aesdeclast",
+ int_x86_aesni_aesdeclast>;
+}
+
+let Predicates = [HasAES] in {
+ def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, VR128:$src2)),
+ (AESENCrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, (memop addr:$src2))),
+ (AESENCrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v2i64 (int_x86_aesni_aesenclast VR128:$src1, VR128:$src2)),
+ (AESENCLASTrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v2i64 (int_x86_aesni_aesenclast VR128:$src1, (memop addr:$src2))),
+ (AESENCLASTrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v2i64 (int_x86_aesni_aesdec VR128:$src1, VR128:$src2)),
+ (AESDECrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v2i64 (int_x86_aesni_aesdec VR128:$src1, (memop addr:$src2))),
+ (AESDECrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, VR128:$src2)),
+ (AESDECLASTrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, (memop addr:$src2))),
+ (AESDECLASTrm VR128:$src1, addr:$src2)>;
+}
+
+let Predicates = [HasAVX, HasAES], AddedComplexity = 20 in {
+ def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, VR128:$src2)),
+ (VAESENCrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, (memop addr:$src2))),
+ (VAESENCrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v2i64 (int_x86_aesni_aesenclast VR128:$src1, VR128:$src2)),
+ (VAESENCLASTrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v2i64 (int_x86_aesni_aesenclast VR128:$src1, (memop addr:$src2))),
+ (VAESENCLASTrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v2i64 (int_x86_aesni_aesdec VR128:$src1, VR128:$src2)),
+ (VAESDECrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v2i64 (int_x86_aesni_aesdec VR128:$src1, (memop addr:$src2))),
+ (VAESDECrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, VR128:$src2)),
+ (VAESDECLASTrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, (memop addr:$src2))),
+ (VAESDECLASTrm VR128:$src1, addr:$src2)>;
+}
+
+// Perform the AES InvMixColumn Transformation
+let Predicates = [HasAVX, HasAES] in {
+ def VAESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1),
+ "vaesimc\t{$src1, $dst|$dst, $src1}",
+ [(set VR128:$dst,
+ (int_x86_aesni_aesimc VR128:$src1))]>,
+ OpSize, VEX;
+ def VAESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst),
+ (ins i128mem:$src1),
+ "vaesimc\t{$src1, $dst|$dst, $src1}",
+ [(set VR128:$dst,
+ (int_x86_aesni_aesimc (bitconvert (memopv2i64 addr:$src1))))]>,
+ OpSize, VEX;
+}
+def AESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1),
+ "aesimc\t{$src1, $dst|$dst, $src1}",
+ [(set VR128:$dst,
+ (int_x86_aesni_aesimc VR128:$src1))]>,
+ OpSize;
+def AESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst),
+ (ins i128mem:$src1),
+ "aesimc\t{$src1, $dst|$dst, $src1}",
+ [(set VR128:$dst,
+ (int_x86_aesni_aesimc (bitconvert (memopv2i64 addr:$src1))))]>,
+ OpSize;
+
+// AES Round Key Generation Assist
+let Predicates = [HasAVX, HasAES] in {
+ def VAESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, i8imm:$src2),
+ "vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst,
+ (int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>,
+ OpSize, VEX;
+ def VAESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
+ (ins i128mem:$src1, i8imm:$src2),
+ "vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst,
+ (int_x86_aesni_aeskeygenassist (bitconvert (memopv2i64 addr:$src1)),
+ imm:$src2))]>,
+ OpSize, VEX;
+}
+def AESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, i8imm:$src2),
+ "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst,
+ (int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>,
+ OpSize;
+def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
+ (ins i128mem:$src1, i8imm:$src2),
+ "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst,
+ (int_x86_aesni_aeskeygenassist (bitconvert (memopv2i64 addr:$src1)),
+ imm:$src2))]>,
+ OpSize;
+
+//===----------------------------------------------------------------------===//
+// CLMUL Instructions
+//===----------------------------------------------------------------------===//
+
+// Carry-less Multiplication instructions
+let Constraints = "$src1 = $dst" in {
+def PCLMULQDQrr : CLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, i8imm:$src3),
+ "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ []>;
+
+def PCLMULQDQrm : CLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
+ "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ []>;
+}
+
+// AVX carry-less Multiplication instructions
+def VPCLMULQDQrr : AVXCLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, i8imm:$src3),
+ "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ []>;
+
+def VPCLMULQDQrm : AVXCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
+ "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ []>;
+
+
+multiclass pclmul_alias<string asm, int immop> {
+ def : InstAlias<!strconcat("pclmul", asm,
+ "dq {$src, $dst|$dst, $src}"),
+ (PCLMULQDQrr VR128:$dst, VR128:$src, immop)>;
+
+ def : InstAlias<!strconcat("pclmul", asm,
+ "dq {$src, $dst|$dst, $src}"),
+ (PCLMULQDQrm VR128:$dst, i128mem:$src, immop)>;
+
+ def : InstAlias<!strconcat("vpclmul", asm,
+ "dq {$src2, $src1, $dst|$dst, $src1, $src2}"),
+ (VPCLMULQDQrr VR128:$dst, VR128:$src1, VR128:$src2, immop)>;
+
+ def : InstAlias<!strconcat("vpclmul", asm,
+ "dq {$src2, $src1, $dst|$dst, $src1, $src2}"),
+ (VPCLMULQDQrm VR128:$dst, VR128:$src1, i128mem:$src2, immop)>;
+}
+defm : pclmul_alias<"hqhq", 0x11>;
+defm : pclmul_alias<"hqlq", 0x01>;
+defm : pclmul_alias<"lqhq", 0x10>;
+defm : pclmul_alias<"lqlq", 0x00>;
+
+//===----------------------------------------------------------------------===//
+// AVX Instructions
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// VBROADCAST - Load from memory and broadcast to all elements of the
+// destination operand
+//
+class avx_broadcast<bits<8> opc, string OpcodeStr, RegisterClass RC,
+ X86MemOperand x86memop, Intrinsic Int> :
+ AVX8I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set RC:$dst, (Int addr:$src))]>, VEX;
+
+def VBROADCASTSS : avx_broadcast<0x18, "vbroadcastss", VR128, f32mem,
+ int_x86_avx_vbroadcastss>;
+def VBROADCASTSSY : avx_broadcast<0x18, "vbroadcastss", VR256, f32mem,
+ int_x86_avx_vbroadcastss_256>;
+def VBROADCASTSD : avx_broadcast<0x19, "vbroadcastsd", VR256, f64mem,
+ int_x86_avx_vbroadcast_sd_256>;
+def VBROADCASTF128 : avx_broadcast<0x1A, "vbroadcastf128", VR256, f128mem,
+ int_x86_avx_vbroadcastf128_pd_256>;
+
+def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src),
+ (VBROADCASTF128 addr:$src)>;
+
+def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))),
+ (VBROADCASTSSY addr:$src)>;
+def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))),
+ (VBROADCASTSD addr:$src)>;
+def : Pat<(v8f32 (X86VBroadcast (loadf32 addr:$src))),
+ (VBROADCASTSSY addr:$src)>;
+def : Pat<(v4f64 (X86VBroadcast (loadf64 addr:$src))),
+ (VBROADCASTSD addr:$src)>;
+
+def : Pat<(v4f32 (X86VBroadcast (loadf32 addr:$src))),
+ (VBROADCASTSS addr:$src)>;
+def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
+ (VBROADCASTSS addr:$src)>;
+
+//===----------------------------------------------------------------------===//
+// VINSERTF128 - Insert packed floating-point values
+//
+def VINSERTF128rr : AVXAIi8<0x18, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR128:$src2, i8imm:$src3),
+ "vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ []>, VEX_4V;
+def VINSERTF128rm : AVXAIi8<0x18, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, f128mem:$src2, i8imm:$src3),
+ "vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ []>, VEX_4V;
+
+def : Pat<(int_x86_avx_vinsertf128_pd_256 VR256:$src1, VR128:$src2, imm:$src3),
+ (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>;
+def : Pat<(int_x86_avx_vinsertf128_ps_256 VR256:$src1, VR128:$src2, imm:$src3),
+ (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>;
+def : Pat<(int_x86_avx_vinsertf128_si_256 VR256:$src1, VR128:$src2, imm:$src3),
+ (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>;
+
+def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (v4f32 VR128:$src2),
+ (i32 imm)),
+ (VINSERTF128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (v2f64 VR128:$src2),
+ (i32 imm)),
+ (VINSERTF128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2),
+ (i32 imm)),
+ (VINSERTF128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2),
+ (i32 imm)),
+ (VINSERTF128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v32i8 VR256:$src1), (v16i8 VR128:$src2),
+ (i32 imm)),
+ (VINSERTF128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2),
+ (i32 imm)),
+ (VINSERTF128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+
+//===----------------------------------------------------------------------===//
+// VEXTRACTF128 - Extract packed floating-point values
+//
+def VEXTRACTF128rr : AVXAIi8<0x19, MRMDestReg, (outs VR128:$dst),
+ (ins VR256:$src1, i8imm:$src2),
+ "vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ []>, VEX;
+def VEXTRACTF128mr : AVXAIi8<0x19, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR256:$src1, i8imm:$src2),
+ "vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ []>, VEX;
+
+def : Pat<(int_x86_avx_vextractf128_pd_256 VR256:$src1, imm:$src2),
+ (VEXTRACTF128rr VR256:$src1, imm:$src2)>;
+def : Pat<(int_x86_avx_vextractf128_ps_256 VR256:$src1, imm:$src2),
+ (VEXTRACTF128rr VR256:$src1, imm:$src2)>;
+def : Pat<(int_x86_avx_vextractf128_si_256 VR256:$src1, imm:$src2),
+ (VEXTRACTF128rr VR256:$src1, imm:$src2)>;
+
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v4f32 (VEXTRACTF128rr
+ (v8f32 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v2f64 (VEXTRACTF128rr
+ (v4f64 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v4i32 (VEXTRACTF128rr
+ (v8i32 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v2i64 (VEXTRACTF128rr
+ (v4i64 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v8i16 (VEXTRACTF128rr
+ (v16i16 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v16i8 (VEXTRACTF128rr
+ (v32i8 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+
+//===----------------------------------------------------------------------===//
+// VMASKMOV - Conditional SIMD Packed Loads and Stores
+//
+multiclass avx_movmask_rm<bits<8> opc_rm, bits<8> opc_mr, string OpcodeStr,
+ Intrinsic IntLd, Intrinsic IntLd256,
+ Intrinsic IntSt, Intrinsic IntSt256,
+ PatFrag pf128, PatFrag pf256> {
+ def rm : AVX8I<opc_rm, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, f128mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst, (IntLd addr:$src2, VR128:$src1))]>,
+ VEX_4V;
+ def Yrm : AVX8I<opc_rm, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, f256mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>,
+ VEX_4V;
+ def mr : AVX8I<opc_mr, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(IntSt addr:$dst, VR128:$src1, VR128:$src2)]>, VEX_4V;
+ def Ymr : AVX8I<opc_mr, MRMDestMem, (outs),
+ (ins f256mem:$dst, VR256:$src1, VR256:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>, VEX_4V;
+}
+
+defm VMASKMOVPS : avx_movmask_rm<0x2C, 0x2E, "vmaskmovps",
+ int_x86_avx_maskload_ps,
+ int_x86_avx_maskload_ps_256,
+ int_x86_avx_maskstore_ps,
+ int_x86_avx_maskstore_ps_256,
+ memopv4f32, memopv8f32>;
+defm VMASKMOVPD : avx_movmask_rm<0x2D, 0x2F, "vmaskmovpd",
+ int_x86_avx_maskload_pd,
+ int_x86_avx_maskload_pd_256,
+ int_x86_avx_maskstore_pd,
+ int_x86_avx_maskstore_pd_256,
+ memopv2f64, memopv4f64>;
+
+//===----------------------------------------------------------------------===//
+// VPERMIL - Permute Single and Double Floating-Point Values
+//
+multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr,
+ RegisterClass RC, X86MemOperand x86memop_f,
+ X86MemOperand x86memop_i, PatFrag f_frag, PatFrag i_frag,
+ Intrinsic IntVar, Intrinsic IntImm> {
+ def rr : AVX8I<opc_rm, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (IntVar RC:$src1, RC:$src2))]>, VEX_4V;
+ def rm : AVX8I<opc_rm, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86memop_i:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (IntVar RC:$src1, (i_frag addr:$src2)))]>, VEX_4V;
+
+ def ri : AVXAIi8<opc_rmi, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, i8imm:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (IntImm RC:$src1, imm:$src2))]>, VEX;
+ def mi : AVXAIi8<opc_rmi, MRMSrcMem, (outs RC:$dst),
+ (ins x86memop_f:$src1, i8imm:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (IntImm (f_frag addr:$src1), imm:$src2))]>, VEX;
+}
+
+defm VPERMILPS : avx_permil<0x0C, 0x04, "vpermilps", VR128, f128mem, i128mem,
+ memopv4f32, memopv4i32,
+ int_x86_avx_vpermilvar_ps,
+ int_x86_avx_vpermil_ps>;
+defm VPERMILPSY : avx_permil<0x0C, 0x04, "vpermilps", VR256, f256mem, i256mem,
+ memopv8f32, memopv8i32,
+ int_x86_avx_vpermilvar_ps_256,
+ int_x86_avx_vpermil_ps_256>;
+defm VPERMILPD : avx_permil<0x0D, 0x05, "vpermilpd", VR128, f128mem, i128mem,
+ memopv2f64, memopv2i64,
+ int_x86_avx_vpermilvar_pd,
+ int_x86_avx_vpermil_pd>;
+defm VPERMILPDY : avx_permil<0x0D, 0x05, "vpermilpd", VR256, f256mem, i256mem,
+ memopv4f64, memopv4i64,
+ int_x86_avx_vpermilvar_pd_256,
+ int_x86_avx_vpermil_pd_256>;
+
+def : Pat<(v8f32 (X86VPermilpsy VR256:$src1, (i8 imm:$imm))),
+ (VPERMILPSYri VR256:$src1, imm:$imm)>;
+def : Pat<(v4f64 (X86VPermilpdy VR256:$src1, (i8 imm:$imm))),
+ (VPERMILPDYri VR256:$src1, imm:$imm)>;
+def : Pat<(v8i32 (X86VPermilpsy VR256:$src1, (i8 imm:$imm))),
+ (VPERMILPSYri VR256:$src1, imm:$imm)>;
+def : Pat<(v4i64 (X86VPermilpdy VR256:$src1, (i8 imm:$imm))),
+ (VPERMILPDYri VR256:$src1, imm:$imm)>;
+
+//===----------------------------------------------------------------------===//
+// VPERM2F128 - Permute Floating-Point Values in 128-bit chunks
+//
+def VPERM2F128rr : AVXAIi8<0x06, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2, i8imm:$src3),
+ "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ []>, VEX_4V;
+def VPERM2F128rm : AVXAIi8<0x06, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, f256mem:$src2, i8imm:$src3),
+ "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ []>, VEX_4V;
+
+def : Pat<(int_x86_avx_vperm2f128_ps_256 VR256:$src1, VR256:$src2, imm:$src3),
+ (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$src3)>;
+def : Pat<(int_x86_avx_vperm2f128_pd_256 VR256:$src1, VR256:$src2, imm:$src3),
+ (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$src3)>;
+def : Pat<(int_x86_avx_vperm2f128_si_256 VR256:$src1, VR256:$src2, imm:$src3),
+ (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$src3)>;
+
+def : Pat<(int_x86_avx_vperm2f128_ps_256
+ VR256:$src1, (memopv8f32 addr:$src2), imm:$src3),
+ (VPERM2F128rm VR256:$src1, addr:$src2, imm:$src3)>;
+def : Pat<(int_x86_avx_vperm2f128_pd_256
+ VR256:$src1, (memopv4f64 addr:$src2), imm:$src3),
+ (VPERM2F128rm VR256:$src1, addr:$src2, imm:$src3)>;
+def : Pat<(int_x86_avx_vperm2f128_si_256
+ VR256:$src1, (memopv8i32 addr:$src2), imm:$src3),
+ (VPERM2F128rm VR256:$src1, addr:$src2, imm:$src3)>;
+
+def : Pat<(v8f32 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+def : Pat<(v8i32 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+def : Pat<(v4i64 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+def : Pat<(v4f64 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+def : Pat<(v32i8 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+def : Pat<(v16i16 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+
+//===----------------------------------------------------------------------===//
+// VZERO - Zero YMM registers
+//
+let Defs = [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
+ YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15] in {
+ // Zero All YMM registers
+ def VZEROALL : I<0x77, RawFrm, (outs), (ins), "vzeroall",
+ [(int_x86_avx_vzeroall)]>, TB, VEX, VEX_L, Requires<[HasAVX]>;
+
+ // Zero Upper bits of YMM registers
+ def VZEROUPPER : I<0x77, RawFrm, (outs), (ins), "vzeroupper",
+ [(int_x86_avx_vzeroupper)]>, TB, VEX, Requires<[HasAVX]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Half precision conversion instructions
+//
+let Predicates = [HasAVX, HasF16C] in {
+ def VCVTPH2PSrm : I<0x13, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
+ "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, T8, OpSize, VEX;
+ def VCVTPH2PSrr : I<0x13, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, T8, OpSize, VEX;
+ def VCVTPH2PSYrm : I<0x13, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
+ "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, T8, OpSize, VEX;
+ def VCVTPH2PSYrr : I<0x13, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
+ "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, T8, OpSize, VEX;
+ def VCVTPS2PHmr : Ii8<0x1D, MRMDestMem, (outs f64mem:$dst),
+ (ins VR128:$src1, i32i8imm:$src2),
+ "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
+ TA, OpSize, VEX;
+ def VCVTPS2PHrr : Ii8<0x1D, MRMDestReg, (outs VR128:$dst),
+ (ins VR128:$src1, i32i8imm:$src2),
+ "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
+ TA, OpSize, VEX;
+ def VCVTPS2PHYmr : Ii8<0x1D, MRMDestMem, (outs f128mem:$dst),
+ (ins VR256:$src1, i32i8imm:$src2),
+ "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
+ TA, OpSize, VEX;
+ def VCVTPS2PHYrr : Ii8<0x1D, MRMDestReg, (outs VR128:$dst),
+ (ins VR256:$src1, i32i8imm:$src2),
+ "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
+ TA, OpSize, VEX;
+}
diff --git a/contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td b/contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td
new file mode 100644
index 0000000..8278568
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td
@@ -0,0 +1,746 @@
+//===- X86InstrShiftRotate.td - Shift and Rotate Instrs ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the shift and rotate instructions.
+//
+//===----------------------------------------------------------------------===//
+
+// FIXME: Someone needs to smear multipattern goodness all over this file.
+
+let Defs = [EFLAGS] in {
+
+let Constraints = "$src1 = $dst" in {
+let Uses = [CL] in {
+def SHL8rCL : I<0xD2, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "shl{b}\t{%cl, $dst|$dst, CL}",
+ [(set GR8:$dst, (shl GR8:$src1, CL))]>;
+def SHL16rCL : I<0xD3, MRM4r, (outs GR16:$dst), (ins GR16:$src1),
+ "shl{w}\t{%cl, $dst|$dst, CL}",
+ [(set GR16:$dst, (shl GR16:$src1, CL))]>, OpSize;
+def SHL32rCL : I<0xD3, MRM4r, (outs GR32:$dst), (ins GR32:$src1),
+ "shl{l}\t{%cl, $dst|$dst, CL}",
+ [(set GR32:$dst, (shl GR32:$src1, CL))]>;
+def SHL64rCL : RI<0xD3, MRM4r, (outs GR64:$dst), (ins GR64:$src1),
+ "shl{q}\t{%cl, $dst|$dst, %CL}",
+ [(set GR64:$dst, (shl GR64:$src1, CL))]>;
+} // Uses = [CL]
+
+def SHL8ri : Ii8<0xC0, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
+ "shl{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (shl GR8:$src1, (i8 imm:$src2)))]>;
+
+let isConvertibleToThreeAddress = 1 in { // Can transform into LEA.
+def SHL16ri : Ii8<0xC1, MRM4r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
+ "shl{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (shl GR16:$src1, (i8 imm:$src2)))]>, OpSize;
+def SHL32ri : Ii8<0xC1, MRM4r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
+ "shl{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (shl GR32:$src1, (i8 imm:$src2)))]>;
+def SHL64ri : RIi8<0xC1, MRM4r, (outs GR64:$dst),
+ (ins GR64:$src1, i8imm:$src2),
+ "shl{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (shl GR64:$src1, (i8 imm:$src2)))]>;
+
+// NOTE: We don't include patterns for shifts of a register by one, because
+// 'add reg,reg' is cheaper (and we have a Pat pattern for shift-by-one).
+def SHL8r1 : I<0xD0, MRM4r, (outs GR8:$dst), (ins GR8:$src1),
+ "shl{b}\t$dst", []>;
+def SHL16r1 : I<0xD1, MRM4r, (outs GR16:$dst), (ins GR16:$src1),
+ "shl{w}\t$dst", []>, OpSize;
+def SHL32r1 : I<0xD1, MRM4r, (outs GR32:$dst), (ins GR32:$src1),
+ "shl{l}\t$dst", []>;
+def SHL64r1 : RI<0xD1, MRM4r, (outs GR64:$dst), (ins GR64:$src1),
+ "shl{q}\t$dst", []>;
+} // isConvertibleToThreeAddress = 1
+} // Constraints = "$src = $dst"
+
+
+// FIXME: Why do we need an explicit "Uses = [CL]" when the instr has a pattern
+// using CL?
+let Uses = [CL] in {
+def SHL8mCL : I<0xD2, MRM4m, (outs), (ins i8mem :$dst),
+ "shl{b}\t{%cl, $dst|$dst, CL}",
+ [(store (shl (loadi8 addr:$dst), CL), addr:$dst)]>;
+def SHL16mCL : I<0xD3, MRM4m, (outs), (ins i16mem:$dst),
+ "shl{w}\t{%cl, $dst|$dst, CL}",
+ [(store (shl (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize;
+def SHL32mCL : I<0xD3, MRM4m, (outs), (ins i32mem:$dst),
+ "shl{l}\t{%cl, $dst|$dst, CL}",
+ [(store (shl (loadi32 addr:$dst), CL), addr:$dst)]>;
+def SHL64mCL : RI<0xD3, MRM4m, (outs), (ins i64mem:$dst),
+ "shl{q}\t{%cl, $dst|$dst, %CL}",
+ [(store (shl (loadi64 addr:$dst), CL), addr:$dst)]>;
+}
+def SHL8mi : Ii8<0xC0, MRM4m, (outs), (ins i8mem :$dst, i8imm:$src),
+ "shl{b}\t{$src, $dst|$dst, $src}",
+ [(store (shl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+def SHL16mi : Ii8<0xC1, MRM4m, (outs), (ins i16mem:$dst, i8imm:$src),
+ "shl{w}\t{$src, $dst|$dst, $src}",
+ [(store (shl (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>,
+ OpSize;
+def SHL32mi : Ii8<0xC1, MRM4m, (outs), (ins i32mem:$dst, i8imm:$src),
+ "shl{l}\t{$src, $dst|$dst, $src}",
+ [(store (shl (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+def SHL64mi : RIi8<0xC1, MRM4m, (outs), (ins i64mem:$dst, i8imm:$src),
+ "shl{q}\t{$src, $dst|$dst, $src}",
+ [(store (shl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+
+// Shift by 1
+def SHL8m1 : I<0xD0, MRM4m, (outs), (ins i8mem :$dst),
+ "shl{b}\t$dst",
+ [(store (shl (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
+def SHL16m1 : I<0xD1, MRM4m, (outs), (ins i16mem:$dst),
+ "shl{w}\t$dst",
+ [(store (shl (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,
+ OpSize;
+def SHL32m1 : I<0xD1, MRM4m, (outs), (ins i32mem:$dst),
+ "shl{l}\t$dst",
+ [(store (shl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
+def SHL64m1 : RI<0xD1, MRM4m, (outs), (ins i64mem:$dst),
+ "shl{q}\t$dst",
+ [(store (shl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
+
+let Constraints = "$src1 = $dst" in {
+let Uses = [CL] in {
+def SHR8rCL : I<0xD2, MRM5r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "shr{b}\t{%cl, $dst|$dst, CL}",
+ [(set GR8:$dst, (srl GR8:$src1, CL))]>;
+def SHR16rCL : I<0xD3, MRM5r, (outs GR16:$dst), (ins GR16:$src1),
+ "shr{w}\t{%cl, $dst|$dst, CL}",
+ [(set GR16:$dst, (srl GR16:$src1, CL))]>, OpSize;
+def SHR32rCL : I<0xD3, MRM5r, (outs GR32:$dst), (ins GR32:$src1),
+ "shr{l}\t{%cl, $dst|$dst, CL}",
+ [(set GR32:$dst, (srl GR32:$src1, CL))]>;
+def SHR64rCL : RI<0xD3, MRM5r, (outs GR64:$dst), (ins GR64:$src1),
+ "shr{q}\t{%cl, $dst|$dst, %CL}",
+ [(set GR64:$dst, (srl GR64:$src1, CL))]>;
+}
+
+def SHR8ri : Ii8<0xC0, MRM5r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
+ "shr{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (srl GR8:$src1, (i8 imm:$src2)))]>;
+def SHR16ri : Ii8<0xC1, MRM5r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
+ "shr{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (srl GR16:$src1, (i8 imm:$src2)))]>, OpSize;
+def SHR32ri : Ii8<0xC1, MRM5r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
+ "shr{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (srl GR32:$src1, (i8 imm:$src2)))]>;
+def SHR64ri : RIi8<0xC1, MRM5r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$src2),
+ "shr{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (srl GR64:$src1, (i8 imm:$src2)))]>;
+
+// Shift right by 1
+def SHR8r1 : I<0xD0, MRM5r, (outs GR8:$dst), (ins GR8:$src1),
+ "shr{b}\t$dst",
+ [(set GR8:$dst, (srl GR8:$src1, (i8 1)))]>;
+def SHR16r1 : I<0xD1, MRM5r, (outs GR16:$dst), (ins GR16:$src1),
+ "shr{w}\t$dst",
+ [(set GR16:$dst, (srl GR16:$src1, (i8 1)))]>, OpSize;
+def SHR32r1 : I<0xD1, MRM5r, (outs GR32:$dst), (ins GR32:$src1),
+ "shr{l}\t$dst",
+ [(set GR32:$dst, (srl GR32:$src1, (i8 1)))]>;
+def SHR64r1 : RI<0xD1, MRM5r, (outs GR64:$dst), (ins GR64:$src1),
+ "shr{q}\t$dst",
+ [(set GR64:$dst, (srl GR64:$src1, (i8 1)))]>;
+} // Constraints = "$src = $dst"
+
+
+let Uses = [CL] in {
+def SHR8mCL : I<0xD2, MRM5m, (outs), (ins i8mem :$dst),
+ "shr{b}\t{%cl, $dst|$dst, CL}",
+ [(store (srl (loadi8 addr:$dst), CL), addr:$dst)]>;
+def SHR16mCL : I<0xD3, MRM5m, (outs), (ins i16mem:$dst),
+ "shr{w}\t{%cl, $dst|$dst, CL}",
+ [(store (srl (loadi16 addr:$dst), CL), addr:$dst)]>,
+ OpSize;
+def SHR32mCL : I<0xD3, MRM5m, (outs), (ins i32mem:$dst),
+ "shr{l}\t{%cl, $dst|$dst, CL}",
+ [(store (srl (loadi32 addr:$dst), CL), addr:$dst)]>;
+def SHR64mCL : RI<0xD3, MRM5m, (outs), (ins i64mem:$dst),
+ "shr{q}\t{%cl, $dst|$dst, %CL}",
+ [(store (srl (loadi64 addr:$dst), CL), addr:$dst)]>;
+}
+def SHR8mi : Ii8<0xC0, MRM5m, (outs), (ins i8mem :$dst, i8imm:$src),
+ "shr{b}\t{$src, $dst|$dst, $src}",
+ [(store (srl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+def SHR16mi : Ii8<0xC1, MRM5m, (outs), (ins i16mem:$dst, i8imm:$src),
+ "shr{w}\t{$src, $dst|$dst, $src}",
+ [(store (srl (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>,
+ OpSize;
+def SHR32mi : Ii8<0xC1, MRM5m, (outs), (ins i32mem:$dst, i8imm:$src),
+ "shr{l}\t{$src, $dst|$dst, $src}",
+ [(store (srl (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+def SHR64mi : RIi8<0xC1, MRM5m, (outs), (ins i64mem:$dst, i8imm:$src),
+ "shr{q}\t{$src, $dst|$dst, $src}",
+ [(store (srl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+
+// Shift by 1
+def SHR8m1 : I<0xD0, MRM5m, (outs), (ins i8mem :$dst),
+ "shr{b}\t$dst",
+ [(store (srl (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
+def SHR16m1 : I<0xD1, MRM5m, (outs), (ins i16mem:$dst),
+ "shr{w}\t$dst",
+ [(store (srl (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,OpSize;
+def SHR32m1 : I<0xD1, MRM5m, (outs), (ins i32mem:$dst),
+ "shr{l}\t$dst",
+ [(store (srl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
+def SHR64m1 : RI<0xD1, MRM5m, (outs), (ins i64mem:$dst),
+ "shr{q}\t$dst",
+ [(store (srl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
+
+let Constraints = "$src1 = $dst" in {
+let Uses = [CL] in {
+def SAR8rCL : I<0xD2, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "sar{b}\t{%cl, $dst|$dst, CL}",
+ [(set GR8:$dst, (sra GR8:$src1, CL))]>;
+def SAR16rCL : I<0xD3, MRM7r, (outs GR16:$dst), (ins GR16:$src1),
+ "sar{w}\t{%cl, $dst|$dst, CL}",
+ [(set GR16:$dst, (sra GR16:$src1, CL))]>, OpSize;
+def SAR32rCL : I<0xD3, MRM7r, (outs GR32:$dst), (ins GR32:$src1),
+ "sar{l}\t{%cl, $dst|$dst, CL}",
+ [(set GR32:$dst, (sra GR32:$src1, CL))]>;
+def SAR64rCL : RI<0xD3, MRM7r, (outs GR64:$dst), (ins GR64:$src1),
+ "sar{q}\t{%cl, $dst|$dst, %CL}",
+ [(set GR64:$dst, (sra GR64:$src1, CL))]>;
+}
+
+def SAR8ri : Ii8<0xC0, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
+ "sar{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (sra GR8:$src1, (i8 imm:$src2)))]>;
+def SAR16ri : Ii8<0xC1, MRM7r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
+ "sar{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (sra GR16:$src1, (i8 imm:$src2)))]>,
+ OpSize;
+def SAR32ri : Ii8<0xC1, MRM7r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
+ "sar{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (sra GR32:$src1, (i8 imm:$src2)))]>;
+def SAR64ri : RIi8<0xC1, MRM7r, (outs GR64:$dst),
+ (ins GR64:$src1, i8imm:$src2),
+ "sar{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (sra GR64:$src1, (i8 imm:$src2)))]>;
+
+// Shift by 1
+def SAR8r1 : I<0xD0, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "sar{b}\t$dst",
+ [(set GR8:$dst, (sra GR8:$src1, (i8 1)))]>;
+def SAR16r1 : I<0xD1, MRM7r, (outs GR16:$dst), (ins GR16:$src1),
+ "sar{w}\t$dst",
+ [(set GR16:$dst, (sra GR16:$src1, (i8 1)))]>, OpSize;
+def SAR32r1 : I<0xD1, MRM7r, (outs GR32:$dst), (ins GR32:$src1),
+ "sar{l}\t$dst",
+ [(set GR32:$dst, (sra GR32:$src1, (i8 1)))]>;
+def SAR64r1 : RI<0xD1, MRM7r, (outs GR64:$dst), (ins GR64:$src1),
+ "sar{q}\t$dst",
+ [(set GR64:$dst, (sra GR64:$src1, (i8 1)))]>;
+} // Constraints = "$src = $dst"
+
+
+let Uses = [CL] in {
+def SAR8mCL : I<0xD2, MRM7m, (outs), (ins i8mem :$dst),
+ "sar{b}\t{%cl, $dst|$dst, CL}",
+ [(store (sra (loadi8 addr:$dst), CL), addr:$dst)]>;
+def SAR16mCL : I<0xD3, MRM7m, (outs), (ins i16mem:$dst),
+ "sar{w}\t{%cl, $dst|$dst, CL}",
+ [(store (sra (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize;
+def SAR32mCL : I<0xD3, MRM7m, (outs), (ins i32mem:$dst),
+ "sar{l}\t{%cl, $dst|$dst, CL}",
+ [(store (sra (loadi32 addr:$dst), CL), addr:$dst)]>;
+def SAR64mCL : RI<0xD3, MRM7m, (outs), (ins i64mem:$dst),
+ "sar{q}\t{%cl, $dst|$dst, %CL}",
+ [(store (sra (loadi64 addr:$dst), CL), addr:$dst)]>;
+}
+def SAR8mi : Ii8<0xC0, MRM7m, (outs), (ins i8mem :$dst, i8imm:$src),
+ "sar{b}\t{$src, $dst|$dst, $src}",
+ [(store (sra (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+def SAR16mi : Ii8<0xC1, MRM7m, (outs), (ins i16mem:$dst, i8imm:$src),
+ "sar{w}\t{$src, $dst|$dst, $src}",
+ [(store (sra (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>,
+ OpSize;
+def SAR32mi : Ii8<0xC1, MRM7m, (outs), (ins i32mem:$dst, i8imm:$src),
+ "sar{l}\t{$src, $dst|$dst, $src}",
+ [(store (sra (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+def SAR64mi : RIi8<0xC1, MRM7m, (outs), (ins i64mem:$dst, i8imm:$src),
+ "sar{q}\t{$src, $dst|$dst, $src}",
+ [(store (sra (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+
+// Shift by 1
+def SAR8m1 : I<0xD0, MRM7m, (outs), (ins i8mem :$dst),
+ "sar{b}\t$dst",
+ [(store (sra (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
+def SAR16m1 : I<0xD1, MRM7m, (outs), (ins i16mem:$dst),
+ "sar{w}\t$dst",
+ [(store (sra (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,
+ OpSize;
+def SAR32m1 : I<0xD1, MRM7m, (outs), (ins i32mem:$dst),
+ "sar{l}\t$dst",
+ [(store (sra (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
+def SAR64m1 : RI<0xD1, MRM7m, (outs), (ins i64mem:$dst),
+ "sar{q}\t$dst",
+ [(store (sra (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
+
+//===----------------------------------------------------------------------===//
+// Rotate instructions
+//===----------------------------------------------------------------------===//
+
+let Constraints = "$src1 = $dst" in {
+def RCL8r1 : I<0xD0, MRM2r, (outs GR8:$dst), (ins GR8:$src1),
+ "rcl{b}\t$dst", []>;
+def RCL8ri : Ii8<0xC0, MRM2r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$cnt),
+ "rcl{b}\t{$cnt, $dst|$dst, $cnt}", []>;
+let Uses = [CL] in
+def RCL8rCL : I<0xD2, MRM2r, (outs GR8:$dst), (ins GR8:$src1),
+ "rcl{b}\t{%cl, $dst|$dst, CL}", []>;
+
+def RCL16r1 : I<0xD1, MRM2r, (outs GR16:$dst), (ins GR16:$src1),
+ "rcl{w}\t$dst", []>, OpSize;
+def RCL16ri : Ii8<0xC1, MRM2r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$cnt),
+ "rcl{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
+let Uses = [CL] in
+def RCL16rCL : I<0xD3, MRM2r, (outs GR16:$dst), (ins GR16:$src1),
+ "rcl{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
+
+def RCL32r1 : I<0xD1, MRM2r, (outs GR32:$dst), (ins GR32:$src1),
+ "rcl{l}\t$dst", []>;
+def RCL32ri : Ii8<0xC1, MRM2r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$cnt),
+ "rcl{l}\t{$cnt, $dst|$dst, $cnt}", []>;
+let Uses = [CL] in
+def RCL32rCL : I<0xD3, MRM2r, (outs GR32:$dst), (ins GR32:$src1),
+ "rcl{l}\t{%cl, $dst|$dst, CL}", []>;
+
+
+def RCL64r1 : RI<0xD1, MRM2r, (outs GR64:$dst), (ins GR64:$src1),
+ "rcl{q}\t$dst", []>;
+def RCL64ri : RIi8<0xC1, MRM2r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$cnt),
+ "rcl{q}\t{$cnt, $dst|$dst, $cnt}", []>;
+let Uses = [CL] in
+def RCL64rCL : RI<0xD3, MRM2r, (outs GR64:$dst), (ins GR64:$src1),
+ "rcl{q}\t{%cl, $dst|$dst, CL}", []>;
+
+
+def RCR8r1 : I<0xD0, MRM3r, (outs GR8:$dst), (ins GR8:$src1),
+ "rcr{b}\t$dst", []>;
+def RCR8ri : Ii8<0xC0, MRM3r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$cnt),
+ "rcr{b}\t{$cnt, $dst|$dst, $cnt}", []>;
+let Uses = [CL] in
+def RCR8rCL : I<0xD2, MRM3r, (outs GR8:$dst), (ins GR8:$src1),
+ "rcr{b}\t{%cl, $dst|$dst, CL}", []>;
+
+def RCR16r1 : I<0xD1, MRM3r, (outs GR16:$dst), (ins GR16:$src1),
+ "rcr{w}\t$dst", []>, OpSize;
+def RCR16ri : Ii8<0xC1, MRM3r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$cnt),
+ "rcr{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
+let Uses = [CL] in
+def RCR16rCL : I<0xD3, MRM3r, (outs GR16:$dst), (ins GR16:$src1),
+ "rcr{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
+
+def RCR32r1 : I<0xD1, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
+ "rcr{l}\t$dst", []>;
+def RCR32ri : Ii8<0xC1, MRM3r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$cnt),
+ "rcr{l}\t{$cnt, $dst|$dst, $cnt}", []>;
+let Uses = [CL] in
+def RCR32rCL : I<0xD3, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
+ "rcr{l}\t{%cl, $dst|$dst, CL}", []>;
+
+def RCR64r1 : RI<0xD1, MRM3r, (outs GR64:$dst), (ins GR64:$src1),
+ "rcr{q}\t$dst", []>;
+def RCR64ri : RIi8<0xC1, MRM3r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$cnt),
+ "rcr{q}\t{$cnt, $dst|$dst, $cnt}", []>;
+let Uses = [CL] in
+def RCR64rCL : RI<0xD3, MRM3r, (outs GR64:$dst), (ins GR64:$src1),
+ "rcr{q}\t{%cl, $dst|$dst, CL}", []>;
+
+} // Constraints = "$src = $dst"
+
+def RCL8m1 : I<0xD0, MRM2m, (outs), (ins i8mem:$dst),
+ "rcl{b}\t$dst", []>;
+def RCL8mi : Ii8<0xC0, MRM2m, (outs), (ins i8mem:$dst, i8imm:$cnt),
+ "rcl{b}\t{$cnt, $dst|$dst, $cnt}", []>;
+def RCL16m1 : I<0xD1, MRM2m, (outs), (ins i16mem:$dst),
+ "rcl{w}\t$dst", []>, OpSize;
+def RCL16mi : Ii8<0xC1, MRM2m, (outs), (ins i16mem:$dst, i8imm:$cnt),
+ "rcl{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
+def RCL32m1 : I<0xD1, MRM2m, (outs), (ins i32mem:$dst),
+ "rcl{l}\t$dst", []>;
+def RCL32mi : Ii8<0xC1, MRM2m, (outs), (ins i32mem:$dst, i8imm:$cnt),
+ "rcl{l}\t{$cnt, $dst|$dst, $cnt}", []>;
+def RCL64m1 : RI<0xD1, MRM2m, (outs), (ins i64mem:$dst),
+ "rcl{q}\t$dst", []>;
+def RCL64mi : RIi8<0xC1, MRM2m, (outs), (ins i64mem:$dst, i8imm:$cnt),
+ "rcl{q}\t{$cnt, $dst|$dst, $cnt}", []>;
+
+def RCR8m1 : I<0xD0, MRM3m, (outs), (ins i8mem:$dst),
+ "rcr{b}\t$dst", []>;
+def RCR8mi : Ii8<0xC0, MRM3m, (outs), (ins i8mem:$dst, i8imm:$cnt),
+ "rcr{b}\t{$cnt, $dst|$dst, $cnt}", []>;
+def RCR16m1 : I<0xD1, MRM3m, (outs), (ins i16mem:$dst),
+ "rcr{w}\t$dst", []>, OpSize;
+def RCR16mi : Ii8<0xC1, MRM3m, (outs), (ins i16mem:$dst, i8imm:$cnt),
+ "rcr{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
+def RCR32m1 : I<0xD1, MRM3m, (outs), (ins i32mem:$dst),
+ "rcr{l}\t$dst", []>;
+def RCR32mi : Ii8<0xC1, MRM3m, (outs), (ins i32mem:$dst, i8imm:$cnt),
+ "rcr{l}\t{$cnt, $dst|$dst, $cnt}", []>;
+def RCR64m1 : RI<0xD1, MRM3m, (outs), (ins i64mem:$dst),
+ "rcr{q}\t$dst", []>;
+def RCR64mi : RIi8<0xC1, MRM3m, (outs), (ins i64mem:$dst, i8imm:$cnt),
+ "rcr{q}\t{$cnt, $dst|$dst, $cnt}", []>;
+
+let Uses = [CL] in {
+def RCL8mCL : I<0xD2, MRM2m, (outs), (ins i8mem:$dst),
+ "rcl{b}\t{%cl, $dst|$dst, CL}", []>;
+def RCL16mCL : I<0xD3, MRM2m, (outs), (ins i16mem:$dst),
+ "rcl{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
+def RCL32mCL : I<0xD3, MRM2m, (outs), (ins i32mem:$dst),
+ "rcl{l}\t{%cl, $dst|$dst, CL}", []>;
+def RCL64mCL : RI<0xD3, MRM2m, (outs), (ins i64mem:$dst),
+ "rcl{q}\t{%cl, $dst|$dst, CL}", []>;
+
+def RCR8mCL : I<0xD2, MRM3m, (outs), (ins i8mem:$dst),
+ "rcr{b}\t{%cl, $dst|$dst, CL}", []>;
+def RCR16mCL : I<0xD3, MRM3m, (outs), (ins i16mem:$dst),
+ "rcr{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
+def RCR32mCL : I<0xD3, MRM3m, (outs), (ins i32mem:$dst),
+ "rcr{l}\t{%cl, $dst|$dst, CL}", []>;
+def RCR64mCL : RI<0xD3, MRM3m, (outs), (ins i64mem:$dst),
+ "rcr{q}\t{%cl, $dst|$dst, CL}", []>;
+}
+
+let Constraints = "$src1 = $dst" in {
+// FIXME: provide shorter instructions when imm8 == 1
+let Uses = [CL] in {
+def ROL8rCL : I<0xD2, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "rol{b}\t{%cl, $dst|$dst, CL}",
+ [(set GR8:$dst, (rotl GR8:$src1, CL))]>;
+def ROL16rCL : I<0xD3, MRM0r, (outs GR16:$dst), (ins GR16:$src1),
+ "rol{w}\t{%cl, $dst|$dst, CL}",
+ [(set GR16:$dst, (rotl GR16:$src1, CL))]>, OpSize;
+def ROL32rCL : I<0xD3, MRM0r, (outs GR32:$dst), (ins GR32:$src1),
+ "rol{l}\t{%cl, $dst|$dst, CL}",
+ [(set GR32:$dst, (rotl GR32:$src1, CL))]>;
+def ROL64rCL : RI<0xD3, MRM0r, (outs GR64:$dst), (ins GR64:$src1),
+ "rol{q}\t{%cl, $dst|$dst, %CL}",
+ [(set GR64:$dst, (rotl GR64:$src1, CL))]>;
+}
+
+def ROL8ri : Ii8<0xC0, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
+ "rol{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (rotl GR8:$src1, (i8 imm:$src2)))]>;
+def ROL16ri : Ii8<0xC1, MRM0r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
+ "rol{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (rotl GR16:$src1, (i8 imm:$src2)))]>,
+ OpSize;
+def ROL32ri : Ii8<0xC1, MRM0r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
+ "rol{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (rotl GR32:$src1, (i8 imm:$src2)))]>;
+def ROL64ri : RIi8<0xC1, MRM0r, (outs GR64:$dst),
+ (ins GR64:$src1, i8imm:$src2),
+ "rol{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (rotl GR64:$src1, (i8 imm:$src2)))]>;
+
+// Rotate by 1
+def ROL8r1 : I<0xD0, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "rol{b}\t$dst",
+ [(set GR8:$dst, (rotl GR8:$src1, (i8 1)))]>;
+def ROL16r1 : I<0xD1, MRM0r, (outs GR16:$dst), (ins GR16:$src1),
+ "rol{w}\t$dst",
+ [(set GR16:$dst, (rotl GR16:$src1, (i8 1)))]>, OpSize;
+def ROL32r1 : I<0xD1, MRM0r, (outs GR32:$dst), (ins GR32:$src1),
+ "rol{l}\t$dst",
+ [(set GR32:$dst, (rotl GR32:$src1, (i8 1)))]>;
+def ROL64r1 : RI<0xD1, MRM0r, (outs GR64:$dst), (ins GR64:$src1),
+ "rol{q}\t$dst",
+ [(set GR64:$dst, (rotl GR64:$src1, (i8 1)))]>;
+} // Constraints = "$src = $dst"
+
+let Uses = [CL] in {
+def ROL8mCL : I<0xD2, MRM0m, (outs), (ins i8mem :$dst),
+ "rol{b}\t{%cl, $dst|$dst, CL}",
+ [(store (rotl (loadi8 addr:$dst), CL), addr:$dst)]>;
+def ROL16mCL : I<0xD3, MRM0m, (outs), (ins i16mem:$dst),
+ "rol{w}\t{%cl, $dst|$dst, CL}",
+ [(store (rotl (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize;
+def ROL32mCL : I<0xD3, MRM0m, (outs), (ins i32mem:$dst),
+ "rol{l}\t{%cl, $dst|$dst, CL}",
+ [(store (rotl (loadi32 addr:$dst), CL), addr:$dst)]>;
+def ROL64mCL : RI<0xD3, MRM0m, (outs), (ins i64mem:$dst),
+ "rol{q}\t{%cl, $dst|$dst, %CL}",
+ [(store (rotl (loadi64 addr:$dst), CL), addr:$dst)]>;
+}
+def ROL8mi : Ii8<0xC0, MRM0m, (outs), (ins i8mem :$dst, i8imm:$src1),
+ "rol{b}\t{$src1, $dst|$dst, $src1}",
+ [(store (rotl (loadi8 addr:$dst), (i8 imm:$src1)), addr:$dst)]>;
+def ROL16mi : Ii8<0xC1, MRM0m, (outs), (ins i16mem:$dst, i8imm:$src1),
+ "rol{w}\t{$src1, $dst|$dst, $src1}",
+ [(store (rotl (loadi16 addr:$dst), (i8 imm:$src1)), addr:$dst)]>,
+ OpSize;
+def ROL32mi : Ii8<0xC1, MRM0m, (outs), (ins i32mem:$dst, i8imm:$src1),
+ "rol{l}\t{$src1, $dst|$dst, $src1}",
+ [(store (rotl (loadi32 addr:$dst), (i8 imm:$src1)), addr:$dst)]>;
+def ROL64mi : RIi8<0xC1, MRM0m, (outs), (ins i64mem:$dst, i8imm:$src1),
+ "rol{q}\t{$src1, $dst|$dst, $src1}",
+ [(store (rotl (loadi64 addr:$dst), (i8 imm:$src1)), addr:$dst)]>;
+
+// Rotate by 1
+def ROL8m1 : I<0xD0, MRM0m, (outs), (ins i8mem :$dst),
+ "rol{b}\t$dst",
+ [(store (rotl (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
+def ROL16m1 : I<0xD1, MRM0m, (outs), (ins i16mem:$dst),
+ "rol{w}\t$dst",
+ [(store (rotl (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,
+ OpSize;
+def ROL32m1 : I<0xD1, MRM0m, (outs), (ins i32mem:$dst),
+ "rol{l}\t$dst",
+ [(store (rotl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
+def ROL64m1 : RI<0xD1, MRM0m, (outs), (ins i64mem:$dst),
+ "rol{q}\t$dst",
+ [(store (rotl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
+
+let Constraints = "$src1 = $dst" in {
+let Uses = [CL] in {
+def ROR8rCL : I<0xD2, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "ror{b}\t{%cl, $dst|$dst, CL}",
+ [(set GR8:$dst, (rotr GR8:$src1, CL))]>;
+def ROR16rCL : I<0xD3, MRM1r, (outs GR16:$dst), (ins GR16:$src1),
+ "ror{w}\t{%cl, $dst|$dst, CL}",
+ [(set GR16:$dst, (rotr GR16:$src1, CL))]>, OpSize;
+def ROR32rCL : I<0xD3, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
+ "ror{l}\t{%cl, $dst|$dst, CL}",
+ [(set GR32:$dst, (rotr GR32:$src1, CL))]>;
+def ROR64rCL : RI<0xD3, MRM1r, (outs GR64:$dst), (ins GR64:$src1),
+ "ror{q}\t{%cl, $dst|$dst, %CL}",
+ [(set GR64:$dst, (rotr GR64:$src1, CL))]>;
+}
+
+def ROR8ri : Ii8<0xC0, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
+ "ror{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (rotr GR8:$src1, (i8 imm:$src2)))]>;
+def ROR16ri : Ii8<0xC1, MRM1r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
+ "ror{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (rotr GR16:$src1, (i8 imm:$src2)))]>,
+ OpSize;
+def ROR32ri : Ii8<0xC1, MRM1r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
+ "ror{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (rotr GR32:$src1, (i8 imm:$src2)))]>;
+def ROR64ri : RIi8<0xC1, MRM1r, (outs GR64:$dst),
+ (ins GR64:$src1, i8imm:$src2),
+ "ror{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (rotr GR64:$src1, (i8 imm:$src2)))]>;
+
+// Rotate by 1
+def ROR8r1 : I<0xD0, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "ror{b}\t$dst",
+ [(set GR8:$dst, (rotr GR8:$src1, (i8 1)))]>;
+def ROR16r1 : I<0xD1, MRM1r, (outs GR16:$dst), (ins GR16:$src1),
+ "ror{w}\t$dst",
+ [(set GR16:$dst, (rotr GR16:$src1, (i8 1)))]>, OpSize;
+def ROR32r1 : I<0xD1, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
+ "ror{l}\t$dst",
+ [(set GR32:$dst, (rotr GR32:$src1, (i8 1)))]>;
+def ROR64r1 : RI<0xD1, MRM1r, (outs GR64:$dst), (ins GR64:$src1),
+ "ror{q}\t$dst",
+ [(set GR64:$dst, (rotr GR64:$src1, (i8 1)))]>;
+} // Constraints = "$src = $dst"
+
+let Uses = [CL] in {
+def ROR8mCL : I<0xD2, MRM1m, (outs), (ins i8mem :$dst),
+ "ror{b}\t{%cl, $dst|$dst, CL}",
+ [(store (rotr (loadi8 addr:$dst), CL), addr:$dst)]>;
+def ROR16mCL : I<0xD3, MRM1m, (outs), (ins i16mem:$dst),
+ "ror{w}\t{%cl, $dst|$dst, CL}",
+ [(store (rotr (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize;
+def ROR32mCL : I<0xD3, MRM1m, (outs), (ins i32mem:$dst),
+ "ror{l}\t{%cl, $dst|$dst, CL}",
+ [(store (rotr (loadi32 addr:$dst), CL), addr:$dst)]>;
+def ROR64mCL : RI<0xD3, MRM1m, (outs), (ins i64mem:$dst),
+ "ror{q}\t{%cl, $dst|$dst, %CL}",
+ [(store (rotr (loadi64 addr:$dst), CL), addr:$dst)]>;
+}
+def ROR8mi : Ii8<0xC0, MRM1m, (outs), (ins i8mem :$dst, i8imm:$src),
+ "ror{b}\t{$src, $dst|$dst, $src}",
+ [(store (rotr (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+def ROR16mi : Ii8<0xC1, MRM1m, (outs), (ins i16mem:$dst, i8imm:$src),
+ "ror{w}\t{$src, $dst|$dst, $src}",
+ [(store (rotr (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>,
+ OpSize;
+def ROR32mi : Ii8<0xC1, MRM1m, (outs), (ins i32mem:$dst, i8imm:$src),
+ "ror{l}\t{$src, $dst|$dst, $src}",
+ [(store (rotr (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+def ROR64mi : RIi8<0xC1, MRM1m, (outs), (ins i64mem:$dst, i8imm:$src),
+ "ror{q}\t{$src, $dst|$dst, $src}",
+ [(store (rotr (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+
+// Rotate by 1
+def ROR8m1 : I<0xD0, MRM1m, (outs), (ins i8mem :$dst),
+ "ror{b}\t$dst",
+ [(store (rotr (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
+def ROR16m1 : I<0xD1, MRM1m, (outs), (ins i16mem:$dst),
+ "ror{w}\t$dst",
+ [(store (rotr (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,
+ OpSize;
+def ROR32m1 : I<0xD1, MRM1m, (outs), (ins i32mem:$dst),
+ "ror{l}\t$dst",
+ [(store (rotr (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
+def ROR64m1 : RI<0xD1, MRM1m, (outs), (ins i64mem:$dst),
+ "ror{q}\t$dst",
+ [(store (rotr (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
+
+
+//===----------------------------------------------------------------------===//
+// Double shift instructions (generalizations of rotate)
+//===----------------------------------------------------------------------===//
+
+let Constraints = "$src1 = $dst" in {
+
+let Uses = [CL] in {
+def SHLD16rrCL : I<0xA5, MRMDestReg, (outs GR16:$dst),
+ (ins GR16:$src1, GR16:$src2),
+ "shld{w}\t{%cl, $src2, $dst|$dst, $src2, CL}",
+ [(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2, CL))]>,
+ TB, OpSize;
+def SHRD16rrCL : I<0xAD, MRMDestReg, (outs GR16:$dst),
+ (ins GR16:$src1, GR16:$src2),
+ "shrd{w}\t{%cl, $src2, $dst|$dst, $src2, CL}",
+ [(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2, CL))]>,
+ TB, OpSize;
+def SHLD32rrCL : I<0xA5, MRMDestReg, (outs GR32:$dst),
+ (ins GR32:$src1, GR32:$src2),
+ "shld{l}\t{%cl, $src2, $dst|$dst, $src2, CL}",
+ [(set GR32:$dst, (X86shld GR32:$src1, GR32:$src2, CL))]>, TB;
+def SHRD32rrCL : I<0xAD, MRMDestReg, (outs GR32:$dst),
+ (ins GR32:$src1, GR32:$src2),
+ "shrd{l}\t{%cl, $src2, $dst|$dst, $src2, CL}",
+ [(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2, CL))]>, TB;
+def SHLD64rrCL : RI<0xA5, MRMDestReg, (outs GR64:$dst),
+ (ins GR64:$src1, GR64:$src2),
+ "shld{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
+ [(set GR64:$dst, (X86shld GR64:$src1, GR64:$src2, CL))]>,
+ TB;
+def SHRD64rrCL : RI<0xAD, MRMDestReg, (outs GR64:$dst),
+ (ins GR64:$src1, GR64:$src2),
+ "shrd{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
+ [(set GR64:$dst, (X86shrd GR64:$src1, GR64:$src2, CL))]>,
+ TB;
+}
+
+let isCommutable = 1 in { // These instructions commute to each other.
+def SHLD16rri8 : Ii8<0xA4, MRMDestReg,
+ (outs GR16:$dst),
+ (ins GR16:$src1, GR16:$src2, i8imm:$src3),
+ "shld{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2,
+ (i8 imm:$src3)))]>,
+ TB, OpSize;
+def SHRD16rri8 : Ii8<0xAC, MRMDestReg,
+ (outs GR16:$dst),
+ (ins GR16:$src1, GR16:$src2, i8imm:$src3),
+ "shrd{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2,
+ (i8 imm:$src3)))]>,
+ TB, OpSize;
+def SHLD32rri8 : Ii8<0xA4, MRMDestReg,
+ (outs GR32:$dst),
+ (ins GR32:$src1, GR32:$src2, i8imm:$src3),
+ "shld{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set GR32:$dst, (X86shld GR32:$src1, GR32:$src2,
+ (i8 imm:$src3)))]>,
+ TB;
+def SHRD32rri8 : Ii8<0xAC, MRMDestReg,
+ (outs GR32:$dst),
+ (ins GR32:$src1, GR32:$src2, i8imm:$src3),
+ "shrd{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2,
+ (i8 imm:$src3)))]>,
+ TB;
+def SHLD64rri8 : RIi8<0xA4, MRMDestReg,
+ (outs GR64:$dst),
+ (ins GR64:$src1, GR64:$src2, i8imm:$src3),
+ "shld{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set GR64:$dst, (X86shld GR64:$src1, GR64:$src2,
+ (i8 imm:$src3)))]>,
+ TB;
+def SHRD64rri8 : RIi8<0xAC, MRMDestReg,
+ (outs GR64:$dst),
+ (ins GR64:$src1, GR64:$src2, i8imm:$src3),
+ "shrd{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set GR64:$dst, (X86shrd GR64:$src1, GR64:$src2,
+ (i8 imm:$src3)))]>,
+ TB;
+}
+} // Constraints = "$src = $dst"
+
+let Uses = [CL] in {
+def SHLD16mrCL : I<0xA5, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
+ "shld{w}\t{%cl, $src2, $dst|$dst, $src2, CL}",
+ [(store (X86shld (loadi16 addr:$dst), GR16:$src2, CL),
+ addr:$dst)]>, TB, OpSize;
+def SHRD16mrCL : I<0xAD, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
+ "shrd{w}\t{%cl, $src2, $dst|$dst, $src2, CL}",
+ [(store (X86shrd (loadi16 addr:$dst), GR16:$src2, CL),
+ addr:$dst)]>, TB, OpSize;
+
+def SHLD32mrCL : I<0xA5, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
+ "shld{l}\t{%cl, $src2, $dst|$dst, $src2, CL}",
+ [(store (X86shld (loadi32 addr:$dst), GR32:$src2, CL),
+ addr:$dst)]>, TB;
+def SHRD32mrCL : I<0xAD, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
+ "shrd{l}\t{%cl, $src2, $dst|$dst, $src2, CL}",
+ [(store (X86shrd (loadi32 addr:$dst), GR32:$src2, CL),
+ addr:$dst)]>, TB;
+
+def SHLD64mrCL : RI<0xA5, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
+ "shld{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
+ [(store (X86shld (loadi64 addr:$dst), GR64:$src2, CL),
+ addr:$dst)]>, TB;
+def SHRD64mrCL : RI<0xAD, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
+ "shrd{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
+ [(store (X86shrd (loadi64 addr:$dst), GR64:$src2, CL),
+ addr:$dst)]>, TB;
+}
+
+def SHLD16mri8 : Ii8<0xA4, MRMDestMem,
+ (outs), (ins i16mem:$dst, GR16:$src2, i8imm:$src3),
+ "shld{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(store (X86shld (loadi16 addr:$dst), GR16:$src2,
+ (i8 imm:$src3)), addr:$dst)]>,
+ TB, OpSize;
+def SHRD16mri8 : Ii8<0xAC, MRMDestMem,
+ (outs), (ins i16mem:$dst, GR16:$src2, i8imm:$src3),
+ "shrd{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(store (X86shrd (loadi16 addr:$dst), GR16:$src2,
+ (i8 imm:$src3)), addr:$dst)]>,
+ TB, OpSize;
+
+def SHLD32mri8 : Ii8<0xA4, MRMDestMem,
+ (outs), (ins i32mem:$dst, GR32:$src2, i8imm:$src3),
+ "shld{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(store (X86shld (loadi32 addr:$dst), GR32:$src2,
+ (i8 imm:$src3)), addr:$dst)]>,
+ TB;
+def SHRD32mri8 : Ii8<0xAC, MRMDestMem,
+ (outs), (ins i32mem:$dst, GR32:$src2, i8imm:$src3),
+ "shrd{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(store (X86shrd (loadi32 addr:$dst), GR32:$src2,
+ (i8 imm:$src3)), addr:$dst)]>,
+ TB;
+
+def SHLD64mri8 : RIi8<0xA4, MRMDestMem,
+ (outs), (ins i64mem:$dst, GR64:$src2, i8imm:$src3),
+ "shld{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(store (X86shld (loadi64 addr:$dst), GR64:$src2,
+ (i8 imm:$src3)), addr:$dst)]>,
+ TB;
+def SHRD64mri8 : RIi8<0xAC, MRMDestMem,
+ (outs), (ins i64mem:$dst, GR64:$src2, i8imm:$src3),
+ "shrd{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(store (X86shrd (loadi64 addr:$dst), GR64:$src2,
+ (i8 imm:$src3)), addr:$dst)]>,
+ TB;
+
+} // Defs = [EFLAGS]
+
diff --git a/contrib/llvm/lib/Target/X86/X86InstrSystem.td b/contrib/llvm/lib/Target/X86/X86InstrSystem.td
new file mode 100644
index 0000000..05a5b36
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrSystem.td
@@ -0,0 +1,467 @@
+//===- X86InstrSystem.td - System Instructions -------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the X86 instructions that are generally used in
+// privileged modes. These are not typically used by the compiler, but are
+// supported for the assembler and disassembler.
+//
+//===----------------------------------------------------------------------===//
+
+let Defs = [RAX, RDX] in
+ def RDTSC : I<0x31, RawFrm, (outs), (ins), "rdtsc", [(X86rdtsc)]>, TB;
+
+let Defs = [RAX, RCX, RDX] in
+ def RDTSCP : I<0x01, MRM_F9, (outs), (ins), "rdtscp", []>, TB;
+
+// CPU flow control instructions
+
+let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in {
+ def TRAP : I<0x0B, RawFrm, (outs), (ins), "ud2", [(trap)]>, TB;
+ def UD2B : I<0xB9, RawFrm, (outs), (ins), "ud2b", []>, TB;
+}
+
+def HLT : I<0xF4, RawFrm, (outs), (ins), "hlt", []>;
+def RSM : I<0xAA, RawFrm, (outs), (ins), "rsm", []>, TB;
+
+// Interrupt and SysCall Instructions.
+let Uses = [EFLAGS] in
+ def INTO : I<0xce, RawFrm, (outs), (ins), "into", []>;
+def INT3 : I<0xcc, RawFrm, (outs), (ins), "int3",
+ [(int_x86_int (i8 3))]>;
+
+// The long form of "int $3" turns into int3 as a size optimization.
+// FIXME: This doesn't work because InstAlias can't match immediate constants.
+//def : InstAlias<"int\t$3", (INT3)>;
+
+
+def INT : Ii8<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap",
+ [(int_x86_int imm:$trap)]>;
+
+
+def SYSCALL : I<0x05, RawFrm, (outs), (ins), "syscall", []>, TB;
+def SYSRETL : I<0x07, RawFrm, (outs), (ins), "sysretl", []>, TB;
+def SYSRETQ :RI<0x07, RawFrm, (outs), (ins), "sysretq", []>, TB,
+ Requires<[In64BitMode]>;
+
+def SYSENTER : I<0x34, RawFrm, (outs), (ins), "sysenter", []>, TB;
+
+def SYSEXIT : I<0x35, RawFrm, (outs), (ins), "sysexit", []>, TB,
+ Requires<[In32BitMode]>;
+def SYSEXIT64 :RI<0x35, RawFrm, (outs), (ins), "sysexit", []>, TB,
+ Requires<[In64BitMode]>;
+
+def IRET16 : I<0xcf, RawFrm, (outs), (ins), "iretw", []>, OpSize;
+def IRET32 : I<0xcf, RawFrm, (outs), (ins), "iret{l|d}", []>;
+def IRET64 : RI<0xcf, RawFrm, (outs), (ins), "iretq", []>,
+ Requires<[In64BitMode]>;
+
+
+//===----------------------------------------------------------------------===//
+// Input/Output Instructions.
+//
+let Defs = [AL], Uses = [DX] in
+def IN8rr : I<0xEC, RawFrm, (outs), (ins),
+ "in{b}\t{%dx, %al|AL, DX}", []>;
+let Defs = [AX], Uses = [DX] in
+def IN16rr : I<0xED, RawFrm, (outs), (ins),
+ "in{w}\t{%dx, %ax|AX, DX}", []>, OpSize;
+let Defs = [EAX], Uses = [DX] in
+def IN32rr : I<0xED, RawFrm, (outs), (ins),
+ "in{l}\t{%dx, %eax|EAX, DX}", []>;
+
+let Defs = [AL] in
+def IN8ri : Ii8<0xE4, RawFrm, (outs), (ins i8imm:$port),
+ "in{b}\t{$port, %al|AL, $port}", []>;
+let Defs = [AX] in
+def IN16ri : Ii8<0xE5, RawFrm, (outs), (ins i8imm:$port),
+ "in{w}\t{$port, %ax|AX, $port}", []>, OpSize;
+let Defs = [EAX] in
+def IN32ri : Ii8<0xE5, RawFrm, (outs), (ins i8imm:$port),
+ "in{l}\t{$port, %eax|EAX, $port}", []>;
+
+let Uses = [DX, AL] in
+def OUT8rr : I<0xEE, RawFrm, (outs), (ins),
+ "out{b}\t{%al, %dx|DX, AL}", []>;
+let Uses = [DX, AX] in
+def OUT16rr : I<0xEF, RawFrm, (outs), (ins),
+ "out{w}\t{%ax, %dx|DX, AX}", []>, OpSize;
+let Uses = [DX, EAX] in
+def OUT32rr : I<0xEF, RawFrm, (outs), (ins),
+ "out{l}\t{%eax, %dx|DX, EAX}", []>;
+
+let Uses = [AL] in
+def OUT8ir : Ii8<0xE6, RawFrm, (outs), (ins i8imm:$port),
+ "out{b}\t{%al, $port|$port, AL}", []>;
+let Uses = [AX] in
+def OUT16ir : Ii8<0xE7, RawFrm, (outs), (ins i8imm:$port),
+ "out{w}\t{%ax, $port|$port, AX}", []>, OpSize;
+let Uses = [EAX] in
+def OUT32ir : Ii8<0xE7, RawFrm, (outs), (ins i8imm:$port),
+ "out{l}\t{%eax, $port|$port, EAX}", []>;
+
+def IN8 : I<0x6C, RawFrm, (outs), (ins), "ins{b}", []>;
+def IN16 : I<0x6D, RawFrm, (outs), (ins), "ins{w}", []>, OpSize;
+def IN32 : I<0x6D, RawFrm, (outs), (ins), "ins{l}", []>;
+
+//===----------------------------------------------------------------------===//
+// Moves to and from debug registers
+
+def MOV32rd : I<0x21, MRMDestReg, (outs GR32:$dst), (ins DEBUG_REG:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def MOV64rd : I<0x21, MRMDestReg, (outs GR64:$dst), (ins DEBUG_REG:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}", []>, TB;
+
+def MOV32dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR32:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def MOV64dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR64:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}", []>, TB;
+
+//===----------------------------------------------------------------------===//
+// Moves to and from control registers
+
+def MOV32rc : I<0x20, MRMDestReg, (outs GR32:$dst), (ins CONTROL_REG:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def MOV64rc : I<0x20, MRMDestReg, (outs GR64:$dst), (ins CONTROL_REG:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}", []>, TB;
+
+def MOV32cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR32:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def MOV64cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR64:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}", []>, TB;
+
+//===----------------------------------------------------------------------===//
+// Segment override instruction prefixes
+
+def CS_PREFIX : I<0x2E, RawFrm, (outs), (ins), "cs", []>;
+def SS_PREFIX : I<0x36, RawFrm, (outs), (ins), "ss", []>;
+def DS_PREFIX : I<0x3E, RawFrm, (outs), (ins), "ds", []>;
+def ES_PREFIX : I<0x26, RawFrm, (outs), (ins), "es", []>;
+def FS_PREFIX : I<0x64, RawFrm, (outs), (ins), "fs", []>;
+def GS_PREFIX : I<0x65, RawFrm, (outs), (ins), "gs", []>;
+
+
+//===----------------------------------------------------------------------===//
+// Moves to and from segment registers.
+//
+
+def MOV16rs : I<0x8C, MRMDestReg, (outs GR16:$dst), (ins SEGMENT_REG:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
+def MOV32rs : I<0x8C, MRMDestReg, (outs GR32:$dst), (ins SEGMENT_REG:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}", []>;
+def MOV64rs : RI<0x8C, MRMDestReg, (outs GR64:$dst), (ins SEGMENT_REG:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}", []>;
+
+def MOV16ms : I<0x8C, MRMDestMem, (outs i16mem:$dst), (ins SEGMENT_REG:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
+def MOV32ms : I<0x8C, MRMDestMem, (outs i32mem:$dst), (ins SEGMENT_REG:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}", []>;
+def MOV64ms : RI<0x8C, MRMDestMem, (outs i64mem:$dst), (ins SEGMENT_REG:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}", []>;
+
+def MOV16sr : I<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR16:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
+def MOV32sr : I<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR32:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}", []>;
+def MOV64sr : RI<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR64:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}", []>;
+
+def MOV16sm : I<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i16mem:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
+def MOV32sm : I<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i32mem:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}", []>;
+def MOV64sm : RI<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i64mem:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}", []>;
+
+//===----------------------------------------------------------------------===//
+// Segmentation support instructions.
+
+def SWAPGS : I<0x01, MRM_F8, (outs), (ins), "swapgs", []>, TB;
+
+def LAR16rm : I<0x02, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
+ "lar{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+def LAR16rr : I<0x02, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
+ "lar{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+
+// i16mem operand in LAR32rm and GR32 operand in LAR32rr is not a typo.
+def LAR32rm : I<0x02, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
+ "lar{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def LAR32rr : I<0x02, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+ "lar{l}\t{$src, $dst|$dst, $src}", []>, TB;
+// i16mem operand in LAR64rm and GR32 operand in LAR32rr is not a typo.
+def LAR64rm : RI<0x02, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
+ "lar{q}\t{$src, $dst|$dst, $src}", []>, TB;
+def LAR64rr : RI<0x02, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src),
+ "lar{q}\t{$src, $dst|$dst, $src}", []>, TB;
+
+def LSL16rm : I<0x03, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
+ "lsl{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+def LSL16rr : I<0x03, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
+ "lsl{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+def LSL32rm : I<0x03, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "lsl{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def LSL32rr : I<0x03, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+ "lsl{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def LSL64rm : RI<0x03, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "lsl{q}\t{$src, $dst|$dst, $src}", []>, TB;
+def LSL64rr : RI<0x03, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+ "lsl{q}\t{$src, $dst|$dst, $src}", []>, TB;
+
+def INVLPG : I<0x01, MRM7m, (outs), (ins i8mem:$addr), "invlpg\t$addr", []>, TB;
+
+def STR16r : I<0x00, MRM1r, (outs GR16:$dst), (ins),
+ "str{w}\t{$dst}", []>, TB, OpSize;
+def STR32r : I<0x00, MRM1r, (outs GR32:$dst), (ins),
+ "str{l}\t{$dst}", []>, TB;
+def STR64r : RI<0x00, MRM1r, (outs GR64:$dst), (ins),
+ "str{q}\t{$dst}", []>, TB;
+def STRm : I<0x00, MRM1m, (outs i16mem:$dst), (ins),
+ "str{w}\t{$dst}", []>, TB;
+
+def LTRr : I<0x00, MRM3r, (outs), (ins GR16:$src),
+ "ltr{w}\t{$src}", []>, TB;
+def LTRm : I<0x00, MRM3m, (outs), (ins i16mem:$src),
+ "ltr{w}\t{$src}", []>, TB;
+
+def PUSHCS16 : I<0x0E, RawFrm, (outs), (ins),
+ "push{w}\t{%cs|CS}", []>, Requires<[In32BitMode]>, OpSize;
+def PUSHCS32 : I<0x0E, RawFrm, (outs), (ins),
+ "push{l}\t{%cs|CS}", []>, Requires<[In32BitMode]>;
+def PUSHSS16 : I<0x16, RawFrm, (outs), (ins),
+ "push{w}\t{%ss|SS}", []>, Requires<[In32BitMode]>, OpSize;
+def PUSHSS32 : I<0x16, RawFrm, (outs), (ins),
+ "push{l}\t{%ss|SS}", []>, Requires<[In32BitMode]>;
+def PUSHDS16 : I<0x1E, RawFrm, (outs), (ins),
+ "push{w}\t{%ds|DS}", []>, Requires<[In32BitMode]>, OpSize;
+def PUSHDS32 : I<0x1E, RawFrm, (outs), (ins),
+ "push{l}\t{%ds|DS}", []>, Requires<[In32BitMode]>;
+def PUSHES16 : I<0x06, RawFrm, (outs), (ins),
+ "push{w}\t{%es|ES}", []>, Requires<[In32BitMode]>, OpSize;
+def PUSHES32 : I<0x06, RawFrm, (outs), (ins),
+ "push{l}\t{%es|ES}", []>, Requires<[In32BitMode]>;
+
+def PUSHFS16 : I<0xa0, RawFrm, (outs), (ins),
+ "push{w}\t{%fs|FS}", []>, OpSize, TB;
+def PUSHFS32 : I<0xa0, RawFrm, (outs), (ins),
+ "push{l}\t{%fs|FS}", []>, TB, Requires<[In32BitMode]>;
+def PUSHGS16 : I<0xa8, RawFrm, (outs), (ins),
+ "push{w}\t{%gs|GS}", []>, OpSize, TB;
+def PUSHGS32 : I<0xa8, RawFrm, (outs), (ins),
+ "push{l}\t{%gs|GS}", []>, TB, Requires<[In32BitMode]>;
+
+def PUSHFS64 : I<0xa0, RawFrm, (outs), (ins),
+ "push{q}\t{%fs|FS}", []>, TB;
+def PUSHGS64 : I<0xa8, RawFrm, (outs), (ins),
+ "push{q}\t{%gs|GS}", []>, TB;
+
+// No "pop cs" instruction.
+def POPSS16 : I<0x17, RawFrm, (outs), (ins),
+ "pop{w}\t{%ss|SS}", []>, OpSize, Requires<[In32BitMode]>;
+def POPSS32 : I<0x17, RawFrm, (outs), (ins),
+ "pop{l}\t{%ss|SS}", []> , Requires<[In32BitMode]>;
+
+def POPDS16 : I<0x1F, RawFrm, (outs), (ins),
+ "pop{w}\t{%ds|DS}", []>, OpSize, Requires<[In32BitMode]>;
+def POPDS32 : I<0x1F, RawFrm, (outs), (ins),
+ "pop{l}\t{%ds|DS}", []> , Requires<[In32BitMode]>;
+
+def POPES16 : I<0x07, RawFrm, (outs), (ins),
+ "pop{w}\t{%es|ES}", []>, OpSize, Requires<[In32BitMode]>;
+def POPES32 : I<0x07, RawFrm, (outs), (ins),
+ "pop{l}\t{%es|ES}", []> , Requires<[In32BitMode]>;
+
+def POPFS16 : I<0xa1, RawFrm, (outs), (ins),
+ "pop{w}\t{%fs|FS}", []>, OpSize, TB;
+def POPFS32 : I<0xa1, RawFrm, (outs), (ins),
+ "pop{l}\t{%fs|FS}", []>, TB , Requires<[In32BitMode]>;
+def POPFS64 : I<0xa1, RawFrm, (outs), (ins),
+ "pop{q}\t{%fs|FS}", []>, TB;
+
+def POPGS16 : I<0xa9, RawFrm, (outs), (ins),
+ "pop{w}\t{%gs|GS}", []>, OpSize, TB;
+def POPGS32 : I<0xa9, RawFrm, (outs), (ins),
+ "pop{l}\t{%gs|GS}", []>, TB , Requires<[In32BitMode]>;
+def POPGS64 : I<0xa9, RawFrm, (outs), (ins),
+ "pop{q}\t{%gs|GS}", []>, TB;
+
+
+def LDS16rm : I<0xc5, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src),
+ "lds{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
+def LDS32rm : I<0xc5, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src),
+ "lds{l}\t{$src, $dst|$dst, $src}", []>;
+
+def LSS16rm : I<0xb2, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src),
+ "lss{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+def LSS32rm : I<0xb2, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src),
+ "lss{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def LSS64rm : RI<0xb2, MRMSrcMem, (outs GR64:$dst), (ins opaque80mem:$src),
+ "lss{q}\t{$src, $dst|$dst, $src}", []>, TB;
+
+def LES16rm : I<0xc4, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src),
+ "les{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
+def LES32rm : I<0xc4, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src),
+ "les{l}\t{$src, $dst|$dst, $src}", []>;
+
+def LFS16rm : I<0xb4, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src),
+ "lfs{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+def LFS32rm : I<0xb4, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src),
+ "lfs{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def LFS64rm : RI<0xb4, MRMSrcMem, (outs GR64:$dst), (ins opaque80mem:$src),
+ "lfs{q}\t{$src, $dst|$dst, $src}", []>, TB;
+
+def LGS16rm : I<0xb5, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src),
+ "lgs{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+def LGS32rm : I<0xb5, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src),
+ "lgs{l}\t{$src, $dst|$dst, $src}", []>, TB;
+
+def LGS64rm : RI<0xb5, MRMSrcMem, (outs GR64:$dst), (ins opaque80mem:$src),
+ "lgs{q}\t{$src, $dst|$dst, $src}", []>, TB;
+
+
+def VERRr : I<0x00, MRM4r, (outs), (ins GR16:$seg),
+ "verr\t$seg", []>, TB;
+def VERRm : I<0x00, MRM4m, (outs), (ins i16mem:$seg),
+ "verr\t$seg", []>, TB;
+def VERWr : I<0x00, MRM5r, (outs), (ins GR16:$seg),
+ "verw\t$seg", []>, TB;
+def VERWm : I<0x00, MRM5m, (outs), (ins i16mem:$seg),
+ "verw\t$seg", []>, TB;
+
+//===----------------------------------------------------------------------===//
+// Descriptor-table support instructions
+
+def SGDT16m : I<0x01, MRM0m, (outs opaque48mem:$dst), (ins),
+ "sgdtw\t$dst", []>, TB, OpSize, Requires<[In32BitMode]>;
+def SGDTm : I<0x01, MRM0m, (outs opaque48mem:$dst), (ins),
+ "sgdt\t$dst", []>, TB;
+def SIDT16m : I<0x01, MRM1m, (outs opaque48mem:$dst), (ins),
+ "sidtw\t$dst", []>, TB, OpSize, Requires<[In32BitMode]>;
+def SIDTm : I<0x01, MRM1m, (outs opaque48mem:$dst), (ins),
+ "sidt\t$dst", []>, TB;
+def SLDT16r : I<0x00, MRM0r, (outs GR16:$dst), (ins),
+ "sldt{w}\t$dst", []>, TB, OpSize;
+def SLDT16m : I<0x00, MRM0m, (outs i16mem:$dst), (ins),
+ "sldt{w}\t$dst", []>, TB;
+def SLDT32r : I<0x00, MRM0r, (outs GR32:$dst), (ins),
+ "sldt{l}\t$dst", []>, TB;
+
+// LLDT is not interpreted specially in 64-bit mode because there is no sign
+// extension.
+def SLDT64r : RI<0x00, MRM0r, (outs GR64:$dst), (ins),
+ "sldt{q}\t$dst", []>, TB;
+def SLDT64m : RI<0x00, MRM0m, (outs i16mem:$dst), (ins),
+ "sldt{q}\t$dst", []>, TB;
+
+def LGDT16m : I<0x01, MRM2m, (outs), (ins opaque48mem:$src),
+ "lgdtw\t$src", []>, TB, OpSize, Requires<[In32BitMode]>;
+def LGDTm : I<0x01, MRM2m, (outs), (ins opaque48mem:$src),
+ "lgdt\t$src", []>, TB;
+def LIDT16m : I<0x01, MRM3m, (outs), (ins opaque48mem:$src),
+ "lidtw\t$src", []>, TB, OpSize, Requires<[In32BitMode]>;
+def LIDTm : I<0x01, MRM3m, (outs), (ins opaque48mem:$src),
+ "lidt\t$src", []>, TB;
+def LLDT16r : I<0x00, MRM2r, (outs), (ins GR16:$src),
+ "lldt{w}\t$src", []>, TB;
+def LLDT16m : I<0x00, MRM2m, (outs), (ins i16mem:$src),
+ "lldt{w}\t$src", []>, TB;
+
+//===----------------------------------------------------------------------===//
+// Specialized register support
+def WRMSR : I<0x30, RawFrm, (outs), (ins), "wrmsr", []>, TB;
+def RDMSR : I<0x32, RawFrm, (outs), (ins), "rdmsr", []>, TB;
+def RDPMC : I<0x33, RawFrm, (outs), (ins), "rdpmc", []>, TB;
+
+def SMSW16r : I<0x01, MRM4r, (outs GR16:$dst), (ins),
+ "smsw{w}\t$dst", []>, OpSize, TB;
+def SMSW32r : I<0x01, MRM4r, (outs GR32:$dst), (ins),
+ "smsw{l}\t$dst", []>, TB;
+// no m form encodable; use SMSW16m
+def SMSW64r : RI<0x01, MRM4r, (outs GR64:$dst), (ins),
+ "smsw{q}\t$dst", []>, TB;
+
+// For memory operands, there is only a 16-bit form
+def SMSW16m : I<0x01, MRM4m, (outs i16mem:$dst), (ins),
+ "smsw{w}\t$dst", []>, TB;
+
+def LMSW16r : I<0x01, MRM6r, (outs), (ins GR16:$src),
+ "lmsw{w}\t$src", []>, TB;
+def LMSW16m : I<0x01, MRM6m, (outs), (ins i16mem:$src),
+ "lmsw{w}\t$src", []>, TB;
+
+def CPUID : I<0xA2, RawFrm, (outs), (ins), "cpuid", []>, TB;
+
+//===----------------------------------------------------------------------===//
+// Cache instructions
+def INVD : I<0x08, RawFrm, (outs), (ins), "invd", []>, TB;
+def WBINVD : I<0x09, RawFrm, (outs), (ins), "wbinvd", []>, TB;
+
+//===----------------------------------------------------------------------===//
+// XSAVE instructions
+let Defs = [RDX, RAX], Uses = [RCX] in
+ def XGETBV : I<0x01, MRM_D0, (outs), (ins), "xgetbv", []>, TB;
+
+let Uses = [RDX, RAX, RCX] in
+ def XSETBV : I<0x01, MRM_D1, (outs), (ins), "xsetbv", []>, TB;
+
+let Uses = [RDX, RAX] in {
+ def XSAVE : I<0xAE, MRM4m, (outs opaque512mem:$dst), (ins),
+ "xsave\t$dst", []>, TB;
+ def XSAVE64 : I<0xAE, MRM4m, (outs opaque512mem:$dst), (ins),
+ "xsaveq\t$dst", []>, TB, REX_W, Requires<[In64BitMode]>;
+ def XRSTOR : I<0xAE, MRM5m, (outs), (ins opaque512mem:$dst),
+ "xrstor\t$dst", []>, TB;
+ def XRSTOR64 : I<0xAE, MRM5m, (outs), (ins opaque512mem:$dst),
+ "xrstorq\t$dst", []>, TB, REX_W, Requires<[In64BitMode]>;
+ def XSAVEOPT : I<0xAE, MRM6m, (outs opaque512mem:$dst), (ins),
+ "xsaveopt\t$dst", []>, TB;
+ def XSAVEOPT64 : I<0xAE, MRM6m, (outs opaque512mem:$dst), (ins),
+ "xsaveoptq\t$dst", []>, TB, REX_W, Requires<[In64BitMode]>;
+}
+
+//===----------------------------------------------------------------------===//
+// VIA PadLock crypto instructions
+let Defs = [RAX, RDI], Uses = [RDX, RDI] in
+ def XSTORE : I<0xc0, RawFrm, (outs), (ins), "xstore", []>, A7;
+
+def : InstAlias<"xstorerng", (XSTORE)>;
+
+let Defs = [RSI, RDI], Uses = [RBX, RDX, RSI, RDI] in {
+ def XCRYPTECB : I<0xc8, RawFrm, (outs), (ins), "xcryptecb", []>, A7;
+ def XCRYPTCBC : I<0xd0, RawFrm, (outs), (ins), "xcryptcbc", []>, A7;
+ def XCRYPTCTR : I<0xd8, RawFrm, (outs), (ins), "xcryptctr", []>, A7;
+ def XCRYPTCFB : I<0xe0, RawFrm, (outs), (ins), "xcryptcfb", []>, A7;
+ def XCRYPTOFB : I<0xe8, RawFrm, (outs), (ins), "xcryptofb", []>, A7;
+}
+
+let Defs = [RAX, RSI, RDI], Uses = [RAX, RSI, RDI] in {
+ def XSHA1 : I<0xc8, RawFrm, (outs), (ins), "xsha1", []>, A6;
+ def XSHA256 : I<0xd0, RawFrm, (outs), (ins), "xsha256", []>, A6;
+}
+let Defs = [RAX, RDX, RSI], Uses = [RAX, RSI] in
+ def MONTMUL : I<0xc0, RawFrm, (outs), (ins), "montmul", []>, A6;
+
+//===----------------------------------------------------------------------===//
+// FS/GS Base Instructions
+let Predicates = [In64BitMode] in {
+ def RDFSBASE : I<0xAE, MRM0r, (outs GR32:$dst), (ins),
+ "rdfsbase{l}\t$dst", []>, TB, XS;
+ def RDFSBASE64 : RI<0xAE, MRM0r, (outs GR64:$dst), (ins),
+ "rdfsbase{q}\t$dst", []>, TB, XS;
+ def RDGSBASE : I<0xAE, MRM1r, (outs GR32:$dst), (ins),
+ "rdgsbase{l}\t$dst", []>, TB, XS;
+ def RDGSBASE64 : RI<0xAE, MRM1r, (outs GR64:$dst), (ins),
+ "rdgsbase{q}\t$dst", []>, TB, XS;
+ def WRFSBASE : I<0xAE, MRM2r, (outs), (ins GR32:$dst),
+ "wrfsbase{l}\t$dst", []>, TB, XS;
+ def WRFSBASE64 : RI<0xAE, MRM2r, (outs), (ins GR64:$dst),
+ "wrfsbase{q}\t$dst", []>, TB, XS;
+ def WRGSBASE : I<0xAE, MRM3r, (outs), (ins GR32:$dst),
+ "wrgsbase{l}\t$dst", []>, TB, XS;
+ def WRGSBASE64 : RI<0xAE, MRM3r, (outs), (ins GR64:$dst),
+ "wrgsbase{q}\t$dst", []>, TB, XS;
+}
diff --git a/contrib/llvm/lib/Target/X86/X86InstrVMX.td b/contrib/llvm/lib/Target/X86/X86InstrVMX.td
new file mode 100644
index 0000000..09a7a7d0c
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrVMX.td
@@ -0,0 +1,60 @@
+//===- X86InstrVMX.td - VMX Instruction Set Extension ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the instructions that make up the Intel VMX instruction
+// set.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// VMX instructions
+
+// 66 0F 38 80
+def INVEPT32 : I<0x80, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2),
+ "invept {$src2, $src1|$src1, $src2}", []>, OpSize, T8;
+def INVEPT64 : I<0x80, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
+ "invept {$src2, $src1|$src1, $src2}", []>, OpSize, T8;
+// 66 0F 38 81
+def INVVPID32 : I<0x81, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2),
+ "invvpid {$src2, $src1|$src1, $src2}", []>, OpSize, T8;
+def INVVPID64 : I<0x81, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
+ "invvpid {$src2, $src1|$src1, $src2}", []>, OpSize, T8;
+// 0F 01 C1
+def VMCALL : I<0x01, MRM_C1, (outs), (ins), "vmcall", []>, TB;
+def VMCLEARm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs),
+ "vmclear\t$vmcs", []>, OpSize, TB;
+// 0F 01 C2
+def VMLAUNCH : I<0x01, MRM_C2, (outs), (ins), "vmlaunch", []>, TB;
+// 0F 01 C3
+def VMRESUME : I<0x01, MRM_C3, (outs), (ins), "vmresume", []>, TB;
+def VMPTRLDm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs),
+ "vmptrld\t$vmcs", []>, TB;
+def VMPTRSTm : I<0xC7, MRM7m, (outs i64mem:$vmcs), (ins),
+ "vmptrst\t$vmcs", []>, TB;
+def VMREAD64rm : I<0x78, MRMDestMem, (outs i64mem:$dst), (ins GR64:$src),
+ "vmread{q}\t{$src, $dst|$dst, $src}", []>, TB;
+def VMREAD64rr : I<0x78, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
+ "vmread{q}\t{$src, $dst|$dst, $src}", []>, TB;
+def VMREAD32rm : I<0x78, MRMDestMem, (outs i32mem:$dst), (ins GR32:$src),
+ "vmread{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def VMREAD32rr : I<0x78, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
+ "vmread{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def VMWRITE64rm : I<0x79, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "vmwrite{q}\t{$src, $dst|$dst, $src}", []>, TB;
+def VMWRITE64rr : I<0x79, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+ "vmwrite{q}\t{$src, $dst|$dst, $src}", []>, TB;
+def VMWRITE32rm : I<0x79, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "vmwrite{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def VMWRITE32rr : I<0x79, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+ "vmwrite{l}\t{$src, $dst|$dst, $src}", []>, TB;
+// 0F 01 C4
+def VMXOFF : I<0x01, MRM_C4, (outs), (ins), "vmxoff", []>, TB;
+def VMXON : I<0xC7, MRM6m, (outs), (ins i64mem:$vmxon),
+ "vmxon\t{$vmxon}", []>, XS;
+
diff --git a/contrib/llvm/lib/Target/X86/X86JITInfo.cpp b/contrib/llvm/lib/Target/X86/X86JITInfo.cpp
new file mode 100644
index 0000000..3f88fa6
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86JITInfo.cpp
@@ -0,0 +1,574 @@
+//===-- X86JITInfo.cpp - Implement the JIT interfaces for the X86 target --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the JIT interfaces for the X86 target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jit"
+#include "X86JITInfo.h"
+#include "X86Relocations.h"
+#include "X86Subtarget.h"
+#include "X86TargetMachine.h"
+#include "llvm/Function.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Valgrind.h"
+#include <cstdlib>
+#include <cstring>
+using namespace llvm;
+
+// Determine the platform we're running on
+#if defined (__x86_64__) || defined (_M_AMD64) || defined (_M_X64)
+# define X86_64_JIT
+#elif defined(__i386__) || defined(i386) || defined(_M_IX86)
+# define X86_32_JIT
+#endif
+
+void X86JITInfo::replaceMachineCodeForFunction(void *Old, void *New) {
+ unsigned char *OldByte = (unsigned char *)Old;
+ *OldByte++ = 0xE9; // Emit JMP opcode.
+ unsigned *OldWord = (unsigned *)OldByte;
+ unsigned NewAddr = (intptr_t)New;
+ unsigned OldAddr = (intptr_t)OldWord;
+ *OldWord = NewAddr - OldAddr - 4; // Emit PC-relative addr of New code.
+
+ // X86 doesn't need to invalidate the processor cache, so just invalidate
+ // Valgrind's cache directly.
+ sys::ValgrindDiscardTranslations(Old, 5);
+}
+
+
+/// JITCompilerFunction - This contains the address of the JIT function used to
+/// compile a function lazily.
+static TargetJITInfo::JITCompilerFn JITCompilerFunction;
+
+// Get the ASMPREFIX for the current host. This is often '_'.
+#ifndef __USER_LABEL_PREFIX__
+#define __USER_LABEL_PREFIX__
+#endif
+#define GETASMPREFIX2(X) #X
+#define GETASMPREFIX(X) GETASMPREFIX2(X)
+#define ASMPREFIX GETASMPREFIX(__USER_LABEL_PREFIX__)
+
+// For ELF targets, use a .size and .type directive, to let tools
+// know the extent of functions defined in assembler.
+#if defined(__ELF__)
+# define SIZE(sym) ".size " #sym ", . - " #sym "\n"
+# define TYPE_FUNCTION(sym) ".type " #sym ", @function\n"
+#else
+# define SIZE(sym)
+# define TYPE_FUNCTION(sym)
+#endif
+
+// Provide a convenient way for disabling usage of CFI directives.
+// This is needed for old/broken assemblers (for example, gas on
+// Darwin is pretty old and doesn't support these directives)
+#if defined(__APPLE__)
+# define CFI(x)
+#else
+// FIXME: Disable this until we really want to use it. Also, we will
+// need to add some workarounds for compilers, which support
+// only subset of these directives.
+# define CFI(x)
+#endif
+
+// Provide a wrapper for X86CompilationCallback2 that saves non-traditional
+// callee saved registers, for the fastcc calling convention.
+extern "C" {
+#if defined(X86_64_JIT)
+# ifndef _MSC_VER
+ // No need to save EAX/EDX for X86-64.
+ void X86CompilationCallback(void);
+ asm(
+ ".text\n"
+ ".align 8\n"
+ ".globl " ASMPREFIX "X86CompilationCallback\n"
+ TYPE_FUNCTION(X86CompilationCallback)
+ ASMPREFIX "X86CompilationCallback:\n"
+ CFI(".cfi_startproc\n")
+ // Save RBP
+ "pushq %rbp\n"
+ CFI(".cfi_def_cfa_offset 16\n")
+ CFI(".cfi_offset %rbp, -16\n")
+ // Save RSP
+ "movq %rsp, %rbp\n"
+ CFI(".cfi_def_cfa_register %rbp\n")
+ // Save all int arg registers
+ "pushq %rdi\n"
+ CFI(".cfi_rel_offset %rdi, 0\n")
+ "pushq %rsi\n"
+ CFI(".cfi_rel_offset %rsi, 8\n")
+ "pushq %rdx\n"
+ CFI(".cfi_rel_offset %rdx, 16\n")
+ "pushq %rcx\n"
+ CFI(".cfi_rel_offset %rcx, 24\n")
+ "pushq %r8\n"
+ CFI(".cfi_rel_offset %r8, 32\n")
+ "pushq %r9\n"
+ CFI(".cfi_rel_offset %r9, 40\n")
+ // Align stack on 16-byte boundary. ESP might not be properly aligned
+ // (8 byte) if this is called from an indirect stub.
+ "andq $-16, %rsp\n"
+ // Save all XMM arg registers
+ "subq $128, %rsp\n"
+ "movaps %xmm0, (%rsp)\n"
+ "movaps %xmm1, 16(%rsp)\n"
+ "movaps %xmm2, 32(%rsp)\n"
+ "movaps %xmm3, 48(%rsp)\n"
+ "movaps %xmm4, 64(%rsp)\n"
+ "movaps %xmm5, 80(%rsp)\n"
+ "movaps %xmm6, 96(%rsp)\n"
+ "movaps %xmm7, 112(%rsp)\n"
+ // JIT callee
+#ifdef _WIN64
+ "subq $32, %rsp\n"
+ "movq %rbp, %rcx\n" // Pass prev frame and return address
+ "movq 8(%rbp), %rdx\n"
+ "call " ASMPREFIX "X86CompilationCallback2\n"
+ "addq $32, %rsp\n"
+#else
+ "movq %rbp, %rdi\n" // Pass prev frame and return address
+ "movq 8(%rbp), %rsi\n"
+ "call " ASMPREFIX "X86CompilationCallback2\n"
+#endif
+ // Restore all XMM arg registers
+ "movaps 112(%rsp), %xmm7\n"
+ "movaps 96(%rsp), %xmm6\n"
+ "movaps 80(%rsp), %xmm5\n"
+ "movaps 64(%rsp), %xmm4\n"
+ "movaps 48(%rsp), %xmm3\n"
+ "movaps 32(%rsp), %xmm2\n"
+ "movaps 16(%rsp), %xmm1\n"
+ "movaps (%rsp), %xmm0\n"
+ // Restore RSP
+ "movq %rbp, %rsp\n"
+ CFI(".cfi_def_cfa_register %rsp\n")
+ // Restore all int arg registers
+ "subq $48, %rsp\n"
+ CFI(".cfi_adjust_cfa_offset 48\n")
+ "popq %r9\n"
+ CFI(".cfi_adjust_cfa_offset -8\n")
+ CFI(".cfi_restore %r9\n")
+ "popq %r8\n"
+ CFI(".cfi_adjust_cfa_offset -8\n")
+ CFI(".cfi_restore %r8\n")
+ "popq %rcx\n"
+ CFI(".cfi_adjust_cfa_offset -8\n")
+ CFI(".cfi_restore %rcx\n")
+ "popq %rdx\n"
+ CFI(".cfi_adjust_cfa_offset -8\n")
+ CFI(".cfi_restore %rdx\n")
+ "popq %rsi\n"
+ CFI(".cfi_adjust_cfa_offset -8\n")
+ CFI(".cfi_restore %rsi\n")
+ "popq %rdi\n"
+ CFI(".cfi_adjust_cfa_offset -8\n")
+ CFI(".cfi_restore %rdi\n")
+ // Restore RBP
+ "popq %rbp\n"
+ CFI(".cfi_adjust_cfa_offset -8\n")
+ CFI(".cfi_restore %rbp\n")
+ "ret\n"
+ CFI(".cfi_endproc\n")
+ SIZE(X86CompilationCallback)
+ );
+# else
+ // No inline assembler support on this platform. The routine is in external
+ // file.
+ void X86CompilationCallback();
+
+# endif
+#elif defined (X86_32_JIT)
+# ifndef _MSC_VER
+ void X86CompilationCallback(void);
+ asm(
+ ".text\n"
+ ".align 8\n"
+ ".globl " ASMPREFIX "X86CompilationCallback\n"
+ TYPE_FUNCTION(X86CompilationCallback)
+ ASMPREFIX "X86CompilationCallback:\n"
+ CFI(".cfi_startproc\n")
+ "pushl %ebp\n"
+ CFI(".cfi_def_cfa_offset 8\n")
+ CFI(".cfi_offset %ebp, -8\n")
+ "movl %esp, %ebp\n" // Standard prologue
+ CFI(".cfi_def_cfa_register %ebp\n")
+ "pushl %eax\n"
+ CFI(".cfi_rel_offset %eax, 0\n")
+ "pushl %edx\n" // Save EAX/EDX/ECX
+ CFI(".cfi_rel_offset %edx, 4\n")
+ "pushl %ecx\n"
+ CFI(".cfi_rel_offset %ecx, 8\n")
+# if defined(__APPLE__)
+ "andl $-16, %esp\n" // Align ESP on 16-byte boundary
+# endif
+ "subl $16, %esp\n"
+ "movl 4(%ebp), %eax\n" // Pass prev frame and return address
+ "movl %eax, 4(%esp)\n"
+ "movl %ebp, (%esp)\n"
+ "call " ASMPREFIX "X86CompilationCallback2\n"
+ "movl %ebp, %esp\n" // Restore ESP
+ CFI(".cfi_def_cfa_register %esp\n")
+ "subl $12, %esp\n"
+ CFI(".cfi_adjust_cfa_offset 12\n")
+ "popl %ecx\n"
+ CFI(".cfi_adjust_cfa_offset -4\n")
+ CFI(".cfi_restore %ecx\n")
+ "popl %edx\n"
+ CFI(".cfi_adjust_cfa_offset -4\n")
+ CFI(".cfi_restore %edx\n")
+ "popl %eax\n"
+ CFI(".cfi_adjust_cfa_offset -4\n")
+ CFI(".cfi_restore %eax\n")
+ "popl %ebp\n"
+ CFI(".cfi_adjust_cfa_offset -4\n")
+ CFI(".cfi_restore %ebp\n")
+ "ret\n"
+ CFI(".cfi_endproc\n")
+ SIZE(X86CompilationCallback)
+ );
+
+ // Same as X86CompilationCallback but also saves XMM argument registers.
+ void X86CompilationCallback_SSE(void);
+ asm(
+ ".text\n"
+ ".align 8\n"
+ ".globl " ASMPREFIX "X86CompilationCallback_SSE\n"
+ TYPE_FUNCTION(X86CompilationCallback_SSE)
+ ASMPREFIX "X86CompilationCallback_SSE:\n"
+ CFI(".cfi_startproc\n")
+ "pushl %ebp\n"
+ CFI(".cfi_def_cfa_offset 8\n")
+ CFI(".cfi_offset %ebp, -8\n")
+ "movl %esp, %ebp\n" // Standard prologue
+ CFI(".cfi_def_cfa_register %ebp\n")
+ "pushl %eax\n"
+ CFI(".cfi_rel_offset %eax, 0\n")
+ "pushl %edx\n" // Save EAX/EDX/ECX
+ CFI(".cfi_rel_offset %edx, 4\n")
+ "pushl %ecx\n"
+ CFI(".cfi_rel_offset %ecx, 8\n")
+ "andl $-16, %esp\n" // Align ESP on 16-byte boundary
+ // Save all XMM arg registers
+ "subl $64, %esp\n"
+ // FIXME: provide frame move information for xmm registers.
+ // This can be tricky, because CFA register is ebp (unaligned)
+ // and we need to produce offsets relative to it.
+ "movaps %xmm0, (%esp)\n"
+ "movaps %xmm1, 16(%esp)\n"
+ "movaps %xmm2, 32(%esp)\n"
+ "movaps %xmm3, 48(%esp)\n"
+ "subl $16, %esp\n"
+ "movl 4(%ebp), %eax\n" // Pass prev frame and return address
+ "movl %eax, 4(%esp)\n"
+ "movl %ebp, (%esp)\n"
+ "call " ASMPREFIX "X86CompilationCallback2\n"
+ "addl $16, %esp\n"
+ "movaps 48(%esp), %xmm3\n"
+ CFI(".cfi_restore %xmm3\n")
+ "movaps 32(%esp), %xmm2\n"
+ CFI(".cfi_restore %xmm2\n")
+ "movaps 16(%esp), %xmm1\n"
+ CFI(".cfi_restore %xmm1\n")
+ "movaps (%esp), %xmm0\n"
+ CFI(".cfi_restore %xmm0\n")
+ "movl %ebp, %esp\n" // Restore ESP
+ CFI(".cfi_def_cfa_register esp\n")
+ "subl $12, %esp\n"
+ CFI(".cfi_adjust_cfa_offset 12\n")
+ "popl %ecx\n"
+ CFI(".cfi_adjust_cfa_offset -4\n")
+ CFI(".cfi_restore %ecx\n")
+ "popl %edx\n"
+ CFI(".cfi_adjust_cfa_offset -4\n")
+ CFI(".cfi_restore %edx\n")
+ "popl %eax\n"
+ CFI(".cfi_adjust_cfa_offset -4\n")
+ CFI(".cfi_restore %eax\n")
+ "popl %ebp\n"
+ CFI(".cfi_adjust_cfa_offset -4\n")
+ CFI(".cfi_restore %ebp\n")
+ "ret\n"
+ CFI(".cfi_endproc\n")
+ SIZE(X86CompilationCallback_SSE)
+ );
+# else
+ void X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr);
+
+ _declspec(naked) void X86CompilationCallback(void) {
+ __asm {
+ push ebp
+ mov ebp, esp
+ push eax
+ push edx
+ push ecx
+ and esp, -16
+ sub esp, 16
+ mov eax, dword ptr [ebp+4]
+ mov dword ptr [esp+4], eax
+ mov dword ptr [esp], ebp
+ call X86CompilationCallback2
+ mov esp, ebp
+ sub esp, 12
+ pop ecx
+ pop edx
+ pop eax
+ pop ebp
+ ret
+ }
+ }
+
+# endif // _MSC_VER
+
+#else // Not an i386 host
+ void X86CompilationCallback() {
+ llvm_unreachable("Cannot call X86CompilationCallback() on a non-x86 arch!");
+ }
+#endif
+}
+
+/// X86CompilationCallback2 - This is the target-specific function invoked by the
+/// function stub when we did not know the real target of a call. This function
+/// must locate the start of the stub or call site and pass it into the JIT
+/// compiler function.
+extern "C" {
+#if !(defined (X86_64_JIT) && defined(_MSC_VER))
+ // the following function is called only from this translation unit,
+ // unless we are under 64bit Windows with MSC, where there is
+ // no support for inline assembly
+static
+#endif
+void LLVM_ATTRIBUTE_USED
+X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) {
+ intptr_t *RetAddrLoc = &StackPtr[1];
+ assert(*RetAddrLoc == RetAddr &&
+ "Could not find return address on the stack!");
+
+ // It's a stub if there is an interrupt marker after the call.
+ bool isStub = ((unsigned char*)RetAddr)[0] == 0xCE;
+
+ // The call instruction should have pushed the return value onto the stack...
+#if defined (X86_64_JIT)
+ RetAddr--; // Backtrack to the reference itself...
+#else
+ RetAddr -= 4; // Backtrack to the reference itself...
+#endif
+
+#if 0
+ DEBUG(dbgs() << "In callback! Addr=" << (void*)RetAddr
+ << " ESP=" << (void*)StackPtr
+ << ": Resolving call to function: "
+ << TheVM->getFunctionReferencedName((void*)RetAddr) << "\n");
+#endif
+
+ // Sanity check to make sure this really is a call instruction.
+#if defined (X86_64_JIT)
+ assert(((unsigned char*)RetAddr)[-2] == 0x41 &&"Not a call instr!");
+ assert(((unsigned char*)RetAddr)[-1] == 0xFF &&"Not a call instr!");
+#else
+ assert(((unsigned char*)RetAddr)[-1] == 0xE8 &&"Not a call instr!");
+#endif
+
+ intptr_t NewVal = (intptr_t)JITCompilerFunction((void*)RetAddr);
+
+ // Rewrite the call target... so that we don't end up here every time we
+ // execute the call.
+#if defined (X86_64_JIT)
+ assert(isStub &&
+ "X86-64 doesn't support rewriting non-stub lazy compilation calls:"
+ " the call instruction varies too much.");
+#else
+ *(intptr_t *)RetAddr = (intptr_t)(NewVal-RetAddr-4);
+#endif
+
+ if (isStub) {
+ // If this is a stub, rewrite the call into an unconditional branch
+ // instruction so that two return addresses are not pushed onto the stack
+ // when the requested function finally gets called. This also makes the
+ // 0xCE byte (interrupt) dead, so the marker doesn't effect anything.
+#if defined (X86_64_JIT)
+ // If the target address is within 32-bit range of the stub, use a
+ // PC-relative branch instead of loading the actual address. (This is
+ // considerably shorter than the 64-bit immediate load already there.)
+ // We assume here intptr_t is 64 bits.
+ intptr_t diff = NewVal-RetAddr+7;
+ if (diff >= -2147483648LL && diff <= 2147483647LL) {
+ *(unsigned char*)(RetAddr-0xc) = 0xE9;
+ *(intptr_t *)(RetAddr-0xb) = diff & 0xffffffff;
+ } else {
+ *(intptr_t *)(RetAddr - 0xa) = NewVal;
+ ((unsigned char*)RetAddr)[0] = (2 | (4 << 3) | (3 << 6));
+ }
+ sys::ValgrindDiscardTranslations((void*)(RetAddr-0xc), 0xd);
+#else
+ ((unsigned char*)RetAddr)[-1] = 0xE9;
+ sys::ValgrindDiscardTranslations((void*)(RetAddr-1), 5);
+#endif
+ }
+
+ // Change the return address to reexecute the call instruction...
+#if defined (X86_64_JIT)
+ *RetAddrLoc -= 0xd;
+#else
+ *RetAddrLoc -= 5;
+#endif
+}
+}
+
+TargetJITInfo::LazyResolverFn
+X86JITInfo::getLazyResolverFunction(JITCompilerFn F) {
+ JITCompilerFunction = F;
+
+#if defined (X86_32_JIT) && !defined (_MSC_VER)
+ if (Subtarget->hasSSE1())
+ return X86CompilationCallback_SSE;
+#endif
+
+ return X86CompilationCallback;
+}
+
+X86JITInfo::X86JITInfo(X86TargetMachine &tm) : TM(tm) {
+ Subtarget = &TM.getSubtarget<X86Subtarget>();
+ useGOT = 0;
+ TLSOffset = 0;
+}
+
+void *X86JITInfo::emitGlobalValueIndirectSym(const GlobalValue* GV, void *ptr,
+ JITCodeEmitter &JCE) {
+#if defined (X86_64_JIT)
+ const unsigned Alignment = 8;
+ uint8_t Buffer[8];
+ uint8_t *Cur = Buffer;
+ MachineCodeEmitter::emitWordLEInto(Cur, (unsigned)(intptr_t)ptr);
+ MachineCodeEmitter::emitWordLEInto(Cur, (unsigned)(((intptr_t)ptr) >> 32));
+#else
+ const unsigned Alignment = 4;
+ uint8_t Buffer[4];
+ uint8_t *Cur = Buffer;
+ MachineCodeEmitter::emitWordLEInto(Cur, (intptr_t)ptr);
+#endif
+ return JCE.allocIndirectGV(GV, Buffer, sizeof(Buffer), Alignment);
+}
+
+TargetJITInfo::StubLayout X86JITInfo::getStubLayout() {
+ // The 64-bit stub contains:
+ // movabs r10 <- 8-byte-target-address # 10 bytes
+ // call|jmp *r10 # 3 bytes
+ // The 32-bit stub contains a 5-byte call|jmp.
+ // If the stub is a call to the compilation callback, an extra byte is added
+ // to mark it as a stub.
+ StubLayout Result = {14, 4};
+ return Result;
+}
+
+void *X86JITInfo::emitFunctionStub(const Function* F, void *Target,
+ JITCodeEmitter &JCE) {
+ // Note, we cast to intptr_t here to silence a -pedantic warning that
+ // complains about casting a function pointer to a normal pointer.
+#if defined (X86_32_JIT) && !defined (_MSC_VER)
+ bool NotCC = (Target != (void*)(intptr_t)X86CompilationCallback &&
+ Target != (void*)(intptr_t)X86CompilationCallback_SSE);
+#else
+ bool NotCC = Target != (void*)(intptr_t)X86CompilationCallback;
+#endif
+ JCE.emitAlignment(4);
+ void *Result = (void*)JCE.getCurrentPCValue();
+ if (NotCC) {
+#if defined (X86_64_JIT)
+ JCE.emitByte(0x49); // REX prefix
+ JCE.emitByte(0xB8+2); // movabsq r10
+ JCE.emitWordLE((unsigned)(intptr_t)Target);
+ JCE.emitWordLE((unsigned)(((intptr_t)Target) >> 32));
+ JCE.emitByte(0x41); // REX prefix
+ JCE.emitByte(0xFF); // jmpq *r10
+ JCE.emitByte(2 | (4 << 3) | (3 << 6));
+#else
+ JCE.emitByte(0xE9);
+ JCE.emitWordLE((intptr_t)Target-JCE.getCurrentPCValue()-4);
+#endif
+ return Result;
+ }
+
+#if defined (X86_64_JIT)
+ JCE.emitByte(0x49); // REX prefix
+ JCE.emitByte(0xB8+2); // movabsq r10
+ JCE.emitWordLE((unsigned)(intptr_t)Target);
+ JCE.emitWordLE((unsigned)(((intptr_t)Target) >> 32));
+ JCE.emitByte(0x41); // REX prefix
+ JCE.emitByte(0xFF); // callq *r10
+ JCE.emitByte(2 | (2 << 3) | (3 << 6));
+#else
+ JCE.emitByte(0xE8); // Call with 32 bit pc-rel destination...
+
+ JCE.emitWordLE((intptr_t)Target-JCE.getCurrentPCValue()-4);
+#endif
+
+ // This used to use 0xCD, but that value is used by JITMemoryManager to
+ // initialize the buffer with garbage, which means it may follow a
+ // noreturn function call, confusing X86CompilationCallback2. PR 4929.
+ JCE.emitByte(0xCE); // Interrupt - Just a marker identifying the stub!
+ return Result;
+}
+
+/// getPICJumpTableEntry - Returns the value of the jumptable entry for the
+/// specific basic block.
+uintptr_t X86JITInfo::getPICJumpTableEntry(uintptr_t BB, uintptr_t Entry) {
+#if defined(X86_64_JIT)
+ return BB - Entry;
+#else
+ return BB - PICBase;
+#endif
+}
+
+/// relocate - Before the JIT can run a block of code that has been emitted,
+/// it must rewrite the code to contain the actual addresses of any
+/// referenced global symbols.
+void X86JITInfo::relocate(void *Function, MachineRelocation *MR,
+ unsigned NumRelocs, unsigned char* GOTBase) {
+ for (unsigned i = 0; i != NumRelocs; ++i, ++MR) {
+ void *RelocPos = (char*)Function + MR->getMachineCodeOffset();
+ intptr_t ResultPtr = (intptr_t)MR->getResultPointer();
+ switch ((X86::RelocationType)MR->getRelocationType()) {
+ case X86::reloc_pcrel_word: {
+ // PC relative relocation, add the relocated value to the value already in
+ // memory, after we adjust it for where the PC is.
+ ResultPtr = ResultPtr -(intptr_t)RelocPos - 4 - MR->getConstantVal();
+ *((unsigned*)RelocPos) += (unsigned)ResultPtr;
+ break;
+ }
+ case X86::reloc_picrel_word: {
+ // PIC base relative relocation, add the relocated value to the value
+ // already in memory, after we adjust it for where the PIC base is.
+ ResultPtr = ResultPtr - ((intptr_t)Function + MR->getConstantVal());
+ *((unsigned*)RelocPos) += (unsigned)ResultPtr;
+ break;
+ }
+ case X86::reloc_absolute_word:
+ case X86::reloc_absolute_word_sext:
+ // Absolute relocation, just add the relocated value to the value already
+ // in memory.
+ *((unsigned*)RelocPos) += (unsigned)ResultPtr;
+ break;
+ case X86::reloc_absolute_dword:
+ *((intptr_t*)RelocPos) += ResultPtr;
+ break;
+ }
+ }
+}
+
+char* X86JITInfo::allocateThreadLocalMemory(size_t size) {
+#if defined(X86_32_JIT) && !defined(__APPLE__) && !defined(_MSC_VER)
+ TLSOffset -= size;
+ return TLSOffset;
+#else
+ llvm_unreachable("Cannot allocate thread local storage on this arch!");
+ return 0;
+#endif
+}
diff --git a/contrib/llvm/lib/Target/X86/X86JITInfo.h b/contrib/llvm/lib/Target/X86/X86JITInfo.h
new file mode 100644
index 0000000..238420c
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86JITInfo.h
@@ -0,0 +1,81 @@
+//===- X86JITInfo.h - X86 implementation of the JIT interface --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the X86 implementation of the TargetJITInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86JITINFO_H
+#define X86JITINFO_H
+
+#include "llvm/Function.h"
+#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/Target/TargetJITInfo.h"
+
+namespace llvm {
+ class X86TargetMachine;
+ class X86Subtarget;
+
+ class X86JITInfo : public TargetJITInfo {
+ X86TargetMachine &TM;
+ const X86Subtarget *Subtarget;
+ uintptr_t PICBase;
+ char* TLSOffset;
+ public:
+ explicit X86JITInfo(X86TargetMachine &tm);
+
+ /// replaceMachineCodeForFunction - Make it so that calling the function
+ /// whose machine code is at OLD turns into a call to NEW, perhaps by
+ /// overwriting OLD with a branch to NEW. This is used for self-modifying
+ /// code.
+ ///
+ virtual void replaceMachineCodeForFunction(void *Old, void *New);
+
+ /// emitGlobalValueIndirectSym - Use the specified JITCodeEmitter object
+ /// to emit an indirect symbol which contains the address of the specified
+ /// ptr.
+ virtual void *emitGlobalValueIndirectSym(const GlobalValue* GV, void *ptr,
+ JITCodeEmitter &JCE);
+
+ // getStubLayout - Returns the size and alignment of the largest call stub
+ // on X86.
+ virtual StubLayout getStubLayout();
+
+ /// emitFunctionStub - Use the specified JITCodeEmitter object to emit a
+ /// small native function that simply calls the function at the specified
+ /// address.
+ virtual void *emitFunctionStub(const Function* F, void *Target,
+ JITCodeEmitter &JCE);
+
+ /// getPICJumpTableEntry - Returns the value of the jumptable entry for the
+ /// specific basic block.
+ virtual uintptr_t getPICJumpTableEntry(uintptr_t BB, uintptr_t JTBase);
+
+ /// getLazyResolverFunction - Expose the lazy resolver to the JIT.
+ virtual LazyResolverFn getLazyResolverFunction(JITCompilerFn);
+
+ /// relocate - Before the JIT can run a block of code that has been emitted,
+ /// it must rewrite the code to contain the actual addresses of any
+ /// referenced global symbols.
+ virtual void relocate(void *Function, MachineRelocation *MR,
+ unsigned NumRelocs, unsigned char* GOTBase);
+
+ /// allocateThreadLocalMemory - Each target has its own way of
+ /// handling thread local variables. This method returns a value only
+ /// meaningful to the target.
+ virtual char* allocateThreadLocalMemory(size_t size);
+
+ /// setPICBase / getPICBase - Getter / setter of PICBase, used to compute
+ /// PIC jumptable entry.
+ void setPICBase(uintptr_t Base) { PICBase = Base; }
+ uintptr_t getPICBase() const { return PICBase; }
+ };
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp b/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp
new file mode 100644
index 0000000..50bc14d
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -0,0 +1,701 @@
+//===-- X86MCInstLower.cpp - Convert X86 MachineInstr to an MCInst --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains code to lower X86 MachineInstrs to their corresponding
+// MCInst records.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstPrinter/X86ATTInstPrinter.h"
+#include "X86MCInstLower.h"
+#include "X86AsmPrinter.h"
+#include "X86COFFMachineModuleInfo.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Type.h"
+using namespace llvm;
+
+X86MCInstLower::X86MCInstLower(Mangler *mang, const MachineFunction &mf,
+ X86AsmPrinter &asmprinter)
+: Ctx(mf.getContext()), Mang(mang), MF(mf), TM(mf.getTarget()),
+ MAI(*TM.getMCAsmInfo()), AsmPrinter(asmprinter) {}
+
+MachineModuleInfoMachO &X86MCInstLower::getMachOMMI() const {
+ return MF.getMMI().getObjFileInfo<MachineModuleInfoMachO>();
+}
+
+
+/// GetSymbolFromOperand - Lower an MO_GlobalAddress or MO_ExternalSymbol
+/// operand to an MCSymbol.
+MCSymbol *X86MCInstLower::
+GetSymbolFromOperand(const MachineOperand &MO) const {
+ assert((MO.isGlobal() || MO.isSymbol()) && "Isn't a symbol reference");
+
+ SmallString<128> Name;
+
+ if (!MO.isGlobal()) {
+ assert(MO.isSymbol());
+ Name += MAI.getGlobalPrefix();
+ Name += MO.getSymbolName();
+ } else {
+ const GlobalValue *GV = MO.getGlobal();
+ bool isImplicitlyPrivate = false;
+ if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB ||
+ MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY ||
+ MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY_PIC_BASE ||
+ MO.getTargetFlags() == X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE)
+ isImplicitlyPrivate = true;
+
+ Mang->getNameWithPrefix(Name, GV, isImplicitlyPrivate);
+ }
+
+ // If the target flags on the operand changes the name of the symbol, do that
+ // before we return the symbol.
+ switch (MO.getTargetFlags()) {
+ default: break;
+ case X86II::MO_DLLIMPORT: {
+ // Handle dllimport linkage.
+ const char *Prefix = "__imp_";
+ Name.insert(Name.begin(), Prefix, Prefix+strlen(Prefix));
+ break;
+ }
+ case X86II::MO_DARWIN_NONLAZY:
+ case X86II::MO_DARWIN_NONLAZY_PIC_BASE: {
+ Name += "$non_lazy_ptr";
+ MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str());
+
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ getMachOMMI().getGVStubEntry(Sym);
+ if (StubSym.getPointer() == 0) {
+ assert(MO.isGlobal() && "Extern symbol not handled yet");
+ StubSym =
+ MachineModuleInfoImpl::
+ StubValueTy(Mang->getSymbol(MO.getGlobal()),
+ !MO.getGlobal()->hasInternalLinkage());
+ }
+ return Sym;
+ }
+ case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE: {
+ Name += "$non_lazy_ptr";
+ MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str());
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ getMachOMMI().getHiddenGVStubEntry(Sym);
+ if (StubSym.getPointer() == 0) {
+ assert(MO.isGlobal() && "Extern symbol not handled yet");
+ StubSym =
+ MachineModuleInfoImpl::
+ StubValueTy(Mang->getSymbol(MO.getGlobal()),
+ !MO.getGlobal()->hasInternalLinkage());
+ }
+ return Sym;
+ }
+ case X86II::MO_DARWIN_STUB: {
+ Name += "$stub";
+ MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str());
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ getMachOMMI().getFnStubEntry(Sym);
+ if (StubSym.getPointer())
+ return Sym;
+
+ if (MO.isGlobal()) {
+ StubSym =
+ MachineModuleInfoImpl::
+ StubValueTy(Mang->getSymbol(MO.getGlobal()),
+ !MO.getGlobal()->hasInternalLinkage());
+ } else {
+ Name.erase(Name.end()-5, Name.end());
+ StubSym =
+ MachineModuleInfoImpl::
+ StubValueTy(Ctx.GetOrCreateSymbol(Name.str()), false);
+ }
+ return Sym;
+ }
+ }
+
+ return Ctx.GetOrCreateSymbol(Name.str());
+}
+
+MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
+ MCSymbol *Sym) const {
+ // FIXME: We would like an efficient form for this, so we don't have to do a
+ // lot of extra uniquing.
+ const MCExpr *Expr = 0;
+ MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None;
+
+ switch (MO.getTargetFlags()) {
+ default: llvm_unreachable("Unknown target flag on GV operand");
+ case X86II::MO_NO_FLAG: // No flag.
+ // These affect the name of the symbol, not any suffix.
+ case X86II::MO_DARWIN_NONLAZY:
+ case X86II::MO_DLLIMPORT:
+ case X86II::MO_DARWIN_STUB:
+ break;
+
+ case X86II::MO_TLVP: RefKind = MCSymbolRefExpr::VK_TLVP; break;
+ case X86II::MO_TLVP_PIC_BASE:
+ Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx);
+ // Subtract the pic base.
+ Expr = MCBinaryExpr::CreateSub(Expr,
+ MCSymbolRefExpr::Create(MF.getPICBaseSymbol(),
+ Ctx),
+ Ctx);
+ break;
+ case X86II::MO_TLSGD: RefKind = MCSymbolRefExpr::VK_TLSGD; break;
+ case X86II::MO_GOTTPOFF: RefKind = MCSymbolRefExpr::VK_GOTTPOFF; break;
+ case X86II::MO_INDNTPOFF: RefKind = MCSymbolRefExpr::VK_INDNTPOFF; break;
+ case X86II::MO_TPOFF: RefKind = MCSymbolRefExpr::VK_TPOFF; break;
+ case X86II::MO_NTPOFF: RefKind = MCSymbolRefExpr::VK_NTPOFF; break;
+ case X86II::MO_GOTPCREL: RefKind = MCSymbolRefExpr::VK_GOTPCREL; break;
+ case X86II::MO_GOT: RefKind = MCSymbolRefExpr::VK_GOT; break;
+ case X86II::MO_GOTOFF: RefKind = MCSymbolRefExpr::VK_GOTOFF; break;
+ case X86II::MO_PLT: RefKind = MCSymbolRefExpr::VK_PLT; break;
+ case X86II::MO_PIC_BASE_OFFSET:
+ case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
+ case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE:
+ Expr = MCSymbolRefExpr::Create(Sym, Ctx);
+ // Subtract the pic base.
+ Expr = MCBinaryExpr::CreateSub(Expr,
+ MCSymbolRefExpr::Create(MF.getPICBaseSymbol(), Ctx),
+ Ctx);
+ if (MO.isJTI() && MAI.hasSetDirective()) {
+ // If .set directive is supported, use it to reduce the number of
+ // relocations the assembler will generate for differences between
+ // local labels. This is only safe when the symbols are in the same
+ // section so we are restricting it to jumptable references.
+ MCSymbol *Label = Ctx.CreateTempSymbol();
+ AsmPrinter.OutStreamer.EmitAssignment(Label, Expr);
+ Expr = MCSymbolRefExpr::Create(Label, Ctx);
+ }
+ break;
+ }
+
+ if (Expr == 0)
+ Expr = MCSymbolRefExpr::Create(Sym, RefKind, Ctx);
+
+ if (!MO.isJTI() && MO.getOffset())
+ Expr = MCBinaryExpr::CreateAdd(Expr,
+ MCConstantExpr::Create(MO.getOffset(), Ctx),
+ Ctx);
+ return MCOperand::CreateExpr(Expr);
+}
+
+
+
+static void lower_subreg32(MCInst *MI, unsigned OpNo) {
+ // Convert registers in the addr mode according to subreg32.
+ unsigned Reg = MI->getOperand(OpNo).getReg();
+ if (Reg != 0)
+ MI->getOperand(OpNo).setReg(getX86SubSuperRegister(Reg, MVT::i32));
+}
+
+static void lower_lea64_32mem(MCInst *MI, unsigned OpNo) {
+ // Convert registers in the addr mode according to subreg64.
+ for (unsigned i = 0; i != 4; ++i) {
+ if (!MI->getOperand(OpNo+i).isReg()) continue;
+
+ unsigned Reg = MI->getOperand(OpNo+i).getReg();
+ if (Reg == 0) continue;
+
+ MI->getOperand(OpNo+i).setReg(getX86SubSuperRegister(Reg, MVT::i64));
+ }
+}
+
+/// LowerSubReg32_Op0 - Things like MOVZX16rr8 -> MOVZX32rr8.
+static void LowerSubReg32_Op0(MCInst &OutMI, unsigned NewOpc) {
+ OutMI.setOpcode(NewOpc);
+ lower_subreg32(&OutMI, 0);
+}
+/// LowerUnaryToTwoAddr - R = setb -> R = sbb R, R
+static void LowerUnaryToTwoAddr(MCInst &OutMI, unsigned NewOpc) {
+ OutMI.setOpcode(NewOpc);
+ OutMI.addOperand(OutMI.getOperand(0));
+ OutMI.addOperand(OutMI.getOperand(0));
+}
+
+/// \brief Simplify FOO $imm, %{al,ax,eax,rax} to FOO $imm, for instruction with
+/// a short fixed-register form.
+static void SimplifyShortImmForm(MCInst &Inst, unsigned Opcode) {
+ unsigned ImmOp = Inst.getNumOperands() - 1;
+ assert(Inst.getOperand(0).isReg() && Inst.getOperand(ImmOp).isImm() &&
+ ((Inst.getNumOperands() == 3 && Inst.getOperand(1).isReg() &&
+ Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg()) ||
+ Inst.getNumOperands() == 2) && "Unexpected instruction!");
+
+ // Check whether the destination register can be fixed.
+ unsigned Reg = Inst.getOperand(0).getReg();
+ if (Reg != X86::AL && Reg != X86::AX && Reg != X86::EAX && Reg != X86::RAX)
+ return;
+
+ // If so, rewrite the instruction.
+ MCOperand Saved = Inst.getOperand(ImmOp);
+ Inst = MCInst();
+ Inst.setOpcode(Opcode);
+ Inst.addOperand(Saved);
+}
+
+/// \brief Simplify things like MOV32rm to MOV32o32a.
+static void SimplifyShortMoveForm(X86AsmPrinter &Printer, MCInst &Inst,
+ unsigned Opcode) {
+ // Don't make these simplifications in 64-bit mode; other assemblers don't
+ // perform them because they make the code larger.
+ if (Printer.getSubtarget().is64Bit())
+ return;
+
+ bool IsStore = Inst.getOperand(0).isReg() && Inst.getOperand(1).isReg();
+ unsigned AddrBase = IsStore;
+ unsigned RegOp = IsStore ? 0 : 5;
+ unsigned AddrOp = AddrBase + 3;
+ assert(Inst.getNumOperands() == 6 && Inst.getOperand(RegOp).isReg() &&
+ Inst.getOperand(AddrBase + 0).isReg() && // base
+ Inst.getOperand(AddrBase + 1).isImm() && // scale
+ Inst.getOperand(AddrBase + 2).isReg() && // index register
+ (Inst.getOperand(AddrOp).isExpr() || // address
+ Inst.getOperand(AddrOp).isImm())&&
+ Inst.getOperand(AddrBase + 4).isReg() && // segment
+ "Unexpected instruction!");
+
+ // Check whether the destination register can be fixed.
+ unsigned Reg = Inst.getOperand(RegOp).getReg();
+ if (Reg != X86::AL && Reg != X86::AX && Reg != X86::EAX && Reg != X86::RAX)
+ return;
+
+ // Check whether this is an absolute address.
+ // FIXME: We know TLVP symbol refs aren't, but there should be a better way
+ // to do this here.
+ bool Absolute = true;
+ if (Inst.getOperand(AddrOp).isExpr()) {
+ const MCExpr *MCE = Inst.getOperand(AddrOp).getExpr();
+ if (const MCSymbolRefExpr *SRE = dyn_cast<MCSymbolRefExpr>(MCE))
+ if (SRE->getKind() == MCSymbolRefExpr::VK_TLVP)
+ Absolute = false;
+ }
+
+ if (Absolute &&
+ (Inst.getOperand(AddrBase + 0).getReg() != 0 ||
+ Inst.getOperand(AddrBase + 2).getReg() != 0 ||
+ Inst.getOperand(AddrBase + 4).getReg() != 0 ||
+ Inst.getOperand(AddrBase + 1).getImm() != 1))
+ return;
+
+ // If so, rewrite the instruction.
+ MCOperand Saved = Inst.getOperand(AddrOp);
+ Inst = MCInst();
+ Inst.setOpcode(Opcode);
+ Inst.addOperand(Saved);
+}
+
+void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
+ OutMI.setOpcode(MI->getOpcode());
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+
+ MCOperand MCOp;
+ switch (MO.getType()) {
+ default:
+ MI->dump();
+ llvm_unreachable("unknown operand type");
+ case MachineOperand::MO_Register:
+ // Ignore all implicit register operands.
+ if (MO.isImplicit()) continue;
+ MCOp = MCOperand::CreateReg(MO.getReg());
+ break;
+ case MachineOperand::MO_Immediate:
+ MCOp = MCOperand::CreateImm(MO.getImm());
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
+ MO.getMBB()->getSymbol(), Ctx));
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ case MachineOperand::MO_ExternalSymbol:
+ MCOp = LowerSymbolOperand(MO, GetSymbolFromOperand(MO));
+ break;
+ case MachineOperand::MO_JumpTableIndex:
+ MCOp = LowerSymbolOperand(MO, AsmPrinter.GetJTISymbol(MO.getIndex()));
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ MCOp = LowerSymbolOperand(MO, AsmPrinter.GetCPISymbol(MO.getIndex()));
+ break;
+ case MachineOperand::MO_BlockAddress:
+ MCOp = LowerSymbolOperand(MO,
+ AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress()));
+ break;
+ }
+
+ OutMI.addOperand(MCOp);
+ }
+
+ // Handle a few special cases to eliminate operand modifiers.
+ReSimplify:
+ switch (OutMI.getOpcode()) {
+ case X86::LEA64_32r: // Handle 'subreg rewriting' for the lea64_32mem operand.
+ lower_lea64_32mem(&OutMI, 1);
+ // FALL THROUGH.
+ case X86::LEA64r:
+ case X86::LEA16r:
+ case X86::LEA32r:
+ // LEA should have a segment register, but it must be empty.
+ assert(OutMI.getNumOperands() == 1+X86::AddrNumOperands &&
+ "Unexpected # of LEA operands");
+ assert(OutMI.getOperand(1+X86::AddrSegmentReg).getReg() == 0 &&
+ "LEA has segment specified!");
+ break;
+ case X86::MOVZX64rr32: LowerSubReg32_Op0(OutMI, X86::MOV32rr); break;
+ case X86::MOVZX64rm32: LowerSubReg32_Op0(OutMI, X86::MOV32rm); break;
+ case X86::MOV64ri64i32: LowerSubReg32_Op0(OutMI, X86::MOV32ri); break;
+ case X86::MOVZX64rr8: LowerSubReg32_Op0(OutMI, X86::MOVZX32rr8); break;
+ case X86::MOVZX64rm8: LowerSubReg32_Op0(OutMI, X86::MOVZX32rm8); break;
+ case X86::MOVZX64rr16: LowerSubReg32_Op0(OutMI, X86::MOVZX32rr16); break;
+ case X86::MOVZX64rm16: LowerSubReg32_Op0(OutMI, X86::MOVZX32rm16); break;
+ case X86::SETB_C8r: LowerUnaryToTwoAddr(OutMI, X86::SBB8rr); break;
+ case X86::SETB_C16r: LowerUnaryToTwoAddr(OutMI, X86::SBB16rr); break;
+ case X86::SETB_C32r: LowerUnaryToTwoAddr(OutMI, X86::SBB32rr); break;
+ case X86::SETB_C64r: LowerUnaryToTwoAddr(OutMI, X86::SBB64rr); break;
+ case X86::MOV8r0: LowerUnaryToTwoAddr(OutMI, X86::XOR8rr); break;
+ case X86::MOV32r0: LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); break;
+ case X86::FsFLD0SS: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
+ case X86::FsFLD0SD: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
+ case X86::VFsFLD0SS: LowerUnaryToTwoAddr(OutMI, X86::VPXORrr); break;
+ case X86::VFsFLD0SD: LowerUnaryToTwoAddr(OutMI, X86::VPXORrr); break;
+ case X86::V_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::PCMPEQDrr); break;
+ case X86::AVX_SET0PSY: LowerUnaryToTwoAddr(OutMI, X86::VXORPSYrr); break;
+ case X86::AVX_SET0PDY: LowerUnaryToTwoAddr(OutMI, X86::VXORPDYrr); break;
+ case X86::AVX_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::VPCMPEQDrr); break;
+
+ case X86::MOV16r0:
+ LowerSubReg32_Op0(OutMI, X86::MOV32r0); // MOV16r0 -> MOV32r0
+ LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); // MOV32r0 -> XOR32rr
+ break;
+ case X86::MOV64r0:
+ LowerSubReg32_Op0(OutMI, X86::MOV32r0); // MOV64r0 -> MOV32r0
+ LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); // MOV32r0 -> XOR32rr
+ break;
+
+ // TAILJMPr64, [WIN]CALL64r, [WIN]CALL64pcrel32 - These instructions have
+ // register inputs modeled as normal uses instead of implicit uses. As such,
+ // truncate off all but the first operand (the callee). FIXME: Change isel.
+ case X86::TAILJMPr64:
+ case X86::CALL64r:
+ case X86::CALL64pcrel32:
+ case X86::WINCALL64r:
+ case X86::WINCALL64pcrel32: {
+ unsigned Opcode = OutMI.getOpcode();
+ MCOperand Saved = OutMI.getOperand(0);
+ OutMI = MCInst();
+ OutMI.setOpcode(Opcode);
+ OutMI.addOperand(Saved);
+ break;
+ }
+
+ case X86::EH_RETURN:
+ case X86::EH_RETURN64: {
+ OutMI = MCInst();
+ OutMI.setOpcode(X86::RET);
+ break;
+ }
+
+ // TAILJMPd, TAILJMPd64 - Lower to the correct jump instructions.
+ case X86::TAILJMPr:
+ case X86::TAILJMPd:
+ case X86::TAILJMPd64: {
+ unsigned Opcode;
+ switch (OutMI.getOpcode()) {
+ default: assert(0 && "Invalid opcode");
+ case X86::TAILJMPr: Opcode = X86::JMP32r; break;
+ case X86::TAILJMPd:
+ case X86::TAILJMPd64: Opcode = X86::JMP_1; break;
+ }
+
+ MCOperand Saved = OutMI.getOperand(0);
+ OutMI = MCInst();
+ OutMI.setOpcode(Opcode);
+ OutMI.addOperand(Saved);
+ break;
+ }
+
+ // These are pseudo-ops for OR to help with the OR->ADD transformation. We do
+ // this with an ugly goto in case the resultant OR uses EAX and needs the
+ // short form.
+ case X86::ADD16rr_DB: OutMI.setOpcode(X86::OR16rr); goto ReSimplify;
+ case X86::ADD32rr_DB: OutMI.setOpcode(X86::OR32rr); goto ReSimplify;
+ case X86::ADD64rr_DB: OutMI.setOpcode(X86::OR64rr); goto ReSimplify;
+ case X86::ADD16ri_DB: OutMI.setOpcode(X86::OR16ri); goto ReSimplify;
+ case X86::ADD32ri_DB: OutMI.setOpcode(X86::OR32ri); goto ReSimplify;
+ case X86::ADD64ri32_DB: OutMI.setOpcode(X86::OR64ri32); goto ReSimplify;
+ case X86::ADD16ri8_DB: OutMI.setOpcode(X86::OR16ri8); goto ReSimplify;
+ case X86::ADD32ri8_DB: OutMI.setOpcode(X86::OR32ri8); goto ReSimplify;
+ case X86::ADD64ri8_DB: OutMI.setOpcode(X86::OR64ri8); goto ReSimplify;
+
+ // The assembler backend wants to see branches in their small form and relax
+ // them to their large form. The JIT can only handle the large form because
+ // it does not do relaxation. For now, translate the large form to the
+ // small one here.
+ case X86::JMP_4: OutMI.setOpcode(X86::JMP_1); break;
+ case X86::JO_4: OutMI.setOpcode(X86::JO_1); break;
+ case X86::JNO_4: OutMI.setOpcode(X86::JNO_1); break;
+ case X86::JB_4: OutMI.setOpcode(X86::JB_1); break;
+ case X86::JAE_4: OutMI.setOpcode(X86::JAE_1); break;
+ case X86::JE_4: OutMI.setOpcode(X86::JE_1); break;
+ case X86::JNE_4: OutMI.setOpcode(X86::JNE_1); break;
+ case X86::JBE_4: OutMI.setOpcode(X86::JBE_1); break;
+ case X86::JA_4: OutMI.setOpcode(X86::JA_1); break;
+ case X86::JS_4: OutMI.setOpcode(X86::JS_1); break;
+ case X86::JNS_4: OutMI.setOpcode(X86::JNS_1); break;
+ case X86::JP_4: OutMI.setOpcode(X86::JP_1); break;
+ case X86::JNP_4: OutMI.setOpcode(X86::JNP_1); break;
+ case X86::JL_4: OutMI.setOpcode(X86::JL_1); break;
+ case X86::JGE_4: OutMI.setOpcode(X86::JGE_1); break;
+ case X86::JLE_4: OutMI.setOpcode(X86::JLE_1); break;
+ case X86::JG_4: OutMI.setOpcode(X86::JG_1); break;
+
+ // Atomic load and store require a separate pseudo-inst because Acquire
+ // implies mayStore and Release implies mayLoad; fix these to regular MOV
+ // instructions here
+ case X86::ACQUIRE_MOV8rm: OutMI.setOpcode(X86::MOV8rm); goto ReSimplify;
+ case X86::ACQUIRE_MOV16rm: OutMI.setOpcode(X86::MOV16rm); goto ReSimplify;
+ case X86::ACQUIRE_MOV32rm: OutMI.setOpcode(X86::MOV32rm); goto ReSimplify;
+ case X86::ACQUIRE_MOV64rm: OutMI.setOpcode(X86::MOV64rm); goto ReSimplify;
+ case X86::RELEASE_MOV8mr: OutMI.setOpcode(X86::MOV8mr); goto ReSimplify;
+ case X86::RELEASE_MOV16mr: OutMI.setOpcode(X86::MOV16mr); goto ReSimplify;
+ case X86::RELEASE_MOV32mr: OutMI.setOpcode(X86::MOV32mr); goto ReSimplify;
+ case X86::RELEASE_MOV64mr: OutMI.setOpcode(X86::MOV64mr); goto ReSimplify;
+
+ // We don't currently select the correct instruction form for instructions
+ // which have a short %eax, etc. form. Handle this by custom lowering, for
+ // now.
+ //
+ // Note, we are currently not handling the following instructions:
+ // MOV64ao8, MOV64o8a
+ // XCHG16ar, XCHG32ar, XCHG64ar
+ case X86::MOV8mr_NOREX:
+ case X86::MOV8mr: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV8ao8); break;
+ case X86::MOV8rm_NOREX:
+ case X86::MOV8rm: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV8o8a); break;
+ case X86::MOV16mr: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV16ao16); break;
+ case X86::MOV16rm: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV16o16a); break;
+ case X86::MOV32mr: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV32ao32); break;
+ case X86::MOV32rm: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV32o32a); break;
+
+ case X86::ADC8ri: SimplifyShortImmForm(OutMI, X86::ADC8i8); break;
+ case X86::ADC16ri: SimplifyShortImmForm(OutMI, X86::ADC16i16); break;
+ case X86::ADC32ri: SimplifyShortImmForm(OutMI, X86::ADC32i32); break;
+ case X86::ADC64ri32: SimplifyShortImmForm(OutMI, X86::ADC64i32); break;
+ case X86::ADD8ri: SimplifyShortImmForm(OutMI, X86::ADD8i8); break;
+ case X86::ADD16ri: SimplifyShortImmForm(OutMI, X86::ADD16i16); break;
+ case X86::ADD32ri: SimplifyShortImmForm(OutMI, X86::ADD32i32); break;
+ case X86::ADD64ri32: SimplifyShortImmForm(OutMI, X86::ADD64i32); break;
+ case X86::AND8ri: SimplifyShortImmForm(OutMI, X86::AND8i8); break;
+ case X86::AND16ri: SimplifyShortImmForm(OutMI, X86::AND16i16); break;
+ case X86::AND32ri: SimplifyShortImmForm(OutMI, X86::AND32i32); break;
+ case X86::AND64ri32: SimplifyShortImmForm(OutMI, X86::AND64i32); break;
+ case X86::CMP8ri: SimplifyShortImmForm(OutMI, X86::CMP8i8); break;
+ case X86::CMP16ri: SimplifyShortImmForm(OutMI, X86::CMP16i16); break;
+ case X86::CMP32ri: SimplifyShortImmForm(OutMI, X86::CMP32i32); break;
+ case X86::CMP64ri32: SimplifyShortImmForm(OutMI, X86::CMP64i32); break;
+ case X86::OR8ri: SimplifyShortImmForm(OutMI, X86::OR8i8); break;
+ case X86::OR16ri: SimplifyShortImmForm(OutMI, X86::OR16i16); break;
+ case X86::OR32ri: SimplifyShortImmForm(OutMI, X86::OR32i32); break;
+ case X86::OR64ri32: SimplifyShortImmForm(OutMI, X86::OR64i32); break;
+ case X86::SBB8ri: SimplifyShortImmForm(OutMI, X86::SBB8i8); break;
+ case X86::SBB16ri: SimplifyShortImmForm(OutMI, X86::SBB16i16); break;
+ case X86::SBB32ri: SimplifyShortImmForm(OutMI, X86::SBB32i32); break;
+ case X86::SBB64ri32: SimplifyShortImmForm(OutMI, X86::SBB64i32); break;
+ case X86::SUB8ri: SimplifyShortImmForm(OutMI, X86::SUB8i8); break;
+ case X86::SUB16ri: SimplifyShortImmForm(OutMI, X86::SUB16i16); break;
+ case X86::SUB32ri: SimplifyShortImmForm(OutMI, X86::SUB32i32); break;
+ case X86::SUB64ri32: SimplifyShortImmForm(OutMI, X86::SUB64i32); break;
+ case X86::TEST8ri: SimplifyShortImmForm(OutMI, X86::TEST8i8); break;
+ case X86::TEST16ri: SimplifyShortImmForm(OutMI, X86::TEST16i16); break;
+ case X86::TEST32ri: SimplifyShortImmForm(OutMI, X86::TEST32i32); break;
+ case X86::TEST64ri32: SimplifyShortImmForm(OutMI, X86::TEST64i32); break;
+ case X86::XOR8ri: SimplifyShortImmForm(OutMI, X86::XOR8i8); break;
+ case X86::XOR16ri: SimplifyShortImmForm(OutMI, X86::XOR16i16); break;
+ case X86::XOR32ri: SimplifyShortImmForm(OutMI, X86::XOR32i32); break;
+ case X86::XOR64ri32: SimplifyShortImmForm(OutMI, X86::XOR64i32); break;
+ }
+}
+
+static void LowerTlsAddr(MCStreamer &OutStreamer,
+ X86MCInstLower &MCInstLowering,
+ const MachineInstr &MI) {
+ bool is64Bits = MI.getOpcode() == X86::TLS_addr64;
+ MCContext &context = OutStreamer.getContext();
+
+ if (is64Bits) {
+ MCInst prefix;
+ prefix.setOpcode(X86::DATA16_PREFIX);
+ OutStreamer.EmitInstruction(prefix);
+ }
+ MCSymbol *sym = MCInstLowering.GetSymbolFromOperand(MI.getOperand(3));
+ const MCSymbolRefExpr *symRef =
+ MCSymbolRefExpr::Create(sym, MCSymbolRefExpr::VK_TLSGD, context);
+
+ MCInst LEA;
+ if (is64Bits) {
+ LEA.setOpcode(X86::LEA64r);
+ LEA.addOperand(MCOperand::CreateReg(X86::RDI)); // dest
+ LEA.addOperand(MCOperand::CreateReg(X86::RIP)); // base
+ LEA.addOperand(MCOperand::CreateImm(1)); // scale
+ LEA.addOperand(MCOperand::CreateReg(0)); // index
+ LEA.addOperand(MCOperand::CreateExpr(symRef)); // disp
+ LEA.addOperand(MCOperand::CreateReg(0)); // seg
+ } else {
+ LEA.setOpcode(X86::LEA32r);
+ LEA.addOperand(MCOperand::CreateReg(X86::EAX)); // dest
+ LEA.addOperand(MCOperand::CreateReg(0)); // base
+ LEA.addOperand(MCOperand::CreateImm(1)); // scale
+ LEA.addOperand(MCOperand::CreateReg(X86::EBX)); // index
+ LEA.addOperand(MCOperand::CreateExpr(symRef)); // disp
+ LEA.addOperand(MCOperand::CreateReg(0)); // seg
+ }
+ OutStreamer.EmitInstruction(LEA);
+
+ if (is64Bits) {
+ MCInst prefix;
+ prefix.setOpcode(X86::DATA16_PREFIX);
+ OutStreamer.EmitInstruction(prefix);
+ prefix.setOpcode(X86::DATA16_PREFIX);
+ OutStreamer.EmitInstruction(prefix);
+ prefix.setOpcode(X86::REX64_PREFIX);
+ OutStreamer.EmitInstruction(prefix);
+ }
+
+ MCInst call;
+ if (is64Bits)
+ call.setOpcode(X86::CALL64pcrel32);
+ else
+ call.setOpcode(X86::CALLpcrel32);
+ StringRef name = is64Bits ? "__tls_get_addr" : "___tls_get_addr";
+ MCSymbol *tlsGetAddr = context.GetOrCreateSymbol(name);
+ const MCSymbolRefExpr *tlsRef =
+ MCSymbolRefExpr::Create(tlsGetAddr,
+ MCSymbolRefExpr::VK_PLT,
+ context);
+
+ call.addOperand(MCOperand::CreateExpr(tlsRef));
+ OutStreamer.EmitInstruction(call);
+}
+
+void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ OutStreamer.EmitCodeRegion();
+
+ X86MCInstLower MCInstLowering(Mang, *MF, *this);
+ switch (MI->getOpcode()) {
+ case TargetOpcode::DBG_VALUE:
+ if (isVerbose() && OutStreamer.hasRawTextSupport()) {
+ std::string TmpStr;
+ raw_string_ostream OS(TmpStr);
+ PrintDebugValueComment(MI, OS);
+ OutStreamer.EmitRawText(StringRef(OS.str()));
+ }
+ return;
+
+ // Emit nothing here but a comment if we can.
+ case X86::Int_MemBarrier:
+ if (OutStreamer.hasRawTextSupport())
+ OutStreamer.EmitRawText(StringRef("\t#MEMBARRIER"));
+ return;
+
+
+ case X86::EH_RETURN:
+ case X86::EH_RETURN64: {
+ // Lower these as normal, but add some comments.
+ unsigned Reg = MI->getOperand(0).getReg();
+ OutStreamer.AddComment(StringRef("eh_return, addr: %") +
+ X86ATTInstPrinter::getRegisterName(Reg));
+ break;
+ }
+ case X86::TAILJMPr:
+ case X86::TAILJMPd:
+ case X86::TAILJMPd64:
+ // Lower these as normal, but add some comments.
+ OutStreamer.AddComment("TAILCALL");
+ break;
+
+ case X86::TLS_addr32:
+ case X86::TLS_addr64:
+ return LowerTlsAddr(OutStreamer, MCInstLowering, *MI);
+
+ case X86::MOVPC32r: {
+ MCInst TmpInst;
+ // This is a pseudo op for a two instruction sequence with a label, which
+ // looks like:
+ // call "L1$pb"
+ // "L1$pb":
+ // popl %esi
+
+ // Emit the call.
+ MCSymbol *PICBase = MF->getPICBaseSymbol();
+ TmpInst.setOpcode(X86::CALLpcrel32);
+ // FIXME: We would like an efficient form for this, so we don't have to do a
+ // lot of extra uniquing.
+ TmpInst.addOperand(MCOperand::CreateExpr(MCSymbolRefExpr::Create(PICBase,
+ OutContext)));
+ OutStreamer.EmitInstruction(TmpInst);
+
+ // Emit the label.
+ OutStreamer.EmitLabel(PICBase);
+
+ // popl $reg
+ TmpInst.setOpcode(X86::POP32r);
+ TmpInst.getOperand(0) = MCOperand::CreateReg(MI->getOperand(0).getReg());
+ OutStreamer.EmitInstruction(TmpInst);
+ return;
+ }
+
+ case X86::ADD32ri: {
+ // Lower the MO_GOT_ABSOLUTE_ADDRESS form of ADD32ri.
+ if (MI->getOperand(2).getTargetFlags() != X86II::MO_GOT_ABSOLUTE_ADDRESS)
+ break;
+
+ // Okay, we have something like:
+ // EAX = ADD32ri EAX, MO_GOT_ABSOLUTE_ADDRESS(@MYGLOBAL)
+
+ // For this, we want to print something like:
+ // MYGLOBAL + (. - PICBASE)
+ // However, we can't generate a ".", so just emit a new label here and refer
+ // to it.
+ MCSymbol *DotSym = OutContext.CreateTempSymbol();
+ OutStreamer.EmitLabel(DotSym);
+
+ // Now that we have emitted the label, lower the complex operand expression.
+ MCSymbol *OpSym = MCInstLowering.GetSymbolFromOperand(MI->getOperand(2));
+
+ const MCExpr *DotExpr = MCSymbolRefExpr::Create(DotSym, OutContext);
+ const MCExpr *PICBase =
+ MCSymbolRefExpr::Create(MF->getPICBaseSymbol(), OutContext);
+ DotExpr = MCBinaryExpr::CreateSub(DotExpr, PICBase, OutContext);
+
+ DotExpr = MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(OpSym,OutContext),
+ DotExpr, OutContext);
+
+ MCInst TmpInst;
+ TmpInst.setOpcode(X86::ADD32ri);
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg()));
+ TmpInst.addOperand(MCOperand::CreateExpr(DotExpr));
+ OutStreamer.EmitInstruction(TmpInst);
+ return;
+ }
+ }
+
+ MCInst TmpInst;
+ MCInstLowering.Lower(MI, TmpInst);
+ OutStreamer.EmitInstruction(TmpInst);
+}
+
diff --git a/contrib/llvm/lib/Target/X86/X86MCInstLower.h b/contrib/llvm/lib/Target/X86/X86MCInstLower.h
new file mode 100644
index 0000000..0210072
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86MCInstLower.h
@@ -0,0 +1,52 @@
+//===-- X86MCInstLower.h - Lower MachineInstr to MCInst -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86_MCINSTLOWER_H
+#define X86_MCINSTLOWER_H
+
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+ class MCAsmInfo;
+ class MCContext;
+ class MCInst;
+ class MCOperand;
+ class MCSymbol;
+ class MachineInstr;
+ class MachineFunction;
+ class MachineModuleInfoMachO;
+ class MachineOperand;
+ class Mangler;
+ class TargetMachine;
+ class X86AsmPrinter;
+
+/// X86MCInstLower - This class is used to lower an MachineInstr into an MCInst.
+class LLVM_LIBRARY_VISIBILITY X86MCInstLower {
+ MCContext &Ctx;
+ Mangler *Mang;
+ const MachineFunction &MF;
+ const TargetMachine &TM;
+ const MCAsmInfo &MAI;
+ X86AsmPrinter &AsmPrinter;
+public:
+ X86MCInstLower(Mangler *mang, const MachineFunction &MF,
+ X86AsmPrinter &asmprinter);
+
+ void Lower(const MachineInstr *MI, MCInst &OutMI) const;
+
+ MCSymbol *GetSymbolFromOperand(const MachineOperand &MO) const;
+ MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;
+
+private:
+ MachineModuleInfoMachO &getMachOMMI() const;
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/X86MachineFunctionInfo.h b/contrib/llvm/lib/Target/X86/X86MachineFunctionInfo.h
new file mode 100644
index 0000000..b0bb313
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86MachineFunctionInfo.h
@@ -0,0 +1,135 @@
+//====- X86MachineFuctionInfo.h - X86 machine function info -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares X86-specific per-machine-function information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86MACHINEFUNCTIONINFO_H
+#define X86MACHINEFUNCTIONINFO_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+/// X86MachineFunctionInfo - This class is derived from MachineFunction and
+/// contains private X86 target-specific information for each MachineFunction.
+class X86MachineFunctionInfo : public MachineFunctionInfo {
+ /// ForceFramePointer - True if the function is required to use of frame
+ /// pointer for reasons other than it containing dynamic allocation or
+ /// that FP eliminatation is turned off. For example, Cygwin main function
+ /// contains stack pointer re-alignment code which requires FP.
+ bool ForceFramePointer;
+
+ /// CalleeSavedFrameSize - Size of the callee-saved register portion of the
+ /// stack frame in bytes.
+ unsigned CalleeSavedFrameSize;
+
+ /// BytesToPopOnReturn - Number of bytes function pops on return (in addition
+ /// to the space used by the return address).
+ /// Used on windows platform for stdcall & fastcall name decoration
+ unsigned BytesToPopOnReturn;
+
+ /// ReturnAddrIndex - FrameIndex for return slot.
+ int ReturnAddrIndex;
+
+ /// TailCallReturnAddrDelta - The number of bytes by which return address
+ /// stack slot is moved as the result of tail call optimization.
+ int TailCallReturnAddrDelta;
+
+ /// SRetReturnReg - Some subtargets require that sret lowering includes
+ /// returning the value of the returned struct in a register. This field
+ /// holds the virtual register into which the sret argument is passed.
+ unsigned SRetReturnReg;
+
+ /// GlobalBaseReg - keeps track of the virtual register initialized for
+ /// use as the global base register. This is used for PIC in some PIC
+ /// relocation models.
+ unsigned GlobalBaseReg;
+
+ /// VarArgsFrameIndex - FrameIndex for start of varargs area.
+ int VarArgsFrameIndex;
+ /// RegSaveFrameIndex - X86-64 vararg func register save area.
+ int RegSaveFrameIndex;
+ /// VarArgsGPOffset - X86-64 vararg func int reg offset.
+ unsigned VarArgsGPOffset;
+ /// VarArgsFPOffset - X86-64 vararg func fp reg offset.
+ unsigned VarArgsFPOffset;
+ /// ArgumentStackSize - The number of bytes on stack consumed by the arguments
+ /// being passed on the stack.
+ unsigned ArgumentStackSize;
+
+public:
+ X86MachineFunctionInfo() : ForceFramePointer(false),
+ CalleeSavedFrameSize(0),
+ BytesToPopOnReturn(0),
+ ReturnAddrIndex(0),
+ TailCallReturnAddrDelta(0),
+ SRetReturnReg(0),
+ GlobalBaseReg(0),
+ VarArgsFrameIndex(0),
+ RegSaveFrameIndex(0),
+ VarArgsGPOffset(0),
+ VarArgsFPOffset(0),
+ ArgumentStackSize(0) {}
+
+ explicit X86MachineFunctionInfo(MachineFunction &MF)
+ : ForceFramePointer(false),
+ CalleeSavedFrameSize(0),
+ BytesToPopOnReturn(0),
+ ReturnAddrIndex(0),
+ TailCallReturnAddrDelta(0),
+ SRetReturnReg(0),
+ GlobalBaseReg(0),
+ VarArgsFrameIndex(0),
+ RegSaveFrameIndex(0),
+ VarArgsGPOffset(0),
+ VarArgsFPOffset(0),
+ ArgumentStackSize(0) {}
+
+ bool getForceFramePointer() const { return ForceFramePointer;}
+ void setForceFramePointer(bool forceFP) { ForceFramePointer = forceFP; }
+
+ unsigned getCalleeSavedFrameSize() const { return CalleeSavedFrameSize; }
+ void setCalleeSavedFrameSize(unsigned bytes) { CalleeSavedFrameSize = bytes; }
+
+ unsigned getBytesToPopOnReturn() const { return BytesToPopOnReturn; }
+ void setBytesToPopOnReturn (unsigned bytes) { BytesToPopOnReturn = bytes;}
+
+ int getRAIndex() const { return ReturnAddrIndex; }
+ void setRAIndex(int Index) { ReturnAddrIndex = Index; }
+
+ int getTCReturnAddrDelta() const { return TailCallReturnAddrDelta; }
+ void setTCReturnAddrDelta(int delta) {TailCallReturnAddrDelta = delta;}
+
+ unsigned getSRetReturnReg() const { return SRetReturnReg; }
+ void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; }
+
+ unsigned getGlobalBaseReg() const { return GlobalBaseReg; }
+ void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; }
+
+ int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
+ void setVarArgsFrameIndex(int Idx) { VarArgsFrameIndex = Idx; }
+
+ int getRegSaveFrameIndex() const { return RegSaveFrameIndex; }
+ void setRegSaveFrameIndex(int Idx) { RegSaveFrameIndex = Idx; }
+
+ unsigned getVarArgsGPOffset() const { return VarArgsGPOffset; }
+ void setVarArgsGPOffset(unsigned Offset) { VarArgsGPOffset = Offset; }
+
+ unsigned getVarArgsFPOffset() const { return VarArgsFPOffset; }
+ void setVarArgsFPOffset(unsigned Offset) { VarArgsFPOffset = Offset; }
+
+ unsigned getArgumentStackSize() const { return ArgumentStackSize; }
+ void setArgumentStackSize(unsigned size) { ArgumentStackSize = size; }
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp b/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp
new file mode 100644
index 0000000..c1ac9f3
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -0,0 +1,870 @@
+//===- X86RegisterInfo.cpp - X86 Register Information -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the X86 implementation of the TargetRegisterInfo class.
+// This file is responsible for the frame pointer elimination optimization
+// on X86.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "X86RegisterInfo.h"
+#include "X86InstrBuilder.h"
+#include "X86MachineFunctionInfo.h"
+#include "X86Subtarget.h"
+#include "X86TargetMachine.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/CommandLine.h"
+
+#define GET_REGINFO_TARGET_DESC
+#include "X86GenRegisterInfo.inc"
+
+using namespace llvm;
+
+cl::opt<bool>
+ForceStackAlign("force-align-stack",
+ cl::desc("Force align the stack to the minimum alignment"
+ " needed for the function."),
+ cl::init(false), cl::Hidden);
+
+X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm,
+ const TargetInstrInfo &tii)
+ : X86GenRegisterInfo(tm.getSubtarget<X86Subtarget>().is64Bit()
+ ? X86::RIP : X86::EIP,
+ X86_MC::getDwarfRegFlavour(tm.getTargetTriple(), false),
+ X86_MC::getDwarfRegFlavour(tm.getTargetTriple(), true)),
+ TM(tm), TII(tii) {
+ X86_MC::InitLLVM2SEHRegisterMapping(this);
+
+ // Cache some information.
+ const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
+ Is64Bit = Subtarget->is64Bit();
+ IsWin64 = Subtarget->isTargetWin64();
+
+ if (Is64Bit) {
+ SlotSize = 8;
+ StackPtr = X86::RSP;
+ FramePtr = X86::RBP;
+ } else {
+ SlotSize = 4;
+ StackPtr = X86::ESP;
+ FramePtr = X86::EBP;
+ }
+}
+
+/// getCompactUnwindRegNum - This function maps the register to the number for
+/// compact unwind encoding. Return -1 if the register isn't valid.
+int X86RegisterInfo::getCompactUnwindRegNum(unsigned RegNum, bool isEH) const {
+ switch (getLLVMRegNum(RegNum, isEH)) {
+ case X86::EBX: case X86::RBX: return 1;
+ case X86::ECX: case X86::R12: return 2;
+ case X86::EDX: case X86::R13: return 3;
+ case X86::EDI: case X86::R14: return 4;
+ case X86::ESI: case X86::R15: return 5;
+ case X86::EBP: case X86::RBP: return 6;
+ }
+
+ return -1;
+}
+
+int
+X86RegisterInfo::getSEHRegNum(unsigned i) const {
+ int reg = X86_MC::getX86RegNum(i);
+ switch (i) {
+ case X86::R8: case X86::R8D: case X86::R8W: case X86::R8B:
+ case X86::R9: case X86::R9D: case X86::R9W: case X86::R9B:
+ case X86::R10: case X86::R10D: case X86::R10W: case X86::R10B:
+ case X86::R11: case X86::R11D: case X86::R11W: case X86::R11B:
+ case X86::R12: case X86::R12D: case X86::R12W: case X86::R12B:
+ case X86::R13: case X86::R13D: case X86::R13W: case X86::R13B:
+ case X86::R14: case X86::R14D: case X86::R14W: case X86::R14B:
+ case X86::R15: case X86::R15D: case X86::R15W: case X86::R15B:
+ case X86::XMM8: case X86::XMM9: case X86::XMM10: case X86::XMM11:
+ case X86::XMM12: case X86::XMM13: case X86::XMM14: case X86::XMM15:
+ case X86::YMM8: case X86::YMM9: case X86::YMM10: case X86::YMM11:
+ case X86::YMM12: case X86::YMM13: case X86::YMM14: case X86::YMM15:
+ reg += 8;
+ }
+ return reg;
+}
+
+const TargetRegisterClass *
+X86RegisterInfo::getSubClassWithSubReg(const TargetRegisterClass *RC,
+ unsigned Idx) const {
+ // The sub_8bit sub-register index is more constrained in 32-bit mode.
+ // It behaves just like the sub_8bit_hi index.
+ if (!Is64Bit && Idx == X86::sub_8bit)
+ Idx = X86::sub_8bit_hi;
+
+ // Forward to TableGen's default version.
+ return X86GenRegisterInfo::getSubClassWithSubReg(RC, Idx);
+}
+
+const TargetRegisterClass *
+X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
+ const TargetRegisterClass *B,
+ unsigned SubIdx) const {
+ switch (SubIdx) {
+ default: return 0;
+ case X86::sub_8bit:
+ if (B == &X86::GR8RegClass) {
+ if (A->getSize() == 2 || A->getSize() == 4 || A->getSize() == 8)
+ return A;
+ } else if (B == &X86::GR8_ABCD_LRegClass || B == &X86::GR8_ABCD_HRegClass) {
+ if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass ||
+ A == &X86::GR64_NOREXRegClass ||
+ A == &X86::GR64_NOSPRegClass ||
+ A == &X86::GR64_NOREX_NOSPRegClass)
+ return &X86::GR64_ABCDRegClass;
+ else if (A == &X86::GR32RegClass || A == &X86::GR32_ABCDRegClass ||
+ A == &X86::GR32_NOREXRegClass ||
+ A == &X86::GR32_NOSPRegClass)
+ return &X86::GR32_ABCDRegClass;
+ else if (A == &X86::GR16RegClass || A == &X86::GR16_ABCDRegClass ||
+ A == &X86::GR16_NOREXRegClass)
+ return &X86::GR16_ABCDRegClass;
+ } else if (B == &X86::GR8_NOREXRegClass) {
+ if (A == &X86::GR64RegClass || A == &X86::GR64_NOREXRegClass ||
+ A == &X86::GR64_NOSPRegClass || A == &X86::GR64_NOREX_NOSPRegClass)
+ return &X86::GR64_NOREXRegClass;
+ else if (A == &X86::GR64_ABCDRegClass)
+ return &X86::GR64_ABCDRegClass;
+ else if (A == &X86::GR32RegClass || A == &X86::GR32_NOREXRegClass ||
+ A == &X86::GR32_NOSPRegClass)
+ return &X86::GR32_NOREXRegClass;
+ else if (A == &X86::GR32_ABCDRegClass)
+ return &X86::GR32_ABCDRegClass;
+ else if (A == &X86::GR16RegClass || A == &X86::GR16_NOREXRegClass)
+ return &X86::GR16_NOREXRegClass;
+ else if (A == &X86::GR16_ABCDRegClass)
+ return &X86::GR16_ABCDRegClass;
+ }
+ break;
+ case X86::sub_8bit_hi:
+ if (B->hasSubClassEq(&X86::GR8_ABCD_HRegClass))
+ switch (A->getSize()) {
+ case 2: return getCommonSubClass(A, &X86::GR16_ABCDRegClass);
+ case 4: return getCommonSubClass(A, &X86::GR32_ABCDRegClass);
+ case 8: return getCommonSubClass(A, &X86::GR64_ABCDRegClass);
+ default: return 0;
+ }
+ break;
+ case X86::sub_16bit:
+ if (B == &X86::GR16RegClass) {
+ if (A->getSize() == 4 || A->getSize() == 8)
+ return A;
+ } else if (B == &X86::GR16_ABCDRegClass) {
+ if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass ||
+ A == &X86::GR64_NOREXRegClass ||
+ A == &X86::GR64_NOSPRegClass ||
+ A == &X86::GR64_NOREX_NOSPRegClass)
+ return &X86::GR64_ABCDRegClass;
+ else if (A == &X86::GR32RegClass || A == &X86::GR32_ABCDRegClass ||
+ A == &X86::GR32_NOREXRegClass || A == &X86::GR32_NOSPRegClass)
+ return &X86::GR32_ABCDRegClass;
+ } else if (B == &X86::GR16_NOREXRegClass) {
+ if (A == &X86::GR64RegClass || A == &X86::GR64_NOREXRegClass ||
+ A == &X86::GR64_NOSPRegClass || A == &X86::GR64_NOREX_NOSPRegClass)
+ return &X86::GR64_NOREXRegClass;
+ else if (A == &X86::GR64_ABCDRegClass)
+ return &X86::GR64_ABCDRegClass;
+ else if (A == &X86::GR32RegClass || A == &X86::GR32_NOREXRegClass ||
+ A == &X86::GR32_NOSPRegClass)
+ return &X86::GR32_NOREXRegClass;
+ else if (A == &X86::GR32_ABCDRegClass)
+ return &X86::GR64_ABCDRegClass;
+ }
+ break;
+ case X86::sub_32bit:
+ if (B == &X86::GR32RegClass) {
+ if (A->getSize() == 8)
+ return A;
+ } else if (B == &X86::GR32_NOSPRegClass) {
+ if (A == &X86::GR64RegClass || A == &X86::GR64_NOSPRegClass)
+ return &X86::GR64_NOSPRegClass;
+ if (A->getSize() == 8)
+ return getCommonSubClass(A, &X86::GR64_NOSPRegClass);
+ } else if (B == &X86::GR32_ABCDRegClass) {
+ if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass ||
+ A == &X86::GR64_NOREXRegClass ||
+ A == &X86::GR64_NOSPRegClass ||
+ A == &X86::GR64_NOREX_NOSPRegClass)
+ return &X86::GR64_ABCDRegClass;
+ } else if (B == &X86::GR32_NOREXRegClass) {
+ if (A == &X86::GR64RegClass || A == &X86::GR64_NOREXRegClass)
+ return &X86::GR64_NOREXRegClass;
+ else if (A == &X86::GR64_NOSPRegClass || A == &X86::GR64_NOREX_NOSPRegClass)
+ return &X86::GR64_NOREX_NOSPRegClass;
+ else if (A == &X86::GR64_ABCDRegClass)
+ return &X86::GR64_ABCDRegClass;
+ } else if (B == &X86::GR32_NOREX_NOSPRegClass) {
+ if (A == &X86::GR64RegClass || A == &X86::GR64_NOREXRegClass ||
+ A == &X86::GR64_NOSPRegClass || A == &X86::GR64_NOREX_NOSPRegClass)
+ return &X86::GR64_NOREX_NOSPRegClass;
+ else if (A == &X86::GR64_ABCDRegClass)
+ return &X86::GR64_ABCDRegClass;
+ }
+ break;
+ case X86::sub_ss:
+ if (B == &X86::FR32RegClass)
+ return A;
+ break;
+ case X86::sub_sd:
+ if (B == &X86::FR64RegClass)
+ return A;
+ break;
+ case X86::sub_xmm:
+ if (B == &X86::VR128RegClass)
+ return A;
+ break;
+ }
+ return 0;
+}
+
+const TargetRegisterClass*
+X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC) const{
+ // Don't allow super-classes of GR8_NOREX. This class is only used after
+ // extrating sub_8bit_hi sub-registers. The H sub-registers cannot be copied
+ // to the full GR8 register class in 64-bit mode, so we cannot allow the
+ // reigster class inflation.
+ //
+ // The GR8_NOREX class is always used in a way that won't be constrained to a
+ // sub-class, so sub-classes like GR8_ABCD_L are allowed to expand to the
+ // full GR8 class.
+ if (RC == X86::GR8_NOREXRegisterClass)
+ return RC;
+
+ const TargetRegisterClass *Super = RC;
+ TargetRegisterClass::sc_iterator I = RC->getSuperClasses();
+ do {
+ switch (Super->getID()) {
+ case X86::GR8RegClassID:
+ case X86::GR16RegClassID:
+ case X86::GR32RegClassID:
+ case X86::GR64RegClassID:
+ case X86::FR32RegClassID:
+ case X86::FR64RegClassID:
+ case X86::RFP32RegClassID:
+ case X86::RFP64RegClassID:
+ case X86::RFP80RegClassID:
+ case X86::VR128RegClassID:
+ case X86::VR256RegClassID:
+ // Don't return a super-class that would shrink the spill size.
+ // That can happen with the vector and float classes.
+ if (Super->getSize() == RC->getSize())
+ return Super;
+ }
+ Super = *I++;
+ } while (Super);
+ return RC;
+}
+
+const TargetRegisterClass *
+X86RegisterInfo::getPointerRegClass(unsigned Kind) const {
+ switch (Kind) {
+ default: llvm_unreachable("Unexpected Kind in getPointerRegClass!");
+ case 0: // Normal GPRs.
+ if (TM.getSubtarget<X86Subtarget>().is64Bit())
+ return &X86::GR64RegClass;
+ return &X86::GR32RegClass;
+ case 1: // Normal GPRs except the stack pointer (for encoding reasons).
+ if (TM.getSubtarget<X86Subtarget>().is64Bit())
+ return &X86::GR64_NOSPRegClass;
+ return &X86::GR32_NOSPRegClass;
+ case 2: // Available for tailcall (not callee-saved GPRs).
+ if (TM.getSubtarget<X86Subtarget>().isTargetWin64())
+ return &X86::GR64_TCW64RegClass;
+ if (TM.getSubtarget<X86Subtarget>().is64Bit())
+ return &X86::GR64_TCRegClass;
+ return &X86::GR32_TCRegClass;
+ }
+}
+
+const TargetRegisterClass *
+X86RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
+ if (RC == &X86::CCRRegClass) {
+ if (Is64Bit)
+ return &X86::GR64RegClass;
+ else
+ return &X86::GR32RegClass;
+ }
+ return RC;
+}
+
+unsigned
+X86RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
+ MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ unsigned FPDiff = TFI->hasFP(MF) ? 1 : 0;
+ switch (RC->getID()) {
+ default:
+ return 0;
+ case X86::GR32RegClassID:
+ return 4 - FPDiff;
+ case X86::GR64RegClassID:
+ return 12 - FPDiff;
+ case X86::VR128RegClassID:
+ return TM.getSubtarget<X86Subtarget>().is64Bit() ? 10 : 4;
+ case X86::VR64RegClassID:
+ return 4;
+ }
+}
+
+const unsigned *
+X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+ bool callsEHReturn = false;
+ bool ghcCall = false;
+
+ if (MF) {
+ callsEHReturn = MF->getMMI().callsEHReturn();
+ const Function *F = MF->getFunction();
+ ghcCall = (F ? F->getCallingConv() == CallingConv::GHC : false);
+ }
+
+ static const unsigned GhcCalleeSavedRegs[] = {
+ 0
+ };
+
+ static const unsigned CalleeSavedRegs32Bit[] = {
+ X86::ESI, X86::EDI, X86::EBX, X86::EBP, 0
+ };
+
+ static const unsigned CalleeSavedRegs32EHRet[] = {
+ X86::EAX, X86::EDX, X86::ESI, X86::EDI, X86::EBX, X86::EBP, 0
+ };
+
+ static const unsigned CalleeSavedRegs64Bit[] = {
+ X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
+ };
+
+ static const unsigned CalleeSavedRegs64EHRet[] = {
+ X86::RAX, X86::RDX, X86::RBX, X86::R12,
+ X86::R13, X86::R14, X86::R15, X86::RBP, 0
+ };
+
+ static const unsigned CalleeSavedRegsWin64[] = {
+ X86::RBX, X86::RBP, X86::RDI, X86::RSI,
+ X86::R12, X86::R13, X86::R14, X86::R15,
+ X86::XMM6, X86::XMM7, X86::XMM8, X86::XMM9,
+ X86::XMM10, X86::XMM11, X86::XMM12, X86::XMM13,
+ X86::XMM14, X86::XMM15, 0
+ };
+
+ if (ghcCall) {
+ return GhcCalleeSavedRegs;
+ } else if (Is64Bit) {
+ if (IsWin64)
+ return CalleeSavedRegsWin64;
+ else
+ return (callsEHReturn ? CalleeSavedRegs64EHRet : CalleeSavedRegs64Bit);
+ } else {
+ return (callsEHReturn ? CalleeSavedRegs32EHRet : CalleeSavedRegs32Bit);
+ }
+}
+
+BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ // Set the stack-pointer register and its aliases as reserved.
+ Reserved.set(X86::RSP);
+ Reserved.set(X86::ESP);
+ Reserved.set(X86::SP);
+ Reserved.set(X86::SPL);
+
+ // Set the instruction pointer register and its aliases as reserved.
+ Reserved.set(X86::RIP);
+ Reserved.set(X86::EIP);
+ Reserved.set(X86::IP);
+
+ // Set the frame-pointer register and its aliases as reserved if needed.
+ if (TFI->hasFP(MF)) {
+ Reserved.set(X86::RBP);
+ Reserved.set(X86::EBP);
+ Reserved.set(X86::BP);
+ Reserved.set(X86::BPL);
+ }
+
+ // Mark the segment registers as reserved.
+ Reserved.set(X86::CS);
+ Reserved.set(X86::SS);
+ Reserved.set(X86::DS);
+ Reserved.set(X86::ES);
+ Reserved.set(X86::FS);
+ Reserved.set(X86::GS);
+
+ // Reserve the registers that only exist in 64-bit mode.
+ if (!Is64Bit) {
+ // These 8-bit registers are part of the x86-64 extension even though their
+ // super-registers are old 32-bits.
+ Reserved.set(X86::SIL);
+ Reserved.set(X86::DIL);
+ Reserved.set(X86::BPL);
+ Reserved.set(X86::SPL);
+
+ for (unsigned n = 0; n != 8; ++n) {
+ // R8, R9, ...
+ const unsigned GPR64[] = {
+ X86::R8, X86::R9, X86::R10, X86::R11,
+ X86::R12, X86::R13, X86::R14, X86::R15
+ };
+ for (const unsigned *AI = getOverlaps(GPR64[n]); unsigned Reg = *AI; ++AI)
+ Reserved.set(Reg);
+
+ // XMM8, XMM9, ...
+ assert(X86::XMM15 == X86::XMM8+7);
+ for (const unsigned *AI = getOverlaps(X86::XMM8 + n); unsigned Reg = *AI;
+ ++AI)
+ Reserved.set(Reg);
+ }
+ }
+
+ return Reserved;
+}
+
+//===----------------------------------------------------------------------===//
+// Stack Frame Processing methods
+//===----------------------------------------------------------------------===//
+
+bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ return (RealignStack &&
+ !MFI->hasVarSizedObjects());
+}
+
+bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const Function *F = MF.getFunction();
+ unsigned StackAlign = TM.getFrameLowering()->getStackAlignment();
+ bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) ||
+ F->hasFnAttr(Attribute::StackAlignment));
+
+ // FIXME: Currently we don't support stack realignment for functions with
+ // variable-sized allocas.
+ // FIXME: It's more complicated than this...
+ if (0 && requiresRealignment && MFI->hasVarSizedObjects())
+ report_fatal_error(
+ "Stack realignment in presence of dynamic allocas is not supported");
+
+ // If we've requested that we force align the stack do so now.
+ if (ForceStackAlign)
+ return canRealignStack(MF);
+
+ return requiresRealignment && canRealignStack(MF);
+}
+
+bool X86RegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
+ unsigned Reg, int &FrameIdx) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ if (Reg == FramePtr && TFI->hasFP(MF)) {
+ FrameIdx = MF.getFrameInfo()->getObjectIndexBegin();
+ return true;
+ }
+ return false;
+}
+
+static unsigned getSUBriOpcode(unsigned is64Bit, int64_t Imm) {
+ if (is64Bit) {
+ if (isInt<8>(Imm))
+ return X86::SUB64ri8;
+ return X86::SUB64ri32;
+ } else {
+ if (isInt<8>(Imm))
+ return X86::SUB32ri8;
+ return X86::SUB32ri;
+ }
+}
+
+static unsigned getADDriOpcode(unsigned is64Bit, int64_t Imm) {
+ if (is64Bit) {
+ if (isInt<8>(Imm))
+ return X86::ADD64ri8;
+ return X86::ADD64ri32;
+ } else {
+ if (isInt<8>(Imm))
+ return X86::ADD32ri8;
+ return X86::ADD32ri;
+ }
+}
+
+void X86RegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ bool reseveCallFrame = TFI->hasReservedCallFrame(MF);
+ int Opcode = I->getOpcode();
+ bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode();
+ DebugLoc DL = I->getDebugLoc();
+ uint64_t Amount = !reseveCallFrame ? I->getOperand(0).getImm() : 0;
+ uint64_t CalleeAmt = isDestroy ? I->getOperand(1).getImm() : 0;
+ I = MBB.erase(I);
+
+ if (!reseveCallFrame) {
+ // If the stack pointer can be changed after prologue, turn the
+ // adjcallstackup instruction into a 'sub ESP, <amt>' and the
+ // adjcallstackdown instruction into 'add ESP, <amt>'
+ // TODO: consider using push / pop instead of sub + store / add
+ if (Amount == 0)
+ return;
+
+ // We need to keep the stack aligned properly. To do this, we round the
+ // amount of space needed for the outgoing arguments up to the next
+ // alignment boundary.
+ unsigned StackAlign = TM.getFrameLowering()->getStackAlignment();
+ Amount = (Amount + StackAlign - 1) / StackAlign * StackAlign;
+
+ MachineInstr *New = 0;
+ if (Opcode == TII.getCallFrameSetupOpcode()) {
+ New = BuildMI(MF, DL, TII.get(getSUBriOpcode(Is64Bit, Amount)),
+ StackPtr)
+ .addReg(StackPtr)
+ .addImm(Amount);
+ } else {
+ assert(Opcode == TII.getCallFrameDestroyOpcode());
+
+ // Factor out the amount the callee already popped.
+ Amount -= CalleeAmt;
+
+ if (Amount) {
+ unsigned Opc = getADDriOpcode(Is64Bit, Amount);
+ New = BuildMI(MF, DL, TII.get(Opc), StackPtr)
+ .addReg(StackPtr).addImm(Amount);
+ }
+ }
+
+ if (New) {
+ // The EFLAGS implicit def is dead.
+ New->getOperand(3).setIsDead();
+
+ // Replace the pseudo instruction with a new instruction.
+ MBB.insert(I, New);
+ }
+
+ return;
+ }
+
+ if (Opcode == TII.getCallFrameDestroyOpcode() && CalleeAmt) {
+ // If we are performing frame pointer elimination and if the callee pops
+ // something off the stack pointer, add it back. We do this until we have
+ // more advanced stack pointer tracking ability.
+ unsigned Opc = getSUBriOpcode(Is64Bit, CalleeAmt);
+ MachineInstr *New = BuildMI(MF, DL, TII.get(Opc), StackPtr)
+ .addReg(StackPtr).addImm(CalleeAmt);
+
+ // The EFLAGS implicit def is dead.
+ New->getOperand(3).setIsDead();
+
+ // We are not tracking the stack pointer adjustment by the callee, so make
+ // sure we restore the stack pointer immediately after the call, there may
+ // be spill code inserted between the CALL and ADJCALLSTACKUP instructions.
+ MachineBasicBlock::iterator B = MBB.begin();
+ while (I != B && !llvm::prior(I)->getDesc().isCall())
+ --I;
+ MBB.insert(I, New);
+ }
+}
+
+void
+X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS) const{
+ assert(SPAdj == 0 && "Unexpected");
+
+ unsigned i = 0;
+ MachineInstr &MI = *II;
+ MachineFunction &MF = *MI.getParent()->getParent();
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+
+ int FrameIndex = MI.getOperand(i).getIndex();
+ unsigned BasePtr;
+
+ unsigned Opc = MI.getOpcode();
+ bool AfterFPPop = Opc == X86::TAILJMPm64 || Opc == X86::TAILJMPm;
+ if (needsStackRealignment(MF))
+ BasePtr = (FrameIndex < 0 ? FramePtr : StackPtr);
+ else if (AfterFPPop)
+ BasePtr = StackPtr;
+ else
+ BasePtr = (TFI->hasFP(MF) ? FramePtr : StackPtr);
+
+ // This must be part of a four operand memory reference. Replace the
+ // FrameIndex with base register with EBP. Add an offset to the offset.
+ MI.getOperand(i).ChangeToRegister(BasePtr, false);
+
+ // Now add the frame object offset to the offset from EBP.
+ int FIOffset;
+ if (AfterFPPop) {
+ // Tail call jmp happens after FP is popped.
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ FIOffset = MFI->getObjectOffset(FrameIndex) - TFI->getOffsetOfLocalArea();
+ } else
+ FIOffset = TFI->getFrameIndexOffset(MF, FrameIndex);
+
+ if (MI.getOperand(i+3).isImm()) {
+ // Offset is a 32-bit integer.
+ int Imm = (int)(MI.getOperand(i + 3).getImm());
+ int Offset = FIOffset + Imm;
+ assert((!Is64Bit || isInt<32>((long long)FIOffset + Imm)) &&
+ "Requesting 64-bit offset in 32-bit immediate!");
+ MI.getOperand(i + 3).ChangeToImmediate(Offset);
+ } else {
+ // Offset is symbolic. This is extremely rare.
+ uint64_t Offset = FIOffset + (uint64_t)MI.getOperand(i+3).getOffset();
+ MI.getOperand(i+3).setOffset(Offset);
+ }
+}
+
+unsigned X86RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ return TFI->hasFP(MF) ? FramePtr : StackPtr;
+}
+
+unsigned X86RegisterInfo::getEHExceptionRegister() const {
+ llvm_unreachable("What is the exception register");
+ return 0;
+}
+
+unsigned X86RegisterInfo::getEHHandlerRegister() const {
+ llvm_unreachable("What is the exception handler register");
+ return 0;
+}
+
+namespace llvm {
+unsigned getX86SubSuperRegister(unsigned Reg, EVT VT, bool High) {
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return Reg;
+ case MVT::i8:
+ if (High) {
+ switch (Reg) {
+ default: return 0;
+ case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
+ return X86::AH;
+ case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
+ return X86::DH;
+ case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
+ return X86::CH;
+ case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
+ return X86::BH;
+ }
+ } else {
+ switch (Reg) {
+ default: return 0;
+ case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
+ return X86::AL;
+ case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
+ return X86::DL;
+ case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
+ return X86::CL;
+ case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
+ return X86::BL;
+ case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
+ return X86::SIL;
+ case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
+ return X86::DIL;
+ case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
+ return X86::BPL;
+ case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
+ return X86::SPL;
+ case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
+ return X86::R8B;
+ case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
+ return X86::R9B;
+ case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
+ return X86::R10B;
+ case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
+ return X86::R11B;
+ case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
+ return X86::R12B;
+ case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
+ return X86::R13B;
+ case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
+ return X86::R14B;
+ case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
+ return X86::R15B;
+ }
+ }
+ case MVT::i16:
+ switch (Reg) {
+ default: return Reg;
+ case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
+ return X86::AX;
+ case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
+ return X86::DX;
+ case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
+ return X86::CX;
+ case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
+ return X86::BX;
+ case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
+ return X86::SI;
+ case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
+ return X86::DI;
+ case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
+ return X86::BP;
+ case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
+ return X86::SP;
+ case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
+ return X86::R8W;
+ case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
+ return X86::R9W;
+ case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
+ return X86::R10W;
+ case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
+ return X86::R11W;
+ case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
+ return X86::R12W;
+ case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
+ return X86::R13W;
+ case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
+ return X86::R14W;
+ case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
+ return X86::R15W;
+ }
+ case MVT::i32:
+ switch (Reg) {
+ default: return Reg;
+ case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
+ return X86::EAX;
+ case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
+ return X86::EDX;
+ case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
+ return X86::ECX;
+ case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
+ return X86::EBX;
+ case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
+ return X86::ESI;
+ case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
+ return X86::EDI;
+ case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
+ return X86::EBP;
+ case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
+ return X86::ESP;
+ case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
+ return X86::R8D;
+ case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
+ return X86::R9D;
+ case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
+ return X86::R10D;
+ case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
+ return X86::R11D;
+ case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
+ return X86::R12D;
+ case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
+ return X86::R13D;
+ case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
+ return X86::R14D;
+ case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
+ return X86::R15D;
+ }
+ case MVT::i64:
+ switch (Reg) {
+ default: return Reg;
+ case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
+ return X86::RAX;
+ case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
+ return X86::RDX;
+ case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
+ return X86::RCX;
+ case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
+ return X86::RBX;
+ case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
+ return X86::RSI;
+ case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
+ return X86::RDI;
+ case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
+ return X86::RBP;
+ case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
+ return X86::RSP;
+ case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
+ return X86::R8;
+ case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
+ return X86::R9;
+ case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
+ return X86::R10;
+ case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
+ return X86::R11;
+ case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
+ return X86::R12;
+ case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
+ return X86::R13;
+ case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
+ return X86::R14;
+ case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
+ return X86::R15;
+ }
+ }
+
+ return Reg;
+}
+}
+
+namespace {
+ struct MSAH : public MachineFunctionPass {
+ static char ID;
+ MSAH() : MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF) {
+ const X86TargetMachine *TM =
+ static_cast<const X86TargetMachine *>(&MF.getTarget());
+ const TargetFrameLowering *TFI = TM->getFrameLowering();
+ MachineRegisterInfo &RI = MF.getRegInfo();
+ X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
+ unsigned StackAlignment = TFI->getStackAlignment();
+
+ // Be over-conservative: scan over all vreg defs and find whether vector
+ // registers are used. If yes, there is a possibility that vector register
+ // will be spilled and thus require dynamic stack realignment.
+ for (unsigned i = 0, e = RI.getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ if (RI.getRegClass(Reg)->getAlignment() > StackAlignment) {
+ FuncInfo->setForceFramePointer(true);
+ return true;
+ }
+ }
+ // Nothing to do
+ return false;
+ }
+
+ virtual const char *getPassName() const {
+ return "X86 Maximal Stack Alignment Check";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ };
+
+ char MSAH::ID = 0;
+}
+
+FunctionPass*
+llvm::createX86MaxStackAlignmentHeuristicPass() { return new MSAH(); }
diff --git a/contrib/llvm/lib/Target/X86/X86RegisterInfo.h b/contrib/llvm/lib/Target/X86/X86RegisterInfo.h
new file mode 100644
index 0000000..7d39c68
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86RegisterInfo.h
@@ -0,0 +1,138 @@
+//===- X86RegisterInfo.h - X86 Register Information Impl --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the X86 implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86REGISTERINFO_H
+#define X86REGISTERINFO_H
+
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#define GET_REGINFO_HEADER
+#include "X86GenRegisterInfo.inc"
+
+namespace llvm {
+ class Type;
+ class TargetInstrInfo;
+ class X86TargetMachine;
+
+class X86RegisterInfo : public X86GenRegisterInfo {
+public:
+ X86TargetMachine &TM;
+ const TargetInstrInfo &TII;
+
+private:
+ /// Is64Bit - Is the target 64-bits.
+ ///
+ bool Is64Bit;
+
+ /// IsWin64 - Is the target on of win64 flavours
+ ///
+ bool IsWin64;
+
+ /// SlotSize - Stack slot size in bytes.
+ ///
+ unsigned SlotSize;
+
+ /// StackPtr - X86 physical register used as stack ptr.
+ ///
+ unsigned StackPtr;
+
+ /// FramePtr - X86 physical register used as frame ptr.
+ ///
+ unsigned FramePtr;
+
+public:
+ X86RegisterInfo(X86TargetMachine &tm, const TargetInstrInfo &tii);
+
+ /// getX86RegNum - Returns the native X86 register number for the given LLVM
+ /// register identifier.
+ static unsigned getX86RegNum(unsigned RegNo);
+
+ // FIXME: This should be tablegen'd like getDwarfRegNum is
+ int getSEHRegNum(unsigned i) const;
+
+ /// getCompactUnwindRegNum - This function maps the register to the number for
+ /// compact unwind encoding. Return -1 if the register isn't valid.
+ int getCompactUnwindRegNum(unsigned RegNum, bool isEH) const;
+
+ /// Code Generation virtual methods...
+ ///
+
+ /// getMatchingSuperRegClass - Return a subclass of the specified register
+ /// class A so that each register in it has a sub-register of the
+ /// specified sub-register index which is in the specified register class B.
+ virtual const TargetRegisterClass *
+ getMatchingSuperRegClass(const TargetRegisterClass *A,
+ const TargetRegisterClass *B, unsigned Idx) const;
+
+ virtual const TargetRegisterClass *
+ getSubClassWithSubReg(const TargetRegisterClass *RC, unsigned Idx) const;
+
+ const TargetRegisterClass*
+ getLargestLegalSuperClass(const TargetRegisterClass *RC) const;
+
+ /// getPointerRegClass - Returns a TargetRegisterClass used for pointer
+ /// values.
+ const TargetRegisterClass *getPointerRegClass(unsigned Kind = 0) const;
+
+ /// getCrossCopyRegClass - Returns a legal register class to copy a register
+ /// in the specified class to or from. Returns NULL if it is possible to copy
+ /// between a two registers of the specified class.
+ const TargetRegisterClass *
+ getCrossCopyRegClass(const TargetRegisterClass *RC) const;
+
+ unsigned getRegPressureLimit(const TargetRegisterClass *RC,
+ MachineFunction &MF) const;
+
+ /// getCalleeSavedRegs - Return a null-terminated list of all of the
+ /// callee-save registers on this target.
+ const unsigned *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
+
+ /// getReservedRegs - Returns a bitset indexed by physical register number
+ /// indicating if a register is a special register that has particular uses and
+ /// should be considered unavailable at all times, e.g. SP, RA. This is used by
+ /// register scavenger to determine what registers are free.
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ bool canRealignStack(const MachineFunction &MF) const;
+
+ bool needsStackRealignment(const MachineFunction &MF) const;
+
+ bool hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg,
+ int &FrameIdx) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const;
+
+ void eliminateFrameIndex(MachineBasicBlock::iterator MI,
+ int SPAdj, RegScavenger *RS = NULL) const;
+
+ // Debug information queries.
+ unsigned getFrameRegister(const MachineFunction &MF) const;
+ unsigned getStackRegister() const { return StackPtr; }
+ // FIXME: Move to FrameInfok
+ unsigned getSlotSize() const { return SlotSize; }
+
+ // Exception handling queries.
+ unsigned getEHExceptionRegister() const;
+ unsigned getEHHandlerRegister() const;
+};
+
+// getX86SubSuperRegister - X86 utility function. It returns the sub or super
+// register of a specific X86 register.
+// e.g. getX86SubSuperRegister(X86::EAX, EVT::i16) return X86:AX
+unsigned getX86SubSuperRegister(unsigned, EVT, bool High=false);
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/X86RegisterInfo.td b/contrib/llvm/lib/Target/X86/X86RegisterInfo.td
new file mode 100644
index 0000000..9a7db36
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86RegisterInfo.td
@@ -0,0 +1,474 @@
+//===- X86RegisterInfo.td - Describe the X86 Register File --*- tablegen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the X86 Register file, defining the registers themselves,
+// aliases between the registers, and the register classes built out of the
+// registers.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Register definitions...
+//
+let Namespace = "X86" in {
+
+ // Subregister indices.
+ def sub_8bit : SubRegIndex;
+ def sub_8bit_hi : SubRegIndex;
+ def sub_16bit : SubRegIndex;
+ def sub_32bit : SubRegIndex;
+
+ def sub_ss : SubRegIndex;
+ def sub_sd : SubRegIndex;
+ def sub_xmm : SubRegIndex;
+
+
+ // In the register alias definitions below, we define which registers alias
+ // which others. We only specify which registers the small registers alias,
+ // because the register file generator is smart enough to figure out that
+ // AL aliases AX if we tell it that AX aliased AL (for example).
+
+ // Dwarf numbering is different for 32-bit and 64-bit, and there are
+ // variations by target as well. Currently the first entry is for X86-64,
+ // second - for EH on X86-32/Darwin and third is 'generic' one (X86-32/Linux
+ // and debug information on X86-32/Darwin)
+
+ // 8-bit registers
+ // Low registers
+ def AL : Register<"al">;
+ def DL : Register<"dl">;
+ def CL : Register<"cl">;
+ def BL : Register<"bl">;
+
+ // X86-64 only, requires REX.
+ let CostPerUse = 1 in {
+ def SIL : Register<"sil">;
+ def DIL : Register<"dil">;
+ def BPL : Register<"bpl">;
+ def SPL : Register<"spl">;
+ def R8B : Register<"r8b">;
+ def R9B : Register<"r9b">;
+ def R10B : Register<"r10b">;
+ def R11B : Register<"r11b">;
+ def R12B : Register<"r12b">;
+ def R13B : Register<"r13b">;
+ def R14B : Register<"r14b">;
+ def R15B : Register<"r15b">;
+ }
+
+ // High registers. On x86-64, these cannot be used in any instruction
+ // with a REX prefix.
+ def AH : Register<"ah">;
+ def DH : Register<"dh">;
+ def CH : Register<"ch">;
+ def BH : Register<"bh">;
+
+ // 16-bit registers
+ let SubRegIndices = [sub_8bit, sub_8bit_hi] in {
+ def AX : RegisterWithSubRegs<"ax", [AL,AH]>;
+ def DX : RegisterWithSubRegs<"dx", [DL,DH]>;
+ def CX : RegisterWithSubRegs<"cx", [CL,CH]>;
+ def BX : RegisterWithSubRegs<"bx", [BL,BH]>;
+ }
+ let SubRegIndices = [sub_8bit] in {
+ def SI : RegisterWithSubRegs<"si", [SIL]>;
+ def DI : RegisterWithSubRegs<"di", [DIL]>;
+ def BP : RegisterWithSubRegs<"bp", [BPL]>;
+ def SP : RegisterWithSubRegs<"sp", [SPL]>;
+ }
+ def IP : Register<"ip">;
+
+ // X86-64 only, requires REX.
+ let SubRegIndices = [sub_8bit], CostPerUse = 1 in {
+ def R8W : RegisterWithSubRegs<"r8w", [R8B]>;
+ def R9W : RegisterWithSubRegs<"r9w", [R9B]>;
+ def R10W : RegisterWithSubRegs<"r10w", [R10B]>;
+ def R11W : RegisterWithSubRegs<"r11w", [R11B]>;
+ def R12W : RegisterWithSubRegs<"r12w", [R12B]>;
+ def R13W : RegisterWithSubRegs<"r13w", [R13B]>;
+ def R14W : RegisterWithSubRegs<"r14w", [R14B]>;
+ def R15W : RegisterWithSubRegs<"r15w", [R15B]>;
+ }
+ // 32-bit registers
+ let SubRegIndices = [sub_16bit] in {
+ def EAX : RegisterWithSubRegs<"eax", [AX]>, DwarfRegNum<[-2, 0, 0]>;
+ def EDX : RegisterWithSubRegs<"edx", [DX]>, DwarfRegNum<[-2, 2, 2]>;
+ def ECX : RegisterWithSubRegs<"ecx", [CX]>, DwarfRegNum<[-2, 1, 1]>;
+ def EBX : RegisterWithSubRegs<"ebx", [BX]>, DwarfRegNum<[-2, 3, 3]>;
+ def ESI : RegisterWithSubRegs<"esi", [SI]>, DwarfRegNum<[-2, 6, 6]>;
+ def EDI : RegisterWithSubRegs<"edi", [DI]>, DwarfRegNum<[-2, 7, 7]>;
+ def EBP : RegisterWithSubRegs<"ebp", [BP]>, DwarfRegNum<[-2, 4, 5]>;
+ def ESP : RegisterWithSubRegs<"esp", [SP]>, DwarfRegNum<[-2, 5, 4]>;
+ def EIP : RegisterWithSubRegs<"eip", [IP]>, DwarfRegNum<[-2, 8, 8]>;
+
+ // X86-64 only, requires REX
+ let CostPerUse = 1 in {
+ def R8D : RegisterWithSubRegs<"r8d", [R8W]>;
+ def R9D : RegisterWithSubRegs<"r9d", [R9W]>;
+ def R10D : RegisterWithSubRegs<"r10d", [R10W]>;
+ def R11D : RegisterWithSubRegs<"r11d", [R11W]>;
+ def R12D : RegisterWithSubRegs<"r12d", [R12W]>;
+ def R13D : RegisterWithSubRegs<"r13d", [R13W]>;
+ def R14D : RegisterWithSubRegs<"r14d", [R14W]>;
+ def R15D : RegisterWithSubRegs<"r15d", [R15W]>;
+ }}
+
+ // 64-bit registers, X86-64 only
+ let SubRegIndices = [sub_32bit] in {
+ def RAX : RegisterWithSubRegs<"rax", [EAX]>, DwarfRegNum<[0, -2, -2]>;
+ def RDX : RegisterWithSubRegs<"rdx", [EDX]>, DwarfRegNum<[1, -2, -2]>;
+ def RCX : RegisterWithSubRegs<"rcx", [ECX]>, DwarfRegNum<[2, -2, -2]>;
+ def RBX : RegisterWithSubRegs<"rbx", [EBX]>, DwarfRegNum<[3, -2, -2]>;
+ def RSI : RegisterWithSubRegs<"rsi", [ESI]>, DwarfRegNum<[4, -2, -2]>;
+ def RDI : RegisterWithSubRegs<"rdi", [EDI]>, DwarfRegNum<[5, -2, -2]>;
+ def RBP : RegisterWithSubRegs<"rbp", [EBP]>, DwarfRegNum<[6, -2, -2]>;
+ def RSP : RegisterWithSubRegs<"rsp", [ESP]>, DwarfRegNum<[7, -2, -2]>;
+
+ // These also require REX.
+ let CostPerUse = 1 in {
+ def R8 : RegisterWithSubRegs<"r8", [R8D]>, DwarfRegNum<[8, -2, -2]>;
+ def R9 : RegisterWithSubRegs<"r9", [R9D]>, DwarfRegNum<[9, -2, -2]>;
+ def R10 : RegisterWithSubRegs<"r10", [R10D]>, DwarfRegNum<[10, -2, -2]>;
+ def R11 : RegisterWithSubRegs<"r11", [R11D]>, DwarfRegNum<[11, -2, -2]>;
+ def R12 : RegisterWithSubRegs<"r12", [R12D]>, DwarfRegNum<[12, -2, -2]>;
+ def R13 : RegisterWithSubRegs<"r13", [R13D]>, DwarfRegNum<[13, -2, -2]>;
+ def R14 : RegisterWithSubRegs<"r14", [R14D]>, DwarfRegNum<[14, -2, -2]>;
+ def R15 : RegisterWithSubRegs<"r15", [R15D]>, DwarfRegNum<[15, -2, -2]>;
+ def RIP : RegisterWithSubRegs<"rip", [EIP]>, DwarfRegNum<[16, -2, -2]>;
+ }}
+
+ // MMX Registers. These are actually aliased to ST0 .. ST7
+ def MM0 : Register<"mm0">, DwarfRegNum<[41, 29, 29]>;
+ def MM1 : Register<"mm1">, DwarfRegNum<[42, 30, 30]>;
+ def MM2 : Register<"mm2">, DwarfRegNum<[43, 31, 31]>;
+ def MM3 : Register<"mm3">, DwarfRegNum<[44, 32, 32]>;
+ def MM4 : Register<"mm4">, DwarfRegNum<[45, 33, 33]>;
+ def MM5 : Register<"mm5">, DwarfRegNum<[46, 34, 34]>;
+ def MM6 : Register<"mm6">, DwarfRegNum<[47, 35, 35]>;
+ def MM7 : Register<"mm7">, DwarfRegNum<[48, 36, 36]>;
+
+ // Pseudo Floating Point registers
+ def FP0 : Register<"fp0">;
+ def FP1 : Register<"fp1">;
+ def FP2 : Register<"fp2">;
+ def FP3 : Register<"fp3">;
+ def FP4 : Register<"fp4">;
+ def FP5 : Register<"fp5">;
+ def FP6 : Register<"fp6">;
+
+ // XMM Registers, used by the various SSE instruction set extensions.
+ // The sub_ss and sub_sd subregs are the same registers with another regclass.
+ let CompositeIndices = [(sub_ss), (sub_sd)] in {
+ def XMM0: Register<"xmm0">, DwarfRegNum<[17, 21, 21]>;
+ def XMM1: Register<"xmm1">, DwarfRegNum<[18, 22, 22]>;
+ def XMM2: Register<"xmm2">, DwarfRegNum<[19, 23, 23]>;
+ def XMM3: Register<"xmm3">, DwarfRegNum<[20, 24, 24]>;
+ def XMM4: Register<"xmm4">, DwarfRegNum<[21, 25, 25]>;
+ def XMM5: Register<"xmm5">, DwarfRegNum<[22, 26, 26]>;
+ def XMM6: Register<"xmm6">, DwarfRegNum<[23, 27, 27]>;
+ def XMM7: Register<"xmm7">, DwarfRegNum<[24, 28, 28]>;
+
+ // X86-64 only
+ let CostPerUse = 1 in {
+ def XMM8: Register<"xmm8">, DwarfRegNum<[25, -2, -2]>;
+ def XMM9: Register<"xmm9">, DwarfRegNum<[26, -2, -2]>;
+ def XMM10: Register<"xmm10">, DwarfRegNum<[27, -2, -2]>;
+ def XMM11: Register<"xmm11">, DwarfRegNum<[28, -2, -2]>;
+ def XMM12: Register<"xmm12">, DwarfRegNum<[29, -2, -2]>;
+ def XMM13: Register<"xmm13">, DwarfRegNum<[30, -2, -2]>;
+ def XMM14: Register<"xmm14">, DwarfRegNum<[31, -2, -2]>;
+ def XMM15: Register<"xmm15">, DwarfRegNum<[32, -2, -2]>;
+ }}
+
+ // YMM Registers, used by AVX instructions
+ let SubRegIndices = [sub_xmm] in {
+ def YMM0: RegisterWithSubRegs<"ymm0", [XMM0]>, DwarfRegAlias<XMM0>;
+ def YMM1: RegisterWithSubRegs<"ymm1", [XMM1]>, DwarfRegAlias<XMM1>;
+ def YMM2: RegisterWithSubRegs<"ymm2", [XMM2]>, DwarfRegAlias<XMM2>;
+ def YMM3: RegisterWithSubRegs<"ymm3", [XMM3]>, DwarfRegAlias<XMM3>;
+ def YMM4: RegisterWithSubRegs<"ymm4", [XMM4]>, DwarfRegAlias<XMM4>;
+ def YMM5: RegisterWithSubRegs<"ymm5", [XMM5]>, DwarfRegAlias<XMM5>;
+ def YMM6: RegisterWithSubRegs<"ymm6", [XMM6]>, DwarfRegAlias<XMM6>;
+ def YMM7: RegisterWithSubRegs<"ymm7", [XMM7]>, DwarfRegAlias<XMM7>;
+ def YMM8: RegisterWithSubRegs<"ymm8", [XMM8]>, DwarfRegAlias<XMM8>;
+ def YMM9: RegisterWithSubRegs<"ymm9", [XMM9]>, DwarfRegAlias<XMM9>;
+ def YMM10: RegisterWithSubRegs<"ymm10", [XMM10]>, DwarfRegAlias<XMM10>;
+ def YMM11: RegisterWithSubRegs<"ymm11", [XMM11]>, DwarfRegAlias<XMM11>;
+ def YMM12: RegisterWithSubRegs<"ymm12", [XMM12]>, DwarfRegAlias<XMM12>;
+ def YMM13: RegisterWithSubRegs<"ymm13", [XMM13]>, DwarfRegAlias<XMM13>;
+ def YMM14: RegisterWithSubRegs<"ymm14", [XMM14]>, DwarfRegAlias<XMM14>;
+ def YMM15: RegisterWithSubRegs<"ymm15", [XMM15]>, DwarfRegAlias<XMM15>;
+ }
+
+ class STRegister<string Name, list<Register> A> : Register<Name> {
+ let Aliases = A;
+ }
+
+ // Floating point stack registers. These don't map one-to-one to the FP
+ // pseudo registers, but we still mark them as aliasing FP registers. That
+ // way both kinds can be live without exceeding the stack depth. ST registers
+ // are only live around inline assembly.
+ def ST0 : STRegister<"st(0)", []>, DwarfRegNum<[33, 12, 11]>;
+ def ST1 : STRegister<"st(1)", [FP6]>, DwarfRegNum<[34, 13, 12]>;
+ def ST2 : STRegister<"st(2)", [FP5]>, DwarfRegNum<[35, 14, 13]>;
+ def ST3 : STRegister<"st(3)", [FP4]>, DwarfRegNum<[36, 15, 14]>;
+ def ST4 : STRegister<"st(4)", [FP3]>, DwarfRegNum<[37, 16, 15]>;
+ def ST5 : STRegister<"st(5)", [FP2]>, DwarfRegNum<[38, 17, 16]>;
+ def ST6 : STRegister<"st(6)", [FP1]>, DwarfRegNum<[39, 18, 17]>;
+ def ST7 : STRegister<"st(7)", [FP0]>, DwarfRegNum<[40, 19, 18]>;
+
+ // Status flags register
+ def EFLAGS : Register<"flags">;
+
+ // Segment registers
+ def CS : Register<"cs">;
+ def DS : Register<"ds">;
+ def SS : Register<"ss">;
+ def ES : Register<"es">;
+ def FS : Register<"fs">;
+ def GS : Register<"gs">;
+
+ // Debug registers
+ def DR0 : Register<"dr0">;
+ def DR1 : Register<"dr1">;
+ def DR2 : Register<"dr2">;
+ def DR3 : Register<"dr3">;
+ def DR4 : Register<"dr4">;
+ def DR5 : Register<"dr5">;
+ def DR6 : Register<"dr6">;
+ def DR7 : Register<"dr7">;
+
+ // Control registers
+ def CR0 : Register<"cr0">;
+ def CR1 : Register<"cr1">;
+ def CR2 : Register<"cr2">;
+ def CR3 : Register<"cr3">;
+ def CR4 : Register<"cr4">;
+ def CR5 : Register<"cr5">;
+ def CR6 : Register<"cr6">;
+ def CR7 : Register<"cr7">;
+ def CR8 : Register<"cr8">;
+ def CR9 : Register<"cr9">;
+ def CR10 : Register<"cr10">;
+ def CR11 : Register<"cr11">;
+ def CR12 : Register<"cr12">;
+ def CR13 : Register<"cr13">;
+ def CR14 : Register<"cr14">;
+ def CR15 : Register<"cr15">;
+
+ // Pseudo index registers
+ def EIZ : Register<"eiz">;
+ def RIZ : Register<"riz">;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Register Class Definitions... now that we have all of the pieces, define the
+// top-level register classes. The order specified in the register list is
+// implicitly defined to be the register allocation order.
+//
+
+// List call-clobbered registers before callee-save registers. RBX, RBP, (and
+// R12, R13, R14, and R15 for X86-64) are callee-save registers.
+// In 64-mode, there are 12 additional i8 registers, SIL, DIL, BPL, SPL, and
+// R8B, ... R15B.
+// Allocate R12 and R13 last, as these require an extra byte when
+// encoded in x86_64 instructions.
+// FIXME: Allow AH, CH, DH, BH to be used as general-purpose registers in
+// 64-bit mode. The main complication is that they cannot be encoded in an
+// instruction requiring a REX prefix, while SIL, DIL, BPL, R8D, etc.
+// require a REX prefix. For example, "addb %ah, %dil" and "movzbl %ah, %r8d"
+// cannot be encoded.
+def GR8 : RegisterClass<"X86", [i8], 8,
+ (add AL, CL, DL, AH, CH, DH, BL, BH, SIL, DIL, BPL, SPL,
+ R8B, R9B, R10B, R11B, R14B, R15B, R12B, R13B)> {
+ let AltOrders = [(sub GR8, AH, BH, CH, DH)];
+ let AltOrderSelect = [{
+ return MF.getTarget().getSubtarget<X86Subtarget>().is64Bit();
+ }];
+}
+
+def GR16 : RegisterClass<"X86", [i16], 16,
+ (add AX, CX, DX, SI, DI, BX, BP, SP,
+ R8W, R9W, R10W, R11W, R14W, R15W, R12W, R13W)> {
+ let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi)];
+}
+
+def GR32 : RegisterClass<"X86", [i32], 32,
+ (add EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP,
+ R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D)> {
+ let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), (GR16 sub_16bit)];
+}
+
+// GR64 - 64-bit GPRs. This oddly includes RIP, which isn't accurate, since
+// RIP isn't really a register and it can't be used anywhere except in an
+// address, but it doesn't cause trouble.
+def GR64 : RegisterClass<"X86", [i64], 64,
+ (add RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
+ RBX, R14, R15, R12, R13, RBP, RSP, RIP)> {
+ let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi),
+ (GR16 sub_16bit),
+ (GR32 sub_32bit)];
+}
+
+// Segment registers for use by MOV instructions (and others) that have a
+// segment register as one operand. Always contain a 16-bit segment
+// descriptor.
+def SEGMENT_REG : RegisterClass<"X86", [i16], 16, (add CS, DS, SS, ES, FS, GS)>;
+
+// Debug registers.
+def DEBUG_REG : RegisterClass<"X86", [i32], 32, (sequence "DR%u", 0, 7)>;
+
+// Control registers.
+def CONTROL_REG : RegisterClass<"X86", [i64], 64, (sequence "CR%u", 0, 15)>;
+
+// GR8_ABCD_L, GR8_ABCD_H, GR16_ABCD, GR32_ABCD, GR64_ABCD - Subclasses of
+// GR8, GR16, GR32, and GR64 which contain just the "a" "b", "c", and "d"
+// registers. On x86-32, GR16_ABCD and GR32_ABCD are classes for registers
+// that support 8-bit subreg operations. On x86-64, GR16_ABCD, GR32_ABCD,
+// and GR64_ABCD are classes for registers that support 8-bit h-register
+// operations.
+def GR8_ABCD_L : RegisterClass<"X86", [i8], 8, (add AL, CL, DL, BL)>;
+def GR8_ABCD_H : RegisterClass<"X86", [i8], 8, (add AH, CH, DH, BH)>;
+def GR16_ABCD : RegisterClass<"X86", [i16], 16, (add AX, CX, DX, BX)> {
+ let SubRegClasses = [(GR8_ABCD_L sub_8bit), (GR8_ABCD_H sub_8bit_hi)];
+}
+def GR32_ABCD : RegisterClass<"X86", [i32], 32, (add EAX, ECX, EDX, EBX)> {
+ let SubRegClasses = [(GR8_ABCD_L sub_8bit),
+ (GR8_ABCD_H sub_8bit_hi),
+ (GR16_ABCD sub_16bit)];
+}
+def GR64_ABCD : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX, RBX)> {
+ let SubRegClasses = [(GR8_ABCD_L sub_8bit),
+ (GR8_ABCD_H sub_8bit_hi),
+ (GR16_ABCD sub_16bit),
+ (GR32_ABCD sub_32bit)];
+}
+def GR32_TC : RegisterClass<"X86", [i32], 32, (add EAX, ECX, EDX)> {
+ let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), (GR16 sub_16bit)];
+}
+def GR64_TC : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX, RSI, RDI,
+ R8, R9, R11, RIP)> {
+ let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi),
+ (GR16 sub_16bit),
+ (GR32_TC sub_32bit)];
+}
+
+def GR64_TCW64 : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX,
+ R8, R9, R11)>;
+
+// GR8_NOREX - GR8 registers which do not require a REX prefix.
+def GR8_NOREX : RegisterClass<"X86", [i8], 8,
+ (add AL, CL, DL, AH, CH, DH, BL, BH)> {
+ let AltOrders = [(sub GR8_NOREX, AH, BH, CH, DH)];
+ let AltOrderSelect = [{
+ return MF.getTarget().getSubtarget<X86Subtarget>().is64Bit();
+ }];
+}
+// GR16_NOREX - GR16 registers which do not require a REX prefix.
+def GR16_NOREX : RegisterClass<"X86", [i16], 16,
+ (add AX, CX, DX, SI, DI, BX, BP, SP)> {
+ let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi)];
+}
+// GR32_NOREX - GR32 registers which do not require a REX prefix.
+def GR32_NOREX : RegisterClass<"X86", [i32], 32,
+ (add EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP)> {
+ let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi),
+ (GR16_NOREX sub_16bit)];
+}
+// GR64_NOREX - GR64 registers which do not require a REX prefix.
+def GR64_NOREX : RegisterClass<"X86", [i64], 64,
+ (add RAX, RCX, RDX, RSI, RDI, RBX, RBP, RSP, RIP)> {
+ let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi),
+ (GR16_NOREX sub_16bit),
+ (GR32_NOREX sub_32bit)];
+}
+
+// GR32_NOAX - GR32 registers except EAX. Used by AddRegFrm of XCHG32 in 64-bit
+// mode to prevent encoding using the 0x90 NOP encoding. xchg %eax, %eax needs
+// to clear upper 32-bits of RAX so is not a NOP.
+def GR32_NOAX : RegisterClass<"X86", [i32], 32, (sub GR32, EAX)> {
+ let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), (GR16 sub_16bit)];
+}
+
+// GR32_NOSP - GR32 registers except ESP.
+def GR32_NOSP : RegisterClass<"X86", [i32], 32, (sub GR32, ESP)> {
+ let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), (GR16 sub_16bit)];
+}
+
+// GR64_NOSP - GR64 registers except RSP (and RIP).
+def GR64_NOSP : RegisterClass<"X86", [i64], 64, (sub GR64, RSP, RIP)> {
+ let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi),
+ (GR16 sub_16bit),
+ (GR32_NOSP sub_32bit)];
+}
+
+// GR32_NOREX_NOSP - GR32 registers which do not require a REX prefix except
+// ESP.
+def GR32_NOREX_NOSP : RegisterClass<"X86", [i32], 32,
+ (and GR32_NOREX, GR32_NOSP)> {
+ let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi),
+ (GR16_NOREX sub_16bit)];
+}
+
+// GR64_NOREX_NOSP - GR64_NOREX registers except RSP.
+def GR64_NOREX_NOSP : RegisterClass<"X86", [i64], 64,
+ (and GR64_NOREX, GR64_NOSP)> {
+ let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi),
+ (GR16_NOREX sub_16bit),
+ (GR32_NOREX_NOSP sub_32bit)];
+}
+
+// A class to support the 'A' assembler constraint: EAX then EDX.
+def GR32_AD : RegisterClass<"X86", [i32], 32, (add EAX, EDX)> {
+ let SubRegClasses = [(GR8_ABCD_L sub_8bit),
+ (GR8_ABCD_H sub_8bit_hi),
+ (GR16_ABCD sub_16bit)];
+}
+
+// Scalar SSE2 floating point registers.
+def FR32 : RegisterClass<"X86", [f32], 32, (sequence "XMM%u", 0, 15)>;
+
+def FR64 : RegisterClass<"X86", [f64], 64, (add FR32)>;
+
+
+// FIXME: This sets up the floating point register files as though they are f64
+// values, though they really are f80 values. This will cause us to spill
+// values as 64-bit quantities instead of 80-bit quantities, which is much much
+// faster on common hardware. In reality, this should be controlled by a
+// command line option or something.
+
+def RFP32 : RegisterClass<"X86",[f32], 32, (sequence "FP%u", 0, 6)>;
+def RFP64 : RegisterClass<"X86",[f64], 32, (add RFP32)>;
+def RFP80 : RegisterClass<"X86",[f80], 32, (add RFP32)>;
+
+// Floating point stack registers (these are not allocatable by the
+// register allocator - the floating point stackifier is responsible
+// for transforming FPn allocations to STn registers)
+def RST : RegisterClass<"X86", [f80, f64, f32], 32, (sequence "ST%u", 0, 7)> {
+ let isAllocatable = 0;
+}
+
+// Generic vector registers: VR64 and VR128.
+def VR64: RegisterClass<"X86", [x86mmx], 64, (sequence "MM%u", 0, 7)>;
+def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ 128, (add FR32)> {
+ let SubRegClasses = [(FR32 sub_ss), (FR64 sub_sd)];
+}
+
+def VR256 : RegisterClass<"X86", [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ 256, (sequence "YMM%u", 0, 15)> {
+ let SubRegClasses = [(FR32 sub_ss), (FR64 sub_sd), (VR128 sub_xmm)];
+}
+
+// Status flags registers.
+def CCR : RegisterClass<"X86", [i32], 32, (add EFLAGS)> {
+ let CopyCost = -1; // Don't allow copying of status registers.
+ let isAllocatable = 0;
+}
diff --git a/contrib/llvm/lib/Target/X86/X86Relocations.h b/contrib/llvm/lib/Target/X86/X86Relocations.h
new file mode 100644
index 0000000..990962d
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86Relocations.h
@@ -0,0 +1,52 @@
+//===- X86Relocations.h - X86 Code Relocations ------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the X86 target-specific relocation types.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86RELOCATIONS_H
+#define X86RELOCATIONS_H
+
+#include "llvm/CodeGen/MachineRelocation.h"
+
+namespace llvm {
+ namespace X86 {
+ /// RelocationType - An enum for the x86 relocation codes. Note that
+ /// the terminology here doesn't follow x86 convention - word means
+ /// 32-bit and dword means 64-bit. The relocations will be treated
+ /// by JIT or ObjectCode emitters, this is transparent to the x86 code
+ /// emitter but JIT and ObjectCode will treat them differently
+ enum RelocationType {
+ /// reloc_pcrel_word - PC relative relocation, add the relocated value to
+ /// the value already in memory, after we adjust it for where the PC is.
+ reloc_pcrel_word = 0,
+
+ /// reloc_picrel_word - PIC base relative relocation, add the relocated
+ /// value to the value already in memory, after we adjust it for where the
+ /// PIC base is.
+ reloc_picrel_word = 1,
+
+ /// reloc_absolute_word - absolute relocation, just add the relocated
+ /// value to the value already in memory.
+ reloc_absolute_word = 2,
+
+ /// reloc_absolute_word_sext - absolute relocation, just add the relocated
+ /// value to the value already in memory. In object files, it represents a
+ /// value which must be sign-extended when resolving the relocation.
+ reloc_absolute_word_sext = 3,
+
+ /// reloc_absolute_dword - absolute relocation, just add the relocated
+ /// value to the value already in memory.
+ reloc_absolute_dword = 4
+ };
+ }
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp b/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
new file mode 100644
index 0000000..6406bce
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -0,0 +1,259 @@
+//===-- X86SelectionDAGInfo.cpp - X86 SelectionDAG Info -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the X86SelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "x86-selectiondag-info"
+#include "X86TargetMachine.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+using namespace llvm;
+
+X86SelectionDAGInfo::X86SelectionDAGInfo(const X86TargetMachine &TM) :
+ TargetSelectionDAGInfo(TM),
+ Subtarget(&TM.getSubtarget<X86Subtarget>()),
+ TLI(*TM.getTargetLowering()) {
+}
+
+X86SelectionDAGInfo::~X86SelectionDAGInfo() {
+}
+
+SDValue
+X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain,
+ SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ bool isVolatile,
+ MachinePointerInfo DstPtrInfo) const {
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+
+ // If to a segment-relative address space, use the default lowering.
+ if (DstPtrInfo.getAddrSpace() >= 256)
+ return SDValue();
+
+ // If not DWORD aligned or size is more than the threshold, call the library.
+ // The libc version is likely to be faster for these cases. It can use the
+ // address value and run time information about the CPU.
+ if ((Align & 3) != 0 ||
+ !ConstantSize ||
+ ConstantSize->getZExtValue() >
+ Subtarget->getMaxInlineSizeThreshold()) {
+ SDValue InFlag(0, 0);
+
+ // Check to see if there is a specialized entry-point for memory zeroing.
+ ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src);
+
+ if (const char *bzeroEntry = V &&
+ V->isNullValue() ? Subtarget->getBZeroEntry() : 0) {
+ EVT IntPtr = TLI.getPointerTy();
+ Type *IntPtrTy = getTargetData()->getIntPtrType(*DAG.getContext());
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Node = Dst;
+ Entry.Ty = IntPtrTy;
+ Args.push_back(Entry);
+ Entry.Node = Size;
+ Args.push_back(Entry);
+ std::pair<SDValue,SDValue> CallResult =
+ TLI.LowerCallTo(Chain, Type::getVoidTy(*DAG.getContext()),
+ false, false, false, false,
+ 0, CallingConv::C, false, /*isReturnValueUsed=*/false,
+ DAG.getExternalSymbol(bzeroEntry, IntPtr), Args,
+ DAG, dl);
+ return CallResult.second;
+ }
+
+ // Otherwise have the target-independent code call memset.
+ return SDValue();
+ }
+
+ uint64_t SizeVal = ConstantSize->getZExtValue();
+ SDValue InFlag(0, 0);
+ EVT AVT;
+ SDValue Count;
+ ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Src);
+ unsigned BytesLeft = 0;
+ bool TwoRepStos = false;
+ if (ValC) {
+ unsigned ValReg;
+ uint64_t Val = ValC->getZExtValue() & 255;
+
+ // If the value is a constant, then we can potentially use larger sets.
+ switch (Align & 3) {
+ case 2: // WORD aligned
+ AVT = MVT::i16;
+ ValReg = X86::AX;
+ Val = (Val << 8) | Val;
+ break;
+ case 0: // DWORD aligned
+ AVT = MVT::i32;
+ ValReg = X86::EAX;
+ Val = (Val << 8) | Val;
+ Val = (Val << 16) | Val;
+ if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) { // QWORD aligned
+ AVT = MVT::i64;
+ ValReg = X86::RAX;
+ Val = (Val << 32) | Val;
+ }
+ break;
+ default: // Byte aligned
+ AVT = MVT::i8;
+ ValReg = X86::AL;
+ Count = DAG.getIntPtrConstant(SizeVal);
+ break;
+ }
+
+ if (AVT.bitsGT(MVT::i8)) {
+ unsigned UBytes = AVT.getSizeInBits() / 8;
+ Count = DAG.getIntPtrConstant(SizeVal / UBytes);
+ BytesLeft = SizeVal % UBytes;
+ }
+
+ Chain = DAG.getCopyToReg(Chain, dl, ValReg, DAG.getConstant(Val, AVT),
+ InFlag);
+ InFlag = Chain.getValue(1);
+ } else {
+ AVT = MVT::i8;
+ Count = DAG.getIntPtrConstant(SizeVal);
+ Chain = DAG.getCopyToReg(Chain, dl, X86::AL, Src, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RCX :
+ X86::ECX,
+ Count, InFlag);
+ InFlag = Chain.getValue(1);
+ Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI :
+ X86::EDI,
+ Dst, InFlag);
+ InFlag = Chain.getValue(1);
+
+ SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag };
+ Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops, array_lengthof(Ops));
+
+ if (TwoRepStos) {
+ InFlag = Chain.getValue(1);
+ Count = Size;
+ EVT CVT = Count.getValueType();
+ SDValue Left = DAG.getNode(ISD::AND, dl, CVT, Count,
+ DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT));
+ Chain = DAG.getCopyToReg(Chain, dl, (CVT == MVT::i64) ? X86::RCX :
+ X86::ECX,
+ Left, InFlag);
+ InFlag = Chain.getValue(1);
+ Tys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue Ops[] = { Chain, DAG.getValueType(MVT::i8), InFlag };
+ Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops, array_lengthof(Ops));
+ } else if (BytesLeft) {
+ // Handle the last 1 - 7 bytes.
+ unsigned Offset = SizeVal - BytesLeft;
+ EVT AddrVT = Dst.getValueType();
+ EVT SizeVT = Size.getValueType();
+
+ Chain = DAG.getMemset(Chain, dl,
+ DAG.getNode(ISD::ADD, dl, AddrVT, Dst,
+ DAG.getConstant(Offset, AddrVT)),
+ Src,
+ DAG.getConstant(BytesLeft, SizeVT),
+ Align, isVolatile, DstPtrInfo.getWithOffset(Offset));
+ }
+
+ // TODO: Use a Tokenfactor, as in memcpy, instead of a single chain.
+ return Chain;
+}
+
+SDValue
+X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain, SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ bool isVolatile, bool AlwaysInline,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) const {
+ // This requires the copy size to be a constant, preferably
+ // within a subtarget-specific limit.
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ if (!ConstantSize)
+ return SDValue();
+ uint64_t SizeVal = ConstantSize->getZExtValue();
+ if (!AlwaysInline && SizeVal > Subtarget->getMaxInlineSizeThreshold())
+ return SDValue();
+
+ /// If not DWORD aligned, it is more efficient to call the library. However
+ /// if calling the library is not allowed (AlwaysInline), then soldier on as
+ /// the code generated here is better than the long load-store sequence we
+ /// would otherwise get.
+ if (!AlwaysInline && (Align & 3) != 0)
+ return SDValue();
+
+ // If to a segment-relative address space, use the default lowering.
+ if (DstPtrInfo.getAddrSpace() >= 256 ||
+ SrcPtrInfo.getAddrSpace() >= 256)
+ return SDValue();
+
+ MVT AVT;
+ if (Align & 1)
+ AVT = MVT::i8;
+ else if (Align & 2)
+ AVT = MVT::i16;
+ else if (Align & 4)
+ // DWORD aligned
+ AVT = MVT::i32;
+ else
+ // QWORD aligned
+ AVT = Subtarget->is64Bit() ? MVT::i64 : MVT::i32;
+
+ unsigned UBytes = AVT.getSizeInBits() / 8;
+ unsigned CountVal = SizeVal / UBytes;
+ SDValue Count = DAG.getIntPtrConstant(CountVal);
+ unsigned BytesLeft = SizeVal % UBytes;
+
+ SDValue InFlag(0, 0);
+ Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RCX :
+ X86::ECX,
+ Count, InFlag);
+ InFlag = Chain.getValue(1);
+ Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI :
+ X86::EDI,
+ Dst, InFlag);
+ InFlag = Chain.getValue(1);
+ Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RSI :
+ X86::ESI,
+ Src, InFlag);
+ InFlag = Chain.getValue(1);
+
+ SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag };
+ SDValue RepMovs = DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops,
+ array_lengthof(Ops));
+
+ SmallVector<SDValue, 4> Results;
+ Results.push_back(RepMovs);
+ if (BytesLeft) {
+ // Handle the last 1 - 7 bytes.
+ unsigned Offset = SizeVal - BytesLeft;
+ EVT DstVT = Dst.getValueType();
+ EVT SrcVT = Src.getValueType();
+ EVT SizeVT = Size.getValueType();
+ Results.push_back(DAG.getMemcpy(Chain, dl,
+ DAG.getNode(ISD::ADD, dl, DstVT, Dst,
+ DAG.getConstant(Offset, DstVT)),
+ DAG.getNode(ISD::ADD, dl, SrcVT, Src,
+ DAG.getConstant(Offset, SrcVT)),
+ DAG.getConstant(BytesLeft, SizeVT),
+ Align, isVolatile, AlwaysInline,
+ DstPtrInfo.getWithOffset(Offset),
+ SrcPtrInfo.getWithOffset(Offset)));
+ }
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &Results[0], Results.size());
+}
diff --git a/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.h b/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.h
new file mode 100644
index 0000000..d1d66fe
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.h
@@ -0,0 +1,56 @@
+//===-- X86SelectionDAGInfo.h - X86 SelectionDAG Info -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the X86 subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86SELECTIONDAGINFO_H
+#define X86SELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class X86TargetLowering;
+class X86TargetMachine;
+class X86Subtarget;
+
+class X86SelectionDAGInfo : public TargetSelectionDAGInfo {
+ /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const X86Subtarget *Subtarget;
+
+ const X86TargetLowering &TLI;
+
+public:
+ explicit X86SelectionDAGInfo(const X86TargetMachine &TM);
+ ~X86SelectionDAGInfo();
+
+ virtual
+ SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain,
+ SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ bool isVolatile,
+ MachinePointerInfo DstPtrInfo) const;
+
+ virtual
+ SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain,
+ SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ bool isVolatile, bool AlwaysInline,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) const;
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/X86Subtarget.cpp b/contrib/llvm/lib/Target/X86/X86Subtarget.cpp
new file mode 100644
index 0000000..7064dd0
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86Subtarget.cpp
@@ -0,0 +1,373 @@
+//===-- X86Subtarget.cpp - X86 Subtarget Information ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the X86 specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "subtarget"
+#include "X86Subtarget.h"
+#include "X86InstrInfo.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Host.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/SmallVector.h"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "X86GenSubtargetInfo.inc"
+
+using namespace llvm;
+
+#if defined(_MSC_VER)
+#include <intrin.h>
+#endif
+
+/// ClassifyBlockAddressReference - Classify a blockaddress reference for the
+/// current subtarget according to how we should reference it in a non-pcrel
+/// context.
+unsigned char X86Subtarget::
+ClassifyBlockAddressReference() const {
+ if (isPICStyleGOT()) // 32-bit ELF targets.
+ return X86II::MO_GOTOFF;
+
+ if (isPICStyleStubPIC()) // Darwin/32 in PIC mode.
+ return X86II::MO_PIC_BASE_OFFSET;
+
+ // Direct static reference to label.
+ return X86II::MO_NO_FLAG;
+}
+
+/// ClassifyGlobalReference - Classify a global variable reference for the
+/// current subtarget according to how we should reference it in a non-pcrel
+/// context.
+unsigned char X86Subtarget::
+ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const {
+ // DLLImport only exists on windows, it is implemented as a load from a
+ // DLLIMPORT stub.
+ if (GV->hasDLLImportLinkage())
+ return X86II::MO_DLLIMPORT;
+
+ // Determine whether this is a reference to a definition or a declaration.
+ // Materializable GVs (in JIT lazy compilation mode) do not require an extra
+ // load from stub.
+ bool isDecl = GV->hasAvailableExternallyLinkage();
+ if (GV->isDeclaration() && !GV->isMaterializable())
+ isDecl = true;
+
+ // X86-64 in PIC mode.
+ if (isPICStyleRIPRel()) {
+ // Large model never uses stubs.
+ if (TM.getCodeModel() == CodeModel::Large)
+ return X86II::MO_NO_FLAG;
+
+ if (isTargetDarwin()) {
+ // If symbol visibility is hidden, the extra load is not needed if
+ // target is x86-64 or the symbol is definitely defined in the current
+ // translation unit.
+ if (GV->hasDefaultVisibility() &&
+ (isDecl || GV->isWeakForLinker()))
+ return X86II::MO_GOTPCREL;
+ } else if (!isTargetWin64()) {
+ assert(isTargetELF() && "Unknown rip-relative target");
+
+ // Extra load is needed for all externally visible.
+ if (!GV->hasLocalLinkage() && GV->hasDefaultVisibility())
+ return X86II::MO_GOTPCREL;
+ }
+
+ return X86II::MO_NO_FLAG;
+ }
+
+ if (isPICStyleGOT()) { // 32-bit ELF targets.
+ // Extra load is needed for all externally visible.
+ if (GV->hasLocalLinkage() || GV->hasHiddenVisibility())
+ return X86II::MO_GOTOFF;
+ return X86II::MO_GOT;
+ }
+
+ if (isPICStyleStubPIC()) { // Darwin/32 in PIC mode.
+ // Determine whether we have a stub reference and/or whether the reference
+ // is relative to the PIC base or not.
+
+ // If this is a strong reference to a definition, it is definitely not
+ // through a stub.
+ if (!isDecl && !GV->isWeakForLinker())
+ return X86II::MO_PIC_BASE_OFFSET;
+
+ // Unless we have a symbol with hidden visibility, we have to go through a
+ // normal $non_lazy_ptr stub because this symbol might be resolved late.
+ if (!GV->hasHiddenVisibility()) // Non-hidden $non_lazy_ptr reference.
+ return X86II::MO_DARWIN_NONLAZY_PIC_BASE;
+
+ // If symbol visibility is hidden, we have a stub for common symbol
+ // references and external declarations.
+ if (isDecl || GV->hasCommonLinkage()) {
+ // Hidden $non_lazy_ptr reference.
+ return X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE;
+ }
+
+ // Otherwise, no stub.
+ return X86II::MO_PIC_BASE_OFFSET;
+ }
+
+ if (isPICStyleStubNoDynamic()) { // Darwin/32 in -mdynamic-no-pic mode.
+ // Determine whether we have a stub reference.
+
+ // If this is a strong reference to a definition, it is definitely not
+ // through a stub.
+ if (!isDecl && !GV->isWeakForLinker())
+ return X86II::MO_NO_FLAG;
+
+ // Unless we have a symbol with hidden visibility, we have to go through a
+ // normal $non_lazy_ptr stub because this symbol might be resolved late.
+ if (!GV->hasHiddenVisibility()) // Non-hidden $non_lazy_ptr reference.
+ return X86II::MO_DARWIN_NONLAZY;
+
+ // Otherwise, no stub.
+ return X86II::MO_NO_FLAG;
+ }
+
+ // Direct static reference to global.
+ return X86II::MO_NO_FLAG;
+}
+
+
+/// getBZeroEntry - This function returns the name of a function which has an
+/// interface like the non-standard bzero function, if such a function exists on
+/// the current subtarget and it is considered prefereable over memset with zero
+/// passed as the second argument. Otherwise it returns null.
+const char *X86Subtarget::getBZeroEntry() const {
+ // Darwin 10 has a __bzero entry point for this purpose.
+ if (getTargetTriple().isMacOSX() &&
+ !getTargetTriple().isMacOSXVersionLT(10, 6))
+ return "__bzero";
+
+ return 0;
+}
+
+/// IsLegalToCallImmediateAddr - Return true if the subtarget allows calls
+/// to immediate address.
+bool X86Subtarget::IsLegalToCallImmediateAddr(const TargetMachine &TM) const {
+ if (In64BitMode)
+ return false;
+ return isTargetELF() || TM.getRelocationModel() == Reloc::Static;
+}
+
+/// getSpecialAddressLatency - For targets where it is beneficial to
+/// backschedule instructions that compute addresses, return a value
+/// indicating the number of scheduling cycles of backscheduling that
+/// should be attempted.
+unsigned X86Subtarget::getSpecialAddressLatency() const {
+ // For x86 out-of-order targets, back-schedule address computations so
+ // that loads and stores aren't blocked.
+ // This value was chosen arbitrarily.
+ return 200;
+}
+
+void X86Subtarget::AutoDetectSubtargetFeatures() {
+ unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
+ union {
+ unsigned u[3];
+ char c[12];
+ } text;
+
+ if (X86_MC::GetCpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1))
+ return;
+
+ X86_MC::GetCpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX);
+
+ if ((EDX >> 15) & 1) { HasCMov = true; ToggleFeature(X86::FeatureCMOV); }
+ if ((EDX >> 23) & 1) { X86SSELevel = MMX; ToggleFeature(X86::FeatureMMX); }
+ if ((EDX >> 25) & 1) { X86SSELevel = SSE1; ToggleFeature(X86::FeatureSSE1); }
+ if ((EDX >> 26) & 1) { X86SSELevel = SSE2; ToggleFeature(X86::FeatureSSE2); }
+ if (ECX & 0x1) { X86SSELevel = SSE3; ToggleFeature(X86::FeatureSSE3); }
+ if ((ECX >> 9) & 1) { X86SSELevel = SSSE3; ToggleFeature(X86::FeatureSSSE3);}
+ if ((ECX >> 19) & 1) { X86SSELevel = SSE41; ToggleFeature(X86::FeatureSSE41);}
+ if ((ECX >> 20) & 1) { X86SSELevel = SSE42; ToggleFeature(X86::FeatureSSE42);}
+ // FIXME: AVX codegen support is not ready.
+ //if ((ECX >> 28) & 1) { HasAVX = true; ToggleFeature(X86::FeatureAVX); }
+
+ bool IsIntel = memcmp(text.c, "GenuineIntel", 12) == 0;
+ bool IsAMD = !IsIntel && memcmp(text.c, "AuthenticAMD", 12) == 0;
+
+ if (IsIntel && ((ECX >> 1) & 0x1)) {
+ HasCLMUL = true;
+ ToggleFeature(X86::FeatureCLMUL);
+ }
+ if (IsIntel && ((ECX >> 12) & 0x1)) {
+ HasFMA3 = true;
+ ToggleFeature(X86::FeatureFMA3);
+ }
+ if (IsIntel && ((ECX >> 22) & 0x1)) {
+ HasMOVBE = true;
+ ToggleFeature(X86::FeatureMOVBE);
+ }
+ if (IsIntel && ((ECX >> 23) & 0x1)) {
+ HasPOPCNT = true;
+ ToggleFeature(X86::FeaturePOPCNT);
+ }
+ if (IsIntel && ((ECX >> 25) & 0x1)) {
+ HasAES = true;
+ ToggleFeature(X86::FeatureAES);
+ }
+ if (IsIntel && ((ECX >> 29) & 0x1)) {
+ HasF16C = true;
+ ToggleFeature(X86::FeatureF16C);
+ }
+ if (IsIntel && ((ECX >> 30) & 0x1)) {
+ HasRDRAND = true;
+ ToggleFeature(X86::FeatureRDRAND);
+ }
+
+ if ((ECX >> 13) & 0x1) {
+ HasCmpxchg16b = true;
+ ToggleFeature(X86::FeatureCMPXCHG16B);
+ }
+
+ if (IsIntel || IsAMD) {
+ // Determine if bit test memory instructions are slow.
+ unsigned Family = 0;
+ unsigned Model = 0;
+ X86_MC::DetectFamilyModel(EAX, Family, Model);
+ if (IsAMD || (Family == 6 && Model >= 13)) {
+ IsBTMemSlow = true;
+ ToggleFeature(X86::FeatureSlowBTMem);
+ }
+ // If it's Nehalem, unaligned memory access is fast.
+ if (Family == 15 && Model == 26) {
+ IsUAMemFast = true;
+ ToggleFeature(X86::FeatureFastUAMem);
+ }
+
+ X86_MC::GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
+ if ((EDX >> 29) & 0x1) {
+ HasX86_64 = true;
+ ToggleFeature(X86::Feature64Bit);
+ }
+ if ((ECX >> 5) & 0x1) {
+ HasLZCNT = true;
+ ToggleFeature(X86::FeatureLZCNT);
+ }
+ if (IsAMD && ((ECX >> 6) & 0x1)) {
+ HasSSE4A = true;
+ ToggleFeature(X86::FeatureSSE4A);
+ }
+ if (IsAMD && ((ECX >> 16) & 0x1)) {
+ HasFMA4 = true;
+ ToggleFeature(X86::FeatureFMA4);
+ }
+ }
+}
+
+X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS,
+ unsigned StackAlignOverride, bool is64Bit)
+ : X86GenSubtargetInfo(TT, CPU, FS)
+ , PICStyle(PICStyles::None)
+ , X86SSELevel(NoMMXSSE)
+ , X863DNowLevel(NoThreeDNow)
+ , HasCMov(false)
+ , HasX86_64(false)
+ , HasPOPCNT(false)
+ , HasSSE4A(false)
+ , HasAVX(false)
+ , HasAES(false)
+ , HasCLMUL(false)
+ , HasFMA3(false)
+ , HasFMA4(false)
+ , HasMOVBE(false)
+ , HasRDRAND(false)
+ , HasF16C(false)
+ , HasLZCNT(false)
+ , HasBMI(false)
+ , IsBTMemSlow(false)
+ , IsUAMemFast(false)
+ , HasVectorUAMem(false)
+ , HasCmpxchg16b(false)
+ , stackAlignment(8)
+ // FIXME: this is a known good value for Yonah. How about others?
+ , MaxInlineSizeThreshold(128)
+ , TargetTriple(TT)
+ , In64BitMode(is64Bit)
+ , InNaClMode(false) {
+ // Determine default and user specified characteristics
+ if (!FS.empty() || !CPU.empty()) {
+ std::string CPUName = CPU;
+ if (CPUName.empty()) {
+#if defined (__x86_64__) || defined(__i386__)
+ CPUName = sys::getHostCPUName();
+#else
+ CPUName = "generic";
+#endif
+ }
+
+ // Make sure 64-bit features are available in 64-bit mode. (But make sure
+ // SSE2 can be turned off explicitly.)
+ std::string FullFS = FS;
+ if (In64BitMode) {
+ if (!FullFS.empty())
+ FullFS = "+64bit,+sse2," + FullFS;
+ else
+ FullFS = "+64bit,+sse2";
+ }
+
+ // If feature string is not empty, parse features string.
+ ParseSubtargetFeatures(CPUName, FullFS);
+ } else {
+ // Otherwise, use CPUID to auto-detect feature set.
+ AutoDetectSubtargetFeatures();
+
+ // Make sure 64-bit features are available in 64-bit mode.
+ if (In64BitMode) {
+ HasX86_64 = true; ToggleFeature(X86::Feature64Bit);
+ HasCMov = true; ToggleFeature(X86::FeatureCMOV);
+
+ if (!HasAVX && X86SSELevel < SSE2) {
+ X86SSELevel = SSE2;
+ ToggleFeature(X86::FeatureSSE1);
+ ToggleFeature(X86::FeatureSSE2);
+ }
+ }
+ }
+
+ // It's important to keep the MCSubtargetInfo feature bits in sync with
+ // target data structure which is shared with MC code emitter, etc.
+ if (In64BitMode)
+ ToggleFeature(X86::Mode64Bit);
+
+ if (isTargetNaCl()) {
+ InNaClMode = true;
+ ToggleFeature(X86::ModeNaCl);
+ }
+
+ if (HasAVX)
+ X86SSELevel = NoMMXSSE;
+
+ DEBUG(dbgs() << "Subtarget features: SSELevel " << X86SSELevel
+ << ", 3DNowLevel " << X863DNowLevel
+ << ", 64bit " << HasX86_64 << "\n");
+ assert((!In64BitMode || HasX86_64) &&
+ "64-bit code requested on a subtarget that doesn't support it!");
+
+ if(EnableSegmentedStacks && !isTargetELF())
+ report_fatal_error("Segmented stacks are only implemented on ELF.");
+
+ // Stack alignment is 16 bytes on Darwin, FreeBSD, Linux and Solaris (both
+ // 32 and 64 bit) and for all 64-bit targets.
+ if (StackAlignOverride)
+ stackAlignment = StackAlignOverride;
+ else if (isTargetDarwin() || isTargetFreeBSD() || isTargetLinux() ||
+ isTargetSolaris() || In64BitMode)
+ stackAlignment = 16;
+}
diff --git a/contrib/llvm/lib/Target/X86/X86Subtarget.h b/contrib/llvm/lib/Target/X86/X86Subtarget.h
new file mode 100644
index 0000000..3258d3d
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86Subtarget.h
@@ -0,0 +1,293 @@
+//=====---- X86Subtarget.h - Define Subtarget for the X86 -----*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the X86 specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86SUBTARGET_H
+#define X86SUBTARGET_H
+
+#include "llvm/ADT/Triple.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/CallingConv.h"
+#include <string>
+
+#define GET_SUBTARGETINFO_HEADER
+#include "X86GenSubtargetInfo.inc"
+
+namespace llvm {
+class GlobalValue;
+class StringRef;
+class TargetMachine;
+
+/// PICStyles - The X86 backend supports a number of different styles of PIC.
+///
+namespace PICStyles {
+enum Style {
+ StubPIC, // Used on i386-darwin in -fPIC mode.
+ StubDynamicNoPIC, // Used on i386-darwin in -mdynamic-no-pic mode.
+ GOT, // Used on many 32-bit unices in -fPIC mode.
+ RIPRel, // Used on X86-64 when not in -static mode.
+ None // Set when in -static mode (not PIC or DynamicNoPIC mode).
+};
+}
+
+class X86Subtarget : public X86GenSubtargetInfo {
+protected:
+ enum X86SSEEnum {
+ NoMMXSSE, MMX, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42
+ };
+
+ enum X863DNowEnum {
+ NoThreeDNow, ThreeDNow, ThreeDNowA
+ };
+
+ /// PICStyle - Which PIC style to use
+ ///
+ PICStyles::Style PICStyle;
+
+ /// X86SSELevel - MMX, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, or
+ /// none supported.
+ X86SSEEnum X86SSELevel;
+
+ /// X863DNowLevel - 3DNow or 3DNow Athlon, or none supported.
+ ///
+ X863DNowEnum X863DNowLevel;
+
+ /// HasCMov - True if this processor has conditional move instructions
+ /// (generally pentium pro+).
+ bool HasCMov;
+
+ /// HasX86_64 - True if the processor supports X86-64 instructions.
+ ///
+ bool HasX86_64;
+
+ /// HasPOPCNT - True if the processor supports POPCNT.
+ bool HasPOPCNT;
+
+ /// HasSSE4A - True if the processor supports SSE4A instructions.
+ bool HasSSE4A;
+
+ /// HasAVX - Target has AVX instructions
+ bool HasAVX;
+
+ /// HasAES - Target has AES instructions
+ bool HasAES;
+
+ /// HasCLMUL - Target has carry-less multiplication
+ bool HasCLMUL;
+
+ /// HasFMA3 - Target has 3-operand fused multiply-add
+ bool HasFMA3;
+
+ /// HasFMA4 - Target has 4-operand fused multiply-add
+ bool HasFMA4;
+
+ /// HasMOVBE - True if the processor has the MOVBE instruction.
+ bool HasMOVBE;
+
+ /// HasRDRAND - True if the processor has the RDRAND instruction.
+ bool HasRDRAND;
+
+ /// HasF16C - Processor has 16-bit floating point conversion instructions.
+ bool HasF16C;
+
+ /// HasLZCNT - Processor has LZCNT instruction.
+ bool HasLZCNT;
+
+ /// HasBMI - Processor has BMI1 instructions.
+ bool HasBMI;
+
+ /// IsBTMemSlow - True if BT (bit test) of memory instructions are slow.
+ bool IsBTMemSlow;
+
+ /// IsUAMemFast - True if unaligned memory access is fast.
+ bool IsUAMemFast;
+
+ /// HasVectorUAMem - True if SIMD operations can have unaligned memory
+ /// operands. This may require setting a feature bit in the processor.
+ bool HasVectorUAMem;
+
+ /// HasCmpxchg16b - True if this processor has the CMPXCHG16B instruction;
+ /// this is true for most x86-64 chips, but not the first AMD chips.
+ bool HasCmpxchg16b;
+
+ /// stackAlignment - The minimum alignment known to hold of the stack frame on
+ /// entry to the function and which must be maintained by every function.
+ unsigned stackAlignment;
+
+ /// Max. memset / memcpy size that is turned into rep/movs, rep/stos ops.
+ ///
+ unsigned MaxInlineSizeThreshold;
+
+ /// TargetTriple - What processor and OS we're targeting.
+ Triple TargetTriple;
+
+private:
+ /// In64BitMode - True if compiling for 64-bit, false for 32-bit.
+ bool In64BitMode;
+
+ /// InNaClMode - True if compiling for Native Client target.
+ bool InNaClMode;
+
+public:
+
+ /// This constructor initializes the data members to match that
+ /// of the specified triple.
+ ///
+ X86Subtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS,
+ unsigned StackAlignOverride, bool is64Bit);
+
+ /// getStackAlignment - Returns the minimum alignment known to hold of the
+ /// stack frame on entry to the function and which must be maintained by every
+ /// function for this subtarget.
+ unsigned getStackAlignment() const { return stackAlignment; }
+
+ /// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size
+ /// that still makes it profitable to inline the call.
+ unsigned getMaxInlineSizeThreshold() const { return MaxInlineSizeThreshold; }
+
+ /// ParseSubtargetFeatures - Parses features string setting specified
+ /// subtarget options. Definition of function is auto generated by tblgen.
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+
+ /// AutoDetectSubtargetFeatures - Auto-detect CPU features using CPUID
+ /// instruction.
+ void AutoDetectSubtargetFeatures();
+
+ bool is64Bit() const { return In64BitMode; }
+
+ PICStyles::Style getPICStyle() const { return PICStyle; }
+ void setPICStyle(PICStyles::Style Style) { PICStyle = Style; }
+
+ bool hasCMov() const { return HasCMov; }
+ bool hasMMX() const { return X86SSELevel >= MMX; }
+ bool hasSSE1() const { return X86SSELevel >= SSE1; }
+ bool hasSSE2() const { return X86SSELevel >= SSE2; }
+ bool hasSSE3() const { return X86SSELevel >= SSE3; }
+ bool hasSSSE3() const { return X86SSELevel >= SSSE3; }
+ bool hasSSE41() const { return X86SSELevel >= SSE41; }
+ bool hasSSE42() const { return X86SSELevel >= SSE42; }
+ bool hasSSE4A() const { return HasSSE4A; }
+ bool has3DNow() const { return X863DNowLevel >= ThreeDNow; }
+ bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; }
+ bool hasPOPCNT() const { return HasPOPCNT; }
+ bool hasAVX() const { return HasAVX; }
+ bool hasXMM() const { return hasSSE1() || hasAVX(); }
+ bool hasXMMInt() const { return hasSSE2() || hasAVX(); }
+ bool hasAES() const { return HasAES; }
+ bool hasCLMUL() const { return HasCLMUL; }
+ bool hasFMA3() const { return HasFMA3; }
+ bool hasFMA4() const { return HasFMA4; }
+ bool hasMOVBE() const { return HasMOVBE; }
+ bool hasRDRAND() const { return HasRDRAND; }
+ bool hasF16C() const { return HasF16C; }
+ bool hasLZCNT() const { return HasLZCNT; }
+ bool hasBMI() const { return HasBMI; }
+ bool isBTMemSlow() const { return IsBTMemSlow; }
+ bool isUnalignedMemAccessFast() const { return IsUAMemFast; }
+ bool hasVectorUAMem() const { return HasVectorUAMem; }
+ bool hasCmpxchg16b() const { return HasCmpxchg16b; }
+
+ const Triple &getTargetTriple() const { return TargetTriple; }
+
+ bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
+ bool isTargetFreeBSD() const {
+ return TargetTriple.getOS() == Triple::FreeBSD;
+ }
+ bool isTargetSolaris() const {
+ return TargetTriple.getOS() == Triple::Solaris;
+ }
+
+ // ELF is a reasonably sane default and the only other X86 targets we
+ // support are Darwin and Windows. Just use "not those".
+ bool isTargetELF() const {
+ return !isTargetDarwin() && !isTargetWindows() && !isTargetCygMing();
+ }
+ bool isTargetLinux() const { return TargetTriple.getOS() == Triple::Linux; }
+ bool isTargetNaCl() const {
+ return TargetTriple.getOS() == Triple::NativeClient;
+ }
+ bool isTargetNaCl32() const { return isTargetNaCl() && !is64Bit(); }
+ bool isTargetNaCl64() const { return isTargetNaCl() && is64Bit(); }
+
+ bool isTargetWindows() const { return TargetTriple.getOS() == Triple::Win32; }
+ bool isTargetMingw() const { return TargetTriple.getOS() == Triple::MinGW32; }
+ bool isTargetCygwin() const { return TargetTriple.getOS() == Triple::Cygwin; }
+ bool isTargetCygMing() const {
+ return isTargetMingw() || isTargetCygwin();
+ }
+
+ /// isTargetCOFF - Return true if this is any COFF/Windows target variant.
+ bool isTargetCOFF() const {
+ return isTargetMingw() || isTargetCygwin() || isTargetWindows();
+ }
+
+ bool isTargetWin64() const {
+ // FIXME: x86_64-cygwin has not been released yet.
+ return In64BitMode && (isTargetCygMing() || isTargetWindows());
+ }
+
+ bool isTargetEnvMacho() const {
+ return isTargetDarwin() || (TargetTriple.getEnvironment() == Triple::MachO);
+ }
+
+ bool isTargetWin32() const {
+ return !In64BitMode && (isTargetMingw() || isTargetWindows());
+ }
+
+ bool isPICStyleSet() const { return PICStyle != PICStyles::None; }
+ bool isPICStyleGOT() const { return PICStyle == PICStyles::GOT; }
+ bool isPICStyleRIPRel() const { return PICStyle == PICStyles::RIPRel; }
+
+ bool isPICStyleStubPIC() const {
+ return PICStyle == PICStyles::StubPIC;
+ }
+
+ bool isPICStyleStubNoDynamic() const {
+ return PICStyle == PICStyles::StubDynamicNoPIC;
+ }
+ bool isPICStyleStubAny() const {
+ return PICStyle == PICStyles::StubDynamicNoPIC ||
+ PICStyle == PICStyles::StubPIC; }
+
+ /// ClassifyGlobalReference - Classify a global variable reference for the
+ /// current subtarget according to how we should reference it in a non-pcrel
+ /// context.
+ unsigned char ClassifyGlobalReference(const GlobalValue *GV,
+ const TargetMachine &TM)const;
+
+ /// ClassifyBlockAddressReference - Classify a blockaddress reference for the
+ /// current subtarget according to how we should reference it in a non-pcrel
+ /// context.
+ unsigned char ClassifyBlockAddressReference() const;
+
+ /// IsLegalToCallImmediateAddr - Return true if the subtarget allows calls
+ /// to immediate address.
+ bool IsLegalToCallImmediateAddr(const TargetMachine &TM) const;
+
+ /// This function returns the name of a function which has an interface
+ /// like the non-standard bzero function, if such a function exists on
+ /// the current subtarget and it is considered prefereable over
+ /// memset with zero passed as the second argument. Otherwise it
+ /// returns null.
+ const char *getBZeroEntry() const;
+
+ /// getSpecialAddressLatency - For targets where it is beneficial to
+ /// backschedule instructions that compute addresses, return a value
+ /// indicating the number of scheduling cycles of backscheduling that
+ /// should be attempted.
+ unsigned getSpecialAddressLatency() const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp b/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp
new file mode 100644
index 0000000..15c6c4e
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -0,0 +1,158 @@
+//===-- X86TargetMachine.cpp - Define TargetMachine for the X86 -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the X86 specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86TargetMachine.h"
+#include "X86.h"
+#include "llvm/PassManager.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+extern "C" void LLVMInitializeX86Target() {
+ // Register the target.
+ RegisterTargetMachine<X86_32TargetMachine> X(TheX86_32Target);
+ RegisterTargetMachine<X86_64TargetMachine> Y(TheX86_64Target);
+}
+
+
+X86_32TargetMachine::X86_32TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM)
+ : X86TargetMachine(T, TT, CPU, FS, RM, CM, false),
+ DataLayout(getSubtargetImpl()->isTargetDarwin() ?
+ "e-p:32:32-f64:32:64-i64:32:64-f80:128:128-f128:128:128-"
+ "n8:16:32-S128" :
+ (getSubtargetImpl()->isTargetCygMing() ||
+ getSubtargetImpl()->isTargetWindows()) ?
+ "e-p:32:32-f64:64:64-i64:64:64-f80:32:32-f128:128:128-"
+ "n8:16:32-S32" :
+ "e-p:32:32-f64:32:64-i64:32:64-f80:32:32-f128:128:128-"
+ "n8:16:32-S128"),
+ InstrInfo(*this),
+ TSInfo(*this),
+ TLInfo(*this),
+ JITInfo(*this) {
+}
+
+
+X86_64TargetMachine::X86_64TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM)
+ : X86TargetMachine(T, TT, CPU, FS, RM, CM, true),
+ DataLayout("e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-f128:128:128-"
+ "n8:16:32:64-S128"),
+ InstrInfo(*this),
+ TSInfo(*this),
+ TLInfo(*this),
+ JITInfo(*this) {
+}
+
+/// X86TargetMachine ctor - Create an X86 target.
+///
+X86TargetMachine::X86TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM,
+ bool is64Bit)
+ : LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
+ Subtarget(TT, CPU, FS, StackAlignmentOverride, is64Bit),
+ FrameLowering(*this, Subtarget),
+ ELFWriterInfo(is64Bit, true) {
+ // Determine the PICStyle based on the target selected.
+ if (getRelocationModel() == Reloc::Static) {
+ // Unless we're in PIC or DynamicNoPIC mode, set the PIC style to None.
+ Subtarget.setPICStyle(PICStyles::None);
+ } else if (Subtarget.is64Bit()) {
+ // PIC in 64 bit mode is always rip-rel.
+ Subtarget.setPICStyle(PICStyles::RIPRel);
+ } else if (Subtarget.isTargetCygMing()) {
+ Subtarget.setPICStyle(PICStyles::None);
+ } else if (Subtarget.isTargetDarwin()) {
+ if (getRelocationModel() == Reloc::PIC_)
+ Subtarget.setPICStyle(PICStyles::StubPIC);
+ else {
+ assert(getRelocationModel() == Reloc::DynamicNoPIC);
+ Subtarget.setPICStyle(PICStyles::StubDynamicNoPIC);
+ }
+ } else if (Subtarget.isTargetELF()) {
+ Subtarget.setPICStyle(PICStyles::GOT);
+ }
+
+ // default to hard float ABI
+ if (FloatABIType == FloatABI::Default)
+ FloatABIType = FloatABI::Hard;
+}
+
+//===----------------------------------------------------------------------===//
+// Command line options for x86
+//===----------------------------------------------------------------------===//
+static cl::opt<bool>
+UseVZeroUpper("x86-use-vzeroupper",
+ cl::desc("Minimize AVX to SSE transition penalty"),
+ cl::init(false));
+
+//===----------------------------------------------------------------------===//
+// Pass Pipeline Configuration
+//===----------------------------------------------------------------------===//
+
+bool X86TargetMachine::addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ // Install an instruction selector.
+ PM.add(createX86ISelDag(*this, OptLevel));
+
+ // For 32-bit, prepend instructions to set the "global base reg" for PIC.
+ if (!Subtarget.is64Bit())
+ PM.add(createGlobalBaseRegPass());
+
+ return false;
+}
+
+bool X86TargetMachine::addPreRegAlloc(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ PM.add(createX86MaxStackAlignmentHeuristicPass());
+ return false; // -print-machineinstr shouldn't print after this.
+}
+
+bool X86TargetMachine::addPostRegAlloc(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ PM.add(createX86FloatingPointStackifierPass());
+ return true; // -print-machineinstr should print after this.
+}
+
+bool X86TargetMachine::addPreEmitPass(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ bool ShouldPrint = false;
+ if (OptLevel != CodeGenOpt::None &&
+ (Subtarget.hasSSE2() || Subtarget.hasAVX())) {
+ PM.add(createExecutionDependencyFixPass(&X86::VR128RegClass));
+ ShouldPrint = true;
+ }
+
+ if (Subtarget.hasAVX() && UseVZeroUpper) {
+ PM.add(createX86IssueVZeroUpperPass());
+ ShouldPrint = true;
+ }
+
+ return ShouldPrint;
+}
+
+bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ JITCodeEmitter &JCE) {
+ PM.add(createX86JITCodeEmitterPass(*this, JCE));
+
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/X86/X86TargetMachine.h b/contrib/llvm/lib/Target/X86/X86TargetMachine.h
new file mode 100644
index 0000000..d1569aa
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86TargetMachine.h
@@ -0,0 +1,133 @@
+//===-- X86TargetMachine.h - Define TargetMachine for the X86 ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the X86 specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86TARGETMACHINE_H
+#define X86TARGETMACHINE_H
+
+#include "X86.h"
+#include "X86ELFWriterInfo.h"
+#include "X86InstrInfo.h"
+#include "X86ISelLowering.h"
+#include "X86FrameLowering.h"
+#include "X86JITInfo.h"
+#include "X86SelectionDAGInfo.h"
+#include "X86Subtarget.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+
+class formatted_raw_ostream;
+class StringRef;
+
+class X86TargetMachine : public LLVMTargetMachine {
+ X86Subtarget Subtarget;
+ X86FrameLowering FrameLowering;
+ X86ELFWriterInfo ELFWriterInfo;
+
+public:
+ X86TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM,
+ bool is64Bit);
+
+ virtual const X86InstrInfo *getInstrInfo() const {
+ llvm_unreachable("getInstrInfo not implemented");
+ }
+ virtual const TargetFrameLowering *getFrameLowering() const {
+ return &FrameLowering;
+ }
+ virtual X86JITInfo *getJITInfo() {
+ llvm_unreachable("getJITInfo not implemented");
+ }
+ virtual const X86Subtarget *getSubtargetImpl() const{ return &Subtarget; }
+ virtual const X86TargetLowering *getTargetLowering() const {
+ llvm_unreachable("getTargetLowering not implemented");
+ }
+ virtual const X86SelectionDAGInfo *getSelectionDAGInfo() const {
+ llvm_unreachable("getSelectionDAGInfo not implemented");
+ }
+ virtual const X86RegisterInfo *getRegisterInfo() const {
+ return &getInstrInfo()->getRegisterInfo();
+ }
+ virtual const X86ELFWriterInfo *getELFWriterInfo() const {
+ return Subtarget.isTargetELF() ? &ELFWriterInfo : 0;
+ }
+
+ // Set up the pass pipeline.
+ virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addPostRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
+ JITCodeEmitter &JCE);
+};
+
+/// X86_32TargetMachine - X86 32-bit target machine.
+///
+class X86_32TargetMachine : public X86TargetMachine {
+ const TargetData DataLayout; // Calculates type size & alignment
+ X86InstrInfo InstrInfo;
+ X86SelectionDAGInfo TSInfo;
+ X86TargetLowering TLInfo;
+ X86JITInfo JITInfo;
+public:
+ X86_32TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM);
+ virtual const TargetData *getTargetData() const { return &DataLayout; }
+ virtual const X86TargetLowering *getTargetLowering() const {
+ return &TLInfo;
+ }
+ virtual const X86SelectionDAGInfo *getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+ virtual const X86InstrInfo *getInstrInfo() const {
+ return &InstrInfo;
+ }
+ virtual X86JITInfo *getJITInfo() {
+ return &JITInfo;
+ }
+};
+
+/// X86_64TargetMachine - X86 64-bit target machine.
+///
+class X86_64TargetMachine : public X86TargetMachine {
+ const TargetData DataLayout; // Calculates type size & alignment
+ X86InstrInfo InstrInfo;
+ X86SelectionDAGInfo TSInfo;
+ X86TargetLowering TLInfo;
+ X86JITInfo JITInfo;
+public:
+ X86_64TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM);
+ virtual const TargetData *getTargetData() const { return &DataLayout; }
+ virtual const X86TargetLowering *getTargetLowering() const {
+ return &TLInfo;
+ }
+ virtual const X86SelectionDAGInfo *getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+ virtual const X86InstrInfo *getInstrInfo() const {
+ return &InstrInfo;
+ }
+ virtual X86JITInfo *getJITInfo() {
+ return &JITInfo;
+ }
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/X86TargetObjectFile.cpp b/contrib/llvm/lib/Target/X86/X86TargetObjectFile.cpp
new file mode 100644
index 0000000..991f322
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86TargetObjectFile.cpp
@@ -0,0 +1,45 @@
+//===-- llvm/Target/X86/X86TargetObjectFile.cpp - X86 Object Info ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86TargetObjectFile.h"
+#include "X86TargetMachine.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/Dwarf.h"
+using namespace llvm;
+using namespace dwarf;
+
+const MCExpr *X8664_MachoTargetObjectFile::
+getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI, unsigned Encoding,
+ MCStreamer &Streamer) const {
+
+ // On Darwin/X86-64, we can reference dwarf symbols with foo@GOTPCREL+4, which
+ // is an indirect pc-relative reference.
+ if (Encoding & (DW_EH_PE_indirect | DW_EH_PE_pcrel)) {
+ const MCSymbol *Sym = Mang->getSymbol(GV);
+ const MCExpr *Res =
+ MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOTPCREL, getContext());
+ const MCExpr *Four = MCConstantExpr::Create(4, getContext());
+ return MCBinaryExpr::CreateAdd(Res, Four, getContext());
+ }
+
+ return TargetLoweringObjectFileMachO::
+ getExprForDwarfGlobalReference(GV, Mang, MMI, Encoding, Streamer);
+}
+
+MCSymbol *X8664_MachoTargetObjectFile::
+getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI) const {
+ return Mang->getSymbol(GV);
+}
diff --git a/contrib/llvm/lib/Target/X86/X86TargetObjectFile.h b/contrib/llvm/lib/Target/X86/X86TargetObjectFile.h
new file mode 100644
index 0000000..d7adf27
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86TargetObjectFile.h
@@ -0,0 +1,38 @@
+//===-- llvm/Target/X86/X86TargetObjectFile.h - X86 Object Info -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_X86_TARGETOBJECTFILE_H
+#define LLVM_TARGET_X86_TARGETOBJECTFILE_H
+
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+
+namespace llvm {
+ class X86TargetMachine;
+
+ /// X8664_MachoTargetObjectFile - This TLOF implementation is used for Darwin
+ /// x86-64.
+ class X8664_MachoTargetObjectFile : public TargetLoweringObjectFileMachO {
+ public:
+ virtual const MCExpr *
+ getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI, unsigned Encoding,
+ MCStreamer &Streamer) const;
+
+ // getCFIPersonalitySymbol - The symbol that gets passed to
+ // .cfi_personality.
+ virtual MCSymbol *
+ getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI) const;
+ };
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/X86VZeroUpper.cpp b/contrib/llvm/lib/Target/X86/X86VZeroUpper.cpp
new file mode 100644
index 0000000..3958494
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86VZeroUpper.cpp
@@ -0,0 +1,105 @@
+//===-- X86VZeroUpper.cpp - AVX vzeroupper instruction inserter -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the pass which inserts x86 AVX vzeroupper instructions
+// before calls to SSE encoded functions. This avoids transition latency
+// penalty when tranfering control between AVX encoded instructions and old
+// SSE encoding mode.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "x86-codegen"
+#include "X86.h"
+#include "X86InstrInfo.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Target/TargetInstrInfo.h"
+using namespace llvm;
+
+STATISTIC(NumVZU, "Number of vzeroupper instructions inserted");
+
+namespace {
+ struct VZeroUpperInserter : public MachineFunctionPass {
+ static char ID;
+ VZeroUpperInserter() : MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB);
+
+ virtual const char *getPassName() const { return "X86 vzeroupper inserter";}
+
+ private:
+ const TargetInstrInfo *TII; // Machine instruction info.
+ MachineBasicBlock *MBB; // Current basic block
+ };
+ char VZeroUpperInserter::ID = 0;
+}
+
+FunctionPass *llvm::createX86IssueVZeroUpperPass() {
+ return new VZeroUpperInserter();
+}
+
+/// runOnMachineFunction - Loop over all of the basic blocks, inserting
+/// vzero upper instructions before function calls.
+bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) {
+ TII = MF.getTarget().getInstrInfo();
+ bool Changed = false;
+
+ // Process any unreachable blocks in arbitrary order now.
+ for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB)
+ Changed |= processBasicBlock(MF, *BB);
+
+ return Changed;
+}
+
+static bool isCallToModuleFn(const MachineInstr *MI) {
+ assert(MI->getDesc().isCall() && "Isn't a call instruction");
+
+ for (int i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+
+ if (!MO.isGlobal())
+ continue;
+
+ const GlobalValue *GV = MO.getGlobal();
+ GlobalValue::LinkageTypes LT = GV->getLinkage();
+ if (GV->isInternalLinkage(LT) || GV->isPrivateLinkage(LT) ||
+ (GV->isExternalLinkage(LT) && !GV->isDeclaration()))
+ return true;
+
+ return false;
+ }
+ return false;
+}
+
+/// processBasicBlock - Loop over all of the instructions in the basic block,
+/// inserting vzero upper instructions before function calls.
+bool VZeroUpperInserter::processBasicBlock(MachineFunction &MF,
+ MachineBasicBlock &BB) {
+ bool Changed = false;
+ MBB = &BB;
+
+ for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) {
+ MachineInstr *MI = I;
+ DebugLoc dl = I->getDebugLoc();
+
+ // Insert a vzeroupper instruction before each control transfer
+ // to functions outside this module
+ if (MI->getDesc().isCall() && !isCallToModuleFn(MI)) {
+ BuildMI(*MBB, I, dl, TII->get(X86::VZEROUPPER));
+ ++NumVZU;
+ }
+ }
+
+ return Changed;
+}
diff --git a/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp b/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp
new file mode 100644
index 0000000..42ab1b3
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp
@@ -0,0 +1,29 @@
+//===-- XCoreMCAsmInfo.cpp - XCore asm properties -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "XCoreMCAsmInfo.h"
+using namespace llvm;
+
+XCoreMCAsmInfo::XCoreMCAsmInfo(const Target &T, StringRef TT) {
+ SupportsDebugInformation = true;
+ Data16bitsDirective = "\t.short\t";
+ Data32bitsDirective = "\t.long\t";
+ Data64bitsDirective = 0;
+ ZeroDirective = "\t.space\t";
+ CommentString = "#";
+
+ PrivateGlobalPrefix = ".L";
+ AscizDirective = ".asciiz";
+ WeakDefDirective = "\t.weak\t";
+ WeakRefDirective = "\t.weak\t";
+
+ // Debug
+ HasLEB128 = true;
+}
+
diff --git a/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h b/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h
new file mode 100644
index 0000000..8403922
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h
@@ -0,0 +1,30 @@
+//=====-- XCoreMCAsmInfo.h - XCore asm properties -------------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the XCoreMCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef XCORETARGETASMINFO_H
+#define XCORETARGETASMINFO_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCAsmInfo.h"
+
+namespace llvm {
+ class Target;
+
+ class XCoreMCAsmInfo : public MCAsmInfo {
+ public:
+ explicit XCoreMCAsmInfo(const Target &T, StringRef TT);
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp b/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
new file mode 100644
index 0000000..276e841
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
@@ -0,0 +1,88 @@
+//===-- XCoreMCTargetDesc.cpp - XCore Target Descriptions -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides XCore specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "XCoreMCTargetDesc.h"
+#include "XCoreMCAsmInfo.h"
+#include "llvm/MC/MCCodeGenInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "XCoreGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "XCoreGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "XCoreGenRegisterInfo.inc"
+
+using namespace llvm;
+
+static MCInstrInfo *createXCoreMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitXCoreMCInstrInfo(X);
+ return X;
+}
+
+static MCRegisterInfo *createXCoreMCRegisterInfo(StringRef TT) {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitXCoreMCRegisterInfo(X, XCore::LR);
+ return X;
+}
+
+static MCSubtargetInfo *createXCoreMCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS) {
+ MCSubtargetInfo *X = new MCSubtargetInfo();
+ InitXCoreMCSubtargetInfo(X, TT, CPU, FS);
+ return X;
+}
+
+static MCAsmInfo *createXCoreMCAsmInfo(const Target &T, StringRef TT) {
+ MCAsmInfo *MAI = new XCoreMCAsmInfo(T, TT);
+
+ // Initial state of the frame pointer is SP.
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(XCore::SP, 0);
+ MAI->addInitialFrameState(0, Dst, Src);
+
+ return MAI;
+}
+
+static MCCodeGenInfo *createXCoreMCCodeGenInfo(StringRef TT, Reloc::Model RM,
+ CodeModel::Model CM) {
+ MCCodeGenInfo *X = new MCCodeGenInfo();
+ X->InitMCCodeGenInfo(RM, CM);
+ return X;
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeXCoreTargetMC() {
+ // Register the MC asm info.
+ RegisterMCAsmInfoFn X(TheXCoreTarget, createXCoreMCAsmInfo);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(TheXCoreTarget,
+ createXCoreMCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(TheXCoreTarget, createXCoreMCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(TheXCoreTarget, createXCoreMCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(TheXCoreTarget,
+ createXCoreMCSubtargetInfo);
+}
diff --git a/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h b/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h
new file mode 100644
index 0000000..3cfc3764
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h
@@ -0,0 +1,40 @@
+//===-- XCoreMCTargetDesc.h - XCore Target Descriptions ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides XCore specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef XCOREMCTARGETDESC_H
+#define XCOREMCTARGETDESC_H
+
+namespace llvm {
+class MCSubtargetInfo;
+class Target;
+class StringRef;
+
+extern Target TheXCoreTarget;
+
+} // End llvm namespace
+
+// Defines symbolic names for XCore registers. This defines a mapping from
+// register name to register number.
+//
+#define GET_REGINFO_ENUM
+#include "XCoreGenRegisterInfo.inc"
+
+// Defines symbolic names for the XCore instructions.
+//
+#define GET_INSTRINFO_ENUM
+#include "XCoreGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "XCoreGenSubtargetInfo.inc"
+
+#endif
diff --git a/contrib/llvm/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp b/contrib/llvm/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp
new file mode 100644
index 0000000..9a0971d
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp
@@ -0,0 +1,19 @@
+//===-- XCoreTargetInfo.cpp - XCore Target Implementation -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "XCore.h"
+#include "llvm/Module.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::TheXCoreTarget;
+
+extern "C" void LLVMInitializeXCoreTargetInfo() {
+ RegisterTarget<Triple::xcore> X(TheXCoreTarget, "xcore", "XCore");
+}
diff --git a/contrib/llvm/lib/Target/XCore/XCore.h b/contrib/llvm/lib/Target/XCore/XCore.h
new file mode 100644
index 0000000..b8fb0ca
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCore.h
@@ -0,0 +1,31 @@
+//===-- XCore.h - Top-level interface for XCore representation --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// XCore back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TARGET_XCORE_H
+#define TARGET_XCORE_H
+
+#include "MCTargetDesc/XCoreMCTargetDesc.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+ class FunctionPass;
+ class TargetMachine;
+ class XCoreTargetMachine;
+ class formatted_raw_ostream;
+
+ FunctionPass *createXCoreISelDag(XCoreTargetMachine &TM);
+
+} // end namespace llvm;
+
+#endif
diff --git a/contrib/llvm/lib/Target/XCore/XCore.td b/contrib/llvm/lib/Target/XCore/XCore.td
new file mode 100644
index 0000000..3840189
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCore.td
@@ -0,0 +1,46 @@
+//===- XCore.td - Describe the XCore Target Machine --------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces which we are implementing
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// Descriptions
+//===----------------------------------------------------------------------===//
+
+include "XCoreRegisterInfo.td"
+include "XCoreInstrInfo.td"
+include "XCoreCallingConv.td"
+
+def XCoreInstrInfo : InstrInfo;
+
+//===----------------------------------------------------------------------===//
+// XCore processors supported.
+//===----------------------------------------------------------------------===//
+
+class Proc<string Name, list<SubtargetFeature> Features>
+ : Processor<Name, NoItineraries, Features>;
+
+def : Proc<"generic", []>;
+def : Proc<"xs1b-generic", []>;
+
+//===----------------------------------------------------------------------===//
+// Declare the target which we are implementing
+//===----------------------------------------------------------------------===//
+
+def XCore : Target {
+ // Pull in Instruction Info:
+ let InstructionSet = XCoreInstrInfo;
+}
diff --git a/contrib/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp b/contrib/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp
new file mode 100644
index 0000000..8906b24
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp
@@ -0,0 +1,324 @@
+//===-- XCoreAsmPrinter.cpp - XCore LLVM assembly writer ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to the XAS-format XCore assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "XCore.h"
+#include "XCoreInstrInfo.h"
+#include "XCoreSubtarget.h"
+#include "XCoreTargetMachine.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cctype>
+using namespace llvm;
+
+static cl::opt<unsigned> MaxThreads("xcore-max-threads", cl::Optional,
+ cl::desc("Maximum number of threads (for emulation thread-local storage)"),
+ cl::Hidden,
+ cl::value_desc("number"),
+ cl::init(8));
+
+namespace {
+ class XCoreAsmPrinter : public AsmPrinter {
+ const XCoreSubtarget &Subtarget;
+ void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS);
+ public:
+ explicit XCoreAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : AsmPrinter(TM, Streamer), Subtarget(TM.getSubtarget<XCoreSubtarget>()){}
+
+ virtual const char *getPassName() const {
+ return "XCore Assembly Printer";
+ }
+
+ void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
+ void printInlineJT(const MachineInstr *MI, int opNum, raw_ostream &O,
+ const std::string &directive = ".jmptable");
+ void printInlineJT32(const MachineInstr *MI, int opNum, raw_ostream &O) {
+ printInlineJT(MI, opNum, O, ".jmptable32");
+ }
+ void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O);
+
+ void emitArrayBound(MCSymbol *Sym, const GlobalVariable *GV);
+ virtual void EmitGlobalVariable(const GlobalVariable *GV);
+
+ void printInstruction(const MachineInstr *MI, raw_ostream &O); // autogen'd.
+ static const char *getRegisterName(unsigned RegNo);
+
+ void EmitFunctionEntryLabel();
+ void EmitInstruction(const MachineInstr *MI);
+ void EmitFunctionBodyEnd();
+ virtual MachineLocation getDebugValueLocation(const MachineInstr *MI) const;
+ };
+} // end of anonymous namespace
+
+#include "XCoreGenAsmWriter.inc"
+
+void XCoreAsmPrinter::emitArrayBound(MCSymbol *Sym, const GlobalVariable *GV) {
+ assert(((GV->hasExternalLinkage() ||
+ GV->hasWeakLinkage()) ||
+ GV->hasLinkOnceLinkage()) && "Unexpected linkage");
+ if (ArrayType *ATy = dyn_cast<ArrayType>(
+ cast<PointerType>(GV->getType())->getElementType())) {
+ OutStreamer.EmitSymbolAttribute(Sym, MCSA_Global);
+ // FIXME: MCStreamerize.
+ OutStreamer.EmitRawText(StringRef(".globound"));
+ OutStreamer.EmitRawText("\t.set\t" + Twine(Sym->getName()));
+ OutStreamer.EmitRawText(".globound," + Twine(ATy->getNumElements()));
+ if (GV->hasWeakLinkage() || GV->hasLinkOnceLinkage()) {
+ // TODO Use COMDAT groups for LinkOnceLinkage
+ OutStreamer.EmitRawText(MAI->getWeakDefDirective() +Twine(Sym->getName())+
+ ".globound");
+ }
+ }
+}
+
+void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
+ // Check to see if this is a special global used by LLVM, if so, emit it.
+ if (!GV->hasInitializer() ||
+ EmitSpecialLLVMGlobal(GV))
+ return;
+
+ const TargetData *TD = TM.getTargetData();
+ OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(GV, Mang,TM));
+
+
+ MCSymbol *GVSym = Mang->getSymbol(GV);
+ const Constant *C = GV->getInitializer();
+ unsigned Align = (unsigned)TD->getPreferredTypeAlignmentShift(C->getType());
+
+ // Mark the start of the global
+ OutStreamer.EmitRawText("\t.cc_top " + Twine(GVSym->getName()) + ".data," +
+ GVSym->getName());
+
+ switch (GV->getLinkage()) {
+ case GlobalValue::AppendingLinkage:
+ report_fatal_error("AppendingLinkage is not supported by this target!");
+ case GlobalValue::LinkOnceAnyLinkage:
+ case GlobalValue::LinkOnceODRLinkage:
+ case GlobalValue::WeakAnyLinkage:
+ case GlobalValue::WeakODRLinkage:
+ case GlobalValue::ExternalLinkage:
+ emitArrayBound(GVSym, GV);
+ OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global);
+
+ // TODO Use COMDAT groups for LinkOnceLinkage
+ if (GV->hasWeakLinkage() || GV->hasLinkOnceLinkage())
+ OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Weak);
+ // FALL THROUGH
+ case GlobalValue::InternalLinkage:
+ case GlobalValue::PrivateLinkage:
+ break;
+ case GlobalValue::DLLImportLinkage:
+ llvm_unreachable("DLLImport linkage is not supported by this target!");
+ case GlobalValue::DLLExportLinkage:
+ llvm_unreachable("DLLExport linkage is not supported by this target!");
+ default:
+ llvm_unreachable("Unknown linkage type!");
+ }
+
+ EmitAlignment(Align > 2 ? Align : 2, GV);
+
+ unsigned Size = TD->getTypeAllocSize(C->getType());
+ if (GV->isThreadLocal()) {
+ Size *= MaxThreads;
+ }
+ if (MAI->hasDotTypeDotSizeDirective()) {
+ OutStreamer.EmitSymbolAttribute(GVSym, MCSA_ELF_TypeObject);
+ OutStreamer.EmitRawText("\t.size " + Twine(GVSym->getName()) + "," +
+ Twine(Size));
+ }
+ OutStreamer.EmitLabel(GVSym);
+
+ EmitGlobalConstant(C);
+ if (GV->isThreadLocal()) {
+ for (unsigned i = 1; i < MaxThreads; ++i)
+ EmitGlobalConstant(C);
+ }
+ // The ABI requires that unsigned scalar types smaller than 32 bits
+ // are padded to 32 bits.
+ if (Size < 4)
+ OutStreamer.EmitZeros(4 - Size, 0);
+
+ // Mark the end of the global
+ OutStreamer.EmitRawText("\t.cc_bottom " + Twine(GVSym->getName()) + ".data");
+}
+
+/// EmitFunctionBodyEnd - Targets can override this to emit stuff after
+/// the last basic block in the function.
+void XCoreAsmPrinter::EmitFunctionBodyEnd() {
+ // Emit function end directives
+ OutStreamer.EmitRawText("\t.cc_bottom " + Twine(CurrentFnSym->getName()) +
+ ".function");
+}
+
+void XCoreAsmPrinter::EmitFunctionEntryLabel() {
+ // Mark the start of the function
+ OutStreamer.EmitRawText("\t.cc_top " + Twine(CurrentFnSym->getName()) +
+ ".function," + CurrentFnSym->getName());
+ OutStreamer.EmitLabel(CurrentFnSym);
+}
+
+void XCoreAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
+ raw_ostream &O) {
+ printOperand(MI, opNum, O);
+
+ if (MI->getOperand(opNum+1).isImm() && MI->getOperand(opNum+1).getImm() == 0)
+ return;
+
+ O << "+";
+ printOperand(MI, opNum+1, O);
+}
+
+void XCoreAsmPrinter::
+printInlineJT(const MachineInstr *MI, int opNum, raw_ostream &O,
+ const std::string &directive) {
+ unsigned JTI = MI->getOperand(opNum).getIndex();
+ const MachineFunction *MF = MI->getParent()->getParent();
+ const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
+ O << "\t" << directive << " ";
+ for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) {
+ MachineBasicBlock *MBB = JTBBs[i];
+ if (i > 0)
+ O << ",";
+ O << *MBB->getSymbol();
+ }
+}
+
+void XCoreAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
+ raw_ostream &O) {
+ const MachineOperand &MO = MI->getOperand(opNum);
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register:
+ O << getRegisterName(MO.getReg());
+ break;
+ case MachineOperand::MO_Immediate:
+ O << MO.getImm();
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ O << *MO.getMBB()->getSymbol();
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ O << *Mang->getSymbol(MO.getGlobal());
+ break;
+ case MachineOperand::MO_ExternalSymbol:
+ O << MO.getSymbolName();
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber()
+ << '_' << MO.getIndex();
+ break;
+ case MachineOperand::MO_JumpTableIndex:
+ O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+ << '_' << MO.getIndex();
+ break;
+ case MachineOperand::MO_BlockAddress:
+ O << *GetBlockAddressSymbol(MO.getBlockAddress());
+ break;
+ default:
+ llvm_unreachable("not implemented");
+ }
+}
+
+/// PrintAsmOperand - Print out an operand for an inline asm expression.
+///
+bool XCoreAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,const char *ExtraCode,
+ raw_ostream &O) {
+ printOperand(MI, OpNo, O);
+ return false;
+}
+
+void XCoreAsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
+ raw_ostream &OS) {
+ unsigned NOps = MI->getNumOperands();
+ assert(NOps == 4);
+ OS << '\t' << MAI->getCommentString() << "DEBUG_VALUE: ";
+ // cast away const; DIetc do not take const operands for some reason.
+ DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps-1).getMetadata()));
+ OS << V.getName();
+ OS << " <- ";
+ // Frame address. Currently handles register +- offset only.
+ assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm());
+ OS << '['; printOperand(MI, 0, OS); OS << '+'; printOperand(MI, 1, OS);
+ OS << ']';
+ OS << "+";
+ printOperand(MI, NOps-2, OS);
+}
+
+MachineLocation XCoreAsmPrinter::
+getDebugValueLocation(const MachineInstr *MI) const {
+ // Handles frame addresses emitted in XCoreInstrInfo::emitFrameIndexDebugValue.
+ assert(MI->getNumOperands() == 4 && "Invalid no. of machine operands!");
+ assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm() &&
+ "Unexpected MachineOperand types");
+ return MachineLocation(MI->getOperand(0).getReg(),
+ MI->getOperand(1).getImm());
+}
+
+void XCoreAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ SmallString<128> Str;
+ raw_svector_ostream O(Str);
+
+ switch (MI->getOpcode()) {
+ case XCore::DBG_VALUE: {
+ if (isVerbose() && OutStreamer.hasRawTextSupport()) {
+ SmallString<128> TmpStr;
+ raw_svector_ostream OS(TmpStr);
+ PrintDebugValueComment(MI, OS);
+ OutStreamer.EmitRawText(StringRef(OS.str()));
+ }
+ return;
+ }
+ case XCore::ADD_2rus:
+ if (MI->getOperand(2).getImm() == 0) {
+ O << "\tmov " << getRegisterName(MI->getOperand(0).getReg()) << ", "
+ << getRegisterName(MI->getOperand(1).getReg());
+ OutStreamer.EmitRawText(O.str());
+ return;
+ }
+ break;
+ }
+ printInstruction(MI, O);
+ OutStreamer.EmitRawText(O.str());
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeXCoreAsmPrinter() {
+ RegisterAsmPrinter<XCoreAsmPrinter> X(TheXCoreTarget);
+}
diff --git a/contrib/llvm/lib/Target/XCore/XCoreCallingConv.td b/contrib/llvm/lib/Target/XCore/XCoreCallingConv.td
new file mode 100644
index 0000000..b20d71f
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreCallingConv.td
@@ -0,0 +1,36 @@
+//===- XCoreCallingConv.td - Calling Conventions for XCore -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This describes the calling conventions for XCore architecture.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// XCore Return Value Calling Convention
+//===----------------------------------------------------------------------===//
+def RetCC_XCore : CallingConv<[
+ // i32 are returned in registers R0, R1, R2, R3
+ CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// XCore Argument Calling Conventions
+//===----------------------------------------------------------------------===//
+def CC_XCore : CallingConv<[
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // The 'nest' parameter, if any, is passed in R11.
+ CCIfNest<CCAssignToReg<[R11]>>,
+
+ // The first 4 integer arguments are passed in integer registers.
+ CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>,
+
+ // Integer values get stored in stack slots that are 4 bytes in
+ // size and 4-byte aligned.
+ CCIfType<[i32], CCAssignToStack<4, 4>>
+]>;
diff --git a/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.cpp b/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.cpp
new file mode 100644
index 0000000..7f8b169
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.cpp
@@ -0,0 +1,380 @@
+//===-- XCoreFrameLowering.cpp - Frame info for XCore Target -----*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains XCore frame information that doesn't fit anywhere else
+// cleanly...
+//
+//===----------------------------------------------------------------------===//
+
+#include "XCore.h"
+#include "XCoreFrameLowering.h"
+#include "XCoreInstrInfo.h"
+#include "XCoreMachineFunctionInfo.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+// helper functions. FIXME: Eliminate.
+static inline bool isImmUs(unsigned val) {
+ return val <= 11;
+}
+
+static inline bool isImmU6(unsigned val) {
+ return val < (1 << 6);
+}
+
+static inline bool isImmU16(unsigned val) {
+ return val < (1 << 16);
+}
+
+static void loadFromStack(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DstReg, int Offset, DebugLoc dl,
+ const TargetInstrInfo &TII) {
+ assert(Offset%4 == 0 && "Misaligned stack offset");
+ Offset/=4;
+ bool isU6 = isImmU6(Offset);
+ if (!isU6 && !isImmU16(Offset))
+ report_fatal_error("loadFromStack offset too big " + Twine(Offset));
+ int Opcode = isU6 ? XCore::LDWSP_ru6 : XCore::LDWSP_lru6;
+ BuildMI(MBB, I, dl, TII.get(Opcode), DstReg)
+ .addImm(Offset);
+}
+
+
+static void storeToStack(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned SrcReg, int Offset, DebugLoc dl,
+ const TargetInstrInfo &TII) {
+ assert(Offset%4 == 0 && "Misaligned stack offset");
+ Offset/=4;
+ bool isU6 = isImmU6(Offset);
+ if (!isU6 && !isImmU16(Offset))
+ report_fatal_error("storeToStack offset too big " + Twine(Offset));
+ int Opcode = isU6 ? XCore::STWSP_ru6 : XCore::STWSP_lru6;
+ BuildMI(MBB, I, dl, TII.get(Opcode))
+ .addReg(SrcReg)
+ .addImm(Offset);
+}
+
+
+//===----------------------------------------------------------------------===//
+// XCoreFrameLowering:
+//===----------------------------------------------------------------------===//
+
+XCoreFrameLowering::XCoreFrameLowering(const XCoreSubtarget &sti)
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 4, 0),
+ STI(sti) {
+ // Do nothing
+}
+
+bool XCoreFrameLowering::hasFP(const MachineFunction &MF) const {
+ return DisableFramePointerElim(MF) || MF.getFrameInfo()->hasVarSizedObjects();
+}
+
+void XCoreFrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineModuleInfo *MMI = &MF.getMMI();
+ const XCoreRegisterInfo *RegInfo =
+ static_cast<const XCoreRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ const XCoreInstrInfo &TII =
+ *static_cast<const XCoreInstrInfo*>(MF.getTarget().getInstrInfo());
+ XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
+ DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+ bool FP = hasFP(MF);
+ bool Nested = MF.getFunction()->
+ getAttributes().hasAttrSomewhere(Attribute::Nest);
+
+ if (Nested) {
+ loadFromStack(MBB, MBBI, XCore::R11, 0, dl, TII);
+ }
+
+ // Work out frame sizes.
+ int FrameSize = MFI->getStackSize();
+ assert(FrameSize%4 == 0 && "Misaligned frame size");
+ FrameSize/=4;
+
+ bool isU6 = isImmU6(FrameSize);
+
+ if (!isU6 && !isImmU16(FrameSize)) {
+ // FIXME could emit multiple instructions.
+ report_fatal_error("emitPrologue Frame size too big: " + Twine(FrameSize));
+ }
+ bool emitFrameMoves = RegInfo->needsFrameMoves(MF);
+
+ // Do we need to allocate space on the stack?
+ if (FrameSize) {
+ bool saveLR = XFI->getUsesLR();
+ bool LRSavedOnEntry = false;
+ int Opcode;
+ if (saveLR && (MFI->getObjectOffset(XFI->getLRSpillSlot()) == 0)) {
+ Opcode = (isU6) ? XCore::ENTSP_u6 : XCore::ENTSP_lu6;
+ MBB.addLiveIn(XCore::LR);
+ saveLR = false;
+ LRSavedOnEntry = true;
+ } else {
+ Opcode = (isU6) ? XCore::EXTSP_u6 : XCore::EXTSP_lu6;
+ }
+ BuildMI(MBB, MBBI, dl, TII.get(Opcode)).addImm(FrameSize);
+
+ if (emitFrameMoves) {
+ std::vector<MachineMove> &Moves = MMI->getFrameMoves();
+
+ // Show update of SP.
+ MCSymbol *FrameLabel = MMI->getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(FrameLabel);
+
+ MachineLocation SPDst(MachineLocation::VirtualFP);
+ MachineLocation SPSrc(MachineLocation::VirtualFP, -FrameSize * 4);
+ Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc));
+
+ if (LRSavedOnEntry) {
+ MachineLocation CSDst(MachineLocation::VirtualFP, 0);
+ MachineLocation CSSrc(XCore::LR);
+ Moves.push_back(MachineMove(FrameLabel, CSDst, CSSrc));
+ }
+ }
+ if (saveLR) {
+ int LRSpillOffset = MFI->getObjectOffset(XFI->getLRSpillSlot());
+ storeToStack(MBB, MBBI, XCore::LR, LRSpillOffset + FrameSize*4, dl, TII);
+ MBB.addLiveIn(XCore::LR);
+
+ if (emitFrameMoves) {
+ MCSymbol *SaveLRLabel = MMI->getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(SaveLRLabel);
+ MachineLocation CSDst(MachineLocation::VirtualFP, LRSpillOffset);
+ MachineLocation CSSrc(XCore::LR);
+ MMI->getFrameMoves().push_back(MachineMove(SaveLRLabel, CSDst, CSSrc));
+ }
+ }
+ }
+
+ if (FP) {
+ // Save R10 to the stack.
+ int FPSpillOffset = MFI->getObjectOffset(XFI->getFPSpillSlot());
+ storeToStack(MBB, MBBI, XCore::R10, FPSpillOffset + FrameSize*4, dl, TII);
+ // R10 is live-in. It is killed at the spill.
+ MBB.addLiveIn(XCore::R10);
+ if (emitFrameMoves) {
+ MCSymbol *SaveR10Label = MMI->getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(SaveR10Label);
+ MachineLocation CSDst(MachineLocation::VirtualFP, FPSpillOffset);
+ MachineLocation CSSrc(XCore::R10);
+ MMI->getFrameMoves().push_back(MachineMove(SaveR10Label, CSDst, CSSrc));
+ }
+ // Set the FP from the SP.
+ unsigned FramePtr = XCore::R10;
+ BuildMI(MBB, MBBI, dl, TII.get(XCore::LDAWSP_ru6), FramePtr)
+ .addImm(0);
+ if (emitFrameMoves) {
+ // Show FP is now valid.
+ MCSymbol *FrameLabel = MMI->getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(FrameLabel);
+ MachineLocation SPDst(FramePtr);
+ MachineLocation SPSrc(MachineLocation::VirtualFP);
+ MMI->getFrameMoves().push_back(MachineMove(FrameLabel, SPDst, SPSrc));
+ }
+ }
+
+ if (emitFrameMoves) {
+ // Frame moves for callee saved.
+ std::vector<MachineMove> &Moves = MMI->getFrameMoves();
+ std::vector<std::pair<MCSymbol*, CalleeSavedInfo> >&SpillLabels =
+ XFI->getSpillLabels();
+ for (unsigned I = 0, E = SpillLabels.size(); I != E; ++I) {
+ MCSymbol *SpillLabel = SpillLabels[I].first;
+ CalleeSavedInfo &CSI = SpillLabels[I].second;
+ int Offset = MFI->getObjectOffset(CSI.getFrameIdx());
+ unsigned Reg = CSI.getReg();
+ MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
+ MachineLocation CSSrc(Reg);
+ Moves.push_back(MachineMove(SpillLabel, CSDst, CSSrc));
+ }
+ }
+}
+
+void XCoreFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ const XCoreInstrInfo &TII =
+ *static_cast<const XCoreInstrInfo*>(MF.getTarget().getInstrInfo());
+ DebugLoc dl = MBBI->getDebugLoc();
+
+ bool FP = hasFP(MF);
+ if (FP) {
+ // Restore the stack pointer.
+ unsigned FramePtr = XCore::R10;
+ BuildMI(MBB, MBBI, dl, TII.get(XCore::SETSP_1r))
+ .addReg(FramePtr);
+ }
+
+ // Work out frame sizes.
+ int FrameSize = MFI->getStackSize();
+
+ assert(FrameSize%4 == 0 && "Misaligned frame size");
+
+ FrameSize/=4;
+
+ bool isU6 = isImmU6(FrameSize);
+
+ if (!isU6 && !isImmU16(FrameSize)) {
+ // FIXME could emit multiple instructions.
+ report_fatal_error("emitEpilogue Frame size too big: " + Twine(FrameSize));
+ }
+
+ if (FrameSize) {
+ XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
+
+ if (FP) {
+ // Restore R10
+ int FPSpillOffset = MFI->getObjectOffset(XFI->getFPSpillSlot());
+ FPSpillOffset += FrameSize*4;
+ loadFromStack(MBB, MBBI, XCore::R10, FPSpillOffset, dl, TII);
+ }
+ bool restoreLR = XFI->getUsesLR();
+ if (restoreLR && MFI->getObjectOffset(XFI->getLRSpillSlot()) != 0) {
+ int LRSpillOffset = MFI->getObjectOffset(XFI->getLRSpillSlot());
+ LRSpillOffset += FrameSize*4;
+ loadFromStack(MBB, MBBI, XCore::LR, LRSpillOffset, dl, TII);
+ restoreLR = false;
+ }
+ if (restoreLR) {
+ // Fold prologue into return instruction
+ assert(MBBI->getOpcode() == XCore::RETSP_u6
+ || MBBI->getOpcode() == XCore::RETSP_lu6);
+ int Opcode = (isU6) ? XCore::RETSP_u6 : XCore::RETSP_lu6;
+ BuildMI(MBB, MBBI, dl, TII.get(Opcode)).addImm(FrameSize);
+ MBB.erase(MBBI);
+ } else {
+ int Opcode = (isU6) ? XCore::LDAWSP_ru6_RRegs : XCore::LDAWSP_lru6_RRegs;
+ BuildMI(MBB, MBBI, dl, TII.get(Opcode), XCore::SP).addImm(FrameSize);
+ }
+ }
+}
+
+bool XCoreFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return true;
+
+ MachineFunction *MF = MBB.getParent();
+ const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo();
+
+ XCoreFunctionInfo *XFI = MF->getInfo<XCoreFunctionInfo>();
+ bool emitFrameMoves = XCoreRegisterInfo::needsFrameMoves(*MF);
+
+ DebugLoc DL;
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ for (std::vector<CalleeSavedInfo>::const_iterator it = CSI.begin();
+ it != CSI.end(); ++it) {
+ // Add the callee-saved register as live-in. It's killed at the spill.
+ MBB.addLiveIn(it->getReg());
+
+ unsigned Reg = it->getReg();
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.storeRegToStackSlot(MBB, MI, Reg, true,
+ it->getFrameIdx(), RC, TRI);
+ if (emitFrameMoves) {
+ MCSymbol *SaveLabel = MF->getContext().CreateTempSymbol();
+ BuildMI(MBB, MI, DL, TII.get(XCore::PROLOG_LABEL)).addSym(SaveLabel);
+ XFI->getSpillLabels().push_back(std::make_pair(SaveLabel, *it));
+ }
+ }
+ return true;
+}
+
+bool XCoreFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const{
+ MachineFunction *MF = MBB.getParent();
+ const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo();
+
+ bool AtStart = MI == MBB.begin();
+ MachineBasicBlock::iterator BeforeI = MI;
+ if (!AtStart)
+ --BeforeI;
+ for (std::vector<CalleeSavedInfo>::const_iterator it = CSI.begin();
+ it != CSI.end(); ++it) {
+ unsigned Reg = it->getReg();
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.loadRegFromStackSlot(MBB, MI, it->getReg(), it->getFrameIdx(),
+ RC, TRI);
+ assert(MI != MBB.begin() &&
+ "loadRegFromStackSlot didn't insert any code!");
+ // Insert in reverse order. loadRegFromStackSlot can insert multiple
+ // instructions.
+ if (AtStart)
+ MI = MBB.begin();
+ else {
+ MI = BeforeI;
+ ++MI;
+ }
+ }
+ return true;
+}
+
+void
+XCoreFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
+ bool LRUsed = MF.getRegInfo().isPhysRegUsed(XCore::LR);
+ const TargetRegisterClass *RC = XCore::GRRegsRegisterClass;
+ XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
+ if (LRUsed) {
+ MF.getRegInfo().setPhysRegUnused(XCore::LR);
+
+ bool isVarArg = MF.getFunction()->isVarArg();
+ int FrameIdx;
+ if (! isVarArg) {
+ // A fixed offset of 0 allows us to save / restore LR using entsp / retsp.
+ FrameIdx = MFI->CreateFixedObject(RC->getSize(), 0, true);
+ } else {
+ FrameIdx = MFI->CreateStackObject(RC->getSize(), RC->getAlignment(),
+ false);
+ }
+ XFI->setUsesLR(FrameIdx);
+ XFI->setLRSpillSlot(FrameIdx);
+ }
+ if (RegInfo->requiresRegisterScavenging(MF)) {
+ // Reserve a slot close to SP or frame pointer.
+ RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+ RC->getAlignment(),
+ false));
+ }
+ if (hasFP(MF)) {
+ // A callee save register is used to hold the FP.
+ // This needs saving / restoring in the epilogue / prologue.
+ XFI->setFPSpillSlot(MFI->CreateStackObject(RC->getSize(),
+ RC->getAlignment(),
+ false));
+ }
+}
+
+void XCoreFrameLowering::
+processFunctionBeforeFrameFinalized(MachineFunction &MF) const {
+
+}
diff --git a/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.h b/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.h
new file mode 100644
index 0000000..c591e93
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.h
@@ -0,0 +1,57 @@
+//===-- XCoreFrameLowering.h - Frame info for XCore Target -------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains XCore frame information that doesn't fit anywhere else
+// cleanly...
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef XCOREFRAMEINFO_H
+#define XCOREFRAMEINFO_H
+
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+ class XCoreSubtarget;
+
+ class XCoreFrameLowering: public TargetFrameLowering {
+ const XCoreSubtarget &STI;
+ public:
+ XCoreFrameLowering(const XCoreSubtarget &STI);
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+ bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+
+ bool hasFP(const MachineFunction &MF) const;
+
+ void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS = NULL) const;
+
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+
+ //! Stack slot size (4 bytes)
+ static int stackSlotSize() {
+ return 4;
+ }
+ };
+}
+
+#endif // XCOREFRAMEINFO_H
diff --git a/contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp
new file mode 100644
index 0000000..4dac1ce
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp
@@ -0,0 +1,300 @@
+//===-- XCoreISelDAGToDAG.cpp - A dag to dag inst selector for XCore ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the XCore target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "XCore.h"
+#include "XCoreTargetMachine.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+/// XCoreDAGToDAGISel - XCore specific code to select XCore machine
+/// instructions for SelectionDAG operations.
+///
+namespace {
+ class XCoreDAGToDAGISel : public SelectionDAGISel {
+ const XCoreTargetLowering &Lowering;
+ const XCoreSubtarget &Subtarget;
+
+ public:
+ XCoreDAGToDAGISel(XCoreTargetMachine &TM)
+ : SelectionDAGISel(TM),
+ Lowering(*TM.getTargetLowering()),
+ Subtarget(*TM.getSubtargetImpl()) { }
+
+ SDNode *Select(SDNode *N);
+ SDNode *SelectBRIND(SDNode *N);
+
+ /// getI32Imm - Return a target constant with the specified value, of type
+ /// i32.
+ inline SDValue getI32Imm(unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i32);
+ }
+
+ inline bool immMskBitp(SDNode *inN) const {
+ ConstantSDNode *N = cast<ConstantSDNode>(inN);
+ uint32_t value = (uint32_t)N->getZExtValue();
+ if (!isMask_32(value)) {
+ return false;
+ }
+ int msksize = 32 - CountLeadingZeros_32(value);
+ return (msksize >= 1 && msksize <= 8) ||
+ msksize == 16 || msksize == 24 || msksize == 32;
+ }
+
+ // Complex Pattern Selectors.
+ bool SelectADDRspii(SDValue Addr, SDValue &Base, SDValue &Offset);
+ bool SelectADDRdpii(SDValue Addr, SDValue &Base, SDValue &Offset);
+ bool SelectADDRcpii(SDValue Addr, SDValue &Base, SDValue &Offset);
+
+ virtual const char *getPassName() const {
+ return "XCore DAG->DAG Pattern Instruction Selection";
+ }
+
+ // Include the pieces autogenerated from the target description.
+ #include "XCoreGenDAGISel.inc"
+ };
+} // end anonymous namespace
+
+/// createXCoreISelDag - This pass converts a legalized DAG into a
+/// XCore-specific DAG, ready for instruction scheduling.
+///
+FunctionPass *llvm::createXCoreISelDag(XCoreTargetMachine &TM) {
+ return new XCoreDAGToDAGISel(TM);
+}
+
+bool XCoreDAGToDAGISel::SelectADDRspii(SDValue Addr, SDValue &Base,
+ SDValue &Offset) {
+ FrameIndexSDNode *FIN = 0;
+ if ((FIN = dyn_cast<FrameIndexSDNode>(Addr))) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+ if (Addr.getOpcode() == ISD::ADD) {
+ ConstantSDNode *CN = 0;
+ if ((FIN = dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
+ && (CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
+ && (CN->getSExtValue() % 4 == 0 && CN->getSExtValue() >= 0)) {
+ // Constant positive word offset from frame index
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(CN->getSExtValue(), MVT::i32);
+ return true;
+ }
+ }
+ return false;
+}
+
+bool XCoreDAGToDAGISel::SelectADDRdpii(SDValue Addr, SDValue &Base,
+ SDValue &Offset) {
+ if (Addr.getOpcode() == XCoreISD::DPRelativeWrapper) {
+ Base = Addr.getOperand(0);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+ if (Addr.getOpcode() == ISD::ADD) {
+ ConstantSDNode *CN = 0;
+ if ((Addr.getOperand(0).getOpcode() == XCoreISD::DPRelativeWrapper)
+ && (CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
+ && (CN->getSExtValue() % 4 == 0)) {
+ // Constant word offset from a object in the data region
+ Base = Addr.getOperand(0).getOperand(0);
+ Offset = CurDAG->getTargetConstant(CN->getSExtValue(), MVT::i32);
+ return true;
+ }
+ }
+ return false;
+}
+
+bool XCoreDAGToDAGISel::SelectADDRcpii(SDValue Addr, SDValue &Base,
+ SDValue &Offset) {
+ if (Addr.getOpcode() == XCoreISD::CPRelativeWrapper) {
+ Base = Addr.getOperand(0);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+ if (Addr.getOpcode() == ISD::ADD) {
+ ConstantSDNode *CN = 0;
+ if ((Addr.getOperand(0).getOpcode() == XCoreISD::CPRelativeWrapper)
+ && (CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
+ && (CN->getSExtValue() % 4 == 0)) {
+ // Constant word offset from a object in the data region
+ Base = Addr.getOperand(0).getOperand(0);
+ Offset = CurDAG->getTargetConstant(CN->getSExtValue(), MVT::i32);
+ return true;
+ }
+ }
+ return false;
+}
+
+SDNode *XCoreDAGToDAGISel::Select(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::Constant: {
+ uint64_t Val = cast<ConstantSDNode>(N)->getZExtValue();
+ if (immMskBitp(N)) {
+ // Transformation function: get the size of a mask
+ // Look for the first non-zero bit
+ SDValue MskSize = getI32Imm(32 - CountLeadingZeros_32(Val));
+ return CurDAG->getMachineNode(XCore::MKMSK_rus, dl,
+ MVT::i32, MskSize);
+ }
+ else if (!isUInt<16>(Val)) {
+ SDValue CPIdx =
+ CurDAG->getTargetConstantPool(ConstantInt::get(
+ Type::getInt32Ty(*CurDAG->getContext()), Val),
+ TLI.getPointerTy());
+ SDNode *node = CurDAG->getMachineNode(XCore::LDWCP_lru6, dl, MVT::i32,
+ MVT::Other, CPIdx,
+ CurDAG->getEntryNode());
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = MF->getMachineMemOperand(
+ MachinePointerInfo::getConstantPool(), MachineMemOperand::MOLoad, 4, 4);
+ cast<MachineSDNode>(node)->setMemRefs(MemOp, MemOp + 1);
+ return node;
+ }
+ break;
+ }
+ case XCoreISD::LADD: {
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
+ N->getOperand(2) };
+ return CurDAG->getMachineNode(XCore::LADD_l5r, dl, MVT::i32, MVT::i32,
+ Ops, 3);
+ }
+ case XCoreISD::LSUB: {
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
+ N->getOperand(2) };
+ return CurDAG->getMachineNode(XCore::LSUB_l5r, dl, MVT::i32, MVT::i32,
+ Ops, 3);
+ }
+ case XCoreISD::MACCU: {
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
+ N->getOperand(2), N->getOperand(3) };
+ return CurDAG->getMachineNode(XCore::MACCU_l4r, dl, MVT::i32, MVT::i32,
+ Ops, 4);
+ }
+ case XCoreISD::MACCS: {
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
+ N->getOperand(2), N->getOperand(3) };
+ return CurDAG->getMachineNode(XCore::MACCS_l4r, dl, MVT::i32, MVT::i32,
+ Ops, 4);
+ }
+ case XCoreISD::LMUL: {
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
+ N->getOperand(2), N->getOperand(3) };
+ return CurDAG->getMachineNode(XCore::LMUL_l6r, dl, MVT::i32, MVT::i32,
+ Ops, 4);
+ }
+ case ISD::INTRINSIC_WO_CHAIN: {
+ unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+ switch (IntNo) {
+ case Intrinsic::xcore_crc8:
+ SDValue Ops[] = { N->getOperand(1), N->getOperand(2), N->getOperand(3) };
+ return CurDAG->getMachineNode(XCore::CRC8_l4r, dl, MVT::i32, MVT::i32,
+ Ops, 3);
+ }
+ break;
+ }
+ case ISD::BRIND:
+ if (SDNode *ResNode = SelectBRIND(N))
+ return ResNode;
+ break;
+ // Other cases are autogenerated.
+ }
+ return SelectCode(N);
+}
+
+/// Given a chain return a new chain where any appearance of Old is replaced
+/// by New. There must be at most one instruction between Old and Chain and
+/// this instruction must be a TokenFactor. Returns an empty SDValue if
+/// these conditions don't hold.
+static SDValue
+replaceInChain(SelectionDAG *CurDAG, SDValue Chain, SDValue Old, SDValue New)
+{
+ if (Chain == Old)
+ return New;
+ if (Chain->getOpcode() != ISD::TokenFactor)
+ return SDValue();
+ SmallVector<SDValue, 8> Ops;
+ bool found = false;
+ for (unsigned i = 0, e = Chain->getNumOperands(); i != e; ++i) {
+ if (Chain->getOperand(i) == Old) {
+ Ops.push_back(New);
+ found = true;
+ } else {
+ Ops.push_back(Chain->getOperand(i));
+ }
+ }
+ if (!found)
+ return SDValue();
+ return CurDAG->getNode(ISD::TokenFactor, Chain->getDebugLoc(), MVT::Other,
+ &Ops[0], Ops.size());
+}
+
+SDNode *XCoreDAGToDAGISel::SelectBRIND(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ // (brind (int_xcore_checkevent (addr)))
+ SDValue Chain = N->getOperand(0);
+ SDValue Addr = N->getOperand(1);
+ if (Addr->getOpcode() != ISD::INTRINSIC_W_CHAIN)
+ return 0;
+ unsigned IntNo = cast<ConstantSDNode>(Addr->getOperand(1))->getZExtValue();
+ if (IntNo != Intrinsic::xcore_checkevent)
+ return 0;
+ SDValue nextAddr = Addr->getOperand(2);
+ SDValue CheckEventChainOut(Addr.getNode(), 1);
+ if (!CheckEventChainOut.use_empty()) {
+ // If the chain out of the checkevent intrinsic is an operand of the
+ // indirect branch or used in a TokenFactor which is the operand of the
+ // indirect branch then build a new chain which uses the chain coming into
+ // the checkevent intrinsic instead.
+ SDValue CheckEventChainIn = Addr->getOperand(0);
+ SDValue NewChain = replaceInChain(CurDAG, Chain, CheckEventChainOut,
+ CheckEventChainIn);
+ if (!NewChain.getNode())
+ return 0;
+ Chain = NewChain;
+ }
+ // Enable events on the thread using setsr 1 and then disable them immediately
+ // after with clrsr 1. If any resources owned by the thread are ready an event
+ // will be taken. If no resource is ready we branch to the address which was
+ // the operand to the checkevent intrinsic.
+ SDValue constOne = getI32Imm(1);
+ SDValue Glue =
+ SDValue(CurDAG->getMachineNode(XCore::SETSR_branch_u6, dl, MVT::Glue,
+ constOne, Chain), 0);
+ Glue =
+ SDValue(CurDAG->getMachineNode(XCore::CLRSR_branch_u6, dl, MVT::Glue,
+ constOne, Glue), 0);
+ if (nextAddr->getOpcode() == XCoreISD::PCRelativeWrapper &&
+ nextAddr->getOperand(0)->getOpcode() == ISD::TargetBlockAddress) {
+ return CurDAG->SelectNodeTo(N, XCore::BRFU_lu6, MVT::Other,
+ nextAddr->getOperand(0), Glue);
+ }
+ return CurDAG->SelectNodeTo(N, XCore::BAU_1r, MVT::Other, nextAddr, Glue);
+}
diff --git a/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp b/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp
new file mode 100644
index 0000000..2afe0e3
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp
@@ -0,0 +1,1615 @@
+//===-- XCoreISelLowering.cpp - XCore DAG Lowering Implementation ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the XCoreTargetLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "xcore-lower"
+
+#include "XCoreISelLowering.h"
+#include "XCoreMachineFunctionInfo.h"
+#include "XCore.h"
+#include "XCoreTargetObjectFile.h"
+#include "XCoreTargetMachine.h"
+#include "XCoreSubtarget.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/CallingConv.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/GlobalAlias.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/VectorExtras.h"
+using namespace llvm;
+
+const char *XCoreTargetLowering::
+getTargetNodeName(unsigned Opcode) const
+{
+ switch (Opcode)
+ {
+ case XCoreISD::BL : return "XCoreISD::BL";
+ case XCoreISD::PCRelativeWrapper : return "XCoreISD::PCRelativeWrapper";
+ case XCoreISD::DPRelativeWrapper : return "XCoreISD::DPRelativeWrapper";
+ case XCoreISD::CPRelativeWrapper : return "XCoreISD::CPRelativeWrapper";
+ case XCoreISD::STWSP : return "XCoreISD::STWSP";
+ case XCoreISD::RETSP : return "XCoreISD::RETSP";
+ case XCoreISD::LADD : return "XCoreISD::LADD";
+ case XCoreISD::LSUB : return "XCoreISD::LSUB";
+ case XCoreISD::LMUL : return "XCoreISD::LMUL";
+ case XCoreISD::MACCU : return "XCoreISD::MACCU";
+ case XCoreISD::MACCS : return "XCoreISD::MACCS";
+ case XCoreISD::BR_JT : return "XCoreISD::BR_JT";
+ case XCoreISD::BR_JT32 : return "XCoreISD::BR_JT32";
+ default : return NULL;
+ }
+}
+
+XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM)
+ : TargetLowering(XTM, new XCoreTargetObjectFile()),
+ TM(XTM),
+ Subtarget(*XTM.getSubtargetImpl()) {
+
+ // Set up the register classes.
+ addRegisterClass(MVT::i32, XCore::GRRegsRegisterClass);
+
+ // Compute derived properties from the register classes
+ computeRegisterProperties();
+
+ // Division is expensive
+ setIntDivIsCheap(false);
+
+ setStackPointerRegisterToSaveRestore(XCore::SP);
+
+ setSchedulingPreference(Sched::RegPressure);
+
+ // Use i32 for setcc operations results (slt, sgt, ...).
+ setBooleanContents(ZeroOrOneBooleanContent);
+ setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
+
+ // XCore does not have the NodeTypes below.
+ setOperationAction(ISD::BR_CC, MVT::Other, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
+ setOperationAction(ISD::ADDC, MVT::i32, Expand);
+ setOperationAction(ISD::ADDE, MVT::i32, Expand);
+ setOperationAction(ISD::SUBC, MVT::i32, Expand);
+ setOperationAction(ISD::SUBE, MVT::i32, Expand);
+
+ // Stop the combiner recombining select and set_cc
+ setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+
+ // 64bit
+ setOperationAction(ISD::ADD, MVT::i64, Custom);
+ setOperationAction(ISD::SUB, MVT::i64, Custom);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i32, Custom);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i32, Custom);
+ setOperationAction(ISD::MULHS, MVT::i32, Expand);
+ setOperationAction(ISD::MULHU, MVT::i32, Expand);
+ setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
+
+ // Bit Manipulation
+ setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+ setOperationAction(ISD::ROTL , MVT::i32, Expand);
+ setOperationAction(ISD::ROTR , MVT::i32, Expand);
+
+ setOperationAction(ISD::TRAP, MVT::Other, Legal);
+
+ // Jump tables.
+ setOperationAction(ISD::BR_JT, MVT::Other, Custom);
+
+ setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ setOperationAction(ISD::BlockAddress, MVT::i32 , Custom);
+
+ // Thread Local Storage
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
+
+ // Conversion of i64 -> double produces constantpool nodes
+ setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
+
+ // Loads
+ setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Expand);
+
+ // Custom expand misaligned loads / stores.
+ setOperationAction(ISD::LOAD, MVT::i32, Custom);
+ setOperationAction(ISD::STORE, MVT::i32, Custom);
+
+ // Varargs
+ setOperationAction(ISD::VAEND, MVT::Other, Expand);
+ setOperationAction(ISD::VACOPY, MVT::Other, Expand);
+ setOperationAction(ISD::VAARG, MVT::Other, Custom);
+ setOperationAction(ISD::VASTART, MVT::Other, Custom);
+
+ // Dynamic stack
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
+
+ // TRAMPOLINE is custom lowered.
+ setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
+ setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
+
+ maxStoresPerMemset = maxStoresPerMemsetOptSize = 4;
+ maxStoresPerMemmove = maxStoresPerMemmoveOptSize
+ = maxStoresPerMemcpy = maxStoresPerMemcpyOptSize = 2;
+
+ // We have target-specific dag combine patterns for the following nodes:
+ setTargetDAGCombine(ISD::STORE);
+ setTargetDAGCombine(ISD::ADD);
+
+ setMinFunctionAlignment(1);
+}
+
+SDValue XCoreTargetLowering::
+LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+ switch (Op.getOpcode())
+ {
+ case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
+ case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
+ case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
+ case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
+ case ISD::BR_JT: return LowerBR_JT(Op, DAG);
+ case ISD::LOAD: return LowerLOAD(Op, DAG);
+ case ISD::STORE: return LowerSTORE(Op, DAG);
+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ case ISD::VAARG: return LowerVAARG(Op, DAG);
+ case ISD::VASTART: return LowerVASTART(Op, DAG);
+ case ISD::SMUL_LOHI: return LowerSMUL_LOHI(Op, DAG);
+ case ISD::UMUL_LOHI: return LowerUMUL_LOHI(Op, DAG);
+ // FIXME: Remove these when LegalizeDAGTypes lands.
+ case ISD::ADD:
+ case ISD::SUB: return ExpandADDSUB(Op.getNode(), DAG);
+ case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
+ case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
+ case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
+ default:
+ llvm_unreachable("unimplemented operand");
+ return SDValue();
+ }
+}
+
+/// ReplaceNodeResults - Replace the results of node with an illegal result
+/// type with new values built out of custom code.
+void XCoreTargetLowering::ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG) const {
+ switch (N->getOpcode()) {
+ default:
+ llvm_unreachable("Don't know how to custom expand this!");
+ return;
+ case ISD::ADD:
+ case ISD::SUB:
+ Results.push_back(ExpandADDSUB(N, DAG));
+ return;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Misc Lower Operation implementation
+//===----------------------------------------------------------------------===//
+
+SDValue XCoreTargetLowering::
+LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
+{
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue Cond = DAG.getNode(ISD::SETCC, dl, MVT::i32, Op.getOperand(2),
+ Op.getOperand(3), Op.getOperand(4));
+ return DAG.getNode(ISD::SELECT, dl, MVT::i32, Cond, Op.getOperand(0),
+ Op.getOperand(1));
+}
+
+SDValue XCoreTargetLowering::
+getGlobalAddressWrapper(SDValue GA, const GlobalValue *GV,
+ SelectionDAG &DAG) const
+{
+ // FIXME there is no actual debug info here
+ DebugLoc dl = GA.getDebugLoc();
+ if (isa<Function>(GV)) {
+ return DAG.getNode(XCoreISD::PCRelativeWrapper, dl, MVT::i32, GA);
+ }
+ const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
+ if (!GVar) {
+ // If GV is an alias then use the aliasee to determine constness
+ if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
+ GVar = dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal());
+ }
+ bool isConst = GVar && GVar->isConstant();
+ if (isConst) {
+ return DAG.getNode(XCoreISD::CPRelativeWrapper, dl, MVT::i32, GA);
+ }
+ return DAG.getNode(XCoreISD::DPRelativeWrapper, dl, MVT::i32, GA);
+}
+
+SDValue XCoreTargetLowering::
+LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const
+{
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ SDValue GA = DAG.getTargetGlobalAddress(GV, Op.getDebugLoc(), MVT::i32);
+ return getGlobalAddressWrapper(GA, GV, DAG);
+}
+
+static inline SDValue BuildGetId(SelectionDAG &DAG, DebugLoc dl) {
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32,
+ DAG.getConstant(Intrinsic::xcore_getid, MVT::i32));
+}
+
+static inline bool isZeroLengthArray(Type *Ty) {
+ ArrayType *AT = dyn_cast_or_null<ArrayType>(Ty);
+ return AT && (AT->getNumElements() == 0);
+}
+
+SDValue XCoreTargetLowering::
+LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
+{
+ // FIXME there isn't really debug info here
+ DebugLoc dl = Op.getDebugLoc();
+ // transform to label + getid() * size
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32);
+ const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
+ if (!GVar) {
+ // If GV is an alias then use the aliasee to determine size
+ if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
+ GVar = dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal());
+ }
+ if (! GVar) {
+ llvm_unreachable("Thread local object not a GlobalVariable?");
+ return SDValue();
+ }
+ Type *Ty = cast<PointerType>(GV->getType())->getElementType();
+ if (!Ty->isSized() || isZeroLengthArray(Ty)) {
+#ifndef NDEBUG
+ errs() << "Size of thread local object " << GVar->getName()
+ << " is unknown\n";
+#endif
+ llvm_unreachable(0);
+ }
+ SDValue base = getGlobalAddressWrapper(GA, GV, DAG);
+ const TargetData *TD = TM.getTargetData();
+ unsigned Size = TD->getTypeAllocSize(Ty);
+ SDValue offset = DAG.getNode(ISD::MUL, dl, MVT::i32, BuildGetId(DAG, dl),
+ DAG.getConstant(Size, MVT::i32));
+ return DAG.getNode(ISD::ADD, dl, MVT::i32, base, offset);
+}
+
+SDValue XCoreTargetLowering::
+LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const
+{
+ DebugLoc DL = Op.getDebugLoc();
+
+ const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
+ SDValue Result = DAG.getBlockAddress(BA, getPointerTy(), /*isTarget=*/true);
+
+ return DAG.getNode(XCoreISD::PCRelativeWrapper, DL, getPointerTy(), Result);
+}
+
+SDValue XCoreTargetLowering::
+LowerConstantPool(SDValue Op, SelectionDAG &DAG) const
+{
+ ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
+ // FIXME there isn't really debug info here
+ DebugLoc dl = CP->getDebugLoc();
+ EVT PtrVT = Op.getValueType();
+ SDValue Res;
+ if (CP->isMachineConstantPoolEntry()) {
+ Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
+ CP->getAlignment());
+ } else {
+ Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
+ CP->getAlignment());
+ }
+ return DAG.getNode(XCoreISD::CPRelativeWrapper, dl, MVT::i32, Res);
+}
+
+unsigned XCoreTargetLowering::getJumpTableEncoding() const {
+ return MachineJumpTableInfo::EK_Inline;
+}
+
+SDValue XCoreTargetLowering::
+LowerBR_JT(SDValue Op, SelectionDAG &DAG) const
+{
+ SDValue Chain = Op.getOperand(0);
+ SDValue Table = Op.getOperand(1);
+ SDValue Index = Op.getOperand(2);
+ DebugLoc dl = Op.getDebugLoc();
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
+ unsigned JTI = JT->getIndex();
+ MachineFunction &MF = DAG.getMachineFunction();
+ const MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
+ SDValue TargetJT = DAG.getTargetJumpTable(JT->getIndex(), MVT::i32);
+
+ unsigned NumEntries = MJTI->getJumpTables()[JTI].MBBs.size();
+ if (NumEntries <= 32) {
+ return DAG.getNode(XCoreISD::BR_JT, dl, MVT::Other, Chain, TargetJT, Index);
+ }
+ assert((NumEntries >> 31) == 0);
+ SDValue ScaledIndex = DAG.getNode(ISD::SHL, dl, MVT::i32, Index,
+ DAG.getConstant(1, MVT::i32));
+ return DAG.getNode(XCoreISD::BR_JT32, dl, MVT::Other, Chain, TargetJT,
+ ScaledIndex);
+}
+
+static bool
+IsWordAlignedBasePlusConstantOffset(SDValue Addr, SDValue &AlignedBase,
+ int64_t &Offset)
+{
+ if (Addr.getOpcode() != ISD::ADD) {
+ return false;
+ }
+ ConstantSDNode *CN = 0;
+ if (!(CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
+ return false;
+ }
+ int64_t off = CN->getSExtValue();
+ const SDValue &Base = Addr.getOperand(0);
+ const SDValue *Root = &Base;
+ if (Base.getOpcode() == ISD::ADD &&
+ Base.getOperand(1).getOpcode() == ISD::SHL) {
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Base.getOperand(1)
+ .getOperand(1));
+ if (CN && (CN->getSExtValue() >= 2)) {
+ Root = &Base.getOperand(0);
+ }
+ }
+ if (isa<FrameIndexSDNode>(*Root)) {
+ // All frame indicies are word aligned
+ AlignedBase = Base;
+ Offset = off;
+ return true;
+ }
+ if (Root->getOpcode() == XCoreISD::DPRelativeWrapper ||
+ Root->getOpcode() == XCoreISD::CPRelativeWrapper) {
+ // All dp / cp relative addresses are word aligned
+ AlignedBase = Base;
+ Offset = off;
+ return true;
+ }
+ return false;
+}
+
+SDValue XCoreTargetLowering::
+LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
+ LoadSDNode *LD = cast<LoadSDNode>(Op);
+ assert(LD->getExtensionType() == ISD::NON_EXTLOAD &&
+ "Unexpected extension type");
+ assert(LD->getMemoryVT() == MVT::i32 && "Unexpected load EVT");
+ if (allowsUnalignedMemoryAccesses(LD->getMemoryVT()))
+ return SDValue();
+
+ unsigned ABIAlignment = getTargetData()->
+ getABITypeAlignment(LD->getMemoryVT().getTypeForEVT(*DAG.getContext()));
+ // Leave aligned load alone.
+ if (LD->getAlignment() >= ABIAlignment)
+ return SDValue();
+
+ SDValue Chain = LD->getChain();
+ SDValue BasePtr = LD->getBasePtr();
+ DebugLoc DL = Op.getDebugLoc();
+
+ SDValue Base;
+ int64_t Offset;
+ if (!LD->isVolatile() &&
+ IsWordAlignedBasePlusConstantOffset(BasePtr, Base, Offset)) {
+ if (Offset % 4 == 0) {
+ // We've managed to infer better alignment information than the load
+ // already has. Use an aligned load.
+ //
+ return DAG.getLoad(getPointerTy(), DL, Chain, BasePtr,
+ MachinePointerInfo(),
+ false, false, 0);
+ }
+ // Lower to
+ // ldw low, base[offset >> 2]
+ // ldw high, base[(offset >> 2) + 1]
+ // shr low_shifted, low, (offset & 0x3) * 8
+ // shl high_shifted, high, 32 - (offset & 0x3) * 8
+ // or result, low_shifted, high_shifted
+ SDValue LowOffset = DAG.getConstant(Offset & ~0x3, MVT::i32);
+ SDValue HighOffset = DAG.getConstant((Offset & ~0x3) + 4, MVT::i32);
+ SDValue LowShift = DAG.getConstant((Offset & 0x3) * 8, MVT::i32);
+ SDValue HighShift = DAG.getConstant(32 - (Offset & 0x3) * 8, MVT::i32);
+
+ SDValue LowAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, Base, LowOffset);
+ SDValue HighAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, Base, HighOffset);
+
+ SDValue Low = DAG.getLoad(getPointerTy(), DL, Chain,
+ LowAddr, MachinePointerInfo(), false, false, 0);
+ SDValue High = DAG.getLoad(getPointerTy(), DL, Chain,
+ HighAddr, MachinePointerInfo(), false, false, 0);
+ SDValue LowShifted = DAG.getNode(ISD::SRL, DL, MVT::i32, Low, LowShift);
+ SDValue HighShifted = DAG.getNode(ISD::SHL, DL, MVT::i32, High, HighShift);
+ SDValue Result = DAG.getNode(ISD::OR, DL, MVT::i32, LowShifted, HighShifted);
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Low.getValue(1),
+ High.getValue(1));
+ SDValue Ops[] = { Result, Chain };
+ return DAG.getMergeValues(Ops, 2, DL);
+ }
+
+ if (LD->getAlignment() == 2) {
+ SDValue Low = DAG.getExtLoad(ISD::ZEXTLOAD, DL, MVT::i32, Chain,
+ BasePtr, LD->getPointerInfo(), MVT::i16,
+ LD->isVolatile(), LD->isNonTemporal(), 2);
+ SDValue HighAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
+ DAG.getConstant(2, MVT::i32));
+ SDValue High = DAG.getExtLoad(ISD::EXTLOAD, DL, MVT::i32, Chain,
+ HighAddr,
+ LD->getPointerInfo().getWithOffset(2),
+ MVT::i16, LD->isVolatile(),
+ LD->isNonTemporal(), 2);
+ SDValue HighShifted = DAG.getNode(ISD::SHL, DL, MVT::i32, High,
+ DAG.getConstant(16, MVT::i32));
+ SDValue Result = DAG.getNode(ISD::OR, DL, MVT::i32, Low, HighShifted);
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Low.getValue(1),
+ High.getValue(1));
+ SDValue Ops[] = { Result, Chain };
+ return DAG.getMergeValues(Ops, 2, DL);
+ }
+
+ // Lower to a call to __misaligned_load(BasePtr).
+ Type *IntPtrTy = getTargetData()->getIntPtrType(*DAG.getContext());
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+
+ Entry.Ty = IntPtrTy;
+ Entry.Node = BasePtr;
+ Args.push_back(Entry);
+
+ std::pair<SDValue, SDValue> CallResult =
+ LowerCallTo(Chain, IntPtrTy, false, false,
+ false, false, 0, CallingConv::C, false,
+ /*isReturnValueUsed=*/true,
+ DAG.getExternalSymbol("__misaligned_load", getPointerTy()),
+ Args, DAG, DL);
+
+ SDValue Ops[] =
+ { CallResult.first, CallResult.second };
+
+ return DAG.getMergeValues(Ops, 2, DL);
+}
+
+SDValue XCoreTargetLowering::
+LowerSTORE(SDValue Op, SelectionDAG &DAG) const
+{
+ StoreSDNode *ST = cast<StoreSDNode>(Op);
+ assert(!ST->isTruncatingStore() && "Unexpected store type");
+ assert(ST->getMemoryVT() == MVT::i32 && "Unexpected store EVT");
+ if (allowsUnalignedMemoryAccesses(ST->getMemoryVT())) {
+ return SDValue();
+ }
+ unsigned ABIAlignment = getTargetData()->
+ getABITypeAlignment(ST->getMemoryVT().getTypeForEVT(*DAG.getContext()));
+ // Leave aligned store alone.
+ if (ST->getAlignment() >= ABIAlignment) {
+ return SDValue();
+ }
+ SDValue Chain = ST->getChain();
+ SDValue BasePtr = ST->getBasePtr();
+ SDValue Value = ST->getValue();
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (ST->getAlignment() == 2) {
+ SDValue Low = Value;
+ SDValue High = DAG.getNode(ISD::SRL, dl, MVT::i32, Value,
+ DAG.getConstant(16, MVT::i32));
+ SDValue StoreLow = DAG.getTruncStore(Chain, dl, Low, BasePtr,
+ ST->getPointerInfo(), MVT::i16,
+ ST->isVolatile(), ST->isNonTemporal(),
+ 2);
+ SDValue HighAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, BasePtr,
+ DAG.getConstant(2, MVT::i32));
+ SDValue StoreHigh = DAG.getTruncStore(Chain, dl, High, HighAddr,
+ ST->getPointerInfo().getWithOffset(2),
+ MVT::i16, ST->isVolatile(),
+ ST->isNonTemporal(), 2);
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, StoreLow, StoreHigh);
+ }
+
+ // Lower to a call to __misaligned_store(BasePtr, Value).
+ Type *IntPtrTy = getTargetData()->getIntPtrType(*DAG.getContext());
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+
+ Entry.Ty = IntPtrTy;
+ Entry.Node = BasePtr;
+ Args.push_back(Entry);
+
+ Entry.Node = Value;
+ Args.push_back(Entry);
+
+ std::pair<SDValue, SDValue> CallResult =
+ LowerCallTo(Chain, Type::getVoidTy(*DAG.getContext()), false, false,
+ false, false, 0, CallingConv::C, false,
+ /*isReturnValueUsed=*/true,
+ DAG.getExternalSymbol("__misaligned_store", getPointerTy()),
+ Args, DAG, dl);
+
+ return CallResult.second;
+}
+
+SDValue XCoreTargetLowering::
+LowerSMUL_LOHI(SDValue Op, SelectionDAG &DAG) const
+{
+ assert(Op.getValueType() == MVT::i32 && Op.getOpcode() == ISD::SMUL_LOHI &&
+ "Unexpected operand to lower!");
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ SDValue Zero = DAG.getConstant(0, MVT::i32);
+ SDValue Hi = DAG.getNode(XCoreISD::MACCS, dl,
+ DAG.getVTList(MVT::i32, MVT::i32), Zero, Zero,
+ LHS, RHS);
+ SDValue Lo(Hi.getNode(), 1);
+ SDValue Ops[] = { Lo, Hi };
+ return DAG.getMergeValues(Ops, 2, dl);
+}
+
+SDValue XCoreTargetLowering::
+LowerUMUL_LOHI(SDValue Op, SelectionDAG &DAG) const
+{
+ assert(Op.getValueType() == MVT::i32 && Op.getOpcode() == ISD::UMUL_LOHI &&
+ "Unexpected operand to lower!");
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ SDValue Zero = DAG.getConstant(0, MVT::i32);
+ SDValue Hi = DAG.getNode(XCoreISD::LMUL, dl,
+ DAG.getVTList(MVT::i32, MVT::i32), LHS, RHS,
+ Zero, Zero);
+ SDValue Lo(Hi.getNode(), 1);
+ SDValue Ops[] = { Lo, Hi };
+ return DAG.getMergeValues(Ops, 2, dl);
+}
+
+/// isADDADDMUL - Return whether Op is in a form that is equivalent to
+/// add(add(mul(x,y),a),b). If requireIntermediatesHaveOneUse is true then
+/// each intermediate result in the calculation must also have a single use.
+/// If the Op is in the correct form the constituent parts are written to Mul0,
+/// Mul1, Addend0 and Addend1.
+static bool
+isADDADDMUL(SDValue Op, SDValue &Mul0, SDValue &Mul1, SDValue &Addend0,
+ SDValue &Addend1, bool requireIntermediatesHaveOneUse)
+{
+ if (Op.getOpcode() != ISD::ADD)
+ return false;
+ SDValue N0 = Op.getOperand(0);
+ SDValue N1 = Op.getOperand(1);
+ SDValue AddOp;
+ SDValue OtherOp;
+ if (N0.getOpcode() == ISD::ADD) {
+ AddOp = N0;
+ OtherOp = N1;
+ } else if (N1.getOpcode() == ISD::ADD) {
+ AddOp = N1;
+ OtherOp = N0;
+ } else {
+ return false;
+ }
+ if (requireIntermediatesHaveOneUse && !AddOp.hasOneUse())
+ return false;
+ if (OtherOp.getOpcode() == ISD::MUL) {
+ // add(add(a,b),mul(x,y))
+ if (requireIntermediatesHaveOneUse && !OtherOp.hasOneUse())
+ return false;
+ Mul0 = OtherOp.getOperand(0);
+ Mul1 = OtherOp.getOperand(1);
+ Addend0 = AddOp.getOperand(0);
+ Addend1 = AddOp.getOperand(1);
+ return true;
+ }
+ if (AddOp.getOperand(0).getOpcode() == ISD::MUL) {
+ // add(add(mul(x,y),a),b)
+ if (requireIntermediatesHaveOneUse && !AddOp.getOperand(0).hasOneUse())
+ return false;
+ Mul0 = AddOp.getOperand(0).getOperand(0);
+ Mul1 = AddOp.getOperand(0).getOperand(1);
+ Addend0 = AddOp.getOperand(1);
+ Addend1 = OtherOp;
+ return true;
+ }
+ if (AddOp.getOperand(1).getOpcode() == ISD::MUL) {
+ // add(add(a,mul(x,y)),b)
+ if (requireIntermediatesHaveOneUse && !AddOp.getOperand(1).hasOneUse())
+ return false;
+ Mul0 = AddOp.getOperand(1).getOperand(0);
+ Mul1 = AddOp.getOperand(1).getOperand(1);
+ Addend0 = AddOp.getOperand(0);
+ Addend1 = OtherOp;
+ return true;
+ }
+ return false;
+}
+
+SDValue XCoreTargetLowering::
+TryExpandADDWithMul(SDNode *N, SelectionDAG &DAG) const
+{
+ SDValue Mul;
+ SDValue Other;
+ if (N->getOperand(0).getOpcode() == ISD::MUL) {
+ Mul = N->getOperand(0);
+ Other = N->getOperand(1);
+ } else if (N->getOperand(1).getOpcode() == ISD::MUL) {
+ Mul = N->getOperand(1);
+ Other = N->getOperand(0);
+ } else {
+ return SDValue();
+ }
+ DebugLoc dl = N->getDebugLoc();
+ SDValue LL, RL, AddendL, AddendH;
+ LL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Mul.getOperand(0), DAG.getConstant(0, MVT::i32));
+ RL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Mul.getOperand(1), DAG.getConstant(0, MVT::i32));
+ AddendL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Other, DAG.getConstant(0, MVT::i32));
+ AddendH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Other, DAG.getConstant(1, MVT::i32));
+ APInt HighMask = APInt::getHighBitsSet(64, 32);
+ unsigned LHSSB = DAG.ComputeNumSignBits(Mul.getOperand(0));
+ unsigned RHSSB = DAG.ComputeNumSignBits(Mul.getOperand(1));
+ if (DAG.MaskedValueIsZero(Mul.getOperand(0), HighMask) &&
+ DAG.MaskedValueIsZero(Mul.getOperand(1), HighMask)) {
+ // The inputs are both zero-extended.
+ SDValue Hi = DAG.getNode(XCoreISD::MACCU, dl,
+ DAG.getVTList(MVT::i32, MVT::i32), AddendH,
+ AddendL, LL, RL);
+ SDValue Lo(Hi.getNode(), 1);
+ return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
+ }
+ if (LHSSB > 32 && RHSSB > 32) {
+ // The inputs are both sign-extended.
+ SDValue Hi = DAG.getNode(XCoreISD::MACCS, dl,
+ DAG.getVTList(MVT::i32, MVT::i32), AddendH,
+ AddendL, LL, RL);
+ SDValue Lo(Hi.getNode(), 1);
+ return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
+ }
+ SDValue LH, RH;
+ LH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Mul.getOperand(0), DAG.getConstant(1, MVT::i32));
+ RH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Mul.getOperand(1), DAG.getConstant(1, MVT::i32));
+ SDValue Hi = DAG.getNode(XCoreISD::MACCU, dl,
+ DAG.getVTList(MVT::i32, MVT::i32), AddendH,
+ AddendL, LL, RL);
+ SDValue Lo(Hi.getNode(), 1);
+ RH = DAG.getNode(ISD::MUL, dl, MVT::i32, LL, RH);
+ LH = DAG.getNode(ISD::MUL, dl, MVT::i32, LH, RL);
+ Hi = DAG.getNode(ISD::ADD, dl, MVT::i32, Hi, RH);
+ Hi = DAG.getNode(ISD::ADD, dl, MVT::i32, Hi, LH);
+ return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
+}
+
+SDValue XCoreTargetLowering::
+ExpandADDSUB(SDNode *N, SelectionDAG &DAG) const
+{
+ assert(N->getValueType(0) == MVT::i64 &&
+ (N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
+ "Unknown operand to lower!");
+
+ if (N->getOpcode() == ISD::ADD) {
+ SDValue Result = TryExpandADDWithMul(N, DAG);
+ if (Result.getNode() != 0)
+ return Result;
+ }
+
+ DebugLoc dl = N->getDebugLoc();
+
+ // Extract components
+ SDValue LHSL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ N->getOperand(0), DAG.getConstant(0, MVT::i32));
+ SDValue LHSH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ N->getOperand(0), DAG.getConstant(1, MVT::i32));
+ SDValue RHSL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ N->getOperand(1), DAG.getConstant(0, MVT::i32));
+ SDValue RHSH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ N->getOperand(1), DAG.getConstant(1, MVT::i32));
+
+ // Expand
+ unsigned Opcode = (N->getOpcode() == ISD::ADD) ? XCoreISD::LADD :
+ XCoreISD::LSUB;
+ SDValue Zero = DAG.getConstant(0, MVT::i32);
+ SDValue Carry = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32),
+ LHSL, RHSL, Zero);
+ SDValue Lo(Carry.getNode(), 1);
+
+ SDValue Ignored = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32),
+ LHSH, RHSH, Carry);
+ SDValue Hi(Ignored.getNode(), 1);
+ // Merge the pieces
+ return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
+}
+
+SDValue XCoreTargetLowering::
+LowerVAARG(SDValue Op, SelectionDAG &DAG) const
+{
+ llvm_unreachable("unimplemented");
+ // FIX Arguments passed by reference need a extra dereference.
+ SDNode *Node = Op.getNode();
+ DebugLoc dl = Node->getDebugLoc();
+ const Value *V = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
+ EVT VT = Node->getValueType(0);
+ SDValue VAList = DAG.getLoad(getPointerTy(), dl, Node->getOperand(0),
+ Node->getOperand(1), MachinePointerInfo(V),
+ false, false, 0);
+ // Increment the pointer, VAList, to the next vararg
+ SDValue Tmp3 = DAG.getNode(ISD::ADD, dl, getPointerTy(), VAList,
+ DAG.getConstant(VT.getSizeInBits(),
+ getPointerTy()));
+ // Store the incremented VAList to the legalized pointer
+ Tmp3 = DAG.getStore(VAList.getValue(1), dl, Tmp3, Node->getOperand(1),
+ MachinePointerInfo(V), false, false, 0);
+ // Load the actual argument out of the pointer VAList
+ return DAG.getLoad(VT, dl, Tmp3, VAList, MachinePointerInfo(),
+ false, false, 0);
+}
+
+SDValue XCoreTargetLowering::
+LowerVASTART(SDValue Op, SelectionDAG &DAG) const
+{
+ DebugLoc dl = Op.getDebugLoc();
+ // vastart stores the address of the VarArgsFrameIndex slot into the
+ // memory location argument
+ MachineFunction &MF = DAG.getMachineFunction();
+ XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
+ SDValue Addr = DAG.getFrameIndex(XFI->getVarArgsFrameIndex(), MVT::i32);
+ return DAG.getStore(Op.getOperand(0), dl, Addr, Op.getOperand(1),
+ MachinePointerInfo(), false, false, 0);
+}
+
+SDValue XCoreTargetLowering::LowerFRAMEADDR(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ // Depths > 0 not supported yet!
+ if (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() > 0)
+ return SDValue();
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ const TargetRegisterInfo *RegInfo = getTargetMachine().getRegisterInfo();
+ return DAG.getCopyFromReg(DAG.getEntryNode(), dl,
+ RegInfo->getFrameRegister(MF), MVT::i32);
+}
+
+SDValue XCoreTargetLowering::
+LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const {
+ return Op.getOperand(0);
+}
+
+SDValue XCoreTargetLowering::
+LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Chain = Op.getOperand(0);
+ SDValue Trmp = Op.getOperand(1); // trampoline
+ SDValue FPtr = Op.getOperand(2); // nested function
+ SDValue Nest = Op.getOperand(3); // 'nest' parameter value
+
+ const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
+
+ // .align 4
+ // LDAPF_u10 r11, nest
+ // LDW_2rus r11, r11[0]
+ // STWSP_ru6 r11, sp[0]
+ // LDAPF_u10 r11, fptr
+ // LDW_2rus r11, r11[0]
+ // BAU_1r r11
+ // nest:
+ // .word nest
+ // fptr:
+ // .word fptr
+ SDValue OutChains[5];
+
+ SDValue Addr = Trmp;
+
+ DebugLoc dl = Op.getDebugLoc();
+ OutChains[0] = DAG.getStore(Chain, dl, DAG.getConstant(0x0a3cd805, MVT::i32),
+ Addr, MachinePointerInfo(TrmpAddr), false, false,
+ 0);
+
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
+ DAG.getConstant(4, MVT::i32));
+ OutChains[1] = DAG.getStore(Chain, dl, DAG.getConstant(0xd80456c0, MVT::i32),
+ Addr, MachinePointerInfo(TrmpAddr, 4), false,
+ false, 0);
+
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
+ DAG.getConstant(8, MVT::i32));
+ OutChains[2] = DAG.getStore(Chain, dl, DAG.getConstant(0x27fb0a3c, MVT::i32),
+ Addr, MachinePointerInfo(TrmpAddr, 8), false,
+ false, 0);
+
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
+ DAG.getConstant(12, MVT::i32));
+ OutChains[3] = DAG.getStore(Chain, dl, Nest, Addr,
+ MachinePointerInfo(TrmpAddr, 12), false, false,
+ 0);
+
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
+ DAG.getConstant(16, MVT::i32));
+ OutChains[4] = DAG.getStore(Chain, dl, FPtr, Addr,
+ MachinePointerInfo(TrmpAddr, 16), false, false,
+ 0);
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 5);
+}
+
+//===----------------------------------------------------------------------===//
+// Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+#include "XCoreGenCallingConv.inc"
+
+//===----------------------------------------------------------------------===//
+// Call Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+/// XCore call implementation
+SDValue
+XCoreTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ // XCore target does not yet support tail call optimization.
+ isTailCall = false;
+
+ // For now, only CallingConv::C implemented
+ switch (CallConv)
+ {
+ default:
+ llvm_unreachable("Unsupported calling convention");
+ case CallingConv::Fast:
+ case CallingConv::C:
+ return LowerCCCCallTo(Chain, Callee, CallConv, isVarArg, isTailCall,
+ Outs, OutVals, Ins, dl, DAG, InVals);
+ }
+}
+
+/// LowerCCCCallTo - functions arguments are copied from virtual
+/// regs to (physical regs)/(stack frame), CALLSEQ_START and
+/// CALLSEQ_END are emitted.
+/// TODO: isTailCall, sret.
+SDValue
+XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+
+ // The ABI dictates there should be one stack slot available to the callee
+ // on function entry (for saving lr).
+ CCInfo.AllocateStack(4, 4);
+
+ CCInfo.AnalyzeCallOperands(Outs, CC_XCore);
+
+ // Get a count of how many bytes are to be pushed on the stack.
+ unsigned NumBytes = CCInfo.getNextStackOffset();
+
+ Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes,
+ getPointerTy(), true));
+
+ SmallVector<std::pair<unsigned, SDValue>, 4> RegsToPass;
+ SmallVector<SDValue, 12> MemOpChains;
+
+ // Walk the register/memloc assignments, inserting copies/loads.
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ SDValue Arg = OutVals[i];
+
+ // Promote the value if needed.
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::AExt:
+ Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ }
+
+ // Arguments that can be passed on register must be kept at
+ // RegsToPass vector
+ if (VA.isRegLoc()) {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ } else {
+ assert(VA.isMemLoc());
+
+ int Offset = VA.getLocMemOffset();
+
+ MemOpChains.push_back(DAG.getNode(XCoreISD::STWSP, dl, MVT::Other,
+ Chain, Arg,
+ DAG.getConstant(Offset/4, MVT::i32)));
+ }
+ }
+
+ // Transform all store nodes into one single node because
+ // all store nodes are independent of each other.
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+ // Build a sequence of copy-to-reg nodes chained together with token
+ // chain and flag operands which copy the outgoing args into registers.
+ // The InFlag in necessary since all emitted instructions must be
+ // stuck together.
+ SDValue InFlag;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // If the callee is a GlobalAddress node (quite common, every direct call is)
+ // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
+ // Likewise ExternalSymbol -> TargetExternalSymbol.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, MVT::i32);
+ else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee))
+ Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i32);
+
+ // XCoreBranchLink = #chain, #target_address, #opt_in_flags...
+ // = Chain, Callee, Reg#1, Reg#2, ...
+ //
+ // Returns a chain & a flag for retval copy to use.
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ // Add argument registers to the end of the list so that they are
+ // known live into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+ if (InFlag.getNode())
+ Ops.push_back(InFlag);
+
+ Chain = DAG.getNode(XCoreISD::BL, dl, NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ // Create the CALLSEQ_END node.
+ Chain = DAG.getCALLSEQ_END(Chain,
+ DAG.getConstant(NumBytes, getPointerTy(), true),
+ DAG.getConstant(0, getPointerTy(), true),
+ InFlag);
+ InFlag = Chain.getValue(1);
+
+ // Handle result values, copying them out of physregs into vregs that we
+ // return.
+ return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
+ Ins, dl, DAG, InVals);
+}
+
+/// LowerCallResult - Lower the result values of a call into the
+/// appropriate copies out of appropriate physical registers.
+SDValue
+XCoreTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+
+ CCInfo.AnalyzeCallResult(Ins, RetCC_XCore);
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ Chain = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(),
+ RVLocs[i].getValVT(), InFlag).getValue(1);
+ InFlag = Chain.getValue(2);
+ InVals.push_back(Chain.getValue(0));
+ }
+
+ return Chain;
+}
+
+//===----------------------------------------------------------------------===//
+// Formal Arguments Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+/// XCore formal arguments implementation
+SDValue
+XCoreTargetLowering::LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals)
+ const {
+ switch (CallConv)
+ {
+ default:
+ llvm_unreachable("Unsupported calling convention");
+ case CallingConv::C:
+ case CallingConv::Fast:
+ return LowerCCCArguments(Chain, CallConv, isVarArg,
+ Ins, dl, DAG, InVals);
+ }
+}
+
+/// LowerCCCArguments - transform physical registers into
+/// virtual registers and generate load operations for
+/// arguments places on the stack.
+/// TODO: sret
+SDValue
+XCoreTargetLowering::LowerCCCArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ DebugLoc dl,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+
+ // Assign locations to all of the incoming arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+
+ CCInfo.AnalyzeFormalArguments(Ins, CC_XCore);
+
+ unsigned StackSlotSize = XCoreFrameLowering::stackSlotSize();
+
+ unsigned LRSaveSize = StackSlotSize;
+
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+
+ CCValAssign &VA = ArgLocs[i];
+
+ if (VA.isRegLoc()) {
+ // Arguments passed in registers
+ EVT RegVT = VA.getLocVT();
+ switch (RegVT.getSimpleVT().SimpleTy) {
+ default:
+ {
+#ifndef NDEBUG
+ errs() << "LowerFormalArguments Unhandled argument type: "
+ << RegVT.getSimpleVT().SimpleTy << "\n";
+#endif
+ llvm_unreachable(0);
+ }
+ case MVT::i32:
+ unsigned VReg = RegInfo.createVirtualRegister(
+ XCore::GRRegsRegisterClass);
+ RegInfo.addLiveIn(VA.getLocReg(), VReg);
+ InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
+ }
+ } else {
+ // sanity check
+ assert(VA.isMemLoc());
+ // Load the argument to a virtual register
+ unsigned ObjSize = VA.getLocVT().getSizeInBits()/8;
+ if (ObjSize > StackSlotSize) {
+ errs() << "LowerFormalArguments Unhandled argument type: "
+ << EVT(VA.getLocVT()).getEVTString()
+ << "\n";
+ }
+ // Create the frame index object for this incoming parameter...
+ int FI = MFI->CreateFixedObject(ObjSize,
+ LRSaveSize + VA.getLocMemOffset(),
+ true);
+
+ // Create the SelectionDAG nodes corresponding to a load
+ //from this parameter
+ SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
+ InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN,
+ MachinePointerInfo::getFixedStack(FI),
+ false, false, 0));
+ }
+ }
+
+ if (isVarArg) {
+ /* Argument registers */
+ static const unsigned ArgRegs[] = {
+ XCore::R0, XCore::R1, XCore::R2, XCore::R3
+ };
+ XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
+ unsigned FirstVAReg = CCInfo.getFirstUnallocated(ArgRegs,
+ array_lengthof(ArgRegs));
+ if (FirstVAReg < array_lengthof(ArgRegs)) {
+ SmallVector<SDValue, 4> MemOps;
+ int offset = 0;
+ // Save remaining registers, storing higher register numbers at a higher
+ // address
+ for (int i = array_lengthof(ArgRegs) - 1; i >= (int)FirstVAReg; --i) {
+ // Create a stack slot
+ int FI = MFI->CreateFixedObject(4, offset, true);
+ if (i == (int)FirstVAReg) {
+ XFI->setVarArgsFrameIndex(FI);
+ }
+ offset -= StackSlotSize;
+ SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
+ // Move argument from phys reg -> virt reg
+ unsigned VReg = RegInfo.createVirtualRegister(
+ XCore::GRRegsRegisterClass);
+ RegInfo.addLiveIn(ArgRegs[i], VReg);
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
+ // Move argument from virt reg -> stack
+ SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo(), false, false, 0);
+ MemOps.push_back(Store);
+ }
+ if (!MemOps.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOps[0], MemOps.size());
+ } else {
+ // This will point to the next argument passed via stack.
+ XFI->setVarArgsFrameIndex(
+ MFI->CreateFixedObject(4, LRSaveSize + CCInfo.getNextStackOffset(),
+ true));
+ }
+ }
+
+ return Chain;
+}
+
+//===----------------------------------------------------------------------===//
+// Return Value Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+bool XCoreTargetLowering::
+CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext &Context) const {
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), RVLocs, Context);
+ return CCInfo.CheckReturn(Outs, RetCC_XCore);
+}
+
+SDValue
+XCoreTargetLowering::LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const {
+
+ // CCValAssign - represent the assignment of
+ // the return value to a location
+ SmallVector<CCValAssign, 16> RVLocs;
+
+ // CCState - Info about the registers and stack slot.
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+
+ // Analyze return values.
+ CCInfo.AnalyzeReturn(Outs, RetCC_XCore);
+
+ // If this is the first return lowered for this function, add
+ // the regs to the liveout set for the function.
+ if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
+ for (unsigned i = 0; i != RVLocs.size(); ++i)
+ if (RVLocs[i].isRegLoc())
+ DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
+ }
+
+ SDValue Flag;
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
+ OutVals[i], Flag);
+
+ // guarantee that all emitted copies are
+ // stuck together, avoiding something bad
+ Flag = Chain.getValue(1);
+ }
+
+ // Return on XCore is always a "retsp 0"
+ if (Flag.getNode())
+ return DAG.getNode(XCoreISD::RETSP, dl, MVT::Other,
+ Chain, DAG.getConstant(0, MVT::i32), Flag);
+ else // Return Void
+ return DAG.getNode(XCoreISD::RETSP, dl, MVT::Other,
+ Chain, DAG.getConstant(0, MVT::i32));
+}
+
+//===----------------------------------------------------------------------===//
+// Other Lowering Code
+//===----------------------------------------------------------------------===//
+
+MachineBasicBlock *
+XCoreTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+ assert((MI->getOpcode() == XCore::SELECT_CC) &&
+ "Unexpected instr type to insert");
+
+ // To "insert" a SELECT_CC instruction, we actually have to insert the diamond
+ // control-flow pattern. The incoming instruction knows the destination vreg
+ // to set, the condition code register to branch on, the true/false values to
+ // select between, and a branch opcode to use.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // cmpTY ccX, r1, r2
+ // bCC copy1MBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, copy0MBB);
+ F->insert(It, sinkMBB);
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Next, add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
+ BuildMI(BB, dl, TII.get(XCore::BRFT_lru6))
+ .addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to sinkMBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
+ // ...
+ BB = sinkMBB;
+ BuildMI(*BB, BB->begin(), dl,
+ TII.get(XCore::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB)
+ .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+//===----------------------------------------------------------------------===//
+// Target Optimization Hooks
+//===----------------------------------------------------------------------===//
+
+SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ DebugLoc dl = N->getDebugLoc();
+ switch (N->getOpcode()) {
+ default: break;
+ case XCoreISD::LADD: {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N0.getValueType();
+
+ // canonicalize constant to RHS
+ if (N0C && !N1C)
+ return DAG.getNode(XCoreISD::LADD, dl, DAG.getVTList(VT, VT), N1, N0, N2);
+
+ // fold (ladd 0, 0, x) -> 0, x & 1
+ if (N0C && N0C->isNullValue() && N1C && N1C->isNullValue()) {
+ SDValue Carry = DAG.getConstant(0, VT);
+ SDValue Result = DAG.getNode(ISD::AND, dl, VT, N2,
+ DAG.getConstant(1, VT));
+ SDValue Ops [] = { Carry, Result };
+ return DAG.getMergeValues(Ops, 2, dl);
+ }
+
+ // fold (ladd x, 0, y) -> 0, add x, y iff carry is unused and y has only the
+ // low bit set
+ if (N1C && N1C->isNullValue() && N->hasNUsesOfValue(0, 0)) {
+ APInt KnownZero, KnownOne;
+ APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
+ VT.getSizeInBits() - 1);
+ DAG.ComputeMaskedBits(N2, Mask, KnownZero, KnownOne);
+ if (KnownZero == Mask) {
+ SDValue Carry = DAG.getConstant(0, VT);
+ SDValue Result = DAG.getNode(ISD::ADD, dl, VT, N0, N2);
+ SDValue Ops [] = { Carry, Result };
+ return DAG.getMergeValues(Ops, 2, dl);
+ }
+ }
+ }
+ break;
+ case XCoreISD::LSUB: {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N0.getValueType();
+
+ // fold (lsub 0, 0, x) -> x, -x iff x has only the low bit set
+ if (N0C && N0C->isNullValue() && N1C && N1C->isNullValue()) {
+ APInt KnownZero, KnownOne;
+ APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
+ VT.getSizeInBits() - 1);
+ DAG.ComputeMaskedBits(N2, Mask, KnownZero, KnownOne);
+ if (KnownZero == Mask) {
+ SDValue Borrow = N2;
+ SDValue Result = DAG.getNode(ISD::SUB, dl, VT,
+ DAG.getConstant(0, VT), N2);
+ SDValue Ops [] = { Borrow, Result };
+ return DAG.getMergeValues(Ops, 2, dl);
+ }
+ }
+
+ // fold (lsub x, 0, y) -> 0, sub x, y iff borrow is unused and y has only the
+ // low bit set
+ if (N1C && N1C->isNullValue() && N->hasNUsesOfValue(0, 0)) {
+ APInt KnownZero, KnownOne;
+ APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
+ VT.getSizeInBits() - 1);
+ DAG.ComputeMaskedBits(N2, Mask, KnownZero, KnownOne);
+ if (KnownZero == Mask) {
+ SDValue Borrow = DAG.getConstant(0, VT);
+ SDValue Result = DAG.getNode(ISD::SUB, dl, VT, N0, N2);
+ SDValue Ops [] = { Borrow, Result };
+ return DAG.getMergeValues(Ops, 2, dl);
+ }
+ }
+ }
+ break;
+ case XCoreISD::LMUL: {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ SDValue N3 = N->getOperand(3);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N0.getValueType();
+ // Canonicalize multiplicative constant to RHS. If both multiplicative
+ // operands are constant canonicalize smallest to RHS.
+ if ((N0C && !N1C) ||
+ (N0C && N1C && N0C->getZExtValue() < N1C->getZExtValue()))
+ return DAG.getNode(XCoreISD::LMUL, dl, DAG.getVTList(VT, VT),
+ N1, N0, N2, N3);
+
+ // lmul(x, 0, a, b)
+ if (N1C && N1C->isNullValue()) {
+ // If the high result is unused fold to add(a, b)
+ if (N->hasNUsesOfValue(0, 0)) {
+ SDValue Lo = DAG.getNode(ISD::ADD, dl, VT, N2, N3);
+ SDValue Ops [] = { Lo, Lo };
+ return DAG.getMergeValues(Ops, 2, dl);
+ }
+ // Otherwise fold to ladd(a, b, 0)
+ return DAG.getNode(XCoreISD::LADD, dl, DAG.getVTList(VT, VT), N2, N3, N1);
+ }
+ }
+ break;
+ case ISD::ADD: {
+ // Fold 32 bit expressions such as add(add(mul(x,y),a),b) ->
+ // lmul(x, y, a, b). The high result of lmul will be ignored.
+ // This is only profitable if the intermediate results are unused
+ // elsewhere.
+ SDValue Mul0, Mul1, Addend0, Addend1;
+ if (N->getValueType(0) == MVT::i32 &&
+ isADDADDMUL(SDValue(N, 0), Mul0, Mul1, Addend0, Addend1, true)) {
+ SDValue Ignored = DAG.getNode(XCoreISD::LMUL, dl,
+ DAG.getVTList(MVT::i32, MVT::i32), Mul0,
+ Mul1, Addend0, Addend1);
+ SDValue Result(Ignored.getNode(), 1);
+ return Result;
+ }
+ APInt HighMask = APInt::getHighBitsSet(64, 32);
+ // Fold 64 bit expression such as add(add(mul(x,y),a),b) ->
+ // lmul(x, y, a, b) if all operands are zero-extended. We do this
+ // before type legalization as it is messy to match the operands after
+ // that.
+ if (N->getValueType(0) == MVT::i64 &&
+ isADDADDMUL(SDValue(N, 0), Mul0, Mul1, Addend0, Addend1, false) &&
+ DAG.MaskedValueIsZero(Mul0, HighMask) &&
+ DAG.MaskedValueIsZero(Mul1, HighMask) &&
+ DAG.MaskedValueIsZero(Addend0, HighMask) &&
+ DAG.MaskedValueIsZero(Addend1, HighMask)) {
+ SDValue Mul0L = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Mul0, DAG.getConstant(0, MVT::i32));
+ SDValue Mul1L = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Mul1, DAG.getConstant(0, MVT::i32));
+ SDValue Addend0L = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Addend0, DAG.getConstant(0, MVT::i32));
+ SDValue Addend1L = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Addend1, DAG.getConstant(0, MVT::i32));
+ SDValue Hi = DAG.getNode(XCoreISD::LMUL, dl,
+ DAG.getVTList(MVT::i32, MVT::i32), Mul0L, Mul1L,
+ Addend0L, Addend1L);
+ SDValue Lo(Hi.getNode(), 1);
+ return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
+ }
+ }
+ break;
+ case ISD::STORE: {
+ // Replace unaligned store of unaligned load with memmove.
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ if (!DCI.isBeforeLegalize() ||
+ allowsUnalignedMemoryAccesses(ST->getMemoryVT()) ||
+ ST->isVolatile() || ST->isIndexed()) {
+ break;
+ }
+ SDValue Chain = ST->getChain();
+
+ unsigned StoreBits = ST->getMemoryVT().getStoreSizeInBits();
+ if (StoreBits % 8) {
+ break;
+ }
+ unsigned ABIAlignment = getTargetData()->getABITypeAlignment(
+ ST->getMemoryVT().getTypeForEVT(*DCI.DAG.getContext()));
+ unsigned Alignment = ST->getAlignment();
+ if (Alignment >= ABIAlignment) {
+ break;
+ }
+
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(ST->getValue())) {
+ if (LD->hasNUsesOfValue(1, 0) && ST->getMemoryVT() == LD->getMemoryVT() &&
+ LD->getAlignment() == Alignment &&
+ !LD->isVolatile() && !LD->isIndexed() &&
+ Chain.reachesChainWithoutSideEffects(SDValue(LD, 1))) {
+ return DAG.getMemmove(Chain, dl, ST->getBasePtr(),
+ LD->getBasePtr(),
+ DAG.getConstant(StoreBits/8, MVT::i32),
+ Alignment, false, ST->getPointerInfo(),
+ LD->getPointerInfo());
+ }
+ }
+ break;
+ }
+ }
+ return SDValue();
+}
+
+void XCoreTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
+ const APInt &Mask,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth) const {
+ KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
+ switch (Op.getOpcode()) {
+ default: break;
+ case XCoreISD::LADD:
+ case XCoreISD::LSUB:
+ if (Op.getResNo() == 0) {
+ // Top bits of carry / borrow are clear.
+ KnownZero = APInt::getHighBitsSet(Mask.getBitWidth(),
+ Mask.getBitWidth() - 1);
+ KnownZero &= Mask;
+ }
+ break;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Addressing mode description hooks
+//===----------------------------------------------------------------------===//
+
+static inline bool isImmUs(int64_t val)
+{
+ return (val >= 0 && val <= 11);
+}
+
+static inline bool isImmUs2(int64_t val)
+{
+ return (val%2 == 0 && isImmUs(val/2));
+}
+
+static inline bool isImmUs4(int64_t val)
+{
+ return (val%4 == 0 && isImmUs(val/4));
+}
+
+/// isLegalAddressingMode - Return true if the addressing mode represented
+/// by AM is legal for this target, for a load/store of the specified type.
+bool
+XCoreTargetLowering::isLegalAddressingMode(const AddrMode &AM,
+ Type *Ty) const {
+ if (Ty->getTypeID() == Type::VoidTyID)
+ return AM.Scale == 0 && isImmUs(AM.BaseOffs) && isImmUs4(AM.BaseOffs);
+
+ const TargetData *TD = TM.getTargetData();
+ unsigned Size = TD->getTypeAllocSize(Ty);
+ if (AM.BaseGV) {
+ return Size >= 4 && !AM.HasBaseReg && AM.Scale == 0 &&
+ AM.BaseOffs%4 == 0;
+ }
+
+ switch (Size) {
+ case 1:
+ // reg + imm
+ if (AM.Scale == 0) {
+ return isImmUs(AM.BaseOffs);
+ }
+ // reg + reg
+ return AM.Scale == 1 && AM.BaseOffs == 0;
+ case 2:
+ case 3:
+ // reg + imm
+ if (AM.Scale == 0) {
+ return isImmUs2(AM.BaseOffs);
+ }
+ // reg + reg<<1
+ return AM.Scale == 2 && AM.BaseOffs == 0;
+ default:
+ // reg + imm
+ if (AM.Scale == 0) {
+ return isImmUs4(AM.BaseOffs);
+ }
+ // reg + reg<<2
+ return AM.Scale == 4 && AM.BaseOffs == 0;
+ }
+
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// XCore Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+std::pair<unsigned, const TargetRegisterClass*>
+XCoreTargetLowering::
+getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ default : break;
+ case 'r':
+ return std::make_pair(0U, XCore::GRRegsRegisterClass);
+ }
+ }
+ // Use the default implementation in TargetLowering to convert the register
+ // constraint into a member of a register class.
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
diff --git a/contrib/llvm/lib/Target/XCore/XCoreISelLowering.h b/contrib/llvm/lib/Target/XCore/XCoreISelLowering.h
new file mode 100644
index 0000000..d6c5b32
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreISelLowering.h
@@ -0,0 +1,202 @@
+//===-- XCoreISelLowering.h - XCore DAG Lowering Interface ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that XCore uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef XCOREISELLOWERING_H
+#define XCOREISELLOWERING_H
+
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
+#include "XCore.h"
+
+namespace llvm {
+
+ // Forward delcarations
+ class XCoreSubtarget;
+ class XCoreTargetMachine;
+
+ namespace XCoreISD {
+ enum NodeType {
+ // Start the numbering where the builtin ops and target ops leave off.
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+ // Branch and link (call)
+ BL,
+
+ // pc relative address
+ PCRelativeWrapper,
+
+ // dp relative address
+ DPRelativeWrapper,
+
+ // cp relative address
+ CPRelativeWrapper,
+
+ // Store word to stack
+ STWSP,
+
+ // Corresponds to retsp instruction
+ RETSP,
+
+ // Corresponds to LADD instruction
+ LADD,
+
+ // Corresponds to LSUB instruction
+ LSUB,
+
+ // Corresponds to LMUL instruction
+ LMUL,
+
+ // Corresponds to MACCU instruction
+ MACCU,
+
+ // Corresponds to MACCS instruction
+ MACCS,
+
+ // Jumptable branch.
+ BR_JT,
+
+ // Jumptable branch using long branches for each entry.
+ BR_JT32
+ };
+ }
+
+ //===--------------------------------------------------------------------===//
+ // TargetLowering Implementation
+ //===--------------------------------------------------------------------===//
+ class XCoreTargetLowering : public TargetLowering
+ {
+ public:
+
+ explicit XCoreTargetLowering(XCoreTargetMachine &TM);
+
+ virtual unsigned getJumpTableEncoding() const;
+ virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
+
+ /// LowerOperation - Provide custom lowering hooks for some operations.
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+
+ /// ReplaceNodeResults - Replace the results of node with an illegal result
+ /// type with new values built out of custom code.
+ ///
+ virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG) const;
+
+ /// getTargetNodeName - This method returns the name of a target specific
+ // DAG node.
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+ virtual MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+
+ virtual bool isLegalAddressingMode(const AddrMode &AM,
+ Type *Ty) const;
+
+ private:
+ const XCoreTargetMachine &TM;
+ const XCoreSubtarget &Subtarget;
+
+ // Lower Operand helpers
+ SDValue LowerCCCArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+ SDValue LowerCCCCallTo(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+ SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+ SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;
+ SDValue getGlobalAddressWrapper(SDValue GA, const GlobalValue *GV,
+ SelectionDAG &DAG) const;
+
+ // Lower Operand specifics
+ SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerUMUL_LOHI(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSMUL_LOHI(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
+
+ // Inline asm support
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const;
+
+ // Expand specifics
+ SDValue TryExpandADDWithMul(SDNode *Op, SelectionDAG &DAG) const;
+ SDValue ExpandADDSUB(SDNode *Op, SelectionDAG &DAG) const;
+
+ virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+
+ virtual void computeMaskedBitsForTargetNode(const SDValue Op,
+ const APInt &Mask,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth = 0) const;
+
+ virtual SDValue
+ LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const;
+
+ virtual bool
+ CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &ArgsFlags,
+ LLVMContext &Context) const;
+ };
+}
+
+#endif // XCOREISELLOWERING_H
diff --git a/contrib/llvm/lib/Target/XCore/XCoreInstrFormats.td b/contrib/llvm/lib/Target/XCore/XCoreInstrFormats.td
new file mode 100644
index 0000000..8002c99
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreInstrFormats.td
@@ -0,0 +1,120 @@
+//===- XCoreInstrFormats.td - XCore Instruction Formats ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instruction format superclass
+//===----------------------------------------------------------------------===//
+class InstXCore<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : Instruction {
+ field bits<32> Inst;
+
+ let Namespace = "XCore";
+ dag OutOperandList = outs;
+ dag InOperandList = ins;
+ let AsmString = asmstr;
+ let Pattern = pattern;
+}
+
+// XCore pseudo instructions format
+class PseudoInstXCore<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern>;
+
+//===----------------------------------------------------------------------===//
+// Instruction formats
+//===----------------------------------------------------------------------===//
+
+class _F3R<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _FL3R<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _F2RUS<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _FL2RUS<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _FRU6<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _FLRU6<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _FU6<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _FLU6<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _FU10<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _FLU10<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _F2R<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _FRUS<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _FL2R<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _F1R<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _F0R<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _L4R<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _L5R<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _L6R<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
diff --git a/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.cpp b/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.cpp
new file mode 100644
index 0000000..a0946a1
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.cpp
@@ -0,0 +1,406 @@
+//===- XCoreInstrInfo.cpp - XCore Instruction Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the XCore implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "XCoreMachineFunctionInfo.h"
+#include "XCoreInstrInfo.h"
+#include "XCore.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#define GET_INSTRINFO_CTOR
+#include "XCoreGenInstrInfo.inc"
+
+namespace llvm {
+namespace XCore {
+
+ // XCore Condition Codes
+ enum CondCode {
+ COND_TRUE,
+ COND_FALSE,
+ COND_INVALID
+ };
+}
+}
+
+using namespace llvm;
+
+XCoreInstrInfo::XCoreInstrInfo()
+ : XCoreGenInstrInfo(XCore::ADJCALLSTACKDOWN, XCore::ADJCALLSTACKUP),
+ RI(*this) {
+}
+
+static bool isZeroImm(const MachineOperand &op) {
+ return op.isImm() && op.getImm() == 0;
+}
+
+/// isLoadFromStackSlot - If the specified machine instruction is a direct
+/// load from a stack slot, return the virtual or physical register number of
+/// the destination along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than loading from the stack slot.
+unsigned
+XCoreInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const{
+ int Opcode = MI->getOpcode();
+ if (Opcode == XCore::LDWFI)
+ {
+ if ((MI->getOperand(1).isFI()) && // is a stack slot
+ (MI->getOperand(2).isImm()) && // the imm is zero
+ (isZeroImm(MI->getOperand(2))))
+ {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ }
+ return 0;
+}
+
+ /// isStoreToStackSlot - If the specified machine instruction is a direct
+ /// store to a stack slot, return the virtual or physical register number of
+ /// the source reg along with the FrameIndex of the loaded stack slot. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than storing to the stack slot.
+unsigned
+XCoreInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ int Opcode = MI->getOpcode();
+ if (Opcode == XCore::STWFI)
+ {
+ if ((MI->getOperand(1).isFI()) && // is a stack slot
+ (MI->getOperand(2).isImm()) && // the imm is zero
+ (isZeroImm(MI->getOperand(2))))
+ {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ }
+ return 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Branch Analysis
+//===----------------------------------------------------------------------===//
+
+static inline bool IsBRU(unsigned BrOpc) {
+ return BrOpc == XCore::BRFU_u6
+ || BrOpc == XCore::BRFU_lu6
+ || BrOpc == XCore::BRBU_u6
+ || BrOpc == XCore::BRBU_lu6;
+}
+
+static inline bool IsBRT(unsigned BrOpc) {
+ return BrOpc == XCore::BRFT_ru6
+ || BrOpc == XCore::BRFT_lru6
+ || BrOpc == XCore::BRBT_ru6
+ || BrOpc == XCore::BRBT_lru6;
+}
+
+static inline bool IsBRF(unsigned BrOpc) {
+ return BrOpc == XCore::BRFF_ru6
+ || BrOpc == XCore::BRFF_lru6
+ || BrOpc == XCore::BRBF_ru6
+ || BrOpc == XCore::BRBF_lru6;
+}
+
+static inline bool IsCondBranch(unsigned BrOpc) {
+ return IsBRF(BrOpc) || IsBRT(BrOpc);
+}
+
+static inline bool IsBR_JT(unsigned BrOpc) {
+ return BrOpc == XCore::BR_JT
+ || BrOpc == XCore::BR_JT32;
+}
+
+/// GetCondFromBranchOpc - Return the XCore CC that matches
+/// the correspondent Branch instruction opcode.
+static XCore::CondCode GetCondFromBranchOpc(unsigned BrOpc)
+{
+ if (IsBRT(BrOpc)) {
+ return XCore::COND_TRUE;
+ } else if (IsBRF(BrOpc)) {
+ return XCore::COND_FALSE;
+ } else {
+ return XCore::COND_INVALID;
+ }
+}
+
+/// GetCondBranchFromCond - Return the Branch instruction
+/// opcode that matches the cc.
+static inline unsigned GetCondBranchFromCond(XCore::CondCode CC)
+{
+ switch (CC) {
+ default: llvm_unreachable("Illegal condition code!");
+ case XCore::COND_TRUE : return XCore::BRFT_lru6;
+ case XCore::COND_FALSE : return XCore::BRFF_lru6;
+ }
+}
+
+/// GetOppositeBranchCondition - Return the inverse of the specified
+/// condition, e.g. turning COND_E to COND_NE.
+static inline XCore::CondCode GetOppositeBranchCondition(XCore::CondCode CC)
+{
+ switch (CC) {
+ default: llvm_unreachable("Illegal condition code!");
+ case XCore::COND_TRUE : return XCore::COND_FALSE;
+ case XCore::COND_FALSE : return XCore::COND_TRUE;
+ }
+}
+
+/// AnalyzeBranch - Analyze the branching code at the end of MBB, returning
+/// true if it cannot be understood (e.g. it's a switch dispatch or isn't
+/// implemented for a target). Upon success, this returns false and returns
+/// with the following information in various cases:
+///
+/// 1. If this block ends with no branches (it just falls through to its succ)
+/// just return false, leaving TBB/FBB null.
+/// 2. If this block ends with only an unconditional branch, it sets TBB to be
+/// the destination block.
+/// 3. If this block ends with an conditional branch and it falls through to
+/// an successor block, it sets TBB to be the branch destination block and a
+/// list of operands that evaluate the condition. These
+/// operands can be passed to other TargetInstrInfo methods to create new
+/// branches.
+/// 4. If this block ends with an conditional branch and an unconditional
+/// block, it returns the 'true' destination in TBB, the 'false' destination
+/// in FBB, and a list of operands that evaluate the condition. These
+/// operands can be passed to other TargetInstrInfo methods to create new
+/// branches.
+///
+/// Note that RemoveBranch and InsertBranch must be implemented to support
+/// cases where this method returns success.
+///
+bool
+XCoreInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ // If the block has no terminators, it just falls into the block after it.
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin())
+ return false;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return false;
+ --I;
+ }
+ if (!isUnpredicatedTerminator(I))
+ return false;
+
+ // Get the last instruction in the block.
+ MachineInstr *LastInst = I;
+
+ // If there is only one terminator instruction, process it.
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+ if (IsBRU(LastInst->getOpcode())) {
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+
+ XCore::CondCode BranchCode = GetCondFromBranchOpc(LastInst->getOpcode());
+ if (BranchCode == XCore::COND_INVALID)
+ return true; // Can't handle indirect branch.
+
+ // Conditional branch
+ // Block ends with fall-through condbranch.
+
+ TBB = LastInst->getOperand(1).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(BranchCode));
+ Cond.push_back(LastInst->getOperand(0));
+ return false;
+ }
+
+ // Get the instruction before it if it's a terminator.
+ MachineInstr *SecondLastInst = I;
+
+ // If there are three terminators, we don't know what sort of block this is.
+ if (SecondLastInst && I != MBB.begin() &&
+ isUnpredicatedTerminator(--I))
+ return true;
+
+ unsigned SecondLastOpc = SecondLastInst->getOpcode();
+ XCore::CondCode BranchCode = GetCondFromBranchOpc(SecondLastOpc);
+
+ // If the block ends with conditional branch followed by unconditional,
+ // handle it.
+ if (BranchCode != XCore::COND_INVALID
+ && IsBRU(LastInst->getOpcode())) {
+
+ TBB = SecondLastInst->getOperand(1).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(BranchCode));
+ Cond.push_back(SecondLastInst->getOperand(0));
+
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+
+ // If the block ends with two unconditional branches, handle it. The second
+ // one is not executed, so remove it.
+ if (IsBRU(SecondLastInst->getOpcode()) &&
+ IsBRU(LastInst->getOpcode())) {
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ I = LastInst;
+ if (AllowModify)
+ I->eraseFromParent();
+ return false;
+ }
+
+ // Likewise if it ends with a branch table followed by an unconditional branch.
+ if (IsBR_JT(SecondLastInst->getOpcode()) && IsBRU(LastInst->getOpcode())) {
+ I = LastInst;
+ if (AllowModify)
+ I->eraseFromParent();
+ return true;
+ }
+
+ // Otherwise, can't handle this.
+ return true;
+}
+
+unsigned
+XCoreInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL)const{
+ // Shouldn't be a fall through.
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert((Cond.size() == 2 || Cond.size() == 0) &&
+ "Unexpected number of components!");
+
+ if (FBB == 0) { // One way branch.
+ if (Cond.empty()) {
+ // Unconditional branch
+ BuildMI(&MBB, DL, get(XCore::BRFU_lu6)).addMBB(TBB);
+ } else {
+ // Conditional branch.
+ unsigned Opc = GetCondBranchFromCond((XCore::CondCode)Cond[0].getImm());
+ BuildMI(&MBB, DL, get(Opc)).addReg(Cond[1].getReg())
+ .addMBB(TBB);
+ }
+ return 1;
+ }
+
+ // Two-way Conditional branch.
+ assert(Cond.size() == 2 && "Unexpected number of components!");
+ unsigned Opc = GetCondBranchFromCond((XCore::CondCode)Cond[0].getImm());
+ BuildMI(&MBB, DL, get(Opc)).addReg(Cond[1].getReg())
+ .addMBB(TBB);
+ BuildMI(&MBB, DL, get(XCore::BRFU_lu6)).addMBB(FBB);
+ return 2;
+}
+
+unsigned
+XCoreInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin()) return 0;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return 0;
+ --I;
+ }
+ if (!IsBRU(I->getOpcode()) && !IsCondBranch(I->getOpcode()))
+ return 0;
+
+ // Remove the branch.
+ I->eraseFromParent();
+
+ I = MBB.end();
+
+ if (I == MBB.begin()) return 1;
+ --I;
+ if (!IsCondBranch(I->getOpcode()))
+ return 1;
+
+ // Remove the branch.
+ I->eraseFromParent();
+ return 2;
+}
+
+void XCoreInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ bool GRDest = XCore::GRRegsRegClass.contains(DestReg);
+ bool GRSrc = XCore::GRRegsRegClass.contains(SrcReg);
+
+ if (GRDest && GRSrc) {
+ BuildMI(MBB, I, DL, get(XCore::ADD_2rus), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc))
+ .addImm(0);
+ return;
+ }
+
+ if (GRDest && SrcReg == XCore::SP) {
+ BuildMI(MBB, I, DL, get(XCore::LDAWSP_ru6), DestReg).addImm(0);
+ return;
+ }
+
+ if (DestReg == XCore::SP && GRSrc) {
+ BuildMI(MBB, I, DL, get(XCore::SETSP_1r))
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
+ llvm_unreachable("Impossible reg-to-reg copy");
+}
+
+void XCoreInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned SrcReg, bool isKill,
+ int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const
+{
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+ BuildMI(MBB, I, DL, get(XCore::STWFI))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FrameIndex)
+ .addImm(0);
+}
+
+void XCoreInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const
+{
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+ BuildMI(MBB, I, DL, get(XCore::LDWFI), DestReg)
+ .addFrameIndex(FrameIndex)
+ .addImm(0);
+}
+
+MachineInstr*
+XCoreInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx,
+ uint64_t Offset, const MDNode *MDPtr,
+ DebugLoc DL) const {
+ MachineInstrBuilder MIB = BuildMI(MF, DL, get(XCore::DBG_VALUE))
+ .addFrameIndex(FrameIx).addImm(0).addImm(Offset).addMetadata(MDPtr);
+ return &*MIB;
+}
+
+/// ReverseBranchCondition - Return the inverse opcode of the
+/// specified Branch instruction.
+bool XCoreInstrInfo::
+ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
+ assert((Cond.size() == 2) &&
+ "Invalid XCore branch condition!");
+ Cond[0].setImm(GetOppositeBranchCondition((XCore::CondCode)Cond[0].getImm()));
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.h b/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.h
new file mode 100644
index 0000000..d354802
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.h
@@ -0,0 +1,93 @@
+//===- XCoreInstrInfo.h - XCore Instruction Information ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the XCore implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef XCOREINSTRUCTIONINFO_H
+#define XCOREINSTRUCTIONINFO_H
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "XCoreRegisterInfo.h"
+
+#define GET_INSTRINFO_HEADER
+#include "XCoreGenInstrInfo.inc"
+
+namespace llvm {
+
+class XCoreInstrInfo : public XCoreGenInstrInfo {
+ const XCoreRegisterInfo RI;
+public:
+ XCoreInstrInfo();
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ virtual const TargetRegisterInfo &getRegisterInfo() const { return RI; }
+
+ /// isLoadFromStackSlot - If the specified machine instruction is a direct
+ /// load from a stack slot, return the virtual or physical register number of
+ /// the destination along with the FrameIndex of the loaded stack slot. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than loading from the stack slot.
+ virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ /// isStoreToStackSlot - If the specified machine instruction is a direct
+ /// store to a stack slot, return the virtual or physical register number of
+ /// the source reg along with the FrameIndex of the loaded stack slot. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than storing to the stack slot.
+ virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const;
+
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
+
+ virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF,
+ int FrameIx,
+ uint64_t Offset,
+ const MDNode *MDPtr,
+ DebugLoc DL) const;
+
+ virtual bool ReverseBranchCondition(
+ SmallVectorImpl<MachineOperand> &Cond) const;
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.td b/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.td
new file mode 100644
index 0000000..4d2e93b
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.td
@@ -0,0 +1,1251 @@
+//===- XCoreInstrInfo.td - Target Description for XCore ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the XCore instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+// Uses of CP, DP are not currently reflected in the patterns, since
+// having a physical register as an operand prevents loop hoisting and
+// since the value of these registers never changes during the life of the
+// function.
+
+//===----------------------------------------------------------------------===//
+// Instruction format superclass.
+//===----------------------------------------------------------------------===//
+
+include "XCoreInstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// XCore specific DAG Nodes.
+//
+
+// Call
+def SDT_XCoreBranchLink : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
+def XCoreBranchLink : SDNode<"XCoreISD::BL",SDT_XCoreBranchLink,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+
+def XCoreRetsp : SDNode<"XCoreISD::RETSP", SDTBrind,
+ [SDNPHasChain, SDNPOptInGlue]>;
+
+def SDT_XCoreBR_JT : SDTypeProfile<0, 2,
+ [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
+
+def XCoreBR_JT : SDNode<"XCoreISD::BR_JT", SDT_XCoreBR_JT,
+ [SDNPHasChain]>;
+
+def XCoreBR_JT32 : SDNode<"XCoreISD::BR_JT32", SDT_XCoreBR_JT,
+ [SDNPHasChain]>;
+
+def SDT_XCoreAddress : SDTypeProfile<1, 1,
+ [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
+
+def pcrelwrapper : SDNode<"XCoreISD::PCRelativeWrapper", SDT_XCoreAddress,
+ []>;
+
+def dprelwrapper : SDNode<"XCoreISD::DPRelativeWrapper", SDT_XCoreAddress,
+ []>;
+
+def cprelwrapper : SDNode<"XCoreISD::CPRelativeWrapper", SDT_XCoreAddress,
+ []>;
+
+def SDT_XCoreStwsp : SDTypeProfile<0, 2, [SDTCisInt<1>]>;
+def XCoreStwsp : SDNode<"XCoreISD::STWSP", SDT_XCoreStwsp,
+ [SDNPHasChain]>;
+
+// These are target-independent nodes, but have target-specific formats.
+def SDT_XCoreCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
+def SDT_XCoreCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>,
+ SDTCisVT<1, i32> ]>;
+
+def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_XCoreCallSeqStart,
+ [SDNPHasChain, SDNPOutGlue]>;
+def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_XCoreCallSeqEnd,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+//===----------------------------------------------------------------------===//
+// Instruction Pattern Stuff
+//===----------------------------------------------------------------------===//
+
+def div4_xform : SDNodeXForm<imm, [{
+ // Transformation function: imm/4
+ assert(N->getZExtValue() % 4 == 0);
+ return getI32Imm(N->getZExtValue()/4);
+}]>;
+
+def msksize_xform : SDNodeXForm<imm, [{
+ // Transformation function: get the size of a mask
+ assert(isMask_32(N->getZExtValue()));
+ // look for the first non-zero bit
+ return getI32Imm(32 - CountLeadingZeros_32(N->getZExtValue()));
+}]>;
+
+def neg_xform : SDNodeXForm<imm, [{
+ // Transformation function: -imm
+ uint32_t value = N->getZExtValue();
+ return getI32Imm(-value);
+}]>;
+
+def bpwsub_xform : SDNodeXForm<imm, [{
+ // Transformation function: 32-imm
+ uint32_t value = N->getZExtValue();
+ return getI32Imm(32-value);
+}]>;
+
+def div4neg_xform : SDNodeXForm<imm, [{
+ // Transformation function: -imm/4
+ uint32_t value = N->getZExtValue();
+ assert(-value % 4 == 0);
+ return getI32Imm(-value/4);
+}]>;
+
+def immUs4Neg : PatLeaf<(imm), [{
+ uint32_t value = (uint32_t)N->getZExtValue();
+ return (-value)%4 == 0 && (-value)/4 <= 11;
+}]>;
+
+def immUs4 : PatLeaf<(imm), [{
+ uint32_t value = (uint32_t)N->getZExtValue();
+ return value%4 == 0 && value/4 <= 11;
+}]>;
+
+def immUsNeg : PatLeaf<(imm), [{
+ return -((uint32_t)N->getZExtValue()) <= 11;
+}]>;
+
+def immUs : PatLeaf<(imm), [{
+ return (uint32_t)N->getZExtValue() <= 11;
+}]>;
+
+def immU6 : PatLeaf<(imm), [{
+ return (uint32_t)N->getZExtValue() < (1 << 6);
+}]>;
+
+def immU10 : PatLeaf<(imm), [{
+ return (uint32_t)N->getZExtValue() < (1 << 10);
+}]>;
+
+def immU16 : PatLeaf<(imm), [{
+ return (uint32_t)N->getZExtValue() < (1 << 16);
+}]>;
+
+def immU20 : PatLeaf<(imm), [{
+ return (uint32_t)N->getZExtValue() < (1 << 20);
+}]>;
+
+def immMskBitp : PatLeaf<(imm), [{ return immMskBitp(N); }]>;
+
+def immBitp : PatLeaf<(imm), [{
+ uint32_t value = (uint32_t)N->getZExtValue();
+ return (value >= 1 && value <= 8)
+ || value == 16
+ || value == 24
+ || value == 32;
+}]>;
+
+def immBpwSubBitp : PatLeaf<(imm), [{
+ uint32_t value = (uint32_t)N->getZExtValue();
+ return (value >= 24 && value <= 31)
+ || value == 16
+ || value == 8
+ || value == 0;
+}]>;
+
+def lda16f : PatFrag<(ops node:$addr, node:$offset),
+ (add node:$addr, (shl node:$offset, 1))>;
+def lda16b : PatFrag<(ops node:$addr, node:$offset),
+ (sub node:$addr, (shl node:$offset, 1))>;
+def ldawf : PatFrag<(ops node:$addr, node:$offset),
+ (add node:$addr, (shl node:$offset, 2))>;
+def ldawb : PatFrag<(ops node:$addr, node:$offset),
+ (sub node:$addr, (shl node:$offset, 2))>;
+
+// Instruction operand types
+def calltarget : Operand<i32>;
+def brtarget : Operand<OtherVT>;
+def pclabel : Operand<i32>;
+
+// Addressing modes
+def ADDRspii : ComplexPattern<i32, 2, "SelectADDRspii", [add, frameindex], []>;
+def ADDRdpii : ComplexPattern<i32, 2, "SelectADDRdpii", [add, dprelwrapper],
+ []>;
+def ADDRcpii : ComplexPattern<i32, 2, "SelectADDRcpii", [add, cprelwrapper],
+ []>;
+
+// Address operands
+def MEMii : Operand<i32> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops i32imm, i32imm);
+}
+
+// Jump tables.
+def InlineJT : Operand<i32> {
+ let PrintMethod = "printInlineJT";
+}
+
+def InlineJT32 : Operand<i32> {
+ let PrintMethod = "printInlineJT32";
+}
+
+//===----------------------------------------------------------------------===//
+// Instruction Class Templates
+//===----------------------------------------------------------------------===//
+
+// Three operand short
+
+multiclass F3R_2RUS<string OpcStr, SDNode OpNode> {
+ def _3r: _F3R<
+ (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>;
+ def _2rus : _F2RUS<
+ (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, immUs:$c))]>;
+}
+
+multiclass F3R_2RUS_np<string OpcStr> {
+ def _3r: _F3R<
+ (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ []>;
+ def _2rus : _F2RUS<
+ (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ []>;
+}
+
+multiclass F3R_2RBITP<string OpcStr, SDNode OpNode> {
+ def _3r: _F3R<
+ (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>;
+ def _2rus : _F2RUS<
+ (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, immBitp:$c))]>;
+}
+
+class F3R<string OpcStr, SDNode OpNode> : _F3R<
+ (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>;
+
+class F3R_np<string OpcStr> : _F3R<
+ (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ []>;
+// Three operand long
+
+/// FL3R_L2RUS multiclass - Define a normal FL3R/FL2RUS pattern in one shot.
+multiclass FL3R_L2RUS<string OpcStr, SDNode OpNode> {
+ def _l3r: _FL3R<
+ (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>;
+ def _l2rus : _FL2RUS<
+ (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, immUs:$c))]>;
+}
+
+/// FL3R_L2RUS multiclass - Define a normal FL3R/FL2RUS pattern in one shot.
+multiclass FL3R_L2RBITP<string OpcStr, SDNode OpNode> {
+ def _l3r: _FL3R<
+ (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>;
+ def _l2rus : _FL2RUS<
+ (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, immBitp:$c))]>;
+}
+
+class FL3R<string OpcStr, SDNode OpNode> : _FL3R<
+ (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>;
+
+// Register - U6
+// Operand register - U6
+multiclass FRU6_LRU6_branch<string OpcStr> {
+ def _ru6: _FRU6<
+ (outs), (ins GRRegs:$cond, brtarget:$dest),
+ !strconcat(OpcStr, " $cond, $dest"),
+ []>;
+ def _lru6: _FLRU6<
+ (outs), (ins GRRegs:$cond, brtarget:$dest),
+ !strconcat(OpcStr, " $cond, $dest"),
+ []>;
+}
+
+multiclass FRU6_LRU6_cp<string OpcStr> {
+ def _ru6: _FRU6<
+ (outs GRRegs:$dst), (ins i32imm:$a),
+ !strconcat(OpcStr, " $dst, cp[$a]"),
+ []>;
+ def _lru6: _FLRU6<
+ (outs GRRegs:$dst), (ins i32imm:$a),
+ !strconcat(OpcStr, " $dst, cp[$a]"),
+ []>;
+}
+
+// U6
+multiclass FU6_LU6<string OpcStr, SDNode OpNode> {
+ def _u6: _FU6<
+ (outs), (ins i32imm:$b),
+ !strconcat(OpcStr, " $b"),
+ [(OpNode immU6:$b)]>;
+ def _lu6: _FLU6<
+ (outs), (ins i32imm:$b),
+ !strconcat(OpcStr, " $b"),
+ [(OpNode immU16:$b)]>;
+}
+multiclass FU6_LU6_int<string OpcStr, Intrinsic Int> {
+ def _u6: _FU6<
+ (outs), (ins i32imm:$b),
+ !strconcat(OpcStr, " $b"),
+ [(Int immU6:$b)]>;
+ def _lu6: _FLU6<
+ (outs), (ins i32imm:$b),
+ !strconcat(OpcStr, " $b"),
+ [(Int immU16:$b)]>;
+}
+
+multiclass FU6_LU6_np<string OpcStr> {
+ def _u6: _FU6<
+ (outs), (ins i32imm:$b),
+ !strconcat(OpcStr, " $b"),
+ []>;
+ def _lu6: _FLU6<
+ (outs), (ins i32imm:$b),
+ !strconcat(OpcStr, " $b"),
+ []>;
+}
+
+// U10
+multiclass FU10_LU10_np<string OpcStr> {
+ def _u10: _FU10<
+ (outs), (ins i32imm:$b),
+ !strconcat(OpcStr, " $b"),
+ []>;
+ def _lu10: _FLU10<
+ (outs), (ins i32imm:$b),
+ !strconcat(OpcStr, " $b"),
+ []>;
+}
+
+// Two operand short
+
+class F2R_np<string OpcStr> : _F2R<
+ (outs GRRegs:$dst), (ins GRRegs:$b),
+ !strconcat(OpcStr, " $dst, $b"),
+ []>;
+
+// Two operand long
+
+//===----------------------------------------------------------------------===//
+// Pseudo Instructions
+//===----------------------------------------------------------------------===//
+
+let Defs = [SP], Uses = [SP] in {
+def ADJCALLSTACKDOWN : PseudoInstXCore<(outs), (ins i32imm:$amt),
+ "${:comment} ADJCALLSTACKDOWN $amt",
+ [(callseq_start timm:$amt)]>;
+def ADJCALLSTACKUP : PseudoInstXCore<(outs), (ins i32imm:$amt1, i32imm:$amt2),
+ "${:comment} ADJCALLSTACKUP $amt1",
+ [(callseq_end timm:$amt1, timm:$amt2)]>;
+}
+
+def LDWFI : PseudoInstXCore<(outs GRRegs:$dst), (ins MEMii:$addr),
+ "${:comment} LDWFI $dst, $addr",
+ [(set GRRegs:$dst, (load ADDRspii:$addr))]>;
+
+def LDAWFI : PseudoInstXCore<(outs GRRegs:$dst), (ins MEMii:$addr),
+ "${:comment} LDAWFI $dst, $addr",
+ [(set GRRegs:$dst, ADDRspii:$addr)]>;
+
+def STWFI : PseudoInstXCore<(outs), (ins GRRegs:$src, MEMii:$addr),
+ "${:comment} STWFI $src, $addr",
+ [(store GRRegs:$src, ADDRspii:$addr)]>;
+
+// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after
+// instruction selection into a branch sequence.
+let usesCustomInserter = 1 in {
+ def SELECT_CC : PseudoInstXCore<(outs GRRegs:$dst),
+ (ins GRRegs:$cond, GRRegs:$T, GRRegs:$F),
+ "${:comment} SELECT_CC PSEUDO!",
+ [(set GRRegs:$dst,
+ (select GRRegs:$cond, GRRegs:$T, GRRegs:$F))]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+// Three operand short
+defm ADD : F3R_2RUS<"add", add>;
+defm SUB : F3R_2RUS<"sub", sub>;
+let neverHasSideEffects = 1 in {
+defm EQ : F3R_2RUS_np<"eq">;
+def LSS_3r : F3R_np<"lss">;
+def LSU_3r : F3R_np<"lsu">;
+}
+def AND_3r : F3R<"and", and>;
+def OR_3r : F3R<"or", or>;
+
+let mayLoad=1 in {
+def LDW_3r : _F3R<(outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset),
+ "ldw $dst, $addr[$offset]",
+ []>;
+
+def LDW_2rus : _F2RUS<(outs GRRegs:$dst), (ins GRRegs:$addr, i32imm:$offset),
+ "ldw $dst, $addr[$offset]",
+ []>;
+
+def LD16S_3r : _F3R<(outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset),
+ "ld16s $dst, $addr[$offset]",
+ []>;
+
+def LD8U_3r : _F3R<(outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset),
+ "ld8u $dst, $addr[$offset]",
+ []>;
+}
+
+let mayStore=1 in {
+def STW_3r : _F3R<(outs), (ins GRRegs:$val, GRRegs:$addr, GRRegs:$offset),
+ "stw $val, $addr[$offset]",
+ []>;
+
+def STW_2rus : _F2RUS<(outs), (ins GRRegs:$val, GRRegs:$addr, i32imm:$offset),
+ "stw $val, $addr[$offset]",
+ []>;
+}
+
+defm SHL : F3R_2RBITP<"shl", shl>;
+defm SHR : F3R_2RBITP<"shr", srl>;
+// TODO tsetr
+
+// Three operand long
+def LDAWF_l3r : _FL3R<(outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset),
+ "ldaw $dst, $addr[$offset]",
+ [(set GRRegs:$dst, (ldawf GRRegs:$addr, GRRegs:$offset))]>;
+
+let neverHasSideEffects = 1 in
+def LDAWF_l2rus : _FL2RUS<(outs GRRegs:$dst),
+ (ins GRRegs:$addr, i32imm:$offset),
+ "ldaw $dst, $addr[$offset]",
+ []>;
+
+def LDAWB_l3r : _FL3R<(outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset),
+ "ldaw $dst, $addr[-$offset]",
+ [(set GRRegs:$dst, (ldawb GRRegs:$addr, GRRegs:$offset))]>;
+
+let neverHasSideEffects = 1 in
+def LDAWB_l2rus : _FL2RUS<(outs GRRegs:$dst),
+ (ins GRRegs:$addr, i32imm:$offset),
+ "ldaw $dst, $addr[-$offset]",
+ []>;
+
+def LDA16F_l3r : _FL3R<(outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset),
+ "lda16 $dst, $addr[$offset]",
+ [(set GRRegs:$dst, (lda16f GRRegs:$addr, GRRegs:$offset))]>;
+
+def LDA16B_l3r : _FL3R<(outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset),
+ "lda16 $dst, $addr[-$offset]",
+ [(set GRRegs:$dst, (lda16b GRRegs:$addr, GRRegs:$offset))]>;
+
+def MUL_l3r : FL3R<"mul", mul>;
+// Instructions which may trap are marked as side effecting.
+let hasSideEffects = 1 in {
+def DIVS_l3r : FL3R<"divs", sdiv>;
+def DIVU_l3r : FL3R<"divu", udiv>;
+def REMS_l3r : FL3R<"rems", srem>;
+def REMU_l3r : FL3R<"remu", urem>;
+}
+def XOR_l3r : FL3R<"xor", xor>;
+defm ASHR : FL3R_L2RBITP<"ashr", sra>;
+
+let Constraints = "$src1 = $dst" in
+def CRC_l3r : _FL3R<(outs GRRegs:$dst),
+ (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3),
+ "crc32 $dst, $src2, $src3",
+ [(set GRRegs:$dst,
+ (int_xcore_crc32 GRRegs:$src1, GRRegs:$src2,
+ GRRegs:$src3))]>;
+
+// TODO inpw, outpw
+let mayStore=1 in {
+def ST16_l3r : _FL3R<(outs), (ins GRRegs:$val, GRRegs:$addr, GRRegs:$offset),
+ "st16 $val, $addr[$offset]",
+ []>;
+
+def ST8_l3r : _FL3R<(outs), (ins GRRegs:$val, GRRegs:$addr, GRRegs:$offset),
+ "st8 $val, $addr[$offset]",
+ []>;
+}
+
+// Four operand long
+let Constraints = "$src1 = $dst1,$src2 = $dst2" in {
+def MACCU_l4r : _L4R<(outs GRRegs:$dst1, GRRegs:$dst2),
+ (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3,
+ GRRegs:$src4),
+ "maccu $dst1, $dst2, $src3, $src4",
+ []>;
+
+def MACCS_l4r : _L4R<(outs GRRegs:$dst1, GRRegs:$dst2),
+ (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3,
+ GRRegs:$src4),
+ "maccs $dst1, $dst2, $src3, $src4",
+ []>;
+}
+
+let Constraints = "$src1 = $dst1" in
+def CRC8_l4r : _L4R<(outs GRRegs:$dst1, GRRegs:$dst2),
+ (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3),
+ "crc8 $dst1, $dst2, $src2, $src3",
+ []>;
+
+// Five operand long
+
+def LADD_l5r : _L5R<(outs GRRegs:$dst1, GRRegs:$dst2),
+ (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3),
+ "ladd $dst1, $dst2, $src1, $src2, $src3",
+ []>;
+
+def LSUB_l5r : _L5R<(outs GRRegs:$dst1, GRRegs:$dst2),
+ (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3),
+ "lsub $dst1, $dst2, $src1, $src2, $src3",
+ []>;
+
+def LDIV_l5r : _L5R<(outs GRRegs:$dst1, GRRegs:$dst2),
+ (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3),
+ "ldiv $dst1, $dst2, $src1, $src2, $src3",
+ []>;
+
+// Six operand long
+
+def LMUL_l6r : _L6R<(outs GRRegs:$dst1, GRRegs:$dst2),
+ (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3,
+ GRRegs:$src4),
+ "lmul $dst1, $dst2, $src1, $src2, $src3, $src4",
+ []>;
+
+// Register - U6
+
+//let Uses = [DP] in ...
+let neverHasSideEffects = 1, isReMaterializable = 1 in
+def LDAWDP_ru6: _FRU6<(outs GRRegs:$dst), (ins MEMii:$a),
+ "ldaw $dst, dp[$a]",
+ []>;
+
+let isReMaterializable = 1 in
+def LDAWDP_lru6: _FLRU6<
+ (outs GRRegs:$dst), (ins MEMii:$a),
+ "ldaw $dst, dp[$a]",
+ [(set GRRegs:$dst, ADDRdpii:$a)]>;
+
+let mayLoad=1 in
+def LDWDP_ru6: _FRU6<(outs GRRegs:$dst), (ins MEMii:$a),
+ "ldw $dst, dp[$a]",
+ []>;
+
+def LDWDP_lru6: _FLRU6<
+ (outs GRRegs:$dst), (ins MEMii:$a),
+ "ldw $dst, dp[$a]",
+ [(set GRRegs:$dst, (load ADDRdpii:$a))]>;
+
+let mayStore=1 in
+def STWDP_ru6 : _FRU6<(outs), (ins GRRegs:$val, MEMii:$addr),
+ "stw $val, dp[$addr]",
+ []>;
+
+def STWDP_lru6 : _FLRU6<(outs), (ins GRRegs:$val, MEMii:$addr),
+ "stw $val, dp[$addr]",
+ [(store GRRegs:$val, ADDRdpii:$addr)]>;
+
+//let Uses = [CP] in ..
+let mayLoad = 1, isReMaterializable = 1, neverHasSideEffects = 1 in
+defm LDWCP : FRU6_LRU6_cp<"ldw">;
+
+let Uses = [SP] in {
+let mayStore=1 in {
+def STWSP_ru6 : _FRU6<
+ (outs), (ins GRRegs:$val, i32imm:$index),
+ "stw $val, sp[$index]",
+ [(XCoreStwsp GRRegs:$val, immU6:$index)]>;
+
+def STWSP_lru6 : _FLRU6<
+ (outs), (ins GRRegs:$val, i32imm:$index),
+ "stw $val, sp[$index]",
+ [(XCoreStwsp GRRegs:$val, immU16:$index)]>;
+}
+
+let mayLoad=1 in {
+def LDWSP_ru6 : _FRU6<
+ (outs GRRegs:$dst), (ins i32imm:$b),
+ "ldw $dst, sp[$b]",
+ []>;
+
+def LDWSP_lru6 : _FLRU6<
+ (outs GRRegs:$dst), (ins i32imm:$b),
+ "ldw $dst, sp[$b]",
+ []>;
+}
+
+let neverHasSideEffects = 1 in {
+def LDAWSP_ru6 : _FRU6<
+ (outs GRRegs:$dst), (ins i32imm:$b),
+ "ldaw $dst, sp[$b]",
+ []>;
+
+def LDAWSP_lru6 : _FLRU6<
+ (outs GRRegs:$dst), (ins i32imm:$b),
+ "ldaw $dst, sp[$b]",
+ []>;
+
+def LDAWSP_ru6_RRegs : _FRU6<
+ (outs RRegs:$dst), (ins i32imm:$b),
+ "ldaw $dst, sp[$b]",
+ []>;
+
+def LDAWSP_lru6_RRegs : _FLRU6<
+ (outs RRegs:$dst), (ins i32imm:$b),
+ "ldaw $dst, sp[$b]",
+ []>;
+}
+}
+
+let isReMaterializable = 1 in {
+def LDC_ru6 : _FRU6<
+ (outs GRRegs:$dst), (ins i32imm:$b),
+ "ldc $dst, $b",
+ [(set GRRegs:$dst, immU6:$b)]>;
+
+def LDC_lru6 : _FLRU6<
+ (outs GRRegs:$dst), (ins i32imm:$b),
+ "ldc $dst, $b",
+ [(set GRRegs:$dst, immU16:$b)]>;
+}
+
+def SETC_ru6 : _FRU6<(outs), (ins GRRegs:$r, i32imm:$val),
+ "setc res[$r], $val",
+ [(int_xcore_setc GRRegs:$r, immU6:$val)]>;
+
+def SETC_lru6 : _FLRU6<(outs), (ins GRRegs:$r, i32imm:$val),
+ "setc res[$r], $val",
+ [(int_xcore_setc GRRegs:$r, immU16:$val)]>;
+
+// Operand register - U6
+let isBranch = 1, isTerminator = 1 in {
+defm BRFT: FRU6_LRU6_branch<"bt">;
+defm BRBT: FRU6_LRU6_branch<"bt">;
+defm BRFF: FRU6_LRU6_branch<"bf">;
+defm BRBF: FRU6_LRU6_branch<"bf">;
+}
+
+// U6
+let Defs = [SP], Uses = [SP] in {
+let neverHasSideEffects = 1 in
+defm EXTSP : FU6_LU6_np<"extsp">;
+let mayStore = 1 in
+defm ENTSP : FU6_LU6_np<"entsp">;
+
+let isReturn = 1, isTerminator = 1, mayLoad = 1, isBarrier = 1 in {
+defm RETSP : FU6_LU6<"retsp", XCoreRetsp>;
+}
+}
+
+// TODO extdp, kentsp, krestsp, blat
+// getsr, kalli
+let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
+def BRBU_u6 : _FU6<
+ (outs),
+ (ins brtarget:$target),
+ "bu $target",
+ []>;
+
+def BRBU_lu6 : _FLU6<
+ (outs),
+ (ins brtarget:$target),
+ "bu $target",
+ []>;
+
+def BRFU_u6 : _FU6<
+ (outs),
+ (ins brtarget:$target),
+ "bu $target",
+ []>;
+
+def BRFU_lu6 : _FLU6<
+ (outs),
+ (ins brtarget:$target),
+ "bu $target",
+ []>;
+}
+
+//let Uses = [CP] in ...
+let Defs = [R11], neverHasSideEffects = 1, isReMaterializable = 1 in
+def LDAWCP_u6: _FRU6<(outs), (ins MEMii:$a),
+ "ldaw r11, cp[$a]",
+ []>;
+
+let Defs = [R11], isReMaterializable = 1 in
+def LDAWCP_lu6: _FLRU6<
+ (outs), (ins MEMii:$a),
+ "ldaw r11, cp[$a]",
+ [(set R11, ADDRcpii:$a)]>;
+
+defm SETSR : FU6_LU6_int<"setsr", int_xcore_setsr>;
+
+defm CLRSR : FU6_LU6_int<"clrsr", int_xcore_clrsr>;
+
+// setsr may cause a branch if it is used to enable events. clrsr may
+// branch if it is executed while events are enabled.
+let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in {
+defm SETSR_branch : FU6_LU6_np<"setsr">;
+defm CLRSR_branch : FU6_LU6_np<"clrsr">;
+}
+
+// U10
+// TODO ldwcpl, blacp
+
+let Defs = [R11], isReMaterializable = 1, neverHasSideEffects = 1 in
+def LDAP_u10 : _FU10<
+ (outs),
+ (ins i32imm:$addr),
+ "ldap r11, $addr",
+ []>;
+
+let Defs = [R11], isReMaterializable = 1 in
+def LDAP_lu10 : _FLU10<
+ (outs),
+ (ins i32imm:$addr),
+ "ldap r11, $addr",
+ [(set R11, (pcrelwrapper tglobaladdr:$addr))]>;
+
+let Defs = [R11], isReMaterializable = 1 in
+def LDAP_lu10_ba : _FLU10<(outs),
+ (ins i32imm:$addr),
+ "ldap r11, $addr",
+ [(set R11, (pcrelwrapper tblockaddress:$addr))]>;
+
+let isCall=1,
+// All calls clobber the link register and the non-callee-saved registers:
+Defs = [R0, R1, R2, R3, R11, LR], Uses = [SP] in {
+def BL_u10 : _FU10<
+ (outs),
+ (ins calltarget:$target, variable_ops),
+ "bl $target",
+ [(XCoreBranchLink immU10:$target)]>;
+
+def BL_lu10 : _FLU10<
+ (outs),
+ (ins calltarget:$target, variable_ops),
+ "bl $target",
+ [(XCoreBranchLink immU20:$target)]>;
+}
+
+// Two operand short
+// TODO eet, eef, tsetmr
+def NOT : _F2R<(outs GRRegs:$dst), (ins GRRegs:$b),
+ "not $dst, $b",
+ [(set GRRegs:$dst, (not GRRegs:$b))]>;
+
+def NEG : _F2R<(outs GRRegs:$dst), (ins GRRegs:$b),
+ "neg $dst, $b",
+ [(set GRRegs:$dst, (ineg GRRegs:$b))]>;
+
+let Constraints = "$src1 = $dst" in {
+def SEXT_rus : _FRUS<(outs GRRegs:$dst), (ins GRRegs:$src1, i32imm:$src2),
+ "sext $dst, $src2",
+ [(set GRRegs:$dst, (int_xcore_sext GRRegs:$src1,
+ immBitp:$src2))]>;
+
+def SEXT_2r : _FRUS<(outs GRRegs:$dst), (ins GRRegs:$src1, GRRegs:$src2),
+ "sext $dst, $src2",
+ [(set GRRegs:$dst, (int_xcore_sext GRRegs:$src1,
+ GRRegs:$src2))]>;
+
+def ZEXT_rus : _FRUS<(outs GRRegs:$dst), (ins GRRegs:$src1, i32imm:$src2),
+ "zext $dst, $src2",
+ [(set GRRegs:$dst, (int_xcore_zext GRRegs:$src1,
+ immBitp:$src2))]>;
+
+def ZEXT_2r : _FRUS<(outs GRRegs:$dst), (ins GRRegs:$src1, GRRegs:$src2),
+ "zext $dst, $src2",
+ [(set GRRegs:$dst, (int_xcore_zext GRRegs:$src1,
+ GRRegs:$src2))]>;
+
+def ANDNOT_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$src1, GRRegs:$src2),
+ "andnot $dst, $src2",
+ [(set GRRegs:$dst, (and GRRegs:$src1, (not GRRegs:$src2)))]>;
+}
+
+let isReMaterializable = 1, neverHasSideEffects = 1 in
+def MKMSK_rus : _FRUS<(outs GRRegs:$dst), (ins i32imm:$size),
+ "mkmsk $dst, $size",
+ []>;
+
+def MKMSK_2r : _FRUS<(outs GRRegs:$dst), (ins GRRegs:$size),
+ "mkmsk $dst, $size",
+ [(set GRRegs:$dst, (add (shl 1, GRRegs:$size), 0xffffffff))]>;
+
+def GETR_rus : _FRUS<(outs GRRegs:$dst), (ins i32imm:$type),
+ "getr $dst, $type",
+ [(set GRRegs:$dst, (int_xcore_getr immUs:$type))]>;
+
+def GETTS_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r),
+ "getts $dst, res[$r]",
+ [(set GRRegs:$dst, (int_xcore_getts GRRegs:$r))]>;
+
+def SETPT_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val),
+ "setpt res[$r], $val",
+ [(int_xcore_setpt GRRegs:$r, GRRegs:$val)]>;
+
+def OUTCT_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val),
+ "outct res[$r], $val",
+ [(int_xcore_outct GRRegs:$r, GRRegs:$val)]>;
+
+def OUTCT_rus : _F2R<(outs), (ins GRRegs:$r, i32imm:$val),
+ "outct res[$r], $val",
+ [(int_xcore_outct GRRegs:$r, immUs:$val)]>;
+
+def OUTT_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val),
+ "outt res[$r], $val",
+ [(int_xcore_outt GRRegs:$r, GRRegs:$val)]>;
+
+def OUT_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val),
+ "out res[$r], $val",
+ [(int_xcore_out GRRegs:$r, GRRegs:$val)]>;
+
+let Constraints = "$src = $dst" in
+def OUTSHR_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r, GRRegs:$src),
+ "outshr res[$r], $src",
+ [(set GRRegs:$dst, (int_xcore_outshr GRRegs:$r,
+ GRRegs:$src))]>;
+
+def INCT_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r),
+ "inct $dst, res[$r]",
+ [(set GRRegs:$dst, (int_xcore_inct GRRegs:$r))]>;
+
+def INT_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r),
+ "int $dst, res[$r]",
+ [(set GRRegs:$dst, (int_xcore_int GRRegs:$r))]>;
+
+def IN_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r),
+ "in $dst, res[$r]",
+ [(set GRRegs:$dst, (int_xcore_in GRRegs:$r))]>;
+
+let Constraints = "$src = $dst" in
+def INSHR_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r, GRRegs:$src),
+ "inshr $dst, res[$r]",
+ [(set GRRegs:$dst, (int_xcore_inshr GRRegs:$r,
+ GRRegs:$src))]>;
+
+def CHKCT_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val),
+ "chkct res[$r], $val",
+ [(int_xcore_chkct GRRegs:$r, GRRegs:$val)]>;
+
+def CHKCT_rus : _F2R<(outs), (ins GRRegs:$r, i32imm:$val),
+ "chkct res[$r], $val",
+ [(int_xcore_chkct GRRegs:$r, immUs:$val)]>;
+
+def TESTCT_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$src),
+ "testct $dst, res[$src]",
+ [(set GRRegs:$dst, (int_xcore_testct GRRegs:$src))]>;
+
+def TESTWCT_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$src),
+ "testwct $dst, res[$src]",
+ [(set GRRegs:$dst, (int_xcore_testwct GRRegs:$src))]>;
+
+def SETD_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val),
+ "setd res[$r], $val",
+ [(int_xcore_setd GRRegs:$r, GRRegs:$val)]>;
+
+def GETST_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r),
+ "getst $dst, res[$r]",
+ [(set GRRegs:$dst, (int_xcore_getst GRRegs:$r))]>;
+
+def INITSP_2r : _F2R<(outs), (ins GRRegs:$t, GRRegs:$src),
+ "init t[$t]:sp, $src",
+ [(int_xcore_initsp GRRegs:$t, GRRegs:$src)]>;
+
+def INITPC_2r : _F2R<(outs), (ins GRRegs:$t, GRRegs:$src),
+ "init t[$t]:pc, $src",
+ [(int_xcore_initpc GRRegs:$t, GRRegs:$src)]>;
+
+def INITCP_2r : _F2R<(outs), (ins GRRegs:$t, GRRegs:$src),
+ "init t[$t]:cp, $src",
+ [(int_xcore_initcp GRRegs:$t, GRRegs:$src)]>;
+
+def INITDP_2r : _F2R<(outs), (ins GRRegs:$t, GRRegs:$src),
+ "init t[$t]:dp, $src",
+ [(int_xcore_initdp GRRegs:$t, GRRegs:$src)]>;
+
+// Two operand long
+// getd, testlcl
+def BITREV_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src),
+ "bitrev $dst, $src",
+ [(set GRRegs:$dst, (int_xcore_bitrev GRRegs:$src))]>;
+
+def BYTEREV_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src),
+ "byterev $dst, $src",
+ [(set GRRegs:$dst, (bswap GRRegs:$src))]>;
+
+def CLZ_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src),
+ "clz $dst, $src",
+ [(set GRRegs:$dst, (ctlz GRRegs:$src))]>;
+
+def SETC_l2r : _FL2R<(outs), (ins GRRegs:$r, GRRegs:$val),
+ "setc res[$r], $val",
+ [(int_xcore_setc GRRegs:$r, GRRegs:$val)]>;
+
+def SETTW_l2r : _FL2R<(outs), (ins GRRegs:$r, GRRegs:$val),
+ "settw res[$r], $val",
+ [(int_xcore_settw GRRegs:$r, GRRegs:$val)]>;
+
+def GETPS_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src),
+ "get $dst, ps[$src]",
+ [(set GRRegs:$dst, (int_xcore_getps GRRegs:$src))]>;
+
+def SETPS_l2r : _FL2R<(outs), (ins GRRegs:$src1, GRRegs:$src2),
+ "set ps[$src1], $src2",
+ [(int_xcore_setps GRRegs:$src1, GRRegs:$src2)]>;
+
+def INITLR_l2r : _FL2R<(outs), (ins GRRegs:$t, GRRegs:$src),
+ "init t[$t]:lr, $src",
+ [(int_xcore_initlr GRRegs:$t, GRRegs:$src)]>;
+
+def SETCLK_l2r : _FL2R<(outs), (ins GRRegs:$src1, GRRegs:$src2),
+ "setclk res[$src1], $src2",
+ [(int_xcore_setclk GRRegs:$src1, GRRegs:$src2)]>;
+
+def SETRDY_l2r : _FL2R<(outs), (ins GRRegs:$src1, GRRegs:$src2),
+ "setrdy res[$src1], $src2",
+ [(int_xcore_setrdy GRRegs:$src1, GRRegs:$src2)]>;
+
+def SETPSC_l2r : _FL2R<(outs), (ins GRRegs:$src1, GRRegs:$src2),
+ "setpsc res[$src1], $src2",
+ [(int_xcore_setpsc GRRegs:$src1, GRRegs:$src2)]>;
+
+def PEEK_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src),
+ "peek $dst, res[$src]",
+ [(set GRRegs:$dst, (int_xcore_peek GRRegs:$src))]>;
+
+def ENDIN_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src),
+ "endin $dst, res[$src]",
+ [(set GRRegs:$dst, (int_xcore_endin GRRegs:$src))]>;
+
+// One operand short
+// TODO edu, eeu, waitet, waitef, tstart, clrtp
+// setdp, setcp, setev, kcall
+// dgetreg
+def MSYNC_1r : _F1R<(outs), (ins GRRegs:$i),
+ "msync res[$i]",
+ [(int_xcore_msync GRRegs:$i)]>;
+def MJOIN_1r : _F1R<(outs), (ins GRRegs:$i),
+ "mjoin res[$i]",
+ [(int_xcore_mjoin GRRegs:$i)]>;
+
+let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in
+def BAU_1r : _F1R<(outs), (ins GRRegs:$addr),
+ "bau $addr",
+ [(brind GRRegs:$addr)]>;
+
+let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in
+def BR_JT : PseudoInstXCore<(outs), (ins InlineJT:$t, GRRegs:$i),
+ "bru $i\n$t",
+ [(XCoreBR_JT tjumptable:$t, GRRegs:$i)]>;
+
+let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in
+def BR_JT32 : PseudoInstXCore<(outs), (ins InlineJT32:$t, GRRegs:$i),
+ "bru $i\n$t",
+ [(XCoreBR_JT32 tjumptable:$t, GRRegs:$i)]>;
+
+let Defs=[SP], neverHasSideEffects=1 in
+def SETSP_1r : _F1R<(outs), (ins GRRegs:$src),
+ "set sp, $src",
+ []>;
+
+let hasCtrlDep = 1 in
+def ECALLT_1r : _F1R<(outs), (ins GRRegs:$src),
+ "ecallt $src",
+ []>;
+
+let hasCtrlDep = 1 in
+def ECALLF_1r : _F1R<(outs), (ins GRRegs:$src),
+ "ecallf $src",
+ []>;
+
+let isCall=1,
+// All calls clobber the link register and the non-callee-saved registers:
+Defs = [R0, R1, R2, R3, R11, LR], Uses = [SP] in {
+def BLA_1r : _F1R<(outs), (ins GRRegs:$addr, variable_ops),
+ "bla $addr",
+ [(XCoreBranchLink GRRegs:$addr)]>;
+}
+
+def SYNCR_1r : _F1R<(outs), (ins GRRegs:$r),
+ "syncr res[$r]",
+ [(int_xcore_syncr GRRegs:$r)]>;
+
+def FREER_1r : _F1R<(outs), (ins GRRegs:$r),
+ "freer res[$r]",
+ [(int_xcore_freer GRRegs:$r)]>;
+
+let Uses=[R11] in {
+def SETV_1r : _F1R<(outs), (ins GRRegs:$r),
+ "setv res[$r], r11",
+ [(int_xcore_setv GRRegs:$r, R11)]>;
+
+def SETEV_1r : _F1R<(outs), (ins GRRegs:$r),
+ "setev res[$r], r11",
+ [(int_xcore_setev GRRegs:$r, R11)]>;
+}
+
+def EEU_1r : _F1R<(outs), (ins GRRegs:$r),
+ "eeu res[$r]",
+ [(int_xcore_eeu GRRegs:$r)]>;
+
+// Zero operand short
+// TODO freet, ldspc, stspc, ldssr, stssr, ldsed, stsed,
+// stet, getkep, getksp, setkep, getid, kret, dcall, dret,
+// dentsp, drestsp
+
+def CLRE_0R : _F0R<(outs), (ins), "clre", [(int_xcore_clre)]>;
+
+let Defs = [R11] in {
+def GETID_0R : _F0R<(outs), (ins),
+ "get r11, id",
+ [(set R11, (int_xcore_getid))]>;
+
+def GETED_0R : _F0R<(outs), (ins),
+ "get r11, ed",
+ [(set R11, (int_xcore_geted))]>;
+
+def GETET_0R : _F0R<(outs), (ins),
+ "get r11, et",
+ [(set R11, (int_xcore_getet))]>;
+}
+
+def SSYNC_0r : _F0R<(outs), (ins),
+ "ssync",
+ [(int_xcore_ssync)]>;
+
+let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1,
+ hasSideEffects = 1 in
+def WAITEU_0R : _F0R<(outs), (ins),
+ "waiteu",
+ [(brind (int_xcore_waitevent))]>;
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//===----------------------------------------------------------------------===//
+
+def : Pat<(XCoreBranchLink tglobaladdr:$addr), (BL_lu10 tglobaladdr:$addr)>;
+def : Pat<(XCoreBranchLink texternalsym:$addr), (BL_lu10 texternalsym:$addr)>;
+
+/// sext_inreg
+def : Pat<(sext_inreg GRRegs:$b, i1), (SEXT_rus GRRegs:$b, 1)>;
+def : Pat<(sext_inreg GRRegs:$b, i8), (SEXT_rus GRRegs:$b, 8)>;
+def : Pat<(sext_inreg GRRegs:$b, i16), (SEXT_rus GRRegs:$b, 16)>;
+
+/// loads
+def : Pat<(zextloadi8 (add GRRegs:$addr, GRRegs:$offset)),
+ (LD8U_3r GRRegs:$addr, GRRegs:$offset)>;
+def : Pat<(zextloadi8 GRRegs:$addr), (LD8U_3r GRRegs:$addr, (LDC_ru6 0))>;
+
+def : Pat<(sextloadi16 (lda16f GRRegs:$addr, GRRegs:$offset)),
+ (LD16S_3r GRRegs:$addr, GRRegs:$offset)>;
+def : Pat<(sextloadi16 GRRegs:$addr), (LD16S_3r GRRegs:$addr, (LDC_ru6 0))>;
+
+def : Pat<(load (ldawf GRRegs:$addr, GRRegs:$offset)),
+ (LDW_3r GRRegs:$addr, GRRegs:$offset)>;
+def : Pat<(load (add GRRegs:$addr, immUs4:$offset)),
+ (LDW_2rus GRRegs:$addr, (div4_xform immUs4:$offset))>;
+def : Pat<(load GRRegs:$addr), (LDW_2rus GRRegs:$addr, 0)>;
+
+/// anyext
+def : Pat<(extloadi8 (add GRRegs:$addr, GRRegs:$offset)),
+ (LD8U_3r GRRegs:$addr, GRRegs:$offset)>;
+def : Pat<(extloadi8 GRRegs:$addr), (LD8U_3r GRRegs:$addr, (LDC_ru6 0))>;
+def : Pat<(extloadi16 (lda16f GRRegs:$addr, GRRegs:$offset)),
+ (LD16S_3r GRRegs:$addr, GRRegs:$offset)>;
+def : Pat<(extloadi16 GRRegs:$addr), (LD16S_3r GRRegs:$addr, (LDC_ru6 0))>;
+
+/// stores
+def : Pat<(truncstorei8 GRRegs:$val, (add GRRegs:$addr, GRRegs:$offset)),
+ (ST8_l3r GRRegs:$val, GRRegs:$addr, GRRegs:$offset)>;
+def : Pat<(truncstorei8 GRRegs:$val, GRRegs:$addr),
+ (ST8_l3r GRRegs:$val, GRRegs:$addr, (LDC_ru6 0))>;
+
+def : Pat<(truncstorei16 GRRegs:$val, (lda16f GRRegs:$addr, GRRegs:$offset)),
+ (ST16_l3r GRRegs:$val, GRRegs:$addr, GRRegs:$offset)>;
+def : Pat<(truncstorei16 GRRegs:$val, GRRegs:$addr),
+ (ST16_l3r GRRegs:$val, GRRegs:$addr, (LDC_ru6 0))>;
+
+def : Pat<(store GRRegs:$val, (ldawf GRRegs:$addr, GRRegs:$offset)),
+ (STW_3r GRRegs:$val, GRRegs:$addr, GRRegs:$offset)>;
+def : Pat<(store GRRegs:$val, (add GRRegs:$addr, immUs4:$offset)),
+ (STW_2rus GRRegs:$val, GRRegs:$addr, (div4_xform immUs4:$offset))>;
+def : Pat<(store GRRegs:$val, GRRegs:$addr),
+ (STW_2rus GRRegs:$val, GRRegs:$addr, 0)>;
+
+/// cttz
+def : Pat<(cttz GRRegs:$src), (CLZ_l2r (BITREV_l2r GRRegs:$src))>;
+
+/// trap
+def : Pat<(trap), (ECALLF_1r (LDC_ru6 0))>;
+
+///
+/// branch patterns
+///
+
+// unconditional branch
+def : Pat<(br bb:$addr), (BRFU_lu6 bb:$addr)>;
+
+// direct match equal/notequal zero brcond
+def : Pat<(brcond (setne GRRegs:$lhs, 0), bb:$dst),
+ (BRFT_lru6 GRRegs:$lhs, bb:$dst)>;
+def : Pat<(brcond (seteq GRRegs:$lhs, 0), bb:$dst),
+ (BRFF_lru6 GRRegs:$lhs, bb:$dst)>;
+
+def : Pat<(brcond (setle GRRegs:$lhs, GRRegs:$rhs), bb:$dst),
+ (BRFF_lru6 (LSS_3r GRRegs:$rhs, GRRegs:$lhs), bb:$dst)>;
+def : Pat<(brcond (setule GRRegs:$lhs, GRRegs:$rhs), bb:$dst),
+ (BRFF_lru6 (LSU_3r GRRegs:$rhs, GRRegs:$lhs), bb:$dst)>;
+def : Pat<(brcond (setge GRRegs:$lhs, GRRegs:$rhs), bb:$dst),
+ (BRFF_lru6 (LSS_3r GRRegs:$lhs, GRRegs:$rhs), bb:$dst)>;
+def : Pat<(brcond (setuge GRRegs:$lhs, GRRegs:$rhs), bb:$dst),
+ (BRFF_lru6 (LSU_3r GRRegs:$lhs, GRRegs:$rhs), bb:$dst)>;
+def : Pat<(brcond (setne GRRegs:$lhs, GRRegs:$rhs), bb:$dst),
+ (BRFF_lru6 (EQ_3r GRRegs:$lhs, GRRegs:$rhs), bb:$dst)>;
+def : Pat<(brcond (setne GRRegs:$lhs, immUs:$rhs), bb:$dst),
+ (BRFF_lru6 (EQ_2rus GRRegs:$lhs, immUs:$rhs), bb:$dst)>;
+
+// generic brcond pattern
+def : Pat<(brcond GRRegs:$cond, bb:$addr), (BRFT_lru6 GRRegs:$cond, bb:$addr)>;
+
+
+///
+/// Select patterns
+///
+
+// direct match equal/notequal zero select
+def : Pat<(select (setne GRRegs:$lhs, 0), GRRegs:$T, GRRegs:$F),
+ (SELECT_CC GRRegs:$lhs, GRRegs:$T, GRRegs:$F)>;
+
+def : Pat<(select (seteq GRRegs:$lhs, 0), GRRegs:$T, GRRegs:$F),
+ (SELECT_CC GRRegs:$lhs, GRRegs:$F, GRRegs:$T)>;
+
+def : Pat<(select (setle GRRegs:$lhs, GRRegs:$rhs), GRRegs:$T, GRRegs:$F),
+ (SELECT_CC (LSS_3r GRRegs:$rhs, GRRegs:$lhs), GRRegs:$F, GRRegs:$T)>;
+def : Pat<(select (setule GRRegs:$lhs, GRRegs:$rhs), GRRegs:$T, GRRegs:$F),
+ (SELECT_CC (LSU_3r GRRegs:$rhs, GRRegs:$lhs), GRRegs:$F, GRRegs:$T)>;
+def : Pat<(select (setge GRRegs:$lhs, GRRegs:$rhs), GRRegs:$T, GRRegs:$F),
+ (SELECT_CC (LSS_3r GRRegs:$lhs, GRRegs:$rhs), GRRegs:$F, GRRegs:$T)>;
+def : Pat<(select (setuge GRRegs:$lhs, GRRegs:$rhs), GRRegs:$T, GRRegs:$F),
+ (SELECT_CC (LSU_3r GRRegs:$lhs, GRRegs:$rhs), GRRegs:$F, GRRegs:$T)>;
+def : Pat<(select (setne GRRegs:$lhs, GRRegs:$rhs), GRRegs:$T, GRRegs:$F),
+ (SELECT_CC (EQ_3r GRRegs:$lhs, GRRegs:$rhs), GRRegs:$F, GRRegs:$T)>;
+def : Pat<(select (setne GRRegs:$lhs, immUs:$rhs), GRRegs:$T, GRRegs:$F),
+ (SELECT_CC (EQ_2rus GRRegs:$lhs, immUs:$rhs), GRRegs:$F, GRRegs:$T)>;
+
+///
+/// setcc patterns, only matched when none of the above brcond
+/// patterns match
+///
+
+// setcc 2 register operands
+def : Pat<(setle GRRegs:$lhs, GRRegs:$rhs),
+ (EQ_2rus (LSS_3r GRRegs:$rhs, GRRegs:$lhs), 0)>;
+def : Pat<(setule GRRegs:$lhs, GRRegs:$rhs),
+ (EQ_2rus (LSU_3r GRRegs:$rhs, GRRegs:$lhs), 0)>;
+
+def : Pat<(setgt GRRegs:$lhs, GRRegs:$rhs),
+ (LSS_3r GRRegs:$rhs, GRRegs:$lhs)>;
+def : Pat<(setugt GRRegs:$lhs, GRRegs:$rhs),
+ (LSU_3r GRRegs:$rhs, GRRegs:$lhs)>;
+
+def : Pat<(setge GRRegs:$lhs, GRRegs:$rhs),
+ (EQ_2rus (LSS_3r GRRegs:$lhs, GRRegs:$rhs), 0)>;
+def : Pat<(setuge GRRegs:$lhs, GRRegs:$rhs),
+ (EQ_2rus (LSU_3r GRRegs:$lhs, GRRegs:$rhs), 0)>;
+
+def : Pat<(setlt GRRegs:$lhs, GRRegs:$rhs),
+ (LSS_3r GRRegs:$lhs, GRRegs:$rhs)>;
+def : Pat<(setult GRRegs:$lhs, GRRegs:$rhs),
+ (LSU_3r GRRegs:$lhs, GRRegs:$rhs)>;
+
+def : Pat<(setne GRRegs:$lhs, GRRegs:$rhs),
+ (EQ_2rus (EQ_3r GRRegs:$lhs, GRRegs:$rhs), 0)>;
+
+def : Pat<(seteq GRRegs:$lhs, GRRegs:$rhs),
+ (EQ_3r GRRegs:$lhs, GRRegs:$rhs)>;
+
+// setcc reg/imm operands
+def : Pat<(seteq GRRegs:$lhs, immUs:$rhs),
+ (EQ_2rus GRRegs:$lhs, immUs:$rhs)>;
+def : Pat<(setne GRRegs:$lhs, immUs:$rhs),
+ (EQ_2rus (EQ_2rus GRRegs:$lhs, immUs:$rhs), 0)>;
+
+// misc
+def : Pat<(add GRRegs:$addr, immUs4:$offset),
+ (LDAWF_l2rus GRRegs:$addr, (div4_xform immUs4:$offset))>;
+
+def : Pat<(sub GRRegs:$addr, immUs4:$offset),
+ (LDAWB_l2rus GRRegs:$addr, (div4_xform immUs4:$offset))>;
+
+def : Pat<(and GRRegs:$val, immMskBitp:$mask),
+ (ZEXT_rus GRRegs:$val, (msksize_xform immMskBitp:$mask))>;
+
+// (sub X, imm) gets canonicalized to (add X, -imm). Match this form.
+def : Pat<(add GRRegs:$src1, immUsNeg:$src2),
+ (SUB_2rus GRRegs:$src1, (neg_xform immUsNeg:$src2))>;
+
+def : Pat<(add GRRegs:$src1, immUs4Neg:$src2),
+ (LDAWB_l2rus GRRegs:$src1, (div4neg_xform immUs4Neg:$src2))>;
+
+///
+/// Some peepholes
+///
+
+def : Pat<(mul GRRegs:$src, 3),
+ (LDA16F_l3r GRRegs:$src, GRRegs:$src)>;
+
+def : Pat<(mul GRRegs:$src, 5),
+ (LDAWF_l3r GRRegs:$src, GRRegs:$src)>;
+
+def : Pat<(mul GRRegs:$src, -3),
+ (LDAWB_l3r GRRegs:$src, GRRegs:$src)>;
+
+// ashr X, 32 is equivalent to ashr X, 31 on the XCore.
+def : Pat<(sra GRRegs:$src, 31),
+ (ASHR_l2rus GRRegs:$src, 32)>;
+
+def : Pat<(brcond (setlt GRRegs:$lhs, 0), bb:$dst),
+ (BRFT_lru6 (ASHR_l2rus GRRegs:$lhs, 32), bb:$dst)>;
+
+// setge X, 0 is canonicalized to setgt X, -1
+def : Pat<(brcond (setgt GRRegs:$lhs, -1), bb:$dst),
+ (BRFF_lru6 (ASHR_l2rus GRRegs:$lhs, 32), bb:$dst)>;
+
+def : Pat<(select (setlt GRRegs:$lhs, 0), GRRegs:$T, GRRegs:$F),
+ (SELECT_CC (ASHR_l2rus GRRegs:$lhs, 32), GRRegs:$T, GRRegs:$F)>;
+
+def : Pat<(select (setgt GRRegs:$lhs, -1), GRRegs:$T, GRRegs:$F),
+ (SELECT_CC (ASHR_l2rus GRRegs:$lhs, 32), GRRegs:$F, GRRegs:$T)>;
+
+def : Pat<(setgt GRRegs:$lhs, -1),
+ (EQ_2rus (ASHR_l2rus GRRegs:$lhs, 32), 0)>;
+
+def : Pat<(sra (shl GRRegs:$src, immBpwSubBitp:$imm), immBpwSubBitp:$imm),
+ (SEXT_rus GRRegs:$src, (bpwsub_xform immBpwSubBitp:$imm))>;
diff --git a/contrib/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.h b/contrib/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.h
new file mode 100644
index 0000000..a575a0f
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.h
@@ -0,0 +1,69 @@
+//====- XCoreMachineFuctionInfo.h - XCore machine function info -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares XCore-specific per-machine-function information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef XCOREMACHINEFUNCTIONINFO_H
+#define XCOREMACHINEFUNCTIONINFO_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include <vector>
+
+namespace llvm {
+
+// Forward declarations
+class Function;
+
+/// XCoreFunctionInfo - This class is derived from MachineFunction private
+/// XCore target-specific information for each MachineFunction.
+class XCoreFunctionInfo : public MachineFunctionInfo {
+private:
+ bool UsesLR;
+ int LRSpillSlot;
+ int FPSpillSlot;
+ int VarArgsFrameIndex;
+ std::vector<std::pair<MCSymbol*, CalleeSavedInfo> > SpillLabels;
+
+public:
+ XCoreFunctionInfo() :
+ UsesLR(false),
+ LRSpillSlot(0),
+ FPSpillSlot(0),
+ VarArgsFrameIndex(0) {}
+
+ explicit XCoreFunctionInfo(MachineFunction &MF) :
+ UsesLR(false),
+ LRSpillSlot(0),
+ FPSpillSlot(0),
+ VarArgsFrameIndex(0) {}
+
+ ~XCoreFunctionInfo() {}
+
+ void setVarArgsFrameIndex(int off) { VarArgsFrameIndex = off; }
+ int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
+
+ void setUsesLR(bool val) { UsesLR = val; }
+ bool getUsesLR() const { return UsesLR; }
+
+ void setLRSpillSlot(int off) { LRSpillSlot = off; }
+ int getLRSpillSlot() const { return LRSpillSlot; }
+
+ void setFPSpillSlot(int off) { FPSpillSlot = off; }
+ int getFPSpillSlot() const { return FPSpillSlot; }
+
+ std::vector<std::pair<MCSymbol*, CalleeSavedInfo> > &getSpillLabels() {
+ return SpillLabels;
+ }
+};
+} // End llvm namespace
+
+#endif // XCOREMACHINEFUNCTIONINFO_H
diff --git a/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp b/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp
new file mode 100644
index 0000000..1b78b37
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp
@@ -0,0 +1,327 @@
+//===- XCoreRegisterInfo.cpp - XCore Register Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the XCore implementation of the MRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "XCoreRegisterInfo.h"
+#include "XCoreMachineFunctionInfo.h"
+#include "XCore.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Type.h"
+#include "llvm/Function.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define GET_REGINFO_TARGET_DESC
+#include "XCoreGenRegisterInfo.inc"
+
+using namespace llvm;
+
+XCoreRegisterInfo::XCoreRegisterInfo(const TargetInstrInfo &tii)
+ : XCoreGenRegisterInfo(XCore::LR), TII(tii) {
+}
+
+// helper functions
+static inline bool isImmUs(unsigned val) {
+ return val <= 11;
+}
+
+static inline bool isImmU6(unsigned val) {
+ return val < (1 << 6);
+}
+
+static inline bool isImmU16(unsigned val) {
+ return val < (1 << 16);
+}
+
+static const unsigned XCore_ArgRegs[] = {
+ XCore::R0, XCore::R1, XCore::R2, XCore::R3
+};
+
+const unsigned * XCoreRegisterInfo::getArgRegs(const MachineFunction *MF)
+{
+ return XCore_ArgRegs;
+}
+
+unsigned XCoreRegisterInfo::getNumArgRegs(const MachineFunction *MF)
+{
+ return array_lengthof(XCore_ArgRegs);
+}
+
+bool XCoreRegisterInfo::needsFrameMoves(const MachineFunction &MF) {
+ return MF.getMMI().hasDebugInfo() ||
+ MF.getFunction()->needsUnwindTableEntry();
+}
+
+const unsigned* XCoreRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF)
+ const {
+ static const unsigned CalleeSavedRegs[] = {
+ XCore::R4, XCore::R5, XCore::R6, XCore::R7,
+ XCore::R8, XCore::R9, XCore::R10, XCore::LR,
+ 0
+ };
+ return CalleeSavedRegs;
+}
+
+BitVector XCoreRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ Reserved.set(XCore::CP);
+ Reserved.set(XCore::DP);
+ Reserved.set(XCore::SP);
+ Reserved.set(XCore::LR);
+ if (TFI->hasFP(MF)) {
+ Reserved.set(XCore::R10);
+ }
+ return Reserved;
+}
+
+bool
+XCoreRegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ // TODO can we estimate stack size?
+ return TFI->hasFP(MF);
+}
+
+bool
+XCoreRegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const {
+ return false;
+}
+
+// This function eliminates ADJCALLSTACKDOWN,
+// ADJCALLSTACKUP pseudo instructions
+void XCoreRegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ if (!TFI->hasReservedCallFrame(MF)) {
+ // Turn the adjcallstackdown instruction into 'extsp <amt>' and the
+ // adjcallstackup instruction into 'ldaw sp, sp[<amt>]'
+ MachineInstr *Old = I;
+ uint64_t Amount = Old->getOperand(0).getImm();
+ if (Amount != 0) {
+ // We need to keep the stack aligned properly. To do this, we round the
+ // amount of space needed for the outgoing arguments up to the next
+ // alignment boundary.
+ unsigned Align = TFI->getStackAlignment();
+ Amount = (Amount+Align-1)/Align*Align;
+
+ assert(Amount%4 == 0);
+ Amount /= 4;
+
+ bool isU6 = isImmU6(Amount);
+ if (!isU6 && !isImmU16(Amount)) {
+ // FIX could emit multiple instructions in this case.
+#ifndef NDEBUG
+ errs() << "eliminateCallFramePseudoInstr size too big: "
+ << Amount << "\n";
+#endif
+ llvm_unreachable(0);
+ }
+
+ MachineInstr *New;
+ if (Old->getOpcode() == XCore::ADJCALLSTACKDOWN) {
+ int Opcode = isU6 ? XCore::EXTSP_u6 : XCore::EXTSP_lu6;
+ New=BuildMI(MF, Old->getDebugLoc(), TII.get(Opcode))
+ .addImm(Amount);
+ } else {
+ assert(Old->getOpcode() == XCore::ADJCALLSTACKUP);
+ int Opcode = isU6 ? XCore::LDAWSP_ru6_RRegs : XCore::LDAWSP_lru6_RRegs;
+ New=BuildMI(MF, Old->getDebugLoc(), TII.get(Opcode), XCore::SP)
+ .addImm(Amount);
+ }
+
+ // Replace the pseudo instruction with a new instruction...
+ MBB.insert(I, New);
+ }
+ }
+
+ MBB.erase(I);
+}
+
+void
+XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS) const {
+ assert(SPAdj == 0 && "Unexpected");
+ MachineInstr &MI = *II;
+ DebugLoc dl = MI.getDebugLoc();
+ unsigned i = 0;
+
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+
+ MachineOperand &FrameOp = MI.getOperand(i);
+ int FrameIndex = FrameOp.getIndex();
+
+ MachineFunction &MF = *MI.getParent()->getParent();
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex);
+ int StackSize = MF.getFrameInfo()->getStackSize();
+
+ #ifndef NDEBUG
+ DEBUG(errs() << "\nFunction : "
+ << MF.getFunction()->getName() << "\n");
+ DEBUG(errs() << "<--------->\n");
+ DEBUG(MI.print(errs()));
+ DEBUG(errs() << "FrameIndex : " << FrameIndex << "\n");
+ DEBUG(errs() << "FrameOffset : " << Offset << "\n");
+ DEBUG(errs() << "StackSize : " << StackSize << "\n");
+ #endif
+
+ Offset += StackSize;
+
+ unsigned FrameReg = getFrameRegister(MF);
+
+ // Special handling of DBG_VALUE instructions.
+ if (MI.isDebugValue()) {
+ MI.getOperand(i).ChangeToRegister(FrameReg, false /*isDef*/);
+ MI.getOperand(i+1).ChangeToImmediate(Offset);
+ return;
+ }
+
+ // fold constant into offset.
+ Offset += MI.getOperand(i + 1).getImm();
+ MI.getOperand(i + 1).ChangeToImmediate(0);
+
+ assert(Offset%4 == 0 && "Misaligned stack offset");
+
+ DEBUG(errs() << "Offset : " << Offset << "\n" << "<--------->\n");
+
+ Offset/=4;
+
+ bool FP = TFI->hasFP(MF);
+
+ unsigned Reg = MI.getOperand(0).getReg();
+ bool isKill = MI.getOpcode() == XCore::STWFI && MI.getOperand(0).isKill();
+
+ assert(XCore::GRRegsRegisterClass->contains(Reg) &&
+ "Unexpected register operand");
+
+ MachineBasicBlock &MBB = *MI.getParent();
+
+ if (FP) {
+ bool isUs = isImmUs(Offset);
+
+ if (!isUs) {
+ if (!RS)
+ report_fatal_error("eliminateFrameIndex Frame size too big: " +
+ Twine(Offset));
+ unsigned ScratchReg = RS->scavengeRegister(XCore::GRRegsRegisterClass, II,
+ SPAdj);
+ loadConstant(MBB, II, ScratchReg, Offset, dl);
+ switch (MI.getOpcode()) {
+ case XCore::LDWFI:
+ BuildMI(MBB, II, dl, TII.get(XCore::LDW_3r), Reg)
+ .addReg(FrameReg)
+ .addReg(ScratchReg, RegState::Kill);
+ break;
+ case XCore::STWFI:
+ BuildMI(MBB, II, dl, TII.get(XCore::STW_3r))
+ .addReg(Reg, getKillRegState(isKill))
+ .addReg(FrameReg)
+ .addReg(ScratchReg, RegState::Kill);
+ break;
+ case XCore::LDAWFI:
+ BuildMI(MBB, II, dl, TII.get(XCore::LDAWF_l3r), Reg)
+ .addReg(FrameReg)
+ .addReg(ScratchReg, RegState::Kill);
+ break;
+ default:
+ llvm_unreachable("Unexpected Opcode");
+ }
+ } else {
+ switch (MI.getOpcode()) {
+ case XCore::LDWFI:
+ BuildMI(MBB, II, dl, TII.get(XCore::LDW_2rus), Reg)
+ .addReg(FrameReg)
+ .addImm(Offset);
+ break;
+ case XCore::STWFI:
+ BuildMI(MBB, II, dl, TII.get(XCore::STW_2rus))
+ .addReg(Reg, getKillRegState(isKill))
+ .addReg(FrameReg)
+ .addImm(Offset);
+ break;
+ case XCore::LDAWFI:
+ BuildMI(MBB, II, dl, TII.get(XCore::LDAWF_l2rus), Reg)
+ .addReg(FrameReg)
+ .addImm(Offset);
+ break;
+ default:
+ llvm_unreachable("Unexpected Opcode");
+ }
+ }
+ } else {
+ bool isU6 = isImmU6(Offset);
+ if (!isU6 && !isImmU16(Offset))
+ report_fatal_error("eliminateFrameIndex Frame size too big: " +
+ Twine(Offset));
+
+ switch (MI.getOpcode()) {
+ int NewOpcode;
+ case XCore::LDWFI:
+ NewOpcode = (isU6) ? XCore::LDWSP_ru6 : XCore::LDWSP_lru6;
+ BuildMI(MBB, II, dl, TII.get(NewOpcode), Reg)
+ .addImm(Offset);
+ break;
+ case XCore::STWFI:
+ NewOpcode = (isU6) ? XCore::STWSP_ru6 : XCore::STWSP_lru6;
+ BuildMI(MBB, II, dl, TII.get(NewOpcode))
+ .addReg(Reg, getKillRegState(isKill))
+ .addImm(Offset);
+ break;
+ case XCore::LDAWFI:
+ NewOpcode = (isU6) ? XCore::LDAWSP_ru6 : XCore::LDAWSP_lru6;
+ BuildMI(MBB, II, dl, TII.get(NewOpcode), Reg)
+ .addImm(Offset);
+ break;
+ default:
+ llvm_unreachable("Unexpected Opcode");
+ }
+ }
+ // Erase old instruction.
+ MBB.erase(II);
+}
+
+void XCoreRegisterInfo::
+loadConstant(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DstReg, int64_t Value, DebugLoc dl) const {
+ // TODO use mkmsk if possible.
+ if (!isImmU16(Value)) {
+ // TODO use constant pool.
+ report_fatal_error("loadConstant value too big " + Twine(Value));
+ }
+ int Opcode = isImmU6(Value) ? XCore::LDC_ru6 : XCore::LDC_lru6;
+ BuildMI(MBB, I, dl, TII.get(Opcode), DstReg).addImm(Value);
+}
+
+unsigned XCoreRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ return TFI->hasFP(MF) ? XCore::R10 : XCore::SP;
+}
diff --git a/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.h b/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.h
new file mode 100644
index 0000000..5c28f39
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.h
@@ -0,0 +1,80 @@
+//===- XCoreRegisterInfo.h - XCore Register Information Impl ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the XCore implementation of the MRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef XCOREREGISTERINFO_H
+#define XCOREREGISTERINFO_H
+
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#define GET_REGINFO_HEADER
+#include "XCoreGenRegisterInfo.inc"
+
+namespace llvm {
+
+class TargetInstrInfo;
+
+struct XCoreRegisterInfo : public XCoreGenRegisterInfo {
+private:
+ const TargetInstrInfo &TII;
+
+ void loadConstant(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DstReg, int64_t Value, DebugLoc dl) const;
+
+ void storeToStack(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned SrcReg, int Offset, DebugLoc dl) const;
+
+ void loadFromStack(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DstReg, int Offset, DebugLoc dl) const;
+
+public:
+ XCoreRegisterInfo(const TargetInstrInfo &tii);
+
+ /// Code Generation virtual methods...
+
+ const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ bool requiresRegisterScavenging(const MachineFunction &MF) const;
+
+ bool useFPForScavengingIndex(const MachineFunction &MF) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
+
+ // Debug information queries.
+ unsigned getFrameRegister(const MachineFunction &MF) const;
+
+ //! Return the array of argument passing registers
+ /*!
+ \note The size of this array is returned by getArgRegsSize().
+ */
+ static const unsigned *getArgRegs(const MachineFunction *MF = 0);
+
+ //! Return the size of the argument passing register array
+ static unsigned getNumArgRegs(const MachineFunction *MF = 0);
+
+ //! Return whether to emit frame moves
+ static bool needsFrameMoves(const MachineFunction &MF);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.td b/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.td
new file mode 100644
index 0000000..c354230
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.td
@@ -0,0 +1,56 @@
+//===- XCoreRegisterInfo.td - XCore Register defs ----------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the XCore register file
+//===----------------------------------------------------------------------===//
+
+class XCoreReg<string n> : Register<n> {
+ field bits<4> Num;
+ let Namespace = "XCore";
+}
+
+// Registers are identified with 4-bit ID numbers.
+// Ri - 32-bit integer registers
+class Ri<bits<4> num, string n> : XCoreReg<n> {
+ let Num = num;
+}
+
+// CPU registers
+def R0 : Ri< 0, "r0">, DwarfRegNum<[0]>;
+def R1 : Ri< 1, "r1">, DwarfRegNum<[1]>;
+def R2 : Ri< 2, "r2">, DwarfRegNum<[2]>;
+def R3 : Ri< 3, "r3">, DwarfRegNum<[3]>;
+def R4 : Ri< 4, "r4">, DwarfRegNum<[4]>;
+def R5 : Ri< 5, "r5">, DwarfRegNum<[5]>;
+def R6 : Ri< 6, "r6">, DwarfRegNum<[6]>;
+def R7 : Ri< 7, "r7">, DwarfRegNum<[7]>;
+def R8 : Ri< 8, "r8">, DwarfRegNum<[8]>;
+def R9 : Ri< 9, "r9">, DwarfRegNum<[9]>;
+def R10 : Ri<10, "r10">, DwarfRegNum<[10]>;
+def R11 : Ri<11, "r11">, DwarfRegNum<[11]>;
+def CP : Ri<12, "cp">, DwarfRegNum<[12]>;
+def DP : Ri<13, "dp">, DwarfRegNum<[13]>;
+def SP : Ri<14, "sp">, DwarfRegNum<[14]>;
+def LR : Ri<15, "lr">, DwarfRegNum<[15]>;
+
+// Register classes.
+//
+def GRRegs : RegisterClass<"XCore", [i32], 32,
+ // Return values and arguments
+ (add R0, R1, R2, R3,
+ // Not preserved across procedure calls
+ R11,
+ // Callee save
+ R4, R5, R6, R7, R8, R9, R10)>;
+
+// Reserved
+def RRegs : RegisterClass<"XCore", [i32], 32, (add CP, DP, SP, LR)> {
+ let isAllocatable = 0;
+}
diff --git a/contrib/llvm/lib/Target/XCore/XCoreSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/XCore/XCoreSelectionDAGInfo.cpp
new file mode 100644
index 0000000..44aeb60
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreSelectionDAGInfo.cpp
@@ -0,0 +1,23 @@
+//===-- XCoreSelectionDAGInfo.cpp - XCore SelectionDAG Info ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the XCoreSelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "xcore-selectiondag-info"
+#include "XCoreTargetMachine.h"
+using namespace llvm;
+
+XCoreSelectionDAGInfo::XCoreSelectionDAGInfo(const XCoreTargetMachine &TM)
+ : TargetSelectionDAGInfo(TM) {
+}
+
+XCoreSelectionDAGInfo::~XCoreSelectionDAGInfo() {
+}
diff --git a/contrib/llvm/lib/Target/XCore/XCoreSelectionDAGInfo.h b/contrib/llvm/lib/Target/XCore/XCoreSelectionDAGInfo.h
new file mode 100644
index 0000000..0386968
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreSelectionDAGInfo.h
@@ -0,0 +1,31 @@
+//===-- XCoreSelectionDAGInfo.h - XCore SelectionDAG Info -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the XCore subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef XCORESELECTIONDAGINFO_H
+#define XCORESELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class XCoreTargetMachine;
+
+class XCoreSelectionDAGInfo : public TargetSelectionDAGInfo {
+public:
+ explicit XCoreSelectionDAGInfo(const XCoreTargetMachine &TM);
+ ~XCoreSelectionDAGInfo();
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/XCore/XCoreSubtarget.cpp b/contrib/llvm/lib/Target/XCore/XCoreSubtarget.cpp
new file mode 100644
index 0000000..b4e9927
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreSubtarget.cpp
@@ -0,0 +1,28 @@
+//===- XCoreSubtarget.cpp - XCore Subtarget Information -----------*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the XCore specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "XCoreSubtarget.h"
+#include "XCore.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "XCoreGenSubtargetInfo.inc"
+
+using namespace llvm;
+
+XCoreSubtarget::XCoreSubtarget(const std::string &TT,
+ const std::string &CPU, const std::string &FS)
+ : XCoreGenSubtargetInfo(TT, CPU, FS)
+{
+}
diff --git a/contrib/llvm/lib/Target/XCore/XCoreSubtarget.h b/contrib/llvm/lib/Target/XCore/XCoreSubtarget.h
new file mode 100644
index 0000000..7b29fa2
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreSubtarget.h
@@ -0,0 +1,42 @@
+//=====-- XCoreSubtarget.h - Define Subtarget for the XCore -----*- C++ -*--==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the XCore specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef XCORESUBTARGET_H
+#define XCORESUBTARGET_H
+
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include <string>
+
+#define GET_SUBTARGETINFO_HEADER
+#include "XCoreGenSubtargetInfo.inc"
+
+namespace llvm {
+class StringRef;
+
+class XCoreSubtarget : public XCoreGenSubtargetInfo {
+
+public:
+ /// This constructor initializes the data members to match that
+ /// of the specified triple.
+ ///
+ XCoreSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS);
+
+ /// ParseSubtargetFeatures - Parses features string setting specified
+ /// subtarget options. Definition of function is auto generated by tblgen.
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+};
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp b/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp
new file mode 100644
index 0000000..fdc5d35
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -0,0 +1,44 @@
+//===-- XCoreTargetMachine.cpp - Define TargetMachine for XCore -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "XCoreTargetMachine.h"
+#include "XCore.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+/// XCoreTargetMachine ctor - Create an ILP32 architecture model
+///
+XCoreTargetMachine::XCoreTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM)
+ : LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
+ Subtarget(TT, CPU, FS),
+ DataLayout("e-p:32:32:32-a0:0:32-f32:32:32-f64:32:32-i1:8:32-i8:8:32-"
+ "i16:16:32-i32:32:32-i64:32:32-n32"),
+ InstrInfo(),
+ FrameLowering(Subtarget),
+ TLInfo(*this),
+ TSInfo(*this) {
+}
+
+bool XCoreTargetMachine::addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ PM.add(createXCoreISelDag(*this));
+ return false;
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeXCoreTarget() {
+ RegisterTargetMachine<XCoreTargetMachine> X(TheXCoreTarget);
+}
diff --git a/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.h b/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.h
new file mode 100644
index 0000000..83d09d6d
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.h
@@ -0,0 +1,63 @@
+//===-- XCoreTargetMachine.h - Define TargetMachine for XCore ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the XCore specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef XCORETARGETMACHINE_H
+#define XCORETARGETMACHINE_H
+
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+#include "XCoreFrameLowering.h"
+#include "XCoreSubtarget.h"
+#include "XCoreInstrInfo.h"
+#include "XCoreISelLowering.h"
+#include "XCoreSelectionDAGInfo.h"
+
+namespace llvm {
+
+class XCoreTargetMachine : public LLVMTargetMachine {
+ XCoreSubtarget Subtarget;
+ const TargetData DataLayout; // Calculates type size & alignment
+ XCoreInstrInfo InstrInfo;
+ XCoreFrameLowering FrameLowering;
+ XCoreTargetLowering TLInfo;
+ XCoreSelectionDAGInfo TSInfo;
+public:
+ XCoreTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM);
+
+ virtual const XCoreInstrInfo *getInstrInfo() const { return &InstrInfo; }
+ virtual const XCoreFrameLowering *getFrameLowering() const {
+ return &FrameLowering;
+ }
+ virtual const XCoreSubtarget *getSubtargetImpl() const { return &Subtarget; }
+ virtual const XCoreTargetLowering *getTargetLowering() const {
+ return &TLInfo;
+ }
+
+ virtual const XCoreSelectionDAGInfo* getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+
+ virtual const TargetRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+ virtual const TargetData *getTargetData() const { return &DataLayout; }
+
+ // Pass Pipeline Configuration
+ virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.cpp b/contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.cpp
new file mode 100644
index 0000000..7f4e1c1
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.cpp
@@ -0,0 +1,65 @@
+//===-- XCoreTargetObjectFile.cpp - XCore object files --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "XCoreTargetObjectFile.h"
+#include "XCoreSubtarget.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/ELF.h"
+using namespace llvm;
+
+
+void XCoreTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM){
+ TargetLoweringObjectFileELF::Initialize(Ctx, TM);
+
+ DataSection =
+ Ctx.getELFSection(".dp.data", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_WRITE |
+ ELF::XCORE_SHF_DP_SECTION,
+ SectionKind::getDataRel());
+ BSSSection =
+ Ctx.getELFSection(".dp.bss", ELF::SHT_NOBITS,
+ ELF::SHF_ALLOC | ELF::SHF_WRITE |
+ ELF::XCORE_SHF_DP_SECTION,
+ SectionKind::getBSS());
+
+ MergeableConst4Section =
+ Ctx.getELFSection(".cp.rodata.cst4", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_MERGE |
+ ELF::XCORE_SHF_CP_SECTION,
+ SectionKind::getMergeableConst4());
+ MergeableConst8Section =
+ Ctx.getELFSection(".cp.rodata.cst8", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_MERGE |
+ ELF::XCORE_SHF_CP_SECTION,
+ SectionKind::getMergeableConst8());
+ MergeableConst16Section =
+ Ctx.getELFSection(".cp.rodata.cst16", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_MERGE |
+ ELF::XCORE_SHF_CP_SECTION,
+ SectionKind::getMergeableConst16());
+
+ // TLS globals are lowered in the backend to arrays indexed by the current
+ // thread id. After lowering they require no special handling by the linker
+ // and can be placed in the standard data / bss sections.
+ TLSDataSection = DataSection;
+ TLSBSSSection = BSSSection;
+
+ ReadOnlySection =
+ Ctx.getELFSection(".cp.rodata", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |
+ ELF::XCORE_SHF_CP_SECTION,
+ SectionKind::getReadOnlyWithRel());
+
+ // Dynamic linking is not supported. Data with relocations is placed in the
+ // same section as data without relocations.
+ DataRelSection = DataRelLocalSection = DataSection;
+ DataRelROSection = DataRelROLocalSection = ReadOnlySection;
+}
diff --git a/contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.h b/contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.h
new file mode 100644
index 0000000..7424c78
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.h
@@ -0,0 +1,25 @@
+//===-- llvm/Target/XCoreTargetObjectFile.h - XCore Object Info -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_XCORE_TARGETOBJECTFILE_H
+#define LLVM_TARGET_XCORE_TARGETOBJECTFILE_H
+
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+
+namespace llvm {
+
+ class XCoreTargetObjectFile : public TargetLoweringObjectFileELF {
+ public:
+ void Initialize(MCContext &Ctx, const TargetMachine &TM);
+
+ // TODO: Classify globals as xcore wishes.
+ };
+} // end namespace llvm
+
+#endif
OpenPOWER on IntegriCloud